Commit 5a88271b authored by Jim Bankoski's avatar Jim Bankoski

don't tokenize & encode tokens for blocks in UMV

This avoids encoding tokens for blocks that are entirely
in the UMV border. This changes the bitstream.

Change-Id: I32b4df46ac8a990d0c37cee92fd34f8ddd4fb6c9
parent 28d31aed
/*
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <climits>
#include <vector>
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "test/codec_factory.h"
#include "test/encode_test_driver.h"
#include "test/i420_video_source.h"
#include "test/util.h"
namespace {
// Parameterized encoder fixture for the border tests below.  The first test
// parameter is forwarded to the EncoderTest constructor and the second one
// selects the encoding mode in SetUp().
class BordersTest : public ::libvpx_test::EncoderTest,
    public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
 protected:
  BordersTest() : EncoderTest(GET_PARAM(0)) {}

  // Loads the default configuration, then applies the parameterized mode.
  virtual void SetUp() {
    InitializeConfig();
    SetMode(GET_PARAM(1));
  }

  // The encode loop keeps going only while no fatal failure has been
  // recorded and no abort has been flagged.
  virtual bool Continue() const {
    return !(HasFatalFailure() || abort_);
  }

  // Applies the speed and alt-ref (ARNR) controls once, when the source
  // reports frame 1; every other frame is left untouched.
  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
                                  ::libvpx_test::Encoder *encoder) {
    if (video->frame() != 1)
      return;

    encoder->Control(VP8E_SET_CPUUSED, 5);
    encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
    encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
    encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
    encoder->Control(VP8E_SET_ARNR_TYPE, 3);
  }

  // Inspects the key-frame flag of each output packet; nothing is done with
  // it at present.
  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
    const bool is_key_frame =
        (pkt->data.frame.flags & VPX_FRAME_IS_KEY) != 0;
    (void)is_key_frame;
  }
};
TEST_P(BordersTest, TestEncodeHighBitrate) {
  // Validate that this clip, whose width is not a multiple of 64, encodes and
  // decodes without a mismatch when given a very low max q.  This pushes the
  // encoder toward producing lots of big partitions, which will likely
  // extend into the border and so exercise the border condition.
  cfg_.g_lag_in_frames = 25;
  // Two-pass VBR section limits.  The upper limit used to be written to the
  // min field a second time, which overwrote the 5 above and left the max
  // limit at its default.
  cfg_.rc_2pass_vbr_minsection_pct = 5;
  cfg_.rc_2pass_vbr_maxsection_pct = 2000;
  cfg_.rc_target_bitrate = 2000;
  cfg_.rc_max_quantizer = 10;

  ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
                                       40);

  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
}
TEST_P(BordersTest, TestLowBitrate) {
  // Validate that this clip encodes and decodes without a mismatch when
  // given a very high min q.  This pushes the encoder toward producing lots
  // of small partitions, which exercises the other border condition.
  cfg_.g_lag_in_frames = 25;
  // Two-pass VBR section limits.  The upper limit used to be written to the
  // min field a second time, which overwrote the 5 above and left the max
  // limit at its default.
  cfg_.rc_2pass_vbr_minsection_pct = 5;
  cfg_.rc_2pass_vbr_maxsection_pct = 2000;
  cfg_.rc_target_bitrate = 200;
  cfg_.rc_min_quantizer = 40;

  ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
                                       40);

  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
}
// Instantiate every BordersTest case with the single test mode kTwoPassGood.
// NOTE(review): VP9_INSTANTIATE_TEST_CASE is defined outside this file;
// presumably it also supplies the VP9 codec factory as the first test
// parameter -- confirm against test/codec_factory.h.
VP9_INSTANTIATE_TEST_CASE(BordersTest, ::testing::Values(
::libvpx_test::kTwoPassGood));
} // namespace
d5dfb0151c9051f8c85999255645d7a23916d3c0 hantro_collage_w352h288.yuv
b87815bf86020c592ccc7a846ba2e28ec8043902 hantro_odd.yuv
5184c46ddca8b1fadd16742e8500115bc8f749da vp80-00-comprehensive-001.ivf
65bf1bbbced81b97bd030f376d1b7f61a224793f vp80-00-comprehensive-002.ivf
906b4c1e99eb734504c504b3f1ad8052137ce672 vp80-00-comprehensive-003.ivf
......@@ -120,4 +121,4 @@ f95eb6214571434f1f73ab7833b9ccdf47588020 vp80-03-segmentation-1437.ivf.md5
41d70bb5fa45bc88da1604a0af466930b8dd77b5 vp80-05-sharpness-1438.ivf.md5
086c56378df81b6cee264d7540a7b8f2b405c7a4 vp80-05-sharpness-1439.ivf.md5
d32dc2c4165eb266ea4c23c14a45459b363def32 vp80-05-sharpness-1440.ivf.md5
8c69dc3d8e563f56ffab5ad1e400d9e689dd23df vp80-05-sharpness-1443.ivf.md5
\ No newline at end of file
8c69dc3d8e563f56ffab5ad1e400d9e689dd23df vp80-05-sharpness-1443.ivf.md5
......@@ -22,6 +22,7 @@ LIBVPX_TEST_SRCS-yes += encode_test_driver.h
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += error_resilience_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += i420_video_source.h
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += keyframe_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += borders_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += resize_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../md5_utils.h ../md5_utils.c
......@@ -92,6 +93,7 @@ endif
## TEST DATA
##
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_collage_w352h288.yuv
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_odd.yuv
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-001.ivf
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-002.ivf
......
......@@ -146,6 +146,7 @@ int vp9_alloc_frame_buffers(VP9_COMMON *oci, int width, int height) {
// FIXME(jkoleszar): allocate subsampled arrays for U/V once subsampling
// information is exposed at this level
mi_cols = mi_cols_aligned_to_sb(oci);
# if CONFIG_ALPHA
// TODO(jkoleszar): Why is this * 2?
oci->above_context[0] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * 8 * mi_cols, 1);
......
......@@ -631,14 +631,14 @@ static INLINE void foreach_transformed_block_in_plane(
// block and transform sizes, in number of 4x4 blocks log 2 ("*_b")
// 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8
// transform size varies per plane, look it up in a common way.
const TX_SIZE tx_size = plane ? get_uv_tx_size(xd)
: xd->mode_info_context->mbmi.txfm_size;
const TX_SIZE tx_size =
plane ? get_uv_tx_size(xd) : xd->mode_info_context->mbmi.txfm_size;
const int block_size_b = bw + bh;
const int txfrm_size_b = tx_size * 2;
// subsampled size of the block
const int ss_sum = xd->plane[plane].subsampling_x +
xd->plane[plane].subsampling_y;
const int ss_sum = xd->plane[plane].subsampling_x
+ xd->plane[plane].subsampling_y;
const int ss_block_size = block_size_b - ss_sum;
const int step = 1 << txfrm_size_b;
......@@ -647,8 +647,42 @@ static INLINE void foreach_transformed_block_in_plane(
assert(txfrm_size_b <= block_size_b);
assert(txfrm_size_b <= ss_block_size);
for (i = 0; i < (1 << ss_block_size); i += step) {
visit(plane, i, bsize, txfrm_size_b, arg);
// If mb_to_right_edge is < 0 we are in a situation in which
// the current block size extends into the UMV and we won't
// visit the sub blocks that are wholly within the UMV.
if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) {
int r, c;
const int sw = bw - xd->plane[plane].subsampling_x;
const int sh = bh - xd->plane[plane].subsampling_y;
int max_blocks_wide = 1 << sw;
int max_blocks_high = 1 << sh;
// xd->mb_to_right_edge is in units of pixels * 8. This converts
// it to 4x4 block sizes.
if (xd->mb_to_right_edge < 0)
max_blocks_wide +=
+ (xd->mb_to_right_edge >> (5 + xd->plane[plane].subsampling_x));
if (xd->mb_to_bottom_edge < 0)
max_blocks_high +=
+ (xd->mb_to_bottom_edge >> (5 + xd->plane[plane].subsampling_y));
i = 0;
// Unlike the normal case - in here we have to keep track of the
// row and column of the blocks we use so that we know if we are in
// the unrestricted motion border..
for (r = 0; r < (1 << sh); r += (1 << tx_size)) {
for (c = 0; c < (1 << sw); c += (1 << tx_size)) {
if (r < max_blocks_high && c < max_blocks_wide)
visit(plane, i, bsize, txfrm_size_b, arg);
i += step;
}
}
} else {
for (i = 0; i < (1 << ss_block_size); i += step) {
visit(plane, i, bsize, txfrm_size_b, arg);
}
}
}
......@@ -780,4 +814,89 @@ static void txfrm_block_to_raster_xy(MACROBLOCKD *xd,
*x = (raster_mb & (tx_cols - 1)) << (txwl);
*y = raster_mb >> tx_cols_lg2 << (txwl);
}
// Replicates edge pixels of plane `plane`'s destination buffer into the area
// beyond the right and/or bottom edge for the transform block `block`, when
// the current block of size `bsize` reaches past that edge (signalled by a
// negative xd->mb_to_right_edge / xd->mb_to_bottom_edge).  The visible
// callers invoke this just before intra prediction.
//
//   xd             macroblock descriptor; its plane[plane].dst is modified.
//   plane          index into xd->plane[].
//   block          transform block index, converted to a raster position by
//                  txfrm_block_to_raster_xy().
//   bsize          size of the enclosing block.
//   ss_txfrm_size  transform size argument forwarded to
//                  txfrm_block_to_raster_xy().
static void extend_for_intra(MACROBLOCKD* const xd, int plane, int block,
BLOCK_SIZE_TYPE bsize, int ss_txfrm_size) {
// NOTE(review): bw/bh are used below as pixel counts for this plane --
// confirm against plane_block_width()/plane_block_height().
const int bw = plane_block_width(bsize, &xd->plane[plane]);
const int bh = plane_block_height(bsize, &xd->plane[plane]);
int x, y;
txfrm_block_to_raster_xy(xd, bsize, plane, block, ss_txfrm_size, &x, &y);
// Scale the raster position by 4 and step back by one, i.e. address the
// column immediately left of / the row immediately above the scaled position.
x = x * 4 - 1;
y = y * 4 - 1;
// Copy a pixel into the umv if we are in a situation where the block size
// extends into the UMV.
// TODO(JBB): Should be able to do the full extend in place so we don't have
// to do this multiple times.
if (xd->mb_to_right_edge < 0) {
// mb_to_right_edge is shifted down by 3 plus the plane's horizontal
// subsampling; since it is negative here, this yields the first column of
// the block that lies past the right edge.
int umv_border_start = bw
+ (xd->mb_to_right_edge >> (3 + xd->plane[plane].subsampling_x));
// On row y, fill bw bytes starting at the border column with the value of
// the byte just before the border.
if (x + bw > umv_border_start)
vpx_memset(
xd->plane[plane].dst.buf + y * xd->plane[plane].dst.stride
+ umv_border_start,
*(xd->plane[plane].dst.buf + y * xd->plane[plane].dst.stride
+ umv_border_start - 1),
bw);
}
if (xd->mb_to_bottom_edge < 0) {
// Same computation vertically: first row of the block past the bottom edge.
int umv_border_start = bh
+ (xd->mb_to_bottom_edge >> (3 + xd->plane[plane].subsampling_y));
int i;
// Value of column x on the row just before the border.  Note that it is
// loaded before the range test below, i.e. unconditionally.
uint8_t c = *(xd->plane[plane].dst.buf
+ (umv_border_start - 1) * xd->plane[plane].dst.stride + x);
uint8_t *d = xd->plane[plane].dst.buf
+ umv_border_start * xd->plane[plane].dst.stride + x;
// Write that value down column x for bh rows, starting at the border row.
if (y + bh > umv_border_start)
for (i = 0; i < bh; i++, d += xd->plane[plane].dst.stride)
*d = c;
}
}
// Writes the above (A) and left (L) entropy contexts for one transform block
// of a block that reaches past the right and/or bottom frame edge.  Context
// entries that still fall inside the frame are set to (eob > 0); the
// remaining entries of the transform block are cleared to 0.
//
//   aoff / loff  offset of this transform block inside the enclosing block,
//                compared below against the block's width / height in 4x4
//                units.
//   ss_tx_size   log2 of the number of context entries the transform covers.
static void set_contexts_on_border(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize,
                                   int plane, int ss_tx_size, int eob, int aoff,
                                   int loff, ENTROPY_CONTEXT *A,
                                   ENTROPY_CONTEXT *L) {
  const int ss_x = xd->plane[plane].subsampling_x;
  const int ss_y = xd->plane[plane].subsampling_y;
  const int tx_entries = 1 << ss_tx_size;
  const int has_coeffs = eob > 0;
  int cols_in_frame = 1 << (b_width_log2(bsize) - ss_x);
  int rows_in_frame = 1 << (b_height_log2(bsize) - ss_y);
  int above_live = tx_entries;
  int left_live = tx_entries;
  int k;

  // xd->mb_to_right_edge / mb_to_bottom_edge are in units of pixels * 8; the
  // shift by 5 (plus subsampling) converts them to 4x4 block units.  They are
  // negative when the block overhangs the edge, so adding them shrinks the
  // block to the part that lies inside the frame.
  if (xd->mb_to_right_edge < 0)
    cols_in_frame += xd->mb_to_right_edge >> (5 + ss_x);
  if (xd->mb_to_bottom_edge < 0)
    rows_in_frame += xd->mb_to_bottom_edge >> (5 + ss_y);

  // Limit the number of contexts that receive the real value to those that
  // are inside the frame.
  if (aoff + above_live > cols_in_frame)
    above_live = cols_in_frame - aoff;
  if (loff + left_live > rows_in_frame)
    left_live = rows_in_frame - loff;

  for (k = 0; k < above_live; ++k)
    A[k] = has_coeffs;
  for (k = above_live; k < tx_entries; ++k)
    A[k] = 0;

  for (k = 0; k < left_live; ++k)
    L[k] = has_coeffs;
  for (k = left_live; k < tx_entries; ++k)
    L[k] = 0;
}
#endif // VP9_COMMON_VP9_BLOCKD_H_
......@@ -154,17 +154,49 @@ void vp9_find_mv_refs_idx(VP9_COMMON *cm, MACROBLOCKD *xd, MODE_INFO *here,
vpx_memset(mv_ref_list, 0, sizeof(int_mv) * MAX_MV_REF_CANDIDATES);
vpx_memset(candidate_scores, 0, sizeof(candidate_scores));
if (mbmi->sb_type == BLOCK_SIZE_SB64X64) {
mv_ref_search = sb64_mv_ref_search;
} else if (mbmi->sb_type >= BLOCK_SIZE_SB32X32) {
mv_ref_search = sb_mv_ref_search;
} else if (mbmi->sb_type >= BLOCK_SIZE_MB16X16) {
mv_ref_search = mb_mv_ref_search;
} else {
mv_ref_search = b_mv_ref_search;
if (mbmi->sb_type < BLOCK_SIZE_SB8X8) {
x_idx = block_idx & 1;
y_idx = block_idx >> 1;
if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) {
int pixels_wide = 4 * b_width_log2(mbmi->sb_type);
int pixels_high = 4 * b_height_log2(mbmi->sb_type);
int pixels_square = 0;
if (xd->mb_to_right_edge < 0)
pixels_wide += (xd->mb_to_right_edge >> 3);
if (xd->mb_to_bottom_edge < 0)
pixels_high += (xd->mb_to_bottom_edge >> 3);
if ( pixels_wide < pixels_high )
pixels_square = pixels_wide;
else
pixels_square = pixels_high;
if (pixels_square == 64) {
mv_ref_search = sb64_mv_ref_search;
} else if (pixels_square == 32) {
mv_ref_search = sb_mv_ref_search;
} else if (pixels_square == 16) {
mv_ref_search = mb_mv_ref_search;
} else {
mv_ref_search = b_mv_ref_search;
if (mbmi->sb_type < BLOCK_SIZE_SB8X8) {
x_idx = block_idx & 1;
y_idx = block_idx >> 1;
}
}
}
else {
if (mbmi->sb_type == BLOCK_SIZE_SB64X64) {
mv_ref_search = sb64_mv_ref_search;
} else if (mbmi->sb_type >= BLOCK_SIZE_SB32X32) {
mv_ref_search = sb_mv_ref_search;
} else if (mbmi->sb_type >= BLOCK_SIZE_MB16X16) {
mv_ref_search = mb_mv_ref_search;
} else {
mv_ref_search = b_mv_ref_search;
if (mbmi->sb_type < BLOCK_SIZE_SB8X8) {
x_idx = block_idx & 1;
y_idx = block_idx >> 1;
}
}
}
......
......@@ -240,6 +240,7 @@ static void decode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize,
mode = plane == 0? xd->mode_info_context->mbmi.mode:
xd->mode_info_context->mbmi.uv_mode;
if (xd->mode_info_context->mbmi.sb_type < BLOCK_SIZE_SB8X8 && plane == 0) {
assert(bsize == BLOCK_SIZE_SB8X8);
b_mode = xd->mode_info_context->bmi[raster_block].as_mode.first;
......@@ -247,6 +248,10 @@ static void decode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize,
b_mode = mode;
}
if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) {
extend_for_intra(xd, plane, block, bsize, ss_txfrm_size);
}
plane_b_size = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
vp9_predict_intra_block(xd, tx_ib, plane_b_size, tx_size,
b_mode, dst, xd->plane[plane].dst.stride);
......
......@@ -288,9 +288,6 @@ SKIP_START:
if (c < seg_eob)
coef_counts[type][ref][band][pt][DCT_EOB_MODEL_TOKEN]++;
for (pt = 0; pt < (1 << txfm_size); pt++) {
A[pt] = L[pt] = c > 0;
}
return c;
}
......@@ -299,7 +296,6 @@ static int get_eob(MACROBLOCKD* const xd, int segment_id, int eob_max) {
return vp9_get_segdata(xd, segment_id, SEG_LVL_SKIP) ? 0 : eob_max;
}
struct decode_block_args {
VP9D_COMP *pbi;
MACROBLOCKD *xd;
......@@ -314,6 +310,7 @@ static void decode_block(int plane, int block,
const int bw = b_width_log2(bsize);
// find the maximum eob for this transform size, adjusted by segment
MACROBLOCKD *xd = arg->xd;
const int segment_id = arg->xd->mode_info_context->mbmi.segment_id;
const TX_SIZE ss_tx_size = ss_txfrm_size / 2;
const int seg_eob = get_eob(arg->xd, segment_id, 16 << ss_txfrm_size);
......@@ -322,14 +319,23 @@ static void decode_block(int plane, int block,
const int mod = bw - ss_tx_size - arg->xd->plane[plane].subsampling_x;
const int aoff = (off & ((1 << mod) - 1)) << ss_tx_size;
const int loff = (off >> mod) << ss_tx_size;
int pt;
ENTROPY_CONTEXT *A = arg->xd->plane[plane].above_context + aoff;
ENTROPY_CONTEXT *L = arg->xd->plane[plane].left_context + loff;
const int eob = decode_coefs(arg->pbi, arg->xd, arg->r, block,
arg->xd->plane[plane].plane_type, seg_eob,
BLOCK_OFFSET(qcoeff_base, block, 16),
ss_tx_size, arg->xd->plane[plane].dequant,
arg->xd->plane[plane].above_context + aoff,
arg->xd->plane[plane].left_context + loff);
A,
L);
if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) {
set_contexts_on_border(xd, bsize, plane, ss_tx_size, eob, aoff, loff, A, L);
} else {
for (pt = 0; pt < (1 << ss_tx_size); pt++) {
A[pt] = L[pt] = eob > 0;
}
}
arg->xd->plane[plane].eobs[block] = eob;
arg->eobtotal[0] += eob;
}
......
......@@ -467,7 +467,9 @@ static void update_state(VP9_COMP *cpi,
int i, j;
for (j = 0; j < bh; ++j)
for (i = 0; i < bw; ++i)
xd->mode_info_context[mis * j + i].mbmi = *mbmi;
if ((xd->mb_to_right_edge >> (3 + LOG2_MI_SIZE)) + bw > j &&
(xd->mb_to_bottom_edge >> (3 + LOG2_MI_SIZE)) + bh > i)
xd->mode_info_context[mis * j + i].mbmi = *mbmi;
}
if (cpi->common.mcomp_filter_type == SWITCHABLE &&
......@@ -915,13 +917,16 @@ static void set_block_size(VP9_COMMON *const cm,
MODE_INFO *m, BLOCK_SIZE_TYPE bsize, int mis,
int mi_row, int mi_col) {
int row, col;
int bsl = b_width_log2(bsize);
int bwl = b_width_log2(bsize);
int bhl = b_height_log2(bsize);
int bsl = (bwl > bhl ? bwl : bhl);
int bs = (1 << bsl) / 2; //
MODE_INFO *m2 = m + mi_row * mis + mi_col;
for (row = 0; row < bs; row++) {
for (col = 0; col < bs; col++) {
if (mi_row + row >= cm->mi_rows || mi_col + col >= cm->mi_cols)
return;
continue;
m2[row*mis+col].mbmi.sb_type = bsize;
}
}
......@@ -961,21 +966,6 @@ static void fill_variance(var *v, int64_t s2, int64_t s, int c) {
/ v->count;
}
// Populates the four 8x8 leaves of a 16x16 variance-tree node.  For each
// quadrant (top-left, top-right, bottom-left, bottom-right) the 8x8 SSE and
// sum between source `s` (stride `sp`) and reference `d` (stride `dp`) are
// computed and stored through fill_variance() with a count of 64.
static void fill_16x16_variance(const unsigned char *s, int sp,
                                const unsigned char *d, int dp, v16x16 *vt) {
  unsigned int sse;
  int sum;
  int quad;

  for (quad = 0; quad < 4; ++quad) {
    // Bit 0 of the quadrant index selects the column, bit 1 the row.
    const int col = (quad & 1) * 8;
    const int row = (quad >> 1) * 8;

    vp9_get_sse_sum_8x8(s + row * sp + col, sp, d + row * dp + col, dp,
                        &sse, &sum);
    fill_variance(&vt->split[quad].none, sse, sum, 64);
  }
}
// Combine 2 variance structures by summing the sum_error, sum_square_error,
// and counts and then calculating the new variance.
void sum_2_variances(var *r, var *a, var*b) {
......@@ -1021,8 +1011,18 @@ static void choose_partitioning(VP9_COMP *cpi, MODE_INFO *m, int mi_row,
int sp;
const unsigned char * d = xd->plane[0].pre->buf;
int dp = xd->plane[0].pre->stride;
int pixels_wide = 64, pixels_high = 64;
vpx_memset(&vt, 0, sizeof(vt));
set_offsets(cpi, mi_row, mi_col, BLOCK_SIZE_SB64X64);
if (xd->mb_to_right_edge < 0)
pixels_wide += (xd->mb_to_right_edge >> 3);
if (xd->mb_to_bottom_edge < 0)
pixels_high += (xd->mb_to_bottom_edge >> 3);
s = x->plane[0].src.buf;
sp = x->plane[0].src.stride;
......@@ -1034,6 +1034,7 @@ static void choose_partitioning(VP9_COMP *cpi, MODE_INFO *m, int mi_row,
d = vp9_64x64_zeros;
dp = 64;
// }
// Fill in the entire tree of 8x8 variances for splits.
for (i = 0; i < 4; i++) {
const int x32_idx = ((i & 1) << 5);
......@@ -1041,8 +1042,28 @@ static void choose_partitioning(VP9_COMP *cpi, MODE_INFO *m, int mi_row,
for (j = 0; j < 4; j++) {
const int x_idx = x32_idx + ((j & 1) << 4);
const int y_idx = y32_idx + ((j >> 1) << 4);
fill_16x16_variance(s + y_idx * sp + x_idx, sp, d + y_idx * dp + x_idx,
dp, &vt.split[i].split[j]);
const uint8_t *st = s + y_idx * sp + x_idx;
const uint8_t *dt = d + y_idx * dp + x_idx;
unsigned int sse = 0;
int sum = 0;
v16x16 *vst = &vt.split[i].split[j];
sse = sum = 0;
if (x_idx < pixels_wide && y_idx < pixels_high)
vp9_get_sse_sum_8x8(st, sp, dt, dp, &sse, &sum);
fill_variance(&vst->split[0].none, sse, sum, 64);
sse = sum = 0;
if (x_idx + 8 < pixels_wide && y_idx < pixels_high)
vp9_get_sse_sum_8x8(st + 8, sp, dt + 8, dp, &sse, &sum);
fill_variance(&vst->split[1].none, sse, sum, 64);
sse = sum = 0;
if (x_idx < pixels_wide && y_idx + 8 < pixels_high)
vp9_get_sse_sum_8x8(st + 8 * sp, sp, dt + 8 * dp, dp, &sse, &sum);
fill_variance(&vst->split[2].none, sse, sum, 64);
sse = sum = 0;
if (x_idx + 8 < pixels_wide && y_idx + 8 < pixels_high)
vp9_get_sse_sum_8x8(st + 8 * sp + 8, sp, dt + 8 + 8 * dp, dp, &sse,
&sum);
fill_variance(&vst->split[3].none, sse, sum, 64);
}
}
// Fill the rest of the variance tree by summing the split partition
......@@ -1088,8 +1109,10 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp,
MACROBLOCK * const x = &cpi->mb;
MACROBLOCKD *xd = &cpi->mb.e_mbd;
const int mis = cm->mode_info_stride;
int bwl, bhl;
int bwl = b_width_log2(m->mbmi.sb_type);
int bhl = b_height_log2(m->mbmi.sb_type);
int bsl = b_width_log2(bsize);
int bh = (1 << bhl);
int bs = (1 << bsl);
int bss = (1 << bsl)/4;
int i, pl;
......@@ -1103,9 +1126,6 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp,
return;
bwl = b_width_log2(m->mbmi.sb_type);
bhl = b_height_log2(m->mbmi.sb_type);
// parse the partition type
if ((bwl == bsl) && (bhl == bsl))
partition = PARTITION_NONE;
......@@ -1144,7 +1164,7 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp,
*(get_sb_index(xd, subsize)) = 0;
pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, subsize,
get_block_context(x, subsize));
if (mi_row + (bs >> 1) <= cm->mi_rows) {
if (mi_row + (bh >> 1) <= cm->mi_rows) {
int rt, dt;
update_state(cpi, get_block_context(x, subsize), subsize, 0);
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
......@@ -1404,18 +1424,13 @@ static void encode_sb_row(VP9_COMP *cpi, int mi_row,
for (mi_col = cm->cur_tile_mi_col_start;
mi_col < cm->cur_tile_mi_col_end; mi_col += 8) {
int dummy_rate, dummy_dist;
// TODO(JBB): remove the border conditions for 64x64 blocks once its fixed
// without this border check choose will fail on the border of every
// non 64x64.
if (cpi->speed < 5 ||
mi_col + 8 > cm->cur_tile_mi_col_end ||
mi_row + 8 > cm->cur_tile_mi_row_end) {
if (cpi->speed < 5) {
rd_pick_partition(cpi, tp, mi_row, mi_col, BLOCK_SIZE_SB64X64,
&dummy_rate, &dummy_dist);
} else {
const int idx_str = cm->mode_info_stride * mi_row + mi_col;
MODE_INFO *m = cm->mi + idx_str;
// set_partitioning(cpi, m, BLOCK_SIZE_SB8X8);
// set_partitioning(cpi, m, BLOCK_SIZE_SB64X64);
choose_partitioning(cpi, cm->mi, mi_row, mi_col);
rd_use_partition(cpi, m, tp, mi_row, mi_col, BLOCK_SIZE_SB64X64,
&dummy_rate, &dummy_dist);
......
......@@ -615,6 +615,10 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize,
TX_TYPE tx_type;
int mode, b_mode;
if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) {
extend_for_intra(xd, plane, block, bsize, ss_txfrm_size);
}
mode = plane == 0? mbmi->mode: mbmi->uv_mode;
if (plane == 0 &&
mbmi->sb_type < BLOCK_SIZE_SB8X8 &&
......@@ -684,7 +688,6 @@ void vp9_encode_intra_block_uv(VP9_COMMON *cm, MACROBLOCK *x,
MACROBLOCKD* const xd = &x->e_mbd;
struct optimize_ctx ctx;
struct encode_b_args arg = {cm, x, &ctx};
foreach_transformed_block_uv(xd, bsize, encode_block_intra, &arg);
}
......@@ -505,33 +505,48 @@ static int block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
return sum > INT_MAX ? INT_MAX : (int)sum;
}
static int rdcost_plane(VP9_COMMON *const cm, MACROBLOCK *x,
int plane, BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
MACROBLOCKD *const xd = &x->e_mbd;
// State carried through the per-transform-block callback rdcost_block().
// NOTE: rdcost_plane() fills this with a positional initializer, so the
// member order must not change.
struct rdcost_block_args {
// Codec-wide state and encoder macroblock, both forwarded to cost_coeffs().
VP9_COMMON *cm;
MACROBLOCK *x;
// Scratch above/left entropy contexts; rdcost_plane() copies the plane's
// contexts into these and cost_coeffs() is handed pointers into them.
ENTROPY_CONTEXT t_above[16];
ENTROPY_CONTEXT t_left[16];
// Transform size used for every block that is costed.
TX_SIZE tx_size;
// Plane block width and height; their product is passed to cost_coeffs().
int bw;
int bh;
// Running total of the values returned by cost_coeffs().
int cost;
};
// Per-transform-block visitor: adds the coefficient cost of `block` in
// `plane` to the running total held in the rdcost_block_args passed via
// `arg`.  The block's raster position picks the entries of the scratch
// above/left contexts that are handed to cost_coeffs().  Note that the
// transform size comes from the args (tx_size * 2), not from ss_txfrm_size.
static void rdcost_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
                         int ss_txfrm_size, void *arg) {
  struct rdcost_block_args *const ctx = arg;
  MACROBLOCKD *const xd = &ctx->x->e_mbd;
  int col, row;
  int block_cost;

  txfrm_block_to_raster_xy(xd, bsize, plane, block, ctx->tx_size * 2,
                           &col, &row);

  block_cost = cost_coeffs(ctx->cm, ctx->x, plane, block,
                           xd->plane[plane].plane_type,
                           ctx->t_above + col, ctx->t_left + row,
                           ctx->tx_size, ctx->bw * ctx->bh);
  ctx->cost += block_cost;
}
static int rdcost_plane(VP9_COMMON * const cm, MACROBLOCK *x, int plane,
BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
MACROBLOCKD * const xd = &x->e_mbd;
const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y;
const int bw = 1 << bwl, bh = 1 << bhl;
ENTROPY_CONTEXT t_above[16], t_left[16];
int block, cost;
struct rdcost_block_args args = { cm, x, { 0 }, { 0 }, tx_size, bw, bh, 0 };
vpx_memcpy(&t_above, xd->plane[plane].above_context,
vpx_memcpy(&args.t_above, xd->plane[plane].above_context,
sizeof(ENTROPY_CONTEXT) * bw);
vpx_memcpy(&t_left, xd->plane[plane].left_context,
vpx_memcpy(&args.t_left, xd->plane[plane].left_context,
sizeof(ENTROPY_CONTEXT) * bh);
cost = 0;
for (block = 0; block < bw * bh; block += 1 << (tx_size * 2)) {
int x_idx, y_idx;
foreach_transformed_block_in_plane(xd, bsize, plane, rdcost_block, &args);
txfrm_block_to_raster_xy(xd, bsize, plane, block, tx_size * 2,
&x_idx, &y_idx);
cost += cost_coeffs(cm, x, plane, block, xd->plane[plane].plane_type,
t_above + x_idx, t_left + y_idx,
tx_size, bw * bh);
}
return cost;
return args.cost;
}
static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x,
......@@ -582,6 +597,7 @@ static void super_block_yrd(VP9_COMP *cpi,
} else {
mbmi->txfm_size = TX_4X4;
}
vpx_memset(txfm_cache, 0, NB_TXFM_MODES * sizeof(int64_t));
super_block_yrd_for_txfm(cm, x, rate, distortion, skip, bs,
mbmi->txfm_size);
return;
......@@ -826,6 +842,7 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
int64_t local_txfm_cache[NB_TXFM_MODES];
MODE_INFO *const mic = xd->mode_info_context;
const int mis = xd->mode_info_stride;
if (cpi->common.frame_type == KEY_FRAME) {
const MB_PREDICTION_MODE A = above_block_mode(mic, 0, mis);
const MB_PREDICTION_MODE L = xd->left_available ?
......@@ -2410,6 +2427,7 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
int64_t err4x4 = INT64_MAX;
int i;
vpx_memset(&txfm_cache,0,sizeof(txfm_cache));
ctx->skip = 0;
xd->mode_info_context->mbmi.mode = DC_PRED;
xd->mode_info_context->mbmi.ref_frame = INTRA_FRAME;
......@@ -2502,6 +2520,10 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
int_mv seg_mvs[4][MAX_REF_FRAMES];
union b_mode_info best_bmodes[4];
PARTITION_INFO best_partition;
int bwsl = b_width_log2(bsize);
int bws = (1 << bwsl) / 4; // mode_info step for subsize
int bhsl = b_width_log2(bsize);
int bhs = (1 << bhsl) / 4; // mode_info step for subsize
for (i = 0; i < 4; i++) {
int j;
......@@ -2723,6 +2745,15 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
}
}
}
// TODO(JBB): This is to make up for the fact that we don't have sad
// functions that work when the block size reads outside the umv. We
// should fix this either by making the motion search just work on
// a representative block in the boundary ( first ) and then implement a
// function that does sads when inside the border..
if (((mi_row + bhs) < cm->mi_rows || (mi_col + bws) < cm->mi_cols) &&
this_mode == NEWMV) {
continue;
}
if (this_mode == I4X4_PRED) {
int rate