diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index bb491695d634a76e55a84cac820250fe3937984b..7b834d7a07dfbc0f2bc38e6dd0814b97ee4b21af 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -208,7 +208,6 @@ specialize vp9_short_iht16x16_add prototype void vp9_idct4_1d "int16_t *input, int16_t *output" specialize vp9_idct4_1d sse2 - # dct and add prototype void vp9_dc_only_idct_add "int input_dc, uint8_t *pred_ptr, uint8_t *dst_ptr, int pitch, int stride" @@ -265,6 +264,10 @@ specialize vp9_variance8x16 mmx sse2 prototype unsigned int vp9_variance8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" specialize vp9_variance8x8 mmx sse2 +prototype void vp9_get_sse_sum_8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum" +specialize vp9_get_sse_sum_8x8 sse2 +vp9_get_sse_sum_8x8_sse2=vp9_get8x8var_sse2 + prototype unsigned int vp9_variance8x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" specialize vp9_variance8x4 sse2 diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index e67c0fed0b9a8db534b86531df04c5c69d43df67..ac5fc7f937effa9265b3aba6569ac79e205c420b 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -10,6 +10,7 @@ #include "./vpx_config.h" +#include "./vp9_rtcd.h" #include "vp9/encoder/vp9_encodeframe.h" #include "vp9/encoder/vp9_encodemb.h" #include "vp9/encoder/vp9_encodemv.h" @@ -97,6 +98,8 @@ static unsigned int alt_activity_measure(VP9_COMP *cpi, return vp9_encode_intra(cpi, x, use_dc_pred); } +DECLARE_ALIGNED(16, static const uint8_t, vp9_64x64_zeros[64*64]) = { 0 }; + // Measure the activity of the current macroblock // What we measure here is TBD so abstracted to this function @@ -765,6 +768,35 @@ static void restore_context(VP9_COMP *cpi, int mi_row, int mi_col, 
vpx_memcpy(cm->left_seg_context + (mi_row & MI_MASK), sl,
              sizeof(PARTITION_CONTEXT) * mh);
 }
+// Saves the above/left entropy and partition contexts covering the bsize
+// block at (mi_row, mi_col) into a/l/sa/sl, for use with restore_context().
+static void save_context(VP9_COMP *cpi, int mi_row, int mi_col,
+                         ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
+                         ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
+                         PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
+                         BLOCK_SIZE_TYPE bsize) {
+  VP9_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *const xd = &cpi->mb.e_mbd;
+  int p;
+  int bwl = b_width_log2(bsize), bw = 1 << bwl;
+  int bhl = b_height_log2(bsize), bh = 1 << bhl;
+  int mwl = mi_width_log2(bsize), mw = 1 << mwl;
+  int mhl = mi_height_log2(bsize), mh = 1 << mhl;
+
+  // buffer the above/left context information of the block in search.
+  for (p = 0; p < MAX_MB_PLANE; ++p) {
+    vpx_memcpy(a + bw * p, cm->above_context[p] +
+               (mi_col * 2 >> xd->plane[p].subsampling_x),
+               sizeof(ENTROPY_CONTEXT) * bw >> xd->plane[p].subsampling_x);
+    vpx_memcpy(l + bh * p, cm->left_context[p] +
+               ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y),
+               sizeof(ENTROPY_CONTEXT) * bh >> xd->plane[p].subsampling_y);
+  }
+  vpx_memcpy(sa, cm->above_seg_context + mi_col,
+             sizeof(PARTITION_CONTEXT) * mw);
+  vpx_memcpy(sl, cm->left_seg_context + (mi_row & MI_MASK),
+             sizeof(PARTITION_CONTEXT) * mh);
+}
 
 static void encode_b(VP9_COMP *cpi, TOKENEXTRA **tp,
                      int mi_row, int mi_col, int output_enabled,
@@ -857,6 +889,378 @@ static void encode_sb(VP9_COMP *cpi, TOKENEXTRA **tp,
   }
 }
 
+// Test helper: marks every mode-info cell of the 64x64 superblock at m with
+// the same bsize. The call in encode_sb_row is commented out in this change.
+static void set_partitioning(VP9_COMP *cpi, MODE_INFO *m,
+                             BLOCK_SIZE_TYPE bsize) {
+  VP9_COMMON *const cm = &cpi->common;
+  const int mis = cm->mode_info_stride;
+  int bsl = b_width_log2(bsize);
+  int bs = (1 << bsl) / 2;  // tile edge, in mode-info cells
+  int block_row, block_col;
+  int row, col;
+
+  assert(bs > 0);  // a zero step would never terminate the loops below
+  // Walk the 8x8 grid of cells tile by tile and stamp bsize on each cell.
+  for (block_row = 0; block_row < 8; block_row += bs) {
+    for (block_col = 0; block_col < 8; block_col += bs) {
+      for (row = 0; row < bs; row++) {
+        for (col = 0; col < bs; col++) {
+          m[(block_row+row)*mis + block_col+col].mbmi.sb_type = bsize;
+        }
+      }
+    }
+  }
+}
+
+// Writes bsize into the sb_type of every mode-info cell of the block whose
+// top-left cell is (mi_row, mi_col). m is the base of the mode-info array and
+// mis its stride. Cells outside cm->mi_rows x cm->mi_cols are left untouched.
+static void set_block_size(VP9_COMMON *const cm,
+                           MODE_INFO *m, BLOCK_SIZE_TYPE bsize, int mis,
+                           int mi_row, int mi_col) {
+  int row, col;
+  int bsl = b_width_log2(bsize);
+  int bs = (1 << bsl) / 2;  // cells per side, taken from the width only
+  MODE_INFO *m2 = m + mi_row * mis + mi_col;
+  for (row = 0; row < bs; row++) {
+    // Once a row is below the frame, every later row is too.
+    if (mi_row + row >= cm->mi_rows)
+      return;
+    for (col = 0; col < bs; col++) {
+      // Running off the right edge ends this row only; later rows may still
+      // start inside the frame.
+      if (mi_col + col >= cm->mi_cols)
+        break;
+      m2[row*mis+col].mbmi.sb_type = bsize;
+    }
+  }
+}
+
+// Accumulated error statistics for one node of the variance tree.
+typedef struct {
+  int64_t sum_square_error;
+  int64_t sum_error;
+  int count;
+  int variance;  // scaled by 256, see fill_variance()
+} var;
+
+// Declares tree node TYPE: statistics for the whole block (none), for its two
+// horizontal and two vertical halves, and four children of type BLOCKSIZE.
+#define VT(TYPE, BLOCKSIZE) \
+  typedef struct { \
+    var none; \
+    var horz[2]; \
+    var vert[2]; \
+    BLOCKSIZE split[4]; } TYPE;
+
+VT(v8x8, var)
+VT(v16x16, v8x8)
+VT(v32x32, v16x16)
+VT(v64x64, v32x32)
+
+typedef enum {
+  V16X16,
+  V32X32,
+  V64X64,
+} TREE_LEVEL;
+
+// Set variance values given sum square error, sum error, count. The stored
+// variance is 256 * (s2 - s * s / c) / c in integer arithmetic.
+static void fill_variance(var *v, int64_t s2, int64_t s, int c) {
+  v->sum_square_error = s2;
+  v->sum_error = s;
+  v->count = c;
+  if (c <= 0) {  // no samples: avoid dividing by zero
+    v->variance = 0;
+    return;
+  }
+  v->variance = (int)(256 * (s2 - s * s / c) / c);
+}
+
+// Fills the four 8x8 leaves of a 16x16 variance tree node from the sse/sum
+// that vp9_get_sse_sum_8x8 reports for source s (stride sp) against d
+// (stride dp). Leaves are stored in raster order.
+static void fill_16x16_variance(const unsigned char *s, int sp,
+                                const unsigned char *d, int dp, v16x16 *vt) {
+  unsigned int sse;
+  int sum;
+  vp9_get_sse_sum_8x8(s, sp, d, dp, &sse, &sum);
+  fill_variance(&vt->split[0].none, sse, sum, 64);
+  vp9_get_sse_sum_8x8(s + 8, sp, d + 8, dp, &sse, &sum);
+  fill_variance(&vt->split[1].none, sse, sum, 64);
+  vp9_get_sse_sum_8x8(s + 8 * sp, sp, d + 8 * dp, dp, &sse, &sum);
+  fill_variance(&vt->split[2].none, sse, sum, 64);
+  vp9_get_sse_sum_8x8(s + 8 * sp + 8, sp, d + 8 + 8 * dp, dp, &sse, &sum);
+  fill_variance(&vt->split[3].none, sse, sum, 64);
+}
+
+// Combine 2 variance structures by summing the sum_error, sum_square_error,
+// and counts and then calculating the new variance.
+static void sum_2_variances(var *r, const var *a, const var *b) {
+  fill_variance(r, a->sum_square_error + b->sum_square_error,
+                a->sum_error + b->sum_error, a->count + b->count);
+}
+
+// Fill one level of our variance tree, by summing the split sums into each of
+// the horizontal, vertical and none from split and recalculating variance.
+// VT is a pointer to the node to fill.
+#define fill_variance_tree(VT) do { \
+  sum_2_variances(&(VT)->horz[0], &(VT)->split[0].none, &(VT)->split[1].none); \
+  sum_2_variances(&(VT)->horz[1], &(VT)->split[2].none, &(VT)->split[3].none); \
+  sum_2_variances(&(VT)->vert[0], &(VT)->split[0].none, &(VT)->split[2].none); \
+  sum_2_variances(&(VT)->vert[1], &(VT)->split[1].none, &(VT)->split[3].none); \
+  sum_2_variances(&(VT)->none, &(VT)->vert[0], &(VT)->vert[1]); \
+} while (0)
+
+// Set the blocksize in the macroblock info structure if the variance is less
+// than our threshold to one of none, horz, vert, then run ACTION. This stays a
+// bare if-chain because ACTION is a return or continue that must act on the
+// caller; it also reads the caller's cm, m, mis and threshold.
+#define set_vt_size(VT, BLOCKSIZE, R, C, ACTION) \
+  if ((VT).none.variance < threshold) { \
+    set_block_size(cm, m, BLOCKSIZE, mis, R, C); \
+    ACTION; \
+  } \
+  if ((VT).horz[0].variance < threshold && \
+      (VT).horz[1].variance < threshold) { \
+    set_block_size(cm, m, get_subsize(BLOCKSIZE, PARTITION_HORZ), mis, R, C); \
+    ACTION; \
+  } \
+  if ((VT).vert[0].variance < threshold && \
+      (VT).vert[1].variance < threshold) { \
+    set_block_size(cm, m, get_subsize(BLOCKSIZE, PARTITION_VERT), mis, R, C); \
+    ACTION; \
+  }
+
+// Chooses the partitioning of the 64x64 superblock at (mi_row, mi_col) from
+// block variances and records it through set_block_size(). A node is kept
+// whole, or halved, when its variances are under a qindex-scaled threshold;
+// otherwise its four children are examined, down to 8x8.
+static void choose_partitioning(VP9_COMP *cpi, MODE_INFO *m, int mi_row,
+                                int mi_col) {
+  VP9_COMMON *const cm = &cpi->common;
+  MACROBLOCK *x = &cpi->mb;
+  const int mis = cm->mode_info_stride;
+  // TODO(JBB): More experimentation or testing of this threshold;
+  int64_t threshold = 4;
+  int i, j, k;
+  v64x64 vt;
+  unsigned char *s;
+  int sp;
+  // The reference is always the all-zero block, so the statistics describe
+  // the source alone. NOTE(review): this looks intended for key frames only,
+  // with the prediction in e_mbd.plane[0].pre used otherwise - confirm.
+  const unsigned char *d = vp9_64x64_zeros;
+  int dp = 64;
+
+  set_offsets(cpi, mi_row, mi_col, BLOCK_SIZE_SB64X64);
+  s = x->plane[0].src.buf;
+  sp = x->plane[0].src.stride;
+
+  // TODO(JBB): Clearly the higher the quantizer the fewer partitions we want
+  // but this needs more experimentation.
+  threshold = threshold * cpi->common.base_qindex * cpi->common.base_qindex;
+
+  // Fill in the entire tree of 8x8 variances for splits.
+  for (i = 0; i < 4; i++) {
+    const int x32_idx = ((i & 1) << 5);
+    const int y32_idx = ((i >> 1) << 5);
+    for (j = 0; j < 4; j++) {
+      const int x_idx = x32_idx + ((j & 1) << 4);
+      const int y_idx = y32_idx + ((j >> 1) << 4);
+      fill_16x16_variance(s + y_idx * sp + x_idx, sp, d + y_idx * dp + x_idx,
+                          dp, &vt.split[i].split[j]);
+    }
+  }
+  // Fill the rest of the variance tree by summing the split partition
+  // values.
+  for (i = 0; i < 4; i++) {
+    for (j = 0; j < 4; j++) {
+      fill_variance_tree(&vt.split[i].split[j]);
+    }
+    fill_variance_tree(&vt.split[i]);
+  }
+  fill_variance_tree(&vt);
+
+  // Now go through the entire structure, splitting every blocksize until
+  // we get to one that's got a variance lower than our threshold, or we
+  // hit 8x8. Each level tests its own node, not the 64x64 root.
+  set_vt_size(vt, BLOCK_SIZE_SB64X64, mi_row, mi_col, return);
+  for (i = 0; i < 4; ++i) {
+    const int x32_idx = ((i & 1) << 2);
+    const int y32_idx = ((i >> 1) << 2);
+    set_vt_size(vt.split[i], BLOCK_SIZE_SB32X32, mi_row + y32_idx,
+                mi_col + x32_idx, continue);
+
+    for (j = 0; j < 4; ++j) {
+      const int x16_idx = ((j & 1) << 1);
+      const int y16_idx = ((j >> 1) << 1);
+      set_vt_size(vt.split[i].split[j], BLOCK_SIZE_MB16X16,
+                  mi_row + y32_idx + y16_idx, mi_col + x32_idx + x16_idx,
+                  continue);
+
+      for (k = 0; k < 4; ++k) {
+        const int x8_idx = (k & 1);
+        const int y8_idx = (k >> 1);
+        set_block_size(cm, m, BLOCK_SIZE_SB8X8, mis,
+                       mi_row + y32_idx + y16_idx + y8_idx,
+                       mi_col + x32_idx + x16_idx + x8_idx);
+      }
+    }
+  }
+}
+
+// Encodes the bsize block at (mi_row, mi_col) with the partitioning already
+// stored in m->mbmi.sb_type instead of searching for one: pick_sb_modes() is
+// run per sub-block, partition costs are added, and SPLIT recurses into the
+// four quadrants. The summed rate and distortion go out through rate/dist.
+static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp,
+                             int mi_row, int mi_col, BLOCK_SIZE_TYPE bsize,
+                             int *rate, int *dist) {
+  VP9_COMMON * const cm = &cpi->common;
+  MACROBLOCK * const x = &cpi->mb;
+  MACROBLOCKD *xd = &cpi->mb.e_mbd;
+  const int mis = cm->mode_info_stride;
+  int bwl, bhl;
+  int bsl = b_width_log2(bsize);
+  int bs = (1 << bsl);
+  int bss = (1 << bsl)/4;
+  int i, pl;
+  // Fallback for release builds, where the assert(0) below compiles away.
+  PARTITION_TYPE partition = PARTITION_NONE;
+  BLOCK_SIZE_TYPE subsize;
+  ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
+  PARTITION_CONTEXT sl[8], sa[8];
+  int r = 0, d = 0;
+
+  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) {
+    // Nothing to encode; still hand the caller defined outputs.
+    *rate = 0;
+    *dist = 0;
+    return;
+  }
+
+  bwl = b_width_log2(m->mbmi.sb_type);
+  bhl = b_height_log2(m->mbmi.sb_type);
+
+  // parse the partition type
+  if ((bwl == bsl) && (bhl == bsl))
+    partition = PARTITION_NONE;
+  else if ((bwl == bsl) && (bhl < bsl))
+    partition = PARTITION_HORZ;
+  else if ((bwl < bsl) && (bhl == bsl))
+    partition = PARTITION_VERT;
+  else if ((bwl < bsl) && (bhl < bsl))
+    partition = PARTITION_SPLIT;
+  else
+    assert(0);
+
+  subsize = get_subsize(bsize, partition);
+
+  // TODO(JBB): this restriction is here because pick_sb_modes can return
+  // r's that are INT_MAX meaning we can't select a mode / mv for this block.
+  // when the code is made to work for less than sb8x8 we need to come up with
+  // a solution to this problem.
+  assert(subsize >= BLOCK_SIZE_SB8X8);
+
+  if (bsize >= BLOCK_SIZE_SB8X8) {
+    xd->left_seg_context = cm->left_seg_context + (mi_row & MI_MASK);
+    xd->above_seg_context = cm->above_seg_context + mi_col;
+    *(get_sb_partitioning(x, bsize)) = subsize;
+  }
+
+  pl = partition_plane_context(xd, bsize);
+  save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
+  switch (partition) {
+    case PARTITION_NONE:
+      pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, bsize,
+                    get_block_context(x, bsize));
+      r += x->partition_cost[pl][PARTITION_NONE];
+      break;
+    case PARTITION_HORZ:
+      *(get_sb_index(xd, subsize)) = 0;
+      pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, subsize,
+                    get_block_context(x, subsize));
+      if (mi_row + (bs >> 1) <= cm->mi_rows) {
+        int rt, dt;
+        update_state(cpi, get_block_context(x, subsize), subsize, 0);
+        encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
+        *(get_sb_index(xd, subsize)) = 1;
+        pick_sb_modes(cpi, mi_row + (bs >> 2), mi_col, tp, &rt, &dt, subsize,
+                      get_block_context(x, subsize));
+        r += rt;
+        d += dt;
+      }
+      set_partition_seg_context(cm, xd, mi_row, mi_col);
+      pl = partition_plane_context(xd, bsize);
+      r += x->partition_cost[pl][PARTITION_HORZ];
+      break;
+    case PARTITION_VERT:
+      *(get_sb_index(xd, subsize)) = 0;
+      pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, subsize,
+                    get_block_context(x, subsize));
+      if (mi_col + (bs >> 1) <= cm->mi_cols) {
+        int rt, dt;
+        update_state(cpi, get_block_context(x, subsize), subsize, 0);
+        encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
+        *(get_sb_index(xd, subsize)) = 1;
+        pick_sb_modes(cpi, mi_row, mi_col + (bs >> 2), tp, &rt, &dt, subsize,
+                      get_block_context(x, subsize));
+        r += rt;
+        d += dt;
+      }
+      set_partition_seg_context(cm, xd, mi_row, mi_col);
+      pl = partition_plane_context(xd, bsize);
+      r += x->partition_cost[pl][PARTITION_VERT];
+      // NOTE(review): HORZ has no matching call and the contexts are restored
+      // again after the switch; confirm whether this early restore is needed.
+      restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
+      break;
+    case PARTITION_SPLIT:
+      for (i = 0; i < 4; i++) {
+        int x_idx = (i & 1) * (bs >> 2);
+        int y_idx = (i >> 1) * (bs >> 2);
+        int jj = i >> 1, ii = i & 0x01;
+        int rt, dt;
+
+        if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
+          continue;
+
+        *(get_sb_index(xd, subsize)) = i;
+
+        rd_use_partition(cpi, m + jj * bss * mis + ii * bss, tp, mi_row + y_idx,
+                         mi_col + x_idx, subsize, &rt, &dt);
+        r += rt;
+        d += dt;
+      }
+      set_partition_seg_context(cm, xd, mi_row, mi_col);
+      pl = partition_plane_context(xd, bsize);
+      r += x->partition_cost[pl][PARTITION_SPLIT];
+      break;
+    default:
+      assert(0);
+  }
+
+  // update partition context
+#if CONFIG_AB4X4
+  if (bsize >= BLOCK_SIZE_SB8X8 &&
+      (bsize == BLOCK_SIZE_SB8X8 || partition != PARTITION_SPLIT)) {
+#else
+  if (bsize > BLOCK_SIZE_SB8X8
+      && (bsize == BLOCK_SIZE_MB16X16 || partition != PARTITION_SPLIT)) {
+#endif
+    set_partition_seg_context(cm, xd, mi_row, mi_col);
+    update_partition_context(xd, subsize, bsize);
+  }
+  restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
+
+  if (r < INT_MAX && d < INT_MAX)
+    encode_sb(cpi, tp, mi_row, mi_col, bsize == BLOCK_SIZE_SB64X64, bsize);
+  *rate = r;
+  *dist = d;
+}
+
 
 // TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
 // unlikely to be selected depending on previously rate-distortion optimization
@@ -873,7 +1277,7 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp,
   ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
   PARTITION_CONTEXT sl[8], sa[8];
   TOKENEXTRA *tp_orig = *tp;
-  int i, p, pl;
+  int i, pl;
   BLOCK_SIZE_TYPE subsize;
   int srate = INT_MAX, sdist = INT_MAX;
 
@@ -885,19 +1289,7 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp,
   }
   assert(mi_height_log2(bsize) == mi_width_log2(bsize));
 
-  // buffer the above/left context information of the block in search.
- for (p = 0; p < MAX_MB_PLANE; ++p) { - vpx_memcpy(a + bs * p, cm->above_context[p] + - (mi_col * 2 >> xd->plane[p].subsampling_x), - sizeof(ENTROPY_CONTEXT) * bs >> xd->plane[p].subsampling_x); - vpx_memcpy(l + bs * p, cm->left_context[p] + - ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y), - sizeof(ENTROPY_CONTEXT) * bs >> xd->plane[p].subsampling_y); - } - vpx_memcpy(sa, cm->above_seg_context + mi_col, - sizeof(PARTITION_CONTEXT) * ms); - vpx_memcpy(sl, cm->left_seg_context + (mi_row & MI_MASK), - sizeof(PARTITION_CONTEXT) * ms); + save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); // PARTITION_SPLIT if (bsize >= BLOCK_SIZE_SB8X8) { @@ -1025,6 +1376,8 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, *rate = srate; *dist = sdist; + restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); + if (srate < INT_MAX && sdist < INT_MAX) encode_sb(cpi, tp, mi_row, mi_col, bsize == BLOCK_SIZE_SB64X64, bsize); @@ -1050,8 +1403,22 @@ static void encode_sb_row(VP9_COMP *cpi, int mi_row, for (mi_col = cm->cur_tile_mi_col_start; mi_col < cm->cur_tile_mi_col_end; mi_col += 8) { int dummy_rate, dummy_dist; - rd_pick_partition(cpi, tp, mi_row, mi_col, BLOCK_SIZE_SB64X64, - &dummy_rate, &dummy_dist); + // TODO(JBB): remove the border conditions for 64x64 blocks once its fixed + // without this border check choose will fail on the border of every + // non 64x64. 
+ if (cpi->speed < 5 || + mi_col + 8 > cm->cur_tile_mi_col_end || + mi_row + 8 > cm->cur_tile_mi_row_end) { + rd_pick_partition(cpi, tp, mi_row, mi_col, BLOCK_SIZE_SB64X64, + &dummy_rate, &dummy_dist); + } else { + const int idx_str = cm->mode_info_stride * mi_row + mi_col; + MODE_INFO *m = cm->mi + idx_str; + // set_partitioning(cpi, m, BLOCK_SIZE_SB8X8); + choose_partitioning(cpi, cm->mi, mi_row, mi_col); + rd_use_partition(cpi, m, tp, mi_row, mi_col, BLOCK_SIZE_SB64X64, + &dummy_rate, &dummy_dist); + } } } diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 4b4a479ddca190cc5db5cd34e7cf1aae7909888f..f8b84301f497ac7a2a280635d2e9bbf72b49646b 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -696,6 +696,25 @@ static void set_rd_speed_thresholds(VP9_COMP *cpi, int mode, int speed) { sf->thresh_mult[THR_COMP_SPLITGA ] += speed_multiplier * 4500; sf->thresh_mult[THR_COMP_SPLITLG ] += speed_multiplier * 4500; + if (speed > 4) { + for (i = 0; i < MAX_MODES; ++i) + sf->thresh_mult[i] = INT_MAX; + + sf->thresh_mult[THR_DC ] = 0; + sf->thresh_mult[THR_TM ] = 0; + sf->thresh_mult[THR_NEWMV ] = 4000; + sf->thresh_mult[THR_NEWG ] = 4000; + sf->thresh_mult[THR_NEWA ] = 4000; + sf->thresh_mult[THR_NEARESTMV] = 0; + sf->thresh_mult[THR_NEARESTG ] = 0; + sf->thresh_mult[THR_NEARESTA ] = 0; + sf->thresh_mult[THR_NEARMV ] = 2000; + sf->thresh_mult[THR_NEARG ] = 2000; + sf->thresh_mult[THR_NEARA ] = 2000; + sf->thresh_mult[THR_COMP_NEARESTLA] = 2000; + sf->recode_loop = 0; + } + /* disable frame modes if flags not set */ if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) { sf->thresh_mult[THR_NEWMV ] = INT_MAX; @@ -804,48 +823,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) { #endif #endif sf->mb16_breakout = 0; - - if (speed > 0) { - /* Disable coefficient optimization above speed 0 */ - sf->optimize_coefficients = 0; - sf->no_skip_block4x4_search = 0; - sf->comp_inter_joint_search = 0; - - sf->first_step = 1; - - 
cpi->mode_check_freq[THR_SPLITG] = 2; - cpi->mode_check_freq[THR_SPLITA] = 2; - cpi->mode_check_freq[THR_SPLITMV] = 0; - - cpi->mode_check_freq[THR_COMP_SPLITGA] = 2; - cpi->mode_check_freq[THR_COMP_SPLITLG] = 2; - cpi->mode_check_freq[THR_COMP_SPLITLA] = 0; - } - - if (speed > 1) { - cpi->mode_check_freq[THR_SPLITG] = 4; - cpi->mode_check_freq[THR_SPLITA] = 4; - cpi->mode_check_freq[THR_SPLITMV] = 2; - - cpi->mode_check_freq[THR_COMP_SPLITGA] = 4; - cpi->mode_check_freq[THR_COMP_SPLITLG] = 4; - cpi->mode_check_freq[THR_COMP_SPLITLA] = 2; - } - - if (speed > 2) { - cpi->mode_check_freq[THR_SPLITG] = 15; - cpi->mode_check_freq[THR_SPLITA] = 15; - cpi->mode_check_freq[THR_SPLITMV] = 7; - - cpi->mode_check_freq[THR_COMP_SPLITGA] = 15; - cpi->mode_check_freq[THR_COMP_SPLITLG] = 15; - cpi->mode_check_freq[THR_COMP_SPLITLA] = 7; - - // Only do recode loop on key frames, golden frames and - // alt ref frames - sf->recode_loop = 2; - } - break; }; /* switch */ diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 8b0038f795abcdb2f3544951a77ff06bf1825965..0b78ae5439e064083a7c5f4188841dc5cbe8def0 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -620,9 +620,25 @@ static void super_block_yrd(VP9_COMP *cpi, int64_t txfm_cache[NB_TXFM_MODES]) { VP9_COMMON *const cm = &cpi->common; int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB]; + MACROBLOCKD *xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; vp9_subtract_sby(x, bs); + if (cpi->speed > 4) { + if (bs >= BLOCK_SIZE_SB32X32) { + mbmi->txfm_size = TX_32X32; + } else if (bs >= BLOCK_SIZE_MB16X16) { + mbmi->txfm_size = TX_16X16; + } else if (bs >= BLOCK_SIZE_SB8X8) { + mbmi->txfm_size = TX_8X8; + } else { + mbmi->txfm_size = TX_4X4; + } + super_block_yrd_for_txfm(cm, x, rate, distortion, skip, bs, + mbmi->txfm_size); + return; + } if (bs >= BLOCK_SIZE_SB32X32) super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32], bs, TX_32X32); @@ -842,7 
+858,7 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, int64_t txfm_cache[NB_TXFM_MODES]) { MB_PREDICTION_MODE mode; MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); - MACROBLOCKD *xd = &x->e_mbd; + MACROBLOCKD *const xd = &x->e_mbd; int this_rate, this_rate_tokenonly; int this_distortion, s; int64_t best_rd = INT64_MAX, this_rd; @@ -863,7 +879,6 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, int64_t local_txfm_cache[NB_TXFM_MODES]; MODE_INFO *const mic = xd->mode_info_context; const int mis = xd->mode_info_stride; - if (cpi->common.frame_type == KEY_FRAME) { const MB_PREDICTION_MODE A = above_block_mode(mic, 0, mis); const MB_PREDICTION_MODE L = xd->left_available ? @@ -871,12 +886,12 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, bmode_costs = x->y_mode_costs[A][L]; } - x->e_mbd.mode_info_context->mbmi.mode = mode; vp9_build_intra_predictors_sby_s(&x->e_mbd, bsize); super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, bsize, local_txfm_cache); + this_rate = this_rate_tokenonly + bmode_costs[mode]; this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); @@ -2273,7 +2288,9 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, (mbmi->mv[1].as_mv.col & 15) == 0; // Search for best switchable filter by checking the variance of // pred error irrespective of whether the filter will be used - if (1) { + if (cpi->speed > 4) { + *best_filter = EIGHTTAP; + } else { int i, newbest; int tmp_rate_sum = 0, tmp_dist_sum = 0; for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) { @@ -2410,6 +2427,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // Y cost and distortion super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, bsize, txfm_cache); + *rate2 += *rate_y; *distortion += *distortion_y; diff --git a/vp9/encoder/vp9_variance_c.c b/vp9/encoder/vp9_variance_c.c index 
7231dcf22074f06f2f8fc649cbb71dae17440c85..e9b36f356362e39a896c217ea2b1abd93c7ab2f8 100644 --- a/vp9/encoder/vp9_variance_c.c +++ b/vp9/encoder/vp9_variance_c.c @@ -318,6 +318,11 @@ unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, return (var - (((unsigned int)avg * avg) >> 7)); } +void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride, + const uint8_t *ref_ptr, int ref_stride, + unsigned int *sse, int *sum) { + variance(src_ptr, source_stride, ref_ptr, ref_stride, 8, 8, sse, sum); +} unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride,