diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 3dd235a1f9b1a7b41cc70944b731957aa3463077..56390ab3c92bcc27b5b0fcf132f3962416288ef5 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -542,7 +542,8 @@ static void set_offsets(VP9_COMP *cpi, int mi_row, int mi_col, static void pick_sb_modes(VP9_COMP *cpi, int mi_row, int mi_col, int *totalrate, int64_t *totaldist, - BLOCK_SIZE_TYPE bsize, PICK_MODE_CONTEXT *ctx) { + BLOCK_SIZE_TYPE bsize, PICK_MODE_CONTEXT *ctx, + int64_t best_rd) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; @@ -561,10 +562,11 @@ static void pick_sb_modes(VP9_COMP *cpi, int mi_row, int mi_col, // Find best coding mode & reconstruct the MB so it is available // as a predictor for MBs that follow in the SB if (cm->frame_type == KEY_FRAME) - vp9_rd_pick_intra_mode_sb(cpi, x, totalrate, totaldist, bsize, ctx); + vp9_rd_pick_intra_mode_sb(cpi, x, totalrate, totaldist, bsize, ctx, + best_rd); else vp9_rd_pick_inter_mode_sb(cpi, x, mi_row, mi_col, totalrate, totaldist, - bsize, ctx); + bsize, ctx, best_rd); } static void update_stats(VP9_COMP *cpi, int mi_row, int mi_col) { @@ -1230,7 +1232,7 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp, mi_col + (ms >> 1) < cm->mi_cols) { *(get_sb_partitioning(x, bsize)) = bsize; pick_sb_modes(cpi, mi_row, mi_col, &none_rate, &none_dist, bsize, - get_block_context(x, bsize)); + get_block_context(x, bsize), INT64_MAX); set_partition_seg_context(cm, xd, mi_row, mi_col); pl = partition_plane_context(xd, bsize); @@ -1245,7 +1247,7 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp, switch (partition) { case PARTITION_NONE: pick_sb_modes(cpi, mi_row, mi_col, &last_part_rate, &last_part_dist, - bsize, get_block_context(x, bsize)); + bsize, get_block_context(x, bsize), INT64_MAX); set_partition_seg_context(cm, xd, mi_row, mi_col); pl = partition_plane_context(xd, bsize); last_part_rate += x->partition_cost[pl][PARTITION_NONE]; @@ -1253,7 +1255,7 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp, case PARTITION_HORZ: *(get_sb_index(xd, subsize)) = 0; pick_sb_modes(cpi, mi_row, mi_col, &last_part_rate, &last_part_dist, - subsize, get_block_context(x, subsize)); + subsize, get_block_context(x, subsize), INT64_MAX); if (bsize >= BLOCK_SIZE_SB8X8 && mi_row + (mh >> 1) < cm->mi_rows) { int rt = 0; int64_t dt = 0; @@ -1261,7 +1263,7 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp, encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); *(get_sb_index(xd, subsize)) = 1; pick_sb_modes(cpi, mi_row + (ms >> 1), mi_col, &rt, &dt, subsize, - get_block_context(x, subsize)); + get_block_context(x, subsize), INT64_MAX); last_part_rate += rt; last_part_dist += dt; } @@ -1272,7 +1274,7 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp, case PARTITION_VERT: *(get_sb_index(xd, subsize)) = 0; pick_sb_modes(cpi, mi_row, mi_col, &last_part_rate, &last_part_dist, - subsize, get_block_context(x, subsize)); + subsize, get_block_context(x, subsize), INT64_MAX); if (bsize >= BLOCK_SIZE_SB8X8 && mi_col + (ms >> 1) < cm->mi_cols) { int rt = 0; int64_t dt = 0; @@ -1280,7 +1282,7 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp, encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); *(get_sb_index(xd, subsize)) = 1; pick_sb_modes(cpi, mi_row, mi_col + (ms >> 1), &rt, &dt, subsize, - get_block_context(x, subsize)); + get_block_context(x, subsize), INT64_MAX); last_part_rate += rt; last_part_dist += dt; } @@ -1345,7 +1347,8 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp, save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); pick_sb_modes(cpi, mi_row + y_idx, mi_col + x_idx, &rt, &dt, - split_subsize, get_block_context(x, split_subsize)); + split_subsize, get_block_context(x, split_subsize), + INT64_MAX); restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); @@ -1403,7 +1406,7 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp, // results, for encoding speed-up. static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, int mi_col, BLOCK_SIZE_TYPE bsize, int *rate, - int64_t *dist, int do_recon) { + int64_t *dist, int do_recon, int64_t best_rd) { VP9_COMMON * const cm = &cpi->common; MACROBLOCK * const x = &cpi->mb; MACROBLOCKD * const xd = &x->e_mbd; @@ -1433,13 +1436,12 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, if (!cpi->sf.use_partitions_greater_than || (cpi->sf.use_partitions_greater_than && bsize > cpi->sf.greater_than_block_size)) { - if (bsize >= BLOCK_SIZE_SB8X8) { + if (bsize > BLOCK_SIZE_SB8X8) { int r4 = 0; - int64_t d4 = 0; + int64_t d4 = 0, sum_rd = 0; subsize = get_subsize(bsize, PARTITION_SPLIT); - *(get_sb_partitioning(x, bsize)) = subsize; - for (i = 0; i < 4; ++i) { + for (i = 0; i < 4 && sum_rd < best_rd; ++i) { int x_idx = (i & 1) * (ms >> 1); int y_idx = (i >> 1) * (ms >> 1); int r = 0; @@ -1450,19 +1452,28 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, *(get_sb_index(xd, subsize)) = i; rd_pick_partition(cpi, tp, mi_row + y_idx, mi_col + x_idx, subsize, &r, - &d, i != 3); + &d, i != 3, best_rd - sum_rd); - r4 += r; - d4 += d; + if (r == INT_MAX) { + r4 = INT_MAX; + sum_rd = INT64_MAX; + } else { + r4 += r; + d4 += d; + sum_rd = RDCOST(x->rdmult, x->rddiv, r4, d4); + } } set_partition_seg_context(cm, xd, mi_row, mi_col); pl = partition_plane_context(xd, bsize); - if (r4 < INT_MAX) + if (r4 != INT_MAX && i == 4) { r4 += x->partition_cost[pl][PARTITION_SPLIT]; - assert(r4 >= 0); - assert(d4 >= 0); - srate = r4; - sdist = d4; + *(get_sb_partitioning(x, bsize)) = subsize; + assert(r4 >= 0); + assert(d4 >= 0); + srate = r4; + sdist = d4; + best_rd = MIN(best_rd, RDCOST(x->rdmult, x->rddiv, r4, d4)); + } restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); } } @@ -1608,15 +1619,18 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, int r; int64_t d; pick_sb_modes(cpi, mi_row, mi_col, &r, &d, bsize, - get_block_context(x, bsize)); - if (bsize >= BLOCK_SIZE_SB8X8) { + get_block_context(x, bsize), best_rd); + if (r != INT_MAX && bsize >= BLOCK_SIZE_SB8X8) { set_partition_seg_context(cm, xd, mi_row, mi_col); pl = partition_plane_context(xd, bsize); r += x->partition_cost[pl][PARTITION_NONE]; } - if (RDCOST(x->rdmult, x->rddiv, r, d) - < RDCOST(x->rdmult, x->rddiv, srate, sdist)) { + if (r != INT_MAX && + (bsize == BLOCK_SIZE_SB8X8 || + RDCOST(x->rdmult, x->rddiv, r, d) < + RDCOST(x->rdmult, x->rddiv, srate, sdist))) { + best_rd = MIN(best_rd, RDCOST(x->rdmult, x->rddiv, r, d)); srate = r; sdist = d; larger_is_better = 1; @@ -1624,33 +1638,83 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, *(get_sb_partitioning(x, bsize)) = bsize; } } + + if (bsize == BLOCK_SIZE_SB8X8) { + int r4 = 0; + int64_t d4 = 0, sum_rd = 0; + subsize = get_subsize(bsize, PARTITION_SPLIT); + + for (i = 0; i < 4 && sum_rd < best_rd; ++i) { + int x_idx = (i & 1) * (ms >> 1); + int y_idx = (i >> 1) * (ms >> 1); + int r = 0; + int64_t d = 0; + + if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) + continue; + + *(get_sb_index(xd, subsize)) = i; + rd_pick_partition(cpi, tp, mi_row + y_idx, mi_col + x_idx, subsize, &r, + &d, i != 3, best_rd - sum_rd); + + if (r == INT_MAX) { + r4 = INT_MAX; + sum_rd = INT64_MAX; + } else { + r4 += r; + d4 += d; + sum_rd = RDCOST(x->rdmult, x->rddiv, r4, d4); + } + } + set_partition_seg_context(cm, xd, mi_row, mi_col); + pl = partition_plane_context(xd, bsize); + if (r4 != INT_MAX && i == 4) { + r4 += x->partition_cost[pl][PARTITION_SPLIT]; + if (RDCOST(x->rdmult, x->rddiv, r4, d4) < + RDCOST(x->rdmult, x->rddiv, srate, sdist)) { + srate = r4; + sdist = d4; + *(get_sb_partitioning(x, bsize)) = subsize; + best_rd = MIN(best_rd, RDCOST(x->rdmult, x->rddiv, r4, d4)); + } + } + restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); + } + if (!cpi->sf.use_square_partition_only && (!cpi->sf.less_rectangular_check ||!larger_is_better)) { // PARTITION_HORZ if (bsize >= BLOCK_SIZE_SB8X8 && mi_col + (ms >> 1) < cm->mi_cols) { int r2, r = 0; - int64_t d2, d = 0; + int64_t d2, d = 0, h_rd; subsize = get_subsize(bsize, PARTITION_HORZ); *(get_sb_index(xd, subsize)) = 0; pick_sb_modes(cpi, mi_row, mi_col, &r2, &d2, subsize, - get_block_context(x, subsize)); + get_block_context(x, subsize), best_rd); + h_rd = RDCOST(x->rdmult, x->rddiv, r2, d2); - if (mi_row + (ms >> 1) < cm->mi_rows) { + if (r2 != INT_MAX && h_rd < best_rd && + mi_row + (ms >> 1) < cm->mi_rows) { update_state(cpi, get_block_context(x, subsize), subsize, 0); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); *(get_sb_index(xd, subsize)) = 1; pick_sb_modes(cpi, mi_row + (ms >> 1), mi_col, &r, &d, subsize, - get_block_context(x, subsize)); - r2 += r; - d2 += d; + get_block_context(x, subsize), best_rd - h_rd); + if (r == INT_MAX) { + r2 = INT_MAX; + } else { + r2 += r; + d2 += d; + } } set_partition_seg_context(cm, xd, mi_row, mi_col); pl = partition_plane_context(xd, bsize); if (r2 < INT_MAX) r2 += x->partition_cost[pl][PARTITION_HORZ]; - if (RDCOST(x->rdmult, x->rddiv, r2, d2) + if (r2 != INT_MAX && RDCOST(x->rdmult, x->rddiv, r2, d2) < RDCOST(x->rdmult, x->rddiv, srate, sdist)) { + best_rd = MIN(best_rd, RDCOST(x->rdmult, x->rddiv, r2, d2)); srate = r2; sdist = d2; *(get_sb_partitioning(x, bsize)) = subsize; @@ -1661,12 +1725,14 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, // PARTITION_VERT if (bsize >= BLOCK_SIZE_SB8X8 && mi_row + (ms >> 1) < cm->mi_rows) { int r2; - int64_t d2; + int64_t d2, v_rd; subsize = get_subsize(bsize, PARTITION_VERT); *(get_sb_index(xd, subsize)) = 0; pick_sb_modes(cpi, mi_row, mi_col, &r2, &d2, subsize, - get_block_context(x, subsize)); - if (mi_col + (ms >> 1) < cm->mi_cols) { + get_block_context(x, subsize), best_rd); + v_rd = RDCOST(x->rdmult, x->rddiv, r2, d2); + if (r2 != INT_MAX && v_rd < best_rd && + mi_col + (ms >> 1) < cm->mi_cols) { int r = 0; int64_t d = 0; update_state(cpi, get_block_context(x, subsize), subsize, 0); @@ -1674,15 +1740,20 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, *(get_sb_index(xd, subsize)) = 1; pick_sb_modes(cpi, mi_row, mi_col + (ms >> 1), &r, &d, subsize, - get_block_context(x, subsize)); - r2 += r; - d2 += d; + get_block_context(x, subsize), best_rd - v_rd); + if (r == INT_MAX) { + r2 = INT_MAX; + } else { + r2 += r; + d2 += d; + } } set_partition_seg_context(cm, xd, mi_row, mi_col); pl = partition_plane_context(xd, bsize); if (r2 < INT_MAX) r2 += x->partition_cost[pl][PARTITION_VERT]; - if (RDCOST(x->rdmult, x->rddiv, r2, d2) + if (r2 != INT_MAX && + RDCOST(x->rdmult, x->rddiv, r2, d2) < RDCOST(x->rdmult, x->rddiv, srate, sdist)) { srate = r2; sdist = d2; @@ -1733,7 +1804,7 @@ static void rd_pick_reference_frame(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, (mi_col + (ms >> 1) < cm->mi_cols)) { cpi->set_ref_frame_mask = 1; pick_sb_modes(cpi, mi_row, mi_col, &r, &d, BLOCK_SIZE_SB64X64, - get_block_context(x, BLOCK_SIZE_SB64X64)); + get_block_context(x, BLOCK_SIZE_SB64X64), INT64_MAX); set_partition_seg_context(cm, xd, mi_row, mi_col); pl = partition_plane_context(xd, BLOCK_SIZE_SB64X64); r += x->partition_cost[pl][PARTITION_NONE]; @@ -1811,7 +1882,7 @@ static void encode_sb_row(VP9_COMP *cpi, int mi_row, TOKENEXTRA **tp, || cpi->common.frame_type == KEY_FRAME || cpi->is_src_frame_alt_ref) { rd_pick_partition(cpi, tp, mi_row, mi_col, BLOCK_SIZE_SB64X64, - &dummy_rate, &dummy_dist, 1); + &dummy_rate, &dummy_dist, 1, INT64_MAX); } else { copy_partitioning(cpi, m, p); rd_use_partition(cpi, m, tp, mi_row, mi_col, BLOCK_SIZE_SB64X64, @@ -1820,7 +1891,7 @@ static void encode_sb_row(VP9_COMP *cpi, int mi_row, TOKENEXTRA **tp, } } else { rd_pick_partition(cpi, tp, mi_row, mi_col, BLOCK_SIZE_SB64X64, - &dummy_rate, &dummy_dist, 1); + &dummy_rate, &dummy_dist, 1, INT64_MAX); } } } diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 6df17018e6796292ce18dc7e0d269fdd2e81aae1..6d1430d73d989efdb50c06397794cf57754fbb60 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -776,10 +776,14 @@ static void block_yrd_txfm(int plane, int block, BLOCK_SIZE_TYPE bsize, MACROBLOCK *const x = args->x; MACROBLOCKD *const xd = &x->e_mbd; struct encode_b_args encode_args = {args->cm, x, NULL}; + int64_t rd1, rd2, rd; if (args->skip) return; - if (RDCOST(x->rdmult, x->rddiv, args->rate, args->dist) > args->best_rd) { + rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist); + rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse); + rd = MIN(rd1, rd2); + if (rd > args->best_rd) { args->skip = 1; args->rate = INT_MAX; args->dist = INT64_MAX; @@ -2949,7 +2953,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int *returnrate, int64_t *returndist, BLOCK_SIZE_TYPE bsize, - PICK_MODE_CONTEXT *ctx) { + PICK_MODE_CONTEXT *ctx, int64_t best_rd) { VP9_COMMON *cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; int rate_y = 0, rate_uv = 0; @@ -3016,7 +3020,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int *returnrate, int64_t *returndistortion, BLOCK_SIZE_TYPE bsize, - PICK_MODE_CONTEXT *ctx) { + PICK_MODE_CONTEXT *ctx, + int64_t best_rd_so_far) { VP9_COMMON *cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; @@ -3034,8 +3039,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, cpi->lst_fb_idx, cpi->gld_fb_idx, cpi->alt_fb_idx}; - int64_t best_rd = INT64_MAX; - int64_t best_yrd = INT64_MAX; + int64_t best_rd = best_rd_so_far; + int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise int64_t best_txfm_rd[NB_TXFM_MODES]; int64_t best_txfm_diff[NB_TXFM_MODES]; int64_t best_pred_diff[NB_PREDICTION_TYPES]; @@ -3098,6 +3103,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++) best_filter_rd[i] = INT64_MAX; + *returnrate = INT_MAX; + // Create a mask set to 1 for each frame used by a smaller resolution. if (cpi->sf.use_avoid_tested_higherror) { switch (block_size) { @@ -3833,6 +3840,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, if (x->skip && !mode_excluded) break; } + if (best_rd >= best_rd_so_far) + return INT64_MAX; // If we used an estimate for the uv intra rd in the loop above... if (cpi->sf.use_uv_intra_rd_estimate) { diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h index 22d0a950a1eb9887e36e974a10a24c7ff3ed0bb2..7c84b48e4890a14179772a8a55321b3492964ebe 100644 --- a/vp9/encoder/vp9_rdopt.h +++ b/vp9/encoder/vp9_rdopt.h @@ -23,12 +23,12 @@ void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex); void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int *r, int64_t *d, BLOCK_SIZE_TYPE bsize, - PICK_MODE_CONTEXT *ctx); + PICK_MODE_CONTEXT *ctx, int64_t best_rd); int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col, int *r, int64_t *d, BLOCK_SIZE_TYPE bsize, - PICK_MODE_CONTEXT *ctx); + PICK_MODE_CONTEXT *ctx, int64_t best_rd); void vp9_init_me_luts();