diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 1582e380d4012c66588f79ba9f1066b42d0487df..f3bbc17ce9fc40747d46632d8b24aedc7fc344b0 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -784,94 +784,170 @@ static void update_coef_probs_common(vp9_writer* const bc, VP9_COMP *cpi, vp9_coeff_probs_model *old_frame_coef_probs = cpi->common.fc.coef_probs[tx_size]; vp9_coeff_stats *frame_branch_ct = cpi->frame_branch_ct[tx_size]; - int i, j, k, l, t; - int update[2] = {0, 0}; - int savings; - + const vp9_prob upd = VP9_COEF_UPDATE_PROB; const int entropy_nodes_update = UNCONSTRAINED_NODES; + int i, j, k, l, t; + switch (cpi->sf.use_fast_coef_updates) { + case 0: { + /* dry run to see if there is any update at all needed */ + int savings = 0; + int update[2] = {0, 0}; + for (i = 0; i < BLOCK_TYPES; ++i) { + for (j = 0; j < REF_TYPES; ++j) { + for (k = 0; k < COEF_BANDS; ++k) { + for (l = 0; l < PREV_COEF_CONTEXTS; ++l) { + for (t = 0; t < entropy_nodes_update; ++t) { + vp9_prob newp = new_frame_coef_probs[i][j][k][l][t]; + const vp9_prob oldp = old_frame_coef_probs[i][j][k][l][t]; + int s; + int u = 0; + + if (l >= 3 && k == 0) + continue; + if (t == PIVOT_NODE) + s = vp9_prob_diff_update_savings_search_model( + frame_branch_ct[i][j][k][l][0], + old_frame_coef_probs[i][j][k][l], &newp, upd, i, j); + else + s = vp9_prob_diff_update_savings_search( + frame_branch_ct[i][j][k][l][t], oldp, &newp, upd); + if (s > 0 && newp != oldp) + u = 1; + if (u) + savings += s - (int)(vp9_cost_zero(upd)); + else + savings -= (int)(vp9_cost_zero(upd)); + update[u]++; + } + } + } + } + } - const int tstart = 0; - /* dry run to see if there is any udpate at all needed */ - savings = 0; - for (i = 0; i < BLOCK_TYPES; ++i) { - for (j = 0; j < REF_TYPES; ++j) { - for (k = 0; k < COEF_BANDS; ++k) { - // int prev_coef_savings[ENTROPY_NODES] = {0}; - for (l = 0; l < PREV_COEF_CONTEXTS; ++l) { - for (t = tstart; t < entropy_nodes_update; ++t) { - 
vp9_prob newp = new_frame_coef_probs[i][j][k][l][t]; - const vp9_prob oldp = old_frame_coef_probs[i][j][k][l][t]; - const vp9_prob upd = VP9_COEF_UPDATE_PROB; - int s; - int u = 0; - - if (l >= 3 && k == 0) - continue; - if (t == PIVOT_NODE) - s = vp9_prob_diff_update_savings_search_model( - frame_branch_ct[i][j][k][l][0], - old_frame_coef_probs[i][j][k][l], &newp, upd, i, j); - else - s = vp9_prob_diff_update_savings_search( - frame_branch_ct[i][j][k][l][t], oldp, &newp, upd); - if (s > 0 && newp != oldp) - u = 1; - if (u) - savings += s - (int)(vp9_cost_zero(upd)); - else - savings -= (int)(vp9_cost_zero(upd)); - update[u]++; + // printf("Update %d %d, savings %d\n", update[0], update[1], savings); + /* Is coef updated at all */ + if (update[1] == 0 || savings < 0) { + vp9_write_bit(bc, 0); + return; + } + vp9_write_bit(bc, 1); + for (i = 0; i < BLOCK_TYPES; ++i) { + for (j = 0; j < REF_TYPES; ++j) { + for (k = 0; k < COEF_BANDS; ++k) { + for (l = 0; l < PREV_COEF_CONTEXTS; ++l) { + // calc probs and branch cts for this frame only + for (t = 0; t < entropy_nodes_update; ++t) { + vp9_prob newp = new_frame_coef_probs[i][j][k][l][t]; + vp9_prob *oldp = old_frame_coef_probs[i][j][k][l] + t; + const vp9_prob upd = VP9_COEF_UPDATE_PROB; + int s; + int u = 0; + if (l >= 3 && k == 0) + continue; + if (t == PIVOT_NODE) + s = vp9_prob_diff_update_savings_search_model( + frame_branch_ct[i][j][k][l][0], + old_frame_coef_probs[i][j][k][l], &newp, upd, i, j); + else + s = vp9_prob_diff_update_savings_search( + frame_branch_ct[i][j][k][l][t], + *oldp, &newp, upd); + if (s > 0 && newp != *oldp) + u = 1; + vp9_write(bc, u, upd); +#ifdef ENTROPY_STATS + if (!cpi->dummy_packing) + ++tree_update_hist[tx_size][i][j][k][l][t][u]; +#endif + if (u) { + /* send/use new probability */ + vp9_write_prob_diff_update(bc, newp, *oldp); + *oldp = newp; + } + } + } } } } + return; } - } - // printf("Update %d %d, savings %d\n", update[0], update[1], savings); - /* Is coef updated at all */ - if 
(update[1] == 0 || savings < 0) { - vp9_write_bit(bc, 0); - return; - } - vp9_write_bit(bc, 1); - for (i = 0; i < BLOCK_TYPES; ++i) { - for (j = 0; j < REF_TYPES; ++j) { - for (k = 0; k < COEF_BANDS; ++k) { - // int prev_coef_savings[ENTROPY_NODES] = {0}; - for (l = 0; l < PREV_COEF_CONTEXTS; ++l) { - // calc probs and branch cts for this frame only - for (t = tstart; t < entropy_nodes_update; ++t) { - vp9_prob newp = new_frame_coef_probs[i][j][k][l][t]; - vp9_prob *oldp = old_frame_coef_probs[i][j][k][l] + t; - const vp9_prob upd = VP9_COEF_UPDATE_PROB; - int s; - int u = 0; - if (l >= 3 && k == 0) - continue; - if (t == PIVOT_NODE) - s = vp9_prob_diff_update_savings_search_model( - frame_branch_ct[i][j][k][l][0], - old_frame_coef_probs[i][j][k][l], &newp, upd, i, j); - else - s = vp9_prob_diff_update_savings_search( - frame_branch_ct[i][j][k][l][t], - *oldp, &newp, upd); - if (s > 0 && newp != *oldp) - u = 1; - vp9_write(bc, u, upd); + case 1: + case 2: { + const int prev_coef_contexts_to_update = + (cpi->sf.use_fast_coef_updates == 2 ? + PREV_COEF_CONTEXTS >> 1 : PREV_COEF_CONTEXTS); + const int coef_band_to_update = + (cpi->sf.use_fast_coef_updates == 2 ? 
+ COEF_BANDS >> 1 : COEF_BANDS); + int updates = 0; + int noupdates_before_first = 0; + for (i = 0; i < BLOCK_TYPES; ++i) { + for (j = 0; j < REF_TYPES; ++j) { + for (k = 0; k < COEF_BANDS; ++k) { + for (l = 0; l < PREV_COEF_CONTEXTS; ++l) { + // calc probs and branch cts for this frame only + for (t = 0; t < entropy_nodes_update; ++t) { + vp9_prob newp = new_frame_coef_probs[i][j][k][l][t]; + vp9_prob *oldp = old_frame_coef_probs[i][j][k][l] + t; + int s; + int u = 0; + if (l >= 3 && k == 0) + continue; + if (l >= prev_coef_contexts_to_update || + k >= coef_band_to_update) { + u = 0; + } else { + if (t == PIVOT_NODE) + s = vp9_prob_diff_update_savings_search_model( + frame_branch_ct[i][j][k][l][0], + old_frame_coef_probs[i][j][k][l], &newp, upd, i, j); + else + s = vp9_prob_diff_update_savings_search( + frame_branch_ct[i][j][k][l][t], + *oldp, &newp, upd); + if (s > 0 && newp != *oldp) + u = 1; + } + updates += u; + if (u == 0 && updates == 0) { + noupdates_before_first++; #ifdef ENTROPY_STATS - if (!cpi->dummy_packing) - ++tree_update_hist[tx_size][i][j][k][l][t][u]; + if (!cpi->dummy_packing) + ++tree_update_hist[tx_size][i][j][k][l][t][u]; #endif - if (u) { - /* send/use new probability */ - vp9_write_prob_diff_update(bc, newp, *oldp); - *oldp = newp; + continue; + } + if (u == 1 && updates == 1) { + int v; + // first update + vp9_write_bit(bc, 1); + for (v = 0; v < noupdates_before_first; ++v) + vp9_write(bc, 0, upd); + } + vp9_write(bc, u, upd); +#ifdef ENTROPY_STATS + if (!cpi->dummy_packing) + ++tree_update_hist[tx_size][i][j][k][l][t][u]; +#endif + if (u) { + /* send/use new probability */ + vp9_write_prob_diff_update(bc, newp, *oldp); + *oldp = newp; + } + } } } } } + if (updates == 0) { + vp9_write_bit(bc, 0); // no updates + } + return; } + + default: + assert(0); } } diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 34bd43ef15c2e78bfc858ed47d3a31608f90eeea..22ad682aead61474951f787e4c8f5517beb19df7 100644 --- 
a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -741,6 +741,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->skip_encode_sb = 0; sf->use_uv_intra_rd_estimate = 0; sf->use_fast_lpf_pick = 0; + sf->use_fast_coef_updates = 0; sf->using_small_partition_info = 0; // Skip any mode not chosen at size < X for all sizes > X // Hence BLOCK_64X64 (skip is off) @@ -802,6 +803,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->intra_y_mode_mask = INTRA_DC_TM_H_V; sf->intra_uv_mode_mask = INTRA_DC_TM_H_V; + sf->use_fast_coef_updates = 1; } if (speed == 2) { sf->adjust_thresholds_by_speed = 1; @@ -840,6 +842,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->auto_min_max_partition_interval = 2; sf->disable_split_var_thresh = 32; sf->disable_filter_search_var_thresh = 32; + sf->use_fast_coef_updates = 2; } if (speed == 3) { sf->comp_inter_joint_search_thresh = BLOCK_SIZES; @@ -866,6 +869,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->disable_filter_search_var_thresh = 64; sf->intra_y_mode_mask = INTRA_DC_ONLY; sf->intra_uv_mode_mask = INTRA_DC_ONLY; + sf->use_fast_coef_updates = 2; } if (speed == 4) { sf->comp_inter_joint_search_thresh = BLOCK_SIZES; @@ -894,6 +898,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->subpel_iters_per_step = 1; sf->disable_split_var_thresh = 64; sf->disable_filter_search_var_thresh = 96; + sf->use_fast_coef_updates = 2; } /* if (speed == 2) { diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 940763d7648f99cc57a2f123c98df81c417b2732..0933185b99bca6003d817334e5a67879b41bd817 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -298,6 +298,7 @@ typedef struct { int use_rd_breakout; int use_uv_intra_rd_estimate; int use_fast_lpf_pick; + int use_fast_coef_updates; // 0: 2-loop, 1: 1-loop, 2: 1-loop reduced } SPEED_FEATURES; typedef struct VP9_COMP {