Commit 3a27961c authored by Jingning Han

Refactor recursive transform block scheme

This commit redesigns the rate-distortion optimization framework
used for recursive transform block partitioning. The new design
speeds up the encoder by 10%.

Change-Id: I6dd3a7dd428a530d8012e5c6ddc40e650c8b392b
parent 2cdc1274
......@@ -189,6 +189,7 @@ static void update_tx_counts(VP10_COMMON *cm, MACROBLOCKD *xd,
if (tx_size == plane_tx_size) {
++get_tx_counts(max_tx_size, ctx, tx_counts)[tx_size];
mbmi->tx_size = tx_size;
} else {
int bsl = b_width_log2_lookup[bsize];
int i;
......
......@@ -1286,7 +1286,6 @@ static void tx_block_rd_b(MACROBLOCK *x, TX_SIZE tx_size,
static void select_tx_block(const VP10_COMP *cpi, MACROBLOCK *x,
int blk_row, int blk_col, int plane, int block,
TX_SIZE tx_size, BLOCK_SIZE plane_bsize,
BLOCK_SIZE txb_bsize,
ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl,
int *rate, int64_t *dist,
int64_t *bsse, int *skip) {
......@@ -1299,34 +1298,38 @@ static void select_tx_block(const VP10_COMP *cpi, MACROBLOCK *x,
int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
int64_t this_rd = INT64_MAX;
ENTROPY_CONTEXT ctxa[16], ctxl[16];
ENTROPY_CONTEXT *pta = ta + blk_col;
ENTROPY_CONTEXT *ptl = tl + blk_row;
ENTROPY_CONTEXT stxa = 0, stxl = 0;
int coeff_ctx, i;
memcpy(ctxa, ta, sizeof(ENTROPY_CONTEXT) * max_blocks_wide);
memcpy(ctxl, tl, sizeof(ENTROPY_CONTEXT) * max_blocks_high);
int64_t sum_dist = 0, sum_bsse = 0;
int64_t sum_rd = INT64_MAX;
int sum_rate = vp10_cost_bit(128, 1);
int all_skip = 1;
TX_SIZE swap_tx_size = TX_SIZES;
switch (tx_size) {
case TX_4X4:
stxa = pta[0];
stxl = ptl[0];
break;
case TX_8X8:
pta[0] = !!*(const uint16_t *)&pta[0];
ptl[0] = !!*(const uint16_t *)&ptl[0];
stxa = !!*(const uint16_t *)&pta[0];
stxl = !!*(const uint16_t *)&ptl[0];
break;
case TX_16X16:
pta[0] = !!*(const uint32_t *)&pta[0];
ptl[0] = !!*(const uint32_t *)&ptl[0];
stxa = !!*(const uint32_t *)&pta[0];
stxl = !!*(const uint32_t *)&ptl[0];
break;
case TX_32X32:
pta[0] = !!*(const uint64_t *)&pta[0];
ptl[0] = !!*(const uint64_t *)&ptl[0];
stxa = !!*(const uint64_t *)&pta[0];
stxl = !!*(const uint64_t *)&ptl[0];
break;
default:
assert(0 && "Invalid transform size.");
break;
}
coeff_ctx = combine_entropy_contexts(pta[0], ptl[0]);
coeff_ctx = combine_entropy_contexts(stxa, stxl);
if (xd->mb_to_bottom_edge < 0)
max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
......@@ -1341,38 +1344,23 @@ static void select_tx_block(const VP10_COMP *cpi, MACROBLOCK *x,
if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
return;
mbmi->inter_tx_size[tx_idx] = tx_size;
mbmi->tx_size = tx_size;
if (cpi->common.tx_mode == TX_MODE_SELECT || tx_size == TX_4X4) {
tx_block_rd_b(x, tx_size, blk_row, blk_col, plane, block,
plane_bsize, coeff_ctx, rate, dist, bsse, skip);
if (tx_size > TX_4X4)
*rate += vp10_cost_bit(128, 0);
this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *dist);
for (i = 0; i < (1 << tx_size); ++i) {
pta[i] = !(p->eobs[block] == 0);
ptl[i] = !(p->eobs[block] == 0);
}
}
if (tx_size > TX_4X4) {
BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
int bh = num_4x4_blocks_high_lookup[bsize];
int bsl = b_height_log2_lookup[bsize];
int sub_step = 1 << (2 * (tx_size - 1));
int i;
int this_rate, sum_rate = vp10_cost_bit(128, 1);
int64_t this_dist, sum_dist = 0;
int64_t this_bsse, sum_bsse = 0;
int this_skip, all_skip = 1;
int64_t sum_rd;
int this_rate;
int64_t this_dist;
int64_t this_bsse;
int this_skip;
--bsl;
for (i = 0; i < 4; ++i) {
int offsetr = (i >> 1) * bh / 2;
int offsetc = (i & 0x01) * bh / 2;
int offsetr = (i >> 1) << bsl;
int offsetc = (i & 0x01) << bsl;
select_tx_block(cpi, x, blk_row + offsetr, blk_col + offsetc,
plane, block + i * sub_step, tx_size - 1,
plane_bsize, txsize_to_bsize[tx_size - 1],
ctxa, ctxl, &this_rate, &this_dist,
plane_bsize, ta, tl, &this_rate, &this_dist,
&this_bsse, &this_skip);
sum_rate += this_rate;
sum_dist += this_dist;
......@@ -1380,24 +1368,29 @@ static void select_tx_block(const VP10_COMP *cpi, MACROBLOCK *x,
all_skip &= this_skip;
}
sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
}
if (this_rd < sum_rd) {
int idx, idy;
for (idy = blk_row; idy < blk_row + bh; idy += 2)
for (idx = blk_col; idx < blk_col + bh; idx += 2)
mbmi->inter_tx_size[(idy / 2) * 8 + (idx / 2)] = tx_size;
mbmi->tx_size = tx_size;
} else {
*rate = sum_rate;
*dist = sum_dist;
*bsse = sum_bsse;
*skip = all_skip;
memcpy(pta, ctxa + (blk_col >> pd->subsampling_x),
sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide_lookup[txb_bsize]);
memcpy(ptl, ctxl + (blk_row >> pd->subsampling_y),
sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high_lookup[txb_bsize]);
}
if (cpi->common.tx_mode == TX_MODE_SELECT || tx_size == TX_4X4) {
swap_tx_size = mbmi->inter_tx_size[tx_idx];
mbmi->inter_tx_size[tx_idx] = tx_size;
tx_block_rd_b(x, tx_size, blk_row, blk_col, plane, block,
plane_bsize, coeff_ctx, rate, dist, bsse, skip);
if (tx_size > TX_4X4)
*rate += vp10_cost_bit(128, 0);
this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *dist);
}
if (this_rd < sum_rd) {
for (i = 0; i < (1 << tx_size); ++i)
pta[i] = ptl[i] = !(p->eobs[block] == 0);
mbmi->tx_size = tx_size;
} else {
*rate = sum_rate;
*dist = sum_dist;
*bsse = sum_bsse;
*skip = all_skip;
mbmi->inter_tx_size[tx_idx] = swap_tx_size;
}
}
......@@ -1437,7 +1430,7 @@ static void inter_block_yrd(const VP10_COMP *cpi, MACROBLOCK *x,
for (idy = 0; idy < mi_height; idy += bh) {
for (idx = 0; idx < mi_width; idx += bh) {
select_tx_block(cpi, x, idy, idx, 0, block,
max_txsize_lookup[plane_bsize], plane_bsize, txb_size,
max_txsize_lookup[plane_bsize], plane_bsize,
ctxa, ctxl, &pnrate, &pndist, &pnsse, &pnskip);
*rate += pnrate;
*distortion += pndist;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment