Commit 4e82f015 authored by Tero Rintaluoma
Browse files

Tokenize MB optimized

Optimized the C code of the following functions:
 - vp8_tokenize_mb
 - tokenize1st_order_b
 - tokenize2nd_order_b
Gives a ~1-5% speed-up for real-time (RT) encoding on Cortex-A8/A9,
depending on encoding parameters.

Change-Id: I6be86104a589a06dcbc9ed3318e8bf264ef4176c
Showing changes with 144 additions and 76 deletions
...@@ -95,101 +95,183 @@ static void fill_value_tokens() ...@@ -95,101 +95,183 @@ static void fill_value_tokens()
static void tokenize2nd_order_b static void tokenize2nd_order_b
( (
const BLOCKD *const b, MACROBLOCKD *x,
TOKENEXTRA **tp, TOKENEXTRA **tp,
const int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */
ENTROPY_CONTEXT *a,
ENTROPY_CONTEXT *l,
VP8_COMP *cpi VP8_COMP *cpi
) )
{ {
int pt; /* near block/prev token context index */ int pt; /* near block/prev token context index */
int c = 0; /* start at DC */ int c; /* start at DC */
const int eob = b->eob; /* one beyond last nonzero coeff */ TOKENEXTRA *t = *tp;/* store tokens starting here */
TOKENEXTRA *t = *tp; /* store tokens starting here */ const BLOCKD *b;
int x; const short *qcoeff_ptr;
const short *qcoeff_ptr = b->qcoeff; ENTROPY_CONTEXT * a;
ENTROPY_CONTEXT * l;
int band, rc, v, token;
b = x->block + 24;
qcoeff_ptr = b->qcoeff;
a = (ENTROPY_CONTEXT *)x->above_context + 8;
l = (ENTROPY_CONTEXT *)x->left_context + 8;
VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
do for (c = 0; c < b->eob; c++)
{ {
const int band = vp8_coef_bands[c]; rc = vp8_default_zig_zag1d[c];
band = vp8_coef_bands[c];
v = qcoeff_ptr[rc];
if (c < eob) t->Extra = vp8_dct_value_tokens_ptr[v].Extra;
{ token = vp8_dct_value_tokens_ptr[v].Token;
int rc = vp8_default_zig_zag1d[c];
const int v = qcoeff_ptr[rc]; t->Token = token;
#if CONFIG_DEBUG t->context_tree = cpi->common.fc.coef_probs [1] [band] [pt];
assert(-DCT_MAX_VALUE <= v && v < (DCT_MAX_VALUE));
#endif t->skip_eob_node = ((pt == 0) && (band > 0));
t->Extra = vp8_dct_value_tokens_ptr[v].Extra;
x = vp8_dct_value_tokens_ptr[v].Token; ++cpi->coef_counts [1] [band] [pt] [token];
}
else pt = vp8_prev_token_class[token];
x = DCT_EOB_TOKEN; t++;
}
if (c < 16)
{
band = vp8_coef_bands[c];
t->Token = DCT_EOB_TOKEN;
t->context_tree = cpi->common.fc.coef_probs [1] [band] [pt];
t->Token = x; t->skip_eob_node = ((pt == 0) && (band > 0));
t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt];
t->skip_eob_node = pt == 0 && ((band > 0 && type > 0) || (band > 1 && type == 0)); ++cpi->coef_counts [1] [band] [pt] [DCT_EOB_TOKEN];
++cpi->coef_counts [type] [band] [pt] [x]; t++;
} }
while (pt = vp8_prev_token_class[x], ++t, c < eob && ++c < 16);
*tp = t; *tp = t;
pt = (c != !type); /* 0 <-> all coeff data is zero */ pt = (c != 0); /* 0 <-> all coeff data is zero */
*a = *l = pt; *a = *l = pt;
} }
static void tokenize1st_order_b static void tokenize1st_order_b
( (
const BLOCKD *const b, MACROBLOCKD *x,
TOKENEXTRA **tp, TOKENEXTRA **tp,
const int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */ int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */
ENTROPY_CONTEXT *a,
ENTROPY_CONTEXT *l,
VP8_COMP *cpi VP8_COMP *cpi
) )
{ {
int pt; /* near block/prev token context index */ unsigned int block;
int c = type ? 0 : 1; /* start at DC unless type 0 */ const BLOCKD *b;
const int eob = b->eob; /* one beyond last nonzero coeff */ int pt; /* near block/prev token context index */
TOKENEXTRA *t = *tp; /* store tokens starting here */ int c;
int x; int token;
const short *qcoeff_ptr = b->qcoeff; TOKENEXTRA *t = *tp;/* store tokens starting here */
VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); const short *qcoeff_ptr;
ENTROPY_CONTEXT * a;
do ENTROPY_CONTEXT * l;
int band, rc, v;
int tmp1, tmp2;
b = x->block;
/* Luma */
for (block = 0; block < 16; block++, b++)
{ {
const int band = vp8_coef_bands[c]; tmp1 = vp8_block2above[block];
tmp2 = vp8_block2left[block];
qcoeff_ptr = b->qcoeff;
a = (ENTROPY_CONTEXT *)x->above_context + tmp1;
l = (ENTROPY_CONTEXT *)x->left_context + tmp2;
VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
x = DCT_EOB_TOKEN; c = type ? 0 : 1;
if (c < eob) for (; c < b->eob; c++)
{ {
int rc = vp8_default_zig_zag1d[c]; rc = vp8_default_zig_zag1d[c];
const int v = qcoeff_ptr[rc]; band = vp8_coef_bands[c];
#if CONFIG_DEBUG v = qcoeff_ptr[rc];
assert(-DCT_MAX_VALUE <= v && v < (DCT_MAX_VALUE));
#endif
t->Extra = vp8_dct_value_tokens_ptr[v].Extra; t->Extra = vp8_dct_value_tokens_ptr[v].Extra;
x = vp8_dct_value_tokens_ptr[v].Token; token = vp8_dct_value_tokens_ptr[v].Token;
t->Token = token;
t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt];
t->skip_eob_node = pt == 0 &&
((band > 0 && type > 0) || (band > 1 && type == 0));
++cpi->coef_counts [type] [band] [pt] [token];
pt = vp8_prev_token_class[token];
t++;
} }
if (c < 16)
{
band = vp8_coef_bands[c];
t->Token = DCT_EOB_TOKEN;
t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt];
t->Token = x; t->skip_eob_node = pt == 0 &&
t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt]; ((band > 0 && type > 0) || (band > 1 && type == 0));
t->skip_eob_node = pt == 0 && ((band > 0 && type > 0) || (band > 1 && type == 0)); ++cpi->coef_counts [type] [band] [pt] [DCT_EOB_TOKEN];
t++;
}
*tp = t;
pt = (c != !type); /* 0 <-> all coeff data is zero */
*a = *l = pt;
++cpi->coef_counts [type] [band] [pt] [x];
} }
while (pt = vp8_prev_token_class[x], ++t, c < eob && ++c < 16); /* Chroma */
for (block = 16; block < 24; block++, b++)
{
tmp1 = vp8_block2above[block];
tmp2 = vp8_block2left[block];
qcoeff_ptr = b->qcoeff;
a = (ENTROPY_CONTEXT *)x->above_context + tmp1;
l = (ENTROPY_CONTEXT *)x->left_context + tmp2;
*tp = t; VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
pt = (c != !type); /* 0 <-> all coeff data is zero */
*a = *l = pt; for (c = 0; c < b->eob; c++)
{
rc = vp8_default_zig_zag1d[c];
band = vp8_coef_bands[c];
v = qcoeff_ptr[rc];
t->Extra = vp8_dct_value_tokens_ptr[v].Extra;
token = vp8_dct_value_tokens_ptr[v].Token;
t->Token = token;
t->context_tree = cpi->common.fc.coef_probs [2] [band] [pt];
t->skip_eob_node = ((pt == 0) && (band > 0));
++cpi->coef_counts [2] [band] [pt] [token];
pt = vp8_prev_token_class[token];
t++;
}
if (c < 16)
{
band = vp8_coef_bands[c];
t->Token = DCT_EOB_TOKEN;
t->context_tree = cpi->common.fc.coef_probs [2] [band] [pt];
t->skip_eob_node = ((pt == 0) && (band > 0));
++cpi->coef_counts [2] [band] [pt] [DCT_EOB_TOKEN];
t++;
}
*tp = t;
pt = (c != 0); /* 0 <-> all coeff data is zero */
*a = *l = pt;
}
} }
...@@ -214,10 +296,7 @@ static int mb_is_skippable(MACROBLOCKD *x, int has_y2_block) ...@@ -214,10 +296,7 @@ static int mb_is_skippable(MACROBLOCKD *x, int has_y2_block)
void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
{ {
ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)x->above_context;
ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)x->left_context;
int plane_type; int plane_type;
int b;
int has_y2_block; int has_y2_block;
has_y2_block = (x->mode_info_context->mbmi.mode != B_PRED has_y2_block = (x->mode_info_context->mbmi.mode != B_PRED
...@@ -240,26 +319,15 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ...@@ -240,26 +319,15 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
cpi->skip_false_count++; cpi->skip_false_count++;
plane_type = 3; plane_type = 3;
if(has_y2_block) if(has_y2_block)
{ {
tokenize2nd_order_b(x->block + 24, t, 1, tokenize2nd_order_b(x, t, cpi);
A + vp8_block2above[24], L + vp8_block2left[24], cpi);
plane_type = 0; plane_type = 0;
} }
for (b = 0; b < 16; b++) tokenize1st_order_b(x, t, plane_type, cpi);
tokenize1st_order_b(x->block + b, t, plane_type,
A + vp8_block2above[b],
L + vp8_block2left[b], cpi);
for (b = 16; b < 24; b++)
tokenize1st_order_b(x->block + b, t, 2,
A + vp8_block2above[b],
L + vp8_block2left[b], cpi);
} }
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment