Commit e85e6315 authored by Scott LaVarnway

Changed above and left context data layout

The main reason for the change was to reduce cycles in the token
decoder (~1.5% gain for 32-bit). This layout should be more
cache-friendly.

As a result of this change, the encoder had to be updated.

Change-Id: Id5e804169d8889da0378b3a519ac04dabd28c837
Note: dixie uses a similar layout
parent aaad6d1b
......@@ -42,16 +42,10 @@ void vp8_de_alloc_frame_buffers(VP8_COMMON *oci)
vp8_yv12_de_alloc_frame_buffer(&oci->temp_scale_frame);
vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer);
vpx_free(oci->above_context[Y1CONTEXT]);
vpx_free(oci->above_context[UCONTEXT]);
vpx_free(oci->above_context[VCONTEXT]);
vpx_free(oci->above_context[Y2CONTEXT]);
vpx_free(oci->above_context);
vpx_free(oci->mip);
oci->above_context[Y1CONTEXT] = 0;
oci->above_context[UCONTEXT] = 0;
oci->above_context[VCONTEXT] = 0;
oci->above_context[Y2CONTEXT] = 0;
oci->above_context = 0;
oci->mip = 0;
}
......@@ -118,33 +112,9 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
oci->mi = oci->mip + oci->mode_info_stride + 1;
oci->above_context[Y1CONTEXT] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * oci->mb_cols * 4 , 1);
oci->above_context = vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * oci->mb_cols, 1);
if (!oci->above_context[Y1CONTEXT])
{
vp8_de_alloc_frame_buffers(oci);
return ALLOC_FAILURE;
}
oci->above_context[UCONTEXT] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * oci->mb_cols * 2 , 1);
if (!oci->above_context[UCONTEXT])
{
vp8_de_alloc_frame_buffers(oci);
return ALLOC_FAILURE;
}
oci->above_context[VCONTEXT] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * oci->mb_cols * 2 , 1);
if (!oci->above_context[VCONTEXT])
{
vp8_de_alloc_frame_buffers(oci);
return ALLOC_FAILURE;
}
oci->above_context[Y2CONTEXT] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * oci->mb_cols , 1);
if (!oci->above_context[Y2CONTEXT])
if (!oci->above_context)
{
vp8_de_alloc_frame_buffers(oci);
return ALLOC_FAILURE;
......
......@@ -12,13 +12,13 @@
#include "blockd.h"
#include "vpx_mem/vpx_mem.h"
/* Copies the first `count` ENTROPY_CONTEXT entries of `l` into t->l and of
 * `a` into t->a.
 *
 * t     - destination scratch context.
 * a     - source "above" context entries.
 * l     - source "left" context entries.
 * count - number of entries to copy from each source; not range-checked here.
 *         TEMP_CONTEXT declares l[4] and a[4], so count must not exceed 4.
 */
void vp8_setup_temp_context(TEMP_CONTEXT *t, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int count)
{
vpx_memcpy(t->l, l, sizeof(ENTROPY_CONTEXT) * count);
vpx_memcpy(t->a, a, sizeof(ENTROPY_CONTEXT) * count);
}
const int vp8_block2left[25] = { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 1, 1, 0, 0, 1, 1, 0};
const int vp8_block2above[25] = { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0};
const int vp8_block2type[25] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 1};
const int vp8_block2context[25] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3};
/* Left-context offset for each of the 25 blocks of a macroblock, indexed by
 * block number. The token decoder adds the value to a pointer to the first
 * entry of an ENTROPY_CONTEXT_PLANES (`L + vp8_block2left[i]`), so with that
 * struct's member order the offsets land as: 0-3 in y1 (blocks 0-15),
 * 4-5 in u (blocks 16-19), 6-7 in v (blocks 20-23) and 8 on y2 (block 24).
 */
const unsigned char vp8_block2left[25] =
{
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8
};
/* Above-context offset for each of the 25 blocks of a macroblock, indexed by
 * block number. The token decoder adds the value to a pointer to the first
 * entry of an ENTROPY_CONTEXT_PLANES (`A + vp8_block2above[i]`), so with that
 * struct's member order the offsets land as: 0-3 in y1 (blocks 0-15),
 * 4-5 in u (blocks 16-19), 6-7 in v (blocks 20-23) and 8 on y2 (block 24).
 */
const unsigned char vp8_block2above[25] =
{
0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8
};
......@@ -49,19 +49,19 @@ typedef struct
} POS;
typedef int ENTROPY_CONTEXT;
typedef char ENTROPY_CONTEXT;
/* NOTE(review): this listing shows the commit's removed and added lines
 * together without +/- markers. TEMP_CONTEXT (l[4], a[4]) looks like the
 * pre-change scratch struct and ENTROPY_CONTEXT_PLANES (y1, u, v, y2) like
 * its replacement -- confirm against the repository.
 */
typedef struct
{
ENTROPY_CONTEXT l[4];
ENTROPY_CONTEXT a[4];
} TEMP_CONTEXT;
/* Per-macroblock context entries, in the order the vp8_block2above /
 * vp8_block2left offset tables index them (offsets 0-3, 4-5, 6-7, 8). */
ENTROPY_CONTEXT y1[4];
ENTROPY_CONTEXT u[2];
ENTROPY_CONTEXT v[2];
ENTROPY_CONTEXT y2;
} ENTROPY_CONTEXT_PLANES;
extern void vp8_setup_temp_context(TEMP_CONTEXT *t, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int count);
extern const int vp8_block2left[25];
extern const int vp8_block2above[25];
extern const int vp8_block2type[25];
extern const int vp8_block2context[25];
extern const unsigned char vp8_block2left[25];
extern const unsigned char vp8_block2above[25];
/* Sets Dest to the number of non-zero values among A and B (0, 1 or 2).
 *
 * Expands to one parenthesized assignment expression with no trailing
 * semicolon, so `VP8_COMBINEENTROPYCONTEXTS(d, a, b);` is exactly one
 * statement and is safe as the body of an unbraced if/else. Each argument is
 * evaluated once. Callers must supply the terminating semicolon themselves.
 */
#define VP8_COMBINEENTROPYCONTEXTS( Dest, A, B) \
((Dest) = ((A)!=0) + ((B)!=0))
......@@ -237,8 +237,8 @@ typedef struct
int left_available;
// Y,U,V,Y2
ENTROPY_CONTEXT *above_context[4]; // row of context for each plane
ENTROPY_CONTEXT(*left_context)[4]; // (up to) 4 contexts ""
ENTROPY_CONTEXT_PLANES *above_context;
ENTROPY_CONTEXT_PLANES *left_context;
// 0 indicates segmentation at MB level is not enabled. Otherwise the individual bits indicate which features are active.
unsigned char segmentation_enabled;
......
......@@ -165,8 +165,8 @@ typedef struct VP8Common
int ref_frame_sign_bias[MAX_REF_FRAMES]; // Two state 0, 1
// Y,U,V,Y2
ENTROPY_CONTEXT *above_context[4]; // row of context for each plane
ENTROPY_CONTEXT left_context[4][4]; // (up to) 4 contexts ""
ENTROPY_CONTEXT_PLANES *above_context; // row of context for each plane
ENTROPY_CONTEXT_PLANES left_context; // (up to) 4 contexts ""
// keyframe block modes are predicted by their above, left neighbors
......
......@@ -13,7 +13,7 @@
#include "vpx_ports/mem.h"
#include "vpx_mem/vpx_mem.h"
DECLARE_ALIGNED(16, const unsigned int, vp8dx_bitreader_norm[256]) =
DECLARE_ALIGNED(16, const unsigned char, vp8dx_bitreader_norm[256]) =
{
0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
......
......@@ -95,7 +95,7 @@ typedef struct vp8_dboolhuff_rtcd_vtable {
#define IF_RTCD(x) NULL
//#endif
DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
DECLARE_ALIGNED(16, extern const unsigned char, vp8dx_bitreader_norm[256]);
/* wrapper functions to hide RTCD. static means inline means hopefully no
* penalty
......
......@@ -338,15 +338,12 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
vpx_memset(pc->left_context, 0, sizeof(pc->left_context));
vpx_memset(&pc->left_context, 0, sizeof(pc->left_context));
recon_yoffset = mb_row * recon_y_stride * 16;
recon_uvoffset = mb_row * recon_uv_stride * 8;
// reset above block coeffs
xd->above_context[Y1CONTEXT] = pc->above_context[Y1CONTEXT];
xd->above_context[UCONTEXT ] = pc->above_context[UCONTEXT];
xd->above_context[VCONTEXT ] = pc->above_context[VCONTEXT];
xd->above_context[Y2CONTEXT] = pc->above_context[Y2CONTEXT];
xd->above_context = pc->above_context;
xd->up_available = (mb_row != 0);
xd->mb_to_top_edge = -((mb_row * 16)) << 3;
......@@ -403,10 +400,7 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
++xd->mode_info_context; /* next mb */
xd->above_context[Y1CONTEXT] += 4;
xd->above_context[UCONTEXT ] += 2;
xd->above_context[VCONTEXT ] += 2;
xd->above_context[Y2CONTEXT] ++;
xd->above_context++;
pbi->current_mb_col_main = mb_col;
}
......@@ -561,7 +555,7 @@ static void init_frame(VP8D_COMP *pbi)
}
}
xd->left_context = pc->left_context;
xd->left_context = &pc->left_context;
xd->mode_info_context = pc->mi;
xd->frame_type = pc->frame_type;
xd->mode_info_context->mbmi.mode = DC_PRED;
......@@ -849,11 +843,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
else
vp8_decode_mode_mvs(pbi);
// reset since these guys are used as iterators
vpx_memset(pc->above_context[Y1CONTEXT], 0, sizeof(ENTROPY_CONTEXT) * pc->mb_cols * 4);
vpx_memset(pc->above_context[UCONTEXT ], 0, sizeof(ENTROPY_CONTEXT) * pc->mb_cols * 2);
vpx_memset(pc->above_context[VCONTEXT ], 0, sizeof(ENTROPY_CONTEXT) * pc->mb_cols * 2);
vpx_memset(pc->above_context[Y2CONTEXT], 0, sizeof(ENTROPY_CONTEXT) * pc->mb_cols);
vpx_memset(pc->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * pc->mb_cols);
vpx_memcpy(&xd->block[0].bmi, &xd->mode_info_context->bmi[0], sizeof(B_MODE_INFO));
......
......@@ -19,7 +19,7 @@
#define BOOL_DATA UINT8
#define OCB_X PREV_COEF_CONTEXTS * ENTROPY_NODES
DECLARE_ALIGNED(16, UINT16, vp8_coef_bands_x[16]) = { 0, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X, 6 * OCB_X, 4 * OCB_X, 5 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X};
DECLARE_ALIGNED(16, UINT8, vp8_coef_bands_x[16]) = { 0, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X, 6 * OCB_X, 4 * OCB_X, 5 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X};
#define EOB_CONTEXT_NODE 0
#define ZERO_CONTEXT_NODE 1
#define ONE_CONTEXT_NODE 2
......@@ -61,47 +61,16 @@ DECLARE_ALIGNED(16, static const TOKENEXTRABITS, vp8d_token_extra_bits2[MAX_ENTR
/* Zeroes the above and left entropy contexts of macroblock x.
 *
 * NOTE(review): this listing shows the commit's removed and added lines
 * together without +/- markers. The per-plane pointer stores through
 * A[...] / L[...] look like the pre-change body and the vpx_memset calls like
 * the post-change body -- confirm against the repository before relying on
 * this text as a single function.
 */
void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
{
ENTROPY_CONTEXT **const A = x->above_context;
ENTROPY_CONTEXT(* const L)[4] = x->left_context;
ENTROPY_CONTEXT *a;
ENTROPY_CONTEXT *l;
/* Clear entropy contexts for Y blocks */
a = A[Y1CONTEXT];
l = L[Y1CONTEXT];
*a = 0;
*(a+1) = 0;
*(a+2) = 0;
*(a+3) = 0;
*l = 0;
*(l+1) = 0;
*(l+2) = 0;
*(l+3) = 0;
/* Clear entropy contexts for U blocks */
a = A[UCONTEXT];
l = L[UCONTEXT];
*a = 0;
*(a+1) = 0;
*l = 0;
*(l+1) = 0;
/* Clear entropy contexts for V blocks */
a = A[VCONTEXT];
l = L[VCONTEXT];
*a = 0;
*(a+1) = 0;
*l = 0;
*(l+1) = 0;
/* Clear entropy contexts for Y2 blocks */
if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
{
a = A[Y2CONTEXT];
l = L[Y2CONTEXT];
*a = 0;
*l = 0;
/* Mode is neither B_PRED nor SPLITMV: clear every byte of both structs. */
vpx_memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
}
else
{
/* B_PRED or SPLITMV: clear all but the last byte of each struct.
 * Presumably that byte is y2, which is meant to keep its value; this
 * relies on y2 being the final, unpadded one-byte member -- verify. */
vpx_memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
vpx_memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
}
}
......@@ -132,7 +101,7 @@ void vp8_init_detokenizer(VP8D_COMP *dx)
}
#endif
DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
DECLARE_ALIGNED(16, extern const unsigned char, vp8dx_bitreader_norm[256]);
#define FILL \
if(count < 0) \
VP8DX_BOOL_DECODER_FILL(count, value, bufptr, bufend);
......@@ -260,8 +229,8 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
#else
int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
{
ENTROPY_CONTEXT **const A = x->above_context;
ENTROPY_CONTEXT(* const L)[4] = x->left_context;
ENTROPY_CONTEXT *A = (ENTROPY_CONTEXT *)x->above_context;
ENTROPY_CONTEXT *L = (ENTROPY_CONTEXT *)x->left_context;
const VP8_COMMON *const oc = & dx->common;
BOOL_DECODER *bc = x->current_bc;
......@@ -291,29 +260,24 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
int stop;
INT16 val, bits_count;
INT16 c;
INT16 t;
INT16 v;
const vp8_prob *Prob;
//int *scan;
type = 3;
i = 0;
stop = 16;
scan = vp8_default_zig_zag1d;
qcoeff_ptr = &x->qcoeff[0];
if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
{
i = 24;
stop = 24;
type = 1;
qcoeff_ptr = &x->qcoeff[24*16];
scan = vp8_default_zig_zag1d;
qcoeff_ptr += 24*16;
eobtotal -= 16;
}
else
{
scan = vp8_default_zig_zag1d;
qcoeff_ptr = &x->qcoeff[0];
}
bufend = bc->user_buffer_end;
bufptr = bc->user_buffer;
......@@ -325,13 +289,15 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
coef_probs = oc->fc.coef_probs [type] [ 0 ] [0];
BLOCK_LOOP:
a = A[ vp8_block2context[i] ] + vp8_block2above[i];
l = L[ vp8_block2context[i] ] + vp8_block2left[i];
a = A + vp8_block2above[i];
l = L + vp8_block2left[i];
c = (INT16)(!type);
VP8_COMBINEENTROPYCONTEXTS(t, *a, *l);
// Dest = ((A)!=0) + ((B)!=0);
VP8_COMBINEENTROPYCONTEXTS(v, *a, *l);
Prob = coef_probs;
Prob += t * ENTROPY_NODES;
Prob += v * ENTROPY_NODES;
DO_WHILE:
Prob += vp8_coef_bands_x[c];
......@@ -418,9 +384,8 @@ ONE_CONTEXT_NODE_0_:
qcoeff_ptr [ scan[15] ] = (INT16) v;
BLOCK_FINISHED:
t = ((eobs[i] = c) != !type); // any nonzero data?
*a = *l = ((eobs[i] = c) != !type); // any nonzero data?
eobtotal += c;
*a = *l = t;
qcoeff_ptr += 16;
i++;
......@@ -430,12 +395,11 @@ BLOCK_FINISHED:
if (i == 25)
{
scan = vp8_default_zig_zag1d;//x->scan_order1d;
type = 0;
i = 0;
stop = 16;
coef_probs = oc->fc.coef_probs [type] [ 0 ] [0];
qcoeff_ptr = &x->qcoeff[0];
qcoeff_ptr -= (24*16 + 16);
goto BLOCK_LOOP;
}
......
......@@ -157,7 +157,7 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
int ithread = ((DECODETHREAD_DATA *)p_data)->ithread;
VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1);
MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2);
ENTROPY_CONTEXT mb_row_left_context[4][4];
ENTROPY_CONTEXT_PLANES mb_row_left_context;
while (1)
{
......@@ -197,12 +197,9 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
recon_uvoffset = mb_row * recon_uv_stride * 8;
// reset above block coeffs
xd->above_context[Y1CONTEXT] = pc->above_context[Y1CONTEXT];
xd->above_context[UCONTEXT ] = pc->above_context[UCONTEXT];
xd->above_context[VCONTEXT ] = pc->above_context[VCONTEXT];
xd->above_context[Y2CONTEXT] = pc->above_context[Y2CONTEXT];
xd->left_context = mb_row_left_context;
vpx_memset(mb_row_left_context, 0, sizeof(mb_row_left_context));
xd->above_context = pc->above_context;
xd->left_context = &mb_row_left_context;
vpx_memset(&mb_row_left_context, 0, sizeof(mb_row_left_context));
xd->up_available = (mb_row != 0);
xd->mb_to_top_edge = -((mb_row * 16)) << 3;
......@@ -260,10 +257,7 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
++xd->mode_info_context; /* next mb */
xd->above_context[Y1CONTEXT] += 4;
xd->above_context[UCONTEXT ] += 2;
xd->above_context[VCONTEXT ] += 2;
xd->above_context[Y2CONTEXT] ++;
xd->above_context++;
//pbi->mb_row_di[ithread].current_mb_col = mb_col;
pbi->current_mb_col[mb_row] = mb_col;
......@@ -604,15 +598,12 @@ void vp8_mtdecode_mb_rows(VP8D_COMP *pbi,
if (mb_row > 0)
last_row_current_mb_col = &pbi->current_mb_col[mb_row -1];
vpx_memset(pc->left_context, 0, sizeof(pc->left_context));
vpx_memset(&pc->left_context, 0, sizeof(pc->left_context));
recon_yoffset = mb_row * recon_y_stride * 16;
recon_uvoffset = mb_row * recon_uv_stride * 8;
// reset above block coeffs
xd->above_context[Y1CONTEXT] = pc->above_context[Y1CONTEXT];
xd->above_context[UCONTEXT ] = pc->above_context[UCONTEXT];
xd->above_context[VCONTEXT ] = pc->above_context[VCONTEXT];
xd->above_context[Y2CONTEXT] = pc->above_context[Y2CONTEXT];
xd->above_context = pc->above_context;
xd->up_available = (mb_row != 0);
xd->mb_to_top_edge = -((mb_row * 16)) << 3;
......@@ -672,10 +663,7 @@ void vp8_mtdecode_mb_rows(VP8D_COMP *pbi,
++xd->mode_info_context; /* next mb */
xd->above_context[Y1CONTEXT] += 4;
xd->above_context[UCONTEXT ] += 2;
xd->above_context[VCONTEXT ] += 2;
xd->above_context[Y2CONTEXT] ++;
xd->above_context++;
//pbi->current_mb_col_main = mb_col;
pbi->current_mb_col[mb_row] = mb_col;
......
......@@ -348,10 +348,7 @@ void encode_mb_row(VP8_COMP *cpi,
// reset above block coeffs
xd->above_context[Y1CONTEXT] = cm->above_context[Y1CONTEXT];
xd->above_context[UCONTEXT ] = cm->above_context[UCONTEXT ];
xd->above_context[VCONTEXT ] = cm->above_context[VCONTEXT ];
xd->above_context[Y2CONTEXT] = cm->above_context[Y2CONTEXT];
xd->above_context = cm->above_context;
xd->up_available = (mb_row != 0);
recon_yoffset = (mb_row * recon_y_stride * 16);
......@@ -472,10 +469,7 @@ void encode_mb_row(VP8_COMP *cpi,
// skip to next mb
xd->mode_info_context++;
xd->above_context[Y1CONTEXT] += 4;
xd->above_context[UCONTEXT ] += 2;
xd->above_context[VCONTEXT ] += 2;
xd->above_context[Y2CONTEXT] ++;
xd->above_context++;
cpi->current_mb_col_main = mb_col;
}
......@@ -626,7 +620,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
xd->mode_info_context->mbmi.mode = DC_PRED;
xd->mode_info_context->mbmi.uv_mode = DC_PRED;
xd->left_context = cm->left_context;
xd->left_context = &cm->left_context;
vp8_zero(cpi->count_mb_ref_frame_usage)
vp8_zero(cpi->ymode_count)
......@@ -634,17 +628,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
x->mvc = cm->fc.mvc;
// vp8_zero( entropy_stats)
{
ENTROPY_CONTEXT **p = cm->above_context;
const size_t L = cm->mb_cols;
vp8_zero_array(p [Y1CONTEXT], L * 4)
vp8_zero_array(p [ UCONTEXT], L * 2)
vp8_zero_array(p [ VCONTEXT], L * 2)
vp8_zero_array(p [Y2CONTEXT], L)
}
vpx_memset(cm->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols);
{
struct vpx_usec_timer emr_timer;
......@@ -1128,7 +1112,7 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
extern int cnt_pm;
#endif
extern void vp8_fix_contexts(VP8_COMP *cpi, MACROBLOCKD *x);
extern void vp8_fix_contexts(MACROBLOCKD *x);
int vp8cx_encode_inter_macroblock
(
......@@ -1282,7 +1266,7 @@ int vp8cx_encode_inter_macroblock
xd->mode_info_context->mbmi.mb_skip_coeff = 1;
cpi->skip_true_count ++;
vp8_fix_contexts(cpi, xd);
vp8_fix_contexts(xd);
}
else
{
......
......@@ -488,12 +488,18 @@ void vp8_optimize_b(MACROBLOCK *mb, int i, int type,
void vp8_optimize_mb(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
{
int b;
TEMP_CONTEXT t, t2;
int type;
int has_2nd_order;
ENTROPY_CONTEXT_PLANES t_above, t_left;
ENTROPY_CONTEXT *ta;
ENTROPY_CONTEXT *tl;
vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
ta = (ENTROPY_CONTEXT *)&t_above;
tl = (ENTROPY_CONTEXT *)&t_left;
vp8_setup_temp_context(&t, x->e_mbd.above_context[Y1CONTEXT],
x->e_mbd.left_context[Y1CONTEXT], 4);
has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED
&& x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
type = has_2nd_order ? 0 : 3;
......@@ -501,24 +507,19 @@ void vp8_optimize_mb(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
for (b = 0; b < 16; b++)
{
vp8_optimize_b(x, b, type,
t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd);
ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
}
vp8_setup_temp_context(&t, x->e_mbd.above_context[UCONTEXT],
x->e_mbd.left_context[UCONTEXT], 2);
vp8_setup_temp_context(&t2, x->e_mbd.above_context[VCONTEXT],
x->e_mbd.left_context[VCONTEXT], 2);
for (b = 16; b < 20; b++)
{
vp8_optimize_b(x, b, vp8_block2type[b],
t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd);
ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
}
for (b = 20; b < 24; b++)
{
vp8_optimize_b(x, b, vp8_block2type[b],
t2.a + vp8_block2above[b], t2.l + vp8_block2left[b], rtcd);
ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
}
......@@ -565,17 +566,25 @@ static void vp8_find_mb_skip_coef(MACROBLOCK *x)
void vp8_optimize_mby(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
{
int b;
TEMP_CONTEXT t;
int type;
int has_2nd_order;
if (!x->e_mbd.above_context[Y1CONTEXT])
ENTROPY_CONTEXT_PLANES t_above, t_left;
ENTROPY_CONTEXT *ta;
ENTROPY_CONTEXT *tl;
if (!x->e_mbd.above_context)
return;
if (!x->e_mbd.left_context[Y1CONTEXT])
if (!x->e_mbd.left_context)
return;
vp8_setup_temp_context(&t, x->e_mbd.above_context[Y1CONTEXT],
x->e_mbd.left_context[Y1CONTEXT], 4);
vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
ta = (ENTROPY_CONTEXT *)&t_above;
tl = (ENTROPY_CONTEXT *)&t_left;
has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED
&& x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
type = has_2nd_order ? 0 : 3;
......@@ -583,7 +592,7 @@ void vp8_optimize_mby(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
for (b = 0; b < 16; b++)
{
vp8_optimize_b(x, b, type,
t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd);
ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
}
/*
......@@ -599,33 +608,32 @@ void vp8_optimize_mby(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
/* Calls vp8_optimize_b for blocks 16-19 and then 20-23 of macroblock x,
 * passing each block its above/left context entry. Returns without doing
 * anything when a context pointer is null.
 *
 * NOTE(review): this listing shows the commit's removed and added lines
 * together without +/- markers. The statements using TEMP_CONTEXT t/t2 and
 * the [UCONTEXT]/[VCONTEXT] indexing look like the pre-change body; those
 * using t_above/t_left/ta/tl look like the post-change body -- confirm
 * against the repository.
 */
void vp8_optimize_mbuv(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
{
int b;
TEMP_CONTEXT t, t2;
if (!x->e_mbd.above_context[UCONTEXT])
return;
ENTROPY_CONTEXT_PLANES t_above, t_left;
ENTROPY_CONTEXT *ta;
ENTROPY_CONTEXT *tl;
if (!x->e_mbd.left_context[UCONTEXT])
if (!x->e_mbd.above_context)
return;
if (!x->e_mbd.above_context[VCONTEXT])
if (!x->e_mbd.left_context)
return;
if (!x->e_mbd.left_context[VCONTEXT])
return;
/* Take local copies of the macroblock's above/left context structs; the
 * vp8_optimize_b calls below are handed pointers into the copies. */
vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
vp8_setup_temp_context(&t, x->e_mbd.above_context[UCONTEXT], x->e_mbd.left_context[UCONTEXT], 2);
vp8_setup_temp_context(&t2, x->e_mbd.above_context[VCONTEXT], x->e_mbd.left_context[VCONTEXT], 2);
/* View the copies as flat ENTROPY_CONTEXT arrays so the vp8_block2above /
 * vp8_block2left offsets can be added directly. */
ta = (ENTROPY_CONTEXT *)&t_above;
tl = (ENTROPY_CONTEXT *)&t_left;
/* Blocks 16-19: table offsets 4-5, i.e. the u entries. */
for (b = 16; b < 20; b++)
{
vp8_optimize_b(x, b, vp8_block2type[b],
t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd);
ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
}
/* Blocks 20-23: table offsets 6-7, i.e. the v entries. */
for (b = 20; b < 24; b++)
{
vp8_optimize_b(x, b, vp8_block2type[b],
t2.a + vp8_block2above[b], t2.l + vp8_block2left[b], rtcd);
ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
}
}
......
......@@ -28,7 +28,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
int ithread = ((ENCODETHREAD_DATA *)p_data)->ithread;
VP8_COMP *cpi = (VP8_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr1);
MB_ROW_COMP *mbri = (MB_ROW_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr2);
ENTROPY_CONTEXT mb_row_left_context[4][4];
ENTROPY_CONTEXT_PLANES mb_row_left_context;
//printf("Started thread %d\n", ithread);
......@@ -68,11 +68,8 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
last_row_current_mb_col = &cpi->current_mb_col_main;
// reset above block coeffs
xd->above_context[Y1CONTEXT] = cm->above_context[Y1CONTEXT];
xd->above_context[UCONTEXT ] = cm->above_context[UCONTEXT ];
xd->above_context[VCONTEXT ] = cm->above_context[VCONTEXT ];
xd->above_context[Y2CONTEXT] = cm->above_context[Y2CONTEXT];
xd->left_context = mb_row_left_context;
xd->above_context = cm->above_context;
xd->left_context = &mb_row_left_context;
vp8_zero(mb_row_left_context);
......@@ -183,10 +180,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
// skip to next mb
xd->mode_info_context++;
xd->above_context[Y1CONTEXT] += 4;
xd->above_context[UCONTEXT ] += 2;
xd->above_context[VCONTEXT ] += 2;
xd->above_context[Y2CONTEXT] ++;
xd->above_context++;
cpi->mb_row_ei[ithread].current_mb_col = mb_col;
......@@ -330,11 +324,6 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
zd->mb_segement_abs_delta = xd->mb_segement_abs_delta;
vpx_memcpy(zd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));
/*
memcpy(zd->above_context, xd->above_context, sizeof(xd->above_context));
memcpy(zd->mb_segment_tree_probs, xd->mb_segment_tree_probs, sizeof(xd->mb_segment_tree_probs));
memcpy(zd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));
*/
for (i = 0; i < 25; i++)
{
zd->block[i].dequant = xd->block[i].dequant;
......@@ -402,7 +391,7 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
mb->rddiv = cpi->RDDIV;
mb->rdmult = cpi->RDMULT;
mbd->left_context = cm->left_context;
mbd->left_context = &cm->left_context;
mb->mvc = cm->fc.mvc;
setup_mbby_copy(&mbr_ei[i].mb, x);
......
......@@ -220,13 +220,20 @@ int vp8_pick_intra4x4mby_modes(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *mb, int
{
MACROBLOCKD *const xd = &mb->e_mbd;
int i;
TEMP_CONTEXT t;
int cost = mb->mbmode_cost [xd->frame_type] [B_PRED];
int error = RD_ESTIMATE(mb->rdmult, mb->rddiv, cost, 0); // Rd estimate for the cost of the block prediction mode
int distortion = 0;
ENTROPY_CONTEXT_PLANES t_above, t_left;
ENTROPY_CONTEXT *ta;
ENTROPY_CONTEXT *tl;
vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
ta = (ENTROPY_CONTEXT *)&t_above;
tl = (ENTROPY_CONTEXT *)&t_left;