Commit 52cf4dca authored by Attila Nagy's avatar Attila Nagy Committed by Yunqing Wang
Browse files

Packing bitstream on-the-fly with delayed context updates

Produce the token partitions on-the-fly, while processing each MB.
Context is updated at the beginning of each frame based on the
previous frame's counters. Optimally, the encoder outputs partitions in
separate buffers. For frame based output, partitions are concatenated
internally.

Limitations:
    - enabled just in combination with realtime-only mode
    - number of encoding threads has to be equal or less than the
    number of token partitions. For this reason, by default the encoder
    will do 8 token partitions.
    - vpxenc supports partition output (-P) just in combination with
    IVF output format (--ivf)

Performance:
    - Realtime encoder can be up to 13% faster (ARM) depending on the number
    of threads and bitrate settings. Constant gain over the 5-16 speed
    range.
    - Token buffer reduced from one frame to 8 MBs

Quality:
    - quality is affected by the delayed context updates. This again
    depends on input material, speed and bitrate settings. For VC
    style input the loss seen is up to 0.2dB. If error-resilient=2
    mode is used then the effect of this change is negligible.

Example:
./configure --enable-realtime-only --enable-onthefly-bitpacking
./vpxenc --rt --end-usage=1 --fps=30000/1000 -w 640 -h 480
--target-bitrate=1000 --token-parts=3 --static-thresh=2000
--ivf -P -t 4 -o strm.ivf tanya_640x480.yuv

Change-Id: I127295cb85b835fc287e1c0201a67e378d025d76
parent ce328b85
......@@ -39,6 +39,7 @@ Advanced options:
${toggle_multithread} multithreaded encoding and decoding
${toggle_spatial_resampling} spatial sampling (scaling) support
${toggle_realtime_only} enable this option while building for real-time encoding
${toggle_onthefly_bitpacking} enable on-the-fly bitpacking in real-time encoding
${toggle_error_concealment} enable this option to get a decoder which is able to conceal losses
${toggle_runtime_cpu_detect} runtime cpu detection
${toggle_shared} shared library support
......@@ -253,6 +254,7 @@ CONFIG_LIST="
static_msvcrt
spatial_resampling
realtime_only
onthefly_bitpacking
error_concealment
shared
static
......@@ -297,6 +299,7 @@ CMDLINE_SELECT="
mem_tracker
spatial_resampling
realtime_only
onthefly_bitpacking
error_concealment
shared
static
......
......@@ -24,6 +24,7 @@
#include "bitstream.h"
#include "defaultcoefcounts.h"
#include "vp8/common/common.h"
const int vp8cx_base_skip_false_prob[128] =
{
......@@ -159,7 +160,7 @@ static void write_split(vp8_writer *bc, int x)
);
}
static void pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount)
void vp8_pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount)
{
const TOKENEXTRA *const stop = p + xcount;
unsigned int split;
......@@ -398,7 +399,7 @@ static void pack_tokens_into_partitions_c(VP8_COMP *cpi, unsigned char *cx_data,
const TOKENEXTRA *stop = cpi->tplist[mb_row].stop;
int tokens = stop - p;
pack_tokens_c(w, p, tokens);
vp8_pack_tokens_c(w, p, tokens);
}
vp8_stop_encode(w);
......@@ -417,7 +418,7 @@ static void pack_mb_row_tokens_c(VP8_COMP *cpi, vp8_writer *w)
const TOKENEXTRA *stop = cpi->tplist[mb_row].stop;
int tokens = stop - p;
pack_tokens_c(w, p, tokens);
vp8_pack_tokens_c(w, p, tokens);
}
}
......@@ -783,6 +784,7 @@ static void write_kfmodes(VP8_COMP *cpi)
}
}
#if 0
/* This function is used for debugging probability trees. */
static void print_prob_tree(vp8_prob
coef_probs[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES])
......@@ -814,6 +816,7 @@ static void print_prob_tree(vp8_prob
fprintf(f, "}\n");
fclose(f);
}
#endif
static void sum_probs_over_prev_coef_context(
const unsigned int probs[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS],
......@@ -943,7 +946,6 @@ static int default_coef_context_savings(VP8_COMP *cpi)
int t = 0; /* token/prob index */
vp8_tree_probs_from_distribution(
MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree,
cpi->frame_coef_probs [i][j][k],
......@@ -1048,10 +1050,33 @@ int vp8_estimate_entropy_savings(VP8_COMP *cpi)
return savings;
}
static void update_coef_probs(VP8_COMP *cpi)
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
/*
 * Compute the coefficient context savings for the current frame.
 *
 * On key frames cpi->coef_counts is first overwritten with
 * default_coef_counts. The returned value then comes from
 * independent_coef_context_savings() when the
 * VPX_ERROR_RESILIENT_PARTITIONS bit is set in
 * cpi->oxcf.error_resilient_mode, and from
 * default_coef_context_savings() otherwise.
 */
int vp8_update_coef_context(VP8_COMP *cpi)
{
    if (cpi->common.frame_type == KEY_FRAME)
    {
        /* Key frames start over from the default counts/probabilities */
        vp8_copy(cpi->coef_counts, default_coef_counts);
    }

    if (cpi->oxcf.error_resilient_mode & VPX_ERROR_RESILIENT_PARTITIONS)
    {
        return independent_coef_context_savings(cpi);
    }

    return default_coef_context_savings(cpi);
}
#endif
void vp8_update_coef_probs(VP8_COMP *cpi)
{
int i = 0;
#if !(CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
vp8_writer *const w = cpi->bc;
#endif
int savings = 0;
vp8_clear_system_state(); //__asm emms;
......@@ -1131,7 +1156,11 @@ static void update_coef_probs(VP8_COMP *cpi)
cpi->common.frame_type == KEY_FRAME && newp != *Pold)
u = 1;
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
cpi->update_probs[i][j][k][t] = u;
#else
vp8_write(w, u, upd);
#endif
#ifdef ENTROPY_STATS
......@@ -1143,7 +1172,9 @@ static void update_coef_probs(VP8_COMP *cpi)
/* send/use new probability */
*Pold = newp;
#if !(CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
vp8_write_literal(w, newp, 8);
#endif
savings += s;
......@@ -1172,6 +1203,50 @@ static void update_coef_probs(VP8_COMP *cpi)
while (++i < BLOCK_TYPES);
}
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
/*
 * Write the coefficient probability update section through cpi->bc.
 *
 * For every [block type][band][context][node] entry, the flag held in
 * cpi->update_probs is coded with the matching vp8_coef_update_probs
 * probability. When the flag is set, the entry's value from
 * cpi->common.fc.coef_probs follows as an 8-bit literal.
 */
static void pack_coef_probs(VP8_COMP *cpi)
{
    vp8_writer *const writer = cpi->bc;
    int type;

    for (type = 0; type < BLOCK_TYPES; ++type)
    {
        int band;

        for (band = 0; band < COEF_BANDS; ++band)
        {
            int ctx;

            for (ctx = 0; ctx < PREV_COEF_CONTEXTS; ++ctx)
            {
                int node; /* token/prob index */

                for (node = 0; node < ENTROPY_NODES; ++node)
                {
                    const vp8_prob flag_prob =
                        vp8_coef_update_probs [type][band][ctx][node];
                    const char changed =
                        cpi->update_probs[type][band][ctx][node];

                    /* the update flag is always coded */
                    vp8_write(writer, changed, flag_prob);

                    if (changed)
                    {
                        /* the new probability follows the flag */
                        vp8_write_literal(
                            writer,
                            cpi->common.fc.coef_probs [type][band][ctx][node],
                            8);
                    }
                }
            }
        }
    }
}
#endif
#ifdef PACKET_TESTING
FILE *vpxlogc = 0;
#endif
......@@ -1434,6 +1509,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
vp8_write_bit(bc, pc->ref_frame_sign_bias[ALTREF_FRAME]);
}
#if !(CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
if (cpi->oxcf.error_resilient_mode & VPX_ERROR_RESILIENT_PARTITIONS)
{
if (pc->frame_type == KEY_FRAME)
......@@ -1441,6 +1517,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
else
pc->refresh_entropy_probs = 0;
}
#endif
vp8_write_bit(bc, pc->refresh_entropy_probs);
......@@ -1458,13 +1535,17 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
vp8_clear_system_state(); //__asm emms;
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
pack_coef_probs(cpi);
#else
if (pc->refresh_entropy_probs == 0)
{
// save a copy for later refresh
vpx_memcpy(&cpi->common.lfc, &cpi->common.fc, sizeof(cpi->common.fc));
}
update_coef_probs(cpi);
vp8_update_coef_probs(cpi);
#endif
#ifdef ENTROPY_STATS
active_section = 2;
......@@ -1512,6 +1593,45 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
cpi->partition_sz[0] = *size;
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
{
const int num_part = (1 << pc->multi_token_partition);
unsigned char * dp = cpi->partition_d[0] + cpi->partition_sz[0];
if (num_part > 1)
{
/* write token part sizes (all but last) if more than 1 */
validate_buffer(dp, 3 * (num_part - 1), cpi->partition_d_end[0],
&pc->error);
cpi->partition_sz[0] += 3*(num_part-1);
for(i = 1; i < num_part; i++)
{
write_partition_size(dp, cpi->partition_sz[i]);
dp += 3;
}
}
if (!cpi->output_partition)
{
/* concatenate partition buffers */
for(i = 0; i < num_part; i++)
{
vpx_memmove(dp, cpi->partition_d[i+1], cpi->partition_sz[i+1]);
cpi->partition_d[i+1] = dp;
dp += cpi->partition_sz[i+1];
}
}
/* update total size */
*size = 0;
for(i = 0; i < num_part+1; i++)
{
*size += cpi->partition_sz[i];
}
}
#else
if (pc->multi_token_partition != ONE_PARTITION)
{
int num_part = 1 << pc->multi_token_partition;
......@@ -1561,6 +1681,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
*size += cpi->bc[1].pos;
cpi->partition_sz[1] = cpi->bc[1].pos;
}
#endif
}
#ifdef ENTROPY_STATS
......
......@@ -35,7 +35,10 @@ void vp8cx_pack_mb_row_tokens_armv5(VP8_COMP *cpi, vp8_writer *w,
# define pack_mb_row_tokens(a,b) \
vp8cx_pack_mb_row_tokens_armv5(a,b,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
#else
# define pack_tokens(a,b,c) pack_tokens_c(a,b,c)
void vp8_pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount);
# define pack_tokens(a,b,c) vp8_pack_tokens_c(a,b,c)
# define pack_tokens_into_partitions(a,b,c,d) pack_tokens_into_partitions_c(a,b,c,d)
# define pack_mb_row_tokens(a,b) pack_mb_row_tokens_c(a,b)
#endif
......
......@@ -28,6 +28,9 @@
#include <limits.h>
#include "vp8/common/invtrans.h"
#include "vpx_ports/vpx_timer.h"
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
#include "bitstream.h"
#endif
extern void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ;
extern void vp8_calc_ref_frame_costs(int *ref_frame_cost,
......@@ -373,10 +376,17 @@ void encode_mb_row(VP8_COMP *cpi,
int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
int map_index = (mb_row * cpi->common.mb_cols);
#if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
const int num_part = (1 << cm->multi_token_partition);
TOKENEXTRA * tp_start = cpi->tok;
vp8_writer *w;
#endif
#if CONFIG_MULTITHREAD
const int nsync = cpi->mt_sync_range;
const int rightmost_col = cm->mb_cols - 1;
const int rightmost_col = cm->mb_cols + nsync;
volatile const int *last_row_current_mb_col;
volatile int *current_mb_col = &cpi->mt_current_mb_col[mb_row];
if ((cpi->b_multi_threaded != 0) && (mb_row != 0))
last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1];
......@@ -384,6 +394,13 @@ void encode_mb_row(VP8_COMP *cpi,
last_row_current_mb_col = &rightmost_col;
#endif
#if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
if(num_part > 1)
w= &cpi->bc[1 + (mb_row % num_part)];
else
w = &cpi->bc[1];
#endif
// reset above block coeffs
xd->above_context = cm->above_context;
......@@ -411,6 +428,10 @@ void encode_mb_row(VP8_COMP *cpi,
// for each macroblock col in image
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
{
#if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
*tp = cpi->tok;
#endif
// Distance of Mb to the left & right edges, specified in
// 1/8th pel units as they are always compared to values
// that are in 1/8th pel units
......@@ -435,12 +456,13 @@ void encode_mb_row(VP8_COMP *cpi,
vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16);
#if CONFIG_MULTITHREAD
if ((cpi->b_multi_threaded != 0) && (mb_row != 0))
if (cpi->b_multi_threaded != 0)
{
*current_mb_col = mb_col - 1; // set previous MB done
if ((mb_col & (nsync - 1)) == 0)
{
while (mb_col > (*last_row_current_mb_col - nsync)
&& (*last_row_current_mb_col) != (cm->mb_cols - 1))
while (mb_col > (*last_row_current_mb_col - nsync))
{
x86_pause_hint();
thread_sleep(0);
......@@ -495,13 +517,13 @@ void encode_mb_row(VP8_COMP *cpi,
#endif
// Count of last ref frame 0,0 useage
// Count of last ref frame 0,0 usage
if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
cpi->inter_zz_count ++;
// Special case code for cyclic refresh
// If cyclic update enabled then copy xd->mbmi.segment_id; (which may have been updated based on mode
// during vp8cx_encode_inter_macroblock()) back into the global sgmentation map
// during vp8cx_encode_inter_macroblock()) back into the global segmentation map
if ((cpi->current_layer == 0) &&
(cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled))
{
......@@ -525,7 +547,14 @@ void encode_mb_row(VP8_COMP *cpi,
cpi->tplist[mb_row].stop = *tp;
// Increment pointer into gf useage flags structure.
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
/* pack tokens for this MB */
{
int tok_count = *tp - tp_start;
pack_tokens(w, tp_start, tok_count);
}
#endif
// Increment pointer into gf usage flags structure.
x->gf_active_ptr++;
// Increment the activity mask pointers.
......@@ -539,39 +568,29 @@ void encode_mb_row(VP8_COMP *cpi,
recon_yoffset += 16;
recon_uvoffset += 8;
// Keep track of segment useage
// Keep track of segment usage
segment_counts[xd->mode_info_context->mbmi.segment_id] ++;
// skip to next mb
xd->mode_info_context++;
x->partition_info++;
xd->above_context++;
#if CONFIG_MULTITHREAD
if (cpi->b_multi_threaded != 0)
{
cpi->mt_current_mb_col[mb_row] = mb_col;
}
#endif
}
//extend the recon for intra prediction
vp8_extend_mb_row(
&cm->yv12_fb[dst_fb_idx],
xd->dst.y_buffer + 16,
xd->dst.u_buffer + 8,
xd->dst.v_buffer + 8);
vp8_extend_mb_row( &cm->yv12_fb[dst_fb_idx],
xd->dst.y_buffer + 16,
xd->dst.u_buffer + 8,
xd->dst.v_buffer + 8);
#if CONFIG_MULTITHREAD
if (cpi->b_multi_threaded != 0)
*current_mb_col = rightmost_col;
#endif
// this is to account for the border
xd->mode_info_context++;
x->partition_info++;
#if CONFIG_MULTITHREAD
if ((cpi->b_multi_threaded != 0) && (mb_row == cm->mb_rows - 1))
{
sem_post(&cpi->h_event_end_encoding); /* signal frame encoding end */
}
#endif
}
void init_encode_frame_mb_context(VP8_COMP *cpi)
......@@ -599,7 +618,7 @@ void init_encode_frame_mb_context(VP8_COMP *cpi)
if (cm->frame_type == KEY_FRAME)
vp8_init_mbmode_probs(cm);
// Copy data over into macro block data sturctures.
// Copy data over into macro block data structures.
x->src = * cpi->Source;
xd->pre = cm->yv12_fb[cm->lst_fb_idx];
xd->dst = cm->yv12_fb[cm->new_fb_idx];
......@@ -656,10 +675,13 @@ void vp8_encode_frame(VP8_COMP *cpi)
MACROBLOCK *const x = & cpi->mb;
VP8_COMMON *const cm = & cpi->common;
MACROBLOCKD *const xd = & x->e_mbd;
TOKENEXTRA *tp = cpi->tok;
int segment_counts[MAX_MB_SEGMENTS];
int totalrate;
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
BOOL_CODER * bc = &cpi->bc[1]; // bc[0] is for control partition
const int num_part = (1 << cm->multi_token_partition);
#endif
vpx_memset(segment_counts, 0, sizeof(segment_counts));
totalrate = 0;
......@@ -694,6 +716,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
cpi->prediction_error = 0;
cpi->intra_error = 0;
cpi->skip_true_count = 0;
cpi->tok_count = 0;
#if 0
// Experimental code
......@@ -704,6 +727,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
xd->mode_info_context = cm->mi;
vp8_zero(cpi->MVcount);
vp8_zero(cpi->coef_counts);
vp8cx_frame_init_quantizer(cpi);
......@@ -722,9 +746,22 @@ void vp8_encode_frame(VP8_COMP *cpi)
build_activity_map(cpi);
}
// re-initencode frame context.
// re-init encode frame context.
init_encode_frame_mb_context(cpi);
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
{
int i;
for(i = 0; i < num_part; i++)
{
vp8_start_encode(&bc[i], cpi->partition_d[i + 1],
cpi->partition_d_end[i + 1]);
bc[i].error = &cm->error;
}
}
#endif
{
struct vpx_usec_timer emr_timer;
vpx_usec_timer_start(&emr_timer);
......@@ -748,7 +785,11 @@ void vp8_encode_frame(VP8_COMP *cpi)
{
vp8_zero(cm->left_context)
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
tp = cpi->tok;
#else
tp = cpi->tok + mb_row * (cm->mb_cols * 16 * 24);
#endif
encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);
......@@ -761,12 +802,14 @@ void vp8_encode_frame(VP8_COMP *cpi)
x->partition_info += xd->mode_info_stride * cpi->encoding_thread_count;
x->gf_active_ptr += cm->mb_cols * cpi->encoding_thread_count;
if(mb_row == cm->mb_rows - 1)
{
sem_post(&cpi->h_event_end_encoding); /* signal frame encoding end */
}
}
sem_wait(&cpi->h_event_end_encoding); /* wait for other threads to finish */
cpi->tok_count = 0;
for (mb_row = 0; mb_row < cm->mb_rows; mb_row ++)
{
cpi->tok_count += cpi->tplist[mb_row].stop - cpi->tplist[mb_row].start;
......@@ -799,9 +842,12 @@ void vp8_encode_frame(VP8_COMP *cpi)
// for each macroblock row in image
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
{
vp8_zero(cm->left_context)
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
tp = cpi->tok;
#endif
encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);
// adjust to the next row of mbs
......@@ -811,16 +857,25 @@ void vp8_encode_frame(VP8_COMP *cpi)
}
cpi->tok_count = tp - cpi->tok;
}
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
{
int i;
for(i = 0; i < num_part; i++)
{
vp8_stop_encode(&bc[i]);
cpi->partition_sz[i+1] = bc[i].pos;
}
}
#endif
vpx_usec_timer_mark(&emr_timer);
cpi->time_encode_mb_row += vpx_usec_timer_elapsed(&emr_timer);
}
// Work out the segment probabilites if segmentation is enabled
// Work out the segment probabilities if segmentation is enabled
if (xd->segmentation_enabled)
{
int tot_count;
......@@ -908,20 +963,16 @@ void vp8_encode_frame(VP8_COMP *cpi)
}
#endif
// Adjust the projected reference frame useage probability numbers to reflect
// what we have just seen. This may be usefull when we make multiple itterations
#if ! CONFIG_REALTIME_ONLY
// Adjust the projected reference frame usage probability numbers to reflect
// what we have just seen. This may be useful when we make multiple iterations
// of the recode loop rather than continuing to use values from the previous frame.
if ((cm->frame_type != KEY_FRAME) && ((cpi->oxcf.number_of_layers > 1) ||
(!cm->refresh_alt_ref_frame && !cm->refresh_golden_frame)))
{
vp8_convert_rfct_to_prob(cpi);
}
#if 0
// Keep record of the total distortion this time around for future use
cpi->last_frame_distortion = cpi->frame_distortion;
#endif
}
void vp8_setup_block_ptrs(MACROBLOCK *x)
{
......
......@@ -13,6 +13,8 @@
#include "vp8/common/common.h"
#include "vp8/common/extend.h"
#include "bitstream.h"
#if CONFIG_MULTITHREAD
extern int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x,
......@@ -74,6 +76,10 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
MACROBLOCK *x = &mbri->mb;
MACROBLOCKD *xd = &x->e_mbd;
TOKENEXTRA *tp ;
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
TOKENEXTRA *tp_start = cpi->tok + (1 + ithread) * (16 * 24);
const int num_part = (1 << cm->multi_token_partition);
#endif
int *segment_counts = mbri->segment_counts;
int *totalrate = &mbri->totalrate;
......@@ -91,9 +97,15 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
int map_index = (mb_row * cm->mb_cols);
volatile int *last_row_current_mb_col;
volatile const int *last_row_current_mb_col;
volatile int *current_mb_col = &cpi->mt_current_mb_col[mb_row];
#if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
vp8_writer *w = &cpi->bc[1 + (mb_row % num_part)];
#else
tp = cpi->tok + (mb_row * (cm->mb_cols * 16 * 24));
cpi->tplist[mb_row].start = tp;
#endif
last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1];
......@@ -107,25 +119,27 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
recon_yoffset = (mb_row * recon_y_stride * 16);
recon_uvoffset = (mb_row * recon_uv_stride * 8);