diff --git a/configure b/configure index 4ac97b137b19c2ea5194a7a3a496e436e476fdd3..cf3eb32ec5ad3d6d7f65a80947dfd975ddda7136 100755 --- a/configure +++ b/configure @@ -39,6 +39,7 @@ Advanced options: ${toggle_multithread} multithreaded encoding and decoding ${toggle_spatial_resampling} spatial sampling (scaling) support ${toggle_realtime_only} enable this option while building for real-time encoding + ${toggle_onthefly_bitpacking} enable on-the-fly bitpacking in real-time encoding ${toggle_error_concealment} enable this option to get a decoder which is able to conceal losses ${toggle_runtime_cpu_detect} runtime cpu detection ${toggle_shared} shared library support @@ -253,6 +254,7 @@ CONFIG_LIST=" static_msvcrt spatial_resampling realtime_only + onthefly_bitpacking error_concealment shared static @@ -297,6 +299,7 @@ CMDLINE_SELECT=" mem_tracker spatial_resampling realtime_only + onthefly_bitpacking error_concealment shared static diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c index f26b4e4ebf3f3bd6d8071e7c353eeec07207d056..af00f7c8c76b25fb2e22dbab96393cf75b1f3ff5 100644 --- a/vp8/encoder/bitstream.c +++ b/vp8/encoder/bitstream.c @@ -24,6 +24,7 @@ #include "bitstream.h" #include "defaultcoefcounts.h" +#include "vp8/common/common.h" const int vp8cx_base_skip_false_prob[128] = { @@ -159,7 +160,7 @@ static void write_split(vp8_writer *bc, int x) ); } -static void pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount) +void vp8_pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount) { const TOKENEXTRA *const stop = p + xcount; unsigned int split; @@ -398,7 +399,7 @@ static void pack_tokens_into_partitions_c(VP8_COMP *cpi, unsigned char *cx_data, const TOKENEXTRA *stop = cpi->tplist[mb_row].stop; int tokens = stop - p; - pack_tokens_c(w, p, tokens); + vp8_pack_tokens_c(w, p, tokens); } vp8_stop_encode(w); @@ -417,7 +418,7 @@ static void pack_mb_row_tokens_c(VP8_COMP *cpi, vp8_writer *w) const TOKENEXTRA *stop = cpi->tplist[mb_row].stop; int 
tokens = stop - p; - pack_tokens_c(w, p, tokens); + vp8_pack_tokens_c(w, p, tokens); } } @@ -783,6 +784,7 @@ static void write_kfmodes(VP8_COMP *cpi) } } +#if 0 /* This function is used for debugging probability trees. */ static void print_prob_tree(vp8_prob coef_probs[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]) @@ -814,6 +816,7 @@ static void print_prob_tree(vp8_prob fprintf(f, "}\n"); fclose(f); } +#endif static void sum_probs_over_prev_coef_context( const unsigned int probs[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS], @@ -943,7 +946,6 @@ static int default_coef_context_savings(VP8_COMP *cpi) int t = 0; /* token/prob index */ - vp8_tree_probs_from_distribution( MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree, cpi->frame_coef_probs [i][j][k], @@ -1048,10 +1050,33 @@ int vp8_estimate_entropy_savings(VP8_COMP *cpi) return savings; } -static void update_coef_probs(VP8_COMP *cpi) +#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING +int vp8_update_coef_context(VP8_COMP *cpi) +{ + int savings = 0; + + + if (cpi->common.frame_type == KEY_FRAME) + { + /* Reset to default counts/probabilities at key frames */ + vp8_copy(cpi->coef_counts, default_coef_counts); + } + + if (cpi->oxcf.error_resilient_mode & VPX_ERROR_RESILIENT_PARTITIONS) + savings += independent_coef_context_savings(cpi); + else + savings += default_coef_context_savings(cpi); + + return savings; +} +#endif + +void vp8_update_coef_probs(VP8_COMP *cpi) { int i = 0; +#if !(CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING) vp8_writer *const w = cpi->bc; +#endif int savings = 0; vp8_clear_system_state(); //__asm emms; @@ -1131,7 +1156,11 @@ static void update_coef_probs(VP8_COMP *cpi) cpi->common.frame_type == KEY_FRAME && newp != *Pold) u = 1; +#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING + cpi->update_probs[i][j][k][t] = u; +#else vp8_write(w, u, upd); +#endif #ifdef ENTROPY_STATS @@ -1143,7 +1172,9 @@ static void update_coef_probs(VP8_COMP *cpi) /* send/use new probability */ 
*Pold = newp; +#if !(CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING) vp8_write_literal(w, newp, 8); +#endif savings += s; @@ -1172,6 +1203,50 @@ static void update_coef_probs(VP8_COMP *cpi) while (++i < BLOCK_TYPES); } + +#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING +static void pack_coef_probs(VP8_COMP *cpi) +{ + int i = 0; + vp8_writer *const w = cpi->bc; + + do + { + int j = 0; + + do + { + int k = 0; + + do + { + int t = 0; /* token/prob index */ + + do + { + const vp8_prob newp = cpi->common.fc.coef_probs [i][j][k][t]; + const vp8_prob upd = vp8_coef_update_probs [i][j][k][t]; + + const char u = cpi->update_probs[i][j][k][t] ; + + vp8_write(w, u, upd); + + if (u) + { + /* send/use new probability */ + vp8_write_literal(w, newp, 8); + } + } + while (++t < ENTROPY_NODES); + } + while (++k < PREV_COEF_CONTEXTS); + } + while (++j < COEF_BANDS); + } + while (++i < BLOCK_TYPES); +} +#endif + #ifdef PACKET_TESTING FILE *vpxlogc = 0; #endif @@ -1434,6 +1509,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest vp8_write_bit(bc, pc->ref_frame_sign_bias[ALTREF_FRAME]); } +#if !(CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING) if (cpi->oxcf.error_resilient_mode & VPX_ERROR_RESILIENT_PARTITIONS) { if (pc->frame_type == KEY_FRAME) @@ -1441,6 +1517,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest else pc->refresh_entropy_probs = 0; } +#endif vp8_write_bit(bc, pc->refresh_entropy_probs); @@ -1458,13 +1535,17 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest vp8_clear_system_state(); //__asm emms; +#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING + pack_coef_probs(cpi); +#else if (pc->refresh_entropy_probs == 0) { // save a copy for later refresh vpx_memcpy(&cpi->common.lfc, &cpi->common.fc, sizeof(cpi->common.fc)); } - update_coef_probs(cpi); + vp8_update_coef_probs(cpi); +#endif #ifdef ENTROPY_STATS active_section = 2; @@ -1512,6 +1593,45 @@ void 
vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest cpi->partition_sz[0] = *size; +#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING + { + const int num_part = (1 << pc->multi_token_partition); + unsigned char * dp = cpi->partition_d[0] + cpi->partition_sz[0]; + + if (num_part > 1) + { + /* write token part sizes (all but last) if more than 1 */ + validate_buffer(dp, 3 * (num_part - 1), cpi->partition_d_end[0], + &pc->error); + + cpi->partition_sz[0] += 3*(num_part-1); + + for(i = 1; i < num_part; i++) + { + write_partition_size(dp, cpi->partition_sz[i]); + dp += 3; + } + } + + if (!cpi->output_partition) + { + /* concatenate partition buffers */ + for(i = 0; i < num_part; i++) + { + vpx_memmove(dp, cpi->partition_d[i+1], cpi->partition_sz[i+1]); + cpi->partition_d[i+1] = dp; + dp += cpi->partition_sz[i+1]; + } + } + + /* update total size */ + *size = 0; + for(i = 0; i < num_part+1; i++) + { + *size += cpi->partition_sz[i]; + } + } +#else if (pc->multi_token_partition != ONE_PARTITION) { int num_part = 1 << pc->multi_token_partition; @@ -1561,6 +1681,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest *size += cpi->bc[1].pos; cpi->partition_sz[1] = cpi->bc[1].pos; } +#endif } #ifdef ENTROPY_STATS diff --git a/vp8/encoder/bitstream.h b/vp8/encoder/bitstream.h index 736b9d494392501feffc8b48c52f7ee756d1734a..e0bbdc4f3f1364d24f4d5f710b63ec15e121191c 100644 --- a/vp8/encoder/bitstream.h +++ b/vp8/encoder/bitstream.h @@ -35,7 +35,10 @@ void vp8cx_pack_mb_row_tokens_armv5(VP8_COMP *cpi, vp8_writer *w, # define pack_mb_row_tokens(a,b) \ vp8cx_pack_mb_row_tokens_armv5(a,b,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree) #else -# define pack_tokens(a,b,c) pack_tokens_c(a,b,c) + +void vp8_pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount); + +# define pack_tokens(a,b,c) vp8_pack_tokens_c(a,b,c) # define pack_tokens_into_partitions(a,b,c,d) pack_tokens_into_partitions_c(a,b,c,d) # define 
pack_mb_row_tokens(a,b) pack_mb_row_tokens_c(a,b) #endif diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c index 989956507747510e94d205a431b642f8a9f93516..21757f8f05c724f55bfb08f15c9ab4c79d6b7135 100644 --- a/vp8/encoder/encodeframe.c +++ b/vp8/encoder/encodeframe.c @@ -28,6 +28,9 @@ #include <limits.h> #include "vp8/common/invtrans.h" #include "vpx_ports/vpx_timer.h" +#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING +#include "bitstream.h" +#endif extern void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ; extern void vp8_calc_ref_frame_costs(int *ref_frame_cost, @@ -373,10 +376,17 @@ void encode_mb_row(VP8_COMP *cpi, int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride; int map_index = (mb_row * cpi->common.mb_cols); +#if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING) + const int num_part = (1 << cm->multi_token_partition); + TOKENEXTRA * tp_start = cpi->tok; + vp8_writer *w; +#endif + #if CONFIG_MULTITHREAD const int nsync = cpi->mt_sync_range; - const int rightmost_col = cm->mb_cols - 1; + const int rightmost_col = cm->mb_cols + nsync; volatile const int *last_row_current_mb_col; + volatile int *current_mb_col = &cpi->mt_current_mb_col[mb_row]; if ((cpi->b_multi_threaded != 0) && (mb_row != 0)) last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1]; @@ -384,6 +394,13 @@ void encode_mb_row(VP8_COMP *cpi, last_row_current_mb_col = &rightmost_col; #endif +#if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING) + if(num_part > 1) + w= &cpi->bc[1 + (mb_row % num_part)]; + else + w = &cpi->bc[1]; +#endif + // reset above block coeffs xd->above_context = cm->above_context; @@ -411,6 +428,10 @@ void encode_mb_row(VP8_COMP *cpi, // for each macroblock col in image for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { + +#if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING) + *tp = cpi->tok; +#endif // Distance of Mb to the left & right edges, specified in // 1/8th pel units as they are always compared to values // that 
are in 1/8th pel units @@ -435,12 +456,13 @@ void encode_mb_row(VP8_COMP *cpi, vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16); #if CONFIG_MULTITHREAD - if ((cpi->b_multi_threaded != 0) && (mb_row != 0)) + if (cpi->b_multi_threaded != 0) { + *current_mb_col = mb_col - 1; // set previous MB done + if ((mb_col & (nsync - 1)) == 0) { - while (mb_col > (*last_row_current_mb_col - nsync) - && (*last_row_current_mb_col) != (cm->mb_cols - 1)) + while (mb_col > (*last_row_current_mb_col - nsync)) { x86_pause_hint(); thread_sleep(0); @@ -495,13 +517,13 @@ void encode_mb_row(VP8_COMP *cpi, #endif - // Count of last ref frame 0,0 useage + // Count of last ref frame 0,0 usage if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)) cpi->inter_zz_count ++; // Special case code for cyclic refresh // If cyclic update enabled then copy xd->mbmi.segment_id; (which may have been updated based on mode - // during vp8cx_encode_inter_macroblock()) back into the global sgmentation map + // during vp8cx_encode_inter_macroblock()) back into the global segmentation map if ((cpi->current_layer == 0) && (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled)) { @@ -525,7 +547,14 @@ void encode_mb_row(VP8_COMP *cpi, cpi->tplist[mb_row].stop = *tp; - // Increment pointer into gf useage flags structure. +#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING + /* pack tokens for this MB */ + { + int tok_count = *tp - tp_start; + pack_tokens(w, tp_start, tok_count); + } +#endif + // Increment pointer into gf usage flags structure. x->gf_active_ptr++; // Increment the activity mask pointers. 
@@ -539,39 +568,29 @@ void encode_mb_row(VP8_COMP *cpi, recon_yoffset += 16; recon_uvoffset += 8; - // Keep track of segment useage + // Keep track of segment usage segment_counts[xd->mode_info_context->mbmi.segment_id] ++; // skip to next mb xd->mode_info_context++; x->partition_info++; - xd->above_context++; -#if CONFIG_MULTITHREAD - if (cpi->b_multi_threaded != 0) - { - cpi->mt_current_mb_col[mb_row] = mb_col; - } -#endif } //extend the recon for intra prediction - vp8_extend_mb_row( - &cm->yv12_fb[dst_fb_idx], - xd->dst.y_buffer + 16, - xd->dst.u_buffer + 8, - xd->dst.v_buffer + 8); + vp8_extend_mb_row( &cm->yv12_fb[dst_fb_idx], + xd->dst.y_buffer + 16, + xd->dst.u_buffer + 8, + xd->dst.v_buffer + 8); + +#if CONFIG_MULTITHREAD + if (cpi->b_multi_threaded != 0) + *current_mb_col = rightmost_col; +#endif // this is to account for the border xd->mode_info_context++; x->partition_info++; - -#if CONFIG_MULTITHREAD - if ((cpi->b_multi_threaded != 0) && (mb_row == cm->mb_rows - 1)) - { - sem_post(&cpi->h_event_end_encoding); /* signal frame encoding end */ - } -#endif } void init_encode_frame_mb_context(VP8_COMP *cpi) @@ -599,7 +618,7 @@ void init_encode_frame_mb_context(VP8_COMP *cpi) if (cm->frame_type == KEY_FRAME) vp8_init_mbmode_probs(cm); - // Copy data over into macro block data sturctures. + // Copy data over into macro block data structures. 
x->src = * cpi->Source; xd->pre = cm->yv12_fb[cm->lst_fb_idx]; xd->dst = cm->yv12_fb[cm->new_fb_idx]; @@ -656,10 +675,13 @@ void vp8_encode_frame(VP8_COMP *cpi) MACROBLOCK *const x = & cpi->mb; VP8_COMMON *const cm = & cpi->common; MACROBLOCKD *const xd = & x->e_mbd; - TOKENEXTRA *tp = cpi->tok; int segment_counts[MAX_MB_SEGMENTS]; int totalrate; +#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING + BOOL_CODER * bc = &cpi->bc[1]; // bc[0] is for control partition + const int num_part = (1 << cm->multi_token_partition); +#endif vpx_memset(segment_counts, 0, sizeof(segment_counts)); totalrate = 0; @@ -694,6 +716,7 @@ void vp8_encode_frame(VP8_COMP *cpi) cpi->prediction_error = 0; cpi->intra_error = 0; cpi->skip_true_count = 0; + cpi->tok_count = 0; #if 0 // Experimental code @@ -704,6 +727,7 @@ void vp8_encode_frame(VP8_COMP *cpi) xd->mode_info_context = cm->mi; vp8_zero(cpi->MVcount); + vp8_zero(cpi->coef_counts); vp8cx_frame_init_quantizer(cpi); @@ -722,9 +746,22 @@ void vp8_encode_frame(VP8_COMP *cpi) build_activity_map(cpi); } - // re-initencode frame context. + // re-init encode frame context. 
init_encode_frame_mb_context(cpi); +#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING + { + int i; + for(i = 0; i < num_part; i++) + { + vp8_start_encode(&bc[i], cpi->partition_d[i + 1], + cpi->partition_d_end[i + 1]); + bc[i].error = &cm->error; + } + } + +#endif + { struct vpx_usec_timer emr_timer; vpx_usec_timer_start(&emr_timer); @@ -748,7 +785,11 @@ void vp8_encode_frame(VP8_COMP *cpi) { vp8_zero(cm->left_context) +#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING + tp = cpi->tok; +#else tp = cpi->tok + mb_row * (cm->mb_cols * 16 * 24); +#endif encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate); @@ -761,12 +802,14 @@ void vp8_encode_frame(VP8_COMP *cpi) x->partition_info += xd->mode_info_stride * cpi->encoding_thread_count; x->gf_active_ptr += cm->mb_cols * cpi->encoding_thread_count; + if(mb_row == cm->mb_rows - 1) + { + sem_post(&cpi->h_event_end_encoding); /* signal frame encoding end */ + } } sem_wait(&cpi->h_event_end_encoding); /* wait for other threads to finish */ - cpi->tok_count = 0; - for (mb_row = 0; mb_row < cm->mb_rows; mb_row ++) { cpi->tok_count += cpi->tplist[mb_row].stop - cpi->tplist[mb_row].start; @@ -799,9 +842,12 @@ void vp8_encode_frame(VP8_COMP *cpi) // for each macroblock row in image for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) { - vp8_zero(cm->left_context) +#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING + tp = cpi->tok; +#endif + encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate); // adjust to the next row of mbs @@ -811,16 +857,25 @@ void vp8_encode_frame(VP8_COMP *cpi) } cpi->tok_count = tp - cpi->tok; + } +#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING + { + int i; + for(i = 0; i < num_part; i++) + { + vp8_stop_encode(&bc[i]); + cpi->partition_sz[i+1] = bc[i].pos; + } } +#endif vpx_usec_timer_mark(&emr_timer); cpi->time_encode_mb_row += vpx_usec_timer_elapsed(&emr_timer); - } - // Work out the segment probabilites if segmentation is enabled + // Work out the 
segment probabilities if segmentation is enabled if (xd->segmentation_enabled) { int tot_count; @@ -908,20 +963,16 @@ void vp8_encode_frame(VP8_COMP *cpi) } #endif - // Adjust the projected reference frame useage probability numbers to reflect - // what we have just seen. This may be usefull when we make multiple itterations +#if ! CONFIG_REALTIME_ONLY + // Adjust the projected reference frame usage probability numbers to reflect + // what we have just seen. This may be useful when we make multiple iterations // of the recode loop rather than continuing to use values from the previous frame. if ((cm->frame_type != KEY_FRAME) && ((cpi->oxcf.number_of_layers > 1) || (!cm->refresh_alt_ref_frame && !cm->refresh_golden_frame))) { vp8_convert_rfct_to_prob(cpi); } - -#if 0 - // Keep record of the total distortion this time around for future use - cpi->last_frame_distortion = cpi->frame_distortion; #endif - } void vp8_setup_block_ptrs(MACROBLOCK *x) { diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c index 2874e78458c465367bc0bb8f063664aef9fc8778..b549a7dcab5d234a74c8a863d9fbad27621e5e56 100644 --- a/vp8/encoder/ethreading.c +++ b/vp8/encoder/ethreading.c @@ -13,6 +13,8 @@ #include "vp8/common/common.h" #include "vp8/common/extend.h" +#include "bitstream.h" + #if CONFIG_MULTITHREAD extern int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, @@ -74,6 +76,10 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) MACROBLOCK *x = &mbri->mb; MACROBLOCKD *xd = &x->e_mbd; TOKENEXTRA *tp ; +#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING + TOKENEXTRA *tp_start = cpi->tok + (1 + ithread) * (16 * 24); + const int num_part = (1 << cm->multi_token_partition); +#endif int *segment_counts = mbri->segment_counts; int *totalrate = &mbri->totalrate; @@ -91,9 +97,15 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride; int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride; int map_index = (mb_row * 
cm->mb_cols); - volatile int *last_row_current_mb_col; + volatile const int *last_row_current_mb_col; + volatile int *current_mb_col = &cpi->mt_current_mb_col[mb_row]; +#if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING) + vp8_writer *w = &cpi->bc[1 + (mb_row % num_part)]; +#else tp = cpi->tok + (mb_row * (cm->mb_cols * 16 * 24)); + cpi->tplist[mb_row].start = tp; +#endif last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1]; @@ -107,25 +119,27 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) recon_yoffset = (mb_row * recon_y_stride * 16); recon_uvoffset = (mb_row * recon_uv_stride * 8); - cpi->tplist[mb_row].start = tp; - - //printf("Thread mb_row = %d\n", mb_row); - // Set the mb activity pointer to the start of the row. x->mb_activity_ptr = &cpi->mb_activity_map[map_index]; // for each macroblock col in image for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { + *current_mb_col = mb_col - 1; + if ((mb_col & (nsync - 1)) == 0) { - while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != cm->mb_cols - 1) + while (mb_col > (*last_row_current_mb_col - nsync)) { x86_pause_hint(); thread_sleep(0); } } +#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING + tp = tp_start; +#endif + // Distance of Mb to the various image edges. 
// These specified to 8th pel as they are always compared to values that are in 1/8th pel units xd->mb_to_left_edge = -((mb_col * 16) << 3); @@ -154,7 +168,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) vp8_activity_masking(cpi, x); // Is segmentation enabled - // MB level adjutment to quantizer + // MB level adjustment to quantizer if (xd->segmentation_enabled) { // Code to set segment id in xd->mbmi.segment_id for current MB (with range checking) @@ -196,13 +210,13 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) #endif - // Count of last ref frame 0,0 useage + // Count of last ref frame 0,0 usage if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)) cpi->inter_zz_count++; // Special case code for cyclic refresh // If cyclic update enabled then copy xd->mbmi.segment_id; (which may have been updated based on mode - // during vp8cx_encode_inter_macroblock()) back into the global sgmentation map + // during vp8cx_encode_inter_macroblock()) back into the global segmentation map if (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled) { const MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; @@ -223,9 +237,17 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) } } - cpi->tplist[mb_row].stop = tp; - // Increment pointer into gf useage flags structure. +#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING + /* pack tokens for this MB */ + { + int tok_count = tp - tp_start; + pack_tokens(w, tp_start, tok_count); + } +#else + cpi->tplist[mb_row].stop = tp; +#endif + // Increment pointer into gf usage flags structure. x->gf_active_ptr++; // Increment the activity mask pointers. 
@@ -239,23 +261,21 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) recon_yoffset += 16; recon_uvoffset += 8; - // Keep track of segment useage + // Keep track of segment usage segment_counts[xd->mode_info_context->mbmi.segment_id]++; // skip to next mb xd->mode_info_context++; x->partition_info++; xd->above_context++; - - cpi->mt_current_mb_col[mb_row] = mb_col; } - //extend the recon for intra prediction - vp8_extend_mb_row( - &cm->yv12_fb[dst_fb_idx], - xd->dst.y_buffer + 16, - xd->dst.u_buffer + 8, - xd->dst.v_buffer + 8); + vp8_extend_mb_row( &cm->yv12_fb[dst_fb_idx], + xd->dst.y_buffer + 16, + xd->dst.u_buffer + 8, + xd->dst.v_buffer + 8); + + *current_mb_col = mb_col + nsync; // this is to account for the border xd->mode_info_context++; @@ -271,7 +291,6 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) if (mb_row == cm->mb_rows - 1) { - //SetEvent(cpi->h_event_main); sem_post(&cpi->h_event_end_encoding); /* signal frame encoding end */ } } diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index 6645442319ebb94b84b6ccbab26f18b46b298e08..5ad51e84609d54e383bf475eb7113a7fc42b4146 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -42,6 +42,11 @@ #include <stdio.h> #include <limits.h> +#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING +extern int vp8_update_coef_context(VP8_COMP *cpi); +extern void vp8_update_coef_probs(VP8_COMP *cpi); +#endif + extern void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi); extern void vp8cx_set_alt_lf_level(VP8_COMP *cpi, int filt_val); extern void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi); @@ -1106,8 +1111,11 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi) vpx_free(cpi->tok); { +#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING + unsigned int tokens = 8 * 24 * 16; /* one MB for each thread */ +#else unsigned int tokens = cm->mb_rows * cm->mb_cols * 24 * 16; - +#endif CHECK_MEM_ERROR(cpi->tok, vpx_calloc(tokens, sizeof(*cpi->tok))); } @@ -1514,6 
+1522,10 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) cm->refresh_last_frame = 1; cm->refresh_entropy_probs = 1; +#if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING) + cpi->oxcf.token_partitions = 3; +#endif + if (cpi->oxcf.token_partitions >= 0 && cpi->oxcf.token_partitions <= 3) cm->multi_token_partition = (TOKEN_PARTITION) cpi->oxcf.token_partitions; @@ -3725,12 +3737,40 @@ static void encode_frame_to_data_rate } #endif +#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING + { + if(cpi->oxcf.error_resilient_mode) + cm->refresh_entropy_probs = 0; + + if (cpi->oxcf.error_resilient_mode & VPX_ERROR_RESILIENT_PARTITIONS) + { + if (cm->frame_type == KEY_FRAME) + cm->refresh_entropy_probs = 1; + } + + if (cm->refresh_entropy_probs == 0) + { + // save a copy for later refresh + vpx_memcpy(&cm->lfc, &cm->fc, sizeof(cm->fc)); + } + + vp8_update_coef_context(cpi); + + vp8_update_coef_probs(cpi); + + // transform / motion compensation build reconstruction frame + // +pack coef partitions + vp8_encode_frame(cpi); + + /* cpi->projected_frame_size is not needed for RT mode */ + } +#else // transform / motion compensation build reconstruction frame vp8_encode_frame(cpi); cpi->projected_frame_size -= vp8_estimate_entropy_savings(cpi); cpi->projected_frame_size = (cpi->projected_frame_size > 0) ? 
cpi->projected_frame_size : 0; - +#endif vp8_clear_system_state(); //__asm emms; // Test to see if the stats generated for this frame indicate that we should have coded a key frame @@ -4093,10 +4133,12 @@ static void encode_frame_to_data_rate update_reference_frames(cm); +#if !(CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING) if (cpi->oxcf.error_resilient_mode) { cm->refresh_entropy_probs = 0; } +#endif #if CONFIG_MULTITHREAD /* wait that filter_level is picked so that we can continue with stream packing */ @@ -4819,6 +4861,29 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l vpx_usec_timer_start(&ticktimer); } +#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING + { + int i; + const int num_part = (1 << cm->multi_token_partition); + /* the available bytes in dest */ + const unsigned long dest_size = dest_end - dest; + const int tok_part_buff_size = (dest_size * 9) / (10 * num_part); + + unsigned char *dp = dest; + + cpi->partition_d[0] = dp; + dp += dest_size/10; /* reserve 1/10 for control partition */ + cpi->partition_d_end[0] = dp; + + for(i = 0; i < num_part; i++) + { + cpi->partition_d[i + 1] = dp; + dp += tok_part_buff_size; + cpi->partition_d_end[i + 1] = dp; + } + } +#endif + // start with a 0 size frame *size = 0; diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h index dcefbacadc5a2b192e5fd2ee170a762877c154f9..e17d8ead0dd0e93e96503346c0694b6653945f47 100644 --- a/vp8/encoder/onyx_int.h +++ b/vp8/encoder/onyx_int.h @@ -443,9 +443,12 @@ typedef struct VP8_COMP unsigned int MVcount [2] [MVvals]; /* (row,col) MV cts this frame */ unsigned int coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */ + //DECLARE_ALIGNED(16, int, coef_counts_backup [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]); //not used any more //save vp8_tree_probs_from_distribution result for each frame to avoid repeat calculation vp8_prob frame_coef_probs [BLOCK_TYPES] 
[COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; + char update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; + unsigned int frame_branch_ct [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2]; int gfu_boost; @@ -543,6 +546,8 @@ typedef struct VP8_COMP TOKENLIST *tplist; unsigned int partition_sz[MAX_PARTITIONS]; + unsigned char *partition_d[MAX_PARTITIONS]; + unsigned char *partition_d_end[MAX_PARTITIONS]; // end of multithread data diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c index 42da7be80599440b1061b1c9f0c429e2624e2d5e..fa78ec31cfd286234f4330c5e91e30c3f5676136 100644 --- a/vp8/vp8_cx_iface.c +++ b/vp8/vp8_cx_iface.c @@ -66,7 +66,11 @@ static const struct extraconfig_map extracfg_map[] = 0, /* noise_sensitivity */ 0, /* Sharpness */ 0, /* static_thresh */ +#if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING) + VP8_EIGHT_TOKENPARTITION, +#else VP8_ONE_TOKENPARTITION, /* token_partitions */ +#endif 0, /* arnr_max_frames */ 3, /* arnr_strength */ 3, /* arnr_type*/ @@ -241,6 +245,11 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, RANGE_CHECK_HI(cfg, ts_layer_id[i], cfg->ts_number_layers-1); } +#if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING) + if(cfg->g_threads > (1 << vp8_cfg->token_partitions)) + ERROR("g_threads cannot be bigger than number of token partitions"); +#endif + return VPX_CODEC_OK; } @@ -919,16 +928,28 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, for (i = 0; i < num_partitions; ++i) { +#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING + pkt.data.frame.buf = cpi->partition_d[i]; +#else pkt.data.frame.buf = cx_data; + cx_data += cpi->partition_sz[i]; + cx_data_sz -= cpi->partition_sz[i]; +#endif pkt.data.frame.sz = cpi->partition_sz[i]; pkt.data.frame.partition_id = i; /* don't set the fragment bit for the last partition */ if (i == (num_partitions - 1)) pkt.data.frame.flags &= ~VPX_FRAME_IS_FRAGMENT; vpx_codec_pkt_list_add(&ctx->pkt_list.head, 
&pkt); - cx_data += cpi->partition_sz[i]; - cx_data_sz -= cpi->partition_sz[i]; } +#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING + /* In lagged mode the encoder can buffer multiple frames. + * We don't want this in partitioned output because + * partitions are spread all over the output buffer. + * So, force an exit! + */ + cx_data_sz -= ctx->cx_data_sz / 2; +#endif } else { diff --git a/vpxenc.c b/vpxenc.c index a4435293953ee8e2af73fc6a09f676394947e673..0480fbdc0ef9bf42abfa35711e9870e75fcf6f2a 100644 --- a/vpxenc.c +++ b/vpxenc.c @@ -489,6 +489,13 @@ static void write_ivf_frame_header(FILE *outfile, if(fwrite(header, 1, 12, outfile)); } +static void write_ivf_frame_size(FILE *outfile, size_t size) +{ + char header[4]; /* 4-byte size field, filled by mem_put_le32() */ + mem_put_le32(header, size); + if(fwrite(header, 1, 4, outfile)); /* result deliberately discarded, same idiom as write_ivf_frame_header() */ +} + typedef off_t EbmlLoc; @@ -945,7 +952,6 @@ static double vp8_mse2psnr(double Samples, double Peak, double Mse) #include "args.h" - static const arg_def_t debugmode = ARG_DEF("D", "debug", 0, "Debug mode (makes output deterministic)"); static const arg_def_t outputfile = ARG_DEF("o", "output", 1, @@ -980,6 +986,8 @@ static const arg_def_t framerate = ARG_DEF(NULL, "fps", 1, "Stream frame rate (rate/scale)"); static const arg_def_t use_ivf = ARG_DEF(NULL, "ivf", 0, "Output IVF (default is WebM)"); +static const arg_def_t out_part = ARG_DEF("P", "output-partitions", 0, + "Makes encoder output partitions. 
Requires IVF output!"); static const arg_def_t q_hist_n = ARG_DEF(NULL, "q-hist", 1, "Show quantizer histogram (n-buckets)"); static const arg_def_t rate_hist_n = ARG_DEF(NULL, "rate-hist", 1, @@ -989,7 +997,7 @@ static const arg_def_t *main_args[] = &debugmode, &outputfile, &codecarg, &passes, &pass_arg, &fpf_name, &limit, &deadline, &best_dl, &good_dl, &rt_dl, - &verbosearg, &psnrarg, &use_ivf, &q_hist_n, &rate_hist_n, + &verbosearg, &psnrarg, &use_ivf, &out_part, &q_hist_n, &rate_hist_n, NULL }; @@ -1492,6 +1500,7 @@ struct global_config int show_psnr; int have_framerate; struct vpx_rational framerate; + int out_part; int debug; int show_q_hist_buckets; int show_rate_hist_buckets; @@ -1603,6 +1612,8 @@ static void parse_global_config(struct global_config *global, char **argv) global->framerate = arg_parse_rational(&arg); global->have_framerate = 1; } + else if (arg_match(&arg,&out_part, argi)) + global->out_part = 1; else if (arg_match(&arg, &debugmode, argi)) global->debug = 1; else if (arg_match(&arg, &q_hist_n, argi)) @@ -2081,11 +2092,14 @@ static void initialize_encoder(struct stream_state *stream, struct global_config *global) { int i; + int flags = 0; + + flags |= global->show_psnr ? VPX_CODEC_USE_PSNR : 0; + flags |= global->out_part ? VPX_CODEC_USE_OUTPUT_PARTITION : 0; /* Construct Encoder Context */ vpx_codec_enc_init(&stream->encoder, global->codec->iface, - &stream->config.cfg, - global->show_psnr ? 
VPX_CODEC_USE_PSNR : 0); + &stream->config.cfg, flags); ctx_exit_on_error(&stream->encoder, "Failed to initialize encoder"); /* Note that we bypass the vpx_codec_control wrapper macro because @@ -2154,12 +2168,18 @@ static void get_cx_data(struct stream_state *stream, while ((pkt = vpx_codec_get_cx_data(&stream->encoder, &iter))) { + static size_t fsize = 0; /* bytes accumulated for the frame being written */ + static off_t ivf_header_pos = 0; /* file offset of that frame's IVF header */ + *got_data = 1; switch (pkt->kind) { case VPX_CODEC_CX_FRAME_PKT: - stream->frames_out++; + if (!(pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT)) + { + stream->frames_out++; + } fprintf(stderr, " %6luF", (unsigned long)pkt->data.frame.sz); @@ -2175,9 +2195,28 @@ static void get_cx_data(struct stream_state *stream, } else { - write_ivf_frame_header(stream->file, pkt); - if(fwrite(pkt->data.frame.buf, 1, - pkt->data.frame.sz, stream->file)); + if (pkt->data.frame.partition_id <= 0) + { + ivf_header_pos = ftello(stream->file); + fsize = pkt->data.frame.sz; + + write_ivf_frame_header(stream->file, pkt); + } + else + { + fsize += pkt->data.frame.sz; + + if (!(pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT)) + { + off_t currpos = ftello(stream->file); /* where the payload continues */ + if (!fseeko(stream->file, ivf_header_pos, SEEK_SET)) /* patch the size only if the seek back succeeded, so a failed seek cannot inject 4 stray bytes into the payload */ + write_ivf_frame_size(stream->file, fsize); + fseeko(stream->file, currpos, SEEK_SET); + } + } + + if(fwrite(pkt->data.frame.buf, 1, + pkt->data.frame.sz, stream->file)); /* same unused-result idiom the replaced code used */ } stream->nbytes += pkt->data.raw.sz; break;