From 60cb39da860c91adc45f51f2dfa193d19598a801 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" <rbultje@google.com> Date: Tue, 6 Dec 2011 11:53:02 -0800 Subject: [PATCH] Dual 16x16 inter prediction. This patch introduces the concept of dual inter16x16 prediction. A 16x16 inter-predicted macroblock can use 2 references instead of 1, where both references use the same mvmode (new, near/est, zero). In the case of newmv, this means that two MVs are coded instead of one. The frame can be encoded in 3 ways: all MBs single-prediction, all MBs dual prediction, or per-MB single/dual prediction selection ("hybrid"), in which case a single bit is coded per-MB to indicate whether the MB uses single or dual inter prediction. In the future, we can (maybe?) get further gains by mixing this with Adrian's 32x32 work, per-segment dual prediction settings, or adding support for dual splitmv/8x8mv inter prediction. Gain (on derf-set, CQ mode) is ~2.8% (SSIM) or ~3.6% (glb PSNR). Most gain is at medium/high bitrates, but there are minor gains at low bitrates also. Output was confirmed to match between encoder and decoder. Note for optimization people: this patch introduces a 2nd version of 16x16/8x8 sixtap/bilin functions, which does an avg instead of a store. They may want to look and make sure this is implemented to their satisfaction so we can optimize it best in the future. 
Change-ID: I59dc84b07cbb3ccf073ac0f756d03d294cb19281 --- configure | 1 + vp8/common/alloccommon.c | 3 + vp8/common/blockd.h | 13 ++ vp8/common/filter.c | 214 ++++++++++++++++++++ vp8/common/generic/systemdependent.c | 28 ++- vp8/common/onyxc_int.h | 13 ++ vp8/common/recon.h | 16 ++ vp8/common/reconinter.c | 127 ++++++++++++ vp8/common/reconinter.h | 6 + vp8/common/subpixel.h | 32 +++ vp8/decoder/decodemv.c | 60 +++++- vp8/decoder/decodframe.c | 35 ++++ vp8/decoder/onyxd_int.h | 3 + vp8/decoder/threading.c | 4 + vp8/encoder/bitstream.c | 73 ++++++- vp8/encoder/encodeframe.c | 177 ++++++++++++++++- vp8/encoder/ethreading.c | 8 + vp8/encoder/onyx_if.c | 187 ++++++++++++++++++ vp8/encoder/onyx_int.h | 38 +++- vp8/encoder/pickinter.c | 6 + vp8/encoder/rdopt.c | 285 ++++++++++++++++++++++++++- vp8/encoder/rdopt.h | 4 +- 22 files changed, 1306 insertions(+), 27 deletions(-) diff --git a/configure b/configure index 9aa1fbfd3a..7942af9baa 100755 --- a/configure +++ b/configure @@ -217,6 +217,7 @@ HAVE_LIST=" unistd_h " EXPERIMENT_LIST=" + dualpred extend_qrange segmentation segfeatures diff --git a/vp8/common/alloccommon.c b/vp8/common/alloccommon.c index 2535a78fad..61bb317770 100644 --- a/vp8/common/alloccommon.c +++ b/vp8/common/alloccommon.c @@ -193,6 +193,9 @@ void vp8_create_common(VP8_COMMON *oci) vp8_default_bmode_probs(oci->fc.bmode_prob); oci->mb_no_coeff_skip = 1; +#if CONFIG_DUALPRED + oci->dual_pred_mode = HYBRID_PREDICTION; +#endif /* CONFIG_DUALPRED */ oci->no_lpf = 0; oci->filter_type = NORMAL_LOOPFILTER; oci->use_bilinear_mc_filter = 0; diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h index d51e881b42..751f58f791 100644 --- a/vp8/common/blockd.h +++ b/vp8/common/blockd.h @@ -184,6 +184,10 @@ typedef struct TX_SIZE txfm_size; #endif int_mv mv; +#if CONFIG_DUALPRED + MV_REFERENCE_FRAME second_ref_frame; + int_mv second_mv; +#endif unsigned char partitioning; unsigned char mb_skip_coeff; /* does this mb has coefficients at all, 1=no coefficients, 0=need 
decode tokens */ unsigned char need_to_clamp_mvs; @@ -236,6 +240,11 @@ typedef struct MacroBlockD int fullpixel_mask; YV12_BUFFER_CONFIG pre; /* Filtered copy of previous frame reconstruction */ +#if CONFIG_DUALPRED + struct { + uint8_t *y_buffer, *u_buffer, *v_buffer; + } second_pre; +#endif /* CONFIG_DUALPRED */ YV12_BUFFER_CONFIG dst; #if CONFIG_NEWNEAR @@ -305,6 +314,10 @@ typedef struct MacroBlockD vp8_subpix_fn_t subpixel_predict8x4; vp8_subpix_fn_t subpixel_predict8x8; vp8_subpix_fn_t subpixel_predict16x16; +#if CONFIG_DUALPRED + vp8_subpix_fn_t subpixel_predict_avg8x8; + vp8_subpix_fn_t subpixel_predict_avg16x16; +#endif /* CONFIG_DUALPRED */ void *current_bc; diff --git a/vp8/common/filter.c b/vp8/common/filter.c index ae59529522..256ba47367 100644 --- a/vp8/common/filter.c +++ b/vp8/common/filter.c @@ -128,6 +128,61 @@ static void filter_block2d_second_pass } } +#if CONFIG_DUALPRED +/* + * The only functional difference between filter_block2d_second_pass() + * and this function is that filter_block2d_second_pass() does a sixtap + * filter on the input and stores it in the output. This function + * (filter_block2d_second_pass_avg()) does a sixtap filter on the input, + * and then averages that with the content already present in the output + * ((filter_result + dest + 1) >> 1) and stores that in the output. 
+ */ +static void filter_block2d_second_pass_avg +( + int *src_ptr, + unsigned char *output_ptr, + int output_pitch, + unsigned int src_pixels_per_line, + unsigned int pixel_step, + unsigned int output_height, + unsigned int output_width, + const short *vp8_filter +) +{ + unsigned int i, j; + int Temp; + + for (i = 0; i < output_height; i++) + { + for (j = 0; j < output_width; j++) + { + /* Apply filter */ + Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) + + ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) + + ((int)src_ptr[0] * vp8_filter[2]) + + ((int)src_ptr[pixel_step] * vp8_filter[3]) + + ((int)src_ptr[2*pixel_step] * vp8_filter[4]) + + ((int)src_ptr[3*pixel_step] * vp8_filter[5]) + + (VP8_FILTER_WEIGHT >> 1); /* Rounding */ + + /* Normalize back to 0-255 */ + Temp = Temp >> VP8_FILTER_SHIFT; + + if (Temp < 0) + Temp = 0; + else if (Temp > 255) + Temp = 255; + + output_ptr[j] = (unsigned char) ((output_ptr[j] + Temp + 1) >> 1); + src_ptr++; + } + + /* Start next row */ + src_ptr += src_pixels_per_line - output_width; + output_ptr += output_pitch; + } +} +#endif /* CONFIG_DUALPRED */ static void filter_block2d ( @@ -193,6 +248,32 @@ void vp8_sixtap_predict8x8_c } +#if CONFIG_DUALPRED +void vp8_sixtap_predict_avg8x8_c +( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch +) +{ + const short *HFilter; + const short *VFilter; + int FData[13*16]; /* Temp data buffer used in filtering */ + + HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ + VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ + + /* First filter 1-D horizontally... */ + filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter); + + /* then filter verticaly... 
*/ + filter_block2d_second_pass_avg(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter); +} +#endif /* CONFIG_DUALPRED */ + void vp8_sixtap_predict8x4_c ( unsigned char *src_ptr, @@ -245,6 +326,33 @@ void vp8_sixtap_predict16x16_c } +#if CONFIG_DUALPRED +void vp8_sixtap_predict_avg16x16_c +( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch +) +{ + const short *HFilter; + const short *VFilter; + int FData[21*24]; /* Temp data buffer used in filtering */ + + HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ + VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ + + /* First filter 1-D horizontally... */ + filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, + src_pixels_per_line, 1, 21, 16, HFilter); + + /* then filter verticaly... */ + filter_block2d_second_pass_avg(FData + 32, dst_ptr, dst_pitch, + 16, 16, 16, 16, VFilter); +} +#endif /* CONFIG_DUALPRED */ /**************************************************************************** * @@ -349,6 +457,46 @@ static void filter_block2d_bil_second_pass } } +#if CONFIG_DUALPRED +/* + * As before for filter_block2d_second_pass_avg(), the functional difference + * between filter_block2d_bil_second_pass() and filter_block2d_bil_second_pass_avg() + * is that filter_block2d_bil_second_pass() does a bilinear filter on input + * and stores the result in output; filter_block2d_bil_second_pass_avg(), + * instead, does a bilinear filter on input, averages the resulting value + * with the values already present in the output and stores the result of + * that back into the output ((filter_result + dest + 1) >> 1). 
+ */ +static void filter_block2d_bil_second_pass_avg +( + unsigned short *src_ptr, + unsigned char *dst_ptr, + int dst_pitch, + unsigned int height, + unsigned int width, + const short *vp8_filter +) +{ + unsigned int i, j; + int Temp; + + for (i = 0; i < height; i++) + { + for (j = 0; j < width; j++) + { + /* Apply filter */ + Temp = ((int)src_ptr[0] * vp8_filter[0]) + + ((int)src_ptr[width] * vp8_filter[1]) + + (VP8_FILTER_WEIGHT / 2); + dst_ptr[j] = (unsigned int)(((Temp >> VP8_FILTER_SHIFT) + dst_ptr[j] + 1) >> 1); + src_ptr++; + } + + /* Next row... */ + dst_ptr += dst_pitch; + } +} +#endif /* CONFIG_DUALPRED */ /**************************************************************************** * @@ -395,6 +543,28 @@ static void filter_block2d_bil filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter); } +#if CONFIG_DUALPRED +static void filter_block2d_bil_avg +( + unsigned char *src_ptr, + unsigned char *dst_ptr, + unsigned int src_pitch, + unsigned int dst_pitch, + const short *HFilter, + const short *VFilter, + int Width, + int Height +) +{ + unsigned short FData[17*16]; /* Temp data buffer used in filtering */ + + /* First filter 1-D horizontally... */ + filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter); + + /* then 1-D vertically... 
*/ + filter_block2d_bil_second_pass_avg(FData, dst_ptr, dst_pitch, Height, Width, VFilter); +} +#endif /* CONFIG_DUALPRED */ void vp8_bilinear_predict4x4_c ( @@ -454,6 +624,28 @@ void vp8_bilinear_predict8x8_c } +#if CONFIG_DUALPRED +void vp8_bilinear_predict_avg8x8_c +( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch +) +{ + const short *HFilter; + const short *VFilter; + + HFilter = vp8_bilinear_filters[xoffset]; + VFilter = vp8_bilinear_filters[yoffset]; + + filter_block2d_bil_avg(src_ptr, dst_ptr, src_pixels_per_line, + dst_pitch, HFilter, VFilter, 8, 8); +} +#endif /* CONFIG_DUALPRED */ + void vp8_bilinear_predict8x4_c ( unsigned char *src_ptr, @@ -492,3 +684,25 @@ void vp8_bilinear_predict16x16_c filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16); } + +#if CONFIG_DUALPRED +void vp8_bilinear_predict_avg16x16_c +( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch +) +{ + const short *HFilter; + const short *VFilter; + + HFilter = vp8_bilinear_filters[xoffset]; + VFilter = vp8_bilinear_filters[yoffset]; + + filter_block2d_bil_avg(src_ptr, dst_ptr, src_pixels_per_line, + dst_pitch, HFilter, VFilter, 16, 16); +} +#endif /* CONFIG_DUALPRED */ diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c index 68ed8aab0d..9619163140 100644 --- a/vp8/common/generic/systemdependent.c +++ b/vp8/common/generic/systemdependent.c @@ -84,6 +84,10 @@ void vp8_machine_specific_config(VP8_COMMON *ctx) #endif rtcd->recon.copy16x16 = vp8_copy_mem16x16_c; rtcd->recon.copy8x8 = vp8_copy_mem8x8_c; +#if CONFIG_DUALPRED + rtcd->recon.avg16x16 = vp8_avg_mem16x16_c; + rtcd->recon.avg8x8 = vp8_avg_mem8x8_c; +#endif /* CONFIG_DUALPRED */ rtcd->recon.copy8x4 = vp8_copy_mem8x4_c; rtcd->recon.recon = vp8_recon_b_c; #if CONFIG_I8X8 @@ -112,14 +116,22 @@ void 
vp8_machine_specific_config(VP8_COMMON *ctx) #endif - rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_c; - rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_c; - rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_c; - rtcd->subpix.sixtap4x4 = vp8_sixtap_predict_c; - rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_c; - rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_c; - rtcd->subpix.bilinear8x4 = vp8_bilinear_predict8x4_c; - rtcd->subpix.bilinear4x4 = vp8_bilinear_predict4x4_c; + rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_c; + rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_c; +#if CONFIG_DUALPRED + rtcd->subpix.sixtap_avg16x16 = vp8_sixtap_predict_avg16x16_c; + rtcd->subpix.sixtap_avg8x8 = vp8_sixtap_predict_avg8x8_c; +#endif /* CONFIG_DUALPRED */ + rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_c; + rtcd->subpix.sixtap4x4 = vp8_sixtap_predict_c; + rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_c; + rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_c; +#if CONFIG_DUALPRED + rtcd->subpix.bilinear_avg16x16 = vp8_bilinear_predict_avg16x16_c; + rtcd->subpix.bilinear_avg8x8 = vp8_bilinear_predict_avg8x8_c; +#endif /* CONFIG_DUALPRED */ + rtcd->subpix.bilinear8x4 = vp8_bilinear_predict8x4_c; + rtcd->subpix.bilinear4x4 = vp8_bilinear_predict4x4_c; rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_c; rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_c; diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h index 7329e38697..08fc795012 100644 --- a/vp8/common/onyxc_int.h +++ b/vp8/common/onyxc_int.h @@ -73,6 +73,16 @@ typedef enum BILINEAR = 1 } INTERPOLATIONFILTERTYPE; +#if CONFIG_DUALPRED +typedef enum +{ + SINGLE_PREDICTION_ONLY = 0, + DUAL_PREDICTION_ONLY = 1, + HYBRID_PREDICTION = 2, + NB_PREDICTION_TYPES = 3, +} DUALPREDMODE_TYPE; +#endif /* CONFIG_DUALPRED */ + typedef struct VP8_COMMON_RTCD { #if CONFIG_RUNTIME_CPU_DETECT @@ -130,6 +140,9 @@ typedef struct VP8Common /* profile settings */ int experimental; int mb_no_coeff_skip; +#if 
CONFIG_DUALPRED + DUALPREDMODE_TYPE dual_pred_mode; +#endif /* CONFIG_DUALPRED */ int no_lpf; int use_bilinear_mc_filter; int full_pixel; diff --git a/vp8/common/recon.h b/vp8/common/recon.h index f459922e78..b82e9a720c 100644 --- a/vp8/common/recon.h +++ b/vp8/common/recon.h @@ -49,6 +49,18 @@ extern prototype_copy_block(vp8_recon_copy16x16); #endif extern prototype_copy_block(vp8_recon_copy8x8); +#if CONFIG_DUALPRED +#ifndef vp8_recon_avg16x16 +#define vp8_recon_avg16x16 vp8_avg_mem16x16_c +#endif +extern prototype_copy_block(vp8_recon_avg16x16); + +#ifndef vp8_recon_avg8x8 +#define vp8_recon_avg8x8 vp8_avg_mem8x8_c +#endif +extern prototype_copy_block(vp8_recon_avg8x8); +#endif /* CONFIG_DUALPRED */ + #ifndef vp8_recon_copy8x4 #define vp8_recon_copy8x4 vp8_copy_mem8x4_c #endif @@ -157,6 +169,10 @@ typedef struct vp8_recon_rtcd_vtable { vp8_copy_block_fn_t copy16x16; vp8_copy_block_fn_t copy8x8; +#if CONFIG_DUALPRED + vp8_copy_block_fn_t avg16x16; + vp8_copy_block_fn_t avg8x8; +#endif /* CONFIG_DUALPRED */ vp8_copy_block_fn_t copy8x4; vp8_recon_fn_t recon; #if CONFIG_I8X8 diff --git a/vp8/common/reconinter.c b/vp8/common/reconinter.c index 064a8355ce..f677362e35 100644 --- a/vp8/common/reconinter.c +++ b/vp8/common/reconinter.c @@ -62,6 +62,30 @@ void vp8_copy_mem16x16_c( } +#if CONFIG_DUALPRED +void vp8_avg_mem16x16_c( + unsigned char *src, + int src_stride, + unsigned char *dst, + int dst_stride) +{ + int r; + + for (r = 0; r < 16; r++) + { + int n; + + for (n = 0; n < 16; n++) + { + dst[n] = (dst[n] + src[n] + 1) >> 1; + } + + src += src_stride; + dst += dst_stride; + } +} +#endif /* CONFIG_DUALPRED */ + void vp8_copy_mem8x8_c( unsigned char *src, int src_stride, @@ -92,6 +116,30 @@ void vp8_copy_mem8x8_c( } +#if CONFIG_DUALPRED +void vp8_avg_mem8x8_c( + unsigned char *src, + int src_stride, + unsigned char *dst, + int dst_stride) +{ + int r; + + for (r = 0; r < 8; r++) + { + int n; + + for (n = 0; n < 8; n++) + { + dst[n] = (dst[n] + src[n] + 1) >> 1; + } + + 
src += src_stride; + dst += dst_stride; + } +} +#endif /* CONFIG_DUALPRED */ + void vp8_copy_mem8x4_c( unsigned char *src, int src_stride, @@ -388,6 +436,74 @@ void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x, } +#if CONFIG_DUALPRED +/* + * This function should be called after an initial call to + * vp8_build_inter16x16_predictors_mb() or _mby()/_mbuv(). + * It will run a second sixtap filter on a (different) ref + * frame and average the result with the output of the + * first sixtap filter. The second reference frame is stored + * in x->second_pre (the reference frame index is in + * x->mode_info_context->mbmi.second_ref_frame). The second + * motion vector is x->mode_info_context->mbmi.second_mv. + * + * This allows blending prediction from two reference frames + * which sometimes leads to better prediction than from a + * single reference framer. + */ +void vp8_build_2nd_inter16x16_predictors_mb(MACROBLOCKD *x, + unsigned char *dst_y, + unsigned char *dst_u, + unsigned char *dst_v, + int dst_ystride, + int dst_uvstride) +{ + int offset; + unsigned char *ptr; + unsigned char *uptr, *vptr; + + int mv_row = x->mode_info_context->mbmi.second_mv.as_mv.row; + int mv_col = x->mode_info_context->mbmi.second_mv.as_mv.col; + + unsigned char *ptr_base = x->second_pre.y_buffer; + int pre_stride = x->block[0].pre_stride; + + ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3); + + if ((mv_row | mv_col) & 7) + { + x->subpixel_predict_avg16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, dst_y, dst_ystride); + } + else + { + RECON_INVOKE(&x->rtcd->recon, avg16x16)(ptr, pre_stride, dst_y, dst_ystride); + } + + /* calc uv motion vectors */ + mv_row = (mv_row + (mv_row > 0)) >> 1; + mv_col = (mv_col + (mv_col > 0)) >> 1; + + mv_row &= x->fullpixel_mask; + mv_col &= x->fullpixel_mask; + + pre_stride >>= 1; + offset = (mv_row >> 3) * pre_stride + (mv_col >> 3); + uptr = x->second_pre.u_buffer + offset; + vptr = x->second_pre.v_buffer + offset; + + if ((mv_row | mv_col) & 
7) + { + x->subpixel_predict_avg8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, dst_u, dst_uvstride); + x->subpixel_predict_avg8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, dst_v, dst_uvstride); + } + else + { + RECON_INVOKE(&x->rtcd->recon, avg8x8)(uptr, pre_stride, dst_u, dst_uvstride); + RECON_INVOKE(&x->rtcd->recon, avg8x8)(vptr, pre_stride, dst_v, dst_uvstride); + } +} +#endif /* CONFIG_DUALPRED */ + static void build_inter4x4_predictors_mb(MACROBLOCKD *x) { int i; @@ -490,6 +606,17 @@ void vp8_build_inter_predictors_mb(MACROBLOCKD *x) { vp8_build_inter16x16_predictors_mb(x, x->predictor, &x->predictor[256], &x->predictor[320], 16, 8); +#if CONFIG_DUALPRED + if (x->mode_info_context->mbmi.second_ref_frame) + { + /* 256 = offset of U plane in Y+U+V buffer; + * 320 = offset of V plane in Y+U+V buffer. + * (256=16x16, 320=16x16+8x8). */ + vp8_build_2nd_inter16x16_predictors_mb(x, x->predictor, + &x->predictor[256], + &x->predictor[320], 16, 8); + } +#endif /* CONFIG_DUALPRED */ } else { diff --git a/vp8/common/reconinter.h b/vp8/common/reconinter.h index 456812ecdd..c8e2f803b7 100644 --- a/vp8/common/reconinter.h +++ b/vp8/common/reconinter.h @@ -19,6 +19,12 @@ extern void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x, unsigned char *dst_v, int dst_ystride, int dst_uvstride); +extern void vp8_build_2nd_inter16x16_predictors_mb(MACROBLOCKD *x, + unsigned char *dst_y, + unsigned char *dst_u, + unsigned char *dst_v, + int dst_ystride, + int dst_uvstride); extern void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x); diff --git a/vp8/common/subpixel.h b/vp8/common/subpixel.h index acdeec3bcb..33d4b355de 100644 --- a/vp8/common/subpixel.h +++ b/vp8/common/subpixel.h @@ -34,6 +34,18 @@ extern prototype_subpixel_predict(vp8_subpix_sixtap16x16); #endif extern prototype_subpixel_predict(vp8_subpix_sixtap8x8); +#if CONFIG_DUALPRED +#ifndef vp8_subpix_sixtap_avg16x16 +#define vp8_subpix_sixtap_avg16x16 vp8_sixtap_predict_avg16x16_c +#endif +extern 
prototype_subpixel_predict(vp8_subpix_sixtap_avg16x16); + +#ifndef vp8_subpix_sixtap_avg8x8 +#define vp8_subpix_sixtap_avg8x8 vp8_sixtap_predict_avg8x8_c +#endif +extern prototype_subpixel_predict(vp8_subpix_sixtap_avg8x8); +#endif /* CONFIG_DUALPRED */ + #ifndef vp8_subpix_sixtap8x4 #define vp8_subpix_sixtap8x4 vp8_sixtap_predict8x4_c #endif @@ -54,6 +66,18 @@ extern prototype_subpixel_predict(vp8_subpix_bilinear16x16); #endif extern prototype_subpixel_predict(vp8_subpix_bilinear8x8); +#if CONFIG_DUALPRED +#ifndef vp8_subpix_bilinear_avg16x16 +#define vp8_subpix_bilinear_avg16x16 vp8_bilinear_predict_avg16x16_c +#endif +extern prototype_subpixel_predict(vp8_subpix_bilinear_avg16x16); + +#ifndef vp8_subpix_bilinear_avg8x8 +#define vp8_subpix_bilinear_avg8x8 vp8_bilinear_predict_avg8x8_c +#endif +extern prototype_subpixel_predict(vp8_subpix_bilinear_avg8x8); +#endif /* CONFIG_DUALPRED */ + #ifndef vp8_subpix_bilinear8x4 #define vp8_subpix_bilinear8x4 vp8_bilinear_predict8x4_c #endif @@ -69,10 +93,18 @@ typedef struct { vp8_subpix_fn_t sixtap16x16; vp8_subpix_fn_t sixtap8x8; +#if CONFIG_DUALPRED + vp8_subpix_fn_t sixtap_avg16x16; + vp8_subpix_fn_t sixtap_avg8x8; +#endif /* CONFIG_DUALPRED */ vp8_subpix_fn_t sixtap8x4; vp8_subpix_fn_t sixtap4x4; vp8_subpix_fn_t bilinear16x16; vp8_subpix_fn_t bilinear8x8; +#if CONFIG_DUALPRED + vp8_subpix_fn_t bilinear_avg16x16; + vp8_subpix_fn_t bilinear_avg8x8; +#endif /* CONFIG_DUALPRED */ vp8_subpix_fn_t bilinear8x4; vp8_subpix_fn_t bilinear4x4; } vp8_subpix_rtcd_vtable_t; diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c index 7e3137fd26..df2b85349e 100644 --- a/vp8/decoder/decodemv.c +++ b/vp8/decoder/decodemv.c @@ -392,6 +392,17 @@ static void mb_mode_mv_init(VP8D_COMP *pbi) pbi->prob_intra = (vp8_prob)vp8_read_literal(bc, 8); pbi->prob_last = (vp8_prob)vp8_read_literal(bc, 8); pbi->prob_gf = (vp8_prob)vp8_read_literal(bc, 8); +#if CONFIG_DUALPRED + pbi->common.dual_pred_mode = vp8_read(bc, 128); + if 
(pbi->common.dual_pred_mode) + pbi->common.dual_pred_mode += vp8_read(bc, 128); + if (pbi->common.dual_pred_mode == HYBRID_PREDICTION) + { + pbi->prob_dualpred[0] = (vp8_prob)vp8_read_literal(bc, 8); + pbi->prob_dualpred[1] = (vp8_prob)vp8_read_literal(bc, 8); + pbi->prob_dualpred[2] = (vp8_prob)vp8_read_literal(bc, 8); + } +#endif /* CONFIG_DUALPRED */ if (vp8_read_bit(bc)) { @@ -444,6 +455,9 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, mb_to_top_edge -= LEFT_TOP_MARGIN; mb_to_bottom_edge += RIGHT_BOTTOM_MARGIN; mbmi->need_to_clamp_mvs = 0; +#if CONFIG_DUALPRED + mbmi->second_ref_frame = 0; +#endif /* CONFIG_DUALPRED */ /* Distance of Mb to the various image edges. * These specified to 8th pel as they are always compared to MV values that are in 1/8th pel units */ @@ -666,6 +680,50 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, mb_to_bottom_edge); propagate_mv: /* same MV throughout */ +#if CONFIG_DUALPRED + if (pbi->common.dual_pred_mode == DUAL_PREDICTION_ONLY || + (pbi->common.dual_pred_mode == HYBRID_PREDICTION && + vp8_read(bc, pbi->prob_dualpred[(mi[-1].mbmi.second_ref_frame != INTRA_FRAME) + + (mi[-mis].mbmi.second_ref_frame != INTRA_FRAME)]))) + { + mbmi->second_ref_frame = mbmi->ref_frame + 1; + if (mbmi->second_ref_frame == 4) + mbmi->second_ref_frame = 1; + } + if (mbmi->second_ref_frame) + { + vp8_find_near_mvs(xd, mi, &nearest, &nearby, &best_mv, rct, + mbmi->second_ref_frame, pbi->common.ref_frame_sign_bias); + switch (mbmi->mode) { + case ZEROMV: + mbmi->second_mv.as_int = 0; + break; + case NEARMV: + mbmi->second_mv.as_int = nearby.as_int; + vp8_clamp_mv(&mbmi->second_mv, mb_to_left_edge, mb_to_right_edge, + mb_to_top_edge, mb_to_bottom_edge); + break; + case NEARESTMV: + mbmi->second_mv.as_int = nearest.as_int; + vp8_clamp_mv(&mbmi->second_mv, mb_to_left_edge, mb_to_right_edge, + mb_to_top_edge, mb_to_bottom_edge); + break; + case NEWMV: + read_mv(bc, &mbmi->second_mv.as_mv, 
(const MV_CONTEXT *) mvc); + mbmi->second_mv.as_mv.row += best_mv.as_mv.row; + mbmi->second_mv.as_mv.col += best_mv.as_mv.col; + mbmi->need_to_clamp_mvs |= vp8_check_mv_bounds(&mbmi->second_mv, + mb_to_left_edge, + mb_to_right_edge, + mb_to_top_edge, + mb_to_bottom_edge); + break; + default: + break; + } + } +#endif /* CONFIG_DUALPRED */ + #if CONFIG_ERROR_CONCEALMENT if(pbi->ec_enabled) { @@ -854,4 +912,4 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi) #endif -} \ No newline at end of file +} diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c index f3da2d0b57..2ad5d1b87c 100644 --- a/vp8/decoder/decodframe.c +++ b/vp8/decoder/decodframe.c @@ -135,6 +135,14 @@ static void skip_recon_mb(VP8D_COMP *pbi, MACROBLOCKD *xd) vp8_build_inter16x16_predictors_mb(xd, xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.y_stride, xd->dst.uv_stride); +#if CONFIG_DUALPRED + if (xd->mode_info_context->mbmi.second_ref_frame) + { + vp8_build_2nd_inter16x16_predictors_mb(xd, xd->dst.y_buffer, + xd->dst.u_buffer, xd->dst.v_buffer, + xd->dst.y_stride, xd->dst.uv_stride); + } +#endif /* CONFIG_DUALPRED */ } #ifdef DEC_DEBUG if (dec_debug) { @@ -605,6 +613,25 @@ decode_mb_row(VP8D_COMP *pbi, VP8_COMMON *pc, int mb_row, MACROBLOCKD *xd) xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset; xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset; +#if CONFIG_DUALPRED + if (xd->mode_info_context->mbmi.second_ref_frame) + { + int second_ref_fb_idx; + + /* Select the appropriate reference frame for this MB */ + if (xd->mode_info_context->mbmi.second_ref_frame == LAST_FRAME) + second_ref_fb_idx = pc->lst_fb_idx; + else if (xd->mode_info_context->mbmi.second_ref_frame == GOLDEN_FRAME) + second_ref_fb_idx = pc->gld_fb_idx; + else + second_ref_fb_idx = pc->alt_fb_idx; + + xd->second_pre.y_buffer = pc->yv12_fb[second_ref_fb_idx].y_buffer + recon_yoffset; + xd->second_pre.u_buffer = pc->yv12_fb[second_ref_fb_idx].u_buffer + recon_uvoffset; + 
xd->second_pre.v_buffer = pc->yv12_fb[second_ref_fb_idx].v_buffer + recon_uvoffset; + } +#endif /* CONFIG_DUALPRED */ + if (xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME) { /* propagate errors from reference frames */ @@ -852,6 +879,10 @@ static void init_frame(VP8D_COMP *pbi) xd->subpixel_predict8x4 = SUBPIX_INVOKE(RTCD_VTABLE(subpix), sixtap8x4); xd->subpixel_predict8x8 = SUBPIX_INVOKE(RTCD_VTABLE(subpix), sixtap8x8); xd->subpixel_predict16x16 = SUBPIX_INVOKE(RTCD_VTABLE(subpix), sixtap16x16); +#if CONFIG_DUALPRED + xd->subpixel_predict_avg8x8 = SUBPIX_INVOKE(RTCD_VTABLE(subpix), sixtap_avg8x8); + xd->subpixel_predict_avg16x16 = SUBPIX_INVOKE(RTCD_VTABLE(subpix), sixtap_avg16x16); +#endif /* CONFIG_DUALPRED */ } else { @@ -859,6 +890,10 @@ static void init_frame(VP8D_COMP *pbi) xd->subpixel_predict8x4 = SUBPIX_INVOKE(RTCD_VTABLE(subpix), bilinear8x4); xd->subpixel_predict8x8 = SUBPIX_INVOKE(RTCD_VTABLE(subpix), bilinear8x8); xd->subpixel_predict16x16 = SUBPIX_INVOKE(RTCD_VTABLE(subpix), bilinear16x16); +#if CONFIG_DUALPRED + xd->subpixel_predict_avg8x8 = SUBPIX_INVOKE(RTCD_VTABLE(subpix), bilinear_avg8x8); + xd->subpixel_predict_avg16x16 = SUBPIX_INVOKE(RTCD_VTABLE(subpix), bilinear_avg16x16); +#endif /* CONFIG_DUALPRED */ } if (pbi->decoded_key_frame && pbi->ec_enabled && !pbi->ec_active) diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h index 14ac2f5d4a..cf686380a1 100644 --- a/vp8/decoder/onyxd_int.h +++ b/vp8/decoder/onyxd_int.h @@ -135,6 +135,9 @@ typedef struct VP8Decompressor vp8_prob prob_last; vp8_prob prob_gf; vp8_prob prob_skip_false; +#if CONFIG_DUALPRED + vp8_prob prob_dualpred[3]; +#endif /* CONFIG_DUALPRED */ #if CONFIG_ERROR_CONCEALMENT MB_OVERLAP *overlaps; diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c index 09dffe2691..f5c916f348 100644 --- a/vp8/decoder/threading.c +++ b/vp8/decoder/threading.c @@ -50,6 +50,10 @@ static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_D 
mbd->subpixel_predict8x4 = xd->subpixel_predict8x4; mbd->subpixel_predict8x8 = xd->subpixel_predict8x8; mbd->subpixel_predict16x16 = xd->subpixel_predict16x16; +#if CONFIG_DUALPRED + mbd->subpixel_predict_avg8x8 = xd->subpixel_predict_avg8x8; + mbd->subpixel_predict_avg16x16 = xd->subpixel_predict_avg16x16; +#endif /* CONFIG_DUALPRED */ mbd->mode_info_context = pc->mi + pc->mode_info_stride * (i + 1); mbd->mode_info_stride = pc->mode_info_stride; diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c index b19d58e3c7..d3e61699ff 100644 --- a/vp8/encoder/bitstream.c +++ b/vp8/encoder/bitstream.c @@ -959,6 +959,9 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) int prob_last_coded; int prob_gf_coded; int prob_skip_false = 0; +#if CONFIG_DUALPRED + int prob_dual_pred[3]; +#endif /* CONFIG_DUALPRED */ cpi->mb.partition_info = cpi->mb.pi; @@ -1012,6 +1015,39 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) vp8_write_literal(w, prob_last_coded, 8); vp8_write_literal(w, prob_gf_coded, 8); +#if CONFIG_DUALPRED + if (cpi->common.dual_pred_mode == HYBRID_PREDICTION) + { + vp8_write(w, 1, 128); + vp8_write(w, 1, 128); + for (i = 0; i < 3; i++) { + if (cpi->single_pred_count[i] + cpi->dual_pred_count[i]) + { + prob_dual_pred[i] = cpi->single_pred_count[i] * 256 / + (cpi->single_pred_count[i] + cpi->dual_pred_count[i]); + if (prob_dual_pred[i] < 1) + prob_dual_pred[i] = 1; + else if (prob_dual_pred[i] > 255) + prob_dual_pred[i] = 255; + } + else + { + prob_dual_pred[i] = 128; + } + vp8_write_literal(w, prob_dual_pred[i], 8); + } + } + else if (cpi->common.dual_pred_mode == SINGLE_PREDICTION_ONLY) + { + vp8_write(w, 0, 128); + } + else /* dual prediction only */ + { + vp8_write(w, 1, 128); + vp8_write(w, 0, 128); + } +#endif /* CONFIG_DUALPRED */ + update_mbintra_mode_probs(cpi); vp8_write_mvprobs(cpi); @@ -1153,14 +1189,29 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) switch (mode) /* new, split require MVs */ { case NEWMV: - #ifdef ENTROPY_STATS 
active_section = 5; #endif write_mv(w, &mi->mv.as_mv, &best_mv, mvc); +#if CONFIG_DUALPRED + if (cpi->common.dual_pred_mode == HYBRID_PREDICTION) + { + int t = m[-mis].mbmi.second_ref_frame != INTRA_FRAME; + int l = m[-1 ].mbmi.second_ref_frame != INTRA_FRAME; + vp8_write(w, mi->second_ref_frame != INTRA_FRAME, + prob_dual_pred[t + l]); + } + if (mi->second_ref_frame) + { + const int second_rf = mi->second_ref_frame; + int_mv n1, n2; + int ct[4]; + vp8_find_near_mvs(xd, m, &n1, &n2, &best_mv, ct, second_rf, cpi->common.ref_frame_sign_bias); + write_mv(w, &mi->second_mv.as_mv, &best_mv, mvc); + } +#endif /* CONFIG_DUALPRED */ break; - case SPLITMV: { int j = 0; @@ -1207,6 +1258,15 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) } break; default: +#if CONFIG_DUALPRED + if (cpi->common.dual_pred_mode == HYBRID_PREDICTION) + { + int t = m[-mis].mbmi.second_ref_frame != INTRA_FRAME; + int l = m[-1 ].mbmi.second_ref_frame != INTRA_FRAME; + vp8_write(w, mi->second_ref_frame != INTRA_FRAME, + prob_dual_pred[t + l]); + } +#endif /* CONFIG_DUALPRED */ break; } } @@ -1228,6 +1288,15 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) #endif cpi->mb.partition_info++; } + +#if CONFIG_DUALPRED + if (cpi->common.dual_pred_mode == HYBRID_PREDICTION) + { + cpi->prob_dualpred[0] = (prob_dual_pred[0] + cpi->prob_dualpred[0] + 1) >> 1; + cpi->prob_dualpred[1] = (prob_dual_pred[1] + cpi->prob_dualpred[1] + 1) >> 1; + cpi->prob_dualpred[2] = (prob_dual_pred[2] + cpi->prob_dualpred[2] + 1) >> 1; + } +#endif /* CONFIG_DUALPRED */ } diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c index a6a09247a2..ac3058106d 100644 --- a/vp8/encoder/encodeframe.c +++ b/vp8/encoder/encodeframe.c @@ -910,7 +910,7 @@ void init_encode_frame_mb_context(VP8_COMP *cpi) xd->fullpixel_mask = 0xfffffff8; } -void vp8_encode_frame(VP8_COMP *cpi) +static void encode_frame_internal(VP8_COMP *cpi) { int mb_row; MACROBLOCK *const x = & cpi->mb; @@ -953,6 +953,12 @@ void 
vp8_encode_frame(VP8_COMP *cpi) &cpi->common.rtcd.subpix, sixtap8x8); xd->subpixel_predict16x16 = SUBPIX_INVOKE( &cpi->common.rtcd.subpix, sixtap16x16); +#if CONFIG_DUALPRED + xd->subpixel_predict_avg8x8 = SUBPIX_INVOKE( + &cpi->common.rtcd.subpix, sixtap_avg8x8); + xd->subpixel_predict_avg16x16 = SUBPIX_INVOKE( + &cpi->common.rtcd.subpix, sixtap_avg16x16); +#endif /* CONFIG_DUALPRED */ } else { @@ -964,6 +970,12 @@ void vp8_encode_frame(VP8_COMP *cpi) &cpi->common.rtcd.subpix, bilinear8x8); xd->subpixel_predict16x16 = SUBPIX_INVOKE( &cpi->common.rtcd.subpix, bilinear16x16); +#if CONFIG_DUALPRED + xd->subpixel_predict_avg8x8 = SUBPIX_INVOKE( + &cpi->common.rtcd.subpix, bilinear_avg8x8); + xd->subpixel_predict_avg16x16 = SUBPIX_INVOKE( + &cpi->common.rtcd.subpix, bilinear_avg16x16); +#endif /* CONFIG_DUALPRED */ } // Reset frame count of inter 0,0 motion vector usage. @@ -1006,6 +1018,11 @@ void vp8_encode_frame(VP8_COMP *cpi) // re-initencode frame context. init_encode_frame_mb_context(cpi); +#if CONFIG_DUALPRED + cpi->rd_single_diff = cpi->rd_dual_diff = cpi->rd_hybrid_diff = 0; + cpi->single_pred_count[0] = cpi->single_pred_count[1] = cpi->single_pred_count[2] = 0; + cpi->dual_pred_count[0] = cpi->dual_pred_count[1] = cpi->dual_pred_count[2] = 0; +#endif /* CONFIG_DUALPRED */ { struct vpx_usec_timer emr_timer; @@ -1189,6 +1206,121 @@ void vp8_encode_frame(VP8_COMP *cpi) #endif } + +void vp8_encode_frame(VP8_COMP *cpi) +{ +#if CONFIG_DUALPRED + if (cpi->sf.RD) + { + int frame_type, pred_type; + int redo = 0; + + /* + * This code does a single RD pass over the whole frame assuming + * either dual, single or hybrid prediction as per whatever has + * worked best for that type of frame in the past. + * It also predicts whether another coding mode would have worked + * better that this coding mode. If that is the case, it remembers + * that for subsequent frames. 
If the difference is above a certain + * threshold, it will actually re-encode the current frame using + * that different coding mode. + */ + if (cpi->common.frame_type == KEY_FRAME) + frame_type = 0; + else if (cpi->is_src_frame_alt_ref && cpi->common.refresh_golden_frame) + frame_type = 3; + else if (cpi->common.refresh_golden_frame || cpi->common.refresh_alt_ref_frame) + frame_type = 1; + else + frame_type = 2; + + if (cpi->rd_prediction_type_threshes[frame_type][1] > + cpi->rd_prediction_type_threshes[frame_type][0] && + cpi->rd_prediction_type_threshes[frame_type][1] > + cpi->rd_prediction_type_threshes[frame_type][2]) + pred_type = DUAL_PREDICTION_ONLY; + else if (cpi->rd_prediction_type_threshes[frame_type][0] > + cpi->rd_prediction_type_threshes[frame_type][1] && + cpi->rd_prediction_type_threshes[frame_type][0] > + cpi->rd_prediction_type_threshes[frame_type][2]) + pred_type = SINGLE_PREDICTION_ONLY; + else + pred_type = HYBRID_PREDICTION; + + cpi->common.dual_pred_mode = pred_type; + encode_frame_internal(cpi); + + cpi->rd_single_diff /= cpi->common.MBs; + cpi->rd_prediction_type_threshes[frame_type][0] += cpi->rd_single_diff; + cpi->rd_prediction_type_threshes[frame_type][0] >>= 1; + cpi->rd_dual_diff /= cpi->common.MBs; + cpi->rd_prediction_type_threshes[frame_type][1] += cpi->rd_dual_diff; + cpi->rd_prediction_type_threshes[frame_type][1] >>= 1; + cpi->rd_hybrid_diff /= cpi->common.MBs; + cpi->rd_prediction_type_threshes[frame_type][2] += cpi->rd_hybrid_diff; + cpi->rd_prediction_type_threshes[frame_type][2] >>= 1; + + /* FIXME make "100" (the threshold at which to re-encode the + * current frame) a commandline option. */ + if (cpi->common.dual_pred_mode == SINGLE_PREDICTION_ONLY && + (cpi->rd_dual_diff >= 100 || cpi->rd_hybrid_diff >= 100)) + { + redo = 1; + cpi->common.dual_pred_mode = cpi->rd_dual_diff > cpi->rd_hybrid_diff ? 
+ DUAL_PREDICTION_ONLY : HYBRID_PREDICTION; + } + else if (cpi->common.dual_pred_mode == DUAL_PREDICTION_ONLY && + (cpi->rd_single_diff >= 100 || cpi->rd_hybrid_diff >= 100)) + { + redo = 1; + cpi->common.dual_pred_mode = cpi->rd_single_diff > cpi->rd_hybrid_diff ? + SINGLE_PREDICTION_ONLY : HYBRID_PREDICTION; + } + else if (cpi->common.dual_pred_mode == HYBRID_PREDICTION && + (cpi->rd_single_diff >= 100 || cpi->rd_dual_diff >= 100)) + { + if (cpi->dual_pred_count[0] + cpi->dual_pred_count[1] + cpi->dual_pred_count[2] == 0) + { + cpi->common.dual_pred_mode = SINGLE_PREDICTION_ONLY; + } + else if (cpi->single_pred_count[0] + cpi->single_pred_count[1] + cpi->single_pred_count[2] == 0) + { + cpi->common.dual_pred_mode = DUAL_PREDICTION_ONLY; + } + else + { + redo = 1; + cpi->common.dual_pred_mode = cpi->rd_single_diff > cpi->rd_dual_diff ? + SINGLE_PREDICTION_ONLY : DUAL_PREDICTION_ONLY; + } + } + + + if (redo) + { + encode_frame_internal(cpi); + } + + if (cpi->common.dual_pred_mode == HYBRID_PREDICTION) + { + if (cpi->dual_pred_count[0] + cpi->dual_pred_count[1] + cpi->dual_pred_count[2] == 0) + { + cpi->common.dual_pred_mode = SINGLE_PREDICTION_ONLY; + } + else if (cpi->single_pred_count[0] + cpi->single_pred_count[1] + cpi->single_pred_count[2] == 0) + { + cpi->common.dual_pred_mode = DUAL_PREDICTION_ONLY; + } + } + } + else +#endif /* CONFIG_DUALPRED */ + { + encode_frame_internal(cpi); + } + +} + void vp8_setup_block_ptrs(MACROBLOCK *x) { int r, c; @@ -1416,6 +1548,7 @@ int vp8cx_encode_inter_macroblock if (cpi->sf.RD) { int zbin_mode_boost_enabled = cpi->zbin_mode_boost_enabled; + int single, dual, hybrid; /* Are we using the fast quantizer for the mode selection?
*/ if(cpi->sf.use_fastquant_for_pick) @@ -1430,7 +1563,23 @@ int vp8cx_encode_inter_macroblock cpi->zbin_mode_boost_enabled = 0; } vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, - &distortion, &intra_error); + &distortion, &intra_error, &single, &dual, &hybrid); +#if CONFIG_DUALPRED + cpi->rd_single_diff += single; + cpi->rd_dual_diff += dual; + cpi->rd_hybrid_diff += hybrid; + if (x->e_mbd.mode_info_context->mbmi.ref_frame && + x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) + { + MB_MODE_INFO *t = &x->e_mbd.mode_info_context[-cpi->common.mode_info_stride].mbmi; + MB_MODE_INFO *l = &x->e_mbd.mode_info_context[-1].mbmi; + int cnt = (t->second_ref_frame != INTRA_FRAME) + (l->second_ref_frame != INTRA_FRAME); + if (x->e_mbd.mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) + cpi->single_pred_count[cnt]++; + else + cpi->dual_pred_count[cnt]++; + } +#endif /* CONFIG_DUALPRED */ /* switch back to the regular quantizer for the encode */ if (cpi->sf.improved_quant) @@ -1581,6 +1730,27 @@ int vp8cx_encode_inter_macroblock xd->pre.u_buffer = cpi->common.yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset; xd->pre.v_buffer = cpi->common.yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset; +#if CONFIG_DUALPRED + if (xd->mode_info_context->mbmi.second_ref_frame) { + int second_ref_fb_idx; + + cpi->mbs_dual_count++; + if (xd->mode_info_context->mbmi.second_ref_frame == LAST_FRAME) + second_ref_fb_idx = cpi->common.lst_fb_idx; + else if (xd->mode_info_context->mbmi.second_ref_frame == GOLDEN_FRAME) + second_ref_fb_idx = cpi->common.gld_fb_idx; + else + second_ref_fb_idx = cpi->common.alt_fb_idx; + + xd->second_pre.y_buffer = cpi->common.yv12_fb[second_ref_fb_idx].y_buffer + + recon_yoffset; + xd->second_pre.u_buffer = cpi->common.yv12_fb[second_ref_fb_idx].u_buffer + + recon_uvoffset; + xd->second_pre.v_buffer = cpi->common.yv12_fb[second_ref_fb_idx].v_buffer + + recon_uvoffset; + } +#endif /* CONFIG_DUALPRED */ + if (!x->skip) { 
vp8_encode_inter16x16(IF_RTCD(&cpi->rtcd), x); @@ -1591,10 +1761,11 @@ int vp8cx_encode_inter_macroblock } else + { vp8_build_inter16x16_predictors_mb(xd, xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.y_stride, xd->dst.uv_stride); - + } } #if CONFIG_T8X8 if ( get_seg_tx_type( xd, *segment_id ) == TX_8X8 ) diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c index 64d7707f70..f2fa5b360e 100644 --- a/vp8/encoder/ethreading.c +++ b/vp8/encoder/ethreading.c @@ -398,6 +398,10 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc) zd->subpixel_predict8x4 = xd->subpixel_predict8x4; zd->subpixel_predict8x8 = xd->subpixel_predict8x8; zd->subpixel_predict16x16 = xd->subpixel_predict16x16; +#if CONFIG_DUALPRED + zd->subpixel_predict_avg8x8 = xd->subpixel_predict_avg8x8; + zd->subpixel_predict_avg16x16 = xd->subpixel_predict_avg16x16; +#endif /* CONFIG_DUALPRED */ zd->segmentation_enabled = xd->segmentation_enabled; zd->mb_segement_abs_delta = xd->mb_segement_abs_delta; @@ -439,6 +443,10 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi, mbd->subpixel_predict8x4 = xd->subpixel_predict8x4; mbd->subpixel_predict8x8 = xd->subpixel_predict8x8; mbd->subpixel_predict16x16 = xd->subpixel_predict16x16; +#if CONFIG_DUALPRED + mbd->subpixel_predict_avg8x8 = xd->subpixel_predict_avg8x8; + mbd->subpixel_predict_avg16x16 = xd->subpixel_predict_avg16x16; +#endif /* CONFIG_DUALPRED */ #if CONFIG_RUNTIME_CPU_DETECT mbd->rtcd = xd->rtcd; #endif diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index 126a2db5e3..7b9f08fe63 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -802,6 +802,7 @@ void vp8_set_speed_features(VP8_COMP *cpi) } cpi->mbs_tested_so_far = 0; + cpi->mbs_dual_count = 0; // best quality defaults sf->RD = 1; @@ -857,6 +858,21 @@ void vp8_set_speed_features(VP8_COMP *cpi) sf->thresh_mult[THR_SPLITG ] = 5000; sf->thresh_mult[THR_SPLITA ] = 5000; +#if CONFIG_DUALPRED + sf->thresh_mult[THR_DUAL_ZEROLG ] = 0; + 
sf->thresh_mult[THR_DUAL_NEARESTLG] = 0; + sf->thresh_mult[THR_DUAL_NEARLG ] = 0; + sf->thresh_mult[THR_DUAL_ZEROLA ] = 0; + sf->thresh_mult[THR_DUAL_NEARESTLA] = 0; + sf->thresh_mult[THR_DUAL_NEARLA ] = 0; + sf->thresh_mult[THR_DUAL_ZEROGA ] = 0; + sf->thresh_mult[THR_DUAL_NEARESTGA] = 0; + sf->thresh_mult[THR_DUAL_NEARGA ] = 0; + + sf->thresh_mult[THR_DUAL_NEWLG ] = 1000; + sf->thresh_mult[THR_DUAL_NEWLA ] = 1000; + sf->thresh_mult[THR_DUAL_NEWGA ] = 1000; +#endif /* CONFIG_DUALPRED */ sf->first_step = 0; sf->max_step_search_steps = MAX_MVSEARCH_STEPS; @@ -908,6 +924,22 @@ void vp8_set_speed_features(VP8_COMP *cpi) sf->thresh_mult[THR_SPLITMV ] = 1700; sf->thresh_mult[THR_SPLITG ] = 4500; sf->thresh_mult[THR_SPLITA ] = 4500; + +#if CONFIG_DUALPRED + sf->thresh_mult[THR_DUAL_ZEROLG ] = 0; + sf->thresh_mult[THR_DUAL_NEARESTLG] = 0; + sf->thresh_mult[THR_DUAL_NEARLG ] = 0; + sf->thresh_mult[THR_DUAL_ZEROLA ] = 0; + sf->thresh_mult[THR_DUAL_NEARESTLA] = 0; + sf->thresh_mult[THR_DUAL_NEARLA ] = 0; + sf->thresh_mult[THR_DUAL_ZEROGA ] = 0; + sf->thresh_mult[THR_DUAL_NEARESTGA] = 0; + sf->thresh_mult[THR_DUAL_NEARGA ] = 0; + + sf->thresh_mult[THR_DUAL_NEWLG ] = 1000; + sf->thresh_mult[THR_DUAL_NEWLA ] = 1000; + sf->thresh_mult[THR_DUAL_NEWGA ] = 1000; +#endif /* CONFIG_DUALPRED */ #else sf->thresh_mult[THR_NEWMV ] = 1500; sf->thresh_mult[THR_NEWG ] = 1500; @@ -968,6 +1000,22 @@ void vp8_set_speed_features(VP8_COMP *cpi) sf->thresh_mult[THR_NEWA ] = 2000; sf->thresh_mult[THR_SPLITA ] = 20000; } + +#if CONFIG_DUALPRED + sf->thresh_mult[THR_DUAL_ZEROLG ] = 1500; + sf->thresh_mult[THR_DUAL_NEARESTLG] = 1500; + sf->thresh_mult[THR_DUAL_NEARLG ] = 1500; + sf->thresh_mult[THR_DUAL_ZEROLA ] = 1500; + sf->thresh_mult[THR_DUAL_NEARESTLA] = 1500; + sf->thresh_mult[THR_DUAL_NEARLA ] = 1500; + sf->thresh_mult[THR_DUAL_ZEROGA ] = 1500; + sf->thresh_mult[THR_DUAL_NEARESTGA] = 1500; + sf->thresh_mult[THR_DUAL_NEARGA ] = 1500; + + sf->thresh_mult[THR_DUAL_NEWLG ] = 2000; + 
sf->thresh_mult[THR_DUAL_NEWLA ] = 2000; + sf->thresh_mult[THR_DUAL_NEWGA ] = 2000; +#endif /* CONFIG_DUALPRED */ } if (Speed > 2) @@ -1008,6 +1056,22 @@ void vp8_set_speed_features(VP8_COMP *cpi) sf->thresh_mult[THR_SPLITA ] = 50000; } +#if CONFIG_DUALPRED + sf->thresh_mult[THR_DUAL_ZEROLG ] = 2000; + sf->thresh_mult[THR_DUAL_NEARESTLG] = 2000; + sf->thresh_mult[THR_DUAL_NEARLG ] = 2000; + sf->thresh_mult[THR_DUAL_ZEROLA ] = 2000; + sf->thresh_mult[THR_DUAL_NEARESTLA] = 2000; + sf->thresh_mult[THR_DUAL_NEARLA ] = 2000; + sf->thresh_mult[THR_DUAL_ZEROGA ] = 2000; + sf->thresh_mult[THR_DUAL_NEARESTGA] = 2000; + sf->thresh_mult[THR_DUAL_NEARGA ] = 2000; + + sf->thresh_mult[THR_DUAL_NEWLG ] = 2500; + sf->thresh_mult[THR_DUAL_NEWLA ] = 2500; + sf->thresh_mult[THR_DUAL_NEWGA ] = 2500; +#endif /* CONFIG_DUALPRED */ + sf->improved_quant = 0; sf->improved_dct = 0; @@ -1065,6 +1129,15 @@ void vp8_set_speed_features(VP8_COMP *cpi) cpi->mode_check_freq[THR_NEWA] = 4; } +#if CONFIG_DUALPRED + cpi->mode_check_freq[THR_DUAL_NEARLG ] = 2; + cpi->mode_check_freq[THR_DUAL_NEARLA ] = 2; + cpi->mode_check_freq[THR_DUAL_NEARGA ] = 2; + cpi->mode_check_freq[THR_DUAL_NEWLG ] = 4; + cpi->mode_check_freq[THR_DUAL_NEWLA ] = 4; + cpi->mode_check_freq[THR_DUAL_NEWGA ] = 4; +#endif /* CONFIG_DUALPRED */ + if (cpi->ref_frame_flags & VP8_GOLD_FLAG) { sf->thresh_mult[THR_NEARESTG ] = 2000; @@ -1080,6 +1153,12 @@ void vp8_set_speed_features(VP8_COMP *cpi) sf->thresh_mult[THR_NEARA ] = 2000; sf->thresh_mult[THR_NEWA ] = 4000; } + +#if CONFIG_DUALPRED + sf->thresh_mult[THR_DUAL_NEWLG ] = 4000; + sf->thresh_mult[THR_DUAL_NEWLA ] = 4000; + sf->thresh_mult[THR_DUAL_NEWGA ] = 4000; +#endif /* CONFIG_DUALPRED */ } break; @@ -1114,6 +1193,22 @@ void vp8_set_speed_features(VP8_COMP *cpi) sf->thresh_mult[THR_SPLITA ] = 10000; sf->search_method = NSTEP; +#if CONFIG_DUALPRED + sf->thresh_mult[THR_DUAL_ZEROLG ] = 1000; + sf->thresh_mult[THR_DUAL_NEARESTLG] = 1000; + sf->thresh_mult[THR_DUAL_NEARLG ] = 1000; + 
sf->thresh_mult[THR_DUAL_ZEROLA ] = 1000; + sf->thresh_mult[THR_DUAL_NEARESTLA] = 1000; + sf->thresh_mult[THR_DUAL_NEARLA ] = 1000; + sf->thresh_mult[THR_DUAL_ZEROGA ] = 1000; + sf->thresh_mult[THR_DUAL_NEARESTGA] = 1000; + sf->thresh_mult[THR_DUAL_NEARGA ] = 1000; + + sf->thresh_mult[THR_DUAL_NEWLG ] = 2000; + sf->thresh_mult[THR_DUAL_NEWLA ] = 2000; + sf->thresh_mult[THR_DUAL_NEWGA ] = 2000; +#endif /* CONFIG_DUALPRED */ + if (Speed > 0) { cpi->mode_check_freq[THR_SPLITG] = 4; @@ -1201,6 +1296,21 @@ void vp8_set_speed_features(VP8_COMP *cpi) sf->thresh_mult[THR_SPLITA ] = 50000; } +#if CONFIG_DUALPRED + sf->thresh_mult[THR_DUAL_ZEROLG ] = 2000; + sf->thresh_mult[THR_DUAL_NEARESTLG] = 2000; + sf->thresh_mult[THR_DUAL_NEARLG ] = 2000; + sf->thresh_mult[THR_DUAL_ZEROLA ] = 2000; + sf->thresh_mult[THR_DUAL_NEARESTLA] = 2000; + sf->thresh_mult[THR_DUAL_NEARLA ] = 2000; + sf->thresh_mult[THR_DUAL_ZEROGA ] = 2000; + sf->thresh_mult[THR_DUAL_NEARESTGA] = 2000; + sf->thresh_mult[THR_DUAL_NEARGA ] = 2000; + + sf->thresh_mult[THR_DUAL_NEWLG ] = 2500; + sf->thresh_mult[THR_DUAL_NEWLA ] = 2500; + sf->thresh_mult[THR_DUAL_NEWGA ] = 2500; +#endif /* CONFIG_DUALPRED */ } if (Speed > 2) @@ -1227,6 +1337,15 @@ void vp8_set_speed_features(VP8_COMP *cpi) cpi->mode_check_freq[THR_NEWA] = 4; } +#if CONFIG_DUALPRED + cpi->mode_check_freq[THR_DUAL_NEARLG ] = 2; + cpi->mode_check_freq[THR_DUAL_NEARLA ] = 2; + cpi->mode_check_freq[THR_DUAL_NEARGA ] = 2; + cpi->mode_check_freq[THR_DUAL_NEWLG ] = 4; + cpi->mode_check_freq[THR_DUAL_NEWLA ] = 4; + cpi->mode_check_freq[THR_DUAL_NEWGA ] = 4; +#endif /* CONFIG_DUALPRED */ + sf->thresh_mult[THR_SPLITMV ] = INT_MAX; sf->thresh_mult[THR_SPLITG ] = INT_MAX; sf->thresh_mult[THR_SPLITA ] = INT_MAX; @@ -1289,6 +1408,12 @@ void vp8_set_speed_features(VP8_COMP *cpi) sf->thresh_mult[THR_NEARA ] = 2000; sf->thresh_mult[THR_NEWA ] = 4000; } + +#if CONFIG_DUALPRED + sf->thresh_mult[THR_DUAL_NEWLG ] = 4000; + sf->thresh_mult[THR_DUAL_NEWLA ] = 4000; + 
sf->thresh_mult[THR_DUAL_NEWGA ] = 4000; +#endif /* CONFIG_DUALPRED */ } if (Speed > 5) @@ -1358,6 +1483,22 @@ void vp8_set_speed_features(VP8_COMP *cpi) sf->thresh_mult[THR_NEARA ] = thresh; } +#if CONFIG_DUALPRED + sf->thresh_mult[THR_DUAL_ZEROLG ] = thresh; + sf->thresh_mult[THR_DUAL_NEARESTLG] = thresh; + sf->thresh_mult[THR_DUAL_NEARLG ] = thresh; + sf->thresh_mult[THR_DUAL_ZEROLA ] = thresh; + sf->thresh_mult[THR_DUAL_NEARESTLA] = thresh; + sf->thresh_mult[THR_DUAL_NEARLA ] = thresh; + sf->thresh_mult[THR_DUAL_ZEROGA ] = thresh; + sf->thresh_mult[THR_DUAL_NEARESTGA] = thresh; + sf->thresh_mult[THR_DUAL_NEARGA ] = thresh; + + sf->thresh_mult[THR_DUAL_NEWLG ] = thresh << 1; + sf->thresh_mult[THR_DUAL_NEWLA ] = thresh << 1; + sf->thresh_mult[THR_DUAL_NEWGA ] = thresh << 1; +#endif /* CONFIG_DUALPRED */ + // Disable other intra prediction modes sf->thresh_mult[THR_TM] = INT_MAX; sf->thresh_mult[THR_V_PRED] = INT_MAX; @@ -1394,6 +1535,22 @@ void vp8_set_speed_features(VP8_COMP *cpi) cpi->mode_check_freq[THR_NEWA] = 1 << (Tmp + 1); } +#if CONFIG_DUALPRED + cpi->mode_check_freq[THR_DUAL_ZEROLG ] = 1 << (Tmp - 1); + cpi->mode_check_freq[THR_DUAL_NEARESTLG] = 1 << (Tmp - 1); + cpi->mode_check_freq[THR_DUAL_NEARLG ] = 1 << Tmp; + cpi->mode_check_freq[THR_DUAL_ZEROLA ] = 1 << (Tmp - 1); + cpi->mode_check_freq[THR_DUAL_NEARESTLA] = 1 << (Tmp - 1); + cpi->mode_check_freq[THR_DUAL_NEARLA ] = 1 << Tmp; + cpi->mode_check_freq[THR_DUAL_ZEROGA ] = 1 << (Tmp - 1); + cpi->mode_check_freq[THR_DUAL_NEARESTGA] = 1 << (Tmp - 1); + cpi->mode_check_freq[THR_DUAL_NEARGA ] = 1 << Tmp; + + cpi->mode_check_freq[THR_DUAL_NEWLG ] = 1 << (Tmp + 1); + cpi->mode_check_freq[THR_DUAL_NEWLA ] = 1 << (Tmp + 1); + cpi->mode_check_freq[THR_DUAL_NEWGA ] = 1 << (Tmp + 1); +#endif /* CONFIG_DUALPRED */ + cpi->mode_check_freq[THR_NEWMV] = 1 << (Tmp - 1); } @@ -1439,6 +1596,31 @@ void vp8_set_speed_features(VP8_COMP *cpi) sf->thresh_mult[THR_SPLITA ] = INT_MAX; } +#if CONFIG_DUALPRED + if 
((cpi->ref_frame_flags & (VP8_LAST_FLAG | VP8_GOLD_FLAG)) != (VP8_LAST_FLAG | VP8_GOLD_FLAG)) + { + sf->thresh_mult[THR_DUAL_ZEROLG ] = INT_MAX; + sf->thresh_mult[THR_DUAL_NEARESTLG] = INT_MAX; + sf->thresh_mult[THR_DUAL_NEARLG ] = INT_MAX; + sf->thresh_mult[THR_DUAL_NEWLG ] = INT_MAX; + } + + if ((cpi->ref_frame_flags & (VP8_LAST_FLAG | VP8_ALT_FLAG)) != (VP8_LAST_FLAG | VP8_ALT_FLAG)) + { + sf->thresh_mult[THR_DUAL_ZEROLA ] = INT_MAX; + sf->thresh_mult[THR_DUAL_NEARESTLA] = INT_MAX; + sf->thresh_mult[THR_DUAL_NEARLA ] = INT_MAX; + sf->thresh_mult[THR_DUAL_NEWLA ] = INT_MAX; + } + + if ((cpi->ref_frame_flags & (VP8_GOLD_FLAG | VP8_ALT_FLAG)) != (VP8_GOLD_FLAG | VP8_ALT_FLAG)) + { + sf->thresh_mult[THR_DUAL_ZEROGA ] = INT_MAX; + sf->thresh_mult[THR_DUAL_NEARESTGA] = INT_MAX; + sf->thresh_mult[THR_DUAL_NEARGA ] = INT_MAX; + sf->thresh_mult[THR_DUAL_NEWGA ] = INT_MAX; + } +#endif /* CONFIG_DUALPRED */ // Slow quant, dct and trellis not worthwhile for first pass // so make sure they are always turned off. @@ -2132,6 +2314,11 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) cpi->prob_last_coded = 128; cpi->prob_gf_coded = 128; cpi->prob_intra_coded = 63; +#if CONFIG_DUALPRED + cpi->prob_dualpred[0] = 128; + cpi->prob_dualpred[1] = 128; + cpi->prob_dualpred[2] = 128; +#endif /* CONFIG_DUALPRED */ // Prime the recent reference frame useage counters. 
// Hereafter they will be maintained as a sort of moving average diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h index 38025cad5c..74c9876d0f 100644 --- a/vp8/encoder/onyx_int.h +++ b/vp8/encoder/onyx_int.h @@ -42,11 +42,12 @@ #define AF_THRESH 25 #define AF_THRESH2 100 #define ARF_DECAY_THRESH 12 -#if CONFIG_I8X8 -#define MAX_MODES 21 -#else -#define MAX_MODES 20 -#endif +#if CONFIG_DUALPRED +#define MAX_MODES (32 + CONFIG_I8X8) +#else /* CONFIG_DUALPRED */ +#define MAX_MODES (20 + CONFIG_I8X8) +#endif /* CONFIG_DUALPRED */ + #define MIN_THRESHMULT 32 #define MAX_THRESHMULT 512 @@ -192,6 +193,24 @@ typedef enum #if CONFIG_I8X8 THR_I8X8_PRED = 20, #endif + +#if CONFIG_DUALPRED + THR_DUAL_ZEROLG = 20, + THR_DUAL_NEARESTLG = 21, + THR_DUAL_NEARLG = 22, + + THR_DUAL_ZEROLA = 23, + THR_DUAL_NEARESTLA = 24, + THR_DUAL_NEARLA = 25, + + THR_DUAL_ZEROGA = 26, + THR_DUAL_NEARESTGA = 27, + THR_DUAL_NEARGA = 28, + + THR_DUAL_NEWLG = 29, + THR_DUAL_NEWLA = 30, + THR_DUAL_NEWGA = 31, +#endif /* CONFIG_DUALPRED */ } THR_MODES; @@ -339,10 +358,16 @@ typedef struct VP8_COMP unsigned int mode_test_hit_counts[MAX_MODES]; unsigned int mode_chosen_counts[MAX_MODES]; unsigned int mbs_tested_so_far; + unsigned int mbs_dual_count; int rd_thresh_mult[MAX_MODES]; int rd_baseline_thresh[MAX_MODES]; int rd_threshes[MAX_MODES]; +#if CONFIG_DUALPRED + int rd_single_diff, rd_dual_diff, rd_hybrid_diff; + int rd_prediction_type_threshes[4][NB_PREDICTION_TYPES]; + int dual_pred_count[3], single_pred_count[3]; +#endif /* CONFIG_DUALPRED */ int RDMULT; int RDDIV ; @@ -491,6 +516,9 @@ typedef struct VP8_COMP int prob_skip_false; int last_skip_false_probs[3]; int last_skip_probs_q[3]; +#if CONFIG_DUALPRED + int prob_dualpred[3]; +#endif /* CONFIG_DUALPRED */ int recent_ref_frame_usage[MAX_REF_FRAMES]; int count_mb_ref_frame_usage[MAX_REF_FRAMES]; diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c index d7a2058680..4b622d8bff 100644 --- a/vp8/encoder/pickinter.c +++ 
b/vp8/encoder/pickinter.c @@ -44,6 +44,7 @@ extern unsigned int cnt_pm; extern const MV_REFERENCE_FRAME vp8_ref_frame_order[MAX_MODES]; extern const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES]; +extern const MV_REFERENCE_FRAME vp8_second_ref_frame_order[MAX_MODES]; extern unsigned int (*vp8_get4x4sse_cs)(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride); extern int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]); @@ -528,6 +529,11 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, if (best_rd <= cpi->rd_threshes[mode_index]) continue; +#if CONFIG_DUALPRED + if (vp8_second_ref_frame_order[mode_index]) + continue; +#endif /* CONFIG_DUALPRED */ + x->e_mbd.mode_info_context->mbmi.ref_frame = vp8_ref_frame_order[mode_index]; if (skip_mode[x->e_mbd.mode_info_context->mbmi.ref_frame]) diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c index 8be284965f..fd5bd2e39e 100644 --- a/vp8/encoder/rdopt.c +++ b/vp8/encoder/rdopt.c @@ -106,6 +106,25 @@ const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES] = #if CONFIG_I8X8 I8X8_PRED, #endif + +#if CONFIG_DUALPRED + /* dual prediction modes */ + ZEROMV, + NEARESTMV, + NEARMV, + + ZEROMV, + NEARESTMV, + NEARMV, + + ZEROMV, + NEARESTMV, + NEARMV, + + NEWMV, + NEWMV, + NEWMV, +#endif /* CONFIG_DUALPRED */ }; const MV_REFERENCE_FRAME vp8_ref_frame_order[MAX_MODES] = @@ -141,7 +160,54 @@ const MV_REFERENCE_FRAME vp8_ref_frame_order[MAX_MODES] = #if CONFIG_I8X8 INTRA_FRAME, #endif + +#if CONFIG_DUALPRED + /* dual prediction modes */ + LAST_FRAME, + LAST_FRAME, + LAST_FRAME, + + ALTREF_FRAME, + ALTREF_FRAME, + ALTREF_FRAME, + + GOLDEN_FRAME, + GOLDEN_FRAME, + GOLDEN_FRAME, + + LAST_FRAME, + ALTREF_FRAME, + GOLDEN_FRAME, +#endif /* CONFIG_DUALPRED */ +}; + +#if CONFIG_DUALPRED +const MV_REFERENCE_FRAME vp8_second_ref_frame_order[MAX_MODES] = +{ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +#if CONFIG_I8X8 + 0, +#endif + + /* dual prediction 
modes */ + GOLDEN_FRAME, + GOLDEN_FRAME, + GOLDEN_FRAME, + + LAST_FRAME, + LAST_FRAME, + LAST_FRAME, + + ALTREF_FRAME, + ALTREF_FRAME, + ALTREF_FRAME, + + GOLDEN_FRAME, + LAST_FRAME, + ALTREF_FRAME, }; +#endif /* CONFIG_DUALPRED */ static void fill_token_costs( unsigned int c [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS], @@ -997,7 +1063,6 @@ static int rd_inter16x16_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate, x->e_mbd.mode_info_context->mbmi.segment_id); #endif - vp8_build_inter16x16_predictors_mbuv(&x->e_mbd); ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride); @@ -1970,7 +2035,10 @@ static void set_i8x8_block_modes(MACROBLOCK *x, int *modes) -void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra) +void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, + int *returnrate, int *returndistortion, int *returnintra, + int *best_single_rd_diff, int *best_dual_rd_diff, + int *best_hybrid_rd_diff) { BLOCK *b = &x->block[0]; BLOCKD *d = &x->e_mbd.block[0]; @@ -1996,6 +2064,11 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int int distortion; int best_rd = INT_MAX; int best_intra_rd = INT_MAX; +#if CONFIG_DUALPRED + int best_dual_rd = INT_MAX; + int best_single_rd = INT_MAX; + int best_hybrid_rd = INT_MAX; +#endif /* CONFIG_DUALPRED */ int rate2, distortion2; int uv_intra_rate, uv_intra_distortion, uv_intra_rate_tokenonly; int rate_y, UNINITIALIZED_IS_SAFE(rate_uv); @@ -2016,6 +2089,9 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int int_mv frame_nearest_mv[4]; int_mv frame_near_mv[4]; int_mv frame_best_ref_mv[4]; +#if CONFIG_DUALPRED + int_mv mc_search_result[4]; +#endif /* CONFIG_DUALPRED */ int frame_mdcounts[4][4]; unsigned char *y_buffer[4]; unsigned 
char *u_buffer[4]; @@ -2023,6 +2099,13 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int vpx_memset(&best_mbmode, 0, sizeof(best_mbmode)); vpx_memset(&best_bmodes, 0, sizeof(best_bmodes)); +#if CONFIG_DUALPRED + for (i = 0; i < 4; i++) + { +#define INVALID_MV 0x80008000 + mc_search_result[i].as_int = INVALID_MV; + } +#endif /* CONFIG_DUALPRED */ if (cpi->ref_frame_flags & VP8_LAST_FLAG) { @@ -2088,6 +2171,10 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int int this_rd = INT_MAX; int disable_skip = 0; int other_cost = 0; +#if CONFIG_DUALPRED + int dualmode_cost = 0; + int mode_excluded = 0; +#endif /* CONFIG_DUALPRED */ // Experimental debug code. // Record of rd values recorded for this MB. -1 indicates not measured @@ -2109,6 +2196,9 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int x->e_mbd.mode_info_context->mbmi.mode = this_mode; x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED; x->e_mbd.mode_info_context->mbmi.ref_frame = vp8_ref_frame_order[mode_index]; +#if CONFIG_DUALPRED + x->e_mbd.mode_info_context->mbmi.second_ref_frame = vp8_second_ref_frame_order[mode_index]; +#endif /* CONFIG_DUALPRED */ //#if CONFIG_SEGFEATURES // If the segment reference frame feature is enabled.... 
@@ -2189,6 +2279,9 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int vp8_update_zbin_extra(cpi, x); } +#if CONFIG_DUALPRED + if (!x->e_mbd.mode_info_context->mbmi.second_ref_frame) +#endif /* CONFIG_DUALPRED */ switch (this_mode) { case B_PRED: @@ -2430,6 +2523,9 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &dis, &sse); } +#if CONFIG_DUALPRED + mc_search_result[x->e_mbd.mode_info_context->mbmi.ref_frame].as_int = d->bmi.mv.as_int; +#endif /* CONFIG_DUALPRED */ mode_mv[NEWMV].as_int = d->bmi.mv.as_int; @@ -2458,6 +2554,13 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int vp8_set_mbmode_and_mvs(x, this_mode, &mode_mv[this_mode]); vp8_build_inter16x16_predictors_mby(&x->e_mbd); +#if CONFIG_DUALPRED + MB_MODE_INFO *t = &x->e_mbd.mode_info_context[-cpi->common.mode_info_stride].mbmi; + MB_MODE_INFO *l = &x->e_mbd.mode_info_context[-1].mbmi; + int cnt = (t->second_ref_frame != INTRA_FRAME) + (l->second_ref_frame != INTRA_FRAME); + dualmode_cost = vp8_cost_bit(cpi->prob_dualpred[cnt], 0); +#endif /* CONFIG_DUALPRED */ + if (cpi->active_map_enabled && x->active_ptr[0] == 0) { x->skip = 1; } @@ -2516,24 +2619,124 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int distortion2 += distortion; // UV cost and distortion + vp8_build_inter16x16_predictors_mbuv(&x->e_mbd); rd_inter16x16_uv(cpi, x, &rate_uv, &distortion_uv, cpi->common.full_pixel); rate2 += rate_uv; distortion2 += distortion_uv; +#if CONFIG_DUALPRED + mode_excluded = cpi->common.dual_pred_mode == DUAL_PREDICTION_ONLY; +#endif /* CONFIG_DUALPRED */ break; default: break; } +#if CONFIG_DUALPRED + else /* x->e_mbd.mode_info_context->mbmi.second_ref_frame != 0 */ + { + int ref1 = x->e_mbd.mode_info_context->mbmi.ref_frame; + int ref2 = x->e_mbd.mode_info_context->mbmi.second_ref_frame; + + mode_excluded = cpi->common.dual_pred_mode == 
SINGLE_PREDICTION_ONLY; + switch (this_mode) + { + case NEWMV: + if (mc_search_result[ref1].as_int == INVALID_MV || + mc_search_result[ref2].as_int == INVALID_MV) + continue; + x->e_mbd.mode_info_context->mbmi.mv.as_int = mc_search_result[ref1].as_int; + x->e_mbd.mode_info_context->mbmi.second_mv.as_int = mc_search_result[ref2].as_int; + rate2 += vp8_mv_bit_cost(&mc_search_result[ref1], + &frame_best_ref_mv[ref1], x->mvcost, 96); + rate2 += vp8_mv_bit_cost(&mc_search_result[ref2], + &frame_best_ref_mv[ref2], x->mvcost, 96); + break; + case ZEROMV: + x->e_mbd.mode_info_context->mbmi.mv.as_int = 0; + x->e_mbd.mode_info_context->mbmi.second_mv.as_int = 0; + break; + case NEARMV: + if (frame_near_mv[ref1].as_int == 0 || frame_near_mv[ref2].as_int == 0) + continue; + x->e_mbd.mode_info_context->mbmi.mv.as_int = frame_near_mv[ref1].as_int; + x->e_mbd.mode_info_context->mbmi.second_mv.as_int = frame_near_mv[ref2].as_int; + break; + case NEARESTMV: + if (frame_nearest_mv[ref1].as_int == 0 || frame_nearest_mv[ref2].as_int == 0) + continue; + x->e_mbd.mode_info_context->mbmi.mv.as_int = frame_nearest_mv[ref1].as_int; + x->e_mbd.mode_info_context->mbmi.second_mv.as_int = frame_nearest_mv[ref2].as_int; + break; + default: + break; + } + + /* Add in the Mv/mode cost */ + rate2 += vp8_cost_mv_ref(this_mode, mdcounts); + + vp8_clamp_mv2(&x->e_mbd.mode_info_context->mbmi.mv, xd); + vp8_clamp_mv2(&x->e_mbd.mode_info_context->mbmi.second_mv, xd); + if (((x->e_mbd.mode_info_context->mbmi.mv.as_mv.row >> 3) < x->mv_row_min) || + ((x->e_mbd.mode_info_context->mbmi.mv.as_mv.row >> 3) > x->mv_row_max) || + ((x->e_mbd.mode_info_context->mbmi.mv.as_mv.col >> 3) < x->mv_col_min) || + ((x->e_mbd.mode_info_context->mbmi.mv.as_mv.col >> 3) > x->mv_col_max) || + ((x->e_mbd.mode_info_context->mbmi.second_mv.as_mv.row >> 3) < x->mv_row_min) || + ((x->e_mbd.mode_info_context->mbmi.second_mv.as_mv.row >> 3) > x->mv_row_max) || + ((x->e_mbd.mode_info_context->mbmi.second_mv.as_mv.col >> 3) < 
x->mv_col_min) || + ((x->e_mbd.mode_info_context->mbmi.second_mv.as_mv.col >> 3) > x->mv_col_max)) + continue; + + /* build first and second prediction */ + vp8_build_inter16x16_predictors_mby(&x->e_mbd); + vp8_build_inter16x16_predictors_mbuv(&x->e_mbd); + /* do second round and average the results */ + x->e_mbd.second_pre.y_buffer = y_buffer[ref2]; + x->e_mbd.second_pre.u_buffer = u_buffer[ref2]; + x->e_mbd.second_pre.v_buffer = v_buffer[ref2]; + vp8_build_2nd_inter16x16_predictors_mb(&x->e_mbd, x->e_mbd.predictor, + &x->e_mbd.predictor[256], + &x->e_mbd.predictor[320], 16, 8); + + /* Y cost and distortion */ + macro_block_yrd(x, &rate_y, &distortion, IF_RTCD(&cpi->rtcd.encodemb)); + rate2 += rate_y; + distortion2 += distortion; + + /* UV cost and distortion */ + rd_inter16x16_uv(cpi, x, &rate_uv, &distortion_uv, cpi->common.full_pixel); + rate2 += rate_uv; + distortion2 += distortion_uv; + + /* don't bother w/ skip, we would never have come here if skip were enabled */ + x->e_mbd.mode_info_context->mbmi.mode = this_mode; + + /* We don't include the cost of the second reference here, because there are only + * three options: Last/Golden, ARF/Last or Golden/ARF, or in other words if you + * present them in that order, the second one is always known if the first is known */ + MB_MODE_INFO *t = &x->e_mbd.mode_info_context[-cpi->common.mode_info_stride].mbmi; + MB_MODE_INFO *l = &x->e_mbd.mode_info_context[-1].mbmi; + int cnt = (t->second_ref_frame != INTRA_FRAME) + (l->second_ref_frame != INTRA_FRAME); + dualmode_cost = vp8_cost_bit(cpi->prob_dualpred[cnt], 1); + } +#endif /* CONFIG_DUALPRED */ // Where skip is allowable add in the default per mb cost for the no skip case. 
// where we then decide to skip we have to delete this and replace it with the // cost of signallying a skip if (cpi->common.mb_no_coeff_skip) { - other_cost += vp8_cost_bit(cpi->prob_skip_false, 0); - rate2 += other_cost; + int prob_skip_cost = vp8_cost_bit(cpi->prob_skip_false, 0); + other_cost += prob_skip_cost; + rate2 += prob_skip_cost; } +#if CONFIG_DUALPRED + if (cpi->common.dual_pred_mode == HYBRID_PREDICTION) + { + rate2 += dualmode_cost; + } +#endif /* CONFIG_DUALPRED */ + /* Estimate the reference frame signaling cost and add it * to the rolling cost variable. */ @@ -2589,9 +2792,26 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int *returnintra = distortion2 ; } +#if CONFIG_DUALPRED + if (!disable_skip && + (this_mode == SPLITMV || x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME)) + { + if (this_rd < best_dual_rd) + best_dual_rd = this_rd; + if (this_rd < best_single_rd) + best_single_rd = this_rd; + if (this_rd < best_hybrid_rd) + best_hybrid_rd = this_rd; + } +#endif /* CONFIG_DUALPRED */ + // Did this mode help.. i.i is it the new best mode if (this_rd < best_rd || x->skip) { +#if CONFIG_DUALPRED + if (!mode_excluded) + { +#endif /* CONFIG_DUALPRED */ // Note index of best mode so far best_mode_index = mode_index; @@ -2624,7 +2844,9 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int { best_bmodes[i] = x->e_mbd.block[i].bmi; } - +#if CONFIG_DUALPRED + } +#endif /* CONFIG_DUALPRED */ // Testing this mode gave rise to an improvement in best error score. Lower threshold a bit for next time cpi->rd_thresh_mult[mode_index] = (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? 
cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT; @@ -2642,6 +2864,48 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index]; } +#if CONFIG_DUALPRED + /* keep record of best dual/single-only prediction */ + if (!disable_skip && + x->e_mbd.mode_info_context->mbmi.ref_frame != INTRA_FRAME && + this_mode != SPLITMV) + { + int single_rd, hybrid_rd, single_rate, hybrid_rate; + + if (cpi->common.dual_pred_mode == HYBRID_PREDICTION) + { + single_rate = rate2 - dualmode_cost; + hybrid_rate = rate2; + } + else + { + single_rate = rate2; + hybrid_rate = rate2 + dualmode_cost; + } + + single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2); + hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2); + + if (x->e_mbd.mode_info_context->mbmi.second_ref_frame == INTRA_FRAME && + single_rd < best_single_rd) + { + best_single_rd = single_rd; + if (0) printf("single rd [DMC: %d]: %d\n", dualmode_cost, single_rd); + } + else if (x->e_mbd.mode_info_context->mbmi.second_ref_frame != INTRA_FRAME && + single_rd < best_dual_rd) + { + best_dual_rd = single_rd; + if (0) printf("dual rd [DMC: %d]: %d\n", dualmode_cost, single_rd); + } + if (hybrid_rd < best_hybrid_rd) + { + best_hybrid_rd = hybrid_rd; + if (0) printf("hybrid rd [DMC: %d]: %d\n", best_hybrid_rd, hybrid_rd); + } + } +#endif /* CONFIG_DUALPRED */ + if (x->skip) break; @@ -2694,6 +2958,10 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int (cpi->common.mb_no_coeff_skip) ? 
1 : 0; x->e_mbd.mode_info_context->mbmi.partitioning = 0; +#if CONFIG_DUALPRED + *best_single_rd_diff = *best_dual_rd_diff = *best_hybrid_rd_diff = 0; +#endif /* CONFIG_DUALPRED */ + return; } @@ -2730,8 +2998,11 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rd_update_mvcount(cpi, x, &frame_best_ref_mv[xd->mode_info_context->mbmi.ref_frame]); - - +#if CONFIG_DUALPRED + *best_single_rd_diff = best_rd - best_single_rd; + *best_dual_rd_diff = best_rd - best_dual_rd; + *best_hybrid_rd_diff = best_rd - best_hybrid_rd; +#endif /* CONFIG_DUALPRED */ } void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate_) diff --git a/vp8/encoder/rdopt.h b/vp8/encoder/rdopt.h index 20fe4b5bd1..a22abd12e5 100644 --- a/vp8/encoder/rdopt.h +++ b/vp8/encoder/rdopt.h @@ -16,7 +16,9 @@ #define RDCOST_8x8(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) ) extern void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue); -extern void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra); +extern void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, + int *returnrate, int *returndistortion, int *returnintra, + int *best_single_rd_diff, int *best_dual_rd_diff, int *best_hybrid_rd_diff); extern void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate); extern void vp8_mv_pred -- GitLab