diff --git a/vp9/encoder/vp9_variance.c b/vp9/encoder/vp9_variance.c index 71867a9384598ef46249954bee091b91edff789c..a31a476f3c42569d18ec3aefb254e312626597bf 100644 --- a/vp9/encoder/vp9_variance.c +++ b/vp9/encoder/vp9_variance.c @@ -18,63 +18,34 @@ #include "vp9/encoder/vp9_variance.h" -void variance(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, - int w, - int h, - unsigned int *sse, - int *sum) { +void variance(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + int w, int h, unsigned int *sse, int *sum) { int i, j; - int diff; *sum = 0; *sse = 0; for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { - diff = src_ptr[j] - ref_ptr[j]; + const int diff = a[j] - b[j]; *sum += diff; *sse += diff * diff; } - src_ptr += source_stride; - ref_ptr += recon_stride; + a += a_stride; + b += b_stride; } } -/**************************************************************************** - * - * ROUTINE : filter_block2d_bil_first_pass - * - * INPUTS : uint8_t *src_ptr : Pointer to source block. - * uint32_t src_pixels_per_line : Stride of input block. - * uint32_t pixel_step : Offset between filter input - * samples (see notes). - * uint32_t output_height : Input block height. - * uint32_t output_width : Input block width. - * int32_t *vp9_filter : Array of 2 bi-linear filter - * taps. - * - * OUTPUTS : int32_t *output_ptr : Pointer to filtered block. - * - * RETURNS : void - * - * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in - * either horizontal or vertical direction to produce the - * filtered output block. Used to implement first-pass - * of 2-D separable filter. - * - * SPECIAL NOTES : Produces int32_t output to retain precision for next pass. - * Two filter taps should sum to VP9_FILTER_WEIGHT. - * pixel_step defines whether the filter is applied - * horizontally (pixel_step=1) or vertically (pixel_step= - * stride). - * It defines the offset required to move from one input - * to the next. 
- * - ****************************************************************************/ +// Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal +// or vertical direction to produce the filtered output block. Used to implement +// first-pass of 2-D separable filter. +// +// Produces uint16_t output to retain precision for next pass. Two filter taps +// should sum to VP9_FILTER_WEIGHT. pixel_step defines whether the filter is +// applied horizontally (pixel_step=1) or vertically (pixel_step=stride). It +// defines the offset required to move from one input to the next. static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr, uint16_t *output_ptr, unsigned int src_pixels_per_line, @@ -99,38 +70,14 @@ static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr, } } -/**************************************************************************** - * - * ROUTINE : filter_block2d_bil_second_pass - * - * INPUTS : int32_t *src_ptr : Pointer to source block. - * uint32_t src_pixels_per_line : Stride of input block. - * uint32_t pixel_step : Offset between filter input - * samples (see notes). - * uint32_t output_height : Input block height. - * uint32_t output_width : Input block width. - * int32_t *vp9_filter : Array of 2 bi-linear filter - * taps. - * - * OUTPUTS : uint16_t *output_ptr : Pointer to filtered block. - * - * RETURNS : void - * - * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in - * either horizontal or vertical direction to produce the - * filtered output block. Used to implement second-pass - * of 2-D separable filter. - * - * SPECIAL NOTES : Requires 32-bit input as produced by - * filter_block2d_bil_first_pass. - * Two filter taps should sum to VP9_FILTER_WEIGHT. - * pixel_step defines whether the filter is applied - * horizontally (pixel_step=1) or vertically (pixel_step= - * stride). - * It defines the offset required to move from one input - * to the next. 
- * - ****************************************************************************/ +// Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal +// or vertical direction to produce the filtered output block. Used to implement +// second-pass of 2-D separable filter. +// +// Requires 16-bit input as produced by var_filter_block2d_bil_first_pass. Two +// filter taps should sum to VP9_FILTER_WEIGHT. pixel_step defines whether the +// filter is applied horizontally (pixel_step=1) or vertically (pixel_step= +// stride). It defines the offset required to move from one input to the next. static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr, uint8_t *output_ptr, unsigned int src_pixels_per_line, @@ -156,9 +103,8 @@ static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr, unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) { unsigned int i, sum = 0; - for (i = 0; i < 256; i++) { - sum += (src_ptr[i] * src_ptr[i]); - } + for (i = 0; i < 256; i++) + sum += src_ptr[i] * src_ptr[i]; return sum; } @@ -183,12 +129,10 @@ unsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering + uint16_t fdata3[65 * 64]; uint8_t temp2[68 * 64]; - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 64, hfilter); @@ -205,13 +149,11 @@ unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr, int dst_pixels_per_line, unsigned int *sse, const uint8_t *second_pred) { - uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering + uint16_t fdata3[65 * 64]; uint8_t temp2[68 * 64]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, 
temp3, 64 * 64); // compound pred buffer - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 64, hfilter); @@ -240,12 +182,10 @@ unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering + uint16_t fdata3[65 * 64]; uint8_t temp2[68 * 64]; - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 65, 32, hfilter); @@ -262,13 +202,11 @@ unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr, int dst_pixels_per_line, unsigned int *sse, const uint8_t *second_pred) { - uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering + uint16_t fdata3[65 * 64]; uint8_t temp2[68 * 64]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 64); // compound pred buffer - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 64); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 65, 32, hfilter); @@ -297,12 +235,10 @@ unsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - 
uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering + uint16_t fdata3[33 * 32]; uint8_t temp2[36 * 32]; - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 17, 32, hfilter); @@ -319,13 +255,11 @@ unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr, int dst_pixels_per_line, unsigned int *sse, const uint8_t *second_pred) { - uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering + uint16_t fdata3[33 * 32]; uint8_t temp2[36 * 32]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 16); // compound pred buffer - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 16); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 17, 32, hfilter); @@ -354,12 +288,10 @@ unsigned int vp9_sub_pixel_variance16x32_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering + uint16_t fdata3[33 * 32]; uint8_t temp2[36 * 32]; - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 16, hfilter); @@ -376,13 +308,11 @@ unsigned int vp9_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr, int dst_pixels_per_line, unsigned int *sse, const 
uint8_t *second_pred) { - uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering + uint16_t fdata3[33 * 32]; uint8_t temp2[36 * 32]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 32); // compound pred buffer - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 32); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 16, hfilter); @@ -582,17 +512,12 @@ unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr, int dst_pixels_per_line, unsigned int *sse) { uint8_t temp2[20 * 16]; - const int16_t *hfilter, *vfilter; - uint16_t fdata3[5 * 4]; // Temp data buffer used in filtering - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + uint16_t fdata3[5 * 4]; + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); - // First filter 1d Horizontal var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 5, 4, hfilter); - - // Now filter Verticaly var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, vfilter); return vp9_variance4x4(temp2, 4, dst_ptr, dst_pixels_per_line, sse); @@ -607,18 +532,13 @@ unsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr, unsigned int *sse, const uint8_t *second_pred) { uint8_t temp2[20 * 16]; - const int16_t *hfilter, *vfilter; - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 4); // compound pred buffer - uint16_t fdata3[5 * 4]; // Temp data buffer used in filtering - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 4); + uint16_t fdata3[5 * 4]; + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const 
int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); - // First filter 1d Horizontal var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 5, 4, hfilter); - - // Now filter Verticaly var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, vfilter); vp9_comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4); return vp9_variance4x4(temp3, 4, dst_ptr, dst_pixels_per_line, sse); @@ -631,17 +551,14 @@ unsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t fdata3[9 * 8]; // Temp data buffer used in filtering + uint16_t fdata3[9 * 8]; uint8_t temp2[20 * 16]; - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 8, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter); - return vp9_variance8x8(temp2, 8, dst_ptr, dst_pixels_per_line, sse); } @@ -653,18 +570,17 @@ unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr, int dst_pixels_per_line, unsigned int *sse, const uint8_t *second_pred) { - uint16_t fdata3[9 * 8]; // Temp data buffer used in filtering + uint16_t fdata3[9 * 8]; uint8_t temp2[20 * 16]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 8); // compound pred buffer - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 8); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 8, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter); 
vp9_comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8); + return vp9_variance8x8(temp3, 8, dst_ptr, dst_pixels_per_line, sse); } @@ -675,12 +591,10 @@ unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t fdata3[17 * 16]; // Temp data buffer used in filtering + uint16_t fdata3[17 * 16]; uint8_t temp2[20 * 16]; - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 17, 16, hfilter); @@ -699,11 +613,9 @@ unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr, const uint8_t *second_pred) { uint16_t fdata3[17 * 16]; uint8_t temp2[20 * 16]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 16); // compound pred buffer - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 16); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 17, 16, hfilter); @@ -722,10 +634,8 @@ unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr, unsigned int *sse) { uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering uint8_t temp2[68 * 64]; - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 65, 64, hfilter); @@ -742,13 +652,11 @@ unsigned int 
vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr, int dst_pixels_per_line, unsigned int *sse, const uint8_t *second_pred) { - uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering + uint16_t fdata3[65 * 64]; uint8_t temp2[68 * 64]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64); // compound pred buffer - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 65, 64, hfilter); @@ -764,12 +672,10 @@ unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering + uint16_t fdata3[33 * 32]; uint8_t temp2[36 * 32]; - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 32, hfilter); @@ -786,13 +692,11 @@ unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr, int dst_pixels_per_line, unsigned int *sse, const uint8_t *second_pred) { - uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering + uint16_t fdata3[33 * 32]; uint8_t temp2[36 * 32]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 32); // compound pred buffer - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 32); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = 
BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 32, hfilter); @@ -928,12 +832,10 @@ unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t fdata3[16 * 9]; // Temp data buffer used in filtering + uint16_t fdata3[16 * 9]; uint8_t temp2[20 * 16]; - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 16, hfilter); @@ -950,13 +852,11 @@ unsigned int vp9_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr, int dst_pixels_per_line, unsigned int *sse, const uint8_t *second_pred) { - uint16_t fdata3[16 * 9]; // Temp data buffer used in filtering + uint16_t fdata3[16 * 9]; uint8_t temp2[20 * 16]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 8); // compound pred buffer - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 8); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 16, hfilter); @@ -974,10 +874,8 @@ unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr, unsigned int *sse) { uint16_t fdata3[9 * 16]; // Temp data buffer used in filtering uint8_t temp2[20 * 16]; - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); 
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 17, 8, hfilter); @@ -994,13 +892,11 @@ unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr, int dst_pixels_per_line, unsigned int *sse, const uint8_t *second_pred) { - uint16_t fdata3[9 * 16]; // Temp data buffer used in filtering + uint16_t fdata3[9 * 16]; uint8_t temp2[20 * 16]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 16); // compound pred buffer - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 16); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 17, 8, hfilter); @@ -1016,12 +912,10 @@ unsigned int vp9_sub_pixel_variance8x4_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t fdata3[8 * 5]; // Temp data buffer used in filtering + uint16_t fdata3[8 * 5]; uint8_t temp2[20 * 16]; - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 5, 8, hfilter); @@ -1038,13 +932,11 @@ unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr, int dst_pixels_per_line, unsigned int *sse, const uint8_t *second_pred) { - uint16_t fdata3[8 * 5]; // Temp data buffer used in filtering + uint16_t fdata3[8 * 5]; uint8_t temp2[20 * 16]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 4); // compound pred buffer - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 
* 4); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 5, 8, hfilter); @@ -1060,14 +952,12 @@ unsigned int vp9_sub_pixel_variance4x8_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t fdata3[5 * 8]; // Temp data buffer used in filtering + uint16_t fdata3[5 * 8]; // FIXME(jingning,rbultje): this temp2 buffer probably doesn't need to be // of this big? same issue appears in all other block size settings. uint8_t temp2[20 * 16]; - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 4, hfilter); @@ -1084,13 +974,11 @@ unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr, int dst_pixels_per_line, unsigned int *sse, const uint8_t *second_pred) { - uint16_t fdata3[5 * 8]; // Temp data buffer used in filtering + uint16_t fdata3[5 * 8]; uint8_t temp2[20 * 16]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 8); // compound pred buffer - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 8); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 4, hfilter); @@ -1106,9 +994,8 @@ void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width, for (i = 0; i < height; i++) { for (j = 0; j < width; j++) { - int tmp; - tmp = pred[j] + ref[j]; - comp_pred[j] = (tmp + 1) >> 1; + const int tmp = pred[j] + 
ref[j]; + comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1); } comp_pred += width; pred += width; diff --git a/vp9/encoder/vp9_variance.h b/vp9/encoder/vp9_variance.h index c9e39a1a206311908f3ea277a7055c5daf6f2736..4c8be71cd7574cfed1af35754140fad1b584b4ea 100644 --- a/vp9/encoder/vp9_variance.h +++ b/vp9/encoder/vp9_variance.h @@ -17,14 +17,10 @@ extern "C" { #endif -void variance(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, - int w, - int h, - unsigned int *sse, - int *sum); +void variance(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + int w, int h, + unsigned int *sse, int *sum); typedef unsigned int(*vp9_sad_fn_t)(const uint8_t *src_ptr, int source_stride,