diff --git a/vp8/encoder/arm/mcomp_arm.c b/vp8/encoder/arm/mcomp_arm.c
deleted file mode 100644
index 27146e23f5dac2eec4ea1f38034d41c615d364a4..0000000000000000000000000000000000000000
--- a/vp8/encoder/arm/mcomp_arm.c
+++ /dev/null
@@ -1,615 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "mcomp.h"
-#include "vpx_mem/vpx_mem.h"
-
-#include <stdio.h>
-#include <limits.h>
-#include <math.h>
-
-#ifdef ENTROPY_STATS
-static int mv_ref_ct [31] [4] [2];
-static int mv_mode_cts [4] [2];
-#endif
-
-extern unsigned int vp8_sub_pixel_variance16x16s_neon
-(
-    unsigned char *src_ptr,
-    int src_pixels_per_line,
-    int xoffset,
-    int yoffset,
-    unsigned char *dst_ptr,
-    int dst_pixels_per_line,
-    unsigned int *sse
-);
-extern unsigned int vp8_sub_pixel_variance16x16s_4_0_neon
-(
-    unsigned char *src_ptr,
-    int src_pixels_per_line,
-    unsigned char *dst_ptr,
-    int dst_pixels_per_line,
-    unsigned int *sse
-);
-extern unsigned int vp8_sub_pixel_variance16x16s_0_4_neon
-(
-    unsigned char *src_ptr,
-    int src_pixels_per_line,
-    unsigned char *dst_ptr,
-    int dst_pixels_per_line,
-    unsigned int *sse
-);
-extern unsigned int vp8_sub_pixel_variance16x16s_4_4_neon
-(
-    unsigned char *src_ptr,
-    int src_pixels_per_line,
-    unsigned char *dst_ptr,
-    int dst_pixels_per_line,
-    unsigned int *sse
-);
-
-
-int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])
-{
-    int bestmse = INT_MAX;
-    MV startmv;
-    //MV this_mv;
-    MV this_mv;
-    unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col;
-    unsigned char *z = (*(b->base_src) + b->src);
-    int left, right, up, down, diag;
-    unsigned int sse;
-    int whichdir ;
-
-
-    // Trap uncodable vectors
-    if ((abs((bestmv->col << 3) - ref_mv->col) > MAX_FULL_PEL_VAL) || (abs((bestmv->row << 3) - ref_mv->row) > MAX_FULL_PEL_VAL))
-    {
-        bestmv->row <<= 3;
-        bestmv->col <<= 3;
-        return INT_MAX;
-    }
-
-    // central mv
-    bestmv->row <<= 3;
-    bestmv->col <<= 3;
-    startmv = *bestmv;
-
-    // calculate central point error
-    bestmse = vf(y, d->pre_stride, z, b->src_stride, &sse);
-    bestmse += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
-
-    // go left then right and check error
-    this_mv.row = startmv.row;
-    this_mv.col = ((startmv.col - 8) | 4);
-    left = vp8_sub_pixel_variance16x16s_4_0_neon(y - 1, d->pre_stride, z, b->src_stride, &sse);
-    left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (left < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = left;
-    }
-
-    this_mv.col += 8;
-    right = vp8_sub_pixel_variance16x16s_4_0_neon(y, d->pre_stride, z, b->src_stride, &sse);
-    right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (right < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = right;
-    }
-
-    // go up then down and check error
-    this_mv.col = startmv.col;
-    this_mv.row = ((startmv.row - 8) | 4);
-    up = vp8_sub_pixel_variance16x16s_0_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
-    up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (up < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = up;
-    }
-
-    this_mv.row += 8;
-    down = vp8_sub_pixel_variance16x16s_0_4_neon(y, d->pre_stride, z, b->src_stride, &sse);
-    down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (down < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = down;
-    }
-
-
-    // now check 1 more diagonal
-    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
-    //for(whichdir =0;whichdir<4;whichdir++)
-    //{
-    this_mv = startmv;
-
-    switch (whichdir)
-    {
-    case 0:
-        this_mv.col = (this_mv.col - 8) | 4;
-        this_mv.row = (this_mv.row - 8) | 4;
-        diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
-        break;
-    case 1:
-        this_mv.col += 4;
-        this_mv.row = (this_mv.row - 8) | 4;
-        diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
-        break;
-    case 2:
-        this_mv.col = (this_mv.col - 8) | 4;
-        this_mv.row += 4;
-        diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1, d->pre_stride, z, b->src_stride, &sse);
-        break;
-    case 3:
-        this_mv.col += 4;
-        this_mv.row += 4;
-        diag = vp8_sub_pixel_variance16x16s_4_4_neon(y, d->pre_stride, z, b->src_stride, &sse);
-        break;
-    }
-
-    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (diag < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = diag;
-    }
-
-// }
-
-
-    // time to check quarter pels.
-    if (bestmv->row < startmv.row)
-        y -= d->pre_stride;
-
-    if (bestmv->col < startmv.col)
-        y--;
-
-    startmv = *bestmv;
-
-
-
-    // go left then right and check error
-    this_mv.row = startmv.row;
-
-    if (startmv.col & 7)
-    {
-        this_mv.col = startmv.col - 2;
-        left = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
-    }
-    else
-    {
-        this_mv.col = (startmv.col - 8) | 6;
-        left = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);
-    }
-
-    left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (left < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = left;
-    }
-
-    this_mv.col += 4;
-    right = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
-    right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (right < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = right;
-    }
-
-    // go up then down and check error
-    this_mv.col = startmv.col;
-
-    if (startmv.row & 7)
-    {
-        this_mv.row = startmv.row - 2;
-        up = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
-    }
-    else
-    {
-        this_mv.row = (startmv.row - 8) | 6;
-        up = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
-    }
-
-    up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (up < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = up;
-    }
-
-    this_mv.row += 4;
-    down = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
-    down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (down < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = down;
-    }
-
-
-    // now check 1 more diagonal
-    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
-
-// for(whichdir=0;whichdir<4;whichdir++)
-// {
-    this_mv = startmv;
-
-    switch (whichdir)
-    {
-    case 0:
-
-        if (startmv.row & 7)
-        {
-            this_mv.row -= 2;
-
-            if (startmv.col & 7)
-            {
-                this_mv.col -= 2;
-                diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
-            }
-            else
-            {
-                this_mv.col = (startmv.col - 8) | 6;
-                diag = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;
-            }
-        }
-        else
-        {
-            this_mv.row = (startmv.row - 8) | 6;
-
-            if (startmv.col & 7)
-            {
-                this_mv.col -= 2;
-                diag = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
-            }
-            else
-            {
-                this_mv.col = (startmv.col - 8) | 6;
-                diag = svf(y - d->pre_stride - 1, d->pre_stride, 6, 6, z, b->src_stride, &sse);
-            }
-        }
-
-        break;
-    case 1:
-        this_mv.col += 2;
-
-        if (startmv.row & 7)
-        {
-            this_mv.row -= 2;
-            diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
-        }
-        else
-        {
-            this_mv.row = (startmv.row - 8) | 6;
-            diag = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
-        }
-
-        break;
-    case 2:
-        this_mv.row += 2;
-
-        if (startmv.col & 7)
-        {
-            this_mv.col -= 2;
-            diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
-        }
-        else
-        {
-            this_mv.col = (startmv.col - 8) | 6;
-            diag = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;
-        }
-
-        break;
-    case 3:
-        this_mv.col += 2;
-        this_mv.row += 2;
-        diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
-        break;
-    }
-
-    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (diag < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = diag;
-    }
-
-// }
-
-    return bestmse;
-}
-
-int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])
-{
-    int bestmse = INT_MAX;
-    MV startmv;
-    //MV this_mv;
-    MV this_mv;
-    unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col;
-    unsigned char *z = (*(b->base_src) + b->src);
-    int left, right, up, down, diag;
-    unsigned int sse;
-
-    // Trap uncodable vectors
-    if ((abs((bestmv->col << 3) - ref_mv->col) > MAX_FULL_PEL_VAL) || (abs((bestmv->row << 3) - ref_mv->row) > MAX_FULL_PEL_VAL))
-    {
-        bestmv->row <<= 3;
-        bestmv->col <<= 3;
-        return INT_MAX;
-    }
-
-    // central mv
-    bestmv->row <<= 3;
-    bestmv->col <<= 3;
-    startmv = *bestmv;
-
-    // calculate central point error
-    bestmse = vf(y, d->pre_stride, z, b->src_stride, &sse);
-    bestmse += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
-
-    // go left then right and check error
-    this_mv.row = startmv.row;
-    this_mv.col = ((startmv.col - 8) | 4);
-    left = vp8_sub_pixel_variance16x16s_4_0_neon(y - 1, d->pre_stride, z, b->src_stride, &sse);
-    left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (left < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = left;
-    }
-
-    this_mv.col += 8;
-    right = vp8_sub_pixel_variance16x16s_4_0_neon(y, d->pre_stride, z, b->src_stride, &sse);
-    right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (right < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = right;
-    }
-
-    // go up then down and check error
-    this_mv.col = startmv.col;
-    this_mv.row = ((startmv.row - 8) | 4);
-    up = vp8_sub_pixel_variance16x16s_0_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
-    up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (up < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = up;
-    }
-
-    this_mv.row += 8;
-    down = vp8_sub_pixel_variance16x16s_0_4_neon(y, d->pre_stride, z, b->src_stride, &sse);
-    down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (down < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = down;
-    }
-
-    // somewhat strangely not doing all the diagonals for half pel is slower than doing them.
-#if 0
-    // now check 1 more diagonal
-
-    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
-    this_mv = startmv;
-
-    switch (whichdir)
-    {
-    case 0:
-        this_mv.col = (this_mv.col - 8) | 4;
-        this_mv.row = (this_mv.row - 8) | 4;
-        diag = svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
-        break;
-    case 1:
-        this_mv.col += 4;
-        this_mv.row = (this_mv.row - 8) | 4;
-        diag = svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
-        break;
-    case 2:
-        this_mv.col = (this_mv.col - 8) | 4;
-        this_mv.row += 4;
-        diag = svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse);
-        break;
-    case 3:
-        this_mv.col += 4;
-        this_mv.row += 4;
-        diag = svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse);
-        break;
-    }
-
-    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (diag < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = diag;
-    }
-
-#else
-    this_mv.col = (this_mv.col - 8) | 4;
-    this_mv.row = (this_mv.row - 8) | 4;
-    diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
-    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (diag < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = diag;
-    }
-
-    this_mv.col += 8;
-    diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
-    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (diag < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = diag;
-    }
-
-    this_mv.col = (this_mv.col - 8) | 4;
-    this_mv.row = startmv.row + 4;
-    diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1, d->pre_stride, z, b->src_stride, &sse);
-    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (diag < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = diag;
-    }
-
-    this_mv.col += 8;
-    diag = vp8_sub_pixel_variance16x16s_4_4_neon(y, d->pre_stride, z, b->src_stride, &sse);
-    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (diag < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = diag;
-    }
-
-#endif
-    return bestmse;
-}
-
-
-#ifdef ENTROPY_STATS
-void print_mode_context(void)
-{
-    FILE *f = fopen("modecont.c", "w");
-    int i, j;
-
-    fprintf(f, "#include \"entropy.h\"\n");
-    fprintf(f, "const int vp8_mode_contexts[6][4] =\n");
-    fprintf(f, "{\n");
-
-    for (j = 0; j < 6; j++)
-    {
-        fprintf(f, " { // %d \n", j);
-        fprintf(f, " ");
-
-        for (i = 0; i < 4; i++)
-        {
-            int overal_prob;
-            int this_prob;
-            int count; // = mv_ref_ct[j][i][0]+mv_ref_ct[j][i][1];
-
-            // Overall probs
-            count = mv_mode_cts[i][0] + mv_mode_cts[i][1];
-
-            if (count)
-                overal_prob = 256 * mv_mode_cts[i][0] / count;
-            else
-                overal_prob = 128;
-
-            if (overal_prob == 0)
-                overal_prob = 1;
-
-            // context probs
-            count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1];
-
-            if (count)
-                this_prob = 256 * mv_ref_ct[j][i][0] / count;
-            else
-                this_prob = 128;
-
-            if (this_prob == 0)
-                this_prob = 1;
-
-            fprintf(f, "%5d, ", this_prob);
-            //fprintf(f,"%5d, %5d, %8d,", this_prob, overal_prob, (this_prob << 10)/overal_prob);
-            //fprintf(f,"%8d, ", (this_prob << 10)/overal_prob);
-        }
-
-        fprintf(f, " },\n");
-    }
-
-    fprintf(f, "};\n");
-    fclose(f);
-}
-
-/* MV ref count ENTROPY_STATS stats code */
-#ifdef ENTROPY_STATS
-void init_mv_ref_counts()
-{
-    vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
-    vpx_memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
-}
-
-void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4])
-{
-    if (m == ZEROMV)
-    {
-        ++mv_ref_ct [ct[0]] [0] [0];
-        ++mv_mode_cts[0][0];
-    }
-    else
-    {
-        ++mv_ref_ct [ct[0]] [0] [1];
-        ++mv_mode_cts[0][1];
-
-        if (m == NEARESTMV)
-        {
-            ++mv_ref_ct [ct[1]] [1] [0];
-            ++mv_mode_cts[1][0];
-        }
-        else
-        {
-            ++mv_ref_ct [ct[1]] [1] [1];
-            ++mv_mode_cts[1][1];
-
-            if (m == NEARMV)
-            {
-                ++mv_ref_ct [ct[2]] [2] [0];
-                ++mv_mode_cts[2][0];
-            }
-            else
-            {
-                ++mv_ref_ct [ct[2]] [2] [1];
-                ++mv_mode_cts[2][1];
-
-                if (m == NEWMV)
-                {
-                    ++mv_ref_ct [ct[3]] [3] [0];
-                    ++mv_mode_cts[3][0];
-                }
-                else
-                {
-                    ++mv_ref_ct [ct[3]] [3] [1];
-                    ++mv_mode_cts[3][1];
-                }
-            }
-        }
-    }
-}
-
-#endif/* END MV ref count ENTROPY_STATS stats code */
-
-#endif
diff --git a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
index 1c1441cc2c0769711021d9a0f59bff62279ee2ea..0a2b71c49c6baab9fb3cc39f9caa38f842d2618e 100644
--- a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
@@ -9,9 +9,9 @@
 ;
 
-    EXPORT  |vp8_sub_pixel_variance16x16s_4_0_neon|
-    EXPORT  |vp8_sub_pixel_variance16x16s_0_4_neon|
-    EXPORT  |vp8_sub_pixel_variance16x16s_4_4_neon|
+    EXPORT  |vp8_variance_halfpixvar16x16_h_neon|
+    EXPORT  |vp8_variance_halfpixvar16x16_v_neon|
+    EXPORT  |vp8_variance_halfpixvar16x16_hv_neon|
     EXPORT  |vp8_sub_pixel_variance16x16s_neon|
     ARM
     REQUIRE8
 
@@ -20,7 +20,7 @@
     AREA ||.text||, CODE, READONLY, ALIGN=2
 
 ;================================================
-;unsigned int vp8_sub_pixel_variance16x16s_4_0_neon
+;unsigned int vp8_variance_halfpixvar16x16_h_neon
 ;(
 ;   unsigned char *src_ptr, r0
 ;   int src_pixels_per_line, r1
@@ -29,7 +29,7 @@
 ;   unsigned int *sse
 ;);
 ;================================================
-|vp8_sub_pixel_variance16x16s_4_0_neon| PROC
+|vp8_variance_halfpixvar16x16_h_neon| PROC
 
    push            {lr}
    mov             r12, #4                     ;loop counter
@@ -120,7 +120,7 @@ vp8_filt_fpo16x16s_4_0_loop_neon
    ENDP
 
 ;================================================
-;unsigned int vp8_sub_pixel_variance16x16s_0_4_neon
+;unsigned int vp8_variance_halfpixvar16x16_v_neon
 ;(
 ;   unsigned char *src_ptr, r0
 ;   int src_pixels_per_line, r1
@@ -129,7 +129,7 @@ vp8_filt_fpo16x16s_4_0_loop_neon
 ;   unsigned int *sse
 ;);
 ;================================================
-|vp8_sub_pixel_variance16x16s_0_4_neon| PROC
+|vp8_variance_halfpixvar16x16_v_neon| PROC
 
    push            {lr}
    mov             r12, #4                     ;loop counter
@@ -216,7 +216,7 @@ vp8_filt_spo16x16s_0_4_loop_neon
    ENDP
 
 ;================================================
-;unsigned int vp8_sub_pixel_variance16x16s_4_4_neon
+;unsigned int vp8_variance_halfpixvar16x16_hv_neon
 ;(
 ;   unsigned char *src_ptr, r0
 ;   int src_pixels_per_line, r1
@@ -225,7 +225,7 @@ vp8_filt_spo16x16s_0_4_loop_neon
 ;   unsigned int *sse
 ;);
 ;================================================
-|vp8_sub_pixel_variance16x16s_4_4_neon| PROC
+|vp8_variance_halfpixvar16x16_hv_neon| PROC
    push            {lr}
 
    vld1.u8         {d0, d1, d2, d3}, [r0], r1      ;load src data
diff --git a/vp8/encoder/arm/variance_arm.h b/vp8/encoder/arm/variance_arm.h
index fb9dd5a5b0402a94778b7d0a1084c6bec9119e01..0e5f62fcf776abc6e67cdb7b5c10a99649254408 100644
--- a/vp8/encoder/arm/variance_arm.h
+++ b/vp8/encoder/arm/variance_arm.h
@@ -30,6 +30,9 @@ extern prototype_subpixvariance(vp8_sub_pixel_variance8x8_neon);
 //extern prototype_subpixvariance(vp8_sub_pixel_variance8x16_c);
 //extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_c);
 extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_neon);
+extern prototype_variance(vp8_variance_halfpixvar16x16_h_neon);
+extern prototype_variance(vp8_variance_halfpixvar16x16_v_neon);
+extern prototype_variance(vp8_variance_halfpixvar16x16_hv_neon);
 
 //extern prototype_getmbss(vp8_get_mb_ss_c);
 extern prototype_variance(vp8_mse16x16_neon);
@@ -84,6 +87,15 @@ extern prototype_sad(vp8_get4x4sse_cs_neon);
 #undef vp8_variance_subpixvar16x16
 #define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_neon
 
+#undef vp8_variance_halfpixvar16x16_h
+#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_neon
+
+#undef vp8_variance_halfpixvar16x16_v
+#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_neon
+
+#undef vp8_variance_halfpixvar16x16_hv
+#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_neon
+
 //#undef vp8_variance_getmbss
 //#define vp8_variance_getmbss vp8_get_mb_ss_c
 
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index 607c3d23645b81e88aa2752e6e40172f68ae9512..691aee0a60517c29306279c6f675adaa08165f65 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -462,12 +462,11 @@ void vp8_first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, MV *ref_mv, MV *
     int step_param = 3; //3;          // Dont search over full range for first pass
     int further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; //3;
     int n;
-    vp8_variance_fn_ptr_t v_fn_ptr;
+    vp8_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16];
     int new_mv_mode_penalty = 256;
 
+    // override the default variance function to use MSE
     v_fn_ptr.vf = VARIANCE_INVOKE(IF_RTCD(&cpi->rtcd.variance), mse16x16);
-    v_fn_ptr.sdf = cpi->fn_ptr.sdf;
-    v_fn_ptr.sdx4df = cpi->fn_ptr.sdx4df;
 
     // Set up pointers for this macro block recon buffer
     xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset;
diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c
index c5cae9cf2699445b898181993de874b12e47e38a..8cc63f83d7707386eec218cf702ce83105a77485 100644
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -186,7 +186,7 @@ void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
 #define MVC(r,c) (((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
 #define PRE(r,c) (*(d->base_pre) + d->pre + ((r)>>2) * d->pre_stride + ((c)>>2)) // pointer to predictor base of a motionvector
 #define SP(x) (((x)&3)<<1) // convert motion vector component to offset for svf calc
-#define DIST(r,c) svf( PRE(r,c), d->pre_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function.
+#define DIST(r,c) vfp->svf( PRE(r,c), d->pre_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function.
 #define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
 #define ERR(r,c) (MVC(r,c)+DIST(r,c)) // returns distortion + motion vector cost
 #define CHECK_BETTER(v,r,c) IFMVCV(r,c,{if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; }}, v=INT_MAX;)// checks if (r,c) has better score than previous best
@@ -195,7 +195,7 @@ void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
 
 //#define CHECK_BETTER(v,r,c) if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; }
 
-int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])
+int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2])
 {
     unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col;
     unsigned char *z = (*(b->base_src) + b->src);
@@ -220,7 +220,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
     bestmv->col <<= 3;
 
     // calculate central point error
-    besterr = vf(y, d->pre_stride, z, b->src_stride, &sse);
+    besterr = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);
     besterr += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
 
     // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected)
@@ -309,7 +309,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
 #undef CHECK_BETTER
 #undef MIN
 #undef MAX
-int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])
+int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2])
 {
     int bestmse = INT_MAX;
     MV startmv;
@@ -336,13 +336,13 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
     startmv = *bestmv;
 
     // calculate central point error
-    bestmse = vf(y, d->pre_stride, z, b->src_stride, &sse);
+    bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);
     bestmse += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
 
     // go left then right and check error
     this_mv.row = startmv.row;
     this_mv.col = ((startmv.col - 8) | 4);
-    left = svf(y - 1, d->pre_stride, 4, 0, z, b->src_stride, &sse);
+    left = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse);
     left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 
     if (left < bestmse)
@@ -352,7 +352,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
     }
 
     this_mv.col += 8;
-    right = svf(y, d->pre_stride, 4, 0, z, b->src_stride, &sse);
+    right = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse);
     right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 
     if (right < bestmse)
@@ -364,7 +364,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
     }
 
     // go up then down and check error
     this_mv.col = startmv.col;
     this_mv.row = ((startmv.row - 8) | 4);
-    up = svf(y - d->pre_stride, d->pre_stride, 0, 4, z, b->src_stride, &sse);
+    up = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
    up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 
    if (up < bestmse)
@@ -374,7 +374,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
     }
 
     this_mv.row += 8;
-    down = svf(y, d->pre_stride, 0, 4, z, b->src_stride, &sse);
+    down = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse);
     down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 
     if (down < bestmse)
@@ -386,10 +386,6 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
 
     // now check 1 more diagonal
     whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
-    // whichdir must be 0-4. Therefore, one of the cases below
-    // must run through. However, because there is no default
-    // and diag is not set elsewhere, we get a compile warning
-    diag = 0;
     //for(whichdir =0;whichdir<4;whichdir++)
     //{
     this_mv = startmv;
@@ -399,22 +395,22 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
     case 0:
         this_mv.col = (this_mv.col - 8) | 4;
         this_mv.row = (this_mv.row - 8) | 4;
-        diag = svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+        diag = vfp->svf_halfpix_hv(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
         break;
     case 1:
         this_mv.col += 4;
         this_mv.row = (this_mv.row - 8) | 4;
-        diag = svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+        diag = vfp->svf_halfpix_hv(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
         break;
     case 2:
         this_mv.col = (this_mv.col - 8) | 4;
         this_mv.row += 4;
-        diag = svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+        diag = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse);
         break;
     case 3:
         this_mv.col += 4;
         this_mv.row += 4;
-        diag = svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+        diag = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse);
         break;
     }
 
@@ -446,12 +442,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
     if (startmv.col & 7)
     {
         this_mv.col = startmv.col - 2;
-        left = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+        left = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
     }
     else
     {
         this_mv.col = (startmv.col - 8) | 6;
-        left = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);
+        left = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);
     }
 
     left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
@@ -463,7 +459,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
     }
 
     this_mv.col += 4;
-    right = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+    right = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
     right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 
     if (right < bestmse)
@@ -478,12 +474,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
     if (startmv.row & 7)
     {
         this_mv.row = startmv.row - 2;
-        up = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+        up = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
     }
     else
     {
         this_mv.row = (startmv.row - 8) | 6;
-        up = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
+        up = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
     }
 
     up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
@@ -495,7 +491,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
     }
 
     this_mv.row += 4;
-    down = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+    down = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
     down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 
     if (down < bestmse)
@@ -523,12 +519,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
 
             if (startmv.col & 7)
             {
                 this_mv.col -= 2;
-                diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+                diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
             }
             else
             {
                 this_mv.col = (startmv.col - 8) | 6;
-                diag = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;
+                diag = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;
             }
         }
@@ -538,12 +534,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
 
             if (startmv.col & 7)
             {
                 this_mv.col -= 2;
-                diag = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
+                diag = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
             }
             else
             {
                 this_mv.col = (startmv.col - 8) | 6;
-                diag = svf(y - d->pre_stride - 1, d->pre_stride, 6, 6, z, b->src_stride, &sse);
+                diag = vfp->svf(y - d->pre_stride - 1, d->pre_stride, 6, 6, z, b->src_stride, &sse);
             }
         }
@@ -554,12 +550,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
         if (startmv.row & 7)
         {
             this_mv.row -= 2;
-            diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+            diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
         }
         else
         {
             this_mv.row = (startmv.row - 8) | 6;
-            diag = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
+            diag = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
         }
 
         break;
@@ -569,19 +565,19 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
         if (startmv.col & 7)
         {
             this_mv.col -= 2;
-            diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+            diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
         }
         else
         {
             this_mv.col = (startmv.col - 8) | 6;
-            diag = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;
+            diag = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;
         }
 
         break;
     case 3:
         this_mv.col += 2;
         this_mv.row += 2;
-        diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+        diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
         break;
     }
 
@@ -598,7 +594,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
     return bestmse;
 }
 
-int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])
+int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2])
 {
     int bestmse = INT_MAX;
     MV startmv;
@@ -623,13 +619,13 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
     startmv = *bestmv;
 
     // calculate central point error
-    bestmse = vf(y, d->pre_stride, z, b->src_stride, &sse);
+    bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);
     bestmse += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
 
     // go left then right and check error
     this_mv.row = startmv.row;
     this_mv.col = ((startmv.col - 8) | 4);
-    left = svf(y - 1, d->pre_stride, 4, 0, z, b->src_stride, &sse);
+    left = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse);
     left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 
     if (left < bestmse)
@@ -639,7 +635,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
     }
 
     this_mv.col += 8;
-    right = svf(y, d->pre_stride, 4, 0, z, b->src_stride, &sse);
+    right = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse);
     right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 
     if (right < bestmse)
@@ -651,7 +647,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
     // go up then down and check error
     this_mv.col = startmv.col;
     this_mv.row = ((startmv.row - 8) | 4);
-    up = svf(y - d->pre_stride, d->pre_stride, 0, 4, z, b->src_stride, &sse);
+    up = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
     up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 
     if (up < bestmse)
@@ -661,7 +657,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
     }
 
     this_mv.row += 8;
-    down = svf(y, d->pre_stride, 0, 4, z, b->src_stride, &sse);
+    down = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse);
     down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 
     if (down < bestmse)
@@ -681,22 +677,22 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
     case 0:
         this_mv.col = (this_mv.col - 8) | 4;
         this_mv.row = (this_mv.row - 8) | 4;
-        diag = svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+        diag = vfp->svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
         break;
     case 1:
         this_mv.col += 4;
         this_mv.row = (this_mv.row - 8) | 4;
-        diag = svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+        diag = vfp->svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
         break;
     case 2:
         this_mv.col = (this_mv.col - 8) | 4;
         this_mv.row += 4;
-        diag = svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+        diag = vfp->svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse);
         break;
     case 3:
         this_mv.col += 4;
         this_mv.row += 4;
-        diag = svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+        diag = vfp->svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse);
         break;
     }
 
@@ -711,7 +707,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
 #else
     this_mv.col = (this_mv.col - 8) | 4;
     this_mv.row = (this_mv.row - 8) | 4;
-    diag = svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+    diag = vfp->svf_halfpix_hv(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
     diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 
     if (diag < bestmse)
@@ -721,7 +717,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
     }
 
     this_mv.col += 8;
-    diag = svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+    diag = vfp->svf_halfpix_hv(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
     diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 
     if (diag < bestmse)
@@ -732,7 +728,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
 
     this_mv.col = (this_mv.col - 8) | 4;
     this_mv.row = startmv.row + 4;
-    diag = svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+    diag = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse);
     diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 
     if (diag < bestmse)
@@ -742,7 +738,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
     }
 
     this_mv.col += 8;
-    diag = svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+    diag = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse);
     diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 
     if (diag < bestmse)
@@ -758,7 +754,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
 
 #define MVC(r,c) (((mvsadcost[0][((r)<<2)-rr] + mvsadcost[1][((c)<<2) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
 #define PRE(r,c) (*(d->base_pre) + d->pre + (r) * d->pre_stride + (c)) // pointer to predictor base of a motionvector
-#define DIST(r,c,v) sf( src,src_stride,PRE(r,c),d->pre_stride, v) // returns sad error score.
+#define DIST(r,c,v) vfp->sdf( src,src_stride,PRE(r,c),d->pre_stride, v) // returns sad error score.
 #define ERR(r,c,v) (MVC(r,c)+DIST(r,c,v)) // returns distortion + motion vector cost
 #define CHECK_BETTER(v,r,c) if ((v = ERR(r,c,besterr)) < besterr) { besterr = v; br=r; bc=c; } // checks if (r,c) has better score than previous best
 static const MV next_chkpts[6][3] =
@@ -780,8 +776,7 @@ int vp8_hex_search
     int search_param,
     int error_per_bit,
     int *num00,
-    vp8_variance_fn_t vf,
-    vp8_sad_fn_t sf,
+    const vp8_variance_fn_ptr_t *vfp,
     int *mvsadcost[2],
     int *mvcost[2]
 )
@@ -896,7 +891,7 @@ cal_neighbors:
     best_mv->row = br;
     best_mv->col = bc;
 
-    return vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + MVC(br, bc) ;
+    return vfp->vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + MVC(br, bc) ;
 }
 #undef MVC
 #undef PRE
diff --git a/vp8/encoder/mcomp.h b/vp8/encoder/mcomp.h
index 7cc924279afe710afe8d0977b4d9501db351308f..181e95822b88eff3b6570661272325ea62b7af7c 100644
--- a/vp8/encoder/mcomp.h
+++ b/vp8/encoder/mcomp.h
@@ -42,14 +42,15 @@ extern int vp8_hex_search
     int search_param,
     int error_per_bit,
     int *num00,
-    vp8_variance_fn_t vf,
-    vp8_sad_fn_t sf,
+    const vp8_variance_fn_ptr_t *vf,
    int *mvsadcost[2],
    int *mvcost[2]
 );
 
-typedef int (fractional_mv_step_fp)(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2]);
+typedef int (fractional_mv_step_fp)
+    (MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv,
+     int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2]);
 extern fractional_mv_step_fp vp8_find_best_sub_pixel_step_iteratively;
 extern fractional_mv_step_fp vp8_find_best_sub_pixel_step;
 extern fractional_mv_step_fp vp8_find_best_half_pixel_step;
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index bd41b2cb6a88da76b42c201cdda142946dabb988..7a78b2901285a01292e7eb321b486c23ae777bc6 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -2334,11 +2334,50 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
 
     vp8cx_create_encoder_threads(cpi);
 
-    cpi->fn_ptr.sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16);
-    cpi->fn_ptr.vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16);
-    cpi->fn_ptr.svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar16x16);
-    cpi->fn_ptr.sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x3);
-    cpi->fn_ptr.sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x4d);
+    cpi->fn_ptr[BLOCK_16X16].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16);
+    cpi->fn_ptr[BLOCK_16X16].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16);
+    cpi->fn_ptr[BLOCK_16X16].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar16x16);
+    cpi->fn_ptr[BLOCK_16X16].svf_halfpix_h = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_h);
+    cpi->fn_ptr[BLOCK_16X16].svf_halfpix_v = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_v);
+    cpi->fn_ptr[BLOCK_16X16].svf_halfpix_hv = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_hv);
+    cpi->fn_ptr[BLOCK_16X16].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x3);
+    cpi->fn_ptr[BLOCK_16X16].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x4d);
+
+    cpi->fn_ptr[BLOCK_16X8].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8);
+    cpi->fn_ptr[BLOCK_16X8].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x8);
+    cpi->fn_ptr[BLOCK_16X8].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar16x8);
+    cpi->fn_ptr[BLOCK_16X8].svf_halfpix_h = NULL;
+    cpi->fn_ptr[BLOCK_16X8].svf_halfpix_v = NULL;
+    cpi->fn_ptr[BLOCK_16X8].svf_halfpix_hv = NULL;
+    cpi->fn_ptr[BLOCK_16X8].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x3);
+    cpi->fn_ptr[BLOCK_16X8].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x4d);
+
+    cpi->fn_ptr[BLOCK_8X16].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16);
+    cpi->fn_ptr[BLOCK_8X16].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var8x16);
+    cpi->fn_ptr[BLOCK_8X16].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar8x16);
+    cpi->fn_ptr[BLOCK_8X16].svf_halfpix_h = NULL;
+    cpi->fn_ptr[BLOCK_8X16].svf_halfpix_v = NULL;
+    cpi->fn_ptr[BLOCK_8X16].svf_halfpix_hv = NULL;
+    cpi->fn_ptr[BLOCK_8X16].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x3);
+    cpi->fn_ptr[BLOCK_8X16].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x4d);
+
+    cpi->fn_ptr[BLOCK_8X8].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8);
+    cpi->fn_ptr[BLOCK_8X8].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var8x8);
+    cpi->fn_ptr[BLOCK_8X8].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar8x8);
+    cpi->fn_ptr[BLOCK_8X8].svf_halfpix_h = NULL;
+    cpi->fn_ptr[BLOCK_8X8].svf_halfpix_v = NULL;
+    cpi->fn_ptr[BLOCK_8X8].svf_halfpix_hv = NULL;
+    cpi->fn_ptr[BLOCK_8X8].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x3);
+    cpi->fn_ptr[BLOCK_8X8].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x4d);
+
+    cpi->fn_ptr[BLOCK_4X4].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4);
+    cpi->fn_ptr[BLOCK_4X4].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var4x4);
+    cpi->fn_ptr[BLOCK_4X4].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar4x4);
+    cpi->fn_ptr[BLOCK_4X4].svf_halfpix_h = NULL;
+    cpi->fn_ptr[BLOCK_4X4].svf_halfpix_v = NULL;
+    cpi->fn_ptr[BLOCK_4X4].svf_halfpix_hv = NULL;
+    cpi->fn_ptr[BLOCK_4X4].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x3);
+    cpi->fn_ptr[BLOCK_4X4].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x4d);
 
 #if !(CONFIG_REALTIME_ONLY)
     cpi->full_search_sad = SEARCH_INVOKE(&cpi->rtcd.search, full_search);
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index eb516e9277ca32ba4e80c1e0751af33cb3d1034f..81e32f031d845995c03a043cdcddab04dc553b55 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -229,6 +229,16 @@ typedef struct VP8_ENCODER_RTCD
     vp8_search_rtcd_vtable_t search;
 } VP8_ENCODER_RTCD;
 
+enum
+{
+    BLOCK_16X8,
+    BLOCK_8X16,
+    BLOCK_8X8,
+    BLOCK_4X4,
+    BLOCK_16X16,
+    BLOCK_MAX_SEGMENTS
+};
+
 typedef struct
 {
 
@@ -591,7 +601,7 @@ typedef struct
     fractional_mv_step_fp *find_fractional_mv_step;
     vp8_full_search_fn_t full_search_sad;
     vp8_diamond_search_fn_t diamond_search_sad;
-    vp8_variance_fn_ptr_t fn_ptr;
+    vp8_variance_fn_ptr_t fn_ptr[BLOCK_MAX_SEGMENTS];
     unsigned int time_receive_data;
     unsigned int time_compress_data;
     unsigned int time_pick_lpf;
diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index 8821b3a0ba32565ed24e2c1f5dea22852d8c0d35..2f7dd9c7c47f6b4fe623d1a6183545a64440c578 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -50,14 +50,13 @@ extern int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]);
 extern void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, MV *mv);
 
 
-int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])
+int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2])
 {
     (void) b;
     (void) d;
     (void) ref_mv;
     (void) error_per_bit;
-    (void) svf;
-    (void) vf;
+    (void) vfp;
     (void) mvcost;
     bestmv->row <<= 3;
     bestmv->col <<= 3;
@@ -65,7 +64,7 @@ int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv,
 }
 
 
-static int get_inter_mbpred_error(MACROBLOCK *mb, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, unsigned int *sse)
+static int get_inter_mbpred_error(MACROBLOCK *mb, const vp8_variance_fn_ptr_t *vfp, unsigned int *sse)
 {
 
     BLOCK *b = &mb->block[0];
@@ -81,11 +80,11 @@ static int get_inter_mbpred_error(MACROBLOCK *mb, vp8_subpixvariance_fn_t svf, v
 
     if (xoffset | yoffset)
     {
-        return svf(in_what, in_what_stride, xoffset, yoffset, what, what_stride, sse);
+        return vfp->svf(in_what, in_what_stride, xoffset, yoffset, what, what_stride, sse);
     }
     else
     {
-        return vf(what, what_stride, in_what, in_what_stride, sse);
+        return vfp->vf(what, what_stride, in_what, in_what_stride, sse);
     }
 }
 
@@ -719,13 +718,13 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
 
         if (cpi->sf.search_method == HEX)
         {
-            bestsme = vp8_hex_search(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, cpi->fn_ptr.vf, cpi->fn_ptr.sdf, x->mvsadcost, x->mvcost);
+            bestsme = vp8_hex_search(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost);
            mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
            mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
        }
        else
        {
-            bestsme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr, x->mvsadcost, x->mvcost); //sadpb < 9
+            bestsme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb < 9
            mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
            mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
 
@@ -744,7 +743,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
                    num00--;
                else
                {
-                    thissme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr, x->mvsadcost, x->mvcost); //sadpb = 9
+                    thissme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb = 9
 
                    if (thissme < bestsme)
                    {
@@ -765,7 +764,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
        }
 
        if (bestsme < INT_MAX)
-            cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv1, x->errorperbit, cpi->fn_ptr.svf, cpi->fn_ptr.vf, cpi->mb.mvcost);
+            cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv1, x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], cpi->mb.mvcost);
 
        mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
        mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
@@ -795,7 +794,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
            x->e_mbd.block[0].bmi.mode = this_mode;
            x->e_mbd.block[0].bmi.mv.as_int = x->e_mbd.mode_info_context->mbmi.mv.as_int;
 
-            distortion2 = get_inter_mbpred_error(x, cpi->fn_ptr.svf, cpi->fn_ptr.vf, (unsigned int *)(&sse));
+            distortion2 = get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], (unsigned int *)(&sse));
 
            this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index dbef85b9f71c9b838cb00d18735b079e284fba7c..8f406b9b76875648ba8295732da4ebae47a87bcb 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -1130,6 +1130,8 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
     int bsd = 0;
     int bestsegmentyrate = 0;
 
+    static const int segmentation_to_sseshift[4] = {3, 3, 2, 0};
+
     // FIX TO Rd error outrange bug PGW 9 june 2004
     B_PREDICTION_MODE bmodes[16] = {ZERO4X4, ZERO4X4, ZERO4X4, ZERO4X4,
                                     ZERO4X4, ZERO4X4, ZERO4X4, ZERO4X4,
@@ -1151,10 +1153,10 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
        int rate = 0;
        int sbr = 0;
        int sbd = 0;
-        int UNINITIALIZED_IS_SAFE(sseshift);
+        int sseshift;
        int segmentyrate = 0;
 
-        vp8_variance_fn_ptr_t v_fn_ptr;
+        vp8_variance_fn_ptr_t *v_fn_ptr;
 
        ENTROPY_CONTEXT_PLANES t_above, t_left;
        ENTROPY_CONTEXT *ta;
@@ -1174,42 +1176,8 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
        br = 0;
        bd = 0;
 
-        switch (segmentation)
-        {
-        case 0:
-            v_fn_ptr.vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x8);
-            v_fn_ptr.svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar16x8);
-            v_fn_ptr.sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8);
-            v_fn_ptr.sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x3);
-            v_fn_ptr.sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x4d);
-            sseshift = 3;
-            break;
-        case 1:
-            v_fn_ptr.vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var8x16);
-            v_fn_ptr.svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar8x16);
-            v_fn_ptr.sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16);
-            v_fn_ptr.sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x3);
-            v_fn_ptr.sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x4d);
-            sseshift = 3;
-            break;
-        case 2:
-            v_fn_ptr.vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var8x8);
-            v_fn_ptr.svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar8x8);
-            v_fn_ptr.sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8);
-            v_fn_ptr.sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x3);
-            v_fn_ptr.sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x4d);
-            sseshift = 2;
-            break;
-        case 3:
-            v_fn_ptr.vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var4x4);
-            v_fn_ptr.svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar4x4);
-            v_fn_ptr.sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4);
-            v_fn_ptr.sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x3);
-            v_fn_ptr.sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x4d);
-            sseshift = 0;
-            break;
-        }
-
+        v_fn_ptr = &cpi->fn_ptr[segmentation];
+        sseshift = segmentation_to_sseshift[segmentation];
        labels = vp8_mbsplits[segmentation];
        label_count = vp8_count_labels(labels);
 
@@ -1281,10 +1249,10 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
                    int sadpb = x->sadperbit4;
 
                    if (cpi->sf.search_method == HEX)
-                        bestsme = vp8_hex_search(x, c, e, best_ref_mv, &mode_mv[NEW4X4], step_param, sadpb/*x->errorperbit*/, &num00, v_fn_ptr.vf, v_fn_ptr.sdf, x->mvsadcost, mvcost);
+                        bestsme = vp8_hex_search(x, c, e, best_ref_mv, &mode_mv[NEW4X4], step_param, sadpb/*x->errorperbit*/, &num00, v_fn_ptr, x->mvsadcost, mvcost);
                    else
                    {
-                        bestsme = cpi->diamond_search_sad(x, c, e, best_ref_mv, &mode_mv[NEW4X4], step_param, sadpb / 2/*x->errorperbit*/, &num00, &v_fn_ptr, x->mvsadcost, mvcost);
+                        bestsme = cpi->diamond_search_sad(x, c, e, best_ref_mv, &mode_mv[NEW4X4], step_param, sadpb / 2/*x->errorperbit*/, &num00, v_fn_ptr, x->mvsadcost, mvcost);
 
                        n = num00;
                        num00 = 0;
@@ -1297,7 +1265,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
                                num00--;
                            else
                            {
-                                thissme = cpi->diamond_search_sad(x, c, e, best_ref_mv, &temp_mv, step_param + n, sadpb / 2/*x->errorperbit*/, &num00, &v_fn_ptr, x->mvsadcost, mvcost);
+                                thissme = cpi->diamond_search_sad(x, c, e, best_ref_mv, &temp_mv, step_param + n, sadpb / 2/*x->errorperbit*/, &num00, v_fn_ptr, x->mvsadcost, mvcost);
 
                                if (thissme < bestsme)
                                {
@@ -1312,7 +1280,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
                    // Should we do a full search (best quality only)
                    if ((compressor_speed == 0) && (bestsme >> sseshift) > 4000)
                    {
-                        thissme = cpi->full_search_sad(x, c, e, best_ref_mv, sadpb / 4, 16, &v_fn_ptr, x->mvcost, x->mvsadcost);
+                        thissme = cpi->full_search_sad(x, c, e, best_ref_mv, sadpb / 4, 16, v_fn_ptr, x->mvcost, x->mvsadcost);
 
                        if (thissme < bestsme)
                        {
@@ -1330,9 +1298,9 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
                if (bestsme < INT_MAX)
                {
                    if (!fullpixel)
-                        cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], best_ref_mv, x->errorperbit / 2, v_fn_ptr.svf, v_fn_ptr.vf, mvcost);
+                        cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], best_ref_mv, x->errorperbit / 2, v_fn_ptr, mvcost);
                    else
-                        vp8_skip_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], best_ref_mv, x->errorperbit, v_fn_ptr.svf, v_fn_ptr.vf, mvcost);
+                        vp8_skip_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], best_ref_mv, x->errorperbit, v_fn_ptr, mvcost);
                }
            }
 
@@ -1852,13 +1820,13 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
 
                if (cpi->sf.search_method == HEX)
                {
-                    bestsme = vp8_hex_search(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, cpi->fn_ptr.vf, cpi->fn_ptr.sdf, x->mvsadcost, x->mvcost);
+                    bestsme = vp8_hex_search(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost);
                    mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
                    mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
                }
                else
                {
-                    bestsme = cpi->diamond_search_sad(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr, x->mvsadcost, x->mvcost); //sadpb < 9
+                    bestsme = cpi->diamond_search_sad(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb < 9
                    mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
                    mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
 
@@ -1877,7 +1845,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
                            num00--;
                        else
                        {
-                            thissme = cpi->diamond_search_sad(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr, x->mvsadcost, x->mvcost); //sadpb = 9
+                            thissme = cpi->diamond_search_sad(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb = 9
 
                            if (thissme < bestsme)
                            {
@@ -1914,7 +1882,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
                    search_range = (search_range > cpi->sf.max_fs_radius) ? cpi->sf.max_fs_radius : search_range;
                    {
                        int sadpb = x->sadperbit16 >> 2;
-                        thissme = cpi->full_search_sad(x, b, d, &best_ref_mv, sadpb, search_range, &cpi->fn_ptr, x->mvcost, x->mvsadcost);
+                        thissme = cpi->full_search_sad(x, b, d, &best_ref_mv, sadpb, search_range, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, x->mvsadcost);
                    }
 
                    // Barrier threshold to initiating full search
@@ -1939,7 +1907,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
 
                if (bestsme < INT_MAX)
                    // cpi->find_fractional_mv_step(x,b,d,&d->bmi.mv.as_mv,&best_ref_mv,x->errorperbit/2,cpi->fn_ptr.svf,cpi->fn_ptr.vf,x->mvcost);  // normal mvc=11
-                    cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit / 4, cpi->fn_ptr.svf, cpi->fn_ptr.vf, x->mvcost);
+                    cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit / 4, &cpi->fn_ptr[BLOCK_16X16], x->mvcost);
 
                mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
                mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c
index 630afdbf17910d6f6c84524e14fb73a4ab416e65..fd5dd7edeb2be96e555b0bc90b533febca28f824 100644
--- a/vp8/encoder/temporal_filter.c
+++ b/vp8/encoder/temporal_filter.c
@@ -234,7 +234,7 @@ static int find_matching_mb
                                 &best_ref_mv1, &d->bmi.mv.as_mv,
                                 step_param,
                                 sadpb/*x->errorperbit*/,
-                                &num00, cpi->fn_ptr.vf, cpi->fn_ptr.sdf,
+                                &num00, &cpi->fn_ptr[BLOCK_16X16],
                                 mvsadcost, mvcost);
     }
     else
@@ -245,7 +245,7 @@ static int find_matching_mb
                                          &best_ref_mv1, &d->bmi.mv.as_mv,
                                          step_param,
                                          sadpb / 2/*x->errorperbit*/,
-                                          &num00, &cpi->fn_ptr,
+                                          &num00, &cpi->fn_ptr[BLOCK_16X16],
                                          mvsadcost, mvcost); //sadpb < 9
 
        // Further step/diamond searches as necessary
@@ -267,7 +267,7 @@ static int find_matching_mb
                                                  &best_ref_mv1, &d->bmi.mv.as_mv,
                                                  step_param + n,
                                                  sadpb / 4/*x->errorperbit*/,
-                                                  &num00, &cpi->fn_ptr,
+                                                  &num00, &cpi->fn_ptr[BLOCK_16X16],
                                                  mvsadcost, mvcost); //sadpb = 9
 
                if (thissme < bestsme)
@@ -291,8 +291,8 @@ static int find_matching_mb
     {
        bestsme = cpi->find_fractional_mv_step(x, b, d,
                                               &d->bmi.mv.as_mv, &best_ref_mv1,
-                                               x->errorperbit, cpi->fn_ptr.svf,
-                                               cpi->fn_ptr.vf, cpi->mb.mvcost);
+                                               x->errorperbit, &cpi->fn_ptr[BLOCK_16X16],
+                                               cpi->mb.mvcost);
     }
 #endif
diff --git a/vp8/encoder/variance.h b/vp8/encoder/variance.h
index 3c9ae987c6fa6b56f52f6ca7c3b40b1061e1ba7b..f60038f022188657dc942ff1707dd278a9dcb269 100644
--- a/vp8/encoder/variance.h
+++ b/vp8/encoder/variance.h
@@ -219,6 +219,21 @@ extern prototype_subpixvariance(vp8_variance_subpixvar16x8);
 #endif
 extern prototype_subpixvariance(vp8_variance_subpixvar16x16);
 
+#ifndef vp8_variance_halfpixvar16x16_h
+#define vp8_variance_halfpixvar16x16_h vp8_half_pixel_variance16x16_c
+#endif
+extern prototype_variance(vp8_variance_halfpixvar16x16_h);
+
+#ifndef vp8_variance_halfpixvar16x16_v
+#define vp8_variance_halfpixvar16x16_v vp8_half_pixel_variance16x16_c
+#endif
+extern prototype_variance(vp8_variance_halfpixvar16x16_v);
+
+#ifndef vp8_variance_halfpixvar16x16_hv
+#define vp8_variance_halfpixvar16x16_hv vp8_half_pixel_variance16x16_c
+#endif
+extern prototype_variance(vp8_variance_halfpixvar16x16_hv);
+
 #ifndef vp8_variance_subpixmse16x16
 #define vp8_variance_subpixmse16x16 vp8_sub_pixel_mse16x16_c
 #endif
@@ -283,6 +298,9 @@ typedef struct
     vp8_subpixvariance_fn_t subpixvar8x16;
     vp8_subpixvariance_fn_t subpixvar16x8;
     vp8_subpixvariance_fn_t subpixvar16x16;
+    vp8_variance_fn_t       halfpixvar16x16_h;
+    vp8_variance_fn_t       halfpixvar16x16_v;
+    vp8_variance_fn_t       halfpixvar16x16_hv;
     vp8_subpixvariance_fn_t subpixmse16x16;
 
     vp8_getmbss_fn_t getmbss;
@@ -309,11 +327,14 @@
 
 typedef struct
 {
-    vp8_sad_fn_t            sdf;
-    vp8_sad_multi_fn_t      sdx3f;
-    vp8_sad_multi_d_fn_t    sdx4df;
-    vp8_variance_fn_t       vf;
+    vp8_sad_fn_t            sdf;
+    vp8_variance_fn_t       vf;
     vp8_subpixvariance_fn_t svf;
+    vp8_variance_fn_t       svf_halfpix_h;
+    vp8_variance_fn_t       svf_halfpix_v;
+    vp8_variance_fn_t       svf_halfpix_hv;
+    vp8_sad_multi_fn_t      sdx3f;
+    vp8_sad_multi_d_fn_t    sdx4df;
 } vp8_variance_fn_ptr_t;
 
 #if CONFIG_RUNTIME_CPU_DETECT
@@ -322,7 +343,4 @@ typedef struct
 #define VARIANCE_INVOKE(ctx,fn) vp8_variance_##fn
 #endif
 
-/* TODO: Determine if this USEBILINEAR flag is necessary. */
-#define USEBILINEAR
-
 #endif
diff --git a/vp8/encoder/variance_c.c b/vp8/encoder/variance_c.c
index 1774143511be75913ca94833b2d3588918fd0629..48d5bb501c59d2b27be689f51b94568f0ac91432 100644
--- a/vp8/encoder/variance_c.c
+++ b/vp8/encoder/variance_c.c
@@ -24,7 +24,6 @@ const int vp8_six_tap[8][6] =
 };
 
 
-#ifdef USEBILINEAR
 const int VP8_FILTER_WEIGHT = 128;
 const int VP8_FILTER_SHIFT = 7;
 const int vp8_bilinear_taps[8][2] =
@@ -461,6 +460,19 @@ unsigned int vp8_sub_pixel_variance16x16_c
 
     return vp8_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
 }
+
+unsigned int vp8_half_pixel_variance16x16_c(
+    const unsigned char *src_ptr,
+    int source_stride,
+    const unsigned char *ref_ptr,
+    int recon_stride,
+    unsigned int *sse)
+{
+    return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 4, 4,
+                                         ref_ptr, recon_stride, sse);
+}
+
+
 
 unsigned int vp8_sub_pixel_mse16x16_c
 (
     const unsigned char *src_ptr,
@@ -525,4 +537,3 @@ unsigned int vp8_sub_pixel_variance8x16_c
 
     return vp8_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
 }
-#endif
diff --git a/vp8/vp8cx_arm.mk b/vp8/vp8cx_arm.mk
index d126faf32fcd98346ed1eaf7518dc164ee543005..da27e08972e66fc943733a1754ce8308aa8980cc 100644
--- a/vp8/vp8cx_arm.mk
+++ b/vp8/vp8cx_arm.mk
@@ -19,7 +19,6 @@ VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/encodemb_arm.c
 VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/quantize_arm.c
 VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/picklpf_arm.c
 VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/boolhuff_arm.c
-VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/mcomp_arm.c
 
 VP8_CX_SRCS_REMOVE-$(HAVE_ARMV5TE) += encoder/boolhuff.c