/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include "mcomp.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/config.h"
#include <stdio.h>
#include <limits.h>
#include <math.h>
#include "vp8/common/findnearmv.h"
#ifdef ENTROPY_STATS
static int mv_ref_ct [31] [4] [2];
static int mv_mode_cts [4] [2];
#endif
#if CONFIG_HIGH_PRECISION_MV
int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight, int ishp)
{
    // MV costing is based on the distribution of vectors in the previous frame and as such will tend to
    // overstate the cost of vectors. In addition, coding a new vector can have a knock-on effect on the
    // cost of subsequent vectors and on the quality of prediction from NEAR and NEAREST for subsequent blocks.
    // The "Weight" parameter allows, to a limited extent, for some account to be taken of these factors.
    return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> (ishp == 0)] + mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> (ishp == 0)]) * Weight) >> 7;
}
#else
int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight)
{
    // MV costing is based on the distribution of vectors in the previous frame and as such will tend to
    // overstate the cost of vectors. In addition, coding a new vector can have a knock-on effect on the
    // cost of subsequent vectors and on the quality of prediction from NEAR and NEAREST for subsequent blocks.
    // The "Weight" parameter allows, to a limited extent, for some account to be taken of these factors.
    return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] + mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) * Weight) >> 7;
}
#endif
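
/*
 * Worked example (illustrative numbers only): if the shifted row and column
 * differences index table costs mvcost[0][dr] = 300 and mvcost[1][dc] = 500,
 * then with Weight = 128 the result is ((300 + 500) * 128) >> 7 = 800. A
 * Weight of 128 therefore leaves the summed table cost unscaled, and other
 * weights scale it up or down in steps of 1/128.
 */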
#if CONFIG_HIGH_PRECISION_MV
static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int error_per_bit, int ishp)
{
    return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> (ishp == 0)] +
             mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> (ishp == 0)])
            * error_per_bit + 128) >> 8;
}
#else
static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int error_per_bit)
{
    return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] +
             mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1])
            * error_per_bit + 128) >> 8;
}
#endif
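
#if 0
/* Illustrative sketch only (not built): how the fixed-point rounding in
 * mv_err_cost() behaves. The names and numbers here are hypothetical. */
static int example_rd_cost(void)
{
    int summed_table_cost = 700;  /* stands in for mvcost[0][dr] + mvcost[1][dc] */
    int error_per_bit = 100;
    /* (700 * 100 + 128) >> 8 == 273: the +128 before the >> 8 makes this a
     * round-to-nearest division by 256 rather than a truncation. */
    return (summed_table_cost * error_per_bit + 128) >> 8;
}
#endif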
static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2], int error_per_bit)
{
    /* Calculate sad error cost on full pixel basis. */
    return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] +
             mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)])
            * error_per_bit + 128) >> 8;
}
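
/*
 * Note the contrast with mv_err_cost() above: mvsadcost[] is indexed by the
 * raw full-pel vector difference with no right-shift, since the SAD-based
 * search evaluates whole-pel candidate positions only.
 */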
void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride)
{
    int Len;
    int search_site_count = 0;

    // Generate offsets for 4 search sites per step.
    Len = MAX_FIRST_STEP;
    x->ss[search_site_count].mv.col = 0;
    x->ss[search_site_count].mv.row = 0;
    x->ss[search_site_count].offset = 0;
    search_site_count++;

    while (Len > 0)
    {
        // Compute offsets for search sites.
        x->ss[search_site_count].mv.col = 0;
        x->ss[search_site_count].mv.row = -Len;
        x->ss[search_site_count].offset = -Len * stride;
        search_site_count++;

        // Compute offsets for search sites.
        x->ss[search_site_count].mv.col = 0;
        x->ss[search_site_count].mv.row = Len;
        x->ss[search_site_count].offset = Len * stride;
        search_site_count++;

        // Compute offsets for search sites.
        x->ss[search_site_count].mv.col = -Len;
        x->ss[search_site_count].mv.row = 0;
        x->ss[search_site_count].offset = -Len;
        search_site_count++;

        // Compute offsets for search sites.
        x->ss[search_site_count].mv.col = Len;
        x->ss[search_site_count].mv.row = 0;
        x->ss[search_site_count].offset = Len;
        search_site_count++;

        // Contract.
        Len /= 2;
    }

    x->ss_count = search_site_count;
    x->searches_per_step = 4;
}

void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
{
    int Len;
    int search_site_count = 0;

    // Generate offsets for 8 search sites per step.
    Len = MAX_FIRST_STEP;
    x->ss[search_site_count].mv.col = 0;
    x->ss[search_site_count].mv.row = 0;
    x->ss[search_site_count].offset = 0;
    search_site_count++;

    while (Len > 0)
    {
        // Compute offsets for search sites.
        x->ss[search_site_count].mv.col = 0;
        x->ss[search_site_count].mv.row = -Len;
        x->ss[search_site_count].offset = -Len * stride;
        search_site_count++;

        // Compute offsets for search sites.
        x->ss[search_site_count].mv.col = 0;
        x->ss[search_site_count].mv.row = Len;
        x->ss[search_site_count].offset = Len * stride;
        search_site_count++;

        // Compute offsets for search sites.
        x->ss[search_site_count].mv.col = -Len;
        x->ss[search_site_count].mv.row = 0;
        x->ss[search_site_count].offset = -Len;
        search_site_count++;

        // Compute offsets for search sites.
        x->ss[search_site_count].mv.col = Len;
        x->ss[search_site_count].mv.row = 0;
        x->ss[search_site_count].offset = Len;
        search_site_count++;

        // Compute offsets for search sites.
        x->ss[search_site_count].mv.col = -Len;
        x->ss[search_site_count].mv.row = -Len;
        x->ss[search_site_count].offset = -Len * stride - Len;
        search_site_count++;

        // Compute offsets for search sites.
        x->ss[search_site_count].mv.col = Len;
        x->ss[search_site_count].mv.row = -Len;
        x->ss[search_site_count].offset = -Len * stride + Len;
        search_site_count++;

        // Compute offsets for search sites.
        x->ss[search_site_count].mv.col = -Len;
        x->ss[search_site_count].mv.row = Len;
        x->ss[search_site_count].offset = Len * stride - Len;
        search_site_count++;

        // Compute offsets for search sites.
        x->ss[search_site_count].mv.col = Len;
        x->ss[search_site_count].mv.row = Len;
        x->ss[search_site_count].offset = Len * stride + Len;
        search_site_count++;

        // Contract.
        Len /= 2;
    }

    x->ss_count = search_site_count;
    x->searches_per_step = 8;
}
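
/*
 * Shape of the resulting tables (assuming, purely for illustration, that
 * MAX_FIRST_STEP were 8): the loops above emit sites at radii 8, 4, 2 and 1,
 * so the 4-point diamond version would store 1 + 4*4 = 17 sites and the
 * 8-point square version 1 + 8*4 = 33, each with a precomputed buffer offset
 * so candidates can be addressed with a single pointer add during the step
 * search.
 */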
/*
 * To avoid the penalty for crossing cache-line reads, preload the reference
 * area into a small buffer, which is aligned to make sure there won't be a
 * crossing cache-line read while reading from this buffer. This reduces the
 * CPU cycles spent on reading ref data in the sub-pixel filter functions.
 * TODO: Currently, since the sub-pixel search range here is -3 ~ 3, copy a
 * 22 rows x 32 cols area, which is enough for a 16x16 macroblock. Later, for
 * SPLITMV, we could reduce the area.
 */
#define PRE(r,c) (y + (((r)>>2) * y_stride + ((c)>>2) - (offset))) // pointer to the predictor base of a motion vector
#if CONFIG_SIXTEENTH_SUBPEL_UV
#define SP(x) (((x)&3)<<2) // convert motion vector component to offset for svf calc
#else
#define SP(x) (((x)&3)<<1) // convert motion vector component to offset for svf calc
#endif /* CONFIG_SIXTEENTH_SUBPEL_UV */
#define MVC(r,c) (((mvcost[0][(r)-rr] + mvcost[1][(c)-rc]) * error_per_bit + 128) >> 8) // estimated cost of a motion vector (r,c)
#define DIST(r,c) vfp->svf(PRE(r,c), y_stride, SP(c), SP(r), z, b->src_stride, &sse) // returns the subpixel variance error
#define ERR(r,c) (MVC(r,c)+DIST(r,c)) // returns distortion + motion vector cost
#define IFMVCV(r,c,s,e) if (c >= minc && c <= maxc && r >= minr && r <= maxr) s else e; // run s if (r,c) is inside the search bounds, e otherwise
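
/*
 * Worked example for the macros above (quarter-pel units): for a candidate
 * (r,c) = (10, 7), PRE() steps the base pointer by (10 >> 2) = 2 rows and
 * (7 >> 2) = 1 column of full pixels, while SP() keeps the sub-pel fractions
 * (10 & 3) = 2 and (7 & 3) = 3, scaled to the offset convention that
 * vfp->svf() expects for its filter-tap lookup.
 */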
#if CONFIG_HIGH_PRECISION_MV
#define PREHP(r,c) (y + (((r)>>3) * y_stride + ((c)>>3) - (offset))) // pointer to the predictor base of a motion vector
#if CONFIG_SIXTEENTH_SUBPEL_UV
#define SPHP(x) (((x)&7)<<1) // convert motion vector component to offset for svf calc
#else /* CONFIG_SIXTEENTH_SUBPEL_UV */
#define SPHP(x) ((x)&7) // convert motion vector component to offset for svf calc
#endif /* CONFIG_SIXTEENTH_SUBPEL_UV */
#define DISTHP(r,c) vfp->svf(PREHP(r,c), y_stride, SPHP(c), SPHP(r), z, b->src_stride, &sse) // returns the subpixel variance error
#define ERRHP(r,c) (MVC(r,c)+DISTHP(r,c)) // returns distortion + motion vector cost
// checks whether (r,c) has a better score than the previous best
#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = ((xd->allow_high_precision_mv)?DISTHP(r,c):DIST(r,c)); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=INT_MAX;)
#else
// checks whether (r,c) has a better score than the previous best
#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=INT_MAX;)
#endif /* CONFIG_HIGH_PRECISION_MV */

#define MIN(x,y) (((x)<(y))?(x):(y))
#define MAX(x,y) (((x)>(y))?(x):(y))

int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                             int_mv *bestmv, int_mv *ref_mv,
                                             int error_per_bit,
                                             const vp8_variance_fn_ptr_t *vfp,
                                             int *mvcost[2], int *distortion,
                                             unsigned int *sse1)
{
    unsigned char *z = (*(b->base_src) + b->src);
    MACROBLOCKD *xd = &x->e_mbd;

    int rr, rc, br, bc, hstep;
    int tr, tc;
    unsigned int besterr = INT_MAX;
    unsigned int left, right, up, down, diag;
    unsigned int sse;
    unsigned int whichdir;
    unsigned int halfiters = 4;
    unsigned int quarteriters = 4;
#if CONFIG_HIGH_PRECISION_MV
    unsigned int eighthiters = 4;
#endif
    int thismse;
    int maxc, minc, maxr, minr;
    int y_stride;
    int offset;

#if ARCH_X86 || ARCH_X86_64
    unsigned char *y0 = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
    unsigned char *y;
    int buf_r1, buf_r2, buf_c1, buf_c2;

    // Clamping to avoid out-of-range data access
    buf_r1 = ((bestmv->as_mv.row - INTERP_EXTEND) < x->mv_row_min) ? (bestmv->as_mv.row - x->mv_row_min) : INTERP_EXTEND;
    buf_r2 = ((bestmv->as_mv.row + INTERP_EXTEND) > x->mv_row_max) ? (x->mv_row_max - bestmv->as_mv.row) : INTERP_EXTEND;
    buf_c1 = ((bestmv->as_mv.col - INTERP_EXTEND) < x->mv_col_min) ? (bestmv->as_mv.col - x->mv_col_min) : INTERP_EXTEND;
    buf_c2 = ((bestmv->as_mv.col + INTERP_EXTEND) > x->mv_col_max) ? (x->mv_col_max - bestmv->as_mv.col) : INTERP_EXTEND;
    y_stride = 32;

    /* Copy to intermediate buffer before searching. */
    vfp->copymem(y0 - buf_c1 - d->pre_stride * buf_r1, d->pre_stride, xd->y_buf, y_stride, 16 + buf_r1 + buf_r2);
    y = xd->y_buf + y_stride * buf_r1 + buf_c1;
#else
    unsigned char *y = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
    y_stride = d->pre_stride;
#endif

#if CONFIG_HIGH_PRECISION_MV
    if (xd->allow_high_precision_mv)
    {
        rr = ref_mv->as_mv.row;
        rc = ref_mv->as_mv.col;
        br = bestmv->as_mv.row << 3;
        bc = bestmv->as_mv.col << 3;
        hstep = 4;
        minc = MAX(x->mv_col_min << 3, (ref_mv->as_mv.col) - ((1 << mvlong_width_hp) - 1));
        maxc = MIN(x->mv_col_max << 3, (ref_mv->as_mv.col) + ((1 << mvlong_width_hp) - 1));
        minr = MAX(x->mv_row_min << 3, (ref_mv->as_mv.row) - ((1 << mvlong_width_hp) - 1));
        maxr = MIN(x->mv_row_max << 3, (ref_mv->as_mv.row) + ((1 << mvlong_width_hp) - 1));
    }
    else
#endif
    {
        rr = ref_mv->as_mv.row >> 1;
        rc = ref_mv->as_mv.col >> 1;
        br = bestmv->as_mv.row << 2;
        bc = bestmv->as_mv.col << 2;
        hstep = 2;
        minc = MAX(x->mv_col_min << 2, (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
        maxc = MIN(x->mv_col_max << 2, (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
        minr = MAX(x->mv_row_min << 2, (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
        maxr = MIN(x->mv_row_max << 2, (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));
    }

    tr = br;
    tc = bc;

    offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col; // central mv

    bestmv->as_mv.row <<= 3;
    bestmv->as_mv.col <<= 3;

    // calculate central point error
    besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
    *distortion = besterr;
#if CONFIG_HIGH_PRECISION_MV
    besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
    besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
#endif

    // TODO: Each subsequent iteration checks at least one point in common with
    // the last iteration (could be two if the diagonal was selected).
    while (--halfiters) // 1/2 pel
    {
        CHECK_BETTER(left, tr, tc - hstep);
        CHECK_BETTER(right, tr, tc + hstep);
        CHECK_BETTER(up, tr - hstep, tc);
        CHECK_BETTER(down, tr + hstep, tc);

        whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

        switch (whichdir)
        {
        case 0:
            CHECK_BETTER(diag, tr - hstep, tc - hstep);
            break;
        case 1:
            CHECK_BETTER(diag, tr - hstep, tc + hstep);
            break;
        case 2:
            CHECK_BETTER(diag, tr + hstep, tc - hstep);
            break;
        case 3:
            CHECK_BETTER(diag, tr + hstep, tc + hstep);
            break;
        }

        // no reason to check the same one again.
        if (tr == br && tc == bc)
            break;

        tr = br;
        tc = bc;
    }
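
    /*
     * The whichdir computation above folds the four axis results into a 2-bit
     * index selecting the single most promising diagonal: e.g. if left < right
     * and down <= up, whichdir = 0 + 2 = 2 and only the down-left point
     * (tr + hstep, tc - hstep) is evaluated, saving three of the four possible
     * diagonal checks per iteration.
     */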
    // TODO: Each subsequent iteration checks at least one point in common with
    // the last iteration (could be two if the diagonal was selected).

    // 1/4 pel
    hstep >>= 1;

    while (--quarteriters)
    {
        CHECK_BETTER(left, tr, tc - hstep);
        CHECK_BETTER(right, tr, tc + hstep);
        CHECK_BETTER(up, tr - hstep, tc);
        CHECK_BETTER(down, tr + hstep, tc);

        whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

        switch (whichdir)
        {
        case 0:
            CHECK_BETTER(diag, tr - hstep, tc - hstep);
            break;
        case 1:
            CHECK_BETTER(diag, tr - hstep, tc + hstep);
            break;
        case 2:
            CHECK_BETTER(diag, tr + hstep, tc - hstep);
            break;
        case 3:
            CHECK_BETTER(diag, tr + hstep, tc + hstep);
            break;
        }

        // no reason to check the same one again.
        if (tr == br && tc == bc)
            break;

        tr = br;
        tc = bc;
    }

#if CONFIG_HIGH_PRECISION_MV
    if (x->e_mbd.allow_high_precision_mv)
    {
        // 1/8 pel
        hstep >>= 1;

        while (--eighthiters)
        {
            CHECK_BETTER(left, tr, tc - hstep);
            CHECK_BETTER(right, tr, tc + hstep);
            CHECK_BETTER(up, tr - hstep, tc);
            CHECK_BETTER(down, tr + hstep, tc);

            whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

            switch (whichdir)
            {
            case 0:
                CHECK_BETTER(diag, tr - hstep, tc - hstep);
                break;
            case 1:
                CHECK_BETTER(diag, tr - hstep, tc + hstep);
                break;
            case 2:
                CHECK_BETTER(diag, tr + hstep, tc - hstep);
                break;
            case 3:
                CHECK_BETTER(diag, tr + hstep, tc + hstep);
                break;
            }

            // no reason to check the same one again.
            if (tr == br && tc == bc)
                break;
            tr = br;
            tc = bc;
        }
    }
#endif

#if CONFIG_HIGH_PRECISION_MV
    if (x->e_mbd.allow_high_precision_mv)
    {
        bestmv->as_mv.row = br;
        bestmv->as_mv.col = bc;
    }
    else
#endif /* CONFIG_HIGH_PRECISION_MV */
    {
        bestmv->as_mv.row = br << 1;
        bestmv->as_mv.col = bc << 1;
    }

    if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
        (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3)))
        return INT_MAX;

    return besterr;
}

#undef MVC
#undef PRE
#undef SP
#undef DIST
#undef IFMVCV
#undef ERR
#undef CHECK_BETTER
#undef MIN
#undef MAX
#if CONFIG_HIGH_PRECISION_MV
#undef PREHP
#undef SPHP
#undef DISTHP
#undef ERRHP
#endif

#if CONFIG_SIXTEENTH_SUBPEL_UV
#define SP(x) (((x)&7)<<1) // convert motion vector component to offset for svf calc
#else
#define SP(x) ((x)&7) // convert motion vector component to offset for svf calc
#endif /* CONFIG_SIXTEENTH_SUBPEL_UV */
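
/*
 * From here on, motion vectors are handled in 1/8-pel units (note the << 3
 * applied to the centre below), so SP() is redefined to treat the low three
 * bits as the sub-pel fraction. The search that follows refines the best
 * full-pel vector in stages: left/right, up/down and one diagonal at
 * half-pel, then quarter-pel, then (for high-precision MVs) eighth-pel.
 */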
int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                 int_mv *bestmv, int_mv *ref_mv,
                                 int error_per_bit,
                                 const vp8_variance_fn_ptr_t *vfp,
                                 int *mvcost[2], int *distortion,
                                 unsigned int *sse1)
{
    int bestmse = INT_MAX;
    int_mv startmv;
    int_mv this_mv;
#if CONFIG_HIGH_PRECISION_MV
    int_mv orig_mv;
    int yrow_movedback = 0, ycol_movedback = 0;
#endif
    unsigned char *z = (*(b->base_src) + b->src);
    int left, right, up, down, diag;
    unsigned int sse;
    int whichdir;
    int thismse;
    int y_stride;
    MACROBLOCKD *xd = &x->e_mbd;

#if ARCH_X86 || ARCH_X86_64
    unsigned char *y0 = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
    unsigned char *y;

    y_stride = 32;
    /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
    vfp->copymem(y0 - 1 - d->pre_stride, d->pre_stride, xd->y_buf, y_stride, 18);
    y = xd->y_buf + y_stride + 1;
#else
    unsigned char *y = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
    y_stride = d->pre_stride;
#endif

    // central mv
    bestmv->as_mv.row <<= 3;
    bestmv->as_mv.col <<= 3;
    startmv = *bestmv;
#if CONFIG_HIGH_PRECISION_MV
    orig_mv = *bestmv;
#endif

    // calculate central point error
    bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
    *distortion = bestmse;
#if CONFIG_HIGH_PRECISION_MV
    bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
    bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
#endif

    // go left then right and check error
    this_mv.as_mv.row = startmv.as_mv.row;
    this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
    thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
#if CONFIG_HIGH_PRECISION_MV
    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
#endif

    if (left < bestmse)
    {
        *bestmv = this_mv;
        bestmse = left;
        *distortion = thismse;
        *sse1 = sse;
    }

    this_mv.as_mv.col += 8;
    thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
#if CONFIG_HIGH_PRECISION_MV
    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
#endif

    if (right < bestmse)
    {
        *bestmv = this_mv;
        bestmse = right;
        *distortion = thismse;
        *sse1 = sse;
    }
    // go up then down and check error
    this_mv.as_mv.col = startmv.as_mv.col;
    this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
    thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
#if CONFIG_HIGH_PRECISION_MV
    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
#endif

    if (up < bestmse)
    {
        *bestmv = this_mv;
        bestmse = up;
        *distortion = thismse;
        *sse1 = sse;
    }

    this_mv.as_mv.row += 8;
    thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
#if CONFIG_HIGH_PRECISION_MV
    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
#endif

    if (down < bestmse)
    {
        *bestmv = this_mv;
        bestmse = down;
        *distortion = thismse;
        *sse1 = sse;
    }

    // now check 1 more diagonal
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    //for(whichdir =0;whichdir<4;whichdir++)
    //{
    this_mv = startmv;

    switch (whichdir)
    {
    case 0:
        this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
        this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
        thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse);
        break;
    case 1:
        this_mv.as_mv.col += 4;
        this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
        thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse);
        break;
    case 2:
        this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
        this_mv.as_mv.row += 4;
        thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse);
        break;
    case 3:
    default:
        this_mv.as_mv.col += 4;
        this_mv.as_mv.row += 4;
        thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse);
        break;
    }

#if CONFIG_HIGH_PRECISION_MV
    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
#endif

    if (diag < bestmse)
    {
        *bestmv = this_mv;
        bestmse = diag;
        *distortion = thismse;
        *sse1 = sse;
    }
    // }

    // Time to check quarter pels: bestmv is now in 1/8-pel units, so if the
    // half-pel winner moved left or up of the full-pel centre, step y back
    // one pixel so that subsequent sub-pel fractions stay in the 0..7 range.
    if (bestmv->as_mv.row < startmv.as_mv.row)
    {
        y -= y_stride;
#if CONFIG_HIGH_PRECISION_MV
        yrow_movedback = 1;
#endif
    }

    if (bestmv->as_mv.col < startmv.as_mv.col)
    {
        y--;
#if CONFIG_HIGH_PRECISION_MV
        ycol_movedback = 1;
#endif
    }

    startmv = *bestmv;

    // go left then right and check error
    this_mv.as_mv.row = startmv.as_mv.row;

    if (startmv.as_mv.col & 7)
    {
        this_mv.as_mv.col = startmv.as_mv.col - 2;
        thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
    }
    else
    {
        this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
        thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
    }

#if CONFIG_HIGH_PRECISION_MV
    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
#endif

    if (left < bestmse)
    {
        *bestmv = this_mv;
        bestmse = left;
        *distortion = thismse;
        *sse1 = sse;
    }

    this_mv.as_mv.col += 4;
    thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
#if CONFIG_HIGH_PRECISION_MV
    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
#endif

    if (right < bestmse)
    {
        *bestmv = this_mv;
        bestmse = right;
        *distortion = thismse;
        *sse1 = sse;
    }
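
    /*
     * A note on the (v - 8) | 6 pattern above: when the starting component is
     * full-pel aligned (low three bits zero), the candidate two eighths to the
     * left needs the pixel one position to the left as its base, so the code
     * subtracts a full pel (8) and ORs in the fraction 6; e.g. col = 16 gives
     * (16 - 8) | 6 = 14 = 16 - 2, paired with the y - 1 base pointer.
     */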
    // go up then down and check error
    this_mv.as_mv.col = startmv.as_mv.col;

    if (startmv.as_mv.row & 7)
    {
        this_mv.as_mv.row = startmv.as_mv.row - 2;
        thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
    }
    else
    {
        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
        thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6), z, b->src_stride, &sse);
    }

#if CONFIG_HIGH_PRECISION_MV
    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
#endif

    if (up < bestmse)
    {
        *bestmv = this_mv;
        bestmse = up;
        *distortion = thismse;
        *sse1 = sse;
    }

    this_mv.as_mv.row += 4;
    thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
#if CONFIG_HIGH_PRECISION_MV
    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
#endif

    if (down < bestmse)
    {
        *bestmv = this_mv;
        bestmse = down;
        *distortion = thismse;
        *sse1 = sse;
    }

    // now check 1 more diagonal
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    // for(whichdir=0;whichdir<4;whichdir++)
    // {
    this_mv = startmv;

    switch (whichdir)
    {
    case 0:

        if (startmv.as_mv.row & 7)
        {
            this_mv.as_mv.row -= 2;

            if (startmv.as_mv.col & 7)
            {
                this_mv.as_mv.col -= 2;
                thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
            }
            else
            {
                this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
                thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
            }
        }
        else
        {
            this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;

            if (startmv.as_mv.col & 7)
            {
                this_mv.as_mv.col -= 2;
                thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6), z, b->src_stride, &sse);
            }
            else
            {
                this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
                thismse = vfp->svf(y - y_stride - 1, y_stride, SP(6), SP(6), z, b->src_stride, &sse);
            }
        }

        break;
    case 1:
        this_mv.as_mv.col += 2;

        if (startmv.as_mv.row & 7)
        {
            this_mv.as_mv.row -= 2;
            thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
        }
        else
        {
            this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
            thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6), z, b->src_stride, &sse);
        }

        break;
    case 2:
        this_mv.as_mv.row += 2;

        if (startmv.as_mv.col & 7)
        {
            this_mv.as_mv.col -= 2;
            thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
        }
        else
        {
            this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
            thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
        }

        break;
    case 3:
        this_mv.as_mv.col += 2;
        this_mv.as_mv.row += 2;
        thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
        break;
    }

#if CONFIG_HIGH_PRECISION_MV
    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
#endif

    if (diag < bestmse)
    {
        *bestmv = this_mv;
        bestmse = diag;
        *distortion = thismse;
        *sse1 = sse;
    }
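
    /*
     * The quarter-pel diagonal above completes the second refinement ring. The
     * block below repeats the same left/right, up/down and single-diagonal
     * pattern once more at eighth-pel resolution; it is compiled only with
     * CONFIG_HIGH_PRECISION_MV and runs only when allow_high_precision_mv is set.
     */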
#if CONFIG_HIGH_PRECISION_MV
    if (!x->e_mbd.allow_high_precision_mv)
        return bestmse;

    /* Now do 1/8th pixel */
    if (bestmv->as_mv.row < orig_mv.as_mv.row && !yrow_movedback)
    {
        y -= y_stride;
        yrow_movedback = 1;
    }

    if (bestmv->as_mv.col < orig_mv.as_mv.col && !ycol_movedback)
    {
        y--;
        ycol_movedback = 1;
    }

    startmv = *bestmv;

    // go left then right and check error
    this_mv.as_mv.row = startmv.as_mv.row;

    if (startmv.as_mv.col & 7)
    {
        this_mv.as_mv.col = startmv.as_mv.col - 1;
        thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
    }
    else
    {
        this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
        thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
    }

    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);

    if (left < bestmse)
    {
        *bestmv = this_mv;
        bestmse = left;
        *distortion = thismse;
        *sse1 = sse;
    }

    this_mv.as_mv.col += 2;
    thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);

    if (right < bestmse)
    {
        *bestmv = this_mv;
        bestmse = right;
        *distortion = thismse;
        *sse1 = sse;
    }

    // go up then down and check error
    this_mv.as_mv.col = startmv.as_mv.col;

    if (startmv.as_mv.row & 7)
    {
        this_mv.as_mv.row = startmv.as_mv.row - 1;
        thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
    }
    else
    {
        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7;
        thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, b->src_stride, &sse);
    }

    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
    if (up < bestmse)
    {
        *bestmv = this_mv;
        bestmse = up;
        *distortion = thismse;
        *sse1 = sse;
    }

    this_mv.as_mv.row += 2;
    thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);

    if (down < bestmse)
    {
        *bestmv = this_mv;
        bestmse = down;
        *distortion = thismse;
        *sse1 = sse;
    }

    // now check 1 more diagonal
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    // for(whichdir=0;whichdir<4;whichdir++)
    // {
    this_mv = startmv;

    switch (whichdir)
    {
    case 0:

        if (startmv.as_mv.row & 7)
        {
            this_mv.as_mv.row -= 1;

            if (startmv.as_mv.col & 7)
            {
                this_mv.as_mv.col -= 1;
                thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
            }
            else
            {
                this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
                thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
            }
        }
        else
        {
            this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7;

            if (startmv.as_mv.col & 7)
            {
                this_mv.as_mv.col -= 1;
                thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, b->src_stride, &sse);
            }
            else
            {
                this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
                thismse = vfp->svf(y - y_stride - 1, y_stride, SP(7), SP(7), z, b->src_stride, &sse);
            }
        }

        break;
    case 1:
        this_mv.as_mv.col += 1;

        if (startmv.as_mv.row & 7)
        {
            this_mv.as_mv.row -= 1;
            thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
        }
        else
        {
            this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7;
            thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, b->src_stride, &sse);
        }

        break;
    case 2:
        this_mv.as_mv.row += 1;

        if (startmv.as_mv.col & 7)
        {
            this_mv.as_mv.col -= 1;
            thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
        }
        else
        {
            this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
            thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
        }

        break;
    case 3:
        this_mv.as_mv.col += 1;
        this_mv.as_mv.row += 1;
        thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
        break;
    }

    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);

    if (diag < bestmse)
    {
        *bestmv = this_mv;
        bestmse = diag;
        *distortion = thismse;
        *sse1 = sse;
    }

#endif /* CONFIG_HIGH_PRECISION_MV */

    return bestmse;
}

#undef SP
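
/*
 * vp8_find_best_half_pixel_step below appears to be the reduced-cost
 * counterpart of the full search above: it evaluates the same half-pel ring
 * (left/right, up/down, one diagonal) but stops there, skipping the quarter-
 * and eighth-pel stages in exchange for speed.
 */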
int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                  int_mv *bestmv, int_mv *ref_mv,
                                  int error_per_bit,
                                  const vp8_variance_fn_ptr_t *vfp,
                                  int *mvcost[2], int *distortion,
                                  unsigned int *sse1)
{
    int bestmse = INT_MAX;
    int_mv startmv;
    int_mv this_mv;
    unsigned char *z = (*(b->base_src) + b->src);
    int left, right, up, down, diag;
    unsigned int sse;
    int whichdir;
    int thismse;
    int y_stride;
    MACROBLOCKD *xd = &x->e_mbd;

#if ARCH_X86 || ARCH_X86_64
    unsigned char *y0 = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
    unsigned char *y;

    y_stride = 32;
    /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
    vfp->copymem(y0 - 1 - d->pre_stride, d->pre_stride, xd->y_buf, y_stride, 18);
    y = xd->y_buf + y_stride + 1;
#else
    unsigned char *y = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
    y_stride = d->pre_stride;
#endif

    // central mv
    bestmv->as_mv.row <<= 3;
    bestmv->as_mv.col <<= 3;
    startmv = *bestmv;

    // calculate central point error
    bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
    *distortion = bestmse;
#if CONFIG_HIGH_PRECISION_MV
    bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
    bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
#endif

    // go left then right and check error
    this_mv.as_mv.row = startmv.as_mv.row;
    this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
    thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
#if CONFIG_HIGH_PRECISION_MV
    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
#endif

    if (left < bestmse)
    {
        *bestmv = this_mv;
        bestmse = left;
        *distortion = thismse;
        *sse1 = sse;
    }

    this_mv.as_mv.col += 8;
    thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
#if CONFIG_HIGH_PRECISION_MV
    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
#endif

    if (right < bestmse)
    {
        *bestmv = this_mv;
        bestmse = right;
        *distortion = thismse;
        *sse1 = sse;
    }

    // go up then down and check error
    this_mv.as_mv.col = startmv.as_mv.col;
    this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
    thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
#if CONFIG_HIGH_PRECISION_MV
    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
#endif

    if (up < bestmse)
    {