/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "mcomp.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/config.h"
#include <stdio.h>
#include <limits.h>
#include <math.h>
#include "vp8/common/findnearmv.h"
#ifdef ENTROPY_STATS
static int mv_ref_ct [31] [4] [2];
static int mv_mode_cts [4] [2];
#endif
#if CONFIG_HIGH_PRECISION_MV
int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight, int ishp)
{
// MV costing is based on the distribution of vectors in the previous frame and as such will tend to
// overstate the cost of vectors. In addition, coding a new vector can have a knock-on effect on the
// cost of subsequent vectors and on the quality of prediction from NEAR and NEAREST for subsequent blocks.
// The "Weight" parameter allows, to a limited extent, for these factors to be taken into account.
return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> (ishp==0)] + mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> (ishp==0)]) * Weight) >> 7;
}
#else
int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight)
{
// MV costing is based on the distribution of vectors in the previous frame and as such will tend to
// overstate the cost of vectors. In addition, coding a new vector can have a knock-on effect on the
// cost of subsequent vectors and on the quality of prediction from NEAR and NEAREST for subsequent blocks.
// The "Weight" parameter allows, to a limited extent, for these factors to be taken into account.
return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] + mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) * Weight) >> 7;
}
#endif
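/*
 * Worked example (added note, not in the original source): MV components are
 * stored internally in eighth-pel units, and without high-precision MVs the
 * cost tables are indexed at quarter-pel resolution, hence the >> 1. For a
 * row difference of 8 (one full pel) and Weight == 128, the row contribution
 * is (mvcost[0][8 >> 1] * 128) >> 7 == mvcost[0][4], i.e. the table value
 * unscaled; smaller Weight values discount the rate term.
 */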
#if CONFIG_HIGH_PRECISION_MV
static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int error_per_bit, int ishp)
{
return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> (ishp==0)] +
mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> (ishp==0)])
* error_per_bit + 128) >> 8;
}
#else
static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int error_per_bit)
{
return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] +
mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1])
* error_per_bit + 128) >> 8;
}
#endif
static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2], int error_per_bit)
{
/* Calculate sad error cost on full pixel basis. */
return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] +
mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)])
* error_per_bit + 128) >> 8;
}
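/* Added note: unlike mv_err_cost above, the SAD cost tables are indexed with
 * the raw full-pel MV difference (no shift), matching the full-pel candidates
 * used during the integer search; "+ 128) >> 8" rounds the fixed-point
 * error_per_bit product to the nearest integer. */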
void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride)
{
int Len;
int search_site_count = 0;
// Generate offsets for 4 search sites per step.
Len = MAX_FIRST_STEP;
x->ss[search_site_count].mv.col = 0;
x->ss[search_site_count].mv.row = 0;
x->ss[search_site_count].offset = 0;
search_site_count++;
while (Len > 0)
{
// Compute offsets for search sites.
x->ss[search_site_count].mv.col = 0;
x->ss[search_site_count].mv.row = -Len;
x->ss[search_site_count].offset = -Len * stride;
search_site_count++;
// Compute offsets for search sites.
x->ss[search_site_count].mv.col = 0;
x->ss[search_site_count].mv.row = Len;
x->ss[search_site_count].offset = Len * stride;
search_site_count++;
// Compute offsets for search sites.
x->ss[search_site_count].mv.col = -Len;
x->ss[search_site_count].mv.row = 0;
x->ss[search_site_count].offset = -Len;
search_site_count++;
// Compute offsets for search sites.
x->ss[search_site_count].mv.col = Len;
x->ss[search_site_count].mv.row = 0;
x->ss[search_site_count].offset = Len;
search_site_count++;
// Contract.
Len /= 2;
}
x->ss_count = search_site_count;
x->searches_per_step = 4;
}
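/*
 * Illustration (added): the loop above emits a shrinking diamond of four
 * sites per step. If MAX_FIRST_STEP were 8, for example, the site list
 * (col,row) would be the centre (0,0) followed by
 *   Len 8: (0,-8) (0,8) (-8,0) (8,0)
 *   Len 4: (0,-4) (0,4) (-4,0) (4,0)
 *   ... down to Len 1,
 * with each site's buffer offset precomputed as row * stride + col.
 */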
void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
{
int Len;
int search_site_count = 0;
// Generate offsets for 8 search sites per step.
Len = MAX_FIRST_STEP;
x->ss[search_site_count].mv.col = 0;
x->ss[search_site_count].mv.row = 0;
x->ss[search_site_count].offset = 0;
search_site_count++;
while (Len > 0)
{
// Compute offsets for search sites.
x->ss[search_site_count].mv.col = 0;
x->ss[search_site_count].mv.row = -Len;
x->ss[search_site_count].offset = -Len * stride;
search_site_count++;
// Compute offsets for search sites.
x->ss[search_site_count].mv.col = 0;
x->ss[search_site_count].mv.row = Len;
x->ss[search_site_count].offset = Len * stride;
search_site_count++;
// Compute offsets for search sites.
x->ss[search_site_count].mv.col = -Len;
x->ss[search_site_count].mv.row = 0;
x->ss[search_site_count].offset = -Len;
search_site_count++;
// Compute offsets for search sites.
x->ss[search_site_count].mv.col = Len;
x->ss[search_site_count].mv.row = 0;
x->ss[search_site_count].offset = Len;
search_site_count++;
// Compute offsets for search sites.
x->ss[search_site_count].mv.col = -Len;
x->ss[search_site_count].mv.row = -Len;
x->ss[search_site_count].offset = -Len * stride - Len;
search_site_count++;
// Compute offsets for search sites.
x->ss[search_site_count].mv.col = Len;
x->ss[search_site_count].mv.row = -Len;
x->ss[search_site_count].offset = -Len * stride + Len;
search_site_count++;
// Compute offsets for search sites.
x->ss[search_site_count].mv.col = -Len;
x->ss[search_site_count].mv.row = Len;
x->ss[search_site_count].offset = Len * stride - Len;
search_site_count++;
// Compute offsets for search sites.
x->ss[search_site_count].mv.col = Len;
x->ss[search_site_count].mv.row = Len;
x->ss[search_site_count].offset = Len * stride + Len;
search_site_count++;
// Contract.
Len /= 2;
}
x->ss_count = search_site_count;
x->searches_per_step = 8;
}
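/* Added note: the 8-site variant extends the cross pattern above with the
 * four diagonal neighbours (+/-Len, +/-Len), giving a square pattern; the
 * precomputed offsets let the search loop address each candidate with a
 * single pointer add. */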
/*
 * To avoid the penalty for crossing a cache-line boundary on reads, preload
 * the reference area into a small buffer whose alignment guarantees no
 * cache-line-crossing reads while reading from it. This reduces the CPU
 * cycles spent reading ref data in the sub-pixel filter functions.
 * TODO: Currently, since the sub-pixel search range here is -3 to 3, we copy
 * a 22-row x 32-col area, which is enough for a 16x16 macroblock. Later, for
 * SPLITMV, we could reduce the area.
 */
#define PRE(r,c) (y + (((r)>>2) * y_stride + ((c)>>2) -(offset))) // pointer to predictor base of a motion vector
#if CONFIG_SIXTEENTH_SUBPEL_UV
#define SP(x) (((x)&3)<<2) // convert motion vector component to offset for svf calc
#else
#define SP(x) (((x)&3)<<1) // convert motion vector component to offset for svf calc
#endif /* CONFIG_SIXTEENTH_SUBPEL_UV */
#define MVC(r,c) (((mvcost[0][(r)-rr] + mvcost[1][(c)-rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
#define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns sub-pixel variance (distortion) at (r,c)
#define ERR(r,c) (MVC(r,c)+DIST(r,c)) // returns distortion + motion vector cost
#define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
#if CONFIG_HIGH_PRECISION_MV
#define PREHP(r,c) (y + (((r)>>3) * y_stride + ((c)>>3) -(offset))) // pointer to predictor base of a motion vector
#if CONFIG_SIXTEENTH_SUBPEL_UV
#define SPHP(x) (((x)&7)<<1) // convert motion vector component to offset for svf calc
#else /* CONFIG_SIXTEENTH_SUBPEL_UV */
#define SPHP(x) ((x)&7) // convert motion vector component to offset for svf calc
#endif /* CONFIG_SIXTEENTH_SUBPEL_UV */
#define DISTHP(r,c) vfp->svf( PREHP(r,c), y_stride, SPHP(c),SPHP(r), z,b->src_stride,&sse) // returns sub-pixel variance (distortion) at (r,c)
#define ERRHP(r,c) (MVC(r,c)+DISTHP(r,c)) // returns distortion + motion vector cost
#define CHECK_BETTER(v,r,c) \
IFMVCV(r,c, \
{ \
thismse = ((xd->allow_high_precision_mv) ? DISTHP(r,c) : DIST(r,c)); \
if ((v = (MVC(r,c) + thismse)) < besterr) \
{ \
besterr = v; br = r; bc = c; \
*distortion = thismse; *sse1 = sse; \
} \
}, \
v = INT_MAX;) // checks whether (r,c) scores better than the previous best
#else
#define CHECK_BETTER(v,r,c) \
IFMVCV(r,c, \
{ \
thismse = DIST(r,c); \
if ((v = (MVC(r,c) + thismse)) < besterr) \
{ \
besterr = v; br = r; bc = c; \
*distortion = thismse; *sse1 = sse; \
} \
}, \
v = INT_MAX;) // checks whether (r,c) scores better than the previous best
#endif /* CONFIG_HIGH_PRECISION_MV */
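/*
 * Expansion sketch (added): for an in-bounds candidate (r,c), CHECK_BETTER
 * effectively evaluates
 *
 *   thismse = vfp->svf(PRE(r,c), y_stride, SP(c), SP(r), z, b->src_stride, &sse);
 *   v = MVC(r,c) + thismse;   // rate + distortion
 *   if (v < besterr) { keep (r,c) as the new best }
 *
 * where PRE() resolves the full-pel base pointer and SP() extracts the
 * sub-pel phase for the sub-pixel variance function; out-of-bounds
 * candidates are scored INT_MAX.
 */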
#define MIN(x,y) (((x)<(y))?(x):(y))
#define MAX(x,y) (((x)>(y))?(x):(y))
int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
int_mv *bestmv, int_mv *ref_mv,
int error_per_bit,
const vp8_variance_fn_ptr_t *vfp,
int *mvcost[2], int *distortion,
unsigned int *sse1)
{
unsigned char *z = (*(b->base_src) + b->src);
MACROBLOCKD *xd = &x->e_mbd;
int rr, rc, br, bc, hstep;
int tr, tc;
unsigned int besterr = INT_MAX;
unsigned int left, right, up, down, diag;
unsigned int sse;
unsigned int whichdir;
unsigned int halfiters = 4;
unsigned int quarteriters = 4;
#if CONFIG_HIGH_PRECISION_MV
unsigned int eighthiters = 4;
#endif
int thismse;
int maxc, minc, maxr, minr;
int y_stride;
int offset;
#if ARCH_X86 || ARCH_X86_64
unsigned char *y0 = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
unsigned char *y;
int buf_r1, buf_r2, buf_c1, buf_c2;
// Clamping to avoid out-of-range data access
buf_r1 = ((bestmv->as_mv.row - INTERP_EXTEND) < x->mv_row_min)?(bestmv->as_mv.row - x->mv_row_min):INTERP_EXTEND;
buf_r2 = ((bestmv->as_mv.row + INTERP_EXTEND) > x->mv_row_max)?(x->mv_row_max - bestmv->as_mv.row):INTERP_EXTEND;
buf_c1 = ((bestmv->as_mv.col - INTERP_EXTEND) < x->mv_col_min)?(bestmv->as_mv.col - x->mv_col_min):INTERP_EXTEND;
buf_c2 = ((bestmv->as_mv.col + INTERP_EXTEND) > x->mv_col_max)?(x->mv_col_max - bestmv->as_mv.col):INTERP_EXTEND;
y_stride = 32;
/* Copy to intermediate buffer before searching. */
vfp->copymem(y0 - buf_c1 - d->pre_stride*buf_r1, d->pre_stride, xd->y_buf, y_stride, 16+buf_r1+buf_r2);
y = xd->y_buf + y_stride*buf_r1 +buf_c1;
#else
unsigned char *y = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
y_stride = d->pre_stride;
#endif
#if CONFIG_HIGH_PRECISION_MV
if (xd->allow_high_precision_mv)
{
rr = ref_mv->as_mv.row; rc = ref_mv->as_mv.col;
br = bestmv->as_mv.row << 3; bc = bestmv->as_mv.col << 3;
hstep = 4;
minc = MAX(x->mv_col_min << 3, (ref_mv->as_mv.col) - ((1 << mvlong_width_hp) - 1));
maxc = MIN(x->mv_col_max << 3, (ref_mv->as_mv.col) + ((1 << mvlong_width_hp) - 1));
minr = MAX(x->mv_row_min << 3, (ref_mv->as_mv.row) - ((1 << mvlong_width_hp) - 1));
maxr = MIN(x->mv_row_max << 3, (ref_mv->as_mv.row) + ((1 << mvlong_width_hp) - 1));
}
else
#endif
{
rr = ref_mv->as_mv.row >> 1; rc = ref_mv->as_mv.col >> 1;
br = bestmv->as_mv.row << 2; bc = bestmv->as_mv.col << 2;
hstep = 2;
minc = MAX(x->mv_col_min << 2, (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
maxc = MIN(x->mv_col_max << 2, (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
minr = MAX(x->mv_row_min << 2, (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
maxr = MIN(x->mv_row_max << 2, (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));
}
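/* Added note: from here on br/bc, hstep and the clamp range are in
 * eighth-pel units when high-precision MVs are enabled and quarter-pel
 * units otherwise; hstep starts at half a pel in either domain and is
 * halved for each refinement stage below. */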
tr = br;
tc = bc;
offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
// central mv
bestmv->as_mv.row <<= 3;
bestmv->as_mv.col <<= 3;
// calculate central point error
besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
*distortion = besterr;
#if CONFIG_HIGH_PRECISION_MV
besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
#endif
// TODO: Each subsequent iteration checks at least one point in common with the previous iteration (two if the diagonal was selected).
while (--halfiters)
{
// 1/2 pel
CHECK_BETTER(left, tr, tc - hstep);
CHECK_BETTER(right, tr, tc + hstep);
CHECK_BETTER(up, tr - hstep, tc);
CHECK_BETTER(down, tr + hstep, tc);
whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
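// Added note: whichdir encodes the winning quadrant - bit 0 set means
// right scored no worse than left, bit 1 set means down scored no worse
// than up - so only the matching diagonal needs to be probed.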
switch (whichdir)
{
case 0:
CHECK_BETTER(diag, tr - hstep, tc - hstep);
break;
case 1:
CHECK_BETTER(diag, tr - hstep, tc + hstep);
break;
case 2:
CHECK_BETTER(diag, tr + hstep, tc - hstep);
break;
case 3:
CHECK_BETTER(diag, tr + hstep, tc + hstep);
break;
}
// no reason to check the same one again.
if (tr == br && tc == bc)
break;
tr = br;
tc = bc;
}
// TODO: Each subsequent iteration checks at least one point in common with the previous iteration (two if the diagonal was selected).
// 1/4 pel
hstep >>= 1;
while (--quarteriters)
{
CHECK_BETTER(left, tr, tc - hstep);
CHECK_BETTER(right, tr, tc + hstep);
CHECK_BETTER(up, tr - hstep, tc);
CHECK_BETTER(down, tr + hstep, tc);
whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
switch (whichdir)
{
case 0:
CHECK_BETTER(diag, tr - hstep, tc - hstep);
break;
case 1:
CHECK_BETTER(diag, tr - hstep, tc + hstep);
break;
case 2:
CHECK_BETTER(diag, tr + hstep, tc - hstep);
break;
case 3:
CHECK_BETTER(diag, tr + hstep, tc + hstep);
break;
}
// no reason to check the same one again.
if (tr == br && tc == bc)
break;
tr = br;
tc = bc;
}
#if CONFIG_HIGH_PRECISION_MV
if (x->e_mbd.allow_high_precision_mv)
{
hstep >>= 1;
while (--eighthiters)
{
CHECK_BETTER(left, tr, tc - hstep);
CHECK_BETTER(right, tr, tc + hstep);
CHECK_BETTER(up, tr - hstep, tc);
CHECK_BETTER(down, tr + hstep, tc);
whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
switch (whichdir)
{
case 0:
CHECK_BETTER(diag, tr - hstep, tc - hstep);
break;
case 1:
CHECK_BETTER(diag, tr - hstep, tc + hstep);
break;
case 2:
CHECK_BETTER(diag, tr + hstep, tc - hstep);
break;
case 3:
CHECK_BETTER(diag, tr + hstep, tc + hstep);
break;
}
// no reason to check the same one again.
if (tr == br && tc == bc)
break;
tr = br;
tc = bc;
}
}
#endif
#if CONFIG_HIGH_PRECISION_MV
if (x->e_mbd.allow_high_precision_mv)
{
bestmv->as_mv.row = br;
bestmv->as_mv.col = bc;
}
else
#endif /* CONFIG_HIGH_PRECISION_MV */
{
bestmv->as_mv.row = br << 1;
bestmv->as_mv.col = bc << 1;
}
if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL<<3)) ||
(abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL<<3)))
return INT_MAX;
return besterr;
}
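/*
 * Usage note (added): on entry *bestmv holds the full-pel result of the
 * integer search; on return it holds the refined vector in eighth-pel units
 * (the components are shifted up by 3 internally). The return value is the
 * best rate-distortion score, or INT_MAX if the refined vector ended up more
 * than MAX_FULL_PEL_VAL pels away from ref_mv.
 */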
#undef MVC
#undef PRE
#undef SP
#undef DIST
#undef IFMVCV
#undef ERR
#undef CHECK_BETTER
#undef MIN
#undef MAX
#if CONFIG_HIGH_PRECISION_MV
#undef PREHP
#undef SPHP
#undef DISTHP
#undef ERRHP
#endif
#if CONFIG_SIXTEENTH_SUBPEL_UV
#define SP(x) (((x)&7)<<1) // convert motion vector component to offset for svf calc
#else
#define SP(x) ((x)&7) // convert motion vector component to offset for svf calc
#endif /* CONFIG_SIXTEENTH_SUBPEL_UV */
int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
int_mv *bestmv, int_mv *ref_mv,
int error_per_bit,
const vp8_variance_fn_ptr_t *vfp,
int *mvcost[2], int *distortion,
unsigned int *sse1)
{
int bestmse = INT_MAX;
int_mv startmv;
int_mv this_mv;
#if CONFIG_HIGH_PRECISION_MV
int_mv orig_mv;
int yrow_movedback=0, ycol_movedback=0;
#endif
unsigned char *z = (*(b->base_src) + b->src);
int left, right, up, down, diag;
unsigned int sse;
int whichdir;
int thismse;
int y_stride;
MACROBLOCKD *xd = &x->e_mbd;
#if ARCH_X86 || ARCH_X86_64
unsigned char *y0 = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
unsigned char *y;
y_stride = 32;
/* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
vfp->copymem(y0 - 1 - d->pre_stride, d->pre_stride, xd->y_buf, y_stride, 18);
y = xd->y_buf + y_stride + 1;
#else
unsigned char *y = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
y_stride = d->pre_stride;
#endif
// central mv
bestmv->as_mv.row <<= 3;
bestmv->as_mv.col <<= 3;
startmv = *bestmv;
#if CONFIG_HIGH_PRECISION_MV
orig_mv = *bestmv;
#endif
// calculate central point error
bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
*distortion = bestmse;
#if CONFIG_HIGH_PRECISION_MV
bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
#endif
// go left then right and check error
this_mv.as_mv.row = startmv.as_mv.row;
this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
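// Added note: startmv components are multiples of 8 here (full pel in
// eighth-pel units), so ((col - 8) | 4) is just col - 4: the half-pel
// candidate to the left, computed without a branch.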
thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
#if CONFIG_HIGH_PRECISION_MV
left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
#endif
if (left < bestmse)
{
*bestmv = this_mv;
bestmse = left;
*distortion = thismse;
*sse1 = sse;
}
this_mv.as_mv.col += 8;
thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
#if CONFIG_HIGH_PRECISION_MV
right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
#endif
if (right < bestmse)
{
*bestmv = this_mv;
bestmse = right;
*distortion = thismse;
*sse1 = sse;
}
// go up then down and check error
this_mv.as_mv.col = startmv.as_mv.col;
this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
#if CONFIG_HIGH_PRECISION_MV
up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
#endif
if (up < bestmse)
{
*bestmv = this_mv;
bestmse = up;
*distortion = thismse;
*sse1 = sse;
}
this_mv.as_mv.row += 8;
thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
#if CONFIG_HIGH_PRECISION_MV
down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
#endif
if (down < bestmse)
{
*bestmv = this_mv;
bestmse = down;
*distortion = thismse;
*sse1 = sse;
}
// now check 1 more diagonal
whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
//for(whichdir =0;whichdir<4;whichdir++)
//{
this_mv = startmv;
switch (whichdir)
{
case 0:
this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse);
break;
case 1:
this_mv.as_mv.col += 4;
this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse);
break;
case 2:
this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
this_mv.as_mv.row += 4;
thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse);
break;
case 3:
default:
this_mv.as_mv.col += 4;
this_mv.as_mv.row += 4;
thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse);
break;
}
#if CONFIG_HIGH_PRECISION_MV
diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
#endif
if (diag < bestmse)
{
*bestmv = this_mv;
bestmse = diag;
*distortion = thismse;
*sse1 = sse;
}
// }
// time to check quarter pels.
if (bestmv->as_mv.row < startmv.as_mv.row)
{
y -= y_stride;
#if CONFIG_HIGH_PRECISION_MV
yrow_movedback = 1;
#endif
}
if (bestmv->as_mv.col < startmv.as_mv.col)
{
y--;
#if CONFIG_HIGH_PRECISION_MV
ycol_movedback = 1;
#endif
}
startmv = *bestmv;
// go left then right and check error
this_mv.as_mv.row = startmv.as_mv.row;
if (startmv.as_mv.col & 7)
{
this_mv.as_mv.col = startmv.as_mv.col - 2;
thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
}
else
{
this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
}
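// Added note: the same branch-free trick at quarter-pel resolution - when
// the current column sits on a full-pel boundary (low three bits zero),
// ((col - 8) | 6) yields col - 2 while the predictor base moves one pel
// left and SP(6) selects the 6/8 sub-pel phase.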
#if CONFIG_HIGH_PRECISION_MV
left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
#endif
if (left < bestmse)
{
*bestmv = this_mv;
bestmse = left;
*distortion = thismse;
*sse1 = sse;
}
this_mv.as_mv.col += 4;
thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
#if CONFIG_HIGH_PRECISION_MV
right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
#endif
if (right < bestmse)
{
*bestmv = this_mv;
bestmse = right;
*distortion = thismse;
*sse1 = sse;
}
// go up then down and check error
this_mv.as_mv.col = startmv.as_mv.col;
if (startmv.as_mv.row & 7)
{
this_mv.as_mv.row = startmv.as_mv.row - 2;
thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
}
else
{
this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6), z, b->src_stride, &sse);
}
#if CONFIG_HIGH_PRECISION_MV
up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
#endif
if (up < bestmse)
{
*bestmv = this_mv;
bestmse = up;
*distortion = thismse;
*sse1 = sse;
}
this_mv.as_mv.row += 4;
thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
#if CONFIG_HIGH_PRECISION_MV
down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
#endif
if (down < bestmse)
{
*bestmv = this_mv;
bestmse = down;
*distortion = thismse;
*sse1 = sse;
}
// now check 1 more diagonal
whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
// for(whichdir=0;whichdir<4;whichdir++)
// {
this_mv = startmv;
switch (whichdir)
{
case 0:
if (startmv.as_mv.row & 7)
{
this_mv.as_mv.row -= 2;
if (startmv.as_mv.col & 7)
{
this_mv.as_mv.col -= 2;
thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
}
else
{
this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
}
}
else
{
this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
if (startmv.as_mv.col & 7)
{
this_mv.as_mv.col -= 2;
thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6), z, b->src_stride, &sse);
}
else
{
this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
thismse = vfp->svf(y - y_stride - 1, y_stride, SP(6), SP(6), z, b->src_stride, &sse);
}
}
break;
case 1:
this_mv.as_mv.col += 2;
if (startmv.as_mv.row & 7)
{
this_mv.as_mv.row -= 2;
thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
}
else
{
this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6), z, b->src_stride, &sse);
}
break;
case 2:
this_mv.as_mv.row += 2;
if (startmv.as_mv.col & 7)
{
this_mv.as_mv.col -= 2;
thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
}
else
{
this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
}
break;
case 3:
this_mv.as_mv.col += 2;
this_mv.as_mv.row += 2;
thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
break;
}
#if CONFIG_HIGH_PRECISION_MV
diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
#endif
if (diag < bestmse)
{
*bestmv = this_mv;
bestmse = diag;
*distortion = thismse;
*sse1 = sse;
}
#if CONFIG_HIGH_PRECISION_MV
if (!x->e_mbd.allow_high_precision_mv)
return bestmse;
/* Now do 1/8th pixel */
if (bestmv->as_mv.row < orig_mv.as_mv.row && !yrow_movedback)
{
y -= y_stride;
yrow_movedback = 1;
}
if (bestmv->as_mv.col < orig_mv.as_mv.col && !ycol_movedback)
{
y--;
ycol_movedback = 1;
}
startmv = *bestmv;
// go left then right and check error
this_mv.as_mv.row = startmv.as_mv.row;
if (startmv.as_mv.col & 7)
{
this_mv.as_mv.col = startmv.as_mv.col - 1;
thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
}
else
{
this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
}
left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
if (left < bestmse)
{
*bestmv = this_mv;
bestmse = left;
*distortion = thismse;
*sse1 = sse;
}
this_mv.as_mv.col += 2;
thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
if (right < bestmse)
{
*bestmv = this_mv;
bestmse = right;
*distortion = thismse;
*sse1 = sse;
}
// go up then down and check error
this_mv.as_mv.col = startmv.as_mv.col;
if (startmv.as_mv.row & 7)
{
this_mv.as_mv.row = startmv.as_mv.row - 1;
thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
}
else
{
this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7;
thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, b->src_stride, &sse);
}
up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
if (up < bestmse)
{
*bestmv = this_mv;
bestmse = up;
*distortion = thismse;
*sse1 = sse;
}
this_mv.as_mv.row += 2;
thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
if (down < bestmse)
{
*bestmv = this_mv;
bestmse = down;
*distortion = thismse;
*sse1 = sse;
}
// now check 1 more diagonal
whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
// for(whichdir=0;whichdir<4;whichdir++)
// {
this_mv = startmv;
switch (whichdir)
{
case 0:
if (startmv.as_mv.row & 7)
{
this_mv.as_mv.row -= 1;
if (startmv.as_mv.col & 7)
{
this_mv.as_mv.col -= 1;
thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
}
else
{
this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
}
}
else
{
this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7;
if (startmv.as_mv.col & 7)
{
this_mv.as_mv.col -= 1;
thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, b->src_stride, &sse);
}
else
{
this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
thismse = vfp->svf(y - y_stride - 1, y_stride, SP(7), SP(7), z, b->src_stride, &sse);
}
}
break;
case 1:
this_mv.as_mv.col += 1;
if (startmv.as_mv.row & 7)
{
this_mv.as_mv.row -= 1;
thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
}
else
{
this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7;
thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, b->src_stride, &sse);
}
break;
case 2:
this_mv.as_mv.row += 1;
if (startmv.as_mv.col & 7)
{
this_mv.as_mv.col -= 1;
thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
}
else
{
this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
}
break;
case 3:
this_mv.as_mv.col += 1;
this_mv.as_mv.row += 1;
thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
break;
}
diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
if (diag < bestmse)
{
*bestmv = this_mv;
bestmse = diag;
*distortion = thismse;
*sse1 = sse;
}
#endif /* CONFIG_HIGH_PRECISION_MV */
return bestmse;
}
#undef SP
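/* Added note: vp8_find_best_half_pixel_step below is the half-pel-only
 * counterpart of vp8_find_best_sub_pixel_step above: the same
 * centre/left/right/up/down/diagonal probe pattern, stopped after the
 * half-pel stage. */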
int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
int_mv *bestmv, int_mv *ref_mv,
int error_per_bit,
const vp8_variance_fn_ptr_t *vfp,
int *mvcost[2], int *distortion,
unsigned int *sse1)
{
int bestmse = INT_MAX;
int_mv startmv;
int_mv this_mv;
unsigned char *z = (*(b->base_src) + b->src);
int left, right, up, down, diag;
unsigned int sse;
int whichdir;
int thismse;
int y_stride;
MACROBLOCKD *xd = &x->e_mbd;
#if ARCH_X86 || ARCH_X86_64
unsigned char *y0 = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
unsigned char *y;
y_stride = 32;
/* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
vfp->copymem(y0 - 1 - d->pre_stride, d->pre_stride, xd->y_buf, y_stride, 18);
y = xd->y_buf + y_stride + 1;
#else
unsigned char *y = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
y_stride = d->pre_stride;
#endif
// central mv
bestmv->as_mv.row <<= 3;
bestmv->as_mv.col <<= 3;
startmv = *bestmv;
// calculate central point error
bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
*distortion = bestmse;
#if CONFIG_HIGH_PRECISION_MV
bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
#endif
// go left then right and check error
this_mv.as_mv.row = startmv.as_mv.row;
this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
#if CONFIG_HIGH_PRECISION_MV
left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
#endif
if (left < bestmse)
{
*bestmv = this_mv;
bestmse = left;
*distortion = thismse;
*sse1 = sse;
}
this_mv.as_mv.col += 8;
thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
#if CONFIG_HIGH_PRECISION_MV
right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
#endif
if (right < bestmse)
{
*bestmv = this_mv;
bestmse = right;
*distortion = thismse;
*sse1 = sse;
}
// go up then down and check error
this_mv.as_mv.col = startmv.as_mv.col;
this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
#if CONFIG_HIGH_PRECISION_MV
up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
#endif
if (up < bestmse)
{