• Deb Mukherjee's avatar
    Diamond search change to accelerate movement · a1e2d50b
    Deb Mukherjee authored
    Optional change in diamond search to continue in the best move
    direction until that move turns worse.
    
    This is still WIP since the exact way the new method is to be used is
    under investigation. One option is to make it an option in diamond
    search and use it only when motion is large.
    
    Overall slightly positive on derfraw300 +0.02%, stdhdraw +0.13%,
    but works a lot better for high motion sequences (ex. football : +1%).
    
    Change-Id: If88e01a6021daa0cda934680cdc70be1ee04f798
    a1e2d50b
vp9_mcomp.c 76.59 KiB
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include <stdio.h>
#include <limits.h>
#include <math.h>
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vpx_mem/vpx_mem.h"
#include "./vpx_config.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_common.h"
// #define NEW_DIAMOND_SEARCH
/*
 * Narrows the motion-vector limits stored in x (mv_col_min/max and
 * mv_row_min/max) to the window of positions that lie within
 * MAX_FULL_PEL_VAL of ref_mv.
 *
 * x       Macroblock whose four mv_* limits are tightened in place. A limit
 *         is only ever moved inwards, never widened.
 * ref_mv  Reference vector. Its components are shifted right by 3 before
 *         being compared with the limits; the lower bounds are bumped up by
 *         one when the low three bits of the component are non-zero.
 */
void vp9_clamp_mv_min_max(MACROBLOCK *x, int_mv *ref_mv) {
  int col_min = (ref_mv->as_mv.col >> 3) - MAX_FULL_PEL_VAL +
                ((ref_mv->as_mv.col & 7) ? 1 : 0);
  int row_min = (ref_mv->as_mv.row >> 3) - MAX_FULL_PEL_VAL +
                ((ref_mv->as_mv.row & 7) ? 1 : 0);
  int col_max = (ref_mv->as_mv.col >> 3) + MAX_FULL_PEL_VAL;
  int row_max = (ref_mv->as_mv.row >> 3) + MAX_FULL_PEL_VAL;

  /* Get intersection of UMV window and valid MV window to reduce # of checks
   * in diamond search. */
  if (x->mv_col_min < col_min)
    x->mv_col_min = col_min;
  if (x->mv_col_max > col_max)
    x->mv_col_max = col_max;
  if (x->mv_row_min < row_min)
    x->mv_row_min = row_min;
  if (x->mv_row_max > row_max)
    x->mv_row_max = row_max;
}

/*
 * Derives a starting step index for a search from a size hint.
 *
 * size is first raised to at least 16. The result is the largest shift sr
 * for which (size << sr) is still below MAX_FULL_PEL_VAL (0 if there is
 * none), increased by cpi->sf.reduce_first_step_size and finally capped at
 * cpi->sf.max_step_search_steps - 2.
 *
 * cpi   Encoder instance; only the two speed-feature fields named above are
 *       read.
 * size  Size hint supplied by the caller.
 *
 * Returns the computed step index.
 */
int vp9_init_search_range(VP9_COMP *cpi, int size) {
  int sr = 0;

  // Minimum search size no matter what the passed in value.
  size = MAX(16, size);

  // Find the first shift at which size reaches MAX_FULL_PEL_VAL ...
  while ((size << sr) < MAX_FULL_PEL_VAL)
    sr++;

  // ... and step back to the last shift that stayed below it.
  if (sr)
    sr--;

  sr += cpi->sf.reduce_first_step_size;
  sr = MIN(sr, (cpi->sf.max_step_search_steps - 2));
  return sr;
}

/*
 * Returns the weighted cost of the difference between mv and ref.
 *
 * The difference vector is looked up in three tables: mvjcost (indexed by
 * vp9_get_mv_joint() of the difference), mvcost[0] (indexed by the row
 * difference) and mvcost[1] (indexed by the column difference). The sum is
 * multiplied by weight and reduced with ROUND_POWER_OF_TWO(..., 7).
 *
 * NOTE(review): the row/column differences can be negative, so mvcost[0] and
 * mvcost[1] presumably point into the middle of their tables - confirm
 * against the caller that sets them up.
 *
 * ishp is accepted for interface compatibility but is not read here.
 */
int vp9_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2],
                    int weight, int ishp) {
  MV v;
  v.row = mv->as_mv.row - ref->as_mv.row;
  v.col = mv->as_mv.col - ref->as_mv.col;
  return ROUND_POWER_OF_TWO((mvjcost[vp9_get_mv_joint(&v)] +
                             mvcost[0][v.row] +
                             mvcost[1][v.col]) * weight, 7);
}

/*
 * Returns the cost of the difference between mv and ref, scaled by
 * error_per_bit and reduced with ROUND_POWER_OF_TWO(..., 13).
 * Returns 0 when mvcost is NULL. ishp is not read.
 */
static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2],
                       int error_per_bit, int ishp) {
  if (mvcost) {
    MV v;
    v.row = mv->as_mv.row - ref->as_mv.row;
    v.col = mv->as_mv.col - ref->as_mv.col;
    return ROUND_POWER_OF_TWO((mvjcost[vp9_get_mv_joint(&v)] +
                               mvcost[0][v.row] +
                               mvcost[1][v.col]) * error_per_bit, 13);
  }
  return 0;
}

/*
 * Same computation as mv_err_cost() but using the SAD cost tables and
 * reducing with ROUND_POWER_OF_TWO(..., 8).
 * Returns 0 when mvsadcost is NULL.
 */
static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvjsadcost,
                          int *mvsadcost[2], int error_per_bit) {
  if (mvsadcost) {
    MV v;
    v.row = mv->as_mv.row - ref->as_mv.row;
    v.col = mv->as_mv.col - ref->as_mv.col;
    return ROUND_POWER_OF_TWO((mvjsadcost[vp9_get_mv_joint(&v)] +
                               mvsadcost[0][v.row] +
                               mvsadcost[1][v.col]) * error_per_bit, 8);
  }
  return 0;
}

/*
 * Fills x->ss with the search-site table for a 4-point pattern.
 *
 * Entry 0 is the origin. Then, for len = MAX_FIRST_STEP, halving down to 1,
 * four entries are appended in the order up, down, left, right. Each entry
 * holds the (row, col) displacement and the matching buffer offset computed
 * from stride. x->ss_count receives the number of entries written and
 * x->searches_per_step is set to 4.
 */
void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
  int len;
  int search_site_count = 0;

  // Generate offsets for 4 search sites per step.
  x->ss[search_site_count].mv.col = 0;
  x->ss[search_site_count].mv.row = 0;
  x->ss[search_site_count].offset = 0;
  search_site_count++;

  for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
    // Compute offsets for search sites.
    x->ss[search_site_count].mv.col = 0;
    x->ss[search_site_count].mv.row = -len;
    x->ss[search_site_count].offset = -len * stride;
    search_site_count++;

    // Compute offsets for search sites.
    x->ss[search_site_count].mv.col = 0;
    x->ss[search_site_count].mv.row = len;
    x->ss[search_site_count].offset = len * stride;
    search_site_count++;

    // Compute offsets for search sites.
    x->ss[search_site_count].mv.col = -len;
    x->ss[search_site_count].mv.row = 0;
    x->ss[search_site_count].offset = -len;
    search_site_count++;

    // Compute offsets for search sites.
    x->ss[search_site_count].mv.col = len;
    x->ss[search_site_count].mv.row = 0;
    x->ss[search_site_count].offset = len;
    search_site_count++;
  }

  x->ss_count = search_site_count;
  x->searches_per_step = 4;
}

/*
 * Fills x->ss with the search-site table for an 8-point pattern.
 *
 * Entry 0 is the origin. Then, for len = MAX_FIRST_STEP, halving down to 1,
 * eight entries are appended: up, down, left, right, followed by the four
 * diagonals (up-left, up-right, down-left, down-right). x->ss_count receives
 * the number of entries written and x->searches_per_step is set to 8.
 */
void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) {
  int len;
  int search_site_count = 0;

  // Generate offsets for 8 search sites per step.
  x->ss[search_site_count].mv.col = 0;
  x->ss[search_site_count].mv.row = 0;
  x->ss[search_site_count].offset = 0;
  search_site_count++;

  for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
    // Compute offsets for search sites.
    x->ss[search_site_count].mv.col = 0;
    x->ss[search_site_count].mv.row = -len;
    x->ss[search_site_count].offset = -len * stride;
    search_site_count++;

    // Compute offsets for search sites.
    x->ss[search_site_count].mv.col = 0;
    x->ss[search_site_count].mv.row = len;
    x->ss[search_site_count].offset = len * stride;
    search_site_count++;

    // Compute offsets for search sites.
    x->ss[search_site_count].mv.col = -len;
    x->ss[search_site_count].mv.row = 0;
    x->ss[search_site_count].offset = -len;
    search_site_count++;

    // Compute offsets for search sites.
    x->ss[search_site_count].mv.col = len;
    x->ss[search_site_count].mv.row = 0;
    x->ss[search_site_count].offset = len;
    search_site_count++;

    // Compute offsets for search sites.
    x->ss[search_site_count].mv.col = -len;
    x->ss[search_site_count].mv.row = -len;
    x->ss[search_site_count].offset = -len * stride - len;
    search_site_count++;

    // Compute offsets for search sites.
    x->ss[search_site_count].mv.col = len;
    x->ss[search_site_count].mv.row = -len;
    x->ss[search_site_count].offset = -len * stride + len;
    search_site_count++;

    // Compute offsets for search sites.
    x->ss[search_site_count].mv.col = -len;
    x->ss[search_site_count].mv.row = len;
    x->ss[search_site_count].offset = len * stride - len;
    search_site_count++;

    // Compute offsets for search sites.
    x->ss[search_site_count].mv.col = len;
    x->ss[search_site_count].mv.row = len;
    x->ss[search_site_count].offset = len * stride + len;
    search_site_count++;
  }

  x->ss_count = search_site_count;
  x->searches_per_step = 8;
}

/*
 * To avoid the penalty for crossing cache-line read, preload the reference
 * area in a small buffer, which is aligned to make sure there won't be crossing
 * cache-line read while reading from this buffer. This reduced the cpu
 * cycles spent on reading ref data in sub-pixel filter functions.
 * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x
 * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
 * could reduce the area.
 */

/* estimated cost of a motion vector (r,c) */
211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
#define MVC(r, c) \ (mvcost ? \ ((mvjcost[((r) != rr) * 2 + ((c) != rc)] + \ mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \ error_per_bit + 4096) >> 13 : 0) #define SP(x) (((x) & 7) << 1) // convert motion vector component to offset // for svf calc #define IFMVCV(r, c, s, e) \ if (c >= minc && c <= maxc && r >= minr && r <= maxr) \ s \ else \ e; /* pointer to predictor base of a motionvector */ #define PRE(r, c) (y + (((r) >> 3) * y_stride + ((c) >> 3) -(offset))) /* returns subpixel variance error function */ #define DIST(r, c) \ vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, src_stride, &sse) /* checks if (r, c) has better score than previous best */ #define CHECK_BETTER(v, r, c) \ IFMVCV(r, c, { \ thismse = (DIST(r, c)); \ if ((v = MVC(r, c) + thismse) < besterr) { \ besterr = v; \ br = r; \ bc = c; \ *distortion = thismse; \ *sse1 = sse; \ } \ }, \ v = INT_MAX;) int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, int_mv *bestmv, int_mv *ref_mv, int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1) { uint8_t *z = x->plane[0].src.buf; int src_stride = x->plane[0].src.stride; MACROBLOCKD *xd = &x->e_mbd; int rr, rc, br, bc, hstep; int tr, tc; unsigned int besterr = INT_MAX; unsigned int left, right, up, down, diag; unsigned int sse; unsigned int whichdir; unsigned int halfiters = 4; unsigned int quarteriters = 4; unsigned int eighthiters = 4; int thismse; int maxc, minc, maxr, minr; int y_stride; int offset; int usehp = xd->allow_high_precision_mv; uint8_t *y = xd->plane[0].pre[0].buf + (bestmv->as_mv.row) * xd->plane[0].pre[0].stride + bestmv->as_mv.col; y_stride = xd->plane[0].pre[0].stride; rr = ref_mv->as_mv.row;
281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350
rc = ref_mv->as_mv.col; br = bestmv->as_mv.row << 3; bc = bestmv->as_mv.col << 3; hstep = 4; minc = MAX(x->mv_col_min << 3, (ref_mv->as_mv.col) - ((1 << MV_MAX_BITS) - 1)); maxc = MIN(x->mv_col_max << 3, (ref_mv->as_mv.col) + ((1 << MV_MAX_BITS) - 1)); minr = MAX(x->mv_row_min << 3, (ref_mv->as_mv.row) - ((1 << MV_MAX_BITS) - 1)); maxr = MIN(x->mv_row_max << 3, (ref_mv->as_mv.row) + ((1 << MV_MAX_BITS) - 1)); tr = br; tc = bc; offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col; // central mv bestmv->as_mv.row <<= 3; bestmv->as_mv.col <<= 3; // calculate central point error besterr = vfp->vf(y, y_stride, z, src_stride, sse1); *distortion = besterr; besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit, xd->allow_high_precision_mv); // TODO: Each subsequent iteration checks at least one point in // common with the last iteration could be 2 ( if diag selected) while (--halfiters) { // 1/2 pel CHECK_BETTER(left, tr, tc - hstep); CHECK_BETTER(right, tr, tc + hstep); CHECK_BETTER(up, tr - hstep, tc); CHECK_BETTER(down, tr + hstep, tc); whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); switch (whichdir) { case 0: CHECK_BETTER(diag, tr - hstep, tc - hstep); break; case 1: CHECK_BETTER(diag, tr - hstep, tc + hstep); break; case 2: CHECK_BETTER(diag, tr + hstep, tc - hstep); break; case 3: CHECK_BETTER(diag, tr + hstep, tc + hstep); break; } // no reason to check the same one again. if (tr == br && tc == bc) break; tr = br; tc = bc; } // TODO: Each subsequent iteration checks at least one point in common with // the last iteration could be 2 ( if diag selected) 1/4 pel hstep >>= 1; while (--quarteriters) { CHECK_BETTER(left, tr, tc - hstep); CHECK_BETTER(right, tr, tc + hstep); CHECK_BETTER(up, tr - hstep, tc); CHECK_BETTER(down, tr + hstep, tc); whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420
switch (whichdir) { case 0: CHECK_BETTER(diag, tr - hstep, tc - hstep); break; case 1: CHECK_BETTER(diag, tr - hstep, tc + hstep); break; case 2: CHECK_BETTER(diag, tr + hstep, tc - hstep); break; case 3: CHECK_BETTER(diag, tr + hstep, tc + hstep); break; } // no reason to check the same one again. if (tr == br && tc == bc) break; tr = br; tc = bc; } if (xd->allow_high_precision_mv) { usehp = vp9_use_mv_hp(&ref_mv->as_mv); } else { usehp = 0; } if (usehp) { hstep >>= 1; while (--eighthiters) { CHECK_BETTER(left, tr, tc - hstep); CHECK_BETTER(right, tr, tc + hstep); CHECK_BETTER(up, tr - hstep, tc); CHECK_BETTER(down, tr + hstep, tc); whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); switch (whichdir) { case 0: CHECK_BETTER(diag, tr - hstep, tc - hstep); break; case 1: CHECK_BETTER(diag, tr - hstep, tc + hstep); break; case 2: CHECK_BETTER(diag, tr + hstep, tc - hstep); break; case 3: CHECK_BETTER(diag, tr + hstep, tc + hstep); break; } // no reason to check the same one again. if (tr == br && tc == bc) break; tr = br; tc = bc; } } bestmv->as_mv.row = br; bestmv->as_mv.col = bc; if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) || (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3))) return INT_MAX; return besterr;
421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490
} #undef DIST /* returns subpixel variance error function */ #define DIST(r, c) \ vfp->svaf(PRE(r, c), y_stride, SP(c), SP(r), \ z, src_stride, &sse, second_pred) int vp9_find_best_sub_pixel_comp(MACROBLOCK *x, int_mv *bestmv, int_mv *ref_mv, int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1, const uint8_t *second_pred, int w, int h) { uint8_t *z = x->plane[0].src.buf; int src_stride = x->plane[0].src.stride; MACROBLOCKD *xd = &x->e_mbd; int rr, rc, br, bc, hstep; int tr, tc; unsigned int besterr = INT_MAX; unsigned int left, right, up, down, diag; unsigned int sse; unsigned int whichdir; unsigned int halfiters = 4; unsigned int quarteriters = 4; unsigned int eighthiters = 4; int thismse; int maxc, minc, maxr, minr; int y_stride; int offset; int usehp = xd->allow_high_precision_mv; DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); uint8_t *y = xd->plane[0].pre[0].buf + (bestmv->as_mv.row) * xd->plane[0].pre[0].stride + bestmv->as_mv.col; y_stride = xd->plane[0].pre[0].stride; rr = ref_mv->as_mv.row; rc = ref_mv->as_mv.col; br = bestmv->as_mv.row << 3; bc = bestmv->as_mv.col << 3; hstep = 4; minc = MAX(x->mv_col_min << 3, (ref_mv->as_mv.col) - ((1 << MV_MAX_BITS) - 1)); maxc = MIN(x->mv_col_max << 3, (ref_mv->as_mv.col) + ((1 << MV_MAX_BITS) - 1)); minr = MAX(x->mv_row_min << 3, (ref_mv->as_mv.row) - ((1 << MV_MAX_BITS) - 1)); maxr = MIN(x->mv_row_max << 3, (ref_mv->as_mv.row) + ((1 << MV_MAX_BITS) - 1)); tr = br; tc = bc; offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col; // central mv bestmv->as_mv.row <<= 3; bestmv->as_mv.col <<= 3; // calculate central point error // TODO(yunqingwang): central pointer error was already calculated in full- // pixel search, and can be passed in this function. comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride);
491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560
besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); *distortion = besterr; besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit, xd->allow_high_precision_mv); // Each subsequent iteration checks at least one point in // common with the last iteration could be 2 ( if diag selected) while (--halfiters) { // 1/2 pel CHECK_BETTER(left, tr, tc - hstep); CHECK_BETTER(right, tr, tc + hstep); CHECK_BETTER(up, tr - hstep, tc); CHECK_BETTER(down, tr + hstep, tc); whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); switch (whichdir) { case 0: CHECK_BETTER(diag, tr - hstep, tc - hstep); break; case 1: CHECK_BETTER(diag, tr - hstep, tc + hstep); break; case 2: CHECK_BETTER(diag, tr + hstep, tc - hstep); break; case 3: CHECK_BETTER(diag, tr + hstep, tc + hstep); break; } // no reason to check the same one again. if (tr == br && tc == bc) break; tr = br; tc = bc; } // Each subsequent iteration checks at least one point in common with // the last iteration could be 2 ( if diag selected) 1/4 pel hstep >>= 1; while (--quarteriters) { CHECK_BETTER(left, tr, tc - hstep); CHECK_BETTER(right, tr, tc + hstep); CHECK_BETTER(up, tr - hstep, tc); CHECK_BETTER(down, tr + hstep, tc); whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); switch (whichdir) { case 0: CHECK_BETTER(diag, tr - hstep, tc - hstep); break; case 1: CHECK_BETTER(diag, tr - hstep, tc + hstep); break; case 2: CHECK_BETTER(diag, tr + hstep, tc - hstep); break; case 3: CHECK_BETTER(diag, tr + hstep, tc + hstep); break; } // no reason to check the same one again. if (tr == br && tc == bc) break; tr = br;
561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630
tc = bc; } if (xd->allow_high_precision_mv) { usehp = vp9_use_mv_hp(&ref_mv->as_mv); } else { usehp = 0; } if (usehp) { hstep >>= 1; while (--eighthiters) { CHECK_BETTER(left, tr, tc - hstep); CHECK_BETTER(right, tr, tc + hstep); CHECK_BETTER(up, tr - hstep, tc); CHECK_BETTER(down, tr + hstep, tc); whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); switch (whichdir) { case 0: CHECK_BETTER(diag, tr - hstep, tc - hstep); break; case 1: CHECK_BETTER(diag, tr - hstep, tc + hstep); break; case 2: CHECK_BETTER(diag, tr + hstep, tc - hstep); break; case 3: CHECK_BETTER(diag, tr + hstep, tc + hstep); break; } // no reason to check the same one again. if (tr == br && tc == bc) break; tr = br; tc = bc; } } bestmv->as_mv.row = br; bestmv->as_mv.col = bc; if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) || (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3))) return INT_MAX; return besterr; } #undef MVC #undef PRE #undef DIST #undef IFMVCV #undef CHECK_BETTER #undef MIN #undef MAX int vp9_find_best_sub_pixel_step(MACROBLOCK *x, int_mv *bestmv, int_mv *ref_mv, int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1) { int bestmse = INT_MAX; int_mv startmv; int_mv this_mv;
631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700
int_mv orig_mv; int yrow_movedback = 0, ycol_movedback = 0; uint8_t *z = x->plane[0].src.buf; int src_stride = x->plane[0].src.stride; int left, right, up, down, diag; unsigned int sse; int whichdir; int thismse; int y_stride; MACROBLOCKD *xd = &x->e_mbd; int usehp = xd->allow_high_precision_mv; uint8_t *y = xd->plane[0].pre[0].buf + (bestmv->as_mv.row) * xd->plane[0].pre[0].stride + bestmv->as_mv.col; y_stride = xd->plane[0].pre[0].stride; // central mv bestmv->as_mv.row <<= 3; bestmv->as_mv.col <<= 3; startmv = *bestmv; orig_mv = *bestmv; // calculate central point error bestmse = vfp->vf(y, y_stride, z, src_stride, sse1); *distortion = bestmse; bestmse += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit, xd->allow_high_precision_mv); // go left then right and check error this_mv.as_mv.row = startmv.as_mv.row; this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4); thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, src_stride, &sse); left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, xd->allow_high_precision_mv); if (left < bestmse) { *bestmv = this_mv; bestmse = left; *distortion = thismse; *sse1 = sse; } this_mv.as_mv.col += 8; thismse = vfp->svf_halfpix_h(y, y_stride, z, src_stride, &sse); right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, xd->allow_high_precision_mv); if (right < bestmse) { *bestmv = this_mv; bestmse = right; *distortion = thismse; *sse1 = sse; } // go up then down and check error this_mv.as_mv.col = startmv.as_mv.col; this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4); thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, src_stride, &sse); up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, xd->allow_high_precision_mv); if (up < bestmse) { *bestmv = this_mv; bestmse = up; *distortion = thismse; *sse1 = sse; } this_mv.as_mv.row += 8;
701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770
thismse = vfp->svf_halfpix_v(y, y_stride, z, src_stride, &sse); down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, xd->allow_high_precision_mv); if (down < bestmse) { *bestmv = this_mv; bestmse = down; *distortion = thismse; *sse1 = sse; } // now check 1 more diagonal whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); // for(whichdir =0;whichdir<4;whichdir++) // { this_mv = startmv; switch (whichdir) { case 0: this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, src_stride, &sse); break; case 1: this_mv.as_mv.col += 4; this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, src_stride, &sse); break; case 2: this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; this_mv.as_mv.row += 4; thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, src_stride, &sse); break; case 3: default: this_mv.as_mv.col += 4; this_mv.as_mv.row += 4; thismse = vfp->svf_halfpix_hv(y, y_stride, z, src_stride, &sse); break; } diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, xd->allow_high_precision_mv); if (diag < bestmse) { *bestmv = this_mv; bestmse = diag; *distortion = thismse; *sse1 = sse; } // } // time to check quarter pels. if (bestmv->as_mv.row < startmv.as_mv.row) { y -= y_stride; yrow_movedback = 1; } if (bestmv->as_mv.col < startmv.as_mv.col) { y--; ycol_movedback = 1; } startmv = *bestmv;
771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840
// go left then right and check error this_mv.as_mv.row = startmv.as_mv.row; if (startmv.as_mv.col & 7) { this_mv.as_mv.col = startmv.as_mv.col - 2; thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, src_stride, &sse); } else { this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, src_stride, &sse); } left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, xd->allow_high_precision_mv); if (left < bestmse) { *bestmv = this_mv; bestmse = left; *distortion = thismse; *sse1 = sse; } this_mv.as_mv.col += 4; thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, src_stride, &sse); right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, xd->allow_high_precision_mv); if (right < bestmse) { *bestmv = this_mv; bestmse = right; *distortion = thismse; *sse1 = sse; } // go up then down and check error this_mv.as_mv.col = startmv.as_mv.col; if (startmv.as_mv.row & 7) { this_mv.as_mv.row = startmv.as_mv.row - 2; thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, src_stride, &sse); } else { this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6; thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6), z, src_stride, &sse); } up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, xd->allow_high_precision_mv); if (up < bestmse) { *bestmv = this_mv; bestmse = up; *distortion = thismse; *sse1 = sse; } this_mv.as_mv.row += 4; thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, src_stride, &sse); down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, xd->allow_high_precision_mv);
841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910
if (down < bestmse) { *bestmv = this_mv; bestmse = down; *distortion = thismse; *sse1 = sse; } // now check 1 more diagonal whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); // for(whichdir=0;whichdir<4;whichdir++) // { this_mv = startmv; switch (whichdir) { case 0: if (startmv.as_mv.row & 7) { this_mv.as_mv.row -= 2; if (startmv.as_mv.col & 7) { this_mv.as_mv.col -= 2; thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, src_stride, &sse); } else { this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, src_stride, &sse); } } else { this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6; if (startmv.as_mv.col & 7) { this_mv.as_mv.col -= 2; thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6), z, src_stride, &sse); } else { this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; thismse = vfp->svf(y - y_stride - 1, y_stride, SP(6), SP(6), z, src_stride, &sse); } } break; case 1: this_mv.as_mv.col += 2; if (startmv.as_mv.row & 7) { this_mv.as_mv.row -= 2; thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, src_stride, &sse); } else { this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6; thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6), z, src_stride, &sse); } break; case 2: this_mv.as_mv.row += 2; if (startmv.as_mv.col & 7) { this_mv.as_mv.col -= 2; thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, src_stride, &sse); } else {
911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980
this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, src_stride, &sse); } break; case 3: this_mv.as_mv.col += 2; this_mv.as_mv.row += 2; thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, src_stride, &sse); break; } diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, xd->allow_high_precision_mv); if (diag < bestmse) { *bestmv = this_mv; bestmse = diag; *distortion = thismse; *sse1 = sse; } if (x->e_mbd.allow_high_precision_mv) { usehp = vp9_use_mv_hp(&ref_mv->as_mv); } else { usehp = 0; } if (!usehp) return bestmse; /* Now do 1/8th pixel */ if (bestmv->as_mv.row < orig_mv.as_mv.row && !yrow_movedback) { y -= y_stride; yrow_movedback = 1; } if (bestmv->as_mv.col < orig_mv.as_mv.col && !ycol_movedback) { y--; ycol_movedback = 1; } startmv = *bestmv; // go left then right and check error this_mv.as_mv.row = startmv.as_mv.row; if (startmv.as_mv.col & 7) { this_mv.as_mv.col = startmv.as_mv.col - 1; thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, src_stride, &sse); } else { this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7; thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), z, src_stride, &sse); } left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, xd->allow_high_precision_mv); if (left < bestmse) { *bestmv = this_mv; bestmse = left; *distortion = thismse; *sse1 = sse; }
981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050
this_mv.as_mv.col += 2; thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, src_stride, &sse); right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, xd->allow_high_precision_mv); if (right < bestmse) { *bestmv = this_mv; bestmse = right; *distortion = thismse; *sse1 = sse; } // go up then down and check error this_mv.as_mv.col = startmv.as_mv.col; if (startmv.as_mv.row & 7) { this_mv.as_mv.row = startmv.as_mv.row - 1; thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, src_stride, &sse); } else { this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7; thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, src_stride, &sse); } up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, xd->allow_high_precision_mv); if (up < bestmse) { *bestmv = this_mv; bestmse = up; *distortion = thismse; *sse1 = sse; } this_mv.as_mv.row += 2; thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, src_stride, &sse); down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, xd->allow_high_precision_mv); if (down < bestmse) { *bestmv = this_mv; bestmse = down; *distortion = thismse; *sse1 = sse; } // now check 1 more diagonal whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); // for(whichdir=0;whichdir<4;whichdir++) // { this_mv = startmv; switch (whichdir) { case 0: if (startmv.as_mv.row & 7) { this_mv.as_mv.row -= 1; if (startmv.as_mv.col & 7) { this_mv.as_mv.col -= 1; thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, src_stride, &sse); } else {
1051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120
this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7; thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), z, src_stride, &sse); } } else { this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7; if (startmv.as_mv.col & 7) { this_mv.as_mv.col -= 1; thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, src_stride, &sse); } else { this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7; thismse = vfp->svf(y - y_stride - 1, y_stride, SP(7), SP(7), z, src_stride, &sse); } } break; case 1: this_mv.as_mv.col += 1; if (startmv.as_mv.row & 7) { this_mv.as_mv.row -= 1; thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, src_stride, &sse); } else { this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7; thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, src_stride, &sse); } break; case 2: this_mv.as_mv.row += 1; if (startmv.as_mv.col & 7) { this_mv.as_mv.col -= 1; thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, src_stride, &sse); } else { this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7; thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), z, src_stride, &sse); } break; case 3: this_mv.as_mv.col += 1; this_mv.as_mv.row += 1; thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, src_stride, &sse); break; } diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, xd->allow_high_precision_mv); if (diag < bestmse) { *bestmv = this_mv; bestmse = diag; *distortion = thismse; *sse1 = sse; } return bestmse;
1121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190
} #undef SP int vp9_find_best_half_pixel_step(MACROBLOCK *x, int_mv *bestmv, int_mv *ref_mv, int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1) { int bestmse = INT_MAX; int_mv startmv; int_mv this_mv; uint8_t *z = x->plane[0].src.buf; int src_stride = x->plane[0].src.stride; int left, right, up, down, diag; unsigned int sse; int whichdir; int thismse; int y_stride; MACROBLOCKD *xd = &x->e_mbd; uint8_t *y = xd->plane[0].pre[0].buf + (bestmv->as_mv.row) * xd->plane[0].pre[0].stride + bestmv->as_mv.col; y_stride = xd->plane[0].pre[0].stride; // central mv bestmv->as_mv.row <<= 3; bestmv->as_mv.col <<= 3; startmv = *bestmv; // calculate central point error bestmse = vfp->vf(y, y_stride, z, src_stride, sse1); *distortion = bestmse; bestmse += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit, xd->allow_high_precision_mv); // go left then right and check error this_mv.as_mv.row = startmv.as_mv.row; this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4); thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, src_stride, &sse); left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, xd->allow_high_precision_mv); if (left < bestmse) { *bestmv = this_mv; bestmse = left; *distortion = thismse; *sse1 = sse; } this_mv.as_mv.col += 8; thismse = vfp->svf_halfpix_h(y, y_stride, z, src_stride, &sse); right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, xd->allow_high_precision_mv); if (right < bestmse) { *bestmv = this_mv; bestmse = right; *distortion = thismse; *sse1 = sse; } // go up then down and check error this_mv.as_mv.col = startmv.as_mv.col; this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4); thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, src_stride, &sse); up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, xd->allow_high_precision_mv);
1191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260
if (up < bestmse) { *bestmv = this_mv; bestmse = up; *distortion = thismse; *sse1 = sse; } this_mv.as_mv.row += 8; thismse = vfp->svf_halfpix_v(y, y_stride, z, src_stride, &sse); down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, xd->allow_high_precision_mv); if (down < bestmse) { *bestmv = this_mv; bestmse = down; *distortion = thismse; *sse1 = sse; } // now check 1 more diagonal - whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); this_mv = startmv; switch (whichdir) { case 0: this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, src_stride, &sse); break; case 1: this_mv.as_mv.col += 4; this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, src_stride, &sse); break; case 2: this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; this_mv.as_mv.row += 4; thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, src_stride, &sse); break; case 3: default: this_mv.as_mv.col += 4; this_mv.as_mv.row += 4; thismse = vfp->svf_halfpix_hv(y, y_stride, z, src_stride, &sse); break; } diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, xd->allow_high_precision_mv); if (diag < bestmse) { *bestmv = this_mv; bestmse = diag; *distortion = thismse; *sse1 = sse; } return bestmse; } #define CHECK_BOUNDS(range) \ {\ all_in = 1;\ all_in &= ((br-range) >= x->mv_row_min);\ all_in &= ((br+range) <= x->mv_row_max);\ all_in &= ((bc-range) >= x->mv_col_min);\ all_in &= ((bc+range) <= x->mv_col_max);\
1261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330
} #define CHECK_POINT \ {\ if (this_mv.as_mv.col < x->mv_col_min) continue;\ if (this_mv.as_mv.col > x->mv_col_max) continue;\ if (this_mv.as_mv.row < x->mv_row_min) continue;\ if (this_mv.as_mv.row > x->mv_row_max) continue;\ } #define CHECK_BETTER \ {\ if (thissad < bestsad)\ {\ thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, \ sad_per_bit);\ if (thissad < bestsad)\ {\ bestsad = thissad;\ best_site = i;\ }\ }\ } static const MV next_chkpts[6][3] = { {{ -2, 0}, { -1, -2}, {1, -2}}, {{ -1, -2}, {1, -2}, {2, 0}}, {{1, -2}, {2, 0}, {1, 2}}, {{2, 0}, {1, 2}, { -1, 2}}, {{1, 2}, { -1, 2}, { -2, 0}}, {{ -1, 2}, { -2, 0}, { -1, -2}} }; int vp9_hex_search ( MACROBLOCK *x, int_mv *ref_mv, int_mv *best_mv, int search_param, int sad_per_bit, const vp9_variance_fn_ptr_t *vfp, int *mvjsadcost, int *mvsadcost[2], int *mvjcost, int *mvcost[2], int_mv *center_mv ) { const MACROBLOCKD* const xd = &x->e_mbd; MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} }; MV neighbors[4] = {{0, -1}, { -1, 0}, {1, 0}, {0, 1}}; int i, j; uint8_t *what = x->plane[0].src.buf; int what_stride = x->plane[0].src.stride; int in_what_stride = xd->plane[0].pre[0].stride; int br, bc; int_mv this_mv; unsigned int bestsad = 0x7fffffff; unsigned int thissad; uint8_t *base_offset; uint8_t *this_offset; int k = -1; int all_in; int best_site = -1; int_mv fcenter_mv; fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; // adjust ref_mv to make sure it is within MV range clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); br = ref_mv->as_mv.row;
1331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400
bc = ref_mv->as_mv.col; // Work out the start point for the search base_offset = (uint8_t *)(xd->plane[0].pre[0].buf); this_offset = base_offset + (br * (xd->plane[0].pre[0].stride)) + bc; this_mv.as_mv.row = br; this_mv.as_mv.col = bc; bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 0x7fffffff) + mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); // hex search // j=0 CHECK_BOUNDS(2) if (all_in) { for (i = 0; i < 6; i++) { this_mv.as_mv.row = br + hex[i].row; this_mv.as_mv.col = bc + hex[i].col; this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col; thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); CHECK_BETTER } } else { for (i = 0; i < 6; i++) { this_mv.as_mv.row = br + hex[i].row; this_mv.as_mv.col = bc + hex[i].col; CHECK_POINT this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col; thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); CHECK_BETTER } } if (best_site == -1) goto cal_neighbors; else { br += hex[best_site].row; bc += hex[best_site].col; k = best_site; } for (j = 1; j < 127; j++) { best_site = -1; CHECK_BOUNDS(2) if (all_in) { for (i = 0; i < 3; i++) { this_mv.as_mv.row = br + next_chkpts[k][i].row; this_mv.as_mv.col = bc + next_chkpts[k][i].col; this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); CHECK_BETTER } } else { for (i = 0; i < 3; i++) { this_mv.as_mv.row = br + next_chkpts[k][i].row; this_mv.as_mv.col = bc + next_chkpts[k][i].col; CHECK_POINT this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); CHECK_BETTER } } if (best_site == -1) break; else { br += next_chkpts[k][best_site].row;
1401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470
bc += next_chkpts[k][best_site].col; k += 5 + best_site; if (k >= 12) k -= 12; else if (k >= 6) k -= 6; } } // check 4 1-away neighbors cal_neighbors: for (j = 0; j < 32; j++) { best_site = -1; CHECK_BOUNDS(1) if (all_in) { for (i = 0; i < 4; i++) { this_mv.as_mv.row = br + neighbors[i].row; this_mv.as_mv.col = bc + neighbors[i].col; this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); CHECK_BETTER } } else { for (i = 0; i < 4; i++) { this_mv.as_mv.row = br + neighbors[i].row; this_mv.as_mv.col = bc + neighbors[i].col; CHECK_POINT this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); CHECK_BETTER } } if (best_site == -1) break; else { br += neighbors[best_site].row; bc += neighbors[best_site].col; } } best_mv->as_mv.row = br; best_mv->as_mv.col = bc; return bestsad; } #undef CHECK_BOUNDS #undef CHECK_POINT #undef CHECK_BETTER int vp9_diamond_search_sad_c(MACROBLOCK *x, int_mv *ref_mv, int_mv *best_mv, int search_param, int sad_per_bit, int *num00, vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, int *mvcost[2], int_mv *center_mv) { int i, j, step; const MACROBLOCKD* const xd = &x->e_mbd; uint8_t *what = x->plane[0].src.buf; int what_stride = x->plane[0].src.stride; uint8_t *in_what; int in_what_stride = xd->plane[0].pre[0].stride; uint8_t *best_address; int tot_steps; int_mv this_mv; int bestsad = INT_MAX; int best_site = 0; int last_site = 0;
1471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540
int ref_row, ref_col; int this_row_offset, this_col_offset; search_site *ss; uint8_t *check_here; int thissad; int_mv fcenter_mv; int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); ref_row = ref_mv->as_mv.row; ref_col = ref_mv->as_mv.col; *num00 = 0; best_mv->as_mv.row = ref_row; best_mv->as_mv.col = ref_col; // Work out the start point for the search in_what = (uint8_t *)(xd->plane[0].pre[0].buf + (ref_row * (xd->plane[0].pre[0].stride)) + ref_col); best_address = in_what; // Check the starting position bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); // search_param determines the length of the initial step and hence the number of iterations // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. ss = &x->ss[search_param * x->searches_per_step]; tot_steps = (x->ss_count / x->searches_per_step) - search_param; i = 1; for (step = 0; step < tot_steps; step++) { for (j = 0; j < x->searches_per_step; j++) { // Trap illegal vectors this_row_offset = best_mv->as_mv.row + ss[i].mv.row; this_col_offset = best_mv->as_mv.col + ss[i].mv.col; if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) && (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) { check_here = ss[i].offset + best_address; thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); if (thissad < bestsad) { this_mv.as_mv.row = this_row_offset; this_mv.as_mv.col = this_col_offset; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; best_site = i; } } } i++; } if (best_site != last_site) {
1541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610
best_mv->as_mv.row += ss[best_site].mv.row; best_mv->as_mv.col += ss[best_site].mv.col; best_address += ss[best_site].offset; last_site = best_site; #if defined(NEW_DIAMOND_SEARCH) while (1) { this_row_offset = best_mv->as_mv.row + ss[best_site].mv.row; this_col_offset = best_mv->as_mv.col + ss[best_site].mv.col; if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) && (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) { check_here = ss[best_site].offset + best_address; thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); if (thissad < bestsad) { this_mv.as_mv.row = this_row_offset; this_mv.as_mv.col = this_col_offset; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; best_mv->as_mv.row += ss[best_site].mv.row; best_mv->as_mv.col += ss[best_site].mv.col; best_address += ss[best_site].offset; continue; } } } break; }; #endif } else if (best_address == in_what) (*num00)++; } this_mv.as_mv.row = best_mv->as_mv.row << 3; this_mv.as_mv.col = best_mv->as_mv.col << 3; if (bestsad == INT_MAX) return INT_MAX; return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad)) + mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit, xd->allow_high_precision_mv); } int vp9_diamond_search_sadx4(MACROBLOCK *x, int_mv *ref_mv, int_mv *best_mv, int search_param, int sad_per_bit, int *num00, vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, int *mvcost[2], int_mv *center_mv) { int i, j, step; const MACROBLOCKD* const xd = &x->e_mbd; uint8_t *what = x->plane[0].src.buf; int what_stride = x->plane[0].src.stride; uint8_t *in_what; int in_what_stride = xd->plane[0].pre[0].stride; uint8_t *best_address; int tot_steps; int_mv this_mv; unsigned int bestsad = INT_MAX; int best_site = 0; int last_site = 0;
1611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680
int ref_row; int ref_col; int this_row_offset; int this_col_offset; search_site *ss; uint8_t *check_here; unsigned int thissad; int_mv fcenter_mv; int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); ref_row = ref_mv->as_mv.row; ref_col = ref_mv->as_mv.col; *num00 = 0; best_mv->as_mv.row = ref_row; best_mv->as_mv.col = ref_col; // Work out the start point for the search in_what = (uint8_t *)(xd->plane[0].pre[0].buf + (ref_row * (xd->plane[0].pre[0].stride)) + ref_col); best_address = in_what; // Check the starting position bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); // search_param determines the length of the initial step and hence the number of iterations // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. ss = &x->ss[search_param * x->searches_per_step]; tot_steps = (x->ss_count / x->searches_per_step) - search_param; i = 1; for (step = 0; step < tot_steps; step++) { int all_in = 1, t; // To know if all neighbor points are within the bounds, 4 bounds checking are enough instead of // checking 4 bounds for each points. 
all_in &= ((best_mv->as_mv.row + ss[i].mv.row) > x->mv_row_min); all_in &= ((best_mv->as_mv.row + ss[i + 1].mv.row) < x->mv_row_max); all_in &= ((best_mv->as_mv.col + ss[i + 2].mv.col) > x->mv_col_min); all_in &= ((best_mv->as_mv.col + ss[i + 3].mv.col) < x->mv_col_max); if (all_in) { unsigned int sad_array[4]; for (j = 0; j < x->searches_per_step; j += 4) { unsigned char const *block_offset[4]; for (t = 0; t < 4; t++) block_offset[t] = ss[i + t].offset + best_address; fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array); for (t = 0; t < 4; t++, i++) { if (sad_array[t] < bestsad) { this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row; this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col; sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit);
1681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750
if (sad_array[t] < bestsad) { bestsad = sad_array[t]; best_site = i; } } } } } else { for (j = 0; j < x->searches_per_step; j++) { // Trap illegal vectors this_row_offset = best_mv->as_mv.row + ss[i].mv.row; this_col_offset = best_mv->as_mv.col + ss[i].mv.col; if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) && (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) { check_here = ss[i].offset + best_address; thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); if (thissad < bestsad) { this_mv.as_mv.row = this_row_offset; this_mv.as_mv.col = this_col_offset; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; best_site = i; } } } i++; } } if (best_site != last_site) { best_mv->as_mv.row += ss[best_site].mv.row; best_mv->as_mv.col += ss[best_site].mv.col; best_address += ss[best_site].offset; last_site = best_site; #if defined(NEW_DIAMOND_SEARCH) while (1) { this_row_offset = best_mv->as_mv.row + ss[best_site].mv.row; this_col_offset = best_mv->as_mv.col + ss[best_site].mv.col; if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) && (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) { check_here = ss[best_site].offset + best_address; thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); if (thissad < bestsad) { this_mv.as_mv.row = this_row_offset; this_mv.as_mv.col = this_col_offset; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; best_mv->as_mv.row += ss[best_site].mv.row; best_mv->as_mv.col += ss[best_site].mv.col; best_address += ss[best_site].offset; continue; } } } break; }; #endif } else if (best_address == in_what) (*num00)++; }
1751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820
this_mv.as_mv.row = best_mv->as_mv.row << 3; this_mv.as_mv.col = best_mv->as_mv.col << 3; if (bestsad == INT_MAX) return INT_MAX; return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad)) + mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit, xd->allow_high_precision_mv); } /* do_refine: If last step (1-away) of n-step search doesn't pick the center point as the best match, we will do a final 1-away diamond refining search */ int vp9_full_pixel_diamond(VP9_COMP *cpi, MACROBLOCK *x, int_mv *mvp_full, int step_param, int sadpb, int further_steps, int do_refine, vp9_variance_fn_ptr_t *fn_ptr, int_mv *ref_mv, int_mv *dst_mv) { int_mv temp_mv; int thissme, n, num00; int bestsme = cpi->diamond_search_sad(x, mvp_full, &temp_mv, step_param, sadpb, &num00, fn_ptr, x->nmvjointcost, x->mvcost, ref_mv); dst_mv->as_int = temp_mv.as_int; n = num00; num00 = 0; /* If there won't be more n-step search, check to see if refining search is needed. */ if (n > further_steps) do_refine = 0; while (n < further_steps) { n++; if (num00) num00--; else { thissme = cpi->diamond_search_sad(x, mvp_full, &temp_mv, step_param + n, sadpb, &num00, fn_ptr, x->nmvjointcost, x->mvcost, ref_mv); /* check to see if refining search is needed. */ if (num00 > (further_steps - n)) do_refine = 0; if (thissme < bestsme) { bestsme = thissme; dst_mv->as_int = temp_mv.as_int; } } } /* final 1-away diamond refining search */ if (do_refine == 1) { int search_range = 8; int_mv best_mv; best_mv.as_int = dst_mv->as_int; thissme = cpi->refining_search_sad(x, &best_mv, sadpb, search_range, fn_ptr, x->nmvjointcost, x->mvcost, ref_mv); if (thissme < bestsme) { bestsme = thissme;
1821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890
dst_mv->as_int = best_mv.as_int; } } return bestsme; } int vp9_full_search_sad_c(MACROBLOCK *x, int_mv *ref_mv, int sad_per_bit, int distance, vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, int *mvcost[2], int_mv *center_mv, int n) { const MACROBLOCKD* const xd = &x->e_mbd; uint8_t *what = x->plane[0].src.buf; int what_stride = x->plane[0].src.stride; uint8_t *in_what; int in_what_stride = xd->plane[0].pre[0].stride; int mv_stride = xd->plane[0].pre[0].stride; uint8_t *bestaddress; int_mv *best_mv = &x->e_mbd.mode_info_context->bmi[n].as_mv[0]; int_mv this_mv; int bestsad = INT_MAX; int r, c; uint8_t *check_here; int thissad; int ref_row = ref_mv->as_mv.row; int ref_col = ref_mv->as_mv.col; int row_min = ref_row - distance; int row_max = ref_row + distance; int col_min = ref_col - distance; int col_max = ref_col + distance; int_mv fcenter_mv; int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; // Work out the mid point for the search in_what = xd->plane[0].pre[0].buf; bestaddress = in_what + (ref_row * xd->plane[0].pre[0].stride) + ref_col; best_mv->as_mv.row = ref_row; best_mv->as_mv.col = ref_col; // Baseline value at the centre bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border if (col_min < x->mv_col_min) col_min = x->mv_col_min; if (col_max > x->mv_col_max) col_max = x->mv_col_max; if (row_min < x->mv_row_min) row_min = x->mv_row_min; if (row_max > x->mv_row_max) row_max = x->mv_row_max; for (r = row_min; r < row_max; r++) { this_mv.as_mv.row = r; check_here = r * mv_stride + in_what + col_min;
1891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960
for (c = col_min; c < col_max; c++) { thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); this_mv.as_mv.col = c; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; best_mv->as_mv.row = r; best_mv->as_mv.col = c; bestaddress = check_here; } check_here++; } } this_mv.as_mv.row = best_mv->as_mv.row << 3; this_mv.as_mv.col = best_mv->as_mv.col << 3; if (bestsad < INT_MAX) return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad)) + mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit, xd->allow_high_precision_mv); else return INT_MAX; } int vp9_full_search_sadx3(MACROBLOCK *x, int_mv *ref_mv, int sad_per_bit, int distance, vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, int *mvcost[2], int_mv *center_mv, int n) { const MACROBLOCKD* const xd = &x->e_mbd; uint8_t *what = x->plane[0].src.buf; int what_stride = x->plane[0].src.stride; uint8_t *in_what; int in_what_stride = xd->plane[0].pre[0].stride; int mv_stride = xd->plane[0].pre[0].stride; uint8_t *bestaddress; int_mv *best_mv = &x->e_mbd.mode_info_context->bmi[n].as_mv[0]; int_mv this_mv; unsigned int bestsad = INT_MAX; int r, c; uint8_t *check_here; unsigned int thissad; int ref_row = ref_mv->as_mv.row; int ref_col = ref_mv->as_mv.col; int row_min = ref_row - distance; int row_max = ref_row + distance; int col_min = ref_col - distance; int col_max = ref_col + distance; unsigned int sad_array[3]; int_mv fcenter_mv; int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; // Work out the mid point for the search in_what = xd->plane[0].pre[0].buf;
1961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030
bestaddress = in_what + (ref_row * xd->plane[0].pre[0].stride) + ref_col; best_mv->as_mv.row = ref_row; best_mv->as_mv.col = ref_col; // Baseline value at the centre bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border if (col_min < x->mv_col_min) col_min = x->mv_col_min; if (col_max > x->mv_col_max) col_max = x->mv_col_max; if (row_min < x->mv_row_min) row_min = x->mv_row_min; if (row_max > x->mv_row_max) row_max = x->mv_row_max; for (r = row_min; r < row_max; r++) { this_mv.as_mv.row = r; check_here = r * mv_stride + in_what + col_min; c = col_min; while ((c + 2) < col_max) { int i; fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array); for (i = 0; i < 3; i++) { thissad = sad_array[i]; if (thissad < bestsad) { this_mv.as_mv.col = c; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; best_mv->as_mv.row = r; best_mv->as_mv.col = c; bestaddress = check_here; } } check_here++; c++; } } while (c < col_max) { thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); if (thissad < bestsad) { this_mv.as_mv.col = c; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; best_mv->as_mv.row = r; best_mv->as_mv.col = c; bestaddress = check_here; } }
2031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100
check_here++; c++; } } this_mv.as_mv.row = best_mv->as_mv.row << 3; this_mv.as_mv.col = best_mv->as_mv.col << 3; if (bestsad < INT_MAX) return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad)) + mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit, xd->allow_high_precision_mv); else return INT_MAX; } int vp9_full_search_sadx8(MACROBLOCK *x, int_mv *ref_mv, int sad_per_bit, int distance, vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, int *mvcost[2], int_mv *center_mv, int n) { const MACROBLOCKD* const xd = &x->e_mbd; uint8_t *what = x->plane[0].src.buf; int what_stride = x->plane[0].src.stride; uint8_t *in_what; int in_what_stride = xd->plane[0].pre[0].stride; int mv_stride = xd->plane[0].pre[0].stride; uint8_t *bestaddress; int_mv *best_mv = &x->e_mbd.mode_info_context->bmi[n].as_mv[0]; int_mv this_mv; unsigned int bestsad = INT_MAX; int r, c; uint8_t *check_here; unsigned int thissad; int ref_row = ref_mv->as_mv.row; int ref_col = ref_mv->as_mv.col; int row_min = ref_row - distance; int row_max = ref_row + distance; int col_min = ref_col - distance; int col_max = ref_col + distance; DECLARE_ALIGNED_ARRAY(16, uint32_t, sad_array8, 8); unsigned int sad_array[3]; int_mv fcenter_mv; int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; // Work out the mid point for the search in_what = xd->plane[0].pre[0].buf; bestaddress = in_what + (ref_row * xd->plane[0].pre[0].stride) + ref_col; best_mv->as_mv.row = ref_row; best_mv->as_mv.col = ref_col; // Baseline value at the centre bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit);
2101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170
// Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border if (col_min < x->mv_col_min) col_min = x->mv_col_min; if (col_max > x->mv_col_max) col_max = x->mv_col_max; if (row_min < x->mv_row_min) row_min = x->mv_row_min; if (row_max > x->mv_row_max) row_max = x->mv_row_max; for (r = row_min; r < row_max; r++) { this_mv.as_mv.row = r; check_here = r * mv_stride + in_what + col_min; c = col_min; while ((c + 7) < col_max) { int i; fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8); for (i = 0; i < 8; i++) { thissad = (unsigned int)sad_array8[i]; if (thissad < bestsad) { this_mv.as_mv.col = c; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; best_mv->as_mv.row = r; best_mv->as_mv.col = c; bestaddress = check_here; } } check_here++; c++; } } while ((c + 2) < col_max) { int i; fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array); for (i = 0; i < 3; i++) { thissad = sad_array[i]; if (thissad < bestsad) { this_mv.as_mv.col = c; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; best_mv->as_mv.row = r; best_mv->as_mv.col = c; bestaddress = check_here; } } check_here++; c++; } }
2171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240
while (c < col_max) { thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); if (thissad < bestsad) { this_mv.as_mv.col = c; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; best_mv->as_mv.row = r; best_mv->as_mv.col = c; bestaddress = check_here; } } check_here++; c++; } } this_mv.as_mv.row = best_mv->as_mv.row << 3; this_mv.as_mv.col = best_mv->as_mv.col << 3; if (bestsad < INT_MAX) return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad)) + mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit, xd->allow_high_precision_mv); else return INT_MAX; } int vp9_refining_search_sad_c(MACROBLOCK *x, int_mv *ref_mv, int error_per_bit, int search_range, vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, int *mvcost[2], int_mv *center_mv) { const MACROBLOCKD* const xd = &x->e_mbd; MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; int i, j; int this_row_offset, this_col_offset; int what_stride = x->plane[0].src.stride; int in_what_stride = xd->plane[0].pre[0].stride; uint8_t *what = x->plane[0].src.buf; uint8_t *best_address = xd->plane[0].pre[0].buf + (ref_mv->as_mv.row * xd->plane[0].pre[0].stride) + ref_mv->as_mv.col; uint8_t *check_here; unsigned int thissad; int_mv this_mv; unsigned int bestsad = INT_MAX; int_mv fcenter_mv; int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); for (i = 0; i < search_range; i++) { int best_site = -1; for (j = 0; j < 4; j++) { this_row_offset = ref_mv->as_mv.row + neighbors[j].row; this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
2241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310
if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) && (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) { check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col + best_address; thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); if (thissad < bestsad) { this_mv.as_mv.row = this_row_offset; this_mv.as_mv.col = this_col_offset; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); if (thissad < bestsad) { bestsad = thissad; best_site = j; } } } } if (best_site == -1) break; else { ref_mv->as_mv.row += neighbors[best_site].row; ref_mv->as_mv.col += neighbors[best_site].col; best_address += (neighbors[best_site].row) * in_what_stride + neighbors[best_site].col; } } this_mv.as_mv.row = ref_mv->as_mv.row << 3; this_mv.as_mv.col = ref_mv->as_mv.col << 3; if (bestsad < INT_MAX) return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad)) + mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit, xd->allow_high_precision_mv); else return INT_MAX; } int vp9_refining_search_sadx4(MACROBLOCK *x, int_mv *ref_mv, int error_per_bit, int search_range, vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, int *mvcost[2], int_mv *center_mv) { const MACROBLOCKD* const xd = &x->e_mbd; MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; int i, j; int this_row_offset, this_col_offset; int what_stride = x->plane[0].src.stride; int in_what_stride = xd->plane[0].pre[0].stride; uint8_t *what = x->plane[0].src.buf; uint8_t *best_address = xd->plane[0].pre[0].buf + (ref_mv->as_mv.row * xd->plane[0].pre[0].stride) + ref_mv->as_mv.col; uint8_t *check_here; unsigned int thissad; int_mv this_mv; unsigned int bestsad = INT_MAX; int_mv fcenter_mv; int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 
bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) +
2311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380
mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); for (i = 0; i < search_range; i++) { int best_site = -1; int all_in = ((ref_mv->as_mv.row - 1) > x->mv_row_min) & ((ref_mv->as_mv.row + 1) < x->mv_row_max) & ((ref_mv->as_mv.col - 1) > x->mv_col_min) & ((ref_mv->as_mv.col + 1) < x->mv_col_max); if (all_in) { unsigned int sad_array[4]; unsigned char const *block_offset[4]; block_offset[0] = best_address - in_what_stride; block_offset[1] = best_address - 1; block_offset[2] = best_address + 1; block_offset[3] = best_address + in_what_stride; fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array); for (j = 0; j < 4; j++) { if (sad_array[j] < bestsad) { this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row; this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col; sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); if (sad_array[j] < bestsad) { bestsad = sad_array[j]; best_site = j; } } } } else { for (j = 0; j < 4; j++) { this_row_offset = ref_mv->as_mv.row + neighbors[j].row; this_col_offset = ref_mv->as_mv.col + neighbors[j].col; if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) && (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) { check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col + best_address; thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); if (thissad < bestsad) { this_mv.as_mv.row = this_row_offset; this_mv.as_mv.col = this_col_offset; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); if (thissad < bestsad) { bestsad = thissad; best_site = j; } } } } } if (best_site == -1) break; else { ref_mv->as_mv.row += neighbors[best_site].row; ref_mv->as_mv.col += neighbors[best_site].col; best_address += (neighbors[best_site].row) * in_what_stride + neighbors[best_site].col; } } this_mv.as_mv.row = ref_mv->as_mv.row << 3; this_mv.as_mv.col = 
ref_mv->as_mv.col << 3; if (bestsad < INT_MAX)
2381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450
return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad)) + mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit, xd->allow_high_precision_mv); else return INT_MAX; } /* This function is called when we do joint motion search in comp_inter_inter * mode. */ int vp9_refining_search_8p_c(MACROBLOCK *x, int_mv *ref_mv, int error_per_bit, int search_range, vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, int *mvcost[2], int_mv *center_mv, const uint8_t *second_pred, int w, int h) { const MACROBLOCKD* const xd = &x->e_mbd; MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}, {-1, -1}, {1, -1}, {-1, 1}, {1, 1}}; int i, j; int this_row_offset, this_col_offset; int what_stride = x->plane[0].src.stride; int in_what_stride = xd->plane[0].pre[0].stride; uint8_t *what = x->plane[0].src.buf; uint8_t *best_address = xd->plane[0].pre[0].buf + (ref_mv->as_mv.row * xd->plane[0].pre[0].stride) + ref_mv->as_mv.col; uint8_t *check_here; unsigned int thissad; int_mv this_mv; unsigned int bestsad = INT_MAX; int_mv fcenter_mv; int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; /* Get compound pred by averaging two pred blocks. */ bestsad = fn_ptr->sdaf(what, what_stride, best_address, in_what_stride, second_pred, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); for (i = 0; i < search_range; i++) { int best_site = -1; for (j = 0; j < 8; j++) { this_row_offset = ref_mv->as_mv.row + neighbors[j].row; this_col_offset = ref_mv->as_mv.col + neighbors[j].col; if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) && (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) { check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col + best_address; /* Get compound block and use it to calculate SAD. 
*/ thissad = fn_ptr->sdaf(what, what_stride, check_here, in_what_stride, second_pred, bestsad); if (thissad < bestsad) { this_mv.as_mv.row = this_row_offset; this_mv.as_mv.col = this_col_offset; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
24512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485
if (thissad < bestsad) { bestsad = thissad; best_site = j; } } } } if (best_site == -1) { break; } else { ref_mv->as_mv.row += neighbors[best_site].row; ref_mv->as_mv.col += neighbors[best_site].col; best_address += (neighbors[best_site].row) * in_what_stride + neighbors[best_site].col; } } this_mv.as_mv.row = ref_mv->as_mv.row << 3; this_mv.as_mv.col = ref_mv->as_mv.col << 3; if (bestsad < INT_MAX) { // FIXME(rbultje, yunqing): add full-pixel averaging variance functions // so we don't have to use the subpixel with xoff=0,yoff=0 here. int besterr = fn_ptr->svaf(best_address, in_what_stride, 0, 0, what, what_stride, (unsigned int *)(&thissad), second_pred) + mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit, xd->allow_high_precision_mv); return besterr; } else { return INT_MAX; } }