vp9_mcomp.c 87.5 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
John Koleszar's avatar
John Koleszar committed
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5 6
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
John Koleszar's avatar
John Koleszar committed
9 10
 */

11
#include <assert.h>
Dmitry Kovalev's avatar
Dmitry Kovalev committed
12 13
#include <limits.h>
#include <math.h>
Dmitry Kovalev's avatar
Dmitry Kovalev committed
14
#include <stdio.h>
John Koleszar's avatar
John Koleszar committed
15

16
#include "./vpx_config.h"
17
#include "./vpx_dsp_rtcd.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
18

19
#include "vpx_dsp/vpx_dsp_common.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
20
#include "vpx_mem/vpx_mem.h"
21
#include "vpx_ports/mem.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
22

23
#include "vp9/common/vp9_common.h"
24
#include "vp9/common/vp9_mvref_common.h"
25
#include "vp9/common/vp9_reconinter.h"
John Koleszar's avatar
John Koleszar committed
26

27
#include "vp9/encoder/vp9_encoder.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
28 29
#include "vp9/encoder/vp9_mcomp.h"

30 31
// #define NEW_DIAMOND_SEARCH

32 33 34 35 36
static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf,
                                             const MV *mv) {
  return &buf->buf[mv->row * buf->stride + mv->col];
}

Alex Converse's avatar
Alex Converse committed
37
void vp9_set_mv_search_range(MvLimits *mv_limits, const MV *mv) {
38 39 40 41 42
  int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0);
  int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0);
  int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL;
  int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL;

43 44 45 46
  col_min = VPXMAX(col_min, (MV_LOW >> 3) + 1);
  row_min = VPXMAX(row_min, (MV_LOW >> 3) + 1);
  col_max = VPXMIN(col_max, (MV_UPP >> 3) - 1);
  row_max = VPXMIN(row_max, (MV_UPP >> 3) - 1);
47 48 49

  // Get intersection of UMV window and valid MV window to reduce # of checks
  // in diamond search.
Alex Converse's avatar
Alex Converse committed
50 51 52 53
  if (mv_limits->col_min < col_min) mv_limits->col_min = col_min;
  if (mv_limits->col_max > col_max) mv_limits->col_max = col_max;
  if (mv_limits->row_min < row_min) mv_limits->row_min = row_min;
  if (mv_limits->row_max > row_max) mv_limits->row_max = row_max;
54 55
}

56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
void vp9_set_subpel_mv_search_range(MvLimits *subpel_mv_limits,
                                    const MvLimits *umv_window_limits,
                                    const MV *ref_mv) {
  subpel_mv_limits->col_min = VPXMAX(umv_window_limits->col_min * 8,
                                     ref_mv->col - MAX_FULL_PEL_VAL * 8);
  subpel_mv_limits->col_max = VPXMIN(umv_window_limits->col_max * 8,
                                     ref_mv->col + MAX_FULL_PEL_VAL * 8);
  subpel_mv_limits->row_min = VPXMAX(umv_window_limits->row_min * 8,
                                     ref_mv->row - MAX_FULL_PEL_VAL * 8);
  subpel_mv_limits->row_max = VPXMIN(umv_window_limits->row_max * 8,
                                     ref_mv->row + MAX_FULL_PEL_VAL * 8);

  subpel_mv_limits->col_min = VPXMAX(MV_LOW + 1, subpel_mv_limits->col_min);
  subpel_mv_limits->col_max = VPXMIN(MV_UPP - 1, subpel_mv_limits->col_max);
  subpel_mv_limits->row_min = VPXMAX(MV_LOW + 1, subpel_mv_limits->row_min);
  subpel_mv_limits->row_max = VPXMIN(MV_UPP - 1, subpel_mv_limits->row_max);
}

74
int vp9_init_search_range(int size) {
75
  int sr = 0;
76
  // Minimum search size no matter what the passed in value.
77
  size = VPXMAX(16, size);
78

79
  while ((size << sr) < MAX_FULL_PEL_VAL) sr++;
80

81
  sr = VPXMIN(sr, MAX_MVSEARCH_STEPS - 2);
82 83 84
  return sr;
}

85 86
static INLINE int mv_cost(const MV *mv, const int *joint_cost,
                          int *const comp_cost[2]) {
87 88
  assert(mv->row >= -MV_MAX && mv->row < MV_MAX);
  assert(mv->col >= -MV_MAX && mv->col < MV_MAX);
89 90
  return joint_cost[vp9_get_mv_joint(mv)] + comp_cost[0][mv->row] +
         comp_cost[1][mv->col];
John Koleszar's avatar
John Koleszar committed
91 92
}

93 94 95
int vp9_mv_bit_cost(const MV *mv, const MV *ref, const int *mvjcost,
                    int *mvcost[2], int weight) {
  const MV diff = { mv->row - ref->row, mv->col - ref->col };
96 97 98
  return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7);
}

99 100 101
#define PIXEL_TRANSFORM_ERROR_SCALE 4
static int mv_err_cost(const MV *mv, const MV *ref, const int *mvjcost,
                       int *mvcost[2], int error_per_bit) {
102
  if (mvcost) {
103
    const MV diff = { mv->row - ref->row, mv->col - ref->col };
104 105
    return (int)ROUND64_POWER_OF_TWO(
        (int64_t)mv_cost(&diff, mvjcost, mvcost) * error_per_bit,
106 107
        RDDIV_BITS + VP9_PROB_COST_SHIFT - RD_EPB_SHIFT +
            PIXEL_TRANSFORM_ERROR_SCALE);
108
  }
John Koleszar's avatar
John Koleszar committed
109
  return 0;
110 111
}

112
static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
113
                          int sad_per_bit) {
114
  const MV diff = { mv->row - ref->row, mv->col - ref->col };
115
  return ROUND_POWER_OF_TWO(
116
      (unsigned)mv_cost(&diff, x->nmvjointsadcost, x->nmvsadcost) * sad_per_bit,
117
      VP9_PROB_COST_SHIFT);
John Koleszar's avatar
John Koleszar committed
118 119
}

120
void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) {
121 122
  int len;
  int ss_count = 0;
John Koleszar's avatar
John Koleszar committed
123

Dmitry Kovalev's avatar
Dmitry Kovalev committed
124
  for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
125
    // Generate offsets for 4 search sites per step.
126
    const MV ss_mvs[] = { { -len, 0 }, { len, 0 }, { 0, -len }, { 0, len } };
127
    int i;
128 129 130
    for (i = 0; i < 4; ++i, ++ss_count) {
      cfg->ss_mv[ss_count] = ss_mvs[i];
      cfg->ss_os[ss_count] = ss_mvs[i].row * stride + ss_mvs[i].col;
131
    }
John Koleszar's avatar
John Koleszar committed
132 133
  }

134
  cfg->searches_per_step = 4;
135
  cfg->total_steps = ss_count / cfg->searches_per_step;
John Koleszar's avatar
John Koleszar committed
136 137
}

138
void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
139 140
  int len;
  int ss_count = 0;
John Koleszar's avatar
John Koleszar committed
141

Dmitry Kovalev's avatar
Dmitry Kovalev committed
142
  for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
143
    // Generate offsets for 8 search sites per step.
144 145 146
    const MV ss_mvs[8] = { { -len, 0 },   { len, 0 },     { 0, -len },
                           { 0, len },    { -len, -len }, { -len, len },
                           { len, -len }, { len, len } };
147
    int i;
148 149 150
    for (i = 0; i < 8; ++i, ++ss_count) {
      cfg->ss_mv[ss_count] = ss_mvs[i];
      cfg->ss_os[ss_count] = ss_mvs[i].row * stride + ss_mvs[i].col;
151
    }
John Koleszar's avatar
John Koleszar committed
152 153
  }

154
  cfg->searches_per_step = 8;
155
  cfg->total_steps = ss_count / cfg->searches_per_step;
John Koleszar's avatar
John Koleszar committed
156 157
}

Johann's avatar
Johann committed
158
// convert motion vector component to offset for sv[a]f calc
159
static INLINE int sp(int x) { return x & 7; }
160

161 162
static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
  return &buf[(r >> 3) * stride + (c >> 3)];
163
}
164

165 166
#if CONFIG_VP9_HIGHBITDEPTH
/* checks if (r, c) has better score than previous best */
167 168 169
#define CHECK_BETTER(v, r, c)                                                \
  if (c >= minc && c <= maxc && r >= minr && r <= maxr) {                    \
    int64_t tmpmse;                                                          \
170 171
    const MV mv = { r, c };                                                  \
    const MV ref_mv = { rr, rc };                                            \
172 173 174 175 176 177 178 179
    if (second_pred == NULL) {                                               \
      thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z,  \
                         src_stride, &sse);                                  \
    } else {                                                                 \
      thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
                          src_stride, &sse, second_pred);                    \
    }                                                                        \
    tmpmse = thismse;                                                        \
180
    tmpmse += mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit);     \
181 182 183 184 185 186 187 188 189 190 191
    if (tmpmse >= INT_MAX) {                                                 \
      v = INT_MAX;                                                           \
    } else if ((v = (uint32_t)tmpmse) < besterr) {                           \
      besterr = v;                                                           \
      br = r;                                                                \
      bc = c;                                                                \
      *distortion = thismse;                                                 \
      *sse1 = sse;                                                           \
    }                                                                        \
  } else {                                                                   \
    v = INT_MAX;                                                             \
192 193
  }
#else
194
/* checks if (r, c) has better score than previous best */
195 196
#define CHECK_BETTER(v, r, c)                                                \
  if (c >= minc && c <= maxc && r >= minr && r <= maxr) {                    \
197 198
    const MV mv = { r, c };                                                  \
    const MV ref_mv = { rr, rc };                                            \
199 200 201 202 203 204
    if (second_pred == NULL)                                                 \
      thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z,  \
                         src_stride, &sse);                                  \
    else                                                                     \
      thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
                          src_stride, &sse, second_pred);                    \
205 206
    if ((v = mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit) +     \
             thismse) < besterr) {                                           \
207 208 209 210 211 212 213 214
      besterr = v;                                                           \
      br = r;                                                                \
      bc = c;                                                                \
      *distortion = thismse;                                                 \
      *sse1 = sse;                                                           \
    }                                                                        \
  } else {                                                                   \
    v = INT_MAX;                                                             \
Dmitry Kovalev's avatar
Dmitry Kovalev committed
215
  }
216

217
#endif
218 219 220 221 222 223 224 225 226 227 228 229 230 231
#define FIRST_LEVEL_CHECKS                                       \
  {                                                              \
    unsigned int left, right, up, down, diag;                    \
    CHECK_BETTER(left, tr, tc - hstep);                          \
    CHECK_BETTER(right, tr, tc + hstep);                         \
    CHECK_BETTER(up, tr - hstep, tc);                            \
    CHECK_BETTER(down, tr + hstep, tc);                          \
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);     \
    switch (whichdir) {                                          \
      case 0: CHECK_BETTER(diag, tr - hstep, tc - hstep); break; \
      case 1: CHECK_BETTER(diag, tr - hstep, tc + hstep); break; \
      case 2: CHECK_BETTER(diag, tr + hstep, tc - hstep); break; \
      case 3: CHECK_BETTER(diag, tr + hstep, tc + hstep); break; \
    }                                                            \
232 233
  }

234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263
#define SECOND_LEVEL_CHECKS                                       \
  {                                                               \
    int kr, kc;                                                   \
    unsigned int second;                                          \
    if (tr != br && tc != bc) {                                   \
      kr = br - tr;                                               \
      kc = bc - tc;                                               \
      CHECK_BETTER(second, tr + kr, tc + 2 * kc);                 \
      CHECK_BETTER(second, tr + 2 * kr, tc + kc);                 \
    } else if (tr == br && tc != bc) {                            \
      kc = bc - tc;                                               \
      CHECK_BETTER(second, tr + hstep, tc + 2 * kc);              \
      CHECK_BETTER(second, tr - hstep, tc + 2 * kc);              \
      switch (whichdir) {                                         \
        case 0:                                                   \
        case 1: CHECK_BETTER(second, tr + hstep, tc + kc); break; \
        case 2:                                                   \
        case 3: CHECK_BETTER(second, tr - hstep, tc + kc); break; \
      }                                                           \
    } else if (tr != br && tc == bc) {                            \
      kr = br - tr;                                               \
      CHECK_BETTER(second, tr + 2 * kr, tc + hstep);              \
      CHECK_BETTER(second, tr + 2 * kr, tc - hstep);              \
      switch (whichdir) {                                         \
        case 0:                                                   \
        case 2: CHECK_BETTER(second, tr + kr, tc + hstep); break; \
        case 1:                                                   \
        case 3: CHECK_BETTER(second, tr + kr, tc - hstep); break; \
      }                                                           \
    }                                                             \
264 265
  }

266 267 268
// TODO(yunqingwang): SECOND_LEVEL_CHECKS_BEST was a rewrote of
// SECOND_LEVEL_CHECKS, and SECOND_LEVEL_CHECKS should be rewritten
// later in the same way.
269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284
#define SECOND_LEVEL_CHECKS_BEST                \
  {                                             \
    unsigned int second;                        \
    int br0 = br;                               \
    int bc0 = bc;                               \
    assert(tr == br || tc == bc);               \
    if (tr == br && tc != bc) {                 \
      kc = bc - tc;                             \
    } else if (tr != br && tc == bc) {          \
      kr = br - tr;                             \
    }                                           \
    CHECK_BETTER(second, br0 + kr, bc0);        \
    CHECK_BETTER(second, br0, bc0 + kc);        \
    if (br0 != br || bc0 != bc) {               \
      CHECK_BETTER(second, br0 + kr, bc0 + kc); \
    }                                           \
285 286
  }

287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318
#define SETUP_SUBPEL_SEARCH                                                 \
  const uint8_t *const z = x->plane[0].src.buf;                             \
  const int src_stride = x->plane[0].src.stride;                            \
  const MACROBLOCKD *xd = &x->e_mbd;                                        \
  unsigned int besterr = UINT_MAX;                                          \
  unsigned int sse;                                                         \
  unsigned int whichdir;                                                    \
  int thismse;                                                              \
  const unsigned int halfiters = iters_per_step;                            \
  const unsigned int quarteriters = iters_per_step;                         \
  const unsigned int eighthiters = iters_per_step;                          \
  const int y_stride = xd->plane[0].pre[0].stride;                          \
  const int offset = bestmv->row * y_stride + bestmv->col;                  \
  const uint8_t *const y = xd->plane[0].pre[0].buf;                         \
                                                                            \
  int rr = ref_mv->row;                                                     \
  int rc = ref_mv->col;                                                     \
  int br = bestmv->row * 8;                                                 \
  int bc = bestmv->col * 8;                                                 \
  int hstep = 4;                                                            \
  int minc, maxc, minr, maxr;                                               \
  int tr = br;                                                              \
  int tc = bc;                                                              \
  MvLimits subpel_mv_limits;                                                \
                                                                            \
  vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv); \
  minc = subpel_mv_limits.col_min;                                          \
  maxc = subpel_mv_limits.col_max;                                          \
  minr = subpel_mv_limits.row_min;                                          \
  maxr = subpel_mv_limits.row_max;                                          \
                                                                            \
  bestmv->row *= 8;                                                         \
319
  bestmv->col *= 8;
320

321 322 323 324 325 326
static unsigned int setup_center_error(
    const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv,
    int error_per_bit, const vp9_variance_fn_ptr_t *vfp,
    const uint8_t *const src, const int src_stride, const uint8_t *const y,
    int y_stride, const uint8_t *second_pred, int w, int h, int offset,
    int *mvjcost, int *mvcost[2], uint32_t *sse1, uint32_t *distortion) {
327
#if CONFIG_VP9_HIGHBITDEPTH
328
  uint64_t besterr;
329 330
  if (second_pred != NULL) {
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
331
      DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]);
332
      vpx_highbd_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset,
333
                               y_stride);
334 335
      besterr =
          vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride, sse1);
336
    } else {
337
      DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
338
      vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
339 340 341 342 343
      besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
    }
  } else {
    besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
  }
344
  *distortion = (uint32_t)besterr;
345
  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
346
  if (besterr >= UINT_MAX) return UINT_MAX;
347
  return (uint32_t)besterr;
348
#else
349
  uint32_t besterr;
350
  (void)xd;
351
  if (second_pred != NULL) {
352
    DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
353
    vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
354 355 356 357 358
    besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
  } else {
    besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
  }
  *distortion = besterr;
359
  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
360
  return besterr;
361
#endif  // CONFIG_VP9_HIGHBITDEPTH
362
}
363

364
static INLINE int64_t divide_and_round(const int64_t n, const int64_t d) {
365 366 367
  return ((n < 0) ^ (d < 0)) ? ((n - d / 2) / d) : ((n + d / 2) / d);
}

368
static INLINE int is_cost_list_wellbehaved(int *cost_list) {
369 370
  return cost_list[0] < cost_list[1] && cost_list[0] < cost_list[2] &&
         cost_list[0] < cost_list[3] && cost_list[0] < cost_list[4];
371 372 373 374 375 376 377 378 379 380
}

// Returns surface minima estimate at given precision in 1/2^n bits.
// Assume a model for the cost surface: S = A(x - x0)^2 + B(y - y0)^2 + C
// For a given set of costs S0, S1, S2, S3, S4 at points
// (y, x) = (0, 0), (0, -1), (1, 0), (0, 1) and (-1, 0) respectively,
// the solution for the location of the minima (x0, y0) is given by:
// x0 = 1/2 (S1 - S3)/(S1 + S3 - 2*S0),
// y0 = 1/2 (S4 - S2)/(S4 + S2 - 2*S0).
// The code below is an integerized version of that.
381
static void get_cost_surf_min(int *cost_list, int *ir, int *ic, int bits) {
382 383 384 385
  const int64_t x0 = (int64_t)cost_list[1] - cost_list[3];
  const int64_t y0 = cost_list[1] - 2 * (int64_t)cost_list[0] + cost_list[3];
  const int64_t x1 = (int64_t)cost_list[4] - cost_list[2];
  const int64_t y1 = cost_list[4] - 2 * (int64_t)cost_list[0] + cost_list[2];
386
  const int b = 1 << (bits - 1);
387 388
  *ic = (int)divide_and_round(x0 * b, y0);
  *ir = (int)divide_and_round(x1 * b, y1);
389 390
}

391 392 393 394 395 396 397 398
uint32_t vp9_skip_sub_pixel_tree(const MACROBLOCK *x, MV *bestmv,
                                 const MV *ref_mv, int allow_hp,
                                 int error_per_bit,
                                 const vp9_variance_fn_ptr_t *vfp,
                                 int forced_stop, int iters_per_step,
                                 int *cost_list, int *mvjcost, int *mvcost[2],
                                 uint32_t *distortion, uint32_t *sse1,
                                 const uint8_t *second_pred, int w, int h) {
399
  SETUP_SUBPEL_SEARCH;
400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420
  besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
                               src_stride, y, y_stride, second_pred, w, h,
                               offset, mvjcost, mvcost, sse1, distortion);
  (void)halfiters;
  (void)quarteriters;
  (void)eighthiters;
  (void)whichdir;
  (void)allow_hp;
  (void)forced_stop;
  (void)hstep;
  (void)rr;
  (void)rc;
  (void)minr;
  (void)minc;
  (void)maxr;
  (void)maxc;
  (void)tr;
  (void)tc;
  (void)sse;
  (void)thismse;
  (void)cost_list;
421 422 423 424

  return besterr;
}

425
uint32_t vp9_find_best_sub_pixel_tree_pruned_evenmore(
426 427 428 429 430
    const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
    int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
    int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
    uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
    int h) {
431
  SETUP_SUBPEL_SEARCH;
432 433 434 435 436 437 438 439 440 441 442 443
  besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
                               src_stride, y, y_stride, second_pred, w, h,
                               offset, mvjcost, mvcost, sse1, distortion);
  (void)halfiters;
  (void)quarteriters;
  (void)eighthiters;
  (void)whichdir;
  (void)allow_hp;
  (void)forced_stop;
  (void)hstep;

  if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
444
      cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
445
      cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) {
446
    int ir, ic;
447
    unsigned int minpt = INT_MAX;
448
    get_cost_surf_min(cost_list, &ir, &ic, 2);
449
    if (ir != 0 || ic != 0) {
450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475
      CHECK_BETTER(minpt, tr + 2 * ir, tc + 2 * ic);
    }
  } else {
    FIRST_LEVEL_CHECKS;
    if (halfiters > 1) {
      SECOND_LEVEL_CHECKS;
    }

    tr = br;
    tc = bc;

    // Each subsequent iteration checks at least one point in common with
    // the last iteration could be 2 ( if diag selected) 1/4 pel
    // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
    if (forced_stop != 2) {
      hstep >>= 1;
      FIRST_LEVEL_CHECKS;
      if (quarteriters > 1) {
        SECOND_LEVEL_CHECKS;
      }
    }
  }

  tr = br;
  tc = bc;

476
  if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) {
477 478 479 480
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (eighthiters > 1) {
      SECOND_LEVEL_CHECKS;
481 482 483 484 485 486 487 488 489
    }
  }

  bestmv->row = br;
  bestmv->col = bc;

  return besterr;
}

490 491 492 493 494 495
uint32_t vp9_find_best_sub_pixel_tree_pruned_more(
    const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
    int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
    int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
    uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
    int h) {
496
  SETUP_SUBPEL_SEARCH;
497 498 499 500
  besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
                               src_stride, y, y_stride, second_pred, w, h,
                               offset, mvjcost, mvcost, sse1, distortion);
  if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
501
      cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
502
      cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) {
503 504
    unsigned int minpt;
    int ir, ic;
505
    get_cost_surf_min(cost_list, &ir, &ic, 1);
506 507
    if (ir != 0 || ic != 0) {
      CHECK_BETTER(minpt, tr + ir * hstep, tc + ic * hstep);
508 509
    }
  } else {
510 511 512 513
    FIRST_LEVEL_CHECKS;
    if (halfiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
514
  }
515

516 517 518 519 520
  // Each subsequent iteration checks at least one point in common with
  // the last iteration could be 2 ( if diag selected) 1/4 pel

  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  if (forced_stop != 2) {
521 522
    tr = br;
    tc = bc;
523 524 525 526 527 528 529
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (quarteriters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  }

530
  if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) {
531 532
    tr = br;
    tc = bc;
533 534 535 536 537 538 539 540
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (eighthiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  }
  // These lines insure static analysis doesn't warn that
  // tr and tc aren't used after the above point.
541 542
  (void)tr;
  (void)tc;
543 544 545 546 547 548 549

  bestmv->row = br;
  bestmv->col = bc;

  return besterr;
}

550 551 552 553 554 555
uint32_t vp9_find_best_sub_pixel_tree_pruned(
    const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
    int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
    int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
    uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
    int h) {
556
  SETUP_SUBPEL_SEARCH;
557 558 559 560
  besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
                               src_stride, y, y_stride, second_pred, w, h,
                               offset, mvjcost, mvcost, sse1, distortion);
  if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
561 562
      cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
      cost_list[4] != INT_MAX) {
563
    unsigned int left, right, up, down, diag;
564 565
    whichdir = (cost_list[1] < cost_list[3] ? 0 : 1) +
               (cost_list[2] < cost_list[4] ? 0 : 2);
566 567 568
    switch (whichdir) {
      case 0:
        CHECK_BETTER(left, tr, tc - hstep);
569 570
        CHECK_BETTER(down, tr + hstep, tc);
        CHECK_BETTER(diag, tr + hstep, tc - hstep);
571 572 573
        break;
      case 1:
        CHECK_BETTER(right, tr, tc + hstep);
574 575
        CHECK_BETTER(down, tr + hstep, tc);
        CHECK_BETTER(diag, tr + hstep, tc + hstep);
576 577 578
        break;
      case 2:
        CHECK_BETTER(left, tr, tc - hstep);
579 580
        CHECK_BETTER(up, tr - hstep, tc);
        CHECK_BETTER(diag, tr - hstep, tc - hstep);
581 582 583
        break;
      case 3:
        CHECK_BETTER(right, tr, tc + hstep);
584 585
        CHECK_BETTER(up, tr - hstep, tc);
        CHECK_BETTER(diag, tr - hstep, tc + hstep);
586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611
        break;
    }
  } else {
    FIRST_LEVEL_CHECKS;
    if (halfiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  }

  tr = br;
  tc = bc;

  // Each subsequent iteration checks at least one point in common with
  // the last iteration could be 2 ( if diag selected) 1/4 pel

  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  if (forced_stop != 2) {
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (quarteriters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    tr = br;
    tc = bc;
  }

612
  if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) {
613 614 615 616 617 618 619 620 621 622
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (eighthiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    tr = br;
    tc = bc;
  }
  // These lines insure static analysis doesn't warn that
  // tr and tc aren't used after the above point.
623 624
  (void)tr;
  (void)tc;
625 626 627 628 629 630 631

  bestmv->row = br;
  bestmv->col = bc;

  return besterr;
}

632
/* clang-format off */
633
static const MV search_step_table[12] = {
634 635 636 637
  // left, right, up, down
  { 0, -4 }, { 0, 4 }, { -4, 0 }, { 4, 0 },
  { 0, -2 }, { 0, 2 }, { -2, 0 }, { 2, 0 },
  { 0, -1 }, { 0, 1 }, { -1, 0 }, { 1, 0 }
638
};
639 640 641 642 643 644 645 646
/* clang-format on */

uint32_t vp9_find_best_sub_pixel_tree(
    const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
    int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
    int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
    uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
    int h) {
647 648 649 650
  const uint8_t *const z = x->plane[0].src.buf;
  const uint8_t *const src_address = z;
  const int src_stride = x->plane[0].src.stride;
  const MACROBLOCKD *xd = &x->e_mbd;
651
  unsigned int besterr = UINT_MAX;
652 653 654 655 656 657 658 659 660 661 662 663
  unsigned int sse;
  int thismse;
  const int y_stride = xd->plane[0].pre[0].stride;
  const int offset = bestmv->row * y_stride + bestmv->col;
  const uint8_t *const y = xd->plane[0].pre[0].buf;

  int rr = ref_mv->row;
  int rc = ref_mv->col;
  int br = bestmv->row * 8;
  int bc = bestmv->col * 8;
  int hstep = 4;
  int iter, round = 3 - forced_stop;
664 665

  int minc, maxc, minr, maxr;
666 667 668 669 670
  int tr = br;
  int tc = bc;
  const MV *search_step = search_step_table;
  int idx, best_idx = -1;
  unsigned int cost_array[5];
671
  int kr, kc;
672 673 674 675 676 677 678
  MvLimits subpel_mv_limits;

  vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv);
  minc = subpel_mv_limits.col_min;
  maxc = subpel_mv_limits.col_max;
  minr = subpel_mv_limits.row_min;
  maxr = subpel_mv_limits.row_max;
679

680
  if (!(allow_hp && use_mv_hp(ref_mv)))
681
    if (round == 3) round = 2;
682 683 684

  bestmv->row *= 8;
  bestmv->col *= 8;
685

686 687 688
  besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
                               src_stride, y, y_stride, second_pred, w, h,
                               offset, mvjcost, mvcost, sse1, distortion);
689

690
  (void)cost_list;  // to silence compiler warning
691

692 693 694 695 696 697 698 699 700 701 702
  for (iter = 0; iter < round; ++iter) {
    // Check vertical and horizontal sub-pixel positions.
    for (idx = 0; idx < 4; ++idx) {
      tr = br + search_step[idx].row;
      tc = bc + search_step[idx].col;
      if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
        const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
        MV this_mv;
        this_mv.row = tr;
        this_mv.col = tc;
        if (second_pred == NULL)
703 704
          thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address,
                             src_stride, &sse);
705
        else
Johann's avatar
Johann committed
706
          thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
707
                              src_address, src_stride, &sse, second_pred);
708 709
        cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost,
                                                mvcost, error_per_bit);
710 711 712 713 714 715 716 717

        if (cost_array[idx] < besterr) {
          best_idx = idx;
          besterr = cost_array[idx];
          *distortion = thismse;
          *sse1 = sse;
        }
      } else {
718
        cost_array[idx] = UINT_MAX;
719
      }
720 721
    }

722
    // Check diagonal sub-pixel position
723 724 725 726 727
    kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep);
    kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep);

    tc = bc + kc;
    tr = br + kr;
728 729
    if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
      const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
730
      MV this_mv = { tr, tc };
731
      if (second_pred == NULL)
732 733
        thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address,
                           src_stride, &sse);
734
      else
735 736 737 738
        thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr), src_address,
                            src_stride, &sse, second_pred);
      cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
                                            error_per_bit);
739 740 741 742 743 744 745 746

      if (cost_array[4] < besterr) {
        best_idx = 4;
        besterr = cost_array[4];
        *distortion = thismse;
        *sse1 = sse;
      }
    } else {
747
      cost_array[idx] = UINT_MAX;
748 749 750 751 752 753 754 755
    }

    if (best_idx < 4 && best_idx >= 0) {
      br += search_step[best_idx].row;
      bc += search_step[best_idx].col;
    } else if (best_idx == 4) {
      br = tr;
      bc = tc;
756
    }
757

758
    if (iters_per_step > 1 && best_idx != -1) SECOND_LEVEL_CHECKS_BEST;
759

760 761
    tr = br;
    tc = bc;
762 763 764 765

    search_step += 4;
    hstep >>= 1;
    best_idx = -1;
766
  }
767 768 769 770

  // Each subsequent iteration checks at least one point in common with
  // the last iteration could be 2 ( if diag selected) 1/4 pel

771 772
  // These lines insure static analysis doesn't warn that
  // tr and tc aren't used after the above point.
773 774
  (void)tr;
  (void)tc;
775

776 777
  bestmv->row = br;
  bestmv->col = bc;
778

779 780
  return besterr;
}
781

John Koleszar's avatar
John Koleszar committed
782
#undef CHECK_BETTER
783

Alex Converse's avatar
Alex Converse committed
784
static INLINE int check_bounds(const MvLimits *mv_limits, int row, int col,
785
                               int range) {
Alex Converse's avatar
Alex Converse committed
786 787 788 789
  return ((row - range) >= mv_limits->row_min) &
         ((row + range) <= mv_limits->row_max) &
         ((col - range) >= mv_limits->col_min) &
         ((col + range) <= mv_limits->col_max);
790
}
791

Alex Converse's avatar
Alex Converse committed
792 793 794
static INLINE int is_mv_in(const MvLimits *mv_limits, const MV *mv) {
  return (mv->col >= mv_limits->col_min) && (mv->col <= mv_limits->col_max) &&
         (mv->row >= mv_limits->row_min) && (mv->row <= mv_limits->row_max);
795
}
796

797 798 799 800 801 802 803 804 805 806
#define CHECK_BETTER                                                      \
  {                                                                       \
    if (thissad < bestsad) {                                              \
      if (use_mvcost)                                                     \
        thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); \
      if (thissad < bestsad) {                                            \
        bestsad = thissad;                                                \
        best_site = i;                                                    \
      }                                                                   \
    }                                                                     \
John Koleszar's avatar
John Koleszar committed
807 808
  }

809 810 811
#define MAX_PATTERN_SCALES 11
#define MAX_PATTERN_CANDIDATES 8  // max number of canddiates per scale
#define PATTERN_CANDIDATES_REF 3  // number of refinement candidates
812

813
// Calculate and return a sad+mvcost list around an integer best pel.
814
static INLINE void calc_int_cost_list(const MACROBLOCK *x, const MV *ref_mv,
815 816
                                      int sadpb,
                                      const vp9_variance_fn_ptr_t *fn_ptr,
817 818
                                      const MV *best_mv, int *cost_list) {
  static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } };
819 820
  const struct buf_2d *const what = &x->plane[0].src;
  const struct buf_2d *const in_what = &x->e_mbd.plane[0].pre[0];
821
  const MV fcenter_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
822 823 824 825
  int br = best_mv->row;
  int bc = best_mv->col;
  MV this_mv;
  int i;
826
  unsigned int sse;
827 828 829

  this_mv.row = br;
  this_mv.col = bc;
830 831 832
  cost_list[0] =
      fn_ptr->vf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv),
                 in_what->stride, &sse) +
833
      mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
Alex Converse's avatar
Alex Converse committed
834
  if (check_bounds(&x->mv_limits, br, bc, 1)) {
835
    for (i = 0; i < 4; i++) {
836
      const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
837 838 839
      cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
                                    get_buf_from_mv(in_what, &this_mv),
                                    in_what->stride, &sse) +
840 841
                         mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost,
                                     x->mvcost, x->errorperbit);
842 843 844
    }
  } else {
    for (i = 0; i < 4; i++) {
845
      const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
Alex Converse's avatar
Alex Converse committed
846
      if (!is_mv_in(&x->mv_limits, &this_mv))
847 848
        cost_list[i + 1] = INT_MAX;
      else
849 850 851
        cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
                                      get_buf_from_mv(in_what, &this_mv),
                                      in_what->stride, &sse) +
852 853
                           mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost,
                                       x->mvcost, x->errorperbit);
854 855 856 857
    }
  }
}

858 859 860 861
// Generic pattern search function that searches over multiple scales.
// Each scale can have a different number of candidates and shape of
// candidates as indicated in the num_candidates and candidates arrays
// passed into this function
862
//
clang-format's avatar
clang-format committed
863 864 865 866 867 868
static int vp9_pattern_search(
    const MACROBLOCK *x, MV *ref_mv, int search_param, int sad_per_bit,
    int do_init_search, int *cost_list, const vp9_variance_fn_ptr_t *vfp,
    int use_mvcost, const MV *center_mv, MV *best_mv,
    const int num_candidates[MAX_PATTERN_SCALES],
    const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES]) {
869
  const MACROBLOCKD *const xd = &x->e_mbd;
870 871 872
  static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
    10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
  };
873
  int i, s, t;
874 875
  const struct buf_2d *const what = &x->plane[0].src;
  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
John Koleszar's avatar
John Koleszar committed
876
  int br, bc;
877 878
  int bestsad = INT_MAX;
  int thissad;
John Koleszar's avatar
John Koleszar committed
879
  int k = -1;
880
  const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
881
  int best_init_s = search_param_to_steps[search_param];
John Koleszar's avatar
John Koleszar committed
882
  // adjust ref_mv to make sure it is within MV range
Alex Converse's avatar
Alex Converse committed
883 884
  clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
           x->mv_limits.row_min, x->mv_limits.row_max);
885 886
  br = ref_mv->row;
  bc = ref_mv->col;
John Koleszar's avatar
John Koleszar committed
887 888

  // Work out the start point for the search
889 890 891
  bestsad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
                     in_what->stride) +
            mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
John Koleszar's avatar
John Koleszar committed
892

893 894 895 896 897 898 899
  // Search all possible scales upto the search param around the center point
  // pick the scale of the point that is best as the starting scale of
  // further steps around it.
  if (do_init_search) {
    s = best_init_s;
    best_init_s = -1;
    for (t = 0; t <= s; ++t) {
900
      int best_site = -1;
Alex Converse's avatar
Alex Converse committed
901
      if (check_bounds(&x->mv_limits, br, bc, 1 << t)) {
902
        for (i = 0; i < num_candidates[t]; i++) {
903 904 905 906 907
          const MV this_mv = { br + candidates[t][i].row,
                               bc + candidates[t][i].col };
          thissad =
              vfp->sdf(what->buf, what->stride,
                       get_buf_from_mv(in_what, &this_mv), in_what->stride);
908 909 910 911
          CHECK_BETTER
        }
      } else {
        for (i = 0; i < num_candidates[t]; i++) {
912 913
          const MV this_mv = { br + candidates[t][i].row,
                               bc + candidates[t][i].col };
Alex Converse's avatar
Alex Converse committed
914
          if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
915 916 917
          thissad =
              vfp->sdf(what->buf, what->stride,
                       get_buf_from_mv(in_what, &this_mv), in_what->stride);
918 919 920 921 922 923 924 925 926
          CHECK_BETTER
        }
      }
      if (best_site == -1) {
        continue;
      } else {
        best_init_s = t;
        k = best_site;
      }
John Koleszar's avatar
John Koleszar committed
927
    }
928 929 930
    if (best_init_s != -1) {
      br += candidates[best_init_s][k].row;
      bc += candidates[best_init_s][k].col;
John Koleszar's avatar
John Koleszar committed
931 932 933
    }
  }

934 935 936
  // If the center point is still the best, just skip this and move to
  // the refinement step.
  if (best_init_s != -1) {
937
    int best_site = -1;
938
    s = best_init_s;
939

940 941 942
    do {
      // No need to search all 6 points the 1st time if initial search was used
      if (!do_init_search || s != best_init_s) {
Alex Converse's avatar
Alex Converse committed
943
        if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
944
          for (i = 0; i < num_candidates[s]; i++) {
945 946 947 948 949
            const MV this_mv = { br + candidates[s][i].row,
                                 bc + candidates[s][i].col };
            thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
950 951 952 953
            CHECK_BETTER
          }
        } else {
          for (i = 0; i < num_candidates[s]; i++) {
954 955
            const MV this_mv = { br + candidates[s][i].row,
                                 bc + candidates[s][i].col };
Alex Converse's avatar
Alex Converse committed
956
            if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
957 958 959
            thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
960 961 962 963 964 965 966 967 968 969 970
            CHECK_BETTER
          }
        }

        if (best_site == -1) {
          continue;
        } else {
          br += candidates[s][best_site].row;
          bc += candidates[s][best_site].col;
          k = best_site;
        }
John Koleszar's avatar
John Koleszar committed
971
      }
972

973 974 975
      do {
        int next_chkpts_indices[PATTERN_CANDIDATES_REF];
        best_site = -1;
976 977 978 979
        next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
        next_chkpts_indices[1] = k;
        next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;

Alex Converse's avatar
Alex Converse committed
980
        if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
981
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
982 983 984 985 986 987 988
            const MV this_mv = {
              br + candidates[s][next_chkpts_indices[i]].row,
              bc + candidates[s][next_chkpts_indices[i]].col
            };
            thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
989 990 991 992
            CHECK_BETTER
          }
        } else {
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
993 994 995 996
            const MV this_mv = {
              br + candidates[s][next_chkpts_indices[i]].row,
              bc + candidates[s][next_chkpts_indices[i]].col
            };
Alex Converse's avatar
Alex Converse committed
997
            if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
998 999 1000
            thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011
            CHECK_BETTER
          }
        }

        if (best_site != -1) {
          k = next_chkpts_indices[best_site];
          br += candidates[s][k].row;
          bc += candidates[s][k].col;
        }
      } while (best_site != -1);
    } while (s--);
John Koleszar's avatar
John Koleszar committed
1012
  }
1013

1014
  // Returns the one-away integer pel sad values around the best as follows:
1015 1016 1017 1018 1019 1020 1021 1022
  // cost_list[0]: cost at the best integer pel
  // cost_list[1]: cost at delta {0, -1} (left)   from the best integer pel
  // cost_list[2]: cost at delta { 1, 0} (bottom) from the best integer pel
  // cost_list[3]: cost at delta { 0, 1} (right)  from the best integer pel
  // cost_list[4]: cost at delta {-1, 0} (top)    from the best integer pel
  if (cost_list) {
    const MV best_mv = { br, bc };
    calc_int_cost_list(x, &fcenter_mv, sad_per_bit, vfp, &best_mv, cost_list);
1023 1024 1025 1026 1027 1028 1029
  }
  best_mv->row = br;
  best_mv->col = bc;
  return bestsad;
}

// A specialized function where the smallest scale search candidates
1030
// are 4 1-away neighbors, and cost_list is non-null
1031 1032
// TODO(debargha): Merge this function with the one above. Also remove
// use_mvcost option since it is always 1, to save unnecessary branches.
1033 1034 1035 1036 1037 1038
static int vp9_pattern_search_sad(
    const MACROBLOCK *x, MV *ref_mv, int search_param, int sad_per_bit,
    int do_init_search, int *cost_list, const vp9_variance_fn_ptr_t *vfp,
    int use_mvcost, const MV *center_mv, MV *best_mv,
    const int num_candidates[MAX_PATTERN_SCALES],
    const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES]) {
1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049
  const MACROBLOCKD *const xd = &x->e_mbd;
  static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
    10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
  };
  int i, s, t;
  const struct buf_2d *const what = &x->plane[0].src;
  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  int br, bc;
  int bestsad = INT_MAX;
  int thissad;
  int k = -1;
1050
  const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
1051 1052
  int best_init_s = search_param_to_steps[search_param];
  // adjust ref_mv to make sure it is within MV range
Alex Converse's avatar
Alex Converse committed
1053 1054
  clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
           x->mv_limits.row_min, x->mv_limits.row_max);
1055 1056
  br = ref_mv->row;
  bc = ref_mv->col;
1057 1058
  if (cost_list != NULL) {
    cost_list[0] = cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] =
1059 1060 1061 1062
        INT_MAX;
  }

  // Work out the start point for the search
1063 1064 1065
  bestsad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
                     in_what->stride) +
            mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
1066 1067 1068 1069 1070 1071 1072 1073 1074

  // Search all possible scales upto the search param around the center point
  // pick the scale of the point that is best as the starting scale of
  // further steps around it.
  if (do_init_search) {
    s = best_init_s;
    best_init_s = -1;
    for (t = 0; t <= s; ++t) {
      int best_site = -1;
Alex Converse's avatar
Alex Converse committed
1075
      if (check_bounds(&x->mv_limits, br, bc, 1 << t)) {
1076
        for (i = 0; i < num_candidates[t]; i++) {
1077 1078 1079 1080 1081
          const MV this_mv = { br + candidates[t][i].row,
                               bc + candidates[t][i].col };
          thissad =
              vfp->sdf(what->buf, what->stride,
                       get_buf_from_mv(in_what, &this_mv), in_what->stride);
1082 1083 1084 1085
          CHECK_BETTER
        }
      } else {
        for (i = 0; i < num_candidates[t]; i++) {
1086 1087
          const MV this_mv = { br + candidates[t][i].row,
                               bc + candidates[t][i].col };
Alex Converse's avatar
Alex Converse committed
1088
          if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
1089 1090 1091
          thissad =
              vfp->sdf(what->buf, what->stride,
                       get_buf_from_mv(in_what, &this_mv), in_what->stride);
1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110
          CHECK_BETTER
        }
      }
      if (best_site == -1) {
        continue;
      } else {
        best_init_s = t;
        k = best_site;
      }
    }
    if (best_init_s != -1) {
      br += candidates[best_init_s][k].row;
      bc += candidates[best_init_s][k].col;
    }
  }

  // If the center point is still the best, just skip this and move to
  // the refinement step.
  if (best_init_s != -1) {
1111
    int do_sad = (num_candidates[0] == 4 && cost_list != NULL);
1112 1113 1114 1115 1116
    int best_site = -1;
    s = best_init_s;

    for (; s >= do_sad; s--) {
      if (!do_init_search || s != best_init_s) {
Alex Converse's avatar
Alex Converse committed
1117
        if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
1118
          for (i = 0; i < num_candidates[s]; i++) {
1119 1120 1121 1122 1123
            const MV this_mv = { br + candidates[s][i].row,
                                 bc + candidates[s][i].col };
            thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
1124 1125 1126 1127
            CHECK_BETTER
          }
        } else {
          for (i = 0; i < num_candidates[s]; i++) {
1128 1129
            const MV this_mv = { br + candidates[s][i].row,
                                 bc + candidates[s][i].col };
Alex Converse's avatar
Alex Converse committed
1130
            if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
1131 1132 1133
            thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153
            CHECK_BETTER
          }
        }

        if (best_site == -1) {
          continue;
        } else {
          br += candidates[s][best_site].row;
          bc += candidates[s][best_site].col;
          k = best_site;
        }
      }

      do {
        int next_chkpts_indices[PATTERN_CANDIDATES_REF];
        best_site = -1;
        next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
        next_chkpts_indices[1] = k;
        next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;

Alex Converse's avatar
Alex Converse committed
1154
        if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
1155
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
1156 1157 1158 1159 1160 1161 1162
            const MV this_mv = {
              br + candidates[s][next_chkpts_indices[i]].row,
              bc + candidates[s][next_chkpts_indices[i]].col
            };
            thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
1163 1164 1165 1166
            CHECK_BETTER
          }
        } else {
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
1167 1168 1169 1170
            const MV this_mv = {
              br + candidates[s][next_chkpts_indices[i]].row,
              bc + candidates[s][next_chkpts_indices[i]].col
            };
Alex Converse's avatar
Alex Converse committed
1171
            if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
1172 1173 1174
            thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186
            CHECK_BETTER
          }
        }

        if (best_site != -1) {
          k = next_chkpts_indices[best_site];
          br += candidates[s][k].row;
          bc += candidates[s][k].col;
        }
      } while (best_site != -1);
    }

1187
    // Note: If we enter the if below, then cost_list must be non-NULL.
1188
    if (s == 0) {
1189
      cost_list[0] = bestsad;
1190
      if (!do_init_search || s != best_init_s) {
Alex Converse's avatar
Alex Converse committed
1191
        if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
1192
          for (i = 0; i < num_candidates[s]; i++) {
1193 1194 1195 1196 1197
            const MV this_mv = { br + candidates[s][i].row,
                                 bc + candidates[s][i].col };
            cost_list[i + 1] = thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
1198 1199 1200 1201
            CHECK_BETTER
          }
        } else {
          for (i = 0; i < num_candidates[s]; i++) {
1202 1203
            const MV this_mv = { br + candidates[s][i].row,
                                 bc + candidates[s][i].col };
Alex Converse's avatar
Alex Converse committed
1204
            if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
1205 1206 1207
            cost_list[i + 1] = thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223
            CHECK_BETTER
          }
        }

        if (best_site != -1) {
          br += candidates[s][best_site].row;
          bc += candidates[s][best_site].col;
          k = best_site;
        }
      }
      while (best_site != -1) {
        int next_chkpts_indices[PATTERN_CANDIDATES_REF];
        best_site = -1;
        next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
        next_chkpts_indices[1] = k;
        next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
1224 1225 1226
        cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX;
        cost_list[((k + 2) % 4) + 1] = cost_list[0];
        cost_list[0] = bestsad;
1227

Alex Converse's avatar
Alex Converse committed
1228
        if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
1229
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
1230 1231 1232 1233 1234 1235 1236
            const MV this_mv = {
              br + candidates[s][next_chkpts_indices[i]].row,
              bc + candidates[s][next_chkpts_indices[i]].col
            };
            cost_list[next_chkpts_indices[i] + 1] = thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
1237 1238 1239 1240
            CHECK_BETTER
          }
        } else {
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
1241 1242 1243 1244
            const MV this_mv = {
              br + candidates[s][next_chkpts_indices[i]].row,
              bc + candidates[s][next_chkpts_indices[i]].col
            };
Alex Converse's avatar
Alex Converse committed
1245
            if (!is_mv_in(&x->mv_limits, &this_mv)) {
1246
              cost_list[next_chkpts_indices[i] + 1] = INT_MAX;
1247 1248
              continue;
            }
1249 1250 1251
            cost_list[next_chkpts_indices[i] + 1] = thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265
            CHECK_BETTER
          }
        }

        if (best_site != -1) {
          k = next_chkpts_indices[best_site];
          br += candidates[s][k].row;
          bc += candidates[s][k].col;
        }
      }
    }
  }

  // Returns the one-away integer pel sad values around the best as follows:
1266 1267 1268 1269 1270 1271
  // cost_list[0]: sad at the best integer pel
  // cost_list[1]: sad at delta {0, -1} (left)   from the best integer pel
  // cost_list[2]: sad at delta { 1, 0} (bottom) from the best integer pel
  // cost_list[3]: sad at delta { 0, 1} (right)  from the best integer pel
  // cost_list[4]: sad at delta {-1, 0} (top)    from the best integer pel
  if (cost_list) {
1272
    static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } };
1273 1274
    if (cost_list[0] == INT_MAX) {
      cost_list[0] = bestsad;
Alex Converse's avatar
Alex Converse committed
1275
      if (check_bounds(&x->mv_limits, br, bc, 1)) {
1276
        for (i = 0; i < 4; i++) {
1277 1278 1279 1280
          const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
          cost_list[i + 1] =
              vfp->sdf(what->buf, what->stride,
                       get_buf_from_mv(in_what, &this_mv), in_what->stride);
1281 1282 1283
        }
      } else {
        for (i = 0; i < 4; i++) {
1284
          const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
Alex Converse's avatar
Alex Converse committed
1285
          if (!is_mv_in(&x->mv_limits, &this_mv))
1286
            cost_list[i + 1] = INT_MAX;
1287
          else
1288 1289 1290
            cost_list[i + 1] =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
1291 1292 1293 1294 1295
        }
      }
    } else {
      if (use_mvcost) {
        for (i = 0; i < 4; i++) {
1296
          const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
1297 1298
          if (cost_list[i + 1] != INT_MAX) {
            cost_list[i + 1] +=
1299 1300 1301
                mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
          }
        }
1302
      }
John Koleszar's avatar
John Koleszar committed
1303
    }
John Koleszar's avatar
John Koleszar committed
1304
  }
1305 1306
  best_mv->row = br;
  best_mv->col = bc;
1307 1308
  return bestsad;
}
1309

1310 1311
int vp9_get_mvpred_var(const MACROBLOCK *x, const MV *best_mv,