/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#include "mcomp.h"
#include "vpx_mem/vpx_mem.h"
14
#include "vpx_ports/config.h"
John Koleszar's avatar
John Koleszar committed
15 16 17
#include <stdio.h>
#include <limits.h>
#include <math.h>
18
#include "vp8/common/findnearmv.h"
John Koleszar's avatar
John Koleszar committed
19 20 21 22 23 24

#ifdef ENTROPY_STATS
// File-local counter tables, compiled in only when ENTROPY_STATS is defined.
// NOTE(review): the code that updates or reports these is not in the part of
// the file reviewed here; the names suggest tallies of motion-vector reference
// and mode decisions -- confirm the meaning of each index against that code.
static int mv_ref_ct [31] [4] [2];
static int mv_mode_cts [4] [2];
#endif

25 26
/*
 * vp8_mv_bit_cost: weighted cost of coding motion vector `mv` relative to
 * the reference vector `ref`.
 *
 *   mv, ref  vectors whose row/col difference is costed.
 *   mvcost   mvcost[0] is indexed by the (shifted) row difference and
 *            mvcost[1] by the (shifted) column difference. The index can be
 *            negative, so each table presumably points into the middle of a
 *            larger array -- confirm against the caller.
 *   Weight   scale factor; the summed table cost is multiplied by Weight and
 *            then shifted right by 7.
 *   ishp     (high-precision build only) when zero the differences are
 *            shifted right by one before the lookup, otherwise they are used
 *            unshifted.
 *
 * Returns ((row cost + col cost) * Weight) >> 7.
 */
#if CONFIG_HIGH_PRECISION_MV
int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight, int ishp)
{
    // MV costing is based on the distribution of vectors in the previous frame and as such will tend to
    // over state the cost of vectors. In addition coding a new vector can have a knock on effect on the
    // cost of subsequent vectors and the quality of prediction from NEAR and NEAREST for subsequent blocks.
    // The "Weight" parameter allows, to a limited extent, for some account to be taken of these factors.
    const int shift = (ishp == 0);
    const int row_cost = mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> shift];
    const int col_cost = mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> shift];

    return ((row_cost + col_cost) * Weight) >> 7;
}
#else
int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight)
{
    // MV costing is based on the distribution of vectors in the previous frame and as such will tend to
    // over state the cost of vectors. In addition coding a new vector can have a knock on effect on the
    // cost of subsequent vectors and the quality of prediction from NEAR and NEAREST for subsequent blocks.
    // The "Weight" parameter allows, to a limited extent, for some account to be taken of these factors.
    const int row_cost = mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1];
    const int col_cost = mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1];

    return ((row_cost + col_cost) * Weight) >> 7;
}
#endif
John Koleszar's avatar
John Koleszar committed
44

45 46 47
#if CONFIG_HIGH_PRECISION_MV
static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int error_per_bit, int ishp)
{
48 49 50 51 52 53
    // Ignore costing if mvcost is NULL
    if (mvcost)
        return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> (ishp==0)] +
                 mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> (ishp==0)])
                 * error_per_bit + 128) >> 8;
    return 0;
54 55
}
#else
Scott LaVarnway's avatar
Scott LaVarnway committed
56
static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int error_per_bit)
John Koleszar's avatar
John Koleszar committed
57
{
58 59 60 61 62 63
    // Ignore costing if mvcost is NULL
    if (mvcost)
        return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] +
                 mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1])
                 * error_per_bit + 128) >> 8;
    return 0;
John Koleszar's avatar
John Koleszar committed
64
}
65 66
#endif

John Koleszar's avatar
John Koleszar committed
67

Scott LaVarnway's avatar
Scott LaVarnway committed
68
/*
 * mvsad_err_cost: rate term for SAD-based searches.
 *
 * Unlike mv_err_cost(), the row/col differences index the tables directly,
 * with no shift (the original comment describes this as a full pixel basis).
 *
 *   mv, ref        vectors whose row/col difference is costed.
 *   mvsadcost      cost tables (mvsadcost[0] rows, mvsadcost[1] columns),
 *                  or NULL to disable costing.
 *   error_per_bit  multiplier applied to the summed table cost.
 *
 * Returns (cost * error_per_bit + 128) >> 8, or 0 when mvsadcost is NULL.
 */
static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2], int error_per_bit)
{
    // Ignore costing if mvsadcost is NULL
    if (!mvsadcost)
        return 0;

    // Calculate sad error cost on full pixel basis.
    return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] +
             mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)])
            * error_per_bit + 128) >> 8;
}

79

John Koleszar's avatar
John Koleszar committed
80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199
/*
 * Fill x->ss with the search-site table for a 4-point pattern.
 *
 * Slot 0 is the centre (zero displacement). After it comes one group of four
 * sites per step size, starting at MAX_FIRST_STEP and halving until the step
 * reaches zero. Each site stores its (row, col) displacement and the
 * matching offset into a buffer with the given stride.
 */
void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride)
{
    // Unit (row, col) displacements, in storage order:
    // row -step, row +step, col -step, col +step.
    static const int unit_step[4][2] =
    {
        { -1,  0 }, {  1,  0 }, {  0, -1 }, {  0,  1 }
    };
    int n = 0;          // next free slot in x->ss
    int radius;
    int k;

    // Centre site.
    x->ss[n].mv.col = 0;
    x->ss[n].mv.row = 0;
    x->ss[n].offset = 0;
    n++;

    // Generate offsets for 4 search sites per step, contracting each time.
    for (radius = MAX_FIRST_STEP; radius > 0; radius /= 2)
    {
        for (k = 0; k < 4; k++)
        {
            const int dr = unit_step[k][0] * radius;
            const int dc = unit_step[k][1] * radius;

            x->ss[n].mv.col = dc;
            x->ss[n].mv.row = dr;
            x->ss[n].offset = dr * stride + dc;
            n++;
        }
    }

    x->ss_count = n;
    x->searches_per_step = 4;
}

/*
 * Fill x->ss with the search-site table for an 8-point pattern.
 *
 * Slot 0 is the centre (zero displacement). After it comes one group of
 * eight sites per step size (four axis-aligned, then four diagonal),
 * starting at MAX_FIRST_STEP and halving until the step reaches zero. Each
 * site stores its (row, col) displacement and the matching offset into a
 * buffer with the given stride.
 */
void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
{
    // Unit (row, col) displacements, in storage order.
    static const int unit_step[8][2] =
    {
        { -1,  0 }, {  1,  0 }, {  0, -1 }, {  0,  1 },
        { -1, -1 }, { -1,  1 }, {  1, -1 }, {  1,  1 }
    };
    int n = 0;          // next free slot in x->ss
    int radius;
    int k;

    // Centre site.
    x->ss[n].mv.col = 0;
    x->ss[n].mv.row = 0;
    x->ss[n].offset = 0;
    n++;

    // Generate offsets for 8 search sites per step, contracting each time.
    for (radius = MAX_FIRST_STEP; radius > 0; radius /= 2)
    {
        for (k = 0; k < 8; k++)
        {
            const int dr = unit_step[k][0] * radius;
            const int dc = unit_step[k][1] * radius;

            x->ss[n].mv.col = dc;
            x->ss[n].mv.row = dr;
            x->ss[n].offset = dr * stride + dc;
            n++;
        }
    }

    x->ss_count = n;
    x->searches_per_step = 8;
}

200 201 202 203 204 205 206 207 208
/*
 * To avoid the penalty for crossing cache-line read, preload the reference
 * area in a small buffer, which is aligned to make sure there won't be crossing
 * cache-line read while reading from this buffer. This reduced the cpu
 * cycles spent on reading ref data in sub-pixel filter functions.
 * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x
 * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
 * could reduce the area.
 */
209

210
/*
 * Helper macros for the iterative sub-pixel search. They are not
 * self-contained: each one reads (and CHECK_BETTER also writes) locals of the
 * function it is expanded in -- y, y_stride, offset, z, b, vfp, sse, mvcost,
 * rr, rc, error_per_bit, minc/maxc/minr/maxr, thismse, besterr, br, bc,
 * distortion, sse1 and, in the high-precision build, xd.
 */
#define PRE(r,c) (y + (((r)>>2) * y_stride + ((c)>>2) -(offset))) // pointer to predictor base of a motionvector
#if CONFIG_SIXTEENTH_SUBPEL_UV
#define SP(x) (((x)&3)<<2) // convert motion vector component to offset for svf calc
#else
#define SP(x) (((x)&3)<<1) // convert motion vector component to offset for svf calc
#endif  /* CONFIG_SIXTEENTH_SUBPEL_UV */
#define MVC(r,c) (mvcost ? ((mvcost[0][(r)-rr] + mvcost[1][(c)-rc]) * error_per_bit + 128 )>>8 : 0) // estimated cost of a motion vector (r,c)
#define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function.
#define ERR(r,c) (MVC(r,c)+DIST(r,c)) // returns distortion + motion vector cost
// Runs statement s when (r,c) lies inside [minr,maxr] x [minc,maxc], otherwise e.
// The arguments are parenthesized so that an expression argument cannot
// re-associate with the comparison operators.
#define IFMVCV(r,c,s,e) if ( (c) >= minc && (c) <= maxc && (r) >= minr && (r) <= maxr) s else e;

#if CONFIG_HIGH_PRECISION_MV
#define PREHP(r,c) (y + (((r)>>3) * y_stride + ((c)>>3) -(offset))) // pointer to predictor base of a motionvector
#if CONFIG_SIXTEENTH_SUBPEL_UV
#define SPHP(x) (((x)&7)<<1) // convert motion vector component to offset for svf calc
#else /* CONFIG_SIXTEENTH_SUBPEL_UV */
#define SPHP(x) ((x)&7) // convert motion vector component to offset for svf calc
#endif  /* CONFIG_SIXTEENTH_SUBPEL_UV */
#define DISTHP(r,c) vfp->svf( PREHP(r,c), y_stride, SPHP(c),SPHP(r), z,b->src_stride,&sse) // returns subpixel variance error function.
#define ERRHP(r,c) (MVC(r,c)+DISTHP(r,c)) // returns distortion + motion vector cost
#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = ((xd->allow_high_precision_mv)?DISTHP(r,c):DIST(r,c)); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=INT_MAX;)// checks if (r,c) has better score than previous best
#else
#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=INT_MAX;)// checks if (r,c) has better score than previous best
#endif /* CONFIG_HIGH_PRECISION_MV */

#define MIN(x,y) (((x)<(y))?(x):(y))
#define MAX(x,y) (((x)>(y))?(x):(y))

Scott LaVarnway's avatar
Scott LaVarnway committed
238 239 240 241 242 243
/*
 * Iteratively refine a full-pel motion vector to sub-pixel precision.
 *
 *   x, b, d        macroblock, source block and prediction block descriptors.
 *   bestmv         in: best vector so far; it is used directly as a
 *                  row/column pixel offset into the prediction buffer, i.e.
 *                  full-pel units. out: the refined vector (see below).
 *   ref_mv         reference vector the motion-vector cost is measured from.
 *   error_per_bit  multiplier for the motion-vector cost term.
 *   vfp            variance function table (vf, svf and, on x86, copymem
 *                  are used here).
 *   mvcost         cost tables, or NULL to search on distortion alone.
 *   distortion     out: distortion (without mv cost) of the chosen vector.
 *   sse1           out: sum of squared error reported for the chosen vector.
 *
 * The search runs up to three passes at each precision (half, quarter and,
 * when high-precision vectors are allowed, eighth): each pass probes left,
 * right, up and down of the current best plus one diagonal chosen from the
 * cheaper horizontal and vertical neighbours, and stops early when the best
 * point did not move.
 *
 * On return bestmv holds br/bc as-is in the high-precision case and br/bc
 * shifted left by one otherwise. Returns the best distortion + mv cost, or
 * INT_MAX if either component of (bestmv - ref_mv) exceeds
 * MAX_FULL_PEL_VAL << 3 in magnitude.
 *
 * Relies on the PRE/SP/MVC/DIST/IFMVCV/CHECK_BETTER/MIN/MAX macros, which
 * read and write the locals declared below.
 */
int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                             int_mv *bestmv, int_mv *ref_mv,
                                             int error_per_bit,
                                             const vp8_variance_fn_ptr_t *vfp,
                                             int *mvcost[2], int *distortion,
                                             unsigned int *sse1)
{
    unsigned char *z = (*(b->base_src) + b->src);
    MACROBLOCKD *xd = &x->e_mbd;

    int rr, rc, br, bc, hstep;
    int tr, tc;
    unsigned int besterr = INT_MAX;
    unsigned int left, right, up, down, diag;
    unsigned int sse;
    unsigned int whichdir;
    // Pre-decremented in the loop conditions, so 4 allows at most 3 passes.
    unsigned int halfiters = 4;
    unsigned int quarteriters = 4;
#if CONFIG_HIGH_PRECISION_MV
    unsigned int eighthiters = 4;
#endif
    int thismse;
    int maxc, minc, maxr, minr;
    int y_stride;
    int offset;

#if ARCH_X86 || ARCH_X86_64
    unsigned char *y0 = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
    unsigned char *y;
    int buf_r1, buf_r2, buf_c1, buf_c2;

    // Clamping to avoid out-of-range data access
    buf_r1 = ((bestmv->as_mv.row - INTERP_EXTEND) < x->mv_row_min)?(bestmv->as_mv.row - x->mv_row_min):INTERP_EXTEND;
    buf_r2 = ((bestmv->as_mv.row + INTERP_EXTEND) > x->mv_row_max)?(x->mv_row_max - bestmv->as_mv.row):INTERP_EXTEND;
    buf_c1 = ((bestmv->as_mv.col - INTERP_EXTEND) < x->mv_col_min)?(bestmv->as_mv.col - x->mv_col_min):INTERP_EXTEND;
    buf_c2 = ((bestmv->as_mv.col + INTERP_EXTEND) > x->mv_col_max)?(x->mv_col_max - bestmv->as_mv.col):INTERP_EXTEND;
    y_stride = 32;

    /* Copy to intermediate buffer before searching. */
    vfp->copymem(y0 - buf_c1 - d->pre_stride*buf_r1, d->pre_stride, xd->y_buf, y_stride, 16+buf_r1+buf_r2);
    y = xd->y_buf + y_stride*buf_r1 +buf_c1;
#else
    unsigned char *y = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
    y_stride = d->pre_stride;
#endif


    // Set up the working units: br/bc (current best) and the search window
    // are kept at the finest precision that will be searched.
#if CONFIG_HIGH_PRECISION_MV
    if (xd->allow_high_precision_mv)
    {
        rr = ref_mv->as_mv.row; rc = ref_mv->as_mv.col;
        br = bestmv->as_mv.row << 3; bc = bestmv->as_mv.col << 3;
        hstep = 4;
        minc = MAX(x->mv_col_min << 3, (ref_mv->as_mv.col) - ((1 << mvlong_width_hp) - 1));
        maxc = MIN(x->mv_col_max << 3, (ref_mv->as_mv.col) + ((1 << mvlong_width_hp) - 1));
        minr = MAX(x->mv_row_min << 3, (ref_mv->as_mv.row) - ((1 << mvlong_width_hp) - 1));
        maxr = MIN(x->mv_row_max << 3, (ref_mv->as_mv.row) + ((1 << mvlong_width_hp) - 1));
    }
    else
#endif
    {
        rr = ref_mv->as_mv.row >> 1; rc = ref_mv->as_mv.col >> 1;
        br = bestmv->as_mv.row << 2; bc = bestmv->as_mv.col << 2;
        hstep = 2;
        minc = MAX(x->mv_col_min << 2, (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
        maxc = MIN(x->mv_col_max << 2, (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
        minr = MAX(x->mv_row_min << 2, (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
        maxr = MIN(x->mv_row_max << 2, (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));
    }

    tr = br;
    tc = bc;


    // PRE()/PREHP() subtract this so that they address relative to y, which
    // already points at the full-pel best position.
    offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;

    // central mv
    bestmv->as_mv.row <<= 3;
    bestmv->as_mv.col <<= 3;

    // calculate central point error
    besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
    *distortion = besterr;
#if CONFIG_HIGH_PRECISION_MV
    besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
    besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
#endif

    // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected)
    while (--halfiters)
    {
        // 1/2 pel
        CHECK_BETTER(left, tr, tc - hstep);
        CHECK_BETTER(right, tr, tc + hstep);
        CHECK_BETTER(up, tr - hstep, tc);
        CHECK_BETTER(down, tr + hstep, tc);

        // bit 0: right was at least as good as left; bit 1: same for down vs up
        whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

        switch (whichdir)
        {
        case 0:
            CHECK_BETTER(diag, tr - hstep, tc - hstep);
            break;
        case 1:
            CHECK_BETTER(diag, tr - hstep, tc + hstep);
            break;
        case 2:
            CHECK_BETTER(diag, tr + hstep, tc - hstep);
            break;
        case 3:
            CHECK_BETTER(diag, tr + hstep, tc + hstep);
            break;
        }

        // no reason to check the same one again.
        if (tr == br && tc == bc)
            break;

        tr = br;
        tc = bc;
    }

    // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected)
    // 1/4 pel
    hstep >>= 1;
    while (--quarteriters)
    {
        CHECK_BETTER(left, tr, tc - hstep);
        CHECK_BETTER(right, tr, tc + hstep);
        CHECK_BETTER(up, tr - hstep, tc);
        CHECK_BETTER(down, tr + hstep, tc);

        whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

        switch (whichdir)
        {
        case 0:
            CHECK_BETTER(diag, tr - hstep, tc - hstep);
            break;
        case 1:
            CHECK_BETTER(diag, tr - hstep, tc + hstep);
            break;
        case 2:
            CHECK_BETTER(diag, tr + hstep, tc - hstep);
            break;
        case 3:
            CHECK_BETTER(diag, tr + hstep, tc + hstep);
            break;
        }

        // no reason to check the same one again.
        if (tr == br && tc == bc)
            break;

        tr = br;
        tc = bc;
    }

#if CONFIG_HIGH_PRECISION_MV
    // 1/8 pel, only when high-precision vectors are allowed
    if (xd->allow_high_precision_mv)
    {
        hstep >>= 1;
        while (--eighthiters)
        {
            CHECK_BETTER(left, tr, tc - hstep);
            CHECK_BETTER(right, tr, tc + hstep);
            CHECK_BETTER(up, tr - hstep, tc);
            CHECK_BETTER(down, tr + hstep, tc);

            whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

            switch (whichdir)
            {
            case 0:
                CHECK_BETTER(diag, tr - hstep, tc - hstep);
                break;
            case 1:
                CHECK_BETTER(diag, tr - hstep, tc + hstep);
                break;
            case 2:
                CHECK_BETTER(diag, tr + hstep, tc - hstep);
                break;
            case 3:
                CHECK_BETTER(diag, tr + hstep, tc + hstep);
                break;
            }

            // no reason to check the same one again.
            if (tr == br && tc == bc)
                break;

            tr = br;
            tc = bc;
        }
    }
#endif
    // Write the result back, converting from the working units.
#if CONFIG_HIGH_PRECISION_MV
    if (xd->allow_high_precision_mv)
    {
        bestmv->as_mv.row = br;
        bestmv->as_mv.col = bc;
    }
    else
#endif  /* CONFIG_HIGH_PRECISION_MV */
    {
        bestmv->as_mv.row = br << 1;
        bestmv->as_mv.col = bc << 1;
    }

    // Reject results that ended up too far from the reference vector.
    if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL<<3)) ||
        (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL<<3)))
        return INT_MAX;

    return besterr;
}
// Drop the sub-pixel search helper macros so they do not leak into the rest
// of the file (SP is redefined below with a different mask).
#undef MVC
#undef PRE
#undef SP
#undef DIST
#undef IFMVCV
#undef ERR
#undef CHECK_BETTER
#undef MIN
#undef MAX

#if CONFIG_HIGH_PRECISION_MV
#undef PREHP
#undef SPHP
#undef DISTHP
#undef ERRHP
#endif

472 473 474 475
// SP() as used from here on: keeps the low three bits of a motion vector
// component (doubled when CONFIG_SIXTEENTH_SUBPEL_UV is set).
#if CONFIG_SIXTEENTH_SUBPEL_UV
#define SP(x) (((x)&7)<<1) // convert motion vector component to offset for svf calc
#else
#define SP(x) ((x)&7) // convert motion vector component to offset for svf calc
#endif  /* CONFIG_SIXTEENTH_SUBPEL_UV */
Scott LaVarnway's avatar
Scott LaVarnway committed
477 478 479 480 481 482
int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                 int_mv *bestmv, int_mv *ref_mv,
                                 int error_per_bit,
                                 const vp8_variance_fn_ptr_t *vfp,
                                 int *mvcost[2], int *distortion,
                                 unsigned int *sse1)
John Koleszar's avatar
John Koleszar committed
483 484
{
    int bestmse = INT_MAX;
Scott LaVarnway's avatar
Scott LaVarnway committed
485 486
    int_mv startmv;
    int_mv this_mv;
487 488 489 490
#if CONFIG_HIGH_PRECISION_MV
    int_mv orig_mv;
    int yrow_movedback=0, ycol_movedback=0;
#endif
John Koleszar's avatar
John Koleszar committed
491 492 493 494
    unsigned char *z = (*(b->base_src) + b->src);
    int left, right, up, down, diag;
    unsigned int sse;
    int whichdir ;
495
    int thismse;
496
    int y_stride;
497
    MACROBLOCKD *xd = &x->e_mbd;
498 499 500 501 502 503 504 505 506 507 508 509 510

#if ARCH_X86 || ARCH_X86_64
    unsigned char *y0 = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
    unsigned char *y;

    y_stride = 32;
    /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
     vfp->copymem(y0 - 1 - d->pre_stride, d->pre_stride, xd->y_buf, y_stride, 18);
     y = xd->y_buf + y_stride + 1;
#else
     unsigned char *y = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
     y_stride = d->pre_stride;
#endif
John Koleszar's avatar
John Koleszar committed
511 512

    // central mv
Scott LaVarnway's avatar
Scott LaVarnway committed
513 514
    bestmv->as_mv.row <<= 3;
    bestmv->as_mv.col <<= 3;
John Koleszar's avatar
John Koleszar committed
515
    startmv = *bestmv;
516 517 518
#if CONFIG_HIGH_PRECISION_MV
    orig_mv = *bestmv;
#endif
John Koleszar's avatar
John Koleszar committed
519 520

    // calculate central point error
521
    bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
522
    *distortion = bestmse;
523 524 525
#if CONFIG_HIGH_PRECISION_MV
    bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
526
    bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
527
#endif
John Koleszar's avatar
John Koleszar committed
528 529

    // go left then right and check error
Scott LaVarnway's avatar
Scott LaVarnway committed
530 531
    this_mv.as_mv.row = startmv.as_mv.row;
    this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
532
    thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
533 534 535
#if CONFIG_HIGH_PRECISION_MV
    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
536
    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
537
#endif
John Koleszar's avatar
John Koleszar committed
538 539 540 541 542

    if (left < bestmse)
    {
        *bestmv = this_mv;
        bestmse = left;
543
        *distortion = thismse;
544
        *sse1 = sse;
John Koleszar's avatar
John Koleszar committed
545 546
    }

Scott LaVarnway's avatar
Scott LaVarnway committed
547
    this_mv.as_mv.col += 8;
548
    thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
549 550 551
#if CONFIG_HIGH_PRECISION_MV
    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
552
    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
553
#endif
John Koleszar's avatar
John Koleszar committed
554 555 556 557 558

    if (right < bestmse)
    {
        *bestmv = this_mv;
        bestmse = right;
559
        *distortion = thismse;
560
        *sse1 = sse;
John Koleszar's avatar
John Koleszar committed
561 562 563
    }

    // go up then down and check error
Scott LaVarnway's avatar
Scott LaVarnway committed
564 565
    this_mv.as_mv.col = startmv.as_mv.col;
    this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
566
    thismse =  vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
567 568 569
#if CONFIG_HIGH_PRECISION_MV
    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
570
    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
571
#endif
John Koleszar's avatar
John Koleszar committed
572 573 574 575 576

    if (up < bestmse)
    {
        *bestmv = this_mv;
        bestmse = up;
577
        *distortion = thismse;
578
        *sse1 = sse;
John Koleszar's avatar
John Koleszar committed
579 580
    }

Scott LaVarnway's avatar
Scott LaVarnway committed
581
    this_mv.as_mv.row += 8;
582
    thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
583 584 585
#if CONFIG_HIGH_PRECISION_MV
    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
586
    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
587
#endif
John Koleszar's avatar
John Koleszar committed
588 589 590 591 592

    if (down < bestmse)
    {
        *bestmv = this_mv;
        bestmse = down;
593
        *distortion = thismse;
594
        *sse1 = sse;
John Koleszar's avatar
John Koleszar committed
595 596 597 598 599 600 601 602 603 604 605 606
    }


    // now check 1 more diagonal
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    //for(whichdir =0;whichdir<4;whichdir++)
    //{
    this_mv = startmv;

    switch (whichdir)
    {
    case 0:
Scott LaVarnway's avatar
Scott LaVarnway committed
607 608
        this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
        this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
609
        thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse);
John Koleszar's avatar
John Koleszar committed
610 611
        break;
    case 1:
Scott LaVarnway's avatar
Scott LaVarnway committed
612 613
        this_mv.as_mv.col += 4;
        this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
614
        thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse);
John Koleszar's avatar
John Koleszar committed
615 616
        break;
    case 2:
Scott LaVarnway's avatar
Scott LaVarnway committed
617 618
        this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
        this_mv.as_mv.row += 4;
619
        thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse);
John Koleszar's avatar
John Koleszar committed
620 621
        break;
    case 3:
622
    default:
Scott LaVarnway's avatar
Scott LaVarnway committed
623 624
        this_mv.as_mv.col += 4;
        this_mv.as_mv.row += 4;
625
        thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse);
John Koleszar's avatar
John Koleszar committed
626 627 628
        break;
    }

629 630 631
#if CONFIG_HIGH_PRECISION_MV
    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
632
    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
633
#endif
John Koleszar's avatar
John Koleszar committed
634 635 636 637 638

    if (diag < bestmse)
    {
        *bestmv = this_mv;
        bestmse = diag;
639
        *distortion = thismse;
640
        *sse1 = sse;
John Koleszar's avatar
John Koleszar committed
641 642 643 644 645 646
    }

//  }


    // time to check quarter pels.
Scott LaVarnway's avatar
Scott LaVarnway committed
647
    if (bestmv->as_mv.row < startmv.as_mv.row)
648
    {
649
        y -= y_stride;
650 651 652 653
#if CONFIG_HIGH_PRECISION_MV
        yrow_movedback = 1;
#endif
    }
John Koleszar's avatar
John Koleszar committed
654

Scott LaVarnway's avatar
Scott LaVarnway committed
655
    if (bestmv->as_mv.col < startmv.as_mv.col)
656
    {
John Koleszar's avatar
John Koleszar committed
657
        y--;
658 659 660 661
#if CONFIG_HIGH_PRECISION_MV
        ycol_movedback = 1;
#endif
    }
John Koleszar's avatar
John Koleszar committed
662 663 664 665 666 667

    startmv = *bestmv;



    // go left then right and check error
Scott LaVarnway's avatar
Scott LaVarnway committed
668
    this_mv.as_mv.row = startmv.as_mv.row;
John Koleszar's avatar
John Koleszar committed
669

Scott LaVarnway's avatar
Scott LaVarnway committed
670
    if (startmv.as_mv.col & 7)
John Koleszar's avatar
John Koleszar committed
671
    {
Scott LaVarnway's avatar
Scott LaVarnway committed
672
        this_mv.as_mv.col = startmv.as_mv.col - 2;
673
        thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
John Koleszar's avatar
John Koleszar committed
674 675 676
    }
    else
    {
Scott LaVarnway's avatar
Scott LaVarnway committed
677
        this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
678
        thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
John Koleszar's avatar
John Koleszar committed
679 680
    }

681 682 683
#if CONFIG_HIGH_PRECISION_MV
    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
684
    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
685
#endif
John Koleszar's avatar
John Koleszar committed
686 687 688 689 690

    if (left < bestmse)
    {
        *bestmv = this_mv;
        bestmse = left;
691
        *distortion = thismse;
692
        *sse1 = sse;
John Koleszar's avatar
John Koleszar committed
693 694
    }

Scott LaVarnway's avatar
Scott LaVarnway committed
695
    this_mv.as_mv.col += 4;
696
    thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
697 698 699
#if CONFIG_HIGH_PRECISION_MV
    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
700
    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
701
#endif
John Koleszar's avatar
John Koleszar committed
702 703 704 705 706

    if (right < bestmse)
    {
        *bestmv = this_mv;
        bestmse = right;
707
        *distortion = thismse;
708
        *sse1 = sse;
John Koleszar's avatar
John Koleszar committed
709 710 711
    }

    // go up then down and check error
Scott LaVarnway's avatar
Scott LaVarnway committed
712
    this_mv.as_mv.col = startmv.as_mv.col;
John Koleszar's avatar
John Koleszar committed
713

Scott LaVarnway's avatar
Scott LaVarnway committed
714
    if (startmv.as_mv.row & 7)
John Koleszar's avatar
John Koleszar committed
715
    {
Scott LaVarnway's avatar
Scott LaVarnway committed
716
        this_mv.as_mv.row = startmv.as_mv.row - 2;
717
        thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
John Koleszar's avatar
John Koleszar committed
718 719 720
    }
    else
    {
Scott LaVarnway's avatar
Scott LaVarnway committed
721
        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
722
        thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6), z, b->src_stride, &sse);
John Koleszar's avatar
John Koleszar committed
723 724
    }

725 726 727
#if CONFIG_HIGH_PRECISION_MV
    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
728
    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
729
#endif
John Koleszar's avatar
John Koleszar committed
730 731 732 733 734

    if (up < bestmse)
    {
        *bestmv = this_mv;
        bestmse = up;
735
        *distortion = thismse;
736
        *sse1 = sse;
John Koleszar's avatar
John Koleszar committed
737 738
    }

Scott LaVarnway's avatar
Scott LaVarnway committed
739
    this_mv.as_mv.row += 4;
740
    thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
741 742 743
#if CONFIG_HIGH_PRECISION_MV
    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
744
    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
745
#endif
John Koleszar's avatar
John Koleszar committed
746 747 748 749 750

    if (down < bestmse)
    {
        *bestmv = this_mv;
        bestmse = down;
751
        *distortion = thismse;
752
        *sse1 = sse;
John Koleszar's avatar
John Koleszar committed
753 754 755 756 757 758 759 760 761 762 763 764 765 766
    }


    // now check 1 more diagonal
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

//  for(whichdir=0;whichdir<4;whichdir++)
//  {
    this_mv = startmv;

    switch (whichdir)
    {
    case 0:

Scott LaVarnway's avatar
Scott LaVarnway committed
767
        if (startmv.as_mv.row & 7)
John Koleszar's avatar
John Koleszar committed
768
        {
Scott LaVarnway's avatar
Scott LaVarnway committed
769
            this_mv.as_mv.row -= 2;
John Koleszar's avatar
John Koleszar committed
770

Scott LaVarnway's avatar
Scott LaVarnway committed
771
            if (startmv.as_mv.col & 7)
John Koleszar's avatar
John Koleszar committed
772
            {
Scott LaVarnway's avatar
Scott LaVarnway committed
773
                this_mv.as_mv.col -= 2;
774
                thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
John Koleszar's avatar
John Koleszar committed
775 776 777
            }
            else
            {
Scott LaVarnway's avatar
Scott LaVarnway committed
778
                this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
779
                thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, b->src_stride, &sse);;
John Koleszar's avatar
John Koleszar committed
780 781 782 783
            }
        }
        else
        {
Scott LaVarnway's avatar
Scott LaVarnway committed
784
            this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
John Koleszar's avatar
John Koleszar committed
785

Scott LaVarnway's avatar
Scott LaVarnway committed
786
            if (startmv.as_mv.col & 7)
John Koleszar's avatar
John Koleszar committed
787
            {
Scott LaVarnway's avatar
Scott LaVarnway committed
788
                this_mv.as_mv.col -= 2;
789
                thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6), z, b->src_stride, &sse);
John Koleszar's avatar
John Koleszar committed
790 791 792
            }
            else
            {
Scott LaVarnway's avatar
Scott LaVarnway committed
793
                this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
794
                thismse = vfp->svf(y - y_stride - 1, y_stride, SP(6), SP(6), z, b->src_stride, &sse);
John Koleszar's avatar
John Koleszar committed
795 796 797 798 799
            }
        }

        break;
    case 1:
Scott LaVarnway's avatar
Scott LaVarnway committed
800
        this_mv.as_mv.col += 2;
John Koleszar's avatar
John Koleszar committed
801

Scott LaVarnway's avatar
Scott LaVarnway committed
802
        if (startmv.as_mv.row & 7)
John Koleszar's avatar
John Koleszar committed
803
        {
Scott LaVarnway's avatar
Scott LaVarnway committed
804
            this_mv.as_mv.row -= 2;
805
            thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
John Koleszar's avatar
John Koleszar committed
806 807 808
        }
        else
        {
Scott LaVarnway's avatar
Scott LaVarnway committed
809
            this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
810
            thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6), z, b->src_stride, &sse);
John Koleszar's avatar
John Koleszar committed
811 812 813 814
        }

        break;
    case 2:
Scott LaVarnway's avatar
Scott LaVarnway committed
815
        this_mv.as_mv.row += 2;
John Koleszar's avatar
John Koleszar committed
816

Scott LaVarnway's avatar
Scott LaVarnway committed
817
        if (startmv.as_mv.col & 7)
John Koleszar's avatar
John Koleszar committed
818
        {
Scott LaVarnway's avatar
Scott LaVarnway committed
819
            this_mv.as_mv.col -= 2;
820
            thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
John Koleszar's avatar
John Koleszar committed
821 822 823
        }
        else
        {
Scott LaVarnway's avatar
Scott LaVarnway committed
824
            this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
825
            thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
John Koleszar's avatar
John Koleszar committed
826 827 828 829
        }

        break;
    case 3:
Scott LaVarnway's avatar
Scott LaVarnway committed
830 831
        this_mv.as_mv.col += 2;
        this_mv.as_mv.row += 2;
832
        thismse = vfp->svf(y, y_stride,  SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
John Koleszar's avatar
John Koleszar committed
833 834 835
        break;
    }

836 837 838
#if CONFIG_HIGH_PRECISION_MV
    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
839
    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
840
#endif
John Koleszar's avatar
John Koleszar committed
841 842 843 844 845

    if (diag < bestmse)
    {
        *bestmv = this_mv;
        bestmse = diag;
846
        *distortion = thismse;
847
        *sse1 = sse;
John Koleszar's avatar
John Koleszar committed
848 849
    }

850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882
#if CONFIG_HIGH_PRECISION_MV
    if (!x->e_mbd.allow_high_precision_mv)
        return bestmse;

    /* Now do 1/8th pixel */
    if (bestmv->as_mv.row < orig_mv.as_mv.row && !yrow_movedback)
    {
        y -= y_stride;
        yrow_movedback = 1;
    }

    if (bestmv->as_mv.col < orig_mv.as_mv.col && !ycol_movedback)
    {
        y--;
        ycol_movedback = 1;
    }

    startmv = *bestmv;

    // go left then right and check error
    this_mv.as_mv.row = startmv.as_mv.row;

    if (startmv.as_mv.col & 7)
    {
        this_mv.as_mv.col = startmv.as_mv.col - 1;
        thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
    }
    else
    {
        this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
        thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
    }

883
    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
884 885 886 887 888 889 890 891 892 893 894

    if (left < bestmse)
    {
        *bestmv = this_mv;
        bestmse = left;
        *distortion = thismse;
        *sse1 = sse;
    }

    this_mv.as_mv.col += 2;
    thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
895
    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918

    if (right < bestmse)
    {
        *bestmv = this_mv;
        bestmse = right;
        *distortion = thismse;
        *sse1 = sse;
    }

    // go up then down and check error
    this_mv.as_mv.col = startmv.as_mv.col;

    if (startmv.as_mv.row & 7)
    {
        this_mv.as_mv.row = startmv.as_mv.row - 1;
        thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
    }
    else
    {
        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7;
        thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, b->src_stride, &sse);
    }

919
    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
920 921 922 923 924 925 926 927 928 929 930

    if (up < bestmse)
    {
        *bestmv = this_mv;
        bestmse = up;
        *distortion = thismse;
        *sse1 = sse;
    }

    this_mv.as_mv.row += 2;
    thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
931
    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021

    if (down < bestmse)
    {
        *bestmv = this_mv;
        bestmse = down;
        *distortion = thismse;
        *sse1 = sse;
    }


    // now check 1 more diagonal
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

//  for(whichdir=0;whichdir<4;whichdir++)
//  {
    this_mv = startmv;

    switch (whichdir)
    {
    case 0:

        if (startmv.as_mv.row & 7)
        {
            this_mv.as_mv.row -= 1;

            if (startmv.as_mv.col & 7)
            {
                this_mv.as_mv.col -= 1;
                thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
            }
            else
            {
                this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
                thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), z, b->src_stride, &sse);;
            }
        }
        else
        {
            this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7;

            if (startmv.as_mv.col & 7)
            {
                this_mv.as_mv.col -= 1;
                thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, b->src_stride, &sse);
            }
            else
            {
                this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
                thismse = vfp->svf(y - y_stride - 1, y_stride, SP(7), SP(7), z, b->src_stride, &sse);
            }
        }

        break;
    case 1:
        this_mv.as_mv.col += 1;

        if (startmv.as_mv.row & 7)
        {
            this_mv.as_mv.row -= 1;
            thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
        }
        else
        {
            this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7;
            thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, b->src_stride, &sse);
        }

        break;
    case 2:
        this_mv.as_mv.row += 1;

        if (startmv.as_mv.col & 7)
        {
            this_mv.as_mv.col -= 1;
            thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
        }
        else
        {
            this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
            thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
        }

        break;
    case 3:
        this_mv.as_mv.col += 1;
        this_mv.as_mv.row += 1;
        thismse = vfp->svf(y, y_stride,  SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
        break;
    }

1022
    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033

    if (diag < bestmse)
    {
        *bestmv = this_mv;
        bestmse = diag;
        *distortion = thismse;
        *sse1 = sse;
    }

#endif  /* CONFIG_HIGH_PRECISION_MV */

John Koleszar's avatar
John Koleszar committed
1034 1035 1036
    return bestmse;
}

1037 1038
/* SP() wraps the motion-vector components handed to vfp->svf() in the
 * function above; undefine it here so the macro is no longer defined
 * for the code that follows. */
#undef SP

1039
int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
Scott LaVarnway's avatar
Scott LaVarnway committed
1040 1041 1042 1043 1044
                                  int_mv *bestmv, int_mv *ref_mv,
                                  int error_per_bit,
                                  const vp8_variance_fn_ptr_t *vfp,
                                  int *mvcost[2], int *distortion,
                                  unsigned int *sse1)
John Koleszar's avatar
John Koleszar committed
1045 1046
{
    int bestmse = INT_MAX;
Scott LaVarnway's avatar
Scott LaVarnway committed
1047 1048
    int_mv startmv;
    int_mv this_mv;
John Koleszar's avatar
John Koleszar committed
1049 1050 1051
    unsigned char *z = (*(b->base_src) + b->src);
    int left, right, up, down, diag;
    unsigned int sse;
Yunqing Wang's avatar
Yunqing Wang committed
1052
    int whichdir ;
1053
    int thismse;
1054
    int y_stride;
1055
    MACROBLOCKD *xd = &x->e_mbd;
1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068

#if ARCH_X86 || ARCH_X86_64
    unsigned char *y0 = *(d->