/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#include "mcomp.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/config.h"
#include <stdio.h>
#include <limits.h>
#include <math.h>
#include "vp8/common/findnearmv.h"

#ifdef ENTROPY_STATS
static int mv_ref_ct [31] [4] [2];
static int mv_mode_cts [4] [2];
#endif

#if CONFIG_HIGH_PRECISION_MV
int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight, int ishp)
{
    // MV costing is based on the distribution of vectors in the previous frame and as such will tend to
    // overstate the cost of vectors. In addition, coding a new vector can have a knock-on effect on the
    // cost of subsequent vectors and the quality of prediction from NEAR and NEAREST for subsequent blocks.
    // The "Weight" parameter allows, to a limited extent, for some account to be taken of these factors.
    return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> (ishp==0)] + mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> (ishp==0)]) * Weight) >> 7;
}
#else
int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight)
{
    // MV costing is based on the distribution of vectors in the previous frame and as such will tend to
    // overstate the cost of vectors. In addition, coding a new vector can have a knock-on effect on the
    // cost of subsequent vectors and the quality of prediction from NEAR and NEAREST for subsequent blocks.
    // The "Weight" parameter allows, to a limited extent, for some account to be taken of these factors.
    return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] + mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) * Weight) >> 7;
}
#endif

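/*
 * Editorial note (an illustrative sketch, not part of the original source):
 * vp8_mv_bit_cost() simply scales the table-lookup cost of the MV difference
 * by Weight/128.  For example, assuming mvcost[0][row_diff] == 300,
 * mvcost[1][col_diff] == 200 and Weight == 128, the returned estimate is
 * ((300 + 200) * 128) >> 7 == 500.
 */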
#if CONFIG_HIGH_PRECISION_MV
static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int error_per_bit, int ishp)
{
    return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> (ishp==0)] +
        mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> (ishp==0)])
        * error_per_bit + 128) >> 8;
}
#else
static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int error_per_bit)
{
    return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] +
        mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1])
        * error_per_bit + 128) >> 8;
}
#endif


static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2], int error_per_bit)
{
    /* Calculate sad error cost on full pixel basis. */
    return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] +
        mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)])
        * error_per_bit + 128) >> 8;
}

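/*
 * Editorial note (an illustrative sketch, not part of the original source):
 * mv_err_cost() and mvsad_err_cost() share the same fixed-point form: the
 * summed component costs are multiplied by error_per_bit, rounded with +128
 * and scaled by >> 8; mvsad_err_cost() differs only in indexing its tables
 * at full-pel resolution.  E.g. assuming summed costs of 16 and
 * error_per_bit == 100, the result is (16 * 100 + 128) >> 8 == 6.
 */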

void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride)
{
    int Len;
    int search_site_count = 0;


    // Generate offsets for 4 search sites per step.
    Len = MAX_FIRST_STEP;
    x->ss[search_site_count].mv.col = 0;
    x->ss[search_site_count].mv.row = 0;
    x->ss[search_site_count].offset = 0;
    search_site_count++;

    while (Len > 0)
    {

        // Compute offsets for search sites.
        x->ss[search_site_count].mv.col = 0;
        x->ss[search_site_count].mv.row = -Len;
        x->ss[search_site_count].offset = -Len * stride;
        search_site_count++;

        // Compute offsets for search sites.
        x->ss[search_site_count].mv.col = 0;
        x->ss[search_site_count].mv.row = Len;
        x->ss[search_site_count].offset = Len * stride;
        search_site_count++;

        // Compute offsets for search sites.
        x->ss[search_site_count].mv.col = -Len;
        x->ss[search_site_count].mv.row = 0;
        x->ss[search_site_count].offset = -Len;
        search_site_count++;

        // Compute offsets for search sites.
        x->ss[search_site_count].mv.col = Len;
        x->ss[search_site_count].mv.row = 0;
        x->ss[search_site_count].offset = Len;
        search_site_count++;

        // Contract.
        Len /= 2;
    }

    x->ss_count = search_site_count;
    x->searches_per_step = 4;
}

void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
{
    int Len;
    int search_site_count = 0;

    // Generate offsets for 8 search sites per step.
    Len = MAX_FIRST_STEP;
    x->ss[search_site_count].mv.col = 0;
    x->ss[search_site_count].mv.row = 0;
    x->ss[search_site_count].offset = 0;
    search_site_count++;

    while (Len > 0)
    {

        // Compute offsets for search sites.
        x->ss[search_site_count].mv.col = 0;
        x->ss[search_site_count].mv.row = -Len;
        x->ss[search_site_count].offset = -Len * stride;
        search_site_count++;

        // Compute offsets for search sites.
        x->ss[search_site_count].mv.col = 0;
        x->ss[search_site_count].mv.row = Len;
        x->ss[search_site_count].offset = Len * stride;
        search_site_count++;

        // Compute offsets for search sites.
        x->ss[search_site_count].mv.col = -Len;
        x->ss[search_site_count].mv.row = 0;
        x->ss[search_site_count].offset = -Len;
        search_site_count++;

        // Compute offsets for search sites.
        x->ss[search_site_count].mv.col = Len;
        x->ss[search_site_count].mv.row = 0;
        x->ss[search_site_count].offset = Len;
        search_site_count++;

        // Compute offsets for search sites.
        x->ss[search_site_count].mv.col = -Len;
        x->ss[search_site_count].mv.row = -Len;
        x->ss[search_site_count].offset = -Len * stride - Len;
        search_site_count++;

        // Compute offsets for search sites.
        x->ss[search_site_count].mv.col = Len;
        x->ss[search_site_count].mv.row = -Len;
        x->ss[search_site_count].offset = -Len * stride + Len;
        search_site_count++;

        // Compute offsets for search sites.
        x->ss[search_site_count].mv.col = -Len;
        x->ss[search_site_count].mv.row = Len;
        x->ss[search_site_count].offset = Len * stride - Len;
        search_site_count++;

        // Compute offsets for search sites.
        x->ss[search_site_count].mv.col = Len;
        x->ss[search_site_count].mv.row = Len;
        x->ss[search_site_count].offset = Len * stride + Len;
        search_site_count++;


        // Contract.
        Len /= 2;
    }

    x->ss_count = search_site_count;
    x->searches_per_step = 8;
}
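/*
 * Editorial note (not part of the original source): for each radius
 * Len = MAX_FIRST_STEP, MAX_FIRST_STEP/2, ..., 1, the loop above records the
 * eight neighbours of the current search point,
 *
 *     (-Len,-Len)  (-Len,  0)  (-Len,+Len)
 *     (   0,-Len)              (   0,+Len)
 *     (+Len,-Len)  (+Len,  0)  (+Len,+Len)
 *
 * as (row, col) motion offsets together with the matching buffer offset
 * row * stride + col, so the step search can address the reference frame
 * without recomputing pointers; vp8_init_dsmotion_compensation() does the
 * same with only the four axis-aligned sites per step.
 */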

/*
 * To avoid the penalty of crossing a cache-line boundary on reads, preload the
 * reference area into a small buffer that is aligned so that no read from it
 * crosses a cache line. This reduces the CPU cycles spent reading ref data in
 * the sub-pixel filter functions.
 * TODO: Currently, since the sub-pixel search range here is -3 ~ 3, copy a
 * 22 rows x 32 cols area, which is enough for a 16x16 macroblock. Later, for
 * SPLITMV, we could reduce the area.
 */

#define PRE(r,c) (y + (((r)>>2) * y_stride + ((c)>>2) -(offset))) // pointer to predictor base of a motion vector
#if CONFIG_SIXTEENTH_SUBPEL_UV
#define SP(x) (((x)&3)<<2) // convert motion vector component to offset for svf calc
#else
#define SP(x) (((x)&3)<<1) // convert motion vector component to offset for svf calc
#endif  /* CONFIG_SIXTEENTH_SUBPEL_UV */
#define MVC(r,c) (((mvcost[0][(r)-rr] + mvcost[1][(c)-rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
#define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function.
#define ERR(r,c) (MVC(r,c)+DIST(r,c)) // returns distortion + motion vector cost
#define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;

#if CONFIG_HIGH_PRECISION_MV
#define PREHP(r,c) (y + (((r)>>3) * y_stride + ((c)>>3) -(offset))) // pointer to predictor base of a motion vector
#if CONFIG_SIXTEENTH_SUBPEL_UV
#define SPHP(x) (((x)&7)<<1) // convert motion vector component to offset for svf calc
#else /* CONFIG_SIXTEENTH_SUBPEL_UV */
#define SPHP(x) ((x)&7) // convert motion vector component to offset for svf calc
#endif  /* CONFIG_SIXTEENTH_SUBPEL_UV */
#define DISTHP(r,c) vfp->svf( PREHP(r,c), y_stride, SPHP(c),SPHP(r), z,b->src_stride,&sse) // returns subpixel variance error function.
#define ERRHP(r,c) (MVC(r,c)+DISTHP(r,c)) // returns distortion + motion vector cost
#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = ((xd->allow_high_precision_mv)?DISTHP(r,c):DIST(r,c)); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=INT_MAX;) // checks if (r,c) has better score than previous best
#else
#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=INT_MAX;) // checks if (r,c) has better score than previous best
#endif /* CONFIG_HIGH_PRECISION_MV */

#define MIN(x,y) (((x)<(y))?(x):(y))
#define MAX(x,y) (((x)>(y))?(x):(y))
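/*
 * Editorial note (not part of the original source): CHECK_BETTER(v, r, c)
 * expands to a bounds test (IFMVCV) around one sub-pixel variance call
 * (DIST/DISTHP) plus the estimated vector cost (MVC).  If (r, c) lies inside
 * [minr, maxr] x [minc, maxc], the point is evaluated and the running best
 * (besterr, br, bc, *distortion, *sse1) is updated whenever the score
 * improves; otherwise v is set to INT_MAX so the left/right/up/down
 * comparisons below ignore that direction.
 */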

int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                             int_mv *bestmv, int_mv *ref_mv,
                                             int error_per_bit,
                                             const vp8_variance_fn_ptr_t *vfp,
                                             int *mvcost[2], int *distortion,
                                             unsigned int *sse1)
{
    unsigned char *z = (*(b->base_src) + b->src);
    MACROBLOCKD *xd = &x->e_mbd;

    int rr, rc, br, bc, hstep;
    int tr, tc;
    unsigned int besterr = INT_MAX;
    unsigned int left, right, up, down, diag;
    unsigned int sse;
    unsigned int whichdir;
    unsigned int halfiters = 4;
    unsigned int quarteriters = 4;
#if CONFIG_HIGH_PRECISION_MV
    unsigned int eighthiters = 4;
#endif
    int thismse;
    int maxc, minc, maxr, minr;
    int y_stride;
    int offset;

#if ARCH_X86 || ARCH_X86_64
    unsigned char *y0 = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
    unsigned char *y;
    int buf_r1, buf_r2, buf_c1, buf_c2;

    // Clamping to avoid out-of-range data access
    buf_r1 = ((bestmv->as_mv.row - INTERP_EXTEND) < x->mv_row_min)?(bestmv->as_mv.row - x->mv_row_min):INTERP_EXTEND;
    buf_r2 = ((bestmv->as_mv.row + INTERP_EXTEND) > x->mv_row_max)?(x->mv_row_max - bestmv->as_mv.row):INTERP_EXTEND;
    buf_c1 = ((bestmv->as_mv.col - INTERP_EXTEND) < x->mv_col_min)?(bestmv->as_mv.col - x->mv_col_min):INTERP_EXTEND;
    buf_c2 = ((bestmv->as_mv.col + INTERP_EXTEND) > x->mv_col_max)?(x->mv_col_max - bestmv->as_mv.col):INTERP_EXTEND;
    y_stride = 32;

    /* Copy to intermediate buffer before searching. */
    vfp->copymem(y0 - buf_c1 - d->pre_stride*buf_r1, d->pre_stride, xd->y_buf, y_stride, 16+buf_r1+buf_r2);
    y = xd->y_buf + y_stride*buf_r1 +buf_c1;
#else
    unsigned char *y = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
    y_stride = d->pre_stride;
#endif


#if CONFIG_HIGH_PRECISION_MV
    if (xd->allow_high_precision_mv)
    {
        rr = ref_mv->as_mv.row; rc = ref_mv->as_mv.col;
        br = bestmv->as_mv.row << 3; bc = bestmv->as_mv.col << 3;
        hstep = 4;
        minc = MAX(x->mv_col_min << 3, (ref_mv->as_mv.col) - ((1 << mvlong_width_hp) - 1));
        maxc = MIN(x->mv_col_max << 3, (ref_mv->as_mv.col) + ((1 << mvlong_width_hp) - 1));
        minr = MAX(x->mv_row_min << 3, (ref_mv->as_mv.row) - ((1 << mvlong_width_hp) - 1));
        maxr = MIN(x->mv_row_max << 3, (ref_mv->as_mv.row) + ((1 << mvlong_width_hp) - 1));
    }
    else
#endif
    {
        rr = ref_mv->as_mv.row >> 1; rc = ref_mv->as_mv.col >> 1;
        br = bestmv->as_mv.row << 2; bc = bestmv->as_mv.col << 2;
        hstep = 2;
        minc = MAX(x->mv_col_min << 2, (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
        maxc = MIN(x->mv_col_max << 2, (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
        minr = MAX(x->mv_row_min << 2, (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
        maxr = MIN(x->mv_row_max << 2, (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));
    }

    tr = br;
    tc = bc;


    offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;

    // central mv
    bestmv->as_mv.row <<= 3;
    bestmv->as_mv.col <<= 3;

    // calculate central point error
    besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
    *distortion = besterr;
#if CONFIG_HIGH_PRECISION_MV
    besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
    besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
#endif
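    /*
     * Editorial note (not part of the original source): from here on, br/bc
     * and tr/tc are kept in 1/8-pel units when allow_high_precision_mv is
     * set (hstep starts at 4, i.e. a 1/2-pel step) and in 1/4-pel units
     * otherwise (hstep starts at 2, also a 1/2-pel step).  Each stage below
     * halves hstep, so the refinement proceeds 1/2 -> 1/4 (-> 1/8) pel.
     */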

    // TODO: Each subsequent iteration checks at least one point in common with
    // the last iteration (it could be two if the diagonal was selected).
    while (--halfiters)
    {
        // 1/2 pel
        CHECK_BETTER(left, tr, tc - hstep);
        CHECK_BETTER(right, tr, tc + hstep);
        CHECK_BETTER(up, tr - hstep, tc);
        CHECK_BETTER(down, tr + hstep, tc);

        whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

        switch (whichdir)
        {
        case 0:
            CHECK_BETTER(diag, tr - hstep, tc - hstep);
            break;
        case 1:
            CHECK_BETTER(diag, tr - hstep, tc + hstep);
            break;
        case 2:
            CHECK_BETTER(diag, tr + hstep, tc - hstep);
            break;
        case 3:
            CHECK_BETTER(diag, tr + hstep, tc + hstep);
            break;
        }

        // no reason to check the same one again.
        if (tr == br && tc == bc)
            break;

        tr = br;
        tc = bc;
    }

    // TODO: Each subsequent iteration checks at least one point in common with
    // the last iteration (it could be two if the diagonal was selected).
    // 1/4 pel
    hstep >>= 1;
    while (--quarteriters)
    {
        CHECK_BETTER(left, tr, tc - hstep);
        CHECK_BETTER(right, tr, tc + hstep);
        CHECK_BETTER(up, tr - hstep, tc);
        CHECK_BETTER(down, tr + hstep, tc);

        whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

        switch (whichdir)
        {
        case 0:
            CHECK_BETTER(diag, tr - hstep, tc - hstep);
            break;
        case 1:
            CHECK_BETTER(diag, tr - hstep, tc + hstep);
            break;
        case 2:
            CHECK_BETTER(diag, tr + hstep, tc - hstep);
            break;
        case 3:
            CHECK_BETTER(diag, tr + hstep, tc + hstep);
            break;
        }

        // no reason to check the same one again.
        if (tr == br && tc == bc)
            break;

        tr = br;
        tc = bc;
    }

#if CONFIG_HIGH_PRECISION_MV
    if (x->e_mbd.allow_high_precision_mv)
    {
        hstep >>= 1;
        while (--eighthiters)
        {
            CHECK_BETTER(left, tr, tc - hstep);
            CHECK_BETTER(right, tr, tc + hstep);
            CHECK_BETTER(up, tr - hstep, tc);
            CHECK_BETTER(down, tr + hstep, tc);

            whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

            switch (whichdir)
            {
            case 0:
                CHECK_BETTER(diag, tr - hstep, tc - hstep);
                break;
            case 1:
                CHECK_BETTER(diag, tr - hstep, tc + hstep);
                break;
            case 2:
                CHECK_BETTER(diag, tr + hstep, tc - hstep);
                break;
            case 3:
                CHECK_BETTER(diag, tr + hstep, tc + hstep);
                break;
            }

            // no reason to check the same one again.
            if (tr == br && tc == bc)
                break;

            tr = br;
            tc = bc;
        }
    }
#endif
#if CONFIG_HIGH_PRECISION_MV
    if (x->e_mbd.allow_high_precision_mv)
    {
        bestmv->as_mv.row = br;
        bestmv->as_mv.col = bc;
    }
    else
#endif  /* CONFIG_HIGH_PRECISION_MV */
    {
        bestmv->as_mv.row = br << 1;
        bestmv->as_mv.col = bc << 1;
    }

    if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL<<3)) ||
        (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL<<3)))
        return INT_MAX;

    return besterr;
}
#undef MVC
#undef PRE
#undef SP
#undef DIST
#undef IFMVCV
#undef ERR
#undef CHECK_BETTER
#undef MIN
#undef MAX

#if CONFIG_HIGH_PRECISION_MV
#undef PREHP
#undef SPHP
#undef DISTHP
#undef ERRHP
#endif

#if CONFIG_SIXTEENTH_SUBPEL_UV
#define SP(x) (((x)&7)<<1) // convert motion vector component to offset for svf calc
#else
#define SP(x) ((x)&7) // convert motion vector component to offset for svf calc
#endif  /* CONFIG_SIXTEENTH_SUBPEL_UV */
int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                 int_mv *bestmv, int_mv *ref_mv,
                                 int error_per_bit,
                                 const vp8_variance_fn_ptr_t *vfp,
                                 int *mvcost[2], int *distortion,
                                 unsigned int *sse1)
{
    int bestmse = INT_MAX;
    int_mv startmv;
    int_mv this_mv;
#if CONFIG_HIGH_PRECISION_MV
    int_mv orig_mv;
    int yrow_movedback=0, ycol_movedback=0;
#endif
    unsigned char *z = (*(b->base_src) + b->src);
    int left, right, up, down, diag;
    unsigned int sse;
    int whichdir;
    int thismse;
    int y_stride;
    MACROBLOCKD *xd = &x->e_mbd;

#if ARCH_X86 || ARCH_X86_64
    unsigned char *y0 = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
    unsigned char *y;

    y_stride = 32;
    /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
    vfp->copymem(y0 - 1 - d->pre_stride, d->pre_stride, xd->y_buf, y_stride, 18);
    y = xd->y_buf + y_stride + 1;
#else
    unsigned char *y = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
    y_stride = d->pre_stride;
#endif

    // central mv
    bestmv->as_mv.row <<= 3;
    bestmv->as_mv.col <<= 3;
    startmv = *bestmv;
#if CONFIG_HIGH_PRECISION_MV
    orig_mv = *bestmv;
#endif

    // calculate central point error
    bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
    *distortion = bestmse;
#if CONFIG_HIGH_PRECISION_MV
    bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
    bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
#endif
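    /*
     * Editorial note (not part of the original source): bestmv/startmv are
     * now in 1/8-pel units, so an expression such as
     * ((startmv.as_mv.col - 8) | 4) steps a component that currently sits on
     * a full-pel boundary to the half-pel position one half pel to the
     * left/above (equivalent to subtracting 4 here); the quarter- and
     * eighth-pel stages below use the same pattern with | 6 and | 7.
     */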

    // go left then right and check error
    this_mv.as_mv.row = startmv.as_mv.row;
    this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
    thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
#if CONFIG_HIGH_PRECISION_MV
    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
#endif

    if (left < bestmse)
    {
        *bestmv = this_mv;
        bestmse = left;
        *distortion = thismse;
        *sse1 = sse;
    }

    this_mv.as_mv.col += 8;
    thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
#if CONFIG_HIGH_PRECISION_MV
    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
#endif

    if (right < bestmse)
    {
        *bestmv = this_mv;
        bestmse = right;
        *distortion = thismse;
        *sse1 = sse;
    }

    // go up then down and check error
    this_mv.as_mv.col = startmv.as_mv.col;
    this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
    thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
#if CONFIG_HIGH_PRECISION_MV
    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
#endif

    if (up < bestmse)
    {
        *bestmv = this_mv;
        bestmse = up;
        *distortion = thismse;
        *sse1 = sse;
    }

    this_mv.as_mv.row += 8;
    thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
#if CONFIG_HIGH_PRECISION_MV
    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
#endif

    if (down < bestmse)
    {
        *bestmv = this_mv;
        bestmse = down;
        *distortion = thismse;
        *sse1 = sse;
    }


    // now check 1 more diagonal
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    //for(whichdir =0;whichdir<4;whichdir++)
    //{
    this_mv = startmv;

    switch (whichdir)
    {
    case 0:
        this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
        this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
        thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse);
        break;
    case 1:
        this_mv.as_mv.col += 4;
        this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
        thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse);
        break;
    case 2:
        this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
        this_mv.as_mv.row += 4;
        thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse);
        break;
    case 3:
    default:
        this_mv.as_mv.col += 4;
        this_mv.as_mv.row += 4;
        thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse);
        break;
    }

#if CONFIG_HIGH_PRECISION_MV
    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
#endif

    if (diag < bestmse)
    {
        *bestmv = this_mv;
        bestmse = diag;
        *distortion = thismse;
        *sse1 = sse;
    }

//  }


    // time to check quarter pels.
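    /*
     * Editorial note (not part of the original source): before probing the
     * quarter-pel ring, the code below shifts the reference pointer y up or
     * left by one row/column whenever the half-pel winner moved in that
     * direction, so that the SP() phase offsets passed to vfp->svf() keep
     * addressing the correct full-pel base around the new centre.
     */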
    if (bestmv->as_mv.row < startmv.as_mv.row)
    {
        y -= y_stride;
#if CONFIG_HIGH_PRECISION_MV
        yrow_movedback = 1;
#endif
    }

    if (bestmv->as_mv.col < startmv.as_mv.col)
    {
        y--;
#if CONFIG_HIGH_PRECISION_MV
        ycol_movedback = 1;
#endif
    }

    startmv = *bestmv;



    // go left then right and check error
    this_mv.as_mv.row = startmv.as_mv.row;

    if (startmv.as_mv.col & 7)
    {
        this_mv.as_mv.col = startmv.as_mv.col - 2;
        thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
    }
    else
    {
        this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
        thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
    }

#if CONFIG_HIGH_PRECISION_MV
    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
#endif

    if (left < bestmse)
    {
        *bestmv = this_mv;
        bestmse = left;
        *distortion = thismse;
        *sse1 = sse;
    }

    this_mv.as_mv.col += 4;
    thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
#if CONFIG_HIGH_PRECISION_MV
    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
#endif

    if (right < bestmse)
    {
        *bestmv = this_mv;
        bestmse = right;
        *distortion = thismse;
        *sse1 = sse;
    }

    // go up then down and check error
    this_mv.as_mv.col = startmv.as_mv.col;

    if (startmv.as_mv.row & 7)
    {
        this_mv.as_mv.row = startmv.as_mv.row - 2;
        thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
    }
    else
    {
        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
        thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6), z, b->src_stride, &sse);
    }

#if CONFIG_HIGH_PRECISION_MV
    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
#endif

    if (up < bestmse)
    {
        *bestmv = this_mv;
        bestmse = up;
        *distortion = thismse;
        *sse1 = sse;
    }

    this_mv.as_mv.row += 4;
    thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
#if CONFIG_HIGH_PRECISION_MV
    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
#endif

    if (down < bestmse)
    {
        *bestmv = this_mv;
        bestmse = down;
        *distortion = thismse;
        *sse1 = sse;
    }


    // now check 1 more diagonal
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

//  for(whichdir=0;whichdir<4;whichdir++)
//  {
    this_mv = startmv;

    switch (whichdir)
    {
    case 0:

        if (startmv.as_mv.row & 7)
        {
            this_mv.as_mv.row -= 2;

            if (startmv.as_mv.col & 7)
            {
                this_mv.as_mv.col -= 2;
                thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
            }
            else
            {
                this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
                thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
            }
        }
        else
        {
            this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;

            if (startmv.as_mv.col & 7)
            {
                this_mv.as_mv.col -= 2;
                thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6), z, b->src_stride, &sse);
            }
            else
            {
                this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
                thismse = vfp->svf(y - y_stride - 1, y_stride, SP(6), SP(6), z, b->src_stride, &sse);
            }
        }

        break;
    case 1:
        this_mv.as_mv.col += 2;

        if (startmv.as_mv.row & 7)
        {
            this_mv.as_mv.row -= 2;
            thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
        }
        else
        {
            this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
            thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6), z, b->src_stride, &sse);
        }

        break;
    case 2:
        this_mv.as_mv.row += 2;

        if (startmv.as_mv.col & 7)
        {
            this_mv.as_mv.col -= 2;
            thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
        }
        else
        {
            this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
            thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
        }

        break;
    case 3:
        this_mv.as_mv.col += 2;
        this_mv.as_mv.row += 2;
        thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
        break;
    }

#if CONFIG_HIGH_PRECISION_MV
    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
#endif

    if (diag < bestmse)
    {
        *bestmv = this_mv;
        bestmse = diag;
        *distortion = thismse;
        *sse1 = sse;
    }

#if CONFIG_HIGH_PRECISION_MV
    if (!x->e_mbd.allow_high_precision_mv)
        return bestmse;

    /* Now do 1/8th pixel */
    if (bestmv->as_mv.row < orig_mv.as_mv.row && !yrow_movedback)
    {
        y -= y_stride;
        yrow_movedback = 1;
    }

    if (bestmv->as_mv.col < orig_mv.as_mv.col && !ycol_movedback)
    {
        y--;
        ycol_movedback = 1;
    }

    startmv = *bestmv;

    // go left then right and check error
    this_mv.as_mv.row = startmv.as_mv.row;

    if (startmv.as_mv.col & 7)
    {
        this_mv.as_mv.col = startmv.as_mv.col - 1;
        thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
    }
    else
    {
        this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
        thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
    }

    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);

    if (left < bestmse)
    {
        *bestmv = this_mv;
        bestmse = left;
        *distortion = thismse;
        *sse1 = sse;
    }

    this_mv.as_mv.col += 2;
    thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);

    if (right < bestmse)
    {
        *bestmv = this_mv;
        bestmse = right;
        *distortion = thismse;
        *sse1 = sse;
    }

    // go up then down and check error
    this_mv.as_mv.col = startmv.as_mv.col;

    if (startmv.as_mv.row & 7)
    {
        this_mv.as_mv.row = startmv.as_mv.row - 1;
        thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
    }
    else
    {
        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7;
        thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, b->src_stride, &sse);
    }

    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);

    if (up < bestmse)
    {
        *bestmv = this_mv;
        bestmse = up;
        *distortion = thismse;
        *sse1 = sse;
    }

    this_mv.as_mv.row += 2;
    thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);

    if (down < bestmse)
    {
        *bestmv = this_mv;
        bestmse = down;
        *distortion = thismse;
        *sse1 = sse;
    }


    // now check 1 more diagonal
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

//  for(whichdir=0;whichdir<4;whichdir++)
//  {
    this_mv = startmv;

    switch (whichdir)
    {
    case 0:

        if (startmv.as_mv.row & 7)
        {
            this_mv.as_mv.row -= 1;

            if (startmv.as_mv.col & 7)
            {
                this_mv.as_mv.col -= 1;
                thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
            }
            else
            {
                this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
                thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
            }
        }
        else
        {
            this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7;

            if (startmv.as_mv.col & 7)
            {
                this_mv.as_mv.col -= 1;
                thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, b->src_stride, &sse);
            }
            else
            {
                this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
                thismse = vfp->svf(y - y_stride - 1, y_stride, SP(7), SP(7), z, b->src_stride, &sse);
            }
        }

        break;
    case 1:
        this_mv.as_mv.col += 1;

        if (startmv.as_mv.row & 7)
        {
            this_mv.as_mv.row -= 1;
            thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
        }
        else
        {
            this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7;
            thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, b->src_stride, &sse);
        }

        break;
    case 2:
        this_mv.as_mv.row += 1;

        if (startmv.as_mv.col & 7)
        {
            this_mv.as_mv.col -= 1;
            thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
        }
        else
        {
            this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
            thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
        }

        break;
    case 3:
        this_mv.as_mv.col += 1;
        this_mv.as_mv.row += 1;
        thismse = vfp->svf(y, y_stride,  SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
        break;
    }

    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);

    if (diag < bestmse)
    {
        *bestmv = this_mv;
        bestmse = diag;
        *distortion = thismse;
        *sse1 = sse;
    }

#endif  /* CONFIG_HIGH_PRECISION_MV */

    return bestmse;
}

#undef SP

int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                  int_mv *bestmv, int_mv *ref_mv,
                                  int error_per_bit,
                                  const vp8_variance_fn_ptr_t *vfp,
                                  int *mvcost[2], int *distortion,
                                  unsigned int *sse1)
{
    int bestmse = INT_MAX;
    int_mv startmv;
    int_mv this_mv;
    unsigned char *z = (*(b->base_src) + b->src);
    int left, right, up, down, diag;
    unsigned int sse;
    int whichdir;
    int thismse;
    int y_stride;
    MACROBLOCKD *xd = &x->e_mbd;

#if ARCH_X86 || ARCH_X86_64
    unsigned char *y0 = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
    unsigned char *y;

    y_stride = 32;
    /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
    vfp->copymem(y0 - 1 - d->pre_stride, d->pre_stride, xd->y_buf, y_stride, 18);
    y = xd->y_buf + y_stride + 1;
#else
    unsigned char *y = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
    y_stride = d->pre_stride;
#endif

    // central mv