mcomp.c 74.9 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
John Koleszar's avatar
John Koleszar committed
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5 6
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
John Koleszar's avatar
John Koleszar committed
9 10 11 12 13
 */


#include "mcomp.h"
#include "vpx_mem/vpx_mem.h"
14
#include "vpx_ports/config.h"
John Koleszar's avatar
John Koleszar committed
15 16 17
#include <stdio.h>
#include <limits.h>
#include <math.h>
18
#include "vp8/common/findnearmv.h"
John Koleszar's avatar
John Koleszar committed
19 20 21 22 23 24

#ifdef ENTROPY_STATS
/* File-local tables that exist only when ENTROPY_STATS is defined.
 * NOTE(review): neither table is referenced in the visible part of this
 * file; presumably they accumulate MV reference/mode counts - confirm
 * against the code that fills them. */
static int mv_ref_ct [31] [4] [2];
static int mv_mode_cts [4] [2];
#endif

Scott LaVarnway's avatar
Scott LaVarnway committed
25
/*
 * Returns the weighted cost of coding `mv` relative to `ref`.
 *
 * The row and column differences are halved (>> 1) and used to index
 * mvcost[0] (rows) and mvcost[1] (columns). The two table entries are summed,
 * multiplied by Weight and scaled down by 128 (>> 7).
 *
 * NOTE(review): the differences can be negative, so the cost tables are
 * presumably centred pointers that accept negative indices - confirm with the
 * code that sets up mvcost.
 */
int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight)
{
    // MV costing is based on the distribution of vectors in the previous frame and as such will tend to
    // over state the cost of vectors. In addition coding a new vector can have a knock on effect on the
    // cost of subsequent vectors and the quality of prediction from NEAR and NEAREST for subsequent blocks.
    // The "Weight" parameter allows, to a limited extent, for some account to be taken of these factors.
    return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] + mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) * Weight) >> 7;
}
33 34 35 36 37 38 39 40 41 42
#if CONFIG_HIGH_PRECISION_MV
/*
 * High-precision counterpart of vp8_mv_bit_cost(): the row/column differences
 * between `mv` and `ref` index the cost tables directly (no halving).
 * The summed table entries are multiplied by Weight and scaled by >> 7.
 */
int vp8_mv_bit_cost_hp(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight)
{
    // MV costs come from the previous frame's vector distribution and so tend to over-state
    // the true cost; coding a new vector also influences the cost and NEAR/NEAREST prediction
    // quality of later blocks. "Weight" lets the caller compensate for this to a limited extent.
    const int row_delta = mv->as_mv.row - ref->as_mv.row;
    const int col_delta = mv->as_mv.col - ref->as_mv.col;
    const int table_cost = mvcost[0][row_delta] + mvcost[1][col_delta];

    return (table_cost * Weight) >> 7;
}
#endif
John Koleszar's avatar
John Koleszar committed
43

44 45 46 47 48 49 50 51
#if CONFIG_HIGH_PRECISION_MV
/*
 * Rate term for a candidate vector `mv` relative to `ref`.
 * When ishp is zero the row/column differences are halved before the table
 * lookup; otherwise they are used as they are. The summed table entries are
 * scaled by error_per_bit and rounded: (cost * error_per_bit + 128) >> 8.
 */
static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int error_per_bit, int ishp)
{
    const int shift = (ishp == 0);   /* 1 when not high precision, else 0 */
    const int row_idx = (mv->as_mv.row - ref->as_mv.row) >> shift;
    const int col_idx = (mv->as_mv.col - ref->as_mv.col) >> shift;
    const int table_cost = mvcost[0][row_idx] + mvcost[1][col_idx];

    return (table_cost * error_per_bit + 128) >> 8;
}
#else
Scott LaVarnway's avatar
Scott LaVarnway committed
52
/*
 * Rate term for a candidate vector `mv` relative to `ref`.
 * The row/column differences are halved (>> 1) before indexing mvcost[0]
 * (rows) and mvcost[1] (columns). The summed table entries are scaled by
 * error_per_bit and rounded: (cost * error_per_bit + 128) >> 8.
 */
static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int error_per_bit)
{
    return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] +
        mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1])
        * error_per_bit + 128) >> 8;
}
58 59
#endif

John Koleszar's avatar
John Koleszar committed
60

Scott LaVarnway's avatar
Scott LaVarnway committed
61
/*
 * Rate term used with SAD-based searches.
 * The row/column differences between `mv` and `ref` index mvsadcost[0] and
 * mvsadcost[1] directly (no shift). The summed table entries are scaled by
 * error_per_bit and rounded: (cost * error_per_bit + 128) >> 8.
 */
static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2], int error_per_bit)
{
    /* Calculate sad error cost on full pixel basis. */
    return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] +
        mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)])
        * error_per_bit + 128) >> 8;
}

69

John Koleszar's avatar
John Koleszar committed
70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189
/*
 * Fills x->ss with the search-site table for a 4-point-per-step pattern.
 *
 * Entry 0 is the centre (zero displacement). Then, for each step size starting
 * at MAX_FIRST_STEP and halving until it reaches zero, four sites are appended
 * in the order up, down, left, right. Each site stores its (row, col)
 * displacement and the matching buffer offset, row * stride + col.
 * x->ss_count receives the number of entries written and
 * x->searches_per_step is set to 4.
 */
void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride)
{
    /* Unit (row, col) displacements, in the order the sites are stored. */
    static const int dir[4][2] =
    {
        { -1, 0 }, { 1, 0 }, { 0, -1 }, { 0, 1 }
    };
    int step;
    int k;
    int n = 0;

    // Centre site.
    x->ss[n].mv.col = 0;
    x->ss[n].mv.row = 0;
    x->ss[n].offset = 0;
    n++;

    // Four sites per step; the step contracts by half each round.
    for (step = MAX_FIRST_STEP; step > 0; step /= 2)
    {
        for (k = 0; k < 4; k++)
        {
            const int d_row = dir[k][0] * step;
            const int d_col = dir[k][1] * step;

            x->ss[n].mv.col = d_col;
            x->ss[n].mv.row = d_row;
            x->ss[n].offset = d_row * stride + d_col;
            n++;
        }
    }

    x->ss_count = n;
    x->searches_per_step = 4;
}

/*
 * Fills x->ss with the search-site table for an 8-point-per-step pattern.
 *
 * Entry 0 is the centre (zero displacement). Then, for each step size starting
 * at MAX_FIRST_STEP and halving until it reaches zero, eight sites are
 * appended: up, down, left, right, then the four diagonals (up-left, up-right,
 * down-left, down-right). Each site stores its (row, col) displacement and the
 * matching buffer offset, row * stride + col.
 * x->ss_count receives the number of entries written and
 * x->searches_per_step is set to 8.
 */
void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
{
    /* Unit (row, col) displacements, in the order the sites are stored. */
    static const int dir[8][2] =
    {
        { -1,  0 }, {  1,  0 }, {  0, -1 }, {  0,  1 },
        { -1, -1 }, { -1,  1 }, {  1, -1 }, {  1,  1 }
    };
    int step;
    int k;
    int n = 0;

    // Centre site.
    x->ss[n].mv.col = 0;
    x->ss[n].mv.row = 0;
    x->ss[n].offset = 0;
    n++;

    // Eight sites per step; the step contracts by half each round.
    for (step = MAX_FIRST_STEP; step > 0; step /= 2)
    {
        for (k = 0; k < 8; k++)
        {
            const int d_row = dir[k][0] * step;
            const int d_col = dir[k][1] * step;

            x->ss[n].mv.col = d_col;
            x->ss[n].mv.row = d_row;
            x->ss[n].offset = d_row * stride + d_col;
            n++;
        }
    }

    x->ss_count = n;
    x->searches_per_step = 8;
}

190 191 192 193 194 195 196 197 198
/*
 * To avoid the penalty of reads that cross a cache line, the reference area
 * is preloaded into a small aligned buffer, so that reads from this buffer
 * never cross a cache line. This reduces the CPU cycles spent reading
 * reference data in the sub-pixel filter functions.
 * TODO: Currently, since the sub-pixel search range here is -3 ~ 3, a
 * 22 rows x 32 cols area is copied, which is enough for a 16x16 macroblock.
 * Later, for SPLITMV, the area could be reduced.
 */
199

200
/*
 * Helper macros for vp8_find_best_sub_pixel_step_iteratively().
 * They are not self-contained: each expands in terms of locals of the function
 * that uses it (y, y_stride, offset, z, b, vfp, sse, mvcost, rr, rc,
 * error_per_bit, minc/maxc/minr/maxr, thismse, besterr, br, bc, distortion,
 * sse1 and, in the high-precision build, xd).
 * Macro arguments are parenthesised wherever they are used as operands so
 * that expression arguments such as "tc - hstep" cannot re-associate.
 */
#define PRE(r,c) (y + (((r)>>2) * y_stride + ((c)>>2) -(offset))) // pointer to predictor base of a motionvector
#if CONFIG_SIXTEENTH_SUBPEL_UV
#define SP(x) (((x)&3)<<2) // convert motion vector component to offset for svf calc
#else
#define SP(x) (((x)&3)<<1) // convert motion vector component to offset for svf calc
#endif  /* CONFIG_SIXTEENTH_SUBPEL_UV */
#define MVC(r,c) (((mvcost[0][(r)-rr] + mvcost[1][(c)-rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
#define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function.
#define ERR(r,c) (MVC(r,c)+DIST(r,c)) // returns distortion + motion vector cost
// runs statement s when (r,c) lies inside [minr,maxr] x [minc,maxc], otherwise statement e
#define IFMVCV(r,c,s,e) if ( (c) >= minc && (c) <= maxc && (r) >= minr && (r) <= maxr) s else e;

#if CONFIG_HIGH_PRECISION_MV
#define PREHP(r,c) (y + (((r)>>3) * y_stride + ((c)>>3) -(offset))) // pointer to predictor base of a motionvector
#if CONFIG_SIXTEENTH_SUBPEL_UV
#define SPHP(x) (((x)&7)<<1) // convert motion vector component to offset for svf calc
#else /* CONFIG_SIXTEENTH_SUBPEL_UV */
#define SPHP(x) ((x)&7) // convert motion vector component to offset for svf calc
#endif  /* CONFIG_SIXTEENTH_SUBPEL_UV */
#define DISTHP(r,c) vfp->svf( PREHP(r,c), y_stride, SPHP(c),SPHP(r), z,b->src_stride,&sse) // returns subpixel variance error function.
#define ERRHP(r,c) (MVC(r,c)+DISTHP(r,c)) // returns distortion + motion vector cost
#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = ((xd->allow_high_precision_mv)?DISTHP(r,c):DIST(r,c)); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=(r); bc=(c); *distortion = thismse; *sse1 = sse; }}, v=INT_MAX;)// checks if (r,c) has better score than previous best
#else
#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=(r); bc=(c); *distortion = thismse; *sse1 = sse; }}, v=INT_MAX;)// checks if (r,c) has better score than previous best
#endif /* CONFIG_HIGH_PRECISION_MV */

// NOTE: both evaluate their arguments more than once; pass side-effect-free expressions only.
#define MIN(x,y) (((x)<(y))?(x):(y))
#define MAX(x,y) (((x)>(y))?(x):(y))

Scott LaVarnway's avatar
Scott LaVarnway committed
228 229 230 231 232 233
/*
 * Iterative sub-pixel refinement of a motion vector.
 *
 * On entry *bestmv is used as a row/column index into the reference frame
 * (row * stride + col), i.e. it is at whole-pixel resolution. The function
 * evaluates the centre position and then runs up to three rounds of
 * half-step refinement, up to three rounds of quarter-step refinement and,
 * when CONFIG_HIGH_PRECISION_MV is built in and xd->allow_high_precision_mv
 * is set, up to three rounds of eighth-step refinement. Each round tests the
 * left/right/up/down neighbours of the current best point plus the one
 * diagonal lying between the better horizontal and the better vertical
 * neighbour, and stops early if the best point did not move.
 *
 * x, b, d        encoder macroblock state, source block and prediction block.
 * bestmv         in: starting vector; out: refined vector, scaled x8 from the
 *                input units (the same scale used when costing against ref_mv).
 * ref_mv         reference vector the MV cost is measured against.
 * error_per_bit  multiplier that converts MV table cost into error units.
 * vfp            variance / sub-pixel variance function table.
 * mvcost         row ([0]) and column ([1]) MV cost tables.
 * distortion     out: variance at the chosen position (without MV cost).
 * sse1           out: sse reported by the variance function at that position.
 *
 * Returns the best combined error (variance + MV cost), or INT_MAX when the
 * refined vector differs from ref_mv by more than MAX_FULL_PEL_VAL << 3 in
 * either component.
 *
 * Scaling of possibly negative vector components is written as a
 * multiplication rather than a left shift, because left-shifting a negative
 * value is undefined behaviour in C.
 */
int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                             int_mv *bestmv, int_mv *ref_mv,
                                             int error_per_bit,
                                             const vp8_variance_fn_ptr_t *vfp,
                                             int *mvcost[2], int *distortion,
                                             unsigned int *sse1)
{
    unsigned char *z = (*(b->base_src) + b->src);
    MACROBLOCKD *xd = &x->e_mbd;

    int rr, rc, br, bc, hstep;
    int tr, tc;
    unsigned int besterr = INT_MAX;
    unsigned int left, right, up, down, diag;
    unsigned int sse;
    unsigned int whichdir;
    unsigned int halfiters = 4;      // "while (--n)" below gives 3 rounds per level
    unsigned int quarteriters = 4;
#if CONFIG_HIGH_PRECISION_MV
    unsigned int eighthiters = 4;
#endif
    int thismse;
    int maxc, minc, maxr, minr;
    int y_stride;
    int offset;

#if CONFIG_HIGH_PRECISION_MV
    if (xd->allow_high_precision_mv)
    {
        // Search coordinates are in 1/8 units; ref_mv is used unscaled.
        rr = ref_mv->as_mv.row; rc = ref_mv->as_mv.col;
        br = bestmv->as_mv.row * 8; bc = bestmv->as_mv.col * 8;
        hstep = 4;
        minc = MAX(x->mv_col_min * 8, (ref_mv->as_mv.col) - ((1 << mvlong_width) - 1));
        maxc = MIN(x->mv_col_max * 8, (ref_mv->as_mv.col) + ((1 << mvlong_width) - 1));
        minr = MAX(x->mv_row_min * 8, (ref_mv->as_mv.row) - ((1 << mvlong_width) - 1));
        maxr = MIN(x->mv_row_max * 8, (ref_mv->as_mv.row) + ((1 << mvlong_width) - 1));
    }
    else
#endif
    {
        // Search coordinates are in 1/4 units; ref_mv is halved to match.
        rr = ref_mv->as_mv.row >> 1; rc = ref_mv->as_mv.col >> 1;
        br = bestmv->as_mv.row * 4; bc = bestmv->as_mv.col * 4;
        hstep = 2;
        minc = MAX(x->mv_col_min * 4, (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
        maxc = MIN(x->mv_col_max * 4, (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
        minr = MAX(x->mv_row_min * 4, (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
        maxr = MIN(x->mv_row_max * 4, (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));
    }

    tr = br;
    tc = bc;

#if ARCH_X86 || ARCH_X86_64
    unsigned char *y0 = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
    unsigned char *y;
    int buf_r1, buf_r2, buf_c1;

    // Clamping to avoid out-of-range data access
    buf_r1 = ((bestmv->as_mv.row - INTERP_EXTEND) < x->mv_row_min)?(bestmv->as_mv.row - x->mv_row_min):INTERP_EXTEND;
    buf_r2 = ((bestmv->as_mv.row + INTERP_EXTEND) > x->mv_row_max)?(x->mv_row_max - bestmv->as_mv.row):INTERP_EXTEND;
    buf_c1 = ((bestmv->as_mv.col - INTERP_EXTEND) < x->mv_col_min)?(bestmv->as_mv.col - x->mv_col_min):INTERP_EXTEND;
    y_stride = 32;

    /* Copy to intermediate buffer before searching. */
    vfp->copymem(y0 - buf_c1 - d->pre_stride*buf_r1, d->pre_stride, xd->y_buf, y_stride, 16+buf_r1+buf_r2);
    y = xd->y_buf + y_stride*buf_r1 +buf_c1;
#else
    unsigned char *y = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
    y_stride = d->pre_stride;
#endif

    // PRE()/PREHP() subtract this so that (br, bc) maps back onto y itself.
    offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;

    // central mv
    bestmv->as_mv.row *= 8;
    bestmv->as_mv.col *= 8;

    // calculate central point error
    besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
    *distortion = besterr;
#if CONFIG_HIGH_PRECISION_MV
    besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
    besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
#endif

    // TODO: each subsequent iteration re-checks at least one point in common with the
    // previous iteration (possibly two if the diagonal was selected).
    while (--halfiters)
    {
        // 1/2 pel
        CHECK_BETTER(left, tr, tc - hstep);
        CHECK_BETTER(right, tr, tc + hstep);
        CHECK_BETTER(up, tr - hstep, tc);
        CHECK_BETTER(down, tr + hstep, tc);

        // bit 0: right is at least as good as left; bit 1: down is at least as good as up
        whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

        switch (whichdir)
        {
        case 0:
            CHECK_BETTER(diag, tr - hstep, tc - hstep);
            break;
        case 1:
            CHECK_BETTER(diag, tr - hstep, tc + hstep);
            break;
        case 2:
            CHECK_BETTER(diag, tr + hstep, tc - hstep);
            break;
        case 3:
            CHECK_BETTER(diag, tr + hstep, tc + hstep);
            break;
        }

        // no reason to check the same one again.
        if (tr == br && tc == bc)
            break;

        tr = br;
        tc = bc;
    }

    // TODO: each subsequent iteration re-checks at least one point in common with the
    // previous iteration (possibly two if the diagonal was selected).
    // 1/4 pel
    hstep >>= 1;
    while (--quarteriters)
    {
        CHECK_BETTER(left, tr, tc - hstep);
        CHECK_BETTER(right, tr, tc + hstep);
        CHECK_BETTER(up, tr - hstep, tc);
        CHECK_BETTER(down, tr + hstep, tc);

        whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

        switch (whichdir)
        {
        case 0:
            CHECK_BETTER(diag, tr - hstep, tc - hstep);
            break;
        case 1:
            CHECK_BETTER(diag, tr - hstep, tc + hstep);
            break;
        case 2:
            CHECK_BETTER(diag, tr + hstep, tc - hstep);
            break;
        case 3:
            CHECK_BETTER(diag, tr + hstep, tc + hstep);
            break;
        }

        // no reason to check the same one again.
        if (tr == br && tc == bc)
            break;

        tr = br;
        tc = bc;
    }

#if CONFIG_HIGH_PRECISION_MV
    if (xd->allow_high_precision_mv)
    {
        // 1/8 pel
        hstep >>= 1;
        while (--eighthiters)
        {
            CHECK_BETTER(left, tr, tc - hstep);
            CHECK_BETTER(right, tr, tc + hstep);
            CHECK_BETTER(up, tr - hstep, tc);
            CHECK_BETTER(down, tr + hstep, tc);

            whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

            switch (whichdir)
            {
            case 0:
                CHECK_BETTER(diag, tr - hstep, tc - hstep);
                break;
            case 1:
                CHECK_BETTER(diag, tr - hstep, tc + hstep);
                break;
            case 2:
                CHECK_BETTER(diag, tr + hstep, tc - hstep);
                break;
            case 3:
                CHECK_BETTER(diag, tr + hstep, tc + hstep);
                break;
            }

            // no reason to check the same one again.
            if (tr == br && tc == bc)
                break;

            tr = br;
            tc = bc;
        }
    }
#endif
#if CONFIG_HIGH_PRECISION_MV
    if (xd->allow_high_precision_mv)
    {
        // (br, bc) are already in 1/8 units.
        bestmv->as_mv.row = br;
        bestmv->as_mv.col = bc;
    }
    else
#endif  /* CONFIG_HIGH_PRECISION_MV */
    {
        // (br, bc) are in 1/4 units; scale to the 1/8 units used for bestmv.
        bestmv->as_mv.row = br * 2;
        bestmv->as_mv.col = bc * 2;
    }

    if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL<<3)) ||
        (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL<<3)))
        return INT_MAX;

    return besterr;
}
/* Retire the helper macros of the iterative sub-pixel search so that the
 * code below can define its own versions (SP is redefined right after). */
#undef MVC
#undef PRE
#undef SP
#undef DIST
#undef IFMVCV
#undef ERR
#undef CHECK_BETTER
#undef MIN
#undef MAX

#if CONFIG_HIGH_PRECISION_MV
#undef PREHP
#undef SPHP   /* was "#undef DPHP", a name that is never defined; SPHP is the macro defined above */
#undef DISTHP
#undef ERRHP
#endif

460 461 462 463
/* SP() for the code below: keeps the low three bits of a motion vector
 * component (doubled when CONFIG_SIXTEENTH_SUBPEL_UV is set) as the offset
 * argument passed to vfp->svf. */
#if CONFIG_SIXTEENTH_SUBPEL_UV
#define SP(x) (((x)&7)<<1) // convert motion vector component to offset for svf calc
#else
#define SP(x) ((x)&7) // convert motion vector component to offset for svf calc
#endif  /* CONFIG_SIXTEENTH_SUBPEL_UV */
Scott LaVarnway's avatar
Scott LaVarnway committed
465 466 467 468 469 470
int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                 int_mv *bestmv, int_mv *ref_mv,
                                 int error_per_bit,
                                 const vp8_variance_fn_ptr_t *vfp,
                                 int *mvcost[2], int *distortion,
                                 unsigned int *sse1)
John Koleszar's avatar
John Koleszar committed
471 472
{
    int bestmse = INT_MAX;
Scott LaVarnway's avatar
Scott LaVarnway committed
473 474
    int_mv startmv;
    int_mv this_mv;
475 476 477 478
#if CONFIG_HIGH_PRECISION_MV
    int_mv orig_mv;
    int yrow_movedback=0, ycol_movedback=0;
#endif
John Koleszar's avatar
John Koleszar committed
479 480 481 482
    unsigned char *z = (*(b->base_src) + b->src);
    int left, right, up, down, diag;
    unsigned int sse;
    int whichdir ;
483
    int thismse;
484
    int y_stride;
485
    MACROBLOCKD *xd = &x->e_mbd;
486 487 488 489 490 491 492 493 494 495 496 497 498

#if ARCH_X86 || ARCH_X86_64
    unsigned char *y0 = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
    unsigned char *y;

    y_stride = 32;
    /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
     vfp->copymem(y0 - 1 - d->pre_stride, d->pre_stride, xd->y_buf, y_stride, 18);
     y = xd->y_buf + y_stride + 1;
#else
     unsigned char *y = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
     y_stride = d->pre_stride;
#endif
John Koleszar's avatar
John Koleszar committed
499 500

    // central mv
Scott LaVarnway's avatar
Scott LaVarnway committed
501 502
    bestmv->as_mv.row <<= 3;
    bestmv->as_mv.col <<= 3;
John Koleszar's avatar
John Koleszar committed
503
    startmv = *bestmv;
504 505 506
#if CONFIG_HIGH_PRECISION_MV
    orig_mv = *bestmv;
#endif
John Koleszar's avatar
John Koleszar committed
507 508

    // calculate central point error
509
    bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
510
    *distortion = bestmse;
511 512 513
#if CONFIG_HIGH_PRECISION_MV
    bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
514
    bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
515
#endif
John Koleszar's avatar
John Koleszar committed
516 517

    // go left then right and check error
Scott LaVarnway's avatar
Scott LaVarnway committed
518 519
    this_mv.as_mv.row = startmv.as_mv.row;
    this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
520
    thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
521 522 523
#if CONFIG_HIGH_PRECISION_MV
    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
524
    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
525
#endif
John Koleszar's avatar
John Koleszar committed
526 527 528 529 530

    if (left < bestmse)
    {
        *bestmv = this_mv;
        bestmse = left;
531
        *distortion = thismse;
532
        *sse1 = sse;
John Koleszar's avatar
John Koleszar committed
533 534
    }

Scott LaVarnway's avatar
Scott LaVarnway committed
535
    this_mv.as_mv.col += 8;
536
    thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
537 538 539
#if CONFIG_HIGH_PRECISION_MV
    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
540
    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
541
#endif
John Koleszar's avatar
John Koleszar committed
542 543 544 545 546

    if (right < bestmse)
    {
        *bestmv = this_mv;
        bestmse = right;
547
        *distortion = thismse;
548
        *sse1 = sse;
John Koleszar's avatar
John Koleszar committed
549 550 551
    }

    // go up then down and check error
Scott LaVarnway's avatar
Scott LaVarnway committed
552 553
    this_mv.as_mv.col = startmv.as_mv.col;
    this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
554
    thismse =  vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
555 556 557
#if CONFIG_HIGH_PRECISION_MV
    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
558
    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
559
#endif
John Koleszar's avatar
John Koleszar committed
560 561 562 563 564

    if (up < bestmse)
    {
        *bestmv = this_mv;
        bestmse = up;
565
        *distortion = thismse;
566
        *sse1 = sse;
John Koleszar's avatar
John Koleszar committed
567 568
    }

Scott LaVarnway's avatar
Scott LaVarnway committed
569
    this_mv.as_mv.row += 8;
570
    thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
571 572 573
#if CONFIG_HIGH_PRECISION_MV
    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
574
    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
575
#endif
John Koleszar's avatar
John Koleszar committed
576 577 578 579 580

    if (down < bestmse)
    {
        *bestmv = this_mv;
        bestmse = down;
581
        *distortion = thismse;
582
        *sse1 = sse;
John Koleszar's avatar
John Koleszar committed
583 584 585 586 587 588 589 590 591 592 593 594
    }


    // now check 1 more diagonal
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    //for(whichdir =0;whichdir<4;whichdir++)
    //{
    this_mv = startmv;

    switch (whichdir)
    {
    case 0:
Scott LaVarnway's avatar
Scott LaVarnway committed
595 596
        this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
        this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
597
        thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse);
John Koleszar's avatar
John Koleszar committed
598 599
        break;
    case 1:
Scott LaVarnway's avatar
Scott LaVarnway committed
600 601
        this_mv.as_mv.col += 4;
        this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
602
        thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse);
John Koleszar's avatar
John Koleszar committed
603 604
        break;
    case 2:
Scott LaVarnway's avatar
Scott LaVarnway committed
605 606
        this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
        this_mv.as_mv.row += 4;
607
        thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse);
John Koleszar's avatar
John Koleszar committed
608 609
        break;
    case 3:
610
    default:
Scott LaVarnway's avatar
Scott LaVarnway committed
611 612
        this_mv.as_mv.col += 4;
        this_mv.as_mv.row += 4;
613
        thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse);
John Koleszar's avatar
John Koleszar committed
614 615 616
        break;
    }

617 618 619
#if CONFIG_HIGH_PRECISION_MV
    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
620
    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
621
#endif
John Koleszar's avatar
John Koleszar committed
622 623 624 625 626

    if (diag < bestmse)
    {
        *bestmv = this_mv;
        bestmse = diag;
627
        *distortion = thismse;
628
        *sse1 = sse;
John Koleszar's avatar
John Koleszar committed
629 630 631 632 633 634
    }

//  }


    // time to check quarter pels.
Scott LaVarnway's avatar
Scott LaVarnway committed
635
    if (bestmv->as_mv.row < startmv.as_mv.row)
636
    {
637
        y -= y_stride;
638 639 640 641
#if CONFIG_HIGH_PRECISION_MV
        yrow_movedback = 1;
#endif
    }
John Koleszar's avatar
John Koleszar committed
642

Scott LaVarnway's avatar
Scott LaVarnway committed
643
    if (bestmv->as_mv.col < startmv.as_mv.col)
644
    {
John Koleszar's avatar
John Koleszar committed
645
        y--;
646 647 648 649
#if CONFIG_HIGH_PRECISION_MV
        ycol_movedback = 1;
#endif
    }
John Koleszar's avatar
John Koleszar committed
650 651 652 653 654 655

    startmv = *bestmv;



    // go left then right and check error
Scott LaVarnway's avatar
Scott LaVarnway committed
656
    this_mv.as_mv.row = startmv.as_mv.row;
John Koleszar's avatar
John Koleszar committed
657

Scott LaVarnway's avatar
Scott LaVarnway committed
658
    if (startmv.as_mv.col & 7)
John Koleszar's avatar
John Koleszar committed
659
    {
Scott LaVarnway's avatar
Scott LaVarnway committed
660
        this_mv.as_mv.col = startmv.as_mv.col - 2;
661
        thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
John Koleszar's avatar
John Koleszar committed
662 663 664
    }
    else
    {
Scott LaVarnway's avatar
Scott LaVarnway committed
665
        this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
666
        thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
John Koleszar's avatar
John Koleszar committed
667 668
    }

669 670 671
#if CONFIG_HIGH_PRECISION_MV
    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
672
    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
673
#endif
John Koleszar's avatar
John Koleszar committed
674 675 676 677 678

    if (left < bestmse)
    {
        *bestmv = this_mv;
        bestmse = left;
679
        *distortion = thismse;
680
        *sse1 = sse;
John Koleszar's avatar
John Koleszar committed
681 682
    }

Scott LaVarnway's avatar
Scott LaVarnway committed
683
    this_mv.as_mv.col += 4;
684
    thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
685 686 687
#if CONFIG_HIGH_PRECISION_MV
    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
688
    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
689
#endif
John Koleszar's avatar
John Koleszar committed
690 691 692 693 694

    if (right < bestmse)
    {
        *bestmv = this_mv;
        bestmse = right;
695
        *distortion = thismse;
696
        *sse1 = sse;
John Koleszar's avatar
John Koleszar committed
697 698 699
    }

    // go up then down and check error
Scott LaVarnway's avatar
Scott LaVarnway committed
700
    this_mv.as_mv.col = startmv.as_mv.col;
John Koleszar's avatar
John Koleszar committed
701

Scott LaVarnway's avatar
Scott LaVarnway committed
702
    if (startmv.as_mv.row & 7)
John Koleszar's avatar
John Koleszar committed
703
    {
Scott LaVarnway's avatar
Scott LaVarnway committed
704
        this_mv.as_mv.row = startmv.as_mv.row - 2;
705
        thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
John Koleszar's avatar
John Koleszar committed
706 707 708
    }
    else
    {
Scott LaVarnway's avatar
Scott LaVarnway committed
709
        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
710
        thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6), z, b->src_stride, &sse);
John Koleszar's avatar
John Koleszar committed
711 712
    }

713 714 715
#if CONFIG_HIGH_PRECISION_MV
    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
716
    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
717
#endif
John Koleszar's avatar
John Koleszar committed
718 719 720 721 722

    if (up < bestmse)
    {
        *bestmv = this_mv;
        bestmse = up;
723
        *distortion = thismse;
724
        *sse1 = sse;
John Koleszar's avatar
John Koleszar committed
725 726
    }

Scott LaVarnway's avatar
Scott LaVarnway committed
727
    this_mv.as_mv.row += 4;
728
    thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
729 730 731
#if CONFIG_HIGH_PRECISION_MV
    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
732
    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
733
#endif
John Koleszar's avatar
John Koleszar committed
734 735 736 737 738

    if (down < bestmse)
    {
        *bestmv = this_mv;
        bestmse = down;
739
        *distortion = thismse;
740
        *sse1 = sse;
John Koleszar's avatar
John Koleszar committed
741 742 743 744 745 746 747 748 749 750 751 752 753 754
    }


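    // now check 1 more diagonal: the one lying between the cheaper of
    // left/right and the cheaper of up/down (ties pick right / down)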
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

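    // Disabled exhaustive variant that looped over all four diagonals;
    // only the single diagonal selected by whichdir is tested.
//  for(whichdir=0;whichdir<4;whichdir++)
//  {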
    this_mv = startmv;

    switch (whichdir)
    {
    case 0:

Scott LaVarnway's avatar
Scott LaVarnway committed
755
        if (startmv.as_mv.row & 7)
John Koleszar's avatar
John Koleszar committed
756
        {
Scott LaVarnway's avatar
Scott LaVarnway committed
757
            this_mv.as_mv.row -= 2;
John Koleszar's avatar
John Koleszar committed
758

Scott LaVarnway's avatar
Scott LaVarnway committed
759
            if (startmv.as_mv.col & 7)
John Koleszar's avatar
John Koleszar committed
760
            {
Scott LaVarnway's avatar
Scott LaVarnway committed
761
                this_mv.as_mv.col -= 2;
762
                thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
John Koleszar's avatar
John Koleszar committed
763 764 765
            }
            else
            {
Scott LaVarnway's avatar
Scott LaVarnway committed
766
                this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
767
                thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, b->src_stride, &sse);;
John Koleszar's avatar
John Koleszar committed
768 769 770 771
            }
        }
        else
        {
Scott LaVarnway's avatar
Scott LaVarnway committed
772
            this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
John Koleszar's avatar
John Koleszar committed
773

Scott LaVarnway's avatar
Scott LaVarnway committed
774
            if (startmv.as_mv.col & 7)
John Koleszar's avatar
John Koleszar committed
775
            {
Scott LaVarnway's avatar
Scott LaVarnway committed
776
                this_mv.as_mv.col -= 2;
777
                thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6), z, b->src_stride, &sse);
John Koleszar's avatar
John Koleszar committed
778 779 780
            }
            else
            {
Scott LaVarnway's avatar
Scott LaVarnway committed
781
                this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
782
                thismse = vfp->svf(y - y_stride - 1, y_stride, SP(6), SP(6), z, b->src_stride, &sse);
John Koleszar's avatar
John Koleszar committed
783 784 785 786 787
            }
        }

        break;
    case 1:
Scott LaVarnway's avatar
Scott LaVarnway committed
788
        this_mv.as_mv.col += 2;
John Koleszar's avatar
John Koleszar committed
789

Scott LaVarnway's avatar
Scott LaVarnway committed
790
        if (startmv.as_mv.row & 7)
John Koleszar's avatar
John Koleszar committed
791
        {
Scott LaVarnway's avatar
Scott LaVarnway committed
792
            this_mv.as_mv.row -= 2;
793
            thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
John Koleszar's avatar
John Koleszar committed
794 795 796
        }
        else
        {
Scott LaVarnway's avatar
Scott LaVarnway committed
797
            this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
798
            thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6), z, b->src_stride, &sse);
John Koleszar's avatar
John Koleszar committed
799 800 801 802
        }

        break;
    case 2:
Scott LaVarnway's avatar
Scott LaVarnway committed
803
        this_mv.as_mv.row += 2;
John Koleszar's avatar
John Koleszar committed
804

Scott LaVarnway's avatar
Scott LaVarnway committed
805
        if (startmv.as_mv.col & 7)
John Koleszar's avatar
John Koleszar committed
806
        {
Scott LaVarnway's avatar
Scott LaVarnway committed
807
            this_mv.as_mv.col -= 2;
808
            thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
John Koleszar's avatar
John Koleszar committed
809 810 811
        }
        else
        {
Scott LaVarnway's avatar
Scott LaVarnway committed
812
            this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
813
            thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
John Koleszar's avatar
John Koleszar committed
814 815 816 817
        }

        break;
    case 3:
Scott LaVarnway's avatar
Scott LaVarnway committed
818 819
        this_mv.as_mv.col += 2;
        this_mv.as_mv.row += 2;
820
        thismse = vfp->svf(y, y_stride,  SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
John Koleszar's avatar
John Koleszar committed
821 822 823
        break;
    }

824 825 826
#if CONFIG_HIGH_PRECISION_MV
    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
#else
827
    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
828
#endif
John Koleszar's avatar
John Koleszar committed
829 830 831 832 833

    if (diag < bestmse)
    {
        *bestmv = this_mv;
        bestmse = diag;
834
        *distortion = thismse;
835
        *sse1 = sse;
John Koleszar's avatar
John Koleszar committed
836 837
    }

838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870
#if CONFIG_HIGH_PRECISION_MV
    if (!x->e_mbd.allow_high_precision_mv)
        return bestmse;

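    /* Now do 1/8th pixel refinement around the best vector found so far
     * (only reached when allow_high_precision_mv is set). */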
    if (bestmv->as_mv.row < orig_mv.as_mv.row && !yrow_movedback)
    {
        y -= y_stride;
        yrow_movedback = 1;
    }

    if (bestmv->as_mv.col < orig_mv.as_mv.col && !ycol_movedback)
    {
        y--;
        ycol_movedback = 1;
    }

    startmv = *bestmv;

    // go left then right and check error
    this_mv.as_mv.row = startmv.as_mv.row;

    if (startmv.as_mv.col & 7)
    {
        this_mv.as_mv.col = startmv.as_mv.col - 1;
        thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
    }
    else
    {
        this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
        thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
    }

871
    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
872 873 874 875 876 877 878 879 880 881 882

    if (left < bestmse)
    {
        *bestmv = this_mv;
        bestmse = left;
        *distortion = thismse;
        *sse1 = sse;
    }

    this_mv.as_mv.col += 2;
    thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
883
    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906

    if (right < bestmse)
    {
        *bestmv = this_mv;
        bestmse = right;
        *distortion = thismse;
        *sse1 = sse;
    }

    // go up then down and check error
    this_mv.as_mv.col = startmv.as_mv.col;

    if (startmv.as_mv.row & 7)
    {
        this_mv.as_mv.row = startmv.as_mv.row - 1;
        thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
    }
    else
    {
        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7;
        thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, b->src_stride, &sse);
    }

907
    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
908 909 910 911 912 913 914 915 916 917 918

    if (up < bestmse)
    {
        *bestmv = this_mv;
        bestmse = up;
        *distortion = thismse;
        *sse1 = sse;
    }

    this_mv.as_mv.row += 2;
    thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
919
    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009

    if (down < bestmse)
    {
        *bestmv = this_mv;
        bestmse = down;
        *distortion = thismse;
        *sse1 = sse;
    }


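    // now check 1 more diagonal: the one lying between the cheaper of
    // left/right and the cheaper of up/down (ties pick right / down)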
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

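    // Disabled exhaustive variant that looped over all four diagonals;
    // only the single diagonal selected by whichdir is tested.
//  for(whichdir=0;whichdir<4;whichdir++)
//  {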
    this_mv = startmv;

    switch (whichdir)
    {
    case 0:

        if (startmv.as_mv.row & 7)
        {
            this_mv.as_mv.row -= 1;

            if (startmv.as_mv.col & 7)
            {
                this_mv.as_mv.col -= 1;
                thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
            }
            else
            {
                this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
                thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), z, b->src_stride, &sse);;
            }
        }
        else
        {
            this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7;

            if (startmv.as_mv.col & 7)
            {
                this_mv.as_mv.col -= 1;
                thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, b->src_stride, &sse);
            }
            else
            {
                this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
                thismse = vfp->svf(y - y_stride - 1, y_stride, SP(7), SP(7), z, b->src_stride, &sse);
            }
        }

        break;
    case 1:
        this_mv.as_mv.col += 1;

        if (startmv.as_mv.row & 7)
        {
            this_mv.as_mv.row -= 1;
            thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
        }
        else
        {
            this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7;
            thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, b->src_stride, &sse);
        }

        break;
    case 2:
        this_mv.as_mv.row += 1;

        if (startmv.as_mv.col & 7)
        {
            this_mv.as_mv.col -= 1;
            thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
        }
        else
        {
            this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
            thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
        }

        break;
    case 3:
        this_mv.as_mv.col += 1;
        this_mv.as_mv.row += 1;
        thismse = vfp->svf(y, y_stride,  SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
        break;
    }

1010
    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021

    if (diag < bestmse)
    {
        *bestmv = this_mv;
        bestmse = diag;
        *distortion = thismse;
        *sse1 = sse;
    }

#endif  /* CONFIG_HIGH_PRECISION_MV */

John Koleszar's avatar
John Koleszar committed
1022 1023 1024
    return bestmse;
}

1025 1026
#undef SP

1027
int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
Scott LaVarnway's avatar
Scott LaVarnway committed
1028 1029 1030 1031 1032
                                  int_mv *bestmv, int_mv *ref_mv,
                                  int error_per_bit,
                                  const vp8_variance_fn_ptr_t *vfp,
                                  int *mvcost[2], int *distortion,
                                  unsigned int *sse1)
John Koleszar's avatar
John Koleszar committed
1033 1034
{
    int bestmse = INT_MAX;
Scott LaVarnway's avatar
Scott LaVarnway committed
1035 1036
    int_mv startmv;
    int_mv this_mv;
John Koleszar's avatar
John Koleszar committed
1037 1038 1039
    unsigned char *z = (*(b->base_src) + b->src);
    int left, right, up, down, diag;
    unsigned int sse;
Yunqing Wang's avatar
Yunqing Wang committed
1040
    int whichdir ;
1041
    int thismse;
1042
    int y_stride;
1043
    MACROBLOCKD *xd = &x->e_mbd;
1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056

#if ARCH_X86 || ARCH_X86_64
    unsigned char *y0 = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
    unsigned char *y;

    y_stride = 32;
    /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
    vfp->copymem(y0 - 1 - d->pre_stride, d->pre_stride, xd->y_buf, y_stride, 18);
    y = xd->y_buf + y_stride + 1;
#else
    unsigned char *y = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
    y_stride = d->pre_stride;
#endif
John Koleszar's avatar
John Koleszar committed
1057 1058

    // central mv
Scott LaVarnway's avatar
Scott LaVarnway committed
1059 1060
    bestmv->as_mv.row <<= 3;
    bestmv->as_mv.col <<= 3;
John Koleszar's avatar
John Koleszar committed
1061 1062 1063
    startmv = *bestmv;

    // calculate central point error
1064
    bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
1065
    *distortion