motion_est.c 76.3 KB
Newer Older
Fabrice Bellard's avatar
Fabrice Bellard committed
1
/*
2
 * Motion estimation
3
 * Copyright (c) 2000,2001 Fabrice Bellard
4
 * Copyright (c) 2002-2004 Michael Niedermayer
5
 *
6
 * new motion estimation (X1/EPZS) by Michael Niedermayer <michaelni@gmx.at>
Fabrice Bellard's avatar
Fabrice Bellard committed
7
 *
8
 * This file is part of Libav.
9
 *
10
 * Libav is free software; you can redistribute it and/or
Fabrice Bellard's avatar
Fabrice Bellard committed
11 12
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
13
 * version 2.1 of the License, or (at your option) any later version.
Fabrice Bellard's avatar
Fabrice Bellard committed
14
 *
15
 * Libav is distributed in the hope that it will be useful,
Fabrice Bellard's avatar
Fabrice Bellard committed
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Fabrice Bellard's avatar
Fabrice Bellard committed
17 18
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
Fabrice Bellard's avatar
Fabrice Bellard committed
19
 *
Fabrice Bellard's avatar
Fabrice Bellard committed
20
 * You should have received a copy of the GNU Lesser General Public
21
 * License along with Libav; if not, write to the Free Software
22
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Fabrice Bellard's avatar
Fabrice Bellard committed
23
 */
24

Michael Niedermayer's avatar
Michael Niedermayer committed
25
/**
26
 * @file
Michael Niedermayer's avatar
Michael Niedermayer committed
27 28
 * Motion estimation.
 */
29

Fabrice Bellard's avatar
Fabrice Bellard committed
30 31
#include <stdlib.h>
#include <stdio.h>
32
#include <limits.h>
33
#include "libavutil/intmath.h"
Fabrice Bellard's avatar
Fabrice Bellard committed
34 35
#include "avcodec.h"
#include "dsputil.h"
36
#include "mathops.h"
Fabrice Bellard's avatar
Fabrice Bellard committed
37 38
#include "mpegvideo.h"

39 40
#undef NDEBUG
#include <assert.h>
Michael Niedermayer's avatar
Michael Niedermayer committed
41

42
#define SQ(a) ((a)*(a))
43

44 45 46 47 48 49
#define P_LEFT P[1]
#define P_TOP P[2]
#define P_TOPRIGHT P[3]
#define P_MEDIAN P[4]
#define P_MV1 P[9]

Michael Niedermayer's avatar
Michael Niedermayer committed
50
static inline int sad_hpel_motion_search(MpegEncContext * s,
51
                                  int *mx_ptr, int *my_ptr, int dmin,
52 53
                                  int src_index, int ref_index,
                                  int size, int h);
Michael Niedermayer's avatar
Michael Niedermayer committed
54

55
/**
 * Advance the generation counter of the motion estimation map.
 *
 * The counter is incremented by 1 << (2 * ME_MAP_MV_BITS). If it becomes
 * zero after the increment, the map is cleared and the counter restarts
 * at one increment.
 *
 * @param c motion estimation context whose map and map_generation are updated
 * @return the new value of c->map_generation
 */
static inline unsigned update_map_generation(MotionEstContext *c)
{
    c->map_generation += 1 << (ME_MAP_MV_BITS * 2);

    if (!c->map_generation) {
        /* counter reached zero: start over with an empty map */
        memset(c->map, 0, ME_MAP_SIZE * sizeof(uint32_t));
        c->map_generation = 1 << (ME_MAP_MV_BITS * 2);
    }

    return c->map_generation;
}

65 66 67 68 69 70
/* shape adaptive search stuff */
/**
 * One candidate minimum; minima_cmp() sorts these by their height field.
 * The x, y and checked fields are not touched by the comparator.
 */
typedef struct Minima{
    int height;
    int x, y;
    int checked;
}Minima;

/**
 * qsort()-style comparator ordering Minima entries by ascending height.
 *
 * @param a pointer to the first Minima
 * @param b pointer to the second Minima
 * @return a negative value, zero or a positive value if the height of a is
 *         lower than, equal to or greater than the height of b
 */
static int minima_cmp(const void *a, const void *b){
    const Minima *da = a;
    const Minima *db = b;

    /* Compare instead of subtracting: da->height - db->height overflows
     * (undefined behaviour, typically the wrong sign) when the two heights
     * are far apart, e.g. INT_MAX and -1. */
    return (da->height > db->height) - (da->height < db->height);
}
Michael Niedermayer's avatar
Michael Niedermayer committed
78

79 80 81
#define FLAG_QPEL   1 //must be 1
#define FLAG_CHROMA 2
#define FLAG_DIRECT 4
Michael Niedermayer's avatar
Michael Niedermayer committed
82

83
/**
 * Set up the source and reference plane pointers of the context.
 *
 * Every plane pointer is advanced by an offset derived from (x, y):
 * y*stride + x for plane 0 and (y*uvstride + x)>>1 for planes 1 and 2.
 *
 * @param c         context receiving the pointers (src[0], ref[0] and,
 *                  optionally, ref[ref_index])
 * @param src       base pointers of the three source planes
 * @param ref       base pointers of the three reference planes
 * @param ref2      base pointers of a second reference; only read when
 *                  ref_index is non-zero
 * @param x         horizontal position used for the offset
 * @param y         vertical position used for the offset
 * @param ref_index slot of c->ref that receives ref2, or 0 for none
 */
static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index)
{
    const int luma_off   =  y * c->stride   + x;
    const int chroma_off = (y * c->uvstride + x) >> 1;
    int plane;

    for (plane = 0; plane < 3; plane++) {
        const int off = plane ? chroma_off : luma_off;

        c->src[0][plane] = src[plane] + off;
        c->ref[0][plane] = ref[plane] + off;
        if (ref_index)
            c->ref[ref_index][plane] = ref2[plane] + off;
    }
}

101 102
/**
 * Build the FLAG_* bit set used by the comparison functions.
 *
 * @param c      context; FLAG_QPEL is set when c->avctx->flags contains
 *               CODEC_FLAG_QPEL
 * @param direct non-zero to set FLAG_DIRECT
 * @param chroma non-zero to set FLAG_CHROMA
 * @return combination of FLAG_QPEL, FLAG_DIRECT and FLAG_CHROMA
 */
static int get_flags(MotionEstContext *c, int direct, int chroma)
{
    int flags = 0;

    if (c->avctx->flags & CODEC_FLAG_QPEL)
        flags |= FLAG_QPEL;
    if (direct)
        flags |= FLAG_DIRECT;
    if (chroma)
        flags |= FLAG_CHROMA;

    return flags;
}

107
/**
 * Score a candidate position using a prediction built from two references.
 *
 * A 16x16 prediction is assembled in c->temp: the first vector (fx, fy) is
 * applied to ref[0] with a "put" function, the second vector (bx, by) is
 * applied to ref[8] with an "avg" function (presumably forward and backward
 * prediction of direct mode -- confirm against the callers). The vectors are
 * derived from c->direct_basis_mv, c->co_located_mv and the pp_time/pb_time
 * values of s. The result is then compared against src[0] with cmp_func.
 *
 * The parameters size, h and chroma_cmp_func are not used here.
 *
 * @param x, y        full-pel candidate position
 * @param subx, suby  sub-pel part of the candidate position
 * @param ref_index   index into c->ref
 * @param src_index   index into c->src
 * @param cmp_func    comparison function applied to the 16x16 prediction
 * @param qpel        non-zero for quarter-pel precision, 0 for half-pel
 * @return the score returned by cmp_func, or 256*256*256*32 if the candidate
 *         lies outside [xmin, xmax] x [ymin, ymax]
 */
static av_always_inline int cmp_direct_inline(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, int qpel){
    MotionEstContext * const c= &s->me;
    const int stride= c->stride;
    const int hx= subx + (x<<(1+qpel));
    const int hy= suby + (y<<(1+qpel));
    uint8_t * const * const ref= c->ref[ref_index];
    uint8_t * const * const src= c->src[src_index];
    int pp, pb, frac_mask;

    //FIXME check chroma 4mv, (no crashes ...)
    assert(x >= c->xmin && hx <= c->xmax<<(qpel+1) && y >= c->ymin && hy <= c->ymax<<(qpel+1));
    if(!(x >= c->xmin && hx <= c->xmax<<(qpel+1) && y >= c->ymin && hy <= c->ymax<<(qpel+1)))
        return 256*256*256*32;

    pp       = s->pp_time;
    pb       = s->pb_time;
    frac_mask= 2*qpel+1;

    if(s->mv_type==MV_TYPE_8X8){
        int blk;

        /* one vector pair per 8x8 quadrant */
        for(blk=0; blk<4; blk++){
            const int col= blk&1;
            const int row= blk>>1;
            int fx = c->direct_basis_mv[blk][0] + hx;
            int fy = c->direct_basis_mv[blk][1] + hy;
            int bx, by, fwd_dxy, bwd_dxy;
            uint8_t *out= c->temp + 8*col + 8*stride*row;

            if(hx) bx = fx - c->co_located_mv[blk][0];
            else   bx = c->co_located_mv[blk][0]*(pb - pp)/pp + (col<<(qpel+4));
            if(hy) by = fy - c->co_located_mv[blk][1];
            else   by = c->co_located_mv[blk][1]*(pb - pp)/pp + (row<<(qpel+4));

            fwd_dxy= (fx&frac_mask) + ((fy&frac_mask)<<(qpel+1));
            bwd_dxy= (bx&frac_mask) + ((by&frac_mask)<<(qpel+1));

            if(qpel){
                c->qpel_put[1][fwd_dxy](out, ref[0] + (fx>>2) + (fy>>2)*stride, stride);
                c->qpel_avg[1][bwd_dxy](out, ref[8] + (bx>>2) + (by>>2)*stride, stride);
            }else{
                c->hpel_put[1][fwd_dxy](out, ref[0] + (fx>>1) + (fy>>1)*stride, stride, 8);
                c->hpel_avg[1][bwd_dxy](out, ref[8] + (bx>>1) + (by>>1)*stride, stride, 8);
            }
        }
    }else{
        int fx = c->direct_basis_mv[0][0] + hx;
        int fy = c->direct_basis_mv[0][1] + hy;
        int bx, by, fwd_dxy, bwd_dxy;

        if(hx) bx = fx - c->co_located_mv[0][0];
        else   bx = (c->co_located_mv[0][0]*(pb - pp)/pp);
        if(hy) by = fy - c->co_located_mv[0][1];
        else   by = (c->co_located_mv[0][1]*(pb - pp)/pp);

        fwd_dxy= (fx&frac_mask) + ((fy&frac_mask)<<(qpel+1));
        bwd_dxy= (bx&frac_mask) + ((by&frac_mask)<<(qpel+1));

        if(qpel){
            uint8_t * const fwd= ref[0] + (fx>>2) + (fy>>2)*stride;
            uint8_t * const bwd= ref[8] + (bx>>2) + (by>>2)*stride;
            int quad;

            /* the 8x8 qpel functions are applied to each quadrant in turn:
             * first all four "put" calls, then all four "avg" calls */
            for(quad=0; quad<4; quad++){
                const int off= 8*(quad&1) + 8*stride*(quad>>1);
                c->qpel_put[1][fwd_dxy](c->temp + off, fwd + off, stride);
            }
            for(quad=0; quad<4; quad++){
                const int off= 8*(quad&1) + 8*stride*(quad>>1);
                c->qpel_avg[1][bwd_dxy](c->temp + off, bwd + off, stride);
            }
        }else{
            assert((fx>>1) + 16*s->mb_x >= -16);
            assert((fy>>1) + 16*s->mb_y >= -16);
            assert((fx>>1) + 16*s->mb_x <= s->width);
            assert((fy>>1) + 16*s->mb_y <= s->height);
            assert((bx>>1) + 16*s->mb_x >= -16);
            assert((by>>1) + 16*s->mb_y >= -16);
            assert((bx>>1) + 16*s->mb_x <= s->width);
            assert((by>>1) + 16*s->mb_y <= s->height);

            c->hpel_put[0][fwd_dxy](c->temp, ref[0] + (fx>>1) + (fy>>1)*stride, stride, 16);
            c->hpel_avg[0][bwd_dxy](c->temp, ref[8] + (bx>>1) + (by>>1)*stride, stride, 16);
        }
    }

    return cmp_func(s, c->temp, src[0], stride, 16);
}

/**
 * Score a candidate position against a single reference.
 *
 * If the candidate has a sub-pel part, the interpolated block is first
 * written to c->temp with the qpel or hpel "put" function and compared
 * against src[0]; otherwise src[0] is compared directly against the
 * reference. When chroma is non-zero, both chroma planes are interpolated
 * with the hpel functions into c->temp + 16*stride and their scores from
 * chroma_cmp_func are added.
 *
 * @param x, y            full-pel candidate position
 * @param subx, suby      sub-pel part of the candidate position
 * @param size            index into the put function tables
 * @param h               block height passed to the compare functions
 * @param ref_index       index into c->ref
 * @param src_index       index into c->src
 * @param cmp_func        luma comparison function
 * @param chroma_cmp_func chroma comparison function (used if chroma != 0)
 * @param qpel            non-zero for quarter-pel precision
 * @param chroma          non-zero to include the chroma planes
 * @return accumulated comparison score
 */
static av_always_inline int cmp_inline(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, int qpel, int chroma){
    MotionEstContext * const c= &s->me;
    const int stride= c->stride;
    const int uvstride= c->uvstride;
    const int dxy= subx + (suby<<(1+qpel)); //FIXME log2_subpel?
    const int hx= subx + (x<<(1+qpel));
    const int hy= suby + (y<<(1+qpel));
    uint8_t * const * const ref= c->ref[ref_index];
    uint8_t * const * const src= c->src[src_index];
    uint8_t * const luma_ref= ref[0] + x + y*stride;
    int score;
    int uvdxy;
    //FIXME check chroma 4mv, (no crashes ...)

    /* luma */
    if(!dxy){
        score = cmp_func(s, src[0], luma_ref, stride, h);
    }else{
        if(qpel)
            c->qpel_put[size][dxy](c->temp, luma_ref, stride); //FIXME prototype (add h)
        else
            c->hpel_put[size][dxy](c->temp, luma_ref, stride, h);
        score = cmp_func(s, c->temp, src[0], stride, h);
    }

    if(!chroma)
        return score;

    /* chroma sub-pel index */
    if(!dxy){
        uvdxy= (x&1) + 2*(y&1);
    }else if(qpel){
        int cx= hx/2;
        int cy= hy/2;
        cx= (cx>>1)|(cx&1);
        cy= (cy>>1)|(cy&1);
        uvdxy= (cx&1) + 2*(cy&1);
        //FIXME x/y wrong, but mpeg4 qpel is sick anyway, we should drop as much of it as possible in favor for h264
    }else{
        uvdxy= dxy | (x&1) | (2*(y&1));
    }

    {
        uint8_t * const uvtemp= c->temp + 16*stride;
        const int uvoff= (x>>1) + (y>>1)*uvstride;
        const int uvh= h>>1;

        c->hpel_put[size+1][uvdxy](uvtemp  , ref[1] + uvoff, uvstride, uvh);
        c->hpel_put[size+1][uvdxy](uvtemp+8, ref[2] + uvoff, uvstride, uvh);
        score += chroma_cmp_func(s, uvtemp  , src[1], uvstride, uvh);
        score += chroma_cmp_func(s, uvtemp+8, src[2], uvstride, uvh);
    }

    return score;
}

/**
 * Full-pel luma-only comparison: cmp_inline() with size 0, height 16,
 * no sub-pel offset, no qpel and no chroma.
 *
 * @return the score computed by cmp_inline()
 */
static int cmp_simple(MpegEncContext *s, const int x, const int y,
                      int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func)
{
    const int size = 0;
    const int h    = 16;

    return cmp_inline(s, x, y, 0, 0, size, h, ref_index, src_index,
                      cmp_func, chroma_cmp_func, 0, 0);
}

/**
 * Full-pel comparison (sub-pel offset fixed to 0,0).
 *
 * Dispatches to cmp_direct_inline() when FLAG_DIRECT is set in flags
 * (passing the FLAG_QPEL bit as qpel), otherwise to cmp_inline() with
 * qpel 0 and the FLAG_CHROMA bit as chroma.
 *
 * @return the score computed by the selected function
 */
static int cmp_fpel_internal(MpegEncContext *s, const int x, const int y,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags)
{
    if (!(flags & FLAG_DIRECT))
        return cmp_inline(s, x, y, 0, 0, size, h, ref_index, src_index,
                          cmp_func, chroma_cmp_func, 0, flags & FLAG_CHROMA);

    return cmp_direct_inline(s, x, y, 0, 0, size, h, ref_index, src_index,
                             cmp_func, chroma_cmp_func, flags & FLAG_QPEL);
}

/**
 * General comparison with a sub-pel offset.
 *
 * Dispatches to cmp_direct_inline() when FLAG_DIRECT is set in flags,
 * otherwise to cmp_inline(); the FLAG_QPEL and FLAG_CHROMA bits of flags
 * are forwarded as the qpel and chroma arguments.
 *
 * @return the score computed by the selected function
 */
static int cmp_internal(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags)
{
    if (!(flags & FLAG_DIRECT))
        return cmp_inline(s, x, y, subx, suby, size, h, ref_index, src_index,
                          cmp_func, chroma_cmp_func, flags & FLAG_QPEL, flags & FLAG_CHROMA);

    return cmp_direct_inline(s, x, y, subx, suby, size, h, ref_index, src_index,
                             cmp_func, chroma_cmp_func, flags & FLAG_QPEL);
}
250

251
/** @brief compares a block (either a full macroblock or a partition thereof)
252 253
    against a proposed motion-compensated prediction of that block
 */
254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286
static av_always_inline int cmp(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
    if(av_builtin_constant_p(flags) && av_builtin_constant_p(h) && av_builtin_constant_p(size)
       && av_builtin_constant_p(subx) && av_builtin_constant_p(suby)
       && flags==0 && h==16 && size==0 && subx==0 && suby==0){
        return cmp_simple(s,x,y,ref_index,src_index, cmp_func, chroma_cmp_func);
    }else if(av_builtin_constant_p(subx) && av_builtin_constant_p(suby)
       && subx==0 && suby==0){
        return cmp_fpel_internal(s,x,y,size,h,ref_index,src_index, cmp_func, chroma_cmp_func,flags);
    }else{
        return cmp_internal(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, flags);
    }
}

/**
 * Comparison with qpel forced to 0.
 *
 * Dispatches to cmp_direct_inline() when FLAG_DIRECT is set in flags,
 * otherwise to cmp_inline() with the FLAG_CHROMA bit as chroma.
 *
 * @return the score computed by the selected function
 */
static int cmp_hpel(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags)
{
    if (!(flags & FLAG_DIRECT))
        return cmp_inline(s, x, y, subx, suby, size, h, ref_index, src_index,
                          cmp_func, chroma_cmp_func, 0, flags & FLAG_CHROMA);

    return cmp_direct_inline(s, x, y, subx, suby, size, h, ref_index, src_index,
                             cmp_func, chroma_cmp_func, 0);
}

/**
 * Comparison with qpel forced to 1.
 *
 * Dispatches to cmp_direct_inline() when FLAG_DIRECT is set in flags,
 * otherwise to cmp_inline() with the FLAG_CHROMA bit as chroma.
 *
 * @return the score computed by the selected function
 */
static int cmp_qpel(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags)
{
    if (!(flags & FLAG_DIRECT))
        return cmp_inline(s, x, y, subx, suby, size, h, ref_index, src_index,
                          cmp_func, chroma_cmp_func, 1, flags & FLAG_CHROMA);

    return cmp_direct_inline(s, x, y, subx, suby, size, h, ref_index, src_index,
                             cmp_func, chroma_cmp_func, 1);
}
Michael Niedermayer's avatar
Michael Niedermayer committed
288 289 290

#include "motion_est_template.c"

Michael Niedermayer's avatar
Michael Niedermayer committed
291 292 293 294 295 296 297
/**
 * Comparison function that ignores all of its arguments.
 *
 * @return always 0
 */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h)
{
    (void)s;
    (void)a;
    (void)b;
    (void)stride;
    (void)h;

    return 0;
}

/**
 * Pixel function that does nothing: neither buffer is read or written.
 */
static void zero_hpel(uint8_t *a, const uint8_t *b, int stride, int h)
{
    (void)a;
    (void)b;
    (void)stride;
    (void)h;
}

298
/**
 * Initialise the motion estimation context s->me.
 *
 * Validates the diamond sizes and the motion estimation method, installs
 * the comparison functions selected in the codec context, derives the
 * comparison flags, selects the sub-pel search function and the pixel
 * function tables, and sets the strides.
 *
 * @param s encoder context
 * @return 0 on success, -1 if the diamond size or me_method is rejected
 */
int ff_init_me(MpegEncContext *s)
{
    MotionEstContext * const c = &s->me;
    int cache_size = FFMIN(ME_MAP_SIZE>>ME_MAP_SHIFT, 1<<ME_MAP_SHIFT);
    int dia_size   = FFMAX(FFABS(s->avctx->dia_size)&255, FFABS(s->avctx->pre_dia_size)&255);
    int i;

    if (FFMIN(s->avctx->dia_size, s->avctx->pre_dia_size) < -ME_MAP_SIZE) {
        av_log(s->avctx, AV_LOG_ERROR, "ME_MAP size is too small for SAB diamond\n");
        return -1;
    }

    /* snow is exempt from this check because it uses its own iterative ME code */
    if (s->me_method != ME_ZERO &&
        s->me_method != ME_EPZS &&
        s->me_method != ME_X1   &&
        s->avctx->codec_id != AV_CODEC_ID_SNOW) {
        av_log(s->avctx, AV_LOG_ERROR, "me_method is only allowed to be set to zero and epzs; for hex,umh,full and others see dia_size\n");
        return -1;
    }

    c->avctx = s->avctx;

    if (cache_size < 2*dia_size && !c->stride)
        av_log(s->avctx, AV_LOG_INFO, "ME_MAP size may be a little small for the selected diamond size\n");

    /* comparison functions requested through the codec context */
    ff_set_cmp(&s->dsp, s->dsp.me_pre_cmp, c->avctx->me_pre_cmp);
    ff_set_cmp(&s->dsp, s->dsp.me_cmp,     c->avctx->me_cmp);
    ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, c->avctx->me_sub_cmp);
    ff_set_cmp(&s->dsp, s->dsp.mb_cmp,     c->avctx->mb_cmp);

    c->flags     = get_flags(c, 0, c->avctx->me_cmp     & FF_CMP_CHROMA);
    c->sub_flags = get_flags(c, 0, c->avctx->me_sub_cmp & FF_CMP_CHROMA);
    c->mb_flags  = get_flags(c, 0, c->avctx->mb_cmp     & FF_CMP_CHROMA);

    /*FIXME s->no_rounding b_type*/
    if (s->flags & CODEC_FLAG_QPEL) {
        c->sub_motion_search = qpel_motion_search;
        c->qpel_avg = s->dsp.avg_qpel_pixels_tab;
        if (s->no_rounding)
            c->qpel_put = s->dsp.put_no_rnd_qpel_pixels_tab;
        else
            c->qpel_put = s->dsp.put_qpel_pixels_tab;
    } else if (!(c->avctx->me_sub_cmp & FF_CMP_CHROMA) &&
               c->avctx->me_sub_cmp == FF_CMP_SAD &&
               c->avctx->me_cmp     == FF_CMP_SAD &&
               c->avctx->mb_cmp     == FF_CMP_SAD) {
        /* SAD everywhere and no chroma: dedicated search, 2050 vs. 2450 cycles */
        c->sub_motion_search = sad_hpel_motion_search;
    } else {
        c->sub_motion_search = hpel_motion_search;
    }

    c->hpel_avg = s->dsp.avg_pixels_tab;
    if (s->no_rounding)
        c->hpel_put = s->dsp.put_no_rnd_pixels_tab;
    else
        c->hpel_put = s->dsp.put_pixels_tab;

    if (s->linesize) {
        c->stride   = s->linesize;
        c->uvstride = s->uvlinesize;
    } else {
        /* no linesize set: derive the strides from the macroblock width */
        c->stride   = 16*s->mb_width + 32;
        c->uvstride =  8*s->mb_width + 16;
    }

    /* 8x8 fullpel search would need a 4x4 chroma compare, which we do
     * not have yet, and even if we had, the motion estimation code
     * does not expect it. */
    if (s->codec_id != AV_CODEC_ID_SNOW) {
        if ((c->avctx->me_cmp & FF_CMP_CHROMA)/* && !s->dsp.me_cmp[2]*/)
            s->dsp.me_cmp[2] = zero_cmp;
        if ((c->avctx->me_sub_cmp & FF_CMP_CHROMA) && !s->dsp.me_sub_cmp[2])
            s->dsp.me_sub_cmp[2] = zero_cmp;
        for (i = 3; i >= 0; i--)
            c->hpel_put[2][i] = zero_hpel;
    }

    if (s->codec_id == AV_CODEC_ID_H261)
        c->sub_motion_search = no_sub_motion_search;

    return 0;
}
376

377 378
#define Z_THRESHOLD 256

Michael Niedermayer's avatar
Michael Niedermayer committed
379
#define CHECK_SAD_HALF_MV(suffix, x, y) \
380
{\
381
    d= s->dsp.pix_abs[size][(x?1:0)+(y?2:0)](NULL, pix, ptr+((x)>>1), stride, h);\
Michael Niedermayer's avatar
Michael Niedermayer committed
382
    d += (mv_penalty[pen_x + x] + mv_penalty[pen_y + y])*penalty_factor;\
383 384
    COPY3_IF_LT(dminh, d, dx, x, dy, y)\
}
385

Michael Niedermayer's avatar
Michael Niedermayer committed
386
/**
 * Half-pel refinement of a full-pel motion vector using pix_abs (SAD).
 *
 * The scores of the four full-pel neighbours (top, left, right, bottom) in
 * score_map decide which half-pel positions around the current vector are
 * evaluated with CHECK_SAD_HALF_MV. If c->skip is set, the vector is reset
 * to (0,0) and nothing is searched. If the vector is not strictly inside
 * (xmin, xmax) x (ymin, ymax), it is only converted to half-pel units.
 *
 * NOTE(review): score_map, xmin/xmax/ymin/ymax, pred_x/pred_y and mv_penalty
 * are assumed to be declared by LOAD_COMMON, which is defined outside this
 * block -- confirm in motion_est_template.c.
 *
 * @param mx_ptr, my_ptr in: full-pel vector; out: vector in half-pel units
 *                       (or 0,0 when c->skip is set)
 * @param dmin           score of the incoming full-pel vector
 * @param src_index      index into c->src
 * @param ref_index      index into c->ref
 * @param size           index into s->dsp.pix_abs
 * @param h              block height
 * @return best score found (dmin if no half-pel position is better)
 */
static inline int sad_hpel_motion_search(MpegEncContext * s,
                                  int *mx_ptr, int *my_ptr, int dmin,
                                  int src_index, int ref_index,
                                  int size, int h)
{
    MotionEstContext * const c= &s->me;
    const int penalty_factor= c->sub_penalty_factor;
    int mx, my, dminh;
    uint8_t *pix, *ptr;
    int stride= c->stride;
    const int flags= c->sub_flags;
    LOAD_COMMON

    assert(flags == 0);

    if (c->skip) {
        *mx_ptr = 0;
        *my_ptr = 0;
        return dmin;
    }

    pix   = c->src[src_index][0];
    mx    = *mx_ptr;
    my    = *my_ptr;
    ptr   = c->ref[ref_index][0] + (my * stride) + mx;
    dminh = dmin;

    /* on the border of the search area: only rescale to half-pel units */
    if (!(mx > xmin && mx < xmax &&
          my > ymin && my < ymax)) {
        *mx_ptr = mx << 1;
        *my_ptr = my << 1;
        return dminh;
    }

    {
        int dx=0, dy=0;
        int d, pen_x, pen_y;
        const int center= (my<<ME_MAP_SHIFT) + mx;
        const int top   = score_map[(center-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)];
        const int left  = score_map[(center- 1               )&(ME_MAP_SIZE-1)];
        const int right = score_map[(center+ 1               )&(ME_MAP_SIZE-1)];
        const int bottom= score_map[(center+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)];

        mx<<=1;
        my<<=1;

        pen_x= pred_x + mx;
        pen_y= pred_y + my;

        /* ptr points one row up while positions with y == -1 are checked */
        ptr-= stride;
        if (top <= bottom) {
            CHECK_SAD_HALF_MV(y2 , 0, -1)
            if (left <= right) {
                CHECK_SAD_HALF_MV(xy2, -1, -1)
                if (top+right <= bottom+left) {
                    CHECK_SAD_HALF_MV(xy2, +1, -1)
                    ptr+= stride;
                } else {
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, -1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , -1,  0)
            } else {
                CHECK_SAD_HALF_MV(xy2, +1, -1)
                if (top+left <= bottom+right) {
                    CHECK_SAD_HALF_MV(xy2, -1, -1)
                    ptr+= stride;
                } else {
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, +1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , +1,  0)
            }
        } else {
            if (left <= right) {
                if (top+left <= bottom+right) {
                    CHECK_SAD_HALF_MV(xy2, -1, -1)
                    ptr+= stride;
                } else {
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, +1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , -1,  0)
                CHECK_SAD_HALF_MV(xy2, -1, +1)
            } else {
                if (top+right <= bottom+left) {
                    CHECK_SAD_HALF_MV(xy2, +1, -1)
                    ptr+= stride;
                } else {
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, -1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , +1,  0)
                CHECK_SAD_HALF_MV(xy2, +1, +1)
            }
            CHECK_SAD_HALF_MV(y2 ,  0, +1)
        }
        mx+=dx;
        my+=dy;
    }

    *mx_ptr = mx;
    *my_ptr = my;
    return dminh;
}

494
/**
 * Store a motion vector for the current macroblock.
 *
 * The vector is always written to s->p_mv_table at the macroblock position.
 * When mv4 is non-zero it is also written to the 2x2 group of entries of
 * current_picture.f.motion_val[0] starting at s->block_index[0].
 *
 * @param mx, my motion vector to store
 * @param mv4    non-zero to also fill the four motion_val entries
 */
static inline void set_p_mv_tables(MpegEncContext * s, int mx, int my, int mv4)
{
    const int mb_xy = s->mb_x + s->mb_y * s->mb_stride;
    int base, row, col;

    s->p_mv_table[mb_xy][0] = mx;
    s->p_mv_table[mb_xy][1] = my;

    /* has already been set to the 4 MV if 4MV is done */
    if (!mv4)
        return;

    base = s->block_index[0];
    for (row = 0; row < 2; row++) {
        for (col = 0; col < 2; col++) {
            const int idx = base + row * s->b8_stride + col;

            s->current_picture.f.motion_val[0][idx][0] = mx;
            s->current_picture.f.motion_val[0][idx][1] = my;
        }
    }
}

518 519 520
/**
 * get fullpel ME search limits.
 */
521
static inline void get_limits(MpegEncContext *s, int x, int y)
Fabrice Bellard's avatar
Fabrice Bellard committed
522
{
523
    MotionEstContext * const c= &s->me;
524
    int range= c->avctx->me_range >> (1 + !!(c->flags&FLAG_QPEL));
525
/*
526
    if(c->avctx->me_range) c->range= c->avctx->me_range >> 1;
527
    else                   c->range= 16;
528
*/
Fabrice Bellard's avatar
Fabrice Bellard committed
529
    if (s->unrestricted_mv) {
530 531 532 533
        c->xmin = - x - 16;
        c->ymin = - y - 16;
        c->xmax = - x + s->mb_width *16;
        c->ymax = - y + s->mb_height*16;
534 535 536 537
    } else if (s->out_format == FMT_H261){
        // Search range of H261 is different from other codec standards
        c->xmin = (x > 15) ? - 15 : 0;
        c->ymin = (y > 15) ? - 15 : 0;
538
        c->xmax = (x < s->mb_width * 16 - 16) ? 15 : 0;
539
        c->ymax = (y < s->mb_height * 16 - 16) ? 15 : 0;
Fabrice Bellard's avatar
Fabrice Bellard committed
540
    } else {
541 542 543 544
        c->xmin = - x;
        c->ymin = - y;
        c->xmax = - x + s->mb_width *16 - 16;
        c->ymax = - y + s->mb_height*16 - 16;
Fabrice Bellard's avatar
Fabrice Bellard committed
545
    }
546 547 548 549 550 551
    if(range){
        c->xmin = FFMAX(c->xmin,-range);
        c->xmax = FFMIN(c->xmax, range);
        c->ymin = FFMAX(c->ymin,-range);
        c->ymax = FFMIN(c->ymax, range);
    }
552 553
}

554 555
/**
 * Derive the plane-0 pointers of blocks 1..3 from those of block 0.
 *
 * Block 1 is 8 bytes to the right, block 2 is 8 rows down and block 3 is
 * both, for c->ref as well as c->src.
 *
 * @param c motion estimation context; ref[0][0] and src[0][0] must be set
 */
static inline void init_mv4_ref(MotionEstContext *c)
{
    const int stride = c->stride;
    int blk;

    for (blk = 1; blk < 4; blk++) {
        const int off = 8 * (blk & 1) + 8 * stride * (blk >> 1);

        c->ref[blk][0] = c->ref[0][0] + off;
        c->src[blk][0] = c->src[0][0] + off;
    }
}

565
/**
 * Search one motion vector for each of the four 8x8 blocks of the current
 * macroblock.
 *
 * For every block a predictor is built from the left, top and top-right
 * entries of motion_val[0], then epzs_motion_search4() and
 * c->sub_motion_search() are run. The resulting vectors are stored in
 * current_picture.f.motion_val[0]. If me_sub_cmp[0] differs from mb_cmp[0],
 * the prediction is rebuilt in c->scratchpad and scored with mb_cmp[0];
 * if mb_cmp has FF_CMP_CHROMA set, a chroma score is added as well.
 *
 * @param mx, my vector passed to the search as P_MV1; also written to
 *               c->pred_x / c->pred_y before returning
 * @param shift  shift applied to xmin/xmax/ymax when clipping predictors
 * @return INT_MAX if all four vectors equal (mx, my); otherwise the summed
 *         score, plus 11*mb_penalty_factor unless mb_cmp is FF_CMP_RD
 */
static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift)
{
    MotionEstContext * const c = &s->me;
    const int size   = 1;
    const int h      = 8;
    const int stride = c->stride;
    uint8_t *mv_penalty = c->current_mv_penalty;
    int P[10][2];
    int dmin_sum = 0, mx4_sum = 0, my4_sum = 0;
    int same = 1;
    int block;

    init_mv4_ref(c);

    for (block = 0; block < 4; block++) {
        static const int off[4] = {2, 1, 1, -1};
        const int mot_stride = s->b8_stride;
        const int mot_xy     = s->block_index[block];
        int mx4, my4;
        int pred_x4, pred_y4;
        int dmin4;

        P_LEFT[0] = s->current_picture.f.motion_val[0][mot_xy - 1][0];
        P_LEFT[1] = s->current_picture.f.motion_val[0][mot_xy - 1][1];

        if (P_LEFT[0] > (c->xmax<<shift))
            P_LEFT[0] = (c->xmax<<shift);

        if (s->first_slice_line && block < 2) {
            /* special case for first line */
            pred_x4 = P_LEFT[0];
            pred_y4 = P_LEFT[1];
        } else {
            P_TOP[0]      = s->current_picture.f.motion_val[0][mot_xy - mot_stride             ][0];
            P_TOP[1]      = s->current_picture.f.motion_val[0][mot_xy - mot_stride             ][1];
            P_TOPRIGHT[0] = s->current_picture.f.motion_val[0][mot_xy - mot_stride + off[block]][0];
            P_TOPRIGHT[1] = s->current_picture.f.motion_val[0][mot_xy - mot_stride + off[block]][1];

            if (P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]      = (c->ymax<<shift);
            if (P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0] = (c->xmin<<shift);
            if (P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0] = (c->xmax<<shift);
            if (P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1] = (c->ymax<<shift);

            P_MEDIAN[0] = mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
            P_MEDIAN[1] = mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);

            pred_x4 = P_MEDIAN[0];
            pred_y4 = P_MEDIAN[1];
        }
        c->pred_x = pred_x4;
        c->pred_y = pred_y4;

        P_MV1[0] = mx;
        P_MV1[1] = my;

        dmin4 = epzs_motion_search4(s, &mx4, &my4, P, block, block, s->p_mv_table, (1<<16)>>shift);
        dmin4 = c->sub_motion_search(s, &mx4, &my4, dmin4, block, block, size, h);

        if (s->dsp.me_sub_cmp[0] == s->dsp.mb_cmp[0]) {
            dmin_sum += dmin4;
        } else {
            /* rebuild the prediction of this block for the later mb_cmp pass */
            const int offset = ((block&1) + (block>>1)*stride)*8;
            uint8_t *dest_y  = c->scratchpad + offset;
            int dxy;

            if (s->quarter_sample) {
                uint8_t *ref = c->ref[block][0] + (mx4>>2) + (my4>>2)*stride;
                dxy = ((my4 & 3) << 2) | (mx4 & 3);

                if (s->no_rounding)
                    s->dsp.put_no_rnd_qpel_pixels_tab[1][dxy](dest_y, ref, stride);
                else
                    s->dsp.put_qpel_pixels_tab       [1][dxy](dest_y, ref, stride);
            } else {
                uint8_t *ref = c->ref[block][0] + (mx4>>1) + (my4>>1)*stride;
                dxy = ((my4 & 1) << 1) | (mx4 & 1);

                if (s->no_rounding)
                    s->dsp.put_no_rnd_pixels_tab[1][dxy](dest_y, ref, stride, h);
                else
                    s->dsp.put_pixels_tab       [1][dxy](dest_y, ref, stride, h);
            }
            dmin_sum += (mv_penalty[mx4-pred_x4] + mv_penalty[my4-pred_y4])*c->mb_penalty_factor;
        }

        if (s->quarter_sample) {
            mx4_sum += mx4/2;
            my4_sum += my4/2;
        } else {
            mx4_sum += mx4;
            my4_sum += my4;
        }

        s->current_picture.f.motion_val[0][s->block_index[block]][0] = mx4;
        s->current_picture.f.motion_val[0][s->block_index[block]][1] = my4;

        if (mx4 != mx || my4 != my)
            same = 0;
    }

    if (same)
        return INT_MAX;

    if (s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0])
        dmin_sum += s->dsp.mb_cmp[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*16*stride, c->scratchpad, stride, 16);

    if (c->avctx->mb_cmp & FF_CMP_CHROMA) {
        const int cmx = ff_h263_round_chroma(mx4_sum);
        const int cmy = ff_h263_round_chroma(my4_sum);
        const int dxy = ((cmy & 1) << 1) | (cmx & 1);
        const int offset = (s->mb_x*8 + (cmx>>1)) + (s->mb_y*8 + (cmy>>1))*s->uvlinesize;

        if (s->no_rounding) {
            s->dsp.put_no_rnd_pixels_tab[1][dxy](c->scratchpad    , s->last_picture.f.data[1] + offset, s->uvlinesize, 8);
            s->dsp.put_no_rnd_pixels_tab[1][dxy](c->scratchpad + 8, s->last_picture.f.data[2] + offset, s->uvlinesize, 8);
        } else {
            s->dsp.put_pixels_tab       [1][dxy](c->scratchpad    , s->last_picture.f.data[1] + offset, s->uvlinesize, 8);
            s->dsp.put_pixels_tab       [1][dxy](c->scratchpad + 8, s->last_picture.f.data[2] + offset, s->uvlinesize, 8);
        }

        dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.f.data[1] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, c->scratchpad  , s->uvlinesize, 8);
        dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.f.data[2] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, c->scratchpad+8, s->uvlinesize, 8);
    }

    c->pred_x = mx;
    c->pred_y = my;

    /*case FF_CMP_SSE:
        return dmin_sum+ 32*s->qscale*s->qscale;*/
    if ((c->avctx->mb_cmp & 0xFF) == FF_CMP_RD)
        return dmin_sum;

    return dmin_sum + 11*c->mb_penalty_factor;
}

701 702 703 704 705 706 707 708 709 710 711 712 713
/**
 * Set up the pointers of the second field for interlaced search.
 *
 * c->ref[ref_index + 1] and c->src[1] are set one line below
 * c->ref[ref_index] and c->src[0]. Plane 0 uses s->linesize; planes 1 and 2
 * use s->uvlinesize and are only set when FLAG_CHROMA is set in c->flags.
 *
 * @param s         encoder context
 * @param ref_index index of the reference whose second field is set up
 */
static inline void init_interlaced_ref(MpegEncContext *s, int ref_index)
{
    MotionEstContext * const c = &s->me;
    const int nb_planes = (c->flags & FLAG_CHROMA) ? 3 : 1;
    int plane;

    for (plane = 0; plane < nb_planes; plane++) {
        const int line = plane ? s->uvlinesize : s->linesize;

        c->ref[1+ref_index][plane] = c->ref[0+ref_index][plane] + line;
        c->src[1][plane]           = c->src[0][plane]           + line;
    }
}

714
static int interlaced_search(MpegEncContext *s, int ref_index,
715
                             int16_t (*mv_tables[2][2])[2], uint8_t *field_select_tables[2], int mx, int my, int user_field_select)
716
{
717
    MotionEstContext * const c= &s->me;
718 719 720 721
    const int size=0;
    const int h=8;
    int block;
    int P[10][2];
722
    uint8_t * const mv_penalty= c->current_mv_penalty;
723 724 725 726 727
    int same=1;
    const int stride= 2*s->linesize;
    int dmin_sum= 0;
    const int mot_stride= s->mb_stride;
    const int xy= s->mb_x + s->mb_y*mot_stride;
728

729 730 731 732
    c->ymin>>=1;
    c->ymax>>=1;
    c->stride<<=1;
    c->uvstride<<=1;
733
    init_interlaced_ref(s, ref_index);
734

735 736 737 738 739 740
    for(block=0; block<2; block++){
        int field_select;
        int best_dmin= INT_MAX;
        int best_field= -1;

        for(field_select=0; field_select<2; field_select++){
741
            int dmin, mx_i, my_i;
742
            int16_t (*mv_table)[2]= mv_tables[block][field_select];
743

744
            if(user_field_select){
745 746
                assert(field_select==0 || field_select==1);
                assert(field_select_tables[block][xy]==0 || field_select_tables[block][xy]==1);