/*
 * H.26L/H.264/AVC/JVT/14496-10/... loop filter
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * H.264 / AVC / MPEG4 part10 loop filter.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */

#include "libavutil/intreadwrite.h"
#include "internal.h"
#include "dsputil.h"
#include "avcodec.h"
#include "mpegvideo.h"
#include "h264.h"
#include "mathops.h"
#include "rectangle.h"

//#undef NDEBUG
#include <assert.h>

/* Deblocking filter (p153) */
/*
 * Alpha threshold lookup for the edge filters in this file.
 *
 * Layout: 52 leading zeros, the 52 real threshold values, then 52 trailing
 * entries saturated at 255.  The filters index it with
 * qp - qp_bd_offset + slice_alpha_c0_offset.
 * NOTE(review): the padding presumably lets that sum be used without
 * clamping, with the slice offset already biased by 52 -- confirm where
 * slice_alpha_c0_offset is set.
 */
static const uint8_t alpha_table[3*52] = {
    /* [0..51] low padding */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

    /* [52..103] threshold values */
      0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
      0,   0,   0,   4,   4,   5,   6,   7,   8,   9,  10,  12,  13,
     15,  17,  20,  22,  25,  28,  32,  36,  40,  45,  50,  56,  63,
     71,  80,  90, 101, 113, 127, 144, 162, 182, 203, 226, 255, 255,

    /* [104..155] high padding */
    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
};
/*
 * Beta threshold lookup for the edge filters in this file.
 *
 * Layout: 52 leading zeros, the 52 real threshold values, then 52 trailing
 * entries held at the last value (18).  The filters index it with
 * qp - qp_bd_offset + slice_beta_offset.
 * NOTE(review): the padding presumably lets that sum be used without
 * clamping, with the slice offset already biased by 52 -- confirm where
 * slice_beta_offset is set.
 */
static const uint8_t beta_table[3*52] = {
    /* [0..51] low padding */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

    /* [52..103] threshold values */
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  2,  2,  2,  3,  3,  3,  3,  4,  4,  4,
     6,  6,  7,  7,  8,  8,  9,  9, 10, 10, 11, 11, 12,
    12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18,

    /* [104..155] high padding */
    18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
    18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
    18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
    18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
};
/*
 * tc0 lookup for the edge filters in this file: first index is the same
 * padded alpha index used for alpha_table, second index is a bS entry 0..3.
 *
 * Layout: 52 padding rows, the 52 real rows, then 52 rows repeating the last
 * real row.  Column 0 holds -1 in every row.
 *
 * The element type is signed because the table stores -1 and every consumer
 * copies the value (or value + 1 for chroma) into an int8_t tc[] array.  With
 * an unsigned element type the -1 would be stored as 255 and each use would
 * depend on an implementation-defined narrowing conversion back to int8_t.
 */
static const int8_t tc0_table[52*3][4] = {
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 1 },
    {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 1, 1 }, {-1, 0, 1, 1 }, {-1, 1, 1, 1 },
    {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 },
    {-1, 1, 1, 2 }, {-1, 1, 2, 3 }, {-1, 1, 2, 3 }, {-1, 2, 2, 3 }, {-1, 2, 2, 4 }, {-1, 2, 3, 4 },
    {-1, 2, 3, 4 }, {-1, 3, 3, 5 }, {-1, 3, 4, 6 }, {-1, 3, 4, 6 }, {-1, 4, 5, 7 }, {-1, 4, 5, 8 },
    {-1, 4, 6, 9 }, {-1, 5, 7,10 }, {-1, 6, 8,11 }, {-1, 6, 8,13 }, {-1, 7,10,14 }, {-1, 8,11,16 },
    {-1, 9,12,18 }, {-1,10,13,20 }, {-1,11,15,23 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
};

103
static void av_always_inline filter_mb_edgev( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h) {
104 105
    const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8);
    const unsigned int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset;
106
    const int alpha = alpha_table[index_a];
107
    const int beta  = beta_table[qp - qp_bd_offset + h->slice_beta_offset];
108 109 110 111
    if (alpha ==0 || beta == 0) return;

    if( bS[0] < 4 ) {
        int8_t tc[4];
112 113 114 115
        tc[0] = tc0_table[index_a][bS[0]];
        tc[1] = tc0_table[index_a][bS[1]];
        tc[2] = tc0_table[index_a][bS[2]];
        tc[3] = tc0_table[index_a][bS[3]];
116
        h->h264dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
117
    } else {
118
        h->h264dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
119 120
    }
}
121
static void av_always_inline filter_mb_edgecv( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h ) {
122 123
    const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8);
    const unsigned int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset;
124
    const int alpha = alpha_table[index_a];
125
    const int beta  = beta_table[qp - qp_bd_offset + h->slice_beta_offset];
126 127 128 129
    if (alpha ==0 || beta == 0) return;

    if( bS[0] < 4 ) {
        int8_t tc[4];
130 131 132 133
        tc[0] = tc0_table[index_a][bS[0]]+1;
        tc[1] = tc0_table[index_a][bS[1]]+1;
        tc[2] = tc0_table[index_a][bS[2]]+1;
        tc[3] = tc0_table[index_a][bS[3]]+1;
134
        h->h264dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
135
    } else {
136
        h->h264dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
137 138 139
    }
}

140
static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[7], int bsi, int qp ) {
141 142
    const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8);
    int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset;
143
    int alpha = alpha_table[index_a];
144
    int beta  = beta_table[qp - qp_bd_offset + h->slice_beta_offset];
145
    if (alpha ==0 || beta == 0) return;
146

147 148 149 150 151 152 153 154 155
    if( bS[0] < 4 ) {
        int8_t tc[4];
        tc[0] = tc0_table[index_a][bS[0*bsi]];
        tc[1] = tc0_table[index_a][bS[1*bsi]];
        tc[2] = tc0_table[index_a][bS[2*bsi]];
        tc[3] = tc0_table[index_a][bS[3*bsi]];
        h->h264dsp.h264_h_loop_filter_luma_mbaff(pix, stride, alpha, beta, tc);
    } else {
        h->h264dsp.h264_h_loop_filter_luma_mbaff_intra(pix, stride, alpha, beta);
156 157
    }
}
158
static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[7], int bsi, int qp ) {
159 160
    const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8);
    int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset;
161
    int alpha = alpha_table[index_a];
162
    int beta  = beta_table[qp - qp_bd_offset + h->slice_beta_offset];
163
    if (alpha ==0 || beta == 0) return;
164

165 166 167 168 169 170 171 172 173
    if( bS[0] < 4 ) {
        int8_t tc[4];
        tc[0] = tc0_table[index_a][bS[0*bsi]] + 1;
        tc[1] = tc0_table[index_a][bS[1*bsi]] + 1;
        tc[2] = tc0_table[index_a][bS[2*bsi]] + 1;
        tc[3] = tc0_table[index_a][bS[3*bsi]] + 1;
        h->h264dsp.h264_h_loop_filter_chroma_mbaff(pix, stride, alpha, beta, tc);
    } else {
        h->h264dsp.h264_h_loop_filter_chroma_mbaff_intra(pix, stride, alpha, beta);
174 175 176
    }
}

177
static void av_always_inline filter_mb_edgeh( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h ) {
178 179
    const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8);
    const unsigned int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset;
180
    const int alpha = alpha_table[index_a];
181
    const int beta  = beta_table[qp - qp_bd_offset + h->slice_beta_offset];
182 183 184 185
    if (alpha ==0 || beta == 0) return;

    if( bS[0] < 4 ) {
        int8_t tc[4];
186 187 188 189
        tc[0] = tc0_table[index_a][bS[0]];
        tc[1] = tc0_table[index_a][bS[1]];
        tc[2] = tc0_table[index_a][bS[2]];
        tc[3] = tc0_table[index_a][bS[3]];
190
        h->h264dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
191
    } else {
192
        h->h264dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
193 194 195
    }
}

196
static void av_always_inline filter_mb_edgech( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h ) {
197 198
    const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8);
    const unsigned int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset;
199
    const int alpha = alpha_table[index_a];
200
    const int beta  = beta_table[qp - qp_bd_offset + h->slice_beta_offset];
201 202 203 204
    if (alpha ==0 || beta == 0) return;

    if( bS[0] < 4 ) {
        int8_t tc[4];
205 206 207 208
        tc[0] = tc0_table[index_a][bS[0]]+1;
        tc[1] = tc0_table[index_a][bS[1]]+1;
        tc[2] = tc0_table[index_a][bS[2]]+1;
        tc[3] = tc0_table[index_a][bS[3]]+1;
209
        h->h264dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
210
    } else {
211
        h->h264dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
212 213 214 215 216
    }
}

/**
 * Deblock one macroblock using the simplified path.
 *
 * Falls back to ff_h264_filter_mb() when there is no top neighbour type, no
 * h264_loop_filter_strength DSP routine is installed, or the PPS has a
 * chroma QP difference.  Otherwise it:
 *  - averages the current QP with the left (qp0/qpc0) and top (qp1/qpc1)
 *    neighbours' QPs,
 *  - returns without filtering when every QP is at or below qp_thresh,
 *  - for intra macroblocks filters all edges with constant strengths,
 *  - for other macroblocks obtains the strengths in bS[dir][edge][4] and
 *    filters every edge whose four strengths are not all zero.
 *
 * @param h          decoder context
 * @param mb_x       macroblock column, only forwarded to ff_h264_filter_mb()
 * @param mb_y       macroblock row, only forwarded to ff_h264_filter_mb()
 * @param img_y      luma samples of this macroblock
 * @param img_cb     Cb samples of this macroblock
 * @param img_cr     Cr samples of this macroblock
 * @param linesize   luma line stride
 * @param uvlinesize chroma line stride
 */
void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
    MpegEncContext * const s = &h->s;
    int mb_xy;
    int mb_type, left_type;
    int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
    int chroma = !(CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
    int chroma444 = CHROMA444;

    mb_xy = h->mb_xy;

    if(!h->top_type || !h->h264dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff) {
        ff_h264_filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
        return;
    }
    assert(!FRAME_MBAFF);
    left_type= h->left_type[0];

    mb_type = s->current_picture.mb_type[mb_xy];
    qp = s->current_picture.qscale_table[mb_xy];
    qp0 = s->current_picture.qscale_table[mb_xy-1];
    qp1 = s->current_picture.qscale_table[h->top_mb_xy];
    qpc = get_chroma_qp( h, 0, qp );
    qpc0 = get_chroma_qp( h, 0, qp0 );
    qpc1 = get_chroma_qp( h, 0, qp1 );
    // from here on qp0/qpc0 and qp1/qpc1 are the rounded averages with the
    // left and top neighbour respectively
    qp0 = (qp + qp0 + 1) >> 1;
    qp1 = (qp + qp1 + 1) >> 1;
    qpc0 = (qpc + qpc0 + 1) >> 1;
    qpc1 = (qpc + qpc1 + 1) >> 1;
    // NOTE(review): 15 is the last real index with alpha == 0 and 52 the table
    // padding; this looks like "alpha would be 0 for every edge" -- confirm
    qp_thresh = 15+52 - h->slice_alpha_c0_offset;
    if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
       qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
        return;

    if( IS_INTRA(mb_type) ) {
        int16_t bS4[4] = {4,4,4,4};
        int16_t bS3[4] = {3,3,3,3};
        // strength of the top macroblock edge: 3 in field pictures, else 4
        int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
        if(left_type)
            filter_mb_edgev( &img_y[4*0], linesize, bS4, qp0, h);
        if( IS_8x8DCT(mb_type) ) {
            filter_mb_edgev( &img_y[4*2], linesize, bS3, qp, h);
            filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h);
            filter_mb_edgeh( &img_y[4*2*linesize], linesize, bS3, qp, h);
        } else {
            filter_mb_edgev( &img_y[4*1], linesize, bS3, qp, h);
            filter_mb_edgev( &img_y[4*2], linesize, bS3, qp, h);
            filter_mb_edgev( &img_y[4*3], linesize, bS3, qp, h);
            filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h);
            filter_mb_edgeh( &img_y[4*1*linesize], linesize, bS3, qp, h);
            filter_mb_edgeh( &img_y[4*2*linesize], linesize, bS3, qp, h);
            filter_mb_edgeh( &img_y[4*3*linesize], linesize, bS3, qp, h);
        }
        if(chroma){
            if(chroma444){
                // 4:4:4: chroma planes take the luma filters and luma stride
                if(left_type){
                    filter_mb_edgev( &img_cb[4*0], linesize, bS4, qpc0, h);
                    filter_mb_edgev( &img_cr[4*0], linesize, bS4, qpc0, h);
                }
                if( IS_8x8DCT(mb_type) ) {
                    filter_mb_edgev( &img_cb[4*2], linesize, bS3, qpc, h);
                    filter_mb_edgev( &img_cr[4*2], linesize, bS3, qpc, h);
                    filter_mb_edgeh( &img_cb[4*0*linesize], linesize, bSH, qpc1, h);
                    filter_mb_edgeh( &img_cr[4*0*linesize], linesize, bSH, qpc1, h);
                    filter_mb_edgeh( &img_cb[4*2*linesize], linesize, bS3, qpc, h);
                    filter_mb_edgeh( &img_cr[4*2*linesize], linesize, bS3, qpc, h);
                } else {
                    filter_mb_edgev( &img_cb[4*1], linesize, bS3, qpc, h);
                    filter_mb_edgev( &img_cr[4*1], linesize, bS3, qpc, h);
                    filter_mb_edgev( &img_cb[4*2], linesize, bS3, qpc, h);
                    filter_mb_edgev( &img_cr[4*2], linesize, bS3, qpc, h);
                    filter_mb_edgev( &img_cb[4*3], linesize, bS3, qpc, h);
                    filter_mb_edgev( &img_cr[4*3], linesize, bS3, qpc, h);
                    filter_mb_edgeh( &img_cb[4*0*linesize], linesize, bSH, qpc1, h);
                    filter_mb_edgeh( &img_cr[4*0*linesize], linesize, bSH, qpc1, h);
                    filter_mb_edgeh( &img_cb[4*1*linesize], linesize, bS3, qpc, h);
                    filter_mb_edgeh( &img_cr[4*1*linesize], linesize, bS3, qpc, h);
                    filter_mb_edgeh( &img_cb[4*2*linesize], linesize, bS3, qpc, h);
                    filter_mb_edgeh( &img_cr[4*2*linesize], linesize, bS3, qpc, h);
                    filter_mb_edgeh( &img_cb[4*3*linesize], linesize, bS3, qpc, h);
                    filter_mb_edgeh( &img_cr[4*3*linesize], linesize, bS3, qpc, h);
                }
            }else{
                if(left_type){
                    filter_mb_edgecv( &img_cb[2*0], uvlinesize, bS4, qpc0, h);
                    filter_mb_edgecv( &img_cr[2*0], uvlinesize, bS4, qpc0, h);
                }
                filter_mb_edgecv( &img_cb[2*2], uvlinesize, bS3, qpc, h);
                filter_mb_edgecv( &img_cr[2*2], uvlinesize, bS3, qpc, h);
                filter_mb_edgech( &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1, h);
                filter_mb_edgech( &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc, h);
                filter_mb_edgech( &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1, h);
                filter_mb_edgech( &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc, h);
            }
        }
        return;
    } else {
        // bS[dir][edge][i]: dir 0 is passed to the "v" filters, dir 1 to the
        // "h" filters (see the FILTER() uses below)
        LOCAL_ALIGNED_8(int16_t, bS, [2], [4][4]);
        int edges;
        if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
            edges = 4;
            // strength 2 on edges 0 and 2 in both directions
            AV_WN64A(bS[0][0], 0x0002000200020002ULL);
            AV_WN64A(bS[0][2], 0x0002000200020002ULL);
            AV_WN64A(bS[1][0], 0x0002000200020002ULL);
            AV_WN64A(bS[1][2], 0x0002000200020002ULL);
        } else {
            int mask_edge1 = (3*(((5*mb_type)>>5)&1)) | (mb_type>>4); //(mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : (mb_type & MB_TYPE_16x8) ? 1 : 0;
            int mask_edge0 = 3*((mask_edge1>>1) & ((5*left_type)>>5)&1); // (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) && (h->left_type[0] & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : 0;
            int step =  1+(mb_type>>24); //IS_8x8DCT(mb_type) ? 2 : 1;
            edges = 4 - 3*((mb_type>>3) & !(h->cbp & 15)); //(mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
            h->h264dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
                                              h->list_count==2, edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
        }
        // an intra neighbour overrides the computed strength on the shared edge
        if( IS_INTRA(left_type) )
            AV_WN64A(bS[0][0], 0x0004000400040004ULL);
        if( IS_INTRA(h->top_type) )
            AV_WN64A(bS[1][0], FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL);

// FILTER(hv,dir,edge): if any of the four strengths of bS[dir][edge] is
// non-zero, filter that edge in luma and (when enabled) chroma.  Edge 0 uses
// the QP averaged with the neighbour (qp0/qp1, qpc0/qpc1), inner edges use
// the macroblock's own QP.  Non-4:4:4 chroma is filtered on even edges only.
#define FILTER(hv,dir,edge)\
        if(AV_RN64A(bS[dir][edge])) {                                   \
            filter_mb_edge##hv( &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir, h );\
            if(chroma){\
                if(chroma444){\
                    filter_mb_edge##hv( &img_cb[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\
                    filter_mb_edge##hv( &img_cr[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\
                } else if(!(edge&1)) {\
                    filter_mb_edgec##hv( &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\
                    filter_mb_edgec##hv( &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\
                }\
            }\
        }
        if(left_type)
            FILTER(v,0,0);
        if( edges == 1 ) {
            FILTER(h,1,0);
        } else if( IS_8x8DCT(mb_type) ) {
            FILTER(v,0,2);
            FILTER(h,1,0);
            FILTER(h,1,2);
        } else {
            FILTER(v,0,1);
            FILTER(v,0,2);
            FILTER(v,0,3);
            FILTER(h,1,0);
            FILTER(h,1,1);
            FILTER(h,1,2);
            FILTER(h,1,3);
        }
#undef FILTER
    }
}

366
static int check_mv(H264Context *h, long b_idx, long bn_idx, int mvy_limit){
367
    int v;
368

369 370 371 372
    v= h->ref_cache[0][b_idx] != h->ref_cache[0][bn_idx];
    if(!v && h->ref_cache[0][b_idx]!=-1)
        v= h->mv_cache[0][b_idx][0] - h->mv_cache[0][bn_idx][0] + 3 >= 7U |
           FFABS( h->mv_cache[0][b_idx][1] - h->mv_cache[0][bn_idx][1] ) >= mvy_limit;
373 374 375

    if(h->list_count==2){
        if(!v)
Michael Niedermayer's avatar
indent  
Michael Niedermayer committed
376 377 378 379 380 381 382 383 384 385 386 387 388 389
            v = h->ref_cache[1][b_idx] != h->ref_cache[1][bn_idx] |
                h->mv_cache[1][b_idx][0] - h->mv_cache[1][bn_idx][0] + 3 >= 7U |
                FFABS( h->mv_cache[1][b_idx][1] - h->mv_cache[1][bn_idx][1] ) >= mvy_limit;

        if(v){
            if(h->ref_cache[0][b_idx] != h->ref_cache[1][bn_idx] |
               h->ref_cache[1][b_idx] != h->ref_cache[0][bn_idx])
                return 1;
            return
                h->mv_cache[0][b_idx][0] - h->mv_cache[1][bn_idx][0] + 3 >= 7U |
                FFABS( h->mv_cache[0][b_idx][1] - h->mv_cache[1][bn_idx][1] ) >= mvy_limit |
                h->mv_cache[1][b_idx][0] - h->mv_cache[0][bn_idx][0] + 3 >= 7U |
                FFABS( h->mv_cache[1][b_idx][1] - h->mv_cache[0][bn_idx][1] ) >= mvy_limit;
        }
390
    }
391 392 393

    return v;
}
394

395
static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int chroma, int chroma444, int dir) {
396 397
    MpegEncContext * const s = &h->s;
    int edge;
398
    int chroma_qp_avg[2];
399
    const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
400
    const int mbm_type = dir == 0 ? h->left_type[0] : h->top_type;
401 402

    // how often to recheck mv-based bS when iterating between edges
403 404 405
    static const uint8_t mask_edge_tab[2][8]={{0,3,3,3,1,1,1,1},
                                              {0,3,1,1,3,3,3,3}};
    const int mask_edge = mask_edge_tab[dir][(mb_type>>3)&7];
406 407
    const int edges = mask_edge== 3 && !(h->cbp&15) ? 1 : 4;

408 409 410
    // how often to recheck mv-based bS when iterating along each edge
    const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));

411
    if(mbm_type && !first_vertical_edge_done){
412

413
        if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0)
414 415 416 417 418 419 420 421 422 423 424 425
            && IS_INTERLACED(mbm_type&~mb_type)
            ) {
            // This is a special case in the norm where the filtering must
            // be done twice (one each of the field) even if we are in a
            // frame macroblock.
            //
            unsigned int tmp_linesize   = 2 *   linesize;
            unsigned int tmp_uvlinesize = 2 * uvlinesize;
            int mbn_xy = mb_xy - 2 * s->mb_stride;
            int j;

            for(j=0; j<2; j++, mbn_xy += s->mb_stride){
426
                DECLARE_ALIGNED(8, int16_t, bS)[4];
427 428
                int qp;
                if( IS_INTRA(mb_type|s->current_picture.mb_type[mbn_xy]) ) {
Måns Rullgård's avatar
Måns Rullgård committed
429
                    AV_WN64A(bS, 0x0003000300030003ULL);
430
                } else {
431
                    if(!CABAC && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])){
432 433 434 435
                        bS[0]= 1+((h->cbp_table[mbn_xy] & 0x4000)||h->non_zero_count_cache[scan8[0]+0]);
                        bS[1]= 1+((h->cbp_table[mbn_xy] & 0x4000)||h->non_zero_count_cache[scan8[0]+1]);
                        bS[2]= 1+((h->cbp_table[mbn_xy] & 0x8000)||h->non_zero_count_cache[scan8[0]+2]);
                        bS[3]= 1+((h->cbp_table[mbn_xy] & 0x8000)||h->non_zero_count_cache[scan8[0]+3]);
436
                    }else{
437
                    const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy] + 3*4;
438 439 440 441
                    int i;
                    for( i = 0; i < 4; i++ ) {
                        bS[i] = 1 + !!(h->non_zero_count_cache[scan8[0]+i] | mbn_nnz[i]);
                    }
442
                    }
443
                }
444 445 446 447 448 449
                // Do not use s->qscale as luma quantizer because it has not the same
                // value in IPCM macroblocks.
                qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
                tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
                { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
                filter_mb_edgeh( &img_y[j*linesize], tmp_linesize, bS, qp, h );
450 451
                chroma_qp_avg[0] = (h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
                chroma_qp_avg[1] = (h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
452 453 454 455 456 457 458 459
                if (chroma) {
                    if (chroma444) {
                        filter_mb_edgeh (&img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[0], h);
                        filter_mb_edgeh (&img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[1], h);
                    } else {
                        filter_mb_edgech(&img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[0], h);
                        filter_mb_edgech(&img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[1], h);
                    }
460
                }
461
            }
462
        }else{
463
            DECLARE_ALIGNED(8, int16_t, bS)[4];
464
            int qp;
465

466
            if( IS_INTRA(mb_type|mbm_type)) {
Måns Rullgård's avatar
Måns Rullgård committed
467
                AV_WN64A(bS, 0x0003000300030003ULL);
468 469 470
                if (   (!IS_INTERLACED(mb_type|mbm_type))
                    || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
                )
Måns Rullgård's avatar
Måns Rullgård committed
471
                    AV_WN64A(bS, 0x0004000400040004ULL);
472
            } else {
473
                int i;
474
                int mv_done;
475

476
                if( dir && FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbm_type)) {
Måns Rullgård's avatar
Måns Rullgård committed
477
                    AV_WN64A(bS, 0x0001000100010001ULL);
478
                    mv_done = 1;
479
                }
480 481 482
                else if( mask_par0 && ((mbm_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
                    int b_idx= 8 + 4;
                    int bn_idx= b_idx - (dir ? 8:1);
483

484
                    bS[0] = bS[1] = bS[2] = bS[3] = check_mv(h, 8 + 4, bn_idx, mvy_limit);
485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501
                    mv_done = 1;
                }
                else
                    mv_done = 0;

                for( i = 0; i < 4; i++ ) {
                    int x = dir == 0 ? 0 : i;
                    int y = dir == 0 ? i    : 0;
                    int b_idx= 8 + 4 + x + 8*y;
                    int bn_idx= b_idx - (dir ? 8:1);

                    if( h->non_zero_count_cache[b_idx] |
                        h->non_zero_count_cache[bn_idx] ) {
                        bS[i] = 2;
                    }
                    else if(!mv_done)
                    {
502
                        bS[i] = check_mv(h, b_idx, bn_idx, mvy_limit);
503 504 505 506
                    }
                }
            }

507 508 509 510 511 512 513 514
            /* Filter edge */
            // Do not use s->qscale as luma quantizer because it has not the same
            // value in IPCM macroblocks.
            if(bS[0]+bS[1]+bS[2]+bS[3]){
                qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbm_xy] + 1 ) >> 1;
                //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp[0], s->current_picture.qscale_table[mbn_xy]);
                tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
                //{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
515 516
                chroma_qp_avg[0] = (h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1;
                chroma_qp_avg[1] = (h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1;
517 518
                if( dir == 0 ) {
                    filter_mb_edgev( &img_y[0], linesize, bS, qp, h );
519
                    if (chroma) {
520 521 522 523 524 525 526
                        if (chroma444) {
                            filter_mb_edgev ( &img_cb[0], uvlinesize, bS, chroma_qp_avg[0], h);
                            filter_mb_edgev ( &img_cr[0], uvlinesize, bS, chroma_qp_avg[1], h);
                        } else {
                            filter_mb_edgecv( &img_cb[0], uvlinesize, bS, chroma_qp_avg[0], h);
                            filter_mb_edgecv( &img_cr[0], uvlinesize, bS, chroma_qp_avg[1], h);
                        }
527 528 529
                    }
                } else {
                    filter_mb_edgeh( &img_y[0], linesize, bS, qp, h );
530
                    if (chroma) {
531 532 533 534 535 536 537
                        if (chroma444) {
                            filter_mb_edgeh ( &img_cb[0], uvlinesize, bS, chroma_qp_avg[0], h);
                            filter_mb_edgeh ( &img_cr[0], uvlinesize, bS, chroma_qp_avg[1], h);
                        } else {
                            filter_mb_edgech( &img_cb[0], uvlinesize, bS, chroma_qp_avg[0], h);
                            filter_mb_edgech( &img_cr[0], uvlinesize, bS, chroma_qp_avg[1], h);
                        }
538 539
                    }
                }
540 541
            }
        }
542 543
    }

544 545
    /* Calculate bS */
    for( edge = 1; edge < edges; edge++ ) {
546
        DECLARE_ALIGNED(8, int16_t, bS)[4];
547 548
        int qp;

549
        if( IS_8x8DCT(mb_type & (edge<<24)) ) // (edge&1) && IS_8x8DCT(mb_type)
550 551
            continue;

552
        if( IS_INTRA(mb_type)) {
Måns Rullgård's avatar
Måns Rullgård committed
553
            AV_WN64A(bS, 0x0003000300030003ULL);
554
        } else {
555
            int i;
556 557 558
            int mv_done;

            if( edge & mask_edge ) {
Måns Rullgård's avatar
Måns Rullgård committed
559
                AV_ZERO64(bS);
560 561
                mv_done = 1;
            }
562
            else if( mask_par0 ) {
563 564 565
                int b_idx= 8 + 4 + edge * (dir ? 8:1);
                int bn_idx= b_idx - (dir ? 8:1);

566
                bS[0] = bS[1] = bS[2] = bS[3] = check_mv(h, b_idx, bn_idx, mvy_limit);
567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583
                mv_done = 1;
            }
            else
                mv_done = 0;

            for( i = 0; i < 4; i++ ) {
                int x = dir == 0 ? edge : i;
                int y = dir == 0 ? i    : edge;
                int b_idx= 8 + 4 + x + 8*y;
                int bn_idx= b_idx - (dir ? 8:1);

                if( h->non_zero_count_cache[b_idx] |
                    h->non_zero_count_cache[bn_idx] ) {
                    bS[i] = 2;
                }
                else if(!mv_done)
                {
584
                    bS[i] = check_mv(h, b_idx, bn_idx, mvy_limit);
585 586 587 588 589 590 591 592 593 594
                }
            }

            if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
                continue;
        }

        /* Filter edge */
        // Do not use s->qscale as luma quantizer because it has not the same
        // value in IPCM macroblocks.
595
        qp = s->current_picture.qscale_table[mb_xy];
596
        //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp[0], s->current_picture.qscale_table[mbn_xy]);
597
        tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
Michael Niedermayer's avatar
Michael Niedermayer committed
598
        //{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
599
        if( dir == 0 ) {
600
            filter_mb_edgev( &img_y[4*edge << h->pixel_shift], linesize, bS, qp, h );
601 602 603 604 605 606 607 608
            if (chroma) {
                if (chroma444) {
                    filter_mb_edgev ( &img_cb[4*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[0], h);
                    filter_mb_edgev ( &img_cr[4*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[1], h);
                } else if( (edge&1) == 0 ) {
                    filter_mb_edgecv( &img_cb[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[0], h);
                    filter_mb_edgecv( &img_cr[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[1], h);
                }
609 610
            }
        } else {
611
            filter_mb_edgeh( &img_y[4*edge*linesize], linesize, bS, qp, h );
612 613 614 615 616 617 618 619
            if (chroma) {
                if (chroma444) {
                    filter_mb_edgeh ( &img_cb[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], h);
                    filter_mb_edgeh ( &img_cr[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], h);
                } else if( (edge&1) == 0 ) {
                    filter_mb_edgech( &img_cb[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], h);
                    filter_mb_edgech( &img_cr[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], h);
                }
620 621 622 623 624 625 626 627 628 629 630 631
            }
        }
    }
}

/**
 * Apply the loop filter to one macroblock.
 *
 * If the frame is MBAFF, the left neighbour is present and its interlaced
 * type differs from the current macroblock's, the first vertical edge is
 * filtered here with eight boundary strengths and two sets of QP values.
 * All remaining edges are delegated to filter_mb_dir(), once per direction;
 * the dir 0 call is told whether the first vertical edge was already done.
 *
 * Chroma planes are left untouched when CONFIG_GRAY is enabled and
 * CODEC_FLAG_GRAY is set in the codec flags.
 *
 * @param h          H.264 decoder context
 * @param mb_x       macroblock x index (used with mb_y to form the table index)
 * @param mb_y       macroblock y index
 * @param img_y      pointer to the luma samples of this macroblock
 * @param img_cb     pointer to the Cb samples of this macroblock
 * @param img_cr     pointer to the Cr samples of this macroblock
 * @param linesize   line stride used with img_y
 * @param uvlinesize line stride used with img_cb and img_cr
 */
void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
    MpegEncContext * const s = &h->s;
    const int mb_xy= mb_x + mb_y*s->mb_stride;
    const int mb_type = s->current_picture.mb_type[mb_xy];
    const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
    int first_vertical_edge_done = 0;
    av_unused int dir; // only referenced by the CONFIG_SMALL loop below
    int chroma = !(CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));

    if (FRAME_MBAFF
            // and current and left pair do not have the same interlaced type
            && IS_INTERLACED(mb_type^h->left_type[0])
            // and the left mb is available to us
            && h->left_type[0]) {
        /* First vertical edge is different in MBAFF frames
         * There are 8 different bS to compute and 2 different Qp
         */
        DECLARE_ALIGNED(8, int16_t, bS)[8];
        int qp[2];
        int bqp[2];
        int rqp[2];
        int mb_qp, mbn0_qp, mbn1_qp;
        int i;
        first_vertical_edge_done = 1;

        if( IS_INTRA(mb_type) ) {
            /* Two 64-bit stores set all eight 16-bit entries to 4. */
            AV_WN64A(&bS[0], 0x0004000400040004ULL);
            AV_WN64A(&bS[4], 0x0004000400040004ULL);
        } else {
            /* Index into the left neighbour's non_zero_count[] for each of
             * the eight bS entries, selected by [MB_FIELD][mb_y&1].
             * NOTE(review): values 3+4*k presumably address the neighbour's
             * right-most column of 4x4 blocks -- confirm against the
             * non_zero_count layout. */
            static const uint8_t offset[2][2][8]={
                {
                    {3+4*0, 3+4*0, 3+4*0, 3+4*0, 3+4*1, 3+4*1, 3+4*1, 3+4*1},
                    {3+4*2, 3+4*2, 3+4*2, 3+4*2, 3+4*3, 3+4*3, 3+4*3, 3+4*3},
                },{
                    {3+4*0, 3+4*1, 3+4*2, 3+4*3, 3+4*0, 3+4*1, 3+4*2, 3+4*3},
                    {3+4*0, 3+4*1, 3+4*2, 3+4*3, 3+4*0, 3+4*1, 3+4*2, 3+4*3},
                }
            };
            const uint8_t *off= offset[MB_FIELD][mb_y&1];
            for( i = 0; i < 8; i++ ) {
                /* Which of the two left neighbours this entry borders. */
                int j= MB_FIELD ? i>>2 : i&1;
                int mbn_xy = h->left_mb_xy[j];
                int mbn_type= h->left_type[j];

                if( IS_INTRA( mbn_type ) )
                    bS[i] = 4;
                else{
                    /* 2 if either side has a non-zero indication, else 1.
                     * Without CABAC and with an 8x8-transform neighbour, a
                     * bit of cbp_table is tested instead of the neighbour's
                     * non_zero_count entry. */
                    bS[i] = 1 + !!(h->non_zero_count_cache[12+8*(i>>1)] |
                         ((!h->pps.cabac && IS_8x8DCT(mbn_type)) ?
                            (h->cbp_table[mbn_xy] & (((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2) << 12))
                                                                       :
                            h->non_zero_count[mbn_xy][ off[i] ]));
                }
            }
        }

        /* Rounded average of the current QP with each left neighbour's QP,
         * for luma (qp), Cb (bqp) and Cr (rqp). */
        mb_qp = s->current_picture.qscale_table[mb_xy];
        mbn0_qp = s->current_picture.qscale_table[h->left_mb_xy[0]];
        mbn1_qp = s->current_picture.qscale_table[h->left_mb_xy[1]];
        qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
        bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
                   get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
        rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
                   get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
        qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
        bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
                   get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
        rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
                   get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;

        /* Filter edge */
        tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
        for (i = 0; i < 8; i++) {
            tprintf(s->avctx, " bS[%d]:%d", i, bS[i]);
        }
        tprintf(s->avctx, "\n");

        if(MB_FIELD){
            /* Upper and lower halves are filtered separately:
             * bS[0..3] with qp[0], then bS[4..7] with qp[1]. */
            filter_mb_mbaff_edgev ( h, img_y                ,   linesize, bS  , 1, qp [0] );
            filter_mb_mbaff_edgev ( h, img_y  + 8*  linesize,   linesize, bS+4, 1, qp [1] );
            if (chroma){
                if (CHROMA444) {
                    filter_mb_mbaff_edgev ( h, img_cb,                uvlinesize, bS  , 1, bqp[0] );
                    filter_mb_mbaff_edgev ( h, img_cb + 8*uvlinesize, uvlinesize, bS+4, 1, bqp[1] );
                    filter_mb_mbaff_edgev ( h, img_cr,                uvlinesize, bS  , 1, rqp[0] );
                    filter_mb_mbaff_edgev ( h, img_cr + 8*uvlinesize, uvlinesize, bS+4, 1, rqp[1] );
                }else{
                    filter_mb_mbaff_edgecv( h, img_cb,                uvlinesize, bS  , 1, bqp[0] );
                    filter_mb_mbaff_edgecv( h, img_cb + 4*uvlinesize, uvlinesize, bS+4, 1, bqp[1] );
                    filter_mb_mbaff_edgecv( h, img_cr,                uvlinesize, bS  , 1, rqp[0] );
                    filter_mb_mbaff_edgecv( h, img_cr + 4*uvlinesize, uvlinesize, bS+4, 1, rqp[1] );
                }
            }
        }else{
            /* Alternate lines are filtered separately (doubled stride):
             * lines starting at offset 0 use bS[0], bS[2], ... with qp[0],
             * lines starting at offset 1 use bS[1], bS[3], ... with qp[1]. */
            filter_mb_mbaff_edgev ( h, img_y              , 2*  linesize, bS  , 2, qp [0] );
            filter_mb_mbaff_edgev ( h, img_y  +   linesize, 2*  linesize, bS+1, 2, qp [1] );
            if (chroma){
                if (CHROMA444) {
                    filter_mb_mbaff_edgev ( h, img_cb,              2*uvlinesize, bS  , 2, bqp[0] );
                    filter_mb_mbaff_edgev ( h, img_cb + uvlinesize, 2*uvlinesize, bS+1, 2, bqp[1] );
                    filter_mb_mbaff_edgev ( h, img_cr,              2*uvlinesize, bS  , 2, rqp[0] );
                    filter_mb_mbaff_edgev ( h, img_cr + uvlinesize, 2*uvlinesize, bS+1, 2, rqp[1] );
                }else{
                    filter_mb_mbaff_edgecv( h, img_cb,              2*uvlinesize, bS  , 2, bqp[0] );
                    filter_mb_mbaff_edgecv( h, img_cb + uvlinesize, 2*uvlinesize, bS+1, 2, bqp[1] );
                    filter_mb_mbaff_edgecv( h, img_cr,              2*uvlinesize, bS  , 2, rqp[0] );
                    filter_mb_mbaff_edgecv( h, img_cr + uvlinesize, 2*uvlinesize, bS+1, 2, rqp[1] );
                }
            }
        }
    }

    /* Filter the remaining edges: dir 0 first, then dir 1. Only the dir 0
     * call is told that the first vertical edge has already been handled. */
#if CONFIG_SMALL
    for( dir = 0; dir < 2; dir++ )
        filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, chroma, CHROMA444, dir);
#else
    filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, chroma, CHROMA444, 0);
    filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, chroma, CHROMA444, 1);
#endif
}