svq3.c 39.7 KB
Newer Older
1
/*
 * Copyright (c) 2003 The Libav Project
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/*
 * How to use this decoder:
 * SVQ3 data is transported within Apple Quicktime files. Quicktime files
 * have stsd atoms to describe media trak properties. A stsd atom for a
 * video trak contains 1 or more ImageDescription atoms. These atoms begin
 * with the 4-byte length of the atom followed by the codec fourcc. Some
 * decoders need information in this atom to operate correctly. Such
 * is the case with SVQ3. In order to get the best use out of this decoder,
 * the calling app must make the SVQ3 ImageDescription atom available
 * via the AVCodecContext's extradata[_size] field:
 *
 * AVCodecContext.extradata = pointer to ImageDescription, first characters
 * are expected to be 'S', 'V', 'Q', and '3', NOT the 4-byte atom length
 * AVCodecContext.extradata_size = size of ImageDescription atom memory
 * buffer (which will be the same as the ImageDescription atom size field
 * from the QT file, minus 4 bytes since the length is missing)
 *
 * You will know you have these parameters passed correctly when the decoder
 * correctly decodes this file:
 *  http://samples.libav.org/V-codecs/SVQ3/Vertical400kbit.sorenson3.mov
 */
42 43 44 45 46 47 48 49
#include "internal.h"
#include "dsputil.h"
#include "avcodec.h"
#include "mpegvideo.h"
#include "h264.h"

#include "h264data.h" //FIXME FIXME FIXME

50
#include "h264_mvpred.h"
51 52 53 54
#include "golomb.h"
#include "rectangle.h"
#include "vdpau_internal.h"

55
#if CONFIG_ZLIB
56 57 58
#include <zlib.h>
#endif

59 60
#include "svq1.h"

61
/**
 * @file
 * svq3 decoder.
 */

66 67 68 69 70 71 72 73 74
/* Private decoder state: the shared H.264 context plus SVQ3-specific fields. */
typedef struct {
    H264Context h;
    int halfpel_flag;      /* consulted with thirdpel_flag when choosing the MV precision of an inter macroblock */
    int thirdpel_flag;     /* consulted with halfpel_flag when choosing the MV precision of an inter macroblock */
    int unknown_flag;      /* when set, the slice header parser skips one additional bit */
    int next_slice_index;  /* bit position computed by the slice header parser from the slice length field */
    uint32_t watermark_key; /* when non-zero, XORed into 32 bits of slice data by the slice header parser */
} SVQ3Context;

75 76
/* Motion vector modes passed as the "mode" argument of svq3_mc_dir(). */
#define FULLPEL_MODE  1
#define HALFPEL_MODE  2
#define THIRDPEL_MODE 3
#define PREDICT_MODE  4
79

Michael Niedermayer's avatar
Michael Niedermayer committed
80 81 82 83 84 85
/* dual scan (from some older h264 draft)
 o-->o-->o   o
         |  /|
 o   o   o / o
 | / |   |/  |
 o   o   o   o
   /
 o-->o-->o-->o
*/
89 90 91 92 93
/* Coefficient scan order for the dual scan; each entry is x + y*4 within a 4x4 block. */
static const uint8_t svq3_scan[16] = {
    0+0*4, 1+0*4, 2+0*4, 2+1*4,
    2+2*4, 3+0*4, 3+1*4, 3+2*4,
    0+1*4, 0+2*4, 1+1*4, 1+2*4,
    0+3*4, 1+3*4, 2+3*4, 3+3*4,
};

/*
 * Maps a luma prediction code (0..24) to a pair of ranks; each rank is used
 * as the innermost index into svq3_pred_1 for one of two adjacent 4x4 blocks.
 */
static const uint8_t svq3_pred_0[25][2] = {
    { 0, 0 },
    { 1, 0 }, { 0, 1 },
    { 0, 2 }, { 1, 1 }, { 2, 0 },
    { 3, 0 }, { 2, 1 }, { 1, 2 }, { 0, 3 },
    { 0, 4 }, { 1, 3 }, { 2, 2 }, { 3, 1 }, { 4, 0 },
    { 4, 1 }, { 3, 2 }, { 2, 3 }, { 1, 4 },
    { 2, 4 }, { 3, 3 }, { 4, 2 },
    { 4, 3 }, { 3, 4 },
    { 4, 4 }
};

/*
 * Intra 4x4 prediction mode lookup, indexed as
 * [top mode + 1][left mode + 1][rank from svq3_pred_0].
 * An entry of -1 marks a combination the decoder rejects.
 */
static const int8_t svq3_pred_1[6][6][5] = {
    { { 2,-1,-1,-1,-1 }, { 2, 1,-1,-1,-1 }, { 1, 2,-1,-1,-1 },
      { 2, 1,-1,-1,-1 }, { 1, 2,-1,-1,-1 }, { 1, 2,-1,-1,-1 } },
    { { 0, 2,-1,-1,-1 }, { 0, 2, 1, 4, 3 }, { 0, 1, 2, 4, 3 },
      { 0, 2, 1, 4, 3 }, { 2, 0, 1, 3, 4 }, { 0, 4, 2, 1, 3 } },
    { { 2, 0,-1,-1,-1 }, { 2, 1, 0, 4, 3 }, { 1, 2, 4, 0, 3 },
      { 2, 1, 0, 4, 3 }, { 2, 1, 4, 3, 0 }, { 1, 2, 4, 0, 3 } },
    { { 2, 0,-1,-1,-1 }, { 2, 0, 1, 4, 3 }, { 1, 2, 0, 4, 3 },
      { 2, 1, 0, 4, 3 }, { 2, 1, 3, 4, 0 }, { 2, 4, 1, 0, 3 } },
    { { 0, 2,-1,-1,-1 }, { 0, 2, 1, 3, 4 }, { 1, 2, 3, 0, 4 },
      { 2, 0, 1, 3, 4 }, { 2, 1, 3, 0, 4 }, { 2, 0, 4, 3, 1 } },
    { { 0, 2,-1,-1,-1 }, { 0, 2, 4, 1, 3 }, { 1, 4, 2, 0, 3 },
      { 4, 2, 0, 1, 3 }, { 2, 0, 1, 4, 3 }, { 4, 2, 1, 0, 3 } },
};

/*
 * Run/level pairs for coefficient codes below 16, indexed as
 * [intra][code] by svq3_decode_block().
 */
static const struct { uint8_t run; uint8_t level; } svq3_dct_tables[2][16] = {
    { { 0, 0 }, { 0, 1 }, { 1, 1 }, { 2, 1 }, { 0, 2 }, { 3, 1 }, { 4, 1 }, { 5, 1 },
      { 0, 3 }, { 1, 2 }, { 2, 2 }, { 6, 1 }, { 7, 1 }, { 8, 1 }, { 9, 1 }, { 0, 4 } },
    { { 0, 0 }, { 0, 1 }, { 1, 1 }, { 0, 2 }, { 2, 1 }, { 0, 3 }, { 0, 4 }, { 0, 5 },
      { 3, 1 }, { 4, 1 }, { 1, 2 }, { 1, 3 }, { 0, 6 }, { 0, 7 }, { 0, 8 }, { 0, 9 } }
};

/* Dequantization multipliers, indexed by the quantizer parameter (0..31). */
static const uint32_t svq3_dequant_coeff[32] = {
     3881,  4351,  4890,  5481,  6154,  6914,  7761,  8718,
     9781, 10987, 12339, 13828, 15523, 17435, 19561, 21873,
    24552, 27656, 30847, 34870, 38807, 43747, 49103, 54683,
    61694, 68745, 77615, 89113,100253,109366,126635,141533
};

137
/**
 * Inverse-transform and dequantize the 4x4 luma DC coefficients.
 *
 * The 16 results are scattered into output[], each one landing on a distinct
 * multiple of 16 elements (offsets 0, 16, ..., 240).
 *
 * @param output destination coefficient buffer
 * @param input  16 DC coefficients, read as a row-major 4x4 matrix
 * @param qp     index into svq3_dequant_coeff[]; must be in 0..31
 */
void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qp){
    const unsigned qmul = svq3_dequant_coeff[qp];
#define stride 16
    int i;
    int temp[16];
    static const uint8_t x_offset[4]={0, 1*stride, 4*stride, 5*stride};

    /* first pass: transform each group of four consecutive inputs */
    for(i=0; i<4; i++){
        const int z0 = 13*(input[4*i+0] +    input[4*i+2]);
        const int z1 = 13*(input[4*i+0] -    input[4*i+2]);
        const int z2 =  7* input[4*i+1] - 17*input[4*i+3];
        const int z3 = 17* input[4*i+1] +  7*input[4*i+3];

        temp[4*i+0] = z0+z3;
        temp[4*i+1] = z1+z2;
        temp[4*i+2] = z1-z2;
        temp[4*i+3] = z0-z3;
    }

    /* second pass: transform across the groups, then dequantize and round.
     * The product with qmul can exceed INT_MAX for extreme coefficients, so
     * the multiply and rounding add are done in unsigned arithmetic (which
     * wraps) and only the final value is reinterpreted as signed. */
    for(i=0; i<4; i++){
        const int offset= x_offset[i];
        const int z0= 13*(temp[4*0+i] +    temp[4*2+i]);
        const int z1= 13*(temp[4*0+i] -    temp[4*2+i]);
        const int z2=  7* temp[4*1+i] - 17*temp[4*3+i];
        const int z3= 17* temp[4*1+i] +  7*temp[4*3+i];

        output[stride* 0+offset] = (int)((unsigned)(z0 + z3)*qmul + 0x80000u) >> 20;
        output[stride* 2+offset] = (int)((unsigned)(z1 + z2)*qmul + 0x80000u) >> 20;
        output[stride* 8+offset] = (int)((unsigned)(z1 - z2)*qmul + 0x80000u) >> 20;
        output[stride*10+offset] = (int)((unsigned)(z0 - z3)*qmul + 0x80000u) >> 20;
    }
}
#undef stride

171
/**
 * Inverse-transform and dequantize a 4x4 block and add it to dst.
 *
 * block[] is used as scratch space: the first pass overwrites it with the
 * intermediate values, and block[0] is cleared when dc is non-zero.
 *
 * @param dst    top-left pixel of the destination 4x4 area
 * @param block  16 coefficients, read as a row-major 4x4 matrix
 * @param stride distance in bytes between consecutive rows of dst
 * @param qp     index into svq3_dequant_coeff[]; must be in 0..31
 * @param dc     0: block[0] is transformed like every other coefficient;
 *               1: block[0] is scaled by a fixed factor and added as a
 *               constant term; any other value: block[0] is scaled by the
 *               dequantizer and added as a constant term
 */
void ff_svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp,
                            int dc)
{
    const int qmul = svq3_dequant_coeff[qp];
    unsigned rr;
    int i;

    if (dc) {
        /* the inner term fits in an int; the final scaling by 13*13 may not,
         * so it is done in unsigned arithmetic to avoid signed overflow */
        const int dc_term = (dc == 1) ? 1538*block[0] : ((qmul*(block[0] >> 3)) / 2);

        dc = (int)(13u*13u*(unsigned)dc_term);
        block[0] = 0;
    }

    /* rounding constant plus the DC contribution, identical for all outputs */
    rr = (unsigned)dc + 0x80000u;

    /* first pass: transform each group of four consecutive coefficients */
    for (i = 0; i < 4; i++) {
        const int z0 = 13*(block[0 + 4*i] +    block[2 + 4*i]);
        const int z1 = 13*(block[0 + 4*i] -    block[2 + 4*i]);
        const int z2 =  7* block[1 + 4*i] - 17*block[3 + 4*i];
        const int z3 = 17* block[1 + 4*i] +  7*block[3 + 4*i];

        block[0 + 4*i] = z0 + z3;
        block[1 + 4*i] = z1 + z2;
        block[2 + 4*i] = z1 - z2;
        block[3 + 4*i] = z0 - z3;
    }

    /* second pass: transform across the groups, dequantize, round and add.
     * The product with qmul is computed in unsigned arithmetic so that large
     * coefficients wrap instead of triggering signed overflow. */
    for (i = 0; i < 4; i++) {
        const int z0 = 13*(block[i + 4*0] +    block[i + 4*2]);
        const int z1 = 13*(block[i + 4*0] -    block[i + 4*2]);
        const int z2 =  7* block[i + 4*1] - 17*block[i + 4*3];
        const int z3 = 17* block[i + 4*1] +  7*block[i + 4*3];

        dst[i + stride*0] = av_clip_uint8( dst[i + stride*0] + ((int)((unsigned)(z0 + z3)*qmul + rr) >> 20) );
        dst[i + stride*1] = av_clip_uint8( dst[i + stride*1] + ((int)((unsigned)(z1 + z2)*qmul + rr) >> 20) );
        dst[i + stride*2] = av_clip_uint8( dst[i + stride*2] + ((int)((unsigned)(z1 - z2)*qmul + rr) >> 20) );
        dst[i + stride*3] = av_clip_uint8( dst[i + stride*3] + ((int)((unsigned)(z0 - z3)*qmul + rr) >> 20) );
    }
}

208 209 210
/**
 * Decode the run/level coded coefficients of one block.
 *
 * @param gb    bitstream reader positioned at the first coefficient code
 * @param block coefficient buffer; decoded levels are stored at the
 *              positions given by the scan pattern selected by type
 * @param index scan position at which decoding starts
 * @param type  0: luma DC zigzag scan, 1: zigzag scan, 2: svq3_scan
 *              (decoded in two passes of 8 coefficients), 3: chroma DC scan
 * @return 0 on success, -1 on an invalid code or when a run moves the scan
 *         position to or past the current limit
 */
static inline int svq3_decode_block(GetBitContext *gb, DCTELEM *block,
                                    int index, const int type)
{
    static const uint8_t *const scan_patterns[4] =
    { luma_dc_zigzag_scan, zigzag_scan, svq3_scan, chroma_dc_scan };

    int run, level, sign, vlc, limit;
    const int intra = (3 * type) >> 2; /* type 0,1 -> 0; 2 -> 1; 3 -> 2 */
    const uint8_t *const scan = scan_patterns[type];

    for (limit = (16 >> intra); index < 16; index = limit, limit += 8) {
        /* a code of 0 terminates the current pass */
        for (; (vlc = svq3_get_ue_golomb(gb)) != 0; index++) {

            if (vlc == INVALID_VLC)
                return -1;

            /* lowest bit carries the sign: 0 for odd codes, -1 for even ones */
            sign = (vlc & 0x1) - 1;
            vlc  = (vlc + 1) >> 1;

            if (type == 3) {
                if (vlc < 3) {
                    run   = 0;
                    level = vlc;
                } else if (vlc < 4) {
                    run   = 1;
                    level = 1;
                } else {
                    run   = (vlc & 0x3);
                    level = ((vlc + 9) >> 2) - run;
                }
            } else {
                if (vlc < 16) {
                    run   = svq3_dct_tables[intra][vlc].run;
                    level = svq3_dct_tables[intra][vlc].level;
                } else if (intra) {
                    run   = (vlc & 0x7);
                    level = (vlc >> 3) + ((run == 0) ? 8 : ((run < 2) ? 2 : ((run < 5) ? 0 : -1)));
                } else {
                    run   = (vlc & 0xF);
                    level = (vlc >> 4) + ((run == 0) ? 4 : ((run < 3) ? 2 : ((run < 10) ? 1 : 0)));
                }
            }

            if ((index += run) >= limit)
                return -1;

            /* apply the sign: negates level when sign is -1 */
            block[scan[index]] = (level ^ sign) - sign;
        }

        /* only type 2 decodes a second pass */
        if (type != 2) {
            break;
        }
    }

    return 0;
}

265 266 267 268 269
/**
 * Motion-compensate one partition (luma, and chroma unless CODEC_FLAG_GRAY
 * is set) from a reference picture into the current picture.
 *
 * @param s        decoder context
 * @param x        left edge of the partition in luma pixels
 * @param y        top edge of the partition in luma pixels
 * @param width    partition width in luma pixels (16, 8 or 4)
 * @param height   partition height in luma pixels
 * @param mx       integer horizontal displacement relative to x
 * @param my       integer vertical displacement relative to y
 * @param dxy      sub-pixel position index passed to the pixel function tables
 * @param thirdpel non-zero to use the third-pel pixel functions
 * @param dir      0 to predict from last_picture, otherwise from next_picture
 * @param avg      non-zero to average with the destination instead of
 *                 overwriting it
 */
static inline void svq3_mc_dir_part(MpegEncContext *s,
                                    int x, int y, int width, int height,
                                    int mx, int my, int dxy,
                                    int thirdpel, int dir, int avg)
{
    const Picture *pic = (dir == 0) ? &s->last_picture : &s->next_picture;
    uint8_t *src, *dest;
    int i, emu = 0;
    int blocksize = 2 - (width>>3); //16->0, 8->1, 4->2

    /* convert the displacement to an absolute source position */
    mx += x;
    my += y;

    if (mx < 0 || mx >= (s->h_edge_pos - width  - 1) ||
        my < 0 || my >= (s->v_edge_pos - height - 1)) {

        if ((s->flags & CODEC_FLAG_EMU_EDGE)) {
            emu = 1;
        }

        mx = av_clip (mx, -16, (s->h_edge_pos - width  + 15));
        my = av_clip (my, -16, (s->v_edge_pos - height + 15));
    }

    /* form component predictions */
    dest = s->current_picture.f.data[0] + x + y*s->linesize;
    src  = pic->f.data[0] + mx + my*s->linesize;

    if (emu) {
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, (width + 1), (height + 1),
                            mx, my, s->h_edge_pos, s->v_edge_pos);
        src = s->edge_emu_buffer;
    }
    if (thirdpel)
        (avg ? s->dsp.avg_tpel_pixels_tab : s->dsp.put_tpel_pixels_tab)[dxy](dest, src, s->linesize, width, height);
    else
        (avg ? s->dsp.avg_pixels_tab : s->dsp.put_pixels_tab)[blocksize][dxy](dest, src, s->linesize, height);

    if (!(s->flags & CODEC_FLAG_GRAY)) {
        /* halve position and size for the chroma planes */
        mx     = (mx + (mx < (int) x)) >> 1;
        my     = (my + (my < (int) y)) >> 1;
        width  = (width  >> 1);
        height = (height >> 1);
        blocksize++;

        for (i = 1; i < 3; i++) {
            dest = s->current_picture.f.data[i] + (x >> 1) + (y >> 1) * s->uvlinesize;
            src  = pic->f.data[i] + mx + my * s->uvlinesize;

            if (emu) {
                s->dsp.emulated_edge_mc(s->edge_emu_buffer, src, s->uvlinesize, (width + 1), (height + 1),
                                    mx, my, (s->h_edge_pos >> 1), (s->v_edge_pos >> 1));
                src = s->edge_emu_buffer;
            }
            if (thirdpel)
                (avg ? s->dsp.avg_tpel_pixels_tab : s->dsp.put_tpel_pixels_tab)[dxy](dest, src, s->uvlinesize, width, height);
            else
                (avg ? s->dsp.avg_pixels_tab : s->dsp.put_pixels_tab)[blocksize][dxy](dest, src, s->uvlinesize, height);
        }
    }
}

327 328 329
/**
 * Decode the motion vectors of every partition of the current macroblock
 * for one prediction direction and perform the motion compensation.
 *
 * @param h    decoder context
 * @param size partition layout selector; partition width and height are
 *             derived from it
 * @param mode FULLPEL_MODE, HALFPEL_MODE, THIRDPEL_MODE or PREDICT_MODE.
 *             In PREDICT_MODE no differential is read from the bitstream;
 *             the vector is scaled from next_picture's motion vectors and
 *             the mv cache is left untouched.
 * @param dir  prediction direction (0 or 1), forwarded to svq3_mc_dir_part()
 * @param avg  forwarded to svq3_mc_dir_part()
 * @return 0 on success, -1 if an invalid motion vector code is read
 */
static inline int svq3_mc_dir(H264Context *h, int size, int mode, int dir,
                              int avg)
{
    int i, j, k, mx, my, dx, dy, x, y;
    MpegEncContext *const s = (MpegEncContext *) h;
    const int part_width  = ((size & 5) == 4) ? 4 : 16 >> (size & 1);
    const int part_height = 16 >> ((unsigned) (size + 1) / 3);
    const int extra_width = (mode == PREDICT_MODE) ? -16*6 : 0;
    const int h_edge_pos  = 6*(s->h_edge_pos - part_width ) - extra_width;
    const int v_edge_pos  = 6*(s->v_edge_pos - part_height) - extra_width;

    for (i = 0; i < 16; i += part_height) {
        for (j = 0; j < 16; j += part_width) {
            const int b_xy = (4*s->mb_x + (j >> 2)) + (4*s->mb_y + (i >> 2))*h->b_stride;
            int dxy;
            x = 16*s->mb_x + j;
            y = 16*s->mb_y + i;
            k = ((j >> 2) & 1) + ((i >> 1) & 2) + ((j >> 1) & 4) + (i & 8);

            if (mode != PREDICT_MODE) {
                pred_motion(h, k, (part_width >> 2), dir, 1, &mx, &my);
            } else {
                mx = s->next_picture.f.motion_val[0][b_xy][0] << 1;
                my = s->next_picture.f.motion_val[0][b_xy][1] << 1;

                if (dir == 0) {
                    mx = ((mx * h->frame_num_offset) / h->prev_frame_num_offset + 1) >> 1;
                    my = ((my * h->frame_num_offset) / h->prev_frame_num_offset + 1) >> 1;
                } else {
                    mx = ((mx * (h->frame_num_offset - h->prev_frame_num_offset)) / h->prev_frame_num_offset + 1) >> 1;
                    my = ((my * (h->frame_num_offset - h->prev_frame_num_offset)) / h->prev_frame_num_offset + 1) >> 1;
                }
            }

            /* clip motion vector prediction to frame border */
            mx = av_clip(mx, extra_width - 6*x, h_edge_pos - 6*x);
            my = av_clip(my, extra_width - 6*y, v_edge_pos - 6*y);

            /* get (optional) motion vector differential */
            if (mode == PREDICT_MODE) {
                dx = dy = 0;
            } else {
                dy = svq3_get_se_golomb(&s->gb);
                dx = svq3_get_se_golomb(&s->gb);

                if (dx == INVALID_VLC || dy == INVALID_VLC) {
                    av_log(h->s.avctx, AV_LOG_ERROR, "invalid MV vlc\n");
                    return -1;
                }
            }

            /* compute motion vector */
            if (mode == THIRDPEL_MODE) {
                int fx, fy;
                mx  = ((mx + 1)>>1) + dx;
                my  = ((my + 1)>>1) + dy;
                /* the bias keeps the dividend non-negative so that the
                 * unsigned division rounds towards minus infinity */
                fx  = ((unsigned)(mx + 0x3000))/3 - 0x1000;
                fy  = ((unsigned)(my + 0x3000))/3 - 0x1000;
                dxy = (mx - 3*fx) + 4*(my - 3*fy);

                svq3_mc_dir_part(s, x, y, part_width, part_height, fx, fy, dxy, 1, dir, avg);
                mx += mx;
                my += my;
            } else if (mode == HALFPEL_MODE || mode == PREDICT_MODE) {
                mx  = ((unsigned)(mx + 1 + 0x3000))/3 + dx - 0x1000;
                my  = ((unsigned)(my + 1 + 0x3000))/3 + dy - 0x1000;
                dxy = (mx&1) + 2*(my&1);

                svq3_mc_dir_part(s, x, y, part_width, part_height, mx>>1, my>>1, dxy, 0, dir, avg);
                mx *= 3;
                my *= 3;
            } else {
                mx = ((unsigned)(mx + 3 + 0x6000))/6 + dx - 0x1000;
                my = ((unsigned)(my + 3 + 0x6000))/6 + dy - 0x1000;

                svq3_mc_dir_part(s, x, y, part_width, part_height, mx, my, 0, 0, dir, avg);
                mx *= 6;
                my *= 6;
            }

            /* update mv_cache */
            if (mode != PREDICT_MODE) {
                int32_t mv = pack16to32(mx,my);

                if (part_height == 8 && i < 8) {
                    AV_WN32A(h->mv_cache[dir][scan8[k] + 1*8], mv);

                    if (part_width == 8 && j < 8) {
                        AV_WN32A(h->mv_cache[dir][scan8[k] + 1 + 1*8], mv);
                    }
                }
                if (part_width == 8 && j < 8) {
                    AV_WN32A(h->mv_cache[dir][scan8[k] + 1], mv);
                }
                if (part_width == 4 || part_height == 4) {
                    AV_WN32A(h->mv_cache[dir][scan8[k]], mv);
                }
            }

            /* write back motion vectors */
            fill_rectangle(s->current_picture.f.motion_val[dir][b_xy],
                           part_width >> 2, part_height >> 2, h->b_stride,
                           pack16to32(mx, my), 4);
        }
    }

    return 0;
}

436
/**
 * Decode one macroblock: prediction information first, then the residual
 * coefficients.
 *
 * @param svq3    decoder context
 * @param mb_type SVQ3 macroblock type as read from the bitstream:
 *                0 is a skipped macroblock, 1..7 are inter types,
 *                8 and 33 are intra 4x4 types, every other value is treated
 *                as intra 16x16 and indexes i_mb_type_info[mb_type - 8]
 * @return 0 on success, -1 on a bitstream error
 */
static int svq3_decode_mb(SVQ3Context *svq3, unsigned int mb_type)
{
    H264Context *h = &svq3->h;
    int i, j, k, m, dir, mode;
    int cbp = 0;
    uint32_t vlc;
    int8_t *top, *left;
    MpegEncContext *const s = (MpegEncContext *) h;
    const int mb_xy = h->mb_xy;
    const int b_xy  = 4*s->mb_x + 4*s->mb_y*h->b_stride;

    h->top_samples_available      = (s->mb_y == 0) ? 0x33FF : 0xFFFF;
    h->left_samples_available     = (s->mb_x == 0) ? 0x5F5F : 0xFFFF;
    h->topright_samples_available = 0xFFFF;

    if (mb_type == 0) {           /* SKIP */
        if (s->pict_type == AV_PICTURE_TYPE_P || s->next_picture.f.mb_type[mb_xy] == -1) {
            svq3_mc_dir_part(s, 16*s->mb_x, 16*s->mb_y, 16, 16, 0, 0, 0, 0, 0, 0);

            if (s->pict_type == AV_PICTURE_TYPE_B) {
                svq3_mc_dir_part(s, 16*s->mb_x, 16*s->mb_y, 16, 16, 0, 0, 0, 0, 1, 1);
            }

            mb_type = MB_TYPE_SKIP;
        } else {
            /* reuse the partitioning of the co-located macroblock */
            mb_type = FFMIN(s->next_picture.f.mb_type[mb_xy], 6);
            if (svq3_mc_dir(h, mb_type, PREDICT_MODE, 0, 0) < 0)
                return -1;
            if (svq3_mc_dir(h, mb_type, PREDICT_MODE, 1, 1) < 0)
                return -1;

            mb_type = MB_TYPE_16x16;
        }
    } else if (mb_type < 8) {     /* INTER */
        if (svq3->thirdpel_flag && svq3->halfpel_flag == !get_bits1 (&s->gb)) {
            mode = THIRDPEL_MODE;
        } else if (svq3->halfpel_flag && svq3->thirdpel_flag == !get_bits1 (&s->gb)) {
            mode = HALFPEL_MODE;
        } else {
            mode = FULLPEL_MODE;
        }

        /* fill caches */
        /* note ref_cache should contain here:
            ????????
            ???11111
            N??11111
            N??11111
            N??11111
        */

        for (m = 0; m < 2; m++) {
            if (s->mb_x > 0 && h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - 1]+6] != -1) {
                for (i = 0; i < 4; i++) {
                    AV_COPY32(h->mv_cache[m][scan8[0] - 1 + i*8], s->current_picture.f.motion_val[m][b_xy - 1 + i*h->b_stride]);
                }
            } else {
                for (i = 0; i < 4; i++) {
                    AV_ZERO32(h->mv_cache[m][scan8[0] - 1 + i*8]);
                }
            }
            if (s->mb_y > 0) {
                memcpy(h->mv_cache[m][scan8[0] - 1*8], s->current_picture.f.motion_val[m][b_xy - h->b_stride], 4*2*sizeof(int16_t));
                memset(&h->ref_cache[m][scan8[0] - 1*8], (h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - s->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1, 4);

                if (s->mb_x < (s->mb_width - 1)) {
                    AV_COPY32(h->mv_cache[m][scan8[0] + 4 - 1*8], s->current_picture.f.motion_val[m][b_xy - h->b_stride + 4]);
                    h->ref_cache[m][scan8[0] + 4 - 1*8] =
                        (h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - s->mb_stride + 1]+6] == -1 ||
                         h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - s->mb_stride    ]  ] == -1) ? PART_NOT_AVAILABLE : 1;
                }else
                    h->ref_cache[m][scan8[0] + 4 - 1*8] = PART_NOT_AVAILABLE;
                if (s->mb_x > 0) {
                    AV_COPY32(h->mv_cache[m][scan8[0] - 1 - 1*8], s->current_picture.f.motion_val[m][b_xy - h->b_stride - 1]);
                    h->ref_cache[m][scan8[0] - 1 - 1*8] = (h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - s->mb_stride - 1]+3] == -1) ? PART_NOT_AVAILABLE : 1;
                }else
                    h->ref_cache[m][scan8[0] - 1 - 1*8] = PART_NOT_AVAILABLE;
            }else
                memset(&h->ref_cache[m][scan8[0] - 1*8 - 1], PART_NOT_AVAILABLE, 8);

            /* the second list is only needed for B-frames */
            if (s->pict_type != AV_PICTURE_TYPE_B)
                break;
        }

        /* decode motion vector(s) and form prediction(s) */
        if (s->pict_type == AV_PICTURE_TYPE_P) {
            if (svq3_mc_dir(h, (mb_type - 1), mode, 0, 0) < 0)
                return -1;
        } else {        /* AV_PICTURE_TYPE_B */
            if (mb_type != 2) {
                if (svq3_mc_dir(h, 0, mode, 0, 0) < 0)
                    return -1;
            } else {
                for (i = 0; i < 4; i++) {
                    memset(s->current_picture.f.motion_val[0][b_xy + i*h->b_stride], 0, 4*2*sizeof(int16_t));
                }
            }
            if (mb_type != 1) {
                if (svq3_mc_dir(h, 0, mode, 1, (mb_type == 3)) < 0)
                    return -1;
            } else {
                for (i = 0; i < 4; i++) {
                    memset(s->current_picture.f.motion_val[1][b_xy + i*h->b_stride], 0, 4*2*sizeof(int16_t));
                }
            }
        }

        mb_type = MB_TYPE_16x16;
    } else if (mb_type == 8 || mb_type == 33) {   /* INTRA4x4 */
        memset(h->intra4x4_pred_mode_cache, -1, 8*5*sizeof(int8_t));

        if (mb_type == 8) {
            if (s->mb_x > 0) {
                for (i = 0; i < 4; i++) {
                    h->intra4x4_pred_mode_cache[scan8[0] - 1 + i*8] = h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - 1]+6-i];
                }
                if (h->intra4x4_pred_mode_cache[scan8[0] - 1] == -1) {
                    h->left_samples_available = 0x5F5F;
                }
            }
            if (s->mb_y > 0) {
                h->intra4x4_pred_mode_cache[4+8*0] = h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - s->mb_stride]+0];
                h->intra4x4_pred_mode_cache[5+8*0] = h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - s->mb_stride]+1];
                h->intra4x4_pred_mode_cache[6+8*0] = h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - s->mb_stride]+2];
                h->intra4x4_pred_mode_cache[7+8*0] = h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - s->mb_stride]+3];

                if (h->intra4x4_pred_mode_cache[4+8*0] == -1) {
                    h->top_samples_available = 0x33FF;
                }
            }

            /* decode prediction codes for luma blocks */
            for (i = 0; i < 16; i+=2) {
                vlc = svq3_get_ue_golomb(&s->gb);

                if (vlc >= 25){
                    av_log(h->s.avctx, AV_LOG_ERROR, "luma prediction:%d\n", vlc);
                    return -1;
                }

                left    = &h->intra4x4_pred_mode_cache[scan8[i] - 1];
                top     = &h->intra4x4_pred_mode_cache[scan8[i] - 8];

                /* each code yields the modes of two horizontally adjacent blocks */
                left[1] = svq3_pred_1[top[0] + 1][left[0] + 1][svq3_pred_0[vlc][0]];
                left[2] = svq3_pred_1[top[1] + 1][left[1] + 1][svq3_pred_0[vlc][1]];

                if (left[1] == -1 || left[2] == -1){
                    av_log(h->s.avctx, AV_LOG_ERROR, "weird prediction\n");
                    return -1;
                }
            }
        } else {    /* mb_type == 33, DC_128_PRED block type */
            for (i = 0; i < 4; i++) {
                memset(&h->intra4x4_pred_mode_cache[scan8[0] + 8*i], DC_PRED, 4);
            }
        }

        write_back_intra_pred_mode(h);

        if (mb_type == 8) {
            ff_h264_check_intra4x4_pred_mode(h);

            h->top_samples_available  = (s->mb_y == 0) ? 0x33FF : 0xFFFF;
            h->left_samples_available = (s->mb_x == 0) ? 0x5F5F : 0xFFFF;
        } else {
            /* DC_PRED was written back above; the cache itself uses DC_128_PRED */
            for (i = 0; i < 4; i++) {
                memset(&h->intra4x4_pred_mode_cache[scan8[0] + 8*i], DC_128_PRED, 4);
            }

            h->top_samples_available  = 0x33FF;
            h->left_samples_available = 0x5F5F;
        }

        mb_type = MB_TYPE_INTRA4x4;
    } else {                      /* INTRA16x16 */
        dir = i_mb_type_info[mb_type - 8].pred_mode;
        dir = (dir >> 1) ^ 3*(dir & 1) ^ 1;

        if ((h->intra16x16_pred_mode = ff_h264_check_intra_pred_mode(h, dir, 0)) == -1){
            av_log(h->s.avctx, AV_LOG_ERROR, "check_intra_pred_mode = -1\n");
            return -1;
        }

        cbp = i_mb_type_info[mb_type - 8].cbp;
        mb_type = MB_TYPE_INTRA16x16;
    }

    /* intra macroblocks in P/B-frames carry zero motion vectors */
    if (!IS_INTER(mb_type) && s->pict_type != AV_PICTURE_TYPE_I) {
        for (i = 0; i < 4; i++) {
            memset(s->current_picture.f.motion_val[0][b_xy + i*h->b_stride], 0, 4*2*sizeof(int16_t));
        }
        if (s->pict_type == AV_PICTURE_TYPE_B) {
            for (i = 0; i < 4; i++) {
                memset(s->current_picture.f.motion_val[1][b_xy + i*h->b_stride], 0, 4*2*sizeof(int16_t));
            }
        }
    }
    if (!IS_INTRA4x4(mb_type)) {
        memset(h->intra4x4_pred_mode+h->mb2br_xy[mb_xy], DC_PRED, 8);
    }
    if (!IS_SKIP(mb_type) || s->pict_type == AV_PICTURE_TYPE_B) {
        memset(h->non_zero_count_cache + 8, 0, 14*8*sizeof(uint8_t));
        s->dsp.clear_blocks(h->mb+  0);
        s->dsp.clear_blocks(h->mb+384);
    }

    /* intra 16x16 takes its cbp from i_mb_type_info; all others read it here */
    if (!IS_INTRA16x16(mb_type) && (!IS_SKIP(mb_type) || s->pict_type == AV_PICTURE_TYPE_B)) {
        if ((vlc = svq3_get_ue_golomb(&s->gb)) >= 48){
            av_log(h->s.avctx, AV_LOG_ERROR, "cbp_vlc=%d\n", vlc);
            return -1;
        }

        cbp = IS_INTRA(mb_type) ? golomb_to_intra4x4_cbp[vlc] : golomb_to_inter_cbp[vlc];
    }
    if (IS_INTRA16x16(mb_type) || (s->pict_type != AV_PICTURE_TYPE_I && s->adaptive_quant && cbp)) {
        s->qscale += svq3_get_se_golomb(&s->gb);

        /* unsigned comparison also rejects negative values */
        if (s->qscale > 31u){
            av_log(h->s.avctx, AV_LOG_ERROR, "qscale:%d\n", s->qscale);
            return -1;
        }
    }
    if (IS_INTRA16x16(mb_type)) {
        AV_ZERO128(h->mb_luma_dc[0]+0);
        AV_ZERO128(h->mb_luma_dc[0]+8);
        if (svq3_decode_block(&s->gb, h->mb_luma_dc[0], 0, 1)){
            av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding intra luma dc\n");
            return -1;
        }
    }

    if (cbp) {
        /* intra 16x16 blocks start at coefficient 1; their DC was decoded above */
        const int index = IS_INTRA16x16(mb_type) ? 1 : 0;
        const int type = ((s->qscale < 24 && IS_INTRA4x4(mb_type)) ? 2 : 1);

        for (i = 0; i < 4; i++) {
            if ((cbp & (1 << i))) {
                for (j = 0; j < 4; j++) {
                    k = index ? ((j&1) + 2*(i&1) + 2*(j&2) + 4*(i&2)) : (4*i + j);
                    h->non_zero_count_cache[ scan8[k] ] = 1;

                    if (svq3_decode_block(&s->gb, &h->mb[16*k], index, type)){
                        av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding block\n");
                        return -1;
                    }
                }
            }
        }

        if ((cbp & 0x30)) {
            for (i = 1; i < 3; ++i) {
                if (svq3_decode_block(&s->gb, &h->mb[16*16*i], 0, 3)){
                    av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding chroma dc block\n");
                    return -1;
                }
            }

            if ((cbp & 0x20)) {
                for (i = 1; i < 3; i++) {
                    for (j = 0; j < 4; j++) {
                        k = 16*i + j;
                        h->non_zero_count_cache[ scan8[k] ] = 1;

                        if (svq3_decode_block(&s->gb, &h->mb[16*k], 1, 1)){
                            av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding chroma ac block\n");
                            return -1;
                        }
                    }
                }
            }
        }
    }

    h->cbp= cbp;
    s->current_picture.f.mb_type[mb_xy] = mb_type;

    if (IS_INTRA(mb_type)) {
        h->chroma_pred_mode = ff_h264_check_intra_pred_mode(h, DC_PRED8x8, 1);
    }

    return 0;
}

719
/**
 * Parse an SVQ3 slice header and prepare the bitstream reader for the
 * macroblock data that follows.
 *
 * Besides reading the header fields, this computes next_slice_index,
 * narrows the reader to the current slice, undoes the watermark scrambling
 * when a key is set, and resets the intra predictors around the current
 * macroblock position.
 *
 * @param avctx codec context whose priv_data is the SVQ3Context
 * @return 0 on success, -1 on an unsupported or damaged header
 */
static int svq3_decode_slice_header(AVCodecContext *avctx)
{
    SVQ3Context *svq3 = avctx->priv_data;
    H264Context *h = &svq3->h;
    MpegEncContext *s = &h->s;
    const int mb_xy = h->mb_xy;
    int i, header;

    header = get_bits(&s->gb, 8);

    if (((header & 0x9F) != 1 && (header & 0x9F) != 2) || (header & 0x60) == 0) {
        /* TODO: what? */
        av_log(avctx, AV_LOG_ERROR, "unsupported slice header (%02X)\n", header);
        return -1;
    } else {
        /* number of bytes in the slice length field (1..3) */
        int length = (header >> 5) & 3;

        svq3->next_slice_index = get_bits_count(&s->gb) + 8*show_bits(&s->gb, 8*length) + 8*length;

        if (svq3->next_slice_index > s->gb.size_in_bits) {
            av_log(avctx, AV_LOG_ERROR, "slice after bitstream end\n");
            return -1;
        }

        s->gb.size_in_bits = svq3->next_slice_index - 8*(length - 1);
        skip_bits(&s->gb, 8);

        if (svq3->watermark_key) {
            uint32_t word = AV_RL32(&s->gb.buffer[(get_bits_count(&s->gb)>>3)+1]);
            AV_WL32(&s->gb.buffer[(get_bits_count(&s->gb)>>3)+1], word ^ svq3->watermark_key);
        }
        if (length > 0) {
            memcpy((uint8_t *) &s->gb.buffer[get_bits_count(&s->gb) >> 3],
                   &s->gb.buffer[s->gb.size_in_bits >> 3], (length - 1));
        }
        skip_bits_long(&s->gb, 0);
    }

    /* the unsigned comparison also rejects any negative value, which would
     * otherwise index golomb_to_pict_type[] out of bounds */
    if ((i = svq3_get_ue_golomb(&s->gb)) == INVALID_VLC || (unsigned)i >= 3){
        av_log(h->s.avctx, AV_LOG_ERROR, "illegal slice type %d \n", i);
        return -1;
    }

    h->slice_type = golomb_to_pict_type[i];

    if ((header & 0x9F) == 2) {
        i = (s->mb_num < 64) ? 6 : (1 + av_log2 (s->mb_num - 1));
        s->mb_skip_run = get_bits(&s->gb, i) - (s->mb_x + (s->mb_y * s->mb_width));
    } else {
        skip_bits1(&s->gb);
        s->mb_skip_run = 0;
    }

    h->slice_num = get_bits(&s->gb, 8);
    s->qscale = get_bits(&s->gb, 5);
    s->adaptive_quant = get_bits1(&s->gb);

    /* unknown fields */
    skip_bits1(&s->gb);

    if (svq3->unknown_flag) {
        skip_bits1(&s->gb);
    }

    skip_bits1(&s->gb);
    skip_bits(&s->gb, 2);

    while (get_bits1(&s->gb)) {
        skip_bits(&s->gb, 8);
    }

    /* reset intra predictors and invalidate motion vector references */
    if (s->mb_x > 0) {
        memset(h->intra4x4_pred_mode+h->mb2br_xy[mb_xy - 1      ]+3, -1, 4*sizeof(int8_t));
        memset(h->intra4x4_pred_mode+h->mb2br_xy[mb_xy - s->mb_x]  , -1, 8*sizeof(int8_t)*s->mb_x);
    }
    if (s->mb_y > 0) {
        memset(h->intra4x4_pred_mode+h->mb2br_xy[mb_xy - s->mb_stride], -1, 8*sizeof(int8_t)*(s->mb_width - s->mb_x));

        if (s->mb_x > 0) {
            h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - s->mb_stride - 1]+3] = -1;
        }
    }

    return 0;
}

806
static av_cold int svq3_decode_init(AVCodecContext *avctx)
807
{
808 809 810
    SVQ3Context *svq3 = avctx->priv_data;
    H264Context *h = &svq3->h;
    MpegEncContext *s = &h->s;
811
    int m;
812
    unsigned char *extradata;
813
    unsigned char *extradata_end;
814
    unsigned int size;
815
    int marker_found = 0;
816

817
    if (ff_h264_decode_init(avctx) < 0)
818 819
        return -1;

820 821 822
    s->flags  = avctx->flags;
    s->flags2 = avctx->flags2;
    s->unrestricted_mv = 1;
823
    h->is_complex=1;
824
    avctx->pix_fmt = avctx->codec->pix_fmts[0];
825 826

    if (!s->context_initialized) {
827 828 829 830 831
        h->chroma_qp[0] = h->chroma_qp[1] = 4;

        svq3->halfpel_flag  = 1;
        svq3->thirdpel_flag = 1;
        svq3->unknown_flag  = 0;
832 833 834

        /* prowl for the "SEQH" marker in the extradata */
        extradata = (unsigned char *)avctx->extradata;
Alex Converse's avatar