/*
 * Matroska file demuxer
 * Copyright (c) 2003-2008 The FFmpeg Project
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file libavformat/matroskadec.c
 * Matroska file demuxer
 * by Ronald Bultje <rbultje@ronald.bitfreak.net>
 * with a little help from Moritz Bunkus <moritz@bunkus.org>
 * totally reworked by Aurelien Jacobs <aurel@gnuage.org>
 * Specs available on the Matroska project page: http://www.matroska.org/.
 */

31
#include <stdio.h>
32
#include "avformat.h"
33
/* For ff_codec_get_id(). */
34
#include "riff.h"
35
#include "isom.h"
36
#include "matroska.h"
37
#include "libavcodec/mpeg4audio.h"
38
#include "libavutil/intfloat_readwrite.h"
39
#include "libavutil/intreadwrite.h"
40
#include "libavutil/avstring.h"
41
#include "libavutil/lzo.h"
42
#if CONFIG_ZLIB
43 44
#include <zlib.h>
#endif
45
#if CONFIG_BZLIB
46 47
#include <bzlib.h>
#endif
48

49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
/*
 * How ebml_parse_elem() treats the payload of an element.
 */
typedef enum {
    EBML_NONE,   /* no handler: the payload is skipped */
    EBML_UINT,   /* read with ebml_read_uint() */
    EBML_FLOAT,  /* read with ebml_read_float() */
    EBML_STR,    /* read with ebml_read_ascii() */
    EBML_UTF8,   /* read with ebml_read_ascii(), exactly like EBML_STR */
    EBML_BIN,    /* read with ebml_read_binary() into an EbmlBin */
    EBML_NEST,   /* master element: children are parsed with the nested table */
    EBML_PASS,   /* no length is read; the same ID is looked up in the nested table */
    EBML_STOP,   /* stores 1 in an int field and makes the parser return 1 */
} EbmlType;

/*
 * One row of a parsing table. A table is an array of rows terminated by
 * a row whose id is 0.
 */
typedef const struct EbmlSyntax {
    uint32_t id;            /* element ID this row matches */
    EbmlType type;          /* how the payload is handled */
    int list_elem_size;     /* if non-zero, data_offset designates an EbmlList and
                             * a new element of this size is appended per match */
    int data_offset;        /* byte offset of the destination inside the parent struct */
    union {
        uint64_t    u;      /* default for EBML_UINT */
        double      f;      /* default for EBML_FLOAT */
        const char *s;      /* default for EBML_STR / EBML_UTF8 (duplicated with av_strdup) */
        const struct EbmlSyntax *n; /* child table for EBML_NEST / EBML_PASS */
    } def;
} EbmlSyntax;

/*
 * Growable array used for repeated elements; ebml_parse_elem() enlarges
 * elem with av_realloc() and ebml_free() releases it.
 */
typedef struct {
    int nb_elem;    /* number of elements currently stored */
    void *elem;     /* element storage; element size comes from the syntax row */
} EbmlList;

/*
 * Binary payload as filled in by ebml_read_binary().
 */
typedef struct {
    int      size;  /* number of bytes in data */
    uint8_t *data;  /* buffer allocated with av_malloc() */
    int64_t  pos;   /* stream position (url_ftell) at which the payload started */
} EbmlBin;

85 86 87 88 89 90 91 92
/*
 * Values read from the EBML header; destination of the ebml_header table.
 */
typedef struct {
    uint64_t version;
    uint64_t max_size;
    uint64_t id_length;
    char    *doctype;           /* allocated string, released by ebml_free() */
    uint64_t doctype_version;
} Ebml;

93 94 95 96
/*
 * Compression description of one content encoding; destination of the
 * matroska_track_encoding_compression table.
 */
typedef struct {
    uint64_t algo;      /* compression algorithm identifier */
    EbmlBin  settings;  /* algorithm-specific settings blob */
} MatroskaTrackCompression;
97

98 99 100 101 102
/*
 * One content encoding of a track; destination of the
 * matroska_track_encoding table.
 */
typedef struct {
    uint64_t scope;
    uint64_t type;
    MatroskaTrackCompression compression;
} MatroskaTrackEncoding;
103

104 105 106 107 108 109 110 111
/*
 * Video-specific track properties; destination of the
 * matroska_track_video table.
 */
typedef struct {
    double   frame_rate;
    uint64_t display_width;
    uint64_t display_height;
    uint64_t pixel_width;
    uint64_t pixel_height;
    uint64_t fourcc;        /* filled from the MATROSKA_ID_VIDEOCOLORSPACE element */
} MatroskaTrackVideo;
112

113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
/*
 * Audio-specific track properties. The first four fields are the
 * destination of the matroska_track_audio table; the remaining ones are
 * not referenced by any syntax table.
 */
typedef struct {
    double   samplerate;
    double   out_samplerate;
    uint64_t bitdepth;
    uint64_t channels;

    /* real audio header (extracted from extradata) */
    int      coded_framesize;
    int      sub_packet_h;
    int      frame_size;
    int      sub_packet_size;
    int      sub_packet_cnt;
    int      pkt_cnt;
    uint8_t *buf;
} MatroskaTrackAudio;
128

129 130
typedef struct {
    uint64_t num;
131
    uint64_t uid;
132
    uint64_t type;
133
    char    *name;
134 135 136
    char    *codec_id;
    EbmlBin  codec_priv;
    char    *language;
137
    double time_scale;
138
    uint64_t default_duration;
139
    uint64_t flag_default;
140 141 142
    MatroskaTrackVideo video;
    MatroskaTrackAudio audio;
    EbmlList encodings;
143 144

    AVStream *stream;
145
    int64_t end_timecode;
146 147
} MatroskaTrack;

148
typedef struct {
149
    uint64_t uid;
150 151 152
    char *filename;
    char *mime;
    EbmlBin bin;
153 154

    AVStream *stream;
155 156
} MatroskaAttachement;

157 158 159 160 161
typedef struct {
    uint64_t start;
    uint64_t end;
    uint64_t uid;
    char    *title;
162 163

    AVChapter *chapter;
164 165
} MatroskaChapter;

166 167 168 169 170 171 172 173 174 175
/*
 * One cue track position; destination of the matroska_index_pos table.
 */
typedef struct {
    uint64_t track;
    uint64_t pos;
} MatroskaIndexPos;

/*
 * One cue point; destination of the matroska_index_entry table.
 */
typedef struct {
    uint64_t time;
    EbmlList pos;   /* list of MatroskaIndexPos */
} MatroskaIndex;

176 177 178
typedef struct {
    char *name;
    char *string;
179 180
    char *lang;
    uint64_t def;
181 182 183
    EbmlList sub;
} MatroskaTag;

184 185 186 187 188 189 190 191 192 193 194 195 196
/*
 * What a tag applies to; destination of the matroska_tagtargets table.
 */
typedef struct {
    char    *type;
    uint64_t typevalue;
    uint64_t trackuid;
    uint64_t chapteruid;
    uint64_t attachuid;
} MatroskaTagTarget;

/*
 * One tag element: its target plus its simple tags; destination of the
 * matroska_tag table.
 */
typedef struct {
    MatroskaTagTarget target;
    EbmlList tag;   /* list of MatroskaTag */
} MatroskaTags;

197 198 199 200 201
/*
 * One seek head entry; destination of the matroska_seekhead_entry table.
 */
typedef struct {
    uint64_t id;
    uint64_t pos;
} MatroskaSeekhead;

202
/*
 * One open master element on the nesting stack, as recorded by
 * ebml_read_master() and tested by ebml_level_end().
 */
typedef struct {
    uint64_t start;     /* stream position where the element's content begins */
    uint64_t length;    /* declared content length in bytes */
} MatroskaLevel;

207
typedef struct {
208 209
    AVFormatContext *ctx;

Diego Biurrun's avatar
Diego Biurrun committed
210
    /* EBML stuff */
211 212 213 214
    int num_levels;
    MatroskaLevel levels[EBML_MAX_DEPTH];
    int level_up;

215 216 217
    uint64_t time_scale;
    double   duration;
    char    *title;
218
    EbmlList tracks;
219
    EbmlList attachments;
220
    EbmlList chapters;
221
    EbmlList index;
222
    EbmlList tags;
223
    EbmlList seekhead;
224 225

    /* byte position of the segment inside the stream */
226
    int64_t segment_start;
227

Diego Biurrun's avatar
Diego Biurrun committed
228
    /* the packet queue */
229 230
    AVPacket **packets;
    int num_packets;
231
    AVPacket *prev_pkt;
232

233
    int done;
234
    int has_cluster_id;
235 236 237

    /* What to skip before effectively reading a packet. */
    int skip_to_keyframe;
238
    uint64_t skip_to_timecode;
239 240
} MatroskaDemuxContext;

241 242 243
typedef struct {
    uint64_t duration;
    int64_t  reference;
244
    uint64_t non_simple;
245 246 247 248 249 250 251 252
    EbmlBin  bin;
} MatroskaBlock;

/*
 * One parsed cluster; destination of the matroska_cluster table.
 */
typedef struct {
    uint64_t timecode;
    EbmlList blocks;    /* list of MatroskaBlock */
} MatroskaCluster;

253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268
/*
 * Children of the EBML header element, stored into an Ebml struct.
 * The two EBML_NONE rows are recognised but their payload is skipped.
 */
static EbmlSyntax ebml_header[] = {
    { EBML_ID_EBMLREADVERSION,        EBML_UINT, 0, offsetof(Ebml,version), {.u=EBML_VERSION} },
    { EBML_ID_EBMLMAXSIZELENGTH,      EBML_UINT, 0, offsetof(Ebml,max_size), {.u=8} },
    { EBML_ID_EBMLMAXIDLENGTH,        EBML_UINT, 0, offsetof(Ebml,id_length), {.u=4} },
    { EBML_ID_DOCTYPE,                EBML_STR,  0, offsetof(Ebml,doctype), {.s="(none)"} },
    { EBML_ID_DOCTYPEREADVERSION,     EBML_UINT, 0, offsetof(Ebml,doctype_version), {.u=1} },
    { EBML_ID_EBMLVERSION,            EBML_NONE },
    { EBML_ID_DOCTYPEVERSION,         EBML_NONE },
    { 0 }
};

/* Top-level table: the EBML header element, parsed with ebml_header. */
static EbmlSyntax ebml_syntax[] = {
    { EBML_ID_HEADER,                 EBML_NEST, 0, 0, {.n=ebml_header} },
    { 0 }
};

269 270 271 272 273 274 275 276 277 278 279
/*
 * Children of the segment info element, stored directly into
 * MatroskaDemuxContext. The time code scale defaults to 1000000.
 */
static EbmlSyntax matroska_info[] = {
    { MATROSKA_ID_TIMECODESCALE,      EBML_UINT,  0, offsetof(MatroskaDemuxContext,time_scale), {.u=1000000} },
    { MATROSKA_ID_DURATION,           EBML_FLOAT, 0, offsetof(MatroskaDemuxContext,duration) },
    { MATROSKA_ID_TITLE,              EBML_UTF8,  0, offsetof(MatroskaDemuxContext,title) },
    { MATROSKA_ID_WRITINGAPP,         EBML_NONE },
    { MATROSKA_ID_MUXINGAPP,          EBML_NONE },
    { MATROSKA_ID_DATEUTC,            EBML_NONE },
    { MATROSKA_ID_SEGMENTUID,         EBML_NONE },
    { 0 }
};

280 281 282 283 284 285 286
/*
 * Children of a track's video element, stored into MatroskaTrackVideo.
 * The EBML_NONE rows are recognised but their payload is skipped.
 */
static EbmlSyntax matroska_track_video[] = {
    { MATROSKA_ID_VIDEOFRAMERATE,     EBML_FLOAT,0, offsetof(MatroskaTrackVideo,frame_rate) },
    { MATROSKA_ID_VIDEODISPLAYWIDTH,  EBML_UINT, 0, offsetof(MatroskaTrackVideo,display_width) },
    { MATROSKA_ID_VIDEODISPLAYHEIGHT, EBML_UINT, 0, offsetof(MatroskaTrackVideo,display_height) },
    { MATROSKA_ID_VIDEOPIXELWIDTH,    EBML_UINT, 0, offsetof(MatroskaTrackVideo,pixel_width) },
    { MATROSKA_ID_VIDEOPIXELHEIGHT,   EBML_UINT, 0, offsetof(MatroskaTrackVideo,pixel_height) },
    { MATROSKA_ID_VIDEOCOLORSPACE,    EBML_UINT, 0, offsetof(MatroskaTrackVideo,fourcc) },
    { MATROSKA_ID_VIDEOPIXELCROPB,    EBML_NONE },
    { MATROSKA_ID_VIDEOPIXELCROPT,    EBML_NONE },
    { MATROSKA_ID_VIDEOPIXELCROPL,    EBML_NONE },
    { MATROSKA_ID_VIDEOPIXELCROPR,    EBML_NONE },
    { MATROSKA_ID_VIDEODISPLAYUNIT,   EBML_NONE },
    { MATROSKA_ID_VIDEOFLAGINTERLACED,EBML_NONE },
    { MATROSKA_ID_VIDEOSTEREOMODE,    EBML_NONE },
    { MATROSKA_ID_VIDEOASPECTRATIO,   EBML_NONE },
    { 0 }
};

/*
 * Children of a track's audio element, stored into MatroskaTrackAudio.
 * Defaults: sampling frequency 8000.0, one channel.
 */
static EbmlSyntax matroska_track_audio[] = {
    { MATROSKA_ID_AUDIOSAMPLINGFREQ,  EBML_FLOAT,0, offsetof(MatroskaTrackAudio,samplerate), {.f=8000.0} },
    { MATROSKA_ID_AUDIOOUTSAMPLINGFREQ,EBML_FLOAT,0,offsetof(MatroskaTrackAudio,out_samplerate) },
    { MATROSKA_ID_AUDIOBITDEPTH,      EBML_UINT, 0, offsetof(MatroskaTrackAudio,bitdepth) },
    { MATROSKA_ID_AUDIOCHANNELS,      EBML_UINT, 0, offsetof(MatroskaTrackAudio,channels), {.u=1} },
    { 0 }
};

/* Children of a content compression element, stored into MatroskaTrackCompression. */
static EbmlSyntax matroska_track_encoding_compression[] = {
    { MATROSKA_ID_ENCODINGCOMPALGO,   EBML_UINT, 0, offsetof(MatroskaTrackCompression,algo), {.u=0} },
    { MATROSKA_ID_ENCODINGCOMPSETTINGS,EBML_BIN, 0, offsetof(MatroskaTrackCompression,settings) },
    { 0 }
};

/*
 * Children of one content encoding element, stored into
 * MatroskaTrackEncoding. Defaults: scope 1, type 0. The encoding order
 * element is recognised but skipped.
 */
static EbmlSyntax matroska_track_encoding[] = {
    { MATROSKA_ID_ENCODINGSCOPE,      EBML_UINT, 0, offsetof(MatroskaTrackEncoding,scope), {.u=1} },
    { MATROSKA_ID_ENCODINGTYPE,       EBML_UINT, 0, offsetof(MatroskaTrackEncoding,type), {.u=0} },
    { MATROSKA_ID_ENCODINGCOMPRESSION,EBML_NEST, 0, offsetof(MatroskaTrackEncoding,compression), {.n=matroska_track_encoding_compression} },
    { MATROSKA_ID_ENCODINGORDER,      EBML_NONE },
    { 0 }
};

/* Each content encoding appends one MatroskaTrackEncoding to MatroskaTrack.encodings. */
static EbmlSyntax matroska_track_encodings[] = {
    { MATROSKA_ID_TRACKCONTENTENCODING, EBML_NEST, sizeof(MatroskaTrackEncoding), offsetof(MatroskaTrack,encodings), {.n=matroska_track_encoding} },
    { 0 }
};

static EbmlSyntax matroska_track[] = {
    { MATROSKA_ID_TRACKNUMBER,          EBML_UINT, 0, offsetof(MatroskaTrack,num) },
327
    { MATROSKA_ID_TRACKNAME,            EBML_UTF8, 0, offsetof(MatroskaTrack,name) },
328
    { MATROSKA_ID_TRACKUID,             EBML_UINT, 0, offsetof(MatroskaTrack,uid) },
329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347
    { MATROSKA_ID_TRACKTYPE,            EBML_UINT, 0, offsetof(MatroskaTrack,type) },
    { MATROSKA_ID_CODECID,              EBML_STR,  0, offsetof(MatroskaTrack,codec_id) },
    { MATROSKA_ID_CODECPRIVATE,         EBML_BIN,  0, offsetof(MatroskaTrack,codec_priv) },
    { MATROSKA_ID_TRACKLANGUAGE,        EBML_UTF8, 0, offsetof(MatroskaTrack,language), {.s="eng"} },
    { MATROSKA_ID_TRACKDEFAULTDURATION, EBML_UINT, 0, offsetof(MatroskaTrack,default_duration) },
    { MATROSKA_ID_TRACKTIMECODESCALE,   EBML_FLOAT,0, offsetof(MatroskaTrack,time_scale), {.f=1.0} },
    { MATROSKA_ID_TRACKFLAGDEFAULT,     EBML_UINT, 0, offsetof(MatroskaTrack,flag_default), {.u=1} },
    { MATROSKA_ID_TRACKVIDEO,           EBML_NEST, 0, offsetof(MatroskaTrack,video), {.n=matroska_track_video} },
    { MATROSKA_ID_TRACKAUDIO,           EBML_NEST, 0, offsetof(MatroskaTrack,audio), {.n=matroska_track_audio} },
    { MATROSKA_ID_TRACKCONTENTENCODINGS,EBML_NEST, 0, 0, {.n=matroska_track_encodings} },
    { MATROSKA_ID_TRACKFLAGENABLED,     EBML_NONE },
    { MATROSKA_ID_TRACKFLAGFORCED,      EBML_NONE },
    { MATROSKA_ID_TRACKFLAGLACING,      EBML_NONE },
    { MATROSKA_ID_CODECNAME,            EBML_NONE },
    { MATROSKA_ID_CODECDECODEALL,       EBML_NONE },
    { MATROSKA_ID_CODECINFOURL,         EBML_NONE },
    { MATROSKA_ID_CODECDOWNLOADURL,     EBML_NONE },
    { MATROSKA_ID_TRACKMINCACHE,        EBML_NONE },
    { MATROSKA_ID_TRACKMAXCACHE,        EBML_NONE },
348
    { MATROSKA_ID_TRACKMAXBLKADDID,     EBML_NONE },
349 350 351 352 353 354 355 356
    { 0 }
};

/* Each track entry appends one MatroskaTrack to MatroskaDemuxContext.tracks. */
static EbmlSyntax matroska_tracks[] = {
    { MATROSKA_ID_TRACKENTRY,         EBML_NEST, sizeof(MatroskaTrack), offsetof(MatroskaDemuxContext,tracks), {.n=matroska_track} },
    { 0 }
};

357
static EbmlSyntax matroska_attachment[] = {
358
    { MATROSKA_ID_FILEUID,            EBML_UINT, 0, offsetof(MatroskaAttachement,uid) },
359 360 361
    { MATROSKA_ID_FILENAME,           EBML_UTF8, 0, offsetof(MatroskaAttachement,filename) },
    { MATROSKA_ID_FILEMIMETYPE,       EBML_STR,  0, offsetof(MatroskaAttachement,mime) },
    { MATROSKA_ID_FILEDATA,           EBML_BIN,  0, offsetof(MatroskaAttachement,bin) },
362
    { MATROSKA_ID_FILEDESC,           EBML_NONE },
363 364 365 366 367 368 369 370
    { 0 }
};

/* Each attached file appends one MatroskaAttachement to MatroskaDemuxContext.attachments. */
static EbmlSyntax matroska_attachments[] = {
    { MATROSKA_ID_ATTACHEDFILE,       EBML_NEST, sizeof(MatroskaAttachement), offsetof(MatroskaDemuxContext,attachments), {.n=matroska_attachment} },
    { 0 }
};

371 372
static EbmlSyntax matroska_chapter_display[] = {
    { MATROSKA_ID_CHAPSTRING,         EBML_UTF8, 0, offsetof(MatroskaChapter,title) },
373
    { MATROSKA_ID_CHAPLANG,           EBML_NONE },
374 375 376 377 378 379 380 381 382
    { 0 }
};

/*
 * Children of one chapter atom, stored into MatroskaChapter. Start and
 * end default to AV_NOPTS_VALUE. The display element is parsed into the
 * same struct (offset 0). Nested chapter atoms and the remaining
 * EBML_NONE rows are recognised but skipped.
 */
static EbmlSyntax matroska_chapter_entry[] = {
    { MATROSKA_ID_CHAPTERTIMESTART,   EBML_UINT, 0, offsetof(MatroskaChapter,start), {.u=AV_NOPTS_VALUE} },
    { MATROSKA_ID_CHAPTERTIMEEND,     EBML_UINT, 0, offsetof(MatroskaChapter,end), {.u=AV_NOPTS_VALUE} },
    { MATROSKA_ID_CHAPTERUID,         EBML_UINT, 0, offsetof(MatroskaChapter,uid) },
    { MATROSKA_ID_CHAPTERDISPLAY,     EBML_NEST, 0, 0, {.n=matroska_chapter_display} },
    { MATROSKA_ID_CHAPTERFLAGHIDDEN,  EBML_NONE },
    { MATROSKA_ID_CHAPTERFLAGENABLED, EBML_NONE },
    { MATROSKA_ID_CHAPTERPHYSEQUIV,   EBML_NONE },
    { MATROSKA_ID_CHAPTERATOM,        EBML_NONE },
    { 0 }
};

static EbmlSyntax matroska_chapter[] = {
    { MATROSKA_ID_CHAPTERATOM,        EBML_NEST, sizeof(MatroskaChapter), offsetof(MatroskaDemuxContext,chapters), {.n=matroska_chapter_entry} },
    { MATROSKA_ID_EDITIONUID,         EBML_NONE },
    { MATROSKA_ID_EDITIONFLAGHIDDEN,  EBML_NONE },
    { MATROSKA_ID_EDITIONFLAGDEFAULT, EBML_NONE },
394
    { MATROSKA_ID_EDITIONFLAGORDERED, EBML_NONE },
395 396 397 398 399 400 401 402
    { 0 }
};

/* Children of the chapters element: edition entries, parsed with matroska_chapter. */
static EbmlSyntax matroska_chapters[] = {
    { MATROSKA_ID_EDITIONENTRY,       EBML_NEST, 0, 0, {.n=matroska_chapter} },
    { 0 }
};

403 404 405
static EbmlSyntax matroska_index_pos[] = {
    { MATROSKA_ID_CUETRACK,           EBML_UINT, 0, offsetof(MatroskaIndexPos,track) },
    { MATROSKA_ID_CUECLUSTERPOSITION, EBML_UINT, 0, offsetof(MatroskaIndexPos,pos)   },
406
    { MATROSKA_ID_CUEBLOCKNUMBER,     EBML_NONE },
407 408 409 410 411 412 413 414 415 416 417 418 419 420
    { 0 }
};

/*
 * Children of one cue point, stored into MatroskaIndex; each track
 * position appends one MatroskaIndexPos to MatroskaIndex.pos.
 */
static EbmlSyntax matroska_index_entry[] = {
    { MATROSKA_ID_CUETIME,            EBML_UINT, 0, offsetof(MatroskaIndex,time) },
    { MATROSKA_ID_CUETRACKPOSITION,   EBML_NEST, sizeof(MatroskaIndexPos), offsetof(MatroskaIndex,pos), {.n=matroska_index_pos} },
    { 0 }
};

/* Each cue point appends one MatroskaIndex to MatroskaDemuxContext.index. */
static EbmlSyntax matroska_index[] = {
    { MATROSKA_ID_POINTENTRY,         EBML_NEST, sizeof(MatroskaIndex), offsetof(MatroskaDemuxContext,index), {.n=matroska_index_entry} },
    { 0 }
};

421 422 423
static EbmlSyntax matroska_simpletag[] = {
    { MATROSKA_ID_TAGNAME,            EBML_UTF8, 0, offsetof(MatroskaTag,name) },
    { MATROSKA_ID_TAGSTRING,          EBML_UTF8, 0, offsetof(MatroskaTag,string) },
424 425
    { MATROSKA_ID_TAGLANG,            EBML_STR,  0, offsetof(MatroskaTag,lang), {.s="und"} },
    { MATROSKA_ID_TAGDEFAULT,         EBML_UINT, 0, offsetof(MatroskaTag,def) },
426 427 428 429
    { MATROSKA_ID_SIMPLETAG,          EBML_NEST, sizeof(MatroskaTag), offsetof(MatroskaTag,sub), {.n=matroska_simpletag} },
    { 0 }
};

430 431 432 433 434 435 436 437 438
/*
 * Children of a tag targets element, stored into MatroskaTagTarget.
 * The type value defaults to 50.
 */
static EbmlSyntax matroska_tagtargets[] = {
    { MATROSKA_ID_TAGTARGETS_TYPE,      EBML_STR,  0, offsetof(MatroskaTagTarget,type) },
    { MATROSKA_ID_TAGTARGETS_TYPEVALUE, EBML_UINT, 0, offsetof(MatroskaTagTarget,typevalue), {.u=50} },
    { MATROSKA_ID_TAGTARGETS_TRACKUID,  EBML_UINT, 0, offsetof(MatroskaTagTarget,trackuid) },
    { MATROSKA_ID_TAGTARGETS_CHAPTERUID,EBML_UINT, 0, offsetof(MatroskaTagTarget,chapteruid) },
    { MATROSKA_ID_TAGTARGETS_ATTACHUID, EBML_UINT, 0, offsetof(MatroskaTagTarget,attachuid) },
    { 0 }
};

439
static EbmlSyntax matroska_tag[] = {
440 441
    { MATROSKA_ID_SIMPLETAG,          EBML_NEST, sizeof(MatroskaTag), offsetof(MatroskaTags,tag), {.n=matroska_simpletag} },
    { MATROSKA_ID_TAGTARGETS,         EBML_NEST, 0, offsetof(MatroskaTags,target), {.n=matroska_tagtargets} },
442 443 444
    { 0 }
};

445
static EbmlSyntax matroska_tags[] = {
446
    { MATROSKA_ID_TAG,                EBML_NEST, sizeof(MatroskaTags), offsetof(MatroskaDemuxContext,tags), {.n=matroska_tag} },
447 448 449
    { 0 }
};

450 451 452 453 454 455 456 457 458 459 460
/*
 * Children of one seek entry, stored into MatroskaSeekhead. The position
 * defaults to -1, i.e. all bits set once stored in the uint64_t field.
 */
static EbmlSyntax matroska_seekhead_entry[] = {
    { MATROSKA_ID_SEEKID,             EBML_UINT, 0, offsetof(MatroskaSeekhead,id) },
    { MATROSKA_ID_SEEKPOSITION,       EBML_UINT, 0, offsetof(MatroskaSeekhead,pos), {.u=-1} },
    { 0 }
};

/* Each seek entry appends one MatroskaSeekhead to MatroskaDemuxContext.seekhead. */
static EbmlSyntax matroska_seekhead[] = {
    { MATROSKA_ID_SEEKENTRY,          EBML_NEST, sizeof(MatroskaSeekhead), offsetof(MatroskaDemuxContext,seekhead), {.n=matroska_seekhead_entry} },
    { 0 }
};

461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477
/*
 * Children of the segment element. All nested tables write into
 * MatroskaDemuxContext (offset 0). Meeting a cluster ID sets
 * has_cluster_id to 1 and makes the parser return 1 (EBML_STOP), which
 * ends the ebml_parse_nest() loop.
 */
static EbmlSyntax matroska_segment[] = {
    { MATROSKA_ID_INFO,           EBML_NEST, 0, 0, {.n=matroska_info       } },
    { MATROSKA_ID_TRACKS,         EBML_NEST, 0, 0, {.n=matroska_tracks     } },
    { MATROSKA_ID_ATTACHMENTS,    EBML_NEST, 0, 0, {.n=matroska_attachments} },
    { MATROSKA_ID_CHAPTERS,       EBML_NEST, 0, 0, {.n=matroska_chapters   } },
    { MATROSKA_ID_CUES,           EBML_NEST, 0, 0, {.n=matroska_index      } },
    { MATROSKA_ID_TAGS,           EBML_NEST, 0, 0, {.n=matroska_tags       } },
    { MATROSKA_ID_SEEKHEAD,       EBML_NEST, 0, 0, {.n=matroska_seekhead   } },
    { MATROSKA_ID_CLUSTER,        EBML_STOP, 0, offsetof(MatroskaDemuxContext,has_cluster_id) },
    { 0 }
};

/* Top-level table: the segment element, parsed with matroska_segment. */
static EbmlSyntax matroska_segments[] = {
    { MATROSKA_ID_SEGMENT,        EBML_NEST, 0, 0, {.n=matroska_segment    } },
    { 0 }
};

478 479 480 481 482
/*
 * Children of a block group, stored into MatroskaBlock. Block and simple
 * block share the same destination buffer. The duration defaults to
 * AV_NOPTS_VALUE.
 *
 * The row with ID 1 is a pseudo element that exists only for its default:
 * ebml_parse_nest() sets non_simple to 1 when a block group is parsed as
 * a nested element, whereas a simple block reached through EBML_PASS
 * skips ebml_parse_nest() and leaves the zero written by the list
 * allocation in place.
 */
static EbmlSyntax matroska_blockgroup[] = {
    { MATROSKA_ID_BLOCK,          EBML_BIN,  0, offsetof(MatroskaBlock,bin) },
    { MATROSKA_ID_SIMPLEBLOCK,    EBML_BIN,  0, offsetof(MatroskaBlock,bin) },
    { MATROSKA_ID_BLOCKDURATION,  EBML_UINT, 0, offsetof(MatroskaBlock,duration), {.u=AV_NOPTS_VALUE} },
    { MATROSKA_ID_BLOCKREFERENCE, EBML_UINT, 0, offsetof(MatroskaBlock,reference) },
    { 1,                          EBML_UINT, 0, offsetof(MatroskaBlock,non_simple), {.u=1} },
    { 0 }
};

/*
 * Children of one cluster, stored into MatroskaCluster. Block groups and
 * simple blocks both append a MatroskaBlock to the same blocks list; the
 * simple block row uses EBML_PASS so the element is looked up again in
 * matroska_blockgroup without reading a length first. Cluster position
 * and previous size are recognised but skipped.
 */
static EbmlSyntax matroska_cluster[] = {
    { MATROSKA_ID_CLUSTERTIMECODE,EBML_UINT,0, offsetof(MatroskaCluster,timecode) },
    { MATROSKA_ID_BLOCKGROUP,     EBML_NEST, sizeof(MatroskaBlock), offsetof(MatroskaCluster,blocks), {.n=matroska_blockgroup} },
    { MATROSKA_ID_SIMPLEBLOCK,    EBML_PASS, sizeof(MatroskaBlock), offsetof(MatroskaCluster,blocks), {.n=matroska_blockgroup} },
    { MATROSKA_ID_CLUSTERPOSITION,EBML_NONE },
    { MATROSKA_ID_CLUSTERPREVSIZE,EBML_NONE },
    { 0 }
};

static EbmlSyntax matroska_clusters[] = {
    { MATROSKA_ID_CLUSTER,        EBML_NEST, 0, 0, {.n=matroska_cluster} },
498 499 500 501
    { MATROSKA_ID_INFO,           EBML_NONE },
    { MATROSKA_ID_CUES,           EBML_NONE },
    { MATROSKA_ID_TAGS,           EBML_NONE },
    { MATROSKA_ID_SEEKHEAD,       EBML_NONE },
502 503 504
    { 0 }
};

505
/*
Diego Biurrun's avatar
Diego Biurrun committed
506
 * Return: Whether we reached the end of a level in the hierarchy or not.
507
 */
508
static int ebml_level_end(MatroskaDemuxContext *matroska)
509
{
510
    ByteIOContext *pb = matroska->ctx->pb;
511
    int64_t pos = url_ftell(pb);
512

513
    if (matroska->num_levels > 0) {
514
        MatroskaLevel *level = &matroska->levels[matroska->num_levels - 1];
515
        if (pos - level->start >= level->length) {
516
            matroska->num_levels--;
517
            return 1;
518 519
        }
    }
520
    return 0;
521 522 523 524 525 526 527 528
}

/*
 * Read: an "EBML number", which is defined as a variable-length
 * array of bytes. The first byte indicates the length by giving a
 * number of 0-bits followed by a one. The position of the first
 * "one" bit inside the first byte indicates the length of this
 * number.
Diego Biurrun's avatar
Diego Biurrun committed
529
 * Returns: number of bytes read, < 0 on error
530
 */
531
static int ebml_read_num(MatroskaDemuxContext *matroska, ByteIOContext *pb,
532
                         int max_size, uint64_t *number)
533 534 535 536
{
    int len_mask = 0x80, read = 1, n = 1;
    int64_t total = 0;

Diego Biurrun's avatar
Diego Biurrun committed
537
    /* The first byte tells us the length in bytes - get_byte() can normally
538 539 540 541 542
     * return 0, but since that's not a valid first ebmlID byte, we can
     * use it safely here to catch EOS. */
    if (!(total = get_byte(pb))) {
        /* we might encounter EOS here */
        if (!url_feof(pb)) {
543
            int64_t pos = url_ftell(pb);
544 545 546 547
            av_log(matroska->ctx, AV_LOG_ERROR,
                   "Read error at pos. %"PRIu64" (0x%"PRIx64")\n",
                   pos, pos);
        }
548
        return AVERROR(EIO); /* EOS or actual I/O error */
549 550 551 552 553 554 555 556
    }

    /* get the length of the EBML number */
    while (read <= max_size && !(total & len_mask)) {
        read++;
        len_mask >>= 1;
    }
    if (read > max_size) {
557
        int64_t pos = url_ftell(pb) - 1;
558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577
        av_log(matroska->ctx, AV_LOG_ERROR,
               "Invalid EBML number size tag 0x%02x at pos %"PRIu64" (0x%"PRIx64")\n",
               (uint8_t) total, pos, pos);
        return AVERROR_INVALIDDATA;
    }

    /* read out length */
    total &= ~len_mask;
    while (n++ < read)
        total = (total << 8) | get_byte(pb);

    *number = total;

    return read;
}

/*
 * Read the next element as an unsigned int.
 * 0 is success, < 0 is failure.
 */
578
static int ebml_read_uint(ByteIOContext *pb, int size, uint64_t *num)
579
{
580
    int n = 0;
581

582
    if (size < 1 || size > 8)
583 584
        return AVERROR_INVALIDDATA;

Diego Biurrun's avatar
Diego Biurrun committed
585
    /* big-endian ordering; build up number */
586 587 588 589 590 591 592 593 594 595 596
    *num = 0;
    while (n++ < size)
        *num = (*num << 8) | get_byte(pb);

    return 0;
}

/*
 * Read the next element as a float.
 * 0 is success, < 0 is failure.
 */
597
static int ebml_read_float(ByteIOContext *pb, int size, double *num)
598 599 600 601 602
{
    if (size == 4) {
        *num= av_int2flt(get_be32(pb));
    } else if(size==8){
        *num= av_int2dbl(get_be64(pb));
603
    } else
604 605 606 607 608 609 610 611 612
        return AVERROR_INVALIDDATA;

    return 0;
}

/*
 * Read the next element as an ASCII string.
 * 0 is success, < 0 is failure.
 */
613
static int ebml_read_ascii(ByteIOContext *pb, int size, char **str)
614
{
615
    av_free(*str);
Diego Biurrun's avatar
Diego Biurrun committed
616
    /* EBML strings are usually not 0-terminated, so we allocate one
617
     * byte more, read the string and NULL-terminate it ourselves. */
618
    if (!(*str = av_malloc(size + 1)))
619
        return AVERROR(ENOMEM);
620
    if (get_buffer(pb, (uint8_t *) *str, size) != size) {
621
        av_free(*str);
622
        return AVERROR(EIO);
623 624 625 626 627 628
    }
    (*str)[size] = '\0';

    return 0;
}

629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646
/*
 * Read the next element as binary data.
 * Any buffer previously held by bin is released first. On failure bin is
 * left empty (data NULL, size 0) so size never describes a missing or
 * partially filled buffer.
 * 0 is success, < 0 is failure.
 */
static int ebml_read_binary(ByteIOContext *pb, int length, EbmlBin *bin)
{
    av_free(bin->data);
    bin->data = NULL;
    bin->size = 0;

    if (length < 0)
        return AVERROR_INVALIDDATA;
    if (!(bin->data = av_malloc(length)))
        return AVERROR(ENOMEM);

    bin->size = length;
    bin->pos  = url_ftell(pb);
    if (get_buffer(pb, bin->data, length) != length) {
        av_freep(&bin->data);
        bin->size = 0;
        return AVERROR(EIO);
    }

    return 0;
}

647 648 649 650 651
/*
 * Read the next element, but only the header. The contents
 * are supposed to be sub-elements which can be read separately.
 * 0 is success, < 0 is failure.
 */
652
static int ebml_read_master(MatroskaDemuxContext *matroska, int length)
653
{
654
    ByteIOContext *pb = matroska->ctx->pb;
655 656 657 658 659
    MatroskaLevel *level;

    if (matroska->num_levels >= EBML_MAX_DEPTH) {
        av_log(matroska->ctx, AV_LOG_ERROR,
               "File moves beyond max. allowed depth (%d)\n", EBML_MAX_DEPTH);
660
        return AVERROR(ENOSYS);
661 662 663 664 665 666 667 668 669 670 671
    }

    level = &matroska->levels[matroska->num_levels++];
    level->start = url_ftell(pb);
    level->length = length;

    return 0;
}

/*
 * Read signed/unsigned "EBML" numbers.
Diego Biurrun's avatar
Diego Biurrun committed
672
 * Return: number of bytes processed, < 0 on error
673
 */
674 675
static int matroska_ebmlnum_uint(MatroskaDemuxContext *matroska,
                                 uint8_t *data, uint32_t size, uint64_t *num)
676
{
677 678 679
    ByteIOContext pb;
    init_put_byte(&pb, data, size, 0, NULL, NULL, NULL, NULL);
    return ebml_read_num(matroska, &pb, 8, num);
680 681 682 683 684
}

/*
 * Same as above, but signed.
 */
685 686
static int matroska_ebmlnum_sint(MatroskaDemuxContext *matroska,
                                 uint8_t *data, uint32_t size, int64_t *num)
687 688 689 690 691
{
    uint64_t unum;
    int res;

    /* read as unsigned number first */
692
    if ((res = matroska_ebmlnum_uint(matroska, data, size, &unum)) < 0)
693 694 695
        return res;

    /* make signed (weird way) */
696
    *num = unum - ((1LL << (7*res - 1)) - 1);
697 698 699 700

    return res;
}

701 702
/* Forward declaration: ebml_parse_id() and ebml_parse_elem() call each other. */
static int ebml_parse_elem(MatroskaDemuxContext *matroska,
                           EbmlSyntax *syntax, void *data);
703

704 705
/*
 * Look up id in a syntax table and parse the element with the matching
 * row. If no row matches, the terminating row (id 0, type EBML_NONE) is
 * used, which makes ebml_parse_elem() skip the payload; such IDs are
 * logged unless they are the void or CRC-32 elements.
 * Returns whatever ebml_parse_elem() returns.
 */
static int ebml_parse_id(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
                         uint32_t id, void *data)
{
    int i;
    for (i=0; syntax[i].id; i++)
        if (id == syntax[i].id)
            break;
    if (!syntax[i].id && id != EBML_ID_VOID && id != EBML_ID_CRC32)
        av_log(matroska->ctx, AV_LOG_INFO, "Unknown entry 0x%X\n", id);
    return ebml_parse_elem(matroska, &syntax[i], data);
}

716 717
/*
 * Read one element ID (at most 4 bytes) from the stream and parse the
 * element according to the given syntax table.
 * Returns the ebml_read_num() error if the ID cannot be read, otherwise
 * whatever ebml_parse_id() returns.
 */
static int ebml_parse(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
                      void *data)
{
    uint64_t id;
    int res = ebml_read_num(matroska, matroska->ctx->pb, 4, &id);

    /* On error id is not set and res is negative, so the shift below
     * must not be evaluated. */
    if (res < 0)
        return res;

    /* ebml_read_num() strips the length marker bit; put it back so the
     * value compares equal to the full element IDs in the tables. */
    id |= (uint64_t)1 << (7 * res);

    return ebml_parse_id(matroska, syntax, id, data);
}

725 726
/*
 * Parse the children of a master element.
 * First every row of the table that has a scalar or string type gets its
 * default value written into data, then children are parsed one by one
 * until the current level ends or a parse call returns non-zero.
 * Returns the last ebml_parse() result (0 if the level ended normally).
 */
static int ebml_parse_nest(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
                           void *data)
{
    int i, res = 0;

    for (i=0; syntax[i].id; i++)
        switch (syntax[i].type) {
        case EBML_UINT:
            *(uint64_t *)((char *)data+syntax[i].data_offset) = syntax[i].def.u;
            break;
        case EBML_FLOAT:
            *(double   *)((char *)data+syntax[i].data_offset) = syntax[i].def.f;
            break;
        case EBML_STR:
        case EBML_UTF8:
            *(char    **)((char *)data+syntax[i].data_offset) = av_strdup(syntax[i].def.s);
            break;
        default:
            /* other types have no default value */
            break;
        }

    while (!res && !ebml_level_end(matroska))
        res = ebml_parse(matroska, syntax, data);

    return res;
}

750 751 752
/*
 * Parse one element whose ID has already been read, as described by a
 * single syntax row.
 *
 * data points to the parent structure; the row's data_offset is added to
 * reach the destination. If the row has a list_elem_size, the destination
 * is an EbmlList and a new zeroed element is appended and used instead.
 *
 * For every type except EBML_PASS and EBML_STOP the element length is
 * read first. Rows without a handler (including the table terminator
 * used for unknown IDs) skip the payload.
 *
 * Returns 0 on success, 1 for EBML_STOP, < 0 on error.
 */
static int ebml_parse_elem(MatroskaDemuxContext *matroska,
                           EbmlSyntax *syntax, void *data)
{
    ByteIOContext *pb = matroska->ctx->pb;
    uint32_t id = syntax->id;
    uint64_t length = 0;
    int res;

    data = (char *)data + syntax->data_offset;
    if (syntax->list_elem_size) {
        EbmlList *list = data;
        /* Grow through a temporary: on failure the old array stays valid
         * and owned by the list instead of being leaked. */
        void *newelem = av_realloc(list->elem,
                                   (list->nb_elem+1)*syntax->list_elem_size);
        if (!newelem)
            return AVERROR(ENOMEM);
        list->elem = newelem;
        data = (char*)list->elem + list->nb_elem*syntax->list_elem_size;
        memset(data, 0, syntax->list_elem_size);
        list->nb_elem++;
    }

    if (syntax->type != EBML_PASS && syntax->type != EBML_STOP)
        if ((res = ebml_read_num(matroska, pb, 8, &length)) < 0)
            return res;

    switch (syntax->type) {
    case EBML_UINT:  res = ebml_read_uint  (pb, length, data);  break;
    case EBML_FLOAT: res = ebml_read_float (pb, length, data);  break;
    case EBML_STR:
    case EBML_UTF8:  res = ebml_read_ascii (pb, length, data);  break;
    case EBML_BIN:   res = ebml_read_binary(pb, length, data);  break;
    case EBML_NEST:  if ((res=ebml_read_master(matroska, length)) < 0)
                         return res;
                     if (id == MATROSKA_ID_SEGMENT)
                         matroska->segment_start = url_ftell(matroska->ctx->pb);
                     return ebml_parse_nest(matroska, syntax->def.n, data);
    case EBML_PASS:  return ebml_parse_id(matroska, syntax->def.n, id, data);
    case EBML_STOP:  *(int *)data = 1;      return 1;
    default:         return url_fseek(pb,length,SEEK_CUR)<0 ? AVERROR(EIO) : 0;
    }
    if (res == AVERROR_INVALIDDATA)
        av_log(matroska->ctx, AV_LOG_ERROR, "Invalid element\n");
    else if (res == AVERROR(EIO))
        av_log(matroska->ctx, AV_LOG_ERROR, "Read error\n");
    return res;
}

/*
 * Release everything the parser allocated inside data for the given
 * syntax table: strings, binary buffers and, recursively, nested
 * structures and list elements. Freed pointers are reset to NULL and
 * list counts to 0, so calling this again on the same structure is
 * harmless.
 */
static void ebml_free(EbmlSyntax *syntax, void *data)
{
    int i, j;
    for (i=0; syntax[i].id; i++) {
        void *data_off = (char *)data + syntax[i].data_offset;
        switch (syntax[i].type) {
        case EBML_STR:
        case EBML_UTF8:  av_freep(data_off);                      break;
        case EBML_BIN:   av_freep(&((EbmlBin *)data_off)->data);  break;
        case EBML_NEST:
            if (syntax[i].list_elem_size) {
                EbmlList *list = data_off;
                char *ptr = list->elem;
                for (j=0; j<list->nb_elem; j++, ptr+=syntax[i].list_elem_size)
                    ebml_free(syntax[i].def.n, ptr);
                /* do not leave a dangling array or a stale count behind */
                av_freep(&list->elem);
                list->nb_elem = 0;
            } else
                ebml_free(syntax[i].def.n, data_off);
            break;
        default:  break;
        }
    }
}

816 817 818 819 820 821 822 823

/*
 * Autodetecting...
 * The buffer must start with the EBML header ID, the probe data must
 * contain the complete header, and the header must contain the bytes
 * "matroska".
 * Returns AVPROBE_SCORE_MAX on a match, 0 otherwise.
 */
static int matroska_probe(AVProbeData *p)
{
    uint64_t total = 0;
    int len_mask = 0x80, size = 1, n = 1;
    static const char probe_data[] = "matroska";
    const size_t probe_len = sizeof(probe_data) - 1;

    /* EBML header? */
    if (AV_RB32(p->buf) != EBML_ID_HEADER)
        return 0;

    /* length of header */
    total = p->buf[4];
    while (size <= 8 && !(total & len_mask)) {
        size++;
        len_mask >>= 1;
    }
    if (size > 8)
        return 0;
    total &= (len_mask - 1);
    while (n < size)
        total = (total << 8) | p->buf[4 + n++];

    /* Does the probe data contain the whole header? */
    if (p->buf_size < 4 + size + total)
        return 0;

    /* A header shorter than the doctype string cannot contain it. Without
     * this check the unsigned loop bound below wraps around and the scan
     * runs far past the end of the probe buffer. */
    if (total < probe_len)
        return 0;

    /* The header must contain the document type 'matroska'. For now,
     * we don't parse the whole header but simply check for the
     * availability of that array of characters inside the header.
     * Not fully fool-proof, but good enough. */
    for (n = 4+size; n <= 4+size+total-probe_len; n++)
        if (!memcmp(p->buf+n, probe_data, probe_len))
            return AVPROBE_SCORE_MAX;

    return 0;
}

/**
 * Look up a track by its track number.
 *
 * Walks matroska->tracks in order and returns the first entry whose
 * 'num' field equals the requested number.
 *
 * @param matroska demuxer context holding the track list
 * @param num      track number to search for
 * @return pointer to the matching track, or NULL (after logging an
 *         error) when no track carries that number
 */
static MatroskaTrack *matroska_find_track_by_num(MatroskaDemuxContext *matroska,
                                                 int num)
{
    MatroskaTrack *track_list = matroska->tracks.elem;
    int idx = 0;

    while (idx < matroska->tracks.nb_elem) {
        MatroskaTrack *candidate = &track_list[idx];

        if (candidate->num == num)
            return candidate;
        idx++;
    }

    av_log(matroska->ctx, AV_LOG_ERROR, "Invalid track number %d\n", num);
    return NULL;
}

871 872
/**
 * Undo the compression described by the first content encoding of a track.
 *
 * For the LZO, zlib and bzlib algorithms the data is decompressed into a
 * newly allocated buffer, which is grown by a factor of 3 per attempt
 * until the decoder stops asking for more output space or the buffer
 * size reaches 10000000 bytes. The input buffer is not freed here.
 *
 * @param buf      in: compressed data; out (only when 0 is returned):
 *                 newly allocated buffer with the decompressed data
 * @param buf_size in: size of the compressed data; out (only when 0 is
 *                 returned): size of the decompressed data
 * @param track    track whose encodings[0].compression selects the
 *                 algorithm
 * @return 0 when *buf and *buf_size were replaced by decompressed data;
 *         for header stripping, the size of the compression settings
 *         (*buf and *buf_size are left untouched);
 *         -1 on error or for an unsupported algorithm
 */
static int matroska_decode_buffer(uint8_t** buf, int* buf_size,
                                  MatroskaTrack *track)
{
    MatroskaTrackEncoding *encodings = track->encodings.elem;
    uint8_t* data = *buf;
    int isize = *buf_size;
    uint8_t* pkt_data = NULL;
    uint8_t* new_data;
    int pkt_size = isize;
    int result = 0;
    int olen;

    switch (encodings[0].compression.algo) {
    case MATROSKA_TRACK_ENCODING_COMP_HEADERSTRIP:
        return encodings[0].compression.settings.size;
    case MATROSKA_TRACK_ENCODING_COMP_LZO:
        do {
            /* The decoder updates the input length it is given, so every
             * retry must start again from the full input size. */
            int insize = isize;
            olen = pkt_size *= 3;
            /* Keep the old pointer until the reallocation succeeded so
             * that it can still be released on failure. */
            new_data = av_realloc(pkt_data, pkt_size+AV_LZO_OUTPUT_PADDING);
            if (!new_data)
                goto failed;
            pkt_data = new_data;
            result = av_lzo1x_decode(pkt_data, &olen, data, &insize);
        } while (result==AV_LZO_OUTPUT_FULL && pkt_size<10000000);
        if (result)
            goto failed;
        /* olen is subtracted as the amount of output space left unused */
        pkt_size -= olen;
        break;
#if CONFIG_ZLIB
    case MATROSKA_TRACK_ENCODING_COMP_ZLIB: {
        z_stream zstream = {0};
        if (inflateInit(&zstream) != Z_OK)
            return -1;
        zstream.next_in = data;
        zstream.avail_in = isize;
        do {
            pkt_size *= 3;
            new_data = av_realloc(pkt_data, pkt_size);
            if (!new_data) {
                inflateEnd(&zstream);
                goto failed;
            }
            pkt_data = new_data;
            /* continue writing right after what was already produced */
            zstream.avail_out = pkt_size - zstream.total_out;
            zstream.next_out = pkt_data + zstream.total_out;
            result = inflate(&zstream, Z_NO_FLUSH);
        } while (result==Z_OK && pkt_size<10000000);
        pkt_size = zstream.total_out;
        inflateEnd(&zstream);
        if (result != Z_STREAM_END)
            goto failed;
        break;
    }
#endif
#if CONFIG_BZLIB
    case MATROSKA_TRACK_ENCODING_COMP_BZLIB: {
        bz_stream bzstream = {0};
        if (BZ2_bzDecompressInit(&bzstream, 0, 0) != BZ_OK)
            return -1;
        bzstream.next_in = data;
        bzstream.avail_in = isize;
        do {
            pkt_size *= 3;
            new_data = av_realloc(pkt_data, pkt_size);
            if (!new_data) {
                BZ2_bzDecompressEnd(&bzstream);
                goto failed;
            }
            pkt_data = new_data;
            /* continue writing right after what was already produced */
            bzstream.avail_out = pkt_size - bzstream.total_out_lo32;
            bzstream.next_out = pkt_data + bzstream.total_out_lo32;
            result = BZ2_bzDecompress(&bzstream);
        } while (result==BZ_OK && pkt_size<10000000);
        pkt_size = bzstream.total_out_lo32;
        BZ2_bzDecompressEnd(&bzstream);
        if (result != BZ_STREAM_END)
            goto failed;
        break;
    }
#endif
    default:
        return -1;
    }

    *buf = pkt_data;
    *buf_size = pkt_size;
    return 0;
 failed:
    av_free(pkt_data);
    return -1;
}

949
static void matroska_fix_ass_packet(MatroskaDemuxContext *matroska,
950
                                    AVPacket *pkt, uint64_t display_duration)
951 952 953 954 955 956 957
{
    char *line, *layer, *ptr = pkt->data, *end = ptr+pkt->size;
    for (; *ptr!=',' && ptr<end-1; ptr++);
    if (*ptr == ',')
        layer = ++ptr;
    for (; *ptr!=',' && ptr<end-1; ptr++);
    if (*ptr == ',') {
958
        int64_t end_pts = pkt->pts + display_duration;
959 960 961 962 963 964 965 966 967 968 969 970 971
        int sc = matroska->time_scale * pkt->pts / 10000000;
        int ec = matroska->time_scale * end_pts  / 10000000;
        int sh, sm, ss, eh, em, es, len;
        sh = sc/360000;  sc -= 360000*sh;
        sm = sc/  6000;  sc -=   6000*sm;
        ss = sc/   100;  sc -=    100*ss;
        eh = ec/360000;  ec -= 360000*eh;
        em = ec/  6000;  ec -=   6000*em;
        es = ec/   100;  ec -=    100*es;
        *ptr++ = '\0';
        len = 50 + end-ptr + FF_INPUT_BUFFER_PADDING_SIZE;
        if (!(line = av_malloc(len)))
            return;
972
        snprintf(line,len,"Dialogue: %s,%d:%02d:%02d.%02d,%d:%02d:%02d.%02d,%s\r\n",
973 974 975 976 977 978 979
                 layer, sh, sm, ss, sc, eh, em, es, ec, ptr);
        av_free(pkt->data);
        pkt->data = line;
        pkt->size = strlen(line);
    }
}

980 981 982 983 984 985 986 987 988
/**
 * Append the payload of one packet to another and dispose of the source.
 *
 * The data of 'in' is copied behind the data of 'out' and out->size is
 * increased accordingly. 'in' is always destructed and freed, whether or
 * not the merge succeeded.
 *
 * If the buffer of 'out' cannot be enlarged, 'out' is left untouched
 * (its previous data stays valid) and the payload of 'in' is dropped.
 *
 * @param out packet receiving the additional data
 * @param in  packet whose data is appended; must not be used afterwards
 */
static void matroska_merge_packets(AVPacket *out, AVPacket *in)
{
    /* Do not overwrite out->data before the reallocation is known to
     * have succeeded: on failure the old buffer must remain reachable. */
    void *merged = av_realloc(out->data, out->size+in->size);

    if (merged) {
        out->data = merged;
        memcpy(out->data+out->size, in->data, in->size);
        out->size += in->size;
    }
    av_destruct_packet(in);
    av_free(in);
}

989 990
/**
 * Convert a list of Matroska tags (and, recursively, their sub-tags)
 * into metadata entries.
 *
 * The metadata key of a tag is its name, prefixed with "<prefix>/" when
 * a prefix is given. A tag whose language is not "und" is stored under
 * "<key>-<lang>"; it is additionally stored under the plain key when its
 * 'def' flag is set. Tags with language "und" (or no language) are stored
 * under the plain key only. Sub-tags are converted with the key of their
 * parent as prefix.
 *
 * Tags without a name are skipped together with their sub-tags, since
 * no key can be built for them.
 *
 * @param s        format context, passed through to recursive calls
 * @param list     list of MatroskaTag elements to convert
 * @param metadata metadata set receiving the entries
 * @param prefix   key prefix, or NULL for top-level tags
 */
static void matroska_convert_tag(AVFormatContext *s, EbmlList *list,
                                 AVMetadata **metadata, char *prefix)
{
    MatroskaTag *tags = list->elem;
    char key[1024];
    int i;

    for (i=0; i < list->nb_elem; i++) {
        const char *lang;

        /* A malformed file may provide a tag without a name; passing a
         * NULL string to the formatting functions below is not allowed. */
        if (!tags[i].name)
            continue;

        /* "und" (or a missing language) means: no language suffix */
        lang = tags[i].lang && strcmp(tags[i].lang, "und") ? tags[i].lang
                                                           : NULL;

        if (prefix)  snprintf(key, sizeof(key), "%s/%s", prefix, tags[i].name);
        else         av_strlcpy(key, tags[i].name, sizeof(key));

        if (tags[i].def || !lang) {
            av_metadata_set(metadata, key, tags[i].string);
            if (tags[i].sub.nb_elem)
                matroska_convert_tag(s, &tags[i].sub, metadata, key);
        }
        if (lang) {
            /* from here on the key carries the language suffix, which is
             * therefore also part of the prefix seen by the sub-tags */
            av_strlcat(key, "-", sizeof(key));
            av_strlcat(key, lang, sizeof(key));
            av_metadata_set(metadata, key, tags[i].string);
            if (tags[i].sub.nb_elem)
                matroska_convert_tag(s, &tags[i].sub, metadata, key);
        }
    }
}

static void matroska_convert_tags(AVFormatContext *s)
{
    MatroskaDemuxContext *matroska = s->priv_data;
    MatroskaTags *tags = matroska->tags.elem;
    int i, j;

    for (i=0; i < matroska->tags.nb_elem; i++) {
        if (tags[i].target.attachuid) {
            MatroskaAttachement *attachment = matroska->attachments.elem;
            for (j=0; j<matroska->attachments.nb_elem; j++)
                if (attachment[j].uid == tags[i].target.attachuid)
                    matroska_convert_tag(s, &tags[i].tag,
                                         &attachment[j].stream->metadata, NULL);
        } else if (tags[i].target.chapteruid) {
            MatroskaChapter *chapter = matroska->chapters.elem;
            for (j=0; j<matroska->chapters.nb_elem; j++)
                if (chapter[j].uid == tags[i].target.chapteruid)
                    matroska_convert_tag(s, &tags[i].tag,
                                         &chapter[j].chapter->metadata, NULL);
        } else if (tags[i].target.trackuid) {
            MatroskaTrack *track = matroska->tracks.elem;
            for (j=0; j<matroska->tracks.nb_elem; j++)
                if (track[j].uid == tags[i].target.trackuid)
                    matroska_convert_tag(s, &tags[i].tag,
                                         &track[j].stream->metadata, NULL);
        } else {