/* vpxenc.c (65.94 KiB) */
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include "vpx_config.h"
/* This is a simple program that encodes YV12 files and generates ivf
 * files using the new interface.
 */
#if defined(_WIN32) || !CONFIG_OS_SUPPORT
#define USE_POSIX_MMAP 0
#else
#define USE_POSIX_MMAP 1
#endif
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <limits.h>
#include <assert.h>
#include "vpx/vpx_encoder.h"
#if USE_POSIX_MMAP
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>
#endif
#include "vpx_config.h"
#include "vpx_version.h"
#include "vpx/vp8cx.h"
#include "vpx/vp8dx.h"
#include "vpx/vpx_decoder.h"
#include "vpx_ports/mem_ops.h"
#include "vpx_ports/vpx_timer.h"
#include "tools_common.h"
#include "y4minput.h"
#include "libmkv/EbmlWriter.h"
#include "libmkv/EbmlIDs.h"
/* Need special handling of these functions on Windows */
#if defined(_MSC_VER)
/* MSVS doesn't define off_t, and uses _f{seek,tell}i64 */
typedef __int64 off_t;
#define fseeko _fseeki64
#define ftello _ftelli64
#elif defined(_WIN32)
/* MinGW defines off_t, and uses f{seek,tell}o64 */
#define fseeko fseeko64
#define ftello ftello64
#endif
#if defined(_MSC_VER)
#define LITERALU64(n) n
#else
#define LITERALU64(n) n##LLU
#endif
/* We should use 32-bit file operations in WebM file format
 * when building ARM executable file (.axf) with RVCT */
#if !CONFIG_OS_SUPPORT
typedef long off_t;
#define fseeko fseek
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
#define ftello ftell #endif static const char *exec_name; #define VP8_FOURCC (0x78385056) static const struct { char const *name; const vpx_codec_iface_t *(*iface)(void); unsigned int fourcc; unsigned int fourcc_mask; } ifaces[] = { #if CONFIG_VP8_DECODER {"vp8", &vpx_codec_vp8_dx, VP8_FOURCC, 0x00FFFFFF}, #endif }; static const struct codec_item { char const *name; const vpx_codec_iface_t *(*iface)(void); unsigned int fourcc; unsigned int fourcc_mask; } codecs[] = { #if CONFIG_VP8_ENCODER {"vp8", vpx_codec_vp8x_cx, VP8_FOURCC, 0x00FFFFFF}, #endif }; static void usage_exit(); void die(const char *fmt, ...) { va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); fprintf(stderr, "\n"); usage_exit(); } static void ctx_exit_on_error(vpx_codec_ctx_t *ctx, const char *s) { if (ctx->err) { const char *detail = vpx_codec_error_detail(ctx); fprintf(stderr, "%s: %s\n", s, vpx_codec_error(ctx)); if (detail) fprintf(stderr, " %s\n", detail); exit(EXIT_FAILURE); } } /* This structure is used to abstract the different ways of handling * first pass statistics. */ typedef struct { vpx_fixed_buf_t buf; int pass; FILE *file; char *buf_ptr; size_t buf_alloc_sz; } stats_io_t; int stats_open_file(stats_io_t *stats, const char *fpf, int pass) { int res; stats->pass = pass;
141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
if (pass == 0) { stats->file = fopen(fpf, "wb"); stats->buf.sz = 0; stats->buf.buf = NULL, res = (stats->file != NULL); } else { #if 0 #elif USE_POSIX_MMAP struct stat stat_buf; int fd; fd = open(fpf, O_RDONLY); stats->file = fdopen(fd, "rb"); fstat(fd, &stat_buf); stats->buf.sz = stat_buf.st_size; stats->buf.buf = mmap(NULL, stats->buf.sz, PROT_READ, MAP_PRIVATE, fd, 0); res = (stats->buf.buf != NULL); #else size_t nbytes; stats->file = fopen(fpf, "rb"); if (fseek(stats->file, 0, SEEK_END)) { fprintf(stderr, "First-pass stats file must be seekable!\n"); exit(EXIT_FAILURE); } stats->buf.sz = stats->buf_alloc_sz = ftell(stats->file); rewind(stats->file); stats->buf.buf = malloc(stats->buf_alloc_sz); if (!stats->buf.buf) { fprintf(stderr, "Failed to allocate first-pass stats buffer (%lu bytes)\n", (unsigned long)stats->buf_alloc_sz); exit(EXIT_FAILURE); } nbytes = fread(stats->buf.buf, 1, stats->buf.sz, stats->file); res = (nbytes == stats->buf.sz); #endif } return res; } int stats_open_mem(stats_io_t *stats, int pass) { int res; stats->pass = pass; if (!pass) { stats->buf.sz = 0; stats->buf_alloc_sz = 64 * 1024; stats->buf.buf = malloc(stats->buf_alloc_sz); } stats->buf_ptr = stats->buf.buf; res = (stats->buf.buf != NULL); return res; } void stats_close(stats_io_t *stats, int last_pass) { if (stats->file) { if (stats->pass == last_pass) { #if 0 #elif USE_POSIX_MMAP munmap(stats->buf.buf, stats->buf.sz); #else
211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
free(stats->buf.buf); #endif } fclose(stats->file); stats->file = NULL; } else { if (stats->pass == last_pass) free(stats->buf.buf); } } void stats_write(stats_io_t *stats, const void *pkt, size_t len) { if (stats->file) { if (fwrite(pkt, 1, len, stats->file)); } else { if (stats->buf.sz + len > stats->buf_alloc_sz) { size_t new_sz = stats->buf_alloc_sz + 64 * 1024; char *new_ptr = realloc(stats->buf.buf, new_sz); if (new_ptr) { stats->buf_ptr = new_ptr + (stats->buf_ptr - (char *)stats->buf.buf); stats->buf.buf = new_ptr; stats->buf_alloc_sz = new_sz; } else { fprintf(stderr, "\nFailed to realloc firstpass stats buffer.\n"); exit(EXIT_FAILURE); } } memcpy(stats->buf_ptr, pkt, len); stats->buf.sz += len; stats->buf_ptr += len; } } vpx_fixed_buf_t stats_get(stats_io_t *stats) { return stats->buf; } /* Stereo 3D packed frame format */ typedef enum stereo_format { STEREO_FORMAT_MONO = 0, STEREO_FORMAT_LEFT_RIGHT = 1, STEREO_FORMAT_BOTTOM_TOP = 2, STEREO_FORMAT_TOP_BOTTOM = 3, STEREO_FORMAT_RIGHT_LEFT = 11 } stereo_format_t; enum video_file_type { FILE_TYPE_RAW, FILE_TYPE_IVF, FILE_TYPE_Y4M }; struct detect_buffer { char buf[4]; size_t buf_read; size_t position; }; #define IVF_FRAME_HDR_SZ (4+8) /* 4 byte size + 8 byte timestamp */ static int read_frame(FILE *f, vpx_image_t *img, unsigned int file_type, y4m_input *y4m, struct detect_buffer *detect) { int plane = 0; int shortread = 0; if (file_type == FILE_TYPE_Y4M) {
281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350
if (y4m_input_fetch_frame(y4m, f, img) < 1) return 0; } else { if (file_type == FILE_TYPE_IVF) { char junk[IVF_FRAME_HDR_SZ]; /* Skip the frame header. We know how big the frame should be. See * write_ivf_frame_header() for documentation on the frame header * layout. */ if (fread(junk, 1, IVF_FRAME_HDR_SZ, f)); } for (plane = 0; plane < 3; plane++) { unsigned char *ptr; int w = (plane ? (1 + img->d_w) / 2 : img->d_w); int h = (plane ? (1 + img->d_h) / 2 : img->d_h); int r; /* Determine the correct plane based on the image format. The for-loop * always counts in Y,U,V order, but this may not match the order of * the data on disk. */ switch (plane) { case 1: ptr = img->planes[img->fmt == VPX_IMG_FMT_YV12 ? VPX_PLANE_V : VPX_PLANE_U]; break; case 2: ptr = img->planes[img->fmt == VPX_IMG_FMT_YV12 ? VPX_PLANE_U : VPX_PLANE_V]; break; default: ptr = img->planes[plane]; } for (r = 0; r < h; r++) { size_t needed = w; size_t buf_position = 0; const size_t left = detect->buf_read - detect->position; if (left > 0) { const size_t more = (left < needed) ? left : needed; memcpy(ptr, detect->buf + detect->position, more); buf_position = more; needed -= more; detect->position += more; } if (needed > 0) { shortread |= (fread(ptr + buf_position, 1, needed, f) < needed); } ptr += img->stride[plane]; } } } return !shortread; } unsigned int file_is_y4m(FILE *infile, y4m_input *y4m, char detect[4]) { if (memcmp(detect, "YUV4", 4) == 0) { return 1; } return 0; } #define IVF_FILE_HDR_SZ (32) unsigned int file_is_ivf(FILE *infile, unsigned int *fourcc,
351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420
unsigned int *width, unsigned int *height, struct detect_buffer *detect) { char raw_hdr[IVF_FILE_HDR_SZ]; int is_ivf = 0; if (memcmp(detect->buf, "DKIF", 4) != 0) return 0; /* See write_ivf_file_header() for more documentation on the file header * layout. */ if (fread(raw_hdr + 4, 1, IVF_FILE_HDR_SZ - 4, infile) == IVF_FILE_HDR_SZ - 4) { { is_ivf = 1; if (mem_get_le16(raw_hdr + 4) != 0) fprintf(stderr, "Error: Unrecognized IVF version! This file may not" " decode properly."); *fourcc = mem_get_le32(raw_hdr + 8); } } if (is_ivf) { *width = mem_get_le16(raw_hdr + 12); *height = mem_get_le16(raw_hdr + 14); detect->position = 4; } return is_ivf; } static void write_ivf_file_header(FILE *outfile, const vpx_codec_enc_cfg_t *cfg, unsigned int fourcc, int frame_cnt) { char header[32]; if (cfg->g_pass != VPX_RC_ONE_PASS && cfg->g_pass != VPX_RC_LAST_PASS) return; header[0] = 'D'; header[1] = 'K'; header[2] = 'I'; header[3] = 'F'; mem_put_le16(header + 4, 0); /* version */ mem_put_le16(header + 6, 32); /* headersize */ mem_put_le32(header + 8, fourcc); /* headersize */ mem_put_le16(header + 12, cfg->g_w); /* width */ mem_put_le16(header + 14, cfg->g_h); /* height */ mem_put_le32(header + 16, cfg->g_timebase.den); /* rate */ mem_put_le32(header + 20, cfg->g_timebase.num); /* scale */ mem_put_le32(header + 24, frame_cnt); /* length */ mem_put_le32(header + 28, 0); /* unused */ if (fwrite(header, 1, 32, outfile)); } static void write_ivf_frame_header(FILE *outfile, const vpx_codec_cx_pkt_t *pkt) { char header[12]; vpx_codec_pts_t pts; if (pkt->kind != VPX_CODEC_CX_FRAME_PKT) return;
421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490
pts = pkt->data.frame.pts; mem_put_le32(header, pkt->data.frame.sz); mem_put_le32(header + 4, pts & 0xFFFFFFFF); mem_put_le32(header + 8, pts >> 32); if (fwrite(header, 1, 12, outfile)); } typedef off_t EbmlLoc; struct cue_entry { unsigned int time; uint64_t loc; }; struct EbmlGlobal { int debug; FILE *stream; int64_t last_pts_ms; vpx_rational_t framerate; /* These pointers are to the start of an element */ off_t position_reference; off_t seek_info_pos; off_t segment_info_pos; off_t track_pos; off_t cue_pos; off_t cluster_pos; /* This pointer is to a specific element to be serialized */ off_t track_id_pos; /* These pointers are to the size field of the element */ EbmlLoc startSegment; EbmlLoc startCluster; uint32_t cluster_timecode; int cluster_open; struct cue_entry *cue_list; unsigned int cues; }; void Ebml_Write(EbmlGlobal *glob, const void *buffer_in, unsigned long len) { if (fwrite(buffer_in, 1, len, glob->stream)); } #define WRITE_BUFFER(s) \ for(i = len-1; i>=0; i--)\ { \ x = *(const s *)buffer_in >> (i * CHAR_BIT); \ Ebml_Write(glob, &x, 1); \ } void Ebml_Serialize(EbmlGlobal *glob, const void *buffer_in, int buffer_size, unsigned long len) { char x; int i; /* buffer_size: * 1 - int8_t; * 2 - int16_t; * 3 - int32_t; * 4 - int64_t; */ switch (buffer_size) {
491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560
case 1: WRITE_BUFFER(int8_t) break; case 2: WRITE_BUFFER(int16_t) break; case 4: WRITE_BUFFER(int32_t) break; case 8: WRITE_BUFFER(int64_t) break; default: break; } } #undef WRITE_BUFFER /* Need a fixed size serializer for the track ID. libmkv provides a 64 bit * one, but not a 32 bit one. */ static void Ebml_SerializeUnsigned32(EbmlGlobal *glob, unsigned long class_id, uint64_t ui) { unsigned char sizeSerialized = 4 | 0x80; Ebml_WriteID(glob, class_id); Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1); Ebml_Serialize(glob, &ui, sizeof(ui), 4); } static void Ebml_StartSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc, unsigned long class_id) { // todo this is always taking 8 bytes, this may need later optimization // this is a key that says length unknown uint64_t unknownLen = LITERALU64(0x01FFFFFFFFFFFFFF); Ebml_WriteID(glob, class_id); *ebmlLoc = ftello(glob->stream); Ebml_Serialize(glob, &unknownLen, sizeof(unknownLen), 8); } static void Ebml_EndSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc) { off_t pos; uint64_t size; /* Save the current stream pointer */ pos = ftello(glob->stream); /* Calculate the size of this element */ size = pos - *ebmlLoc - 8; size |= LITERALU64(0x0100000000000000); /* Seek back to the beginning of the element and write the new size */ fseeko(glob->stream, *ebmlLoc, SEEK_SET); Ebml_Serialize(glob, &size, sizeof(size), 8); /* Reset the stream pointer */ fseeko(glob->stream, pos, SEEK_SET); } static void write_webm_seek_element(EbmlGlobal *ebml, unsigned long id, off_t pos) { uint64_t offset = pos - ebml->position_reference; EbmlLoc start; Ebml_StartSubElement(ebml, &start, Seek); Ebml_SerializeBinary(ebml, SeekID, id); Ebml_SerializeUnsigned64(ebml, SeekPosition, offset); Ebml_EndSubElement(ebml, &start);
561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630
} static void write_webm_seek_info(EbmlGlobal *ebml) { off_t pos; /* Save the current stream pointer */ pos = ftello(ebml->stream); if (ebml->seek_info_pos) fseeko(ebml->stream, ebml->seek_info_pos, SEEK_SET); else ebml->seek_info_pos = pos; { EbmlLoc start; Ebml_StartSubElement(ebml, &start, SeekHead); write_webm_seek_element(ebml, Tracks, ebml->track_pos); write_webm_seek_element(ebml, Cues, ebml->cue_pos); write_webm_seek_element(ebml, Info, ebml->segment_info_pos); Ebml_EndSubElement(ebml, &start); } { // segment info EbmlLoc startInfo; uint64_t frame_time; frame_time = (uint64_t)1000 * ebml->framerate.den / ebml->framerate.num; ebml->segment_info_pos = ftello(ebml->stream); Ebml_StartSubElement(ebml, &startInfo, Info); Ebml_SerializeUnsigned(ebml, TimecodeScale, 1000000); Ebml_SerializeFloat(ebml, Segment_Duration, ebml->last_pts_ms + frame_time); Ebml_SerializeString(ebml, 0x4D80, ebml->debug ? "vpxenc" : "vpxenc" VERSION_STRING); Ebml_SerializeString(ebml, 0x5741, ebml->debug ? "vpxenc" : "vpxenc" VERSION_STRING); Ebml_EndSubElement(ebml, &startInfo); } } static void write_webm_file_header(EbmlGlobal *glob, const vpx_codec_enc_cfg_t *cfg, const struct vpx_rational *fps, stereo_format_t stereo_fmt) { { EbmlLoc start; Ebml_StartSubElement(glob, &start, EBML); Ebml_SerializeUnsigned(glob, EBMLVersion, 1); Ebml_SerializeUnsigned(glob, EBMLReadVersion, 1); // EBML Read Version Ebml_SerializeUnsigned(glob, EBMLMaxIDLength, 4); // EBML Max ID Length Ebml_SerializeUnsigned(glob, EBMLMaxSizeLength, 8); // EBML Max Size Length Ebml_SerializeString(glob, DocType, "webm"); // Doc Type Ebml_SerializeUnsigned(glob, DocTypeVersion, 2); // Doc Type Version Ebml_SerializeUnsigned(glob, DocTypeReadVersion, 2); // Doc Type Read Version Ebml_EndSubElement(glob, &start); } { Ebml_StartSubElement(glob, &glob->startSegment, Segment); // segment glob->position_reference = ftello(glob->stream); glob->framerate = *fps; write_webm_seek_info(glob); {
631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700
EbmlLoc trackStart; glob->track_pos = ftello(glob->stream); Ebml_StartSubElement(glob, &trackStart, Tracks); { unsigned int trackNumber = 1; uint64_t trackID = 0; EbmlLoc start; Ebml_StartSubElement(glob, &start, TrackEntry); Ebml_SerializeUnsigned(glob, TrackNumber, trackNumber); glob->track_id_pos = ftello(glob->stream); Ebml_SerializeUnsigned32(glob, TrackUID, trackID); Ebml_SerializeUnsigned(glob, TrackType, 1); // video is always 1 Ebml_SerializeString(glob, CodecID, "V_VP8"); { unsigned int pixelWidth = cfg->g_w; unsigned int pixelHeight = cfg->g_h; float frameRate = (float)fps->num / (float)fps->den; EbmlLoc videoStart; Ebml_StartSubElement(glob, &videoStart, Video); Ebml_SerializeUnsigned(glob, PixelWidth, pixelWidth); Ebml_SerializeUnsigned(glob, PixelHeight, pixelHeight); Ebml_SerializeUnsigned(glob, StereoMode, stereo_fmt); Ebml_SerializeFloat(glob, FrameRate, frameRate); Ebml_EndSubElement(glob, &videoStart); // Video } Ebml_EndSubElement(glob, &start); // Track Entry } Ebml_EndSubElement(glob, &trackStart); } // segment element is open } } static void write_webm_block(EbmlGlobal *glob, const vpx_codec_enc_cfg_t *cfg, const vpx_codec_cx_pkt_t *pkt) { unsigned long block_length; unsigned char track_number; unsigned short block_timecode = 0; unsigned char flags; int64_t pts_ms; int start_cluster = 0, is_keyframe; /* Calculate the PTS of this frame in milliseconds */ pts_ms = pkt->data.frame.pts * 1000 * (uint64_t)cfg->g_timebase.num / (uint64_t)cfg->g_timebase.den; if (pts_ms <= glob->last_pts_ms) pts_ms = glob->last_pts_ms + 1; glob->last_pts_ms = pts_ms; /* Calculate the relative time of this block */ if (pts_ms - glob->cluster_timecode > SHRT_MAX) start_cluster = 1; else block_timecode = pts_ms - glob->cluster_timecode; is_keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY); if (start_cluster || is_keyframe) { if (glob->cluster_open) Ebml_EndSubElement(glob, &glob->startCluster); /* Open the new cluster */ block_timecode = 0; glob->cluster_open = 1; 
glob->cluster_timecode = pts_ms; glob->cluster_pos = ftello(glob->stream);