/* vp8_scalable_patterns.c (23.46 KiB) */
/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
/*
 * This is an example demonstrating how to implement a multi-layer VP8
 * encoding scheme based on temporal scalability for video applications
 * that benefit from a scalable bitstream.
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#define VPX_CODEC_DISABLE_COMPAT 1
#include "vpx/vpx_encoder.h"
#include "vpx/vp8cx.h"
#define interface (vpx_codec_vp8_cx())
#define fourcc    0x30385056
#define IVF_FILE_HDR_SZ  (32)
#define IVF_FRAME_HDR_SZ (12)
/*
 * Store the low 16 bits of `val` at `mem` in little-endian byte order
 * (least significant byte first). `mem` must point to at least 2 writable
 * bytes; bits of `val` above bit 15 are discarded.
 */
static void mem_put_le16(char *mem, unsigned int val) {
    /* Write through unsigned char so byte values above 0x7f are stored
     * without an implementation-defined conversion to a signed char. */
    unsigned char *out = (unsigned char *)mem;

    out[0] = (unsigned char)(val & 0xff);
    out[1] = (unsigned char)((val >> 8) & 0xff);
}
/*
 * Store the low 32 bits of `val` at `mem` in little-endian byte order
 * (least significant byte first). `mem` must point to at least 4 writable
 * bytes.
 */
static void mem_put_le32(char *mem, unsigned int val) {
    /* Write through unsigned char so byte values above 0x7f are stored
     * without an implementation-defined conversion to a signed char. */
    unsigned char *out = (unsigned char *)mem;

    out[0] = (unsigned char)(val & 0xff);
    out[1] = (unsigned char)((val >> 8) & 0xff);
    out[2] = (unsigned char)((val >> 16) & 0xff);
    out[3] = (unsigned char)((val >> 24) & 0xff);
}
/*
 * Print a printf-style message to stdout, make sure the output ends with a
 * newline, and terminate the process with EXIT_FAILURE. Never returns.
 *
 * fmt: printf-style format string (must not be NULL).
 * ...: arguments consumed by `fmt`.
 */
static void die(const char *fmt, ...) {
    va_list ap;
    size_t  len = strlen(fmt);

    va_start(ap, fmt);
    vprintf(fmt, ap);
    va_end(ap);

    /* An empty format has no last character to inspect; indexing
     * fmt[len - 1] there would read out of bounds. */
    if (len == 0 || fmt[len - 1] != '\n')
        printf("\n");
    exit(EXIT_FAILURE);
}
/*
 * Report a codec failure on stdout and terminate with EXIT_FAILURE.
 *
 * Prints "<s>: <vpx_codec_error(ctx)>", followed by an indented line with
 * the string from vpx_codec_error_detail(ctx) when that string is non-NULL.
 * Never returns.
 *
 * ctx: codec context the error strings are queried from.
 * s:   caller-supplied prefix describing what failed.
 */
static void die_codec(vpx_codec_ctx_t *ctx, const char *s) {
    const char *detail = vpx_codec_error_detail(ctx);

    printf("%s: %s\n", s, vpx_codec_error(ctx));
    if (detail)
        printf("    %s\n", detail);
    exit(EXIT_FAILURE);
}
static int read_frame(FILE *f, vpx_image_t *img) {
    size_t nbytes, to_read;
    int    res = 1;
    to_read = img->w*img->h*3/2;
    nbytes = fread(img->planes[0], 1, to_read, f);
    if(nbytes != to_read) {
        res = 0;
        if(nbytes > 0)
            printf("Warning: Read partial frame. Check your width & height!\n");
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
} return res; } static void write_ivf_file_header(FILE *outfile, const vpx_codec_enc_cfg_t *cfg, int frame_cnt) { char header[32]; if(cfg->g_pass != VPX_RC_ONE_PASS && cfg->g_pass != VPX_RC_LAST_PASS) return; header[0] = 'D'; header[1] = 'K'; header[2] = 'I'; header[3] = 'F'; mem_put_le16(header+4, 0); /* version */ mem_put_le16(header+6, 32); /* headersize */ mem_put_le32(header+8, fourcc); /* headersize */ mem_put_le16(header+12, cfg->g_w); /* width */ mem_put_le16(header+14, cfg->g_h); /* height */ mem_put_le32(header+16, cfg->g_timebase.den); /* rate */ mem_put_le32(header+20, cfg->g_timebase.num); /* scale */ mem_put_le32(header+24, frame_cnt); /* length */ mem_put_le32(header+28, 0); /* unused */ (void) fwrite(header, 1, 32, outfile); } static void write_ivf_frame_header(FILE *outfile, const vpx_codec_cx_pkt_t *pkt) { char header[12]; vpx_codec_pts_t pts; if(pkt->kind != VPX_CODEC_CX_FRAME_PKT) return; pts = pkt->data.frame.pts; mem_put_le32(header, pkt->data.frame.sz); mem_put_le32(header+4, pts&0xFFFFFFFF); mem_put_le32(header+8, pts >> 32); (void) fwrite(header, 1, 12, outfile); } static int mode_to_num_layers[12] = {1, 2, 2, 3, 3, 3, 3, 5, 2, 3, 3, 3}; int main(int argc, char **argv) { FILE *infile, *outfile[VPX_TS_MAX_LAYERS]; vpx_codec_ctx_t codec; vpx_codec_enc_cfg_t cfg; int frame_cnt = 0; vpx_image_t raw; vpx_codec_err_t res; unsigned int width; unsigned int height; int frame_avail; int got_data; int flags = 0; int i; int pts = 0; /* PTS starts at 0 */ int frame_duration = 1; /* 1 timebase tick per frame */ int layering_mode = 0; int frames_in_layer[VPX_TS_MAX_LAYERS] = {0}; int layer_flags[VPX_TS_MAX_PERIODICITY] = {0}; int flag_periodicity; int max_intra_size_pct;
141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
/* Check usage and arguments */ if (argc < 9) die("Usage: %s <infile> <outfile> <width> <height> <rate_num> " " <rate_den> <mode> <Rate_0> ... <Rate_nlayers-1>\n", argv[0]); width = strtol (argv[3], NULL, 0); height = strtol (argv[4], NULL, 0); if (width < 16 || width%2 || height <16 || height%2) die ("Invalid resolution: %d x %d", width, height); if (!sscanf(argv[7], "%d", &layering_mode)) die ("Invalid mode %s", argv[7]); if (layering_mode<0 || layering_mode>11) die ("Invalid mode (0..11) %s", argv[7]); if (argc != 8+mode_to_num_layers[layering_mode]) die ("Invalid number of arguments"); if (!vpx_img_alloc (&raw, VPX_IMG_FMT_I420, width, height, 32)) die ("Failed to allocate image", width, height); printf("Using %s\n",vpx_codec_iface_name(interface)); /* Populate encoder configuration */ res = vpx_codec_enc_config_default(interface, &cfg, 0); if(res) { printf("Failed to get config: %s\n", vpx_codec_err_to_string(res)); return EXIT_FAILURE; } /* Update the default configuration with our settings */ cfg.g_w = width; cfg.g_h = height; /* Timebase format e.g. 
30fps: numerator=1, demoninator=30 */ if (!sscanf (argv[5], "%d", &cfg.g_timebase.num )) die ("Invalid timebase numerator %s", argv[5]); if (!sscanf (argv[6], "%d", &cfg.g_timebase.den )) die ("Invalid timebase denominator %s", argv[6]); for (i=8; i<8+mode_to_num_layers[layering_mode]; i++) if (!sscanf(argv[i], "%ud", &cfg.ts_target_bitrate[i-8])) die ("Invalid data rate %s", argv[i]); /* Real time parameters */ cfg.rc_dropframe_thresh = 0; cfg.rc_end_usage = VPX_CBR; cfg.rc_resize_allowed = 0; cfg.rc_min_quantizer = 2; cfg.rc_max_quantizer = 56; cfg.rc_undershoot_pct = 100; cfg.rc_overshoot_pct = 15; cfg.rc_buf_initial_sz = 500; cfg.rc_buf_optimal_sz = 600; cfg.rc_buf_sz = 1000; /* Enable error resilient mode */ cfg.g_error_resilient = 1; cfg.g_lag_in_frames = 0; cfg.kf_mode = VPX_KF_DISABLED; /* Disable automatic keyframe placement */ cfg.kf_min_dist = cfg.kf_max_dist = 3000; /* Default setting for bitrate: used in special case of 1 layer (case 0). */ cfg.rc_target_bitrate = cfg.ts_target_bitrate[0]; /* Temporal scaling parameters: */ /* NOTE: The 3 prediction frames cannot be used interchangeably due to * differences in the way they are handled throughout the code. The