vpxenc.c

/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./vpxenc.h"
#include "./vpx_config.h"

#include <assert.h>
#include <limits.h>
#include <math.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#if CONFIG_LIBYUV
#include "third_party/libyuv/include/libyuv/scale.h"
#endif

#include "vpx/vpx_encoder.h"
#if CONFIG_DECODERS
#include "vpx/vpx_decoder.h"
#endif

#include "./args.h"
#include "./ivfenc.h"
#include "./tools_common.h"

#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER
#include "vpx/vp8cx.h"
#endif
#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER
#include "vpx/vp8dx.h"
#endif

#include "vpx/vpx_integer.h"
#include "vpx_ports/mem_ops.h"
#include "vpx_ports/vpx_timer.h"
#include "./rate_hist.h"
#include "./vpxstats.h"
#include "./warnings.h"
#if CONFIG_WEBM_IO
#include "./webmenc.h"
#endif
#include "./y4minput.h"

/* Swallow warnings about unused results of fread/fwrite */
static size_t wrap_fread(void *ptr, size_t size, size_t nmemb,
                         FILE *stream) {
  /* Forward to the real fread (the macro below is not yet defined here) and
   * hand its item count straight back to the caller.
   */
  const size_t items_read = fread(ptr, size, nmemb, stream);

  return items_read;
}
#define fread wrap_fread

static size_t wrap_fwrite(const void *ptr, size_t size, size_t nmemb,
                          FILE *stream) {
  /* Forward to the real fwrite (the macro below is not yet defined here) and
   * hand its item count straight back to the caller.
   */
  const size_t items_written = fwrite(ptr, size, nmemb, stream);

  return items_written;
}
#define fwrite wrap_fwrite


/* Program name substituted into the "Usage:" line printed by usage_exit().
 * NOTE(review): no assignment is visible in this part of the file; presumably
 * set from argv[0] during start-up -- confirm.
 */
static const char *exec_name;

/* Reports a pending codec error, if any, and optionally terminates.
 *
 * Does nothing when ctx->err is zero. Otherwise writes the caller's
 * printf-style message (format `s`, arguments `ap`) to stderr, followed by
 * the codec's error string and, when one is available, its detail string.
 * Exits with EXIT_FAILURE when `fatal` is non-zero.
 */
static void warn_or_exit_on_errorv(vpx_codec_ctx_t *ctx, int fatal,
                                   const char *s, va_list ap) {
  const char *extra_info;

  if (!ctx->err) {
    return;
  }

  /* Fetch the detail string before anything is printed. */
  extra_info = vpx_codec_error_detail(ctx);

  vfprintf(stderr, s, ap);
  fprintf(stderr, ": %s\n", vpx_codec_error(ctx));

  if (extra_info != NULL) {
    fprintf(stderr, "    %s\n", extra_info);
  }

  if (fatal != 0) {
    exit(EXIT_FAILURE);
  }
}

/* Variadic front end to warn_or_exit_on_errorv() that always treats a pending
 * codec error as fatal. `s` is a printf-style format for the message prefix.
 */
static void ctx_exit_on_error(vpx_codec_ctx_t *ctx, const char *s, ...) {
  const int always_fatal = 1;
  va_list fmt_args;

  va_start(fmt_args, s);
  warn_or_exit_on_errorv(ctx, always_fatal, s, fmt_args);
  va_end(fmt_args);
}

/* Variadic front end to warn_or_exit_on_errorv(): the caller chooses through
 * `fatal` whether a pending codec error only warns (0) or also exits
 * (non-zero). `s` is a printf-style format for the message prefix.
 */
static void warn_or_exit_on_error(vpx_codec_ctx_t *ctx, int fatal,
                                  const char *s, ...) {
  va_list fmt_args;

  va_start(fmt_args, s);
  warn_or_exit_on_errorv(ctx, fatal, s, fmt_args);
  va_end(fmt_args);
}

/* Reads the next input frame into `img`.
 *
 * Y4M input goes through y4m_input_fetch_frame(); every other file type goes
 * through read_yuv_frame(). Returns 1 when a frame was obtained and 0
 * otherwise (a Y4M fetch result below 1, or a non-zero result from
 * read_yuv_frame()).
 */
static int read_frame(struct VpxInputContext *input_ctx, vpx_image_t *img) {
  if (input_ctx->file_type == FILE_TYPE_Y4M) {
    const int fetched =
        y4m_input_fetch_frame(&input_ctx->y4m, input_ctx->file, img);
    return (fetched < 1) ? 0 : 1;
  }

  return !read_yuv_frame(input_ctx, img);
}

/* Returns 1 when the four bytes in `detect` are the "YUV4" signature,
 * 0 otherwise.
 */
static int file_is_y4m(const char detect[4]) {
  static const char y4m_magic[4] = { 'Y', 'U', 'V', '4' };

  return memcmp(detect, y4m_magic, sizeof(y4m_magic)) == 0 ? 1 : 0;
}

/* Returns 1 when the four bytes in `detect` are the "DKIF" signature,
 * 0 otherwise.
 */
static int fourcc_is_ivf(const char detect[4]) {
  static const char ivf_magic[4] = { 'D', 'K', 'I', 'F' };

  return memcmp(detect, ivf_magic, sizeof(ivf_magic)) == 0 ? 1 : 0;
}

/* General command-line option definitions.
 *
 * Each ARG_DEF receives a short option name (or NULL), a long option name,
 * a numeric third argument, and the help text.
 * NOTE(review): the third argument looks like "option takes a value" (0 for
 * plain switches such as --debug, 1 for --output) -- confirm in args.h.
 */
static const arg_def_t debugmode = ARG_DEF(
    "D", "debug", 0, "Debug mode (makes output deterministic)");
static const arg_def_t outputfile = ARG_DEF(
    "o", "output", 1, "Output filename");
/* Input pixel-layout selectors; parse_global_config() maps them onto
 * global->color_type.
 */
static const arg_def_t use_yv12 = ARG_DEF(
    NULL, "yv12", 0, "Input file is YV12 ");
static const arg_def_t use_i420 = ARG_DEF(
    NULL, "i420", 0, "Input file is I420 (default)");
static const arg_def_t use_i422 = ARG_DEF(
    NULL, "i422", 0, "Input file is I422");
static const arg_def_t use_i444 = ARG_DEF(
    NULL, "i444", 0, "Input file is I444");
static const arg_def_t use_i440 = ARG_DEF(
    NULL, "i440", 0, "Input file is I440");
static const arg_def_t codecarg = ARG_DEF(
    NULL, "codec", 1, "Codec to use");
/* --passes is the total pass count and --pass selects a single pass;
 * parse_global_config() rejects values outside 1..2 for both.
 */
static const arg_def_t passes = ARG_DEF(
    "p", "passes", 1, "Number of passes (1/2)");
static const arg_def_t pass_arg = ARG_DEF(
    NULL, "pass", 1, "Pass to execute (1/2)");
static const arg_def_t fpf_name = ARG_DEF(
    NULL, "fpf", 1, "First pass statistics file name");
#if CONFIG_FP_MB_STATS
static const arg_def_t fpmbf_name = ARG_DEF(
    NULL, "fpmbf", 1, "First pass block statistics file name");
#endif
static const arg_def_t limit = ARG_DEF(
    NULL, "limit", 1, "Stop encoding after n input frames");
static const arg_def_t skip = ARG_DEF(
    NULL, "skip", 1, "Skip the first n input frames");
/* --deadline takes an explicit value; --best/--good/--rt select the
 * VPX_DL_BEST_QUALITY / VPX_DL_GOOD_QUALITY / VPX_DL_REALTIME presets in
 * parse_global_config().
 */
static const arg_def_t deadline = ARG_DEF(
    "d", "deadline", 1, "Deadline per frame (usec)");
static const arg_def_t best_dl = ARG_DEF(
    NULL, "best", 0, "Use Best Quality Deadline");
static const arg_def_t good_dl = ARG_DEF(
    NULL, "good", 0, "Use Good Quality Deadline");
static const arg_def_t rt_dl = ARG_DEF(
    NULL, "rt", 0, "Use Realtime Quality Deadline");
static const arg_def_t quietarg = ARG_DEF(
    "q", "quiet", 0, "Do not print encode progress");
static const arg_def_t verbosearg = ARG_DEF(
    "v", "verbose", 0, "Show encoder parameters");
static const arg_def_t psnrarg = ARG_DEF(
    NULL, "psnr", 0, "Show PSNR in status line");

/* Keyword/value pairs accepted by --test-decode; the {NULL, 0} entry ends
 * the list.
 */
static const struct arg_enum_list test_decode_enum[] = {
  {"off",   TEST_DECODE_OFF},
  {"fatal", TEST_DECODE_FATAL},
  {"warn",  TEST_DECODE_WARN},
  {NULL, 0}
};
static const arg_def_t recontest = ARG_DEF_ENUM(
    NULL, "test-decode", 1, "Test encode/decode mismatch", test_decode_enum);
/* --fps is parsed as a rational and validated by
 * validate_positive_rational() in parse_global_config().
 */
static const arg_def_t framerate = ARG_DEF(
    NULL, "fps", 1, "Stream frame rate (rate/scale)");
/* Output container selection. */
static const arg_def_t use_webm = ARG_DEF(
    NULL, "webm", 0, "Output WebM (default when WebM IO is enabled)");
static const arg_def_t use_ivf = ARG_DEF(
    NULL, "ivf", 0, "Output IVF");
static const arg_def_t out_part = ARG_DEF(
    "P", "output-partitions", 0,
    "Makes encoder output partitions. Requires IVF output!");
/* Histogram reporting; the value is stored as a bucket count by
 * parse_global_config().
 */
static const arg_def_t q_hist_n = ARG_DEF(
    NULL, "q-hist", 1, "Show quantizer histogram (n-buckets)");
static const arg_def_t rate_hist_n = ARG_DEF(
    NULL, "rate-hist", 1, "Show rate histogram (n-buckets)");
/* Controls for the encode-settings warnings. */
static const arg_def_t disable_warnings = ARG_DEF(
    NULL, "disable-warnings", 0,
    "Disable warnings about potentially incorrect encode settings.");
static const arg_def_t disable_warning_prompt = ARG_DEF(
    "y", "disable-warning-prompt", 0,
    "Display warnings, but do not prompt user to continue.");

#if CONFIG_VP9 && CONFIG_VP9_HIGHBITDEPTH
/* Only defined in VP9 high-bit-depth builds; listed in global_args under the
 * same condition.
 */
static const arg_def_t test16bitinternalarg = ARG_DEF(
    NULL, "test-16bit-internal", 0, "Force use of 16 bit internal buffer");
#endif

static const arg_def_t *main_args[] = {
  &debugmode,
  &outputfile, &codecarg, &passes, &pass_arg, &fpf_name, &limit, &skip,
  &deadline, &best_dl, &good_dl, &rt_dl,
  &quietarg, &verbosearg, &psnrarg, &use_webm, &use_ivf, &out_part, &q_hist_n,
  &rate_hist_n, &disable_warnings, &disable_warning_prompt,
  NULL
};

/* Encoder-wide options, shown under "Encoder Global Options:" by
 * usage_exit() through the global_args list below.
 */
static const arg_def_t usage = ARG_DEF(
    "u", "usage", 1, "Usage profile number to use");
static const arg_def_t threads = ARG_DEF(
    "t", "threads", 1, "Max number of threads to use");
static const arg_def_t profile = ARG_DEF(
    NULL, "profile", 1, "Bitstream profile number to use");
static const arg_def_t width = ARG_DEF("w", "width", 1, "Frame width");
static const arg_def_t height = ARG_DEF("h", "height", 1, "Frame height");
#if CONFIG_WEBM_IO
/* Keywords accepted by --stereo-mode; only available with WebM I/O. */
static const struct arg_enum_list stereo_mode_enum[] = {
  {"mono", STEREO_FORMAT_MONO},
  {"left-right", STEREO_FORMAT_LEFT_RIGHT},
  {"bottom-top", STEREO_FORMAT_BOTTOM_TOP},
  {"top-bottom", STEREO_FORMAT_TOP_BOTTOM},
  {"right-left", STEREO_FORMAT_RIGHT_LEFT},
  {NULL, 0}
};
static const arg_def_t stereo_mode = ARG_DEF_ENUM(
    NULL, "stereo-mode", 1, "Stereo 3D video format", stereo_mode_enum);
#endif
static const arg_def_t timebase = ARG_DEF(
    NULL, "timebase", 1, "Output timestamp precision (fractional seconds)");
static const arg_def_t error_resilient = ARG_DEF(
    NULL, "error-resilient", 1, "Enable error resiliency features");
static const arg_def_t lag_in_frames = ARG_DEF(
    NULL, "lag-in-frames", 1, "Max number of frames to lag");

/* NULL-terminated list printed by usage_exit(). It also includes the input
 * format switches and --fps, which are defined with the general options.
 */
static const arg_def_t *global_args[] = {
  &use_yv12, &use_i420, &use_i422, &use_i444, &use_i440,
  &usage, &threads, &profile,
  &width, &height,
#if CONFIG_WEBM_IO
  &stereo_mode,
#endif
  &timebase, &framerate,
  &error_resilient,
#if CONFIG_VP9 && CONFIG_VP9_HIGHBITDEPTH
  &test16bitinternalarg,
#endif
  &lag_in_frames, NULL
};

/* Rate-control options, shown under "Rate Control Options:" by usage_exit()
 * through the rc_args list below.
 */
static const arg_def_t dropframe_thresh = ARG_DEF(
    NULL, "drop-frame", 1, "Temporal resampling threshold (buf %)");
static const arg_def_t resize_allowed = ARG_DEF(
    NULL, "resize-allowed", 1, "Spatial resampling enabled (bool)");
static const arg_def_t resize_width = ARG_DEF(
    NULL, "resize-width", 1, "Width of encoded frame");
static const arg_def_t resize_height = ARG_DEF(
    NULL, "resize-height", 1, "Height of encoded frame");
static const arg_def_t resize_up_thresh = ARG_DEF(
    NULL, "resize-up", 1, "Upscale threshold (buf %)");
static const arg_def_t resize_down_thresh = ARG_DEF(
    NULL, "resize-down", 1, "Downscale threshold (buf %)");
/* Keywords accepted by --end-usage; the {NULL, 0} entry ends the list. */
static const struct arg_enum_list end_usage_enum[] = {
  {"vbr", VPX_VBR},
  {"cbr", VPX_CBR},
  {"cq",  VPX_CQ},
  {"q",   VPX_Q},
  {NULL, 0}
};
static const arg_def_t end_usage = ARG_DEF_ENUM(
    NULL, "end-usage", 1, "Rate control mode", end_usage_enum);
static const arg_def_t target_bitrate = ARG_DEF(
    NULL, "target-bitrate", 1, "Bitrate (kbps)");
static const arg_def_t min_quantizer = ARG_DEF(
    NULL, "min-q", 1, "Minimum (best) quantizer");
static const arg_def_t max_quantizer = ARG_DEF(
    NULL, "max-q", 1, "Maximum (worst) quantizer");
static const arg_def_t undershoot_pct = ARG_DEF(
    NULL, "undershoot-pct", 1, "Datarate undershoot (min) target (%)");
static const arg_def_t overshoot_pct = ARG_DEF(
    NULL, "overshoot-pct", 1, "Datarate overshoot (max) target (%)");
static const arg_def_t buf_sz = ARG_DEF(
    NULL, "buf-sz", 1, "Client buffer size (ms)");
static const arg_def_t buf_initial_sz = ARG_DEF(
    NULL, "buf-initial-sz", 1, "Client initial buffer size (ms)");
static const arg_def_t buf_optimal_sz = ARG_DEF(
    NULL, "buf-optimal-sz", 1, "Client optimal buffer size (ms)");
/* NULL-terminated list printed by usage_exit(). */
static const arg_def_t *rc_args[] = {
  &dropframe_thresh, &resize_allowed, &resize_width, &resize_height,
  &resize_up_thresh, &resize_down_thresh, &end_usage, &target_bitrate,
  &min_quantizer, &max_quantizer, &undershoot_pct, &overshoot_pct, &buf_sz,
  &buf_initial_sz, &buf_optimal_sz, NULL
};


/* Two-pass rate-control options, shown under "Twopass Rate Control Options:"
 * by usage_exit() through the NULL-terminated rc_twopass_args list.
 */
static const arg_def_t bias_pct = ARG_DEF(
    NULL, "bias-pct", 1, "CBR/VBR bias (0=CBR, 100=VBR)");
static const arg_def_t minsection_pct = ARG_DEF(
    NULL, "minsection-pct", 1, "GOP min bitrate (% of target)");
static const arg_def_t maxsection_pct = ARG_DEF(
    NULL, "maxsection-pct", 1, "GOP max bitrate (% of target)");
static const arg_def_t *rc_twopass_args[] = {
  &bias_pct, &minsection_pct, &maxsection_pct, NULL
};


/* Keyframe options, shown under "Keyframe Placement Options:" by
 * usage_exit() through the NULL-terminated kf_args list.
 */
static const arg_def_t kf_min_dist = ARG_DEF(
    NULL, "kf-min-dist", 1, "Minimum keyframe interval (frames)");
static const arg_def_t kf_max_dist = ARG_DEF(
    NULL, "kf-max-dist", 1, "Maximum keyframe interval (frames)");
static const arg_def_t kf_disabled = ARG_DEF(
    NULL, "disable-kf", 0, "Disable keyframe placement");
static const arg_def_t *kf_args[] = {
  &kf_min_dist, &kf_max_dist, &kf_disabled, NULL
};


/* Codec control options referenced by both the VP8 and the VP9 option lists
 * (vp8_args / vp9_args) defined further down.
 */
static const arg_def_t noise_sens = ARG_DEF(
    NULL, "noise-sensitivity", 1, "Noise sensitivity (frames to blur)");
static const arg_def_t sharpness = ARG_DEF(
    NULL, "sharpness", 1, "Loop filter sharpness (0..7)");
static const arg_def_t static_thresh = ARG_DEF(
    NULL, "static-thresh", 1, "Motion detection threshold");
static const arg_def_t auto_altref = ARG_DEF(
    NULL, "auto-alt-ref", 1, "Enable automatic alt reference frames");
static const arg_def_t arnr_maxframes = ARG_DEF(
    NULL, "arnr-maxframes", 1, "AltRef max frames (0..15)");
static const arg_def_t arnr_strength = ARG_DEF(
    NULL, "arnr-strength", 1, "AltRef filter strength (0..6)");
static const arg_def_t arnr_type = ARG_DEF(
    NULL, "arnr-type", 1, "AltRef type");
/* Keywords accepted by --tune; the {NULL, 0} entry ends the list. */
static const struct arg_enum_list tuning_enum[] = {
  {"psnr", VP8_TUNE_PSNR},
  {"ssim", VP8_TUNE_SSIM},
  {NULL, 0}
};
static const arg_def_t tune_ssim = ARG_DEF_ENUM(
    NULL, "tune", 1, "Material to favor", tuning_enum);
static const arg_def_t cq_level = ARG_DEF(
    NULL, "cq-level", 1, "Constant/Constrained Quality level");
static const arg_def_t max_intra_rate_pct = ARG_DEF(
    NULL, "max-intra-rate", 1, "Max I-frame bitrate (pct)");

#if CONFIG_VP8_ENCODER
static const arg_def_t cpu_used_vp8 = ARG_DEF(
    NULL, "cpu-used", 1, "CPU Used (-16..16)");
static const arg_def_t token_parts = ARG_DEF(
    NULL, "token-parts", 1, "Number of token partitions to use, log2");
static const arg_def_t screen_content_mode = ARG_DEF(
    NULL, "screen-content-mode", 1, "Screen content mode");
static const arg_def_t *vp8_args[] = {
  &cpu_used_vp8, &auto_altref, &noise_sens, &sharpness, &static_thresh,
  &token_parts, &arnr_maxframes, &arnr_strength, &arnr_type,
  &tune_ssim, &cq_level, &max_intra_rate_pct, &screen_content_mode,
  NULL
};
static const int vp8_arg_ctrl_map[] = {
  VP8E_SET_CPUUSED, VP8E_SET_ENABLEAUTOALTREF,
  VP8E_SET_NOISE_SENSITIVITY, VP8E_SET_SHARPNESS, VP8E_SET_STATIC_THRESHOLD,
  VP8E_SET_TOKEN_PARTITIONS,
  VP8E_SET_ARNR_MAXFRAMES, VP8E_SET_ARNR_STRENGTH, VP8E_SET_ARNR_TYPE,
  VP8E_SET_TUNING, VP8E_SET_CQ_LEVEL, VP8E_SET_MAX_INTRA_BITRATE_PCT,
  VP8E_SET_SCREEN_CONTENT_MODE,
  0
};
#endif

#if CONFIG_VP9_ENCODER
static const arg_def_t cpu_used_vp9 = ARG_DEF(
    NULL, "cpu-used", 1, "CPU Used (-8..8)");
static const arg_def_t tile_cols = ARG_DEF(
    NULL, "tile-columns", 1, "Number of tile columns to use, log2");
static const arg_def_t tile_rows = ARG_DEF(
    NULL, "tile-rows", 1, "Number of tile rows to use, log2");
static const arg_def_t lossless = ARG_DEF(
    NULL, "lossless", 1, "Lossless mode");
static const arg_def_t frame_parallel_decoding = ARG_DEF(
    NULL, "frame-parallel", 1, "Enable frame parallel decodability features");
static const arg_def_t aq_mode = ARG_DEF(
    NULL, "aq-mode", 1,
    "Adaptive quantization mode (0: off (default), 1: variance 2: complexity, "
    "3: cyclic refresh)");
static const arg_def_t frame_periodic_boost = ARG_DEF(
    NULL, "frame-boost", 1,
    "Enable frame periodic boost (0: off (default), 1: on)");
static const arg_def_t gf_cbr_boost_pct = ARG_DEF(
    NULL, "gf-cbr-boost", 1, "Boost for Golden Frame in CBR mode (pct)");
static const arg_def_t max_inter_rate_pct = ARG_DEF(
    NULL, "max-inter-rate", 1, "Max P-frame bitrate (pct)");

static const struct arg_enum_list color_space_enum[] = {
  { "unknown", VPX_CS_UNKNOWN },
  { "bt601", VPX_CS_BT_601 },
  { "bt709", VPX_CS_BT_709 },
  { "smpte170", VPX_CS_SMPTE_170 },
  { "smpte240", VPX_CS_SMPTE_240 },
  { "bt2020", VPX_CS_BT_2020 },
  { "reserved", VPX_CS_RESERVED },
  { "sRGB", VPX_CS_SRGB },
  { NULL, 0 }
};

static const arg_def_t input_color_space = ARG_DEF_ENUM(
    NULL, "color-space", 1,
    "The color space of input content:", color_space_enum);

#if CONFIG_VP9 && CONFIG_VP9_HIGHBITDEPTH
static const struct arg_enum_list bitdepth_enum[] = {
  {"8",  VPX_BITS_8},
  {"10", VPX_BITS_10},
  {"12", VPX_BITS_12},
  {NULL, 0}
};

static const arg_def_t bitdeptharg = ARG_DEF_ENUM(
    "b", "bit-depth", 1,
    "Bit depth for codec (8 for version <=1, 10 or 12 for version 2)",
    bitdepth_enum);
static const arg_def_t inbitdeptharg = ARG_DEF(
    NULL, "input-bit-depth", 1, "Bit depth of input");
#endif

static const struct arg_enum_list tune_content_enum[] = {
  {"default", VP9E_CONTENT_DEFAULT},
  {"screen", VP9E_CONTENT_SCREEN},
  {NULL, 0}
};

static const arg_def_t tune_content = ARG_DEF_ENUM(
    NULL, "tune-content", 1, "Tune content type", tune_content_enum);

static const arg_def_t *vp9_args[] = {
  &cpu_used_vp9, &auto_altref, &sharpness, &static_thresh,
  &tile_cols, &tile_rows, &arnr_maxframes, &arnr_strength, &arnr_type,
  &tune_ssim, &cq_level, &max_intra_rate_pct, &max_inter_rate_pct,
  &gf_cbr_boost_pct, &lossless,
  &frame_parallel_decoding, &aq_mode, &frame_periodic_boost,
  &noise_sens, &tune_content, &input_color_space,
#if CONFIG_VP9 && CONFIG_VP9_HIGHBITDEPTH
  &bitdeptharg, &inbitdeptharg,
#endif
  NULL
};
static const int vp9_arg_ctrl_map[] = {
  VP8E_SET_CPUUSED, VP8E_SET_ENABLEAUTOALTREF,
  VP8E_SET_SHARPNESS, VP8E_SET_STATIC_THRESHOLD,
  VP9E_SET_TILE_COLUMNS, VP9E_SET_TILE_ROWS,
  VP8E_SET_ARNR_MAXFRAMES, VP8E_SET_ARNR_STRENGTH, VP8E_SET_ARNR_TYPE,
  VP8E_SET_TUNING, VP8E_SET_CQ_LEVEL, VP8E_SET_MAX_INTRA_BITRATE_PCT,
  VP9E_SET_MAX_INTER_BITRATE_PCT, VP9E_SET_GF_CBR_BOOST_PCT,
  VP9E_SET_LOSSLESS, VP9E_SET_FRAME_PARALLEL_DECODING, VP9E_SET_AQ_MODE,
  VP9E_SET_FRAME_PERIODIC_BOOST, VP9E_SET_NOISE_SENSITIVITY,
  VP9E_SET_TUNE_CONTENT, VP9E_SET_COLOR_SPACE,
  0
};
#endif

/* Empty option list: contains only the NULL terminator. */
static const arg_def_t *no_args[] = { NULL };

/* Prints the complete usage text to stderr and terminates the process with
 * EXIT_FAILURE.
 *
 * The text consists of the synopsis, one section per option table, a note on
 * --timebase, and the list of built-in encoders. The encoder with the highest
 * index is tagged "(default)".
 */
void usage_exit(void) {
  const int encoder_count = get_vpx_encoder_count();
  int idx = 0;

  fprintf(stderr, "Usage: %s <options> -o dst_filename src_filename \n",
          exec_name);

  fprintf(stderr, "\nOptions:\n");
  arg_show_usage(stderr, main_args);

  fprintf(stderr, "\nEncoder Global Options:\n");
  arg_show_usage(stderr, global_args);

  fprintf(stderr, "\nRate Control Options:\n");
  arg_show_usage(stderr, rc_args);

  fprintf(stderr, "\nTwopass Rate Control Options:\n");
  arg_show_usage(stderr, rc_twopass_args);

  fprintf(stderr, "\nKeyframe Placement Options:\n");
  arg_show_usage(stderr, kf_args);

#if CONFIG_VP8_ENCODER
  fprintf(stderr, "\nVP8 Specific Options:\n");
  arg_show_usage(stderr, vp8_args);
#endif

#if CONFIG_VP9_ENCODER
  fprintf(stderr, "\nVP9 Specific Options:\n");
  arg_show_usage(stderr, vp9_args);
#endif

  fprintf(stderr, "\nStream timebase (--timebase):\n"
          "  The desired precision of timestamps in the output, expressed\n"
          "  in fractional seconds. Default is 1/1000.\n");
  fprintf(stderr, "\nIncluded encoders:\n\n");

  while (idx < encoder_count) {
    const VpxInterface *const enc = get_vpx_encoder_by_index(idx);
    const char *suffix = "";

    /* The last registered encoder is the one used when --codec is absent. */
    if (idx == encoder_count - 1) {
      suffix = "(default)";
    }

    fprintf(stderr, "    %-6s - %s %s\n",
            enc->name, vpx_codec_iface_name(enc->codec_interface()),
            suffix);
    ++idx;
  }

  fprintf(stderr, "\n        ");
  fprintf(stderr, "Use --codec to switch to a non-default encoder.\n\n");

  exit(EXIT_FAILURE);
}

/* Smaller of a and b. This is a function-like macro, so the selected argument
 * is evaluated twice; do not pass expressions with side effects.
 */
#define mmin(a, b)  ((a) < (b) ? (a) : (b))

#if CONFIG_VP9 && CONFIG_VP9_HIGHBITDEPTH
/* Finds the first differing sample between two planes of 16-bit samples.
 *
 * The plane is walked in bw x bh blocks (raster order over blocks, then
 * raster order inside each block). Strides are expressed in samples.
 *
 * On return, loc[0]/loc[1] hold the row/column of the first mismatch and
 * loc[2]/loc[3] the sample values from plane1/plane2. All four are -1 when
 * the planes are identical over the w x h area.
 */
static void find_plane_mismatch_high(const uint16_t *plane1, uint32_t stride1,
                                     const uint16_t *plane2, uint32_t stride2,
                                     uint32_t w, uint32_t h,
                                     uint32_t bw, uint32_t bh,
                                     int loc[4]) {
  uint32_t i, j;

  loc[0] = loc[1] = loc[2] = loc[3] = -1;

  for (i = 0; i < h; i += bh) {
    /* Clamp each block to the plane so blocks never overlap or overrun. */
    const uint32_t i_end = mmin(i + bh, h);

    for (j = 0; j < w; j += bw) {
      const uint32_t j_end = mmin(j + bw, w);
      uint32_t r, c;

      for (r = i; r < i_end; ++r) {
        for (c = j; c < j_end; ++c) {
          const uint16_t s1 = plane1[r * stride1 + c];
          const uint16_t s2 = plane2[r * stride2 + c];

          if (s1 != s2) {
            loc[0] = (int)r;
            loc[1] = (int)c;
            loc[2] = s1;
            loc[3] = s2;
            return;
          }
        }
      }
    }
  }
}

/* Reports the first mismatching sample of each plane of two high-bit-depth
 * images.
 *
 * Geometry (display size and chroma shifts) is taken from img1. Luma is
 * scanned in 64x64 blocks and chroma in blocks scaled down by the chroma
 * shifts. For each of yloc/uloc/vloc: [0] = row, [1] = column, [2] = img1
 * sample, [3] = img2 sample, or all -1 when that plane matches.
 *
 * The chroma block extent is clamped the same way as the luma one
 * (min(i + bsize, dim) - i), so every plane is scanned in non-overlapping
 * blocks.
 */
static void find_mismatch_high(const vpx_image_t *const img1,
                               const vpx_image_t *const img2,
                               int yloc[4], int uloc[4], int vloc[4]) {
  const uint32_t bsize = 64;
  const uint32_t bsizey = bsize >> img1->y_chroma_shift;
  const uint32_t bsizex = bsize >> img1->x_chroma_shift;
  const uint32_t c_w =
      (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift;
  const uint32_t c_h =
      (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift;

  /* Strides are stored in bytes; halve them to index 16-bit samples. */
  find_plane_mismatch_high((const uint16_t *)img1->planes[VPX_PLANE_Y],
                           img1->stride[VPX_PLANE_Y] / 2,
                           (const uint16_t *)img2->planes[VPX_PLANE_Y],
                           img2->stride[VPX_PLANE_Y] / 2,
                           img1->d_w, img1->d_h, bsize, bsize, yloc);

  find_plane_mismatch_high((const uint16_t *)img1->planes[VPX_PLANE_U],
                           img1->stride[VPX_PLANE_U] / 2,
                           (const uint16_t *)img2->planes[VPX_PLANE_U],
                           img2->stride[VPX_PLANE_U] / 2,
                           c_w, c_h, bsizex, bsizey, uloc);

  find_plane_mismatch_high((const uint16_t *)img1->planes[VPX_PLANE_V],
                           img1->stride[VPX_PLANE_V] / 2,
                           (const uint16_t *)img2->planes[VPX_PLANE_V],
                           img2->stride[VPX_PLANE_V] / 2,
                           c_w, c_h, bsizex, bsizey, vloc);
}
#endif

/* Finds the first differing sample between two planes of 8-bit samples.
 *
 * The plane is walked in bw x bh blocks (raster order over blocks, then
 * raster order inside each block). Strides are expressed in bytes.
 *
 * On return, loc[0]/loc[1] hold the row/column of the first mismatch and
 * loc[2]/loc[3] the sample values from plane1/plane2. All four are -1 when
 * the planes are identical over the w x h area.
 */
static void find_plane_mismatch(const unsigned char *plane1, int stride1,
                                const unsigned char *plane2, int stride2,
                                uint32_t w, uint32_t h,
                                uint32_t bw, uint32_t bh,
                                int loc[4]) {
  uint32_t i, j;

  loc[0] = loc[1] = loc[2] = loc[3] = -1;

  for (i = 0; i < h; i += bh) {
    /* Clamp each block to the plane so blocks never overlap or overrun. */
    const uint32_t i_end = mmin(i + bh, h);

    for (j = 0; j < w; j += bw) {
      const uint32_t j_end = mmin(j + bw, w);
      uint32_t r, c;

      for (r = i; r < i_end; ++r) {
        for (c = j; c < j_end; ++c) {
          const unsigned char s1 = plane1[r * stride1 + c];
          const unsigned char s2 = plane2[r * stride2 + c];

          if (s1 != s2) {
            loc[0] = (int)r;
            loc[1] = (int)c;
            loc[2] = s1;
            loc[3] = s2;
            return;
          }
        }
      }
    }
  }
}

/* Reports the first mismatching sample of each plane of two 8-bit images.
 *
 * Geometry (display size and chroma shifts) is taken from img1. Luma is
 * scanned in 64x64 blocks and chroma in blocks scaled down by the chroma
 * shifts. For each of yloc/uloc/vloc: [0] = row, [1] = column, [2] = img1
 * sample, [3] = img2 sample, or all -1 when that plane matches.
 *
 * The chroma block extent is clamped the same way as the luma one
 * (min(i + bsize, dim) - i), so every plane is scanned in non-overlapping
 * blocks.
 */
static void find_mismatch(const vpx_image_t *const img1,
                          const vpx_image_t *const img2,
                          int yloc[4], int uloc[4], int vloc[4]) {
  const uint32_t bsize = 64;
  const uint32_t bsizey = bsize >> img1->y_chroma_shift;
  const uint32_t bsizex = bsize >> img1->x_chroma_shift;
  const uint32_t c_w =
      (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift;
  const uint32_t c_h =
      (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift;

  find_plane_mismatch(img1->planes[VPX_PLANE_Y], img1->stride[VPX_PLANE_Y],
                      img2->planes[VPX_PLANE_Y], img2->stride[VPX_PLANE_Y],
                      img1->d_w, img1->d_h, bsize, bsize, yloc);

  find_plane_mismatch(img1->planes[VPX_PLANE_U], img1->stride[VPX_PLANE_U],
                      img2->planes[VPX_PLANE_U], img2->stride[VPX_PLANE_U],
                      c_w, c_h, bsizex, bsizey, uloc);

  find_plane_mismatch(img1->planes[VPX_PLANE_V], img1->stride[VPX_PLANE_V],
                      img2->planes[VPX_PLANE_V], img2->stride[VPX_PLANE_V],
                      c_w, c_h, bsizex, bsizey, vloc);
}

/* Returns 1 when img1 and img2 have the same format, the same display size
 * and identical visible pixel data in the Y, U and V planes; returns 0
 * otherwise.
 *
 * Format and dimensions are checked first, and a mismatch returns before any
 * pixel data is read. Comparing rows of img2 with img1's geometry would
 * otherwise read past the end of a smaller img2. Row comparison also stops
 * at the first differing row.
 */
static int compare_img(const vpx_image_t *const img1,
                       const vpx_image_t *const img2) {
  uint32_t l_w = img1->d_w;
  uint32_t c_w =
      (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift;
  const uint32_t c_h =
      (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift;
  uint32_t i;

  if (img1->fmt != img2->fmt ||
      img1->d_w != img2->d_w ||
      img1->d_h != img2->d_h) {
    return 0;
  }

#if CONFIG_VP9 && CONFIG_VP9_HIGHBITDEPTH
  /* High-bit-depth samples occupy two bytes, so double the byte widths. */
  if (img1->fmt & VPX_IMG_FMT_HIGHBITDEPTH) {
    l_w *= 2;
    c_w *= 2;
  }
#endif

  for (i = 0; i < img1->d_h; ++i) {
    if (memcmp(img1->planes[VPX_PLANE_Y] + i * img1->stride[VPX_PLANE_Y],
               img2->planes[VPX_PLANE_Y] + i * img2->stride[VPX_PLANE_Y],
               l_w) != 0) {
      return 0;
    }
  }

  for (i = 0; i < c_h; ++i) {
    if (memcmp(img1->planes[VPX_PLANE_U] + i * img1->stride[VPX_PLANE_U],
               img2->planes[VPX_PLANE_U] + i * img2->stride[VPX_PLANE_U],
               c_w) != 0) {
      return 0;
    }
  }

  for (i = 0; i < c_h; ++i) {
    if (memcmp(img1->planes[VPX_PLANE_V] + i * img1->stride[VPX_PLANE_V],
               img2->planes[VPX_PLANE_V] + i * img2->stride[VPX_PLANE_V],
               c_w) != 0) {
      return 0;
    }
  }

  return 1;
}


/* Element count of a true array; gives a wrong answer when applied to a
 * pointer.
 */
#define NELEMENTS(x) (sizeof(x)/sizeof(x[0]))
/* Larger of x and y; the selected argument is evaluated twice. */
#define MAX(x,y) ((x)>(y)?(x):(y))
/* ARG_CTRL_CNT_MAX is the length of the larger codec control map that is
 * compiled in, so that stream_config.arg_ctrls can be sized for either codec.
 */
#if CONFIG_VP8_ENCODER && !CONFIG_VP9_ENCODER
#define ARG_CTRL_CNT_MAX NELEMENTS(vp8_arg_ctrl_map)
#elif !CONFIG_VP8_ENCODER && CONFIG_VP9_ENCODER
#define ARG_CTRL_CNT_MAX NELEMENTS(vp9_arg_ctrl_map)
#else
#define ARG_CTRL_CNT_MAX MAX(NELEMENTS(vp8_arg_ctrl_map), \
                             NELEMENTS(vp9_arg_ctrl_map))
#endif

/* Stand-in declarations used when WebM I/O is compiled out, so that
 * stream_config.stereo_fmt and stream_state.ebml still have a type.
 */
#if !CONFIG_WEBM_IO
typedef int stereo_format_t;
struct EbmlGlobal { int debug; };
#endif

/* Per-stream configuration.
 *
 * NOTE(review): the code that fills these fields is not visible in this part
 * of the file; the field notes below are assumptions to confirm there.
 */
struct stream_config {
  struct vpx_codec_enc_cfg  cfg;
  /* presumably the output file name (-o/--output) -- confirm */
  const char               *out_fn;
  /* presumably the first-pass statistics file name (--fpf) -- confirm */
  const char               *stats_fn;
#if CONFIG_FP_MB_STATS
  /* presumably the block statistics file name (--fpmbf) -- confirm */
  const char               *fpmb_stats_fn;
#endif
  stereo_format_t           stereo_fmt;
  /* Up to ARG_CTRL_CNT_MAX two-int entries; presumably {control id, value}
   * pairs, with arg_ctrl_cnt the number in use -- confirm.
   */
  int                       arg_ctrls[ARG_CTRL_CNT_MAX][2];
  int                       arg_ctrl_cnt;
  int                       write_webm;
  int                       have_kf_max_dist;
#if CONFIG_VP9 && CONFIG_VP9_HIGHBITDEPTH
  // whether to use 16bit internal buffers
  int                       use_16bit_internal;
#endif
};


/* Per-stream runtime state.
 *
 * Streams are chained through `next`: new_stream() links each new stream
 * after the previous one, starts it as a copy of that stream and increments
 * `index`.
 *
 * NOTE(review): most fields are only used outside the visible part of the
 * file; the notes below are assumptions to confirm there.
 */
struct stream_state {
  int                       index;
  struct stream_state      *next;
  struct stream_config      config;
  FILE                     *file;
  struct rate_hist         *rate_hist;
  struct EbmlGlobal         ebml;
  /* presumably running PSNR accumulators across frames -- confirm */
  uint64_t                  psnr_sse_total;
  uint64_t                  psnr_samples_total;
  double                    psnr_totals[4];
  int                       psnr_count;
  /* presumably the quantizer histogram (see --q-hist) -- confirm */
  int                       counts[64];
  vpx_codec_ctx_t           encoder;
  unsigned int              frames_out;
  uint64_t                  cx_time;
  size_t                    nbytes;
  stats_io_t                stats;
#if CONFIG_FP_MB_STATS
  stats_io_t                fpmb_stats;
#endif
  struct vpx_image         *img;
  /* presumably the decoder used for --test-decode, with mismatch_seen set
   * once a mismatch is found -- confirm
   */
  vpx_codec_ctx_t           decoder;
  int                       mismatch_seen;
};


/* Normalizes and validates a rational option value.
 *
 * A negative denominator is folded into the numerator by flipping both
 * signs. die() is then called with a message naming `msg` if the numerator
 * is negative, and again if the denominator is zero.
 */
static void validate_positive_rational(const char *msg,
                                       struct vpx_rational *rat) {
  if (rat->den < 0) {
    rat->num = -rat->num;
    rat->den = -rat->den;
  }

  if (rat->num < 0) {
    die("Error: %s must be positive\n", msg);
  }

  if (rat->den == 0) {
    die("Error: %s has zero denominator\n", msg);
  }
}


/* Parses the encoder-wide command-line options into `global`.
 *
 * `global` is zeroed and then given defaults: the encoder with the highest
 * index, I420 input and the good-quality deadline. `argv` is scanned up to
 * its NULL terminator. Recognized options are consumed; every other entry is
 * kept, compacted to the front of `argv` in its original order, and the
 * result is NULL-terminated again.
 *
 * After the scan the pass count is settled:
 *   - --pass larger than --passes raises passes to match (with a warning);
 *   - an unspecified pass count becomes 2 for the "vp9" codec outside
 *     realtime mode (VP9 builds only) and 1 otherwise;
 *   - realtime mode is forced back to a single pass (with a warning).
 *
 * Invalid values for --codec, --passes, --pass or --fps are reported through
 * die().
 */
static void parse_global_config(struct VpxEncoderConfig *global, char **argv) {
  char       **argi, **argj;
  struct arg   arg;
  const int num_encoder = get_vpx_encoder_count();

  if (num_encoder < 1)
    die("Error: no valid encoder available\n");

  /* Initialize default parameters */
  memset(global, 0, sizeof(*global));
  global->codec = get_vpx_encoder_by_index(num_encoder - 1);
  global->passes = 0;
  global->color_type = I420;
  /* Assign default deadline to good quality */
  global->deadline = VPX_DL_GOOD_QUALITY;

  /* argi is the read cursor and argj the write cursor. Each entry is first
   * copied to *argj; argj only advances for entries that no option below
   * recognizes, so consumed options are overwritten by later entries.
   * NOTE(review): arg_match() presumably updates arg.argv_step when an
   * option's value occupies a separate argv entry -- confirm in args.c.
   */
  for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
    arg.argv_step = 1;

    if (arg_match(&arg, &codecarg, argi)) {
      global->codec = get_vpx_encoder_by_name(arg.val);
      if (!global->codec)
        die("Error: Unrecognized argument (%s) to --codec\n", arg.val);
    } else if (arg_match(&arg, &passes, argi)) {
      global->passes = arg_parse_uint(&arg);

      if (global->passes < 1 || global->passes > 2)
        die("Error: Invalid number of passes (%d)\n", global->passes);
    } else if (arg_match(&arg, &pass_arg, argi)) {
      global->pass = arg_parse_uint(&arg);

      if (global->pass < 1 || global->pass > 2)
        die("Error: Invalid pass selected (%d)\n",
            global->pass);
    } else if (arg_match(&arg, &usage, argi))
      global->usage = arg_parse_uint(&arg);
    else if (arg_match(&arg, &deadline, argi))
      global->deadline = arg_parse_uint(&arg);
    else if (arg_match(&arg, &best_dl, argi))
      global->deadline = VPX_DL_BEST_QUALITY;
    else if (arg_match(&arg, &good_dl, argi))
      global->deadline = VPX_DL_GOOD_QUALITY;
    else if (arg_match(&arg, &rt_dl, argi))
      global->deadline = VPX_DL_REALTIME;
    else if (arg_match(&arg, &use_yv12, argi))
      global->color_type = YV12;
    else if (arg_match(&arg, &use_i420, argi))
      global->color_type = I420;
    else if (arg_match(&arg, &use_i422, argi))
      global->color_type = I422;
    else if (arg_match(&arg, &use_i444, argi))
      global->color_type = I444;
    else if (arg_match(&arg, &use_i440, argi))
      global->color_type = I440;
    else if (arg_match(&arg, &quietarg, argi))
      global->quiet = 1;
    else if (arg_match(&arg, &verbosearg, argi))
      global->verbose = 1;
    else if (arg_match(&arg, &limit, argi))
      global->limit = arg_parse_uint(&arg);
    else if (arg_match(&arg, &skip, argi))
      global->skip_frames = arg_parse_uint(&arg);
    else if (arg_match(&arg, &psnrarg, argi))
      global->show_psnr = 1;
    else if (arg_match(&arg, &recontest, argi))
      global->test_decode = arg_parse_enum_or_int(&arg);
    else if (arg_match(&arg, &framerate, argi)) {
      global->framerate = arg_parse_rational(&arg);
      validate_positive_rational(arg.name, &global->framerate);
      global->have_framerate = 1;
    } else if (arg_match(&arg, &out_part, argi))
      global->out_part = 1;
    else if (arg_match(&arg, &debugmode, argi))
      global->debug = 1;
    else if (arg_match(&arg, &q_hist_n, argi))
      global->show_q_hist_buckets = arg_parse_uint(&arg);
    else if (arg_match(&arg, &rate_hist_n, argi))
      global->show_rate_hist_buckets = arg_parse_uint(&arg);
    else if (arg_match(&arg, &disable_warnings, argi))
      global->disable_warnings = 1;
    else if (arg_match(&arg, &disable_warning_prompt, argi))
      global->disable_warning_prompt = 1;
    else
      /* Not a global option: keep it for later parsing stages. */
      argj++;
  }

  if (global->pass) {
    /* DWIM: Assume the user meant passes=2 if pass=2 is specified */
    if (global->pass > global->passes) {
      warn("Assuming --pass=%d implies --passes=%d\n",
           global->pass, global->pass);
      global->passes = global->pass;
    }
  }
  /* Validate global config */
  if (global->passes == 0) {
#if CONFIG_VP9_ENCODER
    // Make default VP9 passes = 2 until there is a better quality 1-pass
    // encoder
    /* If the codec or its name is NULL, passes is left at 0 here. */
    if (global->codec != NULL && global->codec->name != NULL)
      global->passes = (strcmp(global->codec->name, "vp9") == 0 &&
                        global->deadline != VPX_DL_REALTIME) ? 2 : 1;
#else
    global->passes = 1;
#endif
  }

  if (global->deadline == VPX_DL_REALTIME &&
      global->passes > 1) {
    warn("Enforcing one-pass encoding in realtime mode\n");
    global->passes = 1;
  }
}


static void open_input_file(struct VpxInputContext *input) {
  /* Parse certain options from the input file, if possible */
  input->file = strcmp(input->filename, "-")
      ? fopen(input->filename, "rb") : set_binary_mode(stdin);

  if (!input->file)
    fatal("Failed to open input file");

  if (!fseeko(input->file, 0, SEEK_END)) {
    /* Input file is seekable. Figure out how long it is, so we can get
     * progress info.
     */
    input->length = ftello(input->file);
    rewind(input->file);
  }

  /* Default to 1:1 pixel aspect ratio. */
  input->pixel_aspect_ratio.numerator = 1;
  input->pixel_aspect_ratio.denominator = 1;

  /* For RAW input sources, these bytes will applied on the first frame
   *  in read_frame().
   */
  input->detect.buf_read = fread(input->detect.buf, 1, 4, input->file);
  input->detect.position = 0;

  if (input->detect.buf_read == 4
      && file_is_y4m(input->detect.buf)) {
    if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4,
                       input->only_i420) >= 0) {
      input->file_type = FILE_TYPE_Y4M;
      input->width = input->y4m.pic_w;
      input->height = input->y4m.pic_h;
      input->pixel_aspect_ratio.numerator = input->y4m.par_n;
      input->pixel_aspect_ratio.denominator = input->y4m.par_d;
      input->framerate.numerator = input->y4m.fps_n;
      input->framerate.denominator = input->y4m.fps_d;
      input->fmt = input->y4m.vpx_fmt;
      input->bit_depth = input->y4m.bit_depth;
    } else
      fatal("Unsupported Y4M stream.");
  } else if (input->detect.buf_read == 4 && fourcc_is_ivf(input->detect.buf)) {
    fatal("IVF is not supported as input.");
  } else {
    input->file_type = FILE_TYPE_RAW;
  }
}


/* Closes the input stream and, for Y4M input, also closes the Y4M reader
 * state held in input->y4m.
 */
static void close_input_file(struct VpxInputContext *input) {
  const int is_y4m = (input->file_type == FILE_TYPE_Y4M);

  fclose(input->file);

  if (is_y4m) {
    y4m_input_close(&input->y4m);
  }
}

static struct stream_state *new_stream(struct VpxEncoderConfig *global,
                                       struct stream_state *prev) {
  struct stream_state *stream;

  stream = calloc(1, sizeof(*stream));
  if (stream == NULL) {
    fatal("Failed to allocate new stream.");
  }

  if (prev) {
    memcpy(stream, prev, sizeof(*stream));
    stream->index++;
    prev->next = stream;
  } else {
    vpx_codec_err_t  res;