/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

/*
 * This is an example demonstrating how to implement a multi-layer
 * VP9 encoding scheme based on spatial scalability for video applications
 * that benefit from a scalable bitstream.
 */
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include "../args.h"
#include "../tools_common.h"
#include "../video_writer.h"

#include "vpx/svc_context.h"
#include "vpx/vp8cx.h"
#include "vpx/vpx_encoder.h"
#include "../vpxstats.h"

// Command-line option descriptors. Each one is matched against the argument
// list in parse_command_line() and listed in svc_args[] for the usage text.

// Input selection and basic stream geometry/timing.
static const arg_def_t skip_frames_arg =
    ARG_DEF("s", "skip-frames", 1, "input frames to skip");
static const arg_def_t frames_arg =
    ARG_DEF("f", "frames", 1, "number of frames to encode");
static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "source width");
static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "source height");
static const arg_def_t timebase_arg =
    ARG_DEF("t", "timebase", 1, "timebase (num/den)");
static const arg_def_t bitrate_arg = ARG_DEF(
    "b", "target-bitrate", 1, "encoding bitrate, in kilobits per second");

// SVC layering.
static const arg_def_t spatial_layers_arg =
    ARG_DEF("sl", "spatial-layers", 1, "number of spatial SVC layers");
static const arg_def_t temporal_layers_arg =
    ARG_DEF("tl", "temporal-layers", 1, "number of temporal SVC layers");
static const arg_def_t kf_dist_arg =
    ARG_DEF("k", "kf-dist", 1, "number of frames between keyframes");
static const arg_def_t scale_factors_arg =
    ARG_DEF("r", "scale-factors", 1, "scale factors (lowest to highest layer)");

// Two-pass control.
static const arg_def_t passes_arg =
    ARG_DEF("p", "passes", 1, "Number of passes (1/2)");
static const arg_def_t pass_arg =
    ARG_DEF(NULL, "pass", 1, "Pass to execute (1/2)");
static const arg_def_t fpf_name_arg =
    ARG_DEF(NULL, "fpf", 1, "First pass statistics file name");

// Rate control.
static const arg_def_t min_q_arg =
    ARG_DEF(NULL, "min-q", 1, "Minimum quantizer");
static const arg_def_t max_q_arg =
    ARG_DEF(NULL, "max-q", 1, "Maximum quantizer");
static const arg_def_t min_bitrate_arg =
    ARG_DEF(NULL, "min-bitrate", 1, "Minimum bitrate");
static const arg_def_t max_bitrate_arg =
    ARG_DEF(NULL, "max-bitrate", 1, "Maximum bitrate");
static const arg_def_t lag_in_frame_arg =
    ARG_DEF(NULL, "lag-in-frames", 1, "Number of frame to input before "
        "generating any outputs");
static const arg_def_t rc_end_usage_arg =
    ARG_DEF(NULL, "rc-end-usage", 1, "0 - 3: VBR, CBR, CQ, Q");

#if CONFIG_VP9_HIGHBITDEPTH
// Accepted spellings for --bit-depth and the codec bit depth each maps to.
static const struct arg_enum_list bitdepth_enum[] = {
  {"8",  VPX_BITS_8},
  {"10", VPX_BITS_10},
  {"12", VPX_BITS_12},
  {NULL, 0}
};

static const arg_def_t bitdepth_arg =
    ARG_DEF_ENUM("d", "bit-depth", 1, "Bit depth for codec 8, 10 or 12. ",
                 bitdepth_enum);
#endif  // CONFIG_VP9_HIGHBITDEPTH

static const arg_def_t *svc_args[] = {
84
  &frames_arg,        &width_arg,         &height_arg,
85
  &timebase_arg,      &bitrate_arg,       &skip_frames_arg, &spatial_layers_arg,
86 87
  &kf_dist_arg,       &scale_factors_arg, &passes_arg,      &pass_arg,
  &fpf_name_arg,      &min_q_arg,         &max_q_arg,       &min_bitrate_arg,
88
  &max_bitrate_arg,   &temporal_layers_arg,                 &lag_in_frame_arg,
89 90 91
#if CONFIG_VP9_HIGHBITDEPTH
  &bitdepth_arg,
#endif
92
  &rc_end_usage_arg,  NULL
93 94 95 96 97 98 99 100 101 102
};

// Application defaults, applied by parse_command_line() before any
// command-line option is processed.
static const uint32_t default_frames_to_skip = 0;
static const uint32_t default_frames_to_code = 60 * 60;
static const uint32_t default_width = 1920;
static const uint32_t default_height = 1080;
static const uint32_t default_timebase_num = 1;
static const uint32_t default_timebase_den = 60;
static const uint32_t default_bitrate = 1000;
static const uint32_t default_spatial_layers = 5;
static const uint32_t default_temporal_layers = 1;
static const uint32_t default_kf_dist = 100;

typedef struct {
107 108
  const char *input_filename;
  const char *output_filename;
109 110
  uint32_t frames_to_code;
  uint32_t frames_to_skip;
111 112 113 114
  struct VpxInputContext input_ctx;
  stats_io_t rc_stats;
  int passes;
  int pass;
115 116
} AppInput;

117 118
// Program name shown in the usage message; main() sets it from argv[0].
static const char *exec_name;

void usage_exit(void) {
120 121 122 123
  fprintf(stderr, "Usage: %s <options> input_filename output_filename\n",
          exec_name);
  fprintf(stderr, "Options:\n");
  arg_show_usage(stderr, svc_args);
124 125 126
  exit(EXIT_FAILURE);
}

127
static void parse_command_line(int argc, const char **argv_,
128
                               AppInput *app_input, SvcContext *svc_ctx,
129
                               vpx_codec_enc_cfg_t *enc_cfg) {
130 131 132 133
  struct arg arg = {0};
  char **argv = NULL;
  char **argi = NULL;
  char **argj = NULL;
134
  vpx_codec_err_t res;
135 136 137
  int passes = 0;
  int pass = 0;
  const char *fpf_file_name = NULL;
138 139
  unsigned int min_bitrate = 0;
  unsigned int max_bitrate = 0;
140
  char string_options[1024] = {0};
141

142 143 144
  // initialize SvcContext with parameters that will be passed to vpx_svc_init
  svc_ctx->log_level = SVC_LOG_DEBUG;
  svc_ctx->spatial_layers = default_spatial_layers;
145
  svc_ctx->temporal_layers = default_temporal_layers;
146

147 148
  // start with default encoder configuration
  res = vpx_codec_enc_config_default(vpx_codec_vp9_cx(), enc_cfg, 0);
149 150 151
  if (res) {
    die("Failed to get config: %s\n", vpx_codec_err_to_string(res));
  }
152 153 154 155 156 157 158 159
  // update enc_cfg with app default values
  enc_cfg->g_w = default_width;
  enc_cfg->g_h = default_height;
  enc_cfg->g_timebase.num = default_timebase_num;
  enc_cfg->g_timebase.den = default_timebase_den;
  enc_cfg->rc_target_bitrate = default_bitrate;
  enc_cfg->kf_min_dist = default_kf_dist;
  enc_cfg->kf_max_dist = default_kf_dist;
160
  enc_cfg->rc_end_usage = VPX_CQ;
161 162 163 164 165 166 167 168 169 170

  // initialize AppInput with default values
  app_input->frames_to_code = default_frames_to_code;
  app_input->frames_to_skip = default_frames_to_skip;

  // process command line options
  argv = argv_dup(argc - 1, argv_ + 1);
  for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
    arg.argv_step = 1;

171
    if (arg_match(&arg, &frames_arg, argi)) {
172 173 174 175 176 177 178 179 180 181 182
      app_input->frames_to_code = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &width_arg, argi)) {
      enc_cfg->g_w = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &height_arg, argi)) {
      enc_cfg->g_h = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &timebase_arg, argi)) {
      enc_cfg->g_timebase = arg_parse_rational(&arg);
    } else if (arg_match(&arg, &bitrate_arg, argi)) {
      enc_cfg->rc_target_bitrate = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &skip_frames_arg, argi)) {
      app_input->frames_to_skip = arg_parse_uint(&arg);
183
    } else if (arg_match(&arg, &spatial_layers_arg, argi)) {
184
      svc_ctx->spatial_layers = arg_parse_uint(&arg);
185 186
    } else if (arg_match(&arg, &temporal_layers_arg, argi)) {
      svc_ctx->temporal_layers = arg_parse_uint(&arg);
187 188 189 190
    } else if (arg_match(&arg, &kf_dist_arg, argi)) {
      enc_cfg->kf_min_dist = arg_parse_uint(&arg);
      enc_cfg->kf_max_dist = enc_cfg->kf_min_dist;
    } else if (arg_match(&arg, &scale_factors_arg, argi)) {
191
      snprintf(string_options, sizeof(string_options), "%s scale-factors=%s",
192
               string_options, arg.val);
193 194 195 196 197 198 199 200 201 202 203 204
    } else if (arg_match(&arg, &passes_arg, argi)) {
      passes = arg_parse_uint(&arg);
      if (passes < 1 || passes > 2) {
        die("Error: Invalid number of passes (%d)\n", passes);
      }
    } else if (arg_match(&arg, &pass_arg, argi)) {
      pass = arg_parse_uint(&arg);
      if (pass < 1 || pass > 2) {
        die("Error: Invalid pass selected (%d)\n", pass);
      }
    } else if (arg_match(&arg, &fpf_name_arg, argi)) {
      fpf_file_name = arg.val;
205
    } else if (arg_match(&arg, &min_q_arg, argi)) {
206
      snprintf(string_options, sizeof(string_options), "%s min-quantizers=%s",
207
               string_options, arg.val);
208
    } else if (arg_match(&arg, &max_q_arg, argi)) {
209
      snprintf(string_options, sizeof(string_options), "%s max-quantizers=%s",
210
               string_options, arg.val);
211 212 213 214
    } else if (arg_match(&arg, &min_bitrate_arg, argi)) {
      min_bitrate = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &max_bitrate_arg, argi)) {
      max_bitrate = arg_parse_uint(&arg);
215 216 217 218
    } else if (arg_match(&arg, &lag_in_frame_arg, argi)) {
      enc_cfg->g_lag_in_frames = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &rc_end_usage_arg, argi)) {
      enc_cfg->rc_end_usage = arg_parse_uint(&arg);
219 220
#if CONFIG_VP9_HIGHBITDEPTH
    } else if (arg_match(&arg, &bitdepth_arg, argi)) {
221
      enc_cfg->g_bit_depth = arg_parse_enum_or_int(&arg);
222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239
      switch (enc_cfg->g_bit_depth) {
        case VPX_BITS_8:
          enc_cfg->g_input_bit_depth = 8;
          enc_cfg->g_profile = 0;
          break;
        case VPX_BITS_10:
          enc_cfg->g_input_bit_depth = 10;
          enc_cfg->g_profile = 2;
          break;
         case VPX_BITS_12:
          enc_cfg->g_input_bit_depth = 12;
          enc_cfg->g_profile = 2;
          break;
        default:
          die("Error: Invalid bit depth selected (%d)\n", enc_cfg->g_bit_depth);
          break;
      }
#endif  // CONFIG_VP9_HIGHBITDEPTH
240 241 242 243
    } else {
      ++argj;
    }
  }
244

245 246 247 248
  // There will be a space in front of the string options
  if (strlen(string_options) > 0)
    vpx_svc_set_options(svc_ctx, string_options + 1);

249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278
  if (passes == 0 || passes == 1) {
    if (pass) {
      fprintf(stderr, "pass is ignored since there's only one pass\n");
    }
    enc_cfg->g_pass = VPX_RC_ONE_PASS;
  } else {
    if (pass == 0) {
      die("pass must be specified when passes is 2\n");
    }

    if (fpf_file_name == NULL) {
      die("fpf must be specified when passes is 2\n");
    }

    if (pass == 1) {
      enc_cfg->g_pass = VPX_RC_FIRST_PASS;
      if (!stats_open_file(&app_input->rc_stats, fpf_file_name, 0)) {
        fatal("Failed to open statistics store");
      }
    } else {
      enc_cfg->g_pass = VPX_RC_LAST_PASS;
      if (!stats_open_file(&app_input->rc_stats, fpf_file_name, 1)) {
        fatal("Failed to open statistics store");
      }
      enc_cfg->rc_twopass_stats_in = stats_get(&app_input->rc_stats);
    }
    app_input->passes = passes;
    app_input->pass = pass;
  }

279 280 281 282 283 284 285 286 287 288 289
  if (enc_cfg->rc_target_bitrate > 0) {
    if (min_bitrate > 0) {
      enc_cfg->rc_2pass_vbr_minsection_pct =
          min_bitrate * 100 / enc_cfg->rc_target_bitrate;
    }
    if (max_bitrate > 0) {
      enc_cfg->rc_2pass_vbr_maxsection_pct =
          max_bitrate * 100 / enc_cfg->rc_target_bitrate;
    }
  }

290 291 292 293
  // Check for unrecognized options
  for (argi = argv; *argi; ++argi)
    if (argi[0][0] == '-' && strlen(argi[0]) > 1)
      die("Error: Unrecognized option %s\n", *argi);
294

295
  if (argv[0] == NULL || argv[1] == 0) {
296
    usage_exit();
297
  }
298
  app_input->input_filename = argv[0];
299 300
  app_input->output_filename = argv[1];
  free(argv);
301

302 303 304
  if (enc_cfg->g_w < 16 || enc_cfg->g_w % 2 || enc_cfg->g_h < 16 ||
      enc_cfg->g_h % 2)
    die("Invalid resolution: %d x %d\n", enc_cfg->g_w, enc_cfg->g_h);
305

306 307
  printf(
      "Codec %s\nframes: %d, skip: %d\n"
308
      "layers: %d\n"
309 310
      "width %d, height: %d,\n"
      "num: %d, den: %d, bitrate: %d,\n"
311
      "gop size: %d\n",
312
      vpx_codec_iface_name(vpx_codec_vp9_cx()), app_input->frames_to_code,
313
      app_input->frames_to_skip,
314 315
      svc_ctx->spatial_layers, enc_cfg->g_w, enc_cfg->g_h,
      enc_cfg->g_timebase.num, enc_cfg->g_timebase.den,
316
      enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist);
317 318
}

319 320
int main(int argc, const char **argv) {
  AppInput app_input = {0};
321 322
  VpxVideoWriter *writer = NULL;
  VpxVideoInfo info = {0};
323
  vpx_codec_ctx_t codec;
324
  vpx_codec_enc_cfg_t enc_cfg;
325
  SvcContext svc_ctx;
326 327
  uint32_t i;
  uint32_t frame_cnt = 0;
328
  vpx_image_t raw;
329
  vpx_codec_err_t res;
330 331
  int pts = 0;            /* PTS starts at 0 */
  int frame_duration = 1; /* 1 timebase tick per frame */
332
  FILE *infile = NULL;
333
  int end_of_stream = 0;
334
  int frames_received = 0;
335

336 337
  memset(&svc_ctx, 0, sizeof(svc_ctx));
  svc_ctx.log_print = 1;
338
  exec_name = argv[0];
339
  parse_command_line(argc, argv, &app_input, &svc_ctx, &enc_cfg);
340 341

  // Allocate image buffer
342 343 344 345 346 347 348 349
#if CONFIG_VP9_HIGHBITDEPTH
  if (!vpx_img_alloc(&raw, enc_cfg.g_input_bit_depth == 8 ?
                         VPX_IMG_FMT_I420 : VPX_IMG_FMT_I42016,
                     enc_cfg.g_w, enc_cfg.g_h, 32)) {
    die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h);
  }
#else
  if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, enc_cfg.g_w, enc_cfg.g_h, 32)) {
350
    die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h);
351 352
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH
353

354 355
  if (!(infile = fopen(app_input.input_filename, "rb")))
    die("Failed to open %s for reading\n", app_input.input_filename);
356 357 358 359 360

  // Initialize codec
  if (vpx_svc_init(&svc_ctx, &codec, vpx_codec_vp9_cx(), &enc_cfg) !=
      VPX_CODEC_OK)
    die("Failed to initialize encoder\n");
Ivan Maltz's avatar
Ivan Maltz committed
361

362 363 364
  info.codec_fourcc = VP9_FOURCC;
  info.time_base.numerator = enc_cfg.g_timebase.num;
  info.time_base.denominator = enc_cfg.g_timebase.den;
365 366 367 368 369 370 371 372

  if (!(app_input.passes == 2 && app_input.pass == 1)) {
    // We don't save the bitstream for the 1st pass on two pass rate control
    writer = vpx_video_writer_open(app_input.output_filename, kContainerIVF,
                                   &info);
    if (!writer)
      die("Failed to open %s for writing\n", app_input.output_filename);
  }
373 374

  // skip initial frames
375 376
  for (i = 0; i < app_input.frames_to_skip; ++i)
    vpx_img_read(&raw, infile);
377

378
  // Encode frames
379
  while (!end_of_stream) {
380 381
    vpx_codec_iter_t iter = NULL;
    const vpx_codec_cx_pkt_t *cx_pkt;
382 383 384 385 386
    if (frame_cnt >= app_input.frames_to_code || !vpx_img_read(&raw, infile)) {
      // We need one extra vpx_svc_encode call at end of stream to flush
      // encoder and get remaining data
      end_of_stream = 1;
    }
387

388
    res = vpx_svc_encode(&svc_ctx, &codec, (end_of_stream ? NULL : &raw),
389
                         pts, frame_duration, VPX_DL_GOOD_QUALITY);
390 391
    printf("%s", vpx_svc_get_message(&svc_ctx));
    if (res != VPX_CODEC_OK) {
392 393
      die_codec(&codec, "Failed to encode frame");
    }
394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418

    while ((cx_pkt = vpx_codec_get_cx_data(&codec, &iter)) != NULL) {
      switch (cx_pkt->kind) {
        case VPX_CODEC_CX_FRAME_PKT: {
          if (cx_pkt->data.frame.sz > 0)
            vpx_video_writer_write_frame(writer,
                                         cx_pkt->data.frame.buf,
                                         cx_pkt->data.frame.sz,
                                         cx_pkt->data.frame.pts);

          printf("SVC frame: %d, kf: %d, size: %d, pts: %d\n", frames_received,
                 !!(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY),
                 (int)cx_pkt->data.frame.sz, (int)cx_pkt->data.frame.pts);
          ++frames_received;
          break;
        }
        case VPX_CODEC_STATS_PKT: {
          stats_write(&app_input.rc_stats,
                      cx_pkt->data.twopass_stats.buf,
                      cx_pkt->data.twopass_stats.sz);
          break;
        }
        default: {
          break;
        }
419 420
      }
    }
421

422 423 424 425
    if (!end_of_stream) {
      ++frame_cnt;
      pts += frame_duration;
    }
426
  }
427

428
  printf("Processed %d frames\n", frame_cnt);
429

430
  fclose(infile);
431 432
  if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec");

433 434 435 436 437 438
  if (app_input.passes == 2)
    stats_close(&app_input.rc_stats, 1);

  if (writer) {
    vpx_video_writer_close(writer);
  }
439

440 441 442 443 444 445
  vpx_img_free(&raw);

  // display average size, psnr
  printf("%s", vpx_svc_dump_statistics(&svc_ctx));

  vpx_svc_release(&svc_ctx);
446 447 448

  return EXIT_SUCCESS;
}