vp9_encodeframe.c

  *max_block_size = find_partition_size(*max_block_size,
                                        row8x8_remaining, col8x8_remaining,
                                        &bh, &bw);
  *min_block_size = MIN(*min_block_size, *max_block_size);

  // When use_square_partition_only is true, make sure at least one square
  // partition is allowed by selecting the next smaller square size as
  // *min_block_size.
  if (cpi->sf.use_square_partition_only &&
      next_square_size[*max_block_size] < *min_block_size) {
    *min_block_size = next_square_size[*max_block_size];
  }
}

static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
  vpx_memcpy(ctx->pred_mv, x->pred_mv, sizeof(x->pred_mv));
}

static INLINE void load_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
  vpx_memcpy(x->pred_mv, ctx->pred_mv, sizeof(x->pred_mv));
}

// TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
// unlikely to be selected depending on previous rate-distortion optimization
// results, for encoding speed-up.
static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
                              TOKENEXTRA **tp, int mi_row,
                              int mi_col, BLOCK_SIZE bsize, int *rate,
                              int64_t *dist, int do_recon, int64_t best_rd) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->mb;
  const int ms = num_8x8_blocks_wide_lookup[bsize] / 2;
  ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
  PARTITION_CONTEXT sl[8], sa[8];
  TOKENEXTRA *tp_orig = *tp;
  PICK_MODE_CONTEXT *ctx = get_block_context(x, bsize);
  int i, pl;
  BLOCK_SIZE subsize;
  int this_rate, sum_rate = 0, best_rate = INT_MAX;
  int64_t this_dist, sum_dist = 0, best_dist = INT64_MAX;
  int64_t sum_rd = 0;
  int do_split = bsize >= BLOCK_8X8;
  int do_rect = 1;
  // Override skipping rectangular partition operations for edge blocks
  const int force_horz_split = (mi_row + ms >= cm->mi_rows);
  const int force_vert_split = (mi_col + ms >= cm->mi_cols);
  const int xss = x->e_mbd.plane[1].subsampling_x;
  const int yss = x->e_mbd.plane[1].subsampling_y;

  int partition_none_allowed = !force_horz_split && !force_vert_split;
  int partition_horz_allowed = !force_vert_split && yss <= xss &&
                               bsize >= BLOCK_8X8;
  int partition_vert_allowed = !force_horz_split && xss <= yss &&
                               bsize >= BLOCK_8X8;
  (void) *tp_orig;

  if (bsize < BLOCK_8X8) {
    // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0
    // there is nothing to be done.
    if (x->ab_index != 0) {
      *rate = 0;
      *dist = 0;
      return;
    }
  }
  assert(num_8x8_blocks_wide_lookup[bsize] ==
             num_8x8_blocks_high_lookup[bsize]);

  if (bsize == BLOCK_16X16) {
    set_offsets(cpi, tile, mi_row, mi_col, bsize);
    x->mb_energy = vp9_block_energy(cpi, x, bsize);
  }

  // Determine partition types in search according to the speed features.
  // The threshold set here has to be of square block size.
  if (cpi->sf.auto_min_max_partition_size) {
    partition_none_allowed &= (bsize <= cpi->sf.max_partition_size &&
                               bsize >= cpi->sf.min_partition_size);
    partition_horz_allowed &= ((bsize <= cpi->sf.max_partition_size &&
                                bsize >  cpi->sf.min_partition_size) ||
                                force_horz_split);
    partition_vert_allowed &= ((bsize <= cpi->sf.max_partition_size &&
                                bsize >  cpi->sf.min_partition_size) ||
                                force_vert_split);
    do_split &= bsize > cpi->sf.min_partition_size;
  }
  if (cpi->sf.use_square_partition_only) {
    partition_horz_allowed &= force_horz_split;
    partition_vert_allowed &= force_vert_split;
  }

  save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);

  if (cpi->sf.disable_split_var_thresh && partition_none_allowed) {
    unsigned int source_variancey;
    vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
    source_variancey = get_sby_perpixel_variance(cpi, x, bsize);
    if (source_variancey < cpi->sf.disable_split_var_thresh) {
      do_split = 0;
      if (source_variancey < cpi->sf.disable_split_var_thresh / 2)
        do_rect = 0;
    }
  }

  if (!x->in_active_map && (partition_horz_allowed || partition_vert_allowed))
    do_split = 0;
  // PARTITION_NONE
  if (partition_none_allowed) {
    rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &this_rate, &this_dist, bsize,
                     ctx, best_rd);
    if (this_rate != INT_MAX) {
      if (bsize >= BLOCK_8X8) {
        pl = partition_plane_context(cpi->above_seg_context,
                                     cpi->left_seg_context,
                                     mi_row, mi_col, bsize);
        this_rate += x->partition_cost[pl][PARTITION_NONE];
      }
      sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist);
      if (sum_rd < best_rd) {
        int64_t stop_thresh = 4096;
        int64_t stop_thresh_rd;

        best_rate = this_rate;
        best_dist = this_dist;
        best_rd = sum_rd;
        if (bsize >= BLOCK_8X8)
          *(get_sb_partitioning(x, bsize)) = bsize;

        // Adjust threshold according to partition size.
        stop_thresh >>= 8 - (b_width_log2_lookup[bsize] +
            b_height_log2_lookup[bsize]);

        stop_thresh_rd = RDCOST(x->rdmult, x->rddiv, 0, stop_thresh);
        // If obtained distortion is very small, choose current partition
        // and stop splitting.
        if (!x->e_mbd.lossless && best_rd < stop_thresh_rd) {
          do_split = 0;
          do_rect = 0;
        }
      }
    }
    if (!x->in_active_map) {
      do_split = 0;
      do_rect = 0;
    }
    restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
  }

  // store estimated motion vector
  if (cpi->sf.adaptive_motion_search)
    store_pred_mv(x, ctx);

  // PARTITION_SPLIT
  sum_rd = 0;
  // TODO(jingning): use the motion vectors given by the above search as
  // the starting point of motion search in the following partition type check.
  if (do_split) {
    subsize = get_subsize(bsize, PARTITION_SPLIT);
    for (i = 0; i < 4 && sum_rd < best_rd; ++i) {
      const int x_idx = (i & 1) * ms;
      const int y_idx = (i >> 1) * ms;

      if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
        continue;

      *get_sb_index(x, subsize) = i;
      if (cpi->sf.adaptive_motion_search)
        load_pred_mv(x, ctx);
      if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
          partition_none_allowed)
        get_block_context(x, subsize)->pred_interp_filter =
            ctx->mic.mbmi.interp_filter;
      rd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx, subsize,
                        &this_rate, &this_dist, i != 3, best_rd - sum_rd);

      if (this_rate == INT_MAX) {
        sum_rd = INT64_MAX;
      } else {
        sum_rate += this_rate;
        sum_dist += this_dist;
        sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
      }
    }
    if (sum_rd < best_rd && i == 4) {
      pl = partition_plane_context(cpi->above_seg_context,
                                   cpi->left_seg_context,
                                   mi_row, mi_col, bsize);
      sum_rate += x->partition_cost[pl][PARTITION_SPLIT];
      sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
      if (sum_rd < best_rd) {
        best_rate = sum_rate;
        best_dist = sum_dist;
        best_rd = sum_rd;
        *(get_sb_partitioning(x, bsize)) = subsize;
      }
    } else {
      // skip rectangular partition test when larger block size
      // gives better rd cost
      if (cpi->sf.less_rectangular_check)
        do_rect &= !partition_none_allowed;
    }
    restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
  }

  // PARTITION_HORZ
  if (partition_horz_allowed && do_rect) {
    subsize = get_subsize(bsize, PARTITION_HORZ);
    *get_sb_index(x, subsize) = 0;
    if (cpi->sf.adaptive_motion_search)
      load_pred_mv(x, ctx);
    if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
        partition_none_allowed)
      get_block_context(x, subsize)->pred_interp_filter =
          ctx->mic.mbmi.interp_filter;
    rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
                     get_block_context(x, subsize), best_rd);
    sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);

    if (sum_rd < best_rd && mi_row + ms < cm->mi_rows) {
      update_state(cpi, get_block_context(x, subsize), mi_row, mi_col,
                   subsize, 0);
      encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);

      *get_sb_index(x, subsize) = 1;
      if (cpi->sf.adaptive_motion_search)
        load_pred_mv(x, ctx);
      if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
          partition_none_allowed)
        get_block_context(x, subsize)->pred_interp_filter =
            ctx->mic.mbmi.interp_filter;
      rd_pick_sb_modes(cpi, tile, mi_row + ms, mi_col, &this_rate,
                       &this_dist, subsize, get_block_context(x, subsize),
                       best_rd - sum_rd);
      if (this_rate == INT_MAX) {
        sum_rd = INT64_MAX;
      } else {
        sum_rate += this_rate;
        sum_dist += this_dist;
        sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
      }
    }
    if (sum_rd < best_rd) {
      pl = partition_plane_context(cpi->above_seg_context,
                                   cpi->left_seg_context,
                                   mi_row, mi_col, bsize);
      sum_rate += x->partition_cost[pl][PARTITION_HORZ];
      sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
      if (sum_rd < best_rd) {
        best_rd = sum_rd;
        best_rate = sum_rate;
        best_dist = sum_dist;
        *(get_sb_partitioning(x, bsize)) = subsize;
      }
    }
    restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
  }

  // PARTITION_VERT
  if (partition_vert_allowed && do_rect) {
    subsize = get_subsize(bsize, PARTITION_VERT);

    *get_sb_index(x, subsize) = 0;
    if (cpi->sf.adaptive_motion_search)
      load_pred_mv(x, ctx);
    if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
        partition_none_allowed)
      get_block_context(x, subsize)->pred_interp_filter =
          ctx->mic.mbmi.interp_filter;
    rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
                     get_block_context(x, subsize), best_rd);
    sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
    if (sum_rd < best_rd && mi_col + ms < cm->mi_cols) {
      update_state(cpi, get_block_context(x, subsize), mi_row, mi_col,
                   subsize, 0);
      encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);

      *get_sb_index(x, subsize) = 1;
      if (cpi->sf.adaptive_motion_search)
        load_pred_mv(x, ctx);
      if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
          partition_none_allowed)
        get_block_context(x, subsize)->pred_interp_filter =
            ctx->mic.mbmi.interp_filter;
      rd_pick_sb_modes(cpi, tile, mi_row, mi_col + ms, &this_rate,
                       &this_dist, subsize, get_block_context(x, subsize),
                       best_rd - sum_rd);
      if (this_rate == INT_MAX) {
        sum_rd = INT64_MAX;
      } else {
        sum_rate += this_rate;
        sum_dist += this_dist;
        sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
      }
    }
    if (sum_rd < best_rd) {
      pl = partition_plane_context(cpi->above_seg_context,
                                   cpi->left_seg_context,
                                   mi_row, mi_col, bsize);
      sum_rate += x->partition_cost[pl][PARTITION_VERT];
      sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
      if (sum_rd < best_rd) {
        best_rate = sum_rate;
        best_dist = sum_dist;
        best_rd = sum_rd;
        *(get_sb_partitioning(x, bsize)) = subsize;
      }
    }
    restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
  }

  // TODO(jbb): This code added so that we avoid static analysis
  // warning related to the fact that best_rd isn't used after this
  // point.  This code should be refactored so that the duplicate
  // checks occur in some sub function and thus are used...
  (void) best_rd;
  *rate = best_rate;
  *dist = best_dist;

  if (best_rate < INT_MAX && best_dist < INT64_MAX && do_recon) {
    int output_enabled = (bsize == BLOCK_64X64);

    // Check the projected output rate for this SB against it's target
    // and and if necessary apply a Q delta using segmentation to get
    // closer to the target.
    if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) && cm->seg.update_map) {
      select_in_frame_q_segment(cpi, mi_row, mi_col, output_enabled, best_rate);
    }
    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
      cpi->cyclic_refresh.projected_rate_sb = best_rate;
      cpi->cyclic_refresh.projected_dist_sb = best_dist;
    }

    encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize);
  }
  if (bsize == BLOCK_64X64) {
    assert(tp_orig < *tp);
    assert(best_rate < INT_MAX);
    assert(best_dist < INT64_MAX);
  } else {
    assert(tp_orig == *tp);
  }
}

static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
                             int mi_row, TOKENEXTRA **tp) {
  VP9_COMMON *const cm = &cpi->common;
  int mi_col;

  // Initialize the left context for the new SB row
  vpx_memset(&cpi->left_context, 0, sizeof(cpi->left_context));
  vpx_memset(cpi->left_seg_context, 0, sizeof(cpi->left_seg_context));

  // Code each SB in the row
  for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
       mi_col += MI_BLOCK_SIZE) {
    int dummy_rate;
    int64_t dummy_dist;

    BLOCK_SIZE i;
    MACROBLOCK *x = &cpi->mb;

    if (cpi->sf.adaptive_pred_interp_filter) {
      for (i = BLOCK_4X4; i < BLOCK_8X8; ++i) {
        const int num_4x4_w = num_4x4_blocks_wide_lookup[i];
        const int num_4x4_h = num_4x4_blocks_high_lookup[i];
        const int num_4x4_blk = MAX(4, num_4x4_w * num_4x4_h);
        for (x->sb_index = 0; x->sb_index < 4; ++x->sb_index)
          for (x->mb_index = 0; x->mb_index < 4; ++x->mb_index)
            for (x->b_index = 0; x->b_index < 16 / num_4x4_blk; ++x->b_index)
              get_block_context(x, i)->pred_interp_filter = SWITCHABLE;
      }
    }

    vp9_zero(cpi->mb.pred_mv);

    if ((cpi->sf.partition_search_type == SEARCH_PARTITION &&
         cpi->sf.use_lastframe_partitioning) ||
        cpi->sf.partition_search_type == FIXED_PARTITION ||
        cpi->sf.partition_search_type == VAR_BASED_FIXED_PARTITION) {
      const int idx_str = cm->mode_info_stride * mi_row + mi_col;
      MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;
      MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str;
      cpi->mb.source_variance = UINT_MAX;
      if (cpi->sf.partition_search_type == FIXED_PARTITION) {
        set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
        set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col,
                               cpi->sf.always_this_block_size);
        rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
                         &dummy_rate, &dummy_dist, 1);
      } else if (cpi->sf.partition_search_type == VAR_BASED_FIXED_PARTITION) {
        BLOCK_SIZE bsize;
        set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
        bsize = get_rd_var_based_fixed_partition(cpi, mi_row, mi_col);
        set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize);
        rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
                         &dummy_rate, &dummy_dist, 1);
      } else if (cpi->sf.partition_search_type == VAR_BASED_PARTITION) {
        choose_partitioning(cpi, tile, mi_row, mi_col);
        rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
                         &dummy_rate, &dummy_dist, 1);
      } else {
        if ((cm->current_video_frame
            % cpi->sf.last_partitioning_redo_frequency) == 0
            || cm->prev_mi == 0
            || cm->show_frame == 0
            || cm->frame_type == KEY_FRAME
            || cpi->rc.is_src_frame_alt_ref
            || ((cpi->sf.use_lastframe_partitioning ==
                 LAST_FRAME_PARTITION_LOW_MOTION) &&
                 sb_has_motion(cm, prev_mi_8x8))) {
          // If required set upper and lower partition size limits
          if (cpi->sf.auto_min_max_partition_size) {
            set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
            rd_auto_partition_range(cpi, tile, mi_row, mi_col,
                                    &cpi->sf.min_partition_size,
                                    &cpi->sf.max_partition_size);
          }
          rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64,
                            &dummy_rate, &dummy_dist, 1, INT64_MAX);
        } else {
          copy_partitioning(cm, mi_8x8, prev_mi_8x8);
          rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
                           &dummy_rate, &dummy_dist, 1);
        }
      }
    } else {
      // If required set upper and lower partition size limits
      if (cpi->sf.auto_min_max_partition_size) {
        set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
        rd_auto_partition_range(cpi, tile, mi_row, mi_col,
                                &cpi->sf.min_partition_size,
                                &cpi->sf.max_partition_size);
      }
      rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64,
                        &dummy_rate, &dummy_dist, 1, INT64_MAX);
    }
  }
}

static void init_encode_frame_mb_context(VP9_COMP *cpi) {
  MACROBLOCK *const x = &cpi->mb;
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);

  x->act_zbin_adj = 0;
  cpi->seg0_idx = 0;

  xd->mode_info_stride = cm->mode_info_stride;

  // Copy data over into macro block data structures.
  vp9_setup_src_planes(x, cpi->Source, 0, 0);

  // TODO(jkoleszar): are these initializations required?
  vp9_setup_pre_planes(xd, 0, get_ref_frame_buffer(cpi, LAST_FRAME), 0, 0,
                       NULL);
  vp9_setup_dst_planes(xd, get_frame_new_buffer(cm), 0, 0);

  vp9_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y);

  xd->mi_8x8[0]->mbmi.mode = DC_PRED;
  xd->mi_8x8[0]->mbmi.uv_mode = DC_PRED;

  vp9_zero(cm->counts.y_mode);
  vp9_zero(cm->counts.uv_mode);
  vp9_zero(cm->counts.inter_mode);
  vp9_zero(cm->counts.partition);
  vp9_zero(cm->counts.intra_inter);
  vp9_zero(cm->counts.comp_inter);
  vp9_zero(cm->counts.single_ref);
  vp9_zero(cm->counts.comp_ref);
  vp9_zero(cm->counts.tx);
  vp9_zero(cm->counts.skip);

  // Note: this memset assumes above_context[0], [1] and [2]
  // are allocated as part of the same buffer.
  vpx_memset(cpi->above_context[0], 0,
             sizeof(*cpi->above_context[0]) *
             2 * aligned_mi_cols * MAX_MB_PLANE);
  vpx_memset(cpi->above_seg_context, 0,
             sizeof(*cpi->above_seg_context) * aligned_mi_cols);
}

static void switch_lossless_mode(VP9_COMP *cpi, int lossless) {
  if (lossless) {
    // printf("Switching to lossless\n");
    cpi->mb.fwd_txm4x4 = vp9_fwht4x4;
    cpi->mb.e_mbd.itxm_add = vp9_iwht4x4_add;
    cpi->mb.optimize = 0;
    cpi->common.lf.filter_level = 0;
    cpi->zbin_mode_boost_enabled = 0;
    cpi->common.tx_mode = ONLY_4X4;
  } else {
    // printf("Not lossless\n");
    cpi->mb.fwd_txm4x4 = vp9_fdct4x4;
    cpi->mb.e_mbd.itxm_add = vp9_idct4x4_add;
  }
}

static int check_dual_ref_flags(VP9_COMP *cpi) {
  const int ref_flags = cpi->ref_frame_flags;

  if (vp9_segfeature_active(&cpi->common.seg, 1, SEG_LVL_REF_FRAME)) {
    return 0;
  } else {
    return (!!(ref_flags & VP9_GOLD_FLAG) + !!(ref_flags & VP9_LAST_FLAG)
        + !!(ref_flags & VP9_ALT_FLAG)) >= 2;
  }
}

static int get_skip_flag(MODE_INFO **mi_8x8, int mis, int ymbs, int xmbs) {
  int x, y;

  for (y = 0; y < ymbs; y++) {
    for (x = 0; x < xmbs; x++) {
      if (!mi_8x8[y * mis + x]->mbmi.skip)
        return 0;
    }
  }

  return 1;
}

static void set_txfm_flag(MODE_INFO **mi_8x8, int mis, int ymbs, int xmbs,
                          TX_SIZE tx_size) {
  int x, y;

  for (y = 0; y < ymbs; y++) {
    for (x = 0; x < xmbs; x++)
      mi_8x8[y * mis + x]->mbmi.tx_size = tx_size;
  }
}

static void reset_skip_txfm_size_b(const VP9_COMMON *cm, int mis,
                                   TX_SIZE max_tx_size, int bw, int bh,
                                   int mi_row, int mi_col,
                                   MODE_INFO **mi_8x8) {
  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) {
    return;
  } else {
    const MB_MODE_INFO *const mbmi = &mi_8x8[0]->mbmi;
    if (mbmi->tx_size > max_tx_size) {
      const int ymbs = MIN(bh, cm->mi_rows - mi_row);
      const int xmbs = MIN(bw, cm->mi_cols - mi_col);

      assert(vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) ||
             get_skip_flag(mi_8x8, mis, ymbs, xmbs));
      set_txfm_flag(mi_8x8, mis, ymbs, xmbs, max_tx_size);
    }
  }
}

static void reset_skip_txfm_size_sb(VP9_COMMON *cm, MODE_INFO **mi_8x8,
                                    TX_SIZE max_tx_size, int mi_row, int mi_col,
                                    BLOCK_SIZE bsize) {
  const int mis = cm->mode_info_stride;
  int bw, bh;
  const int bs = num_8x8_blocks_wide_lookup[bsize], hbs = bs / 2;

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
    return;

  bw = num_8x8_blocks_wide_lookup[mi_8x8[0]->mbmi.sb_type];
  bh = num_8x8_blocks_high_lookup[mi_8x8[0]->mbmi.sb_type];

  if (bw == bs && bh == bs) {
    reset_skip_txfm_size_b(cm, mis, max_tx_size, bs, bs, mi_row, mi_col,
                           mi_8x8);
  } else if (bw == bs && bh < bs) {
    reset_skip_txfm_size_b(cm, mis, max_tx_size, bs, hbs, mi_row, mi_col,
                           mi_8x8);
    reset_skip_txfm_size_b(cm, mis, max_tx_size, bs, hbs, mi_row + hbs,
                           mi_col, mi_8x8 + hbs * mis);
  } else if (bw < bs && bh == bs) {
    reset_skip_txfm_size_b(cm, mis, max_tx_size, hbs, bs, mi_row, mi_col,
                           mi_8x8);
    reset_skip_txfm_size_b(cm, mis, max_tx_size, hbs, bs, mi_row,
                           mi_col + hbs, mi_8x8 + hbs);
  } else {
    const BLOCK_SIZE subsize = subsize_lookup[PARTITION_SPLIT][bsize];
    int n;

    assert(bw < bs && bh < bs);

    for (n = 0; n < 4; n++) {
      const int mi_dc = hbs * (n & 1);
      const int mi_dr = hbs * (n >> 1);

      reset_skip_txfm_size_sb(cm, &mi_8x8[mi_dr * mis + mi_dc], max_tx_size,
                              mi_row + mi_dr, mi_col + mi_dc, subsize);
    }
  }
}

static void reset_skip_txfm_size(VP9_COMMON *cm, TX_SIZE txfm_max) {
  int mi_row, mi_col;
  const int mis = cm->mode_info_stride;
  MODE_INFO **mi_8x8, **mi_ptr = cm->mi_grid_visible;

  for (mi_row = 0; mi_row < cm->mi_rows; mi_row += 8, mi_ptr += 8 * mis) {
    mi_8x8 = mi_ptr;
    for (mi_col = 0; mi_col < cm->mi_cols; mi_col += 8, mi_8x8 += 8) {
      reset_skip_txfm_size_sb(cm, mi_8x8, txfm_max, mi_row, mi_col,
                              BLOCK_64X64);
    }
  }
}

static MV_REFERENCE_FRAME get_frame_type(const VP9_COMP *cpi) {
  if (frame_is_intra_only(&cpi->common))
    return INTRA_FRAME;
  else if (cpi->rc.is_src_frame_alt_ref && cpi->refresh_golden_frame)
    return ALTREF_FRAME;
  else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)
    return LAST_FRAME;
  else
    return GOLDEN_FRAME;
}

static TX_MODE select_tx_mode(const VP9_COMP *cpi) {
  if (cpi->oxcf.lossless) {
    return ONLY_4X4;
  } else if (cpi->common.current_video_frame == 0) {
    return TX_MODE_SELECT;
  } else {
    if (cpi->sf.tx_size_search_method == USE_LARGESTALL) {
      return ALLOW_32X32;
    } else if (cpi->sf.tx_size_search_method == USE_FULL_RD) {
      const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi);
      return cpi->rd_tx_select_threshes[frame_type][ALLOW_32X32] >
                 cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] ?
                     ALLOW_32X32 : TX_MODE_SELECT;
    } else {
      unsigned int total = 0;
      int i;
      for (i = 0; i < TX_SIZES; ++i)
        total += cpi->tx_stepdown_count[i];

      if (total) {
        const double fraction = (double)cpi->tx_stepdown_count[0] / total;
        return fraction > 0.90 ? ALLOW_32X32 : TX_MODE_SELECT;
      } else {
        return cpi->common.tx_mode;
      }
    }
  }
}

// Start RTC Exploration
typedef enum {
  BOTH_ZERO = 0,
  ZERO_PLUS_PREDICTED = 1,
  BOTH_PREDICTED = 2,
  NEW_PLUS_NON_INTRA = 3,
  BOTH_NEW = 4,
  INTRA_PLUS_NON_INTRA = 5,
  BOTH_INTRA = 6,
  INVALID_CASE = 9
} motion_vector_context;

static void set_mode_info(MB_MODE_INFO *mbmi, BLOCK_SIZE bsize,
                          MB_PREDICTION_MODE mode) {
  mbmi->mode = mode;
  mbmi->uv_mode = mode;
  mbmi->mv[0].as_int = 0;
  mbmi->mv[1].as_int = 0;
  mbmi->ref_frame[0] = INTRA_FRAME;
  mbmi->ref_frame[1] = NONE;
  mbmi->tx_size = max_txsize_lookup[bsize];
  mbmi->skip = 0;
  mbmi->sb_type = bsize;
  mbmi->segment_id = 0;
}

static void nonrd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
                                int mi_row, int mi_col,
                                int *rate, int64_t *dist,
                                BLOCK_SIZE bsize) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  set_offsets(cpi, tile, mi_row, mi_col, bsize);
  xd->mi_8x8[0]->mbmi.sb_type = bsize;

  if (!frame_is_intra_only(cm)) {
    vp9_pick_inter_mode(cpi, x, tile, mi_row, mi_col,
                        rate, dist, bsize);
  } else {
    MB_PREDICTION_MODE intramode = DC_PRED;
    set_mode_info(&xd->mi_8x8[0]->mbmi, bsize, intramode);
  }
  duplicate_modeinfo_in_sb(cm, xd, mi_row, mi_col, bsize);
}

static void nonrd_use_partition(VP9_COMP *cpi,
                                const TileInfo *const tile,
                                MODE_INFO **mi_8x8,
                                TOKENEXTRA **tp,
                                int mi_row, int mi_col,
                                BLOCK_SIZE bsize, int output_enabled,
                                int *totrate, int64_t *totdist) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->mb;
  const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4;
  const int mis = cm->mode_info_stride;
  PARTITION_TYPE partition;
  BLOCK_SIZE subsize;
  int rate;
  int64_t dist;

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
    return;

  if (bsize >= BLOCK_8X8) {
    subsize = mi_8x8[0]->mbmi.sb_type;
  } else {
    subsize = BLOCK_4X4;
  }

  partition = partition_lookup[bsl][subsize];

  switch (partition) {
    case PARTITION_NONE:
      nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize);
      break;
    case PARTITION_VERT:
      *get_sb_index(x, subsize) = 0;
      nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize);
      if (mi_col + hbs < cm->mi_cols) {
        *get_sb_index(x, subsize) = 1;
        nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col + hbs,
                            &rate, &dist, subsize);
        if (rate != INT_MAX && dist != INT64_MAX &&
            *totrate != INT_MAX && *totdist != INT64_MAX) {
          *totrate += rate;
          *totdist += dist;
        }
      }
      break;
    case PARTITION_HORZ:
      *get_sb_index(x, subsize) = 0;
      nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize);
      if (mi_row + hbs < cm->mi_rows) {
        *get_sb_index(x, subsize) = 1;
        nonrd_pick_sb_modes(cpi, tile, mi_row + hbs, mi_col,
                            &rate, &dist, subsize);
        if (rate != INT_MAX && dist != INT64_MAX &&
            *totrate != INT_MAX && *totdist != INT64_MAX) {
          *totrate += rate;
          *totdist += dist;
        }
      }
      break;
    case PARTITION_SPLIT:
      subsize = get_subsize(bsize, PARTITION_SPLIT);

      *get_sb_index(x, subsize) = 0;
      nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col,
                          subsize, output_enabled, totrate, totdist);
      *get_sb_index(x, subsize) = 1;
      nonrd_use_partition(cpi, tile, mi_8x8 + hbs, tp,
                          mi_row, mi_col + hbs, subsize, output_enabled,
                          &rate, &dist);
      if (rate != INT_MAX && dist != INT64_MAX &&
          *totrate != INT_MAX && *totdist != INT64_MAX) {
        *totrate += rate;
        *totdist += dist;
      }
      *get_sb_index(x, subsize) = 2;
      nonrd_use_partition(cpi, tile, mi_8x8 + hbs * mis, tp,
                          mi_row + hbs, mi_col, subsize, output_enabled,
                          &rate, &dist);
      if (rate != INT_MAX && dist != INT64_MAX &&
          *totrate != INT_MAX && *totdist != INT64_MAX) {
        *totrate += rate;
        *totdist += dist;
      }
      *get_sb_index(x, subsize) = 3;
      nonrd_use_partition(cpi, tile, mi_8x8 + hbs * mis + hbs, tp,
                          mi_row + hbs, mi_col + hbs, subsize, output_enabled,
                          &rate, &dist);
      if (rate != INT_MAX && dist != INT64_MAX &&
          *totrate != INT_MAX && *totdist != INT64_MAX) {
        *totrate += rate;
        *totdist += dist;
      }
      break;
    default:
      assert("Invalid partition type.");
  }

  if (bsize == BLOCK_64X64 && output_enabled) {
    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
      cpi->cyclic_refresh.projected_rate_sb = *totrate;
      cpi->cyclic_refresh.projected_dist_sb = *totdist;
    }
    encode_sb_rt(cpi, tile, tp, mi_row, mi_col, 1, bsize);
  }
}

static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
                                int mi_row, TOKENEXTRA **tp) {
  VP9_COMMON *cm = &cpi->common;
  int mi_col;

  // Initialize the left context for the new SB row
  vpx_memset(&cpi->left_context, 0, sizeof(cpi->left_context));
  vpx_memset(cpi->left_seg_context, 0, sizeof(cpi->left_seg_context));

  // Code each SB in the row
  for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
       mi_col += MI_BLOCK_SIZE) {
    int dummy_rate;
    int64_t dummy_dist;
    const int idx_str = cm->mode_info_stride * mi_row + mi_col;
    MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;
    MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str;

    BLOCK_SIZE bsize = cpi->sf.partition_search_type == FIXED_PARTITION ?
        cpi->sf.always_this_block_size :
        get_nonrd_var_based_fixed_partition(cpi, mi_row, mi_col);

    cpi->mb.source_variance = UINT_MAX;

    // Set the partition type of the 64X64 block
    if (cpi->sf.partition_search_type == VAR_BASED_PARTITION)
      choose_partitioning(cpi, tile, mi_row, mi_col);
    else if (cpi->sf.partition_search_type == REFERENCE_PARTITION) {
      if (cpi->sf.partition_check) {
        MACROBLOCK *x = &cpi->mb;
        int rate1, rate2, rate3;
        int64_t dist1, dist2, dist3;
        set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, BLOCK_8X8);
        nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
                            0, &rate1, &dist1);
        set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, BLOCK_16X16);
        nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
                            0, &rate2, &dist2);
        set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, BLOCK_32X32);
        nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
                            0, &rate3, &dist3);

        if (RDCOST(x->rdmult, x->rddiv, rate1, dist1) <
            RDCOST(x->rdmult, x->rddiv, rate2, dist2)) {
          if (RDCOST(x->rdmult, x->rddiv, rate1, dist1) <
              RDCOST(x->rdmult, x->rddiv, rate3, dist3))
            set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col,
                                   BLOCK_8X8);
          else
            set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col,
                                   BLOCK_32X32);
        } else {
          if (RDCOST(x->rdmult, x->rddiv, rate2, dist2) <
              RDCOST(x->rdmult, x->rddiv, rate3, dist3))
            set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col,
                                   BLOCK_16X16);
          else
            set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col,
                                   BLOCK_32X32);
        }

      } else {
        if (!sb_has_motion(cm, prev_mi_8x8))
          copy_partitioning(cm, mi_8x8, prev_mi_8x8);
        else
          set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize);
      }
    }
    else
      set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize);

    nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, 1,
                        &dummy_rate, &dummy_dist);
  }
}
// end RTC play code

static void encode_frame_internal(VP9_COMP *cpi) {
  int mi_row;
  MACROBLOCK *const x = &cpi->mb;
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;

//  fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n",
//           cpi->common.current_video_frame, cpi->common.show_frame,
//           cm->frame_type);

  vp9_zero(cm->counts.switchable_interp);
  vp9_zero(cpi->tx_stepdown_count);

  xd->mi_8x8 = cm->mi_grid_visible;
  // required for vp9_frame_init_quantizer
  xd->mi_8x8[0] = cm->mi;

  vp9_zero(cm->counts.mv);
  vp9_zero(cpi->coef_counts);
  vp9_zero(cm->counts.eob_branch);

  // Set frame level transform size use case
  cm->tx_mode = select_tx_mode(cpi);

  cpi->mb.e_mbd.lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0
      && cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;
  switch_lossless_mode(cpi, cpi->mb.e_mbd.lossless);

  vp9_frame_init_quantizer(cpi);

  vp9_initialize_rd_consts(cpi);
  vp9_initialize_me_consts(cpi, cm->base_qindex);

  if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
    // Initialize encode frame context.
    init_encode_frame_mb_context(cpi);

    // Build a frame level activity map
    build_activity_map(cpi);
  }

  // Re-initialize encode frame context.
  init_encode_frame_mb_context(cpi);

  vp9_zero(cpi->rd_comp_pred_diff);
  vp9_zero(cpi->rd_filter_diff);
  vp9_zero(cpi->rd_tx_select_diff);
  vp9_zero(cpi->rd_tx_select_threshes);

  set_prev_mi(cm);

  if (cpi->sf.use_nonrd_pick_mode) {
    // Initialize internal buffer pointers for rtc coding, where non-RD
    // mode decision is used and hence no buffer pointer swap needed.
    int i;
    struct macroblock_plane *const p = x->plane;
    struct macroblockd_plane *const pd = xd->plane;
    PICK_MODE_CONTEXT *ctx = &cpi->mb.sb64_context;

    for (i = 0; i < MAX_MB_PLANE; ++i) {
      p[i].coeff = ctx->coeff_pbuf[i][0];
      p[i].qcoeff = ctx->qcoeff_pbuf[i][0];
      pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][0];
      p[i].eobs = ctx->eobs_pbuf[i][0];
    }
    vp9_zero(x->zcoeff_blk);
  }

  {
    struct vpx_usec_timer emr_timer;
    vpx_usec_timer_start(&emr_timer);

    {
      // Take tiles into account and give start/end MB
      int tile_col, tile_row;
      TOKENEXTRA *tp = cpi->tok;
      const int tile_cols = 1 << cm->log2_tile_cols;
      const int tile_rows = 1 << cm->log2_tile_rows;

      for (tile_row = 0; tile_row < tile_rows; tile_row++) {
        for (tile_col = 0; tile_col < tile_cols; tile_col++) {
          TileInfo tile;
          TOKENEXTRA *tp_old = tp;

          // For each row of SBs in the frame
          vp9_tile_init(&tile, cm, tile_row, tile_col);
          for (mi_row = tile.mi_row_start;
               mi_row < tile.mi_row_end; mi_row += MI_BLOCK_SIZE) {
            if (cpi->sf.use_nonrd_pick_mode && cm->frame_type != KEY_FRAME)
              encode_nonrd_sb_row(cpi, &tile, mi_row, &tp);
            else
              encode_rd_sb_row(cpi, &tile, mi_row, &tp);
          }
          cpi->tok_count[tile_row][tile_col] = (unsigned int)(tp - tp_old);
          assert(tp - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols));
        }
      }
    }

    vpx_usec_timer_mark(&emr_timer);
    cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer);
  }

  if (cpi->sf.skip_encode_sb) {
    int j;
    unsigned int intra_count = 0, inter_count = 0;
    for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) {
      intra_count += cm->counts.intra_inter[j][0];
      inter_count += cm->counts.intra_inter[j][1];
    }
    cpi->sf.skip_encode_frame = (intra_count << 2) < inter_count &&
                                cm->frame_type != KEY_FRAME &&
                                cm->show_frame;
  } else {
    cpi->sf.skip_encode_frame = 0;
  }

#if 0
  // Keep record of the total distortion this time around for future use
  cpi->last_frame_distortion = cpi->frame_distortion;
#endif
}