/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

11
#include <assert.h>
Dmitry Kovalev's avatar
Dmitry Kovalev committed
12 13
#include <math.h>
#include <limits.h>
14

15
#include "vp9/common/vp9_alloccommon.h"
Johann's avatar
Johann committed
16
#include "vp9/common/vp9_common.h"
17
#include "vp9/common/vp9_onyxc_int.h"
18
#include "vp9/common/vp9_quant_common.h"
19
#include "vp9/common/vp9_reconinter.h"
20
#include "vp9/encoder/vp9_encodeframe.h"
21
#include "vp9/encoder/vp9_ethread.h"
22
#include "vp9/encoder/vp9_extend.h"
23
#include "vp9/encoder/vp9_firstpass.h"
24
#include "vp9/encoder/vp9_mcomp.h"
25
#include "vp9/encoder/vp9_encoder.h"
26
#include "vp9/encoder/vp9_quantize.h"
27 28
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_segmentation.h"
29
#include "vp9/encoder/vp9_temporal_filter.h"
30
#include "vpx_dsp/vpx_dsp_common.h"
31
#include "vpx_mem/vpx_mem.h"
32
#include "vpx_ports/mem.h"
33
#include "vpx_ports/vpx_timer.h"
34
#include "vpx_scale/vpx_scale.h"
35

36 37
// Reciprocal table filled by vp9_temporal_filter_init(): entry i holds
// 0x80000 / i (entry 0 is 0), so "(x * fixed_divide[n]) >> 19" approximates
// x / n without a division in the normalisation loops.
static int fixed_divide[512];

38 39 40 41
// Builds the motion-compensated predictors for one macroblock with the
// EIGHTTAP_SHARP kernel.
//
// The three planes are written to fixed positions in `pred`: the 16x16 luma
// block at &pred[0] (row stride 16), U at &pred[256] and V at &pred[512]
// (row stride uv_block_width). The single vector (mv_row, mv_col) is applied
// to every plane. Luma always uses MV_PRECISION_Q3; chroma uses
// MV_PRECISION_Q4 with a stride of (stride + 1) >> 1 when uv_block_width is 8,
// and MV_PRECISION_Q3 with the luma stride otherwise. `x` and `y` are
// forwarded unchanged to the predictor builders.
//
// When CONFIG_VP9_HIGHBITDEPTH is enabled and the current buffer carries
// YV12_FLAG_HIGHBITDEPTH, the high bit-depth builders are used instead and the
// buffer pointers are passed through CONVERT_TO_SHORTPTR.
static void temporal_filter_predictors_mb_c(
    MACROBLOCKD *xd, uint8_t *y_mb_ptr, uint8_t *u_mb_ptr, uint8_t *v_mb_ptr,
    int stride, int uv_block_width, int uv_block_height, int mv_row, int mv_col,
    uint8_t *pred, struct scale_factors *scale, int x, int y) {
  const int which_mv = 0;
  const MV mv = { mv_row, mv_col };
  const InterpKernel *const kernel = vp9_filter_kernels[EIGHTTAP_SHARP];

  enum mv_precision mv_precision_uv;
  int uv_stride;
  if (uv_block_width == 8) {
    uv_stride = (stride + 1) >> 1;
    mv_precision_uv = MV_PRECISION_Q4;
  } else {
    uv_stride = stride;
    mv_precision_uv = MV_PRECISION_Q3;
  }

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    vp9_highbd_build_inter_predictor(CONVERT_TO_SHORTPTR(y_mb_ptr), stride,
                                     CONVERT_TO_SHORTPTR(&pred[0]), 16, &mv,
                                     scale, 16, 16, which_mv, kernel,
                                     MV_PRECISION_Q3, x, y, xd->bd);

    vp9_highbd_build_inter_predictor(CONVERT_TO_SHORTPTR(u_mb_ptr), uv_stride,
                                     CONVERT_TO_SHORTPTR(&pred[256]),
                                     uv_block_width, &mv, scale, uv_block_width,
                                     uv_block_height, which_mv, kernel,
                                     mv_precision_uv, x, y, xd->bd);

    vp9_highbd_build_inter_predictor(CONVERT_TO_SHORTPTR(v_mb_ptr), uv_stride,
                                     CONVERT_TO_SHORTPTR(&pred[512]),
                                     uv_block_width, &mv, scale, uv_block_width,
                                     uv_block_height, which_mv, kernel,
                                     mv_precision_uv, x, y, xd->bd);
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH
  // xd is only read in the high bit-depth path above.
  (void)xd;
  vp9_build_inter_predictor(y_mb_ptr, stride, &pred[0], 16, &mv, scale, 16, 16,
                            which_mv, kernel, MV_PRECISION_Q3, x, y);

  vp9_build_inter_predictor(u_mb_ptr, uv_stride, &pred[256], uv_block_width,
                            &mv, scale, uv_block_width, uv_block_height,
                            which_mv, kernel, mv_precision_uv, x, y);

  vp9_build_inter_predictor(v_mb_ptr, uv_stride, &pred[512], uv_block_width,
                            &mv, scale, uv_block_width, uv_block_height,
                            which_mv, kernel, mv_precision_uv, x, y);
}
89

90
// Fills the fixed_divide reciprocal table: entry i (1..511) is the truncated
// quotient 0x80000 / i, and entry 0 is set to 0 so that a zero count yields a
// zero product instead of a division by zero.
void vp9_temporal_filter_init(void) {
  int i;

  fixed_divide[0] = 0;
  for (i = 1; i < 512; ++i) fixed_divide[i] = 0x80000 / i;
}

97 98 99
// Accumulates one weighted, filtered contribution of `frame2` for every pixel
// of a block_width x block_height block.
//
//   frame1        block in the reference frame, rows `stride` apart.
//   frame2        candidate block, stored contiguously (rows block_width
//                 apart).
//   strength      right shift applied to the error term; must be in [0, 6].
//   filter_weight per-block weight; must be in [0, 2].
//   accumulator   receives += weight * frame2 pixel, indexed in raster order.
//   count         receives += weight, indexed in raster order.
//
// For each pixel the squared differences between frame1 and frame2 are summed
// over the 3x3 neighbourhood clipped to the block, scaled by 3 and divided by
// the number of neighbours actually used. That value is rounded, shifted right
// by `strength`, clamped to 16 and turned into a weight of
// (16 - value) * filter_weight.
void vp9_temporal_filter_apply_c(const uint8_t *frame1, unsigned int stride,
                                 const uint8_t *frame2,
                                 unsigned int block_width,
                                 unsigned int block_height, int strength,
                                 int filter_weight, uint32_t *accumulator,
                                 uint16_t *count) {
  unsigned int i, j, k;
  int byte = 0;
  const int rounding = strength > 0 ? 1 << (strength - 1) : 0;

  assert(strength >= 0);
  assert(strength <= 6);

  assert(filter_weight >= 0);
  assert(filter_weight <= 2);

  for (i = 0, k = 0; i < block_height; i++) {
    for (j = 0; j < block_width; j++, k++) {
      const int pixel_value = *frame2;
      int modifier = 0;
      int index = 0;  // number of in-block neighbours visited
      int idx, idy;

      // non-local mean approach
      for (idy = -1; idy <= 1; ++idy) {
        for (idx = -1; idx <= 1; ++idx) {
          const int row = (int)i + idy;
          const int col = (int)j + idx;

          if (row >= 0 && row < (int)block_height && col >= 0 &&
              col < (int)block_width) {
            const int diff = frame1[byte + idy * (int)stride + idx] -
                             frame2[idy * (int)block_width + idx];
            modifier += diff * diff;
            ++index;
          }
        }
      }

      // The centre pixel is always inside the block.
      assert(index > 0);

      modifier *= 3;
      modifier /= index;

      ++frame2;

      modifier += rounding;
      modifier >>= strength;

      if (modifier > 16) modifier = 16;

      modifier = 16 - modifier;
      modifier *= filter_weight;

      count[k] += modifier;
      accumulator[k] += modifier * pixel_value;

      byte++;
    }

    // Step frame1's index over the padding to the start of the next row.
    byte += stride - block_width;
  }
}

165
#if CONFIG_VP9_HIGHBITDEPTH
166
// High bit-depth counterpart of vp9_temporal_filter_apply_c().
//
// frame1_8 and frame2_8 are converted with CONVERT_TO_SHORTPTR and read as
// 16-bit samples; frame1 rows are `stride` samples apart and frame2 rows are
// block_width samples apart. The per-pixel weight is derived as in the 8-bit
// version: the 3x3 sum of squared differences (clipped to the block) is
// scaled by 3, divided by the neighbour count, rounded, shifted right by
// `strength`, clamped to 16, then (16 - value) * filter_weight is added to
// count[k] and that weight times the frame2 sample to accumulator[k].
//
// `strength` has no upper-bound assert here because the caller in this file
// passes a bit-depth-adjusted value.
void vp9_highbd_temporal_filter_apply_c(
    const uint8_t *frame1_8, unsigned int stride, const uint8_t *frame2_8,
    unsigned int block_width, unsigned int block_height, int strength,
    int filter_weight, uint32_t *accumulator, uint16_t *count) {
  const uint16_t *frame1 = CONVERT_TO_SHORTPTR(frame1_8);
  const uint16_t *frame2 = CONVERT_TO_SHORTPTR(frame2_8);
  unsigned int i, j, k;
  int byte = 0;
  const int rounding = strength > 0 ? 1 << (strength - 1) : 0;

  // Same argument checks as the 8-bit version; a negative shift count would
  // be undefined behaviour.
  assert(strength >= 0);

  assert(filter_weight >= 0);
  assert(filter_weight <= 2);

  for (i = 0, k = 0; i < block_height; i++) {
    for (j = 0; j < block_width; j++, k++) {
      const int pixel_value = *frame2;
      int modifier = 0;
      int index = 0;  // number of in-block neighbours visited
      int idx, idy;

      for (idy = -1; idy <= 1; ++idy) {
        for (idx = -1; idx <= 1; ++idx) {
          const int row = (int)i + idy;
          const int col = (int)j + idx;

          if (row >= 0 && row < (int)block_height && col >= 0 &&
              col < (int)block_width) {
            const int diff = frame1[byte + idy * (int)stride + idx] -
                             frame2[idy * (int)block_width + idx];
            modifier += diff * diff;
            ++index;
          }
        }
      }
      // The centre pixel is always inside the block.
      assert(index > 0);

      modifier *= 3;
      modifier /= index;

      ++frame2;
      modifier += rounding;
      modifier >>= strength;

      if (modifier > 16) modifier = 16;

      modifier = 16 - modifier;
      modifier *= filter_weight;

      count[k] += modifier;
      accumulator[k] += modifier * pixel_value;

      byte++;
    }

    // Step frame1's index over the padding to the start of the next row.
    byte += stride - block_width;
  }
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

225
static uint32_t temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
226
                                                   ThreadData *td,
227 228
                                                   uint8_t *arf_frame_buf,
                                                   uint8_t *frame_ptr_buf,
229 230
                                                   int stride, MV *ref_mv) {
  MACROBLOCK *const x = &td->mb;
231
  MACROBLOCKD *const xd = &x->e_mbd;
232
  MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
233
  const SEARCH_METHODS search_method = HEX;
John Koleszar's avatar
John Koleszar committed
234 235
  int step_param;
  int sadpb = x->sadperbit16;
236
  uint32_t bestsme = UINT_MAX;
237 238
  uint32_t distortion;
  uint32_t sse;
239
  int cost_list[5];
240
  const MvLimits tmp_mv_limits = x->mv_limits;
John Koleszar's avatar
John Koleszar committed
241

242
  MV best_ref_mv1 = { 0, 0 };
243
  MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
John Koleszar's avatar
John Koleszar committed
244 245

  // Save input state
John Koleszar's avatar
John Koleszar committed
246
  struct buf_2d src = x->plane[0].src;
247
  struct buf_2d pre = xd->plane[0].pre[0];
John Koleszar's avatar
John Koleszar committed
248

249 250
  best_ref_mv1_full.col = best_ref_mv1.col >> 3;
  best_ref_mv1_full.row = best_ref_mv1.row >> 3;
John Koleszar's avatar
John Koleszar committed
251 252

  // Setup frame pointers
253 254 255 256
  x->plane[0].src.buf = arf_frame_buf;
  x->plane[0].src.stride = stride;
  xd->plane[0].pre[0].buf = frame_ptr_buf;
  xd->plane[0].pre[0].stride = stride;
John Koleszar's avatar
John Koleszar committed
257

258
  step_param = mv_sf->reduce_first_step_size;
259
  step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);
John Koleszar's avatar
John Koleszar committed
260

261 262
  vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1);

263
  vp9_full_pixel_search(cpi, x, BLOCK_16X16, &best_ref_mv1_full, step_param,
264 265
                        search_method, sadpb, cond_cost_list(cpi, cost_list),
                        &best_ref_mv1, ref_mv, 0, 0);
266

267 268 269
  /* restore UMV window */
  x->mv_limits = tmp_mv_limits;

270
  // Ignore mv costing by sending NULL pointer instead of cost array
271 272 273 274 275
  bestsme = cpi->find_fractional_mv_step(
      x, ref_mv, &best_ref_mv1, cpi->common.allow_high_precision_mv,
      x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], 0,
      mv_sf->subpel_iters_per_step, cond_cost_list(cpi, cost_list), NULL, NULL,
      &distortion, &sse, NULL, 0, 0);
276

277
  // Restore input state
John Koleszar's avatar
John Koleszar committed
278
  x->plane[0].src = src;
279
  xd->plane[0].pre[0] = pre;
280

John Koleszar's avatar
John Koleszar committed
281
  return bestsme;
282 283
}

284 285 286
// Temporally filters one row of macroblocks, columns
// [mb_col_start, mb_col_end), and writes the result into cpi->alt_ref_buffer.
//
// For every macroblock, each non-NULL frame in cpi->arnr_filter_data is
// considered in turn:
//   - the alt-ref frame itself gets filter_weight 2;
//   - any other frame is motion-searched against the alt-ref block and gets
//     weight 2, 1 or 0 depending on whether the search error is below 10000,
//     below 20000, or neither.
// Frames with a non-zero weight are motion compensated into `predictor` and
// folded into `accumulator` / `count` (Y at offset 0, U at 256, V at 512).
// The accumulated sums are then normalised with the fixed_divide reciprocal
// table and stored at the macroblock's position in the alt-ref buffer.
//
// td->mb.mv_limits is overwritten with limits derived from the macroblock
// position.
void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td,
                                       int mb_row, int mb_col_start,
                                       int mb_col_end) {
  ARNRFilterData *arnr_filter_data = &cpi->arnr_filter_data;
  YV12_BUFFER_CONFIG **frames = arnr_filter_data->frames;
  int frame_count = arnr_filter_data->frame_count;
  int alt_ref_index = arnr_filter_data->alt_ref_index;
  int strength = arnr_filter_data->strength;
  struct scale_factors *scale = &arnr_filter_data->sf;
  int byte;
  int frame;
  int mb_col;
  unsigned int filter_weight;
  int mb_cols = (frames[alt_ref_index]->y_crop_width + 15) >> 4;
  int mb_rows = (frames[alt_ref_index]->y_crop_height + 15) >> 4;
  DECLARE_ALIGNED(16, uint32_t, accumulator[16 * 16 * 3]);
  DECLARE_ALIGNED(16, uint16_t, count[16 * 16 * 3]);
  MACROBLOCKD *mbd = &td->mb.e_mbd;
  YV12_BUFFER_CONFIG *f = frames[alt_ref_index];
  uint8_t *dst1, *dst2;
#if CONFIG_VP9_HIGHBITDEPTH
  DECLARE_ALIGNED(16, uint16_t, predictor16[16 * 16 * 3]);
  DECLARE_ALIGNED(16, uint8_t, predictor8[16 * 16 * 3]);
  uint8_t *predictor;
#else
  DECLARE_ALIGNED(16, uint8_t, predictor[16 * 16 * 3]);
#endif
  const int mb_uv_height = 16 >> mbd->plane[1].subsampling_y;
  const int mb_uv_width = 16 >> mbd->plane[1].subsampling_x;
  // Addition of the tile col level offsets
  int mb_y_offset = mb_row * 16 * (f->y_stride) + 16 * mb_col_start;
  int mb_uv_offset =
      mb_row * mb_uv_height * f->uv_stride + mb_uv_width * mb_col_start;

#if CONFIG_VP9_HIGHBITDEPTH
  if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    predictor = CONVERT_TO_BYTEPTR(predictor16);
  } else {
    predictor = predictor8;
  }
#endif

  // Source frames are extended to 16 pixels. This is different than
  //  L/A/G reference frames that have a border of 32 (VP9ENCBORDERINPIXELS)
  // A 6/8 tap filter is used for motion search.  This requires 2 pixels
  //  before and 3 pixels after.  So the largest Y mv on a border would
  //  then be 16 - VP9_INTERP_EXTEND. The UV blocks are half the size of the
  //  Y and therefore only extended by 8.  The largest mv that a UV block
  //  can support is 8 - VP9_INTERP_EXTEND.  A UV mv is half of a Y mv.
  //  (16 - VP9_INTERP_EXTEND) >> 1 which is greater than
  //  8 - VP9_INTERP_EXTEND.
  // To keep the mv in play for both Y and UV planes the max that it
  //  can be on a border is therefore 16 - (2*VP9_INTERP_EXTEND+1).
  td->mb.mv_limits.row_min = -((mb_row * 16) + (17 - 2 * VP9_INTERP_EXTEND));
  td->mb.mv_limits.row_max =
      ((mb_rows - 1 - mb_row) * 16) + (17 - 2 * VP9_INTERP_EXTEND);

  for (mb_col = mb_col_start; mb_col < mb_col_end; mb_col++) {
    int i, j, k;
    int stride;
    MV ref_mv;

    vp9_zero_array(accumulator, 16 * 16 * 3);
    vp9_zero_array(count, 16 * 16 * 3);

    td->mb.mv_limits.col_min = -((mb_col * 16) + (17 - 2 * VP9_INTERP_EXTEND));
    td->mb.mv_limits.col_max =
        ((mb_cols - 1 - mb_col) * 16) + (17 - 2 * VP9_INTERP_EXTEND);

    for (frame = 0; frame < frame_count; frame++) {
      const uint32_t thresh_low = 10000;
      const uint32_t thresh_high = 20000;

      if (frames[frame] == NULL) continue;

      ref_mv.row = 0;
      ref_mv.col = 0;

      if (frame == alt_ref_index) {
        filter_weight = 2;
      } else {
        // Find best match in this frame by MC
        uint32_t err = temporal_filter_find_matching_mb_c(
            cpi, td, frames[alt_ref_index]->y_buffer + mb_y_offset,
            frames[frame]->y_buffer + mb_y_offset, frames[frame]->y_stride,
            &ref_mv);

        // Assign higher weight to matching MB if its error
        // score is lower. If not applying MC default behavior
        // is to weight all MBs equal.
        filter_weight = err < thresh_low ? 2 : err < thresh_high ? 1 : 0;
      }

      if (filter_weight != 0) {
        // Construct the predictors
        temporal_filter_predictors_mb_c(
            mbd, frames[frame]->y_buffer + mb_y_offset,
            frames[frame]->u_buffer + mb_uv_offset,
            frames[frame]->v_buffer + mb_uv_offset, frames[frame]->y_stride,
            mb_uv_width, mb_uv_height, ref_mv.row, ref_mv.col, predictor, scale,
            mb_col * 16, mb_row * 16);

#if CONFIG_VP9_HIGHBITDEPTH
        if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
          // Strength is raised by 2 for every bit of depth above 8.
          int adj_strength = strength + 2 * (mbd->bd - 8);
          // Apply the filter (YUV)
          vp9_highbd_temporal_filter_apply(
              f->y_buffer + mb_y_offset, f->y_stride, predictor, 16, 16,
              adj_strength, filter_weight, accumulator, count);
          vp9_highbd_temporal_filter_apply(
              f->u_buffer + mb_uv_offset, f->uv_stride, predictor + 256,
              mb_uv_width, mb_uv_height, adj_strength, filter_weight,
              accumulator + 256, count + 256);
          vp9_highbd_temporal_filter_apply(
              f->v_buffer + mb_uv_offset, f->uv_stride, predictor + 512,
              mb_uv_width, mb_uv_height, adj_strength, filter_weight,
              accumulator + 512, count + 512);
        } else
#endif  // CONFIG_VP9_HIGHBITDEPTH
        {
          // Apply the filter (YUV)
          vp9_temporal_filter_apply(f->y_buffer + mb_y_offset, f->y_stride,
                                    predictor, 16, 16, strength, filter_weight,
                                    accumulator, count);
          vp9_temporal_filter_apply(f->u_buffer + mb_uv_offset, f->uv_stride,
                                    predictor + 256, mb_uv_width, mb_uv_height,
                                    strength, filter_weight, accumulator + 256,
                                    count + 256);
          vp9_temporal_filter_apply(f->v_buffer + mb_uv_offset, f->uv_stride,
                                    predictor + 512, mb_uv_width, mb_uv_height,
                                    strength, filter_weight, accumulator + 512,
                                    count + 512);
        }
      }
    }

#if CONFIG_VP9_HIGHBITDEPTH
    if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      uint16_t *dst1_16;
      uint16_t *dst2_16;
      // Normalize filter output to produce AltRef frame
      dst1 = cpi->alt_ref_buffer.y_buffer;
      dst1_16 = CONVERT_TO_SHORTPTR(dst1);
      stride = cpi->alt_ref_buffer.y_stride;
      byte = mb_y_offset;
      for (i = 0, k = 0; i < 16; i++) {
        for (j = 0; j < 16; j++, k++) {
          // Rounded accumulator / count via the reciprocal table.
          unsigned int pval = accumulator[k] + (count[k] >> 1);
          pval *= fixed_divide[count[k]];
          pval >>= 19;

          dst1_16[byte] = (uint16_t)pval;

          // move to next pixel
          byte++;
        }

        byte += stride - 16;
      }

      dst1 = cpi->alt_ref_buffer.u_buffer;
      dst2 = cpi->alt_ref_buffer.v_buffer;
      dst1_16 = CONVERT_TO_SHORTPTR(dst1);
      dst2_16 = CONVERT_TO_SHORTPTR(dst2);
      stride = cpi->alt_ref_buffer.uv_stride;
      byte = mb_uv_offset;
      for (i = 0, k = 256; i < mb_uv_height; i++) {
        for (j = 0; j < mb_uv_width; j++, k++) {
          int m = k + 256;

          // U
          unsigned int pval = accumulator[k] + (count[k] >> 1);
          pval *= fixed_divide[count[k]];
          pval >>= 19;
          dst1_16[byte] = (uint16_t)pval;

          // V
          pval = accumulator[m] + (count[m] >> 1);
          pval *= fixed_divide[count[m]];
          pval >>= 19;
          dst2_16[byte] = (uint16_t)pval;

          // move to next pixel
          byte++;
        }

        byte += stride - mb_uv_width;
      }
    } else
#endif  // CONFIG_VP9_HIGHBITDEPTH
    {
      // Normalize filter output to produce AltRef frame
      dst1 = cpi->alt_ref_buffer.y_buffer;
      stride = cpi->alt_ref_buffer.y_stride;
      byte = mb_y_offset;
      for (i = 0, k = 0; i < 16; i++) {
        for (j = 0; j < 16; j++, k++) {
          // Rounded accumulator / count via the reciprocal table.
          unsigned int pval = accumulator[k] + (count[k] >> 1);
          pval *= fixed_divide[count[k]];
          pval >>= 19;

          dst1[byte] = (uint8_t)pval;

          // move to next pixel
          byte++;
        }
        byte += stride - 16;
      }

      dst1 = cpi->alt_ref_buffer.u_buffer;
      dst2 = cpi->alt_ref_buffer.v_buffer;
      stride = cpi->alt_ref_buffer.uv_stride;
      byte = mb_uv_offset;
      for (i = 0, k = 256; i < mb_uv_height; i++) {
        for (j = 0; j < mb_uv_width; j++, k++) {
          int m = k + 256;

          // U
          unsigned int pval = accumulator[k] + (count[k] >> 1);
          pval *= fixed_divide[count[k]];
          pval >>= 19;
          dst1[byte] = (uint8_t)pval;

          // V
          pval = accumulator[m] + (count[m] >> 1);
          pval *= fixed_divide[count[m]];
          pval >>= 19;
          dst2[byte] = (uint8_t)pval;

          // move to next pixel
          byte++;
        }
        byte += stride - mb_uv_width;
      }
    }

    // Advance to the next macroblock in the row.
    mb_y_offset += 16;
    mb_uv_offset += mb_uv_width;
  }
}

// Filters every macroblock row of one tile.
//
// The tile's mi-unit bounds are converted to macroblock units by halving
// (start rounded down, end rounded up), and each row in that range is passed
// to vp9_temporal_filter_iterate_row_c() with the encoder's main thread data.
static void temporal_filter_iterate_tile_c(VP9_COMP *cpi, int tile_row,
                                           int tile_col) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  TileInfo *tile_info =
      &cpi->tile_data[tile_row * tile_cols + tile_col].tile_info;
  const int mb_row_start = (tile_info->mi_row_start) >> 1;
  const int mb_row_end = (tile_info->mi_row_end + 1) >> 1;
  const int mb_col_start = (tile_info->mi_col_start) >> 1;
  const int mb_col_end = (tile_info->mi_col_end + 1) >> 1;
  int mb_row;

  for (mb_row = mb_row_start; mb_row < mb_row_end; mb_row++) {
    vp9_temporal_filter_iterate_row_c(cpi, &cpi->td, mb_row, mb_col_start,
                                      mb_col_end);
  }
}

// Single-threaded driver: initialises the tile data and filters every tile in
// raster order. The pre[0] buffer pointer of each plane in cpi->td.mb.e_mbd is
// saved before the tiles are processed and restored afterwards.
static void temporal_filter_iterate_c(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int tile_rows = 1 << cm->log2_tile_rows;
  int tile_row, tile_col;
  MACROBLOCKD *mbd = &cpi->td.mb.e_mbd;
  // Save input state
  uint8_t *input_buffer[MAX_MB_PLANE];
  int i;

  for (i = 0; i < MAX_MB_PLANE; i++) input_buffer[i] = mbd->plane[i].pre[0].buf;

  vp9_init_tile_data(cpi);

  for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
    for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
      temporal_filter_iterate_tile_c(cpi, tile_row, tile_col);
    }
  }

  // Restore input state
  for (i = 0; i < MAX_MB_PLANE; i++) mbd->plane[i].pre[0].buf = input_buffer[i];
}

621
// Apply buffer limits and context specific adjustments to arnr filter.
622
static void adjust_arnr_filter(VP9_COMP *cpi, int distance, int group_boost,
623 624
                               int *arnr_frames, int *arnr_strength) {
  const VP9EncoderConfig *const oxcf = &cpi->oxcf;
625
  const int frames_after_arf =
626
      vp9_lookahead_depth(cpi->lookahead) - distance - 1;
627 628
  int frames_fwd = (cpi->oxcf.arnr_max_frames - 1) >> 1;
  int frames_bwd;
629 630 631 632
  int q, frames, base_strength, strength;

  // Context dependent two pass adjustment to strength.
  if (oxcf->pass == 2) {
633
    base_strength = oxcf->arnr_strength + cpi->twopass.arnr_strength_adjustment;
634 635 636 637 638
    // Clip to allowed range.
    base_strength = VPXMIN(6, VPXMAX(0, base_strength));
  } else {
    base_strength = oxcf->arnr_strength;
  }
John Koleszar's avatar
John Koleszar committed
639

640
  // Define the forward and backwards filter limits for this arnr group.
641 642
  if (frames_fwd > frames_after_arf) frames_fwd = frames_after_arf;
  if (frames_fwd > distance) frames_fwd = distance;
643

644
  frames_bwd = frames_fwd;
645

646 647
  // For even length filter there is one more frame backward
  // than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff.
648
  if (frames_bwd < distance) frames_bwd += (oxcf->arnr_max_frames + 1) & 0x1;
649

650
  // Set the baseline active filter size.
651
  frames = frames_bwd + 1 + frames_fwd;
652

653
  // Adjust the strength based on active max q.
654
  if (cpi->common.current_video_frame > 1)
655 656
    q = ((int)vp9_convert_qindex_to_q(cpi->rc.avg_frame_qindex[INTER_FRAME],
                                      cpi->common.bit_depth));
657
  else
658 659
    q = ((int)vp9_convert_qindex_to_q(cpi->rc.avg_frame_qindex[KEY_FRAME],
                                      cpi->common.bit_depth));
660
  if (q > 16) {
661
    strength = base_strength;
662
  } else {
663
    strength = base_strength - ((16 - q) / 2);
664
    if (strength < 0) strength = 0;
665 666 667
  }

  // Adjust number of frames in filter and strength based on gf boost level.
668 669 670
  if (frames > group_boost / 150) {
    frames = group_boost / 150;
    frames += !(frames & 1);
671
  }
672 673 674

  if (strength > group_boost / 300) {
    strength = group_boost / 300;
675
  }
676 677

  // Adjustments for second level arf in multi arf case.
678
  if (cpi->oxcf.pass == 2 && cpi->multi_arf_allowed) {
679 680
    const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
    if (gf_group->rf_level[gf_group->index] != GF_ARF_STD) {
681
      strength >>= 1;
682 683
    }
  }
684 685 686

  *arnr_frames = frames;
  *arnr_strength = strength;
687 688 689 690 691
}

// Entry point of the ARNR temporal filter for the alt-ref frame located
// `distance` frames into the lookahead.
//
// Steps:
//   1. adjust_arnr_filter() picks the number of frames to blur and the filter
//      strength; both are stored in cpi->arnr_filter_data together with the
//      index of the alt-ref frame inside the frame list.
//   2. The frame list is filled from the lookahead queue.
//   3. Scale factors are set up. With SVC, any frame whose size differs from
//      the coded size is first scaled into cpi->svc.scaled_frames.
//   4. Motion-search constants are initialised for ARNR_FILT_QINDEX.
//   5. The filter runs either single-threaded or through the row-MT path,
//      depending on cpi->row_mt.
void vp9_temporal_filter(VP9_COMP *cpi, int distance) {
  VP9_COMMON *const cm = &cpi->common;
  RATE_CONTROL *const rc = &cpi->rc;
  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
  ARNRFilterData *arnr_filter_data = &cpi->arnr_filter_data;
  int frame;
  int frames_to_blur;
  int start_frame;
  int strength;
  int frames_to_blur_backward;
  int frames_to_blur_forward;
  struct scale_factors *sf = &arnr_filter_data->sf;
  YV12_BUFFER_CONFIG **frames = arnr_filter_data->frames;
  int rdmult;

  // Apply context specific adjustments to the arnr filter parameters.
  adjust_arnr_filter(cpi, distance, rc->gfu_boost, &frames_to_blur, &strength);
  frames_to_blur_backward = (frames_to_blur / 2);
  frames_to_blur_forward = ((frames_to_blur - 1) / 2);
  start_frame = distance + frames_to_blur_forward;

  arnr_filter_data->strength = strength;
  arnr_filter_data->frame_count = frames_to_blur;
  arnr_filter_data->alt_ref_index = frames_to_blur_backward;

  // Setup frame pointers, NULL indicates frame not included in filter.
  // NOTE(review): the result of vp9_lookahead_peek() is used without a NULL
  // check; whether it can return NULL here is not visible from this file.
  for (frame = 0; frame < frames_to_blur; ++frame) {
    const int which_buffer = start_frame - frame;
    struct lookahead_entry *buf =
        vp9_lookahead_peek(cpi->lookahead, which_buffer);
    // Stored in reverse so that frames[] runs from oldest to newest.
    frames[frames_to_blur - 1 - frame] = &buf->img;
  }

  if (frames_to_blur > 0) {
    // Setup scaling factors. Scaling on each of the arnr frames is not
    // supported.
    if (cpi->use_svc) {
      // In spatial svc the scaling factors might be less than 1/2.
      // So we will use non-normative scaling.
      int frame_used = 0;
#if CONFIG_VP9_HIGHBITDEPTH
      vp9_setup_scale_factors_for_frame(
          sf, get_frame_new_buffer(cm)->y_crop_width,
          get_frame_new_buffer(cm)->y_crop_height,
          get_frame_new_buffer(cm)->y_crop_width,
          get_frame_new_buffer(cm)->y_crop_height, cm->use_highbitdepth);
#else
      vp9_setup_scale_factors_for_frame(
          sf, get_frame_new_buffer(cm)->y_crop_width,
          get_frame_new_buffer(cm)->y_crop_height,
          get_frame_new_buffer(cm)->y_crop_width,
          get_frame_new_buffer(cm)->y_crop_height);
#endif  // CONFIG_VP9_HIGHBITDEPTH

      for (frame = 0; frame < frames_to_blur; ++frame) {
        // Only frames whose size differs from the coded size are rescaled.
        if (cm->mi_cols * MI_SIZE != frames[frame]->y_width ||
            cm->mi_rows * MI_SIZE != frames[frame]->y_height) {
          if (vpx_realloc_frame_buffer(&cpi->svc.scaled_frames[frame_used],
                                       cm->width, cm->height, cm->subsampling_x,
                                       cm->subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
                                       cm->use_highbitdepth,
#endif
                                       VP9_ENC_BORDER_IN_PIXELS,
                                       cm->byte_alignment, NULL, NULL, NULL)) {
            vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                               "Failed to reallocate alt_ref_buffer");
          }
          frames[frame] = vp9_scale_if_required(
              cm, frames[frame], &cpi->svc.scaled_frames[frame_used], 0,
              EIGHTTAP, 0);
          ++frame_used;
        }
      }
      cm->mi = cm->mip + cm->mi_stride + 1;
      xd->mi = cm->mi_grid_visible;
      xd->mi[0] = cm->mi;
    } else {
      // ARF is produced at the native frame size and resized when coded.
#if CONFIG_VP9_HIGHBITDEPTH
      vp9_setup_scale_factors_for_frame(
          sf, frames[0]->y_crop_width, frames[0]->y_crop_height,
          frames[0]->y_crop_width, frames[0]->y_crop_height,
          cm->use_highbitdepth);
#else
      vp9_setup_scale_factors_for_frame(
          sf, frames[0]->y_crop_width, frames[0]->y_crop_height,
          frames[0]->y_crop_width, frames[0]->y_crop_height);
#endif  // CONFIG_VP9_HIGHBITDEPTH
    }
  }

  // Initialize errorperbit and sadperbit.
  rdmult = (int)vp9_compute_rd_mult_based_on_qindex(cpi, ARNR_FILT_QINDEX);
  if (rdmult < 1) rdmult = 1;
  set_error_per_bit(&cpi->td.mb, rdmult);
  vp9_initialize_me_consts(cpi, &cpi->td.mb, ARNR_FILT_QINDEX);

  if (!cpi->row_mt) {
    temporal_filter_iterate_c(cpi);
  } else {
    vp9_temporal_filter_row_mt(cpi);
  }
}