vp9_variance.c 12.5 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
John Koleszar's avatar
John Koleszar committed
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5 6
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
John Koleszar's avatar
John Koleszar committed
9 10
 */

11
#include "./vp9_rtcd.h"
Johann's avatar
Johann committed
12
#include "./vpx_dsp_rtcd.h"
John Koleszar's avatar
John Koleszar committed
13

14
#include "vpx_ports/mem.h"
15 16 17 18
#include "vpx/vpx_integer.h"

#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_filter.h"
19

20 21
#include "vp9/encoder/vp9_variance.h"

Johann's avatar
Johann committed
22 23 24 25 26 27 28 29 30 31 32
// Two-tap bilinear filter kernels. Each row holds the weights applied to a
// pixel and to its neighbour one pixel_step away; the two weights of every
// row add up to 128. The sub-pixel variance functions select a row with
// their xoffset / yoffset argument, so valid offsets are 0..7.
//
// The table is never written, so it is const: accidental stores are
// rejected at compile time and the data can live in read-only storage.
static const uint8_t bilinear_filters[8][2] = {
  { 128,   0, },
  { 112,  16, },
  {  96,  32, },
  {  80,  48, },
  {  64,  64, },
  {  48,  80, },
  {  32,  96, },
  {  16, 112, },
};

Dmitry Kovalev's avatar
Dmitry Kovalev committed
33 34 35 36 37 38 39 40
// Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal
// or vertical direction to produce the filtered output block. Used to implement
// first-pass of 2-D separable filter.
//
// Produces int32_t output to retain precision for next pass. Two filter taps
// should sum to VP9_FILTER_WEIGHT. pixel_step defines whether the filter is
// applied horizontally (pixel_step=1) or vertically (pixel_step=stride). It
// defines the offset required to move from one input to the next.
41 42 43 44 45 46
static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,
                                              uint16_t *output_ptr,
                                              unsigned int src_pixels_per_line,
                                              int pixel_step,
                                              unsigned int output_height,
                                              unsigned int output_width,
Johann's avatar
Johann committed
47
                                              const uint8_t *vp9_filter) {
48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
  unsigned int i, j;

  for (i = 0; i < output_height; i++) {
    for (j = 0; j < output_width; j++) {
      output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
                          (int)src_ptr[pixel_step] * vp9_filter[1],
                          FILTER_BITS);

      src_ptr++;
    }

    // Next row...
    src_ptr    += src_pixels_per_line - output_width;
    output_ptr += output_width;
  }
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
65 66 67 68 69 70 71 72
// Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal
// or vertical direction to produce the filtered output block. Used to implement
// second-pass of 2-D separable filter.
//
// Requires 32-bit input as produced by filter_block2d_bil_first_pass. Two
// filter taps should sum to VP9_FILTER_WEIGHT. pixel_step defines whether the
// filter is applied horizontally (pixel_step=1) or vertically (pixel_step=
// stride). It defines the offset required to move from one input to the next.
73 74 75 76 77 78
static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
                                               uint8_t *output_ptr,
                                               unsigned int src_pixels_per_line,
                                               unsigned int pixel_step,
                                               unsigned int output_height,
                                               unsigned int output_width,
Johann's avatar
Johann committed
79
                                               const uint8_t *vp9_filter) {
80 81 82 83 84 85 86 87 88 89 90 91 92 93
  unsigned int  i, j;

  for (i = 0; i < output_height; i++) {
    for (j = 0; j < output_width; j++) {
      output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
                          (int)src_ptr[pixel_step] * vp9_filter[1],
                          FILTER_BITS);
      src_ptr++;
    }

    src_ptr += src_pixels_per_line - output_width;
    output_ptr += output_width;
  }
}
John Koleszar's avatar
John Koleszar committed
94

95 96 97 98 99 100 101 102 103 104
// Defines vp9_sub_pixel_variance<W>x<H>_c(), the C sub-pixel variance function
// for a W x H block.
//
// The source block is bilinearly filtered in two passes: horizontally with
// bilinear_filters[xoffset], producing H + 1 rows because the vertical pass
// reads one row below each output row, then vertically with
// bilinear_filters[yoffset]. The function returns vpx_variance<W>x<H>_c() of
// the filtered block against dst; sse is handed through to that call.
#define SUBPIX_VAR(W, H) \
unsigned int vp9_sub_pixel_variance##W##x##H##_c( \
  const uint8_t *src, int src_stride, \
  int xoffset, int yoffset, \
  const uint8_t *dst, int dst_stride, \
  unsigned int *sse) { \
  uint16_t horiz[(H + 1) * W]; \
  uint8_t filtered[H * W]; \
\
  var_filter_block2d_bil_first_pass(src, horiz, src_stride, 1, H + 1, W, \
                                    bilinear_filters[xoffset]); \
  var_filter_block2d_bil_second_pass(horiz, filtered, W, W, H, W, \
                                     bilinear_filters[yoffset]); \
\
  return vpx_variance##W##x##H##_c(filtered, W, dst, dst_stride, sse); \
}

// Defines vp9_sub_pixel_avg_variance<W>x<H>_c(), the C sub-pixel variance
// function for a W x H block that also takes a second predictor.
//
// The source block is bilinearly filtered in two passes: horizontally with
// bilinear_filters[xoffset] (H + 1 rows), then vertically with
// bilinear_filters[yoffset]. The filtered block and second_pred are combined
// by vpx_comp_avg_pred() into a 16-byte aligned buffer, and the function
// returns vpx_variance<W>x<H>_c() of that buffer against dst; sse is handed
// through to that call.
#define SUBPIX_AVG_VAR(W, H) \
unsigned int vp9_sub_pixel_avg_variance##W##x##H##_c( \
  const uint8_t *src, int src_stride, \
  int xoffset, int yoffset, \
  const uint8_t *dst, int dst_stride, \
  unsigned int *sse, \
  const uint8_t *second_pred) { \
  uint16_t horiz[(H + 1) * W]; \
  uint8_t filtered[H * W]; \
  DECLARE_ALIGNED(16, uint8_t, averaged[H * W]); \
\
  var_filter_block2d_bil_first_pass(src, horiz, src_stride, 1, H + 1, W, \
                                    bilinear_filters[xoffset]); \
  var_filter_block2d_bil_second_pass(horiz, filtered, W, W, H, W, \
                                     bilinear_filters[yoffset]); \
\
  vpx_comp_avg_pred(averaged, second_pred, W, H, filtered, W); \
\
  return vpx_variance##W##x##H##_c(averaged, W, dst, dst_stride, sse); \
}

133 134
// Emits the sub-pixel variance function and its second_pred-averaging
// counterpart for one W x H block size.
#define SUBPIX_VAR_PAIR(W, H) \
SUBPIX_VAR(W, H) \
SUBPIX_AVG_VAR(W, H)

// One pair of functions per supported block size.
SUBPIX_VAR_PAIR(4, 4)
SUBPIX_VAR_PAIR(4, 8)
SUBPIX_VAR_PAIR(8, 4)
SUBPIX_VAR_PAIR(8, 8)
SUBPIX_VAR_PAIR(8, 16)
SUBPIX_VAR_PAIR(16, 8)
SUBPIX_VAR_PAIR(16, 16)
SUBPIX_VAR_PAIR(16, 32)
SUBPIX_VAR_PAIR(32, 16)
SUBPIX_VAR_PAIR(32, 32)
SUBPIX_VAR_PAIR(32, 64)
SUBPIX_VAR_PAIR(64, 32)
SUBPIX_VAR_PAIR(64, 64)
171

172
#if CONFIG_VP9_HIGHBITDEPTH
173
static void highbd_var_filter_block2d_bil_first_pass(
174 175 176 177 178 179
    const uint8_t *src_ptr8,
    uint16_t *output_ptr,
    unsigned int src_pixels_per_line,
    int pixel_step,
    unsigned int output_height,
    unsigned int output_width,
Johann's avatar
Johann committed
180
    const uint8_t *vp9_filter) {
181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
  unsigned int i, j;
  uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8);
  for (i = 0; i < output_height; i++) {
    for (j = 0; j < output_width; j++) {
      output_ptr[j] =
          ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
                             (int)src_ptr[pixel_step] * vp9_filter[1],
                             FILTER_BITS);

      src_ptr++;
    }

    // Next row...
    src_ptr += src_pixels_per_line - output_width;
    output_ptr += output_width;
  }
}

199
static void highbd_var_filter_block2d_bil_second_pass(
200 201 202 203 204 205
    const uint16_t *src_ptr,
    uint16_t *output_ptr,
    unsigned int src_pixels_per_line,
    unsigned int pixel_step,
    unsigned int output_height,
    unsigned int output_width,
Johann's avatar
Johann committed
206
    const uint8_t *vp9_filter) {
207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222
  unsigned int  i, j;

  for (i = 0; i < output_height; i++) {
    for (j = 0; j < output_width; j++) {
      output_ptr[j] =
          ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
                             (int)src_ptr[pixel_step] * vp9_filter[1],
                             FILTER_BITS);
      src_ptr++;
    }

    src_ptr += src_pixels_per_line - output_width;
    output_ptr += output_width;
  }
}

223 224
// Emits one high-bitdepth sub-pixel variance function named FN<W>x<H>_c.
//
// The source block is bilinearly filtered in two passes: horizontally with
// bilinear_filters[xoffset], producing H + 1 rows because the vertical pass
// reads one row below each output row, then vertically with
// bilinear_filters[yoffset]. The filtered uint16_t block is wrapped with
// CONVERT_TO_BYTEPTR and the function returns VAR_FN<W>x<H>_c() of it against
// dst; sse is handed through to that call.
#define HIGHBD_SUBPIX_VAR_BD(FN, VAR_FN, W, H) \
unsigned int FN##W##x##H##_c( \
  const uint8_t *src, int src_stride, \
  int xoffset, int yoffset, \
  const uint8_t *dst, int dst_stride, \
  unsigned int *sse) { \
  uint16_t horiz[(H + 1) * W]; \
  uint16_t filtered[H * W]; \
\
  highbd_var_filter_block2d_bil_first_pass(src, horiz, src_stride, 1, H + 1, \
                                           W, bilinear_filters[xoffset]); \
  highbd_var_filter_block2d_bil_second_pass(horiz, filtered, W, W, H, W, \
                                            bilinear_filters[yoffset]); \
\
  return VAR_FN##W##x##H##_c(CONVERT_TO_BYTEPTR(filtered), W, dst, \
                             dst_stride, sse); \
}

// Defines the three high-bitdepth sub-pixel variance functions for a W x H
// block: vp9_highbd_sub_pixel_variance<W>x<H>_c (built on
// vpx_highbd_8_variance), vp9_highbd_10_sub_pixel_variance<W>x<H>_c and
// vp9_highbd_12_sub_pixel_variance<W>x<H>_c.
#define HIGHBD_SUBPIX_VAR(W, H) \
HIGHBD_SUBPIX_VAR_BD(vp9_highbd_sub_pixel_variance, \
                     vpx_highbd_8_variance, W, H) \
HIGHBD_SUBPIX_VAR_BD(vp9_highbd_10_sub_pixel_variance, \
                     vpx_highbd_10_variance, W, H) \
HIGHBD_SUBPIX_VAR_BD(vp9_highbd_12_sub_pixel_variance, \
                     vpx_highbd_12_variance, W, H)

275 276
// Emits one high-bitdepth sub-pixel variance function named FN<W>x<H>_c that
// also takes a second predictor.
//
// The source block is bilinearly filtered in two passes: horizontally with
// bilinear_filters[xoffset] (H + 1 rows), then vertically with
// bilinear_filters[yoffset]. The filtered block and second_pred are combined
// by vpx_highbd_comp_avg_pred() into a 16-byte aligned uint16_t buffer, which
// is wrapped with CONVERT_TO_BYTEPTR. The function returns VAR_FN<W>x<H>_c()
// of that buffer against dst; sse is handed through to that call.
#define HIGHBD_SUBPIX_AVG_VAR_BD(FN, VAR_FN, W, H) \
unsigned int FN##W##x##H##_c( \
  const uint8_t *src, int src_stride, \
  int xoffset, int yoffset, \
  const uint8_t *dst, int dst_stride, \
  unsigned int *sse, \
  const uint8_t *second_pred) { \
  uint16_t horiz[(H + 1) * W]; \
  uint16_t filtered[H * W]; \
  DECLARE_ALIGNED(16, uint16_t, averaged[H * W]); \
\
  highbd_var_filter_block2d_bil_first_pass(src, horiz, src_stride, 1, H + 1, \
                                           W, bilinear_filters[xoffset]); \
  highbd_var_filter_block2d_bil_second_pass(horiz, filtered, W, W, H, W, \
                                            bilinear_filters[yoffset]); \
\
  vpx_highbd_comp_avg_pred(averaged, second_pred, W, H, \
                           CONVERT_TO_BYTEPTR(filtered), W); \
\
  return VAR_FN##W##x##H##_c(CONVERT_TO_BYTEPTR(averaged), W, dst, \
                             dst_stride, sse); \
}

// Defines the three high-bitdepth averaging sub-pixel variance functions for
// a W x H block: vp9_highbd_sub_pixel_avg_variance<W>x<H>_c (built on
// vpx_highbd_8_variance), vp9_highbd_10_sub_pixel_avg_variance<W>x<H>_c and
// vp9_highbd_12_sub_pixel_avg_variance<W>x<H>_c.
#define HIGHBD_SUBPIX_AVG_VAR(W, H) \
HIGHBD_SUBPIX_AVG_VAR_BD(vp9_highbd_sub_pixel_avg_variance, \
                         vpx_highbd_8_variance, W, H) \
HIGHBD_SUBPIX_AVG_VAR_BD(vp9_highbd_10_sub_pixel_avg_variance, \
                         vpx_highbd_10_variance, W, H) \
HIGHBD_SUBPIX_AVG_VAR_BD(vp9_highbd_12_sub_pixel_avg_variance, \
                         vpx_highbd_12_variance, W, H)

342 343
// Emits the high-bitdepth sub-pixel variance functions and their
// second_pred-averaging counterparts for one W x H block size.
#define HIGHBD_SUBPIX_VAR_PAIR(W, H) \
HIGHBD_SUBPIX_VAR(W, H) \
HIGHBD_SUBPIX_AVG_VAR(W, H)

// One set of functions per supported block size.
HIGHBD_SUBPIX_VAR_PAIR(4, 4)
HIGHBD_SUBPIX_VAR_PAIR(4, 8)
HIGHBD_SUBPIX_VAR_PAIR(8, 4)
HIGHBD_SUBPIX_VAR_PAIR(8, 8)
HIGHBD_SUBPIX_VAR_PAIR(8, 16)
HIGHBD_SUBPIX_VAR_PAIR(16, 8)
HIGHBD_SUBPIX_VAR_PAIR(16, 16)
HIGHBD_SUBPIX_VAR_PAIR(16, 32)
HIGHBD_SUBPIX_VAR_PAIR(32, 16)
HIGHBD_SUBPIX_VAR_PAIR(32, 32)
HIGHBD_SUBPIX_VAR_PAIR(32, 64)
HIGHBD_SUBPIX_VAR_PAIR(64, 32)
HIGHBD_SUBPIX_VAR_PAIR(64, 64)
380
#endif  // CONFIG_VP9_HIGHBITDEPTH