vp9_variance.c 46.8 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

11
#include "./vp9_rtcd.h"
John Koleszar's avatar
John Koleszar committed
12

13
#include "vpx_ports/mem.h"
14 15 16 17
#include "vpx/vpx_integer.h"

#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_filter.h"
18

19 20
#include "vp9/encoder/vp9_variance.h"

21 22 23 24 25 26 27 28
/* Accumulates the signed sum of differences and the sum of squared
 * differences between a w x h block at src_ptr and the co-located block at
 * ref_ptr.
 *
 *   src_ptr, source_stride : first block and its step between rows.
 *   ref_ptr, recon_stride  : second block and its step between rows.
 *   w, h                   : block width and height in pixels.
 *   sse                    : receives the sum of squared differences.
 *   sum                    : receives the signed sum of differences.
 *
 * Both outputs are set to zero when w or h is not positive.
 */
void variance(const uint8_t *src_ptr,
              int  source_stride,
              const uint8_t *ref_ptr,
              int  recon_stride,
              int  w,
              int  h,
              unsigned int *sse,
              int *sum) {
  int i, j;
  // Accumulate in locals and store once at the end, so the outputs are not
  // re-read and re-written through the pointers on every pixel.
  int diff_total = 0;
  unsigned int sq_total = 0;

  for (i = 0; i < h; i++) {
    for (j = 0; j < w; j++) {
      const int diff = src_ptr[j] - ref_ptr[j];
      diff_total += diff;
      sq_total += diff * diff;
    }

    src_ptr += source_stride;
    ref_ptr += recon_stride;
  }

  *sum = diff_total;
  *sse = sq_total;
}

/****************************************************************************
 *
 *  ROUTINE       : var_filter_block2d_bil_first_pass
 *
 *  INPUTS        : const uint8_t *src_ptr     : Pointer to source block.
 *                  unsigned int src_pixels_per_line : Stride of input block.
 *                  int pixel_step             : Offset between the two input
 *                                               samples of one output sample
 *                                               (see notes).
 *                  unsigned int output_height : Number of rows to produce.
 *                  unsigned int output_width  : Number of columns to produce.
 *                  const int16_t *vp9_filter  : Array of 2 bi-linear filter
 *                                               taps.
 *
 *  OUTPUTS       : uint16_t *output_ptr       : Filtered block, written with
 *                                               a row stride of output_width.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block
 *                  and rounds each result with
 *                  ROUND_POWER_OF_TWO(..., FILTER_BITS). Used as the first
 *                  pass of a 2-D separable filter.
 *
 *  SPECIAL NOTES : Output samples are stored as uint16_t.
 *                  Two filter taps should sum to VP9_FILTER_WEIGHT.
 *                  pixel_step selects the filter direction: 1 filters
 *                  horizontally, the input stride filters vertically.
 *                  Each output sample reads src_ptr[0] and
 *                  src_ptr[pixel_step].
 *
 ****************************************************************************/
static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,
                                              uint16_t *output_ptr,
                                              unsigned int src_pixels_per_line,
                                              int pixel_step,
                                              unsigned int output_height,
                                              unsigned int output_width,
                                              const int16_t *vp9_filter) {
  // Distance from the end of one input row to the start of the next.
  const unsigned int row_gap = src_pixels_per_line - output_width;
  unsigned int row, col;

  for (row = 0; row < output_height; ++row) {
    for (col = 0; col < output_width; ++col, ++src_ptr) {
      const int weighted = (int)src_ptr[0] * vp9_filter[0] +
                           (int)src_ptr[pixel_step] * vp9_filter[1];
      output_ptr[col] = ROUND_POWER_OF_TWO(weighted, FILTER_BITS);
    }

    src_ptr += row_gap;
    output_ptr += output_width;
  }
}

/****************************************************************************
 *
 *  ROUTINE       : var_filter_block2d_bil_second_pass
 *
 *  INPUTS        : const uint16_t *src_ptr    : Pointer to source block.
 *                  unsigned int src_pixels_per_line : Stride of input block.
 *                  unsigned int pixel_step    : Offset between the two input
 *                                               samples of one output sample
 *                                               (see notes).
 *                  unsigned int output_height : Number of rows to produce.
 *                  unsigned int output_width  : Number of columns to produce.
 *                  const int16_t *vp9_filter  : Array of 2 bi-linear filter
 *                                               taps.
 *
 *  OUTPUTS       : uint8_t *output_ptr        : Filtered block, written with
 *                                               a row stride of output_width.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block
 *                  and rounds each result with
 *                  ROUND_POWER_OF_TWO(..., FILTER_BITS). Used as the second
 *                  pass of a 2-D separable filter.
 *
 *  SPECIAL NOTES : Takes uint16_t input, the sample type written by
 *                  var_filter_block2d_bil_first_pass.
 *                  Two filter taps should sum to VP9_FILTER_WEIGHT.
 *                  pixel_step selects the filter direction: 1 filters
 *                  horizontally, the input stride filters vertically.
 *                  Each output sample reads src_ptr[0] and
 *                  src_ptr[pixel_step].
 *
 ****************************************************************************/
static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
                                               uint8_t *output_ptr,
                                               unsigned int src_pixels_per_line,
                                               unsigned int pixel_step,
                                               unsigned int output_height,
                                               unsigned int output_width,
                                               const int16_t *vp9_filter) {
  // Distance from the end of one input row to the start of the next.
  const unsigned int row_gap = src_pixels_per_line - output_width;
  unsigned int row, col;

  for (row = 0; row < output_height; ++row) {
    for (col = 0; col < output_width; ++col, ++src_ptr) {
      const int weighted = (int)src_ptr[0] * vp9_filter[0] +
                           (int)src_ptr[pixel_step] * vp9_filter[1];
      output_ptr[col] = ROUND_POWER_OF_TWO(weighted, FILTER_BITS);
    }

    src_ptr += row_gap;
    output_ptr += output_width;
  }
}
John Koleszar's avatar
John Koleszar committed
155

156
// Returns the sum of the squares of the first 256 values at src_ptr.
// src_ptr must therefore reference at least 256 readable int16_t values.
unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) {
  unsigned int i, sum = 0;

  for (i = 0; i < 256; i++) {
    sum += (src_ptr[i] * src_ptr[i]);
  }

  return sum;
}

166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185
// 64x32 variance. Stores the sum of squared differences between src_ptr and
// ref_ptr in *sse and returns it minus the squared sum of differences
// divided by the pixel count (64 * 32 = 2^11).
unsigned int vp9_variance64x32_c(const uint8_t *src_ptr,
                                 int  source_stride,
                                 const uint8_t *ref_ptr,
                                 int  recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_diff_total;
  int diff_total;
  int64_t mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 32,
           &sq_diff_total, &diff_total);
  *sse = sq_diff_total;

  // 64-bit product: the squared sum can exceed 32 bits at this block size.
  mean_term = ((int64_t)diff_total * diff_total) >> 11;
  return sq_diff_total - mean_term;
}

// Sub-pixel variance of a 64x32 block. src_ptr is filtered horizontally and
// then vertically with the 2-tap filters that BILINEAR_FILTERS_2TAP selects
// for xoffset and yoffset; the filtered block is then compared against
// dst_ptr by vp9_variance64x32, which fills *sse and supplies the return
// value.
unsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr,
                                           int  src_pixels_per_line,
                                           int  xoffset,
                                           int  yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
  uint8_t temp2[68 * 64];
  const int16_t *hfilter, *vfilter;

  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  // Horizontal pass produces 33 rows: the vertical pass reads the row below
  // each of its 32 output rows.
  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 33, 64, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter);

  return vp9_variance64x32(temp2, 64, dst_ptr, dst_pixels_per_line, sse);
}

200 201 202 203 204 205 206 207
unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr,
                                               int  src_pixels_per_line,
                                               int  xoffset,
                                               int  yoffset,
                                               const uint8_t *dst_ptr,
                                               int dst_pixels_per_line,
                                               unsigned int *sse,
                                               const uint8_t *second_pred) {
208
  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
209 210 211 212
  uint8_t temp2[68 * 64];
  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64);  // compound pred buffer
  const int16_t *hfilter, *vfilter;

213 214
  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
215 216 217 218 219

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 33, 64, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter);
  comp_avg_pred(temp3, second_pred, 64, 32, temp2, 64);
220
  return vp9_variance64x32(temp3, 64, dst_ptr, dst_pixels_per_line, sse);
221 222
}

223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242
// 32x64 variance. Stores the sum of squared differences between src_ptr and
// ref_ptr in *sse and returns it minus the squared sum of differences
// divided by the pixel count (32 * 64 = 2^11).
unsigned int vp9_variance32x64_c(const uint8_t *src_ptr,
                                 int  source_stride,
                                 const uint8_t *ref_ptr,
                                 int  recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_diff_total;
  int diff_total;
  int64_t mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 64,
           &sq_diff_total, &diff_total);
  *sse = sq_diff_total;

  // 64-bit product: the squared sum can exceed 32 bits at this block size.
  mean_term = ((int64_t)diff_total * diff_total) >> 11;
  return sq_diff_total - mean_term;
}

// Sub-pixel variance of a 32x64 block. src_ptr is filtered horizontally and
// then vertically with the 2-tap filters that BILINEAR_FILTERS_2TAP selects
// for xoffset and yoffset; the filtered block is then compared against
// dst_ptr by vp9_variance32x64, which fills *sse and supplies the return
// value.
unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr,
                                           int  src_pixels_per_line,
                                           int  xoffset,
                                           int  yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
  uint8_t temp2[68 * 64];
  const int16_t *hfilter, *vfilter;

  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  // Horizontal pass produces 65 rows: the vertical pass reads the row below
  // each of its 64 output rows.
  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 65, 32, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter);

  return vp9_variance32x64(temp2, 32, dst_ptr, dst_pixels_per_line, sse);
}

257 258 259 260 261 262 263 264
unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr,
                                               int  src_pixels_per_line,
                                               int  xoffset,
                                               int  yoffset,
                                               const uint8_t *dst_ptr,
                                               int dst_pixels_per_line,
                                               unsigned int *sse,
                                               const uint8_t *second_pred) {
265
  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
266 267 268 269
  uint8_t temp2[68 * 64];
  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 64);  // compound pred buffer
  const int16_t *hfilter, *vfilter;

270 271
  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
272 273 274 275 276

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 65, 32, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter);
  comp_avg_pred(temp3, second_pred, 32, 64, temp2, 32);
277
  return vp9_variance32x64(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
278 279
}

280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299
// 32x16 variance. Stores the sum of squared differences between src_ptr and
// ref_ptr in *sse and returns it minus the squared sum of differences
// divided by the pixel count (32 * 16 = 2^9).
unsigned int vp9_variance32x16_c(const uint8_t *src_ptr,
                                 int  source_stride,
                                 const uint8_t *ref_ptr,
                                 int  recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_diff_total;
  int diff_total;
  int64_t mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 16,
           &sq_diff_total, &diff_total);
  *sse = sq_diff_total;

  // 64-bit product: the squared sum can exceed 32 bits at this block size.
  mean_term = ((int64_t)diff_total * diff_total) >> 9;
  return sq_diff_total - mean_term;
}

// Sub-pixel variance of a 32x16 block. src_ptr is filtered horizontally and
// then vertically with the 2-tap filters that BILINEAR_FILTERS_2TAP selects
// for xoffset and yoffset; the filtered block is then compared against
// dst_ptr by vp9_variance32x16, which fills *sse and supplies the return
// value.
unsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr,
                                           int  src_pixels_per_line,
                                           int  xoffset,
                                           int  yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
  uint8_t temp2[36 * 32];
  const int16_t *hfilter, *vfilter;

  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  // Horizontal pass produces 17 rows: the vertical pass reads the row below
  // each of its 16 output rows.
  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 17, 32, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter);

  return vp9_variance32x16(temp2, 32, dst_ptr, dst_pixels_per_line, sse);
}

314 315 316 317 318 319 320 321
unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr,
                                               int  src_pixels_per_line,
                                               int  xoffset,
                                               int  yoffset,
                                               const uint8_t *dst_ptr,
                                               int dst_pixels_per_line,
                                               unsigned int *sse,
                                               const uint8_t *second_pred) {
322
  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
323 324 325 326
  uint8_t temp2[36 * 32];
  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 16);  // compound pred buffer
  const int16_t *hfilter, *vfilter;

327 328
  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
329 330 331 332 333

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 17, 32, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter);
  comp_avg_pred(temp3, second_pred, 32, 16, temp2, 32);
334
  return vp9_variance32x16(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
335 336
}

337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356
// 16x32 variance. Stores the sum of squared differences between src_ptr and
// ref_ptr in *sse and returns it minus the squared sum of differences
// divided by the pixel count (16 * 32 = 2^9).
unsigned int vp9_variance16x32_c(const uint8_t *src_ptr,
                                 int  source_stride,
                                 const uint8_t *ref_ptr,
                                 int  recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_diff_total;
  int diff_total;
  int64_t mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 32,
           &sq_diff_total, &diff_total);
  *sse = sq_diff_total;

  // 64-bit product: the squared sum can exceed 32 bits at this block size.
  mean_term = ((int64_t)diff_total * diff_total) >> 9;
  return sq_diff_total - mean_term;
}

// Sub-pixel variance of a 16x32 block. src_ptr is filtered horizontally and
// then vertically with the 2-tap filters that BILINEAR_FILTERS_2TAP selects
// for xoffset and yoffset; the filtered block is then compared against
// dst_ptr by vp9_variance16x32, which fills *sse and supplies the return
// value.
unsigned int vp9_sub_pixel_variance16x32_c(const uint8_t *src_ptr,
                                           int  src_pixels_per_line,
                                           int  xoffset,
                                           int  yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
  uint8_t temp2[36 * 32];
  const int16_t *hfilter, *vfilter;

  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  // Horizontal pass produces 33 rows: the vertical pass reads the row below
  // each of its 32 output rows.
  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 33, 16, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter);

  return vp9_variance16x32(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
}

371 372 373 374 375 376 377 378
unsigned int vp9_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr,
                                               int  src_pixels_per_line,
                                               int  xoffset,
                                               int  yoffset,
                                               const uint8_t *dst_ptr,
                                               int dst_pixels_per_line,
                                               unsigned int *sse,
                                               const uint8_t *second_pred) {
379
  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
380 381 382 383
  uint8_t temp2[36 * 32];
  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 32);  // compound pred buffer
  const int16_t *hfilter, *vfilter;

384 385
  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
386 387 388 389 390

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 33, 16, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter);
  comp_avg_pred(temp3, second_pred, 16, 32, temp2, 16);
391
  return vp9_variance16x32(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
392 393
}

394 395 396 397 398 399 400 401 402 403 404 405 406
// 64x64 variance. Stores the sum of squared differences between src_ptr and
// ref_ptr in *sse and returns it minus the squared sum of differences
// divided by the pixel count (64 * 64 = 2^12).
unsigned int vp9_variance64x64_c(const uint8_t *src_ptr,
                                 int  source_stride,
                                 const uint8_t *ref_ptr,
                                 int  recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_diff_total;
  int diff_total;
  int64_t mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 64,
           &sq_diff_total, &diff_total);
  *sse = sq_diff_total;

  // 64-bit product: the squared sum can exceed 32 bits at this block size.
  mean_term = ((int64_t)diff_total * diff_total) >> 12;
  return sq_diff_total - mean_term;
}

407
// 32x32 variance. Stores the sum of squared differences between src_ptr and
// ref_ptr in *sse and returns it minus the squared sum of differences
// divided by the pixel count (32 * 32 = 2^10).
unsigned int vp9_variance32x32_c(const uint8_t *src_ptr,
                                 int  source_stride,
                                 const uint8_t *ref_ptr,
                                 int  recon_stride,
                                 unsigned int *sse) {
  unsigned int var;
  int avg;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32, &var, &avg);
  *sse = var;
  // 64-bit product: the squared sum can exceed 32 bits at this block size.
  return (var - (((int64_t)avg * avg) >> 10));
}
John Koleszar's avatar
John Koleszar committed
419

420
// 16x16 variance. Stores the sum of squared differences between src_ptr and
// ref_ptr in *sse and returns it minus the squared sum of differences
// divided by the pixel count (16 * 16 = 2^8).
unsigned int vp9_variance16x16_c(const uint8_t *src_ptr,
                                 int  source_stride,
                                 const uint8_t *ref_ptr,
                                 int  recon_stride,
                                 unsigned int *sse) {
  unsigned int var;
  int avg;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
  *sse = var;
  // |avg| is at most 255 * 256 here, so its square still fits in 32 bits.
  return (var - (((unsigned int)avg * avg) >> 8));
}

433
// 8x16 variance. Stores the sum of squared differences between src_ptr and
// ref_ptr in *sse and returns it minus the squared sum of differences
// divided by the pixel count (8 * 16 = 2^7).
unsigned int vp9_variance8x16_c(const uint8_t *src_ptr,
                                int  source_stride,
                                const uint8_t *ref_ptr,
                                int  recon_stride,
                                unsigned int *sse) {
  unsigned int var;
  int avg;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg);
  *sse = var;
  return (var - (((unsigned int)avg * avg) >> 7));
}

446
// 16x8 variance. Stores the sum of squared differences between src_ptr and
// ref_ptr in *sse and returns it minus the squared sum of differences
// divided by the pixel count (16 * 8 = 2^7).
unsigned int vp9_variance16x8_c(const uint8_t *src_ptr,
                                int  source_stride,
                                const uint8_t *ref_ptr,
                                int  recon_stride,
                                unsigned int *sse) {
  unsigned int var;
  int avg;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg);
  *sse = var;
  return (var - (((unsigned int)avg * avg) >> 7));
}

Jim Bankoski's avatar
Jim Bankoski committed
459 460 461 462 463
// Fills *sse with the sum of squared differences and *sum with the sum of
// differences between the 8x8 blocks at src_ptr and ref_ptr, by delegating
// to variance().
void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride,
                       const uint8_t *ref_ptr, int ref_stride,
                       unsigned int *sse, int *sum) {
  const int block_dim = 8;

  variance(src_ptr, source_stride, ref_ptr, ref_stride,
           block_dim, block_dim, sse, sum);
}
John Koleszar's avatar
John Koleszar committed
464

465
// 8x8 variance. Stores the sum of squared differences between src_ptr and
// ref_ptr in *sse and returns it minus the squared sum of differences
// divided by the pixel count (8 * 8 = 2^6).
unsigned int vp9_variance8x8_c(const uint8_t *src_ptr,
                               int  source_stride,
                               const uint8_t *ref_ptr,
                               int  recon_stride,
                               unsigned int *sse) {
  unsigned int var;
  int avg;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg);
  *sse = var;
  return (var - (((unsigned int)avg * avg) >> 6));
}

478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503
// 8x4 variance. Stores the sum of squared differences between src_ptr and
// ref_ptr in *sse and returns it minus the squared sum of differences
// divided by the pixel count (8 * 4 = 2^5).
unsigned int vp9_variance8x4_c(const uint8_t *src_ptr,
                               int  source_stride,
                               const uint8_t *ref_ptr,
                               int  recon_stride,
                               unsigned int *sse) {
  unsigned int sq_diff_total;
  int diff_total;
  unsigned int diff_total_u;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 4,
           &sq_diff_total, &diff_total);
  *sse = sq_diff_total;

  // Square in unsigned arithmetic, matching the 32-bit wrap-around product.
  diff_total_u = (unsigned int)diff_total;
  return sq_diff_total - ((diff_total_u * diff_total_u) >> 5);
}

// 4x8 variance. Stores the sum of squared differences between src_ptr and
// ref_ptr in *sse and returns it minus the squared sum of differences
// divided by the pixel count (4 * 8 = 2^5).
unsigned int vp9_variance4x8_c(const uint8_t *src_ptr,
                               int  source_stride,
                               const uint8_t *ref_ptr,
                               int  recon_stride,
                               unsigned int *sse) {
  unsigned int sq_diff_total;
  int diff_total;
  unsigned int diff_total_u;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 8,
           &sq_diff_total, &diff_total);
  *sse = sq_diff_total;

  // Square in unsigned arithmetic, matching the 32-bit wrap-around product.
  diff_total_u = (unsigned int)diff_total;
  return sq_diff_total - ((diff_total_u * diff_total_u) >> 5);
}

504
// 4x4 variance. Stores the sum of squared differences between src_ptr and
// ref_ptr in *sse and returns it minus the squared sum of differences
// divided by the pixel count (4 * 4 = 2^4).
unsigned int vp9_variance4x4_c(const uint8_t *src_ptr,
                               int  source_stride,
                               const uint8_t *ref_ptr,
                               int  recon_stride,
                               unsigned int *sse) {
  unsigned int var;
  int avg;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &var, &avg);
  *sse = var;
  return (var - (((unsigned int)avg * avg) >> 4));
}


518
// Stores the sum of squared differences between the 16x16 blocks at src_ptr
// and ref_ptr in *sse and returns the same value. No division by the pixel
// count is applied.
unsigned int vp9_mse16x16_c(const uint8_t *src_ptr,
                            int  source_stride,
                            const uint8_t *ref_ptr,
                            int  recon_stride,
                            unsigned int *sse) {
  unsigned int var;
  int avg;  // Filled by variance() but not used here.

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
  *sse = var;
  return var;
}

531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569
// Stores the sum of squared differences between the 16x8 blocks at src_ptr
// and ref_ptr in *sse and returns the same value. No division by the pixel
// count is applied.
unsigned int vp9_mse16x8_c(const uint8_t *src_ptr,
                           int  source_stride,
                           const uint8_t *ref_ptr,
                           int  recon_stride,
                           unsigned int *sse) {
  unsigned int sq_diff_total;
  int diff_total;  // Filled by variance() but not used here.

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8,
           &sq_diff_total, &diff_total);

  return (*sse = sq_diff_total);
}

// Stores the sum of squared differences between the 8x16 blocks at src_ptr
// and ref_ptr in *sse and returns the same value. No division by the pixel
// count is applied.
unsigned int vp9_mse8x16_c(const uint8_t *src_ptr,
                           int  source_stride,
                           const uint8_t *ref_ptr,
                           int  recon_stride,
                           unsigned int *sse) {
  unsigned int sq_diff_total;
  int diff_total;  // Filled by variance() but not used here.

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16,
           &sq_diff_total, &diff_total);

  return (*sse = sq_diff_total);
}

// Stores the sum of squared differences between the 8x8 blocks at src_ptr
// and ref_ptr in *sse and returns the same value. No division by the pixel
// count is applied.
unsigned int vp9_mse8x8_c(const uint8_t *src_ptr,
                          int  source_stride,
                          const uint8_t *ref_ptr,
                          int  recon_stride,
                          unsigned int *sse) {
  unsigned int sq_diff_total;
  int diff_total;  // Filled by variance() but not used here.

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8,
           &sq_diff_total, &diff_total);

  return (*sse = sq_diff_total);
}

John Koleszar's avatar
John Koleszar committed
570

571
// Sub-pixel variance of a 4x4 block. src_ptr is filtered horizontally and
// then vertically with the 2-tap filters that BILINEAR_FILTERS_2TAP selects
// for xoffset and yoffset; the filtered block is then compared against
// dst_ptr by vp9_variance4x4, which fills *sse and supplies the return
// value.
unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr,
                                         int  src_pixels_per_line,
                                         int  xoffset,
                                         int  yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  uint8_t temp2[20 * 16];
  const int16_t *hfilter, *vfilter;
  uint16_t fdata3[5 * 4];  // Temp data buffer used in filtering

  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  // First filter 1-D horizontally. 5 rows are produced because the vertical
  // pass reads the row below each of its 4 output rows.
  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 5, 4, hfilter);

  // Now filter vertically
  var_filter_block2d_bil_second_pass(fdata3, temp2, 4,  4,  4,  4, vfilter);

  return vp9_variance4x4(temp2, 4, dst_ptr, dst_pixels_per_line, sse);
}

595 596 597 598 599 600 601 602 603 604 605
// Sub-pixel variance of a 4x4 block with a second predictor. src_ptr is
// filtered horizontally and vertically with the 2-tap filters selected by
// xoffset and yoffset, the result is combined with second_pred by
// comp_avg_pred, and the combined block is compared against dst_ptr by
// vp9_variance4x4, which fills *sse and supplies the return value.
// NOTE(review): comp_avg_pred is defined outside this view; presumably it
// averages its two inputs -- confirm.
unsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr,
                                             int  src_pixels_per_line,
                                             int  xoffset,
                                             int  yoffset,
                                             const uint8_t *dst_ptr,
                                             int dst_pixels_per_line,
                                             unsigned int *sse,
                                             const uint8_t *second_pred) {
  uint8_t temp2[20 * 16];
  const int16_t *hfilter, *vfilter;
  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 4);  // compound pred buffer
  uint16_t fdata3[5 * 4];  // Temp data buffer used in filtering

  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  // First filter 1-D horizontally. 5 rows are produced because the vertical
  // pass reads the row below each of its 4 output rows.
  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 5, 4, hfilter);

  // Now filter vertically
  var_filter_block2d_bil_second_pass(fdata3, temp2, 4,  4,  4,  4, vfilter);
  comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4);
  return vp9_variance4x4(temp3, 4, dst_ptr, dst_pixels_per_line, sse);
}
John Koleszar's avatar
John Koleszar committed
620

621
// Sub-pixel variance of an 8x8 block. src_ptr is filtered horizontally and
// then vertically with the 2-tap filters that BILINEAR_FILTERS_2TAP selects
// for xoffset and yoffset; the filtered block is then compared against
// dst_ptr by vp9_variance8x8, which fills *sse and supplies the return
// value.
unsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr,
                                         int  src_pixels_per_line,
                                         int  xoffset,
                                         int  yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  uint16_t fdata3[9 * 8];  // Temp data buffer used in filtering
  uint8_t temp2[20 * 16];
  const int16_t *hfilter, *vfilter;

  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  // Horizontal pass produces 9 rows: the vertical pass reads the row below
  // each of its 8 output rows.
  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 9, 8, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter);

  return vp9_variance8x8(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
}

642 643 644 645 646 647 648 649
unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr,
                                             int  src_pixels_per_line,
                                             int  xoffset,
                                             int  yoffset,
                                             const uint8_t *dst_ptr,
                                             int dst_pixels_per_line,
                                             unsigned int *sse,
                                             const uint8_t *second_pred) {
650
  uint16_t fdata3[9 * 8];  // Temp data buffer used in filtering
651 652 653 654
  uint8_t temp2[20 * 16];
  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 8);  // compound pred buffer
  const int16_t *hfilter, *vfilter;

655 656
  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
657 658 659 660 661

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 9, 8, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter);
  comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8);
662
  return vp9_variance8x8(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
663 664
}

665
// Sub-pixel variance of a 16x16 block. src_ptr is filtered horizontally and
// then vertically with the 2-tap filters that BILINEAR_FILTERS_2TAP selects
// for xoffset and yoffset; the filtered block is then compared against
// dst_ptr by vp9_variance16x16, which fills *sse and supplies the return
// value.
unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr,
                                           int  src_pixels_per_line,
                                           int  xoffset,
                                           int  yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t fdata3[17 * 16];  // Temp data buffer used in filtering
  uint8_t temp2[20 * 16];
  const int16_t *hfilter, *vfilter;

  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  // Horizontal pass produces 17 rows: the vertical pass reads the row below
  // each of its 16 output rows.
  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 17, 16, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter);

  return vp9_variance16x16(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
}

686 687 688 689 690 691 692 693 694 695 696 697 698
// Sub-pixel variance for a 16x16 block with a second (compound) predictor.
//
// Filters the source with the 2-tap bilinear kernels selected by xoffset and
// yoffset, combines the result with second_pred via comp_avg_pred() into
// temp3, and returns vp9_variance16x16() of temp3 against dst_ptr.
// The sse pointer is forwarded unchanged to vp9_variance16x16().
unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr,
                                               int src_pixels_per_line,
                                               int xoffset,
                                               int yoffset,
                                               const uint8_t *dst_ptr,
                                               int dst_pixels_per_line,
                                               unsigned int *sse,
                                               const uint8_t *second_pred) {
  uint16_t fdata3[17 * 16];  // Temp data buffer used in filtering
  uint8_t temp2[20 * 16];    // Second-pass output, consumed with stride 16
  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 16);  // compound pred buffer
  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 17, 16, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter);

  // NOTE(review): comp_avg_pred() presumably averages second_pred with the
  // filtered block per pixel -- confirm against its definition.
  comp_avg_pred(temp3, second_pred, 16, 16, temp2, 16);

  return vp9_variance16x16(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
}

710 711 712 713 714 715 716
// Sub-pixel variance for a 64x64 block.
//
// Runs the source through the two bilinear filter passes, using the 2-tap
// kernels selected by xoffset (first pass) and yoffset (second pass), and
// returns vp9_variance64x64() of the filtered block against dst_ptr.
// The sse pointer is forwarded unchanged to vp9_variance64x64().
unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset,
                                           int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
  uint8_t temp2[68 * 64];    // Second-pass output, consumed with stride 64
  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 65, 64, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter);

  return vp9_variance64x64(temp2, 64, dst_ptr, dst_pixels_per_line, sse);
}

731 732 733 734 735 736 737 738
unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr,
                                               int  src_pixels_per_line,
                                               int  xoffset,
                                               int  yoffset,
                                               const uint8_t *dst_ptr,
                                               int dst_pixels_per_line,
                                               unsigned int *sse,
                                               const uint8_t *second_pred) {
739
  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
740 741 742 743
  uint8_t temp2[68 * 64];
  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64);  // compound pred buffer
  const int16_t *hfilter, *vfilter;

744 745
  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
746 747 748 749 750

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 65, 64, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter);
  comp_avg_pred(temp3, second_pred, 64, 64, temp2, 64);
751
  return vp9_variance64x64(temp3, 64, dst_ptr, dst_pixels_per_line, sse);
752 753
}

754
// Sub-pixel variance for a 32x32 block.
//
// Runs the source through the two bilinear filter passes, using the 2-tap
// kernels selected by xoffset (first pass) and yoffset (second pass), and
// returns vp9_variance32x32() of the filtered block against dst_ptr.
// The sse pointer is forwarded unchanged to vp9_variance32x32().
unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset,
                                           int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
  uint8_t temp2[36 * 32];    // Second-pass output, consumed with stride 32
  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 33, 32, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter);

  return vp9_variance32x32(temp2, 32, dst_ptr, dst_pixels_per_line, sse);
}
774

775 776 777 778 779 780 781 782
unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr,
                                               int  src_pixels_per_line,
                                               int  xoffset,
                                               int  yoffset,
                                               const uint8_t *dst_ptr,
                                               int dst_pixels_per_line,
                                               unsigned int *sse,
                                               const uint8_t *second_pred) {
783
  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
784 785 786 787
  uint8_t temp2[36 * 32];
  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 32);  // compound pred buffer
  const int16_t *hfilter, *vfilter;

788 789
  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
790 791 792 793 794

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 33, 32, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter);
  comp_avg_pred(temp3, second_pred, 32, 32, temp2, 32);
795
  return vp9_variance32x32(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
796 797
}

798
// 16x16 variance with a horizontal-only sub-pixel offset.
//
// Thin wrapper: calls vp9_sub_pixel_variance16x16_c() with xoffset = 8 and
// yoffset = 0 (8 is presumably the half-pixel position, going by the
// function name) and returns its result; sse is forwarded unchanged.
unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const int xoffset = 8;
  const int yoffset = 0;

  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}

807
// 32x32 variance with a horizontal-only sub-pixel offset.
//
// Thin wrapper: calls vp9_sub_pixel_variance32x32_c() with xoffset = 8 and
// yoffset = 0 (8 is presumably the half-pixel position, going by the
// function name) and returns its result; sse is forwarded unchanged.
unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const int xoffset = 8;
  const int yoffset = 0;

  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
815 816 817 818 819 820 821 822 823

// 64x64 variance with a horizontal-only sub-pixel offset.
//
// Thin wrapper: calls vp9_sub_pixel_variance64x64_c() with xoffset = 8 and
// yoffset = 0 (8 is presumably the half-pixel position, going by the
// function name) and returns its result; sse is forwarded unchanged.
unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const int xoffset = 8;
  const int yoffset = 0;

  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
824

825
// 16x16 variance with a vertical-only sub-pixel offset.
//
// Thin wrapper: calls vp9_sub_pixel_variance16x16_c() with xoffset = 0 and
// yoffset = 8 (8 is presumably the half-pixel position, going by the
// function name) and returns its result; sse is forwarded unchanged.
unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const int xoffset = 0;
  const int yoffset = 8;

  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}

834
// 32x32 variance with a vertical-only sub-pixel offset.
//
// Thin wrapper: calls vp9_sub_pixel_variance32x32_c() with xoffset = 0 and
// yoffset = 8 (8 is presumably the half-pixel position, going by the
// function name) and returns its result; sse is forwarded unchanged.
unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const int xoffset = 0;
  const int yoffset = 8;

  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
842 843 844 845 846 847 848 849 850

// 64x64 variance with a vertical-only sub-pixel offset.
//
// Thin wrapper: calls vp9_sub_pixel_variance64x64_c() with xoffset = 0 and
// yoffset = 8 (8 is presumably the half-pixel position, going by the
// function name) and returns its result; sse is forwarded unchanged.
unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const int xoffset = 0;
  const int yoffset = 8;

  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
851

852
// 16x16 variance with both horizontal and vertical sub-pixel offsets.
//
// Thin wrapper: calls vp9_sub_pixel_variance16x16_c() with xoffset = 8 and
// yoffset = 8 (8 is presumably the half-pixel position, going by the
// function name) and returns its result; sse is forwarded unchanged.
unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr,
                                               int source_stride,
                                               const uint8_t *ref_ptr,
                                               int recon_stride,
                                               unsigned int *sse) {
  const int xoffset = 8;
  const int yoffset = 8;

  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}

861
// 32x32 variance with both horizontal and vertical sub-pixel offsets.
//
// Thin wrapper: calls vp9_sub_pixel_variance32x32_c() with xoffset = 8 and
// yoffset = 8 (8 is presumably the half-pixel position, going by the
// function name) and returns its result; sse is forwarded unchanged.
unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr,
                                               int source_stride,
                                               const uint8_t *ref_ptr,
                                               int recon_stride,
                                               unsigned int *sse) {
  const int xoffset = 8;
  const int yoffset = 8;

  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
869 870 871 872 873 874 875 876 877

// 64x64 variance with both horizontal and vertical sub-pixel offsets.
//
// Thin wrapper: calls vp9_sub_pixel_variance64x64_c() with xoffset = 8 and
// yoffset = 8 (8 is presumably the half-pixel position, going by the
// function name) and returns its result; sse is forwarded unchanged.
unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr,
                                               int source_stride,
                                               const uint8_t *ref_ptr,
                                               int recon_stride,
                                               unsigned int *sse) {
  const int xoffset = 8;
  const int yoffset = 8;

  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
878

879
// Sub-pixel squared-error measure for a 16x16 block.
//
// Calls vp9_sub_pixel_variance16x16_c() with the same arguments, ignores its
// return value, and returns whatever that call left in *sse.
unsigned int vp9_sub_pixel_mse16x16_c(const uint8_t *src_ptr,
                                      int src_pixels_per_line,
                                      int xoffset,
                                      int yoffset,
                                      const uint8_t *dst_ptr,
                                      int dst_pixels_per_line,
                                      unsigned int *sse) {
  // Only the *sse side effect is wanted; the returned value is discarded.
  (void)vp9_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line,
                                      xoffset, yoffset, dst_ptr,
                                      dst_pixels_per_line, sse);
  return *sse;
}

892
// Sub-pixel squared-error measure for a 32x32 block.
//
// Calls vp9_sub_pixel_variance32x32_c() with the same arguments, ignores its
// return value, and returns whatever that call left in *sse.
unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr,
                                      int src_pixels_per_line,
                                      int xoffset,
                                      int yoffset,
                                      const uint8_t *dst_ptr,
                                      int dst_pixels_per_line,
                                      unsigned int *sse) {
  // Only the *sse side effect is wanted; the returned value is discarded.
  (void)vp9_sub_pixel_variance32x32_c(src_ptr, src_pixels_per_line,
                                      xoffset, yoffset, dst_ptr,
                                      dst_pixels_per_line, sse);
  return *sse;
}
904 905 906 907 908 909 910 911 912 913 914 915 916

// Sub-pixel squared-error measure for a 64x64 block.
//
// Calls vp9_sub_pixel_variance64x64_c() with the same arguments, ignores its
// return value, and returns whatever that call left in *sse.
unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr,
                                      int src_pixels_per_line,
                                      int xoffset,
                                      int yoffset,
                                      const uint8_t *dst_ptr,
                                      int dst_pixels_per_line,
                                      unsigned int *sse) {
  // Only the *sse side effect is wanted; the returned value is discarded.
  (void)vp9_sub_pixel_variance64x64_c(src_ptr, src_pixels_per_line,
                                      xoffset, yoffset, dst_ptr,
                                      dst_pixels_per_line, sse);
  return *sse;
}
Ronald S. Bultje's avatar
Ronald S. Bultje committed
917

918
// Sub-pixel variance for a 16x8 block.
//
// Runs the source through the two bilinear filter passes, using the 2-tap
// kernels selected by xoffset (first pass) and yoffset (second pass), and
// returns vp9_variance16x8() of the filtered block against dst_ptr.
// The sse pointer is forwarded unchanged to vp9_variance16x8().
unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr,
                                          int src_pixels_per_line,
                                          int xoffset,
                                          int yoffset,
                                          const uint8_t *dst_ptr,
                                          int dst_pixels_per_line,
                                          unsigned int *sse) {
  uint16_t fdata3[16 * 9];  // Temp data buffer used in filtering
  uint8_t temp2[20 * 16];   // Second-pass output, consumed with stride 16
  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 9, 16, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter);

  return vp9_variance16x8(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
}

939 940 941 942 943 944 945 946
unsigned int vp9_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr,
                                              int  src_pixels_per_line,
                                              int  xoffset,
                                              int  yoffset,
                                              const uint8_t *dst_ptr,
                                              int dst_pixels_per_line,
                                              unsigned int *sse,
                                              const uint8_t *second_pred) {
947
  uint16_t fdata3[16 * 9];  // Temp data buffer used in filtering
948 949 950 951
  uint8_t temp2[20 * 16];
  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 8);  // compound pred buffer
  const int16_t *hfilter, *vfilter;

952 953
  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
954 955 956 957 958

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 9, 16, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter);
  comp_avg_pred(temp3, second_pred, 16, 8, temp2, 16);
959
  return vp9_variance16x8(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
960 961
}

962
// Sub-pixel variance for an 8x16 block.
//
// Runs the source through the two bilinear filter passes, using the 2-tap
// kernels selected by xoffset (first pass) and yoffset (second pass), and
// returns vp9_variance8x16() of the filtered block against dst_ptr.
// The sse pointer is forwarded unchanged to vp9_variance8x16().
unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr,
                                          int src_pixels_per_line,
                                          int xoffset,
                                          int yoffset,
                                          const uint8_t *dst_ptr,
                                          int dst_pixels_per_line,
                                          unsigned int *sse) {
  uint16_t fdata3[9 * 16];  // Temp data buffer used in filtering
  uint8_t temp2[20 * 16];   // Second-pass output, consumed with stride 8
  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 17, 8, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter);

  return vp9_variance8x16(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
}
982

983 984 985 986 987 988 989 990
unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr,
                                              int  src_pixels_per_line,
                                              int  xoffset,
                                              int  yoffset,
                                              const uint8_t *dst_ptr,
                                              int dst_pixels_per_line,
                                              unsigned int *sse,
                                              const uint8_t *second_pred) {
991
  uint16_t fdata3[9 * 16];  // Temp data buffer used in filtering
992 993 994 995
  uint8_t temp2[20 * 16];
  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 16);  // compound pred buffer
  const int16_t *hfilter, *vfilter;

996 997
  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
998 999 1000 1001 1002

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 17, 8, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter);
  comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8);
1003
  return vp9_variance8x16(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
1004
}
1005 1006 1007 1008 1009 1010 1011 1012

// Sub-pixel variance for an 8x4 block.
//
// Runs the source through the two bilinear filter passes, using the 2-tap
// kernels selected by xoffset (first pass) and yoffset (second pass), and
// returns vp9_variance8x4() of the filtered block against dst_ptr.
// The sse pointer is forwarded unchanged to vp9_variance8x4().
unsigned int vp9_sub_pixel_variance8x4_c(const uint8_t *src_ptr,
                                         int src_pixels_per_line,
                                         int xoffset,
                                         int yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  uint16_t fdata3[8 * 5];  // Temp data buffer used in filtering
  uint8_t temp2[20 * 16];  // Second-pass output, consumed with stride 8
  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 5, 8, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter);

  return vp9_variance8x4(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
}

unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr,
                                             int  src_pixels_per_line,
                                             int  xoffset,
                                             int  yoffset,
                                             const uint8_t *dst_ptr,
                                             int dst_pixels_per_line,
                                             unsigned int *sse,
                                             const uint8_t *second_pred) {
1035
  uint16_t fdata3[8 * 5];  // Temp data buffer used in filtering
1036 1037 1038 1039
  uint8_t temp2[20 * 16];
  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 4);  // compound pred buffer
  const int16_t *hfilter, *vfilter;

1040 1041
  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
1042 1043 1044 1045 1046

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 5, 8, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter);
  comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8);
1047
  return vp9_variance8x4(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
1048 1049 1050 1051 1052 1053 1054 1055 1056
}

// Sub-pixel variance for a 4x8 block.
//
// Runs the source through the two bilinear filter passes, using the 2-tap
// kernels selected by xoffset (first pass) and yoffset (second pass), and
// returns vp9_variance4x8() of the filtered block against dst_ptr.
// The sse pointer is forwarded unchanged to vp9_variance4x8().
unsigned int vp9_sub_pixel_variance4x8_c(const uint8_t *src_ptr,
                                         int src_pixels_per_line,
                                         int xoffset,
                                         int yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  uint16_t fdata3[5 * 8];  // Temp data buffer used in filtering
  // FIXME(jingning,rbultje): temp2 is probably larger than it needs to be;
  // the same applies to the other block-size variants.
  uint8_t temp2[20 * 16];
  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 9, 4, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter);

  return vp9_variance4x8(temp2, 4, dst_ptr, dst_pixels_per_line, sse);
}

unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr,
                                             int  src_pixels_per_line,
                                             int  xoffset,
                                             int  yoffset,
                                             const uint8_t *dst_ptr,
                                             int dst_pixels_per_line,
                                             unsigned int *sse,
                                             const uint8_t *second_pred) {
1081
  uint16_t fdata3[5 * 8];  // Temp data buffer used in filtering
1082 1083 1084 1085
  uint8_t temp2[20 * 16];
  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 8);  // compound pred buffer
  const int16_t *hfilter, *vfilter;

1086 1087
  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
1088 1089

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1090
                                    1, 9, 4, hfilter);
1091 1092
  var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter);
  comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4);
1093
  return vp9_variance4x8(temp3, 4, dst_ptr, dst_pixels_per_line, sse);
1094
}