• John Koleszar's avatar
    Convert subpixel filters to use convolve framework · 7a07eea1
    John Koleszar authored
    Update the code to call the new convolution functions to do subpixel
    prediction rather than the existing functions. Remove the old C and
    assembly code, since it is unused. This causes a 50% performance
    reduction on the decoder, but that will be resolved when the asm for
    the new functions is available.
    
    There is no consensus for whether 6-tap or 2-tap predictors will be
    supported in the final codec, so these filters are implemented in
    terms of the 8-tap code, so that quality testing of these modes
    can continue. Implementing the lower complexity algorithms is a
    simple exercise, should it be necessary.
    
    This code produces slightly better results in the EIGHTTAP_SMOOTH
    case, since the filter is now applied in only one direction when
    the subpel motion is only in one direction. Like the previous code,
    the filtering is skipped entirely on full-pel MVs. This combination
    seems to give the best quality gains, but this may be indicative of a
    bug in the encoder's filter selection, since the encoder could
    achieve the result of skipping the filtering on full-pel by selecting
    one of the other filters. This should be revisited.
    
    Quality gains on derf are positive on almost all clips. The only clip
    that seemed to be hurt at all datarates was football
    (-0.115% PSNR average, -0.587% min). Overall averages 0.375% PSNR,
    0.347% SSIM.
    
    Change-Id: I7d469716091b1d89b4b08adde5863999319d69ff
    7a07eea1
vp9_variance_c.c 16.57 KiB
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include "vp9/encoder/vp9_variance.h"
#include "vp9/common/vp9_filter.h"
#include "vp9/common/vp9_subpelvar.h"
#include "vpx/vpx_integer.h"
// Returns the sum of the squares of the first 256 entries of src_ptr.
//
// src_ptr must point to at least 256 readable int16_t values. Each product
// is formed in int (an int16_t squared is at most 1 << 30, so it cannot
// overflow a 32-bit int) and is then accumulated into an unsigned int, which
// wraps modulo UINT_MAX + 1 if the total exceeds its range.
unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) {
  unsigned int i, sum = 0;

  for (i = 0; i < 256; i++) {
    sum += (src_ptr[i] * src_ptr[i]);
  }

  return sum;
}
// 64x64 variance between the src_ptr and ref_ptr blocks.
//
// Calls variance() with a 64x64 block size, stores the first result (var)
// in *sse and returns var - ((avg * avg) >> 12); the shift of 12 matches
// 64 * 64 == 1 << 12.
// NOTE(review): variance() is declared outside this file; presumably var is
// the sum of squared differences and avg the sum of differences - confirm.
//
// The square of avg is computed in int64_t before the shift.
unsigned int vp9_variance64x64_c(const uint8_t *src_ptr,
                                 int  source_stride,
                                 const uint8_t *ref_ptr,
                                 int  recon_stride,
                                 unsigned int *sse) {
  unsigned int var;
  int avg;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 64, &var, &avg);
  *sse = var;
  return (var - (((int64_t)avg * avg) >> 12));
}
// 32x32 variance between the src_ptr and ref_ptr blocks.
//
// Calls variance() with a 32x32 block size, stores the first result (var)
// in *sse and returns var - ((avg * avg) >> 10); the shift of 10 matches
// 32 * 32 == 1 << 10.
// NOTE(review): variance() is declared outside this file; presumably var is
// the sum of squared differences and avg the sum of differences - confirm.
//
// The square of avg is computed in int64_t before the shift.
unsigned int vp9_variance32x32_c(const uint8_t *src_ptr,
                                 int  source_stride,
                                 const uint8_t *ref_ptr,
                                 int  recon_stride,
                                 unsigned int *sse) {
  unsigned int var;
  int avg;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32, &var, &avg);
  *sse = var;
  return (var - (((int64_t)avg * avg) >> 10));
}
// 16x16 variance between the src_ptr and ref_ptr blocks.
//
// Calls variance() with a 16x16 block size, stores the first result (var)
// in *sse and returns var - ((avg * avg) >> 8); the shift of 8 matches
// 16 * 16 == 1 << 8.
// NOTE(review): variance() is declared outside this file; presumably var is
// the sum of squared differences and avg the sum of differences - confirm.
//
// Unlike the 32x32 and 64x64 variants, the square of avg is computed in
// unsigned int arithmetic.
// NOTE(review): assumes |avg| is small enough that avg * avg fits in an
// unsigned int for this block size - confirm against variance().
unsigned int vp9_variance16x16_c(const uint8_t *src_ptr,
                                 int  source_stride,
                                 const uint8_t *ref_ptr,
                                 int  recon_stride,
                                 unsigned int *sse) {
  unsigned int var;
  int avg;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
  *sse = var;
  return (var - (((unsigned int)avg * avg) >> 8));
}
// ---------------------------------------------------------------------------
// Small-block variance wrappers.
//
// Each function calls variance() with a fixed block size, stores the first
// result (var) in *sse and returns var - ((avg * avg) >> shift), where
// 1 << shift equals the number of pixels in the block. The square of avg is
// computed in unsigned int arithmetic.
// NOTE(review): variance() is declared outside this file; presumably var is
// the sum of squared differences and avg the sum of differences - confirm.
// ---------------------------------------------------------------------------

// 8x16 block: shift of 7 (8 * 16 == 1 << 7).
unsigned int vp9_variance8x16_c(const uint8_t *src_ptr,
                                int  source_stride,
                                const uint8_t *ref_ptr,
                                int  recon_stride,
                                unsigned int *sse) {
  unsigned int var;
  int avg;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg);
  *sse = var;
  return (var - (((unsigned int)avg * avg) >> 7));
}

// 16x8 block: shift of 7 (16 * 8 == 1 << 7).
unsigned int vp9_variance16x8_c(const uint8_t *src_ptr,
                                int  source_stride,
                                const uint8_t *ref_ptr,
                                int  recon_stride,
                                unsigned int *sse) {
  unsigned int var;
  int avg;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg);
  *sse = var;
  return (var - (((unsigned int)avg * avg) >> 7));
}

// 8x8 block: shift of 6 (8 * 8 == 1 << 6).
unsigned int vp9_variance8x8_c(const uint8_t *src_ptr,
                               int  source_stride,
                               const uint8_t *ref_ptr,
                               int  recon_stride,
                               unsigned int *sse) {
  unsigned int var;
  int avg;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg);
  *sse = var;
  return (var - (((unsigned int)avg * avg) >> 6));
}

// 4x4 block: shift of 4 (4 * 4 == 1 << 4).
unsigned int vp9_variance4x4_c(const uint8_t *src_ptr,
                               int  source_stride,
                               const uint8_t *ref_ptr,
                               int  recon_stride,
                               unsigned int *sse) {
  unsigned int var;
  int avg;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &var, &avg);
  *sse = var;
  return (var - (((unsigned int)avg * avg) >> 4));
}

// 16x16 "MSE": same variance() call as vp9_variance16x16_c, but the first
// result is returned (and stored in *sse) without subtracting the avg term.
unsigned int vp9_mse16x16_c(const uint8_t *src_ptr,
                            int  source_stride,
                            const uint8_t *ref_ptr,
                            int  recon_stride,
                            unsigned int *sse) {
  unsigned int var;
  int avg;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
  *sse = var;
  return var;
}

// ---------------------------------------------------------------------------
// Sub-pixel variance.
//
// Each function looks up a horizontal and a vertical filter from xoffset and
// yoffset via VP9_BILINEAR_FILTERS_2TAP, runs the two bilinear passes
// (src_ptr -> FData3 -> temp2), and then returns the plain variance of temp2
// against dst_ptr for the matching block size.
// NOTE(review): the filter helpers are declared outside this file; the first
// pass is presumably asked for one more row than the block height so the
// vertical pass has a row below to blend with - confirm.
// ---------------------------------------------------------------------------

unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr,
                                         int  src_pixels_per_line,
                                         int  xoffset,
                                         int  yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  uint8_t temp2[20 * 16];
  const int16_t *HFilter, *VFilter;
  uint16_t FData3[5 * 4];  // Temp data buffer used in filtering

  HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
  VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);

  // First filter 1d Horizontal
  var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line,
                                    1, 5, 4, HFilter);

  // Now filter Vertically
  var_filter_block2d_bil_second_pass(FData3, temp2, 4, 4, 4, 4, VFilter);

  return vp9_variance4x4_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse);
}

unsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr,
                                         int  src_pixels_per_line,
                                         int  xoffset,
                                         int  yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  uint16_t FData3[9 * 8];  // Temp data buffer used in filtering
  uint8_t temp2[20 * 16];
  const int16_t *HFilter, *VFilter;

  HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
  VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line,
                                    1, 9, 8, HFilter);
  var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 8, 8, VFilter);

  return vp9_variance8x8_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
}

unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr,
                                           int  src_pixels_per_line,
                                           int  xoffset,
                                           int  yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t FData3[17 * 16];  // Temp data buffer used in filtering
  uint8_t temp2[20 * 16];
  const int16_t *HFilter, *VFilter;

  HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
  VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line,
                                    1, 17, 16, HFilter);
  var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 16, 16, VFilter);

  return vp9_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
}

unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr,
                                           int  src_pixels_per_line,
                                           int  xoffset,
                                           int  yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t FData3[65 * 64];  // Temp data buffer used in filtering
  uint8_t temp2[68 * 64];
  const int16_t *HFilter, *VFilter;

  HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
  VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line,
                                    1, 65, 64, HFilter);
  var_filter_block2d_bil_second_pass(FData3, temp2, 64, 64, 64, 64, VFilter);

  return vp9_variance64x64_c(temp2, 64, dst_ptr, dst_pixels_per_line, sse);
}

unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr,
                                           int  src_pixels_per_line,
                                           int  xoffset,
                                           int  yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t FData3[33 * 32];  // Temp data buffer used in filtering
  uint8_t temp2[36 * 32];
  const int16_t *HFilter, *VFilter;

  HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
  VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line,
                                    1, 33, 32, HFilter);
  var_filter_block2d_bil_second_pass(FData3, temp2, 32, 32, 32, 32, VFilter);

  return vp9_variance32x32_c(temp2, 32, dst_ptr, dst_pixels_per_line, sse);
}

// ---------------------------------------------------------------------------
// Half-pixel variance shortcuts.
//
// Thin wrappers over the sub-pixel variance functions with fixed offsets:
// _h passes (xoffset, yoffset) = (8, 0), _v passes (0, 8), _hv passes (8, 8).
// NOTE(review): an offset of 8 presumably selects the half-pel entry of the
// bilinear filter table - confirm against VP9_BILINEAR_FILTERS_2TAP.
// ---------------------------------------------------------------------------

unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr,
                                              int  source_stride,
                                              const uint8_t *ref_ptr,
                                              int  recon_stride,
                                              unsigned int *sse) {
  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 0,
                                       ref_ptr, recon_stride, sse);
}

unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr,
                                              int  source_stride,
                                              const uint8_t *ref_ptr,
                                              int  recon_stride,
                                              unsigned int *sse) {
  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 0,
                                       ref_ptr, recon_stride, sse);
}

unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr,
                                              int  source_stride,
                                              const uint8_t *ref_ptr,
                                              int  recon_stride,
                                              unsigned int *sse) {
  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 0,
                                       ref_ptr, recon_stride, sse);
}

unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr,
                                              int  source_stride,
                                              const uint8_t *ref_ptr,
                                              int  recon_stride,
                                              unsigned int *sse) {
  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 8,
                                       ref_ptr, recon_stride, sse);
}

unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr,
                                              int  source_stride,
                                              const uint8_t *ref_ptr,
                                              int  recon_stride,
                                              unsigned int *sse) {
  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 0, 8,
                                       ref_ptr, recon_stride, sse);
}

unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr,
                                              int  source_stride,
                                              const uint8_t *ref_ptr,
                                              int  recon_stride,
                                              unsigned int *sse) {
  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 0, 8,
                                       ref_ptr, recon_stride, sse);
}

unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr,
                                               int  source_stride,
                                               const uint8_t *ref_ptr,
                                               int  recon_stride,
                                               unsigned int *sse) {
  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 8,
                                       ref_ptr, recon_stride, sse);
}

unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr,
                                               int  source_stride,
                                               const uint8_t *ref_ptr,
                                               int  recon_stride,
                                               unsigned int *sse) {
  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 8,
                                       ref_ptr, recon_stride, sse);
}

unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr,
                                               int  source_stride,
                                               const uint8_t *ref_ptr,
                                               int  recon_stride,
                                               unsigned int *sse) {
  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 8,
                                       ref_ptr, recon_stride, sse);
}

// ---------------------------------------------------------------------------
// Sub-pixel "MSE".
//
// Each function runs the matching sub-pixel variance function only for its
// side effect of filling *sse, discards the variance it returns, and returns
// *sse instead.
// ---------------------------------------------------------------------------

unsigned int vp9_sub_pixel_mse16x16_c(const uint8_t *src_ptr,
                                      int  src_pixels_per_line,
                                      int  xoffset,
                                      int  yoffset,
                                      const uint8_t *dst_ptr,
                                      int dst_pixels_per_line,
                                      unsigned int *sse) {
  vp9_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line,
                                xoffset, yoffset, dst_ptr,
                                dst_pixels_per_line, sse);
  return *sse;
}

unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr,
                                      int  src_pixels_per_line,
                                      int  xoffset,
                                      int  yoffset,
                                      const uint8_t *dst_ptr,
                                      int dst_pixels_per_line,
                                      unsigned int *sse) {
  vp9_sub_pixel_variance32x32_c(src_ptr, src_pixels_per_line,
                                xoffset, yoffset, dst_ptr,
                                dst_pixels_per_line, sse);
  return *sse;
}

unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr,
                                      int  src_pixels_per_line,
                                      int  xoffset,
                                      int  yoffset,
                                      const uint8_t *dst_ptr,
                                      int dst_pixels_per_line,
                                      unsigned int *sse) {
  vp9_sub_pixel_variance64x64_c(src_ptr, src_pixels_per_line,
                                xoffset, yoffset, dst_ptr,
                                dst_pixels_per_line, sse);
  return *sse;
}

// Sub-pixel variance for a 16x8 block; same two-pass scheme as the square
// block variants above.
unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr,
                                          int  src_pixels_per_line,
                                          int  xoffset,
                                          int  yoffset,
                                          const uint8_t *dst_ptr,
                                          int dst_pixels_per_line,
                                          unsigned int *sse) {
  uint16_t FData3[16 * 9];  // Temp data buffer used in filtering
  uint8_t temp2[20 * 16];
  const int16_t *HFilter, *VFilter;

  HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
  VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line,
                                    1, 9, 16, HFilter);
  var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 8, 16, VFilter);

  return vp9_variance16x8_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
}

// Sub-pixel variance for an 8x16 block; same two-pass scheme as the square
// block variants above.
unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr,
                                          int  src_pixels_per_line,
                                          int  xoffset,
                                          int  yoffset,
                                          const uint8_t *dst_ptr,
                                          int dst_pixels_per_line,
                                          unsigned int *sse) {
  // NOTE(review): declared as 9 * 16 (144 entries) while the first pass below
  // is asked for 17 x 8; assuming it writes 17 * 8 == 136 entries this fits,
  // but the size expression reads as if copied from the 16x8 variant.
  uint16_t FData3[9 * 16];  // Temp data buffer used in filtering
  uint8_t temp2[20 * 16];
  const int16_t *HFilter, *VFilter;

  HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
  VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line,
                                    1, 17, 8, HFilter);
  var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 16, 8, VFilter);

  return vp9_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
}