• John Koleszar's avatar
    Fix half-pixel variance RTCD functions · a0ae3682
    John Koleszar authored
    This patch fixes the system dependent entries for the half-pixel
    variance functions in both the RTCD and non-RTCD cases:
    
      - The generic C versions of these functions are now correct.
        Previously, all three cases called the hv code.
    
      - Wire up the ARM functions in RTCD mode
    
      - Created stubs for x86 to call the optimized subpixel functions
        with the correct parameters, rather than falling back to C
        code.
    
    Change-Id: I1d937d074d929e0eb93aacb1232cc5e0ad1c6184
    a0ae3682
variance_c.c 15.63 KiB
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include "variance.h"
// Six-tap filter coefficients, one row of six taps per table index (0..7).
// The taps of every row sum to 128.
const int vp8_six_tap[8][6] =
{
    { 0,  0,  128,    0,   0,  0 },         // note that 1/8 pel positions are just as per alpha -0.5 bicubic
    { 0, -6,  123,   12,  -1,  0 },
    { 2, -11, 108,   36,  -8,  1 },         // New 1/4 pel 6 tap filter
    { 0, -9,   93,   50,  -6,  0 },
    { 3, -16,  77,   77, -16,  3 },         // New 1/2 pel 6 tap filter
    { 0, -6,   50,   93,  -9,  0 },
    { 1, -8,   36,  108, -11,  2 },         // New 1/4 pel 6 tap filter
    { 0, -1,   12,  123,  -6,  0 }
};
// Fixed-point scale of the bilinear filter taps: each pair of taps sums to
// VP8_FILTER_WEIGHT, and filtered values are scaled back down by shifting
// right by VP8_FILTER_SHIFT after adding VP8_FILTER_WEIGHT / 2 for rounding.
const int VP8_FILTER_WEIGHT = 128;
const int VP8_FILTER_SHIFT  =   7;

// Two-tap bilinear filters. Entry k weights the two input samples by
// (128 - 16 * k) and (16 * k); entry 4 is the half-way { 64, 64 } filter.
const int vp8_bilinear_taps[8][2] =
{
    { 128,   0 },
    { 112,  16 },
    {  96,  32 },
    {  80,  48 },
    {  64,  64 },
    {  48,  80 },
    {  32,  96 },
    {  16, 112 }
};

/*
 * Returns the sum of squares of the 256 values starting at src_ptr.
 * The total is accumulated in an unsigned int.
 */
unsigned int vp8_get_mb_ss_c
(
    const short *src_ptr
)
{
    unsigned int i, sum = 0;

    for (i = 0; i < 256; i++)
        sum += (src_ptr[i] * src_ptr[i]);

    return sum;
}

/*
 * Accumulates, over a w x h block, the sum of differences (src - ref) into
 * *sum and the sum of squared differences into *sse. Both outputs are reset
 * to zero first. source_stride and recon_stride are the distances, in
 * elements, between consecutive rows of src_ptr and ref_ptr.
 */
void  vp8_variance(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    int  w,
    int  h,
    unsigned int *sse,
    int *sum)
{
    int i, j;
    int diff;

    *sum = 0;
    *sse = 0;

    for (i = 0; i < h; i++)
    {
        for (j = 0; j < w; j++)
        {
            diff = src_ptr[j] - ref_ptr[j];
            *sum += diff;
            *sse += diff * diff;
        }

        src_ptr += source_stride;
        ref_ptr += recon_stride;
    }
}

/*
 * Returns sse - (sum * sum >> log2_count).
 *
 * The square is computed in unsigned arithmetic: for a 16x16 block |sum| can
 * reach 255 * 256 = 65280, whose square does not fit in a signed 32-bit int
 * (signed overflow is undefined behaviour) but does fit in 32 unsigned bits.
 * Converting a negative sum to unsigned still yields the correct square
 * modulo 2^32, so the result is exact for every block size used here.
 */
static unsigned int variance_from_sums(unsigned int sse, int sum, int log2_count)
{
    const unsigned int usum = (unsigned int)sum;

    return sse - ((usum * usum) >> log2_count);
}

/*
 * 8x8 block: stores the sum of squared differences in *SSE and the sum of
 * differences in *Sum, and returns *SSE - ((*Sum * *Sum) >> 6).
 */
unsigned int vp8_get8x8var_c
(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *SSE,
    int *Sum
)
{
    vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, SSE, Sum);
    return variance_from_sums(*SSE, *Sum, 6);
}

/*
 * 16x16 block: stores the sum of squared differences in *SSE and the sum of
 * differences in *Sum, and returns *SSE - ((*Sum * *Sum) >> 8).
 */
unsigned int vp8_get16x16var_c
(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *SSE,
    int *Sum
)
{
    vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, SSE, Sum);
    return variance_from_sums(*SSE, *Sum, 8);
}

/*
 * 16x16 block: stores the sum of squared differences in *sse and returns
 * sse - (sum^2 >> 8), where sum is the sum of differences.
 */
unsigned int vp8_variance16x16_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int var;
    int avg;

    vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
    *sse = var;
    return variance_from_sums(var, avg, 8);
}

/*
 * 8 wide x 16 high block: stores the sum of squared differences in *sse and
 * returns sse - (sum^2 >> 7).
 */
unsigned int vp8_variance8x16_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int var;
    int avg;

    vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg);
    *sse = var;
    return variance_from_sums(var, avg, 7);
}

/*
 * 16 wide x 8 high block: stores the sum of squared differences in *sse and
 * returns sse - (sum^2 >> 7).
 */
unsigned int vp8_variance16x8_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int var;
    int avg;

    vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg);
    *sse = var;
    return variance_from_sums(var, avg, 7);
}

/*
 * 8x8 block: stores the sum of squared differences in *sse and returns
 * sse - (sum^2 >> 6).
 */
unsigned int vp8_variance8x8_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int var;
    int avg;

    vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg);
    *sse = var;
    return variance_from_sums(var, avg, 6);
}

/*
 * 4x4 block: stores the sum of squared differences in *sse and returns
 * sse - (sum^2 >> 4).
 */
unsigned int vp8_variance4x4_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int var;
    int avg;

    vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &var, &avg);
    *sse = var;
    return variance_from_sums(var, avg, 4);
}

/*
 * 16x16 block: stores the sum of squared differences in *sse and returns the
 * same value (no mean correction is applied).
 */
unsigned int vp8_mse16x16_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int var;
    int avg;

    vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
    *sse = var;
    return var;
}

/****************************************************************************
 *
 *  ROUTINE       : filter_block2d_bil_first_pass
 *
 *  INPUTS        : src_ptr             : Pointer to source block (8-bit samples).
 *                  src_pixels_per_line : Stride of input block.
 *                  pixel_step          : Offset between the two filter input
 *                                        samples (see notes).
 *                  output_height       : Number of rows to produce.
 *                  output_width        : Number of columns to produce.
 *                  vp8_filter          : Array of 2 bi-linear filter taps.
 *
 *  OUTPUTS       : output_ptr          : Filtered block, stored as unsigned
 *                                        short, packed with a row pitch of
 *                                        output_width.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block
 *                  to produce the filtered output block. Used to implement
 *                  the first pass of a 2-D separable filter.
 *
 *  SPECIAL NOTES : Two filter taps should sum to VP8_FILTER_WEIGHT.
 *                  pixel_step is the offset from one filter input to the
 *                  next: 1 filters horizontally, the stride filters
 *                  vertically. Every output sample reads
 *                  src_ptr[pixel_step], even when the second tap is zero, so
 *                  that element must be readable.
 *
 ****************************************************************************/
void vp8e_filter_block2d_bil_first_pass
(
    const unsigned char *src_ptr,
    unsigned short *output_ptr,
    unsigned int src_pixels_per_line,
    int pixel_step,
    unsigned int output_height,
    unsigned int output_width,
    const int *vp8_filter
)
{
    unsigned int i, j;

    for (i = 0; i < output_height; i++)
    {
        for (j = 0; j < output_width; j++)
        {
            // Apply bilinear filter
            output_ptr[j] = (((int)src_ptr[0]          * vp8_filter[0]) +
                             ((int)src_ptr[pixel_step] * vp8_filter[1]) +
                             (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
            src_ptr++;
        }

        // Next row...
        src_ptr    += src_pixels_per_line - output_width;
        output_ptr += output_width;
    }
}

/****************************************************************************
 *
 *  ROUTINE       : filter_block2d_bil_second_pass
 *
 *  INPUTS        : src_ptr             : Pointer to the intermediate block
 *                                        (unsigned short samples, as written
 *                                        by the first pass).
 *                  src_pixels_per_line : Stride of input block.
 *                  pixel_step          : Offset between the two filter input
 *                                        samples (see notes).
 *                  output_height       : Number of rows to produce.
 *                  output_width        : Number of columns to produce.
 *                  vp8_filter          : Array of 2 bi-linear filter taps.
 *
 *  OUTPUTS       : output_ptr          : Filtered block, stored as unsigned
 *                                        char, packed with a row pitch of
 *                                        output_width.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the intermediate
 *                  block to produce the filtered output block. Used to
 *                  implement the second pass of a 2-D separable filter.
 *
 *  SPECIAL NOTES : Two filter taps should sum to VP8_FILTER_WEIGHT.
 *                  pixel_step is the offset from one filter input to the
 *                  next: 1 filters horizontally, the stride filters
 *                  vertically. Callers in this file pass the intermediate
 *                  row pitch, so the filter runs vertically and needs one
 *                  more input row than output rows.
 *
 ****************************************************************************/
void vp8e_filter_block2d_bil_second_pass
(
    const unsigned short *src_ptr,
    unsigned char  *output_ptr,
    unsigned int  src_pixels_per_line,
    unsigned int  pixel_step,
    unsigned int  output_height,
    unsigned int  output_width,
    const int *vp8_filter
)
{
    unsigned int  i, j;
    int  Temp;

    for (i = 0; i < output_height; i++)
    {
        for (j = 0; j < output_width; j++)
        {
            // Apply filter
            Temp = ((int)src_ptr[0]          * vp8_filter[0]) +
                   ((int)src_ptr[pixel_step] * vp8_filter[1]) +
                   (VP8_FILTER_WEIGHT / 2);
            output_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
            src_ptr++;
        }

        // Next row...
        src_ptr    += src_pixels_per_line - output_width;
        output_ptr += output_width;
    }
}

/****************************************************************************
 *
 *  ROUTINE       : filter_block2d_bil
 *
 *  INPUTS        : src_ptr             : Pointer to source block.
 *                  src_pixels_per_line : Stride of input block.
 *                  HFilter             : Array of 2 horizontal filter taps.
 *                  VFilter             : Array of 2 vertical filter taps.
 *
 *  OUTPUTS       : output_ptr          : Pointer to the filtered 8x8 block
 *                                        (unsigned char, row pitch 8).
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : 2-D filters an 8x8 input block by applying a 2-tap
 *                  bi-linear filter horizontally followed by a 2-tap
 *                  bi-linear filter vertically on the result.
 *
 *  SPECIAL NOTES : The horizontally filtered intermediate block holds one
 *                  more row than the output (9 rows of 8 samples), so that
 *                  the vertical 2-tap filter has a second input row available
 *                  for the last output row.
 *
 ****************************************************************************/
void vp8e_filter_block2d_bil
(
    const unsigned char *src_ptr,
    unsigned char *output_ptr,
    unsigned int   src_pixels_per_line,
    int  *HFilter,
    int  *VFilter
)
{
    unsigned short FData[9 * 8];   // Intermediate buffer: 9 rows of 8 samples

    // First filter 1-D horizontally...
    vp8e_filter_block2d_bil_first_pass(src_ptr, FData, src_pixels_per_line, 1, 9, 8, HFilter);

    // then 1-D vertically...
    vp8e_filter_block2d_bil_second_pass(FData, output_ptr, 8, 8, 8, 8, VFilter);
}

/*
 * Bilinearly filters a 4x4 block of src_ptr with taps vp8_bilinear_taps[xoffset]
 * (horizontal) and vp8_bilinear_taps[yoffset] (vertical), then returns
 * vp8_variance4x4_c of the filtered block against dst_ptr; *sse receives the
 * sum of squared differences. xoffset and yoffset must be in 0..7. The filter
 * reads 5 rows of 5 samples from src_ptr.
 */
unsigned int vp8_sub_pixel_variance4x4_c
(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    unsigned char  temp2[20*16];
    const int *HFilter, *VFilter;
    unsigned short FData3[5*4];   // Intermediate buffer: 5 rows of 4 samples

    HFilter = vp8_bilinear_taps[xoffset];
    VFilter = vp8_bilinear_taps[yoffset];

    // First filter 1-D horizontally
    vp8e_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 5, 4, HFilter);

    // Now filter vertically
    vp8e_filter_block2d_bil_second_pass(FData3, temp2, 4,  4,  4,  4, VFilter);

    return vp8_variance4x4_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse);
}

/*
 * 8x8 counterpart of vp8_sub_pixel_variance4x4_c: filters with the bilinear
 * taps selected by xoffset / yoffset (each 0..7) and returns
 * vp8_variance8x8_c of the result against dst_ptr. Reads 9 rows of 9 samples
 * from src_ptr.
 */
unsigned int vp8_sub_pixel_variance8x8_c
(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    unsigned short FData3[9*8];   // Intermediate buffer: 9 rows of 8 samples
    unsigned char  temp2[20*16];
    const int *HFilter, *VFilter;

    HFilter = vp8_bilinear_taps[xoffset];
    VFilter = vp8_bilinear_taps[yoffset];

    vp8e_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 8, HFilter);
    vp8e_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 8, 8, VFilter);

    return vp8_variance8x8_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
}

/*
 * 16x16 counterpart of vp8_sub_pixel_variance4x4_c: filters with the bilinear
 * taps selected by xoffset / yoffset (each 0..7) and returns
 * vp8_variance16x16_c of the result against dst_ptr. Reads 17 rows of 17
 * samples from src_ptr.
 */
unsigned int vp8_sub_pixel_variance16x16_c
(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    unsigned short FData3[17*16];   // Intermediate buffer: 17 rows of 16 samples
    unsigned char  temp2[20*16];
    const int *HFilter, *VFilter;

    HFilter = vp8_bilinear_taps[xoffset];
    VFilter = vp8_bilinear_taps[yoffset];

    vp8e_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 16, HFilter);
    vp8e_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 16, 16, VFilter);

    return vp8_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
}

/*
 * 16x16 variance with the source filtered by the { 64, 64 } taps horizontally
 * only (xoffset 4, yoffset 0).
 */
unsigned int vp8_variance_halfpixvar16x16_h_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 4, 0,
                                         ref_ptr, recon_stride, sse);
}

/*
 * 16x16 variance with the source filtered by the { 64, 64 } taps vertically
 * only (xoffset 0, yoffset 4).
 */
unsigned int vp8_variance_halfpixvar16x16_v_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 4,
                                         ref_ptr, recon_stride, sse);
}

/*
 * 16x16 variance with the source filtered by the { 64, 64 } taps in both
 * directions (xoffset 4, yoffset 4).
 */
unsigned int vp8_variance_halfpixvar16x16_hv_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 4, 4,
                                         ref_ptr, recon_stride, sse);
}

/*
 * Runs vp8_sub_pixel_variance16x16_c and returns the sum of squared
 * differences it stored in *sse instead of the variance.
 */
unsigned int vp8_sub_pixel_mse16x16_c
(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    vp8_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line, xoffset, yoffset,
                                  dst_ptr, dst_pixels_per_line, sse);
    return *sse;
}

/*
 * 16 wide x 8 high counterpart of vp8_sub_pixel_variance4x4_c: returns
 * vp8_variance16x8_c of the filtered block against dst_ptr. Reads 9 rows of
 * 17 samples from src_ptr.
 */
unsigned int vp8_sub_pixel_variance16x8_c
(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    unsigned short FData3[9*16];   // Intermediate buffer: 9 rows of 16 samples
    unsigned char  temp2[20*16];
    const int *HFilter, *VFilter;

    HFilter = vp8_bilinear_taps[xoffset];
    VFilter = vp8_bilinear_taps[yoffset];

    vp8e_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 16, HFilter);
    vp8e_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 8, 16, VFilter);

    return vp8_variance16x8_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
}

/*
 * 8 wide x 16 high counterpart of vp8_sub_pixel_variance4x4_c: returns
 * vp8_variance8x16_c of the filtered block against dst_ptr. Reads 17 rows of
 * 9 samples from src_ptr.
 */
unsigned int vp8_sub_pixel_variance8x16_c
(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    unsigned short FData3[17 * 8];   // Intermediate buffer: 17 rows of 8 samples
    unsigned char  temp2[20*16];
    const int *HFilter, *VFilter;

    HFilter = vp8_bilinear_taps[xoffset];
    VFilter = vp8_bilinear_taps[yoffset];

    vp8e_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 8, HFilter);
    vp8e_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 16, 8, VFilter);

    return vp8_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
}