Commit d393885a authored by Johann

Remove halfpix specialization

These functions exist only as shortcuts to sub-pixel variance with
predefined offsets: xoffset = 4 for horizontal, yoffset = 4 for
vertical, and both set to 4 for "hv".

Removing them allows the existing optimizations for the sub-pixel
variance functions to be used. Instead of having only sse2
optimizations, this gives sse2, ssse3, msa and neon.

BUG=webm:1273

Change-Id: Ieb407b423b91b87d33c4263c6a1ad5e673b0efd6
parent c325fb74
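
For reference, each removed helper was a one-line wrapper around the generic
sub-pixel variance function with fixed offsets, and its call sites now pass
those offsets directly. A minimal sketch of the equivalence (not part of the
change; the pred/src names and the wrapper function are placeholders, and
offset 4 selects the half-pel position):

#include "./vpx_dsp_rtcd.h"
#include "vpx/vpx_integer.h"

/* Sketch: the removed 16x16 "halfpix" helpers equal the generic sub-pixel
 * variance entry point called with fixed offsets, so callers now pick up
 * whichever optimized version the RTCD table dispatches to. */
static uint32_t halfpix_equivalents(const uint8_t *pred, int pred_stride,
                                    const uint8_t *src, int src_stride) {
  uint32_t sse;
  uint32_t h;  /* was vpx_variance_halfpixvar16x16_h(pred, ..., src, ...) */
  uint32_t v;  /* was vpx_variance_halfpixvar16x16_v(...) */
  uint32_t hv; /* was vpx_variance_halfpixvar16x16_hv(...) */
  h = vpx_sub_pixel_variance16x16(pred, pred_stride, 4, 0, src,
                                  src_stride, &sse);
  v = vpx_sub_pixel_variance16x16(pred, pred_stride, 0, 4, src,
                                  src_stride, &sse);
  hv = vpx_sub_pixel_variance16x16(pred, pred_stride, 4, 4, src,
                                   src_stride, &sse);
  return h + v + hv;
}
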
@@ -409,7 +409,8 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
/* go left then right and check error */
this_mv.as_mv.row = startmv.as_mv.row;
this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
/* "halfpix" horizontal variance */
thismse = vfp->svf(y - 1, y_stride, 4, 0, z, b->src_stride, &sse);
left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (left < bestmse) {
@@ -420,7 +421,8 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
}
this_mv.as_mv.col += 8;
thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
/* "halfpix" horizontal variance */
thismse = vfp->svf(y, y_stride, 4, 0, z, b->src_stride, &sse);
right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (right < bestmse) {
@@ -433,7 +435,8 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
/* go up then down and check error */
this_mv.as_mv.col = startmv.as_mv.col;
this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
/* "halfpix" vertical variance */
thismse = vfp->svf(y - y_stride, y_stride, 0, 4, z, b->src_stride, &sse);
up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (up < bestmse) {
@@ -444,7 +447,8 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
}
this_mv.as_mv.row += 8;
thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
/* "halfpix" vertical variance */
thismse = vfp->svf(y, y_stride, 0, 4, z, b->src_stride, &sse);
down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (down < bestmse) {
@@ -462,25 +466,28 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
case 0:
this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z,
b->src_stride, &sse);
/* "halfpix" horizontal/vertical variance */
thismse =
vfp->svf(y - 1 - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
break;
case 1:
this_mv.as_mv.col += 4;
this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
thismse =
vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse);
/* "halfpix" horizontal/vertical variance */
thismse = vfp->svf(y - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
break;
case 2:
this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
this_mv.as_mv.row += 4;
thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse);
/* "halfpix" horizontal/vertical variance */
thismse = vfp->svf(y - 1, y_stride, 4, 4, z, b->src_stride, &sse);
break;
case 3:
default:
this_mv.as_mv.col += 4;
this_mv.as_mv.row += 4;
thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse);
/* "halfpix" horizontal/vertical variance */
thismse = vfp->svf(y, y_stride, 4, 4, z, b->src_stride, &sse);
break;
}
@@ -698,7 +705,8 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
/* go left then right and check error */
this_mv.as_mv.row = startmv.as_mv.row;
this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
/* "halfpix" horizontal variance */
thismse = vfp->svf(y - 1, y_stride, 4, 0, z, b->src_stride, &sse);
left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (left < bestmse) {
@@ -709,7 +717,8 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
}
this_mv.as_mv.col += 8;
thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
/* "halfpix" horizontal variance */
thismse = vfp->svf(y, y_stride, 4, 0, z, b->src_stride, &sse);
right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (right < bestmse) {
@@ -722,7 +731,8 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
/* go up then down and check error */
this_mv.as_mv.col = startmv.as_mv.col;
this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
/* "halfpix" vertical variance */
thismse = vfp->svf(y - y_stride, y_stride, 0, 4, z, b->src_stride, &sse);
up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (up < bestmse) {
@@ -733,7 +743,8 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
}
this_mv.as_mv.row += 8;
thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
/* "halfpix" vertical variance */
thismse = vfp->svf(y, y_stride, 0, 4, z, b->src_stride, &sse);
down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (down < bestmse) {
@@ -751,25 +762,28 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
case 0:
this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z,
b->src_stride, &sse);
/* "halfpix" horizontal/vertical variance */
thismse =
vfp->svf(y - 1 - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
break;
case 1:
this_mv.as_mv.col += 4;
this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
thismse =
vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse);
/* "halfpix" horizontal/vertical variance */
thismse = vfp->svf(y - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
break;
case 2:
this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
this_mv.as_mv.row += 4;
thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse);
/* "halfpix" horizontal/vertical variance */
thismse = vfp->svf(y - 1, y_stride, 4, 4, z, b->src_stride, &sse);
break;
case 3:
default:
this_mv.as_mv.col += 4;
this_mv.as_mv.row += 4;
thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse);
/* "halfpix" horizontal/vertical variance */
thismse = vfp->svf(y, y_stride, 4, 4, z, b->src_stride, &sse);
break;
}
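The hunks above step the candidate motion vector by half a pel around a
full-pel start position. A small standalone illustration of the stepping
arithmetic (assuming eighth-pel motion vector units with startmv full-pel
aligned at this point, i.e. its components are multiples of 8):

#include <assert.h>

/* (v - 8) | 4 lands half a pel to the left/above the start; adding 8
 * afterwards lands half a pel to the right/below. These are the positions
 * the new vfp->svf() calls evaluate with offset 4. */
static void halfpel_step_example(void) {
  int col = 16;             /* full-pel aligned: a multiple of 8 */
  int left = (col - 8) | 4; /* 12 == col - 4 */
  int right = left + 8;     /* 20 == col + 4 */
  assert(left == col - 4);
  assert(right == col + 4);
}
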
@@ -1914,9 +1914,6 @@ struct VP8_COMP *vp8_create_compressor(VP8_CONFIG *oxcf) {
cpi->fn_ptr[BLOCK_16X16].sdf = vpx_sad16x16;
cpi->fn_ptr[BLOCK_16X16].vf = vpx_variance16x16;
cpi->fn_ptr[BLOCK_16X16].svf = vpx_sub_pixel_variance16x16;
cpi->fn_ptr[BLOCK_16X16].svf_halfpix_h = vpx_variance_halfpixvar16x16_h;
cpi->fn_ptr[BLOCK_16X16].svf_halfpix_v = vpx_variance_halfpixvar16x16_v;
cpi->fn_ptr[BLOCK_16X16].svf_halfpix_hv = vpx_variance_halfpixvar16x16_hv;
cpi->fn_ptr[BLOCK_16X16].sdx3f = vpx_sad16x16x3;
cpi->fn_ptr[BLOCK_16X16].sdx8f = vpx_sad16x16x8;
cpi->fn_ptr[BLOCK_16X16].sdx4df = vpx_sad16x16x4d;
@@ -1924,9 +1921,6 @@ struct VP8_COMP *vp8_create_compressor(VP8_CONFIG *oxcf) {
cpi->fn_ptr[BLOCK_16X8].sdf = vpx_sad16x8;
cpi->fn_ptr[BLOCK_16X8].vf = vpx_variance16x8;
cpi->fn_ptr[BLOCK_16X8].svf = vpx_sub_pixel_variance16x8;
cpi->fn_ptr[BLOCK_16X8].svf_halfpix_h = NULL;
cpi->fn_ptr[BLOCK_16X8].svf_halfpix_v = NULL;
cpi->fn_ptr[BLOCK_16X8].svf_halfpix_hv = NULL;
cpi->fn_ptr[BLOCK_16X8].sdx3f = vpx_sad16x8x3;
cpi->fn_ptr[BLOCK_16X8].sdx8f = vpx_sad16x8x8;
cpi->fn_ptr[BLOCK_16X8].sdx4df = vpx_sad16x8x4d;
@@ -1934,9 +1928,6 @@ struct VP8_COMP *vp8_create_compressor(VP8_CONFIG *oxcf) {
cpi->fn_ptr[BLOCK_8X16].sdf = vpx_sad8x16;
cpi->fn_ptr[BLOCK_8X16].vf = vpx_variance8x16;
cpi->fn_ptr[BLOCK_8X16].svf = vpx_sub_pixel_variance8x16;
cpi->fn_ptr[BLOCK_8X16].svf_halfpix_h = NULL;
cpi->fn_ptr[BLOCK_8X16].svf_halfpix_v = NULL;
cpi->fn_ptr[BLOCK_8X16].svf_halfpix_hv = NULL;
cpi->fn_ptr[BLOCK_8X16].sdx3f = vpx_sad8x16x3;
cpi->fn_ptr[BLOCK_8X16].sdx8f = vpx_sad8x16x8;
cpi->fn_ptr[BLOCK_8X16].sdx4df = vpx_sad8x16x4d;
@@ -1944,9 +1935,6 @@ struct VP8_COMP *vp8_create_compressor(VP8_CONFIG *oxcf) {
cpi->fn_ptr[BLOCK_8X8].sdf = vpx_sad8x8;
cpi->fn_ptr[BLOCK_8X8].vf = vpx_variance8x8;
cpi->fn_ptr[BLOCK_8X8].svf = vpx_sub_pixel_variance8x8;
cpi->fn_ptr[BLOCK_8X8].svf_halfpix_h = NULL;
cpi->fn_ptr[BLOCK_8X8].svf_halfpix_v = NULL;
cpi->fn_ptr[BLOCK_8X8].svf_halfpix_hv = NULL;
cpi->fn_ptr[BLOCK_8X8].sdx3f = vpx_sad8x8x3;
cpi->fn_ptr[BLOCK_8X8].sdx8f = vpx_sad8x8x8;
cpi->fn_ptr[BLOCK_8X8].sdx4df = vpx_sad8x8x4d;
@@ -1954,9 +1942,6 @@ struct VP8_COMP *vp8_create_compressor(VP8_CONFIG *oxcf) {
cpi->fn_ptr[BLOCK_4X4].sdf = vpx_sad4x4;
cpi->fn_ptr[BLOCK_4X4].vf = vpx_variance4x4;
cpi->fn_ptr[BLOCK_4X4].svf = vpx_sub_pixel_variance4x4;
cpi->fn_ptr[BLOCK_4X4].svf_halfpix_h = NULL;
cpi->fn_ptr[BLOCK_4X4].svf_halfpix_v = NULL;
cpi->fn_ptr[BLOCK_4X4].svf_halfpix_hv = NULL;
cpi->fn_ptr[BLOCK_4X4].sdx3f = vpx_sad4x4x3;
cpi->fn_ptr[BLOCK_4X4].sdx8f = vpx_sad4x4x8;
cpi->fn_ptr[BLOCK_4X4].sdx4df = vpx_sad4x4x4d;
@@ -49,24 +49,6 @@ uint32_t vpx_get_mb_ss_c(const int16_t *a) {
return sum;
}
uint32_t vpx_variance_halfpixvar16x16_h_c(const uint8_t *a, int a_stride,
const uint8_t *b, int b_stride,
uint32_t *sse) {
return vpx_sub_pixel_variance16x16_c(a, a_stride, 4, 0, b, b_stride, sse);
}
uint32_t vpx_variance_halfpixvar16x16_v_c(const uint8_t *a, int a_stride,
const uint8_t *b, int b_stride,
uint32_t *sse) {
return vpx_sub_pixel_variance16x16_c(a, a_stride, 0, 4, b, b_stride, sse);
}
uint32_t vpx_variance_halfpixvar16x16_hv_c(const uint8_t *a, int a_stride,
const uint8_t *b, int b_stride,
uint32_t *sse) {
return vpx_sub_pixel_variance16x16_c(a, a_stride, 4, 4, b, b_stride, sse);
}
static void variance(const uint8_t *a, int a_stride, const uint8_t *b,
int b_stride, int w, int h, uint32_t *sse, int *sum) {
int i, j;
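Why offset 4 reproduces the half-pixel behaviour: in the generic C sub-pixel
variance path, the bilinear filter taps for offset 4 are equal, so the
interpolation collapses to a rounded average of two neighbouring pixels, the
same operation the dedicated halfpix code performed. A sketch under that
assumption (tap values and 7-bit filter precision taken from the generic C
implementation):

#include <stdint.h>

/* (64 * a + 64 * b + 64) >> 7 equals the rounded average (a + b + 1) >> 1,
 * i.e. plain half-pel interpolation between two neighbouring samples. */
static uint8_t halfpel_sample(uint8_t a, uint8_t b) {
  return (uint8_t)((64 * a + 64 * b + 64) >> 7);
}
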
@@ -58,9 +58,6 @@ typedef struct variance_vtable {
vpx_sad_fn_t sdf;
vpx_variance_fn_t vf;
vpx_subpixvariance_fn_t svf;
vpx_variance_fn_t svf_halfpix_h;
vpx_variance_fn_t svf_halfpix_v;
vpx_variance_fn_t svf_halfpix_hv;
vpx_sad_multi_fn_t sdx3f;
vpx_sad_multi_fn_t sdx8f;
vpx_sad_multi_d_fn_t sdx4df;
@@ -310,8 +310,6 @@ DSP_SRCS-$(HAVE_MSA) += mips/sub_pixel_variance_msa.c
DSP_SRCS-$(HAVE_SSE) += x86/variance_sse2.c
DSP_SRCS-$(HAVE_SSE2) += x86/variance_sse2.c # Contains SSE2 and SSSE3
DSP_SRCS-$(HAVE_SSE2) += x86/halfpix_variance_sse2.c
DSP_SRCS-$(HAVE_SSE2) += x86/halfpix_variance_impl_sse2.asm
DSP_SRCS-$(HAVE_AVX2) += x86/variance_avx2.c
DSP_SRCS-$(HAVE_AVX2) += x86/variance_impl_avx2.c
@@ -1511,23 +1511,6 @@ add_proto qw/uint32_t vpx_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
add_proto qw/uint32_t vpx_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_sub_pixel_avg_variance4x4 msa sse2 ssse3/;
#
# Specialty Subpixel
#
# TODO(johannkoenig): Add neon implementations of
# vpx_variance_halfpixvar16x16_h
# vpx_variance_halfpixvar16x16_v
# vpx_variance_halfpixvar16x16_hv
# https://bugs.chromium.org/p/webm/issues/detail?id=1273
add_proto qw/uint32_t vpx_variance_halfpixvar16x16_h/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_variance_halfpixvar16x16_h sse2/;
add_proto qw/uint32_t vpx_variance_halfpixvar16x16_v/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_variance_halfpixvar16x16_v sse2/;
add_proto qw/uint32_t vpx_variance_halfpixvar16x16_hv/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_variance_halfpixvar16x16_hv sse2/;
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/unsigned int vpx_highbd_12_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_highbd_12_variance64x64 sse2/;
The diff for the deleted assembly file, x86/halfpix_variance_impl_sse2.asm, is collapsed. The other deleted file, x86/halfpix_variance_sse2.c, is shown in full below:
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <assert.h>
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "vpx/vpx_integer.h"
void vpx_half_horiz_vert_variance16x_h_sse2(const unsigned char *ref,
int ref_stride,
const unsigned char *src,
int src_stride, unsigned int height,
int *sum, unsigned int *sumsquared);
void vpx_half_horiz_variance16x_h_sse2(const unsigned char *ref, int ref_stride,
const unsigned char *src, int src_stride,
unsigned int height, int *sum,
unsigned int *sumsquared);
void vpx_half_vert_variance16x_h_sse2(const unsigned char *ref, int ref_stride,
const unsigned char *src, int src_stride,
unsigned int height, int *sum,
unsigned int *sumsquared);
uint32_t vpx_variance_halfpixvar16x16_h_sse2(const unsigned char *src,
int src_stride,
const unsigned char *dst,
int dst_stride, uint32_t *sse) {
int xsum0;
unsigned int xxsum0;
vpx_half_horiz_variance16x_h_sse2(src, src_stride, dst, dst_stride, 16,
&xsum0, &xxsum0);
*sse = xxsum0;
assert(xsum0 <= 255 * 16 * 16);
assert(xsum0 >= -255 * 16 * 16);
return (xxsum0 - ((uint32_t)((int64_t)xsum0 * xsum0) >> 8));
}
uint32_t vpx_variance_halfpixvar16x16_v_sse2(const unsigned char *src,
int src_stride,
const unsigned char *dst,
int dst_stride, uint32_t *sse) {
int xsum0;
unsigned int xxsum0;
vpx_half_vert_variance16x_h_sse2(src, src_stride, dst, dst_stride, 16, &xsum0,
&xxsum0);
*sse = xxsum0;
assert(xsum0 <= 255 * 16 * 16);
assert(xsum0 >= -255 * 16 * 16);
return (xxsum0 - ((uint32_t)((int64_t)xsum0 * xsum0) >> 8));
}
uint32_t vpx_variance_halfpixvar16x16_hv_sse2(const unsigned char *src,
int src_stride,
const unsigned char *dst,
int dst_stride, uint32_t *sse) {
int xsum0;
unsigned int xxsum0;
vpx_half_horiz_vert_variance16x_h_sse2(src, src_stride, dst, dst_stride, 16,
&xsum0, &xxsum0);
*sse = xxsum0;
assert(xsum0 <= 255 * 16 * 16);
assert(xsum0 >= -255 * 16 * 16);
return (xxsum0 - ((uint32_t)((int64_t)xsum0 * xsum0) >> 8));
}
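
For reference, the return expression in the deleted functions above is the
standard variance identity for a 16x16 block: SSE minus the squared sum of
differences divided by the pixel count (256), with the division written as a
shift by 8. A minimal scalar sketch (hypothetical helper, not part of the
tree):

#include <stdint.h>

/* variance = sse - sum * sum / 256 for a 16x16 block; ">> 8" is the /256. */
static uint32_t variance16x16_from_sums(int sum, uint32_t sse) {
  return sse - (uint32_t)(((int64_t)sum * sum) >> 8);
}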