Commit eb88b172 authored by Johann's avatar Johann

Make vp9 subpixel match vp8

The only difference between the two was that the vp9 function allowed
for every step in the bilinear filter (16 steps) while vp8 only allowed
for half of those. Since all the call sites in vp9 (<< 1) the input, it
only ever used the same steps as vp8.

This will allow moving the subpel variance to vpx_dsp with the rest of
the variance functions.

Change-Id: I6fa2509350a2dc610c46b3e15bde98a15a084b75
parent 01853d7c
This diff is collapsed.
......@@ -43,14 +43,6 @@ typedef int16_t InterpKernel[SUBPEL_TAPS];
const InterpKernel *vp9_get_interp_kernel(INTERP_FILTER filter);
DECLARE_ALIGNED(256, extern const InterpKernel,
vp9_bilinear_filters[SUBPEL_SHIFTS]);
// The VP9_BILINEAR_FILTERS_2TAP macro returns a pointer to the bilinear
// filter kernel as a 2 tap filter.
#define BILINEAR_FILTERS_2TAP(x) \
(vp9_bilinear_filters[(x)] + SUBPEL_TAPS/2 - 1)
#ifdef __cplusplus
} // extern "C"
#endif
......
......@@ -16,10 +16,18 @@
#include "vpx_ports/mem.h"
#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_filter.h"
#include "vp9/encoder/vp9_variance.h"
static uint8_t bilinear_filters[8][2] = {
{ 128, 0, },
{ 112, 16, },
{ 96, 32, },
{ 80, 48, },
{ 64, 64, },
{ 48, 80, },
{ 32, 96, },
{ 16, 112, },
};
static void var_filter_block2d_bil_w8(const uint8_t *src_ptr,
uint8_t *output_ptr,
......@@ -27,9 +35,9 @@ static void var_filter_block2d_bil_w8(const uint8_t *src_ptr,
int pixel_step,
unsigned int output_height,
unsigned int output_width,
const int16_t *vp9_filter) {
const uint8x8_t f0 = vmov_n_u8((uint8_t)vp9_filter[0]);
const uint8x8_t f1 = vmov_n_u8((uint8_t)vp9_filter[1]);
const uint8_t *vp9_filter) {
const uint8x8_t f0 = vmov_n_u8(vp9_filter[0]);
const uint8x8_t f1 = vmov_n_u8(vp9_filter[1]);
unsigned int i;
for (i = 0; i < output_height; ++i) {
const uint8x8_t src_0 = vld1_u8(&src_ptr[0]);
......@@ -50,9 +58,9 @@ static void var_filter_block2d_bil_w16(const uint8_t *src_ptr,
int pixel_step,
unsigned int output_height,
unsigned int output_width,
const int16_t *vp9_filter) {
const uint8x8_t f0 = vmov_n_u8((uint8_t)vp9_filter[0]);
const uint8x8_t f1 = vmov_n_u8((uint8_t)vp9_filter[1]);
const uint8_t *vp9_filter) {
const uint8x8_t f0 = vmov_n_u8(vp9_filter[0]);
const uint8x8_t f1 = vmov_n_u8(vp9_filter[1]);
unsigned int i, j;
for (i = 0; i < output_height; ++i) {
for (j = 0; j < output_width; j += 16) {
......@@ -84,9 +92,9 @@ unsigned int vp9_sub_pixel_variance8x8_neon(const uint8_t *src,
var_filter_block2d_bil_w8(src, fdata3, src_stride, 1,
9, 8,
BILINEAR_FILTERS_2TAP(xoffset));
bilinear_filters[xoffset]);
var_filter_block2d_bil_w8(fdata3, temp2, 8, 8, 8,
8, BILINEAR_FILTERS_2TAP(yoffset));
8, bilinear_filters[yoffset]);
return vpx_variance8x8_neon(temp2, 8, dst, dst_stride, sse);
}
......@@ -102,9 +110,9 @@ unsigned int vp9_sub_pixel_variance16x16_neon(const uint8_t *src,
var_filter_block2d_bil_w16(src, fdata3, src_stride, 1,
17, 16,
BILINEAR_FILTERS_2TAP(xoffset));
bilinear_filters[xoffset]);
var_filter_block2d_bil_w16(fdata3, temp2, 16, 16, 16,
16, BILINEAR_FILTERS_2TAP(yoffset));
16, bilinear_filters[yoffset]);
return vpx_variance16x16_neon(temp2, 16, dst, dst_stride, sse);
}
......@@ -120,9 +128,9 @@ unsigned int vp9_sub_pixel_variance32x32_neon(const uint8_t *src,
var_filter_block2d_bil_w16(src, fdata3, src_stride, 1,
33, 32,
BILINEAR_FILTERS_2TAP(xoffset));
bilinear_filters[xoffset]);
var_filter_block2d_bil_w16(fdata3, temp2, 32, 32, 32,
32, BILINEAR_FILTERS_2TAP(yoffset));
32, bilinear_filters[yoffset]);
return vpx_variance32x32_neon(temp2, 32, dst, dst_stride, sse);
}
......@@ -138,8 +146,8 @@ unsigned int vp9_sub_pixel_variance64x64_neon(const uint8_t *src,
var_filter_block2d_bil_w16(src, fdata3, src_stride, 1,
65, 64,
BILINEAR_FILTERS_2TAP(xoffset));
bilinear_filters[xoffset]);
var_filter_block2d_bil_w16(fdata3, temp2, 64, 64, 64,
64, BILINEAR_FILTERS_2TAP(yoffset));
64, bilinear_filters[yoffset]);
return vpx_variance64x64_neon(temp2, 64, dst, dst_stride, sse);
}
......@@ -162,9 +162,9 @@ void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
error_per_bit + 4096) >> 13 : 0)
// convert motion vector component to offset for svf calc
// convert motion vector component to offset for sv[a]f calc
static INLINE int sp(int x) {
return (x & 7) << 1;
return x & 7;
}
static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
......@@ -679,16 +679,14 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
tc = bc + search_step[idx].col;
if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
int row_offset = (tr & 0x07) << 1;
int col_offset = (tc & 0x07) << 1;
MV this_mv;
this_mv.row = tr;
this_mv.col = tc;
if (second_pred == NULL)
thismse = vfp->svf(pre_address, y_stride, col_offset, row_offset,
thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, &sse);
else
thismse = vfp->svaf(pre_address, y_stride, col_offset, row_offset,
thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, &sse, second_pred);
cost_array[idx] = thismse +
mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
......@@ -709,14 +707,12 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
tr = br + (cost_array[2] < cost_array[3] ? -hstep : hstep);
if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
int row_offset = (tr & 0x07) << 1;
int col_offset = (tc & 0x07) << 1;
MV this_mv = {tr, tc};
if (second_pred == NULL)
thismse = vfp->svf(pre_address, y_stride, col_offset, row_offset,
thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, &sse);
else
thismse = vfp->svaf(pre_address, y_stride, col_offset, row_offset,
thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, &sse, second_pred);
cost_array[4] = thismse +
mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
......
......@@ -19,6 +19,17 @@
#include "vp9/encoder/vp9_variance.h"
static uint8_t bilinear_filters[8][2] = {
{ 128, 0, },
{ 112, 16, },
{ 96, 32, },
{ 80, 48, },
{ 64, 64, },
{ 48, 80, },
{ 32, 96, },
{ 16, 112, },
};
// Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal
// or vertical direction to produce the filtered output block. Used to implement
// first-pass of 2-D separable filter.
......@@ -33,7 +44,7 @@ static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,
int pixel_step,
unsigned int output_height,
unsigned int output_width,
const int16_t *vp9_filter) {
const uint8_t *vp9_filter) {
unsigned int i, j;
for (i = 0; i < output_height; i++) {
......@@ -65,7 +76,7 @@ static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
unsigned int pixel_step,
unsigned int output_height,
unsigned int output_width,
const int16_t *vp9_filter) {
const uint8_t *vp9_filter) {
unsigned int i, j;
for (i = 0; i < output_height; i++) {
......@@ -91,9 +102,9 @@ unsigned int vp9_sub_pixel_variance##W##x##H##_c( \
uint8_t temp2[H * W]; \
\
var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \
BILINEAR_FILTERS_2TAP(xoffset)); \
bilinear_filters[xoffset]); \
var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
BILINEAR_FILTERS_2TAP(yoffset)); \
bilinear_filters[yoffset]); \
\
return vpx_variance##W##x##H##_c(temp2, W, dst, dst_stride, sse); \
}
......@@ -110,9 +121,9 @@ unsigned int vp9_sub_pixel_avg_variance##W##x##H##_c( \
DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \
\
var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \
BILINEAR_FILTERS_2TAP(xoffset)); \
bilinear_filters[xoffset]); \
var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
BILINEAR_FILTERS_2TAP(yoffset)); \
bilinear_filters[yoffset]); \
\
vpx_comp_avg_pred(temp3, second_pred, W, H, temp2, W); \
\
......@@ -166,7 +177,7 @@ static void highbd_var_filter_block2d_bil_first_pass(
int pixel_step,
unsigned int output_height,
unsigned int output_width,
const int16_t *vp9_filter) {
const uint8_t *vp9_filter) {
unsigned int i, j;
uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8);
for (i = 0; i < output_height; i++) {
......@@ -192,7 +203,7 @@ static void highbd_var_filter_block2d_bil_second_pass(
unsigned int pixel_step,
unsigned int output_height,
unsigned int output_width,
const int16_t *vp9_filter) {
const uint8_t *vp9_filter) {
unsigned int i, j;
for (i = 0; i < output_height; i++) {
......@@ -219,9 +230,9 @@ unsigned int vp9_highbd_sub_pixel_variance##W##x##H##_c( \
uint16_t temp2[H * W]; \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
W, BILINEAR_FILTERS_2TAP(xoffset)); \
W, bilinear_filters[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
BILINEAR_FILTERS_2TAP(yoffset)); \
bilinear_filters[yoffset]); \
\
return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \
dst_stride, sse); \
......@@ -236,9 +247,9 @@ unsigned int vp9_highbd_10_sub_pixel_variance##W##x##H##_c( \
uint16_t temp2[H * W]; \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
W, BILINEAR_FILTERS_2TAP(xoffset)); \
W, bilinear_filters[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
BILINEAR_FILTERS_2TAP(yoffset)); \
bilinear_filters[yoffset]); \
\
return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
W, dst, dst_stride, sse); \
......@@ -253,9 +264,9 @@ unsigned int vp9_highbd_12_sub_pixel_variance##W##x##H##_c( \
uint16_t temp2[H * W]; \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
W, BILINEAR_FILTERS_2TAP(xoffset)); \
W, bilinear_filters[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
BILINEAR_FILTERS_2TAP(yoffset)); \
bilinear_filters[yoffset]); \
\
return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
W, dst, dst_stride, sse); \
......@@ -273,9 +284,9 @@ unsigned int vp9_highbd_sub_pixel_avg_variance##W##x##H##_c( \
DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
W, BILINEAR_FILTERS_2TAP(xoffset)); \
W, bilinear_filters[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
BILINEAR_FILTERS_2TAP(yoffset)); \
bilinear_filters[yoffset]); \
\
vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
CONVERT_TO_BYTEPTR(temp2), W); \
......@@ -295,9 +306,9 @@ unsigned int vp9_highbd_10_sub_pixel_avg_variance##W##x##H##_c( \
DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
W, BILINEAR_FILTERS_2TAP(xoffset)); \
W, bilinear_filters[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
BILINEAR_FILTERS_2TAP(yoffset)); \
bilinear_filters[yoffset]); \
\
vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
CONVERT_TO_BYTEPTR(temp2), W); \
......@@ -317,9 +328,9 @@ unsigned int vp9_highbd_12_sub_pixel_avg_variance##W##x##H##_c( \
DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
W, BILINEAR_FILTERS_2TAP(xoffset)); \
W, bilinear_filters[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
BILINEAR_FILTERS_2TAP(yoffset)); \
bilinear_filters[yoffset]); \
\
vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
CONVERT_TO_BYTEPTR(temp2), W); \
......
......@@ -14,35 +14,19 @@ SECTION_RODATA
pw_8: times 8 dw 8
bilin_filter_m_sse2: times 8 dw 16
times 8 dw 0
times 8 dw 15
times 8 dw 1
times 8 dw 14
times 8 dw 2
times 8 dw 13
times 8 dw 3
times 8 dw 12
times 8 dw 4
times 8 dw 11
times 8 dw 5
times 8 dw 10
times 8 dw 6
times 8 dw 9
times 8 dw 7
times 16 dw 8
times 8 dw 7
times 8 dw 9
times 8 dw 6
times 8 dw 10
times 8 dw 5
times 8 dw 11
times 8 dw 4
times 8 dw 12
times 8 dw 3
times 8 dw 13
times 8 dw 2
times 8 dw 14
times 8 dw 1
times 8 dw 15
SECTION .text
......
......@@ -14,52 +14,28 @@ SECTION_RODATA
pw_8: times 8 dw 8
bilin_filter_m_sse2: times 8 dw 16
times 8 dw 0
times 8 dw 15
times 8 dw 1
times 8 dw 14
times 8 dw 2
times 8 dw 13
times 8 dw 3
times 8 dw 12
times 8 dw 4
times 8 dw 11
times 8 dw 5
times 8 dw 10
times 8 dw 6
times 8 dw 9
times 8 dw 7
times 16 dw 8
times 8 dw 7
times 8 dw 9
times 8 dw 6
times 8 dw 10
times 8 dw 5
times 8 dw 11
times 8 dw 4
times 8 dw 12
times 8 dw 3
times 8 dw 13
times 8 dw 2
times 8 dw 14
times 8 dw 1
times 8 dw 15
bilin_filter_m_ssse3: times 8 db 16, 0
times 8 db 15, 1
times 8 db 14, 2
times 8 db 13, 3
times 8 db 12, 4
times 8 db 11, 5
times 8 db 10, 6
times 8 db 9, 7
times 16 db 8
times 8 db 7, 9
times 8 db 6, 10
times 8 db 5, 11
times 8 db 4, 12
times 8 db 3, 13
times 8 db 2, 14
times 8 db 1, 15
SECTION .text
......
......@@ -17,36 +17,20 @@
DECLARE_ALIGNED(32, static const uint8_t, bilinear_filters_avx2[512]) = {
16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0,
16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0,
15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1,
15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1,
14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2,
14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2,
13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3,
13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3,
12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4,
12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4,
11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5,
11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5,
10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6,
10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6,
9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7,
9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9,
7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9,
6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10,
6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10,
5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11,
5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11,
4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12,
4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12,
3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13,
3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13,
2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14,
2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14,
1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15,
1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15
};
#define FILTER_SRC(filter) \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment