Commit c0057929 authored by Johann Koenig's avatar Johann Koenig Committed by Gerrit Code Review

Merge "Make vp9 subpixel match vp8"

parents fd891a96 eb88b172
This diff is collapsed.
......@@ -43,14 +43,6 @@ typedef int16_t InterpKernel[SUBPEL_TAPS];
const InterpKernel *vp9_get_interp_kernel(INTERP_FILTER filter);
DECLARE_ALIGNED(256, extern const InterpKernel,
vp9_bilinear_filters[SUBPEL_SHIFTS]);
// The VP9_BILINEAR_FILTERS_2TAP macro returns a pointer to the bilinear
// filter kernel as a 2 tap filter.
#define BILINEAR_FILTERS_2TAP(x) \
(vp9_bilinear_filters[(x)] + SUBPEL_TAPS/2 - 1)
#ifdef __cplusplus
} // extern "C"
#endif
......
......@@ -16,10 +16,18 @@
#include "vpx_ports/mem.h"
#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_filter.h"
#include "vp9/encoder/vp9_variance.h"
static uint8_t bilinear_filters[8][2] = {
{ 128, 0, },
{ 112, 16, },
{ 96, 32, },
{ 80, 48, },
{ 64, 64, },
{ 48, 80, },
{ 32, 96, },
{ 16, 112, },
};
static void var_filter_block2d_bil_w8(const uint8_t *src_ptr,
uint8_t *output_ptr,
......@@ -27,9 +35,9 @@ static void var_filter_block2d_bil_w8(const uint8_t *src_ptr,
int pixel_step,
unsigned int output_height,
unsigned int output_width,
const int16_t *vp9_filter) {
const uint8x8_t f0 = vmov_n_u8((uint8_t)vp9_filter[0]);
const uint8x8_t f1 = vmov_n_u8((uint8_t)vp9_filter[1]);
const uint8_t *vp9_filter) {
const uint8x8_t f0 = vmov_n_u8(vp9_filter[0]);
const uint8x8_t f1 = vmov_n_u8(vp9_filter[1]);
unsigned int i;
for (i = 0; i < output_height; ++i) {
const uint8x8_t src_0 = vld1_u8(&src_ptr[0]);
......@@ -50,9 +58,9 @@ static void var_filter_block2d_bil_w16(const uint8_t *src_ptr,
int pixel_step,
unsigned int output_height,
unsigned int output_width,
const int16_t *vp9_filter) {
const uint8x8_t f0 = vmov_n_u8((uint8_t)vp9_filter[0]);
const uint8x8_t f1 = vmov_n_u8((uint8_t)vp9_filter[1]);
const uint8_t *vp9_filter) {
const uint8x8_t f0 = vmov_n_u8(vp9_filter[0]);
const uint8x8_t f1 = vmov_n_u8(vp9_filter[1]);
unsigned int i, j;
for (i = 0; i < output_height; ++i) {
for (j = 0; j < output_width; j += 16) {
......@@ -84,9 +92,9 @@ unsigned int vp9_sub_pixel_variance8x8_neon(const uint8_t *src,
var_filter_block2d_bil_w8(src, fdata3, src_stride, 1,
9, 8,
BILINEAR_FILTERS_2TAP(xoffset));
bilinear_filters[xoffset]);
var_filter_block2d_bil_w8(fdata3, temp2, 8, 8, 8,
8, BILINEAR_FILTERS_2TAP(yoffset));
8, bilinear_filters[yoffset]);
return vpx_variance8x8_neon(temp2, 8, dst, dst_stride, sse);
}
......@@ -102,9 +110,9 @@ unsigned int vp9_sub_pixel_variance16x16_neon(const uint8_t *src,
var_filter_block2d_bil_w16(src, fdata3, src_stride, 1,
17, 16,
BILINEAR_FILTERS_2TAP(xoffset));
bilinear_filters[xoffset]);
var_filter_block2d_bil_w16(fdata3, temp2, 16, 16, 16,
16, BILINEAR_FILTERS_2TAP(yoffset));
16, bilinear_filters[yoffset]);
return vpx_variance16x16_neon(temp2, 16, dst, dst_stride, sse);
}
......@@ -120,9 +128,9 @@ unsigned int vp9_sub_pixel_variance32x32_neon(const uint8_t *src,
var_filter_block2d_bil_w16(src, fdata3, src_stride, 1,
33, 32,
BILINEAR_FILTERS_2TAP(xoffset));
bilinear_filters[xoffset]);
var_filter_block2d_bil_w16(fdata3, temp2, 32, 32, 32,
32, BILINEAR_FILTERS_2TAP(yoffset));
32, bilinear_filters[yoffset]);
return vpx_variance32x32_neon(temp2, 32, dst, dst_stride, sse);
}
......@@ -138,8 +146,8 @@ unsigned int vp9_sub_pixel_variance64x64_neon(const uint8_t *src,
var_filter_block2d_bil_w16(src, fdata3, src_stride, 1,
65, 64,
BILINEAR_FILTERS_2TAP(xoffset));
bilinear_filters[xoffset]);
var_filter_block2d_bil_w16(fdata3, temp2, 64, 64, 64,
64, BILINEAR_FILTERS_2TAP(yoffset));
64, bilinear_filters[yoffset]);
return vpx_variance64x64_neon(temp2, 64, dst, dst_stride, sse);
}
......@@ -162,9 +162,9 @@ void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
error_per_bit + 4096) >> 13 : 0)
// convert motion vector component to offset for svf calc
// convert motion vector component to offset for sv[a]f calc
static INLINE int sp(int x) {
return (x & 7) << 1;
return x & 7;
}
static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
......@@ -679,16 +679,14 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
tc = bc + search_step[idx].col;
if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
int row_offset = (tr & 0x07) << 1;
int col_offset = (tc & 0x07) << 1;
MV this_mv;
this_mv.row = tr;
this_mv.col = tc;
if (second_pred == NULL)
thismse = vfp->svf(pre_address, y_stride, col_offset, row_offset,
thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, &sse);
else
thismse = vfp->svaf(pre_address, y_stride, col_offset, row_offset,
thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, &sse, second_pred);
cost_array[idx] = thismse +
mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
......@@ -709,14 +707,12 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
tr = br + (cost_array[2] < cost_array[3] ? -hstep : hstep);
if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
int row_offset = (tr & 0x07) << 1;
int col_offset = (tc & 0x07) << 1;
MV this_mv = {tr, tc};
if (second_pred == NULL)
thismse = vfp->svf(pre_address, y_stride, col_offset, row_offset,
thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, &sse);
else
thismse = vfp->svaf(pre_address, y_stride, col_offset, row_offset,
thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, &sse, second_pred);
cost_array[4] = thismse +
mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
......
......@@ -19,6 +19,17 @@
#include "vp9/encoder/vp9_variance.h"
static uint8_t bilinear_filters[8][2] = {
{ 128, 0, },
{ 112, 16, },
{ 96, 32, },
{ 80, 48, },
{ 64, 64, },
{ 48, 80, },
{ 32, 96, },
{ 16, 112, },
};
// Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal
// or vertical direction to produce the filtered output block. Used to implement
// first-pass of 2-D separable filter.
......@@ -33,7 +44,7 @@ static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,
int pixel_step,
unsigned int output_height,
unsigned int output_width,
const int16_t *vp9_filter) {
const uint8_t *vp9_filter) {
unsigned int i, j;
for (i = 0; i < output_height; i++) {
......@@ -65,7 +76,7 @@ static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
unsigned int pixel_step,
unsigned int output_height,
unsigned int output_width,
const int16_t *vp9_filter) {
const uint8_t *vp9_filter) {
unsigned int i, j;
for (i = 0; i < output_height; i++) {
......@@ -91,9 +102,9 @@ unsigned int vp9_sub_pixel_variance##W##x##H##_c( \
uint8_t temp2[H * W]; \
\
var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \
BILINEAR_FILTERS_2TAP(xoffset)); \
bilinear_filters[xoffset]); \
var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
BILINEAR_FILTERS_2TAP(yoffset)); \
bilinear_filters[yoffset]); \
\
return vpx_variance##W##x##H##_c(temp2, W, dst, dst_stride, sse); \
}
......@@ -110,9 +121,9 @@ unsigned int vp9_sub_pixel_avg_variance##W##x##H##_c( \
DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \
\
var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \
BILINEAR_FILTERS_2TAP(xoffset)); \
bilinear_filters[xoffset]); \
var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
BILINEAR_FILTERS_2TAP(yoffset)); \
bilinear_filters[yoffset]); \
\
vpx_comp_avg_pred(temp3, second_pred, W, H, temp2, W); \
\
......@@ -166,7 +177,7 @@ static void highbd_var_filter_block2d_bil_first_pass(
int pixel_step,
unsigned int output_height,
unsigned int output_width,
const int16_t *vp9_filter) {
const uint8_t *vp9_filter) {
unsigned int i, j;
uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8);
for (i = 0; i < output_height; i++) {
......@@ -192,7 +203,7 @@ static void highbd_var_filter_block2d_bil_second_pass(
unsigned int pixel_step,
unsigned int output_height,
unsigned int output_width,
const int16_t *vp9_filter) {
const uint8_t *vp9_filter) {
unsigned int i, j;
for (i = 0; i < output_height; i++) {
......@@ -219,9 +230,9 @@ unsigned int vp9_highbd_sub_pixel_variance##W##x##H##_c( \
uint16_t temp2[H * W]; \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
W, BILINEAR_FILTERS_2TAP(xoffset)); \
W, bilinear_filters[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
BILINEAR_FILTERS_2TAP(yoffset)); \
bilinear_filters[yoffset]); \
\
return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \
dst_stride, sse); \
......@@ -236,9 +247,9 @@ unsigned int vp9_highbd_10_sub_pixel_variance##W##x##H##_c( \
uint16_t temp2[H * W]; \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
W, BILINEAR_FILTERS_2TAP(xoffset)); \
W, bilinear_filters[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
BILINEAR_FILTERS_2TAP(yoffset)); \
bilinear_filters[yoffset]); \
\
return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
W, dst, dst_stride, sse); \
......@@ -253,9 +264,9 @@ unsigned int vp9_highbd_12_sub_pixel_variance##W##x##H##_c( \
uint16_t temp2[H * W]; \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
W, BILINEAR_FILTERS_2TAP(xoffset)); \
W, bilinear_filters[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
BILINEAR_FILTERS_2TAP(yoffset)); \
bilinear_filters[yoffset]); \
\
return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
W, dst, dst_stride, sse); \
......@@ -273,9 +284,9 @@ unsigned int vp9_highbd_sub_pixel_avg_variance##W##x##H##_c( \
DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
W, BILINEAR_FILTERS_2TAP(xoffset)); \
W, bilinear_filters[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
BILINEAR_FILTERS_2TAP(yoffset)); \
bilinear_filters[yoffset]); \
\
vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
CONVERT_TO_BYTEPTR(temp2), W); \
......@@ -295,9 +306,9 @@ unsigned int vp9_highbd_10_sub_pixel_avg_variance##W##x##H##_c( \
DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
W, BILINEAR_FILTERS_2TAP(xoffset)); \
W, bilinear_filters[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
BILINEAR_FILTERS_2TAP(yoffset)); \
bilinear_filters[yoffset]); \
\
vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
CONVERT_TO_BYTEPTR(temp2), W); \
......@@ -317,9 +328,9 @@ unsigned int vp9_highbd_12_sub_pixel_avg_variance##W##x##H##_c( \
DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
W, BILINEAR_FILTERS_2TAP(xoffset)); \
W, bilinear_filters[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
BILINEAR_FILTERS_2TAP(yoffset)); \
bilinear_filters[yoffset]); \
\
vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
CONVERT_TO_BYTEPTR(temp2), W); \
......
......@@ -14,35 +14,19 @@ SECTION_RODATA
pw_8: times 8 dw 8
bilin_filter_m_sse2: times 8 dw 16
times 8 dw 0
times 8 dw 15
times 8 dw 1
times 8 dw 14
times 8 dw 2
times 8 dw 13
times 8 dw 3
times 8 dw 12
times 8 dw 4
times 8 dw 11
times 8 dw 5
times 8 dw 10
times 8 dw 6
times 8 dw 9
times 8 dw 7
times 16 dw 8
times 8 dw 7
times 8 dw 9
times 8 dw 6
times 8 dw 10
times 8 dw 5
times 8 dw 11
times 8 dw 4
times 8 dw 12
times 8 dw 3
times 8 dw 13
times 8 dw 2
times 8 dw 14
times 8 dw 1
times 8 dw 15
SECTION .text
......
......@@ -14,52 +14,28 @@ SECTION_RODATA
pw_8: times 8 dw 8
bilin_filter_m_sse2: times 8 dw 16
times 8 dw 0
times 8 dw 15
times 8 dw 1
times 8 dw 14
times 8 dw 2
times 8 dw 13
times 8 dw 3
times 8 dw 12
times 8 dw 4
times 8 dw 11
times 8 dw 5
times 8 dw 10
times 8 dw 6
times 8 dw 9
times 8 dw 7
times 16 dw 8
times 8 dw 7
times 8 dw 9
times 8 dw 6
times 8 dw 10
times 8 dw 5
times 8 dw 11
times 8 dw 4
times 8 dw 12
times 8 dw 3
times 8 dw 13
times 8 dw 2
times 8 dw 14
times 8 dw 1
times 8 dw 15
bilin_filter_m_ssse3: times 8 db 16, 0
times 8 db 15, 1
times 8 db 14, 2
times 8 db 13, 3
times 8 db 12, 4
times 8 db 11, 5
times 8 db 10, 6
times 8 db 9, 7
times 16 db 8
times 8 db 7, 9
times 8 db 6, 10
times 8 db 5, 11
times 8 db 4, 12
times 8 db 3, 13
times 8 db 2, 14
times 8 db 1, 15
SECTION .text
......
......@@ -17,36 +17,20 @@
DECLARE_ALIGNED(32, static const uint8_t, bilinear_filters_avx2[512]) = {
16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0,
16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0,
15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1,
15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1,
14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2,
14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2,
13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3,
13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3,
12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4,
12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4,
11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5,
11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5,
10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6,
10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6,
9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7,
9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9,
7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9,
6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10,
6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10,
5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11,
5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11,
4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12,
4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12,
3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13,
3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13,
2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14,
2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14,
1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15,
1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15
};
#define FILTER_SRC(filter) \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment