Commit c0057929 authored by Johann Koenig's avatar Johann Koenig Committed by Gerrit Code Review
Browse files

Merge "Make vp9 subpixel match vp8"

parents fd891a96 eb88b172
This diff is collapsed.
...@@ -43,14 +43,6 @@ typedef int16_t InterpKernel[SUBPEL_TAPS]; ...@@ -43,14 +43,6 @@ typedef int16_t InterpKernel[SUBPEL_TAPS];
const InterpKernel *vp9_get_interp_kernel(INTERP_FILTER filter); const InterpKernel *vp9_get_interp_kernel(INTERP_FILTER filter);
DECLARE_ALIGNED(256, extern const InterpKernel,
vp9_bilinear_filters[SUBPEL_SHIFTS]);
// The BILINEAR_FILTERS_2TAP macro returns a pointer to the bilinear
// filter kernel as a 2-tap filter.
#define BILINEAR_FILTERS_2TAP(x) \
(vp9_bilinear_filters[(x)] + SUBPEL_TAPS/2 - 1)
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"
#endif #endif
......
...@@ -16,10 +16,18 @@ ...@@ -16,10 +16,18 @@
#include "vpx_ports/mem.h" #include "vpx_ports/mem.h"
#include "vpx/vpx_integer.h" #include "vpx/vpx_integer.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_filter.h" #include "vp9/common/vp9_filter.h"
#include "vp9/encoder/vp9_variance.h" static uint8_t bilinear_filters[8][2] = {
{ 128, 0, },
{ 112, 16, },
{ 96, 32, },
{ 80, 48, },
{ 64, 64, },
{ 48, 80, },
{ 32, 96, },
{ 16, 112, },
};
static void var_filter_block2d_bil_w8(const uint8_t *src_ptr, static void var_filter_block2d_bil_w8(const uint8_t *src_ptr,
uint8_t *output_ptr, uint8_t *output_ptr,
...@@ -27,9 +35,9 @@ static void var_filter_block2d_bil_w8(const uint8_t *src_ptr, ...@@ -27,9 +35,9 @@ static void var_filter_block2d_bil_w8(const uint8_t *src_ptr,
int pixel_step, int pixel_step,
unsigned int output_height, unsigned int output_height,
unsigned int output_width, unsigned int output_width,
const int16_t *vp9_filter) { const uint8_t *vp9_filter) {
const uint8x8_t f0 = vmov_n_u8((uint8_t)vp9_filter[0]); const uint8x8_t f0 = vmov_n_u8(vp9_filter[0]);
const uint8x8_t f1 = vmov_n_u8((uint8_t)vp9_filter[1]); const uint8x8_t f1 = vmov_n_u8(vp9_filter[1]);
unsigned int i; unsigned int i;
for (i = 0; i < output_height; ++i) { for (i = 0; i < output_height; ++i) {
const uint8x8_t src_0 = vld1_u8(&src_ptr[0]); const uint8x8_t src_0 = vld1_u8(&src_ptr[0]);
...@@ -50,9 +58,9 @@ static void var_filter_block2d_bil_w16(const uint8_t *src_ptr, ...@@ -50,9 +58,9 @@ static void var_filter_block2d_bil_w16(const uint8_t *src_ptr,
int pixel_step, int pixel_step,
unsigned int output_height, unsigned int output_height,
unsigned int output_width, unsigned int output_width,
const int16_t *vp9_filter) { const uint8_t *vp9_filter) {
const uint8x8_t f0 = vmov_n_u8((uint8_t)vp9_filter[0]); const uint8x8_t f0 = vmov_n_u8(vp9_filter[0]);
const uint8x8_t f1 = vmov_n_u8((uint8_t)vp9_filter[1]); const uint8x8_t f1 = vmov_n_u8(vp9_filter[1]);
unsigned int i, j; unsigned int i, j;
for (i = 0; i < output_height; ++i) { for (i = 0; i < output_height; ++i) {
for (j = 0; j < output_width; j += 16) { for (j = 0; j < output_width; j += 16) {
...@@ -84,9 +92,9 @@ unsigned int vp9_sub_pixel_variance8x8_neon(const uint8_t *src, ...@@ -84,9 +92,9 @@ unsigned int vp9_sub_pixel_variance8x8_neon(const uint8_t *src,
var_filter_block2d_bil_w8(src, fdata3, src_stride, 1, var_filter_block2d_bil_w8(src, fdata3, src_stride, 1,
9, 8, 9, 8,
BILINEAR_FILTERS_2TAP(xoffset)); bilinear_filters[xoffset]);
var_filter_block2d_bil_w8(fdata3, temp2, 8, 8, 8, var_filter_block2d_bil_w8(fdata3, temp2, 8, 8, 8,
8, BILINEAR_FILTERS_2TAP(yoffset)); 8, bilinear_filters[yoffset]);
return vpx_variance8x8_neon(temp2, 8, dst, dst_stride, sse); return vpx_variance8x8_neon(temp2, 8, dst, dst_stride, sse);
} }
...@@ -102,9 +110,9 @@ unsigned int vp9_sub_pixel_variance16x16_neon(const uint8_t *src, ...@@ -102,9 +110,9 @@ unsigned int vp9_sub_pixel_variance16x16_neon(const uint8_t *src,
var_filter_block2d_bil_w16(src, fdata3, src_stride, 1, var_filter_block2d_bil_w16(src, fdata3, src_stride, 1,
17, 16, 17, 16,
BILINEAR_FILTERS_2TAP(xoffset)); bilinear_filters[xoffset]);
var_filter_block2d_bil_w16(fdata3, temp2, 16, 16, 16, var_filter_block2d_bil_w16(fdata3, temp2, 16, 16, 16,
16, BILINEAR_FILTERS_2TAP(yoffset)); 16, bilinear_filters[yoffset]);
return vpx_variance16x16_neon(temp2, 16, dst, dst_stride, sse); return vpx_variance16x16_neon(temp2, 16, dst, dst_stride, sse);
} }
...@@ -120,9 +128,9 @@ unsigned int vp9_sub_pixel_variance32x32_neon(const uint8_t *src, ...@@ -120,9 +128,9 @@ unsigned int vp9_sub_pixel_variance32x32_neon(const uint8_t *src,
var_filter_block2d_bil_w16(src, fdata3, src_stride, 1, var_filter_block2d_bil_w16(src, fdata3, src_stride, 1,
33, 32, 33, 32,
BILINEAR_FILTERS_2TAP(xoffset)); bilinear_filters[xoffset]);
var_filter_block2d_bil_w16(fdata3, temp2, 32, 32, 32, var_filter_block2d_bil_w16(fdata3, temp2, 32, 32, 32,
32, BILINEAR_FILTERS_2TAP(yoffset)); 32, bilinear_filters[yoffset]);
return vpx_variance32x32_neon(temp2, 32, dst, dst_stride, sse); return vpx_variance32x32_neon(temp2, 32, dst, dst_stride, sse);
} }
...@@ -138,8 +146,8 @@ unsigned int vp9_sub_pixel_variance64x64_neon(const uint8_t *src, ...@@ -138,8 +146,8 @@ unsigned int vp9_sub_pixel_variance64x64_neon(const uint8_t *src,
var_filter_block2d_bil_w16(src, fdata3, src_stride, 1, var_filter_block2d_bil_w16(src, fdata3, src_stride, 1,
65, 64, 65, 64,
BILINEAR_FILTERS_2TAP(xoffset)); bilinear_filters[xoffset]);
var_filter_block2d_bil_w16(fdata3, temp2, 64, 64, 64, var_filter_block2d_bil_w16(fdata3, temp2, 64, 64, 64,
64, BILINEAR_FILTERS_2TAP(yoffset)); 64, bilinear_filters[yoffset]);
return vpx_variance64x64_neon(temp2, 64, dst, dst_stride, sse); return vpx_variance64x64_neon(temp2, 64, dst, dst_stride, sse);
} }
...@@ -162,9 +162,9 @@ void vp9_init3smotion_compensation(search_site_config *cfg, int stride) { ...@@ -162,9 +162,9 @@ void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
error_per_bit + 4096) >> 13 : 0) error_per_bit + 4096) >> 13 : 0)
// convert motion vector component to offset for svf calc // convert motion vector component to offset for sv[a]f calc
static INLINE int sp(int x) { static INLINE int sp(int x) {
return (x & 7) << 1; return x & 7;
} }
static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) { static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
...@@ -679,16 +679,14 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x, ...@@ -679,16 +679,14 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
tc = bc + search_step[idx].col; tc = bc + search_step[idx].col;
if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3); const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
int row_offset = (tr & 0x07) << 1;
int col_offset = (tc & 0x07) << 1;
MV this_mv; MV this_mv;
this_mv.row = tr; this_mv.row = tr;
this_mv.col = tc; this_mv.col = tc;
if (second_pred == NULL) if (second_pred == NULL)
thismse = vfp->svf(pre_address, y_stride, col_offset, row_offset, thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, &sse); src_address, src_stride, &sse);
else else
thismse = vfp->svaf(pre_address, y_stride, col_offset, row_offset, thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, &sse, second_pred); src_address, src_stride, &sse, second_pred);
cost_array[idx] = thismse + cost_array[idx] = thismse +
mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
...@@ -709,14 +707,12 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x, ...@@ -709,14 +707,12 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
tr = br + (cost_array[2] < cost_array[3] ? -hstep : hstep); tr = br + (cost_array[2] < cost_array[3] ? -hstep : hstep);
if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3); const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
int row_offset = (tr & 0x07) << 1;
int col_offset = (tc & 0x07) << 1;
MV this_mv = {tr, tc}; MV this_mv = {tr, tc};
if (second_pred == NULL) if (second_pred == NULL)
thismse = vfp->svf(pre_address, y_stride, col_offset, row_offset, thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, &sse); src_address, src_stride, &sse);
else else
thismse = vfp->svaf(pre_address, y_stride, col_offset, row_offset, thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, &sse, second_pred); src_address, src_stride, &sse, second_pred);
cost_array[4] = thismse + cost_array[4] = thismse +
mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
......
...@@ -19,6 +19,17 @@ ...@@ -19,6 +19,17 @@
#include "vp9/encoder/vp9_variance.h" #include "vp9/encoder/vp9_variance.h"
// 2-tap bilinear filter weights, one row per sub-pixel offset (8 rows).
// Rows are selected with the xoffset / yoffset arguments of the sub-pixel
// variance functions in this file. Each row is { w0, w1 }: the two weights
// always sum to 128, and w1 grows in steps of 16 from row to row.
// The table is read-only, so it is declared const; it is only ever handed to
// parameters of type `const uint8_t *`.
static const uint8_t bilinear_filters[8][2] = {
  { 128,   0 },
  { 112,  16 },
  {  96,  32 },
  {  80,  48 },
  {  64,  64 },
  {  48,  80 },
  {  32,  96 },
  {  16, 112 },
};
// Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal // Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal
// or vertical direction to produce the filtered output block. Used to implement // or vertical direction to produce the filtered output block. Used to implement
// first-pass of 2-D separable filter. // first-pass of 2-D separable filter.
...@@ -33,7 +44,7 @@ static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr, ...@@ -33,7 +44,7 @@ static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,
int pixel_step, int pixel_step,
unsigned int output_height, unsigned int output_height,
unsigned int output_width, unsigned int output_width,
const int16_t *vp9_filter) { const uint8_t *vp9_filter) {
unsigned int i, j; unsigned int i, j;
for (i = 0; i < output_height; i++) { for (i = 0; i < output_height; i++) {
...@@ -65,7 +76,7 @@ static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr, ...@@ -65,7 +76,7 @@ static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
unsigned int pixel_step, unsigned int pixel_step,
unsigned int output_height, unsigned int output_height,
unsigned int output_width, unsigned int output_width,
const int16_t *vp9_filter) { const uint8_t *vp9_filter) {
unsigned int i, j; unsigned int i, j;
for (i = 0; i < output_height; i++) { for (i = 0; i < output_height; i++) {
...@@ -91,9 +102,9 @@ unsigned int vp9_sub_pixel_variance##W##x##H##_c( \ ...@@ -91,9 +102,9 @@ unsigned int vp9_sub_pixel_variance##W##x##H##_c( \
uint8_t temp2[H * W]; \ uint8_t temp2[H * W]; \
\ \
var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \ var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \
BILINEAR_FILTERS_2TAP(xoffset)); \ bilinear_filters[xoffset]); \
var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
BILINEAR_FILTERS_2TAP(yoffset)); \ bilinear_filters[yoffset]); \
\ \
return vpx_variance##W##x##H##_c(temp2, W, dst, dst_stride, sse); \ return vpx_variance##W##x##H##_c(temp2, W, dst, dst_stride, sse); \
} }
...@@ -110,9 +121,9 @@ unsigned int vp9_sub_pixel_avg_variance##W##x##H##_c( \ ...@@ -110,9 +121,9 @@ unsigned int vp9_sub_pixel_avg_variance##W##x##H##_c( \
DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \ DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \
\ \
var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \ var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \
BILINEAR_FILTERS_2TAP(xoffset)); \ bilinear_filters[xoffset]); \
var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
BILINEAR_FILTERS_2TAP(yoffset)); \ bilinear_filters[yoffset]); \
\ \
vpx_comp_avg_pred(temp3, second_pred, W, H, temp2, W); \ vpx_comp_avg_pred(temp3, second_pred, W, H, temp2, W); \
\ \
...@@ -166,7 +177,7 @@ static void highbd_var_filter_block2d_bil_first_pass( ...@@ -166,7 +177,7 @@ static void highbd_var_filter_block2d_bil_first_pass(
int pixel_step, int pixel_step,
unsigned int output_height, unsigned int output_height,
unsigned int output_width, unsigned int output_width,
const int16_t *vp9_filter) { const uint8_t *vp9_filter) {
unsigned int i, j; unsigned int i, j;
uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8); uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8);
for (i = 0; i < output_height; i++) { for (i = 0; i < output_height; i++) {
...@@ -192,7 +203,7 @@ static void highbd_var_filter_block2d_bil_second_pass( ...@@ -192,7 +203,7 @@ static void highbd_var_filter_block2d_bil_second_pass(
unsigned int pixel_step, unsigned int pixel_step,
unsigned int output_height, unsigned int output_height,
unsigned int output_width, unsigned int output_width,
const int16_t *vp9_filter) { const uint8_t *vp9_filter) {
unsigned int i, j; unsigned int i, j;
for (i = 0; i < output_height; i++) { for (i = 0; i < output_height; i++) {
...@@ -219,9 +230,9 @@ unsigned int vp9_highbd_sub_pixel_variance##W##x##H##_c( \ ...@@ -219,9 +230,9 @@ unsigned int vp9_highbd_sub_pixel_variance##W##x##H##_c( \
uint16_t temp2[H * W]; \ uint16_t temp2[H * W]; \
\ \
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
W, BILINEAR_FILTERS_2TAP(xoffset)); \ W, bilinear_filters[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
BILINEAR_FILTERS_2TAP(yoffset)); \ bilinear_filters[yoffset]); \
\ \
return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \ return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \
dst_stride, sse); \ dst_stride, sse); \
...@@ -236,9 +247,9 @@ unsigned int vp9_highbd_10_sub_pixel_variance##W##x##H##_c( \ ...@@ -236,9 +247,9 @@ unsigned int vp9_highbd_10_sub_pixel_variance##W##x##H##_c( \
uint16_t temp2[H * W]; \ uint16_t temp2[H * W]; \
\ \
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
W, BILINEAR_FILTERS_2TAP(xoffset)); \ W, bilinear_filters[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
BILINEAR_FILTERS_2TAP(yoffset)); \ bilinear_filters[yoffset]); \
\ \
return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \ return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
W, dst, dst_stride, sse); \ W, dst, dst_stride, sse); \
...@@ -253,9 +264,9 @@ unsigned int vp9_highbd_12_sub_pixel_variance##W##x##H##_c( \ ...@@ -253,9 +264,9 @@ unsigned int vp9_highbd_12_sub_pixel_variance##W##x##H##_c( \
uint16_t temp2[H * W]; \ uint16_t temp2[H * W]; \
\ \
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
W, BILINEAR_FILTERS_2TAP(xoffset)); \ W, bilinear_filters[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
BILINEAR_FILTERS_2TAP(yoffset)); \ bilinear_filters[yoffset]); \
\ \
return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \ return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
W, dst, dst_stride, sse); \ W, dst, dst_stride, sse); \
...@@ -273,9 +284,9 @@ unsigned int vp9_highbd_sub_pixel_avg_variance##W##x##H##_c( \ ...@@ -273,9 +284,9 @@ unsigned int vp9_highbd_sub_pixel_avg_variance##W##x##H##_c( \
DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
\ \
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
W, BILINEAR_FILTERS_2TAP(xoffset)); \ W, bilinear_filters[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
BILINEAR_FILTERS_2TAP(yoffset)); \ bilinear_filters[yoffset]); \
\ \
vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \ vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
CONVERT_TO_BYTEPTR(temp2), W); \ CONVERT_TO_BYTEPTR(temp2), W); \
...@@ -295,9 +306,9 @@ unsigned int vp9_highbd_10_sub_pixel_avg_variance##W##x##H##_c( \ ...@@ -295,9 +306,9 @@ unsigned int vp9_highbd_10_sub_pixel_avg_variance##W##x##H##_c( \
DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
\ \
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
W, BILINEAR_FILTERS_2TAP(xoffset)); \ W, bilinear_filters[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
BILINEAR_FILTERS_2TAP(yoffset)); \ bilinear_filters[yoffset]); \
\ \
vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \ vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
CONVERT_TO_BYTEPTR(temp2), W); \ CONVERT_TO_BYTEPTR(temp2), W); \
...@@ -317,9 +328,9 @@ unsigned int vp9_highbd_12_sub_pixel_avg_variance##W##x##H##_c( \ ...@@ -317,9 +328,9 @@ unsigned int vp9_highbd_12_sub_pixel_avg_variance##W##x##H##_c( \
DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
\ \
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
W, BILINEAR_FILTERS_2TAP(xoffset)); \ W, bilinear_filters[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
BILINEAR_FILTERS_2TAP(yoffset)); \ bilinear_filters[yoffset]); \
\ \
vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \ vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
CONVERT_TO_BYTEPTR(temp2), W); \ CONVERT_TO_BYTEPTR(temp2), W); \
......
...@@ -14,35 +14,19 @@ SECTION_RODATA ...@@ -14,35 +14,19 @@ SECTION_RODATA
pw_8: times 8 dw 8 pw_8: times 8 dw 8
bilin_filter_m_sse2: times 8 dw 16 bilin_filter_m_sse2: times 8 dw 16
times 8 dw 0 times 8 dw 0
times 8 dw 15
times 8 dw 1
times 8 dw 14 times 8 dw 14
times 8 dw 2 times 8 dw 2
times 8 dw 13
times 8 dw 3
times 8 dw 12 times 8 dw 12
times 8 dw 4 times 8 dw 4
times 8 dw 11
times 8 dw 5
times 8 dw 10 times 8 dw 10
times 8 dw 6 times 8 dw 6
times 8 dw 9
times 8 dw 7
times 16 dw 8 times 16 dw 8
times 8 dw 7
times 8 dw 9
times 8 dw 6 times 8 dw 6
times 8 dw 10 times 8 dw 10
times 8 dw 5
times 8 dw 11
times 8 dw 4 times 8 dw 4
times 8 dw 12 times 8 dw 12
times 8 dw 3
times 8 dw 13
times 8 dw 2 times 8 dw 2
times 8 dw 14 times 8 dw 14
times 8 dw 1
times 8 dw 15
SECTION .text SECTION .text
......
...@@ -14,52 +14,28 @@ SECTION_RODATA ...@@ -14,52 +14,28 @@ SECTION_RODATA
pw_8: times 8 dw 8 pw_8: times 8 dw 8
bilin_filter_m_sse2: times 8 dw 16 bilin_filter_m_sse2: times 8 dw 16
times 8 dw 0 times 8 dw 0
times 8 dw 15
times 8 dw 1
times 8 dw 14 times 8 dw 14
times 8 dw 2 times 8 dw 2
times 8 dw 13
times 8 dw 3
times 8 dw 12 times 8 dw 12
times 8 dw 4 times 8 dw 4
times 8 dw 11
times 8 dw 5
times 8 dw 10 times 8 dw 10
times 8 dw 6 times 8 dw 6
times 8 dw 9
times 8 dw 7
times 16 dw 8 times 16 dw 8
times 8 dw 7
times 8 dw 9
times 8 dw 6 times 8 dw 6
times 8 dw 10 times 8 dw 10
times 8 dw 5
times 8 dw 11
times 8 dw 4 times 8 dw 4
times 8 dw 12 times 8 dw 12
times 8 dw 3
times 8 dw 13
times 8 dw 2 times 8 dw 2
times 8 dw 14 times 8 dw 14
times 8 dw 1
times 8 dw 15
bilin_filter_m_ssse3: times 8 db 16, 0 bilin_filter_m_ssse3: times 8 db 16, 0
times 8 db 15, 1
times 8 db 14, 2 times 8 db 14, 2
times 8 db 13, 3
times 8 db 12, 4 times 8 db 12, 4
times 8 db 11, 5
times 8 db 10, 6 times 8 db 10, 6
times 8 db 9, 7
times 16 db 8 times 16 db 8
times 8 db 7, 9
times 8 db 6, 10 times 8 db 6, 10
times 8 db 5, 11
times 8 db 4, 12 times 8 db 4, 12
times 8 db 3, 13
times 8 db 2, 14 times 8 db 2, 14
times 8 db 1, 15
SECTION .text SECTION .text
......
...@@ -17,36 +17,20 @@ ...@@ -17,36 +17,20 @@
DECLARE_ALIGNED(32, static const uint8_t, bilinear_filters_avx2[512]) = { DECLARE_ALIGNED(32, static const uint8_t, bilinear_filters_avx2[512]) = {
16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0,
16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0,
15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1,
15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1,
14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2,
14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2,
13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3,
13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3,
12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4,
12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4,
11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5,
11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5,
10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6,
10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6,
9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7,
9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9,
7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9,<