Commit bf8a49ab authored by Linfeng Zhang

Clean CONVERT_TO_BYTEPTR/SHORTPTR in convolve

Replace them with CAST_TO_BYTEPTR/SHORTPTR.
The rule is: if a short ptr is cast to a byte ptr, any offset
operation on the byte ptr must be doubled. We do this by casting to
short ptr first, adding the offset, then casting back to byte ptr.

BUG=webm:1388

Change-Id: I9e18a73ba45ddae58fc9dae470c0ff34951fe248
parent a02f391c
......@@ -301,9 +301,9 @@ void wrapper_filter_average_block2d_8_c(
filter_average_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr,
dst_stride, output_width, output_height);
} else {
highbd_filter_average_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
highbd_filter_average_block2d_8_c(CAST_TO_SHORTPTR(src_ptr), src_stride,
hfilter, vfilter,
CONVERT_TO_SHORTPTR(dst_ptr), dst_stride,
CAST_TO_SHORTPTR(dst_ptr), dst_stride,
output_width, output_height, use_highbd);
}
#else
......@@ -324,8 +324,8 @@ void wrapper_filter_block2d_8_c(const uint8_t *src_ptr,
filter_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr,
dst_stride, output_width, output_height);
} else {
highbd_filter_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride, hfilter,
vfilter, CONVERT_TO_SHORTPTR(dst_ptr), dst_stride,
highbd_filter_block2d_8_c(CAST_TO_SHORTPTR(src_ptr), src_stride, hfilter,
vfilter, CAST_TO_SHORTPTR(dst_ptr), dst_stride,
output_width, output_height, use_highbd);
}
#else
......@@ -460,7 +460,7 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
if (UUT_->use_highbd_ == 0) {
return input_ + offset;
} else {
return CONVERT_TO_BYTEPTR(input16_) + offset;
return CAST_TO_BYTEPTR(input16_ + offset);
}
#else
return input_ + offset;
......@@ -473,7 +473,7 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
if (UUT_->use_highbd_ == 0) {
return output_ + offset;
} else {
return CONVERT_TO_BYTEPTR(output16_) + offset;
return CAST_TO_BYTEPTR(output16_ + offset);
}
#else
return output_ + offset;
......@@ -486,7 +486,7 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
if (UUT_->use_highbd_ == 0) {
return output_ref_ + offset;
} else {
return CONVERT_TO_BYTEPTR(output16_ref_) + offset;
return CAST_TO_BYTEPTR(output16_ref_ + offset);
}
#else
return output_ref_ + offset;
......@@ -498,7 +498,7 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
if (UUT_->use_highbd_ == 0) {
return list[index];
} else {
return CONVERT_TO_SHORTPTR(list)[index];
return CAST_TO_SHORTPTR(list)[index];
}
#else
return list[index];
......@@ -510,7 +510,7 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
if (UUT_->use_highbd_ == 0) {
list[index] = (uint8_t)val;
} else {
CONVERT_TO_SHORTPTR(list)[index] = val;
CAST_TO_SHORTPTR(list)[index] = val;
}
#else
list[index] = (uint8_t)val;
......@@ -718,7 +718,7 @@ TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
if (UUT_->use_highbd_ == 0) {
ref = ref8;
} else {
ref = CONVERT_TO_BYTEPTR(ref16);
ref = CAST_TO_BYTEPTR(ref16);
}
#else
uint8_t ref[kOutputStride * kMaxDimension];
......@@ -797,7 +797,7 @@ TEST_P(ConvolveTest, FilterExtremes) {
if (UUT_->use_highbd_ == 0) {
ref = ref8;
} else {
ref = CONVERT_TO_BYTEPTR(ref16);
ref = CAST_TO_BYTEPTR(ref16);
}
#else
uint8_t ref[kOutputStride * kMaxDimension];
......
......@@ -37,8 +37,9 @@ static INLINE void highbd_inter_predictor(
const int subpel_x, const int subpel_y, const struct scale_factors *sf,
int w, int h, int ref, const InterpKernel *kernel, int xs, int ys, int bd) {
sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref](
src, src_stride, dst, dst_stride, kernel[subpel_x], xs, kernel[subpel_y],
ys, w, h, bd);
CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(src)), src_stride,
CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(dst)), dst_stride, kernel[subpel_x],
xs, kernel[subpel_y], ys, w, h, bd);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
......
......@@ -2417,10 +2417,11 @@ static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
uint8_t *dst_ptr = dsts[i] + (y / factor) * dst_stride + (x / factor);
if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
vpx_highbd_convolve8(src_ptr, src_stride, dst_ptr, dst_stride,
kernel[x_q4 & 0xf], 16 * src_w / dst_w,
kernel[y_q4 & 0xf], 16 * src_h / dst_h,
16 / factor, 16 / factor, bd);
vpx_highbd_convolve8(
CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(src_ptr)), src_stride,
CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(dst_ptr)), dst_stride,
kernel[x_q4 & 0xf], 16 * src_w / dst_w, kernel[y_q4 & 0xf],
16 * src_h / dst_h, 16 / factor, 16 / factor, bd);
} else {
vpx_scaled_2d(src_ptr, src_stride, dst_ptr, dst_stride,
kernel[x_q4 & 0xf], 16 * src_w / dst_w,
......
......@@ -2053,9 +2053,11 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
this_mode_pred = &tmp[get_pred_buffer(tmp, 3)];
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth)
vpx_highbd_convolve_copy(best_pred->data, best_pred->stride,
this_mode_pred->data, this_mode_pred->stride,
NULL, 0, NULL, 0, bw, bh, xd->bd);
vpx_highbd_convolve_copy(
CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(best_pred->data)),
best_pred->stride,
CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(this_mode_pred->data)),
this_mode_pred->stride, NULL, 0, NULL, 0, bw, bh, xd->bd);
else
vpx_convolve_copy(best_pred->data, best_pred->stride,
this_mode_pred->data, this_mode_pred->stride, NULL,
......@@ -2162,9 +2164,11 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
if (best_pred->data != orig_dst.buf && is_inter_mode(mi->mode)) {
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth)
vpx_highbd_convolve_copy(best_pred->data, best_pred->stride,
pd->dst.buf, pd->dst.stride, NULL, 0, NULL, 0,
bw, bh, xd->bd);
vpx_highbd_convolve_copy(
CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(best_pred->data)),
best_pred->stride,
CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(pd->dst.buf)), pd->dst.stride,
NULL, 0, NULL, 0, bw, bh, xd->bd);
else
vpx_convolve_copy(best_pred->data, best_pred->stride, pd->dst.buf,
pd->dst.stride, NULL, 0, NULL, 0, bw, bh);
......
......@@ -599,9 +599,10 @@ static void dist_block(const VP9_COMP *cpi, MACROBLOCK *x, int plane,
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
recon = CONVERT_TO_BYTEPTR(recon);
vpx_highbd_convolve_copy(dst, dst_stride, recon, 32, NULL, 0, NULL, 0,
bs, bs, xd->bd);
vpx_highbd_convolve_copy(CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(dst)),
dst_stride, recon, 32, NULL, 0, NULL, 0, bs,
bs, xd->bd);
recon = CONVERT_TO_BYTEPTR(recon16);
if (xd->lossless) {
vp9_highbd_iwht4x4_add(dqcoeff, recon, 32, *eob, xd->bd);
} else {
......
......@@ -145,8 +145,8 @@ void vpx_highbd_convolve8_horiz_neon(const uint8_t *src8, ptrdiff_t src_stride,
vpx_highbd_convolve8_horiz_c(src8, src_stride, dst8, dst_stride, filter_x,
x_step_q4, filter_y, y_step_q4, w, h, bd);
} else {
const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
const uint16_t *src = CAST_TO_SHORTPTR(src8);
uint16_t *dst = CAST_TO_SHORTPTR(dst8);
const int16x8_t filters = vld1q_s16(filter_x);
const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
uint16x8_t t0, t1, t2, t3;
......@@ -348,8 +348,8 @@ void vpx_highbd_convolve8_avg_horiz_neon(const uint8_t *src8,
filter_x, x_step_q4, filter_y, y_step_q4,
w, h, bd);
} else {
const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
const uint16_t *src = CAST_TO_SHORTPTR(src8);
uint16_t *dst = CAST_TO_SHORTPTR(dst8);
const int16x8_t filters = vld1q_s16(filter_x);
const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
uint16x8_t t0, t1, t2, t3;
......@@ -579,8 +579,8 @@ void vpx_highbd_convolve8_vert_neon(const uint8_t *src8, ptrdiff_t src_stride,
vpx_highbd_convolve8_vert_c(src8, src_stride, dst8, dst_stride, filter_x,
x_step_q4, filter_y, y_step_q4, w, h, bd);
} else {
const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
const uint16_t *src = CAST_TO_SHORTPTR(src8);
uint16_t *dst = CAST_TO_SHORTPTR(dst8);
const int16x8_t filters = vld1q_s16(filter_y);
const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
......@@ -748,8 +748,8 @@ void vpx_highbd_convolve8_avg_vert_neon(const uint8_t *src8,
filter_x, x_step_q4, filter_y, y_step_q4, w,
h, bd);
} else {
const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
const uint16_t *src = CAST_TO_SHORTPTR(src8);
uint16_t *dst = CAST_TO_SHORTPTR(dst8);
const int16x8_t filters = vld1q_s16(filter_y);
const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
......
......@@ -18,8 +18,8 @@ void vpx_highbd_convolve_avg_neon(const uint8_t *src8, ptrdiff_t src_stride,
const int16_t *filter_x, int filter_x_stride,
const int16_t *filter_y, int filter_y_stride,
int w, int h, int bd) {
const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
const uint16_t *src = CAST_TO_SHORTPTR(src8);
uint16_t *dst = CAST_TO_SHORTPTR(dst8);
(void)filter_x;
(void)filter_x_stride;
......
......@@ -18,8 +18,8 @@ void vpx_highbd_convolve_copy_neon(const uint8_t *src8, ptrdiff_t src_stride,
const int16_t *filter_x, int filter_x_stride,
const int16_t *filter_y, int filter_y_stride,
int w, int h, int bd) {
const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
const uint16_t *src = CAST_TO_SHORTPTR(src8);
uint16_t *dst = CAST_TO_SHORTPTR(dst8);
(void)filter_x;
(void)filter_x_stride;
......
......@@ -18,7 +18,7 @@ void vpx_highbd_convolve8_neon(const uint8_t *src8, ptrdiff_t src_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4, int w,
int h, int bd) {
const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
const uint16_t *src = CAST_TO_SHORTPTR(src8);
const int y0_q4 = get_filter_offset(filter_y, get_filter_base(filter_y));
// + 1 to make it divisible by 4
DECLARE_ALIGNED(16, uint16_t, temp[64 * 136]);
......@@ -29,13 +29,12 @@ void vpx_highbd_convolve8_neon(const uint8_t *src8, ptrdiff_t src_stride,
* height and filter a multiple of 4 lines. Since this goes in to the temp
* buffer which has lots of extra room and is subsequently discarded this is
* safe if somewhat less than ideal. */
vpx_highbd_convolve8_horiz_neon(CONVERT_TO_BYTEPTR(src - src_stride * 3),
src_stride, CONVERT_TO_BYTEPTR(temp), w,
filter_x, x_step_q4, filter_y, y_step_q4, w,
intermediate_height, bd);
vpx_highbd_convolve8_horiz_neon(
CAST_TO_BYTEPTR(src - src_stride * 3), src_stride, CAST_TO_BYTEPTR(temp),
w, filter_x, x_step_q4, filter_y, y_step_q4, w, intermediate_height, bd);
/* Step into the temp buffer 3 lines to get the actual frame data */
vpx_highbd_convolve8_vert_neon(CONVERT_TO_BYTEPTR(temp + w * 3), w, dst,
vpx_highbd_convolve8_vert_neon(CAST_TO_BYTEPTR(temp + w * 3), w, dst,
dst_stride, filter_x, x_step_q4, filter_y,
y_step_q4, w, h, bd);
}
......@@ -45,7 +44,7 @@ void vpx_highbd_convolve8_avg_neon(const uint8_t *src8, ptrdiff_t src_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h, int bd) {
const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
const uint16_t *src = CAST_TO_SHORTPTR(src8);
const int y0_q4 = get_filter_offset(filter_y, get_filter_base(filter_y));
// + 1 to make it divisible by 4
DECLARE_ALIGNED(16, uint16_t, temp[64 * 136]);
......@@ -55,11 +54,10 @@ void vpx_highbd_convolve8_avg_neon(const uint8_t *src8, ptrdiff_t src_stride,
/* This implementation has the same issues as above. In addition, we only want
* to average the values after both passes.
*/
vpx_highbd_convolve8_horiz_neon(CONVERT_TO_BYTEPTR(src - src_stride * 3),
src_stride, CONVERT_TO_BYTEPTR(temp), w,
filter_x, x_step_q4, filter_y, y_step_q4, w,
intermediate_height, bd);
vpx_highbd_convolve8_avg_vert_neon(CONVERT_TO_BYTEPTR(temp + w * 3), w, dst,
vpx_highbd_convolve8_horiz_neon(
CAST_TO_BYTEPTR(src - src_stride * 3), src_stride, CAST_TO_BYTEPTR(temp),
w, filter_x, x_step_q4, filter_y, y_step_q4, w, intermediate_height, bd);
vpx_highbd_convolve8_avg_vert_neon(CAST_TO_BYTEPTR(temp + w * 3), w, dst,
dst_stride, filter_x, x_step_q4, filter_y,
y_step_q4, w, h, bd);
}
......@@ -324,8 +324,8 @@ static void highbd_convolve_horiz(const uint8_t *src8, ptrdiff_t src_stride,
const InterpKernel *x_filters, int x0_q4,
int x_step_q4, int w, int h, int bd) {
int x, y;
const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
const uint16_t *src = CAST_TO_SHORTPTR(src8);
uint16_t *dst = CAST_TO_SHORTPTR(dst8);
src -= SUBPEL_TAPS / 2 - 1;
for (y = 0; y < h; ++y) {
......@@ -348,8 +348,8 @@ static void highbd_convolve_avg_horiz(const uint8_t *src8, ptrdiff_t src_stride,
const InterpKernel *x_filters, int x0_q4,
int x_step_q4, int w, int h, int bd) {
int x, y;
const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
const uint16_t *src = CAST_TO_SHORTPTR(src8);
uint16_t *dst = CAST_TO_SHORTPTR(dst8);
src -= SUBPEL_TAPS / 2 - 1;
for (y = 0; y < h; ++y) {
......@@ -374,8 +374,8 @@ static void highbd_convolve_vert(const uint8_t *src8, ptrdiff_t src_stride,
const InterpKernel *y_filters, int y0_q4,
int y_step_q4, int w, int h, int bd) {
int x, y;
const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
const uint16_t *src = CAST_TO_SHORTPTR(src8);
uint16_t *dst = CAST_TO_SHORTPTR(dst8);
src -= src_stride * (SUBPEL_TAPS / 2 - 1);
for (x = 0; x < w; ++x) {
......@@ -400,8 +400,8 @@ static void highbd_convolve_avg_vert(const uint8_t *src8, ptrdiff_t src_stride,
const InterpKernel *y_filters, int y0_q4,
int y_step_q4, int w, int h, int bd) {
int x, y;
const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
const uint16_t *src = CAST_TO_SHORTPTR(src8);
uint16_t *dst = CAST_TO_SHORTPTR(dst8);
src -= src_stride * (SUBPEL_TAPS / 2 - 1);
for (x = 0; x < w; ++x) {
......@@ -449,12 +449,12 @@ static void highbd_convolve(const uint8_t *src, ptrdiff_t src_stride,
assert(y_step_q4 <= 32);
assert(x_step_q4 <= 32);
highbd_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride,
CONVERT_TO_BYTEPTR(temp), 64, x_filters, x0_q4,
highbd_convolve_horiz(CAST_TO_BYTEPTR(CAST_TO_SHORTPTR(src) -
src_stride * (SUBPEL_TAPS / 2 - 1)),
src_stride, CAST_TO_BYTEPTR(temp), 64, x_filters, x0_q4,
x_step_q4, w, intermediate_height, bd);
highbd_convolve_vert(CONVERT_TO_BYTEPTR(temp) + 64 * (SUBPEL_TAPS / 2 - 1),
64, dst, dst_stride, y_filters, y0_q4, y_step_q4, w, h,
bd);
highbd_convolve_vert(CAST_TO_BYTEPTR(temp + 64 * (SUBPEL_TAPS / 2 - 1)), 64,
dst, dst_stride, y_filters, y0_q4, y_step_q4, w, h, bd);
}
void vpx_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
......@@ -541,10 +541,10 @@ void vpx_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
assert(w <= 64);
assert(h <= 64);
vpx_highbd_convolve8_c(src, src_stride, CONVERT_TO_BYTEPTR(temp), 64,
filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd);
vpx_highbd_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), 64, dst, dst_stride, NULL,
0, NULL, 0, w, h, bd);
vpx_highbd_convolve8_c(src, src_stride, CAST_TO_BYTEPTR(temp), 64, filter_x,
x_step_q4, filter_y, y_step_q4, w, h, bd);
vpx_highbd_convolve_avg_c(CAST_TO_BYTEPTR(temp), 64, dst, dst_stride, NULL, 0,
NULL, 0, w, h, bd);
}
void vpx_highbd_convolve_copy_c(const uint8_t *src8, ptrdiff_t src_stride,
......@@ -553,8 +553,8 @@ void vpx_highbd_convolve_copy_c(const uint8_t *src8, ptrdiff_t src_stride,
const int16_t *filter_y, int filter_y_stride,
int w, int h, int bd) {
int r;
const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
const uint16_t *src = CAST_TO_SHORTPTR(src8);
uint16_t *dst = CAST_TO_SHORTPTR(dst8);
(void)filter_x;
(void)filter_x_stride;
......@@ -575,8 +575,8 @@ void vpx_highbd_convolve_avg_c(const uint8_t *src8, ptrdiff_t src_stride,
const int16_t *filter_y, int filter_y_stride,
int w, int h, int bd) {
int x, y;
const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
const uint16_t *src = CAST_TO_SHORTPTR(src8);
uint16_t *dst = CAST_TO_SHORTPTR(dst8);
(void)filter_x;
(void)filter_x_stride;
......
......@@ -107,8 +107,8 @@ typedef void highbd_filter8_1dfunction(const uint16_t *src_ptr,
ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, \
const int16_t *filter_y, int y_step_q4, int w, int h, int bd) { \
if (step_q4 == 16 && filter[3] != 128) { \
uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \
uint16_t *src = CAST_TO_SHORTPTR(src8); \
uint16_t *dst = CAST_TO_SHORTPTR(dst8); \
if (filter[0] | filter[1] | filter[2]) { \
while (w >= 16) { \
vpx_highbd_filter_block1d16_##dir##8_##avg##opt( \
......@@ -162,36 +162,37 @@ typedef void highbd_filter8_1dfunction(const uint16_t *src_ptr,
} \
}
#define HIGH_FUN_CONV_2D(avg, opt) \
void vpx_highbd_convolve8_##avg##opt( \
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \
ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, \
const int16_t *filter_y, int y_step_q4, int w, int h, int bd) { \
assert(w <= 64); \
assert(h <= 64); \
if (x_step_q4 == 16 && y_step_q4 == 16) { \
if ((filter_x[0] | filter_x[1] | filter_x[2]) || filter_x[3] == 128) { \
DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]); \
vpx_highbd_convolve8_horiz_##opt( \
src - 3 * src_stride, src_stride, CONVERT_TO_BYTEPTR(fdata2), 64, \
filter_x, x_step_q4, filter_y, y_step_q4, w, h + 7, bd); \
vpx_highbd_convolve8_##avg##vert_##opt( \
CONVERT_TO_BYTEPTR(fdata2) + 192, 64, dst, dst_stride, filter_x, \
x_step_q4, filter_y, y_step_q4, w, h, bd); \
} else { \
DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]); \
vpx_highbd_convolve8_horiz_##opt( \
src, src_stride, CONVERT_TO_BYTEPTR(fdata2), 64, filter_x, \
x_step_q4, filter_y, y_step_q4, w, h + 1, bd); \
vpx_highbd_convolve8_##avg##vert_##opt( \
CONVERT_TO_BYTEPTR(fdata2), 64, dst, dst_stride, filter_x, \
x_step_q4, filter_y, y_step_q4, w, h, bd); \
} \
} else { \
vpx_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, \
filter_x, x_step_q4, filter_y, y_step_q4, \
w, h, bd); \
} \
/* HIGH_FUN_CONV_2D(avg, opt)
 *
 * Expands to the definition of vpx_highbd_convolve8_<avg><opt>(), which runs
 * a horizontal pass followed by a vertical pass through a stack buffer.
 *
 *   avg - pasted into the function name and into the vertical-pass callee
 *         (vpx_highbd_convolve8_<avg>vert_<opt>) and the C fallback
 *         (vpx_highbd_convolve8_<avg>c).
 *   opt - suffix pasted into the function name and the horiz/vert callees.
 *
 * src and dst arrive typed as uint8_t *, but offsets are never applied to
 * them as byte pointers: src is first passed through CAST_TO_SHORTPTR, the
 * row offset is applied in uint16_t units, and the result is wrapped again
 * with CAST_TO_BYTEPTR. The uint16_t scratch buffer fdata2 is offset before
 * it is wrapped with CAST_TO_BYTEPTR for the same reason.
 *
 * Behaviour of the generated function:
 *   - Asserts w <= 64 and h <= 64.
 *   - If x_step_q4 == 16 and y_step_q4 == 16:
 *       - If any of filter_x[0..2] is non-zero, or filter_x[3] == 128, the
 *         horizontal pass starts 3 rows above src and writes h + 7 rows into
 *         fdata2 (row stride 64); the vertical pass then reads from
 *         fdata2 + 192, i.e. 3 rows of 64 into the buffer.
 *       - Otherwise the horizontal pass starts at src and writes h + 1 rows;
 *         the vertical pass reads from the start of fdata2.
 *   - For any other step values the call is forwarded unchanged to
 *     vpx_highbd_convolve8_<avg>c().
 */
#define HIGH_FUN_CONV_2D(avg, opt) \
void vpx_highbd_convolve8_##avg##opt( \
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \
ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, \
const int16_t *filter_y, int y_step_q4, int w, int h, int bd) { \
assert(w <= 64); \
assert(h <= 64); \
if (x_step_q4 == 16 && y_step_q4 == 16) { \
if ((filter_x[0] | filter_x[1] | filter_x[2]) || filter_x[3] == 128) { \
DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]); \
vpx_highbd_convolve8_horiz_##opt( \
CAST_TO_BYTEPTR(CAST_TO_SHORTPTR(src) - 3 * src_stride), \
src_stride, CAST_TO_BYTEPTR(fdata2), 64, filter_x, x_step_q4, \
filter_y, y_step_q4, w, h + 7, bd); \
vpx_highbd_convolve8_##avg##vert_##opt( \
CAST_TO_BYTEPTR(fdata2 + 192), 64, dst, dst_stride, filter_x, \
x_step_q4, filter_y, y_step_q4, w, h, bd); \
} else { \
DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]); \
vpx_highbd_convolve8_horiz_##opt( \
src, src_stride, CAST_TO_BYTEPTR(fdata2), 64, filter_x, x_step_q4, \
filter_y, y_step_q4, w, h + 1, bd); \
vpx_highbd_convolve8_##avg##vert_##opt( \
CAST_TO_BYTEPTR(fdata2), 64, dst, dst_stride, filter_x, x_step_q4, \
filter_y, y_step_q4, w, h, bd); \
} \
} else { \
vpx_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, \
filter_x, x_step_q4, filter_y, y_step_q4, \
w, h, bd); \
} \
}
#endif // CONFIG_VP9_HIGHBITDEPTH
......
......@@ -21,8 +21,8 @@ void vpx_highbd_convolve_copy_avx2(const uint8_t *src8, ptrdiff_t src_stride,
const int16_t *filter_x, int filter_x_stride,
const int16_t *filter_y, int filter_y_stride,
int width, int h, int bd) {
const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
const uint16_t *src = CAST_TO_SHORTPTR(src8);
uint16_t *dst = CAST_TO_SHORTPTR(dst8);
(void)filter_x;
(void)filter_y;
(void)filter_x_stride;
......@@ -104,8 +104,8 @@ void vpx_highbd_convolve_avg_avx2(const uint8_t *src8, ptrdiff_t src_stride,
const int16_t *filter_x, int filter_x_stride,
const int16_t *filter_y, int filter_y_stride,
int width, int h, int bd) {
uint16_t *src = CONVERT_TO_SHORTPTR(src8);
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
uint16_t *src = CAST_TO_SHORTPTR(src8);
uint16_t *dst = CAST_TO_SHORTPTR(dst8);
(void)filter_x;
(void)filter_y;
(void)filter_x_stride;
......
......@@ -32,9 +32,7 @@ cglobal convolve_%1, 4, 7, 4+AUX_XMM_REGS, src, src_stride, \
mov r4d, dword wm
%ifidn %2, highbd
shl r4d, 1
shl srcq, 1
shl src_strideq, 1
shl dstq, 1
shl dst_strideq, 1
%else
cmp r4d, 4
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment