Commit d4648d93 authored by Yunqing Wang's avatar Yunqing Wang Committed by Gerrit Code Review
Browse files

Merge "AVX2 SubPixel AVG Variance Optimization"

parents a9288e29 ea149096
No related merge requests found
Showing with 512 additions and 555 deletions
......@@ -389,7 +389,7 @@ prototype unsigned int vp9_sub_pixel_variance64x64 "const uint8_t *src_ptr, int
specialize vp9_sub_pixel_variance64x64 $sse2_x86inc $ssse3_x86inc avx2
prototype unsigned int vp9_sub_pixel_avg_variance64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
specialize vp9_sub_pixel_avg_variance64x64 $sse2_x86inc $ssse3_x86inc
specialize vp9_sub_pixel_avg_variance64x64 $sse2_x86inc $ssse3_x86inc avx2
prototype unsigned int vp9_sub_pixel_variance32x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_sub_pixel_variance32x64 $sse2_x86inc $ssse3_x86inc
......@@ -419,7 +419,7 @@ prototype unsigned int vp9_sub_pixel_variance32x32 "const uint8_t *src_ptr, int
specialize vp9_sub_pixel_variance32x32 $sse2_x86inc $ssse3_x86inc avx2
prototype unsigned int vp9_sub_pixel_avg_variance32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
specialize vp9_sub_pixel_avg_variance32x32 $sse2_x86inc $ssse3_x86inc
specialize vp9_sub_pixel_avg_variance32x32 $sse2_x86inc $ssse3_x86inc avx2
prototype unsigned int vp9_sub_pixel_variance16x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_sub_pixel_variance16x16 $sse2_x86inc $ssse3_x86inc
......
This diff is collapsed.
......@@ -54,6 +54,20 @@ unsigned int vp9_sub_pixel_variance32xh_avx2
unsigned int *sse
);
unsigned int vp9_sub_pixel_avg_variance32xh_avx2
(
const uint8_t *src,
int src_stride,
int x_offset,
int y_offset,
const uint8_t *dst,
int dst_stride,
const uint8_t *sec,
int sec_stride,
int height,
unsigned int *sseptr
);
static void variance_avx2(const unsigned char *src_ptr, int source_stride,
const unsigned char *ref_ptr, int recon_stride,
int w, int h, unsigned int *sse, int *sum,
......@@ -207,3 +221,48 @@ unsigned int vp9_sub_pixel_variance32x32_avx2(const uint8_t *src,
*sse_ptr = sse;
return sse - (((int64_t)se * se) >> 10);
}
unsigned int vp9_sub_pixel_avg_variance64x64_avx2(const uint8_t *src,
int src_stride,
int x_offset,
int y_offset,
const uint8_t *dst,
int dst_stride,
unsigned int *sseptr,
const uint8_t *sec) {
// processing 32 elements in parallel
unsigned int sse;
int se = vp9_sub_pixel_avg_variance32xh_avx2(src, src_stride, x_offset,
y_offset, dst, dst_stride,
sec, 64, 64, &sse);
unsigned int sse2;
// processing the next 32 elements in parallel
int se2 = vp9_sub_pixel_avg_variance32xh_avx2(src + 32, src_stride, x_offset,
y_offset, dst + 32, dst_stride,
sec + 32, 64, 64, &sse2);
se += se2;
sse += sse2;
*sseptr = sse;
return sse - (((int64_t)se * se) >> 12);
}
unsigned int vp9_sub_pixel_avg_variance32x32_avx2(const uint8_t *src,
int src_stride,
int x_offset,
int y_offset,
const uint8_t *dst,
int dst_stride,
unsigned int *sseptr,
const uint8_t *sec) {
// processing 32 element in parallel
unsigned int sse;
int se = vp9_sub_pixel_avg_variance32xh_avx2(src, src_stride, x_offset,
y_offset, dst, dst_stride,
sec, 32, 32, &sse);
*sseptr = sse;
return sse - (((int64_t)se * se) >> 10);
}
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment