Commit 5d1d72df authored by James Zern, committed by Gerrit Code Review
Browse files

Merge changes from topic 'vp9-intra-pred'

* changes:
  vp9_reconintra_neon: add d135 4x4
  vp9_reconintra: correct d135 4x4 signature
parents 4b45088f 337b221e
...@@ -211,8 +211,8 @@ INTRA_PRED_TEST(DSPR2, TestIntraPred4, vp9_dc_predictor_4x4_dspr2, NULL, NULL, ...@@ -211,8 +211,8 @@ INTRA_PRED_TEST(DSPR2, TestIntraPred4, vp9_dc_predictor_4x4_dspr2, NULL, NULL,
INTRA_PRED_TEST(NEON, TestIntraPred4, vp9_dc_predictor_4x4_neon, INTRA_PRED_TEST(NEON, TestIntraPred4, vp9_dc_predictor_4x4_neon,
vp9_dc_left_predictor_4x4_neon, vp9_dc_top_predictor_4x4_neon, vp9_dc_left_predictor_4x4_neon, vp9_dc_top_predictor_4x4_neon,
vp9_dc_128_predictor_4x4_neon, vp9_v_predictor_4x4_neon, vp9_dc_128_predictor_4x4_neon, vp9_v_predictor_4x4_neon,
vp9_h_predictor_4x4_neon, NULL, NULL, NULL, NULL, NULL, NULL, vp9_h_predictor_4x4_neon, NULL, vp9_d135_predictor_4x4_neon,
vp9_tm_predictor_4x4_neon) NULL, NULL, NULL, NULL, vp9_tm_predictor_4x4_neon)
#endif // HAVE_NEON #endif // HAVE_NEON
#if HAVE_MSA #if HAVE_MSA
......
...@@ -313,6 +313,36 @@ void vp9_dc_128_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, ...@@ -313,6 +313,36 @@ void vp9_dc_128_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride,
dc_32x32(dst, stride, NULL, NULL, 0, 0); dc_32x32(dst, stride, NULL, NULL, 0, 0);
} }
// -----------------------------------------------------------------------------
// NEON d135 intra predictor for a 4x4 block.
//
// Fills the 4x4 block at dst (rows are `stride` bytes apart) with 3-tap
// averages of the neighbouring pixels:
//   X       = above[-1]     (top-left corner)
//   A B C D = above[0..3]   (row above the block)
//   I J K L = left[0..3]    (column left of the block)
// Every output pixel is (a + 2 * b + c + 2) >> 2 over three consecutive
// entries of the sequence L K J I X A B C D; each row is the row below it
// advanced by one entry along that sequence.
//
// Naming: a vector name spells its byte lanes with lane 0 first; '_' marks a
// lane that is zero or unused.
//
// dst:    destination block, written as four 32-bit stores.
// stride: distance in bytes between consecutive rows of dst.
// above:  pixels above the block; above[-1] .. above[6] are read by the
//         8-byte load, although only above[-1] .. above[3] affect the output.
// left:   pixels left of the block; left[0] .. left[3] are read.
//
// NOTE(review): left and dst are accessed through uint32_t pointers, so this
// presumably relies on callers passing suitably aligned buffers -- confirm.
// NOTE(review): the 64-bit shifts below assume byte lane 0 is the least
// significant byte of the 64-bit view of the vector -- confirm for the
// targets this is built for.
void vp9_d135_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
// Load X A B C D (plus three further bytes that are not used).
const uint8x8_t XABCD_u8 = vld1_u8(above - 1);
const uint64x1_t XABCD = vreinterpret_u64_u8(XABCD_u8);
// Move X A B C into lanes 4..7; lanes 0..3 become zero and D is shifted out.
const uint64x1_t ____XABC = vshl_n_u64(XABCD, 32);
const uint32x2_t zero = vdup_n_u32(0);
// Load I J K L into lanes 0..3 of an otherwise zero vector.
const uint32x2_t IJKL = vld1_lane_u32((const uint32_t *)left, zero, 0);
const uint8x8_t IJKL_u8 = vreinterpret_u8_u32(IJKL);
// Reverse the bytes within each 32-bit half so lanes 0..3 hold L K J I.
const uint64x1_t LKJI____ = vreinterpret_u64_u8(vrev32_u8(IJKL_u8));
// Combine both halves: the edge pixels in order, from bottom-left to top.
const uint64x1_t LKJIXABC = vorr_u64(LKJI____, ____XABC);
// The same sequence advanced by one and by two entries; these supply the
// centre tap and the far outer tap of the 3-tap filter for every lane.
const uint8x8_t KJIXABC_ = vreinterpret_u8_u64(vshr_n_u64(LKJIXABC, 8));
const uint8x8_t JIXABC__ = vreinterpret_u8_u64(vshr_n_u64(LKJIXABC, 16));
// D was dropped by the 32-bit shift above; put it back for the last tap.
const uint8_t D = vget_lane_u8(XABCD_u8, 4);
const uint8x8_t JIXABCD_ = vset_lane_u8(D, JIXABC__, 6);
const uint8x8_t LKJIXABC_u8 = vreinterpret_u8_u64(LKJIXABC);
// avg1 = (a + c) >> 1 (truncating), avg2 = (avg1 + b + 1) >> 1 (rounding).
// Together these equal (a + 2 * b + c + 2) >> 2 exactly, without widening
// the 8-bit lanes.
const uint8x8_t avg1 = vhadd_u8(JIXABCD_, LKJIXABC_u8);
const uint8x8_t avg2 = vrhadd_u8(avg1, KJIXABC_);
const uint64x1_t avg2_u64 = vreinterpret_u64_u8(avg2);
// Rows are overlapping 4-byte windows of avg2: row 3 is lanes 0..3, row 2 is
// lanes 1..4, row 1 is lanes 2..5 and row 0 is lanes 3..6. Lane 7 of avg2 is
// never stored.
const uint32x2_t r3 = vreinterpret_u32_u8(avg2);
const uint32x2_t r2 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 8));
const uint32x2_t r1 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 16));
const uint32x2_t r0 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 24));
// Store the low 32 bits (four pixels) of each row vector.
vst1_lane_u32((uint32_t *)(dst + 0 * stride), r0, 0);
vst1_lane_u32((uint32_t *)(dst + 1 * stride), r1, 0);
vst1_lane_u32((uint32_t *)(dst + 2 * stride), r2, 0);
vst1_lane_u32((uint32_t *)(dst + 3 * stride), r3, 0);
}
#if !HAVE_NEON_ASM #if !HAVE_NEON_ASM
void vp9_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, void vp9_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
......
...@@ -533,8 +533,8 @@ static INLINE void d117_predictor(uint8_t *dst, ptrdiff_t stride, int bs, ...@@ -533,8 +533,8 @@ static INLINE void d117_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
} }
intra_pred_no_4x4(d117) intra_pred_no_4x4(d117)
void vp9_d135_predictor_4x4(uint8_t *dst, ptrdiff_t stride, void vp9_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) { const uint8_t *above, const uint8_t *left) {
const int I = left[0]; const int I = left[0];
const int J = left[1]; const int J = left[1];
const int K = left[2]; const int K = left[2];
......
...@@ -72,7 +72,7 @@ add_proto qw/void vp9_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, co ...@@ -72,7 +72,7 @@ add_proto qw/void vp9_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, co
specialize qw/vp9_d117_predictor_4x4/; specialize qw/vp9_d117_predictor_4x4/;
add_proto qw/void vp9_d135_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; add_proto qw/void vp9_d135_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d135_predictor_4x4/; specialize qw/vp9_d135_predictor_4x4 neon/;
add_proto qw/void vp9_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; add_proto qw/void vp9_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d153_predictor_4x4/, "$ssse3_x86inc"; specialize qw/vp9_d153_predictor_4x4/, "$ssse3_x86inc";
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment