diff --git a/vp9/common/arm/neon/vp9_idct16x16_neon.c b/vp9/common/arm/neon/vp9_idct16x16_neon.c index 3e3e400a4b820a232bd97c96aabfddab56cf8c48..fddf902d09f3c4267846233a4ba36f71da249acb 100644 --- a/vp9/common/arm/neon/vp9_idct16x16_neon.c +++ b/vp9/common/arm/neon/vp9_idct16x16_neon.c @@ -20,10 +20,10 @@ extern void vp9_short_idct16x16_add_neon_pass2(int16_t *src, int16_t skip_adding, uint8_t *dest, int dest_stride); -extern void vp9_short_idct10_16x16_add_neon_pass1(int16_t *input, +extern void vp9_short_idct16x16_10_add_neon_pass1(int16_t *input, int16_t *output, int output_stride); -extern void vp9_short_idct10_16x16_add_neon_pass2(int16_t *src, +extern void vp9_short_idct16x16_10_add_neon_pass2(int16_t *src, int16_t *output, int16_t *pass1Output, int16_t skip_adding, @@ -107,7 +107,7 @@ void vp9_short_idct16x16_add_neon(int16_t *input, return; } -void vp9_short_idct10_16x16_add_neon(int16_t *input, +void vp9_short_idct16x16_10_add_neon(int16_t *input, uint8_t *dest, int dest_stride) { int16_t pass1_output[16*16] = {0}; int16_t row_idct_output[16*16] = {0}; @@ -118,12 +118,12 @@ void vp9_short_idct10_16x16_add_neon(int16_t *input, /* Parallel idct on the upper 8 rows */ // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the // stage 6 result in pass1_output. - vp9_short_idct10_16x16_add_neon_pass1(input, pass1_output, 8); + vp9_short_idct16x16_10_add_neon_pass1(input, pass1_output, 8); // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines // with result in pass1(pass1_output) to calculate final result in stage 7 // which will be saved into row_idct_output. - vp9_short_idct10_16x16_add_neon_pass2(input+1, + vp9_short_idct16x16_10_add_neon_pass2(input+1, row_idct_output, pass1_output, 0, diff --git a/vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm b/vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm index 7464e800f3438f5be6c211957eb782018b23de0c..856022bbf4781e10348cf3629b1fadd76289f5f2 100644 --- a/vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm +++ b/vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm @@ -10,8 +10,8 @@ EXPORT |vp9_short_idct16x16_add_neon_pass1| EXPORT |vp9_short_idct16x16_add_neon_pass2| - EXPORT |vp9_short_idct10_16x16_add_neon_pass1| - EXPORT |vp9_short_idct10_16x16_add_neon_pass2| + EXPORT |vp9_short_idct16x16_10_add_neon_pass1| + EXPORT |vp9_short_idct16x16_10_add_neon_pass2| EXPORT |save_neon_registers| EXPORT |restore_neon_registers| ARM @@ -788,7 +788,7 @@ end_idct16x16_pass2 bx lr ENDP ; |vp9_short_idct16x16_add_neon_pass2| -;void |vp9_short_idct10_16x16_add_neon_pass1|(int16_t *input, +;void |vp9_short_idct16x16_10_add_neon_pass1|(int16_t *input, ; int16_t *output, int output_stride) ; ; r0 int16_t input @@ -798,7 +798,7 @@ end_idct16x16_pass2 ; idct16 stage1 - stage6 on all the elements loaded in q8-q15. The output ; will be stored back into q8-q15 registers. This function will touch q0-q7 ; registers and use them as buffer during calculation. -|vp9_short_idct10_16x16_add_neon_pass1| PROC +|vp9_short_idct16x16_10_add_neon_pass1| PROC ; TODO(hkuang): Find a better way to load the elements. ; load elements of 0, 2, 4, 6, 8, 10, 12, 14 into q8 - q15 @@ -907,9 +907,9 @@ end_idct16x16_pass2 vst1.64 {d31}, [r1], r2 bx lr - ENDP ; |vp9_short_idct10_16x16_add_neon_pass1| + ENDP ; |vp9_short_idct16x16_10_add_neon_pass1| -;void vp9_short_idct10_16x16_add_neon_pass2(int16_t *src, +;void vp9_short_idct16x16_10_add_neon_pass2(int16_t *src, ; int16_t *output, ; int16_t *pass1Output, ; int16_t skip_adding, @@ -926,7 +926,7 @@ end_idct16x16_pass2 ; idct16 stage1 - stage7 on all the elements loaded in q8-q15. The output ; will be stored back into q8-q15 registers. This function will touch q0-q7 ; registers and use them as buffer during calculation. -|vp9_short_idct10_16x16_add_neon_pass2| PROC +|vp9_short_idct16x16_10_add_neon_pass2| PROC push {r3-r9} ; TODO(hkuang): Find a better way to load the elements. @@ -1177,7 +1177,7 @@ end_idct16x16_pass2 end_idct10_16x16_pass2 pop {r3-r9} bx lr - ENDP ; |vp9_short_idct10_16x16_add_neon_pass2| + ENDP ; |vp9_short_idct16x16_10_add_neon_pass2| ;void |save_neon_registers|() |save_neon_registers| PROC vpush {d8-d15} diff --git a/vp9/common/vp9_idct.c b/vp9/common/vp9_idct.c index f06bf047b68afb7244f5385f26a2a16313c2ef66..9975d3678afdedd91fbfbfba04cfdd7eef0deeb8 100644 --- a/vp9/common/vp9_idct.c +++ b/vp9/common/vp9_idct.c @@ -838,7 +838,7 @@ void vp9_short_iht16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride, + dest[j * dest_stride + i]); } } -void vp9_short_idct10_16x16_add_c(int16_t *input, uint8_t *dest, +void vp9_short_idct16x16_10_add_c(int16_t *input, uint8_t *dest, int dest_stride) { int16_t out[16 * 16] = { 0 }; int16_t *outptr = out; diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 042afbbef4a245dbebf4853faa334433c773ef36..52bcbcf84b0b1edddc125bdbee1b559d380623ee 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -315,8 +315,8 @@ specialize vp9_short_idct16x16_1_add sse2 neon prototype void vp9_short_idct16x16_add "int16_t *input, uint8_t *dest, int dest_stride" specialize vp9_short_idct16x16_add sse2 neon -prototype void vp9_short_idct10_16x16_add "int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_short_idct10_16x16_add sse2 neon +prototype void vp9_short_idct16x16_10_add "int16_t *input, uint8_t *dest, int dest_stride" +specialize vp9_short_idct16x16_10_add sse2 neon prototype void vp9_short_idct32x32_add "int16_t *input, uint8_t *dest, int dest_stride" specialize vp9_short_idct32x32_add sse2 neon diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.c b/vp9/common/x86/vp9_idct_intrin_sse2.c index 8f740f4127f7de539af001a1b9d7a33a13f6314c..f97a6f5bf2bf00874dd99a2160c0c3ee22a7c37a 100644 --- a/vp9/common/x86/vp9_idct_intrin_sse2.c +++ b/vp9/common/x86/vp9_idct_intrin_sse2.c @@ -2456,7 +2456,7 @@ void vp9_short_iht16x16_add_sse2(int16_t *input, uint8_t *dest, int stride, write_buffer_8x16(dest, in1, stride); } -void vp9_short_idct10_16x16_add_sse2(int16_t *input, uint8_t *dest, +void vp9_short_idct16x16_10_add_sse2(int16_t *input, uint8_t *dest, int stride) { const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); const __m128i final_rounding = _mm_set1_epi16(1<<5); diff --git a/vp9/decoder/vp9_idct_blk.c b/vp9/decoder/vp9_idct_blk.c index 395e636b8477022faae3f0389257cd618cf0cb37..00d2751bd22865d0d3cbe8fb92284e77c6d30aa8 100644 --- a/vp9/decoder/vp9_idct_blk.c +++ b/vp9/decoder/vp9_idct_blk.c @@ -126,7 +126,7 @@ void vp9_idct_add_16x16_c(int16_t *input, uint8_t *dest, int stride, int eob) { vp9_short_idct16x16_1_add(input, dest, stride); input[0] = 0; } else if (eob <= 10) { - vp9_short_idct10_16x16_add(input, dest, stride); + vp9_short_idct16x16_10_add(input, dest, stride); vpx_memset(input, 0, 512); } else { vp9_short_idct16x16_add(input, dest, stride); diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 6b9109c944936497ea788f2734c07898c594ee91..058bde66f73116774b6ce55a87b8ae55fcd01454 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -64,7 +64,7 @@ static void inverse_transform_b_16x16_add(int eob, if (eob <= 1) vp9_short_idct16x16_1_add(dqcoeff, dest, stride); else if (eob <= 10) - vp9_short_idct10_16x16_add(dqcoeff, dest, stride); + vp9_short_idct16x16_10_add(dqcoeff, dest, stride); else vp9_short_idct16x16_add(dqcoeff, dest, stride); }