Commit bdc43711 authored by Jingning Han

Take out assertion from inverse transforms

Separate the rounding and right-shift operations of the forward transform
from those of the inverse transform, and take out the assertion check from
the inverse transforms. If the transform coefficients were constructed to
make intermediate steps of the inverse transform overflow, the codec will
simply let the values overflow without breaking the decoding flow.

Change-Id: I73cfc3706c4e840fc543a77cbc4cdb0b05d07730
Showing with 147 additions and 142 deletions
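
For reference, below is a minimal, self-contained sketch (not part of the change itself) of the behavior described above: the decoder-side dct_const_round_shift drops the assertion and truncates to 16 bits, while the new encoder-side fdct_round_shift keeps the range check. ROUND_POWER_OF_TWO and DCT_CONST_BITS are reproduced with the values libvpx uses; the numeric inputs in main are made-up illustrative values, not taken from the commit.

/* Sketch of the two rounding helpers after this commit (assumptions noted). */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define DCT_CONST_BITS 14  /* value used by libvpx */
#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))

/* Decoder side: no assertion; an out-of-range result is truncated to 16 bits,
 * so a hostile bitstream cannot abort the decoder. */
static int dct_const_round_shift(int input) {
  int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
  return (int16_t)rv;
}

/* Encoder side: keeps the range check, since the encoder controls its own
 * input and an overflow here would indicate a real bug. */
static int fdct_round_shift(int input) {
  int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
  assert(INT16_MIN <= rv && rv <= INT16_MAX);
  return rv;
}

int main(void) {
  /* 50000 * 11585 (cospi_16_64) rounds to 35355, which is outside int16_t;
   * the decoder helper typically wraps it instead of asserting. */
  printf("decoder helper: %d\n", dct_const_round_shift(50000 * 11585));
  /* An in-range value passes the encoder-side assertion unchanged. */
  printf("encoder helper: %d\n", fdct_round_shift(1000 * 11585));
  return 0;
}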
@@ -77,8 +77,7 @@ static const int sinpi_4_9 = 15212;
 static INLINE int dct_const_round_shift(int input) {
   int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
-  assert(INT16_MIN <= rv && rv <= INT16_MAX);
-  return rv;
+  return (int16_t)rv;
 }
 
 typedef void (*transform_1d)(const int16_t*, int16_t*);
...
@@ -20,6 +20,12 @@
 #include "vp9/encoder/vp9_dct.h"
 
+static INLINE int fdct_round_shift(int input) {
+  int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
+  assert(INT16_MIN <= rv && rv <= INT16_MAX);
+  return rv;
+}
+
 static void fdct4(const int16_t *input, int16_t *output) {
   int16_t step[4];
   int temp1, temp2;
@@ -31,12 +37,12 @@ static void fdct4(const int16_t *input, int16_t *output) {
   temp1 = (step[0] + step[1]) * cospi_16_64;
   temp2 = (step[0] - step[1]) * cospi_16_64;
-  output[0] = dct_const_round_shift(temp1);
-  output[2] = dct_const_round_shift(temp2);
+  output[0] = fdct_round_shift(temp1);
+  output[2] = fdct_round_shift(temp2);
   temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64;
   temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64;
-  output[1] = dct_const_round_shift(temp1);
-  output[3] = dct_const_round_shift(temp2);
+  output[1] = fdct_round_shift(temp1);
+  output[3] = fdct_round_shift(temp2);
 }
 
 void vp9_fdct4x4_c(const int16_t *input, int16_t *output, int stride) {
@@ -80,12 +86,12 @@ void vp9_fdct4x4_c(const int16_t *input, int16_t *output, int stride) {
       step[3] = input[0] - input[3];
       temp1 = (step[0] + step[1]) * cospi_16_64;
       temp2 = (step[0] - step[1]) * cospi_16_64;
-      out[0] = dct_const_round_shift(temp1);
-      out[2] = dct_const_round_shift(temp2);
+      out[0] = fdct_round_shift(temp1);
+      out[2] = fdct_round_shift(temp2);
       temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64;
       temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64;
-      out[1] = dct_const_round_shift(temp1);
-      out[3] = dct_const_round_shift(temp2);
+      out[1] = fdct_round_shift(temp1);
+      out[3] = fdct_round_shift(temp2);
       // Do next column (which is a transposed row in second/horizontal pass)
       in++;
       out += 4;
@@ -138,10 +144,10 @@ static void fadst4(const int16_t *input, int16_t *output) {
   s3 = x2 - x0 + x3;
 
   // 1-D transform scaling factor is sqrt(2).
-  output[0] = dct_const_round_shift(s0);
-  output[1] = dct_const_round_shift(s1);
-  output[2] = dct_const_round_shift(s2);
-  output[3] = dct_const_round_shift(s3);
+  output[0] = fdct_round_shift(s0);
+  output[1] = fdct_round_shift(s1);
+  output[2] = fdct_round_shift(s2);
+  output[3] = fdct_round_shift(s3);
 }
 
 static const transform_2d FHT_4[] = {
@@ -204,16 +210,16 @@ static void fdct8(const int16_t *input, int16_t *output) {
   t1 = (x0 - x1) * cospi_16_64;
   t2 = x2 * cospi_24_64 + x3 * cospi_8_64;
   t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;
-  output[0] = dct_const_round_shift(t0);
-  output[2] = dct_const_round_shift(t2);
-  output[4] = dct_const_round_shift(t1);
-  output[6] = dct_const_round_shift(t3);
+  output[0] = fdct_round_shift(t0);
+  output[2] = fdct_round_shift(t2);
+  output[4] = fdct_round_shift(t1);
+  output[6] = fdct_round_shift(t3);
 
   // Stage 2
   t0 = (s6 - s5) * cospi_16_64;
   t1 = (s6 + s5) * cospi_16_64;
-  t2 = dct_const_round_shift(t0);
-  t3 = dct_const_round_shift(t1);
+  t2 = fdct_round_shift(t0);
+  t3 = fdct_round_shift(t1);
 
   // Stage 3
   x0 = s4 + t2;
@@ -226,10 +232,10 @@ static void fdct8(const int16_t *input, int16_t *output) {
   t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
   t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
   t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
-  output[1] = dct_const_round_shift(t0);
-  output[3] = dct_const_round_shift(t2);
-  output[5] = dct_const_round_shift(t1);
-  output[7] = dct_const_round_shift(t3);
+  output[1] = fdct_round_shift(t0);
+  output[3] = fdct_round_shift(t2);
+  output[5] = fdct_round_shift(t1);
+  output[7] = fdct_round_shift(t3);
 }
 
 void vp9_fdct8x8_c(const int16_t *input, int16_t *final_output, int stride) {
@@ -264,16 +270,16 @@ void vp9_fdct8x8_c(const int16_t *input, int16_t *final_output, int stride) {
       t1 = (x0 - x1) * cospi_16_64;
       t2 = x2 * cospi_24_64 + x3 * cospi_8_64;
       t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;
-      output[0 * 8] = dct_const_round_shift(t0);
-      output[2 * 8] = dct_const_round_shift(t2);
-      output[4 * 8] = dct_const_round_shift(t1);
-      output[6 * 8] = dct_const_round_shift(t3);
+      output[0 * 8] = fdct_round_shift(t0);
+      output[2 * 8] = fdct_round_shift(t2);
+      output[4 * 8] = fdct_round_shift(t1);
+      output[6 * 8] = fdct_round_shift(t3);
 
       // Stage 2
       t0 = (s6 - s5) * cospi_16_64;
      t1 = (s6 + s5) * cospi_16_64;
-      t2 = dct_const_round_shift(t0);
-      t3 = dct_const_round_shift(t1);
+      t2 = fdct_round_shift(t0);
+      t3 = fdct_round_shift(t1);
 
       // Stage 3
       x0 = s4 + t2;
@@ -286,10 +292,10 @@ void vp9_fdct8x8_c(const int16_t *input, int16_t *final_output, int stride) {
      t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
      t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
      t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
-      output[1 * 8] = dct_const_round_shift(t0);
-      output[3 * 8] = dct_const_round_shift(t2);
-      output[5 * 8] = dct_const_round_shift(t1);
-      output[7 * 8] = dct_const_round_shift(t3);
+      output[1 * 8] = fdct_round_shift(t0);
+      output[3 * 8] = fdct_round_shift(t2);
+      output[5 * 8] = fdct_round_shift(t1);
+      output[7 * 8] = fdct_round_shift(t3);
      input++;
      output++;
    }
@@ -388,16 +394,16 @@ void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride) {
        t1 = (x0 - x1) * cospi_16_64;
        t2 = x3 * cospi_8_64 + x2 * cospi_24_64;
        t3 = x3 * cospi_24_64 - x2 * cospi_8_64;
-        out[0] = dct_const_round_shift(t0);
-        out[4] = dct_const_round_shift(t2);
-        out[8] = dct_const_round_shift(t1);
-        out[12] = dct_const_round_shift(t3);
+        out[0] = fdct_round_shift(t0);
+        out[4] = fdct_round_shift(t2);
+        out[8] = fdct_round_shift(t1);
+        out[12] = fdct_round_shift(t3);
 
        // Stage 2
        t0 = (s6 - s5) * cospi_16_64;
        t1 = (s6 + s5) * cospi_16_64;
-        t2 = dct_const_round_shift(t0);
-        t3 = dct_const_round_shift(t1);
+        t2 = fdct_round_shift(t0);
+        t3 = fdct_round_shift(t1);
 
        // Stage 3
        x0 = s4 + t2;
@@ -410,22 +416,22 @@ void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride) {
        t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
        t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
        t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
-        out[2] = dct_const_round_shift(t0);
-        out[6] = dct_const_round_shift(t2);
-        out[10] = dct_const_round_shift(t1);
-        out[14] = dct_const_round_shift(t3);
+        out[2] = fdct_round_shift(t0);
+        out[6] = fdct_round_shift(t2);
+        out[10] = fdct_round_shift(t1);
+        out[14] = fdct_round_shift(t3);
      }
      // Work on the next eight values; step1 -> odd_results
      {
        // step 2
        temp1 = (step1[5] - step1[2]) * cospi_16_64;
        temp2 = (step1[4] - step1[3]) * cospi_16_64;
-        step2[2] = dct_const_round_shift(temp1);
-        step2[3] = dct_const_round_shift(temp2);
+        step2[2] = fdct_round_shift(temp1);
+        step2[3] = fdct_round_shift(temp2);
        temp1 = (step1[4] + step1[3]) * cospi_16_64;
        temp2 = (step1[5] + step1[2]) * cospi_16_64;
-        step2[4] = dct_const_round_shift(temp1);
-        step2[5] = dct_const_round_shift(temp2);
+        step2[4] = fdct_round_shift(temp1);
+        step2[5] = fdct_round_shift(temp2);
        // step 3
        step3[0] = step1[0] + step2[3];
        step3[1] = step1[1] + step2[2];
@@ -438,12 +444,12 @@ void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride) {
        // step 4
        temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64;
        temp2 = step3[2] * -cospi_24_64 - step3[5] * cospi_8_64;
-        step2[1] = dct_const_round_shift(temp1);
-        step2[2] = dct_const_round_shift(temp2);
+        step2[1] = fdct_round_shift(temp1);
+        step2[2] = fdct_round_shift(temp2);
        temp1 = step3[2] * -cospi_8_64 + step3[5] * cospi_24_64;
        temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64;
-        step2[5] = dct_const_round_shift(temp1);
-        step2[6] = dct_const_round_shift(temp2);
+        step2[5] = fdct_round_shift(temp1);
+        step2[6] = fdct_round_shift(temp2);
        // step 5
        step1[0] = step3[0] + step2[1];
        step1[1] = step3[0] - step2[1];
@@ -456,20 +462,20 @@ void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride) {
        // step 6
        temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64;
        temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64;
-        out[1] = dct_const_round_shift(temp1);
-        out[9] = dct_const_round_shift(temp2);
+        out[1] = fdct_round_shift(temp1);
+        out[9] = fdct_round_shift(temp2);
        temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64;
        temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64;
-        out[5] = dct_const_round_shift(temp1);
-        out[13] = dct_const_round_shift(temp2);
+        out[5] = fdct_round_shift(temp1);
+        out[13] = fdct_round_shift(temp2);
        temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64;
        temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64;
-        out[3] = dct_const_round_shift(temp1);
-        out[11] = dct_const_round_shift(temp2);
+        out[3] = fdct_round_shift(temp1);
+        out[11] = fdct_round_shift(temp2);
        temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64;
        temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64;
-        out[7] = dct_const_round_shift(temp1);
-        out[15] = dct_const_round_shift(temp2);
+        out[7] = fdct_round_shift(temp1);
+        out[15] = fdct_round_shift(temp2);
      }
      // Do next column (which is a transposed row in second/horizontal pass)
      in++;
@@ -503,14 +509,14 @@ static void fadst8(const int16_t *input, int16_t *output) {
   s6 = cospi_26_64 * x6 + cospi_6_64 * x7;
   s7 = cospi_6_64 * x6 - cospi_26_64 * x7;
 
-  x0 = dct_const_round_shift(s0 + s4);
-  x1 = dct_const_round_shift(s1 + s5);
-  x2 = dct_const_round_shift(s2 + s6);
-  x3 = dct_const_round_shift(s3 + s7);
-  x4 = dct_const_round_shift(s0 - s4);
-  x5 = dct_const_round_shift(s1 - s5);
-  x6 = dct_const_round_shift(s2 - s6);
-  x7 = dct_const_round_shift(s3 - s7);
+  x0 = fdct_round_shift(s0 + s4);
+  x1 = fdct_round_shift(s1 + s5);
+  x2 = fdct_round_shift(s2 + s6);
+  x3 = fdct_round_shift(s3 + s7);
+  x4 = fdct_round_shift(s0 - s4);
+  x5 = fdct_round_shift(s1 - s5);
+  x6 = fdct_round_shift(s2 - s6);
+  x7 = fdct_round_shift(s3 - s7);
 
   // stage 2
   s0 = x0;
@@ -526,10 +532,10 @@ static void fadst8(const int16_t *input, int16_t *output) {
   x1 = s1 + s3;
   x2 = s0 - s2;
   x3 = s1 - s3;
-  x4 = dct_const_round_shift(s4 + s6);
-  x5 = dct_const_round_shift(s5 + s7);
-  x6 = dct_const_round_shift(s4 - s6);
-  x7 = dct_const_round_shift(s5 - s7);
+  x4 = fdct_round_shift(s4 + s6);
+  x5 = fdct_round_shift(s5 + s7);
+  x6 = fdct_round_shift(s4 - s6);
+  x7 = fdct_round_shift(s5 - s7);
 
   // stage 3
   s2 = cospi_16_64 * (x2 + x3);
@@ -537,10 +543,10 @@ static void fadst8(const int16_t *input, int16_t *output) {
   s6 = cospi_16_64 * (x6 + x7);
   s7 = cospi_16_64 * (x6 - x7);
 
-  x2 = dct_const_round_shift(s2);
-  x3 = dct_const_round_shift(s3);
-  x6 = dct_const_round_shift(s6);
-  x7 = dct_const_round_shift(s7);
+  x2 = fdct_round_shift(s2);
+  x3 = fdct_round_shift(s3);
+  x6 = fdct_round_shift(s6);
+  x7 = fdct_round_shift(s7);
 
   output[0] = x0;
   output[1] = - x4;
@@ -693,16 +699,16 @@ static void fdct16(const int16_t in[16], int16_t out[16]) {
    t1 = (x0 - x1) * cospi_16_64;
    t2 = x3 * cospi_8_64 + x2 * cospi_24_64;
    t3 = x3 * cospi_24_64 - x2 * cospi_8_64;
-    out[0] = dct_const_round_shift(t0);
-    out[4] = dct_const_round_shift(t2);
-    out[8] = dct_const_round_shift(t1);
-    out[12] = dct_const_round_shift(t3);
+    out[0] = fdct_round_shift(t0);
+    out[4] = fdct_round_shift(t2);
+    out[8] = fdct_round_shift(t1);
+    out[12] = fdct_round_shift(t3);
 
    // Stage 2
    t0 = (s6 - s5) * cospi_16_64;
    t1 = (s6 + s5) * cospi_16_64;
-    t2 = dct_const_round_shift(t0);
-    t3 = dct_const_round_shift(t1);
+    t2 = fdct_round_shift(t0);
+    t3 = fdct_round_shift(t1);
 
    // Stage 3
    x0 = s4 + t2;
@@ -715,21 +721,21 @@ static void fdct16(const int16_t in[16], int16_t out[16]) {
    t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
    t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
    t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
-    out[2] = dct_const_round_shift(t0);
-    out[6] = dct_const_round_shift(t2);
-    out[10] = dct_const_round_shift(t1);
-    out[14] = dct_const_round_shift(t3);
+    out[2] = fdct_round_shift(t0);
+    out[6] = fdct_round_shift(t2);
+    out[10] = fdct_round_shift(t1);
+    out[14] = fdct_round_shift(t3);
  }
 
  // step 2
  temp1 = (step1[5] - step1[2]) * cospi_16_64;
  temp2 = (step1[4] - step1[3]) * cospi_16_64;
-  step2[2] = dct_const_round_shift(temp1);
-  step2[3] = dct_const_round_shift(temp2);
+  step2[2] = fdct_round_shift(temp1);
+  step2[3] = fdct_round_shift(temp2);
  temp1 = (step1[4] + step1[3]) * cospi_16_64;
  temp2 = (step1[5] + step1[2]) * cospi_16_64;
-  step2[4] = dct_const_round_shift(temp1);
-  step2[5] = dct_const_round_shift(temp2);
+  step2[4] = fdct_round_shift(temp1);
+  step2[5] = fdct_round_shift(temp2);
  // step 3
  step3[0] = step1[0] + step2[3];
@@ -744,12 +750,12 @@ static void fdct16(const int16_t in[16], int16_t out[16]) {
  // step 4
  temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64;
  temp2 = step3[2] * -cospi_24_64 - step3[5] * cospi_8_64;
-  step2[1] = dct_const_round_shift(temp1);
-  step2[2] = dct_const_round_shift(temp2);
+  step2[1] = fdct_round_shift(temp1);
+  step2[2] = fdct_round_shift(temp2);
  temp1 = step3[2] * -cospi_8_64 + step3[5] * cospi_24_64;
  temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64;
-  step2[5] = dct_const_round_shift(temp1);
-  step2[6] = dct_const_round_shift(temp2);
+  step2[5] = fdct_round_shift(temp1);
+  step2[6] = fdct_round_shift(temp2);
  // step 5
  step1[0] = step3[0] + step2[1];
@@ -764,23 +770,23 @@ static void fdct16(const int16_t in[16], int16_t out[16]) {
  // step 6
  temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64;
  temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64;
-  out[1] = dct_const_round_shift(temp1);
-  out[9] = dct_const_round_shift(temp2);
+  out[1] = fdct_round_shift(temp1);
+  out[9] = fdct_round_shift(temp2);
  temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64;
  temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64;
-  out[5] = dct_const_round_shift(temp1);
-  out[13] = dct_const_round_shift(temp2);
+  out[5] = fdct_round_shift(temp1);
+  out[13] = fdct_round_shift(temp2);
  temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64;
  temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64;
-  out[3] = dct_const_round_shift(temp1);
-  out[11] = dct_const_round_shift(temp2);
+  out[3] = fdct_round_shift(temp1);
+  out[11] = fdct_round_shift(temp2);
  temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64;
  temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64;
-  out[7] = dct_const_round_shift(temp1);
-  out[15] = dct_const_round_shift(temp2);
+  out[7] = fdct_round_shift(temp1);
+  out[15] = fdct_round_shift(temp2);
 }
 
 static void fadst16(const int16_t *input, int16_t *output) {
@@ -821,22 +827,22 @@ static void fadst16(const int16_t *input, int16_t *output) {
   s14 = x14 * cospi_29_64 + x15 * cospi_3_64;
   s15 = x14 * cospi_3_64 - x15 * cospi_29_64;
 
-  x0 = dct_const_round_shift(s0 + s8);
-  x1 = dct_const_round_shift(s1 + s9);
-  x2 = dct_const_round_shift(s2 + s10);
-  x3 = dct_const_round_shift(s3 + s11);
-  x4 = dct_const_round_shift(s4 + s12);
-  x5 = dct_const_round_shift(s5 + s13);
-  x6 = dct_const_round_shift(s6 + s14);
-  x7 = dct_const_round_shift(s7 + s15);
-  x8 = dct_const_round_shift(s0 - s8);
-  x9 = dct_const_round_shift(s1 - s9);
-  x10 = dct_const_round_shift(s2 - s10);
-  x11 = dct_const_round_shift(s3 - s11);
-  x12 = dct_const_round_shift(s4 - s12);
-  x13 = dct_const_round_shift(s5 - s13);
-  x14 = dct_const_round_shift(s6 - s14);
-  x15 = dct_const_round_shift(s7 - s15);
+  x0 = fdct_round_shift(s0 + s8);
+  x1 = fdct_round_shift(s1 + s9);
+  x2 = fdct_round_shift(s2 + s10);
+  x3 = fdct_round_shift(s3 + s11);
+  x4 = fdct_round_shift(s4 + s12);
+  x5 = fdct_round_shift(s5 + s13);
+  x6 = fdct_round_shift(s6 + s14);
+  x7 = fdct_round_shift(s7 + s15);
+  x8 = fdct_round_shift(s0 - s8);
+  x9 = fdct_round_shift(s1 - s9);
+  x10 = fdct_round_shift(s2 - s10);
+  x11 = fdct_round_shift(s3 - s11);
+  x12 = fdct_round_shift(s4 - s12);
+  x13 = fdct_round_shift(s5 - s13);
+  x14 = fdct_round_shift(s6 - s14);
+  x15 = fdct_round_shift(s7 - s15);
 
   // stage 2
   s0 = x0;
@@ -864,14 +870,14 @@ static void fadst16(const int16_t *input, int16_t *output) {
   x5 = s1 - s5;
   x6 = s2 - s6;
   x7 = s3 - s7;
-  x8 = dct_const_round_shift(s8 + s12);
-  x9 = dct_const_round_shift(s9 + s13);
-  x10 = dct_const_round_shift(s10 + s14);
-  x11 = dct_const_round_shift(s11 + s15);
-  x12 = dct_const_round_shift(s8 - s12);
-  x13 = dct_const_round_shift(s9 - s13);
-  x14 = dct_const_round_shift(s10 - s14);
-  x15 = dct_const_round_shift(s11 - s15);
+  x8 = fdct_round_shift(s8 + s12);
+  x9 = fdct_round_shift(s9 + s13);
+  x10 = fdct_round_shift(s10 + s14);
+  x11 = fdct_round_shift(s11 + s15);
+  x12 = fdct_round_shift(s8 - s12);
+  x13 = fdct_round_shift(s9 - s13);
+  x14 = fdct_round_shift(s10 - s14);
+  x15 = fdct_round_shift(s11 - s15);
 
   // stage 3
   s0 = x0;
@@ -895,18 +901,18 @@ static void fadst16(const int16_t *input, int16_t *output) {
   x1 = s1 + s3;
   x2 = s0 - s2;
   x3 = s1 - s3;
-  x4 = dct_const_round_shift(s4 + s6);
-  x5 = dct_const_round_shift(s5 + s7);
-  x6 = dct_const_round_shift(s4 - s6);
-  x7 = dct_const_round_shift(s5 - s7);
+  x4 = fdct_round_shift(s4 + s6);
+  x5 = fdct_round_shift(s5 + s7);
+  x6 = fdct_round_shift(s4 - s6);
+  x7 = fdct_round_shift(s5 - s7);
   x8 = s8 + s10;
   x9 = s9 + s11;
   x10 = s8 - s10;
   x11 = s9 - s11;
-  x12 = dct_const_round_shift(s12 + s14);
-  x13 = dct_const_round_shift(s13 + s15);
-  x14 = dct_const_round_shift(s12 - s14);
-  x15 = dct_const_round_shift(s13 - s15);
+  x12 = fdct_round_shift(s12 + s14);
+  x13 = fdct_round_shift(s13 + s15);
+  x14 = fdct_round_shift(s12 - s14);
+  x15 = fdct_round_shift(s13 - s15);
 
   // stage 4
   s2 = (- cospi_16_64) * (x2 + x3);
@@ -918,14 +924,14 @@ static void fadst16(const int16_t *input, int16_t *output) {
   s14 = (- cospi_16_64) * (x14 + x15);
   s15 = cospi_16_64 * (x14 - x15);
 
-  x2 = dct_const_round_shift(s2);
-  x3 = dct_const_round_shift(s3);
-  x6 = dct_const_round_shift(s6);
-  x7 = dct_const_round_shift(s7);
-  x10 = dct_const_round_shift(s10);
-  x11 = dct_const_round_shift(s11);
-  x14 = dct_const_round_shift(s14);
-  x15 = dct_const_round_shift(s15);
+  x2 = fdct_round_shift(s2);
+  x3 = fdct_round_shift(s3);
+  x6 = fdct_round_shift(s6);
+  x7 = fdct_round_shift(s7);
+  x10 = fdct_round_shift(s10);
+  x11 = fdct_round_shift(s11);
+  x14 = fdct_round_shift(s14);
+  x15 = fdct_round_shift(s15);
 
   output[0] = x0;
   output[1] = - x8;
...