diff --git a/vp9/common/vp9_idct.h b/vp9/common/vp9_idct.h new file mode 100644 index 0000000000000000000000000000000000000000..680a206274ae71c41a8f0fb1ada90e96aeda22ce --- /dev/null +++ b/vp9/common/vp9_idct.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_COMMON_VP9_IDCT_H +#define VP9_COMMON_VP9_IDCT_H + +#include "./vpx_config.h" + +// Constants and Macros used by all idct/dct functions +#define DCT_CONST_BITS 14 +#define DCT_CONST_ROUNDING (1 << (DCT_CONST_BITS - 1)) +// Constants are 16384 * cos(kPi/64) where k = 1 to 31. +// Note: sin(kPi/64) = cos((32-k)Pi/64) +static const int cospi_1_64 = 16364; +static const int cospi_2_64 = 16305; +static const int cospi_3_64 = 16207; +static const int cospi_4_64 = 16069; +static const int cospi_5_64 = 15893; +static const int cospi_6_64 = 15679; +static const int cospi_7_64 = 15426; +static const int cospi_8_64 = 15137; +static const int cospi_9_64 = 14811; +static const int cospi_10_64 = 14449; +static const int cospi_11_64 = 14053; +static const int cospi_12_64 = 13623; +static const int cospi_13_64 = 13160; +static const int cospi_14_64 = 12665; +static const int cospi_15_64 = 12140; +static const int cospi_16_64 = 11585; +static const int cospi_17_64 = 11003; +static const int cospi_18_64 = 10394; +static const int cospi_19_64 = 9760; +static const int cospi_20_64 = 9102; +static const int cospi_21_64 = 8423; +static const int cospi_22_64 = 7723; +static const int cospi_23_64 = 7005; +static const int cospi_24_64 = 6270; +static const int cospi_25_64 = 5520; +static const int cospi_26_64 = 4756; +static const int cospi_27_64 = 3981; +static const int cospi_28_64 = 3196; +static const int cospi_29_64 = 2404; +static const int cospi_30_64 = 1606; +static const int cospi_31_64 = 804; + +static INLINE int dct_const_round_shift(int input) { + int rv = (input + DCT_CONST_ROUNDING) >> DCT_CONST_BITS; + assert((rv <= INT16_MAX) && (rv >= INT16_MIN)); + return rv; +} +#endif diff --git a/vp9/common/vp9_idctllm.c b/vp9/common/vp9_idctllm.c index 92367fe5a695b8f64bb546184b4ccd7d4b3d004b..b27b34cf2c70096f03b7d55aa02bb794ee90ceaa 100644 --- a/vp9/common/vp9_idctllm.c +++ b/vp9/common/vp9_idctllm.c @@ -28,6 +28,7 @@ #include "vp9/common/vp9_systemdependent.h" #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_common.h" +#include "vp9/common/vp9_idct.h" @@ -493,49 +494,6 @@ void vp9_dc_only_inv_walsh_add_c(short input_dc, uint8_t *pred_ptr, } #endif -// Constants and Macros used by all idct functions -// TODO(Yaowu): move these to a header file as they shared by DCTs and iDCTs -#define DCT_CONST_BITS 14 -#define DCT_CONST_ROUNDING (1 << (DCT_CONST_BITS - 1)) -// Constants are 16384 * cos(kPi/64) where k = 1 to 31. -// Note: sin(kPi/64) = cos((32-k)Pi/64) -static const int cospi_1_64 = 16364; -static const int cospi_2_64 = 16305; -static const int cospi_3_64 = 16207; -static const int cospi_4_64 = 16069; -static const int cospi_5_64 = 15893; -static const int cospi_6_64 = 15679; -static const int cospi_7_64 = 15426; -static const int cospi_8_64 = 15137; -static const int cospi_9_64 = 14811; -static const int cospi_10_64 = 14449; -static const int cospi_11_64 = 14053; -static const int cospi_12_64 = 13623; -static const int cospi_13_64 = 13160; -static const int cospi_14_64 = 12665; -static const int cospi_15_64 = 12140; -static const int cospi_16_64 = 11585; -static const int cospi_17_64 = 11003; -static const int cospi_18_64 = 10394; -static const int cospi_19_64 = 9760; -static const int cospi_20_64 = 9102; -static const int cospi_21_64 = 8423; -static const int cospi_22_64 = 7723; -static const int cospi_23_64 = 7005; -static const int cospi_24_64 = 6270; -static const int cospi_25_64 = 5520; -static const int cospi_26_64 = 4756; -static const int cospi_27_64 = 3981; -static const int cospi_28_64 = 3196; -static const int cospi_29_64 = 2404; -static const int cospi_30_64 = 1606; -static const int cospi_31_64 = 804; - -static INLINE int dct_const_round_shift(int input) { - int rv = (input + DCT_CONST_ROUNDING) >> DCT_CONST_BITS; - assert((rv <= INT16_MAX) && (rv >= INT16_MIN)); - return rv; -} void idct4_1d(int16_t *input, int16_t *output) { int16_t step[4]; @@ -875,302 +833,6 @@ void vp9_short_ihaar2x2_c(int16_t *input, int16_t *output, int pitch) { op[8] = (ip[0] - ip[1] - ip[4] + ip[8]) >> 1; } - -#if 0 -// Keep a really bad float version as reference for now. -void vp9_short_idct16x16_c(int16_t *input, int16_t *output, int pitch) { - - vp9_clear_system_state(); // Make it simd safe : __asm emms; - { - double x; - const int short_pitch = pitch >> 1; - int i, j, k, l; - for (l = 0; l < 16; ++l) { - for (k = 0; k < 16; ++k) { - double s = 0; - for (i = 0; i < 16; ++i) { - for (j = 0; j < 16; ++j) { - x=cos(PI*j*(l+0.5)/16.0)*cos(PI*i*(k+0.5)/16.0)*input[i*16+j]/32; - if (i != 0) - x *= sqrt(2.0); - if (j != 0) - x *= sqrt(2.0); - s += x; - } - } - output[k*short_pitch+l] = (short)round(s); - } - } - } - vp9_clear_system_state(); // Make it simd safe : __asm emms; -} -#endif - -#define TEST_INT_16x16_IDCT 1 -#if !TEST_INT_16x16_IDCT - -static void butterfly_16x16_idct_1d(double input[16], double output[16]) { - - static const double C1 = 0.995184726672197; - static const double C2 = 0.98078528040323; - static const double C3 = 0.956940335732209; - static const double C4 = 0.923879532511287; - static const double C5 = 0.881921264348355; - static const double C6 = 0.831469612302545; - static const double C7 = 0.773010453362737; - static const double C8 = 0.707106781186548; - static const double C9 = 0.634393284163646; - static const double C10 = 0.555570233019602; - static const double C11 = 0.471396736825998; - static const double C12 = 0.38268343236509; - static const double C13 = 0.290284677254462; - static const double C14 = 0.195090322016128; - static const double C15 = 0.098017140329561; - - vp9_clear_system_state(); // Make it simd safe : __asm emms; - { - double step[16]; - double intermediate[16]; - double temp1, temp2; - - - // step 1 and 2 - step[ 0] = input[0] + input[8]; - step[ 1] = input[0] - input[8]; - - temp1 = input[4]*C12; - temp2 = input[12]*C4; - - temp1 -= temp2; - temp1 *= C8; - - step[ 2] = 2*(temp1); - - temp1 = input[4]*C4; - temp2 = input[12]*C12; - temp1 += temp2; - temp1 = (temp1); - temp1 *= C8; - step[ 3] = 2*(temp1); - - temp1 = input[2]*C8; - temp1 = 2*(temp1); - temp2 = input[6] + input[10]; - - step[ 4] = temp1 + temp2; - step[ 5] = temp1 - temp2; - - temp1 = input[14]*C8; - temp1 = 2*(temp1); - temp2 = input[6] - input[10]; - - step[ 6] = temp2 - temp1; - step[ 7] = temp2 + temp1; - - // for odd input - temp1 = input[3]*C12; - temp2 = input[13]*C4; - temp1 += temp2; - temp1 = (temp1); - temp1 *= C8; - intermediate[ 8] = 2*(temp1); - - temp1 = input[3]*C4; - temp2 = input[13]*C12; - temp2 -= temp1; - temp2 = (temp2); - temp2 *= C8; - intermediate[ 9] = 2*(temp2); - - intermediate[10] = 2*(input[9]*C8); - intermediate[11] = input[15] - input[1]; - intermediate[12] = input[15] + input[1]; - intermediate[13] = 2*((input[7]*C8)); - - temp1 = input[11]*C12; - temp2 = input[5]*C4; - temp2 -= temp1; - temp2 = (temp2); - temp2 *= C8; - intermediate[14] = 2*(temp2); - - temp1 = input[11]*C4; - temp2 = input[5]*C12; - temp1 += temp2; - temp1 = (temp1); - temp1 *= C8; - intermediate[15] = 2*(temp1); - - step[ 8] = intermediate[ 8] + intermediate[14]; - step[ 9] = intermediate[ 9] + intermediate[15]; - step[10] = intermediate[10] + intermediate[11]; - step[11] = intermediate[10] - intermediate[11]; - step[12] = intermediate[12] + intermediate[13]; - step[13] = intermediate[12] - intermediate[13]; - step[14] = intermediate[ 8] - intermediate[14]; - step[15] = intermediate[ 9] - intermediate[15]; - - // step 3 - output[0] = step[ 0] + step[ 3]; - output[1] = step[ 1] + step[ 2]; - output[2] = step[ 1] - step[ 2]; - output[3] = step[ 0] - step[ 3]; - - temp1 = step[ 4]*C14; - temp2 = step[ 7]*C2; - temp1 -= temp2; - output[4] = (temp1); - - temp1 = step[ 4]*C2; - temp2 = step[ 7]*C14; - temp1 += temp2; - output[7] = (temp1); - - temp1 = step[ 5]*C10; - temp2 = step[ 6]*C6; - temp1 -= temp2; - output[5] = (temp1); - - temp1 = step[ 5]*C6; - temp2 = step[ 6]*C10; - temp1 += temp2; - output[6] = (temp1); - - output[8] = step[ 8] + step[11]; - output[9] = step[ 9] + step[10]; - output[10] = step[ 9] - step[10]; - output[11] = step[ 8] - step[11]; - output[12] = step[12] + step[15]; - output[13] = step[13] + step[14]; - output[14] = step[13] - step[14]; - output[15] = step[12] - step[15]; - - // output 4 - step[ 0] = output[0] + output[7]; - step[ 1] = output[1] + output[6]; - step[ 2] = output[2] + output[5]; - step[ 3] = output[3] + output[4]; - step[ 4] = output[3] - output[4]; - step[ 5] = output[2] - output[5]; - step[ 6] = output[1] - output[6]; - step[ 7] = output[0] - output[7]; - - temp1 = output[8]*C7; - temp2 = output[15]*C9; - temp1 -= temp2; - step[ 8] = (temp1); - - temp1 = output[9]*C11; - temp2 = output[14]*C5; - temp1 += temp2; - step[ 9] = (temp1); - - temp1 = output[10]*C3; - temp2 = output[13]*C13; - temp1 -= temp2; - step[10] = (temp1); - - temp1 = output[11]*C15; - temp2 = output[12]*C1; - temp1 += temp2; - step[11] = (temp1); - - temp1 = output[11]*C1; - temp2 = output[12]*C15; - temp2 -= temp1; - step[12] = (temp2); - - temp1 = output[10]*C13; - temp2 = output[13]*C3; - temp1 += temp2; - step[13] = (temp1); - - temp1 = output[9]*C5; - temp2 = output[14]*C11; - temp2 -= temp1; - step[14] = (temp2); - - temp1 = output[8]*C9; - temp2 = output[15]*C7; - temp1 += temp2; - step[15] = (temp1); - - // step 5 - output[0] = (step[0] + step[15]); - output[1] = (step[1] + step[14]); - output[2] = (step[2] + step[13]); - output[3] = (step[3] + step[12]); - output[4] = (step[4] + step[11]); - output[5] = (step[5] + step[10]); - output[6] = (step[6] + step[ 9]); - output[7] = (step[7] + step[ 8]); - - output[15] = (step[0] - step[15]); - output[14] = (step[1] - step[14]); - output[13] = (step[2] - step[13]); - output[12] = (step[3] - step[12]); - output[11] = (step[4] - step[11]); - output[10] = (step[5] - step[10]); - output[9] = (step[6] - step[ 9]); - output[8] = (step[7] - step[ 8]); - } - vp9_clear_system_state(); // Make it simd safe : __asm emms; -} - -// Remove once an int version of iDCT is written -#if 0 -void reference_16x16_idct_1d(double input[16], double output[16]) { - - vp9_clear_system_state(); // Make it simd safe : __asm emms; - { - const double kPi = 3.141592653589793238462643383279502884; - const double kSqrt2 = 1.414213562373095048801688724209698; - for (int k = 0; k < 16; k++) { - output[k] = 0.0; - for (int n = 0; n < 16; n++) { - output[k] += input[n]*cos(kPi*(2*k+1)*n/32.0); - if (n == 0) - output[k] = output[k]/kSqrt2; - } - } - } - vp9_clear_system_state(); // Make it simd safe : __asm emms; -} -#endif - -void vp9_short_idct16x16_c(int16_t *input, int16_t *output, int pitch) { - - vp9_clear_system_state(); // Make it simd safe : __asm emms; - { - double out[16*16], out2[16*16]; - const int short_pitch = pitch >> 1; - int i, j; - // First transform rows - for (i = 0; i < 16; ++i) { - double temp_in[16], temp_out[16]; - for (j = 0; j < 16; ++j) - temp_in[j] = input[j + i*short_pitch]; - butterfly_16x16_idct_1d(temp_in, temp_out); - for (j = 0; j < 16; ++j) - out[j + i*16] = temp_out[j]; - } - // Then transform columns - for (i = 0; i < 16; ++i) { - double temp_in[16], temp_out[16]; - for (j = 0; j < 16; ++j) - temp_in[j] = out[j*16 + i]; - butterfly_16x16_idct_1d(temp_in, temp_out); - for (j = 0; j < 16; ++j) - out2[j*16 + i] = temp_out[j]; - } - for (i = 0; i < 16*16; ++i) - output[i] = round(out2[i]/128); - } - vp9_clear_system_state(); // Make it simd safe : __asm emms; -} - -#else - void idct16_1d(int16_t *input, int16_t *output) { int16_t step1[16], step2[16]; int temp1, temp2; @@ -1396,7 +1058,7 @@ void vp9_short_idct1_16x16_c(int16_t *input, int16_t *output) { out = dct_const_round_shift(tmp); *output = (out + 32) >> 6; } -#endif + #if !CONFIG_DWTDCTHYBRID void idct32_1d(int16_t *input, int16_t *output) { diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c index d4f5c0c074054426b2e744be9ae79d4029f75ded..7c4343144d70830abdb35538a40ad8be4162e04a 100644 --- a/vp9/encoder/vp9_dct.c +++ b/vp9/encoder/vp9_dct.c @@ -15,6 +15,7 @@ #include "vp9/common/vp9_systemdependent.h" #include "vp9/common/vp9_blockd.h" +#include "vp9/common/vp9_idct.h" // TODO: these transforms can be converted into integer forms to reduce // the complexity @@ -783,49 +784,6 @@ void vp9_fht_int_c(const int16_t *input, int pitch, int16_t *output, op += tx_dim; } } -// Constants and Macros used by all idct functions -// TODO(Yaowu): move these to a header file as they shared by DCTs and iDCTs -#define DCT_CONST_BITS 14 -#define DCT_CONST_ROUNDING (1 << (DCT_CONST_BITS - 1)) -// Constants are 16384 * cos(kPi/64) where k = 1 to 31. -// Note: sin(kPi/64) = cos((32-k)Pi/64) -static const int cospi_1_64 = 16364; -static const int cospi_2_64 = 16305; -static const int cospi_3_64 = 16207; -static const int cospi_4_64 = 16069; -static const int cospi_5_64 = 15893; -static const int cospi_6_64 = 15679; -static const int cospi_7_64 = 15426; -static const int cospi_8_64 = 15137; -static const int cospi_9_64 = 14811; -static const int cospi_10_64 = 14449; -static const int cospi_11_64 = 14053; -static const int cospi_12_64 = 13623; -static const int cospi_13_64 = 13160; -static const int cospi_14_64 = 12665; -static const int cospi_15_64 = 12140; -static const int cospi_16_64 = 11585; -static const int cospi_17_64 = 11003; -static const int cospi_18_64 = 10394; -static const int cospi_19_64 = 9760; -static const int cospi_20_64 = 9102; -static const int cospi_21_64 = 8423; -static const int cospi_22_64 = 7723; -static const int cospi_23_64 = 7005; -static const int cospi_24_64 = 6270; -static const int cospi_25_64 = 5520; -static const int cospi_26_64 = 4756; -static const int cospi_27_64 = 3981; -static const int cospi_28_64 = 3196; -static const int cospi_29_64 = 2404; -static const int cospi_30_64 = 1606; -static const int cospi_31_64 = 804; - -static INLINE int dct_const_round_shift(int input) { - int rv = (input + DCT_CONST_ROUNDING) >> DCT_CONST_BITS; - assert((rv <= INT16_MAX) && (rv >= INT16_MIN)); - return rv; -} static void fdct4_1d(int16_t *input, int16_t *output) { int16_t step[4]; diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk index f3ae049a1f85407e9c4a55fc14fe39c1d4ec8aae..d970990e30ed77b75de5d9135def2305d7413d82 100644 --- a/vp9/vp9_common.mk +++ b/vp9/vp9_common.mk @@ -36,6 +36,7 @@ VP9_COMMON_SRCS-yes += common/vp9_entropymv.h VP9_COMMON_SRCS-yes += common/vp9_extend.h VP9_COMMON_SRCS-yes += common/vp9_findnearmv.h VP9_COMMON_SRCS-yes += common/vp9_header.h +VP9_COMMON_SRCS-yes += common/vp9_idct.h VP9_COMMON_SRCS-yes += common/vp9_invtrans.h VP9_COMMON_SRCS-yes += common/vp9_loopfilter.h VP9_COMMON_SRCS-yes += common/vp9_modecont.h