Commit 10783d4f authored by Deb Mukherjee's avatar Deb Mukherjee

Adds high bitdepth transform functions and tests

Adds various high bitdepth transform functions and tests.
Much of the changes are related to using typedefs tran_low_t
and tran_high_t for the final transform cofficients and intermediate
stages of the transform computation respectively rather than fixed
types int16_t/int. When vp9_highbitdepth configure flag is off,
these map tp int16_t/int32_t, but when the flag is on, they map
to int32_t/int64_t to make space for needed extra precision.

Change-Id: I3c56de79e15b904d6f655b62ffae170729befdd8
parent 1e4136d3
......@@ -281,6 +281,7 @@ EXPERIMENT_LIST="
spatial_svc
vp9_temporal_denoising
fp_mb_stats
emulate_hardware_highbitdepth
"
CONFIG_LIST="
external_build
......
......@@ -581,6 +581,8 @@ TEST_P(ConvolveTest, CheckScalingFiltering) {
using std::tr1::make_tuple;
#if CONFIG_VP9_HIGHBITDEPTH
#else
const ConvolveFunctions convolve8_c(
vp9_convolve8_horiz_c, vp9_convolve8_avg_horiz_c,
vp9_convolve8_vert_c, vp9_convolve8_avg_vert_c,
......@@ -600,8 +602,11 @@ INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values(
make_tuple(64, 32, &convolve8_c),
make_tuple(32, 64, &convolve8_c),
make_tuple(64, 64, &convolve8_c)));
#endif
#if HAVE_SSE2
#if HAVE_SSE2 && ARCH_X86_64
#if CONFIG_VP9_HIGHBITDEPTH
#else
const ConvolveFunctions convolve8_sse2(
vp9_convolve8_horiz_sse2, vp9_convolve8_avg_horiz_sse2,
vp9_convolve8_vert_sse2, vp9_convolve8_avg_vert_sse2,
......@@ -622,6 +627,7 @@ INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
make_tuple(32, 64, &convolve8_sse2),
make_tuple(64, 64, &convolve8_sse2)));
#endif
#endif
#if HAVE_SSSE3
const ConvolveFunctions convolve8_ssse3(
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -109,7 +109,8 @@ TEST(VP9Idct8x8Test, AccuracyCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 10000;
for (int i = 0; i < count_test_block; ++i) {
int16_t input[64], coeff[64];
int16_t input[64];
tran_low_t coeff[64];
double output_r[64];
uint8_t dst[64], src[64];
......
......@@ -26,8 +26,8 @@
using libvpx_test::ACMRandom;
namespace {
typedef void (*FwdTxfmFunc)(const int16_t *in, int16_t *out, int stride);
typedef void (*InvTxfmFunc)(const int16_t *in, uint8_t *out, int stride);
typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride);
typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride);
typedef std::tr1::tuple<FwdTxfmFunc,
InvTxfmFunc,
InvTxfmFunc,
......@@ -74,8 +74,8 @@ TEST_P(PartialIDctTest, RunQuantCheck) {
FAIL() << "Wrong Size!";
break;
}
DECLARE_ALIGNED_ARRAY(16, int16_t, test_coef_block1, kMaxNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, int16_t, test_coef_block2, kMaxNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_coef_block1, kMaxNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_coef_block2, kMaxNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst1, kMaxNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst2, kMaxNumCoeffs);
......@@ -83,7 +83,7 @@ TEST_P(PartialIDctTest, RunQuantCheck) {
const int block_size = size * size;
DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kMaxNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, int16_t, output_ref_block, kMaxNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kMaxNumCoeffs);
int max_error = 0;
for (int i = 0; i < count_test_block; ++i) {
......@@ -153,8 +153,8 @@ TEST_P(PartialIDctTest, ResultsMatch) {
FAIL() << "Wrong Size!";
break;
}
DECLARE_ALIGNED_ARRAY(16, int16_t, test_coef_block1, kMaxNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, int16_t, test_coef_block2, kMaxNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_coef_block1, kMaxNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_coef_block2, kMaxNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst1, kMaxNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst2, kMaxNumCoeffs);
const int count_test_block = 1000;
......@@ -229,6 +229,7 @@ INSTANTIATE_TEST_CASE_P(
&vp9_idct4x4_16_add_c,
&vp9_idct4x4_1_add_c,
TX_4X4, 1)));
#if HAVE_NEON_ASM
INSTANTIATE_TEST_CASE_P(
NEON, PartialIDctTest,
......@@ -259,7 +260,7 @@ INSTANTIATE_TEST_CASE_P(
TX_4X4, 1)));
#endif
#if HAVE_SSE2
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
SSE2, PartialIDctTest,
::testing::Values(
......@@ -293,7 +294,7 @@ INSTANTIATE_TEST_CASE_P(
TX_4X4, 1)));
#endif
#if HAVE_SSSE3 && ARCH_X86_64
#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
SSSE3_64, PartialIDctTest,
::testing::Values(
......@@ -303,7 +304,7 @@ INSTANTIATE_TEST_CASE_P(
TX_8X8, 12)));
#endif
#if HAVE_SSSE3
#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
SSSE3, PartialIDctTest,
::testing::Values(
......
......@@ -21,6 +21,7 @@
#include "vp9/common/vp9_common_data.h"
#include "vp9/common/vp9_enums.h"
#include "vp9/common/vp9_filter.h"
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_mv.h"
#include "vp9/common/vp9_scale.h"
#include "vp9/common/vp9_seg_common.h"
......@@ -176,7 +177,7 @@ struct buf_2d {
};
struct macroblockd_plane {
int16_t *dqcoeff;
tran_low_t *dqcoeff;
PLANE_TYPE plane_type;
int subsampling_x;
int subsampling_y;
......@@ -223,11 +224,17 @@ typedef struct macroblockd {
/* mc buffer */
DECLARE_ALIGNED(16, uint8_t, mc_buf[80 * 2 * 80 * 2]);
#if CONFIG_VP9_HIGHBITDEPTH
/* Bit depth: 8, 10, 12 */
int bd;
DECLARE_ALIGNED(16, uint16_t, mc_buf_high[80 * 2 * 80 * 2]);
#endif
int lossless;
int corrupted;
DECLARE_ALIGNED(16, int16_t, dqcoeff[MAX_MB_PLANE][64 * 64]);
DECLARE_ALIGNED(16, tran_low_t, dqcoeff[MAX_MB_PLANE][64 * 64]);
ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
ENTROPY_CONTEXT left_context[MAX_MB_PLANE][16];
......
This diff is collapsed.
......@@ -36,52 +36,69 @@ extern "C" {
#define dual_set_epi16(a, b) \
_mm_set_epi16(b, b, b, b, a, a, a, a)
// Note:
// tran_low_t is the datatype used for final transform coefficients.
// tran_high_t is the datatype used for intermediate transform stages.
#if CONFIG_VP9_HIGHBITDEPTH
typedef int64_t tran_high_t;
typedef int32_t tran_low_t;
#else
typedef int32_t tran_high_t;
typedef int16_t tran_low_t;
#endif
// Constants:
// for (int i = 1; i< 32; ++i)
// printf("static const int cospi_%d_64 = %.0f;\n", i,
// round(16384 * cos(i*M_PI/64)));
// Note: sin(k*Pi/64) = cos((32-k)*Pi/64)
static const int cospi_1_64 = 16364;
static const int cospi_2_64 = 16305;
static const int cospi_3_64 = 16207;
static const int cospi_4_64 = 16069;
static const int cospi_5_64 = 15893;
static const int cospi_6_64 = 15679;
static const int cospi_7_64 = 15426;
static const int cospi_8_64 = 15137;
static const int cospi_9_64 = 14811;
static const int cospi_10_64 = 14449;
static const int cospi_11_64 = 14053;
static const int cospi_12_64 = 13623;
static const int cospi_13_64 = 13160;
static const int cospi_14_64 = 12665;
static const int cospi_15_64 = 12140;
static const int cospi_16_64 = 11585;
static const int cospi_17_64 = 11003;
static const int cospi_18_64 = 10394;
static const int cospi_19_64 = 9760;
static const int cospi_20_64 = 9102;
static const int cospi_21_64 = 8423;
static const int cospi_22_64 = 7723;
static const int cospi_23_64 = 7005;
static const int cospi_24_64 = 6270;
static const int cospi_25_64 = 5520;
static const int cospi_26_64 = 4756;
static const int cospi_27_64 = 3981;
static const int cospi_28_64 = 3196;
static const int cospi_29_64 = 2404;
static const int cospi_30_64 = 1606;
static const int cospi_31_64 = 804;
static const tran_high_t cospi_1_64 = 16364;
static const tran_high_t cospi_2_64 = 16305;
static const tran_high_t cospi_3_64 = 16207;
static const tran_high_t cospi_4_64 = 16069;
static const tran_high_t cospi_5_64 = 15893;
static const tran_high_t cospi_6_64 = 15679;
static const tran_high_t cospi_7_64 = 15426;
static const tran_high_t cospi_8_64 = 15137;
static const tran_high_t cospi_9_64 = 14811;
static const tran_high_t cospi_10_64 = 14449;
static const tran_high_t cospi_11_64 = 14053;
static const tran_high_t cospi_12_64 = 13623;
static const tran_high_t cospi_13_64 = 13160;
static const tran_high_t cospi_14_64 = 12665;
static const tran_high_t cospi_15_64 = 12140;
static const tran_high_t cospi_16_64 = 11585;
static const tran_high_t cospi_17_64 = 11003;
static const tran_high_t cospi_18_64 = 10394;
static const tran_high_t cospi_19_64 = 9760;
static const tran_high_t cospi_20_64 = 9102;
static const tran_high_t cospi_21_64 = 8423;
static const tran_high_t cospi_22_64 = 7723;
static const tran_high_t cospi_23_64 = 7005;
static const tran_high_t cospi_24_64 = 6270;
static const tran_high_t cospi_25_64 = 5520;
static const tran_high_t cospi_26_64 = 4756;
static const tran_high_t cospi_27_64 = 3981;
static const tran_high_t cospi_28_64 = 3196;
static const tran_high_t cospi_29_64 = 2404;
static const tran_high_t cospi_30_64 = 1606;
static const tran_high_t cospi_31_64 = 804;
// 16384 * sqrt(2) * sin(kPi/9) * 2 / 3
static const int sinpi_1_9 = 5283;
static const int sinpi_2_9 = 9929;
static const int sinpi_3_9 = 13377;
static const int sinpi_4_9 = 15212;
static INLINE int dct_const_round_shift(int input) {
int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
#if CONFIG_COEFFICIENT_RANGE_CHECKING
static const tran_high_t sinpi_1_9 = 5283;
static const tran_high_t sinpi_2_9 = 9929;
static const tran_high_t sinpi_3_9 = 13377;
static const tran_high_t sinpi_4_9 = 15212;
static INLINE tran_low_t dct_const_round_shift(tran_high_t input) {
tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
#if CONFIG_VP9_HIGHBITDEPTH
// For valid highbitdepth VP9 streams, intermediate stage coefficients will
// stay within the ranges:
// - 8 bit: signed 16 bit integer
// - 10 bit: signed 18 bit integer
// - 12 bit: signed 20 bit integer
#elif CONFIG_COEFFICIENT_RANGE_CHECKING
// For valid VP9 input streams, intermediate stage coefficients should always
// stay within the range of a signed 16 bit integer. Coefficients can go out
// of this range for invalid/corrupt VP9 streams. However, strictly checking
......@@ -91,32 +108,59 @@ static INLINE int dct_const_round_shift(int input) {
assert(INT16_MIN <= rv);
assert(rv <= INT16_MAX);
#endif
return (int16_t)rv;
return (tran_low_t)rv;
}
typedef void (*transform_1d)(const int16_t*, int16_t*);
typedef void (*transform_1d)(const tran_low_t*, tran_low_t*);
typedef struct {
transform_1d cols, rows; // vertical and horizontal
} transform_2d;
void vp9_iwht4x4_add(const int16_t *input, uint8_t *dest, int stride, int eob);
#if CONFIG_VP9_HIGHBITDEPTH
typedef void (*high_transform_1d)(const tran_low_t*, tran_low_t*, int bd);
void vp9_idct4x4_add(const int16_t *input, uint8_t *dest, int stride, int eob);
void vp9_idct8x8_add(const int16_t *input, uint8_t *dest, int stride, int eob);
void vp9_idct16x16_add(const int16_t *input, uint8_t *dest, int stride, int
typedef struct {
high_transform_1d cols, rows; // vertical and horizontal
} high_transform_2d;
#endif // CONFIG_VP9_HIGHBITDEPTH
void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob);
void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob);
void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob);
void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride, int
eob);
void vp9_idct32x32_add(const int16_t *input, uint8_t *dest, int stride,
void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob);
void vp9_iht4x4_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest,
void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
int stride, int eob);
void vp9_iht8x8_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest,
void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
int stride, int eob);
void vp9_iht16x16_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest,
void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
int stride, int eob);
#if CONFIG_VP9_HIGHBITDEPTH
void vp9_high_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob, int bd);
void vp9_high_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob, int bd);
void vp9_high_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob, int bd);
void vp9_high_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob, int bd);
void vp9_high_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob, int bd);
void vp9_high_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input,
uint8_t *dest, int stride, int eob, int bd);
void vp9_high_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input,
uint8_t *dest, int stride, int eob, int bd);
void vp9_high_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input,
uint8_t *dest, int stride, int eob, int bd);
#endif // CONFIG_VP9_HIGHBITDEPTH
#ifdef __cplusplus
} // extern "C"
#endif
......
This diff is collapsed.
......@@ -195,7 +195,7 @@ static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block,
struct macroblockd_plane *const pd = &xd->plane[plane];
if (eob > 0) {
TX_TYPE tx_type = DCT_DCT;
int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
if (xd->lossless) {
tx_type = DCT_DCT;
vp9_iwht4x4_add(dqcoeff, dst, stride, eob);
......
......@@ -51,7 +51,7 @@
} while (0)
static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, PLANE_TYPE type,
int16_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq,
tran_low_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq,
int ctx, const int16_t *scan, const int16_t *nb,
vp9_reader *r) {
const int max_eob = 16 << (tx_size << 1);
......
......@@ -28,8 +28,8 @@ typedef struct {
struct macroblock_plane {
DECLARE_ALIGNED(16, int16_t, src_diff[64 * 64]);
int16_t *qcoeff;
int16_t *coeff;
tran_low_t *qcoeff;
tran_low_t *coeff;
uint16_t *eobs;
struct buf_2d src;
......@@ -119,8 +119,12 @@ struct macroblock {
// Used to store sub partition's choices.
MV pred_mv[MAX_REF_FRAMES];
void (*fwd_txm4x4)(const int16_t *input, int16_t *output, int stride);
void (*itxm_add)(const int16_t *input, uint8_t *dest, int stride, int eob);
void (*fwd_txm4x4)(const int16_t *input, tran_low_t *output, int stride);
void (*itxm_add)(const tran_low_t *input, uint8_t *dest, int stride, int eob);
#if CONFIG_VP9_HIGHBITDEPTH
void (*high_itxm_add)(const tran_low_t *input, uint8_t *dest, int stride,
int eob, int bd);
#endif
};
#ifdef __cplusplus
......
......@@ -30,13 +30,13 @@ static void alloc_mode_context(VP9_COMMON *cm, int num_4x4_blk,
for (i = 0; i < MAX_MB_PLANE; ++i) {
for (k = 0; k < 3; ++k) {
CHECK_MEM_ERROR(cm, ctx->coeff[i][k],
vpx_memalign(16, num_pix * sizeof(int16_t)));
vpx_memalign(16, num_pix * sizeof(*ctx->coeff[i][k])));
CHECK_MEM_ERROR(cm, ctx->qcoeff[i][k],
vpx_memalign(16, num_pix * sizeof(int16_t)));
vpx_memalign(16, num_pix * sizeof(*ctx->qcoeff[i][k])));
CHECK_MEM_ERROR(cm, ctx->dqcoeff[i][k],
vpx_memalign(16, num_pix * sizeof(int16_t)));
vpx_memalign(16, num_pix * sizeof(*ctx->dqcoeff[i][k])));
CHECK_MEM_ERROR(cm, ctx->eobs[i][k],
vpx_memalign(16, num_pix * sizeof(uint16_t)));
vpx_memalign(16, num_pix * sizeof(*ctx->eobs[i][k])));
ctx->coeff_pbuf[i][k] = ctx->coeff[i][k];
ctx->qcoeff_pbuf[i][k] = ctx->qcoeff[i][k];
ctx->dqcoeff_pbuf[i][k] = ctx->dqcoeff[i][k];
......
......@@ -19,15 +19,15 @@ struct VP9_COMP;
typedef struct {
MODE_INFO mic;
uint8_t *zcoeff_blk;
int16_t *coeff[MAX_MB_PLANE][3];
int16_t *qcoeff[MAX_MB_PLANE][3];
int16_t *dqcoeff[MAX_MB_PLANE][3];
tran_low_t *coeff[MAX_MB_PLANE][3];
tran_low_t *qcoeff[MAX_MB_PLANE][3];
tran_low_t *dqcoeff[MAX_MB_PLANE][3];
uint16_t *eobs[MAX_MB_PLANE][3];
// dual buffer pointers, 0: in use, 1: best in store
int16_t *coeff_pbuf[MAX_MB_PLANE][3];
int16_t *qcoeff_pbuf[MAX_MB_PLANE][3];
int16_t *dqcoeff_pbuf[MAX_MB_PLANE][3];
tran_low_t *coeff_pbuf[MAX_MB_PLANE][3];
tran_low_t *qcoeff_pbuf[MAX_MB_PLANE][3];
tran_low_t *dqcoeff_pbuf[MAX_MB_PLANE][3];
uint16_t *eobs_pbuf[MAX_MB_PLANE][3];
int is_coded;
......
This diff is collapsed.
......@@ -107,9 +107,9 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
vp9_token_state tokens[1025][2];
unsigned best_index[1025][2];
uint8_t token_cache[1024];
const int16_t *const coeff = BLOCK_OFFSET(mb->plane[plane].coeff, block);
int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
const tran_low_t *const coeff = BLOCK_OFFSET(mb->plane[plane].coeff, block);
tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
const int eob = p->eobs[block];
const PLANE_TYPE type = pd->plane_type;
const int default_eob = 16 << (tx_size << 1);
......@@ -294,22 +294,33 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
}
static INLINE void fdct32x32(int rd_transform,
const int16_t *src, int16_t *dst, int src_stride) {
const int16_t *src, tran_low_t *dst,
int src_stride) {
if (rd_transform)
vp9_fdct32x32_rd(src, dst, src_stride);
else
vp9_fdct32x32(src, dst, src_stride);
}
#if CONFIG_VP9_HIGHBITDEPTH
static INLINE void high_fdct32x32(int rd_transform, const int16_t *src,
tran_low_t *dst, int src_stride) {
if (rd_transform)
vp9_high_fdct32x32_rd(src, dst, src_stride);
else
vp9_high_fdct32x32(src, dst, src_stride);
}
#endif
void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
MACROBLOCKD *const xd = &x->e_mbd;
const struct macroblock_plane *const p = &x->plane[plane];
const struct macroblockd_plane *const pd = &xd->plane[plane];
const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
uint16_t *const eob = &p->eobs[block];
const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
int i, j;
......@@ -357,9 +368,9 @@ void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block,
MACROBLOCKD *const xd = &x->e_mbd;
const struct macroblock_plane *const p = &x->plane[plane];
const struct macroblockd_plane *const pd = &xd->plane[plane];
int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
uint16_t *const eob = &p->eobs[block];
const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
int i, j;
......@@ -405,9 +416,9 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
const struct macroblock_plane *const p = &x->plane[plane];
const struct macroblockd_plane *const pd = &xd->plane[plane];
const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
uint16_t *const eob = &p->eobs[block];
const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
int i, j;
......@@ -458,7 +469,7 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
struct optimize_ctx *const ctx = args->ctx;
struct macroblock_plane *const p = &x->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
int i, j;
uint8_t *dst;
ENTROPY_CONTEXT *a, *l;
......@@ -538,7 +549,7 @@ static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize,
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblock_plane *const p = &x->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
int i, j;
uint8_t *dst;
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
......@@ -587,9 +598,9 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
struct macroblock_plane *const p = &x->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
int16_t *coeff = BLOCK_OFFSET(p->coeff, block);
int16_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block);
tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
const scan_order *scan_order;
TX_TYPE tx_type;
PREDICTION_MODE mode;
......
......@@ -556,6 +556,9 @@ static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) {
cm->profile = oxcf->profile;
cm->bit_depth = oxcf->bit_depth;
#if CONFIG_VP9_HIGHBITDEPTH
cm->use_highbitdepth = oxcf->use_highbitdepth;
#endif
cm->color_space = UNKNOWN;
cm->width = oxcf->width;
......@@ -613,6 +616,11 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
assert(cm->bit_depth > VPX_BITS_8);
cpi->oxcf = *oxcf;
#if CONFIG_VP9_HIGHBITDEPTH
if (cpi->oxcf.use_highbitdepth) {
cpi->mb.e_mbd.bd = (int)cm->bit_depth;
}
#endif
rc->baseline_gf_interval = DEFAULT_GF_INTERVAL;
......@@ -2768,7 +2776,16 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
if (oxcf->pass == 1 &&
(!cpi->use_svc || is_two_pass_svc(cpi))) {
const int lossless = is_lossless_requested(oxcf);
#if CONFIG_VP9_HIGHBITDEPTH
if (cpi->oxcf.use_highbitdepth)
cpi->mb.fwd_txm4x4 = lossless ? vp9_high_fwht4x4 : vp9_high_fdct4x4;
else
cpi->mb.fwd_txm4x4 = lossless ? vp9_fwht4x4 : vp9_fdct4x4;
cpi->mb.high_itxm_add = lossless ? vp9_high_iwht4x4_add :
vp9_high_idct4x4_add;
#else
cpi->mb.fwd_txm4x4 = lossless ? vp9_fwht4x4 : vp9_fdct4x4;
#endif
cpi->mb.itxm_add = lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
vp9_first_pass(cpi, source);
} else if (oxcf->pass == 2 &&
......
......@@ -217,6 +217,9 @@ typedef struct VP9EncoderConfig {
vp8e_tuning tuning;
vp9e_tune_content content;
#if CONFIG_VP9_HIGHBITDEPTH
int use_highbitdepth;
#endif
} VP9EncoderConfig;
static INLINE int is_lossless_requested(const VP9EncoderConfig *cfg) {
......
......@@ -19,9 +19,9 @@
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_rd.h"
void vp9_quantize_dc(const int16_t *coeff_ptr, int skip_block,
void vp9_quantize_dc(const tran_low_t *coeff_ptr, int skip_block,
const int16_t *round_ptr, const int16_t quant,
int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr) {
const int rc = 0;
const int coeff = coeff_ptr[rc];
......@@ -40,9 +40,9 @@ void vp9_quantize_dc(const int16_t *coeff_ptr, int skip_block,
*eob_ptr = eob + 1;
}
void vp9_quantize_dc_32x32(const int16_t *coeff_ptr, int skip_block,
void vp9_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
const int16_t *round_ptr, const int16_t quant,
int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr) {
const int rc = 0;
const int coeff = coeff_ptr[rc];
......@@ -62,11 +62,11 @@ void vp9_quantize_dc_32x32(const int16_t *coeff_ptr, int skip_block,
*eob_ptr = eob + 1;
}
void vp9_quantize_fp_c(const int16_t *coeff_ptr, intptr_t count,
void vp9_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block,
const int16_t *zbin_ptr, const int16_t *round_ptr,
const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr,
int zbin_oq_value, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
......@@ -78,13 +78,13 @@ void vp9_quantize_fp_c(const int16_t *coeff_ptr, intptr_t count,
(void)zbin_oq_value;
(void)iscan;
vpx_memset(qcoeff_ptr, 0, count * sizeof(int16_t));
vpx_memset(dqcoeff_ptr, 0, count * sizeof(int16_t));
vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
if (!skip_block) {
// Quantization pass: All coefficients with index >= zero_flag are
// skippable. Note: zero_flag can be zero.
for (i = 0; i < count; i++) {
for (i = 0; i < n_coeffs; i++) {
const int rc = scan[i];
const int coeff = coeff_ptr[rc];
const int coeff_sign = (coeff >> 31);
......@@ -105,12 +105,12 @@ void vp9_quantize_fp_c(const int16_t *coeff_ptr, intptr_t count,
// TODO(jingning) Refactor this file and combine functions with similar
// operations.
void vp9_quantize_fp_32x32_c(const int16_t *coeff_ptr, intptr_t n_coeffs,
void vp9_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block,
const int16_t *zbin_ptr, const int16_t *round_ptr,
const int16_t *quant_ptr,
const int16_t *quant_shift_ptr,
int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr,
int zbin_oq_value, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
......@@ -120,8 +120,8 @@ void vp9_quantize_fp_32x32_c(const int16_t *coeff_ptr, intptr_t n_coeffs,
(void)zbin_oq_value;
(void)iscan;
vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(int16_t));
vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(int16_t));
vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
if (!skip_block) {
for (i = 0; i < n_coeffs; i++) {
......@@ -146,27 +146,27 @@ void vp9_quantize_fp_32x32_c(const int16_t *coeff_ptr, intptr_t n_coeffs,
*eob_ptr = eob + 1;
}
void vp9_quantize_b_c(const int16_t *coeff_ptr, intptr_t count,
void vp9_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block,
const int16_t *zbin_ptr, const int16_t *round_ptr,
const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr,
int zbin_oq_value, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
int i, non_zero_count = (int)count, eob = -1;
int i, non_zero_count = (int)n_coeffs, eob = -1;
const int zbins[2] = { zbin_ptr[0] + zbin_oq_value,
zbin_ptr[1] + zbin_oq_value };
const int nzbins[2] = { zbins[0] * -1,
zbins[1] * -1 };
(void)iscan;
vpx_memset(qcoeff_ptr, 0, count * sizeof(int16_t));
vpx_memset(dqcoeff_ptr, 0, count * sizeof(int16_t));
vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));