diff --git a/configure b/configure index bd3bf1641c971b291c38ea12cd4277461c7ac105..d52052504fc627d45e0c19580ea0a3a16503efbe 100755 --- a/configure +++ b/configure @@ -242,7 +242,6 @@ EXPERIMENT_LIST=" superblocks pred_filter lossless - newbestrefmv subpelrefmv new_mvref implicit_segmentation diff --git a/test/dct16x16_test.cc b/test/dct16x16_test.cc index a0b91f3890f3e33c0105b7beff1ef972b72ee48c..d572ee6be0a40611f69d79b8339700b879f89046 100644 --- a/test/dct16x16_test.cc +++ b/test/dct16x16_test.cc @@ -17,7 +17,7 @@ extern "C" { #include "vp9/common/entropy.h" #include "vp9/common/idct.h" -#include "vp9/encoder/dct.h" +#include "vpx_rtcd.h" } #include "acm_random.h" @@ -256,7 +256,7 @@ void reference_16x16_dct_2d(int16_t input[16*16], double output[16*16]) { } -TEST(VP8Idct16x16Test, AccuracyCheck) { +TEST(VP9Idct16x16Test, AccuracyCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); const int count_test_block = 1000; for (int i = 0; i < count_test_block; ++i) { @@ -271,7 +271,7 @@ TEST(VP8Idct16x16Test, AccuracyCheck) { reference_16x16_dct_2d(in, out_r); for (int j = 0; j < 256; j++) coeff[j] = round(out_r[j]); - vp8_short_idct16x16_c(coeff, out_c, 32); + vp9_short_idct16x16_c(coeff, out_c, 32); for (int j = 0; j < 256; ++j) { const int diff = out_c[j] - in[j]; const int error = diff * diff; @@ -280,7 +280,7 @@ TEST(VP8Idct16x16Test, AccuracyCheck) { << " at index " << j; } - vp8_short_fdct16x16_c(in, out_c, 32); + vp9_short_fdct16x16_c(in, out_c, 32); for (int j = 0; j < 256; ++j) { const double diff = coeff[j] - out_c[j]; const double error = diff * diff; @@ -291,7 +291,7 @@ TEST(VP8Idct16x16Test, AccuracyCheck) { } } -TEST(VP8Fdct16x16Test, AccuracyCheck) { +TEST(VP9Fdct16x16Test, AccuracyCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); int max_error = 0; double total_error = 0; @@ -306,8 +306,8 @@ TEST(VP8Fdct16x16Test, AccuracyCheck) { test_input_block[j] = rnd.Rand8() - rnd.Rand8(); const int pitch = 32; - vp8_short_fdct16x16_c(test_input_block, 
test_temp_block, pitch); - vp8_short_idct16x16_c(test_temp_block, test_output_block, pitch); + vp9_short_fdct16x16_c(test_input_block, test_temp_block, pitch); + vp9_short_idct16x16_c(test_temp_block, test_output_block, pitch); for (int j = 0; j < 256; ++j) { const int diff = test_input_block[j] - test_output_block[j]; @@ -325,7 +325,7 @@ TEST(VP8Fdct16x16Test, AccuracyCheck) { << "Error: 16x16 FDCT/IDCT has average roundtrip error > 1/10 per block"; } -TEST(VP8Fdct16x16Test, CoeffSizeCheck) { +TEST(VP9Fdct16x16Test, CoeffSizeCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); const int count_test_block = 1000; for (int i = 0; i < count_test_block; ++i) { @@ -342,8 +342,8 @@ TEST(VP8Fdct16x16Test, CoeffSizeCheck) { input_extreme_block[j] = 255; const int pitch = 32; - vp8_short_fdct16x16_c(input_block, output_block, pitch); - vp8_short_fdct16x16_c(input_extreme_block, output_extreme_block, pitch); + vp9_short_fdct16x16_c(input_block, output_block, pitch); + vp9_short_fdct16x16_c(input_extreme_block, output_extreme_block, pitch); // The minimum quant value is 4. for (int j = 0; j < 256; ++j) { diff --git a/test/fdct4x4_test.cc b/test/fdct4x4_test.cc index d7066b3ff2c59b2931fcfe139042a8a15e6bb5c0..e2eb28e5a34ae8d9ab3320b9f4b5a3e566773ddd 100644 --- a/test/fdct4x4_test.cc +++ b/test/fdct4x4_test.cc @@ -16,7 +16,7 @@ extern "C" { #include "vp9/common/idct.h" -#include "vp9/encoder/dct.h" +#include "vpx_rtcd.h" } #include "acm_random.h" @@ -26,7 +26,7 @@ using libvpx_test::ACMRandom; namespace { -TEST(Vp8FdctTest, SignBiasCheck) { +TEST(Vp9FdctTest, SignBiasCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); int16_t test_input_block[16]; int16_t test_output_block[16]; @@ -43,7 +43,7 @@ TEST(Vp8FdctTest, SignBiasCheck) { // TODO(Yaowu): this should be converted to a parameterized test // to test optimized versions of this function. 
- vp8_short_fdct4x4_c(test_input_block, test_output_block, pitch); + vp9_short_fdct4x4_c(test_input_block, test_output_block, pitch); for (int j = 0; j < 16; ++j) { if (test_output_block[j] < 0) @@ -70,7 +70,7 @@ TEST(Vp8FdctTest, SignBiasCheck) { // TODO(Yaowu): this should be converted to a parameterized test // to test optimized versions of this function. - vp8_short_fdct4x4_c(test_input_block, test_output_block, pitch); + vp9_short_fdct4x4_c(test_input_block, test_output_block, pitch); for (int j = 0; j < 16; ++j) { if (test_output_block[j] < 0) @@ -89,7 +89,7 @@ TEST(Vp8FdctTest, SignBiasCheck) { } }; -TEST(Vp8FdctTest, RoundTripErrorCheck) { +TEST(Vp9FdctTest, RoundTripErrorCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); int max_error = 0; double total_error = 0; @@ -106,7 +106,7 @@ TEST(Vp8FdctTest, RoundTripErrorCheck) { // TODO(Yaowu): this should be converted to a parameterized test // to test optimized versions of this function. const int pitch = 8; - vp8_short_fdct4x4_c(test_input_block, test_temp_block, pitch); + vp9_short_fdct4x4_c(test_input_block, test_temp_block, pitch); for (int j = 0; j < 16; ++j) { if(test_temp_block[j] > 0) { @@ -121,7 +121,7 @@ TEST(Vp8FdctTest, RoundTripErrorCheck) { } // Because the bitstream is not frozen yet, use the idct in the codebase. 
- vp8_short_idct4x4llm_c(test_temp_block, test_output_block, pitch); + vp9_short_idct4x4llm_c(test_temp_block, test_output_block, pitch); for (int j = 0; j < 16; ++j) { const int diff = test_input_block[j] - test_output_block[j]; diff --git a/test/fdct8x8_test.cc b/test/fdct8x8_test.cc index 97680fe081ad2a4f7edd87f793d307c15dd7094a..fc7084ed81ef413d7af149e583b57dfcad7fdcd1 100644 --- a/test/fdct8x8_test.cc +++ b/test/fdct8x8_test.cc @@ -15,8 +15,8 @@ #include "third_party/googletest/src/include/gtest/gtest.h" extern "C" { -#include "vp9/encoder/dct.h" #include "vp9/common/idct.h" +#include "vpx_rtcd.h" } #include "acm_random.h" @@ -26,7 +26,7 @@ using libvpx_test::ACMRandom; namespace { -TEST(VP8Fdct8x8Test, SignBiasCheck) { +TEST(VP9Fdct8x8Test, SignBiasCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); int16_t test_input_block[64]; int16_t test_output_block[64]; @@ -41,7 +41,7 @@ TEST(VP8Fdct8x8Test, SignBiasCheck) { for (int j = 0; j < 64; ++j) test_input_block[j] = rnd.Rand8() - rnd.Rand8(); - vp8_short_fdct8x8_c(test_input_block, test_output_block, pitch); + vp9_short_fdct8x8_c(test_input_block, test_output_block, pitch); for (int j = 0; j < 64; ++j) { if (test_output_block[j] < 0) @@ -66,7 +66,7 @@ TEST(VP8Fdct8x8Test, SignBiasCheck) { for (int j = 0; j < 64; ++j) test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4); - vp8_short_fdct8x8_c(test_input_block, test_output_block, pitch); + vp9_short_fdct8x8_c(test_input_block, test_output_block, pitch); for (int j = 0; j < 64; ++j) { if (test_output_block[j] < 0) @@ -85,7 +85,7 @@ TEST(VP8Fdct8x8Test, SignBiasCheck) { } }; -TEST(VP8Fdct8x8Test, RoundTripErrorCheck) { +TEST(VP9Fdct8x8Test, RoundTripErrorCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); int max_error = 0; double total_error = 0; @@ -100,7 +100,7 @@ TEST(VP8Fdct8x8Test, RoundTripErrorCheck) { test_input_block[j] = rnd.Rand8() - rnd.Rand8(); const int pitch = 16; - vp8_short_fdct8x8_c(test_input_block, test_temp_block, pitch); + 
vp9_short_fdct8x8_c(test_input_block, test_temp_block, pitch); for (int j = 0; j < 64; ++j){ if(test_temp_block[j] > 0) { test_temp_block[j] += 2; @@ -112,7 +112,7 @@ TEST(VP8Fdct8x8Test, RoundTripErrorCheck) { test_temp_block[j] *= 4; } } - vp8_short_idct8x8_c(test_temp_block, test_output_block, pitch); + vp9_short_idct8x8_c(test_temp_block, test_output_block, pitch); for (int j = 0; j < 64; ++j) { const int diff = test_input_block[j] - test_output_block[j]; @@ -130,7 +130,7 @@ TEST(VP8Fdct8x8Test, RoundTripErrorCheck) { << "Error: 8x8 FDCT/IDCT has average roundtrip error > 1/5 per block"; }; -TEST(VP8Fdct8x8Test, ExtremalCheck) { +TEST(VP9Fdct8x8Test, ExtremalCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); int max_error = 0; double total_error = 0; @@ -145,8 +145,8 @@ TEST(VP8Fdct8x8Test, ExtremalCheck) { test_input_block[j] = rnd.Rand8() % 2 ? 255 : -255; const int pitch = 16; - vp8_short_fdct8x8_c(test_input_block, test_temp_block, pitch); - vp8_short_idct8x8_c(test_temp_block, test_output_block, pitch); + vp9_short_fdct8x8_c(test_input_block, test_temp_block, pitch); + vp9_short_idct8x8_c(test_temp_block, test_output_block, pitch); for (int j = 0; j < 64; ++j) { const int diff = test_input_block[j] - test_output_block[j]; diff --git a/test/idct8x8_test.cc b/test/idct8x8_test.cc index 6d3fe3d597cf004f2f119f812110af0a6b3cc594..0155716b5a724b5ae9dc8f6dd713b31141009f53 100644 --- a/test/idct8x8_test.cc +++ b/test/idct8x8_test.cc @@ -15,8 +15,8 @@ #include "third_party/googletest/src/include/gtest/gtest.h" extern "C" { -#include "vp9/encoder/dct.h" #include "vp9/common/idct.h" +#include "vpx_rtcd.h" } #include "acm_random.h" @@ -99,7 +99,7 @@ void reference_idct_2d(double input[64], int16_t output[64]) { output[i] = round(out2[i]/32); } -TEST(VP8Idct8x8Test, AccuracyCheck) { +TEST(VP9Idct8x8Test, AccuracyCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); const int count_test_block = 10000; for (int i = 0; i < count_test_block; ++i) { @@ -112,7 +112,7 
@@ TEST(VP8Idct8x8Test, AccuracyCheck) { input[j] = rnd.Rand8() - rnd.Rand8(); const int pitch = 16; - vp8_short_fdct8x8_c(input, output_c, pitch); + vp9_short_fdct8x8_c(input, output_c, pitch); reference_dct_2d(input, output_r); for (int j = 0; j < 64; ++j) { @@ -140,7 +140,7 @@ TEST(VP8Idct8x8Test, AccuracyCheck) { reference_dct_2d(input, output_r); for (int j = 0; j < 64; ++j) coeff[j] = round(output_r[j]); - vp8_short_idct8x8_c(coeff, output_c, pitch); + vp9_short_idct8x8_c(coeff, output_c, pitch); for (int j = 0; j < 64; ++j) { const int diff = output_c[j] -input[j]; const int error = diff * diff; diff --git a/test/test.mk b/test/test.mk index 3c6d44c972f04141f5fe1d0bceddccbf15add106..5e9565c33a79d115af835efc584d440ea651e0bd 100644 --- a/test/test.mk +++ b/test/test.mk @@ -33,9 +33,12 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += test_vector_test.cc ## ifeq ($(CONFIG_SHARED),) +## VP8 +ifneq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),) + # These tests require both the encoder and decoder to be built. ifeq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),yesyes) -LIBVPX_TEST_SRCS-yes += boolcoder_test.cc +LIBVPX_TEST_SRCS-yes += vp8_boolcoder_test.cc endif LIBVPX_TEST_SRCS-yes += idctllm_test.cc @@ -47,13 +50,22 @@ LIBVPX_TEST_SRCS-yes += sixtap_predict_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += subtract_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += vp8_fdct4x4_test.cc -# VP9 tests +endif # VP8 + +## VP9 +ifneq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_DECODER),) + +# These tests require both the encoder and decoder to be built. 
+ifeq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_DECODER),yesyes) +LIBVPX_TEST_SRCS-yes += vp9_boolcoder_test.cc +endif + LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc -LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct16x16_test.cc -ifneq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_DECODER),) +#LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct16x16_test.cc LIBVPX_TEST_SRCS-yes += idct8x8_test.cc -endif +endif # VP9 + endif diff --git a/test/boolcoder_test.cc b/test/vp8_boolcoder_test.cc similarity index 100% rename from test/boolcoder_test.cc rename to test/vp8_boolcoder_test.cc diff --git a/test/vp9_boolcoder_test.cc b/test/vp9_boolcoder_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..68a69d510c78665880f04ba636231cde81b2de4f --- /dev/null +++ b/test/vp9_boolcoder_test.cc @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <math.h> +#include <stdlib.h> +#include <string.h> + +#include "third_party/googletest/src/include/gtest/gtest.h" + +extern "C" { +#include "vp9/encoder/boolhuff.h" +#include "vp9/decoder/dboolhuff.h" +} + +#include "acm_random.h" +#include "vpx/vpx_integer.h" + +using libvpx_test::ACMRandom; + +namespace { +const int num_tests = 10; +} // namespace + +TEST(VP9, TestBitIO) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + for (int n = 0; n < num_tests; ++n) { + for (int method = 0; method <= 7; ++method) { // we generate various proba + const int bits_to_test = 1000; + uint8_t probas[bits_to_test]; + + for (int i = 0; i < bits_to_test; ++i) { + const int parity = i & 1; + probas[i] = + (method == 0) ? 0 : (method == 1) ? 255 : + (method == 2) ? 128 : + (method == 3) ? rnd.Rand8() : + (method == 4) ? (parity ? 0 : 255) : + // alternate between low and high proba: + (method == 5) ? (parity ? rnd(128) : 255 - rnd(128)) : + (method == 6) ? + (parity ? rnd(64) : 255 - rnd(64)) : + (parity ? rnd(32) : 255 - rnd(32)); + } + for (int bit_method = 0; bit_method <= 3; ++bit_method) { + const int random_seed = 6432; + const int buffer_size = 10000; + ACMRandom bit_rnd(random_seed); + BOOL_CODER bw; + uint8_t bw_buffer[buffer_size]; + vp9_start_encode(&bw, bw_buffer); + + int bit = (bit_method == 0) ? 0 : (bit_method == 1) ? 
1 : 0; + for (int i = 0; i < bits_to_test; ++i) { + if (bit_method == 2) { + bit = (i & 1); + } else if (bit_method == 3) { + bit = bit_rnd(2); + } + encode_bool(&bw, bit, static_cast<int>(probas[i])); + } + + vp9_stop_encode(&bw); + + BOOL_DECODER br; + vp9_start_decode(&br, bw_buffer, buffer_size); + bit_rnd.Reset(random_seed); + for (int i = 0; i < bits_to_test; ++i) { + if (bit_method == 2) { + bit = (i & 1); + } else if (bit_method == 3) { + bit = bit_rnd(2); + } + GTEST_ASSERT_EQ(decode_bool(&br, probas[i]), bit) + << "pos: " << i << " / " << bits_to_test + << " bit_method: " << bit_method + << " method: " << method; + } + } + } + } +} diff --git a/vp9/common/blockd.h b/vp9/common/blockd.h index ec010e9b1a25e833677ac4f56310bc461ac99a32..9d5eedc58fad46b9088ef55405b88b5d6e855671 100644 --- a/vp9/common/blockd.h +++ b/vp9/common/blockd.h @@ -44,9 +44,7 @@ void vpx_log(const char *format, ...); /* Segment Feature Masks */ #define SEGMENT_DELTADATA 0 #define SEGMENT_ABSDATA 1 -#if CONFIG_NEWBESTREFMV || CONFIG_NEW_MVREF #define MAX_MV_REFS 19 -#endif typedef struct { int r, c; @@ -216,9 +214,7 @@ typedef struct { MV_REFERENCE_FRAME ref_frame, second_ref_frame; TX_SIZE txfm_size; int_mv mv[2]; // for each reference frame used -#if CONFIG_NEWBESTREFMV || CONFIG_NEW_MVREF int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REFS]; -#endif SPLITMV_PARTITIONING_TYPE partitioning; unsigned char mb_skip_coeff; /* does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens */ @@ -280,7 +276,7 @@ typedef struct macroblockd { DECLARE_ALIGNED(16, unsigned char, predictor[384]); DECLARE_ALIGNED(16, short, qcoeff[400]); DECLARE_ALIGNED(16, short, dqcoeff[400]); - DECLARE_ALIGNED(16, char, eobs[25]); + DECLARE_ALIGNED(16, unsigned short, eobs[25]); /* 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. 
*/ BLOCKD block[25]; @@ -467,7 +463,10 @@ static TX_TYPE get_tx_type_8x8(const MACROBLOCKD *xd, const BLOCKD *b) { TX_TYPE tx_type = DCT_DCT; if (xd->mode_info_context->mbmi.mode == I8X8_PRED && xd->q_index < ACTIVE_HT8) { - tx_type = txfm_map(pred_mode_conv(b->bmi.as_mode.first)); + // TODO(rbultje): MB_PREDICTION_MODE / B_PREDICTION_MODE should be merged + // or the relationship otherwise modified to address this type conversion. + tx_type = txfm_map(pred_mode_conv( + (MB_PREDICTION_MODE)b->bmi.as_mode.first)); } return tx_type; } @@ -483,7 +482,7 @@ static TX_TYPE get_tx_type_16x16(const MACROBLOCKD *xd, const BLOCKD *b) { static TX_TYPE get_tx_type(const MACROBLOCKD *xd, const BLOCKD *b) { TX_TYPE tx_type = DCT_DCT; - int ib = (b - xd->block); + int ib = (int)(b - xd->block); if (ib >= 16) return tx_type; if (xd->mode_info_context->mbmi.txfm_size == TX_16X16) { diff --git a/vp9/common/findnearmv.c b/vp9/common/findnearmv.c index 5270abf6abfe85c053feef1a8a4ae51a511191d7..cc560cf3ac023ec5aee1709bd71b2272aaf5257e 100644 --- a/vp9/common/findnearmv.c +++ b/vp9/common/findnearmv.c @@ -11,6 +11,7 @@ #include "findnearmv.h" #include "vp9/common/sadmxn.h" +#include "vp9/common/subpelvar.h" #include <limits.h> const unsigned char vp9_mbsplit_offset[4][16] = { @@ -167,7 +168,6 @@ vp9_prob *vp9_mv_ref_probs(VP9_COMMON *pc, return p; } -#if CONFIG_NEWBESTREFMV #define SP(x) (((x) & 7) << 1) unsigned int vp9_sad3x16_c( const unsigned char *src_ptr, @@ -186,6 +186,76 @@ unsigned int vp9_sad16x3_c( return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 16, 3); } +#if CONFIG_SUBPELREFMV +unsigned int vp9_variance2x16_c(const unsigned char *src_ptr, + const int source_stride, + const unsigned char *ref_ptr, + const int recon_stride, + unsigned int *sse) { + unsigned int var; + int avg; + + variance(src_ptr, source_stride, ref_ptr, recon_stride, 2, 16, &var, &avg); + *sse = var; + return (var - ((avg * avg) >> 5)); +} + +unsigned int vp9_variance16x2_c(const unsigned char 
*src_ptr, + const int source_stride, + const unsigned char *ref_ptr, + const int recon_stride, + unsigned int *sse) { + unsigned int var; + int avg; + + variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 2, &var, &avg); + *sse = var; + return (var - ((avg * avg) >> 5)); +} + +unsigned int vp9_sub_pixel_variance16x2_c(const unsigned char *src_ptr, + const int src_pixels_per_line, + const int xoffset, + const int yoffset, + const unsigned char *dst_ptr, + const int dst_pixels_per_line, + unsigned int *sse) { + unsigned short FData3[16 * 3]; // Temp data bufffer used in filtering + unsigned char temp2[20 * 16]; + const short *HFilter, *VFilter; + + HFilter = vp9_bilinear_filters[xoffset]; + VFilter = vp9_bilinear_filters[yoffset]; + + var_filter_block2d_bil_first_pass(src_ptr, FData3, + src_pixels_per_line, 1, 3, 16, HFilter); + var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 2, 16, VFilter); + + return vp9_variance16x2_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse); +} + +unsigned int vp9_sub_pixel_variance2x16_c(const unsigned char *src_ptr, + const int src_pixels_per_line, + const int xoffset, + const int yoffset, + const unsigned char *dst_ptr, + const int dst_pixels_per_line, + unsigned int *sse) { + unsigned short FData3[2 * 17]; // Temp data bufffer used in filtering + unsigned char temp2[2 * 16]; + const short *HFilter, *VFilter; + + HFilter = vp9_bilinear_filters[xoffset]; + VFilter = vp9_bilinear_filters[yoffset]; + + var_filter_block2d_bil_first_pass(src_ptr, FData3, + src_pixels_per_line, 1, 17, 2, HFilter); + var_filter_block2d_bil_second_pass(FData3, temp2, 2, 2, 16, 2, VFilter); + + return vp9_variance2x16_c(temp2, 2, dst_ptr, dst_pixels_per_line, sse); +} +#endif + /* check a list of motion vectors by sad score using a number rows of pixels * above and a number cols of pixels in the left to select the one with best * score to use as ref motion vector @@ -323,5 +393,3 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, // Copy back the 
re-ordered mv list vpx_memcpy(mvlist, sorted_mvs, sizeof(sorted_mvs)); } - -#endif // CONFIG_NEWBESTREFMV diff --git a/vp9/common/findnearmv.h b/vp9/common/findnearmv.h index 3cba90d14c7cd536f498f8c80de5b63674b6cf51..c53575ea9a2655ac6eb38d9bc82b20123b3f7b7e 100644 --- a/vp9/common/findnearmv.h +++ b/vp9/common/findnearmv.h @@ -18,7 +18,6 @@ #include "treecoder.h" #include "onyxc_int.h" -#if CONFIG_NEWBESTREFMV /* check a list of motion vectors by sad score using a number rows of pixels * above and a number cols of pixels in the left to select the one with best * score to use as ref motion vector @@ -30,7 +29,6 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, int_mv *best_mv, int_mv *nearest, int_mv *near); -#endif static void mv_bias(int refmb_ref_frame_sign_bias, int refframe, int_mv *mvp, const int *ref_frame_sign_bias) { MV xmv; diff --git a/vp9/common/idctllm.c b/vp9/common/idctllm.c index becb6ef1f6e536bab27856c165d53baa7d842fa7..4b0ac55d6f07054df45f6e0d05f945c169354fce 100644 --- a/vp9/common/idctllm.c +++ b/vp9/common/idctllm.c @@ -1013,6 +1013,8 @@ void vp9_short_idct16x16_c(short *input, short *output, int pitch) { } #endif +#define TEST_INT_16x16_IDCT 1 +#if !TEST_INT_16x16_IDCT static const double C1 = 0.995184726672197; static const double C2 = 0.98078528040323; static const double C3 = 0.956940335732209; @@ -1273,3 +1275,235 @@ void vp9_short_idct16x16_c(short *input, short *output, int pitch) { } vp9_clear_system_state(); // Make it simd safe : __asm emms; } + +#else +static const int16_t C1 = 16305; +static const int16_t C2 = 16069; +static const int16_t C3 = 15679; +static const int16_t C4 = 15137; +static const int16_t C5 = 14449; +static const int16_t C6 = 13623; +static const int16_t C7 = 12665; +static const int16_t C8 = 11585; +static const int16_t C9 = 10394; +static const int16_t C10 = 9102; +static const int16_t C11 = 7723; +static const int16_t C12 = 6270; +static const int16_t C13 = 4756; +static const int16_t C14 = 3196; +static const 
int16_t C15 = 1606; + +#define INITIAL_SHIFT 2 +#define INITIAL_ROUNDING (1 << (INITIAL_SHIFT - 1)) +#define RIGHT_SHIFT 14 +#define RIGHT_ROUNDING (1 << (RIGHT_SHIFT - 1)) + +static void butterfly_16x16_idct_1d(int16_t input[16], int16_t output[16], + int last_shift_bits) { + int16_t step[16]; + int intermediate[16]; + int temp1, temp2; + + int step1_shift = RIGHT_SHIFT + INITIAL_SHIFT; + int step1_rounding = 1 << (step1_shift - 1); + int last_rounding = 0; + + if (last_shift_bits > 0) + last_rounding = 1 << (last_shift_bits - 1); + + // step 1 and 2 + step[ 0] = (input[0] + input[8] + INITIAL_ROUNDING) >> INITIAL_SHIFT; + step[ 1] = (input[0] - input[8] + INITIAL_ROUNDING) >> INITIAL_SHIFT; + + temp1 = input[4] * C12; + temp2 = input[12] * C4; + temp1 = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; + temp1 *= C8; + step[ 2] = (2 * (temp1) + step1_rounding) >> step1_shift; + + temp1 = input[4] * C4; + temp2 = input[12] * C12; + temp1 = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; + temp1 *= C8; + step[ 3] = (2 * (temp1) + step1_rounding) >> step1_shift; + + temp1 = input[2] * C8; + temp1 = (2 * (temp1) + RIGHT_ROUNDING) >> RIGHT_SHIFT; + temp2 = input[6] + input[10]; + step[ 4] = (temp1 + temp2 + INITIAL_ROUNDING) >> INITIAL_SHIFT; + step[ 5] = (temp1 - temp2 + INITIAL_ROUNDING) >> INITIAL_SHIFT; + + temp1 = input[14] * C8; + temp1 = (2 * (temp1) + RIGHT_ROUNDING) >> RIGHT_SHIFT; + temp2 = input[6] - input[10]; + step[ 6] = (temp2 - temp1 + INITIAL_ROUNDING) >> INITIAL_SHIFT; + step[ 7] = (temp2 + temp1 + INITIAL_ROUNDING) >> INITIAL_SHIFT; + + // for odd input + temp1 = input[3] * C12; + temp2 = input[13] * C4; + temp1 = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; + temp1 *= C8; + intermediate[ 8] = (2 * (temp1) + RIGHT_ROUNDING) >> RIGHT_SHIFT; + + temp1 = input[3] * C4; + temp2 = input[13] * C12; + temp2 = (temp2 - temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT; + temp2 *= C8; + intermediate[ 9] = (2 * (temp2) + RIGHT_ROUNDING) >> RIGHT_SHIFT; + + 
intermediate[10] = (2 * (input[9] * C8) + RIGHT_ROUNDING) >> RIGHT_SHIFT; + intermediate[11] = input[15] - input[1]; + intermediate[12] = input[15] + input[1]; + intermediate[13] = (2 * (input[7] * C8) + RIGHT_ROUNDING) >> RIGHT_SHIFT; + + temp1 = input[11] * C12; + temp2 = input[5] * C4; + temp2 = (temp2 - temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT; + temp2 *= C8; + intermediate[14] = (2 * (temp2) + RIGHT_ROUNDING) >> RIGHT_SHIFT; + + temp1 = input[11] * C4; + temp2 = input[5] * C12; + temp1 = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; + temp1 *= C8; + intermediate[15] = (2 * (temp1) + RIGHT_ROUNDING) >> RIGHT_SHIFT; + + step[ 8] = (intermediate[ 8] + intermediate[14] + INITIAL_ROUNDING) + >> INITIAL_SHIFT; + step[ 9] = (intermediate[ 9] + intermediate[15] + INITIAL_ROUNDING) + >> INITIAL_SHIFT; + step[10] = (intermediate[10] + intermediate[11] + INITIAL_ROUNDING) + >> INITIAL_SHIFT; + step[11] = (intermediate[10] - intermediate[11] + INITIAL_ROUNDING) + >> INITIAL_SHIFT; + step[12] = (intermediate[12] + intermediate[13] + INITIAL_ROUNDING) + >> INITIAL_SHIFT; + step[13] = (intermediate[12] - intermediate[13] + INITIAL_ROUNDING) + >> INITIAL_SHIFT; + step[14] = (intermediate[ 8] - intermediate[14] + INITIAL_ROUNDING) + >> INITIAL_SHIFT; + step[15] = (intermediate[ 9] - intermediate[15] + INITIAL_ROUNDING) + >> INITIAL_SHIFT; + + // step 3 + output[0] = step[ 0] + step[ 3]; + output[1] = step[ 1] + step[ 2]; + output[2] = step[ 1] - step[ 2]; + output[3] = step[ 0] - step[ 3]; + + temp1 = step[ 4] * C14; + temp2 = step[ 7] * C2; + output[4] = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; + + temp1 = step[ 4] * C2; + temp2 = step[ 7] * C14; + output[7] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; + + temp1 = step[ 5] * C10; + temp2 = step[ 6] * C6; + output[5] = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; + + temp1 = step[ 5] * C6; + temp2 = step[ 6] * C10; + output[6] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; + + output[8] = step[ 8] + 
step[11]; + output[9] = step[ 9] + step[10]; + output[10] = step[ 9] - step[10]; + output[11] = step[ 8] - step[11]; + output[12] = step[12] + step[15]; + output[13] = step[13] + step[14]; + output[14] = step[13] - step[14]; + output[15] = step[12] - step[15]; + + // output 4 + step[ 0] = output[0] + output[7]; + step[ 1] = output[1] + output[6]; + step[ 2] = output[2] + output[5]; + step[ 3] = output[3] + output[4]; + step[ 4] = output[3] - output[4]; + step[ 5] = output[2] - output[5]; + step[ 6] = output[1] - output[6]; + step[ 7] = output[0] - output[7]; + + temp1 = output[8] * C7; + temp2 = output[15] * C9; + step[ 8] = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; + + temp1 = output[9] * C11; + temp2 = output[14] * C5; + step[ 9] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; + + temp1 = output[10] * C3; + temp2 = output[13] * C13; + step[10] = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; + + temp1 = output[11] * C15; + temp2 = output[12] * C1; + step[11] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; + + temp1 = output[11] * C1; + temp2 = output[12] * C15; + step[12] = (temp2 - temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT; + + temp1 = output[10] * C13; + temp2 = output[13] * C3; + step[13] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; + + temp1 = output[9] * C5; + temp2 = output[14] * C11; + step[14] = (temp2 - temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT; + + temp1 = output[8] * C9; + temp2 = output[15] * C7; + step[15] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; + + // step 5 + output[0] = (step[0] + step[15] + last_rounding) >> last_shift_bits; + output[1] = (step[1] + step[14] + last_rounding) >> last_shift_bits; + output[2] = (step[2] + step[13] + last_rounding) >> last_shift_bits; + output[3] = (step[3] + step[12] + last_rounding) >> last_shift_bits; + output[4] = (step[4] + step[11] + last_rounding) >> last_shift_bits; + output[5] = (step[5] + step[10] + last_rounding) >> last_shift_bits; + output[6] = (step[6] + step[ 9] + 
last_rounding) >> last_shift_bits; + output[7] = (step[7] + step[ 8] + last_rounding) >> last_shift_bits; + + output[15] = (step[0] - step[15] + last_rounding) >> last_shift_bits; + output[14] = (step[1] - step[14] + last_rounding) >> last_shift_bits; + output[13] = (step[2] - step[13] + last_rounding) >> last_shift_bits; + output[12] = (step[3] - step[12] + last_rounding) >> last_shift_bits; + output[11] = (step[4] - step[11] + last_rounding) >> last_shift_bits; + output[10] = (step[5] - step[10] + last_rounding) >> last_shift_bits; + output[9] = (step[6] - step[ 9] + last_rounding) >> last_shift_bits; + output[8] = (step[7] - step[ 8] + last_rounding) >> last_shift_bits; +} + +void vp9_short_idct16x16_c(int16_t *input, int16_t *output, int pitch) { + int16_t out[16 * 16]; + int16_t *outptr = &out[0]; + const int short_pitch = pitch >> 1; + int i, j; + int16_t temp_in[16], temp_out[16]; + + // First transform rows + for (i = 0; i < 16; ++i) { + butterfly_16x16_idct_1d(input, outptr, 0); + input += short_pitch; + outptr += 16; + } + + // Then transform columns + for (i = 0; i < 16; ++i) { + for (j = 0; j < 16; ++j) + temp_in[j] = out[j * 16 + i]; + butterfly_16x16_idct_1d(temp_in, temp_out, 3); + for (j = 0; j < 16; ++j) + output[j * 16 + i] = temp_out[j]; + } +} +#undef INITIAL_SHIFT +#undef INITIAL_ROUNDING +#undef RIGHT_SHIFT +#undef RIGHT_ROUNDING +#endif diff --git a/vp9/common/mv.h b/vp9/common/mv.h index 48c92f9589a7055062acc04cfa13bdc0da53a035..bbe6d2c8bd4ca1b72c478d7439d835b87e4e176d 100644 --- a/vp9/common/mv.h +++ b/vp9/common/mv.h @@ -18,9 +18,9 @@ typedef struct { short col; } MV; -typedef union { - uint32_t as_int; - MV as_mv; -} int_mv; /* facilitates faster equality tests and copies */ +typedef union int_mv { + uint32_t as_int; + MV as_mv; +} int_mv; /* facilitates faster equality tests and copies */ #endif diff --git a/vp9/common/mvref_common.c b/vp9/common/mvref_common.c index 
7fd48b938429bb204941195ce815bffb146a33bc..d6faa138d102b69b486128a8f909a260902589bd 100644 --- a/vp9/common/mvref_common.c +++ b/vp9/common/mvref_common.c @@ -10,8 +10,6 @@ #include "mvref_common.h" -#if CONFIG_NEWBESTREFMV - #define MVREF_NEIGHBOURS 8 static int mv_ref_search[MVREF_NEIGHBOURS][2] = { {0,-1},{-1,0},{-1,-1},{0,-2},{-2,0},{-1,-2},{-2,-1},{-2,-2} }; @@ -338,5 +336,3 @@ void vp9_find_mv_refs( // Copy over the candidate list. vpx_memcpy(mv_ref_list, candidate_mvs, sizeof(candidate_mvs)); } - -#endif diff --git a/vp9/common/mvref_common.h b/vp9/common/mvref_common.h index 7f396e4d7b0ab0f6f4b49ac20f02c87295b34d09..06050406b504912ec4551424e7d78e4242e72461 100644 --- a/vp9/common/mvref_common.h +++ b/vp9/common/mvref_common.h @@ -11,8 +11,6 @@ #include "onyxc_int.h" #include "blockd.h" -// MR reference entropy header file. -#if CONFIG_NEWBESTREFMV #ifndef __INC_MVREF_COMMON_H #define __INC_MVREF_COMMON_H @@ -28,4 +26,3 @@ void vp9_find_mv_refs( #endif -#endif diff --git a/vp9/common/postproc.c b/vp9/common/postproc.c index b947938e328c36ee487d6c546cdbef65a1be1974..4c5748e7af19ca7c35607f521a92597a0f3112f9 100644 --- a/vp9/common/postproc.c +++ b/vp9/common/postproc.c @@ -12,6 +12,7 @@ #include "vpx_ports/config.h" #include "vpx_scale/yv12config.h" #include "postproc.h" +#include "vp9/common/textblit.h" #include "vpx_scale/vpxscale.h" #include "systemdependent.h" @@ -126,10 +127,6 @@ const short vp9_rv[] = { }; -extern void vp9_blit_text(const char *msg, unsigned char *address, - const int pitch); -extern void vp9_blit_line(int x0, int x1, int y0, int y1, - unsigned char *image, const int pitch); /**************************************************************************** */ void vp9_post_proc_down_and_across_c(unsigned char *src_ptr, diff --git a/vp9/common/quant_common.c b/vp9/common/quant_common.c index 8c3f42732212581c3c3b276be8d273149e53f363..720b271134b9334f650bfbd0824eba34322dd6fd 100644 --- a/vp9/common/quant_common.c +++ b/vp9/common/quant_common.c 
@@ -30,9 +30,9 @@ void vp9_init_quant_tables() { last_val = current_val; ac_val = ac_qlookup[i]; - dc_qlookup[i] = (0.000000305 * ac_val * ac_val * ac_val) + - (-0.00065 * ac_val * ac_val) + - (0.9 * ac_val) + 0.5; + dc_qlookup[i] = (int)((0.000000305 * ac_val * ac_val * ac_val) + + (-0.00065 * ac_val * ac_val) + + (0.9 * ac_val) + 0.5); if (dc_qlookup[i] < ACDC_MIN) dc_qlookup[i] = ACDC_MIN; } diff --git a/vp9/common/reconintra.h b/vp9/common/reconintra.h index bf990b4c673a1370ef899872734e07df7c41ccda..7bc66707c5a7a25c33371f3a96811a02fe2f495a 100644 --- a/vp9/common/reconintra.h +++ b/vp9/common/reconintra.h @@ -13,6 +13,6 @@ #include "blockd.h" -extern void init_intra_left_above_pixels(MACROBLOCKD *xd); +extern void vp9_recon_intra_mbuv(MACROBLOCKD *xd); #endif // __INC_RECONINTRA_H diff --git a/vp9/common/rtcd_defs.sh b/vp9/common/rtcd_defs.sh index fc91954277d9e09d4868c3f6f6263f0d80c46c3e..f296c25ef77ea1bb18b2bbcfbabf55aeb90a6351 100644 --- a/vp9/common/rtcd_defs.sh +++ b/vp9/common/rtcd_defs.sh @@ -11,6 +11,8 @@ struct block; struct macroblock; struct variance_vtable; +#define DEC_MVCOSTS int *mvjcost, int *mvcost[2] + /* Encoder forward decls */ struct variance_vtable; union int_mv; @@ -43,31 +45,37 @@ specialize vp9_dequantize_b mmx prototype void vp9_dequantize_b_2x2 "struct blockd *x" specialize vp9_dequantize_b_2x2 -prototype void vp9_dequant_dc_idct_add_y_block_8x8 "short *q, short *dq, unsigned char *pre, unsigned char *dst, int stride, char *eobs, short *dc, struct macroblockd *xd" +prototype void vp9_dequant_dc_idct_add_y_block_8x8 "short *q, short *dq, unsigned char *pre, unsigned char *dst, int stride, unsigned short *eobs, short *dc, struct macroblockd *xd" specialize vp9_dequant_dc_idct_add_y_block_8x8 -prototype void vp9_dequant_idct_add_y_block_8x8 "short *q, short *dq, unsigned char *pre, unsigned char *dst, int stride, char *eobs, struct macroblockd *xd" +prototype void vp9_dequant_idct_add_y_block_8x8 "short *q, short *dq, unsigned char *pre, 
unsigned char *dst, int stride, unsigned short *eobs, struct macroblockd *xd" specialize vp9_dequant_idct_add_y_block_8x8 -prototype void vp9_dequant_idct_add_uv_block_8x8 "short *q, short *dq, unsigned char *pre, unsigned char *dstu, unsigned char *dstv, int stride, char *eobs, struct macroblockd *xd" +prototype void vp9_dequant_idct_add_uv_block_8x8 "short *q, short *dq, unsigned char *pre, unsigned char *dstu, unsigned char *dstv, int stride, unsigned short *eobs, struct macroblockd *xd" specialize vp9_dequant_idct_add_uv_block_8x8 prototype void vp9_dequant_idct_add_16x16 "short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride" specialize vp9_dequant_idct_add_16x16 +prototype void vp9_dequant_idct_add_8x8 "short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride" +specialize vp9_dequant_idct_add_8x8 + +prototype void vp9_dequant_dc_idct_add_8x8 "short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride, int Dc" +specialize vp9_dequant_dc_idct_add_8x8 + prototype void vp9_dequant_idct_add "short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride" specialize vp9_dequant_idct_add prototype void vp9_dequant_dc_idct_add "short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride, int Dc" specialize vp9_dequant_dc_idct_add -prototype void vp9_dequant_dc_idct_add_y_block "short *q, short *dq, unsigned char *pre, unsigned char *dst, int stride, char *eobs, short *dc" +prototype void vp9_dequant_dc_idct_add_y_block "short *q, short *dq, unsigned char *pre, unsigned char *dst, int stride, unsigned short *eobs, short *dc" specialize vp9_dequant_dc_idct_add_y_block mmx -prototype void vp9_dequant_idct_add_y_block "short *q, short *dq, unsigned char *pre, unsigned char *dst, int stride, char *eobs" +prototype void vp9_dequant_idct_add_y_block "short *q, short *dq, unsigned char *pre, unsigned char *dst, int stride, 
unsigned short *eobs" specialize vp9_dequant_idct_add_y_block mmx -prototype void vp9_dequant_idct_add_uv_block "short *q, short *dq, unsigned char *pre, unsigned char *dstu, unsigned char *dstv, int stride, char *eobs" +prototype void vp9_dequant_idct_add_uv_block "short *q, short *dq, unsigned char *pre, unsigned char *dstu, unsigned char *dstv, int stride, unsigned short *eobs" specialize vp9_dequant_idct_add_uv_block mmx # @@ -219,13 +227,11 @@ vp9_loop_filter_simple_bh_neon=vp9_loop_filter_bhs_neon # # sad 16x3, 3x16 # -if [ "$CONFIG_NEWBESTREFMV" = "yes" ]; then prototype unsigned int vp9_sad16x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, int max_sad" specialize vp9_sad16x3 sse2 prototype unsigned int vp9_sad3x16 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, int max_sad" specialize vp9_sad3x16 sse2 -fi # # Encoder functions below this point. @@ -368,22 +374,22 @@ specialize vp9_sad8x8x8 sse4 prototype void vp9_sad4x4x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array" specialize vp9_sad4x4x8 sse4 -prototype void vp9_sad32x32x4d "const unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr[], int ref_stride, unsigned int *sad_array" +prototype void vp9_sad32x32x4d "const unsigned char *src_ptr, int src_stride, const unsigned char* const ref_ptr[], int ref_stride, unsigned int *sad_array" specialize vp9_sad32x32x4d -prototype void vp9_sad16x16x4d "const unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr[], int ref_stride, unsigned int *sad_array" +prototype void vp9_sad16x16x4d "const unsigned char *src_ptr, int src_stride, const unsigned char* const ref_ptr[], int ref_stride, unsigned int *sad_array" specialize vp9_sad16x16x4d sse3 -prototype void vp9_sad16x8x4d "const unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr[], int ref_stride, unsigned int *sad_array" 
+prototype void vp9_sad16x8x4d "const unsigned char *src_ptr, int src_stride, const unsigned char* const ref_ptr[], int ref_stride, unsigned int *sad_array" specialize vp9_sad16x8x4d sse3 -prototype void vp9_sad8x16x4d "const unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr[], int ref_stride, unsigned int *sad_array" +prototype void vp9_sad8x16x4d "const unsigned char *src_ptr, int src_stride, const unsigned char* const ref_ptr[], int ref_stride, unsigned int *sad_array" specialize vp9_sad8x16x4d sse3 -prototype void vp9_sad8x8x4d "const unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr[], int ref_stride, unsigned int *sad_array" +prototype void vp9_sad8x8x4d "const unsigned char *src_ptr, int src_stride, const unsigned char* const ref_ptr[], int ref_stride, unsigned int *sad_array" specialize vp9_sad8x8x4d sse3 -prototype void vp9_sad4x4x4d "const unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr[], int ref_stride, unsigned int *sad_array" +prototype void vp9_sad4x4x4d "const unsigned char *src_ptr, int src_stride, const unsigned char* const ref_ptr[], int ref_stride, unsigned int *sad_array" specialize vp9_sad4x4x4d sse3 # @@ -478,6 +484,21 @@ specialize vp9_short_walsh4x4_x8 prototype void vp9_short_walsh8x4_x8 "short *InputData, short *OutputData, int pitch" specialize vp9_short_walsh8x4_x8 +# +# Motion search +# +prototype int vp9_full_search_sad "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, DEC_MVCOSTS, union int_mv *center_mv" +specialize vp9_full_search_sad sse3 sse4_1 +vp9_full_search_sad_sse3=vp9_full_search_sadx3 +vp9_full_search_sad_sse4_1=vp9_full_search_sadx8 + +prototype int vp9_refining_search_sad "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, DEC_MVCOSTS, union int_mv *center_mv" +specialize vp9_refining_search_sad sse3 
+vp9_refining_search_sad_sse3=vp9_refining_search_sadx4 + +prototype int vp9_diamond_search_sad "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, union int_mv *best_mv, int search_param, int sad_per_bit, int *num00, struct variance_vtable *fn_ptr, DEC_MVCOSTS, union int_mv *center_mv" +vp9_diamond_search_sad_sse3=vp9_diamond_search_sadx4 + fi # end encoder functions diff --git a/vp9/common/subpelvar.h b/vp9/common/subpelvar.h new file mode 100644 index 0000000000000000000000000000000000000000..83cc2a7ce0caae0083cd1329ddbc44b11134ba65 --- /dev/null +++ b/vp9/common/subpelvar.h @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include "vp9/common/filter.h" + + + +static void variance(const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int recon_stride, + int w, + int h, + unsigned int *sse, + int *sum) { + int i, j; + int diff; + + *sum = 0; + *sse = 0; + + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + diff = src_ptr[j] - ref_ptr[j]; + *sum += diff; + *sse += diff * diff; + } + + src_ptr += source_stride; + ref_ptr += recon_stride; + } +} + +/**************************************************************************** + * + * ROUTINE : filter_block2d_bil_first_pass + * + * INPUTS : UINT8 *src_ptr : Pointer to source block. + * UINT32 src_pixels_per_line : Stride of input block. + * UINT32 pixel_step : Offset between filter input samples (see notes). + * UINT32 output_height : Input block height. + * UINT32 output_width : Input block width. + * INT32 *vp9_filter : Array of 2 bi-linear filter taps. 
+ * + * OUTPUTS : INT32 *output_ptr : Pointer to filtered block. + * + * RETURNS : void + * + * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in + * either horizontal or vertical direction to produce the + * filtered output block. Used to implement first-pass + * of 2-D separable filter. + * + * SPECIAL NOTES : Produces INT32 output to retain precision for next pass. + * Two filter taps should sum to VP9_FILTER_WEIGHT. + * pixel_step defines whether the filter is applied + * horizontally (pixel_step=1) or vertically (pixel_step=stride). + * It defines the offset required to move from one input + * to the next. + * + ****************************************************************************/ +static void var_filter_block2d_bil_first_pass(const unsigned char *src_ptr, + unsigned short *output_ptr, + unsigned int src_pixels_per_line, + int pixel_step, + unsigned int output_height, + unsigned int output_width, + const short *vp9_filter) { + unsigned int i, j; + + for (i = 0; i < output_height; i++) { + for (j = 0; j < output_width; j++) { + // Apply bilinear filter + output_ptr[j] = (((int)src_ptr[0] * vp9_filter[0]) + + ((int)src_ptr[pixel_step] * vp9_filter[1]) + + (VP9_FILTER_WEIGHT / 2)) >> VP9_FILTER_SHIFT; + src_ptr++; + } + + // Next row... + src_ptr += src_pixels_per_line - output_width; + output_ptr += output_width; + } +} + +/**************************************************************************** + * + * ROUTINE : filter_block2d_bil_second_pass + * + * INPUTS : INT32 *src_ptr : Pointer to source block. + * UINT32 src_pixels_per_line : Stride of input block. + * UINT32 pixel_step : Offset between filter input samples (see notes). + * UINT32 output_height : Input block height. + * UINT32 output_width : Input block width. + * INT32 *vp9_filter : Array of 2 bi-linear filter taps. + * + * OUTPUTS : UINT16 *output_ptr : Pointer to filtered block. 
+ * + * RETURNS : void + * + * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in + * either horizontal or vertical direction to produce the + * filtered output block. Used to implement second-pass + * of 2-D separable filter. + * + * SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass. + * Two filter taps should sum to VP9_FILTER_WEIGHT. + * pixel_step defines whether the filter is applied + * horizontally (pixel_step=1) or vertically (pixel_step=stride). + * It defines the offset required to move from one input + * to the next. + * + ****************************************************************************/ +static void var_filter_block2d_bil_second_pass(const unsigned short *src_ptr, + unsigned char *output_ptr, + unsigned int src_pixels_per_line, + unsigned int pixel_step, + unsigned int output_height, + unsigned int output_width, + const short *vp9_filter) { + unsigned int i, j; + int Temp; + + for (i = 0; i < output_height; i++) { + for (j = 0; j < output_width; j++) { + // Apply filter + Temp = ((int)src_ptr[0] * vp9_filter[0]) + + ((int)src_ptr[pixel_step] * vp9_filter[1]) + + (VP9_FILTER_WEIGHT / 2); + output_ptr[j] = (unsigned int)(Temp >> VP9_FILTER_SHIFT); + src_ptr++; + } + + // Next row... 
+ src_ptr += src_pixels_per_line - output_width; + output_ptr += output_width; + } +} + diff --git a/vp9/common/textblit.c b/vp9/common/textblit.c index e5a061b88e2186b11ac3def3ed18e7e6aabc150f..601d298dc5572821b1868528805562c8afcdfdb4 100644 --- a/vp9/common/textblit.c +++ b/vp9/common/textblit.c @@ -10,6 +10,7 @@ #include <stdlib.h> +#include "vp9/common/textblit.h" void vp9_blit_text(const char *msg, unsigned char *address, const int pitch) { int letter_bitmap; diff --git a/vp9/common/textblit.h b/vp9/common/textblit.h new file mode 100644 index 0000000000000000000000000000000000000000..39edbb09de7504bfc618c13964a8f701c5bf6189 --- /dev/null +++ b/vp9/common/textblit.h @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef __INC_TEXTBLIT_H +#define __INC_TEXTBLIT_H + +extern void vp9_blit_text(const char *msg, unsigned char *address, + const int pitch); +extern void vp9_blit_line(int x0, int x1, int y0, int y1, + unsigned char *image, const int pitch); + +#endif // __INC_TEXTBLIT_H diff --git a/vp9/common/x86/loopfilter_x86.c b/vp9/common/x86/loopfilter_x86.c index 6aeddd3879ba7eeb60287d893ac74dacddeb37b9..1ce6540929d15ab97c2db583d943c03bb9fe3af3 100644 --- a/vp9/common/x86/loopfilter_x86.c +++ b/vp9/common/x86/loopfilter_x86.c @@ -97,13 +97,17 @@ void vp9_mbloop_filter_horizontal_edge_c_sse2(unsigned char *s, DECLARE_ALIGNED(16, unsigned char, flat_oq1[16]); DECLARE_ALIGNED(16, unsigned char, flat_oq0[16]); __m128i mask, hev, flat; - __m128i thresh, limit, blimit; const __m128i zero = _mm_set1_epi16(0); __m128i p4, p3, p2, p1, p0, q0, q1, q2, q3, q4; - - thresh = _mm_shuffle_epi32(_mm_cvtsi32_si128(_thresh[0] * 0x01010101), 0); - limit = _mm_shuffle_epi32(_mm_cvtsi32_si128(_limit[0] * 0x01010101), 0); - blimit = _mm_shuffle_epi32(_mm_cvtsi32_si128(_blimit[0] * 0x01010101), 0); + const unsigned int extended_thresh = _thresh[0] * 0x01010101u; + const unsigned int extended_limit = _limit[0] * 0x01010101u; + const unsigned int extended_blimit = _blimit[0] * 0x01010101u; + const __m128i thresh = + _mm_shuffle_epi32(_mm_cvtsi32_si128((int)extended_thresh), 0); + const __m128i limit = + _mm_shuffle_epi32(_mm_cvtsi32_si128((int)extended_limit), 0); + const __m128i blimit = + _mm_shuffle_epi32(_mm_cvtsi32_si128((int)extended_blimit), 0); p4 = _mm_loadu_si128((__m128i *)(s - 5 * p)); p3 = _mm_loadu_si128((__m128i *)(s - 4 * p)); diff --git a/vp9/common/x86/sadmxn_x86.c b/vp9/common/x86/sadmxn_x86.c index 71fa320dba3f085e8b6db3a73c48088cc0973296..b12680d6c91641d6ce9d58181c93b7268bce7b3d 100644 --- a/vp9/common/x86/sadmxn_x86.c +++ b/vp9/common/x86/sadmxn_x86.c @@ -13,9 +13,6 @@ #include "./vpx_rtcd.h" -#if CONFIG_NEWBESTREFMV - - #if HAVE_SSE2 unsigned int vp9_sad16x3_sse2( 
const unsigned char *src_ptr, @@ -89,4 +86,3 @@ unsigned int vp9_sad3x16_sse2( #endif -#endif // CONFIG_NEWBESTREFMV diff --git a/vp9/decoder/arm/armv6/idct_blk_v6.c b/vp9/decoder/arm/armv6/idct_blk_v6.c index fe9b9035efed0216ddcf4a44a85a2999caea7918..e430f2995e7f8bcb1e6b91244c095805a3567544 100644 --- a/vp9/decoder/arm/armv6/idct_blk_v6.c +++ b/vp9/decoder/arm/armv6/idct_blk_v6.c @@ -12,9 +12,10 @@ #include "vp9/common/idct.h" #include "vp9/decoder/dequantize.h" -void vp8_dequant_dc_idct_add_y_block_v6 -(short *q, short *dq, unsigned char *pre, - unsigned char *dst, int stride, char *eobs, short *dc) { +void vp8_dequant_dc_idct_add_y_block_v6(short *q, short *dq, + unsigned char *pre, + unsigned char *dst, int stride, + unsigned short *eobs, short *dc) { int i; for (i = 0; i < 4; i++) { @@ -46,9 +47,9 @@ void vp8_dequant_dc_idct_add_y_block_v6 } } -void vp8_dequant_idct_add_y_block_v6 -(short *q, short *dq, unsigned char *pre, - unsigned char *dst, int stride, char *eobs) { +void vp8_dequant_idct_add_y_block_v6(short *q, short *dq, unsigned char *pre, + unsigned char *dst, int stride, + unsigned short *eobs) { int i; for (i = 0; i < 4; i++) { @@ -87,9 +88,9 @@ void vp8_dequant_idct_add_y_block_v6 } } -void vp8_dequant_idct_add_uv_block_v6 -(short *q, short *dq, unsigned char *pre, - unsigned char *dstu, unsigned char *dstv, int stride, char *eobs) { +void vp8_dequant_idct_add_uv_block_v6(short *q, short *dq, unsigned char *pre, + unsigned char *dstu, unsigned char *dstv, + int stride, unsigned short *eobs) { int i; for (i = 0; i < 2; i++) { diff --git a/vp9/decoder/arm/neon/idct_blk_neon.c b/vp9/decoder/arm/neon/idct_blk_neon.c index fb5d298df0a9d9c8ed644204277135a9fc95fa5d..5711e86e8dcfc7c8638bacc0f097e09add705c50 100644 --- a/vp9/decoder/arm/neon/idct_blk_neon.c +++ b/vp9/decoder/arm/neon/idct_blk_neon.c @@ -27,9 +27,10 @@ void idct_dequant_0_2x_neon (short *q, short dq, unsigned char *pre, int pitch, unsigned char *dst, int stride); -void 
vp8_dequant_dc_idct_add_y_block_neon -(short *q, short *dq, unsigned char *pre, - unsigned char *dst, int stride, char *eobs, short *dc) { +void vp8_dequant_dc_idct_add_y_block_neon(short *q, short *dq, + unsigned char *pre, + unsigned char *dst, int stride, + unsigned short *eobs, short *dc) { int i; for (i = 0; i < 4; i++) { @@ -51,9 +52,9 @@ void vp8_dequant_dc_idct_add_y_block_neon } } -void vp8_dequant_idct_add_y_block_neon -(short *q, short *dq, unsigned char *pre, - unsigned char *dst, int stride, char *eobs) { +void vp8_dequant_idct_add_y_block_neon(short *q, short *dq, unsigned char *pre, + unsigned char *dst, int stride, + unsigned short *eobs) { int i; for (i = 0; i < 4; i++) { @@ -74,9 +75,11 @@ void vp8_dequant_idct_add_y_block_neon } } -void vp8_dequant_idct_add_uv_block_neon -(short *q, short *dq, unsigned char *pre, - unsigned char *dstu, unsigned char *dstv, int stride, char *eobs) { +void vp8_dequant_idct_add_uv_block_neon(short *q, short *dq, + unsigned char *pre, + unsigned char *dstu, + unsigned char *dstv, int stride, + unsigned short *eobs) { if (((short *)eobs)[0] & 0xfefe) idct_dequant_full_2x_neon(q, dq, pre, dstu, 8, stride); else diff --git a/vp9/decoder/dboolhuff.h b/vp9/decoder/dboolhuff.h index 285d2a9363308967595e2acf264cef4f29518389..d46d81bb4974d8ad2915a613e2480759c028b3ed 100644 --- a/vp9/decoder/dboolhuff.h +++ b/vp9/decoder/dboolhuff.h @@ -56,7 +56,7 @@ int vp9_inv_recenter_nonneg(int v, int m); { \ int shift = VP9_BD_VALUE_SIZE - 8 - ((_count) + 8); \ int loop_end, x; \ - size_t bits_left = ((_bufend)-(_bufptr))*CHAR_BIT; \ + int bits_left = (int)(((_bufend)-(_bufptr))*CHAR_BIT); \ \ x = shift + CHAR_BIT - bits_left; \ loop_end = 0; \ diff --git a/vp9/decoder/decodemv.c b/vp9/decoder/decodemv.c index c4ff898e8a6fe6264e9a3075d8ba063e94746b80..5013462a3d38f8902fcdf7fcbed409ecb289b6c2 100644 --- a/vp9/decoder/decodemv.c +++ b/vp9/decoder/decodemv.c @@ -654,7 +654,7 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, 
MB_MODE_INFO *mbmi, const int mis = pbi->common.mode_info_stride; MACROBLOCKD *const xd = &pbi->mb; - int_mv *const mv = &mbmi->mv; + int_mv *const mv = &mbmi->mv[0]; int mb_to_left_edge; int mb_to_right_edge; int mb_to_top_edge; @@ -712,17 +712,13 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, int_mv nearest_second, nearby_second, best_mv_second; vp9_prob mv_ref_p [VP9_MVREFS - 1]; -#if CONFIG_NEWBESTREFMV int recon_y_stride, recon_yoffset; int recon_uv_stride, recon_uvoffset; -#endif vp9_find_near_mvs(xd, mi, prev_mi, &nearest, &nearby, &best_mv, rct, mbmi->ref_frame, cm->ref_frame_sign_bias); - -#if CONFIG_NEWBESTREFMV { int ref_fb_idx; MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame; @@ -755,7 +751,6 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, mbmi->ref_mvs[ref_frame], &best_mv, &nearest, &nearby); } -#endif vp9_mv_ref_probs(&pbi->common, mv_ref_p, rct); @@ -808,7 +803,6 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, mbmi->second_ref_frame = mbmi->ref_frame + 1; if (mbmi->second_ref_frame == 4) mbmi->second_ref_frame = 1; -#if CONFIG_NEWBESTREFMV if (mbmi->second_ref_frame) { int second_ref_fb_idx; /* Select the appropriate reference frame for this MB */ @@ -845,13 +839,7 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, &nearest_second, &nearby_second); } -#else - vp9_find_near_mvs(xd, mi, prev_mi, - &nearest_second, &nearby_second, &best_mv_second, - rct, - mbmi->second_ref_frame, - pbi->common.ref_frame_sign_bias); -#endif + } else { mbmi->second_ref_frame = 0; } @@ -1172,7 +1160,7 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, } } -void vp9_decode_mode_mvs_init(VP9D_COMP *pbi, BOOL_DECODER* const bc) { +void vp9_decode_mode_mvs_init(VP9D_COMP* const pbi, BOOL_DECODER* const bc) { VP9_COMMON *cm = &pbi->common; vpx_memset(cm->mbskip_pred_probs, 0, sizeof(cm->mbskip_pred_probs)); @@ 
-1184,8 +1172,8 @@ void vp9_decode_mode_mvs_init(VP9D_COMP *pbi, BOOL_DECODER* const bc) { mb_mode_mv_init(pbi, bc); } -void vp9_decode_mb_mode_mv(VP9D_COMP *pbi, - MACROBLOCKD *xd, +void vp9_decode_mb_mode_mv(VP9D_COMP* const pbi, + MACROBLOCKD* const xd, int mb_row, int mb_col, BOOL_DECODER* const bc) { diff --git a/vp9/decoder/decodframe.c b/vp9/decoder/decodframe.c index dd1d5ab0bf3e2667ea6731eb2f4216da0b7dd5ac..60bf7bacab1ae9b96ca603077c04d0aec9876326 100644 --- a/vp9/decoder/decodframe.c +++ b/vp9/decoder/decodframe.c @@ -14,6 +14,7 @@ #include "vp9/common/reconintra.h" #include "vp9/common/reconintra4x4.h" #include "vp9/common/reconinter.h" +#include "vp9/decoder/decodframe.h" #include "detokenize.h" #include "vp9/common/invtrans.h" #include "vp9/common/alloccommon.h" @@ -728,7 +729,8 @@ static void setup_token_decoder(VP9D_COMP *pbi, "%d length", 1); } - if (vp9_start_decode(bool_decoder, partition, partition_size)) + if (vp9_start_decode(bool_decoder, + partition, (unsigned int)partition_size)) vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, "Failed to allocate bool decoder %d", 1); } @@ -985,7 +987,8 @@ int vp9_decode_frame(VP9D_COMP *pbi) { init_frame(pbi); - if (vp9_start_decode(&header_bc, data, first_partition_length_in_bytes)) + if (vp9_start_decode(&header_bc, data, + (unsigned int)first_partition_length_in_bytes)) vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, "Failed to allocate bool decoder 0"); if (pc->frame_type == KEY_FRAME) { diff --git a/vp9/decoder/decodframe.h b/vp9/decoder/decodframe.h new file mode 100644 index 0000000000000000000000000000000000000000..cc480798a57860be73b8371b646ea66a36a9b580 --- /dev/null +++ b/vp9/decoder/decodframe.h @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef __INC_DECODFRAME_H +#define __INC_DECODFRAME_H + +struct VP9Decompressor; + +extern void vp9_init_de_quantizer(struct VP9Decompressor *pbi); + +#endif // __INC_DECODFRAME_H diff --git a/vp9/decoder/dequantize.c b/vp9/decoder/dequantize.c index 508aaef5aad67237688a4ca86cf7241ae98779fb..d11fe38c2336d38aaaa0479ac497eadbe9bd3483 100644 --- a/vp9/decoder/dequantize.c +++ b/vp9/decoder/dequantize.c @@ -9,24 +9,12 @@ */ -#include "vpx_ports/config.h" +#include "vpx_rtcd.h" #include "dequantize.h" #include "vp9/common/idct.h" #include "vpx_mem/vpx_mem.h" #include "onyxd_int.h" -extern void vp9_short_idct4x4llm_c(short *input, short *output, int pitch); -extern void vp9_short_idct4x4llm_1_c(short *input, short *output, int pitch); -extern void vp9_short_idct8x8_c(short *input, short *output, int pitch); -extern void vp9_short_idct8x8_1_c(short *input, short *output, int pitch); - -#if CONFIG_LOSSLESS -extern void vp9_short_inv_walsh4x4_x8_c(short *input, short *output, - int pitch); -extern void vp9_short_inv_walsh4x4_1_x8_c(short *input, short *output, - int pitch); -#endif - #ifdef DEC_DEBUG extern int dec_debug; #endif diff --git a/vp9/decoder/dequantize.h b/vp9/decoder/dequantize.h index 912061f289125e7079ecdba78a7fda1d3fdc4fe5..560c4a417b4f34e30617f4187757d510a36e2c18 100644 --- a/vp9/decoder/dequantize.h +++ b/vp9/decoder/dequantize.h @@ -25,17 +25,20 @@ extern void vp9_dequant_dc_idct_add_lossless_c(short *input, short *dq, extern void vp9_dequant_dc_idct_add_y_block_lossless_c(short *q, short *dq, unsigned char *pre, unsigned char *dst, - int stride, char *eobs, + int stride, + unsigned short *eobs, short *dc); extern void vp9_dequant_idct_add_y_block_lossless_c(short *q, short *dq, unsigned char *pre, unsigned char *dst, - int stride, char *eobs); + int stride, 
+ unsigned short *eobs); extern void vp9_dequant_idct_add_uv_block_lossless_c(short *q, short *dq, unsigned char *pre, unsigned char *dst_u, unsigned char *dst_v, - int stride, char *eobs); + int stride, + unsigned short *eobs); #endif typedef void (*vp9_dequant_idct_add_fn_t)(short *input, short *dq, @@ -44,12 +47,13 @@ typedef void(*vp9_dequant_dc_idct_add_fn_t)(short *input, short *dq, unsigned char *pred, unsigned char *output, int pitch, int stride, int dc); typedef void(*vp9_dequant_dc_idct_add_y_block_fn_t)(short *q, short *dq, - unsigned char *pre, unsigned char *dst, int stride, char *eobs, short *dc); + unsigned char *pre, unsigned char *dst, int stride, unsigned short *eobs, + short *dc); typedef void(*vp9_dequant_idct_add_y_block_fn_t)(short *q, short *dq, - unsigned char *pre, unsigned char *dst, int stride, char *eobs); + unsigned char *pre, unsigned char *dst, int stride, unsigned short *eobs); typedef void(*vp9_dequant_idct_add_uv_block_fn_t)(short *q, short *dq, unsigned char *pre, unsigned char *dst_u, unsigned char *dst_v, int stride, - char *eobs); + unsigned short *eobs); void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, short *input, short *dq, unsigned char *pred, unsigned char *dest, @@ -66,12 +70,14 @@ void vp9_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, short *input, short *dq, #if CONFIG_SUPERBLOCKS void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(short *q, short *dq, unsigned char *dst, - int stride, char *eobs, + int stride, + unsigned short *eobs, short *dc, MACROBLOCKD *xd); void vp9_dequant_idct_add_uv_block_8x8_inplace_c(short *q, short *dq, unsigned char *dstu, unsigned char *dstv, - int stride, char *eobs, + int stride, + unsigned short *eobs, MACROBLOCKD *xd); #endif diff --git a/vp9/decoder/detokenize.c b/vp9/decoder/detokenize.c index 58f5044eada64771f270030c6105aa7f4c3881f9..68f2c283a76901c5d3444960b07675b61351bfae 100644 --- a/vp9/decoder/detokenize.c +++ b/vp9/decoder/detokenize.c @@ -98,7 +98,7 @@ DECLARE_ALIGNED(16, 
static const int, coef_bands_x_16x16[256]) = { static const unsigned char cat6_prob[14] = { 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 }; -void vp9_reset_mb_tokens_context(MACROBLOCKD *xd) { +void vp9_reset_mb_tokens_context(MACROBLOCKD* const xd) { /* Clear entropy contexts for Y2 blocks */ if ((xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != I8X8_PRED && @@ -414,12 +414,13 @@ SKIP_START: return c; } -int vp9_decode_mb_tokens_16x16(VP9D_COMP *pbi, MACROBLOCKD *xd, +int vp9_decode_mb_tokens_16x16(VP9D_COMP* const pbi, + MACROBLOCKD* const xd, BOOL_DECODER* const bc) { ENTROPY_CONTEXT* const A = (ENTROPY_CONTEXT *)xd->above_context; ENTROPY_CONTEXT* const L = (ENTROPY_CONTEXT *)xd->left_context; - char* const eobs = xd->eobs; + unsigned short* const eobs = xd->eobs; PLANE_TYPE type; int c, i, eobtotal = 0, seg_eob; const int segment_id = xd->mode_info_context->mbmi.segment_id; @@ -477,12 +478,13 @@ int vp9_decode_mb_tokens_16x16(VP9D_COMP *pbi, MACROBLOCKD *xd, return eobtotal; } -int vp9_decode_mb_tokens_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd, +int vp9_decode_mb_tokens_8x8(VP9D_COMP* const pbi, + MACROBLOCKD* const xd, BOOL_DECODER* const bc) { ENTROPY_CONTEXT *const A = (ENTROPY_CONTEXT *)xd->above_context; ENTROPY_CONTEXT *const L = (ENTROPY_CONTEXT *)xd->left_context; - char *const eobs = xd->eobs; + unsigned short *const eobs = xd->eobs; PLANE_TYPE type; int c, i, eobtotal = 0, seg_eob; const int segment_id = xd->mode_info_context->mbmi.segment_id; @@ -571,12 +573,13 @@ int vp9_decode_mb_tokens_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd, } -int vp9_decode_mb_tokens(VP9D_COMP *dx, MACROBLOCKD *xd, +int vp9_decode_mb_tokens(VP9D_COMP* const dx, + MACROBLOCKD* const xd, BOOL_DECODER* const bc) { ENTROPY_CONTEXT *const A = (ENTROPY_CONTEXT *)xd->above_context; ENTROPY_CONTEXT *const L = (ENTROPY_CONTEXT *)xd->left_context; - char *const eobs = xd->eobs; + unsigned short *const eobs = xd->eobs; const int *scan = 
vp9_default_zig_zag1d; PLANE_TYPE type; int c, i, eobtotal = 0, seg_eob = 16; diff --git a/vp9/decoder/idct_blk.c b/vp9/decoder/idct_blk.c index e9605fc96636a95abc1629b10a3286570a995f09..d9fbf97c22361356fac00f3a22f0b10148dda26e 100644 --- a/vp9/decoder/idct_blk.c +++ b/vp9/decoder/idct_blk.c @@ -8,30 +8,13 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "vpx_ports/config.h" +#include "vpx_rtcd.h" #include "vp9/common/idct.h" -#include "dequantize.h" - -void vp9_dequant_dc_idct_add_c(short *input, short *dq, unsigned char *pred, - unsigned char *dest, int pitch, int stride, - int Dc); -void vp9_dequant_idct_add_c(short *input, short *dq, unsigned char *pred, - unsigned char *dest, int pitch, int stride); -void vp9_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr, - unsigned char *dst_ptr, int pitch, int stride); -#if CONFIG_LOSSLESS -void vp9_dequant_idct_add_lossless_c(short *input, short *dq, - unsigned char *pred, unsigned char *dest, - int pitch, int stride); -void vp9_dc_only_idct_add_lossless_c(short input_dc, unsigned char *pred_ptr, - unsigned char *dst_ptr, - int pitch, int stride); -#endif void vp9_dequant_dc_idct_add_y_block_c(short *q, short *dq, unsigned char *pre, unsigned char *dst, - int stride, char *eobs, + int stride, unsigned short *eobs, short *dc) { int i, j; @@ -56,7 +39,7 @@ void vp9_dequant_dc_idct_add_y_block_c(short *q, short *dq, void vp9_dequant_idct_add_y_block_c(short *q, short *dq, unsigned char *pre, unsigned char *dst, - int stride, char *eobs) { + int stride, unsigned short *eobs) { int i, j; for (i = 0; i < 4; i++) { @@ -80,7 +63,7 @@ void vp9_dequant_idct_add_y_block_c(short *q, short *dq, void vp9_dequant_idct_add_uv_block_c(short *q, short *dq, unsigned char *pre, unsigned char *dstu, unsigned char *dstv, - int stride, char *eobs) { + int stride, unsigned short *eobs) { int i, j; for (i = 0; i < 2; i++) { @@ -124,7 +107,8 @@ void vp9_dequant_idct_add_uv_block_c(short *q, short *dq, 
unsigned char *pre, void vp9_dequant_dc_idct_add_y_block_8x8_c(short *q, short *dq, unsigned char *pre, unsigned char *dst, - int stride, char *eobs, short *dc, + int stride, unsigned short *eobs, + short *dc, MACROBLOCKD *xd) { vp9_dequant_dc_idct_add_8x8_c(q, dq, pre, dst, 16, stride, dc[0]); vp9_dequant_dc_idct_add_8x8_c(&q[64], dq, pre + 8, dst + 8, 16, stride, dc[1]); @@ -137,7 +121,8 @@ void vp9_dequant_dc_idct_add_y_block_8x8_c(short *q, short *dq, #if CONFIG_SUPERBLOCKS void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(short *q, short *dq, unsigned char *dst, - int stride, char *eobs, + int stride, + unsigned short *eobs, short *dc, MACROBLOCKD *xd) { vp9_dequant_dc_idct_add_8x8_c(q, dq, dst, dst, stride, stride, dc[0]); vp9_dequant_dc_idct_add_8x8_c(&q[64], dq, dst + 8, @@ -152,7 +137,7 @@ void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(short *q, short *dq, void vp9_dequant_idct_add_y_block_8x8_c(short *q, short *dq, unsigned char *pre, unsigned char *dst, - int stride, char *eobs, + int stride, unsigned short *eobs, MACROBLOCKD *xd) { unsigned char *origdest = dst; unsigned char *origpred = pre; @@ -170,7 +155,7 @@ void vp9_dequant_idct_add_uv_block_8x8_c(short *q, short *dq, unsigned char *pre, unsigned char *dstu, unsigned char *dstv, - int stride, char *eobs, + int stride, unsigned short *eobs, MACROBLOCKD *xd) { vp9_dequant_idct_add_8x8_c(q, dq, pre, dstu, 8, stride); @@ -184,7 +169,8 @@ void vp9_dequant_idct_add_uv_block_8x8_c(short *q, short *dq, void vp9_dequant_idct_add_uv_block_8x8_inplace_c(short *q, short *dq, unsigned char *dstu, unsigned char *dstv, - int stride, char *eobs, + int stride, + unsigned short *eobs, MACROBLOCKD *xd) { vp9_dequant_idct_add_8x8_c(q, dq, dstu, dstu, stride, stride); @@ -198,7 +184,8 @@ void vp9_dequant_idct_add_uv_block_8x8_inplace_c(short *q, short *dq, void vp9_dequant_dc_idct_add_y_block_lossless_c(short *q, short *dq, unsigned char *pre, unsigned char *dst, - int stride, char *eobs, + int stride, + unsigned 
short *eobs, short *dc) { int i, j; @@ -223,7 +210,7 @@ void vp9_dequant_dc_idct_add_y_block_lossless_c(short *q, short *dq, void vp9_dequant_idct_add_y_block_lossless_c(short *q, short *dq, unsigned char *pre, unsigned char *dst, - int stride, char *eobs) { + int stride, unsigned short *eobs) { int i, j; for (i = 0; i < 4; i++) { @@ -249,7 +236,8 @@ void vp9_dequant_idct_add_uv_block_lossless_c(short *q, short *dq, unsigned char *pre, unsigned char *dstu, unsigned char *dstv, - int stride, char *eobs) { + int stride, + unsigned short *eobs) { int i, j; for (i = 0; i < 2; i++) { diff --git a/vp9/decoder/onyxd_if.c b/vp9/decoder/onyxd_if.c index 2654985b81fdb0e6205b619544bef9ccee2541f1..c9f5820a03bf237dc7deac4ac1c7072fb885c8e9 100644 --- a/vp9/decoder/onyxd_if.c +++ b/vp9/decoder/onyxd_if.c @@ -26,12 +26,12 @@ #include "vpx_scale/vpxscale.h" #include "vp9/common/systemdependent.h" #include "vpx_ports/vpx_timer.h" +#include "vp9/decoder/decodframe.h" #include "detokenize.h" #if ARCH_ARM #include "vpx_ports/arm.h" #endif -extern void vp9_init_de_quantizer(VP9D_COMP *pbi); static int get_free_fb(VP9_COMMON *cm); static void ref_cnt_fb(int *buf, int *idx, int new_idx); diff --git a/vp9/decoder/x86/idct_blk_mmx.c b/vp9/decoder/x86/idct_blk_mmx.c index 189a846deabcd0affc740010bc7458e237141ef7..6e5473106530e8197b858c07f174585689b5e785 100644 --- a/vp9/decoder/x86/idct_blk_mmx.c +++ b/vp9/decoder/x86/idct_blk_mmx.c @@ -15,7 +15,8 @@ void vp9_dequant_dc_idct_add_y_block_mmx(short *q, short *dq, unsigned char *pre, unsigned char *dst, - int stride, char *eobs, short *dc) { + int stride, unsigned short *eobs, + short *dc) { int i; for (i = 0; i < 4; i++) { @@ -53,7 +54,7 @@ void vp9_dequant_dc_idct_add_y_block_mmx(short *q, short *dq, void vp9_dequant_idct_add_y_block_mmx(short *q, short *dq, unsigned char *pre, unsigned char *dst, - int stride, char *eobs) { + int stride, unsigned short *eobs) { int i; for (i = 0; i < 4; i++) { @@ -96,7 +97,7 @@ void 
vp9_dequant_idct_add_uv_block_mmx(short *q, short *dq, unsigned char *pre, unsigned char *dstu, unsigned char *dstv, - int stride, char *eobs) { + int stride, unsigned short *eobs) { int i; for (i = 0; i < 2; i++) { diff --git a/vp9/decoder/x86/idct_blk_sse2.c b/vp9/decoder/x86/idct_blk_sse2.c index bc3c5d663367789ad34dad9f597a8aec28a46c12..5914c16bc76d05a47ff46ff7c7f888c24aba0f92 100644 --- a/vp9/decoder/x86/idct_blk_sse2.c +++ b/vp9/decoder/x86/idct_blk_sse2.c @@ -31,7 +31,8 @@ void vp9_idct_dequant_full_2x_sse2(short *q, short *dq, void vp9_dequant_dc_idct_add_y_block_sse2(short *q, short *dq, unsigned char *pre, unsigned char *dst, - int stride, char *eobs, short *dc) { + int stride, unsigned short *eobs, + short *dc) { int i; for (i = 0; i < 4; i++) { @@ -57,7 +58,7 @@ void vp9_dequant_dc_idct_add_y_block_sse2(short *q, short *dq, void vp9_dequant_idct_add_y_block_sse2(short *q, short *dq, unsigned char *pre, unsigned char *dst, - int stride, char *eobs) { + int stride, unsigned short *eobs) { int i; for (i = 0; i < 4; i++) { @@ -82,7 +83,7 @@ void vp9_dequant_idct_add_uv_block_sse2(short *q, short *dq, unsigned char *pre, unsigned char *dstu, unsigned char *dstv, - int stride, char *eobs) { + int stride, unsigned short *eobs) { if (((short *)(eobs))[0] & 0xfefe) vp9_idct_dequant_full_2x_sse2(q, dq, pre, dstu, stride, 8); else diff --git a/vp9/encoder/bitstream.c b/vp9/encoder/bitstream.c index 747434414408d4a8ec1bac1463dbab249836f835..b7bc99cb30dca1a166402d4c97cf8b0a7217b765 100644 --- a/vp9/encoder/bitstream.c +++ b/vp9/encoder/bitstream.c @@ -29,10 +29,7 @@ #include "vp9/common/entropy.h" #include "vp9/encoder/encodemv.h" #include "vp9/common/entropymv.h" - -#if CONFIG_NEWBESTREFMV #include "vp9/common/mvref_common.h" -#endif #if defined(SECTIONBITS_OUTPUT) unsigned __int64 Sectionbits[500]; @@ -186,7 +183,6 @@ static int get_binary_prob(int n0, int n1) { void vp9_update_skip_probs(VP9_COMP *cpi) { VP9_COMMON *const pc = &cpi->common; - int 
prob_skip_false[3] = {0, 0, 0}; int k; for (k = 0; k < MBSKIP_CONTEXTS; ++k) { @@ -218,7 +214,6 @@ static void update_switchable_interp_probs(VP9_COMP *cpi, static void update_refpred_stats(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; int i; - int tot_count; vp9_prob new_pred_probs[PREDICTION_PROBS]; int old_cost, new_cost; @@ -884,7 +879,6 @@ static void update_ref_probs(VP9_COMP *const cpi) { } static void pack_inter_mode_mvs(VP9_COMP *const cpi, vp9_writer *const bc) { - int i; VP9_COMMON *const pc = &cpi->common; const nmv_context *nmvc = &pc->fc.nmvc; MACROBLOCK *x = &cpi->mb; @@ -1062,9 +1056,8 @@ static void pack_inter_mode_mvs(VP9_COMP *const cpi, vp9_writer *const bc) { // Only used for context just now and soon to be deprecated. vp9_find_near_mvs(xd, m, prev_m, &n1, &n2, &best_mv, ct, rf, cpi->common.ref_frame_sign_bias); -#if CONFIG_NEWBESTREFMV + best_mv.as_int = mi->ref_mvs[rf][0].as_int; -#endif vp9_mv_ref_probs(&cpi->common, mv_ref_p, ct); @@ -1124,10 +1117,8 @@ static void pack_inter_mode_mvs(VP9_COMP *const cpi, vp9_writer *const bc) { mi->second_ref_frame, cpi->common.ref_frame_sign_bias); -#if CONFIG_NEWBESTREFMV best_second_mv.as_int = mi->ref_mvs[mi->second_ref_frame][0].as_int; -#endif } // does the feature use compound prediction or not @@ -1315,7 +1306,6 @@ static void write_mb_modes_kf(const VP9_COMMON *c, const MODE_INFO *m, int mode_info_stride, vp9_writer *const bc) { - const int mis = mode_info_stride; int ym; int segment_id; @@ -1331,6 +1321,7 @@ static void write_mb_modes_kf(const VP9_COMMON *c, (vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) != 0))) { int skip_coeff = m->mbmi.mb_skip_coeff; #if CONFIG_SUPERBLOCKS + const int mis = mode_info_stride; if (m->mbmi.encoded_as_sb) { skip_coeff &= m[1].mbmi.mb_skip_coeff; skip_coeff &= m[mis].mbmi.mb_skip_coeff; diff --git a/vp9/encoder/block.h b/vp9/encoder/block.h index 43c249ca74dfd2be1983643df8ed195607db1435..3b3794c00762407f5ab6a029fcf4347465e52a8a 100644 --- 
a/vp9/encoder/block.h +++ b/vp9/encoder/block.h @@ -70,9 +70,7 @@ typedef struct { PARTITION_INFO partition_info; int_mv best_ref_mv; int_mv second_best_ref_mv; -#if CONFIG_NEWBESTREFMV || CONFIG_NEW_MVREF int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REFS]; -#endif int rate; int distortion; int64_t intra_error; diff --git a/vp9/encoder/dct.c b/vp9/encoder/dct.c index 5e56b4e7f537dbd11387e7e8aae53e98786adb62..5be2d76d5ea736ed534ee7cbcda578b9098ac8a1 100644 --- a/vp9/encoder/dct.c +++ b/vp9/encoder/dct.c @@ -901,6 +901,8 @@ void vp9_short_walsh8x4_x8_c(short *input, short *output, int pitch) { } #endif +#define TEST_INT_16x16_DCT 1 +#if !TEST_INT_16x16_DCT static const double C1 = 0.995184726672197; static const double C2 = 0.98078528040323; static const double C3 = 0.956940335732209; @@ -1107,3 +1109,225 @@ void vp9_short_fdct16x16_c(short *input, short *out, int pitch) { } vp9_clear_system_state(); // Make it simd safe : __asm emms; } + +#else +static const int16_t C1 = 16305; +static const int16_t C2 = 16069; +static const int16_t C3 = 15679; +static const int16_t C4 = 15137; +static const int16_t C5 = 14449; +static const int16_t C6 = 13623; +static const int16_t C7 = 12665; +static const int16_t C8 = 11585; +static const int16_t C9 = 10394; +static const int16_t C10 = 9102; +static const int16_t C11 = 7723; +static const int16_t C12 = 6270; +static const int16_t C13 = 4756; +static const int16_t C14 = 3196; +static const int16_t C15 = 1606; + +#define RIGHT_SHIFT 14 +#define ROUNDING (1 << (RIGHT_SHIFT - 1)) + +static void dct16x16_1d(int16_t input[16], int16_t output[16], + int last_shift_bits) { + int16_t step[16]; + int intermediate[16]; + int temp1, temp2; + int final_shift = RIGHT_SHIFT; + int final_rounding = ROUNDING; + int output_shift = 0; + int output_rounding = 0; + + final_shift += last_shift_bits; + if (final_shift > 0) + final_rounding = 1 << (final_shift - 1); + + output_shift += last_shift_bits; + if (output_shift > 0) + output_rounding = 1 << 
(output_shift - 1); + + // step 1 + step[ 0] = input[0] + input[15]; + step[ 1] = input[1] + input[14]; + step[ 2] = input[2] + input[13]; + step[ 3] = input[3] + input[12]; + step[ 4] = input[4] + input[11]; + step[ 5] = input[5] + input[10]; + step[ 6] = input[6] + input[ 9]; + step[ 7] = input[7] + input[ 8]; + step[ 8] = input[7] - input[ 8]; + step[ 9] = input[6] - input[ 9]; + step[10] = input[5] - input[10]; + step[11] = input[4] - input[11]; + step[12] = input[3] - input[12]; + step[13] = input[2] - input[13]; + step[14] = input[1] - input[14]; + step[15] = input[0] - input[15]; + + // step 2 + output[0] = step[0] + step[7]; + output[1] = step[1] + step[6]; + output[2] = step[2] + step[5]; + output[3] = step[3] + step[4]; + output[4] = step[3] - step[4]; + output[5] = step[2] - step[5]; + output[6] = step[1] - step[6]; + output[7] = step[0] - step[7]; + + temp1 = step[ 8] * C7; + temp2 = step[15] * C9; + output[ 8] = (temp1 + temp2 + ROUNDING) >> RIGHT_SHIFT; + + temp1 = step[ 9] * C11; + temp2 = step[14] * C5; + output[ 9] = (temp1 - temp2 + ROUNDING) >> RIGHT_SHIFT; + + temp1 = step[10] * C3; + temp2 = step[13] * C13; + output[10] = (temp1 + temp2 + ROUNDING) >> RIGHT_SHIFT; + + temp1 = step[11] * C15; + temp2 = step[12] * C1; + output[11] = (temp1 - temp2 + ROUNDING) >> RIGHT_SHIFT; + + temp1 = step[11] * C1; + temp2 = step[12] * C15; + output[12] = (temp2 + temp1 + ROUNDING) >> RIGHT_SHIFT; + + temp1 = step[10] * C13; + temp2 = step[13] * C3; + output[13] = (temp2 - temp1 + ROUNDING) >> RIGHT_SHIFT; + + temp1 = step[ 9] * C5; + temp2 = step[14] * C11; + output[14] = (temp2 + temp1 + ROUNDING) >> RIGHT_SHIFT; + + temp1 = step[ 8] * C9; + temp2 = step[15] * C7; + output[15] = (temp2 - temp1 + ROUNDING) >> RIGHT_SHIFT; + + // step 3 + step[ 0] = output[0] + output[3]; + step[ 1] = output[1] + output[2]; + step[ 2] = output[1] - output[2]; + step[ 3] = output[0] - output[3]; + + temp1 = output[4] * C14; + temp2 = output[7] * C2; + step[ 4] = (temp1 + temp2 
+ ROUNDING) >> RIGHT_SHIFT; + + temp1 = output[5] * C10; + temp2 = output[6] * C6; + step[ 5] = (temp1 + temp2 + ROUNDING) >> RIGHT_SHIFT; + + temp1 = output[5] * C6; + temp2 = output[6] * C10; + step[ 6] = (temp2 - temp1 + ROUNDING) >> RIGHT_SHIFT; + + temp1 = output[4] * C2; + temp2 = output[7] * C14; + step[ 7] = (temp2 - temp1 + ROUNDING) >> RIGHT_SHIFT; + + step[ 8] = output[ 8] + output[11]; + step[ 9] = output[ 9] + output[10]; + step[10] = output[ 9] - output[10]; + step[11] = output[ 8] - output[11]; + + step[12] = output[12] + output[15]; + step[13] = output[13] + output[14]; + step[14] = output[13] - output[14]; + step[15] = output[12] - output[15]; + + // step 4 + output[ 0] = (step[ 0] + step[ 1] + output_rounding) >> output_shift; + output[ 8] = (step[ 0] - step[ 1] + output_rounding) >> output_shift; + + temp1 = step[2] * C12; + temp2 = step[3] * C4; + temp1 = (temp1 + temp2 + final_rounding) >> final_shift; + output[ 4] = (2 * (temp1 * C8) + ROUNDING) >> RIGHT_SHIFT; + + temp1 = step[2] * C4; + temp2 = step[3] * C12; + temp1 = (temp2 - temp1 + final_rounding) >> final_shift; + output[12] = (2 * (temp1 * C8) + ROUNDING) >> RIGHT_SHIFT; + + output[ 2] = (2 * ((step[4] + step[ 5]) * C8) + final_rounding) + >> final_shift; + output[14] = (2 * ((step[7] - step[ 6]) * C8) + final_rounding) + >> final_shift; + + temp1 = step[4] - step[5]; + temp2 = step[6] + step[7]; + output[ 6] = (temp1 + temp2 + output_rounding) >> output_shift; + output[10] = (temp1 - temp2 + output_rounding) >> output_shift; + + intermediate[8] = step[8] + step[14]; + intermediate[9] = step[9] + step[15]; + + temp1 = intermediate[8] * C12; + temp2 = intermediate[9] * C4; + temp1 = (temp1 - temp2 + final_rounding) >> final_shift; + output[3] = (2 * (temp1 * C8) + ROUNDING) >> RIGHT_SHIFT; + + temp1 = intermediate[8] * C4; + temp2 = intermediate[9] * C12; + temp1 = (temp2 + temp1 + final_rounding) >> final_shift; + output[13] = (2 * (temp1 * C8) + ROUNDING) >> RIGHT_SHIFT; + + output[ 
9] = (2 * ((step[10] + step[11]) * C8) + final_rounding) + >> final_shift; + + intermediate[11] = step[10] - step[11]; + intermediate[12] = step[12] + step[13]; + intermediate[13] = step[12] - step[13]; + intermediate[14] = step[ 8] - step[14]; + intermediate[15] = step[ 9] - step[15]; + + output[15] = (intermediate[11] + intermediate[12] + output_rounding) + >> output_shift; + output[ 1] = -(intermediate[11] - intermediate[12] + output_rounding) + >> output_shift; + + output[ 7] = (2 * (intermediate[13] * C8) + final_rounding) >> final_shift; + + temp1 = intermediate[14] * C12; + temp2 = intermediate[15] * C4; + temp1 = (temp1 - temp2 + final_rounding) >> final_shift; + output[11] = (-2 * (temp1 * C8) + ROUNDING) >> RIGHT_SHIFT; + + temp1 = intermediate[14] * C4; + temp2 = intermediate[15] * C12; + temp1 = (temp2 + temp1 + final_rounding) >> final_shift; + output[ 5] = (2 * (temp1 * C8) + ROUNDING) >> RIGHT_SHIFT; +} + +void vp9_short_fdct16x16_c(int16_t *input, int16_t *out, int pitch) { + int shortpitch = pitch >> 1; + int i, j; + int16_t output[256]; + int16_t *outptr = &output[0]; + + // First transform columns + for (i = 0; i < 16; i++) { + int16_t temp_in[16]; + int16_t temp_out[16]; + for (j = 0; j < 16; j++) + temp_in[j] = input[j * shortpitch + i]; + dct16x16_1d(temp_in, temp_out, 0); + for (j = 0; j < 16; j++) + output[j * 16 + i] = temp_out[j]; + } + + // Then transform rows + for (i = 0; i < 16; ++i) { + dct16x16_1d(outptr, out, 1); + outptr += 16; + out += 16; + } +} +#undef RIGHT_SHIFT +#undef ROUNDING +#endif diff --git a/vp9/encoder/encodeframe.c b/vp9/encoder/encodeframe.c index 47f7c37493132a55fe4dcb2738a24fb3905ee19b..703b49e35a047270eccf0bdecb750424a01a5fc6 100644 --- a/vp9/encoder/encodeframe.c +++ b/vp9/encoder/encodeframe.c @@ -10,6 +10,7 @@ #include "vpx_ports/config.h" +#include "vp9/encoder/encodeframe.h" #include "encodemb.h" #include "encodemv.h" #include "vp9/common/common.h" @@ -27,6 +28,7 @@ #include "vp9/common/findnearmv.h" 
#include "vp9/common/reconintra.h" #include "vp9/common/seg_common.h" +#include "vp9/encoder/tokenize.h" #include "vpx_rtcd.h" #include <stdio.h> #include <math.h> @@ -34,12 +36,9 @@ #include "vp9/common/subpixel.h" #include "vpx_ports/vpx_timer.h" #include "vp9/common/pred_common.h" - -#define DBG_PRNT_SEGMAP 0 -#if CONFIG_NEWBESTREFMV #include "vp9/common/mvref_common.h" -#endif +#define DBG_PRNT_SEGMAP 0 #if CONFIG_RUNTIME_CPU_DETECT #define RTCD(x) &cpi->common.rtcd.x @@ -54,35 +53,19 @@ int enc_debug = 0; int mb_row_debug, mb_col_debug; #endif -extern void vp9_initialize_me_consts(VP9_COMP *cpi, int QIndex); - -extern void vp9_auto_select_speed(VP9_COMP *cpi); - -int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, - int recon_yoffset, int recon_uvoffset, - int *returnrate, int *returndistortion); - -extern void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x, - int recon_yoffset, - int recon_uvoffset, int *r, int *d); - -void vp9_build_block_offsets(MACROBLOCK *x); - -void vp9_setup_block_ptrs(MACROBLOCK *x); +static void encode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x, + TOKENEXTRA **t, int recon_yoffset, + int recon_uvoffset, int output_enabled); -void vp9_encode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, - int recon_yoffset, int recon_uvoffset, - int output_enabled); +static void encode_inter_superblock(VP9_COMP *cpi, MACROBLOCK *x, + TOKENEXTRA **t, int recon_yoffset, + int recon_uvoffset, int mb_col, int mb_row); -void vp9_encode_inter_superblock(VP9_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, - int recon_yoffset, int recon_uvoffset, - int mb_col, int mb_row); +static void encode_intra_macro_block(VP9_COMP *cpi, MACROBLOCK *x, + TOKENEXTRA **t, int output_enabled); -void vp9_encode_intra_macro_block(VP9_COMP *cpi, MACROBLOCK *x, - TOKENEXTRA **t, int output_enabled); - -void vp9_encode_intra_super_block(VP9_COMP *cpi, MACROBLOCK *x, - TOKENEXTRA **t, int mb_col); +static void encode_intra_super_block(VP9_COMP 
*cpi, MACROBLOCK *x, + TOKENEXTRA **t, int mb_col); static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x); @@ -634,7 +617,7 @@ static void pick_mb_modes(VP9_COMP *cpi, *totaldist += d; // Dummy encode, do not do the tokenization - vp9_encode_intra_macro_block(cpi, x, tp, 0); + encode_intra_macro_block(cpi, x, tp, 0); // Note the encoder may have changed the segment_id // Save the coding context @@ -661,8 +644,8 @@ static void pick_mb_modes(VP9_COMP *cpi, *totaldist += d; // Dummy encode, do not do the tokenization - vp9_encode_inter_macroblock(cpi, x, tp, - recon_yoffset, recon_uvoffset, 0); + encode_inter_macroblock(cpi, x, tp, + recon_yoffset, recon_uvoffset, 0); seg_id = mbmi->segment_id; if (cpi->mb.e_mbd.segmentation_enabled && seg_id == 0) { @@ -992,10 +975,10 @@ static void encode_sb(VP9_COMP *cpi, if (cm->frame_type == KEY_FRAME) { #if CONFIG_SUPERBLOCKS if (xd->mode_info_context->mbmi.encoded_as_sb) - vp9_encode_intra_super_block(cpi, x, tp, mb_col); + encode_intra_super_block(cpi, x, tp, mb_col); else #endif - vp9_encode_intra_macro_block(cpi, x, tp, 1); + encode_intra_macro_block(cpi, x, tp, 1); // Note the encoder may have changed the segment_id #ifdef MODE_STATS @@ -1018,12 +1001,12 @@ static void encode_sb(VP9_COMP *cpi, #if CONFIG_SUPERBLOCKS if (xd->mode_info_context->mbmi.encoded_as_sb) - vp9_encode_inter_superblock(cpi, x, tp, recon_yoffset, recon_uvoffset, - mb_col, mb_row); + encode_inter_superblock(cpi, x, tp, recon_yoffset, recon_uvoffset, + mb_col, mb_row); else #endif - vp9_encode_inter_macroblock(cpi, x, tp, - recon_yoffset, recon_uvoffset, 1); + encode_inter_macroblock(cpi, x, tp, + recon_yoffset, recon_uvoffset, 1); // Note the encoder may have changed the segment_id #ifdef MODE_STATS @@ -1404,7 +1387,7 @@ static void encode_frame_internal(VP9_COMP *cpi) { x->src.v_buffer += 16 * x->src.uv_stride - 8 * offset; } - cpi->tok_count = tp - cpi->tok; + cpi->tok_count = (unsigned int)(tp - cpi->tok); } vpx_usec_timer_mark(&emr_timer); @@ 
-1592,7 +1575,7 @@ void vp9_encode_frame(VP9_COMP *cpi) { encode_frame_internal(cpi); for (i = 0; i < NB_PREDICTION_TYPES; ++i) { - const int diff = cpi->rd_comp_pred_diff[i] / cpi->common.MBs; + const int diff = (int)(cpi->rd_comp_pred_diff[i] / cpi->common.MBs); cpi->rd_prediction_type_threshes[frame_type][i] += diff; cpi->rd_prediction_type_threshes[frame_type][i] >>= 1; } @@ -1602,7 +1585,7 @@ void vp9_encode_frame(VP9_COMP *cpi) { int diff; if (i == TX_MODE_SELECT) pd -= RDCOST(cpi->mb.rdmult, cpi->mb.rddiv, 2048 * (TX_SIZE_MAX - 1), 0); - diff = pd / cpi->common.MBs; + diff = (int)(pd / cpi->common.MBs); cpi->rd_tx_select_threshes[frame_type][i] += diff; cpi->rd_tx_select_threshes[frame_type][i] /= 2; } @@ -1851,10 +1834,8 @@ static void update_sb_skip_coeff_state(VP9_COMP *cpi, } } -void vp9_encode_intra_super_block(VP9_COMP *cpi, - MACROBLOCK *x, - TOKENEXTRA **t, - int mb_col) { +static void encode_intra_super_block(VP9_COMP *cpi, MACROBLOCK *x, + TOKENEXTRA **t, int mb_col) { const int output_enabled = 1; int n; MACROBLOCKD *xd = &x->e_mbd; @@ -1931,10 +1912,8 @@ void vp9_encode_intra_super_block(VP9_COMP *cpi, } #endif /* CONFIG_SUPERBLOCKS */ -void vp9_encode_intra_macro_block(VP9_COMP *cpi, - MACROBLOCK *x, - TOKENEXTRA **t, - int output_enabled) { +static void encode_intra_macro_block(VP9_COMP *cpi, MACROBLOCK *x, + TOKENEXTRA **t, int output_enabled) { MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; if ((cpi->oxcf.tuning == VP8_TUNE_SSIM) && output_enabled) { adjust_act_zbin(cpi, x); @@ -1977,18 +1956,13 @@ void vp9_encode_intra_macro_block(VP9_COMP *cpi, } else { mbmi->txfm_size = TX_4X4; } - } -#if CONFIG_NEWBESTREFMV - else + } else { vp9_tokenize_mb(cpi, &x->e_mbd, t, 1); -#endif + } } - -extern void vp9_fix_contexts(MACROBLOCKD *xd); - -void vp9_encode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x, - TOKENEXTRA **t, int recon_yoffset, - int recon_uvoffset, int output_enabled) { +static void encode_inter_macroblock(VP9_COMP *cpi, 
MACROBLOCK *x, + TOKENEXTRA **t, int recon_yoffset, + int recon_uvoffset, int output_enabled) { VP9_COMMON *cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; @@ -2172,9 +2146,10 @@ void vp9_encode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x, } #if CONFIG_SUPERBLOCKS -void vp9_encode_inter_superblock(VP9_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, - int recon_yoffset, int recon_uvoffset, - int mb_col, int mb_row) { +static void encode_inter_superblock(VP9_COMP *cpi, MACROBLOCK *x, + TOKENEXTRA **t, int recon_yoffset, + int recon_uvoffset, + int mb_col, int mb_row) { const int output_enabled = 1; VP9_COMMON *cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; diff --git a/vp9/encoder/encodeframe.h b/vp9/encoder/encodeframe.h new file mode 100644 index 0000000000000000000000000000000000000000..5693d681c91437890c8c3f9e7bc20600b71324cf --- /dev/null +++ b/vp9/encoder/encodeframe.h @@ -0,0 +1,21 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + + +#ifndef __INC_ENCODEFRAME_H +#define __INC_ENCODEFRAME_H + +struct macroblock; + +extern void vp9_build_block_offsets(struct macroblock *x); + +extern void vp9_setup_block_ptrs(struct macroblock *x); + +#endif // __INC_ENCODEFRAME_H diff --git a/vp9/encoder/encodemv.c b/vp9/encoder/encodemv.c index 0483d81c7332c1b65600df65aef17ea829366e20..a9431bdee101d74c9accb4ed5b407adc069de9ec 100644 --- a/vp9/encoder/encodemv.c +++ b/vp9/encoder/encodemv.c @@ -168,7 +168,7 @@ static int update_nmv_savings(const unsigned int ct[2], if (cur_b - mod_b - cost > 0) { return cur_b - mod_b - cost; } else { - return -vp9_cost_zero(upd_p); + return 0 - vp9_cost_zero(upd_p); } } diff --git a/vp9/encoder/encodemv.h b/vp9/encoder/encodemv.h index f19613d0a1208debab6ee853f341e6da18b291d6..a35fabfdc19dea6c5f3f14f50053578d8f46a768 100644 --- a/vp9/encoder/encodemv.h +++ b/vp9/encoder/encodemv.h @@ -18,11 +18,11 @@ void vp9_write_nmvprobs(VP9_COMP* const, int usehp, vp9_writer* const); void vp9_encode_nmv(vp9_writer* const w, const MV* const mv, const MV* const ref, const nmv_context* const mvctx); void vp9_encode_nmv_fp(vp9_writer* const w, const MV* const mv, - const MV* const ref, const nmv_context *mvctx, + const MV* const ref, const nmv_context* const mvctx, int usehp); void vp9_build_nmv_cost_table(int *mvjoint, int *mvcost[2], - const nmv_context *mvctx, + const nmv_context* const mvctx, int usehp, int mvc_flag_v, int mvc_flag_h); diff --git a/vp9/encoder/firstpass.c b/vp9/encoder/firstpass.c index b9a3ac4c91a076a34768f17e0755cf646b26061e..0238232656e6aca69a0f5c96578a51c6dc6ba189 100644 --- a/vp9/encoder/firstpass.c +++ b/vp9/encoder/firstpass.c @@ -18,12 +18,14 @@ #include "mcomp.h" #include "firstpass.h" #include "vpx_scale/vpxscale.h" +#include "vp9/encoder/encodeframe.h" #include "encodemb.h" #include "vp9/common/extend.h" #include "vp9/common/systemdependent.h" #include "vpx_mem/vpx_mem.h" #include "vp9/common/swapyv12buffer.h" #include <stdio.h> +#include 
"vp9/encoder/quantize.h" #include "rdopt.h" #include "ratectrl.h" #include "vp9/common/quant_common.h" @@ -38,17 +40,6 @@ #define IF_RTCD(x) NULL #endif -extern void vp9_build_block_offsets(MACROBLOCK *x); - -extern void vp9_setup_block_ptrs(MACROBLOCK *x); - -extern void vp9_frame_init_quantizer(VP9_COMP *cpi); - -extern void vp9_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, - int_mv *mv); - -extern void vp9_alloc_compressor_data(VP9_COMP *cpi); - #define IIFACTOR 12.5 #define IIKFACTOR1 12.5 #define IIKFACTOR2 15.0 @@ -705,9 +696,9 @@ void vp9_first_pass(VP9_COMP *cpi) { FIRSTPASS_STATS fps; fps.frame = cm->current_video_frame; - fps.intra_error = intra_error >> 8; - fps.coded_error = coded_error >> 8; - fps.sr_coded_error = sr_coded_error >> 8; + fps.intra_error = (double)(intra_error >> 8); + fps.coded_error = (double)(coded_error >> 8); + fps.sr_coded_error = (double)(sr_coded_error >> 8); weight = simple_weight(cpi->Source); @@ -747,8 +738,8 @@ void vp9_first_pass(VP9_COMP *cpi) { // TODO: handle the case when duration is set to 0, or something less // than the full time between subsequent cpi->source_time_stamp s . - fps.duration = cpi->source->ts_end - - cpi->source->ts_start; + fps.duration = (double)(cpi->source->ts_end + - cpi->source->ts_start); // don't want to do output stats with a stack variable! 
memcpy(cpi->twopass.this_frame_stats, @@ -910,7 +901,7 @@ static int estimate_max_q(VP9_COMP *cpi, double err_per_mb = section_err / num_mbs; double err_correction_factor; double speed_correction = 1.0; - int overhead_bits_per_mb; + double overhead_bits_per_mb; if (section_target_bandwitdh <= 0) return cpi->twopass.maxq_max_limit; // Highest value allowed @@ -985,7 +976,7 @@ static int estimate_max_q(VP9_COMP *cpi, err_correction_factor = 5.0; bits_per_mb_at_this_q = - vp9_bits_per_mb(INTER_FRAME, Q) + overhead_bits_per_mb; + vp9_bits_per_mb(INTER_FRAME, Q) + (int)overhead_bits_per_mb; bits_per_mb_at_this_q = (int)(.5 + err_correction_factor * (double)bits_per_mb_at_this_q); @@ -1012,7 +1003,7 @@ static int estimate_max_q(VP9_COMP *cpi, // Give average a chance to settle though. // PGW TODO.. This code is broken for the extended Q range if ((cpi->ni_frames > - ((unsigned int)cpi->twopass.total_stats->count >> 8)) && + ((int)cpi->twopass.total_stats->count >> 8)) && (cpi->ni_frames > 150)) { adjust_maxq_qrange(cpi); } @@ -1038,7 +1029,7 @@ static int estimate_cq(VP9_COMP *cpi, double speed_correction = 1.0; double clip_iiratio; double clip_iifactor; - int overhead_bits_per_mb; + double overhead_bits_per_mb; target_norm_bits_per_mb = (section_target_bandwitdh < (1 << 20)) @@ -1091,7 +1082,7 @@ static int estimate_cq(VP9_COMP *cpi, err_correction_factor = 5.0; bits_per_mb_at_this_q = - vp9_bits_per_mb(INTER_FRAME, Q) + overhead_bits_per_mb; + vp9_bits_per_mb(INTER_FRAME, Q) + (int)overhead_bits_per_mb; bits_per_mb_at_this_q = (int)(.5 + err_correction_factor * (double)bits_per_mb_at_this_q); @@ -1426,7 +1417,7 @@ static int calc_arf_boost( calc_frame_boost(cpi, &this_frame, this_frame_mv_in_out)); } - *f_boost = boost_score; + *f_boost = (int)boost_score; // Reset for backward looking loop boost_score = 0.0; @@ -1464,7 +1455,7 @@ static int calc_arf_boost( calc_frame_boost(cpi, &this_frame, this_frame_mv_in_out)); } - *b_boost = boost_score; + *b_boost = 
(int)boost_score; arf_boost = (*f_boost + *b_boost); if (arf_boost < ((b_frames + f_frames) * 20)) @@ -1486,8 +1477,8 @@ static void configure_arnr_filter(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Note: this_frame->frame has been updated in the loop // so it now points at the ARF frame. half_gf_int = cpi->baseline_gf_interval >> 1; - frames_after_arf = cpi->twopass.total_stats->count - - this_frame->frame - 1; + frames_after_arf = (int)(cpi->twopass.total_stats->count - + this_frame->frame - 1); switch (cpi->oxcf.arnr_type) { case 1: // Backward filter @@ -1710,7 +1701,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { (cpi->twopass.kf_group_error_left > 0)) { cpi->twopass.gf_group_bits = (int)((double)cpi->twopass.kf_group_bits * - (gf_group_err / (double)cpi->twopass.kf_group_error_left)); + (gf_group_err / cpi->twopass.kf_group_error_left)); } else cpi->twopass.gf_group_bits = 0; @@ -1776,7 +1767,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { alt_gf_grp_bits = (double)cpi->twopass.kf_group_bits * (mod_frame_err * (double)cpi->baseline_gf_interval) / - DOUBLE_DIVIDE_CHECK((double)cpi->twopass.kf_group_error_left); + DOUBLE_DIVIDE_CHECK(cpi->twopass.kf_group_error_left); alt_gf_bits = (int)((double)boost * (alt_gf_grp_bits / (double)allocation_chunks)); @@ -1792,7 +1783,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { int alt_gf_bits = (int)((double)cpi->twopass.kf_group_bits * mod_frame_err / - DOUBLE_DIVIDE_CHECK((double)cpi->twopass.kf_group_error_left)); + DOUBLE_DIVIDE_CHECK(cpi->twopass.kf_group_error_left)); if (alt_gf_bits > gf_bits) { gf_bits = alt_gf_bits; @@ -1815,7 +1806,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { { // Adjust KF group bits and error remainin - cpi->twopass.kf_group_error_left -= gf_group_err; + cpi->twopass.kf_group_error_left -= (int64_t)gf_group_err; cpi->twopass.kf_group_bits -= 
cpi->twopass.gf_group_bits; if (cpi->twopass.kf_group_bits < 0) @@ -1826,9 +1817,10 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // of the group (except in Key frame case where this has already // happened) if (!cpi->source_alt_ref_pending && cpi->common.frame_type != KEY_FRAME) - cpi->twopass.gf_group_error_left = gf_group_err - gf_first_frame_err; + cpi->twopass.gf_group_error_left = (int64_t)(gf_group_err + - gf_first_frame_err); else - cpi->twopass.gf_group_error_left = gf_group_err; + cpi->twopass.gf_group_error_left = (int64_t)gf_group_err; cpi->twopass.gf_group_bits -= cpi->twopass.gf_bits - cpi->min_frame_bandwidth; @@ -1848,8 +1840,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { pct_extra = (boost - 100) / 50; pct_extra = (pct_extra > 20) ? 20 : pct_extra; - cpi->twopass.alt_extra_bits = - (cpi->twopass.gf_group_bits * pct_extra) / 100; + cpi->twopass.alt_extra_bits = (int) + ((cpi->twopass.gf_group_bits * pct_extra) / 100); cpi->twopass.gf_group_bits -= cpi->twopass.alt_extra_bits; cpi->twopass.alt_extra_bits /= ((cpi->baseline_gf_interval - 1) >> 1); @@ -1872,9 +1864,9 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { avg_stats(§ionstats); - cpi->twopass.section_intra_rating = - sectionstats.intra_error / - DOUBLE_DIVIDE_CHECK(sectionstats.coded_error); + cpi->twopass.section_intra_rating = (int) + (sectionstats.intra_error / + DOUBLE_DIVIDE_CHECK(sectionstats.coded_error)); reset_fpf_position(cpi, start_pos); } @@ -1907,10 +1899,11 @@ static void assign_std_frame_bits(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { target_frame_size = max_bits; if (target_frame_size > cpi->twopass.gf_group_bits) - target_frame_size = cpi->twopass.gf_group_bits; + target_frame_size = (int)cpi->twopass.gf_group_bits; } - cpi->twopass.gf_group_error_left -= modified_err; // Adjust error remaining + // Adjust error remaining + cpi->twopass.gf_group_error_left -= (int64_t)modified_err; 
cpi->twopass.gf_group_bits -= target_frame_size; // Adjust bits remaining if (cpi->twopass.gf_group_bits < 0) @@ -2019,25 +2012,26 @@ void vp9_second_pass(VP9_COMP *cpi) { } // Keep a globally available copy of this and the next frame's iiratio. - cpi->twopass.this_iiratio = this_frame_intra_error / - DOUBLE_DIVIDE_CHECK(this_frame_coded_error); + cpi->twopass.this_iiratio = (int)(this_frame_intra_error / + DOUBLE_DIVIDE_CHECK(this_frame_coded_error)); { FIRSTPASS_STATS next_frame; if (lookup_next_frame_stats(cpi, &next_frame) != EOF) { - cpi->twopass.next_iiratio = next_frame.intra_error / - DOUBLE_DIVIDE_CHECK(next_frame.coded_error); + cpi->twopass.next_iiratio = (int)(next_frame.intra_error / + DOUBLE_DIVIDE_CHECK(next_frame.coded_error)); } } // Set nominal per second bandwidth for this frame - cpi->target_bandwidth = cpi->per_frame_bandwidth * cpi->output_frame_rate; + cpi->target_bandwidth = (int)(cpi->per_frame_bandwidth + * cpi->output_frame_rate); if (cpi->target_bandwidth < 0) cpi->target_bandwidth = 0; // Account for mv, mode and other overheads. - overhead_bits = estimate_modemvcost( - cpi, cpi->twopass.total_left_stats); + overhead_bits = (int)estimate_modemvcost( + cpi, cpi->twopass.total_left_stats); // Special case code for first frame. 
if (cpi->common.current_video_frame == 0) { @@ -2427,9 +2421,9 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { avg_stats(&sectionstats); - cpi->twopass.section_intra_rating = - sectionstats.intra_error - / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error); + cpi->twopass.section_intra_rating = (int) + (sectionstats.intra_error + / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error)); } // Reset the first pass file position @@ -2437,7 +2431,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Work out how many bits to allocate for the key frame itself if (1) { - int kf_boost = boost_score; + int kf_boost = (int)boost_score; int allocation_chunks; int alt_kf_bits; @@ -2519,10 +2513,14 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { } cpi->twopass.kf_group_bits -= cpi->twopass.kf_bits; - cpi->twopass.kf_bits += cpi->min_frame_bandwidth; // Add in the minimum frame allowance - - cpi->per_frame_bandwidth = cpi->twopass.kf_bits; // Peer frame bit target for this frame - cpi->target_bandwidth = cpi->twopass.kf_bits * cpi->output_frame_rate; // Convert to a per second bitrate + // Add in the minimum frame allowance + cpi->twopass.kf_bits += cpi->min_frame_bandwidth; + + // Peer frame bit target for this frame + cpi->per_frame_bandwidth = cpi->twopass.kf_bits; + // Convert to a per second bitrate + cpi->target_bandwidth = (int)(cpi->twopass.kf_bits * + cpi->output_frame_rate); } // Note the total error score of the kf group minus the key frame itself diff --git a/vp9/encoder/generic/csystemdependent.c b/vp9/encoder/generic/csystemdependent.c index 589d0a50bec061500a55b69fa959df90a53a3cfc..05b1d61171f486addf926a57d0aca0b498732920 100644 --- a/vp9/encoder/generic/csystemdependent.c +++ b/vp9/encoder/generic/csystemdependent.c @@ -27,10 +27,6 @@ extern void vp9_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, void vp9_cmachine_specific_config(VP9_COMP *cpi) { #if CONFIG_RUNTIME_CPU_DETECT 
cpi->rtcd.common = &cpi->common.rtcd; - - cpi->rtcd.search.full_search = vp9_full_search_sad; - cpi->rtcd.search.refining_search = vp9_refining_search_sad; - cpi->rtcd.search.diamond_search = vp9_diamond_search_sad; cpi->rtcd.temporal.apply = vp9_temporal_filter_apply_c; #endif diff --git a/vp9/encoder/lookahead.c b/vp9/encoder/lookahead.c index ca0b4cdf4e4876dbf2de9f57d46f1a91261dc213..c10fecd47978e9f277d505a2755812b5edddee3c 100644 --- a/vp9/encoder/lookahead.c +++ b/vp9/encoder/lookahead.c @@ -43,7 +43,7 @@ void vp9_lookahead_destroy(struct lookahead_ctx *ctx) { if (ctx) { if (ctx->buf) { - int i; + unsigned int i; for (i = 0; i < ctx->max_sz; i++) vp8_yv12_de_alloc_frame_buffer(&ctx->buf[i].img); @@ -59,7 +59,6 @@ vp9_lookahead_init(unsigned int width, unsigned int height, unsigned int depth) { struct lookahead_ctx *ctx = NULL; - int i; /* Clamp the lookahead queue depth */ if (depth < 1) @@ -74,6 +73,7 @@ vp9_lookahead_init(unsigned int width, /* Allocate the lookahead structures */ ctx = calloc(1, sizeof(*ctx)); if (ctx) { + unsigned int i; ctx->max_sz = depth; ctx->buf = calloc(depth, sizeof(*ctx->buf)); if (!ctx->buf) @@ -175,9 +175,9 @@ vp9_lookahead_peek(struct lookahead_ctx *ctx, struct lookahead_entry *buf = NULL; assert(index < ctx->max_sz); - if (index < ctx->sz) { + if (index < (int)ctx->sz) { index += ctx->read_idx; - if (index >= ctx->max_sz) + if (index >= (int)ctx->max_sz) index -= ctx->max_sz; buf = ctx->buf + index; } diff --git a/vp9/encoder/mbgraph.c b/vp9/encoder/mbgraph.c index 301826e500427b450c2466180c9e05ae8ec8bffd..e7506168fc7ada4e0aaf5cca66e796c1841f7a0b 100644 --- a/vp9/encoder/mbgraph.c +++ b/vp9/encoder/mbgraph.c @@ -139,7 +139,7 @@ static int do_16x16_motion_search // If the current best reference mv is not centred on 0,0 then do a 0,0 based search as well if (ref_mv->as_int) { - int tmp_err; + unsigned int tmp_err; int_mv zero_ref_mv, tmp_mv; zero_ref_mv.as_int = 0; @@ -202,7 +202,7 @@ static int find_best_16x16_intra MACROBLOCK 
*const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; MB_PREDICTION_MODE best_mode = -1, mode; - int best_err = INT_MAX; + unsigned int best_err = INT_MAX; // calculate SATD for each intra prediction mode; // we're intentionally not doing 4x4, we just want a rough estimate @@ -449,7 +449,7 @@ void vp9_update_mbgraph_stats // being a GF - so exit if we don't look ahead beyond that if (n_frames <= cpi->frames_till_gf_update_due) return; - if (n_frames > cpi->common.frames_till_alt_ref_frame) + if (n_frames > (int)cpi->common.frames_till_alt_ref_frame) n_frames = cpi->common.frames_till_alt_ref_frame; if (n_frames > MAX_LAG_BUFFERS) n_frames = MAX_LAG_BUFFERS; diff --git a/vp9/encoder/mcomp.c b/vp9/encoder/mcomp.c index 60ec49c95039628d79fcca5dd27555073da51738..aaff8bc7f3dac3865f6ef193dd35d033686ffa6f 100644 --- a/vp9/encoder/mcomp.c +++ b/vp9/encoder/mcomp.c @@ -1255,11 +1255,11 @@ cal_neighbors: #undef CHECK_POINT #undef CHECK_BETTER -int vp9_diamond_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - int_mv *ref_mv, int_mv *best_mv, - int search_param, int sad_per_bit, int *num00, - vp9_variance_fn_ptr_t *fn_ptr, DEC_MVCOSTS, - int_mv *center_mv) { +int vp9_diamond_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, + int_mv *ref_mv, int_mv *best_mv, + int search_param, int sad_per_bit, int *num00, + vp9_variance_fn_ptr_t *fn_ptr, DEC_MVCOSTS, + int_mv *center_mv) { int i, j, step; unsigned char *what = (*(b->base_src) + b->src); @@ -1380,7 +1380,7 @@ int vp9_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int tot_steps; int_mv this_mv; - int bestsad = INT_MAX; + unsigned int bestsad = INT_MAX; int best_site = 0; int last_site = 0; @@ -1568,10 +1568,10 @@ int vp9_full_pixel_diamond(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *b, return bestsme; } -int vp9_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, - int sad_per_bit, int distance, - vp9_variance_fn_ptr_t *fn_ptr, DEC_MVCOSTS, - int_mv *center_mv) { +int vp9_full_search_sad_c(MACROBLOCK *x, 
BLOCK *b, BLOCKD *d, int_mv *ref_mv, + int sad_per_bit, int distance, + vp9_variance_fn_ptr_t *fn_ptr, DEC_MVCOSTS, + int_mv *center_mv) { unsigned char *what = (*(b->base_src) + b->src); int what_stride = b->src_stride; unsigned char *in_what; @@ -1674,7 +1674,7 @@ int vp9_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, unsigned char *bestaddress; int_mv *best_mv = &d->bmi.as_mv.first; int_mv this_mv; - int bestsad = INT_MAX; + unsigned int bestsad = INT_MAX; int r, c; unsigned char *check_here; @@ -1802,7 +1802,7 @@ int vp9_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, unsigned char *bestaddress; int_mv *best_mv = &d->bmi.as_mv.first; int_mv this_mv; - int bestsad = INT_MAX; + unsigned int bestsad = INT_MAX; int r, c; unsigned char *check_here; @@ -1942,11 +1942,10 @@ int vp9_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, else return INT_MAX; } - -int vp9_refining_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, - int error_per_bit, int search_range, - vp9_variance_fn_ptr_t *fn_ptr, DEC_MVCOSTS, - int_mv *center_mv) { +int vp9_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, + int_mv *ref_mv, int error_per_bit, + int search_range, vp9_variance_fn_ptr_t *fn_ptr, + DEC_MVCOSTS, int_mv *center_mv) { MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; int i, j; short this_row_offset, this_col_offset; diff --git a/vp9/encoder/mcomp.h b/vp9/encoder/mcomp.h index f754837e624b2d25034f6a762aabd957fba97979..ac8c8efc6c436f55f6b0b4f12f624c3cffbf78a4 100644 --- a/vp9/encoder/mcomp.h +++ b/vp9/encoder/mcomp.h @@ -68,87 +68,24 @@ extern fractional_mv_step_fp vp9_find_best_sub_pixel_step_iteratively; extern fractional_mv_step_fp vp9_find_best_sub_pixel_step; extern fractional_mv_step_fp vp9_find_best_half_pixel_step; -#define prototype_full_search_sad(sym)\ - int (sym)\ - (\ - MACROBLOCK *x, \ - BLOCK *b, \ - BLOCKD *d, \ - int_mv *ref_mv, \ - int sad_per_bit, \ - int distance, \ - 
vp9_variance_fn_ptr_t *fn_ptr, \ - DEC_MVSADCOSTS, \ - int_mv *center_mv \ - ) - -#define prototype_refining_search_sad(sym)\ - int (sym)\ - (\ - MACROBLOCK *x, \ - BLOCK *b, \ - BLOCKD *d, \ - int_mv *ref_mv, \ - int sad_per_bit, \ - int distance, \ - vp9_variance_fn_ptr_t *fn_ptr, \ - DEC_MVSADCOSTS, \ - int_mv *center_mv \ - ) - -#define prototype_diamond_search_sad(sym)\ - int (sym)\ - (\ - MACROBLOCK *x, \ - BLOCK *b, \ - BLOCKD *d, \ - int_mv *ref_mv, \ - int_mv *best_mv, \ - int search_param, \ - int sad_per_bit, \ - int *num00, \ - vp9_variance_fn_ptr_t *fn_ptr, \ - DEC_MVSADCOSTS, \ - int_mv *center_mv \ - ) - -#if ARCH_X86 || ARCH_X86_64 -#include "x86/mcomp_x86.h" -#endif - -typedef prototype_full_search_sad(*vp9_full_search_fn_t); -extern prototype_full_search_sad(vp9_full_search_sad); -extern prototype_full_search_sad(vp9_full_search_sadx3); -extern prototype_full_search_sad(vp9_full_search_sadx8); - -typedef prototype_refining_search_sad(*vp9_refining_search_fn_t); -extern prototype_refining_search_sad(vp9_refining_search_sad); -extern prototype_refining_search_sad(vp9_refining_search_sadx4); - -typedef prototype_diamond_search_sad(*vp9_diamond_search_fn_t); -extern prototype_diamond_search_sad(vp9_diamond_search_sad); -extern prototype_diamond_search_sad(vp9_diamond_search_sadx4); - -#ifndef vp9_search_full_search -#define vp9_search_full_search vp9_full_search_sad -#endif -extern prototype_full_search_sad(vp9_search_full_search); - -#ifndef vp9_search_refining_search -#define vp9_search_refining_search vp9_refining_search_sad -#endif -extern prototype_refining_search_sad(vp9_search_refining_search); - -#ifndef vp9_search_diamond_search -#define vp9_search_diamond_search vp9_diamond_search_sad -#endif -extern prototype_diamond_search_sad(vp9_search_diamond_search); +typedef int (*vp9_full_search_fn_t)(MACROBLOCK *x, BLOCK *b, BLOCKD *d, + int_mv *ref_mv, int sad_per_bit, + int distance, vp9_variance_fn_ptr_t *fn_ptr, + DEC_MVCOSTS, int_mv 
*center_mv); + +typedef int (*vp9_refining_search_fn_t)(MACROBLOCK *x, BLOCK *b, BLOCKD *d, + int_mv *ref_mv, int sad_per_bit, + int distance, + vp9_variance_fn_ptr_t *fn_ptr, + DEC_MVCOSTS, int_mv *center_mv); + +typedef int (*vp9_diamond_search_fn_t)(MACROBLOCK *x, BLOCK *b, BLOCKD *d, + int_mv *ref_mv, int_mv *best_mv, + int search_param, int sad_per_bit, + int *num00, + vp9_variance_fn_ptr_t *fn_ptr, + DEC_MVCOSTS, int_mv *center_mv); -typedef struct { - prototype_full_search_sad(*full_search); - prototype_refining_search_sad(*refining_search); - prototype_diamond_search_sad(*diamond_search); -} vp9_search_rtcd_vtable_t; #if CONFIG_RUNTIME_CPU_DETECT #define SEARCH_INVOKE(ctx,fn) (ctx)->fn diff --git a/vp9/encoder/onyx_if.c b/vp9/encoder/onyx_if.c index 3534c39f1c34b0c5f05f40264fcb0309c586c261..bd8e6d4d9d0c07af3e0bd35057305b957f681c00 100644 --- a/vp9/encoder/onyx_if.c +++ b/vp9/encoder/onyx_if.c @@ -36,11 +36,10 @@ #include "vp9/common/pred_common.h" #include "vp9/encoder/rdopt.h" #include "bitstream.h" +#include "vp9/encoder/picklpf.h" #include "ratectrl.h" - -#if CONFIG_NEWBESTREFMV #include "vp9/common/mvref_common.h" -#endif + #if ARCH_ARM #include "vpx_ports/arm.h" @@ -58,18 +57,8 @@ #define RTCD(x) NULL #endif -extern void vp9_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi); - -extern void vp9_set_alt_lf_level(VP9_COMP *cpi, int filt_val); - -extern void vp9_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi); - extern void vp9_cmachine_specific_config(VP9_COMP *cpi); -extern void vp9_deblock_frame(YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *post, - int filt_lvl, int low_var_thresh, int flag); - extern void print_tree_update_probs(); #if HAVE_ARMV7 @@ -80,10 +69,6 @@ extern void vp8_yv12_copy_src_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc); #endif -int vp9_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest); - -extern void vp9_temporal_filter_prepare_c(VP9_COMP *cpi, int distance); - 
static void set_default_lf_deltas(VP9_COMP *cpi); #define DEFAULT_INTERP_FILTER EIGHTTAP /* SWITCHABLE for better performance */ @@ -257,14 +242,14 @@ static void init_base_skip_probs(void) { skip_prob = 255; base_skip_false_prob[i][1] = skip_prob; - skip_prob = t * 0.75; + skip_prob = t * 3 / 4; if (skip_prob < 1) skip_prob = 1; else if (skip_prob > 255) skip_prob = 255; base_skip_false_prob[i][2] = skip_prob; - skip_prob = t * 1.25; + skip_prob = t * 5 / 4; if (skip_prob < 1) skip_prob = 1; else if (skip_prob > 255) @@ -1413,7 +1398,7 @@ rescale(int val, int num, int denom) { int64_t llden = denom; int64_t llval = val; - return llval * llnum / llden; + return (int)(llval * llnum / llden); } @@ -1925,7 +1910,7 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { vp9_init_first_pass(cpi); } else if (cpi->pass == 2) { size_t packet_sz = sizeof(FIRSTPASS_STATS); - int packets = oxcf->two_pass_stats_in.sz / packet_sz; + int packets = (int)(oxcf->two_pass_stats_in.sz / packet_sz); cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf; cpi->twopass.stats_in = cpi->twopass.stats_in_start; @@ -1989,9 +1974,9 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { cpi->fn_ptr[BLOCK_4X4].copymem = vp9_copy32xn; #endif - cpi->full_search_sad = SEARCH_INVOKE(&cpi->rtcd.search, full_search); - cpi->diamond_search_sad = SEARCH_INVOKE(&cpi->rtcd.search, diamond_search); - cpi->refining_search_sad = SEARCH_INVOKE(&cpi->rtcd.search, refining_search); + cpi->full_search_sad = vp9_full_search_sad; + cpi->diamond_search_sad = vp9_diamond_search_sad; + cpi->refining_search_sad = vp9_refining_search_sad; // make sure frame 1 is okay cpi->error_bins[0] = cpi->common.MBs; @@ -2351,7 +2336,7 @@ static void generate_psnr_packet(VP9_COMP *cpi) { for (i = 0; i < 4; i++) pkt.data.psnr.psnr[i] = vp9_mse2psnr(pkt.data.psnr.samples[i], 255.0, - pkt.data.psnr.sse[i]); + (double)pkt.data.psnr.sse[i]); vpx_codec_pkt_list_add(cpi->output_pkt_list, &pkt); } @@ -2917,7 +2902,9 @@ static void 
encode_frame_to_data_rate // pass function that sets the target bandwidth so must set it here if (cpi->common.refresh_alt_ref_frame) { cpi->per_frame_bandwidth = cpi->twopass.gf_bits; // Per frame bit target for the alt ref frame - cpi->target_bandwidth = cpi->twopass.gf_bits * cpi->output_frame_rate; // per second target bitrate + // per second target bitrate + cpi->target_bandwidth = (int)(cpi->twopass.gf_bits * + cpi->output_frame_rate); } // Default turn off buffer to buffer copying @@ -4119,7 +4106,7 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, - cpi->last_time_stamp_seen; // do a step update if the duration changes by 10% if (last_duration) - step = ((this_duration - last_duration) * 10 / last_duration); + step = (int)((this_duration - last_duration) * 10 / last_duration); } if (this_duration) { @@ -4132,7 +4119,8 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, * frame rate. If we haven't seen 1 second yet, then average * over the whole interval seen. 
*/ - interval = cpi->source->ts_end - cpi->first_time_stamp_ever; + interval = (double)(cpi->source->ts_end + - cpi->first_time_stamp_ever); if (interval > 10000000.0) interval = 10000000; @@ -4234,17 +4222,17 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, int y_samples = orig->y_height * orig->y_width; int uv_samples = orig->uv_height * orig->uv_width; int t_samples = y_samples + 2 * uv_samples; - int64_t sq_error; + double sq_error; - ye = calc_plane_error(orig->y_buffer, orig->y_stride, + ye = (double)calc_plane_error(orig->y_buffer, orig->y_stride, recon->y_buffer, recon->y_stride, orig->y_width, orig->y_height); - ue = calc_plane_error(orig->u_buffer, orig->uv_stride, + ue = (double)calc_plane_error(orig->u_buffer, orig->uv_stride, recon->u_buffer, recon->uv_stride, orig->uv_width, orig->uv_height); - ve = calc_plane_error(orig->v_buffer, orig->uv_stride, + ve = (double)calc_plane_error(orig->v_buffer, orig->uv_stride, recon->v_buffer, recon->uv_stride, orig->uv_width, orig->uv_height); @@ -4265,15 +4253,15 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, #endif vp9_clear_system_state(); - ye = calc_plane_error(orig->y_buffer, orig->y_stride, + ye = (double)calc_plane_error(orig->y_buffer, orig->y_stride, pp->y_buffer, pp->y_stride, orig->y_width, orig->y_height); - ue = calc_plane_error(orig->u_buffer, orig->uv_stride, + ue = (double)calc_plane_error(orig->u_buffer, orig->uv_stride, pp->u_buffer, pp->uv_stride, orig->uv_width, orig->uv_height); - ve = calc_plane_error(orig->v_buffer, orig->uv_stride, + ve = (double)calc_plane_error(orig->v_buffer, orig->uv_stride, pp->v_buffer, pp->uv_stride, orig->uv_width, orig->uv_height); diff --git a/vp9/encoder/onyx_int.h b/vp9/encoder/onyx_int.h index b1bd7932550efd0dd5b8e91372cb62d1b67e4357..5406e9029b243b1d4b11de36edcee24dd70a84de 100644 --- a/vp9/encoder/onyx_int.h +++ b/vp9/encoder/onyx_int.h @@ -349,7 +349,6 @@ typedef struct { typedef struct VP9_ENCODER_RTCD { 
VP9_COMMON_RTCD *common; - vp9_search_rtcd_vtable_t search; vp9_temporal_rtcd_vtable_t temporal; } VP9_ENCODER_RTCD; @@ -665,7 +664,8 @@ typedef struct VP9_COMP { int maxq_min_limit; int static_scene_max_gf_interval; int kf_bits; - int gf_group_error_left; // Remaining error from uncoded frames in a gf group. Two pass use only + // Remaining error from uncoded frames in a gf group. Two pass use only + int64_t gf_group_error_left; // Projected total bits available for a key frame group of frames int64_t kf_group_bits; @@ -673,8 +673,10 @@ typedef struct VP9_COMP { // Error score of frames still to be coded in kf group int64_t kf_group_error_left; - int gf_group_bits; // Projected Bits available for a group of frames including 1 GF or ARF - int gf_bits; // Bits for the golden frame or ARF - 2 pass only + // Projected Bits available for a group of frames including 1 GF or ARF + int64_t gf_group_bits; + // Bits for the golden frame or ARF - 2 pass only + int gf_bits; int alt_extra_bits; int sr_update_lag; @@ -764,11 +766,13 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, void vp9_activity_masking(VP9_COMP *cpi, MACROBLOCK *x); -void vp9_tokenize_mb(VP9_COMP *, MACROBLOCKD *, TOKENEXTRA **, int dry_run); -void vp9_stuff_mb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run); - void vp9_set_speed_features(VP9_COMP *cpi); +extern int vp9_calc_ss_err(YV12_BUFFER_CONFIG *source, + YV12_BUFFER_CONFIG *dest); + +extern void vp9_alloc_compressor_data(VP9_COMP *cpi); + #if CONFIG_DEBUG #define CHECK_MEM_ERROR(lval,expr) do {\ lval = (expr); \ diff --git a/vp9/encoder/picklpf.c b/vp9/encoder/picklpf.c index 71c0fb1f15d519a70b0381338fecf562cc3c148b..0107facc390b6a3feaf79977438e04737010d18c 100644 --- a/vp9/encoder/picklpf.c +++ b/vp9/encoder/picklpf.c @@ -11,6 +11,7 @@ #include "vp9/common/onyxc_int.h" #include "onyx_int.h" +#include "vp9/encoder/picklpf.h" #include "quantize.h" #include "vpx_mem/vpx_mem.h" #include "vpx_scale/vpxscale.h" @@ -20,8 +21,6 @@ 
#include "vpx_ports/arm.h" #endif -extern int vp9_calc_ss_err(YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *dest); #if HAVE_ARMV7 extern void vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc); #endif diff --git a/vp9/encoder/picklpf.h b/vp9/encoder/picklpf.h new file mode 100644 index 0000000000000000000000000000000000000000..9c3f442ae9e29a608d6d9d605bb51287ab0759d4 --- /dev/null +++ b/vp9/encoder/picklpf.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef __INC_PICKLPF_H +#define __INC_PICKLPF_H + +struct yv12_buffer_config; +struct VP9_COMP; + +extern void vp9_pick_filter_level_fast(struct yv12_buffer_config *sd, + struct VP9_COMP *cpi); + +extern void vp9_set_alt_lf_level(struct VP9_COMP *cpi, int filt_val); + +extern void vp9_pick_filter_level(struct yv12_buffer_config *sd, + struct VP9_COMP *cpi); + +#endif // __INC_PICKLPF_H diff --git a/vp9/encoder/ratectrl.c b/vp9/encoder/ratectrl.c index 62a940d2a510d586504476559f07d51818e53694..2838e26f0f1a02f3984006af1cc8455b313b9652 100644 --- a/vp9/encoder/ratectrl.c +++ b/vp9/encoder/ratectrl.c @@ -311,8 +311,8 @@ static void calc_iframe_target_size(VP9_COMP *cpi) { target = cpi->per_frame_bandwidth; if (cpi->oxcf.rc_max_intra_bitrate_pct) { - unsigned int max_rate = cpi->per_frame_bandwidth - * cpi->oxcf.rc_max_intra_bitrate_pct / 100; + int max_rate = cpi->per_frame_bandwidth + * cpi->oxcf.rc_max_intra_bitrate_pct / 100; if (target > max_rate) target = max_rate; diff --git a/vp9/encoder/rdopt.c b/vp9/encoder/rdopt.c index 
1db912787be777fadd53fe0a30ee4a95daba6ef3..19b96af4023ba3acea05562f07b8c495ab9bf1e9 100644 --- a/vp9/encoder/rdopt.c +++ b/vp9/encoder/rdopt.c @@ -41,9 +41,7 @@ #include "vp9/common/pred_common.h" #include "vp9/common/entropy.h" #include "vpx_rtcd.h" -#if CONFIG_NEWBESTREFMV #include "vp9/common/mvref_common.h" -#endif #if CONFIG_RUNTIME_CPU_DETECT #define IF_RTCD(x) (x) @@ -51,9 +49,6 @@ #define IF_RTCD(x) NULL #endif -extern void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x); -extern void vp9_update_zbin_extra(VP9_COMP *cpi, MACROBLOCK *x); - #define MAXF(a,b) (((a) > (b)) ? (a) : (b)) #define INVALID_MV 0x80008000 @@ -391,59 +386,6 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int QIndex) { } } -void vp9_auto_select_speed(VP9_COMP *cpi) { - int milliseconds_for_compress = (int)(1000000 / cpi->oxcf.frame_rate); - - milliseconds_for_compress = milliseconds_for_compress * (16 - cpi->oxcf.cpu_used) / 16; - - /* - // this is done during parameter valid check - if( cpi->oxcf.cpu_used > 16) - cpi->oxcf.cpu_used = 16; - if( cpi->oxcf.cpu_used < -16) - cpi->oxcf.cpu_used = -16; - */ - - if (cpi->avg_pick_mode_time < milliseconds_for_compress && - (cpi->avg_encode_time - cpi->avg_pick_mode_time) < - milliseconds_for_compress) { - if (cpi->avg_pick_mode_time == 0) { - cpi->Speed = 4; - } else { - if (milliseconds_for_compress * 100 < cpi->avg_encode_time * 95) { - cpi->Speed += 2; - cpi->avg_pick_mode_time = 0; - cpi->avg_encode_time = 0; - - if (cpi->Speed > 16) { - cpi->Speed = 16; - } - } - - if (milliseconds_for_compress * 100 > - cpi->avg_encode_time * auto_speed_thresh[cpi->Speed]) { - cpi->Speed -= 1; - cpi->avg_pick_mode_time = 0; - cpi->avg_encode_time = 0; - - // In real-time mode, cpi->speed is in [4, 16]. 
- if (cpi->Speed < 4) { // if ( cpi->Speed < 0 ) - cpi->Speed = 4; // cpi->Speed = 0; - } - } - } - } else { - cpi->Speed += 4; - - if (cpi->Speed > 16) - cpi->Speed = 16; - - - cpi->avg_pick_mode_time = 0; - cpi->avg_encode_time = 0; - } -} - int vp9_block_error_c(short *coeff, short *dqcoeff, int block_size) { int i, error = 0; @@ -613,7 +555,7 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type, default_eob = 64; if (type == PLANE_TYPE_Y_WITH_DC) { BLOCKD *bb; - int ib = (b - xd->block); + int ib = (int)(b - xd->block); if (ib < 16) { ib = (ib & 8) + ((ib & 4) >> 1); bb = xd->block + ib; @@ -707,9 +649,6 @@ static void macro_block_yrd_4x4(MACROBLOCK *mb, BLOCK *beptr; int d; - vp9_subtract_mby(mb->src_diff, *(mb->block[0].base_src), xd->predictor, - mb->block[0].src_stride); - // Fdct and building the 2nd order block for (beptr = mb->block; beptr < mb->block + 16; beptr += 2) { mb->vp9_short_fdct8x4(beptr->src_diff, beptr->coeff, 32); @@ -778,9 +717,6 @@ static void macro_block_yrd_8x8(MACROBLOCK *mb, BLOCKD *const x_y2 = xd->block + 24; int d; - vp9_subtract_mby(mb->src_diff, *(mb->block[0].base_src), xd->predictor, - mb->block[0].src_stride); - vp9_transform_mby_8x8(mb); vp9_quantize_mby_8x8(mb); @@ -827,9 +763,6 @@ static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion, BLOCK *be = &mb->block[0]; TX_TYPE tx_type; - vp9_subtract_mby(mb->src_diff, *(mb->block[0].base_src), mb->e_mbd.predictor, - mb->block[0].src_stride); - tx_type = get_tx_type_16x16(xd, b); if (tx_type != DCT_DCT) { vp9_fht(be->src_diff, 32, be->coeff, tx_type, 16); @@ -866,7 +799,9 @@ static void macro_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int d16x16, r16x16, r16x16s, s16x16; int64_t rd16x16, rd16x16s; - // FIXME don't do sub x3 + vp9_subtract_mby(x->src_diff, *(x->block[0].base_src), xd->predictor, + x->block[0].src_stride); + if (skip_prob == 0) skip_prob = 1; s0 = vp9_cost_bit(skip_prob, 0); @@ -1243,7 +1178,7 @@ static int64_t 
rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, int *Rat cost += vp9_cost_bit(128, allow_comp); #endif *Rate = cost; - *rate_y += tot_rate_y; + *rate_y = tot_rate_y; *Distortion = distortion; return RDCOST(mb->rdmult, mb->rddiv, cost, distortion); @@ -1579,7 +1514,7 @@ static int64_t rd_pick_intra8x8mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, #endif } *Rate = cost; - *rate_y += tot_rate_y; + *rate_y = tot_rate_y; *Distortion = distortion; return RDCOST(mb->rdmult, mb->rddiv, cost, distortion); } @@ -2767,7 +2702,7 @@ static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, if (mbmi->second_ref_frame) x->partition_info->bmi[15].second_mv.as_int = bsi.second_mvs[15].as_int; - return bsi.segment_rd; + return (int)(bsi.segment_rd); } /* Order arr in increasing order, original position stored in idx */ @@ -3251,7 +3186,6 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; - vp9_find_near_mvs(xd, xd->mode_info_context, xd->prev_mode_info_context, &frame_nearest_mv[frame_type], &frame_near_mv[frame_type], @@ -3262,7 +3196,6 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, u_buffer[frame_type] = yv12->u_buffer + recon_uvoffset; v_buffer[frame_type] = yv12->v_buffer + recon_uvoffset; -#if CONFIG_NEWBESTREFMV vp9_find_mv_refs(xd, xd->mode_info_context, xd->prev_mode_info_context, frame_type, @@ -3275,7 +3208,6 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, &frame_best_ref_mv[frame_type], &frame_nearest_mv[frame_type], &frame_near_mv[frame_type]); -#endif } static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, @@ -3453,7 +3385,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, #endif } - if (sse < threshold) { + if ((int)sse < threshold) { unsigned int q2dc = xd->block[24].dequant[0]; /* If there is no codeable 2nd order dc or a very small uniform pixel change change */ @@ -3527,10 +3459,10 @@ static int64_t 
handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, return this_rd; // if 0, this will be re-calculated by caller } -void vp9_rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, - int recon_yoffset, int recon_uvoffset, - int *returnrate, int *returndistortion, - int64_t *returnintra) { +static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, + int recon_yoffset, int recon_uvoffset, + int *returnrate, int *returndistortion, + int64_t *returnintra) { VP9_COMMON *cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; union b_mode_info best_bmodes[16]; @@ -3936,7 +3868,7 @@ void vp9_rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, second_ref, best_yrd, mdcounts, &rate, &rate_y, &distortion, &skippable, - this_rd_thresh, seg_mvs, + (int)this_rd_thresh, seg_mvs, txfm_cache); rate2 += rate; distortion2 += distortion; @@ -4153,7 +4085,9 @@ void vp9_rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < NB_TXFM_MODES; i++) { int64_t adj_rd; if (this_mode != B_PRED) { - adj_rd = this_rd + txfm_cache[i] - txfm_cache[cm->txfm_mode]; + const int64_t txfm_mode_diff = + txfm_cache[i] - txfm_cache[cm->txfm_mode]; + adj_rd = this_rd + txfm_mode_diff; } else { adj_rd = this_rd; } @@ -4268,11 +4202,12 @@ void vp9_rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } end: - store_coding_context(x, &x->mb_context[xd->mb_index], best_mode_index, &best_partition, - &frame_best_ref_mv[xd->mode_info_context->mbmi.ref_frame], - &frame_best_ref_mv[xd->mode_info_context->mbmi.second_ref_frame], - best_pred_diff[0], best_pred_diff[1], best_pred_diff[2], - best_txfm_diff); + store_coding_context(x, &x->mb_context[xd->mb_index], + best_mode_index, &best_partition, + &frame_best_ref_mv[xd->mode_info_context->mbmi.ref_frame], + &frame_best_ref_mv[xd->mode_info_context->mbmi.second_ref_frame], + (int)best_pred_diff[0], (int)best_pred_diff[1], (int)best_pred_diff[2], + best_txfm_diff); } #if CONFIG_SUPERBLOCKS @@ -4834,8 +4769,8 @@ void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x, { 
int zbin_mode_boost_enabled = cpi->zbin_mode_boost_enabled; - vp9_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, - &distortion, &intra_error); + rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, + &distortion, &intra_error); /* restore cpi->zbin_mode_boost_enabled */ cpi->zbin_mode_boost_enabled = zbin_mode_boost_enabled; diff --git a/vp9/encoder/rdopt.h b/vp9/encoder/rdopt.h index ce7c8ca6cfc2474d31e93481e287e2e083b3dbb0..109dbfe4498c23624bbb9ae15243c30a8d3cca25 100644 --- a/vp9/encoder/rdopt.h +++ b/vp9/encoder/rdopt.h @@ -17,10 +17,7 @@ extern void vp9_initialize_rd_consts(VP9_COMP *cpi, int Qvalue); -extern void vp9_rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, - int recon_yoffset, int recon_uvoffset, - int *returnrate, int *returndistortion, - int64_t *returnintra); +extern void vp9_initialize_me_consts(VP9_COMP *cpi, int QIndex); extern void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, int *r, int *d); @@ -28,6 +25,14 @@ extern void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, extern void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int *r, int *d); +extern void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x, + int recon_yoffset, + int recon_uvoffset, int *r, int *d); + +extern int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, + int recon_yoffset, int recon_uvoffset, + int *returnrate, int *returndist); + extern void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCKD *xd, const MODE_INFO *here, int_mv *mvp, int refframe, int *ref_frame_sign_bias, diff --git a/vp9/encoder/temporal_filter.h b/vp9/encoder/temporal_filter.h index b396abfe8566aff9c09cad75e7d4e9ad562d9c02..0d9161db157e7f22ceee92196a4fd8171defd96b 100644 --- a/vp9/encoder/temporal_filter.h +++ b/vp9/encoder/temporal_filter.h @@ -44,4 +44,8 @@ typedef struct { #define TEMPORAL_INVOKE(ctx,fn) vp9_temporal_filter_##fn #endif +struct VP9_COMP; + +extern void vp9_temporal_filter_prepare_c(struct VP9_COMP *cpi, int distance); + 
#endif // __INC_TEMPORAL_FILTER_H diff --git a/vp9/encoder/tokenize.c b/vp9/encoder/tokenize.c index 3c5c1c7a2276a2ffc1105909211dc07f6c954cab..bf835b7f2e4d3203696eea1f47a8e116f4928acb 100644 --- a/vp9/encoder/tokenize.c +++ b/vp9/encoder/tokenize.c @@ -48,9 +48,6 @@ extern unsigned int hybrid_tree_update_hist_16x16[BLOCK_TYPES_16X16][COEF_BANDS] [PREV_COEF_CONTEXTS][ENTROPY_NODES] [2]; #endif /* ENTROPY_STATS */ -void vp9_stuff_mb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run); -void vp9_fix_contexts(MACROBLOCKD *xd); - static TOKENVALUE dct_value_tokens[DCT_MAX_VALUE * 2]; const TOKENVALUE *vp9_dct_value_tokens_ptr; static int dct_value_cost[DCT_MAX_VALUE * 2]; diff --git a/vp9/encoder/tokenize.h b/vp9/encoder/tokenize.h index bef52c505838f0eb10a4d16810ed2d700ad367e7..648e8cd4b528d8fb2534913f6bc6b03f7080e979 100644 --- a/vp9/encoder/tokenize.h +++ b/vp9/encoder/tokenize.h @@ -29,14 +29,22 @@ typedef struct { unsigned char skip_eob_node; } TOKENEXTRA; -int rd_cost_mby(MACROBLOCKD *); - extern int vp9_mby_is_skippable_4x4(MACROBLOCKD *xd, int has_y2_block); extern int vp9_mbuv_is_skippable_4x4(MACROBLOCKD *xd); extern int vp9_mby_is_skippable_8x8(MACROBLOCKD *xd, int has_y2_block); extern int vp9_mbuv_is_skippable_8x8(MACROBLOCKD *xd); extern int vp9_mby_is_skippable_16x16(MACROBLOCKD *xd); +struct VP9_COMP; + +extern void vp9_tokenize_mb(struct VP9_COMP *cpi, MACROBLOCKD *xd, + TOKENEXTRA **t, int dry_run); + +extern void vp9_stuff_mb(struct VP9_COMP *cpi, MACROBLOCKD *xd, + TOKENEXTRA **t, int dry_run); + +extern void vp9_fix_contexts(MACROBLOCKD *xd); + #ifdef ENTROPY_STATS void init_context_counters(); void print_context_counters(); diff --git a/vp9/encoder/variance_c.c b/vp9/encoder/variance_c.c index 1577cac8c1d03c548350d1df106ed18aee42765d..62de70301469b15f8022ec7473159b64024f5a66 100644 --- a/vp9/encoder/variance_c.c +++ b/vp9/encoder/variance_c.c @@ -11,6 +11,7 @@ #include "variance.h" #include "vp9/common/filter.h" +#include 
"vp9/common/subpelvar.h" unsigned int vp9_get_mb_ss_c(const short *src_ptr) { @@ -24,31 +25,6 @@ unsigned int vp9_get_mb_ss_c(const short *src_ptr) { } -static void variance(const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - int w, - int h, - unsigned int *sse, - int *sum) { - int i, j; - int diff; - - *sum = 0; - *sse = 0; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - diff = src_ptr[j] - ref_ptr[j]; - *sum += diff; - *sse += diff * diff; - } - - src_ptr += source_stride; - ref_ptr += recon_stride; - } -} #if CONFIG_SUPERBLOCKS unsigned int vp9_variance32x32_c(const unsigned char *src_ptr, @@ -146,113 +122,6 @@ unsigned int vp9_mse16x16_c(const unsigned char *src_ptr, } -/**************************************************************************** - * - * ROUTINE : filter_block2d_bil_first_pass - * - * INPUTS : UINT8 *src_ptr : Pointer to source block. - * UINT32 src_pixels_per_line : Stride of input block. - * UINT32 pixel_step : Offset between filter input samples (see notes). - * UINT32 output_height : Input block height. - * UINT32 output_width : Input block width. - * INT32 *vp9_filter : Array of 2 bi-linear filter taps. - * - * OUTPUTS : INT32 *output_ptr : Pointer to filtered block. - * - * RETURNS : void - * - * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in - * either horizontal or vertical direction to produce the - * filtered output block. Used to implement first-pass - * of 2-D separable filter. - * - * SPECIAL NOTES : Produces INT32 output to retain precision for next pass. - * Two filter taps should sum to VP9_FILTER_WEIGHT. - * pixel_step defines whether the filter is applied - * horizontally (pixel_step=1) or vertically (pixel_step=stride). - * It defines the offset required to move from one input - * to the next. 
- * - ****************************************************************************/ -static void var_filter_block2d_bil_first_pass(const unsigned char *src_ptr, - unsigned short *output_ptr, - unsigned int src_pixels_per_line, - int pixel_step, - unsigned int output_height, - unsigned int output_width, - const short *vp9_filter) { - unsigned int i, j; - - for (i = 0; i < output_height; i++) { - for (j = 0; j < output_width; j++) { - // Apply bilinear filter - output_ptr[j] = (((int)src_ptr[0] * vp9_filter[0]) + - ((int)src_ptr[pixel_step] * vp9_filter[1]) + - (VP9_FILTER_WEIGHT / 2)) >> VP9_FILTER_SHIFT; - src_ptr++; - } - - // Next row... - src_ptr += src_pixels_per_line - output_width; - output_ptr += output_width; - } -} - -/**************************************************************************** - * - * ROUTINE : filter_block2d_bil_second_pass - * - * INPUTS : INT32 *src_ptr : Pointer to source block. - * UINT32 src_pixels_per_line : Stride of input block. - * UINT32 pixel_step : Offset between filter input samples (see notes). - * UINT32 output_height : Input block height. - * UINT32 output_width : Input block width. - * INT32 *vp9_filter : Array of 2 bi-linear filter taps. - * - * OUTPUTS : UINT16 *output_ptr : Pointer to filtered block. - * - * RETURNS : void - * - * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in - * either horizontal or vertical direction to produce the - * filtered output block. Used to implement second-pass - * of 2-D separable filter. - * - * SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass. - * Two filter taps should sum to VP9_FILTER_WEIGHT. - * pixel_step defines whether the filter is applied - * horizontally (pixel_step=1) or vertically (pixel_step=stride). - * It defines the offset required to move from one input - * to the next. 
- * - ****************************************************************************/ -static void var_filter_block2d_bil_second_pass(const unsigned short *src_ptr, - unsigned char *output_ptr, - unsigned int src_pixels_per_line, - unsigned int pixel_step, - unsigned int output_height, - unsigned int output_width, - const short *vp9_filter) { - unsigned int i, j; - int Temp; - - for (i = 0; i < output_height; i++) { - for (j = 0; j < output_width; j++) { - // Apply filter - Temp = ((int)src_ptr[0] * vp9_filter[0]) + - ((int)src_ptr[pixel_step] * vp9_filter[1]) + - (VP9_FILTER_WEIGHT / 2); - output_ptr[j] = (unsigned int)(Temp >> VP9_FILTER_SHIFT); - src_ptr++; - } - - // Next row... - src_ptr += src_pixels_per_line - output_width; - output_ptr += output_width; - } -} - - unsigned int vp9_sub_pixel_variance4x4_c(const unsigned char *src_ptr, int src_pixels_per_line, int xoffset, @@ -469,72 +338,3 @@ unsigned int vp9_sub_pixel_variance8x16_c(const unsigned char *src_ptr, return vp9_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse); } -#if CONFIG_NEWBESTREFMV -unsigned int vp9_variance2x16_c(const unsigned char *src_ptr, - const int source_stride, - const unsigned char *ref_ptr, - const int recon_stride, - unsigned int *sse) { - unsigned int var; - int avg; - - variance(src_ptr, source_stride, ref_ptr, recon_stride, 2, 16, &var, &avg); - *sse = var; - return (var - ((avg * avg) >> 5)); -} - -unsigned int vp9_variance16x2_c(const unsigned char *src_ptr, - const int source_stride, - const unsigned char *ref_ptr, - const int recon_stride, - unsigned int *sse) { - unsigned int var; - int avg; - - variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 2, &var, &avg); - *sse = var; - return (var - ((avg * avg) >> 5)); -} - -unsigned int vp9_sub_pixel_variance16x2_c(const unsigned char *src_ptr, - const int src_pixels_per_line, - const int xoffset, - const int yoffset, - const unsigned char *dst_ptr, - const int dst_pixels_per_line, - unsigned int *sse) { - 
unsigned short FData3[16 * 3]; // Temp data bufffer used in filtering - unsigned char temp2[20 * 16]; - const short *HFilter, *VFilter; - - HFilter = vp9_bilinear_filters[xoffset]; - VFilter = vp9_bilinear_filters[yoffset]; - - var_filter_block2d_bil_first_pass(src_ptr, FData3, - src_pixels_per_line, 1, 3, 16, HFilter); - var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 2, 16, VFilter); - - return vp9_variance16x2_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse); -} - -unsigned int vp9_sub_pixel_variance2x16_c(const unsigned char *src_ptr, - const int src_pixels_per_line, - const int xoffset, - const int yoffset, - const unsigned char *dst_ptr, - const int dst_pixels_per_line, - unsigned int *sse) { - unsigned short FData3[2 * 17]; // Temp data bufffer used in filtering - unsigned char temp2[2 * 16]; - const short *HFilter, *VFilter; - - HFilter = vp9_bilinear_filters[xoffset]; - VFilter = vp9_bilinear_filters[yoffset]; - - var_filter_block2d_bil_first_pass(src_ptr, FData3, - src_pixels_per_line, 1, 17, 2, HFilter); - var_filter_block2d_bil_second_pass(FData3, temp2, 2, 2, 16, 2, VFilter); - - return vp9_variance2x16_c(temp2, 2, dst_ptr, dst_pixels_per_line, sse); -} -#endif diff --git a/vp9/encoder/x86/x86_csystemdependent.c b/vp9/encoder/x86/x86_csystemdependent.c index 4cff713db73b570bd9c75b010b38c2bcc9e0a256..0176672fcd3e3b0894f3669ee4e7d3de85299718 100644 --- a/vp9/encoder/x86/x86_csystemdependent.c +++ b/vp9/encoder/x86/x86_csystemdependent.c @@ -95,20 +95,6 @@ void vp9_arch_x86_encoder_init(VP9_COMP *cpi) { } #endif -#if HAVE_SSE3 - if (flags & HAS_SSE3) { - cpi->rtcd.search.full_search = vp9_full_search_sadx3; - cpi->rtcd.search.diamond_search = vp9_diamond_search_sadx4; - cpi->rtcd.search.refining_search = vp9_refining_search_sadx4; - } -#endif - - -#if HAVE_SSE4_1 - if (flags & HAS_SSE4_1) { - cpi->rtcd.search.full_search = vp9_full_search_sadx8; - } -#endif #endif } diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk index 
82e1e1053389d22b47a7bc3fe9fb50d5427cdfb4..1afecbfd018b9a1a4e7cd05073175a0993f46b5c 100644 --- a/vp9/vp9_common.mk +++ b/vp9/vp9_common.mk @@ -53,6 +53,7 @@ VP9_COMMON_SRCS-yes += common/reconintra4x4.h VP9_COMMON_SRCS-yes += common/rtcd.c VP9_COMMON_SRCS-yes += common/rtcd_defs.sh VP9_COMMON_SRCS-yes += common/sadmxn.h +VP9_COMMON_SRCS-yes += common/subpelvar.h VP9_COMMON_SRCS-yes += common/seg_common.h VP9_COMMON_SRCS-yes += common/seg_common.c VP9_COMMON_SRCS-yes += common/setupintrarecon.h diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c index bbd9317fee3cbb61ad541b71cf80483c15958a02..49125413ac0f1777b8bca611e9d8ad76d0b76913 100644 --- a/vp9/vp9_cx_iface.c +++ b/vp9/vp9_cx_iface.c @@ -179,7 +179,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, if (cfg->g_pass == VPX_RC_LAST_PASS) { size_t packet_sz = sizeof(FIRSTPASS_STATS); - int n_packets = cfg->rc_twopass_stats_in.sz / packet_sz; + int n_packets = (int)(cfg->rc_twopass_stats_in.sz / packet_sz); FIRSTPASS_STATS *stats; if (!cfg->rc_twopass_stats_in.buf) @@ -698,9 +698,9 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, pkt.data.frame.pts = (dst_time_stamp * ctx->cfg.g_timebase.den + round) / ctx->cfg.g_timebase.num / 10000000; - pkt.data.frame.duration = - (delta * ctx->cfg.g_timebase.den + round) - / ctx->cfg.g_timebase.num / 10000000; + pkt.data.frame.duration = (unsigned long) + ((delta * ctx->cfg.g_timebase.den + round) + / ctx->cfg.g_timebase.num / 10000000); pkt.data.frame.flags = lib_flags << 16; if (lib_flags & FRAMEFLAGS_KEY) diff --git a/vpx_scale/vpx_scale.mk b/vpx_scale/vpx_scale.mk index dc89478bdc1e5f2594e2180f5b487880ac532252..378b91725525a9041fcc2f930979209f10b6bc5a 100644 --- a/vpx_scale/vpx_scale.mk +++ b/vpx_scale/vpx_scale.mk @@ -5,7 +5,6 @@ SCALE_SRCS-yes += vpxscale.h SCALE_SRCS-yes += generic/vpxscale.c SCALE_SRCS-yes += generic/yv12config.c SCALE_SRCS-yes += generic/yv12extend.c -SCALE_SRCS-yes += generic/yv12extend_generic.h 
SCALE_SRCS-$(CONFIG_SPATIAL_RESAMPLING) += generic/gen_scalers.c #neon