diff --git a/build/make/Makefile b/build/make/Makefile
index a369be8cd09a4609efb11bc59bd0501ede92da33..7a2523960d3f653a0727aad5f4d5841758104b69 100644
--- a/build/make/Makefile
+++ b/build/make/Makefile
@@ -103,6 +103,18 @@ test::
 .PHONY: testdata
 testdata::
 
+# Add compiler flags for intrinsic files
+$(BUILD_PFX)%_mmx.c.d: CFLAGS += -mmmx
+$(BUILD_PFX)%_mmx.c.o: CFLAGS += -mmmx
+$(BUILD_PFX)%_sse2.c.d: CFLAGS += -msse2
+$(BUILD_PFX)%_sse2.c.o: CFLAGS += -msse2
+$(BUILD_PFX)%_sse3.c.d: CFLAGS += -msse3
+$(BUILD_PFX)%_sse3.c.o: CFLAGS += -msse3
+$(BUILD_PFX)%_ssse3.c.d: CFLAGS += -mssse3
+$(BUILD_PFX)%_ssse3.c.o: CFLAGS += -mssse3
+$(BUILD_PFX)%_sse4.c.d: CFLAGS += -msse4.1
+$(BUILD_PFX)%_sse4.c.o: CFLAGS += -msse4.1
+
 $(BUILD_PFX)%.c.d: %.c
 	$(if $(quiet),@echo " [DEP] $@")
 	$(qexec)mkdir -p $(dir $@)
diff --git a/build/make/configure.sh b/build/make/configure.sh
index cb7d7fb118be0c954d85d19b5c33030510201aba..ee4493d2b5f3b870ba0c7ee66c6fce350a95de38 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -266,12 +266,13 @@ else
 fi
 TMP_H="${TMPDIRx}/vpx-conf-$$-${RANDOM}.h"
 TMP_C="${TMPDIRx}/vpx-conf-$$-${RANDOM}.c"
+TMP_CC="${TMPDIRx}/vpx-conf-$$-${RANDOM}.cc"
 TMP_O="${TMPDIRx}/vpx-conf-$$-${RANDOM}.o"
 TMP_X="${TMPDIRx}/vpx-conf-$$-${RANDOM}.x"
 TMP_ASM="${TMPDIRx}/vpx-conf-$$-${RANDOM}.asm"
 
 clean_temp_files() {
-  rm -f ${TMP_C} ${TMP_H} ${TMP_O} ${TMP_X} ${TMP_ASM}
+  rm -f ${TMP_C} ${TMP_CC} ${TMP_H} ${TMP_O} ${TMP_X} ${TMP_ASM}
 }
 
 #
@@ -292,9 +293,9 @@ check_cc() {
 
 check_cxx() {
   log check_cxx "$@"
-  cat >${TMP_C}
-  log_file ${TMP_C}
-  check_cmd ${CXX} ${CXXFLAGS} "$@" -c -o ${TMP_O} ${TMP_C}
+  cat >${TMP_CC}
+  log_file ${TMP_CC}
+  check_cmd ${CXX} ${CXXFLAGS} "$@" -c -o ${TMP_O} ${TMP_CC}
 }
 
 check_cpp() {
@@ -1071,7 +1072,7 @@ EOF
         tune_cflags="-march="
         setup_gnu_toolchain
         #for 32 bit x86 builds, -O3 did not turn on this flag
-        enabled optimizations && check_add_cflags -fomit-frame-pointer
+        enabled optimizations && disabled gprof && check_add_cflags -fomit-frame-pointer
         ;;
     vs*)
         # When building with Microsoft Visual Studio the assembler is
diff --git a/configure b/configure
index 3854ae9ead8b79da93a66dd0b075e9535d9935f1..28676fbc6c3a6e99134969b7ddd6e2052a6f08e5 100755
--- a/configure
+++ b/configure
@@ -243,19 +243,11 @@ HAVE_LIST="
     unistd_h
 "
 EXPERIMENT_LIST="
-    csm
-    new_mvref
-    implicit_segmentation
-    newbintramodes
-    comp_interintra_pred
-    enable_6tap
-    abovesprefmv
-    code_nonzerocount
-    useselectrefmv
-    modelcoefprob
-    loop_dering
-    implicit_compoundinter_weight
-    scatterscan
+    oneshotq
+    multiple_arf
+    non420
+    alpha
+    balanced_coeftree
 "
 CONFIG_LIST="
     external_build
@@ -608,7 +600,10 @@ process_toolchain() {
         check_add_cflags -Wimplicit-function-declaration
         check_add_cflags -Wuninitialized
         check_add_cflags -Wunused-variable
-        check_add_cflags -Wunused-but-set-variable
+        case ${CC} in
+          *clang*) ;;
+          *) check_add_cflags -Wunused-but-set-variable ;;
+        esac
         enabled extra_warnings || check_add_cflags -Wno-unused-function
     fi
 
diff --git a/test/acm_random.h b/test/acm_random.h
index 13903c66ae67da778ff2dc9c88a85f7c200ba0b8..cd33d1268126380fdb3eef0c8e7a8d571b1b8a25 100644
--- a/test/acm_random.h
+++ b/test/acm_random.h
@@ -34,6 +34,13 @@ class ACMRandom {
     return (value >> 24) & 0xff;
   }
 
+  uint8_t Rand8Extremes(void) {
+    // Returns a random value near 0 or near 255, to better exercise
+    // saturation behavior.
+    const uint8_t r = Rand8();
+    return r < 128 ? r << 4 : r >> 4;
+  }
+
   int PseudoUniform(int range) {
     return random_.Generate(range);
   }
diff --git a/test/borders_test.cc b/test/borders_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..49505ee66c947d9ebb09d69de9365b08bbe27f58
--- /dev/null
+++ b/test/borders_test.cc
@@ -0,0 +1,86 @@
+/*
+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include <climits>
+#include <vector>
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+
+namespace {
+
+class BordersTest : public ::libvpx_test::EncoderTest,
+    public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
+ protected:
+  BordersTest() : EncoderTest(GET_PARAM(0)) {}
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(GET_PARAM(1));
+  }
+
+  virtual bool Continue() const {
+    return !HasFatalFailure() && !abort_;
+  }
+
+  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
+                                  ::libvpx_test::Encoder *encoder) {
+    if (video->frame() == 1) {
+      encoder->Control(VP8E_SET_CPUUSED, 0);
+      encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
+      encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
+      encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
+      encoder->Control(VP8E_SET_ARNR_TYPE, 3);
+    }
+  }
+
+  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
+    if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) {
+    }
+  }
+};
+
+TEST_P(BordersTest, TestEncodeHighBitrate) {
+  // Validate that this clip, whose width is not a multiple of 64, encodes
+  // and decodes without a mismatch when passing in a very low max q. This
+  // pushes the encoder to producing lots of big partitions which will
+  // likely extend into the border and test the border condition.
+  cfg_.g_lag_in_frames = 25;
+  cfg_.rc_2pass_vbr_minsection_pct = 5;
+  cfg_.rc_2pass_vbr_maxsection_pct = 2000;
+  cfg_.rc_target_bitrate = 2000;
+  cfg_.rc_max_quantizer = 10;
+
+  ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
+                                       40);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+TEST_P(BordersTest, TestLowBitrate) {
+  // Validate that this clip encodes and decodes without a mismatch
+  // when passing in a very high min q. This pushes the encoder to producing
+  // lots of small partitions, which will test the other border condition.
+
+  cfg_.g_lag_in_frames = 25;
+  cfg_.rc_2pass_vbr_minsection_pct = 5;
+  cfg_.rc_2pass_vbr_maxsection_pct = 2000;
+  cfg_.rc_target_bitrate = 200;
+  cfg_.rc_min_quantizer = 40;
+
+  ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
+                                       40);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+VP9_INSTANTIATE_TEST_CASE(BordersTest, ::testing::Values(
+    ::libvpx_test::kTwoPassGood));
+}  // namespace
diff --git a/test/convolve_test.cc b/test/convolve_test.cc
index 35065a41f755a30589e91bb1094db2b2e85dbb5d..f5e0cf736db1a48cd4c9daa55bcb65c5a483d405 100644
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -8,6 +8,10 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
 
 extern "C" {
 #include "./vpx_config.h"
@@ -16,10 +20,6 @@ extern "C" {
 #include "vpx_mem/vpx_mem.h"
 #include "vpx_ports/mem.h"
 }
-#include "third_party/googletest/src/include/gtest/gtest.h"
-#include "test/acm_random.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
 
 namespace {
 typedef void (*convolve_fn_t)(const uint8_t *src, int src_stride,
@@ -46,27 +46,27 @@ struct ConvolveFunctions {
 // Reference 8-tap subpixel filter, slightly modified to fit into this test.
 #define VP9_FILTER_WEIGHT 128
 #define VP9_FILTER_SHIFT 7
-static uint8_t clip_pixel(int x) {
+uint8_t clip_pixel(int x) {
   return x < 0 ? 0 : x > 255 ? 255 : x;
 }
 
-static void filter_block2d_8_c(const uint8_t *src_ptr,
-                               const unsigned int src_stride,
-                               const int16_t *HFilter,
-                               const int16_t *VFilter,
-                               uint8_t *dst_ptr,
-                               unsigned int dst_stride,
-                               unsigned int output_width,
-                               unsigned int output_height) {
+void filter_block2d_8_c(const uint8_t *src_ptr,
+                        const unsigned int src_stride,
+                        const int16_t *HFilter,
+                        const int16_t *VFilter,
+                        uint8_t *dst_ptr,
+                        unsigned int dst_stride,
+                        unsigned int output_width,
+                        unsigned int output_height) {
   // Between passes, we use an intermediate buffer whose height is extended to
   // have enough horizontally filtered values as input for the vertical pass.
   // This buffer is allocated to be big enough for the largest block type we
   // support.
   const int kInterp_Extend = 4;
   const unsigned int intermediate_height =
-    (kInterp_Extend - 1) + output_height + kInterp_Extend;
+      (kInterp_Extend - 1) + output_height + kInterp_Extend;
 
   /* Size of intermediate_buffer is max_intermediate_height * filter_max_width,
    * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
   *                                = 23
   * and filter_max_width = 16
   */
-  uint8_t intermediate_buffer[23 * 16];
+  uint8_t intermediate_buffer[71 * 64];
   const int intermediate_next_stride = 1 - intermediate_height * output_width;
 
   // Horizontal pass (src -> transposed intermediate).
@@ -87,15 +87,15 @@ static void filter_block2d_8_c(const uint8_t *src_ptr,
   for (i = 0; i < intermediate_height; ++i) {
     for (j = 0; j < output_width; ++j) {
       // Apply filter...
-      int temp = ((int)src_ptr[0] * HFilter[0]) +
-          ((int)src_ptr[1] * HFilter[1]) +
-          ((int)src_ptr[2] * HFilter[2]) +
-          ((int)src_ptr[3] * HFilter[3]) +
-          ((int)src_ptr[4] * HFilter[4]) +
-          ((int)src_ptr[5] * HFilter[5]) +
-          ((int)src_ptr[6] * HFilter[6]) +
-          ((int)src_ptr[7] * HFilter[7]) +
-          (VP9_FILTER_WEIGHT >> 1); // Rounding
+      const int temp = (src_ptr[0] * HFilter[0]) +
+          (src_ptr[1] * HFilter[1]) +
+          (src_ptr[2] * HFilter[2]) +
+          (src_ptr[3] * HFilter[3]) +
+          (src_ptr[4] * HFilter[4]) +
+          (src_ptr[5] * HFilter[5]) +
+          (src_ptr[6] * HFilter[6]) +
+          (src_ptr[7] * HFilter[7]) +
+          (VP9_FILTER_WEIGHT >> 1);  // Rounding
 
       // Normalize back to 0-255...
       *output_ptr = clip_pixel(temp >> VP9_FILTER_SHIFT);
@@ -115,15 +115,15 @@ static void filter_block2d_8_c(const uint8_t *src_ptr,
   for (i = 0; i < output_height; ++i) {
     for (j = 0; j < output_width; ++j) {
      // Apply filter...
-      int temp = ((int)src_ptr[0] * VFilter[0]) +
-          ((int)src_ptr[1] * VFilter[1]) +
-          ((int)src_ptr[2] * VFilter[2]) +
-          ((int)src_ptr[3] * VFilter[3]) +
-          ((int)src_ptr[4] * VFilter[4]) +
-          ((int)src_ptr[5] * VFilter[5]) +
-          ((int)src_ptr[6] * VFilter[6]) +
-          ((int)src_ptr[7] * VFilter[7]) +
-          (VP9_FILTER_WEIGHT >> 1); // Rounding
+      const int temp = (src_ptr[0] * VFilter[0]) +
+          (src_ptr[1] * VFilter[1]) +
+          (src_ptr[2] * VFilter[2]) +
+          (src_ptr[3] * VFilter[3]) +
+          (src_ptr[4] * VFilter[4]) +
+          (src_ptr[5] * VFilter[5]) +
+          (src_ptr[6] * VFilter[6]) +
+          (src_ptr[7] * VFilter[7]) +
+          (VP9_FILTER_WEIGHT >> 1);  // Rounding
 
       // Normalize back to 0-255...
       *dst_ptr++ = clip_pixel(temp >> VP9_FILTER_SHIFT);
@@ -135,12 +135,12 @@ static void filter_block2d_8_c(const uint8_t *src_ptr,
   }
 }
 
-static void block2d_average_c(uint8_t *src,
-                              unsigned int src_stride,
-                              uint8_t *output_ptr,
-                              unsigned int output_stride,
-                              unsigned int output_width,
-                              unsigned int output_height) {
+void block2d_average_c(uint8_t *src,
+                       unsigned int src_stride,
+                       uint8_t *output_ptr,
+                       unsigned int output_stride,
+                       unsigned int output_width,
+                       unsigned int output_height) {
   unsigned int i, j;
   for (i = 0; i < output_height; ++i) {
     for (j = 0; j < output_width; ++j) {
@@ -150,21 +150,21 @@ static void block2d_average_c(uint8_t *src,
     }
   }
 }
 
-static void filter_average_block2d_8_c(const uint8_t *src_ptr,
-                                       const unsigned int src_stride,
-                                       const int16_t *HFilter,
-                                       const int16_t *VFilter,
-                                       uint8_t *dst_ptr,
-                                       unsigned int dst_stride,
-                                       unsigned int output_width,
-                                       unsigned int output_height) {
-  uint8_t tmp[16*16];
-
-  assert(output_width <= 16);
-  assert(output_height <= 16);
-  filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 16,
+void filter_average_block2d_8_c(const uint8_t *src_ptr,
+                                const unsigned int src_stride,
+                                const int16_t *HFilter,
+                                const int16_t *VFilter,
+                                uint8_t *dst_ptr,
+                                unsigned int dst_stride,
+                                unsigned int output_width,
+                                unsigned int output_height) {
+  uint8_t tmp[64 * 64];
+
+  assert(output_width <= 64);
+  assert(output_height <= 64);
+  filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 64,
                      output_width, output_height);
-  block2d_average_c(tmp, 16, dst_ptr, dst_stride,
+  block2d_average_c(tmp, 64, dst_ptr, dst_stride,
                     output_width, output_height);
 }
 
@@ -173,10 +173,9 @@ class ConvolveTest : public PARAMS(int, int, const ConvolveFunctions*) {
   static void SetUpTestCase() {
     // Force input_ to be unaligned, output to be 16 byte aligned.
     input_ = reinterpret_cast<uint8_t*>(
-        vpx_memalign(kDataAlignment, kOuterBlockSize * kOuterBlockSize + 1))
-        + 1;
+        vpx_memalign(kDataAlignment, kInputBufferSize + 1)) + 1;
     output_ = reinterpret_cast<uint8_t*>(
-        vpx_memalign(kDataAlignment, kOuterBlockSize * kOuterBlockSize));
+        vpx_memalign(kDataAlignment, kOutputBufferSize));
   }
 
   static void TearDownTestCase() {
@@ -186,62 +185,63 @@ class ConvolveTest : public PARAMS(int, int, const ConvolveFunctions*) {
     output_ = NULL;
   }
 
-  protected:
-  static const int kDataAlignment = 16;
-  static const int kOuterBlockSize = 32;
-  static const int kInputStride = kOuterBlockSize;
-  static const int kOutputStride = kOuterBlockSize;
-  static const int kMaxDimension = 16;
-
-  int Width() const { return GET_PARAM(0); }
-  int Height() const { return GET_PARAM(1); }
-  int BorderLeft() const {
-    const int center = (kOuterBlockSize - Width()) / 2;
-    return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1);
-  }
-  int BorderTop() const { return (kOuterBlockSize - Height()) / 2; }
-
-  bool IsIndexInBorder(int i) {
-    return (i < BorderTop() * kOuterBlockSize ||
-            i >= (BorderTop() + Height()) * kOuterBlockSize ||
-            i % kOuterBlockSize < BorderLeft() ||
-            i % kOuterBlockSize >= (BorderLeft() + Width()));
-  }
+ protected:
+  static const int kDataAlignment = 16;
+  static const int kOuterBlockSize = 128;
+  static const int kInputStride = kOuterBlockSize;
+  static const int kOutputStride = kOuterBlockSize;
+  static const int kMaxDimension = 64;
+  static const int kInputBufferSize = kOuterBlockSize * kOuterBlockSize;
+  static const int kOutputBufferSize = kOuterBlockSize * kOuterBlockSize;
+
+  int Width() const { return GET_PARAM(0); }
+  int Height() const { return GET_PARAM(1); }
+  int BorderLeft() const {
+    const int center = (kOuterBlockSize - Width()) / 2;
+    return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1);
+  }
+  int BorderTop() const { return (kOuterBlockSize - Height()) / 2; }
 
-  virtual void SetUp() {
-    UUT_ = GET_PARAM(2);
-    memset(input_, 0, sizeof(input_));
-    /* Set up guard blocks for an inner block cetered in the outer block */
-    for (int i = 0; i < kOuterBlockSize * kOuterBlockSize; ++i) {
-      if (IsIndexInBorder(i))
-        output_[i] = 255;
-      else
-        output_[i] = 0;
-    }
+  bool IsIndexInBorder(int i) {
+    return (i < BorderTop() * kOuterBlockSize ||
+            i >= (BorderTop() + Height()) * kOuterBlockSize ||
+            i % kOuterBlockSize < BorderLeft() ||
+            i % kOuterBlockSize >= (BorderLeft() + Width()));
+  }
 
-    ::libvpx_test::ACMRandom prng;
-    for (int i = 0; i < kOuterBlockSize * kOuterBlockSize; ++i)
-      input_[i] = prng.Rand8();
+  virtual void SetUp() {
+    UUT_ = GET_PARAM(2);
+    /* Set up guard blocks for an inner block centered in the outer block */
+    for (int i = 0; i < kOutputBufferSize; ++i) {
+      if (IsIndexInBorder(i))
+        output_[i] = 255;
+      else
+        output_[i] = 0;
     }
 
-  void CheckGuardBlocks() {
-    for (int i = 0; i < kOuterBlockSize * kOuterBlockSize; ++i) {
-      if (IsIndexInBorder(i))
-        EXPECT_EQ(255, output_[i]);
-    }
-  }
+    ::libvpx_test::ACMRandom prng;
+    for (int i = 0; i < kInputBufferSize; ++i)
+      input_[i] = prng.Rand8Extremes();
  }
 
-  uint8_t* input() {
-    return input_ + BorderTop() * kOuterBlockSize + BorderLeft();
+  void CheckGuardBlocks() {
+    for (int i = 0; i < kOutputBufferSize; ++i) {
+      if (IsIndexInBorder(i))
+        EXPECT_EQ(255, output_[i]);
    }
+  }
 
-  uint8_t* output() {
-    return output_ + BorderTop() * kOuterBlockSize + BorderLeft();
-  }
+  uint8_t* input() const {
+    return input_ + BorderTop() * kOuterBlockSize + BorderLeft();
+  }
 
-  const ConvolveFunctions* UUT_;
-  static uint8_t* input_;
-  static uint8_t* output_;
+  uint8_t* output() const {
+    return output_ + BorderTop() * kOuterBlockSize + BorderLeft();
+  }
+
+  const ConvolveFunctions* UUT_;
+  static uint8_t* input_;
+  static uint8_t* output_;
 };
 
 uint8_t* ConvolveTest::input_ = NULL;
 uint8_t* ConvolveTest::output_ = NULL;
@@ -303,11 +303,33 @@ TEST_P(ConvolveTest, Copy2D) {
 
 const int16_t (*kTestFilterList[])[8] = {
   vp9_bilinear_filters,
-  vp9_sub_pel_filters_6,
   vp9_sub_pel_filters_8,
   vp9_sub_pel_filters_8s,
   vp9_sub_pel_filters_8lp
 };
+const int kNumFilterBanks = sizeof(kTestFilterList) /
+    sizeof(kTestFilterList[0]);
+const int kNumFilters = 16;
+
+TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
+  for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
+    const int16_t (*filters)[8] = kTestFilterList[filter_bank];
+    for (int i = 0; i < kNumFilters; i++) {
+      const int p0 = filters[i][0] + filters[i][1];
+      const int p1 = filters[i][2] + filters[i][3];
+      const int p2 = filters[i][4] + filters[i][5];
+      const int p3 = filters[i][6] + filters[i][7];
+      EXPECT_LE(p0, 128);
+      EXPECT_LE(p1, 128);
+      EXPECT_LE(p2, 128);
+      EXPECT_LE(p3, 128);
+      EXPECT_LE(p0 + p3, 128);
+      EXPECT_LE(p0 + p3 + p1, 128);
+      EXPECT_LE(p0 + p3 + p1 + p2, 128);
+      EXPECT_EQ(p0 + p1 + p2 + p3, 128);
+    }
+  }
+}
 
 const int16_t kInvalidFilter[8] = { 0 };
 
@@ -316,12 +338,9 @@ TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
   uint8_t* const out = output();
   uint8_t ref[kOutputStride * kMaxDimension];
 
-  const int kNumFilterBanks = sizeof(kTestFilterList) /
-      sizeof(kTestFilterList[0]);
   for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
     const int16_t (*filters)[8] = kTestFilterList[filter_bank];
-    const int kNumFilters = 16;
 
     for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
       for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
@@ -368,7 +387,7 @@ TEST_P(ConvolveTest, MatchesReferenceAveragingSubpixelFilter) {
   ::libvpx_test::ACMRandom prng;
   for (int y = 0; y < Height(); ++y) {
     for (int x = 0; x < Width(); ++x) {
-      const uint8_t r = prng.Rand8();
+      const uint8_t r = prng.Rand8Extremes();
 
       out[y * kOutputStride + x] = r;
       ref[y * kOutputStride + x] = r;
@@ -440,16 +459,17 @@ DECLARE_ALIGNED(256, const int16_t, kChangeFilters[16][8]) = {
 TEST_P(ConvolveTest, ChangeFilterWorks) {
   uint8_t* const in = input();
   uint8_t* const out = output();
+  const int kPixelSelected = 4;
 
   REGISTER_STATE_CHECK(UUT_->h8_(in, kInputStride, out, kOutputStride,
                                  kChangeFilters[8], 17, kChangeFilters[4], 16,
                                  Width(), Height()));
 
   for (int x = 0; x < Width(); ++x) {
-    if (x < 8)
-      ASSERT_EQ(in[4], out[x]) << "x == " << x;
-    else
-      ASSERT_EQ(in[12], out[x]) << "x == " << x;
+    const int kQ4StepAdjust = x >> 4;
+    const int kFilterPeriodAdjust = (x >> 3) << 3;
+    const int ref_x = kQ4StepAdjust + kFilterPeriodAdjust + kPixelSelected;
+    ASSERT_EQ(in[ref_x], out[x]) << "x == " << x;
   }
 
   REGISTER_STATE_CHECK(UUT_->v8_(in, kInputStride, out, kOutputStride,
@@ -457,10 +477,10 @@ TEST_P(ConvolveTest, ChangeFilterWorks) {
                                  Width(), Height()));
 
   for (int y = 0; y < Height(); ++y) {
-    if (y < 8)
-      ASSERT_EQ(in[4 * kInputStride], out[y * kOutputStride]) << "y == " << y;
-    else
-      ASSERT_EQ(in[12 * kInputStride], out[y * kOutputStride]) << "y == " << y;
+    const int kQ4StepAdjust = y >> 4;
+    const int kFilterPeriodAdjust = (y >> 3) << 3;
+    const int ref_y = kQ4StepAdjust + kFilterPeriodAdjust + kPixelSelected;
+    ASSERT_EQ(in[ref_y * kInputStride], out[y * kInputStride]) << "y == " << y;
  }
 REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride,
@@ -468,9 +488,13 @@ TEST_P(ConvolveTest, ChangeFilterWorks) {
                                  Width(), Height()));
 
   for (int y = 0; y < Height(); ++y) {
+    const int kQ4StepAdjustY = y >> 4;
+    const int kFilterPeriodAdjustY = (y >> 3) << 3;
+    const int ref_y = kQ4StepAdjustY + kFilterPeriodAdjustY + kPixelSelected;
     for (int x = 0; x < Width(); ++x) {
-      const int ref_x = x < 8 ? 4 : 12;
-      const int ref_y = y < 8 ? 4 : 12;
+      const int kQ4StepAdjustX = x >> 4;
+      const int kFilterPeriodAdjustX = (x >> 3) << 3;
+      const int ref_x = kQ4StepAdjustX + kFilterPeriodAdjustX + kPixelSelected;
 
       ASSERT_EQ(in[ref_y * kInputStride + ref_x], out[y * kOutputStride + x])
           << "x == " << x << ", y == " << y;
@@ -489,10 +513,17 @@ const ConvolveFunctions convolve8_c(
 INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values(
     make_tuple(4, 4, &convolve8_c),
     make_tuple(8, 4, &convolve8_c),
+    make_tuple(4, 8, &convolve8_c),
     make_tuple(8, 8, &convolve8_c),
     make_tuple(16, 8, &convolve8_c),
-    make_tuple(16, 16, &convolve8_c)));
-}
+    make_tuple(8, 16, &convolve8_c),
+    make_tuple(16, 16, &convolve8_c),
+    make_tuple(32, 16, &convolve8_c),
+    make_tuple(16, 32, &convolve8_c),
+    make_tuple(32, 32, &convolve8_c),
+    make_tuple(64, 32, &convolve8_c),
+    make_tuple(32, 64, &convolve8_c),
+    make_tuple(64, 64, &convolve8_c)));
 
 #if HAVE_SSSE3
 const ConvolveFunctions convolve8_ssse3(
@@ -503,7 +534,16 @@ const ConvolveFunctions convolve8_ssse3(
 INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(
     make_tuple(4, 4, &convolve8_ssse3),
     make_tuple(8, 4, &convolve8_ssse3),
+    make_tuple(4, 8, &convolve8_ssse3),
     make_tuple(8, 8, &convolve8_ssse3),
     make_tuple(16, 8, &convolve8_ssse3),
-    make_tuple(16, 16, &convolve8_ssse3)));
+    make_tuple(8, 16, &convolve8_ssse3),
+    make_tuple(16, 16, &convolve8_ssse3),
+    make_tuple(32, 16, &convolve8_ssse3),
+    make_tuple(16, 32, &convolve8_ssse3),
+    make_tuple(32, 32, &convolve8_ssse3),
+    make_tuple(64, 32, &convolve8_ssse3),
+    make_tuple(32, 64, &convolve8_ssse3),
+    make_tuple(64, 64, &convolve8_ssse3)));
 #endif
+}  // namespace
diff --git a/test/dct16x16_test.cc b/test/dct16x16_test.cc
index f6d2d5994bd8f77ddebfd6f01fbe5e66c4fa344b..9fb45d6b7e666f9ac981f8a0f3b8fde372088aef 100644
--- a/test/dct16x16_test.cc
+++ b/test/dct16x16_test.cc
@@ -17,6 +17,7 @@ extern "C" {
 #include "vp9/common/vp9_entropy.h"
 #include "vp9_rtcd.h"
+void vp9_short_idct16x16_add_c(short *input, uint8_t *output, int pitch);
 }
 
 #include "acm_random.h"
@@ -269,19 +270,23 @@ TEST(VP9Idct16x16Test, AccuracyCheck) {
   const int count_test_block = 1000;
   for (int i = 0; i < count_test_block; ++i) {
     int16_t in[256], coeff[256];
-    int16_t out_c[256];
+    uint8_t dst[256], src[256];
     double out_r[256];
 
+    for (int j = 0; j < 256; ++j) {
+      src[j] = rnd.Rand8();
+      dst[j] = rnd.Rand8();
+    }
     // Initialize a test block with input range [-255, 255].
     for (int j = 0; j < 256; ++j)
-      in[j] = rnd.Rand8() - rnd.Rand8();
+      in[j] = src[j] - dst[j];
 
     reference_16x16_dct_2d(in, out_r);
     for (int j = 0; j < 256; j++)
       coeff[j] = round(out_r[j]);
 
-    vp9_short_idct16x16_c(coeff, out_c, 32);
+    vp9_short_idct16x16_add_c(coeff, dst, 16);
 
     for (int j = 0; j < 256; ++j) {
-      const int diff = out_c[j] - in[j];
+      const int diff = dst[j] - src[j];
       const int error = diff * diff;
       EXPECT_GE(1, error)
           << "Error: 16x16 IDCT has error " << error
@@ -289,7 +294,7 @@ TEST(VP9Idct16x16Test, AccuracyCheck) {
     }
   }
 }
-#if 1
+
 // we need enable fdct test once we re-do the 16 point fdct.
 TEST(VP9Fdct16x16Test, AccuracyCheck) {
   ACMRandom rnd(ACMRandom::DeterministicSeed());
@@ -299,18 +304,22 @@ TEST(VP9Fdct16x16Test, AccuracyCheck) {
   for (int i = 0; i < count_test_block; ++i) {
     int16_t test_input_block[256];
     int16_t test_temp_block[256];
-    int16_t test_output_block[256];
+    uint8_t dst[256], src[256];
 
+    for (int j = 0; j < 256; ++j) {
+      src[j] = rnd.Rand8();
+      dst[j] = rnd.Rand8();
+    }
     // Initialize a test block with input range [-255, 255].
     for (int j = 0; j < 256; ++j)
-      test_input_block[j] = rnd.Rand8() - rnd.Rand8();
+      test_input_block[j] = src[j] - dst[j];
 
     const int pitch = 32;
     vp9_short_fdct16x16_c(test_input_block, test_temp_block, pitch);
-    vp9_short_idct16x16_c(test_temp_block, test_output_block, pitch);
+    vp9_short_idct16x16_add_c(test_temp_block, dst, 16);
 
     for (int j = 0; j < 256; ++j) {
-      const int diff = test_input_block[j] - test_output_block[j];
+      const int diff = dst[j] - src[j];
       const int error = diff * diff;
       if (max_error < error)
         max_error = error;
@@ -354,6 +363,4 @@ TEST(VP9Fdct16x16Test, CoeffSizeCheck) {
     }
   }
 }
-#endif
-
 }  // namespace
diff --git a/test/dct32x32_test.cc b/test/dct32x32_test.cc
index a565270993d822146181561d64a7bce5f56831dc..e05d482b6503c6f3585fd9f2a4ef1cabeb7045b0 100644
--- a/test/dct32x32_test.cc
+++ b/test/dct32x32_test.cc
@@ -18,7 +18,7 @@ extern "C" {
 #include "vp9/common/vp9_entropy.h"
 #include "./vp9_rtcd.h"
 void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch);
-void vp9_short_idct32x32_c(short *input, short *output, int pitch);
+void vp9_short_idct32x32_add_c(short *input, uint8_t *output, int pitch);
 }
 
 #include "test/acm_random.h"
@@ -91,28 +91,31 @@ static void reference_32x32_dct_2d(int16_t input[32*32], double output[32*32]) {
   }
 }
 
-
 TEST(VP9Idct32x32Test, AccuracyCheck) {
   ACMRandom rnd(ACMRandom::DeterministicSeed());
   const int count_test_block = 1000;
   for (int i = 0; i < count_test_block; ++i) {
     int16_t in[1024], coeff[1024];
-    int16_t out_c[1024];
+    uint8_t dst[1024], src[1024];
     double out_r[1024];
 
+    for (int j = 0; j < 1024; ++j) {
+      src[j] = rnd.Rand8();
+      dst[j] = rnd.Rand8();
+    }
     // Initialize a test block with input range [-255, 255].
     for (int j = 0; j < 1024; ++j)
-      in[j] = rnd.Rand8() - rnd.Rand8();
+      in[j] = src[j] - dst[j];
 
     reference_32x32_dct_2d(in, out_r);
     for (int j = 0; j < 1024; j++)
       coeff[j] = round(out_r[j]);
-    vp9_short_idct32x32_c(coeff, out_c, 64);
+    vp9_short_idct32x32_add_c(coeff, dst, 32);
 
     for (int j = 0; j < 1024; ++j) {
-      const int diff = out_c[j] - in[j];
+      const int diff = dst[j] - src[j];
       const int error = diff * diff;
       EXPECT_GE(1, error)
-          << "Error: 3x32 IDCT has error " << error
+          << "Error: 32x32 IDCT has error " << error
          << " at index " << j;
     }
   }
@@ -126,18 +129,22 @@ TEST(VP9Fdct32x32Test, AccuracyCheck) {
   for (int i = 0; i < count_test_block; ++i) {
     int16_t test_input_block[1024];
     int16_t test_temp_block[1024];
-    int16_t test_output_block[1024];
+    uint8_t dst[1024], src[1024];
 
+    for (int j = 0; j < 1024; ++j) {
+      src[j] = rnd.Rand8();
+      dst[j] = rnd.Rand8();
+    }
     // Initialize a test block with input range [-255, 255].
     for (int j = 0; j < 1024; ++j)
-      test_input_block[j] = rnd.Rand8() - rnd.Rand8();
+      test_input_block[j] = src[j] - dst[j];
 
     const int pitch = 64;
     vp9_short_fdct32x32_c(test_input_block, test_temp_block, pitch);
-    vp9_short_idct32x32_c(test_temp_block, test_output_block, pitch);
+    vp9_short_idct32x32_add_c(test_temp_block, dst, 32);
 
     for (int j = 0; j < 1024; ++j) {
-      const unsigned diff = test_input_block[j] - test_output_block[j];
+      const unsigned diff = dst[j] - src[j];
       const unsigned error = diff * diff;
       if (max_error < error)
         max_error = error;
diff --git a/test/encode_test_driver.h b/test/encode_test_driver.h
index 5a37816ab941b5b8288d11da1e99f57fffda686d..6aeb96b1fdfb62cdf1f4c800cc9555d7250f0611 100644
--- a/test/encode_test_driver.h
+++ b/test/encode_test_driver.h
@@ -10,9 +10,10 @@
 #ifndef TEST_ENCODE_TEST_DRIVER_H_
 #define TEST_ENCODE_TEST_DRIVER_H_
 
-#include "./vpx_config.h"
 #include <string>
 #include <vector>
+
+#include "./vpx_config.h"
 #include "third_party/googletest/src/include/gtest/gtest.h"
 #include "vpx/vpx_encoder.h"
 
@@ -46,7 +47,7 @@ enum TestMode {
 class CxDataIterator {
  public:
   explicit CxDataIterator(vpx_codec_ctx_t *encoder)
-    : encoder_(encoder), iter_(NULL) {}
+      : encoder_(encoder), iter_(NULL) {}
 
   const vpx_codec_cx_pkt_t *Next() {
     return vpx_codec_get_cx_data(encoder_, &iter_);
@@ -92,7 +93,7 @@ class Encoder {
     memset(&encoder_, 0, sizeof(encoder_));
   }
 
-  ~Encoder() {
+  virtual ~Encoder() {
     vpx_codec_destroy(&encoder_);
   }
 
diff --git a/test/error_resilience_test.cc b/test/error_resilience_test.cc
index 1eee0f55abd63ba31fd630a05aadf07c9412b065..ddfbd0fd86b48ee42161e3d661ac8671b3f5b8ca 100644
--- a/test/error_resilience_test.cc
+++ b/test/error_resilience_test.cc
@@ -206,11 +206,17 @@ TEST_P(ErrorResilienceTest, DropFramesWithoutRecovery) {
   // reset previously set error/droppable frames
   Reset();
 
+#if 0
+  // TODO(jkoleszar): This test is disabled for the time being as too
+  // sensitive. It's not clear how to set a reasonable threshold for
+  // this behavior.
+
   // Now set an arbitrary set of error frames that are non-droppable
   unsigned int num_error_frames = 3;
   unsigned int error_frame_list[] = {3, 10, 20};
   SetErrorFrames(num_error_frames, error_frame_list);
   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
   // Test that dropping an arbitrary set of inter frames does not hurt too much
   // Note the Average Mismatch PSNR is the average of the PSNR between
   // decoded frame and encoder's version of the same frame for all frames
@@ -219,6 +225,7 @@ TEST_P(ErrorResilienceTest, DropFramesWithoutRecovery) {
   std::cout << " Mismatch PSNR: "
             << psnr_resilience_mismatch << "\n";
   EXPECT_GT(psnr_resilience_mismatch, 20.0);
+#endif
 }
 
 VP8_INSTANTIATE_TEST_CASE(ErrorResilienceTest, ONE_PASS_TEST_MODES);
diff --git a/test/fdct4x4_test.cc b/test/fdct4x4_test.cc
index dfb64c3a2e84becce6b375ddbefe60f474648449..1c887bb6bc7fb70a588ed5d3152e3571a95ed59f 100644
--- a/test/fdct4x4_test.cc
+++ b/test/fdct4x4_test.cc
@@ -96,11 +96,15 @@ TEST(Vp9Fdct4x4Test, RoundTripErrorCheck) {
   for (int i = 0; i < count_test_block; ++i) {
     int16_t test_input_block[16];
     int16_t test_temp_block[16];
-    int16_t test_output_block[16];
+    uint8_t dst[16], src[16];
 
+    for (int j = 0; j < 16; ++j) {
+      src[j] = rnd.Rand8();
+      dst[j] = rnd.Rand8();
+    }
     // Initialize a test block with input range [-255, 255].
     for (int j = 0; j < 16; ++j)
-      test_input_block[j] = rnd.Rand8() - rnd.Rand8();
+      test_input_block[j] = src[j] - dst[j];
 
     // TODO(Yaowu): this should be converted to a parameterized test
     // to test optimized versions of this function.
@@ -120,10 +124,10 @@ TEST(Vp9Fdct4x4Test, RoundTripErrorCheck) {
     }
 
     // Because the bitstream is not frozen yet, use the idct in the codebase.
-    vp9_short_idct4x4_c(test_temp_block, test_output_block, pitch);
+    vp9_short_idct4x4_add_c(test_temp_block, dst, 4);
 
     for (int j = 0; j < 16; ++j) {
-      const int diff = test_input_block[j] - test_output_block[j];
+      const int diff = dst[j] - src[j];
       const int error = diff * diff;
       if (max_error < error)
         max_error = error;
diff --git a/test/fdct8x8_test.cc b/test/fdct8x8_test.cc
index e1b2a07b8193c30ca7dce04e5897336a87faa0f7..90b4ecd64ca6f1668247171b12b8f7666d3488b0 100644
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@@ -16,6 +16,7 @@ extern "C" {
 #include "vp9_rtcd.h"
+void vp9_short_idct8x8_add_c(short *input, uint8_t *output, int pitch);
 }
 
 #include "acm_random.h"
@@ -100,11 +101,15 @@ TEST(VP9Fdct8x8Test, RoundTripErrorCheck) {
   for (int i = 0; i < count_test_block; ++i) {
     int16_t test_input_block[64];
     int16_t test_temp_block[64];
-    int16_t test_output_block[64];
+    uint8_t dst[64], src[64];
 
+    for (int j = 0; j < 64; ++j) {
+      src[j] = rnd.Rand8();
+      dst[j] = rnd.Rand8();
+    }
     // Initialize a test block with input range [-255, 255].
     for (int j = 0; j < 64; ++j)
-      test_input_block[j] = rnd.Rand8() - rnd.Rand8();
+      test_input_block[j] = src[j] - dst[j];
 
     const int pitch = 16;
     vp9_short_fdct8x8_c(test_input_block, test_temp_block, pitch);
@@ -119,10 +124,10 @@ TEST(VP9Fdct8x8Test, RoundTripErrorCheck) {
         test_temp_block[j] *= 4;
       }
     }
-    vp9_short_idct8x8_c(test_temp_block, test_output_block, pitch);
+    vp9_short_idct8x8_add_c(test_temp_block, dst, 8);
 
     for (int j = 0; j < 64; ++j) {
-      const int diff = test_input_block[j] - test_output_block[j];
+      const int diff = dst[j] - src[j];
       const int error = diff * diff;
       if (max_error < error)
         max_error = error;
@@ -145,18 +150,22 @@ TEST(VP9Fdct8x8Test, ExtremalCheck) {
   for (int i = 0; i < count_test_block; ++i) {
     int16_t test_input_block[64];
     int16_t test_temp_block[64];
-    int16_t test_output_block[64];
+    uint8_t dst[64], src[64];
 
-    // Initialize a test block with input range {-255, 255}.
+    for (int j = 0; j < 64; ++j) {
+      src[j] = rnd.Rand8() % 2 ? 255 : 0;
+      dst[j] = src[j] > 0 ? 0 : 255;
+    }
+    // Initialize a test block with input range [-255, 255].
     for (int j = 0; j < 64; ++j)
-      test_input_block[j] = rnd.Rand8() % 2 ? 255 : -256;
+      test_input_block[j] = src[j] - dst[j];
 
     const int pitch = 16;
     vp9_short_fdct8x8_c(test_input_block, test_temp_block, pitch);
-    vp9_short_idct8x8_c(test_temp_block, test_output_block, pitch);
+    vp9_short_idct8x8_add_c(test_temp_block, dst, 8);
 
     for (int j = 0; j < 64; ++j) {
-      const int diff = test_input_block[j] - test_output_block[j];
+      const int diff = dst[j] - src[j];
       const int error = diff * diff;
       if (max_error < error)
         max_error = error;
diff --git a/test/i420_video_source.h b/test/i420_video_source.h
index 219bd3393c864567b3ce58fbbd55cec45edfdc3f..12a6ab1d3ff896af12a2487f5a7e5ca96a11b318 100644
--- a/test/i420_video_source.h
+++ b/test/i420_video_source.h
@@ -83,7 +83,7 @@ class I420VideoSource : public VideoSource {
   void SetSize(unsigned int width, unsigned int height) {
     if (width != width_ || height != height_) {
       vpx_img_free(img_);
-      img_ = vpx_img_alloc(NULL, VPX_IMG_FMT_VPXI420, width, height, 1);
+      img_ = vpx_img_alloc(NULL, VPX_IMG_FMT_I420, width, height, 1);
       ASSERT_TRUE(img_ != NULL);
       width_ = width;
       height_ = height;
diff --git a/test/idct8x8_test.cc b/test/idct8x8_test.cc
index 30a1ac3998006df794b38cd235e076b2081c8621..67db78b4617a57a75f03a8eb48de8ca7008eca6f 100644
--- a/test/idct8x8_test.cc
+++ b/test/idct8x8_test.cc
@@ -112,20 +112,23 @@ TEST(VP9Idct8x8Test, AccuracyCheck) {
   const int count_test_block = 10000;
   for (int i = 0; i < count_test_block; ++i) {
     int16_t input[64], coeff[64];
-    int16_t output_c[64];
     double output_r[64];
+    uint8_t dst[64], src[64];
 
+    for (int j = 0; j < 64; ++j) {
+      src[j] = rnd.Rand8();
+      dst[j] = rnd.Rand8();
+    }
     // Initialize a test block with input range [-255, 255].
     for (int j = 0; j < 64; ++j)
-      input[j] = rnd.Rand8() - rnd.Rand8();
+      input[j] = src[j] - dst[j];
 
-    const int pitch = 16;
     reference_dct_2d(input, output_r);
     for (int j = 0; j < 64; ++j)
       coeff[j] = round(output_r[j]);
-    vp9_short_idct8x8_c(coeff, output_c, pitch);
+    vp9_short_idct8x8_add_c(coeff, dst, 8);
 
     for (int j = 0; j < 64; ++j) {
-      const int diff = output_c[j] -input[j];
+      const int diff = dst[j] - src[j];
       const int error = diff * diff;
       EXPECT_GE(1, error)
           << "Error: 8x8 FDCT/IDCT has error " << error
diff --git a/test/superframe_test.cc b/test/superframe_test.cc
index 52faddb43e4890bbf6816dfaedbe94065d555803..062ec6c9e77ee630193e6e4931da6d53376a053b 100644
--- a/test/superframe_test.cc
+++ b/test/superframe_test.cc
@@ -30,7 +30,7 @@ class SuperframeTest : public ::libvpx_test::EncoderTest,
   }
 
   virtual void TearDown() {
-    delete modified_buf_;
+    delete[] modified_buf_;
  }
 
   virtual bool Continue() const {
@@ -59,7 +59,7 @@ class SuperframeTest : public ::libvpx_test::EncoderTest,
         buffer[pkt->data.frame.sz - index_sz] == marker) {
       // frame is a superframe. strip off the index.
       if (modified_buf_)
-        delete modified_buf_;
+        delete[] modified_buf_;
       modified_buf_ = new uint8_t[pkt->data.frame.sz - index_sz];
       memcpy(modified_buf_, pkt->data.frame.buf,
              pkt->data.frame.sz - index_sz);
diff --git a/test/test-data.sha1 b/test/test-data.sha1
index c1b6a834c31aac379af4a3bbc02815c278852fd9..98cdda0a28930d6b191b42e4b9b76bb6b9b48b82 100644
--- a/test/test-data.sha1
+++ b/test/test-data.sha1
@@ -1,4 +1,5 @@
 d5dfb0151c9051f8c85999255645d7a23916d3c0  hantro_collage_w352h288.yuv
+b87815bf86020c592ccc7a846ba2e28ec8043902  hantro_odd.yuv
 5184c46ddca8b1fadd16742e8500115bc8f749da  vp80-00-comprehensive-001.ivf
 65bf1bbbced81b97bd030f376d1b7f61a224793f  vp80-00-comprehensive-002.ivf
 906b4c1e99eb734504c504b3f1ad8052137ce672  vp80-00-comprehensive-003.ivf
@@ -120,4 +121,4 @@ f95eb6214571434f1f73ab7833b9ccdf47588020  vp80-03-segmentation-1437.ivf.md5
 41d70bb5fa45bc88da1604a0af466930b8dd77b5  vp80-05-sharpness-1438.ivf.md5
 086c56378df81b6cee264d7540a7b8f2b405c7a4  vp80-05-sharpness-1439.ivf.md5
 d32dc2c4165eb266ea4c23c14a45459b363def32  vp80-05-sharpness-1440.ivf.md5
-8c69dc3d8e563f56ffab5ad1e400d9e689dd23df  vp80-05-sharpness-1443.ivf.md5
\ No newline at end of file
+8c69dc3d8e563f56ffab5ad1e400d9e689dd23df  vp80-05-sharpness-1443.ivf.md5
diff --git a/test/test.mk b/test/test.mk
index 0d069d026fe92295d15312369786f750733eae09..1e0b2172e20aaa5d5121c1a6e7a896e4279bf44e 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -22,6 +22,7 @@ LIBVPX_TEST_SRCS-yes += encode_test_driver.h
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += error_resilience_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += i420_video_source.h
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += keyframe_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += borders_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += resize_test.cc
 
 LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../md5_utils.h ../md5_utils.c
@@ -92,6 +93,7 @@ endif
 ## TEST DATA
 ##
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_collage_w352h288.yuv
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_odd.yuv
 
 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-001.ivf
 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-002.ivf
diff --git a/test/tile_independence_test.cc b/test/tile_independence_test.cc
index 711d0bd45e82fe00a543736552147aad93f372f6..9633ed756892243a20c5bcf0b75aee9afbb1facf 100644
--- a/test/tile_independence_test.cc
+++ b/test/tile_independence_test.cc
@@ -56,7 +56,13 @@ class TileIndependenceTest : public ::libvpx_test::EncoderTest,
   void UpdateMD5(::libvpx_test::Decoder *dec, const vpx_codec_cx_pkt_t *pkt,
                  ::libvpx_test::MD5 *md5) {
-    dec->DecodeFrame((uint8_t *) pkt->data.frame.buf, pkt->data.frame.sz);
+    const vpx_codec_err_t res =
+        dec->DecodeFrame(reinterpret_cast<uint8_t*>(pkt->data.frame.buf),
+                         pkt->data.frame.sz);
+    if (res != VPX_CODEC_OK) {
+      abort_ = true;
+      ASSERT_EQ(VPX_CODEC_OK, res);
+    }
     const vpx_image_t *img = dec->GetDxData().Next();
     md5->Add(img);
   }
diff --git a/test/variance_test.cc b/test/variance_test.cc
index 120df316e0d3cdcf2ab976ae98f57b9ed021f3cd..337980cd7384bfdfcccaf3c63912043c4f315cb0 100644
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -188,11 +188,11 @@ INSTANTIATE_TEST_CASE_P(
 #endif
 
 #if HAVE_SSE2
-const vp9_variance_fn_t variance4x4_wmt = vp9_variance4x4_wmt;
-const vp9_variance_fn_t variance8x8_wmt = vp9_variance8x8_wmt;
-const vp9_variance_fn_t variance8x16_wmt = vp9_variance8x16_wmt;
-const vp9_variance_fn_t variance16x8_wmt = vp9_variance16x8_wmt;
-const vp9_variance_fn_t variance16x16_wmt = vp9_variance16x16_wmt;
+const vp9_variance_fn_t variance4x4_wmt = vp9_variance4x4_sse2;
+const vp9_variance_fn_t variance8x8_wmt = vp9_variance8x8_sse2;
+const vp9_variance_fn_t variance8x16_wmt = vp9_variance8x16_sse2;
+const vp9_variance_fn_t variance16x8_wmt = vp9_variance16x8_sse2;
+const vp9_variance_fn_t variance16x16_wmt = vp9_variance16x16_sse2;
 INSTANTIATE_TEST_CASE_P(
     SSE2, VP9VarianceTest,
     ::testing::Values(make_tuple(4, 4, variance4x4_wmt),
diff --git a/test/video_source.h b/test/video_source.h
index 9772657d6861e8dbb437c4d499c97fafcef7cc33..26d53282ff536b9c3192652ba3dc606776e803fb 100644
--- a/test/video_source.h
+++ b/test/video_source.h
@@ -103,7 +103,7 @@ class DummyVideoSource : public VideoSource {
     if (width != width_ || height != height_) {
       vpx_img_free(img_);
       raw_sz_ = ((width + 31)&~31) * height * 3 / 2;
-      img_ = vpx_img_alloc(NULL, VPX_IMG_FMT_VPXI420, width, height, 32);
+      img_ = vpx_img_alloc(NULL, VPX_IMG_FMT_I420, width, height, 32);
       width_ = width;
       height_ = height;
     }
diff --git a/test/vp9_boolcoder_test.cc b/test/vp9_boolcoder_test.cc
index bde28a5f3086f8e320dd15b176eed11d39b82b28..42b2229bffe25b0091e1350709cd38360e28e181 100644
--- a/test/vp9_boolcoder_test.cc
+++ b/test/vp9_boolcoder_test.cc
@@ -52,7 +52,7 @@ TEST(VP9, TestBitIO) {
       const int random_seed = 6432;
       const int buffer_size = 10000;
       ACMRandom bit_rnd(random_seed);
-      BOOL_CODER bw;
+      vp9_writer bw;
       uint8_t bw_buffer[buffer_size];
       vp9_start_encode(&bw, bw_buffer);
 
@@ -63,13 +63,16 @@ TEST(VP9, TestBitIO) {
         } else if (bit_method == 3) {
          bit = bit_rnd(2);
        }
-        encode_bool(&bw, bit, static_cast<int>(probas[i]));
+        vp9_write(&bw, bit, static_cast<int>(probas[i]));
       }
 
       vp9_stop_encode(&bw);
 
-      BOOL_DECODER br;
-      vp9_start_decode(&br, bw_buffer, buffer_size);
+      // First bit should be zero
+      GTEST_ASSERT_EQ(bw_buffer[0] & 0x80, 0);
+
+      vp9_reader br;
+      vp9_reader_init(&br, bw_buffer, buffer_size);
       bit_rnd.Reset(random_seed);
       for (int i = 0; i < bits_to_test; ++i) {
         if (bit_method == 2) {
@@ -77,7 +80,7 @@ TEST(VP9, TestBitIO) {
         } else if (bit_method == 3) {
           bit = bit_rnd(2);
         }
-        GTEST_ASSERT_EQ(decode_bool(&br, probas[i]), bit)
+        GTEST_ASSERT_EQ(vp9_read(&br, probas[i]), bit)
             << "pos: " << i << " / " << bits_to_test
             << " bit_method: " << bit_method
             << " method: " << method;
diff --git a/third_party/libyuv/source/scale.c b/third_party/libyuv/source/scale.c
index c142a17bb48d7f4683d6abc99575c99bee9efb67..72a817d2591c5150ee56cdd716d433222d97bb2c 100644
--- a/third_party/libyuv/source/scale.c
+++ b/third_party/libyuv/source/scale.c
@@ -632,7 +632,7 @@ TALIGN16(const uint16, scaleab2[8]) =
   { 65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3, 65536 / 3, 65536 / 2, 0, 0 };
 #endif
 
-#if defined(_M_IX86) && !defined(YUV_DISABLE_ASM)
+#if defined(_M_IX86) && !defined(YUV_DISABLE_ASM) && defined(_MSC_VER)
 #define HAS_SCALEROWDOWN2_SSE2
 
 // Reads 32 pixels, throws half away and writes 16 pixels.
diff --git a/tools/cpplint.py b/tools/cpplint.py
index 526b9556dcdc3b49403242d132198d78dbcf87af..159dbbb07e5b2e3f46a2dff2edf1ac8db6439a97 100755
--- a/tools/cpplint.py
+++ b/tools/cpplint.py
@@ -53,12 +53,8 @@
 #   - Check for 0 in char context (should be '\0')
 #   - Check for camel-case method name conventions for methods
 #     that are not simple inline getters and setters
-#   - Check that base classes have virtual destructors
-#     put " // namespace" after } that closes a namespace, with
-#     namespace's name after 'namespace' if it is named.
 #   - Do not indent namespace contents
 #   - Avoid inlining non-trivial constructors in header files
-#   - include base/basictypes.h if DISALLOW_EVIL_CONSTRUCTORS is used
 #   - Check for old-school (void) cast for call-sites of functions
 #     ignored return value
 #   - Check gUnit usage of anonymous namespace
@@ -80,6 +76,7 @@ same line, but it is far from perfect (in either direction).
 """
 
 import codecs
+import copy
 import getopt
 import math  # for log
 import os
@@ -139,6 +136,22 @@ Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
       the top-level categories like 'build' and 'whitespace' will
       also be printed. If 'detailed' is provided, then a count
       is provided for each category like 'build/class'.
+
+    root=subdir
+      The root directory used for deriving header guard CPP variable.
+      By default, the header guard CPP variable is calculated as the relative
+      path to the directory that contains .git, .hg, or .svn. When this flag
+      is specified, the relative path is calculated from the specified
+      directory. If the specified directory does not exist, this flag is
+      ignored.
+
+      Examples:
+        Assuming that src/.git exists, the header guard CPP variables for
+        src/chrome/browser/ui/browser.h are:
+
+        No flag => CHROME_BROWSER_UI_BROWSER_H_
+        --root=chrome => BROWSER_UI_BROWSER_H_
+        --root=chrome/browser => UI_BROWSER_H_
 """
 
 # We categorize each error message we print.  Here are the categories.
@@ -161,6 +174,7 @@ _ERROR_CATEGORIES = [
   'build/printf_format',
   'build/storage_class',
   'legal/copyright',
+  'readability/alt_tokens',
   'readability/braces',
   'readability/casting',
   'readability/check',
@@ -169,6 +183,7 @@ _ERROR_CATEGORIES = [
   'readability/function',
   'readability/multiline_comment',
   'readability/multiline_string',
+  'readability/namespace',
   'readability/nolint',
   'readability/streams',
   'readability/todo',
@@ -189,13 +204,14 @@ _ERROR_CATEGORIES = [
   'runtime/sizeof',
   'runtime/string',
   'runtime/threadsafe_fn',
-  'runtime/virtual',
   'whitespace/blank_line',
   'whitespace/braces',
   'whitespace/comma',
   'whitespace/comments',
+  'whitespace/empty_loop_body',
   'whitespace/end_of_line',
   'whitespace/ending_newline',
+  'whitespace/forcolon',
   'whitespace/indent',
   'whitespace/labels',
   'whitespace/line_length',
@@ -278,6 +294,34 @@ for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'),
   _CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_%s_M' % inv_replacement
   _CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_%s_M' % inv_replacement
 
+# Alternative tokens and their replacements.  For full list, see section 2.5
+# Alternative tokens [lex.digraph] in the C++ standard.
+#
+# Digraphs (such as '%:') are not included here since it's a mess to
+# match those on a word boundary.
+_ALT_TOKEN_REPLACEMENT = {
+    'and': '&&',
+    'bitor': '|',
+    'or': '||',
+    'xor': '^',
+    'compl': '~',
+    'bitand': '&',
+    'and_eq': '&=',
+    'or_eq': '|=',
+    'xor_eq': '^=',
+    'not': '!',
+    'not_eq': '!='
+    }
+
+# Compile regular expression that matches all the above keywords.  The "[ =()]"
+# bit is meant to avoid matching these keywords outside of boolean expressions.
+#
+# False positives include C-style multi-line comments (http://go/nsiut )
+# and multi-line strings (http://go/beujw ), but those have always been
+# troublesome for cpplint.
+_ALT_TOKEN_REPLACEMENT_PATTERN = re.compile(
+    r'[ =()](' + ('|'.join(_ALT_TOKEN_REPLACEMENT.keys())) + r')(?=[ (]|$)')
+
 # These constants define types of headers for use with
 # _IncludeState.CheckNextIncludeOrder().
@@ -287,6 +331,17 @@ _LIKELY_MY_HEADER = 3
 _POSSIBLE_MY_HEADER = 4
 _OTHER_HEADER = 5
 
+# These constants define the current inline assembly state
+_NO_ASM = 0       # Outside of inline assembly block
+_INSIDE_ASM = 1   # Inside inline assembly block
+_END_ASM = 2      # Last line of inline assembly block
+_BLOCK_ASM = 3    # The whole block is an inline assembly block
+
+# Match start of assembly blocks
+_MATCH_ASM = re.compile(r'^\s*(?:asm|_asm|__asm|__asm__)'
+                        r'(?:\s+(volatile|__volatile__))?'
+                        r'\s*[{(]')
+
 _regexp_compile_cache = {}
 
@@ -297,6 +352,10 @@ _RE_SUPPRESSION = re.compile(r'\bNOLINT\b(\([^)]*\))?')
 # on which those errors are expected and should be suppressed.
 _error_suppressions = {}
 
+# The root directory used for deriving header guard CPP variable.
+# This is set by --root flag.
+_root = None
+
 def ParseNolintSuppressions(filename, raw_line, linenum, error):
   """Updates the global list of error-suppressions.
@@ -925,7 +984,7 @@ class CleansedLines(object):
   1) elided member contains lines without strings and comments,
   2) lines member contains lines without comments, and
-  3) raw member contains all the lines without processing.
+  3) raw_lines member contains all the lines without processing.
   All these three members are of <type 'list'>, and of the same length.
   """
 
@@ -965,6 +1024,29 @@ class CleansedLines(object):
     return elided
 
 
+def FindEndOfExpressionInLine(line, startpos, depth, startchar, endchar):
+  """Find the position just after the matching endchar.
+
+  Args:
+    line: a CleansedLines line.
+    startpos: start searching at this position.
+    depth: nesting level at startpos.
+    startchar: expression opening character.
+    endchar: expression closing character.
+
+  Returns:
+    Index just after endchar.
+  """
+  for i in xrange(startpos, len(line)):
+    if line[i] == startchar:
+      depth += 1
+    elif line[i] == endchar:
+      depth -= 1
+      if depth == 0:
+        return i + 1
+  return -1
+
+
 def CloseExpression(clean_lines, linenum, pos):
   """If input points to ( or { or [, finds the position that closes it.
 
@@ -991,18 +1073,23 @@ def CloseExpression(clean_lines, linenum, pos):
   if startchar == '[': endchar = ']'
   if startchar == '{': endchar = '}'
 
-  num_open = line.count(startchar) - line.count(endchar)
-  while linenum < clean_lines.NumLines() and num_open > 0:
+  # Check first line
+  end_pos = FindEndOfExpressionInLine(line, pos, 0, startchar, endchar)
+  if end_pos > -1:
+    return (line, linenum, end_pos)
+  tail = line[pos:]
+  num_open = tail.count(startchar) - tail.count(endchar)
+  while linenum < clean_lines.NumLines() - 1:
     linenum += 1
     line = clean_lines.elided[linenum]
-    num_open += line.count(startchar) - line.count(endchar)
-  # OK, now find the endchar that actually got us back to even
-  endpos = len(line)
-  while num_open >= 0:
-    endpos = line.rfind(')', 0, endpos)
-    num_open -= 1    # chopped off another )
-  return (line, linenum, endpos + 1)
+    delta = line.count(startchar) - line.count(endchar)
+    if num_open + delta <= 0:
+      return (line, linenum,
+              FindEndOfExpressionInLine(line, 0, num_open, startchar, endchar))
+    num_open += delta
+
+  # Did not find endchar before end of file, give up
+  return (line, clean_lines.NumLines(), -1)
 
 def CheckForCopyright(filename, lines, error):
   """Logs an error if no Copyright message appears at the top of the file."""
@@ -1032,9 +1119,13 @@ def GetHeaderGuardCPPVariable(filename):
 
   # Restores original filename in case that cpplint is invoked from Emacs's
   # flymake.
   filename = re.sub(r'_flymake\.h$', '.h', filename)
+  filename = re.sub(r'/\.flymake/([^/]*)$', r'/\1', filename)
 
   fileinfo = FileInfo(filename)
-  return re.sub(r'[-./\s]', '_', fileinfo.RepositoryName()).upper() + '_'
+  file_path_from_root = fileinfo.RepositoryName()
+  if _root:
+    file_path_from_root = re.sub('^' + _root + os.sep, '', file_path_from_root)
+  return re.sub(r'[-./\s]', '_', file_path_from_root).upper() + '_'
 
 
 def CheckForHeaderGuard(filename, lines, error):
@@ -1259,17 +1350,55 @@ def CheckInvalidIncrement(filename, clean_lines, linenum, error):
           'Changing pointer instead of value (or unused value of operator*).')
 
 
-class _ClassInfo(object):
+class _BlockInfo(object):
+  """Stores information about a generic block of code."""
+
+  def __init__(self, seen_open_brace):
+    self.seen_open_brace = seen_open_brace
+    self.open_parentheses = 0
+    self.inline_asm = _NO_ASM
+
+  def CheckBegin(self, filename, clean_lines, linenum, error):
+    """Run checks that apply to text up to the opening brace.
+
+    This is mostly for checking the text after the class identifier
+    and the "{", usually where the base class is specified.  For other
+    blocks, there isn't much to check, so we always pass.
+
+    Args:
+      filename: The name of the current file.
+      clean_lines: A CleansedLines instance containing the file.
+      linenum: The number of the line to check.
+      error: The function to call with any errors found.
+    """
+    pass
+
+  def CheckEnd(self, filename, clean_lines, linenum, error):
+    """Run checks that apply to text after the closing brace.
+
+    This is mostly used for checking end of namespace comments.
+
+    Args:
+      filename: The name of the current file.
+      clean_lines: A CleansedLines instance containing the file.
+      linenum: The number of the line to check.
+      error: The function to call with any errors found.
+    """
+    pass
+
+
+class _ClassInfo(_BlockInfo):
   """Stores information about a class."""
 
-  def __init__(self, name, clean_lines, linenum):
+  def __init__(self, name, class_or_struct, clean_lines, linenum):
+    _BlockInfo.__init__(self, False)
     self.name = name
-    self.linenum = linenum
-    self.seen_open_brace = False
+    self.starting_linenum = linenum
     self.is_derived = False
-    self.virtual_method_linenumber = None
-    self.has_virtual_destructor = False
-    self.brace_depth = 0
+    if class_or_struct == 'struct':
+      self.access = 'public'
+    else:
+      self.access = 'private'
 
     # Try to find the end of the class.  This will be confused by things like:
     #   class A {
@@ -1279,26 +1408,324 @@ class _ClassInfo(object):
     self.last_line = 0
     depth = 0
     for i in range(linenum, clean_lines.NumLines()):
-      line = clean_lines.lines[i]
+      line = clean_lines.elided[i]
       depth += line.count('{') - line.count('}')
       if not depth:
         self.last_line = i
         break
 
+  def CheckBegin(self, filename, clean_lines, linenum, error):
+    # Look for a bare ':'
+    if Search('(^|[^:]):($|[^:])', clean_lines.elided[linenum]):
+      self.is_derived = True
 
-class _ClassState(object):
-  """Holds the current state of the parse relating to class declarations.
-  It maintains a stack of _ClassInfos representing the parser's guess
-  as to the current nesting of class declarations.  The innermost class
-  is at the top (back) of the stack.  Typically, the stack will either
-  be empty or have exactly one entry.
-  """
+class _NamespaceInfo(_BlockInfo):
+  """Stores information about a namespace."""
+
+  def __init__(self, name, linenum):
+    _BlockInfo.__init__(self, False)
+    self.name = name or ''
+    self.starting_linenum = linenum
+
+  def CheckEnd(self, filename, clean_lines, linenum, error):
+    """Check end of namespace comments."""
+    line = clean_lines.raw_lines[linenum]
+
+    # Check how many lines are enclosed in this namespace.  Don't issue
+    # warning for missing namespace comments if there aren't enough
+    # lines.  However, do apply checks if there is already an end of
+    # namespace comment and it's incorrect.
+    #
+    # TODO(unknown): We always want to check end of namespace comments
+    # if a namespace is large, but sometimes we also want to apply the
+    # check if a short namespace contained nontrivial things (something
+    # other than forward declarations).  There is currently no logic on
+    # deciding what these nontrivial things are, so this check is
+    # triggered by namespace size only, which works most of the time.
+    if (linenum - self.starting_linenum < 10
+        and not Match(r'};*\s*(//|/\*).*\bnamespace\b', line)):
+      return
+
+    # Look for matching comment at end of namespace.
+    #
+    # Note that we accept C style "/* */" comments for terminating
+    # namespaces, so that code that terminates namespaces inside
+    # preprocessor macros can be cpplint clean.  Example: http://go/nxpiz
+    #
+    # We also accept stuff like "// end of namespace <name>." with the
+    # period at the end.
+    #
+    # Besides these, we don't accept anything else, otherwise we might
+    # get false negatives when existing comment is a substring of the
+    # expected namespace.  Example: http://go/ldkdc, http://cl/23548205
+    if self.name:
+      # Named namespace
+      if not Match((r'};*\s*(//|/\*).*\bnamespace\s+' + re.escape(self.name) +
+                    r'[\*/\.\\\s]*$'),
+                   line):
+        error(filename, linenum, 'readability/namespace', 5,
+              'Namespace should be terminated with "// namespace %s"' %
+              self.name)
+    else:
+      # Anonymous namespace
+      if not Match(r'};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$', line):
+        error(filename, linenum, 'readability/namespace', 5,
+              'Namespace should be terminated with "// namespace"')
+
+
+class _PreprocessorInfo(object):
+  """Stores checkpoints of nesting stacks when #if/#else is seen."""
+
+  def __init__(self, stack_before_if):
+    # The entire nesting stack before #if
+    self.stack_before_if = stack_before_if
+
+    # The entire nesting stack up to #else
+    self.stack_before_else = []
+
+    # Whether we have already seen #else or #elif
+    self.seen_else = False
+
+
+class _NestingState(object):
+  """Holds states related to parsing braces."""
 
   def __init__(self):
-    self.classinfo_stack = []
+    # Stack for tracking all braces.  An object is pushed whenever we
+    # see a "{", and popped when we see a "}".  Only 3 types of
+    # objects are possible:
+    # - _ClassInfo: a class or struct.
+    # - _NamespaceInfo: a namespace.
+    # - _BlockInfo: some other type of block.
+    self.stack = []
+
+    # Stack of _PreprocessorInfo objects.
+    self.pp_stack = []
+
+  def SeenOpenBrace(self):
+    """Check if we have seen the opening brace for the innermost block.
 
-  def CheckFinished(self, filename, error):
+    Returns:
+      True if we have seen the opening brace, False if the innermost
+      block is still expecting an opening brace.
+    """
+    return (not self.stack) or self.stack[-1].seen_open_brace
+
+  def InNamespaceBody(self):
+    """Check if we are currently one level inside a namespace body.
+
+    Returns:
+      True if top of the stack is a namespace block, False otherwise.
+ """ + return self.stack and isinstance(self.stack[-1], _NamespaceInfo) + + def UpdatePreprocessor(self, line): + """Update preprocessor stack. + + We need to handle preprocessors due to classes like this: + #ifdef SWIG + struct ResultDetailsPageElementExtensionPoint { + #else + struct ResultDetailsPageElementExtensionPoint : public Extension { + #endif + (see http://go/qwddn for original example) + + We make the following assumptions (good enough for most files): + - Preprocessor condition evaluates to true from #if up to first + #else/#elif/#endif. + + - Preprocessor condition evaluates to false from #else/#elif up + to #endif. We still perform lint checks on these lines, but + these do not affect nesting stack. + + Args: + line: current line to check. + """ + if Match(r'^\s*#\s*(if|ifdef|ifndef)\b', line): + # Beginning of #if block, save the nesting stack here. The saved + # stack will allow us to restore the parsing state in the #else case. + self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack))) + elif Match(r'^\s*#\s*(else|elif)\b', line): + # Beginning of #else block + if self.pp_stack: + if not self.pp_stack[-1].seen_else: + # This is the first #else or #elif block. Remember the + # whole nesting stack up to this point. This is what we + # keep after the #endif. + self.pp_stack[-1].seen_else = True + self.pp_stack[-1].stack_before_else = copy.deepcopy(self.stack) + + # Restore the stack to how it was before the #if + self.stack = copy.deepcopy(self.pp_stack[-1].stack_before_if) + else: + # TODO(unknown): unexpected #else, issue warning? + pass + elif Match(r'^\s*#\s*endif\b', line): + # End of #if or #else blocks. + if self.pp_stack: + # If we saw an #else, we will need to restore the nesting + # stack to its former state before the #else, otherwise we + # will just continue from where we left off. + if self.pp_stack[-1].seen_else: + # Here we can just use a shallow copy since we are the last + # reference to it. + self.stack = self.pp_stack[-1].stack_before_else + # Drop the corresponding #if + self.pp_stack.pop() + else: + # TODO(unknown): unexpected #endif, issue warning? + pass + + def Update(self, filename, clean_lines, linenum, error): + """Update nesting state with current line. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Update pp_stack first + self.UpdatePreprocessor(line) + + # Count parentheses. This is to avoid adding struct arguments to + # the nesting stack. + if self.stack: + inner_block = self.stack[-1] + depth_change = line.count('(') - line.count(')') + inner_block.open_parentheses += depth_change + + # Also check if we are starting or ending an inline assembly block. + if inner_block.inline_asm in (_NO_ASM, _END_ASM): + if (depth_change != 0 and + inner_block.open_parentheses == 1 and + _MATCH_ASM.match(line)): + # Enter assembly block + inner_block.inline_asm = _INSIDE_ASM + else: + # Not entering assembly block. If previous line was _END_ASM, + # we will now shift to _NO_ASM state. + inner_block.inline_asm = _NO_ASM + elif (inner_block.inline_asm == _INSIDE_ASM and + inner_block.open_parentheses == 0): + # Exit assembly block + inner_block.inline_asm = _END_ASM + + # Consume namespace declaration at the beginning of the line. 
+    # Do this in a loop so that we catch same-line declarations like this:
+    #   namespace proto2 { namespace bridge { class MessageSet; } }
+    while True:
+      # Match start of namespace. The "\b\s*" below catches namespace
+      # declarations even if the keyword isn't followed by whitespace,
+      # so that we don't confuse our namespace checker. The missing
+      # spaces will be flagged by CheckSpacing.
+      namespace_decl_match = Match(r'^\s*namespace\b\s*([:\w]+)?(.*)$', line)
+      if not namespace_decl_match:
+        break
+
+      new_namespace = _NamespaceInfo(namespace_decl_match.group(1), linenum)
+      self.stack.append(new_namespace)
+
+      line = namespace_decl_match.group(2)
+      if line.find('{') != -1:
+        new_namespace.seen_open_brace = True
+        line = line[line.find('{') + 1:]
+
+    # Look for a class declaration in whatever is left of the line
+    # after parsing namespaces. The regexp accounts for decorated classes
+    # such as in:
+    #   class LOCKABLE API Object {
+    #   };
+    #
+    # Templates with class arguments may confuse the parser, for example:
+    #   template <class T
+    #             class Comparator = less<T>,
+    #             class Vector = vector<T> >
+    #   class HeapQueue {
+    #
+    # Because this parser has no nesting state about templates, by the
+    # time it sees "class Comparator", it may think that it's a new class.
+    # Nested templates have a similar problem:
+    #   template <
+    #       typename ExportedType,
+    #       typename TupleType,
+    #       template <typename, typename> class ImplTemplate>
+    #
+    # To avoid these cases, we ignore classes that are followed by '=' or '>'
+    class_decl_match = Match(
+        r'\s*(template\s*<[\w\s<>,:]*>\s*)?'
+        '(class|struct)\s+([A-Z_]+\s+)*(\w+(?:::\w+)*)'
+        '(([^=>]|<[^<>]*>)*)$', line)
+    if (class_decl_match and
+        (not self.stack or self.stack[-1].open_parentheses == 0)):
+      self.stack.append(_ClassInfo(
+          class_decl_match.group(4), class_decl_match.group(2),
+          clean_lines, linenum))
+      line = class_decl_match.group(5)
+
+    # If we have not yet seen the opening brace for the innermost block,
+    # run checks here.
+    if not self.SeenOpenBrace():
+      self.stack[-1].CheckBegin(filename, clean_lines, linenum, error)
+
+    # Update access control if we are inside a class/struct
+    if self.stack and isinstance(self.stack[-1], _ClassInfo):
+      access_match = Match(r'\s*(public|private|protected)\s*:', line)
+      if access_match:
+        self.stack[-1].access = access_match.group(1)
+
+    # Consume braces or semicolons from what's left of the line
+    while True:
+      # Match first brace, semicolon, or closed parenthesis.
+      matched = Match(r'^[^{;)}]*([{;)}])(.*)$', line)
+      if not matched:
+        break
+
+      token = matched.group(1)
+      if token == '{':
+        # If the namespace or class hasn't seen an opening brace yet, mark
+        # the namespace/class head as complete. Push a new block onto the
+        # stack otherwise.
+        if not self.SeenOpenBrace():
+          self.stack[-1].seen_open_brace = True
+        else:
+          self.stack.append(_BlockInfo(True))
+          if _MATCH_ASM.match(line):
+            self.stack[-1].inline_asm = _BLOCK_ASM
+      elif token == ';' or token == ')':
+        # If we haven't seen an opening brace yet, but we already saw
+        # a semicolon, this is probably a forward declaration. Pop
+        # the stack for these.
+        #
+        # Similarly, if we haven't seen an opening brace yet, but we
+        # already saw a closing parenthesis, then these are probably
+        # function arguments with extra "class" or "struct" keywords.
+        # Also pop the stack for these.
+        if not self.SeenOpenBrace():
+          self.stack.pop()
+      else:  # token == '}'
+        # Perform end of block checks and pop the stack.
+ if self.stack: + self.stack[-1].CheckEnd(filename, clean_lines, linenum, error) + self.stack.pop() + line = matched.group(2) + + def InnermostClass(self): + """Get class info on the top of the stack. + + Returns: + A _ClassInfo object if we are inside a class, or None otherwise. + """ + for i in range(len(self.stack), 0, -1): + classinfo = self.stack[i - 1] + if isinstance(classinfo, _ClassInfo): + return classinfo + return None + + def CheckClassFinished(self, filename, error): """Checks that all classes have been completely parsed. Call this when all lines in a file have been processed. @@ -1306,17 +1733,18 @@ class _ClassState(object): filename: The name of the current file. error: The function to call with any errors found. """ - if self.classinfo_stack: - # Note: This test can result in false positives if #ifdef constructs - # get in the way of brace matching. See the testBuildClass test in - # cpplint_unittest.py for an example of this. - error(filename, self.classinfo_stack[0].linenum, 'build/class', 5, - 'Failed to find complete declaration of class %s' % - self.classinfo_stack[0].name) + # Note: This test can result in false positives if #ifdef constructs + # get in the way of brace matching. See the testBuildClass test in + # cpplint_unittest.py for an example of this. + for obj in self.stack: + if isinstance(obj, _ClassInfo): + error(filename, obj.starting_linenum, 'build/class', 5, + 'Failed to find complete declaration of class %s' % + obj.name) def CheckForNonStandardConstructs(filename, clean_lines, linenum, - class_state, error): + nesting_state, error): """Logs an error if we see certain non-ANSI constructs ignored by gcc-2. Complain about several constructs which gcc-2 accepts, but which are @@ -1329,8 +1757,6 @@ def CheckForNonStandardConstructs(filename, clean_lines, linenum, - text after #endif is not allowed. - invalid inner-style forward declaration. - >? and <? operators, and their >?= and <?= cousins. - - classes with virtual methods need virtual destructors (compiler warning - available, but not turned on yet.) Additionally, check for constructor/destructor style violations and reference members, as it is very convenient to do so while checking for @@ -1340,8 +1766,8 @@ def CheckForNonStandardConstructs(filename, clean_lines, linenum, filename: The name of the current file. clean_lines: A CleansedLines instance containing the file. linenum: The number of the line to check. - class_state: A _ClassState instance which maintains information about - the current stack of nested class declarations being parsed. + nesting_state: A _NestingState instance which maintains information about + the current stack of nested blocks being parsed. error: A callable to which errors are reported, which takes 4 arguments: filename, line number, error level, and message """ @@ -1370,7 +1796,7 @@ def CheckForNonStandardConstructs(filename, clean_lines, linenum, if Search(r'\b(const|volatile|void|char|short|int|long' r'|float|double|signed|unsigned' r'|schar|u?int8|u?int16|u?int32|u?int64)' - r'\s+(auto|register|static|extern|typedef)\b', + r'\s+(register|static|extern|typedef)\b', line): error(filename, linenum, 'build/storage_class', 5, 'Storage class (static, extern, typedef, etc) should be first.') @@ -1400,45 +1826,13 @@ def CheckForNonStandardConstructs(filename, clean_lines, linenum, 'const string& members are dangerous. 
It is much better to use ' 'alternatives, such as pointers or simple constants.') - # Track class entry and exit, and attempt to find cases within the - # class declaration that don't meet the C++ style - # guidelines. Tracking is very dependent on the code matching Google - # style guidelines, but it seems to perform well enough in testing - # to be a worthwhile addition to the checks. - classinfo_stack = class_state.classinfo_stack - # Look for a class declaration. The regexp accounts for decorated classes - # such as in: - # class LOCKABLE API Object { - # }; - class_decl_match = Match( - r'\s*(template\s*<[\w\s<>,:]*>\s*)?' - '(class|struct)\s+([A-Z_]+\s+)*(\w+(::\w+)*)', line) - if class_decl_match: - classinfo_stack.append(_ClassInfo( - class_decl_match.group(4), clean_lines, linenum)) - - # Everything else in this function uses the top of the stack if it's - # not empty. - if not classinfo_stack: + # Everything else in this function operates on class declarations. + # Return early if the top of the nesting stack is not a class, or if + # the class head is not completed yet. + classinfo = nesting_state.InnermostClass() + if not classinfo or not classinfo.seen_open_brace: return - classinfo = classinfo_stack[-1] - - # If the opening brace hasn't been seen look for it and also - # parent class declarations. - if not classinfo.seen_open_brace: - # If the line has a ';' in it, assume it's a forward declaration or - # a single-line class declaration, which we won't process. - if line.find(';') != -1: - classinfo_stack.pop() - return - classinfo.seen_open_brace = (line.find('{') != -1) - # Look for a bare ':' - if Search('(^|[^:]):($|[^:])', line): - classinfo.is_derived = True - if not classinfo.seen_open_brace: - return # Everything else in this function is for after open brace - # The class may have been declared with namespace or classname qualifiers. # The constructor and destructor will not have those qualifiers. base_classname = classinfo.name.split('::')[-1] @@ -1455,35 +1849,6 @@ def CheckForNonStandardConstructs(filename, clean_lines, linenum, error(filename, linenum, 'runtime/explicit', 5, 'Single-argument constructors should be marked explicit.') - # Look for methods declared virtual. - if Search(r'\bvirtual\b', line): - classinfo.virtual_method_linenumber = linenum - # Only look for a destructor declaration on the same line. It would - # be extremely unlikely for the destructor declaration to occupy - # more than one line. - if Search(r'~%s\s*\(' % base_classname, line): - classinfo.has_virtual_destructor = True - - # Look for class end. - brace_depth = classinfo.brace_depth - brace_depth = brace_depth + line.count('{') - line.count('}') - if brace_depth <= 0: - classinfo = classinfo_stack.pop() - # Try to detect missing virtual destructor declarations. - # For now, only warn if a non-derived class with virtual methods lacks - # a virtual destructor. This is to make it less likely that people will - # declare derived virtual destructors without declaring the base - # destructor virtual. - if ((classinfo.virtual_method_linenumber is not None) and - (not classinfo.has_virtual_destructor) and - (not classinfo.is_derived)): # Only warn for base classes - error(filename, classinfo.linenum, 'runtime/virtual', 4, - 'The class %s probably needs a virtual destructor due to ' - 'having virtual method(s), one declared at line %d.' 
- % (classinfo.name, classinfo.virtual_method_linenumber)) - else: - classinfo.brace_depth = brace_depth - def CheckSpacingForFunctionCall(filename, line, linenum, error): """Checks for the correctness of various spacing around function calls. @@ -1535,7 +1900,8 @@ def CheckSpacingForFunctionCall(filename, line, linenum, error): error(filename, linenum, 'whitespace/parens', 2, 'Extra space after (') if (Search(r'\w\s+\(', fncall) and - not Search(r'#\s*define|typedef', fncall)): + not Search(r'#\s*define|typedef', fncall) and + not Search(r'\w\s+\((\w+::)?\*\w+\)\(', fncall)): error(filename, linenum, 'whitespace/parens', 4, 'Extra space before ( in function call') # If the ) is followed only by a newline or a { + newline, assume it's @@ -1668,8 +2034,165 @@ def CheckComment(comment, filename, linenum, error): error(filename, linenum, 'whitespace/todo', 2, 'TODO(my_username) should be followed by a space') +def CheckAccess(filename, clean_lines, linenum, nesting_state, error): + """Checks for improper use of DISALLOW* macros. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + nesting_state: A _NestingState instance which maintains information about + the current stack of nested blocks being parsed. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] # get rid of comments and strings + + matched = Match((r'\s*(DISALLOW_COPY_AND_ASSIGN|' + r'DISALLOW_EVIL_CONSTRUCTORS|' + r'DISALLOW_IMPLICIT_CONSTRUCTORS)'), line) + if not matched: + return + if nesting_state.stack and isinstance(nesting_state.stack[-1], _ClassInfo): + if nesting_state.stack[-1].access != 'private': + error(filename, linenum, 'readability/constructors', 3, + '%s must be in the private: section' % matched.group(1)) + + else: + # Found DISALLOW* macro outside a class declaration, or perhaps it + # was used inside a function when it should have been part of the + # class declaration. We could issue a warning here, but it + # probably resulted in a compiler error already. + pass + + +def FindNextMatchingAngleBracket(clean_lines, linenum, init_suffix): + """Find the corresponding > to close a template. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: Current line number. + init_suffix: Remainder of the current line after the initial <. + + Returns: + True if a matching bracket exists. + """ + line = init_suffix + nesting_stack = ['<'] + while True: + # Find the next operator that can tell us whether < is used as an + # opening bracket or as a less-than operator. We only want to + # warn on the latter case. + # + # We could also check all other operators and terminate the search + # early, e.g. if we got something like this "a<b+c", the "<" is + # most likely a less-than operator, but then we will get false + # positives for default arguments (e.g. http://go/prccd) and + # other template expressions (e.g. http://go/oxcjq). + match = Search(r'^[^<>(),;\[\]]*([<>(),;\[\]])(.*)$', line) + if match: + # Found an operator, update nesting stack + operator = match.group(1) + line = match.group(2) + + if nesting_stack[-1] == '<': + # Expecting closing angle bracket + if operator in ('<', '(', '['): + nesting_stack.append(operator) + elif operator == '>': + nesting_stack.pop() + if not nesting_stack: + # Found matching angle bracket + return True + elif operator == ',': + # Got a comma after a bracket, this is most likely a template + # argument. 
We have not seen a closing angle bracket yet, but + # it's probably a few lines later if we look for it, so just + # return early here. + return True + else: + # Got some other operator. + return False + + else: + # Expecting closing parenthesis or closing bracket + if operator in ('<', '(', '['): + nesting_stack.append(operator) + elif operator in (')', ']'): + # We don't bother checking for matching () or []. If we got + # something like (] or [), it would have been a syntax error. + nesting_stack.pop() + + else: + # Scan the next line + linenum += 1 + if linenum >= len(clean_lines.elided): + break + line = clean_lines.elided[linenum] + + # Exhausted all remaining lines and still no matching angle bracket. + # Most likely the input was incomplete, otherwise we should have + # seen a semicolon and returned early. + return True + + +def FindPreviousMatchingAngleBracket(clean_lines, linenum, init_prefix): + """Find the corresponding < that started a template. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: Current line number. + init_prefix: Part of the current line before the initial >. + + Returns: + True if a matching bracket exists. + """ + line = init_prefix + nesting_stack = ['>'] + while True: + # Find the previous operator + match = Search(r'^(.*)([<>(),;\[\]])[^<>(),;\[\]]*$', line) + if match: + # Found an operator, update nesting stack + operator = match.group(2) + line = match.group(1) + + if nesting_stack[-1] == '>': + # Expecting opening angle bracket + if operator in ('>', ')', ']'): + nesting_stack.append(operator) + elif operator == '<': + nesting_stack.pop() + if not nesting_stack: + # Found matching angle bracket + return True + elif operator == ',': + # Got a comma before a bracket, this is most likely a + # template argument. The opening angle bracket is probably + # there if we look for it, so just return early here. + return True + else: + # Got some other operator. + return False + + else: + # Expecting opening parenthesis or opening bracket + if operator in ('>', ')', ']'): + nesting_stack.append(operator) + elif operator in ('(', '['): + nesting_stack.pop() -def CheckSpacing(filename, clean_lines, linenum, error): + else: + # Scan the previous line + linenum -= 1 + if linenum < 0: + break + line = clean_lines.elided[linenum] + + # Exhausted all earlier lines and still no matching angle bracket. + return False + + +def CheckSpacing(filename, clean_lines, linenum, nesting_state, error): """Checks for the correctness of various spacing issues in the code. Things we check for: spaces around operators, spaces after @@ -1682,6 +2205,8 @@ def CheckSpacing(filename, clean_lines, linenum, error): filename: The name of the current file. clean_lines: A CleansedLines instance containing the file. linenum: The number of the line to check. + nesting_state: A _NestingState instance which maintains information about + the current stack of nested blocks being parsed. error: The function to call with any errors found. """ @@ -1691,7 +2216,16 @@ def CheckSpacing(filename, clean_lines, linenum, error): # Before nixing comments, check if the line is blank for no good # reason. This includes the first line after a block is opened, and # blank lines at the end of a function (ie, right before a line like '}' - if IsBlankLine(line): + # + # Skip all the blank line checks if we are immediately inside a + # namespace body. 
In other words, don't issue blank line warnings + # for this block: + # namespace { + # + # } + # + # A warning about missing end of namespace comments will be issued instead. + if IsBlankLine(line) and not nesting_state.InNamespaceBody(): elided = clean_lines.elided prev_line = elided[linenum - 1] prevbrace = prev_line.rfind('{') @@ -1699,8 +2233,7 @@ def CheckSpacing(filename, clean_lines, linenum, error): # both start with alnums and are indented the same amount. # This ignores whitespace at the start of a namespace block # because those are not usually indented. - if (prevbrace != -1 and prev_line[prevbrace:].find('}') == -1 - and prev_line[:prevbrace].find('namespace') == -1): + if prevbrace != -1 and prev_line[prevbrace:].find('}') == -1: # OK, we have a blank line at the start of a code block. Before we # complain, we check if it is an exception to the rule: The previous # non-empty line has the parameters of a function header that are indented @@ -1732,12 +2265,7 @@ def CheckSpacing(filename, clean_lines, linenum, error): if not exception: error(filename, linenum, 'whitespace/blank_line', 2, 'Blank line at the start of a code block. Is this needed?') - # This doesn't ignore whitespace at the end of a namespace block - # because that is too hard without pairing open/close braces; - # however, a special exception is made for namespace closing - # brackets which have a comment containing "namespace". - # - # Also, ignore blank lines at the end of a block in a long if-else + # Ignore blank lines at the end of a block in a long if-else # chain, like this: # if (condition1) { # // Something followed by a blank line @@ -1749,7 +2277,6 @@ def CheckSpacing(filename, clean_lines, linenum, error): next_line = raw[linenum + 1] if (next_line and Match(r'\s*}', next_line) - and next_line.find('namespace') == -1 and next_line.find('} else ') == -1): error(filename, linenum, 'whitespace/blank_line', 3, 'Blank line at the end of a code block. Is this needed?') @@ -1810,26 +2337,59 @@ def CheckSpacing(filename, clean_lines, linenum, error): # though, so we punt on this one for now. TODO. # You should always have whitespace around binary operators. - # Alas, we can't test < or > because they're legitimately used sans spaces - # (a->b, vector<int> a). The only time we can tell is a < with no >, and - # only if it's not template params list spilling into the next line. + # + # Check <= and >= first to avoid false positives with < and >, then + # check non-include lines for spacing around < and >. match = Search(r'[^<>=!\s](==|!=|<=|>=)[^<>=!\s]', line) - if not match: - # Note that while it seems that the '<[^<]*' term in the following - # regexp could be simplified to '<.*', which would indeed match - # the same class of strings, the [^<] means that searching for the - # regexp takes linear rather than quadratic time. 
-    if not Search(r'<[^<]*,\s*$', line):  # template params spill
-      match = Search(r'[^<>=!\s](<)[^<>=!\s]([^>]|->)*$', line)
   if match:
     error(filename, linenum, 'whitespace/operators', 3,
           'Missing spaces around %s' % match.group(1))
-  # We allow no-spaces around << and >> when used like this: 10<<20, but
+  # We allow no-spaces around << when used like this: 10<<20, but
   # not otherwise (particularly, not when used as streams)
-  match = Search(r'[^0-9\s](<<|>>)[^0-9\s]', line)
+  match = Search(r'(\S)(?:L|UL|ULL|l|ul|ull)?<<(\S)', line)
+  if match and not (match.group(1).isdigit() and match.group(2).isdigit()):
+    error(filename, linenum, 'whitespace/operators', 3,
+          'Missing spaces around <<')
+  elif not Match(r'#.*include', line):
+    # Avoid false positives on ->
+    reduced_line = line.replace('->', '')
+
+    # Look for < that is not surrounded by spaces. This is only
+    # triggered if both sides are missing spaces, even though
+    # technically we should flag if at least one side is missing a
+    # space. This is done to avoid some false positives with shifts.
+    match = Search(r'[^\s<]<([^\s=<].*)', reduced_line)
+    if (match and
+        not FindNextMatchingAngleBracket(clean_lines, linenum, match.group(1))):
+      error(filename, linenum, 'whitespace/operators', 3,
+            'Missing spaces around <')
+
+    # Look for > that is not surrounded by spaces. Similar to the
+    # above, we only trigger if both sides are missing spaces to avoid
+    # false positives with shifts.
+    match = Search(r'^(.*[^\s>])>[^\s=>]', reduced_line)
+    if (match and
+        not FindPreviousMatchingAngleBracket(clean_lines, linenum,
+                                             match.group(1))):
+      error(filename, linenum, 'whitespace/operators', 3,
+            'Missing spaces around >')
+
+  # We allow no-spaces around >> for almost anything. This is because
+  # C++11 allows ">>" to close nested templates, which accounts for
+  # most cases when ">>" is not followed by a space.
+  #
+  # We still warn on ">>" followed by an alpha character, because that is
+  # likely due to ">>" being used for right shifts, e.g.:
+  #   value >> alpha
+  #
+  # When ">>" is used to close templates, the alphanumeric character that
+  # follows would be part of an identifier, and there should still be
+  # a space separating the template type and the identifier.
+  #   type<type<type>> alpha
+  match = Search(r'>>[a-zA-Z_]', line)
   if match:
     error(filename, linenum, 'whitespace/operators', 3,
-          'Missing spaces around %s' % match.group(1))
+          'Missing spaces around >>')

   # There shouldn't be space around unary operators
   match = Search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
@@ -1903,16 +2463,23 @@ def CheckSpacing(filename, clean_lines, linenum, error):
   # the semicolon there.
   if Search(r':\s*;\s*$', line):
     error(filename, linenum, 'whitespace/semicolon', 5,
-          'Semicolon defining empty statement. Use { } instead.')
+          'Semicolon defining empty statement. Use {} instead.')
   elif Search(r'^\s*;\s*$', line):
     error(filename, linenum, 'whitespace/semicolon', 5,
           'Line contains only semicolon. If this should be an empty statement, '
-          'use { } instead.')
+          'use {} instead.')
   elif (Search(r'\s+;\s*$', line) and
         not Search(r'\bfor\b', line)):
     error(filename, linenum, 'whitespace/semicolon', 5,
           'Extra space before last semicolon. If this should be an empty '
-          'statement, use { } instead.')
+          'statement, use {} instead.')
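# ---------------------------------------------------------------------------
# [Editorial aside, an illustration only, not part of the patch] Two quick
# demonstrations of the operator-spacing heuristics above. First, the new
# "<<" rule: digit<<digit is accepted as a shift, any other unspaced "<<" is
# flagged. Second, a single-line sketch of the nesting-stack idea behind
# FindNextMatchingAngleBracket(): decide whether a '<' is a template bracket
# or a less-than operator. The function names below are hypothetical.
import re

def missing_spaces_around_left_shift(line):
  match = re.search(r'(\S)(?:L|UL|ULL|l|ul|ull)?<<(\S)', line)
  return bool(match) and not (match.group(1).isdigit() and
                              match.group(2).isdigit())

print(missing_spaces_around_left_shift('x = 1<<20;'))     # False: a shift
print(missing_spaces_around_left_shift('cout<<"hi";'))    # True: stream use
print(missing_spaces_around_left_shift('cout << "hi";'))  # False: spaced

def closes_angle_bracket(init_suffix):
  # init_suffix is everything after the '<' being classified.
  nesting_stack = ['<']
  line = init_suffix
  while True:
    match = re.match(r'[^<>(),;\[\]]*([<>(),;\[\]])(.*)', line)
    if not match:
      return True                # line exhausted: benefit of the doubt
    operator, line = match.group(1), match.group(2)
    if nesting_stack[-1] == '<':
      if operator in ('<', '(', '['):
        nesting_stack.append(operator)
      elif operator == '>':
        nesting_stack.pop()
        if not nesting_stack:
          return True            # found the matching '>'
      elif operator == ',':
        return True              # comma inside template arguments
      else:
        return False             # ';', ')' or ']': a comparison after all
    elif operator in ('<', '(', '['):
      nesting_stack.append(operator)
    elif operator in (')', ']') and len(nesting_stack) > 1:
      nesting_stack.pop()

print(closes_angle_bracket('int> v;'))    # True:  as in "vector<int> v;"
print(closes_angle_bracket('b && c) {'))  # False: as in "if (a<b && c) {"
# ---------------------------------------------------------------------------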
+
+  # In range-based for, we want spaces before and after the colon, but
+  # not around "::" tokens that might appear.
+  if (Search('for *\(.*[^:]:[^: ]', line) or
+      Search('for *\(.*[^: ]:[^:]', line)):
+    error(filename, linenum, 'whitespace/forcolon', 2,
+          'Missing space around colon in range-based for loop')


 def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error):
@@ -1938,8 +2505,8 @@
   #
   # If we didn't find the end of the class, last_line would be zero,
   # and the check will be skipped by the first condition.
-  if (class_info.last_line - class_info.linenum <= 24 or
-      linenum <= class_info.linenum):
+  if (class_info.last_line - class_info.starting_linenum <= 24 or
+      linenum <= class_info.starting_linenum):
     return

   matched = Match(r'\s*(public|protected|private):', clean_lines.lines[linenum])
@@ -1950,15 +2517,18 @@
   #   - We are at the beginning of the class.
   #   - We are forward-declaring an inner class that is semantically
   #     private, but needed to be public for implementation reasons.
+  # Also ignores cases where the previous line ends with a backslash, as is
+  # common when defining classes in C macros.
   prev_line = clean_lines.lines[linenum - 1]
   if (not IsBlankLine(prev_line) and
-      not Search(r'\b(class|struct)\b', prev_line)):
+      not Search(r'\b(class|struct)\b', prev_line) and
+      not Search(r'\\$', prev_line)):
     # Try a bit harder to find the beginning of the class. This is to
     # account for multi-line base-specifier lists, e.g.:
     #   class Derived
     #       : public Base {
-    end_class_head = class_info.linenum
-    for i in range(class_info.linenum, linenum):
+    end_class_head = class_info.starting_linenum
+    for i in range(class_info.starting_linenum, linenum):
       if Search(r'\{\s*$', clean_lines.lines[i]):
         end_class_head = i
         break
@@ -2008,9 +2578,11 @@ def CheckBraces(filename, clean_lines, linenum, error):
   # which is commonly used to control the lifetime of
   # stack-allocated variables. We don't detect this perfectly: we
   # just don't complain if the last non-whitespace character on the
-  # previous non-blank line is ';', ':', '{', or '}'.
+  # previous non-blank line is ';', ':', '{', or '}', or if the previous
+  # line starts a preprocessor block.
   prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
-  if not Search(r'[;:}{]\s*$', prevline):
+  if (not Search(r'[;:}{]\s*$', prevline) and
+      not Match(r'\s*#', prevline)):
     error(filename, linenum, 'whitespace/braces', 4,
           '{ should almost always be at the end of the previous line')

@@ -2064,6 +2636,33 @@
           "You don't need a ; after a }")


+def CheckEmptyLoopBody(filename, clean_lines, linenum, error):
+  """Look for an empty loop body with only a single semicolon.
+
+  Args:
+    filename: The name of the current file.
+    clean_lines: A CleansedLines instance containing the file.
+    linenum: The number of the line to check.
+    error: The function to call with any errors found.
+  """
+
+  # Search for loop keywords at the beginning of the line. Because only
+  # whitespace is allowed before the keywords, this will also ignore most
+  # do-while loops, since those lines should start with a closing brace.
+  line = clean_lines.elided[linenum]
+  if Match(r'\s*(for|while)\s*\(', line):
+    # Find the end of the conditional expression
+    (end_line, end_linenum, end_pos) = CloseExpression(
+        clean_lines, linenum, line.find('('))
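# ---------------------------------------------------------------------------
# [Editorial aside, an illustration only, not part of the patch] A minimal
# sketch of the empty-loop-body detection in CheckEmptyLoopBody() above, with
# a simplified, single-line stand-in for cpplint's CloseExpression() helper.
# All names below (close_paren_pos, has_empty_loop_body) are hypothetical.
import re

def close_paren_pos(line, start):
  # Index just past the ')' matching the '(' at position 'start', or -1.
  depth = 0
  for i in range(start, len(line)):
    if line[i] == '(':
      depth += 1
    elif line[i] == ')':
      depth -= 1
      if depth == 0:
        return i + 1
  return -1

def has_empty_loop_body(line):
  if not re.match(r'\s*(for|while)\s*\(', line):
    return False
  end_pos = close_paren_pos(line, line.find('('))
  # Flag only a semicolon directly after the closing parenthesis.
  return end_pos != -1 and bool(re.match(r';', line[end_pos:]))

print(has_empty_loop_body('while (ptr->next);'))                     # True
print(has_empty_loop_body('for (int i = 0; i < n; ++i) sum += i;'))  # False
# ---------------------------------------------------------------------------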
+
+    # Output warning if what follows the condition expression is a semicolon.
+    # No warning for all other cases, including whitespace or newline, since we
+    # have a separate check for semicolons preceded by whitespace.
+    if end_pos >= 0 and Match(r';', end_line[end_pos:]):
+      error(filename, end_linenum, 'whitespace/empty_loop_body', 5,
+            'Empty loop bodies should use {} or continue')
+
+
 def ReplaceableCheck(operator, macro, line):
   """Determine whether a basic CHECK can be replaced with a more specific one.
@@ -2132,6 +2731,38 @@ def CheckCheck(filename, clean_lines, linenum, error):
     break
+
+def CheckAltTokens(filename, clean_lines, linenum, error):
+  """Check alternative keywords being used in boolean expressions.
+
+  Args:
+    filename: The name of the current file.
+    clean_lines: A CleansedLines instance containing the file.
+    linenum: The number of the line to check.
+    error: The function to call with any errors found.
+  """
+  line = clean_lines.elided[linenum]
+
+  # Avoid preprocessor lines
+  if Match(r'^\s*#', line):
+    return
+
+  # Last ditch effort to avoid multi-line comments. This will not help
+  # if the comment started before the current line or ended after the
+  # current line, but it catches most of the false positives. At least,
+  # it provides a way to work around this warning for people who use
+  # multi-line comments in preprocessor macros.
+  #
+  # TODO(unknown): remove this once cpplint has better support for
+  # multi-line comments.
+  if line.find('/*') >= 0 or line.find('*/') >= 0:
+    return
+
+  for match in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line):
+    error(filename, linenum, 'readability/alt_tokens', 2,
+          'Use operator %s instead of %s' % (
+              _ALT_TOKEN_REPLACEMENT[match.group(1)], match.group(1)))
+
+
 def GetLineWidth(line):
   """Determines the width of the line in column positions.
@@ -2154,7 +2785,7 @@ def GetLineWidth(line):
   return len(line)

-def CheckStyle(filename, clean_lines, linenum, file_extension, class_state,
+def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state,
                error):
   """Checks rules from the 'C++ style rules' section of cppguide.html.

@@ -2167,6 +2798,8 @@ def CheckStyle(filename, clean_lines, linenum, file_extension, class_state,
     clean_lines: A CleansedLines instance containing the file.
     linenum: The number of the line to check.
     file_extension: The extension (without the dot) of the filename.
+    nesting_state: A _NestingState instance which maintains information about
+      the current stack of nested blocks being parsed.
     error: The function to call with any errors found.
""" @@ -2248,16 +2881,19 @@ def CheckStyle(filename, clean_lines, linenum, file_extension, class_state, not ((cleansed_line.find('case ') != -1 or cleansed_line.find('default:') != -1) and cleansed_line.find('break;') != -1)): - error(filename, linenum, 'whitespace/newline', 4, + error(filename, linenum, 'whitespace/newline', 0, 'More than one command on the same line') # Some more style checks CheckBraces(filename, clean_lines, linenum, error) - CheckSpacing(filename, clean_lines, linenum, error) + CheckEmptyLoopBody(filename, clean_lines, linenum, error) + CheckAccess(filename, clean_lines, linenum, nesting_state, error) + CheckSpacing(filename, clean_lines, linenum, nesting_state, error) CheckCheck(filename, clean_lines, linenum, error) - if class_state and class_state.classinfo_stack: - CheckSectionSpacing(filename, clean_lines, - class_state.classinfo_stack[-1], linenum, error) + CheckAltTokens(filename, clean_lines, linenum, error) + classinfo = nesting_state.InnermostClass() + if classinfo: + CheckSectionSpacing(filename, clean_lines, classinfo, linenum, error) _RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"') @@ -2554,9 +3190,11 @@ def CheckLanguage(filename, clean_lines, linenum, file_extension, include_state, fnline))): # We allow non-const references in a few standard places, like functions - # called "swap()" or iostream operators like "<<" or ">>". + # called "swap()" or iostream operators like "<<" or ">>". We also filter + # out for loops, which lint otherwise mistakenly thinks are functions. if not Search( - r'(swap|Swap|operator[<>][<>])\s*\(\s*(?:[\w:]|<.*>)+\s*&', + r'(for|swap|Swap|operator[<>][<>])\s*\(\s*' + r'(?:(?:typename\s*)?[\w:]|<.*>)+\s*&', fnline): error(filename, linenum, 'runtime/references', 2, 'Is this a non-const reference? ' @@ -2578,10 +3216,19 @@ def CheckLanguage(filename, clean_lines, linenum, file_extension, include_state, if (match.group(1) is None and # If new operator, then this isn't a cast not (Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line) or Match(r'^\s*MockCallback<.*>', line))): - error(filename, linenum, 'readability/casting', 4, - 'Using deprecated casting style. ' - 'Use static_cast<%s>(...) instead' % - match.group(2)) + # Try a bit harder to catch gmock lines: the only place where + # something looks like an old-style cast is where we declare the + # return type of the mocked method, and the only time when we + # are missing context is if MOCK_METHOD was split across + # multiple lines (for example http://go/hrfhr ), so we only need + # to check the previous line for MOCK_METHOD. + if (linenum == 0 or + not Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(\S+,\s*$', + clean_lines.elided[linenum - 1])): + error(filename, linenum, 'readability/casting', 4, + 'Using deprecated casting style. ' + 'Use static_cast<%s>(...) instead' % + match.group(2)) CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum], 'static_cast', @@ -2703,7 +3350,7 @@ def CheckLanguage(filename, clean_lines, linenum, file_extension, include_state, printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(') if printf_args: match = Match(r'([\w.\->()]+)$', printf_args) - if match: + if match and match.group(1) != '__VA_ARGS__': function_name = re.search(r'\b((?:string)?printf)\s*\(', line, re.I).group(1) error(filename, linenum, 'runtime/printf', 4, @@ -2824,6 +3471,11 @@ def CheckCStyleCast(filename, linenum, line, raw_line, cast_type, pattern, 'Using sizeof(type). 
Use sizeof(varname) instead if possible') return True + # operator++(int) and operator--(int) + if (line[0:match.start(1) - 1].endswith(' operator++') or + line[0:match.start(1) - 1].endswith(' operator--')): + return False + remainder = line[match.end(0):] # The close paren is for function pointers as arguments to a function. @@ -3112,13 +3764,13 @@ def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error): if match: error(filename, linenum, 'build/explicit_make_pair', 4, # 4 = high confidence - 'Omit template arguments from make_pair OR use pair directly OR' - ' if appropriate, construct a pair directly') + 'For C++11-compatibility, omit template arguments from make_pair' + ' OR use pair directly OR if appropriate, construct a pair directly') -def ProcessLine(filename, file_extension, - clean_lines, line, include_state, function_state, - class_state, error, extra_check_functions=[]): +def ProcessLine(filename, file_extension, clean_lines, line, + include_state, function_state, nesting_state, error, + extra_check_functions=[]): """Processes a single line in the file. Args: @@ -3129,8 +3781,8 @@ def ProcessLine(filename, file_extension, line: Number of line being processed. include_state: An _IncludeState instance in which the headers are inserted. function_state: A _FunctionState instance which counts function lines, etc. - class_state: A _ClassState instance which maintains information about - the current stack of nested class declarations being parsed. + nesting_state: A _NestingState instance which maintains information about + the current stack of nested blocks being parsed. error: A callable to which errors are reported, which takes 4 arguments: filename, line number, error level, and message extra_check_functions: An array of additional check functions that will be @@ -3139,13 +3791,16 @@ def ProcessLine(filename, file_extension, """ raw_lines = clean_lines.raw_lines ParseNolintSuppressions(filename, raw_lines[line], line, error) + nesting_state.Update(filename, clean_lines, line, error) + if nesting_state.stack and nesting_state.stack[-1].inline_asm != _NO_ASM: + return CheckForFunctionLengths(filename, clean_lines, line, function_state, error) CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error) - CheckStyle(filename, clean_lines, line, file_extension, class_state, error) + CheckStyle(filename, clean_lines, line, file_extension, nesting_state, error) CheckLanguage(filename, clean_lines, line, file_extension, include_state, error) CheckForNonStandardConstructs(filename, clean_lines, line, - class_state, error) + nesting_state, error) CheckPosixThreading(filename, clean_lines, line, error) CheckInvalidIncrement(filename, clean_lines, line, error) CheckMakePairUsesDeduction(filename, clean_lines, line, error) @@ -3172,7 +3827,7 @@ def ProcessFileData(filename, file_extension, lines, error, include_state = _IncludeState() function_state = _FunctionState() - class_state = _ClassState() + nesting_state = _NestingState() ResetNolintSuppressions() @@ -3185,9 +3840,9 @@ def ProcessFileData(filename, file_extension, lines, error, clean_lines = CleansedLines(lines) for line in xrange(clean_lines.NumLines()): ProcessLine(filename, file_extension, clean_lines, line, - include_state, function_state, class_state, error, + include_state, function_state, nesting_state, error, extra_check_functions) - class_state.CheckFinished(filename, error) + nesting_state.CheckClassFinished(filename, error) CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error) @@ 
-3301,7 +3956,8 @@ def ParseArguments(args): try: (opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=', 'counting=', - 'filter=']) + 'filter=', + 'root=']) except getopt.GetoptError: PrintUsage('Invalid arguments.') @@ -3327,6 +3983,9 @@ def ParseArguments(args): if val not in ('total', 'toplevel', 'detailed'): PrintUsage('Valid counting options are total, toplevel, and detailed') counting_style = val + elif opt == '--root': + global _root + _root = val if not filenames: PrintUsage('No files were specified.') diff --git a/vp8/encoder/arm/neon/shortfdct_neon.asm b/vp8/encoder/arm/neon/shortfdct_neon.asm index 09dd011ec37bed0abf3dd31b5cf84ad25f1eb5c0..5ea8dd83dd78c983f9801edeffe03dc713b06380 100644 --- a/vp8/encoder/arm/neon/shortfdct_neon.asm +++ b/vp8/encoder/arm/neon/shortfdct_neon.asm @@ -97,7 +97,7 @@ coeff vmlal.s16 q11, d6, d17 ; c1*2217 + d1*5352 + 12000 vmlsl.s16 q12, d6, d16 ; d1*2217 - c1*5352 + 51000 - vmvn.s16 d4, d4 + vmvn d4, d4 vshrn.s32 d1, q11, #16 ; op[4] = (c1*2217 + d1*5352 + 12000)>>16 vsub.s16 d1, d1, d4 ; op[4] += (d1!=0) vshrn.s32 d3, q12, #16 ; op[12]= (d1*2217 - c1*5352 + 51000)>>16 @@ -200,7 +200,7 @@ coeff vmlal.s16 q11, d27, d17 ; B[4] = c1*2217 + d1*5352 + 12000 vmlsl.s16 q12, d27, d16 ; B[12] = d1*2217 - c1*5352 + 51000 - vmvn.s16 q14, q14 + vmvn q14, q14 vshrn.s32 d1, q9, #16 ; A[4] = (c1*2217 + d1*5352 + 12000)>>16 vshrn.s32 d3, q10, #16 ; A[12]= (d1*2217 - c1*5352 + 51000)>>16 diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index 3432f02adf08e77fa8bab83c94cb9422091a1d3e..51b154f45c15417aeca535d501feaff0b23a9c94 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -2755,7 +2755,7 @@ static void update_alt_ref_frame_stats(VP8_COMP *cpi) /* Clear the alternate reference update pending flag. */ cpi->source_alt_ref_pending = 0; - /* Set the alternate refernce frame active flag */ + /* Set the alternate reference frame active flag */ cpi->source_alt_ref_active = 1; @@ -3402,7 +3402,7 @@ static void encode_frame_to_data_rate else cpi->common.ref_frame_sign_bias[ALTREF_FRAME] = 0; - /* Check to see if a key frame is signalled + /* Check to see if a key frame is signaled * For two pass with auto key frame enabled cm->frame_type may already * be set, but not for one pass. */ diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk index 7d1904aaf7d15fb8a89b3a6e94302c24d2e70ec4..cd091f39ae3bad58598984e4c1405e93850973c9 100644 --- a/vp8/vp8cx.mk +++ b/vp8/vp8cx.mk @@ -91,18 +91,8 @@ VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.c -# TODO(johann) make this generic -ifeq ($(HAVE_SSE2),yes) -vp8/encoder/x86/quantize_sse2.c.o: CFLAGS += -msse2 -vp8/encoder/x86/quantize_sse2.c.d: CFLAGS += -msse2 -endif - ifeq ($(CONFIG_TEMPORAL_DENOISING),yes) VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/denoising_sse2.c -ifeq ($(HAVE_SSE2),yes) -vp8/encoder/x86/denoising_sse2.c.o: CFLAGS += -msse2 -vp8/encoder/x86/denoising_sse2.c.d: CFLAGS += -msse2 -endif endif VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/subtract_sse2.asm diff --git a/vp9/common/ppc/vp9_copy_altivec.asm b/vp9/common/ppc/vp9_copy_altivec.asm deleted file mode 100644 index a4ce9158342aeb4ecb1dd12b2c5d9ad4ee79dc31..0000000000000000000000000000000000000000 --- a/vp9/common/ppc/vp9_copy_altivec.asm +++ /dev/null @@ -1,47 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
-; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - .globl copy_mem16x16_ppc - -;# r3 unsigned char *src -;# r4 int src_stride -;# r5 unsigned char *dst -;# r6 int dst_stride - -;# Make the assumption that input will not be aligned, -;# but the output will be. So two reads and a perm -;# for the input, but only one store for the output. -copy_mem16x16_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xe000 - mtspr 256, r12 ;# set VRSAVE - - li r10, 16 - mtctr r10 - -cp_16x16_loop: - lvsl v0, 0, r3 ;# permutate value for alignment - - lvx v1, 0, r3 - lvx v2, r10, r3 - - vperm v1, v1, v2, v0 - - stvx v1, 0, r5 - - add r3, r3, r4 ;# increment source pointer - add r5, r5, r6 ;# increment destination pointer - - bdnz cp_16x16_loop - - mtspr 256, r11 ;# reset old VRSAVE - - blr diff --git a/vp9/common/ppc/vp9_filter_altivec.asm b/vp9/common/ppc/vp9_filter_altivec.asm deleted file mode 100644 index 4da2e94f959d7481fa5b7e0798332ad34185b74a..0000000000000000000000000000000000000000 --- a/vp9/common/ppc/vp9_filter_altivec.asm +++ /dev/null @@ -1,1013 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - .globl sixtap_predict_ppc - .globl sixtap_predict8x4_ppc - .globl sixtap_predict8x8_ppc - .globl sixtap_predict16x16_ppc - -.macro load_c V, LABEL, OFF, R0, R1 - lis \R0, \LABEL@ha - la \R1, \LABEL@l(\R0) - lvx \V, \OFF, \R1 -.endm - -.macro load_hfilter V0, V1 - load_c \V0, HFilter, r5, r9, r10 - - addi r5, r5, 16 - lvx \V1, r5, r10 -.endm - -;# Vertical filtering -.macro Vprolog - load_c v0, VFilter, r6, r3, r10 - - vspltish v5, 8 - vspltish v6, 3 - vslh v6, v5, v6 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040 - - vspltb v1, v0, 1 - vspltb v2, v0, 2 - vspltb v3, v0, 3 - vspltb v4, v0, 4 - vspltb v5, v0, 5 - vspltb v0, v0, 0 -.endm - -.macro vpre_load - Vprolog - li r10, 16 - lvx v10, 0, r9 ;# v10..v14 = first 5 rows - lvx v11, r10, r9 - addi r9, r9, 32 - lvx v12, 0, r9 - lvx v13, r10, r9 - addi r9, r9, 32 - lvx v14, 0, r9 -.endm - -.macro Msum Re, Ro, V, T, TMP - ;# (Re,Ro) += (V*T) - vmuleub \TMP, \V, \T ;# trashes v8 - vadduhm \Re, \Re, \TMP ;# Re = evens, saturation unnecessary - vmuloub \TMP, \V, \T - vadduhm \Ro, \Ro, \TMP ;# Ro = odds -.endm - -.macro vinterp_no_store P0 P1 P2 P3 P4 P5 - vmuleub v8, \P0, v0 ;# 64 + 4 positive taps - vadduhm v16, v6, v8 - vmuloub v8, \P0, v0 - vadduhm v17, v6, v8 - Msum v16, v17, \P2, v2, v8 - Msum v16, v17, \P3, v3, v8 - Msum v16, v17, \P5, v5, v8 - - vmuleub v18, \P1, v1 ;# 2 negative taps - vmuloub v19, \P1, v1 - Msum v18, v19, \P4, v4, v8 - - vsubuhs v16, v16, v18 ;# subtract neg from pos - vsubuhs v17, v17, v19 - vsrh v16, v16, v7 ;# divide by 128 - vsrh v17, v17, v7 ;# v16 v17 = evens, odds - vmrghh v18, v16, v17 ;# v18 v19 = 16-bit result in order - vmrglh v19, v16, v17 - vpkuhus \P0, v18, v19 ;# P0 = 8-bit result -.endm - -.macro vinterp_no_store_8x8 P0 P1 P2 P3 P4 P5 - vmuleub v24, \P0, v13 ;# 64 + 4 positive 
taps - vadduhm v21, v20, v24 - vmuloub v24, \P0, v13 - vadduhm v22, v20, v24 - Msum v21, v22, \P2, v15, v25 - Msum v21, v22, \P3, v16, v25 - Msum v21, v22, \P5, v18, v25 - - vmuleub v23, \P1, v14 ;# 2 negative taps - vmuloub v24, \P1, v14 - Msum v23, v24, \P4, v17, v25 - - vsubuhs v21, v21, v23 ;# subtract neg from pos - vsubuhs v22, v22, v24 - vsrh v21, v21, v19 ;# divide by 128 - vsrh v22, v22, v19 ;# v16 v17 = evens, odds - vmrghh v23, v21, v22 ;# v18 v19 = 16-bit result in order - vmrglh v24, v21, v22 - vpkuhus \P0, v23, v24 ;# P0 = 8-bit result -.endm - - -.macro Vinterp P0 P1 P2 P3 P4 P5 - vinterp_no_store \P0, \P1, \P2, \P3, \P4, \P5 - stvx \P0, 0, r7 - add r7, r7, r8 ;# 33 ops per 16 pels -.endm - - -.macro luma_v P0, P1, P2, P3, P4, P5 - addi r9, r9, 16 ;# P5 = newest input row - lvx \P5, 0, r9 - Vinterp \P0, \P1, \P2, \P3, \P4, \P5 -.endm - -.macro luma_vtwo - luma_v v10, v11, v12, v13, v14, v15 - luma_v v11, v12, v13, v14, v15, v10 -.endm - -.macro luma_vfour - luma_vtwo - luma_v v12, v13, v14, v15, v10, v11 - luma_v v13, v14, v15, v10, v11, v12 -.endm - -.macro luma_vsix - luma_vfour - luma_v v14, v15, v10, v11, v12, v13 - luma_v v15, v10, v11, v12, v13, v14 -.endm - -.macro Interp4 R I I4 - vmsummbm \R, v13, \I, v15 - vmsummbm \R, v14, \I4, \R -.endm - -.macro Read8x8 VD, RS, RP, increment_counter - lvsl v21, 0, \RS ;# permutate value for alignment - - ;# input to filter is 21 bytes wide, output is 16 bytes. - ;# input will can span three vectors if not aligned correctly. - lvx \VD, 0, \RS - lvx v20, r10, \RS - -.if \increment_counter - add \RS, \RS, \RP -.endif - - vperm \VD, \VD, v20, v21 -.endm - -.macro interp_8x8 R - vperm v20, \R, \R, v16 ;# v20 = 0123 1234 2345 3456 - vperm v21, \R, \R, v17 ;# v21 = 4567 5678 6789 789A - Interp4 v20, v20, v21 ;# v20 = result 0 1 2 3 - vperm \R, \R, \R, v18 ;# R = 89AB 9ABC ABCx BCxx - Interp4 v21, v21, \R ;# v21 = result 4 5 6 7 - - vpkswus \R, v20, v21 ;# R = 0 1 2 3 4 5 6 7 - vsrh \R, \R, v19 - - vpkuhus \R, \R, \R ;# saturate and pack - -.endm - -.macro Read4x4 VD, RS, RP, increment_counter - lvsl v21, 0, \RS ;# permutate value for alignment - - ;# input to filter is 21 bytes wide, output is 16 bytes. - ;# input will can span three vectors if not aligned correctly. - lvx v20, 0, \RS - -.if \increment_counter - add \RS, \RS, \RP -.endif - - vperm \VD, v20, v20, v21 -.endm - .text - - .align 2 -;# r3 unsigned char * src -;# r4 int src_pitch -;# r5 int x_offset -;# r6 int y_offset -;# r7 unsigned char * dst -;# r8 int dst_pitch -sixtap_predict_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xff87 - ori r12, r12, 0xffc0 - mtspr 256, r12 ;# set VRSAVE - - stwu r1,-32(r1) ;# create space on the stack - - slwi. r5, r5, 5 ;# index into horizontal filter array - - vspltish v19, 7 - - ;# If there isn't any filtering to be done for the horizontal, then - ;# just skip to the second pass. - beq- vertical_only_4x4 - - ;# load up horizontal filter - load_hfilter v13, v14 - - ;# rounding added in on the multiply - vspltisw v16, 8 - vspltisw v15, 3 - vslw v15, v16, v15 ;# 0x00000040000000400000004000000040 - - ;# Load up permutation constants - load_c v16, B_0123, 0, r9, r10 - load_c v17, B_4567, 0, r9, r10 - load_c v18, B_89AB, 0, r9, r10 - - ;# Back off input buffer by 2 bytes. Need 2 before and 3 after - addi r3, r3, -2 - - addi r9, r3, 0 - li r10, 16 - Read8x8 v2, r3, r4, 1 - Read8x8 v3, r3, r4, 1 - Read8x8 v4, r3, r4, 1 - Read8x8 v5, r3, r4, 1 - - slwi. 
r6, r6, 4 ;# index into vertical filter array - - ;# filter a line - interp_8x8 v2 - interp_8x8 v3 - interp_8x8 v4 - interp_8x8 v5 - - ;# Finished filtering main horizontal block. If there is no - ;# vertical filtering, jump to storing the data. Otherwise - ;# load up and filter the additional 5 lines that are needed - ;# for the vertical filter. - beq- store_4x4 - - ;# only needed if there is a vertical filter present - ;# if the second filter is not null then need to back off by 2*pitch - sub r9, r9, r4 - sub r9, r9, r4 - - Read8x8 v0, r9, r4, 1 - Read8x8 v1, r9, r4, 0 - Read8x8 v6, r3, r4, 1 - Read8x8 v7, r3, r4, 1 - Read8x8 v8, r3, r4, 0 - - interp_8x8 v0 - interp_8x8 v1 - interp_8x8 v6 - interp_8x8 v7 - interp_8x8 v8 - - b second_pass_4x4 - -vertical_only_4x4: - ;# only needed if there is a vertical filter present - ;# if the second filter is not null then need to back off by 2*pitch - sub r3, r3, r4 - sub r3, r3, r4 - li r10, 16 - - Read8x8 v0, r3, r4, 1 - Read8x8 v1, r3, r4, 1 - Read8x8 v2, r3, r4, 1 - Read8x8 v3, r3, r4, 1 - Read8x8 v4, r3, r4, 1 - Read8x8 v5, r3, r4, 1 - Read8x8 v6, r3, r4, 1 - Read8x8 v7, r3, r4, 1 - Read8x8 v8, r3, r4, 0 - - slwi r6, r6, 4 ;# index into vertical filter array - -second_pass_4x4: - load_c v20, b_hilo_4x4, 0, r9, r10 - load_c v21, b_hilo, 0, r9, r10 - - ;# reposition input so that it can go through the - ;# filtering phase with one pass. - vperm v0, v0, v1, v20 ;# 0 1 x x - vperm v2, v2, v3, v20 ;# 2 3 x x - vperm v4, v4, v5, v20 ;# 4 5 x x - vperm v6, v6, v7, v20 ;# 6 7 x x - - vperm v0, v0, v2, v21 ;# 0 1 2 3 - vperm v4, v4, v6, v21 ;# 4 5 6 7 - - vsldoi v1, v0, v4, 4 - vsldoi v2, v0, v4, 8 - vsldoi v3, v0, v4, 12 - - vsldoi v5, v4, v8, 4 - - load_c v13, VFilter, r6, r9, r10 - - vspltish v15, 8 - vspltish v20, 3 - vslh v20, v15, v20 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040 - - vspltb v14, v13, 1 - vspltb v15, v13, 2 - vspltb v16, v13, 3 - vspltb v17, v13, 4 - vspltb v18, v13, 5 - vspltb v13, v13, 0 - - vinterp_no_store_8x8 v0, v1, v2, v3, v4, v5 - - stvx v0, 0, r1 - - lwz r0, 0(r1) - stw r0, 0(r7) - add r7, r7, r8 - - lwz r0, 4(r1) - stw r0, 0(r7) - add r7, r7, r8 - - lwz r0, 8(r1) - stw r0, 0(r7) - add r7, r7, r8 - - lwz r0, 12(r1) - stw r0, 0(r7) - - b exit_4x4 - -store_4x4: - - stvx v2, 0, r1 - lwz r0, 0(r1) - stw r0, 0(r7) - add r7, r7, r8 - - stvx v3, 0, r1 - lwz r0, 0(r1) - stw r0, 0(r7) - add r7, r7, r8 - - stvx v4, 0, r1 - lwz r0, 0(r1) - stw r0, 0(r7) - add r7, r7, r8 - - stvx v5, 0, r1 - lwz r0, 0(r1) - stw r0, 0(r7) - -exit_4x4: - - addi r1, r1, 32 ;# recover stack - - mtspr 256, r11 ;# reset old VRSAVE - - blr - -.macro w_8x8 V, D, R, P - stvx \V, 0, r1 - lwz \R, 0(r1) - stw \R, 0(r7) - lwz \R, 4(r1) - stw \R, 4(r7) - add \D, \D, \P -.endm - - .align 2 -;# r3 unsigned char * src -;# r4 int src_pitch -;# r5 int x_offset -;# r6 int y_offset -;# r7 unsigned char * dst -;# r8 int dst_pitch - -sixtap_predict8x4_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffff - ori r12, r12, 0xffc0 - mtspr 256, r12 ;# set VRSAVE - - stwu r1,-32(r1) ;# create space on the stack - - slwi. r5, r5, 5 ;# index into horizontal filter array - - vspltish v19, 7 - - ;# If there isn't any filtering to be done for the horizontal, then - ;# just skip to the second pass. 
- beq- second_pass_pre_copy_8x4 - - load_hfilter v13, v14 - - ;# rounding added in on the multiply - vspltisw v16, 8 - vspltisw v15, 3 - vslw v15, v16, v15 ;# 0x00000040000000400000004000000040 - - ;# Load up permutation constants - load_c v16, B_0123, 0, r9, r10 - load_c v17, B_4567, 0, r9, r10 - load_c v18, B_89AB, 0, r9, r10 - - ;# Back off input buffer by 2 bytes. Need 2 before and 3 after - addi r3, r3, -2 - - addi r9, r3, 0 - li r10, 16 - Read8x8 v2, r3, r4, 1 - Read8x8 v3, r3, r4, 1 - Read8x8 v4, r3, r4, 1 - Read8x8 v5, r3, r4, 1 - - slwi. r6, r6, 4 ;# index into vertical filter array - - ;# filter a line - interp_8x8 v2 - interp_8x8 v3 - interp_8x8 v4 - interp_8x8 v5 - - ;# Finished filtering main horizontal block. If there is no - ;# vertical filtering, jump to storing the data. Otherwise - ;# load up and filter the additional 5 lines that are needed - ;# for the vertical filter. - beq- store_8x4 - - ;# only needed if there is a vertical filter present - ;# if the second filter is not null then need to back off by 2*pitch - sub r9, r9, r4 - sub r9, r9, r4 - - Read8x8 v0, r9, r4, 1 - Read8x8 v1, r9, r4, 0 - Read8x8 v6, r3, r4, 1 - Read8x8 v7, r3, r4, 1 - Read8x8 v8, r3, r4, 0 - - interp_8x8 v0 - interp_8x8 v1 - interp_8x8 v6 - interp_8x8 v7 - interp_8x8 v8 - - b second_pass_8x4 - -second_pass_pre_copy_8x4: - ;# only needed if there is a vertical filter present - ;# if the second filter is not null then need to back off by 2*pitch - sub r3, r3, r4 - sub r3, r3, r4 - li r10, 16 - - Read8x8 v0, r3, r4, 1 - Read8x8 v1, r3, r4, 1 - Read8x8 v2, r3, r4, 1 - Read8x8 v3, r3, r4, 1 - Read8x8 v4, r3, r4, 1 - Read8x8 v5, r3, r4, 1 - Read8x8 v6, r3, r4, 1 - Read8x8 v7, r3, r4, 1 - Read8x8 v8, r3, r4, 1 - - slwi r6, r6, 4 ;# index into vertical filter array - -second_pass_8x4: - load_c v13, VFilter, r6, r9, r10 - - vspltish v15, 8 - vspltish v20, 3 - vslh v20, v15, v20 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040 - - vspltb v14, v13, 1 - vspltb v15, v13, 2 - vspltb v16, v13, 3 - vspltb v17, v13, 4 - vspltb v18, v13, 5 - vspltb v13, v13, 0 - - vinterp_no_store_8x8 v0, v1, v2, v3, v4, v5 - vinterp_no_store_8x8 v1, v2, v3, v4, v5, v6 - vinterp_no_store_8x8 v2, v3, v4, v5, v6, v7 - vinterp_no_store_8x8 v3, v4, v5, v6, v7, v8 - - cmpi cr0, r8, 8 - beq cr0, store_aligned_8x4 - - w_8x8 v0, r7, r0, r8 - w_8x8 v1, r7, r0, r8 - w_8x8 v2, r7, r0, r8 - w_8x8 v3, r7, r0, r8 - - b exit_8x4 - -store_aligned_8x4: - - load_c v10, b_hilo, 0, r9, r10 - - vperm v0, v0, v1, v10 - vperm v2, v2, v3, v10 - - stvx v0, 0, r7 - addi r7, r7, 16 - stvx v2, 0, r7 - - b exit_8x4 - -store_8x4: - cmpi cr0, r8, 8 - beq cr0, store_aligned2_8x4 - - w_8x8 v2, r7, r0, r8 - w_8x8 v3, r7, r0, r8 - w_8x8 v4, r7, r0, r8 - w_8x8 v5, r7, r0, r8 - - b exit_8x4 - -store_aligned2_8x4: - load_c v10, b_hilo, 0, r9, r10 - - vperm v2, v2, v3, v10 - vperm v4, v4, v5, v10 - - stvx v2, 0, r7 - addi r7, r7, 16 - stvx v4, 0, r7 - -exit_8x4: - - addi r1, r1, 32 ;# recover stack - - mtspr 256, r11 ;# reset old VRSAVE - - - blr - - .align 2 -;# r3 unsigned char * src -;# r4 int src_pitch -;# r5 int x_offset -;# r6 int y_offset -;# r7 unsigned char * dst -;# r8 int dst_pitch - -;# Because the width that needs to be filtered will fit in a single altivec -;# register there is no need to loop. Everything can stay in registers. -sixtap_predict8x8_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffff - ori r12, r12, 0xffc0 - mtspr 256, r12 ;# set VRSAVE - - stwu r1,-32(r1) ;# create space on the stack - - slwi. 
r5, r5, 5 ;# index into horizontal filter array - - vspltish v19, 7 - - ;# If there isn't any filtering to be done for the horizontal, then - ;# just skip to the second pass. - beq- second_pass_pre_copy_8x8 - - load_hfilter v13, v14 - - ;# rounding added in on the multiply - vspltisw v16, 8 - vspltisw v15, 3 - vslw v15, v16, v15 ;# 0x00000040000000400000004000000040 - - ;# Load up permutation constants - load_c v16, B_0123, 0, r9, r10 - load_c v17, B_4567, 0, r9, r10 - load_c v18, B_89AB, 0, r9, r10 - - ;# Back off input buffer by 2 bytes. Need 2 before and 3 after - addi r3, r3, -2 - - addi r9, r3, 0 - li r10, 16 - Read8x8 v2, r3, r4, 1 - Read8x8 v3, r3, r4, 1 - Read8x8 v4, r3, r4, 1 - Read8x8 v5, r3, r4, 1 - Read8x8 v6, r3, r4, 1 - Read8x8 v7, r3, r4, 1 - Read8x8 v8, r3, r4, 1 - Read8x8 v9, r3, r4, 1 - - slwi. r6, r6, 4 ;# index into vertical filter array - - ;# filter a line - interp_8x8 v2 - interp_8x8 v3 - interp_8x8 v4 - interp_8x8 v5 - interp_8x8 v6 - interp_8x8 v7 - interp_8x8 v8 - interp_8x8 v9 - - ;# Finished filtering main horizontal block. If there is no - ;# vertical filtering, jump to storing the data. Otherwise - ;# load up and filter the additional 5 lines that are needed - ;# for the vertical filter. - beq- store_8x8 - - ;# only needed if there is a vertical filter present - ;# if the second filter is not null then need to back off by 2*pitch - sub r9, r9, r4 - sub r9, r9, r4 - - Read8x8 v0, r9, r4, 1 - Read8x8 v1, r9, r4, 0 - Read8x8 v10, r3, r4, 1 - Read8x8 v11, r3, r4, 1 - Read8x8 v12, r3, r4, 0 - - interp_8x8 v0 - interp_8x8 v1 - interp_8x8 v10 - interp_8x8 v11 - interp_8x8 v12 - - b second_pass_8x8 - -second_pass_pre_copy_8x8: - ;# only needed if there is a vertical filter present - ;# if the second filter is not null then need to back off by 2*pitch - sub r3, r3, r4 - sub r3, r3, r4 - li r10, 16 - - Read8x8 v0, r3, r4, 1 - Read8x8 v1, r3, r4, 1 - Read8x8 v2, r3, r4, 1 - Read8x8 v3, r3, r4, 1 - Read8x8 v4, r3, r4, 1 - Read8x8 v5, r3, r4, 1 - Read8x8 v6, r3, r4, 1 - Read8x8 v7, r3, r4, 1 - Read8x8 v8, r3, r4, 1 - Read8x8 v9, r3, r4, 1 - Read8x8 v10, r3, r4, 1 - Read8x8 v11, r3, r4, 1 - Read8x8 v12, r3, r4, 0 - - slwi r6, r6, 4 ;# index into vertical filter array - -second_pass_8x8: - load_c v13, VFilter, r6, r9, r10 - - vspltish v15, 8 - vspltish v20, 3 - vslh v20, v15, v20 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040 - - vspltb v14, v13, 1 - vspltb v15, v13, 2 - vspltb v16, v13, 3 - vspltb v17, v13, 4 - vspltb v18, v13, 5 - vspltb v13, v13, 0 - - vinterp_no_store_8x8 v0, v1, v2, v3, v4, v5 - vinterp_no_store_8x8 v1, v2, v3, v4, v5, v6 - vinterp_no_store_8x8 v2, v3, v4, v5, v6, v7 - vinterp_no_store_8x8 v3, v4, v5, v6, v7, v8 - vinterp_no_store_8x8 v4, v5, v6, v7, v8, v9 - vinterp_no_store_8x8 v5, v6, v7, v8, v9, v10 - vinterp_no_store_8x8 v6, v7, v8, v9, v10, v11 - vinterp_no_store_8x8 v7, v8, v9, v10, v11, v12 - - cmpi cr0, r8, 8 - beq cr0, store_aligned_8x8 - - w_8x8 v0, r7, r0, r8 - w_8x8 v1, r7, r0, r8 - w_8x8 v2, r7, r0, r8 - w_8x8 v3, r7, r0, r8 - w_8x8 v4, r7, r0, r8 - w_8x8 v5, r7, r0, r8 - w_8x8 v6, r7, r0, r8 - w_8x8 v7, r7, r0, r8 - - b exit_8x8 - -store_aligned_8x8: - - load_c v10, b_hilo, 0, r9, r10 - - vperm v0, v0, v1, v10 - vperm v2, v2, v3, v10 - vperm v4, v4, v5, v10 - vperm v6, v6, v7, v10 - - stvx v0, 0, r7 - addi r7, r7, 16 - stvx v2, 0, r7 - addi r7, r7, 16 - stvx v4, 0, r7 - addi r7, r7, 16 - stvx v6, 0, r7 - - b exit_8x8 - -store_8x8: - cmpi cr0, r8, 8 - beq cr0, store_aligned2_8x8 - - w_8x8 v2, r7, r0, r8 - w_8x8 v3, r7, r0, r8 - w_8x8 v4, r7, 
r0, r8 - w_8x8 v5, r7, r0, r8 - w_8x8 v6, r7, r0, r8 - w_8x8 v7, r7, r0, r8 - w_8x8 v8, r7, r0, r8 - w_8x8 v9, r7, r0, r8 - - b exit_8x8 - -store_aligned2_8x8: - load_c v10, b_hilo, 0, r9, r10 - - vperm v2, v2, v3, v10 - vperm v4, v4, v5, v10 - vperm v6, v6, v7, v10 - vperm v8, v8, v9, v10 - - stvx v2, 0, r7 - addi r7, r7, 16 - stvx v4, 0, r7 - addi r7, r7, 16 - stvx v6, 0, r7 - addi r7, r7, 16 - stvx v8, 0, r7 - -exit_8x8: - - addi r1, r1, 32 ;# recover stack - - mtspr 256, r11 ;# reset old VRSAVE - - blr - - .align 2 -;# r3 unsigned char * src -;# r4 int src_pitch -;# r5 int x_offset -;# r6 int y_offset -;# r7 unsigned char * dst -;# r8 int dst_pitch - -;# Two pass filtering. First pass is Horizontal edges, second pass is vertical -;# edges. One of the filters can be null, but both won't be. Needs to use a -;# temporary buffer because the source buffer can't be modified and the buffer -;# for the destination is not large enough to hold the temporary data. -sixtap_predict16x16_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffff - ori r12, r12, 0xf000 - mtspr 256, r12 ;# set VRSAVE - - stwu r1,-416(r1) ;# create space on the stack - - ;# Three possiblities - ;# 1. First filter is null. Don't use a temp buffer. - ;# 2. Second filter is null. Don't use a temp buffer. - ;# 3. Neither are null, use temp buffer. - - ;# First Pass (horizontal edge) - ;# setup pointers for src - ;# if possiblity (1) then setup the src pointer to be the orginal and jump - ;# to second pass. this is based on if x_offset is 0. - - ;# load up horizontal filter - slwi. r5, r5, 5 ;# index into horizontal filter array - - load_hfilter v4, v5 - - beq- copy_horizontal_16x21 - - ;# Back off input buffer by 2 bytes. Need 2 before and 3 after - addi r3, r3, -2 - - slwi. r6, r6, 4 ;# index into vertical filter array - - ;# setup constants - ;# v14 permutation value for alignment - load_c v14, b_hperm, 0, r9, r10 - - ;# These statements are guessing that there won't be a second pass, - ;# but if there is then inside the bypass they need to be set - li r0, 16 ;# prepare for no vertical filter - - ;# Change the output pointer and pitch to be the actual - ;# desination instead of a temporary buffer. - addi r9, r7, 0 - addi r5, r8, 0 - - ;# no vertical filter, so write the output from the first pass - ;# directly into the output buffer. - beq- no_vertical_filter_bypass - - ;# if the second filter is not null then need to back off by 2*pitch - sub r3, r3, r4 - sub r3, r3, r4 - - ;# setup counter for the number of lines that are going to be filtered - li r0, 21 - - ;# use the stack as temporary storage - la r9, 48(r1) - li r5, 16 - -no_vertical_filter_bypass: - - mtctr r0 - - ;# rounding added in on the multiply - vspltisw v10, 8 - vspltisw v12, 3 - vslw v12, v10, v12 ;# 0x00000040000000400000004000000040 - - ;# downshift by 7 ( divide by 128 ) at the end - vspltish v13, 7 - - ;# index to the next set of vectors in the row. - li r10, 16 - li r12, 32 - -horizontal_loop_16x16: - - lvsl v15, 0, r3 ;# permutate value for alignment - - ;# input to filter is 21 bytes wide, output is 16 bytes. - ;# input will can span three vectors if not aligned correctly. 
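For reference, the scalar contract all of this deleted sixtap code implements: six taps that sum to 128, a rounding bias of 64 (the 0x40 splats built with vspltisw/vslw), an arithmetic shift right by 7, and unsigned saturation, over a window of 2 pixels before and 3 after the reference pixel. A minimal C sketch of one output pixel (hypothetical helper, not the portable fallback in the tree):

#include <stdint.h>

/* One 6-tap output pixel: taps sum to 128, so bias by 64 and shift by 7.
 * src points at the reference pixel; the window spans src[-2*step] to
 * src[3*step], matching the "2 before and 3 after" adjustment above. */
static uint8_t sixtap_pixel(const uint8_t *src, int step, const int8_t taps[6]) {
  int sum = 64;                          /* rounding, the 0x00000040 splat */
  for (int t = 0; t < 6; ++t)
    sum += taps[t] * src[(t - 2) * step];
  sum >>= 7;                             /* divide by 128 */
  return (uint8_t)(sum < 0 ? 0 : sum > 255 ? 255 : sum);  /* vpkswus/vpkuhus */
}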
- lvx v1, 0, r3 - lvx v2, r10, r3 - lvx v3, r12, r3 - - vperm v8, v1, v2, v15 - vperm v9, v2, v3, v15 ;# v8 v9 = 21 input pixels left-justified - - vsldoi v11, v8, v9, 4 - - ;# set 0 - vmsummbm v6, v4, v8, v12 ;# taps times elements - vmsummbm v0, v5, v11, v6 - - ;# set 1 - vsldoi v10, v8, v9, 1 - vsldoi v11, v8, v9, 5 - - vmsummbm v6, v4, v10, v12 - vmsummbm v1, v5, v11, v6 - - ;# set 2 - vsldoi v10, v8, v9, 2 - vsldoi v11, v8, v9, 6 - - vmsummbm v6, v4, v10, v12 - vmsummbm v2, v5, v11, v6 - - ;# set 3 - vsldoi v10, v8, v9, 3 - vsldoi v11, v8, v9, 7 - - vmsummbm v6, v4, v10, v12 - vmsummbm v3, v5, v11, v6 - - vpkswus v0, v0, v1 ;# v0 = 0 4 8 C 1 5 9 D (16-bit) - vpkswus v1, v2, v3 ;# v1 = 2 6 A E 3 7 B F - - vsrh v0, v0, v13 ;# divide v0, v1 by 128 - vsrh v1, v1, v13 - - vpkuhus v0, v0, v1 ;# v0 = scrambled 8-bit result - vperm v0, v0, v0, v14 ;# v0 = correctly-ordered result - - stvx v0, 0, r9 - add r9, r9, r5 - - add r3, r3, r4 - - bdnz horizontal_loop_16x16 - - ;# check again to see if vertical filter needs to be done. - cmpi cr0, r6, 0 - beq cr0, end_16x16 - - ;# yes there is, so go to the second pass - b second_pass_16x16 - -copy_horizontal_16x21: - li r10, 21 - mtctr r10 - - li r10, 16 - - sub r3, r3, r4 - sub r3, r3, r4 - - ;# this is done above if there is a horizontal filter, - ;# if not it needs to be done down here. - slwi r6, r6, 4 ;# index into vertical filter array - - ;# always write to the stack when doing a horizontal copy - la r9, 48(r1) - -copy_horizontal_loop_16x21: - lvsl v15, 0, r3 ;# permutate value for alignment - - lvx v1, 0, r3 - lvx v2, r10, r3 - - vperm v8, v1, v2, v15 - - stvx v8, 0, r9 - addi r9, r9, 16 - - add r3, r3, r4 - - bdnz copy_horizontal_loop_16x21 - -second_pass_16x16: - - ;# always read from the stack when doing a vertical filter - la r9, 48(r1) - - ;# downshift by 7 ( divide by 128 ) at the end - vspltish v7, 7 - - vpre_load - - luma_vsix - luma_vsix - luma_vfour - -end_16x16: - - addi r1, r1, 416 ;# recover stack - - mtspr 256, r11 ;# reset old VRSAVE - - blr - - .data - - .align 4 -HFilter: - .byte 0, 0,128, 0, 0, 0,128, 0, 0, 0,128, 0, 0, 0,128, 0 - .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 0, -6,123, 12, 0, -6,123, 12, 0, -6,123, 12, 0, -6,123, 12 - .byte -1, 0, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0 - .byte 2,-11,108, 36, 2,-11,108, 36, 2,-11,108, 36, 2,-11,108, 36 - .byte -8, 1, 0, 0, -8, 1, 0, 0, -8, 1, 0, 0, -8, 1, 0, 0 - .byte 0, -9, 93, 50, 0, -9, 93, 50, 0, -9, 93, 50, 0, -9, 93, 50 - .byte -6, 0, 0, 0, -6, 0, 0, 0, -6, 0, 0, 0, -6, 0, 0, 0 - .byte 3,-16, 77, 77, 3,-16, 77, 77, 3,-16, 77, 77, 3,-16, 77, 77 - .byte -16, 3, 0, 0,-16, 3, 0, 0,-16, 3, 0, 0,-16, 3, 0, 0 - .byte 0, -6, 50, 93, 0, -6, 50, 93, 0, -6, 50, 93, 0, -6, 50, 93 - .byte -9, 0, 0, 0, -9, 0, 0, 0, -9, 0, 0, 0, -9, 0, 0, 0 - .byte 1, -8, 36,108, 1, -8, 36,108, 1, -8, 36,108, 1, -8, 36,108 - .byte -11, 2, 0, 0,-11, 2, 0, 0,-11, 2, 0, 0,-11, 2, 0, 0 - .byte 0, -1, 12,123, 0, -1, 12,123, 0, -1, 12,123, 0, -1, 12,123 - .byte -6, 0, 0, 0, -6, 0, 0, 0, -6, 0, 0, 0, -6, 0, 0, 0 - - .align 4 -VFilter: - .byte 0, 0,128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 0, 6,123, 12, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 2, 11,108, 36, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 0, 9, 93, 50, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 3, 16, 77, 77, 16, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 0, 6, 50, 93, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 1, 8, 36,108, 11, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 0, 1, 12,123, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - - 
.align 4 -b_hperm: - .byte 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 - - .align 4 -B_0123: - .byte 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6 - - .align 4 -B_4567: - .byte 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10 - - .align 4 -B_89AB: - .byte 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12, 13, 14 - - .align 4 -b_hilo: - .byte 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 - - .align 4 -b_hilo_4x4: - .byte 0, 1, 2, 3, 16, 17, 18, 19, 0, 0, 0, 0, 0, 0, 0, 0 diff --git a/vp9/common/ppc/vp9_filter_bilinear_altivec.asm b/vp9/common/ppc/vp9_filter_bilinear_altivec.asm deleted file mode 100644 index fd8aa665fdfb1cb4a4140af3399b7e008d25828d..0000000000000000000000000000000000000000 --- a/vp9/common/ppc/vp9_filter_bilinear_altivec.asm +++ /dev/null @@ -1,677 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - .globl bilinear_predict4x4_ppc - .globl bilinear_predict8x4_ppc - .globl bilinear_predict8x8_ppc - .globl bilinear_predict16x16_ppc - -.macro load_c V, LABEL, OFF, R0, R1 - lis \R0, \LABEL@ha - la \R1, \LABEL@l(\R0) - lvx \V, \OFF, \R1 -.endm - -.macro load_vfilter V0, V1 - load_c \V0, vfilter_b, r6, r9, r10 - - addi r6, r6, 16 - lvx \V1, r6, r10 -.endm - -.macro HProlog jump_label - ;# load up horizontal filter - slwi. r5, r5, 4 ;# index into horizontal filter array - - ;# index to the next set of vectors in the row. - li r10, 16 - li r12, 32 - - ;# downshift by 7 ( divide by 128 ) at the end - vspltish v19, 7 - - ;# If there isn't any filtering to be done for the horizontal, then - ;# just skip to the second pass. - beq \jump_label - - load_c v20, hfilter_b, r5, r9, r0 - - ;# setup constants - ;# v14 permutation value for alignment - load_c v28, b_hperm_b, 0, r9, r0 - - ;# rounding added in on the multiply - vspltisw v21, 8 - vspltisw v18, 3 - vslw v18, v21, v18 ;# 0x00000040000000400000004000000040 - - slwi. r6, r6, 5 ;# index into vertical filter array -.endm - -;# Filters a horizontal line -;# expects: -;# r3 src_ptr -;# r4 pitch -;# r10 16 -;# r12 32 -;# v17 perm intput -;# v18 rounding -;# v19 shift -;# v20 filter taps -;# v21 tmp -;# v22 tmp -;# v23 tmp -;# v24 tmp -;# v25 tmp -;# v26 tmp -;# v27 tmp -;# v28 perm output -;# -.macro HFilter V - vperm v24, v21, v21, v10 ;# v20 = 0123 1234 2345 3456 - vperm v25, v21, v21, v11 ;# v21 = 4567 5678 6789 789A - - vmsummbm v24, v20, v24, v18 - vmsummbm v25, v20, v25, v18 - - vpkswus v24, v24, v25 ;# v24 = 0 4 8 C 1 5 9 D (16-bit) - - vsrh v24, v24, v19 ;# divide v0, v1 by 128 - - vpkuhus \V, v24, v24 ;# \V = scrambled 8-bit result -.endm - -.macro hfilter_8 V, increment_counter - lvsl v17, 0, r3 ;# permutate value for alignment - - ;# input to filter is 9 bytes wide, output is 8 bytes. - lvx v21, 0, r3 - lvx v22, r10, r3 - -.if \increment_counter - add r3, r3, r4 -.endif - vperm v21, v21, v22, v17 - - HFilter \V -.endm - - -.macro load_and_align_8 V, increment_counter - lvsl v17, 0, r3 ;# permutate value for alignment - - ;# input to filter is 21 bytes wide, output is 16 bytes. - ;# input will can span three vectors if not aligned correctly. 
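The bilinear taps are simpler than the sixtap case: for an eighth-pel offset f in 0..7, the hfilter_b/vfilter_b tables further down hold the pair 128-16f and 16f, again with a +64 round and a shift by 7. A scalar model (hypothetical helper):

#include <stdint.h>

/* Two-tap bilinear blend; no clamp is needed, since (255*128 + 64) >> 7 == 255. */
static uint8_t bilin_pixel(const uint8_t *src, int step, int f) {
  return (uint8_t)((src[0] * (128 - (f << 4)) + src[step] * (f << 4) + 64) >> 7);
}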
- lvx v21, 0, r3 - lvx v22, r10, r3 - -.if \increment_counter - add r3, r3, r4 -.endif - - vperm \V, v21, v22, v17 -.endm - -.macro write_aligned_8 V, increment_counter - stvx \V, 0, r7 - -.if \increment_counter - add r7, r7, r8 -.endif -.endm - -.macro vfilter_16 P0 P1 - vmuleub v22, \P0, v20 ;# 64 + 4 positive taps - vadduhm v22, v18, v22 - vmuloub v23, \P0, v20 - vadduhm v23, v18, v23 - - vmuleub v24, \P1, v21 - vadduhm v22, v22, v24 ;# Re = evens, saturation unnecessary - vmuloub v25, \P1, v21 - vadduhm v23, v23, v25 ;# Ro = odds - - vsrh v22, v22, v19 ;# divide by 128 - vsrh v23, v23, v19 ;# v16 v17 = evens, odds - vmrghh \P0, v22, v23 ;# v18 v19 = 16-bit result in order - vmrglh v23, v22, v23 - vpkuhus \P0, \P0, v23 ;# P0 = 8-bit result -.endm - - -.macro w_8x8 V, D, R, P - stvx \V, 0, r1 - lwz \R, 0(r1) - stw \R, 0(r7) - lwz \R, 4(r1) - stw \R, 4(r7) - add \D, \D, \P -.endm - - - .align 2 -;# r3 unsigned char * src -;# r4 int src_pitch -;# r5 int x_offset -;# r6 int y_offset -;# r7 unsigned char * dst -;# r8 int dst_pitch -bilinear_predict4x4_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xf830 - ori r12, r12, 0xfff8 - mtspr 256, r12 ;# set VRSAVE - - stwu r1,-32(r1) ;# create space on the stack - - HProlog second_pass_4x4_pre_copy_b - - ;# Load up permutation constants - load_c v10, b_0123_b, 0, r9, r12 - load_c v11, b_4567_b, 0, r9, r12 - - hfilter_8 v0, 1 - hfilter_8 v1, 1 - hfilter_8 v2, 1 - hfilter_8 v3, 1 - - ;# Finished filtering main horizontal block. If there is no - ;# vertical filtering, jump to storing the data. Otherwise - ;# load up and filter the additional line that is needed - ;# for the vertical filter. - beq store_out_4x4_b - - hfilter_8 v4, 0 - - b second_pass_4x4_b - -second_pass_4x4_pre_copy_b: - slwi r6, r6, 5 ;# index into vertical filter array - - load_and_align_8 v0, 1 - load_and_align_8 v1, 1 - load_and_align_8 v2, 1 - load_and_align_8 v3, 1 - load_and_align_8 v4, 1 - -second_pass_4x4_b: - vspltish v20, 8 - vspltish v18, 3 - vslh v18, v20, v18 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040 - - load_vfilter v20, v21 - - vfilter_16 v0, v1 - vfilter_16 v1, v2 - vfilter_16 v2, v3 - vfilter_16 v3, v4 - -store_out_4x4_b: - - stvx v0, 0, r1 - lwz r0, 0(r1) - stw r0, 0(r7) - add r7, r7, r8 - - stvx v1, 0, r1 - lwz r0, 0(r1) - stw r0, 0(r7) - add r7, r7, r8 - - stvx v2, 0, r1 - lwz r0, 0(r1) - stw r0, 0(r7) - add r7, r7, r8 - - stvx v3, 0, r1 - lwz r0, 0(r1) - stw r0, 0(r7) - -exit_4x4: - - addi r1, r1, 32 ;# recover stack - mtspr 256, r11 ;# reset old VRSAVE - - blr - - .align 2 -;# r3 unsigned char * src -;# r4 int src_pitch -;# r5 int x_offset -;# r6 int y_offset -;# r7 unsigned char * dst -;# r8 int dst_pitch -bilinear_predict8x4_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xf830 - ori r12, r12, 0xfff8 - mtspr 256, r12 ;# set VRSAVE - - stwu r1,-32(r1) ;# create space on the stack - - HProlog second_pass_8x4_pre_copy_b - - ;# Load up permutation constants - load_c v10, b_0123_b, 0, r9, r12 - load_c v11, b_4567_b, 0, r9, r12 - - hfilter_8 v0, 1 - hfilter_8 v1, 1 - hfilter_8 v2, 1 - hfilter_8 v3, 1 - - ;# Finished filtering main horizontal block. If there is no - ;# vertical filtering, jump to storing the data. Otherwise - ;# load up and filter the additional line that is needed - ;# for the vertical filter. 
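The "additional line" mentioned above falls out of the two-pass structure: vertical output row r blends intermediate rows r and r+1, so an H-row block needs H+1 rows from the horizontal pass. A sketch of the second pass under that assumption (names hypothetical):

#include <stdint.h>

/* Vertical bilinear pass over the intermediate buffer: reads h+1 rows,
 * writes h rows, using the same 128-16f / 16f taps as the first pass. */
static void bilin_second_pass(const uint8_t *mid, int mid_pitch,
                              uint8_t *dst, int dst_pitch,
                              int w, int h, int f) {
  for (int r = 0; r < h; ++r)
    for (int c = 0; c < w; ++c)
      dst[r * dst_pitch + c] = (uint8_t)
          ((mid[r * mid_pitch + c] * (128 - (f << 4)) +
            mid[(r + 1) * mid_pitch + c] * (f << 4) + 64) >> 7);
}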
- beq store_out_8x4_b - - hfilter_8 v4, 0 - - b second_pass_8x4_b - -second_pass_8x4_pre_copy_b: - slwi r6, r6, 5 ;# index into vertical filter array - - load_and_align_8 v0, 1 - load_and_align_8 v1, 1 - load_and_align_8 v2, 1 - load_and_align_8 v3, 1 - load_and_align_8 v4, 1 - -second_pass_8x4_b: - vspltish v20, 8 - vspltish v18, 3 - vslh v18, v20, v18 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040 - - load_vfilter v20, v21 - - vfilter_16 v0, v1 - vfilter_16 v1, v2 - vfilter_16 v2, v3 - vfilter_16 v3, v4 - -store_out_8x4_b: - - cmpi cr0, r8, 8 - beq cr0, store_aligned_8x4_b - - w_8x8 v0, r7, r0, r8 - w_8x8 v1, r7, r0, r8 - w_8x8 v2, r7, r0, r8 - w_8x8 v3, r7, r0, r8 - - b exit_8x4 - -store_aligned_8x4_b: - load_c v10, b_hilo_b, 0, r9, r10 - - vperm v0, v0, v1, v10 - vperm v2, v2, v3, v10 - - stvx v0, 0, r7 - addi r7, r7, 16 - stvx v2, 0, r7 - -exit_8x4: - - addi r1, r1, 32 ;# recover stack - mtspr 256, r11 ;# reset old VRSAVE - - blr - - .align 2 -;# r3 unsigned char * src -;# r4 int src_pitch -;# r5 int x_offset -;# r6 int y_offset -;# r7 unsigned char * dst -;# r8 int dst_pitch -bilinear_predict8x8_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xfff0 - ori r12, r12, 0xffff - mtspr 256, r12 ;# set VRSAVE - - stwu r1,-32(r1) ;# create space on the stack - - HProlog second_pass_8x8_pre_copy_b - - ;# Load up permutation constants - load_c v10, b_0123_b, 0, r9, r12 - load_c v11, b_4567_b, 0, r9, r12 - - hfilter_8 v0, 1 - hfilter_8 v1, 1 - hfilter_8 v2, 1 - hfilter_8 v3, 1 - hfilter_8 v4, 1 - hfilter_8 v5, 1 - hfilter_8 v6, 1 - hfilter_8 v7, 1 - - ;# Finished filtering main horizontal block. If there is no - ;# vertical filtering, jump to storing the data. Otherwise - ;# load up and filter the additional line that is needed - ;# for the vertical filter. - beq store_out_8x8_b - - hfilter_8 v8, 0 - - b second_pass_8x8_b - -second_pass_8x8_pre_copy_b: - slwi r6, r6, 5 ;# index into vertical filter array - - load_and_align_8 v0, 1 - load_and_align_8 v1, 1 - load_and_align_8 v2, 1 - load_and_align_8 v3, 1 - load_and_align_8 v4, 1 - load_and_align_8 v5, 1 - load_and_align_8 v6, 1 - load_and_align_8 v7, 1 - load_and_align_8 v8, 0 - -second_pass_8x8_b: - vspltish v20, 8 - vspltish v18, 3 - vslh v18, v20, v18 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040 - - load_vfilter v20, v21 - - vfilter_16 v0, v1 - vfilter_16 v1, v2 - vfilter_16 v2, v3 - vfilter_16 v3, v4 - vfilter_16 v4, v5 - vfilter_16 v5, v6 - vfilter_16 v6, v7 - vfilter_16 v7, v8 - -store_out_8x8_b: - - cmpi cr0, r8, 8 - beq cr0, store_aligned_8x8_b - - w_8x8 v0, r7, r0, r8 - w_8x8 v1, r7, r0, r8 - w_8x8 v2, r7, r0, r8 - w_8x8 v3, r7, r0, r8 - w_8x8 v4, r7, r0, r8 - w_8x8 v5, r7, r0, r8 - w_8x8 v6, r7, r0, r8 - w_8x8 v7, r7, r0, r8 - - b exit_8x8 - -store_aligned_8x8_b: - load_c v10, b_hilo_b, 0, r9, r10 - - vperm v0, v0, v1, v10 - vperm v2, v2, v3, v10 - vperm v4, v4, v5, v10 - vperm v6, v6, v7, v10 - - stvx v0, 0, r7 - addi r7, r7, 16 - stvx v2, 0, r7 - addi r7, r7, 16 - stvx v4, 0, r7 - addi r7, r7, 16 - stvx v6, 0, r7 - -exit_8x8: - - addi r1, r1, 32 ;# recover stack - mtspr 256, r11 ;# reset old VRSAVE - - blr - -;# Filters a horizontal line -;# expects: -;# r3 src_ptr -;# r4 pitch -;# r10 16 -;# r12 32 -;# v17 perm intput -;# v18 rounding -;# v19 shift -;# v20 filter taps -;# v21 tmp -;# v22 tmp -;# v23 tmp -;# v24 tmp -;# v25 tmp -;# v26 tmp -;# v27 tmp -;# v28 perm output -;# -.macro hfilter_16 V, increment_counter - - lvsl v17, 0, r3 ;# permutate value for alignment - - ;# input to filter is 21 bytes wide, output is 16 bytes. 
- ;# input will can span three vectors if not aligned correctly. - lvx v21, 0, r3 - lvx v22, r10, r3 - lvx v23, r12, r3 - -.if \increment_counter - add r3, r3, r4 -.endif - vperm v21, v21, v22, v17 - vperm v22, v22, v23, v17 ;# v8 v9 = 21 input pixels left-justified - - ;# set 0 - vmsummbm v24, v20, v21, v18 ;# taps times elements - - ;# set 1 - vsldoi v23, v21, v22, 1 - vmsummbm v25, v20, v23, v18 - - ;# set 2 - vsldoi v23, v21, v22, 2 - vmsummbm v26, v20, v23, v18 - - ;# set 3 - vsldoi v23, v21, v22, 3 - vmsummbm v27, v20, v23, v18 - - vpkswus v24, v24, v25 ;# v24 = 0 4 8 C 1 5 9 D (16-bit) - vpkswus v25, v26, v27 ;# v25 = 2 6 A E 3 7 B F - - vsrh v24, v24, v19 ;# divide v0, v1 by 128 - vsrh v25, v25, v19 - - vpkuhus \V, v24, v25 ;# \V = scrambled 8-bit result - vperm \V, \V, v0, v28 ;# \V = correctly-ordered result -.endm - -.macro load_and_align_16 V, increment_counter - lvsl v17, 0, r3 ;# permutate value for alignment - - ;# input to filter is 21 bytes wide, output is 16 bytes. - ;# input will can span three vectors if not aligned correctly. - lvx v21, 0, r3 - lvx v22, r10, r3 - -.if \increment_counter - add r3, r3, r4 -.endif - - vperm \V, v21, v22, v17 -.endm - -.macro write_16 V, increment_counter - stvx \V, 0, r7 - -.if \increment_counter - add r7, r7, r8 -.endif -.endm - - .align 2 -;# r3 unsigned char * src -;# r4 int src_pitch -;# r5 int x_offset -;# r6 int y_offset -;# r7 unsigned char * dst -;# r8 int dst_pitch -bilinear_predict16x16_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffff - ori r12, r12, 0xfff8 - mtspr 256, r12 ;# set VRSAVE - - HProlog second_pass_16x16_pre_copy_b - - hfilter_16 v0, 1 - hfilter_16 v1, 1 - hfilter_16 v2, 1 - hfilter_16 v3, 1 - hfilter_16 v4, 1 - hfilter_16 v5, 1 - hfilter_16 v6, 1 - hfilter_16 v7, 1 - hfilter_16 v8, 1 - hfilter_16 v9, 1 - hfilter_16 v10, 1 - hfilter_16 v11, 1 - hfilter_16 v12, 1 - hfilter_16 v13, 1 - hfilter_16 v14, 1 - hfilter_16 v15, 1 - - ;# Finished filtering main horizontal block. If there is no - ;# vertical filtering, jump to storing the data. Otherwise - ;# load up and filter the additional line that is needed - ;# for the vertical filter. 
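A note on the scrambled pack order in hfilter_16: each vmsummbm yields four 32-bit dot products, so after the vpkswus/vpkuhus packs the 16 bytes sit in the order 0 4 8 C 1 5 9 D 2 6 A E 3 7 B F. The b_hperm_b table (0,4,8,12, 1,5,9,13, ...) is the 4x4 transpose that restores 0..15; as a scalar model (hypothetical helper):

#include <stdint.h>

/* Undo the pack interleave; this permutation is its own inverse. */
static void unscramble16(const uint8_t in[16], uint8_t out[16]) {
  for (int i = 0; i < 16; ++i)
    out[i] = in[(i % 4) * 4 + i / 4];
}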
- beq store_out_16x16_b - - hfilter_16 v16, 0 - - b second_pass_16x16_b - -second_pass_16x16_pre_copy_b: - slwi r6, r6, 5 ;# index into vertical filter array - - load_and_align_16 v0, 1 - load_and_align_16 v1, 1 - load_and_align_16 v2, 1 - load_and_align_16 v3, 1 - load_and_align_16 v4, 1 - load_and_align_16 v5, 1 - load_and_align_16 v6, 1 - load_and_align_16 v7, 1 - load_and_align_16 v8, 1 - load_and_align_16 v9, 1 - load_and_align_16 v10, 1 - load_and_align_16 v11, 1 - load_and_align_16 v12, 1 - load_and_align_16 v13, 1 - load_and_align_16 v14, 1 - load_and_align_16 v15, 1 - load_and_align_16 v16, 0 - -second_pass_16x16_b: - vspltish v20, 8 - vspltish v18, 3 - vslh v18, v20, v18 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040 - - load_vfilter v20, v21 - - vfilter_16 v0, v1 - vfilter_16 v1, v2 - vfilter_16 v2, v3 - vfilter_16 v3, v4 - vfilter_16 v4, v5 - vfilter_16 v5, v6 - vfilter_16 v6, v7 - vfilter_16 v7, v8 - vfilter_16 v8, v9 - vfilter_16 v9, v10 - vfilter_16 v10, v11 - vfilter_16 v11, v12 - vfilter_16 v12, v13 - vfilter_16 v13, v14 - vfilter_16 v14, v15 - vfilter_16 v15, v16 - -store_out_16x16_b: - - write_16 v0, 1 - write_16 v1, 1 - write_16 v2, 1 - write_16 v3, 1 - write_16 v4, 1 - write_16 v5, 1 - write_16 v6, 1 - write_16 v7, 1 - write_16 v8, 1 - write_16 v9, 1 - write_16 v10, 1 - write_16 v11, 1 - write_16 v12, 1 - write_16 v13, 1 - write_16 v14, 1 - write_16 v15, 0 - - mtspr 256, r11 ;# reset old VRSAVE - - blr - - .data - - .align 4 -hfilter_b: - .byte 128, 0, 0, 0,128, 0, 0, 0,128, 0, 0, 0,128, 0, 0, 0 - .byte 112, 16, 0, 0,112, 16, 0, 0,112, 16, 0, 0,112, 16, 0, 0 - .byte 96, 32, 0, 0, 96, 32, 0, 0, 96, 32, 0, 0, 96, 32, 0, 0 - .byte 80, 48, 0, 0, 80, 48, 0, 0, 80, 48, 0, 0, 80, 48, 0, 0 - .byte 64, 64, 0, 0, 64, 64, 0, 0, 64, 64, 0, 0, 64, 64, 0, 0 - .byte 48, 80, 0, 0, 48, 80, 0, 0, 48, 80, 0, 0, 48, 80, 0, 0 - .byte 32, 96, 0, 0, 32, 96, 0, 0, 32, 96, 0, 0, 32, 96, 0, 0 - .byte 16,112, 0, 0, 16,112, 0, 0, 16,112, 0, 0, 16,112, 0, 0 - - .align 4 -vfilter_b: - .byte 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128 - .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112 - .byte 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16 - .byte 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96 - .byte 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32 - .byte 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80 - .byte 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48 - .byte 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 - .byte 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 - .byte 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48 - .byte 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80 - .byte 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32 - .byte 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96 - .byte 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16 - .byte 112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112 - - .align 4 -b_hperm_b: - .byte 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 - - .align 4 -b_0123_b: - .byte 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6 - - .align 4 -b_4567_b: - .byte 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10 - -b_hilo_b: - .byte 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 diff --git a/vp9/common/ppc/vp9_idct_altivec.asm b/vp9/common/ppc/vp9_idct_altivec.asm deleted file 
mode 100644 index b87aa42001893f92b5c4dc47626b089b04fe5eaf..0000000000000000000000000000000000000000 --- a/vp9/common/ppc/vp9_idct_altivec.asm +++ /dev/null @@ -1,189 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - .globl short_idct4x4_ppc - -.macro load_c V, LABEL, OFF, R0, R1 - lis \R0, \LABEL@ha - la \R1, \LABEL@l(\R0) - lvx \V, \OFF, \R1 -.endm - -;# r3 short *input -;# r4 short *output -;# r5 int pitch - .align 2 -short_idct4x4_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xfff8 - mtspr 256, r12 ;# set VRSAVE - - load_c v8, sinpi8sqrt2, 0, r9, r10 - load_c v9, cospi8sqrt2minus1, 0, r9, r10 - load_c v10, hi_hi, 0, r9, r10 - load_c v11, lo_lo, 0, r9, r10 - load_c v12, shift_16, 0, r9, r10 - - li r10, 16 - lvx v0, 0, r3 ;# input ip[0], ip[ 4] - lvx v1, r10, r3 ;# input ip[8], ip[12] - - ;# first pass - vupkhsh v2, v0 - vupkhsh v3, v1 - vaddsws v6, v2, v3 ;# a1 = ip[0]+ip[8] - vsubsws v7, v2, v3 ;# b1 = ip[0]-ip[8] - - vupklsh v0, v0 - vmulosh v4, v0, v8 - vsraw v4, v4, v12 - vaddsws v4, v4, v0 ;# ip[ 4] * sin(pi/8) * sqrt(2) - - vupklsh v1, v1 - vmulosh v5, v1, v9 - vsraw v5, v5, v12 ;# ip[12] * cos(pi/8) * sqrt(2) - vaddsws v5, v5, v1 - - vsubsws v4, v4, v5 ;# c1 - - vmulosh v3, v1, v8 - vsraw v3, v3, v12 - vaddsws v3, v3, v1 ;# ip[12] * sin(pi/8) * sqrt(2) - - vmulosh v5, v0, v9 - vsraw v5, v5, v12 ;# ip[ 4] * cos(pi/8) * sqrt(2) - vaddsws v5, v5, v0 - - vaddsws v3, v3, v5 ;# d1 - - vaddsws v0, v6, v3 ;# a1 + d1 - vsubsws v3, v6, v3 ;# a1 - d1 - - vaddsws v1, v7, v4 ;# b1 + c1 - vsubsws v2, v7, v4 ;# b1 - c1 - - ;# transpose input - vmrghw v4, v0, v1 ;# a0 b0 a1 b1 - vmrghw v5, v2, v3 ;# c0 d0 c1 d1 - - vmrglw v6, v0, v1 ;# a2 b2 a3 b3 - vmrglw v7, v2, v3 ;# c2 d2 c3 d3 - - vperm v0, v4, v5, v10 ;# a0 b0 c0 d0 - vperm v1, v4, v5, v11 ;# a1 b1 c1 d1 - - vperm v2, v6, v7, v10 ;# a2 b2 c2 d2 - vperm v3, v6, v7, v11 ;# a3 b3 c3 d3 - - ;# second pass - vaddsws v6, v0, v2 ;# a1 = ip[0]+ip[8] - vsubsws v7, v0, v2 ;# b1 = ip[0]-ip[8] - - vmulosh v4, v1, v8 - vsraw v4, v4, v12 - vaddsws v4, v4, v1 ;# ip[ 4] * sin(pi/8) * sqrt(2) - - vmulosh v5, v3, v9 - vsraw v5, v5, v12 ;# ip[12] * cos(pi/8) * sqrt(2) - vaddsws v5, v5, v3 - - vsubsws v4, v4, v5 ;# c1 - - vmulosh v2, v3, v8 - vsraw v2, v2, v12 - vaddsws v2, v2, v3 ;# ip[12] * sin(pi/8) * sqrt(2) - - vmulosh v5, v1, v9 - vsraw v5, v5, v12 ;# ip[ 4] * cos(pi/8) * sqrt(2) - vaddsws v5, v5, v1 - - vaddsws v3, v2, v5 ;# d1 - - vaddsws v0, v6, v3 ;# a1 + d1 - vsubsws v3, v6, v3 ;# a1 - d1 - - vaddsws v1, v7, v4 ;# b1 + c1 - vsubsws v2, v7, v4 ;# b1 - c1 - - vspltish v6, 4 - vspltish v7, 3 - - vpkswss v0, v0, v1 - vpkswss v1, v2, v3 - - vaddshs v0, v0, v6 - vaddshs v1, v1, v6 - - vsrah v0, v0, v7 - vsrah v1, v1, v7 - - ;# transpose output - vmrghh v2, v0, v1 ;# a0 c0 a1 c1 a2 c2 a3 c3 - vmrglh v3, v0, v1 ;# b0 d0 b1 d1 b2 d2 b3 d3 - - vmrghh v0, v2, v3 ;# a0 b0 c0 d0 a1 b1 c1 d1 - vmrglh v1, v2, v3 ;# a2 b2 c2 d2 a3 b3 c3 d3 - - stwu r1,-416(r1) ;# create space on the stack - - stvx v0, 0, r1 - lwz r6, 0(r1) - stw r6, 0(r4) - lwz r6, 4(r1) - stw r6, 4(r4) - - add r4, r4, r5 - - lwz r6, 8(r1) - stw r6, 0(r4) - lwz r6, 12(r1) - stw r6, 4(r4) - - add r4, r4, r5 - - stvx v1, 0, r1 
- lwz r6, 0(r1) - stw r6, 0(r4) - lwz r6, 4(r1) - stw r6, 4(r4) - - add r4, r4, r5 - - lwz r6, 8(r1) - stw r6, 0(r4) - lwz r6, 12(r1) - stw r6, 4(r4) - - addi r1, r1, 416 ;# recover stack - - mtspr 256, r11 ;# reset old VRSAVE - - blr - - .align 4 -sinpi8sqrt2: - .short 35468, 35468, 35468, 35468, 35468, 35468, 35468, 35468 - - .align 4 -cospi8sqrt2minus1: - .short 20091, 20091, 20091, 20091, 20091, 20091, 20091, 20091 - - .align 4 -shift_16: - .long 16, 16, 16, 16 - - .align 4 -hi_hi: - .byte 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 - - .align 4 -lo_lo: - .byte 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 diff --git a/vp9/common/ppc/vp9_loopfilter_altivec.c b/vp9/common/ppc/vp9_loopfilter_altivec.c deleted file mode 100644 index 599070a750785dba6809961fd4f4900d9bc83d65..0000000000000000000000000000000000000000 --- a/vp9/common/ppc/vp9_loopfilter_altivec.c +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vp9/common/vp9_loopfilter.h" -#include "vp9/common/vp9_onyxc_int.h" - -typedef void loop_filter_function_y_ppc -( - unsigned char *s, // source pointer - int p, // pitch - const signed char *flimit, - const signed char *limit, - const signed char *thresh -); - -typedef void loop_filter_function_uv_ppc -( - unsigned char *u, // source pointer - unsigned char *v, // source pointer - int p, // pitch - const signed char *flimit, - const signed char *limit, - const signed char *thresh -); - -typedef void loop_filter_function_s_ppc -( - unsigned char *s, // source pointer - int p, // pitch - const signed char *flimit -); - -loop_filter_function_y_ppc mbloop_filter_horizontal_edge_y_ppc; -loop_filter_function_y_ppc mbloop_filter_vertical_edge_y_ppc; -loop_filter_function_y_ppc loop_filter_horizontal_edge_y_ppc; -loop_filter_function_y_ppc loop_filter_vertical_edge_y_ppc; - -loop_filter_function_uv_ppc mbloop_filter_horizontal_edge_uv_ppc; -loop_filter_function_uv_ppc mbloop_filter_vertical_edge_uv_ppc; -loop_filter_function_uv_ppc loop_filter_horizontal_edge_uv_ppc; -loop_filter_function_uv_ppc loop_filter_vertical_edge_uv_ppc; - -loop_filter_function_s_ppc loop_filter_simple_horizontal_edge_ppc; -loop_filter_function_s_ppc loop_filter_simple_vertical_edge_ppc; - -// Horizontal MB filtering -void loop_filter_mbh_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) { - mbloop_filter_horizontal_edge_y_ppc(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr); - - if (u_ptr) - mbloop_filter_horizontal_edge_uv_ppc(u_ptr, v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr); -} - -void loop_filter_mbhs_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) { - (void)u_ptr; - (void)v_ptr; - (void)uv_stride; - loop_filter_simple_horizontal_edge_ppc(y_ptr, y_stride, lfi->mbflim); -} - -// Vertical MB Filtering -void loop_filter_mbv_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) { - mbloop_filter_vertical_edge_y_ppc(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr); 
- - if (u_ptr) - mbloop_filter_vertical_edge_uv_ppc(u_ptr, v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr); -} - -void loop_filter_mbvs_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) { - (void)u_ptr; - (void)v_ptr; - (void)uv_stride; - loop_filter_simple_vertical_edge_ppc(y_ptr, y_stride, lfi->mbflim); -} - -// Horizontal B Filtering -void loop_filter_bh_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) { - // These should all be done at once with one call, instead of 3 - loop_filter_horizontal_edge_y_ppc(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr); - loop_filter_horizontal_edge_y_ppc(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr); - loop_filter_horizontal_edge_y_ppc(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr); - - if (u_ptr) - loop_filter_horizontal_edge_uv_ppc(u_ptr + 4 * uv_stride, v_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr); -} - -void loop_filter_bhs_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) { - (void)u_ptr; - (void)v_ptr; - (void)uv_stride; - loop_filter_simple_horizontal_edge_ppc(y_ptr + 4 * y_stride, y_stride, lfi->flim); - loop_filter_simple_horizontal_edge_ppc(y_ptr + 8 * y_stride, y_stride, lfi->flim); - loop_filter_simple_horizontal_edge_ppc(y_ptr + 12 * y_stride, y_stride, lfi->flim); -} - -// Vertical B Filtering -void loop_filter_bv_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) { - loop_filter_vertical_edge_y_ppc(y_ptr, y_stride, lfi->flim, lfi->lim, lfi->thr); - - if (u_ptr) - loop_filter_vertical_edge_uv_ppc(u_ptr + 4, v_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr); -} - -void loop_filter_bvs_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) { - (void)u_ptr; - (void)v_ptr; - (void)uv_stride; - loop_filter_simple_vertical_edge_ppc(y_ptr + 4, y_stride, lfi->flim); - loop_filter_simple_vertical_edge_ppc(y_ptr + 8, y_stride, lfi->flim); - loop_filter_simple_vertical_edge_ppc(y_ptr + 12, y_stride, lfi->flim); -} diff --git a/vp9/common/ppc/vp9_loopfilter_filters_altivec.asm b/vp9/common/ppc/vp9_loopfilter_filters_altivec.asm deleted file mode 100644 index 61df4e976391dfc034b2d597f8f8fbdf0a4a2f2b..0000000000000000000000000000000000000000 --- a/vp9/common/ppc/vp9_loopfilter_filters_altivec.asm +++ /dev/null @@ -1,1253 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
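One nonobvious detail in the short_idct4x4_ppc code removed above: both Q16 constants go through the same multiply/shift/add-back sequence. vmulosh is a signed 16-bit multiply and sinpi8sqrt2 = 35468 wraps to -30068 as a signed halfword, but since x + ((x * (35468 - 65536)) >> 16) == (x * 35468) >> 16 exactly, the add-back both corrects the wrap for the ~0.5412 sin(pi/8)*sqrt(2) factor and supplies the integer part for the ~1.3066 factor stored as cospi8sqrt2minus1 = 20091. Scalar model (hypothetical helper):

/* Q16 multiply with add-back; c is the signed 16-bit view of the constant:
 * 35468 - 65536 = -30068 for sin(pi/8)*sqrt(2), 20091 for the cos term. */
static int idct_mul_q16(int x, int c) {
  return x + ((x * c) >> 16);
}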
-; - - - .globl mbloop_filter_horizontal_edge_y_ppc - .globl loop_filter_horizontal_edge_y_ppc - .globl mbloop_filter_vertical_edge_y_ppc - .globl loop_filter_vertical_edge_y_ppc - - .globl mbloop_filter_horizontal_edge_uv_ppc - .globl loop_filter_horizontal_edge_uv_ppc - .globl mbloop_filter_vertical_edge_uv_ppc - .globl loop_filter_vertical_edge_uv_ppc - - .globl loop_filter_simple_horizontal_edge_ppc - .globl loop_filter_simple_vertical_edge_ppc - - .text -;# We often need to perform transposes (and other transpose-like operations) -;# on matrices of data. This is simplified by the fact that we usually -;# operate on hunks of data whose dimensions are powers of 2, or at least -;# divisible by highish powers of 2. -;# -;# These operations can be very confusing. They become more straightforward -;# when we think of them as permutations of address bits: Concatenate a -;# group of vector registers and think of it as occupying a block of -;# memory beginning at address zero. The low four bits 0...3 of the -;# address then correspond to position within a register, the higher-order -;# address bits select the register. -;# -;# Although register selection, at the code level, is arbitrary, things -;# are simpler if we use contiguous ranges of register numbers, simpler -;# still if the low-order bits of the register number correspond to -;# conceptual address bits. We do this whenever reasonable. -;# -;# A 16x16 transpose can then be thought of as an operation on -;# a 256-element block of memory. It takes 8 bits 0...7 to address this -;# memory and the effect of a transpose is to interchange address bit -;# 0 with 4, 1 with 5, 2 with 6, and 3 with 7. Bits 0...3 index the -;# column, which is interchanged with the row addressed by bits 4..7. -;# -;# The altivec merge instructions provide a rapid means of effecting -;# many of these transforms. They operate at three widths (8,16,32). -;# Writing V(x) for vector register #x, paired merges permute address -;# indices as follows. -;# -;# 0->1 1->2 2->3 3->(4+d) (4+s)->0: -;# -;# vmrghb V( x), V( y), V( y + (1<<s)) -;# vmrglb V( x + (1<<d)), V( y), V( y + (1<<s)) -;# -;# -;# =0= 1->2 2->3 3->(4+d) (4+s)->1: -;# -;# vmrghh V( x), V( y), V( y + (1<<s)) -;# vmrglh V( x + (1<<d)), V( y), V( y + (1<<s)) -;# -;# -;# =0= =1= 2->3 3->(4+d) (4+s)->2: -;# -;# vmrghw V( x), V( y), V( y + (1<<s)) -;# vmrglw V( x + (1<<d)), V( y), V( y + (1<<s)) -;# -;# -;# Unfortunately, there is no doubleword merge instruction. -;# The following sequence uses "vperm" is a substitute. -;# Assuming that the selection masks b_hihi and b_lolo (defined in LFppc.c) -;# are in registers Vhihi and Vlolo, we can also effect the permutation -;# -;# =0= =1= =2= 3->(4+d) (4+s)->3 by the sequence: -;# -;# vperm V( x), V( y), V( y + (1<<s)), Vhihi -;# vperm V( x + (1<<d)), V( y), V( y + (1<<s)), Vlolo -;# -;# -;# Except for bits s and d, the other relationships between register -;# number (= high-order part of address) bits are at the disposal of -;# the programmer. -;# - -;# To avoid excess transposes, we filter all 3 vertical luma subblock -;# edges together. This requires a single 16x16 transpose, which, in -;# the above language, amounts to the following permutation of address -;# indices: 0<->4 1<->5 2<->6 3<->7, which we accomplish by -;# 4 iterations of the cyclic transform 0->1->2->3->4->5->6->7->0. 
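The address-bit view described in the comments above translates directly to C: concatenate 16 vectors of 16 bytes into a 256-byte array so that bits 4..7 of an index pick the register and bits 0..3 the byte within it; a 16x16 transpose is then just a nibble swap of every index. A scalar model:

#include <stdint.h>

/* Swap index bits 0..3 with 4..7: byte (row, col) moves to (col, row). */
static void transpose16x16_model(const uint8_t in[256], uint8_t out[256]) {
  for (int i = 0; i < 256; ++i)
    out[((i & 15) << 4) | (i >> 4)] = in[i];
}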
-;# -;# Except for the fact that the destination registers get written -;# before we are done referencing the old contents, the cyclic transform -;# is effected by -;# -;# x = 0; do { -;# vmrghb V(2x), V(x), V(x+8); -;# vmrghb V(2x+1), V(x), V(x+8); -;# } while( ++x < 8); -;# -;# For clarity, and because we can afford it, we do this transpose -;# using all 32 registers, alternating the banks 0..15 and 16 .. 31, -;# leaving the final result in 16 .. 31, as the lower registers are -;# used in the filtering itself. -;# -.macro Tpair A, B, X, Y - vmrghb \A, \X, \Y - vmrglb \B, \X, \Y -.endm - -;# Each step takes 8*2 = 16 instructions - -.macro t16_even - Tpair v16,v17, v0,v8 - Tpair v18,v19, v1,v9 - Tpair v20,v21, v2,v10 - Tpair v22,v23, v3,v11 - Tpair v24,v25, v4,v12 - Tpair v26,v27, v5,v13 - Tpair v28,v29, v6,v14 - Tpair v30,v31, v7,v15 -.endm - -.macro t16_odd - Tpair v0,v1, v16,v24 - Tpair v2,v3, v17,v25 - Tpair v4,v5, v18,v26 - Tpair v6,v7, v19,v27 - Tpair v8,v9, v20,v28 - Tpair v10,v11, v21,v29 - Tpair v12,v13, v22,v30 - Tpair v14,v15, v23,v31 -.endm - -;# Whole transpose takes 4*16 = 64 instructions - -.macro t16_full - t16_odd - t16_even - t16_odd - t16_even -.endm - -;# Vertical edge filtering requires transposes. For the simple filter, -;# we need to convert 16 rows of 4 pels each into 4 registers of 16 pels -;# each. Writing 0 ... 63 for the pixel indices, the desired result is: -;# -;# v0 = 0 1 ... 14 15 -;# v1 = 16 17 ... 30 31 -;# v2 = 32 33 ... 47 48 -;# v3 = 49 50 ... 62 63 -;# -;# In frame-buffer memory, the layout is: -;# -;# 0 16 32 48 -;# 1 17 33 49 -;# ... -;# 15 31 47 63. -;# -;# We begin by reading the data 32 bits at a time (using scalar operations) -;# into a temporary array, reading the rows of the array into vector registers, -;# with the following layout: -;# -;# v0 = 0 16 32 48 4 20 36 52 8 24 40 56 12 28 44 60 -;# v1 = 1 17 33 49 5 21 ... 45 61 -;# v2 = 2 18 ... 46 62 -;# v3 = 3 19 ... 47 63 -;# -;# From the "address-bit" perspective discussed above, we simply need to -;# interchange bits 0 <-> 4 and 1 <-> 5, leaving bits 2 and 3 alone. -;# In other words, we transpose each of the four 4x4 submatrices. -;# -;# This transformation is its own inverse, and we need to perform it -;# again before writing the pixels back into the frame buffer. -;# -;# It acts in place on registers v0...v3, uses v4...v7 as temporaries, -;# and assumes that v14/v15 contain the b_hihi/b_lolo selectors -;# defined above. We think of both groups of 4 registers as having -;# "addresses" {0,1,2,3} * 16. -;# -.macro Transpose4times4x4 Vlo, Vhi - - ;# d=s=0 0->1 1->2 2->3 3->4 4->0 =5= - - vmrghb v4, v0, v1 - vmrglb v5, v0, v1 - vmrghb v6, v2, v3 - vmrglb v7, v2, v3 - - ;# d=0 s=1 =0= 1->2 2->3 3->4 4->5 5->1 - - vmrghh v0, v4, v6 - vmrglh v1, v4, v6 - vmrghh v2, v5, v7 - vmrglh v3, v5, v7 - - ;# d=s=0 =0= =1= 2->3 3->4 4->2 =5= - - vmrghw v4, v0, v1 - vmrglw v5, v0, v1 - vmrghw v6, v2, v3 - vmrglw v7, v2, v3 - - ;# d=0 s=1 =0= =1= =2= 3->4 4->5 5->3 - - vperm v0, v4, v6, \Vlo - vperm v1, v4, v6, \Vhi - vperm v2, v5, v7, \Vlo - vperm v3, v5, v7, \Vhi -.endm -;# end Transpose4times4x4 - - -;# Normal mb vertical edge filter transpose. -;# -;# We read 8 columns of data, initially in the following pattern: -;# -;# (0,0) (1,0) ... (7,0) (0,1) (1,1) ... (7,1) -;# (0,2) (1,2) ... (7,2) (0,3) (1,3) ... (7,3) -;# ... -;# (0,14) (1,14) .. (7,14) (0,15) (1,15) .. (7,15) -;# -;# and wish to convert to: -;# -;# (0,0) ... (0,15) -;# (1,0) ... (1,15) -;# ... -;# (7,0) ... (7,15). 
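In the same scalar terms, one Tpair (vmrghb plus vmrglb) interleaves two 16-byte vectors byte by byte; iterating it across the register bank is what realizes the cyclic address-bit shift described above. Model (big-endian AltiVec byte order assumed):

#include <stdint.h>

/* hi = a0 b0 a1 b1 ... a7 b7, lo = a8 b8 ... a15 b15. */
static void tpair(const uint8_t a[16], const uint8_t b[16],
                  uint8_t hi[16], uint8_t lo[16]) {
  for (int i = 0; i < 8; ++i) {
    hi[2 * i] = a[i];      hi[2 * i + 1] = b[i];
    lo[2 * i] = a[i + 8];  lo[2 * i + 1] = b[i + 8];
  }
}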
-;# -;# In "address bit" language, we wish to map -;# -;# 0->4 1->5 2->6 3->0 4->1 5->2 6->3, i.e., I -> (I+4) mod 7. -;# -;# This can be accomplished by 4 iterations of the cyclic transform -;# -;# I -> (I+1) mod 7; -;# -;# each iteration can be realized by (d=0, s=2): -;# -;# x = 0; do Tpair( V(2x),V(2x+1), V(x),V(x+4)) while( ++x < 4); -;# -;# The input/output is in registers v0...v7. We use v10...v17 as mirrors; -;# preserving v8 = sign converter. -;# -;# Inverse transpose is similar, except here I -> (I+3) mod 7 and the -;# result lands in the "mirror" registers v10...v17 -;# -.macro t8x16_odd - Tpair v10, v11, v0, v4 - Tpair v12, v13, v1, v5 - Tpair v14, v15, v2, v6 - Tpair v16, v17, v3, v7 -.endm - -.macro t8x16_even - Tpair v0, v1, v10, v14 - Tpair v2, v3, v11, v15 - Tpair v4, v5, v12, v16 - Tpair v6, v7, v13, v17 -.endm - -.macro transpose8x16_fwd - t8x16_odd - t8x16_even - t8x16_odd - t8x16_even -.endm - -.macro transpose8x16_inv - t8x16_odd - t8x16_even - t8x16_odd -.endm - -.macro Transpose16x16 - vmrghb v0, v16, v24 - vmrglb v1, v16, v24 - vmrghb v2, v17, v25 - vmrglb v3, v17, v25 - vmrghb v4, v18, v26 - vmrglb v5, v18, v26 - vmrghb v6, v19, v27 - vmrglb v7, v19, v27 - vmrghb v8, v20, v28 - vmrglb v9, v20, v28 - vmrghb v10, v21, v29 - vmrglb v11, v21, v29 - vmrghb v12, v22, v30 - vmrglb v13, v22, v30 - vmrghb v14, v23, v31 - vmrglb v15, v23, v31 - vmrghb v16, v0, v8 - vmrglb v17, v0, v8 - vmrghb v18, v1, v9 - vmrglb v19, v1, v9 - vmrghb v20, v2, v10 - vmrglb v21, v2, v10 - vmrghb v22, v3, v11 - vmrglb v23, v3, v11 - vmrghb v24, v4, v12 - vmrglb v25, v4, v12 - vmrghb v26, v5, v13 - vmrglb v27, v5, v13 - vmrghb v28, v6, v14 - vmrglb v29, v6, v14 - vmrghb v30, v7, v15 - vmrglb v31, v7, v15 - vmrghb v0, v16, v24 - vmrglb v1, v16, v24 - vmrghb v2, v17, v25 - vmrglb v3, v17, v25 - vmrghb v4, v18, v26 - vmrglb v5, v18, v26 - vmrghb v6, v19, v27 - vmrglb v7, v19, v27 - vmrghb v8, v20, v28 - vmrglb v9, v20, v28 - vmrghb v10, v21, v29 - vmrglb v11, v21, v29 - vmrghb v12, v22, v30 - vmrglb v13, v22, v30 - vmrghb v14, v23, v31 - vmrglb v15, v23, v31 - vmrghb v16, v0, v8 - vmrglb v17, v0, v8 - vmrghb v18, v1, v9 - vmrglb v19, v1, v9 - vmrghb v20, v2, v10 - vmrglb v21, v2, v10 - vmrghb v22, v3, v11 - vmrglb v23, v3, v11 - vmrghb v24, v4, v12 - vmrglb v25, v4, v12 - vmrghb v26, v5, v13 - vmrglb v27, v5, v13 - vmrghb v28, v6, v14 - vmrglb v29, v6, v14 - vmrghb v30, v7, v15 - vmrglb v31, v7, v15 -.endm - -;# load_g loads a global vector (whose address is in the local variable Gptr) -;# into vector register Vreg. Trashes r0 -.macro load_g Vreg, Gptr - lwz r0, \Gptr - lvx \Vreg, 0, r0 -.endm - -;# exploit the saturation here. if the answer is negative -;# it will be clamped to 0. orring 0 with a positive -;# number will be the positive number (abs) -;# RES = abs( A-B), trashes TMP -.macro Abs RES, TMP, A, B - vsububs \RES, \A, \B - vsububs \TMP, \B, \A - vor \RES, \RES, \TMP -.endm - -;# RES = Max( RES, abs( A-B)), trashes TMP -.macro max_abs RES, TMP, A, B - vsububs \TMP, \A, \B - vmaxub \RES, \RES, \TMP - vsububs \TMP, \B, \A - vmaxub \RES, \RES, \TMP -.endm - -.macro Masks - ;# build masks - ;# input is all 8 bit unsigned (0-255). need to - ;# do abs(vala-valb) > limit. but no need to compare each - ;# value to the limit. find the max of the absolute differences - ;# and compare that to the limit. 
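That max-then-compare strategy looks like this in scalar form; the result has the sense of mask v8 below, i.e. nonzero where filtering must be suppressed (hypothetical helper, thresholds taken as plain byte values):

static int ad(int a, int b) { return a > b ? a - b : b - a; }

/* One limit compare after max-reducing all the neighbour differences,
 * plus the flimit check on |P0 - Q0|. */
static int suppress_filter(int flimit, int limit,
                           int p3, int p2, int p1, int p0,
                           int q0, int q1, int q2, int q3) {
  int m = ad(p1, p0);
  if (ad(q1, q0) > m) m = ad(q1, q0);
  if (ad(p3, p2) > m) m = ad(p3, p2);
  if (ad(p2, p1) > m) m = ad(p2, p1);
  if (ad(q2, q1) > m) m = ad(q2, q1);
  if (ad(q3, q2) > m) m = ad(q3, q2);
  return (m > limit) || (ad(p0, q0) > flimit);
}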
- ;# First hev - Abs v14, v13, v2, v3 ;# |P1 - P0| - max_abs v14, v13, v5, v4 ;# |Q1 - Q0| - - vcmpgtub v10, v14, v10 ;# HEV = true if thresh exceeded - - ;# Next limit - max_abs v14, v13, v0, v1 ;# |P3 - P2| - max_abs v14, v13, v1, v2 ;# |P2 - P1| - max_abs v14, v13, v6, v5 ;# |Q2 - Q1| - max_abs v14, v13, v7, v6 ;# |Q3 - Q2| - - vcmpgtub v9, v14, v9 ;# R = true if limit exceeded - - ;# flimit - Abs v14, v13, v3, v4 ;# |P0 - Q0| - - vcmpgtub v8, v14, v8 ;# X = true if flimit exceeded - - vor v8, v8, v9 ;# R = true if flimit or limit exceeded - ;# done building masks -.endm - -.macro build_constants RFL, RLI, RTH, FL, LI, TH - ;# build constants - lvx \FL, 0, \RFL ;# flimit - lvx \LI, 0, \RLI ;# limit - lvx \TH, 0, \RTH ;# thresh - - vspltisb v11, 8 - vspltisb v12, 4 - vslb v11, v11, v12 ;# 0x80808080808080808080808080808080 -.endm - -.macro load_data_y - ;# setup strides/pointers to be able to access - ;# all of the data - add r5, r4, r4 ;# r5 = 2 * stride - sub r6, r3, r5 ;# r6 -> 2 rows back - neg r7, r4 ;# r7 = -stride - - ;# load 16 pixels worth of data to work on - sub r0, r6, r5 ;# r0 -> 4 rows back (temp) - lvx v0, 0, r0 ;# P3 (read only) - lvx v1, r7, r6 ;# P2 - lvx v2, 0, r6 ;# P1 - lvx v3, r7, r3 ;# P0 - lvx v4, 0, r3 ;# Q0 - lvx v5, r4, r3 ;# Q1 - lvx v6, r5, r3 ;# Q2 - add r0, r3, r5 ;# r0 -> 2 rows fwd (temp) - lvx v7, r4, r0 ;# Q3 (read only) -.endm - -;# Expects -;# v10 == HEV -;# v13 == tmp -;# v14 == tmp -.macro common_adjust P0, Q0, P1, Q1, HEV_PRESENT - vxor \P1, \P1, v11 ;# SP1 - vxor \P0, \P0, v11 ;# SP0 - vxor \Q0, \Q0, v11 ;# SQ0 - vxor \Q1, \Q1, v11 ;# SQ1 - - vsubsbs v13, \P1, \Q1 ;# f = c (P1 - Q1) -.if \HEV_PRESENT - vand v13, v13, v10 ;# f &= hev -.endif - vsubsbs v14, \Q0, \P0 ;# -126 <= X = Q0-P0 <= +126 - vaddsbs v13, v13, v14 - vaddsbs v13, v13, v14 - vaddsbs v13, v13, v14 ;# A = c( c(P1-Q1) + 3*(Q0-P0)) - - vandc v13, v13, v8 ;# f &= mask - - vspltisb v8, 3 - vspltisb v9, 4 - - vaddsbs v14, v13, v9 ;# f1 = c (f+4) - vaddsbs v15, v13, v8 ;# f2 = c (f+3) - - vsrab v13, v14, v8 ;# f1 >>= 3 - vsrab v15, v15, v8 ;# f2 >>= 3 - - vsubsbs \Q0, \Q0, v13 ;# u1 = c (SQ0 - f1) - vaddsbs \P0, \P0, v15 ;# u2 = c (SP0 + f2) -.endm - -.macro vp8_mbfilter - Masks - - ;# start the fitering here - vxor v1, v1, v11 ;# SP2 - vxor v2, v2, v11 ;# SP1 - vxor v3, v3, v11 ;# SP0 - vxor v4, v4, v11 ;# SQ0 - vxor v5, v5, v11 ;# SQ1 - vxor v6, v6, v11 ;# SQ2 - - ;# add outer taps if we have high edge variance - vsubsbs v13, v2, v5 ;# f = c (SP1-SQ1) - - vsubsbs v14, v4, v3 ;# SQ0-SP0 - vaddsbs v13, v13, v14 - vaddsbs v13, v13, v14 - vaddsbs v13, v13, v14 ;# f = c( c(SP1-SQ1) + 3*(SQ0-SP0)) - - vandc v13, v13, v8 ;# f &= mask - vand v15, v13, v10 ;# f2 = f & hev - - ;# save bottom 3 bits so that we round one side +4 and the other +3 - vspltisb v8, 3 - vspltisb v9, 4 - - vaddsbs v14, v15, v9 ;# f1 = c (f+4) - vaddsbs v15, v15, v8 ;# f2 = c (f+3) - - vsrab v14, v14, v8 ;# f1 >>= 3 - vsrab v15, v15, v8 ;# f2 >>= 3 - - vsubsbs v4, v4, v14 ;# u1 = c (SQ0 - f1) - vaddsbs v3, v3, v15 ;# u2 = c (SP0 + f2) - - ;# only apply wider filter if not high edge variance - vandc v13, v13, v10 ;# f &= ~hev - - vspltisb v9, 2 - vnor v8, v8, v8 - vsrb v9, v8, v9 ;# 0x3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f - vupkhsb v9, v9 ;# 0x003f003f003f003f003f003f003f003f - vspltisb v8, 9 - - ;# roughly 1/7th difference across boundary - vspltish v10, 7 - vmulosb v14, v8, v13 ;# A = c( c(P1-Q1) + 3*(Q0-P0)) - vmulesb v15, v8, v13 - vaddshs v14, v14, v9 ;# += 63 - vaddshs v15, v15, v9 - vsrah v14, v14, v10 ;# >>= 7 - vsrah 
v15, v15, v10 - vmrglh v10, v15, v14 - vmrghh v15, v15, v14 - - vpkshss v10, v15, v10 ;# X = saturated down to bytes - - vsubsbs v6, v6, v10 ;# subtract from Q and add to P - vaddsbs v1, v1, v10 - - vxor v6, v6, v11 - vxor v1, v1, v11 - - ;# roughly 2/7th difference across boundary - vspltish v10, 7 - vaddubm v12, v8, v8 - vmulosb v14, v12, v13 ;# A = c( c(P1-Q1) + 3*(Q0-P0)) - vmulesb v15, v12, v13 - vaddshs v14, v14, v9 - vaddshs v15, v15, v9 - vsrah v14, v14, v10 ;# >>= 7 - vsrah v15, v15, v10 - vmrglh v10, v15, v14 - vmrghh v15, v15, v14 - - vpkshss v10, v15, v10 ;# X = saturated down to bytes - - vsubsbs v5, v5, v10 ;# subtract from Q and add to P - vaddsbs v2, v2, v10 - - vxor v5, v5, v11 - vxor v2, v2, v11 - - ;# roughly 3/7th difference across boundary - vspltish v10, 7 - vaddubm v12, v12, v8 - vmulosb v14, v12, v13 ;# A = c( c(P1-Q1) + 3*(Q0-P0)) - vmulesb v15, v12, v13 - vaddshs v14, v14, v9 - vaddshs v15, v15, v9 - vsrah v14, v14, v10 ;# >>= 7 - vsrah v15, v15, v10 - vmrglh v10, v15, v14 - vmrghh v15, v15, v14 - - vpkshss v10, v15, v10 ;# X = saturated down to bytes - - vsubsbs v4, v4, v10 ;# subtract from Q and add to P - vaddsbs v3, v3, v10 - - vxor v4, v4, v11 - vxor v3, v3, v11 -.endm - -.macro SBFilter - Masks - - common_adjust v3, v4, v2, v5, 1 - - ;# outer tap adjustments - vspltisb v8, 1 - - vaddubm v13, v13, v8 ;# f += 1 - vsrab v13, v13, v8 ;# f >>= 1 - - vandc v13, v13, v10 ;# f &= ~hev - - vsubsbs v5, v5, v13 ;# u1 = c (SQ1 - f) - vaddsbs v2, v2, v13 ;# u2 = c (SP1 + f) - - vxor v2, v2, v11 - vxor v3, v3, v11 - vxor v4, v4, v11 - vxor v5, v5, v11 -.endm - - .align 2 -mbloop_filter_horizontal_edge_y_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffff - mtspr 256, r12 ;# set VRSAVE - - build_constants r5, r6, r7, v8, v9, v10 - - load_data_y - - vp8_mbfilter - - stvx v1, r7, r6 ;# P2 - stvx v2, 0, r6 ;# P1 - stvx v3, r7, r3 ;# P0 - stvx v4, 0, r3 ;# Q0 - stvx v5, r4, r3 ;# Q1 - stvx v6, r5, r3 ;# Q2 - - mtspr 256, r11 ;# reset old VRSAVE - - blr - - .align 2 -;# r3 unsigned char *s -;# r4 int p -;# r5 const signed char *flimit -;# r6 const signed char *limit -;# r7 const signed char *thresh -loop_filter_horizontal_edge_y_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffff - mtspr 256, r12 ;# set VRSAVE - - build_constants r5, r6, r7, v8, v9, v10 - - load_data_y - - SBFilter - - stvx v2, 0, r6 ;# P1 - stvx v3, r7, r3 ;# P0 - stvx v4, 0, r3 ;# Q0 - stvx v5, r4, r3 ;# Q1 - - mtspr 256, r11 ;# reset old VRSAVE - - blr - -;# Filtering a vertical mb. Each mb is aligned on a 16 byte boundary. -;# So we can read in an entire mb aligned. However if we want to filter the mb -;# edge we run into problems. For the loopfilter we require 4 bytes before the mb -;# and 4 after for a total of 8 bytes. Reading 16 bytes inorder to get 4 is a bit -;# of a waste. So this is an even uglier way to get around that. -;# Using the regular register file words are read in and then saved back out to -;# memory to align and order them up. Then they are read in using the -;# vector register file. 
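The common_adjust macro above is the core of both the normal and simple filters; in scalar form (signed-saturating throughout, with the "f &= mask" suppression step omitted, names hypothetical):

static int sat8(int v) { return v < -128 ? -128 : v > 127 ? 127 : v; }

/* Pixels arrive XORed with 0x80 (unsigned -> signed). The asymmetric
 * +4 / +3 before the >>3 rounds the two adjustment directions alike. */
static void common_adjust_model(int *p1, int *p0, int *q0, int *q1,
                                int hev_present, int hev) {
  int f = sat8(*p1 - *q1);
  if (hev_present && !hev) f = 0;        /* f &= hev */
  f = sat8(f + (*q0 - *p0));
  f = sat8(f + (*q0 - *p0));
  f = sat8(f + (*q0 - *p0));             /* f = c(c(P1-Q1) + 3*(Q0-P0)) */
  *q0 = sat8(*q0 - (sat8(f + 4) >> 3));  /* u1 = c(SQ0 - f1) */
  *p0 = sat8(*p0 + (sat8(f + 3) >> 3));  /* u2 = c(SP0 + f2) */
}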
-.macro RLVmb V, R - lwzux r0, r3, r4 - stw r0, 4(\R) - lwz r0,-4(r3) - stw r0, 0(\R) - lwzux r0, r3, r4 - stw r0,12(\R) - lwz r0,-4(r3) - stw r0, 8(\R) - lvx \V, 0, \R -.endm - -.macro WLVmb V, R - stvx \V, 0, \R - lwz r0,12(\R) - stwux r0, r3, r4 - lwz r0, 8(\R) - stw r0,-4(r3) - lwz r0, 4(\R) - stwux r0, r3, r4 - lwz r0, 0(\R) - stw r0,-4(r3) -.endm - - .align 2 -;# r3 unsigned char *s -;# r4 int p -;# r5 const signed char *flimit -;# r6 const signed char *limit -;# r7 const signed char *thresh -mbloop_filter_vertical_edge_y_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffff - ori r12, r12, 0xc000 - mtspr 256, r12 ;# set VRSAVE - - la r9, -48(r1) ;# temporary space for reading in vectors - sub r3, r3, r4 - - RLVmb v0, r9 - RLVmb v1, r9 - RLVmb v2, r9 - RLVmb v3, r9 - RLVmb v4, r9 - RLVmb v5, r9 - RLVmb v6, r9 - RLVmb v7, r9 - - transpose8x16_fwd - - build_constants r5, r6, r7, v8, v9, v10 - - vp8_mbfilter - - transpose8x16_inv - - add r3, r3, r4 - neg r4, r4 - - WLVmb v17, r9 - WLVmb v16, r9 - WLVmb v15, r9 - WLVmb v14, r9 - WLVmb v13, r9 - WLVmb v12, r9 - WLVmb v11, r9 - WLVmb v10, r9 - - mtspr 256, r11 ;# reset old VRSAVE - - blr - -.macro RL V, R, P - lvx \V, 0, \R - add \R, \R, \P -.endm - -.macro WL V, R, P - stvx \V, 0, \R - add \R, \R, \P -.endm - -.macro Fil P3, P2, P1, P0, Q0, Q1, Q2, Q3 - ;# K = |P0-P1| already - Abs v14, v13, \Q0, \Q1 ;# M = |Q0-Q1| - vmaxub v14, v14, v4 ;# M = max( |P0-P1|, |Q0-Q1|) - vcmpgtub v10, v14, v0 - - Abs v4, v5, \Q2, \Q3 ;# K = |Q2-Q3| = next |P0-P1] - - max_abs v14, v13, \Q1, \Q2 ;# M = max( M, |Q1-Q2|) - max_abs v14, v13, \P1, \P2 ;# M = max( M, |P1-P2|) - max_abs v14, v13, \P2, \P3 ;# M = max( M, |P2-P3|) - - vmaxub v14, v14, v4 ;# M = max interior abs diff - vcmpgtub v9, v14, v2 ;# M = true if int_l exceeded - - Abs v14, v13, \P0, \Q0 ;# X = Abs( P0-Q0) - vcmpgtub v8, v14, v3 ;# X = true if edge_l exceeded - vor v8, v8, v9 ;# M = true if edge_l or int_l exceeded - - ;# replace P1,Q1 w/signed versions - common_adjust \P0, \Q0, \P1, \Q1, 1 - - vaddubm v13, v13, v1 ;# -16 <= M <= 15, saturation irrelevant - vsrab v13, v13, v1 - vandc v13, v13, v10 ;# adjust P1,Q1 by (M+1)>>1 if ! 
hev - vsubsbs \Q1, \Q1, v13 - vaddsbs \P1, \P1, v13 - - vxor \P1, \P1, v11 ;# P1 - vxor \P0, \P0, v11 ;# P0 - vxor \Q0, \Q0, v11 ;# Q0 - vxor \Q1, \Q1, v11 ;# Q1 -.endm - - - .align 2 -;# r3 unsigned char *s -;# r4 int p -;# r5 const signed char *flimit -;# r6 const signed char *limit -;# r7 const signed char *thresh -loop_filter_vertical_edge_y_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffff - ori r12, r12, 0xffff - mtspr 256, r12 ;# set VRSAVE - - addi r9, r3, 0 - RL v16, r9, r4 - RL v17, r9, r4 - RL v18, r9, r4 - RL v19, r9, r4 - RL v20, r9, r4 - RL v21, r9, r4 - RL v22, r9, r4 - RL v23, r9, r4 - RL v24, r9, r4 - RL v25, r9, r4 - RL v26, r9, r4 - RL v27, r9, r4 - RL v28, r9, r4 - RL v29, r9, r4 - RL v30, r9, r4 - lvx v31, 0, r9 - - Transpose16x16 - - vspltisb v1, 1 - - build_constants r5, r6, r7, v3, v2, v0 - - Abs v4, v5, v19, v18 ;# K(v14) = first |P0-P1| - - Fil v16, v17, v18, v19, v20, v21, v22, v23 - Fil v20, v21, v22, v23, v24, v25, v26, v27 - Fil v24, v25, v26, v27, v28, v29, v30, v31 - - Transpose16x16 - - addi r9, r3, 0 - WL v16, r9, r4 - WL v17, r9, r4 - WL v18, r9, r4 - WL v19, r9, r4 - WL v20, r9, r4 - WL v21, r9, r4 - WL v22, r9, r4 - WL v23, r9, r4 - WL v24, r9, r4 - WL v25, r9, r4 - WL v26, r9, r4 - WL v27, r9, r4 - WL v28, r9, r4 - WL v29, r9, r4 - WL v30, r9, r4 - stvx v31, 0, r9 - - mtspr 256, r11 ;# reset old VRSAVE - - blr - -;# -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- UV FILTERING -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- -.macro active_chroma_sel V - andi. r7, r3, 8 ;# row origin modulo 16 - add r7, r7, r7 ;# selects selectors - lis r12, _chromaSelectors@ha - la r0, _chromaSelectors@l(r12) - lwzux r0, r7, r0 ;# leave selector addr in r7 - - lvx \V, 0, r0 ;# mask to concatenate active U,V pels -.endm - -.macro hread_uv Dest, U, V, Offs, VMask - lvx \U, \Offs, r3 - lvx \V, \Offs, r4 - vperm \Dest, \U, \V, \VMask ;# Dest = active part of U then V -.endm - -.macro hwrite_uv New, U, V, Offs, Umask, Vmask - vperm \U, \New, \U, \Umask ;# Combine new pels with siblings - vperm \V, \New, \V, \Vmask - stvx \U, \Offs, r3 ;# Write to frame buffer - stvx \V, \Offs, r4 -.endm - -;# Process U,V in parallel. 
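Processing U and V in parallel works because each chroma row contributes only 8 active pels: hread_uv permutes the active halves of the two planes into one 16-byte vector, so every filter invocation below covers both planes at once. A scalar sketch of the packing (hypothetical helper):

#include <stdint.h>
#include <string.h>

/* One filter lane = 8 U pels followed by 8 V pels. */
static void pack_uv8(const uint8_t *u, const uint8_t *v, uint8_t out[16]) {
  memcpy(out, u, 8);
  memcpy(out + 8, v, 8);
}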
-.macro load_chroma_h - neg r9, r5 ;# r9 = -1 * stride - add r8, r9, r9 ;# r8 = -2 * stride - add r10, r5, r5 ;# r10 = 2 * stride - - active_chroma_sel v12 - - ;# P3, Q3 are read-only; need not save addresses or sibling pels - add r6, r8, r8 ;# r6 = -4 * stride - hread_uv v0, v14, v15, r6, v12 - add r6, r10, r5 ;# r6 = 3 * stride - hread_uv v7, v14, v15, r6, v12 - - ;# Others are read/write; save addresses and sibling pels - - add r6, r8, r9 ;# r6 = -3 * stride - hread_uv v1, v16, v17, r6, v12 - hread_uv v2, v18, v19, r8, v12 - hread_uv v3, v20, v21, r9, v12 - hread_uv v4, v22, v23, 0, v12 - hread_uv v5, v24, v25, r5, v12 - hread_uv v6, v26, v27, r10, v12 -.endm - -.macro uresult_sel V - load_g \V, 4(r7) -.endm - -.macro vresult_sel V - load_g \V, 8(r7) -.endm - -;# always write P1,P0,Q0,Q1 -.macro store_chroma_h - uresult_sel v11 - vresult_sel v12 - hwrite_uv v2, v18, v19, r8, v11, v12 - hwrite_uv v3, v20, v21, r9, v11, v12 - hwrite_uv v4, v22, v23, 0, v11, v12 - hwrite_uv v5, v24, v25, r5, v11, v12 -.endm - - .align 2 -;# r3 unsigned char *u -;# r4 unsigned char *v -;# r5 int p -;# r6 const signed char *flimit -;# r7 const signed char *limit -;# r8 const signed char *thresh -mbloop_filter_horizontal_edge_uv_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffff - ori r12, r12, 0xffff - mtspr 256, r12 ;# set VRSAVE - - build_constants r6, r7, r8, v8, v9, v10 - - load_chroma_h - - vp8_mbfilter - - store_chroma_h - - hwrite_uv v1, v16, v17, r6, v11, v12 ;# v1 == P2 - hwrite_uv v6, v26, v27, r10, v11, v12 ;# v6 == Q2 - - mtspr 256, r11 ;# reset old VRSAVE - - blr - - .align 2 -;# r3 unsigned char *u -;# r4 unsigned char *v -;# r5 int p -;# r6 const signed char *flimit -;# r7 const signed char *limit -;# r8 const signed char *thresh -loop_filter_horizontal_edge_uv_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffff - ori r12, r12, 0xffff - mtspr 256, r12 ;# set VRSAVE - - build_constants r6, r7, r8, v8, v9, v10 - - load_chroma_h - - SBFilter - - store_chroma_h - - mtspr 256, r11 ;# reset old VRSAVE - - blr - -.macro R V, R - lwzux r0, r3, r5 - stw r0, 4(\R) - lwz r0,-4(r3) - stw r0, 0(\R) - lwzux r0, r4, r5 - stw r0,12(\R) - lwz r0,-4(r4) - stw r0, 8(\R) - lvx \V, 0, \R -.endm - - -.macro W V, R - stvx \V, 0, \R - lwz r0,12(\R) - stwux r0, r4, r5 - lwz r0, 8(\R) - stw r0,-4(r4) - lwz r0, 4(\R) - stwux r0, r3, r5 - lwz r0, 0(\R) - stw r0,-4(r3) -.endm - -.macro chroma_vread R - sub r3, r3, r5 ;# back up one line for simplicity - sub r4, r4, r5 - - R v0, \R - R v1, \R - R v2, \R - R v3, \R - R v4, \R - R v5, \R - R v6, \R - R v7, \R - - transpose8x16_fwd -.endm - -.macro chroma_vwrite R - - transpose8x16_inv - - add r3, r3, r5 - add r4, r4, r5 - neg r5, r5 ;# Write rows back in reverse order - - W v17, \R - W v16, \R - W v15, \R - W v14, \R - W v13, \R - W v12, \R - W v11, \R - W v10, \R -.endm - - .align 2 -;# r3 unsigned char *u -;# r4 unsigned char *v -;# r5 int p -;# r6 const signed char *flimit -;# r7 const signed char *limit -;# r8 const signed char *thresh -mbloop_filter_vertical_edge_uv_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffff - ori r12, r12, 0xc000 - mtspr 256, r12 ;# set VRSAVE - - la r9, -48(r1) ;# temporary space for reading in vectors - - chroma_vread r9 - - build_constants r6, r7, r8, v8, v9, v10 - - vp8_mbfilter - - chroma_vwrite r9 - - mtspr 256, r11 ;# reset old VRSAVE - - blr - - .align 2 -;# r3 unsigned char *u -;# r4 unsigned char *v -;# r5 int p -;# r6 const signed char *flimit -;# r7 const signed char *limit -;# r8 const signed 
char *thresh -loop_filter_vertical_edge_uv_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffff - ori r12, r12, 0xc000 - mtspr 256, r12 ;# set VRSAVE - - la r9, -48(r1) ;# temporary space for reading in vectors - - chroma_vread r9 - - build_constants r6, r7, r8, v8, v9, v10 - - SBFilter - - chroma_vwrite r9 - - mtspr 256, r11 ;# reset old VRSAVE - - blr - -;# -=-=-=-=-=-=-=-=-=-=-=-=-=-= SIMPLE LOOP FILTER =-=-=-=-=-=-=-=-=-=-=-=-=-=- - -.macro vp8_simple_filter - Abs v14, v13, v1, v2 ;# M = abs( P0 - Q0) - vcmpgtub v8, v14, v8 ;# v5 = true if _over_ limit - - ;# preserve unsigned v0 and v3 - common_adjust v1, v2, v0, v3, 0 - - vxor v1, v1, v11 - vxor v2, v2, v11 ;# cvt Q0, P0 back to pels -.endm - -.macro simple_vertical - addi r8, 0, 16 - addi r7, r5, 32 - - lvx v0, 0, r5 - lvx v1, r8, r5 - lvx v2, 0, r7 - lvx v3, r8, r7 - - lis r12, _B_hihi@ha - la r0, _B_hihi@l(r12) - lvx v16, 0, r0 - - lis r12, _B_lolo@ha - la r0, _B_lolo@l(r12) - lvx v17, 0, r0 - - Transpose4times4x4 v16, v17 - vp8_simple_filter - - vxor v0, v0, v11 - vxor v3, v3, v11 ;# cvt Q0, P0 back to pels - - Transpose4times4x4 v16, v17 - - stvx v0, 0, r5 - stvx v1, r8, r5 - stvx v2, 0, r7 - stvx v3, r8, r7 -.endm - - .align 2 -;# r3 unsigned char *s -;# r4 int p -;# r5 const signed char *flimit -loop_filter_simple_horizontal_edge_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffff - mtspr 256, r12 ;# set VRSAVE - - ;# build constants - lvx v8, 0, r5 ;# flimit - - vspltisb v11, 8 - vspltisb v12, 4 - vslb v11, v11, v12 ;# 0x80808080808080808080808080808080 - - neg r5, r4 ;# r5 = -1 * stride - add r6, r5, r5 ;# r6 = -2 * stride - - lvx v0, r6, r3 ;# v0 = P1 = 16 pels two rows above edge - lvx v1, r5, r3 ;# v1 = P0 = 16 pels one row above edge - lvx v2, 0, r3 ;# v2 = Q0 = 16 pels one row below edge - lvx v3, r4, r3 ;# v3 = Q1 = 16 pels two rows below edge - - vp8_simple_filter - - stvx v1, r5, r3 ;# store P0 - stvx v2, 0, r3 ;# store Q0 - - mtspr 256, r11 ;# reset old VRSAVE - - blr - -.macro RLV Offs - stw r0, (\Offs*4)(r5) - lwzux r0, r7, r4 -.endm - -.macro WLV Offs - lwz r0, (\Offs*4)(r5) - stwux r0, r7, r4 -.endm - - .align 2 -;# r3 unsigned char *s -;# r4 int p -;# r5 const signed char *flimit -loop_filter_simple_vertical_edge_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffff - ori r12, r12, 0xc000 - mtspr 256, r12 ;# set VRSAVE - - ;# build constants - lvx v8, 0, r5 ;# flimit - - vspltisb v11, 8 - vspltisb v12, 4 - vslb v11, v11, v12 ;# 0x80808080808080808080808080808080 - - la r5, -96(r1) ;# temporary space for reading in vectors - - ;# Store 4 pels at word "Offs" in temp array, then advance r7 - ;# to next row and read another 4 pels from the frame buffer. - - subi r7, r3, 2 ;# r7 -> 2 pels before start - lwzx r0, 0, r7 ;# read first 4 pels - - ;# 16 unaligned word accesses - RLV 0 - RLV 4 - RLV 8 - RLV 12 - RLV 1 - RLV 5 - RLV 9 - RLV 13 - RLV 2 - RLV 6 - RLV 10 - RLV 14 - RLV 3 - RLV 7 - RLV 11 - - stw r0, (15*4)(r5) ;# write last 4 pels - - simple_vertical - - ;# Read temp array, write frame buffer. 
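Aside: the simple vertical filter cannot issue 16 unaligned vector loads, so RLV above gathers a 4-pel window from each row into an on-stack staging area with scalar word accesses (in the interleaved 0,4,8,12,... order that Transpose4times4x4 expects), and the WLV sequence just below scatters the filtered words back out. A rough C equivalent of the staging idea, kept in plain row order for clarity; the names are illustrative:

#include <stdint.h>
#include <string.h>

/* Gather a 4-pel window around the edge from each of 16 rows into a
 * dense buffer the vector code can load with aligned accesses. */
void gather_edge(uint8_t stage[16][4], const uint8_t *s, int pitch) {
  int r;
  const uint8_t *row = s - 2;        /* 2 pels before the edge */
  for (r = 0; r < 16; r++, row += pitch)
    memcpy(stage[r], row, 4);        /* one unaligned word load */
}

void scatter_edge(uint8_t *s, int pitch, const uint8_t stage[16][4]) {
  int r;
  uint8_t *row = s - 2;
  for (r = 0; r < 16; r++, row += pitch)
    memcpy(row, stage[r], 4);        /* one unaligned word store */
}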
- subi r7, r3, 2 ;# r7 -> 2 pels before start - lwzx r0, 0, r5 ;# read/write first 4 pels - stwx r0, 0, r7 - - WLV 4 - WLV 8 - WLV 12 - WLV 1 - WLV 5 - WLV 9 - WLV 13 - WLV 2 - WLV 6 - WLV 10 - WLV 14 - WLV 3 - WLV 7 - WLV 11 - WLV 15 - - mtspr 256, r11 ;# reset old VRSAVE - - blr - - .data - -_chromaSelectors: - .long _B_hihi - .long _B_Ures0 - .long _B_Vres0 - .long 0 - .long _B_lolo - .long _B_Ures8 - .long _B_Vres8 - .long 0 - - .align 4 -_B_Vres8: - .byte 16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15 - - .align 4 -_B_Ures8: - .byte 16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7 - - .align 4 -_B_lolo: - .byte 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 - - .align 4 -_B_Vres0: - .byte 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 - .align 4 -_B_Ures0: - .byte 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 - - .align 4 -_B_hihi: - .byte 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 diff --git a/vp9/common/ppc/vp9_platform_altivec.asm b/vp9/common/ppc/vp9_platform_altivec.asm deleted file mode 100644 index f81d86f740e241c1340dc33a81a0fa9a5016f47d..0000000000000000000000000000000000000000 --- a/vp9/common/ppc/vp9_platform_altivec.asm +++ /dev/null @@ -1,59 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - .globl save_platform_context - .globl restore_platform_context - -.macro W V P - stvx \V, 0, \P - addi \P, \P, 16 -.endm - -.macro R V P - lvx \V, 0, \P - addi \P, \P, 16 -.endm - -;# r3 context_ptr - .align 2 -save_platform_contex: - W v20, r3 - W v21, r3 - W v22, r3 - W v23, r3 - W v24, r3 - W v25, r3 - W v26, r3 - W v27, r3 - W v28, r3 - W v29, r3 - W v30, r3 - W v31, r3 - - blr - -;# r3 context_ptr - .align 2 -restore_platform_context: - R v20, r3 - R v21, r3 - R v22, r3 - R v23, r3 - R v24, r3 - R v25, r3 - R v26, r3 - R v27, r3 - R v28, r3 - R v29, r3 - R v30, r3 - R v31, r3 - - blr diff --git a/vp9/common/ppc/vp9_recon_altivec.asm b/vp9/common/ppc/vp9_recon_altivec.asm deleted file mode 100644 index dd39e05a83663080a6f4f7b5ea210a55a0822862..0000000000000000000000000000000000000000 --- a/vp9/common/ppc/vp9_recon_altivec.asm +++ /dev/null @@ -1,175 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - - .globl recon4b_ppc - .globl recon2b_ppc - .globl recon_b_ppc - -.macro row_of16 Diff Pred Dst Stride - lvx v1, 0, \Pred ;# v1 = pred = p0..p15 - addi \Pred, \Pred, 16 ;# next pred - vmrghb v2, v0, v1 ;# v2 = 16-bit p0..p7 - lvx v3, 0, \Diff ;# v3 = d0..d7 - vaddshs v2, v2, v3 ;# v2 = r0..r7 - vmrglb v1, v0, v1 ;# v1 = 16-bit p8..p15 - lvx v3, r8, \Diff ;# v3 = d8..d15 - addi \Diff, \Diff, 32 ;# next diff - vaddshs v3, v3, v1 ;# v3 = r8..r15 - vpkshus v2, v2, v3 ;# v2 = 8-bit r0..r15 - stvx v2, 0, \Dst ;# to dst - add \Dst, \Dst, \Stride ;# next dst -.endm - - .text - .align 2 -;# r3 = short *diff_ptr, -;# r4 = unsigned char *pred_ptr, -;# r5 = unsigned char *dst_ptr, -;# r6 = int stride -recon4b_ppc: - mfspr r0, 256 ;# get old VRSAVE - stw r0, -8(r1) ;# save old VRSAVE to stack - oris r0, r0, 0xf000 - mtspr 256,r0 ;# set VRSAVE - - vxor v0, v0, v0 - li r8, 16 - - row_of16 r3, r4, r5, r6 - row_of16 r3, r4, r5, r6 - row_of16 r3, r4, r5, r6 - row_of16 r3, r4, r5, r6 - - lwz r12, -8(r1) ;# restore old VRSAVE from stack - mtspr 256, r12 ;# reset old VRSAVE - - blr - -.macro two_rows_of8 Diff Pred Dst Stride write_first_four_pels - lvx v1, 0, \Pred ;# v1 = pred = p0..p15 - vmrghb v2, v0, v1 ;# v2 = 16-bit p0..p7 - lvx v3, 0, \Diff ;# v3 = d0..d7 - vaddshs v2, v2, v3 ;# v2 = r0..r7 - vmrglb v1, v0, v1 ;# v1 = 16-bit p8..p15 - lvx v3, r8, \Diff ;# v2 = d8..d15 - vaddshs v3, v3, v1 ;# v3 = r8..r15 - vpkshus v2, v2, v3 ;# v3 = 8-bit r0..r15 - stvx v2, 0, r10 ;# 2 rows to dst from buf - lwz r0, 0(r10) -.if \write_first_four_pels - stw r0, 0(\Dst) - .else - stwux r0, \Dst, \Stride -.endif - lwz r0, 4(r10) - stw r0, 4(\Dst) - lwz r0, 8(r10) - stwux r0, \Dst, \Stride ;# advance dst to next row - lwz r0, 12(r10) - stw r0, 4(\Dst) -.endm - - .align 2 -;# r3 = short *diff_ptr, -;# r4 = unsigned char *pred_ptr, -;# r5 = unsigned char *dst_ptr, -;# r6 = int stride - -recon2b_ppc: - mfspr r0, 256 ;# get old VRSAVE - stw r0, -8(r1) ;# save old VRSAVE to stack - oris r0, r0, 0xf000 - mtspr 256,r0 ;# set VRSAVE - - vxor v0, v0, v0 - li r8, 16 - - la r10, -48(r1) ;# buf - - two_rows_of8 r3, r4, r5, r6, 1 - - addi r4, r4, 16; ;# next pred - addi r3, r3, 32; ;# next diff - - two_rows_of8 r3, r4, r5, r6, 0 - - lwz r12, -8(r1) ;# restore old VRSAVE from stack - mtspr 256, r12 ;# reset old VRSAVE - - blr - -.macro get_two_diff_rows - stw r0, 0(r10) - lwz r0, 4(r3) - stw r0, 4(r10) - lwzu r0, 32(r3) - stw r0, 8(r10) - lwz r0, 4(r3) - stw r0, 12(r10) - lvx v3, 0, r10 -.endm - - .align 2 -;# r3 = short *diff_ptr, -;# r4 = unsigned char *pred_ptr, -;# r5 = unsigned char *dst_ptr, -;# r6 = int stride -recon_b_ppc: - mfspr r0, 256 ;# get old VRSAVE - stw r0, -8(r1) ;# save old VRSAVE to stack - oris r0, r0, 0xf000 - mtspr 256,r0 ;# set VRSAVE - - vxor v0, v0, v0 - - la r10, -48(r1) ;# buf - - lwz r0, 0(r4) - stw r0, 0(r10) - lwz r0, 16(r4) - stw r0, 4(r10) - lwz r0, 32(r4) - stw r0, 8(r10) - lwz r0, 48(r4) - stw r0, 12(r10) - - lvx v1, 0, r10; ;# v1 = pred = p0..p15 - - lwz r0, 0(r3) ;# v3 = d0..d7 - - get_two_diff_rows - - vmrghb v2, v0, v1; ;# v2 = 16-bit p0..p7 - vaddshs v2, v2, v3; ;# v2 = r0..r7 - - lwzu r0, 32(r3) ;# v3 = d8..d15 - - get_two_diff_rows - - vmrglb v1, v0, v1; ;# v1 = 16-bit p8..p15 - vaddshs v3, v3, v1; ;# v3 = r8..r15 - - vpkshus v2, v2, v3; ;# v2 = 8-bit r0..r15 - stvx v2, 0, r10; ;# 16 pels to dst from buf - - lwz r0, 0(r10) - stw r0, 0(r5) - lwz r0, 4(r10) - stwux r0, r5, r6 - lwz r0, 8(r10) - stwux r0, r5, r6 - lwz r0, 12(r10) - stwx r0, r5, r6 - - lwz r12, -8(r1) ;# restore old VRSAVE 
from stack - mtspr 256, r12 ;# reset old VRSAVE - - blr diff --git a/vp9/common/ppc/vp9_systemdependent.c b/vp9/common/ppc/vp9_systemdependent.c deleted file mode 100644 index a6be550a1336b228c9f99c28a57308101efdcc54..0000000000000000000000000000000000000000 --- a/vp9/common/ppc/vp9_systemdependent.c +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vp9/common/vp9_loopfilter.h" -#include "recon.h" -#include "vp9/common/vp9_onyxc_int.h" - -void (*vp8_short_idct4x4)(short *input, short *output, int pitch); -void (*vp8_short_idct4x4_1)(short *input, short *output, int pitch); -void (*vp8_dc_only_idct)(short input_dc, short *output, int pitch); - -extern void (*vp9_post_proc_down_and_across)(unsigned char *src_ptr, - unsigned char *dst_ptr, - int src_pixels_per_line, - int dst_pixels_per_line, - int rows, int cols, int flimit); - -extern void (*vp9_mbpost_proc_down)(unsigned char *dst, int pitch, - int rows, int cols, int flimit); -extern void vp9_mbpost_proc_down_c(unsigned char *dst, int pitch, - int rows, int cols, int flimit); -extern void (*vp9_mbpost_proc_across_ip)(unsigned char *src, int pitch, - int rows, int cols, int flimit); -extern void vp9_mbpost_proc_across_ip_c(unsigned char *src, int pitch, - int rows, int cols, int flimit); -extern void vp9_post_proc_down_and_across_c(unsigned char *src_ptr, - unsigned char *dst_ptr, - int src_pixels_per_line, - int dst_pixels_per_line, - int rows, int cols, int flimit); -void vp9_plane_add_noise_c(unsigned char *start, - unsigned int width, unsigned int height, - int pitch, int q, int a); - -extern copy_mem_block_function *vp9_copy_mem16x16; -extern copy_mem_block_function *vp9_copy_mem8x8; -extern copy_mem_block_function *vp9_copy_mem8x4; - -// PPC -extern subpixel_predict_function sixtap_predict_ppc; -extern subpixel_predict_function sixtap_predict8x4_ppc; -extern subpixel_predict_function sixtap_predict8x8_ppc; -extern subpixel_predict_function sixtap_predict16x16_ppc; -extern subpixel_predict_function bilinear_predict4x4_ppc; -extern subpixel_predict_function bilinear_predict8x4_ppc; -extern subpixel_predict_function bilinear_predict8x8_ppc; -extern subpixel_predict_function bilinear_predict16x16_ppc; - -extern copy_mem_block_function copy_mem16x16_ppc; - -void recon_b_ppc(short *diff_ptr, unsigned char *pred_ptr, - unsigned char *dst_ptr, int stride); -void recon2b_ppc(short *diff_ptr, unsigned char *pred_ptr, - unsigned char *dst_ptr, int stride); -void recon4b_ppc(short *diff_ptr, unsigned char *pred_ptr, - unsigned char *dst_ptr, int stride); - -extern void short_idct4x4_ppc(short *input, short *output, int pitch); - -// Generic C -extern subpixel_predict_function vp9_sixtap_predict_c; -extern subpixel_predict_function vp9_sixtap_predict8x4_c; -extern subpixel_predict_function vp9_sixtap_predict8x8_c; -extern subpixel_predict_function vp9_sixtap_predict16x16_c; -extern subpixel_predict_function vp9_bilinear_predict4x4_c; -extern subpixel_predict_function vp9_bilinear_predict8x4_c; -extern subpixel_predict_function vp9_bilinear_predict8x8_c; -extern subpixel_predict_function vp9_bilinear_predict16x16_c; - -extern 
copy_mem_block_function vp9_copy_mem16x16_c; -extern copy_mem_block_function vp9_copy_mem8x8_c; -extern copy_mem_block_function vp9_copy_mem8x4_c; - -void vp9_recon_b_c(short *diff_ptr, unsigned char *pred_ptr, - unsigned char *dst_ptr, int stride); -void vp9_recon2b_c(short *diff_ptr, unsigned char *pred_ptr, - unsigned char *dst_ptr, int stride); -void vp9_recon4b_c(short *diff_ptr, unsigned char *pred_ptr, - unsigned char *dst_ptr, int stride); - -extern void vp9_short_idct4x4_1_c(short *input, short *output, int pitch); -extern void vp9_short_idct4x4_c(short *input, short *output, int pitch); -extern void vp8_dc_only_idct_c(short input_dc, short *output, int pitch); - -// PPC -extern loop_filter_block_function loop_filter_mbv_ppc; -extern loop_filter_block_function loop_filter_bv_ppc; -extern loop_filter_block_function loop_filter_mbh_ppc; -extern loop_filter_block_function loop_filter_bh_ppc; - -extern loop_filter_block_function loop_filter_mbvs_ppc; -extern loop_filter_block_function loop_filter_bvs_ppc; -extern loop_filter_block_function loop_filter_mbhs_ppc; -extern loop_filter_block_function loop_filter_bhs_ppc; - -// Generic C -extern loop_filter_block_function vp9_loop_filter_mbv_c; -extern loop_filter_block_function vp9_loop_filter_bv_c; -extern loop_filter_block_function vp9_loop_filter_mbh_c; -extern loop_filter_block_function vp9_loop_filter_bh_c; - -extern loop_filter_block_function vp9_loop_filter_mbvs_c; -extern loop_filter_block_function vp9_loop_filter_bvs_c; -extern loop_filter_block_function vp9_loop_filter_mbhs_c; -extern loop_filter_block_function vp9_loop_filter_bhs_c; - -extern loop_filter_block_function *vp8_lf_mbvfull; -extern loop_filter_block_function *vp8_lf_mbhfull; -extern loop_filter_block_function *vp8_lf_bvfull; -extern loop_filter_block_function *vp8_lf_bhfull; - -extern loop_filter_block_function *vp8_lf_mbvsimple; -extern loop_filter_block_function *vp8_lf_mbhsimple; -extern loop_filter_block_function *vp8_lf_bvsimple; -extern loop_filter_block_function *vp8_lf_bhsimple; - -void vp9_clear_c(void) { -} - -void vp9_machine_specific_config(void) { - // Pure C: - vp9_clear_system_state = vp9_clear_c; - vp9_recon_b = vp9_recon_b_c; - vp9_recon4b = vp9_recon4b_c; - vp9_recon2b = vp9_recon2b_c; - - vp9_bilinear_predict16x16 = bilinear_predict16x16_ppc; - vp9_bilinear_predict8x8 = bilinear_predict8x8_ppc; - vp9_bilinear_predict8x4 = bilinear_predict8x4_ppc; - vp8_bilinear_predict = bilinear_predict4x4_ppc; - - vp9_sixtap_predict16x16 = sixtap_predict16x16_ppc; - vp9_sixtap_predict8x8 = sixtap_predict8x8_ppc; - vp9_sixtap_predict8x4 = sixtap_predict8x4_ppc; - vp9_sixtap_predict = sixtap_predict_ppc; - - vp8_short_idct4x4_1 = vp9_short_idct4x4_1_c; - vp8_short_idct4x4 = short_idct4x4_ppc; - vp8_dc_only_idct = vp8_dc_only_idct_c; - - vp8_lf_mbvfull = loop_filter_mbv_ppc; - vp8_lf_bvfull = loop_filter_bv_ppc; - vp8_lf_mbhfull = loop_filter_mbh_ppc; - vp8_lf_bhfull = loop_filter_bh_ppc; - - vp8_lf_mbvsimple = loop_filter_mbvs_ppc; - vp8_lf_bvsimple = loop_filter_bvs_ppc; - vp8_lf_mbhsimple = loop_filter_mbhs_ppc; - vp8_lf_bhsimple = loop_filter_bhs_ppc; - - vp9_post_proc_down_and_across = vp9_post_proc_down_and_across_c; - vp9_mbpost_proc_down = vp9_mbpost_proc_down_c; - vp9_mbpost_proc_across_ip = vp9_mbpost_proc_across_ip_c; - vp9_plane_add_noise = vp9_plane_add_noise_c; - - vp9_copy_mem16x16 = copy_mem16x16_ppc; - vp9_copy_mem8x8 = vp9_copy_mem8x8_c; - vp9_copy_mem8x4 = vp9_copy_mem8x4_c; - -} diff --git a/vp9/common/vp9_alloccommon.c 
b/vp9/common/vp9_alloccommon.c index 15c8c0d64222bf0ece61575b501375ff0dd8ceb1..2660344d51fa5acad76cd83f184426c7e7d032a1 100644 --- a/vp9/common/vp9_alloccommon.c +++ b/vp9/common/vp9_alloccommon.c @@ -10,84 +10,109 @@ #include "./vpx_config.h" -#include "vp9/common/vp9_blockd.h" #include "vpx_mem/vpx_mem.h" -#include "vp9/common/vp9_onyxc_int.h" -#include "vp9/common/vp9_findnearmv.h" +#include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_entropymv.h" +#include "vp9/common/vp9_findnearmv.h" +#include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_systemdependent.h" - -void vp9_update_mode_info_border(VP9_COMMON *cpi, MODE_INFO *mi_base) { - int stride = cpi->mode_info_stride; +void vp9_update_mode_info_border(VP9_COMMON *cm, MODE_INFO *mi) { + const int stride = cm->mode_info_stride; int i; // Clear down top border row - vpx_memset(mi_base, 0, sizeof(MODE_INFO) * cpi->mode_info_stride); + vpx_memset(mi, 0, sizeof(MODE_INFO) * stride); // Clear left border column - for (i = 1; i < cpi->mb_rows + 1; i++) { - vpx_memset(&mi_base[i * stride], 0, sizeof(MODE_INFO)); - } + for (i = 1; i < cm->mi_rows + 1; i++) + vpx_memset(&mi[i * stride], 0, sizeof(MODE_INFO)); } -void vp9_update_mode_info_in_image(VP9_COMMON *cpi, MODE_INFO *mi) { +void vp9_update_mode_info_in_image(VP9_COMMON *cm, MODE_INFO *mi) { int i, j; // For each in image mode_info element set the in image flag to 1 - for (i = 0; i < cpi->mb_rows; i++) { - for (j = 0; j < cpi->mb_cols; j++) { - mi->mbmi.mb_in_image = 1; - mi++; // Next element in the row + for (i = 0; i < cm->mi_rows; i++) { + MODE_INFO *ptr = mi; + for (j = 0; j < cm->mi_cols; j++) { + ptr->mbmi.mb_in_image = 1; + ptr++; // Next element in the row } - mi++; // Step over border element at start of next row + // Step over border element at start of next row + mi += cm->mode_info_stride; } } -void vp9_de_alloc_frame_buffers(VP9_COMMON *oci) { +void vp9_free_frame_buffers(VP9_COMMON *oci) { int i; for (i = 0; i < NUM_YV12_BUFFERS; i++) - vp8_yv12_de_alloc_frame_buffer(&oci->yv12_fb[i]); + vp9_free_frame_buffer(&oci->yv12_fb[i]); - vp8_yv12_de_alloc_frame_buffer(&oci->temp_scale_frame); - vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer); + vp9_free_frame_buffer(&oci->temp_scale_frame); + vp9_free_frame_buffer(&oci->post_proc_buffer); - vpx_free(oci->above_context); vpx_free(oci->mip); vpx_free(oci->prev_mip); + vpx_free(oci->above_seg_context); - oci->above_context = 0; + vpx_free(oci->above_context[0]); + for (i = 0; i < MAX_MB_PLANE; i++) + oci->above_context[i] = 0; oci->mip = 0; oci->prev_mip = 0; + oci->above_seg_context = 0; +} +static void set_mb_mi(VP9_COMMON *cm, int aligned_width, int aligned_height) { + cm->mb_cols = (aligned_width + 8) >> 4; + cm->mb_rows = (aligned_height + 8) >> 4; + cm->MBs = cm->mb_rows * cm->mb_cols; + + cm->mi_cols = aligned_width >> LOG2_MI_SIZE; + cm->mi_rows = aligned_height >> LOG2_MI_SIZE; + cm->mode_info_stride = cm->mi_cols + 64 / MI_SIZE; +} + +static void setup_mi(VP9_COMMON *cm) { + cm->mi = cm->mip + cm->mode_info_stride + 1; + cm->prev_mi = cm->prev_mip + cm->mode_info_stride + 1; + + vpx_memset(cm->mip, 0, + cm->mode_info_stride * (cm->mi_rows + 1) * sizeof(MODE_INFO)); + + vp9_update_mode_info_border(cm, cm->mip); + vp9_update_mode_info_in_image(cm, cm->mi); + + vp9_update_mode_info_border(cm, cm->prev_mip); + vp9_update_mode_info_in_image(cm, cm->prev_mi); } int vp9_alloc_frame_buffers(VP9_COMMON *oci, int width, int height) { - int i; - int aligned_width, 
aligned_height; + int i, mi_cols; - vp9_de_alloc_frame_buffers(oci); + // Our internal buffers are always multiples of 16 + const int aligned_width = multiple8(width); + const int aligned_height = multiple8(height); + const int ss_x = oci->subsampling_x; + const int ss_y = oci->subsampling_y; - /* our internal buffers are always multiples of 16 */ - aligned_width = (width + 15) & ~15; - aligned_height = (height + 15) & ~15; + vp9_free_frame_buffers(oci); for (i = 0; i < NUM_YV12_BUFFERS; i++) { oci->fb_idx_ref_cnt[i] = 0; - if (vp8_yv12_alloc_frame_buffer(&oci->yv12_fb[i], width, height, - VP9BORDERINPIXELS) < 0) { - vp9_de_alloc_frame_buffers(oci); - return 1; - } + if (vp9_alloc_frame_buffer(&oci->yv12_fb[i], width, height, ss_x, ss_y, + VP9BORDERINPIXELS) < 0) + goto fail; } oci->new_fb_idx = NUM_YV12_BUFFERS - 1; oci->fb_idx_ref_cnt[oci->new_fb_idx] = 1; - for (i = 0; i < 3; i++) + for (i = 0; i < ALLOWED_REFS_PER_FRAME; i++) oci->active_ref_idx[i] = i; for (i = 0; i < NUM_REF_FRAMES; i++) { @@ -95,125 +120,86 @@ int vp9_alloc_frame_buffers(VP9_COMMON *oci, int width, int height) { oci->fb_idx_ref_cnt[i] = 1; } - if (vp8_yv12_alloc_frame_buffer(&oci->temp_scale_frame, width, 16, - VP9BORDERINPIXELS) < 0) { - vp9_de_alloc_frame_buffers(oci); - return 1; - } - - if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height, - VP9BORDERINPIXELS) < 0) { - vp9_de_alloc_frame_buffers(oci); - return 1; - } + if (vp9_alloc_frame_buffer(&oci->temp_scale_frame, width, 16, ss_x, ss_y, + VP9BORDERINPIXELS) < 0) + goto fail; - oci->mb_rows = aligned_height >> 4; - oci->mb_cols = aligned_width >> 4; - oci->MBs = oci->mb_rows * oci->mb_cols; - oci->mode_info_stride = oci->mb_cols + 1; - oci->mip = vpx_calloc((oci->mb_cols + 1) * (oci->mb_rows + 1), sizeof(MODE_INFO)); + if (vp9_alloc_frame_buffer(&oci->post_proc_buffer, width, height, ss_x, ss_y, + VP9BORDERINPIXELS) < 0) + goto fail; - if (!oci->mip) { - vp9_de_alloc_frame_buffers(oci); - return 1; - } + set_mb_mi(oci, aligned_width, aligned_height); - oci->mi = oci->mip + oci->mode_info_stride + 1; + // Allocation + oci->mip = vpx_calloc(oci->mode_info_stride * (oci->mi_rows + 64 / MI_SIZE), + sizeof(MODE_INFO)); + if (!oci->mip) + goto fail; - /* allocate memory for last frame MODE_INFO array */ + oci->prev_mip = vpx_calloc(oci->mode_info_stride * + (oci->mi_rows + 64 / MI_SIZE), + sizeof(MODE_INFO)); + if (!oci->prev_mip) + goto fail; - oci->prev_mip = vpx_calloc((oci->mb_cols + 1) * (oci->mb_rows + 1), sizeof(MODE_INFO)); + setup_mi(oci); - if (!oci->prev_mip) { - vp9_de_alloc_frame_buffers(oci); - return 1; - } + // FIXME(jkoleszar): allocate subsampled arrays for U/V once subsampling + // information is exposed at this level + mi_cols = mi_cols_aligned_to_sb(oci); - oci->prev_mi = oci->prev_mip + oci->mode_info_stride + 1; + // 2 contexts per 'mi unit', so that we have one context per 4x4 txfm + // block where mi unit size is 8x8. 
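Aside: the factor in the allocation that follows comes straight from the comment above: an mi unit is 8x8 pels, i.e. two 4x4 transform columns per plane, so each plane needs 2 ENTROPY_CONTEXTs per mi column; three planes give the factor 6, four give 8 under CONFIG_ALPHA, and chroma keeps the full-resolution stride for now per the FIXME. A sketch of the same single-allocation slicing (an illustrative helper, not the patch itself):

#include <stdlib.h>

typedef char ENTROPY_CONTEXT;
#define MAX_MB_PLANE 3  /* 4 when CONFIG_ALPHA is enabled */

/* One calloc sliced into per-plane runs of 2 contexts per mi column,
 * matching the above_context[i] pointer setup in the patch. */
int alloc_above_context(ENTROPY_CONTEXT *planes[MAX_MB_PLANE],
                        int mi_cols) {
  int i;
  planes[0] = (ENTROPY_CONTEXT *)calloc(2 * MAX_MB_PLANE * mi_cols,
                                        sizeof(ENTROPY_CONTEXT));
  if (!planes[0])
    return -1;
  for (i = 1; i < MAX_MB_PLANE; i++)
    planes[i] = planes[0] + i * 2 * mi_cols;
  /* free(planes[0]) releases every slice, cf. vp9_free_frame_buffers */
  return 0;
}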
+# if CONFIG_ALPHA + oci->above_context[0] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * 8 * mi_cols, 1); +#else + oci->above_context[0] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * 6 * mi_cols, 1); +#endif + if (!oci->above_context[0]) + goto fail; - oci->above_context = - vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * (3 + oci->mb_cols), 1); + for (i = 1; i < MAX_MB_PLANE; i++) + oci->above_context[i] = + oci->above_context[0] + i * sizeof(ENTROPY_CONTEXT) * 2 * mi_cols; - if (!oci->above_context) { - vp9_de_alloc_frame_buffers(oci); - return 1; - } - - vp9_update_mode_info_border(oci, oci->mip); - vp9_update_mode_info_in_image(oci, oci->mi); + oci->above_seg_context = vpx_calloc(sizeof(PARTITION_CONTEXT) * mi_cols, 1); + if (!oci->above_seg_context) + goto fail; return 0; -} - -void vp9_setup_version(VP9_COMMON *cm) { - if (cm->version & 0x4) { - if (!CONFIG_EXPERIMENTAL) - vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, - "Bitstream was created by an experimental " - "encoder"); - cm->experimental = 1; - } - switch (cm->version & 0x3) { - case 0: - cm->no_lpf = 0; - cm->filter_type = NORMAL_LOOPFILTER; - cm->use_bilinear_mc_filter = 0; - cm->full_pixel = 0; - break; - case 1: - cm->no_lpf = 0; - cm->filter_type = SIMPLE_LOOPFILTER; - cm->use_bilinear_mc_filter = 1; - cm->full_pixel = 0; - break; - case 2: - case 3: - cm->no_lpf = 1; - cm->filter_type = NORMAL_LOOPFILTER; - cm->use_bilinear_mc_filter = 1; - cm->full_pixel = 0; - break; - // Full pel only code deprecated in experimental code base - // case 3: - // cm->no_lpf = 1; - // cm->filter_type = SIMPLE_LOOPFILTER; - // cm->use_bilinear_mc_filter = 1; - // cm->full_pixel = 1; - // break; - } + fail: + vp9_free_frame_buffers(oci); + return 1; } + void vp9_create_common(VP9_COMMON *oci) { vp9_machine_specific_config(oci); vp9_init_mbmode_probs(oci); - vp9_default_bmode_probs(oci->fc.bmode_prob); - oci->txfm_mode = ONLY_4X4; - oci->mb_no_coeff_skip = 1; oci->comp_pred_mode = HYBRID_PREDICTION; - oci->no_lpf = 0; - oci->filter_type = NORMAL_LOOPFILTER; - oci->use_bilinear_mc_filter = 0; - oci->full_pixel = 0; oci->clr_type = REG_YUV; - oci->clamp_type = RECON_CLAMP_REQUIRED; - /* Initialise reference frame sign bias structure to defaults */ + // Initialize reference frame sign bias structure to defaults vpx_memset(oci->ref_frame_sign_bias, 0, sizeof(oci->ref_frame_sign_bias)); - - oci->kf_ymode_probs_update = 0; } void vp9_remove_common(VP9_COMMON *oci) { - vp9_de_alloc_frame_buffers(oci); + vp9_free_frame_buffers(oci); } void vp9_initialize_common() { vp9_coef_tree_initialize(); - vp9_entropy_mode_init(); - vp9_entropy_mv_init(); } + +void vp9_update_frame_size(VP9_COMMON *cm) { + const int aligned_width = multiple8(cm->width); + const int aligned_height = multiple8(cm->height); + + set_mb_mi(cm, aligned_width, aligned_height); + setup_mi(cm); +} diff --git a/vp9/common/vp9_alloccommon.h b/vp9/common/vp9_alloccommon.h index a784cb7463d1ba6da88328a3cd44d3a60a1ed6fe..8bf5ed1602fa1b2e488871281964183e8ba1b686 100644 --- a/vp9/common/vp9_alloccommon.h +++ b/vp9/common/vp9_alloccommon.h @@ -14,13 +14,18 @@ #include "vp9/common/vp9_onyxc_int.h" +void vp9_initialize_common(); + +void vp9_update_mode_info_border(VP9_COMMON *cpi, MODE_INFO *mi); +void vp9_update_mode_info_in_image(VP9_COMMON *cpi, MODE_INFO *mi); + void vp9_create_common(VP9_COMMON *oci); void vp9_remove_common(VP9_COMMON *oci); -void vp9_de_alloc_frame_buffers(VP9_COMMON *oci); + int vp9_alloc_frame_buffers(VP9_COMMON *oci, int width, int height); -void vp9_setup_version(VP9_COMMON 
*oci); +void vp9_free_frame_buffers(VP9_COMMON *oci); -void vp9_update_mode_info_border(VP9_COMMON *cpi, MODE_INFO *mi_base); -void vp9_update_mode_info_in_image(VP9_COMMON *cpi, MODE_INFO *mi); + +void vp9_update_frame_size(VP9_COMMON *cm); #endif // VP9_COMMON_VP9_ALLOCCOMMON_H_ diff --git a/vp9/common/vp9_blockd.c b/vp9/common/vp9_blockd.c deleted file mode 100644 index 9151622d3884408e541cd1c49d02b5c326d723b8..0000000000000000000000000000000000000000 --- a/vp9/common/vp9_blockd.c +++ /dev/null @@ -1,442 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vp9/common/vp9_blockd.h" -#include "vpx_mem/vpx_mem.h" - -const uint8_t vp9_block2left[TX_SIZE_MAX_MB][24] = { - { 0, 0, 0, 0, - 1, 1, 1, 1, - 2, 2, 2, 2, - 3, 3, 3, 3, - 4, 4, - 5, 5, - 6, 6, - 7, 7 }, - { 0, 0, 0, 0, - 0, 0, 0, 0, - 2, 2, 2, 2, - 2, 2, 2, 2, - 4, 4, - 4, 4, - 6, 6, - 6, 6 }, - { 0, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, 0, 0 }, -}; -const uint8_t vp9_block2above[TX_SIZE_MAX_MB][24] = { - { 0, 1, 2, 3, - 0, 1, 2, 3, - 0, 1, 2, 3, - 0, 1, 2, 3, - 4, 5, - 4, 5, - 6, 7, - 6, 7 }, - { 0, 0, 0, 0, - 2, 2, 2, 2, - 0, 0, 0, 0, - 2, 2, 2, 2, - 4, 4, - 4, 4, - 6, 6, - 6, 6 }, - { 0, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, 0, 0 }, -}; - -#define S(x) x + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT) -const uint8_t vp9_block2left_sb[TX_SIZE_MAX_SB][96] = { - { 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, - 3, 3, 3, 3, 3, 3, 3, 3, - S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), - S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), - S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), - S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), - 4, 4, 4, 4, - 5, 5, 5, 5, - S(4), S(4), S(4), S(4), - S(5), S(5), S(5), S(5), - 6, 6, 6, 6, - 7, 7, 7, 7, - S(6), S(6), S(6), S(6), - S(7), S(7), S(7), S(7) }, - { 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, - S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), - S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), - S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), - S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), - 4, 4, 4, 4, - 4, 4, 4, 4, - S(4), S(4), S(4), S(4), - S(4), S(4), S(4), S(4), - 6, 6, 6, 6, - 6, 6, 6, 6, - S(6), S(6), S(6), S(6), - S(6), S(6), S(6), S(6) }, - { 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), - S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), - S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), - S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), - 4, 4, 4, 4, - 4, 4, 4, 4, - 4, 4, 4, 4, - 4, 4, 4, 4, - 6, 6, 6, 6, - 6, 6, 6, 6, - 6, 6, 6, 6, - 6, 6, 6, 6 }, - { 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 }, -}; -const uint8_t vp9_block2above_sb[TX_SIZE_MAX_SB][96] = { - { 0, 1, 2, 3, S(0), S(1), S(2), S(3), - 0, 1, 2, 3, S(0), S(1), S(2), S(3), - 0, 1, 2, 3, S(0), S(1), S(2), S(3), - 0, 1, 2, 3, S(0), S(1), S(2), S(3), - 0, 1, 2, 3, S(0), S(1), 
S(2), S(3), - 0, 1, 2, 3, S(0), S(1), S(2), S(3), - 0, 1, 2, 3, S(0), S(1), S(2), S(3), - 0, 1, 2, 3, S(0), S(1), S(2), S(3), - 4, 5, S(4), S(5), - 4, 5, S(4), S(5), - 4, 5, S(4), S(5), - 4, 5, S(4), S(5), - 6, 7, S(6), S(7), - 6, 7, S(6), S(7), - 6, 7, S(6), S(7), - 6, 7, S(6), S(7) }, - { 0, 0, 0, 0, 2, 2, 2, 2, - S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), - 0, 0, 0, 0, 2, 2, 2, 2, - S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), - 0, 0, 0, 0, 2, 2, 2, 2, - S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), - 0, 0, 0, 0, 2, 2, 2, 2, - S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), - 4, 4, 4, 4, - S(4), S(4), S(4), S(4), - 4, 4, 4, 4, - S(4), S(4), S(4), S(4), - 6, 6, 6, 6, - S(6), S(6), S(6), S(6), - 6, 6, 6, 6, - S(6), S(6), S(6), S(6) }, - { 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), - S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), - S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), - 4, 4, 4, 4, - 4, 4, 4, 4, - 4, 4, 4, 4, - 4, 4, 4, 4, - 6, 6, 6, 6, - 6, 6, 6, 6, - 6, 6, 6, 6, - 6, 6, 6, 6 }, - { 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 }, -}; - -#define T(x) x + 2 * (sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT)) -#define U(x) x + 3 * (sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT)) -const uint8_t vp9_block2left_sb64[TX_SIZE_MAX_SB][384] = { - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), - S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), - S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), - S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), - T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), - T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), - T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), - T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), - U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), - U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), - U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), - U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), - 4, 4, 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 5, 5, 5, 5, - S(4), S(4), S(4), S(4), S(4), S(4), S(4), S(4), - S(5), S(5), S(5), S(5), S(5), S(5), S(5), S(5), - T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4), - T(5), T(5), T(5), T(5), T(5), T(5), T(5), T(5), - U(4), U(4), U(4), U(4), U(4), U(4), U(4), U(4), - U(5), U(5), U(5), U(5), U(5), U(5), U(5), U(5), - 6, 6, 6, 6, 6, 6, 6, 6, - 7, 7, 7, 7, 7, 7, 7, 7, - S(6), S(6), S(6), S(6), S(6), S(6), S(6), S(6), - S(7), S(7), S(7), S(7), S(7), S(7), S(7), S(7), - T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6), - 
T(7), T(7), T(7), T(7), T(7), T(7), T(7), T(7), - U(6), U(6), U(6), U(6), U(6), U(6), U(6), U(6), - U(7), U(7), U(7), U(7), U(7), U(7), U(7), U(7) }, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), - S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), - S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), - S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), - T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), - T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), - T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), - T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), - U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), - U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), - U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), - U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), - 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, - S(4), S(4), S(4), S(4), S(4), S(4), S(4), S(4), - S(4), S(4), S(4), S(4), S(4), S(4), S(4), S(4), - T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4), - T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4), - U(4), U(4), U(4), U(4), U(4), U(4), U(4), U(4), - U(4), U(4), U(4), U(4), U(4), U(4), U(4), U(4), - 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, - S(6), S(6), S(6), S(6), S(6), S(6), S(6), S(6), - S(6), S(6), S(6), S(6), S(6), S(6), S(6), S(6), - T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6), - T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6), - U(6), U(6), U(6), U(6), U(6), U(6), U(6), U(6), - U(6), U(6), U(6), U(6), U(6), U(6), U(6), U(6) }, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), - S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), - S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), - S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), - T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), - T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), - T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), - T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), - U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), - U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), - U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), - U(0), U(0), U(0), U(0), U(0), U(0), 
U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), - 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, - T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4), - T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4), - T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4), - T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4), - 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, - T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6), - T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6), - T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6), - T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6) }, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, - 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6 }, -}; -const uint8_t vp9_block2above_sb64[TX_SIZE_MAX_SB][384] = { - { 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), - 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), - 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), - 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), - 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), - 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), - 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), - 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), - 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), - 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), - 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), - 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), - 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), - 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), - 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), - 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), - 4, 5, S(4), S(5), T(4), T(5), U(4), U(5), - 4, 5, S(4), S(5), T(4), T(5), U(4), U(5), - 4, 5, S(4), S(5), T(4), T(5), U(4), U(5), - 4, 5, S(4), S(5), T(4), T(5), U(4), U(5), - 4, 5, S(4), S(5), T(4), T(5), U(4), U(5), - 
4, 5, S(4), S(5), T(4), T(5), U(4), U(5), - 4, 5, S(4), S(5), T(4), T(5), U(4), U(5), - 4, 5, S(4), S(5), T(4), T(5), U(4), U(5), - 6, 7, S(6), S(7), T(6), T(7), U(6), U(7), - 6, 7, S(6), S(7), T(6), T(7), U(6), U(7), - 6, 7, S(6), S(7), T(6), T(7), U(6), U(7), - 6, 7, S(6), S(7), T(6), T(7), U(6), U(7), - 6, 7, S(6), S(7), T(6), T(7), U(6), U(7), - 6, 7, S(6), S(7), T(6), T(7), U(6), U(7), - 6, 7, S(6), S(7), T(6), T(7), U(6), U(7), - 6, 7, S(6), S(7), T(6), T(7), U(6), U(7) }, - { 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), - T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2), - 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), - T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2), - 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), - T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2), - 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), - T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2), - 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), - T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2), - 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), - T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2), - 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), - T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2), - 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), - T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2), - 4, 4, 4, 4, S(4), S(4), S(4), S(4), - T(4), T(4), T(4), T(4), U(4), U(4), U(4), U(4), - 4, 4, 4, 4, S(4), S(4), S(4), S(4), - T(4), T(4), T(4), T(4), U(4), U(4), U(4), U(4), - 4, 4, 4, 4, S(4), S(4), S(4), S(4), - T(4), T(4), T(4), T(4), U(4), U(4), U(4), U(4), - 4, 4, 4, 4, S(4), S(4), S(4), S(4), - T(4), T(4), T(4), T(4), U(4), U(4), U(4), U(4), - 6, 6, 6, 6, S(6), S(6), S(6), S(6), - T(6), T(6), T(6), T(6), U(6), U(6), U(6), U(6), - 6, 6, 6, 6, S(6), S(6), S(6), S(6), - T(6), T(6), T(6), T(6), U(6), U(6), U(6), U(6), - 6, 6, 6, 6, S(6), S(6), S(6), S(6), - T(6), T(6), T(6), T(6), U(6), U(6), U(6), U(6), - 6, 6, 6, 6, S(6), S(6), S(6), S(6), - T(6), T(6), T(6), T(6), U(6), U(6), U(6), U(6) }, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), - T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), - U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), - T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), - U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), - T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), - U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), 
U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), - T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), - U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), - 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, - T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4), - T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4), - 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, - T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4), - T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4), - 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, - T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6), - T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6), - 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, - T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6), - T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6) }, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, - 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6 }, -}; -#undef U -#undef T -#undef S diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index 23d0bfd593374c8a9970157d52aaaff244dfb1c5..e1901d87debeb7e48ab52fe7fa63c76ffaa41ec9 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -12,8 +12,6 @@ #ifndef VP9_COMMON_VP9_BLOCKD_H_ #define VP9_COMMON_VP9_BLOCKD_H_ -void vpx_log(const char *format, ...); - #include "./vpx_config.h" #include "vpx_scale/yv12config.h" #include "vp9/common/vp9_convolve.h" @@ -21,35 +19,27 @@ void vpx_log(const char *format, ...); #include "vp9/common/vp9_treecoder.h" #include "vpx_ports/mem.h" #include "vp9/common/vp9_common.h" +#include "vp9/common/vp9_enums.h" -#define TRUE 1 -#define FALSE 0 - -// #define MODE_STATS - -/*#define DCPRED 1*/ -#define DCPREDSIMTHRESH 0 -#define DCPREDCNTTHRESH 3 +#define BLOCK_SIZE_GROUPS 4 +#define MAX_MB_SEGMENTS 8 +#define MB_SEG_TREE_PROBS (MAX_MB_SEGMENTS-1) -#define MB_FEATURE_TREE_PROBS 3 #define PREDICTION_PROBS 3 #define MBSKIP_CONTEXTS 3 -#define MAX_MB_SEGMENTS 4 - #define MAX_REF_LF_DELTAS 4 -#define MAX_MODE_LF_DELTAS 4 +#define MAX_MODE_LF_DELTAS 2 /* Segment Feature Masks */ #define SEGMENT_DELTADATA 0 #define SEGMENT_ABSDATA 1 -#define MAX_MV_REFS 9 -#define MAX_MV_REF_CANDIDATES 4 +#define MAX_MV_REF_CANDIDATES 2 -typedef struct { - int r, c; -} POS; +#define 
INTRA_INTER_CONTEXTS 4 +#define COMP_INTER_CONTEXTS 5 +#define REF_CONTEXTS 5 typedef enum { PLANE_TYPE_Y_WITH_DC, @@ -57,24 +47,21 @@ typedef enum { } PLANE_TYPE; typedef char ENTROPY_CONTEXT; -typedef struct { - ENTROPY_CONTEXT y1[4]; - ENTROPY_CONTEXT u[2]; - ENTROPY_CONTEXT v[2]; -} ENTROPY_CONTEXT_PLANES; -#define VP9_COMBINEENTROPYCONTEXTS(Dest, A, B) \ - Dest = ((A)!=0) + ((B)!=0); +typedef char PARTITION_CONTEXT; + +static INLINE int combine_entropy_contexts(ENTROPY_CONTEXT a, + ENTROPY_CONTEXT b) { + return (a != 0) + (b != 0); +} typedef enum { KEY_FRAME = 0, - INTER_FRAME = 1 + INTER_FRAME = 1, + NUM_FRAME_TYPES, } FRAME_TYPE; typedef enum { -#if CONFIG_ENABLE_6TAP - SIXTAP, -#endif EIGHTTAP_SMOOTH, EIGHTTAP, EIGHTTAP_SHARP, @@ -83,26 +70,27 @@ typedef enum { } INTERPOLATIONFILTERTYPE; typedef enum { - DC_PRED, /* average of above and left pixels */ - V_PRED, /* vertical prediction */ - H_PRED, /* horizontal prediction */ - D45_PRED, /* Directional 45 deg prediction [anti-clockwise from 0 deg hor] */ - D135_PRED, /* Directional 135 deg prediction [anti-clockwise from 0 deg hor] */ - D117_PRED, /* Directional 112 deg prediction [anti-clockwise from 0 deg hor] */ - D153_PRED, /* Directional 157 deg prediction [anti-clockwise from 0 deg hor] */ - D27_PRED, /* Directional 22 deg prediction [anti-clockwise from 0 deg hor] */ - D63_PRED, /* Directional 67 deg prediction [anti-clockwise from 0 deg hor] */ - TM_PRED, /* Truemotion prediction */ - I8X8_PRED, /* 8x8 based prediction, each 8x8 has its own prediction mode */ - B_PRED, /* block based prediction, each block has its own prediction mode */ + DC_PRED, // Average of above and left pixels + V_PRED, // Vertical + H_PRED, // Horizontal + D45_PRED, // Directional 45 deg = round(arctan(1/1) * 180/pi) + D135_PRED, // Directional 135 deg = 180 - 45 + D117_PRED, // Directional 117 deg = 180 - 63 + D153_PRED, // Directional 153 deg = 180 - 27 + D27_PRED, // Directional 27 deg = round(arctan(1/2) * 180/pi) + D63_PRED, // Directional 63 deg = round(arctan(2/1) * 180/pi) + TM_PRED, // True-motion NEARESTMV, NEARMV, ZEROMV, NEWMV, - SPLITMV, MB_MODE_COUNT } MB_PREDICTION_MODE; +static INLINE int is_inter_mode(MB_PREDICTION_MODE mode) { + return mode >= NEARESTMV && mode <= NEWMV; +} + // Segment level features. typedef enum { SEG_LVL_ALT_Q = 0, // Use alternate Quantizer .... 
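Aside: is_inter_mode above, and the VP9_INTER_MODES count further down, both lean on MB_PREDICTION_MODE keeping the four inter modes contiguous and last before MB_MODE_COUNT. A compile-time guard would make that assumption explicit; this is a hedged sketch and the typedef name is illustrative, not part of the patch:

/* The array type gets a negative size, breaking the build, if the
 * enum ordering that is_inter_mode() relies on is ever reshuffled. */
typedef char inter_modes_are_contiguous_check[
    (NEARESTMV + 1 == NEARMV && NEARMV + 1 == ZEROMV &&
     ZEROMV + 1 == NEWMV && NEWMV + 1 == MB_MODE_COUNT) ? 1 : -1];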
@@ -117,8 +105,7 @@ typedef enum { TX_4X4 = 0, // 4x4 dct transform TX_8X8 = 1, // 8x8 dct transform TX_16X16 = 2, // 16x16 dct transform - TX_SIZE_MAX_MB = 3, // Number of different transforms available - TX_32X32 = TX_SIZE_MAX_MB, // 32x32 dct transform + TX_32X32 = 3, // 32x32 dct transform TX_SIZE_MAX_SB, // Number of transforms available to SBs } TX_SIZE; @@ -129,61 +116,18 @@ typedef enum { ADST_ADST = 3 // ADST in both directions } TX_TYPE; -#define VP9_YMODES (B_PRED + 1) -#define VP9_UV_MODES (TM_PRED + 1) -#define VP9_I8X8_MODES (TM_PRED + 1) -#define VP9_I32X32_MODES (TM_PRED + 1) +#define VP9_INTRA_MODES (TM_PRED + 1) -#define VP9_MVREFS (1 + SPLITMV - NEARESTMV) +#define VP9_INTER_MODES (1 + NEWMV - NEARESTMV) #define WHT_UPSCALE_FACTOR 2 -typedef enum { - B_DC_PRED, /* average of above and left pixels */ - B_TM_PRED, - - B_VE_PRED, /* vertical prediction */ - B_HE_PRED, /* horizontal prediction */ - - B_LD_PRED, - B_RD_PRED, - - B_VR_PRED, - B_VL_PRED, - B_HD_PRED, - B_HU_PRED, -#if CONFIG_NEWBINTRAMODES - B_CONTEXT_PRED, -#endif - - LEFT4X4, - ABOVE4X4, - ZERO4X4, - NEW4X4, +#define TX_SIZE_PROBS 6 // (TX_SIZE_MAX_SB * (TX_SIZE_MAX_SB - 1) / 2) - B_MODE_COUNT -} B_PREDICTION_MODE; - -#define VP9_BINTRAMODES (LEFT4X4) -#define VP9_SUBMVREFS (1 + NEW4X4 - LEFT4X4) - -#if CONFIG_NEWBINTRAMODES -/* The number of B_PRED intra modes that are replaced by B_CONTEXT_PRED */ -#define CONTEXT_PRED_REPLACEMENTS 0 -#define VP9_KF_BINTRAMODES (VP9_BINTRAMODES - 1) -#define VP9_NKF_BINTRAMODES (VP9_BINTRAMODES - CONTEXT_PRED_REPLACEMENTS) -#else -#define VP9_KF_BINTRAMODES (VP9_BINTRAMODES) /* 10 */ -#define VP9_NKF_BINTRAMODES (VP9_BINTRAMODES) /* 10 */ -#endif - -typedef enum { - PARTITIONING_16X8 = 0, - PARTITIONING_8X16, - PARTITIONING_8X8, - PARTITIONING_4X4, - NB_PARTITIONINGS, -} SPLITMV_PARTITIONING_TYPE; +#define get_tx_probs(c, b) ((b) < BLOCK_SIZE_MB16X16 ? \ + (c)->fc.tx_probs_8x8p : \ + (b) < BLOCK_SIZE_SB32X32 ? 
\ + (c)->fc.tx_probs_16x16p : (c)->fc.tx_probs_32x32p) /* For keyframes, intra block modes are predicted by the (already decoded) modes for the Y blocks to the left and above us; for interframes, there @@ -191,11 +135,7 @@ typedef enum { union b_mode_info { struct { - B_PREDICTION_MODE first; - TX_TYPE tx_type; -#if CONFIG_NEWBINTRAMODES - B_PREDICTION_MODE context; -#endif + MB_PREDICTION_MODE first; } as_mode; int_mv as_mv[2]; // first, second inter predictor motion vectors }; @@ -209,37 +149,80 @@ typedef enum { MAX_REF_FRAMES = 4 } MV_REFERENCE_FRAME; -typedef enum { - BLOCK_SIZE_MB16X16 = 0, - BLOCK_SIZE_SB32X32 = 1, - BLOCK_SIZE_SB64X64 = 2, -} BLOCK_SIZE_TYPE; +static INLINE int b_width_log2(BLOCK_SIZE_TYPE sb_type) { + switch (sb_type) { + case BLOCK_SIZE_SB4X8: + case BLOCK_SIZE_AB4X4: return 0; + case BLOCK_SIZE_SB8X4: + case BLOCK_SIZE_SB8X8: + case BLOCK_SIZE_SB8X16: return 1; + case BLOCK_SIZE_SB16X8: + case BLOCK_SIZE_MB16X16: + case BLOCK_SIZE_SB16X32: return 2; + case BLOCK_SIZE_SB32X16: + case BLOCK_SIZE_SB32X32: + case BLOCK_SIZE_SB32X64: return 3; + case BLOCK_SIZE_SB64X32: + case BLOCK_SIZE_SB64X64: return 4; + default: assert(0); + return -1; + } +} + +static INLINE int b_height_log2(BLOCK_SIZE_TYPE sb_type) { + switch (sb_type) { + case BLOCK_SIZE_SB8X4: + case BLOCK_SIZE_AB4X4: return 0; + case BLOCK_SIZE_SB4X8: + case BLOCK_SIZE_SB8X8: + case BLOCK_SIZE_SB16X8: return 1; + case BLOCK_SIZE_SB8X16: + case BLOCK_SIZE_MB16X16: + case BLOCK_SIZE_SB32X16: return 2; + case BLOCK_SIZE_SB16X32: + case BLOCK_SIZE_SB32X32: + case BLOCK_SIZE_SB64X32: return 3; + case BLOCK_SIZE_SB32X64: + case BLOCK_SIZE_SB64X64: return 4; + default: assert(0); + return -1; + } +} + +static INLINE int mi_width_log2(BLOCK_SIZE_TYPE sb_type) { + int a = b_width_log2(sb_type) - 1; + // align 4x4 block to mode_info + if (a < 0) + a = 0; + assert(a >= 0); + return a; +} + +static INLINE int mi_height_log2(BLOCK_SIZE_TYPE sb_type) { + int a = b_height_log2(sb_type) - 1; + if (a < 0) + a = 0; + assert(a >= 0); + return a; +} typedef struct { MB_PREDICTION_MODE mode, uv_mode; -#if CONFIG_COMP_INTERINTRA_PRED - MB_PREDICTION_MODE interintra_mode, interintra_uv_mode; -#endif - MV_REFERENCE_FRAME ref_frame, second_ref_frame; + MV_REFERENCE_FRAME ref_frame[2]; TX_SIZE txfm_size; int_mv mv[2]; // for each reference frame used int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES]; int_mv best_mv, best_second_mv; -#if CONFIG_NEW_MVREF - int best_index, best_second_index; -#endif int mb_mode_context[MAX_REF_FRAMES]; - SPLITMV_PARTITIONING_TYPE partitioning; unsigned char mb_skip_coeff; /* does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens */ unsigned char need_to_clamp_mvs; unsigned char need_to_clamp_secondmv; - unsigned char segment_id; /* Which set of segmentation parameters should be used for this MB */ + unsigned char segment_id; // Segment id for current frame // Flags used for prediction status of various bistream signals unsigned char seg_id_predicted; - unsigned char ref_predicted; // Indicates if the mb is part of the image (1) vs border (0) // This can be useful in determining whether the MB provides @@ -249,69 +232,62 @@ typedef struct { INTERPOLATIONFILTERTYPE interp_filter; BLOCK_SIZE_TYPE sb_type; -#if CONFIG_CODE_NONZEROCOUNT - uint16_t nzcs[256+64*2]; -#endif } MB_MODE_INFO; typedef struct { MB_MODE_INFO mbmi; - union b_mode_info bmi[16]; + union b_mode_info bmi[4]; } MODE_INFO; -typedef struct blockd { - int16_t *qcoeff; - int16_t *dqcoeff; - uint8_t *predictor; - 
int16_t *diff; - int16_t *dequant; - - /* 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries */ - uint8_t **base_pre; - uint8_t **base_second_pre; - int pre; - int pre_stride; - - uint8_t **base_dst; - int dst; - int dst_stride; - - union b_mode_info bmi; -} BLOCKD; - +#define VP9_REF_SCALE_SHIFT 14 struct scale_factors { - int x_num; - int x_den; + int x_scale_fp; // horizontal fixed point scale factor + int y_scale_fp; // vertical fixed point scale factor int x_offset_q4; int x_step_q4; - int y_num; - int y_den; int y_offset_q4; int y_step_q4; -#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT - convolve_fn_t predict[2][2][8]; // horiz, vert, weight (0 - 7) -#else + + int (*scale_value_x)(int val, const struct scale_factors *scale); + int (*scale_value_y)(int val, const struct scale_factors *scale); + void (*set_scaled_offsets)(struct scale_factors *scale, int row, int col); + int_mv32 (*scale_mv_q3_to_q4)(const int_mv *src_mv, + const struct scale_factors *scale); + int32_t (*scale_mv_component_q4)(int mv_q4, int scale_fp, int offset_q4); + convolve_fn_t predict[2][2][2]; // horiz, vert, avg -#endif }; -typedef struct macroblockd { - DECLARE_ALIGNED(16, int16_t, diff[64*64+32*32*2]); /* from idct diff */ - DECLARE_ALIGNED(16, uint8_t, predictor[384]); // unused for superblocks - DECLARE_ALIGNED(16, int16_t, qcoeff[64*64+32*32*2]); - DECLARE_ALIGNED(16, int16_t, dqcoeff[64*64+32*32*2]); - DECLARE_ALIGNED(16, uint16_t, eobs[256+64*2]); -#if CONFIG_CODE_NONZEROCOUNT - DECLARE_ALIGNED(16, uint16_t, nzcs[256+64*2]); +#if CONFIG_ALPHA +enum { MAX_MB_PLANE = 4 }; +#else +enum { MAX_MB_PLANE = 3 }; #endif - /* 16 Y blocks, 4 U, 4 V, each with 16 entries. */ - BLOCKD block[24]; - int fullpixel_mask; +struct buf_2d { + uint8_t *buf; + int stride; +}; + +struct macroblockd_plane { + DECLARE_ALIGNED(16, int16_t, qcoeff[64 * 64]); + DECLARE_ALIGNED(16, int16_t, dqcoeff[64 * 64]); + DECLARE_ALIGNED(16, uint16_t, eobs[256]); + PLANE_TYPE plane_type; + int subsampling_x; + int subsampling_y; + struct buf_2d dst; + struct buf_2d pre[2]; + int16_t *dequant; + ENTROPY_CONTEXT *above_context; + ENTROPY_CONTEXT *left_context; +}; + +#define BLOCK_OFFSET(x, i, n) ((x) + (i) * (n)) + +typedef struct macroblockd { + struct macroblockd_plane plane[MAX_MB_PLANE]; - YV12_BUFFER_CONFIG pre; /* Filtered copy of previous frame reconstruction */ - YV12_BUFFER_CONFIG second_pre; - YV12_BUFFER_CONFIG dst; struct scale_factors scale_factor[2]; struct scale_factors scale_factor_uv[2]; @@ -325,11 +301,11 @@ typedef struct macroblockd { int left_available; int right_available; - /* Y,U,V */ - ENTROPY_CONTEXT_PLANES *above_context; - ENTROPY_CONTEXT_PLANES *left_context; + // partition contexts + PARTITION_CONTEXT *above_seg_context; + PARTITION_CONTEXT *left_seg_context; - /* 0 indicates segmentation at MB level is not enabled. Otherwise the individual bits indicate which features are active. */ + /* 0 (disable) 1 (enable) segmentation */ unsigned char segmentation_enabled; /* 0 (do not update) 1 (update) the macroblock segmentation map. 
*/ @@ -345,15 +321,10 @@ typedef struct macroblockd { /* are enabled and when enabled the proabilities used to decode the per MB flags in MB_MODE_INFO */ // Probability Tree used to code Segment number - vp9_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS]; - vp9_prob mb_segment_mispred_tree_probs[MAX_MB_SEGMENTS]; - -#if CONFIG_NEW_MVREF - vp9_prob mb_mv_ref_probs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES-1]; -#endif + vp9_prob mb_segment_tree_probs[MB_SEG_TREE_PROBS]; // Segment features - signed char segment_feature_data[MAX_MB_SEGMENTS][SEG_LVL_MAX]; + int16_t segment_feature_data[MAX_MB_SEGMENTS][SEG_LVL_MAX]; unsigned int segment_feature_mask[MAX_MB_SEGMENTS]; /* mode_based Loop filter adjustment */ @@ -361,10 +332,14 @@ typedef struct macroblockd { unsigned char mode_ref_lf_delta_update; /* Delta values have the range +/- MAX_LOOP_FILTER */ - signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */ - signed char ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */ - signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */ - signed char mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */ + /* 0 = Intra, Last, GF, ARF */ + signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS]; + /* 0 = Intra, Last, GF, ARF */ + signed char ref_lf_deltas[MAX_REF_LF_DELTAS]; + /* 0 = ZERO_MV, MV */ + signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS]; + /* 0 = ZERO_MV, MV */ + signed char mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* Distance of MB away from frame edges */ int mb_to_left_edge; @@ -377,15 +352,13 @@ typedef struct macroblockd { int lossless; /* Inverse transform function pointers. */ - void (*inv_txm4x4_1)(int16_t *input, int16_t *output, int pitch); - void (*inv_txm4x4)(int16_t *input, int16_t *output, int pitch); - void (*itxm_add)(int16_t *input, const int16_t *dq, - uint8_t *pred, uint8_t *output, int pitch, int stride, int eob); - void (*itxm_add_y_block)(int16_t *q, const int16_t *dq, - uint8_t *pre, uint8_t *dst, int stride, struct macroblockd *xd); - void (*itxm_add_uv_block)(int16_t *q, const int16_t *dq, - uint8_t *pre, uint8_t *dst_u, uint8_t *dst_v, int stride, + void (*inv_txm4x4_1_add)(int16_t *input, uint8_t *dest, int stride); + void (*inv_txm4x4_add)(int16_t *input, uint8_t *dest, int stride); + void (*itxm_add)(int16_t *input, uint8_t *dest, int stride, int eob); + void (*itxm_add_y_block)(int16_t *q, uint8_t *dst, int stride, struct macroblockd *xd); + void (*itxm_add_uv_block)(int16_t *q, uint8_t *dst, int stride, + uint16_t *eobs); struct subpix_fn_table subpix; @@ -393,283 +366,545 @@ typedef struct macroblockd { int corrupted; - int sb_index; - int mb_index; // Index of the MB in the SB (0..3) + int sb_index; // index of 32x32 block inside the 64x64 block + int mb_index; // index of 16x16 block inside the 32x32 block + int b_index; // index of 8x8 block inside the 16x16 block + int ab_index; // index of 4x4 block inside the 8x8 block int q_index; } MACROBLOCKD; -#define ACTIVE_HT 110 // quantization stepsize threshold - -#define ACTIVE_HT8 300 - -#define ACTIVE_HT16 300 - -// convert MB_PREDICTION_MODE to B_PREDICTION_MODE -static B_PREDICTION_MODE pred_mode_conv(MB_PREDICTION_MODE mode) { - switch (mode) { - case DC_PRED: return B_DC_PRED; - case V_PRED: return B_VE_PRED; - case H_PRED: return B_HE_PRED; - case TM_PRED: return B_TM_PRED; - case D45_PRED: return B_LD_PRED; - case D135_PRED: return B_RD_PRED; - case D117_PRED: return B_VR_PRED; - case D153_PRED: return B_HD_PRED; - case 
D27_PRED: return B_HU_PRED; - case D63_PRED: return B_VL_PRED; +static int *get_sb_index(MACROBLOCKD *xd, BLOCK_SIZE_TYPE subsize) { + switch (subsize) { + case BLOCK_SIZE_SB64X64: + case BLOCK_SIZE_SB64X32: + case BLOCK_SIZE_SB32X64: + case BLOCK_SIZE_SB32X32: + return &xd->sb_index; + case BLOCK_SIZE_SB32X16: + case BLOCK_SIZE_SB16X32: + case BLOCK_SIZE_MB16X16: + return &xd->mb_index; + case BLOCK_SIZE_SB16X8: + case BLOCK_SIZE_SB8X16: + case BLOCK_SIZE_SB8X8: + return &xd->b_index; + case BLOCK_SIZE_SB8X4: + case BLOCK_SIZE_SB4X8: + case BLOCK_SIZE_AB4X4: + return &xd->ab_index; + default: + assert(0); + return NULL; + } +} + +static INLINE void update_partition_context(MACROBLOCKD *xd, + BLOCK_SIZE_TYPE sb_type, + BLOCK_SIZE_TYPE sb_size) { + int bsl = b_width_log2(sb_size), bs = (1 << bsl) / 2; + int bwl = b_width_log2(sb_type); + int bhl = b_height_log2(sb_type); + int boffset = b_width_log2(BLOCK_SIZE_SB64X64) - bsl; + int i; + + // update the partition context at the end notes. set partition bits + // of block sizes larger than the current one to be one, and partition + // bits of smaller block sizes to be zero. + if ((bwl == bsl) && (bhl == bsl)) { + for (i = 0; i < bs; i++) + xd->left_seg_context[i] = ~(0xf << boffset); + for (i = 0; i < bs; i++) + xd->above_seg_context[i] = ~(0xf << boffset); + } else if ((bwl == bsl) && (bhl < bsl)) { + for (i = 0; i < bs; i++) + xd->left_seg_context[i] = ~(0xe << boffset); + for (i = 0; i < bs; i++) + xd->above_seg_context[i] = ~(0xf << boffset); + } else if ((bwl < bsl) && (bhl == bsl)) { + for (i = 0; i < bs; i++) + xd->left_seg_context[i] = ~(0xf << boffset); + for (i = 0; i < bs; i++) + xd->above_seg_context[i] = ~(0xe << boffset); + } else if ((bwl < bsl) && (bhl < bsl)) { + for (i = 0; i < bs; i++) + xd->left_seg_context[i] = ~(0xe << boffset); + for (i = 0; i < bs; i++) + xd->above_seg_context[i] = ~(0xe << boffset); + } else { + assert(0); + } +} + +static INLINE int partition_plane_context(MACROBLOCKD *xd, + BLOCK_SIZE_TYPE sb_type) { + int bsl = mi_width_log2(sb_type), bs = 1 << bsl; + int above = 0, left = 0, i; + int boffset = mi_width_log2(BLOCK_SIZE_SB64X64) - bsl; + + assert(mi_width_log2(sb_type) == mi_height_log2(sb_type)); + assert(bsl >= 0); + assert(boffset >= 0); + + for (i = 0; i < bs; i++) + above |= (xd->above_seg_context[i] & (1 << boffset)); + for (i = 0; i < bs; i++) + left |= (xd->left_seg_context[i] & (1 << boffset)); + + above = (above > 0); + left = (left > 0); + + return (left * 2 + above) + bsl * PARTITION_PLOFFSET; +} + +static BLOCK_SIZE_TYPE get_subsize(BLOCK_SIZE_TYPE bsize, + PARTITION_TYPE partition) { + BLOCK_SIZE_TYPE subsize; + switch (partition) { + case PARTITION_NONE: + subsize = bsize; + break; + case PARTITION_HORZ: + if (bsize == BLOCK_SIZE_SB64X64) + subsize = BLOCK_SIZE_SB64X32; + else if (bsize == BLOCK_SIZE_SB32X32) + subsize = BLOCK_SIZE_SB32X16; + else if (bsize == BLOCK_SIZE_MB16X16) + subsize = BLOCK_SIZE_SB16X8; + else if (bsize == BLOCK_SIZE_SB8X8) + subsize = BLOCK_SIZE_SB8X4; + else + assert(0); + break; + case PARTITION_VERT: + if (bsize == BLOCK_SIZE_SB64X64) + subsize = BLOCK_SIZE_SB32X64; + else if (bsize == BLOCK_SIZE_SB32X32) + subsize = BLOCK_SIZE_SB16X32; + else if (bsize == BLOCK_SIZE_MB16X16) + subsize = BLOCK_SIZE_SB8X16; + else if (bsize == BLOCK_SIZE_SB8X8) + subsize = BLOCK_SIZE_SB4X8; + else + assert(0); + break; + case PARTITION_SPLIT: + if (bsize == BLOCK_SIZE_SB64X64) + subsize = BLOCK_SIZE_SB32X32; + else if (bsize == BLOCK_SIZE_SB32X32) + subsize = 
BLOCK_SIZE_MB16X16; + else if (bsize == BLOCK_SIZE_MB16X16) + subsize = BLOCK_SIZE_SB8X8; + else if (bsize == BLOCK_SIZE_SB8X8) + subsize = BLOCK_SIZE_AB4X4; + else + assert(0); + break; default: - assert(0); - return B_MODE_COUNT; // Dummy value + assert(0); } + return subsize; } // transform mapping -static TX_TYPE txfm_map(B_PREDICTION_MODE bmode) { +static TX_TYPE txfm_map(MB_PREDICTION_MODE bmode) { switch (bmode) { - case B_TM_PRED : - case B_RD_PRED : + case TM_PRED : + case D135_PRED : return ADST_ADST; - case B_VE_PRED : - case B_VR_PRED : + case V_PRED : + case D117_PRED : + case D63_PRED: return ADST_DCT; - case B_HE_PRED : - case B_HD_PRED : - case B_HU_PRED : + case H_PRED : + case D153_PRED : + case D27_PRED : return DCT_ADST; -#if CONFIG_NEWBINTRAMODES - case B_CONTEXT_PRED: - assert(0); - break; -#endif - default: return DCT_DCT; } } -extern const uint8_t vp9_block2left[TX_SIZE_MAX_MB][24]; -extern const uint8_t vp9_block2above[TX_SIZE_MAX_MB][24]; -extern const uint8_t vp9_block2left_sb[TX_SIZE_MAX_SB][96]; -extern const uint8_t vp9_block2above_sb[TX_SIZE_MAX_SB][96]; -extern const uint8_t vp9_block2left_sb64[TX_SIZE_MAX_SB][384]; -extern const uint8_t vp9_block2above_sb64[TX_SIZE_MAX_SB][384]; - -#define USE_ADST_FOR_I16X16_8X8 1 -#define USE_ADST_FOR_I16X16_4X4 1 -#define USE_ADST_FOR_I8X8_4X4 1 -#define USE_ADST_PERIPHERY_ONLY 1 -#define USE_ADST_FOR_SB 1 -#define USE_ADST_FOR_REMOTE_EDGE 0 - static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, int ib) { - // TODO(debargha): explore different patterns for ADST usage when blocksize - // is smaller than the prediction size - TX_TYPE tx_type = DCT_DCT; - const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; -#if !USE_ADST_FOR_SB - if (sb_type) - return tx_type; -#endif - if (ib >= (16 << (2 * sb_type))) // no chroma adst - return tx_type; - if (xd->lossless) + TX_TYPE tx_type; + MODE_INFO *mi = xd->mode_info_context; + MB_MODE_INFO *const mbmi = &mi->mbmi; + if (xd->lossless || mbmi->ref_frame[0] != INTRA_FRAME) return DCT_DCT; - if (xd->mode_info_context->mbmi.mode == B_PRED && - xd->q_index < ACTIVE_HT) { - const BLOCKD *b = &xd->block[ib]; - tx_type = txfm_map( -#if CONFIG_NEWBINTRAMODES - b->bmi.as_mode.first == B_CONTEXT_PRED ? 
b->bmi.as_mode.context : -#endif - b->bmi.as_mode.first); - } else if (xd->mode_info_context->mbmi.mode == I8X8_PRED && - xd->q_index < ACTIVE_HT) { - const BLOCKD *b = &xd->block[ib]; - const int ic = (ib & 10); -#if USE_ADST_FOR_I8X8_4X4 -#if USE_ADST_PERIPHERY_ONLY - // Use ADST for periphery blocks only - const int inner = ib & 5; - b += ic - ib; - tx_type = txfm_map(pred_mode_conv( - (MB_PREDICTION_MODE)b->bmi.as_mode.first)); -#if USE_ADST_FOR_REMOTE_EDGE - if (inner == 5) - tx_type = DCT_DCT; -#else - if (inner == 1) { - if (tx_type == ADST_ADST) tx_type = ADST_DCT; - else if (tx_type == DCT_ADST) tx_type = DCT_DCT; - } else if (inner == 4) { - if (tx_type == ADST_ADST) tx_type = DCT_ADST; - else if (tx_type == ADST_DCT) tx_type = DCT_DCT; - } else if (inner == 5) { - tx_type = DCT_DCT; - } -#endif -#else - // Use ADST - b += ic - ib; - tx_type = txfm_map(pred_mode_conv( - (MB_PREDICTION_MODE)b->bmi.as_mode.first)); -#endif -#else - // Use 2D DCT - tx_type = DCT_DCT; -#endif - } else if (xd->mode_info_context->mbmi.mode < I8X8_PRED && - xd->q_index < ACTIVE_HT) { -#if USE_ADST_FOR_I16X16_4X4 -#if USE_ADST_PERIPHERY_ONLY - const int hmax = 4 << sb_type; - tx_type = txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode)); -#if USE_ADST_FOR_REMOTE_EDGE - if ((ib & (hmax - 1)) != 0 && ib >= hmax) - tx_type = DCT_DCT; -#else - if (ib >= 1 && ib < hmax) { - if (tx_type == ADST_ADST) tx_type = ADST_DCT; - else if (tx_type == DCT_ADST) tx_type = DCT_DCT; - } else if (ib >= 1 && (ib & (hmax - 1)) == 0) { - if (tx_type == ADST_ADST) tx_type = DCT_ADST; - else if (tx_type == ADST_DCT) tx_type = DCT_DCT; - } else if (ib != 0) { - tx_type = DCT_DCT; - } -#endif -#else - // Use ADST - tx_type = txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode)); -#endif -#else - // Use 2D DCT - tx_type = DCT_DCT; -#endif + if (mbmi->sb_type < BLOCK_SIZE_SB8X8) { + tx_type = txfm_map(mi->bmi[ib].as_mode.first); + } else { + assert(mbmi->mode <= TM_PRED); + tx_type = txfm_map(mbmi->mode); } return tx_type; } static TX_TYPE get_tx_type_8x8(const MACROBLOCKD *xd, int ib) { - // TODO(debargha): explore different patterns for ADST usage when blocksize - // is smaller than the prediction size TX_TYPE tx_type = DCT_DCT; - const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; -#if !USE_ADST_FOR_SB - if (sb_type) - return tx_type; -#endif - if (ib >= (16 << (2 * sb_type))) // no chroma adst - return tx_type; - if (xd->mode_info_context->mbmi.mode == I8X8_PRED && - xd->q_index < ACTIVE_HT8) { - const BLOCKD *b = &xd->block[ib]; - // TODO(rbultje): MB_PREDICTION_MODE / B_PREDICTION_MODE should be merged - // or the relationship otherwise modified to address this type conversion. 
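// The directional pairing encoded by txfm_map (old and new code alike) is
// the usual ADST argument: ADST basis functions start at zero, matching a
// residual that grows with distance from the predicted edge. Modes that
// predict from the row above (V_PRED and the near-vertical angles) use ADST
// vertically (ADST_DCT), modes that predict from the left column use
// DCT_ADST, and TM_PRED/D135_PRED, which lean on both edges, use ADST_ADST.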
- tx_type = txfm_map(pred_mode_conv( - (MB_PREDICTION_MODE)b->bmi.as_mode.first)); - } else if (xd->mode_info_context->mbmi.mode < I8X8_PRED && - xd->q_index < ACTIVE_HT8) { -#if USE_ADST_FOR_I16X16_8X8 -#if USE_ADST_PERIPHERY_ONLY - const int hmax = 4 << sb_type; - tx_type = txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode)); -#if USE_ADST_FOR_REMOTE_EDGE - if ((ib & (hmax - 1)) != 0 && ib >= hmax) - tx_type = DCT_DCT; -#else - if (ib >= 1 && ib < hmax) { - if (tx_type == ADST_ADST) tx_type = ADST_DCT; - else if (tx_type == DCT_ADST) tx_type = DCT_DCT; - } else if (ib >= 1 && (ib & (hmax - 1)) == 0) { - if (tx_type == ADST_ADST) tx_type = DCT_ADST; - else if (tx_type == ADST_DCT) tx_type = DCT_DCT; - } else if (ib != 0) { - tx_type = DCT_DCT; - } -#endif -#else - // Use ADST - tx_type = txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode)); -#endif -#else - // Use 2D DCT - tx_type = DCT_DCT; -#endif + if (xd->mode_info_context->mbmi.mode <= TM_PRED) { + tx_type = txfm_map(xd->mode_info_context->mbmi.mode); } return tx_type; } static TX_TYPE get_tx_type_16x16(const MACROBLOCKD *xd, int ib) { TX_TYPE tx_type = DCT_DCT; - const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; -#if !USE_ADST_FOR_SB - if (sb_type) - return tx_type; -#endif - if (ib >= (16 << (2 * sb_type))) - return tx_type; - if (xd->mode_info_context->mbmi.mode < I8X8_PRED && - xd->q_index < ACTIVE_HT16) { - tx_type = txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode)); -#if USE_ADST_PERIPHERY_ONLY - if (sb_type) { - const int hmax = 4 << sb_type; -#if USE_ADST_FOR_REMOTE_EDGE - if ((ib & (hmax - 1)) != 0 && ib >= hmax) - tx_type = DCT_DCT; -#else - if (ib >= 1 && ib < hmax) { - if (tx_type == ADST_ADST) tx_type = ADST_DCT; - else if (tx_type == DCT_ADST) tx_type = DCT_DCT; - } else if (ib >= 1 && (ib & (hmax - 1)) == 0) { - if (tx_type == ADST_ADST) tx_type = DCT_ADST; - else if (tx_type == ADST_DCT) tx_type = DCT_DCT; - } else if (ib != 0) { - tx_type = DCT_DCT; + if (xd->mode_info_context->mbmi.mode <= TM_PRED) { + tx_type = txfm_map(xd->mode_info_context->mbmi.mode); + } + return tx_type; +} + +void vp9_setup_block_dptrs(MACROBLOCKD *xd, + int subsampling_x, int subsampling_y); + +static TX_SIZE get_uv_tx_size(const MB_MODE_INFO *mbmi) { + const TX_SIZE size = mbmi->txfm_size; + + switch (mbmi->sb_type) { + case BLOCK_SIZE_SB64X64: + return size; + case BLOCK_SIZE_SB64X32: + case BLOCK_SIZE_SB32X64: + case BLOCK_SIZE_SB32X32: + if (size == TX_32X32) + return TX_16X16; + else + return size; + case BLOCK_SIZE_SB32X16: + case BLOCK_SIZE_SB16X32: + case BLOCK_SIZE_MB16X16: + if (size == TX_16X16) + return TX_8X8; + else + return size; + default: + return TX_4X4; + } + + return size; +} + +struct plane_block_idx { + int plane; + int block; +}; + +// TODO(jkoleszar): returning a struct so it can be used in a const context, +// expect to refactor this further later. 
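// Worked example for plane_block_idx below, assuming 4:2:0 subsampling and
// a 64x64 block, i.e. y_blocks = 256 4x4 blocks and
// v_offset = 256 * 5 / 4 = 320 (illustrative numbers, not from the patch):
//   b_idx = 100 -> { plane 0 (Y), block 100 }
//   b_idx = 300 -> { plane 1 (U), block 300 - 256 = 44 }
//   b_idx = 350 -> { plane 2 (V), block 350 - 320 = 30 }
// Any b_idx >= 256 * 3 / 2 = 384 would trip the assert.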
+static INLINE struct plane_block_idx plane_block_idx(int y_blocks, + int b_idx) { + const int v_offset = y_blocks * 5 / 4; + struct plane_block_idx res; + + if (b_idx < y_blocks) { + res.plane = 0; + res.block = b_idx; + } else if (b_idx < v_offset) { + res.plane = 1; + res.block = b_idx - y_blocks; + } else { + assert(b_idx < y_blocks * 3 / 2); + res.plane = 2; + res.block = b_idx - v_offset; + } + return res; +} + +static INLINE int plane_block_width(BLOCK_SIZE_TYPE bsize, + const struct macroblockd_plane* plane) { + return 4 << (b_width_log2(bsize) - plane->subsampling_x); +} + +static INLINE int plane_block_height(BLOCK_SIZE_TYPE bsize, + const struct macroblockd_plane* plane) { + return 4 << (b_height_log2(bsize) - plane->subsampling_y); +} + +typedef void (*foreach_transformed_block_visitor)(int plane, int block, + BLOCK_SIZE_TYPE bsize, + int ss_txfrm_size, + void *arg); + +static INLINE void foreach_transformed_block_in_plane( + const MACROBLOCKD* const xd, BLOCK_SIZE_TYPE bsize, int plane, + foreach_transformed_block_visitor visit, void *arg) { + const int bw = b_width_log2(bsize), bh = b_height_log2(bsize); + + // block and transform sizes, in number of 4x4 blocks log 2 ("*_b") + // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8 + // transform size varies per plane, look it up in a common way. + const MB_MODE_INFO* mbmi = &xd->mode_info_context->mbmi; + const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi) + : mbmi->txfm_size; + const int block_size_b = bw + bh; + const int txfrm_size_b = tx_size * 2; + + // subsampled size of the block + const int ss_sum = xd->plane[plane].subsampling_x + + xd->plane[plane].subsampling_y; + const int ss_block_size = block_size_b - ss_sum; + + const int step = 1 << txfrm_size_b; + + int i; + + assert(txfrm_size_b <= block_size_b); + assert(txfrm_size_b <= ss_block_size); + + // If mb_to_right_edge is < 0 we are in a situation in which + // the current block size extends into the UMV and we won't + // visit the sub blocks that are wholly within the UMV. + if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) { + int r, c; + const int sw = bw - xd->plane[plane].subsampling_x; + const int sh = bh - xd->plane[plane].subsampling_y; + int max_blocks_wide = 1 << sw; + int max_blocks_high = 1 << sh; + + // xd->mb_to_right_edge is in units of pixels * 8. This converts + // it to 4x4 block sizes. + if (xd->mb_to_right_edge < 0) + max_blocks_wide += + + (xd->mb_to_right_edge >> (5 + xd->plane[plane].subsampling_x)); + + if (xd->mb_to_bottom_edge < 0) + max_blocks_high += + + (xd->mb_to_bottom_edge >> (5 + xd->plane[plane].subsampling_y)); + + i = 0; + // Unlike the normal case - in here we have to keep track of the + // row and column of the blocks we use so that we know if we are in + // the unrestricted motion border.. 
+ for (r = 0; r < (1 << sh); r += (1 << tx_size)) { + for (c = 0; c < (1 << sw); c += (1 << tx_size)) { + if (r < max_blocks_high && c < max_blocks_wide) + visit(plane, i, bsize, txfrm_size_b, arg); + i += step; } -#endif } -#endif + } else { + for (i = 0; i < (1 << ss_block_size); i += step) { + visit(plane, i, bsize, txfrm_size_b, arg); + } } - return tx_type; } -void vp9_build_block_doffsets(MACROBLOCKD *xd); -void vp9_setup_block_dptrs(MACROBLOCKD *xd); +static INLINE void foreach_transformed_block( + const MACROBLOCKD* const xd, BLOCK_SIZE_TYPE bsize, + foreach_transformed_block_visitor visit, void *arg) { + int plane; -static void update_blockd_bmi(MACROBLOCKD *xd) { - const MB_PREDICTION_MODE mode = xd->mode_info_context->mbmi.mode; + for (plane = 0; plane < MAX_MB_PLANE; plane++) { + foreach_transformed_block_in_plane(xd, bsize, plane, + visit, arg); + } +} - if (mode == SPLITMV || mode == I8X8_PRED || mode == B_PRED) { - int i; - for (i = 0; i < 16; i++) - xd->block[i].bmi = xd->mode_info_context->bmi[i]; +static INLINE void foreach_transformed_block_uv( + const MACROBLOCKD* const xd, BLOCK_SIZE_TYPE bsize, + foreach_transformed_block_visitor visit, void *arg) { + int plane; + + for (plane = 1; plane < MAX_MB_PLANE; plane++) { + foreach_transformed_block_in_plane(xd, bsize, plane, + visit, arg); } } -static TX_SIZE get_uv_tx_size(const MACROBLOCKD *xd) { - TX_SIZE tx_size_uv; - if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64) { - tx_size_uv = xd->mode_info_context->mbmi.txfm_size; - } else if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32) { - if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) - tx_size_uv = TX_16X16; - else - tx_size_uv = xd->mode_info_context->mbmi.txfm_size; +// TODO(jkoleszar): In principle, pred_w, pred_h are unnecessary, as we could +// calculate the subsampled BLOCK_SIZE_TYPE, but that type isn't defined for +// sizes smaller than 16x16 yet. +typedef void (*foreach_predicted_block_visitor)(int plane, int block, + BLOCK_SIZE_TYPE bsize, + int pred_w, int pred_h, + void *arg); +static INLINE void foreach_predicted_block_in_plane( + const MACROBLOCKD* const xd, BLOCK_SIZE_TYPE bsize, int plane, + foreach_predicted_block_visitor visit, void *arg) { + int i, x, y; + + // block sizes in number of 4x4 blocks log 2 ("*_b") + // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8 + // subsampled size of the block + const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x; + const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y; + + // size of the predictor to use. 
+ int pred_w, pred_h; + + if (xd->mode_info_context->mbmi.sb_type < BLOCK_SIZE_SB8X8) { + assert(bsize == BLOCK_SIZE_SB8X8); + pred_w = 0; + pred_h = 0; } else { - if (xd->mode_info_context->mbmi.txfm_size == TX_16X16) - tx_size_uv = TX_8X8; - else if (xd->mode_info_context->mbmi.txfm_size == TX_8X8 && - (xd->mode_info_context->mbmi.mode == I8X8_PRED || - xd->mode_info_context->mbmi.mode == SPLITMV)) - tx_size_uv = TX_4X4; - else - tx_size_uv = xd->mode_info_context->mbmi.txfm_size; + pred_w = bwl; + pred_h = bhl; + } + assert(pred_w <= bwl); + assert(pred_h <= bhl); + + // visit each subblock in raster order + i = 0; + for (y = 0; y < 1 << bhl; y += 1 << pred_h) { + for (x = 0; x < 1 << bwl; x += 1 << pred_w) { + visit(plane, i, bsize, pred_w, pred_h, arg); + i += 1 << pred_w; + } + i += (1 << (bwl + pred_h)) - (1 << bwl); + } +} +static INLINE void foreach_predicted_block( + const MACROBLOCKD* const xd, BLOCK_SIZE_TYPE bsize, + foreach_predicted_block_visitor visit, void *arg) { + int plane; + + for (plane = 0; plane < MAX_MB_PLANE; plane++) { + foreach_predicted_block_in_plane(xd, bsize, plane, visit, arg); + } +} +static INLINE void foreach_predicted_block_uv( + const MACROBLOCKD* const xd, BLOCK_SIZE_TYPE bsize, + foreach_predicted_block_visitor visit, void *arg) { + int plane; + + for (plane = 1; plane < MAX_MB_PLANE; plane++) { + foreach_predicted_block_in_plane(xd, bsize, plane, visit, arg); + } +} +static int raster_block_offset(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize, + int plane, int block, int stride) { + const int bw = b_width_log2(bsize) - xd->plane[plane].subsampling_x; + const int y = 4 * (block >> bw), x = 4 * (block & ((1 << bw) - 1)); + return y * stride + x; +} +static int16_t* raster_block_offset_int16(MACROBLOCKD *xd, + BLOCK_SIZE_TYPE bsize, + int plane, int block, int16_t *base) { + const int stride = plane_block_width(bsize, &xd->plane[plane]); + return base + raster_block_offset(xd, bsize, plane, block, stride); +} +static uint8_t* raster_block_offset_uint8(MACROBLOCKD *xd, + BLOCK_SIZE_TYPE bsize, + int plane, int block, + uint8_t *base, int stride) { + return base + raster_block_offset(xd, bsize, plane, block, stride); +} + +static int txfrm_block_to_raster_block(MACROBLOCKD *xd, + BLOCK_SIZE_TYPE bsize, + int plane, int block, + int ss_txfrm_size) { + const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x; + const int txwl = ss_txfrm_size / 2; + const int tx_cols_lg2 = bwl - txwl; + const int tx_cols = 1 << tx_cols_lg2; + const int raster_mb = block >> ss_txfrm_size; + const int x = (raster_mb & (tx_cols - 1)) << (txwl); + const int y = raster_mb >> tx_cols_lg2 << (txwl); + return x + (y << bwl); +} + +static void txfrm_block_to_raster_xy(MACROBLOCKD *xd, + BLOCK_SIZE_TYPE bsize, + int plane, int block, + int ss_txfrm_size, + int *x, int *y) { + const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x; + const int txwl = ss_txfrm_size / 2; + const int tx_cols_lg2 = bwl - txwl; + const int tx_cols = 1 << tx_cols_lg2; + const int raster_mb = block >> ss_txfrm_size; + *x = (raster_mb & (tx_cols - 1)) << (txwl); + *y = raster_mb >> tx_cols_lg2 << (txwl); +} + +static void extend_for_intra(MACROBLOCKD* const xd, int plane, int block, + BLOCK_SIZE_TYPE bsize, int ss_txfrm_size) { + const int bw = plane_block_width(bsize, &xd->plane[plane]); + const int bh = plane_block_height(bsize, &xd->plane[plane]); + int x, y; + txfrm_block_to_raster_xy(xd, bsize, plane, block, ss_txfrm_size, &x, &y); + x = x * 4 - 1; + y = y * 4 - 1; + // Copy a pixel 
into the umv if we are in a situation where the block size + // extends into the UMV. + // TODO(JBB): Should be able to do the full extend in place so we don't have + // to do this multiple times. + if (xd->mb_to_right_edge < 0) { + int umv_border_start = bw + + (xd->mb_to_right_edge >> (3 + xd->plane[plane].subsampling_x)); + + if (x + bw > umv_border_start) + vpx_memset( + xd->plane[plane].dst.buf + y * xd->plane[plane].dst.stride + + umv_border_start, + *(xd->plane[plane].dst.buf + y * xd->plane[plane].dst.stride + + umv_border_start - 1), + bw); } - return tx_size_uv; + if (xd->mb_to_bottom_edge < 0) { + int umv_border_start = bh + + (xd->mb_to_bottom_edge >> (3 + xd->plane[plane].subsampling_y)); + int i; + uint8_t c = *(xd->plane[plane].dst.buf + + (umv_border_start - 1) * xd->plane[plane].dst.stride + x); + + uint8_t *d = xd->plane[plane].dst.buf + + umv_border_start * xd->plane[plane].dst.stride + x; + + if (y + bh > umv_border_start) + for (i = 0; i < bh; i++, d += xd->plane[plane].dst.stride) + *d = c; + } +} +static void set_contexts_on_border(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize, + int plane, int ss_tx_size, int eob, int aoff, + int loff, ENTROPY_CONTEXT *A, + ENTROPY_CONTEXT *L) { + const int bw = b_width_log2(bsize), bh = b_height_log2(bsize); + const int sw = bw - xd->plane[plane].subsampling_x; + const int sh = bh - xd->plane[plane].subsampling_y; + int mi_blocks_wide = 1 << sw; + int mi_blocks_high = 1 << sh; + int tx_size_in_blocks = (1 << ss_tx_size); + int above_contexts = tx_size_in_blocks; + int left_contexts = tx_size_in_blocks; + int pt; + + // xd->mb_to_right_edge is in units of pixels * 8. This converts + // it to 4x4 block sizes. + if (xd->mb_to_right_edge < 0) { + mi_blocks_wide += (xd->mb_to_right_edge + >> (5 + xd->plane[plane].subsampling_x)); + } + + // this code attempts to avoid copying into contexts that are outside + // our border. Any blocks that do are set to 0... + if (above_contexts + aoff > mi_blocks_wide) + above_contexts = mi_blocks_wide - aoff; + + if (xd->mb_to_bottom_edge < 0) { + mi_blocks_high += (xd->mb_to_bottom_edge + >> (5 + xd->plane[plane].subsampling_y)); + } + if (left_contexts + loff > mi_blocks_high) { + left_contexts = mi_blocks_high - loff; + } + + for (pt = 0; pt < above_contexts; pt++) + A[pt] = eob > 0; + for (pt = above_contexts; pt < (1 << ss_tx_size); pt++) + A[pt] = 0; + for (pt = 0; pt < left_contexts; pt++) + L[pt] = eob > 0; + for (pt = left_contexts; pt < (1 << ss_tx_size); pt++) + L[pt] = 0; } + + #endif // VP9_COMMON_VP9_BLOCKD_H_ diff --git a/vp9/common/vp9_coefupdateprobs.h b/vp9/common/vp9_coefupdateprobs.h index b4d892df9afd5b2fb63a5dbde657ac1a782d49b2..e86200802db2b2cb73ece9ab2a1136beb27b0929 100644 --- a/vp9/common/vp9_coefupdateprobs.h +++ b/vp9/common/vp9_coefupdateprobs.h @@ -14,20 +14,8 @@ /* Update probabilities for the nodes in the token entropy tree. 
Generated file included by vp9_entropy.c */ -static const vp9_prob vp9_coef_update_prob[ENTROPY_NODES] = { - 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252 +static const vp9_prob vp9_coef_update_prob[UNCONSTRAINED_NODES] = { + 252, 252, 252, }; -#if CONFIG_CODE_NONZEROCOUNT -#define NZC_UPDATE_PROB_4X4 252 -#define NZC_UPDATE_PROB_8X8 252 -#define NZC_UPDATE_PROB_16X16 252 -#define NZC_UPDATE_PROB_32X32 252 -#define NZC_UPDATE_PROB_PCAT 252 -#endif - -#if CONFIG_MODELCOEFPROB -#define COEF_MODEL_UPDATE_PROB 16 -#endif - #endif // VP9_COMMON_VP9_COEFUPDATEPROBS_H__ diff --git a/vp9/common/vp9_common.h b/vp9/common/vp9_common.h index f72d25e7f4c86d6ec56312e75caa20b63eb2725f..0d7babf97d7a28315fb74988148b7e3f986000cf 100644 --- a/vp9/common/vp9_common.h +++ b/vp9/common/vp9_common.h @@ -19,9 +19,6 @@ #include "vpx_mem/vpx_mem.h" #include "vpx/vpx_integer.h" -#define TRUE 1 -#define FALSE 0 - #define MIN(x, y) (((x) < (y)) ? (x) : (y)) #define MAX(x, y) (((x) > (y)) ? (x) : (y)) @@ -55,4 +52,17 @@ static INLINE int clamp(int value, int low, int high) { return value < low ? low : (value > high ? high : value); } +static INLINE double fclamp(double value, double low, double high) { + return value < low ? low : (value > high ? high : value); +} + +static INLINE int multiple8(int value) { + return (value + 7) & ~7; +} + +#define SYNC_CODE_0 0x49 +#define SYNC_CODE_1 0x83 +#define SYNC_CODE_2 0x42 + + #endif // VP9_COMMON_VP9_COMMON_H_ diff --git a/vp9/common/vp9_context.c b/vp9/common/vp9_context.c deleted file mode 100644 index 271b45541a07901ad54d4619b50a63e4c3060622..0000000000000000000000000000000000000000 --- a/vp9/common/vp9_context.c +++ /dev/null @@ -1,397 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include "vp9/common/vp9_entropy.h" - -/* *** GENERATED FILE: DO NOT EDIT *** */ - -#if 0 -int Contexts[vp8_coef_counter_dimen]; - -const int default_contexts[vp8_coef_counter_dimen] = { - { - // Block Type ( 0 ) - { - // Coeff Band ( 0 ) - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - }, - { - // Coeff Band ( 1 ) - {30190, 26544, 225, 24, 4, 0, 0, 0, 0, 0, 0, 4171593}, - {26846, 25157, 1241, 130, 26, 6, 1, 0, 0, 0, 0, 149987}, - {10484, 9538, 1006, 160, 36, 18, 0, 0, 0, 0, 0, 15104}, - }, - { - // Coeff Band ( 2 ) - {25842, 40456, 1126, 83, 11, 2, 0, 0, 0, 0, 0, 0}, - {9338, 8010, 512, 73, 7, 3, 2, 0, 0, 0, 0, 43294}, - {1047, 751, 149, 31, 13, 6, 1, 0, 0, 0, 0, 879}, - }, - { - // Coeff Band ( 3 ) - {26136, 9826, 252, 13, 0, 0, 0, 0, 0, 0, 0, 0}, - {8134, 5574, 191, 14, 2, 0, 0, 0, 0, 0, 0, 35302}, - { 605, 677, 116, 9, 1, 0, 0, 0, 0, 0, 0, 611}, - }, - { - // Coeff Band ( 4 ) - {10263, 15463, 283, 17, 0, 0, 0, 0, 0, 0, 0, 0}, - {2773, 2191, 128, 9, 2, 2, 0, 0, 0, 0, 0, 10073}, - { 134, 125, 32, 4, 0, 2, 0, 0, 0, 0, 0, 50}, - }, - { - // Coeff Band ( 5 ) - {10483, 2663, 23, 1, 0, 0, 0, 0, 0, 0, 0, 0}, - {2137, 1251, 27, 1, 1, 0, 0, 0, 0, 0, 0, 14362}, - { 116, 156, 14, 2, 1, 0, 0, 0, 0, 0, 0, 190}, - }, - { - // Coeff Band ( 6 ) - {40977, 27614, 412, 28, 0, 0, 0, 0, 0, 0, 0, 0}, - {6113, 5213, 261, 22, 3, 0, 0, 0, 0, 0, 0, 26164}, - { 382, 312, 50, 14, 2, 0, 0, 0, 0, 0, 0, 345}, - }, - { - // Coeff Band ( 7 ) - { 0, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - { 0, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 319}, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8}, - }, - }, - { - // Block Type ( 1 ) - { - // Coeff Band ( 0 ) - {3268, 19382, 1043, 250, 93, 82, 49, 26, 17, 8, 25, 82289}, - {8758, 32110, 5436, 1832, 827, 668, 420, 153, 24, 0, 3, 52914}, - {9337, 23725, 8487, 3954, 2107, 1836, 1069, 399, 59, 0, 0, 18620}, - }, - { - // Coeff Band ( 1 ) - {12419, 8420, 452, 62, 9, 1, 0, 0, 0, 0, 0, 0}, - {11715, 8705, 693, 92, 15, 7, 2, 0, 0, 0, 0, 53988}, - {7603, 8585, 2306, 778, 270, 145, 39, 5, 0, 0, 0, 9136}, - }, - { - // Coeff Band ( 2 ) - {15938, 14335, 1207, 184, 55, 13, 4, 1, 0, 0, 0, 0}, - {7415, 6829, 1138, 244, 71, 26, 7, 0, 0, 0, 0, 9980}, - {1580, 1824, 655, 241, 89, 46, 10, 2, 0, 0, 0, 429}, - }, - { - // Coeff Band ( 3 ) - {19453, 5260, 201, 19, 0, 0, 0, 0, 0, 0, 0, 0}, - {9173, 3758, 213, 22, 1, 1, 0, 0, 0, 0, 0, 9820}, - {1689, 1277, 276, 51, 17, 4, 0, 0, 0, 0, 0, 679}, - }, - { - // Coeff Band ( 4 ) - {12076, 10667, 620, 85, 19, 9, 5, 0, 0, 0, 0, 0}, - {4665, 3625, 423, 55, 19, 9, 0, 0, 0, 0, 0, 5127}, - { 415, 440, 143, 34, 20, 7, 2, 0, 0, 0, 0, 101}, - }, - { - // Coeff Band ( 5 ) - {12183, 4846, 115, 11, 1, 0, 0, 0, 0, 0, 0, 0}, - {4226, 3149, 177, 21, 2, 0, 0, 0, 0, 0, 0, 7157}, - { 375, 621, 189, 51, 11, 4, 1, 0, 0, 0, 0, 198}, - }, - { - // Coeff Band ( 6 ) - {61658, 37743, 1203, 94, 10, 3, 0, 0, 0, 0, 0, 0}, - {15514, 11563, 903, 111, 14, 5, 0, 0, 0, 0, 0, 25195}, - { 929, 1077, 291, 78, 14, 7, 1, 0, 0, 0, 0, 507}, - }, - { - // Coeff Band ( 7 ) - { 0, 990, 15, 3, 0, 0, 0, 0, 0, 0, 0, 0}, - { 0, 412, 13, 0, 0, 0, 0, 0, 0, 0, 0, 1641}, - { 0, 18, 7, 1, 0, 0, 0, 0, 0, 0, 0, 30}, - }, - }, - { - // Block Type ( 2 ) - { - // Coeff Band ( 0 ) - { 953, 24519, 628, 120, 28, 12, 4, 0, 0, 0, 0, 2248798}, - {1525, 25654, 2647, 617, 239, 143, 42, 5, 0, 0, 0, 66837}, - {1180, 11011, 3001, 1237, 532, 448, 239, 54, 5, 0, 0, 7122}, - }, - { - // Coeff Band ( 1 ) - {1356, 2220, 67, 10, 4, 1, 0, 0, 0, 0, 0, 0}, - {1450, 2544, 
102, 18, 4, 3, 0, 0, 0, 0, 0, 57063}, - {1182, 2110, 470, 130, 41, 21, 0, 0, 0, 0, 0, 6047}, - }, - { - // Coeff Band ( 2 ) - { 370, 3378, 200, 30, 5, 4, 1, 0, 0, 0, 0, 0}, - { 293, 1006, 131, 29, 11, 0, 0, 0, 0, 0, 0, 5404}, - { 114, 387, 98, 23, 4, 8, 1, 0, 0, 0, 0, 236}, - }, - { - // Coeff Band ( 3 ) - { 579, 194, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - { 395, 213, 5, 1, 0, 0, 0, 0, 0, 0, 0, 4157}, - { 119, 122, 4, 0, 0, 0, 0, 0, 0, 0, 0, 300}, - }, - { - // Coeff Band ( 4 ) - { 38, 557, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - { 21, 114, 12, 1, 0, 0, 0, 0, 0, 0, 0, 427}, - { 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7}, - }, - { - // Coeff Band ( 5 ) - { 52, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - { 18, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 652}, - { 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30}, - }, - { - // Coeff Band ( 6 ) - { 640, 569, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - { 25, 77, 2, 0, 0, 0, 0, 0, 0, 0, 0, 517}, - { 4, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3}, - }, - { - // Coeff Band ( 7 ) - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - }, - }, - { - // Block Type ( 3 ) - { - // Coeff Band ( 0 ) - {2506, 20161, 2707, 767, 261, 178, 107, 30, 14, 3, 0, 100694}, - {8806, 36478, 8817, 3268, 1280, 850, 401, 114, 42, 0, 0, 58572}, - {11003, 27214, 11798, 5716, 2482, 2072, 1048, 175, 32, 0, 0, 19284}, - }, - { - // Coeff Band ( 1 ) - {9738, 11313, 959, 205, 70, 18, 11, 1, 0, 0, 0, 0}, - {12628, 15085, 1507, 273, 52, 19, 9, 0, 0, 0, 0, 54280}, - {10701, 15846, 5561, 1926, 813, 570, 249, 36, 0, 0, 0, 6460}, - }, - { - // Coeff Band ( 2 ) - {6781, 22539, 2784, 634, 182, 123, 20, 4, 0, 0, 0, 0}, - {6263, 11544, 2649, 790, 259, 168, 27, 5, 0, 0, 0, 20539}, - {3109, 4075, 2031, 896, 457, 386, 158, 29, 0, 0, 0, 1138}, - }, - { - // Coeff Band ( 3 ) - {11515, 4079, 465, 73, 5, 14, 2, 0, 0, 0, 0, 0}, - {9361, 5834, 650, 96, 24, 8, 4, 0, 0, 0, 0, 22181}, - {4343, 3974, 1360, 415, 132, 96, 14, 1, 0, 0, 0, 1267}, - }, - { - // Coeff Band ( 4 ) - {4787, 9297, 823, 168, 44, 12, 4, 0, 0, 0, 0, 0}, - {3619, 4472, 719, 198, 60, 31, 3, 0, 0, 0, 0, 8401}, - {1157, 1175, 483, 182, 88, 31, 8, 0, 0, 0, 0, 268}, - }, - { - // Coeff Band ( 5 ) - {8299, 1226, 32, 5, 1, 0, 0, 0, 0, 0, 0, 0}, - {3502, 1568, 57, 4, 1, 1, 0, 0, 0, 0, 0, 9811}, - {1055, 1070, 166, 29, 6, 1, 0, 0, 0, 0, 0, 527}, - }, - { - // Coeff Band ( 6 ) - {27414, 27927, 1989, 347, 69, 26, 0, 0, 0, 0, 0, 0}, - {5876, 10074, 1574, 341, 91, 24, 4, 0, 0, 0, 0, 21954}, - {1571, 2171, 778, 324, 124, 65, 16, 0, 0, 0, 0, 979}, - }, - { - // Coeff Band ( 7 ) - { 0, 29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - { 0, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 459}, - { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13}, - }, - }, -}; - -// Update probabilities for the nodes in the token entropy tree. 
-const vp9_prob tree_update_probs[vp9_coef_tree_dimen] = { - { - { - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255, }, - {249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255, }, - {234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255, }, - {250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255, }, - {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - }, - { - { - {217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255, }, - {234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255, }, - }, - { - {255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, - {250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - }, - { - { - {186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255, }, - {234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255, }, - {251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255, }, - }, - { - {255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 254, 
254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - }, - { - { - {248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255, }, - {248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, - {246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, - {252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255, }, - {248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, - {253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, - {252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, - {250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - }, -}; -#endif diff --git a/vp9/common/vp9_convolve.c b/vp9/common/vp9_convolve.c index 3ab8bec7a162d0b16c107dbf1b5dac9a67ed85bb..46ae50349f3e6d5591ca120fb555473048ce0ed8 100644 --- a/vp9/common/vp9_convolve.c +++ b/vp9/common/vp9_convolve.c @@ -122,78 +122,6 @@ static void convolve_avg_horiz_c(const uint8_t *src, int src_stride, } } -#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT - -static inline uint8_t combine_qtr(uint8_t a, uint8_t b) { - return (((a) + (b) * 3 + 2) >> 2); -} - -static inline uint8_t combine_3qtr(uint8_t a, uint8_t b) { - return (((a) * 3 + (b) + 2) >> 2); -} - -static inline uint8_t combine_1by8(uint8_t a, uint8_t b) { - return (((a) * 1 + (b) * 7 + 4) >> 3); -} - -static inline uint8_t combine_3by8(uint8_t a, uint8_t b) { - return (((a) * 3 + (b) * 5 + 4) >> 3); -} - -static inline uint8_t combine_5by8(uint8_t a, uint8_t b) { - return (((a) * 
5 + (b) * 3 + 4) >> 3); -} - -static inline uint8_t combine_7by8(uint8_t a, uint8_t b) { - return (((a) * 7 + (b) * 1 + 4) >> 3); -} - -// TODO(debargha): Implment with a separate weight parameter -static void convolve_wtd_horiz_c(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int16_t *filter_x0, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h, int taps, - uint8_t (*combine)(uint8_t a, uint8_t b)) { - int x, y, k, sum; - const int16_t *filter_x_base = filter_x0; - -#if ALIGN_FILTERS_256 - filter_x_base = (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff); -#endif - - /* Adjust base pointer address for this source line */ - src -= taps / 2 - 1; - - for (y = 0; y < h; ++y) { - /* Pointer to filter to use */ - const int16_t *filter_x = filter_x0; - - /* Initial phase offset */ - int x0_q4 = (filter_x - filter_x_base) / taps; - int x_q4 = x0_q4; - - for (x = 0; x < w; ++x) { - /* Per-pixel src offset */ - int src_x = (x_q4 - x0_q4) >> 4; - - for (sum = 0, k = 0; k < taps; ++k) { - sum += src[src_x + k] * filter_x[k]; - } - sum += (VP9_FILTER_WEIGHT >> 1); - dst[x] = combine(dst[x], clip_pixel(sum >> VP9_FILTER_SHIFT)); - - /* Adjust source and filter to use for the next pixel */ - x_q4 += x_step_q4; - filter_x = filter_x_base + (x_q4 & 0xf) * taps; - } - src += src_stride; - dst += dst_stride; - } -} - -#endif - static void convolve_vert_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, @@ -279,66 +207,20 @@ static void convolve_avg_vert_c(const uint8_t *src, int src_stride, } } -#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT -static void convolve_wtd_vert_c(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y0, int y_step_q4, - int w, int h, int taps, - uint8_t (*combine)(uint8_t a, uint8_t b)) { - int x, y, k, sum; - - const int16_t *filter_y_base = filter_y0; - -#if ALIGN_FILTERS_256 - filter_y_base = (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff); -#endif - - /* Adjust base pointer address for this source column */ - src -= src_stride * (taps / 2 - 1); - for (x = 0; x < w; ++x) { - /* Pointer to filter to use */ - const int16_t *filter_y = filter_y0; - - /* Initial phase offset */ - int y0_q4 = (filter_y - filter_y_base) / taps; - int y_q4 = y0_q4; - - for (y = 0; y < h; ++y) { - /* Per-pixel src offset */ - int src_y = (y_q4 - y0_q4) >> 4; - - for (sum = 0, k = 0; k < taps; ++k) { - sum += src[(src_y + k) * src_stride] * filter_y[k]; - } - sum += (VP9_FILTER_WEIGHT >> 1); - dst[y * dst_stride] = combine(dst[y * dst_stride], - clip_pixel(sum >> VP9_FILTER_SHIFT)); - - /* Adjust source and filter to use for the next pixel */ - y_q4 += y_step_q4; - filter_y = filter_y_base + (y_q4 & 0xf) * taps; - } - ++src; - ++dst; - } -} -#endif - static void convolve_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int taps) { /* Fixed size intermediate buffer places limits on parameters. - * Maximum intermediate_height is 39, for y_step_q4 == 32, - * h == 16, taps == 8. + * Maximum intermediate_height is 135, for y_step_q4 == 32, + * h == 64, taps == 8. 
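 * A quick check of that bound: intermediate_height =
 * ((h * y_step_q4) >> 4) + taps - 1 = ((64 * 32) >> 4) + 8 - 1 = 135,
 * matching the 64 * 135 temp buffer below.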
*/ - uint8_t temp[16 * 39]; + uint8_t temp[64 * 135]; int intermediate_height = ((h * y_step_q4) >> 4) + taps - 1; - assert(w <= 16); - assert(h <= 16); + assert(w <= 64); + assert(h <= 64); assert(taps <= 8); assert(y_step_q4 <= 32); @@ -346,10 +228,10 @@ static void convolve_c(const uint8_t *src, int src_stride, intermediate_height = h; convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride, - temp, 16, + temp, 64, filter_x, x_step_q4, filter_y, y_step_q4, w, intermediate_height, taps); - convolve_vert_c(temp + 16 * (taps / 2 - 1), 16, dst, dst_stride, + convolve_vert_c(temp + 64 * (taps / 2 - 1), 64, dst, dst_stride, filter_x, x_step_q4, filter_y, y_step_q4, w, h, taps); } @@ -360,14 +242,14 @@ static void convolve_avg_c(const uint8_t *src, int src_stride, const int16_t *filter_y, int y_step_q4, int w, int h, int taps) { /* Fixed size intermediate buffer places limits on parameters. - * Maximum intermediate_height is 39, for y_step_q4 == 32, - * h == 16, taps == 8. + * Maximum intermediate_height is 135, for y_step_q4 == 32, + * h == 64, taps == 8. */ - uint8_t temp[16 * 39]; + uint8_t temp[64 * 135]; int intermediate_height = ((h * y_step_q4) >> 4) + taps - 1; - assert(w <= 16); - assert(h <= 16); + assert(w <= 64); + assert(h <= 64); assert(taps <= 8); assert(y_step_q4 <= 32); @@ -375,10 +257,10 @@ static void convolve_avg_c(const uint8_t *src, int src_stride, intermediate_height = h; convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride, - temp, 16, + temp, 64, filter_x, x_step_q4, filter_y, y_step_q4, w, intermediate_height, taps); - convolve_avg_vert_c(temp + 16 * (taps / 2 - 1), 16, dst, dst_stride, + convolve_avg_vert_c(temp + 64 * (taps / 2 - 1), 64, dst, dst_stride, filter_x, x_step_q4, filter_y, y_step_q4, w, h, taps); } @@ -403,68 +285,6 @@ void vp9_convolve8_avg_horiz_c(const uint8_t *src, int src_stride, w, h, 8); } -#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT -void vp9_convolve8_1by8_horiz_c(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - convolve_wtd_horiz_c(src, src_stride, dst, dst_stride, - filter_x, x_step_q4, filter_y, y_step_q4, - w, h, 8, combine_1by8); -} - -void vp9_convolve8_qtr_horiz_c(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - convolve_wtd_horiz_c(src, src_stride, dst, dst_stride, - filter_x, x_step_q4, filter_y, y_step_q4, - w, h, 8, combine_qtr); -} - -void vp9_convolve8_3by8_horiz_c(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - convolve_wtd_horiz_c(src, src_stride, dst, dst_stride, - filter_x, x_step_q4, filter_y, y_step_q4, - w, h, 8, combine_3by8); -} - -void vp9_convolve8_5by8_horiz_c(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - convolve_wtd_horiz_c(src, src_stride, dst, dst_stride, - filter_x, x_step_q4, filter_y, y_step_q4, - w, h, 8, combine_5by8); -} - -void vp9_convolve8_3qtr_horiz_c(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - convolve_wtd_horiz_c(src, src_stride, dst, dst_stride, - filter_x, x_step_q4, filter_y, 
y_step_q4, - w, h, 8, combine_3qtr); -} - -void vp9_convolve8_7by8_horiz_c(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - convolve_wtd_horiz_c(src, src_stride, dst, dst_stride, - filter_x, x_step_q4, filter_y, y_step_q4, - w, h, 8, combine_7by8); -} -#endif - void vp9_convolve8_vert_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, @@ -485,68 +305,6 @@ void vp9_convolve8_avg_vert_c(const uint8_t *src, int src_stride, w, h, 8); } -#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT -void vp9_convolve8_1by8_vert_c(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - convolve_wtd_vert_c(src, src_stride, dst, dst_stride, - filter_x, x_step_q4, filter_y, y_step_q4, - w, h, 8, combine_1by8); -} - -void vp9_convolve8_qtr_vert_c(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - convolve_wtd_vert_c(src, src_stride, dst, dst_stride, - filter_x, x_step_q4, filter_y, y_step_q4, - w, h, 8, combine_qtr); -} - -void vp9_convolve8_3by8_vert_c(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - convolve_wtd_vert_c(src, src_stride, dst, dst_stride, - filter_x, x_step_q4, filter_y, y_step_q4, - w, h, 8, combine_3by8); -} - -void vp9_convolve8_5by8_vert_c(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - convolve_wtd_vert_c(src, src_stride, dst, dst_stride, - filter_x, x_step_q4, filter_y, y_step_q4, - w, h, 8, combine_5by8); -} - -void vp9_convolve8_3qtr_vert_c(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - convolve_wtd_vert_c(src, src_stride, dst, dst_stride, - filter_x, x_step_q4, filter_y, y_step_q4, - w, h, 8, combine_3qtr); -} - -void vp9_convolve8_7by8_vert_c(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - convolve_wtd_vert_c(src, src_stride, dst, dst_stride, - filter_x, x_step_q4, filter_y, y_step_q4, - w, h, 8, combine_7by8); -} -#endif - void vp9_convolve8_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, @@ -563,156 +321,22 @@ void vp9_convolve8_avg_c(const uint8_t *src, int src_stride, const int16_t *filter_y, int y_step_q4, int w, int h) { /* Fixed size intermediate buffer places limits on parameters. */ - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16); - assert(w <= 16); - assert(h <= 16); - - vp9_convolve8(src, src_stride, - temp, 16, - filter_x, x_step_q4, - filter_y, y_step_q4, - w, h); - vp9_convolve_avg(temp, 16, - dst, dst_stride, - NULL, 0, /* These unused parameter should be removed! */ - NULL, 0, /* These unused parameter should be removed! 
*/ - w, h); -} - -#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT -void vp9_convolve8_1by8_c(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - /* Fixed size intermediate buffer places limits on parameters. */ - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16); - assert(w <= 16); - assert(h <= 16); - - vp9_convolve8(src, src_stride, - temp, 16, - filter_x, x_step_q4, - filter_y, y_step_q4, - w, h); - vp9_convolve_1by8(temp, 16, - dst, dst_stride, - NULL, 0, /* These unused parameter should be removed! */ - NULL, 0, /* These unused parameter should be removed! */ - w, h); -} - -void vp9_convolve8_qtr_c(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - /* Fixed size intermediate buffer places limits on parameters. */ - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16); - assert(w <= 16); - assert(h <= 16); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 64 * 64); + assert(w <= 64); + assert(h <= 64); vp9_convolve8(src, src_stride, - temp, 16, + temp, 64, filter_x, x_step_q4, filter_y, y_step_q4, w, h); - vp9_convolve_qtr(temp, 16, + vp9_convolve_avg(temp, 64, dst, dst_stride, NULL, 0, /* These unused parameter should be removed! */ NULL, 0, /* These unused parameter should be removed! */ w, h); } -void vp9_convolve8_3by8_c(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - /* Fixed size intermediate buffer places limits on parameters. */ - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16); - assert(w <= 16); - assert(h <= 16); - - vp9_convolve8(src, src_stride, - temp, 16, - filter_x, x_step_q4, - filter_y, y_step_q4, - w, h); - vp9_convolve_3by8(temp, 16, - dst, dst_stride, - NULL, 0, /* These unused parameter should be removed! */ - NULL, 0, /* These unused parameter should be removed! */ - w, h); -} - -void vp9_convolve8_5by8_c(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - /* Fixed size intermediate buffer places limits on parameters. */ - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16); - assert(w <= 16); - assert(h <= 16); - - vp9_convolve8(src, src_stride, - temp, 16, - filter_x, x_step_q4, - filter_y, y_step_q4, - w, h); - vp9_convolve_5by8(temp, 16, - dst, dst_stride, - NULL, 0, /* These unused parameter should be removed! */ - NULL, 0, /* These unused parameter should be removed! */ - w, h); -} - -void vp9_convolve8_3qtr_c(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - /* Fixed size intermediate buffer places limits on parameters. */ - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16); - assert(w <= 16); - assert(h <= 16); - - vp9_convolve8(src, src_stride, - temp, 16, - filter_x, x_step_q4, - filter_y, y_step_q4, - w, h); - vp9_convolve_3qtr(temp, 16, - dst, dst_stride, - NULL, 0, /* These unused parameter should be removed! */ - NULL, 0, /* These unused parameter should be removed! 
*/ - w, h); -} - -void vp9_convolve8_7by8_c(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - /* Fixed size intermediate buffer places limits on parameters. */ - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16); - assert(w <= 16); - assert(h <= 16); - - vp9_convolve8(src, src_stride, - temp, 16, - filter_x, x_step_q4, - filter_y, y_step_q4, - w, h); - vp9_convolve_7by8(temp, 16, - dst, dst_stride, - NULL, 0, /* These unused parameter should be removed! */ - NULL, 0, /* These unused parameter should be removed! */ - w, h); -} -#endif - void vp9_convolve_copy(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int filter_x_stride, @@ -750,101 +374,3 @@ void vp9_convolve_avg(const uint8_t *src, int src_stride, dst += dst_stride; } } - -#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT -void vp9_convolve_1by8(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int16_t *filter_x, int filter_x_stride, - const int16_t *filter_y, int filter_y_stride, - int w, int h) { - int x, y; - - for (y = 0; y < h; ++y) { - for (x = 0; x < w; ++x) { - dst[x] = combine_1by8(dst[x], src[x]); - } - src += src_stride; - dst += dst_stride; - } -} - -void vp9_convolve_qtr(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int16_t *filter_x, int filter_x_stride, - const int16_t *filter_y, int filter_y_stride, - int w, int h) { - int x, y; - - for (y = 0; y < h; ++y) { - for (x = 0; x < w; ++x) { - dst[x] = combine_qtr(dst[x], src[x]); - } - src += src_stride; - dst += dst_stride; - } -} - -void vp9_convolve_3by8(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int16_t *filter_x, int filter_x_stride, - const int16_t *filter_y, int filter_y_stride, - int w, int h) { - int x, y; - - for (y = 0; y < h; ++y) { - for (x = 0; x < w; ++x) { - dst[x] = combine_3by8(dst[x], src[x]); - } - src += src_stride; - dst += dst_stride; - } -} - -void vp9_convolve_5by8(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int16_t *filter_x, int filter_x_stride, - const int16_t *filter_y, int filter_y_stride, - int w, int h) { - int x, y; - - for (y = 0; y < h; ++y) { - for (x = 0; x < w; ++x) { - dst[x] = combine_5by8(dst[x], src[x]); - } - src += src_stride; - dst += dst_stride; - } -} - -void vp9_convolve_3qtr(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int16_t *filter_x, int filter_x_stride, - const int16_t *filter_y, int filter_y_stride, - int w, int h) { - int x, y; - - for (y = 0; y < h; ++y) { - for (x = 0; x < w; ++x) { - dst[x] = combine_3qtr(dst[x], src[x]); - } - src += src_stride; - dst += dst_stride; - } -} - -void vp9_convolve_7by8(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int16_t *filter_x, int filter_x_stride, - const int16_t *filter_y, int filter_y_stride, - int w, int h) { - int x, y; - - for (y = 0; y < h; ++y) { - for (x = 0; x < w; ++x) { - dst[x] = combine_7by8(dst[x], src[x]); - } - src += src_stride; - dst += dst_stride; - } -} -#endif diff --git a/vp9/common/vp9_convolve.h b/vp9/common/vp9_convolve.h index bef2d85641783ef1a14c4cccd493bb23d8033a19..0596080c009d7ab0ed3251d324be94ca358193c9 100644 --- a/vp9/common/vp9_convolve.h +++ b/vp9/common/vp9_convolve.h @@ -33,50 +33,6 @@ void vp9_convolve_avg(const uint8_t *src, int src_stride, const int16_t *filter_y, int y_step_q4, int w, int h); -#if 
CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT -// Not a convolution, a block wtd (1/8, 7/8) average for (dst, src) -void vp9_convolve_1by8(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h); - -// Not a convolution, a block wtd (1/4, 3/4) average for (dst, src) -void vp9_convolve_qtr(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h); - -// Not a convolution, a block wtd (3/8, 5/8) average for (dst, src) -void vp9_convolve_3by8(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h); - -// Not a convolution, a block wtd (5/8, 3/8) average for (dst, src) -void vp9_convolve_5by8(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h); - -// Not a convolution, a block wtd (3/4, 1/4) average for (dst, src) -void vp9_convolve_3qtr(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h); - -// Not a convolution, a block wtd (7/8, 1/8) average for (dst, src) -void vp9_convolve_7by8(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h); -#endif - struct subpix_fn_table { const int16_t (*filter_x)[8]; const int16_t (*filter_y)[8]; diff --git a/vp9/common/vp9_debugmodes.c b/vp9/common/vp9_debugmodes.c index c3fffc63251216d0076b49e2ae1cc0f69ca72776..5841f8091d38d56593a48dd0834f9c8614f908f9 100644 --- a/vp9/common/vp9_debugmodes.c +++ b/vp9/common/vp9_debugmodes.c @@ -13,130 +13,124 @@ #include "vp9/common/vp9_blockd.h" void vp9_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, - int frame) { - int mb_row; - int mb_col; - int mb_index = 0; - FILE *mvs = fopen("mvs.stt", "a"); + int frame, char *file) { + int mi_row; + int mi_col; + int mi_index = 0; + FILE *mvs = fopen(file, "a"); // Print out the macroblock Y modes - fprintf(mvs, "Mb Modes for Frame %d\n", frame); + fprintf(mvs, "SB Types for Frame %d\n", frame); + + for (mi_row = 0; mi_row < rows; mi_row++) { + for (mi_col = 0; mi_col < cols; mi_col++) { + fprintf(mvs, "%2d ", mi[mi_index].mbmi.sb_type); + + mi_index++; + } - for (mb_row = 0; mb_row < rows; mb_row++) { - for (mb_col = 0; mb_col < cols; mb_col++) { + fprintf(mvs, "\n"); + mi_index += 8; + } - fprintf(mvs, "%2d ", mi[mb_index].mbmi.mode); + // Print out the macroblock Y modes + fprintf(mvs, "Mb Modes for Frame %d\n", frame); + mi_index = 0; + for (mi_row = 0; mi_row < rows; mi_row++) { + for (mi_col = 0; mi_col < cols; mi_col++) { + fprintf(mvs, "%2d ", mi[mi_index].mbmi.mode); - mb_index++; + mi_index++; } fprintf(mvs, "\n"); - mb_index++; + mi_index += 8; } fprintf(mvs, "\n"); - mb_index = 0; + mi_index = 0; fprintf(mvs, "Mb mv ref for Frame %d\n", frame); - for (mb_row = 0; mb_row < rows; mb_row++) { - for (mb_col = 0; mb_col < cols; mb_col++) { + for (mi_row = 0; mi_row < rows; mi_row++) { + for (mi_col = 0; mi_col < cols; mi_col++) { + fprintf(mvs, "%2d ", mi[mi_index].mbmi.ref_frame[0]); - fprintf(mvs, "%2d ", mi[mb_index].mbmi.ref_frame); - - mb_index++; + mi_index++; } fprintf(mvs, "\n"); - 
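The six prototypes removed from the header above all follow one pattern; per their own comments, each computed a fixed (k/8, (8-k)/8) blend of (dst, src). A sketch of the whole family, with the rounding bias assumed to match the surviving 1/2 average:

```c
#include <stdint.h>

/* Generic fixed-weight blend: dst' = (k*dst + (8-k)*src + 4) >> 3.
 * k = 1, 2, 3, 5, 6, 7 cover the removed 1by8/qtr/3by8/5by8/3qtr/7by8
 * variants; k = 4 reduces to the rounded average kept in
 * vp9_convolve_avg. The +4 rounding term is an assumption. */
static uint8_t combine_wtd(uint8_t dst, uint8_t src, int k) {
  return (uint8_t)((k * dst + (8 - k) * src + 4) >> 3);
}

/* Block-level wrapper mirroring the removed per-pixel loops. */
static void blend_block(const uint8_t *src, int src_stride,
                        uint8_t *dst, int dst_stride,
                        int w, int h, int k) {
  int x, y;
  for (y = 0; y < h; ++y) {
    for (x = 0; x < w; ++x)
      dst[x] = combine_wtd(dst[x], src[x], k);
    src += src_stride;
    dst += dst_stride;
  }
}
```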
mb_index++; + mi_index += 8; } - fprintf(mvs, "\n"); - /* print out the macroblock UV modes */ - mb_index = 0; - fprintf(mvs, "UV Modes for Frame %d\n", frame); - - for (mb_row = 0; mb_row < rows; mb_row++) { - for (mb_col = 0; mb_col < cols; mb_col++) { + mi_index = 0; + fprintf(mvs, "Mb mv ref for Frame %d\n", frame); - fprintf(mvs, "%2d ", mi[mb_index].mbmi.uv_mode); + for (mi_row = 0; mi_row < rows; mi_row++) { + for (mi_col = 0; mi_col < cols; mi_col++) { + fprintf(mvs, "%4d:%4d ", mi[mi_index].mbmi.mv[0].as_mv.row, + mi[mi_index].mbmi.mv[0].as_mv.col); - mb_index++; + mi_index++; } - mb_index++; fprintf(mvs, "\n"); + mi_index += 8; } fprintf(mvs, "\n"); - /* print out the block modes */ - mb_index = 0; - fprintf(mvs, "Mbs for Frame %d\n", frame); - { - int b_row; - - for (b_row = 0; b_row < 4 * rows; b_row++) { - int b_col; - int bindex; + /* print out the macroblock txform sizes */ + mi_index = 0; + fprintf(mvs, "TXFM size for Frame %d\n", frame); - for (b_col = 0; b_col < 4 * cols; b_col++) { - mb_index = (b_row >> 2) * (cols + 1) + (b_col >> 2); - bindex = (b_row & 3) * 4 + (b_col & 3); + for (mi_row = 0; mi_row < rows; mi_row++) { + for (mi_col = 0; mi_col < cols; mi_col++) { + fprintf(mvs, "%2d ", mi[mi_index].mbmi.txfm_size); - if (mi[mb_index].mbmi.mode == B_PRED) { - fprintf(mvs, "%2d ", mi[mb_index].bmi[bindex].as_mode.first); - } else - fprintf(mvs, "xx "); - - } - - fprintf(mvs, "\n"); + mi_index++; } + + mi_index += 8; + fprintf(mvs, "\n"); } + fprintf(mvs, "\n"); - /* print out the macroblock mvs */ - mb_index = 0; - fprintf(mvs, "MVs for Frame %d\n", frame); + /* print out the macroblock UV modes */ + mi_index = 0; + fprintf(mvs, "UV Modes for Frame %d\n", frame); - for (mb_row = 0; mb_row < rows; mb_row++) { - for (mb_col = 0; mb_col < cols; mb_col++) { - fprintf(mvs, "%5d:%-5d", mi[mb_index].mbmi.mv[0].as_mv.row / 2, - mi[mb_index].mbmi.mv[0].as_mv.col / 2); + for (mi_row = 0; mi_row < rows; mi_row++) { + for (mi_col = 0; mi_col < cols; mi_col++) { + fprintf(mvs, "%2d ", mi[mi_index].mbmi.uv_mode); - mb_index++; + mi_index++; } - mb_index++; + mi_index += 8; fprintf(mvs, "\n"); } fprintf(mvs, "\n"); - /* print out the block modes */ - mb_index = 0; + /* print out the macroblock mvs */ + mi_index = 0; fprintf(mvs, "MVs for Frame %d\n", frame); - { - int b_row; - - for (b_row = 0; b_row < 4 * rows; b_row++) { - int b_col; - int bindex; - - for (b_col = 0; b_col < 4 * cols; b_col++) { - mb_index = (b_row >> 2) * (cols + 1) + (b_col >> 2); - bindex = (b_row & 3) * 4 + (b_col & 3); - fprintf(mvs, "%3d:%-3d ", - mi[mb_index].bmi[bindex].as_mv[0].as_mv.row, - mi[mb_index].bmi[bindex].as_mv[0].as_mv.col); - } + for (mi_row = 0; mi_row < rows; mi_row++) { + for (mi_col = 0; mi_col < cols; mi_col++) { + fprintf(mvs, "%5d:%-5d", mi[mi_index].mbmi.mv[0].as_mv.row / 2, + mi[mi_index].mbmi.mv[0].as_mv.col / 2); - fprintf(mvs, "\n"); + mi_index++; } + + mi_index += 8; + fprintf(mvs, "\n"); } + fprintf(mvs, "\n"); fclose(mvs); diff --git a/vp9/common/vp9_default_coef_probs.h b/vp9/common/vp9_default_coef_probs.h index c9be8b2299adcd61d75cd777f030869a5dd81f1e..1954093f5f4045de9370a2bab9f8866ad72434cd 100644 --- a/vp9/common/vp9_default_coef_probs.h +++ b/vp9/common/vp9_default_coef_probs.h @@ -11,987 +11,1374 @@ /*Generated file, included by vp9_entropy.c*/ -// NOTE: When the CONFIG_MODELCOEFPROB experiment is on, only the first -// 2 or 3 from each row is actually used depending on whether -// UNCONSTRAINDED_NODES is 2 or 3. 
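The rewritten dumper in vp9_debugmodes.c repeats one traversal per field (sb_type, mode, ref_frame, mv, txfm_size, uv_mode). Extracted as a helper it is just the walk below, where the +8 row hop is the mode-info border stride the patch assumes and the accessor callback is hypothetical:

```c
#include <stdio.h>

/* Row-major walk over a rows x cols mode-info grid whose storage
 * carries 8 extra border entries per row (matching the `mi_index += 8`
 * hops in the patch). `get` stands in for the field being printed. */
static void dump_mi_field(FILE *mvs, int rows, int cols,
                          int (*get)(int mi_index)) {
  int mi_row, mi_col, mi_index = 0;
  for (mi_row = 0; mi_row < rows; mi_row++) {
    for (mi_col = 0; mi_col < cols; mi_col++) {
      fprintf(mvs, "%2d ", get(mi_index));
      mi_index++;
    }
    fprintf(mvs, "\n");
    mi_index += 8;  /* skip the border entries padding each row */
  }
}
```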
If this experiment is merged -// the tables below should be shortened accordingly. -static const vp9_coeff_probs default_coef_probs_4x4[BLOCK_TYPES] = { +#if CONFIG_BALANCED_COEFTREE +static const vp9_coeff_probs_model default_coef_probs_4x4[BLOCK_TYPES] = { { /* block Type 0 */ { /* Intra */ { /* Coeff Band 0 */ - { 208, 32, 178, 198, 161, 167, 196, 147, 244, 194, 210 }, - { 102, 43, 132, 185, 148, 162, 185, 141, 237, 181, 215 }, - { 15, 36, 68, 143, 119, 151, 169, 133, 230, 173, 214 } - }, { /* Coeff Band 1 */ - { 71, 91, 178, 226, 169, 176, 232, 170, 252, 219, 231 }, - { 72, 88, 174, 226, 168, 176, 232, 170, 252, 219, 234 }, - { 40, 79, 154, 222, 161, 174, 231, 169, 251, 219, 238 }, - { 21, 68, 126, 211, 144, 167, 230, 167, 252, 219, 236 }, - { 7, 49, 84, 175, 121, 152, 223, 151, 251, 218, 237 }, - { 1, 20, 32, 100, 97, 140, 163, 116, 237, 186, 222 } - }, { /* Coeff Band 2 */ - { 108, 110, 206, 237, 182, 183, 239, 181, 252, 221, 245 }, - { 72, 98, 191, 236, 180, 182, 240, 183, 252, 223, 239 }, - { 26, 77, 152, 230, 166, 179, 239, 181, 252, 222, 241 }, - { 7, 57, 106, 212, 141, 167, 236, 173, 252, 223, 243 }, - { 1, 35, 60, 171, 110, 149, 225, 155, 251, 218, 240 }, - { 1, 14, 22, 90, 86, 134, 163, 116, 238, 181, 233 } - }, { /* Coeff Band 3 */ - { 105, 139, 222, 245, 196, 192, 245, 195, 253, 229, 255 }, - { 76, 118, 205, 245, 192, 192, 247, 198, 254, 230, 255 }, - { 21, 88, 164, 240, 175, 186, 246, 197, 255, 232, 255 }, - { 5, 63, 118, 222, 149, 172, 242, 185, 255, 230, 254 }, - { 1, 42, 74, 186, 120, 157, 227, 161, 253, 220, 250 }, - { 1, 18, 30, 97, 92, 136, 163, 118, 244, 184, 244 } - }, { /* Coeff Band 4 */ - { 143, 117, 233, 251, 207, 201, 250, 210, 255, 239, 128 }, - { 99, 104, 214, 249, 200, 199, 251, 211, 255, 238, 255 }, - { 26, 81, 170, 245, 183, 192, 250, 206, 255, 242, 255 }, - { 6, 60, 116, 226, 151, 176, 242, 187, 255, 235, 255 }, - { 1, 38, 65, 178, 114, 153, 224, 157, 254, 224, 255 }, - { 1, 15, 26, 86, 88, 133, 163, 110, 251, 197, 252 } - }, { /* Coeff Band 5 */ - { 155, 74, 238, 252, 215, 206, 252, 223, 255, 255, 128 }, - { 152, 64, 223, 250, 205, 201, 254, 219, 255, 255, 128 }, - { 67, 55, 182, 246, 187, 192, 251, 210, 255, 240, 128 }, - { 27, 44, 127, 227, 155, 176, 244, 186, 255, 240, 255 }, - { 9, 27, 69, 176, 115, 152, 227, 154, 255, 229, 255 }, - { 2, 11, 28, 91, 84, 133, 177, 115, 254, 210, 255 } + { 6, 213, 178 }, + { 26, 113, 132 }, + { 34, 17, 68 } + }, { /* Coeff Band 1 */ + { 66, 96, 178 }, + { 63, 96, 174 }, + { 67, 54, 154 }, + { 62, 28, 126 }, + { 48, 9, 84 }, + { 20, 1, 32 } + }, { /* Coeff Band 2 */ + { 64, 144, 206 }, + { 70, 99, 191 }, + { 69, 36, 152 }, + { 55, 9, 106 }, + { 35, 1, 60 }, + { 14, 1, 22 } + }, { /* Coeff Band 3 */ + { 82, 154, 222 }, + { 83, 112, 205 }, + { 81, 31, 164 }, + { 62, 7, 118 }, + { 42, 1, 74 }, + { 18, 1, 30 } + }, { /* Coeff Band 4 */ + { 52, 179, 233 }, + { 64, 132, 214 }, + { 73, 36, 170 }, + { 59, 8, 116 }, + { 38, 1, 65 }, + { 15, 1, 26 } + }, { /* Coeff Band 5 */ + { 29, 175, 238 }, + { 26, 169, 223 }, + { 41, 80, 182 }, + { 39, 32, 127 }, + { 26, 10, 69 }, + { 11, 2, 28 } } }, { /* Inter */ { /* Coeff Band 0 */ - { 207, 112, 234, 244, 192, 193, 246, 194, 255, 237, 255 }, - { 145, 120, 212, 233, 178, 183, 232, 177, 252, 216, 228 }, - { 77, 114, 177, 214, 164, 174, 210, 159, 245, 199, 230 } - }, { /* Coeff Band 1 */ - { 93, 174, 243, 248, 205, 200, 245, 195, 255, 232, 255 }, - { 100, 144, 231, 248, 204, 200, 244, 193, 255, 232, 255 }, - { 28, 101, 186, 247, 194, 199, 244, 194, 255, 232, 255 }, - { 9, 73, 132, 238, 155, 
186, 245, 197, 255, 232, 250 }, - { 2, 44, 76, 187, 112, 151, 240, 172, 255, 235, 249 }, - { 1, 19, 33, 98, 92, 138, 176, 113, 252, 208, 249 } - }, { /* Coeff Band 2 */ - { 116, 175, 246, 250, 212, 202, 248, 198, 255, 238, 255 }, - { 78, 142, 231, 250, 208, 203, 249, 200, 255, 241, 255 }, - { 14, 93, 177, 245, 186, 196, 248, 198, 255, 241, 255 }, - { 4, 65, 122, 227, 148, 177, 244, 186, 255, 241, 243 }, - { 1, 38, 69, 180, 111, 152, 235, 162, 255, 237, 247 }, - { 1, 18, 30, 101, 89, 133, 190, 116, 255, 219, 246 } - }, { /* Coeff Band 3 */ - { 138, 183, 249, 253, 220, 209, 252, 210, 255, 251, 128 }, - { 93, 147, 237, 252, 213, 209, 253, 213, 255, 251, 128 }, - { 21, 104, 187, 247, 185, 196, 252, 210, 255, 249, 128 }, - { 6, 73, 131, 225, 147, 174, 248, 190, 255, 248, 128 }, - { 1, 47, 83, 189, 119, 155, 239, 167, 255, 246, 128 }, - { 1, 26, 44, 130, 96, 139, 209, 129, 255, 235, 255 } - }, { /* Coeff Band 4 */ - { 188, 143, 252, 255, 228, 218, 253, 218, 255, 209, 128 }, - { 137, 124, 241, 253, 215, 211, 254, 221, 255, 255, 128 }, - { 32, 89, 188, 248, 186, 198, 254, 216, 255, 253, 128 }, - { 7, 61, 122, 231, 146, 176, 252, 201, 255, 250, 128 }, - { 1, 34, 66, 186, 103, 149, 246, 176, 255, 249, 128 }, - { 1, 18, 34, 115, 91, 134, 217, 124, 255, 233, 255 } - }, { /* Coeff Band 5 */ - { 198, 92, 253, 255, 231, 222, 255, 230, 128, 128, 128 }, - { 189, 79, 244, 254, 220, 217, 255, 237, 255, 255, 128 }, - { 78, 61, 200, 252, 196, 207, 255, 231, 255, 255, 128 }, - { 34, 50, 146, 242, 161, 187, 255, 222, 255, 255, 128 }, - { 11, 38, 93, 215, 122, 159, 253, 202, 255, 255, 128 }, - { 1, 31, 55, 143, 102, 143, 227, 148, 255, 238, 128 } + { 21, 226, 234 }, + { 52, 182, 212 }, + { 80, 112, 177 } + }, { /* Coeff Band 1 */ + { 111, 164, 243 }, + { 88, 152, 231 }, + { 90, 43, 186 }, + { 70, 12, 132 }, + { 44, 2, 76 }, + { 19, 1, 33 } + }, { /* Coeff Band 2 */ + { 96, 185, 246 }, + { 99, 127, 231 }, + { 88, 21, 177 }, + { 64, 5, 122 }, + { 38, 1, 69 }, + { 18, 1, 30 } + }, { /* Coeff Band 3 */ + { 84, 206, 249 }, + { 94, 147, 237 }, + { 95, 33, 187 }, + { 71, 8, 131 }, + { 47, 1, 83 }, + { 26, 1, 44 } + }, { /* Coeff Band 4 */ + { 38, 221, 252 }, + { 58, 177, 241 }, + { 78, 46, 188 }, + { 59, 9, 122 }, + { 34, 1, 66 }, + { 18, 1, 34 } + }, { /* Coeff Band 5 */ + { 21, 216, 253 }, + { 21, 206, 244 }, + { 42, 93, 200 }, + { 43, 41, 146 }, + { 36, 13, 93 }, + { 31, 1, 55 } } } }, { /* block Type 1 */ { /* Intra */ { /* Coeff Band 0 */ - { 207, 35, 219, 243, 195, 192, 243, 188, 251, 232, 238 }, - { 126, 46, 182, 230, 177, 182, 228, 171, 248, 214, 232 }, - { 51, 47, 125, 196, 147, 166, 206, 151, 245, 199, 229 } - }, { /* Coeff Band 1 */ - { 114, 124, 220, 244, 197, 192, 242, 189, 253, 226, 255 }, - { 142, 116, 213, 243, 194, 191, 241, 188, 252, 226, 255 }, - { 81, 101, 190, 242, 188, 190, 242, 190, 253, 229, 255 }, - { 42, 83, 155, 235, 166, 183, 241, 190, 253, 227, 246 }, - { 16, 62, 104, 205, 133, 161, 238, 176, 254, 227, 250 }, - { 6, 40, 60, 132, 109, 145, 190, 128, 248, 202, 239 } - }, { /* Coeff Band 2 */ - { 139, 149, 228, 248, 205, 198, 244, 196, 255, 223, 255 }, - { 115, 127, 221, 248, 202, 198, 245, 198, 255, 228, 255 }, - { 43, 100, 189, 246, 195, 195, 244, 196, 254, 234, 228 }, - { 13, 77, 141, 238, 168, 187, 243, 191, 255, 232, 255 }, - { 3, 49, 88, 203, 125, 160, 237, 178, 253, 227, 251 }, - { 1, 23, 41, 118, 97, 136, 191, 127, 250, 207, 247 } - }, { /* Coeff Band 3 */ - { 119, 185, 236, 251, 216, 205, 249, 202, 253, 237, 255 }, - { 89, 140, 224, 251, 211, 205, 250, 208, 255, 241, 255 }, - { 34, 
105, 189, 248, 195, 197, 250, 208, 255, 245, 255 }, - { 14, 78, 142, 235, 166, 182, 246, 194, 255, 242, 255 }, - { 5, 49, 90, 196, 128, 160, 235, 165, 255, 237, 255 }, - { 1, 22, 41, 114, 97, 139, 180, 124, 252, 201, 249 } - }, { /* Coeff Band 4 */ - { 162, 142, 244, 254, 228, 215, 255, 230, 128, 128, 128 }, - { 129, 120, 231, 253, 216, 210, 255, 228, 255, 255, 128 }, - { 44, 90, 189, 249, 195, 199, 253, 217, 255, 240, 128 }, - { 14, 65, 132, 234, 158, 181, 249, 203, 255, 248, 128 }, - { 3, 38, 72, 188, 112, 154, 239, 171, 255, 243, 128 }, - { 1, 17, 39, 110, 86, 141, 201, 123, 255, 240, 128 } - }, { /* Coeff Band 5 */ - { 167, 96, 247, 255, 230, 218, 249, 231, 255, 255, 128 }, - { 163, 84, 234, 253, 214, 209, 255, 231, 255, 255, 128 }, - { 70, 63, 185, 249, 189, 197, 255, 230, 255, 255, 128 }, - { 30, 44, 132, 238, 157, 180, 251, 210, 255, 220, 128 }, - { 13, 30, 80, 195, 121, 153, 243, 179, 255, 224, 128 }, - { 5, 13, 38, 103, 109, 128, 196, 147, 255, 255, 128 } + { 7, 213, 219 }, + { 23, 139, 182 }, + { 38, 60, 125 } + }, { /* Coeff Band 1 */ + { 69, 156, 220 }, + { 52, 178, 213 }, + { 69, 111, 190 }, + { 69, 58, 155 }, + { 58, 21, 104 }, + { 39, 7, 60 } + }, { /* Coeff Band 2 */ + { 68, 189, 228 }, + { 70, 158, 221 }, + { 83, 64, 189 }, + { 73, 18, 141 }, + { 48, 4, 88 }, + { 23, 1, 41 } + }, { /* Coeff Band 3 */ + { 99, 194, 236 }, + { 91, 138, 224 }, + { 91, 53, 189 }, + { 74, 20, 142 }, + { 48, 6, 90 }, + { 22, 1, 41 } + }, { /* Coeff Band 4 */ + { 52, 203, 244 }, + { 60, 168, 231 }, + { 75, 62, 189 }, + { 61, 18, 132 }, + { 38, 4, 72 }, + { 17, 1, 39 } + }, { /* Coeff Band 5 */ + { 33, 192, 247 }, + { 31, 185, 234 }, + { 46, 85, 185 }, + { 39, 35, 132 }, + { 28, 15, 80 }, + { 13, 5, 38 } } }, { /* Inter */ { /* Coeff Band 0 */ - { 242, 90, 246, 244, 200, 192, 242, 189, 255, 234, 255 }, - { 186, 102, 228, 233, 187, 182, 231, 172, 254, 225, 252 }, - { 102, 108, 203, 228, 181, 180, 218, 167, 243, 201, 223 } - }, { /* Coeff Band 1 */ - { 152, 169, 250, 253, 223, 209, 251, 208, 255, 250, 128 }, - { 164, 149, 242, 253, 222, 209, 249, 207, 253, 238, 255 }, - { 63, 108, 204, 252, 215, 211, 251, 211, 255, 242, 128 }, - { 39, 83, 153, 248, 175, 199, 250, 214, 255, 245, 128 }, - { 31, 66, 108, 214, 130, 161, 251, 196, 255, 237, 128 }, - { 27, 65, 71, 150, 112, 149, 213, 133, 255, 230, 255 } - }, { /* Coeff Band 2 */ - { 161, 174, 250, 254, 226, 215, 254, 226, 255, 230, 128 }, - { 133, 150, 239, 254, 222, 213, 254, 225, 255, 255, 128 }, - { 32, 105, 197, 252, 206, 207, 253, 220, 255, 255, 128 }, - { 10, 78, 147, 245, 173, 193, 253, 212, 255, 255, 128 }, - { 2, 49, 99, 221, 133, 164, 250, 198, 255, 252, 128 }, - { 1, 26, 53, 154, 96, 135, 234, 142, 255, 240, 128 } - }, { /* Coeff Band 3 */ - { 160, 187, 251, 255, 234, 223, 255, 233, 128, 128, 128 }, - { 131, 155, 241, 255, 228, 222, 255, 232, 255, 255, 128 }, - { 42, 108, 198, 253, 207, 212, 255, 234, 255, 255, 128 }, - { 18, 81, 151, 246, 176, 194, 254, 222, 255, 255, 128 }, - { 9, 60, 112, 225, 144, 167, 252, 199, 255, 255, 128 }, - { 5, 35, 49, 163, 113, 150, 237, 118, 255, 255, 128 } - }, { /* Coeff Band 4 */ - { 195, 141, 253, 255, 242, 232, 255, 255, 128, 128, 128 }, - { 169, 128, 245, 255, 235, 227, 255, 248, 128, 128, 128 }, - { 62, 91, 204, 255, 216, 220, 255, 233, 128, 128, 128 }, - { 23, 70, 150, 248, 178, 202, 255, 223, 128, 128, 128 }, - { 2, 44, 78, 220, 110, 164, 255, 209, 128, 128, 128 }, - { 1, 1, 128, 255, 255, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 5 */ - { 195, 104, 253, 255, 246, 246, 255, 171, 128, 128, 128 }, 
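The type switch running through these tables (vp9_coeff_probs to vp9_coeff_probs_model) is what the removed NOTE anticipated: only the first few node probabilities are stored per entry, and the rest of the 11-node coefficient tree is derived from a fitted distribution. A sketch of that expansion under stated assumptions: three stored nodes, the last one acting as the pivot, and the fitted table passed in as a stand-in for the real one in the entropy code:

```c
#include <stdint.h>
#include <string.h>

typedef uint8_t vp9_prob;
#define ENTROPY_NODES 11         /* width of the removed rows above */
#define UNCONSTRAINED_NODES 3    /* width of the new rows */
#define MODEL_NODES (ENTROPY_NODES - UNCONSTRAINED_NODES)

/* Expand a 3-probability model entry to the full tree: copy the stored
 * nodes, then fill the remainder from a fitted distribution indexed by
 * the last stored ("pivot") probability. */
static void model_to_full_probs(const vp9_prob *model,
                                const vp9_prob fitted[256][MODEL_NODES],
                                vp9_prob *full /* ENTROPY_NODES wide */) {
  memcpy(full, model, UNCONSTRAINED_NODES);
  memcpy(full + UNCONSTRAINED_NODES,
         fitted[model[UNCONSTRAINED_NODES - 1]],
         MODEL_NODES);
}
```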
- { 197, 92, 248, 255, 239, 228, 255, 239, 128, 128, 128 }, - { 88, 71, 214, 255, 219, 220, 255, 244, 128, 128, 128 }, - { 39, 56, 160, 250, 187, 204, 255, 255, 128, 128, 128 }, - { 18, 28, 90, 217, 81, 137, 255, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + { 5, 247, 246 }, + { 28, 209, 228 }, + { 65, 137, 203 } + }, { /* Coeff Band 1 */ + { 69, 208, 250 }, + { 54, 207, 242 }, + { 81, 92, 204 }, + { 70, 54, 153 }, + { 58, 40, 108 }, + { 58, 35, 71 } + }, { /* Coeff Band 2 */ + { 65, 215, 250 }, + { 72, 185, 239 }, + { 92, 50, 197 }, + { 75, 14, 147 }, + { 49, 2, 99 }, + { 26, 1, 53 } + }, { /* Coeff Band 3 */ + { 70, 220, 251 }, + { 76, 186, 241 }, + { 90, 65, 198 }, + { 75, 26, 151 }, + { 58, 12, 112 }, + { 34, 6, 49 } + }, { /* Coeff Band 4 */ + { 34, 224, 253 }, + { 44, 204, 245 }, + { 69, 85, 204 }, + { 64, 31, 150 }, + { 44, 2, 78 }, + { 1, 1, 128 } + }, { /* Coeff Band 5 */ + { 25, 216, 253 }, + { 21, 215, 248 }, + { 47, 108, 214 }, + { 47, 48, 160 }, + { 26, 20, 90 }, + { 64, 171, 128 } } } } }; -static const vp9_coeff_probs default_coef_probs_8x8[BLOCK_TYPES] = { +static const vp9_coeff_probs_model default_coef_probs_8x8[BLOCK_TYPES] = { { /* block Type 0 */ { /* Intra */ { /* Coeff Band 0 */ - { 196, 40, 199, 180, 158, 161, 172, 135, 226, 183, 140 }, - { 83, 38, 128, 153, 142, 157, 155, 128, 222, 164, 202 }, - { 10, 29, 55, 116, 113, 146, 150, 122, 223, 169, 200 } - }, { /* Coeff Band 1 */ - { 33, 114, 160, 211, 155, 169, 223, 162, 248, 212, 215 }, - { 69, 107, 155, 210, 154, 169, 224, 163, 248, 212, 216 }, - { 30, 91, 138, 207, 150, 168, 223, 162, 248, 212, 216 }, - { 12, 74, 115, 200, 140, 164, 222, 160, 249, 212, 219 }, - { 4, 52, 80, 172, 121, 153, 216, 149, 249, 212, 226 }, - { 1, 27, 40, 105, 101, 141, 157, 120, 231, 177, 210 } - }, { /* Coeff Band 2 */ - { 38, 159, 190, 227, 171, 177, 229, 172, 250, 214, 237 }, - { 34, 130, 182, 229, 173, 180, 231, 174, 249, 215, 234 }, - { 10, 97, 153, 226, 164, 178, 232, 175, 250, 215, 241 }, - { 3, 71, 115, 213, 145, 170, 230, 171, 251, 217, 235 }, - { 1, 41, 68, 172, 114, 152, 219, 154, 250, 212, 235 }, - { 1, 16, 27, 88, 90, 135, 155, 113, 235, 180, 216 } - }, { /* Coeff Band 3 */ - { 41, 184, 214, 238, 187, 186, 235, 180, 252, 217, 236 }, - { 24, 142, 199, 241, 188, 189, 237, 184, 252, 220, 235 }, - { 6, 97, 159, 235, 172, 184, 239, 185, 252, 221, 243 }, - { 1, 63, 110, 214, 144, 170, 234, 174, 253, 223, 243 }, - { 1, 32, 58, 166, 109, 149, 218, 152, 251, 215, 238 }, - { 1, 12, 21, 78, 85, 131, 152, 109, 236, 180, 224 } - }, { /* Coeff Band 4 */ - { 54, 207, 231, 245, 201, 193, 238, 186, 252, 221, 220 }, - { 32, 156, 213, 246, 198, 195, 242, 192, 252, 224, 245 }, - { 7, 98, 164, 240, 177, 187, 243, 193, 252, 227, 244 }, - { 2, 62, 108, 216, 143, 170, 237, 177, 254, 227, 248 }, - { 1, 32, 57, 165, 108, 148, 219, 152, 252, 217, 243 }, - { 1, 13, 22, 79, 87, 132, 153, 109, 240, 182, 232 } - }, { /* Coeff Band 5 */ - { 89, 208, 239, 250, 216, 200, 240, 190, 255, 222, 219 }, - { 53, 155, 223, 250, 209, 202, 245, 199, 253, 225, 246 }, - { 12, 102, 170, 243, 183, 192, 246, 198, 254, 230, 255 }, - { 3, 67, 111, 218, 144, 171, 239, 180, 254, 231, 248 }, - { 1, 38, 60, 164, 108, 148, 221, 152, 253, 220, 246 }, - { 1, 18, 26, 81, 88, 132, 157, 108, 245, 188, 241 } + { 9, 203, 199 }, + { 26, 92, 128 }, + { 28, 11, 55 } + }, { /* Coeff Band 1 */ + { 99, 54, 160 }, + { 78, 99, 155 }, + { 80, 44, 138 }, + { 71, 17, 115 }, + { 51, 5, 80 }, + { 27, 1, 40 } + }, { /* Coeff Band 2 */ + { 135, 81, 190 }, + { 113, 
61, 182 }, + { 93, 16, 153 }, + { 70, 4, 115 }, + { 41, 1, 68 }, + { 16, 1, 27 } + }, { /* Coeff Band 3 */ + { 155, 103, 214 }, + { 129, 48, 199 }, + { 95, 10, 159 }, + { 63, 1, 110 }, + { 32, 1, 58 }, + { 12, 1, 21 } + }, { /* Coeff Band 4 */ + { 163, 149, 231 }, + { 137, 69, 213 }, + { 95, 11, 164 }, + { 62, 3, 108 }, + { 32, 1, 57 }, + { 13, 1, 22 } + }, { /* Coeff Band 5 */ + { 136, 189, 239 }, + { 123, 102, 223 }, + { 97, 19, 170 }, + { 66, 4, 111 }, + { 38, 1, 60 }, + { 18, 1, 26 } } }, { /* Inter */ { /* Coeff Band 0 */ - { 205, 121, 244, 237, 187, 188, 229, 174, 248, 215, 228 }, - { 140, 120, 211, 219, 174, 177, 207, 158, 241, 195, 214 }, - { 51, 100, 152, 198, 155, 168, 199, 148, 240, 193, 207 } - }, { /* Coeff Band 1 */ - { 66, 196, 236, 247, 202, 197, 243, 193, 254, 228, 246 }, - { 99, 164, 223, 246, 199, 196, 243, 193, 254, 226, 255 }, - { 29, 122, 187, 244, 187, 194, 244, 193, 255, 227, 239 }, - { 14, 95, 145, 234, 156, 181, 244, 194, 254, 229, 246 }, - { 6, 68, 97, 190, 123, 155, 240, 168, 254, 232, 245 }, - { 3, 43, 50, 112, 105, 143, 170, 118, 245, 195, 230 } - }, { /* Coeff Band 2 */ - { 66, 202, 238, 248, 206, 199, 245, 196, 254, 233, 244 }, - { 45, 155, 218, 248, 200, 199, 245, 197, 254, 229, 208 }, - { 6, 96, 163, 242, 178, 191, 245, 196, 254, 233, 228 }, - { 2, 64, 110, 224, 142, 175, 242, 185, 254, 232, 247 }, - { 1, 34, 61, 172, 103, 147, 232, 164, 254, 226, 244 }, - { 1, 13, 24, 82, 85, 133, 165, 105, 248, 199, 242 } - }, { /* Coeff Band 3 */ - { 66, 204, 242, 251, 213, 204, 248, 204, 255, 236, 255 }, - { 38, 158, 222, 251, 206, 205, 249, 206, 255, 238, 255 }, - { 6, 95, 166, 244, 178, 194, 249, 205, 255, 236, 255 }, - { 2, 61, 111, 223, 141, 173, 244, 187, 255, 237, 255 }, - { 1, 31, 59, 171, 104, 149, 230, 158, 255, 230, 252 }, - { 1, 12, 22, 82, 79, 128, 171, 111, 251, 203, 249 } - }, { /* Coeff Band 4 */ - { 63, 214, 245, 252, 219, 208, 249, 206, 255, 241, 128 }, - { 38, 164, 228, 252, 210, 208, 251, 212, 255, 245, 255 }, - { 5, 101, 174, 246, 182, 196, 251, 207, 255, 244, 255 }, - { 1, 64, 116, 224, 142, 174, 246, 190, 255, 241, 228 }, - { 1, 34, 63, 172, 105, 148, 233, 160, 255, 235, 237 }, - { 1, 14, 26, 88, 85, 130, 177, 110, 252, 210, 250 } - }, { /* Coeff Band 5 */ - { 91, 214, 246, 254, 226, 213, 251, 210, 255, 239, 255 }, - { 55, 162, 233, 253, 215, 210, 253, 216, 255, 244, 128 }, - { 10, 104, 179, 247, 184, 196, 252, 212, 255, 247, 255 }, - { 2, 67, 119, 226, 143, 173, 249, 195, 255, 245, 255 }, - { 1, 37, 66, 175, 106, 149, 237, 164, 255, 240, 255 }, - { 1, 16, 30, 96, 87, 132, 188, 113, 255, 222, 255 } + { 24, 226, 244 }, + { 54, 178, 211 }, + { 80, 74, 152 } + }, { /* Coeff Band 1 */ + { 145, 153, 236 }, + { 101, 163, 223 }, + { 108, 50, 187 }, + { 90, 22, 145 }, + { 66, 8, 97 }, + { 42, 4, 50 } + }, { /* Coeff Band 2 */ + { 150, 159, 238 }, + { 128, 90, 218 }, + { 94, 9, 163 }, + { 64, 3, 110 }, + { 34, 1, 61 }, + { 13, 1, 24 } + }, { /* Coeff Band 3 */ + { 151, 162, 242 }, + { 135, 80, 222 }, + { 93, 9, 166 }, + { 61, 3, 111 }, + { 31, 1, 59 }, + { 12, 1, 22 } + }, { /* Coeff Band 4 */ + { 161, 170, 245 }, + { 140, 84, 228 }, + { 99, 8, 174 }, + { 64, 1, 116 }, + { 34, 1, 63 }, + { 14, 1, 26 } + }, { /* Coeff Band 5 */ + { 138, 197, 246 }, + { 127, 109, 233 }, + { 100, 16, 179 }, + { 66, 3, 119 }, + { 37, 1, 66 }, + { 16, 1, 30 } } } }, { /* block Type 1 */ { /* Intra */ { /* Coeff Band 0 */ - { 211, 32, 212, 235, 185, 184, 223, 167, 239, 210, 182 }, - { 121, 47, 171, 224, 171, 180, 211, 162, 238, 195, 221 }, - { 40, 51, 118, 203, 145, 168, 211, 
160, 246, 200, 236 } - }, { /* Coeff Band 1 */ - { 71, 129, 209, 244, 192, 194, 242, 188, 255, 230, 255 }, - { 118, 122, 206, 244, 192, 192, 241, 187, 254, 227, 255 }, - { 53, 104, 184, 241, 186, 190, 241, 184, 254, 232, 255 }, - { 20, 81, 148, 234, 168, 183, 240, 183, 254, 231, 240 }, - { 3, 47, 82, 197, 127, 160, 234, 166, 254, 228, 251 }, - { 1, 18, 28, 96, 88, 134, 174, 116, 247, 194, 247 } - }, { /* Coeff Band 2 */ - { 86, 162, 220, 247, 203, 198, 245, 193, 255, 237, 255 }, - { 84, 134, 216, 247, 201, 197, 244, 192, 255, 233, 255 }, - { 26, 102, 186, 243, 190, 192, 244, 192, 255, 232, 255 }, - { 7, 75, 135, 231, 163, 181, 240, 183, 255, 234, 255 }, - { 1, 46, 79, 193, 121, 157, 233, 168, 255, 225, 242 }, - { 1, 20, 35, 113, 94, 136, 191, 123, 252, 209, 250 } - }, { /* Coeff Band 3 */ - { 89, 191, 232, 250, 211, 203, 248, 202, 255, 230, 128 }, - { 67, 148, 223, 250, 207, 201, 250, 207, 255, 247, 255 }, - { 19, 105, 183, 245, 189, 193, 249, 202, 255, 244, 255 }, - { 5, 72, 127, 228, 156, 177, 245, 186, 255, 238, 255 }, - { 1, 44, 76, 190, 119, 156, 234, 167, 255, 231, 255 }, - { 1, 21, 36, 116, 92, 138, 195, 128, 250, 208, 241 } - }, { /* Coeff Band 4 */ - { 94, 210, 236, 252, 215, 206, 253, 209, 255, 247, 128 }, - { 68, 153, 224, 251, 209, 204, 251, 213, 255, 240, 128 }, - { 14, 103, 178, 246, 188, 195, 251, 209, 255, 239, 128 }, - { 2, 70, 122, 230, 154, 177, 247, 194, 255, 239, 128 }, - { 1, 42, 72, 189, 115, 153, 234, 166, 255, 229, 255 }, - { 1, 19, 34, 104, 98, 143, 180, 124, 252, 200, 255 } - }, { /* Coeff Band 5 */ - { 87, 200, 238, 254, 226, 214, 250, 212, 255, 226, 128 }, - { 55, 151, 225, 253, 217, 212, 253, 217, 255, 233, 128 }, - { 11, 106, 179, 249, 193, 200, 252, 213, 255, 247, 128 }, - { 2, 72, 124, 232, 155, 180, 246, 195, 255, 230, 128 }, - { 1, 42, 70, 182, 114, 153, 232, 163, 255, 236, 255 }, - { 1, 17, 28, 95, 92, 137, 170, 115, 252, 208, 228 } + { 6, 216, 212 }, + { 25, 134, 171 }, + { 43, 48, 118 } + }, { /* Coeff Band 1 */ + { 93, 112, 209 }, + { 66, 159, 206 }, + { 82, 78, 184 }, + { 75, 28, 148 }, + { 46, 4, 82 }, + { 18, 1, 28 } + }, { /* Coeff Band 2 */ + { 108, 148, 220 }, + { 90, 130, 216 }, + { 92, 40, 186 }, + { 73, 10, 135 }, + { 46, 1, 79 }, + { 20, 1, 35 } + }, { /* Coeff Band 3 */ + { 125, 173, 232 }, + { 109, 117, 223 }, + { 97, 31, 183 }, + { 71, 7, 127 }, + { 44, 1, 76 }, + { 21, 1, 36 } + }, { /* Coeff Band 4 */ + { 133, 195, 236 }, + { 112, 121, 224 }, + { 97, 23, 178 }, + { 69, 3, 122 }, + { 42, 1, 72 }, + { 19, 1, 34 } + }, { /* Coeff Band 5 */ + { 132, 180, 238 }, + { 119, 102, 225 }, + { 101, 18, 179 }, + { 71, 3, 124 }, + { 42, 1, 70 }, + { 17, 1, 28 } } }, { /* Inter */ { /* Coeff Band 0 */ - { 238, 66, 250, 245, 205, 193, 232, 180, 254, 228, 255 }, - { 178, 84, 226, 237, 192, 185, 230, 176, 253, 217, 251 }, - { 76, 83, 168, 218, 166, 173, 225, 162, 252, 220, 243 } - }, { /* Coeff Band 1 */ - { 137, 176, 246, 252, 218, 207, 251, 208, 255, 238, 128 }, - { 176, 160, 237, 252, 217, 206, 249, 209, 255, 247, 128 }, - { 68, 128, 205, 251, 209, 207, 251, 207, 255, 248, 128 }, - { 40, 105, 167, 246, 172, 192, 252, 215, 255, 247, 128 }, - { 22, 84, 131, 214, 144, 164, 249, 185, 255, 250, 255 }, - { 11, 60, 91, 161, 130, 155, 194, 133, 253, 214, 255 } - }, { /* Coeff Band 2 */ - { 124, 192, 247, 253, 223, 210, 254, 215, 255, 255, 128 }, - { 103, 161, 234, 253, 218, 209, 253, 214, 255, 255, 128 }, - { 19, 108, 190, 250, 202, 202, 251, 213, 255, 241, 128 }, - { 6, 74, 131, 242, 165, 191, 251, 207, 255, 244, 128 }, - { 1, 41, 72, 198, 111, 151, 249, 
185, 255, 248, 128 }, - { 1, 14, 24, 82, 90, 140, 185, 96, 254, 224, 255 } - }, { /* Coeff Band 3 */ - { 118, 200, 248, 254, 228, 216, 254, 222, 255, 213, 128 }, - { 91, 166, 235, 254, 220, 212, 254, 223, 255, 233, 128 }, - { 16, 110, 186, 251, 197, 201, 255, 225, 255, 255, 128 }, - { 3, 72, 124, 239, 160, 186, 253, 209, 255, 239, 128 }, - { 1, 39, 66, 198, 106, 151, 248, 191, 255, 247, 128 }, - { 1, 14, 19, 94, 74, 124, 209, 109, 255, 245, 128 } - }, { /* Coeff Band 4 */ - { 112, 213, 248, 255, 231, 218, 255, 234, 255, 255, 128 }, - { 80, 172, 234, 254, 220, 216, 255, 233, 255, 255, 128 }, - { 11, 112, 182, 251, 195, 204, 255, 231, 255, 224, 128 }, - { 2, 73, 126, 241, 159, 186, 254, 219, 255, 255, 128 }, - { 1, 40, 69, 207, 111, 159, 249, 191, 255, 255, 128 }, - { 1, 16, 24, 83, 78, 138, 230, 134, 255, 239, 128 } - }, { /* Coeff Band 5 */ - { 100, 209, 245, 255, 236, 225, 248, 231, 255, 192, 128 }, - { 65, 164, 232, 255, 226, 221, 255, 240, 255, 255, 128 }, - { 11, 117, 186, 253, 203, 209, 255, 240, 255, 255, 128 }, - { 2, 83, 136, 245, 167, 191, 253, 222, 255, 255, 128 }, - { 1, 55, 88, 213, 122, 157, 248, 182, 255, 255, 128 }, - { 1, 10, 38, 58, 85, 43, 198, 107, 255, 255, 128 } + { 5, 242, 250 }, + { 26, 198, 226 }, + { 58, 98, 168 } + }, { /* Coeff Band 1 */ + { 82, 201, 246 }, + { 50, 219, 237 }, + { 94, 107, 205 }, + { 89, 61, 167 }, + { 77, 31, 131 }, + { 57, 14, 91 } + }, { /* Coeff Band 2 */ + { 99, 202, 247 }, + { 96, 165, 234 }, + { 100, 31, 190 }, + { 72, 8, 131 }, + { 41, 1, 72 }, + { 14, 1, 24 } + }, { /* Coeff Band 3 */ + { 108, 204, 248 }, + { 107, 156, 235 }, + { 103, 27, 186 }, + { 71, 4, 124 }, + { 39, 1, 66 }, + { 14, 1, 19 } + }, { /* Coeff Band 4 */ + { 120, 211, 248 }, + { 118, 149, 234 }, + { 107, 19, 182 }, + { 72, 3, 126 }, + { 40, 1, 69 }, + { 16, 1, 24 } + }, { /* Coeff Band 5 */ + { 127, 199, 245 }, + { 122, 125, 232 }, + { 112, 20, 186 }, + { 82, 3, 136 }, + { 55, 1, 88 }, + { 10, 1, 38 } } } } }; -static const vp9_coeff_probs default_coef_probs_16x16[BLOCK_TYPES] = { +static const vp9_coeff_probs_model default_coef_probs_16x16[BLOCK_TYPES] = { { /* block Type 0 */ { /* Intra */ { /* Coeff Band 0 */ - { 8, 26, 101, 170, 141, 159, 166, 138, 205, 164, 158 }, - { 2, 25, 67, 119, 124, 152, 121, 123, 189, 145, 175 }, - { 1, 15, 28, 67, 102, 139, 95, 107, 191, 136, 187 } - }, { /* Coeff Band 1 */ - { 22, 73, 118, 160, 137, 157, 175, 132, 242, 184, 229 }, - { 43, 73, 116, 160, 137, 157, 177, 132, 242, 185, 231 }, - { 24, 66, 105, 158, 134, 156, 175, 133, 242, 185, 232 }, - { 9, 54, 85, 150, 126, 153, 175, 132, 242, 185, 231 }, - { 2, 34, 54, 123, 109, 145, 168, 124, 242, 183, 231 }, - { 1, 14, 22, 63, 93, 134, 108, 103, 214, 149, 206 } - }, { /* Coeff Band 2 */ - { 34, 123, 149, 186, 148, 163, 195, 143, 245, 195, 233 }, - { 34, 106, 147, 189, 149, 164, 198, 146, 246, 197, 234 }, - { 10, 81, 123, 186, 143, 162, 200, 147, 246, 198, 235 }, - { 2, 56, 87, 170, 127, 156, 201, 143, 248, 202, 234 }, - { 1, 35, 56, 138, 109, 146, 187, 133, 246, 196, 233 }, - { 1, 17, 27, 80, 93, 135, 136, 109, 229, 168, 215 } - }, { /* Coeff Band 3 */ - { 27, 159, 171, 208, 161, 171, 211, 155, 249, 205, 239 }, - { 17, 119, 162, 213, 160, 172, 218, 160, 250, 210, 238 }, - { 3, 81, 128, 207, 149, 168, 220, 161, 250, 213, 238 }, - { 1, 53, 87, 183, 128, 158, 217, 153, 251, 214, 239 }, - { 1, 31, 52, 143, 106, 145, 199, 137, 249, 205, 235 }, - { 1, 14, 24, 77, 89, 133, 142, 109, 234, 174, 215 } - }, { /* Coeff Band 4 */ - { 24, 189, 200, 224, 177, 178, 221, 164, 250, 212, 234 }, - { 14, 136, 
184, 230, 176, 181, 228, 172, 252, 215, 231 }, - { 2, 87, 140, 222, 159, 176, 230, 172, 252, 218, 238 }, - { 1, 54, 90, 193, 130, 161, 223, 160, 252, 217, 241 }, - { 1, 28, 49, 142, 103, 144, 202, 139, 250, 208, 233 }, - { 1, 12, 21, 73, 87, 132, 141, 106, 234, 176, 209 } - }, { /* Coeff Band 5 */ - { 32, 220, 227, 242, 199, 190, 234, 180, 251, 220, 232 }, - { 12, 155, 200, 242, 190, 191, 240, 187, 252, 225, 230 }, - { 1, 90, 144, 231, 164, 180, 240, 184, 253, 229, 239 }, - { 1, 53, 90, 198, 130, 162, 230, 165, 253, 226, 238 }, - { 1, 28, 50, 145, 103, 144, 207, 140, 251, 213, 236 }, - { 1, 13, 22, 74, 88, 132, 142, 107, 233, 176, 216 } + { 25, 9, 101 }, + { 25, 2, 67 }, + { 15, 1, 28 } + }, { /* Coeff Band 1 */ + { 67, 30, 118 }, + { 61, 56, 116 }, + { 60, 31, 105 }, + { 52, 11, 85 }, + { 34, 2, 54 }, + { 14, 1, 22 } + }, { /* Coeff Band 2 */ + { 107, 58, 149 }, + { 92, 53, 147 }, + { 78, 14, 123 }, + { 56, 3, 87 }, + { 35, 1, 56 }, + { 17, 1, 27 } + }, { /* Coeff Band 3 */ + { 142, 61, 171 }, + { 111, 30, 162 }, + { 80, 4, 128 }, + { 53, 1, 87 }, + { 31, 1, 52 }, + { 14, 1, 24 } + }, { /* Coeff Band 4 */ + { 171, 73, 200 }, + { 129, 28, 184 }, + { 86, 3, 140 }, + { 54, 1, 90 }, + { 28, 1, 49 }, + { 12, 1, 21 } + }, { /* Coeff Band 5 */ + { 193, 129, 227 }, + { 148, 28, 200 }, + { 90, 2, 144 }, + { 53, 1, 90 }, + { 28, 1, 50 }, + { 13, 1, 22 } } }, { /* Inter */ { /* Coeff Band 0 */ - { 5, 61, 234, 230, 183, 183, 212, 164, 241, 199, 205 }, - { 3, 65, 184, 199, 164, 170, 182, 145, 232, 175, 223 }, - { 1, 56, 104, 154, 137, 158, 156, 131, 221, 165, 210 } - }, { /* Coeff Band 1 */ - { 46, 183, 210, 229, 181, 182, 222, 165, 252, 214, 251 }, - { 122, 166, 202, 228, 179, 181, 223, 164, 252, 217, 250 }, - { 49, 125, 177, 225, 172, 179, 223, 163, 252, 215, 253 }, - { 22, 99, 142, 216, 155, 173, 222, 164, 252, 215, 250 }, - { 8, 69, 95, 180, 127, 156, 220, 153, 252, 214, 250 }, - { 2, 38, 51, 112, 109, 144, 159, 118, 243, 184, 232 } - }, { /* Coeff Band 2 */ - { 56, 196, 218, 236, 187, 185, 231, 172, 254, 223, 239 }, - { 38, 141, 195, 235, 182, 185, 233, 174, 254, 225, 232 }, - { 7, 93, 147, 225, 164, 178, 233, 173, 255, 226, 248 }, - { 2, 63, 101, 201, 137, 165, 227, 162, 254, 225, 248 }, - { 1, 39, 61, 159, 110, 148, 213, 146, 254, 218, 247 }, - { 1, 20, 33, 98, 95, 136, 166, 115, 247, 192, 231 } - }, { /* Coeff Band 3 */ - { 44, 206, 223, 240, 193, 189, 235, 177, 255, 231, 224 }, - { 27, 147, 200, 240, 188, 189, 238, 181, 255, 229, 239 }, - { 4, 93, 147, 230, 165, 180, 238, 180, 255, 231, 237 }, - { 1, 58, 95, 201, 134, 164, 229, 164, 255, 228, 254 }, - { 1, 32, 52, 152, 105, 146, 212, 142, 254, 221, 255 }, - { 1, 14, 23, 81, 87, 133, 156, 109, 248, 191, 236 } - }, { /* Coeff Band 4 */ - { 39, 216, 227, 244, 200, 194, 237, 179, 255, 231, 255 }, - { 22, 152, 204, 243, 192, 193, 240, 186, 255, 231, 240 }, - { 2, 92, 148, 232, 167, 183, 239, 182, 255, 232, 255 }, - { 1, 55, 91, 200, 132, 164, 229, 164, 255, 230, 255 }, - { 1, 28, 47, 144, 99, 142, 211, 141, 255, 222, 251 }, - { 1, 13, 21, 75, 86, 131, 152, 103, 249, 193, 242 } - }, { /* Coeff Band 5 */ - { 34, 228, 234, 249, 213, 201, 246, 194, 255, 239, 255 }, - { 13, 161, 208, 247, 198, 197, 248, 197, 255, 243, 255 }, - { 1, 95, 148, 234, 166, 183, 246, 190, 255, 243, 236 }, - { 1, 55, 90, 199, 128, 161, 237, 168, 255, 239, 255 }, - { 1, 30, 51, 147, 102, 144, 218, 142, 255, 232, 254 }, - { 1, 16, 25, 86, 88, 131, 168, 109, 252, 207, 245 } + { 60, 7, 234 }, + { 64, 4, 184 }, + { 56, 1, 104 } + }, { /* Coeff Band 1 */ + { 150, 111, 210 }, + { 87, 
185, 202 }, + { 101, 81, 177 }, + { 90, 34, 142 }, + { 67, 11, 95 }, + { 38, 2, 51 } + }, { /* Coeff Band 2 */ + { 153, 139, 218 }, + { 120, 72, 195 }, + { 90, 11, 147 }, + { 63, 3, 101 }, + { 39, 1, 61 }, + { 20, 1, 33 } + }, { /* Coeff Band 3 */ + { 171, 132, 223 }, + { 131, 56, 200 }, + { 92, 6, 147 }, + { 58, 1, 95 }, + { 32, 1, 52 }, + { 14, 1, 23 } + }, { /* Coeff Band 4 */ + { 183, 137, 227 }, + { 139, 48, 204 }, + { 91, 3, 148 }, + { 55, 1, 91 }, + { 28, 1, 47 }, + { 13, 1, 21 } + }, { /* Coeff Band 5 */ + { 198, 149, 234 }, + { 153, 32, 208 }, + { 95, 2, 148 }, + { 55, 1, 90 }, + { 30, 1, 51 }, + { 16, 1, 25 } } } }, { /* block Type 1 */ { /* Intra */ { /* Coeff Band 0 */ - { 204, 33, 217, 233, 185, 184, 199, 165, 204, 163, 162 }, - { 93, 48, 151, 209, 157, 171, 193, 161, 203, 167, 189 }, - { 18, 43, 86, 173, 126, 156, 203, 149, 231, 193, 200 } - }, { /* Coeff Band 1 */ - { 43, 121, 184, 233, 173, 182, 235, 187, 248, 211, 237 }, - { 93, 117, 177, 232, 170, 180, 235, 182, 246, 204, 224 }, - { 33, 101, 158, 229, 165, 179, 235, 182, 245, 207, 236 }, - { 11, 81, 129, 221, 153, 173, 233, 179, 246, 203, 229 }, - { 2, 51, 82, 188, 124, 158, 224, 162, 248, 206, 228 }, - { 1, 18, 29, 88, 93, 137, 141, 116, 222, 161, 217 } - }, { /* Coeff Band 2 */ - { 63, 154, 199, 239, 184, 187, 236, 187, 248, 209, 221 }, - { 53, 128, 191, 239, 182, 188, 236, 188, 251, 209, 255 }, - { 14, 99, 160, 235, 172, 184, 235, 187, 249, 207, 240 }, - { 4, 75, 122, 219, 150, 173, 226, 177, 250, 204, 240 }, - { 1, 47, 77, 176, 121, 154, 207, 153, 245, 197, 237 }, - { 1, 18, 30, 84, 95, 136, 138, 112, 229, 167, 228 } - }, { /* Coeff Band 3 */ - { 48, 193, 210, 245, 194, 194, 241, 196, 252, 213, 255 }, - { 26, 145, 201, 245, 194, 196, 240, 195, 251, 215, 240 }, - { 6, 104, 165, 241, 179, 190, 239, 191, 253, 222, 255 }, - { 1, 73, 120, 218, 151, 174, 227, 172, 251, 219, 248 }, - { 1, 42, 69, 167, 118, 153, 205, 146, 251, 206, 245 }, - { 1, 16, 27, 84, 89, 133, 148, 112, 240, 179, 238 } - }, { /* Coeff Band 4 */ - { 47, 213, 225, 248, 203, 199, 240, 194, 254, 211, 255 }, - { 32, 153, 212, 248, 201, 199, 241, 196, 251, 226, 255 }, - { 6, 102, 168, 240, 181, 190, 240, 187, 251, 225, 238 }, - { 1, 66, 111, 211, 146, 169, 229, 167, 255, 224, 244 }, - { 1, 36, 60, 157, 110, 148, 209, 143, 252, 215, 255 }, - { 1, 16, 27, 83, 90, 133, 152, 111, 244, 184, 250 } - }, { /* Coeff Band 5 */ - { 46, 225, 232, 252, 219, 208, 247, 204, 254, 233, 255 }, - { 24, 162, 214, 250, 208, 204, 247, 201, 254, 236, 255 }, - { 3, 106, 165, 242, 182, 191, 245, 196, 255, 231, 255 }, - { 1, 66, 108, 213, 142, 169, 235, 175, 255, 226, 247 }, - { 1, 35, 59, 158, 108, 147, 216, 146, 254, 220, 255 }, - { 1, 16, 27, 85, 90, 131, 159, 110, 248, 191, 252 } + { 7, 209, 217 }, + { 31, 106, 151 }, + { 40, 21, 86 } + }, { /* Coeff Band 1 */ + { 101, 71, 184 }, + { 74, 131, 177 }, + { 88, 50, 158 }, + { 78, 16, 129 }, + { 51, 2, 82 }, + { 18, 1, 29 } + }, { /* Coeff Band 2 */ + { 116, 115, 199 }, + { 102, 88, 191 }, + { 94, 22, 160 }, + { 74, 6, 122 }, + { 47, 1, 77 }, + { 18, 1, 30 } + }, { /* Coeff Band 3 */ + { 157, 124, 210 }, + { 130, 53, 201 }, + { 102, 10, 165 }, + { 73, 1, 120 }, + { 42, 1, 69 }, + { 16, 1, 27 } + }, { /* Coeff Band 4 */ + { 174, 147, 225 }, + { 134, 67, 212 }, + { 100, 10, 168 }, + { 66, 1, 111 }, + { 36, 1, 60 }, + { 16, 1, 27 } + }, { /* Coeff Band 5 */ + { 185, 165, 232 }, + { 147, 56, 214 }, + { 105, 5, 165 }, + { 66, 1, 108 }, + { 35, 1, 59 }, + { 16, 1, 27 } } }, { /* Inter */ { /* Coeff Band 0 */ - { 229, 28, 245, 227, 195, 182, 
200, 145, 253, 186, 255 }, - { 151, 44, 210, 214, 180, 175, 193, 146, 247, 185, 254 }, - { 55, 48, 131, 183, 148, 163, 194, 138, 249, 201, 246 } - }, { /* Coeff Band 1 */ - { 126, 165, 239, 250, 206, 204, 248, 193, 255, 255, 128 }, - { 199, 158, 231, 248, 206, 198, 247, 200, 243, 255, 255 }, - { 102, 136, 209, 248, 203, 197, 247, 201, 255, 244, 128 }, - { 64, 116, 181, 245, 185, 196, 248, 201, 255, 233, 128 }, - { 44, 98, 151, 233, 162, 179, 248, 195, 255, 242, 128 }, - { 44, 81, 119, 204, 140, 165, 222, 163, 252, 217, 255 } - }, { /* Coeff Band 2 */ - { 108, 185, 239, 252, 216, 209, 248, 205, 255, 230, 128 }, - { 91, 155, 224, 252, 211, 205, 251, 211, 255, 230, 128 }, - { 20, 116, 185, 248, 194, 196, 252, 206, 255, 255, 128 }, - { 8, 86, 141, 239, 168, 185, 248, 196, 255, 247, 128 }, - { 3, 50, 92, 206, 125, 164, 242, 176, 255, 246, 128 }, - { 1, 21, 40, 131, 85, 141, 200, 131, 247, 236, 255 } - }, { /* Coeff Band 3 */ - { 94, 198, 243, 254, 226, 215, 254, 220, 255, 255, 128 }, - { 67, 164, 228, 253, 217, 208, 250, 216, 255, 213, 128 }, - { 14, 120, 185, 250, 196, 205, 248, 205, 255, 255, 128 }, - { 4, 83, 134, 238, 161, 181, 250, 202, 255, 233, 128 }, - { 1, 48, 82, 196, 119, 157, 248, 178, 255, 255, 128 }, - { 1, 26, 38, 96, 84, 132, 221, 110, 255, 209, 128 } - }, { /* Coeff Band 4 */ - { 82, 210, 245, 255, 230, 215, 246, 221, 255, 255, 128 }, - { 55, 170, 231, 254, 222, 213, 255, 220, 255, 255, 128 }, - { 8, 118, 184, 251, 200, 207, 255, 219, 255, 255, 128 }, - { 2, 78, 126, 239, 156, 185, 251, 216, 255, 255, 128 }, - { 1, 43, 68, 189, 108, 151, 247, 187, 255, 228, 128 }, - { 1, 34, 40, 121, 114, 102, 205, 96, 255, 255, 128 } - }, { /* Coeff Band 5 */ - { 65, 228, 241, 255, 231, 214, 253, 222, 255, 255, 128 }, - { 33, 173, 226, 254, 222, 216, 255, 231, 255, 255, 128 }, - { 5, 120, 180, 251, 197, 205, 251, 226, 255, 233, 128 }, - { 1, 81, 130, 240, 159, 187, 251, 206, 255, 205, 128 }, - { 1, 51, 78, 198, 119, 168, 238, 181, 255, 171, 128 }, - { 1, 18, 49, 183, 119, 160, 255, 171, 128, 128, 128 } + { 3, 232, 245 }, + { 18, 162, 210 }, + { 38, 64, 131 } + }, { /* Coeff Band 1 */ + { 84, 187, 239 }, + { 35, 231, 231 }, + { 82, 150, 209 }, + { 87, 97, 181 }, + { 81, 64, 151 }, + { 67, 60, 119 } + }, { /* Coeff Band 2 */ + { 107, 185, 239 }, + { 100, 149, 224 }, + { 107, 34, 185 }, + { 83, 12, 141 }, + { 49, 4, 92 }, + { 21, 1, 40 } + }, { /* Coeff Band 3 */ + { 125, 184, 243 }, + { 121, 127, 228 }, + { 113, 25, 185 }, + { 82, 6, 134 }, + { 48, 1, 82 }, + { 26, 1, 38 } + }, { /* Coeff Band 4 */ + { 143, 185, 245 }, + { 133, 115, 231 }, + { 114, 14, 184 }, + { 77, 3, 126 }, + { 43, 1, 68 }, + { 34, 1, 40 } + }, { /* Coeff Band 5 */ + { 170, 194, 241 }, + { 151, 80, 226 }, + { 118, 9, 180 }, + { 81, 1, 130 }, + { 51, 1, 78 }, + { 18, 1, 49 } } } } }; -static const vp9_coeff_probs default_coef_probs_32x32[BLOCK_TYPES] = { +static const vp9_coeff_probs_model default_coef_probs_32x32[BLOCK_TYPES] = { { /* block Type 0 */ { /* Intra */ { /* Coeff Band 0 */ - { 37, 34, 137, 205, 154, 170, 151, 159, 109, 172, 44 }, - { 3, 26, 60, 113, 123, 154, 100, 124, 152, 131, 144 }, - { 1, 13, 23, 54, 102, 139, 71, 106, 146, 123, 148 } - }, { /* Coeff Band 1 */ - { 26, 77, 122, 152, 144, 160, 143, 129, 216, 158, 201 }, - { 43, 76, 123, 152, 142, 159, 145, 129, 218, 160, 204 }, - { 25, 67, 112, 150, 141, 159, 144, 128, 218, 159, 204 }, - { 9, 54, 90, 143, 134, 156, 144, 127, 218, 159, 204 }, - { 2, 32, 52, 116, 114, 148, 138, 123, 217, 158, 207 }, - { 1, 10, 15, 44, 91, 133, 75, 99, 172, 128, 169 } - }, { /* 
Coeff Band 2 */ - { 32, 122, 143, 163, 145, 161, 162, 131, 226, 171, 206 }, - { 46, 105, 143, 168, 148, 161, 165, 133, 228, 174, 204 }, - { 17, 79, 116, 164, 142, 161, 166, 134, 229, 174, 206 }, - { 4, 53, 78, 143, 125, 153, 163, 129, 232, 175, 213 }, - { 1, 29, 44, 105, 105, 142, 147, 120, 228, 168, 211 }, - { 1, 12, 18, 52, 91, 133, 92, 100, 193, 140, 183 } - }, { /* Coeff Band 3 */ - { 33, 157, 160, 182, 149, 163, 185, 141, 236, 185, 218 }, - { 20, 116, 152, 188, 152, 165, 191, 144, 238, 188, 217 }, - { 4, 74, 114, 180, 141, 162, 192, 143, 240, 191, 219 }, - { 1, 44, 69, 148, 119, 151, 183, 134, 243, 192, 227 }, - { 1, 25, 40, 110, 101, 141, 162, 121, 238, 181, 223 }, - { 1, 12, 18, 56, 89, 132, 103, 101, 206, 148, 196 } - }, { /* Coeff Band 4 */ - { 25, 183, 174, 207, 159, 171, 205, 156, 243, 194, 228 }, - { 13, 124, 159, 209, 157, 171, 213, 160, 243, 200, 228 }, - { 2, 75, 117, 199, 143, 166, 215, 158, 246, 205, 230 }, - { 1, 45, 73, 165, 119, 153, 204, 144, 248, 205, 231 }, - { 1, 26, 43, 120, 101, 141, 178, 127, 242, 192, 226 }, - { 1, 12, 19, 59, 89, 132, 112, 102, 215, 154, 201 } - }, { /* Coeff Band 5 */ - { 13, 232, 223, 239, 196, 188, 225, 172, 248, 209, 226 }, - { 4, 155, 187, 237, 184, 187, 233, 180, 250, 216, 232 }, - { 1, 86, 131, 222, 156, 175, 233, 176, 251, 218, 237 }, - { 1, 49, 79, 181, 123, 157, 218, 155, 251, 214, 237 }, - { 1, 26, 43, 125, 100, 141, 188, 130, 246, 199, 231 }, - { 1, 12, 20, 62, 88, 131, 119, 102, 222, 161, 209 } + { 29, 42, 137 }, + { 26, 3, 60 }, + { 13, 1, 23 } + }, { /* Coeff Band 1 */ + { 69, 36, 122 }, + { 63, 57, 123 }, + { 60, 33, 112 }, + { 52, 11, 90 }, + { 32, 2, 52 }, + { 10, 1, 15 } + }, { /* Coeff Band 2 */ + { 107, 55, 143 }, + { 86, 69, 143 }, + { 74, 24, 116 }, + { 52, 5, 78 }, + { 29, 1, 44 }, + { 12, 1, 18 } + }, { /* Coeff Band 3 */ + { 137, 71, 160 }, + { 107, 34, 152 }, + { 73, 6, 114 }, + { 44, 1, 69 }, + { 25, 1, 40 }, + { 12, 1, 18 } + }, { /* Coeff Band 4 */ + { 165, 70, 174 }, + { 118, 24, 159 }, + { 74, 3, 117 }, + { 45, 1, 73 }, + { 26, 1, 43 }, + { 12, 1, 19 } + }, { /* Coeff Band 5 */ + { 220, 93, 223 }, + { 153, 10, 187 }, + { 86, 2, 131 }, + { 49, 1, 79 }, + { 26, 1, 43 }, + { 12, 1, 20 } } }, { /* Inter */ { /* Coeff Band 0 */ - { 51, 37, 227, 237, 205, 184, 200, 162, 231, 187, 207 }, - { 9, 36, 172, 204, 176, 173, 171, 145, 217, 167, 197 }, - { 21, 26, 112, 162, 145, 162, 155, 133, 215, 165, 191 } - }, { /* Coeff Band 1 */ - { 79, 169, 219, 223, 176, 177, 222, 161, 248, 213, 244 }, - { 177, 166, 216, 222, 175, 178, 222, 161, 246, 212, 226 }, - { 119, 141, 196, 222, 174, 176, 220, 163, 250, 212, 236 }, - { 63, 117, 165, 217, 163, 175, 218, 161, 248, 209, 231 }, - { 30, 87, 117, 192, 138, 162, 216, 157, 247, 211, 224 }, - { 14, 56, 60, 119, 111, 146, 156, 123, 227, 171, 220 } - }, { /* Coeff Band 2 */ - { 88, 195, 225, 229, 181, 181, 229, 171, 252, 212, 221 }, - { 66, 145, 202, 229, 177, 180, 230, 172, 253, 220, 255 }, - { 12, 97, 152, 221, 162, 174, 230, 169, 253, 218, 249 }, - { 3, 66, 103, 198, 138, 165, 223, 159, 253, 219, 251 }, - { 1, 38, 61, 158, 110, 148, 209, 146, 252, 212, 238 }, - { 1, 19, 30, 94, 94, 136, 160, 114, 244, 185, 236 } - }, { /* Coeff Band 3 */ - { 79, 211, 228, 235, 186, 184, 233, 176, 255, 225, 255 }, - { 50, 151, 205, 235, 182, 185, 237, 177, 254, 223, 255 }, - { 7, 95, 149, 225, 162, 176, 236, 177, 254, 229, 219 }, - { 1, 62, 98, 198, 134, 164, 228, 162, 254, 224, 238 }, - { 1, 35, 57, 156, 108, 148, 211, 143, 253, 215, 238 }, - { 1, 17, 26, 87, 89, 135, 161, 113, 246, 189, 237 } - }, { /* 
Coeff Band 4 */ - { 68, 225, 230, 239, 190, 187, 238, 180, 252, 234, 255 }, - { 39, 156, 206, 239, 185, 187, 241, 187, 254, 231, 255 }, - { 4, 94, 147, 229, 163, 178, 242, 183, 255, 236, 224 }, - { 1, 58, 94, 200, 132, 163, 232, 166, 254, 230, 255 }, - { 1, 32, 52, 153, 104, 146, 214, 144, 253, 222, 236 }, - { 1, 15, 24, 84, 89, 131, 159, 109, 247, 192, 240 } - }, { /* Coeff Band 5 */ - { 45, 248, 234, 248, 208, 198, 244, 193, 255, 233, 255 }, - { 19, 169, 204, 246, 195, 195, 246, 199, 255, 233, 255 }, - { 2, 98, 145, 235, 166, 183, 245, 192, 255, 235, 255 }, - { 1, 59, 92, 205, 131, 164, 236, 172, 254, 231, 250 }, - { 1, 33, 52, 152, 103, 145, 216, 144, 253, 221, 240 }, - { 1, 15, 24, 83, 87, 133, 156, 110, 246, 191, 242 } + { 30, 58, 227 }, + { 35, 10, 172 }, + { 24, 23, 112 } + }, { /* Coeff Band 1 */ + { 117, 145, 219 }, + { 51, 221, 216 }, + { 75, 169, 196 }, + { 88, 96, 165 }, + { 77, 43, 117 }, + { 53, 18, 60 } + }, { /* Coeff Band 2 */ + { 128, 176, 225 }, + { 108, 114, 202 }, + { 92, 19, 152 }, + { 65, 4, 103 }, + { 38, 1, 61 }, + { 19, 1, 30 } + }, { /* Coeff Band 3 */ + { 146, 184, 228 }, + { 122, 95, 205 }, + { 92, 11, 149 }, + { 62, 1, 98 }, + { 35, 1, 57 }, + { 17, 1, 26 } + }, { /* Coeff Band 4 */ + { 165, 192, 230 }, + { 132, 81, 206 }, + { 93, 6, 147 }, + { 58, 1, 94 }, + { 32, 1, 52 }, + { 15, 1, 24 } + }, { /* Coeff Band 5 */ + { 204, 223, 234 }, + { 156, 49, 204 }, + { 97, 3, 145 }, + { 59, 1, 92 }, + { 33, 1, 52 }, + { 15, 1, 24 } } } }, { /* block Type 1 */ { /* Intra */ { /* Coeff Band 0 */ - { 179, 23, 200, 222, 180, 182, 150, 152, 148, 135, 125 }, - { 60, 33, 113, 185, 143, 166, 168, 144, 189, 168, 152 }, - { 8, 31, 59, 137, 114, 150, 163, 132, 206, 171, 169 } - }, { /* Coeff Band 1 */ - { 27, 103, 158, 215, 157, 174, 209, 165, 239, 191, 233 }, - { 90, 101, 159, 213, 156, 173, 212, 164, 230, 185, 237 }, - { 39, 91, 146, 212, 155, 169, 212, 165, 232, 186, 207 }, - { 16, 75, 120, 203, 144, 169, 210, 161, 233, 189, 227 }, - { 3, 48, 76, 167, 120, 154, 199, 146, 236, 190, 218 }, - { 1, 18, 26, 72, 95, 137, 113, 109, 197, 146, 186 } - }, { /* Coeff Band 2 */ - { 45, 137, 177, 218, 166, 174, 206, 163, 234, 184, 214 }, - { 47, 117, 167, 218, 166, 176, 206, 164, 234, 182, 229 }, - { 16, 90, 136, 211, 153, 172, 205, 162, 236, 192, 231 }, - { 6, 65, 100, 188, 136, 162, 193, 155, 237, 177, 228 }, - { 1, 37, 58, 137, 113, 150, 166, 134, 229, 167, 234 }, - { 1, 13, 19, 55, 90, 132, 93, 103, 196, 137, 202 } - }, { /* Coeff Band 3 */ - { 36, 171, 194, 227, 177, 179, 208, 165, 244, 196, 245 }, - { 19, 129, 178, 227, 175, 184, 214, 165, 246, 188, 255 }, - { 5, 90, 139, 217, 158, 174, 213, 166, 246, 198, 255 }, - { 1, 59, 93, 182, 134, 162, 193, 150, 242, 188, 241 }, - { 1, 31, 49, 122, 108, 145, 160, 127, 235, 172, 229 }, - { 1, 10, 18, 54, 89, 132, 101, 99, 213, 144, 217 } - }, { /* Coeff Band 4 */ - { 37, 197, 210, 233, 187, 186, 216, 172, 250, 202, 255 }, - { 20, 142, 191, 234, 183, 186, 219, 170, 249, 207, 246 }, - { 3, 93, 144, 222, 163, 176, 219, 170, 249, 204, 224 }, - { 1, 56, 88, 179, 130, 159, 199, 148, 246, 197, 243 }, - { 1, 29, 47, 123, 104, 144, 172, 127, 244, 185, 234 }, - { 1, 14, 22, 66, 91, 130, 120, 103, 225, 158, 221 } - }, { /* Coeff Band 5 */ - { 19, 227, 223, 245, 203, 194, 238, 187, 251, 225, 217 }, - { 6, 152, 192, 242, 189, 190, 241, 190, 253, 225, 255 }, - { 1, 89, 138, 228, 161, 177, 239, 181, 254, 224, 248 }, - { 1, 52, 84, 188, 127, 157, 224, 159, 253, 222, 247 }, - { 1, 29, 47, 132, 102, 140, 196, 132, 251, 208, 244 }, - { 1, 14, 23, 71, 90, 133, 134, 
103, 239, 174, 233 } + { 7, 184, 200 }, + { 25, 67, 113 }, + { 30, 9, 59 } + }, { /* Coeff Band 1 */ + { 92, 42, 158 }, + { 65, 121, 159 }, + { 77, 56, 146 }, + { 70, 22, 120 }, + { 47, 4, 76 }, + { 18, 1, 26 } + }, { /* Coeff Band 2 */ + { 113, 81, 177 }, + { 96, 75, 167 }, + { 84, 24, 136 }, + { 63, 8, 100 }, + { 37, 1, 58 }, + { 13, 1, 19 } + }, { /* Coeff Band 3 */ + { 147, 85, 194 }, + { 119, 36, 178 }, + { 88, 8, 139 }, + { 59, 1, 93 }, + { 31, 1, 49 }, + { 10, 1, 18 } + }, { /* Coeff Band 4 */ + { 169, 108, 210 }, + { 131, 41, 191 }, + { 92, 5, 144 }, + { 56, 1, 88 }, + { 29, 1, 47 }, + { 14, 1, 22 } + }, { /* Coeff Band 5 */ + { 210, 106, 223 }, + { 148, 14, 192 }, + { 89, 2, 138 }, + { 52, 1, 84 }, + { 29, 1, 47 }, + { 14, 1, 23 } } }, { /* Inter */ { /* Coeff Band 0 */ - { 205, 14, 245, 235, 216, 189, 190, 146, 249, 201, 255 }, - { 97, 19, 213, 210, 194, 174, 176, 139, 241, 183, 250 }, - { 31, 20, 144, 183, 160, 167, 171, 132, 240, 184, 253 } - }, { /* Coeff Band 1 */ - { 137, 182, 245, 254, 221, 216, 255, 160, 128, 128, 128 }, - { 231, 185, 242, 251, 218, 205, 255, 233, 128, 128, 128 }, - { 170, 175, 229, 252, 205, 209, 255, 211, 128, 128, 128 }, - { 107, 157, 213, 250, 199, 205, 251, 207, 255, 255, 128 }, - { 77, 126, 183, 243, 182, 183, 252, 206, 255, 255, 128 }, - { 69, 96, 149, 229, 157, 170, 247, 169, 255, 255, 128 } - }, { /* Coeff Band 2 */ - { 107, 196, 241, 252, 211, 208, 255, 210, 128, 128, 128 }, - { 92, 162, 221, 249, 203, 195, 255, 199, 128, 128, 128 }, - { 20, 108, 181, 244, 190, 191, 250, 200, 255, 255, 128 }, - { 7, 80, 132, 241, 172, 197, 253, 191, 255, 255, 128 }, - { 2, 43, 75, 219, 122, 150, 255, 203, 128, 128, 128 }, - { 1, 15, 48, 98, 51, 192, 255, 160, 128, 128, 128 } - }, { /* Coeff Band 3 */ - { 107, 202, 244, 254, 226, 215, 255, 192, 128, 128, 128 }, - { 77, 167, 224, 252, 215, 212, 255, 235, 128, 128, 128 }, - { 14, 117, 179, 249, 191, 196, 255, 212, 128, 128, 128 }, - { 3, 84, 134, 237, 160, 194, 248, 216, 255, 255, 128 }, - { 1, 57, 84, 216, 145, 136, 255, 161, 128, 128, 128 }, - { 1, 1, 1, 255, 128, 255, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 4 */ - { 88, 219, 248, 255, 239, 225, 255, 255, 128, 128, 128 }, - { 61, 178, 234, 255, 227, 227, 255, 217, 128, 128, 128 }, - { 6, 127, 188, 252, 201, 211, 255, 244, 128, 128, 128 }, - { 1, 83, 130, 248, 173, 197, 255, 175, 128, 128, 128 }, - { 1, 61, 66, 211, 121, 188, 255, 213, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, { /* Coeff Band 5 */ - { 73, 243, 250, 255, 244, 220, 255, 205, 128, 128, 128 }, - { 42, 197, 242, 255, 237, 227, 242, 166, 255, 255, 128 }, - { 10, 137, 197, 252, 214, 199, 255, 238, 128, 128, 128 }, - { 2, 85, 134, 242, 163, 185, 224, 238, 255, 255, 128 }, - { 1, 70, 69, 199, 110, 64, 255, 213, 128, 128, 128 }, - { 1, 1, 1, 1, 128, 128, 255, 1, 128, 128, 128 } + { 3, 207, 245 }, + { 12, 102, 213 }, + { 18, 33, 144 } + }, { /* Coeff Band 1 */ + { 85, 205, 245 }, + { 18, 249, 242 }, + { 59, 221, 229 }, + { 91, 166, 213 }, + { 88, 117, 183 }, + { 70, 95, 149 } + }, { /* Coeff Band 2 */ + { 114, 193, 241 }, + { 104, 155, 221 }, + { 100, 33, 181 }, + { 78, 10, 132 }, + { 43, 2, 75 }, + { 15, 1, 48 } + }, { /* Coeff Band 3 */ + { 118, 198, 244 }, + { 117, 142, 224 }, + { 111, 25, 179 }, + { 83, 4, 134 }, + { 57, 1, 84 }, + { 1, 1, 1 } + }, { /* Coeff Band 4 */ + { 144, 201, 248 }, + { 136, 130, 234 }, + { 124, 12, 188 }, + { 83, 1, 130 }, + { 61, 1, 66 }, + { 64, 171, 128 } + }, { /* Coeff Band 5 */ + { 174, 227, 250 }, + { 165, 118, 242 }, + { 132, 
21, 197 }, + { 84, 3, 134 }, + { 70, 1, 69 }, + { 1, 1, 1 } } } } }; - -#if CONFIG_CODE_NONZEROCOUNT - -// TODO(debargha): Remove the macro and count tables after experimentation -#define NZC_DEFAULT_COUNTS /* Uncomment to use counts as defaults */ - -#ifdef NZC_DEFAULT_COUNTS -static const unsigned int default_nzc_counts_4x4[MAX_NZC_CONTEXTS] - [REF_TYPES] - [BLOCK_TYPES] - [NZC4X4_TOKENS] = { - { - { - { 967652, 29023, 15039, 6952, 1568, 116 }, - { 289116, 22938, 4522, 1935, 520, 47 } - }, { - { 967652, 29023, 15039, 6952, 1568, 116 }, - { 689116, 22938, 4522, 1935, 520, 47 } - }, - }, { - { - { 124684, 37167, 15270, 8483, 1777, 102 }, - { 10405, 12395, 3401, 3574, 2461, 771 } - }, { - { 124684, 37167, 15270, 8483, 1777, 102 }, - { 20405, 12395, 3401, 3574, 2461, 771 } +#else +static const vp9_coeff_probs_model default_coef_probs_4x4[BLOCK_TYPES] = { + { /* block Type 0 */ + { /* Intra */ + { /* Coeff Band 0 */ + { 195, 29, 183 }, + { 84, 49, 136 }, + { 8, 42, 71 } + }, { /* Coeff Band 1 */ + { 31, 107, 169 }, + { 35, 99, 159 }, + { 17, 82, 140 }, + { 8, 66, 114 }, + { 2, 44, 76 }, + { 1, 19, 32 } + }, { /* Coeff Band 2 */ + { 40, 132, 201 }, + { 29, 114, 187 }, + { 13, 91, 157 }, + { 7, 75, 127 }, + { 3, 58, 95 }, + { 1, 28, 47 } + }, { /* Coeff Band 3 */ + { 69, 142, 221 }, + { 42, 122, 201 }, + { 15, 91, 159 }, + { 6, 67, 121 }, + { 1, 42, 77 }, + { 1, 17, 31 } + }, { /* Coeff Band 4 */ + { 102, 148, 228 }, + { 67, 117, 204 }, + { 17, 82, 154 }, + { 6, 59, 114 }, + { 2, 39, 75 }, + { 1, 15, 29 } + }, { /* Coeff Band 5 */ + { 156, 57, 233 }, + { 119, 57, 212 }, + { 58, 48, 163 }, + { 29, 40, 124 }, + { 12, 30, 81 }, + { 3, 12, 31 } + } + }, { /* Inter */ + { /* Coeff Band 0 */ + { 191, 107, 226 }, + { 124, 117, 204 }, + { 25, 99, 155 } + }, { /* Coeff Band 1 */ + { 29, 148, 210 }, + { 37, 126, 194 }, + { 8, 93, 157 }, + { 2, 68, 118 }, + { 1, 39, 69 }, + { 1, 17, 33 } + }, { /* Coeff Band 2 */ + { 41, 151, 213 }, + { 27, 123, 193 }, + { 3, 82, 144 }, + { 1, 58, 105 }, + { 1, 32, 60 }, + { 1, 13, 26 } + }, { /* Coeff Band 3 */ + { 59, 159, 220 }, + { 23, 126, 198 }, + { 4, 88, 151 }, + { 1, 66, 114 }, + { 1, 38, 71 }, + { 1, 18, 34 } + }, { /* Coeff Band 4 */ + { 114, 136, 232 }, + { 51, 114, 207 }, + { 11, 83, 155 }, + { 3, 56, 105 }, + { 1, 33, 65 }, + { 1, 17, 34 } + }, { /* Coeff Band 5 */ + { 149, 65, 234 }, + { 121, 57, 215 }, + { 61, 49, 166 }, + { 28, 36, 114 }, + { 12, 25, 76 }, + { 3, 16, 42 } + } } - }, { - { - { 4100, 22976, 15627, 16137, 7982, 1793 }, - { 4249, 3084, 2131, 4081, 6439, 1653 } - }, { - { 21100, 22976, 15627, 16137, 7982, 1793 }, - { 4249, 3084, 2131, 4081, 2439, 1653 } + }, { /* block Type 1 */ + { /* Intra */ + { /* Coeff Band 0 */ + { 214, 49, 220 }, + { 132, 63, 188 }, + { 42, 65, 137 } + }, { /* Coeff Band 1 */ + { 85, 137, 221 }, + { 104, 131, 216 }, + { 49, 111, 192 }, + { 21, 87, 155 }, + { 2, 49, 87 }, + { 1, 16, 28 } + }, { /* Coeff Band 2 */ + { 89, 163, 230 }, + { 90, 137, 220 }, + { 29, 100, 183 }, + { 10, 70, 135 }, + { 2, 42, 81 }, + { 1, 17, 33 } + }, { /* Coeff Band 3 */ + { 108, 167, 237 }, + { 55, 133, 222 }, + { 15, 97, 179 }, + { 4, 72, 135 }, + { 1, 45, 85 }, + { 1, 19, 38 } + }, { /* Coeff Band 4 */ + { 124, 146, 240 }, + { 66, 124, 224 }, + { 17, 88, 175 }, + { 4, 58, 122 }, + { 1, 36, 75 }, + { 1, 18, 37 } + }, { /* Coeff Band 5 */ + { 141, 79, 241 }, + { 126, 70, 227 }, + { 66, 58, 182 }, + { 30, 44, 136 }, + { 12, 34, 96 }, + { 2, 20, 47 } + } + }, { /* Inter */ + { /* Coeff Band 0 */ + { 229, 99, 249 }, + { 143, 111, 235 }, + { 46, 
109, 192 } + }, { /* Coeff Band 1 */ + { 82, 158, 236 }, + { 94, 146, 224 }, + { 25, 117, 191 }, + { 9, 87, 149 }, + { 3, 56, 99 }, + { 1, 33, 57 } + }, { /* Coeff Band 2 */ + { 83, 167, 237 }, + { 68, 145, 222 }, + { 10, 103, 177 }, + { 2, 72, 131 }, + { 1, 41, 79 }, + { 1, 20, 39 } + }, { /* Coeff Band 3 */ + { 99, 167, 239 }, + { 47, 141, 224 }, + { 10, 104, 178 }, + { 2, 73, 133 }, + { 1, 44, 85 }, + { 1, 22, 47 } + }, { /* Coeff Band 4 */ + { 127, 145, 243 }, + { 71, 129, 228 }, + { 17, 93, 177 }, + { 3, 61, 124 }, + { 1, 41, 84 }, + { 1, 21, 52 } + }, { /* Coeff Band 5 */ + { 157, 78, 244 }, + { 140, 72, 231 }, + { 69, 58, 184 }, + { 31, 44, 137 }, + { 14, 38, 105 }, + { 8, 23, 61 } + } } } }; - -static const unsigned int default_nzc_counts_8x8[MAX_NZC_CONTEXTS] - [REF_TYPES] - [BLOCK_TYPES] - [NZC8X8_TOKENS] = { - { - { - { 372988, 62777, 19440, 11812, 5145, 1917, 439, 10 }, - { 72052, 30468, 6973, 3250, 1500, 750, 375, 5 }, - }, { - { 372988, 62777, 19440, 11812, 5145, 1917, 439, 10 }, - { 192052, 30468, 6973, 3250, 1500, 750, 375, 5 }, - } - }, { - { - { 121533, 33527, 15655, 11920, 5723, 2009, 315, 7 }, - { 23772, 23120, 13127, 8115, 4000, 2000, 200, 6 }, - }, { - { 121533, 33527, 15655, 11920, 5723, 2009, 315, 7 }, - { 23772, 23120, 13127, 8115, 4000, 2000, 200, 6 }, +static const vp9_coeff_probs_model default_coef_probs_8x8[BLOCK_TYPES] = { + { /* block Type 0 */ + { /* Intra */ + { /* Coeff Band 0 */ + { 125, 34, 187 }, + { 52, 41, 133 }, + { 6, 31, 56 } + }, { /* Coeff Band 1 */ + { 37, 109, 153 }, + { 51, 102, 147 }, + { 23, 87, 128 }, + { 8, 67, 101 }, + { 1, 41, 63 }, + { 1, 19, 29 } + }, { /* Coeff Band 2 */ + { 31, 154, 185 }, + { 17, 127, 175 }, + { 6, 96, 145 }, + { 2, 73, 114 }, + { 1, 51, 82 }, + { 1, 28, 45 } + }, { /* Coeff Band 3 */ + { 23, 163, 200 }, + { 10, 131, 185 }, + { 2, 93, 148 }, + { 1, 67, 111 }, + { 1, 41, 69 }, + { 1, 14, 24 } + }, { /* Coeff Band 4 */ + { 29, 176, 217 }, + { 12, 145, 201 }, + { 3, 101, 156 }, + { 1, 69, 111 }, + { 1, 39, 63 }, + { 1, 14, 23 } + }, { /* Coeff Band 5 */ + { 57, 192, 233 }, + { 25, 154, 215 }, + { 6, 109, 167 }, + { 3, 78, 118 }, + { 1, 48, 69 }, + { 1, 21, 29 } + } + }, { /* Inter */ + { /* Coeff Band 0 */ + { 202, 105, 245 }, + { 108, 106, 216 }, + { 18, 90, 144 } + }, { /* Coeff Band 1 */ + { 33, 172, 219 }, + { 64, 149, 206 }, + { 14, 117, 177 }, + { 5, 90, 141 }, + { 2, 61, 95 }, + { 1, 37, 57 } + }, { /* Coeff Band 2 */ + { 33, 179, 220 }, + { 11, 140, 198 }, + { 1, 89, 148 }, + { 1, 60, 104 }, + { 1, 33, 57 }, + { 1, 12, 21 } + }, { /* Coeff Band 3 */ + { 30, 181, 221 }, + { 8, 141, 198 }, + { 1, 87, 145 }, + { 1, 58, 100 }, + { 1, 31, 55 }, + { 1, 12, 20 } + }, { /* Coeff Band 4 */ + { 32, 186, 224 }, + { 7, 142, 198 }, + { 1, 86, 143 }, + { 1, 58, 100 }, + { 1, 31, 55 }, + { 1, 12, 22 } + }, { /* Coeff Band 5 */ + { 57, 192, 227 }, + { 20, 143, 204 }, + { 3, 96, 154 }, + { 1, 68, 112 }, + { 1, 42, 69 }, + { 1, 19, 32 } + } } - }, { - { - { 29408, 11758, 8023, 10123, 6705, 2468, 369, 17 }, - { 11612, 13874, 13329, 13022, 6500, 3250, 300, 12 }, - }, { - { 29408, 11758, 8023, 10123, 6705, 2468, 369, 17 }, - { 11612, 13874, 13329, 13022, 6500, 3250, 300, 12 }, + }, { /* block Type 1 */ + { /* Intra */ + { /* Coeff Band 0 */ + { 212, 35, 215 }, + { 113, 47, 169 }, + { 29, 48, 105 } + }, { /* Coeff Band 1 */ + { 74, 129, 203 }, + { 106, 120, 203 }, + { 49, 107, 178 }, + { 19, 84, 144 }, + { 4, 50, 84 }, + { 1, 15, 25 } + }, { /* Coeff Band 2 */ + { 71, 172, 217 }, + { 44, 141, 209 }, + { 15, 102, 173 }, + { 6, 76, 
133 }, + { 2, 51, 89 }, + { 1, 24, 42 } + }, { /* Coeff Band 3 */ + { 64, 185, 231 }, + { 31, 148, 216 }, + { 8, 103, 175 }, + { 3, 74, 131 }, + { 1, 46, 81 }, + { 1, 18, 30 } + }, { /* Coeff Band 4 */ + { 65, 196, 235 }, + { 25, 157, 221 }, + { 5, 105, 174 }, + { 1, 67, 120 }, + { 1, 38, 69 }, + { 1, 15, 30 } + }, { /* Coeff Band 5 */ + { 65, 204, 238 }, + { 30, 156, 224 }, + { 7, 107, 177 }, + { 2, 70, 124 }, + { 1, 42, 73 }, + { 1, 18, 34 } + } + }, { /* Inter */ + { /* Coeff Band 0 */ + { 225, 86, 251 }, + { 144, 104, 235 }, + { 42, 99, 181 } + }, { /* Coeff Band 1 */ + { 85, 175, 239 }, + { 112, 165, 229 }, + { 29, 136, 200 }, + { 12, 103, 162 }, + { 6, 77, 123 }, + { 2, 53, 84 } + }, { /* Coeff Band 2 */ + { 75, 183, 239 }, + { 30, 155, 221 }, + { 3, 106, 171 }, + { 1, 74, 128 }, + { 1, 44, 76 }, + { 1, 17, 28 } + }, { /* Coeff Band 3 */ + { 73, 185, 240 }, + { 27, 159, 222 }, + { 2, 107, 172 }, + { 1, 75, 127 }, + { 1, 42, 73 }, + { 1, 17, 29 } + }, { /* Coeff Band 4 */ + { 62, 190, 238 }, + { 21, 159, 222 }, + { 2, 107, 172 }, + { 1, 72, 122 }, + { 1, 40, 71 }, + { 1, 18, 32 } + }, { /* Coeff Band 5 */ + { 61, 199, 240 }, + { 27, 161, 226 }, + { 4, 113, 180 }, + { 1, 76, 129 }, + { 1, 46, 80 }, + { 1, 23, 41 } + } } } }; - -static const unsigned int default_nzc_counts_16x16[MAX_NZC_CONTEXTS] - [REF_TYPES] - [BLOCK_TYPES] - [NZC16X16_TOKENS] = { - { - { - { 372988, 62777, 19440, 11812, 5145, 1917, 439, 10, 5, 2 }, - { 72052, 30468, 6973, 3250, 1500, 750, 375, 50, 8, 1 }, - }, { - { 32988, 62777, 19440, 11812, 5145, 1917, 439, 10, 5, 2 }, - { 92052, 30468, 6973, 3250, 1500, 750, 375, 50, 8, 1 }, - } - }, { - { - { 21533, 33527, 15655, 11920, 5723, 2009, 315, 7, 4, 2 }, - { 47772, 23120, 13127, 8115, 4000, 2000, 200, 6, 4, 2 }, - }, { - { 21533, 33527, 15655, 11920, 5723, 2009, 315, 7, 4, 2 }, - { 27772, 23120, 13127, 8115, 4000, 2000, 200, 6, 4, 2 }, +static const vp9_coeff_probs_model default_coef_probs_16x16[BLOCK_TYPES] = { + { /* block Type 0 */ + { /* Intra */ + { /* Coeff Band 0 */ + { 7, 27, 153 }, + { 5, 30, 95 }, + { 1, 16, 30 } + }, { /* Coeff Band 1 */ + { 50, 75, 127 }, + { 57, 75, 124 }, + { 27, 67, 108 }, + { 10, 54, 86 }, + { 1, 33, 52 }, + { 1, 12, 18 } + }, { /* Coeff Band 2 */ + { 43, 125, 151 }, + { 26, 108, 148 }, + { 7, 83, 122 }, + { 2, 59, 89 }, + { 1, 38, 60 }, + { 1, 17, 27 } + }, { /* Coeff Band 3 */ + { 23, 144, 163 }, + { 13, 112, 154 }, + { 2, 75, 117 }, + { 1, 50, 81 }, + { 1, 31, 51 }, + { 1, 14, 23 } + }, { /* Coeff Band 4 */ + { 18, 162, 185 }, + { 6, 123, 171 }, + { 1, 78, 125 }, + { 1, 51, 86 }, + { 1, 31, 54 }, + { 1, 14, 23 } + }, { /* Coeff Band 5 */ + { 15, 199, 227 }, + { 3, 150, 204 }, + { 1, 91, 146 }, + { 1, 55, 95 }, + { 1, 30, 53 }, + { 1, 11, 20 } + } + }, { /* Inter */ + { /* Coeff Band 0 */ + { 19, 55, 240 }, + { 19, 59, 196 }, + { 3, 52, 105 } + }, { /* Coeff Band 1 */ + { 41, 166, 207 }, + { 104, 153, 199 }, + { 31, 123, 181 }, + { 14, 101, 152 }, + { 5, 72, 106 }, + { 1, 36, 52 } + }, { /* Coeff Band 2 */ + { 35, 176, 211 }, + { 12, 131, 190 }, + { 2, 88, 144 }, + { 1, 60, 101 }, + { 1, 36, 60 }, + { 1, 16, 28 } + }, { /* Coeff Band 3 */ + { 28, 183, 213 }, + { 8, 134, 191 }, + { 1, 86, 142 }, + { 1, 56, 96 }, + { 1, 30, 53 }, + { 1, 12, 20 } + }, { /* Coeff Band 4 */ + { 20, 190, 215 }, + { 4, 135, 192 }, + { 1, 84, 139 }, + { 1, 53, 91 }, + { 1, 28, 49 }, + { 1, 11, 20 } + }, { /* Coeff Band 5 */ + { 13, 196, 216 }, + { 2, 137, 192 }, + { 1, 86, 143 }, + { 1, 57, 99 }, + { 1, 32, 56 }, + { 1, 13, 24 } + } } - }, { - { - { 29408, 
11758, 8023, 10123, 6705, 2468, 369, 17, 10, 5 }, - { 9612, 13874, 13329, 13022, 6500, 3250, 300, 12, 6, 3 }, - }, { - { 29408, 11758, 8023, 10123, 6705, 2468, 369, 17, 10, 5 }, - { 9612, 13874, 13329, 13022, 6500, 3250, 300, 12, 6, 3 }, + }, { /* block Type 1 */ + { /* Intra */ + { /* Coeff Band 0 */ + { 211, 29, 217 }, + { 96, 47, 156 }, + { 22, 43, 87 } + }, { /* Coeff Band 1 */ + { 78, 120, 193 }, + { 111, 116, 186 }, + { 46, 102, 164 }, + { 15, 80, 128 }, + { 2, 49, 76 }, + { 1, 18, 28 } + }, { /* Coeff Band 2 */ + { 71, 161, 203 }, + { 42, 132, 192 }, + { 10, 98, 150 }, + { 3, 69, 109 }, + { 1, 44, 70 }, + { 1, 18, 29 } + }, { /* Coeff Band 3 */ + { 57, 186, 211 }, + { 30, 140, 196 }, + { 4, 93, 146 }, + { 1, 62, 102 }, + { 1, 38, 65 }, + { 1, 16, 27 } + }, { /* Coeff Band 4 */ + { 47, 199, 217 }, + { 14, 145, 196 }, + { 1, 88, 142 }, + { 1, 57, 98 }, + { 1, 36, 62 }, + { 1, 15, 26 } + }, { /* Coeff Band 5 */ + { 26, 219, 229 }, + { 5, 155, 207 }, + { 1, 94, 151 }, + { 1, 60, 104 }, + { 1, 36, 62 }, + { 1, 16, 28 } + } + }, { /* Inter */ + { /* Coeff Band 0 */ + { 233, 29, 248 }, + { 146, 47, 220 }, + { 43, 52, 140 } + }, { /* Coeff Band 1 */ + { 100, 163, 232 }, + { 179, 161, 222 }, + { 63, 142, 204 }, + { 37, 113, 174 }, + { 26, 89, 137 }, + { 18, 68, 97 } + }, { /* Coeff Band 2 */ + { 85, 181, 230 }, + { 32, 146, 209 }, + { 7, 100, 164 }, + { 3, 71, 121 }, + { 1, 45, 77 }, + { 1, 18, 30 } + }, { /* Coeff Band 3 */ + { 65, 187, 230 }, + { 20, 148, 207 }, + { 2, 97, 159 }, + { 1, 68, 116 }, + { 1, 40, 70 }, + { 1, 14, 29 } + }, { /* Coeff Band 4 */ + { 40, 194, 227 }, + { 8, 147, 204 }, + { 1, 94, 155 }, + { 1, 65, 112 }, + { 1, 39, 66 }, + { 1, 14, 26 } + }, { /* Coeff Band 5 */ + { 16, 208, 228 }, + { 3, 151, 207 }, + { 1, 98, 160 }, + { 1, 67, 117 }, + { 1, 41, 74 }, + { 1, 17, 31 } + } } } }; - -static const unsigned int default_nzc_counts_32x32[MAX_NZC_CONTEXTS] - [REF_TYPES] - [BLOCK_TYPES] - [NZC32X32_TOKENS] = { - { - { - { 72988, 62777, 19440, 11812, 5145, 1917, 439, 10, 5, 2, 1, 0 }, - { 52052, 30468, 6973, 3250, 1500, 750, 375, 50, 8, 1, 0, 0 }, - }, { - { 72988, 62777, 19440, 11812, 5145, 1917, 439, 10, 5, 2, 1, 0 }, - { 72052, 30468, 6973, 3250, 1500, 750, 375, 50, 8, 1, 0, 0 }, - } - }, { - { - { 21533, 33527, 15655, 11920, 5723, 2009, 315, 7, 4, 2, 1, 0 }, - { 27772, 23120, 13127, 8115, 4000, 2000, 200, 6, 4, 2, 1, 0 }, - }, { - { 21533, 33527, 15655, 11920, 5723, 2009, 315, 7, 4, 2, 1, 0 }, - { 27772, 23120, 13127, 8115, 4000, 2000, 200, 6, 4, 2, 1, 0 }, +static const vp9_coeff_probs_model default_coef_probs_32x32[BLOCK_TYPES] = { + { /* block Type 0 */ + { /* Intra */ + { /* Coeff Band 0 */ + { 17, 38, 140 }, + { 7, 34, 80 }, + { 1, 17, 29 } + }, { /* Coeff Band 1 */ + { 37, 75, 128 }, + { 41, 76, 128 }, + { 26, 66, 116 }, + { 12, 52, 94 }, + { 2, 32, 55 }, + { 1, 10, 16 } + }, { /* Coeff Band 2 */ + { 50, 127, 154 }, + { 37, 109, 152 }, + { 16, 82, 121 }, + { 5, 59, 85 }, + { 1, 35, 54 }, + { 1, 13, 20 } + }, { /* Coeff Band 3 */ + { 40, 142, 167 }, + { 17, 110, 157 }, + { 2, 71, 112 }, + { 1, 44, 72 }, + { 1, 27, 45 }, + { 1, 11, 17 } + }, { /* Coeff Band 4 */ + { 30, 175, 188 }, + { 9, 124, 169 }, + { 1, 74, 116 }, + { 1, 48, 78 }, + { 1, 30, 49 }, + { 1, 11, 18 } + }, { /* Coeff Band 5 */ + { 10, 222, 223 }, + { 2, 150, 194 }, + { 1, 83, 128 }, + { 1, 48, 79 }, + { 1, 27, 45 }, + { 1, 11, 17 } + } + }, { /* Inter */ + { /* Coeff Band 0 */ + { 36, 41, 235 }, + { 29, 36, 193 }, + { 10, 27, 111 } + }, { /* Coeff Band 1 */ + { 85, 165, 222 }, + { 177, 162, 215 }, + 
{ 110, 135, 195 }, + { 57, 113, 168 }, + { 23, 83, 120 }, + { 10, 49, 61 } + }, { /* Coeff Band 2 */ + { 85, 190, 223 }, + { 36, 139, 200 }, + { 5, 90, 146 }, + { 1, 60, 103 }, + { 1, 38, 65 }, + { 1, 18, 30 } + }, { /* Coeff Band 3 */ + { 72, 202, 223 }, + { 23, 141, 199 }, + { 2, 86, 140 }, + { 1, 56, 97 }, + { 1, 36, 61 }, + { 1, 16, 27 } + }, { /* Coeff Band 4 */ + { 55, 218, 225 }, + { 13, 145, 200 }, + { 1, 86, 141 }, + { 1, 57, 99 }, + { 1, 35, 61 }, + { 1, 13, 22 } + }, { /* Coeff Band 5 */ + { 15, 235, 212 }, + { 1, 132, 184 }, + { 1, 84, 139 }, + { 1, 57, 97 }, + { 1, 34, 56 }, + { 1, 14, 23 } + } } - }, { - { - { 29408, 11758, 8023, 10123, 6705, 2468, 369, 17, 10, 5, 2, 1 }, - { 9612, 13874, 13329, 13022, 6500, 3250, 300, 12, 6, 3, 2, 1 }, - }, { - { 29408, 11758, 8023, 10123, 6705, 2468, 369, 17, 10, 5, 2, 1 }, - { 9612, 13874, 13329, 13022, 6500, 3250, 300, 12, 6, 3, 2, 1 }, + }, { /* block Type 1 */ + { /* Intra */ + { /* Coeff Band 0 */ + { 181, 21, 201 }, + { 61, 37, 123 }, + { 10, 38, 71 } + }, { /* Coeff Band 1 */ + { 47, 106, 172 }, + { 95, 104, 173 }, + { 42, 93, 159 }, + { 18, 77, 131 }, + { 4, 50, 81 }, + { 1, 17, 23 } + }, { /* Coeff Band 2 */ + { 62, 147, 199 }, + { 44, 130, 189 }, + { 28, 102, 154 }, + { 18, 75, 115 }, + { 2, 44, 65 }, + { 1, 12, 19 } + }, { /* Coeff Band 3 */ + { 55, 153, 210 }, + { 24, 130, 194 }, + { 3, 93, 146 }, + { 1, 61, 97 }, + { 1, 31, 50 }, + { 1, 10, 16 } + }, { /* Coeff Band 4 */ + { 49, 186, 223 }, + { 17, 148, 204 }, + { 1, 96, 142 }, + { 1, 53, 83 }, + { 1, 26, 44 }, + { 1, 11, 17 } + }, { /* Coeff Band 5 */ + { 13, 217, 212 }, + { 2, 136, 180 }, + { 1, 78, 124 }, + { 1, 50, 83 }, + { 1, 29, 49 }, + { 1, 14, 23 } + } + }, { /* Inter */ + { /* Coeff Band 0 */ + { 197, 13, 247 }, + { 82, 17, 222 }, + { 25, 17, 162 } + }, { /* Coeff Band 1 */ + { 126, 186, 247 }, + { 234, 191, 243 }, + { 176, 177, 234 }, + { 104, 158, 220 }, + { 66, 128, 186 }, + { 55, 90, 137 } + }, { /* Coeff Band 2 */ + { 111, 197, 242 }, + { 46, 158, 219 }, + { 9, 104, 171 }, + { 2, 65, 125 }, + { 1, 44, 80 }, + { 1, 17, 91 } + }, { /* Coeff Band 3 */ + { 104, 208, 245 }, + { 39, 168, 224 }, + { 3, 109, 162 }, + { 1, 79, 124 }, + { 1, 50, 102 }, + { 1, 43, 102 } + }, { /* Coeff Band 4 */ + { 84, 220, 246 }, + { 31, 177, 231 }, + { 2, 115, 180 }, + { 1, 79, 134 }, + { 1, 55, 77 }, + { 1, 60, 79 } + }, { /* Coeff Band 5 */ + { 43, 243, 240 }, + { 8, 180, 217 }, + { 1, 115, 166 }, + { 1, 84, 121 }, + { 1, 51, 67 }, + { 1, 16, 6 } + } } } }; - -#else - -static const vp9_prob default_nzc_probs_4x4[MAX_NZC_CONTEXTS] - [REF_TYPES] - [BLOCK_TYPES] - [NZC4X4_TOKENS] = { - { - { - { 219, 162, 179, 142, 242, }, - { 214, 253, 228, 246, 255, }, - }, { - { 225, 236, 190, 229, 253, }, - { 251, 253, 240, 248, 255, }, - }, - }, { - { - { 106, 126, 158, 126, 244, }, - { 118, 241, 201, 240, 255, }, - }, { - { 165, 179, 143, 189, 242, }, - { 173, 239, 192, 255, 128, }, - }, - }, { - { - { 42 , 78 , 153, 92 , 223, }, - { 128, 128, 128, 128, 128, }, - }, { - { 76 , 68 , 126, 110, 216, }, - { 128, 128, 128, 128, 128, }, - }, - }, -}; - -static const vp9_prob default_nzc_probs_8x8[MAX_NZC_CONTEXTS] - [REF_TYPES] - [BLOCK_TYPES] - [NZC8X8_TOKENS] = { - { - { - { 134, 139, 170, 178, 142, 197, 255, }, - { 167, 224, 199, 252, 205, 255, 128, }, - }, { - { 181, 210, 180, 241, 190, 235, 255, }, - { 234, 251, 235, 252, 219, 255, 128, }, - }, - }, { - { - { 33 , 64 , 155, 143, 86 , 216, 255, }, - { 73 , 160, 167, 251, 153, 255, 128, }, - }, { - { 79 , 104, 153, 195, 119, 246, 255, }, - { 149, 183, 
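The vp9_coeff_probs_model tables above carry only three probabilities per coefficient context (the first three nodes of the token tree); the probabilities for the remaining nodes are reconstructed from a fitted distribution when the defaults are loaded. A minimal sketch of that expansion in C, assuming a hypothetical tail_lookup table in place of the library's fitted-model table:

typedef unsigned char vp9_prob;

#define UNCONSTRAINED_NODES 3   /* probabilities stored per context */
#define ENTROPY_NODES 11        /* internal nodes of the full coef tree */

/* hypothetical stand-in for the fitted tail table, indexed by the
   last stored model probability */
extern const vp9_prob tail_lookup[256][ENTROPY_NODES - 1 - UNCONSTRAINED_NODES];

static void model_to_full_probs(const vp9_prob model[UNCONSTRAINED_NODES],
                                vp9_prob full[ENTROPY_NODES - 1]) {
  int i;
  for (i = 0; i < UNCONSTRAINED_NODES; ++i)
    full[i] = model[i];                  /* stored nodes are copied verbatim */
  for (; i < ENTROPY_NODES - 1; ++i)     /* tail comes from the fitted model */
    full[i] = tail_lookup[model[UNCONSTRAINED_NODES - 1]]
                         [i - UNCONSTRAINED_NODES];
}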
186, 249, 203, 255, 128, }, - }, - }, { - { - { 10 , 25 , 156, 61 , 69 , 156, 254, }, - { 32 , 1 , 128, 146, 64 , 255, 128, }, - }, { - { 37 , 48 , 143, 113, 81 , 202, 255, }, - { 1 , 255, 128, 128, 128, 128, 128, }, - }, - }, -}; - -static const vp9_prob default_nzc_probs_16x16[MAX_NZC_CONTEXTS] - [REF_TYPES] - [BLOCK_TYPES] - [NZC16X16_TOKENS] = { - { - { - { 11 , 188, 210, 167, 141, 143, 152, 255, 128, }, - { 171, 201, 203, 244, 207, 255, 255, 128, 128, }, - }, { - { 23 , 217, 207, 251, 198, 255, 219, 128, 128, }, - { 235, 249, 229, 255, 199, 128, 128, 128, 128, }, - }, - }, { - { - { 9 , 45 , 168, 85 , 66 , 221, 139, 246, 255, }, - { 51 , 110, 163, 238, 94 , 255, 255, 128, 128, }, - }, { - { 4 , 149, 175, 240, 149, 255, 205, 128, 128, }, - { 141, 217, 186, 255, 128, 128, 128, 128, 128, }, - }, - }, { - { - { 1 , 12 , 173, 6 , 68 , 145, 41 , 204, 255, }, - { 39 , 47 , 128, 199, 110, 255, 128, 128, 128, }, - }, { - { 1 , 121, 171, 149, 115, 242, 159, 255, 128, }, - { 1 , 255, 255, 128, 128, 128, 128, 128, 128, }, - }, - }, -}; - -static const vp9_prob default_nzc_probs_32x32[MAX_NZC_CONTEXTS] - [REF_TYPES] - [BLOCK_TYPES] - [NZC32X32_TOKENS] = { - { - { - { 11 , 216, 195, 201, 160, 247, 217, 255, 255, 128, 128, }, - { 177, 240, 239, 255, 192, 128, 128, 128, 128, 128, 128, }, - }, { - { 48 , 235, 213, 235, 199, 255, 255, 128, 128, 128, 128, }, - { 205, 255, 248, 128, 128, 128, 128, 128, 128, 128, 128, }, - }, - }, { - { - { 6 , 96 , 138, 99 , 125, 248, 188, 255, 128, 128, 128, }, - { 17 , 53 , 43 , 189, 1 , 255, 171, 128, 128, 128, 128, }, - }, { - { 5 , 187, 235, 232, 117, 255, 219, 128, 128, 128, 128, }, - { 146, 255, 255, 128, 128, 128, 128, 128, 128, 128, 128, }, - }, - }, { - { - { 1 , 7 , 93 , 14 , 100, 30 , 85 , 65 , 81 , 210, 255, }, - { 1 , 1 , 128, 26 , 1 , 218, 78 , 255, 255, 128, 128, }, - }, { - { 4 , 148, 206, 137, 160, 255, 255, 128, 128, 128, 128, }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, }, - }, - }, -}; #endif - -static const vp9_prob default_nzc_pcat_probs[MAX_NZC_CONTEXTS] - [NZC_TOKENS_EXTRA] - [NZC_BITS_EXTRA] = { - // Bit probabilities are in least to most significance order - { - {176, 128, 128, 128, 128, 128, 128, 128, 128}, // 3 - 4 - {164, 192, 128, 128, 128, 128, 128, 128, 128}, // 5 - 8 - {154, 184, 208, 128, 128, 128, 128, 128, 128}, // 9 - 16 - {144, 176, 200, 216, 128, 128, 128, 128, 128}, // 17 - 32 - {140, 172, 192, 208, 224, 128, 128, 128, 128}, // 33 - 64 - {136, 168, 188, 200, 220, 232, 128, 128, 128}, // 65 - 128 - {132, 164, 184, 196, 216, 228, 240, 128, 128}, // 129 - 256 - {130, 162, 178, 194, 212, 226, 240, 248, 128}, // 257 - 512 - {128, 160, 176, 192, 208, 224, 240, 248, 254}, // 513 - 1024 - }, { - {168, 128, 128, 128, 128, 128, 128, 128, 128}, // 3 - 4 - {152, 184, 128, 128, 128, 128, 128, 128, 128}, // 5 - 8 - {152, 184, 208, 128, 128, 128, 128, 128, 128}, // 9 - 16 - {144, 176, 200, 216, 128, 128, 128, 128, 128}, // 17 - 32 - {140, 172, 192, 208, 224, 128, 128, 128, 128}, // 33 - 64 - {136, 168, 188, 200, 220, 232, 128, 128, 128}, // 65 - 128 - {132, 164, 184, 196, 216, 228, 240, 128, 128}, // 129 - 256 - {130, 162, 178, 194, 212, 226, 240, 248, 128}, // 257 - 512 - {128, 160, 176, 192, 208, 224, 240, 248, 254}, // 513 - 1024 - }, { - {160, 128, 128, 128, 128, 128, 128, 128, 128}, // 3 - 4 - {152, 176, 128, 128, 128, 128, 128, 128, 128}, // 5 - 8 - {150, 184, 208, 128, 128, 128, 128, 128, 128}, // 9 - 16 - {144, 176, 200, 216, 128, 128, 128, 128, 128}, // 17 - 32 - {140, 172, 192, 208, 224, 128, 128, 128, 128}, // 33 - 64 
- {136, 168, 188, 200, 220, 232, 128, 128, 128}, // 65 - 128 - {132, 164, 184, 196, 216, 228, 240, 128, 128}, // 129 - 256 - {130, 162, 178, 194, 212, 226, 240, 248, 128}, // 257 - 512 - {128, 160, 176, 192, 208, 224, 240, 248, 254}, // 513 - 1024 - }, -}; - -#endif // CONFIG_CODE_NONZEROCOUNT diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c index d05be990d72c2439a9be06bbe0007980b7990fdd..7b5273b0f6180cebf0c588f3277dabb4c36d3338 100644 --- a/vp9/common/vp9_entropy.c +++ b/vp9/common/vp9_entropy.c @@ -8,11 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ - -#include <stdio.h> - #include "vp9/common/vp9_entropy.h" -#include "string.h" #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_entropymode.h" @@ -20,8 +16,6 @@ #include "vpx/vpx_integer.h" #include "vp9/common/vp9_coefupdateprobs.h" -const int vp9_i8x8_block[4] = {0, 2, 8, 10}; - DECLARE_ALIGNED(16, const uint8_t, vp9_norm[256]) = { 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, @@ -41,30 +35,23 @@ DECLARE_ALIGNED(16, const uint8_t, vp9_norm[256]) = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; -// Unified coefficient band structure used by all block sizes -DECLARE_ALIGNED(16, const int, vp9_coef_bands8x8[64]) = { - 0, 1, 2, 3, 4, 4, 5, 5, - 1, 2, 3, 4, 4, 5, 5, 5, - 2, 3, 4, 4, 5, 5, 5, 5, - 3, 4, 4, 5, 5, 5, 5, 5, - 4, 4, 5, 5, 5, 5, 5, 5, - 4, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5 +DECLARE_ALIGNED(16, const uint8_t, + vp9_coefband_trans_8x8plus[MAXBAND_INDEX + 1]) = { + 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 5 }; -DECLARE_ALIGNED(16, const int, vp9_coef_bands4x4[16]) = { - 0, 1, 2, 3, - 1, 2, 3, 4, - 2, 3, 4, 5, - 3, 4, 5, 5 + +DECLARE_ALIGNED(16, const uint8_t, + vp9_coefband_trans_4x4[MAXBAND_INDEX + 1]) = { + 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5, + 5, 5, 5, 5, 5, 5 }; DECLARE_ALIGNED(16, const uint8_t, vp9_pt_energy_class[MAX_ENTROPY_TOKENS]) = { 0, 1, 2, 3, 3, 4, 4, 5, 5, 5, 5, 5 }; -#if CONFIG_SCATTERSCAN -DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_4x4[16]) = { +DECLARE_ALIGNED(16, const int, vp9_default_scan_4x4[16]) = { 0, 4, 1, 5, 8, 2, 12, 9, 3, 6, 13, 10, @@ -85,7 +72,7 @@ DECLARE_ALIGNED(16, const int, vp9_row_scan_4x4[16]) = { 13, 11, 14, 15, }; -DECLARE_ALIGNED(64, const int, vp9_default_zig_zag1d_8x8[64]) = { +DECLARE_ALIGNED(64, const int, vp9_default_scan_8x8[64]) = { 0, 8, 1, 16, 9, 2, 17, 24, 10, 3, 18, 25, 32, 11, 4, 26, 33, 19, 40, 12, 34, 27, 5, 41, @@ -118,7 +105,7 @@ DECLARE_ALIGNED(16, const int, vp9_row_scan_8x8[64]) = { 60, 39, 61, 47, 54, 55, 62, 63, }; -DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_16x16[256]) = { +DECLARE_ALIGNED(16, const int, vp9_default_scan_16x16[256]) = { 0, 16, 1, 32, 17, 2, 48, 33, 18, 3, 64, 34, 49, 19, 65, 80, 50, 4, 35, 66, 20, 81, 96, 51, 5, 36, 82, 97, 67, 112, 21, 52, 98, 37, 83, 113, 6, 68, 128, 53, 22, 99, 114, 84, 7, 129, 38, 69, @@ -175,218 +162,64 @@ DECLARE_ALIGNED(16, const int, vp9_row_scan_16x16[256]) = { 190, 251, 221, 191, 206, 236, 207, 237, 252, 222, 253, 223, 238, 239, 254, 255, }; -DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = { +DECLARE_ALIGNED(16, const int, vp9_default_scan_32x32[1024]) = { 0, 32, 1, 64, 33, 2, 96, 65, 34, 128, 3, 97, 66, 160, 129, 35, 98, 4, 67, 130, 161, 192, 36, 99, 224, 5, 162, 193, 68, 131, 37, 100, 225, 194, 256, 163, 69, 132, 6, 226, 257, 288, 195, 101, 164, 38, 258, 7, 227, 289, 133, 
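The vp9_coefband_trans_8x8plus and vp9_coefband_trans_4x4 tables added above replace the per-size vp9_coef_bands arrays: a single (MAXBAND_INDEX + 1)-entry translation now maps a coefficient's scan position to its probability band, with every position past MAXBAND_INDEX sharing the last band. A sketch of the lookup, assuming MAXBAND_INDEX is 21 as the 22-entry table dimension suggests:

#define MAXBAND_INDEX 21

static int get_coef_band(const unsigned char *band_translate, int coef_index) {
  /* scan positions beyond the table share the final band */
  return band_translate[coef_index > MAXBAND_INDEX ? MAXBAND_INDEX
                                                   : coef_index];
}

/* usage: band = get_coef_band(vp9_coefband_trans_8x8plus, scan_position); */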
320, 70, 196, 165, 290, 259, 228, 39, 321, 102, 352, 8, 197, 71, 134, 322, 291, 260, 353, 384, 229, 166, 103, 40, 354, 323, 292, 135, 385, 198, 261, 72, 9, 416, 167, 386, 355, 230, 324, 104, 293, 41, 417, 199, 136, 262, 387, 448, 325, 356, 10, 73, 418, 231, 168, 449, 294, 388, 105, 419, 263, 42, 200, 357, 450, 137, 480, 74, 326, 232, 11, 389, 169, 295, 420, 106, 451, 481, 358, 264, 327, 201, 43, 138, 512, 482, 390, 296, 233, 170, 421, 75, 452, 359, 12, 513, 265, 483, 328, 107, 202, 514, 544, 422, 391, 453, 139, 44, 234, - 484, 297, 360, 171, 76, 515, 545, 266, 329, 454, 13, 423, 392, 203, 108, 546, 485, 576, 298, 235, 140, 361, 516, 330, 172, 547, 45, 424, 455, 267, 393, 577, - 486, 77, 204, 517, 362, 548, 608, 14, 456, 299, 578, 109, 236, 425, 394, 487, 609, 331, 141, 579, 518, 46, 268, 15, 173, 549, 610, 640, 363, 78, 519, 488, - 300, 205, 16, 457, 580, 426, 550, 395, 110, 237, 611, 641, 332, 672, 142, 642, 269, 458, 47, 581, 427, 489, 174, 364, 520, 612, 551, 673, 79, 206, 301, 643, - 704, 17, 111, 490, 674, 238, 582, 48, 521, 613, 333, 396, 459, 143, 270, 552, 644, 705, 736, 365, 80, 675, 583, 175, 428, 706, 112, 302, 207, 614, 553, 49, - 645, 522, 737, 397, 768, 144, 334, 18, 676, 491, 239, 615, 707, 584, 81, 460, 176, 271, 738, 429, 113, 800, 366, 208, 523, 708, 646, 554, 677, 769, 19, 145, - 585, 739, 240, 303, 50, 461, 616, 398, 647, 335, 492, 177, 82, 770, 832, 555, 272, 430, 678, 209, 709, 114, 740, 801, 617, 51, 304, 679, 524, 367, 586, 241, - 20, 146, 771, 864, 83, 802, 648, 493, 399, 273, 336, 710, 178, 462, 833, 587, 741, 115, 305, 711, 368, 525, 618, 803, 210, 896, 680, 834, 772, 52, 649, 147, - 431, 494, 556, 242, 400, 865, 337, 21, 928, 179, 742, 84, 463, 274, 369, 804, 650, 557, 743, 960, 835, 619, 773, 306, 211, 526, 432, 992, 588, 712, 116, 243, - 866, 495, 681, 558, 805, 589, 401, 897, 53, 338, 148, 682, 867, 464, 275, 22, 370, 433, 307, 620, 527, 836, 774, 651, 713, 744, 85, 180, 621, 465, 929, 775, - 496, 898, 212, 339, 244, 402, 590, 117, 559, 714, 434, 23, 868, 930, 806, 683, 528, 652, 371, 961, 149, 837, 54, 899, 745, 276, 993, 497, 403, 622, 181, 776, - 746, 529, 560, 435, 86, 684, 466, 308, 591, 653, 715, 807, 340, 869, 213, 962, 245, 838, 561, 931, 808, 592, 118, 498, 372, 623, 685, 994, 467, 654, 747, 900, - 716, 277, 150, 55, 24, 404, 530, 839, 777, 655, 182, 963, 840, 686, 778, 309, 870, 341, 87, 499, 809, 624, 593, 436, 717, 932, 214, 246, 995, 718, 625, 373, - 562, 25, 119, 901, 531, 468, 964, 748, 810, 278, 779, 500, 563, 656, 405, 687, 871, 872, 594, 151, 933, 749, 841, 310, 657, 626, 595, 437, 688, 183, 996, 965, - 902, 811, 342, 750, 689, 719, 532, 56, 215, 469, 934, 374, 247, 720, 780, 564, 781, 842, 406, 26, 751, 903, 873, 57, 279, 627, 501, 658, 843, 997, 812, 904, - 88, 813, 438, 752, 935, 936, 311, 596, 533, 690, 343, 966, 874, 89, 120, 470, 721, 875, 659, 782, 565, 998, 375, 844, 845, 27, 628, 967, 121, 905, 968, 152, - 937, 814, 753, 502, 691, 783, 184, 153, 722, 407, 58, 815, 999, 660, 597, 723, 534, 906, 216, 439, 907, 248, 185, 876, 846, 692, 784, 629, 90, 969, 280, 754, - 938, 939, 217, 847, 566, 471, 785, 816, 877, 1000, 249, 878, 661, 503, 312, 970, 755, 122, 817, 281, 344, 786, 598, 724, 28, 59, 29, 154, 535, 630, 376, 1001, - 313, 908, 186, 91, 848, 849, 345, 909, 940, 879, 408, 818, 693, 1002, 971, 941, 567, 377, 218, 756, 910, 787, 440, 123, 880, 725, 662, 250, 819, 1003, 282, 972, - 850, 599, 472, 409, 155, 441, 942, 757, 788, 694, 911, 881, 314, 631, 973, 504, 187, 1004, 346, 473, 851, 943, 820, 726, 60, 505, 219, 378, 912, 974, 30, 
31, - 536, 882, 1005, 92, 251, 663, 944, 913, 283, 695, 883, 568, 1006, 975, 410, 442, 945, 789, 852, 537, 1007, 124, 315, 61, 758, 821, 600, 914, 976, 569, 474, 347, - 156, 1008, 915, 93, 977, 506, 946, 727, 379, 884, 188, 632, 601, 1009, 790, 853, 978, 947, 220, 411, 125, 633, 664, 759, 252, 443, 916, 538, 157, 822, 62, 570, - 979, 284, 1010, 885, 948, 189, 475, 94, 316, 665, 696, 1011, 854, 791, 980, 221, 348, 63, 917, 602, 380, 507, 253, 126, 697, 823, 634, 285, 728, 949, 886, 95, - 158, 539, 1012, 317, 412, 444, 760, 571, 190, 981, 729, 918, 127, 666, 349, 381, 476, 855, 761, 1013, 603, 222, 159, 698, 950, 508, 254, 792, 286, 635, 887, 793, - 413, 191, 982, 445, 540, 318, 730, 667, 223, 824, 919, 1014, 350, 477, 572, 255, 825, 951, 762, 509, 604, 856, 382, 699, 287, 319, 636, 983, 794, 414, 541, 731, - 857, 888, 351, 446, 573, 1015, 668, 889, 478, 826, 383, 763, 605, 920, 510, 637, 415, 700, 921, 858, 447, 952, 542, 795, 479, 953, 732, 890, 669, 574, 511, 984, - 827, 985, 922, 1016, 764, 606, 543, 701, 859, 638, 1017, 575, 796, 954, 733, 891, 670, 607, 828, 986, 765, 923, 639, 1018, 702, 860, 955, 671, 892, 734, 797, 703, - 987, 829, 1019, 766, 924, 735, 861, 956, 988, 893, 767, 798, 830, 1020, 925, 957, 799, 862, 831, 989, 894, 1021, 863, 926, 895, 958, 990, 1022, 927, 959, 991, 1023, -}; -#else // CONFIG_SCATTERSCAN -DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_4x4[16]) = { - 0, 1, 4, 8, - 5, 2, 3, 6, - 9, 12, 13, 10, - 7, 11, 14, 15, -}; - -DECLARE_ALIGNED(16, const int, vp9_col_scan_4x4[16]) = { - 0, 4, 8, 12, - 1, 5, 9, 13, - 2, 6, 10, 14, - 3, 7, 11, 15 -}; - -DECLARE_ALIGNED(16, const int, vp9_row_scan_4x4[16]) = { - 0, 1, 2, 3, - 4, 5, 6, 7, - 8, 9, 10, 11, - 12, 13, 14, 15 + 484, 297, 360, 171, 76, 515, 545, 266, 329, 454, 13, 423, 203, 108, 546, 485, 576, 298, 235, 140, 361, 330, 172, 547, 45, 455, 267, 577, 486, 77, 204, 362, + 608, 14, 299, 578, 109, 236, 487, 609, 331, 141, 579, 46, 15, 173, 610, 363, 78, 205, 16, 110, 237, 611, 142, 47, 174, 79, 206, 17, 111, 238, 48, 143, + 80, 175, 112, 207, 49, 18, 239, 81, 113, 19, 50, 82, 114, 51, 83, 115, 640, 516, 392, 268, 144, 20, 672, 641, 548, 517, 424, 393, 300, 269, 176, 145, + 52, 21, 704, 673, 642, 580, 549, 518, 456, 425, 394, 332, 301, 270, 208, 177, 146, 84, 53, 22, 736, 705, 674, 643, 612, 581, 550, 519, 488, 457, 426, 395, + 364, 333, 302, 271, 240, 209, 178, 147, 116, 85, 54, 23, 737, 706, 675, 613, 582, 551, 489, 458, 427, 365, 334, 303, 241, 210, 179, 117, 86, 55, 738, 707, + 614, 583, 490, 459, 366, 335, 242, 211, 118, 87, 739, 615, 491, 367, 243, 119, 768, 644, 520, 396, 272, 148, 24, 800, 769, 676, 645, 552, 521, 428, 397, 304, + 273, 180, 149, 56, 25, 832, 801, 770, 708, 677, 646, 584, 553, 522, 460, 429, 398, 336, 305, 274, 212, 181, 150, 88, 57, 26, 864, 833, 802, 771, 740, 709, + 678, 647, 616, 585, 554, 523, 492, 461, 430, 399, 368, 337, 306, 275, 244, 213, 182, 151, 120, 89, 58, 27, 865, 834, 803, 741, 710, 679, 617, 586, 555, 493, + 462, 431, 369, 338, 307, 245, 214, 183, 121, 90, 59, 866, 835, 742, 711, 618, 587, 494, 463, 370, 339, 246, 215, 122, 91, 867, 743, 619, 495, 371, 247, 123, + 896, 772, 648, 524, 400, 276, 152, 28, 928, 897, 804, 773, 680, 649, 556, 525, 432, 401, 308, 277, 184, 153, 60, 29, 960, 929, 898, 836, 805, 774, 712, 681, + 650, 588, 557, 526, 464, 433, 402, 340, 309, 278, 216, 185, 154, 92, 61, 30, 992, 961, 930, 899, 868, 837, 806, 775, 744, 713, 682, 651, 620, 589, 558, 527, + 496, 465, 434, 403, 372, 341, 310, 279, 248, 217, 186, 155, 124, 93, 62, 31, 993, 962, 931, 869, 
838, 807, 745, 714, 683, 621, 590, 559, 497, 466, 435, 373, + 342, 311, 249, 218, 187, 125, 94, 63, 994, 963, 870, 839, 746, 715, 622, 591, 498, 467, 374, 343, 250, 219, 126, 95, 995, 871, 747, 623, 499, 375, 251, 127, + 900, 776, 652, 528, 404, 280, 156, 932, 901, 808, 777, 684, 653, 560, 529, 436, 405, 312, 281, 188, 157, 964, 933, 902, 840, 809, 778, 716, 685, 654, 592, 561, + 530, 468, 437, 406, 344, 313, 282, 220, 189, 158, 996, 965, 934, 903, 872, 841, 810, 779, 748, 717, 686, 655, 624, 593, 562, 531, 500, 469, 438, 407, 376, 345, + 314, 283, 252, 221, 190, 159, 997, 966, 935, 873, 842, 811, 749, 718, 687, 625, 594, 563, 501, 470, 439, 377, 346, 315, 253, 222, 191, 998, 967, 874, 843, 750, + 719, 626, 595, 502, 471, 378, 347, 254, 223, 999, 875, 751, 627, 503, 379, 255, 904, 780, 656, 532, 408, 284, 936, 905, 812, 781, 688, 657, 564, 533, 440, 409, + 316, 285, 968, 937, 906, 844, 813, 782, 720, 689, 658, 596, 565, 534, 472, 441, 410, 348, 317, 286, 1000, 969, 938, 907, 876, 845, 814, 783, 752, 721, 690, 659, + 628, 597, 566, 535, 504, 473, 442, 411, 380, 349, 318, 287, 1001, 970, 939, 877, 846, 815, 753, 722, 691, 629, 598, 567, 505, 474, 443, 381, 350, 319, 1002, 971, + 878, 847, 754, 723, 630, 599, 506, 475, 382, 351, 1003, 879, 755, 631, 507, 383, 908, 784, 660, 536, 412, 940, 909, 816, 785, 692, 661, 568, 537, 444, 413, 972, + 941, 910, 848, 817, 786, 724, 693, 662, 600, 569, 538, 476, 445, 414, 1004, 973, 942, 911, 880, 849, 818, 787, 756, 725, 694, 663, 632, 601, 570, 539, 508, 477, + 446, 415, 1005, 974, 943, 881, 850, 819, 757, 726, 695, 633, 602, 571, 509, 478, 447, 1006, 975, 882, 851, 758, 727, 634, 603, 510, 479, 1007, 883, 759, 635, 511, + 912, 788, 664, 540, 944, 913, 820, 789, 696, 665, 572, 541, 976, 945, 914, 852, 821, 790, 728, 697, 666, 604, 573, 542, 1008, 977, 946, 915, 884, 853, 822, 791, + 760, 729, 698, 667, 636, 605, 574, 543, 1009, 978, 947, 885, 854, 823, 761, 730, 699, 637, 606, 575, 1010, 979, 886, 855, 762, 731, 638, 607, 1011, 887, 763, 639, + 916, 792, 668, 948, 917, 824, 793, 700, 669, 980, 949, 918, 856, 825, 794, 732, 701, 670, 1012, 981, 950, 919, 888, 857, 826, 795, 764, 733, 702, 671, 1013, 982, + 951, 889, 858, 827, 765, 734, 703, 1014, 983, 890, 859, 766, 735, 1015, 891, 767, 920, 796, 952, 921, 828, 797, 984, 953, 922, 860, 829, 798, 1016, 985, 954, 923, + 892, 861, 830, 799, 1017, 986, 955, 893, 862, 831, 1018, 987, 894, 863, 1019, 895, 924, 956, 925, 988, 957, 926, 1020, 989, 958, 927, 1021, 990, 959, 1022, 991, 1023, }; -DECLARE_ALIGNED(64, const int, vp9_default_zig_zag1d_8x8[64]) = { - 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, - 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, - 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, - 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63, -}; - -DECLARE_ALIGNED(16, const int, vp9_col_scan_8x8[64]) = { - 0, 8, 16, 24, 32, 40, 48, 56, - 1, 9, 17, 25, 33, 41, 49, 57, - 2, 10, 18, 26, 34, 42, 50, 58, - 3, 11, 19, 27, 35, 43, 51, 59, - 4, 12, 20, 28, 36, 44, 52, 60, - 5, 13, 21, 29, 37, 45, 53, 61, - 6, 14, 22, 30, 38, 46, 54, 62, - 7, 15, 23, 31, 39, 47, 55, 63, -}; - -DECLARE_ALIGNED(16, const int, vp9_row_scan_8x8[64]) = { - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, - 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, - 56, 57, 58, 59, 60, 61, 62, 63, -}; - -DECLARE_ALIGNED(16, const int, 
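The renamed scan tables (vp9_default_zig_zag1d_* becoming vp9_default_scan_*) store raster offsets into the transform block, so the coefficient coded at position i sits at row scan[i] / stride and column scan[i] % stride. A small sketch of how such a table drives coding order, here computing the end-of-block position:

/* Sketch: walk the block in scan order and return the index just past
   the last nonzero coefficient. scan would be one of the tables above,
   e.g. vp9_default_scan_4x4 with n = 16. */
static int find_eob(const short *qcoeff, const int *scan, int n) {
  int i, eob = 0;
  for (i = 0; i < n; ++i)
    if (qcoeff[scan[i]])
      eob = i + 1;
  return eob;
}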
vp9_default_zig_zag1d_16x16[256]) = { - 0, 1, 16, 32, 17, 2, 3, 18, - 33, 48, 64, 49, 34, 19, 4, 5, - 20, 35, 50, 65, 80, 96, 81, 66, - 51, 36, 21, 6, 7, 22, 37, 52, - 67, 82, 97, 112, 128, 113, 98, 83, - 68, 53, 38, 23, 8, 9, 24, 39, - 54, 69, 84, 99, 114, 129, 144, 160, - 145, 130, 115, 100, 85, 70, 55, 40, - 25, 10, 11, 26, 41, 56, 71, 86, - 101, 116, 131, 146, 161, 176, 192, 177, - 162, 147, 132, 117, 102, 87, 72, 57, - 42, 27, 12, 13, 28, 43, 58, 73, - 88, 103, 118, 133, 148, 163, 178, 193, - 208, 224, 209, 194, 179, 164, 149, 134, - 119, 104, 89, 74, 59, 44, 29, 14, - 15, 30, 45, 60, 75, 90, 105, 120, - 135, 150, 165, 180, 195, 210, 225, 240, - 241, 226, 211, 196, 181, 166, 151, 136, - 121, 106, 91, 76, 61, 46, 31, 47, - 62, 77, 92, 107, 122, 137, 152, 167, - 182, 197, 212, 227, 242, 243, 228, 213, - 198, 183, 168, 153, 138, 123, 108, 93, - 78, 63, 79, 94, 109, 124, 139, 154, - 169, 184, 199, 214, 229, 244, 245, 230, - 215, 200, 185, 170, 155, 140, 125, 110, - 95, 111, 126, 141, 156, 171, 186, 201, - 216, 231, 246, 247, 232, 217, 202, 187, - 172, 157, 142, 127, 143, 158, 173, 188, - 203, 218, 233, 248, 249, 234, 219, 204, - 189, 174, 159, 175, 190, 205, 220, 235, - 250, 251, 236, 221, 206, 191, 207, 222, - 237, 252, 253, 238, 223, 239, 254, 255, -}; - -DECLARE_ALIGNED(16, const int, vp9_col_scan_16x16[256]) = { - 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, - 1, 17, 33, 49, 65, 81, 97, 113, 129, 145, 161, 177, 193, 209, 225, 241, - 2, 18, 34, 50, 66, 82, 98, 114, 130, 146, 162, 178, 194, 210, 226, 242, - 3, 19, 35, 51, 67, 83, 99, 115, 131, 147, 163, 179, 195, 211, 227, 243, - 4, 20, 36, 52, 68, 84, 100, 116, 132, 148, 164, 180, 196, 212, 228, 244, - 5, 21, 37, 53, 69, 85, 101, 117, 133, 149, 165, 181, 197, 213, 229, 245, - 6, 22, 38, 54, 70, 86, 102, 118, 134, 150, 166, 182, 198, 214, 230, 246, - 7, 23, 39, 55, 71, 87, 103, 119, 135, 151, 167, 183, 199, 215, 231, 247, - 8, 24, 40, 56, 72, 88, 104, 120, 136, 152, 168, 184, 200, 216, 232, 248, - 9, 25, 41, 57, 73, 89, 105, 121, 137, 153, 169, 185, 201, 217, 233, 249, - 10, 26, 42, 58, 74, 90, 106, 122, 138, 154, 170, 186, 202, 218, 234, 250, - 11, 27, 43, 59, 75, 91, 107, 123, 139, 155, 171, 187, 203, 219, 235, 251, - 12, 28, 44, 60, 76, 92, 108, 124, 140, 156, 172, 188, 204, 220, 236, 252, - 13, 29, 45, 61, 77, 93, 109, 125, 141, 157, 173, 189, 205, 221, 237, 253, - 14, 30, 46, 62, 78, 94, 110, 126, 142, 158, 174, 190, 206, 222, 238, 254, - 15, 31, 47, 63, 79, 95, 111, 127, 143, 159, 175, 191, 207, 223, 239, 255, -}; - -DECLARE_ALIGNED(16, const int, vp9_row_scan_16x16[256]) = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, - 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, - 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, - 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, - 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, - 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, - 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, - 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, - 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 
206, 207, - 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, - 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, - 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, -}; - -DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = { - 0, 1, 32, 64, 33, 2, 3, 34, 65, 96, 128, 97, 66, 35, 4, 5, 36, 67, 98, 129, 160, 192, 161, 130, 99, 68, 37, 6, 7, 38, 69, 100, - 131, 162, 193, 224, 256, 225, 194, 163, 132, 101, 70, 39, 8, 9, 40, 71, 102, 133, 164, 195, 226, 257, 288, 320, 289, 258, 227, 196, 165, 134, 103, 72, - 41, 10, 11, 42, 73, 104, 135, 166, 197, 228, 259, 290, 321, 352, 384, 353, 322, 291, 260, 229, 198, 167, 136, 105, 74, 43, 12, 13, 44, 75, 106, 137, - 168, 199, 230, 261, 292, 323, 354, 385, 416, 448, 417, 386, 355, 324, 293, 262, 231, 200, 169, 138, 107, 76, 45, 14, 15, 46, 77, 108, 139, 170, 201, 232, - 263, 294, 325, 356, 387, 418, 449, 480, 512, 481, 450, 419, 388, 357, 326, 295, 264, 233, 202, 171, 140, 109, 78, 47, 16, 17, 48, 79, 110, 141, 172, 203, - 234, 265, 296, 327, 358, 389, 420, 451, 482, 513, 544, 576, 545, 514, 483, 452, 421, 390, 359, 328, 297, 266, 235, 204, 173, 142, 111, 80, 49, 18, 19, 50, - 81, 112, 143, 174, 205, 236, 267, 298, 329, 360, 391, 422, 453, 484, 515, 546, 577, 608, 640, 609, 578, 547, 516, 485, 454, 423, 392, 361, 330, 299, 268, 237, - 206, 175, 144, 113, 82, 51, 20, 21, 52, 83, 114, 145, 176, 207, 238, 269, 300, 331, 362, 393, 424, 455, 486, 517, 548, 579, 610, 641, 672, 704, 673, 642, - 611, 580, 549, 518, 487, 456, 425, 394, 363, 332, 301, 270, 239, 208, 177, 146, 115, 84, 53, 22, 23, 54, 85, 116, 147, 178, 209, 240, 271, 302, 333, 364, - 395, 426, 457, 488, 519, 550, 581, 612, 643, 674, 705, 736, 768, 737, 706, 675, 644, 613, 582, 551, 520, 489, 458, 427, 396, 365, 334, 303, 272, 241, 210, 179, - 148, 117, 86, 55, 24, 25, 56, 87, 118, 149, 180, 211, 242, 273, 304, 335, 366, 397, 428, 459, 490, 521, 552, 583, 614, 645, 676, 707, 738, 769, 800, 832, - 801, 770, 739, 708, 677, 646, 615, 584, 553, 522, 491, 460, 429, 398, 367, 336, 305, 274, 243, 212, 181, 150, 119, 88, 57, 26, 27, 58, 89, 120, 151, 182, - 213, 244, 275, 306, 337, 368, 399, 430, 461, 492, 523, 554, 585, 616, 647, 678, 709, 740, 771, 802, 833, 864, 896, 865, 834, 803, 772, 741, 710, 679, 648, 617, - 586, 555, 524, 493, 462, 431, 400, 369, 338, 307, 276, 245, 214, 183, 152, 121, 90, 59, 28, 29, 60, 91, 122, 153, 184, 215, 246, 277, 308, 339, 370, 401, - 432, 463, 494, 525, 556, 587, 618, 649, 680, 711, 742, 773, 804, 835, 866, 897, 928, 960, 929, 898, 867, 836, 805, 774, 743, 712, 681, 650, 619, 588, 557, 526, - 495, 464, 433, 402, 371, 340, 309, 278, 247, 216, 185, 154, 123, 92, 61, 30, 31, 62, 93, 124, 155, 186, 217, 248, 279, 310, 341, 372, 403, 434, 465, 496, - 527, 558, 589, 620, 651, 682, 713, 744, 775, 806, 837, 868, 899, 930, 961, 992, 993, 962, 931, 900, 869, 838, 807, 776, 745, 714, 683, 652, 621, 590, 559, 528, - 497, 466, 435, 404, 373, 342, 311, 280, 249, 218, 187, 156, 125, 94, 63, 95, 126, 157, 188, 219, 250, 281, 312, 343, 374, 405, 436, 467, 498, 529, 560, 591, - 622, 653, 684, 715, 746, 777, 808, 839, 870, 901, 932, 963, 994, 995, 964, 933, 902, 871, 840, 809, 778, 747, 716, 685, 654, 623, 592, 561, 530, 499, 468, 437, - 406, 375, 344, 313, 282, 251, 220, 189, 158, 127, 159, 190, 221, 252, 283, 314, 345, 376, 407, 438, 469, 500, 531, 562, 593, 624, 655, 686, 717, 748, 779, 810, - 841, 872, 903, 934, 965, 996, 997, 966, 935, 904, 873, 842, 811, 780, 749, 718, 687, 656, 
625, 594, 563, 532, 501, 470, 439, 408, 377, 346, 315, 284, 253, 222, - 191, 223, 254, 285, 316, 347, 378, 409, 440, 471, 502, 533, 564, 595, 626, 657, 688, 719, 750, 781, 812, 843, 874, 905, 936, 967, 998, 999, 968, 937, 906, 875, - 844, 813, 782, 751, 720, 689, 658, 627, 596, 565, 534, 503, 472, 441, 410, 379, 348, 317, 286, 255, 287, 318, 349, 380, 411, 442, 473, 504, 535, 566, 597, 628, - 659, 690, 721, 752, 783, 814, 845, 876, 907, 938, 969, 1000, 1001, 970, 939, 908, 877, 846, 815, 784, 753, 722, 691, 660, 629, 598, 567, 536, 505, 474, 443, 412, - 381, 350, 319, 351, 382, 413, 444, 475, 506, 537, 568, 599, 630, 661, 692, 723, 754, 785, 816, 847, 878, 909, 940, 971, 1002, 1003, 972, 941, 910, 879, 848, 817, - 786, 755, 724, 693, 662, 631, 600, 569, 538, 507, 476, 445, 414, 383, 415, 446, 477, 508, 539, 570, 601, 632, 663, 694, 725, 756, 787, 818, 849, 880, 911, 942, - 973, 1004, 1005, 974, 943, 912, 881, 850, 819, 788, 757, 726, 695, 664, 633, 602, 571, 540, 509, 478, 447, 479, 510, 541, 572, 603, 634, 665, 696, 727, 758, 789, - 820, 851, 882, 913, 944, 975, 1006, 1007, 976, 945, 914, 883, 852, 821, 790, 759, 728, 697, 666, 635, 604, 573, 542, 511, 543, 574, 605, 636, 667, 698, 729, 760, - 791, 822, 853, 884, 915, 946, 977, 1008, 1009, 978, 947, 916, 885, 854, 823, 792, 761, 730, 699, 668, 637, 606, 575, 607, 638, 669, 700, 731, 762, 793, 824, 855, - 886, 917, 948, 979, 1010, 1011, 980, 949, 918, 887, 856, 825, 794, 763, 732, 701, 670, 639, 671, 702, 733, 764, 795, 826, 857, 888, 919, 950, 981, 1012, 1013, 982, - 951, 920, 889, 858, 827, 796, 765, 734, 703, 735, 766, 797, 828, 859, 890, 921, 952, 983, 1014, 1015, 984, 953, 922, 891, 860, 829, 798, 767, 799, 830, 861, 892, - 923, 954, 985, 1016, 1017, 986, 955, 924, 893, 862, 831, 863, 894, 925, 956, 987, 1018, 1019, 988, 957, 926, 895, 927, 958, 989, 1020, 1021, 990, 959, 991, 1022, 1023, -}; -#endif // CONFIG_SCATTERSCAN - /* Array indices are identical to previously-existing CONTEXT_NODE indices */ const vp9_tree_index vp9_coef_tree[ 22] = /* corresponding _CONTEXT_NODEs */ { - -DCT_EOB_TOKEN, 2, /* 0 = EOB */ - -ZERO_TOKEN, 4, /* 1 = ZERO */ - -ONE_TOKEN, 6, /* 2 = ONE */ +#if CONFIG_BALANCED_COEFTREE + -ZERO_TOKEN, 2, /* 0 = ZERO */ + -DCT_EOB_TOKEN, 4, /* 1 = EOB */ +#else + -DCT_EOB_TOKEN, 2, /* 0 = EOB */ + -ZERO_TOKEN, 4, /* 1 = ZERO */ +#endif + -ONE_TOKEN, 6, /* 2 = ONE */ 8, 12, /* 3 = LOW_VAL */ -TWO_TOKEN, 10, /* 4 = TWO */ -THREE_TOKEN, -FOUR_TOKEN, /* 5 = THREE */ - 14, 16, /* 6 = HIGH_LOW */ + 14, 16, /* 6 = HIGH_LOW */ -DCT_VAL_CATEGORY1, -DCT_VAL_CATEGORY2, /* 7 = CAT_ONE */ 18, 20, /* 8 = CAT_THREEFOUR */ - -DCT_VAL_CATEGORY3, -DCT_VAL_CATEGORY4, /* 9 = CAT_THREE */ - -DCT_VAL_CATEGORY5, -DCT_VAL_CATEGORY6 /* 10 = CAT_FIVE */ + -DCT_VAL_CATEGORY3, -DCT_VAL_CATEGORY4, /* 9 = CAT_THREE */ + -DCT_VAL_CATEGORY5, -DCT_VAL_CATEGORY6 /* 10 = CAT_FIVE */ }; -struct vp9_token_struct vp9_coef_encodings[MAX_ENTROPY_TOKENS]; +struct vp9_token vp9_coef_encodings[MAX_ENTROPY_TOKENS]; /* Trees for extra bits. 
Probabilities are constant and do not depend on previously encoded bits */ @@ -400,1660 +233,189 @@ static const vp9_prob Pcat6[] = { 254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129 }; -#if CONFIG_CODE_NONZEROCOUNT -const vp9_tree_index vp9_nzc4x4_tree[2 * NZC4X4_NODES] = { - -NZC_0, 2, - 4, 6, - -NZC_1, -NZC_2, - -NZC_3TO4, 8, - -NZC_5TO8, -NZC_9TO16, -}; -struct vp9_token_struct vp9_nzc4x4_encodings[NZC4X4_TOKENS]; - -const vp9_tree_index vp9_nzc8x8_tree[2 * NZC8X8_NODES] = { - -NZC_0, 2, - 4, 6, - -NZC_1, -NZC_2, - 8, 10, - -NZC_3TO4, -NZC_5TO8, - -NZC_9TO16, 12, - -NZC_17TO32, -NZC_33TO64, -}; -struct vp9_token_struct vp9_nzc8x8_encodings[NZC8X8_TOKENS]; - -const vp9_tree_index vp9_nzc16x16_tree[2 * NZC16X16_NODES] = { - -NZC_0, 2, - 4, 6, - -NZC_1, -NZC_2, - 8, 10, - -NZC_3TO4, -NZC_5TO8, - 12, 14, - -NZC_9TO16, -NZC_17TO32, - -NZC_33TO64, 16, - -NZC_65TO128, -NZC_129TO256, -}; -struct vp9_token_struct vp9_nzc16x16_encodings[NZC16X16_TOKENS]; - -const vp9_tree_index vp9_nzc32x32_tree[2 * NZC32X32_NODES] = { - -NZC_0, 2, - 4, 6, - -NZC_1, -NZC_2, - 8, 10, - -NZC_3TO4, -NZC_5TO8, - 12, 14, - -NZC_9TO16, -NZC_17TO32, - 16, 18, - -NZC_33TO64, -NZC_65TO128, - -NZC_129TO256, 20, - -NZC_257TO512, -NZC_513TO1024, -}; -struct vp9_token_struct vp9_nzc32x32_encodings[NZC32X32_TOKENS]; - -const int vp9_extranzcbits[NZC32X32_TOKENS] = { - 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 -}; - -const int vp9_basenzcvalue[NZC32X32_TOKENS] = { - 0, 1, 2, 3, 5, 9, 17, 33, 65, 129, 257, 513 -}; - -#endif // CONFIG_CODE_NONZEROCOUNT - -#if CONFIG_MODELCOEFPROB - -const vp9_prob vp9_modelcoefprobs_gg875[COEFPROB_MODELS][ENTROPY_NODES - 1] = { - // Probs generated with a Generalized Gaussian (with shape parameter 0.875) - // source model with varying quantizer step size for a uniform quantizer - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, // do not use - {1, 2, 6, 86, 129, 11, 87, 42, 92, 52,}, - {2, 4, 12, 87, 129, 22, 89, 75, 97, 91,}, - {3, 6, 17, 88, 130, 32, 90, 102, 102, 121,}, - {4, 8, 22, 89, 131, 41, 91, 125, 107, 145,}, - {5, 10, 28, 90, 131, 50, 93, 144, 112, 164,}, - {6, 12, 33, 90, 132, 59, 94, 160, 117, 180,}, - {7, 14, 38, 91, 132, 67, 95, 173, 122, 193,}, - {8, 15, 42, 92, 133, 75, 97, 185, 126, 204,}, - {9, 17, 47, 92, 133, 82, 98, 195, 131, 212,}, - {10, 19, 52, 93, 134, 89, 99, 203, 135, 220,}, - {11, 21, 56, 94, 134, 96, 101, 211, 140, 226,}, - {12, 23, 60, 95, 135, 102, 102, 217, 144, 231,}, - {13, 25, 65, 95, 135, 109, 103, 222, 148, 235,}, - {14, 26, 69, 96, 136, 115, 105, 227, 153, 238,}, - {15, 28, 73, 97, 136, 120, 106, 231, 157, 241,}, - {16, 30, 77, 97, 137, 126, 107, 234, 161, 244,}, - {17, 32, 81, 98, 138, 131, 108, 237, 164, 246,}, - {18, 34, 85, 99, 138, 136, 110, 240, 168, 247,}, - {19, 35, 89, 100, 139, 141, 111, 242, 172, 249,}, - {20, 37, 92, 100, 139, 145, 112, 244, 175, 250,}, - {21, 39, 96, 101, 140, 150, 113, 246, 179, 251,}, - {22, 41, 99, 102, 140, 154, 115, 247, 182, 252,}, - {23, 42, 103, 102, 141, 158, 116, 248, 185, 252,}, - {24, 44, 106, 103, 141, 162, 117, 249, 188, 253,}, - {25, 46, 110, 104, 142, 166, 118, 250, 191, 253,}, - {26, 48, 113, 104, 142, 170, 120, 251, 194, 254,}, - {27, 49, 116, 105, 143, 173, 121, 252, 197, 254,}, - {28, 51, 119, 106, 143, 176, 122, 252, 200, 254,}, - {29, 53, 122, 107, 144, 180, 123, 253, 202, 255,}, - {30, 54, 125, 107, 144, 183, 125, 253, 205, 255,}, - {31, 56, 128, 108, 145, 186, 126, 254, 207, 255,}, - {32, 58, 131, 109, 145, 189, 127, 254, 209, 255,}, - {33, 59, 134, 109, 146, 191, 128, 254, 212, 255,}, - {34, 61, 137, 110, 
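The vp9_coef_tree change above swaps the first two decisions when CONFIG_BALANCED_COEFTREE is enabled, so a decoder resolves zero versus nonzero before end-of-block. The table is walked the same way in both configurations: entries come in pairs per internal node, a positive entry is the offset of the next pair, and a negative entry is a leaf holding the negated token. A minimal sketch of that walk, with read_bool standing in for the arithmetic decoder's bit read:

typedef unsigned char vp9_prob;
typedef int vp9_tree_index;

static int treed_read(const vp9_tree_index *tree, const vp9_prob *probs,
                      int (*read_bool)(vp9_prob)) {
  vp9_tree_index i = 0;
  /* probs[i >> 1] is the probability attached to internal node i / 2 */
  while ((i = tree[i + read_bool(probs[i >> 1])]) > 0)
    continue;
  return -i;  /* leaf values are stored negated */
}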
146, 194, 130, 254, 214, 255,}, - {35, 62, 139, 111, 147, 196, 131, 255, 216, 255,}, - {36, 64, 142, 112, 147, 199, 132, 255, 218, 255,}, - {37, 66, 145, 112, 148, 201, 134, 255, 220, 255,}, - {38, 67, 147, 113, 148, 203, 135, 255, 221, 255,}, - {39, 69, 150, 114, 149, 206, 136, 255, 223, 255,}, - {40, 70, 152, 114, 149, 208, 137, 255, 225, 255,}, - {41, 72, 155, 115, 150, 210, 138, 255, 226, 255,}, - {42, 74, 157, 116, 150, 212, 140, 255, 228, 255,}, - {43, 75, 159, 117, 151, 213, 141, 255, 229, 255,}, - {44, 77, 161, 117, 151, 215, 142, 255, 230, 255,}, - {45, 78, 164, 118, 152, 217, 143, 255, 232, 255,}, - {46, 80, 166, 119, 152, 219, 145, 255, 233, 255,}, - {47, 81, 168, 120, 153, 220, 146, 255, 234, 255,}, - {48, 83, 170, 120, 153, 222, 147, 255, 235, 255,}, - {49, 84, 172, 121, 154, 223, 148, 255, 236, 255,}, - {50, 86, 174, 122, 154, 225, 150, 255, 237, 255,}, - {51, 87, 176, 123, 155, 226, 151, 255, 238, 255,}, - {52, 89, 178, 123, 155, 227, 152, 255, 239, 255,}, - {53, 90, 180, 124, 156, 228, 153, 255, 240, 255,}, - {54, 92, 182, 125, 156, 230, 154, 255, 241, 255,}, - {55, 93, 183, 126, 157, 231, 156, 255, 242, 255,}, - {56, 95, 185, 126, 157, 232, 157, 255, 242, 255,}, - {57, 96, 187, 127, 158, 233, 158, 255, 243, 255,}, - {58, 98, 189, 128, 158, 234, 159, 255, 244, 255,}, - {59, 99, 190, 129, 159, 235, 160, 255, 244, 255,}, - {60, 101, 192, 129, 159, 236, 162, 255, 245, 255,}, - {61, 102, 193, 130, 160, 237, 163, 255, 246, 255,}, - {62, 104, 195, 131, 160, 238, 164, 255, 246, 255,}, - {63, 105, 197, 132, 161, 238, 165, 255, 247, 255,}, - {64, 106, 198, 132, 162, 239, 166, 255, 247, 255,}, - {65, 108, 199, 133, 162, 240, 167, 255, 248, 255,}, - {66, 109, 201, 134, 163, 241, 169, 255, 248, 255,}, - {67, 111, 202, 135, 163, 241, 170, 255, 249, 255,}, - {68, 112, 204, 135, 164, 242, 171, 255, 249, 255,}, - {69, 113, 205, 136, 164, 243, 172, 255, 249, 255,}, - {70, 115, 206, 137, 165, 243, 173, 255, 250, 255,}, - {71, 116, 208, 138, 165, 244, 174, 255, 250, 255,}, - {72, 117, 209, 138, 166, 244, 175, 255, 250, 255,}, - {73, 119, 210, 139, 166, 245, 177, 255, 251, 255,}, - {74, 120, 211, 140, 167, 245, 178, 255, 251, 255,}, - {75, 121, 212, 141, 167, 246, 179, 255, 251, 255,}, - {76, 123, 214, 142, 168, 246, 180, 255, 252, 255,}, - {77, 124, 215, 142, 168, 247, 181, 255, 252, 255,}, - {78, 125, 216, 143, 169, 247, 182, 255, 252, 255,}, - {79, 127, 217, 144, 170, 248, 183, 255, 252, 255,}, - {80, 128, 218, 145, 170, 248, 184, 255, 253, 255,}, - {81, 129, 219, 146, 171, 248, 185, 255, 253, 255,}, - {82, 131, 220, 146, 171, 249, 186, 255, 253, 255,}, - {83, 132, 221, 147, 172, 249, 187, 255, 253, 255,}, - {84, 133, 222, 148, 172, 249, 188, 255, 253, 255,}, - {85, 134, 223, 149, 173, 250, 189, 255, 253, 255,}, - {86, 136, 224, 149, 173, 250, 190, 255, 254, 255,}, - {87, 137, 225, 150, 174, 250, 191, 255, 254, 255,}, - {88, 138, 226, 151, 174, 251, 192, 255, 254, 255,}, - {89, 139, 226, 152, 175, 251, 193, 255, 254, 255,}, - {90, 141, 227, 153, 175, 251, 194, 255, 254, 255,}, - {91, 142, 228, 153, 176, 251, 195, 255, 254, 255,}, - {92, 143, 229, 154, 177, 252, 196, 255, 254, 255,}, - {93, 144, 230, 155, 177, 252, 197, 255, 254, 255,}, - {94, 146, 230, 156, 178, 252, 198, 255, 255, 255,}, - {95, 147, 231, 157, 178, 252, 199, 255, 255, 255,}, - {96, 148, 232, 157, 179, 252, 200, 255, 255, 255,}, - {97, 149, 233, 158, 179, 253, 201, 255, 255, 255,}, - {98, 150, 233, 159, 180, 253, 202, 255, 255, 255,}, - {99, 152, 234, 160, 180, 253, 203, 255, 255, 255,}, - {100, 153, 235, 161, 181, 253, 204, 
255, 255, 255,}, - {101, 154, 235, 161, 182, 253, 205, 255, 255, 255,}, - {102, 155, 236, 162, 182, 253, 206, 255, 255, 255,}, - {103, 156, 236, 163, 183, 254, 207, 255, 255, 255,}, - {104, 157, 237, 164, 183, 254, 207, 255, 255, 255,}, - {105, 159, 238, 165, 184, 254, 208, 255, 255, 255,}, - {106, 160, 238, 166, 184, 254, 209, 255, 255, 255,}, - {107, 161, 239, 166, 185, 254, 210, 255, 255, 255,}, - {108, 162, 239, 167, 185, 254, 211, 255, 255, 255,}, - {109, 163, 240, 168, 186, 254, 212, 255, 255, 255,}, - {110, 164, 240, 169, 187, 254, 212, 255, 255, 255,}, - {111, 165, 241, 170, 187, 254, 213, 255, 255, 255,}, - {112, 166, 241, 170, 188, 255, 214, 255, 255, 255,}, - {113, 167, 242, 171, 188, 255, 215, 255, 255, 255,}, - {114, 169, 242, 172, 189, 255, 216, 255, 255, 255,}, - {115, 170, 243, 173, 189, 255, 216, 255, 255, 255,}, - {116, 171, 243, 174, 190, 255, 217, 255, 255, 255,}, - {117, 172, 244, 174, 190, 255, 218, 255, 255, 255,}, - {118, 173, 244, 175, 191, 255, 219, 255, 255, 255,}, - {119, 174, 244, 176, 192, 255, 219, 255, 255, 255,}, - {120, 175, 245, 177, 192, 255, 220, 255, 255, 255,}, - {121, 176, 245, 178, 193, 255, 221, 255, 255, 255,}, - {122, 177, 245, 178, 193, 255, 222, 255, 255, 255,}, - {123, 178, 246, 179, 194, 255, 222, 255, 255, 255,}, - {124, 179, 246, 180, 194, 255, 223, 255, 255, 255,}, - {125, 180, 247, 181, 195, 255, 224, 255, 255, 255,}, - {126, 181, 247, 182, 196, 255, 224, 255, 255, 255,}, - {127, 182, 247, 182, 196, 255, 225, 255, 255, 255,}, - {128, 183, 247, 183, 197, 255, 226, 255, 255, 255,}, - {129, 184, 248, 184, 197, 255, 226, 255, 255, 255,}, - {130, 185, 248, 185, 198, 255, 227, 255, 255, 255,}, - {131, 186, 248, 186, 198, 255, 228, 255, 255, 255,}, - {132, 187, 249, 186, 199, 255, 228, 255, 255, 255,}, - {133, 188, 249, 187, 200, 255, 229, 255, 255, 255,}, - {134, 189, 249, 188, 200, 255, 230, 255, 255, 255,}, - {135, 190, 249, 189, 201, 255, 230, 255, 255, 255,}, - {136, 191, 250, 190, 201, 255, 231, 255, 255, 255,}, - {137, 192, 250, 190, 202, 255, 231, 255, 255, 255,}, - {138, 193, 250, 191, 202, 255, 232, 255, 255, 255,}, - {139, 194, 250, 192, 203, 255, 232, 255, 255, 255,}, - {140, 195, 251, 193, 204, 255, 233, 255, 255, 255,}, - {141, 195, 251, 194, 204, 255, 234, 255, 255, 255,}, - {142, 196, 251, 194, 205, 255, 234, 255, 255, 255,}, - {143, 197, 251, 195, 205, 255, 235, 255, 255, 255,}, - {144, 198, 251, 196, 206, 255, 235, 255, 255, 255,}, - {145, 199, 252, 197, 206, 255, 236, 255, 255, 255,}, - {146, 200, 252, 197, 207, 255, 236, 255, 255, 255,}, - {147, 201, 252, 198, 208, 255, 237, 255, 255, 255,}, - {148, 202, 252, 199, 208, 255, 237, 255, 255, 255,}, - {149, 203, 252, 200, 209, 255, 238, 255, 255, 255,}, - {150, 203, 252, 201, 209, 255, 238, 255, 255, 255,}, - {151, 204, 253, 201, 210, 255, 239, 255, 255, 255,}, - {152, 205, 253, 202, 210, 255, 239, 255, 255, 255,}, - {153, 206, 253, 203, 211, 255, 239, 255, 255, 255,}, - {154, 207, 253, 204, 212, 255, 240, 255, 255, 255,}, - {155, 208, 253, 204, 212, 255, 240, 255, 255, 255,}, - {156, 209, 253, 205, 213, 255, 241, 255, 255, 255,}, - {157, 209, 253, 206, 213, 255, 241, 255, 255, 255,}, - {158, 210, 254, 207, 214, 255, 242, 255, 255, 255,}, - {159, 211, 254, 207, 214, 255, 242, 255, 255, 255,}, - {160, 212, 254, 208, 215, 255, 242, 255, 255, 255,}, - {161, 213, 254, 209, 215, 255, 243, 255, 255, 255,}, - {162, 213, 254, 210, 216, 255, 243, 255, 255, 255,}, - {163, 214, 254, 210, 217, 255, 244, 255, 255, 255,}, - {164, 215, 254, 211, 217, 255, 244, 255, 255, 255,}, - {165, 216, 254, 
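Pcat6 above lists the per-bit probabilities for the fourteen extra bits of DCT_VAL_CATEGORY6: each category token encodes a base magnitude plus a fixed number of extra bits, with the probability array ordered most-significant bit first. A sketch of the value reconstruction, assuming the usual category-6 base of 67 (categories 1 through 6 start at 5, 7, 11, 19, 35 and 67):

typedef unsigned char vp9_prob;

static int read_cat_value(int base, const vp9_prob *pcat, int bits,
                          int (*read_bool)(vp9_prob)) {
  int value = 0;
  int i;
  for (i = 0; i < bits; ++i)
    value = (value << 1) | read_bool(pcat[i]);  /* MSB first */
  return base + value;
}

/* e.g. a category-6 coefficient: read_cat_value(67, Pcat6, 14, read_bool) */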
212, 218, 255, 244, 255, 255, 255,}, - {166, 216, 254, 212, 218, 255, 245, 255, 255, 255,}, - {167, 217, 254, 213, 219, 255, 245, 255, 255, 255,}, - {168, 218, 254, 214, 219, 255, 245, 255, 255, 255,}, - {169, 219, 255, 215, 220, 255, 246, 255, 255, 255,}, - {170, 219, 255, 215, 221, 255, 246, 255, 255, 255,}, - {171, 220, 255, 216, 221, 255, 246, 255, 255, 255,}, - {172, 221, 255, 217, 222, 255, 247, 255, 255, 255,}, - {173, 222, 255, 217, 222, 255, 247, 255, 255, 255,}, - {174, 222, 255, 218, 223, 255, 247, 255, 255, 255,}, - {175, 223, 255, 219, 223, 255, 248, 255, 255, 255,}, - {176, 224, 255, 220, 224, 255, 248, 255, 255, 255,}, - {177, 224, 255, 220, 224, 255, 248, 255, 255, 255,}, - {178, 225, 255, 221, 225, 255, 248, 255, 255, 255,}, - {179, 226, 255, 222, 225, 255, 249, 255, 255, 255,}, - {180, 226, 255, 222, 226, 255, 249, 255, 255, 255,}, - {181, 227, 255, 223, 227, 255, 249, 255, 255, 255,}, - {182, 228, 255, 224, 227, 255, 249, 255, 255, 255,}, - {183, 228, 255, 224, 228, 255, 250, 255, 255, 255,}, - {184, 229, 255, 225, 228, 255, 250, 255, 255, 255,}, - {185, 230, 255, 226, 229, 255, 250, 255, 255, 255,}, - {186, 230, 255, 226, 229, 255, 250, 255, 255, 255,}, - {187, 231, 255, 227, 230, 255, 251, 255, 255, 255,}, - {188, 232, 255, 228, 230, 255, 251, 255, 255, 255,}, - {189, 232, 255, 228, 231, 255, 251, 255, 255, 255,}, - {190, 233, 255, 229, 231, 255, 251, 255, 255, 255,}, - {191, 233, 255, 229, 232, 255, 251, 255, 255, 255,}, - {192, 234, 255, 230, 232, 255, 252, 255, 255, 255,}, - {193, 234, 255, 231, 233, 255, 252, 255, 255, 255,}, - {194, 235, 255, 231, 233, 255, 252, 255, 255, 255,}, - {195, 236, 255, 232, 234, 255, 252, 255, 255, 255,}, - {196, 236, 255, 232, 234, 255, 252, 255, 255, 255,}, - {197, 237, 255, 233, 235, 255, 252, 255, 255, 255,}, - {198, 237, 255, 234, 235, 255, 253, 255, 255, 255,}, - {199, 238, 255, 234, 236, 255, 253, 255, 255, 255,}, - {200, 238, 255, 235, 236, 255, 253, 255, 255, 255,}, - {201, 239, 255, 235, 237, 255, 253, 255, 255, 255,}, - {202, 239, 255, 236, 237, 255, 253, 255, 255, 255,}, - {203, 240, 255, 237, 238, 255, 253, 255, 255, 255,}, - {204, 240, 255, 237, 238, 255, 254, 255, 255, 255,}, - {205, 241, 255, 238, 239, 255, 254, 255, 255, 255,}, - {206, 241, 255, 238, 239, 255, 254, 255, 255, 255,}, - {207, 242, 255, 239, 240, 255, 254, 255, 255, 255,}, - {208, 242, 255, 239, 240, 255, 254, 255, 255, 255,}, - {209, 243, 255, 240, 241, 255, 254, 255, 255, 255,}, - {210, 243, 255, 240, 241, 255, 254, 255, 255, 255,}, - {211, 244, 255, 241, 242, 255, 254, 255, 255, 255,}, - {212, 244, 255, 241, 242, 255, 254, 255, 255, 255,}, - {213, 245, 255, 242, 243, 255, 255, 255, 255, 255,}, - {214, 245, 255, 242, 243, 255, 255, 255, 255, 255,}, - {215, 246, 255, 243, 244, 255, 255, 255, 255, 255,}, - {216, 246, 255, 243, 244, 255, 255, 255, 255, 255,}, - {217, 246, 255, 244, 244, 255, 255, 255, 255, 255,}, - {218, 247, 255, 244, 245, 255, 255, 255, 255, 255,}, - {219, 247, 255, 245, 245, 255, 255, 255, 255, 255,}, - {220, 248, 255, 245, 246, 255, 255, 255, 255, 255,}, - {221, 248, 255, 246, 246, 255, 255, 255, 255, 255,}, - {222, 248, 255, 246, 247, 255, 255, 255, 255, 255,}, - {223, 249, 255, 247, 247, 255, 255, 255, 255, 255,}, - {224, 249, 255, 247, 247, 255, 255, 255, 255, 255,}, - {225, 250, 255, 247, 248, 255, 255, 255, 255, 255,}, - {226, 250, 255, 248, 248, 255, 255, 255, 255, 255,}, - {227, 250, 255, 248, 249, 255, 255, 255, 255, 255,}, - {228, 251, 255, 249, 249, 255, 255, 255, 255, 255,}, - {229, 251, 255, 249, 249, 255, 255, 255, 255, 
255,}, - {230, 251, 255, 249, 250, 255, 255, 255, 255, 255,}, - {231, 251, 255, 250, 250, 255, 255, 255, 255, 255,}, - {232, 252, 255, 250, 250, 255, 255, 255, 255, 255,}, - {233, 252, 255, 251, 251, 255, 255, 255, 255, 255,}, - {234, 252, 255, 251, 251, 255, 255, 255, 255, 255,}, - {235, 253, 255, 251, 251, 255, 255, 255, 255, 255,}, - {236, 253, 255, 252, 252, 255, 255, 255, 255, 255,}, - {237, 253, 255, 252, 252, 255, 255, 255, 255, 255,}, - {238, 253, 255, 252, 252, 255, 255, 255, 255, 255,}, - {239, 254, 255, 253, 253, 255, 255, 255, 255, 255,}, - {240, 254, 255, 253, 253, 255, 255, 255, 255, 255,}, - {241, 254, 255, 253, 253, 255, 255, 255, 255, 255,}, - {242, 254, 255, 253, 254, 255, 255, 255, 255, 255,}, - {243, 254, 255, 254, 254, 255, 255, 255, 255, 255,}, - {244, 255, 255, 254, 254, 255, 255, 255, 255, 255,}, - {245, 255, 255, 254, 254, 255, 255, 255, 255, 255,}, - {246, 255, 255, 254, 254, 255, 255, 255, 255, 255,}, - {247, 255, 255, 255, 255, 255, 255, 255, 255, 255,}, - {248, 255, 255, 255, 255, 255, 255, 255, 255, 255,}, - {249, 255, 255, 255, 255, 255, 255, 255, 255, 255,}, - {250, 255, 255, 255, 255, 255, 255, 255, 255, 255,}, - {251, 255, 255, 255, 255, 255, 255, 255, 255, 255,}, - {252, 255, 255, 255, 255, 255, 255, 255, 255, 255,}, - {253, 255, 255, 255, 255, 255, 255, 255, 255, 255,}, - {254, 255, 255, 255, 255, 255, 255, 255, 255, 255,}, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255,}, -}; - -const vp9_prob vp9_modelcoefprobs_gg75[COEFPROB_MODELS][ENTROPY_NODES - 1] = { - // Probs generated with a Generalized Gaussian (with shape parameter 0.75) - // source model with varying quantizer step size for a uniform quantizer - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, // do not use - {1, 2, 6, 87, 129, 11, 88, 39, 93, 47,}, - {2, 4, 11, 88, 130, 21, 89, 68, 98, 79,}, - {3, 6, 16, 89, 131, 30, 91, 92, 103, 105,}, - {4, 8, 21, 90, 131, 38, 92, 112, 107, 126,}, - {5, 10, 26, 90, 132, 46, 94, 129, 111, 143,}, - {6, 11, 31, 91, 133, 54, 95, 143, 115, 157,}, - {7, 13, 35, 92, 133, 61, 96, 156, 119, 170,}, - {8, 15, 40, 93, 134, 68, 97, 167, 123, 180,}, - {9, 17, 44, 94, 134, 74, 98, 177, 126, 189,}, - {10, 19, 48, 94, 135, 80, 100, 185, 130, 197,}, - {11, 20, 52, 95, 135, 86, 101, 192, 133, 204,}, - {12, 22, 56, 96, 136, 92, 102, 199, 137, 210,}, - {13, 24, 60, 96, 136, 97, 103, 205, 140, 215,}, - {14, 26, 64, 97, 137, 103, 104, 210, 143, 219,}, - {15, 27, 68, 98, 137, 108, 105, 215, 146, 223,}, - {16, 29, 71, 98, 138, 112, 106, 219, 149, 227,}, - {17, 31, 75, 99, 138, 117, 107, 223, 152, 230,}, - {18, 32, 78, 100, 139, 121, 108, 226, 155, 233,}, - {19, 34, 82, 100, 139, 126, 109, 229, 158, 235,}, - {20, 36, 85, 101, 140, 130, 110, 231, 161, 238,}, - {21, 37, 88, 102, 140, 134, 111, 234, 164, 239,}, - {22, 39, 91, 102, 141, 138, 112, 236, 167, 241,}, - {23, 40, 94, 103, 141, 141, 113, 238, 169, 243,}, - {24, 42, 97, 104, 142, 145, 114, 240, 172, 244,}, - {25, 44, 100, 104, 142, 149, 115, 241, 174, 245,}, - {26, 45, 103, 105, 143, 152, 116, 243, 177, 246,}, - {27, 47, 106, 105, 143, 155, 117, 244, 179, 247,}, - {28, 48, 109, 106, 143, 158, 118, 245, 182, 248,}, - {29, 50, 112, 107, 144, 161, 119, 246, 184, 249,}, - {30, 52, 115, 107, 144, 164, 120, 247, 186, 250,}, - {31, 53, 117, 108, 145, 167, 121, 248, 188, 250,}, - {32, 55, 120, 109, 145, 170, 122, 249, 190, 251,}, - {33, 56, 122, 109, 146, 173, 123, 249, 192, 252,}, - {34, 58, 125, 110, 146, 175, 124, 250, 194, 252,}, - {35, 59, 127, 110, 147, 178, 125, 251, 196, 252,}, - {36, 61, 130, 111, 147, 180, 126, 251, 198, 253,}, - {37, 
62, 132, 112, 147, 183, 127, 251, 200, 253,}, - {38, 64, 135, 112, 148, 185, 128, 252, 202, 253,}, - {39, 65, 137, 113, 148, 187, 129, 252, 204, 254,}, - {40, 67, 139, 114, 149, 189, 130, 253, 205, 254,}, - {41, 68, 141, 114, 149, 191, 131, 253, 207, 254,}, - {42, 70, 144, 115, 150, 193, 132, 253, 209, 254,}, - {43, 71, 146, 115, 150, 195, 133, 254, 210, 254,}, - {44, 72, 148, 116, 151, 197, 134, 254, 212, 255,}, - {45, 74, 150, 117, 151, 199, 135, 254, 213, 255,}, - {46, 75, 152, 117, 151, 201, 136, 254, 215, 255,}, - {47, 77, 154, 118, 152, 202, 137, 254, 216, 255,}, - {48, 78, 156, 119, 152, 204, 138, 254, 217, 255,}, - {49, 80, 158, 119, 153, 206, 139, 255, 219, 255,}, - {50, 81, 160, 120, 153, 207, 140, 255, 220, 255,}, - {51, 82, 162, 120, 154, 209, 141, 255, 221, 255,}, - {52, 84, 164, 121, 154, 210, 142, 255, 222, 255,}, - {53, 85, 165, 122, 155, 212, 143, 255, 224, 255,}, - {54, 87, 167, 122, 155, 213, 144, 255, 225, 255,}, - {55, 88, 169, 123, 155, 215, 145, 255, 226, 255,}, - {56, 89, 171, 124, 156, 216, 146, 255, 227, 255,}, - {57, 91, 172, 124, 156, 217, 146, 255, 228, 255,}, - {58, 92, 174, 125, 157, 218, 147, 255, 229, 255,}, - {59, 93, 176, 126, 157, 220, 148, 255, 230, 255,}, - {60, 95, 177, 126, 158, 221, 149, 255, 231, 255,}, - {61, 96, 179, 127, 158, 222, 150, 255, 232, 255,}, - {62, 97, 180, 127, 159, 223, 151, 255, 232, 255,}, - {63, 99, 182, 128, 159, 224, 152, 255, 233, 255,}, - {64, 100, 183, 129, 159, 225, 153, 255, 234, 255,}, - {65, 101, 185, 129, 160, 226, 154, 255, 235, 255,}, - {66, 103, 186, 130, 160, 227, 155, 255, 236, 255,}, - {67, 104, 188, 131, 161, 228, 156, 255, 236, 255,}, - {68, 105, 189, 131, 161, 229, 157, 255, 237, 255,}, - {69, 106, 190, 132, 162, 230, 158, 255, 238, 255,}, - {70, 108, 192, 133, 162, 231, 159, 255, 238, 255,}, - {71, 109, 193, 133, 162, 231, 159, 255, 239, 255,}, - {72, 110, 194, 134, 163, 232, 160, 255, 240, 255,}, - {73, 111, 196, 134, 163, 233, 161, 255, 240, 255,}, - {74, 113, 197, 135, 164, 234, 162, 255, 241, 255,}, - {75, 114, 198, 136, 164, 235, 163, 255, 241, 255,}, - {76, 115, 199, 136, 165, 235, 164, 255, 242, 255,}, - {77, 116, 200, 137, 165, 236, 165, 255, 243, 255,}, - {78, 118, 202, 138, 166, 237, 166, 255, 243, 255,}, - {79, 119, 203, 138, 166, 237, 167, 255, 244, 255,}, - {80, 120, 204, 139, 167, 238, 168, 255, 244, 255,}, - {81, 121, 205, 140, 167, 239, 168, 255, 244, 255,}, - {82, 123, 206, 140, 167, 239, 169, 255, 245, 255,}, - {83, 124, 207, 141, 168, 240, 170, 255, 245, 255,}, - {84, 125, 208, 142, 168, 240, 171, 255, 246, 255,}, - {85, 126, 209, 142, 169, 241, 172, 255, 246, 255,}, - {86, 127, 210, 143, 169, 241, 173, 255, 247, 255,}, - {87, 129, 211, 144, 170, 242, 174, 255, 247, 255,}, - {88, 130, 212, 144, 170, 242, 175, 255, 247, 255,}, - {89, 131, 213, 145, 171, 243, 175, 255, 248, 255,}, - {90, 132, 214, 146, 171, 243, 176, 255, 248, 255,}, - {91, 133, 215, 146, 171, 244, 177, 255, 248, 255,}, - {92, 134, 216, 147, 172, 244, 178, 255, 249, 255,}, - {93, 136, 217, 148, 172, 245, 179, 255, 249, 255,}, - {94, 137, 218, 148, 173, 245, 180, 255, 249, 255,}, - {95, 138, 219, 149, 173, 245, 181, 255, 249, 255,}, - {96, 139, 220, 150, 174, 246, 181, 255, 250, 255,}, - {97, 140, 220, 150, 174, 246, 182, 255, 250, 255,}, - {98, 141, 221, 151, 175, 247, 183, 255, 250, 255,}, - {99, 142, 222, 152, 175, 247, 184, 255, 250, 255,}, - {100, 144, 223, 152, 176, 247, 185, 255, 251, 255,}, - {101, 145, 224, 153, 176, 248, 186, 255, 251, 255,}, - {102, 146, 224, 154, 177, 248, 186, 255, 251, 255,}, - {103, 147, 225, 
154, 177, 248, 187, 255, 251, 255,}, - {104, 148, 226, 155, 177, 248, 188, 255, 252, 255,}, - {105, 149, 226, 156, 178, 249, 189, 255, 252, 255,}, - {106, 150, 227, 156, 178, 249, 190, 255, 252, 255,}, - {107, 151, 228, 157, 179, 249, 191, 255, 252, 255,}, - {108, 152, 229, 158, 179, 250, 191, 255, 252, 255,}, - {109, 153, 229, 158, 180, 250, 192, 255, 252, 255,}, - {110, 154, 230, 159, 180, 250, 193, 255, 253, 255,}, - {111, 155, 231, 160, 181, 250, 194, 255, 253, 255,}, - {112, 157, 231, 160, 181, 251, 195, 255, 253, 255,}, - {113, 158, 232, 161, 182, 251, 195, 255, 253, 255,}, - {114, 159, 232, 162, 182, 251, 196, 255, 253, 255,}, - {115, 160, 233, 162, 183, 251, 197, 255, 253, 255,}, - {116, 161, 234, 163, 183, 251, 198, 255, 253, 255,}, - {117, 162, 234, 164, 184, 252, 198, 255, 254, 255,}, - {118, 163, 235, 165, 184, 252, 199, 255, 254, 255,}, - {119, 164, 235, 165, 185, 252, 200, 255, 254, 255,}, - {120, 165, 236, 166, 185, 252, 201, 255, 254, 255,}, - {121, 166, 236, 167, 186, 252, 201, 255, 254, 255,}, - {122, 167, 237, 167, 186, 252, 202, 255, 254, 255,}, - {123, 168, 237, 168, 186, 253, 203, 255, 254, 255,}, - {124, 169, 238, 169, 187, 253, 204, 255, 254, 255,}, - {125, 170, 238, 169, 187, 253, 204, 255, 254, 255,}, - {126, 171, 239, 170, 188, 253, 205, 255, 254, 255,}, - {127, 172, 239, 171, 188, 253, 206, 255, 254, 255,}, - {128, 173, 240, 171, 189, 253, 207, 255, 255, 255,}, - {129, 174, 240, 172, 189, 253, 207, 255, 255, 255,}, - {130, 175, 241, 173, 190, 253, 208, 255, 255, 255,}, - {131, 176, 241, 174, 190, 254, 209, 255, 255, 255,}, - {132, 177, 241, 174, 191, 254, 209, 255, 255, 255,}, - {133, 178, 242, 175, 191, 254, 210, 255, 255, 255,}, - {134, 179, 242, 176, 192, 254, 211, 255, 255, 255,}, - {135, 180, 243, 176, 192, 254, 212, 255, 255, 255,}, - {136, 180, 243, 177, 193, 254, 212, 255, 255, 255,}, - {137, 181, 243, 178, 193, 254, 213, 255, 255, 255,}, - {138, 182, 244, 179, 194, 254, 214, 255, 255, 255,}, - {139, 183, 244, 179, 194, 254, 214, 255, 255, 255,}, - {140, 184, 244, 180, 195, 254, 215, 255, 255, 255,}, - {141, 185, 245, 181, 195, 254, 216, 255, 255, 255,}, - {142, 186, 245, 181, 196, 255, 216, 255, 255, 255,}, - {143, 187, 245, 182, 196, 255, 217, 255, 255, 255,}, - {144, 188, 246, 183, 197, 255, 218, 255, 255, 255,}, - {145, 189, 246, 183, 197, 255, 218, 255, 255, 255,}, - {146, 190, 246, 184, 198, 255, 219, 255, 255, 255,}, - {147, 191, 247, 185, 198, 255, 220, 255, 255, 255,}, - {148, 191, 247, 186, 199, 255, 220, 255, 255, 255,}, - {149, 192, 247, 186, 199, 255, 221, 255, 255, 255,}, - {150, 193, 248, 187, 200, 255, 221, 255, 255, 255,}, - {151, 194, 248, 188, 200, 255, 222, 255, 255, 255,}, - {152, 195, 248, 188, 201, 255, 223, 255, 255, 255,}, - {153, 196, 248, 189, 201, 255, 223, 255, 255, 255,}, - {154, 197, 249, 190, 202, 255, 224, 255, 255, 255,}, - {155, 198, 249, 191, 202, 255, 224, 255, 255, 255,}, - {156, 198, 249, 191, 203, 255, 225, 255, 255, 255,}, - {157, 199, 249, 192, 203, 255, 226, 255, 255, 255,}, - {158, 200, 250, 193, 204, 255, 226, 255, 255, 255,}, - {159, 201, 250, 193, 204, 255, 227, 255, 255, 255,}, - {160, 202, 250, 194, 205, 255, 227, 255, 255, 255,}, - {161, 203, 250, 195, 206, 255, 228, 255, 255, 255,}, - {162, 203, 250, 196, 206, 255, 228, 255, 255, 255,}, - {163, 204, 251, 196, 207, 255, 229, 255, 255, 255,}, - {164, 205, 251, 197, 207, 255, 229, 255, 255, 255,}, - {165, 206, 251, 198, 208, 255, 230, 255, 255, 255,}, - {166, 207, 251, 198, 208, 255, 231, 255, 255, 255,}, - {167, 207, 251, 199, 209, 255, 231, 255, 255, 
255,}, - {168, 208, 252, 200, 209, 255, 232, 255, 255, 255,}, - {169, 209, 252, 201, 210, 255, 232, 255, 255, 255,}, - {170, 210, 252, 201, 210, 255, 233, 255, 255, 255,}, - {171, 211, 252, 202, 211, 255, 233, 255, 255, 255,}, - {172, 211, 252, 203, 211, 255, 234, 255, 255, 255,}, - {173, 212, 252, 203, 212, 255, 234, 255, 255, 255,}, - {174, 213, 252, 204, 212, 255, 235, 255, 255, 255,}, - {175, 214, 253, 205, 213, 255, 235, 255, 255, 255,}, - {176, 214, 253, 206, 213, 255, 236, 255, 255, 255,}, - {177, 215, 253, 206, 214, 255, 236, 255, 255, 255,}, - {178, 216, 253, 207, 214, 255, 237, 255, 255, 255,}, - {179, 217, 253, 208, 215, 255, 237, 255, 255, 255,}, - {180, 217, 253, 208, 216, 255, 237, 255, 255, 255,}, - {181, 218, 253, 209, 216, 255, 238, 255, 255, 255,}, - {182, 219, 254, 210, 217, 255, 238, 255, 255, 255,}, - {183, 220, 254, 211, 217, 255, 239, 255, 255, 255,}, - {184, 220, 254, 211, 218, 255, 239, 255, 255, 255,}, - {185, 221, 254, 212, 218, 255, 240, 255, 255, 255,}, - {186, 222, 254, 213, 219, 255, 240, 255, 255, 255,}, - {187, 222, 254, 213, 219, 255, 241, 255, 255, 255,}, - {188, 223, 254, 214, 220, 255, 241, 255, 255, 255,}, - {189, 224, 254, 215, 220, 255, 241, 255, 255, 255,}, - {190, 225, 254, 215, 221, 255, 242, 255, 255, 255,}, - {191, 225, 254, 216, 221, 255, 242, 255, 255, 255,}, - {192, 226, 254, 217, 222, 255, 243, 255, 255, 255,}, - {193, 227, 255, 218, 223, 255, 243, 255, 255, 255,}, - {194, 227, 255, 218, 223, 255, 243, 255, 255, 255,}, - {195, 228, 255, 219, 224, 255, 244, 255, 255, 255,}, - {196, 229, 255, 220, 224, 255, 244, 255, 255, 255,}, - {197, 229, 255, 220, 225, 255, 244, 255, 255, 255,}, - {198, 230, 255, 221, 225, 255, 245, 255, 255, 255,}, - {199, 230, 255, 222, 226, 255, 245, 255, 255, 255,}, - {200, 231, 255, 222, 226, 255, 246, 255, 255, 255,}, - {201, 232, 255, 223, 227, 255, 246, 255, 255, 255,}, - {202, 232, 255, 224, 228, 255, 246, 255, 255, 255,}, - {203, 233, 255, 224, 228, 255, 247, 255, 255, 255,}, - {204, 234, 255, 225, 229, 255, 247, 255, 255, 255,}, - {205, 234, 255, 226, 229, 255, 247, 255, 255, 255,}, - {206, 235, 255, 227, 230, 255, 248, 255, 255, 255,}, - {207, 235, 255, 227, 230, 255, 248, 255, 255, 255,}, - {208, 236, 255, 228, 231, 255, 248, 255, 255, 255,}, - {209, 237, 255, 229, 231, 255, 248, 255, 255, 255,}, - {210, 237, 255, 229, 232, 255, 249, 255, 255, 255,}, - {211, 238, 255, 230, 233, 255, 249, 255, 255, 255,}, - {212, 238, 255, 231, 233, 255, 249, 255, 255, 255,}, - {213, 239, 255, 231, 234, 255, 250, 255, 255, 255,}, - {214, 239, 255, 232, 234, 255, 250, 255, 255, 255,}, - {215, 240, 255, 233, 235, 255, 250, 255, 255, 255,}, - {216, 241, 255, 233, 235, 255, 250, 255, 255, 255,}, - {217, 241, 255, 234, 236, 255, 251, 255, 255, 255,}, - {218, 242, 255, 235, 236, 255, 251, 255, 255, 255,}, - {219, 242, 255, 235, 237, 255, 251, 255, 255, 255,}, - {220, 243, 255, 236, 237, 255, 251, 255, 255, 255,}, - {221, 243, 255, 236, 238, 255, 252, 255, 255, 255,}, - {222, 244, 255, 237, 239, 255, 252, 255, 255, 255,}, - {223, 244, 255, 238, 239, 255, 252, 255, 255, 255,}, - {224, 245, 255, 238, 240, 255, 252, 255, 255, 255,}, - {225, 245, 255, 239, 240, 255, 252, 255, 255, 255,}, - {226, 246, 255, 240, 241, 255, 253, 255, 255, 255,}, - {227, 246, 255, 240, 241, 255, 253, 255, 255, 255,}, - {228, 247, 255, 241, 242, 255, 253, 255, 255, 255,}, - {229, 247, 255, 242, 242, 255, 253, 255, 255, 255,}, - {230, 248, 255, 242, 243, 255, 253, 255, 255, 255,}, - {231, 248, 255, 243, 244, 255, 254, 255, 255, 255,}, - {232, 248, 255, 243, 244, 
255, 254, 255, 255, 255,}, - {233, 249, 255, 244, 245, 255, 254, 255, 255, 255,}, - {234, 249, 255, 245, 245, 255, 254, 255, 255, 255,}, - {235, 250, 255, 245, 246, 255, 254, 255, 255, 255,}, - {236, 250, 255, 246, 246, 255, 254, 255, 255, 255,}, - {237, 251, 255, 246, 247, 255, 255, 255, 255, 255,}, - {238, 251, 255, 247, 247, 255, 255, 255, 255, 255,}, - {239, 251, 255, 248, 248, 255, 255, 255, 255, 255,}, - {240, 252, 255, 248, 248, 255, 255, 255, 255, 255,}, - {241, 252, 255, 249, 249, 255, 255, 255, 255, 255,}, - {242, 252, 255, 249, 249, 255, 255, 255, 255, 255,}, - {243, 253, 255, 250, 250, 255, 255, 255, 255, 255,}, - {244, 253, 255, 250, 250, 255, 255, 255, 255, 255,}, - {245, 253, 255, 251, 251, 255, 255, 255, 255, 255,}, - {246, 254, 255, 251, 251, 255, 255, 255, 255, 255,}, - {247, 254, 255, 252, 252, 255, 255, 255, 255, 255,}, - {248, 254, 255, 252, 252, 255, 255, 255, 255, 255,}, - {249, 255, 255, 253, 253, 255, 255, 255, 255, 255,}, - {250, 255, 255, 253, 253, 255, 255, 255, 255, 255,}, - {251, 255, 255, 254, 254, 255, 255, 255, 255, 255,}, - {252, 255, 255, 254, 254, 255, 255, 255, 255, 255,}, - {253, 255, 255, 255, 255, 255, 255, 255, 255, 255,}, - {254, 255, 255, 255, 255, 255, 255, 255, 255, 255,}, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255,} -}; - -const vp9_prob vp9_modelcoefprobs_gg625[COEFPROB_MODELS][ENTROPY_NODES - 1] = { - // Probs generated with a Generalized Gaussian (with shape parameter 0.625) - // source model with varying quantizer step size for a uniform quantizer - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, // do not use - {1, 2, 6, 88, 130, 10, 88, 35, 94, 40,}, - {2, 4, 11, 89, 131, 19, 90, 60, 99, 67,}, - {3, 6, 15, 90, 132, 27, 92, 80, 103, 88,}, - {4, 7, 20, 91, 132, 34, 93, 97, 107, 105,}, - {5, 9, 24, 92, 133, 41, 94, 112, 110, 120,}, - {6, 11, 28, 93, 134, 48, 95, 125, 113, 132,}, - {7, 13, 33, 93, 134, 54, 97, 136, 116, 143,}, - {8, 14, 36, 94, 135, 60, 98, 146, 119, 152,}, - {9, 16, 40, 95, 135, 65, 99, 155, 122, 161,}, - {10, 18, 44, 95, 136, 70, 100, 163, 125, 168,}, - {11, 19, 48, 96, 136, 75, 101, 170, 127, 175,}, - {12, 21, 51, 97, 137, 80, 102, 176, 130, 181,}, - {13, 23, 55, 97, 137, 85, 102, 182, 132, 187,}, - {14, 24, 58, 98, 138, 89, 103, 188, 135, 192,}, - {15, 26, 61, 99, 138, 94, 104, 193, 137, 196,}, - {16, 27, 64, 99, 139, 98, 105, 197, 140, 201,}, - {17, 29, 67, 100, 139, 102, 106, 201, 142, 205,}, - {18, 30, 70, 101, 140, 106, 107, 205, 144, 208,}, - {19, 32, 73, 101, 140, 109, 108, 209, 146, 211,}, - {20, 34, 76, 102, 140, 113, 109, 212, 148, 214,}, - {21, 35, 79, 102, 141, 116, 109, 215, 151, 217,}, - {22, 37, 82, 103, 141, 120, 110, 218, 153, 220,}, - {23, 38, 85, 103, 142, 123, 111, 220, 155, 222,}, - {24, 40, 87, 104, 142, 126, 112, 223, 157, 224,}, - {25, 41, 90, 105, 143, 129, 113, 225, 159, 226,}, - {26, 42, 93, 105, 143, 132, 113, 227, 161, 228,}, - {27, 44, 95, 106, 143, 135, 114, 229, 162, 230,}, - {28, 45, 98, 106, 144, 138, 115, 230, 164, 232,}, - {29, 47, 100, 107, 144, 141, 116, 232, 166, 233,}, - {30, 48, 103, 107, 145, 144, 117, 234, 168, 235,}, - {31, 50, 105, 108, 145, 146, 117, 235, 170, 236,}, - {32, 51, 107, 108, 145, 149, 118, 236, 171, 237,}, - {33, 52, 110, 109, 146, 151, 119, 238, 173, 238,}, - {34, 54, 112, 110, 146, 154, 120, 239, 175, 239,}, - {35, 55, 114, 110, 147, 156, 120, 240, 176, 240,}, - {36, 57, 116, 111, 147, 158, 121, 241, 178, 241,}, - {37, 58, 119, 111, 147, 161, 122, 242, 180, 242,}, - {38, 59, 121, 112, 148, 163, 123, 243, 181, 243,}, - {39, 61, 123, 112, 148, 165, 123, 244, 183, 244,}, 
- {40, 62, 125, 113, 148, 167, 124, 244, 184, 245,}, - {41, 63, 127, 113, 149, 169, 125, 245, 186, 245,}, - {42, 65, 129, 114, 149, 171, 126, 246, 187, 246,}, - {43, 66, 131, 114, 150, 173, 126, 246, 188, 247,}, - {44, 67, 133, 115, 150, 175, 127, 247, 190, 247,}, - {45, 69, 135, 115, 150, 177, 128, 247, 191, 248,}, - {46, 70, 136, 116, 151, 178, 129, 248, 193, 248,}, - {47, 71, 138, 116, 151, 180, 129, 248, 194, 249,}, - {48, 73, 140, 117, 151, 182, 130, 249, 195, 249,}, - {49, 74, 142, 118, 152, 184, 131, 249, 197, 250,}, - {50, 75, 144, 118, 152, 185, 131, 250, 198, 250,}, - {51, 76, 145, 119, 153, 187, 132, 250, 199, 250,}, - {52, 78, 147, 119, 153, 188, 133, 251, 200, 251,}, - {53, 79, 149, 120, 153, 190, 134, 251, 201, 251,}, - {54, 80, 151, 120, 154, 192, 134, 251, 203, 251,}, - {55, 82, 152, 121, 154, 193, 135, 251, 204, 252,}, - {56, 83, 154, 121, 154, 194, 136, 252, 205, 252,}, - {57, 84, 155, 122, 155, 196, 136, 252, 206, 252,}, - {58, 85, 157, 122, 155, 197, 137, 252, 207, 252,}, - {59, 86, 158, 123, 156, 199, 138, 252, 208, 252,}, - {60, 88, 160, 123, 156, 200, 139, 253, 209, 253,}, - {61, 89, 162, 124, 156, 201, 139, 253, 210, 253,}, - {62, 90, 163, 124, 157, 202, 140, 253, 211, 253,}, - {63, 91, 164, 125, 157, 204, 141, 253, 212, 253,}, - {64, 93, 166, 125, 157, 205, 141, 253, 213, 253,}, - {65, 94, 167, 126, 158, 206, 142, 254, 214, 254,}, - {66, 95, 169, 126, 158, 207, 143, 254, 215, 254,}, - {67, 96, 170, 127, 158, 208, 143, 254, 216, 254,}, - {68, 97, 172, 127, 159, 209, 144, 254, 217, 254,}, - {69, 98, 173, 128, 159, 210, 145, 254, 218, 254,}, - {70, 100, 174, 128, 160, 212, 146, 254, 219, 254,}, - {71, 101, 176, 129, 160, 213, 146, 254, 220, 254,}, - {72, 102, 177, 130, 160, 214, 147, 254, 220, 254,}, - {73, 103, 178, 130, 161, 215, 148, 255, 221, 255,}, - {74, 104, 179, 131, 161, 216, 148, 255, 222, 255,}, - {75, 105, 181, 131, 161, 217, 149, 255, 223, 255,}, - {76, 107, 182, 132, 162, 217, 150, 255, 224, 255,}, - {77, 108, 183, 132, 162, 218, 150, 255, 224, 255,}, - {78, 109, 184, 133, 163, 219, 151, 255, 225, 255,}, - {79, 110, 185, 133, 163, 220, 152, 255, 226, 255,}, - {80, 111, 187, 134, 163, 221, 153, 255, 227, 255,}, - {81, 112, 188, 134, 164, 222, 153, 255, 227, 255,}, - {82, 113, 189, 135, 164, 223, 154, 255, 228, 255,}, - {83, 115, 190, 135, 164, 223, 155, 255, 229, 255,}, - {84, 116, 191, 136, 165, 224, 155, 255, 229, 255,}, - {85, 117, 192, 136, 165, 225, 156, 255, 230, 255,}, - {86, 118, 193, 137, 165, 226, 157, 255, 231, 255,}, - {87, 119, 194, 137, 166, 226, 157, 255, 231, 255,}, - {88, 120, 195, 138, 166, 227, 158, 255, 232, 255,}, - {89, 121, 196, 139, 167, 228, 159, 255, 232, 255,}, - {90, 122, 197, 139, 167, 229, 159, 255, 233, 255,}, - {91, 123, 198, 140, 167, 229, 160, 255, 234, 255,}, - {92, 124, 199, 140, 168, 230, 161, 255, 234, 255,}, - {93, 125, 200, 141, 168, 231, 162, 255, 235, 255,}, - {94, 127, 201, 141, 168, 231, 162, 255, 235, 255,}, - {95, 128, 202, 142, 169, 232, 163, 255, 236, 255,}, - {96, 129, 203, 142, 169, 232, 164, 255, 236, 255,}, - {97, 130, 204, 143, 170, 233, 164, 255, 237, 255,}, - {98, 131, 205, 143, 170, 234, 165, 255, 237, 255,}, - {99, 132, 206, 144, 170, 234, 166, 255, 238, 255,}, - {100, 133, 207, 144, 171, 235, 166, 255, 238, 255,}, - {101, 134, 208, 145, 171, 235, 167, 255, 239, 255,}, - {102, 135, 209, 146, 171, 236, 168, 255, 239, 255,}, - {103, 136, 209, 146, 172, 236, 168, 255, 240, 255,}, - {104, 137, 210, 147, 172, 237, 169, 255, 240, 255,}, - {105, 138, 211, 147, 173, 237, 170, 255, 240, 255,}, - {106, 139, 
212, 148, 173, 238, 170, 255, 241, 255,}, - {107, 140, 213, 148, 173, 238, 171, 255, 241, 255,}, - {108, 141, 213, 149, 174, 239, 172, 255, 242, 255,}, - {109, 142, 214, 149, 174, 239, 172, 255, 242, 255,}, - {110, 143, 215, 150, 175, 240, 173, 255, 242, 255,}, - {111, 144, 216, 151, 175, 240, 174, 255, 243, 255,}, - {112, 145, 217, 151, 175, 240, 174, 255, 243, 255,}, - {113, 146, 217, 152, 176, 241, 175, 255, 244, 255,}, - {114, 147, 218, 152, 176, 241, 176, 255, 244, 255,}, - {115, 148, 219, 153, 176, 242, 177, 255, 244, 255,}, - {116, 149, 219, 153, 177, 242, 177, 255, 245, 255,}, - {117, 150, 220, 154, 177, 242, 178, 255, 245, 255,}, - {118, 151, 221, 155, 178, 243, 179, 255, 245, 255,}, - {119, 152, 222, 155, 178, 243, 179, 255, 245, 255,}, - {120, 153, 222, 156, 178, 244, 180, 255, 246, 255,}, - {121, 154, 223, 156, 179, 244, 181, 255, 246, 255,}, - {122, 155, 224, 157, 179, 244, 181, 255, 246, 255,}, - {123, 156, 224, 157, 180, 245, 182, 255, 247, 255,}, - {124, 157, 225, 158, 180, 245, 183, 255, 247, 255,}, - {125, 158, 225, 159, 180, 245, 183, 255, 247, 255,}, - {126, 159, 226, 159, 181, 246, 184, 255, 247, 255,}, - {127, 160, 227, 160, 181, 246, 185, 255, 248, 255,}, - {128, 161, 227, 160, 182, 246, 185, 255, 248, 255,}, - {129, 162, 228, 161, 182, 246, 186, 255, 248, 255,}, - {130, 163, 228, 161, 182, 247, 187, 255, 248, 255,}, - {131, 164, 229, 162, 183, 247, 187, 255, 249, 255,}, - {132, 165, 230, 163, 183, 247, 188, 255, 249, 255,}, - {133, 166, 230, 163, 184, 248, 189, 255, 249, 255,}, - {134, 166, 231, 164, 184, 248, 189, 255, 249, 255,}, - {135, 167, 231, 164, 184, 248, 190, 255, 250, 255,}, - {136, 168, 232, 165, 185, 248, 191, 255, 250, 255,}, - {137, 169, 232, 166, 185, 248, 191, 255, 250, 255,}, - {138, 170, 233, 166, 186, 249, 192, 255, 250, 255,}, - {139, 171, 233, 167, 186, 249, 192, 255, 250, 255,}, - {140, 172, 234, 167, 187, 249, 193, 255, 251, 255,}, - {141, 173, 234, 168, 187, 249, 194, 255, 251, 255,}, - {142, 174, 235, 169, 187, 250, 194, 255, 251, 255,}, - {143, 175, 235, 169, 188, 250, 195, 255, 251, 255,}, - {144, 176, 236, 170, 188, 250, 196, 255, 251, 255,}, - {145, 177, 236, 170, 189, 250, 196, 255, 251, 255,}, - {146, 177, 237, 171, 189, 250, 197, 255, 252, 255,}, - {147, 178, 237, 172, 189, 251, 198, 255, 252, 255,}, - {148, 179, 238, 172, 190, 251, 198, 255, 252, 255,}, - {149, 180, 238, 173, 190, 251, 199, 255, 252, 255,}, - {150, 181, 238, 173, 191, 251, 200, 255, 252, 255,}, - {151, 182, 239, 174, 191, 251, 200, 255, 252, 255,}, - {152, 183, 239, 175, 192, 251, 201, 255, 252, 255,}, - {153, 184, 240, 175, 192, 252, 202, 255, 252, 255,}, - {154, 184, 240, 176, 193, 252, 202, 255, 253, 255,}, - {155, 185, 240, 177, 193, 252, 203, 255, 253, 255,}, - {156, 186, 241, 177, 193, 252, 203, 255, 253, 255,}, - {157, 187, 241, 178, 194, 252, 204, 255, 253, 255,}, - {158, 188, 242, 178, 194, 252, 205, 255, 253, 255,}, - {159, 189, 242, 179, 195, 252, 205, 255, 253, 255,}, - {160, 190, 242, 180, 195, 253, 206, 255, 253, 255,}, - {161, 190, 243, 180, 196, 253, 207, 255, 253, 255,}, - {162, 191, 243, 181, 196, 253, 207, 255, 254, 255,}, - {163, 192, 243, 182, 197, 253, 208, 255, 254, 255,}, - {164, 193, 244, 182, 197, 253, 209, 255, 254, 255,}, - {165, 194, 244, 183, 197, 253, 209, 255, 254, 255,}, - {166, 195, 244, 184, 198, 253, 210, 255, 254, 255,}, - {167, 196, 245, 184, 198, 253, 210, 255, 254, 255,}, - {168, 196, 245, 185, 199, 253, 211, 255, 254, 255,}, - {169, 197, 245, 186, 199, 254, 212, 255, 254, 255,}, - {170, 198, 246, 186, 200, 254, 212, 255, 254, 
255,}, - {171, 199, 246, 187, 200, 254, 213, 255, 254, 255,}, - {172, 200, 246, 188, 201, 254, 214, 255, 254, 255,}, - {173, 200, 246, 188, 201, 254, 214, 255, 254, 255,}, - {174, 201, 247, 189, 202, 254, 215, 255, 254, 255,}, - {175, 202, 247, 189, 202, 254, 215, 255, 255, 255,}, - {176, 203, 247, 190, 203, 254, 216, 255, 255, 255,}, - {177, 204, 248, 191, 203, 254, 217, 255, 255, 255,}, - {178, 204, 248, 191, 204, 254, 217, 255, 255, 255,}, - {179, 205, 248, 192, 204, 254, 218, 255, 255, 255,}, - {180, 206, 248, 193, 204, 254, 218, 255, 255, 255,}, - {181, 207, 249, 194, 205, 255, 219, 255, 255, 255,}, - {182, 208, 249, 194, 205, 255, 220, 255, 255, 255,}, - {183, 208, 249, 195, 206, 255, 220, 255, 255, 255,}, - {184, 209, 249, 196, 206, 255, 221, 255, 255, 255,}, - {185, 210, 250, 196, 207, 255, 221, 255, 255, 255,}, - {186, 211, 250, 197, 207, 255, 222, 255, 255, 255,}, - {187, 211, 250, 198, 208, 255, 223, 255, 255, 255,}, - {188, 212, 250, 198, 208, 255, 223, 255, 255, 255,}, - {189, 213, 250, 199, 209, 255, 224, 255, 255, 255,}, - {190, 214, 251, 200, 209, 255, 224, 255, 255, 255,}, - {191, 215, 251, 200, 210, 255, 225, 255, 255, 255,}, - {192, 215, 251, 201, 211, 255, 225, 255, 255, 255,}, - {193, 216, 251, 202, 211, 255, 226, 255, 255, 255,}, - {194, 217, 251, 203, 212, 255, 227, 255, 255, 255,}, - {195, 218, 252, 203, 212, 255, 227, 255, 255, 255,}, - {196, 218, 252, 204, 213, 255, 228, 255, 255, 255,}, - {197, 219, 252, 205, 213, 255, 228, 255, 255, 255,}, - {198, 220, 252, 205, 214, 255, 229, 255, 255, 255,}, - {199, 221, 252, 206, 214, 255, 229, 255, 255, 255,}, - {200, 221, 252, 207, 215, 255, 230, 255, 255, 255,}, - {201, 222, 252, 208, 215, 255, 231, 255, 255, 255,}, - {202, 223, 253, 208, 216, 255, 231, 255, 255, 255,}, - {203, 223, 253, 209, 216, 255, 232, 255, 255, 255,}, - {204, 224, 253, 210, 217, 255, 232, 255, 255, 255,}, - {205, 225, 253, 211, 218, 255, 233, 255, 255, 255,}, - {206, 226, 253, 211, 218, 255, 233, 255, 255, 255,}, - {207, 226, 253, 212, 219, 255, 234, 255, 255, 255,}, - {208, 227, 253, 213, 219, 255, 234, 255, 255, 255,}, - {209, 228, 254, 214, 220, 255, 235, 255, 255, 255,}, - {210, 228, 254, 214, 220, 255, 236, 255, 255, 255,}, - {211, 229, 254, 215, 221, 255, 236, 255, 255, 255,}, - {212, 230, 254, 216, 222, 255, 237, 255, 255, 255,}, - {213, 230, 254, 217, 222, 255, 237, 255, 255, 255,}, - {214, 231, 254, 217, 223, 255, 238, 255, 255, 255,}, - {215, 232, 254, 218, 223, 255, 238, 255, 255, 255,}, - {216, 233, 254, 219, 224, 255, 239, 255, 255, 255,}, - {217, 233, 254, 220, 225, 255, 239, 255, 255, 255,}, - {218, 234, 255, 220, 225, 255, 240, 255, 255, 255,}, - {219, 235, 255, 221, 226, 255, 240, 255, 255, 255,}, - {220, 235, 255, 222, 226, 255, 241, 255, 255, 255,}, - {221, 236, 255, 223, 227, 255, 241, 255, 255, 255,}, - {222, 237, 255, 224, 228, 255, 242, 255, 255, 255,}, - {223, 237, 255, 224, 228, 255, 242, 255, 255, 255,}, - {224, 238, 255, 225, 229, 255, 243, 255, 255, 255,}, - {225, 238, 255, 226, 230, 255, 243, 255, 255, 255,}, - {226, 239, 255, 227, 230, 255, 244, 255, 255, 255,}, - {227, 240, 255, 228, 231, 255, 244, 255, 255, 255,}, - {228, 240, 255, 228, 232, 255, 245, 255, 255, 255,}, - {229, 241, 255, 229, 232, 255, 245, 255, 255, 255,}, - {230, 242, 255, 230, 233, 255, 246, 255, 255, 255,}, - {231, 242, 255, 231, 234, 255, 246, 255, 255, 255,}, - {232, 243, 255, 232, 234, 255, 247, 255, 255, 255,}, - {233, 243, 255, 233, 235, 255, 247, 255, 255, 255,}, - {234, 244, 255, 233, 236, 255, 247, 255, 255, 255,}, - {235, 245, 255, 234, 236, 
255, 248, 255, 255, 255,}, - {236, 245, 255, 235, 237, 255, 248, 255, 255, 255,}, - {237, 246, 255, 236, 238, 255, 249, 255, 255, 255,}, - {238, 247, 255, 237, 239, 255, 249, 255, 255, 255,}, - {239, 247, 255, 238, 239, 255, 250, 255, 255, 255,}, - {240, 248, 255, 239, 240, 255, 250, 255, 255, 255,}, - {241, 248, 255, 240, 241, 255, 251, 255, 255, 255,}, - {242, 249, 255, 241, 242, 255, 251, 255, 255, 255,}, - {243, 249, 255, 241, 243, 255, 251, 255, 255, 255,}, - {244, 250, 255, 242, 243, 255, 252, 255, 255, 255,}, - {245, 251, 255, 243, 244, 255, 252, 255, 255, 255,}, - {246, 251, 255, 244, 245, 255, 253, 255, 255, 255,}, - {247, 252, 255, 245, 246, 255, 253, 255, 255, 255,}, - {248, 252, 255, 246, 247, 255, 253, 255, 255, 255,}, - {249, 253, 255, 247, 248, 255, 254, 255, 255, 255,}, - {250, 253, 255, 248, 249, 255, 254, 255, 255, 255,}, - {251, 254, 255, 249, 250, 255, 254, 255, 255, 255,}, - {252, 254, 255, 251, 251, 255, 255, 255, 255, 255,}, - {253, 255, 255, 252, 252, 255, 255, 255, 255, 255,}, - {254, 255, 255, 253, 253, 255, 255, 255, 255, 255,}, - {255, 255, 255, 254, 254, 255, 255, 255, 255, 255,}, -}; - -const vp9_prob vp9_modelcoefprobs_gg875p1[COEFPROB_MODELS][ENTROPY_NODES - 1] = { - // Probs generated with a Generalized Gaussian (with shape parameter 0.625) - // source model with varying quantizer step size for a uniform quantizer - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, // do not use - {1, 1, 3, 86, 128, 6, 86, 22, 89, 28,}, - {1, 2, 6, 86, 129, 11, 87, 42, 92, 52,}, - {2, 3, 9, 87, 129, 17, 88, 59, 94, 73,}, - {2, 4, 12, 87, 129, 22, 89, 75, 97, 92,}, - {3, 5, 14, 88, 130, 27, 89, 90, 100, 108,}, - {3, 6, 17, 88, 130, 33, 90, 103, 102, 122,}, - {4, 7, 20, 88, 130, 37, 91, 115, 105, 135,}, - {4, 8, 23, 89, 131, 42, 92, 126, 108, 147,}, - {5, 9, 25, 89, 131, 47, 92, 137, 110, 157,}, - {5, 10, 28, 90, 131, 52, 93, 146, 113, 167,}, - {6, 11, 31, 90, 132, 56, 94, 154, 115, 175,}, - {6, 12, 33, 90, 132, 60, 94, 162, 118, 183,}, - {7, 13, 36, 91, 132, 65, 95, 170, 120, 190,}, - {7, 14, 39, 91, 132, 69, 96, 176, 123, 196,}, - {8, 15, 41, 92, 133, 73, 96, 182, 125, 201,}, - {8, 16, 44, 92, 133, 77, 97, 188, 128, 206,}, - {9, 17, 46, 92, 133, 81, 98, 193, 130, 211,}, - {9, 18, 49, 93, 134, 85, 99, 198, 133, 215,}, - {10, 19, 51, 93, 134, 89, 99, 203, 135, 219,}, - {10, 20, 54, 93, 134, 92, 100, 207, 137, 222,}, - {11, 21, 56, 94, 134, 96, 101, 211, 140, 226,}, - {12, 22, 58, 94, 135, 100, 101, 214, 142, 228,}, - {12, 23, 61, 95, 135, 103, 102, 217, 145, 231,}, - {13, 24, 63, 95, 135, 106, 103, 220, 147, 233,}, - {13, 25, 66, 95, 136, 110, 103, 223, 149, 235,}, - {14, 26, 68, 96, 136, 113, 104, 226, 151, 237,}, - {14, 27, 70, 96, 136, 116, 105, 228, 154, 239,}, - {15, 28, 72, 97, 136, 119, 106, 230, 156, 241,}, - {15, 29, 75, 97, 137, 122, 106, 232, 158, 242,}, - {16, 30, 77, 97, 137, 125, 107, 234, 160, 243,}, - {17, 31, 79, 98, 137, 128, 108, 236, 163, 245,}, - {17, 32, 81, 98, 138, 131, 108, 237, 165, 246,}, - {18, 33, 83, 99, 138, 134, 109, 239, 167, 247,}, - {18, 34, 86, 99, 138, 137, 110, 240, 169, 248,}, - {19, 35, 88, 99, 138, 140, 111, 242, 171, 248,}, - {19, 36, 90, 100, 139, 142, 111, 243, 173, 249,}, - {20, 37, 92, 100, 139, 145, 112, 244, 175, 250,}, - {20, 38, 94, 101, 139, 148, 113, 245, 177, 250,}, - {21, 39, 96, 101, 140, 150, 113, 246, 179, 251,}, - {22, 40, 98, 101, 140, 153, 114, 246, 181, 251,}, - {22, 41, 100, 102, 140, 155, 115, 247, 183, 252,}, - {23, 42, 102, 102, 140, 157, 116, 248, 185, 252,}, - {23, 43, 104, 103, 141, 160, 116, 249, 186, 253,}, - {24, 44, 106, 
103, 141, 162, 117, 249, 188, 253,}, - {25, 45, 108, 103, 141, 164, 118, 250, 190, 253,}, - {25, 46, 110, 104, 142, 166, 119, 250, 192, 253,}, - {26, 47, 112, 104, 142, 168, 119, 251, 193, 254,}, - {26, 48, 114, 105, 142, 171, 120, 251, 195, 254,}, - {27, 49, 116, 105, 143, 173, 121, 252, 197, 254,}, - {27, 50, 118, 105, 143, 175, 122, 252, 198, 254,}, - {28, 51, 119, 106, 143, 177, 122, 252, 200, 254,}, - {29, 52, 121, 106, 143, 179, 123, 253, 201, 255,}, - {29, 53, 123, 107, 144, 180, 124, 253, 203, 255,}, - {30, 54, 125, 107, 144, 182, 125, 253, 204, 255,}, - {30, 55, 127, 108, 144, 184, 125, 253, 206, 255,}, - {31, 56, 128, 108, 145, 186, 126, 254, 207, 255,}, - {32, 57, 130, 108, 145, 188, 127, 254, 209, 255,}, - {32, 58, 132, 109, 145, 189, 128, 254, 210, 255,}, - {33, 59, 134, 109, 146, 191, 128, 254, 211, 255,}, - {33, 60, 135, 110, 146, 193, 129, 254, 213, 255,}, - {34, 61, 137, 110, 146, 194, 130, 254, 214, 255,}, - {35, 62, 139, 111, 146, 196, 131, 255, 215, 255,}, - {35, 63, 140, 111, 147, 197, 131, 255, 216, 255,}, - {36, 64, 142, 112, 147, 199, 132, 255, 218, 255,}, - {37, 65, 144, 112, 147, 200, 133, 255, 219, 255,}, - {37, 66, 145, 112, 148, 202, 134, 255, 220, 255,}, - {38, 67, 147, 113, 148, 203, 135, 255, 221, 255,}, - {38, 68, 148, 113, 148, 204, 135, 255, 222, 255,}, - {39, 69, 150, 114, 149, 206, 136, 255, 223, 255,}, - {40, 70, 151, 114, 149, 207, 137, 255, 224, 255,}, - {40, 71, 153, 115, 149, 208, 138, 255, 225, 255,}, - {41, 72, 154, 115, 150, 210, 138, 255, 226, 255,}, - {42, 73, 156, 116, 150, 211, 139, 255, 227, 255,}, - {42, 74, 157, 116, 150, 212, 140, 255, 228, 255,}, - {43, 75, 159, 117, 151, 213, 141, 255, 229, 255,}, - {44, 76, 160, 117, 151, 214, 142, 255, 230, 255,}, - {44, 77, 162, 117, 151, 216, 142, 255, 231, 255,}, - {45, 78, 163, 118, 152, 217, 143, 255, 231, 255,}, - {45, 79, 165, 118, 152, 218, 144, 255, 232, 255,}, - {46, 80, 166, 119, 152, 219, 145, 255, 233, 255,}, - {47, 81, 167, 119, 153, 220, 146, 255, 234, 255,}, - {47, 82, 169, 120, 153, 221, 146, 255, 235, 255,}, - {48, 83, 170, 120, 153, 222, 147, 255, 235, 255,}, - {49, 84, 171, 121, 154, 223, 148, 255, 236, 255,}, - {49, 85, 173, 121, 154, 224, 149, 255, 237, 255,}, - {50, 86, 174, 122, 154, 225, 150, 255, 237, 255,}, - {51, 87, 175, 122, 155, 225, 150, 255, 238, 255,}, - {51, 88, 177, 123, 155, 226, 151, 255, 239, 255,}, - {52, 89, 178, 123, 155, 227, 152, 255, 239, 255,}, - {53, 90, 179, 124, 156, 228, 153, 255, 240, 255,}, - {53, 91, 180, 124, 156, 229, 154, 255, 240, 255,}, - {54, 92, 182, 125, 156, 230, 154, 255, 241, 255,}, - {55, 93, 183, 125, 157, 230, 155, 255, 241, 255,}, - {55, 94, 184, 126, 157, 231, 156, 255, 242, 255,}, - {56, 95, 185, 126, 157, 232, 157, 255, 242, 255,}, - {57, 96, 187, 127, 158, 233, 158, 255, 243, 255,}, - {57, 97, 188, 127, 158, 233, 159, 255, 243, 255,}, - {58, 98, 189, 128, 158, 234, 159, 255, 244, 255,}, - {59, 99, 190, 128, 159, 235, 160, 255, 244, 255,}, - {60, 100, 191, 129, 159, 235, 161, 255, 245, 255,}, - {60, 101, 192, 129, 160, 236, 162, 255, 245, 255,}, - {61, 102, 193, 130, 160, 237, 163, 255, 246, 255,}, - {62, 103, 194, 131, 160, 237, 164, 255, 246, 255,}, - {62, 104, 196, 131, 161, 238, 164, 255, 246, 255,}, - {63, 105, 197, 132, 161, 238, 165, 255, 247, 255,}, - {64, 106, 198, 132, 161, 239, 166, 255, 247, 255,}, - {64, 107, 199, 133, 162, 239, 167, 255, 247, 255,}, - {65, 108, 200, 133, 162, 240, 168, 255, 248, 255,}, - {66, 109, 201, 134, 163, 241, 168, 255, 248, 255,}, - {67, 110, 202, 134, 163, 241, 169, 255, 248, 255,}, - {67, 
111, 203, 135, 163, 242, 170, 255, 249, 255,}, - {68, 112, 204, 135, 164, 242, 171, 255, 249, 255,}, - {69, 113, 205, 136, 164, 242, 172, 255, 249, 255,}, - {69, 114, 206, 137, 164, 243, 173, 255, 250, 255,}, - {70, 115, 207, 137, 165, 243, 173, 255, 250, 255,}, - {71, 116, 208, 138, 165, 244, 174, 255, 250, 255,}, - {72, 117, 208, 138, 166, 244, 175, 255, 250, 255,}, - {72, 118, 209, 139, 166, 245, 176, 255, 251, 255,}, - {73, 119, 210, 139, 166, 245, 177, 255, 251, 255,}, - {74, 120, 211, 140, 167, 245, 178, 255, 251, 255,}, - {75, 121, 212, 141, 167, 246, 178, 255, 251, 255,}, - {75, 122, 213, 141, 168, 246, 179, 255, 251, 255,}, - {76, 123, 214, 142, 168, 246, 180, 255, 252, 255,}, - {77, 124, 215, 142, 168, 247, 181, 255, 252, 255,}, - {78, 125, 215, 143, 169, 247, 182, 255, 252, 255,}, - {78, 126, 216, 144, 169, 247, 182, 255, 252, 255,}, - {79, 127, 217, 144, 170, 248, 183, 255, 252, 255,}, - {80, 128, 218, 145, 170, 248, 184, 255, 253, 255,}, - {81, 129, 219, 145, 170, 248, 185, 255, 253, 255,}, - {82, 130, 219, 146, 171, 249, 186, 255, 253, 255,}, - {82, 131, 220, 147, 171, 249, 187, 255, 253, 255,}, - {83, 132, 221, 147, 172, 249, 187, 255, 253, 255,}, - {84, 133, 222, 148, 172, 249, 188, 255, 253, 255,}, - {85, 134, 222, 148, 173, 250, 189, 255, 253, 255,}, - {85, 135, 223, 149, 173, 250, 190, 255, 254, 255,}, - {86, 136, 224, 150, 173, 250, 191, 255, 254, 255,}, - {87, 137, 225, 150, 174, 250, 191, 255, 254, 255,}, - {88, 138, 225, 151, 174, 251, 192, 255, 254, 255,}, - {89, 139, 226, 152, 175, 251, 193, 255, 254, 255,}, - {89, 140, 227, 152, 175, 251, 194, 255, 254, 255,}, - {90, 141, 227, 153, 176, 251, 195, 255, 254, 255,}, - {91, 142, 228, 153, 176, 251, 195, 255, 254, 255,}, - {92, 143, 229, 154, 176, 252, 196, 255, 254, 255,}, - {93, 144, 229, 155, 177, 252, 197, 255, 254, 255,}, - {93, 145, 230, 155, 177, 252, 198, 255, 255, 255,}, - {94, 146, 231, 156, 178, 252, 199, 255, 255, 255,}, - {95, 147, 231, 157, 178, 252, 199, 255, 255, 255,}, - {96, 148, 232, 157, 179, 252, 200, 255, 255, 255,}, - {97, 149, 232, 158, 179, 253, 201, 255, 255, 255,}, - {98, 150, 233, 159, 180, 253, 202, 255, 255, 255,}, - {99, 151, 234, 159, 180, 253, 202, 255, 255, 255,}, - {99, 152, 234, 160, 181, 253, 203, 255, 255, 255,}, - {100, 153, 235, 161, 181, 253, 204, 255, 255, 255,}, - {101, 154, 235, 162, 182, 253, 205, 255, 255, 255,}, - {102, 155, 236, 162, 182, 253, 206, 255, 255, 255,}, - {103, 156, 236, 163, 183, 254, 206, 255, 255, 255,}, - {104, 157, 237, 164, 183, 254, 207, 255, 255, 255,}, - {105, 158, 237, 164, 183, 254, 208, 255, 255, 255,}, - {105, 159, 238, 165, 184, 254, 209, 255, 255, 255,}, - {106, 160, 238, 166, 184, 254, 209, 255, 255, 255,}, - {107, 161, 239, 166, 185, 254, 210, 255, 255, 255,}, - {108, 162, 239, 167, 185, 254, 211, 255, 255, 255,}, - {109, 163, 240, 168, 186, 254, 212, 255, 255, 255,}, - {110, 164, 240, 169, 186, 254, 212, 255, 255, 255,}, - {111, 165, 241, 169, 187, 254, 213, 255, 255, 255,}, - {112, 166, 241, 170, 187, 255, 214, 255, 255, 255,}, - {113, 167, 242, 171, 188, 255, 215, 255, 255, 255,}, - {114, 168, 242, 172, 189, 255, 215, 255, 255, 255,}, - {114, 169, 242, 172, 189, 255, 216, 255, 255, 255,}, - {115, 170, 243, 173, 190, 255, 217, 255, 255, 255,}, - {116, 171, 243, 174, 190, 255, 217, 255, 255, 255,}, - {117, 172, 244, 175, 191, 255, 218, 255, 255, 255,}, - {118, 173, 244, 175, 191, 255, 219, 255, 255, 255,}, - {119, 174, 244, 176, 192, 255, 220, 255, 255, 255,}, - {120, 175, 245, 177, 192, 255, 220, 255, 255, 255,}, - {121, 176, 245, 178, 193, 
255, 221, 255, 255, 255,}, - {122, 177, 245, 178, 193, 255, 222, 255, 255, 255,}, - {123, 178, 246, 179, 194, 255, 222, 255, 255, 255,}, - {124, 179, 246, 180, 194, 255, 223, 255, 255, 255,}, - {125, 180, 247, 181, 195, 255, 224, 255, 255, 255,}, - {126, 181, 247, 182, 196, 255, 224, 255, 255, 255,}, - {127, 182, 247, 182, 196, 255, 225, 255, 255, 255,}, - {128, 183, 247, 183, 197, 255, 226, 255, 255, 255,}, - {129, 184, 248, 184, 197, 255, 226, 255, 255, 255,}, - {130, 185, 248, 185, 198, 255, 227, 255, 255, 255,}, - {131, 186, 248, 186, 198, 255, 228, 255, 255, 255,}, - {132, 187, 249, 186, 199, 255, 228, 255, 255, 255,}, - {133, 188, 249, 187, 200, 255, 229, 255, 255, 255,}, - {134, 189, 249, 188, 200, 255, 230, 255, 255, 255,}, - {135, 190, 249, 189, 201, 255, 230, 255, 255, 255,}, - {136, 191, 250, 190, 201, 255, 231, 255, 255, 255,}, - {137, 192, 250, 191, 202, 255, 231, 255, 255, 255,}, - {138, 193, 250, 191, 203, 255, 232, 255, 255, 255,}, - {139, 194, 250, 192, 203, 255, 233, 255, 255, 255,}, - {140, 195, 251, 193, 204, 255, 233, 255, 255, 255,}, - {142, 196, 251, 194, 204, 255, 234, 255, 255, 255,}, - {143, 197, 251, 195, 205, 255, 234, 255, 255, 255,}, - {144, 198, 251, 196, 206, 255, 235, 255, 255, 255,}, - {145, 199, 252, 197, 206, 255, 236, 255, 255, 255,}, - {146, 200, 252, 197, 207, 255, 236, 255, 255, 255,}, - {147, 201, 252, 198, 208, 255, 237, 255, 255, 255,}, - {148, 202, 252, 199, 208, 255, 237, 255, 255, 255,}, - {149, 203, 252, 200, 209, 255, 238, 255, 255, 255,}, - {151, 204, 253, 201, 210, 255, 238, 255, 255, 255,}, - {152, 205, 253, 202, 210, 255, 239, 255, 255, 255,}, - {153, 206, 253, 203, 211, 255, 239, 255, 255, 255,}, - {154, 207, 253, 204, 212, 255, 240, 255, 255, 255,}, - {155, 208, 253, 205, 212, 255, 241, 255, 255, 255,}, - {157, 209, 253, 206, 213, 255, 241, 255, 255, 255,}, - {158, 210, 253, 206, 214, 255, 242, 255, 255, 255,}, - {159, 211, 254, 207, 214, 255, 242, 255, 255, 255,}, - {160, 212, 254, 208, 215, 255, 243, 255, 255, 255,}, - {162, 213, 254, 209, 216, 255, 243, 255, 255, 255,}, - {163, 214, 254, 210, 217, 255, 244, 255, 255, 255,}, - {164, 215, 254, 211, 217, 255, 244, 255, 255, 255,}, - {165, 216, 254, 212, 218, 255, 244, 255, 255, 255,}, - {167, 217, 254, 213, 219, 255, 245, 255, 255, 255,}, - {168, 218, 254, 214, 219, 255, 245, 255, 255, 255,}, - {169, 219, 255, 215, 220, 255, 246, 255, 255, 255,}, - {171, 220, 255, 216, 221, 255, 246, 255, 255, 255,}, - {172, 221, 255, 217, 222, 255, 247, 255, 255, 255,}, - {174, 222, 255, 218, 223, 255, 247, 255, 255, 255,}, - {175, 223, 255, 219, 223, 255, 248, 255, 255, 255,}, - {177, 224, 255, 220, 224, 255, 248, 255, 255, 255,}, - {178, 225, 255, 221, 225, 255, 248, 255, 255, 255,}, - {179, 226, 255, 222, 226, 255, 249, 255, 255, 255,}, - {181, 227, 255, 223, 227, 255, 249, 255, 255, 255,}, - {182, 228, 255, 224, 227, 255, 250, 255, 255, 255,}, - {184, 229, 255, 225, 228, 255, 250, 255, 255, 255,}, - {186, 230, 255, 226, 229, 255, 250, 255, 255, 255,}, - {187, 231, 255, 227, 230, 255, 251, 255, 255, 255,}, - {189, 232, 255, 228, 231, 255, 251, 255, 255, 255,}, - {190, 233, 255, 229, 232, 255, 251, 255, 255, 255,}, - {192, 234, 255, 230, 232, 255, 252, 255, 255, 255,}, - {194, 235, 255, 231, 233, 255, 252, 255, 255, 255,}, - {196, 236, 255, 232, 234, 255, 252, 255, 255, 255,}, - {197, 237, 255, 233, 235, 255, 253, 255, 255, 255,}, - {199, 238, 255, 234, 236, 255, 253, 255, 255, 255,}, - {201, 239, 255, 235, 237, 255, 253, 255, 255, 255,}, - {203, 240, 255, 237, 238, 255, 253, 255, 255, 255,}, - {205, 
241, 255, 238, 239, 255, 254, 255, 255, 255,}, - {207, 242, 255, 239, 240, 255, 254, 255, 255, 255,}, - {209, 243, 255, 240, 241, 255, 254, 255, 255, 255,}, - {211, 244, 255, 241, 242, 255, 254, 255, 255, 255,}, - {214, 245, 255, 242, 243, 255, 255, 255, 255, 255,}, - {216, 246, 255, 243, 244, 255, 255, 255, 255, 255,}, - {218, 247, 255, 244, 245, 255, 255, 255, 255, 255,}, - {221, 248, 255, 246, 246, 255, 255, 255, 255, 255,}, - {224, 249, 255, 247, 247, 255, 255, 255, 255, 255,}, - {226, 250, 255, 248, 248, 255, 255, 255, 255, 255,}, - {229, 251, 255, 249, 249, 255, 255, 255, 255, 255,}, - {233, 252, 255, 251, 251, 255, 255, 255, 255, 255,}, - {236, 253, 255, 252, 252, 255, 255, 255, 255, 255,}, - {241, 254, 255, 253, 253, 255, 255, 255, 255, 255,}, - {246, 255, 255, 254, 254, 255, 255, 255, 255, 255,}, +const vp9_tree_index vp9_coefmodel_tree[6] = { +#if CONFIG_BALANCED_COEFTREE + -ZERO_TOKEN, 2, + -DCT_EOB_MODEL_TOKEN, 4, +#else + -DCT_EOB_MODEL_TOKEN, 2, /* 0 = EOB */ + -ZERO_TOKEN, 4, /* 1 = ZERO */ +#endif + -ONE_TOKEN, -TWO_TOKEN, }; -const vp9_prob vp9_modelcoefprobs_gg75p1[COEFPROB_MODELS][ENTROPY_NODES - 1] = { - // Probs generated with a Generalized Gaussian (with shape parameter 0.625) - // source model with varying quantizer step size for a uniform quantizer - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, // do not use - {1, 1, 3, 86, 129, 6, 87, 21, 90, 26,}, - {1, 2, 6, 87, 129, 11, 88, 39, 93, 47,}, - {2, 3, 9, 87, 130, 16, 89, 55, 96, 65,}, - {2, 4, 11, 88, 130, 21, 89, 69, 98, 81,}, - {3, 5, 14, 88, 130, 26, 90, 82, 101, 95,}, - {3, 6, 17, 89, 131, 31, 91, 94, 103, 107,}, - {4, 7, 20, 89, 131, 35, 92, 105, 105, 119,}, - {4, 8, 22, 90, 131, 40, 92, 115, 108, 129,}, - {5, 9, 25, 90, 132, 44, 93, 124, 110, 138,}, - {5, 10, 27, 91, 132, 48, 94, 133, 112, 147,}, - {6, 11, 30, 91, 132, 52, 95, 141, 114, 155,}, - {6, 12, 32, 92, 133, 56, 95, 148, 116, 162,}, - {7, 13, 35, 92, 133, 60, 96, 155, 118, 168,}, - {7, 14, 37, 92, 133, 64, 97, 161, 121, 174,}, - {8, 15, 40, 93, 134, 68, 97, 167, 123, 180,}, - {9, 16, 42, 93, 134, 71, 98, 173, 125, 185,}, - {9, 17, 44, 94, 134, 75, 99, 178, 127, 190,}, - {10, 18, 47, 94, 135, 78, 99, 182, 129, 195,}, - {10, 19, 49, 94, 135, 82, 100, 187, 131, 199,}, - {11, 20, 51, 95, 135, 85, 100, 191, 133, 202,}, - {11, 21, 54, 95, 135, 88, 101, 195, 135, 206,}, - {12, 22, 56, 96, 136, 92, 102, 199, 137, 209,}, - {13, 23, 58, 96, 136, 95, 102, 202, 138, 213,}, - {13, 24, 61, 96, 136, 98, 103, 206, 140, 215,}, - {14, 25, 63, 97, 137, 101, 104, 209, 142, 218,}, - {14, 26, 65, 97, 137, 104, 104, 211, 144, 221,}, - {15, 27, 67, 98, 137, 107, 105, 214, 146, 223,}, - {15, 28, 69, 98, 138, 110, 106, 217, 148, 225,}, - {16, 29, 71, 98, 138, 113, 106, 219, 150, 227,}, - {17, 30, 73, 99, 138, 115, 107, 221, 151, 229,}, - {17, 31, 76, 99, 138, 118, 107, 223, 153, 231,}, - {18, 32, 78, 100, 139, 121, 108, 225, 155, 232,}, - {18, 33, 80, 100, 139, 123, 109, 227, 157, 234,}, - {19, 34, 82, 100, 139, 126, 109, 229, 158, 235,}, - {20, 35, 84, 101, 140, 128, 110, 231, 160, 237,}, - {20, 36, 86, 101, 140, 131, 111, 232, 162, 238,}, - {21, 37, 88, 102, 140, 133, 111, 234, 164, 239,}, - {21, 38, 90, 102, 140, 136, 112, 235, 165, 240,}, - {22, 39, 92, 102, 141, 138, 112, 236, 167, 241,}, - {23, 40, 94, 103, 141, 140, 113, 237, 169, 242,}, - {23, 41, 95, 103, 141, 143, 114, 238, 170, 243,}, - {24, 42, 97, 103, 142, 145, 114, 240, 172, 244,}, - {25, 43, 99, 104, 142, 147, 115, 241, 173, 245,}, - {25, 44, 101, 104, 142, 149, 116, 242, 175, 246,}, - {26, 45, 103, 105, 142, 151, 116, 242, 
176, 246,}, - {26, 46, 105, 105, 143, 153, 117, 243, 178, 247,}, - {27, 47, 107, 105, 143, 156, 117, 244, 180, 248,}, - {28, 48, 108, 106, 143, 158, 118, 245, 181, 248,}, - {28, 49, 110, 106, 144, 159, 119, 245, 182, 249,}, - {29, 50, 112, 107, 144, 161, 119, 246, 184, 249,}, - {30, 51, 114, 107, 144, 163, 120, 247, 185, 250,}, - {30, 52, 115, 108, 144, 165, 121, 247, 187, 250,}, - {31, 53, 117, 108, 145, 167, 121, 248, 188, 250,}, - {32, 54, 119, 108, 145, 169, 122, 248, 190, 251,}, - {32, 55, 121, 109, 145, 171, 123, 249, 191, 251,}, - {33, 56, 122, 109, 146, 172, 123, 249, 192, 251,}, - {34, 57, 124, 110, 146, 174, 124, 250, 194, 252,}, - {34, 58, 126, 110, 146, 176, 125, 250, 195, 252,}, - {35, 59, 127, 110, 147, 177, 125, 250, 196, 252,}, - {36, 60, 129, 111, 147, 179, 126, 251, 197, 253,}, - {36, 61, 130, 111, 147, 181, 127, 251, 199, 253,}, - {37, 62, 132, 112, 147, 182, 127, 251, 200, 253,}, - {38, 63, 134, 112, 148, 184, 128, 252, 201, 253,}, - {38, 64, 135, 112, 148, 185, 128, 252, 202, 253,}, - {39, 65, 137, 113, 148, 187, 129, 252, 204, 254,}, - {40, 66, 138, 113, 149, 188, 130, 253, 205, 254,}, - {40, 67, 140, 114, 149, 190, 130, 253, 206, 254,}, - {41, 68, 141, 114, 149, 191, 131, 253, 207, 254,}, - {42, 69, 143, 115, 150, 192, 132, 253, 208, 254,}, - {42, 70, 144, 115, 150, 194, 132, 253, 209, 254,}, - {43, 71, 146, 115, 150, 195, 133, 254, 210, 254,}, - {44, 72, 147, 116, 150, 197, 134, 254, 211, 255,}, - {44, 73, 149, 116, 151, 198, 134, 254, 212, 255,}, - {45, 74, 150, 117, 151, 199, 135, 254, 213, 255,}, - {46, 75, 152, 117, 151, 200, 136, 254, 214, 255,}, - {46, 76, 153, 118, 152, 202, 136, 254, 215, 255,}, - {47, 77, 154, 118, 152, 203, 137, 254, 216, 255,}, - {48, 78, 156, 119, 152, 204, 138, 254, 217, 255,}, - {49, 79, 157, 119, 153, 205, 139, 255, 218, 255,}, - {49, 80, 159, 119, 153, 206, 139, 255, 219, 255,}, - {50, 81, 160, 120, 153, 207, 140, 255, 220, 255,}, - {51, 82, 161, 120, 154, 208, 141, 255, 221, 255,}, - {51, 83, 163, 121, 154, 210, 141, 255, 222, 255,}, - {52, 84, 164, 121, 154, 211, 142, 255, 223, 255,}, - {53, 85, 165, 122, 154, 212, 143, 255, 223, 255,}, - {54, 86, 166, 122, 155, 213, 143, 255, 224, 255,}, - {54, 87, 168, 123, 155, 214, 144, 255, 225, 255,}, - {55, 88, 169, 123, 155, 215, 145, 255, 226, 255,}, - {56, 89, 170, 123, 156, 216, 145, 255, 227, 255,}, - {57, 90, 172, 124, 156, 217, 146, 255, 227, 255,}, - {57, 91, 173, 124, 156, 218, 147, 255, 228, 255,}, - {58, 92, 174, 125, 157, 218, 147, 255, 229, 255,}, - {59, 93, 175, 125, 157, 219, 148, 255, 230, 255,}, - {60, 94, 176, 126, 157, 220, 149, 255, 230, 255,}, - {60, 95, 178, 126, 158, 221, 150, 255, 231, 255,}, - {61, 96, 179, 127, 158, 222, 150, 255, 232, 255,}, - {62, 97, 180, 127, 158, 223, 151, 255, 232, 255,}, - {63, 98, 181, 128, 159, 224, 152, 255, 233, 255,}, - {63, 99, 182, 128, 159, 224, 152, 255, 234, 255,}, - {64, 100, 183, 129, 159, 225, 153, 255, 234, 255,}, - {65, 101, 184, 129, 160, 226, 154, 255, 235, 255,}, - {66, 102, 186, 130, 160, 227, 154, 255, 235, 255,}, - {66, 103, 187, 130, 160, 227, 155, 255, 236, 255,}, - {67, 104, 188, 131, 161, 228, 156, 255, 236, 255,}, - {68, 105, 189, 131, 161, 229, 157, 255, 237, 255,}, - {69, 106, 190, 132, 161, 230, 157, 255, 238, 255,}, - {69, 107, 191, 132, 162, 230, 158, 255, 238, 255,}, - {70, 108, 192, 133, 162, 231, 159, 255, 239, 255,}, - {71, 109, 193, 133, 163, 232, 159, 255, 239, 255,}, - {72, 110, 194, 134, 163, 232, 160, 255, 240, 255,}, - {73, 111, 195, 134, 163, 233, 161, 255, 240, 255,}, - {73, 112, 196, 135, 164, 233, 
162, 255, 241, 255,}, - {74, 113, 197, 135, 164, 234, 162, 255, 241, 255,}, - {75, 114, 198, 136, 164, 235, 163, 255, 241, 255,}, - {76, 115, 199, 136, 165, 235, 164, 255, 242, 255,}, - {77, 116, 200, 137, 165, 236, 165, 255, 242, 255,}, - {77, 117, 201, 137, 165, 236, 165, 255, 243, 255,}, - {78, 118, 202, 138, 166, 237, 166, 255, 243, 255,}, - {79, 119, 203, 138, 166, 237, 167, 255, 244, 255,}, - {80, 120, 204, 139, 166, 238, 167, 255, 244, 255,}, - {81, 121, 205, 139, 167, 238, 168, 255, 244, 255,}, - {82, 122, 206, 140, 167, 239, 169, 255, 245, 255,}, - {82, 123, 206, 141, 168, 239, 170, 255, 245, 255,}, - {83, 124, 207, 141, 168, 240, 170, 255, 245, 255,}, - {84, 125, 208, 142, 168, 240, 171, 255, 246, 255,}, - {85, 126, 209, 142, 169, 241, 172, 255, 246, 255,}, - {86, 127, 210, 143, 169, 241, 173, 255, 246, 255,}, - {87, 128, 211, 143, 169, 242, 173, 255, 247, 255,}, - {87, 129, 212, 144, 170, 242, 174, 255, 247, 255,}, - {88, 130, 212, 144, 170, 242, 175, 255, 247, 255,}, - {89, 131, 213, 145, 171, 243, 176, 255, 248, 255,}, - {90, 132, 214, 146, 171, 243, 176, 255, 248, 255,}, - {91, 133, 215, 146, 171, 244, 177, 255, 248, 255,}, - {92, 134, 216, 147, 172, 244, 178, 255, 248, 255,}, - {93, 135, 216, 147, 172, 244, 179, 255, 249, 255,}, - {93, 136, 217, 148, 173, 245, 179, 255, 249, 255,}, - {94, 137, 218, 148, 173, 245, 180, 255, 249, 255,}, - {95, 138, 219, 149, 173, 245, 181, 255, 249, 255,}, - {96, 139, 220, 150, 174, 246, 181, 255, 250, 255,}, - {97, 140, 220, 150, 174, 246, 182, 255, 250, 255,}, - {98, 141, 221, 151, 175, 246, 183, 255, 250, 255,}, - {99, 142, 222, 151, 175, 247, 184, 255, 250, 255,}, - {100, 143, 222, 152, 175, 247, 184, 255, 251, 255,}, - {100, 144, 223, 153, 176, 247, 185, 255, 251, 255,}, - {101, 145, 224, 153, 176, 248, 186, 255, 251, 255,}, - {102, 146, 224, 154, 177, 248, 187, 255, 251, 255,}, - {103, 147, 225, 154, 177, 248, 187, 255, 251, 255,}, - {104, 148, 226, 155, 178, 248, 188, 255, 252, 255,}, - {105, 149, 226, 156, 178, 249, 189, 255, 252, 255,}, - {106, 150, 227, 156, 178, 249, 190, 255, 252, 255,}, - {107, 151, 228, 157, 179, 249, 190, 255, 252, 255,}, - {108, 152, 228, 158, 179, 249, 191, 255, 252, 255,}, - {109, 153, 229, 158, 180, 250, 192, 255, 252, 255,}, - {110, 154, 230, 159, 180, 250, 193, 255, 253, 255,}, - {111, 155, 230, 159, 181, 250, 193, 255, 253, 255,}, - {111, 156, 231, 160, 181, 250, 194, 255, 253, 255,}, - {112, 157, 231, 161, 181, 251, 195, 255, 253, 255,}, - {113, 158, 232, 161, 182, 251, 196, 255, 253, 255,}, - {114, 159, 233, 162, 182, 251, 196, 255, 253, 255,}, - {115, 160, 233, 163, 183, 251, 197, 255, 253, 255,}, - {116, 161, 234, 163, 183, 251, 198, 255, 253, 255,}, - {117, 162, 234, 164, 184, 252, 199, 255, 254, 255,}, - {118, 163, 235, 165, 184, 252, 199, 255, 254, 255,}, - {119, 164, 235, 165, 185, 252, 200, 255, 254, 255,}, - {120, 165, 236, 166, 185, 252, 201, 255, 254, 255,}, - {121, 166, 236, 167, 186, 252, 202, 255, 254, 255,}, - {122, 167, 237, 167, 186, 252, 202, 255, 254, 255,}, - {123, 168, 237, 168, 187, 253, 203, 255, 254, 255,}, - {124, 169, 238, 169, 187, 253, 204, 255, 254, 255,}, - {125, 170, 238, 169, 188, 253, 205, 255, 254, 255,}, - {126, 171, 239, 170, 188, 253, 205, 255, 254, 255,}, - {127, 172, 239, 171, 189, 253, 206, 255, 254, 255,}, - {128, 173, 240, 172, 189, 253, 207, 255, 255, 255,}, - {129, 174, 240, 172, 190, 253, 208, 255, 255, 255,}, - {130, 175, 241, 173, 190, 253, 208, 255, 255, 255,}, - {131, 176, 241, 174, 191, 254, 209, 255, 255, 255,}, - {132, 177, 242, 175, 191, 254, 210, 255, 
255, 255,}, - {133, 178, 242, 175, 192, 254, 210, 255, 255, 255,}, - {134, 179, 242, 176, 192, 254, 211, 255, 255, 255,}, - {135, 180, 243, 177, 193, 254, 212, 255, 255, 255,}, - {137, 181, 243, 177, 193, 254, 213, 255, 255, 255,}, - {138, 182, 244, 178, 194, 254, 213, 255, 255, 255,}, - {139, 183, 244, 179, 194, 254, 214, 255, 255, 255,}, - {140, 184, 244, 180, 195, 254, 215, 255, 255, 255,}, - {141, 185, 245, 181, 195, 254, 216, 255, 255, 255,}, - {142, 186, 245, 181, 196, 255, 216, 255, 255, 255,}, - {143, 187, 245, 182, 196, 255, 217, 255, 255, 255,}, - {144, 188, 246, 183, 197, 255, 218, 255, 255, 255,}, - {145, 189, 246, 184, 197, 255, 218, 255, 255, 255,}, - {146, 190, 247, 184, 198, 255, 219, 255, 255, 255,}, - {147, 191, 247, 185, 199, 255, 220, 255, 255, 255,}, - {149, 192, 247, 186, 199, 255, 221, 255, 255, 255,}, - {150, 193, 247, 187, 200, 255, 221, 255, 255, 255,}, - {151, 194, 248, 188, 200, 255, 222, 255, 255, 255,}, - {152, 195, 248, 188, 201, 255, 223, 255, 255, 255,}, - {153, 196, 248, 189, 201, 255, 223, 255, 255, 255,}, - {154, 197, 249, 190, 202, 255, 224, 255, 255, 255,}, - {156, 198, 249, 191, 203, 255, 225, 255, 255, 255,}, - {157, 199, 249, 192, 203, 255, 225, 255, 255, 255,}, - {158, 200, 250, 193, 204, 255, 226, 255, 255, 255,}, - {159, 201, 250, 193, 205, 255, 227, 255, 255, 255,}, - {160, 202, 250, 194, 205, 255, 227, 255, 255, 255,}, - {162, 203, 250, 195, 206, 255, 228, 255, 255, 255,}, - {163, 204, 251, 196, 206, 255, 229, 255, 255, 255,}, - {164, 205, 251, 197, 207, 255, 229, 255, 255, 255,}, - {165, 206, 251, 198, 208, 255, 230, 255, 255, 255,}, - {166, 207, 251, 199, 208, 255, 231, 255, 255, 255,}, - {168, 208, 251, 200, 209, 255, 231, 255, 255, 255,}, - {169, 209, 252, 201, 210, 255, 232, 255, 255, 255,}, - {170, 210, 252, 201, 210, 255, 233, 255, 255, 255,}, - {172, 211, 252, 202, 211, 255, 233, 255, 255, 255,}, - {173, 212, 252, 203, 212, 255, 234, 255, 255, 255,}, - {174, 213, 252, 204, 212, 255, 235, 255, 255, 255,}, - {175, 214, 253, 205, 213, 255, 235, 255, 255, 255,}, - {177, 215, 253, 206, 214, 255, 236, 255, 255, 255,}, - {178, 216, 253, 207, 215, 255, 237, 255, 255, 255,}, - {179, 217, 253, 208, 215, 255, 237, 255, 255, 255,}, - {181, 218, 253, 209, 216, 255, 238, 255, 255, 255,}, - {182, 219, 254, 210, 217, 255, 238, 255, 255, 255,}, - {184, 220, 254, 211, 217, 255, 239, 255, 255, 255,}, - {185, 221, 254, 212, 218, 255, 240, 255, 255, 255,}, - {186, 222, 254, 213, 219, 255, 240, 255, 255, 255,}, - {188, 223, 254, 214, 220, 255, 241, 255, 255, 255,}, - {189, 224, 254, 215, 221, 255, 241, 255, 255, 255,}, - {191, 225, 254, 216, 221, 255, 242, 255, 255, 255,}, - {192, 226, 254, 217, 222, 255, 243, 255, 255, 255,}, - {194, 227, 255, 218, 223, 255, 243, 255, 255, 255,}, - {195, 228, 255, 219, 224, 255, 244, 255, 255, 255,}, - {197, 229, 255, 220, 225, 255, 244, 255, 255, 255,}, - {198, 230, 255, 221, 225, 255, 245, 255, 255, 255,}, - {200, 231, 255, 222, 226, 255, 245, 255, 255, 255,}, - {201, 232, 255, 223, 227, 255, 246, 255, 255, 255,}, - {203, 233, 255, 224, 228, 255, 247, 255, 255, 255,}, - {205, 234, 255, 226, 229, 255, 247, 255, 255, 255,}, - {206, 235, 255, 227, 230, 255, 248, 255, 255, 255,}, - {208, 236, 255, 228, 231, 255, 248, 255, 255, 255,}, - {210, 237, 255, 229, 232, 255, 249, 255, 255, 255,}, - {211, 238, 255, 230, 233, 255, 249, 255, 255, 255,}, - {213, 239, 255, 231, 234, 255, 250, 255, 255, 255,}, - {215, 240, 255, 233, 235, 255, 250, 255, 255, 255,}, - {217, 241, 255, 234, 236, 255, 251, 255, 255, 255,}, - {219, 242, 255, 235, 
237, 255, 251, 255, 255, 255,},
-  {221, 243, 255, 236, 238, 255, 252, 255, 255, 255,},
-  {223, 244, 255, 237, 239, 255, 252, 255, 255, 255,},
-  {225, 245, 255, 239, 240, 255, 252, 255, 255, 255,},
-  {227, 246, 255, 240, 241, 255, 253, 255, 255, 255,},
-  {229, 247, 255, 241, 242, 255, 253, 255, 255, 255,},
-  {231, 248, 255, 243, 244, 255, 254, 255, 255, 255,},
-  {233, 249, 255, 244, 245, 255, 254, 255, 255, 255,},
-  {236, 250, 255, 246, 246, 255, 254, 255, 255, 255,},
-  {238, 251, 255, 247, 247, 255, 255, 255, 255, 255,},
-  {241, 252, 255, 249, 249, 255, 255, 255, 255, 255,},
-  {244, 253, 255, 250, 250, 255, 255, 255, 255, 255,},
-  {247, 254, 255, 252, 252, 255, 255, 255, 255, 255,},
-  {251, 255, 255, 254, 254, 255, 255, 255, 255, 255,},
-};
+// Model obtained from a 2-sided zero-centered distribution derived
+// from a Pareto distribution. The cdf of the distribution is:
+// cdf(x) = 0.5 + 0.5 * sgn(x) * [1 - {alpha/(alpha + |x|)} ^ beta]
+//
+// For a given beta and a given probability of the 1-node, alpha is
+// first solved for, and then the {alpha, beta} pair is used to
+// generate the probabilities for the rest of the nodes.
+
+// beta = 8
+const vp9_prob vp9_modelcoefprobs_pareto8[COEFPROB_MODELS][MODEL_NODES] = {
+  { 3, 86, 128, 6, 86, 23, 88, 29},
+  { 9, 86, 129, 17, 88, 61, 94, 76},
+  { 15, 87, 129, 28, 89, 93, 100, 110},
+  { 20, 88, 130, 38, 91, 118, 106, 136},
+  { 26, 89, 131, 48, 92, 139, 111, 156},
+  { 31, 90, 131, 58, 94, 156, 117, 171},
+  { 37, 90, 132, 66, 95, 171, 122, 184},
+  { 42, 91, 132, 75, 97, 183, 127, 194},
+  { 47, 92, 133, 83, 98, 193, 132, 202},
+  { 52, 93, 133, 90, 100, 201, 137, 208},
+  { 57, 94, 134, 98, 101, 208, 142, 214},
+  { 62, 94, 135, 105, 103, 214, 146, 218},
+  { 66, 95, 135, 111, 104, 219, 151, 222},
+  { 71, 96, 136, 117, 106, 224, 155, 225},
+  { 76, 97, 136, 123, 107, 227, 159, 228},
+  { 80, 98, 137, 129, 109, 231, 162, 231},
+  { 84, 98, 138, 134, 110, 234, 166, 233},
+  { 89, 99, 138, 140, 112, 236, 170, 235},
+  { 93, 100, 139, 145, 113, 238, 173, 236},
+  { 97, 101, 140, 149, 115, 240, 176, 238},
+  {101, 102, 140, 154, 116, 242, 179, 239},
+  {105, 103, 141, 158, 118, 243, 182, 240},
+  {109, 104, 141, 162, 119, 244, 185, 241},
+  {113, 104, 142, 166, 120, 245, 187, 242},
+  {116, 105, 143, 170, 122, 246, 190, 243},
+  {120, 106, 143, 173, 123, 247, 192, 244},
+  {123, 107, 144, 177, 125, 248, 195, 244},
+  {127, 108, 145, 180, 126, 249, 197, 245},
+  {130, 109, 145, 183, 128, 249, 199, 245},
+  {134, 110, 146, 186, 129, 250, 201, 246},
+  {137, 111, 147, 189, 131, 251, 203, 246},
+  {140, 112, 147, 192, 132, 251, 205, 247},
+  {143, 113, 148, 194, 133, 251, 207, 247},
+  {146, 114, 149, 197, 135, 252, 208, 248},
+  {149, 115, 149, 199, 136, 252, 210, 248},
+  {152, 115, 150, 201, 138, 252, 211, 248},
+  {155, 116, 151, 204, 139, 253, 213, 249},
+  {158, 117, 151, 206, 140, 253, 214, 249},
+  {161, 118, 152, 208, 142, 253, 216, 249},
+  {163, 119, 153, 210, 143, 253, 217, 249},
+  {166, 120, 153, 212, 144, 254, 218, 250},
+  {168, 121, 154, 213, 146, 254, 220, 250},
+  {171, 122, 155, 215, 147, 254, 221, 250},
+  {173, 123, 155, 217, 148, 254, 222, 250},
+  {176, 124, 156, 218, 150, 254, 223, 250},
+  {178, 125, 157, 220, 151, 254, 224, 251},
+  {180, 126, 157, 221, 152, 254, 225, 251},
+  {183, 127, 158, 222, 153, 254, 226, 251},
+  {185, 128, 159, 224, 155, 255, 227, 251},
+  {187, 129, 160, 225, 156, 255, 228, 251},
+  {189, 131, 160, 226, 157, 255, 228, 251},
+  {191, 132, 161, 227, 159, 255, 229, 251},
+  {193, 133, 162, 228, 160, 255, 230, 252},
+  {195, 134, 163, 230, 161, 255, 231, 252},
+  {197, 135, 163, 231, 162, 255, 231, 252},
+  {199, 136, 164, 232, 163, 255, 232, 252},
+  {201, 137, 165, 233, 165, 255, 233, 252},
+  {202, 138, 166, 233, 166, 255, 233, 252},
+  {204, 139, 166, 234, 167, 255, 234, 252},
+  {206, 140, 167, 235, 168, 255, 235, 252},
+  {207, 141, 168, 236, 169, 255, 235, 252},
+  {209, 142, 169, 237, 171, 255, 236, 252},
+  {210, 144, 169, 237, 172, 255, 236, 252},
+  {212, 145, 170, 238, 173, 255, 237, 252},
+  {214, 146, 171, 239, 174, 255, 237, 253},
+  {215, 147, 172, 240, 175, 255, 238, 253},
+  {216, 148, 173, 240, 176, 255, 238, 253},
+  {218, 149, 173, 241, 177, 255, 239, 253},
+  {219, 150, 174, 241, 179, 255, 239, 253},
+  {220, 152, 175, 242, 180, 255, 240, 253},
+  {222, 153, 176, 242, 181, 255, 240, 253},
+  {223, 154, 177, 243, 182, 255, 240, 253},
+  {224, 155, 178, 244, 183, 255, 241, 253},
+  {225, 156, 178, 244, 184, 255, 241, 253},
+  {226, 158, 179, 244, 185, 255, 242, 253},
+  {228, 159, 180, 245, 186, 255, 242, 253},
+  {229, 160, 181, 245, 187, 255, 242, 253},
+  {230, 161, 182, 246, 188, 255, 243, 253},
+  {231, 163, 183, 246, 189, 255, 243, 253},
+  {232, 164, 184, 247, 190, 255, 243, 253},
+  {233, 165, 185, 247, 191, 255, 244, 253},
+  {234, 166, 185, 247, 192, 255, 244, 253},
+  {235, 168, 186, 248, 193, 255, 244, 253},
+  {236, 169, 187, 248, 194, 255, 244, 253},
+  {236, 170, 188, 248, 195, 255, 245, 253},
+  {237, 171, 189, 249, 196, 255, 245, 254},
+  {238, 173, 190, 249, 197, 255, 245, 254},
+  {239, 174, 191, 249, 198, 255, 245, 254},
+  {240, 175, 192, 249, 199, 255, 246, 254},
+  {240, 177, 193, 250, 200, 255, 246, 254},
+  {241, 178, 194, 250, 201, 255, 246, 254},
+  {242, 179, 195, 250, 202, 255, 246, 254},
+  {242, 181, 196, 250, 203, 255, 247, 254},
+  {243, 182, 197, 251, 204, 255, 247, 254},
+  {244, 184, 198, 251, 205, 255, 247, 254},
+  {244, 185, 199, 251, 206, 255, 247, 254},
+  {245, 186, 200, 251, 207, 255, 247, 254},
+  {246, 188, 201, 252, 207, 255, 248, 254},
+  {246, 189, 202, 252, 208, 255, 248, 254},
+  {247, 191, 203, 252, 209, 255, 248, 254},
+  {247, 192, 204, 252, 210, 255, 248, 254},
+  {248, 194, 205, 252, 211, 255, 248, 254},
+  {248, 195, 206, 252, 212, 255, 249, 254},
+  {249, 197, 207, 253, 213, 255, 249, 254},
+  {249, 198, 208, 253, 214, 255, 249, 254},
+  {250, 200, 210, 253, 215, 255, 249, 254},
+  {250, 201, 211, 253, 215, 255, 249, 254},
+  {250, 203, 212, 253, 216, 255, 249, 254},
+  {251, 204, 213, 253, 217, 255, 250, 254},
+  {251, 206, 214, 254, 218, 255, 250, 254},
+  {252, 207, 216, 254, 219, 255, 250, 254},
+  {252, 209, 217, 254, 220, 255, 250, 254},
+  {252, 211, 218, 254, 221, 255, 250, 254},
+  {253, 213, 219, 254, 222, 255, 250, 254},
+  {253, 214, 221, 254, 223, 255, 250, 254},
+  {253, 216, 222, 254, 224, 255, 251, 254},
+  {253, 218, 224, 254, 225, 255, 251, 254},
+  {254, 220, 225, 254, 225, 255, 251, 254},
+  {254, 222, 227, 255, 226, 255, 251, 254},
+  {254, 224, 228, 255, 227, 255, 251, 254},
+  {254, 226, 230, 255, 228, 255, 251, 254},
+  {255, 228, 231, 255, 230, 255, 251, 254},
+  {255, 230, 233, 255, 231, 255, 252, 254},
+  {255, 232, 235, 255, 232, 255, 252, 254},
+  {255, 235, 237, 255, 233, 255, 252, 254},
+  {255, 238, 240, 255, 235, 255, 252, 255},
+  {255, 241, 243, 255, 236, 255, 252, 254},
+  {255, 246, 247, 255, 239, 255, 253, 255}
+};
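[Editor's sketch, not part of the patch: the comment above states the model cdf but the patch does not include the generator that produced the table, so the snippet below shows one way such node probabilities could be derived. All function names are hypothetical, and the exact node-to-threshold mapping used to build vp9_modelcoefprobs_pareto8 is an assumption.]

#include <math.h>

/* P(|x| > t) under the two-sided model above: (alpha / (alpha + t)) ^ beta. */
static double pareto_tail(double alpha, double beta, double t) {
  return pow(alpha / (alpha + t), beta);
}

/* Solve alpha from the 1-node probability p1 = P(|x| >= 1):
 * (alpha / (alpha + 1)) ^ beta = p1  =>  alpha = 1 / (p1^(-1/beta) - 1). */
static double pareto_alpha(double p1, double beta) {
  return 1.0 / (pow(p1, -1.0 / beta) - 1.0);
}

/* Conditional branch probability P(|x| > hi | |x| > lo), mapped onto the
 * 1..255 range of vp9_prob. */
static int pareto_node_prob(double alpha, double beta, double lo, double hi) {
  const double p = pareto_tail(alpha, beta, hi) / pareto_tail(alpha, beta, lo);
  const int v = (int)(p * 256.0 + 0.5);
  return v < 1 ? 1 : (v > 255 ? 255 : v);
}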
+static void extend_model_to_full_distribution(vp9_prob p,
+                                              vp9_prob *tree_probs) {
+  // p is the probability of the 1-node (1..255); odd values select a table
+  // row exactly, even values fall halfway between two adjacent rows.
+  const int l = (p - 1) / 2;
+  const vp9_prob (*model)[MODEL_NODES] = vp9_modelcoefprobs_pareto8;
+  if (p & 1) {
+    vpx_memcpy(tree_probs + UNCONSTRAINED_NODES,
+               model[l], MODEL_NODES * sizeof(vp9_prob));
+  } else {
+    // interpolate: average the two neighboring rows
+    int i;
+    for (i = UNCONSTRAINED_NODES; i < ENTROPY_NODES; ++i)
+      tree_probs[i] = (model[l][i - UNCONSTRAINED_NODES] +
+                       model[l + 1][i - UNCONSTRAINED_NODES]) >> 1;
+  }
+}
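[Editor's sketch, not part of the patch: a minimal caller for the helper above. It assumes UNCONSTRAINED_NODES = 3, MODEL_NODES = 8 and ENTROPY_NODES = 11, which is consistent with the loop bounds and table width here; example_extend and the literal probabilities are hypothetical.]

/* Fill nodes 3..10 of an 11-entry probability set from the model table;
 * the first UNCONSTRAINED_NODES entries stay whatever the caller coded. */
void example_extend(void) {
  vp9_prob tree_probs[ENTROPY_NODES] = { 128, 120, 101 };  /* nodes 0..2 */
  /* p = 101 is odd: row (101 - 1) / 2 = 50 is copied verbatim. */
  extend_model_to_full_distribution(101, tree_probs);
  /* p = 102 is even: rows 50 and 51 are averaged element-wise. */
  extend_model_to_full_distribution(102, tree_probs);
}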
-const vp9_prob vp9_modelcoefprobs_gg625p1[COEFPROB_MODELS][ENTROPY_NODES - 1] = {
-  // Probs generated with a Generalized Gaussian (with shape parameter 0.625)
-  // source model with varying quantizer step size for a uniform quantizer
-  {0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},  // do not use
-  {1, 1, 3, 87, 129, 6, 87, 20, 91, 24,},
-  {1, 2, 6, 88, 130, 11, 89, 36, 94, 41,},
-  {2, 3, 8, 88, 130, 15, 90, 50, 97, 56,},
-  {2, 4, 11, 89, 131, 20, 90, 62, 99, 70,},
-  {3, 5, 14, 90, 131, 24, 91, 74, 102, 81,},
-  {3, 6, 16, 90, 132, 29, 92, 84, 104, 92,},
-  {4, 7, 19, 91, 132, 33, 93, 93, 106, 101,},
-  {4, 8, 21, 91, 132, 37, 93, 102, 108, 110,},
-  {5, 9, 24, 92, 133, 40, 94, 110, 110, 118,},
-  {5, 10, 26, 92, 133, 44, 95, 118, 111, 125,},
-  {6, 11, 29, 93, 134, 48, 96, 125, 113, 132,},
-  {7, 12, 31, 93, 134, 51, 96, 132, 115, 139,},
-  {7, 13, 33, 93, 134, 55, 97, 138, 117, 145,},
-  {8, 14, 36, 94, 135, 58, 97, 144, 119, 150,},
-  {8, 15, 38, 94, 135, 62, 98, 149, 120, 155,},
-  {9, 16, 40, 95, 135, 65, 99, 154, 122, 160,},
-  {10, 17, 42, 95, 136, 68, 99, 159, 124, 165,},
-  {10, 18, 45, 96, 136, 71, 100, 164, 125, 169,},
-  {11, 19, 47, 96, 136, 74, 100, 168, 127, 174,},
-  {11, 20, 49, 96, 136, 77, 101, 173, 128, 177,},
-  {12, 21, 51, 97, 137, 80, 102, 176, 130, 181,},
-  {13, 22, 53, 97, 137, 83, 102, 180, 131, 185,},
-  {13, 23, 55, 98, 137, 86, 103, 184, 133, 188,},
-  {14, 24, 57, 98, 138, 89, 103, 187, 135, 191,},
-  {14, 25, 59, 98, 138, 91, 104, 190, 136, 194,},
-  {15, 26, 61, 99, 138, 94, 104, 193, 138, 197,},
-  {16, 27, 64, 99, 139, 97, 105, 196, 139, 200,},
-  {16, 28, 66, 100, 139, 99, 106, 199, 141, 202,},
-  {17, 29, 68, 100, 139, 102, 106, 201, 142, 205,},
-  {18, 30, 69, 100, 139, 104, 107, 204, 143, 207,},
-  {18, 31, 71, 101, 140, 107, 107, 206, 145, 209,},
-  {19, 32, 73, 101, 140, 109, 108, 209, 146, 211,},
-  {20, 33, 75, 102, 140, 112, 108, 211, 148, 213,},
-  {20, 34, 77, 102, 141, 114, 109, 213, 149, 215,},
-  {21, 35, 79, 102, 141, 116, 109, 215, 150, 217,},
-  {22, 36, 81, 103, 141, 119, 110, 217, 152, 219,},
-  {22, 37, 83, 103, 141, 121, 110, 218, 153, 220,},
-  {23, 38, 85, 103, 142, 123, 111, 220, 155, 222,},
-  {24, 39, 87, 104, 142, 125, 112, 222, 156, 224,},
-  {24, 40, 88, 104, 142, 127, 112, 223, 157, 225,},
-  {25, 41, 90, 105, 143, 129, 113, 225, 159, 226,},
-  {26, 42, 92, 105, 143, 131, 113, 226, 160, 228,},
-  {26, 43, 94, 105, 143, 133, 114, 227, 161, 229,},
-  {27, 44, 95, 106, 143, 135, 114, 229, 162, 230,},
-  {28, 45, 97, 106, 144, 137, 115, 230, 164, 231,},
-  {28, 46, 99, 107, 144, 139, 115, 231, 165, 232,},
-  {29, 47, 101, 107, 144, 141, 116, 232, 166, 233,},
-  {30, 48, 102, 107, 145, 143, 116, 233, 168, 234,},
-  {31, 49, 104, 108, 145, 145, 117, 234, 169, 235,},
-  {31, 50, 106, 108, 145, 147, 118, 235, 170, 236,},
-  {32, 51, 107, 108, 145, 149, 118, 236, 171, 237,},
-  {33, 52, 109, 109, 146, 150, 119, 237, 172, 238,},
-  {33, 53, 111, 109, 146, 152, 119, 238, 174, 239,},
-  {34, 54, 112, 110, 146, 154, 120, 239, 175, 240,},
-  {35, 55, 114, 110, 146, 156, 120, 240, 176, 240,},
-  {36, 56, 115, 110, 147, 157, 121, 240, 177, 241,},
-  {36, 57, 117, 111, 147, 159, 121, 241, 178, 242,},
-  {37, 58, 119, 111, 147, 161, 122, 242, 180, 242,},
-  {38, 59, 120, 112, 148, 162, 122, 242, 181, 243,},
-  {38, 60, 122, 112, 148, 164, 123, 243, 182, 244,},
-  {39, 61, 123,
112, 148, 165, 124, 244, 183, 244,}, - {40, 62, 125, 113, 148, 167, 124, 244, 184, 245,}, - {41, 63, 126, 113, 149, 168, 125, 245, 185, 245,}, - {41, 64, 128, 114, 149, 170, 125, 245, 186, 246,}, - {42, 65, 129, 114, 149, 171, 126, 246, 187, 246,}, - {43, 66, 131, 114, 150, 173, 126, 246, 188, 247,}, - {44, 67, 132, 115, 150, 174, 127, 247, 189, 247,}, - {44, 68, 134, 115, 150, 176, 127, 247, 191, 247,}, - {45, 69, 135, 116, 150, 177, 128, 248, 192, 248,}, - {46, 70, 136, 116, 151, 178, 129, 248, 193, 248,}, - {47, 71, 138, 116, 151, 180, 129, 248, 194, 249,}, - {48, 72, 139, 117, 151, 181, 130, 249, 195, 249,}, - {48, 73, 141, 117, 152, 183, 130, 249, 196, 249,}, - {49, 74, 142, 118, 152, 184, 131, 249, 197, 250,}, - {50, 75, 143, 118, 152, 185, 131, 250, 198, 250,}, - {51, 76, 145, 118, 152, 186, 132, 250, 199, 250,}, - {51, 77, 146, 119, 153, 188, 132, 250, 200, 250,}, - {52, 78, 148, 119, 153, 189, 133, 251, 201, 251,}, - {53, 79, 149, 120, 153, 190, 134, 251, 201, 251,}, - {54, 80, 150, 120, 154, 191, 134, 251, 202, 251,}, - {55, 81, 151, 120, 154, 192, 135, 251, 203, 251,}, - {55, 82, 153, 121, 154, 194, 135, 252, 204, 252,}, - {56, 83, 154, 121, 155, 195, 136, 252, 205, 252,}, - {57, 84, 155, 122, 155, 196, 136, 252, 206, 252,}, - {58, 85, 157, 122, 155, 197, 137, 252, 207, 252,}, - {59, 86, 158, 123, 155, 198, 138, 252, 208, 252,}, - {59, 87, 159, 123, 156, 199, 138, 253, 209, 253,}, - {60, 88, 160, 123, 156, 200, 139, 253, 210, 253,}, - {61, 89, 162, 124, 156, 201, 139, 253, 210, 253,}, - {62, 90, 163, 124, 157, 202, 140, 253, 211, 253,}, - {63, 91, 164, 125, 157, 203, 140, 253, 212, 253,}, - {64, 92, 165, 125, 157, 204, 141, 253, 213, 253,}, - {64, 93, 166, 126, 158, 205, 142, 254, 214, 253,}, - {65, 94, 168, 126, 158, 206, 142, 254, 214, 254,}, - {66, 95, 169, 126, 158, 207, 143, 254, 215, 254,}, - {67, 96, 170, 127, 158, 208, 143, 254, 216, 254,}, - {68, 97, 171, 127, 159, 209, 144, 254, 217, 254,}, - {69, 98, 172, 128, 159, 210, 145, 254, 218, 254,}, - {69, 99, 173, 128, 159, 211, 145, 254, 218, 254,}, - {70, 100, 175, 129, 160, 212, 146, 254, 219, 254,}, - {71, 101, 176, 129, 160, 213, 146, 254, 220, 254,}, - {72, 102, 177, 130, 160, 214, 147, 254, 220, 254,}, - {73, 103, 178, 130, 161, 214, 148, 255, 221, 255,}, - {74, 104, 179, 130, 161, 215, 148, 255, 222, 255,}, - {75, 105, 180, 131, 161, 216, 149, 255, 223, 255,}, - {75, 106, 181, 131, 162, 217, 149, 255, 223, 255,}, - {76, 107, 182, 132, 162, 218, 150, 255, 224, 255,}, - {77, 108, 183, 132, 162, 219, 151, 255, 225, 255,}, - {78, 109, 184, 133, 163, 219, 151, 255, 225, 255,}, - {79, 110, 185, 133, 163, 220, 152, 255, 226, 255,}, - {80, 111, 186, 134, 163, 221, 152, 255, 226, 255,}, - {81, 112, 187, 134, 164, 222, 153, 255, 227, 255,}, - {82, 113, 188, 135, 164, 222, 154, 255, 228, 255,}, - {83, 114, 189, 135, 164, 223, 154, 255, 228, 255,}, - {83, 115, 190, 136, 165, 224, 155, 255, 229, 255,}, - {84, 116, 191, 136, 165, 224, 156, 255, 230, 255,}, - {85, 117, 192, 137, 165, 225, 156, 255, 230, 255,}, - {86, 118, 193, 137, 166, 226, 157, 255, 231, 255,}, - {87, 119, 194, 137, 166, 226, 157, 255, 231, 255,}, - {88, 120, 195, 138, 166, 227, 158, 255, 232, 255,}, - {89, 121, 196, 138, 167, 228, 159, 255, 232, 255,}, - {90, 122, 197, 139, 167, 228, 159, 255, 233, 255,}, - {91, 123, 198, 139, 167, 229, 160, 255, 233, 255,}, - {92, 124, 199, 140, 168, 230, 161, 255, 234, 255,}, - {93, 125, 200, 140, 168, 230, 161, 255, 234, 255,}, - {93, 126, 201, 141, 168, 231, 162, 255, 235, 255,}, - {94, 127, 202, 141, 169, 231, 163, 255, 
235, 255,}, - {95, 128, 203, 142, 169, 232, 163, 255, 236, 255,}, - {96, 129, 203, 142, 169, 233, 164, 255, 236, 255,}, - {97, 130, 204, 143, 170, 233, 164, 255, 237, 255,}, - {98, 131, 205, 143, 170, 234, 165, 255, 237, 255,}, - {99, 132, 206, 144, 170, 234, 166, 255, 238, 255,}, - {100, 133, 207, 145, 171, 235, 166, 255, 238, 255,}, - {101, 134, 208, 145, 171, 235, 167, 255, 239, 255,}, - {102, 135, 209, 146, 171, 236, 168, 255, 239, 255,}, - {103, 136, 209, 146, 172, 236, 168, 255, 240, 255,}, - {104, 137, 210, 147, 172, 237, 169, 255, 240, 255,}, - {105, 138, 211, 147, 173, 237, 170, 255, 240, 255,}, - {106, 139, 212, 148, 173, 238, 170, 255, 241, 255,}, - {107, 140, 213, 148, 173, 238, 171, 255, 241, 255,}, - {108, 141, 213, 149, 174, 239, 172, 255, 242, 255,}, - {109, 142, 214, 149, 174, 239, 172, 255, 242, 255,}, - {110, 143, 215, 150, 174, 240, 173, 255, 242, 255,}, - {111, 144, 216, 150, 175, 240, 174, 255, 243, 255,}, - {112, 145, 216, 151, 175, 240, 174, 255, 243, 255,}, - {113, 146, 217, 152, 176, 241, 175, 255, 243, 255,}, - {114, 147, 218, 152, 176, 241, 176, 255, 244, 255,}, - {115, 148, 219, 153, 176, 242, 176, 255, 244, 255,}, - {116, 149, 219, 153, 177, 242, 177, 255, 244, 255,}, - {117, 150, 220, 154, 177, 242, 178, 255, 245, 255,}, - {118, 151, 221, 154, 178, 243, 178, 255, 245, 255,}, - {119, 152, 221, 155, 178, 243, 179, 255, 245, 255,}, - {120, 153, 222, 156, 178, 244, 180, 255, 246, 255,}, - {121, 154, 223, 156, 179, 244, 180, 255, 246, 255,}, - {122, 155, 223, 157, 179, 244, 181, 255, 246, 255,}, - {123, 156, 224, 157, 180, 245, 182, 255, 247, 255,}, - {124, 157, 225, 158, 180, 245, 183, 255, 247, 255,}, - {125, 158, 225, 159, 180, 245, 183, 255, 247, 255,}, - {126, 159, 226, 159, 181, 246, 184, 255, 247, 255,}, - {127, 160, 227, 160, 181, 246, 185, 255, 248, 255,}, - {128, 161, 227, 160, 182, 246, 185, 255, 248, 255,}, - {129, 162, 228, 161, 182, 246, 186, 255, 248, 255,}, - {130, 163, 229, 162, 183, 247, 187, 255, 248, 255,}, - {131, 164, 229, 162, 183, 247, 187, 255, 249, 255,}, - {132, 165, 230, 163, 183, 247, 188, 255, 249, 255,}, - {133, 166, 230, 163, 184, 248, 189, 255, 249, 255,}, - {135, 167, 231, 164, 184, 248, 190, 255, 249, 255,}, - {136, 168, 232, 165, 185, 248, 190, 255, 250, 255,}, - {137, 169, 232, 165, 185, 248, 191, 255, 250, 255,}, - {138, 170, 233, 166, 186, 249, 192, 255, 250, 255,}, - {139, 171, 233, 167, 186, 249, 192, 255, 250, 255,}, - {140, 172, 234, 167, 187, 249, 193, 255, 251, 255,}, - {141, 173, 234, 168, 187, 249, 194, 255, 251, 255,}, - {142, 174, 235, 169, 187, 250, 195, 255, 251, 255,}, - {143, 175, 235, 169, 188, 250, 195, 255, 251, 255,}, - {144, 176, 236, 170, 188, 250, 196, 255, 251, 255,}, - {146, 177, 236, 171, 189, 250, 197, 255, 251, 255,}, - {147, 178, 237, 171, 189, 251, 197, 255, 252, 255,}, - {148, 179, 237, 172, 190, 251, 198, 255, 252, 255,}, - {149, 180, 238, 173, 190, 251, 199, 255, 252, 255,}, - {150, 181, 238, 173, 191, 251, 200, 255, 252, 255,}, - {151, 182, 239, 174, 191, 251, 200, 255, 252, 255,}, - {152, 183, 239, 175, 192, 251, 201, 255, 252, 255,}, - {153, 184, 240, 176, 192, 252, 202, 255, 253, 255,}, - {155, 185, 240, 176, 193, 252, 203, 255, 253, 255,}, - {156, 186, 241, 177, 193, 252, 203, 255, 253, 255,}, - {157, 187, 241, 178, 194, 252, 204, 255, 253, 255,}, - {158, 188, 242, 179, 194, 252, 205, 255, 253, 255,}, - {159, 189, 242, 179, 195, 252, 206, 255, 253, 255,}, - {160, 190, 242, 180, 195, 253, 206, 255, 253, 255,}, - {162, 191, 243, 181, 196, 253, 207, 255, 253, 255,}, - {163, 192, 243, 182, 196, 
253, 208, 255, 254, 255,}, - {164, 193, 244, 182, 197, 253, 209, 255, 254, 255,}, - {165, 194, 244, 183, 198, 253, 209, 255, 254, 255,}, - {166, 195, 244, 184, 198, 253, 210, 255, 254, 255,}, - {168, 196, 245, 185, 199, 253, 211, 255, 254, 255,}, - {169, 197, 245, 185, 199, 254, 212, 255, 254, 255,}, - {170, 198, 246, 186, 200, 254, 212, 255, 254, 255,}, - {171, 199, 246, 187, 200, 254, 213, 255, 254, 255,}, - {172, 200, 246, 188, 201, 254, 214, 255, 254, 255,}, - {174, 201, 247, 189, 201, 254, 215, 255, 254, 255,}, - {175, 202, 247, 189, 202, 254, 215, 255, 255, 255,}, - {176, 203, 247, 190, 203, 254, 216, 255, 255, 255,}, - {177, 204, 248, 191, 203, 254, 217, 255, 255, 255,}, - {179, 205, 248, 192, 204, 254, 218, 255, 255, 255,}, - {180, 206, 248, 193, 204, 254, 218, 255, 255, 255,}, - {181, 207, 249, 194, 205, 255, 219, 255, 255, 255,}, - {183, 208, 249, 195, 206, 255, 220, 255, 255, 255,}, - {184, 209, 249, 195, 206, 255, 221, 255, 255, 255,}, - {185, 210, 250, 196, 207, 255, 221, 255, 255, 255,}, - {186, 211, 250, 197, 208, 255, 222, 255, 255, 255,}, - {188, 212, 250, 198, 208, 255, 223, 255, 255, 255,}, - {189, 213, 250, 199, 209, 255, 224, 255, 255, 255,}, - {190, 214, 251, 200, 210, 255, 224, 255, 255, 255,}, - {192, 215, 251, 201, 210, 255, 225, 255, 255, 255,}, - {193, 216, 251, 202, 211, 255, 226, 255, 255, 255,}, - {194, 217, 251, 203, 212, 255, 227, 255, 255, 255,}, - {196, 218, 252, 204, 212, 255, 228, 255, 255, 255,}, - {197, 219, 252, 205, 213, 255, 228, 255, 255, 255,}, - {198, 220, 252, 206, 214, 255, 229, 255, 255, 255,}, - {200, 221, 252, 207, 215, 255, 230, 255, 255, 255,}, - {201, 222, 252, 208, 215, 255, 231, 255, 255, 255,}, - {202, 223, 253, 209, 216, 255, 231, 255, 255, 255,}, - {204, 224, 253, 210, 217, 255, 232, 255, 255, 255,}, - {205, 225, 253, 211, 218, 255, 233, 255, 255, 255,}, - {207, 226, 253, 212, 218, 255, 234, 255, 255, 255,}, - {208, 227, 253, 213, 219, 255, 234, 255, 255, 255,}, - {209, 228, 254, 214, 220, 255, 235, 255, 255, 255,}, - {211, 229, 254, 215, 221, 255, 236, 255, 255, 255,}, - {212, 230, 254, 216, 222, 255, 237, 255, 255, 255,}, - {214, 231, 254, 217, 223, 255, 238, 255, 255, 255,}, - {215, 232, 254, 218, 223, 255, 238, 255, 255, 255,}, - {217, 233, 254, 219, 224, 255, 239, 255, 255, 255,}, - {218, 234, 255, 221, 225, 255, 240, 255, 255, 255,}, - {220, 235, 255, 222, 226, 255, 241, 255, 255, 255,}, - {221, 236, 255, 223, 227, 255, 241, 255, 255, 255,}, - {223, 237, 255, 224, 228, 255, 242, 255, 255, 255,}, - {224, 238, 255, 225, 229, 255, 243, 255, 255, 255,}, - {226, 239, 255, 227, 230, 255, 244, 255, 255, 255,}, - {227, 240, 255, 228, 231, 255, 244, 255, 255, 255,}, - {229, 241, 255, 229, 232, 255, 245, 255, 255, 255,}, - {231, 242, 255, 231, 233, 255, 246, 255, 255, 255,}, - {232, 243, 255, 232, 234, 255, 247, 255, 255, 255,}, - {234, 244, 255, 233, 236, 255, 247, 255, 255, 255,}, - {235, 245, 255, 235, 237, 255, 248, 255, 255, 255,}, - {237, 246, 255, 236, 238, 255, 249, 255, 255, 255,}, - {239, 247, 255, 238, 239, 255, 250, 255, 255, 255,}, - {241, 248, 255, 239, 241, 255, 250, 255, 255, 255,}, - {242, 249, 255, 241, 242, 255, 251, 255, 255, 255,}, - {244, 250, 255, 243, 243, 255, 252, 255, 255, 255,}, - {246, 251, 255, 244, 245, 255, 253, 255, 255, 255,}, - {248, 252, 255, 246, 247, 255, 253, 255, 255, 255,}, - {250, 253, 255, 248, 248, 255, 254, 255, 255, 255,}, - {252, 254, 255, 250, 250, 255, 255, 255, 255, 255,}, - {254, 255, 255, 253, 253, 255, 255, 255, 255, 255,}, -}; +void vp9_model_to_full_probs(const vp9_prob *model, 
vp9_prob *full) { + if (full != model) + vpx_memcpy(full, model, sizeof(vp9_prob) * UNCONSTRAINED_NODES); + extend_model_to_full_distribution(model[PIVOT_NODE], full); +} -void vp9_get_model_distribution(vp9_prob p, vp9_prob *tree_probs, - int b, int r) { - const vp9_prob (*model)[ENTROPY_NODES - 1]; -#if UNCONSTRAINED_NODES == 2 - if (r != INTRA_FRAME && b == PLANE_TYPE_UV) - model = vp9_modelcoefprobs_gg75; - else if (r == INTRA_FRAME && b == PLANE_TYPE_UV) - model = vp9_modelcoefprobs_gg75; - else if (r != INTRA_FRAME && b == PLANE_TYPE_Y_WITH_DC) - model = vp9_modelcoefprobs_gg75; - else - model = vp9_modelcoefprobs_gg75; -#else - if (r != INTRA_FRAME && b == PLANE_TYPE_UV) - model = vp9_modelcoefprobs_gg75p1; - else if (r == INTRA_FRAME && b == PLANE_TYPE_UV) - model = vp9_modelcoefprobs_gg75p1; - else if (r != INTRA_FRAME && b == PLANE_TYPE_Y_WITH_DC) - model = vp9_modelcoefprobs_gg75p1; - else - model = vp9_modelcoefprobs_gg75p1; -#endif - vpx_memcpy(tree_probs + UNCONSTRAINED_NODES, - model[p] + UNCONSTRAINED_NODES - 1, - (ENTROPY_NODES - UNCONSTRAINED_NODES) * sizeof(vp9_prob)); +void vp9_model_to_full_probs_sb( + vp9_prob model[COEF_BANDS][PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES], + vp9_prob full[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]) { + int c, p; + for (c = 0; c < COEF_BANDS; ++c) + for (p = 0; p < PREV_COEF_CONTEXTS; ++p) { + vp9_model_to_full_probs(model[c][p], full[c][p]); + } } -#endif static vp9_tree_index cat1[2], cat2[4], cat3[6], cat4[8], cat5[10], cat6[28]; @@ -2077,7 +439,7 @@ static void init_bit_trees() { init_bit_tree(cat6, 14); } -vp9_extra_bit_struct vp9_extra_bits[12] = { +vp9_extra_bit vp9_extra_bits[12] = { { 0, 0, 0, 0}, { 0, 0, 0, 1}, { 0, 0, 0, 2}, @@ -2111,177 +473,32 @@ int vp9_get_coef_context(const int *scan, const int *neighbors, int ctx; assert(neighbors[MAX_NEIGHBORS * c + 0] >= 0); if (neighbors[MAX_NEIGHBORS * c + 1] >= 0) { - ctx = (1 + token_cache[neighbors[MAX_NEIGHBORS * c + 0]] + - token_cache[neighbors[MAX_NEIGHBORS * c + 1]]) >> 1; + ctx = (1 + token_cache[scan[neighbors[MAX_NEIGHBORS * c + 0]]] + + token_cache[scan[neighbors[MAX_NEIGHBORS * c + 1]]]) >> 1; } else { - ctx = token_cache[neighbors[MAX_NEIGHBORS * c + 0]]; + ctx = token_cache[scan[neighbors[MAX_NEIGHBORS * c + 0]]]; } - return vp9_pt_energy_class[ctx]; + return ctx; } }; void vp9_default_coef_probs(VP9_COMMON *pc) { -#if CONFIG_MODELCOEFPROB - int b, r, c, p; -#endif -#if CONFIG_CODE_NONZEROCOUNT -#ifdef NZC_DEFAULT_COUNTS - int h, g; - for (h = 0; h < MAX_NZC_CONTEXTS; ++h) { - for (g = 0; g < REF_TYPES; ++g) { - int i; - unsigned int branch_ct4x4[NZC4X4_NODES][2]; - unsigned int branch_ct8x8[NZC8X8_NODES][2]; - unsigned int branch_ct16x16[NZC16X16_NODES][2]; - unsigned int branch_ct32x32[NZC32X32_NODES][2]; - for (i = 0; i < BLOCK_TYPES; ++i) { - vp9_tree_probs_from_distribution( - vp9_nzc4x4_tree, - pc->fc.nzc_probs_4x4[h][g][i], branch_ct4x4, - default_nzc_counts_4x4[h][g][i], 0); - } - for (i = 0; i < BLOCK_TYPES; ++i) { - vp9_tree_probs_from_distribution( - vp9_nzc8x8_tree, - pc->fc.nzc_probs_8x8[h][g][i], branch_ct8x8, - default_nzc_counts_8x8[h][g][i], 0); - } - for (i = 0; i < BLOCK_TYPES; ++i) { - vp9_tree_probs_from_distribution( - vp9_nzc16x16_tree, - pc->fc.nzc_probs_16x16[h][g][i], branch_ct16x16, - default_nzc_counts_16x16[h][g][i], 0); - } - for (i = 0; i < BLOCK_TYPES; ++i) { - vp9_tree_probs_from_distribution( - vp9_nzc32x32_tree, - pc->fc.nzc_probs_32x32[h][g][i], branch_ct32x32, - default_nzc_counts_32x32[h][g][i], 0); - } - } - } -#else - 
vpx_memcpy(pc->fc.nzc_probs_4x4, default_nzc_probs_4x4, - sizeof(pc->fc.nzc_probs_4x4)); - vpx_memcpy(pc->fc.nzc_probs_8x8, default_nzc_probs_8x8, - sizeof(pc->fc.nzc_probs_8x8)); - vpx_memcpy(pc->fc.nzc_probs_16x16, default_nzc_probs_16x16, - sizeof(pc->fc.nzc_probs_16x16)); - vpx_memcpy(pc->fc.nzc_probs_32x32, default_nzc_probs_32x32, - sizeof(pc->fc.nzc_probs_32x32)); -#endif - vpx_memcpy(pc->fc.nzc_pcat_probs, default_nzc_pcat_probs, - sizeof(pc->fc.nzc_pcat_probs)); -#endif // CONFIG_CODE_NONZEROCOUNT -#if CONFIG_MODELCOEFPROB - for (b = 0; b < BLOCK_TYPES; ++b) - for (r = 0; r < REF_TYPES; ++r) - for (c = 0; c < COEF_BANDS; ++c) - for (p = 0; p < PREV_COEF_CONTEXTS; ++p) { - int t; - for (t = 0; t < UNCONSTRAINED_NODES; t++) - pc->fc.coef_probs_4x4[b][r][c][p][t] = - default_coef_probs_4x4[b][r][c][p][t]; - vp9_get_model_distribution( - default_coef_probs_4x4[b][r][c][p][UNCONSTRAINED_NODES - 1], - pc->fc.coef_probs_4x4[b][r][c][p], b, r); - for (t = 0; t < UNCONSTRAINED_NODES; t++) - pc->fc.coef_probs_8x8[b][r][c][p][t] = - default_coef_probs_8x8[b][r][c][p][t]; - vp9_get_model_distribution( - default_coef_probs_8x8[b][r][c][p][UNCONSTRAINED_NODES - 1], - pc->fc.coef_probs_8x8[b][r][c][p], b, r); - for (t = 0; t < UNCONSTRAINED_NODES; t++) - pc->fc.coef_probs_16x16[b][r][c][p][t] = - default_coef_probs_16x16[b][r][c][p][t]; - vp9_get_model_distribution( - default_coef_probs_16x16[b][r][c][p][UNCONSTRAINED_NODES - 1], - pc->fc.coef_probs_16x16[b][r][c][p], b, r); - for (t = 0; t < UNCONSTRAINED_NODES; t++) - pc->fc.coef_probs_32x32[b][r][c][p][t] = - default_coef_probs_32x32[b][r][c][p][t]; - vp9_get_model_distribution( - default_coef_probs_32x32[b][r][c][p][UNCONSTRAINED_NODES - 1], - pc->fc.coef_probs_32x32[b][r][c][p], b, r); - } -#else - vpx_memcpy(pc->fc.coef_probs_4x4, default_coef_probs_4x4, - sizeof(pc->fc.coef_probs_4x4)); - vpx_memcpy(pc->fc.coef_probs_8x8, default_coef_probs_8x8, - sizeof(pc->fc.coef_probs_8x8)); - vpx_memcpy(pc->fc.coef_probs_16x16, default_coef_probs_16x16, - sizeof(pc->fc.coef_probs_16x16)); - vpx_memcpy(pc->fc.coef_probs_32x32, default_coef_probs_32x32, - sizeof(pc->fc.coef_probs_32x32)); -#endif -} - -#if CONFIG_MODELCOEFPROB -// This is a placeholder function that will enable the default coef probs to -// change for key frames based on the base_qindex. If base_qindex is large, -// we can expect probabilities of zeros to be bigger, and vice versa. The rest -// of the probabilities are derived from the nodel. -void vp9_adjust_default_coef_probs(VP9_COMMON *cm) { - static const int factor_bits = 4; - static const int factor_rnd = 8; // (1 << (factor_bits - 1)) - int b, r, c, p; - int factor = (1 << factor_bits); - /* - if (cm->base_qindex < 32) - factor -= ((32 - cm->base_qindex) >> 4); - */ - if (cm->base_qindex > 128) - factor += ((cm->base_qindex - 128) >> 4); - // printf(" Q %d factor %d\n", cm->base_qindex, factor); - - for (b = 0; b < BLOCK_TYPES; ++b) - for (r = 0; r < REF_TYPES; ++r) - for (c = 0; c < COEF_BANDS; ++c) - for (p = 0; p < PREV_COEF_CONTEXTS; ++p) { - int t, x; - vp9_prob prob; - for (t = 0; t < UNCONSTRAINED_NODES; t++) { - x = (default_coef_probs_4x4[b][r][c][p][t] * factor + factor_rnd) - >> factor_bits; - prob = (x > 255 ? 255 : (x < 1 ? 1 : x)); - cm->fc.coef_probs_4x4[b][r][c][p][t] = prob; - } - vp9_get_model_distribution( - prob, cm->fc.coef_probs_4x4[b][r][c][p], b, r); - for (t = 0; t < UNCONSTRAINED_NODES; t++) { - x = (default_coef_probs_8x8[b][r][c][p][t] * factor + factor_rnd) - >> factor_bits; - prob = (x > 255 ? 
255 : (x < 1 ? 1 : x)); - cm->fc.coef_probs_8x8[b][r][c][p][t] = prob; - } - vp9_get_model_distribution( - prob, cm->fc.coef_probs_8x8[b][r][c][p], b, r); - for (t = 0; t < UNCONSTRAINED_NODES; t++) { - x = (default_coef_probs_16x16[b][r][c][p][t] * factor + factor_rnd) - >> factor_bits; - prob = (x > 255 ? 255 : (x < 1 ? 1 : x)); - cm->fc.coef_probs_16x16[b][r][c][p][t] = prob; - } - vp9_get_model_distribution( - prob, cm->fc.coef_probs_16x16[b][r][c][p], b, r); - for (t = 0; t < UNCONSTRAINED_NODES; t++) { - x = (default_coef_probs_32x32[b][r][c][p][t] * factor + factor_rnd) - >> factor_bits; - prob = (x > 255 ? 255 : (x < 1 ? 1 : x)); - cm->fc.coef_probs_32x32[b][r][c][p][t] = prob; - } - vp9_get_model_distribution( - prob, cm->fc.coef_probs_32x32[b][r][c][p], b, r); - } + vpx_memcpy(pc->fc.coef_probs[TX_4X4], default_coef_probs_4x4, + sizeof(pc->fc.coef_probs[TX_4X4])); + vpx_memcpy(pc->fc.coef_probs[TX_8X8], default_coef_probs_8x8, + sizeof(pc->fc.coef_probs[TX_8X8])); + vpx_memcpy(pc->fc.coef_probs[TX_16X16], default_coef_probs_16x16, + sizeof(pc->fc.coef_probs[TX_16X16])); + vpx_memcpy(pc->fc.coef_probs[TX_32X32], default_coef_probs_32x32, + sizeof(pc->fc.coef_probs[TX_32X32])); } -#endif // Neighborhood 5-tuples for various scans and blocksizes, // in {top, left, topleft, topright, bottomleft} order // for each position in raster scan order. // -1 indicates the neighbor does not exist. DECLARE_ALIGNED(16, int, - vp9_default_zig_zag1d_4x4_neighbors[16 * MAX_NEIGHBORS]); + vp9_default_scan_4x4_neighbors[16 * MAX_NEIGHBORS]); DECLARE_ALIGNED(16, int, vp9_col_scan_4x4_neighbors[16 * MAX_NEIGHBORS]); DECLARE_ALIGNED(16, int, @@ -2291,15 +508,15 @@ DECLARE_ALIGNED(16, int, DECLARE_ALIGNED(16, int, vp9_row_scan_8x8_neighbors[64 * MAX_NEIGHBORS]); DECLARE_ALIGNED(16, int, - vp9_default_zig_zag1d_8x8_neighbors[64 * MAX_NEIGHBORS]); + vp9_default_scan_8x8_neighbors[64 * MAX_NEIGHBORS]); DECLARE_ALIGNED(16, int, vp9_col_scan_16x16_neighbors[256 * MAX_NEIGHBORS]); DECLARE_ALIGNED(16, int, vp9_row_scan_16x16_neighbors[256 * MAX_NEIGHBORS]); DECLARE_ALIGNED(16, int, - vp9_default_zig_zag1d_16x16_neighbors[256 * MAX_NEIGHBORS]); + vp9_default_scan_16x16_neighbors[256 * MAX_NEIGHBORS]); DECLARE_ALIGNED(16, int, - vp9_default_zig_zag1d_32x32_neighbors[1024 * MAX_NEIGHBORS]); + vp9_default_scan_32x32_neighbors[1024 * MAX_NEIGHBORS]); static int find_in_scan(const int *scan, int l, int idx) { int n, l2 = l * l; @@ -2361,59 +578,59 @@ static void init_scan_neighbors(const int *scan, int l, int *neighbors, } void vp9_init_neighbors() { - init_scan_neighbors(vp9_default_zig_zag1d_4x4, 4, - vp9_default_zig_zag1d_4x4_neighbors, MAX_NEIGHBORS); + init_scan_neighbors(vp9_default_scan_4x4, 4, + vp9_default_scan_4x4_neighbors, MAX_NEIGHBORS); init_scan_neighbors(vp9_row_scan_4x4, 4, vp9_row_scan_4x4_neighbors, MAX_NEIGHBORS); init_scan_neighbors(vp9_col_scan_4x4, 4, vp9_col_scan_4x4_neighbors, MAX_NEIGHBORS); - init_scan_neighbors(vp9_default_zig_zag1d_8x8, 8, - vp9_default_zig_zag1d_8x8_neighbors, MAX_NEIGHBORS); + init_scan_neighbors(vp9_default_scan_8x8, 8, + vp9_default_scan_8x8_neighbors, MAX_NEIGHBORS); init_scan_neighbors(vp9_row_scan_8x8, 8, vp9_row_scan_8x8_neighbors, MAX_NEIGHBORS); init_scan_neighbors(vp9_col_scan_8x8, 8, vp9_col_scan_8x8_neighbors, MAX_NEIGHBORS); - init_scan_neighbors(vp9_default_zig_zag1d_16x16, 16, - vp9_default_zig_zag1d_16x16_neighbors, MAX_NEIGHBORS); + init_scan_neighbors(vp9_default_scan_16x16, 16, + vp9_default_scan_16x16_neighbors, MAX_NEIGHBORS); 
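
A note on the model machinery added at the top of this hunk: only the first UNCONSTRAINED_NODES probabilities per context are stored and adapted, and extend_model_to_full_distribution() derives the remaining tail nodes from vp9_modelcoefprobs_pareto8, keyed by the pivot probability. The following is a minimal standalone sketch of that expansion, using the real VP9 sizes (UNCONSTRAINED_NODES == 3, ENTROPY_NODES == 11) but two made-up rows in place of the full Pareto table:

    #include <stdio.h>
    #include <string.h>

    /* Real VP9 sizes: 3 freely coded nodes, 11 tree nodes, 8 model-driven. */
    #define UNCONSTRAINED_NODES 3
    #define ENTROPY_NODES 11
    #define MODEL_NODES (ENTROPY_NODES - UNCONSTRAINED_NODES)

    typedef unsigned char prob_t;  /* stand-in for vp9_prob */

    /* Two made-up rows standing in for vp9_modelcoefprobs_pareto8. */
    static const prob_t pareto8[2][MODEL_NODES] = {
      {  3, 86, 128,  6, 86, 23, 88, 29 },
      {  6, 86, 128, 11, 87, 42, 91, 52 },
    };

    /* Same scheme as extend_model_to_full_distribution(): an odd pivot p
     * selects row (p - 1) / 2 directly; an even pivot averages the two
     * neighboring rows, halving the table size without losing resolution. */
    static void expand_model(prob_t p, prob_t *full) {
      const int l = (p - 1) / 2;
      if (p & 1) {
        memcpy(full + UNCONSTRAINED_NODES, pareto8[l],
               MODEL_NODES * sizeof(prob_t));
      } else {
        int i;
        for (i = UNCONSTRAINED_NODES; i < ENTROPY_NODES; ++i)
          full[i] = (pareto8[l][i - UNCONSTRAINED_NODES] +
                     pareto8[l + 1][i - UNCONSTRAINED_NODES]) >> 1;
      }
    }

    int main(void) {
      /* Three stored node probabilities for one context; the last one is
       * the pivot. Pivot = 2 is even, so the two rows get interpolated. */
      prob_t full[ENTROPY_NODES] = { 200, 120, 2 };
      int i;
      expand_model(full[UNCONSTRAINED_NODES - 1], full);
      for (i = 0; i < ENTROPY_NODES; ++i)
        printf("%d ", full[i]);
      printf("\n");
      return 0;
    }
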
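The neighbor tables being initialized here feed vp9_get_coef_context(), whose fix earlier in this file routes the scan-order neighbor indices through scan[] before reading token_cache[], since the cache is indexed by coefficient position rather than scan position. Below is a self-contained sketch of that lookup over a hypothetical 2x2 scan; the two-entries-per-coefficient layout with -1 for an absent neighbor follows the real tables, while the data values and the c == 0 guard are purely illustrative:

    #include <assert.h>
    #include <stdio.h>

    #define MAX_NEIGHBORS 2

    /* Same indexing as the fixed vp9_get_coef_context(): neighbors[] holds
     * scan-order indices, so they are mapped through scan[] before reading
     * token_cache[], which is laid out by block position. */
    static int coef_context(const int *scan, const int *neighbors,
                            const unsigned char *token_cache, int c) {
      if (c == 0)
        return 0;  /* illustrative guard: nothing coded yet */
      assert(neighbors[MAX_NEIGHBORS * c + 0] >= 0);
      if (neighbors[MAX_NEIGHBORS * c + 1] >= 0)
        return (1 + token_cache[scan[neighbors[MAX_NEIGHBORS * c + 0]]] +
                token_cache[scan[neighbors[MAX_NEIGHBORS * c + 1]]]) >> 1;
      return token_cache[scan[neighbors[MAX_NEIGHBORS * c + 0]]];
    }

    int main(void) {
      /* Hypothetical 2x2 block where scan order happens to equal raster
       * order; neighbor entries are scan positions, -1 marks "absent". */
      static const int scan[4] = { 0, 1, 2, 3 };
      static const int neighbors[4 * MAX_NEIGHBORS] = {
        -1, -1,  /* c = 0: no coded neighbors */
         0, -1,  /* c = 1: above */
         0, -1,  /* c = 2: left */
         1,  2,  /* c = 3: above and left */
      };
      const unsigned char token_cache[4] = { 2, 1, 3, 0 };  /* energies */
      /* Averages the energies at scan positions 1 and 2: (1+1+3)>>1 = 2. */
      printf("ctx(c=3) = %d\n", coef_context(scan, neighbors, token_cache, 3));
      return 0;
    }
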
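Finally, the adaptation path later in this patch (see vp9_full_to_model_count() below) no longer adapts all twelve token probabilities: counts for THREE_TOKEN through the CAT6 tokens fold into the TWO_TOKEN bucket, and EOB maps to DCT_EOB_MODEL_TOKEN, matching the four-entry model alphabet. A sketch of that folding; the token enum is written out as commonly defined in vp9_entropy.h and should be treated as illustrative:

    #include <stdio.h>

    /* Full token alphabet (12 entries) vs. the 4-entry model alphabet. */
    enum {
      ZERO_TOKEN, ONE_TOKEN, TWO_TOKEN, THREE_TOKEN, FOUR_TOKEN,
      DCT_VAL_CATEGORY1, DCT_VAL_CATEGORY2, DCT_VAL_CATEGORY3,
      DCT_VAL_CATEGORY4, DCT_VAL_CATEGORY5, DCT_VAL_CATEGORY6,
      DCT_EOB_TOKEN, MAX_ENTROPY_TOKENS
    };
    enum { DCT_EOB_MODEL_TOKEN = 3 };  /* ZERO, ONE, TWO+, EOB */

    /* Mirrors vp9_full_to_model_count(): every token from THREE_TOKEN up
     * to (but excluding) EOB lands in the TWO_TOKEN bucket, since only
     * the nodes at or above the pivot are adapted directly. */
    static void full_to_model(unsigned int *model, const unsigned int *full) {
      int n;
      model[ZERO_TOKEN] = full[ZERO_TOKEN];
      model[ONE_TOKEN] = full[ONE_TOKEN];
      model[TWO_TOKEN] = full[TWO_TOKEN];
      for (n = THREE_TOKEN; n < DCT_EOB_TOKEN; ++n)
        model[TWO_TOKEN] += full[n];
      model[DCT_EOB_MODEL_TOKEN] = full[DCT_EOB_TOKEN];
    }

    int main(void) {
      /* Made-up per-context counts for the 12 full tokens. */
      const unsigned int full[MAX_ENTROPY_TOKENS] =
          { 40, 20, 10, 5, 4, 3, 2, 1, 1, 0, 0, 60 };
      unsigned int model[4] = { 0 };
      full_to_model(model, full);
      printf("zero=%u one=%u two+=%u eob=%u\n",
             model[0], model[1], model[2], model[3]);
      return 0;
    }
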
init_scan_neighbors(vp9_row_scan_16x16, 16, vp9_row_scan_16x16_neighbors, MAX_NEIGHBORS); init_scan_neighbors(vp9_col_scan_16x16, 16, vp9_col_scan_16x16_neighbors, MAX_NEIGHBORS); - init_scan_neighbors(vp9_default_zig_zag1d_32x32, 32, - vp9_default_zig_zag1d_32x32_neighbors, MAX_NEIGHBORS); + init_scan_neighbors(vp9_default_scan_32x32, 32, + vp9_default_scan_32x32_neighbors, MAX_NEIGHBORS); } const int *vp9_get_coef_neighbors_handle(const int *scan, int *pad) { - if (scan == vp9_default_zig_zag1d_4x4) { + if (scan == vp9_default_scan_4x4) { *pad = MAX_NEIGHBORS; - return vp9_default_zig_zag1d_4x4_neighbors; + return vp9_default_scan_4x4_neighbors; } else if (scan == vp9_row_scan_4x4) { *pad = MAX_NEIGHBORS; return vp9_row_scan_4x4_neighbors; } else if (scan == vp9_col_scan_4x4) { *pad = MAX_NEIGHBORS; return vp9_col_scan_4x4_neighbors; - } else if (scan == vp9_default_zig_zag1d_8x8) { + } else if (scan == vp9_default_scan_8x8) { *pad = MAX_NEIGHBORS; - return vp9_default_zig_zag1d_8x8_neighbors; + return vp9_default_scan_8x8_neighbors; } else if (scan == vp9_row_scan_8x8) { *pad = 2; return vp9_row_scan_8x8_neighbors; } else if (scan == vp9_col_scan_8x8) { *pad = 2; return vp9_col_scan_8x8_neighbors; - } else if (scan == vp9_default_zig_zag1d_16x16) { + } else if (scan == vp9_default_scan_16x16) { *pad = MAX_NEIGHBORS; - return vp9_default_zig_zag1d_16x16_neighbors; + return vp9_default_scan_16x16_neighbors; } else if (scan == vp9_row_scan_16x16) { *pad = 2; return vp9_row_scan_16x16_neighbors; } else if (scan == vp9_col_scan_16x16) { *pad = 2; return vp9_col_scan_16x16_neighbors; - } else if (scan == vp9_default_zig_zag1d_32x32) { + } else if (scan == vp9_default_scan_32x32) { *pad = MAX_NEIGHBORS; - return vp9_default_zig_zag1d_32x32_neighbors; + return vp9_default_scan_32x32_neighbors; } else { assert(0); return NULL; @@ -2424,1098 +641,8 @@ void vp9_coef_tree_initialize() { vp9_init_neighbors(); init_bit_trees(); vp9_tokens_from_tree(vp9_coef_encodings, vp9_coef_tree); -#if CONFIG_CODE_NONZEROCOUNT - vp9_tokens_from_tree(vp9_nzc4x4_encodings, vp9_nzc4x4_tree); - vp9_tokens_from_tree(vp9_nzc8x8_encodings, vp9_nzc8x8_tree); - vp9_tokens_from_tree(vp9_nzc16x16_encodings, vp9_nzc16x16_tree); - vp9_tokens_from_tree(vp9_nzc32x32_encodings, vp9_nzc32x32_tree); -#endif -} - -#if CONFIG_CODE_NONZEROCOUNT - -#define mb_in_cur_tile(cm, mb_row, mb_col) \ - ((mb_col) >= (cm)->cur_tile_mb_col_start && \ - (mb_col) <= (cm)->cur_tile_mb_col_end && \ - (mb_row) >= 0) - -#define choose_nzc_context(nzc_exp, t2, t1) \ - ((nzc_exp) >= (t2) ? 2 : (nzc_exp) >= (t1) ? 
1 : 0) - -#define NZC_T2_32X32 (16 << 6) -#define NZC_T1_32X32 (4 << 6) - -#define NZC_T2_16X16 (12 << 6) -#define NZC_T1_16X16 (3 << 6) - -#define NZC_T2_8X8 (8 << 6) -#define NZC_T1_8X8 (2 << 6) - -#define NZC_T2_4X4 (4 << 6) -#define NZC_T1_4X4 (1 << 6) - -// Transforms a mb16 block index to a sb64 block index -static inline int mb16_to_sb64_index(int mb_row, int mb_col, int block) { - int r = (mb_row & 3); - int c = (mb_col & 3); - int b; - if (block < 16) { // Y - int ib = block >> 2; - int jb = block & 3; - ib += r * 4; - jb += c * 4; - b = ib * 16 + jb; - assert(b < 256); - return b; - } else { // UV - int base = block - (block & 3); - int ib = (block - base) >> 1; - int jb = (block - base) & 1; - ib += r * 2; - jb += c * 2; - b = base * 16 + ib * 8 + jb; - assert(b >= 256 && b < 384); - return b; - } -} - -// Transforms a mb16 block index to a sb32 block index -static inline int mb16_to_sb32_index(int mb_row, int mb_col, int block) { - int r = (mb_row & 1); - int c = (mb_col & 1); - int b; - if (block < 16) { // Y - int ib = block >> 2; - int jb = block & 3; - ib += r * 4; - jb += c * 4; - b = ib * 8 + jb; - assert(b < 64); - return b; - } else { // UV - int base = block - (block & 3); - int ib = (block - base) >> 1; - int jb = (block - base) & 1; - ib += r * 2; - jb += c * 2; - b = base * 4 + ib * 4 + jb; - assert(b >= 64 && b < 96); - return b; - } -} - -static inline int block_to_txfm_index(int block, TX_SIZE tx_size, int s) { - // s is the log of the number of 4x4 blocks in each row/col of larger block - int b, ib, jb, nb; - ib = block >> s; - jb = block - (ib << s); - ib >>= tx_size; - jb >>= tx_size; - nb = 1 << (s - tx_size); - b = (ib * nb + jb) << (2 * tx_size); - return b; -} - -/* BEGIN - Helper functions to get the y nzcs */ -static unsigned int get_nzc_4x4_y_sb64(MB_MODE_INFO *mi, int block) { - int b; - assert(block < 256); - b = block_to_txfm_index(block, mi->txfm_size, 4); - assert(b < 256); - return mi->nzcs[b] << (6 - 2 * mi->txfm_size); -} - -static unsigned int get_nzc_4x4_y_sb32(MB_MODE_INFO *mi, int block) { - int b; - assert(block < 64); - b = block_to_txfm_index(block, mi->txfm_size, 3); - assert(b < 64); - return mi->nzcs[b] << (6 - 2 * mi->txfm_size); -} - -static unsigned int get_nzc_4x4_y_mb16(MB_MODE_INFO *mi, int block) { - int b; - assert(block < 16); - b = block_to_txfm_index(block, mi->txfm_size, 2); - assert(b < 16); - return mi->nzcs[b] << (6 - 2 * mi->txfm_size); -} -/* END - Helper functions to get the y nzcs */ - -/* Function to get y nzc where block index is in mb16 terms */ -static unsigned int get_nzc_4x4_y(VP9_COMMON *cm, MODE_INFO *m, - int mb_row, int mb_col, int block) { - // NOTE: All values returned are at 64 times the true value at 4x4 scale - MB_MODE_INFO *const mi = &m->mbmi; - const int mis = cm->mode_info_stride; - if (mi->mb_skip_coeff || !mb_in_cur_tile(cm, mb_row, mb_col)) - return 0; - if (mi->sb_type == BLOCK_SIZE_SB64X64) { - int r = mb_row & 3; - int c = mb_col & 3; - m -= c + r * mis; - if (m->mbmi.mb_skip_coeff || !mb_in_cur_tile(cm, mb_row - r, mb_col - c)) - return 0; - else - return get_nzc_4x4_y_sb64( - &m->mbmi, mb16_to_sb64_index(mb_row, mb_col, block)); - } else if (mi->sb_type == BLOCK_SIZE_SB32X32) { - int r = mb_row & 1; - int c = mb_col & 1; - m -= c + r * mis; - if (m->mbmi.mb_skip_coeff || !mb_in_cur_tile(cm, mb_row - r, mb_col - c)) - return 0; - else - return get_nzc_4x4_y_sb32( - &m->mbmi, mb16_to_sb32_index(mb_row, mb_col, block)); - } else { - if (m->mbmi.mb_skip_coeff || !mb_in_cur_tile(cm, mb_row, 
mb_col)) - return 0; - return get_nzc_4x4_y_mb16(mi, block); - } -} - -/* BEGIN - Helper functions to get the uv nzcs */ -static unsigned int get_nzc_4x4_uv_sb64(MB_MODE_INFO *mi, int block) { - int b; - int base, uvtxfm_size; - assert(block >= 256 && block < 384); - uvtxfm_size = mi->txfm_size; - base = 256 + (block & 64); - block -= base; - b = base + block_to_txfm_index(block, uvtxfm_size, 3); - assert(b >= 256 && b < 384); - return mi->nzcs[b] << (6 - 2 * uvtxfm_size); -} - -static unsigned int get_nzc_4x4_uv_sb32(MB_MODE_INFO *mi, int block) { - int b; - int base, uvtxfm_size; - assert(block >= 64 && block < 96); - if (mi->txfm_size == TX_32X32) - uvtxfm_size = TX_16X16; - else - uvtxfm_size = mi->txfm_size; - base = 64 + (block & 16); - block -= base; - b = base + block_to_txfm_index(block, uvtxfm_size, 2); - assert(b >= 64 && b < 96); - return mi->nzcs[b] << (6 - 2 * uvtxfm_size); -} - -static unsigned int get_nzc_4x4_uv_mb16(MB_MODE_INFO *mi, int block) { - int b; - int base, uvtxfm_size; - assert(block >= 16 && block < 24); - if (mi->txfm_size == TX_8X8 && - (mi->mode == SPLITMV || mi->mode == I8X8_PRED)) - uvtxfm_size = TX_4X4; - else if (mi->txfm_size == TX_16X16) - uvtxfm_size = TX_8X8; - else - uvtxfm_size = mi->txfm_size; - base = 16 + (block & 4); - block -= base; - b = base + block_to_txfm_index(block, uvtxfm_size, 1); - assert(b >= 16 && b < 24); - return mi->nzcs[b] << (6 - 2 * uvtxfm_size); -} -/* END - Helper functions to get the uv nzcs */ - -/* Function to get uv nzc where block index is in mb16 terms */ -static unsigned int get_nzc_4x4_uv(VP9_COMMON *cm, MODE_INFO *m, - int mb_row, int mb_col, int block) { - // NOTE: All values returned are at 64 times the true value at 4x4 scale - MB_MODE_INFO *const mi = &m->mbmi; - const int mis = cm->mode_info_stride; - if (mi->mb_skip_coeff || !mb_in_cur_tile(cm, mb_row, mb_col)) - return 0; - if (mi->sb_type == BLOCK_SIZE_SB64X64) { - int r = mb_row & 3; - int c = mb_col & 3; - m -= c + r * mis; - if (m->mbmi.mb_skip_coeff || !mb_in_cur_tile(cm, mb_row - r, mb_col - c)) - return 0; - else - return get_nzc_4x4_uv_sb64( - &m->mbmi, mb16_to_sb64_index(mb_row, mb_col, block)); - } else if (mi->sb_type == BLOCK_SIZE_SB32X32) { - int r = mb_row & 1; - int c = mb_col & 1; - m -= c + r * mis; - if (m->mbmi.mb_skip_coeff || !mb_in_cur_tile(cm, mb_row - r, mb_col - c)) - return 0; - else - return get_nzc_4x4_uv_sb32( - &m->mbmi, mb16_to_sb32_index(mb_row, mb_col, block)); - } else { - return get_nzc_4x4_uv_mb16(mi, block); - } -} - -int vp9_get_nzc_context_y_sb64(VP9_COMMON *cm, MODE_INFO *cur, - int mb_row, int mb_col, int block) { - // returns an index in [0, MAX_NZC_CONTEXTS - 1] to reflect how busy - // neighboring blocks are - int mis = cm->mode_info_stride; - int nzc_exp = 0; - TX_SIZE txfm_size = cur->mbmi.txfm_size; - assert(block < 256); - switch (txfm_size) { - case TX_32X32: - assert((block & 63) == 0); - if (block < 128) { - int o = (block >> 6) * 2; - nzc_exp = - get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 12) + - get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 13) + - get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 14) + - get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 15) + - get_nzc_4x4_y(cm, cur - mis + o + 1, - mb_row - 1, mb_col + o + 1, 12) + - get_nzc_4x4_y(cm, cur - mis + o + 1, - mb_row - 1, mb_col + o + 1, 13) + - get_nzc_4x4_y(cm, cur - mis + o + 1, - mb_row - 1, mb_col + o + 1, 14) + - get_nzc_4x4_y(cm, cur - mis + o + 1, - mb_row - 1, mb_col + o + 1, 15); - } else { - 
nzc_exp = cur->mbmi.nzcs[block - 128] << 3; - } - if ((block & 127) == 0) { - int o = (block >> 7) * 2; - nzc_exp += - get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 3) + - get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 7) + - get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 11) + - get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 15) + - get_nzc_4x4_y(cm, cur - 1 + o * mis + mis, - mb_row + o + 1, mb_col - 1, 3) + - get_nzc_4x4_y(cm, cur - 1 + o * mis + mis, - mb_row + o + 1, mb_col - 1, 7) + - get_nzc_4x4_y(cm, cur - 1 + o * mis + mis, - mb_row + o + 1, mb_col - 1, 11) + - get_nzc_4x4_y(cm, cur - 1 + o * mis + mis, - mb_row + o + 1, mb_col - 1, 15); - } else { - nzc_exp += cur->mbmi.nzcs[block - 64] << 3; - } - nzc_exp <<= 2; - // Note nzc_exp is 64 times the average value expected at 32x32 scale - return choose_nzc_context(nzc_exp, NZC_T2_32X32, NZC_T1_32X32); - break; - - case TX_16X16: - assert((block & 15) == 0); - if (block < 64) { - int o = block >> 4; - nzc_exp = - get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 12) + - get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 13) + - get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 14) + - get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 15); - } else { - nzc_exp = cur->mbmi.nzcs[block - 64] << 4; - } - if ((block & 63) == 0) { - int o = block >> 6; - nzc_exp += - get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 3) + - get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 7) + - get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 11) + - get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 15); - } else { - nzc_exp += cur->mbmi.nzcs[block - 16] << 4; - } - nzc_exp <<= 1; - // Note nzc_exp is 64 times the average value expected at 16x16 scale - return choose_nzc_context(nzc_exp, NZC_T2_16X16, NZC_T1_16X16); - break; - - case TX_8X8: - assert((block & 3) == 0); - if (block < 32) { - int o = block >> 3; - int p = ((block >> 2) & 1) ? 14 : 12; - nzc_exp = - get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, p) + - get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, p + 1); - } else { - nzc_exp = cur->mbmi.nzcs[block - 32] << 5; - } - if ((block & 31) == 0) { - int o = block >> 6; - int p = ((block >> 5) & 1) ? 
11 : 3; - nzc_exp += - get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, p) + - get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, p + 4); - } else { - nzc_exp += cur->mbmi.nzcs[block - 4] << 5; - } - // Note nzc_exp is 64 times the average value expected at 8x8 scale - return choose_nzc_context(nzc_exp, NZC_T2_8X8, NZC_T1_8X8); - break; - - case TX_4X4: - if (block < 16) { - int o = block >> 2; - int p = block & 3; - nzc_exp = get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, - 12 + p); - } else { - nzc_exp = (cur->mbmi.nzcs[block - 16] << 6); - } - if ((block & 15) == 0) { - int o = block >> 6; - int p = (block >> 4) & 3; - nzc_exp += get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, - 3 + 4 * p); - } else { - nzc_exp += (cur->mbmi.nzcs[block - 1] << 6); - } - nzc_exp >>= 1; - // Note nzc_exp is 64 times the average value expected at 4x4 scale - return choose_nzc_context(nzc_exp, NZC_T2_4X4, NZC_T1_4X4); - break; - - default: - return 0; - } -} - -int vp9_get_nzc_context_y_sb32(VP9_COMMON *cm, MODE_INFO *cur, - int mb_row, int mb_col, int block) { - // returns an index in [0, MAX_NZC_CONTEXTS - 1] to reflect how busy - // neighboring blocks are - int mis = cm->mode_info_stride; - int nzc_exp = 0; - TX_SIZE txfm_size = cur->mbmi.txfm_size; - assert(block < 64); - switch (txfm_size) { - case TX_32X32: - assert(block == 0); - nzc_exp = - (get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, 12) + - get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, 13) + - get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, 14) + - get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, 15) + - get_nzc_4x4_y(cm, cur - mis + 1, mb_row - 1, mb_col + 1, 12) + - get_nzc_4x4_y(cm, cur - mis + 1, mb_row - 1, mb_col + 1, 13) + - get_nzc_4x4_y(cm, cur - mis + 1, mb_row - 1, mb_col + 1, 14) + - get_nzc_4x4_y(cm, cur - mis + 1, mb_row - 1, mb_col + 1, 15) + - get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, 3) + - get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, 7) + - get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, 11) + - get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, 15) + - get_nzc_4x4_y(cm, cur - 1 + mis, mb_row + 1, mb_col - 1, 3) + - get_nzc_4x4_y(cm, cur - 1 + mis, mb_row + 1, mb_col - 1, 7) + - get_nzc_4x4_y(cm, cur - 1 + mis, mb_row + 1, mb_col - 1, 11) + - get_nzc_4x4_y(cm, cur - 1 + mis, mb_row + 1, mb_col - 1, 15)) << 2; - // Note nzc_exp is 64 times the average value expected at 32x32 scale - return choose_nzc_context(nzc_exp, NZC_T2_32X32, NZC_T1_32X32); - break; - - case TX_16X16: - assert((block & 15) == 0); - if (block < 32) { - int o = (block >> 4) & 1; - nzc_exp = - get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 12) + - get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 13) + - get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 14) + - get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 15); - } else { - nzc_exp = cur->mbmi.nzcs[block - 32] << 4; - } - if ((block & 31) == 0) { - int o = block >> 5; - nzc_exp += - get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 3) + - get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 7) + - get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 11) + - get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 15); - } else { - nzc_exp += cur->mbmi.nzcs[block - 16] << 4; - } - nzc_exp <<= 1; - // Note nzc_exp is 64 times the average value expected at 16x16 scale - return choose_nzc_context(nzc_exp, NZC_T2_16X16, NZC_T1_16X16); - break; - - case TX_8X8: - assert((block & 3) == 0); - if 
(block < 16) { - int o = block >> 3; - int p = ((block >> 2) & 1) ? 14 : 12; - nzc_exp = - get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, p) + - get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, p + 1); - } else { - nzc_exp = cur->mbmi.nzcs[block - 16] << 5; - } - if ((block & 15) == 0) { - int o = block >> 5; - int p = ((block >> 4) & 1) ? 11 : 3; - nzc_exp += - get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, p) + - get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, p + 4); - } else { - nzc_exp += cur->mbmi.nzcs[block - 4] << 5; - } - // Note nzc_exp is 64 times the average value expected at 8x8 scale - return choose_nzc_context(nzc_exp, NZC_T2_8X8, NZC_T1_8X8); - break; - - case TX_4X4: - if (block < 8) { - int o = block >> 2; - int p = block & 3; - nzc_exp = get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, - 12 + p); - } else { - nzc_exp = (cur->mbmi.nzcs[block - 8] << 6); - } - if ((block & 7) == 0) { - int o = block >> 5; - int p = (block >> 3) & 3; - nzc_exp += get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, - 3 + 4 * p); - } else { - nzc_exp += (cur->mbmi.nzcs[block - 1] << 6); - } - nzc_exp >>= 1; - // Note nzc_exp is 64 times the average value expected at 4x4 scale - return choose_nzc_context(nzc_exp, NZC_T2_4X4, NZC_T1_4X4); - break; - - default: - return 0; - break; - } -} - -int vp9_get_nzc_context_y_mb16(VP9_COMMON *cm, MODE_INFO *cur, - int mb_row, int mb_col, int block) { - // returns an index in [0, MAX_NZC_CONTEXTS - 1] to reflect how busy - // neighboring blocks are - int mis = cm->mode_info_stride; - int nzc_exp = 0; - TX_SIZE txfm_size = cur->mbmi.txfm_size; - assert(block < 16); - switch (txfm_size) { - case TX_16X16: - assert(block == 0); - nzc_exp = - get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, 12) + - get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, 13) + - get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, 14) + - get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, 15) + - get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, 3) + - get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, 7) + - get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, 11) + - get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, 15); - nzc_exp <<= 1; - // Note nzc_exp is 64 times the average value expected at 16x16 scale - return choose_nzc_context(nzc_exp, NZC_T2_16X16, NZC_T1_16X16); - - case TX_8X8: - assert((block & 3) == 0); - if (block < 8) { - int p = ((block >> 2) & 1) ? 14 : 12; - nzc_exp = - get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, p) + - get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, p + 1); - } else { - nzc_exp = cur->mbmi.nzcs[block - 8] << 5; - } - if ((block & 7) == 0) { - int p = ((block >> 3) & 1) ? 
11 : 3; - nzc_exp += - get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, p) + - get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, p + 4); - } else { - nzc_exp += cur->mbmi.nzcs[block - 4] << 5; - } - // Note nzc_exp is 64 times the average value expected at 8x8 scale - return choose_nzc_context(nzc_exp, NZC_T2_8X8, NZC_T1_8X8); - - case TX_4X4: - if (block < 4) { - int p = block & 3; - nzc_exp = get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, - 12 + p); - } else { - nzc_exp = (cur->mbmi.nzcs[block - 4] << 6); - } - if ((block & 3) == 0) { - int p = (block >> 2) & 3; - nzc_exp += get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, - 3 + 4 * p); - } else { - nzc_exp += (cur->mbmi.nzcs[block - 1] << 6); - } - nzc_exp >>= 1; - // Note nzc_exp is 64 times the average value expected at 4x4 scale - return choose_nzc_context(nzc_exp, NZC_T2_4X4, NZC_T1_4X4); - - default: - return 0; - break; - } -} - -int vp9_get_nzc_context_uv_sb64(VP9_COMMON *cm, MODE_INFO *cur, - int mb_row, int mb_col, int block) { - // returns an index in [0, MAX_NZC_CONTEXTS - 1] to reflect how busy - // neighboring blocks are - int mis = cm->mode_info_stride; - int nzc_exp = 0; - const int base = block - (block & 63); - const int boff = (block & 63); - const int base_mb16 = base >> 4; - TX_SIZE txfm_size = cur->mbmi.txfm_size; - TX_SIZE txfm_size_uv; - - assert(block >= 256 && block < 384); - txfm_size_uv = txfm_size; - - switch (txfm_size_uv) { - case TX_32X32: - assert(block == 256 || block == 320); - nzc_exp = - get_nzc_4x4_uv(cm, cur - mis, mb_row - 1, mb_col, - base_mb16 + 2) + - get_nzc_4x4_uv(cm, cur - mis, mb_row - 1, mb_col, - base_mb16 + 3) + - get_nzc_4x4_uv(cm, cur - mis + 1, mb_row - 1, mb_col + 1, - base_mb16 + 2) + - get_nzc_4x4_uv(cm, cur - mis + 1, mb_row - 1, mb_col + 1, - base_mb16 + 3) + - get_nzc_4x4_uv(cm, cur - mis + 2, mb_row - 1, mb_col + 2, - base_mb16 + 2) + - get_nzc_4x4_uv(cm, cur - mis + 2, mb_row - 1, mb_col + 2, - base_mb16 + 3) + - get_nzc_4x4_uv(cm, cur - mis + 3, mb_row - 1, mb_col + 3, - base_mb16 + 2) + - get_nzc_4x4_uv(cm, cur - mis + 3, mb_row - 1, mb_col + 3, - base_mb16 + 3) + - get_nzc_4x4_uv(cm, cur - 1, mb_row, mb_col - 1, - base_mb16 + 1) + - get_nzc_4x4_uv(cm, cur - 1, mb_row, mb_col - 1, - base_mb16 + 3) + - get_nzc_4x4_uv(cm, cur - 1 + mis, mb_row + 1, mb_col - 1, - base_mb16 + 1) + - get_nzc_4x4_uv(cm, cur - 1 + mis, mb_row + 1, mb_col - 1, - base_mb16 + 3) + - get_nzc_4x4_uv(cm, cur - 1 + 2 * mis, mb_row + 2, mb_col - 1, - base_mb16 + 1) + - get_nzc_4x4_uv(cm, cur - 1 + 2 * mis, mb_row + 2, mb_col - 1, - base_mb16 + 3) + - get_nzc_4x4_uv(cm, cur - 1 + 3 * mis, mb_row + 3, mb_col - 1, - base_mb16 + 1) + - get_nzc_4x4_uv(cm, cur - 1 + 3 * mis, mb_row + 3, mb_col - 1, - base_mb16 + 3); - nzc_exp <<= 2; - // Note nzc_exp is 64 times the average value expected at 32x32 scale - return choose_nzc_context(nzc_exp, NZC_T2_32X32, NZC_T1_32X32); - - case TX_16X16: - // uv txfm_size 16x16 - assert((block & 15) == 0); - if (boff < 32) { - int o = (boff >> 4) & 1; - nzc_exp = - get_nzc_4x4_uv(cm, cur - mis + o, mb_row - 1, mb_col + o, - base_mb16 + 2) + - get_nzc_4x4_uv(cm, cur - mis + o, mb_row - 1, mb_col + o, - base_mb16 + 3) + - get_nzc_4x4_uv(cm, cur - mis + o + 1, mb_row - 1, mb_col + o + 1, - base_mb16 + 2) + - get_nzc_4x4_uv(cm, cur - mis + o + 1, mb_row - 1, mb_col + o + 1, - base_mb16 + 3); - } else { - nzc_exp = cur->mbmi.nzcs[block - 32] << 4; - } - if ((boff & 31) == 0) { - int o = boff >> 5; - nzc_exp += - get_nzc_4x4_uv(cm, cur - 1 + o * mis, - mb_row + o, mb_col - 1, base_mb16 + 1) 
+ - get_nzc_4x4_uv(cm, cur - 1 + o * mis, - mb_row + o, mb_col - 1, base_mb16 + 3) + - get_nzc_4x4_uv(cm, cur - 1 + o * mis + mis, - mb_row + o + 1, mb_col - 1, base_mb16 + 1) + - get_nzc_4x4_uv(cm, cur - 1 + o * mis + mis, - mb_row + o + 1, mb_col - 1, base_mb16 + 3); - } else { - nzc_exp += cur->mbmi.nzcs[block - 16] << 4; - } - nzc_exp <<= 1; - // Note nzc_exp is 64 times the average value expected at 16x16 scale - return choose_nzc_context(nzc_exp, NZC_T2_16X16, NZC_T1_16X16); - - case TX_8X8: - assert((block & 3) == 0); - if (boff < 16) { - int o = boff >> 2; - nzc_exp = - get_nzc_4x4_uv(cm, cur - mis + o, mb_row - 1, mb_col + o, - base_mb16 + 2) + - get_nzc_4x4_uv(cm, cur - mis + o, mb_row - 1, mb_col + o, - base_mb16 + 3); - } else { - nzc_exp = cur->mbmi.nzcs[block - 16] << 5; - } - if ((boff & 15) == 0) { - int o = boff >> 4; - nzc_exp += - get_nzc_4x4_uv(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, - base_mb16 + 1) + - get_nzc_4x4_uv(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, - base_mb16 + 3); - } else { - nzc_exp += cur->mbmi.nzcs[block - 4] << 5; - } - // Note nzc_exp is 64 times the average value expected at 8x8 scale - return choose_nzc_context(nzc_exp, NZC_T2_8X8, NZC_T1_8X8); - - case TX_4X4: - if (boff < 8) { - int o = boff >> 1; - int p = boff & 1; - nzc_exp = get_nzc_4x4_uv(cm, cur - mis + o, mb_row - 1, mb_col + o, - base_mb16 + 2 + p); - } else { - nzc_exp = (cur->mbmi.nzcs[block - 8] << 6); - } - if ((boff & 7) == 0) { - int o = boff >> 4; - int p = (boff >> 3) & 1; - nzc_exp += get_nzc_4x4_uv(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, - base_mb16 + 1 + 2 * p); - } else { - nzc_exp += (cur->mbmi.nzcs[block - 1] << 6); - } - nzc_exp >>= 1; - // Note nzc_exp is 64 times the average value expected at 4x4 scale - return choose_nzc_context(nzc_exp, NZC_T2_4X4, NZC_T1_4X4); - - default: - return 0; - } -} - -int vp9_get_nzc_context_uv_sb32(VP9_COMMON *cm, MODE_INFO *cur, - int mb_row, int mb_col, int block) { - // returns an index in [0, MAX_NZC_CONTEXTS - 1] to reflect how busy - // neighboring blocks are - int mis = cm->mode_info_stride; - int nzc_exp = 0; - const int base = block - (block & 15); - const int boff = (block & 15); - const int base_mb16 = base >> 2; - TX_SIZE txfm_size = cur->mbmi.txfm_size; - TX_SIZE txfm_size_uv; - - assert(block >= 64 && block < 96); - if (txfm_size == TX_32X32) - txfm_size_uv = TX_16X16; - else - txfm_size_uv = txfm_size; - - switch (txfm_size_uv) { - case TX_16X16: - // uv txfm_size 16x16 - assert(block == 64 || block == 80); - nzc_exp = - get_nzc_4x4_uv(cm, cur - mis, mb_row - 1, mb_col, - base_mb16 + 2) + - get_nzc_4x4_uv(cm, cur - mis, mb_row - 1, mb_col, - base_mb16 + 3) + - get_nzc_4x4_uv(cm, cur - mis + 1, mb_row - 1, mb_col + 1, - base_mb16 + 2) + - get_nzc_4x4_uv(cm, cur - mis + 1, mb_row - 1, mb_col + 1, - base_mb16 + 3) + - get_nzc_4x4_uv(cm, cur - 1 + mis, mb_row, mb_col - 1, - base_mb16 + 1) + - get_nzc_4x4_uv(cm, cur - 1 + mis, mb_row, mb_col - 1, - base_mb16 + 3) + - get_nzc_4x4_uv(cm, cur - 1 + mis, mb_row + 1, mb_col - 1, - base_mb16 + 1) + - get_nzc_4x4_uv(cm, cur - 1 + mis, mb_row + 1, mb_col - 1, - base_mb16 + 3); - nzc_exp <<= 1; - // Note nzc_exp is 64 times the average value expected at 16x16 scale - return choose_nzc_context(nzc_exp, NZC_T2_16X16, NZC_T1_16X16); - break; - - case TX_8X8: - assert((block & 3) == 0); - if (boff < 8) { - int o = boff >> 2; - nzc_exp = - get_nzc_4x4_uv(cm, cur - mis + o, mb_row - 1, mb_col + o, - base_mb16 + 2) + - get_nzc_4x4_uv(cm, cur - mis + o, mb_row - 1, mb_col + o, - 
base_mb16 + 3); - } else { - nzc_exp = cur->mbmi.nzcs[block - 8] << 5; - } - if ((boff & 7) == 0) { - int o = boff >> 3; - nzc_exp += - get_nzc_4x4_uv(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, - base_mb16 + 1) + - get_nzc_4x4_uv(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, - base_mb16 + 3); - } else { - nzc_exp += cur->mbmi.nzcs[block - 4] << 5; - } - // Note nzc_exp is 64 times the average value expected at 8x8 scale - return choose_nzc_context(nzc_exp, NZC_T2_8X8, NZC_T1_8X8); - - case TX_4X4: - if (boff < 4) { - int o = boff >> 1; - int p = boff & 1; - nzc_exp = get_nzc_4x4_uv(cm, cur - mis + o, mb_row - 1, mb_col + o, - base_mb16 + 2 + p); - } else { - nzc_exp = (cur->mbmi.nzcs[block - 4] << 6); - } - if ((boff & 3) == 0) { - int o = boff >> 3; - int p = (boff >> 2) & 1; - nzc_exp += get_nzc_4x4_uv(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, - base_mb16 + 1 + 2 * p); - } else { - nzc_exp += (cur->mbmi.nzcs[block - 1] << 6); - } - nzc_exp >>= 1; - // Note nzc_exp is 64 times the average value expected at 4x4 scale - return choose_nzc_context(nzc_exp, NZC_T2_4X4, NZC_T1_4X4); - - default: - return 0; - } -} - -int vp9_get_nzc_context_uv_mb16(VP9_COMMON *cm, MODE_INFO *cur, - int mb_row, int mb_col, int block) { - // returns an index in [0, MAX_NZC_CONTEXTS - 1] to reflect how busy - // neighboring blocks are - int mis = cm->mode_info_stride; - int nzc_exp = 0; - const int base = block - (block & 3); - const int boff = (block & 3); - const int base_mb16 = base; - TX_SIZE txfm_size = cur->mbmi.txfm_size; - TX_SIZE txfm_size_uv; - - assert(block >= 16 && block < 24); - if (txfm_size == TX_16X16) - txfm_size_uv = TX_8X8; - else if (txfm_size == TX_8X8 && - (cur->mbmi.mode == I8X8_PRED || cur->mbmi.mode == SPLITMV)) - txfm_size_uv = TX_4X4; - else - txfm_size_uv = txfm_size; - - switch (txfm_size_uv) { - case TX_8X8: - assert((block & 3) == 0); - nzc_exp = - get_nzc_4x4_uv(cm, cur - mis, mb_row - 1, mb_col, base_mb16 + 2) + - get_nzc_4x4_uv(cm, cur - mis, mb_row - 1, mb_col, base_mb16 + 3) + - get_nzc_4x4_uv(cm, cur - 1, mb_row, mb_col - 1, base_mb16 + 1) + - get_nzc_4x4_uv(cm, cur - 1, mb_row, mb_col - 1, base_mb16 + 3); - // Note nzc_exp is 64 times the average value expected at 8x8 scale - return choose_nzc_context(nzc_exp, NZC_T2_8X8, NZC_T1_8X8); - - case TX_4X4: - if (boff < 2) { - int p = boff & 1; - nzc_exp = get_nzc_4x4_uv(cm, cur - mis, mb_row - 1, mb_col, - base_mb16 + 2 + p); - } else { - nzc_exp = (cur->mbmi.nzcs[block - 2] << 6); - } - if ((boff & 1) == 0) { - int p = (boff >> 1) & 1; - nzc_exp += get_nzc_4x4_uv(cm, cur - 1, mb_row, mb_col - 1, - base_mb16 + 1 + 2 * p); - } else { - nzc_exp += (cur->mbmi.nzcs[block - 1] << 6); - } - nzc_exp >>= 1; - // Note nzc_exp is 64 times the average value expected at 4x4 scale - return choose_nzc_context(nzc_exp, NZC_T2_4X4, NZC_T1_4X4); - - default: - return 0; - } -} - -int vp9_get_nzc_context(VP9_COMMON *cm, MACROBLOCKD *xd, int block) { - if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64) { - assert(block < 384); - if (block < 256) - return vp9_get_nzc_context_y_sb64(cm, xd->mode_info_context, - get_mb_row(xd), get_mb_col(xd), block); - else - return vp9_get_nzc_context_uv_sb64(cm, xd->mode_info_context, - get_mb_row(xd), get_mb_col(xd), block); - } else if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32) { - assert(block < 96); - if (block < 64) - return vp9_get_nzc_context_y_sb32(cm, xd->mode_info_context, - get_mb_row(xd), get_mb_col(xd), block); - else - return vp9_get_nzc_context_uv_sb32(cm, 
xd->mode_info_context, - get_mb_row(xd), get_mb_col(xd), block); - } else { - assert(block < 64); - if (block < 16) - return vp9_get_nzc_context_y_mb16(cm, xd->mode_info_context, - get_mb_row(xd), get_mb_col(xd), block); - else - return vp9_get_nzc_context_uv_mb16(cm, xd->mode_info_context, - get_mb_row(xd), get_mb_col(xd), block); - } -} - -static void update_nzc(VP9_COMMON *cm, - uint16_t nzc, - int nzc_context, - TX_SIZE tx_size, - int ref, - int type) { - int e, c; - c = codenzc(nzc); - if (tx_size == TX_32X32) - cm->fc.nzc_counts_32x32[nzc_context][ref][type][c]++; - else if (tx_size == TX_16X16) - cm->fc.nzc_counts_16x16[nzc_context][ref][type][c]++; - else if (tx_size == TX_8X8) - cm->fc.nzc_counts_8x8[nzc_context][ref][type][c]++; - else if (tx_size == TX_4X4) - cm->fc.nzc_counts_4x4[nzc_context][ref][type][c]++; - else - assert(0); - - if ((e = vp9_extranzcbits[c])) { - int x = nzc - vp9_basenzcvalue[c]; - while (e--) { - int b = (x >> e) & 1; - cm->fc.nzc_pcat_counts[nzc_context][c - NZC_TOKENS_NOEXTRA][e][b]++; - } - } -} - -static void update_nzcs_sb64(VP9_COMMON *cm, - MACROBLOCKD *xd, - int mb_row, - int mb_col) { - MODE_INFO *m = xd->mode_info_context; - MB_MODE_INFO *const mi = &m->mbmi; - int j, nzc_context; - const int ref = m->mbmi.ref_frame != INTRA_FRAME; - - assert(mb_col == get_mb_col(xd)); - assert(mb_row == get_mb_row(xd)); - - if (mi->mb_skip_coeff) - return; - - switch (mi->txfm_size) { - case TX_32X32: - for (j = 0; j < 256; j += 64) { - nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j); - update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_32X32, ref, 0); - } - for (j = 256; j < 384; j += 64) { - nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j); - update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_32X32, ref, 1); - } - break; - - case TX_16X16: - for (j = 0; j < 256; j += 16) { - nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j); - update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 0); - } - for (j = 256; j < 384; j += 16) { - nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j); - update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 1); - } - break; - - case TX_8X8: - for (j = 0; j < 256; j += 4) { - nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j); - update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 0); - } - for (j = 256; j < 384; j += 4) { - nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j); - update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1); - } - break; - - case TX_4X4: - for (j = 0; j < 256; ++j) { - nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j); - update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 0); - } - for (j = 256; j < 384; ++j) { - nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j); - update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1); - } - break; - - default: - break; - } -} - -static void update_nzcs_sb32(VP9_COMMON *cm, - MACROBLOCKD *xd, - int mb_row, - int mb_col) { - MODE_INFO *m = xd->mode_info_context; - MB_MODE_INFO *const mi = &m->mbmi; - int j, nzc_context; - const int ref = m->mbmi.ref_frame != INTRA_FRAME; - - assert(mb_col == get_mb_col(xd)); - assert(mb_row == get_mb_row(xd)); - - if (mi->mb_skip_coeff) - return; - - switch (mi->txfm_size) { - case TX_32X32: - for (j = 0; j < 64; j += 64) { - nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j); - update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_32X32, ref, 0); - } - for (j = 64; j < 
96; j += 16) { - nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j); - update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 1); - } - break; - - case TX_16X16: - for (j = 0; j < 64; j += 16) { - nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j); - update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 0); - } - for (j = 64; j < 96; j += 16) { - nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j); - update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 1); - } - break; - - case TX_8X8: - for (j = 0; j < 64; j += 4) { - nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j); - update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 0); - } - for (j = 64; j < 96; j += 4) { - nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j); - update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1); - } - break; - - case TX_4X4: - for (j = 0; j < 64; ++j) { - nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j); - update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 0); - } - for (j = 64; j < 96; ++j) { - nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j); - update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1); - } - break; - - default: - break; - } } -static void update_nzcs_mb16(VP9_COMMON *cm, - MACROBLOCKD *xd, - int mb_row, - int mb_col) { - MODE_INFO *m = xd->mode_info_context; - MB_MODE_INFO *const mi = &m->mbmi; - int j, nzc_context; - const int ref = m->mbmi.ref_frame != INTRA_FRAME; - - assert(mb_col == get_mb_col(xd)); - assert(mb_row == get_mb_row(xd)); - - if (mi->mb_skip_coeff) - return; - - switch (mi->txfm_size) { - case TX_16X16: - for (j = 0; j < 16; j += 16) { - nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j); - update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 0); - } - for (j = 16; j < 24; j += 4) { - nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j); - update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1); - } - break; - - case TX_8X8: - for (j = 0; j < 16; j += 4) { - nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j); - update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 0); - } - if (mi->mode == I8X8_PRED || mi->mode == SPLITMV) { - for (j = 16; j < 24; ++j) { - nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j); - update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1); - } - } else { - for (j = 16; j < 24; j += 4) { - nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j); - update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1); - } - } - break; - - case TX_4X4: - for (j = 0; j < 16; ++j) { - nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j); - update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 0); - } - for (j = 16; j < 24; ++j) { - nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j); - update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1); - } - break; - - default: - break; - } -} - -void vp9_update_nzc_counts(VP9_COMMON *cm, - MACROBLOCKD *xd, - int mb_row, - int mb_col) { - if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64) - update_nzcs_sb64(cm, xd, mb_row, mb_col); - else if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32) - update_nzcs_sb32(cm, xd, mb_row, mb_col); - else - update_nzcs_mb16(cm, xd, mb_row, mb_col); -} -#endif // CONFIG_CODE_NONZEROCOUNT - // #define COEF_COUNT_TESTING #define COEF_COUNT_SAT 24 @@ -3525,34 +652,61 @@ void 
vp9_update_nzc_counts(VP9_COMMON *cm, #define COEF_COUNT_SAT_AFTER_KEY 24 #define COEF_MAX_UPDATE_FACTOR_AFTER_KEY 128 -static void adapt_coef_probs(vp9_coeff_probs *dst_coef_probs, - vp9_coeff_probs *pre_coef_probs, - int block_types, vp9_coeff_count *coef_counts, - unsigned int (*eob_branch_count)[REF_TYPES] - [COEF_BANDS] - [PREV_COEF_CONTEXTS], +void vp9_full_to_model_count(unsigned int *model_count, + unsigned int *full_count) { + int n; + model_count[ZERO_TOKEN] = full_count[ZERO_TOKEN]; + model_count[ONE_TOKEN] = full_count[ONE_TOKEN]; + model_count[TWO_TOKEN] = full_count[TWO_TOKEN]; + for (n = THREE_TOKEN; n < DCT_EOB_TOKEN; ++n) + model_count[TWO_TOKEN] += full_count[n]; + model_count[DCT_EOB_MODEL_TOKEN] = full_count[DCT_EOB_TOKEN]; +} + +void vp9_full_to_model_counts( + vp9_coeff_count_model *model_count, vp9_coeff_count *full_count) { + int i, j, k, l; + for (i = 0; i < BLOCK_TYPES; ++i) + for (j = 0; j < REF_TYPES; ++j) + for (k = 0; k < COEF_BANDS; ++k) + for (l = 0; l < PREV_COEF_CONTEXTS; ++l) { + if (l >= 3 && k == 0) + continue; + vp9_full_to_model_count(model_count[i][j][k][l], + full_count[i][j][k][l]); + } +} + +static void adapt_coef_probs(VP9_COMMON *cm, TX_SIZE txfm_size, int count_sat, int update_factor) { + vp9_coeff_probs_model *dst_coef_probs = cm->fc.coef_probs[txfm_size]; + vp9_coeff_probs_model *pre_coef_probs = cm->fc.pre_coef_probs[txfm_size]; + vp9_coeff_count_model *coef_counts = cm->fc.coef_counts[txfm_size]; + unsigned int (*eob_branch_count)[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS] = + cm->fc.eob_branch_counts[txfm_size]; int t, i, j, k, l, count; - unsigned int branch_ct[ENTROPY_NODES][2]; - vp9_prob coef_probs[ENTROPY_NODES]; int factor; -#if CONFIG_MODELCOEFPROB && MODEL_BASED_ADAPT - int entropy_nodes_adapt = UNCONSTRAINED_ADAPT_NODES; -#else - int entropy_nodes_adapt = ENTROPY_NODES; -#endif + unsigned int branch_ct[UNCONSTRAINED_NODES][2]; + vp9_prob coef_probs[UNCONSTRAINED_NODES]; + int entropy_nodes_adapt = UNCONSTRAINED_NODES; - for (i = 0; i < block_types; ++i) + for (i = 0; i < BLOCK_TYPES; ++i) for (j = 0; j < REF_TYPES; ++j) for (k = 0; k < COEF_BANDS; ++k) for (l = 0; l < PREV_COEF_CONTEXTS; ++l) { if (l >= 3 && k == 0) continue; - vp9_tree_probs_from_distribution(vp9_coef_tree, - coef_probs, branch_ct, - coef_counts[i][j][k][l], 0); + vp9_tree_probs_from_distribution( + vp9_coefmodel_tree, + coef_probs, branch_ct, + coef_counts[i][j][k][l], 0); +#if CONFIG_BALANCED_COEFTREE + branch_ct[1][1] = eob_branch_count[i][j][k][l] - branch_ct[1][0]; + coef_probs[1] = get_binary_prob(branch_ct[1][0], branch_ct[1][1]); +#else branch_ct[0][1] = eob_branch_count[i][j][k][l] - branch_ct[0][0]; coef_probs[0] = get_binary_prob(branch_ct[0][0], branch_ct[0][1]); +#endif for (t = 0; t < entropy_nodes_adapt; ++t) { count = branch_ct[t][0] + branch_ct[t][1]; count = count > count_sat ? 
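
The model-count fold above is small but easy to misread: vp9_full_to_model_count() keeps the ZERO and ONE counts as they are, folds THREE_TOKEN through the last value category into the TWO_TOKEN bin, and moves the EOB count into slot DCT_EOB_MODEL_TOKEN (3). A standalone check; the token ordering is the usual vp9 one and is assumed here rather than quoted from the headers:

  #include <stdio.h>

  enum {  /* standard vp9 coefficient tokens (assumed ordering) */
    ZERO_TOKEN, ONE_TOKEN, TWO_TOKEN, THREE_TOKEN, FOUR_TOKEN,
    DCT_VAL_CATEGORY1, DCT_VAL_CATEGORY2, DCT_VAL_CATEGORY3,
    DCT_VAL_CATEGORY4, DCT_VAL_CATEGORY5, DCT_VAL_CATEGORY6,
    DCT_EOB_TOKEN, MAX_ENTROPY_TOKENS
  };
  #define DCT_EOB_MODEL_TOKEN 3  /* as defined in the header change below */

  int main(void) {
    const unsigned int full[MAX_ENTROPY_TOKENS] =
        { 50, 20, 10, 6, 4, 3, 2, 1, 1, 0, 0, 30 };
    unsigned int model[4] = { 0 };  /* UNCONSTRAINED_NODES + 1 bins */
    int n;

    model[ZERO_TOKEN] = full[ZERO_TOKEN];  /* 50 */
    model[ONE_TOKEN] = full[ONE_TOKEN];    /* 20 */
    model[TWO_TOKEN] = full[TWO_TOKEN];    /* 10 ... */
    for (n = THREE_TOKEN; n < DCT_EOB_TOKEN; ++n)
      model[TWO_TOKEN] += full[n];         /* ... + 6+4+3+2+1+1 = 27 */
    model[DCT_EOB_MODEL_TOKEN] = full[DCT_EOB_TOKEN];  /* 30 */

    printf("%u %u %u %u\n", model[0], model[1], model[2], model[3]);
    return 0;
  }

The result is the four-bin histogram that adapt_coef_probs() feeds to vp9_tree_probs_from_distribution() over vp9_coefmodel_tree.
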
count_sat : count; @@ -3560,152 +714,16 @@ static void adapt_coef_probs(vp9_coeff_probs *dst_coef_probs, dst_coef_probs[i][j][k][l][t] = weighted_prob(pre_coef_probs[i][j][k][l][t], coef_probs[t], factor); -#if CONFIG_MODELCOEFPROB && MODEL_BASED_ADAPT - if (t == UNCONSTRAINED_NODES - 1) - vp9_get_model_distribution( - dst_coef_probs[i][j][k][l][UNCONSTRAINED_NODES - 1], - dst_coef_probs[i][j][k][l], i, j); -#endif } } } void vp9_adapt_coef_probs(VP9_COMMON *cm) { + TX_SIZE t; int count_sat; int update_factor; /* denominator 256 */ - if (cm->frame_type == KEY_FRAME) { - update_factor = COEF_MAX_UPDATE_FACTOR_KEY; - count_sat = COEF_COUNT_SAT_KEY; - } else if (cm->last_frame_type == KEY_FRAME) { - update_factor = COEF_MAX_UPDATE_FACTOR_AFTER_KEY; /* adapt quickly */ - count_sat = COEF_COUNT_SAT_AFTER_KEY; - } else { - update_factor = COEF_MAX_UPDATE_FACTOR; - count_sat = COEF_COUNT_SAT; - } - - adapt_coef_probs(cm->fc.coef_probs_4x4, cm->fc.pre_coef_probs_4x4, - BLOCK_TYPES, cm->fc.coef_counts_4x4, - cm->fc.eob_branch_counts[TX_4X4], - count_sat, update_factor); - adapt_coef_probs(cm->fc.coef_probs_8x8, cm->fc.pre_coef_probs_8x8, - BLOCK_TYPES, cm->fc.coef_counts_8x8, - cm->fc.eob_branch_counts[TX_8X8], - count_sat, update_factor); - adapt_coef_probs(cm->fc.coef_probs_16x16, cm->fc.pre_coef_probs_16x16, - BLOCK_TYPES, cm->fc.coef_counts_16x16, - cm->fc.eob_branch_counts[TX_16X16], - count_sat, update_factor); - adapt_coef_probs(cm->fc.coef_probs_32x32, cm->fc.pre_coef_probs_32x32, - BLOCK_TYPES, cm->fc.coef_counts_32x32, - cm->fc.eob_branch_counts[TX_32X32], - count_sat, update_factor); -} - -#if CONFIG_CODE_NONZEROCOUNT -static void adapt_nzc_probs(VP9_COMMON *cm, - int block_size, - int count_sat, - int update_factor) { - int c, r, b, n; - int count, factor; - unsigned int nzc_branch_ct[NZC32X32_NODES][2]; - vp9_prob nzc_probs[NZC32X32_NODES]; - int tokens, nodes; - const vp9_tree_index *nzc_tree; - vp9_prob *dst_nzc_probs; - vp9_prob *pre_nzc_probs; - unsigned int *nzc_counts; - - if (block_size == 32) { - tokens = NZC32X32_TOKENS; - nzc_tree = vp9_nzc32x32_tree; - dst_nzc_probs = cm->fc.nzc_probs_32x32[0][0][0]; - pre_nzc_probs = cm->fc.pre_nzc_probs_32x32[0][0][0]; - nzc_counts = cm->fc.nzc_counts_32x32[0][0][0]; - } else if (block_size == 16) { - tokens = NZC16X16_TOKENS; - nzc_tree = vp9_nzc16x16_tree; - dst_nzc_probs = cm->fc.nzc_probs_16x16[0][0][0]; - pre_nzc_probs = cm->fc.pre_nzc_probs_16x16[0][0][0]; - nzc_counts = cm->fc.nzc_counts_16x16[0][0][0]; - } else if (block_size == 8) { - tokens = NZC8X8_TOKENS; - nzc_tree = vp9_nzc8x8_tree; - dst_nzc_probs = cm->fc.nzc_probs_8x8[0][0][0]; - pre_nzc_probs = cm->fc.pre_nzc_probs_8x8[0][0][0]; - nzc_counts = cm->fc.nzc_counts_8x8[0][0][0]; - } else { - nzc_tree = vp9_nzc4x4_tree; - tokens = NZC4X4_TOKENS; - dst_nzc_probs = cm->fc.nzc_probs_4x4[0][0][0]; - pre_nzc_probs = cm->fc.pre_nzc_probs_4x4[0][0][0]; - nzc_counts = cm->fc.nzc_counts_4x4[0][0][0]; - } - nodes = tokens - 1; - for (c = 0; c < MAX_NZC_CONTEXTS; ++c) - for (r = 0; r < REF_TYPES; ++r) - for (b = 0; b < BLOCK_TYPES; ++b) { - int offset = c * REF_TYPES * BLOCK_TYPES + r * BLOCK_TYPES + b; - int offset_nodes = offset * nodes; - int offset_tokens = offset * tokens; - vp9_tree_probs_from_distribution(nzc_tree, - nzc_probs, nzc_branch_ct, - nzc_counts + offset_tokens, 0); - for (n = 0; n < nodes; ++n) { - count = nzc_branch_ct[n][0] + nzc_branch_ct[n][1]; - count = count > count_sat ? 
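
Every adaptation routine in this region, coefficient, NZC (removed), mode and motion-vector alike, reduces to the same arithmetic: saturate the branch count, scale the update factor by it, and blend the carried-over probability with the freshly measured one. A worked standalone example; weighted_prob() is written out on the assumption that it matches the usual vp9_treecoder.h helper:

  #include <stdio.h>

  typedef unsigned char vp9_prob;

  /* Assumed to match vp9_treecoder.h: an 8-bit blend with rounding. */
  static vp9_prob weighted_prob(vp9_prob prob1, vp9_prob prob2, int factor) {
    return (vp9_prob)((prob1 * (256 - factor) + prob2 * factor + 128) >> 8);
  }

  int main(void) {
    const int count_sat = 24;       /* COEF_COUNT_SAT */
    const int update_factor = 128;  /* e.g. COEF_MAX_UPDATE_FACTOR_AFTER_KEY */
    const vp9_prob pre = 200;       /* probability carried into this frame */
    const vp9_prob measured = 120;  /* from this frame's branch counts */
    int count = 12;                 /* branch_ct[0] + branch_ct[1] */

    count = count > count_sat ? count_sat : count;  /* still 12: under the cap */
    {
      const int factor = update_factor * count / count_sat;  /* 128*12/24 = 64 */
      /* (200*192 + 120*64 + 128) >> 8 = 46208 >> 8 = 180 */
      printf("adapted prob = %d\n", weighted_prob(pre, measured, factor));
    }
    return 0;
  }

With few observed symbols the blend barely moves off the previous probability; once the count saturates, it moves by at most update_factor/256 of the gap per frame.
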
count_sat : count; - factor = (update_factor * count / count_sat); - dst_nzc_probs[offset_nodes + n] = - weighted_prob(pre_nzc_probs[offset_nodes + n], - nzc_probs[n], factor); - } - } -} - -static void adapt_nzc_pcat(VP9_COMMON *cm, int count_sat, int update_factor) { - int c, t; - int count, factor; - for (c = 0; c < MAX_NZC_CONTEXTS; ++c) { - for (t = 0; t < NZC_TOKENS_EXTRA; ++t) { - int bits = vp9_extranzcbits[t + NZC_TOKENS_NOEXTRA]; - int b; - for (b = 0; b < bits; ++b) { - vp9_prob prob = get_binary_prob(cm->fc.nzc_pcat_counts[c][t][b][0], - cm->fc.nzc_pcat_counts[c][t][b][1]); - count = cm->fc.nzc_pcat_counts[c][t][b][0] + - cm->fc.nzc_pcat_counts[c][t][b][1]; - count = count > count_sat ? count_sat : count; - factor = (update_factor * count / count_sat); - cm->fc.nzc_pcat_probs[c][t][b] = weighted_prob( - cm->fc.pre_nzc_pcat_probs[c][t][b], prob, factor); - } - } - } -} - -// #define NZC_COUNT_TESTING -void vp9_adapt_nzc_probs(VP9_COMMON *cm) { - int count_sat; - int update_factor; /* denominator 256 */ -#ifdef NZC_COUNT_TESTING - int c, r, b, t; - printf("\n"); - for (c = 0; c < MAX_NZC_CONTEXTS; ++c) - for (r = 0; r < REF_TYPES; ++r) { - for (b = 0; b < BLOCK_TYPES; ++b) { - printf(" {"); - for (t = 0; t < NZC4X4_TOKENS; ++t) { - printf(" %d,", cm->fc.nzc_counts_4x4[c][r][b][t]); - } - printf("}\n"); - } - printf("\n"); - } -#endif - - if (cm->frame_type == KEY_FRAME) { + if ((cm->frame_type == KEY_FRAME) || cm->intra_only) { update_factor = COEF_MAX_UPDATE_FACTOR_KEY; count_sat = COEF_COUNT_SAT_KEY; } else if (cm->last_frame_type == KEY_FRAME) { @@ -3715,11 +733,6 @@ void vp9_adapt_nzc_probs(VP9_COMMON *cm) { update_factor = COEF_MAX_UPDATE_FACTOR; count_sat = COEF_COUNT_SAT; } - - adapt_nzc_probs(cm, 4, count_sat, update_factor); - adapt_nzc_probs(cm, 8, count_sat, update_factor); - adapt_nzc_probs(cm, 16, count_sat, update_factor); - adapt_nzc_probs(cm, 32, count_sat, update_factor); - adapt_nzc_pcat(cm, count_sat, update_factor); + for (t = TX_4X4; t <= TX_32X32; t++) + adapt_coef_probs(cm, t, count_sat, update_factor); } -#endif // CONFIG_CODE_NONZEROCOUNT diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h index 64f595047ae3f16b49fce4457a5c1f6c22e19fd0..27e3bec5ae1ad309f74cb16ad685bb18d592291e 100644 --- a/vp9/common/vp9_entropy.h +++ b/vp9/common/vp9_entropy.h @@ -16,8 +16,6 @@ #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_common.h" -extern const int vp9_i8x8_block[4]; - /* Coefficient token alphabet */ #define ZERO_TOKEN 0 /* 0 Extra Bits 0+0 */ @@ -40,16 +38,19 @@ extern const int vp9_i8x8_block[4]; extern const vp9_tree_index vp9_coef_tree[]; -extern struct vp9_token_struct vp9_coef_encodings[MAX_ENTROPY_TOKENS]; +#define DCT_EOB_MODEL_TOKEN 3 /* EOB Extra Bits 0+0 */ +extern const vp9_tree_index vp9_coefmodel_tree[]; + +extern struct vp9_token vp9_coef_encodings[MAX_ENTROPY_TOKENS]; typedef struct { vp9_tree_p tree; const vp9_prob *prob; - int Len; + int len; int base_val; -} vp9_extra_bit_struct; +} vp9_extra_bit; -extern vp9_extra_bit_struct vp9_extra_bits[12]; /* indexed by token value */ +extern vp9_extra_bit vp9_extra_bits[12]; /* indexed by token value */ #define PROB_UPDATE_BASELINE_COST 7 @@ -84,6 +85,8 @@ extern vp9_extra_bit_struct vp9_extra_bits[12]; /* indexed by token value */ /*# define DC_TOKEN_CONTEXTS 3*/ /* 00, 0!0, !0!0 */ #define PREV_COEF_CONTEXTS 6 +// #define ENTROPY_STATS + typedef unsigned int vp9_coeff_count[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; typedef unsigned int 
vp9_coeff_stats[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS] @@ -96,173 +99,126 @@ typedef vp9_prob vp9_coeff_probs[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS] struct VP9Common; void vp9_default_coef_probs(struct VP9Common *); -extern DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_4x4[16]); +extern DECLARE_ALIGNED(16, const int, vp9_default_scan_4x4[16]); extern DECLARE_ALIGNED(16, const int, vp9_col_scan_4x4[16]); extern DECLARE_ALIGNED(16, const int, vp9_row_scan_4x4[16]); -extern DECLARE_ALIGNED(64, const int, vp9_default_zig_zag1d_8x8[64]); +extern DECLARE_ALIGNED(64, const int, vp9_default_scan_8x8[64]); extern DECLARE_ALIGNED(16, const int, vp9_col_scan_8x8[64]); extern DECLARE_ALIGNED(16, const int, vp9_row_scan_8x8[64]); -extern DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_16x16[256]); +extern DECLARE_ALIGNED(16, const int, vp9_default_scan_16x16[256]); extern DECLARE_ALIGNED(16, const int, vp9_col_scan_16x16[256]); extern DECLARE_ALIGNED(16, const int, vp9_row_scan_16x16[256]); -extern DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]); +extern DECLARE_ALIGNED(16, const int, vp9_default_scan_32x32[1024]); void vp9_coef_tree_initialize(void); void vp9_adapt_coef_probs(struct VP9Common *); -static INLINE void vp9_reset_mb_tokens_context(MACROBLOCKD* const xd) { +static INLINE void vp9_reset_sb_tokens_context(MACROBLOCKD* const xd, + BLOCK_SIZE_TYPE bsize) { /* Clear entropy contexts */ - vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); + const int bw = 1 << b_width_log2(bsize); + const int bh = 1 << b_height_log2(bsize); + int i; + for (i = 0; i < MAX_MB_PLANE; i++) { + vpx_memset(xd->plane[i].above_context, 0, + sizeof(ENTROPY_CONTEXT) * bw >> xd->plane[i].subsampling_x); + vpx_memset(xd->plane[i].left_context, 0, + sizeof(ENTROPY_CONTEXT) * bh >> xd->plane[i].subsampling_y); + } } -static INLINE void vp9_reset_sb_tokens_context(MACROBLOCKD* const xd) { - /* Clear entropy contexts */ - vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 2); - vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 2); -} +// This is the index in the scan order beyond which all coefficients for +// 8x8 transform and above are in the top band. +// For 4x4 blocks the index is less but to keep things common the lookup +// table for 4x4 is padded out to this index. +#define MAXBAND_INDEX 21 -static INLINE void vp9_reset_sb64_tokens_context(MACROBLOCKD* const xd) { - /* Clear entropy contexts */ - vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 4); - vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 4); -} +extern const uint8_t vp9_coefband_trans_8x8plus[MAXBAND_INDEX + 1]; +extern const uint8_t vp9_coefband_trans_4x4[MAXBAND_INDEX + 1]; -extern const int vp9_coef_bands8x8[64]; -extern const int vp9_coef_bands4x4[16]; - -static int get_coef_band(const int *scan, TX_SIZE tx_size, int coef_index) { - if (tx_size == TX_4X4) { - return vp9_coef_bands4x4[scan[coef_index]]; - } else { - const int pos = scan[coef_index]; - const int sz = 1 << (2 + tx_size); - const int x = pos & (sz - 1), y = pos >> (2 + tx_size); - if (x >= 8 || y >= 8) - return 5; - else - return vp9_coef_bands8x8[y * 8 + x]; - } + +static int get_coef_band(const uint8_t * band_translate, int coef_index) { + return (coef_index > MAXBAND_INDEX) + ? 
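
The rewritten vp9_reset_sb_tokens_context() above clears one above/left context strip per plane instead of whole ENTROPY_CONTEXT_PLANES structs, scaling the cleared span by each plane's chroma subsampling. A compilable stand-in (the struct layout and fixed array sizes are simplifications, not the real MACROBLOCKD); note the original memset size relies on sizeof(ENTROPY_CONTEXT) being 1, so the multiply and the shift commute:

  #include <stdio.h>
  #include <string.h>

  typedef char ENTROPY_CONTEXT;   /* stand-in: one entry per 4x4 unit */

  struct plane {                  /* simplified stand-in for xd->plane[i] */
    ENTROPY_CONTEXT above_context[16], left_context[16];
    int subsampling_x, subsampling_y;
  };

  /* Mirrors the shape of the rewritten reset: bw/bh are the block's
   * dimensions in 4x4 units, i.e. 1 << b_width_log2(bsize). */
  static void reset_tokens_context(struct plane *p, int nplanes, int bw, int bh) {
    int i;
    for (i = 0; i < nplanes; i++) {
      memset(p[i].above_context, 0,
             sizeof(ENTROPY_CONTEXT) * bw >> p[i].subsampling_x);
      memset(p[i].left_context, 0,
             sizeof(ENTROPY_CONTEXT) * bh >> p[i].subsampling_y);
    }
  }

  int main(void) {
    struct plane planes[3];
    memset(planes, 0x55, sizeof(planes));                   /* dirty contexts */
    planes[0].subsampling_x = planes[0].subsampling_y = 0;  /* Y       */
    planes[1].subsampling_x = planes[1].subsampling_y = 1;  /* U 4:2:0 */
    planes[2].subsampling_x = planes[2].subsampling_y = 1;  /* V 4:2:0 */
    /* A 64x64 block is 16 4x4 units wide: luma clears 16 entries,
     * each chroma plane clears 16 >> 1 = 8. */
    reset_tokens_context(planes, 3, 16, 16);
    printf("luma above[15]=%d, chroma above[8]=%d (still dirty)\n",
           planes[0].above_context[15], planes[1].above_context[8]);
    return 0;
  }
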
(COEF_BANDS-1) : band_translate[coef_index]; } + extern int vp9_get_coef_context(const int *scan, const int *neighbors, int nb_pad, uint8_t *token_cache, int c, int l); const int *vp9_get_coef_neighbors_handle(const int *scan, int *pad); -#if CONFIG_MODELCOEFPROB -#define COEFPROB_BITS 8 -#define COEFPROB_MODELS (1 << COEFPROB_BITS) -// 2 => EOB and Zero nodes are unconstrained, rest are modeled -// 3 => EOB, Zero and One nodes are unconstrained, rest are modeled -#define UNCONSTRAINED_NODES 3 // Choose one of 2 or 3 +// 128 lists of probabilities are stored for the following ONE node probs: +// 1, 3, 5, 7, ..., 253, 255 +// In between probabilities are interpolated linearly -// whether forward updates are model-based -#define MODEL_BASED_UPDATE 0 -// if model-based how many nodes are unconstrained -#define UNCONSTRAINED_UPDATE_NODES 3 -// whether backward updates are model-based -#define MODEL_BASED_ADAPT 0 -#define UNCONSTRAINED_ADAPT_NODES 3 +#define COEFPROB_MODELS 128 -// whether to adjust the coef probs for key frames based on qindex -#define ADJUST_KF_COEF_PROBS 0 +#define UNCONSTRAINED_NODES 3 +#define MODEL_NODES (ENTROPY_NODES - UNCONSTRAINED_NODES) + +#define PIVOT_NODE 2 // which node is pivot typedef vp9_prob vp9_coeff_probs_model[REF_TYPES][COEF_BANDS] - [PREV_COEF_CONTEXTS][2]; + [PREV_COEF_CONTEXTS] + [UNCONSTRAINED_NODES]; + +typedef unsigned int vp9_coeff_count_model[REF_TYPES][COEF_BANDS] + [PREV_COEF_CONTEXTS] + [UNCONSTRAINED_NODES + 1]; +typedef unsigned int vp9_coeff_stats_model[REF_TYPES][COEF_BANDS] + [PREV_COEF_CONTEXTS] + [UNCONSTRAINED_NODES][2]; +extern void vp9_full_to_model_count(unsigned int *model_count, + unsigned int *full_count); +extern void vp9_full_to_model_counts( + vp9_coeff_count_model *model_count, vp9_coeff_count *full_count); + +void vp9_model_to_full_probs(const vp9_prob *model, vp9_prob *full); + +void vp9_model_to_full_probs_sb( + vp9_prob model[COEF_BANDS][PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES], + vp9_prob full[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]); + extern const vp9_prob vp9_modelcoefprobs[COEFPROB_MODELS][ENTROPY_NODES - 1]; -void vp9_get_model_distribution(vp9_prob model, vp9_prob *tree_probs, - int b, int r); -void vp9_adjust_default_coef_probs(struct VP9Common *cm); -#endif // CONFIG_MODELCOEFPROB - -#if CONFIG_CODE_NONZEROCOUNT -/* Alphabet for number of non-zero symbols in block */ -#define NZC_0 0 /* Used for all blocks */ -#define NZC_1 1 /* Used for all blocks */ -#define NZC_2 2 /* Used for all blocks */ -#define NZC_3TO4 3 /* Used for all blocks */ -#define NZC_5TO8 4 /* Used for all blocks */ -#define NZC_9TO16 5 /* Used for all blocks */ -#define NZC_17TO32 6 /* Used for 8x8 and larger blocks */ -#define NZC_33TO64 7 /* Used for 8x8 and larger blocks */ -#define NZC_65TO128 8 /* Used for 16x16 and larger blocks */ -#define NZC_129TO256 9 /* Used for 16x16 and larger blocks */ -#define NZC_257TO512 10 /* Used for 32x32 and larger blocks */ -#define NZC_513TO1024 11 /* Used for 32x32 and larger blocks */ - -/* Number of tokens for each block size */ -#define NZC4X4_TOKENS 6 -#define NZC8X8_TOKENS 8 -#define NZC16X16_TOKENS 10 -#define NZC32X32_TOKENS 12 - -/* Number of nodes for each block size */ -#define NZC4X4_NODES 5 -#define NZC8X8_NODES 7 -#define NZC16X16_NODES 9 -#define NZC32X32_NODES 11 - -/* Max number of tokens with extra bits */ -#define NZC_TOKENS_EXTRA 9 - -/* Max number of extra bits */ -#define NZC_BITS_EXTRA 9 - -/* Tokens without extra bits */ -#define NZC_TOKENS_NOEXTRA (NZC32X32_TOKENS - 
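
The defines above are the heart of the model-coefprob scheme: only the first UNCONSTRAINED_NODES probabilities of the 11-node coefficient tree are stored and adapted per context, and the probability at PIVOT_NODE then selects one of the COEFPROB_MODELS stored tail distributions for the remaining MODEL_NODES, which is what vp9_model_to_full_probs() reconstitutes. A runnable sketch; the table contents here are synthetic placeholders and the interpolation between stored models is elided, only the indexing pattern is the point:

  #include <stdio.h>

  typedef unsigned char vp9_prob;

  #define ENTROPY_NODES 11
  #define UNCONSTRAINED_NODES 3
  #define MODEL_NODES (ENTROPY_NODES - UNCONSTRAINED_NODES)
  #define PIVOT_NODE 2
  #define COEFPROB_MODELS 128

  /* Synthetic stand-in for vp9_modelcoefprobs: a decaying tail per model.
   * The real table is precomputed data in the library. */
  static vp9_prob modelcoefprobs[COEFPROB_MODELS][MODEL_NODES];

  static void init_fake_table(void) {
    int m, n;
    for (m = 0; m < COEFPROB_MODELS; ++m)
      for (n = 0; n < MODEL_NODES; ++n) {
        const int p = (2 * m + 1) >> n;  /* purely illustrative values */
        modelcoefprobs[m][n] = (vp9_prob)(p < 1 ? 1 : p);
      }
  }

  /* Same indexing pattern as vp9_model_to_full_probs(): copy the stored
   * nodes, then look the tail up via the pivot probability. Models exist
   * for odd pivot values 1, 3, ..., 255, hence the >> 1. */
  static void model_to_full(const vp9_prob model[UNCONSTRAINED_NODES],
                            vp9_prob full[ENTROPY_NODES]) {
    int n;
    for (n = 0; n < UNCONSTRAINED_NODES; ++n)
      full[n] = model[n];
    for (n = 0; n < MODEL_NODES; ++n)
      full[UNCONSTRAINED_NODES + n] = modelcoefprobs[model[PIVOT_NODE] >> 1][n];
  }

  int main(void) {
    const vp9_prob model[UNCONSTRAINED_NODES] = { 160, 120, 101 };
    vp9_prob full[ENTROPY_NODES];
    int n;
    init_fake_table();
    model_to_full(model, full);
    for (n = 0; n < ENTROPY_NODES; ++n)
      printf("%d ", full[n]);
    printf("\n");
    return 0;
  }
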
NZC_TOKENS_EXTRA) - -#define MAX_NZC_CONTEXTS 3 - -/* whether to update extra bit probabilities */ -#define NZC_PCAT_UPDATE - -/* nzc trees */ -extern const vp9_tree_index vp9_nzc4x4_tree[]; -extern const vp9_tree_index vp9_nzc8x8_tree[]; -extern const vp9_tree_index vp9_nzc16x16_tree[]; -extern const vp9_tree_index vp9_nzc32x32_tree[]; - -/* nzc encodings */ -extern struct vp9_token_struct vp9_nzc4x4_encodings[NZC4X4_TOKENS]; -extern struct vp9_token_struct vp9_nzc8x8_encodings[NZC8X8_TOKENS]; -extern struct vp9_token_struct vp9_nzc16x16_encodings[NZC16X16_TOKENS]; -extern struct vp9_token_struct vp9_nzc32x32_encodings[NZC32X32_TOKENS]; - -#define codenzc(x) (\ - (x) <= 3 ? (x) : (x) <= 4 ? 3 : (x) <= 8 ? 4 : \ - (x) <= 16 ? 5 : (x) <= 32 ? 6 : (x) <= 64 ? 7 :\ - (x) <= 128 ? 8 : (x) <= 256 ? 9 : (x) <= 512 ? 10 : 11) - -int vp9_get_nzc_context_y_sb64(struct VP9Common *cm, MODE_INFO *cur, - int mb_row, int mb_col, int block); -int vp9_get_nzc_context_y_sb32(struct VP9Common *cm, MODE_INFO *cur, - int mb_row, int mb_col, int block); -int vp9_get_nzc_context_y_mb16(struct VP9Common *cm, MODE_INFO *cur, - int mb_row, int mb_col, int block); -int vp9_get_nzc_context_uv_sb64(struct VP9Common *cm, MODE_INFO *cur, - int mb_row, int mb_col, int block); -int vp9_get_nzc_context_uv_sb32(struct VP9Common *cm, MODE_INFO *cur, - int mb_row, int mb_col, int block); -int vp9_get_nzc_context_uv_mb16(struct VP9Common *cm, MODE_INFO *cur, - int mb_row, int mb_col, int block); -int vp9_get_nzc_context(struct VP9Common *cm, MACROBLOCKD *xd, int block); -void vp9_update_nzc_counts(struct VP9Common *cm, MACROBLOCKD *xd, - int mb_row, int mb_col); -void vp9_adapt_nzc_probs(struct VP9Common *cm); - -/* Extra bits array */ -extern const int vp9_extranzcbits[NZC32X32_TOKENS]; - -/* Base nzc values */ -extern const int vp9_basenzcvalue[NZC32X32_TOKENS]; - -#endif // CONFIG_CODE_NONZEROCOUNT + +static INLINE const int* get_scan_4x4(TX_TYPE tx_type) { + switch (tx_type) { + case ADST_DCT: + return vp9_row_scan_4x4; + case DCT_ADST: + return vp9_col_scan_4x4; + default: + return vp9_default_scan_4x4; + } +} + +static INLINE const int* get_scan_8x8(TX_TYPE tx_type) { + switch (tx_type) { + case ADST_DCT: + return vp9_row_scan_8x8; + case DCT_ADST: + return vp9_col_scan_8x8; + default: + return vp9_default_scan_8x8; + } +} + +static INLINE const int* get_scan_16x16(TX_TYPE tx_type) { + switch (tx_type) { + case ADST_DCT: + return vp9_row_scan_16x16; + case DCT_ADST: + return vp9_col_scan_16x16; + default: + return vp9_default_scan_16x16; + } +} #include "vp9/common/vp9_coefupdateprobs.h" diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c index 673b35a8f3bb5c43b370caaf4d57cb25675d7c98..33028146ac47615f9bb9aeba9bb601fb8299ce2a 100644 --- a/vp9/common/vp9_entropymode.c +++ b/vp9/common/vp9_entropymode.c @@ -15,464 +15,274 @@ #include "vp9/common/vp9_alloccommon.h" #include "vpx_mem/vpx_mem.h" -static const unsigned int kf_y_mode_cts[8][VP9_YMODES] = { - /* DC V H D45 135 117 153 D27 D63 TM i8x8 BPRED */ - {12, 6, 5, 5, 5, 5, 5, 5, 5, 2, 22, 200}, - {25, 13, 13, 7, 7, 7, 7, 7, 7, 6, 27, 160}, - {31, 17, 18, 8, 8, 8, 8, 8, 8, 9, 26, 139}, - {40, 22, 23, 8, 8, 8, 8, 8, 8, 12, 27, 116}, - {53, 26, 28, 8, 8, 8, 8, 8, 8, 13, 26, 94}, - {68, 33, 35, 8, 8, 8, 8, 8, 8, 17, 20, 68}, - {78, 38, 38, 8, 8, 8, 8, 8, 8, 19, 16, 52}, - {89, 42, 42, 8, 8, 8, 8, 8, 8, 21, 12, 34}, +static const vp9_prob default_kf_uv_probs[VP9_INTRA_MODES] + [VP9_INTRA_MODES - 1] = { + { 144, 11, 54, 157, 195, 130, 46, 58, 108 } /* y = 
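
The three get_scan_* helpers added above make the scan order a function of the transform type: the row-biased scan pairs with ADST_DCT, the column-biased scan with DCT_ADST, and everything else (DCT_DCT, ADST_ADST) falls through to the default scan, matching where those hybrid transforms concentrate energy. A typical call site, sketched with a hypothetical dispatcher; get_scan() and its TX_SIZE switch are illustrative, not declarations from this header:

  /* Assumes the declarations above are in scope. 32x32 only has a default
   * order, matching the single vp9_default_scan_32x32 declared earlier. */
  static INLINE const int *get_scan(TX_SIZE tx_size, TX_TYPE tx_type) {
    switch (tx_size) {
      case TX_4X4:   return get_scan_4x4(tx_type);
      case TX_8X8:   return get_scan_8x8(tx_type);
      case TX_16X16: return get_scan_16x16(tx_type);
      default:       return vp9_default_scan_32x32;
    }
  }

Keeping the choice in one place means tokenization, context derivation and the coefficient-band lookup all walk the same order.
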
dc */, + { 118, 15, 123, 148, 131, 101, 44, 93, 131 } /* y = v */, + { 113, 12, 23, 188, 226, 142, 26, 32, 125 } /* y = h */, + { 120, 11, 50, 123, 163, 135, 64, 77, 103 } /* y = d45 */, + { 113, 9, 36, 155, 111, 157, 32, 44, 161 } /* y = d135 */, + { 116, 9, 55, 176, 76, 96, 37, 61, 149 } /* y = d117 */, + { 115, 9, 28, 141, 161, 167, 21, 25, 193 } /* y = d153 */, + { 120, 12, 32, 145, 195, 142, 32, 38, 86 } /* y = d27 */, + { 116, 12, 64, 120, 140, 125, 49, 115, 121 } /* y = d63 */, + { 102, 19, 66, 162, 182, 122, 35, 59, 128 } /* y = tm */ }; -static const unsigned int y_mode_cts [VP9_YMODES] = { - /* DC V H D45 135 117 153 D27 D63 TM i8x8 BPRED */ - 98, 19, 15, 14, 14, 14, 14, 12, 12, 13, 16, 70 +static const vp9_prob default_if_y_probs[BLOCK_SIZE_GROUPS] + [VP9_INTRA_MODES - 1] = { + { 65, 32, 18, 144, 162, 194, 41, 51, 98 } /* block_size < 8x8 */, + { 132, 68, 18, 165, 217, 196, 45, 40, 78 } /* block_size < 16x16 */, + { 173, 80, 19, 176, 240, 193, 64, 35, 46 } /* block_size < 32x32 */, + { 221, 135, 38, 194, 248, 121, 96, 85, 29 } /* block_size >= 32x32 */ }; -static const unsigned int uv_mode_cts [VP9_YMODES] [VP9_UV_MODES] = { - /* DC V H D45 135 117 153 D27 D63 TM */ - { 200, 15, 15, 10, 10, 10, 10, 10, 10, 6}, /* DC */ - { 130, 75, 10, 10, 10, 10, 10, 10, 10, 6}, /* V */ - { 130, 10, 75, 10, 10, 10, 10, 10, 10, 6}, /* H */ - { 130, 15, 10, 75, 10, 10, 10, 10, 10, 6}, /* D45 */ - { 150, 15, 10, 10, 75, 10, 10, 10, 10, 6}, /* D135 */ - { 150, 15, 10, 10, 10, 75, 10, 10, 10, 6}, /* D117 */ - { 150, 15, 10, 10, 10, 10, 75, 10, 10, 6}, /* D153 */ - { 150, 15, 10, 10, 10, 10, 10, 75, 10, 6}, /* D27 */ - { 150, 15, 10, 10, 10, 10, 10, 10, 75, 6}, /* D63 */ - { 160, 30, 30, 10, 10, 10, 10, 10, 10, 16}, /* TM */ - { 132, 46, 40, 10, 10, 10, 10, 10, 10, 18}, /* i8x8 - never used */ - { 150, 35, 41, 10, 10, 10, 10, 10, 10, 10}, /* BPRED */ +static const vp9_prob default_if_uv_probs[VP9_INTRA_MODES] + [VP9_INTRA_MODES - 1] = { + { 120, 7, 76, 176, 208, 126, 28, 54, 103 } /* y = dc */, + { 48, 12, 154, 155, 139, 90, 34, 117, 119 } /* y = v */, + { 67, 6, 25, 204, 243, 158, 13, 21, 96 } /* y = h */, + { 97, 5, 44, 131, 176, 139, 48, 68, 97 } /* y = d45 */, + { 83, 5, 42, 156, 111, 152, 26, 49, 152 } /* y = d135 */, + { 80, 5, 58, 178, 74, 83, 33, 62, 145 } /* y = d117 */, + { 86, 5, 32, 154, 192, 168, 14, 22, 163 } /* y = d153 */, + { 85, 5, 32, 156, 216, 148, 19, 29, 73 } /* y = d27 */, + { 77, 7, 64, 116, 132, 122, 37, 126, 120 } /* y = d63 */, + { 101, 21, 107, 181, 192, 103, 19, 67, 125 } /* y = tm */ }; -static const unsigned int i8x8_mode_cts [VP9_I8X8_MODES] = { - /* DC V H D45 135 117 153 D27 D63 TM */ - 73, 49, 61, 30, 30, 30, 30, 30, 30, 13 -}; - -static const unsigned int kf_uv_mode_cts [VP9_YMODES] [VP9_UV_MODES] = { - // DC V H D45 135 117 153 D27 D63 TM - { 160, 24, 24, 20, 20, 20, 20, 20, 20, 8}, /* DC */ - { 102, 64, 30, 20, 20, 20, 20, 20, 20, 10}, /* V */ - { 102, 30, 64, 20, 20, 20, 20, 20, 20, 10}, /* H */ - { 102, 33, 20, 64, 20, 20, 20, 20, 20, 14}, /* D45 */ - { 102, 33, 20, 20, 64, 20, 20, 20, 20, 14}, /* D135 */ - { 122, 33, 20, 20, 20, 64, 20, 20, 20, 14}, /* D117 */ - { 102, 33, 20, 20, 20, 20, 64, 20, 20, 14}, /* D153 */ - { 102, 33, 20, 20, 20, 20, 20, 64, 20, 14}, /* D27 */ - { 102, 33, 20, 20, 20, 20, 20, 20, 64, 14}, /* D63 */ - { 132, 36, 30, 20, 20, 20, 20, 20, 20, 18}, /* TM */ - { 122, 41, 35, 20, 20, 20, 20, 20, 20, 18}, /* i8x8 - never used */ - { 122, 41, 35, 20, 20, 20, 20, 20, 20, 18}, /* BPRED */ +const vp9_prob 
vp9_partition_probs[NUM_FRAME_TYPES][NUM_PARTITION_CONTEXTS] + [PARTITION_TYPES - 1] = { + { /* frame_type = keyframe */ + /* 8x8 -> 4x4 */ + { 158, 97, 94 } /* a/l both not split */, + { 93, 24, 99 } /* a split, l not split */, + { 85, 119, 44 } /* l split, a not split */, + { 62, 59, 67 } /* a/l both split */, + /* 16x16 -> 8x8 */ + { 149, 53, 53 } /* a/l both not split */, + { 94, 20, 48 } /* a split, l not split */, + { 83, 53, 24 } /* l split, a not split */, + { 52, 18, 18 } /* a/l both split */, + /* 32x32 -> 16x16 */ + { 150, 40, 39 } /* a/l both not split */, + { 78, 12, 26 } /* a split, l not split */, + { 67, 33, 11 } /* l split, a not split */, + { 24, 7, 5 } /* a/l both split */, + /* 64x64 -> 32x32 */ + { 174, 35, 49 } /* a/l both not split */, + { 68, 11, 27 } /* a split, l not split */, + { 57, 15, 9 } /* l split, a not split */, + { 12, 3, 3 } /* a/l both split */ + }, { /* frame_type = interframe */ + /* 8x8 -> 4x4 */ + { 199, 122, 141 } /* a/l both not split */, + { 147, 63, 159 } /* a split, l not split */, + { 148, 133, 118 } /* l split, a not split */, + { 121, 104, 114 } /* a/l both split */, + /* 16x16 -> 8x8 */ + { 174, 73, 87 } /* a/l both not split */, + { 92, 41, 83 } /* a split, l not split */, + { 82, 99, 50 } /* l split, a not split */, + { 53, 39, 39 } /* a/l both split */, + /* 32x32 -> 16x16 */ + { 177, 58, 59 } /* a/l both not split */, + { 68, 26, 63 } /* a split, l not split */, + { 52, 79, 25 } /* l split, a not split */, + { 17, 14, 12 } /* a/l both split */, + /* 64x64 -> 32x32 */ + { 222, 34, 30 } /* a/l both not split */, + { 72, 16, 44 } /* a split, l not split */, + { 58, 32, 12 } /* l split, a not split */, + { 10, 7, 6 } /* a/l both split */ + } }; -static const unsigned int bmode_cts[VP9_NKF_BINTRAMODES] = { -#if CONFIG_NEWBINTRAMODES -#if CONTEXT_PRED_REPLACEMENTS == 6 - /* DC TM VE HE CONTEXT */ - 43891, 17694, 10036, 3920, 20000 -#elif CONTEXT_PRED_REPLACEMENTS == 4 - /* DC TM VE HE LD RD CONTEXT */ - 43891, 17694, 10036, 3920, 3363, 2546, 14000 -#elif CONTEXT_PRED_REPLACEMENTS == 0 - /* DC TM VE HE LD RD VR VL HD HU CONTEXT */ - 43891, 17694, 10036, 3920, 3363, 2546, 5119, 3221, 2471, 1723, 50000 -#endif -#else - /* DC TM VE HE LD RD VR VL HD HU */ - 43891, 17694, 10036, 3920, 3363, 2546, 5119, 3221, 2471, 1723 -#endif +/* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. 
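
The sixteen contexts above appear to be laid out four per square block size, ordered by whether the above (a) and left (l) neighbors are split, which suggests a context index built as below. This helper is a reading aid for the table, not the library's actual derivation:

  /* Hypothetical reconstruction of the partition-context layout. */
  static int partition_ctx(int size_group,    /* 0: 8x8 ... 3: 64x64 */
                           int above_split,   /* 0 or 1 */
                           int left_split) {  /* 0 or 1 */
    return size_group * 4 + (left_split << 1 | above_split);
  }

Read against the numbers: the no-split rows skew high (PARTITION_NONE likely) and drop sharply once both neighbors are split, with inter 64x64 at { 10, 7, 6 } making PARTITION_SPLIT overwhelmingly likely there.
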
*/ +const vp9_tree_index vp9_intra_mode_tree[VP9_INTRA_MODES * 2 - 2] = { + -DC_PRED, 2, /* 0 = DC_NODE */ + -TM_PRED, 4, /* 1 = TM_NODE */ + -V_PRED, 6, /* 2 = V_NODE */ + 8, 12, /* 3 = COM_NODE */ + -H_PRED, 10, /* 4 = H_NODE */ + -D135_PRED, -D117_PRED, /* 5 = D135_NODE */ + -D45_PRED, 14, /* 6 = D45_NODE */ + -D63_PRED, 16, /* 7 = D63_NODE */ + -D153_PRED, -D27_PRED /* 8 = D153_NODE */ }; -typedef enum { - SUBMVREF_NORMAL, - SUBMVREF_LEFT_ZED, - SUBMVREF_ABOVE_ZED, - SUBMVREF_LEFT_ABOVE_SAME, - SUBMVREF_LEFT_ABOVE_ZED -} sumvfref_t; - -int vp9_mv_cont(const int_mv *l, const int_mv *a) { - int lez = (l->as_int == 0); - int aez = (a->as_int == 0); - int lea = (l->as_int == a->as_int); - - if (lea && lez) - return SUBMVREF_LEFT_ABOVE_ZED; - - if (lea) - return SUBMVREF_LEFT_ABOVE_SAME; - - if (aez) - return SUBMVREF_ABOVE_ZED; - - if (lez) - return SUBMVREF_LEFT_ZED; - - return SUBMVREF_NORMAL; -} - -const vp9_prob vp9_sub_mv_ref_prob2 [SUBMVREF_COUNT][VP9_SUBMVREFS - 1] = { - { 147, 136, 18 }, - { 106, 145, 1 }, - { 179, 121, 1 }, - { 223, 1, 34 }, - { 208, 1, 1 } +const vp9_tree_index vp9_sb_mv_ref_tree[6] = { + -ZEROMV, 2, + -NEARESTMV, 4, + -NEARMV, -NEWMV }; -vp9_mbsplit vp9_mbsplits [VP9_NUMMBSPLITS] = { - { - 0, 0, 0, 0, - 0, 0, 0, 0, - 1, 1, 1, 1, - 1, 1, 1, 1, - }, { - 0, 0, 1, 1, - 0, 0, 1, 1, - 0, 0, 1, 1, - 0, 0, 1, 1, - }, { - 0, 0, 1, 1, - 0, 0, 1, 1, - 2, 2, 3, 3, - 2, 2, 3, 3, - }, { - 0, 1, 2, 3, - 4, 5, 6, 7, - 8, 9, 10, 11, - 12, 13, 14, 15, - }, +const vp9_tree_index vp9_partition_tree[6] = { + -PARTITION_NONE, 2, + -PARTITION_HORZ, 4, + -PARTITION_VERT, -PARTITION_SPLIT }; -const int vp9_mbsplit_count [VP9_NUMMBSPLITS] = { 2, 2, 4, 16}; +struct vp9_token vp9_intra_mode_encodings[VP9_INTRA_MODES]; -const vp9_prob vp9_mbsplit_probs [VP9_NUMMBSPLITS - 1] = { 110, 111, 150}; +struct vp9_token vp9_sb_mv_ref_encoding_array[VP9_INTER_MODES]; -/* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. 
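
All the vp9_tree_index tables here share one convention: entries come in pairs (one per internal node), a negative entry is a leaf storing the negated token, and a non-negative entry is the offset of the next pair. A self-contained walk over the new inter-mode tree, driven by a fixed bit string instead of the arithmetic decoder; the token values are illustrative, since the real ones are offset (see vp9_tokens_from_tree_offset with NEARESTMV below):

  #include <stdio.h>

  typedef signed char vp9_tree_index;

  enum { ZEROMV = 1, NEARESTMV, NEARMV, NEWMV };  /* illustrative values */

  static const vp9_tree_index sb_mv_ref_tree[6] = {
    -ZEROMV, 2,
    -NEARESTMV, 4,
    -NEARMV, -NEWMV
  };

  static int read_token(const vp9_tree_index *tree, const int *bits, int *pos) {
    vp9_tree_index i = 0;
    while ((i = tree[i + bits[(*pos)++]]) > 0)
      continue;
    return -i;  /* negative entry: leaf, negated token */
  }

  int main(void) {
    const int bits[3] = { 1, 1, 0 };
    int pos = 0;
    /* start at pair 0: bit 1 -> entry 2; pair at 2: bit 1 -> entry 4;
       pair at 4: bit 0 -> -NEARMV, a leaf. */
    printf("decoded token %d (NEARMV is %d)\n",
           read_token(sb_mv_ref_tree, bits, &pos), NEARMV);
    return 0;
  }

The same walk, with more levels, reads intra modes from vp9_intra_mode_tree and partitions from vp9_partition_tree.
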
*/ +struct vp9_token vp9_partition_encodings[PARTITION_TYPES]; -const vp9_tree_index vp9_kf_bmode_tree[VP9_KF_BINTRAMODES * 2 - 2] = { - -B_DC_PRED, 2, /* 0 = DC_NODE */ - -B_TM_PRED, 4, /* 1 = TM_NODE */ - -B_VE_PRED, 6, /* 2 = VE_NODE */ - 8, 12, /* 3 = COM_NODE */ - -B_HE_PRED, 10, /* 4 = HE_NODE */ - -B_RD_PRED, -B_VR_PRED, /* 5 = RD_NODE */ - -B_LD_PRED, 14, /* 6 = LD_NODE */ - -B_VL_PRED, 16, /* 7 = VL_NODE */ - -B_HD_PRED, -B_HU_PRED /* 8 = HD_NODE */ +static const vp9_prob default_intra_inter_p[INTRA_INTER_CONTEXTS] = { + 9, 102, 187, 225 }; -const vp9_tree_index vp9_bmode_tree[VP9_NKF_BINTRAMODES * 2 - 2] = { -#if CONFIG_NEWBINTRAMODES -#if CONTEXT_PRED_REPLACEMENTS == 6 - -B_DC_PRED, 2, - -B_TM_PRED, 4, - 6, -(B_CONTEXT_PRED - CONTEXT_PRED_REPLACEMENTS), - -B_VE_PRED, -B_HE_PRED -#elif CONTEXT_PRED_REPLACEMENTS == 4 - -B_DC_PRED, 2, - -B_TM_PRED, 4, - 6, 8, - -B_VE_PRED, -B_HE_PRED, - 10, -(B_CONTEXT_PRED - CONTEXT_PRED_REPLACEMENTS), - -B_RD_PRED, -B_LD_PRED, -#elif CONTEXT_PRED_REPLACEMENTS == 0 - -B_DC_PRED, 2, /* 0 = DC_NODE */ - -B_TM_PRED, 4, /* 1 = TM_NODE */ - -B_VE_PRED, 6, /* 2 = VE_NODE */ - 8, 12, /* 3 = COM_NODE */ - -B_HE_PRED, 10, /* 4 = HE_NODE */ - -B_RD_PRED, -B_VR_PRED, /* 5 = RD_NODE */ - -B_LD_PRED, 14, /* 6 = LD_NODE */ - -B_VL_PRED, 16, /* 7 = VL_NODE */ - -B_HD_PRED, 18, - -B_HU_PRED, -B_CONTEXT_PRED -#endif -#else - -B_DC_PRED, 2, /* 0 = DC_NODE */ - -B_TM_PRED, 4, /* 1 = TM_NODE */ - -B_VE_PRED, 6, /* 2 = VE_NODE */ - 8, 12, /* 3 = COM_NODE */ - -B_HE_PRED, 10, /* 4 = HE_NODE */ - -B_RD_PRED, -B_VR_PRED, /* 5 = RD_NODE */ - -B_LD_PRED, 14, /* 6 = LD_NODE */ - -B_VL_PRED, 16, /* 7 = VL_NODE */ - -B_HD_PRED, -B_HU_PRED /* 8 = HD_NODE */ -#endif -}; - -/* Again, these trees use the same probability indices as their - explicitly-programmed predecessors. 
*/ -const vp9_tree_index vp9_ymode_tree[VP9_YMODES * 2 - 2] = { - 2, 14, - -DC_PRED, 4, - 6, 8, - -D45_PRED, -D135_PRED, - 10, 12, - -D117_PRED, -D153_PRED, - -D27_PRED, -D63_PRED, - 16, 18, - -V_PRED, -H_PRED, - -TM_PRED, 20, - -B_PRED, -I8X8_PRED +static const vp9_prob default_comp_inter_p[COMP_INTER_CONTEXTS] = { + 239, 183, 119, 96, 41 }; -const vp9_tree_index vp9_kf_ymode_tree[VP9_YMODES * 2 - 2] = { - 2, 14, - -DC_PRED, 4, - 6, 8, - -D45_PRED, -D135_PRED, - 10, 12, - -D117_PRED, -D153_PRED, - -D27_PRED, -D63_PRED, - 16, 18, - -V_PRED, -H_PRED, - -TM_PRED, 20, - -B_PRED, -I8X8_PRED +static const vp9_prob default_comp_ref_p[REF_CONTEXTS] = { + 50, 126, 123, 221, 226 }; -const vp9_tree_index vp9_i8x8_mode_tree[VP9_I8X8_MODES * 2 - 2] = { - 2, 14, - -DC_PRED, 4, - 6, 8, - -D45_PRED, -D135_PRED, - 10, 12, - -D117_PRED, -D153_PRED, - -D27_PRED, -D63_PRED, - -V_PRED, 16, - -H_PRED, -TM_PRED +static const vp9_prob default_single_ref_p[REF_CONTEXTS][2] = { + { 33, 16 }, + { 77, 74 }, + { 142, 142 }, + { 172, 170 }, + { 238, 247 } }; -const vp9_tree_index vp9_uv_mode_tree[VP9_UV_MODES * 2 - 2] = { - 2, 14, - -DC_PRED, 4, - 6, 8, - -D45_PRED, -D135_PRED, - 10, 12, - -D117_PRED, -D153_PRED, - -D27_PRED, -D63_PRED, - -V_PRED, 16, - -H_PRED, -TM_PRED +const vp9_prob vp9_default_tx_probs_32x32p[TX_SIZE_CONTEXTS] + [TX_SIZE_MAX_SB - 1] = { + { 3, 136, 37, }, + { 5, 52, 13, }, }; - -const vp9_tree_index vp9_mbsplit_tree[6] = { - -PARTITIONING_4X4, 2, - -PARTITIONING_8X8, 4, - -PARTITIONING_16X8, -PARTITIONING_8X16, +const vp9_prob vp9_default_tx_probs_16x16p[TX_SIZE_CONTEXTS] + [TX_SIZE_MAX_SB - 2] = { + { 20, 152, }, + { 15, 101, }, }; - -const vp9_tree_index vp9_mv_ref_tree[8] = { - -ZEROMV, 2, - -NEARESTMV, 4, - -NEARMV, 6, - -NEWMV, -SPLITMV +const vp9_prob vp9_default_tx_probs_8x8p[TX_SIZE_CONTEXTS] + [TX_SIZE_MAX_SB - 3] = { + { 100, }, + { 66, }, }; -const vp9_tree_index vp9_sb_mv_ref_tree[6] = { - -ZEROMV, 2, - -NEARESTMV, 4, - -NEARMV, -NEWMV -}; +void tx_counts_to_branch_counts_32x32(unsigned int *tx_count_32x32p, + unsigned int (*ct_32x32p)[2]) { + ct_32x32p[0][0] = tx_count_32x32p[TX_4X4]; + ct_32x32p[0][1] = tx_count_32x32p[TX_8X8] + + tx_count_32x32p[TX_16X16] + + tx_count_32x32p[TX_32X32]; + ct_32x32p[1][0] = tx_count_32x32p[TX_8X8]; + ct_32x32p[1][1] = tx_count_32x32p[TX_16X16] + + tx_count_32x32p[TX_32X32]; + ct_32x32p[2][0] = tx_count_32x32p[TX_16X16]; + ct_32x32p[2][1] = tx_count_32x32p[TX_32X32]; +} -const vp9_tree_index vp9_sub_mv_ref_tree[6] = { - -LEFT4X4, 2, - -ABOVE4X4, 4, - -ZERO4X4, -NEW4X4 -}; +void tx_counts_to_branch_counts_16x16(unsigned int *tx_count_16x16p, + unsigned int (*ct_16x16p)[2]) { + ct_16x16p[0][0] = tx_count_16x16p[TX_4X4]; + ct_16x16p[0][1] = tx_count_16x16p[TX_8X8] + + tx_count_16x16p[TX_16X16]; + ct_16x16p[1][0] = tx_count_16x16p[TX_8X8]; + ct_16x16p[1][1] = tx_count_16x16p[TX_16X16]; +} -struct vp9_token_struct vp9_bmode_encodings[VP9_NKF_BINTRAMODES]; -struct vp9_token_struct vp9_kf_bmode_encodings[VP9_KF_BINTRAMODES]; -struct vp9_token_struct vp9_ymode_encodings[VP9_YMODES]; -struct vp9_token_struct vp9_sb_ymode_encodings[VP9_I32X32_MODES]; -struct vp9_token_struct vp9_sb_kf_ymode_encodings[VP9_I32X32_MODES]; -struct vp9_token_struct vp9_kf_ymode_encodings[VP9_YMODES]; -struct vp9_token_struct vp9_uv_mode_encodings[VP9_UV_MODES]; -struct vp9_token_struct vp9_i8x8_mode_encodings[VP9_I8X8_MODES]; -struct vp9_token_struct vp9_mbsplit_encodings[VP9_NUMMBSPLITS]; +void tx_counts_to_branch_counts_8x8(unsigned int *tx_count_8x8p, + unsigned int 
(*ct_8x8p)[2]) { + ct_8x8p[0][0] = tx_count_8x8p[TX_4X4]; + ct_8x8p[0][1] = tx_count_8x8p[TX_8X8]; +} -struct vp9_token_struct vp9_mv_ref_encoding_array[VP9_MVREFS]; -struct vp9_token_struct vp9_sb_mv_ref_encoding_array[VP9_MVREFS]; -struct vp9_token_struct vp9_sub_mv_ref_encoding_array[VP9_SUBMVREFS]; +const vp9_prob vp9_default_mbskip_probs[MBSKIP_CONTEXTS] = { + 192, 128, 64 +}; void vp9_init_mbmode_probs(VP9_COMMON *x) { - unsigned int bct [VP9_YMODES] [2]; /* num Ymodes > num UV modes */ - - vp9_tree_probs_from_distribution(vp9_ymode_tree, x->fc.ymode_prob, - bct, y_mode_cts, 0); - vp9_tree_probs_from_distribution(vp9_sb_ymode_tree, x->fc.sb_ymode_prob, - bct, y_mode_cts, 0); - { - int i; - for (i = 0; i < 8; i++) { - vp9_tree_probs_from_distribution(vp9_kf_ymode_tree, x->kf_ymode_prob[i], - bct, kf_y_mode_cts[i], 0); - vp9_tree_probs_from_distribution(vp9_sb_kf_ymode_tree, - x->sb_kf_ymode_prob[i], bct, - kf_y_mode_cts[i], 0); - } - } - { - int i; - for (i = 0; i < VP9_YMODES; i++) { - vp9_tree_probs_from_distribution(vp9_uv_mode_tree, x->kf_uv_mode_prob[i], - bct, kf_uv_mode_cts[i], 0); - vp9_tree_probs_from_distribution(vp9_uv_mode_tree, x->fc.uv_mode_prob[i], - bct, uv_mode_cts[i], 0); - } - } + vpx_memcpy(x->fc.uv_mode_prob, default_if_uv_probs, + sizeof(default_if_uv_probs)); + vpx_memcpy(x->kf_uv_mode_prob, default_kf_uv_probs, + sizeof(default_kf_uv_probs)); + vpx_memcpy(x->fc.y_mode_prob, default_if_y_probs, + sizeof(default_if_y_probs)); - vp9_tree_probs_from_distribution(vp9_i8x8_mode_tree, x->fc.i8x8_mode_prob, - bct, i8x8_mode_cts, 0); - - vpx_memcpy(x->fc.sub_mv_ref_prob, vp9_sub_mv_ref_prob2, - sizeof(vp9_sub_mv_ref_prob2)); - vpx_memcpy(x->fc.mbsplit_prob, vp9_mbsplit_probs, sizeof(vp9_mbsplit_probs)); vpx_memcpy(x->fc.switchable_interp_prob, vp9_switchable_interp_prob, sizeof(vp9_switchable_interp_prob)); -#if CONFIG_COMP_INTERINTRA_PRED - x->fc.interintra_prob = VP9_DEF_INTERINTRA_PROB; -#endif - x->ref_pred_probs[0] = 120; - x->ref_pred_probs[1] = 80; - x->ref_pred_probs[2] = 40; -} - -static void intra_bmode_probs_from_distribution( - vp9_prob p[VP9_NKF_BINTRAMODES - 1], - unsigned int branch_ct[VP9_NKF_BINTRAMODES - 1][2], - const unsigned int events[VP9_NKF_BINTRAMODES]) { - vp9_tree_probs_from_distribution(vp9_bmode_tree, p, branch_ct, events, 0); + vpx_memcpy(x->fc.partition_prob, vp9_partition_probs, + sizeof(vp9_partition_probs)); + + vpx_memcpy(x->fc.intra_inter_prob, default_intra_inter_p, + sizeof(default_intra_inter_p)); + vpx_memcpy(x->fc.comp_inter_prob, default_comp_inter_p, + sizeof(default_comp_inter_p)); + vpx_memcpy(x->fc.comp_ref_prob, default_comp_ref_p, + sizeof(default_comp_ref_p)); + vpx_memcpy(x->fc.single_ref_prob, default_single_ref_p, + sizeof(default_single_ref_p)); + vpx_memcpy(x->fc.tx_probs_32x32p, vp9_default_tx_probs_32x32p, + sizeof(vp9_default_tx_probs_32x32p)); + vpx_memcpy(x->fc.tx_probs_16x16p, vp9_default_tx_probs_16x16p, + sizeof(vp9_default_tx_probs_16x16p)); + vpx_memcpy(x->fc.tx_probs_8x8p, vp9_default_tx_probs_8x8p, + sizeof(vp9_default_tx_probs_8x8p)); + vpx_memcpy(x->fc.mbskip_probs, vp9_default_mbskip_probs, + sizeof(vp9_default_mbskip_probs)); } -void vp9_default_bmode_probs(vp9_prob p[VP9_NKF_BINTRAMODES - 1]) { - unsigned int branch_ct[VP9_NKF_BINTRAMODES - 1][2]; - intra_bmode_probs_from_distribution(p, branch_ct, bmode_cts); -} - -static void intra_kf_bmode_probs_from_distribution( - vp9_prob p[VP9_KF_BINTRAMODES - 1], - unsigned int branch_ct[VP9_KF_BINTRAMODES - 1][2], - const unsigned int 
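
The tx_counts_to_branch_counts_* helpers above restate a histogram of chosen transform sizes as per-node binary decisions on the tx-size tree: node 0 separates the smallest size from everything larger, node 1 the next size, and so on, so each observation contributes to every node along its path. A standalone check of the 16x16 variant:

  #include <stdio.h>

  enum { TX_4X4, TX_8X8, TX_16X16 };

  /* Mirrors tx_counts_to_branch_counts_16x16() above. */
  static void to_branch_counts_16x16(const unsigned int *tx_count,
                                     unsigned int ct[2][2]) {
    ct[0][0] = tx_count[TX_4X4];
    ct[0][1] = tx_count[TX_8X8] + tx_count[TX_16X16];
    ct[1][0] = tx_count[TX_8X8];
    ct[1][1] = tx_count[TX_16X16];
  }

  int main(void) {
    const unsigned int tx_count[3] = { 10, 30, 60 };
    unsigned int ct[2][2];
    to_branch_counts_16x16(tx_count, ct);
    /* node 0: 10 vs 90 (4x4 against larger); node 1: 30 vs 60. */
    printf("node0 %u/%u, node1 %u/%u\n", ct[0][0], ct[0][1], ct[1][0], ct[1][1]);
    return 0;
  }

These branch counts then go through the same count-saturated weighted_prob() blend as every other adapted probability (see the TX_MODE_SELECT block further down).
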
events[VP9_KF_BINTRAMODES]) { - vp9_tree_probs_from_distribution(vp9_kf_bmode_tree, p, branch_ct, events, 0); -} - -void vp9_kf_default_bmode_probs(vp9_prob p[VP9_KF_BINTRAMODES] - [VP9_KF_BINTRAMODES] - [VP9_KF_BINTRAMODES - 1]) { - unsigned int branch_ct[VP9_KF_BINTRAMODES - 1][2]; - int i, j; - - for (i = 0; i < VP9_KF_BINTRAMODES; ++i) { - for (j = 0; j < VP9_KF_BINTRAMODES; ++j) { - intra_kf_bmode_probs_from_distribution( - p[i][j], branch_ct, vp9_kf_default_bmode_counts[i][j]); - } - } -} - -#if VP9_SWITCHABLE_FILTERS == 3 const vp9_tree_index vp9_switchable_interp_tree[VP9_SWITCHABLE_FILTERS*2-2] = { -0, 2, -1, -2 }; -struct vp9_token_struct vp9_switchable_interp_encodings[VP9_SWITCHABLE_FILTERS]; -#if CONFIG_ENABLE_6TAP -const INTERPOLATIONFILTERTYPE vp9_switchable_interp[VP9_SWITCHABLE_FILTERS] = { - SIXTAP, EIGHTTAP, EIGHTTAP_SHARP}; -const int vp9_switchable_interp_map[SWITCHABLE+1] = {0, -1, 1, 2, -1, -1}; -#else +struct vp9_token vp9_switchable_interp_encodings[VP9_SWITCHABLE_FILTERS]; const INTERPOLATIONFILTERTYPE vp9_switchable_interp[VP9_SWITCHABLE_FILTERS] = { EIGHTTAP, EIGHTTAP_SMOOTH, EIGHTTAP_SHARP}; const int vp9_switchable_interp_map[SWITCHABLE+1] = {1, 0, 2, -1, -1}; -#endif const vp9_prob vp9_switchable_interp_prob [VP9_SWITCHABLE_FILTERS+1] [VP9_SWITCHABLE_FILTERS-1] = { - {248, 192}, { 32, 248}, { 32, 32}, {192, 160} -}; -#elif VP9_SWITCHABLE_FILTERS == 2 -const vp9_tree_index vp9_switchable_interp_tree[VP9_SWITCHABLE_FILTERS*2-2] = { - -0, -1, + { 235, 162, }, + { 36, 255, }, + { 34, 3, }, + { 149, 144, }, }; -struct vp9_token_struct vp9_switchable_interp_encodings[VP9_SWITCHABLE_FILTERS]; -const vp9_prob vp9_switchable_interp_prob [VP9_SWITCHABLE_FILTERS+1] - [VP9_SWITCHABLE_FILTERS-1] = { - {248}, - { 64}, - {192}, -}; -const INTERPOLATIONFILTERTYPE vp9_switchable_interp[VP9_SWITCHABLE_FILTERS] = { - EIGHTTAP, EIGHTTAP_SHARP}; -#if CONFIG_ENABLE_6TAP -const int vp9_switchable_interp_map[SWITCHABLE+1] = {-1, -1, 0, 1, -1, -1}; -#else -const int vp9_switchable_interp_map[SWITCHABLE+1] = {-1, 0, 1, -1, -1}; -#endif -#endif // VP9_SWITCHABLE_FILTERS // Indicates if the filter is interpolating or non-interpolating -// Note currently only the EIGHTTAP_SMOOTH is non-interpolating -#if CONFIG_ENABLE_6TAP -const int vp9_is_interpolating_filter[SWITCHABLE + 1] = {1, 0, 1, 1, 1, -1}; -#else -const int vp9_is_interpolating_filter[SWITCHABLE + 1] = {0, 1, 1, 1, -1}; -#endif +const int vp9_is_interpolating_filter[SWITCHABLE + 1] = {1, 1, 1, 1, -1}; void vp9_entropy_mode_init() { - vp9_tokens_from_tree(vp9_kf_bmode_encodings, vp9_kf_bmode_tree); - vp9_tokens_from_tree(vp9_bmode_encodings, vp9_bmode_tree); - vp9_tokens_from_tree(vp9_ymode_encodings, vp9_ymode_tree); - vp9_tokens_from_tree(vp9_kf_ymode_encodings, vp9_kf_ymode_tree); - vp9_tokens_from_tree(vp9_sb_ymode_encodings, vp9_sb_ymode_tree); - vp9_tokens_from_tree(vp9_sb_kf_ymode_encodings, vp9_sb_kf_ymode_tree); - vp9_tokens_from_tree(vp9_uv_mode_encodings, vp9_uv_mode_tree); - vp9_tokens_from_tree(vp9_i8x8_mode_encodings, vp9_i8x8_mode_tree); - vp9_tokens_from_tree(vp9_mbsplit_encodings, vp9_mbsplit_tree); + vp9_tokens_from_tree(vp9_intra_mode_encodings, vp9_intra_mode_tree); vp9_tokens_from_tree(vp9_switchable_interp_encodings, vp9_switchable_interp_tree); + vp9_tokens_from_tree(vp9_partition_encodings, vp9_partition_tree); - vp9_tokens_from_tree_offset(vp9_mv_ref_encoding_array, - vp9_mv_ref_tree, NEARESTMV); vp9_tokens_from_tree_offset(vp9_sb_mv_ref_encoding_array, vp9_sb_mv_ref_tree, NEARESTMV); - 
vp9_tokens_from_tree_offset(vp9_sub_mv_ref_encoding_array, - vp9_sub_mv_ref_tree, LEFT4X4); } void vp9_init_mode_contexts(VP9_COMMON *pc) { - vpx_memset(pc->fc.mv_ref_ct, 0, sizeof(pc->fc.mv_ref_ct)); - vpx_memcpy(pc->fc.vp9_mode_contexts, - vp9_default_mode_contexts, - sizeof(vp9_default_mode_contexts)); + vpx_memset(pc->fc.inter_mode_counts, 0, sizeof(pc->fc.inter_mode_counts)); + vpx_memcpy(pc->fc.inter_mode_probs, + vp9_default_inter_mode_probs, + sizeof(vp9_default_inter_mode_probs)); } void vp9_accum_mv_refs(VP9_COMMON *pc, MB_PREDICTION_MODE m, const int context) { - unsigned int (*mv_ref_ct)[4][2]; - - mv_ref_ct = pc->fc.mv_ref_ct; + unsigned int (*inter_mode_counts)[VP9_INTER_MODES - 1][2] = + pc->fc.inter_mode_counts; if (m == ZEROMV) { - ++mv_ref_ct[context][0][0]; + ++inter_mode_counts[context][0][0]; } else { - ++mv_ref_ct[context][0][1]; + ++inter_mode_counts[context][0][1]; if (m == NEARESTMV) { - ++mv_ref_ct[context][1][0]; + ++inter_mode_counts[context][1][0]; } else { - ++mv_ref_ct[context][1][1]; + ++inter_mode_counts[context][1][1]; if (m == NEARMV) { - ++mv_ref_ct[context][2][0]; + ++inter_mode_counts[context][2][0]; } else { - ++mv_ref_ct[context][2][1]; - if (m == NEWMV) { - ++mv_ref_ct[context][3][0]; - } else { - ++mv_ref_ct[context][3][1]; - } + ++inter_mode_counts[context][2][1]; } } } @@ -482,50 +292,35 @@ void vp9_accum_mv_refs(VP9_COMMON *pc, #define MVREF_MAX_UPDATE_FACTOR 128 void vp9_adapt_mode_context(VP9_COMMON *pc) { int i, j; - unsigned int (*mv_ref_ct)[4][2]; - int (*mode_context)[4]; - - mode_context = pc->fc.vp9_mode_contexts; - - mv_ref_ct = pc->fc.mv_ref_ct; + unsigned int (*inter_mode_counts)[VP9_INTER_MODES - 1][2] = + pc->fc.inter_mode_counts; + vp9_prob (*mode_context)[VP9_INTER_MODES - 1] = pc->fc.inter_mode_probs; for (j = 0; j < INTER_MODE_CONTEXTS; j++) { - for (i = 0; i < 4; i++) { - int count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1], factor; - + for (i = 0; i < VP9_INTER_MODES - 1; i++) { + int count = inter_mode_counts[j][i][0] + inter_mode_counts[j][i][1]; + int factor; count = count > MVREF_COUNT_SAT ? MVREF_COUNT_SAT : count; factor = (MVREF_MAX_UPDATE_FACTOR * count / MVREF_COUNT_SAT); - mode_context[j][i] = weighted_prob(pc->fc.vp9_mode_contexts[j][i], - get_binary_prob(mv_ref_ct[j][i][0], - mv_ref_ct[j][i][1]), - factor); + mode_context[j][i] = weighted_prob( + pc->fc.pre_inter_mode_probs[j][i], + get_binary_prob(inter_mode_counts[j][i][0], + inter_mode_counts[j][i][1]), + factor); } } } -#ifdef MODE_STATS -#include "vp9/common/vp9_modecont.h" -void print_mode_contexts(VP9_COMMON *pc) { - int j, i; - printf("\n====================\n"); - for (j = 0; j < INTER_MODE_CONTEXTS; j++) { - for (i = 0; i < 4; i++) { - printf("%4d ", pc->fc.mode_context[j][i]); - } - printf("\n"); - } - printf("====================\n"); - for (j = 0; j < INTER_MODE_CONTEXTS; j++) { - for (i = 0; i < 4; i++) { - printf("%4d ", pc->fc.mode_context_a[j][i]); - } - printf("\n"); - } +#define MODE_COUNT_SAT 20 +#define MODE_MAX_UPDATE_FACTOR 128 +static int update_mode_ct(vp9_prob pre_prob, vp9_prob prob, + unsigned int branch_ct[2]) { + int factor, count = branch_ct[0] + branch_ct[1]; + count = count > MODE_COUNT_SAT ? 
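
vp9_accum_mv_refs() above walks the same ladder as vp9_sb_mv_ref_tree: ZEROMV, then NEARESTMV, then NEARMV, with NEWMV left as the final '1' branch. Each coded mode increments the '0' side of its own node and the '1' side of every node before it, which is why the explicit NEWMV counter from the old four-node version could be dropped. Coding NEARMV in context c, for instance, touches exactly:

  inter_mode_counts[c][0][1]++;  /* not ZEROMV    */
  inter_mode_counts[c][1][1]++;  /* not NEARESTMV */
  inter_mode_counts[c][2][0]++;  /* NEARMV        */

and NEWMV would flip that last increment to the [2][1] side.
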
MODE_COUNT_SAT : count; + factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT); + return weighted_prob(pre_prob, prob, factor); } -#endif -#define MODE_COUNT_SAT 20 -#define MODE_MAX_UPDATE_FACTOR 144 static void update_mode_probs(int n_modes, const vp9_tree_index *tree, unsigned int *cnt, vp9_prob *pre_probs, vp9_prob *dst_probs, @@ -533,119 +328,158 @@ static void update_mode_probs(int n_modes, #define MAX_PROBS 32 vp9_prob probs[MAX_PROBS]; unsigned int branch_ct[MAX_PROBS][2]; - int t, count, factor; + int t; assert(n_modes - 1 < MAX_PROBS); vp9_tree_probs_from_distribution(tree, probs, branch_ct, cnt, tok0_offset); - for (t = 0; t < n_modes - 1; ++t) { - count = branch_ct[t][0] + branch_ct[t][1]; - count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count; - factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT); - dst_probs[t] = weighted_prob(pre_probs[t], probs[t], factor); - } + for (t = 0; t < n_modes - 1; ++t) + dst_probs[t] = update_mode_ct(pre_probs[t], probs[t], branch_ct[t]); +} + +static int update_mode_ct2(vp9_prob pre_prob, unsigned int branch_ct[2]) { + return update_mode_ct(pre_prob, get_binary_prob(branch_ct[0], + branch_ct[1]), branch_ct); } // #define MODE_COUNT_TESTING void vp9_adapt_mode_probs(VP9_COMMON *cm) { - int i; + int i, j; + FRAME_CONTEXT *fc = &cm->fc; #ifdef MODE_COUNT_TESTING int t; printf("static const unsigned int\nymode_counts" - "[VP9_YMODES] = {\n"); - for (t = 0; t < VP9_YMODES; ++t) printf("%d, ", cm->fc.ymode_counts[t]); + "[VP9_INTRA_MODES] = {\n"); + for (t = 0; t < VP9_INTRA_MODES; ++t) + printf("%d, ", fc->ymode_counts[t]); printf("};\n"); printf("static const unsigned int\nuv_mode_counts" - "[VP9_YMODES] [VP9_UV_MODES] = {\n"); - for (i = 0; i < VP9_YMODES; ++i) { + "[VP9_INTRA_MODES] [VP9_INTRA_MODES] = {\n"); + for (i = 0; i < VP9_INTRA_MODES; ++i) { printf(" {"); - for (t = 0; t < VP9_UV_MODES; ++t) printf("%d, ", cm->fc.uv_mode_counts[i][t]); + for (t = 0; t < VP9_INTRA_MODES; ++t) + printf("%d, ", fc->uv_mode_counts[i][t]); printf("},\n"); } printf("};\n"); printf("static const unsigned int\nbmode_counts" "[VP9_NKF_BINTRAMODES] = {\n"); for (t = 0; t < VP9_NKF_BINTRAMODES; ++t) - printf("%d, ", cm->fc.bmode_counts[t]); + printf("%d, ", fc->bmode_counts[t]); printf("};\n"); printf("static const unsigned int\ni8x8_mode_counts" "[VP9_I8X8_MODES] = {\n"); - for (t = 0; t < VP9_I8X8_MODES; ++t) printf("%d, ", cm->fc.i8x8_mode_counts[t]); - printf("};\n"); - printf("static const unsigned int\nsub_mv_ref_counts" - "[SUBMVREF_COUNT] [VP9_SUBMVREFS] = {\n"); - for (i = 0; i < SUBMVREF_COUNT; ++i) { - printf(" {"); - for (t = 0; t < VP9_SUBMVREFS; ++t) printf("%d, ", cm->fc.sub_mv_ref_counts[i][t]); - printf("},\n"); - } + for (t = 0; t < VP9_I8X8_MODES; ++t) + printf("%d, ", fc->i8x8_mode_counts[t]); printf("};\n"); printf("static const unsigned int\nmbsplit_counts" "[VP9_NUMMBSPLITS] = {\n"); - for (t = 0; t < VP9_NUMMBSPLITS; ++t) printf("%d, ", cm->fc.mbsplit_counts[t]); - printf("};\n"); -#if CONFIG_COMP_INTERINTRA_PRED - printf("static const unsigned int\ninterintra_counts" - "[2] = {\n"); - for (t = 0; t < 2; ++t) printf("%d, ", cm->fc.interintra_counts[t]); + for (t = 0; t < VP9_NUMMBSPLITS; ++t) + printf("%d, ", fc->mbsplit_counts[t]); printf("};\n"); -#endif #endif - update_mode_probs(VP9_YMODES, vp9_ymode_tree, - cm->fc.ymode_counts, cm->fc.pre_ymode_prob, - cm->fc.ymode_prob, 0); - update_mode_probs(VP9_I32X32_MODES, vp9_sb_ymode_tree, - cm->fc.sb_ymode_counts, cm->fc.pre_sb_ymode_prob, - cm->fc.sb_ymode_prob, 0); - for (i = 
0; i < VP9_YMODES; ++i) { - update_mode_probs(VP9_UV_MODES, vp9_uv_mode_tree, - cm->fc.uv_mode_counts[i], cm->fc.pre_uv_mode_prob[i], - cm->fc.uv_mode_prob[i], 0); - } - update_mode_probs(VP9_NKF_BINTRAMODES, vp9_bmode_tree, - cm->fc.bmode_counts, cm->fc.pre_bmode_prob, - cm->fc.bmode_prob, 0); - update_mode_probs(VP9_I8X8_MODES, - vp9_i8x8_mode_tree, cm->fc.i8x8_mode_counts, - cm->fc.pre_i8x8_mode_prob, cm->fc.i8x8_mode_prob, 0); - for (i = 0; i < SUBMVREF_COUNT; ++i) { - update_mode_probs(VP9_SUBMVREFS, - vp9_sub_mv_ref_tree, cm->fc.sub_mv_ref_counts[i], - cm->fc.pre_sub_mv_ref_prob[i], cm->fc.sub_mv_ref_prob[i], - LEFT4X4); + for (i = 0; i < INTRA_INTER_CONTEXTS; i++) + fc->intra_inter_prob[i] = update_mode_ct2(fc->pre_intra_inter_prob[i], + fc->intra_inter_count[i]); + for (i = 0; i < COMP_INTER_CONTEXTS; i++) + fc->comp_inter_prob[i] = update_mode_ct2(fc->pre_comp_inter_prob[i], + fc->comp_inter_count[i]); + for (i = 0; i < REF_CONTEXTS; i++) + fc->comp_ref_prob[i] = update_mode_ct2(fc->pre_comp_ref_prob[i], + fc->comp_ref_count[i]); + for (i = 0; i < REF_CONTEXTS; i++) + for (j = 0; j < 2; j++) + fc->single_ref_prob[i][j] = update_mode_ct2(fc->pre_single_ref_prob[i][j], + fc->single_ref_count[i][j]); + + for (i = 0; i < BLOCK_SIZE_GROUPS; i++) + update_mode_probs(VP9_INTRA_MODES, vp9_intra_mode_tree, + fc->y_mode_counts[i], fc->pre_y_mode_prob[i], + fc->y_mode_prob[i], 0); + + for (i = 0; i < VP9_INTRA_MODES; ++i) + update_mode_probs(VP9_INTRA_MODES, vp9_intra_mode_tree, + fc->uv_mode_counts[i], fc->pre_uv_mode_prob[i], + fc->uv_mode_prob[i], 0); + + for (i = 0; i < NUM_PARTITION_CONTEXTS; i++) + update_mode_probs(PARTITION_TYPES, vp9_partition_tree, + fc->partition_counts[i], fc->pre_partition_prob[i], + fc->partition_prob[INTER_FRAME][i], 0); + + if (cm->mcomp_filter_type == SWITCHABLE) { + for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++) { + update_mode_probs(VP9_SWITCHABLE_FILTERS, vp9_switchable_interp_tree, + fc->switchable_interp_count[i], + fc->pre_switchable_interp_prob[i], + fc->switchable_interp_prob[i], 0); + } } - update_mode_probs(VP9_NUMMBSPLITS, vp9_mbsplit_tree, - cm->fc.mbsplit_counts, cm->fc.pre_mbsplit_prob, - cm->fc.mbsplit_prob, 0); -#if CONFIG_COMP_INTERINTRA_PRED - if (cm->use_interintra) { - int factor, interintra_prob, count; - - interintra_prob = get_binary_prob(cm->fc.interintra_counts[0], - cm->fc.interintra_counts[1]); - count = cm->fc.interintra_counts[0] + cm->fc.interintra_counts[1]; - count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count; - factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT); - cm->fc.interintra_prob = weighted_prob(cm->fc.pre_interintra_prob, - interintra_prob, factor); + if (cm->txfm_mode == TX_MODE_SELECT) { + int j; + unsigned int branch_ct_8x8p[TX_SIZE_MAX_SB - 3][2]; + unsigned int branch_ct_16x16p[TX_SIZE_MAX_SB - 2][2]; + unsigned int branch_ct_32x32p[TX_SIZE_MAX_SB - 1][2]; + for (i = 0; i < TX_SIZE_CONTEXTS; ++i) { + tx_counts_to_branch_counts_8x8(cm->fc.tx_count_8x8p[i], + branch_ct_8x8p); + for (j = 0; j < TX_SIZE_MAX_SB - 3; ++j) { + int factor; + int count = branch_ct_8x8p[j][0] + branch_ct_8x8p[j][1]; + vp9_prob prob = get_binary_prob(branch_ct_8x8p[j][0], + branch_ct_8x8p[j][1]); + count = count > MODE_COUNT_SAT ? 
MODE_COUNT_SAT : count; + factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT); + cm->fc.tx_probs_8x8p[i][j] = weighted_prob( + cm->fc.pre_tx_probs_8x8p[i][j], prob, factor); + } + } + for (i = 0; i < TX_SIZE_CONTEXTS; ++i) { + tx_counts_to_branch_counts_16x16(cm->fc.tx_count_16x16p[i], + branch_ct_16x16p); + for (j = 0; j < TX_SIZE_MAX_SB - 2; ++j) { + int factor; + int count = branch_ct_16x16p[j][0] + branch_ct_16x16p[j][1]; + vp9_prob prob = get_binary_prob(branch_ct_16x16p[j][0], + branch_ct_16x16p[j][1]); + count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count; + factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT); + cm->fc.tx_probs_16x16p[i][j] = weighted_prob( + cm->fc.pre_tx_probs_16x16p[i][j], prob, factor); + } + } + for (i = 0; i < TX_SIZE_CONTEXTS; ++i) { + tx_counts_to_branch_counts_32x32(cm->fc.tx_count_32x32p[i], + branch_ct_32x32p); + for (j = 0; j < TX_SIZE_MAX_SB - 1; ++j) { + int factor; + int count = branch_ct_32x32p[j][0] + branch_ct_32x32p[j][1]; + vp9_prob prob = get_binary_prob(branch_ct_32x32p[j][0], + branch_ct_32x32p[j][1]); + count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count; + factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT); + cm->fc.tx_probs_32x32p[i][j] = weighted_prob( + cm->fc.pre_tx_probs_32x32p[i][j], prob, factor); + } + } } -#endif + for (i = 0; i < MBSKIP_CONTEXTS; ++i) + fc->mbskip_probs[i] = update_mode_ct2(fc->pre_mbskip_probs[i], + fc->mbskip_count[i]); } static void set_default_lf_deltas(MACROBLOCKD *xd) { xd->mode_ref_lf_delta_enabled = 1; xd->mode_ref_lf_delta_update = 1; - xd->ref_lf_deltas[INTRA_FRAME] = 2; + xd->ref_lf_deltas[INTRA_FRAME] = 1; xd->ref_lf_deltas[LAST_FRAME] = 0; - xd->ref_lf_deltas[GOLDEN_FRAME] = -2; - xd->ref_lf_deltas[ALTREF_FRAME] = -2; + xd->ref_lf_deltas[GOLDEN_FRAME] = -1; + xd->ref_lf_deltas[ALTREF_FRAME] = -1; - xd->mode_lf_deltas[0] = 4; // BPRED - xd->mode_lf_deltas[1] = -2; // Zero - xd->mode_lf_deltas[2] = 2; // New mv - xd->mode_lf_deltas[3] = 4; // Split mv + xd->mode_lf_deltas[0] = 0; // Zero + xd->mode_lf_deltas[1] = 0; // New mv } void vp9_setup_past_independence(VP9_COMMON *cm, MACROBLOCKD *xd) { @@ -655,42 +489,47 @@ void vp9_setup_past_independence(VP9_COMMON *cm, MACROBLOCKD *xd) { vp9_clearall_segfeatures(xd); xd->mb_segment_abs_delta = SEGMENT_DELTADATA; if (cm->last_frame_seg_map) - vpx_memset(cm->last_frame_seg_map, 0, (cm->mb_rows * cm->mb_cols)); + vpx_memset(cm->last_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols)); - /* reset the mode ref deltas for loop filter */ + // Reset the mode ref deltas for loop filter vpx_memset(xd->last_ref_lf_deltas, 0, sizeof(xd->last_ref_lf_deltas)); vpx_memset(xd->last_mode_lf_deltas, 0, sizeof(xd->last_mode_lf_deltas)); set_default_lf_deltas(xd); vp9_default_coef_probs(cm); vp9_init_mbmode_probs(cm); - vp9_default_bmode_probs(cm->fc.bmode_prob); - vp9_kf_default_bmode_probs(cm->kf_bmode_prob); + vpx_memcpy(cm->kf_y_mode_prob, vp9_kf_default_bmode_probs, + sizeof(vp9_kf_default_bmode_probs)); vp9_init_mv_probs(cm); + // To force update of the sharpness cm->last_sharpness_level = -1; vp9_init_mode_contexts(cm); - for (i = 0; i < NUM_FRAME_CONTEXTS; i++) { - vpx_memcpy(&cm->frame_contexts[i], &cm->fc, sizeof(cm->fc)); + if ((cm->frame_type == KEY_FRAME) || + cm->error_resilient_mode || (cm->reset_frame_context == 3)) { + // Reset all frame contexts. 
+ for (i = 0; i < NUM_FRAME_CONTEXTS; ++i) + vpx_memcpy(&cm->frame_contexts[i], &cm->fc, sizeof(cm->fc)); + } else if (cm->reset_frame_context == 2) { + // Reset only the frame context specified in the frame header. + vpx_memcpy(&cm->frame_contexts[cm->frame_context_idx], &cm->fc, + sizeof(cm->fc)); } vpx_memset(cm->prev_mip, 0, - (cm->mb_cols + 1) * (cm->mb_rows + 1)* sizeof(MODE_INFO)); + cm->mode_info_stride * (cm->mi_rows + 1) * sizeof(MODE_INFO)); vpx_memset(cm->mip, 0, - (cm->mb_cols + 1) * (cm->mb_rows + 1)* sizeof(MODE_INFO)); + cm->mode_info_stride * (cm->mi_rows + 1) * sizeof(MODE_INFO)); vp9_update_mode_info_border(cm, cm->mip); vp9_update_mode_info_in_image(cm, cm->mi); -#if CONFIG_NEW_MVREF - // Defaults probabilities for encoding the MV ref id signal - vpx_memset(xd->mb_mv_ref_probs, VP9_DEFAULT_MV_REF_PROB, - sizeof(xd->mb_mv_ref_probs)); -#endif - cm->ref_frame_sign_bias[GOLDEN_FRAME] = 0; - cm->ref_frame_sign_bias[ALTREF_FRAME] = 0; + vp9_update_mode_info_border(cm, cm->prev_mip); + vp9_update_mode_info_in_image(cm, cm->prev_mi); + + vpx_memset(cm->ref_frame_sign_bias, 0, sizeof(cm->ref_frame_sign_bias)); cm->frame_context_idx = 0; } diff --git a/vp9/common/vp9_entropymode.h b/vp9/common/vp9_entropymode.h index 8b0caf6eb38d04479bf921b33b89334c8be182e5..aa8aec7d2af87dcf2b7dc71439f0a3dcdff3c5dc 100644 --- a/vp9/common/vp9_entropymode.h +++ b/vp9/common/vp9_entropymode.h @@ -15,60 +15,34 @@ #include "vp9/common/vp9_treecoder.h" #define SUBMVREF_COUNT 5 -#define VP9_NUMMBSPLITS 4 +#define TX_SIZE_CONTEXTS 2 -#if CONFIG_COMP_INTERINTRA_PRED -#define VP9_DEF_INTERINTRA_PROB 248 -#define VP9_UPD_INTERINTRA_PROB 192 -// whether to use a separate uv mode (1) or use the same as the y mode (0) -#define SEPARATE_INTERINTRA_UV 0 -#endif +#define VP9_MODE_UPDATE_PROB 252 -typedef const int vp9_mbsplit[16]; - -extern vp9_mbsplit vp9_mbsplits[VP9_NUMMBSPLITS]; - -extern const int vp9_mbsplit_count[VP9_NUMMBSPLITS]; /* # of subsets */ - -extern const vp9_prob vp9_mbsplit_probs[VP9_NUMMBSPLITS - 1]; +// #define MODE_STATS extern int vp9_mv_cont(const int_mv *l, const int_mv *a); -extern const vp9_prob vp9_sub_mv_ref_prob2[SUBMVREF_COUNT][VP9_SUBMVREFS - 1]; -extern const unsigned int vp9_kf_default_bmode_counts[VP9_KF_BINTRAMODES] - [VP9_KF_BINTRAMODES] - [VP9_KF_BINTRAMODES]; +extern const vp9_prob vp9_kf_default_bmode_probs[VP9_INTRA_MODES] + [VP9_INTRA_MODES] + [VP9_INTRA_MODES - 1]; -extern const vp9_tree_index vp9_bmode_tree[]; -extern const vp9_tree_index vp9_kf_bmode_tree[]; - -extern const vp9_tree_index vp9_ymode_tree[]; -extern const vp9_tree_index vp9_kf_ymode_tree[]; -extern const vp9_tree_index vp9_uv_mode_tree[]; -#define vp9_sb_ymode_tree vp9_uv_mode_tree -#define vp9_sb_kf_ymode_tree vp9_uv_mode_tree -extern const vp9_tree_index vp9_i8x8_mode_tree[]; -extern const vp9_tree_index vp9_mbsplit_tree[]; -extern const vp9_tree_index vp9_mv_ref_tree[]; +extern const vp9_tree_index vp9_intra_mode_tree[]; extern const vp9_tree_index vp9_sb_mv_ref_tree[]; -extern const vp9_tree_index vp9_sub_mv_ref_tree[]; - -extern struct vp9_token_struct vp9_bmode_encodings[VP9_NKF_BINTRAMODES]; -extern struct vp9_token_struct vp9_kf_bmode_encodings[VP9_KF_BINTRAMODES]; -extern struct vp9_token_struct vp9_ymode_encodings[VP9_YMODES]; -extern struct vp9_token_struct vp9_sb_ymode_encodings[VP9_I32X32_MODES]; -extern struct vp9_token_struct vp9_sb_kf_ymode_encodings[VP9_I32X32_MODES]; -extern struct vp9_token_struct vp9_kf_ymode_encodings[VP9_YMODES]; -extern struct vp9_token_struct 
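
The branch above puts the frame-context reset policy in one place; laid out as a table (the meaning of the remaining values is inferred from the fallthrough):

  condition                                           stored frame contexts
  -------------------------------------------------   ---------------------
  key frame, error-resilient, reset_frame_context=3   all NUM_FRAME_CONTEXTS slots reset to cm->fc
  reset_frame_context == 2                            only slot cm->frame_context_idx reset
  reset_frame_context == 0 or 1                       left untouched

Note also that the prev_mip/mip clears below now size by mode_info_stride and mi_rows, matching this file's switch from macroblock units to mode-info units.
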
vp9_i8x8_mode_encodings[VP9_I8X8_MODES]; -extern struct vp9_token_struct vp9_uv_mode_encodings[VP9_UV_MODES]; -extern struct vp9_token_struct vp9_mbsplit_encodings[VP9_NUMMBSPLITS]; + +extern struct vp9_token vp9_intra_mode_encodings[VP9_INTRA_MODES]; /* Inter mode values do not start at zero */ -extern struct vp9_token_struct vp9_mv_ref_encoding_array[VP9_MVREFS]; -extern struct vp9_token_struct vp9_sb_mv_ref_encoding_array[VP9_MVREFS]; -extern struct vp9_token_struct vp9_sub_mv_ref_encoding_array[VP9_SUBMVREFS]; +extern struct vp9_token vp9_sb_mv_ref_encoding_array[VP9_INTER_MODES]; + +// probability models for partition information +extern const vp9_tree_index vp9_partition_tree[]; +extern struct vp9_token vp9_partition_encodings[PARTITION_TYPES]; +extern const vp9_prob vp9_partition_probs[NUM_FRAME_TYPES] + [NUM_PARTITION_CONTEXTS] + [PARTITION_TYPES - 1]; void vp9_entropy_mode_init(void); @@ -87,12 +61,6 @@ extern void vp9_accum_mv_refs(struct VP9Common *pc, MB_PREDICTION_MODE m, const int context); -void vp9_default_bmode_probs(vp9_prob dest[VP9_NKF_BINTRAMODES - 1]); - -void vp9_kf_default_bmode_probs(vp9_prob dest[VP9_KF_BINTRAMODES] - [VP9_KF_BINTRAMODES] - [VP9_KF_BINTRAMODES - 1]); - void vp9_adapt_mode_probs(struct VP9Common *); #define VP9_SWITCHABLE_FILTERS 3 /* number of switchable filters */ @@ -107,10 +75,22 @@ extern const int vp9_is_interpolating_filter[SWITCHABLE + 1]; extern const vp9_tree_index vp9_switchable_interp_tree [2 * (VP9_SWITCHABLE_FILTERS - 1)]; -extern struct vp9_token_struct vp9_switchable_interp_encodings - [VP9_SWITCHABLE_FILTERS]; +extern struct vp9_token vp9_switchable_interp_encodings[VP9_SWITCHABLE_FILTERS]; extern const vp9_prob vp9_switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1] [VP9_SWITCHABLE_FILTERS - 1]; +extern const vp9_prob vp9_default_tx_probs_32x32p[TX_SIZE_CONTEXTS] + [TX_SIZE_MAX_SB - 1]; +extern const vp9_prob vp9_default_tx_probs_16x16p[TX_SIZE_CONTEXTS] + [TX_SIZE_MAX_SB - 2]; +extern const vp9_prob vp9_default_tx_probs_8x8p[TX_SIZE_CONTEXTS] + [TX_SIZE_MAX_SB - 3]; + +extern void tx_counts_to_branch_counts_32x32(unsigned int *tx_count_32x32p, + unsigned int (*ct_32x32p)[2]); +extern void tx_counts_to_branch_counts_16x16(unsigned int *tx_count_16x16p, + unsigned int (*ct_16x16p)[2]); +extern void tx_counts_to_branch_counts_8x8(unsigned int *tx_count_8x8p, + unsigned int (*ct_8x8p)[2]); #endif // VP9_COMMON_VP9_ENTROPYMODE_H_ diff --git a/vp9/common/vp9_entropymv.c b/vp9/common/vp9_entropymv.c index a4a9d5465ed43af0aff68ca4aa93a6fe77216dca..e07e43c8b51309b76078525bcf8ac89f3987bf5a 100644 --- a/vp9/common/vp9_entropymv.c +++ b/vp9/common/vp9_entropymv.c @@ -14,16 +14,11 @@ //#define MV_COUNT_TESTING -#define MV_COUNT_SAT 16 -#define MV_MAX_UPDATE_FACTOR 160 +#define MV_COUNT_SAT 20 +#define MV_MAX_UPDATE_FACTOR 128 -#if CONFIG_NEW_MVREF -/* Integer pel reference mv threshold for use of high-precision 1/8 mv */ -#define COMPANDED_MVREF_THRESH 1000000 -#else /* Integer pel reference mv threshold for use of high-precision 1/8 mv */ #define COMPANDED_MVREF_THRESH 8 -#endif /* Smooth or bias the mv-counts before prob computation */ /* #define SMOOTH_MV_COUNTS */ @@ -33,7 +28,7 @@ const vp9_tree_index vp9_mv_joint_tree[2 * MV_JOINTS - 2] = { -MV_JOINT_HNZVZ, 4, -MV_JOINT_HZVNZ, -MV_JOINT_HNZVNZ }; -struct vp9_token_struct vp9_mv_joint_encodings[MV_JOINTS]; +struct vp9_token vp9_mv_joint_encodings[MV_JOINTS]; const vp9_tree_index vp9_mv_class_tree[2 * MV_CLASSES - 2] = { -MV_CLASS_0, 2, @@ -47,19 +42,19 @@ const vp9_tree_index 
vp9_mv_class_tree[2 * MV_CLASSES - 2] = { -MV_CLASS_7, -MV_CLASS_8, -MV_CLASS_9, -MV_CLASS_10, }; -struct vp9_token_struct vp9_mv_class_encodings[MV_CLASSES]; +struct vp9_token vp9_mv_class_encodings[MV_CLASSES]; const vp9_tree_index vp9_mv_class0_tree [2 * CLASS0_SIZE - 2] = { -0, -1, }; -struct vp9_token_struct vp9_mv_class0_encodings[CLASS0_SIZE]; +struct vp9_token vp9_mv_class0_encodings[CLASS0_SIZE]; const vp9_tree_index vp9_mv_fp_tree [2 * 4 - 2] = { -0, 2, -1, 4, -2, -3 }; -struct vp9_token_struct vp9_mv_fp_encodings[4]; +struct vp9_token vp9_mv_fp_encodings[4]; const nmv_context vp9_default_nmv_context = { {32, 64, 96}, @@ -87,11 +82,15 @@ const nmv_context vp9_default_nmv_context = { }, }; -MV_JOINT_TYPE vp9_get_mv_joint(MV mv) { - if (mv.row == 0 && mv.col == 0) return MV_JOINT_ZERO; - else if (mv.row == 0 && mv.col != 0) return MV_JOINT_HNZVZ; - else if (mv.row != 0 && mv.col == 0) return MV_JOINT_HZVNZ; - else return MV_JOINT_HNZVNZ; +MV_JOINT_TYPE vp9_get_mv_joint(const MV *mv) { + if (mv->row == 0 && mv->col == 0) + return MV_JOINT_ZERO; + else if (mv->row == 0 && mv->col != 0) + return MV_JOINT_HNZVZ; + else if (mv->row != 0 && mv->col == 0) + return MV_JOINT_HZVNZ; + else + return MV_JOINT_HNZVNZ; } #define mv_class_base(c) ((c) ? (CLASS0_SIZE << (c + 2)) : 0) @@ -137,7 +136,8 @@ static void increment_nmv_component(int v, int incr, int usehp) { int s, z, c, o, d, e, f; - if (!incr) return; + if (!incr) + return; assert (v != 0); /* should not be zero */ s = v < 0; mvcomp->sign[s] += incr; @@ -152,8 +152,8 @@ static void increment_nmv_component(int v, if (c == MV_CLASS_0) { mvcomp->class0[d] += incr; } else { - int i, b; - b = c + CLASS0_BITS - 1; /* number of bits */ + int i; + int b = c + CLASS0_BITS - 1; // number of bits for (i = 0; i < b; ++i) mvcomp->bits[i][((d >> i) & 1)] += incr; } @@ -204,25 +204,22 @@ static void counts_to_context(nmv_component_counts *mvcomp, int usehp) { void vp9_increment_nmv(const MV *mv, const MV *ref, nmv_context_counts *mvctx, int usehp) { - MV_JOINT_TYPE j = vp9_get_mv_joint(*mv); + const MV_JOINT_TYPE j = vp9_get_mv_joint(mv); mvctx->joints[j]++; usehp = usehp && vp9_use_nmv_hp(ref); - if (j == MV_JOINT_HZVNZ || j == MV_JOINT_HNZVNZ) { + if (mv_joint_vertical(j)) increment_nmv_component_count(mv->row, &mvctx->comps[0], 1, usehp); - } - if (j == MV_JOINT_HNZVZ || j == MV_JOINT_HNZVNZ) { + + if (mv_joint_horizontal(j)) increment_nmv_component_count(mv->col, &mvctx->comps[1], 1, usehp); - } } -static void adapt_prob(vp9_prob *dest, vp9_prob prep, - unsigned int ct[2]) { - int count = ct[0] + ct[1]; +static void adapt_prob(vp9_prob *dest, vp9_prob prep, unsigned int ct[2]) { + const int count = MIN(ct[0] + ct[1], MV_COUNT_SAT); if (count) { - vp9_prob newp = get_binary_prob(ct[0], ct[1]); - count = count > MV_COUNT_SAT ? 
MV_COUNT_SAT : count; - *dest = weighted_prob(prep, newp, - MV_MAX_UPDATE_FACTOR * count / MV_COUNT_SAT); + const vp9_prob newp = get_binary_prob(ct[0], ct[1]); + const int factor = MV_MAX_UPDATE_FACTOR * count / MV_COUNT_SAT; + *dest = weighted_prob(prep, newp, factor); } else { *dest = prep; } @@ -253,10 +250,12 @@ void vp9_counts_to_nmv_context( branch_ct_joint, nmv_count->joints, 0); for (i = 0; i < 2; ++i) { - prob->comps[i].sign = get_binary_prob(nmv_count->comps[i].sign[0], - nmv_count->comps[i].sign[1]); - branch_ct_sign[i][0] = nmv_count->comps[i].sign[0]; - branch_ct_sign[i][1] = nmv_count->comps[i].sign[1]; + const uint32_t s0 = nmv_count->comps[i].sign[0]; + const uint32_t s1 = nmv_count->comps[i].sign[1]; + + prob->comps[i].sign = get_binary_prob(s0, s1); + branch_ct_sign[i][0] = s0; + branch_ct_sign[i][1] = s1; vp9_tree_probs_from_distribution(vp9_mv_class_tree, prob->comps[i].classes, branch_ct_classes[i], @@ -266,10 +265,12 @@ void vp9_counts_to_nmv_context( branch_ct_class0[i], nmv_count->comps[i].class0, 0); for (j = 0; j < MV_OFFSET_BITS; ++j) { - prob->comps[i].bits[j] = get_binary_prob(nmv_count->comps[i].bits[j][0], - nmv_count->comps[i].bits[j][1]); - branch_ct_bits[i][j][0] = nmv_count->comps[i].bits[j][0]; - branch_ct_bits[i][j][1] = nmv_count->comps[i].bits[j][1]; + const uint32_t b0 = nmv_count->comps[i].bits[j][0]; + const uint32_t b1 = nmv_count->comps[i].bits[j][1]; + + prob->comps[i].bits[j] = get_binary_prob(b0, b1); + branch_ct_bits[i][j][0] = b0; + branch_ct_bits[i][j][1] = b1; } } for (i = 0; i < 2; ++i) { @@ -286,16 +287,18 @@ void vp9_counts_to_nmv_context( } if (usehp) { for (i = 0; i < 2; ++i) { - prob->comps[i].class0_hp = - get_binary_prob(nmv_count->comps[i].class0_hp[0], - nmv_count->comps[i].class0_hp[1]); - branch_ct_class0_hp[i][0] = nmv_count->comps[i].class0_hp[0]; - branch_ct_class0_hp[i][1] = nmv_count->comps[i].class0_hp[1]; - - prob->comps[i].hp = get_binary_prob(nmv_count->comps[i].hp[0], - nmv_count->comps[i].hp[1]); - branch_ct_hp[i][0] = nmv_count->comps[i].hp[0]; - branch_ct_hp[i][1] = nmv_count->comps[i].hp[1]; + const uint32_t c0_hp0 = nmv_count->comps[i].class0_hp[0]; + const uint32_t c0_hp1 = nmv_count->comps[i].class0_hp[1]; + const uint32_t hp0 = nmv_count->comps[i].hp[0]; + const uint32_t hp1 = nmv_count->comps[i].hp[1]; + + prob->comps[i].class0_hp = get_binary_prob(c0_hp0, c0_hp1); + branch_ct_class0_hp[i][0] = c0_hp0; + branch_ct_class0_hp[i][1] = c0_hp1; + + prob->comps[i].hp = get_binary_prob(hp0, hp1); + branch_ct_hp[i][0] = hp0; + branch_ct_hp[i][1] = hp1; } } } diff --git a/vp9/common/vp9_entropymv.h b/vp9/common/vp9_entropymv.h index 162d2b44ff78cbed15324019e922f180e4e5a4af..15994a6aec02efafd38fa804f46dc3a4bca0d5e4 100644 --- a/vp9/common/vp9_entropymv.h +++ b/vp9/common/vp9_entropymv.h @@ -24,13 +24,7 @@ void vp9_init_mv_probs(struct VP9Common *cm); void vp9_adapt_nmv_probs(struct VP9Common *cm, int usehp); int vp9_use_nmv_hp(const MV *ref); -#define VP9_NMV_UPDATE_PROB 255 - -#if CONFIG_NEW_MVREF -#define VP9_MVREF_UPDATE_PROB 252 -#define VP9_DEFAULT_MV_REF_PROB 192 -#define VP9_MV_REF_UPDATE_COST (14 << 8) -#endif +#define VP9_NMV_UPDATE_PROB 252 //#define MV_GROUP_UPDATE @@ -45,8 +39,16 @@ typedef enum { MV_JOINT_HNZVNZ = 3, /* Both components nonzero */ } MV_JOINT_TYPE; +static INLINE int mv_joint_vertical(MV_JOINT_TYPE type) { + return type == MV_JOINT_HZVNZ || type == MV_JOINT_HNZVNZ; +} + +static INLINE int mv_joint_horizontal(MV_JOINT_TYPE type) { + return type == MV_JOINT_HNZVZ || type == MV_JOINT_HNZVNZ; 
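The two predicates just defined let callers test which motion-vector components actually need coding, instead of repeating the joint-type comparisons inline the way vp9_increment_nmv used to. A compilable restatement with the types reduced to the minimum needed:

#include <stdint.h>
#include <stdio.h>

typedef struct { int16_t row, col; } MV;

typedef enum {
  MV_JOINT_ZERO = 0,    // both components zero
  MV_JOINT_HNZVZ = 1,   // horizontal nonzero, vertical zero
  MV_JOINT_HZVNZ = 2,   // horizontal zero, vertical nonzero
  MV_JOINT_HNZVNZ = 3   // both components nonzero
} MV_JOINT_TYPE;

static MV_JOINT_TYPE get_mv_joint(const MV *mv) {
  if (mv->row == 0)
    return mv->col == 0 ? MV_JOINT_ZERO : MV_JOINT_HNZVZ;
  return mv->col == 0 ? MV_JOINT_HZVNZ : MV_JOINT_HNZVNZ;
}

static int mv_joint_vertical(MV_JOINT_TYPE type) {
  return type == MV_JOINT_HZVNZ || type == MV_JOINT_HNZVNZ;
}

static int mv_joint_horizontal(MV_JOINT_TYPE type) {
  return type == MV_JOINT_HNZVZ || type == MV_JOINT_HNZVNZ;
}

int main(void) {
  const MV mv = { 0, -3 };  // zero row, nonzero column
  const MV_JOINT_TYPE j = get_mv_joint(&mv);
  // Prints "joint=1 vertical=0 horizontal=1": only the horizontal
  // component of this vector would be coded.
  printf("joint=%d vertical=%d horizontal=%d\n",
         j, mv_joint_vertical(j), mv_joint_horizontal(j));
  return 0;
}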
+} + extern const vp9_tree_index vp9_mv_joint_tree[2 * MV_JOINTS - 2]; -extern struct vp9_token_struct vp9_mv_joint_encodings [MV_JOINTS]; +extern struct vp9_token vp9_mv_joint_encodings[MV_JOINTS]; /* Symbols for coding magnitude class of nonzero components */ #define MV_CLASSES 11 @@ -65,7 +67,7 @@ typedef enum { } MV_CLASS_TYPE; extern const vp9_tree_index vp9_mv_class_tree[2 * MV_CLASSES - 2]; -extern struct vp9_token_struct vp9_mv_class_encodings [MV_CLASSES]; +extern struct vp9_token vp9_mv_class_encodings[MV_CLASSES]; #define CLASS0_BITS 1 /* bits at integer precision for class 0 */ #define CLASS0_SIZE (1 << CLASS0_BITS) @@ -76,10 +78,10 @@ extern struct vp9_token_struct vp9_mv_class_encodings [MV_CLASSES]; #define MV_VALS ((MV_MAX << 1) + 1) extern const vp9_tree_index vp9_mv_class0_tree[2 * CLASS0_SIZE - 2]; -extern struct vp9_token_struct vp9_mv_class0_encodings[CLASS0_SIZE]; +extern struct vp9_token vp9_mv_class0_encodings[CLASS0_SIZE]; extern const vp9_tree_index vp9_mv_fp_tree[2 * 4 - 2]; -extern struct vp9_token_struct vp9_mv_fp_encodings[4]; +extern struct vp9_token vp9_mv_fp_encodings[4]; typedef struct { vp9_prob sign; @@ -97,7 +99,7 @@ typedef struct { nmv_component comps[2]; } nmv_context; -MV_JOINT_TYPE vp9_get_mv_joint(MV mv); +MV_JOINT_TYPE vp9_get_mv_joint(const MV *mv); MV_CLASS_TYPE vp9_get_mv_class(int z, int *offset); int vp9_get_mv_mag(MV_CLASS_TYPE c, int offset); diff --git a/vp9/common/vp9_enums.h b/vp9/common/vp9_enums.h new file mode 100644 index 0000000000000000000000000000000000000000..e18d353d37845a784988e1dc05197093c3022e47 --- /dev/null +++ b/vp9/common/vp9_enums.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef VP9_COMMON_VP9_ENUMS_H_ +#define VP9_COMMON_VP9_ENUMS_H_ + +#include "./vpx_config.h" + +#define LOG2_MI_SIZE 3 + +#define MI_SIZE (1 << LOG2_MI_SIZE) +#define MI_MASK ((64 >> LOG2_MI_SIZE) - 1) + +typedef enum BLOCK_SIZE_TYPE { + BLOCK_SIZE_AB4X4, + BLOCK_SIZE_SB4X8, + BLOCK_SIZE_SB8X4, + BLOCK_SIZE_SB8X8, + BLOCK_SIZE_SB8X16, + BLOCK_SIZE_SB16X8, + BLOCK_SIZE_MB16X16, + BLOCK_SIZE_SB16X32, + BLOCK_SIZE_SB32X16, + BLOCK_SIZE_SB32X32, + BLOCK_SIZE_SB32X64, + BLOCK_SIZE_SB64X32, + BLOCK_SIZE_SB64X64, + BLOCK_SIZE_TYPES +} BLOCK_SIZE_TYPE; + +typedef enum PARTITION_TYPE { + PARTITION_NONE, + PARTITION_HORZ, + PARTITION_VERT, + PARTITION_SPLIT, + PARTITION_TYPES +} PARTITION_TYPE; + +#define PARTITION_PLOFFSET 4 // number of probability models per block size +#define NUM_PARTITION_CONTEXTS (4 * PARTITION_PLOFFSET) + +#endif // VP9_COMMON_VP9_ENUMS_H_ diff --git a/vp9/common/vp9_extend.c b/vp9/common/vp9_extend.c index 6aac905378a8ef86c0b9f28243a2dd8aa9bf06f8..95ec59061f5f6fb315cbf6db68988635eb90ee7f 100644 --- a/vp9/common/vp9_extend.c +++ b/vp9/common/vp9_extend.c @@ -60,10 +60,22 @@ void vp9_copy_and_extend_frame(const YV12_BUFFER_CONFIG *src, const int eb_y = dst->border + dst->y_height - src->y_height; const int er_y = dst->border + dst->y_width - src->y_width; - const int et_uv = dst->border >> 1; - const int el_uv = dst->border >> 1; - const int eb_uv = (dst->border >> 1) + dst->uv_height - src->uv_height; - const int er_uv = (dst->border >> 1) + dst->uv_width - src->uv_width; + const int et_uv = dst->border >> (dst->uv_height != dst->y_height); + const int el_uv = dst->border >> (dst->uv_width != dst->y_width); + const int eb_uv = et_uv + dst->uv_height - src->uv_height; + const int er_uv = el_uv + dst->uv_width - src->uv_width; + +#if CONFIG_ALPHA + const int et_a = dst->border >> (dst->alpha_height != dst->y_height); + const int el_a = dst->border >> (dst->alpha_width != dst->y_width); + const int eb_a = et_a + dst->alpha_height - src->alpha_height; + const int er_a = el_a + dst->alpha_width - src->alpha_width; + + copy_and_extend_plane(src->alpha_buffer, src->alpha_stride, + dst->alpha_buffer, dst->alpha_stride, + src->alpha_width, src->alpha_height, + et_a, el_a, eb_a, er_a); +#endif copy_and_extend_plane(src->y_buffer, src->y_stride, dst->y_buffer, dst->y_stride, @@ -78,7 +90,7 @@ void vp9_copy_and_extend_frame(const YV12_BUFFER_CONFIG *src, copy_and_extend_plane(src->v_buffer, src->uv_stride, dst->v_buffer, dst->uv_stride, src->uv_width, src->uv_height, - et_y, el_y, eb_uv, er_uv); + et_uv, el_uv, eb_uv, er_uv); } void vp9_copy_and_extend_frame_with_rect(const YV12_BUFFER_CONFIG *src, @@ -119,29 +131,3 @@ void vp9_copy_and_extend_frame_with_rect(const YV12_BUFFER_CONFIG *src, srcw_uv, srch_uv, et_uv, el_uv, eb_uv, er_uv); } - -// note the extension is only for the last row, for intra prediction purpose -void vp9_extend_mb_row(YV12_BUFFER_CONFIG *buf, - uint8_t *y, uint8_t *u, uint8_t *v) { - int i; - - y += buf->y_stride * 14; - u += buf->uv_stride * 6; - v += buf->uv_stride * 6; - - for (i = 0; i < 4; i++) { - y[i] = y[-1]; - u[i] = u[-1]; - v[i] = v[-1]; - } - - y += buf->y_stride; - u += buf->uv_stride; - v += buf->uv_stride; - - for (i = 0; i < 4; i++) { - y[i] = y[-1]; - u[i] = u[-1]; - v[i] = v[-1]; - } -} diff --git a/vp9/common/vp9_extend.h b/vp9/common/vp9_extend.h index 6ec75c992afa627185c796f1c1328084ad0b656c..7ff79b7b6b3ce35c98d84cc06857773890c02629 100644 --- a/vp9/common/vp9_extend.h +++ b/vp9/common/vp9_extend.h @@ -22,9 +22,4 @@ void 
vp9_copy_and_extend_frame_with_rect(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int srcy, int srcx, int srch, int srcw); - -void vp9_extend_mb_row(YV12_BUFFER_CONFIG *buf, - uint8_t *y, uint8_t *u, uint8_t *v); - - #endif // VP9_COMMON_VP9_EXTEND_H_ diff --git a/vp9/common/vp9_filter.c b/vp9/common/vp9_filter.c index 6c1ea21a1b594f7e4c276373e7e5e222bf259351..e5503cdd9ec347028265d64092a586ef1e127207 100644 --- a/vp9/common/vp9_filter.c +++ b/vp9/common/vp9_filter.c @@ -34,12 +34,7 @@ DECLARE_ALIGNED(256, const int16_t, vp9_bilinear_filters[SUBPEL_SHIFTS][8]) = { { 0, 0, 0, 8, 120, 0, 0, 0 } }; -#define FILTER_ALPHA 0 -#define FILTER_ALPHA_SHARP 0 -#define FILTER_ALPHA_SMOOTH 50 -DECLARE_ALIGNED(256, const int16_t, vp9_sub_pel_filters_8[SUBPEL_SHIFTS][8]) - = { -#if FILTER_ALPHA == 0 +DECLARE_ALIGNED(256, const int16_t, vp9_sub_pel_filters_8[SUBPEL_SHIFTS][8]) = { /* Lagrangian interpolation filter */ { 0, 0, 0, 128, 0, 0, 0, 0}, { 0, 1, -5, 126, 8, -3, 1, 0}, @@ -57,38 +52,10 @@ DECLARE_ALIGNED(256, const int16_t, vp9_sub_pel_filters_8[SUBPEL_SHIFTS][8]) { -1, 3, -9, 27, 118, -13, 4, -1}, { 0, 2, -6, 18, 122, -10, 3, -1}, { 0, 1, -3, 8, 126, -5, 1, 0} - -#elif FILTER_ALPHA == 50 - /* Generated using MATLAB: - * alpha = 0.5; - * b=intfilt(8,4,alpha); - * bi=round(128*b); - * ba=flipud(reshape([bi 0], 8, 8)); - * disp(num2str(ba, '%d,')) - */ - { 0, 0, 0, 128, 0, 0, 0, 0}, - { 0, 1, -5, 126, 8, -3, 1, 0}, - { 0, 2, -10, 122, 18, -6, 2, 0}, - { -1, 3, -13, 118, 27, -9, 3, 0}, - { -1, 4, -16, 112, 37, -11, 3, 0}, - { -1, 5, -17, 104, 48, -14, 4, -1}, - { -1, 5, -18, 96, 58, -16, 5, -1}, - { -1, 5, -19, 88, 68, -17, 5, -1}, - { -1, 5, -18, 78, 78, -18, 5, -1}, - { -1, 5, -17, 68, 88, -19, 5, -1}, - { -1, 5, -16, 58, 96, -18, 5, -1}, - { -1, 4, -14, 48, 104, -17, 5, -1}, - { 0, 3, -11, 37, 112, -16, 4, -1}, - { 0, 3, -9, 27, 118, -13, 3, -1}, - { 0, 2, -6, 18, 122, -10, 2, 0}, - { 0, 1, -3, 8, 126, -5, 1, 0} - -#endif /* FILTER_ALPHA */ }; DECLARE_ALIGNED(256, const int16_t, vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][8]) = { -#if FILTER_ALPHA_SHARP == 0 /* dct based filter */ {0, 0, 0, 128, 0, 0, 0, 0}, {-1, 3, -7, 127, 8, -3, 1, 0}, @@ -106,88 +73,25 @@ DECLARE_ALIGNED(256, const int16_t, vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][8]) {-2, 5, -10, 27, 121, -17, 7, -3}, {-1, 3, -6, 17, 125, -13, 5, -2}, {0, 1, -3, 8, 127, -7, 3, -1} - -#elif FILTER_ALPHA_SHARP == 80 - /* alpha = 0.80 */ - { 0, 0, 0, 128, 0, 0, 0, 0}, - {-1, 2, -6, 127, 9, -4, 2, -1}, - {-2, 5, -12, 124, 18, -7, 4, -2}, - {-2, 7, -16, 119, 28, -11, 5, -2}, - {-3, 8, -19, 114, 38, -14, 7, -3}, - {-3, 9, -22, 107, 49, -17, 8, -3}, - {-4, 10, -23, 99, 60, -20, 10, -4}, - {-4, 11, -23, 90, 70, -22, 10, -4}, - {-4, 11, -23, 80, 80, -23, 11, -4}, - {-4, 10, -22, 70, 90, -23, 11, -4}, - {-4, 10, -20, 60, 99, -23, 10, -4}, - {-3, 8, -17, 49, 107, -22, 9, -3}, - {-3, 7, -14, 38, 114, -19, 8, -3}, - {-2, 5, -11, 28, 119, -16, 7, -2}, - {-2, 4, -7, 18, 124, -12, 5, -2}, - {-1, 2, -4, 9, 127, -6, 2, -1} -#endif /* FILTER_ALPHA_SHARP */ }; DECLARE_ALIGNED(256, const int16_t, vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS][8]) = { - /* 8-tap lowpass filter */ - /* Hamming window */ - /* freqmultiplier = 0.625 */ -#if FILTER_ALPHA_SMOOTH == 625 - {-1, -7, 32, 80, 32, -7, -1, 0}, - {-1, -8, 28, 80, 37, -7, -2, 1}, - { 0, -8, 24, 79, 41, -7, -2, 1}, - { 0, -8, 20, 78, 45, -5, -3, 1}, - { 0, -8, 16, 76, 50, -4, -3, 1}, - { 0, -7, 13, 74, 54, -3, -4, 1}, - { 1, -7, 9, 71, 58, -1, -4, 1}, - { 1, -6, 6, 68, 62, 1, -5, 1}, - { 1, -6, 4, 65, 65, 4, -6, 1}, - { 
1, -5, 1, 62, 68, 6, -6, 1}, - { 1, -4, -1, 58, 71, 9, -7, 1}, - { 1, -4, -3, 54, 74, 13, -7, 0}, - { 1, -3, -4, 50, 76, 16, -8, 0}, - { 1, -3, -5, 45, 78, 20, -8, 0}, - { 1, -2, -7, 41, 79, 24, -8, 0}, - { 1, -2, -7, 37, 80, 28, -8, -1} - -#elif FILTER_ALPHA_SMOOTH == 50 /* freqmultiplier = 0.5 */ - {-3, 0, 35, 64, 35, 0, -3, 0}, - {-3, -1, 32, 64, 38, 1, -3, 0}, - {-2, -2, 29, 63, 41, 2, -3, 0}, - {-2, -2, 26, 63, 43, 4, -4, 0}, - {-2, -3, 24, 62, 46, 5, -4, 0}, - {-2, -3, 21, 60, 49, 7, -4, 0}, - {-1, -4, 18, 59, 51, 9, -4, 0}, - {-1, -4, 16, 57, 53, 12, -4, -1}, - {-1, -4, 14, 55, 55, 14, -4, -1}, - {-1, -4, 12, 53, 57, 16, -4, -1}, - {0, -4, 9, 51, 59, 18, -4, -1}, - {0, -4, 7, 49, 60, 21, -3, -2}, - {0, -4, 5, 46, 62, 24, -3, -2}, - {0, -4, 4, 43, 63, 26, -2, -2}, - {0, -3, 2, 41, 63, 29, -2, -2}, - {0, -3, 1, 38, 64, 32, -1, -3} -#endif -}; - -DECLARE_ALIGNED(256, const int16_t, vp9_sub_pel_filters_6[SUBPEL_SHIFTS][8]) - = { - {0, 0, 0, 128, 0, 0, 0, 0}, - {0, 1, -5, 125, 8, -2, 1, 0}, - {0, 1, -8, 122, 17, -5, 1, 0}, - {0, 2, -11, 116, 27, -8, 2, 0}, - {0, 3, -14, 110, 37, -10, 2, 0}, - {0, 3, -15, 103, 47, -12, 2, 0}, - {0, 3, -16, 95, 57, -14, 3, 0}, - {0, 3, -16, 86, 67, -15, 3, 0}, - {0, 3, -16, 77, 77, -16, 3, 0}, - {0, 3, -15, 67, 86, -16, 3, 0}, - {0, 3, -14, 57, 95, -16, 3, 0}, - {0, 2, -12, 47, 103, -15, 3, 0}, - {0, 2, -10, 37, 110, -14, 3, 0}, - {0, 2, -8, 27, 116, -11, 2, 0}, - {0, 1, -5, 17, 122, -8, 1, 0}, - {0, 1, -2, 8, 125, -5, 1, 0} + { 0, 0, 0, 128, 0, 0, 0, 0}, + {-3, -1, 32, 64, 38, 1, -3, 0}, + {-2, -2, 29, 63, 41, 2, -3, 0}, + {-2, -2, 26, 63, 43, 4, -4, 0}, + {-2, -3, 24, 62, 46, 5, -4, 0}, + {-2, -3, 21, 60, 49, 7, -4, 0}, + {-1, -4, 18, 59, 51, 9, -4, 0}, + {-1, -4, 16, 57, 53, 12, -4, -1}, + {-1, -4, 14, 55, 55, 14, -4, -1}, + {-1, -4, 12, 53, 57, 16, -4, -1}, + { 0, -4, 9, 51, 59, 18, -4, -1}, + { 0, -4, 7, 49, 60, 21, -3, -2}, + { 0, -4, 5, 46, 62, 24, -3, -2}, + { 0, -4, 4, 43, 63, 26, -2, -2}, + { 0, -3, 2, 41, 63, 29, -2, -2}, + { 0, -3, 1, 38, 64, 32, -1, -3} }; diff --git a/vp9/common/vp9_findnearmv.c b/vp9/common/vp9_findnearmv.c index a063ca4778fdfe62992821b4ad3b0b6a524d8991..d7817114eecee1cab5ace1e59d9df39c3a8328b6 100644 --- a/vp9/common/vp9_findnearmv.c +++ b/vp9/common/vp9_findnearmv.c @@ -8,22 +8,14 @@ * be found in the AUTHORS file in the root of the source tree. */ - #include <limits.h> #include "vp9/common/vp9_findnearmv.h" +#include "vp9/common/vp9_mvref_common.h" #include "vp9/common/vp9_sadmxn.h" #include "vp9/common/vp9_subpelvar.h" -const uint8_t vp9_mbsplit_offset[4][16] = { - { 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - { 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - { 0, 2, 8, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15} -}; - -static void lower_mv_precision(int_mv *mv, int usehp) -{ +static void lower_mv_precision(int_mv *mv, int usehp) { if (!usehp || !vp9_use_nmv_hp(&mv->as_mv)) { if (mv->as_mv.row & 1) mv->as_mv.row += (mv->as_mv.row > 0 ? 
-1 : 1); @@ -32,278 +24,14 @@ static void lower_mv_precision(int_mv *mv, int usehp) } } -vp9_prob *vp9_mv_ref_probs(VP9_COMMON *pc, - vp9_prob p[4], const int context) { - p[0] = pc->fc.vp9_mode_contexts[context][0]; - p[1] = pc->fc.vp9_mode_contexts[context][1]; - p[2] = pc->fc.vp9_mode_contexts[context][2]; - p[3] = pc->fc.vp9_mode_contexts[context][3]; +vp9_prob *vp9_mv_ref_probs(VP9_COMMON *pc, vp9_prob *p, int context) { + p[0] = pc->fc.inter_mode_probs[context][0]; + p[1] = pc->fc.inter_mode_probs[context][1]; + p[2] = pc->fc.inter_mode_probs[context][2]; return p; } -#define SP(x) (((x) & 7) << 1) -unsigned int vp9_sad3x16_c(const uint8_t *src_ptr, - int src_stride, - const uint8_t *ref_ptr, - int ref_stride) { - return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 3, 16); -} -unsigned int vp9_sad16x3_c(const uint8_t *src_ptr, - int src_stride, - const uint8_t *ref_ptr, - int ref_stride) { - return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 16, 3); -} - - -unsigned int vp9_variance2x16_c(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, - unsigned int *sse) { - int sum; - variance(src_ptr, source_stride, ref_ptr, recon_stride, 2, 16, sse, &sum); - return (*sse - (((unsigned int)sum * sum) >> 5)); -} - -unsigned int vp9_variance16x2_c(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, - unsigned int *sse) { - int sum; - variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 2, sse, &sum); - return (*sse - (((unsigned int)sum * sum) >> 5)); -} - -unsigned int vp9_sub_pixel_variance16x2_c(const uint8_t *src_ptr, - int source_stride, - int xoffset, - int yoffset, - const uint8_t *ref_ptr, - int ref_stride, - unsigned int *sse) { - uint16_t FData3[16 * 3]; // Temp data buffer used in filtering - uint8_t temp2[2 * 16]; - const int16_t *HFilter, *VFilter; - - HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); - - var_filter_block2d_bil_first_pass(src_ptr, FData3, - source_stride, 1, 3, 16, HFilter); - var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 2, 16, VFilter); - - return vp9_variance16x2_c(temp2, 16, ref_ptr, ref_stride, sse); -} - -unsigned int vp9_sub_pixel_variance2x16_c(const uint8_t *src_ptr, - int source_stride, - int xoffset, - int yoffset, - const uint8_t *ref_ptr, - int ref_stride, - unsigned int *sse) { - uint16_t FData3[2 * 17]; // Temp data buffer used in filtering - uint8_t temp2[2 * 16]; - const int16_t *HFilter, *VFilter; - - HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); - - var_filter_block2d_bil_first_pass(src_ptr, FData3, - source_stride, 1, 17, 2, HFilter); - var_filter_block2d_bil_second_pass(FData3, temp2, 2, 2, 16, 2, VFilter); - - return vp9_variance2x16_c(temp2, 2, ref_ptr, ref_stride, sse); -} - -#if CONFIG_USESELECTREFMV -/* check a list of motion vectors by sad score using a number rows of pixels - * above and a number cols of pixels in the left to select the one with best - * score to use as ref motion vector - */ - void vp9_find_best_ref_mvs(MACROBLOCKD *xd, - uint8_t *ref_y_buffer, - int ref_y_stride, - int_mv *mvlist, - int_mv *nearest, - int_mv *near) { - int i, j; - uint8_t *above_src; - uint8_t *above_ref; -#if !CONFIG_ABOVESPREFMV - uint8_t *left_src; - uint8_t *left_ref; -#endif - unsigned int score; - unsigned int sse; - unsigned int ref_scores[MAX_MV_REF_CANDIDATES] = {0}; - int_mv sorted_mvs[MAX_MV_REF_CANDIDATES]; - int zero_seen = FALSE; - - if 
(ref_y_buffer) { - - // Default all to 0,0 if nothing else available - nearest->as_int = near->as_int = 0; - vpx_memset(sorted_mvs, 0, sizeof(sorted_mvs)); - - above_src = xd->dst.y_buffer - xd->dst.y_stride * 2; - above_ref = ref_y_buffer - ref_y_stride * 2; -#if CONFIG_ABOVESPREFMV - above_src -= 4; - above_ref -= 4; -#else - left_src = xd->dst.y_buffer - 2; - left_ref = ref_y_buffer - 2; -#endif - - // Limit search to the predicted best few candidates - for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) { - int_mv this_mv; - int offset = 0; - int row_offset, col_offset; - - this_mv.as_int = mvlist[i].as_int; - - // If we see a 0,0 vector for a second time we have reached the end of - // the list of valid candidate vectors. - if (!this_mv.as_int && zero_seen) - break; - - zero_seen = zero_seen || !this_mv.as_int; - -#if !CONFIG_ABOVESPREFMV - clamp_mv(&this_mv, - xd->mb_to_left_edge - LEFT_TOP_MARGIN + 24, - xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN, - xd->mb_to_top_edge - LEFT_TOP_MARGIN + 24, - xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN); -#else - clamp_mv(&this_mv, - xd->mb_to_left_edge - LEFT_TOP_MARGIN + 32, - xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN, - xd->mb_to_top_edge - LEFT_TOP_MARGIN + 24, - xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN); -#endif - - row_offset = this_mv.as_mv.row >> 3; - col_offset = this_mv.as_mv.col >> 3; - offset = ref_y_stride * row_offset + col_offset; - score = 0; -#if !CONFIG_ABOVESPREFMV - if (xd->up_available) { -#else - if (xd->up_available && xd->left_available) { -#endif - vp9_sub_pixel_variance16x2(above_ref + offset, ref_y_stride, - SP(this_mv.as_mv.col), - SP(this_mv.as_mv.row), - above_src, xd->dst.y_stride, &sse); - score += sse; - if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB32X32) { - vp9_sub_pixel_variance16x2(above_ref + offset + 16, - ref_y_stride, - SP(this_mv.as_mv.col), - SP(this_mv.as_mv.row), - above_src + 16, xd->dst.y_stride, &sse); - score += sse; - } - if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB64X64) { - vp9_sub_pixel_variance16x2(above_ref + offset + 32, - ref_y_stride, - SP(this_mv.as_mv.col), - SP(this_mv.as_mv.row), - above_src + 32, xd->dst.y_stride, &sse); - score += sse; - vp9_sub_pixel_variance16x2(above_ref + offset + 48, - ref_y_stride, - SP(this_mv.as_mv.col), - SP(this_mv.as_mv.row), - above_src + 48, xd->dst.y_stride, &sse); - score += sse; - } - } -#if !CONFIG_ABOVESPREFMV - if (xd->left_available) { - vp9_sub_pixel_variance2x16_c(left_ref + offset, ref_y_stride, - SP(this_mv.as_mv.col), - SP(this_mv.as_mv.row), - left_src, xd->dst.y_stride, &sse); - score += sse; - if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB32X32) { - vp9_sub_pixel_variance2x16_c(left_ref + offset + ref_y_stride * 16, - ref_y_stride, - SP(this_mv.as_mv.col), - SP(this_mv.as_mv.row), - left_src + xd->dst.y_stride * 16, - xd->dst.y_stride, &sse); - score += sse; - } - if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB64X64) { - vp9_sub_pixel_variance2x16_c(left_ref + offset + ref_y_stride * 32, - ref_y_stride, - SP(this_mv.as_mv.col), - SP(this_mv.as_mv.row), - left_src + xd->dst.y_stride * 32, - xd->dst.y_stride, &sse); - score += sse; - vp9_sub_pixel_variance2x16_c(left_ref + offset + ref_y_stride * 48, - ref_y_stride, - SP(this_mv.as_mv.col), - SP(this_mv.as_mv.row), - left_src + xd->dst.y_stride * 48, - xd->dst.y_stride, &sse); - score += sse; - } - } -#endif - // Add the entry to our list and then resort the list on score. 
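The re-sort named in the comment above is one insertion step over a small list kept in ascending score order. Isolated as a standalone sketch; the list length and names here are illustrative, not the values used by the removed function:

#include <stdio.h>

#define NUM_CANDIDATES 4  // illustrative list size

typedef struct { int as_int; } int_mv;

// Append entry i and bubble it toward the front while it beats its
// neighbor, keeping scores[0..i] sorted ascending. This mirrors the
// while-loop being deleted below.
static void insert_sorted(unsigned int *scores, int_mv *mvs, int i,
                          unsigned int score, int_mv mv) {
  scores[i] = score;
  mvs[i] = mv;
  while (i > 0 && scores[i] < scores[i - 1]) {
    const unsigned int tmp_score = scores[i - 1];
    const int_mv tmp_mv = mvs[i - 1];
    scores[i - 1] = scores[i];
    mvs[i - 1] = mvs[i];
    scores[i] = tmp_score;
    mvs[i] = tmp_mv;
    --i;
  }
}

int main(void) {
  unsigned int scores[NUM_CANDIDATES];
  int_mv mvs[NUM_CANDIDATES];
  const unsigned int observed[NUM_CANDIDATES] = { 900, 300, 700, 100 };
  int i;
  for (i = 0; i < NUM_CANDIDATES; ++i) {
    int_mv mv;
    mv.as_int = i;  // stand-in for a packed motion vector
    insert_sorted(scores, mvs, i, observed[i], mv);
  }
  // Prints the candidates in ascending score order: 100, 300, 700, 900.
  for (i = 0; i < NUM_CANDIDATES; ++i)
    printf("mv %d score %u\n", mvs[i].as_int, scores[i]);
  return 0;
}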
- ref_scores[i] = score; - sorted_mvs[i].as_int = this_mv.as_int; - j = i; - while (j > 0) { - if (ref_scores[j] < ref_scores[j-1]) { - ref_scores[j] = ref_scores[j-1]; - sorted_mvs[j].as_int = sorted_mvs[j-1].as_int; - ref_scores[j-1] = score; - sorted_mvs[j-1].as_int = this_mv.as_int; - j--; - } else { - break; - } - } - } - } else { - vpx_memcpy(sorted_mvs, mvlist, sizeof(sorted_mvs)); - } - - // Make sure all the candidates are properly clamped etc - for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) { - lower_mv_precision(&sorted_mvs[i], xd->allow_high_precision_mv); - clamp_mv2(&sorted_mvs[i], xd); - } - - // Nearest may be a 0,0 or non zero vector and now matches the chosen - // "best reference". This has advantages when it is used as part of a - // compound predictor as it means a non zero vector can be paired using - // this mode with a 0 vector. The Near vector is still forced to be a - // non zero candidate if one is avaialble. - nearest->as_int = sorted_mvs[0].as_int; - if ( sorted_mvs[1].as_int ) { - near->as_int = sorted_mvs[1].as_int; - } else { - near->as_int = sorted_mvs[2].as_int; - } - - // Copy back the re-ordered mv list - vpx_memcpy(mvlist, sorted_mvs, sizeof(sorted_mvs)); -} -#else -void vp9_find_best_ref_mvs(MACROBLOCKD *xd, - uint8_t *ref_y_buffer, - int ref_y_stride, int_mv *mvlist, int_mv *nearest, int_mv *near) { @@ -316,4 +44,53 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, *nearest = mvlist[0]; *near = mvlist[1]; } -#endif + +void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, + int_mv *dst_nearest, + int_mv *dst_near, + int block_idx, int ref_idx) { + int_mv dst_list[MAX_MV_REF_CANDIDATES]; + int_mv mv_list[MAX_MV_REF_CANDIDATES]; + MODE_INFO *mi = xd->mode_info_context; + MB_MODE_INFO *const mbmi = &mi->mbmi; + + assert(ref_idx == 0 || ref_idx == 1); + assert(MAX_MV_REF_CANDIDATES == 2); // makes code here slightly easier + + vp9_find_mv_refs_idx(cm, xd, xd->mode_info_context, + xd->prev_mode_info_context, + mbmi->ref_frame[ref_idx], + mv_list, cm->ref_frame_sign_bias, block_idx); + + dst_list[1].as_int = 0; + if (block_idx == 0) { + memcpy(dst_list, mv_list, MAX_MV_REF_CANDIDATES * sizeof(int_mv)); + } else if (block_idx == 1 || block_idx == 2) { + int dst = 0, n; + union b_mode_info *bmi = mi->bmi; + + dst_list[dst++].as_int = bmi[0].as_mv[ref_idx].as_int; + for (n = 0; dst < MAX_MV_REF_CANDIDATES && + n < MAX_MV_REF_CANDIDATES; n++) + if (mv_list[n].as_int != dst_list[0].as_int) + dst_list[dst++].as_int = mv_list[n].as_int; + } else { + int dst = 0, n; + union b_mode_info *bmi = mi->bmi; + + assert(block_idx == 3); + dst_list[dst++].as_int = bmi[2].as_mv[ref_idx].as_int; + if (dst_list[0].as_int != bmi[1].as_mv[ref_idx].as_int) + dst_list[dst++].as_int = bmi[1].as_mv[ref_idx].as_int; + if (dst < MAX_MV_REF_CANDIDATES && + dst_list[0].as_int != bmi[0].as_mv[ref_idx].as_int) + dst_list[dst++].as_int = bmi[0].as_mv[ref_idx].as_int; + for (n = 0; dst < MAX_MV_REF_CANDIDATES && + n < MAX_MV_REF_CANDIDATES; n++) + if (mv_list[n].as_int != dst_list[0].as_int) + dst_list[dst++].as_int = mv_list[n].as_int; + } + + dst_nearest->as_int = dst_list[0].as_int; + dst_near->as_int = dst_list[1].as_int; +} diff --git a/vp9/common/vp9_findnearmv.h b/vp9/common/vp9_findnearmv.h index 6887b044f6d6559b965d550c7426b82eb92fe265..91736fefe7a6151860f2e95bc98cfe6062676264 100644 --- a/vp9/common/vp9_findnearmv.h +++ b/vp9/common/vp9_findnearmv.h @@ -17,16 +17,13 @@ #include "vp9/common/vp9_treecoder.h" #include "vp9/common/vp9_onyxc_int.h" -#define LEFT_TOP_MARGIN (16 << 
3) -#define RIGHT_BOTTOM_MARGIN (16 << 3) +#define LEFT_TOP_MARGIN ((VP9BORDERINPIXELS - VP9_INTERP_EXTEND) << 3) +#define RIGHT_BOTTOM_MARGIN ((VP9BORDERINPIXELS - VP9_INTERP_EXTEND) << 3) -/* check a list of motion vectors by sad score using a number rows of pixels - * above and a number cols of pixels in the left to select the one with best - * score to use as ref motion vector - */ +// Check a list of motion vectors by SAD score, using a number of rows of +// pixels above and a number of columns of pixels to the left, to select +// the one with the best score to use as the reference motion vector. void vp9_find_best_ref_mvs(MACROBLOCKD *xd, - uint8_t *ref_y_buffer, - int ref_y_stride, int_mv *mvlist, int_mv *nearest, int_mv *near); @@ -43,35 +40,30 @@ static void mv_bias(int refmb_ref_frame_sign_bias, int refframe, mvp->as_mv = xmv; } - +// TODO(jingning): this mv clamping function should be block size dependent. static void clamp_mv(int_mv *mv, int mb_to_left_edge, int mb_to_right_edge, int mb_to_top_edge, int mb_to_bottom_edge) { - mv->as_mv.col = (mv->as_mv.col < mb_to_left_edge) ? - mb_to_left_edge : mv->as_mv.col; - mv->as_mv.col = (mv->as_mv.col > mb_to_right_edge) ? - mb_to_right_edge : mv->as_mv.col; - mv->as_mv.row = (mv->as_mv.row < mb_to_top_edge) ? - mb_to_top_edge : mv->as_mv.row; - mv->as_mv.row = (mv->as_mv.row > mb_to_bottom_edge) ? - mb_to_bottom_edge : mv->as_mv.row; + mv->as_mv.col = clamp(mv->as_mv.col, mb_to_left_edge, mb_to_right_edge); + mv->as_mv.row = clamp(mv->as_mv.row, mb_to_top_edge, mb_to_bottom_edge); } -static void clamp_mv2(int_mv *mv, const MACROBLOCKD *xd) { +static int clamp_mv2(int_mv *mv, const MACROBLOCKD *xd) { + int_mv tmp_mv; + tmp_mv.as_int = mv->as_int; clamp_mv(mv, xd->mb_to_left_edge - LEFT_TOP_MARGIN, xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN, xd->mb_to_top_edge - LEFT_TOP_MARGIN, xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN); + return tmp_mv.as_int != mv->as_int; } -static unsigned int check_mv_bounds(int_mv *mv, - int mb_to_left_edge, - int mb_to_right_edge, - int mb_to_top_edge, - int mb_to_bottom_edge) { +static int check_mv_bounds(int_mv *mv, + int mb_to_left_edge, int mb_to_right_edge, + int mb_to_top_edge, int mb_to_bottom_edge) { return mv->as_mv.col < mb_to_left_edge || mv->as_mv.col > mb_to_right_edge || mv->as_mv.row < mb_to_top_edge || @@ -79,116 +71,50 @@ static unsigned int check_mv_bounds(int_mv *mv, } vp9_prob *vp9_mv_ref_probs(VP9_COMMON *pc, - vp9_prob p[VP9_MVREFS - 1], + vp9_prob p[VP9_INTER_MODES - 1], const int context); -extern const uint8_t vp9_mbsplit_offset[4][16]; - -static int left_block_mv(const MACROBLOCKD *xd, - const MODE_INFO *cur_mb, int b) { - if (!(b & 3)) { - if (!xd->left_available) - return 0; - - // On L edge, get from MB to left of us - --cur_mb; - - if (cur_mb->mbmi.mode != SPLITMV) - return cur_mb->mbmi.mv[0].as_int; - - b += 4; - } - - return (cur_mb->bmi + b - 1)->as_mv[0].as_int; -} - -static int left_block_second_mv(const MACROBLOCKD *xd, - const MODE_INFO *cur_mb, int b) { - if (!(b & 3)) { - if (!xd->left_available) - return 0; - - /* On L edge, get from MB to left of us */ - --cur_mb; - - if (cur_mb->mbmi.mode != SPLITMV) - return cur_mb->mbmi.second_ref_frame > 0 ? - cur_mb->mbmi.mv[1].as_int : cur_mb->mbmi.mv[0].as_int; - b += 4; - } - - return cur_mb->mbmi.second_ref_frame > 0 ? 
- (cur_mb->bmi + b - 1)->as_mv[1].as_int : - (cur_mb->bmi + b - 1)->as_mv[0].as_int; -} - -static int above_block_mv(const MODE_INFO *cur_mb, int b, int mi_stride) { - if (!(b >> 2)) { - /* On top edge, get from MB above us */ - cur_mb -= mi_stride; - - if (cur_mb->mbmi.mode != SPLITMV) - return cur_mb->mbmi.mv[0].as_int; - b += 16; - } - - return (cur_mb->bmi + b - 4)->as_mv[0].as_int; -} - -static int above_block_second_mv(const MODE_INFO *cur_mb, int b, int mi_stride) { - if (!(b >> 2)) { - /* On top edge, get from MB above us */ - cur_mb -= mi_stride; - - if (cur_mb->mbmi.mode != SPLITMV) - return cur_mb->mbmi.second_ref_frame > 0 ? - cur_mb->mbmi.mv[1].as_int : cur_mb->mbmi.mv[0].as_int; - b += 16; - } - - return cur_mb->mbmi.second_ref_frame > 0 ? - (cur_mb->bmi + b - 4)->as_mv[1].as_int : - (cur_mb->bmi + b - 4)->as_mv[0].as_int; -} +void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *pc, + MACROBLOCKD *xd, + int_mv *dst_nearest, + int_mv *dst_near, + int block_idx, int ref_idx); -static B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b) { - if (!(b & 3)) { +static MB_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b) { + // FIXME(rbultje, jingning): temporary hack because jenkins doesn't + // understand this condition. This will go away soon. + if (b == 0 || b == 2) { /* On L edge, get from MB to left of us */ --cur_mb; - if (cur_mb->mbmi.mode < I8X8_PRED) { - return pred_mode_conv(cur_mb->mbmi.mode); - } else if (cur_mb->mbmi.mode == I8X8_PRED) { - return pred_mode_conv( - (MB_PREDICTION_MODE)(cur_mb->bmi + 3 + b)->as_mode.first); - } else if (cur_mb->mbmi.mode == B_PRED) { - return ((cur_mb->bmi + 3 + b)->as_mode.first); + if (cur_mb->mbmi.ref_frame[0] != INTRA_FRAME) { + return DC_PRED; + } else if (cur_mb->mbmi.sb_type < BLOCK_SIZE_SB8X8) { + return ((cur_mb->bmi + 1 + b)->as_mode.first); } else { - return B_DC_PRED; + return cur_mb->mbmi.mode; } } + assert(b == 1 || b == 3); return (cur_mb->bmi + b - 1)->as_mode.first; } -static B_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb, +static MB_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb, int b, int mi_stride) { - if (!(b >> 2)) { + if (!(b >> 1)) { /* On top edge, get from MB above us */ cur_mb -= mi_stride; - if (cur_mb->mbmi.mode < I8X8_PRED) { - return pred_mode_conv(cur_mb->mbmi.mode); - } else if (cur_mb->mbmi.mode == I8X8_PRED) { - return pred_mode_conv( - (MB_PREDICTION_MODE)(cur_mb->bmi + 12 + b)->as_mode.first); - } else if (cur_mb->mbmi.mode == B_PRED) { - return ((cur_mb->bmi + 12 + b)->as_mode.first); + if (cur_mb->mbmi.ref_frame[0] != INTRA_FRAME) { + return DC_PRED; + } else if (cur_mb->mbmi.sb_type < BLOCK_SIZE_SB8X8) { + return ((cur_mb->bmi + 2 + b)->as_mode.first); } else { - return B_DC_PRED; + return cur_mb->mbmi.mode; } } - return (cur_mb->bmi + b - 4)->as_mode.first; + return (cur_mb->bmi + b - 2)->as_mode.first; } #endif // VP9_COMMON_VP9_FINDNEARMV_H_ diff --git a/vp9/common/vp9_header.h b/vp9/common/vp9_header.h deleted file mode 100644 index c51ce135d2addb58cfebfe41f694c69f8d996a26..0000000000000000000000000000000000000000 --- a/vp9/common/vp9_header.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. 
All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_COMMON_VP9_HEADER_H_ -#define VP9_COMMON_VP9_HEADER_H_ - -/* 24 bits total */ -typedef struct { - unsigned int type: 1; - unsigned int version: 3; - unsigned int show_frame: 1; - - /* Allow 2^20 bytes = 8 megabits for first partition */ - - unsigned int first_partition_length_in_bytes: 19; - -#ifdef PACKET_TESTING - unsigned int frame_number; - unsigned int update_gold: 1; - unsigned int uses_gold: 1; - unsigned int update_last: 1; - unsigned int uses_last: 1; -#endif - -} VP9_HEADER; - -#ifdef PACKET_TESTING -#define VP9_HEADER_SIZE 8 -#else -#define VP9_HEADER_SIZE 3 -#endif - -#endif // VP9_COMMON_VP9_HEADER_H_ diff --git a/vp9/common/vp9_idct.c b/vp9/common/vp9_idct.c index 3ec093f735a7a2ecc8eff6f99737392c36678e95..dcc7f03307eaa2d27bfdf248b6bce1342e8ddcb4 100644 --- a/vp9/common/vp9_idct.c +++ b/vp9/common/vp9_idct.c @@ -18,81 +18,81 @@ #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_idct.h" -void vp9_short_iwalsh4x4_c(int16_t *input, int16_t *output, int pitch) { +void vp9_short_iwalsh4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride) { +/* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, + 0.5 shifts per pixel. */ int i; - int a1, b1, c1, d1; + int16_t output[16]; + int a1, b1, c1, d1, e1; int16_t *ip = input; int16_t *op = output; - const int half_pitch = pitch >> 1; for (i = 0; i < 4; i++) { - a1 = (ip[0] + ip[3]) >> WHT_UPSCALE_FACTOR; - b1 = (ip[1] + ip[2]) >> WHT_UPSCALE_FACTOR; - c1 = (ip[1] - ip[2]) >> WHT_UPSCALE_FACTOR; - d1 = (ip[0] - ip[3]) >> WHT_UPSCALE_FACTOR; - - op[0] = (a1 + b1 + 1) >> 1; - op[1] = (c1 + d1) >> 1; - op[2] = (a1 - b1) >> 1; - op[3] = (d1 - c1) >> 1; - + a1 = ip[0] >> WHT_UPSCALE_FACTOR; + c1 = ip[1] >> WHT_UPSCALE_FACTOR; + d1 = ip[2] >> WHT_UPSCALE_FACTOR; + b1 = ip[3] >> WHT_UPSCALE_FACTOR; + a1 += c1; + d1 -= b1; + e1 = (a1 - d1) >> 1; + b1 = e1 - b1; + c1 = e1 - c1; + a1 -= b1; + d1 += c1; + op[0] = a1; + op[1] = b1; + op[2] = c1; + op[3] = d1; ip += 4; - op += half_pitch; + op += 4; } ip = output; - op = output; for (i = 0; i < 4; i++) { - a1 = ip[half_pitch * 0] + ip[half_pitch * 3]; - b1 = ip[half_pitch * 1] + ip[half_pitch * 2]; - c1 = ip[half_pitch * 1] - ip[half_pitch * 2]; - d1 = ip[half_pitch * 0] - ip[half_pitch * 3]; - - - op[half_pitch * 0] = (a1 + b1 + 1) >> 1; - op[half_pitch * 1] = (c1 + d1) >> 1; - op[half_pitch * 2] = (a1 - b1) >> 1; - op[half_pitch * 3] = (d1 - c1) >> 1; + a1 = ip[4 * 0]; + c1 = ip[4 * 1]; + d1 = ip[4 * 2]; + b1 = ip[4 * 3]; + a1 += c1; + d1 -= b1; + e1 = (a1 - d1) >> 1; + b1 = e1 - b1; + c1 = e1 - c1; + a1 -= b1; + d1 += c1; + dest[dest_stride * 0] = clip_pixel(dest[dest_stride * 0] + a1); + dest[dest_stride * 1] = clip_pixel(dest[dest_stride * 1] + b1); + dest[dest_stride * 2] = clip_pixel(dest[dest_stride * 2] + c1); + dest[dest_stride * 3] = clip_pixel(dest[dest_stride * 3] + d1); ip++; - op++; + dest++; } } -void vp9_short_iwalsh4x4_1_c(int16_t *in, int16_t *out, int pitch) { +void vp9_short_iwalsh4x4_1_add_c(int16_t *in, uint8_t *dest, int dest_stride) { int i; + int a1, e1; int16_t tmp[4]; int16_t *ip = in; int16_t *op = tmp; - const int half_pitch = pitch >> 1; - op[0] = ((ip[0] >> WHT_UPSCALE_FACTOR) + 1) >> 1; - op[1] = op[2] = op[3] = (ip[0] >> WHT_UPSCALE_FACTOR) >> 1; + a1 = ip[0] >> WHT_UPSCALE_FACTOR; + e1 = a1 >> 1; + a1 -= e1; + op[0] = a1; + op[1] = op[2] = op[3] = e1; ip = tmp; - op = out; for (i = 0; i < 4; i++) { - op[half_pitch * 0] = 
(ip[0] + 1) >> 1; - op[half_pitch * 1] = op[half_pitch * 2] = op[half_pitch * 3] = ip[0] >> 1; + e1 = ip[0] >> 1; + a1 = ip[0] - e1; + dest[dest_stride * 0] = clip_pixel(dest[dest_stride * 0] + a1); + dest[dest_stride * 1] = clip_pixel(dest[dest_stride * 1] + e1); + dest[dest_stride * 2] = clip_pixel(dest[dest_stride * 2] + e1); + dest[dest_stride * 3] = clip_pixel(dest[dest_stride * 3] + e1); ip++; - op++; - } -} - -void vp9_dc_only_inv_walsh_add_c(int input_dc, uint8_t *pred_ptr, - uint8_t *dst_ptr, - int pitch, int stride) { - int r, c; - int16_t dc = input_dc; - int16_t tmp[4 * 4]; - vp9_short_iwalsh4x4_1_c(&dc, tmp, 4 << 1); - - for (r = 0; r < 4; r++) { - for (c = 0; c < 4; c++) - dst_ptr[c] = clip_pixel(tmp[r * 4 + c] + pred_ptr[c]); - - dst_ptr += stride; - pred_ptr += pitch; + dest++; } } @@ -116,10 +116,9 @@ void vp9_idct4_1d_c(int16_t *input, int16_t *output) { output[3] = step[0] - step[3]; } -void vp9_short_idct4x4_c(int16_t *input, int16_t *output, int pitch) { +void vp9_short_idct4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride) { int16_t out[4 * 4]; int16_t *outptr = out; - const int half_pitch = pitch >> 1; int i, j; int16_t temp_in[4], temp_out[4]; @@ -138,22 +137,24 @@ void vp9_short_idct4x4_c(int16_t *input, int16_t *output, int pitch) { temp_in[j] = out[j * 4 + i]; vp9_idct4_1d(temp_in, temp_out); for (j = 0; j < 4; ++j) - output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 4); + dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) + + dest[j * dest_stride + i]); } } -void vp9_short_idct4x4_1_c(int16_t *input, int16_t *output, int pitch) { +void vp9_short_idct4x4_1_add_c(int16_t *input, uint8_t *dest, int dest_stride) { int i; int a1; - int16_t *op = output; - const int half_pitch = pitch >> 1; int16_t out = dct_const_round_shift(input[0] * cospi_16_64); out = dct_const_round_shift(out * cospi_16_64); a1 = ROUND_POWER_OF_TWO(out, 4); for (i = 0; i < 4; i++) { - op[0] = op[1] = op[2] = op[3] = a1; - op += half_pitch; + dest[0] = clip_pixel(dest[0] + a1); + dest[1] = clip_pixel(dest[1] + a1); + dest[2] = clip_pixel(dest[2] + a1); + dest[3] = clip_pixel(dest[3] + a1); + dest += dest_stride; } } @@ -219,27 +220,27 @@ static void idct8_1d(int16_t *input, int16_t *output) { output[7] = step1[0] - step1[7]; } -void vp9_short_idct8x8_c(int16_t *input, int16_t *output, int pitch) { +void vp9_short_idct8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride) { int16_t out[8 * 8]; int16_t *outptr = out; - const int half_pitch = pitch >> 1; int i, j; int16_t temp_in[8], temp_out[8]; - // Rows + // First transform rows for (i = 0; i < 8; ++i) { idct8_1d(input, outptr); input += 8; outptr += 8; } - // Columns + // Then transform columns for (i = 0; i < 8; ++i) { for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i]; idct8_1d(temp_in, temp_out); for (j = 0; j < 8; ++j) - output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 5); + dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) + + dest[j * dest_stride + i]); } } @@ -285,8 +286,8 @@ static void iadst4_1d(int16_t *input, int16_t *output) { output[3] = dct_const_round_shift(s3); } -void vp9_short_iht4x4_c(int16_t *input, int16_t *output, - int pitch, int tx_type) { +void vp9_short_iht4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride, + int tx_type) { const transform_2d IHT_4[] = { { vp9_idct4_1d, vp9_idct4_1d }, // DCT_DCT = 0 { iadst4_1d, vp9_idct4_1d }, // ADST_DCT = 1 @@ -312,10 +313,10 @@ void vp9_short_iht4x4_c(int16_t *input, int16_t *output, temp_in[j] = 
out[j * 4 + i]; IHT_4[tx_type].cols(temp_in, temp_out); for (j = 0; j < 4; ++j) - output[j * pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 4); + dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) + + dest[j * dest_stride + i]); } } - static void iadst8_1d(int16_t *input, int16_t *output) { int s0, s1, s2, s3, s4, s5, s6, s7; @@ -400,8 +401,8 @@ static const transform_2d IHT_8[] = { { iadst8_1d, iadst8_1d } // ADST_ADST = 3 }; -void vp9_short_iht8x8_c(int16_t *input, int16_t *output, - int pitch, int tx_type) { +void vp9_short_iht8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride, + int tx_type) { int i, j; int16_t out[8 * 8]; int16_t *outptr = out; @@ -421,14 +422,14 @@ void vp9_short_iht8x8_c(int16_t *input, int16_t *output, temp_in[j] = out[j * 8 + i]; ht.cols(temp_in, temp_out); for (j = 0; j < 8; ++j) - output[j * pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 5); - } + dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) + + dest[j * dest_stride + i]); } } -void vp9_short_idct10_8x8_c(int16_t *input, int16_t *output, int pitch) { +void vp9_short_idct10_8x8_add_c(int16_t *input, uint8_t *dest, + int dest_stride) { int16_t out[8 * 8]; int16_t *outptr = out; - const int half_pitch = pitch >> 1; int i, j; int16_t temp_in[8], temp_out[8]; @@ -447,7 +448,8 @@ void vp9_short_idct10_8x8_c(int16_t *input, int16_t *output, int pitch) { temp_in[j] = out[j * 8 + i]; idct8_1d(temp_in, temp_out); for (j = 0; j < 8; ++j) - output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 5); + dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) + + dest[j * dest_stride + i]); } } @@ -621,10 +623,9 @@ static void idct16_1d(int16_t *input, int16_t *output) { output[15] = step2[0] - step2[15]; } -void vp9_short_idct16x16_c(int16_t *input, int16_t *output, int pitch) { +void vp9_short_idct16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride) { int16_t out[16 * 16]; int16_t *outptr = out; - const int half_pitch = pitch >> 1; int i, j; int16_t temp_in[16], temp_out[16]; @@ -641,7 +642,8 @@ void vp9_short_idct16x16_c(int16_t *input, int16_t *output, int pitch) { temp_in[j] = out[j * 16 + i]; idct16_1d(temp_in, temp_out); for (j = 0; j < 16; ++j) - output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 6); + dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) + + dest[j * dest_stride + i]); } } @@ -823,8 +825,8 @@ static const transform_2d IHT_16[] = { { iadst16_1d, iadst16_1d } // ADST_ADST = 3 }; -void vp9_short_iht16x16_c(int16_t *input, int16_t *output, - int pitch, int tx_type) { +void vp9_short_iht16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride, + int tx_type) { int i, j; int16_t out[16 * 16]; int16_t *outptr = out; @@ -844,37 +846,37 @@ void vp9_short_iht16x16_c(int16_t *input, int16_t *output, temp_in[j] = out[j * 16 + i]; ht.cols(temp_in, temp_out); for (j = 0; j < 16; ++j) - output[j * pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 6); - } + dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) + + dest[j * dest_stride + i]); } } -void vp9_short_idct10_16x16_c(int16_t *input, int16_t *output, int pitch) { - int16_t out[16 * 16]; - int16_t *outptr = out; - const int half_pitch = pitch >> 1; - int i, j; - int16_t temp_in[16], temp_out[16]; - - /* First transform rows. Since all non-zero dct coefficients are in - * upper-left 4x4 area, we only need to calculate first 4 rows here. 
- */ - vpx_memset(out, 0, sizeof(out)); - for (i = 0; i < 4; ++i) { - idct16_1d(input, outptr); - input += 16; - outptr += 16; - } - - // Then transform columns - for (i = 0; i < 16; ++i) { - for (j = 0; j < 16; ++j) - temp_in[j] = out[j*16 + i]; - idct16_1d(temp_in, temp_out); - for (j = 0; j < 16; ++j) - output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 6); - } -} +void vp9_short_idct10_16x16_add_c(int16_t *input, uint8_t *dest, + int dest_stride) { + int16_t out[16 * 16]; + int16_t *outptr = out; + int i, j; + int16_t temp_in[16], temp_out[16]; + + /* First transform rows. Since all non-zero dct coefficients are in + * upper-left 4x4 area, we only need to calculate first 4 rows here. + */ + vpx_memset(out, 0, sizeof(out)); + for (i = 0; i < 4; ++i) { + idct16_1d(input, outptr); + input += 16; + outptr += 16; + } + // Then transform columns + for (i = 0; i < 16; ++i) { + for (j = 0; j < 16; ++j) + temp_in[j] = out[j*16 + i]; + idct16_1d(temp_in, temp_out); + for (j = 0; j < 16; ++j) + dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) + + dest[j * dest_stride + i]); + } +} void vp9_short_idct1_16x16_c(int16_t *input, int16_t *output) { int16_t out = dct_const_round_shift(input[0] * cospi_16_64); @@ -1249,10 +1251,9 @@ static void idct32_1d(int16_t *input, int16_t *output) { output[31] = step1[0] - step1[31]; } -void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) { +void vp9_short_idct32x32_add_c(int16_t *input, uint8_t *dest, int dest_stride) { int16_t out[32 * 32]; int16_t *outptr = out; - const int half_pitch = pitch >> 1; int i, j; int16_t temp_in[32], temp_out[32]; @@ -1269,7 +1270,8 @@ void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) { temp_in[j] = out[j * 32 + i]; idct32_1d(temp_in, temp_out); for (j = 0; j < 32; ++j) - output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 6); + dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) + + dest[j * dest_stride + i]); } } @@ -1279,10 +1281,10 @@ void vp9_short_idct1_32x32_c(int16_t *input, int16_t *output) { output[0] = ROUND_POWER_OF_TWO(out, 6); } -void vp9_short_idct10_32x32_c(int16_t *input, int16_t *output, int pitch) { +void vp9_short_idct10_32x32_add_c(int16_t *input, uint8_t *dest, + int dest_stride) { int16_t out[32 * 32]; int16_t *outptr = out; - const int half_pitch = pitch >> 1; int i, j; int16_t temp_in[32], temp_out[32]; @@ -1302,6 +1304,7 @@ void vp9_short_idct10_32x32_c(int16_t *input, int16_t *output, int pitch) { temp_in[j] = out[j * 32 + i]; idct32_1d(temp_in, temp_out); for (j = 0; j < 32; ++j) - output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 6); + dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) + + dest[j * dest_stride + i]); } } diff --git a/vp9/common/vp9_idct.h b/vp9/common/vp9_idct.h index 176bf5da4311ad2fbf30921f852cc1772ff17a5a..af35432c44cc8998ccea332801a5fc124ad2aa69 100644 --- a/vp9/common/vp9_idct.h +++ b/vp9/common/vp9_idct.h @@ -17,6 +17,7 @@ #include "vpx/vpx_integer.h" #include "vp9/common/vp9_common.h" + // Constants and Macros used by all idct/dct functions #define DCT_CONST_BITS 14 #define DCT_CONST_ROUNDING (1 << (DCT_CONST_BITS - 1)) diff --git a/vp9/common/vp9_implicit_segmentation.c b/vp9/common/vp9_implicit_segmentation.c index e88eec48cc9d51025d97f80437249d01737d9419..2a1d35f9a1ce01282e574e25a8564c87e9bd3f11 100644 --- a/vp9/common/vp9_implicit_segmentation.c +++ b/vp9/common/vp9_implicit_segmentation.c @@ -140,11 +140,11 @@ void 
segment_via_mode_info(VP9_COMMON *oci, int how) { break; case SEGMENT_MV: n = mi[mb_index].mbmi.mv[0].as_int; - if (mi[mb_index].mbmi.ref_frame == INTRA_FRAME) + if (mi[mb_index].mbmi.ref_frame[0] == INTRA_FRAME) n = -9999999; break; case SEGMENT_REFFRAME: - n = mi[mb_index].mbmi.ref_frame; + n = mi[mb_index].mbmi.ref_frame[0]; break; case SEGMENT_SKIPPED: n = mi[mb_index].mbmi.mb_skip_coeff; @@ -191,11 +191,12 @@ void segment_via_mode_info(VP9_COMMON *oci, int how) { // give new labels to regions for (i = 1; i < label; i++) - if (labels[i].next->count > min_mbs_in_region && labels[labels[i].next->label].label == 0) { + if (labels[i].next->count > min_mbs_in_region && + labels[labels[i].next->label].label == 0) { segment_info *cs = &segments[label_count]; cs->label = label_count; labels[labels[i].next->label].label = label_count++; - labels[labels[i].next->label].seg_value = labels[i].next->seg_value; + labels[labels[i].next->label].seg_value = labels[i].next->seg_value; cs->seg_value = labels[labels[i].next->label].seg_value; cs->min_x = oci->mb_cols; cs->min_y = oci->mb_rows; @@ -204,24 +205,21 @@ void segment_via_mode_info(VP9_COMMON *oci, int how) { cs->sum_x = 0; cs->sum_y = 0; cs->pixels = 0; - } + lp = labeling; // this is just to gather stats... for (i = 0; i < oci->mb_rows; i++, lp += pitch) { for (j = 0; j < oci->mb_cols; j++) { - segment_info *cs; - int oldlab = labels[lp[j]].next->label; - int lab = labels[oldlab].label; - lp[j] = lab; - - cs = &segments[lab]; - - cs->min_x = (j < cs->min_x ? j : cs->min_x); - cs->max_x = (j > cs->max_x ? j : cs->max_x); - cs->min_y = (i < cs->min_y ? i : cs->min_y); - cs->max_y = (i > cs->max_y ? i : cs->max_y); + const int old_lab = labels[lp[j]].next->label; + const int lab = labels[old_lab].label; + segment_info *cs = &segments[lab]; + + cs->min_x = MIN(cs->min_x, j); + cs->max_x = MAX(cs->max_x, j); + cs->min_y = MIN(cs->min_y, i); + cs->max_y = MAX(cs->max_y, i); cs->sum_x += j; cs->sum_y += i; cs->pixels++; diff --git a/vp9/common/vp9_invtrans.c b/vp9/common/vp9_invtrans.c index a03a66e338809eab9c250dc918472de9d673cc30..d47fca190b4cb5bdf38746c911511a4ea078fe65 100644 --- a/vp9/common/vp9_invtrans.c +++ b/vp9/common/vp9_invtrans.c @@ -11,311 +11,10 @@ #include "vp9/common/vp9_invtrans.h" #include "./vp9_rtcd.h" -void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int eob, - int16_t *dqcoeff, int16_t *diff, - int pitch) { +void vp9_inverse_transform_b_4x4_add(MACROBLOCKD *xd, int eob, int16_t *dqcoeff, + uint8_t *dest, int stride) { if (eob <= 1) - xd->inv_txm4x4_1(dqcoeff, diff, pitch); + xd->inv_txm4x4_1_add(dqcoeff, dest, stride); else - xd->inv_txm4x4(dqcoeff, diff, pitch); -} - -void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) { - int i; - - for (i = 0; i < 16; i++) { - TX_TYPE tx_type = get_tx_type_4x4(xd, i); - if (tx_type != DCT_DCT) { - vp9_short_iht4x4(xd->block[i].dqcoeff, xd->block[i].diff, 16, tx_type); - } else { - vp9_inverse_transform_b_4x4(xd, xd->eobs[i], xd->block[i].dqcoeff, - xd->block[i].diff, 32); - } - } -} - -void vp9_inverse_transform_mbuv_4x4(MACROBLOCKD *xd) { - int i; - - for (i = 16; i < 24; i++) { - vp9_inverse_transform_b_4x4(xd, xd->eobs[i], xd->block[i].dqcoeff, - xd->block[i].diff, 16); - } -} - -void vp9_inverse_transform_mb_4x4(MACROBLOCKD *xd) { - vp9_inverse_transform_mby_4x4(xd); - vp9_inverse_transform_mbuv_4x4(xd); -} - -void vp9_inverse_transform_b_8x8(int16_t *input_dqcoeff, int16_t *output_coeff, - int pitch) { - vp9_short_idct8x8(input_dqcoeff, output_coeff, pitch); -} - -void 
vp9_inverse_transform_mby_8x8(MACROBLOCKD *xd) { - int i; - BLOCKD *blockd = xd->block; - - for (i = 0; i < 9; i += 8) { - TX_TYPE tx_type = get_tx_type_8x8(xd, i); - if (tx_type != DCT_DCT) { - vp9_short_iht8x8(xd->block[i].dqcoeff, xd->block[i].diff, 16, tx_type); - } else { - vp9_inverse_transform_b_8x8(&blockd[i].dqcoeff[0], - &blockd[i].diff[0], 32); - } - } - for (i = 2; i < 11; i += 8) { - TX_TYPE tx_type = get_tx_type_8x8(xd, i); - if (tx_type != DCT_DCT) { - vp9_short_iht8x8(xd->block[i + 2].dqcoeff, xd->block[i].diff, - 16, tx_type); - } else { - vp9_inverse_transform_b_8x8(&blockd[i + 2].dqcoeff[0], - &blockd[i].diff[0], 32); - } - } -} - -void vp9_inverse_transform_mbuv_8x8(MACROBLOCKD *xd) { - int i; - BLOCKD *blockd = xd->block; - - for (i = 16; i < 24; i += 4) { - vp9_inverse_transform_b_8x8(&blockd[i].dqcoeff[0], - &blockd[i].diff[0], 16); - } -} - -void vp9_inverse_transform_mb_8x8(MACROBLOCKD *xd) { - vp9_inverse_transform_mby_8x8(xd); - vp9_inverse_transform_mbuv_8x8(xd); -} - -void vp9_inverse_transform_b_16x16(int16_t *input_dqcoeff, - int16_t *output_coeff, int pitch) { - vp9_short_idct16x16(input_dqcoeff, output_coeff, pitch); -} - -void vp9_inverse_transform_mby_16x16(MACROBLOCKD *xd) { - BLOCKD *bd = &xd->block[0]; - TX_TYPE tx_type = get_tx_type_16x16(xd, 0); - if (tx_type != DCT_DCT) { - vp9_short_iht16x16(bd->dqcoeff, bd->diff, 16, tx_type); - } else { - vp9_inverse_transform_b_16x16(&xd->block[0].dqcoeff[0], - &xd->block[0].diff[0], 32); - } -} - -void vp9_inverse_transform_mb_16x16(MACROBLOCKD *xd) { - vp9_inverse_transform_mby_16x16(xd); - vp9_inverse_transform_mbuv_8x8(xd); -} - -void vp9_inverse_transform_sby_32x32(MACROBLOCKD *xd) { - vp9_short_idct32x32(xd->dqcoeff, xd->diff, 64); -} - -void vp9_inverse_transform_sby_16x16(MACROBLOCKD *xd) { - int n; - - for (n = 0; n < 4; n++) { - const int x_idx = n & 1, y_idx = n >> 1; - const TX_TYPE tx_type = get_tx_type_16x16(xd, (y_idx * 8 + x_idx) * 4); - - if (tx_type == DCT_DCT) { - vp9_inverse_transform_b_16x16(xd->dqcoeff + n * 256, - xd->diff + x_idx * 16 + y_idx * 32 * 16, - 64); - } else { - vp9_short_iht16x16(xd->dqcoeff + n * 256, - xd->diff + x_idx * 16 + y_idx * 32 * 16, 32, tx_type); - } - } -} - -void vp9_inverse_transform_sby_8x8(MACROBLOCKD *xd) { - int n; - - for (n = 0; n < 16; n++) { - const int x_idx = n & 3, y_idx = n >> 2; - const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 8 + x_idx) * 2); - - if (tx_type == DCT_DCT) { - vp9_inverse_transform_b_8x8(xd->dqcoeff + n * 64, - xd->diff + x_idx * 8 + y_idx * 32 * 8, 64); - } else { - vp9_short_iht8x8(xd->dqcoeff + n * 64, - xd->diff + x_idx * 8 + y_idx * 32 * 8, 32, tx_type); - } - } -} - -void vp9_inverse_transform_sby_4x4(MACROBLOCKD *xd) { - int n; - - for (n = 0; n < 64; n++) { - const int x_idx = n & 7, y_idx = n >> 3; - const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 8 + x_idx); - - if (tx_type == DCT_DCT) { - vp9_inverse_transform_b_4x4(xd, xd->eobs[n], xd->dqcoeff + n * 16, - xd->diff + x_idx * 4 + y_idx * 4 * 32, 64); - } else { - vp9_short_iht4x4(xd->dqcoeff + n * 16, - xd->diff + x_idx * 4 + y_idx * 4 * 32, 32, tx_type); - } - } -} - -void vp9_inverse_transform_sbuv_16x16(MACROBLOCKD *xd) { - vp9_inverse_transform_b_16x16(xd->dqcoeff + 1024, - xd->diff + 1024, 32); - vp9_inverse_transform_b_16x16(xd->dqcoeff + 1280, - xd->diff + 1280, 32); -} - -void vp9_inverse_transform_sbuv_8x8(MACROBLOCKD *xd) { - int n; - - for (n = 0; n < 4; n++) { - const int x_idx = n & 1, y_idx = n >> 1; - - vp9_inverse_transform_b_8x8(xd->dqcoeff + 1024 
+ n * 64, - xd->diff + 1024 + x_idx * 8 + y_idx * 16 * 8, - 32); - vp9_inverse_transform_b_8x8(xd->dqcoeff + 1280 + n * 64, - xd->diff + 1280 + x_idx * 8 + y_idx * 16 * 8, - 32); - } -} - -void vp9_inverse_transform_sbuv_4x4(MACROBLOCKD *xd) { - int n; - - for (n = 0; n < 16; n++) { - const int x_idx = n & 3, y_idx = n >> 2; - - vp9_inverse_transform_b_4x4(xd, xd->eobs[64 + n], - xd->dqcoeff + 1024 + n * 16, - xd->diff + 1024 + x_idx * 4 + y_idx * 16 * 4, - 32); - vp9_inverse_transform_b_4x4(xd, xd->eobs[64 + 16 + n], - xd->dqcoeff + 1280 + n * 16, - xd->diff + 1280 + x_idx * 4 + y_idx * 16 * 4, - 32); - } -} - -void vp9_inverse_transform_sb64y_32x32(MACROBLOCKD *xd) { - int n; - - for (n = 0; n < 4; n++) { - const int x_idx = n & 1, y_idx = n >> 1; - - vp9_short_idct32x32(xd->dqcoeff + n * 1024, - xd->diff + x_idx * 32 + y_idx * 32 * 64, 128); - } -} - -void vp9_inverse_transform_sb64y_16x16(MACROBLOCKD *xd) { - int n; - - for (n = 0; n < 16; n++) { - const int x_idx = n & 3, y_idx = n >> 2; - const TX_TYPE tx_type = get_tx_type_16x16(xd, (y_idx * 16 + x_idx) * 4); - - if (tx_type == DCT_DCT) { - vp9_inverse_transform_b_16x16(xd->dqcoeff + n * 256, - xd->diff + x_idx * 16 + y_idx * 64 * 16, - 128); - } else { - vp9_short_iht16x16(xd->dqcoeff + n * 256, - xd->diff + x_idx * 16 + y_idx * 64 * 16, 64, tx_type); - } - } -} - -void vp9_inverse_transform_sb64y_8x8(MACROBLOCKD *xd) { - int n; - - for (n = 0; n < 64; n++) { - const int x_idx = n & 7, y_idx = n >> 3; - const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 16 + x_idx) * 2); - - if (tx_type == DCT_DCT) { - vp9_inverse_transform_b_8x8(xd->dqcoeff + n * 64, - xd->diff + x_idx * 8 + y_idx * 64 * 8, 128); - } else { - vp9_short_iht8x8(xd->dqcoeff + n * 64, - xd->diff + x_idx * 8 + y_idx * 64 * 8, 64, tx_type); - } - } -} - -void vp9_inverse_transform_sb64y_4x4(MACROBLOCKD *xd) { - int n; - - for (n = 0; n < 256; n++) { - const int x_idx = n & 15, y_idx = n >> 4; - const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 16 + x_idx); - - if (tx_type == DCT_DCT) { - vp9_inverse_transform_b_4x4(xd, xd->eobs[n], xd->dqcoeff + n * 16, - xd->diff + x_idx * 4 + y_idx * 4 * 64, 128); - } else { - vp9_short_iht4x4(xd->dqcoeff + n * 16, - xd->diff + x_idx * 4 + y_idx * 4 * 64, 64, tx_type); - } - } -} - -void vp9_inverse_transform_sb64uv_32x32(MACROBLOCKD *xd) { - vp9_short_idct32x32(xd->dqcoeff + 4096, - xd->diff + 4096, 64); - vp9_short_idct32x32(xd->dqcoeff + 4096 + 1024, - xd->diff + 4096 + 1024, 64); -} - -void vp9_inverse_transform_sb64uv_16x16(MACROBLOCKD *xd) { - int n; - - for (n = 0; n < 4; n++) { - const int x_idx = n & 1, y_idx = n >> 1, off = x_idx * 16 + y_idx * 32 * 16; - - vp9_inverse_transform_b_16x16(xd->dqcoeff + 4096 + n * 256, - xd->diff + 4096 + off, 64); - vp9_inverse_transform_b_16x16(xd->dqcoeff + 4096 + 1024 + n * 256, - xd->diff + 4096 + 1024 + off, 64); - } -} - -void vp9_inverse_transform_sb64uv_8x8(MACROBLOCKD *xd) { - int n; - - for (n = 0; n < 16; n++) { - const int x_idx = n & 3, y_idx = n >> 2, off = x_idx * 8 + y_idx * 32 * 8; - - vp9_inverse_transform_b_8x8(xd->dqcoeff + 4096 + n * 64, - xd->diff + 4096 + off, 64); - vp9_inverse_transform_b_8x8(xd->dqcoeff + 4096 + 1024 + n * 64, - xd->diff + 4096 + 1024 + off, 64); - } -} - -void vp9_inverse_transform_sb64uv_4x4(MACROBLOCKD *xd) { - int n; - - for (n = 0; n < 64; n++) { - const int x_idx = n & 7, y_idx = n >> 3, off = x_idx * 4 + y_idx * 32 * 4; - - vp9_inverse_transform_b_4x4(xd, xd->eobs[256 + n], - xd->dqcoeff + 4096 + n * 16, - xd->diff + 4096 + off, 64); - 
vp9_inverse_transform_b_4x4(xd, xd->eobs[256 + 64 + n], - xd->dqcoeff + 4096 + 1024 + n * 16, - xd->diff + 4096 + 1024 + off, 64); - } + xd->inv_txm4x4_add(dqcoeff, dest, stride); } diff --git a/vp9/common/vp9_invtrans.h b/vp9/common/vp9_invtrans.h index 89916570d0d9dfe0f22e50437a9e89578784671a..dbdc50a2a4a774c2581721923a2dec6e04356f89 100644 --- a/vp9/common/vp9_invtrans.h +++ b/vp9/common/vp9_invtrans.h @@ -15,47 +15,6 @@ #include "vpx/vpx_integer.h" #include "vp9/common/vp9_blockd.h" -void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int eob, - int16_t *dqcoeff, int16_t *diff, - int pitch); - -void vp9_inverse_transform_mb_4x4(MACROBLOCKD *xd); - -void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd); - -void vp9_inverse_transform_mbuv_4x4(MACROBLOCKD *xd); - -void vp9_inverse_transform_b_8x8(int16_t *input_dqcoeff, - int16_t *output_coeff, int pitch); - -void vp9_inverse_transform_mb_8x8(MACROBLOCKD *xd); - -void vp9_inverse_transform_mby_8x8(MACROBLOCKD *xd); - -void vp9_inverse_transform_mbuv_8x8(MACROBLOCKD *xd); - -void vp9_inverse_transform_b_16x16(int16_t *input_dqcoeff, - int16_t *output_coeff, int pitch); - -void vp9_inverse_transform_mb_16x16(MACROBLOCKD *xd); - -void vp9_inverse_transform_mby_16x16(MACROBLOCKD *xd); - -void vp9_inverse_transform_sby_32x32(MACROBLOCKD *xd); -void vp9_inverse_transform_sby_16x16(MACROBLOCKD *xd); -void vp9_inverse_transform_sby_8x8(MACROBLOCKD *xd); -void vp9_inverse_transform_sby_4x4(MACROBLOCKD *xd); -void vp9_inverse_transform_sbuv_16x16(MACROBLOCKD *xd); -void vp9_inverse_transform_sbuv_8x8(MACROBLOCKD *xd); -void vp9_inverse_transform_sbuv_4x4(MACROBLOCKD *xd); - -void vp9_inverse_transform_sb64y_32x32(MACROBLOCKD *xd); -void vp9_inverse_transform_sb64y_16x16(MACROBLOCKD *xd); -void vp9_inverse_transform_sb64y_8x8(MACROBLOCKD *xd); -void vp9_inverse_transform_sb64y_4x4(MACROBLOCKD *xd); -void vp9_inverse_transform_sb64uv_32x32(MACROBLOCKD *xd); -void vp9_inverse_transform_sb64uv_16x16(MACROBLOCKD *xd); -void vp9_inverse_transform_sb64uv_8x8(MACROBLOCKD *xd); -void vp9_inverse_transform_sb64uv_4x4(MACROBLOCKD *xd); - +void vp9_inverse_transform_b_4x4_add(MACROBLOCKD *xd, int eob, int16_t *dqcoeff, + uint8_t *dest, int stride); #endif // VP9_COMMON_VP9_INVTRANS_H_ diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c index cbdb273b0401c2ad4a411b292e94ff8062dc2d3d..f623c21db5680bf31c459742a212f9f36bd2fc63 100644 --- a/vp9/common/vp9_loopfilter.c +++ b/vp9/common/vp9_loopfilter.c @@ -11,46 +11,26 @@ #include "vpx_config.h" #include "vp9/common/vp9_loopfilter.h" #include "vp9/common/vp9_onyxc_int.h" +#include "vp9/common/vp9_reconinter.h" #include "vpx_mem/vpx_mem.h" #include "vp9/common/vp9_seg_common.h" static void lf_init_lut(loop_filter_info_n *lfi) { - int filt_lvl; - - for (filt_lvl = 0; filt_lvl <= MAX_LOOP_FILTER; filt_lvl++) { - if (filt_lvl >= 40) { - lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 2; - lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 3; - } else if (filt_lvl >= 20) { - lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 1; - lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 2; - } else if (filt_lvl >= 15) { - lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 1; - lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 1; - } else { - lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 0; - lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 0; - } - } - - lfi->mode_lf_lut[DC_PRED] = 1; - lfi->mode_lf_lut[D45_PRED] = 1; - lfi->mode_lf_lut[D135_PRED] = 1; - lfi->mode_lf_lut[D117_PRED] = 1; - lfi->mode_lf_lut[D153_PRED] = 1; - lfi->mode_lf_lut[D27_PRED] = 1; - 
lfi->mode_lf_lut[D63_PRED] = 1;
-  lfi->mode_lf_lut[V_PRED] = 1;
-  lfi->mode_lf_lut[H_PRED] = 1;
-  lfi->mode_lf_lut[TM_PRED] = 1;
-  lfi->mode_lf_lut[B_PRED] = 0;
-  lfi->mode_lf_lut[I8X8_PRED] = 0;
-  lfi->mode_lf_lut[ZEROMV] = 1;
-  lfi->mode_lf_lut[NEARESTMV] = 2;
-  lfi->mode_lf_lut[NEARMV] = 2;
-  lfi->mode_lf_lut[NEWMV] = 2;
-  lfi->mode_lf_lut[SPLITMV] = 3;
+  lfi->mode_lf_lut[DC_PRED] = 0;
+  lfi->mode_lf_lut[D45_PRED] = 0;
+  lfi->mode_lf_lut[D135_PRED] = 0;
+  lfi->mode_lf_lut[D117_PRED] = 0;
+  lfi->mode_lf_lut[D153_PRED] = 0;
+  lfi->mode_lf_lut[D27_PRED] = 0;
+  lfi->mode_lf_lut[D63_PRED] = 0;
+  lfi->mode_lf_lut[V_PRED] = 0;
+  lfi->mode_lf_lut[H_PRED] = 0;
+  lfi->mode_lf_lut[TM_PRED] = 0;
+  lfi->mode_lf_lut[ZEROMV] = 0;
+  lfi->mode_lf_lut[NEARESTMV] = 1;
+  lfi->mode_lf_lut[NEARMV] = 1;
+  lfi->mode_lf_lut[NEWMV] = 1;
 }
 
 void vp9_loop_filter_update_sharpness(loop_filter_info_n *lfi,
@@ -86,25 +66,28 @@ void vp9_loop_filter_init(VP9_COMMON *cm) {
   loop_filter_info_n *lfi = &cm->lf_info;
   int i;
 
-  /* init limits for given sharpness*/
+  // init limits for given sharpness
   vp9_loop_filter_update_sharpness(lfi, cm->sharpness_level);
   cm->last_sharpness_level = cm->sharpness_level;
 
-  /* init LUT for lvl and hev thr picking */
+  // init LUT for lvl and hev thr picking
   lf_init_lut(lfi);
 
-  /* init hev threshold const vectors */
-  for (i = 0; i < 4; i++) {
+  // init hev threshold const vectors
+  for (i = 0; i < 4; i++)
     vpx_memset(lfi->hev_thr[i], i, SIMD_WIDTH);
-  }
 }
 
 void vp9_loop_filter_frame_init(VP9_COMMON *cm,
                                 MACROBLOCKD *xd,
                                 int default_filt_lvl) {
-  int seg,    /* segment number */
-      ref,    /* index in ref_lf_deltas */
-      mode;   /* index in mode_lf_deltas */
+  int seg,    // segment number
+      ref,    // index in ref_lf_deltas
+      mode;   // index in mode_lf_deltas
+  // n_shift is the multiplier for lf_deltas: it is 1 when filter_lvl is
+  // between 0 and 31, and 2 when filter_lvl is between 32 and 63
+  int n_shift = default_filt_lvl >> 5;
 
   loop_filter_info_n *lfi = &cm->lf_info;
@@ -147,360 +130,278 @@ void vp9_loop_filter_frame_init(VP9_COMMON *cm,
     ref = INTRA_FRAME;
 
     /* Apply delta for reference frame */
-    lvl_ref += xd->ref_lf_deltas[ref];
+    lvl_ref += xd->ref_lf_deltas[ref] << n_shift;
 
-    /* Apply delta for Intra modes */
-    mode = 0; /* B_PRED */
-    /* Only the split mode BPRED has a further special case */
-    lvl_mode = clamp(lvl_ref + xd->mode_lf_deltas[mode], 0, 63);
-
-    lfi->lvl[seg][ref][mode] = lvl_mode;
-
-    mode = 1; /* all the rest of Intra modes */
-    lvl_mode = clamp(lvl_ref, 0, 63);
-    lfi->lvl[seg][ref][mode] = lvl_mode;
+    mode = 0; /* all the rest of Intra modes */
+    lvl_mode = lvl_ref;
+    lfi->lvl[seg][ref][mode] = clamp(lvl_mode, 0, 63);
 
     /* LAST, GOLDEN, ALT */
     for (ref = 1; ref < MAX_REF_FRAMES; ref++) {
       int lvl_ref = lvl_seg;
 
       /* Apply delta for reference frame */
-      lvl_ref += xd->ref_lf_deltas[ref];
+      lvl_ref += xd->ref_lf_deltas[ref] << n_shift;
 
       /* Apply delta for Inter modes */
-      for (mode = 1; mode < 4; mode++) {
-        lvl_mode = clamp(lvl_ref + xd->mode_lf_deltas[mode], 0, 63);
-        lfi->lvl[seg][ref][mode] = lvl_mode;
+      for (mode = 0; mode < MAX_MODE_LF_DELTAS; mode++) {
+        lvl_mode = lvl_ref + (xd->mode_lf_deltas[mode] << n_shift);
+        lfi->lvl[seg][ref][mode] = clamp(lvl_mode, 0, 63);
       }
     }
   }
 }
 
-// Determine if we should skip inner-MB loop filtering within a MB
-// The current condition is that the loop filtering is skipped only
-// the MB uses a prediction size of 16x16 and either 16x16 transform
-// is used or there is no residue at all.
-static int mb_lf_skip(const MB_MODE_INFO *const mbmi) { - const MB_PREDICTION_MODE mode = mbmi->mode; - const int skip_coef = mbmi->mb_skip_coeff; - const int tx_size = mbmi->txfm_size; - return mode != B_PRED && mode != I8X8_PRED && mode != SPLITMV && - (tx_size >= TX_16X16 || skip_coef); +static int build_lfi(const VP9_COMMON *cm, const MB_MODE_INFO *mbmi, + struct loop_filter_info *lfi) { + const loop_filter_info_n *lfi_n = &cm->lf_info; + int mode = mbmi->mode; + int mode_index = lfi_n->mode_lf_lut[mode]; + int seg = mbmi->segment_id; + int ref_frame = mbmi->ref_frame[0]; + int filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; + + if (filter_level) { + const int hev_index = filter_level >> 4; + lfi->mblim = lfi_n->mblim[filter_level]; + lfi->blim = lfi_n->blim[filter_level]; + lfi->lim = lfi_n->lim[filter_level]; + lfi->hev_thr = lfi_n->hev_thr[hev_index]; + return 1; + } + return 0; } -// Determine if we should skip MB loop filtering on a MB edge within -// a superblock, the current condition is that MB loop filtering is -// skipped only when both MBs do not use inner MB loop filtering, and -// same motion vector with same reference frame -static int sb_mb_lf_skip(const MODE_INFO *const mip0, - const MODE_INFO *const mip1) { - const MB_MODE_INFO *mbmi0 = &mip0->mbmi; - const MB_MODE_INFO *mbmi1 = &mip0->mbmi; - return mb_lf_skip(mbmi0) && mb_lf_skip(mbmi1) && - (mbmi0->ref_frame == mbmi1->ref_frame) && - (mbmi0->mv[mbmi0->ref_frame].as_int == - mbmi1->mv[mbmi1->ref_frame].as_int) && - mbmi0->ref_frame != INTRA_FRAME; -} +static void filter_selectively_vert(uint8_t *s, int pitch, + unsigned int mask_16x16, + unsigned int mask_8x8, + unsigned int mask_4x4, + unsigned int mask_4x4_int, + const struct loop_filter_info *lfi) { + unsigned int mask; + + for (mask = mask_16x16 | mask_8x8 | mask_4x4; mask; mask >>= 1) { + if (mask & 1) { + if (mask_16x16 & 1) { + vp9_mb_lpf_vertical_edge_w(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, 1); + assert(!(mask_8x8 & 1)); + assert(!(mask_4x4 & 1)); + assert(!(mask_4x4_int & 1)); + } else if (mask_8x8 & 1) { + vp9_mbloop_filter_vertical_edge(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, 1); + assert(!(mask_16x16 & 1)); + assert(!(mask_4x4 & 1)); + } else if (mask_4x4 & 1) { + vp9_loop_filter_vertical_edge(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, 1); + assert(!(mask_16x16 & 1)); + assert(!(mask_8x8 & 1)); + } else { + assert(0); + } -void vp9_loop_filter_frame(VP9_COMMON *cm, - MACROBLOCKD *xd, - int frame_filter_level, - int y_only, - int dering) { - YV12_BUFFER_CONFIG *post = cm->frame_to_show; - loop_filter_info_n *lfi_n = &cm->lf_info; - struct loop_filter_info lfi; - const FRAME_TYPE frame_type = cm->frame_type; - int mb_row, mb_col; - uint8_t *y_ptr, *u_ptr, *v_ptr; - - /* Point at base of Mb MODE_INFO list */ - const MODE_INFO *mode_info_context = cm->mi; - const int mis = cm->mode_info_stride; - - /* Initialize the loop filter for this frame. 
*/ - vp9_loop_filter_frame_init(cm, xd, frame_filter_level); - /* Set up the buffer pointers */ - y_ptr = post->y_buffer; - if (y_only) { - u_ptr = 0; - v_ptr = 0; - } else { - u_ptr = post->u_buffer; - v_ptr = post->v_buffer; + if (mask_4x4_int & 1) + vp9_loop_filter_vertical_edge(s + 4, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, 1); + } + s += 8; + lfi++; + mask_16x16 >>= 1; + mask_8x8 >>= 1; + mask_4x4 >>= 1; + mask_4x4_int >>= 1; } +} - /* vp9_filter each macro block */ - for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) { - for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { - const MB_PREDICTION_MODE mode = mode_info_context->mbmi.mode; - const int mode_index = lfi_n->mode_lf_lut[mode]; - const int seg = mode_info_context->mbmi.segment_id; - const int ref_frame = mode_info_context->mbmi.ref_frame; - const int filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; - if (filter_level) { - const int skip_lf = mb_lf_skip(&mode_info_context->mbmi); - const int tx_size = mode_info_context->mbmi.txfm_size; - if (cm->filter_type == NORMAL_LOOPFILTER) { - const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level]; - lfi.mblim = lfi_n->mblim[filter_level]; - lfi.blim = lfi_n->blim[filter_level]; - lfi.lim = lfi_n->lim[filter_level]; - lfi.hev_thr = lfi_n->hev_thr[hev_index]; - - if (mb_col > 0 && - !((mb_col & 1) && mode_info_context->mbmi.sb_type && - (sb_mb_lf_skip(mode_info_context - 1, mode_info_context) || - tx_size >= TX_32X32)) - ) { - if (tx_size >= TX_16X16) - vp9_lpf_mbv_w(y_ptr, u_ptr, v_ptr, post->y_stride, - post->uv_stride, &lfi); - else - vp9_loop_filter_mbv(y_ptr, u_ptr, v_ptr, post->y_stride, - post->uv_stride, &lfi); - } - if (!skip_lf) { - if (tx_size >= TX_8X8) { - if (tx_size == TX_8X8 && (mode == I8X8_PRED || mode == SPLITMV)) - vp9_loop_filter_bv8x8(y_ptr, u_ptr, v_ptr, post->y_stride, - post->uv_stride, &lfi); - else - vp9_loop_filter_bv8x8(y_ptr, NULL, NULL, post->y_stride, - post->uv_stride, &lfi); - } else { - vp9_loop_filter_bv(y_ptr, u_ptr, v_ptr, post->y_stride, - post->uv_stride, &lfi); - } - } - /* don't apply across umv border */ - if (mb_row > 0 && - !((mb_row & 1) && mode_info_context->mbmi.sb_type && - (sb_mb_lf_skip(mode_info_context - mis, mode_info_context) || - tx_size >= TX_32X32)) - ) { - if (tx_size >= TX_16X16) - vp9_lpf_mbh_w(y_ptr, u_ptr, v_ptr, post->y_stride, - post->uv_stride, &lfi); - else - vp9_loop_filter_mbh(y_ptr, u_ptr, v_ptr, post->y_stride, - post->uv_stride, &lfi); - } - if (!skip_lf) { - if (tx_size >= TX_8X8) { - if (tx_size == TX_8X8 && (mode == I8X8_PRED || mode == SPLITMV)) - vp9_loop_filter_bh8x8(y_ptr, u_ptr, v_ptr, post->y_stride, - post->uv_stride, &lfi); - else - vp9_loop_filter_bh8x8(y_ptr, NULL, NULL, post->y_stride, - post->uv_stride, &lfi); - } else { - vp9_loop_filter_bh(y_ptr, u_ptr, v_ptr, post->y_stride, - post->uv_stride, &lfi); - } - } -#if CONFIG_LOOP_DERING - if (dering) { - if (mb_row && mb_row < cm->mb_rows - 1 && - mb_col && mb_col < cm->mb_cols - 1) { - vp9_post_proc_down_and_across(y_ptr, y_ptr, - post->y_stride, post->y_stride, - 16, 16, dering); - if (!y_only) { - vp9_post_proc_down_and_across(u_ptr, u_ptr, - post->uv_stride, post->uv_stride, - 8, 8, dering); - vp9_post_proc_down_and_across(v_ptr, v_ptr, - post->uv_stride, post->uv_stride, - 8, 8, dering); - } - } else { - // Adjust the filter so that no out-of-frame data is used. 
-            uint8_t *dr_y = y_ptr, *dr_u = u_ptr, *dr_v = v_ptr;
-            int w_adjust = 0;
-            int h_adjust = 0;
-
-            if (mb_col == 0) {
-              dr_y += 2;
-              dr_u += 2;
-              dr_v += 2;
-              w_adjust += 2;
-            }
-            if (mb_col == cm->mb_cols - 1)
-              w_adjust += 2;
-            if (mb_row == 0) {
-              dr_y += 2 * post->y_stride;
-              dr_u += 2 * post->uv_stride;
-              dr_v += 2 * post->uv_stride;
-              h_adjust += 2;
-            }
-            if (mb_row == cm->mb_rows - 1)
-              h_adjust += 2;
-            vp9_post_proc_down_and_across_c(dr_y, dr_y,
-                                            post->y_stride, post->y_stride,
-                                            16 - w_adjust, 16 - h_adjust,
-                                            dering);
-            if (!y_only) {
-              vp9_post_proc_down_and_across_c(dr_u, dr_u,
-                                              post->uv_stride,
-                                              post->uv_stride,
-                                              8 - w_adjust, 8 - h_adjust,
-                                              dering);
-              vp9_post_proc_down_and_across_c(dr_v, dr_v,
-                                              post->uv_stride,
-                                              post->uv_stride,
-                                              8 - w_adjust, 8 - h_adjust,
-                                              dering);
-            }
-          }
-        }
-#endif
+static void filter_selectively_horiz(uint8_t *s, int pitch,
+                                     unsigned int mask_16x16,
+                                     unsigned int mask_8x8,
+                                     unsigned int mask_4x4,
+                                     unsigned int mask_4x4_int,
+                                     int only_4x4_1,
+                                     const struct loop_filter_info *lfi) {
+  unsigned int mask;
+
+  for (mask = mask_16x16 | mask_8x8 | mask_4x4; mask; mask >>= 1) {
+    if (mask & 1) {
+      if (!only_4x4_1) {
+        if (mask_16x16 & 1) {
+          vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim,
+                                       lfi->hev_thr, 1);
+          assert(!(mask_8x8 & 1));
+          assert(!(mask_4x4 & 1));
+          assert(!(mask_4x4_int & 1));
+        } else if (mask_8x8 & 1) {
+          vp9_mbloop_filter_horizontal_edge(s, pitch, lfi->mblim, lfi->lim,
+                                            lfi->hev_thr, 1);
+          assert(!(mask_16x16 & 1));
+          assert(!(mask_4x4 & 1));
+        } else if (mask_4x4 & 1) {
+          vp9_loop_filter_horizontal_edge(s, pitch, lfi->mblim, lfi->lim,
+                                          lfi->hev_thr, 1);
+          assert(!(mask_16x16 & 1));
+          assert(!(mask_8x8 & 1));
         } else {
-          // FIXME: Not 8x8 aware
-          if (mb_col > 0 &&
-              !(skip_lf && mb_lf_skip(&mode_info_context[-1].mbmi)) &&
-              !((mb_col & 1) && mode_info_context->mbmi.sb_type))
-            vp9_loop_filter_simple_mbv(y_ptr, post->y_stride,
-                                       lfi_n->mblim[filter_level]);
-          if (!skip_lf)
-            vp9_loop_filter_simple_bv(y_ptr, post->y_stride,
-                                      lfi_n->blim[filter_level]);
-
-          /* don't apply across umv border */
-          if (mb_row > 0 &&
-              !(skip_lf && mb_lf_skip(&mode_info_context[-mis].mbmi)) &&
-              !((mb_row & 1) && mode_info_context->mbmi.sb_type))
-            vp9_loop_filter_simple_mbh(y_ptr, post->y_stride,
-                                       lfi_n->mblim[filter_level]);
-          if (!skip_lf)
-            vp9_loop_filter_simple_bh(y_ptr, post->y_stride,
-                                      lfi_n->blim[filter_level]);
+          assert(0);
         }
       }
-      y_ptr += 16;
-      if (!y_only) {
-        u_ptr += 8;
-        v_ptr += 8;
-      }
-      mode_info_context++;     /* step to next MB */
-    }
-    y_ptr += post->y_stride * 16 - post->y_width;
-    if (!y_only) {
-      u_ptr += post->uv_stride * 8 - post->uv_width;
-      v_ptr += post->uv_stride * 8 - post->uv_width;
+
+      if (mask_4x4_int & 1)
+        vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
+                                        lfi->lim, lfi->hev_thr, 1);
     }
-    mode_info_context++;         /* Skip border mb */
+    s += 8;
+    lfi++;
+    mask_16x16 >>= 1;
+    mask_8x8 >>= 1;
+    mask_4x4 >>= 1;
+    mask_4x4_int >>= 1;
   }
 }
 
+static void filter_block_plane(VP9_COMMON *cm, MACROBLOCKD *xd,
+                               int plane, int mi_row, int mi_col) {
+  const int ss_x = xd->plane[plane].subsampling_x;
+  const int ss_y = xd->plane[plane].subsampling_y;
+  const int row_step = 1 << xd->plane[plane].subsampling_y;
+  const int col_step = 1 << xd->plane[plane].subsampling_x;
+  struct buf_2d * const dst = &xd->plane[plane].dst;
+  uint8_t* const dst0 = dst->buf;
+  MODE_INFO* const mi0 = xd->mode_info_context;
+  unsigned int mask_16x16[64 / MI_SIZE] = {0};
+  unsigned int mask_8x8[64 / MI_SIZE] = {0};
+  unsigned int mask_4x4[64 / MI_SIZE] = {0};
+  unsigned int mask_4x4_int[64 / MI_SIZE] = {0};
+  struct loop_filter_info lfi[64 / MI_SIZE][64 / MI_SIZE];
+  int r, c;
+
+  for (r = 0; r < 64 / MI_SIZE && mi_row + r < cm->mi_rows; r += row_step) {
+    unsigned int mask_16x16_c = 0;
+    unsigned int mask_8x8_c = 0;
+    unsigned int mask_4x4_c = 0;
+    unsigned int border_mask;
+
+    // Determine the vertical edges that need filtering
+    for (c = 0; c < 64 / MI_SIZE && mi_col + c < cm->mi_cols; c += col_step) {
+      const MODE_INFO *mi = xd->mode_info_context;
+      const int skip_this = mi[c].mbmi.mb_skip_coeff
+                            && mi[c].mbmi.ref_frame[0] != INTRA_FRAME;
+      // left edge of current unit is block/partition edge -> no skip
+      const int block_edge_left = b_width_log2(mi[c].mbmi.sb_type) ?
+          !(c & ((1 << (b_width_log2(mi[c].mbmi.sb_type)-1)) - 1)) : 1;
+      const int skip_this_c = skip_this && !block_edge_left;
+      // top edge of current unit is block/partition edge -> no skip
+      const int block_edge_above = b_height_log2(mi[c].mbmi.sb_type) ?
+          !(r & ((1 << (b_height_log2(mi[c].mbmi.sb_type)-1)) - 1)) : 1;
+      const int skip_this_r = skip_this && !block_edge_above;
+      const TX_SIZE tx_size = plane ? get_uv_tx_size(&mi[c].mbmi)
+                                    : mi[c].mbmi.txfm_size;
+      const int skip_border_4x4_c = ss_x && mi_col + c == cm->mi_cols - 1;
+      const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
+
+      // Filter level can vary per MI
+      if (!build_lfi(cm, &mi[c].mbmi,
+                     lfi[r] + (c >> xd->plane[plane].subsampling_x)))
+        continue;
+
+      // Build masks based on the transform size of each block
+      if (tx_size == TX_32X32) {
+        if (!skip_this_c && ((c >> ss_x) & 3) == 0) {
+          if (!skip_border_4x4_c)
+            mask_16x16_c |= 1 << (c >> ss_x);
+          else
+            mask_8x8_c |= 1 << (c >> ss_x);
+        }
+        if (!skip_this_r && ((r >> ss_y) & 3) == 0) {
+          if (!skip_border_4x4_r)
+            mask_16x16[r] |= 1 << (c >> ss_x);
+          else
+            mask_8x8[r] |= 1 << (c >> ss_x);
+        }
+      } else if (tx_size == TX_16X16) {
+        if (!skip_this_c && ((c >> ss_x) & 1) == 0) {
+          if (!skip_border_4x4_c)
+            mask_16x16_c |= 1 << (c >> ss_x);
+          else
+            mask_8x8_c |= 1 << (c >> ss_x);
+        }
+        if (!skip_this_r && ((r >> ss_y) & 1) == 0) {
+          if (!skip_border_4x4_r)
+            mask_16x16[r] |= 1 << (c >> ss_x);
+          else
+            mask_8x8[r] |= 1 << (c >> ss_x);
+        }
+      } else {
+        // force 8x8 filtering on 32x32 boundaries
+        if (!skip_this_c) {
+          if (tx_size == TX_8X8 || ((c >> ss_x) & 3) == 0)
+            mask_8x8_c |= 1 << (c >> ss_x);
+          else
+            mask_4x4_c |= 1 << (c >> ss_x);
+        }
-void vp9_loop_filter_partial_frame(VP9_COMMON *cm, MACROBLOCKD *xd,
-                                   int default_filt_lvl) {
-  YV12_BUFFER_CONFIG *post = cm->frame_to_show;
-
-  uint8_t *y_ptr;
-  int mb_row;
-  int mb_col;
-  int mb_cols = post->y_width >> 4;
-
-  int linestocopy, i;
-
-  loop_filter_info_n *lfi_n = &cm->lf_info;
-  struct loop_filter_info lfi;
-
-  int filter_level;
-  int alt_flt_enabled = xd->segmentation_enabled;
-  FRAME_TYPE frame_type = cm->frame_type;
-
-  const MODE_INFO *mode_info_context;
-
-  int lvl_seg[MAX_MB_SEGMENTS];
-
-  mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1);
-
-  /* 3 is a magic number. 4 is probably magic too */
-  linestocopy = (post->y_height >> (4 + 3));
-
-  if (linestocopy < 1)
-    linestocopy = 1;
-
-  linestocopy <<= 4;
+        if (!skip_this_r) {
+          if (tx_size == TX_8X8 || ((r >> ss_y) & 3) == 0)
+            mask_8x8[r] |= 1 << (c >> ss_x);
+          else
+            mask_4x4[r] |= 1 << (c >> ss_x);
+        }
-  /* Note the baseline filter values for each segment */
-  /* See vp9_loop_filter_frame_init.  Rather than call that for each change
-   * to default_filt_lvl, copy the relevant calculation here.
-   */
-  if (alt_flt_enabled) {
-    for (i = 0; i < MAX_MB_SEGMENTS; i++) {
-      if (xd->mb_segment_abs_delta == SEGMENT_ABSDATA) {
-        // Abs value
-        lvl_seg[i] = vp9_get_segdata(xd, i, SEG_LVL_ALT_LF);
-      } else {
-        // Delta Value
-        lvl_seg[i] = default_filt_lvl + vp9_get_segdata(xd, i, SEG_LVL_ALT_LF);
-        lvl_seg[i] = clamp(lvl_seg[i], 0, 63);
+        if (!skip_this && tx_size < TX_8X8 && !skip_border_4x4_c)
+          mask_4x4_int[r] |= 1 << (c >> ss_x);
       }
     }
-  }
-
-  /* Set up the buffer pointers */
-  y_ptr = post->y_buffer + (post->y_height >> 5) * 16 * post->y_stride;
-
-  /* vp9_filter each macro block */
-  for (mb_row = 0; mb_row < (linestocopy >> 4); mb_row++) {
-    for (mb_col = 0; mb_col < mb_cols; mb_col++) {
-      int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
-                     mode_info_context->mbmi.mode != I8X8_PRED &&
-                     mode_info_context->mbmi.mode != SPLITMV &&
-                     mode_info_context->mbmi.mb_skip_coeff);
-      if (alt_flt_enabled)
-        filter_level = lvl_seg[mode_info_context->mbmi.segment_id];
-      else
-        filter_level = default_filt_lvl;
-
-      if (filter_level) {
-        if (cm->filter_type == NORMAL_LOOPFILTER) {
-          const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
-          lfi.mblim = lfi_n->mblim[filter_level];
-          lfi.blim = lfi_n->blim[filter_level];
-          lfi.lim = lfi_n->lim[filter_level];
-          lfi.hev_thr = lfi_n->hev_thr[hev_index];
-
-          if (mb_col > 0)
-            vp9_loop_filter_mbv(y_ptr, 0, 0, post->y_stride, 0, &lfi);
+    // Disable filtering on the leftmost column
+    border_mask = ~(mi_col == 0);
+    filter_selectively_vert(dst->buf, dst->stride,
+                            mask_16x16_c & border_mask,
+                            mask_8x8_c & border_mask,
+                            mask_4x4_c & border_mask,
+                            mask_4x4_int[r], lfi[r]);
+    dst->buf += 8 * dst->stride;
+    xd->mode_info_context += cm->mode_info_stride * row_step;
+  }
-          if (!skip_lf)
-            vp9_loop_filter_bv(y_ptr, 0, 0, post->y_stride, 0, &lfi);
+  // Now do horizontal pass
+  dst->buf = dst0;
+  xd->mode_info_context = mi0;
+  for (r = 0; r < 64 / MI_SIZE && mi_row + r < cm->mi_rows; r += row_step) {
+    const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
+    const unsigned int mask_4x4_int_r = skip_border_4x4_r ? 0 : mask_4x4_int[r];
+
+    filter_selectively_horiz(dst->buf, dst->stride,
+                             mask_16x16[r],
+                             mask_8x8[r],
+                             mask_4x4[r],
+                             mask_4x4_int_r, mi_row + r == 0, lfi[r]);
+    dst->buf += 8 * dst->stride;
+    xd->mode_info_context += cm->mode_info_stride * row_step;
+  }
+}
-          vp9_loop_filter_mbh(y_ptr, 0, 0, post->y_stride, 0, &lfi);
+void vp9_loop_filter_frame(VP9_COMMON *cm,
+                           MACROBLOCKD *xd,
+                           int frame_filter_level,
+                           int y_only) {
+  int mi_row, mi_col;
-          if (!skip_lf)
-            vp9_loop_filter_bh(y_ptr, 0, 0, post->y_stride, 0, &lfi);
-        } else {
-          if (mb_col > 0)
-            vp9_loop_filter_simple_mbv (y_ptr, post->y_stride,
-                                        lfi_n->mblim[filter_level]);
+  // Initialize the loop filter for this frame.
+  vp9_loop_filter_frame_init(cm, xd, frame_filter_level);
-          if (!skip_lf)
-            vp9_loop_filter_simple_bv(y_ptr, post->y_stride,
-                                      lfi_n->blim[filter_level]);
+  for (mi_row = 0; mi_row < cm->mi_rows; mi_row += 64 / MI_SIZE) {
+    MODE_INFO* const mi = cm->mi + mi_row * cm->mode_info_stride;
-          vp9_loop_filter_simple_mbh(y_ptr, post->y_stride,
-                                     lfi_n->mblim[filter_level]);
+    for (mi_col = 0; mi_col < cm->mi_cols; mi_col += 64 / MI_SIZE) {
+      int plane;
-          if (!skip_lf)
-            vp9_loop_filter_simple_bh(y_ptr, post->y_stride,
-                                      lfi_n->blim[filter_level]);
+      setup_dst_planes(xd, cm->frame_to_show, mi_row, mi_col);
+      for (plane = 0; plane < (y_only ? 1 : MAX_MB_PLANE); plane++) {
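+        // plane 0 is Y; when y_only is unset, the remaining planes up to
+        // MAX_MB_PLANE (the chroma planes) are filtered as well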
+        xd->mode_info_context = mi + mi_col;
+        filter_block_plane(cm, xd, plane, mi_row, mi_col);
       }
-
-      y_ptr += 16;
-      mode_info_context += 1;      /* step to next MB */
     }
-
-    y_ptr += post->y_stride * 16 - post->y_width;
-    mode_info_context += 1;          /* Skip border mb */
-  }
 }
diff --git a/vp9/common/vp9_loopfilter.h b/vp9/common/vp9_loopfilter.h
index 458afc50bc76676b2d00dbf265c97eff81b3813f..65f522baba71e68448725053f27b17be24ea1180 100644
--- a/vp9/common/vp9_loopfilter.h
+++ b/vp9/common/vp9_loopfilter.h
@@ -16,12 +16,6 @@
 #include "vp9/common/vp9_blockd.h"
 
 #define MAX_LOOP_FILTER 63
-
-typedef enum {
-  NORMAL_LOOPFILTER = 0,
-  SIMPLE_LOOPFILTER = 1
-} LOOPFILTERTYPE;
-
 #define SIMD_WIDTH 16
 
 /* Need to align this structure so when it is declared and
@@ -36,8 +30,7 @@ typedef struct {
                   lim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
   DECLARE_ALIGNED(SIMD_WIDTH, unsigned char,
                   hev_thr[4][SIMD_WIDTH]);
-  unsigned char lvl[4][4][4];
-  unsigned char hev_thr_lut[2][MAX_LOOP_FILTER + 1];
+  unsigned char lvl[MAX_MB_SEGMENTS][4][4];
   unsigned char mode_lf_lut[MB_MODE_COUNT];
 } loop_filter_info_n;
 
@@ -56,9 +49,6 @@ struct loop_filter_info {
   void sym(uint8_t *y, uint8_t *u, uint8_t *v, \
            int ystride, int uv_stride, struct loop_filter_info *lfi)
 
-#define prototype_simple_loopfilter(sym) \
-  void sym(uint8_t *y, int ystride, const unsigned char *blimit)
-
 #if ARCH_X86 || ARCH_X86_64
 #include "x86/vp9_loopfilter_x86.h"
 #endif
@@ -83,8 +73,7 @@ void vp9_loop_filter_frame_init(struct VP9Common *cm,
 void vp9_loop_filter_frame(struct VP9Common *cm,
                            struct macroblockd *mbd,
                            int filter_level,
-                           int y_only,
-                           int dering);
+                           int y_only);
 
 void vp9_loop_filter_partial_frame(struct VP9Common *cm,
                                    struct macroblockd *mbd,
diff --git a/vp9/common/vp9_loopfilter_filters.c b/vp9/common/vp9_loopfilter_filters.c
index a473cf7421a8152cbec22112b454d4de2f9617e7..bf03692a0dfc8d20d08f22f381be810c00992b3e 100644
--- a/vp9/common/vp9_loopfilter_filters.c
+++ b/vp9/common/vp9_loopfilter_filters.c
@@ -8,19 +8,16 @@
  * be found in the AUTHORS file in the root of the source tree.
  */
 
-#include <stdlib.h>
 #include "vpx_config.h"
+#include "vp9/common/vp9_common.h"
 #include "vp9/common/vp9_loopfilter.h"
 #include "vp9/common/vp9_onyxc_int.h"
 
 static INLINE int8_t signed_char_clamp(int t) {
-  t = (t < -128 ? -128 : t);
-  t = (t > 127 ? 127 : t);
-  return (int8_t) t;
+  return (int8_t)clamp(t, -128, 127);
 }
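+// Note: clamp() is assumed to be the generic integer range clamp from
+// vp9_common.h, newly included above; it replaces the two ternary checks.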
-
-/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
+// should we apply any filter at all: 11111111 yes, 00000000 no
 static INLINE int8_t filter_mask(uint8_t limit, uint8_t blimit,
                                  uint8_t p3, uint8_t p2,
                                  uint8_t p1, uint8_t p0,
@@ -34,11 +31,10 @@ static INLINE int8_t filter_mask(uint8_t limit, uint8_t blimit,
   mask |= (abs(q2 - q1) > limit) * -1;
   mask |= (abs(q3 - q2) > limit) * -1;
   mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1;
-  mask = ~mask;
-  return mask;
+  return ~mask;
 }
 
-/* is there high variance internal edge ( 11111111 yes, 00000000 no) */
+// is there high edge variance internal edge: 11111111 yes, 00000000 no
 static INLINE int8_t hevmask(uint8_t thresh, uint8_t p1, uint8_t p0,
                              uint8_t q0, uint8_t q1) {
   int8_t hev = 0;
@@ -70,73 +66,59 @@ static INLINE void filter(int8_t mask, uint8_t hev, uint8_t *op1,
   *oq0 = signed_char_clamp(qs0 - filter1) ^ 0x80;
   *op0 = signed_char_clamp(ps0 + filter2) ^ 0x80;
 
-  filter = filter1;  // outer tap adjustments
-  filter += 1;
-  filter >>= 1;
-  filter &= ~hev;
+  filter = ((filter1 + 1) >> 1) & ~hev;
 
   *oq1 = signed_char_clamp(qs1 - filter) ^ 0x80;
   *op1 = signed_char_clamp(ps1 + filter) ^ 0x80;
 }
 
-void vp9_loop_filter_horizontal_edge_c(uint8_t *s,
-                                       int p, /* pitch */
-                                       const unsigned char *blimit,
-                                       const unsigned char *limit,
-                                       const unsigned char *thresh,
+void vp9_loop_filter_horizontal_edge_c(uint8_t *s, int p /* pitch */,
+                                       const uint8_t *blimit,
+                                       const uint8_t *limit,
+                                       const uint8_t *thresh,
                                        int count) {
-  int hev = 0; /* high edge variance */
-  int8_t mask = 0;
-  int i = 0;
-
-  /* loop filter designed to work using chars so that we can make maximum use
-   * of 8 bit simd instructions.
-   */
-  do {
-    mask = filter_mask(limit[0], blimit[0],
-                       s[-4 * p], s[-3 * p], s[-2 * p], s[-1 * p],
-                       s[0 * p], s[1 * p], s[2 * p], s[3 * p]);
-
-    hev = hevmask(thresh[0], s[-2 * p], s[-1 * p], s[0 * p], s[1 * p]);
-
+  int i;
+
+  // loop filter designed to work using chars so that we can make maximum use
+  // of 8 bit simd instructions.
+  for (i = 0; i < 8 * count; ++i) {
+    const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
+    const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
+    const int8_t mask = filter_mask(*limit, *blimit,
+                                    p3, p2, p1, p0, q0, q1, q2, q3);
+    const int8_t hev = hevmask(*thresh, p1, p0, q0, q1);
     filter(mask, hev, s - 2 * p, s - 1 * p, s, s + 1 * p);
     ++s;
-  } while (++i < count * 8);
+  }
 }
 
-void vp9_loop_filter_vertical_edge_c(uint8_t *s,
-                                     int p,
-                                     const unsigned char *blimit,
-                                     const unsigned char *limit,
-                                     const unsigned char *thresh,
+void vp9_loop_filter_vertical_edge_c(uint8_t *s, int pitch,
+                                     const uint8_t *blimit,
+                                     const uint8_t *limit,
+                                     const uint8_t *thresh,
                                      int count) {
-  int hev = 0; /* high edge variance */
-  int8_t mask = 0;
-  int i = 0;
-
-  /* loop filter designed to work using chars so that we can make maximum use
-   * of 8 bit simd instructions.
-   */
-  do {
-    mask = filter_mask(limit[0], blimit[0],
-                       s[-4], s[-3], s[-2], s[-1],
-                       s[0], s[1], s[2], s[3]);
-
-    hev = hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
-
+  int i;
+
+  // loop filter designed to work using chars so that we can make maximum use
+  // of 8 bit simd instructions.
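+  // Each iteration filters one line across the vertical edge, as in the
+  // horizontal variant above: p3..p0 sit before the edge, q0..q3 after it.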
+ for (i = 0; i < 8 * count; ++i) { + const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1]; + const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3]; + const int8_t mask = filter_mask(*limit, *blimit, + p3, p2, p1, p0, q0, q1, q2, q3); + const int8_t hev = hevmask(*thresh, p1, p0, q0, q1); filter(mask, hev, s - 2, s - 1, s, s + 1); - - s += p; - } while (++i < count * 8); + s += pitch; + } } -static INLINE signed char flatmask4(uint8_t thresh, - uint8_t p3, uint8_t p2, - uint8_t p1, uint8_t p0, - uint8_t q0, uint8_t q1, - uint8_t q2, uint8_t q3) { + +static INLINE int8_t flatmask4(uint8_t thresh, + uint8_t p3, uint8_t p2, + uint8_t p1, uint8_t p0, + uint8_t q0, uint8_t q1, + uint8_t q2, uint8_t q3) { int8_t flat = 0; flat |= (abs(p1 - p0) > thresh) * -1; flat |= (abs(q1 - q0) > thresh) * -1; @@ -144,8 +126,7 @@ static INLINE signed char flatmask4(uint8_t thresh, flat |= (abs(q0 - q2) > thresh) * -1; flat |= (abs(p3 - p0) > thresh) * -1; flat |= (abs(q3 - q0) > thresh) * -1; - flat = ~flat; - return flat; + return ~flat; } static INLINE signed char flatmask5(uint8_t thresh, uint8_t p4, uint8_t p3, uint8_t p2, @@ -167,287 +148,62 @@ static INLINE void mbfilter(int8_t mask, uint8_t hev, uint8_t flat, uint8_t *oq2, uint8_t *oq3) { // use a 7 tap filter [1, 1, 1, 2, 1, 1, 1] for flat line if (flat && mask) { - const uint8_t p3 = *op3; - const uint8_t p2 = *op2; - const uint8_t p1 = *op1; - const uint8_t p0 = *op0; - const uint8_t q0 = *oq0; - const uint8_t q1 = *oq1; - const uint8_t q2 = *oq2; - const uint8_t q3 = *oq3; - - *op2 = (p3 + p3 + p3 + p2 + p2 + p1 + p0 + q0 + 4) >> 3; - *op1 = (p3 + p3 + p2 + p1 + p1 + p0 + q0 + q1 + 4) >> 3; - *op0 = (p3 + p2 + p1 + p0 + p0 + q0 + q1 + q2 + 4) >> 3; - *oq0 = (p2 + p1 + p0 + q0 + q0 + q1 + q2 + q3 + 4) >> 3; - *oq1 = (p1 + p0 + q0 + q1 + q1 + q2 + q3 + q3 + 4) >> 3; - *oq2 = (p0 + q0 + q1 + q2 + q2 + q3 + q3 + q3 + 4) >> 3; + const uint8_t p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0; + const uint8_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3; + + *op2 = ROUND_POWER_OF_TWO(p3 + p3 + p3 + p2 + p2 + p1 + p0 + q0, 3); + *op1 = ROUND_POWER_OF_TWO(p3 + p3 + p2 + p1 + p1 + p0 + q0 + q1, 3); + *op0 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + p0 + p0 + q0 + q1 + q2, 3); + *oq0 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + q0 + q0 + q1 + q2 + q3, 3); + *oq1 = ROUND_POWER_OF_TWO(p1 + p0 + q0 + q1 + q1 + q2 + q3 + q3, 3); + *oq2 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + q2 + q2 + q3 + q3 + q3, 3); } else { - int8_t filter1, filter2; - - const int8_t ps1 = (int8_t) *op1 ^ 0x80; - const int8_t ps0 = (int8_t) *op0 ^ 0x80; - const int8_t qs0 = (int8_t) *oq0 ^ 0x80; - const int8_t qs1 = (int8_t) *oq1 ^ 0x80; - - // add outer taps if we have high edge variance - int8_t filter = signed_char_clamp(ps1 - qs1) & hev; - - // inner taps - filter = signed_char_clamp(filter + 3 * (qs0 - ps0)) & mask; - - filter1 = signed_char_clamp(filter + 4) >> 3; - filter2 = signed_char_clamp(filter + 3) >> 3; - - *oq0 = signed_char_clamp(qs0 - filter1) ^ 0x80; - *op0 = signed_char_clamp(ps0 + filter2) ^ 0x80; - filter = filter1; - - // outer tap adjustments - filter += 1; - filter >>= 1; - filter &= ~hev; - - *oq1 = signed_char_clamp(qs1 - filter) ^ 0x80; - *op1 = signed_char_clamp(ps1 + filter) ^ 0x80; + filter(mask, hev, op1, op0, oq0, oq1); } } -void vp9_mbloop_filter_horizontal_edge_c(uint8_t *s, - int p, - const unsigned char *blimit, - const unsigned char *limit, - const unsigned char *thresh, +void vp9_mbloop_filter_horizontal_edge_c(uint8_t *s, int p, + const uint8_t *blimit, + const uint8_t 
*limit, + const uint8_t *thresh, int count) { - int8_t hev = 0; /* high edge variance */ - int8_t mask = 0; - int8_t flat = 0; - int i = 0; - - /* loop filter designed to work using chars so that we can make maximum use - * of 8 bit simd instructions. - */ - do { - mask = filter_mask(limit[0], blimit[0], - s[-4 * p], s[-3 * p], s[-2 * p], s[-1 * p], - s[ 0 * p], s[ 1 * p], s[ 2 * p], s[ 3 * p]); - - hev = hevmask(thresh[0], s[-2 * p], s[-1 * p], s[0 * p], s[1 * p]); - - flat = flatmask4(1, s[-4 * p], s[-3 * p], s[-2 * p], s[-1 * p], - s[ 0 * p], s[ 1 * p], s[ 2 * p], s[ 3 * p]); + int i; + + // loop filter designed to work using chars so that we can make maximum use + // of 8 bit simd instructions. + for (i = 0; i < 8 * count; ++i) { + const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p]; + const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p]; + + const int8_t mask = filter_mask(*limit, *blimit, + p3, p2, p1, p0, q0, q1, q2, q3); + const int8_t hev = hevmask(*thresh, p1, p0, q0, q1); + const int8_t flat = flatmask4(1, p3, p2, p1, p0, q0, q1, q2, q3); mbfilter(mask, hev, flat, s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p, s, s + 1 * p, s + 2 * p, s + 3 * p); - ++s; - } while (++i < count * 8); - + } } -void vp9_mbloop_filter_vertical_edge_c(uint8_t *s, - int p, - const unsigned char *blimit, - const unsigned char *limit, - const unsigned char *thresh, +void vp9_mbloop_filter_vertical_edge_c(uint8_t *s, int pitch, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh, int count) { - int8_t hev = 0; /* high edge variance */ - int8_t mask = 0; - int8_t flat = 0; - int i = 0; - - do { - mask = filter_mask(limit[0], blimit[0], - s[-4], s[-3], s[-2], s[-1], - s[0], s[1], s[2], s[3]); - - hev = hevmask(thresh[0], s[-2], s[-1], s[0], s[1]); - flat = flatmask4(1, - s[-4], s[-3], s[-2], s[-1], - s[ 0], s[ 1], s[ 2], s[ 3]); - mbfilter(mask, hev, flat, - s - 4, s - 3, s - 2, s - 1, - s, s + 1, s + 2, s + 3); - s += p; - } while (++i < count * 8); - -} - -/* should we apply any filter at all ( 11111111 yes, 00000000 no) */ -static INLINE int8_t simple_filter_mask(uint8_t blimit, - uint8_t p1, uint8_t p0, - uint8_t q0, uint8_t q1) { - return (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= blimit) * -1; -} - -static INLINE void simple_filter(int8_t mask, - uint8_t *op1, uint8_t *op0, - uint8_t *oq0, uint8_t *oq1) { - int8_t filter1, filter2; - const int8_t p1 = (int8_t) *op1 ^ 0x80; - const int8_t p0 = (int8_t) *op0 ^ 0x80; - const int8_t q0 = (int8_t) *oq0 ^ 0x80; - const int8_t q1 = (int8_t) *oq1 ^ 0x80; - - int8_t filter = signed_char_clamp(p1 - q1); - filter = signed_char_clamp(filter + 3 * (q0 - p0)); - filter &= mask; - - // save bottom 3 bits so that we round one side +4 and the other +3 - filter1 = signed_char_clamp(filter + 4) >> 3; - *oq0 = signed_char_clamp(q0 - filter1) ^ 0x80; - - filter2 = signed_char_clamp(filter + 3) >> 3; - *op0 = signed_char_clamp(p0 + filter2) ^ 0x80; -} - -void vp9_loop_filter_simple_horizontal_edge_c(uint8_t *s, - int p, - const unsigned char *blimit) { - int8_t mask = 0; - int i = 0; - - do { - mask = simple_filter_mask(blimit[0], - s[-2 * p], s[-1 * p], - s[0 * p], s[1 * p]); - simple_filter(mask, - s - 2 * p, s - 1 * p, - s, s + 1 * p); - ++s; - } while (++i < 16); -} - -void vp9_loop_filter_simple_vertical_edge_c(uint8_t *s, - int p, - const unsigned char *blimit) { - int8_t mask = 0; - int i = 0; - - do { - mask = simple_filter_mask(blimit[0], s[-2], s[-1], s[0], s[1]); - simple_filter(mask, s - 2, s - 1, s, s + 
1); - s += p; - } while (++i < 16); -} - -/* Vertical MB Filtering */ -void vp9_loop_filter_mbv_c(uint8_t *y_ptr, uint8_t *u_ptr, - uint8_t *v_ptr, int y_stride, int uv_stride, - struct loop_filter_info *lfi) { - vp9_mbloop_filter_vertical_edge_c(y_ptr, y_stride, - lfi->mblim, lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - vp9_mbloop_filter_vertical_edge_c(u_ptr, uv_stride, - lfi->mblim, lfi->lim, lfi->hev_thr, 1); - - if (v_ptr) - vp9_mbloop_filter_vertical_edge_c(v_ptr, uv_stride, - lfi->mblim, lfi->lim, lfi->hev_thr, 1); -} - -/* Vertical B Filtering */ -void vp9_loop_filter_bv_c(uint8_t*y_ptr, uint8_t *u_ptr, - uint8_t *v_ptr, int y_stride, int uv_stride, - struct loop_filter_info *lfi) { - vp9_loop_filter_vertical_edge_c(y_ptr + 4, y_stride, - lfi->blim, lfi->lim, lfi->hev_thr, 2); - vp9_loop_filter_vertical_edge_c(y_ptr + 8, y_stride, - lfi->blim, lfi->lim, lfi->hev_thr, 2); - vp9_loop_filter_vertical_edge_c(y_ptr + 12, y_stride, - lfi->blim, lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - vp9_loop_filter_vertical_edge_c(u_ptr + 4, uv_stride, - lfi->blim, lfi->lim, lfi->hev_thr, 1); - - if (v_ptr) - vp9_loop_filter_vertical_edge_c(v_ptr + 4, uv_stride, - lfi->blim, lfi->lim, lfi->hev_thr, 1); -} - -/* Horizontal MB filtering */ -void vp9_loop_filter_mbh_c(uint8_t *y_ptr, uint8_t *u_ptr, - uint8_t *v_ptr, int y_stride, int uv_stride, - struct loop_filter_info *lfi) { - vp9_mbloop_filter_horizontal_edge_c(y_ptr, y_stride, - lfi->mblim, lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - vp9_mbloop_filter_horizontal_edge_c(u_ptr, uv_stride, - lfi->mblim, lfi->lim, lfi->hev_thr, 1); - - if (v_ptr) - vp9_mbloop_filter_horizontal_edge_c(v_ptr, uv_stride, - lfi->mblim, lfi->lim, lfi->hev_thr, 1); -} - -/* Horizontal B Filtering */ -void vp9_loop_filter_bh_c(uint8_t *y_ptr, uint8_t *u_ptr, - uint8_t *v_ptr, int y_stride, int uv_stride, - struct loop_filter_info *lfi) { - vp9_loop_filter_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, - lfi->blim, lfi->lim, lfi->hev_thr, 2); - vp9_loop_filter_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, - lfi->blim, lfi->lim, lfi->hev_thr, 2); - vp9_loop_filter_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, - lfi->blim, lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - vp9_loop_filter_horizontal_edge_c(u_ptr + 4 * uv_stride, uv_stride, - lfi->blim, lfi->lim, lfi->hev_thr, 1); - - if (v_ptr) - vp9_loop_filter_horizontal_edge_c(v_ptr + 4 * uv_stride, uv_stride, - lfi->blim, lfi->lim, lfi->hev_thr, 1); -} - -void vp9_loop_filter_bh8x8_c(uint8_t *y_ptr, uint8_t *u_ptr, - uint8_t *v_ptr, int y_stride, int uv_stride, - struct loop_filter_info *lfi) { - vp9_mbloop_filter_horizontal_edge_c( - y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - vp9_loop_filter_horizontal_edge_c(u_ptr + 4 * uv_stride, uv_stride, - lfi->blim, lfi->lim, lfi->hev_thr, 1); - - if (v_ptr) - vp9_loop_filter_horizontal_edge_c(v_ptr + 4 * uv_stride, uv_stride, - lfi->blim, lfi->lim, lfi->hev_thr, 1); -} - -void vp9_loop_filter_bhs_c(uint8_t *y_ptr, int y_stride, - const unsigned char *blimit) { - vp9_loop_filter_simple_horizontal_edge_c(y_ptr + 4 * y_stride, - y_stride, blimit); - vp9_loop_filter_simple_horizontal_edge_c(y_ptr + 8 * y_stride, - y_stride, blimit); - vp9_loop_filter_simple_horizontal_edge_c(y_ptr + 12 * y_stride, - y_stride, blimit); -} - -void vp9_loop_filter_bv8x8_c(uint8_t *y_ptr, uint8_t *u_ptr, - uint8_t *v_ptr, int y_stride, int uv_stride, - struct loop_filter_info *lfi) { - vp9_mbloop_filter_vertical_edge_c( - y_ptr + 8, y_stride, lfi->blim, 
lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - vp9_loop_filter_vertical_edge_c(u_ptr + 4, uv_stride, - lfi->blim, lfi->lim, lfi->hev_thr, 1); - - if (v_ptr) - vp9_loop_filter_vertical_edge_c(v_ptr + 4, uv_stride, - lfi->blim, lfi->lim, lfi->hev_thr, 1); -} - -void vp9_loop_filter_bvs_c(uint8_t *y_ptr, int y_stride, - const unsigned char *blimit) { - vp9_loop_filter_simple_vertical_edge_c(y_ptr + 4, y_stride, blimit); - vp9_loop_filter_simple_vertical_edge_c(y_ptr + 8, y_stride, blimit); - vp9_loop_filter_simple_vertical_edge_c(y_ptr + 12, y_stride, blimit); + int i; + + for (i = 0; i < 8 * count; ++i) { + const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1]; + const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3]; + const int8_t mask = filter_mask(*limit, *blimit, + p3, p2, p1, p0, q0, q1, q2, q3); + const int8_t hev = hevmask(thresh[0], p1, p0, q0, q1); + const int8_t flat = flatmask4(1, p3, p2, p1, p0, q0, q1, q2, q3); + mbfilter(mask, hev, flat, s - 4, s - 3, s - 2, s - 1, + s, s + 1, s + 2, s + 3); + s += pitch; + } } static INLINE void wide_mbfilter(int8_t mask, uint8_t hev, @@ -460,129 +216,64 @@ static INLINE void wide_mbfilter(int8_t mask, uint8_t hev, uint8_t *oq7) { // use a 15 tap filter [1,1,1,1,1,1,1,2,1,1,1,1,1,1,1] for flat line if (flat2 && flat && mask) { - const uint8_t p7 = *op7; - const uint8_t p6 = *op6; - const uint8_t p5 = *op5; - const uint8_t p4 = *op4; - const uint8_t p3 = *op3; - const uint8_t p2 = *op2; - const uint8_t p1 = *op1; - const uint8_t p0 = *op0; - const uint8_t q0 = *oq0; - const uint8_t q1 = *oq1; - const uint8_t q2 = *oq2; - const uint8_t q3 = *oq3; - const uint8_t q4 = *oq4; - const uint8_t q5 = *oq5; - const uint8_t q6 = *oq6; - const uint8_t q7 = *oq7; - - *op6 = (p7 * 7 + p6 * 2 + - p5 + p4 + p3 + p2 + p1 + p0 + q0 + 8) >> 4; - *op5 = (p7 * 6 + p6 + p5 * 2 + - p4 + p3 + p2 + p1 + p0 + q0 + q1 + 8) >> 4; - *op4 = (p7 * 5 + p6 + p5 + p4 * 2 + - p3 + p2 + p1 + p0 + q0 + q1 + q2 + 8) >> 4; - *op3 = (p7 * 4 + p6 + p5 + p4 + p3 * 2 + - p2 + p1 + p0 + q0 + q1 + q2 + q3 + 8) >> 4; - *op2 = (p7 * 3 + p6 + p5 + p4 + p3 + p2 * 2 + - p1 + p0 + q0 + q1 + q2 + q3 + q4 + 8) >> 4; - *op1 = (p7 * 2 + p6 + p5 + p4 + p3 + p2 + p1 * 2 + - p0 + q0 + q1 + q2 + q3 + q4 + q5 + 8) >> 4; - *op0 = (p7 + p6 + p5 + p4 + p3 + p2 + p1 + p0 * 2 + - q0 + q1 + q2 + q3 + q4 + q5 + q6 + 8) >> 4; - *oq0 = (p6 + p5 + p4 + p3 + p2 + p1 + p0 + q0 * 2 + - q1 + q2 + q3 + q4 + q5 + q6 + q7 + 8) >> 4; - *oq1 = (p5 + p4 + p3 + p2 + p1 + p0 + q0 + q1 * 2 + - q2 + q3 + q4 + q5 + q6 + q7 * 2 + 8) >> 4; - *oq2 = (p4 + p3 + p2 + p1 + p0 + q0 + q1 + q2 * 2 + - q3 + q4 + q5 + q6 + q7 * 3 + 8) >> 4; - *oq3 = (p3 + p2 + p1 + p0 + q0 + q1 + q2 + q3 * 2 + - q4 + q5 + q6 + q7 * 4 + 8) >> 4; - *oq4 = (p2 + p1 + p0 + q0 + q1 + q2 + q3 + q4 * 2 + - q5 + q6 + q7 * 5 + 8) >> 4; - *oq5 = (p1 + p0 + q0 + q1 + q2 + q3 + q4 + q5 * 2 + - q6 + q7 * 6 + 8) >> 4; - *oq6 = (p0 + q0 + q1 + q2 + q3 + q4 + q5 + q6 * 2 + - q7 * 7 + 8) >> 4; - } else if (flat && mask) { - const uint8_t p3 = *op3; - const uint8_t p2 = *op2; - const uint8_t p1 = *op1; - const uint8_t p0 = *op0; - const uint8_t q0 = *oq0; - const uint8_t q1 = *oq1; - const uint8_t q2 = *oq2; - const uint8_t q3 = *oq3; - - *op2 = (p3 + p3 + p3 + p2 + p2 + p1 + p0 + q0 + 4) >> 3; - *op1 = (p3 + p3 + p2 + p1 + p1 + p0 + q0 + q1 + 4) >> 3; - *op0 = (p3 + p2 + p1 + p0 + p0 + q0 + q1 + q2 + 4) >> 3; - *oq0 = (p2 + p1 + p0 + q0 + q0 + q1 + q2 + q3 + 4) >> 3; - *oq1 = (p1 + p0 + q0 + q1 + q1 + q2 + q3 + q3 + 4) >> 3; - *oq2 = (p0 + q0 + q1 + q2 + q2 + q3 
+ q3 + q3 + 4) >> 3; + const uint8_t p7 = *op7, p6 = *op6, p5 = *op5, p4 = *op4, + p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0; + + const uint8_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3, + q4 = *oq4, q5 = *oq5, q6 = *oq6, q7 = *oq7; + + *op6 = ROUND_POWER_OF_TWO(p7 * 7 + p6 * 2 + p5 + p4 + p3 + p2 + p1 + p0 + + q0, 4); + *op5 = ROUND_POWER_OF_TWO(p7 * 6 + p6 + p5 * 2 + p4 + p3 + p2 + p1 + p0 + + q0 + q1, 4); + *op4 = ROUND_POWER_OF_TWO(p7 * 5 + p6 + p5 + p4 * 2 + p3 + p2 + p1 + p0 + + q0 + q1 + q2, 4); + *op3 = ROUND_POWER_OF_TWO(p7 * 4 + p6 + p5 + p4 + p3 * 2 + p2 + p1 + p0 + + q0 + q1 + q2 + q3, 4); + *op2 = ROUND_POWER_OF_TWO(p7 * 3 + p6 + p5 + p4 + p3 + p2 * 2 + p1 + p0 + + q0 + q1 + q2 + q3 + q4, 4); + *op1 = ROUND_POWER_OF_TWO(p7 * 2 + p6 + p5 + p4 + p3 + p2 + p1 * 2 + p0 + + q0 + q1 + q2 + q3 + q4 + q5, 4); + *op0 = ROUND_POWER_OF_TWO(p7 + p6 + p5 + p4 + p3 + p2 + p1 + p0 * 2 + + q0 + q1 + q2 + q3 + q4 + q5 + q6, 4); + *oq0 = ROUND_POWER_OF_TWO(p6 + p5 + p4 + p3 + p2 + p1 + p0 + + q0 * 2 + q1 + q2 + q3 + q4 + q5 + q6 + q7, 4); + *oq1 = ROUND_POWER_OF_TWO(p5 + p4 + p3 + p2 + p1 + p0 + + q0 + q1 * 2 + q2 + q3 + q4 + q5 + q6 + q7 * 2, 4); + *oq2 = ROUND_POWER_OF_TWO(p4 + p3 + p2 + p1 + p0 + + q0 + q1 + q2 * 2 + q3 + q4 + q5 + q6 + q7 * 3, 4); + *oq3 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + p0 + + q0 + q1 + q2 + q3 * 2 + q4 + q5 + q6 + q7 * 4, 4); + *oq4 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + + q0 + q1 + q2 + q3 + q4 * 2 + q5 + q6 + q7 * 5, 4); + *oq5 = ROUND_POWER_OF_TWO(p1 + p0 + + q0 + q1 + q2 + q3 + q4 + q5 * 2 + q6 + q7 * 6, 4); + *oq6 = ROUND_POWER_OF_TWO(p0 + + q0 + q1 + q2 + q3 + q4 + q5 + q6 * 2 + q7 * 7, 4); } else { - int8_t filter1, filter2; - - const int8_t ps1 = (int8_t) * op1 ^ 0x80; - const int8_t ps0 = (int8_t) * op0 ^ 0x80; - const int8_t qs0 = (int8_t) * oq0 ^ 0x80; - const int8_t qs1 = (int8_t) * oq1 ^ 0x80; - - // add outer taps if we have high edge variance - int8_t filter = signed_char_clamp(ps1 - qs1) & hev; - - // inner taps - filter = signed_char_clamp(filter + 3 * (qs0 - ps0)) & mask; - filter1 = signed_char_clamp(filter + 4) >> 3; - filter2 = signed_char_clamp(filter + 3) >> 3; - - *oq0 = signed_char_clamp(qs0 - filter1) ^ 0x80; - *op0 = signed_char_clamp(ps0 + filter2) ^ 0x80; - filter = filter1; - - // outer tap adjustments - filter += 1; - filter >>= 1; - filter &= ~hev; - - *oq1 = signed_char_clamp(qs1 - filter) ^ 0x80; - *op1 = signed_char_clamp(ps1 + filter) ^ 0x80; + mbfilter(mask, hev, flat, op3, op2, op1, op0, oq0, oq1, oq2, oq3); } } -void vp9_mb_lpf_horizontal_edge_w -( - unsigned char *s, - int p, - const unsigned char *blimit, - const unsigned char *limit, - const unsigned char *thresh, - int count -) { - signed char hev = 0; /* high edge variance */ - signed char mask = 0; - signed char flat = 0; - signed char flat2 = 0; - int i = 0; - - /* loop filter designed to work using chars so that we can make maximum use - * of 8 bit simd instructions. 
- */ - do { - mask = filter_mask(limit[0], blimit[0], - s[-4 * p], s[-3 * p], s[-2 * p], s[-1 * p], - s[ 0 * p], s[ 1 * p], s[ 2 * p], s[ 3 * p]); - - hev = hevmask(thresh[0], s[-2 * p], s[-1 * p], s[0 * p], s[1 * p]); - - flat = flatmask4(1, - s[-4 * p], s[-3 * p], s[-2 * p], s[-1 * p], - s[ 0 * p], s[ 1 * p], s[ 2 * p], s[ 3 * p]); - - flat2 = flatmask5(1, - s[-8 * p], s[-7 * p], s[-6 * p], s[-5 * p], s[-1 * p], - s[ 0 * p], s[ 4 * p], s[ 5 * p], s[ 6 * p], s[ 7 * p]); +void vp9_mb_lpf_horizontal_edge_w(uint8_t *s, int p, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh, + int count) { + int i; + + // loop filter designed to work using chars so that we can make maximum use + // of 8 bit simd instructions. + for (i = 0; i < 8 * count; ++i) { + const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p]; + const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p]; + const int8_t mask = filter_mask(*limit, *blimit, + p3, p2, p1, p0, q0, q1, q2, q3); + const int8_t hev = hevmask(*thresh, p1, p0, q0, q1); + const int8_t flat = flatmask4(1, p3, p2, p1, p0, q0, q1, q2, q3); + const int8_t flat2 = flatmask5(1, + s[-8 * p], s[-7 * p], s[-6 * p], s[-5 * p], p0, + q0, s[4 * p], s[5 * p], s[6 * p], s[7 * p]); wide_mbfilter(mask, hev, flat, flat2, s - 8 * p, s - 7 * p, s - 6 * p, s - 5 * p, @@ -591,71 +282,29 @@ void vp9_mb_lpf_horizontal_edge_w s + 4 * p, s + 5 * p, s + 6 * p, s + 7 * p); ++s; - } while (++i < count * 8); + } } -void vp9_mb_lpf_vertical_edge_w -( - unsigned char *s, - int p, - const unsigned char *blimit, - const unsigned char *limit, - const unsigned char *thresh, - int count -) { - signed char hev = 0; /* high edge variance */ - signed char mask = 0; - signed char flat = 0; - signed char flat2 = 0; - int i = 0; - - do { - mask = filter_mask(limit[0], blimit[0], - s[-4], s[-3], s[-2], s[-1], - s[0], s[1], s[2], s[3]); - - hev = hevmask(thresh[0], s[-2], s[-1], s[0], s[1]); - flat = flatmask4(1, - s[-4], s[-3], s[-2], s[-1], - s[ 0], s[ 1], s[ 2], s[ 3]); - flat2 = flatmask5(1, - s[-8], s[-7], s[-6], s[-5], s[-1], - s[ 0], s[ 4], s[ 5], s[ 6], s[ 7]); + +void vp9_mb_lpf_vertical_edge_w(uint8_t *s, int p, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh, + int count) { + int i; + + for (i = 0; i < 8 * count; ++i) { + const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1]; + const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3]; + const int8_t mask = filter_mask(*limit, *blimit, + p3, p2, p1, p0, q0, q1, q2, q3); + const int8_t hev = hevmask(*thresh, p1, p0, q0, q1); + const int8_t flat = flatmask4(1, p3, p2, p1, p0, q0, q1, q2, q3); + const int8_t flat2 = flatmask5(1, s[-8], s[-7], s[-6], s[-5], p0, + q0, s[4], s[5], s[6], s[7]); wide_mbfilter(mask, hev, flat, flat2, - s - 8, s - 7, s - 6, s - 5, - s - 4, s - 3, s - 2, s - 1, - s, s + 1, s + 2, s + 3, - s + 4, s + 5, s + 6, s + 7); + s - 8, s - 7, s - 6, s - 5, s - 4, s - 3, s - 2, s - 1, + s, s + 1, s + 2, s + 3, s + 4, s + 5, s + 6, s + 7); s += p; - } while (++i < count * 8); -} - -void vp9_lpf_mbv_w_c(unsigned char *y_ptr, unsigned char *u_ptr, - unsigned char *v_ptr, int y_stride, int uv_stride, - struct loop_filter_info *lfi) { - vp9_mb_lpf_vertical_edge_w(y_ptr, y_stride, - lfi->mblim, lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - vp9_mbloop_filter_vertical_edge_c(u_ptr, uv_stride, - lfi->mblim, lfi->lim, lfi->hev_thr, 1); - - if (v_ptr) - vp9_mbloop_filter_vertical_edge_c(v_ptr, uv_stride, - lfi->mblim, lfi->lim, lfi->hev_thr, 1); -} -void 
vp9_lpf_mbh_w_c(unsigned char *y_ptr, unsigned char *u_ptr, - unsigned char *v_ptr, int y_stride, int uv_stride, - struct loop_filter_info *lfi) { - vp9_mb_lpf_horizontal_edge_w(y_ptr, y_stride, - lfi->mblim, lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - vp9_mbloop_filter_horizontal_edge_c(u_ptr, uv_stride, - lfi->mblim, lfi->lim, lfi->hev_thr, 1); - - if (v_ptr) - vp9_mbloop_filter_horizontal_edge_c(v_ptr, uv_stride, - lfi->mblim, lfi->lim, lfi->hev_thr, 1); + } } - diff --git a/vp9/common/vp9_mbpitch.c b/vp9/common/vp9_mbpitch.c index 8d99335d4f39262f373bbc5ee72e67a2879f96f1..3cf37ffab3a1289600fc08c2d083ae458ecf716c 100644 --- a/vp9/common/vp9_mbpitch.c +++ b/vp9/common/vp9_mbpitch.c @@ -11,105 +11,18 @@ #include "vp9/common/vp9_blockd.h" -typedef enum { - PRED = 0, - DEST = 1 -} BLOCKSET; - -static void setup_block(BLOCKD *b, - int mv_stride, - uint8_t **base, - uint8_t **base2, - int stride, - int offset, - BLOCKSET bs) { - if (bs == DEST) { - b->dst_stride = stride; - b->dst = offset; - b->base_dst = base; - } else { - b->pre_stride = stride; - b->pre = offset; - b->base_pre = base; - b->base_second_pre = base2; - } -} - -static void setup_macroblock(MACROBLOCKD *xd, BLOCKSET bs) { - int block; - - uint8_t **y, **u, **v; - uint8_t **y2 = NULL, **u2 = NULL, **v2 = NULL; - BLOCKD *blockd = xd->block; - int stride; - - if (bs == DEST) { - y = &xd->dst.y_buffer; - u = &xd->dst.u_buffer; - v = &xd->dst.v_buffer; - } else { - y = &xd->pre.y_buffer; - u = &xd->pre.u_buffer; - v = &xd->pre.v_buffer; - - y2 = &xd->second_pre.y_buffer; - u2 = &xd->second_pre.u_buffer; - v2 = &xd->second_pre.v_buffer; - } - - stride = xd->dst.y_stride; - for (block = 0; block < 16; block++) { /* y blocks */ - setup_block(&blockd[block], stride, y, y2, stride, - (block >> 2) * 4 * stride + (block & 3) * 4, bs); - } - - stride = xd->dst.uv_stride; - for (block = 16; block < 20; block++) { /* U and V blocks */ - setup_block(&blockd[block], stride, u, u2, stride, - ((block - 16) >> 1) * 4 * stride + (block & 1) * 4, bs); - - setup_block(&blockd[block + 4], stride, v, v2, stride, - ((block - 16) >> 1) * 4 * stride + (block & 1) * 4, bs); - } -} - -void vp9_setup_block_dptrs(MACROBLOCKD *xd) { - int r, c; - BLOCKD *blockd = xd->block; - - for (r = 0; r < 4; r++) { - for (c = 0; c < 4; c++) { - blockd[r * 4 + c].diff = &xd->diff[r * 4 * 16 + c * 4]; - blockd[r * 4 + c].predictor = xd->predictor + r * 4 * 16 + c * 4; - } - } - - for (r = 0; r < 2; r++) { - for (c = 0; c < 2; c++) { - blockd[16 + r * 2 + c].diff = &xd->diff[256 + r * 4 * 8 + c * 4]; - blockd[16 + r * 2 + c].predictor = - xd->predictor + 256 + r * 4 * 8 + c * 4; - - } - } - - for (r = 0; r < 2; r++) { - for (c = 0; c < 2; c++) { - blockd[20 + r * 2 + c].diff = &xd->diff[320 + r * 4 * 8 + c * 4]; - blockd[20 + r * 2 + c].predictor = - xd->predictor + 320 + r * 4 * 8 + c * 4; - - } - } - - for (r = 0; r < 24; r++) { - blockd[r].qcoeff = xd->qcoeff + r * 16; - blockd[r].dqcoeff = xd->dqcoeff + r * 16; - } -} - -void vp9_build_block_doffsets(MACROBLOCKD *xd) { - /* handle the destination pitch features */ - setup_macroblock(xd, DEST); - setup_macroblock(xd, PRED); +void vp9_setup_block_dptrs(MACROBLOCKD *mb, + int subsampling_x, int subsampling_y) { + int i; + + for (i = 0; i < MAX_MB_PLANE; i++) { + mb->plane[i].plane_type = i ? PLANE_TYPE_UV : PLANE_TYPE_Y_WITH_DC; + mb->plane[i].subsampling_x = i ? subsampling_x : 0; + mb->plane[i].subsampling_y = i ? 
subsampling_y : 0;
+  }
+#if CONFIG_ALPHA
+  // TODO(jkoleszar): Using the Y w/h for now
+  mb->plane[3].subsampling_x = 0;
+  mb->plane[3].subsampling_y = 0;
+#endif
 }
diff --git a/vp9/common/vp9_modecont.c b/vp9/common/vp9_modecont.c
index 73cb5e15e39ec97459588a05741cfacbacc85b53..bdb0049b4537f06ba6a29227761f3c8a2cccc747 100644
--- a/vp9/common/vp9_modecont.c
+++ b/vp9/common/vp9_modecont.c
@@ -11,12 +11,13 @@

 #include "vp9/common/vp9_entropy.h"

-const int vp9_default_mode_contexts[INTER_MODE_CONTEXTS][4] = {
-  {1, 223, 1, 237},    // 0,0 best: Only candidate
-  {87, 166, 26, 219},  // 0,0 best: non zero candidates
-  {89, 67, 18, 125},   // 0,0 best: non zero candidates, split
-  {16, 141, 69, 226},  // strong nz candidate(s), no split
-  {35, 122, 14, 227},  // weak nz candidate(s), no split
-  {14, 122, 22, 164},  // strong nz candidate(s), split
-  {16, 70, 9, 183},    // weak nz candidate(s), split
+const vp9_prob vp9_default_inter_mode_probs[INTER_MODE_CONTEXTS]
+                                           [VP9_INTER_MODES - 1] = {
+  {2, 173, 34},   // 0 = both zero mv
+  {7, 145, 85},   // 1 = one zero mv + one predicted mv
+  {7, 166, 63},   // 2 = two predicted mvs
+  {7, 94, 66},    // 3 = one predicted/zero and one new mv
+  {8, 64, 46},    // 4 = two new mvs
+  {17, 81, 31},   // 5 = one intra neighbour + x
+  {25, 29, 30},   // 6 = two intra neighbours
 };
diff --git a/vp9/common/vp9_modecont.h b/vp9/common/vp9_modecont.h
index 24f1a6cb3210882d13902700c72b3012c105d509..1a3e993a5f0fc0b6ab9ad48705c0b4149ae6012a 100644
--- a/vp9/common/vp9_modecont.h
+++ b/vp9/common/vp9_modecont.h
@@ -11,6 +11,9 @@
 #ifndef VP9_COMMON_VP9_MODECONT_H_
 #define VP9_COMMON_VP9_MODECONT_H_

-extern const int vp9_default_mode_contexts[INTER_MODE_CONTEXTS][4];
+#include "vp9/common/vp9_entropy.h"
+
+extern const vp9_prob vp9_default_inter_mode_probs[INTER_MODE_CONTEXTS]
+                                                  [VP9_INTER_MODES - 1];

 #endif  // VP9_COMMON_VP9_MODECONT_H_
diff --git a/vp9/common/vp9_modecontext.c b/vp9/common/vp9_modecontext.c
index ab91c0b14db10250c3f961b8e40f9168b85f63ac..a79ab2a6c6684e3f41b148eca92c4fb96a3ba272 100644
--- a/vp9/common/vp9_modecontext.c
+++ b/vp9/common/vp9_modecontext.c
@@ -11,137 +11,118 @@

 #include "vp9/common/vp9_entropymode.h"

-const unsigned int vp9_kf_default_bmode_counts[VP9_KF_BINTRAMODES]
-                                              [VP9_KF_BINTRAMODES]
-                                              [VP9_KF_BINTRAMODES] = {
-  {
-    /*Above Mode : 0*/
-    { 43438, 2195, 470, 316, 615, 171, 217, 412, 124, 160, }, /* left_mode 0 */
-    { 5722, 2751, 296, 291, 81, 68, 80, 101, 100, 170, }, /* left_mode 1 */
-    { 1629, 201, 307, 25, 47, 16, 34, 72, 19, 28, }, /* left_mode 2 */
-    { 332, 266, 36, 500, 20, 65, 23, 14, 154, 106, }, /* left_mode 3 */
-    { 450, 97, 10, 24, 117, 10, 2, 12, 8, 71, }, /* left_mode 4 */
-    { 384, 49, 29, 44, 12, 162, 51, 5, 87, 42, }, /* left_mode 5 */
-    { 495, 53, 157, 27, 14, 57, 180, 17, 17, 34, }, /* left_mode 6 */
-    { 695, 64, 62, 9, 27, 5, 3, 147, 10, 26, }, /* left_mode 7 */
-    { 230, 54, 20, 124, 16, 125, 29, 12, 283, 37, }, /* left_mode 8 */
-    { 260, 87, 21, 120, 32, 16, 33, 16, 33, 203, }, /* left_mode 9 */
-  },
-  {
-    /*Above Mode : 1*/
-    { 3934, 2573, 355, 137, 128, 87, 133, 117, 37, 27, }, /* left_mode 0 */
-    { 1036, 1929, 278, 135, 27, 37, 48, 55, 41, 91, }, /* left_mode 1 */
-    { 223, 256, 253, 15, 13, 9, 28, 64, 3, 3, }, /* left_mode 2 */
-    { 120, 129, 17, 316, 15, 11, 9, 4, 53, 74, }, /* left_mode 3 */
-    { 129, 58, 6, 11, 38, 2, 0, 5, 2, 67, }, /* left_mode 4 */
-    { 53, 22, 11, 16, 8, 26, 14, 3, 19, 12, }, /* left_mode 5 */
-    { 59, 26, 61, 11, 4, 9, 35, 13, 8, 8, }, /* left_mode 6 */
-    { 101, 52, 40, 8, 5, 2, 8, 59, 2, 20, }, /* left_mode 7 */
-    { 
48, 34, 10, 52, 8, 15, 6, 6, 63, 20, }, /* left_mode 8 */ - { 96, 48, 22, 63, 11, 14, 5, 8, 9, 96, }, /* left_mode 9 */ - }, - { - /*Above Mode : 2*/ - { 709, 461, 506, 36, 27, 33, 151, 98, 24, 6, }, /* left_mode 0 */ - { 201, 375, 442, 27, 13, 8, 46, 58, 6, 19, }, /* left_mode 1 */ - { 122, 140, 417, 4, 13, 3, 33, 59, 4, 2, }, /* left_mode 2 */ - { 36, 17, 22, 16, 6, 8, 12, 17, 9, 21, }, /* left_mode 3 */ - { 51, 15, 7, 1, 14, 0, 4, 5, 3, 22, }, /* left_mode 4 */ - { 18, 11, 30, 9, 7, 20, 11, 5, 2, 6, }, /* left_mode 5 */ - { 38, 21, 103, 9, 4, 12, 79, 13, 2, 5, }, /* left_mode 6 */ - { 64, 17, 66, 2, 12, 4, 2, 65, 4, 5, }, /* left_mode 7 */ - { 14, 7, 7, 16, 3, 11, 4, 13, 15, 16, }, /* left_mode 8 */ - { 36, 8, 32, 9, 9, 4, 14, 7, 6, 24, }, /* left_mode 9 */ - }, - { - /*Above Mode : 3*/ - { 1340, 173, 36, 119, 30, 10, 13, 10, 20, 26, }, /* left_mode 0 */ - { 156, 293, 26, 108, 5, 16, 2, 4, 23, 30, }, /* left_mode 1 */ - { 60, 34, 13, 7, 3, 3, 0, 8, 4, 5, }, /* left_mode 2 */ - { 72, 64, 1, 235, 3, 9, 2, 7, 28, 38, }, /* left_mode 3 */ - { 29, 14, 1, 3, 5, 0, 2, 2, 5, 13, }, /* left_mode 4 */ - { 22, 7, 4, 11, 2, 5, 1, 2, 6, 4, }, /* left_mode 5 */ - { 18, 14, 5, 6, 4, 3, 14, 0, 9, 2, }, /* left_mode 6 */ - { 41, 10, 7, 1, 2, 0, 0, 10, 2, 1, }, /* left_mode 7 */ - { 23, 19, 2, 33, 1, 5, 2, 0, 51, 8, }, /* left_mode 8 */ - { 33, 26, 7, 53, 3, 9, 3, 3, 9, 19, }, /* left_mode 9 */ - }, - { - /*Above Mode : 4*/ - { 410, 165, 43, 31, 66, 15, 30, 54, 8, 17, }, /* left_mode 0 */ - { 115, 64, 27, 18, 30, 7, 11, 15, 4, 19, }, /* left_mode 1 */ - { 31, 23, 25, 1, 7, 2, 2, 10, 0, 5, }, /* left_mode 2 */ - { 17, 4, 1, 6, 8, 2, 7, 5, 5, 21, }, /* left_mode 3 */ - { 120, 12, 1, 2, 83, 3, 0, 4, 1, 40, }, /* left_mode 4 */ - { 4, 3, 1, 2, 1, 2, 5, 0, 3, 6, }, /* left_mode 5 */ - { 10, 2, 13, 6, 6, 6, 8, 2, 4, 5, }, /* left_mode 6 */ - { 58, 10, 5, 1, 28, 1, 1, 33, 1, 9, }, /* left_mode 7 */ - { 8, 2, 1, 4, 2, 5, 1, 1, 2, 10, }, /* left_mode 8 */ - { 76, 7, 5, 7, 18, 2, 2, 0, 5, 45, }, /* left_mode 9 */ - }, - { - /*Above Mode : 5*/ - { 444, 46, 47, 20, 14, 110, 60, 14, 60, 7, }, /* left_mode 0 */ - { 59, 57, 25, 18, 3, 17, 21, 6, 14, 6, }, /* left_mode 1 */ - { 24, 17, 20, 6, 4, 13, 7, 2, 3, 2, }, /* left_mode 2 */ - { 13, 11, 5, 14, 4, 9, 2, 4, 15, 7, }, /* left_mode 3 */ - { 8, 5, 2, 1, 4, 0, 1, 1, 2, 12, }, /* left_mode 4 */ - { 19, 5, 5, 7, 4, 40, 6, 3, 10, 4, }, /* left_mode 5 */ - { 16, 5, 9, 1, 1, 16, 26, 2, 10, 4, }, /* left_mode 6 */ - { 11, 4, 8, 1, 1, 4, 4, 5, 4, 1, }, /* left_mode 7 */ - { 15, 1, 3, 7, 3, 21, 7, 1, 34, 5, }, /* left_mode 8 */ - { 18, 5, 1, 3, 4, 3, 7, 1, 2, 9, }, /* left_mode 9 */ - }, - { - /*Above Mode : 6*/ - { 476, 149, 94, 13, 14, 77, 291, 27, 23, 3, }, /* left_mode 0 */ - { 79, 83, 42, 14, 2, 12, 63, 2, 4, 14, }, /* left_mode 1 */ - { 43, 36, 55, 1, 3, 8, 42, 11, 5, 1, }, /* left_mode 2 */ - { 9, 9, 6, 16, 1, 5, 6, 3, 11, 10, }, /* left_mode 3 */ - { 10, 3, 1, 3, 10, 1, 0, 1, 1, 4, }, /* left_mode 4 */ - { 14, 6, 15, 5, 1, 20, 25, 2, 5, 0, }, /* left_mode 5 */ - { 28, 7, 51, 1, 0, 8, 127, 6, 2, 5, }, /* left_mode 6 */ - { 13, 3, 3, 2, 3, 1, 2, 8, 1, 2, }, /* left_mode 7 */ - { 10, 3, 3, 3, 3, 8, 2, 2, 9, 3, }, /* left_mode 8 */ - { 13, 7, 11, 4, 0, 4, 6, 2, 5, 8, }, /* left_mode 9 */ - }, - { - /*Above Mode : 7*/ - { 376, 135, 119, 6, 32, 8, 31, 224, 9, 3, }, /* left_mode 0 */ - { 93, 60, 54, 6, 13, 7, 8, 92, 2, 12, }, /* left_mode 1 */ - { 74, 36, 84, 0, 3, 2, 9, 67, 2, 1, }, /* left_mode 2 */ - { 19, 4, 4, 8, 8, 2, 4, 7, 6, 16, }, /* left_mode 3 */ - { 51, 7, 4, 1, 
77, 3, 0, 14, 1, 15, }, /* left_mode 4 */ - { 7, 7, 5, 7, 4, 7, 4, 5, 0, 3, }, /* left_mode 5 */ - { 18, 2, 19, 2, 2, 4, 12, 11, 1, 2, }, /* left_mode 6 */ - { 129, 6, 27, 1, 21, 3, 0, 189, 0, 6, }, /* left_mode 7 */ - { 9, 1, 2, 8, 3, 7, 0, 5, 3, 3, }, /* left_mode 8 */ - { 20, 4, 5, 10, 4, 2, 7, 17, 3, 16, }, /* left_mode 9 */ - }, - { - /*Above Mode : 8*/ - { 617, 68, 34, 79, 11, 27, 25, 14, 75, 13, }, /* left_mode 0 */ - { 51, 82, 21, 26, 6, 12, 13, 1, 26, 16, }, /* left_mode 1 */ - { 29, 9, 12, 11, 3, 7, 1, 10, 2, 2, }, /* left_mode 2 */ - { 17, 19, 11, 74, 4, 3, 2, 0, 58, 13, }, /* left_mode 3 */ - { 10, 1, 1, 3, 4, 1, 0, 2, 1, 8, }, /* left_mode 4 */ - { 14, 4, 5, 5, 1, 13, 2, 0, 27, 8, }, /* left_mode 5 */ - { 10, 3, 5, 4, 1, 7, 6, 4, 5, 1, }, /* left_mode 6 */ - { 10, 2, 6, 2, 1, 1, 1, 4, 2, 1, }, /* left_mode 7 */ - { 14, 8, 5, 23, 2, 12, 6, 2, 117, 5, }, /* left_mode 8 */ - { 9, 6, 2, 19, 1, 6, 3, 2, 9, 9, }, /* left_mode 9 */ - }, - { - /*Above Mode : 9*/ - { 680, 73, 22, 38, 42, 5, 11, 9, 6, 28, }, /* left_mode 0 */ - { 113, 112, 21, 22, 10, 2, 8, 4, 6, 42, }, /* left_mode 1 */ - { 44, 20, 24, 6, 5, 4, 3, 3, 1, 2, }, /* left_mode 2 */ - { 40, 23, 7, 71, 5, 2, 4, 1, 7, 22, }, /* left_mode 3 */ - { 85, 9, 4, 4, 17, 2, 0, 3, 2, 23, }, /* left_mode 4 */ - { 13, 4, 2, 6, 1, 7, 0, 1, 7, 6, }, /* left_mode 5 */ - { 26, 6, 8, 3, 2, 3, 8, 1, 5, 4, }, /* left_mode 6 */ - { 54, 8, 9, 6, 7, 0, 1, 11, 1, 3, }, /* left_mode 7 */ - { 9, 10, 4, 13, 2, 5, 4, 2, 14, 8, }, /* left_mode 8 */ - { 92, 9, 5, 19, 15, 3, 3, 1, 6, 58, }, /* left_mode 9 */ - }, +const vp9_prob vp9_kf_default_bmode_probs[VP9_INTRA_MODES] + [VP9_INTRA_MODES] + [VP9_INTRA_MODES - 1] = { + { /* above = dc */ + { 137, 30, 42, 148, 151, 207, 70, 52, 91 } /* left = dc */, + { 92, 45, 102, 136, 116, 180, 74, 90, 100 } /* left = v */, + { 73, 32, 19, 187, 222, 215, 46, 34, 100 } /* left = h */, + { 91, 30, 32, 116, 121, 186, 93, 86, 94 } /* left = d45 */, + { 72, 35, 36, 149, 68, 206, 68, 63, 105 } /* left = d135 */, + { 73, 31, 28, 138, 57, 124, 55, 122, 151 } /* left = d117 */, + { 67, 23, 21, 140, 126, 197, 40, 37, 171 } /* left = d153 */, + { 86, 27, 28, 128, 154, 212, 45, 43, 53 } /* left = d27 */, + { 74, 32, 27, 107, 86, 160, 63, 134, 102 } /* left = d63 */, + { 59, 67, 44, 140, 161, 202, 78, 67, 119 } /* left = tm */ + }, { /* above = v */ + { 63, 36, 126, 146, 123, 158, 60, 90, 96 } /* left = dc */, + { 43, 46, 168, 134, 107, 128, 69, 142, 92 } /* left = v */, + { 44, 29, 68, 159, 201, 177, 50, 57, 77 } /* left = h */, + { 58, 38, 76, 114, 97, 172, 78, 133, 92 } /* left = d45 */, + { 46, 41, 76, 140, 63, 184, 69, 112, 57 } /* left = d135 */, + { 38, 32, 85, 140, 46, 112, 54, 151, 133 } /* left = d117 */, + { 39, 27, 61, 131, 110, 175, 44, 75, 136 } /* left = d153 */, + { 52, 30, 74, 113, 130, 175, 51, 64, 58 } /* left = d27 */, + { 47, 35, 80, 100, 74, 143, 64, 163, 74 } /* left = d63 */, + { 36, 61, 116, 114, 128, 162, 80, 125, 82 } /* left = tm */ + }, { /* above = h */ + { 82, 26, 26, 171, 208, 204, 44, 32, 105 } /* left = dc */, + { 55, 44, 68, 166, 179, 192, 57, 57, 108 } /* left = v */, + { 42, 26, 11, 199, 241, 228, 23, 15, 85 } /* left = h */, + { 68, 42, 19, 131, 160, 199, 55, 52, 83 } /* left = d45 */, + { 58, 50, 25, 139, 115, 232, 39, 52, 118 } /* left = d135 */, + { 50, 35, 33, 153, 104, 162, 64, 59, 131 } /* left = d117 */, + { 44, 24, 16, 150, 177, 202, 33, 19, 156 } /* left = d153 */, + { 55, 27, 12, 153, 203, 218, 26, 27, 49 } /* left = d27 */, + { 53, 49, 21, 110, 116, 168, 59, 80, 76 } /* left = d63 
*/, + { 38, 72, 19, 168, 203, 212, 50, 50, 107 } /* left = tm */ + }, { /* above = d45 */ + { 103, 26, 36, 129, 132, 201, 83, 80, 93 } /* left = dc */, + { 59, 38, 83, 112, 103, 162, 98, 136, 90 } /* left = v */, + { 62, 30, 23, 158, 200, 207, 59, 57, 50 } /* left = h */, + { 67, 30, 29, 84, 86, 191, 102, 91, 59 } /* left = d45 */, + { 60, 32, 33, 112, 71, 220, 64, 89, 104 } /* left = d135 */, + { 53, 26, 34, 130, 56, 149, 84, 120, 103 } /* left = d117 */, + { 53, 21, 23, 133, 109, 210, 56, 77, 172 } /* left = d153 */, + { 77, 19, 29, 112, 142, 228, 55, 66, 36 } /* left = d27 */, + { 61, 29, 29, 93, 97, 165, 83, 175, 162 } /* left = d63 */, + { 47, 47, 43, 114, 137, 181, 100, 99, 95 } /* left = tm */ + }, { /* above = d135 */ + { 69, 23, 29, 128, 83, 199, 46, 44, 101 } /* left = dc */, + { 53, 40, 55, 139, 69, 183, 61, 80, 110 } /* left = v */, + { 40, 29, 19, 161, 180, 207, 43, 24, 91 } /* left = h */, + { 60, 34, 19, 105, 61, 198, 53, 64, 89 } /* left = d45 */, + { 52, 31, 22, 158, 40, 209, 58, 62, 89 } /* left = d135 */, + { 44, 31, 29, 147, 46, 158, 56, 102, 198 } /* left = d117 */, + { 35, 19, 12, 135, 87, 209, 41, 45, 167 } /* left = d153 */, + { 55, 25, 21, 118, 95, 215, 38, 39, 66 } /* left = d27 */, + { 51, 38, 25, 113, 58, 164, 70, 93, 97 } /* left = d63 */, + { 47, 54, 34, 146, 108, 203, 72, 103, 151 } /* left = tm */ + }, { /* above = d117 */ + { 64, 19, 37, 156, 66, 138, 49, 95, 133 } /* left = dc */, + { 46, 27, 80, 150, 55, 124, 55, 121, 135 } /* left = v */, + { 36, 23, 27, 165, 149, 166, 54, 64, 118 } /* left = h */, + { 53, 21, 36, 131, 63, 163, 60, 109, 81 } /* left = d45 */, + { 40, 26, 35, 154, 40, 185, 51, 97, 123 } /* left = d135 */, + { 35, 19, 34, 179, 19, 97, 48, 129, 124 } /* left = d117 */, + { 36, 20, 26, 136, 62, 164, 33, 77, 154 } /* left = d153 */, + { 45, 18, 32, 130, 90, 157, 40, 79, 91 } /* left = d27 */, + { 45, 26, 28, 129, 45, 129, 49, 147, 123 } /* left = d63 */, + { 38, 44, 51, 136, 74, 162, 57, 97, 121 } /* left = tm */ + }, { /* above = d153 */ + { 75, 17, 22, 136, 138, 185, 32, 34, 166 } /* left = dc */, + { 56, 39, 58, 133, 117, 173, 48, 53, 187 } /* left = v */, + { 35, 21, 12, 161, 212, 207, 20, 23, 145 } /* left = h */, + { 56, 29, 19, 117, 109, 181, 55, 68, 112 } /* left = d45 */, + { 47, 29, 17, 153, 64, 220, 59, 51, 114 } /* left = d135 */, + { 46, 16, 24, 136, 76, 147, 41, 64, 172 } /* left = d117 */, + { 34, 17, 11, 108, 152, 187, 13, 15, 209 } /* left = d153 */, + { 51, 24, 14, 115, 133, 209, 32, 26, 104 } /* left = d27 */, + { 55, 30, 18, 122, 79, 179, 44, 88, 116 } /* left = d63 */, + { 37, 49, 25, 129, 168, 164, 41, 54, 148 } /* left = tm */ + }, { /* above = d27 */ + { 82, 22, 32, 127, 143, 213, 39, 41, 70 } /* left = dc */, + { 62, 44, 61, 123, 105, 189, 48, 57, 64 } /* left = v */, + { 47, 25, 17, 175, 222, 220, 24, 30, 86 } /* left = h */, + { 68, 36, 17, 106, 102, 206, 59, 74, 74 } /* left = d45 */, + { 57, 39, 23, 151, 68, 216, 55, 63, 58 } /* left = d135 */, + { 49, 30, 35, 141, 70, 168, 82, 40, 115 } /* left = d117 */, + { 51, 25, 15, 136, 129, 202, 38, 35, 139 } /* left = d153 */, + { 68, 26, 16, 111, 141, 215, 29, 28, 28 } /* left = d27 */, + { 59, 39, 19, 114, 75, 180, 77, 104, 42 } /* left = d63 */, + { 40, 61, 26, 126, 152, 206, 61, 59, 93 } /* left = tm */ + }, { /* above = d63 */ + { 78, 23, 39, 111, 117, 170, 74, 124, 94 } /* left = dc */, + { 48, 34, 86, 101, 92, 146, 78, 179, 134 } /* left = v */, + { 47, 22, 24, 138, 187, 178, 68, 69, 59 } /* left = h */, + { 56, 25, 33, 105, 112, 187, 95, 177, 129 } /* left = d45 
*/, + { 48, 31, 27, 114, 63, 183, 82, 116, 56 } /* left = d135 */, + { 43, 28, 37, 121, 63, 123, 61, 192, 169 } /* left = d117 */, + { 42, 17, 24, 109, 97, 177, 56, 76, 122 } /* left = d153 */, + { 58, 18, 28, 105, 139, 182, 70, 92, 63 } /* left = d27 */, + { 46, 23, 32, 74, 86, 150, 67, 183, 88 } /* left = d63 */, + { 36, 38, 48, 92, 122, 165, 88, 137, 91 } /* left = tm */ + }, { /* above = tm */ + { 65, 70, 60, 155, 159, 199, 61, 60, 81 } /* left = dc */, + { 44, 78, 115, 132, 119, 173, 71, 112, 93 } /* left = v */, + { 39, 38, 21, 184, 227, 206, 42, 32, 64 } /* left = h */, + { 58, 47, 36, 124, 137, 193, 80, 82, 78 } /* left = d45 */, + { 49, 50, 35, 144, 95, 205, 63, 78, 59 } /* left = d135 */, + { 41, 53, 52, 148, 71, 142, 65, 128, 51 } /* left = d117 */, + { 40, 36, 28, 143, 143, 202, 40, 55, 137 } /* left = d153 */, + { 52, 34, 29, 129, 183, 227, 42, 35, 43 } /* left = d27 */, + { 42, 44, 44, 104, 105, 164, 64, 130, 80 } /* left = d63 */, + { 43, 81, 53, 140, 169, 204, 68, 84, 72 } /* left = tm */ + } }; diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c index 8d376adbf70b8fc83a23015ee3293e6f8fb9ea6e..78fb2f022d6716574d92b05749f70001ecc05a12 100644 --- a/vp9/common/vp9_mvref_common.c +++ b/vp9/common/vp9_mvref_common.c @@ -11,35 +11,34 @@ #include "vp9/common/vp9_mvref_common.h" #define MVREF_NEIGHBOURS 8 - -static int mb_mv_ref_search[MVREF_NEIGHBOURS][2] = { - {0, -1}, {-1, 0}, {-1, -1}, {0, -2}, - {-2, 0}, {-1, -2}, {-2, -1}, {-2, -2} -}; - -static int mb_ref_distance_weight[MVREF_NEIGHBOURS] = - { 3, 3, 2, 1, 1, 1, 1, 1 }; - -static int sb_mv_ref_search[MVREF_NEIGHBOURS][2] = { - {0, -1}, {-1, 0}, {1, -1}, {-1, 1}, - {-1, -1}, {0, -2}, {-2, 0}, {-1, -2} +static int mv_ref_blocks[BLOCK_SIZE_TYPES][MVREF_NEIGHBOURS][2] = { + // SB4X4 + {{0, -1}, {-1, 0}, {-1, -1}, {0, -2}, {-2, 0}, {-1, -2}, {-2, -1}, {-2, -2}}, + // SB4X8 + {{0, -1}, {-1, 0}, {-1, -1}, {0, -2}, {-2, 0}, {-1, -2}, {-2, -1}, {-2, -2}}, + // SB8X4 + {{0, -1}, {-1, 0}, {-1, -1}, {0, -2}, {-2, 0}, {-1, -2}, {-2, -1}, {-2, -2}}, + // SB8X8 + {{0, -1}, {-1, 0}, {-1, -1}, {0, -2}, {-2, 0}, {-1, -2}, {-2, -1}, {-2, -2}}, + // SB8X16 + {{-1, 0}, {0, -1}, {-1, 1}, {-1, -1}, {-2, 0}, {0, -2}, {-1, -2}, {-2, -1}}, + // SB16X8 + {{0, -1}, {-1, 0}, {1, -1}, {-1, -1}, {0, -2}, {-2, 0}, {-2, -1}, {-1, -2}}, + // SB16X16 + {{0, -1}, {-1, 0}, {1, -1}, {-1, 1}, {-1, -1}, {0, -3}, {-3, 0}, {-3, -3}}, + // SB16X32 + {{-1, 0}, {0, -1}, {-1, 2}, {-1, -1}, {1, -1}, {-3, 0}, {0, -3}, {-3, -3}}, + // SB32X16 + {{0, -1}, {-1, 0}, {2, -1}, {-1, -1}, {-1, 1}, {0, -3}, {-3, 0}, {-3, -3}}, + // SB32X32 + {{1, -1}, {-1, 1}, {2, -1}, {-1, 2}, {-1, -1}, {0, -3}, {-3, 0}, {-3, -3}}, + // SB32X64 + {{-1, 0}, {0, -1}, {-1, 4}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-1, 2}}, + // SB64X32 + {{0, -1}, {-1, 0}, {4, -1}, {-1, 2}, {-1, -1}, {0, -3}, {-3, 0}, {2, -1}}, + // SB64X64 + {{3, -1}, {-1, 3}, {4, -1}, {-1, 4}, {-1, -1}, {0, -1}, {-1, 0}, {6, -1}} }; - -static int sb_ref_distance_weight[MVREF_NEIGHBOURS] = - { 3, 3, 2, 2, 2, 1, 1, 1 }; - - - -static int sb64_mv_ref_search[MVREF_NEIGHBOURS][2] = { - {0, -1}, {-1, 0}, {1, -1}, {-1, 1}, - {2, -1}, {-1, 2}, {3, -1}, {-1,-1} -}; - -static int sb64_ref_distance_weight[MVREF_NEIGHBOURS] = - { 1, 1, 1, 1, 1, 1, 1, 1 }; - - - // clamp_mv_ref #define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units @@ -50,15 +49,21 @@ static void clamp_mv_ref(const MACROBLOCKD *xd, int_mv *mv) { xd->mb_to_bottom_edge + MV_BORDER); } -// Gets a candidate refenence motion vector from the given 
mode info +// Gets a candidate reference motion vector from the given mode info // structure if one exists that matches the given reference frame. static int get_matching_candidate(const MODE_INFO *candidate_mi, MV_REFERENCE_FRAME ref_frame, - int_mv *c_mv) { - if (ref_frame == candidate_mi->mbmi.ref_frame) { - c_mv->as_int = candidate_mi->mbmi.mv[0].as_int; - } else if (ref_frame == candidate_mi->mbmi.second_ref_frame) { - c_mv->as_int = candidate_mi->mbmi.mv[1].as_int; + int_mv *c_mv, int block_idx) { + if (ref_frame == candidate_mi->mbmi.ref_frame[0]) { + if (block_idx >= 0 && candidate_mi->mbmi.sb_type < BLOCK_SIZE_SB8X8) + c_mv->as_int = candidate_mi->bmi[block_idx].as_mv[0].as_int; + else + c_mv->as_int = candidate_mi->mbmi.mv[0].as_int; + } else if (ref_frame == candidate_mi->mbmi.ref_frame[1]) { + if (block_idx >= 0 && candidate_mi->mbmi.sb_type < BLOCK_SIZE_SB8X8) + c_mv->as_int = candidate_mi->bmi[block_idx].as_mv[1].as_int; + else + c_mv->as_int = candidate_mi->mbmi.mv[1].as_int; } else { return 0; } @@ -66,7 +71,7 @@ static int get_matching_candidate(const MODE_INFO *candidate_mi, return 1; } -// Gets candidate refenence motion vector(s) from the given mode info +// Gets candidate reference motion vector(s) from the given mode info // structure if they exists and do NOT match the given reference frame. static void get_non_matching_candidates(const MODE_INFO *candidate_mi, MV_REFERENCE_FRAME ref_frame, @@ -81,18 +86,18 @@ static void get_non_matching_candidates(const MODE_INFO *candidate_mi, *c2_ref_frame = INTRA_FRAME; // If first candidate not valid neither will be. - if (candidate_mi->mbmi.ref_frame > INTRA_FRAME) { + if (candidate_mi->mbmi.ref_frame[0] > INTRA_FRAME) { // First candidate - if (candidate_mi->mbmi.ref_frame != ref_frame) { - *c_ref_frame = candidate_mi->mbmi.ref_frame; + if (candidate_mi->mbmi.ref_frame[0] != ref_frame) { + *c_ref_frame = candidate_mi->mbmi.ref_frame[0]; c_mv->as_int = candidate_mi->mbmi.mv[0].as_int; } // Second candidate - if ((candidate_mi->mbmi.second_ref_frame > INTRA_FRAME) && - (candidate_mi->mbmi.second_ref_frame != ref_frame) && + if ((candidate_mi->mbmi.ref_frame[1] > INTRA_FRAME) && + (candidate_mi->mbmi.ref_frame[1] != ref_frame) && (candidate_mi->mbmi.mv[1].as_int != candidate_mi->mbmi.mv[0].as_int)) { - *c2_ref_frame = candidate_mi->mbmi.second_ref_frame; + *c2_ref_frame = candidate_mi->mbmi.ref_frame[1]; c2_mv->as_int = candidate_mi->mbmi.mv[1].as_int; } } @@ -103,145 +108,41 @@ static void get_non_matching_candidates(const MODE_INFO *candidate_mi, static void scale_mv(MACROBLOCKD *xd, MV_REFERENCE_FRAME this_ref_frame, MV_REFERENCE_FRAME candidate_ref_frame, int_mv *candidate_mv, int *ref_sign_bias) { - // int frame_distances[MAX_REF_FRAMES]; - // int last_distance = 1; - // int gf_distance = xd->frames_since_golden; - // int arf_distance = xd->frames_till_alt_ref_frame; // Sign inversion where appropriate. if (ref_sign_bias[candidate_ref_frame] != ref_sign_bias[this_ref_frame]) { candidate_mv->as_mv.row = -candidate_mv->as_mv.row; candidate_mv->as_mv.col = -candidate_mv->as_mv.col; } - - /* - // Scale based on frame distance if the reference frames not the same. - frame_distances[INTRA_FRAME] = 1; // should never be used - frame_distances[LAST_FRAME] = 1; - frame_distances[GOLDEN_FRAME] = - (xd->frames_since_golden) ? xd->frames_si nce_golden : 1; - frame_distances[ALTREF_FRAME] = - (xd->frames_till_alt_ref_frame) ? 
xd->frames_till_alt_ref_frame : 1; - - if (frame_distances[this_ref_frame] && - frame_distances[candidate_ref_frame]) { - candidate_mv->as_mv.row = - (short)(((int)(candidate_mv->as_mv.row) * - frame_distances[this_ref_frame]) / - frame_distances[candidate_ref_frame]); - - candidate_mv->as_mv.col = - (short)(((int)(candidate_mv->as_mv.col) * - frame_distances[this_ref_frame]) / - frame_distances[candidate_ref_frame]); - } - */ -} - -/* -// Adds a new candidate reference vector to the sorted list. -// If it is a repeat the weight of the existing entry is increased -// and the order of the list is resorted. -// This method of add plus sort has been deprecated for now as there is a -// further sort of the best candidates in vp9_find_best_ref_mvs() and the -// incremental benefit of both is small. If the decision is made to remove -// the sort in vp9_find_best_ref_mvs() for performance reasons then it may be -// worth re-instating some sort of list reordering by weight here. -// -static void addmv_and_shuffle( - int_mv *mv_list, - int *mv_scores, - int *refmv_count, - int_mv candidate_mv, - int weight -) { - - int i; - int insert_point; - int duplicate_found = FALSE; - - // Check for duplicates. If there is one increase its score. - // We only compare vs the current top candidates. - insert_point = (*refmv_count < (MAX_MV_REF_CANDIDATES - 1)) - ? *refmv_count : (MAX_MV_REF_CANDIDATES - 1); - - i = insert_point; - if (*refmv_count > i) - i++; - while (i > 0) { - i--; - if (candidate_mv.as_int == mv_list[i].as_int) { - duplicate_found = TRUE; - mv_scores[i] += weight; - break; - } - } - - // If no duplicate and the new candidate is good enough then add it. - if (!duplicate_found ) { - if (weight > mv_scores[insert_point]) { - mv_list[insert_point].as_int = candidate_mv.as_int; - mv_scores[insert_point] = weight; - i = insert_point; - } - (*refmv_count)++; - } - - // Reshuffle the list so that highest scoring mvs at the top. - while (i > 0) { - if (mv_scores[i] > mv_scores[i-1]) { - int tmp_score = mv_scores[i-1]; - int_mv tmp_mv = mv_list[i-1]; - - mv_scores[i-1] = mv_scores[i]; - mv_list[i-1] = mv_list[i]; - mv_scores[i] = tmp_score; - mv_list[i] = tmp_mv; - i--; - } else - break; - } } -*/ -// Adds a new candidate reference vector to the list. -// The mv is thrown out if it is already in the list. -// Unlike the addmv_and_shuffle() this does not reorder the list -// but assumes that candidates are added in the order most likely to -// match distance and reference frame bias. +// Add a candidate mv. +// Discard if it has already been seen. static void add_candidate_mv(int_mv *mv_list, int *mv_scores, int *candidate_count, int_mv candidate_mv, int weight) { - int i; - - // Make sure we dont insert off the end of the list - const int insert_point = MIN(*candidate_count, MAX_MV_REF_CANDIDATES - 1); - - // Look for duplicates - for (i = 0; i <= insert_point; ++i) { - if (candidate_mv.as_int == mv_list[i].as_int) - break; - } - - // Add the candidate. If the list is already full it is only desirable that - // it should overwrite if it has a higher weight than the last entry. 
- if (i >= insert_point && weight > mv_scores[insert_point]) { - mv_list[insert_point].as_int = candidate_mv.as_int; - mv_scores[insert_point] = weight; - *candidate_count += (*candidate_count < MAX_MV_REF_CANDIDATES); + if (*candidate_count == 0) { + mv_list[0].as_int = candidate_mv.as_int; + mv_scores[0] = weight; + *candidate_count += 1; + } else if ((*candidate_count == 1) && + (candidate_mv.as_int != mv_list[0].as_int)) { + mv_list[1].as_int = candidate_mv.as_int; + mv_scores[1] = weight; + *candidate_count += 1; } } -// This function searches the neighbourhood of a given MB/SB and populates a -// list of candidate reference vectors. +// This function searches the neighbourhood of a given MB/SB +// to try and find candidate reference vectors. // -void vp9_find_mv_refs(VP9_COMMON *cm, MACROBLOCKD *xd, MODE_INFO *here, - MODE_INFO *lf_here, MV_REFERENCE_FRAME ref_frame, - int_mv *mv_ref_list, int *ref_sign_bias) { +void vp9_find_mv_refs_idx(VP9_COMMON *cm, MACROBLOCKD *xd, MODE_INFO *here, + MODE_INFO *lf_here, MV_REFERENCE_FRAME ref_frame, + int_mv *mv_ref_list, int *ref_sign_bias, + int block_idx) { int i; MODE_INFO *candidate_mi; MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; - int_mv candidate_mvs[MAX_MV_REF_CANDIDATES]; int_mv c_refmv; int_mv c2_refmv; MV_REFERENCE_FRAME c_ref_frame; @@ -250,110 +151,119 @@ void vp9_find_mv_refs(VP9_COMMON *cm, MACROBLOCKD *xd, MODE_INFO *here, int refmv_count = 0; int split_count = 0; int (*mv_ref_search)[2]; - int *ref_distance_weight; - int zero_seen = FALSE; - const int mb_col = (-xd->mb_to_left_edge) >> 7; + const int mi_col = get_mi_col(xd); + const int mi_row = get_mi_row(xd); + int intra_count = 0; + int zero_count = 0; + int newmv_count = 0; + int x_idx = 0, y_idx = 0; // Blank the reference vector lists and other local structures. 
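
The rewritten add_candidate_mv above no longer weights and reorders entries; it keeps the first MAX_MV_REF_CANDIDATES (two) distinct vectors in arrival order and drops everything after that. A minimal standalone sketch of that behaviour, with int_mv reduced to its raw 32-bit view (illustrative only; the real union also carries a row/col view):

```c
#include <assert.h>
#include <stdint.h>

typedef union { uint32_t as_int; } int_mv;  /* reduced stand-in */
#define MAX_MV_REF_CANDIDATES 2

/* Same logic as the patch: slot 0 takes the first candidate, slot 1 the
   first candidate that differs from it; later candidates are ignored. */
static void add_candidate_mv(int_mv *mv_list, int *mv_scores,
                             int *candidate_count, int_mv candidate_mv,
                             int weight) {
  if (*candidate_count == 0) {
    mv_list[0] = candidate_mv;
    mv_scores[0] = weight;
    *candidate_count += 1;
  } else if (*candidate_count == 1 &&
             candidate_mv.as_int != mv_list[0].as_int) {
    mv_list[1] = candidate_mv;
    mv_scores[1] = weight;
    *candidate_count += 1;
  }
}

int main(void) {
  int_mv list[MAX_MV_REF_CANDIDATES] = {{0}, {0}};
  int scores[MAX_MV_REF_CANDIDATES] = {0, 0};
  int count = 0;
  const int_mv a = {0x00100010u}, b = {0x00100010u}, c = {0xfff0fff0u};

  add_candidate_mv(list, scores, &count, a, 16);
  add_candidate_mv(list, scores, &count, b, 16);  /* duplicate: ignored */
  add_candidate_mv(list, scores, &count, c, 1);
  assert(count == 2 && list[0].as_int == a.as_int &&
         list[1].as_int == c.as_int);
  return 0;
}
```
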
   vpx_memset(mv_ref_list, 0, sizeof(int_mv) * MAX_MV_REF_CANDIDATES);
-  vpx_memset(candidate_mvs, 0, sizeof(int_mv) * MAX_MV_REF_CANDIDATES);
   vpx_memset(candidate_scores, 0, sizeof(candidate_scores));

-  if (mbmi->sb_type == BLOCK_SIZE_SB64X64) {
-    mv_ref_search = sb64_mv_ref_search;
-    ref_distance_weight = sb64_ref_distance_weight;
-  } else if (mbmi->sb_type == BLOCK_SIZE_SB32X32) {
-    mv_ref_search = sb_mv_ref_search;
-    ref_distance_weight = sb_ref_distance_weight;
-  } else {
-    mv_ref_search = mb_mv_ref_search;
-    ref_distance_weight = mb_ref_distance_weight;
+  mv_ref_search = mv_ref_blocks[mbmi->sb_type];
+  if (mbmi->sb_type < BLOCK_SIZE_SB8X8) {
+    x_idx = block_idx & 1;
+    y_idx = block_idx >> 1;
   }

   // We first scan for candidate vectors that match the current reference frame
   // Look at nearest neighbours
   for (i = 0; i < 2; ++i) {
-    const int mb_search_col = mb_col + mv_ref_search[i][0];
-
-    if ((mb_search_col >= cm->cur_tile_mb_col_start) &&
-        (mb_search_col < cm->cur_tile_mb_col_end) &&
-        ((mv_ref_search[i][1] << 7) >= xd->mb_to_top_edge)) {
+    const int mi_search_col = mi_col + mv_ref_search[i][0];
+    const int mi_search_row = mi_row + mv_ref_search[i][1];
+    if ((mi_search_col >= cm->cur_tile_mi_col_start) &&
+        (mi_search_col < cm->cur_tile_mi_col_end) &&
+        (mi_search_row >= 0) && (mi_search_row < cm->mi_rows)) {
+      int b;
       candidate_mi = here + mv_ref_search[i][0] +
                      (mv_ref_search[i][1] * xd->mode_info_stride);

-      if (get_matching_candidate(candidate_mi, ref_frame, &c_refmv)) {
-        add_candidate_mv(candidate_mvs, candidate_scores,
-                         &refmv_count, c_refmv, ref_distance_weight[i] + 16);
+      if (block_idx >= 0) {
+        if (mv_ref_search[i][0])
+          b = 1 + y_idx * 2;
+        else
+          b = 2 + x_idx;
+      } else {
+        b = -1;
       }
-      split_count += (candidate_mi->mbmi.mode == SPLITMV);
-    }
-  }
-  // Look in the last frame if it exists
-  if (lf_here) {
-    candidate_mi = lf_here;
-    if (get_matching_candidate(candidate_mi, ref_frame, &c_refmv)) {
-      add_candidate_mv(candidate_mvs, candidate_scores,
-                       &refmv_count, c_refmv, 18);
+      if (get_matching_candidate(candidate_mi, ref_frame, &c_refmv, b)) {
+        add_candidate_mv(mv_ref_list, candidate_scores,
+                         &refmv_count, c_refmv, 16);
+      }
+      split_count += (candidate_mi->mbmi.sb_type < BLOCK_SIZE_SB8X8 &&
+                      candidate_mi->mbmi.ref_frame[0] != INTRA_FRAME);
+
+      // Count the number of neighbours coded intra, zeromv and newmv
+      intra_count += (candidate_mi->mbmi.mode < NEARESTMV);
+      zero_count += (candidate_mi->mbmi.mode == ZEROMV);
+      newmv_count += (candidate_mi->mbmi.mode >= NEWMV);
     }
   }
+
+  // More distant neighbours
   for (i = 2; (i < MVREF_NEIGHBOURS) &&
-              (refmv_count < (MAX_MV_REF_CANDIDATES - 1)); ++i) {
-    const int mb_search_col = mb_col + mv_ref_search[i][0];
-
-    if ((mb_search_col >= cm->cur_tile_mb_col_start) &&
-        (mb_search_col < cm->cur_tile_mb_col_end) &&
-        ((mv_ref_search[i][1] << 7) >= xd->mb_to_top_edge)) {
+              (refmv_count < MAX_MV_REF_CANDIDATES); ++i) {
+    const int mi_search_col = mi_col + mv_ref_search[i][0];
+    const int mi_search_row = mi_row + mv_ref_search[i][1];
+    if ((mi_search_col >= cm->cur_tile_mi_col_start) &&
+        (mi_search_col < cm->cur_tile_mi_col_end) &&
+        (mi_search_row >= 0) && (mi_search_row < cm->mi_rows)) {
       candidate_mi = here + mv_ref_search[i][0] +
                      (mv_ref_search[i][1] * xd->mode_info_stride);

-      if (get_matching_candidate(candidate_mi, ref_frame, &c_refmv)) {
-        add_candidate_mv(candidate_mvs, candidate_scores,
-                         &refmv_count, c_refmv, ref_distance_weight[i] + 16);
+      if (get_matching_candidate(candidate_mi, ref_frame, &c_refmv, -1)) {
+        add_candidate_mv(mv_ref_list, 
candidate_scores, + &refmv_count, c_refmv, 16); } } } + // Look in the last frame if it exists + if (lf_here && (refmv_count < MAX_MV_REF_CANDIDATES)) { + candidate_mi = lf_here; + if (get_matching_candidate(candidate_mi, ref_frame, &c_refmv, -1)) { + add_candidate_mv(mv_ref_list, candidate_scores, + &refmv_count, c_refmv, 16); + } + } + // If we have not found enough candidates consider ones where the // reference frame does not match. Break out when we have // MAX_MV_REF_CANDIDATES candidates. // Look first at spatial neighbours - if (refmv_count < (MAX_MV_REF_CANDIDATES - 1)) { - for (i = 0; i < MVREF_NEIGHBOURS; ++i) { - const int mb_search_col = mb_col + mv_ref_search[i][0]; - - if ((mb_search_col >= cm->cur_tile_mb_col_start) && - (mb_search_col < cm->cur_tile_mb_col_end) && - ((mv_ref_search[i][1] << 7) >= xd->mb_to_top_edge)) { - - candidate_mi = here + mv_ref_search[i][0] + - (mv_ref_search[i][1] * xd->mode_info_stride); - - get_non_matching_candidates(candidate_mi, ref_frame, - &c_ref_frame, &c_refmv, - &c2_ref_frame, &c2_refmv); - - if (c_ref_frame != INTRA_FRAME) { - scale_mv(xd, ref_frame, c_ref_frame, &c_refmv, ref_sign_bias); - add_candidate_mv(candidate_mvs, candidate_scores, - &refmv_count, c_refmv, ref_distance_weight[i]); - } - - if (c2_ref_frame != INTRA_FRAME) { - scale_mv(xd, ref_frame, c2_ref_frame, &c2_refmv, ref_sign_bias); - add_candidate_mv(candidate_mvs, candidate_scores, - &refmv_count, c2_refmv, ref_distance_weight[i]); - } + for (i = 0; (i < MVREF_NEIGHBOURS) && + (refmv_count < MAX_MV_REF_CANDIDATES); ++i) { + const int mi_search_col = mi_col + mv_ref_search[i][0]; + const int mi_search_row = mi_row + mv_ref_search[i][1]; + if ((mi_search_col >= cm->cur_tile_mi_col_start) && + (mi_search_col < cm->cur_tile_mi_col_end) && + (mi_search_row >= 0) && (mi_search_row < cm->mi_rows)) { + candidate_mi = here + mv_ref_search[i][0] + + (mv_ref_search[i][1] * xd->mode_info_stride); + + get_non_matching_candidates(candidate_mi, ref_frame, + &c_ref_frame, &c_refmv, + &c2_ref_frame, &c2_refmv); + + if (c_ref_frame != INTRA_FRAME) { + scale_mv(xd, ref_frame, c_ref_frame, &c_refmv, ref_sign_bias); + add_candidate_mv(mv_ref_list, candidate_scores, + &refmv_count, c_refmv, 1); } - if (refmv_count >= (MAX_MV_REF_CANDIDATES - 1)) { - break; + if (c2_ref_frame != INTRA_FRAME) { + scale_mv(xd, ref_frame, c2_ref_frame, &c2_refmv, ref_sign_bias); + add_candidate_mv(mv_ref_list, candidate_scores, + &refmv_count, c2_refmv, 1); } } } + // Look at the last frame if it exists - if (refmv_count < (MAX_MV_REF_CANDIDATES - 1) && lf_here) { + if (lf_here && (refmv_count < MAX_MV_REF_CANDIDATES)) { candidate_mi = lf_here; get_non_matching_candidates(candidate_mi, ref_frame, &c_ref_frame, &c_refmv, @@ -361,49 +271,36 @@ void vp9_find_mv_refs(VP9_COMMON *cm, MACROBLOCKD *xd, MODE_INFO *here, if (c_ref_frame != INTRA_FRAME) { scale_mv(xd, ref_frame, c_ref_frame, &c_refmv, ref_sign_bias); - add_candidate_mv(candidate_mvs, candidate_scores, - &refmv_count, c_refmv, 2); + add_candidate_mv(mv_ref_list, candidate_scores, + &refmv_count, c_refmv, 1); } if (c2_ref_frame != INTRA_FRAME) { scale_mv(xd, ref_frame, c2_ref_frame, &c2_refmv, ref_sign_bias); - add_candidate_mv(candidate_mvs, candidate_scores, - &refmv_count, c2_refmv, 2); + add_candidate_mv(mv_ref_list, candidate_scores, + &refmv_count, c2_refmv, 1); } } - // Define inter mode coding context. 
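
The hunk that follows replaces the old score-threshold classification with a direct count-based one: the two nearest neighbours are classified as intra / zeromv / newmv, and those counts (each at most 2) select one of the seven contexts enumerated in vp9_modecont.c. A compact restatement of the mapping (a hypothetical helper written for illustration; the patch computes this inline on mbmi->mb_mode_context):

```c
/* Maps neighbour statistics onto the 7 inter mode contexts. */
static int inter_mode_context(int intra_count, int zero_count,
                              int newmv_count) {
  if (intra_count)
    return 4 + intra_count;  /* 5 = one intra neighbour, 6 = two */
  if (newmv_count)
    return 2 + newmv_count;  /* 3 = one new mv, 4 = two new mvs */
  /* 0 = both zero, 1 = one zero + one predicted, 2 = two predicted */
  return 2 - zero_count;
}
```
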
-  // 0,0 was best
-  if (candidate_mvs[0].as_int == 0) {
-    // 0,0 is only candidate
-    if (refmv_count <= 1) {
-      mbmi->mb_mode_context[ref_frame] = 0;
-    // non zero candidates candidates available
-    } else if (split_count == 0) {
-      mbmi->mb_mode_context[ref_frame] = 1;
+  if (!intra_count) {
+    if (!newmv_count) {
+      // 0 = both zero mv
+      // 1 = one zero mv + one predicted mv
+      // 2 = two predicted mvs
+      mbmi->mb_mode_context[ref_frame] = 2 - zero_count;
     } else {
-      mbmi->mb_mode_context[ref_frame] = 2;
+      // 3 = one predicted/zero and one new mv
+      // 4 = two new mvs
+      mbmi->mb_mode_context[ref_frame] = 2 + newmv_count;
     }
-  } else if (split_count == 0) {
-    // Non zero best, No Split MV cases
-    mbmi->mb_mode_context[ref_frame] = candidate_scores[0] >= 16 ? 3 : 4;
   } else {
-    // Non zero best, some split mv
-    mbmi->mb_mode_context[ref_frame] = candidate_scores[0] >= 16 ? 5 : 6;
+    // 5 = one intra neighbour + x
+    // 6 = two intra neighbours
+    mbmi->mb_mode_context[ref_frame] = 4 + intra_count;
   }

-  // Scan for 0,0 case and clamp non zero choices
+  // Clamp vectors
   for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) {
-    if (candidate_mvs[i].as_int == 0) {
-      zero_seen = TRUE;
-    } else {
-      clamp_mv_ref(xd, &candidate_mvs[i]);
-    }
+    clamp_mv_ref(xd, &mv_ref_list[i]);
   }

-  // 0,0 is always a valid reference. Add it if not already seen.
-  if (!zero_seen)
-    candidate_mvs[MAX_MV_REF_CANDIDATES-1].as_int = 0;
-
-  // Copy over the candidate list.
-  vpx_memcpy(mv_ref_list, candidate_mvs, sizeof(candidate_mvs));
 }
diff --git a/vp9/common/vp9_mvref_common.h b/vp9/common/vp9_mvref_common.h
index a81366997bdfbf204eabd839509a5b16da0dea7c..7290f10abdf50fd17cfbe56415a04c4aa2102d15 100644
--- a/vp9/common/vp9_mvref_common.h
+++ b/vp9/common/vp9_mvref_common.h
@@ -14,12 +14,24 @@
 #ifndef VP9_COMMON_VP9_MVREF_COMMON_H_
 #define VP9_COMMON_VP9_MVREF_COMMON_H_

-void vp9_find_mv_refs(VP9_COMMON *cm,
-                      MACROBLOCKD *xd,
-                      MODE_INFO *here,
-                      MODE_INFO *lf_here,
-                      MV_REFERENCE_FRAME ref_frame,
-                      int_mv *mv_ref_list,
-                      int *ref_sign_bias);
+void vp9_find_mv_refs_idx(VP9_COMMON *cm,
+                          MACROBLOCKD *xd,
+                          MODE_INFO *here,
+                          MODE_INFO *lf_here,
+                          MV_REFERENCE_FRAME ref_frame,
+                          int_mv *mv_ref_list,
+                          int *ref_sign_bias,
+                          int block_idx);
+
+static INLINE void vp9_find_mv_refs(VP9_COMMON *cm,
+                                    MACROBLOCKD *xd,
+                                    MODE_INFO *here,
+                                    MODE_INFO *lf_here,
+                                    MV_REFERENCE_FRAME ref_frame,
+                                    int_mv *mv_ref_list,
+                                    int *ref_sign_bias) {
+  vp9_find_mv_refs_idx(cm, xd, here, lf_here, ref_frame,
+                       mv_ref_list, ref_sign_bias, -1);
+}

 #endif  // VP9_COMMON_VP9_MVREF_COMMON_H_
diff --git a/vp9/common/vp9_onyx.h b/vp9/common/vp9_onyx.h
index 422f3885f28c68ad2371465da929d9d0f12473c9..b85b88968742550e3af11baab4de484bb7a13e7d 100644
--- a/vp9/common/vp9_onyx.h
+++ b/vp9/common/vp9_onyx.h
@@ -21,6 +21,9 @@ extern "C"
 #include "vpx/vp8cx.h"
 #include "vpx_scale/yv12config.h"
 #include "vp9/common/vp9_ppflags.h"
+
+#define MAX_MB_SEGMENTS 8
+
 typedef int *VP9_PTR;

 /* Create/destroy static data structures. 
*/ @@ -225,8 +228,9 @@ extern "C" int vp9_set_roimap(VP9_PTR comp, unsigned char *map, unsigned int rows, unsigned int cols, - int delta_q[4], int delta_lf[4], - unsigned int threshold[4]); + int delta_q[MAX_MB_SEGMENTS], + int delta_lf[MAX_MB_SEGMENTS], + unsigned int threshold[MAX_MB_SEGMENTS]); int vp9_set_active_map(VP9_PTR comp, unsigned char *map, unsigned int rows, unsigned int cols); diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index fdbabc5376a661bdb61231e491e6bc3706b5f432..f461bf3badda1e9c9a1c731082693cab09794350 100644 --- a/vp9/common/vp9_onyxc_int.h +++ b/vp9/common/vp9_onyxc_int.h @@ -18,27 +18,19 @@ #include "vp9/common/vp9_entropymv.h" #include "vp9/common/vp9_entropy.h" #include "vp9/common/vp9_entropymode.h" +#include "vp9/common/vp9_quant_common.h" + #if CONFIG_POSTPROC #include "vp9/common/vp9_postproc.h" #endif -/*#ifdef PACKET_TESTING*/ -#include "vp9/common/vp9_header.h" -/*#endif*/ - /* Create/destroy static data structures. */ -void vp9_initialize_common(void); - -#define MINQ 0 +// Define the number of candidate reference buffers. +#define NUM_REF_FRAMES 8 +#define NUM_REF_FRAMES_LG2 3 -#define MAXQ 255 -#define QINDEX_BITS 8 - -#define QINDEX_RANGE (MAXQ + 1) - -#define NUM_REF_FRAMES 3 -#define NUM_REF_FRAMES_LG2 2 +#define ALLOWED_REFS_PER_FRAME 3 // 1 scratch frame for the new frame, 3 for scaled references on the encoder // TODO(jkoleszar): These 3 extra references could probably come from the @@ -48,106 +40,71 @@ void vp9_initialize_common(void); #define NUM_FRAME_CONTEXTS_LG2 2 #define NUM_FRAME_CONTEXTS (1 << NUM_FRAME_CONTEXTS_LG2) -#define COMP_PRED_CONTEXTS 2 +#define MAX_LAG_BUFFERS 25 typedef struct frame_contexts { - vp9_prob bmode_prob[VP9_NKF_BINTRAMODES - 1]; - vp9_prob ymode_prob[VP9_YMODES - 1]; /* interframe intra mode probs */ - vp9_prob sb_ymode_prob[VP9_I32X32_MODES - 1]; - vp9_prob uv_mode_prob[VP9_YMODES][VP9_UV_MODES - 1]; - vp9_prob i8x8_mode_prob[VP9_I8X8_MODES - 1]; - vp9_prob sub_mv_ref_prob[SUBMVREF_COUNT][VP9_SUBMVREFS - 1]; - vp9_prob mbsplit_prob[VP9_NUMMBSPLITS - 1]; - - vp9_coeff_probs coef_probs_4x4[BLOCK_TYPES]; - vp9_coeff_probs coef_probs_8x8[BLOCK_TYPES]; - vp9_coeff_probs coef_probs_16x16[BLOCK_TYPES]; - vp9_coeff_probs coef_probs_32x32[BLOCK_TYPES]; -#if CONFIG_CODE_NONZEROCOUNT - vp9_prob nzc_probs_4x4[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES] - [NZC4X4_NODES]; - vp9_prob nzc_probs_8x8[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES] - [NZC8X8_NODES]; - vp9_prob nzc_probs_16x16[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES] - [NZC16X16_NODES]; - vp9_prob nzc_probs_32x32[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES] - [NZC32X32_NODES]; - vp9_prob nzc_pcat_probs[MAX_NZC_CONTEXTS] - [NZC_TOKENS_EXTRA][NZC_BITS_EXTRA]; -#endif + vp9_prob y_mode_prob[BLOCK_SIZE_GROUPS][VP9_INTRA_MODES - 1]; + vp9_prob uv_mode_prob[VP9_INTRA_MODES][VP9_INTRA_MODES - 1]; + vp9_prob partition_prob[NUM_FRAME_TYPES][NUM_PARTITION_CONTEXTS] + [PARTITION_TYPES - 1]; nmv_context nmvc; nmv_context pre_nmvc; - vp9_prob pre_bmode_prob[VP9_NKF_BINTRAMODES - 1]; - vp9_prob pre_ymode_prob[VP9_YMODES - 1]; /* interframe intra mode probs */ - vp9_prob pre_sb_ymode_prob[VP9_I32X32_MODES - 1]; - vp9_prob pre_uv_mode_prob[VP9_YMODES][VP9_UV_MODES - 1]; - vp9_prob pre_i8x8_mode_prob[VP9_I8X8_MODES - 1]; - vp9_prob pre_sub_mv_ref_prob[SUBMVREF_COUNT][VP9_SUBMVREFS - 1]; - vp9_prob pre_mbsplit_prob[VP9_NUMMBSPLITS - 1]; - unsigned int bmode_counts[VP9_NKF_BINTRAMODES]; - unsigned int ymode_counts[VP9_YMODES]; /* interframe intra mode probs */ - unsigned 
int sb_ymode_counts[VP9_I32X32_MODES]; - unsigned int uv_mode_counts[VP9_YMODES][VP9_UV_MODES]; - unsigned int i8x8_mode_counts[VP9_I8X8_MODES]; /* interframe intra probs */ - unsigned int sub_mv_ref_counts[SUBMVREF_COUNT][VP9_SUBMVREFS]; - unsigned int mbsplit_counts[VP9_NUMMBSPLITS]; - - vp9_coeff_probs pre_coef_probs_4x4[BLOCK_TYPES]; - vp9_coeff_probs pre_coef_probs_8x8[BLOCK_TYPES]; - vp9_coeff_probs pre_coef_probs_16x16[BLOCK_TYPES]; - vp9_coeff_probs pre_coef_probs_32x32[BLOCK_TYPES]; -#if CONFIG_CODE_NONZEROCOUNT - vp9_prob pre_nzc_probs_4x4[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES] - [NZC4X4_NODES]; - vp9_prob pre_nzc_probs_8x8[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES] - [NZC8X8_NODES]; - vp9_prob pre_nzc_probs_16x16[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES] - [NZC16X16_NODES]; - vp9_prob pre_nzc_probs_32x32[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES] - [NZC32X32_NODES]; - vp9_prob pre_nzc_pcat_probs[MAX_NZC_CONTEXTS] - [NZC_TOKENS_EXTRA][NZC_BITS_EXTRA]; -#endif - - vp9_coeff_count coef_counts_4x4[BLOCK_TYPES]; - vp9_coeff_count coef_counts_8x8[BLOCK_TYPES]; - vp9_coeff_count coef_counts_16x16[BLOCK_TYPES]; - vp9_coeff_count coef_counts_32x32[BLOCK_TYPES]; + /* interframe intra mode probs */ + vp9_prob pre_y_mode_prob[BLOCK_SIZE_GROUPS][VP9_INTRA_MODES - 1]; + vp9_prob pre_uv_mode_prob[VP9_INTRA_MODES][VP9_INTRA_MODES - 1]; + vp9_prob pre_partition_prob[NUM_PARTITION_CONTEXTS][PARTITION_TYPES - 1]; + /* interframe intra mode probs */ + unsigned int y_mode_counts[BLOCK_SIZE_GROUPS][VP9_INTRA_MODES]; + unsigned int uv_mode_counts[VP9_INTRA_MODES][VP9_INTRA_MODES]; + unsigned int partition_counts[NUM_PARTITION_CONTEXTS][PARTITION_TYPES]; + + vp9_coeff_probs_model coef_probs[TX_SIZE_MAX_SB][BLOCK_TYPES]; + vp9_coeff_probs_model pre_coef_probs[TX_SIZE_MAX_SB][BLOCK_TYPES]; + vp9_coeff_count_model coef_counts[TX_SIZE_MAX_SB][BLOCK_TYPES]; unsigned int eob_branch_counts[TX_SIZE_MAX_SB][BLOCK_TYPES][REF_TYPES] [COEF_BANDS][PREV_COEF_CONTEXTS]; -#if CONFIG_CODE_NONZEROCOUNT - unsigned int nzc_counts_4x4[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES] - [NZC4X4_TOKENS]; - unsigned int nzc_counts_8x8[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES] - [NZC8X8_TOKENS]; - unsigned int nzc_counts_16x16[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES] - [NZC16X16_TOKENS]; - unsigned int nzc_counts_32x32[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES] - [NZC32X32_TOKENS]; - unsigned int nzc_pcat_counts[MAX_NZC_CONTEXTS] - [NZC_TOKENS_EXTRA][NZC_BITS_EXTRA][2]; -#endif - nmv_context_counts NMVcount; vp9_prob switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1] [VP9_SWITCHABLE_FILTERS - 1]; -#if CONFIG_COMP_INTERINTRA_PRED - unsigned int interintra_counts[2]; - vp9_prob interintra_prob; - vp9_prob pre_interintra_prob; -#endif - - int vp9_mode_contexts[INTER_MODE_CONTEXTS][4]; - unsigned int mv_ref_ct[INTER_MODE_CONTEXTS][4][2]; + vp9_prob pre_switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1] + [VP9_SWITCHABLE_FILTERS - 1]; + unsigned int switchable_interp_count[VP9_SWITCHABLE_FILTERS + 1] + [VP9_SWITCHABLE_FILTERS]; + + vp9_prob inter_mode_probs[INTER_MODE_CONTEXTS][VP9_INTER_MODES - 1]; + vp9_prob pre_inter_mode_probs[INTER_MODE_CONTEXTS][VP9_INTER_MODES - 1]; + unsigned int inter_mode_counts[INTER_MODE_CONTEXTS][VP9_INTER_MODES - 1][2]; + + vp9_prob intra_inter_prob[INTRA_INTER_CONTEXTS]; + vp9_prob comp_inter_prob[COMP_INTER_CONTEXTS]; + vp9_prob single_ref_prob[REF_CONTEXTS][2]; + vp9_prob comp_ref_prob[REF_CONTEXTS]; + vp9_prob pre_intra_inter_prob[INTRA_INTER_CONTEXTS]; + vp9_prob 
pre_comp_inter_prob[COMP_INTER_CONTEXTS]; + vp9_prob pre_single_ref_prob[REF_CONTEXTS][2]; + vp9_prob pre_comp_ref_prob[REF_CONTEXTS]; + unsigned int intra_inter_count[INTRA_INTER_CONTEXTS][2]; + unsigned int comp_inter_count[COMP_INTER_CONTEXTS][2]; + unsigned int single_ref_count[REF_CONTEXTS][2][2]; + unsigned int comp_ref_count[REF_CONTEXTS][2]; + + vp9_prob tx_probs_32x32p[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB - 1]; + vp9_prob tx_probs_16x16p[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB - 2]; + vp9_prob tx_probs_8x8p[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB - 3]; + vp9_prob pre_tx_probs_32x32p[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB - 1]; + vp9_prob pre_tx_probs_16x16p[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB - 2]; + vp9_prob pre_tx_probs_8x8p[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB - 3]; + unsigned int tx_count_32x32p[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB]; + unsigned int tx_count_16x16p[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB - 1]; + unsigned int tx_count_8x8p[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB - 2]; + + vp9_prob mbskip_probs[MBSKIP_CONTEXTS]; + vp9_prob pre_mbskip_probs[MBSKIP_CONTEXTS]; + unsigned int mbskip_count[MBSKIP_CONTEXTS][2]; } FRAME_CONTEXT; -typedef enum { - RECON_CLAMP_REQUIRED = 0, - RECON_CLAMP_NOTREQUIRED = 1 -} CLAMP_TYPE; - typedef enum { SINGLE_PREDICTION_ONLY = 0, COMP_PREDICTION_ONLY = 1, @@ -167,8 +124,11 @@ typedef enum { typedef struct VP9Common { struct vpx_internal_error_info error; - DECLARE_ALIGNED(16, int16_t, Y1dequant[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, int16_t, UVdequant[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, int16_t, y_dequant[QINDEX_RANGE][2]); + DECLARE_ALIGNED(16, int16_t, uv_dequant[QINDEX_RANGE][2]); +#if CONFIG_ALPHA + DECLARE_ALIGNED(16, int16_t, a_dequant[QINDEX_RANGE][2]); +#endif int width; int height; @@ -177,8 +137,13 @@ typedef struct VP9Common { int last_width; int last_height; + // TODO(jkoleszar): this implies chroma ss right now, but could vary per + // plane. Revisit as part of the future change to YV12_BUFFER_CONFIG to + // support additional planes. + int subsampling_x; + int subsampling_y; + YUV_TYPE clr_type; - CLAMP_TYPE clamp_type; YV12_BUFFER_CONFIG *frame_to_show; @@ -186,12 +151,14 @@ typedef struct VP9Common { int fb_idx_ref_cnt[NUM_YV12_BUFFERS]; /* reference counts */ int ref_frame_map[NUM_REF_FRAMES]; /* maps fb_idx to reference slot */ - /* TODO(jkoleszar): could expand active_ref_idx to 4, with 0 as intra, and - * roll new_fb_idx into it. - */ - int active_ref_idx[3]; /* each frame can reference 3 buffers */ + // TODO(jkoleszar): could expand active_ref_idx to 4, with 0 as intra, and + // roll new_fb_idx into it. + + // Each frame can reference ALLOWED_REFS_PER_FRAME buffers + int active_ref_idx[ALLOWED_REFS_PER_FRAME]; + struct scale_factors active_ref_scale[ALLOWED_REFS_PER_FRAME]; int new_fb_idx; - struct scale_factors active_ref_scale[3]; + YV12_BUFFER_CONFIG post_proc_buffer; YV12_BUFFER_CONFIG temp_scale_frame; @@ -201,28 +168,37 @@ typedef struct VP9Common { FRAME_TYPE frame_type; int show_frame; + int last_show_frame; + + // Flag signaling that the frame is encoded using only INTRA modes. + int intra_only; + + // Flag signaling that the frame context should be reset to default values. + // 0 or 1 implies don't reset, 2 reset just the context specified in the + // frame header, 3 reset all contexts. + int reset_frame_context; int frame_flags; + // MBs, mb_rows/cols is in 16-pixel units; mi_rows/cols is in + // MODE_INFO (8-pixel) units. 
int MBs; - int mb_rows; - int mb_cols; + int mb_rows, mi_rows; + int mb_cols, mi_cols; int mode_info_stride; /* profile settings */ - int experimental; - int mb_no_coeff_skip; TXFM_MODE txfm_mode; - COMPPREDMODE_TYPE comp_pred_mode; - int no_lpf; - int use_bilinear_mc_filter; - int full_pixel; int base_qindex; int last_kf_gf_q; /* Q used on the last GF or KF */ - int y1dc_delta_q; - int uvdc_delta_q; - int uvac_delta_q; + int y_dc_delta_q; + int uv_dc_delta_q; + int uv_ac_delta_q; +#if CONFIG_ALPHA + int a_dc_delta_q; + int a_ac_delta_q; +#endif unsigned int frames_since_golden; unsigned int frames_till_alt_ref_frame; @@ -240,55 +216,41 @@ typedef struct VP9Common { unsigned char *last_frame_seg_map; INTERPOLATIONFILTERTYPE mcomp_filter_type; - LOOPFILTERTYPE filter_type; loop_filter_info_n lf_info; int filter_level; int last_sharpness_level; int sharpness_level; - int dering_enabled; - int refresh_entropy_probs; /* Two state 0 = NO, 1 = YES */ + int refresh_frame_context; /* Two state 0 = NO, 1 = YES */ int ref_frame_sign_bias[MAX_REF_FRAMES]; /* Two state 0, 1 */ /* Y,U,V */ - ENTROPY_CONTEXT_PLANES *above_context; /* row of context for each plane */ - ENTROPY_CONTEXT_PLANES left_context[4]; /* (up to) 4 contexts "" */ + ENTROPY_CONTEXT *above_context[MAX_MB_PLANE]; + ENTROPY_CONTEXT left_context[MAX_MB_PLANE][16]; + + // partition contexts + PARTITION_CONTEXT *above_seg_context; + PARTITION_CONTEXT left_seg_context[8]; /* keyframe block modes are predicted by their above, left neighbors */ - vp9_prob kf_bmode_prob[VP9_KF_BINTRAMODES] - [VP9_KF_BINTRAMODES] - [VP9_KF_BINTRAMODES - 1]; - vp9_prob kf_ymode_prob[8][VP9_YMODES - 1]; /* keyframe "" */ - vp9_prob sb_kf_ymode_prob[8][VP9_I32X32_MODES - 1]; - int kf_ymode_probs_index; - int kf_ymode_probs_update; - vp9_prob kf_uv_mode_prob[VP9_YMODES] [VP9_UV_MODES - 1]; - - vp9_prob prob_intra_coded; - vp9_prob prob_last_coded; - vp9_prob prob_gf_coded; - vp9_prob sb32_coded; - vp9_prob sb64_coded; + vp9_prob kf_y_mode_prob[VP9_INTRA_MODES] + [VP9_INTRA_MODES] + [VP9_INTRA_MODES - 1]; + vp9_prob kf_uv_mode_prob[VP9_INTRA_MODES] [VP9_INTRA_MODES - 1]; // Context probabilities when using predictive coding of segment id vp9_prob segment_pred_probs[PREDICTION_PROBS]; unsigned char temporal_update; // Context probabilities for reference frame prediction - unsigned char ref_scores[MAX_REF_FRAMES]; - vp9_prob ref_pred_probs[PREDICTION_PROBS]; - vp9_prob mod_refprobs[MAX_REF_FRAMES][PREDICTION_PROBS]; - - vp9_prob prob_comppred[COMP_PRED_CONTEXTS]; - - // FIXME contextualize - vp9_prob prob_tx[TX_SIZE_MAX_SB - 1]; - - vp9_prob mbskip_pred_probs[MBSKIP_CONTEXTS]; + int allow_comp_inter_inter; + MV_REFERENCE_FRAME comp_fixed_ref; + MV_REFERENCE_FRAME comp_var_ref[2]; + COMPPREDMODE_TYPE comp_pred_mode; FRAME_CONTEXT fc; /* this frame entropy */ FRAME_CONTEXT frame_contexts[NUM_FRAME_CONTEXTS]; @@ -298,9 +260,6 @@ typedef struct VP9Common { int near_boffset[3]; int version; -#ifdef PACKET_TESTING - VP9_HEADER oh; -#endif double bitrate; double framerate; @@ -308,17 +267,13 @@ typedef struct VP9Common { struct postproc_state postproc_state; #endif -#if CONFIG_COMP_INTERINTRA_PRED - int use_interintra; -#endif - int error_resilient_mode; int frame_parallel_decoding_mode; int tile_columns, log2_tile_columns; - int cur_tile_mb_col_start, cur_tile_mb_col_end, cur_tile_col_idx; + int cur_tile_mi_col_start, cur_tile_mi_col_end, cur_tile_col_idx; int tile_rows, log2_tile_rows; - int cur_tile_mb_row_start, cur_tile_mb_row_end, cur_tile_row_idx; + int 
cur_tile_mi_row_start, cur_tile_mi_row_end, cur_tile_row_idx; } VP9_COMMON; static int get_free_fb(VP9_COMMON *cm) { @@ -341,31 +296,76 @@ static void ref_cnt_fb(int *buf, int *idx, int new_idx) { buf[new_idx]++; } -// TODO(debargha): merge the two functions -static void set_mb_row(VP9_COMMON *cm, MACROBLOCKD *xd, - int mb_row, int block_size) { - xd->mb_to_top_edge = -((mb_row * 16) << 3); - xd->mb_to_bottom_edge = ((cm->mb_rows - block_size - mb_row) * 16) << 3; +static int mi_cols_aligned_to_sb(VP9_COMMON *cm) { + return 2 * ((cm->mb_cols + 3) & ~3); +} - // Are edges available for intra prediction? - xd->up_available = (mb_row != 0); +static INLINE void set_partition_seg_context(VP9_COMMON *cm, + MACROBLOCKD *xd, + int mi_row, int mi_col) { + xd->above_seg_context = cm->above_seg_context + mi_col; + xd->left_seg_context = cm->left_seg_context + (mi_row & MI_MASK); } -static void set_mb_col(VP9_COMMON *cm, MACROBLOCKD *xd, - int mb_col, int block_size) { - xd->mb_to_left_edge = -((mb_col * 16) << 3); - xd->mb_to_right_edge = ((cm->mb_cols - block_size - mb_col) * 16) << 3; +static int check_bsize_coverage(VP9_COMMON *cm, MACROBLOCKD *xd, + int mi_row, int mi_col, + BLOCK_SIZE_TYPE bsize) { + int bsl = mi_width_log2(bsize), bs = 1 << bsl; + int ms = bs / 2; + + if ((mi_row + ms < cm->mi_rows) && (mi_col + ms < cm->mi_cols)) + return 0; + + // frame width/height are multiples of 8, hence 8x8 block should always + // pass the above check + assert(bsize > BLOCK_SIZE_SB8X8); + + // return the node index in the prob tree for binary coding + // skip horizontal/none partition types + if ((mi_col + ms < cm->mi_cols) && (mi_row + ms >= cm->mi_rows)) + return 1; + // skip vertical/none partition types + if ((mi_row + ms < cm->mi_rows) && (mi_col + ms >= cm->mi_cols)) + return 2; + + return -1; +} + +static void set_mi_row_col(VP9_COMMON *cm, MACROBLOCKD *xd, + int mi_row, int bh, + int mi_col, int bw) { + xd->mb_to_top_edge = -((mi_row * MI_SIZE) << 3); + xd->mb_to_bottom_edge = ((cm->mi_rows - bh - mi_row) * MI_SIZE) << 3; + xd->mb_to_left_edge = -((mi_col * MI_SIZE) << 3); + xd->mb_to_right_edge = ((cm->mi_cols - bw - mi_col) * MI_SIZE) << 3; // Are edges available for intra prediction? - xd->left_available = (mb_col > cm->cur_tile_mb_col_start); - xd->right_available = (mb_col + block_size < cm->cur_tile_mb_col_end); + xd->up_available = (mi_row != 0); + xd->left_available = (mi_col > cm->cur_tile_mi_col_start); + xd->right_available = (mi_col + bw < cm->cur_tile_mi_col_end); +} + +static int get_mi_row(const MACROBLOCKD *xd) { + return ((-xd->mb_to_top_edge) >> (3 + LOG2_MI_SIZE)); +} + +static int get_mi_col(const MACROBLOCKD *xd) { + return ((-xd->mb_to_left_edge) >> (3 + LOG2_MI_SIZE)); } -static int get_mb_row(const MACROBLOCKD *xd) { - return ((-xd->mb_to_top_edge) >> 7); +static int get_token_alloc(int mb_rows, int mb_cols) { + return mb_rows * mb_cols * (48 * 16 + 4); } -static int get_mb_col(const MACROBLOCKD *xd) { - return ((-xd->mb_to_left_edge) >> 7); +static void set_prev_mi(VP9_COMMON *cm) { + const int use_prev_in_find_mv_refs = cm->width == cm->last_width && + cm->height == cm->last_height && + !cm->error_resilient_mode && + !cm->intra_only && + cm->last_show_frame; + // Special case: set prev_mi to NULL when the previous mode info + // context cannot be used. + cm->prev_mi = use_prev_in_find_mv_refs ? 
+ cm->prev_mip + cm->mode_info_stride + 1 : NULL; } #endif // VP9_COMMON_VP9_ONYXC_INT_H_ diff --git a/vp9/common/vp9_postproc.c b/vp9/common/vp9_postproc.c index 06dadfca5b66af4dcead1fd0b50289515448ddd0..4282ddd1c39debb2d5064dc7ed2b016be639aa9a 100644 --- a/vp9/common/vp9_postproc.c +++ b/vp9/common/vp9_postproc.c @@ -53,7 +53,7 @@ static const unsigned char MB_PREDICTION_MODE_colors[MB_MODE_COUNT][3] = { { RGB_TO_YUV(0xCC33FF) }, /* Magenta */ }; -static const unsigned char B_PREDICTION_MODE_colors[B_MODE_COUNT][3] = { +static const unsigned char B_PREDICTION_MODE_colors[VP9_INTRA_MODES][3] = { { RGB_TO_YUV(0x6633ff) }, /* Purple */ { RGB_TO_YUV(0xcc33ff) }, /* Magenta */ { RGB_TO_YUV(0xff33cc) }, /* Pink */ @@ -132,14 +132,15 @@ const short vp9_rv[] = { /**************************************************************************** */ -void vp9_post_proc_down_and_across_c(uint8_t *src_ptr, +void vp9_post_proc_down_and_across_c(const uint8_t *src_ptr, uint8_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit) { - uint8_t *p_src, *p_dst; + uint8_t const *p_src; + uint8_t *p_dst; int row; int col; int i; @@ -313,51 +314,64 @@ static void deblock_and_de_macro_block(YV12_BUFFER_CONFIG *source, source->uv_height, source->uv_width, ppl); } -void vp9_deblock(YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *post, - int q, - int low_var_thresh, - int flag) { - double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065; - int ppl = (int)(level + .5); - (void) low_var_thresh; - (void) flag; - - vp9_post_proc_down_and_across(source->y_buffer, post->y_buffer, - source->y_stride, post->y_stride, - source->y_height, source->y_width, ppl); - - vp9_post_proc_down_and_across(source->u_buffer, post->u_buffer, - source->uv_stride, post->uv_stride, - source->uv_height, source->uv_width, ppl); +void vp9_deblock(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, + int q) { + const int ppl = (int)(6.0e-05 * q * q * q - 0.0067 * q * q + 0.306 * q + + 0.0065 + 0.5); + int i; - vp9_post_proc_down_and_across(source->v_buffer, post->v_buffer, - source->uv_stride, post->uv_stride, - source->uv_height, source->uv_width, ppl); + const uint8_t *const srcs[4] = {src->y_buffer, src->u_buffer, src->v_buffer, + src->alpha_buffer}; + const int src_strides[4] = {src->y_stride, src->uv_stride, src->uv_stride, + src->alpha_stride}; + const int src_widths[4] = {src->y_width, src->uv_width, src->uv_width, + src->alpha_width}; + const int src_heights[4] = {src->y_height, src->uv_height, src->uv_height, + src->alpha_height}; + + uint8_t *const dsts[4] = {dst->y_buffer, dst->u_buffer, dst->v_buffer, + dst->alpha_buffer}; + const int dst_strides[4] = {dst->y_stride, dst->uv_stride, dst->uv_stride, + dst->alpha_stride}; + + for (i = 0; i < MAX_MB_PLANE; ++i) + vp9_post_proc_down_and_across(srcs[i], dsts[i], + src_strides[i], dst_strides[i], + src_heights[i], src_widths[i], ppl); } -void vp9_denoise(YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *post, - int q, int low_var_thresh, int flag) { - double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065; - int ppl = (int)(level + .5); - (void) post; - (void) low_var_thresh; - (void) flag; - - vp9_post_proc_down_and_across(src->y_buffer + 2 * src->y_stride + 2, - src->y_buffer + 2 * src->y_stride + 2, - src->y_stride, src->y_stride, src->y_height - 4, - src->y_width - 4, ppl); - - vp9_post_proc_down_and_across(src->u_buffer + 2 * src->uv_stride + 2, - src->u_buffer + 2 * src->uv_stride + 2, - src->uv_stride, src->uv_stride, 
- src->uv_height - 4, src->uv_width - 4, ppl); +void vp9_denoise(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, + int q) { + const int ppl = (int)(6.0e-05 * q * q * q - 0.0067 * q * q + 0.306 * q + + 0.0065 + 0.5); + int i; - vp9_post_proc_down_and_across(src->v_buffer + 2 * src->uv_stride + 2, - src->v_buffer + 2 * src->uv_stride + 2, - src->uv_stride, src->uv_stride, - src->uv_height - 4, src->uv_width - 4, ppl); + const uint8_t *const srcs[4] = {src->y_buffer, src->u_buffer, src->v_buffer, + src->alpha_buffer}; + const int src_strides[4] = {src->y_stride, src->uv_stride, src->uv_stride, + src->alpha_stride}; + const int src_widths[4] = {src->y_width, src->uv_width, src->uv_width, + src->alpha_width}; + const int src_heights[4] = {src->y_height, src->uv_height, src->uv_height, + src->alpha_height}; + + uint8_t *const dsts[4] = {dst->y_buffer, dst->u_buffer, dst->v_buffer, + dst->alpha_buffer}; + const int dst_strides[4] = {dst->y_stride, dst->uv_stride, dst->uv_stride, + dst->alpha_stride}; + + for (i = 0; i < MAX_MB_PLANE; ++i) { + const int src_stride = src_strides[i]; + const uint8_t *const src = srcs[i] + 2 * src_stride + 2; + const int src_width = src_widths[i] - 4; + const int src_height = src_heights[i] - 4; + + const int dst_stride = dst_strides[i]; + uint8_t *const dst = dsts[i] + 2 * dst_stride + 2; + + vp9_post_proc_down_and_across(src, dst, src_stride, dst_stride, + src_height, src_width, ppl); + } } double vp9_gaussian(double sigma, double mu, double x) { @@ -631,13 +645,7 @@ int vp9_post_proc_frame(VP9_COMMON *oci, YV12_BUFFER_CONFIG *dest, if (!flags) { *dest = *oci->frame_to_show; - - /* handle problem with extending borders */ - dest->y_width = oci->width; - dest->y_height = oci->height; - dest->uv_height = dest->y_height / 2; return 0; - } #if ARCH_X86||ARCH_X86_64 @@ -648,7 +656,7 @@ int vp9_post_proc_frame(VP9_COMMON *oci, YV12_BUFFER_CONFIG *dest, deblock_and_de_macro_block(oci->frame_to_show, &oci->post_proc_buffer, q + (deblock_level - 5) * 10, 1, 0); } else if (flags & VP9D_DEBLOCK) { - vp9_deblock(oci->frame_to_show, &oci->post_proc_buffer, q, 1, 0); + vp9_deblock(oci->frame_to_show, &oci->post_proc_buffer, q); } else { vp8_yv12_copy_frame(oci->frame_to_show, &oci->post_proc_buffer); } @@ -727,7 +735,7 @@ int vp9_post_proc_frame(VP9_COMMON *oci, YV12_BUFFER_CONFIG *dest, for (i = 0; i < mb_rows; i++) { for (j = 0; j < mb_cols; j++) { char zz[4]; - int dc_diff = !(mi[mb_index].mbmi.mode != B_PRED && + int dc_diff = !(mi[mb_index].mbmi.mode != I4X4_PRED && mi[mb_index].mbmi.mode != SPLITMV && mi[mb_index].mbmi.mb_skip_coeff); @@ -913,8 +921,8 @@ int vp9_post_proc_frame(VP9_COMMON *oci, YV12_BUFFER_CONFIG *dest, for (x = 0; x < width; x += 16) { int Y = 0, U = 0, V = 0; - if (mi->mbmi.mode == B_PRED && - ((ppflags->display_mb_modes_flag & B_PRED) || + if (mi->mbmi.mode == I4X4_PRED && + ((ppflags->display_mb_modes_flag & I4X4_PRED) || ppflags->display_b_modes_flag)) { int by, bx; uint8_t *yl, *ul, *vl; @@ -927,7 +935,7 @@ int vp9_post_proc_frame(VP9_COMMON *oci, YV12_BUFFER_CONFIG *dest, for (by = 0; by < 16; by += 4) { for (bx = 0; bx < 16; bx += 4) { if ((ppflags->display_b_modes_flag & (1 << mi->mbmi.mode)) - || (ppflags->display_mb_modes_flag & B_PRED)) { + || (ppflags->display_mb_modes_flag & I4X4_PRED)) { Y = B_PREDICTION_MODE_colors[bmi->as_mode.first][0]; U = B_PREDICTION_MODE_colors[bmi->as_mode.first][1]; V = B_PREDICTION_MODE_colors[bmi->as_mode.first][2]; diff --git a/vp9/common/vp9_postproc.h b/vp9/common/vp9_postproc.h index 
c2f556e61d86bafb39feac0923cd8bc3b1e8e631..2c0d333b6d27e8842fd4aedb10fde43d7614a589 100644
--- a/vp9/common/vp9_postproc.h
+++ b/vp9/common/vp9_postproc.h
@@ -29,10 +29,8 @@ struct postproc_state {
 int vp9_post_proc_frame(struct VP9Common *oci, YV12_BUFFER_CONFIG *dest,
                         vp9_ppflags_t *flags);
-void vp9_denoise(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *post,
-                 int q, int low_var_thresh, int flag);
+void vp9_denoise(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q);
-void vp9_deblock(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *post,
-                 int q, int low_var_thresh, int flag);
+void vp9_deblock(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q);
 #endif  // VP9_COMMON_VP9_POSTPROC_H_
diff --git a/vp9/common/vp9_pred_common.c b/vp9/common/vp9_pred_common.c
index 9fe66fc5b12741f53db5fee229334db1be5e5f22..17da4f21544bd8eafbb945eade65585f4d2c2b29 100644
--- a/vp9/common/vp9_pred_common.c
+++ b/vp9/common/vp9_pred_common.c
@@ -9,6 +9,8 @@
  * be found in the AUTHORS file in the root of the source tree.
  */
+#include <limits.h>
+
 #include "vp9/common/vp9_common.h"
 #include "vp9/common/vp9_pred_common.h"
 #include "vp9/common/vp9_seg_common.h"
@@ -21,85 +23,363 @@ unsigned char vp9_get_pred_context(const VP9_COMMON *const cm,
                                    const MACROBLOCKD *const xd,
                                    PRED_ID pred_id) {
   int pred_context;
-  MODE_INFO *m = xd->mode_info_context;
-
+  const MODE_INFO *const mi = xd->mode_info_context;
+  const MODE_INFO *const above_mi = mi - cm->mode_info_stride;
+  const MODE_INFO *const left_mi = mi - 1;
+  const int left_in_image = xd->left_available && left_mi->mbmi.mb_in_image;
+  const int above_in_image = xd->up_available && above_mi->mbmi.mb_in_image;
   // Note:
   // The mode info data structure has a one element border above and to the
   // left of the entries corresponding to real macroblocks.
   // The prediction flags in these dummy entries are initialised to 0.
   switch (pred_id) {
     case PRED_SEG_ID:
-      pred_context = (m - cm->mode_info_stride)->mbmi.seg_id_predicted;
+      pred_context = above_mi->mbmi.seg_id_predicted;
       if (xd->left_available)
-        pred_context += (m - 1)->mbmi.seg_id_predicted;
+        pred_context += left_mi->mbmi.seg_id_predicted;
       break;
-    case PRED_REF:
-      pred_context = (m - cm->mode_info_stride)->mbmi.ref_predicted;
+    case PRED_MBSKIP:
+      pred_context = above_mi->mbmi.mb_skip_coeff;
       if (xd->left_available)
-        pred_context += (m - 1)->mbmi.ref_predicted;
+        pred_context += left_mi->mbmi.mb_skip_coeff;
       break;
-    case PRED_COMP:
-      // Context based on use of comp pred flag by neighbours
-      // pred_context =
-      //   ((m - 1)->mbmi.second_ref_frame > INTRA_FRAME) +
-      //   ((m - cm->mode_info_stride)->mbmi.second_ref_frame > INTRA_FRAME);
-
-      // Context based on mode and reference frame
-      // if ( m->mbmi.ref_frame == LAST_FRAME )
-      //   pred_context = 0 + (m->mbmi.mode != ZEROMV);
-      // else if ( m->mbmi.ref_frame == GOLDEN_FRAME )
-      //   pred_context = 2 + (m->mbmi.mode != ZEROMV);
-      // else
-      //   pred_context = 4 + (m->mbmi.mode != ZEROMV);
-
-      if (m->mbmi.ref_frame == LAST_FRAME)
-        pred_context = 0;
+    case PRED_SWITCHABLE_INTERP: {
+      // left
+      const int left_mv_pred = is_inter_mode(left_mi->mbmi.mode);
+      const int left_interp = left_in_image && left_mv_pred ?
+          vp9_switchable_interp_map[left_mi->mbmi.interp_filter] :
+          VP9_SWITCHABLE_FILTERS;
+
+      // above
+      const int above_mv_pred = is_inter_mode(above_mi->mbmi.mode);
+      const int above_interp = above_in_image && above_mv_pred ?
+ vp9_switchable_interp_map[above_mi->mbmi.interp_filter] : + VP9_SWITCHABLE_FILTERS; + + assert(left_interp != -1); + assert(above_interp != -1); + + if (left_interp == above_interp) + pred_context = left_interp; + else if (left_interp == VP9_SWITCHABLE_FILTERS && + above_interp != VP9_SWITCHABLE_FILTERS) + pred_context = above_interp; + else if (left_interp != VP9_SWITCHABLE_FILTERS && + above_interp == VP9_SWITCHABLE_FILTERS) + pred_context = left_interp; else + pred_context = VP9_SWITCHABLE_FILTERS; + + break; + } + + case PRED_INTRA_INTER: { + if (above_in_image && left_in_image) { // both edges available + if (left_mi->mbmi.ref_frame[0] == INTRA_FRAME && + above_mi->mbmi.ref_frame[0] == INTRA_FRAME) { // intra/intra (3) + pred_context = 3; + } else { // intra/inter (1) or inter/inter (0) + pred_context = left_mi->mbmi.ref_frame[0] == INTRA_FRAME || + above_mi->mbmi.ref_frame[0] == INTRA_FRAME; + } + } else if (above_in_image || left_in_image) { // one edge available + const MODE_INFO *edge = above_in_image ? above_mi : left_mi; + + // inter: 0, intra: 2 + pred_context = 2 * (edge->mbmi.ref_frame[0] == INTRA_FRAME); + } else { + pred_context = 0; + } + assert(pred_context >= 0 && pred_context < INTRA_INTER_CONTEXTS); + break; + } + + case PRED_COMP_INTER_INTER: { + if (above_in_image && left_in_image) { // both edges available + if (above_mi->mbmi.ref_frame[1] <= INTRA_FRAME && + left_mi->mbmi.ref_frame[1] <= INTRA_FRAME) { + // neither edge uses comp pred (0/1) + pred_context = ((above_mi->mbmi.ref_frame[0] == cm->comp_fixed_ref) ^ + (left_mi->mbmi.ref_frame[0] == cm->comp_fixed_ref)); + } else if (above_mi->mbmi.ref_frame[1] <= INTRA_FRAME) { + // one of two edges uses comp pred (2/3) + pred_context = 2 + + (above_mi->mbmi.ref_frame[0] == cm->comp_fixed_ref || + above_mi->mbmi.ref_frame[0] == INTRA_FRAME); + } else if (left_mi->mbmi.ref_frame[1] <= INTRA_FRAME) { + // one of two edges uses comp pred (2/3) + pred_context = 2 + + (left_mi->mbmi.ref_frame[0] == cm->comp_fixed_ref || + left_mi->mbmi.ref_frame[0] == INTRA_FRAME); + } else { // both edges use comp pred (4) + pred_context = 4; + } + } else if (above_in_image || left_in_image) { // one edge available + const MODE_INFO *edge = above_in_image ? above_mi : left_mi; + + if (edge->mbmi.ref_frame[1] <= INTRA_FRAME) { + // edge does not use comp pred (0/1) + pred_context = edge->mbmi.ref_frame[0] == cm->comp_fixed_ref; + } else { // edge uses comp pred (3) + pred_context = 3; + } + } else { // no edges available (1) pred_context = 1; + } + assert(pred_context >= 0 && pred_context < COMP_INTER_CONTEXTS); + break; + } + case PRED_COMP_REF_P: { + const int fix_ref_idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref]; + const int var_ref_idx = !fix_ref_idx; + + if (above_in_image && left_in_image) { // both edges available + if (above_mi->mbmi.ref_frame[0] == INTRA_FRAME && + left_mi->mbmi.ref_frame[0] == INTRA_FRAME) { // intra/intra (2) + pred_context = 2; + } else if (above_mi->mbmi.ref_frame[0] == INTRA_FRAME || + left_mi->mbmi.ref_frame[0] == INTRA_FRAME) { // intra/inter + const MODE_INFO *edge = above_mi->mbmi.ref_frame[0] == INTRA_FRAME ? 
+          left_mi : above_mi;
+
+        if (edge->mbmi.ref_frame[1] <= INTRA_FRAME) {  // single pred (1/3)
+          pred_context = 1 +
+              2 * (edge->mbmi.ref_frame[0] != cm->comp_var_ref[1]);
+        } else {  // comp pred (1/3)
+          pred_context = 1 +
+              2 * (edge->mbmi.ref_frame[var_ref_idx] != cm->comp_var_ref[1]);
+        }
+      } else {  // inter/inter
+        int l_sg = left_mi->mbmi.ref_frame[1] <= INTRA_FRAME;
+        int a_sg = above_mi->mbmi.ref_frame[1] <= INTRA_FRAME;
+        MV_REFERENCE_FRAME vrfa = a_sg ? above_mi->mbmi.ref_frame[0] :
+            above_mi->mbmi.ref_frame[var_ref_idx];
+        MV_REFERENCE_FRAME vrfl = l_sg ? left_mi->mbmi.ref_frame[0] :
+            left_mi->mbmi.ref_frame[var_ref_idx];
+
+        if (vrfa == vrfl && cm->comp_var_ref[1] == vrfa) {
+          pred_context = 0;
+        } else if (l_sg && a_sg) {  // single/single
+          if ((vrfa == cm->comp_fixed_ref && vrfl == cm->comp_var_ref[0]) ||
+              (vrfl == cm->comp_fixed_ref && vrfa == cm->comp_var_ref[0])) {
+            pred_context = 4;
+          } else if (vrfa == vrfl) {
+            pred_context = 3;
+          } else {
+            pred_context = 1;
+          }
+        } else if (l_sg || a_sg) {  // single/comp
+          MV_REFERENCE_FRAME vrfc = l_sg ? vrfa : vrfl;
+          MV_REFERENCE_FRAME rfs = a_sg ? vrfa : vrfl;
+
+          if (vrfc == cm->comp_var_ref[1] && rfs != cm->comp_var_ref[1]) {
+            pred_context = 1;
+          } else if (rfs == cm->comp_var_ref[1] &&
+                     vrfc != cm->comp_var_ref[1]) {
+            pred_context = 2;
+          } else {
+            pred_context = 4;
+          }
+        } else if (vrfa == vrfl) {  // comp/comp
+          pred_context = 4;
+        } else {
+          pred_context = 2;
+        }
+      }
+      } else if (above_in_image || left_in_image) {  // one edge available
+        const MODE_INFO *edge = above_in_image ? above_mi : left_mi;
+
+        if (edge->mbmi.ref_frame[0] == INTRA_FRAME) {
+          pred_context = 2;
+        } else if (edge->mbmi.ref_frame[1] > INTRA_FRAME) {
+          pred_context =
+              4 * (edge->mbmi.ref_frame[var_ref_idx] != cm->comp_var_ref[1]);
+        } else {
+          pred_context = 3 * (edge->mbmi.ref_frame[0] != cm->comp_var_ref[1]);
+        }
+      } else {  // no edges available (2)
+        pred_context = 2;
+      }
+      assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
       break;
+    }
-    case PRED_MBSKIP:
-      pred_context = (m - cm->mode_info_stride)->mbmi.mb_skip_coeff;
-      if (xd->left_available)
-        pred_context += (m - 1)->mbmi.mb_skip_coeff;
+    case PRED_SINGLE_REF_P1: {
+      if (above_in_image && left_in_image) {  // both edges available
+        if (above_mi->mbmi.ref_frame[0] == INTRA_FRAME &&
+            left_mi->mbmi.ref_frame[0] == INTRA_FRAME) {
+          pred_context = 2;
+        } else if (above_mi->mbmi.ref_frame[0] == INTRA_FRAME ||
+                   left_mi->mbmi.ref_frame[0] == INTRA_FRAME) {
+          const MODE_INFO *edge = above_mi->mbmi.ref_frame[0] == INTRA_FRAME ?
+              left_mi : above_mi;
+
+          if (edge->mbmi.ref_frame[1] <= INTRA_FRAME) {
+            pred_context = 4 * (edge->mbmi.ref_frame[0] == LAST_FRAME);
+          } else {
+            pred_context = 1 + (edge->mbmi.ref_frame[0] == LAST_FRAME ||
+                                edge->mbmi.ref_frame[1] == LAST_FRAME);
+          }
+        } else if (above_mi->mbmi.ref_frame[1] <= INTRA_FRAME &&
+                   left_mi->mbmi.ref_frame[1] <= INTRA_FRAME) {
+          pred_context = 2 * (above_mi->mbmi.ref_frame[0] == LAST_FRAME) +
+                         2 * (left_mi->mbmi.ref_frame[0] == LAST_FRAME);
+        } else if (above_mi->mbmi.ref_frame[1] > INTRA_FRAME &&
+                   left_mi->mbmi.ref_frame[1] > INTRA_FRAME) {
+          pred_context = 1 + (above_mi->mbmi.ref_frame[0] == LAST_FRAME ||
+                              above_mi->mbmi.ref_frame[1] == LAST_FRAME ||
+                              left_mi->mbmi.ref_frame[0] == LAST_FRAME ||
+                              left_mi->mbmi.ref_frame[1] == LAST_FRAME);
+        } else {
+          MV_REFERENCE_FRAME rfs = above_mi->mbmi.ref_frame[1] <= INTRA_FRAME ?
+ above_mi->mbmi.ref_frame[0] : left_mi->mbmi.ref_frame[0]; + MV_REFERENCE_FRAME crf1 = above_mi->mbmi.ref_frame[1] > INTRA_FRAME ? + above_mi->mbmi.ref_frame[0] : left_mi->mbmi.ref_frame[0]; + MV_REFERENCE_FRAME crf2 = above_mi->mbmi.ref_frame[1] > INTRA_FRAME ? + above_mi->mbmi.ref_frame[1] : left_mi->mbmi.ref_frame[1]; + + if (rfs == LAST_FRAME) { + pred_context = 3 + (crf1 == LAST_FRAME || crf2 == LAST_FRAME); + } else { + pred_context = crf1 == LAST_FRAME || crf2 == LAST_FRAME; + } + } + } else if (above_in_image || left_in_image) { // one edge available + const MODE_INFO *edge = above_in_image ? above_mi : left_mi; + + if (edge->mbmi.ref_frame[0] == INTRA_FRAME) { + pred_context = 2; + } else if (edge->mbmi.ref_frame[1] <= INTRA_FRAME) { + pred_context = 4 * (edge->mbmi.ref_frame[0] == LAST_FRAME); + } else { + pred_context = 1 + (edge->mbmi.ref_frame[0] == LAST_FRAME || + edge->mbmi.ref_frame[1] == LAST_FRAME); + } + } else { // no edges available (2) + pred_context = 2; + } + assert(pred_context >= 0 && pred_context < REF_CONTEXTS); break; + } - case PRED_SWITCHABLE_INTERP: - { - int left_in_image = xd->left_available && (m - 1)->mbmi.mb_in_image; - int above_in_image = (m - cm->mode_info_stride)->mbmi.mb_in_image; - int left_mode = (m - 1)->mbmi.mode; - int above_mode = (m - cm->mode_info_stride)->mbmi.mode; - int left_interp, above_interp; - if (left_in_image && left_mode >= NEARESTMV && left_mode <= SPLITMV) - left_interp = vp9_switchable_interp_map[(m - 1)->mbmi.interp_filter]; - else - left_interp = VP9_SWITCHABLE_FILTERS; - assert(left_interp != -1); - if (above_in_image && above_mode >= NEARESTMV && above_mode <= SPLITMV) - above_interp = vp9_switchable_interp_map[ - (m - cm->mode_info_stride)->mbmi.interp_filter]; - else - above_interp = VP9_SWITCHABLE_FILTERS; - assert(above_interp != -1); - - if (left_interp == above_interp) - pred_context = left_interp; - else if (left_interp == VP9_SWITCHABLE_FILTERS && - above_interp != VP9_SWITCHABLE_FILTERS) - pred_context = above_interp; - else if (left_interp != VP9_SWITCHABLE_FILTERS && - above_interp == VP9_SWITCHABLE_FILTERS) - pred_context = left_interp; - else - pred_context = VP9_SWITCHABLE_FILTERS; + case PRED_SINGLE_REF_P2: { + if (above_in_image && left_in_image) { // both edges available + if (above_mi->mbmi.ref_frame[0] == INTRA_FRAME && + left_mi->mbmi.ref_frame[0] == INTRA_FRAME) { + pred_context = 2; + } else if (above_mi->mbmi.ref_frame[0] == INTRA_FRAME || + left_mi->mbmi.ref_frame[0] == INTRA_FRAME) { + const MODE_INFO *edge = above_mi->mbmi.ref_frame[0] == INTRA_FRAME ? + left_mi : above_mi; + + if (edge->mbmi.ref_frame[1] <= INTRA_FRAME) { + if (edge->mbmi.ref_frame[0] == LAST_FRAME) { + pred_context = 3; + } else { + pred_context = 4 * (edge->mbmi.ref_frame[0] == GOLDEN_FRAME); + } + } else { + pred_context = 1 + 2 * (edge->mbmi.ref_frame[0] == GOLDEN_FRAME || + edge->mbmi.ref_frame[1] == GOLDEN_FRAME); + } + } else if (above_mi->mbmi.ref_frame[1] <= INTRA_FRAME && + left_mi->mbmi.ref_frame[1] <= INTRA_FRAME) { + if (above_mi->mbmi.ref_frame[0] == LAST_FRAME && + left_mi->mbmi.ref_frame[0] == LAST_FRAME) { + pred_context = 3; + } else if (above_mi->mbmi.ref_frame[0] == LAST_FRAME || + left_mi->mbmi.ref_frame[0] == LAST_FRAME) { + const MODE_INFO *edge = above_mi->mbmi.ref_frame[0] == LAST_FRAME ? 
+ left_mi : above_mi; + + pred_context = 4 * (edge->mbmi.ref_frame[0] == GOLDEN_FRAME); + } else { + pred_context = 2 * (above_mi->mbmi.ref_frame[0] == GOLDEN_FRAME) + + 2 * (left_mi->mbmi.ref_frame[0] == GOLDEN_FRAME); + } + } else if (above_mi->mbmi.ref_frame[1] > INTRA_FRAME && + left_mi->mbmi.ref_frame[1] > INTRA_FRAME) { + if (above_mi->mbmi.ref_frame[0] == left_mi->mbmi.ref_frame[0] && + above_mi->mbmi.ref_frame[1] == left_mi->mbmi.ref_frame[1]) { + pred_context = 3 * (above_mi->mbmi.ref_frame[0] == GOLDEN_FRAME || + above_mi->mbmi.ref_frame[1] == GOLDEN_FRAME || + left_mi->mbmi.ref_frame[0] == GOLDEN_FRAME || + left_mi->mbmi.ref_frame[1] == GOLDEN_FRAME); + } else { + pred_context = 2; + } + } else { + MV_REFERENCE_FRAME rfs = above_mi->mbmi.ref_frame[1] <= INTRA_FRAME ? + above_mi->mbmi.ref_frame[0] : left_mi->mbmi.ref_frame[0]; + MV_REFERENCE_FRAME crf1 = above_mi->mbmi.ref_frame[1] > INTRA_FRAME ? + above_mi->mbmi.ref_frame[0] : left_mi->mbmi.ref_frame[0]; + MV_REFERENCE_FRAME crf2 = above_mi->mbmi.ref_frame[1] > INTRA_FRAME ? + above_mi->mbmi.ref_frame[1] : left_mi->mbmi.ref_frame[1]; + + if (rfs == GOLDEN_FRAME) { + pred_context = 3 + (crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME); + } else if (rfs == ALTREF_FRAME) { + pred_context = crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME; + } else { + pred_context = + 1 + 2 * (crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME); + } + } + } else if (above_in_image || left_in_image) { // one edge available + const MODE_INFO *edge = above_in_image ? above_mi : left_mi; + + if (edge->mbmi.ref_frame[0] == INTRA_FRAME || + (edge->mbmi.ref_frame[0] == LAST_FRAME && + edge->mbmi.ref_frame[1] <= INTRA_FRAME)) { + pred_context = 2; + } else if (edge->mbmi.ref_frame[1] <= INTRA_FRAME) { + pred_context = 4 * (edge->mbmi.ref_frame[0] == GOLDEN_FRAME); + } else { + pred_context = 3 * (edge->mbmi.ref_frame[0] == GOLDEN_FRAME || + edge->mbmi.ref_frame[1] == GOLDEN_FRAME); + } + } else { // no edges available (2) + pred_context = 2; } + assert(pred_context >= 0 && pred_context < REF_CONTEXTS); break; + } + + case PRED_TX_SIZE: { + int above_context, left_context; + int max_tx_size; + if (mi->mbmi.sb_type < BLOCK_SIZE_SB8X8) + max_tx_size = TX_4X4; + else if (mi->mbmi.sb_type < BLOCK_SIZE_MB16X16) + max_tx_size = TX_8X8; + else if (mi->mbmi.sb_type < BLOCK_SIZE_SB32X32) + max_tx_size = TX_16X16; + else + max_tx_size = TX_32X32; + above_context = left_context = max_tx_size; + if (above_in_image) { + above_context = (above_mi->mbmi.mb_skip_coeff ? + max_tx_size : above_mi->mbmi.txfm_size); + } + if (left_in_image) { + left_context = (left_mi->mbmi.mb_skip_coeff ? + max_tx_size : left_mi->mbmi.txfm_size); + } + if (!left_in_image) { + left_context = above_context; + } + if (!above_in_image) { + above_context = left_context; + } + pred_context = (above_context + left_context > max_tx_size); + break; + } default: + assert(0); pred_context = 0; // *** add error trap code. break; } @@ -117,16 +397,20 @@ vp9_prob vp9_get_pred_prob(const VP9_COMMON *const cm, switch (pred_id) { case PRED_SEG_ID: return cm->segment_pred_probs[pred_context]; - case PRED_REF: - return cm->ref_pred_probs[pred_context]; - case PRED_COMP: - // In keeping with convention elsewhre the probability returned is - // the probability of a "0" outcome which in this case means the - // probability of comp pred off. 
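// ---------------------------------------------------------------------------
// Editorial sketch, not part of the patch: the PRED_TX_SIZE case above
// reduces to a small rule. Each available neighbour contributes its coded
// tx size (the code substitutes max_tx_size when the neighbour is skipped),
// a missing edge mirrors the other one, and the context is 1 iff the two
// contributions together exceed max_tx_size. A minimal restatement:
static int tx_size_context_sketch(int above_tx, int left_tx,
                                  int above_avail, int left_avail,
                                  int max_tx_size) {
  int above = above_avail ? above_tx : max_tx_size;
  int left = left_avail ? left_tx : max_tx_size;
  if (!left_avail)
    left = above;   // mirror the single available edge
  if (!above_avail)
    above = left;
  return above + left > max_tx_size;  // context is 0 or 1
}
// A caller would then pair such a context with the matching probability,
// e.g. via vp9_get_pred_probs(cm, xd, PRED_TX_SIZE) as defined below.
// ---------------------------------------------------------------------------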
- return cm->prob_comppred[pred_context]; case PRED_MBSKIP: - return cm->mbskip_pred_probs[pred_context]; + return cm->fc.mbskip_probs[pred_context]; + case PRED_INTRA_INTER: + return cm->fc.intra_inter_prob[pred_context]; + case PRED_COMP_INTER_INTER: + return cm->fc.comp_inter_prob[pred_context]; + case PRED_COMP_REF_P: + return cm->fc.comp_ref_prob[pred_context]; + case PRED_SINGLE_REF_P1: + return cm->fc.single_ref_prob[pred_context][0]; + case PRED_SINGLE_REF_P2: + return cm->fc.single_ref_prob[pred_context][1]; default: + assert(0); return 128; // *** add error trap code. } } @@ -136,23 +420,23 @@ vp9_prob vp9_get_pred_prob(const VP9_COMMON *const cm, const vp9_prob *vp9_get_pred_probs(const VP9_COMMON *const cm, const MACROBLOCKD *const xd, PRED_ID pred_id) { + const MODE_INFO *const mi = xd->mode_info_context; const int pred_context = vp9_get_pred_context(cm, xd, pred_id); switch (pred_id) { - case PRED_SEG_ID: - return &cm->segment_pred_probs[pred_context]; - case PRED_REF: - return &cm->ref_pred_probs[pred_context]; - case PRED_COMP: - // In keeping with convention elsewhre the probability returned is - // the probability of a "0" outcome which in this case means the - // probability of comp pred off. - return &cm->prob_comppred[pred_context]; - case PRED_MBSKIP: - return &cm->mbskip_pred_probs[pred_context]; case PRED_SWITCHABLE_INTERP: return &cm->fc.switchable_interp_prob[pred_context][0]; + + case PRED_TX_SIZE: + if (mi->mbmi.sb_type < BLOCK_SIZE_MB16X16) + return cm->fc.tx_probs_8x8p[pred_context]; + else if (mi->mbmi.sb_type < BLOCK_SIZE_SB32X32) + return cm->fc.tx_probs_16x16p[pred_context]; + else + return cm->fc.tx_probs_32x32p[pred_context]; + default: + assert(0); return NULL; // *** add error trap code. } } @@ -164,11 +448,10 @@ unsigned char vp9_get_pred_flag(const MACROBLOCKD *const xd, switch (pred_id) { case PRED_SEG_ID: return xd->mode_info_context->mbmi.seg_id_predicted; - case PRED_REF: - return xd->mode_info_context->mbmi.ref_predicted; case PRED_MBSKIP: return xd->mode_info_context->mbmi.mb_skip_coeff; default: + assert(0); return 0; // *** add error trap code. } } @@ -179,59 +462,34 @@ void vp9_set_pred_flag(MACROBLOCKD *const xd, PRED_ID pred_id, unsigned char pred_flag) { const int mis = xd->mode_info_stride; + BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type; + const int bh = 1 << mi_height_log2(bsize); + const int bw = 1 << mi_width_log2(bsize); +#define sub(a, b) (b) < 0 ? (a) + (b) : (a) + const int x_mis = sub(bw, xd->mb_to_right_edge >> (3 + LOG2_MI_SIZE)); + const int y_mis = sub(bh, xd->mb_to_bottom_edge >> (3 + LOG2_MI_SIZE)); +#undef sub + int x, y; switch (pred_id) { case PRED_SEG_ID: - xd->mode_info_context->mbmi.seg_id_predicted = pred_flag; - if (xd->mode_info_context->mbmi.sb_type) { -#define sub(a, b) (b) < 0 ? 
(a) + (b) : (a)
-      const int n_mbs = 1 << xd->mode_info_context->mbmi.sb_type;
-      const int x_mbs = sub(n_mbs, xd->mb_to_right_edge >> 7);
-      const int y_mbs = sub(n_mbs, xd->mb_to_bottom_edge >> 7);
-      int x, y;
-
-      for (y = 0; y < y_mbs; y++) {
-        for (x = !y; x < x_mbs; x++) {
-          xd->mode_info_context[y * mis + x].mbmi.seg_id_predicted =
-              pred_flag;
-        }
-      }
-    }
-    break;
-
-    case PRED_REF:
-      xd->mode_info_context->mbmi.ref_predicted = pred_flag;
-      if (xd->mode_info_context->mbmi.sb_type) {
-        const int n_mbs = 1 << xd->mode_info_context->mbmi.sb_type;
-        const int x_mbs = sub(n_mbs, xd->mb_to_right_edge >> 7);
-        const int y_mbs = sub(n_mbs, xd->mb_to_bottom_edge >> 7);
-        int x, y;
-
-        for (y = 0; y < y_mbs; y++) {
-          for (x = !y; x < x_mbs; x++) {
-            xd->mode_info_context[y * mis + x].mbmi.ref_predicted = pred_flag;
-          }
+      for (y = 0; y < y_mis; y++) {
+        for (x = 0; x < x_mis; x++) {
+          xd->mode_info_context[y * mis + x].mbmi.seg_id_predicted = pred_flag;
         }
       }
       break;

     case PRED_MBSKIP:
-      xd->mode_info_context->mbmi.mb_skip_coeff = pred_flag;
-      if (xd->mode_info_context->mbmi.sb_type) {
-        const int n_mbs = 1 << xd->mode_info_context->mbmi.sb_type;
-        const int x_mbs = sub(n_mbs, xd->mb_to_right_edge >> 7);
-        const int y_mbs = sub(n_mbs, xd->mb_to_bottom_edge >> 7);
-        int x, y;
-
-        for (y = 0; y < y_mbs; y++) {
-          for (x = !y; x < x_mbs; x++) {
-            xd->mode_info_context[y * mis + x].mbmi.mb_skip_coeff = pred_flag;
-          }
+      for (y = 0; y < y_mis; y++) {
+        for (x = 0; x < x_mis; x++) {
+          xd->mode_info_context[y * mis + x].mbmi.mb_skip_coeff = pred_flag;
         }
       }
       break;

     default:
+      assert(0);
       // *** add error trap code.
       break;
   }
@@ -242,162 +500,21 @@ void vp9_set_pred_flag(MACROBLOCKD *const xd,
 // predict various bitstream signals.

 // Macroblock segment id prediction function
-unsigned char vp9_get_pred_mb_segid(const VP9_COMMON *const cm,
-                                    const MACROBLOCKD *const xd, int MbIndex) {
-  // Currently the prediction for the macroblock segment ID is
-  // the value stored for this macroblock in the previous frame.
-  if (!xd->mode_info_context->mbmi.sb_type) {
-    return cm->last_frame_seg_map[MbIndex];
-  } else {
-    const int n_mbs = 1 << xd->mode_info_context->mbmi.sb_type;
-    const int mb_col = MbIndex % cm->mb_cols;
-    const int mb_row = MbIndex / cm->mb_cols;
-    const int x_mbs = MIN(n_mbs, cm->mb_cols - mb_col);
-    const int y_mbs = MIN(n_mbs, cm->mb_rows - mb_row);
-    int x, y;
-    unsigned seg_id = -1;
-
-    for (y = mb_row; y < mb_row + y_mbs; y++) {
-      for (x = mb_col; x < mb_col + x_mbs; x++) {
-        seg_id = MIN(seg_id, cm->last_frame_seg_map[cm->mb_cols * y + x]);
-      }
-    }
-
-    return seg_id;
-  }
-}
-
-MV_REFERENCE_FRAME vp9_get_pred_ref(const VP9_COMMON *const cm,
-                                    const MACROBLOCKD *const xd) {
-  MODE_INFO *m = xd->mode_info_context;
-
-  MV_REFERENCE_FRAME left;
-  MV_REFERENCE_FRAME above;
-  MV_REFERENCE_FRAME above_left;
-  MV_REFERENCE_FRAME pred_ref = LAST_FRAME;
-
-  int segment_id = xd->mode_info_context->mbmi.segment_id;
-  int i;
-
-  unsigned char frame_allowed[MAX_REF_FRAMES] = {1, 1, 1, 1};
-  unsigned char ref_score[MAX_REF_FRAMES];
-  unsigned char best_score = 0;
-  unsigned char left_in_image;
-  unsigned char above_in_image;
-  unsigned char above_left_in_image;
-
-  // Is segment coding enabled
-  int seg_ref_active = vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME);
-
-  // Special case treatment if segment coding is enabled.
-  // Don't allow prediction of a reference frame that the segment
-  // does not allow
-  if (seg_ref_active) {
-    for (i = 0; i < MAX_REF_FRAMES; i++) {
-      frame_allowed[i] =
-        vp9_check_segref(xd, segment_id, i);
-
-      // Score set to 0 if ref frame not allowed
-      ref_score[i] = cm->ref_scores[i] * frame_allowed[i];
-    }
-  } else
-    vpx_memcpy(ref_score, cm->ref_scores, sizeof(ref_score));
-
-  // Reference frames used by neighbours
-  left = (m - 1)->mbmi.ref_frame;
-  above = (m - cm->mode_info_stride)->mbmi.ref_frame;
-  above_left = (m - 1 - cm->mode_info_stride)->mbmi.ref_frame;
-
-  // Are neighbours in image
-  left_in_image = (m - 1)->mbmi.mb_in_image && xd->left_available;
-  above_in_image = (m - cm->mode_info_stride)->mbmi.mb_in_image;
-  above_left_in_image = (m - 1 - cm->mode_info_stride)->mbmi.mb_in_image &&
-                        xd->left_available;
-
-  // Adjust scores for candidate reference frames based on neighbours
-  if (frame_allowed[left] && left_in_image) {
-    ref_score[left] += 16;
-    if (above_left_in_image && (left == above_left))
-      ref_score[left] += 4;
-  }
-  if (frame_allowed[above] && above_in_image) {
-    ref_score[above] += 16;
-    if (above_left_in_image && (above == above_left))
-      ref_score[above] += 4;
-  }
-
-  // Now choose the candidate with the highest score
-  for (i = 0; i < MAX_REF_FRAMES; i++) {
-    if (ref_score[i] > best_score) {
-      pred_ref = i;
-      best_score = ref_score[i];
+int vp9_get_pred_mi_segid(VP9_COMMON *cm, BLOCK_SIZE_TYPE sb_type,
+                          int mi_row, int mi_col) {
+  const int mi_index = mi_row * cm->mi_cols + mi_col;
+  const int bw = 1 << mi_width_log2(sb_type);
+  const int bh = 1 << mi_height_log2(sb_type);
+  const int ymis = MIN(cm->mi_rows - mi_row, bh);
+  const int xmis = MIN(cm->mi_cols - mi_col, bw);
+  int segment_id = INT_MAX;
+  int x, y;
+
+  for (y = 0; y < ymis; y++) {
+    for (x = 0; x < xmis; x++) {
+      const int index = mi_index + (y * cm->mi_cols + x);
+      segment_id = MIN(segment_id, cm->last_frame_seg_map[index]);
    }
  }
-
-  return pred_ref;
-}
-
-// Functions to compute a set of modified reference frame probabilities
-// to use when the prediction of the reference frame value fails
-void vp9_calc_ref_probs(int *count, vp9_prob *probs) {
-  int tot_count = count[0] + count[1] + count[2] + count[3];
-  probs[0] = get_prob(count[0], tot_count);
-
-  tot_count -= count[0];
-  probs[1] = get_prob(count[1], tot_count);
-
-  tot_count -= count[1];
-  probs[2] = get_prob(count[2], tot_count);
-}
-
-// Computes a set of modified conditional probabilities for the reference frame
-// Values will be set to 0 for reference frame options that are not possible
-// because either they were predicted and prediction has failed or because
-// they are not allowed for a given segment.
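// ---------------------------------------------------------------------------
// Worked example (editorial, not part of the patch) for the removed
// vp9_calc_ref_probs() above: each probs[i] is the probability of taking
// branch i of the reference-frame tree given that no earlier branch was
// taken, so the denominator shrinks as counts are consumed. Assuming the
// usual get_prob(n, d) ~= clamp(255 * n / d, 1, 255), counts {10, 20, 30, 40}
// give approximately:
//   probs[0] = get_prob(10, 100);  // ~26 / 255
//   probs[1] = get_prob(20, 90);   // ~57 / 255
//   probs[2] = get_prob(30, 70);   // ~109 / 255
// vp9_compute_mod_refprobs() below applies the same helper with one count
// zeroed out per predicted reference frame.
// ---------------------------------------------------------------------------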
-void vp9_compute_mod_refprobs(VP9_COMMON *const cm) { - int norm_cnt[MAX_REF_FRAMES]; - const int intra_count = cm->prob_intra_coded; - const int inter_count = (255 - intra_count); - const int last_count = (inter_count * cm->prob_last_coded) / 255; - const int gfarf_count = inter_count - last_count; - const int gf_count = (gfarf_count * cm->prob_gf_coded) / 255; - const int arf_count = gfarf_count - gf_count; - - // Work out modified reference frame probabilities to use where prediction - // of the reference frame fails - norm_cnt[0] = 0; - norm_cnt[1] = last_count; - norm_cnt[2] = gf_count; - norm_cnt[3] = arf_count; - vp9_calc_ref_probs(norm_cnt, cm->mod_refprobs[INTRA_FRAME]); - cm->mod_refprobs[INTRA_FRAME][0] = 0; // This branch implicit - - norm_cnt[0] = intra_count; - norm_cnt[1] = 0; - norm_cnt[2] = gf_count; - norm_cnt[3] = arf_count; - vp9_calc_ref_probs(norm_cnt, cm->mod_refprobs[LAST_FRAME]); - cm->mod_refprobs[LAST_FRAME][1] = 0; // This branch implicit - - norm_cnt[0] = intra_count; - norm_cnt[1] = last_count; - norm_cnt[2] = 0; - norm_cnt[3] = arf_count; - vp9_calc_ref_probs(norm_cnt, cm->mod_refprobs[GOLDEN_FRAME]); - cm->mod_refprobs[GOLDEN_FRAME][2] = 0; // This branch implicit - - norm_cnt[0] = intra_count; - norm_cnt[1] = last_count; - norm_cnt[2] = gf_count; - norm_cnt[3] = 0; - vp9_calc_ref_probs(norm_cnt, cm->mod_refprobs[ALTREF_FRAME]); - cm->mod_refprobs[ALTREF_FRAME][2] = 0; // This branch implicit - - // Score the reference frames based on overal frequency. - // These scores contribute to the prediction choices. - // Max score 17 min 1 - cm->ref_scores[INTRA_FRAME] = 1 + (intra_count * 16 / 255); - cm->ref_scores[LAST_FRAME] = 1 + (last_count * 16 / 255); - cm->ref_scores[GOLDEN_FRAME] = 1 + (gf_count * 16 / 255); - cm->ref_scores[ALTREF_FRAME] = 1 + (arf_count * 16 / 255); + return segment_id; } diff --git a/vp9/common/vp9_pred_common.h b/vp9/common/vp9_pred_common.h index 49dcf0a4cf2bb523a7d4b450dd9f4ef97589becc..b728724b747e59a053c0853080ccf0a0bf13c136 100644 --- a/vp9/common/vp9_pred_common.h +++ b/vp9/common/vp9_pred_common.h @@ -17,10 +17,14 @@ // Predicted items typedef enum { PRED_SEG_ID = 0, // Segment identifier - PRED_REF = 1, - PRED_COMP = 2, - PRED_MBSKIP = 3, - PRED_SWITCHABLE_INTERP = 4 + PRED_MBSKIP = 1, + PRED_SWITCHABLE_INTERP = 2, + PRED_INTRA_INTER = 3, + PRED_COMP_INTER_INTER = 4, + PRED_SINGLE_REF_P1 = 5, + PRED_SINGLE_REF_P2 = 6, + PRED_COMP_REF_P = 7, + PRED_TX_SIZE = 8 } PRED_ID; unsigned char vp9_get_pred_context(const VP9_COMMON *const cm, @@ -43,13 +47,7 @@ void vp9_set_pred_flag(MACROBLOCKD *const xd, unsigned char pred_flag); -unsigned char vp9_get_pred_mb_segid(const VP9_COMMON *const cm, - const MACROBLOCKD *const xd, - int MbIndex); - -MV_REFERENCE_FRAME vp9_get_pred_ref(const VP9_COMMON *const cm, - const MACROBLOCKD *const xd); - -void vp9_compute_mod_refprobs(VP9_COMMON *const cm); +int vp9_get_pred_mi_segid(VP9_COMMON *cm, BLOCK_SIZE_TYPE sb_type, + int mi_row, int mi_col); #endif // VP9_COMMON_VP9_PRED_COMMON_H_ diff --git a/vp9/common/vp9_quant_common.c b/vp9/common/vp9_quant_common.c index a94c772bea1f1a48459fcbdbef3aa9f3c4feb023..295c8e7386b6123e59ad187d795f3c97d9a1fbb7 100644 --- a/vp9/common/vp9_quant_common.c +++ b/vp9/common/vp9_quant_common.c @@ -10,46 +10,60 @@ #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_quant_common.h" +#include "vp9/common/vp9_seg_common.h" -static int dc_qlookup[QINDEX_RANGE]; -static int ac_qlookup[QINDEX_RANGE]; +static int16_t dc_qlookup[QINDEX_RANGE]; +static int16_t 
 ac_qlookup[QINDEX_RANGE];
-#define ACDC_MIN 4
+#define ACDC_MIN 8
+
+// TODO(dkovalev) move to common and reuse
+static double poly3(double a, double b, double c, double d, double x) {
+  return a*x*x*x + b*x*x + c*x + d;
+}

 void vp9_init_quant_tables() {
-  int i;
-  int current_val = 4;
-  int last_val = 4;
-  int ac_val;
-
-  for (i = 0; i < QINDEX_RANGE; i++) {
-    ac_qlookup[i] = current_val;
-    current_val = (int)(current_val * 1.02);
-    if (current_val == last_val)
-      current_val++;
-    last_val = current_val;
-
-    ac_val = ac_qlookup[i];
-    dc_qlookup[i] = (int)((0.000000305 * ac_val * ac_val * ac_val) +
-                          (-0.00065 * ac_val * ac_val) +
-                          (0.9 * ac_val) + 0.5);
-    if (dc_qlookup[i] < ACDC_MIN)
-      dc_qlookup[i] = ACDC_MIN;
+  int i, val = 4;
+
+  // A "real" Q of 1.0 forces lossless mode.
+  // In practice, non-lossless Qs between 1.0 and 2.0 (represented here by
+  // integer values from 5-7) give poor rd results (lower psnr and often
+  // larger size) than the lossless encode. To block out those "not very
+  // useful" values we increment the ac and dc q lookup values by 4 after
+  // position 0.
+  ac_qlookup[0] = val;
+  dc_qlookup[0] = val;
+  val += 4;
+
+  for (i = 1; i < QINDEX_RANGE; i++) {
+    const int ac_val = val;
+
+    val = (int)(val * 1.01975);
+    if (val == ac_val)
+      ++val;
+
+    ac_qlookup[i] = (int16_t)ac_val;
+    dc_qlookup[i] = (int16_t)MAX(ACDC_MIN, poly3(0.000000305, -0.00065, 0.9,
+                                                 0.5, ac_val));
   }
 }

-int vp9_dc_quant(int qindex, int delta) {
+int16_t vp9_dc_quant(int qindex, int delta) {
   return dc_qlookup[clamp(qindex + delta, 0, MAXQ)];
 }

-int vp9_dc_uv_quant(int qindex, int delta) {
-  return dc_qlookup[clamp(qindex + delta, 0, MAXQ)];
+int16_t vp9_ac_quant(int qindex, int delta) {
+  return ac_qlookup[clamp(qindex + delta, 0, MAXQ)];
 }

-int vp9_ac_yquant(int qindex) {
-  return ac_qlookup[clamp(qindex, 0, MAXQ)];
-}
-int vp9_ac_uv_quant(int qindex, int delta) {
-  return ac_qlookup[clamp(qindex + delta, 0, MAXQ)];
+int vp9_get_qindex(MACROBLOCKD *xd, int segment_id, int base_qindex) {
+  if (vp9_segfeature_active(xd, segment_id, SEG_LVL_ALT_Q)) {
+    const int data = vp9_get_segdata(xd, segment_id, SEG_LVL_ALT_Q);
+    return xd->mb_segment_abs_delta == SEGMENT_ABSDATA ?
+ data : // Abs value + clamp(base_qindex + data, 0, MAXQ); // Delta value + } else { + return base_qindex; + } } + diff --git a/vp9/common/vp9_quant_common.h b/vp9/common/vp9_quant_common.h index 1520c37977076f11c7aeaee1de258e39a4faaa91..ded94269ad5b7fd9a4f3c8a9703f6ac79b65e003 100644 --- a/vp9/common/vp9_quant_common.h +++ b/vp9/common/vp9_quant_common.h @@ -12,14 +12,17 @@ #define VP9_COMMON_VP9_QUANT_COMMON_H_ #include "vp9/common/vp9_blockd.h" -#include "vp9/common/vp9_onyxc_int.h" + +#define MINQ 0 +#define MAXQ 255 +#define QINDEX_RANGE (MAXQ - MINQ + 1) +#define QINDEX_BITS 8 void vp9_init_quant_tables(); -int vp9_ac_yquant(int qindex); -int vp9_dc_quant(int qindex, int delta); -int vp9_dc2quant(int qindex, int delta); -int vp9_ac2quant(int qindex, int delta); -int vp9_dc_uv_quant(int qindex, int delta); -int vp9_ac_uv_quant(int qindex, int delta); + +int16_t vp9_dc_quant(int qindex, int delta); +int16_t vp9_ac_quant(int qindex, int delta); + +int vp9_get_qindex(MACROBLOCKD *mb, int segment_id, int base_qindex); #endif // VP9_COMMON_VP9_QUANT_COMMON_H_ diff --git a/vp9/common/vp9_recon.c b/vp9/common/vp9_recon.c deleted file mode 100644 index d67b6d3dfbdd0fe64c0dec179053b0c6a19099e6..0000000000000000000000000000000000000000 --- a/vp9/common/vp9_recon.c +++ /dev/null @@ -1,202 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "./vpx_config.h" -#include "vp9_rtcd.h" -#include "vp9/common/vp9_blockd.h" - -void vp9_recon_b_c(uint8_t *pred_ptr, - int16_t *diff_ptr, - uint8_t *dst_ptr, - int stride) { - int r, c; - - for (r = 0; r < 4; r++) { - for (c = 0; c < 4; c++) { - dst_ptr[c] = clip_pixel(diff_ptr[c] + pred_ptr[c]); - } - - dst_ptr += stride; - diff_ptr += 16; - pred_ptr += 16; - } -} - -void vp9_recon_uv_b_c(uint8_t *pred_ptr, - int16_t *diff_ptr, - uint8_t *dst_ptr, - int stride) { - int r, c; - - for (r = 0; r < 4; r++) { - for (c = 0; c < 4; c++) { - dst_ptr[c] = clip_pixel(diff_ptr[c] + pred_ptr[c]); - } - - dst_ptr += stride; - diff_ptr += 8; - pred_ptr += 8; - } -} - -void vp9_recon4b_c(uint8_t *pred_ptr, - int16_t *diff_ptr, - uint8_t *dst_ptr, - int stride) { - int r, c; - - for (r = 0; r < 4; r++) { - for (c = 0; c < 16; c++) { - dst_ptr[c] = clip_pixel(diff_ptr[c] + pred_ptr[c]); - } - - dst_ptr += stride; - diff_ptr += 16; - pred_ptr += 16; - } -} - -void vp9_recon2b_c(uint8_t *pred_ptr, - int16_t *diff_ptr, - uint8_t *dst_ptr, - int stride) { - int r, c; - - for (r = 0; r < 4; r++) { - for (c = 0; c < 8; c++) { - dst_ptr[c] = clip_pixel(diff_ptr[c] + pred_ptr[c]); - } - - dst_ptr += stride; - diff_ptr += 8; - pred_ptr += 8; - } -} - -void vp9_recon_mby_s_c(MACROBLOCKD *xd, uint8_t *dst) { - int x, y; - BLOCKD *b = &xd->block[0]; - int stride = b->dst_stride; - int16_t *diff = b->diff; - - for (y = 0; y < 16; y++) { - for (x = 0; x < 16; x++) { - dst[x] = clip_pixel(dst[x] + diff[x]); - } - dst += stride; - diff += 16; - } -} - -void vp9_recon_mbuv_s_c(MACROBLOCKD *xd, uint8_t *udst, uint8_t *vdst) { - int x, y, i; - uint8_t *dst = udst; - - for (i = 0; i < 2; i++, dst = vdst) { - BLOCKD *b = &xd->block[16 + 4 * i]; - int stride = b->dst_stride; - int16_t *diff = b->diff; - - for (y = 0; y < 
8; y++) { - for (x = 0; x < 8; x++) { - dst[x] = clip_pixel(dst[x] + diff[x]); - } - dst += stride; - diff += 8; - } - } -} - -void vp9_recon_sby_s_c(MACROBLOCKD *xd, uint8_t *dst) { - int x, y, stride = xd->block[0].dst_stride; - int16_t *diff = xd->diff; - - for (y = 0; y < 32; y++) { - for (x = 0; x < 32; x++) { - dst[x] = clip_pixel(dst[x] + diff[x]); - } - dst += stride; - diff += 32; - } -} - -void vp9_recon_sbuv_s_c(MACROBLOCKD *xd, uint8_t *udst, uint8_t *vdst) { - int x, y, stride = xd->block[16].dst_stride; - int16_t *udiff = xd->diff + 1024; - int16_t *vdiff = xd->diff + 1280; - - for (y = 0; y < 16; y++) { - for (x = 0; x < 16; x++) { - udst[x] = clip_pixel(udst[x] + udiff[x]); - vdst[x] = clip_pixel(vdst[x] + vdiff[x]); - } - udst += stride; - vdst += stride; - udiff += 16; - vdiff += 16; - } -} - -void vp9_recon_sb64y_s_c(MACROBLOCKD *xd, uint8_t *dst) { - int x, y, stride = xd->block[0].dst_stride; - int16_t *diff = xd->diff; - - for (y = 0; y < 64; y++) { - for (x = 0; x < 64; x++) { - dst[x] = clip_pixel(dst[x] + diff[x]); - } - dst += stride; - diff += 64; - } -} - -void vp9_recon_sb64uv_s_c(MACROBLOCKD *xd, uint8_t *udst, uint8_t *vdst) { - int x, y, stride = xd->block[16].dst_stride; - int16_t *udiff = xd->diff + 4096; - int16_t *vdiff = xd->diff + 4096 + 1024; - - for (y = 0; y < 32; y++) { - for (x = 0; x < 32; x++) { - udst[x] = clip_pixel(udst[x] + udiff[x]); - vdst[x] = clip_pixel(vdst[x] + vdiff[x]); - } - udst += stride; - vdst += stride; - udiff += 32; - vdiff += 32; - } -} - -void vp9_recon_mby_c(MACROBLOCKD *xd) { - int i; - - for (i = 0; i < 16; i += 4) { - BLOCKD *b = &xd->block[i]; - - vp9_recon4b(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - } -} - -void vp9_recon_mb_c(MACROBLOCKD *xd) { - int i; - - for (i = 0; i < 16; i += 4) { - BLOCKD *b = &xd->block[i]; - - vp9_recon4b(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - } - - for (i = 16; i < 24; i += 2) { - BLOCKD *b = &xd->block[i]; - - vp9_recon2b(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - } -} diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c index a654c7df430d7d69ca819b98137c2cb702b2b189..b28d33319964a86d94928317d8d15edd5f795cf8 100644 --- a/vp9/common/vp9_reconinter.c +++ b/vp9/common/vp9_reconinter.c @@ -17,21 +17,109 @@ #include "vp9/common/vp9_reconinter.h" #include "vp9/common/vp9_reconintra.h" +static int scale_value_x_with_scaling(int val, + const struct scale_factors *scale) { + return (val * scale->x_scale_fp >> VP9_REF_SCALE_SHIFT); +} + +static int scale_value_y_with_scaling(int val, + const struct scale_factors *scale) { + return (val * scale->y_scale_fp >> VP9_REF_SCALE_SHIFT); +} + +static int unscaled_value(int val, const struct scale_factors *scale) { + (void) scale; + return val; +} + +static int_mv32 mv_q3_to_q4_with_scaling(const int_mv *src_mv, + const struct scale_factors *scale) { + // returns mv * scale + offset + int_mv32 result; + const int32_t mv_row_q4 = src_mv->as_mv.row << 1; + const int32_t mv_col_q4 = src_mv->as_mv.col << 1; + + result.as_mv.row = (mv_row_q4 * scale->y_scale_fp >> VP9_REF_SCALE_SHIFT) + + scale->y_offset_q4; + result.as_mv.col = (mv_col_q4 * scale->x_scale_fp >> VP9_REF_SCALE_SHIFT) + + scale->x_offset_q4; + return result; +} + +static int_mv32 mv_q3_to_q4_without_scaling(const int_mv *src_mv, + const struct scale_factors *scale) { + // returns mv * scale + offset + int_mv32 result; + + result.as_mv.row = src_mv->as_mv.row << 1; + result.as_mv.col = src_mv->as_mv.col << 1; 
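  // Editorial note, not in the patch: motion vectors are stored at
  // eighth-pel precision (q3); the << 1 above converts them to the
  // sixteenth-pel (q4) units the prediction stage consumes, e.g. a row
  // component of 5 (5/8 pel) becomes 10 (10/16 pel). The _with_scaling
  // variant additionally multiplies by {x,y}_scale_fp, shifts down by
  // VP9_REF_SCALE_SHIFT, and adds the per-block offset.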
+ return result; +} + +static int32_t mv_component_q4_with_scaling(int mv_q4, int scale_fp, + int offset_q4) { + int32_t scaled_mv; + // returns the scaled and offset value of the mv component. + scaled_mv = (mv_q4 * scale_fp >> VP9_REF_SCALE_SHIFT) + offset_q4; + + return scaled_mv; +} + +static int32_t mv_component_q4_without_scaling(int mv_q4, int scale_fp, + int offset_q4) { + // returns the scaled and offset value of the mv component. + (void)scale_fp; + (void)offset_q4; + return mv_q4; +} + +static void set_offsets_with_scaling(struct scale_factors *scale, + int row, int col) { + const int x_q4 = 16 * col; + const int y_q4 = 16 * row; + + scale->x_offset_q4 = (x_q4 * scale->x_scale_fp >> VP9_REF_SCALE_SHIFT) & 0xf; + scale->y_offset_q4 = (y_q4 * scale->y_scale_fp >> VP9_REF_SCALE_SHIFT) & 0xf; +} + +static void set_offsets_without_scaling(struct scale_factors *scale, + int row, int col) { + scale->x_offset_q4 = 0; + scale->y_offset_q4 = 0; +} + +static int get_fixed_point_scale_factor(int other_size, int this_size) { + // Calculate scaling factor once for each reference frame + // and use fixed point scaling factors in decoding and encoding routines. + // Hardware implementations can calculate scale factor in device driver + // and use multiplication and shifting on hardware instead of division. + return (other_size << VP9_REF_SCALE_SHIFT) / this_size; +} + void vp9_setup_scale_factors_for_frame(struct scale_factors *scale, - YV12_BUFFER_CONFIG *other, + int other_w, int other_h, int this_w, int this_h) { - int other_h = other->y_crop_height; - int other_w = other->y_crop_width; - - scale->x_num = other_w; - scale->x_den = this_w; + scale->x_scale_fp = get_fixed_point_scale_factor(other_w, this_w); scale->x_offset_q4 = 0; // calculated per-mb - scale->x_step_q4 = 16 * other_w / this_w; + scale->x_step_q4 = (16 * scale->x_scale_fp >> VP9_REF_SCALE_SHIFT); - scale->y_num = other_h; - scale->y_den = this_h; + scale->y_scale_fp = get_fixed_point_scale_factor(other_h, this_h); scale->y_offset_q4 = 0; // calculated per-mb - scale->y_step_q4 = 16 * other_h / this_h; + scale->y_step_q4 = (16 * scale->y_scale_fp >> VP9_REF_SCALE_SHIFT); + + if ((other_w == this_w) && (other_h == this_h)) { + scale->scale_value_x = unscaled_value; + scale->scale_value_y = unscaled_value; + scale->set_scaled_offsets = set_offsets_without_scaling; + scale->scale_mv_q3_to_q4 = mv_q3_to_q4_without_scaling; + scale->scale_mv_component_q4 = mv_component_q4_without_scaling; + } else { + scale->scale_value_x = scale_value_x_with_scaling; + scale->scale_value_y = scale_value_y_with_scaling; + scale->set_scaled_offsets = set_offsets_with_scaling; + scale->scale_mv_q3_to_q4 = mv_q3_to_q4_with_scaling; + scale->scale_mv_component_q4 = mv_component_q4_with_scaling; + } // TODO(agrange): Investigate the best choice of functions to use here // for EIGHTTAP_SMOOTH. Since it is not interpolating, need to choose what @@ -39,127 +127,6 @@ void vp9_setup_scale_factors_for_frame(struct scale_factors *scale, // applied in one direction only, and not at all for 0,0, seems to give the // best quality, but it may be worth trying an additional mode that does // do the filtering on full-pel. -#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT - if (scale->x_step_q4 == 16) { - if (scale->y_step_q4 == 16) { - // No scaling in either direction. 
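// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the patch, of the fixed-point factors set
// up by get_fixed_point_scale_factor() above; the shift value 14 used for
// VP9_REF_SCALE_SHIFT here is an assumption for the example.
static int scale_value_sketch(int val, int other_size, int this_size) {
  // One division at frame setup...
  const int scale_fp = (other_size << 14) / this_size;
  // ...then only a multiply and shift per scaled value. For a 1920-wide
  // reference behind a 960-wide frame: scale_fp == 32768, the result is
  // val * 2, and the derived x_step_q4 == (16 * 32768) >> 14 == 32.
  return val * scale_fp >> 14;
}
// ---------------------------------------------------------------------------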
- scale->predict[0][0][0] = vp9_convolve_copy; - scale->predict[0][0][1] = vp9_convolve_1by8; - scale->predict[0][0][2] = vp9_convolve_qtr; - scale->predict[0][0][3] = vp9_convolve_3by8; - scale->predict[0][0][4] = vp9_convolve_avg; - scale->predict[0][0][5] = vp9_convolve_5by8; - scale->predict[0][0][6] = vp9_convolve_3qtr; - scale->predict[0][0][7] = vp9_convolve_7by8; - scale->predict[0][1][0] = vp9_convolve8_vert; - scale->predict[0][1][1] = vp9_convolve8_1by8_vert; - scale->predict[0][1][2] = vp9_convolve8_qtr_vert; - scale->predict[0][1][3] = vp9_convolve8_3by8_vert; - scale->predict[0][1][4] = vp9_convolve8_avg_vert; - scale->predict[0][1][5] = vp9_convolve8_5by8_vert; - scale->predict[0][1][6] = vp9_convolve8_3qtr_vert; - scale->predict[0][1][7] = vp9_convolve8_7by8_vert; - scale->predict[1][0][0] = vp9_convolve8_horiz; - scale->predict[1][0][1] = vp9_convolve8_1by8_horiz; - scale->predict[1][0][2] = vp9_convolve8_qtr_horiz; - scale->predict[1][0][3] = vp9_convolve8_3by8_horiz; - scale->predict[1][0][4] = vp9_convolve8_avg_horiz; - scale->predict[1][0][5] = vp9_convolve8_5by8_horiz; - scale->predict[1][0][6] = vp9_convolve8_3qtr_horiz; - scale->predict[1][0][7] = vp9_convolve8_7by8_horiz; - } else { - // No scaling in x direction. Must always scale in the y direction. - scale->predict[0][0][0] = vp9_convolve8_vert; - scale->predict[0][0][1] = vp9_convolve8_1by8_vert; - scale->predict[0][0][2] = vp9_convolve8_qtr_vert; - scale->predict[0][0][3] = vp9_convolve8_3by8_vert; - scale->predict[0][0][4] = vp9_convolve8_avg_vert; - scale->predict[0][0][5] = vp9_convolve8_5by8_vert; - scale->predict[0][0][6] = vp9_convolve8_3qtr_vert; - scale->predict[0][0][7] = vp9_convolve8_7by8_vert; - scale->predict[0][1][0] = vp9_convolve8_vert; - scale->predict[0][1][1] = vp9_convolve8_1by8_vert; - scale->predict[0][1][2] = vp9_convolve8_qtr_vert; - scale->predict[0][1][3] = vp9_convolve8_3by8_vert; - scale->predict[0][1][4] = vp9_convolve8_avg_vert; - scale->predict[0][1][5] = vp9_convolve8_5by8_vert; - scale->predict[0][1][6] = vp9_convolve8_3qtr_vert; - scale->predict[0][1][7] = vp9_convolve8_7by8_vert; - scale->predict[1][0][0] = vp9_convolve8; - scale->predict[1][0][1] = vp9_convolve8_1by8; - scale->predict[1][0][2] = vp9_convolve8_qtr; - scale->predict[1][0][3] = vp9_convolve8_3by8; - scale->predict[1][0][4] = vp9_convolve8_avg; - scale->predict[1][0][5] = vp9_convolve8_5by8; - scale->predict[1][0][6] = vp9_convolve8_3qtr; - scale->predict[1][0][7] = vp9_convolve8_7by8; - } - } else { - if (scale->y_step_q4 == 16) { - // No scaling in the y direction. Must always scale in the x direction. 
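// Editorial note, not in the patch: these tables are consumed as
//   scale->predict[!!(mv.col & 15)][!!(mv.row & 15)][weight](...)
// so the low four q4 bits of each mv component select whether horizontal
// and/or vertical subpel filtering is required, and the third index picks
// the destination weighting.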
- scale->predict[0][0][0] = vp9_convolve8_horiz; - scale->predict[0][0][1] = vp9_convolve8_1by8_horiz; - scale->predict[0][0][2] = vp9_convolve8_qtr_horiz; - scale->predict[0][0][3] = vp9_convolve8_3by8_horiz; - scale->predict[0][0][4] = vp9_convolve8_avg_horiz; - scale->predict[0][0][5] = vp9_convolve8_5by8_horiz; - scale->predict[0][0][6] = vp9_convolve8_3qtr_horiz; - scale->predict[0][0][7] = vp9_convolve8_7by8_horiz; - scale->predict[0][1][0] = vp9_convolve8; - scale->predict[0][1][1] = vp9_convolve8_1by8; - scale->predict[0][1][2] = vp9_convolve8_qtr; - scale->predict[0][1][3] = vp9_convolve8_3by8; - scale->predict[0][1][4] = vp9_convolve8_avg; - scale->predict[0][1][5] = vp9_convolve8_5by8; - scale->predict[0][1][6] = vp9_convolve8_3qtr; - scale->predict[0][1][7] = vp9_convolve8_7by8; - scale->predict[1][0][0] = vp9_convolve8_horiz; - scale->predict[1][0][1] = vp9_convolve8_1by8_horiz; - scale->predict[1][0][2] = vp9_convolve8_qtr_horiz; - scale->predict[1][0][3] = vp9_convolve8_3by8_horiz; - scale->predict[1][0][4] = vp9_convolve8_avg_horiz; - scale->predict[1][0][5] = vp9_convolve8_5by8_horiz; - scale->predict[1][0][6] = vp9_convolve8_3qtr_horiz; - scale->predict[1][0][7] = vp9_convolve8_7by8_horiz; - } else { - // Must always scale in both directions. - scale->predict[0][0][0] = vp9_convolve8; - scale->predict[0][0][1] = vp9_convolve8_1by8; - scale->predict[0][0][2] = vp9_convolve8_qtr; - scale->predict[0][0][3] = vp9_convolve8_3by8; - scale->predict[0][0][4] = vp9_convolve8_avg; - scale->predict[0][0][5] = vp9_convolve8_5by8; - scale->predict[0][0][6] = vp9_convolve8_3qtr; - scale->predict[0][0][7] = vp9_convolve8_7by8; - scale->predict[0][1][0] = vp9_convolve8; - scale->predict[0][1][1] = vp9_convolve8_1by8; - scale->predict[0][1][2] = vp9_convolve8_qtr; - scale->predict[0][1][3] = vp9_convolve8_3by8; - scale->predict[0][1][4] = vp9_convolve8_avg; - scale->predict[0][1][5] = vp9_convolve8_5by8; - scale->predict[0][1][6] = vp9_convolve8_3qtr; - scale->predict[0][1][7] = vp9_convolve8_7by8; - scale->predict[1][0][0] = vp9_convolve8; - scale->predict[1][0][1] = vp9_convolve8_1by8; - scale->predict[1][0][2] = vp9_convolve8_qtr; - scale->predict[1][0][3] = vp9_convolve8_3by8; - scale->predict[1][0][4] = vp9_convolve8_avg; - scale->predict[1][0][5] = vp9_convolve8_5by8; - scale->predict[1][0][6] = vp9_convolve8_3qtr; - scale->predict[1][0][7] = vp9_convolve8_7by8; - } - } - // 2D subpel motion always gets filtered in both directions - scale->predict[1][1][0] = vp9_convolve8; - scale->predict[1][1][1] = vp9_convolve8_1by8; - scale->predict[1][1][2] = vp9_convolve8_qtr; - scale->predict[1][1][3] = vp9_convolve8_3by8; - scale->predict[1][1][4] = vp9_convolve8_avg; - scale->predict[1][1][5] = vp9_convolve8_5by8; - scale->predict[1][1][6] = vp9_convolve8_3qtr; - scale->predict[1][1][7] = vp9_convolve8_7by8; -} -#else if (scale->x_step_q4 == 16) { if (scale->y_step_q4 == 16) { // No scaling in either direction. 
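// Editorial note, not in the patch: with the implicit-weight experiment
// removed, the weight axis of the table collapses to two entries: index 0
// writes the prediction (vp9_convolve_copy / vp9_convolve8) and index 1
// averages into the destination (vp9_convolve_avg / vp9_convolve8_avg) for
// the second reference of a compound pair, replacing the eight 1/8-step
// blend variants deleted above.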
@@ -201,35 +168,19 @@ void vp9_setup_scale_factors_for_frame(struct scale_factors *scale, scale->predict[1][1][0] = vp9_convolve8; scale->predict[1][1][1] = vp9_convolve8_avg; } -#endif void vp9_setup_interp_filters(MACROBLOCKD *xd, INTERPOLATIONFILTERTYPE mcomp_filter_type, VP9_COMMON *cm) { - int i; - - /* Calculate scaling factors for each of the 3 available references */ - for (i = 0; i < 3; ++i) { - if (cm->active_ref_idx[i] >= NUM_YV12_BUFFERS) { - memset(&cm->active_ref_scale[i], 0, sizeof(cm->active_ref_scale[i])); - continue; - } - - vp9_setup_scale_factors_for_frame(&cm->active_ref_scale[i], - &cm->yv12_fb[cm->active_ref_idx[i]], - cm->width, cm->height); - } - if (xd->mode_info_context) { MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; set_scale_factors(xd, - mbmi->ref_frame - 1, - mbmi->second_ref_frame - 1, + mbmi->ref_frame[0] - 1, + mbmi->ref_frame[1] - 1, cm->active_ref_scale); } - switch (mcomp_filter_type) { case EIGHTTAP: case SWITCHABLE: @@ -244,11 +195,6 @@ void vp9_setup_interp_filters(MACROBLOCKD *xd, case BILINEAR: xd->subpix.filter_x = xd->subpix.filter_y = vp9_bilinear_filters; break; -#if CONFIG_ENABLE_6TAP - case SIXTAP: - xd->subpix.filter_x = xd->subpix.filter_y = vp9_sub_pel_filters_6; - break; -#endif } assert(((intptr_t)xd->subpix.filter_x & 0xff) == 0); } @@ -340,60 +286,13 @@ void vp9_copy_mem8x4_c(const uint8_t *src, } } -static void set_scaled_offsets(struct scale_factors *scale, - int row, int col) { - const int x_q4 = 16 * col; - const int y_q4 = 16 * row; - - scale->x_offset_q4 = (x_q4 * scale->x_num / scale->x_den) & 0xf; - scale->y_offset_q4 = (y_q4 * scale->y_num / scale->y_den) & 0xf; -} - -static int32_t scale_motion_vector_component_q3(int mv_q3, - int num, - int den, - int offset_q4) { - // returns the scaled and offset value of the mv component. - const int32_t mv_q4 = mv_q3 << 1; - - /* TODO(jkoleszar): make fixed point, or as a second multiply? */ - return mv_q4 * num / den + offset_q4; -} - -static int32_t scale_motion_vector_component_q4(int mv_q4, - int num, - int den, - int offset_q4) { - // returns the scaled and offset value of the mv component. - - /* TODO(jkoleszar): make fixed point, or as a second multiply? */ - return mv_q4 * num / den + offset_q4; -} - -static int_mv32 scale_motion_vector_q3_to_q4( - const int_mv *src_mv, - const struct scale_factors *scale) { - // returns mv * scale + offset - int_mv32 result; - - result.as_mv.row = scale_motion_vector_component_q3(src_mv->as_mv.row, - scale->y_num, - scale->y_den, - scale->y_offset_q4); - result.as_mv.col = scale_motion_vector_component_q3(src_mv->as_mv.col, - scale->x_num, - scale->x_den, - scale->x_offset_q4); - return result; -} - void vp9_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int_mv *mv_q3, const struct scale_factors *scale, int w, int h, int weight, const struct subpix_fn_table *subpix) { - int_mv32 mv = scale_motion_vector_q3_to_q4(mv_q3, scale); + int_mv32 mv = scale->scale_mv_q3_to_q4(mv_q3, scale); src += (mv.as_mv.row >> 4) * src_stride + (mv.as_mv.col >> 4); scale->predict[!!(mv.as_mv.col & 15)][!!(mv.as_mv.row & 15)][weight]( src, src_stride, dst, dst_stride, @@ -402,26 +301,18 @@ void vp9_build_inter_predictor(const uint8_t *src, int src_stride, w, h); } -/* Like vp9_build_inter_predictor, but takes the full-pel part of the - * mv separately, and the fractional part as a q4. 
- */ void vp9_build_inter_predictor_q4(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, - const int_mv *fullpel_mv_q3, - const int_mv *frac_mv_q4, + const int_mv *mv_q4, const struct scale_factors *scale, int w, int h, int weight, const struct subpix_fn_table *subpix) { - const int mv_row_q4 = ((fullpel_mv_q3->as_mv.row >> 3) << 4) - + (frac_mv_q4->as_mv.row & 0xf); - const int mv_col_q4 = ((fullpel_mv_q3->as_mv.col >> 3) << 4) - + (frac_mv_q4->as_mv.col & 0xf); - const int scaled_mv_row_q4 = - scale_motion_vector_component_q4(mv_row_q4, scale->y_num, scale->y_den, - scale->y_offset_q4); - const int scaled_mv_col_q4 = - scale_motion_vector_component_q4(mv_col_q4, scale->x_num, scale->x_den, - scale->x_offset_q4); + const int scaled_mv_row_q4 = scale->scale_mv_component_q4(mv_q4->as_mv.row, + scale->y_scale_fp, + scale->y_offset_q4); + const int scaled_mv_col_q4 = scale->scale_mv_component_q4(mv_q4->as_mv.col, + scale->x_scale_fp, + scale->x_offset_q4); const int subpel_x = scaled_mv_col_q4 & 15; const int subpel_y = scaled_mv_row_q4 & 15; @@ -433,1367 +324,205 @@ void vp9_build_inter_predictor_q4(const uint8_t *src, int src_stride, w, h); } -static void build_2x1_inter_predictor_wh(const BLOCKD *d0, const BLOCKD *d1, - struct scale_factors *scale, - uint8_t *predictor, - int block_size, int stride, - int which_mv, int weight, - int width, int height, - const struct subpix_fn_table *subpix, - int row, int col) { - assert(d1->predictor - d0->predictor == block_size); - assert(d1->pre == d0->pre + block_size); - - set_scaled_offsets(&scale[which_mv], row, col); - - if (d0->bmi.as_mv[which_mv].as_int == d1->bmi.as_mv[which_mv].as_int) { - uint8_t **base_pre = which_mv ? d0->base_second_pre : d0->base_pre; - - vp9_build_inter_predictor(*base_pre + d0->pre, - d0->pre_stride, - predictor, stride, - &d0->bmi.as_mv[which_mv], - &scale[which_mv], - width, height, - weight, subpix); - - } else { - uint8_t **base_pre0 = which_mv ? d0->base_second_pre : d0->base_pre; - uint8_t **base_pre1 = which_mv ? d1->base_second_pre : d1->base_pre; - - vp9_build_inter_predictor(*base_pre0 + d0->pre, - d0->pre_stride, - predictor, stride, - &d0->bmi.as_mv[which_mv], - &scale[which_mv], - width > block_size ? block_size : width, height, - weight, subpix); - - if (width <= block_size) return; - - set_scaled_offsets(&scale[which_mv], row, col + block_size); - - vp9_build_inter_predictor(*base_pre1 + d1->pre, - d1->pre_stride, - predictor + block_size, stride, - &d1->bmi.as_mv[which_mv], - &scale[which_mv], - width - block_size, height, - weight, subpix); - } +static INLINE int round_mv_comp_q4(int value) { + return (value < 0 ? value - 2 : value + 2) / 4; } -static void build_2x1_inter_predictor(const BLOCKD *d0, const BLOCKD *d1, - struct scale_factors *scale, - int block_size, int stride, - int which_mv, int weight, - const struct subpix_fn_table *subpix, - int row, int col) { - assert(d1->predictor - d0->predictor == block_size); - assert(d1->pre == d0->pre + block_size); - - set_scaled_offsets(&scale[which_mv], row, col); - - if (d0->bmi.as_mv[which_mv].as_int == d1->bmi.as_mv[which_mv].as_int) { - uint8_t **base_pre = which_mv ? d0->base_second_pre : d0->base_pre; - - vp9_build_inter_predictor(*base_pre + d0->pre, - d0->pre_stride, - d0->predictor, stride, - &d0->bmi.as_mv[which_mv], - &scale[which_mv], - 2 * block_size, block_size, - weight, subpix); - - } else { - uint8_t **base_pre0 = which_mv ? d0->base_second_pre : d0->base_pre; - uint8_t **base_pre1 = which_mv ? 
d1->base_second_pre : d1->base_pre; - - vp9_build_inter_predictor(*base_pre0 + d0->pre, - d0->pre_stride, - d0->predictor, stride, - &d0->bmi.as_mv[which_mv], - &scale[which_mv], - block_size, block_size, - weight, subpix); - - set_scaled_offsets(&scale[which_mv], row, col + block_size); +static int mi_mv_pred_row_q4(MACROBLOCKD *mb, int idx) { + const int temp = mb->mode_info_context->bmi[0].as_mv[idx].as_mv.row + + mb->mode_info_context->bmi[1].as_mv[idx].as_mv.row + + mb->mode_info_context->bmi[2].as_mv[idx].as_mv.row + + mb->mode_info_context->bmi[3].as_mv[idx].as_mv.row; + return round_mv_comp_q4(temp); +} - vp9_build_inter_predictor(*base_pre1 + d1->pre, - d1->pre_stride, - d1->predictor, stride, - &d1->bmi.as_mv[which_mv], - &scale[which_mv], - block_size, block_size, - weight, subpix); - } +static int mi_mv_pred_col_q4(MACROBLOCKD *mb, int idx) { + const int temp = mb->mode_info_context->bmi[0].as_mv[idx].as_mv.col + + mb->mode_info_context->bmi[1].as_mv[idx].as_mv.col + + mb->mode_info_context->bmi[2].as_mv[idx].as_mv.col + + mb->mode_info_context->bmi[3].as_mv[idx].as_mv.col; + return round_mv_comp_q4(temp); } -static void clamp_mv_to_umv_border(MV *mv, const MACROBLOCKD *xd) { +// TODO(jkoleszar): yet another mv clamping function :-( +MV clamp_mv_to_umv_border_sb(const MV *src_mv, + int bwl, int bhl, int ss_x, int ss_y, + int mb_to_left_edge, int mb_to_top_edge, + int mb_to_right_edge, int mb_to_bottom_edge) { /* If the MV points so far into the UMV border that no visible pixels * are used for reconstruction, the subpel part of the MV can be * discarded and the MV limited to 16 pixels with equivalent results. - * - * This limit kicks in at 19 pixels for the top and left edges, for - * the 16 pixels plus 3 taps right of the central pixel when subpel - * filtering. The bottom and right edges use 16 pixels plus 2 pixels - * left of the central pixel when filtering. */ - if (mv->col < (xd->mb_to_left_edge - ((16 + VP9_INTERP_EXTEND) << 3))) - mv->col = xd->mb_to_left_edge - (16 << 3); - else if (mv->col > xd->mb_to_right_edge + ((15 + VP9_INTERP_EXTEND) << 3)) - mv->col = xd->mb_to_right_edge + (16 << 3); - - if (mv->row < (xd->mb_to_top_edge - ((16 + VP9_INTERP_EXTEND) << 3))) - mv->row = xd->mb_to_top_edge - (16 << 3); - else if (mv->row > xd->mb_to_bottom_edge + ((15 + VP9_INTERP_EXTEND) << 3)) - mv->row = xd->mb_to_bottom_edge + (16 << 3); -} - -/* A version of the above function for chroma block MVs.*/ -static void clamp_uvmv_to_umv_border(MV *mv, const MACROBLOCKD *xd) { - const int extend = VP9_INTERP_EXTEND; - - mv->col = (2 * mv->col < (xd->mb_to_left_edge - ((16 + extend) << 3))) ? - (xd->mb_to_left_edge - (16 << 3)) >> 1 : mv->col; - mv->col = (2 * mv->col > xd->mb_to_right_edge + ((15 + extend) << 3)) ? - (xd->mb_to_right_edge + (16 << 3)) >> 1 : mv->col; - - mv->row = (2 * mv->row < (xd->mb_to_top_edge - ((16 + extend) << 3))) ? - (xd->mb_to_top_edge - (16 << 3)) >> 1 : mv->row; - mv->row = (2 * mv->row > xd->mb_to_bottom_edge + ((15 + extend) << 3)) ? 
- (xd->mb_to_bottom_edge + (16 << 3)) >> 1 : mv->row; -} - -#define AVERAGE_WEIGHT (1 << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT)) - -#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT - -// Whether to use implicit weighting for UV -#define USE_IMPLICIT_WEIGHT_UV - -// Whether to use implicit weighting for SplitMV -// #define USE_IMPLICIT_WEIGHT_SPLITMV - -// #define SEARCH_MIN3 -static int64_t get_consistency_metric(MACROBLOCKD *xd, - uint8_t *tmp_y, int tmp_ystride) { - int block_size = 16 << xd->mode_info_context->mbmi.sb_type; - uint8_t *rec_y = xd->dst.y_buffer; - int rec_ystride = xd->dst.y_stride; - int64_t metric = 0; - int i; - if (xd->up_available) { - for (i = 0; i < block_size; ++i) { - int diff = abs(*(rec_y - rec_ystride + i) - - *(tmp_y + i)); -#ifdef SEARCH_MIN3 - // Searches for the min abs diff among 3 pixel neighbors in the border - int diff1 = xd->left_available ? - abs(*(rec_y - rec_ystride + i - 1) - *(tmp_y + i)) : diff; - int diff2 = i < block_size - 1 ? - abs(*(rec_y - rec_ystride + i + 1) - *(tmp_y + i)) : diff; - diff = diff <= diff1 ? diff : diff1; - diff = diff <= diff2 ? diff : diff2; -#endif - metric += diff; - } - } - if (xd->left_available) { - for (i = 0; i < block_size; ++i) { - int diff = abs(*(rec_y - 1 + i * rec_ystride) - - *(tmp_y + i * tmp_ystride)); -#ifdef SEARCH_MIN3 - // Searches for the min abs diff among 3 pixel neighbors in the border - int diff1 = xd->up_available ? - abs(*(rec_y - 1 + (i - 1) * rec_ystride) - - *(tmp_y + i * tmp_ystride)) : diff; - int diff2 = i < block_size - 1 ? - abs(*(rec_y - 1 + (i + 1) * rec_ystride) - - *(tmp_y + i * tmp_ystride)) : diff; - diff = diff <= diff1 ? diff : diff1; - diff = diff <= diff2 ? diff : diff2; -#endif - metric += diff; - } - } - return metric; -} - -static int get_weight(MACROBLOCKD *xd, int64_t metric_1, int64_t metric_2) { - int weight = AVERAGE_WEIGHT; - if (2 * metric_1 < metric_2) - weight = 6; - else if (4 * metric_1 < 3 * metric_2) - weight = 5; - else if (2 * metric_2 < metric_1) - weight = 2; - else if (4 * metric_2 < 3 * metric_1) - weight = 3; - return weight; -} - -#ifdef USE_IMPLICIT_WEIGHT_SPLITMV -static int get_implicit_compoundinter_weight_splitmv( - MACROBLOCKD *xd, int mb_row, int mb_col) { - MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; - BLOCKD *blockd = xd->block; - const int use_second_ref = mbmi->second_ref_frame > 0; - int64_t metric_2 = 0, metric_1 = 0; - int i, which_mv, weight; - uint8_t tmp_y[256]; - const int tmp_ystride = 16; - - if (!use_second_ref) return 0; - if (!(xd->up_available || xd->left_available)) - return AVERAGE_WEIGHT; - - assert(xd->mode_info_context->mbmi.mode == SPLITMV); - - which_mv = 1; // second predictor - if (xd->mode_info_context->mbmi.partitioning != PARTITIONING_4X4) { - for (i = 0; i < 16; i += 8) { - BLOCKD *d0 = &blockd[i]; - BLOCKD *d1 = &blockd[i + 2]; - const int y = i & 8; - - blockd[i + 0].bmi = xd->mode_info_context->bmi[i + 0]; - blockd[i + 2].bmi = xd->mode_info_context->bmi[i + 2]; - - if (mbmi->need_to_clamp_mvs) { - clamp_mv_to_umv_border(&blockd[i + 0].bmi.as_mv[which_mv].as_mv, xd); - clamp_mv_to_umv_border(&blockd[i + 2].bmi.as_mv[which_mv].as_mv, xd); - } - if (i == 0) { - build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 8, 16, - which_mv, 0, 16, 1, - &xd->subpix, mb_row * 16 + y, mb_col * 16); - build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 8, 16, - which_mv, 0, 1, 8, - &xd->subpix, mb_row * 16 + y, mb_col * 16); - } else { - build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y + 8 * 16, 
- 8, 16, which_mv, 0, 1, 8, - &xd->subpix, mb_row * 16 + y, mb_col * 16); - } - } - } else { - for (i = 0; i < 16; i += 2) { - BLOCKD *d0 = &blockd[i]; - BLOCKD *d1 = &blockd[i + 1]; - const int x = (i & 3) * 4; - const int y = (i >> 2) * 4; - - blockd[i + 0].bmi = xd->mode_info_context->bmi[i + 0]; - blockd[i + 1].bmi = xd->mode_info_context->bmi[i + 1]; - - if (i >= 4 && (i & 3) != 0) continue; - - if (i == 0) { - build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 4, 16, - which_mv, 0, 8, 1, &xd->subpix, - mb_row * 16 + y, mb_col * 16 + x); - build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 4, 16, - which_mv, 0, 1, 4, &xd->subpix, - mb_row * 16 + y, mb_col * 16 + x); - } else if (i < 4) { - build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y + x, 4, 16, - which_mv, 0, 8, 1, &xd->subpix, - mb_row * 16 + y, mb_col * 16 + x); - } else { - build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y + y * 16, - 4, 16, which_mv, 0, 1, 4, &xd->subpix, - mb_row * 16 + y, mb_col * 16 + x); - } - } - } - metric_2 = get_consistency_metric(xd, tmp_y, tmp_ystride); - - which_mv = 0; // first predictor - if (xd->mode_info_context->mbmi.partitioning != PARTITIONING_4X4) { - for (i = 0; i < 16; i += 8) { - BLOCKD *d0 = &blockd[i]; - BLOCKD *d1 = &blockd[i + 2]; - const int y = i & 8; - - blockd[i + 0].bmi = xd->mode_info_context->bmi[i + 0]; - blockd[i + 2].bmi = xd->mode_info_context->bmi[i + 2]; - - if (mbmi->need_to_clamp_mvs) { - clamp_mv_to_umv_border(&blockd[i + 0].bmi.as_mv[which_mv].as_mv, xd); - clamp_mv_to_umv_border(&blockd[i + 2].bmi.as_mv[which_mv].as_mv, xd); - } - if (i == 0) { - build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 8, 16, - which_mv, 0, 16, 1, - &xd->subpix, mb_row * 16 + y, mb_col * 16); - build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 8, 16, - which_mv, 0, 1, 8, - &xd->subpix, mb_row * 16 + y, mb_col * 16); - } else { - build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y + 8 * 16, - 8, 16, which_mv, 0, 1, 8, - &xd->subpix, mb_row * 16 + y, mb_col * 16); - } - } - } else { - for (i = 0; i < 16; i += 2) { - BLOCKD *d0 = &blockd[i]; - BLOCKD *d1 = &blockd[i + 1]; - const int x = (i & 3) * 4; - const int y = (i >> 2) * 4; - - blockd[i + 0].bmi = xd->mode_info_context->bmi[i + 0]; - blockd[i + 1].bmi = xd->mode_info_context->bmi[i + 1]; - - if (i >= 4 && (i & 3) != 0) continue; - - if (i == 0) { - build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 4, 16, - which_mv, 0, 8, 1, &xd->subpix, - mb_row * 16 + y, mb_col * 16 + x); - build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 4, 16, - which_mv, 0, 1, 4, &xd->subpix, - mb_row * 16 + y, mb_col * 16 + x); - } else if (i < 4) { - build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y + x, 4, 16, - which_mv, 0, 8, 1, &xd->subpix, - mb_row * 16 + y, mb_col * 16 + x); - } else { - build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y + y * 16, - 4, 16, which_mv, 0, 1, 4, &xd->subpix, - mb_row * 16 + y, mb_col * 16 + x); - } - } - } - metric_1 = get_consistency_metric(xd, tmp_y, tmp_ystride); - - // Choose final weight for averaging - weight = get_weight(xd, metric_1, metric_2); - return weight; -} -#endif - -static int get_implicit_compoundinter_weight(MACROBLOCKD *xd, - int mb_row, - int mb_col) { - const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0; - int64_t metric_2 = 0, metric_1 = 0; - int n, clamp_mvs, pre_stride; - uint8_t *base_pre; - int_mv ymv; - uint8_t tmp_y[4096]; - const int tmp_ystride = 64; 
- int weight; - int edge[4]; - int block_size = 16 << xd->mode_info_context->mbmi.sb_type; - - if (!use_second_ref) return 0; - if (!(xd->up_available || xd->left_available)) - return AVERAGE_WEIGHT; - - edge[0] = xd->mb_to_top_edge; - edge[1] = xd->mb_to_bottom_edge; - edge[2] = xd->mb_to_left_edge; - edge[3] = xd->mb_to_right_edge; - - clamp_mvs = xd->mode_info_context->mbmi.need_to_clamp_secondmv; - base_pre = xd->second_pre.y_buffer; - pre_stride = xd->second_pre.y_stride; - ymv.as_int = xd->mode_info_context->mbmi.mv[1].as_int; - // First generate the second predictor - for (n = 0; n < block_size; n += 16) { - xd->mb_to_left_edge = edge[2] - (n << 3); - xd->mb_to_right_edge = edge[3] + ((16 - n) << 3); - if (clamp_mvs) - clamp_mv_to_umv_border(&ymv.as_mv, xd); - set_scaled_offsets(&xd->scale_factor[1], mb_row * 16, mb_col * 16 + n); - // predict a single row of pixels - vp9_build_inter_predictor( - base_pre + scaled_buffer_offset(n, 0, pre_stride, &xd->scale_factor[1]), - pre_stride, tmp_y + n, tmp_ystride, &ymv, &xd->scale_factor[1], - 16, 1, 0, &xd->subpix); - } - xd->mb_to_left_edge = edge[2]; - xd->mb_to_right_edge = edge[3]; - for (n = 0; n < block_size; n += 16) { - xd->mb_to_top_edge = edge[0] - (n << 3); - xd->mb_to_bottom_edge = edge[1] + ((16 - n) << 3); - if (clamp_mvs) - clamp_mv_to_umv_border(&ymv.as_mv, xd); - set_scaled_offsets(&xd->scale_factor[1], mb_row * 16 + n, mb_col * 16); - // predict a single col of pixels - vp9_build_inter_predictor( - base_pre + scaled_buffer_offset(0, n, pre_stride, &xd->scale_factor[1]), - pre_stride, tmp_y + n * tmp_ystride, tmp_ystride, &ymv, - &xd->scale_factor[1], 1, 16, 0, &xd->subpix); - } - xd->mb_to_top_edge = edge[0]; - xd->mb_to_bottom_edge = edge[1]; - // Compute consistency metric - metric_2 = get_consistency_metric(xd, tmp_y, tmp_ystride); - - clamp_mvs = xd->mode_info_context->mbmi.need_to_clamp_mvs; - base_pre = xd->pre.y_buffer; - pre_stride = xd->pre.y_stride; - ymv.as_int = xd->mode_info_context->mbmi.mv[0].as_int; - // Now generate the first predictor - for (n = 0; n < block_size; n += 16) { - xd->mb_to_left_edge = edge[2] - (n << 3); - xd->mb_to_right_edge = edge[3] + ((16 - n) << 3); - if (clamp_mvs) - clamp_mv_to_umv_border(&ymv.as_mv, xd); - set_scaled_offsets(&xd->scale_factor[0], mb_row * 16, mb_col * 16 + n); - // predict a single row of pixels - vp9_build_inter_predictor( - base_pre + scaled_buffer_offset(n, 0, pre_stride, &xd->scale_factor[0]), - pre_stride, tmp_y + n, tmp_ystride, &ymv, &xd->scale_factor[0], - 16, 1, 0, &xd->subpix); - } - xd->mb_to_left_edge = edge[2]; - xd->mb_to_right_edge = edge[3]; - for (n = 0; n < block_size; n += 16) { - xd->mb_to_top_edge = edge[0] - (n << 3); - xd->mb_to_bottom_edge = edge[1] + ((16 - n) << 3); - if (clamp_mvs) - clamp_mv_to_umv_border(&ymv.as_mv, xd); - set_scaled_offsets(&xd->scale_factor[0], mb_row * 16 + n, mb_col * 16); - // predict a single col of pixels - vp9_build_inter_predictor( - base_pre + scaled_buffer_offset(0, n, pre_stride, &xd->scale_factor[0]), - pre_stride, tmp_y + n * tmp_ystride, tmp_ystride, &ymv, - &xd->scale_factor[0], 1, 16, 0, &xd->subpix); - } - xd->mb_to_top_edge = edge[0]; - xd->mb_to_bottom_edge = edge[1]; - metric_1 = get_consistency_metric(xd, tmp_y, tmp_ystride); - - // Choose final weight for averaging - weight = get_weight(xd, metric_1, metric_2); - return weight; -} - -static void build_inter16x16_predictors_mby_w(MACROBLOCKD *xd, - uint8_t *dst_y, - int dst_ystride, - int weight, - int mb_row, - int mb_col) { - const int 
use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0; + const int spel_left = (VP9_INTERP_EXTEND + (4 << bwl)) << 4; + const int spel_right = spel_left - (1 << 4); + const int spel_top = (VP9_INTERP_EXTEND + (4 << bhl)) << 4; + const int spel_bottom = spel_top - (1 << 4); + MV clamped_mv; + + assert(ss_x <= 1); + assert(ss_y <= 1); + clamped_mv.col = clamp(src_mv->col << (1 - ss_x), + (mb_to_left_edge << (1 - ss_x)) - spel_left, + (mb_to_right_edge << (1 - ss_x)) + spel_right); + clamped_mv.row = clamp(src_mv->row << (1 - ss_y), + (mb_to_top_edge << (1 - ss_y)) - spel_top, + (mb_to_bottom_edge << (1 - ss_y)) + spel_bottom); + return clamped_mv; +} + +struct build_inter_predictors_args { + MACROBLOCKD *xd; + int x; + int y; + uint8_t* dst[MAX_MB_PLANE]; + int dst_stride[MAX_MB_PLANE]; + uint8_t* pre[2][MAX_MB_PLANE]; + int pre_stride[2][MAX_MB_PLANE]; +}; +static void build_inter_predictors(int plane, int block, + BLOCK_SIZE_TYPE bsize, + int pred_w, int pred_h, + void *argv) { + const struct build_inter_predictors_args* const arg = argv; + MACROBLOCKD * const xd = arg->xd; + const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x; + const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y; + const int bh = 4 << bhl, bw = 4 << bwl; + const int x = 4 * (block & ((1 << bwl) - 1)), y = 4 * (block >> bwl); + const int use_second_ref = xd->mode_info_context->mbmi.ref_frame[1] > 0; int which_mv; - for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { - const int clamp_mvs = which_mv ? - xd->mode_info_context->mbmi.need_to_clamp_secondmv : - xd->mode_info_context->mbmi.need_to_clamp_mvs; - - uint8_t *base_pre = which_mv ? xd->second_pre.y_buffer : xd->pre.y_buffer; - int pre_stride = which_mv ? xd->second_pre.y_stride : xd->pre.y_stride; - int_mv ymv; - ymv.as_int = xd->mode_info_context->mbmi.mv[which_mv].as_int; - - if (clamp_mvs) - clamp_mv_to_umv_border(&ymv.as_mv, xd); - - set_scaled_offsets(&xd->scale_factor[which_mv], mb_row * 16, mb_col * 16); - - vp9_build_inter_predictor(base_pre, pre_stride, - dst_y, dst_ystride, - &ymv, &xd->scale_factor[which_mv], - 16, 16, which_mv ? weight : 0, &xd->subpix); - } -} - -void vp9_build_inter16x16_predictors_mby(MACROBLOCKD *xd, - uint8_t *dst_y, - int dst_ystride, - int mb_row, - int mb_col) { - int weight = get_implicit_compoundinter_weight(xd, mb_row, mb_col); - - build_inter16x16_predictors_mby_w(xd, dst_y, dst_ystride, weight, - mb_row, mb_col); -} - -#else - -void vp9_build_inter16x16_predictors_mby(MACROBLOCKD *xd, - uint8_t *dst_y, - int dst_ystride, - int mb_row, - int mb_col) { - const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0; - int which_mv; - - for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { - const int clamp_mvs = which_mv ? - xd->mode_info_context->mbmi.need_to_clamp_secondmv : - xd->mode_info_context->mbmi.need_to_clamp_mvs; - - uint8_t *base_pre = which_mv ? xd->second_pre.y_buffer : xd->pre.y_buffer; - int pre_stride = which_mv ? 
xd->second_pre.y_stride : xd->pre.y_stride; - int_mv ymv; - ymv.as_int = xd->mode_info_context->mbmi.mv[which_mv].as_int; - - if (clamp_mvs) - clamp_mv_to_umv_border(&ymv.as_mv, xd); - - set_scaled_offsets(&xd->scale_factor[which_mv], mb_row * 16, mb_col * 16); - - vp9_build_inter_predictor(base_pre, pre_stride, - dst_y, dst_ystride, - &ymv, &xd->scale_factor[which_mv], - 16, 16, which_mv, &xd->subpix); - } -} -#endif - -#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT -static void build_inter16x16_predictors_mbuv_w(MACROBLOCKD *xd, - uint8_t *dst_u, - uint8_t *dst_v, - int dst_uvstride, - int weight, - int mb_row, - int mb_col) { - const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0; - int which_mv; + assert(x < bw); + assert(y < bh); + assert(xd->mode_info_context->mbmi.sb_type < BLOCK_SIZE_SB8X8 || + 4 << pred_w == bw); + assert(xd->mode_info_context->mbmi.sb_type < BLOCK_SIZE_SB8X8 || + 4 << pred_h == bh); for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { - const int clamp_mvs = - which_mv ? xd->mode_info_context->mbmi.need_to_clamp_secondmv - : xd->mode_info_context->mbmi.need_to_clamp_mvs; - uint8_t *uptr, *vptr; - int pre_stride = which_mv ? xd->second_pre.uv_stride - : xd->pre.uv_stride; - int_mv _o16x16mv; - int_mv _16x16mv; - - _16x16mv.as_int = xd->mode_info_context->mbmi.mv[which_mv].as_int; - - if (clamp_mvs) - clamp_mv_to_umv_border(&_16x16mv.as_mv, xd); - - _o16x16mv = _16x16mv; - /* calc uv motion vectors */ - if (_16x16mv.as_mv.row < 0) - _16x16mv.as_mv.row -= 1; - else - _16x16mv.as_mv.row += 1; - - if (_16x16mv.as_mv.col < 0) - _16x16mv.as_mv.col -= 1; - else - _16x16mv.as_mv.col += 1; - - _16x16mv.as_mv.row /= 2; - _16x16mv.as_mv.col /= 2; - - _16x16mv.as_mv.row &= xd->fullpixel_mask; - _16x16mv.as_mv.col &= xd->fullpixel_mask; - - uptr = (which_mv ? xd->second_pre.u_buffer : xd->pre.u_buffer); - vptr = (which_mv ? xd->second_pre.v_buffer : xd->pre.v_buffer); - - set_scaled_offsets(&xd->scale_factor_uv[which_mv], - mb_row * 16, mb_col * 16); - - vp9_build_inter_predictor_q4( - uptr, pre_stride, dst_u, dst_uvstride, &_16x16mv, &_o16x16mv, - &xd->scale_factor_uv[which_mv], 8, 8, - which_mv ? weight : 0, &xd->subpix); - - vp9_build_inter_predictor_q4( - vptr, pre_stride, dst_v, dst_uvstride, &_16x16mv, &_o16x16mv, - &xd->scale_factor_uv[which_mv], 8, 8, - which_mv ? weight : 0, &xd->subpix); - } -} - -void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd, - uint8_t *dst_u, - uint8_t *dst_v, - int dst_uvstride, - int mb_row, - int mb_col) { -#ifdef USE_IMPLICIT_WEIGHT_UV - int weight = get_implicit_compoundinter_weight(xd, mb_row, mb_col); -#else - int weight = AVERAGE_WEIGHT; -#endif - build_inter16x16_predictors_mbuv_w(xd, dst_u, dst_v, dst_uvstride, - weight, mb_row, mb_col); -} - -#else - -void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd, - uint8_t *dst_u, - uint8_t *dst_v, - int dst_uvstride, - int mb_row, - int mb_col) { - const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0; - int which_mv; - - for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { - const int clamp_mvs = - which_mv ? xd->mode_info_context->mbmi.need_to_clamp_secondmv - : xd->mode_info_context->mbmi.need_to_clamp_mvs; - uint8_t *uptr, *vptr; - int pre_stride = which_mv ? 
xd->second_pre.uv_stride - : xd->pre.uv_stride; - int_mv _o16x16mv; - int_mv _16x16mv; - - _16x16mv.as_int = xd->mode_info_context->mbmi.mv[which_mv].as_int; - - if (clamp_mvs) - clamp_mv_to_umv_border(&_16x16mv.as_mv, xd); - - _o16x16mv = _16x16mv; - /* calc uv motion vectors */ - if (_16x16mv.as_mv.row < 0) - _16x16mv.as_mv.row -= 1; - else - _16x16mv.as_mv.row += 1; - - if (_16x16mv.as_mv.col < 0) - _16x16mv.as_mv.col -= 1; - else - _16x16mv.as_mv.col += 1; - - _16x16mv.as_mv.row /= 2; - _16x16mv.as_mv.col /= 2; - - _16x16mv.as_mv.row &= xd->fullpixel_mask; - _16x16mv.as_mv.col &= xd->fullpixel_mask; - - uptr = (which_mv ? xd->second_pre.u_buffer : xd->pre.u_buffer); - vptr = (which_mv ? xd->second_pre.v_buffer : xd->pre.v_buffer); - - set_scaled_offsets(&xd->scale_factor_uv[which_mv], - mb_row * 16, mb_col * 16); - - vp9_build_inter_predictor_q4( - uptr, pre_stride, dst_u, dst_uvstride, &_16x16mv, &_o16x16mv, - &xd->scale_factor_uv[which_mv], 8, 8, - which_mv << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT), &xd->subpix); - - vp9_build_inter_predictor_q4( - vptr, pre_stride, dst_v, dst_uvstride, &_16x16mv, &_o16x16mv, - &xd->scale_factor_uv[which_mv], 8, 8, - which_mv << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT), &xd->subpix); - } -} -#endif - -#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT -static void build_inter32x32_predictors_sby_w(MACROBLOCKD *x, - uint8_t *dst_y, - int dst_ystride, - int weight, - int mb_row, - int mb_col) { - uint8_t *y1 = x->pre.y_buffer; - uint8_t *y2 = x->second_pre.y_buffer; - int edge[4], n; - - edge[0] = x->mb_to_top_edge; - edge[1] = x->mb_to_bottom_edge; - edge[2] = x->mb_to_left_edge; - edge[3] = x->mb_to_right_edge; - - for (n = 0; n < 4; n++) { - const int x_idx = n & 1, y_idx = n >> 1; - - x->mb_to_top_edge = edge[0] - ((y_idx * 16) << 3); - x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 16) << 3); - x->mb_to_left_edge = edge[2] - ((x_idx * 16) << 3); - x->mb_to_right_edge = edge[3] + (((1 - x_idx) * 16) << 3); - - x->pre.y_buffer = y1 + scaled_buffer_offset(x_idx * 16, - y_idx * 16, - x->pre.y_stride, - &x->scale_factor[0]); - if (x->mode_info_context->mbmi.second_ref_frame > 0) { - x->second_pre.y_buffer = y2 + - scaled_buffer_offset(x_idx * 16, - y_idx * 16, - x->second_pre.y_stride, - &x->scale_factor[1]); - } - build_inter16x16_predictors_mby_w(x, - dst_y + y_idx * 16 * dst_ystride + x_idx * 16, - dst_ystride, weight, mb_row + y_idx, mb_col + x_idx); - } - x->mb_to_top_edge = edge[0]; - x->mb_to_bottom_edge = edge[1]; - x->mb_to_left_edge = edge[2]; - x->mb_to_right_edge = edge[3]; - - x->pre.y_buffer = y1; - if (x->mode_info_context->mbmi.second_ref_frame > 0) { - x->second_pre.y_buffer = y2; - } -} - -void vp9_build_inter32x32_predictors_sby(MACROBLOCKD *x, - uint8_t *dst_y, - int dst_ystride, - int mb_row, - int mb_col) { - int weight = get_implicit_compoundinter_weight(x, mb_row, mb_col); - build_inter32x32_predictors_sby_w(x, dst_y, dst_ystride, weight, - mb_row, mb_col); -} - -#else - -// TODO(all): Can we use 32x32 specific implementations of this rather than -// using 16x16 implementations ? 
-void vp9_build_inter32x32_predictors_sby(MACROBLOCKD *x, - uint8_t *dst_y, - int dst_ystride, - int mb_row, - int mb_col) { - uint8_t *y1 = x->pre.y_buffer; - uint8_t *y2 = x->second_pre.y_buffer; - int edge[4], n; - - edge[0] = x->mb_to_top_edge; - edge[1] = x->mb_to_bottom_edge; - edge[2] = x->mb_to_left_edge; - edge[3] = x->mb_to_right_edge; - - for (n = 0; n < 4; n++) { - const int x_idx = n & 1, y_idx = n >> 1; - - x->mb_to_top_edge = edge[0] - ((y_idx * 16) << 3); - x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 16) << 3); - x->mb_to_left_edge = edge[2] - ((x_idx * 16) << 3); - x->mb_to_right_edge = edge[3] + (((1 - x_idx) * 16) << 3); - - x->pre.y_buffer = y1 + scaled_buffer_offset(x_idx * 16, - y_idx * 16, - x->pre.y_stride, - &x->scale_factor[0]); - if (x->mode_info_context->mbmi.second_ref_frame > 0) { - x->second_pre.y_buffer = y2 + - scaled_buffer_offset(x_idx * 16, - y_idx * 16, - x->second_pre.y_stride, - &x->scale_factor[1]); - } - vp9_build_inter16x16_predictors_mby(x, - dst_y + y_idx * 16 * dst_ystride + x_idx * 16, - dst_ystride, mb_row + y_idx, mb_col + x_idx); - } - x->mb_to_top_edge = edge[0]; - x->mb_to_bottom_edge = edge[1]; - x->mb_to_left_edge = edge[2]; - x->mb_to_right_edge = edge[3]; - - x->pre.y_buffer = y1; - if (x->mode_info_context->mbmi.second_ref_frame > 0) { - x->second_pre.y_buffer = y2; - } -} - -#endif - -#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT -static void build_inter32x32_predictors_sbuv_w(MACROBLOCKD *x, - uint8_t *dst_u, - uint8_t *dst_v, - int dst_uvstride, - int weight, - int mb_row, - int mb_col) { - uint8_t *u1 = x->pre.u_buffer, *v1 = x->pre.v_buffer; - uint8_t *u2 = x->second_pre.u_buffer, *v2 = x->second_pre.v_buffer; - int edge[4], n; - - edge[0] = x->mb_to_top_edge; - edge[1] = x->mb_to_bottom_edge; - edge[2] = x->mb_to_left_edge; - edge[3] = x->mb_to_right_edge; - - for (n = 0; n < 4; n++) { - int scaled_uv_offset; - const int x_idx = n & 1, y_idx = n >> 1; - - x->mb_to_top_edge = edge[0] - ((y_idx * 16) << 3); - x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 16) << 3); - x->mb_to_left_edge = edge[2] - ((x_idx * 16) << 3); - x->mb_to_right_edge = edge[3] + (((1 - x_idx) * 16) << 3); - - scaled_uv_offset = scaled_buffer_offset(x_idx * 8, - y_idx * 8, - x->pre.uv_stride, - &x->scale_factor_uv[0]); - x->pre.u_buffer = u1 + scaled_uv_offset; - x->pre.v_buffer = v1 + scaled_uv_offset; - - if (x->mode_info_context->mbmi.second_ref_frame > 0) { - scaled_uv_offset = scaled_buffer_offset(x_idx * 8, - y_idx * 8, - x->second_pre.uv_stride, - &x->scale_factor_uv[1]); - x->second_pre.u_buffer = u2 + scaled_uv_offset; - x->second_pre.v_buffer = v2 + scaled_uv_offset; - } - - build_inter16x16_predictors_mbuv_w(x, - dst_u + y_idx * 8 * dst_uvstride + x_idx * 8, - dst_v + y_idx * 8 * dst_uvstride + x_idx * 8, - dst_uvstride, weight, mb_row + y_idx, mb_col + x_idx); - } - x->mb_to_top_edge = edge[0]; - x->mb_to_bottom_edge = edge[1]; - x->mb_to_left_edge = edge[2]; - x->mb_to_right_edge = edge[3]; - - x->pre.u_buffer = u1; - x->pre.v_buffer = v1; - - if (x->mode_info_context->mbmi.second_ref_frame > 0) { - x->second_pre.u_buffer = u2; - x->second_pre.v_buffer = v2; - } -} - -void vp9_build_inter32x32_predictors_sbuv(MACROBLOCKD *xd, - uint8_t *dst_u, - uint8_t *dst_v, - int dst_uvstride, - int mb_row, - int mb_col) { -#ifdef USE_IMPLICIT_WEIGHT_UV - int weight = get_implicit_compoundinter_weight(xd, mb_row, mb_col); -#else - int weight = AVERAGE_WEIGHT; -#endif - build_inter32x32_predictors_sbuv_w(xd, dst_u, dst_v, dst_uvstride, - weight, mb_row, 
mb_col); -} - -#else - -void vp9_build_inter32x32_predictors_sbuv(MACROBLOCKD *x, - uint8_t *dst_u, - uint8_t *dst_v, - int dst_uvstride, - int mb_row, - int mb_col) { - uint8_t *u1 = x->pre.u_buffer, *v1 = x->pre.v_buffer; - uint8_t *u2 = x->second_pre.u_buffer, *v2 = x->second_pre.v_buffer; - int edge[4], n; - - edge[0] = x->mb_to_top_edge; - edge[1] = x->mb_to_bottom_edge; - edge[2] = x->mb_to_left_edge; - edge[3] = x->mb_to_right_edge; - - for (n = 0; n < 4; n++) { - int scaled_uv_offset; - const int x_idx = n & 1, y_idx = n >> 1; - - x->mb_to_top_edge = edge[0] - ((y_idx * 16) << 3); - x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 16) << 3); - x->mb_to_left_edge = edge[2] - ((x_idx * 16) << 3); - x->mb_to_right_edge = edge[3] + (((1 - x_idx) * 16) << 3); - - scaled_uv_offset = scaled_buffer_offset(x_idx * 8, - y_idx * 8, - x->pre.uv_stride, - &x->scale_factor_uv[0]); - x->pre.u_buffer = u1 + scaled_uv_offset; - x->pre.v_buffer = v1 + scaled_uv_offset; - - if (x->mode_info_context->mbmi.second_ref_frame > 0) { - scaled_uv_offset = scaled_buffer_offset(x_idx * 8, - y_idx * 8, - x->second_pre.uv_stride, - &x->scale_factor_uv[1]); - x->second_pre.u_buffer = u2 + scaled_uv_offset; - x->second_pre.v_buffer = v2 + scaled_uv_offset; - } - - vp9_build_inter16x16_predictors_mbuv(x, - dst_u + y_idx * 8 * dst_uvstride + x_idx * 8, - dst_v + y_idx * 8 * dst_uvstride + x_idx * 8, - dst_uvstride, mb_row + y_idx, mb_col + x_idx); - } - x->mb_to_top_edge = edge[0]; - x->mb_to_bottom_edge = edge[1]; - x->mb_to_left_edge = edge[2]; - x->mb_to_right_edge = edge[3]; - - x->pre.u_buffer = u1; - x->pre.v_buffer = v1; - - if (x->mode_info_context->mbmi.second_ref_frame > 0) { - x->second_pre.u_buffer = u2; - x->second_pre.v_buffer = v2; - } -} -#endif - -void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x, - uint8_t *dst_y, - uint8_t *dst_u, - uint8_t *dst_v, - int dst_ystride, - int dst_uvstride, - int mb_row, - int mb_col) { - vp9_build_inter32x32_predictors_sby(x, dst_y, dst_ystride, - mb_row, mb_col); - vp9_build_inter32x32_predictors_sbuv(x, dst_u, dst_v, dst_uvstride, - mb_row, mb_col); -#if CONFIG_COMP_INTERINTRA_PRED - if (x->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) { - vp9_build_interintra_32x32_predictors_sb( - x, dst_y, dst_u, dst_v, dst_ystride, dst_uvstride); - } -#endif -} - -#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT -static void build_inter64x64_predictors_sby_w(MACROBLOCKD *x, - uint8_t *dst_y, - int dst_ystride, - int weight, - int mb_row, - int mb_col) { - uint8_t *y1 = x->pre.y_buffer; - uint8_t *y2 = x->second_pre.y_buffer; - int edge[4], n; - - edge[0] = x->mb_to_top_edge; - edge[1] = x->mb_to_bottom_edge; - edge[2] = x->mb_to_left_edge; - edge[3] = x->mb_to_right_edge; - - for (n = 0; n < 4; n++) { - const int x_idx = n & 1, y_idx = n >> 1; - - x->mb_to_top_edge = edge[0] - ((y_idx * 32) << 3); - x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 32) << 3); - x->mb_to_left_edge = edge[2] - ((x_idx * 32) << 3); - x->mb_to_right_edge = edge[3] + (((1 - x_idx) * 32) << 3); - - x->pre.y_buffer = y1 + scaled_buffer_offset(x_idx * 32, - y_idx * 32, - x->pre.y_stride, - &x->scale_factor[0]); - - if (x->mode_info_context->mbmi.second_ref_frame > 0) { - x->second_pre.y_buffer = y2 + - scaled_buffer_offset(x_idx * 32, - y_idx * 32, - x->second_pre.y_stride, - &x->scale_factor[1]); - } - - build_inter32x32_predictors_sby_w(x, - dst_y + y_idx * 32 * dst_ystride + x_idx * 32, - dst_ystride, weight, mb_row + y_idx * 2, mb_col + x_idx * 2); - } - - x->mb_to_top_edge = edge[0]; - 
x->mb_to_bottom_edge = edge[1]; - x->mb_to_left_edge = edge[2]; - x->mb_to_right_edge = edge[3]; - - x->pre.y_buffer = y1; - - if (x->mode_info_context->mbmi.second_ref_frame > 0) { - x->second_pre.y_buffer = y2; - } -} - -void vp9_build_inter64x64_predictors_sby(MACROBLOCKD *x, - uint8_t *dst_y, - int dst_ystride, - int mb_row, - int mb_col) { - int weight = get_implicit_compoundinter_weight(x, mb_row, mb_col); - build_inter64x64_predictors_sby_w(x, dst_y, dst_ystride, weight, - mb_row, mb_col); -} - -#else - -void vp9_build_inter64x64_predictors_sby(MACROBLOCKD *x, - uint8_t *dst_y, - int dst_ystride, - int mb_row, - int mb_col) { - uint8_t *y1 = x->pre.y_buffer; - uint8_t *y2 = x->second_pre.y_buffer; - int edge[4], n; - - edge[0] = x->mb_to_top_edge; - edge[1] = x->mb_to_bottom_edge; - edge[2] = x->mb_to_left_edge; - edge[3] = x->mb_to_right_edge; - - for (n = 0; n < 4; n++) { - const int x_idx = n & 1, y_idx = n >> 1; - - x->mb_to_top_edge = edge[0] - ((y_idx * 32) << 3); - x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 32) << 3); - x->mb_to_left_edge = edge[2] - ((x_idx * 32) << 3); - x->mb_to_right_edge = edge[3] + (((1 - x_idx) * 32) << 3); - - x->pre.y_buffer = y1 + scaled_buffer_offset(x_idx * 32, - y_idx * 32, - x->pre.y_stride, - &x->scale_factor[0]); - - if (x->mode_info_context->mbmi.second_ref_frame > 0) { - x->second_pre.y_buffer = y2 + - scaled_buffer_offset(x_idx * 32, - y_idx * 32, - x->second_pre.y_stride, - &x->scale_factor[1]); - } - - vp9_build_inter32x32_predictors_sby(x, - dst_y + y_idx * 32 * dst_ystride + x_idx * 32, - dst_ystride, mb_row + y_idx * 2, mb_col + x_idx * 2); - } - - x->mb_to_top_edge = edge[0]; - x->mb_to_bottom_edge = edge[1]; - x->mb_to_left_edge = edge[2]; - x->mb_to_right_edge = edge[3]; - - x->pre.y_buffer = y1; - - if (x->mode_info_context->mbmi.second_ref_frame > 0) { - x->second_pre.y_buffer = y2; - } -} -#endif - -void vp9_build_inter64x64_predictors_sbuv(MACROBLOCKD *x, - uint8_t *dst_u, - uint8_t *dst_v, - int dst_uvstride, - int mb_row, - int mb_col) { - uint8_t *u1 = x->pre.u_buffer, *v1 = x->pre.v_buffer; - uint8_t *u2 = x->second_pre.u_buffer, *v2 = x->second_pre.v_buffer; - int edge[4], n; - - edge[0] = x->mb_to_top_edge; - edge[1] = x->mb_to_bottom_edge; - edge[2] = x->mb_to_left_edge; - edge[3] = x->mb_to_right_edge; - - for (n = 0; n < 4; n++) { - const int x_idx = n & 1, y_idx = n >> 1; - int scaled_uv_offset; - - x->mb_to_top_edge = edge[0] - ((y_idx * 32) << 3); - x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 32) << 3); - x->mb_to_left_edge = edge[2] - ((x_idx * 32) << 3); - x->mb_to_right_edge = edge[3] + (((1 - x_idx) * 32) << 3); - - scaled_uv_offset = scaled_buffer_offset(x_idx * 16, - y_idx * 16, - x->pre.uv_stride, - &x->scale_factor_uv[0]); - x->pre.u_buffer = u1 + scaled_uv_offset; - x->pre.v_buffer = v1 + scaled_uv_offset; - - if (x->mode_info_context->mbmi.second_ref_frame > 0) { - scaled_uv_offset = scaled_buffer_offset(x_idx * 16, - y_idx * 16, - x->second_pre.uv_stride, - &x->scale_factor_uv[1]); - x->second_pre.u_buffer = u2 + scaled_uv_offset; - x->second_pre.v_buffer = v2 + scaled_uv_offset; - } - - vp9_build_inter32x32_predictors_sbuv(x, - dst_u + y_idx * 16 * dst_uvstride + x_idx * 16, - dst_v + y_idx * 16 * dst_uvstride + x_idx * 16, - dst_uvstride, mb_row + y_idx * 2, mb_col + x_idx * 2); - } - - x->mb_to_top_edge = edge[0]; - x->mb_to_bottom_edge = edge[1]; - x->mb_to_left_edge = edge[2]; - x->mb_to_right_edge = edge[3]; - - x->pre.u_buffer = u1; - x->pre.v_buffer = v1; - - if 
(x->mode_info_context->mbmi.second_ref_frame > 0) { - x->second_pre.u_buffer = u2; - x->second_pre.v_buffer = v2; - } -} - -void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x, - uint8_t *dst_y, - uint8_t *dst_u, - uint8_t *dst_v, - int dst_ystride, - int dst_uvstride, - int mb_row, - int mb_col) { - vp9_build_inter64x64_predictors_sby(x, dst_y, dst_ystride, - mb_row, mb_col); - vp9_build_inter64x64_predictors_sbuv(x, dst_u, dst_v, dst_uvstride, - mb_row, mb_col); -#if CONFIG_COMP_INTERINTRA_PRED - if (x->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) { - vp9_build_interintra_64x64_predictors_sb(x, dst_y, dst_u, dst_v, - dst_ystride, dst_uvstride); - } -#endif -} - -static void build_inter4x4_predictors_mb(MACROBLOCKD *xd, - int mb_row, int mb_col) { - int i; - MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; - BLOCKD *blockd = xd->block; - int which_mv = 0; - const int use_second_ref = mbmi->second_ref_frame > 0; -#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT && defined(USE_IMPLICIT_WEIGHT_SPLITMV) - int weight = get_implicit_compoundinter_weight_splitmv(xd, mb_row, mb_col); -#else - int weight = AVERAGE_WEIGHT; -#endif - - if (xd->mode_info_context->mbmi.partitioning != PARTITIONING_4X4) { - for (i = 0; i < 16; i += 8) { - BLOCKD *d0 = &blockd[i]; - BLOCKD *d1 = &blockd[i + 2]; - const int y = i & 8; - - blockd[i + 0].bmi = xd->mode_info_context->bmi[i + 0]; - blockd[i + 2].bmi = xd->mode_info_context->bmi[i + 2]; - - for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { - if (mbmi->need_to_clamp_mvs) { - clamp_mv_to_umv_border(&blockd[i + 0].bmi.as_mv[which_mv].as_mv, xd); - clamp_mv_to_umv_border(&blockd[i + 2].bmi.as_mv[which_mv].as_mv, xd); - } - - build_2x1_inter_predictor(d0, d1, xd->scale_factor, 8, 16, which_mv, - which_mv ? weight : 0, - &xd->subpix, mb_row * 16 + y, mb_col * 16); - } - } - } else { - for (i = 0; i < 16; i += 2) { - BLOCKD *d0 = &blockd[i]; - BLOCKD *d1 = &blockd[i + 1]; - const int x = (i & 3) * 4; - const int y = (i >> 2) * 4; - - blockd[i + 0].bmi = xd->mode_info_context->bmi[i + 0]; - blockd[i + 1].bmi = xd->mode_info_context->bmi[i + 1]; - - for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { - build_2x1_inter_predictor(d0, d1, xd->scale_factor, 4, 16, which_mv, - which_mv ? weight : 0, - &xd->subpix, - mb_row * 16 + y, mb_col * 16 + x); - } - } - } -#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT -#if !defined(USE_IMPLICIT_WEIGHT_UV) - weight = AVERAGE_WEIGHT; -#endif -#endif - for (i = 16; i < 24; i += 2) { - BLOCKD *d0 = &blockd[i]; - BLOCKD *d1 = &blockd[i + 1]; - const int x = 4 * (i & 1); - const int y = ((i - 16) >> 1) * 4; - - for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { - build_2x1_inter_predictor(d0, d1, xd->scale_factor_uv, 4, 8, which_mv, - which_mv ? weight : 0, &xd->subpix, - mb_row * 8 + y, mb_col * 8 + x); - } - } -} - -static INLINE int round_mv_comp(int value) { - return (value < 0 ? 
value - 4 : value + 4) / 8; -} - -static int mi_mv_pred_row(MACROBLOCKD *mb, int off, int idx) { - const int temp = mb->mode_info_context->bmi[off + 0].as_mv[idx].as_mv.row + - mb->mode_info_context->bmi[off + 1].as_mv[idx].as_mv.row + - mb->mode_info_context->bmi[off + 4].as_mv[idx].as_mv.row + - mb->mode_info_context->bmi[off + 5].as_mv[idx].as_mv.row; - return round_mv_comp(temp) & mb->fullpixel_mask; -} - -static int mi_mv_pred_col(MACROBLOCKD *mb, int off, int idx) { - const int temp = mb->mode_info_context->bmi[off + 0].as_mv[idx].as_mv.col + - mb->mode_info_context->bmi[off + 1].as_mv[idx].as_mv.col + - mb->mode_info_context->bmi[off + 4].as_mv[idx].as_mv.col + - mb->mode_info_context->bmi[off + 5].as_mv[idx].as_mv.col; - return round_mv_comp(temp) & mb->fullpixel_mask; -} - -static int b_mv_pred_row(MACROBLOCKD *mb, int off, int idx) { - BLOCKD *const blockd = mb->block; - const int temp = blockd[off + 0].bmi.as_mv[idx].as_mv.row + - blockd[off + 1].bmi.as_mv[idx].as_mv.row + - blockd[off + 4].bmi.as_mv[idx].as_mv.row + - blockd[off + 5].bmi.as_mv[idx].as_mv.row; - return round_mv_comp(temp) & mb->fullpixel_mask; -} - -static int b_mv_pred_col(MACROBLOCKD *mb, int off, int idx) { - BLOCKD *const blockd = mb->block; - const int temp = blockd[off + 0].bmi.as_mv[idx].as_mv.col + - blockd[off + 1].bmi.as_mv[idx].as_mv.col + - blockd[off + 4].bmi.as_mv[idx].as_mv.col + - blockd[off + 5].bmi.as_mv[idx].as_mv.col; - return round_mv_comp(temp) & mb->fullpixel_mask; -} - - -static void build_4x4uvmvs(MACROBLOCKD *xd) { - int i, j; - BLOCKD *blockd = xd->block; - - for (i = 0; i < 2; i++) { - for (j = 0; j < 2; j++) { - const int yoffset = i * 8 + j * 2; - const int uoffset = 16 + i * 2 + j; - const int voffset = 20 + i * 2 + j; - - MV *u = &blockd[uoffset].bmi.as_mv[0].as_mv; - MV *v = &blockd[voffset].bmi.as_mv[0].as_mv; - u->row = mi_mv_pred_row(xd, yoffset, 0); - u->col = mi_mv_pred_col(xd, yoffset, 0); - - // if (x->mode_info_context->mbmi.need_to_clamp_mvs) - clamp_uvmv_to_umv_border(u, xd); - - // if (x->mode_info_context->mbmi.need_to_clamp_mvs) - clamp_uvmv_to_umv_border(u, xd); - - v->row = u->row; - v->col = u->col; - - if (xd->mode_info_context->mbmi.second_ref_frame > 0) { - u = &blockd[uoffset].bmi.as_mv[1].as_mv; - v = &blockd[voffset].bmi.as_mv[1].as_mv; - u->row = mi_mv_pred_row(xd, yoffset, 1); - u->col = mi_mv_pred_col(xd, yoffset, 1); - - // if (mbmi->need_to_clamp_mvs) - clamp_uvmv_to_umv_border(u, xd); - - // if (mbmi->need_to_clamp_mvs) - clamp_uvmv_to_umv_border(u, xd); - - v->row = u->row; - v->col = u->col; + // source + const uint8_t * const base_pre = arg->pre[which_mv][plane]; + const int pre_stride = arg->pre_stride[which_mv][plane]; + const uint8_t *const pre = base_pre + + scaled_buffer_offset(x, y, pre_stride, &xd->scale_factor[which_mv]); + struct scale_factors * const scale = + plane == 0 ? &xd->scale_factor[which_mv] : &xd->scale_factor_uv[which_mv]; + + // dest + uint8_t *const dst = arg->dst[plane] + arg->dst_stride[plane] * y + x; + + // motion vector + const MV *mv; + MV split_chroma_mv; + int_mv clamped_mv; + + if (xd->mode_info_context->mbmi.sb_type < BLOCK_SIZE_SB8X8) { + if (plane == 0) { + mv = &xd->mode_info_context->bmi[block].as_mv[which_mv].as_mv; + } else { + // TODO(jkoleszar): All chroma MVs in SPLITMV mode are taken as the + // same MV (the average of the 4 luma MVs) but we could do something + // smarter for non-4:2:0. Just punt for now, pending the changes to get + // rid of SPLITMV mode entirely. 
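+ // For example (illustrative values, not from this change): four luma + // row MVs of {3, 5, 4, 3} sum to 15, and round_mv_comp_q4(15) == + // (15 + 2) / 4 == 4, i.e. the average 3.75 rounded to nearest; negative + // sums round away from zero, e.g. round_mv_comp_q4(-15) == (-15 - 2) / 4 == -4.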
+ split_chroma_mv.row = mi_mv_pred_row_q4(xd, which_mv); + split_chroma_mv.col = mi_mv_pred_col_q4(xd, which_mv); + mv = &split_chroma_mv; } + } else { + mv = &xd->mode_info_context->mbmi.mv[which_mv].as_mv; } - } -} -void vp9_build_inter16x16_predictors_mb(MACROBLOCKD *xd, - uint8_t *dst_y, - uint8_t *dst_u, - uint8_t *dst_v, - int dst_ystride, - int dst_uvstride, - int mb_row, - int mb_col) { - vp9_build_inter16x16_predictors_mby(xd, dst_y, dst_ystride, mb_row, mb_col); - vp9_build_inter16x16_predictors_mbuv(xd, dst_u, dst_v, dst_uvstride, - mb_row, mb_col); -#if CONFIG_COMP_INTERINTRA_PRED - if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) { - vp9_build_interintra_16x16_predictors_mb(xd, dst_y, dst_u, dst_v, - dst_ystride, dst_uvstride); - } + /* TODO(jkoleszar): This clamping is done in the incorrect place for the + * scaling case. It needs to be done on the scaled MV, not the pre-scaling + * MV. Note however that it performs the subsampling aware scaling so + * that the result is always q4. + */ + clamped_mv.as_mv = clamp_mv_to_umv_border_sb(mv, bwl, bhl, + xd->plane[plane].subsampling_x, + xd->plane[plane].subsampling_y, + xd->mb_to_left_edge, + xd->mb_to_top_edge, + xd->mb_to_right_edge, + xd->mb_to_bottom_edge); + scale->set_scaled_offsets(scale, arg->y + y, arg->x + x); + + vp9_build_inter_predictor_q4(pre, pre_stride, + dst, arg->dst_stride[plane], + &clamped_mv, &xd->scale_factor[which_mv], + 4 << pred_w, 4 << pred_h, which_mv, + &xd->subpix); + } +} +void vp9_build_inter_predictors_sby(MACROBLOCKD *xd, + int mi_row, + int mi_col, + BLOCK_SIZE_TYPE bsize) { + struct build_inter_predictors_args args = { + xd, mi_col * MI_SIZE, mi_row * MI_SIZE, + {xd->plane[0].dst.buf, NULL, NULL}, {xd->plane[0].dst.stride, 0, 0}, + {{xd->plane[0].pre[0].buf, NULL, NULL}, + {xd->plane[0].pre[1].buf, NULL, NULL}}, + {{xd->plane[0].pre[0].stride, 0, 0}, {xd->plane[0].pre[1].stride, 0, 0}}, + }; + + foreach_predicted_block_in_plane(xd, bsize, 0, build_inter_predictors, &args); +} +void vp9_build_inter_predictors_sbuv(MACROBLOCKD *xd, + int mi_row, + int mi_col, + BLOCK_SIZE_TYPE bsize) { + struct build_inter_predictors_args args = { + xd, mi_col * MI_SIZE, mi_row * MI_SIZE, +#if CONFIG_ALPHA + {NULL, xd->plane[1].dst.buf, xd->plane[2].dst.buf, + xd->plane[3].dst.buf}, + {0, xd->plane[1].dst.stride, xd->plane[1].dst.stride, + xd->plane[3].dst.stride}, + {{NULL, xd->plane[1].pre[0].buf, xd->plane[2].pre[0].buf, + xd->plane[3].pre[0].buf}, + {NULL, xd->plane[1].pre[1].buf, xd->plane[2].pre[1].buf, + xd->plane[3].pre[1].buf}}, + {{0, xd->plane[1].pre[0].stride, xd->plane[1].pre[0].stride, + xd->plane[3].pre[0].stride}, + {0, xd->plane[1].pre[1].stride, xd->plane[1].pre[1].stride, + xd->plane[3].pre[1].stride}}, +#else + {NULL, xd->plane[1].dst.buf, xd->plane[2].dst.buf}, + {0, xd->plane[1].dst.stride, xd->plane[1].dst.stride}, + {{NULL, xd->plane[1].pre[0].buf, xd->plane[2].pre[0].buf}, + {NULL, xd->plane[1].pre[1].buf, xd->plane[2].pre[1].buf}}, + {{0, xd->plane[1].pre[0].stride, xd->plane[1].pre[0].stride}, + {0, xd->plane[1].pre[1].stride, xd->plane[1].pre[1].stride}}, #endif + }; + foreach_predicted_block_uv(xd, bsize, build_inter_predictors, &args); } +void vp9_build_inter_predictors_sb(MACROBLOCKD *xd, + int mi_row, int mi_col, + BLOCK_SIZE_TYPE bsize) { -void vp9_build_inter_predictors_mb(MACROBLOCKD *xd, - int mb_row, - int mb_col) { - if (xd->mode_info_context->mbmi.mode != SPLITMV) { - vp9_build_inter16x16_predictors_mb(xd, xd->predictor, - &xd->predictor[256], - 
&xd->predictor[320], 16, 8, - mb_row, mb_col); - - } else { - build_4x4uvmvs(xd); - build_inter4x4_predictors_mb(xd, mb_row, mb_col); - } + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); + vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, bsize); } /*encoder only*/ void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd, int mb_row, int mb_col) { - int i, j, weight; - BLOCKD *const blockd = xd->block; - - /* build uv mvs */ - for (i = 0; i < 2; i++) { - for (j = 0; j < 2; j++) { - const int yoffset = i * 8 + j * 2; - const int uoffset = 16 + i * 2 + j; - const int voffset = 20 + i * 2 + j; - - MV *u = &blockd[uoffset].bmi.as_mv[0].as_mv; - MV *v = &blockd[voffset].bmi.as_mv[0].as_mv; - - v->row = u->row = b_mv_pred_row(xd, yoffset, 0); - v->col = u->col = b_mv_pred_col(xd, yoffset, 0); - - if (xd->mode_info_context->mbmi.second_ref_frame > 0) { - u = &blockd[uoffset].bmi.as_mv[1].as_mv; - v = &blockd[voffset].bmi.as_mv[1].as_mv; - - v->row = u->row = b_mv_pred_row(xd, yoffset, 1); - v->row = u->col = b_mv_pred_row(xd, yoffset, 1); - } - } - } - -#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT && \ - defined(USE_IMPLICIT_WEIGHT_SPLITMV) && \ - defined(USE_IMPLICIT_WEIGHT_UV) - weight = get_implicit_compoundinter_weight_splitmv(xd, mb_row, mb_col); -#else - weight = AVERAGE_WEIGHT; -#endif - for (i = 16; i < 24; i += 2) { - const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0; - const int x = 4 * (i & 1); - const int y = ((i - 16) >> 1) * 4; - - int which_mv; - BLOCKD *d0 = &blockd[i]; - BLOCKD *d1 = &blockd[i + 1]; + vp9_build_inter_predictors_sbuv(xd, mb_row, mb_col, + BLOCK_SIZE_MB16X16); +} - for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { - build_2x1_inter_predictor(d0, d1, xd->scale_factor_uv, 4, 8, which_mv, - which_mv ? 
weight : 0, - &xd->subpix, mb_row * 8 + y, mb_col * 8 + x); - } +// TODO(dkovalev): find better place for this function +void vp9_setup_scale_factors(VP9_COMMON *cm, int i) { + const int ref = cm->active_ref_idx[i]; + struct scale_factors *const sf = &cm->active_ref_scale[i]; + if (ref >= NUM_YV12_BUFFERS) { + memset(sf, 0, sizeof(*sf)); + } else { + YV12_BUFFER_CONFIG *const fb = &cm->yv12_fb[ref]; + vp9_setup_scale_factors_for_frame(sf, + fb->y_crop_width, fb->y_crop_height, + cm->width, cm->height); } } + diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h index 831ce2a73b8e01531cf7915d7696ecaf54408e15..4e521850d738691a7f8de677c0df93d4f88bc6c7 100644 --- a/vp9/common/vp9_reconinter.h +++ b/vp9/common/vp9_reconinter.h @@ -15,61 +15,26 @@ #include "vp9/common/vp9_onyxc_int.h" struct subpix_fn_table; +void vp9_build_inter_predictors_sby(MACROBLOCKD *xd, + int mb_row, + int mb_col, + BLOCK_SIZE_TYPE bsize); -void vp9_build_inter16x16_predictors_mby(MACROBLOCKD *xd, - uint8_t *dst_y, - int dst_ystride, - int mb_row, - int mb_col); - -void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd, - uint8_t *dst_u, - uint8_t *dst_v, - int dst_uvstride, - int mb_row, - int mb_col); - -void vp9_build_inter16x16_predictors_mb(MACROBLOCKD *xd, - uint8_t *dst_y, - uint8_t *dst_u, - uint8_t *dst_v, - int dst_ystride, - int dst_uvstride, - int mb_row, - int mb_col); - -void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x, - uint8_t *dst_y, - uint8_t *dst_u, - uint8_t *dst_v, - int dst_ystride, - int dst_uvstride, - int mb_row, - int mb_col); - -void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x, - uint8_t *dst_y, - uint8_t *dst_u, - uint8_t *dst_v, - int dst_ystride, - int dst_uvstride, - int mb_row, - int mb_col); - -void vp9_build_inter_predictors_mb(MACROBLOCKD *xd, - int mb_row, - int mb_col); - -void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd, - int mb_row, - int mb_col); +void vp9_build_inter_predictors_sbuv(MACROBLOCKD *xd, + int mb_row, + int mb_col, + BLOCK_SIZE_TYPE bsize); + +void vp9_build_inter_predictors_sb(MACROBLOCKD *mb, + int mb_row, int mb_col, + BLOCK_SIZE_TYPE bsize); void vp9_setup_interp_filters(MACROBLOCKD *xd, INTERPOLATIONFILTERTYPE filter, VP9_COMMON *cm); void vp9_setup_scale_factors_for_frame(struct scale_factors *scale, - YV12_BUFFER_CONFIG *other, + int other_w, int other_h, int this_w, int this_h); void vp9_build_inter_predictor(const uint8_t *src, int src_stride, @@ -81,51 +46,73 @@ void vp9_build_inter_predictor(const uint8_t *src, int src_stride, void vp9_build_inter_predictor_q4(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, - const int_mv *fullpel_mv_q3, - const int_mv *frac_mv_q4, + const int_mv *mv_q4, const struct scale_factors *scale, int w, int h, int do_avg, const struct subpix_fn_table *subpix); -static int scale_value_x(int val, const struct scale_factors *scale) { - return val * scale->x_num / scale->x_den; +static int scaled_buffer_offset(int x_offset, int y_offset, int stride, + const struct scale_factors *scale) { + const int x = scale ? scale->scale_value_x(x_offset, scale) : x_offset; + const int y = scale ?
scale->scale_value_y(y_offset, scale) : y_offset; + return y * stride + x; } -static int scale_value_y(int val, const struct scale_factors *scale) { - return val * scale->y_num / scale->y_den; +static void setup_pred_plane(struct buf_2d *dst, + uint8_t *src, int stride, + int mi_row, int mi_col, + const struct scale_factors *scale, + int subsampling_x, int subsampling_y) { + const int x = (MI_SIZE * mi_col) >> subsampling_x; + const int y = (MI_SIZE * mi_row) >> subsampling_y; + dst->buf = src + scaled_buffer_offset(x, y, stride, scale); + dst->stride = stride; } -static int scaled_buffer_offset(int x_offset, - int y_offset, - int stride, - const struct scale_factors *scale) { - return scale_value_y(y_offset, scale) * stride + - scale_value_x(x_offset, scale); +// TODO(jkoleszar): audit all uses of this that don't set mb_row, mb_col +static void setup_dst_planes(MACROBLOCKD *xd, + const YV12_BUFFER_CONFIG *src, + int mi_row, int mi_col) { + uint8_t *buffers[4] = {src->y_buffer, src->u_buffer, src->v_buffer, + src->alpha_buffer}; + int strides[4] = {src->y_stride, src->uv_stride, src->uv_stride, + src->alpha_stride}; + int i; + + for (i = 0; i < MAX_MB_PLANE; ++i) { + struct macroblockd_plane *pd = &xd->plane[i]; + setup_pred_plane(&pd->dst, buffers[i], strides[i], mi_row, mi_col, NULL, + pd->subsampling_x, pd->subsampling_y); + } } -static void setup_pred_block(YV12_BUFFER_CONFIG *dst, - const YV12_BUFFER_CONFIG *src, - int mb_row, int mb_col, +static void setup_pre_planes(MACROBLOCKD *xd, + const YV12_BUFFER_CONFIG *src0, + const YV12_BUFFER_CONFIG *src1, + int mi_row, int mi_col, const struct scale_factors *scale, const struct scale_factors *scale_uv) { - const int recon_y_stride = src->y_stride; - const int recon_uv_stride = src->uv_stride; - int recon_yoffset; - int recon_uvoffset; - - if (scale) { - recon_yoffset = scaled_buffer_offset(16 * mb_col, 16 * mb_row, - recon_y_stride, scale); - recon_uvoffset = scaled_buffer_offset(8 * mb_col, 8 * mb_row, - recon_uv_stride, scale_uv); - } else { - recon_yoffset = 16 * mb_row * recon_y_stride + 16 * mb_col; - recon_uvoffset = 8 * mb_row * recon_uv_stride + 8 * mb_col; + const YV12_BUFFER_CONFIG *srcs[2] = {src0, src1}; + int i, j; + + for (i = 0; i < 2; ++i) { + const YV12_BUFFER_CONFIG *src = srcs[i]; + if (src) { + uint8_t *buffers[4] = {src->y_buffer, src->u_buffer, src->v_buffer, + src->alpha_buffer}; + int strides[4] = {src->y_stride, src->uv_stride, src->uv_stride, + src->alpha_stride}; + + for (j = 0; j < MAX_MB_PLANE; ++j) { + struct macroblockd_plane *pd = &xd->plane[j]; + const struct scale_factors *sf = j ? scale_uv : scale; + setup_pred_plane(&pd->pre[i], + buffers[j], strides[j], + mi_row, mi_col, sf ?
&sf[i] : NULL, + pd->subsampling_x, pd->subsampling_y); + } + } } - *dst = *src; - dst->y_buffer += recon_yoffset; - dst->u_buffer += recon_uvoffset; - dst->v_buffer += recon_uvoffset; } static void set_scale_factors(MACROBLOCKD *xd, @@ -138,4 +125,6 @@ static void set_scale_factors(MACROBLOCKD *xd, xd->scale_factor_uv[1] = xd->scale_factor[1]; } +void vp9_setup_scale_factors(VP9_COMMON *cm, int i); + #endif // VP9_COMMON_VP9_RECONINTER_H_ diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c index 186532c8b10907711a86444d24e16a4eea92ac38..85dfe5137798bf493bfe27175e0383504b3ed9d1 100644 --- a/vp9/common/vp9_reconintra.c +++ b/vp9/common/vp9_reconintra.c @@ -13,773 +13,345 @@ #include "./vpx_config.h" #include "vp9_rtcd.h" #include "vp9/common/vp9_reconintra.h" +#include "vp9/common/vp9_onyxc_int.h" #include "vpx_mem/vpx_mem.h" -// For skip_recon_mb(), add vp9_build_intra_predictors_mby_s(MACROBLOCKD *xd) -// and vp9_build_intra_predictors_mbuv_s(MACROBLOCKD *xd). - -// Using multiplication and shifting instead of division in diagonal prediction. -// iscale table is calculated from ((1 << 16) + (i + 2) / 2) / (i+2) and used as -// ((A + B) * iscale[i] + (1 << 15)) >> 16; -// where A and B are weighted pixel values. -static const unsigned int iscale[64] = { - 32768, 21845, 16384, 13107, 10923, 9362, 8192, 7282, - 6554, 5958, 5461, 5041, 4681, 4369, 4096, 3855, - 3641, 3449, 3277, 3121, 2979, 2849, 2731, 2621, - 2521, 2427, 2341, 2260, 2185, 2114, 2048, 1986, - 1928, 1872, 1820, 1771, 1725, 1680, 1638, 1598, - 1560, 1524, 1489, 1456, 1425, 1394, 1365, 1337, - 1311, 1285, 1260, 1237, 1214, 1192, 1170, 1150, - 1130, 1111, 1092, 1074, 1057, 1040, 1024, 1008, -}; - -static INLINE int iscale_round(int value, int i) { - return ROUND_POWER_OF_TWO(value * iscale[i], 16); -} - -static void d27_predictor(uint8_t *ypred_ptr, int y_stride, int n, +static void d27_predictor(uint8_t *ypred_ptr, int y_stride, + int bw, int bh, uint8_t *yabove_row, uint8_t *yleft_col) { int r, c; - - r = 0; - for (c = 0; c < n - 2; c++) { - int a = c & 1 ? yleft_col[r + 1] - : ROUND_POWER_OF_TWO(yleft_col[r] + yleft_col[r + 1], 1); - int b = yabove_row[c + 2]; - ypred_ptr[c] = iscale_round(2 * a + (c + 1) * b, 1 + c); + // first column + for (r = 0; r < bh - 1; ++r) { + ypred_ptr[r * y_stride] = ROUND_POWER_OF_TWO(yleft_col[r] + + yleft_col[r + 1], 1); } - - for (r = 1; r < n / 2 - 1; r++) { - for (c = 0; c < n - 2 - 2 * r; c++) { - int a = c & 1 ? yleft_col[r + 1] - : ROUND_POWER_OF_TWO(yleft_col[r] + yleft_col[r + 1], 1); - int b = ypred_ptr[(r - 1) * y_stride + c + 2]; - ypred_ptr[r * y_stride + c] = iscale_round(2 * a + (c + 1) * b, 1 + c); - } - } - - for (; r < n - 1; r++) { - for (c = 0; c < n; c++) { - int v = c & 1 ? 
yleft_col[r + 1] - : ROUND_POWER_OF_TWO(yleft_col[r] + yleft_col[r + 1], 1); - int h = r - c / 2; - ypred_ptr[h * y_stride + c] = v; - } + ypred_ptr[(bh - 1) * y_stride] = yleft_col[bh-1]; + ypred_ptr++; + // second column + for (r = 0; r < bh - 2; ++r) { + ypred_ptr[r * y_stride] = ROUND_POWER_OF_TWO(yleft_col[r] + + yleft_col[r + 1] * 2 + + yleft_col[r + 2], 2); } + ypred_ptr[(bh - 2) * y_stride] = ROUND_POWER_OF_TWO(yleft_col[bh - 2] + + yleft_col[bh - 1] * 3, + 2); + ypred_ptr[(bh - 1) * y_stride] = yleft_col[bh-1]; + ypred_ptr++; - c = 0; - r = n - 1; - ypred_ptr[r * y_stride] = ROUND_POWER_OF_TWO(ypred_ptr[(r - 1) * y_stride] + - yleft_col[r], 1); - for (r = n - 2; r >= n / 2; --r) { - int w = c + (n - 1 - r) * 2; - ypred_ptr[r * y_stride + w] = - ROUND_POWER_OF_TWO(ypred_ptr[(r - 1) * y_stride + w] + - ypred_ptr[r * y_stride + w - 1], 1); + // rest of last row + for (c = 0; c < bw - 2; ++c) { + ypred_ptr[(bh - 1) * y_stride + c] = yleft_col[bh-1]; } - for (c = 1; c < n; c++) { - for (r = n - 1; r >= n / 2 + c / 2; --r) { - int w = c + (n - 1 - r) * 2; - ypred_ptr[r * y_stride + w] = - ROUND_POWER_OF_TWO(ypred_ptr[(r - 1) * y_stride + w] + - ypred_ptr[r * y_stride + w - 1], 1); + for (r = bh - 2; r >= 0; --r) { + for (c = 0; c < bw - 2; ++c) { + ypred_ptr[r * y_stride + c] = ypred_ptr[(r + 1) * y_stride + c - 2]; } } } -static void d63_predictor(uint8_t *ypred_ptr, int y_stride, int n, +static void d63_predictor(uint8_t *ypred_ptr, int y_stride, + int bw, int bh, uint8_t *yabove_row, uint8_t *yleft_col) { int r, c; - - c = 0; - for (r = 0; r < n - 2; r++) { - int a = r & 1 ? yabove_row[c + 1] - : ROUND_POWER_OF_TWO(yabove_row[c] + yabove_row[c + 1], 1); - int b = yleft_col[r + 2]; - ypred_ptr[r * y_stride] = iscale_round(2 * a + (r + 1) * b, 1 + r); - } - - for (c = 1; c < n / 2 - 1; c++) { - for (r = 0; r < n - 2 - 2 * c; r++) { - int a = r & 1 ? yabove_row[c + 1] - : ROUND_POWER_OF_TWO(yabove_row[c] + yabove_row[c + 1], 1); - int b = ypred_ptr[(r + 2) * y_stride + c - 1]; - ypred_ptr[r * y_stride + c] = iscale_round(2 * a + (c + 1) * b, 1 + c); - } - } - - for (; c < n - 1; ++c) { - for (r = 0; r < n; r++) { - int v = r & 1 ? 
yabove_row[c + 1]
-                    : ROUND_POWER_OF_TWO(yabove_row[c] + yabove_row[c + 1], 1);
-      int w = c - r / 2;
-      ypred_ptr[r * y_stride + w] = v;
-    }
-  }
-
-  r = 0;
-  c = n - 1;
-  ypred_ptr[c] = ROUND_POWER_OF_TWO(ypred_ptr[(c - 1)] + yabove_row[c], 1);
-  for (c = n - 2; c >= n / 2; --c) {
-    int h = r + (n - 1 - c) * 2;
-    ypred_ptr[h * y_stride + c] =
-        ROUND_POWER_OF_TWO(ypred_ptr[h * y_stride + c - 1] +
-                           ypred_ptr[(h - 1) * y_stride + c], 1);
-  }
-
-  for (r = 1; r < n; r++) {
-    for (c = n - 1; c >= n / 2 + r / 2; --c) {
-      int h = r + (n - 1 - c) * 2;
-      ypred_ptr[h * y_stride + c] =
-          ROUND_POWER_OF_TWO(ypred_ptr[h * y_stride + c - 1] +
-                             ypred_ptr[(h - 1) * y_stride + c], 1);
+  for (r = 0; r < bh; ++r) {
+    for (c = 0; c < bw; ++c) {
+      if (r & 1) {
+        ypred_ptr[c] = ROUND_POWER_OF_TWO(yabove_row[r/2 + c] +
+                                          yabove_row[r/2 + c + 1] * 2 +
+                                          yabove_row[r/2 + c + 2], 2);
+      } else {
+        ypred_ptr[c] = ROUND_POWER_OF_TWO(yabove_row[r/2 + c] +
+                                          yabove_row[r/2 + c + 1], 1);
+      }
     }
+    ypred_ptr += y_stride;
   }
 }

-static void d45_predictor(uint8_t *ypred_ptr, int y_stride, int n,
+static void d45_predictor(uint8_t *ypred_ptr, int y_stride,
+                          int bw, int bh,
                           uint8_t *yabove_row, uint8_t *yleft_col) {
   int r, c;
-
-  for (r = 0; r < n - 1; ++r) {
-    for (c = 0; c <= r; ++c) {
-      ypred_ptr[(r - c) * y_stride + c] = iscale_round(
-          yabove_row[r + 1] * (c + 1) + yleft_col[r + 1] * (r - c + 1), r);
-    }
-  }
-
-  for (c = 0; c <= r; ++c) {
-    int yabove_ext = yabove_row[r];  // clip_pixel(2 * yabove_row[r] -
-                                     //            yabove_row[r - 1]);
-    int yleft_ext = yleft_col[r];  // clip_pixel(2 * yleft_col[r] -
-                                   //            yleft_col[r-1]);
-    ypred_ptr[(r - c) * y_stride + c] =
-        iscale_round(yabove_ext * (c + 1) + yleft_ext * (r - c + 1), r);
-  }
-  for (r = 1; r < n; ++r) {
-    for (c = n - r; c < n; ++c) {
-      const int yabove_ext = ypred_ptr[(r - 1) * y_stride + c];
-      const int yleft_ext = ypred_ptr[r * y_stride + c - 1];
-      ypred_ptr[r * y_stride + c] =
-          ROUND_POWER_OF_TWO(yabove_ext + yleft_ext, 1);
+  for (r = 0; r < bh; ++r) {
+    for (c = 0; c < bw; ++c) {
+      if (r + c + 2 < bw * 2)
+        ypred_ptr[c] = ROUND_POWER_OF_TWO(yabove_row[r + c] +
+                                          yabove_row[r + c + 1] * 2 +
+                                          yabove_row[r + c + 2], 2);
+      else
+        ypred_ptr[c] = yabove_row[bw * 2 - 1];
     }
+    ypred_ptr += y_stride;
   }
 }

-static void d117_predictor(uint8_t *ypred_ptr, int y_stride, int n,
+static void d117_predictor(uint8_t *ypred_ptr, int y_stride,
+                           int bw, int bh,
                            uint8_t *yabove_row, uint8_t *yleft_col) {
   int r, c;
-  for (c = 0; c < n; c++)
+  // first row
+  for (c = 0; c < bw; c++)
     ypred_ptr[c] = ROUND_POWER_OF_TWO(yabove_row[c - 1] + yabove_row[c], 1);
   ypred_ptr += y_stride;
-  for (c = 0; c < n; c++)
-    ypred_ptr[c] = yabove_row[c - 1];
+
+  // second row
+  ypred_ptr[0] = ROUND_POWER_OF_TWO(yleft_col[0] +
+                                    yabove_row[-1] * 2 +
+                                    yabove_row[0], 2);
+  for (c = 1; c < bw; c++)
+    ypred_ptr[c] = ROUND_POWER_OF_TWO(yabove_row[c - 2] +
+                                      yabove_row[c - 1] * 2 +
+                                      yabove_row[c], 2);
   ypred_ptr += y_stride;
-  for (r = 2; r < n; ++r) {
-    ypred_ptr[0] = yleft_col[r - 2];
-    for (c = 1; c < n; c++)
+
+  // the rest of first col
+  ypred_ptr[0] = ROUND_POWER_OF_TWO(yabove_row[-1] +
+                                    yleft_col[0] * 2 +
+                                    yleft_col[1], 2);
+  for (r = 3; r < bh; ++r)
+    ypred_ptr[(r - 2) * y_stride] = ROUND_POWER_OF_TWO(yleft_col[r - 3] +
+                                                       yleft_col[r - 2] * 2 +
+                                                       yleft_col[r - 1], 2);
+  // the rest of the block
+  for (r = 2; r < bh; ++r) {
+    for (c = 1; c < bw; c++)
       ypred_ptr[c] = ypred_ptr[-2 * y_stride + c - 1];
     ypred_ptr += y_stride;
   }
 }

-static void d135_predictor(uint8_t *ypred_ptr, int y_stride, int n,
+
+static void 
d135_predictor(uint8_t *ypred_ptr, int y_stride, + int bw, int bh, uint8_t *yabove_row, uint8_t *yleft_col) { int r, c; - ypred_ptr[0] = yabove_row[-1]; - for (c = 1; c < n; c++) - ypred_ptr[c] = yabove_row[c - 1]; - for (r = 1; r < n; ++r) - ypred_ptr[r * y_stride] = yleft_col[r - 1]; + ypred_ptr[0] = ROUND_POWER_OF_TWO(yleft_col[0] + + yabove_row[-1] * 2 + + yabove_row[0], 2); + for (c = 1; c < bw; c++) + ypred_ptr[c] = ROUND_POWER_OF_TWO(yabove_row[c - 2] + + yabove_row[c - 1] * 2 + + yabove_row[c], 2); + + ypred_ptr[y_stride] = ROUND_POWER_OF_TWO(yabove_row[-1] + + yleft_col[0] * 2 + + yleft_col[1], 2); + for (r = 2; r < bh; ++r) + ypred_ptr[r * y_stride] = ROUND_POWER_OF_TWO(yleft_col[r - 2] + + yleft_col[r - 1] * 2 + + yleft_col[r], 2); ypred_ptr += y_stride; - for (r = 1; r < n; ++r) { - for (c = 1; c < n; c++) { + for (r = 1; r < bh; ++r) { + for (c = 1; c < bw; c++) ypred_ptr[c] = ypred_ptr[-y_stride + c - 1]; - } ypred_ptr += y_stride; } } -static void d153_predictor(uint8_t *ypred_ptr, int y_stride, int n, - uint8_t *yabove_row, uint8_t *yleft_col) { +static void d153_predictor(uint8_t *ypred_ptr, + int y_stride, + int bw, int bh, + uint8_t *yabove_row, + uint8_t *yleft_col) { int r, c; ypred_ptr[0] = ROUND_POWER_OF_TWO(yabove_row[-1] + yleft_col[0], 1); - for (r = 1; r < n; r++) + for (r = 1; r < bh; r++) ypred_ptr[r * y_stride] = ROUND_POWER_OF_TWO(yleft_col[r - 1] + yleft_col[r], 1); ypred_ptr++; - ypred_ptr[0] = yabove_row[-1]; - for (r = 1; r < n; r++) - ypred_ptr[r * y_stride] = yleft_col[r - 1]; + + ypred_ptr[0] = ROUND_POWER_OF_TWO(yleft_col[0] + + yabove_row[-1] * 2 + + yabove_row[0], 2); + ypred_ptr[y_stride] = ROUND_POWER_OF_TWO(yabove_row[-1] + + yleft_col[0] * 2 + + yleft_col[1], 2); + for (r = 2; r < bh; r++) + ypred_ptr[r * y_stride] = ROUND_POWER_OF_TWO(yleft_col[r - 2] + + yleft_col[r - 1] * 2 + + yleft_col[r], 2); ypred_ptr++; - for (c = 0; c < n - 2; c++) - ypred_ptr[c] = yabove_row[c]; + for (c = 0; c < bw - 2; c++) + ypred_ptr[c] = ROUND_POWER_OF_TWO(yabove_row[c - 1] + + yabove_row[c] * 2 + + yabove_row[c + 1], 2); ypred_ptr += y_stride; - for (r = 1; r < n; ++r) { - for (c = 0; c < n - 2; c++) + for (r = 1; r < bh; ++r) { + for (c = 0; c < bw - 2; c++) ypred_ptr[c] = ypred_ptr[-y_stride + c - 2]; ypred_ptr += y_stride; } } -static void corner_predictor(uint8_t *ypred_ptr, int y_stride, int n, - uint8_t *yabove_row, - uint8_t *yleft_col) { - int mh, mv, maxgradh, maxgradv, x, y, nx, ny; - int i, j; - int top_left = yabove_row[-1]; - mh = mv = 0; - maxgradh = yabove_row[1] - top_left; - maxgradv = yleft_col[1] - top_left; - for (i = 2; i < n; ++i) { - int gh = yabove_row[i] - yabove_row[i - 2]; - int gv = yleft_col[i] - yleft_col[i - 2]; - if (gh > maxgradh) { - maxgradh = gh; - mh = i - 1; - } - if (gv > maxgradv) { - maxgradv = gv; - mv = i - 1; - } - } - nx = mh + mv + 3; - ny = 2 * n + 1 - nx; - - x = top_left; - for (i = 0; i <= mh; ++i) x += yabove_row[i]; - for (i = 0; i <= mv; ++i) x += yleft_col[i]; - x += (nx >> 1); - x /= nx; - y = 0; - for (i = mh + 1; i < n; ++i) y += yabove_row[i]; - for (i = mv + 1; i < n; ++i) y += yleft_col[i]; - y += (ny >> 1); - y /= ny; - - for (i = 0; i < n; ++i) { - for (j = 0; j < n; ++j) - ypred_ptr[j] = (i <= mh && j <= mv ? 
x : y); - ypred_ptr += y_stride; - } -} - -void vp9_recon_intra_mbuv(MACROBLOCKD *xd) { - int i; - for (i = 16; i < 24; i += 2) { - BLOCKD *b = &xd->block[i]; - vp9_recon2b(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - } -} - -static INLINE int log2_minus_1(int n) { - switch (n) { - case 4: return 1; - case 8: return 2; - case 16: return 3; - case 32: return 4; - case 64: return 5; - default: - assert(0); - return 0; - } -} - - -void vp9_build_intra_predictors_internal(uint8_t *src, int src_stride, - uint8_t *ypred_ptr, - int y_stride, int mode, int bsize, - int up_available, int left_available, - int right_available) { +void vp9_build_intra_predictors(uint8_t *src, int src_stride, + uint8_t *ypred_ptr, + int y_stride, int mode, + int bw, int bh, + int up_available, int left_available, + int right_available) { int r, c, i; - uint8_t yleft_col[64], yabove_data[65], ytop_left; + uint8_t yleft_col[64], yabove_data[129], ytop_left; uint8_t *yabove_row = yabove_data + 1; - /* - * 127 127 127 .. 127 127 127 127 127 127 - * 129 A B .. Y Z - * 129 C D .. W X - * 129 E F .. U V - * 129 G H .. S T T T T T - * .. - */ + + // 127 127 127 .. 127 127 127 127 127 127 + // 129 A B .. Y Z + // 129 C D .. W X + // 129 E F .. U V + // 129 G H .. S T T T T T + // .. + + assert(bw == bh); if (left_available) { - for (i = 0; i < bsize; i++) + for (i = 0; i < bh; i++) yleft_col[i] = src[i * src_stride - 1]; } else { - vpx_memset(yleft_col, 129, bsize); + vpx_memset(yleft_col, 129, bh); } if (up_available) { uint8_t *yabove_ptr = src - src_stride; - vpx_memcpy(yabove_row, yabove_ptr, bsize); - if (left_available) { - ytop_left = yabove_ptr[-1]; - } else { - ytop_left = 127; - } + vpx_memcpy(yabove_row, yabove_ptr, bw); + if (bw == 4 && right_available) + vpx_memcpy(yabove_row + bw, yabove_ptr + bw, bw); + else + vpx_memset(yabove_row + bw, yabove_row[bw -1], bw); + ytop_left = left_available ? 
yabove_ptr[-1] : 129; } else { - vpx_memset(yabove_row, 127, bsize); + vpx_memset(yabove_row, 127, bw * 2); ytop_left = 127; } yabove_row[-1] = ytop_left; - /* for Y */ switch (mode) { case DC_PRED: { - int expected_dc; int i; - int shift; + int expected_dc = 128; int average = 0; - int log2_bsize_minus_1 = log2_minus_1(bsize); + int count = 0; if (up_available || left_available) { if (up_available) { - for (i = 0; i < bsize; i++) { + for (i = 0; i < bw; i++) average += yabove_row[i]; - } + count += bw; } - if (left_available) { - for (i = 0; i < bsize; i++) { + for (i = 0; i < bh; i++) average += yleft_col[i]; - } + count += bh; } - shift = log2_bsize_minus_1 + up_available + left_available; - expected_dc = ROUND_POWER_OF_TWO(average, shift); - } else { - expected_dc = 128; + expected_dc = (average + (count >> 1)) / count; } - - for (r = 0; r < bsize; r++) { - vpx_memset(ypred_ptr, expected_dc, bsize); + for (r = 0; r < bh; r++) { + vpx_memset(ypred_ptr, expected_dc, bw); ypred_ptr += y_stride; } } break; case V_PRED: - for (r = 0; r < bsize; r++) { - memcpy(ypred_ptr, yabove_row, bsize); + for (r = 0; r < bh; r++) { + vpx_memcpy(ypred_ptr, yabove_row, bw); ypred_ptr += y_stride; } break; case H_PRED: - for (r = 0; r < bsize; r++) { - vpx_memset(ypred_ptr, yleft_col[r], bsize); + for (r = 0; r < bh; r++) { + vpx_memset(ypred_ptr, yleft_col[r], bw); ypred_ptr += y_stride; } break; case TM_PRED: - for (r = 0; r < bsize; r++) { - for (c = 0; c < bsize; c++) { + for (r = 0; r < bh; r++) { + for (c = 0; c < bw; c++) ypred_ptr[c] = clip_pixel(yleft_col[r] + yabove_row[c] - ytop_left); - } - ypred_ptr += y_stride; } break; case D45_PRED: - d45_predictor(ypred_ptr, y_stride, bsize, yabove_row, yleft_col); + d45_predictor(ypred_ptr, y_stride, bw, bh, yabove_row, yleft_col); break; case D135_PRED: - d135_predictor(ypred_ptr, y_stride, bsize, yabove_row, yleft_col); + d135_predictor(ypred_ptr, y_stride, bw, bh, yabove_row, yleft_col); break; case D117_PRED: - d117_predictor(ypred_ptr, y_stride, bsize, yabove_row, yleft_col); + d117_predictor(ypred_ptr, y_stride, bw, bh, yabove_row, yleft_col); break; case D153_PRED: - d153_predictor(ypred_ptr, y_stride, bsize, yabove_row, yleft_col); + d153_predictor(ypred_ptr, y_stride, bw, bh, yabove_row, yleft_col); break; case D27_PRED: - d27_predictor(ypred_ptr, y_stride, bsize, yabove_row, yleft_col); + d27_predictor(ypred_ptr, y_stride, bw, bh, yabove_row, yleft_col); break; case D63_PRED: - d63_predictor(ypred_ptr, y_stride, bsize, yabove_row, yleft_col); - break; - case I8X8_PRED: - case B_PRED: - case NEARESTMV: - case NEARMV: - case ZEROMV: - case NEWMV: - case SPLITMV: - case MB_MODE_COUNT: - break; - } -} - -#if CONFIG_COMP_INTERINTRA_PRED -static void combine_interintra(MB_PREDICTION_MODE mode, - uint8_t *interpred, - int interstride, - uint8_t *intrapred, - int intrastride, - int size) { - // TODO(debargha): Explore different ways of combining predictors - // or designing the tables below - static const int scale_bits = 8; - static const int scale_max = 256; // 1 << scale_bits; - static const int scale_round = 127; // (1 << (scale_bits - 1)); - // This table is a function A + B*exp(-kx), where x is hor. 
index - static const int weights1d[64] = { - 128, 125, 122, 119, 116, 114, 111, 109, - 107, 105, 103, 101, 99, 97, 96, 94, - 93, 91, 90, 89, 88, 86, 85, 84, - 83, 82, 81, 81, 80, 79, 78, 78, - 77, 76, 76, 75, 75, 74, 74, 73, - 73, 72, 72, 71, 71, 71, 70, 70, - 70, 70, 69, 69, 69, 69, 68, 68, - 68, 68, 68, 67, 67, 67, 67, 67, - }; - - int size_scale = (size >= 64 ? 1: - size == 32 ? 2 : - size == 16 ? 4 : - size == 8 ? 8 : 16); - int i, j; - switch (mode) { - case V_PRED: - for (i = 0; i < size; ++i) { - for (j = 0; j < size; ++j) { - int k = i * interstride + j; - int scale = weights1d[i * size_scale]; - interpred[k] = - ((scale_max - scale) * interpred[k] + - scale * intrapred[i * intrastride + j] + scale_round) - >> scale_bits; - } - } + d63_predictor(ypred_ptr, y_stride, bw, bh, yabove_row, yleft_col); break; - - case H_PRED: - for (i = 0; i < size; ++i) { - for (j = 0; j < size; ++j) { - int k = i * interstride + j; - int scale = weights1d[j * size_scale]; - interpred[k] = - ((scale_max - scale) * interpred[k] + - scale * intrapred[i * intrastride + j] + scale_round) - >> scale_bits; - } - } - break; - - case D63_PRED: - case D117_PRED: - for (i = 0; i < size; ++i) { - for (j = 0; j < size; ++j) { - int k = i * interstride + j; - int scale = (weights1d[i * size_scale] * 3 + - weights1d[j * size_scale]) >> 2; - interpred[k] = - ((scale_max - scale) * interpred[k] + - scale * intrapred[i * intrastride + j] + scale_round) - >> scale_bits; - } - } - break; - - case D27_PRED: - case D153_PRED: - for (i = 0; i < size; ++i) { - for (j = 0; j < size; ++j) { - int k = i * interstride + j; - int scale = (weights1d[j * size_scale] * 3 + - weights1d[i * size_scale]) >> 2; - interpred[k] = - ((scale_max - scale) * interpred[k] + - scale * intrapred[i * intrastride + j] + scale_round) - >> scale_bits; - } - } - break; - - case D135_PRED: - for (i = 0; i < size; ++i) { - for (j = 0; j < size; ++j) { - int k = i * interstride + j; - int scale = weights1d[(i < j ? 
i : j) * size_scale]; - interpred[k] = - ((scale_max - scale) * interpred[k] + - scale * intrapred[i * intrastride + j] + scale_round) - >> scale_bits; - } - } - break; - - case D45_PRED: - for (i = 0; i < size; ++i) { - for (j = 0; j < size; ++j) { - int k = i * interstride + j; - int scale = (weights1d[i * size_scale] + - weights1d[j * size_scale]) >> 1; - interpred[k] = - ((scale_max - scale) * interpred[k] + - scale * intrapred[i * intrastride + j] + scale_round) - >> scale_bits; - } - } - break; - - case TM_PRED: - case DC_PRED: default: - // simple average - for (i = 0; i < size; ++i) { - for (j = 0; j < size; ++j) { - int k = i * interstride + j; - interpred[k] = (interpred[k] + intrapred[i * intrastride + j]) >> 1; - } - } break; } } -void vp9_build_interintra_16x16_predictors_mb(MACROBLOCKD *xd, - uint8_t *ypred, - uint8_t *upred, - uint8_t *vpred, - int ystride, int uvstride) { - vp9_build_interintra_16x16_predictors_mby(xd, ypred, ystride); - vp9_build_interintra_16x16_predictors_mbuv(xd, upred, vpred, uvstride); -} - -void vp9_build_interintra_16x16_predictors_mby(MACROBLOCKD *xd, - uint8_t *ypred, - int ystride) { - uint8_t intrapredictor[256]; - vp9_build_intra_predictors_internal( - xd->dst.y_buffer, xd->dst.y_stride, - intrapredictor, 16, - xd->mode_info_context->mbmi.interintra_mode, 16, - xd->up_available, xd->left_available, xd->right_available); - combine_interintra(xd->mode_info_context->mbmi.interintra_mode, - ypred, ystride, intrapredictor, 16, 16); -} - -void vp9_build_interintra_16x16_predictors_mbuv(MACROBLOCKD *xd, - uint8_t *upred, - uint8_t *vpred, - int uvstride) { - uint8_t uintrapredictor[64]; - uint8_t vintrapredictor[64]; - vp9_build_intra_predictors_internal( - xd->dst.u_buffer, xd->dst.uv_stride, - uintrapredictor, 8, - xd->mode_info_context->mbmi.interintra_uv_mode, 8, - xd->up_available, xd->left_available, xd->right_available); - vp9_build_intra_predictors_internal( - xd->dst.v_buffer, xd->dst.uv_stride, - vintrapredictor, 8, - xd->mode_info_context->mbmi.interintra_uv_mode, 8, - xd->up_available, xd->left_available, xd->right_available); - combine_interintra(xd->mode_info_context->mbmi.interintra_uv_mode, - upred, uvstride, uintrapredictor, 8, 8); - combine_interintra(xd->mode_info_context->mbmi.interintra_uv_mode, - vpred, uvstride, vintrapredictor, 8, 8); -} - -void vp9_build_interintra_32x32_predictors_sby(MACROBLOCKD *xd, - uint8_t *ypred, - int ystride) { - uint8_t intrapredictor[1024]; - vp9_build_intra_predictors_internal( - xd->dst.y_buffer, xd->dst.y_stride, - intrapredictor, 32, - xd->mode_info_context->mbmi.interintra_mode, 32, - xd->up_available, xd->left_available, xd->right_available); - combine_interintra(xd->mode_info_context->mbmi.interintra_mode, - ypred, ystride, intrapredictor, 32, 32); -} - -void vp9_build_interintra_32x32_predictors_sbuv(MACROBLOCKD *xd, - uint8_t *upred, - uint8_t *vpred, - int uvstride) { - uint8_t uintrapredictor[256]; - uint8_t vintrapredictor[256]; - vp9_build_intra_predictors_internal( - xd->dst.u_buffer, xd->dst.uv_stride, - uintrapredictor, 16, - xd->mode_info_context->mbmi.interintra_uv_mode, 16, - xd->up_available, xd->left_available, xd->right_available); - vp9_build_intra_predictors_internal( - xd->dst.v_buffer, xd->dst.uv_stride, - vintrapredictor, 16, - xd->mode_info_context->mbmi.interintra_uv_mode, 16, - xd->up_available, xd->left_available, xd->right_available); - combine_interintra(xd->mode_info_context->mbmi.interintra_uv_mode, - upred, uvstride, uintrapredictor, 16, 16); - 
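
The blending arithmetic in each of these removed interintra cases is identical; only the weight derivation differs per intra mode (row-only for V_PRED, column-only for H_PRED, a 3:1 or 1:1 row/column mix for the diagonal modes, min(i, j) for D135_PRED). As a compact reference for what is being deleted, here is a sketch of the V_PRED case, with a hypothetical weights[] array standing in for weights1d[i * size_scale]; this is illustrative only, not part of the patch:

    /* Sketch: per-row blend of the inter and intra predictors.
     * Weights are in 0..256 with 8 fractional bits, as in the removed
     * weights1d[] table; rounding constant 127 matches the removed code. */
    static void blend_interintra_v_sketch(uint8_t *interpred, int interstride,
                                          const uint8_t *intrapred,
                                          int intrastride,
                                          const int *weights, int size) {
      int i, j;
      for (i = 0; i < size; ++i) {
        const int scale = weights[i];  /* larger near the top (intra) edge */
        for (j = 0; j < size; ++j) {
          const int k = i * interstride + j;
          interpred[k] = ((256 - scale) * interpred[k] +
                          scale * intrapred[i * intrastride + j] + 127) >> 8;
        }
      }
    }
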
combine_interintra(xd->mode_info_context->mbmi.interintra_uv_mode, - vpred, uvstride, vintrapredictor, 16, 16); -} - -void vp9_build_interintra_32x32_predictors_sb(MACROBLOCKD *xd, - uint8_t *ypred, - uint8_t *upred, - uint8_t *vpred, - int ystride, - int uvstride) { - vp9_build_interintra_32x32_predictors_sby(xd, ypred, ystride); - vp9_build_interintra_32x32_predictors_sbuv(xd, upred, vpred, uvstride); -} - -void vp9_build_interintra_64x64_predictors_sby(MACROBLOCKD *xd, - uint8_t *ypred, - int ystride) { - uint8_t intrapredictor[4096]; - const int mode = xd->mode_info_context->mbmi.interintra_mode; - vp9_build_intra_predictors_internal(xd->dst.y_buffer, xd->dst.y_stride, - intrapredictor, 64, mode, 64, - xd->up_available, xd->left_available, - xd->right_available); - combine_interintra(xd->mode_info_context->mbmi.interintra_mode, - ypred, ystride, intrapredictor, 64, 64); -} - -void vp9_build_interintra_64x64_predictors_sbuv(MACROBLOCKD *xd, - uint8_t *upred, - uint8_t *vpred, - int uvstride) { - uint8_t uintrapredictor[1024]; - uint8_t vintrapredictor[1024]; - const int mode = xd->mode_info_context->mbmi.interintra_uv_mode; - vp9_build_intra_predictors_internal(xd->dst.u_buffer, xd->dst.uv_stride, - uintrapredictor, 32, mode, 32, - xd->up_available, xd->left_available, - xd->right_available); - vp9_build_intra_predictors_internal(xd->dst.v_buffer, xd->dst.uv_stride, - vintrapredictor, 32, mode, 32, - xd->up_available, xd->left_available, - xd->right_available); - combine_interintra(xd->mode_info_context->mbmi.interintra_uv_mode, - upred, uvstride, uintrapredictor, 32, 32); - combine_interintra(xd->mode_info_context->mbmi.interintra_uv_mode, - vpred, uvstride, vintrapredictor, 32, 32); -} - -void vp9_build_interintra_64x64_predictors_sb(MACROBLOCKD *xd, - uint8_t *ypred, - uint8_t *upred, - uint8_t *vpred, - int ystride, - int uvstride) { - vp9_build_interintra_64x64_predictors_sby(xd, ypred, ystride); - vp9_build_interintra_64x64_predictors_sbuv(xd, upred, vpred, uvstride); -} -#endif // CONFIG_COMP_INTERINTRA_PRED - -void vp9_build_intra_predictors_mby(MACROBLOCKD *xd) { - vp9_build_intra_predictors_internal(xd->dst.y_buffer, xd->dst.y_stride, - xd->predictor, 16, - xd->mode_info_context->mbmi.mode, 16, - xd->up_available, xd->left_available, - xd->right_available); -} - -void vp9_build_intra_predictors_mby_s(MACROBLOCKD *xd) { - vp9_build_intra_predictors_internal(xd->dst.y_buffer, xd->dst.y_stride, - xd->dst.y_buffer, xd->dst.y_stride, - xd->mode_info_context->mbmi.mode, 16, - xd->up_available, xd->left_available, - xd->right_available); -} - -void vp9_build_intra_predictors_sby_s(MACROBLOCKD *xd) { - vp9_build_intra_predictors_internal(xd->dst.y_buffer, xd->dst.y_stride, - xd->dst.y_buffer, xd->dst.y_stride, - xd->mode_info_context->mbmi.mode, 32, - xd->up_available, xd->left_available, - xd->right_available); -} - -void vp9_build_intra_predictors_sb64y_s(MACROBLOCKD *xd) { - vp9_build_intra_predictors_internal(xd->dst.y_buffer, xd->dst.y_stride, - xd->dst.y_buffer, xd->dst.y_stride, - xd->mode_info_context->mbmi.mode, 64, - xd->up_available, xd->left_available, - xd->right_available); -} - -void vp9_build_intra_predictors_mbuv_internal(MACROBLOCKD *xd, - uint8_t *upred_ptr, - uint8_t *vpred_ptr, - int uv_stride, - int mode, int bsize) { - vp9_build_intra_predictors_internal(xd->dst.u_buffer, xd->dst.uv_stride, - upred_ptr, uv_stride, mode, bsize, - xd->up_available, xd->left_available, - xd->right_available); - vp9_build_intra_predictors_internal(xd->dst.v_buffer, 
xd->dst.uv_stride, - vpred_ptr, uv_stride, mode, bsize, - xd->up_available, xd->left_available, - xd->right_available); -} - -void vp9_build_intra_predictors_mbuv(MACROBLOCKD *xd) { - vp9_build_intra_predictors_mbuv_internal(xd, &xd->predictor[256], - &xd->predictor[320], 8, - xd->mode_info_context->mbmi.uv_mode, - 8); -} - -void vp9_build_intra_predictors_mbuv_s(MACROBLOCKD *xd) { - vp9_build_intra_predictors_mbuv_internal(xd, xd->dst.u_buffer, - xd->dst.v_buffer, - xd->dst.uv_stride, - xd->mode_info_context->mbmi.uv_mode, - 8); -} - -void vp9_build_intra_predictors_sbuv_s(MACROBLOCKD *xd) { - vp9_build_intra_predictors_mbuv_internal(xd, xd->dst.u_buffer, - xd->dst.v_buffer, xd->dst.uv_stride, - xd->mode_info_context->mbmi.uv_mode, - 16); -} - -void vp9_build_intra_predictors_sb64uv_s(MACROBLOCKD *xd) { - vp9_build_intra_predictors_mbuv_internal(xd, xd->dst.u_buffer, - xd->dst.v_buffer, xd->dst.uv_stride, - xd->mode_info_context->mbmi.uv_mode, - 32); -} - -void vp9_intra8x8_predict(MACROBLOCKD *xd, - BLOCKD *b, +void vp9_build_intra_predictors_sby_s(MACROBLOCKD *xd, + BLOCK_SIZE_TYPE bsize) { + const struct macroblockd_plane* const pd = &xd->plane[0]; + const int bw = plane_block_width(bsize, pd); + const int bh = plane_block_height(bsize, pd); + vp9_build_intra_predictors(pd->dst.buf, pd->dst.stride, + pd->dst.buf, pd->dst.stride, + xd->mode_info_context->mbmi.mode, + bw, bh, xd->up_available, xd->left_available, + 0 /*xd->right_available*/); +} + +void vp9_build_intra_predictors_sbuv_s(MACROBLOCKD *xd, + BLOCK_SIZE_TYPE bsize) { + const int bwl = b_width_log2(bsize), bw = 2 << bwl; + const int bhl = b_height_log2(bsize), bh = 2 << bhl; + + vp9_build_intra_predictors(xd->plane[1].dst.buf, xd->plane[1].dst.stride, + xd->plane[1].dst.buf, xd->plane[1].dst.stride, + xd->mode_info_context->mbmi.uv_mode, + bw, bh, xd->up_available, + xd->left_available, 0 /*xd->right_available*/); + vp9_build_intra_predictors(xd->plane[2].dst.buf, xd->plane[1].dst.stride, + xd->plane[2].dst.buf, xd->plane[1].dst.stride, + xd->mode_info_context->mbmi.uv_mode, + bw, bh, xd->up_available, + xd->left_available, 0 /*xd->right_available*/); +} + +void vp9_predict_intra_block(MACROBLOCKD *xd, + int block_idx, + int bwl_in, + TX_SIZE tx_size, + int mode, + uint8_t *predictor, int pre_stride) { + const int bwl = bwl_in - tx_size; + const int wmask = (1 << bwl) - 1; + const int have_top = (block_idx >> bwl) || xd->up_available; + const int have_left = (block_idx & wmask) || xd->left_available; + const int have_right = ((block_idx & wmask) != wmask); + const int txfm_block_size = 4 << tx_size; + + assert(bwl >= 0); + vp9_build_intra_predictors(predictor, pre_stride, + predictor, pre_stride, + mode, + txfm_block_size, + txfm_block_size, + have_top, have_left, + have_right); +} + +void vp9_intra4x4_predict(MACROBLOCKD *xd, + int block_idx, + BLOCK_SIZE_TYPE bsize, int mode, - uint8_t *predictor) { - const int block4x4_idx = (b - xd->block); - const int block_idx = (block4x4_idx >> 2) | !!(block4x4_idx & 2); - const int have_top = (block_idx >> 1) || xd->up_available; - const int have_left = (block_idx & 1) || xd->left_available; - const int have_right = !(block_idx & 1) || xd->right_available; - - vp9_build_intra_predictors_internal(*(b->base_dst) + b->dst, - b->dst_stride, predictor, 16, - mode, 8, have_top, have_left, - have_right); + uint8_t *predictor, int pre_stride) { + vp9_predict_intra_block(xd, block_idx, b_width_log2(bsize), TX_4X4, + mode, predictor, pre_stride); } - -void vp9_intra_uv4x4_predict(MACROBLOCKD 
*xd, - BLOCKD *b, - int mode, - uint8_t *predictor) { - const int block_idx = (b - xd->block) & 3; - const int have_top = (block_idx >> 1) || xd->up_available; - const int have_left = (block_idx & 1) || xd->left_available; - const int have_right = !(block_idx & 1) || xd->right_available; - - vp9_build_intra_predictors_internal(*(b->base_dst) + b->dst, - b->dst_stride, predictor, 8, - mode, 4, have_top, have_left, - have_right); -} - -/* TODO: try different ways of use Y-UV mode correlation - Current code assumes that a uv 4x4 block use same mode - as corresponding Y 8x8 area - */ diff --git a/vp9/common/vp9_reconintra.h b/vp9/common/vp9_reconintra.h index b97b6089dd7c0dd975b565f7622f893275c58aeb..f5f5f42c4d432efe4a5064735a30b4feea586f39 100644 --- a/vp9/common/vp9_reconintra.h +++ b/vp9/common/vp9_reconintra.h @@ -14,44 +14,17 @@ #include "vpx/vpx_integer.h" #include "vp9/common/vp9_blockd.h" -void vp9_recon_intra_mbuv(MACROBLOCKD *xd); - -B_PREDICTION_MODE vp9_find_dominant_direction(uint8_t *ptr, - int stride, int n, - int tx, int ty); - -B_PREDICTION_MODE vp9_find_bpred_context(MACROBLOCKD *xd, BLOCKD *x); - -#if CONFIG_COMP_INTERINTRA_PRED -void vp9_build_interintra_16x16_predictors_mb(MACROBLOCKD *xd, - uint8_t *ypred, - uint8_t *upred, - uint8_t *vpred, - int ystride, - int uvstride); - -void vp9_build_interintra_16x16_predictors_mby(MACROBLOCKD *xd, - uint8_t *ypred, - int ystride); - -void vp9_build_interintra_16x16_predictors_mbuv(MACROBLOCKD *xd, - uint8_t *upred, - uint8_t *vpred, - int uvstride); -#endif // CONFIG_COMP_INTERINTRA_PRED - -void vp9_build_interintra_32x32_predictors_sb(MACROBLOCKD *xd, - uint8_t *ypred, - uint8_t *upred, - uint8_t *vpred, - int ystride, - int uvstride); - -void vp9_build_interintra_64x64_predictors_sb(MACROBLOCKD *xd, - uint8_t *ypred, - uint8_t *upred, - uint8_t *vpred, - int ystride, - int uvstride); - +MB_PREDICTION_MODE vp9_find_dominant_direction(uint8_t *ptr, + int stride, int n, + int tx, int ty); + +MB_PREDICTION_MODE vp9_find_bpred_context(MACROBLOCKD *xd, int block, + uint8_t *ptr, int stride); + +void vp9_predict_intra_block(MACROBLOCKD *xd, + int block_idx, + int bwl_in, + TX_SIZE tx_size, + int mode, + uint8_t *predictor, int pre_stride); #endif // VP9_COMMON_VP9_RECONINTRA_H_ diff --git a/vp9/common/vp9_reconintra4x4.c b/vp9/common/vp9_reconintra4x4.c deleted file mode 100644 index eab5ab4955e87ffc7cf72869368d5b194ed72f06..0000000000000000000000000000000000000000 --- a/vp9/common/vp9_reconintra4x4.c +++ /dev/null @@ -1,503 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
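
The availability logic in the new vp9_predict_intra_block() above packs a fair amount into three lines: with bwl = bwl_in - tx_size, block_idx >> bwl is the transform block's row within the prediction block and block_idx & wmask its column, so only the first row consults xd->up_available, only the first column consults xd->left_available, and the last column never claims a right edge. A self-contained check with hypothetical values (a 16x16 area of 4x4 transforms at the top-left frame corner, so bwl_in = 2 and tx_size = TX_4X4):

    #include <stdio.h>

    int main(void) {
      const int bwl = 2 - 0;             /* bwl_in - tx_size */
      const int wmask = (1 << bwl) - 1;  /* column index mask, here 3 */
      const int up_available = 0, left_available = 0;
      int block_idx;
      for (block_idx = 0; block_idx < 16; ++block_idx) {
        const int have_top = (block_idx >> bwl) || up_available;
        const int have_left = (block_idx & wmask) || left_available;
        const int have_right = ((block_idx & wmask) != wmask);
        printf("block %2d: top=%d left=%d right=%d\n",
               block_idx, have_top, have_left, have_right);
      }
      return 0;
    }
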
- */ - - -#include "./vpx_config.h" -#include "vpx_mem/vpx_mem.h" -#include "vp9/common/vp9_reconintra.h" -#include "vp9_rtcd.h" - -#if CONFIG_NEWBINTRAMODES -static int find_grad_measure(uint8_t *x, int stride, int n, int tx, int ty, - int dx, int dy) { - int i, j; - int count = 0, gsum = 0, gdiv; - /* TODO: Make this code more efficient by breaking up into two loops */ - for (i = -ty; i < n; ++i) - for (j = -tx; j < n; ++j) { - int g; - if (i >= 0 && j >= 0) continue; - if (i + dy >= 0 && j + dx >= 0) continue; - if (i + dy < -ty || i + dy >= n || j + dx < -tx || j + dx >= n) continue; - g = abs(x[(i + dy) * stride + j + dx] - x[i * stride + j]); - gsum += g * g; - count++; - } - gdiv = (dx * dx + dy * dy) * count; - return ((gsum << 8) + (gdiv >> 1)) / gdiv; -} - -#if CONTEXT_PRED_REPLACEMENTS == 6 -B_PREDICTION_MODE vp9_find_dominant_direction(uint8_t *ptr, - int stride, int n, - int tx, int ty) { - int g[8], i, imin, imax; - g[1] = find_grad_measure(ptr, stride, n, tx, ty, 2, 1); - g[2] = find_grad_measure(ptr, stride, n, tx, ty, 1, 1); - g[3] = find_grad_measure(ptr, stride, n, tx, ty, 1, 2); - g[5] = find_grad_measure(ptr, stride, n, tx, ty, -1, 2); - g[6] = find_grad_measure(ptr, stride, n, tx, ty, -1, 1); - g[7] = find_grad_measure(ptr, stride, n, tx, ty, -2, 1); - imin = 1; - for (i = 2; i < 8; i += 1 + (i == 3)) - imin = (g[i] < g[imin] ? i : imin); - imax = 1; - for (i = 2; i < 8; i += 1 + (i == 3)) - imax = (g[i] > g[imax] ? i : imax); - /* - printf("%d %d %d %d %d %d = %d %d\n", - g[1], g[2], g[3], g[5], g[6], g[7], imin, imax); - */ - switch (imin) { - case 1: - return B_HD_PRED; - case 2: - return B_RD_PRED; - case 3: - return B_VR_PRED; - case 5: - return B_VL_PRED; - case 6: - return B_LD_PRED; - case 7: - return B_HU_PRED; - default: - assert(0); - } -} -#elif CONTEXT_PRED_REPLACEMENTS == 4 -B_PREDICTION_MODE vp9_find_dominant_direction(uint8_t *ptr, - int stride, int n, - int tx, int ty) { - int g[8], i, imin, imax; - g[1] = find_grad_measure(ptr, stride, n, tx, ty, 2, 1); - g[3] = find_grad_measure(ptr, stride, n, tx, ty, 1, 2); - g[5] = find_grad_measure(ptr, stride, n, tx, ty, -1, 2); - g[7] = find_grad_measure(ptr, stride, n, tx, ty, -2, 1); - imin = 1; - for (i = 3; i < 8; i+=2) - imin = (g[i] < g[imin] ? i : imin); - imax = 1; - for (i = 3; i < 8; i+=2) - imax = (g[i] > g[imax] ? i : imax); - /* - printf("%d %d %d %d = %d %d\n", - g[1], g[3], g[5], g[7], imin, imax); - */ - switch (imin) { - case 1: - return B_HD_PRED; - case 3: - return B_VR_PRED; - case 5: - return B_VL_PRED; - case 7: - return B_HU_PRED; - default: - assert(0); - } -} -#elif CONTEXT_PRED_REPLACEMENTS == 0 -B_PREDICTION_MODE vp9_find_dominant_direction(uint8_t *ptr, - int stride, int n, - int tx, int ty) { - int g[8], i, imin, imax; - g[0] = find_grad_measure(ptr, stride, n, tx, ty, 1, 0); - g[1] = find_grad_measure(ptr, stride, n, tx, ty, 2, 1); - g[2] = find_grad_measure(ptr, stride, n, tx, ty, 1, 1); - g[3] = find_grad_measure(ptr, stride, n, tx, ty, 1, 2); - g[4] = find_grad_measure(ptr, stride, n, tx, ty, 0, 1); - g[5] = find_grad_measure(ptr, stride, n, tx, ty, -1, 2); - g[6] = find_grad_measure(ptr, stride, n, tx, ty, -1, 1); - g[7] = find_grad_measure(ptr, stride, n, tx, ty, -2, 1); - imax = 0; - for (i = 1; i < 8; i++) - imax = (g[i] > g[imax] ? i : imax); - imin = 0; - for (i = 1; i < 8; i++) - imin = (g[i] < g[imin] ? 
i : imin); - - switch (imin) { - case 0: - return B_HE_PRED; - case 1: - return B_HD_PRED; - case 2: - return B_RD_PRED; - case 3: - return B_VR_PRED; - case 4: - return B_VE_PRED; - case 5: - return B_VL_PRED; - case 6: - return B_LD_PRED; - case 7: - return B_HU_PRED; - default: - assert(0); - } -} -#endif - -B_PREDICTION_MODE vp9_find_bpred_context(MACROBLOCKD *xd, BLOCKD *x) { - const int block_idx = x - xd->block; - const int have_top = (block_idx >> 2) || xd->up_available; - const int have_left = (block_idx & 3) || xd->left_available; - uint8_t *ptr = *(x->base_dst) + x->dst; - int stride = x->dst_stride; - int tx = have_left ? 4 : 0; - int ty = have_top ? 4 : 0; - if (!have_left && !have_top) - return B_DC_PRED; - return vp9_find_dominant_direction(ptr, stride, 4, tx, ty); -} -#endif - -void vp9_intra4x4_predict(MACROBLOCKD *xd, - BLOCKD *x, - int b_mode, - uint8_t *predictor) { - int i, r, c; - const int block_idx = x - xd->block; - const int have_top = (block_idx >> 2) || xd->up_available; - const int have_left = (block_idx & 3) || xd->left_available; - const int have_right = (block_idx & 3) != 3 || xd->right_available; - uint8_t left[4], above[8], top_left; - /* - * 127 127 127 .. 127 127 127 127 127 127 - * 129 A B .. Y Z - * 129 C D .. W X - * 129 E F .. U V - * 129 G H .. S T T T T T - * .. - */ - - if (have_left) { - uint8_t *left_ptr = *(x->base_dst) + x->dst - 1; - const int stride = x->dst_stride; - - left[0] = left_ptr[0 * stride]; - left[1] = left_ptr[1 * stride]; - left[2] = left_ptr[2 * stride]; - left[3] = left_ptr[3 * stride]; - } else { - left[0] = left[1] = left[2] = left[3] = 129; - } - - if (have_top) { - uint8_t *above_ptr = *(x->base_dst) + x->dst - x->dst_stride; - - if (have_left) { - top_left = above_ptr[-1]; - } else { - top_left = 127; - } - - above[0] = above_ptr[0]; - above[1] = above_ptr[1]; - above[2] = above_ptr[2]; - above[3] = above_ptr[3]; - if (((block_idx & 3) != 3) || - (have_right && block_idx == 3 && - ((xd->mb_index != 3 && xd->sb_index != 3) || - ((xd->mb_index & 1) == 0 && xd->sb_index == 3)))) { - above[4] = above_ptr[4]; - above[5] = above_ptr[5]; - above[6] = above_ptr[6]; - above[7] = above_ptr[7]; - } else if (have_right) { - uint8_t *above_right = above_ptr + 4; - - if (xd->sb_index == 3 && (xd->mb_index & 1)) - above_right -= 32 * x->dst_stride; - if (xd->mb_index == 3) - above_right -= 16 * x->dst_stride; - above_right -= (block_idx & ~3) * x->dst_stride; - - /* use a more distant above-right (from closest available top-right - * corner), but with a "localized DC" (similar'ish to TM-pred): - * - * A B C D E F G H - * I J K L - * M N O P - * Q R S T - * U V W X x1 x2 x3 x4 - * - * Where: - * x1 = clip_pixel(E + X - D) - * x2 = clip_pixel(F + X - D) - * x3 = clip_pixel(G + X - D) - * x4 = clip_pixel(H + X - D) - * - * This is applied anytime when we use a "distant" above-right edge - * that is not immediately top-right to the block that we're going - * to do intra prediction for. 
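
A worked instance of this rule: if D = 80, X = 90 and E..H = {100, 101, 102, 103}, then x1..x4 = clip_pixel(100 + 90 - 80), ..., clip_pixel(103 + 90 - 80) = {110, 111, 112, 113}; the distant row is reused with its local DC shifted to match X. The same extension as a standalone helper (a sketch, with illustrative names, not part of the patch):

    static uint8_t clip_pixel_sketch(int val) {
      return (uint8_t)(val < 0 ? 0 : (val > 255 ? 255 : val));
    }

    /* above_right points at E, so D is above_right[-1]; last_above is X,
     * the last pixel of the row immediately above the current block. */
    static void extend_above_right_sketch(const uint8_t *above_right,
                                          uint8_t last_above, uint8_t out[4]) {
      const int dc_shift = last_above - above_right[-1];  /* X - D */
      int i;
      for (i = 0; i < 4; ++i)
        out[i] = clip_pixel_sketch(above_right[i] + dc_shift);
    }
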
- */ - above[4] = clip_pixel(above_right[0] + above_ptr[3] - above_right[-1]); - above[5] = clip_pixel(above_right[1] + above_ptr[3] - above_right[-1]); - above[6] = clip_pixel(above_right[2] + above_ptr[3] - above_right[-1]); - above[7] = clip_pixel(above_right[3] + above_ptr[3] - above_right[-1]); - } else { - // extend edge - above[4] = above[5] = above[6] = above[7] = above[3]; - } - } else { - above[0] = above[1] = above[2] = above[3] = 127; - above[4] = above[5] = above[6] = above[7] = 127; - top_left = 127; - } - -#if CONFIG_NEWBINTRAMODES - if (b_mode == B_CONTEXT_PRED) - b_mode = x->bmi.as_mode.context; -#endif - - switch (b_mode) { - case B_DC_PRED: { - int expected_dc = 0; - - for (i = 0; i < 4; i++) { - expected_dc += above[i]; - expected_dc += left[i]; - } - - expected_dc = (expected_dc + 4) >> 3; - - for (r = 0; r < 4; r++) { - for (c = 0; c < 4; c++) { - predictor[c] = expected_dc; - } - - predictor += 16; - } - } - break; - case B_TM_PRED: { - /* prediction similar to true_motion prediction */ - for (r = 0; r < 4; r++) { - for (c = 0; c < 4; c++) { - predictor[c] = clip_pixel(above[c] - top_left + left[r]); - } - - predictor += 16; - } - } - break; - - case B_VE_PRED: { - unsigned int ap[4]; - - ap[0] = above[0]; - ap[1] = above[1]; - ap[2] = above[2]; - ap[3] = above[3]; - - for (r = 0; r < 4; r++) { - for (c = 0; c < 4; c++) { - predictor[c] = ap[c]; - } - - predictor += 16; - } - } - break; - - case B_HE_PRED: { - unsigned int lp[4]; - - lp[0] = left[0]; - lp[1] = left[1]; - lp[2] = left[2]; - lp[3] = left[3]; - - for (r = 0; r < 4; r++) { - for (c = 0; c < 4; c++) { - predictor[c] = lp[r]; - } - - predictor += 16; - } - } - break; - case B_LD_PRED: { - uint8_t *ptr = above; - - predictor[0 * 16 + 0] = (ptr[0] + ptr[1] * 2 + ptr[2] + 2) >> 2; - predictor[0 * 16 + 1] = - predictor[1 * 16 + 0] = (ptr[1] + ptr[2] * 2 + ptr[3] + 2) >> 2; - predictor[0 * 16 + 2] = - predictor[1 * 16 + 1] = - predictor[2 * 16 + 0] = (ptr[2] + ptr[3] * 2 + ptr[4] + 2) >> 2; - predictor[0 * 16 + 3] = - predictor[1 * 16 + 2] = - predictor[2 * 16 + 1] = - predictor[3 * 16 + 0] = (ptr[3] + ptr[4] * 2 + ptr[5] + 2) >> 2; - predictor[1 * 16 + 3] = - predictor[2 * 16 + 2] = - predictor[3 * 16 + 1] = (ptr[4] + ptr[5] * 2 + ptr[6] + 2) >> 2; - predictor[2 * 16 + 3] = - predictor[3 * 16 + 2] = (ptr[5] + ptr[6] * 2 + ptr[7] + 2) >> 2; - predictor[3 * 16 + 3] = (ptr[6] + ptr[7] * 2 + ptr[7] + 2) >> 2; - - } - break; - case B_RD_PRED: { - uint8_t pp[9]; - - pp[0] = left[3]; - pp[1] = left[2]; - pp[2] = left[1]; - pp[3] = left[0]; - pp[4] = top_left; - pp[5] = above[0]; - pp[6] = above[1]; - pp[7] = above[2]; - pp[8] = above[3]; - - predictor[3 * 16 + 0] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2; - predictor[3 * 16 + 1] = - predictor[2 * 16 + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2; - predictor[3 * 16 + 2] = - predictor[2 * 16 + 1] = - predictor[1 * 16 + 0] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2; - predictor[3 * 16 + 3] = - predictor[2 * 16 + 2] = - predictor[1 * 16 + 1] = - predictor[0 * 16 + 0] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2; - predictor[2 * 16 + 3] = - predictor[1 * 16 + 2] = - predictor[0 * 16 + 1] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2; - predictor[1 * 16 + 3] = - predictor[0 * 16 + 2] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2; - predictor[0 * 16 + 3] = (pp[6] + pp[7] * 2 + pp[8] + 2) >> 2; - - } - break; - case B_VR_PRED: { - uint8_t pp[9]; - - pp[0] = left[3]; - pp[1] = left[2]; - pp[2] = left[1]; - pp[3] = left[0]; - pp[4] = top_left; - pp[5] = above[0]; - pp[6] = above[1]; - pp[7] = above[2]; - 
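
All of the directional B_* tables in this deleted predictor reduce to two rounding filters applied along the pp[]/above/left arrays, which is easier to see stated once than read out of the assignment tables (continued below). Equivalently, as helpers (a reference sketch, not part of the patch):

    /* 2-tap average with round-to-nearest. */
    static int avg2(int a, int b) {
      return (a + b + 1) >> 1;
    }

    /* 3-tap [1 2 1] smoothing filter with round-to-nearest. */
    static int avg3(int a, int b, int c) {
      return (a + 2 * b + c + 2) >> 2;
    }

For example, the B_RD_PRED entry (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2 is avg3(pp[3], pp[4], pp[5]), and the B_VR_PRED entry (pp[4] + pp[5] + 1) >> 1 is avg2(pp[4], pp[5]).
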
pp[8] = above[3]; - - predictor[3 * 16 + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2; - predictor[2 * 16 + 0] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2; - predictor[3 * 16 + 1] = - predictor[1 * 16 + 0] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2; - predictor[2 * 16 + 1] = - predictor[0 * 16 + 0] = (pp[4] + pp[5] + 1) >> 1; - predictor[3 * 16 + 2] = - predictor[1 * 16 + 1] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2; - predictor[2 * 16 + 2] = - predictor[0 * 16 + 1] = (pp[5] + pp[6] + 1) >> 1; - predictor[3 * 16 + 3] = - predictor[1 * 16 + 2] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2; - predictor[2 * 16 + 3] = - predictor[0 * 16 + 2] = (pp[6] + pp[7] + 1) >> 1; - predictor[1 * 16 + 3] = (pp[6] + pp[7] * 2 + pp[8] + 2) >> 2; - predictor[0 * 16 + 3] = (pp[7] + pp[8] + 1) >> 1; - - } - break; - case B_VL_PRED: { - uint8_t *pp = above; - - predictor[0 * 16 + 0] = (pp[0] + pp[1] + 1) >> 1; - predictor[1 * 16 + 0] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2; - predictor[2 * 16 + 0] = - predictor[0 * 16 + 1] = (pp[1] + pp[2] + 1) >> 1; - predictor[1 * 16 + 1] = - predictor[3 * 16 + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2; - predictor[2 * 16 + 1] = - predictor[0 * 16 + 2] = (pp[2] + pp[3] + 1) >> 1; - predictor[3 * 16 + 1] = - predictor[1 * 16 + 2] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2; - predictor[0 * 16 + 3] = - predictor[2 * 16 + 2] = (pp[3] + pp[4] + 1) >> 1; - predictor[1 * 16 + 3] = - predictor[3 * 16 + 2] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2; - predictor[2 * 16 + 3] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2; - predictor[3 * 16 + 3] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2; - } - break; - - case B_HD_PRED: { - uint8_t pp[9]; - - pp[0] = left[3]; - pp[1] = left[2]; - pp[2] = left[1]; - pp[3] = left[0]; - pp[4] = top_left; - pp[5] = above[0]; - pp[6] = above[1]; - pp[7] = above[2]; - pp[8] = above[3]; - - - predictor[3 * 16 + 0] = (pp[0] + pp[1] + 1) >> 1; - predictor[3 * 16 + 1] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2; - predictor[2 * 16 + 0] = - predictor[3 * 16 + 2] = (pp[1] + pp[2] + 1) >> 1; - predictor[2 * 16 + 1] = - predictor[3 * 16 + 3] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2; - predictor[2 * 16 + 2] = - predictor[1 * 16 + 0] = (pp[2] + pp[3] + 1) >> 1; - predictor[2 * 16 + 3] = - predictor[1 * 16 + 1] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2; - predictor[1 * 16 + 2] = - predictor[0 * 16 + 0] = (pp[3] + pp[4] + 1) >> 1; - predictor[1 * 16 + 3] = - predictor[0 * 16 + 1] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2; - predictor[0 * 16 + 2] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2; - predictor[0 * 16 + 3] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2; - } - break; - - - case B_HU_PRED: { - uint8_t *pp = left; - predictor[0 * 16 + 0] = (pp[0] + pp[1] + 1) >> 1; - predictor[0 * 16 + 1] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2; - predictor[0 * 16 + 2] = - predictor[1 * 16 + 0] = (pp[1] + pp[2] + 1) >> 1; - predictor[0 * 16 + 3] = - predictor[1 * 16 + 1] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2; - predictor[1 * 16 + 2] = - predictor[2 * 16 + 0] = (pp[2] + pp[3] + 1) >> 1; - predictor[1 * 16 + 3] = - predictor[2 * 16 + 1] = (pp[2] + pp[3] * 2 + pp[3] + 2) >> 2; - predictor[2 * 16 + 2] = - predictor[2 * 16 + 3] = - predictor[3 * 16 + 0] = - predictor[3 * 16 + 1] = - predictor[3 * 16 + 2] = - predictor[3 * 16 + 3] = pp[3]; - } - break; - -#if CONFIG_NEWBINTRAMODES - case B_CONTEXT_PRED: - break; - /* - case B_CORNER_PRED: - corner_predictor(predictor, 16, 4, above, left); - break; - */ -#endif - } -} diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 
43bc3cb1fc2520c40dc63577903e9828bc7fddbf..f281e08e9175b3e05c0a7cc5665063d9eec14be6 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -5,14 +5,13 @@ cat <<EOF */ #include "vpx/vpx_integer.h" +#include "vp9/common/vp9_enums.h" struct loop_filter_info; -struct blockd; struct macroblockd; struct loop_filter_info; /* Encoder forward decls */ -struct block; struct macroblock; struct vp9_variance_vtable; @@ -26,32 +25,26 @@ forward_decls vp9_common_forward_decls # # Dequant # -prototype void vp9_dequant_idct_add_y_block_8x8 "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, struct macroblockd *xd" -specialize vp9_dequant_idct_add_y_block_8x8 +prototype void vp9_idct_add_y_block_8x8 "int16_t *q, uint8_t *dst, int stride, struct macroblockd *xd" +specialize vp9_idct_add_y_block_8x8 -prototype void vp9_dequant_idct_add_uv_block_8x8 "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dstu, uint8_t *dstv, int stride, struct macroblockd *xd" -specialize vp9_dequant_idct_add_uv_block_8x8 +prototype void vp9_idct_add_16x16 "int16_t *input, uint8_t *dest, int stride, int eob" +specialize vp9_idct_add_16x16 -prototype void vp9_dequant_idct_add_16x16 "int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride, int eob" -specialize vp9_dequant_idct_add_16x16 +prototype void vp9_idct_add_8x8 "int16_t *input, uint8_t *dest, int stride, int eob" +specialize vp9_idct_add_8x8 -prototype void vp9_dequant_idct_add_8x8 "int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride, int eob" -specialize vp9_dequant_idct_add_8x8 +prototype void vp9_idct_add "int16_t *input, uint8_t *dest, int stride, int eob" +specialize vp9_idct_add -prototype void vp9_dequant_idct_add "int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride, int eob" -specialize vp9_dequant_idct_add +prototype void vp9_idct_add_y_block "int16_t *q, uint8_t *dst, int stride, struct macroblockd *xd" +specialize vp9_idct_add_y_block -prototype void vp9_dequant_idct_add_y_block "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, struct macroblockd *xd" -specialize vp9_dequant_idct_add_y_block +prototype void vp9_idct_add_uv_block "int16_t *q, uint8_t *dst, int stride, uint16_t *eobs" +specialize vp9_idct_add_uv_block -prototype void vp9_dequant_idct_add_uv_block "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dstu, uint8_t *dstv, int stride, struct macroblockd *xd" -specialize vp9_dequant_idct_add_uv_block - -prototype void vp9_dequant_idct_add_32x32 "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int pitch, int stride, int eob" -specialize vp9_dequant_idct_add_32x32 - -prototype void vp9_dequant_idct_add_uv_block_16x16 "int16_t *q, const int16_t *dq, uint8_t *dstu, uint8_t *dstv, int stride, struct macroblockd *xd" -specialize vp9_dequant_idct_add_uv_block_16x16 +prototype void vp9_idct_add_32x32 "int16_t *q, uint8_t *dst, int stride, int eob" +specialize vp9_idct_add_32x32 # # RECON @@ -67,151 +60,49 @@ vp9_copy_mem8x8_dspr2=vp9_copy_mem8x8_dspr2 prototype void vp9_copy_mem8x4 "const uint8_t *src, int src_pitch, uint8_t *dst, int dst_pitch" specialize vp9_copy_mem8x4 mmx -prototype void vp9_recon_b "uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr, int stride" -specialize vp9_recon_b - -prototype void vp9_recon_uv_b "uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr, int stride" -specialize vp9_recon_uv_b - -prototype void vp9_recon2b "uint8_t *pred_ptr, int16_t 
*diff_ptr, uint8_t *dst_ptr, int stride" -specialize vp9_recon2b sse2 - -prototype void vp9_recon4b "uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr, int stride" -specialize vp9_recon4b sse2 - -prototype void vp9_recon_mb "struct macroblockd *x" -specialize vp9_recon_mb - -prototype void vp9_recon_mby "struct macroblockd *x" -specialize vp9_recon_mby - -prototype void vp9_recon_mby_s "struct macroblockd *x, uint8_t *dst" -specialize vp9_recon_mby_s - -prototype void vp9_recon_mbuv_s "struct macroblockd *x, uint8_t *udst, uint8_t *vdst" -specialize void vp9_recon_mbuv_s - -prototype void vp9_recon_sby_s "struct macroblockd *x, uint8_t *dst" -specialize vp9_recon_sby_s - -prototype void vp9_recon_sbuv_s "struct macroblockd *x, uint8_t *udst, uint8_t *vdst" -specialize void vp9_recon_sbuv_s - -prototype void vp9_recon_sb64y_s "struct macroblockd *x, uint8_t *dst" -specialize vp9_recon_sb64y_s - -prototype void vp9_recon_sb64uv_s "struct macroblockd *x, uint8_t *udst, uint8_t *vdst" -specialize void vp9_recon_sb64uv_s - -prototype void vp9_build_intra_predictors_mby_s "struct macroblockd *x" -specialize vp9_build_intra_predictors_mby_s +prototype void vp9_build_intra_predictors "uint8_t *src, int src_stride, uint8_t *pred, int y_stride, int mode, int bw, int bh, int up_available, int left_available, int right_available" +specialize void vp9_build_intra_predictors -prototype void vp9_build_intra_predictors_sby_s "struct macroblockd *x" -specialize vp9_build_intra_predictors_sby_s; +prototype void vp9_build_intra_predictors_sby_s "struct macroblockd *x, enum BLOCK_SIZE_TYPE bsize" +specialize vp9_build_intra_predictors_sby_s -prototype void vp9_build_intra_predictors_sbuv_s "struct macroblockd *x" -specialize vp9_build_intra_predictors_sbuv_s; +prototype void vp9_build_intra_predictors_sbuv_s "struct macroblockd *x, enum BLOCK_SIZE_TYPE bsize" +specialize vp9_build_intra_predictors_sbuv_s -prototype void vp9_build_intra_predictors_mby "struct macroblockd *x" -specialize vp9_build_intra_predictors_mby; - -prototype void vp9_build_intra_predictors_mby_s "struct macroblockd *x" -specialize vp9_build_intra_predictors_mby_s; - -prototype void vp9_build_intra_predictors_mbuv "struct macroblockd *x" -specialize vp9_build_intra_predictors_mbuv; - -prototype void vp9_build_intra_predictors_mbuv_s "struct macroblockd *x" -specialize vp9_build_intra_predictors_mbuv_s; - -prototype void vp9_build_intra_predictors_sb64y_s "struct macroblockd *x" -specialize vp9_build_intra_predictors_sb64y_s; - -prototype void vp9_build_intra_predictors_sb64uv_s "struct macroblockd *x" -specialize vp9_build_intra_predictors_sb64uv_s; - -prototype void vp9_intra4x4_predict "struct macroblockd *xd, struct blockd *x, int b_mode, uint8_t *predictor" +prototype void vp9_intra4x4_predict "struct macroblockd *xd, int block, enum BLOCK_SIZE_TYPE bsize, int b_mode, uint8_t *predictor, int pre_stride" specialize vp9_intra4x4_predict; -prototype void vp9_intra8x8_predict "struct macroblockd *xd, struct blockd *x, int b_mode, uint8_t *predictor" -specialize vp9_intra8x8_predict; - -prototype void vp9_intra_uv4x4_predict "struct macroblockd *xd, struct blockd *x, int b_mode, uint8_t *predictor" -specialize vp9_intra_uv4x4_predict; - if [ "$CONFIG_VP9_DECODER" = "yes" ]; then -prototype void vp9_add_residual_4x4 "const int16_t *diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride" -specialize vp9_add_residual_4x4 sse2 - -prototype void vp9_add_residual_8x8 "const int16_t *diff, const uint8_t *pred, int pitch, uint8_t 
*dest, int stride" -specialize vp9_add_residual_8x8 sse2 - -prototype void vp9_add_residual_16x16 "const int16_t *diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride" -specialize vp9_add_residual_16x16 sse2 - -prototype void vp9_add_residual_32x32 "const int16_t *diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride" -specialize vp9_add_residual_32x32 sse2 - -prototype void vp9_add_constant_residual_8x8 "const int16_t diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride" +prototype void vp9_add_constant_residual_8x8 "const int16_t diff, uint8_t *dest, int stride" specialize vp9_add_constant_residual_8x8 sse2 -prototype void vp9_add_constant_residual_16x16 "const int16_t diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride" +prototype void vp9_add_constant_residual_16x16 "const int16_t diff, uint8_t *dest, int stride" specialize vp9_add_constant_residual_16x16 sse2 -prototype void vp9_add_constant_residual_32x32 "const int16_t diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride" +prototype void vp9_add_constant_residual_32x32 "const int16_t diff, uint8_t *dest, int stride" specialize vp9_add_constant_residual_32x32 sse2 fi # # Loopfilter # -prototype void vp9_loop_filter_mbv "uint8_t *y, uint8_t *u, uint8_t *v, int ystride, int uv_stride, struct loop_filter_info *lfi" -specialize vp9_loop_filter_mbv sse2 - -prototype void vp9_loop_filter_bv "uint8_t *y, uint8_t *u, uint8_t *v, int ystride, int uv_stride, struct loop_filter_info *lfi" -specialize vp9_loop_filter_bv sse2 +prototype void vp9_mb_lpf_vertical_edge_w "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" +specialize vp9_mb_lpf_vertical_edge_w -prototype void vp9_loop_filter_bv8x8 "uint8_t *y, uint8_t *u, uint8_t *v, int ystride, int uv_stride, struct loop_filter_info *lfi" -specialize vp9_loop_filter_bv8x8 sse2 +prototype void vp9_mbloop_filter_vertical_edge "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" +specialize vp9_mbloop_filter_vertical_edge -prototype void vp9_loop_filter_mbh "uint8_t *y, uint8_t *u, uint8_t *v, int ystride, int uv_stride, struct loop_filter_info *lfi" -specialize vp9_loop_filter_mbh sse2 +prototype void vp9_loop_filter_vertical_edge "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" +specialize vp9_loop_filter_vertical_edge -prototype void vp9_loop_filter_bh "uint8_t *y, uint8_t *u, uint8_t *v, int ystride, int uv_stride, struct loop_filter_info *lfi" -specialize vp9_loop_filter_bh sse2 +prototype void vp9_mb_lpf_horizontal_edge_w "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" +specialize vp9_mb_lpf_horizontal_edge_w -prototype void vp9_loop_filter_bh8x8 "uint8_t *y, uint8_t *u, uint8_t *v, int ystride, int uv_stride, struct loop_filter_info *lfi" -specialize vp9_loop_filter_bh8x8 sse2 +prototype void vp9_mbloop_filter_horizontal_edge "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" +specialize vp9_mbloop_filter_horizontal_edge -prototype void vp9_loop_filter_simple_mbv "uint8_t *y, int ystride, const uint8_t *blimit" -specialize vp9_loop_filter_simple_mbv mmx sse2 -vp9_loop_filter_simple_mbv_c=vp9_loop_filter_simple_vertical_edge_c -vp9_loop_filter_simple_mbv_mmx=vp9_loop_filter_simple_vertical_edge_mmx -vp9_loop_filter_simple_mbv_sse2=vp9_loop_filter_simple_vertical_edge_sse2 - 
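
A note on reading these vp9_rtcd_defs.sh hunks: each prototype line records a C signature, each specialize line lists the optimized variants, and assignments such as vp9_loop_filter_simple_mbv_sse2=vp9_loop_filter_simple_vertical_edge_sse2 remap a variant to a differently named implementation. The generator turns each stanza into declarations plus a dispatch name in vp9_rtcd.h. Roughly, and only as an assumption about the generated header's shape (the real output differs in detail and also supports runtime CPU detection), the removed stanza above expands to something like:

    void vp9_loop_filter_simple_vertical_edge_c(uint8_t *y, int ystride,
                                                const uint8_t *blimit);
    void vp9_loop_filter_simple_vertical_edge_mmx(uint8_t *y, int ystride,
                                                  const uint8_t *blimit);
    void vp9_loop_filter_simple_vertical_edge_sse2(uint8_t *y, int ystride,
                                                   const uint8_t *blimit);

    #if HAVE_SSE2
    #define vp9_loop_filter_simple_mbv vp9_loop_filter_simple_vertical_edge_sse2
    #elif HAVE_MMX
    #define vp9_loop_filter_simple_mbv vp9_loop_filter_simple_vertical_edge_mmx
    #else
    #define vp9_loop_filter_simple_mbv vp9_loop_filter_simple_vertical_edge_c
    #endif
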
-prototype void vp9_loop_filter_simple_mbh "uint8_t *y, int ystride, const uint8_t *blimit"
-specialize vp9_loop_filter_simple_mbh mmx sse2
-vp9_loop_filter_simple_mbh_c=vp9_loop_filter_simple_horizontal_edge_c
-vp9_loop_filter_simple_mbh_mmx=vp9_loop_filter_simple_horizontal_edge_mmx
-vp9_loop_filter_simple_mbh_sse2=vp9_loop_filter_simple_horizontal_edge_sse2
-
-prototype void vp9_loop_filter_simple_bv "uint8_t *y, int ystride, const uint8_t *blimit"
-specialize vp9_loop_filter_simple_bv mmx sse2
-vp9_loop_filter_simple_bv_c=vp9_loop_filter_bvs_c
-vp9_loop_filter_simple_bv_mmx=vp9_loop_filter_bvs_mmx
-vp9_loop_filter_simple_bv_sse2=vp9_loop_filter_bvs_sse2
-
-prototype void vp9_loop_filter_simple_bh "uint8_t *y, int ystride, const uint8_t *blimit"
-specialize vp9_loop_filter_simple_bh mmx sse2
-vp9_loop_filter_simple_bh_c=vp9_loop_filter_bhs_c
-vp9_loop_filter_simple_bh_mmx=vp9_loop_filter_bhs_mmx
-vp9_loop_filter_simple_bh_sse2=vp9_loop_filter_bhs_sse2
-
-prototype void vp9_lpf_mbh_w "unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, struct loop_filter_info *lfi"
-specialize vp9_lpf_mbh_w sse2
-
-prototype void vp9_lpf_mbv_w "unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, struct loop_filter_info *lfi"
-specialize vp9_lpf_mbv_w sse2
+prototype void vp9_loop_filter_horizontal_edge "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
+specialize vp9_loop_filter_horizontal_edge

 #
 # post proc
@@ -225,7 +116,7 @@ prototype void vp9_mbpost_proc_across_ip "uint8_t *src, int pitch, int rows, int
 specialize vp9_mbpost_proc_across_ip sse2
 vp9_mbpost_proc_across_ip_sse2=vp9_mbpost_proc_across_ip_xmm

-prototype void vp9_post_proc_down_and_across "uint8_t *src_ptr, uint8_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit"
+prototype void vp9_post_proc_down_and_across "const uint8_t *src_ptr, uint8_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit"
 specialize vp9_post_proc_down_and_across mmx sse2
 vp9_post_proc_down_and_across_sse2=vp9_post_proc_down_and_across_xmm
@@ -243,18 +134,6 @@ specialize vp9_blend_mb_outer
 prototype void vp9_blend_b "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride"
 specialize vp9_blend_b

-#
-# sad 16x3, 3x16
-#
-prototype unsigned int vp9_sad16x3 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"
-specialize vp9_sad16x3 sse2
-
-prototype unsigned int vp9_sad3x16 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"
-specialize vp9_sad3x16 sse2
-
-prototype unsigned int vp9_sub_pixel_variance16x2 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance16x2 sse2
-
 #
 # Sub Pixel Filters
 #
@@ -276,122 +155,63 @@ specialize vp9_convolve8_avg_horiz ssse3
 prototype void vp9_convolve8_avg_vert "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
 specialize vp9_convolve8_avg_vert ssse3

-#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
-prototype void vp9_convolve8_1by8 "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8_1by8
-
-prototype void vp9_convolve8_qtr "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8_qtr
-
-prototype void vp9_convolve8_3by8 "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8_3by8
-
-prototype void vp9_convolve8_5by8 "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8_5by8
-
-prototype void vp9_convolve8_3qtr "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8_3qtr
-
-prototype void vp9_convolve8_7by8 "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8_7by8
-
-prototype void vp9_convolve8_1by8_horiz "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8_1by8_horiz
-
-prototype void vp9_convolve8_qtr_horiz "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8_qtr_horiz
-
-prototype void vp9_convolve8_3by8_horiz "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8_3by8_horiz
-
-prototype void vp9_convolve8_5by8_horiz "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8_5by8_horiz
-
-prototype void vp9_convolve8_3qtr_horiz "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8_3qtr_horiz
-
-prototype void vp9_convolve8_7by8_horiz "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8_7by8_horiz
-
-prototype void vp9_convolve8_1by8_vert "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8_1by8_vert
-
-prototype void vp9_convolve8_qtr_vert "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8_qtr_vert
-
-prototype void vp9_convolve8_3by8_vert "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8_3by8_vert
-
-prototype void vp9_convolve8_5by8_vert "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8_5by8_vert
-
-prototype void vp9_convolve8_3qtr_vert "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8_3qtr_vert
-
-prototype void vp9_convolve8_7by8_vert "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8_7by8_vert
-#endif
-
 #
 # dct
 #
-prototype void vp9_short_idct4x4_1 "int16_t *input, int16_t *output, int pitch"
-specialize vp9_short_idct4x4_1
+prototype void vp9_short_idct4x4_1_add "int16_t *input, uint8_t *dest, int dest_stride"
+specialize vp9_short_idct4x4_1_add

-prototype void vp9_short_idct4x4 "int16_t *input, int16_t *output, int pitch"
-specialize vp9_short_idct4x4 sse2
+prototype void vp9_short_idct4x4_add "int16_t *input, uint8_t *dest, int dest_stride"
+specialize vp9_short_idct4x4_add sse2

-prototype void vp9_short_idct8x8 "int16_t *input, int16_t *output, int pitch"
-specialize vp9_short_idct8x8 sse2
+prototype void vp9_short_idct8x8_add "int16_t *input, uint8_t *dest, int dest_stride"
+specialize vp9_short_idct8x8_add sse2

-prototype void vp9_short_idct10_8x8 "int16_t *input, int16_t *output, int pitch"
-specialize vp9_short_idct10_8x8 sse2
+prototype void vp9_short_idct10_8x8_add "int16_t *input, uint8_t *dest, int dest_stride"
+specialize vp9_short_idct10_8x8_add sse2

 prototype void vp9_short_idct1_8x8 "int16_t *input, int16_t *output"
 specialize vp9_short_idct1_8x8

-prototype void vp9_short_idct16x16 "int16_t *input, int16_t *output, int pitch"
-specialize vp9_short_idct16x16 sse2
+prototype void vp9_short_idct16x16_add "int16_t *input, uint8_t *dest, int dest_stride"
+specialize vp9_short_idct16x16_add sse2

-prototype void vp9_short_idct10_16x16 "int16_t *input, int16_t *output, int pitch"
-specialize vp9_short_idct10_16x16 sse2
+prototype void vp9_short_idct10_16x16_add "int16_t *input, uint8_t *dest, int dest_stride"
+specialize vp9_short_idct10_16x16_add sse2

 prototype void vp9_short_idct1_16x16 "int16_t *input, int16_t *output"
 specialize vp9_short_idct1_16x16
-
-prototype void vp9_short_idct32x32 "int16_t *input, int16_t *output, int pitch"
-specialize vp9_short_idct32x32 sse2
+prototype void vp9_short_idct32x32_add "int16_t *input, uint8_t *dest, int dest_stride"
+specialize vp9_short_idct32x32_add sse2

 prototype void vp9_short_idct1_32x32 "int16_t *input, int16_t *output"
 specialize vp9_short_idct1_32x32

-prototype void vp9_short_idct10_32x32 "int16_t *input, int16_t *output, int pitch"
-specialize vp9_short_idct10_32x32
+prototype void vp9_short_idct10_32x32_add "int16_t *input, uint8_t *dest, int dest_stride"
+specialize vp9_short_idct10_32x32_add

-prototype void vp9_short_iht8x8 "int16_t *input, int16_t *output, int pitch, int tx_type"
-specialize vp9_short_iht8x8
+prototype void vp9_short_iht4x4_add "int16_t *input, uint8_t *dest, int dest_stride, int tx_type"
+specialize vp9_short_iht4x4_add

-prototype void vp9_short_iht4x4 "int16_t *input, int16_t *output, int pitch, int tx_type"
-specialize vp9_short_iht4x4
+prototype void vp9_short_iht8x8_add "int16_t *input, uint8_t *dest, int dest_stride, int tx_type"
+specialize vp9_short_iht8x8_add

-prototype void vp9_short_iht16x16 "int16_t *input, int16_t *output, int pitch, int tx_type"
-specialize vp9_short_iht16x16
+prototype void vp9_short_iht16x16_add "int16_t *input, uint8_t *dest, int dest_stride, int tx_type"
+specialize vp9_short_iht16x16_add

 prototype void vp9_idct4_1d "int16_t *input, int16_t *output"
 specialize vp9_idct4_1d sse2
-
 # dct and add
 prototype void vp9_dc_only_idct_add "int input_dc, uint8_t *pred_ptr, uint8_t *dst_ptr, int pitch, int stride"
 specialize vp9_dc_only_idct_add sse2

-prototype void vp9_short_iwalsh4x4_1 "int16_t *input, int16_t *output, int pitch"
-specialize vp9_short_iwalsh4x4_1
-prototype void vp9_short_iwalsh4x4 "int16_t *input, int16_t *output, int pitch"
-specialize vp9_short_iwalsh4x4
-prototype void vp9_dc_only_inv_walsh_add "int input_dc, uint8_t *pred_ptr, uint8_t *dst_ptr, int pitch, int stride"
-specialize vp9_dc_only_inv_walsh_add
+prototype void vp9_short_iwalsh4x4_1_add "int16_t *input, uint8_t *dest, int dest_stride"
+specialize vp9_short_iwalsh4x4_1_add
+
+prototype void vp9_short_iwalsh4x4_add "int16_t *input, uint8_t *dest, int dest_stride"
+specialize vp9_short_iwalsh4x4_add

 prototype unsigned int vp9_sad32x3 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, int max_sad"
 specialize vp9_sad32x3
@@ -408,66 +228,148 @@ if [ "$CONFIG_VP9_ENCODER" = "yes" ]; then

 # variance
 [ $arch = "x86_64" ] && mmx_x86_64=mmx && sse2_x86_64=sse2

+prototype unsigned int vp9_variance32x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_variance32x16 sse2
+
+prototype unsigned int vp9_variance16x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_variance16x32 sse2
+
+prototype unsigned int vp9_variance64x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_variance64x32 sse2
+
+prototype unsigned int vp9_variance32x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_variance32x64 sse2
+
 prototype unsigned int vp9_variance32x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance32x32
+specialize vp9_variance32x32 sse2

 prototype unsigned int vp9_variance64x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance64x64
+specialize vp9_variance64x64 sse2

 prototype unsigned int vp9_variance16x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
 specialize vp9_variance16x16 mmx sse2
-vp9_variance16x16_sse2=vp9_variance16x16_wmt
-vp9_variance16x16_mmx=vp9_variance16x16_mmx

 prototype unsigned int vp9_variance16x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
 specialize vp9_variance16x8 mmx sse2
-vp9_variance16x8_sse2=vp9_variance16x8_wmt
-vp9_variance16x8_mmx=vp9_variance16x8_mmx

 prototype unsigned int vp9_variance8x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
 specialize vp9_variance8x16 mmx sse2
-vp9_variance8x16_sse2=vp9_variance8x16_wmt
-vp9_variance8x16_mmx=vp9_variance8x16_mmx

 prototype unsigned int vp9_variance8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
 specialize vp9_variance8x8 mmx sse2
-vp9_variance8x8_sse2=vp9_variance8x8_wmt
-vp9_variance8x8_mmx=vp9_variance8x8_mmx
+
+prototype void vp9_get_sse_sum_8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"
+specialize vp9_get_sse_sum_8x8 sse2
+vp9_get_sse_sum_8x8_sse2=vp9_get8x8var_sse2
+
+prototype unsigned int vp9_variance8x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_variance8x4 sse2
+
+prototype unsigned int vp9_variance4x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_variance4x8 sse2

 prototype unsigned int vp9_variance4x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
 specialize vp9_variance4x4 mmx sse2
-vp9_variance4x4_sse2=vp9_variance4x4_wmt
-vp9_variance4x4_mmx=vp9_variance4x4_mmx

 prototype unsigned int vp9_sub_pixel_variance64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
 specialize vp9_sub_pixel_variance64x64 sse2

+prototype unsigned int vp9_sub_pixel_avg_variance64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
+specialize vp9_sub_pixel_avg_variance64x64
+
+prototype unsigned int vp9_sub_pixel_variance32x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_sub_pixel_variance32x64
+
+prototype unsigned int vp9_sub_pixel_avg_variance32x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
+specialize vp9_sub_pixel_avg_variance32x64
+
+prototype unsigned int vp9_sub_pixel_variance64x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_sub_pixel_variance64x32
+
+prototype unsigned int vp9_sub_pixel_avg_variance64x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
+specialize vp9_sub_pixel_avg_variance64x32
+
+prototype unsigned int vp9_sub_pixel_variance32x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_sub_pixel_variance32x16
+
+prototype unsigned int vp9_sub_pixel_avg_variance32x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
+specialize vp9_sub_pixel_avg_variance32x16
+
+prototype unsigned int vp9_sub_pixel_variance16x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_sub_pixel_variance16x32
+
+prototype unsigned int vp9_sub_pixel_avg_variance16x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
+specialize vp9_sub_pixel_avg_variance16x32
+
 prototype unsigned int vp9_sub_pixel_variance32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
 specialize vp9_sub_pixel_variance32x32 sse2

+prototype unsigned int vp9_sub_pixel_avg_variance32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
+specialize vp9_sub_pixel_avg_variance32x32
+
 prototype unsigned int vp9_sub_pixel_variance16x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
 specialize vp9_sub_pixel_variance16x16 sse2 mmx ssse3

+prototype unsigned int vp9_sub_pixel_avg_variance16x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
+specialize vp9_sub_pixel_avg_variance16x16
+
 prototype unsigned int vp9_sub_pixel_variance8x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
 specialize vp9_sub_pixel_variance8x16 sse2 mmx
 vp9_sub_pixel_variance8x16_sse2=vp9_sub_pixel_variance8x16_wmt

+prototype unsigned int vp9_sub_pixel_avg_variance8x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
+specialize vp9_sub_pixel_avg_variance8x16
+
 prototype unsigned int vp9_sub_pixel_variance16x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
 specialize vp9_sub_pixel_variance16x8 sse2 mmx ssse3
 vp9_sub_pixel_variance16x8_sse2=vp9_sub_pixel_variance16x8_ssse3;
 vp9_sub_pixel_variance16x8_sse2=vp9_sub_pixel_variance16x8_wmt

+prototype unsigned int vp9_sub_pixel_avg_variance16x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
+specialize vp9_sub_pixel_avg_variance16x8
+
 prototype unsigned int vp9_sub_pixel_variance8x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
 specialize vp9_sub_pixel_variance8x8 sse2 mmx
 vp9_sub_pixel_variance8x8_sse2=vp9_sub_pixel_variance8x8_wmt

+prototype unsigned int vp9_sub_pixel_avg_variance8x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
+specialize vp9_sub_pixel_avg_variance8x8
+
+# TODO(jingning): need to convert 8x4/4x8 functions into mmx/sse form
+prototype unsigned int vp9_sub_pixel_variance8x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_sub_pixel_variance8x4
+
+prototype unsigned int vp9_sub_pixel_avg_variance8x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
+specialize vp9_sub_pixel_avg_variance8x4
+
+prototype unsigned int vp9_sub_pixel_variance4x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_sub_pixel_variance4x8
+
+prototype unsigned int vp9_sub_pixel_avg_variance4x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
+specialize vp9_sub_pixel_avg_variance4x8
+
 prototype unsigned int vp9_sub_pixel_variance4x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
 specialize vp9_sub_pixel_variance4x4 sse2 mmx
 vp9_sub_pixel_variance4x4_sse2=vp9_sub_pixel_variance4x4_wmt

+prototype unsigned int vp9_sub_pixel_avg_variance4x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
+specialize vp9_sub_pixel_avg_variance4x4
+
 prototype unsigned int vp9_sad64x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
 specialize vp9_sad64x64 sse2

+prototype unsigned int vp9_sad32x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
+specialize vp9_sad32x64 sse2
+
+prototype unsigned int vp9_sad64x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
+specialize vp9_sad64x32 sse2
+
+prototype unsigned int vp9_sad32x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
+specialize vp9_sad32x16 sse2
+
+prototype unsigned int vp9_sad16x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
+specialize vp9_sad16x32 sse2
+
 prototype unsigned int vp9_sad32x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
 specialize vp9_sad32x32 sse2
@@ -483,6 +385,13 @@ specialize vp9_sad8x16 mmx sse2
 prototype unsigned int vp9_sad8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
 specialize vp9_sad8x8 mmx sse2

+# TODO(jingning): need to convert these functions into mmx/sse2 form
+prototype unsigned int vp9_sad8x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
+specialize vp9_sad8x4
+
+prototype unsigned int vp9_sad4x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
+specialize vp9_sad4x8
+
 prototype unsigned int vp9_sad4x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
 specialize vp9_sad4x4 mmx sse
@@ -555,12 +464,30 @@ specialize vp9_sad8x16x8 sse4
 prototype void vp9_sad8x8x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"
 specialize vp9_sad8x8x8 sse4

+prototype void vp9_sad8x4x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"
+specialize vp9_sad8x4x8
+
+prototype void vp9_sad4x8x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"
+specialize vp9_sad4x8x8
+
 prototype void vp9_sad4x4x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"
 specialize vp9_sad4x4x8 sse4

 prototype void vp9_sad64x64x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
 specialize vp9_sad64x64x4d sse2

+prototype void vp9_sad32x64x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
+specialize vp9_sad32x64x4d sse2
+
+prototype void vp9_sad64x32x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
+specialize vp9_sad64x32x4d sse2
+
+prototype void vp9_sad32x16x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
+specialize vp9_sad32x16x4d sse2
+
+prototype void vp9_sad16x32x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
+specialize vp9_sad16x32x4d sse2
+
 prototype void vp9_sad32x32x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
 specialize vp9_sad32x32x4d sse2
@@ -576,6 +503,13 @@ specialize vp9_sad8x16x4d sse2
 prototype void vp9_sad8x8x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
 specialize vp9_sad8x8x4d sse2

+# TODO(jingning): need to convert these 4x8/8x4 functions into sse2 form
+prototype void vp9_sad8x4x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
+specialize vp9_sad8x4x4d
+
+prototype void vp9_sad4x8x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
+specialize vp9_sad4x8x4d
+
 prototype void vp9_sad4x4x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
 specialize vp9_sad4x4x4d sse
 prototype unsigned int vp9_sub_pixel_mse16x16 "const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse"
@@ -585,6 +519,15 @@ prototype unsigned int vp9_mse16x16 "const uint8_t *src_ptr, int source_stride,
 specialize vp9_mse16x16 mmx sse2
 vp9_mse16x16_sse2=vp9_mse16x16_wmt

+prototype unsigned int vp9_mse8x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"
+specialize vp9_mse8x16
+
+prototype unsigned int vp9_mse16x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"
+specialize vp9_mse16x8
+
+prototype unsigned int vp9_mse8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"
+specialize vp9_mse8x8
+
 prototype unsigned int vp9_sub_pixel_mse64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
 specialize vp9_sub_pixel_mse64x64
@@ -594,30 +537,11 @@ specialize vp9_sub_pixel_mse32x32

 prototype unsigned int vp9_get_mb_ss "const int16_t *"
 specialize vp9_get_mb_ss mmx sse2
 # ENCODEMB INVOKE
-prototype int vp9_mbblock_error "struct macroblock *mb"
-specialize vp9_mbblock_error mmx sse2
-vp9_mbblock_error_sse2=vp9_mbblock_error_xmm

 prototype int vp9_block_error "int16_t *coeff, int16_t *dqcoeff, int block_size"
 specialize vp9_block_error mmx sse2
 vp9_block_error_sse2=vp9_block_error_xmm

-prototype void vp9_subtract_b "struct block *be, struct blockd *bd, int pitch"
-specialize vp9_subtract_b mmx sse2
-
-prototype int vp9_mbuverror "struct macroblock *mb"
-specialize vp9_mbuverror mmx sse2
-vp9_mbuverror_sse2=vp9_mbuverror_xmm
-
-prototype void vp9_subtract_b "struct block *be, struct blockd *bd, int pitch"
-specialize vp9_subtract_b mmx sse2
-
-prototype void vp9_subtract_mby "int16_t *diff, uint8_t *src, uint8_t *pred, int stride"
-specialize vp9_subtract_mby mmx sse2
-
-prototype void vp9_subtract_mbuv "int16_t *diff, uint8_t *usrc, uint8_t *vsrc, uint8_t *pred, int stride"
-specialize vp9_subtract_mbuv mmx sse2
-
 #
 # Structured Similarity (SSIM)
 #
@@ -665,16 +589,16 @@ specialize vp9_short_walsh8x4

 #
 # Motion search
 #
-prototype int vp9_full_search_sad "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, union int_mv *center_mv"
+prototype int vp9_full_search_sad "struct macroblock *x, union int_mv *ref_mv, int sad_per_bit, int distance, struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, union int_mv *center_mv, int n"
 specialize vp9_full_search_sad sse3 sse4_1
 vp9_full_search_sad_sse3=vp9_full_search_sadx3
 vp9_full_search_sad_sse4_1=vp9_full_search_sadx8

-prototype int vp9_refining_search_sad "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, union int_mv *center_mv"
+prototype int vp9_refining_search_sad "struct macroblock *x, union int_mv *ref_mv, int sad_per_bit, int distance, struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, union int_mv *center_mv"
 specialize vp9_refining_search_sad sse3
 vp9_refining_search_sad_sse3=vp9_refining_search_sadx4

-prototype int vp9_diamond_search_sad "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, union int_mv *best_mv, int search_param, int sad_per_bit, int *num00, struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, union int_mv *center_mv"
+prototype int vp9_diamond_search_sad "struct macroblock *x, union int_mv *ref_mv, union int_mv *best_mv, int search_param, int sad_per_bit, int *num00, struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, union int_mv *center_mv"
 specialize vp9_diamond_search_sad sse3
 vp9_diamond_search_sad_sse3=vp9_diamond_search_sadx4
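For context on the conventions above: each `prototype` line declares a function that the codec dispatches at run time, and `specialize` lists the SIMD flavors implemented for it. A sketch of roughly what the generated vp9_rtcd.h ends up containing for one such pair follows; the exact macro and setup-function names are assumptions based on how libvpx's rtcd generator usually works, not something shown in this patch.

unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride,
                                 const uint8_t *ref_ptr, int ref_stride,
                                 unsigned int *sse);
unsigned int vp9_variance32x32_sse2(const uint8_t *src_ptr, int source_stride,
                                    const uint8_t *ref_ptr, int ref_stride,
                                    unsigned int *sse);
/* With RTCD enabled, vp9_variance32x32 is a function pointer that
   setup_rtcd_internal() aims at the best available flavor: the _sse2
   version when SSE2 is detected at run time, otherwise the plain _c one. */
RTCD_EXTERN unsigned int (*vp9_variance32x32)(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int ref_stride,
                                              unsigned int *sse);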
diff --git a/vp9/common/vp9_seg_common.c b/vp9/common/vp9_seg_common.c
index 44d3172939801ecef3425094e232905af764c88d..df7747c909fdcadce64eb95d1fe9b76a703f4ca0 100644
--- a/vp9/common/vp9_seg_common.c
+++ b/vp9/common/vp9_seg_common.c
@@ -12,21 +12,18 @@
 #include "vp9/common/vp9_blockd.h"
 #include "vp9/common/vp9_seg_common.h"

-static const int segfeaturedata_signed[SEG_LVL_MAX] = { 1, 1, 0, 0 };
-static const int seg_feature_data_max[SEG_LVL_MAX] = { MAXQ, 63, 0xf, 0xf };
+static const int seg_feature_data_signed[SEG_LVL_MAX] = { 1, 1, 0, 0 };
+static const int seg_feature_data_max[SEG_LVL_MAX] = { MAXQ, 63, 3, 0 };

 // These functions provide access to new segment level features.
 // Eventually these function may be "optimized out" but for the moment,
 // the coding mechanism is still subject to change so these provide a
 // convenient single point of change.

-int vp9_segfeature_active(const MACROBLOCKD *xd,
-                          int segment_id,
+int vp9_segfeature_active(const MACROBLOCKD *xd, int segment_id,
                           SEG_LVL_FEATURES feature_id) {
-  // Return true if mask bit set and segmentation enabled.
-  return (xd->segmentation_enabled &&
-          (xd->segment_feature_mask[segment_id] &
-           (0x01 << feature_id)));
+  return xd->segmentation_enabled &&
+         (xd->segment_feature_mask[segment_id] & (1 << feature_id));
 }

 void vp9_clearall_segfeatures(MACROBLOCKD *xd) {
@@ -34,14 +31,12 @@ void vp9_clearall_segfeatures(MACROBLOCKD *xd) {
   vpx_memset(xd->segment_feature_mask, 0, sizeof(xd->segment_feature_mask));
 }

-void vp9_enable_segfeature(MACROBLOCKD *xd,
-                           int segment_id,
+void vp9_enable_segfeature(MACROBLOCKD *xd, int segment_id,
                            SEG_LVL_FEATURES feature_id) {
-  xd->segment_feature_mask[segment_id] |= (0x01 << feature_id);
+  xd->segment_feature_mask[segment_id] |= 1 << feature_id;
 }

-void vp9_disable_segfeature(MACROBLOCKD *xd,
-                            int segment_id,
+void vp9_disable_segfeature(MACROBLOCKD *xd, int segment_id,
                             SEG_LVL_FEATURES feature_id) {
   xd->segment_feature_mask[segment_id] &= ~(1 << feature_id);
 }
@@ -51,55 +46,35 @@ int vp9_seg_feature_data_max(SEG_LVL_FEATURES feature_id) {
 }

 int vp9_is_segfeature_signed(SEG_LVL_FEATURES feature_id) {
-  return segfeaturedata_signed[feature_id];
+  return seg_feature_data_signed[feature_id];
 }

-void vp9_clear_segdata(MACROBLOCKD *xd,
-                       int segment_id,
+void vp9_clear_segdata(MACROBLOCKD *xd, int segment_id,
                        SEG_LVL_FEATURES feature_id) {
   xd->segment_feature_data[segment_id][feature_id] = 0;
 }

-void vp9_set_segdata(MACROBLOCKD *xd,
-                     int segment_id,
-                     SEG_LVL_FEATURES feature_id,
-                     int seg_data) {
+void vp9_set_segdata(MACROBLOCKD *xd, int segment_id,
+                     SEG_LVL_FEATURES feature_id, int seg_data) {
   assert(seg_data <= seg_feature_data_max[feature_id]);
   if (seg_data < 0) {
-    assert(segfeaturedata_signed[feature_id]);
+    assert(seg_feature_data_signed[feature_id]);
     assert(-seg_data <= seg_feature_data_max[feature_id]);
   }

   xd->segment_feature_data[segment_id][feature_id] = seg_data;
 }

-int vp9_get_segdata(const MACROBLOCKD *xd,
-                    int segment_id,
+int vp9_get_segdata(const MACROBLOCKD *xd, int segment_id,
                     SEG_LVL_FEATURES feature_id) {
   return xd->segment_feature_data[segment_id][feature_id];
 }

-void vp9_clear_segref(MACROBLOCKD *xd, int segment_id) {
-  xd->segment_feature_data[segment_id][SEG_LVL_REF_FRAME] = 0;
-}
-void vp9_set_segref(MACROBLOCKD *xd,
-                    int segment_id,
-                    MV_REFERENCE_FRAME ref_frame) {
-  xd->segment_feature_data[segment_id][SEG_LVL_REF_FRAME] |=
-    (1 << ref_frame);
-}
+const vp9_tree_index vp9_segment_tree[14] = {
+  2,  4,  6,  8, 10, 12,
+  0, -1, -2, -3, -4, -5, -6, -7
+};

-int vp9_check_segref(const MACROBLOCKD *xd,
-                     int segment_id,
-                     MV_REFERENCE_FRAME ref_frame) {
-  return (xd->segment_feature_data[segment_id][SEG_LVL_REF_FRAME] &
-          (1 << ref_frame)) ? 1 : 0;
-}
-
-int vp9_check_segref_inter(MACROBLOCKD *xd, int segment_id) {
-  return (xd->segment_feature_data[segment_id][SEG_LVL_REF_FRAME] &
-          ~(1 << INTRA_FRAME)) ? 1 : 0;
-}

 // TBD? Functions to read and write segment data with range / validity checking
diff --git a/vp9/common/vp9_seg_common.h b/vp9/common/vp9_seg_common.h
index 2d0018b47fa6282a7678f482a2a382f2d91b9565..74ba03c3e36b7960f673a5c23b0e1d972d92ad97 100644
--- a/vp9/common/vp9_seg_common.h
+++ b/vp9/common/vp9_seg_common.h
@@ -45,17 +45,7 @@ int vp9_get_segdata(const MACROBLOCKD *xd,
                     int segment_id,
                     SEG_LVL_FEATURES feature_id);

-void vp9_clear_segref(MACROBLOCKD *xd, int segment_id);
-
-void vp9_set_segref(MACROBLOCKD *xd,
-                    int segment_id,
-                    MV_REFERENCE_FRAME ref_frame);
-
-int vp9_check_segref(const MACROBLOCKD *xd,
-                     int segment_id,
-                     MV_REFERENCE_FRAME ref_frame);
-
-int vp9_check_segref_inter(MACROBLOCKD *xd, int segment_id);
+extern const vp9_tree_index vp9_segment_tree[14];

 #endif  // VP9_COMMON_VP9_SEG_COMMON_H_
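The vp9_segment_tree table introduced here is a vp9_tree: positive entries index the next node pair, and entries less than or equal to zero are leaves whose token is the negated value, so this particular table is a balanced tree coding segment IDs 0..7 with exactly three binary decisions. A minimal decode sketch, with read_bit() standing in for the arithmetic bool decoder (a per-node-probability read that this patch does not show):

static int read_tree_token(const vp9_tree_index *tree,
                           int (*read_bit)(void *ctx), void *ctx) {
  vp9_tree_index i = 0;
  /* Walk from the root, taking the child selected by each decoded bit,
     until a non-positive (leaf) entry is reached. */
  while ((i = tree[i + read_bit(ctx)]) > 0)
    continue;
  return -i;  /* leaf: the token value */
}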
diff --git a/vp9/common/vp9_setupintrarecon.c b/vp9/common/vp9_setupintrarecon.c
deleted file mode 100644
index e02f5393ed0142fc0d99b95e09b5d0799f0e9e1e..0000000000000000000000000000000000000000
--- a/vp9/common/vp9_setupintrarecon.c
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "vp9/common/vp9_setupintrarecon.h"
-#include "vpx_mem/vpx_mem.h"
-
-void vp9_setup_intra_recon(YV12_BUFFER_CONFIG *ybf) {
-  int i;
-
-  /* set up frame new frame for intra coded blocks */
-  vpx_memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5);
-  for (i = 0; i < ybf->y_height; i++)
-    ybf->y_buffer[ybf->y_stride * i - 1] = (uint8_t) 129;
-
-  vpx_memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
-  for (i = 0; i < ybf->uv_height; i++)
-    ybf->u_buffer[ybf->uv_stride * i - 1] = (uint8_t) 129;
-
-  vpx_memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
-  for (i = 0; i < ybf->uv_height; i++)
-    ybf->v_buffer[ybf->uv_stride * i - 1] = (uint8_t) 129;
-
-}
diff --git a/vp9/common/vp9_setupintrarecon.h b/vp9/common/vp9_setupintrarecon.h
deleted file mode 100644
index e389f3c91627a236236010b09ab4d9e50f5f69d7..0000000000000000000000000000000000000000
--- a/vp9/common/vp9_setupintrarecon.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef VP9_COMMON_VP9_SETUPINTRARECON_H_
-#define VP9_COMMON_VP9_SETUPINTRARECON_H_
-
-#include "vpx_scale/yv12config.h"
-
-void vp9_setup_intra_recon(YV12_BUFFER_CONFIG *ybf);
-
-#endif  // VP9_COMMON_VP9_SETUPINTRARECON_H_
diff --git a/vp9/common/vp9_swapyv12buffer.c b/vp9/common/vp9_swapyv12buffer.c
deleted file mode 100644
index 10c6b41713b07afe6f14123221bbf9a43d4c0edf..0000000000000000000000000000000000000000
--- a/vp9/common/vp9_swapyv12buffer.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "vp9/common/vp9_swapyv12buffer.h"
-
-void vp9_swap_yv12_buffer(YV12_BUFFER_CONFIG *new_frame,
-                          YV12_BUFFER_CONFIG *last_frame) {
-  uint8_t *temp;
-
-  temp = last_frame->buffer_alloc;
-  last_frame->buffer_alloc = new_frame->buffer_alloc;
-  new_frame->buffer_alloc = temp;
-
-  temp = last_frame->y_buffer;
-  last_frame->y_buffer = new_frame->y_buffer;
-  new_frame->y_buffer = temp;
-
-  temp = last_frame->u_buffer;
-  last_frame->u_buffer = new_frame->u_buffer;
-  new_frame->u_buffer = temp;
-
-  temp = last_frame->v_buffer;
-  last_frame->v_buffer = new_frame->v_buffer;
-  new_frame->v_buffer = temp;
-}
diff --git a/vp9/common/vp9_swapyv12buffer.h b/vp9/common/vp9_swapyv12buffer.h
deleted file mode 100644
index 2e112069a58b3b22877c7470b1549d3356b7bcee..0000000000000000000000000000000000000000
--- a/vp9/common/vp9_swapyv12buffer.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef VP9_COMMON_VP9_SWAPYV12BUFFER_H_
-#define VP9_COMMON_VP9_SWAPYV12BUFFER_H_
-
-#include "vpx_scale/yv12config.h"
-
-void vp9_swap_yv12_buffer(YV12_BUFFER_CONFIG *new_frame,
-                          YV12_BUFFER_CONFIG *last_frame);
-
-#endif  // VP9_COMMON_VP9_SWAPYV12BUFFER_H_
diff --git a/vp9/common/vp9_tile_common.c b/vp9/common/vp9_tile_common.c
index b6178f27d9646f8b89a1ee744576fbebd2bbe807..95296ad6f2a4b7a64a67ce7eda12045395f1ceb5 100644
--- a/vp9/common/vp9_tile_common.c
+++ b/vp9/common/vp9_tile_common.c
@@ -17,27 +17,27 @@
 static void vp9_get_tile_offsets(VP9_COMMON *cm, int *min_tile_off,
                                  int *max_tile_off, int tile_idx,
-                                 int log2_n_tiles, int n_mbs) {
-  const int n_sbs = (n_mbs + 3) >> 2;
+                                 int log2_n_tiles, int n_mis) {
+  const int n_sbs = (n_mis + 7) >> 3;
   const int sb_off1 =  (tile_idx      * n_sbs) >> log2_n_tiles;
   const int sb_off2 = ((tile_idx + 1) * n_sbs) >> log2_n_tiles;

-  *min_tile_off = MIN(sb_off1 << 2, n_mbs);
-  *max_tile_off = MIN(sb_off2 << 2, n_mbs);
+  *min_tile_off = MIN(sb_off1 << 3, n_mis);
+  *max_tile_off = MIN(sb_off2 << 3, n_mis);
 }

 void vp9_get_tile_col_offsets(VP9_COMMON *cm, int tile_col_idx) {
   cm->cur_tile_col_idx = tile_col_idx;
-  vp9_get_tile_offsets(cm, &cm->cur_tile_mb_col_start,
-                       &cm->cur_tile_mb_col_end, tile_col_idx,
-                       cm->log2_tile_columns, cm->mb_cols);
+  vp9_get_tile_offsets(cm, &cm->cur_tile_mi_col_start,
+                       &cm->cur_tile_mi_col_end, tile_col_idx,
+                       cm->log2_tile_columns, cm->mi_cols);
 }

 void vp9_get_tile_row_offsets(VP9_COMMON *cm, int tile_row_idx) {
   cm->cur_tile_row_idx = tile_row_idx;
-  vp9_get_tile_offsets(cm, &cm->cur_tile_mb_row_start,
-                       &cm->cur_tile_mb_row_end, tile_row_idx,
-                       cm->log2_tile_rows, cm->mb_rows);
+  vp9_get_tile_offsets(cm, &cm->cur_tile_mi_row_start,
+                       &cm->cur_tile_mi_row_end, tile_row_idx,
+                       cm->log2_tile_rows, cm->mi_rows);
 }

@@ -49,10 +49,15 @@ void vp9_get_tile_n_bits(VP9_COMMON *cm, int *min_log2_n_tiles_ptr,
   for (max_log2_n_tiles = 0;
        (sb_cols >> max_log2_n_tiles) >= MIN_TILE_WIDTH_SBS;
        max_log2_n_tiles++) {}
+  max_log2_n_tiles--;
+  if (max_log2_n_tiles < 0)
+    max_log2_n_tiles = 0;
+
   for (min_log2_n_tiles = 0;
        (MAX_TILE_WIDTH_SBS << min_log2_n_tiles) < sb_cols;
        min_log2_n_tiles++) {}

+  assert(max_log2_n_tiles >= min_log2_n_tiles);
   *min_log2_n_tiles_ptr = min_log2_n_tiles;
   *delta_log2_n_tiles = max_log2_n_tiles - min_log2_n_tiles;
 }
diff --git a/vp9/common/vp9_treecoder.c b/vp9/common/vp9_treecoder.c
index 6e2597954b454e3cdb516e1f5d4df72a4d06ea1d..531fa752b4ffb3f8d45021bc7ede487505009ead 100644
--- a/vp9/common/vp9_treecoder.c
+++ b/vp9/common/vp9_treecoder.c
@@ -14,36 +14,30 @@
 #if defined(CONFIG_DEBUG) && CONFIG_DEBUG
 #include <assert.h>
 #endif
-#include <stdio.h>

 #include "vp9/common/vp9_treecoder.h"

-static void tree2tok(
-  struct vp9_token_struct *const p,
-  vp9_tree t,
-  int i,
-  int v,
-  int L
-) {
+static void tree2tok(struct vp9_token *const p, vp9_tree t,
+                     int i, int v, int l) {
   v += v;
-  ++L;
+  ++l;

   do {
     const vp9_tree_index j = t[i++];

     if (j <= 0) {
       p[-j].value = v;
-      p[-j].Len = L;
+      p[-j].len = l;
     } else
-      tree2tok(p, t, j, v, L);
+      tree2tok(p, t, j, v, l);
   } while (++v & 1);
 }

-void vp9_tokens_from_tree(struct vp9_token_struct *p, vp9_tree t) {
+void vp9_tokens_from_tree(struct vp9_token *p, vp9_tree t) {
   tree2tok(p, t, 0, 0, 0);
 }

-void vp9_tokens_from_tree_offset(struct vp9_token_struct *p, vp9_tree t,
+void vp9_tokens_from_tree_offset(struct vp9_token *p, vp9_tree t,
                                  int offset) {
   tree2tok(p - offset, t, 0, 0, 0);
 }
@@ -62,12 +56,12 @@ static unsigned int convert_distribution(unsigned int i,
     left = convert_distribution(tree[i], tree, probs, branch_ct,
                                 num_events, tok0_offset);
   }
-  if (tree[i + 1] <= 0) {
+  if (tree[i + 1] <= 0)
     right = num_events[-tree[i + 1] - tok0_offset];
-  } else {
+  else
     right = convert_distribution(tree[i + 1], tree, probs, branch_ct,
-                                 num_events, tok0_offset);
-  }
+                                 num_events, tok0_offset);
+
   probs[i>>1] = get_binary_prob(left, right);
   branch_ct[i>>1][0] = left;
   branch_ct[i>>1][1] = right;
diff --git a/vp9/common/vp9_treecoder.h b/vp9/common/vp9_treecoder.h
index 9297d5280157691ec8b490508079a44566d511a1..ebcd4116f8c05159302024b151ce4607ffb445c7 100644
--- a/vp9/common/vp9_treecoder.h
+++ b/vp9/common/vp9_treecoder.h
@@ -13,6 +13,7 @@
 #include "./vpx_config.h"
 #include "vpx/vpx_integer.h"
+#include "vp9/common/vp9_common.h"

 typedef uint8_t vp9_prob;
@@ -31,16 +32,15 @@ typedef int8_t vp9_tree_index;

 typedef const vp9_tree_index vp9_tree[], *vp9_tree_p;

-typedef const struct vp9_token_struct {
+struct vp9_token {
   int value;
-  int Len;
-} vp9_token;
+  int len;
+};

 /* Construct encoding array from tree. */

-void vp9_tokens_from_tree(struct vp9_token_struct *, vp9_tree);
-void vp9_tokens_from_tree_offset(struct vp9_token_struct *, vp9_tree,
-                                 int offset);
+void vp9_tokens_from_tree(struct vp9_token*, vp9_tree);
+void vp9_tokens_from_tree_offset(struct vp9_token*, vp9_tree, int offset);

 /* Convert array of token occurrence counts into a table of probabilities
    for the associated binary encoding tree.  Also writes count of branches
@@ -76,7 +76,7 @@ static INLINE vp9_prob get_binary_prob(int n0, int n1) {

 /* this function assumes prob1 and prob2 are already within [1,255] range */
 static INLINE vp9_prob weighted_prob(int prob1, int prob2, int factor) {
-  return (prob1 * (256 - factor) + prob2 * factor + 128) >> 8;
+  return ROUND_POWER_OF_TWO(prob1 * (256 - factor) + prob2 * factor, 8);
 }

 #endif  // VP9_COMMON_VP9_TREECODER_H_
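The weighted_prob() change above swaps a hand-written add-128-then-shift for the shared ROUND_POWER_OF_TWO helper pulled in via vp9_common.h; the behavior should be identical. Presumably (the definition is not shown in this patch) the macro is the usual round-to-nearest shift:

/* Assumed definition: rounds value down to the nearest multiple of 2^n,
   after adding half of the divisor for round-to-nearest behavior. */
#define ROUND_POWER_OF_TWO(value, n)  (((value) + (1 << ((n) - 1))) >> (n))

/* Under that assumption the new body expands to exactly the old one:
   ROUND_POWER_OF_TWO(prob1 * (256 - factor) + prob2 * factor, 8)
     == (prob1 * (256 - factor) + prob2 * factor + 128) >> 8 */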
diff --git a/vp9/common/x86/vp9_asm_stubs.c b/vp9/common/x86/vp9_asm_stubs.c
index 6d3bb021a7d31ddad0ac8cdce3067abb01121146..2b66834a783bd72602edbfcfb601fa841b98b982 100644
--- a/vp9/common/x86/vp9_asm_stubs.c
+++ b/vp9/common/x86/vp9_asm_stubs.c
@@ -278,45 +278,20 @@ void vp9_convolve8_ssse3(const uint8_t *src, int src_stride,
                          uint8_t *dst, int dst_stride,
                          const int16_t *filter_x, int x_step_q4,
                          const int16_t *filter_y, int y_step_q4,
                          int w, int h) {
-  DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 16*23);
+  DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64*71);

-  // check w/h due to fixed size fdata2 array
-  assert(w <= 16);
-  assert(h <= 16);
-
-  if (x_step_q4 == 16 && y_step_q4 == 16 &&
-      filter_x[3] != 128 && filter_y[3] != 128) {
-    if (w == 16) {
-      vp9_filter_block1d16_h8_ssse3(src - 3 * src_stride, src_stride,
-                                    fdata2, 16,
-                                    h + 7, filter_x);
-      vp9_filter_block1d16_v8_ssse3(fdata2, 16,
-                                    dst, dst_stride,
-                                    h, filter_y);
-      return;
-    }
-    if (w == 8) {
-      vp9_filter_block1d8_h8_ssse3(src - 3 * src_stride, src_stride,
-                                   fdata2, 16,
-                                   h + 7, filter_x);
-      vp9_filter_block1d8_v8_ssse3(fdata2, 16,
-                                   dst, dst_stride,
-                                   h, filter_y);
-      return;
-    }
-    if (w == 4) {
-      vp9_filter_block1d4_h8_ssse3(src - 3 * src_stride, src_stride,
-                                   fdata2, 16,
-                                   h + 7, filter_x);
-      vp9_filter_block1d4_v8_ssse3(fdata2, 16,
-                                   dst, dst_stride,
-                                   h, filter_y);
-      return;
-    }
+  assert(w <= 64);
+  assert(h <= 64);
+  if (x_step_q4 == 16 && y_step_q4 == 16) {
+    vp9_convolve8_horiz_ssse3(src - 3 * src_stride, src_stride, fdata2, 64,
+                              filter_x, x_step_q4, filter_y, y_step_q4,
+                              w, h + 7);
+    vp9_convolve8_vert_ssse3(fdata2 + 3 * 64, 64, dst, dst_stride,
+                             filter_x, x_step_q4, filter_y, y_step_q4, w, h);
+  } else {
+    vp9_convolve8_c(src, src_stride, dst, dst_stride,
+                    filter_x, x_step_q4, filter_y, y_step_q4, w, h);
   }
-  vp9_convolve8_c(src, src_stride, dst, dst_stride,
-                  filter_x, x_step_q4, filter_y, y_step_q4,
-                  w, h);
 }

 void vp9_convolve8_avg_ssse3(const uint8_t *src, int src_stride,
@@ -324,44 +299,20 @@ void vp9_convolve8_avg_ssse3(const uint8_t *src, int src_stride,
                              const int16_t *filter_x, int x_step_q4,
                              const int16_t *filter_y, int y_step_q4,
                              int w, int h) {
-  DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 16*23);
-
-  // check w/h due to fixed size fdata2 array
-  assert(w <= 16);
-  assert(h <= 16);
+  DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64*71);

-  if (x_step_q4 == 16 && y_step_q4 == 16 &&
-      filter_x[3] != 128 && filter_y[3] != 128) {
-    if (w == 16) {
-      vp9_filter_block1d16_h8_ssse3(src - 3 * src_stride, src_stride,
-                                    fdata2, 16,
-                                    h + 7, filter_x);
-      vp9_filter_block1d16_v8_avg_ssse3(fdata2, 16,
-                                        dst, dst_stride,
-                                        h, filter_y);
-      return;
-    }
-    if (w == 8) {
-      vp9_filter_block1d8_h8_ssse3(src - 3 * src_stride, src_stride,
-                                   fdata2, 16,
-                                   h + 7, filter_x);
-      vp9_filter_block1d8_v8_avg_ssse3(fdata2, 16,
-                                       dst, dst_stride,
-                                       h, filter_y);
-      return;
-    }
-    if (w == 4) {
-      vp9_filter_block1d4_h8_ssse3(src - 3 * src_stride, src_stride,
-                                   fdata2, 16,
-                                   h + 7, filter_x);
-      vp9_filter_block1d4_v8_avg_ssse3(fdata2, 16,
-                                       dst, dst_stride,
-                                       h, filter_y);
-      return;
-    }
+  assert(w <= 64);
+  assert(h <= 64);
+  if (x_step_q4 == 16 && y_step_q4 == 16) {
+    vp9_convolve8_horiz_ssse3(src - 3 * src_stride, src_stride, fdata2, 64,
+                              filter_x, x_step_q4, filter_y, y_step_q4,
+                              w, h + 7);
+    vp9_convolve8_avg_vert_ssse3(fdata2 + 3 * 64, 64, dst, dst_stride,
+                                 filter_x, x_step_q4, filter_y, y_step_q4,
+                                 w, h);
+  } else {
+    vp9_convolve8_avg_c(src, src_stride, dst, dst_stride,
+                        filter_x, x_step_q4, filter_y, y_step_q4, w, h);
   }
-  vp9_convolve8_avg_c(src, src_stride, dst, dst_stride,
-                      filter_x, x_step_q4, filter_y, y_step_q4,
-                      w, h);
 }
 #endif
diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.c b/vp9/common/x86/vp9_idct_intrin_sse2.c
index dd7e68aa3be68227bfc3b6ef3e8d8797775c6822..599dcff93ea7c2d06e14949684fe61f1b0998df0 100644
--- a/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -73,7 +73,7 @@ void vp9_dc_only_idct_add_sse2(int input_dc, uint8_t *pred_ptr,
   *(int *)dst_ptr = _mm_cvtsi128_si32(p1);
 }

-void vp9_short_idct4x4_sse2(int16_t *input, int16_t *output, int pitch) {
+void vp9_short_idct4x4_add_sse2(int16_t *input, uint8_t *dest, int stride) {
   const __m128i zero = _mm_setzero_si128();
   const __m128i eight = _mm_set1_epi16(8);
   const __m128i cst = _mm_setr_epi16((int16_t)cospi_16_64, (int16_t)cospi_16_64,
@@ -81,7 +81,6 @@ void vp9_short_idct4x4_sse2(int16_t *input, int16_t *output, int pitch) {
                                      (int16_t)cospi_24_64, (int16_t)-cospi_8_64,
                                      (int16_t)cospi_8_64, (int16_t)cospi_24_64);
   const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
-  const int half_pitch = pitch >> 1;
   __m128i input0, input1, input2, input3;

   // Rows
@@ -188,14 +187,23 @@ void vp9_short_idct4x4_sse2(int16_t *input, int16_t *output, int pitch) {
   input2 = _mm_srai_epi16(input2, 4);
   input3 = _mm_srai_epi16(input3, 4);

-  // Store results
-  _mm_storel_epi64((__m128i *)output, input2);
-  input2 = _mm_srli_si128(input2, 8);
-  _mm_storel_epi64((__m128i *)(output + half_pitch), input2);
+#define RECON_AND_STORE4X4(dest, in_x) \
+  { \
+    __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest)); \
+    d0 = _mm_unpacklo_epi8(d0, zero); \
+    d0 = _mm_add_epi16(in_x, d0); \
+    d0 = _mm_packus_epi16(d0, d0); \
+    *(int *)dest = _mm_cvtsi128_si32(d0); \
+    dest += stride; \
+  }

-  _mm_storel_epi64((__m128i *)(output + 3 * half_pitch), input3);
-  input3 = _mm_srli_si128(input3, 8);
-  _mm_storel_epi64((__m128i *)(output + 2 * half_pitch), input3);
+  input0 = _mm_srli_si128(input2, 8);
+  input1 = _mm_srli_si128(input3, 8);
+
+  RECON_AND_STORE4X4(dest, input2);
+  RECON_AND_STORE4X4(dest, input0);
+  RECON_AND_STORE4X4(dest, input1);
+  RECON_AND_STORE4X4(dest, input3);
 }

 void vp9_idct4_1d_sse2(int16_t *input, int16_t *output) {
@@ -403,8 +411,18 @@ void vp9_idct4_1d_sse2(int16_t *input, int16_t *output) {
     in6 = _mm_subs_epi16(stp1_1, stp1_6); \
     in7 = _mm_subs_epi16(stp1_0, stp2_7);

-void vp9_short_idct8x8_sse2(int16_t *input, int16_t *output, int pitch) {
-  const int half_pitch = pitch >> 1;
+#define RECON_AND_STORE(dest, in_x) \
+  { \
+     __m128i d0 = _mm_loadl_epi64((__m128i *)(dest)); \
+     d0 = _mm_unpacklo_epi8(d0, zero); \
+     in_x = _mm_add_epi16(in_x, d0); \
+     in_x = _mm_packus_epi16(in_x, in_x); \
+     _mm_storel_epi64((__m128i *)(dest), in_x); \
+     dest += stride; \
+  }
+
+void vp9_short_idct8x8_add_sse2(int16_t *input, uint8_t *dest, int stride) {
+  const __m128i zero = _mm_setzero_si128();
   const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
   const __m128i final_rounding = _mm_set1_epi16(1<<4);
   const __m128i stg1_0 = pair_set_epi16(cospi_28_64, -cospi_4_64);
@@ -461,19 +479,17 @@ void vp9_short_idct8x8_sse2(int16_t *input, int16_t *output, int pitch) {
   in6 = _mm_srai_epi16(in6, 5);
   in7 = _mm_srai_epi16(in7, 5);

-  // Store results
-  _mm_store_si128((__m128i *)output, in0);
-  _mm_store_si128((__m128i *)(output + half_pitch * 1), in1);
-  _mm_store_si128((__m128i *)(output + half_pitch * 2), in2);
-  _mm_store_si128((__m128i *)(output + half_pitch * 3), in3);
-  _mm_store_si128((__m128i *)(output + half_pitch * 4), in4);
-  _mm_store_si128((__m128i *)(output + half_pitch * 5), in5);
-  _mm_store_si128((__m128i *)(output + half_pitch * 6), in6);
-  _mm_store_si128((__m128i *)(output + half_pitch * 7), in7);
+  RECON_AND_STORE(dest, in0);
+  RECON_AND_STORE(dest, in1);
+  RECON_AND_STORE(dest, in2);
+  RECON_AND_STORE(dest, in3);
+  RECON_AND_STORE(dest, in4);
+  RECON_AND_STORE(dest, in5);
+  RECON_AND_STORE(dest, in6);
+  RECON_AND_STORE(dest, in7);
 }

-void vp9_short_idct10_8x8_sse2(int16_t *input, int16_t *output, int pitch) {
-  const int half_pitch = pitch >> 1;
+void vp9_short_idct10_8x8_add_sse2(int16_t *input, uint8_t *dest, int stride) {
   const __m128i zero = _mm_setzero_si128();
   const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
   const __m128i final_rounding = _mm_set1_epi16(1<<4);
@@ -612,15 +628,14 @@ void vp9_short_idct10_8x8_sse2(int16_t *input, int16_t *output, int pitch) {
   in6 = _mm_srai_epi16(in6, 5);
   in7 = _mm_srai_epi16(in7, 5);

-  // Store results
-  _mm_store_si128((__m128i *)output, in0);
-  _mm_store_si128((__m128i *)(output + half_pitch * 1), in1);
-  _mm_store_si128((__m128i *)(output + half_pitch * 2), in2);
-  _mm_store_si128((__m128i *)(output + half_pitch * 3), in3);
-  _mm_store_si128((__m128i *)(output + half_pitch * 4), in4);
-  _mm_store_si128((__m128i *)(output + half_pitch * 5), in5);
-  _mm_store_si128((__m128i *)(output + half_pitch * 6), in6);
-  _mm_store_si128((__m128i *)(output + half_pitch * 7), in7);
+  RECON_AND_STORE(dest, in0);
+  RECON_AND_STORE(dest, in1);
+  RECON_AND_STORE(dest, in2);
+  RECON_AND_STORE(dest, in3);
+  RECON_AND_STORE(dest, in4);
+  RECON_AND_STORE(dest, in5);
+  RECON_AND_STORE(dest, in6);
+  RECON_AND_STORE(dest, in7);
 }

 #define IDCT16x16_1D \
@@ -752,8 +767,7 @@ void vp9_short_idct10_8x8_sse2(int16_t *input, int16_t *output, int pitch) {
     stp2_10, stp2_13, stp2_11, stp2_12) \
 }

-void vp9_short_idct16x16_sse2(int16_t *input, int16_t *output, int pitch) {
-  const int half_pitch = pitch >> 1;
+void vp9_short_idct16x16_add_sse2(int16_t *input, uint8_t *dest, int stride) {
   const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
   const __m128i final_rounding = _mm_set1_epi16(1<<5);
   const __m128i zero = _mm_setzero_si128();
@@ -938,31 +952,30 @@ void vp9_short_idct16x16_sse2(int16_t *input, int16_t *output, int pitch) {
       in14 = _mm_srai_epi16(in14, 6);
       in15 = _mm_srai_epi16(in15, 6);

-      // Store results
-      _mm_store_si128((__m128i *)output, in0);
-      _mm_store_si128((__m128i *)(output + half_pitch * 1), in1);
-      _mm_store_si128((__m128i *)(output + half_pitch * 2), in2);
-      _mm_store_si128((__m128i *)(output + half_pitch * 3), in3);
-      _mm_store_si128((__m128i *)(output + half_pitch * 4), in4);
-      _mm_store_si128((__m128i *)(output + half_pitch * 5), in5);
-      _mm_store_si128((__m128i *)(output + half_pitch * 6), in6);
-      _mm_store_si128((__m128i *)(output + half_pitch * 7), in7);
-      _mm_store_si128((__m128i *)(output + half_pitch * 8), in8);
-      _mm_store_si128((__m128i *)(output + half_pitch * 9), in9);
-      _mm_store_si128((__m128i *)(output + half_pitch * 10), in10);
-      _mm_store_si128((__m128i *)(output + half_pitch * 11), in11);
-      _mm_store_si128((__m128i *)(output + half_pitch * 12), in12);
-      _mm_store_si128((__m128i *)(output + half_pitch * 13), in13);
-      _mm_store_si128((__m128i *)(output + half_pitch * 14), in14);
-      _mm_store_si128((__m128i *)(output + half_pitch * 15), in15);
-
-      output += 8;
+      RECON_AND_STORE(dest, in0);
+      RECON_AND_STORE(dest, in1);
+      RECON_AND_STORE(dest, in2);
+      RECON_AND_STORE(dest, in3);
+      RECON_AND_STORE(dest, in4);
+      RECON_AND_STORE(dest, in5);
+      RECON_AND_STORE(dest, in6);
+      RECON_AND_STORE(dest, in7);
+      RECON_AND_STORE(dest, in8);
+      RECON_AND_STORE(dest, in9);
+      RECON_AND_STORE(dest, in10);
+      RECON_AND_STORE(dest, in11);
+      RECON_AND_STORE(dest, in12);
+      RECON_AND_STORE(dest, in13);
+      RECON_AND_STORE(dest, in14);
+      RECON_AND_STORE(dest, in15);
+
+      dest += 8 - (stride * 16);
     }
   }
 }

-void vp9_short_idct10_16x16_sse2(int16_t *input, int16_t *output, int pitch) {
-  const int half_pitch = pitch >> 1;
+void vp9_short_idct10_16x16_add_sse2(int16_t *input, uint8_t *dest,
+                                     int stride) {
   const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
   const __m128i final_rounding = _mm_set1_epi16(1<<5);
   const __m128i zero = _mm_setzero_si128();
@@ -1007,7 +1020,6 @@ void vp9_short_idct10_16x16_sse2(int16_t *input, int16_t *output, int pitch) {
           stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14, stp2_15;
   __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
   int i;
-
   // 1-D idct. Load input data.
   in0 = _mm_load_si128((__m128i *)input);
   in8 = _mm_load_si128((__m128i *)(input + 8 * 1));
@@ -1298,29 +1310,28 @@ void vp9_short_idct10_16x16_sse2(int16_t *input, int16_t *output, int pitch) {
     in14 = _mm_srai_epi16(in14, 6);
     in15 = _mm_srai_epi16(in15, 6);

-    // Store results
-    _mm_store_si128((__m128i *)output, in0);
-    _mm_store_si128((__m128i *)(output + half_pitch * 1), in1);
-    _mm_store_si128((__m128i *)(output + half_pitch * 2), in2);
-    _mm_store_si128((__m128i *)(output + half_pitch * 3), in3);
-    _mm_store_si128((__m128i *)(output + half_pitch * 4), in4);
-    _mm_store_si128((__m128i *)(output + half_pitch * 5), in5);
-    _mm_store_si128((__m128i *)(output + half_pitch * 6), in6);
-    _mm_store_si128((__m128i *)(output + half_pitch * 7), in7);
-    _mm_store_si128((__m128i *)(output + half_pitch * 8), in8);
-    _mm_store_si128((__m128i *)(output + half_pitch * 9), in9);
-    _mm_store_si128((__m128i *)(output + half_pitch * 10), in10);
-    _mm_store_si128((__m128i *)(output + half_pitch * 11), in11);
-    _mm_store_si128((__m128i *)(output + half_pitch * 12), in12);
-    _mm_store_si128((__m128i *)(output + half_pitch * 13), in13);
-    _mm_store_si128((__m128i *)(output + half_pitch * 14), in14);
-    _mm_store_si128((__m128i *)(output + half_pitch * 15), in15);
-    output += 8;
+    RECON_AND_STORE(dest, in0);
+    RECON_AND_STORE(dest, in1);
+    RECON_AND_STORE(dest, in2);
+    RECON_AND_STORE(dest, in3);
+    RECON_AND_STORE(dest, in4);
+    RECON_AND_STORE(dest, in5);
+    RECON_AND_STORE(dest, in6);
+    RECON_AND_STORE(dest, in7);
+    RECON_AND_STORE(dest, in8);
+    RECON_AND_STORE(dest, in9);
+    RECON_AND_STORE(dest, in10);
+    RECON_AND_STORE(dest, in11);
+    RECON_AND_STORE(dest, in12);
+    RECON_AND_STORE(dest, in13);
+    RECON_AND_STORE(dest, in14);
+    RECON_AND_STORE(dest, in15);
+
+    dest += 8 - (stride * 16);
   }
 }

-void vp9_short_idct32x32_sse2(int16_t *input, int16_t *output, int pitch) {
-  const int half_pitch = pitch >> 1;
+void vp9_short_idct32x32_add_sse2(int16_t *input, uint8_t *dest, int stride) {
   const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
   const __m128i final_rounding = _mm_set1_epi16(1<<5);
@@ -1832,6 +1843,8 @@ void vp9_short_idct32x32_sse2(int16_t *input, int16_t *output, int pitch) {
       col[i * 32 + 30] = _mm_sub_epi16(stp1_1, stp1_30);
       col[i * 32 + 31] = _mm_sub_epi16(stp1_0, stp1_31);
     } else {
+      const __m128i zero = _mm_setzero_si128();
+
       // 2_D: Calculate the results and store them to destination.
       in0 = _mm_add_epi16(stp1_0, stp1_31);
       in1 = _mm_add_epi16(stp1_1, stp1_30);
@@ -1933,41 +1946,40 @@ void vp9_short_idct32x32_sse2(int16_t *input, int16_t *output, int pitch) {
       in30 = _mm_srai_epi16(in30, 6);
      in31 = _mm_srai_epi16(in31, 6);

-      // Store results
-      _mm_store_si128((__m128i *)output, in0);
-      _mm_store_si128((__m128i *)(output + half_pitch * 1), in1);
-      _mm_store_si128((__m128i *)(output + half_pitch * 2), in2);
-      _mm_store_si128((__m128i *)(output + half_pitch * 3), in3);
-      _mm_store_si128((__m128i *)(output + half_pitch * 4), in4);
-      _mm_store_si128((__m128i *)(output + half_pitch * 5), in5);
-      _mm_store_si128((__m128i *)(output + half_pitch * 6), in6);
-      _mm_store_si128((__m128i *)(output + half_pitch * 7), in7);
-      _mm_store_si128((__m128i *)(output + half_pitch * 8), in8);
-      _mm_store_si128((__m128i *)(output + half_pitch * 9), in9);
-      _mm_store_si128((__m128i *)(output + half_pitch * 10), in10);
-      _mm_store_si128((__m128i *)(output + half_pitch * 11), in11);
-      _mm_store_si128((__m128i *)(output + half_pitch * 12), in12);
-      _mm_store_si128((__m128i *)(output + half_pitch * 13), in13);
-      _mm_store_si128((__m128i *)(output + half_pitch * 14), in14);
-      _mm_store_si128((__m128i *)(output + half_pitch * 15), in15);
-      _mm_store_si128((__m128i *)(output + half_pitch * 16), in16);
-      _mm_store_si128((__m128i *)(output + half_pitch * 17), in17);
-      _mm_store_si128((__m128i *)(output + half_pitch * 18), in18);
-      _mm_store_si128((__m128i *)(output + half_pitch * 19), in19);
-      _mm_store_si128((__m128i *)(output + half_pitch * 20), in20);
-      _mm_store_si128((__m128i *)(output + half_pitch * 21), in21);
-      _mm_store_si128((__m128i *)(output + half_pitch * 22), in22);
-      _mm_store_si128((__m128i *)(output + half_pitch * 23), in23);
-      _mm_store_si128((__m128i *)(output + half_pitch * 24), in24);
-      _mm_store_si128((__m128i *)(output + half_pitch * 25), in25);
-      _mm_store_si128((__m128i *)(output + half_pitch * 26), in26);
-      _mm_store_si128((__m128i *)(output + half_pitch * 27), in27);
-      _mm_store_si128((__m128i *)(output + half_pitch * 28), in28);
-      _mm_store_si128((__m128i *)(output + half_pitch * 29), in29);
-      _mm_store_si128((__m128i *)(output + half_pitch * 30), in30);
-      _mm_store_si128((__m128i *)(output + half_pitch * 31), in31);
-
-      output += 8;
+      RECON_AND_STORE(dest, in0);
+      RECON_AND_STORE(dest, in1);
+      RECON_AND_STORE(dest, in2);
+      RECON_AND_STORE(dest, in3);
+      RECON_AND_STORE(dest, in4);
+      RECON_AND_STORE(dest, in5);
+      RECON_AND_STORE(dest, in6);
+      RECON_AND_STORE(dest, in7);
+      RECON_AND_STORE(dest, in8);
+      RECON_AND_STORE(dest, in9);
+      RECON_AND_STORE(dest, in10);
+      RECON_AND_STORE(dest, in11);
+      RECON_AND_STORE(dest, in12);
+      RECON_AND_STORE(dest, in13);
+      RECON_AND_STORE(dest, in14);
+      RECON_AND_STORE(dest, in15);
+      RECON_AND_STORE(dest, in16);
+      RECON_AND_STORE(dest, in17);
+      RECON_AND_STORE(dest, in18);
+      RECON_AND_STORE(dest, in19);
+      RECON_AND_STORE(dest, in20);
+      RECON_AND_STORE(dest, in21);
+      RECON_AND_STORE(dest, in22);
+      RECON_AND_STORE(dest, in23);
+      RECON_AND_STORE(dest, in24);
+      RECON_AND_STORE(dest, in25);
+      RECON_AND_STORE(dest, in26);
+      RECON_AND_STORE(dest, in27);
+      RECON_AND_STORE(dest, in28);
+      RECON_AND_STORE(dest, in29);
+      RECON_AND_STORE(dest, in30);
+      RECON_AND_STORE(dest, in31);
+
+      dest += 8 - (stride * 32);
     }
   }
 }
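The RECON_AND_STORE / RECON_AND_STORE4X4 macros introduced above fold the former "write residual to an output buffer" step into reconstruction proper: widen a row of predicted pixels to 16 bits, add the IDCT residual, saturate back to 8 bits via _mm_packus_epi16, store, and advance dest by one stride. A scalar sketch of one 8-pixel RECON_AND_STORE step, with a hypothetical helper name not taken from this patch:

static void recon_and_store_row_c(uint8_t *dest, const int16_t *residual) {
  int j;
  for (j = 0; j < 8; ++j) {
    const int v = dest[j] + residual[j];
    /* saturate to [0, 255], matching what _mm_packus_epi16 does */
    dest[j] = (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
  }
}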
a/vp9/common/x86/vp9_idct_sse2.asm b/vp9/common/x86/vp9_idct_sse2.asm deleted file mode 100644 index 8f3c6dfc399b7e43050614a9dc6cdbd737c58156..0000000000000000000000000000000000000000 --- a/vp9/common/x86/vp9_idct_sse2.asm +++ /dev/null @@ -1,712 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - -;void vp9_idct_dequant_0_2x_sse2 -; ( -; short *qcoeff - 0 -; short *dequant - 1 -; unsigned char *pre - 2 -; unsigned char *dst - 3 -; int dst_stride - 4 -; int blk_stride - 5 -; ) - -global sym(vp9_idct_dequant_0_2x_sse2) PRIVATE -sym(vp9_idct_dequant_0_2x_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - GET_GOT rbx - ; end prolog - - mov rdx, arg(1) ; dequant - mov rax, arg(0) ; qcoeff - - movd xmm4, [rax] - movd xmm5, [rdx] - - pinsrw xmm4, [rax+32], 4 - pinsrw xmm5, [rdx], 4 - - pmullw xmm4, xmm5 - - ; Zero out xmm5, for use unpacking - pxor xmm5, xmm5 - - ; clear coeffs - movd [rax], xmm5 - movd [rax+32], xmm5 -;pshufb - pshuflw xmm4, xmm4, 00000000b - pshufhw xmm4, xmm4, 00000000b - - mov rax, arg(2) ; pre - paddw xmm4, [GLOBAL(fours)] - - movsxd rcx, dword ptr arg(5) ; blk_stride - psraw xmm4, 3 - - movq xmm0, [rax] - movq xmm1, [rax+rcx] - movq xmm2, [rax+2*rcx] - lea rcx, [3*rcx] - movq xmm3, [rax+rcx] - - punpcklbw xmm0, xmm5 - punpcklbw xmm1, xmm5 - punpcklbw xmm2, xmm5 - punpcklbw xmm3, xmm5 - - mov rax, arg(3) ; dst - movsxd rdx, dword ptr arg(4) ; dst_stride - - ; Add to predict buffer - paddw xmm0, xmm4 - paddw xmm1, xmm4 - paddw xmm2, xmm4 - paddw xmm3, xmm4 - - ; pack up before storing - packuswb xmm0, xmm5 - packuswb xmm1, xmm5 - packuswb xmm2, xmm5 - packuswb xmm3, xmm5 - - ; store blocks back out - movq [rax], xmm0 - movq [rax + rdx], xmm1 - - lea rax, [rax + 2*rdx] - - movq [rax], xmm2 - movq [rax + rdx], xmm3 - - ; begin epilog - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - -global sym(vp9_idct_dequant_full_2x_sse2) PRIVATE -sym(vp9_idct_dequant_full_2x_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ; special case when 2 blocks have 0 or 1 coeffs - ; dc is set as first coeff, so no need to load qcoeff - mov rax, arg(0) ; qcoeff - mov rsi, arg(2) ; pre - mov rdi, arg(3) ; dst - movsxd rcx, dword ptr arg(5) ; blk_stride - - ; Zero out xmm7, for use unpacking - pxor xmm7, xmm7 - - mov rdx, arg(1) ; dequant - - ; note the transpose of xmm1 and xmm2, necessary for shuffle - ; to spit out sensicle data - movdqa xmm0, [rax] - movdqa xmm2, [rax+16] - movdqa xmm1, [rax+32] - movdqa xmm3, [rax+48] - - ; Clear out coeffs - movdqa [rax], xmm7 - movdqa [rax+16], xmm7 - movdqa [rax+32], xmm7 - movdqa [rax+48], xmm7 - - ; dequantize qcoeff buffer - pmullw xmm0, [rdx] - pmullw xmm2, [rdx+16] - pmullw xmm1, [rdx] - pmullw xmm3, [rdx+16] - - ; repack so block 0 row x and block 1 row x are together - movdqa xmm4, xmm0 - punpckldq xmm0, xmm1 - punpckhdq xmm4, xmm1 - - pshufd xmm0, xmm0, 11011000b - pshufd xmm1, xmm4, 11011000b - - movdqa xmm4, xmm2 - punpckldq xmm2, xmm3 - punpckhdq xmm4, xmm3 - - pshufd xmm2, xmm2, 11011000b - pshufd xmm3, xmm4, 11011000b - - ; first pass - psubw xmm0, xmm2 ; b1 = 0-2 - 
paddw xmm2, xmm2 ; - - movdqa xmm5, xmm1 - paddw xmm2, xmm0 ; a1 = 0+2 - - pmulhw xmm5, [GLOBAL(x_s1sqr2)] - paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2) - - movdqa xmm7, xmm3 - pmulhw xmm7, [GLOBAL(x_c1sqr2less1)] - - paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2) - psubw xmm7, xmm5 ; c1 - - movdqa xmm5, xmm1 - movdqa xmm4, xmm3 - - pmulhw xmm5, [GLOBAL(x_c1sqr2less1)] - paddw xmm5, xmm1 - - pmulhw xmm3, [GLOBAL(x_s1sqr2)] - paddw xmm3, xmm4 - - paddw xmm3, xmm5 ; d1 - movdqa xmm6, xmm2 ; a1 - - movdqa xmm4, xmm0 ; b1 - paddw xmm2, xmm3 ;0 - - paddw xmm4, xmm7 ;1 - psubw xmm0, xmm7 ;2 - - psubw xmm6, xmm3 ;3 - - ; transpose for the second pass - movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000 - punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000 - punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100 - - movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008 - punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008 - punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108 - - - movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000 - punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000 - punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002 - - movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100 - punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100 - punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102 - - - movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000 - punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000 - punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001 - - movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002 - punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002 - punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003 - - pshufd xmm0, xmm2, 11011000b - pshufd xmm2, xmm1, 11011000b - - pshufd xmm1, xmm5, 11011000b - pshufd xmm3, xmm7, 11011000b - - ; second pass - psubw xmm0, xmm2 ; b1 = 0-2 - paddw xmm2, xmm2 - - movdqa xmm5, xmm1 - paddw xmm2, xmm0 ; a1 = 0+2 - - pmulhw xmm5, [GLOBAL(x_s1sqr2)] - paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2) - - movdqa xmm7, xmm3 - pmulhw xmm7, [GLOBAL(x_c1sqr2less1)] - - paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2) - psubw xmm7, xmm5 ; c1 - - movdqa xmm5, xmm1 - movdqa xmm4, xmm3 - - pmulhw xmm5, [GLOBAL(x_c1sqr2less1)] - paddw xmm5, xmm1 - - pmulhw xmm3, [GLOBAL(x_s1sqr2)] - paddw xmm3, xmm4 - - paddw xmm3, xmm5 ; d1 - paddw xmm0, [GLOBAL(fours)] - - paddw xmm2, [GLOBAL(fours)] - movdqa xmm6, xmm2 ; a1 - - movdqa xmm4, xmm0 ; b1 - paddw xmm2, xmm3 ;0 - - paddw xmm4, xmm7 ;1 - psubw xmm0, xmm7 ;2 - - psubw xmm6, xmm3 ;3 - psraw xmm2, 3 - - psraw xmm0, 3 - psraw xmm4, 3 - - psraw xmm6, 3 - - ; transpose to save - movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000 - punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000 - punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100 - - movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008 - punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008 - punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108 - - - movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000 - punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000 - punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002 - - movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100 - punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100 - punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102 - - - movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000 - punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000 - punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001 - - movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002 - punpckldq xmm1, xmm6 ; 114 
110 014 010 106 102 006 002 - punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003 - - pshufd xmm0, xmm2, 11011000b - pshufd xmm2, xmm1, 11011000b - - pshufd xmm1, xmm5, 11011000b - pshufd xmm3, xmm7, 11011000b - - pxor xmm7, xmm7 - - ; Load up predict blocks - movq xmm4, [rsi] - movq xmm5, [rsi+rcx] - - punpcklbw xmm4, xmm7 - punpcklbw xmm5, xmm7 - - paddw xmm0, xmm4 - paddw xmm1, xmm5 - - movq xmm4, [rsi+2*rcx] - lea rcx, [3*rcx] - movq xmm5, [rsi+rcx] - - punpcklbw xmm4, xmm7 - punpcklbw xmm5, xmm7 - - paddw xmm2, xmm4 - paddw xmm3, xmm5 - -.finish: - - ; pack up before storing - packuswb xmm0, xmm7 - packuswb xmm1, xmm7 - packuswb xmm2, xmm7 - packuswb xmm3, xmm7 - - ; Load destination stride before writing out, - ; doesn't need to persist - movsxd rdx, dword ptr arg(4) ; dst_stride - - ; store blocks back out - movq [rdi], xmm0 - movq [rdi + rdx], xmm1 - - lea rdi, [rdi + 2*rdx] - - movq [rdi], xmm2 - movq [rdi + rdx], xmm3 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void vp9_idct_dequant_dc_0_2x_sse2 -; ( -; short *qcoeff - 0 -; short *dequant - 1 -; unsigned char *pre - 2 -; unsigned char *dst - 3 -; int dst_stride - 4 -; short *dc - 5 -; ) -global sym(vp9_idct_dequant_dc_0_2x_sse2) PRIVATE -sym(vp9_idct_dequant_dc_0_2x_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ; special case when 2 blocks have 0 or 1 coeffs - ; dc is set as first coeff, so no need to load qcoeff - mov rax, arg(0) ; qcoeff - mov rsi, arg(2) ; pre - mov rdi, arg(3) ; dst - mov rdx, arg(5) ; dc - - ; Zero out xmm5, for use unpacking - pxor xmm5, xmm5 - - ; load up 2 dc words here == 2*16 = doubleword - movd xmm4, [rdx] - - ; Load up predict blocks - movq xmm0, [rsi] - movq xmm1, [rsi+16] - movq xmm2, [rsi+32] - movq xmm3, [rsi+48] - - ; Duplicate and expand dc across - punpcklwd xmm4, xmm4 - punpckldq xmm4, xmm4 - - ; Rounding to dequant and downshift - paddw xmm4, [GLOBAL(fours)] - psraw xmm4, 3 - - ; Predict buffer needs to be expanded from bytes to words - punpcklbw xmm0, xmm5 - punpcklbw xmm1, xmm5 - punpcklbw xmm2, xmm5 - punpcklbw xmm3, xmm5 - - ; Add to predict buffer - paddw xmm0, xmm4 - paddw xmm1, xmm4 - paddw xmm2, xmm4 - paddw xmm3, xmm4 - - ; pack up before storing - packuswb xmm0, xmm5 - packuswb xmm1, xmm5 - packuswb xmm2, xmm5 - packuswb xmm3, xmm5 - - ; Load destination stride before writing out, - ; doesn't need to persist - movsxd rdx, dword ptr arg(4) ; dst_stride - - ; store blocks back out - movq [rdi], xmm0 - movq [rdi + rdx], xmm1 - - lea rdi, [rdi + 2*rdx] - - movq [rdi], xmm2 - movq [rdi + rdx], xmm3 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - -global sym(vp9_idct_dequant_dc_full_2x_sse2) PRIVATE -sym(vp9_idct_dequant_dc_full_2x_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ; special case when 2 blocks have 0 or 1 coeffs - ; dc is set as first coeff, so no need to load qcoeff - mov rax, arg(0) ; qcoeff - mov rsi, arg(2) ; pre - mov rdi, arg(3) ; dst - - ; Zero out xmm7, for use unpacking - pxor xmm7, xmm7 - - mov rdx, arg(1) ; dequant - - ; note the transpose of xmm1 and xmm2, necessary for shuffle - ; to spit out sensicle data - movdqa xmm0, [rax] - movdqa xmm2, [rax+16] - movdqa xmm1, [rax+32] - movdqa xmm3, [rax+48] - - ; Clear out coeffs - movdqa [rax], xmm7 - movdqa [rax+16], xmm7 - movdqa [rax+32], xmm7 - movdqa [rax+48], xmm7 - - ; 
dequantize qcoeff buffer - pmullw xmm0, [rdx] - pmullw xmm2, [rdx+16] - pmullw xmm1, [rdx] - pmullw xmm3, [rdx+16] - - ; DC component - mov rdx, arg(5) - - ; repack so block 0 row x and block 1 row x are together - movdqa xmm4, xmm0 - punpckldq xmm0, xmm1 - punpckhdq xmm4, xmm1 - - pshufd xmm0, xmm0, 11011000b - pshufd xmm1, xmm4, 11011000b - - movdqa xmm4, xmm2 - punpckldq xmm2, xmm3 - punpckhdq xmm4, xmm3 - - pshufd xmm2, xmm2, 11011000b - pshufd xmm3, xmm4, 11011000b - - ; insert DC component - pinsrw xmm0, [rdx], 0 - pinsrw xmm0, [rdx+2], 4 - - ; first pass - psubw xmm0, xmm2 ; b1 = 0-2 - paddw xmm2, xmm2 ; - - movdqa xmm5, xmm1 - paddw xmm2, xmm0 ; a1 = 0+2 - - pmulhw xmm5, [GLOBAL(x_s1sqr2)] - paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2) - - movdqa xmm7, xmm3 - pmulhw xmm7, [GLOBAL(x_c1sqr2less1)] - - paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2) - psubw xmm7, xmm5 ; c1 - - movdqa xmm5, xmm1 - movdqa xmm4, xmm3 - - pmulhw xmm5, [GLOBAL(x_c1sqr2less1)] - paddw xmm5, xmm1 - - pmulhw xmm3, [GLOBAL(x_s1sqr2)] - paddw xmm3, xmm4 - - paddw xmm3, xmm5 ; d1 - movdqa xmm6, xmm2 ; a1 - - movdqa xmm4, xmm0 ; b1 - paddw xmm2, xmm3 ;0 - - paddw xmm4, xmm7 ;1 - psubw xmm0, xmm7 ;2 - - psubw xmm6, xmm3 ;3 - - ; transpose for the second pass - movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000 - punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000 - punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100 - - movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008 - punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008 - punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108 - - - movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000 - punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000 - punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002 - - movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100 - punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100 - punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102 - - - movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000 - punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000 - punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001 - - movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002 - punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002 - punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003 - - pshufd xmm0, xmm2, 11011000b - pshufd xmm2, xmm1, 11011000b - - pshufd xmm1, xmm5, 11011000b - pshufd xmm3, xmm7, 11011000b - - ; second pass - psubw xmm0, xmm2 ; b1 = 0-2 - paddw xmm2, xmm2 - - movdqa xmm5, xmm1 - paddw xmm2, xmm0 ; a1 = 0+2 - - pmulhw xmm5, [GLOBAL(x_s1sqr2)] - paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2) - - movdqa xmm7, xmm3 - pmulhw xmm7, [GLOBAL(x_c1sqr2less1)] - - paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2) - psubw xmm7, xmm5 ; c1 - - movdqa xmm5, xmm1 - movdqa xmm4, xmm3 - - pmulhw xmm5, [GLOBAL(x_c1sqr2less1)] - paddw xmm5, xmm1 - - pmulhw xmm3, [GLOBAL(x_s1sqr2)] - paddw xmm3, xmm4 - - paddw xmm3, xmm5 ; d1 - paddw xmm0, [GLOBAL(fours)] - - paddw xmm2, [GLOBAL(fours)] - movdqa xmm6, xmm2 ; a1 - - movdqa xmm4, xmm0 ; b1 - paddw xmm2, xmm3 ;0 - - paddw xmm4, xmm7 ;1 - psubw xmm0, xmm7 ;2 - - psubw xmm6, xmm3 ;3 - psraw xmm2, 3 - - psraw xmm0, 3 - psraw xmm4, 3 - - psraw xmm6, 3 - - ; transpose to save - movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000 - punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000 - punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100 - - movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008 - punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008 - punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 
108 - - - movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000 - punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000 - punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002 - - movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100 - punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100 - punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102 - - - movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000 - punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000 - punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001 - - movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002 - punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002 - punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003 - - pshufd xmm0, xmm2, 11011000b - pshufd xmm2, xmm1, 11011000b - - pshufd xmm1, xmm5, 11011000b - pshufd xmm3, xmm7, 11011000b - - pxor xmm7, xmm7 - - ; Load up predict blocks - movq xmm4, [rsi] - movq xmm5, [rsi+16] - - punpcklbw xmm4, xmm7 - punpcklbw xmm5, xmm7 - - paddw xmm0, xmm4 - paddw xmm1, xmm5 - - movq xmm4, [rsi+32] - movq xmm5, [rsi+48] - - punpcklbw xmm4, xmm7 - punpcklbw xmm5, xmm7 - - paddw xmm2, xmm4 - paddw xmm3, xmm5 - -.finish: - - ; pack up before storing - packuswb xmm0, xmm7 - packuswb xmm1, xmm7 - packuswb xmm2, xmm7 - packuswb xmm3, xmm7 - - ; Load destination stride before writing out, - ; doesn't need to persist - movsxd rdx, dword ptr arg(4) ; dst_stride - - ; store blocks back out - movq [rdi], xmm0 - movq [rdi + rdx], xmm1 - - lea rdi, [rdi + 2*rdx] - - movq [rdi], xmm2 - movq [rdi + rdx], xmm3 - - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -SECTION_RODATA -align 16 -fours: - times 8 dw 0x0004 -align 16 -x_s1sqr2: - times 8 dw 0x8A8C -align 16 -x_c1sqr2less1: - times 8 dw 0x4E7B diff --git a/vp9/common/x86/vp9_loopfilter_intrin_mmx.c b/vp9/common/x86/vp9_loopfilter_intrin_mmx.c index 2be9e31796b9b361b6fb74538fb805717662c57c..7e6c4be2c763ad2f3bac5c38db780b9f0095f35c 100644 --- a/vp9/common/x86/vp9_loopfilter_intrin_mmx.c +++ b/vp9/common/x86/vp9_loopfilter_intrin_mmx.c @@ -35,16 +35,6 @@ void vp9_loop_filter_bh_mmx(unsigned char *y_ptr, } -void vp9_loop_filter_bhs_mmx(unsigned char *y_ptr, int y_stride, - const unsigned char *blimit) { - vp9_loop_filter_simple_horizontal_edge_mmx(y_ptr + 4 * y_stride, - y_stride, blimit); - vp9_loop_filter_simple_horizontal_edge_mmx(y_ptr + 8 * y_stride, - y_stride, blimit); - vp9_loop_filter_simple_horizontal_edge_mmx(y_ptr + 12 * y_stride, - y_stride, blimit); -} - /* Vertical B Filtering */ void vp9_loop_filter_bv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, @@ -66,9 +56,3 @@ void vp9_loop_filter_bv_mmx(unsigned char *y_ptr, lfi->blim, lfi->lim, lfi->hev_thr, 1); } -void vp9_loop_filter_bvs_mmx(unsigned char *y_ptr, int y_stride, - const unsigned char *blimit) { - vp9_loop_filter_simple_vertical_edge_mmx(y_ptr + 4, y_stride, blimit); - vp9_loop_filter_simple_vertical_edge_mmx(y_ptr + 8, y_stride, blimit); - vp9_loop_filter_simple_vertical_edge_mmx(y_ptr + 12, y_stride, blimit); -} diff --git a/vp9/common/x86/vp9_loopfilter_intrin_sse2.c b/vp9/common/x86/vp9_loopfilter_intrin_sse2.c index 08447a62de101406f18fe615bdde1e430417b400..7982ca6a2006f3a0c84f8bb6630325bc94b971a7 100644 --- a/vp9/common/x86/vp9_loopfilter_intrin_sse2.c +++ b/vp9/common/x86/vp9_loopfilter_intrin_sse2.c @@ -1115,16 +1115,6 @@ void vp9_loop_filter_bh_sse2(unsigned char *y_ptr, v_ptr + 4 * uv_stride); } -void vp9_loop_filter_bhs_sse2(unsigned char *y_ptr, int y_stride, - const unsigned char *blimit) { 
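The file deleted above implemented the VP8-era 4x4 inverse transform, two blocks at a time. Its x_s1sqr2 (0x8A8C = 35468) and x_c1sqr2less1 (0x4E7B = 20091) constants are Q16 encodings of sin(pi/8)*sqrt(2) and cos(pi/8)*sqrt(2) - 1; because pmulhw is a signed multiply, each pmulhw is followed by a paddw of the original input, which restores the integer part for the cosine term and compensates the signed wrap-around of 35468 for the sine term. For reference, the scalar form of the butterfly the asm parallelizes (a sketch following the VP8 reference implementation; indexing assumes a 4-short row pitch):

static const int sinpi8sqrt2 = 35468;       /* 0x8A8C: sin(pi/8)*sqrt(2), Q16 */
static const int cospi8sqrt2minus1 = 20091; /* 0x4E7B: cos(pi/8)*sqrt(2)-1, Q16 */

/* One column pass of the 4x4 inverse transform. */
static void idct4_col(const short *ip, short *op) {
  const int a1 = ip[0] + ip[8];   /* rows 0 and 2 */
  const int b1 = ip[0] - ip[8];
  const int c1 = ((ip[4] * sinpi8sqrt2) >> 16)
               - (ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16));
  const int d1 = (ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16))
               + ((ip[12] * sinpi8sqrt2) >> 16);
  op[0]  = a1 + d1;
  op[4]  = b1 + c1;
  op[8]  = b1 - c1;
  op[12] = a1 - d1;
}

The second pass additionally adds the fours constant and shifts right by 3, i.e. the final (x + 4) >> 3 rounding visible in the asm above.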
- vp9_loop_filter_simple_horizontal_edge_sse2(y_ptr + 4 * y_stride, - y_stride, blimit); - vp9_loop_filter_simple_horizontal_edge_sse2(y_ptr + 8 * y_stride, - y_stride, blimit); - vp9_loop_filter_simple_horizontal_edge_sse2(y_ptr + 12 * y_stride, - y_stride, blimit); -} - /* Vertical B Filtering */ void vp9_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, @@ -1143,9 +1133,3 @@ void vp9_loop_filter_bv_sse2(unsigned char *y_ptr, v_ptr + 4); } -void vp9_loop_filter_bvs_sse2(unsigned char *y_ptr, int y_stride, - const unsigned char *blimit) { - vp9_loop_filter_simple_vertical_edge_sse2(y_ptr + 4, y_stride, blimit); - vp9_loop_filter_simple_vertical_edge_sse2(y_ptr + 8, y_stride, blimit); - vp9_loop_filter_simple_vertical_edge_sse2(y_ptr + 12, y_stride, blimit); -} diff --git a/vp9/common/x86/vp9_loopfilter_mmx.asm b/vp9/common/x86/vp9_loopfilter_mmx.asm index ceffdf5580749081be885d81fcff46995c0bb53a..4ebb51b772742100f0674f119d501f051f301d6a 100644 --- a/vp9/common/x86/vp9_loopfilter_mmx.asm +++ b/vp9/common/x86/vp9_loopfilter_mmx.asm @@ -593,349 +593,6 @@ sym(vp9_loop_filter_vertical_edge_mmx): pop rbp ret - -;void vp9_loop_filter_simple_horizontal_edge_mmx -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit -;) -global sym(vp9_loop_filter_simple_horizontal_edge_mmx) PRIVATE -sym(vp9_loop_filter_simple_horizontal_edge_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 3 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? - - mov rcx, 2 ; count -.nexts8_h: - mov rdx, arg(2) ;blimit ; get blimit - movq mm3, [rdx] ; - - mov rdi, rsi ; rdi points to row +1 for indirect addressing - add rdi, rax - neg rax - - ; calculate mask - movq mm1, [rsi+2*rax] ; p1 - movq mm0, [rdi] ; q1 - movq mm2, mm1 - movq mm7, mm0 - movq mm4, mm0 - psubusb mm0, mm1 ; q1-=p1 - psubusb mm1, mm4 ; p1-=q1 - por mm1, mm0 ; abs(p1-q1) - pand mm1, [GLOBAL(tfe)] ; set lsb of each byte to zero - psrlw mm1, 1 ; abs(p1-q1)/2 - - movq mm5, [rsi+rax] ; p0 - movq mm4, [rsi] ; q0 - movq mm0, mm4 ; q0 - movq mm6, mm5 ; p0 - psubusb mm5, mm4 ; p0-=q0 - psubusb mm4, mm6 ; q0-=p0 - por mm5, mm4 ; abs(p0 - q0) - paddusb mm5, mm5 ; abs(p0-q0)*2 - paddusb mm5, mm1 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - - psubusb mm5, mm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit - pxor mm3, mm3 - pcmpeqb mm5, mm3 - - ; start work on filters - pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values - pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values - psubsb mm2, mm7 ; p1 - q1 - - pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values - pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values - movq mm3, mm0 ; q0 - psubsb mm0, mm6 ; q0 - p0 - paddsb mm2, mm0 ; p1 - q1 + 1 * (q0 - p0) - paddsb mm2, mm0 ; p1 - q1 + 2 * (q0 - p0) - paddsb mm2, mm0 ; p1 - q1 + 3 * (q0 - p0) - pand mm5, mm2 ; mask filter values we don't care about - - ; do + 4 side - paddsb mm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4 - - movq mm0, mm5 ; get a copy of filters - psllw mm0, 8 ; shift left 8 - psraw mm0, 3 ; arithmetic shift right 11 - psrlw mm0, 8 - movq mm1, mm5 ; get a copy of filters - psraw mm1, 11 ; arithmetic shift right 11 - psllw mm1, 8 ; shift left 8 to put it back - - por mm0, mm1 ; put the two together to get result - - psubsb mm3, mm0 ; q0-= q0 add - pxor mm3, [GLOBAL(t80)] ; unoffset - movq [rsi], mm3 ; write back - - - ; now do +3 side - psubsb mm5, [GLOBAL(t1s)] ; +3 instead of 
+4 - - movq mm0, mm5 ; get a copy of filters - psllw mm0, 8 ; shift left 8 - psraw mm0, 3 ; arithmetic shift right 11 - psrlw mm0, 8 - psraw mm5, 11 ; arithmetic shift right 11 - psllw mm5, 8 ; shift left 8 to put it back - por mm0, mm5 ; put the two together to get result - - - paddsb mm6, mm0 ; p0+= p0 add - pxor mm6, [GLOBAL(t80)] ; unoffset - movq [rsi+rax], mm6 ; write back - - add rsi,8 - neg rax - dec rcx - jnz .nexts8_h - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -;void vp9_loop_filter_simple_vertical_edge_mmx -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit -;) -global sym(vp9_loop_filter_simple_vertical_edge_mmx) PRIVATE -sym(vp9_loop_filter_simple_vertical_edge_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 3 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 32 ; reserve 32 bytes - %define t0 [rsp + 0] ;__declspec(align(16)) char t0[8]; - %define t1 [rsp + 16] ;__declspec(align(16)) char t1[8]; - - mov rsi, arg(0) ;src_ptr - movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? - - lea rsi, [rsi + rax*4- 2]; ; - mov rcx, 2 ; count -.nexts8_v: - - lea rdi, [rsi + rax]; - movd mm0, [rdi + rax * 2] ; xx xx xx xx 73 72 71 70 - - movd mm6, [rsi + rax * 2] ; xx xx xx xx 63 62 61 60 - punpcklbw mm6, mm0 ; 73 63 72 62 71 61 70 60 - - movd mm0, [rsi + rax] ; xx xx xx xx 53 52 51 50 - movd mm4, [rsi] ; xx xx xx xx 43 42 41 40 - - punpcklbw mm4, mm0 ; 53 43 52 42 51 41 50 40 - movq mm5, mm4 ; 53 43 52 42 51 41 50 40 - - punpcklwd mm4, mm6 ; 71 61 51 41 70 60 50 40 - punpckhwd mm5, mm6 ; 73 63 53 43 72 62 52 42 - - neg rax - - movd mm7, [rsi + rax] ; xx xx xx xx 33 32 31 30 - movd mm6, [rsi + rax * 2] ; xx xx xx xx 23 22 21 20 - - punpcklbw mm6, mm7 ; 33 23 32 22 31 21 30 20 - movd mm1, [rdi + rax * 4] ; xx xx xx xx 13 12 11 10 - - movd mm0, [rsi + rax * 4] ; xx xx xx xx 03 02 01 00 - punpcklbw mm0, mm1 ; 13 03 12 02 11 01 10 00 - - movq mm2, mm0 ; 13 03 12 02 11 01 10 00 - punpcklwd mm0, mm6 ; 31 21 11 01 30 20 10 00 - - punpckhwd mm2, mm6 ; 33 23 13 03 32 22 12 02 - movq mm1, mm0 ; 13 03 12 02 11 01 10 00 - - punpckldq mm0, mm4 ; 70 60 50 40 30 20 10 00 = p1 - movq mm3, mm2 ; 33 23 13 03 32 22 12 02 - - punpckhdq mm1, mm4 ; 71 61 51 41 31 21 11 01 = p0 - punpckldq mm2, mm5 ; 72 62 52 42 32 22 12 02 = q0 - - punpckhdq mm3, mm5 ; 73 63 53 43 33 23 13 03 = q1 - - - ; calculate mask - movq mm6, mm0 ; p1 - movq mm7, mm3 ; q1 - psubusb mm7, mm6 ; q1-=p1 - psubusb mm6, mm3 ; p1-=q1 - por mm6, mm7 ; abs(p1-q1) - pand mm6, [GLOBAL(tfe)] ; set lsb of each byte to zero - psrlw mm6, 1 ; abs(p1-q1)/2 - - movq mm5, mm1 ; p0 - movq mm4, mm2 ; q0 - - psubusb mm5, mm2 ; p0-=q0 - psubusb mm4, mm1 ; q0-=p0 - - por mm5, mm4 ; abs(p0 - q0) - paddusb mm5, mm5 ; abs(p0-q0)*2 - paddusb mm5, mm6 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - - mov rdx, arg(2) ;blimit ; get blimit - movq mm7, [rdx] - - psubusb mm5, mm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit - pxor mm7, mm7 - pcmpeqb mm5, mm7 ; mm5 = mask - - ; start work on filters - movq t0, mm0 - movq t1, mm3 - - pxor mm0, [GLOBAL(t80)] ; p1 offset to convert to signed values - pxor mm3, [GLOBAL(t80)] ; q1 offset to convert to signed values - - psubsb mm0, mm3 ; p1 - q1 - movq mm6, mm1 ; p0 - - movq mm7, mm2 ; q0 - pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values - - pxor mm7, [GLOBAL(t80)] ; offset to convert to signed values - movq mm3, mm7 ; offseted ; q0 - - psubsb mm7, mm6 ; q0 - p0 - paddsb mm0, mm7 ; p1 - q1 + 1 * (q0 - p0) - 
- paddsb mm0, mm7 ; p1 - q1 + 2 * (q0 - p0) - paddsb mm0, mm7 ; p1 - q1 + 3 * (q0 - p0) - - pand mm5, mm0 ; mask filter values we don't care about - - paddsb mm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4 - - movq mm0, mm5 ; get a copy of filters - psllw mm0, 8 ; shift left 8 - psraw mm0, 3 ; arithmetic shift right 11 - psrlw mm0, 8 - - movq mm7, mm5 ; get a copy of filters - psraw mm7, 11 ; arithmetic shift right 11 - psllw mm7, 8 ; shift left 8 to put it back - - por mm0, mm7 ; put the two together to get result - - psubsb mm3, mm0 ; q0-= q0sz add - pxor mm3, [GLOBAL(t80)] ; unoffset - - ; now do +3 side - psubsb mm5, [GLOBAL(t1s)] ; +3 instead of +4 - - movq mm0, mm5 ; get a copy of filters - psllw mm0, 8 ; shift left 8 - psraw mm0, 3 ; arithmetic shift right 11 - psrlw mm0, 8 - - psraw mm5, 11 ; arithmetic shift right 11 - psllw mm5, 8 ; shift left 8 to put it back - por mm0, mm5 ; put the two together to get result - - paddsb mm6, mm0 ; p0+= p0 add - pxor mm6, [GLOBAL(t80)] ; unoffset - - - movq mm0, t0 - movq mm4, t1 - - ; mm0 = 70 60 50 40 30 20 10 00 - ; mm6 = 71 61 51 41 31 21 11 01 - ; mm3 = 72 62 52 42 32 22 12 02 - ; mm4 = 73 63 53 43 33 23 13 03 - ; transpose back to write out - - movq mm1, mm0 ; - punpcklbw mm0, mm6 ; 31 30 21 20 11 10 01 00 - - punpckhbw mm1, mm6 ; 71 70 61 60 51 50 41 40 - movq mm2, mm3 ; - - punpcklbw mm2, mm4 ; 33 32 23 22 13 12 03 02 - movq mm5, mm1 ; 71 70 61 60 51 50 41 40 - - punpckhbw mm3, mm4 ; 73 72 63 62 53 52 43 42 - movq mm6, mm0 ; 31 30 21 20 11 10 01 00 - - punpcklwd mm0, mm2 ; 13 12 11 10 03 02 01 00 - punpckhwd mm6, mm2 ; 33 32 31 30 23 22 21 20 - - movd [rsi+rax*4], mm0 ; write 03 02 01 00 - punpcklwd mm1, mm3 ; 53 52 51 50 43 42 41 40 - - psrlq mm0, 32 ; xx xx xx xx 13 12 11 10 - punpckhwd mm5, mm3 ; 73 72 71 70 63 62 61 60 - - movd [rdi+rax*4], mm0 ; write 13 12 11 10 - movd [rsi+rax*2], mm6 ; write 23 22 21 20 - - psrlq mm6, 32 ; 33 32 31 30 - movd [rsi], mm1 ; write 43 42 41 40 - - movd [rsi + rax], mm6 ; write 33 32 31 30 - neg rax - - movd [rsi + rax*2], mm5 ; write 63 62 61 60 - psrlq mm1, 32 ; 53 52 51 50 - - movd [rdi], mm1 ; write out 53 52 51 50 - psrlq mm5, 32 ; 73 72 71 70 - - movd [rdi + rax*2], mm5 ; write 73 72 71 70 - - lea rsi, [rsi+rax*8] ; next 8 - - dec rcx - jnz .nexts8_v - - add rsp, 32 - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - - -;void fast_loop_filter_vertical_edges_mmx(unsigned char *y_ptr, -; int y_stride, -; loop_filter_info *lfi) -;{ -; -; -; vp9_loop_filter_simple_vertical_edge_mmx(y_ptr+4, y_stride, lfi->flim,lfi->lim,lfi->thr,2); -; vp9_loop_filter_simple_vertical_edge_mmx(y_ptr+8, y_stride, lfi->flim,lfi->lim,lfi->thr,2); -; vp9_loop_filter_simple_vertical_edge_mmx(y_ptr+12, y_stride, lfi->flim,lfi->lim,lfi->thr,2); -;} - SECTION_RODATA align 16 tfe: diff --git a/vp9/common/x86/vp9_loopfilter_sse2.asm b/vp9/common/x86/vp9_loopfilter_sse2.asm index ae4c60f539bfd2620e3c280786ef7d68c0e5eba1..74236cfbb3b77b42927415c00b0304f1df7e0f53 100644 --- a/vp9/common/x86/vp9_loopfilter_sse2.asm +++ b/vp9/common/x86/vp9_loopfilter_sse2.asm @@ -845,372 +845,6 @@ sym(vp9_loop_filter_vertical_edge_uv_sse2): pop rbp ret -;void vp9_loop_filter_simple_horizontal_edge_sse2 -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit, -;) -global sym(vp9_loop_filter_simple_horizontal_edge_sse2) PRIVATE -sym(vp9_loop_filter_simple_horizontal_edge_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 3 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end 
prolog - - mov rsi, arg(0) ;src_ptr - movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? - mov rdx, arg(2) ;blimit - movdqa xmm3, XMMWORD PTR [rdx] - - mov rdi, rsi ; rdi points to row +1 for indirect addressing - add rdi, rax - neg rax - - ; calculate mask - movdqa xmm1, [rsi+2*rax] ; p1 - movdqa xmm0, [rdi] ; q1 - movdqa xmm2, xmm1 - movdqa xmm7, xmm0 - movdqa xmm4, xmm0 - psubusb xmm0, xmm1 ; q1-=p1 - psubusb xmm1, xmm4 ; p1-=q1 - por xmm1, xmm0 ; abs(p1-q1) - pand xmm1, [GLOBAL(tfe)] ; set lsb of each byte to zero - psrlw xmm1, 1 ; abs(p1-q1)/2 - - movdqa xmm5, [rsi+rax] ; p0 - movdqa xmm4, [rsi] ; q0 - movdqa xmm0, xmm4 ; q0 - movdqa xmm6, xmm5 ; p0 - psubusb xmm5, xmm4 ; p0-=q0 - psubusb xmm4, xmm6 ; q0-=p0 - por xmm5, xmm4 ; abs(p0 - q0) - paddusb xmm5, xmm5 ; abs(p0-q0)*2 - paddusb xmm5, xmm1 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - - psubusb xmm5, xmm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit - pxor xmm3, xmm3 - pcmpeqb xmm5, xmm3 - - ; start work on filters - pxor xmm2, [GLOBAL(t80)] ; p1 offset to convert to signed values - pxor xmm7, [GLOBAL(t80)] ; q1 offset to convert to signed values - psubsb xmm2, xmm7 ; p1 - q1 - - pxor xmm6, [GLOBAL(t80)] ; offset to convert to signed values - pxor xmm0, [GLOBAL(t80)] ; offset to convert to signed values - movdqa xmm3, xmm0 ; q0 - psubsb xmm0, xmm6 ; q0 - p0 - paddsb xmm2, xmm0 ; p1 - q1 + 1 * (q0 - p0) - paddsb xmm2, xmm0 ; p1 - q1 + 2 * (q0 - p0) - paddsb xmm2, xmm0 ; p1 - q1 + 3 * (q0 - p0) - pand xmm5, xmm2 ; mask filter values we don't care about - - ; do + 4 side - paddsb xmm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4 - - movdqa xmm0, xmm5 ; get a copy of filters - psllw xmm0, 8 ; shift left 8 - psraw xmm0, 3 ; arithmetic shift right 11 - psrlw xmm0, 8 - movdqa xmm1, xmm5 ; get a copy of filters - psraw xmm1, 11 ; arithmetic shift right 11 - psllw xmm1, 8 ; shift left 8 to put it back - - por xmm0, xmm1 ; put the two together to get result - - psubsb xmm3, xmm0 ; q0-= q0 add - pxor xmm3, [GLOBAL(t80)] ; unoffset - movdqa [rsi], xmm3 ; write back - - ; now do +3 side - psubsb xmm5, [GLOBAL(t1s)] ; +3 instead of +4 - - movdqa xmm0, xmm5 ; get a copy of filters - psllw xmm0, 8 ; shift left 8 - psraw xmm0, 3 ; arithmetic shift right 11 - psrlw xmm0, 8 - psraw xmm5, 11 ; arithmetic shift right 11 - psllw xmm5, 8 ; shift left 8 to put it back - por xmm0, xmm5 ; put the two together to get result - - - paddsb xmm6, xmm0 ; p0+= p0 add - pxor xmm6, [GLOBAL(t80)] ; unoffset - movdqa [rsi+rax], xmm6 ; write back - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -;void vp9_loop_filter_simple_vertical_edge_sse2 -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit, -;) -global sym(vp9_loop_filter_simple_vertical_edge_sse2) PRIVATE -sym(vp9_loop_filter_simple_vertical_edge_sse2): - push rbp ; save old base pointer value. - mov rbp, rsp ; set new base pointer value. - SHADOW_ARGS_TO_STACK 3 - SAVE_XMM 7 - GET_GOT rbx ; save callee-saved reg - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 32 ; reserve 32 bytes - %define t0 [rsp + 0] ;__declspec(align(16)) char t0[16]; - %define t1 [rsp + 16] ;__declspec(align(16)) char t1[16]; - - mov rsi, arg(0) ;src_ptr - movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? 
- - lea rsi, [rsi - 2 ] - lea rdi, [rsi + rax] - lea rdx, [rsi + rax*4] - lea rcx, [rdx + rax] - - movd xmm0, [rsi] ; (high 96 bits unused) 03 02 01 00 - movd xmm1, [rdx] ; (high 96 bits unused) 43 42 41 40 - movd xmm2, [rdi] ; 13 12 11 10 - movd xmm3, [rcx] ; 53 52 51 50 - punpckldq xmm0, xmm1 ; (high 64 bits unused) 43 42 41 40 03 02 01 00 - punpckldq xmm2, xmm3 ; 53 52 51 50 13 12 11 10 - - movd xmm4, [rsi + rax*2] ; 23 22 21 20 - movd xmm5, [rdx + rax*2] ; 63 62 61 60 - movd xmm6, [rdi + rax*2] ; 33 32 31 30 - movd xmm7, [rcx + rax*2] ; 73 72 71 70 - punpckldq xmm4, xmm5 ; 63 62 61 60 23 22 21 20 - punpckldq xmm6, xmm7 ; 73 72 71 70 33 32 31 30 - - punpcklbw xmm0, xmm2 ; 53 43 52 42 51 41 50 40 13 03 12 02 11 01 10 00 - punpcklbw xmm4, xmm6 ; 73 63 72 62 71 61 70 60 33 23 32 22 31 21 30 20 - - movdqa xmm1, xmm0 - punpcklwd xmm0, xmm4 ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00 - punpckhwd xmm1, xmm4 ; 73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40 - - movdqa xmm2, xmm0 - punpckldq xmm0, xmm1 ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00 - punpckhdq xmm2, xmm1 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02 - - movdqa t0, xmm0 ; save to t0 - movdqa t1, xmm2 ; save to t1 - - lea rsi, [rsi + rax*8] - lea rdi, [rsi + rax] - lea rdx, [rsi + rax*4] - lea rcx, [rdx + rax] - - movd xmm4, [rsi] ; 83 82 81 80 - movd xmm1, [rdx] ; c3 c2 c1 c0 - movd xmm6, [rdi] ; 93 92 91 90 - movd xmm3, [rcx] ; d3 d2 d1 d0 - punpckldq xmm4, xmm1 ; c3 c2 c1 c0 83 82 81 80 - punpckldq xmm6, xmm3 ; d3 d2 d1 d0 93 92 91 90 - - movd xmm0, [rsi + rax*2] ; a3 a2 a1 a0 - movd xmm5, [rdx + rax*2] ; e3 e2 e1 e0 - movd xmm2, [rdi + rax*2] ; b3 b2 b1 b0 - movd xmm7, [rcx + rax*2] ; f3 f2 f1 f0 - punpckldq xmm0, xmm5 ; e3 e2 e1 e0 a3 a2 a1 a0 - punpckldq xmm2, xmm7 ; f3 f2 f1 f0 b3 b2 b1 b0 - - punpcklbw xmm4, xmm6 ; d3 c3 d2 c2 d1 c1 d0 c0 93 83 92 82 91 81 90 80 - punpcklbw xmm0, xmm2 ; f3 e3 f2 e2 f1 e1 f0 e0 b3 a3 b2 a2 b1 a1 b0 a0 - - movdqa xmm1, xmm4 - punpcklwd xmm4, xmm0 ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80 - punpckhwd xmm1, xmm0 ; f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0 - - movdqa xmm6, xmm4 - punpckldq xmm4, xmm1 ; f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80 - punpckhdq xmm6, xmm1 ; f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82 - - movdqa xmm0, t0 ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00 - movdqa xmm2, t1 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02 - movdqa xmm1, xmm0 - movdqa xmm3, xmm2 - - punpcklqdq xmm0, xmm4 ; p1 f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00 - punpckhqdq xmm1, xmm4 ; p0 f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 - punpcklqdq xmm2, xmm6 ; q0 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 - punpckhqdq xmm3, xmm6 ; q1 f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 - - ; calculate mask - movdqa xmm6, xmm0 ; p1 - movdqa xmm7, xmm3 ; q1 - psubusb xmm7, xmm0 ; q1-=p1 - psubusb xmm6, xmm3 ; p1-=q1 - por xmm6, xmm7 ; abs(p1-q1) - pand xmm6, [GLOBAL(tfe)] ; set lsb of each byte to zero - psrlw xmm6, 1 ; abs(p1-q1)/2 - - movdqa xmm5, xmm1 ; p0 - movdqa xmm4, xmm2 ; q0 - psubusb xmm5, xmm2 ; p0-=q0 - psubusb xmm4, xmm1 ; q0-=p0 - por xmm5, xmm4 ; abs(p0 - q0) - paddusb xmm5, xmm5 ; abs(p0-q0)*2 - paddusb xmm5, xmm6 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - - mov rdx, arg(2) ;blimit - movdqa xmm7, XMMWORD PTR [rdx] - - psubusb xmm5, xmm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit - pxor xmm7, xmm7 - pcmpeqb xmm5, xmm7 ; mm5 = mask - - ; start work on filters - movdqa t0, xmm0 - movdqa t1, xmm3 - - pxor xmm0, [GLOBAL(t80)] ; p1 offset to convert to 
signed values - pxor xmm3, [GLOBAL(t80)] ; q1 offset to convert to signed values - - psubsb xmm0, xmm3 ; p1 - q1 - movdqa xmm6, xmm1 ; p0 - - movdqa xmm7, xmm2 ; q0 - pxor xmm6, [GLOBAL(t80)] ; offset to convert to signed values - - pxor xmm7, [GLOBAL(t80)] ; offset to convert to signed values - movdqa xmm3, xmm7 ; offseted ; q0 - - psubsb xmm7, xmm6 ; q0 - p0 - paddsb xmm0, xmm7 ; p1 - q1 + 1 * (q0 - p0) - - paddsb xmm0, xmm7 ; p1 - q1 + 2 * (q0 - p0) - paddsb xmm0, xmm7 ; p1 - q1 + 3 * (q0 - p0) - - pand xmm5, xmm0 ; mask filter values we don't care about - - - paddsb xmm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4 - - movdqa xmm0, xmm5 ; get a copy of filters - psllw xmm0, 8 ; shift left 8 - - psraw xmm0, 3 ; arithmetic shift right 11 - psrlw xmm0, 8 - - movdqa xmm7, xmm5 ; get a copy of filters - psraw xmm7, 11 ; arithmetic shift right 11 - - psllw xmm7, 8 ; shift left 8 to put it back - por xmm0, xmm7 ; put the two together to get result - - psubsb xmm3, xmm0 ; q0-= q0sz add - pxor xmm3, [GLOBAL(t80)] ; unoffset q0 - - ; now do +3 side - psubsb xmm5, [GLOBAL(t1s)] ; +3 instead of +4 - movdqa xmm0, xmm5 ; get a copy of filters - - psllw xmm0, 8 ; shift left 8 - psraw xmm0, 3 ; arithmetic shift right 11 - - psrlw xmm0, 8 - psraw xmm5, 11 ; arithmetic shift right 11 - - psllw xmm5, 8 ; shift left 8 to put it back - por xmm0, xmm5 ; put the two together to get result - - paddsb xmm6, xmm0 ; p0+= p0 add - pxor xmm6, [GLOBAL(t80)] ; unoffset p0 - - movdqa xmm0, t0 ; p1 - movdqa xmm4, t1 ; q1 - - ; transpose back to write out - ; p1 f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00 - ; p0 f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 - ; q0 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 - ; q1 f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 - movdqa xmm1, xmm0 - punpcklbw xmm0, xmm6 ; 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00 - punpckhbw xmm1, xmm6 ; f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80 - - movdqa xmm5, xmm3 - punpcklbw xmm3, xmm4 ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02 - punpckhbw xmm5, xmm4 ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82 - - movdqa xmm2, xmm0 - punpcklwd xmm0, xmm3 ; 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00 - punpckhwd xmm2, xmm3 ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 - - movdqa xmm3, xmm1 - punpcklwd xmm1, xmm5 ; b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80 - punpckhwd xmm3, xmm5 ; f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 - - ; write out order: xmm0 xmm2 xmm1 xmm3 - lea rdx, [rsi + rax*4] - - movd [rsi], xmm1 ; write the second 8-line result - psrldq xmm1, 4 - movd [rdi], xmm1 - psrldq xmm1, 4 - movd [rsi + rax*2], xmm1 - psrldq xmm1, 4 - movd [rdi + rax*2], xmm1 - - movd [rdx], xmm3 - psrldq xmm3, 4 - movd [rcx], xmm3 - psrldq xmm3, 4 - movd [rdx + rax*2], xmm3 - psrldq xmm3, 4 - movd [rcx + rax*2], xmm3 - - neg rax - lea rsi, [rsi + rax*8] - neg rax - lea rdi, [rsi + rax] - lea rdx, [rsi + rax*4] - lea rcx, [rdx + rax] - - movd [rsi], xmm0 ; write the first 8-line result - psrldq xmm0, 4 - movd [rdi], xmm0 - psrldq xmm0, 4 - movd [rsi + rax*2], xmm0 - psrldq xmm0, 4 - movd [rdi + rax*2], xmm0 - - movd [rdx], xmm2 - psrldq xmm2, 4 - movd [rcx], xmm2 - psrldq xmm2, 4 - movd [rdx + rax*2], xmm2 - psrldq xmm2, 4 - movd [rcx + rax*2], xmm2 - - add rsp, 32 - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - SECTION_RODATA align 16 tfe: diff --git a/vp9/common/x86/vp9_loopfilter_x86.h b/vp9/common/x86/vp9_loopfilter_x86.h index 
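Both the MMX and SSE2 "simple" edge filters removed above implement the same scalar filter, and the asm comments map onto it directly: t80 is the signed-domain offset, t4/t1s are the +4/+3 rounding terms, and the tfe mask plus psrlw by 1 compute the unsigned average abs(p1 - q1)/2. A scalar sketch in the VP8-style reference form:

#include <stdlib.h>  /* abs() */

typedef unsigned char uc;

static signed char signed_char_clamp(int t) {
  return (signed char)(t < -128 ? -128 : (t > 127 ? 127 : t));
}

/* -1 (all bits set) when the edge is quiet enough to filter, else 0. */
static signed char simple_filter_mask(uc blimit, uc p1, uc p0, uc q0, uc q1) {
  return (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= blimit) * -1;
}

static void simple_filter(signed char mask, uc *op1, uc *op0,
                          uc *oq0, uc *oq1) {
  const signed char p1 = (signed char)(*op1 ^ 0x80);  /* to signed domain */
  const signed char p0 = (signed char)(*op0 ^ 0x80);
  const signed char q0 = (signed char)(*oq0 ^ 0x80);
  const signed char q1 = (signed char)(*oq1 ^ 0x80);
  signed char filter = signed_char_clamp(p1 - q1);
  signed char f1, f2;

  filter = signed_char_clamp(filter + 3 * (q0 - p0));
  filter &= mask;

  /* +4 on the q side, +3 on the p side, so the rounding is split evenly */
  f1 = (signed char)(signed_char_clamp(filter + 4) >> 3);
  f2 = (signed char)(signed_char_clamp(filter + 3) >> 3);

  *oq0 = (uc)(signed_char_clamp(q0 - f1) ^ 0x80);
  *op0 = (uc)(signed_char_clamp(p0 + f2) ^ 0x80);
}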
46a6202d20c29b0b9c36a0134540da61ee4e2e48..fb5af05f70d1e3d365d84cd636fc1f025bb511ee 100644 --- a/vp9/common/x86/vp9_loopfilter_x86.h +++ b/vp9/common/x86/vp9_loopfilter_x86.h @@ -23,10 +23,6 @@ extern prototype_loopfilter_block(vp9_loop_filter_mbv_mmx); extern prototype_loopfilter_block(vp9_loop_filter_bv_mmx); extern prototype_loopfilter_block(vp9_loop_filter_mbh_mmx); extern prototype_loopfilter_block(vp9_loop_filter_bh_mmx); -extern prototype_simple_loopfilter(vp9_loop_filter_simple_vertical_edge_mmx); -extern prototype_simple_loopfilter(vp9_loop_filter_bvs_mmx); -extern prototype_simple_loopfilter(vp9_loop_filter_simple_horizontal_edge_mmx); -extern prototype_simple_loopfilter(vp9_loop_filter_bhs_mmx); #endif #if HAVE_SSE2 @@ -34,10 +30,6 @@ extern prototype_loopfilter_block(vp9_loop_filter_mbv_sse2); extern prototype_loopfilter_block(vp9_loop_filter_bv_sse2); extern prototype_loopfilter_block(vp9_loop_filter_mbh_sse2); extern prototype_loopfilter_block(vp9_loop_filter_bh_sse2); -extern prototype_simple_loopfilter(vp9_loop_filter_simple_vertical_edge_sse2); -extern prototype_simple_loopfilter(vp9_loop_filter_bvs_sse2); -extern prototype_simple_loopfilter(vp9_loop_filter_simple_horizontal_edge_sse2); -extern prototype_simple_loopfilter(vp9_loop_filter_bhs_sse2); #endif #endif // LOOPFILTER_X86_H diff --git a/vp9/common/x86/vp9_recon_mmx.asm b/vp9/common/x86/vp9_recon_mmx.asm index fc03d3f5b90c505977e32548b685b313af0c9aab..6fbbe48cbe24db0617d6377d4c122d6c64be65fb 100644 --- a/vp9/common/x86/vp9_recon_mmx.asm +++ b/vp9/common/x86/vp9_recon_mmx.asm @@ -10,55 +10,6 @@ %include "vpx_ports/x86_abi_support.asm" -;void vp9_recon_b_mmx(unsigned char *s, short *q, unsigned char *d, int stride) -global sym(vp9_recon_b_mmx) PRIVATE -sym(vp9_recon_b_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;s - mov rdi, arg(2) ;d - mov rdx, arg(1) ;q - movsxd rax, dword ptr arg(3) ;stride - pxor mm0, mm0 - - movd mm1, [rsi] - punpcklbw mm1, mm0 - paddsw mm1, [rdx] - packuswb mm1, mm0 ; pack and unpack to saturate - movd [rdi], mm1 - - movd mm2, [rsi+16] - punpcklbw mm2, mm0 - paddsw mm2, [rdx+32] - packuswb mm2, mm0 ; pack and unpack to saturate - movd [rdi+rax], mm2 - - movd mm3, [rsi+32] - punpcklbw mm3, mm0 - paddsw mm3, [rdx+64] - packuswb mm3, mm0 ; pack and unpack to saturate - movd [rdi+2*rax], mm3 - - add rdi, rax - movd mm4, [rsi+48] - punpcklbw mm4, mm0 - paddsw mm4, [rdx+96] - packuswb mm4, mm0 ; pack and unpack to saturate - movd [rdi+2*rax], mm4 - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - - ;void copy_mem8x8_mmx( ; unsigned char *src, ; int src_stride, diff --git a/vp9/common/x86/vp9_recon_sse2.asm b/vp9/common/x86/vp9_recon_sse2.asm index 154442dc8f07099868ef003404b4d06358d8e57a..9ee30432aa59cec05618f64f1668c18204ff6e24 100644 --- a/vp9/common/x86/vp9_recon_sse2.asm +++ b/vp9/common/x86/vp9_recon_sse2.asm @@ -10,122 +10,6 @@ %include "vpx_ports/x86_abi_support.asm" -;void vp9_recon2b_sse2(unsigned char *s, short *q, unsigned char *d, int stride) -global sym(vp9_recon2b_sse2) PRIVATE -sym(vp9_recon2b_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;s - mov rdi, arg(2) ;d - mov rdx, arg(1) ;q - movsxd rax, dword ptr arg(3) ;stride - pxor xmm0, xmm0 - - movq xmm1, MMWORD PTR [rsi] - punpcklbw xmm1, xmm0 - paddsw xmm1, XMMWORD PTR [rdx] - packuswb xmm1, xmm0 ; pack and unpack to saturate - movq MMWORD PTR [rdi], xmm1 - - - movq xmm2, MMWORD 
PTR [rsi+8] - punpcklbw xmm2, xmm0 - paddsw xmm2, XMMWORD PTR [rdx+16] - packuswb xmm2, xmm0 ; pack and unpack to saturate - movq MMWORD PTR [rdi+rax], xmm2 - - - movq xmm3, MMWORD PTR [rsi+16] - punpcklbw xmm3, xmm0 - paddsw xmm3, XMMWORD PTR [rdx+32] - packuswb xmm3, xmm0 ; pack and unpack to saturate - movq MMWORD PTR [rdi+rax*2], xmm3 - - add rdi, rax - movq xmm4, MMWORD PTR [rsi+24] - punpcklbw xmm4, xmm0 - paddsw xmm4, XMMWORD PTR [rdx+48] - packuswb xmm4, xmm0 ; pack and unpack to saturate - movq MMWORD PTR [rdi+rax*2], xmm4 - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - - -;void vp9_recon4b_sse2(unsigned char *s, short *q, unsigned char *d, int stride) -global sym(vp9_recon4b_sse2) PRIVATE -sym(vp9_recon4b_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;s - mov rdi, arg(2) ;d - mov rdx, arg(1) ;q - movsxd rax, dword ptr arg(3) ;stride - pxor xmm0, xmm0 - - movdqa xmm1, XMMWORD PTR [rsi] - movdqa xmm5, xmm1 - punpcklbw xmm1, xmm0 - punpckhbw xmm5, xmm0 - paddsw xmm1, XMMWORD PTR [rdx] - paddsw xmm5, XMMWORD PTR [rdx+16] - packuswb xmm1, xmm5 ; pack and unpack to saturate - movdqa XMMWORD PTR [rdi], xmm1 - - - movdqa xmm2, XMMWORD PTR [rsi+16] - movdqa xmm6, xmm2 - punpcklbw xmm2, xmm0 - punpckhbw xmm6, xmm0 - paddsw xmm2, XMMWORD PTR [rdx+32] - paddsw xmm6, XMMWORD PTR [rdx+48] - packuswb xmm2, xmm6 ; pack and unpack to saturate - movdqa XMMWORD PTR [rdi+rax], xmm2 - - - movdqa xmm3, XMMWORD PTR [rsi+32] - movdqa xmm7, xmm3 - punpcklbw xmm3, xmm0 - punpckhbw xmm7, xmm0 - paddsw xmm3, XMMWORD PTR [rdx+64] - paddsw xmm7, XMMWORD PTR [rdx+80] - packuswb xmm3, xmm7 ; pack and unpack to saturate - movdqa XMMWORD PTR [rdi+rax*2], xmm3 - - add rdi, rax - movdqa xmm4, XMMWORD PTR [rsi+48] - movdqa xmm5, xmm4 - punpcklbw xmm4, xmm0 - punpckhbw xmm5, xmm0 - paddsw xmm4, XMMWORD PTR [rdx+96] - paddsw xmm5, XMMWORD PTR [rdx+112] - packuswb xmm4, xmm5 ; pack and unpack to saturate - movdqa XMMWORD PTR [rdi+rax*2], xmm4 - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - ;void copy_mem16x16_sse2( ; unsigned char *src, ; int src_stride, diff --git a/vp9/common/x86/vp9_recon_wrapper_sse2.c b/vp9/common/x86/vp9_recon_wrapper_sse2.c index bb7baf8a0bfbb0eb4c982f0b5bddc5f39dd24d67..97148fbb84f865a3920f967e2b88e7d5847f18ec 100644 --- a/vp9/common/x86/vp9_recon_wrapper_sse2.c +++ b/vp9/common/x86/vp9_recon_wrapper_sse2.c @@ -35,7 +35,7 @@ static void build_intra_predictors_mbuv_x86(MACROBLOCKD *xd, build_intra_pred_mbuv_fn_t ho_fn) { int mode = xd->mode_info_context->mbmi.uv_mode; build_intra_pred_mbuv_fn_t fn; - int src_stride = xd->dst.uv_stride; + int src_stride = xd->plane[1].dst.stride; switch (mode) { case V_PRED: @@ -68,34 +68,34 @@ static void build_intra_predictors_mbuv_x86(MACROBLOCKD *xd, return; } - fn(dst_u, dst_stride, xd->dst.u_buffer, src_stride); - fn(dst_v, dst_stride, xd->dst.v_buffer, src_stride); + fn(dst_u, dst_stride, xd->plane[1].dst.buf, src_stride); + fn(dst_v, dst_stride, xd->plane[2].dst.buf, src_stride); } void vp9_build_intra_predictors_mbuv_sse2(MACROBLOCKD *xd) { - build_intra_predictors_mbuv_x86(xd, &xd->predictor[256], - &xd->predictor[320], 8, + build_intra_predictors_mbuv_x86(xd, xd->plane[1].dst.buf, + xd->plane[2].dst.buf, xd->plane[1].dst.stride, vp9_intra_pred_uv_tm_sse2, vp9_intra_pred_uv_ho_mmx2); } void vp9_build_intra_predictors_mbuv_ssse3(MACROBLOCKD *xd) { - build_intra_predictors_mbuv_x86(xd, &xd->predictor[256], - 
&xd->predictor[320], 8, + build_intra_predictors_mbuv_x86(xd, xd->plane[1].dst.buf, + xd->plane[2].dst.buf, xd->plane[1].dst.stride, vp9_intra_pred_uv_tm_ssse3, vp9_intra_pred_uv_ho_ssse3); } void vp9_build_intra_predictors_mbuv_s_sse2(MACROBLOCKD *xd) { - build_intra_predictors_mbuv_x86(xd, xd->dst.u_buffer, - xd->dst.v_buffer, xd->dst.uv_stride, + build_intra_predictors_mbuv_x86(xd, xd->plane[1].dst.buf, + xd->plane[2].dst.buf, xd->plane[1].dst.stride, vp9_intra_pred_uv_tm_sse2, vp9_intra_pred_uv_ho_mmx2); } void vp9_build_intra_predictors_mbuv_s_ssse3(MACROBLOCKD *xd) { - build_intra_predictors_mbuv_x86(xd, xd->dst.u_buffer, - xd->dst.v_buffer, xd->dst.uv_stride, + build_intra_predictors_mbuv_x86(xd, xd->plane[1].dst.buf, + xd->plane[2].dst.buf, xd->plane[1].dst.stride, vp9_intra_pred_uv_tm_ssse3, vp9_intra_pred_uv_ho_ssse3); } diff --git a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm index 32f00e2893dade99ee9085866fee80cbc39b893c..bbf9888caf67ba8e8414b2de7879788095c623c4 100644 --- a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm +++ b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm @@ -81,10 +81,10 @@ pmaddubsw xmm4, k4k5 pmaddubsw xmm6, k6k7 + paddsw xmm0, xmm6 paddsw xmm0, xmm2 - paddsw xmm0, krd - paddsw xmm4, xmm6 paddsw xmm0, xmm4 + paddsw xmm0, krd psraw xmm0, 7 packuswb xmm0, xmm0 @@ -165,10 +165,10 @@ pmaddubsw xmm4, k4k5 pmaddubsw xmm6, k6k7 + paddsw xmm0, xmm6 paddsw xmm0, xmm2 - paddsw xmm0, krd - paddsw xmm4, xmm6 paddsw xmm0, xmm4 + paddsw xmm0, krd psraw xmm0, 7 packuswb xmm0, xmm0 @@ -250,10 +250,10 @@ pmaddubsw xmm4, k4k5 pmaddubsw xmm6, k6k7 + paddsw xmm0, xmm6 paddsw xmm0, xmm2 - paddsw xmm0, krd - paddsw xmm4, xmm6 paddsw xmm0, xmm4 + paddsw xmm0, krd psraw xmm0, 7 packuswb xmm0, xmm0 @@ -285,10 +285,10 @@ pmaddubsw xmm4, k4k5 pmaddubsw xmm6, k6k7 + paddsw xmm0, xmm6 paddsw xmm0, xmm2 - paddsw xmm4, xmm6 - paddsw xmm0, krd paddsw xmm0, xmm4 + paddsw xmm0, krd psraw xmm0, 7 packuswb xmm0, xmm0 diff --git a/vp9/common/x86/vp9_subpixel_variance_sse2.c b/vp9/common/x86/vp9_subpixel_variance_sse2.c index 91cd75f224b27355e2e1dc052cd3e2b3b750974c..c20b9fbe9a29b6a76b688c6750247277513aa26f 100644 --- a/vp9/common/x86/vp9_subpixel_variance_sse2.c +++ b/vp9/common/x86/vp9_subpixel_variance_sse2.c @@ -43,48 +43,3 @@ void vp9_filter_block2d_bil_var_sse2(const unsigned char *ref_ptr, int yoffset, int *sum, unsigned int *sumsquared); - -unsigned int vp9_sub_pixel_variance16x2_sse2(const unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse) { - int xsum0, xsum1; - unsigned int xxsum0, xxsum1; - - if (xoffset == HALFNDX && yoffset == 0) { - vp9_half_horiz_variance16x_h_sse2( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 2, - &xsum0, &xxsum0); - } else if (xoffset == 0 && yoffset == HALFNDX) { - vp9_half_vert_variance16x_h_sse2( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 2, - &xsum0, &xxsum0); - } else if (xoffset == HALFNDX && yoffset == HALFNDX) { - vp9_half_horiz_vert_variance16x_h_sse2( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 2, - &xsum0, &xxsum0); - } else { - vp9_filter_block2d_bil_var_sse2( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 2, - xoffset, yoffset, - &xsum0, &xxsum0); - - vp9_filter_block2d_bil_var_sse2( - src_ptr + 8, src_pixels_per_line, - dst_ptr + 8, dst_pixels_per_line, 2, - xoffset, yoffset, - &xsum1, &xxsum1); - xsum0 += xsum1; - xxsum0 += xxsum1; - 
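The vp9_subpixel_8t_ssse3.asm hunks above only reorder paddsw instructions, but the order matters: paddsw saturates, so the addition is not associative, and the change presumably makes all four code paths accumulate the partial products in the same sequence (xmm0, then xmm6, xmm2, xmm4, with the rounding constant krd added last) so that every path clips identically. A small illustration with hypothetical 16-bit lane values:

#include <stdint.h>

/* Saturating 16-bit add, the scalar analogue of one paddsw lane. */
static int16_t sat_add16(int16_t a, int16_t b) {
  const int32_t s = (int32_t)a + b;
  return (int16_t)(s > INT16_MAX ? INT16_MAX
                                 : (s < INT16_MIN ? INT16_MIN : s));
}

/* Hypothetical partial sums: k0k1/k2k3 part = 30000, k4k5 = 10000,
 * k6k7 = -20000:
 *   sat_add16(sat_add16(30000, 10000), -20000) == 12767  (clips first)
 *   sat_add16(30000, sat_add16(10000, -20000)) == 20000  (never clips) */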
} - - *sse = xxsum0; - return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 5)); -} diff --git a/vp9/decoder/vp9_dboolhuff.c b/vp9/decoder/vp9_dboolhuff.c index 7e3b4646b2060ae33cfb614e2b0a49455a2e2bbd..df77d650d2021121e883fdbff62977275f08397b 100644 --- a/vp9/decoder/vp9_dboolhuff.c +++ b/vp9/decoder/vp9_dboolhuff.c @@ -13,34 +13,32 @@ #include "vp9/decoder/vp9_dboolhuff.h" -int vp9_start_decode(BOOL_DECODER *br, - const unsigned char *source, - unsigned int source_sz) { - br->user_buffer_end = source + source_sz; - br->user_buffer = source; - br->value = 0; - br->count = -8; - br->range = 255; +int vp9_reader_init(vp9_reader *r, const uint8_t *buffer, size_t size) { + int marker_bit; - if (source_sz && !source) - return 1; + r->buffer_end = buffer + size; + r->buffer = buffer; + r->value = 0; + r->count = -8; + r->range = 255; - /* Populate the buffer */ - vp9_bool_decoder_fill(br); + if (size && !buffer) + return 1; - return 0; + vp9_reader_fill(r); + marker_bit = vp9_read_bit(r); + return marker_bit != 0; } - -void vp9_bool_decoder_fill(BOOL_DECODER *br) { - const unsigned char *bufptr = br->user_buffer; - const unsigned char *bufend = br->user_buffer_end; - VP9_BD_VALUE value = br->value; - int count = br->count; +void vp9_reader_fill(vp9_reader *r) { + const uint8_t *const buffer_end = r->buffer_end; + const uint8_t *buffer = r->buffer; + VP9_BD_VALUE value = r->value; + int count = r->count; int shift = VP9_BD_VALUE_SIZE - 8 - (count + 8); int loop_end = 0; - int bits_left = (int)((bufend - bufptr)*CHAR_BIT); - int x = shift + CHAR_BIT - bits_left; + const int bits_left = (int)((buffer_end - buffer)*CHAR_BIT); + const int x = shift + CHAR_BIT - bits_left; if (x >= 0) { count += VP9_LOTS_OF_BITS; @@ -50,79 +48,22 @@ void vp9_bool_decoder_fill(BOOL_DECODER *br) { if (x < 0 || bits_left) { while (shift >= loop_end) { count += CHAR_BIT; - value |= (VP9_BD_VALUE)*bufptr++ << shift; + value |= (VP9_BD_VALUE)*buffer++ << shift; shift -= CHAR_BIT; } } - br->user_buffer = bufptr; - br->value = value; - br->count = count; + r->buffer = buffer; + r->value = value; + r->count = count; } - -static int get_unsigned_bits(unsigned num_values) { - int cat = 0; - if (num_values <= 1) - return 0; - num_values--; - while (num_values > 0) { - cat++; - num_values >>= 1; +const uint8_t *vp9_reader_find_end(vp9_reader *r) { + // Find the end of the coded buffer + while (r->count > CHAR_BIT && r->count < VP9_BD_VALUE_SIZE) { + r->count -= CHAR_BIT; + r->buffer--; } - return cat; -} - -int vp9_inv_recenter_nonneg(int v, int m) { - if (v > (m << 1)) - return v; - else if ((v & 1) == 0) - return (v >> 1) + m; - else - return m - ((v + 1) >> 1); -} - -int vp9_decode_uniform(BOOL_DECODER *br, int n) { - int v; - int l = get_unsigned_bits(n); - int m = (1 << l) - n; - if (!l) return 0; - v = decode_value(br, l - 1); - if (v < m) - return v; - else - return (v << 1) - m + decode_value(br, 1); + return r->buffer; } -int vp9_decode_term_subexp(BOOL_DECODER *br, int k, int num_syms) { - int i = 0, mk = 0, word; - while (1) { - int b = (i ? 
k + i - 1 : k); - int a = (1 << b); - if (num_syms <= mk + 3 * a) { - word = vp9_decode_uniform(br, num_syms - mk) + mk; - break; - } else { - if (decode_value(br, 1)) { - i++; - mk += a; - } else { - word = decode_value(br, b) + mk; - break; - } - } - } - return word; -} - -int vp9_decode_unsigned_max(BOOL_DECODER *br, int max) { - int data = 0, bit = 0, lmax = max; - - while (lmax) { - data |= decode_bool(br, 128) << bit++; - lmax >>= 1; - } - if (data > max) - return max; - return data; -} diff --git a/vp9/decoder/vp9_dboolhuff.h b/vp9/decoder/vp9_dboolhuff.h index 02ae1d3c8a85f9c91db05aa6a7fa452c99bbd132..b50aa35fd61946ec4daaae03d8206279f8470647 100644 --- a/vp9/decoder/vp9_dboolhuff.h +++ b/vp9/decoder/vp9_dboolhuff.h @@ -21,32 +21,29 @@ typedef size_t VP9_BD_VALUE; #define VP9_BD_VALUE_SIZE ((int)sizeof(VP9_BD_VALUE)*CHAR_BIT) -/*This is meant to be a large, positive constant that can still be efficiently - loaded as an immediate (on platforms like ARM, for example). - Even relatively modest values like 100 would work fine.*/ -#define VP9_LOTS_OF_BITS (0x40000000) + +// This is meant to be a large, positive constant that can still be efficiently +// loaded as an immediate (on platforms like ARM, for example). +// Even relatively modest values like 100 would work fine. +#define VP9_LOTS_OF_BITS 0x40000000 typedef struct { - const unsigned char *user_buffer_end; - const unsigned char *user_buffer; - VP9_BD_VALUE value; - int count; - unsigned int range; -} BOOL_DECODER; + const uint8_t *buffer_end; + const uint8_t *buffer; + VP9_BD_VALUE value; + int count; + unsigned int range; +} vp9_reader; DECLARE_ALIGNED(16, extern const uint8_t, vp9_norm[256]); -int vp9_start_decode(BOOL_DECODER *br, - const unsigned char *source, - unsigned int source_sz); +int vp9_reader_init(vp9_reader *r, const uint8_t *buffer, size_t size); -void vp9_bool_decoder_fill(BOOL_DECODER *br); +void vp9_reader_fill(vp9_reader *r); -int vp9_decode_uniform(BOOL_DECODER *br, int n); -int vp9_decode_term_subexp(BOOL_DECODER *br, int k, int num_syms); -int vp9_inv_recenter_nonneg(int v, int m); +const uint8_t *vp9_reader_find_end(vp9_reader *r); -static int decode_bool(BOOL_DECODER *br, int probability) { +static int vp9_read(vp9_reader *br, int probability) { unsigned int bit = 0; VP9_BD_VALUE value; VP9_BD_VALUE bigsplit; @@ -55,7 +52,7 @@ static int decode_bool(BOOL_DECODER *br, int probability) { unsigned int split = 1 + (((br->range - 1) * probability) >> 8); if (br->count < 0) - vp9_bool_decoder_fill(br); + vp9_reader_fill(br); value = br->value; count = br->count; @@ -83,18 +80,20 @@ static int decode_bool(BOOL_DECODER *br, int probability) { return bit; } -static int decode_value(BOOL_DECODER *br, int bits) { - int z = 0; - int bit; +static int vp9_read_bit(vp9_reader *r) { + return vp9_read(r, 128); // vp9_prob_half +} - for (bit = bits - 1; bit >= 0; bit--) { - z |= decode_bool(br, 0x80) << bit; - } +static int vp9_read_literal(vp9_reader *br, int bits) { + int z = 0, bit; + + for (bit = bits - 1; bit >= 0; bit--) + z |= vp9_read_bit(br) << bit; return z; } -static int bool_error(BOOL_DECODER *br) { +static int vp9_reader_has_error(vp9_reader *r) { // Check if we have reached the end of the buffer. // // Variable 'count' stores the number of bits in the 'value' buffer, minus @@ -109,9 +108,7 @@ static int bool_error(BOOL_DECODER *br) { // // 1 if we have tried to decode bits after the end of stream was encountered. // 0 No error. 
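In the decode step kept above (now vp9_read), the coding interval is divided at split = 1 + ((range - 1) * probability >> 8); with range = 255 and probability = 128, for instance, split = 128. A minimal scalar model of one step, renormalization omitted (a hypothetical helper, not part of the API):

/* Models vp9_read()'s branch: value_top8 stands for the top 8 bits of
 * the value window, which is what the bigsplit comparison amounts to. */
static int read_bit_model(unsigned int *range, unsigned int *value_top8,
                          int probability) {
  const unsigned int split = 1 + (((*range - 1) * probability) >> 8);
  if (*value_top8 >= split) {
    *value_top8 -= split;   /* bit = 1: keep the upper subinterval */
    *range -= split;
    return 1;
  }
  *range = split;           /* bit = 0: keep the lower subinterval */
  return 0;
}

In the real code the comparison uses bigsplit (the split shifted to the top of the VP9_BD_VALUE window), and vp9_norm[range] then supplies the shift that renormalizes range back above 128 while count tracks the bits consumed.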
- return br->count > VP9_BD_VALUE_SIZE && br->count < VP9_LOTS_OF_BITS; + return r->count > VP9_BD_VALUE_SIZE && r->count < VP9_LOTS_OF_BITS; } -int vp9_decode_unsigned_max(BOOL_DECODER *br, int max); - #endif // VP9_DECODER_VP9_DBOOLHUFF_H_ diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index 353e94fa58d69a24b9a4535b94949a18fab6db55..7a8fb0e58fadc5f9fa536d53a06546a939949560 100644 --- a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c @@ -20,6 +20,7 @@ #include "vp9/common/vp9_pred_common.h" #include "vp9/common/vp9_entropy.h" #include "vp9/decoder/vp9_decodemv.h" +#include "vp9/decoder/vp9_decodframe.h" #include "vp9/common/vp9_mvref_common.h" #if CONFIG_DEBUG #include <assert.h> @@ -35,435 +36,261 @@ int dec_mvcount = 0; extern int dec_debug; #endif -static B_PREDICTION_MODE read_bmode(vp9_reader *bc, const vp9_prob *p) { - B_PREDICTION_MODE m = treed_read(bc, vp9_bmode_tree, p); -#if CONFIG_NEWBINTRAMODES - if (m == B_CONTEXT_PRED - CONTEXT_PRED_REPLACEMENTS) - m = B_CONTEXT_PRED; - assert(m < B_CONTEXT_PRED - CONTEXT_PRED_REPLACEMENTS || m == B_CONTEXT_PRED); -#endif +static MB_PREDICTION_MODE read_intra_mode(vp9_reader *r, const vp9_prob *p) { + MB_PREDICTION_MODE m = treed_read(r, vp9_intra_mode_tree, p); return m; } -static B_PREDICTION_MODE read_kf_bmode(vp9_reader *bc, const vp9_prob *p) { - return (B_PREDICTION_MODE)treed_read(bc, vp9_kf_bmode_tree, p); -} - -static MB_PREDICTION_MODE read_ymode(vp9_reader *bc, const vp9_prob *p) { - return (MB_PREDICTION_MODE)treed_read(bc, vp9_ymode_tree, p); -} - -static MB_PREDICTION_MODE read_sb_ymode(vp9_reader *bc, const vp9_prob *p) { - return (MB_PREDICTION_MODE)treed_read(bc, vp9_sb_ymode_tree, p); -} - -static MB_PREDICTION_MODE read_kf_sb_ymode(vp9_reader *bc, const vp9_prob *p) { - return (MB_PREDICTION_MODE)treed_read(bc, vp9_uv_mode_tree, p); -} - -static MB_PREDICTION_MODE read_kf_mb_ymode(vp9_reader *bc, const vp9_prob *p) { - return (MB_PREDICTION_MODE)treed_read(bc, vp9_kf_ymode_tree, p); -} - -static int read_i8x8_mode(vp9_reader *bc, const vp9_prob *p) { - return treed_read(bc, vp9_i8x8_mode_tree, p); -} - -static MB_PREDICTION_MODE read_uv_mode(vp9_reader *bc, const vp9_prob *p) { - return (MB_PREDICTION_MODE)treed_read(bc, vp9_uv_mode_tree, p); -} - -// This function reads the current macro block's segnent id from the bitstream -// It should only be called if a segment map update is indicated. -static void read_mb_segid(vp9_reader *r, MB_MODE_INFO *mi, MACROBLOCKD *xd) { - if (xd->segmentation_enabled && xd->update_mb_segmentation_map) { - const vp9_prob *const p = xd->mb_segment_tree_probs; - mi->segment_id = vp9_read(r, p[0]) ? 2 + vp9_read(r, p[2]) - : vp9_read(r, p[1]); - } -} - -// This function reads the current macro block's segnent id from the bitstream -// It should only be called if a segment map update is indicated. -static void read_mb_segid_except(VP9_COMMON *cm, - vp9_reader *r, MB_MODE_INFO *mi, - MACROBLOCKD *xd, int mb_row, int mb_col) { - const int mb_index = mb_row * cm->mb_cols + mb_col; - const int pred_seg_id = vp9_get_pred_mb_segid(cm, xd, mb_index); - const vp9_prob *const p = xd->mb_segment_tree_probs; - const vp9_prob prob = xd->mb_segment_mispred_tree_probs[pred_seg_id]; - - if (xd->segmentation_enabled && xd->update_mb_segmentation_map) { - mi->segment_id = vp9_read(r, prob) - ? 2 + (pred_seg_id < 2 ? vp9_read(r, p[2]) : (pred_seg_id == 2)) - : (pred_seg_id >= 2 ? 
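With the renaming, a typical call site of the reader looks as follows (a sketch with error handling elided; note that vp9_reader_init now also consumes a marker bit and returns nonzero on failure, so callers must check its result):

#include "vp9/decoder/vp9_dboolhuff.h"

static int parse_example(const uint8_t *data, size_t size) {
  vp9_reader r;

  if (vp9_reader_init(&r, data, size))
    return -1;                                  /* null buffer or marker bit set */

  {
    const int flag  = vp9_read_bit(&r);         /* one bit at probability 128 */
    const int value = vp9_read_literal(&r, 4);  /* four raw bits, MSB first */
    (void)flag;
    (void)value;
  }

  return vp9_reader_has_error(&r) ? -1 : 0;     /* decoded past the buffer end? */
}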
vp9_read(r, p[1]) : (pred_seg_id == 0)); - } +static int read_mb_segid(vp9_reader *r, MACROBLOCKD *xd) { + return treed_read(r, vp9_segment_tree, xd->mb_segment_tree_probs); } -#if CONFIG_NEW_MVREF -int vp9_read_mv_ref_id(vp9_reader *r, vp9_prob *ref_id_probs) { - int ref_index = 0; +static void set_segment_id(VP9_COMMON *cm, MB_MODE_INFO *mbmi, + int mi_row, int mi_col, int segment_id) { + const int mi_index = mi_row * cm->mi_cols + mi_col; + const BLOCK_SIZE_TYPE sb_type = mbmi->sb_type; + const int bw = 1 << mi_width_log2(sb_type); + const int bh = 1 << mi_height_log2(sb_type); + const int ymis = MIN(cm->mi_rows - mi_row, bh); + const int xmis = MIN(cm->mi_cols - mi_col, bw); + int x, y; - if (vp9_read(r, ref_id_probs[0])) { - ref_index++; - if (vp9_read(r, ref_id_probs[1])) { - ref_index++; - if (vp9_read(r, ref_id_probs[2])) - ref_index++; + for (y = 0; y < ymis; y++) { + for (x = 0; x < xmis; x++) { + const int index = mi_index + (y * cm->mi_cols + x); + cm->last_frame_seg_map[index] = segment_id; } } - return ref_index; } -#endif - -extern const int vp9_i8x8_block[4]; -static void kfread_modes(VP9D_COMP *pbi, - MODE_INFO *m, - int mb_row, - int mb_col, - BOOL_DECODER* const bc) { - VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; - const int mis = pbi->common.mode_info_stride; - int map_index = mb_row * pbi->common.mb_cols + mb_col; - MB_PREDICTION_MODE y_mode; - - m->mbmi.ref_frame = INTRA_FRAME; - // Read the Macroblock segmentation map if it is being updated explicitly - // this frame (reset to 0 by default). - m->mbmi.segment_id = 0; - if (pbi->mb.update_mb_segmentation_map) { - read_mb_segid(bc, &m->mbmi, &pbi->mb); - if (m->mbmi.sb_type) { - const int nmbs = 1 << m->mbmi.sb_type; - const int ymbs = MIN(cm->mb_rows - mb_row, nmbs); - const int xmbs = MIN(cm->mb_cols - mb_col, nmbs); - int x, y; - - for (y = 0; y < ymbs; y++) { - for (x = 0; x < xmbs; x++) { - cm->last_frame_seg_map[map_index + x + y * cm->mb_cols] = - m->mbmi.segment_id; - } - } - } else { - cm->last_frame_seg_map[map_index] = m->mbmi.segment_id; - } +static TX_SIZE select_txfm_size(VP9_COMMON *cm, MACROBLOCKD *xd, + vp9_reader *r, BLOCK_SIZE_TYPE bsize) { + const int context = vp9_get_pred_context(cm, xd, PRED_TX_SIZE); + const vp9_prob *tx_probs = vp9_get_pred_probs(cm, xd, PRED_TX_SIZE); + TX_SIZE txfm_size = vp9_read(r, tx_probs[0]); + if (txfm_size != TX_4X4 && bsize >= BLOCK_SIZE_MB16X16) { + txfm_size += vp9_read(r, tx_probs[1]); + if (txfm_size != TX_8X8 && bsize >= BLOCK_SIZE_SB32X32) + txfm_size += vp9_read(r, tx_probs[2]); } - - m->mbmi.mb_skip_coeff = 0; - if (pbi->common.mb_no_coeff_skip && - (!vp9_segfeature_active(&pbi->mb, m->mbmi.segment_id, SEG_LVL_SKIP))) { - m->mbmi.mb_skip_coeff = vp9_read(bc, vp9_get_pred_prob(cm, &pbi->mb, - PRED_MBSKIP)); + if (bsize >= BLOCK_SIZE_SB32X32) { + cm->fc.tx_count_32x32p[context][txfm_size]++; + } else if (bsize >= BLOCK_SIZE_MB16X16) { + cm->fc.tx_count_16x16p[context][txfm_size]++; } else { - m->mbmi.mb_skip_coeff = vp9_segfeature_active(&pbi->mb, m->mbmi.segment_id, - SEG_LVL_SKIP); + cm->fc.tx_count_8x8p[context][txfm_size]++; } + return txfm_size; +} - y_mode = m->mbmi.sb_type ? 
- read_kf_sb_ymode(bc, - pbi->common.sb_kf_ymode_prob[pbi->common.kf_ymode_probs_index]): - read_kf_mb_ymode(bc, - pbi->common.kf_ymode_prob[pbi->common.kf_ymode_probs_index]); - - m->mbmi.ref_frame = INTRA_FRAME; - if ((m->mbmi.mode = y_mode) == B_PRED) { - int i = 0; - do { - const B_PREDICTION_MODE a = above_block_mode(m, i, mis); - const B_PREDICTION_MODE l = (xd->left_available || (i & 3)) ? - left_block_mode(m, i) : B_DC_PRED; +static void kfread_modes(VP9D_COMP *pbi, MODE_INFO *m, + int mi_row, int mi_col, + vp9_reader *r) { + VP9_COMMON *const cm = &pbi->common; + MACROBLOCKD *const xd = &pbi->mb; + const int mis = cm->mode_info_stride; - m->bmi[i].as_mode.first = read_kf_bmode(bc, - pbi->common.kf_bmode_prob[a][l]); - } while (++i < 16); + // Read segmentation map if it is being updated explicitly this frame + m->mbmi.segment_id = 0; + if (xd->segmentation_enabled && xd->update_mb_segmentation_map) { + m->mbmi.segment_id = read_mb_segid(r, xd); + set_segment_id(cm, &m->mbmi, mi_row, mi_col, m->mbmi.segment_id); } - if ((m->mbmi.mode = y_mode) == I8X8_PRED) { - int i; - for (i = 0; i < 4; i++) { - const int ib = vp9_i8x8_block[i]; - const int mode8x8 = read_i8x8_mode(bc, pbi->common.fc.i8x8_mode_prob); - - m->bmi[ib + 0].as_mode.first = mode8x8; - m->bmi[ib + 1].as_mode.first = mode8x8; - m->bmi[ib + 4].as_mode.first = mode8x8; - m->bmi[ib + 5].as_mode.first = mode8x8; - } - } else { - m->mbmi.uv_mode = read_uv_mode(bc, - pbi->common.kf_uv_mode_prob[m->mbmi.mode]); + m->mbmi.mb_skip_coeff = vp9_segfeature_active(xd, m->mbmi.segment_id, + SEG_LVL_SKIP); + if (!m->mbmi.mb_skip_coeff) { + m->mbmi.mb_skip_coeff = vp9_read(r, vp9_get_pred_prob(cm, xd, PRED_MBSKIP)); + cm->fc.mbskip_count[vp9_get_pred_context(cm, xd, PRED_MBSKIP)] + [m->mbmi.mb_skip_coeff]++; } if (cm->txfm_mode == TX_MODE_SELECT && - m->mbmi.mb_skip_coeff == 0 && - m->mbmi.mode <= I8X8_PRED) { - // FIXME(rbultje) code ternary symbol once all experiments are merged - m->mbmi.txfm_size = vp9_read(bc, cm->prob_tx[0]); - if (m->mbmi.txfm_size != TX_4X4 && m->mbmi.mode != I8X8_PRED) { - m->mbmi.txfm_size += vp9_read(bc, cm->prob_tx[1]); - if (m->mbmi.txfm_size != TX_8X8 && m->mbmi.sb_type) - m->mbmi.txfm_size += vp9_read(bc, cm->prob_tx[2]); - } - } else if (cm->txfm_mode >= ALLOW_32X32 && m->mbmi.sb_type) { + m->mbmi.sb_type >= BLOCK_SIZE_SB8X8) { + m->mbmi.txfm_size = select_txfm_size(cm, xd, r, m->mbmi.sb_type); + } else if (cm->txfm_mode >= ALLOW_32X32 && + m->mbmi.sb_type >= BLOCK_SIZE_SB32X32) { m->mbmi.txfm_size = TX_32X32; - } else if (cm->txfm_mode >= ALLOW_16X16 && m->mbmi.mode <= TM_PRED) { + } else if (cm->txfm_mode >= ALLOW_16X16 && + m->mbmi.sb_type >= BLOCK_SIZE_MB16X16) { m->mbmi.txfm_size = TX_16X16; - } else if (cm->txfm_mode >= ALLOW_8X8 && m->mbmi.mode != B_PRED) { + } else if (cm->txfm_mode >= ALLOW_8X8 && + m->mbmi.sb_type >= BLOCK_SIZE_SB8X8) { m->mbmi.txfm_size = TX_8X8; } else { m->mbmi.txfm_size = TX_4X4; } + + // luma mode + m->mbmi.ref_frame[0] = INTRA_FRAME; + if (m->mbmi.sb_type >= BLOCK_SIZE_SB8X8) { + const MB_PREDICTION_MODE A = above_block_mode(m, 0, mis); + const MB_PREDICTION_MODE L = xd->left_available ? 
+ left_block_mode(m, 0) : DC_PRED; + m->mbmi.mode = read_intra_mode(r, cm->kf_y_mode_prob[A][L]); + } else { + int idx, idy; + int bw = 1 << b_width_log2(m->mbmi.sb_type); + int bh = 1 << b_height_log2(m->mbmi.sb_type); + + for (idy = 0; idy < 2; idy += bh) { + for (idx = 0; idx < 2; idx += bw) { + int ib = idy * 2 + idx; + int k; + const MB_PREDICTION_MODE A = above_block_mode(m, ib, mis); + const MB_PREDICTION_MODE L = (xd->left_available || idx) ? + left_block_mode(m, ib) : DC_PRED; + m->bmi[ib].as_mode.first = + read_intra_mode(r, cm->kf_y_mode_prob[A][L]); + for (k = 1; k < bh; ++k) + m->bmi[ib + k * 2].as_mode.first = m->bmi[ib].as_mode.first; + for (k = 1; k < bw; ++k) + m->bmi[ib + k].as_mode.first = m->bmi[ib].as_mode.first; + } + } + m->mbmi.mode = m->bmi[3].as_mode.first; + } + + m->mbmi.uv_mode = read_intra_mode(r, cm->kf_uv_mode_prob[m->mbmi.mode]); } -static int read_nmv_component(vp9_reader *r, - int rv, - const nmv_component *mvcomp) { - int mag, d; +static int read_mv_component(vp9_reader *r, + const nmv_component *mvcomp, int usehp) { + + int mag, d, fr, hp; const int sign = vp9_read(r, mvcomp->sign); const int mv_class = treed_read(r, vp9_mv_class_tree, mvcomp->classes); + // Integer part if (mv_class == MV_CLASS_0) { d = treed_read(r, vp9_mv_class0_tree, mvcomp->class0); } else { int i; - int n = mv_class + CLASS0_BITS - 1; // number of bits + const int n = mv_class + CLASS0_BITS - 1; // number of bits d = 0; for (i = 0; i < n; ++i) d |= vp9_read(r, mvcomp->bits[i]) << i; } - mag = vp9_get_mv_mag(mv_class, d << 3); - return sign ? -(mag + 8) : (mag + 8); -} + // Fractional part + fr = treed_read(r, vp9_mv_fp_tree, + mv_class == MV_CLASS_0 ? mvcomp->class0_fp[d] : mvcomp->fp); -static int read_nmv_component_fp(vp9_reader *r, - int v, - int rv, - const nmv_component *mvcomp, - int usehp) { - const int sign = v < 0; - int mag = ((sign ? -v : v) - 1) & ~7; // magnitude - 1 - int offset; - const int mv_class = vp9_get_mv_class(mag, &offset); - const int f = mv_class == MV_CLASS_0 ? - treed_read(r, vp9_mv_fp_tree, mvcomp->class0_fp[offset >> 3]): - treed_read(r, vp9_mv_fp_tree, mvcomp->fp); - - offset += f << 1; - if (usehp) { - const vp9_prob p = mv_class == MV_CLASS_0 ? mvcomp->class0_hp : mvcomp->hp; - offset += vp9_read(r, p); - } else { - offset += 1; // If hp is not used, the default value of the hp bit is 1 - } - mag = vp9_get_mv_mag(mv_class, offset); - return sign ? -(mag + 1) : (mag + 1); -} - -static void read_nmv(vp9_reader *r, MV *mv, const MV *ref, - const nmv_context *mvctx) { - const MV_JOINT_TYPE j = treed_read(r, vp9_mv_joint_tree, mvctx->joints); - mv->row = mv-> col = 0; - if (j == MV_JOINT_HZVNZ || j == MV_JOINT_HNZVNZ) { - mv->row = read_nmv_component(r, ref->row, &mvctx->comps[0]); - } + // High precision part (if hp is not used, the default value of the hp is 1) + hp = usehp ? vp9_read(r, + mv_class == MV_CLASS_0 ? mvcomp->class0_hp : mvcomp->hp) + : 1; - if (j == MV_JOINT_HNZVZ || j == MV_JOINT_HNZVNZ) { - mv->col = read_nmv_component(r, ref->col, &mvctx->comps[1]); - } + // result + mag = vp9_get_mv_mag(mv_class, (d << 3) | (fr << 1) | hp) + 1; + return sign ? 
-mag : mag; } -static void read_nmv_fp(vp9_reader *r, MV *mv, const MV *ref, - const nmv_context *mvctx, int usehp) { - const MV_JOINT_TYPE j = vp9_get_mv_joint(*mv); - usehp = usehp && vp9_use_nmv_hp(ref); - if (j == MV_JOINT_HZVNZ || j == MV_JOINT_HNZVNZ) { - mv->row = read_nmv_component_fp(r, mv->row, ref->row, &mvctx->comps[0], - usehp); - } - if (j == MV_JOINT_HNZVZ || j == MV_JOINT_HNZVNZ) { - mv->col = read_nmv_component_fp(r, mv->col, ref->col, &mvctx->comps[1], - usehp); - } - /* - printf("MV: %d %d REF: %d %d\n", mv->row + ref->row, mv->col + ref->col, - ref->row, ref->col); - */ -} - -static void update_nmv(vp9_reader *bc, vp9_prob *const p, +static void update_nmv(vp9_reader *r, vp9_prob *const p, const vp9_prob upd_p) { - if (vp9_read(bc, upd_p)) { + if (vp9_read(r, upd_p)) { #ifdef LOW_PRECISION_MV_UPDATE - *p = (vp9_read_literal(bc, 7) << 1) | 1; + *p = (vp9_read_literal(r, 7) << 1) | 1; #else - *p = (vp9_read_literal(bc, 8)); + *p = (vp9_read_literal(r, 8)); #endif } } -static void read_nmvprobs(vp9_reader *bc, nmv_context *mvctx, +static void read_nmvprobs(vp9_reader *r, nmv_context *mvctx, int usehp) { int i, j, k; #ifdef MV_GROUP_UPDATE - if (!vp9_read_bit(bc)) + if (!vp9_read_bit(r)) return; #endif for (j = 0; j < MV_JOINTS - 1; ++j) - update_nmv(bc, &mvctx->joints[j], VP9_NMV_UPDATE_PROB); + update_nmv(r, &mvctx->joints[j], VP9_NMV_UPDATE_PROB); for (i = 0; i < 2; ++i) { - update_nmv(bc, &mvctx->comps[i].sign, VP9_NMV_UPDATE_PROB); + update_nmv(r, &mvctx->comps[i].sign, VP9_NMV_UPDATE_PROB); for (j = 0; j < MV_CLASSES - 1; ++j) - update_nmv(bc, &mvctx->comps[i].classes[j], VP9_NMV_UPDATE_PROB); + update_nmv(r, &mvctx->comps[i].classes[j], VP9_NMV_UPDATE_PROB); for (j = 0; j < CLASS0_SIZE - 1; ++j) - update_nmv(bc, &mvctx->comps[i].class0[j], VP9_NMV_UPDATE_PROB); + update_nmv(r, &mvctx->comps[i].class0[j], VP9_NMV_UPDATE_PROB); for (j = 0; j < MV_OFFSET_BITS; ++j) - update_nmv(bc, &mvctx->comps[i].bits[j], VP9_NMV_UPDATE_PROB); + update_nmv(r, &mvctx->comps[i].bits[j], VP9_NMV_UPDATE_PROB); } for (i = 0; i < 2; ++i) { - for (j = 0; j < CLASS0_SIZE; ++j) { + for (j = 0; j < CLASS0_SIZE; ++j) for (k = 0; k < 3; ++k) - update_nmv(bc, &mvctx->comps[i].class0_fp[j][k], VP9_NMV_UPDATE_PROB); - } + update_nmv(r, &mvctx->comps[i].class0_fp[j][k], VP9_NMV_UPDATE_PROB); for (j = 0; j < 3; ++j) - update_nmv(bc, &mvctx->comps[i].fp[j], VP9_NMV_UPDATE_PROB); + update_nmv(r, &mvctx->comps[i].fp[j], VP9_NMV_UPDATE_PROB); } if (usehp) { for (i = 0; i < 2; ++i) { - update_nmv(bc, &mvctx->comps[i].class0_hp, VP9_NMV_UPDATE_PROB); - update_nmv(bc, &mvctx->comps[i].hp, VP9_NMV_UPDATE_PROB); + update_nmv(r, &mvctx->comps[i].class0_hp, VP9_NMV_UPDATE_PROB); + update_nmv(r, &mvctx->comps[i].hp, VP9_NMV_UPDATE_PROB); } } } // Read the referncence frame -static MV_REFERENCE_FRAME read_ref_frame(VP9D_COMP *pbi, - vp9_reader *const bc, - unsigned char segment_id) { - MV_REFERENCE_FRAME ref_frame; +static void read_ref_frame(VP9D_COMP *pbi, vp9_reader *r, + int segment_id, MV_REFERENCE_FRAME ref_frame[2]) { VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; + const int seg_ref_active = vp9_segfeature_active(xd, segment_id, + SEG_LVL_REF_FRAME); - int seg_ref_count = 0; - int seg_ref_active = vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME); - - // If segment coding enabled does the segment allow for more than one - // possible reference frame - if (seg_ref_active) { - seg_ref_count = vp9_check_segref(xd, segment_id, INTRA_FRAME) + - vp9_check_segref(xd, segment_id, 
LAST_FRAME) + - vp9_check_segref(xd, segment_id, GOLDEN_FRAME) + - vp9_check_segref(xd, segment_id, ALTREF_FRAME); - } + // Segment reference frame features not available. + if (!seg_ref_active) { + int is_comp; + int comp_ctx = vp9_get_pred_context(cm, xd, PRED_COMP_INTER_INTER); - // Segment reference frame features not available or allows for - // multiple reference frame options - if (!seg_ref_active || (seg_ref_count > 1)) { - // Values used in prediction model coding - MV_REFERENCE_FRAME pred_ref; - - // Get the context probability the prediction flag - vp9_prob pred_prob = vp9_get_pred_prob(cm, xd, PRED_REF); - - // Read the prediction status flag - unsigned char prediction_flag = vp9_read(bc, pred_prob); - - // Store the prediction flag. - vp9_set_pred_flag(xd, PRED_REF, prediction_flag); + if (cm->comp_pred_mode == HYBRID_PREDICTION) { + is_comp = vp9_read(r, cm->fc.comp_inter_prob[comp_ctx]); + cm->fc.comp_inter_count[comp_ctx][is_comp]++; + } else { + is_comp = cm->comp_pred_mode == COMP_PREDICTION_ONLY; + } - // Get the predicted reference frame. - pred_ref = vp9_get_pred_ref(cm, xd); + // FIXME(rbultje) I'm pretty sure this breaks segmentation ref frame coding + if (is_comp) { + int b, fix_ref_idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref]; + int ref_ctx = vp9_get_pred_context(cm, xd, PRED_COMP_REF_P); - // If correctly predicted then use the predicted value - if (prediction_flag) { - ref_frame = pred_ref; + ref_frame[fix_ref_idx] = cm->comp_fixed_ref; + b = vp9_read(r, cm->fc.comp_ref_prob[ref_ctx]); + cm->fc.comp_ref_count[ref_ctx][b]++; + ref_frame[!fix_ref_idx] = cm->comp_var_ref[b]; } else { - // decode the explicitly coded value - vp9_prob mod_refprobs[PREDICTION_PROBS]; - vpx_memcpy(mod_refprobs, - cm->mod_refprobs[pred_ref], sizeof(mod_refprobs)); - - // If segment coding enabled blank out options that cant occur by - // setting the branch probability to 0. - if (seg_ref_active) { - mod_refprobs[INTRA_FRAME] *= - vp9_check_segref(xd, segment_id, INTRA_FRAME); - mod_refprobs[LAST_FRAME] *= - vp9_check_segref(xd, segment_id, LAST_FRAME); - mod_refprobs[GOLDEN_FRAME] *= - (vp9_check_segref(xd, segment_id, GOLDEN_FRAME) * - vp9_check_segref(xd, segment_id, ALTREF_FRAME)); - } - - // Default to INTRA_FRAME (value 0) - ref_frame = INTRA_FRAME; - - // Do we need to decode the Intra/Inter branch - if (mod_refprobs[0]) - ref_frame = (MV_REFERENCE_FRAME) vp9_read(bc, mod_refprobs[0]); - else - ref_frame++; - - if (ref_frame) { - // Do we need to decode the Last/Gf_Arf branch - if (mod_refprobs[1]) - ref_frame += vp9_read(bc, mod_refprobs[1]); - else - ref_frame++; - - if (ref_frame > 1) { - // Do we need to decode the GF/Arf branch - if (mod_refprobs[2]) - ref_frame += vp9_read(bc, mod_refprobs[2]); - else { - if (seg_ref_active) { - if ((pred_ref == GOLDEN_FRAME) || - !vp9_check_segref(xd, segment_id, GOLDEN_FRAME)) { - ref_frame = ALTREF_FRAME; - } else - ref_frame = GOLDEN_FRAME; - } else - ref_frame = (pred_ref == GOLDEN_FRAME) - ? ALTREF_FRAME : GOLDEN_FRAME; - } - } + int ref1_ctx = vp9_get_pred_context(cm, xd, PRED_SINGLE_REF_P1); + ref_frame[1] = NONE; + if (vp9_read(r, cm->fc.single_ref_prob[ref1_ctx][0])) { + int ref2_ctx = vp9_get_pred_context(cm, xd, PRED_SINGLE_REF_P2); + int b2 = vp9_read(r, cm->fc.single_ref_prob[ref2_ctx][1]); + ref_frame[0] = b2 ? 
ALTREF_FRAME : GOLDEN_FRAME; + cm->fc.single_ref_count[ref1_ctx][0][1]++; + cm->fc.single_ref_count[ref2_ctx][1][b2]++; + } else { + ref_frame[0] = LAST_FRAME; + cm->fc.single_ref_count[ref1_ctx][0][0]++; } } } else { - // Segment reference frame features are enabled - // The reference frame for the mb is considered as correclty predicted - // if it is signaled at the segment level for the purposes of the - // common prediction model - vp9_set_pred_flag(xd, PRED_REF, 1); - ref_frame = vp9_get_pred_ref(cm, xd); + ref_frame[0] = vp9_get_segdata(xd, segment_id, SEG_LVL_REF_FRAME); + ref_frame[1] = NONE; } - - return (MV_REFERENCE_FRAME)ref_frame; -} - -static MB_PREDICTION_MODE read_sb_mv_ref(vp9_reader *bc, const vp9_prob *p) { - return (MB_PREDICTION_MODE) treed_read(bc, vp9_sb_mv_ref_tree, p); -} - -static MB_PREDICTION_MODE read_mv_ref(vp9_reader *bc, const vp9_prob *p) { - return (MB_PREDICTION_MODE) treed_read(bc, vp9_mv_ref_tree, p); } -static B_PREDICTION_MODE sub_mv_ref(vp9_reader *bc, const vp9_prob *p) { - return (B_PREDICTION_MODE) treed_read(bc, vp9_sub_mv_ref_tree, p); +static MB_PREDICTION_MODE read_sb_mv_ref(vp9_reader *r, const vp9_prob *p) { + return (MB_PREDICTION_MODE) treed_read(r, vp9_sb_mv_ref_tree, p); } #ifdef VPX_MODE_COUNT @@ -476,159 +303,143 @@ unsigned int vp9_mv_cont_count[5][4] = { }; #endif -static const unsigned char mbsplit_fill_count[4] = { 8, 8, 4, 1 }; -static const unsigned char mbsplit_fill_offset[4][16] = { - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, - { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 }, - { 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15 }, - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } -}; - -static void read_switchable_interp_probs(VP9D_COMP* const pbi, - BOOL_DECODER* const bc) { - VP9_COMMON *const cm = &pbi->common; +static void read_switchable_interp_probs(VP9_COMMON* const cm, vp9_reader *r) { int i, j; - for (j = 0; j <= VP9_SWITCHABLE_FILTERS; ++j) { + for (j = 0; j <= VP9_SWITCHABLE_FILTERS; ++j) for (i = 0; i < VP9_SWITCHABLE_FILTERS - 1; ++i) { - cm->fc.switchable_interp_prob[j][i] = vp9_read_prob(bc); + if (vp9_read(r, VP9_MODE_UPDATE_PROB)) { + cm->fc.switchable_interp_prob[j][i] = + // vp9_read_prob(r); + vp9_read_prob_diff_update(r, cm->fc.switchable_interp_prob[j][i]); + } + } +} + +static void read_inter_mode_probs(VP9_COMMON *const cm, vp9_reader *r) { + int i, j; + for (i = 0; i < INTER_MODE_CONTEXTS; ++i) + for (j = 0; j < VP9_INTER_MODES - 1; ++j) { + if (vp9_read(r, VP9_MODE_UPDATE_PROB)) { + // cm->fc.inter_mode_probs[i][j] = vp9_read_prob(r); + cm->fc.inter_mode_probs[i][j] = + vp9_read_prob_diff_update(r, cm->fc.inter_mode_probs[i][j]); + } } - } - //printf("DECODER: %d %d\n", cm->fc.switchable_interp_prob[0], - //cm->fc.switchable_interp_prob[1]); } -static void mb_mode_mv_init(VP9D_COMP *pbi, vp9_reader *bc) { +static INLINE COMPPREDMODE_TYPE read_comp_pred_mode(vp9_reader *r) { + COMPPREDMODE_TYPE mode = vp9_read_bit(r); + if (mode) + mode += vp9_read_bit(r); + return mode; +} + +static void mb_mode_mv_init(VP9D_COMP *pbi, vp9_reader *r) { VP9_COMMON *const cm = &pbi->common; - nmv_context *const nmvc = &pbi->common.fc.nmvc; - MACROBLOCKD *const xd = &pbi->mb; - if (cm->frame_type == KEY_FRAME) { - if (!cm->kf_ymode_probs_update) - cm->kf_ymode_probs_index = vp9_read_literal(bc, 3); - } else { - if (cm->mcomp_filter_type == SWITCHABLE) - read_switchable_interp_probs(pbi, bc); -#if CONFIG_COMP_INTERINTRA_PRED - if (cm->use_interintra) { - if (vp9_read(bc, 
VP9_UPD_INTERINTRA_PROB)) - cm->fc.interintra_prob = vp9_read_prob(bc); - } -#endif - // Decode the baseline probabilities for decoding reference frame - cm->prob_intra_coded = vp9_read_prob(bc); - cm->prob_last_coded = vp9_read_prob(bc); - cm->prob_gf_coded = vp9_read_prob(bc); - - // Computes a modified set of probabilities for use when reference - // frame prediction fails. - vp9_compute_mod_refprobs(cm); - - pbi->common.comp_pred_mode = vp9_read(bc, 128); - if (cm->comp_pred_mode) - cm->comp_pred_mode += vp9_read(bc, 128); - if (cm->comp_pred_mode == HYBRID_PREDICTION) { - int i; - for (i = 0; i < COMP_PRED_CONTEXTS; i++) - cm->prob_comppred[i] = vp9_read_prob(bc); - } + if ((cm->frame_type != KEY_FRAME) && (!cm->intra_only)) { + nmv_context *const nmvc = &pbi->common.fc.nmvc; + MACROBLOCKD *const xd = &pbi->mb; + int i, j; - if (vp9_read_bit(bc)) { - int i = 0; + read_inter_mode_probs(cm, r); - do { - cm->fc.ymode_prob[i] = vp9_read_prob(bc); - } while (++i < VP9_YMODES - 1); + if (cm->mcomp_filter_type == SWITCHABLE) + read_switchable_interp_probs(cm, r); + + for (i = 0; i < INTRA_INTER_CONTEXTS; i++) { + if (vp9_read(r, VP9_MODE_UPDATE_PROB)) + cm->fc.intra_inter_prob[i] = + vp9_read_prob_diff_update(r, cm->fc.intra_inter_prob[i]); } - if (vp9_read_bit(bc)) { - int i = 0; + if (cm->allow_comp_inter_inter) { + cm->comp_pred_mode = read_comp_pred_mode(r); + if (cm->comp_pred_mode == HYBRID_PREDICTION) + for (i = 0; i < COMP_INTER_CONTEXTS; i++) + if (vp9_read(r, VP9_MODE_UPDATE_PROB)) + cm->fc.comp_inter_prob[i] = + vp9_read_prob_diff_update(r, cm->fc.comp_inter_prob[i]); + } else { + cm->comp_pred_mode = SINGLE_PREDICTION_ONLY; + } - do { - cm->fc.sb_ymode_prob[i] = vp9_read_prob(bc); - } while (++i < VP9_I32X32_MODES - 1); + if (cm->comp_pred_mode != COMP_PREDICTION_ONLY) + for (i = 0; i < REF_CONTEXTS; i++) { + if (vp9_read(r, VP9_MODE_UPDATE_PROB)) + cm->fc.single_ref_prob[i][0] = + vp9_read_prob_diff_update(r, cm->fc.single_ref_prob[i][0]); + if (vp9_read(r, VP9_MODE_UPDATE_PROB)) + cm->fc.single_ref_prob[i][1] = + vp9_read_prob_diff_update(r, cm->fc.single_ref_prob[i][1]); + } + + if (cm->comp_pred_mode != SINGLE_PREDICTION_ONLY) + for (i = 0; i < REF_CONTEXTS; i++) + if (vp9_read(r, VP9_MODE_UPDATE_PROB)) + cm->fc.comp_ref_prob[i] = + vp9_read_prob_diff_update(r, cm->fc.comp_ref_prob[i]); + + // VP9_INTRA_MODES + for (j = 0; j < BLOCK_SIZE_GROUPS; j++) { + for (i = 0; i < VP9_INTRA_MODES - 1; ++i) { + if (vp9_read(r, VP9_MODE_UPDATE_PROB)) { + cm->fc.y_mode_prob[j][i] = + vp9_read_prob_diff_update(r, cm->fc.y_mode_prob[j][i]); + } + } + } + for (j = 0; j < NUM_PARTITION_CONTEXTS; ++j) { + for (i = 0; i < PARTITION_TYPES - 1; ++i) { + if (vp9_read(r, VP9_MODE_UPDATE_PROB)) { + cm->fc.partition_prob[INTER_FRAME][j][i] = + vp9_read_prob_diff_update(r, + cm->fc.partition_prob[INTER_FRAME][j][i]); + } + } } - read_nmvprobs(bc, nmvc, xd->allow_high_precision_mv); + read_nmvprobs(r, nmvc, xd->allow_high_precision_mv); } } // This function either reads the segment id for the current macroblock from // the bitstream or if the value is temporally predicted asserts the predicted // value -static void read_mb_segment_id(VP9D_COMP *pbi, - int mb_row, int mb_col, - BOOL_DECODER* const bc) { +static int read_mb_segment_id(VP9D_COMP *pbi, int mi_row, int mi_col, + vp9_reader *r) { VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; - MODE_INFO *mi = xd->mode_info_context; - MB_MODE_INFO *mbmi = &mi->mbmi; - int mb_index = mb_row * pbi->common.mb_cols + mb_col; - - if 
(xd->segmentation_enabled) { - if (xd->update_mb_segmentation_map) { - // Is temporal coding of the segment id for this mb enabled. - if (cm->temporal_update) { - // Get the context based probability for reading the - // prediction status flag - vp9_prob pred_prob = vp9_get_pred_prob(cm, xd, PRED_SEG_ID); - - // Read the prediction status flag - unsigned char seg_pred_flag = vp9_read(bc, pred_prob); - - // Store the prediction flag. - vp9_set_pred_flag(xd, PRED_SEG_ID, seg_pred_flag); - - // If the value is flagged as correctly predicted - // then use the predicted value - if (seg_pred_flag) { - mbmi->segment_id = vp9_get_pred_mb_segid(cm, xd, mb_index); - } else { - // Decode it explicitly - read_mb_segid_except(cm, bc, mbmi, xd, mb_row, mb_col); - } - } else { - // Normal unpredicted coding mode - read_mb_segid(bc, mbmi, xd); - } - - if (mbmi->sb_type) { - const int nmbs = 1 << mbmi->sb_type; - const int ymbs = MIN(cm->mb_rows - mb_row, nmbs); - const int xmbs = MIN(cm->mb_cols - mb_col, nmbs); - int x, y; + MODE_INFO *const mi = xd->mode_info_context; + MB_MODE_INFO *const mbmi = &mi->mbmi; - for (y = 0; y < ymbs; y++) { - for (x = 0; x < xmbs; x++) { - cm->last_frame_seg_map[mb_index + x + y * cm->mb_cols] = - mbmi->segment_id; - } - } - } else { - cm->last_frame_seg_map[mb_index] = mbmi->segment_id; - } + if (!xd->segmentation_enabled) + return 0; // Default for disabled segmentation + + if (xd->update_mb_segmentation_map) { + int segment_id; + + if (cm->temporal_update) { + // Temporal coding of the segment id for this mb is enabled. + // Get the context based probability for reading the + // prediction status flag + const vp9_prob pred_prob = vp9_get_pred_prob(cm, xd, PRED_SEG_ID); + const int pred_flag = vp9_read(r, pred_prob); + vp9_set_pred_flag(xd, PRED_SEG_ID, pred_flag); + + // If the value is flagged as correctly predicted + // then use the predicted value, otherwise decode it explicitly + segment_id = pred_flag ? 
vp9_get_pred_mi_segid(cm, mbmi->sb_type, + mi_row, mi_col) + : read_mb_segid(r, xd); } else { - if (mbmi->sb_type) { - const int nmbs = 1 << mbmi->sb_type; - const int ymbs = MIN(cm->mb_rows - mb_row, nmbs); - const int xmbs = MIN(cm->mb_cols - mb_col, nmbs); - unsigned segment_id = -1; - int x, y; - - for (y = 0; y < ymbs; y++) { - for (x = 0; x < xmbs; x++) { - segment_id = MIN(segment_id, - cm->last_frame_seg_map[mb_index + x + y * cm->mb_cols]); - } - } - mbmi->segment_id = segment_id; - } else { - mbmi->segment_id = cm->last_frame_seg_map[mb_index]; - } + segment_id = read_mb_segid(r, xd); // Normal unpredicted coding mode } + + set_segment_id(cm, mbmi, mi_row, mi_col, segment_id); // Side effect + return segment_id; } else { - // The encoder explicitly sets the segment_id to 0 - // when segmentation is disabled - mbmi->segment_id = 0; + return vp9_get_pred_mi_segid(cm, mbmi->sb_type, mi_row, mi_col); } } @@ -643,48 +454,66 @@ static INLINE void assign_and_clamp_mv(int_mv *dst, const int_mv *src, mb_to_bottom_edge); } -static INLINE void process_mv(BOOL_DECODER* bc, MV *mv, MV *ref, - nmv_context *nmvc, nmv_context_counts *mvctx, - int usehp) { - read_nmv(bc, mv, ref, nmvc); - read_nmv_fp(bc, mv, ref, nmvc, usehp); - vp9_increment_nmv(mv, ref, mvctx, usehp); - mv->row += ref->row; - mv->col += ref->col; +static INLINE void decode_mv(vp9_reader *r, MV *mv, const MV *ref, + const nmv_context *ctx, + nmv_context_counts *counts, + int usehp) { + const MV_JOINT_TYPE j = treed_read(r, vp9_mv_joint_tree, ctx->joints); + MV diff = {0, 0}; + + usehp = usehp && vp9_use_nmv_hp(ref); + if (mv_joint_vertical(j)) + diff.row = read_mv_component(r, &ctx->comps[0], usehp); + + if (mv_joint_horizontal(j)) + diff.col = read_mv_component(r, &ctx->comps[1], usehp); + + vp9_increment_nmv(&diff, ref, counts, usehp); + + mv->row = diff.row + ref->row; + mv->col = diff.col + ref->col; +} + +static INLINE INTERPOLATIONFILTERTYPE read_switchable_filter_type( + VP9D_COMP *pbi, vp9_reader *r) { + const int index = treed_read(r, vp9_switchable_interp_tree, + vp9_get_pred_probs(&pbi->common, &pbi->mb, + PRED_SWITCHABLE_INTERP)); + ++pbi->common.fc.switchable_interp_count + [vp9_get_pred_context( + &pbi->common, &pbi->mb, PRED_SWITCHABLE_INTERP)][index]; + return vp9_switchable_interp[index]; } static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, - MODE_INFO *prev_mi, - int mb_row, int mb_col, - BOOL_DECODER* const bc) { + int mi_row, int mi_col, + vp9_reader *r) { VP9_COMMON *const cm = &pbi->common; - nmv_context *const nmvc = &pbi->common.fc.nmvc; - const int mis = pbi->common.mode_info_stride; + nmv_context *const nmvc = &cm->fc.nmvc; MACROBLOCKD *const xd = &pbi->mb; - int_mv *const mv = &mbmi->mv[0]; - const int mb_size = 1 << mi->mbmi.sb_type; - - const int use_prev_in_find_mv_refs = cm->width == cm->last_width && - cm->height == cm->last_height && - !cm->error_resilient_mode; + int_mv *const mv0 = &mbmi->mv[0]; + int_mv *const mv1 = &mbmi->mv[1]; + BLOCK_SIZE_TYPE bsize = mi->mbmi.sb_type; + int bw = 1 << b_width_log2(bsize); + int bh = 1 << b_height_log2(bsize); int mb_to_left_edge, mb_to_right_edge, mb_to_top_edge, mb_to_bottom_edge; + int j, idx, idy; mbmi->need_to_clamp_mvs = 0; mbmi->need_to_clamp_secondmv = 0; - mbmi->second_ref_frame = NONE; + mbmi->ref_frame[1] = NONE; // Make sure the MACROBLOCKD mode info pointer is pointed at the // correct entry for the current macroblock. 
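  // Neighbor-context lookups made while decoding this block (the above/left
  // intra modes, and the PRED_* contexts fetched through vp9_get_pred_prob
  // and vp9_get_pred_context) are all taken relative to this pointer and
  // mode_info_stride, so it must be valid before anything below is decoded.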
xd->mode_info_context = mi; - xd->prev_mode_info_context = prev_mi; // Distance of Mb to the various image edges. // These specified to 8th pel as they are always compared to MV values // that are in 1/8th pel units - set_mb_row(cm, xd, mb_row, mb_size); - set_mb_col(cm, xd, mb_col, mb_size); + set_mi_row_col(cm, xd, mi_row, 1 << mi_height_log2(bsize), + mi_col, 1 << mi_width_log2(bsize)); mb_to_top_edge = xd->mb_to_top_edge - LEFT_TOP_MARGIN; mb_to_bottom_edge = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN; @@ -692,81 +521,78 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, mb_to_right_edge = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN; // Read the macroblock segment id. - read_mb_segment_id(pbi, mb_row, mb_col, bc); + mbmi->segment_id = read_mb_segment_id(pbi, mi_row, mi_col, r); - if (pbi->common.mb_no_coeff_skip && - (!vp9_segfeature_active(xd, mbmi->segment_id, SEG_LVL_SKIP))) { - // Read the macroblock coeff skip flag if this feature is in use, - // else default to 0 - mbmi->mb_skip_coeff = vp9_read(bc, vp9_get_pred_prob(cm, xd, PRED_MBSKIP)); - } else { - mbmi->mb_skip_coeff = vp9_segfeature_active(xd, mbmi->segment_id, - SEG_LVL_SKIP); + mbmi->mb_skip_coeff = vp9_segfeature_active(xd, mbmi->segment_id, + SEG_LVL_SKIP); + if (!mbmi->mb_skip_coeff) { + mbmi->mb_skip_coeff = vp9_read(r, vp9_get_pred_prob(cm, xd, PRED_MBSKIP)); + cm->fc.mbskip_count[vp9_get_pred_context(cm, xd, PRED_MBSKIP)] + [mbmi->mb_skip_coeff]++; } // Read the reference frame - mbmi->ref_frame = read_ref_frame(pbi, bc, mbmi->segment_id); + if (!vp9_segfeature_active(xd, mbmi->segment_id, SEG_LVL_REF_FRAME)) { + mbmi->ref_frame[0] = + vp9_read(r, vp9_get_pred_prob(cm, xd, PRED_INTRA_INTER)); + cm->fc.intra_inter_count[vp9_get_pred_context(cm, xd, PRED_INTRA_INTER)] + [mbmi->ref_frame[0] != INTRA_FRAME]++; + } else { + mbmi->ref_frame[0] = + vp9_get_segdata(xd, mbmi->segment_id, SEG_LVL_REF_FRAME) != INTRA_FRAME; + } - /* - if (pbi->common.current_video_frame == 1) - printf("ref frame: %d [%d %d]\n", mbmi->ref_frame, mb_row, mb_col); - */ + if (cm->txfm_mode == TX_MODE_SELECT && + (mbmi->mb_skip_coeff == 0 || mbmi->ref_frame[0] == INTRA_FRAME) && + bsize >= BLOCK_SIZE_SB8X8) { + mbmi->txfm_size = select_txfm_size(cm, xd, r, bsize); + } else if (bsize >= BLOCK_SIZE_SB32X32 && + cm->txfm_mode >= ALLOW_32X32) { + mbmi->txfm_size = TX_32X32; + } else if (cm->txfm_mode >= ALLOW_16X16 && + bsize >= BLOCK_SIZE_MB16X16) { + mbmi->txfm_size = TX_16X16; + } else if (cm->txfm_mode >= ALLOW_8X8 && (bsize >= BLOCK_SIZE_SB8X8)) { + mbmi->txfm_size = TX_8X8; + } else { + mbmi->txfm_size = TX_4X4; + } // If reference frame is an Inter frame - if (mbmi->ref_frame) { + if (mbmi->ref_frame[0] != INTRA_FRAME) { int_mv nearest, nearby, best_mv; int_mv nearest_second, nearby_second, best_mv_second; - vp9_prob mv_ref_p[VP9_MVREFS - 1]; + vp9_prob mv_ref_p[VP9_INTER_MODES - 1]; - MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame; - xd->scale_factor[0] = cm->active_ref_scale[mbmi->ref_frame - 1]; + read_ref_frame(pbi, r, mbmi->segment_id, mbmi->ref_frame); { - const int use_prev_in_find_best_ref = - xd->scale_factor[0].x_num == xd->scale_factor[0].x_den && - xd->scale_factor[0].y_num == xd->scale_factor[0].y_den && - !cm->error_resilient_mode && - !cm->frame_parallel_decoding_mode; - - /* Select the appropriate reference frame for this MB */ - const int ref_fb_idx = cm->active_ref_idx[ref_frame - 1]; - - setup_pred_block(&xd->pre, &cm->yv12_fb[ref_fb_idx], - mb_row, mb_col, &xd->scale_factor[0], 
&xd->scale_factor_uv[0]); - #ifdef DEC_DEBUG if (dec_debug) printf("%d %d\n", xd->mode_info_context->mbmi.mv[0].as_mv.row, xd->mode_info_context->mbmi.mv[0].as_mv.col); #endif - // if (cm->current_video_frame == 1 && mb_row == 4 && mb_col == 5) - // printf("Dello\n"); - vp9_find_mv_refs(cm, xd, mi, use_prev_in_find_mv_refs ? prev_mi : NULL, - ref_frame, mbmi->ref_mvs[ref_frame], + vp9_find_mv_refs(cm, xd, mi, xd->prev_mode_info_context, + mbmi->ref_frame[0], mbmi->ref_mvs[mbmi->ref_frame[0]], cm->ref_frame_sign_bias); - vp9_mv_ref_probs(&pbi->common, mv_ref_p, - mbmi->mb_mode_context[ref_frame]); + vp9_mv_ref_probs(cm, mv_ref_p, mbmi->mb_mode_context[mbmi->ref_frame[0]]); // If the segment level skip mode enabled if (vp9_segfeature_active(xd, mbmi->segment_id, SEG_LVL_SKIP)) { mbmi->mode = ZEROMV; - } else { - mbmi->mode = mbmi->sb_type ? read_sb_mv_ref(bc, mv_ref_p) - : read_mv_ref(bc, mv_ref_p); - vp9_accum_mv_refs(&pbi->common, mbmi->mode, - mbmi->mb_mode_context[ref_frame]); + } else if (bsize >= BLOCK_SIZE_SB8X8) { + mbmi->mode = read_sb_mv_ref(r, mv_ref_p); + vp9_accum_mv_refs(cm, mbmi->mode, + mbmi->mb_mode_context[mbmi->ref_frame[0]]); } - if (mbmi->mode != ZEROMV) { + if (bsize < BLOCK_SIZE_SB8X8 || mbmi->mode != ZEROMV) { vp9_find_best_ref_mvs(xd, - use_prev_in_find_best_ref ? - xd->pre.y_buffer : NULL, - xd->pre.y_stride, - mbmi->ref_mvs[ref_frame], + mbmi->ref_mvs[mbmi->ref_frame[0]], &nearest, &nearby); - best_mv.as_int = (mbmi->ref_mvs[ref_frame][0]).as_int; + best_mv.as_int = mbmi->ref_mvs[mbmi->ref_frame[0]][0].as_int; } #ifdef DEC_DEBUG @@ -777,176 +603,79 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, #endif } - if (mbmi->mode >= NEARESTMV && mbmi->mode <= SPLITMV) { - if (cm->mcomp_filter_type == SWITCHABLE) { - mbmi->interp_filter = vp9_switchable_interp[ - treed_read(bc, vp9_switchable_interp_tree, - vp9_get_pred_probs(cm, xd, PRED_SWITCHABLE_INTERP))]; - } else { - mbmi->interp_filter = cm->mcomp_filter_type; - } - } - - if (cm->comp_pred_mode == COMP_PREDICTION_ONLY || - (cm->comp_pred_mode == HYBRID_PREDICTION && - vp9_read(bc, vp9_get_pred_prob(cm, xd, PRED_COMP)))) { - /* Since we have 3 reference frames, we can only have 3 unique - * combinations of combinations of 2 different reference frames - * (A-G, G-L or A-L). In the bitstream, we use this to simply - * derive the second reference frame from the first reference - * frame, by saying it's the next one in the enumerator, and - * if that's > n_refs, then the second reference frame is the - * first one in the enumerator. */ - mbmi->second_ref_frame = mbmi->ref_frame + 1; - if (mbmi->second_ref_frame == 4) - mbmi->second_ref_frame = 1; - if (mbmi->second_ref_frame > 0) { - int second_ref_fb_idx; - int use_prev_in_find_best_ref; - - xd->scale_factor[1] = cm->active_ref_scale[mbmi->second_ref_frame - 1]; - use_prev_in_find_best_ref = - xd->scale_factor[1].x_num == xd->scale_factor[1].x_den && - xd->scale_factor[1].y_num == xd->scale_factor[1].y_den && - !cm->error_resilient_mode && - !cm->frame_parallel_decoding_mode; - - /* Select the appropriate reference frame for this MB */ - second_ref_fb_idx = cm->active_ref_idx[mbmi->second_ref_frame - 1]; - - setup_pred_block(&xd->second_pre, &cm->yv12_fb[second_ref_fb_idx], - mb_row, mb_col, &xd->scale_factor[1], &xd->scale_factor_uv[1]); - - vp9_find_mv_refs(cm, xd, mi, use_prev_in_find_mv_refs ? 
prev_mi : NULL, - mbmi->second_ref_frame, - mbmi->ref_mvs[mbmi->second_ref_frame], - cm->ref_frame_sign_bias); - - if (mbmi->mode != ZEROMV) { - vp9_find_best_ref_mvs(xd, - use_prev_in_find_best_ref ? - xd->second_pre.y_buffer : NULL, - xd->second_pre.y_stride, - mbmi->ref_mvs[mbmi->second_ref_frame], - &nearest_second, - &nearby_second); - best_mv_second = mbmi->ref_mvs[mbmi->second_ref_frame][0]; - } - } - - } else { -#if CONFIG_COMP_INTERINTRA_PRED - if (pbi->common.use_interintra && - mbmi->mode >= NEARESTMV && mbmi->mode < SPLITMV && - mbmi->second_ref_frame == NONE) { - mbmi->second_ref_frame = (vp9_read(bc, pbi->common.fc.interintra_prob) ? - INTRA_FRAME : NONE); - // printf("-- %d (%d)\n", mbmi->second_ref_frame == INTRA_FRAME, - // pbi->common.fc.interintra_prob); - pbi->common.fc.interintra_counts[ - mbmi->second_ref_frame == INTRA_FRAME]++; - if (mbmi->second_ref_frame == INTRA_FRAME) { - mbmi->interintra_mode = read_ymode(bc, pbi->common.fc.ymode_prob); - pbi->common.fc.ymode_counts[mbmi->interintra_mode]++; -#if SEPARATE_INTERINTRA_UV - mbmi->interintra_uv_mode = read_uv_mode(bc, - pbi->common.fc.uv_mode_prob[mbmi->interintra_mode]); - pbi->common.fc.uv_mode_counts[mbmi->interintra_mode] - [mbmi->interintra_uv_mode]++; -#else - mbmi->interintra_uv_mode = mbmi->interintra_mode; -#endif - // printf("** %d %d\n", - // mbmi->interintra_mode, mbmi->interintra_uv_mode); - } - } -#endif - } - -#if CONFIG_NEW_MVREF - // if ((mbmi->mode == NEWMV) || (mbmi->mode == SPLITMV)) - if (mbmi->mode == NEWMV) { - int best_index; - MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame; + mbmi->interp_filter = cm->mcomp_filter_type == SWITCHABLE + ? read_switchable_filter_type(pbi, r) + : cm->mcomp_filter_type; - // Encode the index of the choice. - best_index = - vp9_read_mv_ref_id(bc, xd->mb_mv_ref_probs[ref_frame]); - - best_mv.as_int = mbmi->ref_mvs[ref_frame][best_index].as_int; - - if (mbmi->second_ref_frame > 0) { - ref_frame = mbmi->second_ref_frame; + if (mbmi->ref_frame[1] > INTRA_FRAME) { + vp9_find_mv_refs(cm, xd, mi, xd->prev_mode_info_context, + mbmi->ref_frame[1], + mbmi->ref_mvs[mbmi->ref_frame[1]], + cm->ref_frame_sign_bias); - // Encode the index of the choice. 
- best_index = - vp9_read_mv_ref_id(bc, xd->mb_mv_ref_probs[ref_frame]); - best_mv_second.as_int = mbmi->ref_mvs[ref_frame][best_index].as_int; + if (bsize < BLOCK_SIZE_SB8X8 || mbmi->mode != ZEROMV) { + vp9_find_best_ref_mvs(xd, + mbmi->ref_mvs[mbmi->ref_frame[1]], + &nearest_second, + &nearby_second); + best_mv_second.as_int = mbmi->ref_mvs[mbmi->ref_frame[1]][0].as_int; } } -#endif mbmi->uv_mode = DC_PRED; - switch (mbmi->mode) { - case SPLITMV: { - const int s = treed_read(bc, vp9_mbsplit_tree, cm->fc.mbsplit_prob); - const int num_p = vp9_mbsplit_count[s]; - int j = 0; - - cm->fc.mbsplit_counts[s]++; - mbmi->need_to_clamp_mvs = 0; - mbmi->partitioning = s; - do { // for each subset j - int_mv leftmv, abovemv, second_leftmv, second_abovemv; + if (mbmi->sb_type < BLOCK_SIZE_SB8X8) { + mbmi->need_to_clamp_mvs = 0; + for (idy = 0; idy < 2; idy += bh) { + for (idx = 0; idx < 2; idx += bw) { int_mv blockmv, secondmv; - int mv_contz; int blockmode; - int k = vp9_mbsplit_offset[s][j]; // first block in subset j - - leftmv.as_int = left_block_mv(xd, mi, k); - abovemv.as_int = above_block_mv(mi, k, mis); - second_leftmv.as_int = 0; - second_abovemv.as_int = 0; - if (mbmi->second_ref_frame > 0) { - second_leftmv.as_int = left_block_second_mv(xd, mi, k); - second_abovemv.as_int = above_block_second_mv(mi, k, mis); + int i; + j = idy * 2 + idx; + + blockmode = read_sb_mv_ref(r, mv_ref_p); + vp9_accum_mv_refs(cm, blockmode, + mbmi->mb_mode_context[mbmi->ref_frame[0]]); + if (blockmode == NEARESTMV || blockmode == NEARMV) { + MV_REFERENCE_FRAME rf2 = mbmi->ref_frame[1]; + vp9_append_sub8x8_mvs_for_idx(cm, xd, &nearest, &nearby, j, 0); + if (rf2 > 0) { + vp9_append_sub8x8_mvs_for_idx(cm, xd, &nearest_second, + &nearby_second, j, 1); + } } - mv_contz = vp9_mv_cont(&leftmv, &abovemv); - blockmode = sub_mv_ref(bc, cm->fc.sub_mv_ref_prob [mv_contz]); - cm->fc.sub_mv_ref_counts[mv_contz][blockmode - LEFT4X4]++; switch (blockmode) { - case NEW4X4: - process_mv(bc, &blockmv.as_mv, &best_mv.as_mv, nmvc, + case NEWMV: + decode_mv(r, &blockmv.as_mv, &best_mv.as_mv, nmvc, &cm->fc.NMVcount, xd->allow_high_precision_mv); - if (mbmi->second_ref_frame > 0) - process_mv(bc, &secondmv.as_mv, &best_mv_second.as_mv, nmvc, - &cm->fc.NMVcount, xd->allow_high_precision_mv); + if (mbmi->ref_frame[1] > 0) + decode_mv(r, &secondmv.as_mv, &best_mv_second.as_mv, nmvc, + &cm->fc.NMVcount, xd->allow_high_precision_mv); #ifdef VPX_MODE_COUNT vp9_mv_cont_count[mv_contz][3]++; #endif break; - case LEFT4X4: - blockmv.as_int = leftmv.as_int; - if (mbmi->second_ref_frame > 0) - secondmv.as_int = second_leftmv.as_int; + case NEARESTMV: + blockmv.as_int = nearest.as_int; + if (mbmi->ref_frame[1] > 0) + secondmv.as_int = nearest_second.as_int; #ifdef VPX_MODE_COUNT vp9_mv_cont_count[mv_contz][0]++; #endif break; - case ABOVE4X4: - blockmv.as_int = abovemv.as_int; - if (mbmi->second_ref_frame > 0) - secondmv.as_int = second_abovemv.as_int; + case NEARMV: + blockmv.as_int = nearby.as_int; + if (mbmi->ref_frame[1] > 0) + secondmv.as_int = nearby_second.as_int; #ifdef VPX_MODE_COUNT vp9_mv_cont_count[mv_contz][1]++; #endif break; - case ZERO4X4: + case ZEROMV: blockmv.as_int = 0; - if (mbmi->second_ref_frame > 0) + if (mbmi->ref_frame[1] > 0) secondmv.as_int = 0; #ifdef VPX_MODE_COUNT vp9_mv_cont_count[mv_contz][2]++; @@ -955,490 +684,154 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, default: break; } + mi->bmi[j].as_mv[0].as_int = blockmv.as_int; + if (mbmi->ref_frame[1] > 0) + mi->bmi[j].as_mv[1].as_int 
= secondmv.as_int; + + for (i = 1; i < bh; ++i) + vpx_memcpy(&mi->bmi[j + i * 2], &mi->bmi[j], sizeof(mi->bmi[j])); + for (i = 1; i < bw; ++i) + vpx_memcpy(&mi->bmi[j + i], &mi->bmi[j], sizeof(mi->bmi[j])); + mi->mbmi.mode = blockmode; + } + } - /* Commenting this section out, not sure why this was needed, and - * there are mismatches with this section in rare cases since it is - * not done in the encoder at all. - mbmi->need_to_clamp_mvs |= check_mv_bounds(&blockmv, - mb_to_left_edge, + mv0->as_int = mi->bmi[3].as_mv[0].as_int; + mv1->as_int = mi->bmi[3].as_mv[1].as_int; + } else { + switch (mbmi->mode) { + case NEARMV: + // Clip "next_nearest" so that it does not extend to far out of image + assign_and_clamp_mv(mv0, &nearby, mb_to_left_edge, + mb_to_right_edge, + mb_to_top_edge, + mb_to_bottom_edge); + if (mbmi->ref_frame[1] > 0) + assign_and_clamp_mv(mv1, &nearby_second, mb_to_left_edge, mb_to_right_edge, mb_to_top_edge, mb_to_bottom_edge); - if (mbmi->second_ref_frame > 0) { - mbmi->need_to_clamp_mvs |= check_mv_bounds(&secondmv, - mb_to_left_edge, - mb_to_right_edge, - mb_to_top_edge, - mb_to_bottom_edge); - } - */ - - { - /* Fill (uniform) modes, mvs of jth subset. - Must do it here because ensuing subsets can - refer back to us via "left" or "above". */ - unsigned int fill_count = mbsplit_fill_count[s]; - const unsigned char *fill_offset = - &mbsplit_fill_offset[s][j * fill_count]; - - do { - mi->bmi[*fill_offset].as_mv[0].as_int = blockmv.as_int; - if (mbmi->second_ref_frame > 0) - mi->bmi[*fill_offset].as_mv[1].as_int = secondmv.as_int; - fill_offset++; - } while (--fill_count); - } - - } while (++j < num_p); - } - - mv->as_int = mi->bmi[15].as_mv[0].as_int; - mbmi->mv[1].as_int = mi->bmi[15].as_mv[1].as_int; - - break; /* done with SPLITMV */ - - case NEARMV: - // Clip "next_nearest" so that it does not extend to far out of image - assign_and_clamp_mv(mv, &nearby, mb_to_left_edge, - mb_to_right_edge, - mb_to_top_edge, - mb_to_bottom_edge); - if (mbmi->second_ref_frame > 0) - assign_and_clamp_mv(&mbmi->mv[1], &nearby_second, mb_to_left_edge, - mb_to_right_edge, - mb_to_top_edge, - mb_to_bottom_edge); - break; - - case NEARESTMV: - // Clip "next_nearest" so that it does not extend to far out of image - assign_and_clamp_mv(mv, &nearest, mb_to_left_edge, - mb_to_right_edge, - mb_to_top_edge, - mb_to_bottom_edge); - if (mbmi->second_ref_frame > 0) - assign_and_clamp_mv(&mbmi->mv[1], &nearest_second, mb_to_left_edge, + break; + + case NEARESTMV: + // Clip "next_nearest" so that it does not extend to far out of image + assign_and_clamp_mv(mv0, &nearest, mb_to_left_edge, + mb_to_right_edge, + mb_to_top_edge, + mb_to_bottom_edge); + if (mbmi->ref_frame[1] > 0) + assign_and_clamp_mv(mv1, &nearest_second, mb_to_left_edge, + mb_to_right_edge, + mb_to_top_edge, + mb_to_bottom_edge); + break; + + case ZEROMV: + mv0->as_int = 0; + if (mbmi->ref_frame[1] > 0) + mv1->as_int = 0; + break; + + case NEWMV: + decode_mv(r, &mv0->as_mv, &best_mv.as_mv, nmvc, &cm->fc.NMVcount, + xd->allow_high_precision_mv); + mbmi->need_to_clamp_mvs = check_mv_bounds(mv0, + mb_to_left_edge, + mb_to_right_edge, + mb_to_top_edge, + mb_to_bottom_edge); + + if (mbmi->ref_frame[1] > 0) { + decode_mv(r, &mv1->as_mv, &best_mv_second.as_mv, nmvc, + &cm->fc.NMVcount, xd->allow_high_precision_mv); + mbmi->need_to_clamp_secondmv = check_mv_bounds(mv1, + mb_to_left_edge, mb_to_right_edge, mb_to_top_edge, mb_to_bottom_edge); - break; - - case ZEROMV: - mv->as_int = 0; - if (mbmi->second_ref_frame > 0) - mbmi->mv[1].as_int = 0; - 
break; - - case NEWMV: - process_mv(bc, &mv->as_mv, &best_mv.as_mv, nmvc, &cm->fc.NMVcount, - xd->allow_high_precision_mv); - - // Don't need to check this on NEARMV and NEARESTMV modes - // since those modes clamp the MV. The NEWMV mode does not, - // so signal to the prediction stage whether special - // handling may be required. - mbmi->need_to_clamp_mvs = check_mv_bounds(mv, - mb_to_left_edge, - mb_to_right_edge, - mb_to_top_edge, - mb_to_bottom_edge); - - if (mbmi->second_ref_frame > 0) { - process_mv(bc, &mbmi->mv[1].as_mv, &best_mv_second.as_mv, nmvc, - &cm->fc.NMVcount, xd->allow_high_precision_mv); - mbmi->need_to_clamp_secondmv |= check_mv_bounds(&mbmi->mv[1], - mb_to_left_edge, - mb_to_right_edge, - mb_to_top_edge, - mb_to_bottom_edge); - } - break; - default: -; + } + break; + default: #if CONFIG_DEBUG - assert(0); + assert(0); #endif + break; + } } } else { - /* required for left and above block mv */ - mbmi->mv[0].as_int = 0; - - if (mbmi->sb_type) { - mbmi->mode = read_sb_ymode(bc, pbi->common.fc.sb_ymode_prob); - pbi->common.fc.sb_ymode_counts[mbmi->mode]++; + // required for left and above block mv + mv0->as_int = 0; + + if (bsize >= BLOCK_SIZE_SB8X8) { + const BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type; + const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); + const int bsl = MIN(bwl, bhl); + mbmi->mode = read_intra_mode(r, cm->fc.y_mode_prob[MIN(3, bsl)]); + cm->fc.y_mode_counts[MIN(3, bsl)][mbmi->mode]++; } else { - mbmi->mode = read_ymode(bc, pbi->common.fc.ymode_prob); - pbi->common.fc.ymode_counts[mbmi->mode]++; - } - - // If MB mode is BPRED read the block modes - if (mbmi->mode == B_PRED) { - int j = 0; - do { - int m = read_bmode(bc, pbi->common.fc.bmode_prob); - mi->bmi[j].as_mode.first = m; -#if CONFIG_NEWBINTRAMODES - if (m == B_CONTEXT_PRED) m -= CONTEXT_PRED_REPLACEMENTS; -#endif - pbi->common.fc.bmode_counts[m]++; - } while (++j < 16); - } - - if (mbmi->mode == I8X8_PRED) { - int i; - for (i = 0; i < 4; i++) { - const int ib = vp9_i8x8_block[i]; - const int mode8x8 = read_i8x8_mode(bc, pbi->common.fc.i8x8_mode_prob); - - mi->bmi[ib + 0].as_mode.first = mode8x8; - mi->bmi[ib + 1].as_mode.first = mode8x8; - mi->bmi[ib + 4].as_mode.first = mode8x8; - mi->bmi[ib + 5].as_mode.first = mode8x8; - pbi->common.fc.i8x8_mode_counts[mode8x8]++; + int idx, idy; + for (idy = 0; idy < 2; idy += bh) { + for (idx = 0; idx < 2; idx += bw) { + int ib = idy * 2 + idx, k; + int m = read_intra_mode(r, cm->fc.y_mode_prob[0]); + mi->bmi[ib].as_mode.first = m; + cm->fc.y_mode_counts[0][m]++; + for (k = 1; k < bh; ++k) + mi->bmi[ib + k * 2].as_mode.first = m; + for (k = 1; k < bw; ++k) + mi->bmi[ib + k].as_mode.first = m; + } } - } else { - mbmi->uv_mode = read_uv_mode(bc, pbi->common.fc.uv_mode_prob[mbmi->mode]); - pbi->common.fc.uv_mode_counts[mbmi->mode][mbmi->uv_mode]++; - } - } - /* - if (pbi->common.current_video_frame == 1) - printf("mode: %d skip: %d\n", mbmi->mode, mbmi->mb_skip_coeff); - */ - - if (cm->txfm_mode == TX_MODE_SELECT && mbmi->mb_skip_coeff == 0 && - ((mbmi->ref_frame == INTRA_FRAME && mbmi->mode <= I8X8_PRED) || - (mbmi->ref_frame != INTRA_FRAME && !(mbmi->mode == SPLITMV && - mbmi->partitioning == PARTITIONING_4X4)))) { - // FIXME(rbultje) code ternary symbol once all experiments are merged - mbmi->txfm_size = vp9_read(bc, cm->prob_tx[0]); - if (mbmi->txfm_size != TX_4X4 && mbmi->mode != I8X8_PRED && - mbmi->mode != SPLITMV) { - mbmi->txfm_size += vp9_read(bc, cm->prob_tx[1]); - if (mbmi->sb_type && mbmi->txfm_size != TX_8X8) - 
mbmi->txfm_size += vp9_read(bc, cm->prob_tx[2]); + mbmi->mode = mi->bmi[3].as_mode.first; } - } else if (mbmi->sb_type && cm->txfm_mode >= ALLOW_32X32) { - mbmi->txfm_size = TX_32X32; - } else if (cm->txfm_mode >= ALLOW_16X16 && - ((mbmi->ref_frame == INTRA_FRAME && mbmi->mode <= TM_PRED) || - (mbmi->ref_frame != INTRA_FRAME && mbmi->mode != SPLITMV))) { - mbmi->txfm_size = TX_16X16; - } else if (cm->txfm_mode >= ALLOW_8X8 && - (!(mbmi->ref_frame == INTRA_FRAME && mbmi->mode == B_PRED) && - !(mbmi->ref_frame != INTRA_FRAME && mbmi->mode == SPLITMV && - mbmi->partitioning == PARTITIONING_4X4))) { - mbmi->txfm_size = TX_8X8; - } else { - mbmi->txfm_size = TX_4X4; - } -} - -void vp9_decode_mode_mvs_init(VP9D_COMP* const pbi, BOOL_DECODER* const bc) { - VP9_COMMON *cm = &pbi->common; - vpx_memset(cm->mbskip_pred_probs, 0, sizeof(cm->mbskip_pred_probs)); - if (pbi->common.mb_no_coeff_skip) { - int k; - for (k = 0; k < MBSKIP_CONTEXTS; ++k) { - cm->mbskip_pred_probs[k] = vp9_read_prob(bc); - } + mbmi->uv_mode = read_intra_mode(r, cm->fc.uv_mode_prob[mbmi->mode]); + cm->fc.uv_mode_counts[mbmi->mode][mbmi->uv_mode]++; } - - mb_mode_mv_init(pbi, bc); } -#if CONFIG_CODE_NONZEROCOUNT -static uint16_t read_nzc(VP9_COMMON *const cm, - int nzc_context, - TX_SIZE tx_size, - int ref, - int type, - BOOL_DECODER* const bc) { - int c, e; - uint16_t nzc; - if (tx_size == TX_32X32) { - c = treed_read(bc, vp9_nzc32x32_tree, - cm->fc.nzc_probs_32x32[nzc_context][ref][type]); - cm->fc.nzc_counts_32x32[nzc_context][ref][type][c]++; - } else if (tx_size == TX_16X16) { - c = treed_read(bc, vp9_nzc16x16_tree, - cm->fc.nzc_probs_16x16[nzc_context][ref][type]); - cm->fc.nzc_counts_16x16[nzc_context][ref][type][c]++; - } else if (tx_size == TX_8X8) { - c = treed_read(bc, vp9_nzc8x8_tree, - cm->fc.nzc_probs_8x8[nzc_context][ref][type]); - cm->fc.nzc_counts_8x8[nzc_context][ref][type][c]++; - } else if (tx_size == TX_4X4) { - c = treed_read(bc, vp9_nzc4x4_tree, - cm->fc.nzc_probs_4x4[nzc_context][ref][type]); - cm->fc.nzc_counts_4x4[nzc_context][ref][type][c]++; - } else { - assert(0); - } - nzc = vp9_basenzcvalue[c]; - if ((e = vp9_extranzcbits[c])) { - int x = 0; - while (e--) { - int b = vp9_read( - bc, cm->fc.nzc_pcat_probs[nzc_context][c - NZC_TOKENS_NOEXTRA][e]); - x |= (b << e); - cm->fc.nzc_pcat_counts[nzc_context][c - NZC_TOKENS_NOEXTRA][e][b]++; +void vp9_decode_mode_mvs_init(VP9D_COMP* const pbi, vp9_reader *r) { + VP9_COMMON *cm = &pbi->common; + int k; + + // TODO(jkoleszar): does this clear more than MBSKIP_CONTEXTS? Maybe remove. 
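  // (The loop below uses the same conditional-update pattern as the mode,
  // reference-frame and tx-size probabilities above: one flag bit coded at
  // VP9_MODE_UPDATE_PROB signals whether a probability changes, and if so
  // its new value is coded as a delta against the old one. A minimal sketch
  // of that pattern, under the same gating assumption:
  //
  //   if (vp9_read(r, VP9_MODE_UPDATE_PROB))     // per-probability flag
  //     p = vp9_read_prob_diff_update(r, p);     // decode delta vs. old p
  //
  // Probabilities whose flag reads 0 keep their previously adapted values.)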
+ // vpx_memset(cm->fc.mbskip_probs, 0, sizeof(cm->fc.mbskip_probs)); + for (k = 0; k < MBSKIP_CONTEXTS; ++k) { + if (vp9_read(r, VP9_MODE_UPDATE_PROB)) { + cm->fc.mbskip_probs[k] = + vp9_read_prob_diff_update(r, cm->fc.mbskip_probs[k]); } - nzc += x; + // cm->fc.mbskip_probs[k] = vp9_read_prob(r); } - if (tx_size == TX_32X32) - assert(nzc <= 1024); - else if (tx_size == TX_16X16) - assert(nzc <= 256); - else if (tx_size == TX_8X8) - assert(nzc <= 64); - else if (tx_size == TX_4X4) - assert(nzc <= 16); - return nzc; -} - -static void read_nzcs_sb64(VP9_COMMON *const cm, - MACROBLOCKD* xd, - int mb_row, - int mb_col, - BOOL_DECODER* const bc) { - MODE_INFO *m = xd->mode_info_context; - MB_MODE_INFO *const mi = &m->mbmi; - int j, nzc_context; - const int ref = m->mbmi.ref_frame != INTRA_FRAME; - - assert(mb_col == get_mb_col(xd)); - assert(mb_row == get_mb_row(xd)); - - vpx_memset(m->mbmi.nzcs, 0, 384 * sizeof(m->mbmi.nzcs[0])); - - if (mi->mb_skip_coeff) - return; - - switch (mi->txfm_size) { - case TX_32X32: - for (j = 0; j < 256; j += 64) { - nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_32X32, ref, 0, bc); - } - for (j = 256; j < 384; j += 64) { - nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_32X32, ref, 1, bc); - } - break; - - case TX_16X16: - for (j = 0; j < 256; j += 16) { - nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 0, bc); - } - for (j = 256; j < 384; j += 16) { - nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 1, bc); - } - break; - - case TX_8X8: - for (j = 0; j < 256; j += 4) { - nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 0, bc); - } - for (j = 256; j < 384; j += 4) { - nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 1, bc); - } - break; - - case TX_4X4: - for (j = 0; j < 256; ++j) { - nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 0, bc); - } - for (j = 256; j < 384; ++j) { - nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 1, bc); - } - break; - default: - break; - } + mb_mode_mv_init(pbi, r); } -static void read_nzcs_sb32(VP9_COMMON *const cm, - MACROBLOCKD* xd, - int mb_row, - int mb_col, - BOOL_DECODER* const bc) { - MODE_INFO *m = xd->mode_info_context; - MB_MODE_INFO *const mi = &m->mbmi; - int j, nzc_context; - const int ref = m->mbmi.ref_frame != INTRA_FRAME; - - assert(mb_col == get_mb_col(xd)); - assert(mb_row == get_mb_row(xd)); - - vpx_memset(m->mbmi.nzcs, 0, 384 * sizeof(m->mbmi.nzcs[0])); - - if (mi->mb_skip_coeff) - return; - - switch (mi->txfm_size) { - case TX_32X32: - for (j = 0; j < 64; j += 64) { - nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_32X32, ref, 0, bc); - } - for (j = 64; j < 96; j += 16) { - nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 1, bc); - } - break; - - case TX_16X16: - for (j = 0; j < 64; j += 16) { - nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, 
j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 0, bc); - } - for (j = 64; j < 96; j += 16) { - nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 1, bc); - } - break; - - case TX_8X8: - for (j = 0; j < 64; j += 4) { - nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 0, bc); - } - for (j = 64; j < 96; j += 4) { - nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 1, bc); - } - break; - - case TX_4X4: - for (j = 0; j < 64; ++j) { - nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 0, bc); - } - for (j = 64; j < 96; ++j) { - nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 1, bc); - } - break; - - default: - break; - } -} - -static void read_nzcs_mb16(VP9_COMMON *const cm, - MACROBLOCKD* xd, - int mb_row, - int mb_col, - BOOL_DECODER* const bc) { - MODE_INFO *m = xd->mode_info_context; - MB_MODE_INFO *const mi = &m->mbmi; - int j, nzc_context; - const int ref = m->mbmi.ref_frame != INTRA_FRAME; - - assert(mb_col == get_mb_col(xd)); - assert(mb_row == get_mb_row(xd)); - - vpx_memset(m->mbmi.nzcs, 0, 384 * sizeof(m->mbmi.nzcs[0])); - - if (mi->mb_skip_coeff) - return; - - switch (mi->txfm_size) { - case TX_16X16: - for (j = 0; j < 16; j += 16) { - nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 0, bc); - } - for (j = 16; j < 24; j += 4) { - nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 1, bc); - } - break; - - case TX_8X8: - for (j = 0; j < 16; j += 4) { - nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 0, bc); - } - if (mi->mode == I8X8_PRED || mi->mode == SPLITMV) { - for (j = 16; j < 24; ++j) { - nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 1, bc); - } - } else { - for (j = 16; j < 24; j += 4) { - nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 1, bc); - } - } - break; - - case TX_4X4: - for (j = 0; j < 16; ++j) { - nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 0, bc); - } - for (j = 16; j < 24; ++j) { - nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j); - m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 1, bc); - } - break; - - default: - break; - } -} -#endif // CONFIG_CODE_NONZEROCOUNT - void vp9_decode_mb_mode_mv(VP9D_COMP* const pbi, MACROBLOCKD* const xd, - int mb_row, - int mb_col, - BOOL_DECODER* const bc) { + int mi_row, + int mi_col, + vp9_reader *r) { VP9_COMMON *const cm = &pbi->common; MODE_INFO *mi = xd->mode_info_context; - MODE_INFO *prev_mi = xd->prev_mode_info_context; MB_MODE_INFO *const mbmi = &mi->mbmi; - if (pbi->common.frame_type == KEY_FRAME) { - kfread_modes(pbi, mi, mb_row, mb_col, bc); + if ((cm->frame_type == KEY_FRAME) || cm->intra_only) { + kfread_modes(pbi, mi, mi_row, mi_col, r); } else { - read_mb_modes_mv(pbi, mi, &mi->mbmi, prev_mi, mb_row, mb_col, bc); - 
set_scale_factors(xd, - mi->mbmi.ref_frame - 1, mi->mbmi.second_ref_frame - 1, - pbi->common.active_ref_scale); + read_mb_modes_mv(pbi, mi, &mi->mbmi, mi_row, mi_col, r); } -#if CONFIG_CODE_NONZEROCOUNT - if (mbmi->sb_type == BLOCK_SIZE_SB64X64) - read_nzcs_sb64(cm, xd, mb_row, mb_col, bc); - else if (mbmi->sb_type == BLOCK_SIZE_SB32X32) - read_nzcs_sb32(cm, xd, mb_row, mb_col, bc); - else - read_nzcs_mb16(cm, xd, mb_row, mb_col, bc); -#endif // CONFIG_CODE_NONZEROCOUNT - - if (mbmi->sb_type) { - const int n_mbs = 1 << mbmi->sb_type; - const int y_mbs = MIN(n_mbs, cm->mb_rows - mb_row); - const int x_mbs = MIN(n_mbs, cm->mb_cols - mb_col); + + if (1) { + const int bw = 1 << mi_width_log2(mbmi->sb_type); + const int bh = 1 << mi_height_log2(mbmi->sb_type); + const int y_mis = MIN(bh, cm->mi_rows - mi_row); + const int x_mis = MIN(bw, cm->mi_cols - mi_col); const int mis = cm->mode_info_stride; int x, y; - for (y = 0; y < y_mbs; y++) { - for (x = !y; x < x_mbs; x++) { + for (y = 0; y < y_mis; y++) + for (x = !y; x < x_mis; x++) mi[y * mis + x] = *mi; - } - } - } else { - update_blockd_bmi(xd); } } diff --git a/vp9/decoder/vp9_decodemv.h b/vp9/decoder/vp9_decodemv.h index 5cd93576098c9dc89baf3884d39de44d013b99c6..bf5e83c77d1ba5038e3742c9177f80fe6df480a5 100644 --- a/vp9/decoder/vp9_decodemv.h +++ b/vp9/decoder/vp9_decodemv.h @@ -17,7 +17,7 @@ void vp9_decode_mb_mode_mv(VP9D_COMP* const pbi, MACROBLOCKD* const xd, int mb_row, int mb_col, - BOOL_DECODER* const bc); -void vp9_decode_mode_mvs_init(VP9D_COMP* const pbi, BOOL_DECODER* const bc); + vp9_reader *r); +void vp9_decode_mode_mvs_init(VP9D_COMP* const pbi, vp9_reader *r); #endif // VP9_DECODER_VP9_DECODEMV_H_ diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index eb1b4896e5dc382bfe705de016bcd249ba95ce9f..703aa0623974749eba7c0e5a9d85e259672df0a5 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -8,60 +8,143 @@ * be found in the AUTHORS file in the root of the source tree. 
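Note on the decodemv changes above: mode info is now tracked per 8x8 "mi" unit rather than per macroblock, and the info decoded once for a block is broadcast to every mi cell the block covers, clamped at the right and bottom frame edges. A minimal, self-contained sketch of that broadcast (the ModeInfo type and grid dimensions here are illustrative stand-ins, not the library's types):

    #include <stdio.h>

    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    typedef struct { int mode; } ModeInfo;  /* stand-in for MODE_INFO */

    /* Broadcast the mode info stored at grid[mi_row * stride + mi_col] to
     * all bw x bh mi cells the block covers, clamped to the grid size. */
    static void broadcast_mode_info(ModeInfo *grid, int stride,
                                    int mi_rows, int mi_cols,
                                    int mi_row, int mi_col, int bw, int bh) {
      ModeInfo *mi = grid + mi_row * stride + mi_col;
      const int y_mis = MIN(bh, mi_rows - mi_row);
      const int x_mis = MIN(bw, mi_cols - mi_col);
      int x, y;
      for (y = 0; y < y_mis; y++)
        for (x = !y; x < x_mis; x++)  /* x = !y skips the origin cell on row 0 */
          mi[y * stride + x] = *mi;
    }

    int main(void) {
      ModeInfo grid[4 * 4] = {{0}};
      grid[0].mode = 7;  /* decoded once for the whole block */
      broadcast_mode_info(grid, 4, 4, 4, 0, 0, 2, 2);  /* 16x16 block = 2x2 mi */
      printf("%d %d %d\n", grid[1].mode, grid[4].mode, grid[5].mode);  /* 7 7 7 */
      return 0;
    }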
*/ +#include <assert.h> -#include "vp9/decoder/vp9_onyxd_int.h" +#include "./vp9_rtcd.h" +#include "vpx_mem/vpx_mem.h" +#include "vpx_scale/vpx_scale.h" + +#include "vp9/common/vp9_extend.h" +#include "vp9/common/vp9_modecont.h" #include "vp9/common/vp9_common.h" -#include "vp9/common/vp9_header.h" #include "vp9/common/vp9_reconintra.h" #include "vp9/common/vp9_reconinter.h" #include "vp9/common/vp9_entropy.h" -#include "vp9/decoder/vp9_decodframe.h" -#include "vp9/decoder/vp9_detokenize.h" #include "vp9/common/vp9_invtrans.h" #include "vp9/common/vp9_alloccommon.h" #include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_quant_common.h" -#include "vpx_scale/vpx_scale.h" -#include "vp9/common/vp9_setupintrarecon.h" - -#include "vp9/decoder/vp9_decodemv.h" -#include "vp9/common/vp9_extend.h" -#include "vp9/common/vp9_modecont.h" -#include "vpx_mem/vpx_mem.h" -#include "vp9/decoder/vp9_dboolhuff.h" - #include "vp9/common/vp9_seg_common.h" #include "vp9/common/vp9_tile_common.h" -#include "vp9_rtcd.h" -#include <assert.h> -#include <stdio.h> +#include "vp9/decoder/vp9_dboolhuff.h" +#include "vp9/decoder/vp9_decodframe.h" +#include "vp9/decoder/vp9_detokenize.h" +#include "vp9/decoder/vp9_decodemv.h" +#include "vp9/decoder/vp9_onyxd_int.h" +#include "vp9/decoder/vp9_read_bit_buffer.h" -#define COEFCOUNT_TESTING // #define DEC_DEBUG #ifdef DEC_DEBUG int dec_debug = 0; #endif -static int read_le16(const uint8_t *p) { - return (p[1] << 8) | p[0]; -} - -static int read_le32(const uint8_t *p) { - return (p[3] << 24) | (p[2] << 16) | (p[1] << 8) | p[0]; +static int read_be32(const uint8_t *p) { + return (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]; } // len == 0 is not allowed -static int read_is_valid(const unsigned char *start, size_t len, - const unsigned char *end) { +static int read_is_valid(const uint8_t *start, size_t len, + const uint8_t *end) { return start + len > start && start + len <= end; } +static void setup_txfm_mode(VP9_COMMON *pc, int lossless, vp9_reader *r) { + if (lossless) { + pc->txfm_mode = ONLY_4X4; + } else { + pc->txfm_mode = vp9_read_literal(r, 2); + if (pc->txfm_mode == ALLOW_32X32) + pc->txfm_mode += vp9_read_bit(r); + if (pc->txfm_mode == TX_MODE_SELECT) { + int i, j; + for (i = 0; i < TX_SIZE_CONTEXTS; ++i) { + for (j = 0; j < TX_SIZE_MAX_SB - 3; ++j) { + if (vp9_read(r, VP9_MODE_UPDATE_PROB)) + pc->fc.tx_probs_8x8p[i][j] = + vp9_read_prob_diff_update(r, pc->fc.tx_probs_8x8p[i][j]); + } + } + for (i = 0; i < TX_SIZE_CONTEXTS; ++i) { + for (j = 0; j < TX_SIZE_MAX_SB - 2; ++j) { + if (vp9_read(r, VP9_MODE_UPDATE_PROB)) + pc->fc.tx_probs_16x16p[i][j] = + vp9_read_prob_diff_update(r, pc->fc.tx_probs_16x16p[i][j]); + } + } + for (i = 0; i < TX_SIZE_CONTEXTS; ++i) { + for (j = 0; j < TX_SIZE_MAX_SB - 1; ++j) { + if (vp9_read(r, VP9_MODE_UPDATE_PROB)) + pc->fc.tx_probs_32x32p[i][j] = + vp9_read_prob_diff_update(r, pc->fc.tx_probs_32x32p[i][j]); + } + } + } + } +} + +static int get_unsigned_bits(unsigned int num_values) { + int cat = 0; + if (num_values <= 1) + return 0; + num_values--; + while (num_values > 0) { + cat++; + num_values >>= 1; + } + return cat; +} + +static int inv_recenter_nonneg(int v, int m) { + if (v > 2 * m) + return v; + + return v % 2 ? m - (v + 1) / 2 : m + v / 2; +} + +static int decode_uniform(vp9_reader *r, int n) { + int v; + const int l = get_unsigned_bits(n); + const int m = (1 << l) - n; + if (!l) + return 0; + + v = vp9_read_literal(r, l - 1); + return v < m ? 
v : (v << 1) - m + vp9_read_bit(r); +} + +static int decode_term_subexp(vp9_reader *r, int k, int num_syms) { + int i = 0, mk = 0, word; + while (1) { + const int b = i ? k + i - 1 : k; + const int a = 1 << b; + if (num_syms <= mk + 3 * a) { + word = decode_uniform(r, num_syms - mk) + mk; + break; + } else { + if (vp9_read_bit(r)) { + i++; + mk += a; + } else { + word = vp9_read_literal(r, b) + mk; + break; + } + } + } + return word; +} + +static int decode_unsigned_max(struct vp9_read_bit_buffer *rb, int max) { + const int data = vp9_rb_read_literal(rb, get_unsigned_bits(max)); + return data > max ? max : data; +} + static int merge_index(int v, int n, int modulus) { int max1 = (n - 1 - modulus / 2) / modulus + 1; - if (v < max1) v = v * modulus + modulus / 2; - else { + if (v < max1) { + v = v * modulus + modulus / 2; + } else { int w; v -= max1; w = v; @@ -73,1166 +156,427 @@ static int merge_index(int v, int n, int modulus) { } static int inv_remap_prob(int v, int m) { - const int n = 256; - const int modulus = MODULUS_PARAM; + const int n = 255; - v = merge_index(v, n - 1, modulus); + v = merge_index(v, n - 1, MODULUS_PARAM); + m--; if ((m << 1) <= n) { - return vp9_inv_recenter_nonneg(v + 1, m); + return 1 + inv_recenter_nonneg(v + 1, m); } else { - return n - 1 - vp9_inv_recenter_nonneg(v + 1, n - 1 - m); + return n - inv_recenter_nonneg(v + 1, n - 1 - m); } } -static vp9_prob read_prob_diff_update(vp9_reader *const bc, int oldp) { - int delp = vp9_decode_term_subexp(bc, SUBEXP_PARAM, 255); +vp9_prob vp9_read_prob_diff_update(vp9_reader *r, int oldp) { + int delp = decode_term_subexp(r, SUBEXP_PARAM, 255); return (vp9_prob)inv_remap_prob(delp, oldp); } -void vp9_init_de_quantizer(VP9D_COMP *pbi) { - int i; +void vp9_init_dequantizer(VP9_COMMON *pc) { int q; - VP9_COMMON *const pc = &pbi->common; for (q = 0; q < QINDEX_RANGE; q++) { - pc->Y1dequant[q][0] = (int16_t)vp9_dc_quant(q, pc->y1dc_delta_q); - pc->UVdequant[q][0] = (int16_t)vp9_dc_uv_quant(q, pc->uvdc_delta_q); + // DC value + pc->y_dequant[q][0] = vp9_dc_quant(q, pc->y_dc_delta_q); + pc->uv_dequant[q][0] = vp9_dc_quant(q, pc->uv_dc_delta_q); - /* all the ac values =; */ - for (i = 1; i < 16; i++) { - int rc = vp9_default_zig_zag1d_4x4[i]; - - pc->Y1dequant[q][rc] = (int16_t)vp9_ac_yquant(q); - pc->UVdequant[q][rc] = (int16_t)vp9_ac_uv_quant(q, pc->uvac_delta_q); - } - } -} - -static int get_qindex(MACROBLOCKD *mb, int segment_id, int base_qindex) { - // Set the Q baseline allowing for any segment level adjustment - if (vp9_segfeature_active(mb, segment_id, SEG_LVL_ALT_Q)) { - if (mb->mb_segment_abs_delta == SEGMENT_ABSDATA) - return vp9_get_segdata(mb, segment_id, SEG_LVL_ALT_Q); // Abs Value - else - return clamp(base_qindex + vp9_get_segdata(mb, segment_id, SEG_LVL_ALT_Q), - 0, MAXQ); // Delta Value - } else { - return base_qindex; + // AC values + pc->y_dequant[q][1] = vp9_ac_quant(q, 0); + pc->uv_dequant[q][1] = vp9_ac_quant(q, pc->uv_ac_delta_q); } } -static void mb_init_dequantizer(VP9D_COMP *pbi, MACROBLOCKD *mb) { +static void mb_init_dequantizer(VP9_COMMON *pc, MACROBLOCKD *xd) { int i; + const int segment_id = xd->mode_info_context->mbmi.segment_id; + xd->q_index = vp9_get_qindex(xd, segment_id, pc->base_qindex); - VP9_COMMON *const pc = &pbi->common; - const int segment_id = mb->mode_info_context->mbmi.segment_id; - const int qindex = get_qindex(mb, segment_id, pc->base_qindex); - mb->q_index = qindex; - - for (i = 0; i < 16; i++) - mb->block[i].dequant = pc->Y1dequant[qindex]; - - for (i = 16; i < 24; i++) - 
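The inv_recenter_nonneg() helper above undoes an encoder-side mapping that indexes candidate values by their distance from a predicted value m, so small decoded codes land close to m. A hedged round-trip check; the forward recenter_nonneg() is reconstructed here from the inverse and is an assumption, not code from this patch:

    #include <assert.h>
    #include <stdio.h>

    /* Inverse mapping, as in the patch: even codes land above m, odd below. */
    static int inv_recenter_nonneg(int v, int m) {
      if (v > 2 * m)
        return v;
      return v % 2 ? m - (v + 1) / 2 : m + v / 2;
    }

    /* Encoder-side forward mapping, reconstructed for the check (assumption). */
    static int recenter_nonneg(int r, int m) {
      if (r > 2 * m)
        return r;
      return r >= m ? 2 * (r - m) : 2 * (m - r) - 1;
    }

    int main(void) {
      int m, r;
      for (m = 0; m < 64; m++)
        for (r = 0; r < 256; r++)
          assert(inv_recenter_nonneg(recenter_nonneg(r, m), m) == r);
      printf("round-trip ok: codes 0, 1, 2, ... map to m, m-1, m+1, ...\n");
      return 0;
    }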
mb->block[i].dequant = pc->UVdequant[qindex]; - - if (mb->lossless) { - assert(qindex == 0); - mb->inv_txm4x4_1 = vp9_short_iwalsh4x4_1; - mb->inv_txm4x4 = vp9_short_iwalsh4x4; - mb->itxm_add = vp9_dequant_idct_add_lossless_c; - mb->itxm_add_y_block = vp9_dequant_idct_add_y_block_lossless_c; - mb->itxm_add_uv_block = vp9_dequant_idct_add_uv_block_lossless_c; - } else { - mb->inv_txm4x4_1 = vp9_short_idct4x4_1; - mb->inv_txm4x4 = vp9_short_idct4x4; - mb->itxm_add = vp9_dequant_idct_add; - mb->itxm_add_y_block = vp9_dequant_idct_add_y_block; - mb->itxm_add_uv_block = vp9_dequant_idct_add_uv_block; - } + xd->plane[0].dequant = pc->y_dequant[xd->q_index]; + for (i = 1; i < MAX_MB_PLANE; i++) + xd->plane[i].dequant = pc->uv_dequant[xd->q_index]; } -/* skip_recon_mb() is Modified: Instead of writing the result to predictor buffer and then copying it - * to dst buffer, we can write the result directly to dst buffer. This eliminates unnecessary copy. - */ -static void skip_recon_mb(VP9D_COMP *pbi, MACROBLOCKD *xd, - int mb_row, int mb_col) { - BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; - - if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { - if (sb_type == BLOCK_SIZE_SB64X64) { - vp9_build_intra_predictors_sb64uv_s(xd); - vp9_build_intra_predictors_sb64y_s(xd); - } else if (sb_type == BLOCK_SIZE_SB32X32) { - vp9_build_intra_predictors_sbuv_s(xd); - vp9_build_intra_predictors_sby_s(xd); - } else { - vp9_build_intra_predictors_mbuv_s(xd); - vp9_build_intra_predictors_mby_s(xd); - } - } else { - if (sb_type == BLOCK_SIZE_SB64X64) { - vp9_build_inter64x64_predictors_sb(xd, - xd->dst.y_buffer, - xd->dst.u_buffer, - xd->dst.v_buffer, - xd->dst.y_stride, - xd->dst.uv_stride, - mb_row, mb_col); - } else if (sb_type == BLOCK_SIZE_SB32X32) { - vp9_build_inter32x32_predictors_sb(xd, - xd->dst.y_buffer, - xd->dst.u_buffer, - xd->dst.v_buffer, - xd->dst.y_stride, - xd->dst.uv_stride, - mb_row, mb_col); - } else { - vp9_build_inter16x16_predictors_mb(xd, - xd->dst.y_buffer, - xd->dst.u_buffer, - xd->dst.v_buffer, - xd->dst.y_stride, - xd->dst.uv_stride, - mb_row, mb_col); - } - } -} +static void decode_block(int plane, int block, BLOCK_SIZE_TYPE bsize, + int ss_txfrm_size, void *arg) { + MACROBLOCKD* const xd = arg; + int16_t* const qcoeff = BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16); + const int stride = xd->plane[plane].dst.stride; + const int raster_block = txfrm_block_to_raster_block(xd, bsize, plane, + block, ss_txfrm_size); + uint8_t* const dst = raster_block_offset_uint8(xd, bsize, plane, + raster_block, + xd->plane[plane].dst.buf, + stride); -static void decode_16x16(VP9D_COMP *pbi, MACROBLOCKD *xd, - BOOL_DECODER* const bc) { - TX_TYPE tx_type = get_tx_type_16x16(xd, 0); -#if 0 // def DEC_DEBUG - if (dec_debug) { - int i; - printf("\n"); - printf("qcoeff 16x16\n"); - for (i = 0; i < 400; i++) { - printf("%3d ", xd->qcoeff[i]); - if (i % 16 == 15) printf("\n"); - } - printf("\n"); - printf("predictor\n"); - for (i = 0; i < 400; i++) { - printf("%3d ", xd->predictor[i]); - if (i % 16 == 15) printf("\n"); - } - } -#endif - if (tx_type != DCT_DCT) { - vp9_ht_dequant_idct_add_16x16_c(tx_type, xd->qcoeff, - xd->block[0].dequant, xd->predictor, - xd->dst.y_buffer, 16, xd->dst.y_stride, - xd->eobs[0]); - } else { - vp9_dequant_idct_add_16x16(xd->qcoeff, xd->block[0].dequant, - xd->predictor, xd->dst.y_buffer, - 16, xd->dst.y_stride, xd->eobs[0]); - } - vp9_dequant_idct_add_uv_block_8x8( - xd->qcoeff + 16 * 16, xd->block[16].dequant, - xd->predictor + 16 * 16, 
xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.uv_stride, xd); -} - -static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd, - BOOL_DECODER* const bc) { - // First do Y - // if the first one is DCT_DCT assume all the rest are as well - TX_TYPE tx_type = get_tx_type_8x8(xd, 0); -#if 0 // def DEC_DEBUG - if (dec_debug) { - int i; - printf("\n"); - printf("qcoeff 8x8\n"); - for (i = 0; i < 384; i++) { - printf("%3d ", xd->qcoeff[i]); - if (i % 16 == 15) printf("\n"); - } - } -#endif - if (tx_type != DCT_DCT || xd->mode_info_context->mbmi.mode == I8X8_PRED) { - int i; - for (i = 0; i < 4; i++) { - int ib = vp9_i8x8_block[i]; - int idx = (ib & 0x02) ? (ib + 2) : ib; - int16_t *q = xd->block[idx].qcoeff; - int16_t *dq = xd->block[0].dequant; - uint8_t *pre = xd->block[ib].predictor; - uint8_t *dst = *(xd->block[ib].base_dst) + xd->block[ib].dst; - int stride = xd->dst.y_stride; - BLOCKD *b = &xd->block[ib]; - if (xd->mode_info_context->mbmi.mode == I8X8_PRED) { - int i8x8mode = b->bmi.as_mode.first; - vp9_intra8x8_predict(xd, b, i8x8mode, b->predictor); - } - tx_type = get_tx_type_8x8(xd, ib); - if (tx_type != DCT_DCT) { - vp9_ht_dequant_idct_add_8x8_c(tx_type, q, dq, pre, dst, 16, stride, - xd->eobs[idx]); - } else { - vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride, - xd->eobs[idx]); - } - } - } else { - vp9_dequant_idct_add_y_block_8x8(xd->qcoeff, - xd->block[0].dequant, - xd->predictor, - xd->dst.y_buffer, - xd->dst.y_stride, - xd); - } + TX_TYPE tx_type; - // Now do UV - if (xd->mode_info_context->mbmi.mode == I8X8_PRED) { - int i; - for (i = 0; i < 4; i++) { - int ib = vp9_i8x8_block[i]; - BLOCKD *b = &xd->block[ib]; - int i8x8mode = b->bmi.as_mode.first; - - b = &xd->block[16 + i]; - vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor); - xd->itxm_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 8, b->dst_stride, xd->eobs[16 + i]); - - b = &xd->block[20 + i]; - vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor); - xd->itxm_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 8, b->dst_stride, xd->eobs[20 + i]); - } - } else if (xd->mode_info_context->mbmi.mode == SPLITMV) { - xd->itxm_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant, - xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.uv_stride, xd); - } else { - vp9_dequant_idct_add_uv_block_8x8 - (xd->qcoeff + 16 * 16, xd->block[16].dequant, - xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.uv_stride, xd); - } -#if 0 // def DEC_DEBUG - if (dec_debug) { - int i; - printf("\n"); - printf("predictor\n"); - for (i = 0; i < 384; i++) { - printf("%3d ", xd->predictor[i]); - if (i % 16 == 15) printf("\n"); - } + switch (ss_txfrm_size / 2) { + case TX_4X4: + tx_type = plane == 0 ? get_tx_type_4x4(xd, raster_block) : DCT_DCT; + if (tx_type == DCT_DCT) + xd->itxm_add(qcoeff, dst, stride, xd->plane[plane].eobs[block]); + else + vp9_iht_add_c(tx_type, qcoeff, dst, stride, + xd->plane[plane].eobs[block]); + break; + case TX_8X8: + tx_type = plane == 0 ? get_tx_type_8x8(xd, raster_block) : DCT_DCT; + vp9_iht_add_8x8_c(tx_type, qcoeff, dst, stride, + xd->plane[plane].eobs[block]); + break; + case TX_16X16: + tx_type = plane == 0 ? 
get_tx_type_16x16(xd, raster_block) : DCT_DCT; + vp9_iht_add_16x16_c(tx_type, qcoeff, dst, stride, + xd->plane[plane].eobs[block]); + break; + case TX_32X32: + vp9_idct_add_32x32(qcoeff, dst, stride, xd->plane[plane].eobs[block]); + break; } -#endif } -static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, - BOOL_DECODER* const bc) { +static void decode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize, + int ss_txfrm_size, void *arg) { + MACROBLOCKD* const xd = arg; + int16_t* const qcoeff = BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16); + const int stride = xd->plane[plane].dst.stride; + const int raster_block = txfrm_block_to_raster_block(xd, bsize, plane, + block, ss_txfrm_size); + uint8_t* const dst = raster_block_offset_uint8(xd, bsize, plane, + raster_block, + xd->plane[plane].dst.buf, + stride); + const TX_SIZE tx_size = (TX_SIZE)(ss_txfrm_size / 2); TX_TYPE tx_type; - int i, eobtotal = 0; - MB_PREDICTION_MODE mode = xd->mode_info_context->mbmi.mode; -#if 0 // def DEC_DEBUG - if (dec_debug) { - int i; - printf("\n"); - printf("predictor\n"); - for (i = 0; i < 384; i++) { - printf("%3d ", xd->predictor[i]); - if (i % 16 == 15) printf("\n"); - } - } -#endif - if (mode == I8X8_PRED) { - for (i = 0; i < 4; i++) { - int ib = vp9_i8x8_block[i]; - const int iblock[4] = {0, 1, 4, 5}; - int j; - BLOCKD *b = &xd->block[ib]; - int i8x8mode = b->bmi.as_mode.first; - vp9_intra8x8_predict(xd, b, i8x8mode, b->predictor); - for (j = 0; j < 4; j++) { - b = &xd->block[ib + iblock[j]]; - tx_type = get_tx_type_4x4(xd, ib + iblock[j]); - if (tx_type != DCT_DCT) { - vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff, - b->dequant, b->predictor, - *(b->base_dst) + b->dst, 16, - b->dst_stride, xd->eobs[ib + iblock[j]]); - } else { - xd->itxm_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 16, b->dst_stride, - xd->eobs[ib + iblock[j]]); - } - } - b = &xd->block[16 + i]; - vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor); - xd->itxm_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 8, b->dst_stride, xd->eobs[16 + i]); - b = &xd->block[20 + i]; - vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor); - xd->itxm_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 8, b->dst_stride, xd->eobs[20 + i]); - } - } else if (mode == B_PRED) { - for (i = 0; i < 16; i++) { - BLOCKD *b = &xd->block[i]; - int b_mode = xd->mode_info_context->bmi[i].as_mode.first; -#if CONFIG_NEWBINTRAMODES - xd->mode_info_context->bmi[i].as_mode.context = b->bmi.as_mode.context = - vp9_find_bpred_context(xd, b); -#endif - if (!xd->mode_info_context->mbmi.mb_skip_coeff) - eobtotal += vp9_decode_coefs_4x4(pbi, xd, bc, PLANE_TYPE_Y_WITH_DC, i); - - vp9_intra4x4_predict(xd, b, b_mode, b->predictor); - tx_type = get_tx_type_4x4(xd, i); - if (tx_type != DCT_DCT) { - vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff, - b->dequant, b->predictor, - *(b->base_dst) + b->dst, 16, b->dst_stride, - xd->eobs[i]); - } else { - xd->itxm_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 16, b->dst_stride, xd->eobs[i]); - } - } - if (!xd->mode_info_context->mbmi.mb_skip_coeff) { - vp9_decode_mb_tokens_4x4_uv(pbi, xd, bc); - } - vp9_build_intra_predictors_mbuv(xd); - xd->itxm_add_uv_block(xd->qcoeff + 16 * 16, - xd->block[16].dequant, - xd->predictor + 16 * 16, - xd->dst.u_buffer, - xd->dst.v_buffer, - xd->dst.uv_stride, - xd); - } else if (mode == SPLITMV || get_tx_type_4x4(xd, 0) == DCT_DCT) { - xd->itxm_add_y_block(xd->qcoeff, - xd->block[0].dequant, - xd->predictor, - 
xd->dst.y_buffer, - xd->dst.y_stride, - xd); - xd->itxm_add_uv_block(xd->qcoeff + 16 * 16, - xd->block[16].dequant, - xd->predictor + 16 * 16, - xd->dst.u_buffer, - xd->dst.v_buffer, - xd->dst.uv_stride, - xd); - } else { -#if 0 // def DEC_DEBUG - if (dec_debug) { - int i; - printf("\n"); - printf("qcoeff 4x4\n"); - for (i = 0; i < 400; i++) { - printf("%3d ", xd->qcoeff[i]); - if (i % 16 == 15) printf("\n"); - } - printf("\n"); - printf("predictor\n"); - for (i = 0; i < 400; i++) { - printf("%3d ", xd->predictor[i]); - if (i % 16 == 15) printf("\n"); - } - } -#endif - for (i = 0; i < 16; i++) { - BLOCKD *b = &xd->block[i]; - tx_type = get_tx_type_4x4(xd, i); - if (tx_type != DCT_DCT) { - vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff, - b->dequant, b->predictor, - *(b->base_dst) + b->dst, 16, - b->dst_stride, xd->eobs[i]); - } else { - xd->itxm_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 16, b->dst_stride, xd->eobs[i]); - } - } - xd->itxm_add_uv_block(xd->qcoeff + 16 * 16, - xd->block[16].dequant, - xd->predictor + 16 * 16, - xd->dst.u_buffer, - xd->dst.v_buffer, - xd->dst.uv_stride, - xd); - } -} - -static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd, - int mb_row, int mb_col, - BOOL_DECODER* const bc) { - int n, eobtotal; - VP9_COMMON *const pc = &pbi->common; - MODE_INFO *mi = xd->mode_info_context; - const int mis = pc->mode_info_stride; - - assert(xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64); + int mode, b_mode; + int plane_b_size; + int tx_ib = raster_block >> tx_size; + mode = plane == 0? xd->mode_info_context->mbmi.mode: + xd->mode_info_context->mbmi.uv_mode; - if (pbi->common.frame_type != KEY_FRAME) - vp9_setup_interp_filters(xd, xd->mode_info_context->mbmi.interp_filter, pc); - - // re-initialize macroblock dequantizer before detokenization - if (xd->segmentation_enabled) - mb_init_dequantizer(pbi, xd); - if (xd->mode_info_context->mbmi.mb_skip_coeff) { - vp9_reset_sb64_tokens_context(xd); - - /* Special case: Force the loopfilter to skip when eobtotal and - * mb_skip_coeff are zero. 
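The decode_block()/decode_block_intra() callbacks above replace the per-size decode_4x4/8x8/16x16 and decode_superblock32/64 paths: one visitor is invoked for every transform block and switches on the transform size. A simplified sketch of that iteration shape (the real foreach_transformed_block() also handles per-plane subsampling and threads ss_txfrm_size through; the names below are illustrative):

    #include <stdio.h>

    typedef void (*visit_fn)(int plane, int block_idx, void *arg);

    /* Walk the transform blocks of a square coded block: block_wide_4x4 is
     * the block width in 4x4 units, tx_size_log2 the transform width as a
     * power of two in 4x4 units (0: 4x4, 1: 8x8, 2: 16x16, ...). */
    static void foreach_tx_block(int plane, int block_wide_4x4,
                                 int tx_size_log2, visit_fn visit, void *arg) {
      const int step = 1 << tx_size_log2;  /* tx width in 4x4 units */
      int r, c;
      for (r = 0; r < block_wide_4x4; r += step)
        for (c = 0; c < block_wide_4x4; c += step)
          visit(plane, r * block_wide_4x4 + c, arg);  /* raster index */
    }

    static void print_block(int plane, int block_idx, void *arg) {
      (void)arg;
      printf("plane %d, tx block at 4x4-unit index %d\n", plane, block_idx);
    }

    int main(void) {
      /* A 16x16 luma block (4 units wide) coded with 8x8 transforms:
       * visits raster indices 0, 2, 8, 10. */
      foreach_tx_block(0, 4, 1, print_block, NULL);
      return 0;
    }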
- */ - skip_recon_mb(pbi, xd, mb_row, mb_col); - return; - } - - /* do prediction */ - if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { - vp9_build_intra_predictors_sb64y_s(xd); - vp9_build_intra_predictors_sb64uv_s(xd); + if (xd->mode_info_context->mbmi.sb_type < BLOCK_SIZE_SB8X8 && plane == 0) { + assert(bsize == BLOCK_SIZE_SB8X8); + b_mode = xd->mode_info_context->bmi[raster_block].as_mode.first; } else { - vp9_build_inter64x64_predictors_sb(xd, xd->dst.y_buffer, - xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.y_stride, xd->dst.uv_stride, - mb_row, mb_col); + b_mode = mode; } - /* dequantization and idct */ - eobtotal = vp9_decode_sb64_tokens(pbi, xd, bc); - if (eobtotal == 0) { // skip loopfilter - for (n = 0; n < 16; n++) { - const int x_idx = n & 3, y_idx = n >> 2; + if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) { + extend_for_intra(xd, plane, block, bsize, ss_txfrm_size); + } - if (mb_col + x_idx < pc->mb_cols && mb_row + y_idx < pc->mb_rows) - mi[y_idx * mis + x_idx].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff; - } - } else { - switch (xd->mode_info_context->mbmi.txfm_size) { - case TX_32X32: - for (n = 0; n < 4; n++) { - const int x_idx = n & 1, y_idx = n >> 1; - const int y_offset = x_idx * 32 + y_idx * xd->dst.y_stride * 32; - vp9_dequant_idct_add_32x32(xd->qcoeff + n * 1024, - xd->block[0].dequant, - xd->dst.y_buffer + y_offset, - xd->dst.y_buffer + y_offset, - xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 64]); - } - vp9_dequant_idct_add_32x32(xd->qcoeff + 4096, - xd->block[16].dequant, xd->dst.u_buffer, xd->dst.u_buffer, - xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[256]); - vp9_dequant_idct_add_32x32(xd->qcoeff + 4096 + 1024, - xd->block[20].dequant, xd->dst.v_buffer, xd->dst.v_buffer, - xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[320]); - break; - case TX_16X16: - for (n = 0; n < 16; n++) { - const int x_idx = n & 3, y_idx = n >> 2; - const int y_offset = y_idx * 16 * xd->dst.y_stride + x_idx * 16; - const TX_TYPE tx_type = get_tx_type_16x16(xd, - (y_idx * 16 + x_idx) * 4); - - if (tx_type == DCT_DCT) { - vp9_dequant_idct_add_16x16(xd->qcoeff + n * 256, - xd->block[0].dequant, - xd->dst.y_buffer + y_offset, - xd->dst.y_buffer + y_offset, - xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 16]); - } else { - vp9_ht_dequant_idct_add_16x16_c(tx_type, xd->qcoeff + n * 256, - xd->block[0].dequant, - xd->dst.y_buffer + y_offset, - xd->dst.y_buffer + y_offset, - xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 16]); - } - } - for (n = 0; n < 4; n++) { - const int x_idx = n & 1, y_idx = n >> 1; - const int uv_offset = y_idx * 16 * xd->dst.uv_stride + x_idx * 16; - vp9_dequant_idct_add_16x16(xd->qcoeff + 4096 + n * 256, - xd->block[16].dequant, - xd->dst.u_buffer + uv_offset, - xd->dst.u_buffer + uv_offset, - xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[256 + n * 16]); - vp9_dequant_idct_add_16x16(xd->qcoeff + 4096 + 1024 + n * 256, - xd->block[20].dequant, - xd->dst.v_buffer + uv_offset, - xd->dst.v_buffer + uv_offset, - xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[320 + n * 16]); - } - break; - case TX_8X8: - for (n = 0; n < 64; n++) { - const int x_idx = n & 7, y_idx = n >> 3; - const int y_offset = y_idx * 8 * xd->dst.y_stride + x_idx * 8; - const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 16 + x_idx) * 2); - if (tx_type == DCT_DCT) { - vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64, - xd->block[0].dequant, - xd->dst.y_buffer + y_offset, - xd->dst.y_buffer + y_offset, - xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 4]); - } else { - 
vp9_ht_dequant_idct_add_8x8_c(tx_type, xd->qcoeff + n * 64, - xd->block[0].dequant, - xd->dst.y_buffer + y_offset, - xd->dst.y_buffer + y_offset, - xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 4]); - } - } - for (n = 0; n < 16; n++) { - const int x_idx = n & 3, y_idx = n >> 2; - const int uv_offset = y_idx * 8 * xd->dst.uv_stride + x_idx * 8; - vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64 + 4096, - xd->block[16].dequant, - xd->dst.u_buffer + uv_offset, - xd->dst.u_buffer + uv_offset, - xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[256 + n * 4]); - vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64 + 4096 + 1024, - xd->block[20].dequant, - xd->dst.v_buffer + uv_offset, - xd->dst.v_buffer + uv_offset, - xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[320 + n * 4]); - } - break; - case TX_4X4: - for (n = 0; n < 256; n++) { - const int x_idx = n & 15, y_idx = n >> 4; - const int y_offset = y_idx * 4 * xd->dst.y_stride + x_idx * 4; - const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 16 + x_idx); - if (tx_type == DCT_DCT) { - xd->itxm_add(xd->qcoeff + n * 16, xd->block[0].dequant, - xd->dst.y_buffer + y_offset, - xd->dst.y_buffer + y_offset, - xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n]); - } else { - vp9_ht_dequant_idct_add_c(tx_type, xd->qcoeff + n * 16, - xd->block[0].dequant, - xd->dst.y_buffer + y_offset, - xd->dst.y_buffer + y_offset, - xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n]); - } - } - for (n = 0; n < 64; n++) { - const int x_idx = n & 7, y_idx = n >> 3; - const int uv_offset = y_idx * 4 * xd->dst.uv_stride + x_idx * 4; - xd->itxm_add(xd->qcoeff + 4096 + n * 16, - xd->block[16].dequant, - xd->dst.u_buffer + uv_offset, - xd->dst.u_buffer + uv_offset, - xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[256 + n]); - xd->itxm_add(xd->qcoeff + 4096 + 1024 + n * 16, - xd->block[20].dequant, - xd->dst.v_buffer + uv_offset, - xd->dst.v_buffer + uv_offset, - xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[320 + n]); - } - break; - default: assert(0); - } + plane_b_size = b_width_log2(bsize) - xd->plane[plane].subsampling_x; + vp9_predict_intra_block(xd, tx_ib, plane_b_size, tx_size, + b_mode, dst, xd->plane[plane].dst.stride); + + switch (ss_txfrm_size / 2) { + case TX_4X4: + tx_type = plane == 0 ? get_tx_type_4x4(xd, raster_block) : DCT_DCT; + if (tx_type == DCT_DCT) + xd->itxm_add(qcoeff, dst, stride, xd->plane[plane].eobs[block]); + else + vp9_iht_add_c(tx_type, qcoeff, dst, stride, + xd->plane[plane].eobs[block]); + break; + case TX_8X8: + tx_type = plane == 0 ? get_tx_type_8x8(xd, raster_block) : DCT_DCT; + vp9_iht_add_8x8_c(tx_type, qcoeff, dst, stride, + xd->plane[plane].eobs[block]); + break; + case TX_16X16: + tx_type = plane == 0 ? 
get_tx_type_16x16(xd, raster_block) : DCT_DCT; + vp9_iht_add_16x16_c(tx_type, qcoeff, dst, stride, + xd->plane[plane].eobs[block]); + break; + case TX_32X32: + vp9_idct_add_32x32(qcoeff, dst, stride, xd->plane[plane].eobs[block]); + break; } } -static void decode_superblock32(VP9D_COMP *pbi, MACROBLOCKD *xd, - int mb_row, int mb_col, - BOOL_DECODER* const bc) { - int n, eobtotal; - VP9_COMMON *const pc = &pbi->common; - const int mis = pc->mode_info_stride; - - assert(xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32); - - if (pbi->common.frame_type != KEY_FRAME) - vp9_setup_interp_filters(xd, xd->mode_info_context->mbmi.interp_filter, pc); +static void decode_atom(VP9D_COMP *pbi, MACROBLOCKD *xd, + int mi_row, int mi_col, + vp9_reader *r, BLOCK_SIZE_TYPE bsize) { + MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; - // re-initialize macroblock dequantizer before detokenization - if (xd->segmentation_enabled) - mb_init_dequantizer(pbi, xd); + assert(mbmi->ref_frame[0] != INTRA_FRAME); - if (xd->mode_info_context->mbmi.mb_skip_coeff) { - vp9_reset_sb_tokens_context(xd); + if ((pbi->common.frame_type != KEY_FRAME) && (!pbi->common.intra_only)) + vp9_setup_interp_filters(xd, mbmi->interp_filter, &pbi->common); - /* Special case: Force the loopfilter to skip when eobtotal and - * mb_skip_coeff are zero. - */ - skip_recon_mb(pbi, xd, mb_row, mb_col); - return; - } + // prediction + vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); - /* do prediction */ - if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { - vp9_build_intra_predictors_sby_s(xd); - vp9_build_intra_predictors_sbuv_s(xd); + if (mbmi->mb_skip_coeff) { + vp9_reset_sb_tokens_context(xd, bsize); } else { - vp9_build_inter32x32_predictors_sb(xd, xd->dst.y_buffer, - xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.y_stride, xd->dst.uv_stride, - mb_row, mb_col); - } + // re-initialize macroblock dequantizer before detokenization + if (xd->segmentation_enabled) + mb_init_dequantizer(&pbi->common, xd); - /* dequantization and idct */ - eobtotal = vp9_decode_sb_tokens(pbi, xd, bc); - if (eobtotal == 0) { // skip loopfilter - xd->mode_info_context->mbmi.mb_skip_coeff = 1; - if (mb_col + 1 < pc->mb_cols) - xd->mode_info_context[1].mbmi.mb_skip_coeff = 1; - if (mb_row + 1 < pc->mb_rows) { - xd->mode_info_context[mis].mbmi.mb_skip_coeff = 1; - if (mb_col + 1 < pc->mb_cols) - xd->mode_info_context[mis + 1].mbmi.mb_skip_coeff = 1; + if (!vp9_reader_has_error(r)) { + vp9_decode_tokens(pbi, xd, r, bsize); } + } + foreach_transformed_block(xd, bsize, decode_block, xd); +} + +static void decode_sb_intra(VP9D_COMP *pbi, MACROBLOCKD *xd, + int mi_row, int mi_col, + vp9_reader *r, BLOCK_SIZE_TYPE bsize) { + MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; + if (mbmi->mb_skip_coeff) { + vp9_reset_sb_tokens_context(xd, bsize); } else { - switch (xd->mode_info_context->mbmi.txfm_size) { - case TX_32X32: - vp9_dequant_idct_add_32x32(xd->qcoeff, xd->block[0].dequant, - xd->dst.y_buffer, xd->dst.y_buffer, - xd->dst.y_stride, xd->dst.y_stride, - xd->eobs[0]); - vp9_dequant_idct_add_uv_block_16x16_c(xd->qcoeff + 1024, - xd->block[16].dequant, - xd->dst.u_buffer, - xd->dst.v_buffer, - xd->dst.uv_stride, xd); - break; - case TX_16X16: - for (n = 0; n < 4; n++) { - const int x_idx = n & 1, y_idx = n >> 1; - const int y_offset = y_idx * 16 * xd->dst.y_stride + x_idx * 16; - const TX_TYPE tx_type = get_tx_type_16x16(xd, - (y_idx * 8 + x_idx) * 4); - if (tx_type == DCT_DCT) { - vp9_dequant_idct_add_16x16( - xd->qcoeff + n * 256, 
xd->block[0].dequant, - xd->dst.y_buffer + y_offset, - xd->dst.y_buffer + y_offset, - xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 16]); - } else { - vp9_ht_dequant_idct_add_16x16_c(tx_type, xd->qcoeff + n * 256, - xd->block[0].dequant, - xd->dst.y_buffer + y_offset, - xd->dst.y_buffer + y_offset, - xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 16]); - } - } - vp9_dequant_idct_add_uv_block_16x16_c(xd->qcoeff + 1024, - xd->block[16].dequant, - xd->dst.u_buffer, - xd->dst.v_buffer, - xd->dst.uv_stride, xd); - break; - case TX_8X8: - for (n = 0; n < 16; n++) { - const int x_idx = n & 3, y_idx = n >> 2; - const int y_offset = y_idx * 8 * xd->dst.y_stride + x_idx * 8; - const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 8 + x_idx) * 2); - if (tx_type == DCT_DCT) { - vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64, - xd->block[0].dequant, - xd->dst.y_buffer + y_offset, - xd->dst.y_buffer + y_offset, - xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 4]); - } else { - vp9_ht_dequant_idct_add_8x8_c(tx_type, xd->qcoeff + n * 64, - xd->block[0].dequant, - xd->dst.y_buffer + y_offset, - xd->dst.y_buffer + y_offset, - xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 4]); - } - } - for (n = 0; n < 4; n++) { - const int x_idx = n & 1, y_idx = n >> 1; - const int uv_offset = y_idx * 8 * xd->dst.uv_stride + x_idx * 8; - vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64 + 1024, - xd->block[16].dequant, - xd->dst.u_buffer + uv_offset, - xd->dst.u_buffer + uv_offset, - xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[64 + n * 4]); - vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64 + 1280, - xd->block[20].dequant, - xd->dst.v_buffer + uv_offset, - xd->dst.v_buffer + uv_offset, - xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[80 + n * 4]); - } - break; - case TX_4X4: - for (n = 0; n < 64; n++) { - const int x_idx = n & 7, y_idx = n >> 3; - const int y_offset = y_idx * 4 * xd->dst.y_stride + x_idx * 4; - - const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 8 + x_idx); - if (tx_type == DCT_DCT) { - xd->itxm_add(xd->qcoeff + n * 16, xd->block[0].dequant, - xd->dst.y_buffer + y_offset, - xd->dst.y_buffer + y_offset, - xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n]); - } else { - vp9_ht_dequant_idct_add_c(tx_type, xd->qcoeff + n * 16, - xd->block[0].dequant, - xd->dst.y_buffer + y_offset, - xd->dst.y_buffer + y_offset, - xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n]); - } - } + // re-initialize macroblock dequantizer before detokenization + if (xd->segmentation_enabled) + mb_init_dequantizer(&pbi->common, xd); - for (n = 0; n < 16; n++) { - const int x_idx = n & 3, y_idx = n >> 2; - const int uv_offset = y_idx * 4 * xd->dst.uv_stride + x_idx * 4; - xd->itxm_add(xd->qcoeff + 1024 + n * 16, - xd->block[16].dequant, - xd->dst.u_buffer + uv_offset, - xd->dst.u_buffer + uv_offset, - xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[64 + n]); - xd->itxm_add(xd->qcoeff + 1280 + n * 16, - xd->block[20].dequant, - xd->dst.v_buffer + uv_offset, - xd->dst.v_buffer + uv_offset, - xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[80 + n]); - } - break; - default: assert(0); + if (!vp9_reader_has_error(r)) { + vp9_decode_tokens(pbi, xd, r, bsize); } } -} -static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd, - int mb_row, unsigned int mb_col, - BOOL_DECODER* const bc) { - int eobtotal = 0; - MB_PREDICTION_MODE mode; - int tx_size; - - assert(!xd->mode_info_context->mbmi.sb_type); + foreach_transformed_block(xd, bsize, decode_block_intra, xd); +} - // re-initialize macroblock dequantizer before detokenization - if 
(xd->segmentation_enabled) - mb_init_dequantizer(pbi, xd); - tx_size = xd->mode_info_context->mbmi.txfm_size; - mode = xd->mode_info_context->mbmi.mode; +static void decode_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, int mi_row, int mi_col, + vp9_reader *r, BLOCK_SIZE_TYPE bsize) { + const int bwl = mi_width_log2(bsize), bhl = mi_height_log2(bsize); + const int bw = 1 << bwl, bh = 1 << bhl; + int n, eobtotal; + VP9_COMMON *const pc = &pbi->common; + MODE_INFO *const mi = xd->mode_info_context; + MB_MODE_INFO *const mbmi = &mi->mbmi; + const int mis = pc->mode_info_stride; - if (xd->mode_info_context->mbmi.mb_skip_coeff) { - vp9_reset_mb_tokens_context(xd); - } else if (!bool_error(bc)) { - if (mode != B_PRED) - eobtotal = vp9_decode_mb_tokens(pbi, xd, bc); - } + assert(mbmi->sb_type == bsize); + assert(mbmi->ref_frame[0] != INTRA_FRAME); - //mode = xd->mode_info_context->mbmi.mode; if (pbi->common.frame_type != KEY_FRAME) - vp9_setup_interp_filters(xd, xd->mode_info_context->mbmi.interp_filter, - &pbi->common); - - if (eobtotal == 0 && - mode != B_PRED && - mode != SPLITMV && - mode != I8X8_PRED && - !bool_error(bc)) { - /* Special case: Force the loopfilter to skip when eobtotal and - mb_skip_coeff are zero. */ - xd->mode_info_context->mbmi.mb_skip_coeff = 1; - skip_recon_mb(pbi, xd, mb_row, mb_col); - return; - } -#if 0 // def DEC_DEBUG - if (dec_debug) - printf("Decoding mb: %d %d\n", xd->mode_info_context->mbmi.mode, tx_size); -#endif + vp9_setup_interp_filters(xd, mbmi->interp_filter, pc); - // moved to be performed before detokenization - // if (xd->segmentation_enabled) - // mb_init_dequantizer(pbi, xd); + // generate prediction + vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); - /* do prediction */ - if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { - if (mode != I8X8_PRED) { - vp9_build_intra_predictors_mbuv(xd); - if (mode != B_PRED) { - vp9_build_intra_predictors_mby(xd); - } - } - } else { -#if 0 // def DEC_DEBUG - if (dec_debug) - printf("Decoding mb: %d %d interp %d\n", - xd->mode_info_context->mbmi.mode, tx_size, - xd->mode_info_context->mbmi.interp_filter); -#endif - vp9_build_inter_predictors_mb(xd, mb_row, mb_col); - } - - if (tx_size == TX_16X16) { - decode_16x16(pbi, xd, bc); - } else if (tx_size == TX_8X8) { - decode_8x8(pbi, xd, bc); + if (mbmi->mb_skip_coeff) { + vp9_reset_sb_tokens_context(xd, bsize); } else { - decode_4x4(pbi, xd, bc); - } -#ifdef DEC_DEBUG - if (dec_debug) { - int i, j; - printf("\n"); - printf("predictor y\n"); - for (i = 0; i < 16; i++) { - for (j = 0; j < 16; j++) - printf("%3d ", xd->predictor[i * 16 + j]); - printf("\n"); - } - printf("\n"); - printf("final y\n"); - for (i = 0; i < 16; i++) { - for (j = 0; j < 16; j++) - printf("%3d ", xd->dst.y_buffer[i * xd->dst.y_stride + j]); - printf("\n"); - } - printf("\n"); - printf("final u\n"); - for (i = 0; i < 8; i++) { - for (j = 0; j < 8; j++) - printf("%3d ", xd->dst.u_buffer[i * xd->dst.uv_stride + j]); - printf("\n"); - } - printf("\n"); - printf("final v\n"); - for (i = 0; i < 8; i++) { - for (j = 0; j < 8; j++) - printf("%3d ", xd->dst.v_buffer[i * xd->dst.uv_stride + j]); - printf("\n"); + // re-initialize macroblock dequantizer before detokenization + if (xd->segmentation_enabled) + mb_init_dequantizer(pc, xd); + + // dequantization and idct + eobtotal = vp9_decode_tokens(pbi, xd, r, bsize); + if (eobtotal == 0) { // skip loopfilter + for (n = 0; n < bw * bh; n++) { + const int x_idx = n & (bw - 1), y_idx = n >> bwl; + + if (mi_col + x_idx < pc->mi_cols && mi_row + y_idx < 
pc->mi_rows) + mi[y_idx * mis + x_idx].mbmi.mb_skip_coeff = 1; + } + } else { + foreach_transformed_block(xd, bsize, decode_block, xd); } - fflush(stdout); } -#endif } +static void set_offsets(VP9D_COMP *pbi, BLOCK_SIZE_TYPE bsize, + int mi_row, int mi_col) { + const int bh = 1 << mi_height_log2(bsize); + const int bw = 1 << mi_width_log2(bsize); + VP9_COMMON *const cm = &pbi->common; + MACROBLOCKD *const xd = &pbi->mb; + const int mi_idx = mi_row * cm->mode_info_stride + mi_col; + int i; -static int get_delta_q(vp9_reader *bc, int prev, int *q_update) { - int ret_val = 0; - - if (vp9_read_bit(bc)) { - ret_val = vp9_read_literal(bc, 4); - - if (vp9_read_bit(bc)) - ret_val = -ret_val; + xd->mode_info_context = cm->mi + mi_idx; + xd->mode_info_context->mbmi.sb_type = bsize; + // Special case: if prev_mi is NULL, the previous mode info context + // cannot be used. + xd->prev_mode_info_context = cm->prev_mi ? + cm->prev_mi + mi_idx : NULL; + + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].above_context = cm->above_context[i] + + (mi_col * 2 >> xd->plane[i].subsampling_x); + xd->plane[i].left_context = cm->left_context[i] + + (((mi_row * 2) & 15) >> xd->plane[i].subsampling_y); } + xd->above_seg_context = cm->above_seg_context + mi_col; + xd->left_seg_context = cm->left_seg_context + (mi_row & MI_MASK); - /* Trigger a quantizer update if the delta-q value has changed */ - if (ret_val != prev) - *q_update = 1; + // Distance of Mb to the various image edges. These are specified to 8th pel + // as they are always compared to values that are in 1/8th pel units + set_mi_row_col(cm, xd, mi_row, bh, mi_col, bw); - return ret_val; + setup_dst_planes(xd, &cm->yv12_fb[cm->new_fb_idx], mi_row, mi_col); } -#ifdef PACKET_TESTING -#include <stdio.h> -FILE *vpxlog = 0; -#endif - -static void set_offsets(VP9D_COMP *pbi, int block_size, - int mb_row, int mb_col) { - VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; - const int mis = cm->mode_info_stride; - const int idx = mis * mb_row + mb_col; - const int dst_fb_idx = cm->new_fb_idx; - const int recon_y_stride = cm->yv12_fb[dst_fb_idx].y_stride; - const int recon_uv_stride = cm->yv12_fb[dst_fb_idx].uv_stride; - const int recon_yoffset = mb_row * 16 * recon_y_stride + 16 * mb_col; - const int recon_uvoffset = mb_row * 8 * recon_uv_stride + 8 * mb_col; - - xd->mode_info_context = cm->mi + idx; - xd->mode_info_context->mbmi.sb_type = block_size >> 5; - xd->prev_mode_info_context = cm->prev_mi + idx; - xd->above_context = cm->above_context + mb_col; - xd->left_context = cm->left_context + (mb_row & 3); - - // Distance of Mb to the various image edges. 
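In the new set_offsets() above, entropy contexts are addressed per 4x4 column (above) and per 4x4 row within the current 64-pixel superblock (left); one mi unit spans two 4x4 units and chroma planes are subsampled, which is where the shifts and the & 15 mask come from. A small worked example under those assumptions:

    #include <stdio.h>

    static int above_ctx_offset(int mi_col, int subsampling_x) {
      return (mi_col * 2) >> subsampling_x;  /* 4x4 columns from frame left */
    }

    static int left_ctx_offset(int mi_row, int subsampling_y) {
      return ((mi_row * 2) & 15) >> subsampling_y;  /* wraps every 8 mi rows */
    }

    int main(void) {
      /* mi_col 3 => luma 4x4 column 6, chroma (4:2:0) column 3. */
      printf("above: luma %d, chroma %d\n",
             above_ctx_offset(3, 0), above_ctx_offset(3, 1));
      /* mi_row 9 wraps into the second superblock row: (18 & 15) = 2. */
      printf("left:  luma %d, chroma %d\n",
             left_ctx_offset(9, 0), left_ctx_offset(9, 1));
      return 0;
    }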
- // These are specified to 8th pel as they are always compared to - // values that are in 1/8th pel units - block_size >>= 4; // in mb units - - set_mb_row(cm, xd, mb_row, block_size); - set_mb_col(cm, xd, mb_col, block_size); - - xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset; - xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset; - xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset; -} - -static void set_refs(VP9D_COMP *pbi, int block_size, int mb_row, int mb_col) { +static void set_refs(VP9D_COMP *pbi, int mi_row, int mi_col) { VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; - if (mbmi->ref_frame > INTRA_FRAME) { + if (mbmi->ref_frame[0] > INTRA_FRAME) { // Select the appropriate reference frame for this MB - int ref_fb_idx = cm->active_ref_idx[mbmi->ref_frame - 1]; - xd->scale_factor[0] = cm->active_ref_scale[mbmi->ref_frame - 1]; - xd->scale_factor_uv[0] = cm->active_ref_scale[mbmi->ref_frame - 1]; - setup_pred_block(&xd->pre, &cm->yv12_fb[ref_fb_idx], mb_row, mb_col, - &xd->scale_factor[0], &xd->scale_factor_uv[0]); - - // propagate errors from reference frames - xd->corrupted |= cm->yv12_fb[ref_fb_idx].corrupted; - - if (mbmi->second_ref_frame > INTRA_FRAME) { + const int fb_idx = cm->active_ref_idx[mbmi->ref_frame[0] - 1]; + const YV12_BUFFER_CONFIG *cfg = &cm->yv12_fb[fb_idx]; + xd->scale_factor[0] = cm->active_ref_scale[mbmi->ref_frame[0] - 1]; + xd->scale_factor_uv[0] = cm->active_ref_scale[mbmi->ref_frame[0] - 1]; + setup_pre_planes(xd, cfg, NULL, mi_row, mi_col, + xd->scale_factor, xd->scale_factor_uv); + xd->corrupted |= cfg->corrupted; + + if (mbmi->ref_frame[1] > INTRA_FRAME) { // Select the appropriate reference frame for this MB - int second_ref_fb_idx = cm->active_ref_idx[mbmi->second_ref_frame - 1]; - - setup_pred_block(&xd->second_pre, &cm->yv12_fb[second_ref_fb_idx], - mb_row, mb_col, - &xd->scale_factor[1], &xd->scale_factor_uv[1]); - - // propagate errors from reference frames - xd->corrupted |= cm->yv12_fb[second_ref_fb_idx].corrupted; + const int second_fb_idx = cm->active_ref_idx[mbmi->ref_frame[1] - 1]; + const YV12_BUFFER_CONFIG *second_cfg = &cm->yv12_fb[second_fb_idx]; + xd->scale_factor[1] = cm->active_ref_scale[mbmi->ref_frame[1] - 1]; + xd->scale_factor_uv[1] = cm->active_ref_scale[mbmi->ref_frame[1] - 1]; + setup_pre_planes(xd, NULL, second_cfg, mi_row, mi_col, + xd->scale_factor, xd->scale_factor_uv); + xd->corrupted |= second_cfg->corrupted; } } } -/* Decode a row of Superblocks (2x2 region of MBs) */ -static void decode_sb_row(VP9D_COMP *pbi, VP9_COMMON *pc, - int mb_row, MACROBLOCKD *xd, - BOOL_DECODER* const bc) { - int mb_col; - - // For a SB there are 2 left contexts, each pertaining to a MB row within - vpx_memset(pc->left_context, 0, sizeof(pc->left_context)); - - for (mb_col = pc->cur_tile_mb_col_start; - mb_col < pc->cur_tile_mb_col_end; mb_col += 4) { - if (vp9_read(bc, pc->sb64_coded)) { -#ifdef DEC_DEBUG - dec_debug = (pc->current_video_frame == 11 && pc->show_frame && - mb_row == 8 && mb_col == 0); - if (dec_debug) - printf("Debug Decode SB64\n"); -#endif - set_offsets(pbi, 64, mb_row, mb_col); - vp9_decode_mb_mode_mv(pbi, xd, mb_row, mb_col, bc); - set_refs(pbi, 64, mb_row, mb_col); - decode_superblock64(pbi, xd, mb_row, mb_col, bc); - xd->corrupted |= bool_error(bc); - } else { - int j; +static void decode_modes_b(VP9D_COMP *pbi, int mi_row, int mi_col, + vp9_reader *r, BLOCK_SIZE_TYPE bsize) { + 
MACROBLOCKD *const xd = &pbi->mb; - for (j = 0; j < 4; j++) { - const int x_idx_sb = (j & 1) << 1, y_idx_sb = j & 2; + if (bsize < BLOCK_SIZE_SB8X8) + if (xd->ab_index > 0) + return; + set_offsets(pbi, bsize, mi_row, mi_col); + vp9_decode_mb_mode_mv(pbi, xd, mi_row, mi_col, r); + set_refs(pbi, mi_row, mi_col); + + if (xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME) + decode_sb_intra(pbi, xd, mi_row, mi_col, r, (bsize < BLOCK_SIZE_SB8X8) ? + BLOCK_SIZE_SB8X8 : bsize); + else if (bsize < BLOCK_SIZE_SB8X8) + decode_atom(pbi, xd, mi_row, mi_col, r, BLOCK_SIZE_SB8X8); + else + decode_sb(pbi, xd, mi_row, mi_col, r, bsize); - if (mb_row + y_idx_sb >= pc->mb_rows || - mb_col + x_idx_sb >= pc->mb_cols) { - // MB lies outside frame, skip on to next - continue; - } + xd->corrupted |= vp9_reader_has_error(r); +} - xd->sb_index = j; +static void decode_modes_sb(VP9D_COMP *pbi, int mi_row, int mi_col, + vp9_reader* r, BLOCK_SIZE_TYPE bsize) { + VP9_COMMON *const pc = &pbi->common; + MACROBLOCKD *const xd = &pbi->mb; + int bsl = mi_width_log2(bsize), bs = (1 << bsl) / 2; + int n; + PARTITION_TYPE partition = PARTITION_NONE; + BLOCK_SIZE_TYPE subsize; - if (vp9_read(bc, pc->sb32_coded)) { -#ifdef DEC_DEBUG - dec_debug = (pc->current_video_frame == 11 && pc->show_frame && - mb_row + y_idx_sb == 8 && mb_col + x_idx_sb == 0); - if (dec_debug) - printf("Debug Decode SB32\n"); -#endif - set_offsets(pbi, 32, mb_row + y_idx_sb, mb_col + x_idx_sb); - vp9_decode_mb_mode_mv(pbi, - xd, mb_row + y_idx_sb, mb_col + x_idx_sb, bc); - set_refs(pbi, 32, mb_row + y_idx_sb, mb_col + x_idx_sb); - decode_superblock32(pbi, - xd, mb_row + y_idx_sb, mb_col + x_idx_sb, bc); - xd->corrupted |= bool_error(bc); - } else { - int i; + if (mi_row >= pc->mi_rows || mi_col >= pc->mi_cols) + return; - // Process the 4 MBs within the SB in the order: - // top-left, top-right, bottom-left, bottom-right - for (i = 0; i < 4; i++) { - const int x_idx = x_idx_sb + (i & 1), y_idx = y_idx_sb + (i >> 1); + if (bsize < BLOCK_SIZE_SB8X8) + if (xd->ab_index != 0) + return; + + if (bsize >= BLOCK_SIZE_SB8X8) { + int pl; + int idx = check_bsize_coverage(pc, xd, mi_row, mi_col, bsize); + // read the partition information + xd->left_seg_context = pc->left_seg_context + (mi_row & MI_MASK); + xd->above_seg_context = pc->above_seg_context + mi_col; + pl = partition_plane_context(xd, bsize); + + if (idx == 0) + partition = treed_read(r, vp9_partition_tree, + pc->fc.partition_prob[pc->frame_type][pl]); + else if (idx > 0 && + !vp9_read(r, pc->fc.partition_prob[pc->frame_type][pl][idx])) + partition = (idx == 1) ? 
PARTITION_HORZ : PARTITION_VERT; + else + partition = PARTITION_SPLIT; - if (mb_row + y_idx >= pc->mb_rows || - mb_col + x_idx >= pc->mb_cols) { - // MB lies outside frame, skip on to next - continue; - } -#ifdef DEC_DEBUG - dec_debug = (pc->current_video_frame == 11 && pc->show_frame && - mb_row + y_idx == 8 && mb_col + x_idx == 0); - if (dec_debug) - printf("Debug Decode MB\n"); -#endif + pc->fc.partition_counts[pl][partition]++; + } - set_offsets(pbi, 16, mb_row + y_idx, mb_col + x_idx); - xd->mb_index = i; - vp9_decode_mb_mode_mv(pbi, xd, mb_row + y_idx, mb_col + x_idx, bc); - set_refs(pbi, 16, mb_row + y_idx, mb_col + x_idx); - decode_macroblock(pbi, xd, mb_row + y_idx, mb_col + x_idx, bc); + subsize = get_subsize(bsize, partition); + *(get_sb_index(xd, subsize)) = 0; - /* check if the boolean decoder has suffered an error */ - xd->corrupted |= bool_error(bc); - } - } + switch (partition) { + case PARTITION_NONE: + decode_modes_b(pbi, mi_row, mi_col, r, subsize); + break; + case PARTITION_HORZ: + decode_modes_b(pbi, mi_row, mi_col, r, subsize); + *(get_sb_index(xd, subsize)) = 1; + if (mi_row + bs < pc->mi_rows) + decode_modes_b(pbi, mi_row + bs, mi_col, r, subsize); + break; + case PARTITION_VERT: + decode_modes_b(pbi, mi_row, mi_col, r, subsize); + *(get_sb_index(xd, subsize)) = 1; + if (mi_col + bs < pc->mi_cols) + decode_modes_b(pbi, mi_row, mi_col + bs, r, subsize); + break; + case PARTITION_SPLIT: + for (n = 0; n < 4; n++) { + int j = n >> 1, i = n & 0x01; + *(get_sb_index(xd, subsize)) = n; + decode_modes_sb(pbi, mi_row + j * bs, mi_col + i * bs, r, subsize); } - } + break; + default: + assert(0); + } + // update partition context + if (bsize >= BLOCK_SIZE_SB8X8 && + (bsize == BLOCK_SIZE_SB8X8 || partition != PARTITION_SPLIT)) { + set_partition_seg_context(pc, xd, mi_row, mi_col); + update_partition_context(xd, subsize, bsize); } } - static void setup_token_decoder(VP9D_COMP *pbi, - const unsigned char *cx_data, - BOOL_DECODER* const bool_decoder) { + const uint8_t *data, size_t read_size, + vp9_reader *r) { VP9_COMMON *pc = &pbi->common; - const unsigned char *user_data_end = pbi->Source + pbi->source_sz; - const unsigned char *partition = cx_data; - ptrdiff_t bytes_left = user_data_end - partition; - ptrdiff_t partition_size = bytes_left; + const uint8_t *data_end = pbi->source + pbi->source_sz; // Validate the calculated partition length. If the buffer // described by the partition can't be fully read, then restrict // it to the portion that can be (for EC mode) or throw an error. - if (!read_is_valid(partition, partition_size, user_data_end)) { + if (!read_is_valid(data, read_size, data_end)) vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, - "Truncated packet or corrupt partition " - "%d length", 1); - } + "Truncated packet or corrupt tile length"); - if (vp9_start_decode(bool_decoder, - partition, (unsigned int)partition_size)) + if (vp9_reader_init(r, data, read_size)) vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, "Failed to allocate bool decoder %d", 1); } -static void init_frame(VP9D_COMP *pbi) { - VP9_COMMON *const pc = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; - - if (pc->frame_type == KEY_FRAME) { - vp9_setup_past_independence(pc, xd); - // All buffers are implicitly updated on key frames. - pbi->refresh_frame_flags = (1 << NUM_REF_FRAMES) - 1; - } else if (pc->error_resilient_mode) { - vp9_setup_past_independence(pc, xd); - } - - if (pc->frame_type != KEY_FRAME) { - pc->mcomp_filter_type = pc->use_bilinear_mc_filter ? 
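decode_modes_sb() above is the entry point of the new recursive block layout: read a partition symbol, then either decode one block, two halves, or recurse into four quadrants, skipping children that start past the frame edge. A stripped-down sketch of the recursion (the bitstream read is stubbed out, and the context-conditioned partition probabilities and the bounds-checked second half of HORZ/VERT are omitted):

    #include <stdio.h>

    typedef enum { PART_NONE, PART_HORZ, PART_VERT, PART_SPLIT } Partition;

    /* Stand-in for reading the partition symbol from the bitstream. */
    static Partition read_partition_stub(int depth) {
      return depth < 1 ? PART_SPLIT : PART_NONE;  /* split once, then stop */
    }

    static void decode_partition(int mi_row, int mi_col, int bs,
                                 int mi_rows, int mi_cols, int depth) {
      if (mi_row >= mi_rows || mi_col >= mi_cols)
        return;  /* child starts beyond the frame edge */
      switch (read_partition_stub(depth)) {
        case PART_NONE:
          printf("block at (%d,%d), size %d mi\n", mi_row, mi_col, bs);
          break;
        case PART_HORZ:
          printf("two %dx%d halves at (%d,%d)\n", bs, bs / 2, mi_row, mi_col);
          break;
        case PART_VERT:
          printf("two %dx%d halves at (%d,%d)\n", bs / 2, bs, mi_row, mi_col);
          break;
        case PART_SPLIT: {
          int n;
          for (n = 0; n < 4; n++)  /* quadrants in raster order */
            decode_partition(mi_row + (n >> 1) * (bs / 2),
                             mi_col + (n & 1) * (bs / 2),
                             bs / 2, mi_rows, mi_cols, depth + 1);
          break;
        }
      }
    }

    int main(void) {
      decode_partition(0, 0, 8, 6, 8, 0);  /* one 64x64 SB in a 6x8-mi frame */
      return 0;
    }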
BILINEAR : EIGHTTAP; - - // To enable choice of different interpolation filters - vp9_setup_interp_filters(xd, pc->mcomp_filter_type, pc); - } - - xd->mode_info_context = pc->mi; - xd->prev_mode_info_context = pc->prev_mi; - xd->frame_type = pc->frame_type; - xd->mode_info_context->mbmi.mode = DC_PRED; - xd->mode_info_stride = pc->mode_info_stride; - xd->corrupted = 0; - xd->fullpixel_mask = pc->full_pixel ? 0xfffffff8 : 0xffffffff; -} - -#if CONFIG_CODE_NONZEROCOUNT -static void read_nzc_probs_common(VP9_COMMON *cm, - BOOL_DECODER* const bc, - int block_size) { - int c, r, b, t; - int tokens, nodes; - vp9_prob *nzc_probs; - vp9_prob upd; - - if (!vp9_read_bit(bc)) return; - - if (block_size == 32) { - tokens = NZC32X32_TOKENS; - nzc_probs = cm->fc.nzc_probs_32x32[0][0][0]; - upd = NZC_UPDATE_PROB_32X32; - } else if (block_size == 16) { - tokens = NZC16X16_TOKENS; - nzc_probs = cm->fc.nzc_probs_16x16[0][0][0]; - upd = NZC_UPDATE_PROB_16X16; - } else if (block_size == 8) { - tokens = NZC8X8_TOKENS; - nzc_probs = cm->fc.nzc_probs_8x8[0][0][0]; - upd = NZC_UPDATE_PROB_8X8; - } else { - tokens = NZC4X4_TOKENS; - nzc_probs = cm->fc.nzc_probs_4x4[0][0][0]; - upd = NZC_UPDATE_PROB_4X4; - } - nodes = tokens - 1; - for (c = 0; c < MAX_NZC_CONTEXTS; ++c) { - for (r = 0; r < REF_TYPES; ++r) { - for (b = 0; b < BLOCK_TYPES; ++b) { - int offset = c * REF_TYPES * BLOCK_TYPES + r * BLOCK_TYPES + b; - int offset_nodes = offset * nodes; - for (t = 0; t < nodes; ++t) { - vp9_prob *p = &nzc_probs[offset_nodes + t]; - if (vp9_read(bc, upd)) { - *p = read_prob_diff_update(bc, *p); - } - } - } - } - } -} - -static void read_nzc_pcat_probs(VP9_COMMON *cm, BOOL_DECODER* const bc) { - int c, t, b; - vp9_prob upd = NZC_UPDATE_PROB_PCAT; - if (!vp9_read_bit(bc)) { - return; - } - for (c = 0; c < MAX_NZC_CONTEXTS; ++c) { - for (t = 0; t < NZC_TOKENS_EXTRA; ++t) { - int bits = vp9_extranzcbits[t + NZC_TOKENS_NOEXTRA]; - for (b = 0; b < bits; ++b) { - vp9_prob *p = &cm->fc.nzc_pcat_probs[c][t][b]; - if (vp9_read(bc, upd)) { - *p = read_prob_diff_update(bc, *p); - } - } - } - } -} - -static void read_nzc_probs(VP9_COMMON *cm, - BOOL_DECODER* const bc) { - read_nzc_probs_common(cm, bc, 4); - if (cm->txfm_mode != ONLY_4X4) - read_nzc_probs_common(cm, bc, 8); - if (cm->txfm_mode > ALLOW_8X8) - read_nzc_probs_common(cm, bc, 16); - if (cm->txfm_mode > ALLOW_16X16) - read_nzc_probs_common(cm, bc, 32); -#ifdef NZC_PCAT_UPDATE - read_nzc_pcat_probs(cm, bc); -#endif -} -#endif // CONFIG_CODE_NONZEROCOUNT - -static void read_coef_probs_common(BOOL_DECODER* const bc, - vp9_coeff_probs *coef_probs, - int block_types) { -#if CONFIG_MODELCOEFPROB && MODEL_BASED_UPDATE - const int entropy_nodes_update = UNCONSTRAINED_UPDATE_NODES; -#else - const int entropy_nodes_update = ENTROPY_NODES; -#endif +static void read_coef_probs_common(FRAME_CONTEXT *fc, TX_SIZE tx_size, + vp9_reader *r) { + const int entropy_nodes_update = UNCONSTRAINED_NODES; + vp9_coeff_probs_model *coef_probs = fc->coef_probs[tx_size]; int i, j, k, l, m; - if (vp9_read_bit(bc)) { - for (i = 0; i < block_types; i++) { + if (vp9_read_bit(r)) { + for (i = 0; i < BLOCK_TYPES; i++) { for (j = 0; j < REF_TYPES; j++) { for (k = 0; k < COEF_BANDS; k++) { for (l = 0; l < PREV_COEF_CONTEXTS; l++) { + const int mstart = 0; if (l >= 3 && k == 0) continue; - for (m = CONFIG_CODE_NONZEROCOUNT; m < entropy_nodes_update; m++) { + + for (m = mstart; m < entropy_nodes_update; m++) { vp9_prob *const p = coef_probs[i][j][k][l] + m; - if (vp9_read(bc, vp9_coef_update_prob[m])) { - *p = 
read_prob_diff_update(bc, *p); -#if CONFIG_MODELCOEFPROB && MODEL_BASED_UPDATE - if (m == UNCONSTRAINED_NODES - 1) - vp9_get_model_distribution(*p, coef_probs[i][j][k][l], i, j); -#endif + if (vp9_read(r, vp9_coef_update_prob[m])) { + *p = vp9_read_prob_diff_update(r, *p); } } } @@ -1242,327 +586,338 @@ static void read_coef_probs_common(BOOL_DECODER* const bc, } } -static void read_coef_probs(VP9D_COMP *pbi, BOOL_DECODER* const bc) { - VP9_COMMON *const pc = &pbi->common; - - read_coef_probs_common(bc, pc->fc.coef_probs_4x4, BLOCK_TYPES); - - if (pbi->common.txfm_mode != ONLY_4X4) - read_coef_probs_common(bc, pc->fc.coef_probs_8x8, BLOCK_TYPES); +static void read_coef_probs(VP9D_COMP *pbi, vp9_reader *r) { + const TXFM_MODE txfm_mode = pbi->common.txfm_mode; + FRAME_CONTEXT *const fc = &pbi->common.fc; - if (pbi->common.txfm_mode > ALLOW_8X8) - read_coef_probs_common(bc, pc->fc.coef_probs_16x16, BLOCK_TYPES); + read_coef_probs_common(fc, TX_4X4, r); - if (pbi->common.txfm_mode > ALLOW_16X16) - read_coef_probs_common(bc, pc->fc.coef_probs_32x32, BLOCK_TYPES); -} + if (txfm_mode > ONLY_4X4) + read_coef_probs_common(fc, TX_8X8, r); -static void update_frame_size(VP9D_COMP *pbi) { - VP9_COMMON *cm = &pbi->common; + if (txfm_mode > ALLOW_8X8) + read_coef_probs_common(fc, TX_16X16, r); - /* our internal buffers are always multiples of 16 */ - const int width = (cm->width + 15) & ~15; - const int height = (cm->height + 15) & ~15; - - cm->mb_rows = height >> 4; - cm->mb_cols = width >> 4; - cm->MBs = cm->mb_rows * cm->mb_cols; - cm->mode_info_stride = cm->mb_cols + 1; - memset(cm->mip, 0, - (cm->mb_cols + 1) * (cm->mb_rows + 1) * sizeof(MODE_INFO)); - vp9_update_mode_info_border(cm, cm->mip); - - cm->mi = cm->mip + cm->mode_info_stride + 1; - cm->prev_mi = cm->prev_mip + cm->mode_info_stride + 1; - vp9_update_mode_info_in_image(cm, cm->mi); + if (txfm_mode > ALLOW_16X16) + read_coef_probs_common(fc, TX_32X32, r); } -static void setup_segmentation(VP9_COMMON *pc, MACROBLOCKD *xd, vp9_reader *r) { +static void setup_segmentation(VP9D_COMP *pbi, struct vp9_read_bit_buffer *rb) { int i, j; - xd->segmentation_enabled = vp9_read_bit(r); - if (xd->segmentation_enabled) { - // Read whether or not the segmentation map is being explicitly updated - // this frame. - xd->update_mb_segmentation_map = vp9_read_bit(r); - - // If so what method will be used. - if (xd->update_mb_segmentation_map) { - // Which macro block level features are enabled. Read the probs used to - // decode the segment id for each macro block. - for (i = 0; i < MB_FEATURE_TREE_PROBS; i++) { - xd->mb_segment_tree_probs[i] = vp9_read_bit(r) ? vp9_read_prob(r) : 255; - } - - // Read the prediction probs needed to decode the segment id - pc->temporal_update = vp9_read_bit(r); - for (i = 0; i < PREDICTION_PROBS; i++) { - pc->segment_pred_probs[i] = pc->temporal_update - ? (vp9_read_bit(r) ? 
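read_coef_probs_common() above visits every node probability but re-codes only those whose update flag is set, so each unchanged probability costs a single bit. The shape of that loop, with the bitstream reads stubbed out (the stub names are illustrative, not library calls):

    #include <stdio.h>
    #include <stdint.h>

    typedef uint8_t vp9_prob;  /* stand-in for the library typedef */

    static int read_flag_stub(int *pos) { return (*pos)++ % 3 == 0; }
    static vp9_prob read_new_prob_stub(vp9_prob old) { return (vp9_prob)(old + 8); }

    /* Every probability is visited; only flagged ones are re-coded. */
    static void update_probs(vp9_prob *probs, int n, int *pos) {
      int i;
      for (i = 0; i < n; i++)
        if (read_flag_stub(pos))        /* vp9_read(r, vp9_coef_update_prob[m]) */
          probs[i] = read_new_prob_stub(probs[i]);  /* vp9_read_prob_diff_update */
    }

    int main(void) {
      vp9_prob probs[6] = {128, 128, 128, 128, 128, 128};
      int pos = 0, i;
      update_probs(probs, 6, &pos);
      for (i = 0; i < 6; i++)
        printf("%d ", probs[i]);  /* only every third entry changed:
                                     136 128 128 136 128 128 */
      printf("\n");
      return 0;
    }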
vp9_read_prob(r) : 255) - : 255; - } - - if (pc->temporal_update) { - const vp9_prob *p = xd->mb_segment_tree_probs; - vp9_prob *p_mod = xd->mb_segment_mispred_tree_probs; - const int c0 = p[0] * p[1]; - const int c1 = p[0] * (256 - p[1]); - const int c2 = (256 - p[0]) * p[2]; - const int c3 = (256 - p[0]) * (256 - p[2]); - - p_mod[0] = get_binary_prob(c1, c2 + c3); - p_mod[1] = get_binary_prob(c0, c2 + c3); - p_mod[2] = get_binary_prob(c0 + c1, c3); - p_mod[3] = get_binary_prob(c0 + c1, c2); - } - } - - xd->update_mb_segmentation_data = vp9_read_bit(r); - if (xd->update_mb_segmentation_data) { - int data; - - xd->mb_segment_abs_delta = vp9_read_bit(r); - - vp9_clearall_segfeatures(xd); + VP9_COMMON *const cm = &pbi->common; + MACROBLOCKD *const xd = &pbi->mb; - // For each segmentation... - for (i = 0; i < MAX_MB_SEGMENTS; i++) { - // For each of the segments features... - for (j = 0; j < SEG_LVL_MAX; j++) { - // Is the feature enabled - if (vp9_read_bit(r)) { - // Update the feature data and mask - vp9_enable_segfeature(xd, i, j); + xd->update_mb_segmentation_map = 0; + xd->update_mb_segmentation_data = 0; - data = vp9_decode_unsigned_max(r, vp9_seg_feature_data_max(j)); + xd->segmentation_enabled = vp9_rb_read_bit(rb); + if (!xd->segmentation_enabled) + return; - // Is the segment data signed.. - if (vp9_is_segfeature_signed(j)) { - if (vp9_read_bit(r)) - data = -data; - } - } else { - data = 0; - } + // Segmentation map update + xd->update_mb_segmentation_map = vp9_rb_read_bit(rb); + if (xd->update_mb_segmentation_map) { + for (i = 0; i < MB_SEG_TREE_PROBS; i++) + xd->mb_segment_tree_probs[i] = vp9_rb_read_bit(rb) ? + vp9_rb_read_literal(rb, 8) : MAX_PROB; + + cm->temporal_update = vp9_rb_read_bit(rb); + if (cm->temporal_update) { + for (i = 0; i < PREDICTION_PROBS; i++) + cm->segment_pred_probs[i] = vp9_rb_read_bit(rb) ? + vp9_rb_read_literal(rb, 8) : MAX_PROB; + } else { + for (i = 0; i < PREDICTION_PROBS; i++) + cm->segment_pred_probs[i] = MAX_PROB; + } + } - vp9_set_segdata(xd, i, j, data); + // Segmentation data update + xd->update_mb_segmentation_data = vp9_rb_read_bit(rb); + if (xd->update_mb_segmentation_data) { + xd->mb_segment_abs_delta = vp9_rb_read_bit(rb); + + vp9_clearall_segfeatures(xd); + + for (i = 0; i < MAX_MB_SEGMENTS; i++) { + for (j = 0; j < SEG_LVL_MAX; j++) { + int data = 0; + const int feature_enabled = vp9_rb_read_bit(rb); + if (feature_enabled) { + vp9_enable_segfeature(xd, i, j); + data = decode_unsigned_max(rb, vp9_seg_feature_data_max(j)); + if (vp9_is_segfeature_signed(j)) + data = vp9_rb_read_bit(rb) ? -data : data; } + vp9_set_segdata(xd, i, j, data); } } } } -static void setup_loopfilter(VP9_COMMON *pc, MACROBLOCKD *xd, vp9_reader *r) { - int i; - - pc->filter_type = (LOOPFILTERTYPE) vp9_read_bit(r); - pc->filter_level = vp9_read_literal(r, 6); - pc->sharpness_level = vp9_read_literal(r, 3); +static void setup_loopfilter(VP9D_COMP *pbi, struct vp9_read_bit_buffer *rb) { + VP9_COMMON *const cm = &pbi->common; + MACROBLOCKD *const xd = &pbi->mb; -#if CONFIG_LOOP_DERING - if (vp9_read_bit(r)) - pc->dering_enabled = 1 + vp9_read_literal(r, 4); - else - pc->dering_enabled = 0; -#endif + cm->filter_level = vp9_rb_read_literal(rb, 6); + cm->sharpness_level = vp9_rb_read_literal(rb, 3); // Read in loop filter deltas applied at the MB level based on mode or ref // frame. 
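+  // A rough sketch of the syntax parsed below, assuming both the enabled
+  // and update bits read as 1: each delta is coded as a presence bit, a
+  // 6-bit magnitude and a sign bit. For example, the bit string 1 000011 1
+  // decodes as
+  //   value = vp9_rb_read_literal(rb, 6);            // 3
+  //   delta = vp9_rb_read_bit(rb) ? -value : value;  // -3
+  // while a 0 presence bit leaves the previous delta unchanged.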
xd->mode_ref_lf_delta_update = 0; - xd->mode_ref_lf_delta_enabled = vp9_read_bit(r); + xd->mode_ref_lf_delta_enabled = vp9_rb_read_bit(rb); if (xd->mode_ref_lf_delta_enabled) { - // Do the deltas need to be updated - xd->mode_ref_lf_delta_update = vp9_read_bit(r); - + xd->mode_ref_lf_delta_update = vp9_rb_read_bit(rb); if (xd->mode_ref_lf_delta_update) { - // Send update - for (i = 0; i < MAX_REF_LF_DELTAS; i++) { - if (vp9_read_bit(r)) { - // sign = vp9_read_bit(r); - xd->ref_lf_deltas[i] = vp9_read_literal(r, 6); + int i; - if (vp9_read_bit(r)) - xd->ref_lf_deltas[i] = -xd->ref_lf_deltas[i]; // Apply sign + for (i = 0; i < MAX_REF_LF_DELTAS; i++) { + if (vp9_rb_read_bit(rb)) { + const int value = vp9_rb_read_literal(rb, 6); + xd->ref_lf_deltas[i] = vp9_rb_read_bit(rb) ? -value : value; } } - // Send update for (i = 0; i < MAX_MODE_LF_DELTAS; i++) { - if (vp9_read_bit(r)) { - // sign = vp9_read_bit(r); - xd->mode_lf_deltas[i] = vp9_read_literal(r, 6); - - if (vp9_read_bit(r)) - xd->mode_lf_deltas[i] = -xd->mode_lf_deltas[i]; // Apply sign + if (vp9_rb_read_bit(rb)) { + const int value = vp9_rb_read_literal(rb, 6); + xd->mode_lf_deltas[i] = vp9_rb_read_bit(rb) ? -value : value; } } } } } -static const uint8_t *setup_frame_size(VP9D_COMP *pbi, int scaling_active, - const uint8_t *data, - const uint8_t *data_end) { - VP9_COMMON *const pc = &pbi->common; - const int width = pc->width; - const int height = pc->height; - - // If error concealment is enabled we should only parse the new size - // if we have enough data. Otherwise we will end up with the wrong size. - if (scaling_active && data + 4 < data_end) { - pc->display_width = read_le16(data + 0); - pc->display_height = read_le16(data + 2); - data += 4; +static int read_delta_q(struct vp9_read_bit_buffer *rb, int *delta_q) { + const int old = *delta_q; + if (vp9_rb_read_bit(rb)) { + const int value = vp9_rb_read_literal(rb, 4); + *delta_q = vp9_rb_read_bit(rb) ? -value : value; } + return old != *delta_q; +} - if (data + 4 < data_end) { - pc->width = read_le16(data + 0); - pc->height = read_le16(data + 2); - data += 4; +static void setup_quantization(VP9D_COMP *pbi, struct vp9_read_bit_buffer *rb) { + MACROBLOCKD *const xd = &pbi->mb; + VP9_COMMON *const cm = &pbi->common; + int update = 0; + + cm->base_qindex = vp9_rb_read_literal(rb, QINDEX_BITS); + update |= read_delta_q(rb, &cm->y_dc_delta_q); + update |= read_delta_q(rb, &cm->uv_dc_delta_q); + update |= read_delta_q(rb, &cm->uv_ac_delta_q); + if (update) + vp9_init_dequantizer(cm); + + xd->lossless = cm->base_qindex == 0 && + cm->y_dc_delta_q == 0 && + cm->uv_dc_delta_q == 0 && + cm->uv_ac_delta_q == 0; + if (xd->lossless) { + xd->itxm_add = vp9_idct_add_lossless_c; + xd->itxm_add_y_block = vp9_idct_add_y_block_lossless_c; + xd->itxm_add_uv_block = vp9_idct_add_uv_block_lossless_c; + } else { + xd->itxm_add = vp9_idct_add; + xd->itxm_add_y_block = vp9_idct_add_y_block; + xd->itxm_add_uv_block = vp9_idct_add_uv_block; } +} - if (!scaling_active) { - pc->display_width = pc->width; - pc->display_height = pc->height; - } +static INTERPOLATIONFILTERTYPE read_interp_filter_type( + struct vp9_read_bit_buffer *rb) { + return vp9_rb_read_bit(rb) ? 
SWITCHABLE + : vp9_rb_read_literal(rb, 2); +} - if (width != pc->width || height != pc->height) { - if (pc->width <= 0) { - pc->width = width; - vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, - "Invalid frame width"); - } +static void read_frame_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb, + int *width, int *height) { + const int w = vp9_rb_read_literal(rb, 16) + 1; + const int h = vp9_rb_read_literal(rb, 16) + 1; + *width = w; + *height = h; +} - if (pc->height <= 0) { - pc->height = height; - vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, - "Invalid frame height"); - } +static void setup_display_size(VP9D_COMP *pbi, struct vp9_read_bit_buffer *rb) { + VP9_COMMON *const cm = &pbi->common; + cm->display_width = cm->width; + cm->display_height = cm->height; + if (vp9_rb_read_bit(rb)) + read_frame_size(cm, rb, &cm->display_width, &cm->display_height); +} + +static void apply_frame_size(VP9D_COMP *pbi, int width, int height) { + VP9_COMMON *cm = &pbi->common; + if (cm->width != width || cm->height != height) { if (!pbi->initial_width || !pbi->initial_height) { - if (vp9_alloc_frame_buffers(pc, pc->width, pc->height)) - vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, + if (vp9_alloc_frame_buffers(cm, width, height)) + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate frame buffers"); - pbi->initial_width = pc->width; - pbi->initial_height = pc->height; - } + pbi->initial_width = width; + pbi->initial_height = height; + } else { + if (width > pbi->initial_width) + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Frame width too large"); - if (pc->width > pbi->initial_width) { - vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, - "Frame width too large"); + if (height > pbi->initial_height) + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Frame height too large"); } - if (pc->height > pbi->initial_height) { - vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, - "Frame height too large"); - } + cm->width = width; + cm->height = height; - update_frame_size(pbi); + vp9_update_frame_size(cm); } - return data; + vp9_realloc_frame_buffer(&cm->yv12_fb[cm->new_fb_idx], cm->width, cm->height, + cm->subsampling_x, cm->subsampling_y, + VP9BORDERINPIXELS); } -static void update_frame_context(VP9D_COMP *pbi, vp9_reader *r) { - FRAME_CONTEXT *const fc = &pbi->common.fc; +static void setup_frame_size(VP9D_COMP *pbi, + struct vp9_read_bit_buffer *rb) { + VP9_COMMON *const cm = &pbi->common; + int width, height; + read_frame_size(cm, rb, &width, &height); + setup_display_size(pbi, rb); + apply_frame_size(pbi, width, height); +} + +static void setup_frame_size_with_refs(VP9D_COMP *pbi, + struct vp9_read_bit_buffer *rb) { + VP9_COMMON *const cm = &pbi->common; + + int width, height; + int found = 0, i; + for (i = 0; i < ALLOWED_REFS_PER_FRAME; ++i) { + if (vp9_rb_read_bit(rb)) { + YV12_BUFFER_CONFIG *cfg = &cm->yv12_fb[cm->active_ref_idx[i]]; + width = cfg->y_crop_width; + height = cfg->y_crop_height; + found = 1; + break; + } + } + + if (!found) + read_frame_size(cm, rb, &width, &height); + + if (!width || !height) + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Referenced frame with invalid size"); + + setup_display_size(pbi, rb); + apply_frame_size(pbi, width, height); +} - vp9_copy(fc->pre_coef_probs_4x4, fc->coef_probs_4x4); - vp9_copy(fc->pre_coef_probs_8x8, fc->coef_probs_8x8); - vp9_copy(fc->pre_coef_probs_16x16, fc->coef_probs_16x16); - vp9_copy(fc->pre_coef_probs_32x32, fc->coef_probs_32x32); - 
vp9_copy(fc->pre_ymode_prob, fc->ymode_prob);
-  vp9_copy(fc->pre_sb_ymode_prob, fc->sb_ymode_prob);
+static void update_frame_context(FRAME_CONTEXT *fc) {
+  vp9_copy(fc->pre_coef_probs, fc->coef_probs);
+  vp9_copy(fc->pre_y_mode_prob, fc->y_mode_prob);
   vp9_copy(fc->pre_uv_mode_prob, fc->uv_mode_prob);
-  vp9_copy(fc->pre_bmode_prob, fc->bmode_prob);
-  vp9_copy(fc->pre_i8x8_mode_prob, fc->i8x8_mode_prob);
-  vp9_copy(fc->pre_sub_mv_ref_prob, fc->sub_mv_ref_prob);
-  vp9_copy(fc->pre_mbsplit_prob, fc->mbsplit_prob);
+  vp9_copy(fc->pre_partition_prob, fc->partition_prob[1]);
+  vp9_copy(fc->pre_intra_inter_prob, fc->intra_inter_prob);
+  vp9_copy(fc->pre_comp_inter_prob, fc->comp_inter_prob);
+  vp9_copy(fc->pre_single_ref_prob, fc->single_ref_prob);
+  vp9_copy(fc->pre_comp_ref_prob, fc->comp_ref_prob);
   fc->pre_nmvc = fc->nmvc;
-
-  vp9_zero(fc->coef_counts_4x4);
-  vp9_zero(fc->coef_counts_8x8);
-  vp9_zero(fc->coef_counts_16x16);
-  vp9_zero(fc->coef_counts_32x32);
+  vp9_copy(fc->pre_switchable_interp_prob, fc->switchable_interp_prob);
+  vp9_copy(fc->pre_inter_mode_probs, fc->inter_mode_probs);
+  vp9_copy(fc->pre_tx_probs_8x8p, fc->tx_probs_8x8p);
+  vp9_copy(fc->pre_tx_probs_16x16p, fc->tx_probs_16x16p);
+  vp9_copy(fc->pre_tx_probs_32x32p, fc->tx_probs_32x32p);
+  vp9_copy(fc->pre_mbskip_probs, fc->mbskip_probs);
+
+  vp9_zero(fc->coef_counts);
   vp9_zero(fc->eob_branch_counts);
-  vp9_zero(fc->ymode_counts);
-  vp9_zero(fc->sb_ymode_counts);
+  vp9_zero(fc->y_mode_counts);
   vp9_zero(fc->uv_mode_counts);
-  vp9_zero(fc->bmode_counts);
-  vp9_zero(fc->i8x8_mode_counts);
-  vp9_zero(fc->sub_mv_ref_counts);
-  vp9_zero(fc->mbsplit_counts);
   vp9_zero(fc->NMVcount);
-  vp9_zero(fc->mv_ref_ct);
-
-#if CONFIG_COMP_INTERINTRA_PRED
-  fc->pre_interintra_prob = fc->interintra_prob;
-  vp9_zero(fc->interintra_counts);
-#endif
-
-#if CONFIG_CODE_NONZEROCOUNT
-  vp9_copy(fc->pre_nzc_probs_4x4, fc->nzc_probs_4x4);
-  vp9_copy(fc->pre_nzc_probs_8x8, fc->nzc_probs_8x8);
-  vp9_copy(fc->pre_nzc_probs_16x16, fc->nzc_probs_16x16);
-  vp9_copy(fc->pre_nzc_probs_32x32, fc->nzc_probs_32x32);
-  vp9_copy(fc->pre_nzc_pcat_probs, fc->nzc_pcat_probs);
-
-  vp9_zero(fc->nzc_counts_4x4);
-  vp9_zero(fc->nzc_counts_8x8);
-  vp9_zero(fc->nzc_counts_16x16);
-  vp9_zero(fc->nzc_counts_32x32);
-  vp9_zero(fc->nzc_pcat_counts);
-#endif
-
-  read_coef_probs(pbi, r);
-#if CONFIG_CODE_NONZEROCOUNT
-  read_nzc_probs(&pbi->common, r);
-#endif
+  vp9_zero(fc->inter_mode_counts);
+  vp9_zero(fc->partition_counts);
+  vp9_zero(fc->switchable_interp_count);
+  vp9_zero(fc->intra_inter_count);
+  vp9_zero(fc->comp_inter_count);
+  vp9_zero(fc->single_ref_count);
+  vp9_zero(fc->comp_ref_count);
+  vp9_zero(fc->tx_count_8x8p);
+  vp9_zero(fc->tx_count_16x16p);
+  vp9_zero(fc->tx_count_32x32p);
+  vp9_zero(fc->mbskip_count);
 }
 
-static void decode_tiles(VP9D_COMP *pbi,
-                         const uint8_t *data, int first_partition_size,
-                         BOOL_DECODER *header_bc, BOOL_DECODER *residual_bc) {
+static void decode_tile(VP9D_COMP *pbi, vp9_reader *r) {
   VP9_COMMON *const pc = &pbi->common;
-  MACROBLOCKD *const xd = &pbi->mb;
+  int mi_row, mi_col;
+
+  for (mi_row = pc->cur_tile_mi_row_start;
+       mi_row < pc->cur_tile_mi_row_end; mi_row += 64 / MI_SIZE) {
+    // For a SB there are 2 left contexts, each pertaining to a MB row
+    // within the SB; clear them before each new row of superblocks.
+    vpx_memset(&pc->left_context, 0, sizeof(pc->left_context));
+    vpx_memset(pc->left_seg_context, 0, sizeof(pc->left_seg_context));
+    for (mi_col = pc->cur_tile_mi_col_start;
+         mi_col < pc->cur_tile_mi_col_end; mi_col += 64 / MI_SIZE)
+      decode_modes_sb(pbi, mi_row, mi_col, r, 
BLOCK_SIZE_SB64X64); + } +} - const uint8_t *data_ptr = data + first_partition_size; - int tile_row, tile_col, delta_log2_tiles; - int mb_row; +static void setup_tile_info(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) { + int delta_log2_tiles; - vp9_get_tile_n_bits(pc, &pc->log2_tile_columns, &delta_log2_tiles); + vp9_get_tile_n_bits(cm, &cm->log2_tile_columns, &delta_log2_tiles); while (delta_log2_tiles--) { - if (vp9_read_bit(header_bc)) { - pc->log2_tile_columns++; + if (vp9_rb_read_bit(rb)) { + cm->log2_tile_columns++; } else { break; } } - pc->log2_tile_rows = vp9_read_bit(header_bc); - if (pc->log2_tile_rows) - pc->log2_tile_rows += vp9_read_bit(header_bc); - pc->tile_columns = 1 << pc->log2_tile_columns; - pc->tile_rows = 1 << pc->log2_tile_rows; - vpx_memset(pc->above_context, 0, - sizeof(ENTROPY_CONTEXT_PLANES) * pc->mb_cols); + cm->log2_tile_rows = vp9_rb_read_bit(rb); + if (cm->log2_tile_rows) + cm->log2_tile_rows += vp9_rb_read_bit(rb); + + cm->tile_columns = 1 << cm->log2_tile_columns; + cm->tile_rows = 1 << cm->log2_tile_rows; +} + +static void decode_tiles(VP9D_COMP *pbi, + const uint8_t *data, size_t first_partition_size, + vp9_reader *residual_bc) { + VP9_COMMON *const pc = &pbi->common; + + const uint8_t *data_ptr = data + first_partition_size; + const uint8_t* const data_end = pbi->source + pbi->source_sz; + int tile_row, tile_col; + + // Note: this memset assumes above_context[0], [1] and [2] + // are allocated as part of the same buffer. + vpx_memset(pc->above_context[0], 0, sizeof(ENTROPY_CONTEXT) * 2 * + MAX_MB_PLANE * mi_cols_aligned_to_sb(pc)); + + vpx_memset(pc->above_seg_context, 0, sizeof(PARTITION_CONTEXT) * + mi_cols_aligned_to_sb(pc)); if (pbi->oxcf.inv_tile_order) { const int n_cols = pc->tile_columns; const uint8_t *data_ptr2[4][1 << 6]; - BOOL_DECODER bc_bak = {0}; + vp9_reader bc_bak = {0}; // pre-initialize the offsets, we're going to read in inverse order data_ptr2[0][0] = data_ptr; for (tile_row = 0; tile_row < pc->tile_rows; tile_row++) { if (tile_row) { - const int size = read_le32(data_ptr2[tile_row - 1][n_cols - 1]); + const int size = read_be32(data_ptr2[tile_row - 1][n_cols - 1]); data_ptr2[tile_row - 1][n_cols - 1] += 4; data_ptr2[tile_row][0] = data_ptr2[tile_row - 1][n_cols - 1] + size; } for (tile_col = 1; tile_col < n_cols; tile_col++) { - const int size = read_le32(data_ptr2[tile_row][tile_col - 1]); + const int size = read_be32(data_ptr2[tile_row][tile_col - 1]); data_ptr2[tile_row][tile_col - 1] += 4; data_ptr2[tile_row][tile_col] = data_ptr2[tile_row][tile_col - 1] + size; @@ -1573,14 +928,10 @@ static void decode_tiles(VP9D_COMP *pbi, vp9_get_tile_row_offsets(pc, tile_row); for (tile_col = n_cols - 1; tile_col >= 0; tile_col--) { vp9_get_tile_col_offsets(pc, tile_col); - setup_token_decoder(pbi, data_ptr2[tile_row][tile_col], residual_bc); - - // Decode a row of superblocks - for (mb_row = pc->cur_tile_mb_row_start; - mb_row < pc->cur_tile_mb_row_end; mb_row += 4) { - decode_sb_row(pbi, pc, mb_row, xd, residual_bc); - } - + setup_token_decoder(pbi, data_ptr2[tile_row][tile_col], + data_end - data_ptr2[tile_row][tile_col], + residual_bc); + decode_tile(pbi, residual_bc); if (tile_row == pc->tile_rows - 1 && tile_col == n_cols - 1) bc_bak = *residual_bc; } @@ -1592,333 +943,295 @@ static void decode_tiles(VP9D_COMP *pbi, for (tile_row = 0; tile_row < pc->tile_rows; tile_row++) { vp9_get_tile_row_offsets(pc, tile_row); for (tile_col = 0; tile_col < pc->tile_columns; tile_col++) { + size_t size; + vp9_get_tile_col_offsets(pc, tile_col); 
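+        // Sketch of the tile framing consumed below, assuming this is not
+        // the last tile of the frame: every non-final tile is prefixed by
+        // a 4-byte big-endian byte count, so two tiles of 100 and 57 bytes
+        // are laid out as
+        //   [00 00 00 64][100 tile bytes][57 tile bytes]
+        // The final tile carries no length field; it runs to data_end.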
has_more = tile_col < pc->tile_columns - 1 || tile_row < pc->tile_rows - 1; + if (has_more) { + if (!read_is_valid(data_ptr, 4, data_end)) + vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, + "Truncated packet or corrupt tile length"); - // Setup decoder - setup_token_decoder(pbi, data_ptr + (has_more ? 4 : 0), residual_bc); - - // Decode a row of superblocks - for (mb_row = pc->cur_tile_mb_row_start; - mb_row < pc->cur_tile_mb_row_end; mb_row += 4) { - decode_sb_row(pbi, pc, mb_row, xd, residual_bc); + size = read_be32(data_ptr); + data_ptr += 4; + } else { + size = data_end - data_ptr; } - if (has_more) { - const int size = read_le32(data_ptr); - data_ptr += 4 + size; - } + setup_token_decoder(pbi, data_ptr, size, residual_bc); + decode_tile(pbi, residual_bc); + data_ptr += size; } } } } -int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { - BOOL_DECODER header_bc, residual_bc; - VP9_COMMON *const pc = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; - const uint8_t *data = (const uint8_t *)pbi->Source; - const uint8_t *data_end = data + pbi->source_sz; - ptrdiff_t first_partition_length_in_bytes = 0; - int i, corrupt_tokens = 0; +static void check_sync_code(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) { + if (vp9_rb_read_literal(rb, 8) != SYNC_CODE_0 || + vp9_rb_read_literal(rb, 8) != SYNC_CODE_1 || + vp9_rb_read_literal(rb, 8) != SYNC_CODE_2) { + vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, + "Invalid frame sync code"); + } +} - // printf("Decoding frame %d\n", pc->current_video_frame); +static void error_handler(void *data, int bit_offset) { + VP9_COMMON *const cm = (VP9_COMMON *)data; + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet"); +} - xd->corrupted = 0; // start with no corruption of current frame - pc->yv12_fb[pc->new_fb_idx].corrupted = 0; +static void setup_inter_inter(VP9_COMMON *cm) { + int i; - if (data_end - data < 3) { - vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet"); - } else { - int scaling_active; - pc->last_frame_type = pc->frame_type; - pc->frame_type = (FRAME_TYPE)(data[0] & 1); - pc->version = (data[0] >> 1) & 7; - pc->show_frame = (data[0] >> 4) & 1; - scaling_active = (data[0] >> 5) & 1; - first_partition_length_in_bytes = read_le16(data + 1); - - if (!read_is_valid(data, first_partition_length_in_bytes, data_end)) - vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, - "Truncated packet or corrupt partition 0 length"); + cm->allow_comp_inter_inter = 0; + for (i = 0; i < ALLOWED_REFS_PER_FRAME; ++i) { + cm->allow_comp_inter_inter |= i > 0 && + cm->ref_frame_sign_bias[i + 1] != cm->ref_frame_sign_bias[1]; + } - data += 3; + if (cm->allow_comp_inter_inter) { + // which one is always-on in comp inter-inter? 
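+    // The fixed reference is the one whose sign bias disagrees with the
+    // other two; the remaining pair become the variable references. For
+    // example, with sign biases {LAST: 0, GOLDEN: 0, ALTREF: 1} (two past
+    // references, one future), ALTREF is the fixed compound reference and
+    // {LAST, GOLDEN} are the variable pair.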
+ if (cm->ref_frame_sign_bias[LAST_FRAME] == + cm->ref_frame_sign_bias[GOLDEN_FRAME]) { + cm->comp_fixed_ref = ALTREF_FRAME; + cm->comp_var_ref[0] = LAST_FRAME; + cm->comp_var_ref[1] = GOLDEN_FRAME; + } else if (cm->ref_frame_sign_bias[LAST_FRAME] == + cm->ref_frame_sign_bias[ALTREF_FRAME]) { + cm->comp_fixed_ref = GOLDEN_FRAME; + cm->comp_var_ref[0] = LAST_FRAME; + cm->comp_var_ref[1] = ALTREF_FRAME; + } else { + cm->comp_fixed_ref = LAST_FRAME; + cm->comp_var_ref[0] = GOLDEN_FRAME; + cm->comp_var_ref[1] = ALTREF_FRAME; + } + } +} - vp9_setup_version(pc); +#define RESERVED \ + if (vp9_rb_read_bit(rb)) \ + vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, \ + "Reserved bit must be unset") - if (pc->frame_type == KEY_FRAME) { - // When error concealment is enabled we should only check the sync - // code if we have enough bits available - if (data + 3 < data_end) { - if (data[0] != 0x9d || data[1] != 0x01 || data[2] != 0x2a) - vpx_internal_error(&pc->error, VPX_CODEC_UNSUP_BITSTREAM, - "Invalid frame sync code"); - } - data += 3; - } +static size_t read_uncompressed_header(VP9D_COMP *pbi, + struct vp9_read_bit_buffer *rb) { + VP9_COMMON *const cm = &pbi->common; + MACROBLOCKD *const xd = &pbi->mb; + int i; - data = setup_frame_size(pbi, scaling_active, data, data_end); - } + cm->last_frame_type = cm->frame_type; - if ((!pbi->decoded_key_frame && pc->frame_type != KEY_FRAME) || - pc->width == 0 || pc->height == 0) { - return -1; - } + if (vp9_rb_read_literal(rb, 2) != 0x2) + vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, + "Invalid frame marker"); - init_frame(pbi); + cm->version = vp9_rb_read_bit(rb); + RESERVED; - // Reset the frame pointers to the current frame size - vp8_yv12_realloc_frame_buffer(&pc->yv12_fb[pc->new_fb_idx], - pc->width, pc->height, - VP9BORDERINPIXELS); + if (vp9_rb_read_bit(rb)) { + // show an existing frame directly + int frame_to_show = cm->ref_frame_map[vp9_rb_read_literal(rb, 3)]; + ref_cnt_fb(cm->fb_idx_ref_cnt, &cm->new_fb_idx, frame_to_show); + pbi->refresh_frame_flags = 0; + cm->filter_level = 0; + return 0; + } - if (vp9_start_decode(&header_bc, data, - (unsigned int)first_partition_length_in_bytes)) - vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate bool decoder 0"); + cm->frame_type = (FRAME_TYPE) vp9_rb_read_bit(rb); + cm->show_frame = vp9_rb_read_bit(rb); + cm->error_resilient_mode = vp9_rb_read_bit(rb); - pc->clr_type = (YUV_TYPE)vp9_read_bit(&header_bc); - pc->clamp_type = (CLAMP_TYPE)vp9_read_bit(&header_bc); - pc->error_resilient_mode = vp9_read_bit(&header_bc); + if (cm->frame_type == KEY_FRAME) { + int csp; - setup_segmentation(pc, xd, &header_bc); + check_sync_code(cm, rb); - // Read common prediction model status flag probability updates for the - // reference frame - if (pc->frame_type == KEY_FRAME) { - // Set the prediction probabilities to defaults - pc->ref_pred_probs[0] = 120; - pc->ref_pred_probs[1] = 80; - pc->ref_pred_probs[2] = 40; - } else { - for (i = 0; i < PREDICTION_PROBS; i++) { - if (vp9_read_bit(&header_bc)) - pc->ref_pred_probs[i] = vp9_read_prob(&header_bc); + csp = vp9_rb_read_literal(rb, 3); // colorspace + if (csp != 7) { // != sRGB + vp9_rb_read_bit(rb); // [16,235] (including xvycc) vs [0,255] range + if (cm->version == 1) { + cm->subsampling_x = vp9_rb_read_bit(rb); + cm->subsampling_y = vp9_rb_read_bit(rb); + vp9_rb_read_bit(rb); // has extra plane + } else { + cm->subsampling_y = cm->subsampling_x = 1; + } + } else { + if (cm->version == 1) { + cm->subsampling_y = 
cm->subsampling_x = 0; + vp9_rb_read_bit(rb); // has extra plane + } else { + vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, + "RGB not supported in profile 0"); + } } - } - pc->sb64_coded = vp9_read_prob(&header_bc); - pc->sb32_coded = vp9_read_prob(&header_bc); - xd->lossless = vp9_read_bit(&header_bc); - if (xd->lossless) { - pc->txfm_mode = ONLY_4X4; + pbi->refresh_frame_flags = (1 << NUM_REF_FRAMES) - 1; + + for (i = 0; i < ALLOWED_REFS_PER_FRAME; ++i) + cm->active_ref_idx[i] = cm->new_fb_idx; + + setup_frame_size(pbi, rb); } else { - // Read the loop filter level and type - pc->txfm_mode = vp9_read_literal(&header_bc, 2); - if (pc->txfm_mode == ALLOW_32X32) - pc->txfm_mode += vp9_read_bit(&header_bc); + cm->intra_only = cm->show_frame ? 0 : vp9_rb_read_bit(rb); - if (pc->txfm_mode == TX_MODE_SELECT) { - pc->prob_tx[0] = vp9_read_prob(&header_bc); - pc->prob_tx[1] = vp9_read_prob(&header_bc); - pc->prob_tx[2] = vp9_read_prob(&header_bc); - } - } + cm->reset_frame_context = cm->error_resilient_mode ? + 0 : vp9_rb_read_literal(rb, 2); + + if (cm->intra_only) { + check_sync_code(cm, rb); - setup_loopfilter(pc, xd, &header_bc); + pbi->refresh_frame_flags = vp9_rb_read_literal(rb, NUM_REF_FRAMES); + setup_frame_size(pbi, rb); + } else { + pbi->refresh_frame_flags = vp9_rb_read_literal(rb, NUM_REF_FRAMES); - // Dummy read for now - vp9_read_literal(&header_bc, 2); + for (i = 0; i < ALLOWED_REFS_PER_FRAME; ++i) { + const int ref = vp9_rb_read_literal(rb, NUM_REF_FRAMES_LG2); + cm->active_ref_idx[i] = cm->ref_frame_map[ref]; + cm->ref_frame_sign_bias[LAST_FRAME + i] = vp9_rb_read_bit(rb); + } - /* Read the default quantizers. */ - { - int q_update = 0; - pc->base_qindex = vp9_read_literal(&header_bc, QINDEX_BITS); + setup_frame_size_with_refs(pbi, rb); - /* AC 1st order Q = default */ - pc->y1dc_delta_q = get_delta_q(&header_bc, pc->y1dc_delta_q, &q_update); - pc->uvdc_delta_q = get_delta_q(&header_bc, pc->uvdc_delta_q, &q_update); - pc->uvac_delta_q = get_delta_q(&header_bc, pc->uvac_delta_q, &q_update); + xd->allow_high_precision_mv = vp9_rb_read_bit(rb); + cm->mcomp_filter_type = read_interp_filter_type(rb); - if (q_update) - vp9_init_de_quantizer(pbi); + for (i = 0; i < ALLOWED_REFS_PER_FRAME; ++i) + vp9_setup_scale_factors(cm, i); - /* MB level dequantizer setup */ - mb_init_dequantizer(pbi, &pbi->mb); + setup_inter_inter(cm); + } } - // Determine if the golden frame or ARF buffer should be updated and how. - // For all non key frames the GF and ARF refresh flags and sign bias - // flags must be set explicitly. 
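+  // The equivalent bookkeeping now lives in refresh_frame_flags above:
+  // a NUM_REF_FRAMES-wide bitmask with one bit per reference slot. For
+  // example, assuming NUM_REF_FRAMES == 8, a value of 0x01 refreshes
+  // only slot 0, while a key frame forces all slots with
+  // (1 << NUM_REF_FRAMES) - 1 == 0xff.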
- if (pc->frame_type == KEY_FRAME) { - pc->active_ref_idx[0] = pc->new_fb_idx; - pc->active_ref_idx[1] = pc->new_fb_idx; - pc->active_ref_idx[2] = pc->new_fb_idx; + if (!cm->error_resilient_mode) { + cm->refresh_frame_context = vp9_rb_read_bit(rb); + cm->frame_parallel_decoding_mode = vp9_rb_read_bit(rb); } else { - // Should the GF or ARF be updated from the current frame - pbi->refresh_frame_flags = vp9_read_literal(&header_bc, NUM_REF_FRAMES); + cm->refresh_frame_context = 0; + cm->frame_parallel_decoding_mode = 1; + } - // Select active reference frames - for (i = 0; i < 3; i++) { - int ref_frame_num = vp9_read_literal(&header_bc, NUM_REF_FRAMES_LG2); - pc->active_ref_idx[i] = pc->ref_frame_map[ref_frame_num]; - } + cm->frame_context_idx = vp9_rb_read_literal(rb, NUM_FRAME_CONTEXTS_LG2); - pc->ref_frame_sign_bias[GOLDEN_FRAME] = vp9_read_bit(&header_bc); - pc->ref_frame_sign_bias[ALTREF_FRAME] = vp9_read_bit(&header_bc); + if ((cm->frame_type == KEY_FRAME) || + cm->error_resilient_mode || cm->intra_only) + vp9_setup_past_independence(cm, xd); - // Is high precision mv allowed - xd->allow_high_precision_mv = vp9_read_bit(&header_bc); + setup_loopfilter(pbi, rb); + setup_quantization(pbi, rb); + setup_segmentation(pbi, rb); - // Read the type of subpel filter to use - pc->mcomp_filter_type = vp9_read_bit(&header_bc) - ? SWITCHABLE - : vp9_read_literal(&header_bc, 2); + setup_tile_info(cm, rb); -#if CONFIG_COMP_INTERINTRA_PRED - pc->use_interintra = vp9_read_bit(&header_bc); -#endif - // To enable choice of different interploation filters - vp9_setup_interp_filters(xd, pc->mcomp_filter_type, pc); - } + return vp9_rb_read_literal(rb, 16); +} - if (!pc->error_resilient_mode) { - pc->refresh_entropy_probs = vp9_read_bit(&header_bc); - pc->frame_parallel_decoding_mode = vp9_read_bit(&header_bc); - } else { - pc->refresh_entropy_probs = 0; - pc->frame_parallel_decoding_mode = 1; - } - pc->frame_context_idx = vp9_read_literal(&header_bc, NUM_FRAME_CONTEXTS_LG2); - vpx_memcpy(&pc->fc, &pc->frame_contexts[pc->frame_context_idx], - sizeof(pc->fc)); - - // Read inter mode probability context updates - if (pc->frame_type != KEY_FRAME) { - int i, j; - for (i = 0; i < INTER_MODE_CONTEXTS; i++) { - for (j = 0; j < 4; j++) { - if (vp9_read(&header_bc, 252)) { - pc->fc.vp9_mode_contexts[i][j] = vp9_read_prob(&header_bc); - } - } - } - } -#if CONFIG_MODELCOEFPROB && ADJUST_KF_COEF_PROBS - if (pc->frame_type == KEY_FRAME) - vp9_adjust_default_coef_probs(pc); -#endif +int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { + int i; + vp9_reader header_bc, residual_bc; + VP9_COMMON *const pc = &pbi->common; + MACROBLOCKD *const xd = &pbi->mb; -#if CONFIG_NEW_MVREF - // If Key frame reset mv ref id probabilities to defaults - if (pc->frame_type != KEY_FRAME) { - // Read any mv_ref index probability updates - int i, j; + const uint8_t *data = pbi->source; + const uint8_t *data_end = pbi->source + pbi->source_sz; - for (i = 0; i < MAX_REF_FRAMES; ++i) { - // Skip the dummy entry for intra ref frame. 
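+      // Note: INTRA_FRAME occupies index 0 of the reference-frame enum,
+      // so the loop skipped it; only the inter references carried the
+      // MAX_MV_REF_CANDIDATES - 1 mv_ref tree probabilities, each update
+      // gated on VP9_MVREF_UPDATE_PROB.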
- if (i == INTRA_FRAME) { - continue; - } + struct vp9_read_bit_buffer rb = { data, data_end, 0, + pc, error_handler }; + const size_t first_partition_size = read_uncompressed_header(pbi, &rb); + const int keyframe = pc->frame_type == KEY_FRAME; + YV12_BUFFER_CONFIG *new_fb = &pc->yv12_fb[pc->new_fb_idx]; - // Read any updates to probabilities - for (j = 0; j < MAX_MV_REF_CANDIDATES - 1; ++j) { - if (vp9_read(&header_bc, VP9_MVREF_UPDATE_PROB)) { - xd->mb_mv_ref_probs[i][j] = vp9_read_prob(&header_bc); - } - } - } + if (!first_partition_size) { + // showing a frame directly + *p_data_end = data + 1; + return 0; } -#endif + data += vp9_rb_bytes_read(&rb); + xd->corrupted = 0; + new_fb->corrupted = 0; - if (0) { - FILE *z = fopen("decodestats.stt", "a"); - fprintf(z, "%6d F:%d,R:%d,Q:%d\n", - pc->current_video_frame, - pc->frame_type, - pbi->refresh_frame_flags, - pc->base_qindex); - fclose(z); - } + if (!pbi->decoded_key_frame && !keyframe) + return -1; + + if (!read_is_valid(data, first_partition_size, data_end)) + vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, + "Truncated packet or corrupt header length"); + + xd->mode_info_context = pc->mi; + xd->prev_mode_info_context = pc->prev_mi; + xd->frame_type = pc->frame_type; + xd->mode_info_stride = pc->mode_info_stride; + + if (vp9_reader_init(&header_bc, data, first_partition_size)) + vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate bool decoder 0"); + + mb_init_dequantizer(pc, &pbi->mb); // MB level dequantizer setup - update_frame_context(pbi, &header_bc); + if (!keyframe) + vp9_setup_interp_filters(xd, pc->mcomp_filter_type, pc); + + pc->fc = pc->frame_contexts[pc->frame_context_idx]; + + update_frame_context(&pc->fc); + + setup_txfm_mode(pc, xd->lossless, &header_bc); + + read_coef_probs(pbi, &header_bc); // Initialize xd pointers. Any reference should do for xd->pre, so use 0. - vpx_memcpy(&xd->pre, &pc->yv12_fb[pc->active_ref_idx[0]], - sizeof(YV12_BUFFER_CONFIG)); - vpx_memcpy(&xd->dst, &pc->yv12_fb[pc->new_fb_idx], - sizeof(YV12_BUFFER_CONFIG)); + setup_pre_planes(xd, &pc->yv12_fb[pc->active_ref_idx[0]], NULL, + 0, 0, NULL, NULL); + setup_dst_planes(xd, new_fb, 0, 0); // Create the segmentation map structure and set to 0 if (!pc->last_frame_seg_map) CHECK_MEM_ERROR(pc->last_frame_seg_map, - vpx_calloc((pc->mb_rows * pc->mb_cols), 1)); - - /* set up frame new frame for intra coded blocks */ - vp9_setup_intra_recon(&pc->yv12_fb[pc->new_fb_idx]); + vpx_calloc((pc->mi_rows * pc->mi_cols), 1)); - vp9_setup_block_dptrs(xd); + vp9_setup_block_dptrs(xd, pc->subsampling_x, pc->subsampling_y); - vp9_build_block_doffsets(xd); + // clear out the coeff buffer + for (i = 0; i < MAX_MB_PLANE; ++i) + vp9_zero(xd->plane[i].qcoeff); - /* clear out the coeff buffer */ - vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff)); - - /* Read the mb_no_coeff_skip flag */ - pc->mb_no_coeff_skip = (int)vp9_read_bit(&header_bc); + set_prev_mi(pc); vp9_decode_mode_mvs_init(pbi, &header_bc); - decode_tiles(pbi, data, first_partition_length_in_bytes, - &header_bc, &residual_bc); - corrupt_tokens |= xd->corrupted; + decode_tiles(pbi, data, first_partition_size, &residual_bc); - // keep track of the last coded dimensions pc->last_width = pc->width; pc->last_height = pc->height; - // Collect information about decoder corruption. - // 1. Check first boolean decoder for errors. - // 2. 
Check the macroblock information - pc->yv12_fb[pc->new_fb_idx].corrupted = bool_error(&header_bc) | - corrupt_tokens; + new_fb->corrupted = vp9_reader_has_error(&header_bc) | xd->corrupted; if (!pbi->decoded_key_frame) { - if (pc->frame_type == KEY_FRAME && !pc->yv12_fb[pc->new_fb_idx].corrupted) + if (keyframe && !new_fb->corrupted) pbi->decoded_key_frame = 1; else - vpx_internal_error(&pbi->common.error, VPX_CODEC_CORRUPT_FRAME, + vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, "A stream must start with a complete key frame"); } + // Adaptation if (!pc->error_resilient_mode && !pc->frame_parallel_decoding_mode) { vp9_adapt_coef_probs(pc); -#if CONFIG_CODE_NONZEROCOUNT - vp9_adapt_nzc_probs(pc); -#endif - } - if (pc->frame_type != KEY_FRAME) { - if (!pc->error_resilient_mode && !pc->frame_parallel_decoding_mode) { + if ((!keyframe) && (!pc->intra_only)) { vp9_adapt_mode_probs(pc); + vp9_adapt_mode_context(pc); vp9_adapt_nmv_probs(pc, xd->allow_high_precision_mv); - vp9_adapt_mode_context(&pbi->common); } } - if (pc->refresh_entropy_probs) { - vpx_memcpy(&pc->frame_contexts[pc->frame_context_idx], &pc->fc, - sizeof(pc->fc)); - } + if (pc->refresh_frame_context) + pc->frame_contexts[pc->frame_context_idx] = pc->fc; -#ifdef PACKET_TESTING - { - FILE *f = fopen("decompressor.VP8", "ab"); - unsigned int size = residual_bc.pos + header_bc.pos + 8; - fwrite((void *) &size, 4, 1, f); - fwrite((void *) pbi->Source, size, 1, f); - fclose(f); - } -#endif - - /* Find the end of the coded buffer */ - while (residual_bc.count > CHAR_BIT && - residual_bc.count < VP9_BD_VALUE_SIZE) { - residual_bc.count -= CHAR_BIT; - residual_bc.user_buffer--; - } - *p_data_end = residual_bc.user_buffer; + *p_data_end = vp9_reader_find_end(&residual_bc); return 0; } diff --git a/vp9/decoder/vp9_decodframe.h b/vp9/decoder/vp9_decodframe.h index 391a265191d26620d28cdc85af85e0d8e06b2f30..66e951d10595533070bc159845421b188a490fda 100644 --- a/vp9/decoder/vp9_decodframe.h +++ b/vp9/decoder/vp9_decodframe.h @@ -12,8 +12,11 @@ #ifndef VP9_DECODER_VP9_DECODFRAME_H_ #define VP9_DECODER_VP9_DECODFRAME_H_ +struct VP9Common; struct VP9Decompressor; -void vp9_init_de_quantizer(struct VP9Decompressor *pbi); +void vp9_init_dequantizer(struct VP9Common *pc); +int vp9_decode_frame(struct VP9Decompressor *cpi, const uint8_t **p_data_end); +vp9_prob vp9_read_prob_diff_update(vp9_reader *r, int oldp); #endif // VP9_DECODER_VP9_DECODFRAME_H_ diff --git a/vp9/decoder/vp9_dequantize.c b/vp9/decoder/vp9_dequantize.c deleted file mode 100644 index 9aebcdcfccaf3cd8ee65c2b82cae931639740239..0000000000000000000000000000000000000000 --- a/vp9/decoder/vp9_dequantize.c +++ /dev/null @@ -1,401 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include "vp9_rtcd.h" -#include "vp9/decoder/vp9_dequantize.h" -#include "vpx_mem/vpx_mem.h" -#include "vp9/decoder/vp9_onyxd_int.h" -#include "vp9/common/vp9_common.h" - - -static void add_residual(const int16_t *diff, const uint8_t *pred, int pitch, - uint8_t *dest, int stride, int width, int height) { - int r, c; - - for (r = 0; r < height; r++) { - for (c = 0; c < width; c++) - dest[c] = clip_pixel(diff[c] + pred[c]); - - dest += stride; - diff += width; - pred += pitch; - } -} - -void vp9_add_residual_4x4_c(const int16_t *diff, const uint8_t *pred, int pitch, - uint8_t *dest, int stride) { - add_residual(diff, pred, pitch, dest, stride, 4, 4); -} - -void vp9_add_residual_8x8_c(const int16_t *diff, const uint8_t *pred, int pitch, - uint8_t *dest, int stride) { - add_residual(diff, pred, pitch, dest, stride, 8, 8); -} - -void vp9_add_residual_16x16_c(const int16_t *diff, const uint8_t *pred, - int pitch, uint8_t *dest, int stride) { - add_residual(diff, pred, pitch, dest, stride, 16, 16); -} - -void vp9_add_residual_32x32_c(const int16_t *diff, const uint8_t *pred, - int pitch, uint8_t *dest, int stride) { - add_residual(diff, pred, pitch, dest, stride, 32, 32); -} - -static void add_constant_residual(const int16_t diff, const uint8_t *pred, - int pitch, uint8_t *dest, int stride, - int width, int height) { - int r, c; - - for (r = 0; r < height; r++) { - for (c = 0; c < width; c++) - dest[c] = clip_pixel(diff + pred[c]); - - dest += stride; - pred += pitch; - } -} - -void vp9_add_constant_residual_8x8_c(const int16_t diff, const uint8_t *pred, - int pitch, uint8_t *dest, int stride) { - add_constant_residual(diff, pred, pitch, dest, stride, 8, 8); -} - -void vp9_add_constant_residual_16x16_c(const int16_t diff, const uint8_t *pred, - int pitch, uint8_t *dest, int stride) { - add_constant_residual(diff, pred, pitch, dest, stride, 16, 16); -} - -void vp9_add_constant_residual_32x32_c(const int16_t diff, const uint8_t *pred, - int pitch, uint8_t *dest, int stride) { - add_constant_residual(diff, pred, pitch, dest, stride, 32, 32); -} - -void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input, - const int16_t *dq, - uint8_t *pred, uint8_t *dest, - int pitch, int stride, int eob) { - int i; - DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16); - - for (i = 0; i < 16; i++) - input[i] *= dq[i]; - - vp9_short_iht4x4(input, output, 4, tx_type); - vpx_memset(input, 0, 32); - vp9_add_residual_4x4(output, pred, pitch, dest, stride); -} - -void vp9_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, int16_t *input, - const int16_t *dq, - uint8_t *pred, uint8_t *dest, - int pitch, int stride, int eob) { - DECLARE_ALIGNED_ARRAY(16, int16_t, output, 64); - - if (eob == 0) { - // All 0 DCT coefficients - vp9_copy_mem8x8(pred, pitch, dest, stride); - } else if (eob > 0) { - int i; - - input[0] *= dq[0]; - for (i = 1; i < 64; i++) - input[i] *= dq[1]; - - vp9_short_iht8x8(input, output, 8, tx_type); - vpx_memset(input, 0, 128); - vp9_add_residual_8x8(output, pred, pitch, dest, stride); - } -} - -void vp9_dequant_idct_add_c(int16_t *input, const int16_t *dq, uint8_t *pred, - uint8_t *dest, int pitch, int stride, int eob) { - int i; - DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16); - - if (eob > 1) { - for (i = 0; i < 16; i++) - input[i] *= dq[i]; - - // the idct halves ( >> 1) the pitch - vp9_short_idct4x4(input, output, 4 << 1); - - vpx_memset(input, 0, 32); - - vp9_add_residual_4x4(output, pred, pitch, dest, stride); - } else { - vp9_dc_only_idct_add(input[0]*dq[0], pred, dest, pitch, stride); - 
((int *)input)[0] = 0; - } -} - -void vp9_dequant_dc_idct_add_c(int16_t *input, const int16_t *dq, uint8_t *pred, - uint8_t *dest, int pitch, int stride, int dc) { - int i; - DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16); - - input[0] = dc; - - for (i = 1; i < 16; i++) - input[i] *= dq[i]; - - // the idct halves ( >> 1) the pitch - vp9_short_idct4x4(input, output, 4 << 1); - vpx_memset(input, 0, 32); - vp9_add_residual_4x4(output, pred, pitch, dest, stride); -} - -void vp9_dequant_idct_add_lossless_c(int16_t *input, const int16_t *dq, - uint8_t *pred, uint8_t *dest, - int pitch, int stride, int eob) { - int i; - DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16); - - if (eob > 1) { - for (i = 0; i < 16; i++) - input[i] *= dq[i]; - - vp9_short_iwalsh4x4_c(input, output, 4 << 1); - - vpx_memset(input, 0, 32); - - vp9_add_residual_4x4(output, pred, pitch, dest, stride); - } else { - vp9_dc_only_inv_walsh_add(input[0]*dq[0], pred, dest, pitch, stride); - ((int *)input)[0] = 0; - } -} - -void vp9_dequant_dc_idct_add_lossless_c(int16_t *input, const int16_t *dq, - uint8_t *pred, - uint8_t *dest, - int pitch, int stride, int dc) { - int i; - DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16); - - input[0] = dc; - - for (i = 1; i < 16; i++) - input[i] *= dq[i]; - - vp9_short_iwalsh4x4_c(input, output, 4 << 1); - vpx_memset(input, 0, 32); - vp9_add_residual_4x4(output, pred, pitch, dest, stride); -} - -void vp9_dequant_idct_add_8x8_c(int16_t *input, const int16_t *dq, - uint8_t *pred, uint8_t *dest, int pitch, - int stride, int eob) { - DECLARE_ALIGNED_ARRAY(16, int16_t, output, 64); - - // If dc is 1, then input[0] is the reconstructed value, do not need - // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1. - input[0] *= dq[0]; - - // The calculation can be simplified if there are not many non-zero dct - // coefficients. Use eobs to decide what to do. - // TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c. - // Combine that with code here. - if (eob == 0) { - // All 0 DCT coefficients - vp9_copy_mem8x8(pred, pitch, dest, stride); - } else if (eob == 1) { - // DC only DCT coefficient - int16_t in = input[0]; - int16_t out; - - // Note: the idct1 will need to be modified accordingly whenever - // vp9_short_idct8x8_c() is modified. 
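+    // The shortcut below relies on a DC-only block inverse-transforming
+    // to a constant residual: the single output value is computed once,
+    // and vp9_add_constant_residual_8x8() adds it (with pixel clamping)
+    // to all 64 predicted pixels, avoiding a full 8x8 inverse transform.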
- vp9_short_idct1_8x8_c(&in, &out); - input[0] = 0; - - vp9_add_constant_residual_8x8(out, pred, pitch, dest, stride); -#if !CONFIG_SCATTERSCAN - } else if (eob <= 10) { - input[1] *= dq[1]; - input[2] *= dq[1]; - input[3] *= dq[1]; - input[8] *= dq[1]; - input[9] *= dq[1]; - input[10] *= dq[1]; - input[16] *= dq[1]; - input[17] *= dq[1]; - input[24] *= dq[1]; - - vp9_short_idct10_8x8(input, output, 16); - - input[0] = input[1] = input[2] = input[3] = 0; - input[8] = input[9] = input[10] = 0; - input[16] = input[17] = 0; - input[24] = 0; - - vp9_add_residual_8x8(output, pred, pitch, dest, stride); -#endif - } else { - int i; - - // recover quantizer for 4 4x4 blocks - for (i = 1; i < 64; i++) - input[i] *= dq[1]; - - // the idct halves ( >> 1) the pitch - vp9_short_idct8x8(input, output, 8 << 1); - vpx_memset(input, 0, 128); - vp9_add_residual_8x8(output, pred, pitch, dest, stride); - } -} - -void vp9_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, int16_t *input, - const int16_t *dq, uint8_t *pred, - uint8_t *dest, int pitch, int stride, - int eob) { - DECLARE_ALIGNED_ARRAY(16, int16_t, output, 256); - - if (eob == 0) { - // All 0 DCT coefficients - vp9_copy_mem16x16(pred, pitch, dest, stride); - } else if (eob > 0) { - int i; - - input[0] *= dq[0]; - - // recover quantizer for 4 4x4 blocks - for (i = 1; i < 256; i++) - input[i] *= dq[1]; - - // inverse hybrid transform - vp9_short_iht16x16(input, output, 16, tx_type); - - // the idct halves ( >> 1) the pitch - // vp9_short_idct16x16(input, output, 32); - - vpx_memset(input, 0, 512); - - vp9_add_residual_16x16(output, pred, pitch, dest, stride); - } -} - -void vp9_dequant_idct_add_16x16_c(int16_t *input, const int16_t *dq, - uint8_t *pred, uint8_t *dest, int pitch, - int stride, int eob) { - DECLARE_ALIGNED_ARRAY(16, int16_t, output, 256); - - /* The calculation can be simplified if there are not many non-zero dct - * coefficients. Use eobs to separate different cases. */ - if (eob == 0) { - /* All 0 DCT coefficient */ - vp9_copy_mem16x16(pred, pitch, dest, stride); - } else if (eob == 1) { - /* DC only DCT coefficient. */ - int16_t in = input[0] * dq[0]; - int16_t out; - /* Note: the idct1 will need to be modified accordingly whenever - * vp9_short_idct16x16() is modified. 
*/ - vp9_short_idct1_16x16_c(&in, &out); - input[0] = 0; - - vp9_add_constant_residual_16x16(out, pred, pitch, dest, stride); -#if !CONFIG_SCATTERSCAN - } else if (eob <= 10) { - input[0] *= dq[0]; - - input[1] *= dq[1]; - input[2] *= dq[1]; - input[3] *= dq[1]; - input[16] *= dq[1]; - input[17] *= dq[1]; - input[18] *= dq[1]; - input[32] *= dq[1]; - input[33] *= dq[1]; - input[48] *= dq[1]; - - // the idct halves ( >> 1) the pitch - vp9_short_idct10_16x16(input, output, 32); - - input[0] = input[1] = input[2] = input[3] = 0; - input[16] = input[17] = input[18] = 0; - input[32] = input[33] = 0; - input[48] = 0; - - vp9_add_residual_16x16(output, pred, pitch, dest, stride); -#endif - } else { - int i; - - input[0] *= dq[0]; - - // recover quantizer for 4 4x4 blocks - for (i = 1; i < 256; i++) - input[i] *= dq[1]; - - // the idct halves ( >> 1) the pitch - vp9_short_idct16x16(input, output, 16 << 1); - - vpx_memset(input, 0, 512); - - vp9_add_residual_16x16(output, pred, pitch, dest, stride); - } -} - -void vp9_dequant_idct_add_32x32_c(int16_t *input, const int16_t *dq, - uint8_t *pred, uint8_t *dest, int pitch, - int stride, int eob) { - DECLARE_ALIGNED_ARRAY(16, int16_t, output, 1024); - - if (eob) { - input[0] = input[0] * dq[0] / 2; - if (eob == 1) { - vp9_short_idct1_32x32(input, output); - vp9_add_constant_residual_32x32(output[0], pred, pitch, dest, stride); - input[0] = 0; -#if !CONFIG_SCATTERSCAN - } else if (eob <= 10) { - input[1] = input[1] * dq[1] / 2; - input[2] = input[2] * dq[1] / 2; - input[3] = input[3] * dq[1] / 2; - input[32] = input[32] * dq[1] / 2; - input[33] = input[33] * dq[1] / 2; - input[34] = input[34] * dq[1] / 2; - input[64] = input[64] * dq[1] / 2; - input[65] = input[65] * dq[1] / 2; - input[96] = input[96] * dq[1] / 2; - - // the idct halves ( >> 1) the pitch - vp9_short_idct10_32x32(input, output, 64); - - input[0] = input[1] = input[2] = input[3] = 0; - input[32] = input[33] = input[34] = 0; - input[64] = input[65] = 0; - input[96] = 0; - - vp9_add_residual_32x32(output, pred, pitch, dest, stride); -#endif - } else { - int i; - for (i = 1; i < 1024; i++) - input[i] = input[i] * dq[1] / 2; - vp9_short_idct32x32(input, output, 64); - vpx_memset(input, 0, 2048); - vp9_add_residual_32x32(output, pred, pitch, dest, stride); - } - } -} - -void vp9_dequant_idct_add_uv_block_16x16_c(int16_t *q, const int16_t *dq, - uint8_t *dstu, - uint8_t *dstv, - int stride, - MACROBLOCKD *xd) { - vp9_dequant_idct_add_16x16_c(q, dq, dstu, dstu, stride, stride, - xd->eobs[64]); - vp9_dequant_idct_add_16x16_c(q + 256, dq, dstv, dstv, stride, stride, - xd->eobs[80]); -} diff --git a/vp9/decoder/vp9_dequantize.h b/vp9/decoder/vp9_dequantize.h deleted file mode 100644 index bde27bb7aca1ebd627a08227190a3553491e55c5..0000000000000000000000000000000000000000 --- a/vp9/decoder/vp9_dequantize.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#ifndef VP9_DECODER_VP9_DEQUANTIZE_H_ -#define VP9_DECODER_VP9_DEQUANTIZE_H_ - -#include "vp9/common/vp9_blockd.h" - - -void vp9_dequant_idct_add_lossless_c(int16_t *input, const int16_t *dq, - unsigned char *pred, - unsigned char *output, - int pitch, int stride, int eob); - -void vp9_dequant_dc_idct_add_lossless_c(int16_t *input, const int16_t *dq, - unsigned char *pred, - unsigned char *output, - int pitch, int stride, int dc); - -void vp9_dequant_dc_idct_add_y_block_lossless_c(int16_t *q, - const int16_t *dq, - unsigned char *pre, - unsigned char *dst, - int stride, - const int16_t *dc); - -void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq, - unsigned char *pre, - unsigned char *dst, - int stride, - struct macroblockd *xd); - -void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, const int16_t *dq, - unsigned char *pre, - unsigned char *dst_u, - unsigned char *dst_v, - int stride, - struct macroblockd *xd); - -void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input, const int16_t *dq, - unsigned char *pred, unsigned char *dest, - int pitch, int stride, int eob); - -void vp9_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, int16_t *input, - const int16_t *dq, unsigned char *pred, - unsigned char *dest, int pitch, int stride, - int eob); - -void vp9_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, int16_t *input, - const int16_t *dq, unsigned char *pred, - unsigned char *dest, - int pitch, int stride, int eob); - -void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(int16_t *q, const int16_t *dq, - unsigned char *dst, - int stride, - const int16_t *dc, - MACROBLOCKD *xd); - -void vp9_dequant_idct_add_y_block_8x8_inplace_c(int16_t *q, const int16_t *dq, - unsigned char *dst, - int stride, - MACROBLOCKD *xd); - -void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(int16_t *q, const int16_t *dq, - unsigned char *dst, - int stride, - const int16_t *dc, - MACROBLOCKD *xd); - -void vp9_dequant_idct_add_y_block_4x4_inplace_c(int16_t *q, const int16_t *dq, - unsigned char *dst, - int stride, - MACROBLOCKD *xd); - -void vp9_dequant_idct_add_uv_block_8x8_inplace_c(int16_t *q, const int16_t *dq, - unsigned char *dstu, - unsigned char *dstv, - int stride, - MACROBLOCKD *xd); - -void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q, const int16_t *dq, - unsigned char *dstu, - unsigned char *dstv, - int stride, - MACROBLOCKD *xd); - -#endif // VP9_DECODER_VP9_DEQUANTIZE_H_ diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c index cb3038e534f181fd2ab1a529f4149e5807f6f61a..d06c9b000f9c346674c41f6c9f1ffdedf2c3a8f7 100644 --- a/vp9/decoder/vp9_detokenize.c +++ b/vp9/decoder/vp9_detokenize.c @@ -10,14 +10,20 @@ #include "vp9/common/vp9_blockd.h" +#include "vp9/common/vp9_common.h" #include "vp9/decoder/vp9_onyxd_int.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" #include "vp9/decoder/vp9_detokenize.h" #include "vp9/common/vp9_seg_common.h" +#if CONFIG_BALANCED_COEFTREE +#define ZERO_CONTEXT_NODE 0 +#define EOB_CONTEXT_NODE 1 +#else #define EOB_CONTEXT_NODE 0 #define ZERO_CONTEXT_NODE 1 +#endif #define ONE_CONTEXT_NODE 2 #define LOW_VAL_CONTEXT_NODE 3 #define TWO_CONTEXT_NODE 4 @@ -57,236 +63,185 @@ static const vp9_prob cat6_prob[15] = { 254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 }; -DECLARE_ALIGNED(16, extern const uint8_t, vp9_norm[256]); - -static int16_t get_signed(BOOL_DECODER *br, int16_t value_to_sign) { - return decode_bool(br, 128) ? 
-value_to_sign : value_to_sign; -} - - +DECLARE_ALIGNED(16, extern const uint8_t, + vp9_pt_energy_class[MAX_ENTROPY_TOKENS]); #define INCREMENT_COUNT(token) \ do { \ - coef_counts[type][ref][get_coef_band(scan, txfm_size, c)] \ - [pt][token]++; \ - token_cache[c] = token; \ - pt = vp9_get_coef_context(scan, nb, pad, token_cache, \ - c + 1, default_eob); \ + coef_counts[type][ref][band][pt] \ + [token >= TWO_TOKEN ? \ + (token == DCT_EOB_TOKEN ? DCT_EOB_MODEL_TOKEN : TWO_TOKEN) : \ + token]++; \ + token_cache[scan[c]] = vp9_pt_energy_class[token]; \ } while (0) -#if CONFIG_CODE_NONZEROCOUNT -#define WRITE_COEF_CONTINUE(val, token) \ - { \ - qcoeff_ptr[scan[c]] = get_signed(br, val); \ - INCREMENT_COUNT(token); \ - c++; \ - nzc++; \ - continue; \ - } -#else #define WRITE_COEF_CONTINUE(val, token) \ { \ - qcoeff_ptr[scan[c]] = get_signed(br, val); \ + qcoeff_ptr[scan[c]] = vp9_read_and_apply_sign(r, val) * \ + dq[c > 0] / (1 + (txfm_size == TX_32X32)); \ INCREMENT_COUNT(token); \ c++; \ continue; \ } -#endif // CONFIG_CODE_NONZEROCOUNT #define ADJUST_COEF(prob, bits_count) \ do { \ - if (vp9_read(br, prob)) \ + if (vp9_read(r, prob)) \ val += 1 << bits_count; \ } while (0); static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd, - BOOL_DECODER* const br, int block_idx, + vp9_reader *r, int block_idx, PLANE_TYPE type, int seg_eob, int16_t *qcoeff_ptr, - TX_SIZE txfm_size) { - ENTROPY_CONTEXT* const A0 = (ENTROPY_CONTEXT *) xd->above_context; - ENTROPY_CONTEXT* const L0 = (ENTROPY_CONTEXT *) xd->left_context; - int aidx, lidx; + TX_SIZE txfm_size, const int16_t *dq, + ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L) { ENTROPY_CONTEXT above_ec, left_ec; FRAME_CONTEXT *const fc = &dx->common.fc; int pt, c = 0, pad, default_eob; - vp9_coeff_probs *coef_probs; + int band; + vp9_prob (*coef_probs)[PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES]; + vp9_prob coef_probs_full[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; + uint8_t load_map[COEF_BANDS][PREV_COEF_CONTEXTS] = { + {0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0}, + }; + vp9_prob *prob; - vp9_coeff_count *coef_counts; - const int ref = xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME; -#if CONFIG_CODE_NONZEROCOUNT - uint16_t nzc = 0; - uint16_t nzc_expected = xd->mode_info_context->mbmi.nzcs[block_idx]; -#endif + vp9_coeff_count_model *coef_counts; + const int ref = xd->mode_info_context->mbmi.ref_frame[0] != INTRA_FRAME; + TX_TYPE tx_type = DCT_DCT; const int *scan, *nb; uint8_t token_cache[1024]; + const uint8_t * band_translate; +#if CONFIG_BALANCED_COEFTREE + int skip_eob_node = 0; +#endif - if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64) { - aidx = vp9_block2above_sb64[txfm_size][block_idx]; - lidx = vp9_block2left_sb64[txfm_size][block_idx]; - } else if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32) { - aidx = vp9_block2above_sb[txfm_size][block_idx]; - lidx = vp9_block2left_sb[txfm_size][block_idx]; - } else { - aidx = vp9_block2above[txfm_size][block_idx]; - lidx = vp9_block2left[txfm_size][block_idx]; - } - + coef_probs = fc->coef_probs[txfm_size][type][ref]; + coef_counts = fc->coef_counts[txfm_size]; switch (txfm_size) { default: case TX_4X4: { - const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? 
- get_tx_type_4x4(xd, block_idx) : DCT_DCT; - switch (tx_type) { - default: - scan = vp9_default_zig_zag1d_4x4; - break; - case ADST_DCT: - scan = vp9_row_scan_4x4; - break; - case DCT_ADST: - scan = vp9_col_scan_4x4; - break; - } - above_ec = A0[aidx] != 0; - left_ec = L0[lidx] != 0; - coef_probs = fc->coef_probs_4x4; - coef_counts = fc->coef_counts_4x4; + tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? + get_tx_type_4x4(xd, block_idx) : DCT_DCT; + scan = get_scan_4x4(tx_type); + above_ec = A[0] != 0; + left_ec = L[0] != 0; default_eob = 16; + band_translate = vp9_coefband_trans_4x4; break; } case TX_8X8: { const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; - const int sz = 3 + sb_type, x = block_idx & ((1 << sz) - 1); + const int sz = 1 + b_width_log2(sb_type); + const int x = block_idx & ((1 << sz) - 1); const int y = block_idx - x; - const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? - get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT; - switch (tx_type) { - default: - scan = vp9_default_zig_zag1d_8x8; - break; - case ADST_DCT: - scan = vp9_row_scan_8x8; - break; - case DCT_ADST: - scan = vp9_col_scan_8x8; - break; - } - coef_probs = fc->coef_probs_8x8; - coef_counts = fc->coef_counts_8x8; - above_ec = (A0[aidx] + A0[aidx + 1]) != 0; - left_ec = (L0[lidx] + L0[lidx + 1]) != 0; + tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? + get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT; + scan = get_scan_8x8(tx_type); + above_ec = (A[0] + A[1]) != 0; + left_ec = (L[0] + L[1]) != 0; default_eob = 64; + band_translate = vp9_coefband_trans_8x8plus; break; } case TX_16X16: { const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; - const int sz = 4 + sb_type, x = block_idx & ((1 << sz) - 1); + const int sz = 2 + b_width_log2(sb_type); + const int x = block_idx & ((1 << sz) - 1); const int y = block_idx - x; - const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? - get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT; - switch (tx_type) { - default: - scan = vp9_default_zig_zag1d_16x16; - break; - case ADST_DCT: - scan = vp9_row_scan_16x16; - break; - case DCT_ADST: - scan = vp9_col_scan_16x16; - break; - } - coef_probs = fc->coef_probs_16x16; - coef_counts = fc->coef_counts_16x16; - if (type == PLANE_TYPE_UV) { - ENTROPY_CONTEXT *A1 = (ENTROPY_CONTEXT *) (xd->above_context + 1); - ENTROPY_CONTEXT *L1 = (ENTROPY_CONTEXT *) (xd->left_context + 1); - above_ec = (A0[aidx] + A0[aidx + 1] + A1[aidx] + A1[aidx + 1]) != 0; - left_ec = (L0[lidx] + L0[lidx + 1] + L1[lidx] + L1[lidx + 1]) != 0; - } else { - above_ec = (A0[aidx] + A0[aidx + 1] + A0[aidx + 2] + A0[aidx + 3]) != 0; - left_ec = (L0[lidx] + L0[lidx + 1] + L0[lidx + 2] + L0[lidx + 3]) != 0; - } + tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? 
+ get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT; + scan = get_scan_16x16(tx_type); + above_ec = (A[0] + A[1] + A[2] + A[3]) != 0; + left_ec = (L[0] + L[1] + L[2] + L[3]) != 0; default_eob = 256; + band_translate = vp9_coefband_trans_8x8plus; break; } case TX_32X32: - scan = vp9_default_zig_zag1d_32x32; - coef_probs = fc->coef_probs_32x32; - coef_counts = fc->coef_counts_32x32; - if (type == PLANE_TYPE_UV) { - ENTROPY_CONTEXT *A1 = (ENTROPY_CONTEXT *) (xd->above_context + 1); - ENTROPY_CONTEXT *L1 = (ENTROPY_CONTEXT *) (xd->left_context + 1); - ENTROPY_CONTEXT *A2 = (ENTROPY_CONTEXT *) (xd->above_context + 2); - ENTROPY_CONTEXT *L2 = (ENTROPY_CONTEXT *) (xd->left_context + 2); - ENTROPY_CONTEXT *A3 = (ENTROPY_CONTEXT *) (xd->above_context + 3); - ENTROPY_CONTEXT *L3 = (ENTROPY_CONTEXT *) (xd->left_context + 3); - above_ec = (A0[aidx] + A0[aidx + 1] + A1[aidx] + A1[aidx + 1] + - A2[aidx] + A2[aidx + 1] + A3[aidx] + A3[aidx + 1]) != 0; - left_ec = (L0[lidx] + L0[lidx + 1] + L1[lidx] + L1[lidx + 1] + - L2[lidx] + L2[lidx + 1] + L3[lidx] + L3[lidx + 1]) != 0; - } else { - ENTROPY_CONTEXT *A1 = (ENTROPY_CONTEXT *) (xd->above_context + 1); - ENTROPY_CONTEXT *L1 = (ENTROPY_CONTEXT *) (xd->left_context + 1); - above_ec = (A0[aidx] + A0[aidx + 1] + A0[aidx + 2] + A0[aidx + 3] + - A1[aidx] + A1[aidx + 1] + A1[aidx + 2] + A1[aidx + 3]) != 0; - left_ec = (L0[lidx] + L0[lidx + 1] + L0[lidx + 2] + L0[lidx + 3] + - L1[lidx] + L1[lidx + 1] + L1[lidx + 2] + L1[lidx + 3]) != 0; - } + scan = vp9_default_scan_32x32; + above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0; + left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0; default_eob = 1024; + band_translate = vp9_coefband_trans_8x8plus; break; } - VP9_COMBINEENTROPYCONTEXTS(pt, above_ec, left_ec); + pt = combine_entropy_contexts(above_ec, left_ec); nb = vp9_get_coef_neighbors_handle(scan, &pad); while (1) { int val; const uint8_t *cat6 = cat6_prob; - if (c >= seg_eob) break; -#if CONFIG_CODE_NONZEROCOUNT - if (nzc == nzc_expected) - break; -#endif - prob = coef_probs[type][ref][get_coef_band(scan, txfm_size, c)][pt]; -#if CONFIG_CODE_NONZEROCOUNT == 0 - fc->eob_branch_counts[txfm_size][type][ref] - [get_coef_band(scan, txfm_size, c)][pt]++; - if (!vp9_read(br, prob[EOB_CONTEXT_NODE])) + if (c) + pt = vp9_get_coef_context(scan, nb, pad, token_cache, + c, default_eob); + band = get_coef_band(band_translate, c); + prob = coef_probs[band][pt]; +#if !CONFIG_BALANCED_COEFTREE + fc->eob_branch_counts[txfm_size][type][ref][band][pt]++; + if (!vp9_read(r, prob[EOB_CONTEXT_NODE])) break; -#endif + SKIP_START: +#endif if (c >= seg_eob) break; -#if CONFIG_CODE_NONZEROCOUNT - if (nzc == nzc_expected) - break; - // decode zero node only if there are zeros left - if (seg_eob - nzc_expected - c + nzc > 0) -#endif - if (!vp9_read(br, prob[ZERO_CONTEXT_NODE])) { + if (c) + pt = vp9_get_coef_context(scan, nb, pad, token_cache, + c, default_eob); + band = get_coef_band(band_translate, c); + prob = coef_probs[band][pt]; + + if (!vp9_read(r, prob[ZERO_CONTEXT_NODE])) { INCREMENT_COUNT(ZERO_TOKEN); ++c; - prob = coef_probs[type][ref][get_coef_band(scan, txfm_size, c)][pt]; +#if CONFIG_BALANCED_COEFTREE + skip_eob_node = 1; + continue; +#else goto SKIP_START; +#endif } +#if CONFIG_BALANCED_COEFTREE + if (!skip_eob_node) { + fc->eob_branch_counts[txfm_size][type][ref][band][pt]++; + if (!vp9_read(r, prob[EOB_CONTEXT_NODE])) + break; + } + skip_eob_node = 0; +#endif + // ONE_CONTEXT_NODE_0_ - if (!vp9_read(br, prob[ONE_CONTEXT_NODE])) { + if 
(!vp9_read(r, prob[ONE_CONTEXT_NODE])) { WRITE_COEF_CONTINUE(1, ONE_TOKEN); } + // Load full probabilities if not already loaded + if (!load_map[band][pt]) { + vp9_model_to_full_probs(coef_probs[band][pt], + coef_probs_full[band][pt]); + load_map[band][pt] = 1; + } + prob = coef_probs_full[band][pt]; // LOW_VAL_CONTEXT_NODE_0_ - if (!vp9_read(br, prob[LOW_VAL_CONTEXT_NODE])) { - if (!vp9_read(br, prob[TWO_CONTEXT_NODE])) { + if (!vp9_read(r, prob[LOW_VAL_CONTEXT_NODE])) { + if (!vp9_read(r, prob[TWO_CONTEXT_NODE])) { WRITE_COEF_CONTINUE(2, TWO_TOKEN); } - if (!vp9_read(br, prob[THREE_CONTEXT_NODE])) { + if (!vp9_read(r, prob[THREE_CONTEXT_NODE])) { WRITE_COEF_CONTINUE(3, THREE_TOKEN); } WRITE_COEF_CONTINUE(4, FOUR_TOKEN); } // HIGH_LOW_CONTEXT_NODE_0_ - if (!vp9_read(br, prob[HIGH_LOW_CONTEXT_NODE])) { - if (!vp9_read(br, prob[CAT_ONE_CONTEXT_NODE])) { + if (!vp9_read(r, prob[HIGH_LOW_CONTEXT_NODE])) { + if (!vp9_read(r, prob[CAT_ONE_CONTEXT_NODE])) { val = CAT1_MIN_VAL; ADJUST_COEF(CAT1_PROB0, 0); WRITE_COEF_CONTINUE(val, DCT_VAL_CATEGORY1); @@ -297,8 +252,8 @@ SKIP_START: WRITE_COEF_CONTINUE(val, DCT_VAL_CATEGORY2); } // CAT_THREEFOUR_CONTEXT_NODE_0_ - if (!vp9_read(br, prob[CAT_THREEFOUR_CONTEXT_NODE])) { - if (!vp9_read(br, prob[CAT_THREE_CONTEXT_NODE])) { + if (!vp9_read(r, prob[CAT_THREEFOUR_CONTEXT_NODE])) { + if (!vp9_read(r, prob[CAT_THREE_CONTEXT_NODE])) { val = CAT3_MIN_VAL; ADJUST_COEF(CAT3_PROB2, 2); ADJUST_COEF(CAT3_PROB1, 1); @@ -313,7 +268,7 @@ SKIP_START: WRITE_COEF_CONTINUE(val, DCT_VAL_CATEGORY4); } // CAT_FIVE_CONTEXT_NODE_0_: - if (!vp9_read(br, prob[CAT_FIVE_CONTEXT_NODE])) { + if (!vp9_read(r, prob[CAT_FIVE_CONTEXT_NODE])) { val = CAT5_MIN_VAL; ADJUST_COEF(CAT5_PROB4, 4); ADJUST_COEF(CAT5_PROB3, 3); @@ -324,262 +279,73 @@ SKIP_START: } val = 0; while (*cat6) { - val = (val << 1) | vp9_read(br, *cat6++); + val = (val << 1) | vp9_read(r, *cat6++); } val += CAT6_MIN_VAL; WRITE_COEF_CONTINUE(val, DCT_VAL_CATEGORY6); } -#if CONFIG_CODE_NONZEROCOUNT == 0 if (c < seg_eob) - coef_counts[type][ref][get_coef_band(scan, txfm_size, c)] - [pt][DCT_EOB_TOKEN]++; -#endif + coef_counts[type][ref][band][pt][DCT_EOB_MODEL_TOKEN]++; + - A0[aidx] = L0[lidx] = c > 0; - if (txfm_size >= TX_8X8) { - A0[aidx + 1] = L0[lidx + 1] = A0[aidx]; - if (txfm_size >= TX_16X16) { - if (type == PLANE_TYPE_UV) { - ENTROPY_CONTEXT *A1 = (ENTROPY_CONTEXT *) (xd->above_context + 1); - ENTROPY_CONTEXT *L1 = (ENTROPY_CONTEXT *) (xd->left_context + 1); - A1[aidx] = A1[aidx + 1] = L1[lidx] = L1[lidx + 1] = A0[aidx]; - if (txfm_size >= TX_32X32) { - ENTROPY_CONTEXT *A2 = (ENTROPY_CONTEXT *) (xd->above_context + 2); - ENTROPY_CONTEXT *L2 = (ENTROPY_CONTEXT *) (xd->left_context + 2); - ENTROPY_CONTEXT *A3 = (ENTROPY_CONTEXT *) (xd->above_context + 3); - ENTROPY_CONTEXT *L3 = (ENTROPY_CONTEXT *) (xd->left_context + 3); - A2[aidx] = A2[aidx + 1] = A3[aidx] = A3[aidx + 1] = A0[aidx]; - L2[lidx] = L2[lidx + 1] = L3[lidx] = L3[lidx + 1] = A0[aidx]; - } - } else { - A0[aidx + 2] = A0[aidx + 3] = L0[lidx + 2] = L0[lidx + 3] = A0[aidx]; - if (txfm_size >= TX_32X32) { - ENTROPY_CONTEXT *A1 = (ENTROPY_CONTEXT *) (xd->above_context + 1); - ENTROPY_CONTEXT *L1 = (ENTROPY_CONTEXT *) (xd->left_context + 1); - A1[aidx] = A1[aidx + 1] = A1[aidx + 2] = A1[aidx + 3] = A0[aidx]; - L1[lidx] = L1[lidx + 1] = L1[lidx + 2] = L1[lidx + 3] = A0[aidx]; - } - } - } - } return c; } static int get_eob(MACROBLOCKD* const xd, int segment_id, int eob_max) { - return vp9_get_segdata(xd, segment_id, SEG_LVL_SKIP) ? 
0 : eob_max; -} - -static INLINE int decode_sb(VP9D_COMP* const pbi, - MACROBLOCKD* const xd, - BOOL_DECODER* const bc, - int offset, int count, int inc, - int eob_max, TX_SIZE tx_size) { - const int segment_id = xd->mode_info_context->mbmi.segment_id; - const int seg_eob = get_eob(xd, segment_id, eob_max); - int i, eobtotal = 0; - - // luma blocks - for (i = 0; i < offset; i += inc) { - const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC, seg_eob, - xd->qcoeff + i * 16, tx_size); - xd->eobs[i] = c; - eobtotal += c; - } - - // chroma blocks - for (i = offset; i < count; i += inc) { - const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, seg_eob, - xd->qcoeff + i * 16, tx_size); - xd->eobs[i] = c; - eobtotal += c; - } - - return eobtotal; -} - -int vp9_decode_sb_tokens(VP9D_COMP* const pbi, - MACROBLOCKD* const xd, - BOOL_DECODER* const bc) { - switch (xd->mode_info_context->mbmi.txfm_size) { - case TX_32X32: { - // 32x32 luma block - const int segment_id = xd->mode_info_context->mbmi.segment_id; - int i, eobtotal = 0, seg_eob; - int c = decode_coefs(pbi, xd, bc, 0, PLANE_TYPE_Y_WITH_DC, - get_eob(xd, segment_id, 1024), xd->qcoeff, TX_32X32); - xd->eobs[0] = c; - eobtotal += c; - - // 16x16 chroma blocks - seg_eob = get_eob(xd, segment_id, 256); - for (i = 64; i < 96; i += 16) { - c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, seg_eob, - xd->qcoeff + i * 16, TX_16X16); - xd->eobs[i] = c; - eobtotal += c; - } - return eobtotal; - } - case TX_16X16: - return decode_sb(pbi, xd, bc, 64, 96, 16, 16 * 16, TX_16X16); - case TX_8X8: - return decode_sb(pbi, xd, bc, 64, 96, 4, 8 * 8, TX_8X8); - case TX_4X4: - return decode_sb(pbi, xd, bc, 64, 96, 1, 4 * 4, TX_4X4); - default: - assert(0); - return 0; - } -} - -int vp9_decode_sb64_tokens(VP9D_COMP* const pbi, - MACROBLOCKD* const xd, - BOOL_DECODER* const bc) { - switch (xd->mode_info_context->mbmi.txfm_size) { - case TX_32X32: - return decode_sb(pbi, xd, bc, 256, 384, 64, 32 * 32, TX_32X32); - case TX_16X16: - return decode_sb(pbi, xd, bc, 256, 384, 16, 16 * 16, TX_16X16); - case TX_8X8: - return decode_sb(pbi, xd, bc, 256, 384, 4, 8 * 8, TX_8X8); - case TX_4X4: - return decode_sb(pbi, xd, bc, 256, 384, 1, 4 * 4, TX_4X4); - default: - assert(0); - return 0; - } + return vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP) ? 
0 : eob_max; } -static int vp9_decode_mb_tokens_16x16(VP9D_COMP* const pbi, - MACROBLOCKD* const xd, - BOOL_DECODER* const bc) { - const int segment_id = xd->mode_info_context->mbmi.segment_id; - int i, eobtotal = 0, seg_eob; - - // Luma block - int c = decode_coefs(pbi, xd, bc, 0, PLANE_TYPE_Y_WITH_DC, - get_eob(xd, segment_id, 256), xd->qcoeff, TX_16X16); - xd->eobs[0] = c; - eobtotal += c; - - // 8x8 chroma blocks - seg_eob = get_eob(xd, segment_id, 64); - for (i = 16; i < 24; i += 4) { - c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, - seg_eob, xd->block[i].qcoeff, TX_8X8); - xd->eobs[i] = c; - eobtotal += c; - } - return eobtotal; -} - -static int vp9_decode_mb_tokens_8x8(VP9D_COMP* const pbi, - MACROBLOCKD* const xd, - BOOL_DECODER* const bc) { - int i, eobtotal = 0; - const int segment_id = xd->mode_info_context->mbmi.segment_id; - - // luma blocks - int seg_eob = get_eob(xd, segment_id, 64); - for (i = 0; i < 16; i += 4) { - const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC, - seg_eob, xd->block[i].qcoeff, TX_8X8); - xd->eobs[i] = c; - eobtotal += c; - } - - // chroma blocks - if (xd->mode_info_context->mbmi.mode == I8X8_PRED || - xd->mode_info_context->mbmi.mode == SPLITMV) { - // use 4x4 transform for U, V components in I8X8/splitmv prediction mode - seg_eob = get_eob(xd, segment_id, 16); - for (i = 16; i < 24; i++) { - const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, - seg_eob, xd->block[i].qcoeff, TX_4X4); - xd->eobs[i] = c; - eobtotal += c; - } +struct decode_block_args { + VP9D_COMP *pbi; + MACROBLOCKD *xd; + vp9_reader *r; + int *eobtotal; +}; +static void decode_block(int plane, int block, + BLOCK_SIZE_TYPE bsize, + int ss_txfrm_size, + void *argv) { + const struct decode_block_args* const arg = argv; + const int bw = b_width_log2(bsize); + + // find the maximum eob for this transform size, adjusted by segment + MACROBLOCKD *xd = arg->xd; + const int segment_id = arg->xd->mode_info_context->mbmi.segment_id; + const TX_SIZE ss_tx_size = ss_txfrm_size / 2; + const int seg_eob = get_eob(arg->xd, segment_id, 16 << ss_txfrm_size); + int16_t* const qcoeff_base = arg->xd->plane[plane].qcoeff; + const int off = block >> ss_txfrm_size; + const int mod = bw - ss_tx_size - arg->xd->plane[plane].subsampling_x; + const int aoff = (off & ((1 << mod) - 1)) << ss_tx_size; + const int loff = (off >> mod) << ss_tx_size; + int pt; + ENTROPY_CONTEXT *A = arg->xd->plane[plane].above_context + aoff; + ENTROPY_CONTEXT *L = arg->xd->plane[plane].left_context + loff; + const int eob = decode_coefs(arg->pbi, arg->xd, arg->r, block, + arg->xd->plane[plane].plane_type, seg_eob, + BLOCK_OFFSET(qcoeff_base, block, 16), + ss_tx_size, arg->xd->plane[plane].dequant, + A, + L); + + if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) { + set_contexts_on_border(xd, bsize, plane, ss_tx_size, eob, aoff, loff, A, L); } else { - for (i = 16; i < 24; i += 4) { - const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, - seg_eob, xd->block[i].qcoeff, TX_8X8); - xd->eobs[i] = c; - eobtotal += c; + for (pt = 0; pt < (1 << ss_tx_size); pt++) { + A[pt] = L[pt] = eob > 0; } } - - return eobtotal; -} - -static int decode_coefs_4x4(VP9D_COMP *dx, MACROBLOCKD *xd, - BOOL_DECODER* const bc, - PLANE_TYPE type, int i, int seg_eob) { - const int c = decode_coefs(dx, xd, bc, i, type, seg_eob, - xd->block[i].qcoeff, TX_4X4); - xd->eobs[i] = c; - return c; -} - -int vp9_decode_coefs_4x4(VP9D_COMP *dx, MACROBLOCKD *xd, - BOOL_DECODER* const bc, - PLANE_TYPE type, int i) { - const int segment_id = 
xd->mode_info_context->mbmi.segment_id; - const int seg_eob = get_eob(xd, segment_id, 16); - - return decode_coefs_4x4(dx, xd, bc, type, i, seg_eob); -} - -static int decode_mb_tokens_4x4_uv(VP9D_COMP* const dx, - MACROBLOCKD* const xd, - BOOL_DECODER* const bc, - int seg_eob) { - int i, eobtotal = 0; - - // chroma blocks - for (i = 16; i < 24; i++) - eobtotal += decode_coefs_4x4(dx, xd, bc, PLANE_TYPE_UV, i, seg_eob); - - return eobtotal; + arg->xd->plane[plane].eobs[block] = eob; + arg->eobtotal[0] += eob; } -int vp9_decode_mb_tokens_4x4_uv(VP9D_COMP* const dx, - MACROBLOCKD* const xd, - BOOL_DECODER* const bc) { - const int segment_id = xd->mode_info_context->mbmi.segment_id; - const int seg_eob = get_eob(xd, segment_id, 16); - - return decode_mb_tokens_4x4_uv(dx, xd, bc, seg_eob); -} - -static int vp9_decode_mb_tokens_4x4(VP9D_COMP* const dx, - MACROBLOCKD* const xd, - BOOL_DECODER* const bc) { - int i, eobtotal = 0; - const int segment_id = xd->mode_info_context->mbmi.segment_id; - const int seg_eob = get_eob(xd, segment_id, 16); - - // luma blocks - for (i = 0; i < 16; ++i) - eobtotal += decode_coefs_4x4(dx, xd, bc, PLANE_TYPE_Y_WITH_DC, i, seg_eob); - - // chroma blocks - eobtotal += decode_mb_tokens_4x4_uv(dx, xd, bc, seg_eob); - - return eobtotal; -} - -int vp9_decode_mb_tokens(VP9D_COMP* const dx, +int vp9_decode_tokens(VP9D_COMP* const pbi, MACROBLOCKD* const xd, - BOOL_DECODER* const bc) { - const TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; - switch (tx_size) { - case TX_16X16: - return vp9_decode_mb_tokens_16x16(dx, xd, bc); - case TX_8X8: - return vp9_decode_mb_tokens_8x8(dx, xd, bc); - default: - assert(tx_size == TX_4X4); - return vp9_decode_mb_tokens_4x4(dx, xd, bc); - } + vp9_reader *r, + BLOCK_SIZE_TYPE bsize) { + int eobtotal = 0; + struct decode_block_args args = {pbi, xd, r, &eobtotal}; + foreach_transformed_block(xd, bsize, decode_block, &args); + return eobtotal; } diff --git a/vp9/decoder/vp9_detokenize.h b/vp9/decoder/vp9_detokenize.h index 33a34aeae004bda273e4e26aed7f591b8228f6d3..13235bda1fcdf132e9c49f441bd4030d1298c6f0 100644 --- a/vp9/decoder/vp9_detokenize.h +++ b/vp9/decoder/vp9_detokenize.h @@ -14,22 +14,9 @@ #include "vp9/decoder/vp9_onyxd_int.h" -int vp9_decode_coefs_4x4(VP9D_COMP *dx, MACROBLOCKD *xd, - BOOL_DECODER* const bc, - PLANE_TYPE type, int i); - -int vp9_decode_mb_tokens(VP9D_COMP* const, MACROBLOCKD* const, - BOOL_DECODER* const); - -int vp9_decode_sb_tokens(VP9D_COMP* const pbi, - MACROBLOCKD* const xd, - BOOL_DECODER* const bc); - -int vp9_decode_sb64_tokens(VP9D_COMP* const pbi, - MACROBLOCKD* const xd, - BOOL_DECODER* const bc); - -int vp9_decode_mb_tokens_4x4_uv(VP9D_COMP* const dx, MACROBLOCKD* const xd, - BOOL_DECODER* const bc); +int vp9_decode_tokens(VP9D_COMP* const pbi, + MACROBLOCKD* const xd, + vp9_reader *r, + BLOCK_SIZE_TYPE bsize); #endif // VP9_DECODER_VP9_DETOKENIZE_H_ diff --git a/vp9/decoder/vp9_idct_blk.c b/vp9/decoder/vp9_idct_blk.c index b17955b1caa8d57cb861561693d43accfb3336a2..417f89177978c4b7fc4455f20fcb8c37b65b615c 100644 --- a/vp9/decoder/vp9_idct_blk.c +++ b/vp9/decoder/vp9_idct_blk.c @@ -10,18 +10,15 @@ #include "vp9_rtcd.h" #include "vp9/common/vp9_blockd.h" -#include "vp9/decoder/vp9_dequantize.h" +#include "vp9/decoder/vp9_idct_blk.h" -void vp9_dequant_idct_add_y_block_4x4_inplace_c(int16_t *q, - const int16_t *dq, - uint8_t *dst, - int stride, - MACROBLOCKD *xd) { +void vp9_idct_add_y_block_c(int16_t *q, uint8_t *dst, int stride, + MACROBLOCKD *xd) { int i, j; for (i = 0; i < 4; i++) { for (j = 0; 
j < 4; j++) { - xd->itxm_add(q, dq, dst, dst, stride, stride, xd->eobs[i * 4 + j]); + vp9_idct_add(q, dst, stride, xd->plane[0].eobs[i * 4 + j]); q += 16; dst += 4; } @@ -30,202 +27,205 @@ void vp9_dequant_idct_add_y_block_4x4_inplace_c(int16_t *q, } } -void vp9_dequant_idct_add_y_block_c(int16_t *q, const int16_t *dq, - uint8_t *pre, - uint8_t *dst, - int stride, MACROBLOCKD *xd) { +void vp9_idct_add_uv_block_c(int16_t *q, uint8_t *dst, int stride, + uint16_t *eobs) { int i, j; - for (i = 0; i < 4; i++) { - for (j = 0; j < 4; j++) { - vp9_dequant_idct_add(q, dq, pre, dst, 16, stride, xd->eobs[i * 4 + j]); + for (i = 0; i < 2; i++) { + for (j = 0; j < 2; j++) { + vp9_idct_add(q, dst, stride, eobs[i * 2 + j]); q += 16; - pre += 4; dst += 4; } - pre += 64 - 16; - dst += 4 * stride - 16; + dst += 4 * stride - 8; } } -void vp9_dequant_idct_add_uv_block_c(int16_t *q, const int16_t *dq, - uint8_t *pre, uint8_t *dstu, - uint8_t *dstv, int stride, - MACROBLOCKD *xd) { - int i, j; +void vp9_idct_add_y_block_8x8_c(int16_t *q, uint8_t *dst, int stride, + MACROBLOCKD *xd) { + uint8_t *origdest = dst; - for (i = 0; i < 2; i++) { - for (j = 0; j < 2; j++) { - vp9_dequant_idct_add(q, dq, pre, dstu, 8, stride, - xd->eobs[16 + i * 2 + j]); - q += 16; - pre += 4; - dstu += 4; - } + vp9_idct_add_8x8_c(q, dst, stride, xd->plane[0].eobs[0]); + vp9_idct_add_8x8_c(&q[64], origdest + 8, stride, xd->plane[0].eobs[4]); + vp9_idct_add_8x8_c(&q[128], origdest + 8 * stride, stride, + xd->plane[0].eobs[8]); + vp9_idct_add_8x8_c(&q[192], origdest + 8 * stride + 8, stride, + xd->plane[0].eobs[12]); +} - pre += 32 - 8; - dstu += 4 * stride - 8; - } +void vp9_idct_add_y_block_lossless_c(int16_t *q, uint8_t *dst, int stride, + MACROBLOCKD *xd) { + int i, j; - for (i = 0; i < 2; i++) { - for (j = 0; j < 2; j++) { - vp9_dequant_idct_add(q, dq, pre, dstv, 8, stride, - xd->eobs[20 + i * 2 + j]); - q += 16; - pre += 4; - dstv += 4; + for (i = 0; i < 4; i++) { + for (j = 0; j < 4; j++) { + vp9_idct_add_lossless_c(q, dst, stride, xd->plane[0].eobs[i * 4 + j]); + q += 16; + dst += 4; } - pre += 32 - 8; - dstv += 4 * stride - 8; + dst += 4 * stride - 16; } } -void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q, const int16_t *dq, - uint8_t *dstu, - uint8_t *dstv, - int stride, - MACROBLOCKD *xd) { +void vp9_idct_add_uv_block_lossless_c(int16_t *q, uint8_t *dst, int stride, + uint16_t *eobs) { int i, j; for (i = 0; i < 2; i++) { for (j = 0; j < 2; j++) { - xd->itxm_add(q, dq, dstu, dstu, stride, stride, xd->eobs[16 + i * 2 + j]); - q += 16; - dstu += 4; - } - - dstu += 4 * stride - 8; - } - - for (i = 0; i < 2; i++) { - for (j = 0; j < 2; j++) { - xd->itxm_add(q, dq, dstv, dstv, stride, stride, xd->eobs[20 + i * 2 + j]); - q += 16; - dstv += 4; + vp9_idct_add_lossless_c(q, dst, stride, eobs[i * 2 + j]); + q += 16; + dst += 4; } - dstv += 4 * stride - 8; + dst += 4 * stride - 8; } } -void vp9_dequant_idct_add_y_block_8x8_inplace_c(int16_t *q, - const int16_t *dq, - uint8_t *dst, - int stride, - MACROBLOCKD *xd) { - vp9_dequant_idct_add_8x8_c(q, dq, dst, dst, stride, stride, xd->eobs[0]); - - vp9_dequant_idct_add_8x8_c(&q[64], dq, dst + 8, - dst + 8, stride, stride, xd->eobs[4]); +static void add_constant_residual(const int16_t diff, uint8_t *dest, int stride, + int width, int height) { + int r, c; - vp9_dequant_idct_add_8x8_c(&q[128], dq, dst + 8 * stride, - dst + 8 * stride, stride, stride, - xd->eobs[8]); + for (r = 0; r < height; r++) { + for (c = 0; c < width; c++) + dest[c] = clip_pixel(diff + dest[c]); - 
vp9_dequant_idct_add_8x8_c(&q[192], dq, dst + 8 * stride + 8, - dst + 8 * stride + 8, stride, stride, - xd->eobs[12]); + dest += stride; + } } -void vp9_dequant_idct_add_y_block_8x8_c(int16_t *q, const int16_t *dq, - uint8_t *pre, - uint8_t *dst, - int stride, MACROBLOCKD *xd) { - uint8_t *origdest = dst; - uint8_t *origpred = pre; - - vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride, xd->eobs[0]); - vp9_dequant_idct_add_8x8_c(&q[64], dq, origpred + 8, - origdest + 8, 16, stride, xd->eobs[4]); - vp9_dequant_idct_add_8x8_c(&q[128], dq, origpred + 8 * 16, - origdest + 8 * stride, 16, stride, - xd->eobs[8]); - vp9_dequant_idct_add_8x8_c(&q[192], dq, origpred + 8 * 16 + 8, - origdest + 8 * stride + 8, 16, stride, - xd->eobs[12]); +void vp9_add_constant_residual_8x8_c(const int16_t diff, uint8_t *dest, + int stride) { + add_constant_residual(diff, dest, stride, 8, 8); } -void vp9_dequant_idct_add_uv_block_8x8_c(int16_t *q, const int16_t *dq, - uint8_t *pre, - uint8_t *dstu, - uint8_t *dstv, - int stride, MACROBLOCKD *xd) { - vp9_dequant_idct_add_8x8_c(q, dq, pre, dstu, 8, stride, xd->eobs[16]); - - q += 64; - pre += 64; - - vp9_dequant_idct_add_8x8_c(q, dq, pre, dstv, 8, stride, xd->eobs[20]); +void vp9_add_constant_residual_16x16_c(const int16_t diff, uint8_t *dest, + int stride) { + add_constant_residual(diff, dest, stride, 16, 16); } -void vp9_dequant_idct_add_uv_block_8x8_inplace_c(int16_t *q, const int16_t *dq, - uint8_t *dstu, - uint8_t *dstv, - int stride, - MACROBLOCKD *xd) { - vp9_dequant_idct_add_8x8_c(q, dq, dstu, dstu, stride, stride, - xd->eobs[16]); - - q += 64; - vp9_dequant_idct_add_8x8_c(q, dq, dstv, dstv, stride, stride, - xd->eobs[20]); +void vp9_add_constant_residual_32x32_c(const int16_t diff, uint8_t *dest, + int stride) { + add_constant_residual(diff, dest, stride, 32, 32); } +void vp9_iht_add_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest, int stride, + int eob) { + if (tx_type == DCT_DCT) { + vp9_idct_add(input, dest, stride, eob); + } else { + vp9_short_iht4x4_add(input, dest, stride, tx_type); + vpx_memset(input, 0, 32); + } +} -void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq, - uint8_t *pre, - uint8_t *dst, - int stride, MACROBLOCKD *xd) { - int i, j; - - for (i = 0; i < 4; i++) { - for (j = 0; j < 4; j++) { - vp9_dequant_idct_add_lossless_c(q, dq, pre, dst, 16, stride, - xd->eobs[i * 4 + j]); - q += 16; - pre += 4; - dst += 4; +void vp9_iht_add_8x8_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest, + int stride, int eob) { + if (tx_type == DCT_DCT) { + vp9_idct_add_8x8(input, dest, stride, eob); + } else { + if (eob > 0) { + vp9_short_iht8x8_add(input, dest, stride, tx_type); + vpx_memset(input, 0, 128); } + } +} - pre += 64 - 16; - dst += 4 * stride - 16; +void vp9_idct_add_c(int16_t *input, uint8_t *dest, int stride, int eob) { + if (eob > 1) { + vp9_short_idct4x4_add(input, dest, stride); + vpx_memset(input, 0, 32); + } else { + vp9_dc_only_idct_add(input[0], dest, dest, stride, stride); + ((int *)input)[0] = 0; } } -void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, const int16_t *dq, - uint8_t *pre, - uint8_t *dstu, - uint8_t *dstv, - int stride, - MACROBLOCKD *xd) { - int i, j; +void vp9_idct_add_lossless_c(int16_t *input, uint8_t *dest, int stride, + int eob) { + if (eob > 1) { + vp9_short_iwalsh4x4_add(input, dest, stride); + vpx_memset(input, 0, 32); + } else { + vp9_short_iwalsh4x4_1_add_c(input, dest, stride); + ((int *)input)[0] = 0; + } +} - for (i = 0; i < 2; i++) { - for (j = 0; j < 2; j++) { - 
vp9_dequant_idct_add_lossless_c(q, dq, pre, dstu, 8, stride,
-                                      xd->eobs[16 + i * 2 + j]);
-      q    += 16;
-      pre  += 4;
-      dstu += 4;
+void vp9_idct_add_8x8_c(int16_t *input, uint8_t *dest, int stride, int eob) {
+  // If dc is 1, then input[0] is the reconstructed value and no
+  // dequantization is needed. Also, when dc is 1, dc is counted in eobs,
+  // namely eobs >= 1.
+
+  // The calculation can be simplified if there are not many non-zero dct
+  // coefficients. Use eobs to decide what to do.
+  // TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c.
+  // Combine that with code here.
+  if (eob) {
+    if (eob == 1) {
+      // DC only DCT coefficient
+      int16_t in = input[0];
+      int16_t out;
+
+      // Note: the idct1 will need to be modified accordingly whenever
+      // vp9_short_idct8x8_c() is modified.
+      vp9_short_idct1_8x8_c(&in, &out);
+      input[0] = 0;
+
+      vp9_add_constant_residual_8x8(out, dest, stride);
+    } else {
+      vp9_short_idct8x8_add(input, dest, stride);
+      vpx_memset(input, 0, 128);
+    }
+  }
+}
-    pre  += 32 - 8;
-    dstu += 4 * stride - 8;
+void vp9_iht_add_16x16_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
+                         int stride, int eob) {
+  if (tx_type == DCT_DCT) {
+    vp9_idct_add_16x16(input, dest, stride, eob);
+  } else {
+    if (eob > 0) {
+      vp9_short_iht16x16_add(input, dest, stride, tx_type);
+      vpx_memset(input, 0, 512);
+    }
+  }
+}
-  for (i = 0; i < 2; i++) {
-    for (j = 0; j < 2; j++) {
-      vp9_dequant_idct_add_lossless_c(q, dq, pre, dstv, 8, stride,
-                                      xd->eobs[20 + i * 2 + j]);
-      q    += 16;
-      pre  += 4;
-      dstv += 4;
+void vp9_idct_add_16x16_c(int16_t *input, uint8_t *dest, int stride, int eob) {
+  /* The calculation can be simplified if there are not many non-zero dct
+   * coefficients. Use eobs to separate different cases. */
+  if (eob) {
+    if (eob == 1) {
+      /* DC only DCT coefficient. */
+      int16_t in = input[0];
+      int16_t out;
+      /* Note: the idct1 will need to be modified accordingly whenever
+       * vp9_short_idct16x16() is modified. */
+      vp9_short_idct1_16x16_c(&in, &out);
+      input[0] = 0;
+
+      vp9_add_constant_residual_16x16(out, dest, stride);
+    } else {
+      vp9_short_idct16x16_add(input, dest, stride);
+      vpx_memset(input, 0, 512);
+    }
+  }
+}
-    pre  += 32 - 8;
-    dstv += 4 * stride - 8;
+void vp9_idct_add_32x32_c(int16_t *input, uint8_t *dest, int stride, int eob) {
+  DECLARE_ALIGNED_ARRAY(16, int16_t, output, 1024);
+
+  if (eob) {
+    if (eob == 1) {
+      vp9_short_idct1_32x32(input, output);
+      vp9_add_constant_residual_32x32(output[0], dest, stride);
+      input[0] = 0;
+    } else {
+      vp9_short_idct32x32_add(input, dest, stride);
+      vpx_memset(input, 0, 2048);
+    }
+  }
+}
diff --git a/vp9/decoder/vp9_idct_blk.h b/vp9/decoder/vp9_idct_blk.h
new file mode 100644
index 0000000000000000000000000000000000000000..42dcc817080bbd74a3f5401e816c63339072ccbc
--- /dev/null
+++ b/vp9/decoder/vp9_idct_blk.h
@@ -0,0 +1,36 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree. 
+ */ + + +#ifndef VP9_DECODER_VP9_IDCT_BLK_H_ +#define VP9_DECODER_VP9_IDCT_BLK_H_ + +#include "vp9/common/vp9_blockd.h" + + +void vp9_idct_add_lossless_c(int16_t *input, unsigned char *dest, int stride, + int eob); + +void vp9_idct_add_y_block_lossless_c(int16_t *q, unsigned char *dst, int stride, + struct macroblockd *xd); + +void vp9_idct_add_uv_block_lossless_c(int16_t *q, unsigned char *dst, + int stride, uint16_t *eobs); + +void vp9_iht_add_c(TX_TYPE tx_type, int16_t *input, unsigned char *dest, + int stride, int eob); + +void vp9_iht_add_8x8_c(TX_TYPE tx_type, int16_t *input, unsigned char *dest, + int stride, int eob); + +void vp9_iht_add_16x16_c(TX_TYPE tx_type, int16_t *input, unsigned char *dest, + int stride, int eob); + +#endif // VP9_DECODER_VP9_IDCT_BLK_H_ diff --git a/vp9/decoder/vp9_onyxd.h b/vp9/decoder/vp9_onyxd.h index cd71166e45ceb81517a6418f0024950ba24ae09f..cd5b7508f2123930ac5f3f02ebae250b287f060c 100644 --- a/vp9/decoder/vp9_onyxd.h +++ b/vp9/decoder/vp9_onyxd.h @@ -11,54 +11,56 @@ #ifndef VP9_COMMON_VP9_ONYXD_H_ #define VP9_COMMON_VP9_ONYXD_H_ -/* Create/destroy static data structures. */ #ifdef __cplusplus extern "C" { #endif + #include "vpx_scale/yv12config.h" #include "vp9/common/vp9_ppflags.h" -#include "vpx_ports/mem.h" #include "vpx/vpx_codec.h" - typedef void *VP9D_PTR; - typedef struct { - int Width; - int Height; - int Version; - int postprocess; - int max_threads; - int inv_tile_order; - int input_partition; - } VP9D_CONFIG; - typedef enum { - VP9_LAST_FLAG = 1, - VP9_GOLD_FLAG = 2, - VP9_ALT_FLAG = 4 - } VP9_REFFRAME; - - void vp9_initialize_dec(void); - - int vp9_receive_compressed_data(VP9D_PTR comp, unsigned long size, - const unsigned char **dest, - int64_t time_stamp); - - int vp9_get_raw_frame(VP9D_PTR comp, YV12_BUFFER_CONFIG *sd, - int64_t *time_stamp, int64_t *time_end_stamp, - vp9_ppflags_t *flags); - - vpx_codec_err_t vp9_copy_reference_dec(VP9D_PTR comp, - VP9_REFFRAME ref_frame_flag, - YV12_BUFFER_CONFIG *sd); - - vpx_codec_err_t vp9_set_reference_dec(VP9D_PTR comp, - VP9_REFFRAME ref_frame_flag, - YV12_BUFFER_CONFIG *sd); - - int vp9_get_reference_dec(VP9D_PTR ptr, int index, YV12_BUFFER_CONFIG **fb); - - VP9D_PTR vp9_create_decompressor(VP9D_CONFIG *oxcf); - - void vp9_remove_decompressor(VP9D_PTR comp); +typedef void *VP9D_PTR; + +typedef struct { + int width; + int height; + int version; + int postprocess; + int max_threads; + int inv_tile_order; + int input_partition; +} VP9D_CONFIG; + +typedef enum { + VP9_LAST_FLAG = 1, + VP9_GOLD_FLAG = 2, + VP9_ALT_FLAG = 4 +} VP9_REFFRAME; + +void vp9_initialize_dec(); + +int vp9_receive_compressed_data(VP9D_PTR comp, + uint64_t size, const uint8_t **dest, + int64_t time_stamp); + +int vp9_get_raw_frame(VP9D_PTR comp, YV12_BUFFER_CONFIG *sd, + int64_t *time_stamp, int64_t *time_end_stamp, + vp9_ppflags_t *flags); + +vpx_codec_err_t vp9_copy_reference_dec(VP9D_PTR comp, + VP9_REFFRAME ref_frame_flag, + YV12_BUFFER_CONFIG *sd); + +vpx_codec_err_t vp9_set_reference_dec(VP9D_PTR comp, + VP9_REFFRAME ref_frame_flag, + YV12_BUFFER_CONFIG *sd); + +int vp9_get_reference_dec(VP9D_PTR ptr, int index, YV12_BUFFER_CONFIG **fb); + + +VP9D_PTR vp9_create_decompressor(VP9D_CONFIG *oxcf); + +void vp9_remove_decompressor(VP9D_PTR comp); #ifdef __cplusplus } diff --git a/vp9/decoder/vp9_onyxd_if.c b/vp9/decoder/vp9_onyxd_if.c index 2b61f0affbc2bdc72067ab684002b09c369f74a7..3cef88bcda5ac05bada8cd665f372a49de42eb9d 100644 --- a/vp9/decoder/vp9_onyxd_if.c +++ b/vp9/decoder/vp9_onyxd_if.c @@ -21,8 +21,6 @@ #include 
"vpx_mem/vpx_mem.h" #include "vp9/common/vp9_alloccommon.h" #include "vp9/common/vp9_loopfilter.h" -#include "vp9/common/vp9_swapyv12buffer.h" - #include "vp9/common/vp9_quant_common.h" #include "vpx_scale/vpx_scale.h" #include "vp9/common/vp9_systemdependent.h" @@ -36,7 +34,7 @@ static void recon_write_yuv_frame(const char *name, const YV12_BUFFER_CONFIG *s, int w, int _h) { - FILE *yuv_file = fopen((char *)name, "ab"); + FILE *yuv_file = fopen(name, "ab"); const uint8_t *src = s->y_buffer; int h = _h; @@ -111,7 +109,7 @@ void vp9_initialize_dec() { } VP9D_PTR vp9_create_decompressor(VP9D_CONFIG *oxcf) { - VP9D_COMP *pbi = vpx_memalign(32, sizeof(VP9D_COMP)); + VP9D_COMP *const pbi = vpx_memalign(32, sizeof(VP9D_COMP)); if (!pbi) return NULL; @@ -121,40 +119,37 @@ VP9D_PTR vp9_create_decompressor(VP9D_CONFIG *oxcf) { if (setjmp(pbi->common.error.jmp)) { pbi->common.error.setjmp = 0; vp9_remove_decompressor(pbi); - return 0; + return NULL; } pbi->common.error.setjmp = 1; vp9_initialize_dec(); vp9_create_common(&pbi->common); - pbi->oxcf = *oxcf; + pbi->oxcf = *oxcf; pbi->common.current_video_frame = 0; pbi->ready_for_new_data = 1; - /* vp9_init_de_quantizer() is first called here. Add check in - * frame_init_dequantizer() to avoid unnecessary calling of - * vp9_init_de_quantizer() for every frame. - */ - vp9_init_de_quantizer(pbi); + // vp9_init_dequantizer() is first called here. Add check in + // frame_init_dequantizer() to avoid unnecessary calling of + // vp9_init_dequantizer() for every frame. + vp9_init_dequantizer(&pbi->common); vp9_loop_filter_init(&pbi->common); pbi->common.error.setjmp = 0; - pbi->decoded_key_frame = 0; - return (VP9D_PTR) pbi; + return pbi; } void vp9_remove_decompressor(VP9D_PTR ptr) { - VP9D_COMP *pbi = (VP9D_COMP *) ptr; + VP9D_COMP *const pbi = (VP9D_COMP *)ptr; if (!pbi) return; - // Delete segmentation map if (pbi->common.last_frame_seg_map) vpx_free(pbi->common.last_frame_seg_map); @@ -252,7 +247,7 @@ int vp9_get_reference_dec(VP9D_PTR ptr, int index, YV12_BUFFER_CONFIG **fb) { return 0; } -/* If any buffer updating is signalled it should be done here. */ +/* If any buffer updating is signaled it should be done here. */ static void swap_frame_buffers(VP9D_COMP *pbi) { int ref_index = 0, mask; @@ -273,24 +268,23 @@ static void swap_frame_buffers(VP9D_COMP *pbi) { pbi->common.active_ref_idx[ref_index] = INT_MAX; } -int vp9_receive_compressed_data(VP9D_PTR ptr, unsigned long size, - const unsigned char **psource, +int vp9_receive_compressed_data(VP9D_PTR ptr, + uint64_t size, const uint8_t **psource, int64_t time_stamp) { VP9D_COMP *pbi = (VP9D_COMP *) ptr; VP9_COMMON *cm = &pbi->common; - const unsigned char *source = *psource; + const uint8_t *source = *psource; int retcode = 0; /*if(pbi->ready_for_new_data == 0) return -1;*/ - if (ptr == 0) { + if (ptr == 0) return -1; - } pbi->common.error.error_code = VPX_CODEC_OK; - pbi->Source = source; + pbi->source = source; pbi->source_sz = size; if (pbi->source_sz == 0) { @@ -325,6 +319,7 @@ int vp9_receive_compressed_data(VP9D_PTR ptr, unsigned long size, if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0) cm->fb_idx_ref_cnt[cm->new_fb_idx]--; + return -1; } @@ -354,10 +349,20 @@ int vp9_receive_compressed_data(VP9D_PTR ptr, unsigned long size, if (cm->filter_level) { /* Apply the loop filter if appropriate. 
*/ - vp9_loop_filter_frame(cm, &pbi->mb, cm->filter_level, 0, - cm->dering_enabled); + vp9_loop_filter_frame(cm, &pbi->mb, cm->filter_level, 0); } - vp8_yv12_extend_frame_borders(cm->frame_to_show); + +#if WRITE_RECON_BUFFER == 2 + if (cm->show_frame) + write_dx_frame_to_file(cm->frame_to_show, + cm->current_video_frame + 2000); + else + write_dx_frame_to_file(cm->frame_to_show, + cm->current_video_frame + 3000); +#endif + + vp9_extend_frame_borders(cm->frame_to_show, + cm->subsampling_x, cm->subsampling_y); } #if WRITE_RECON_BUFFER == 1 @@ -368,19 +373,19 @@ int vp9_receive_compressed_data(VP9D_PTR ptr, unsigned long size, vp9_clear_system_state(); + cm->last_show_frame = cm->show_frame; if (cm->show_frame) { - vpx_memcpy(cm->prev_mip, cm->mip, - (cm->mb_cols + 1) * (cm->mb_rows + 1)* sizeof(MODE_INFO)); - } else { - vpx_memset(cm->prev_mip, 0, - (cm->mb_cols + 1) * (cm->mb_rows + 1)* sizeof(MODE_INFO)); - } + // current mip will be the prev_mip for the next frame + MODE_INFO *temp = cm->prev_mip; + cm->prev_mip = cm->mip; + cm->mip = temp; - /*vp9_print_modes_and_motion_vectors(cm->mi, cm->mb_rows,cm->mb_cols, - cm->current_video_frame);*/ + // update the upper left visible macroblock ptrs + cm->mi = cm->mip + cm->mode_info_stride + 1; + cm->prev_mi = cm->prev_mip + cm->mode_info_stride + 1; - if (cm->show_frame) cm->current_video_frame++; + } pbi->ready_for_new_data = 0; pbi->last_time_stamp = time_stamp; diff --git a/vp9/decoder/vp9_onyxd_int.h b/vp9/decoder/vp9_onyxd_int.h index 0e6d059af707a1530d732c10529ee8d2248c6bd9..86985700c97e3eeb809023b0f547285703ed65bb 100644 --- a/vp9/decoder/vp9_onyxd_int.h +++ b/vp9/decoder/vp9_onyxd_int.h @@ -14,7 +14,7 @@ #include "vp9/decoder/vp9_onyxd.h" #include "vp9/decoder/vp9_treereader.h" #include "vp9/common/vp9_onyxc_int.h" -#include "vp9/decoder/vp9_dequantize.h" +#include "vp9/decoder/vp9_idct_blk.h" // #define DEC_DEBUG @@ -25,13 +25,12 @@ typedef struct VP9Decompressor { VP9D_CONFIG oxcf; - - const unsigned char *Source; - unsigned int source_sz; + const uint8_t *source; + uint32_t source_sz; vp9_reader *mbc; int64_t last_time_stamp; - int ready_for_new_data; + int ready_for_new_data; int refresh_frame_flags; vp9_prob prob_skip_false; @@ -42,8 +41,6 @@ typedef struct VP9Decompressor { int initial_height; } VP9D_COMP; -int vp9_decode_frame(VP9D_COMP *cpi, const unsigned char **p_data_end); - #if CONFIG_DEBUG #define CHECK_MEM_ERROR(lval,expr) do {\ diff --git a/vp9/decoder/vp9_read_bit_buffer.h b/vp9/decoder/vp9_read_bit_buffer.h new file mode 100644 index 0000000000000000000000000000000000000000..220056862b3feeccf16e794b78f4be92ef020e8c --- /dev/null +++ b/vp9/decoder/vp9_read_bit_buffer.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef VP9_READ_BIT_BUFFER_ +#define VP9_READ_BIT_BUFFER_ + +#include <limits.h> + +#include "vpx/vpx_integer.h" + +typedef void (*vp9_rb_error_handler)(void *data, int bit_offset); + +struct vp9_read_bit_buffer { + const uint8_t *bit_buffer; + const uint8_t *bit_buffer_end; + size_t bit_offset; + + void *error_handler_data; + vp9_rb_error_handler error_handler; +}; + +static size_t vp9_rb_bytes_read(struct vp9_read_bit_buffer *rb) { + return rb->bit_offset / CHAR_BIT + (rb->bit_offset % CHAR_BIT > 0); +} + +static int vp9_rb_read_bit(struct vp9_read_bit_buffer *rb) { + const int off = rb->bit_offset; + const int p = off / CHAR_BIT; + const int q = CHAR_BIT - 1 - off % CHAR_BIT; + if (rb->bit_buffer + p >= rb->bit_buffer_end) { + rb->error_handler(rb->error_handler_data, rb->bit_offset); + return 0; + } else { + const int bit = (rb->bit_buffer[p] & (1 << q)) >> q; + rb->bit_offset = off + 1; + return bit; + } +} + +static int vp9_rb_read_literal(struct vp9_read_bit_buffer *rb, int bits) { + int value = 0, bit; + for (bit = bits - 1; bit >= 0; bit--) + value |= vp9_rb_read_bit(rb) << bit; + return value; +} + +#endif // VP9_READ_BIT_BUFFER_ diff --git a/vp9/decoder/vp9_treereader.h b/vp9/decoder/vp9_treereader.h index 4ec6de99de609c495aa001456fe4089d3652b11e..4535688ea3c0169a5d8b55ccb9eb87117b982ee5 100644 --- a/vp9/decoder/vp9_treereader.h +++ b/vp9/decoder/vp9_treereader.h @@ -15,12 +15,8 @@ #include "vp9/common/vp9_treecoder.h" #include "vp9/decoder/vp9_dboolhuff.h" -typedef BOOL_DECODER vp9_reader; - -#define vp9_read decode_bool -#define vp9_read_literal decode_value -#define vp9_read_bit(r) vp9_read(r, vp9_prob_half) #define vp9_read_prob(r) ((vp9_prob)vp9_read_literal(r, 8)) +#define vp9_read_and_apply_sign(r, value) (vp9_read_bit(r) ? -(value) : (value)) // Intent of tree data structure is to make decoding trivial. static int treed_read(vp9_reader *const r, /* !!! must return a 0 or 1 !!! */ diff --git a/vp9/decoder/x86/vp9_dequantize_sse2.c b/vp9/decoder/x86/vp9_dequantize_sse2.c index 1dfb8e08fd2a9657441fd8c9b98a172bbc9e3e30..54ec67f247736249187abd98be87f659f84a03ad 100644 --- a/vp9/decoder/x86/vp9_dequantize_sse2.c +++ b/vp9/decoder/x86/vp9_dequantize_sse2.c @@ -15,249 +15,20 @@ #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_idct.h" -void vp9_add_residual_4x4_sse2(const int16_t *diff, const uint8_t *pred, - int pitch, uint8_t *dest, int stride) { - const int width = 4; - const __m128i zero = _mm_setzero_si128(); - - // Diff data - const __m128i d0 = _mm_loadl_epi64((const __m128i *)(diff + 0 * width)); - const __m128i d1 = _mm_loadl_epi64((const __m128i *)(diff + 1 * width)); - const __m128i d2 = _mm_loadl_epi64((const __m128i *)(diff + 2 * width)); - const __m128i d3 = _mm_loadl_epi64((const __m128i *)(diff + 3 * width)); - - // Prediction data. 
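-  // Each prediction row below is widened to 16 bits, summed with the
-  // residual, and packed back to bytes by _mm_packus_epi16, whose
-  // unsigned saturation is the vector equivalent of the scalar
-  //   dest[c] = clip_pixel(pred[c] + diff[c]);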
- __m128i p0 = _mm_cvtsi32_si128(*(const int *)(pred + 0 * pitch)); - __m128i p1 = _mm_cvtsi32_si128(*(const int *)(pred + 1 * pitch)); - __m128i p2 = _mm_cvtsi32_si128(*(const int *)(pred + 2 * pitch)); - __m128i p3 = _mm_cvtsi32_si128(*(const int *)(pred + 3 * pitch)); - - p0 = _mm_unpacklo_epi8(p0, zero); - p1 = _mm_unpacklo_epi8(p1, zero); - p2 = _mm_unpacklo_epi8(p2, zero); - p3 = _mm_unpacklo_epi8(p3, zero); - - p0 = _mm_add_epi16(p0, d0); - p1 = _mm_add_epi16(p1, d1); - p2 = _mm_add_epi16(p2, d2); - p3 = _mm_add_epi16(p3, d3); - - p0 = _mm_packus_epi16(p0, p1); - p2 = _mm_packus_epi16(p2, p3); - - *(int *)dest = _mm_cvtsi128_si32(p0); - dest += stride; - - p0 = _mm_srli_si128(p0, 8); - *(int *)dest = _mm_cvtsi128_si32(p0); - dest += stride; - - *(int *)dest = _mm_cvtsi128_si32(p2); - dest += stride; - - p2 = _mm_srli_si128(p2, 8); - *(int *)dest = _mm_cvtsi128_si32(p2); -} - -void vp9_add_residual_8x8_sse2(const int16_t *diff, const uint8_t *pred, - int pitch, uint8_t *dest, int stride) { - const int width = 8; - const __m128i zero = _mm_setzero_si128(); - - // Diff data - const __m128i d0 = _mm_load_si128((const __m128i *)(diff + 0 * width)); - const __m128i d1 = _mm_load_si128((const __m128i *)(diff + 1 * width)); - const __m128i d2 = _mm_load_si128((const __m128i *)(diff + 2 * width)); - const __m128i d3 = _mm_load_si128((const __m128i *)(diff + 3 * width)); - const __m128i d4 = _mm_load_si128((const __m128i *)(diff + 4 * width)); - const __m128i d5 = _mm_load_si128((const __m128i *)(diff + 5 * width)); - const __m128i d6 = _mm_load_si128((const __m128i *)(diff + 6 * width)); - const __m128i d7 = _mm_load_si128((const __m128i *)(diff + 7 * width)); - - // Prediction data. - __m128i p0 = _mm_loadl_epi64((const __m128i *)(pred + 0 * pitch)); - __m128i p1 = _mm_loadl_epi64((const __m128i *)(pred + 1 * pitch)); - __m128i p2 = _mm_loadl_epi64((const __m128i *)(pred + 2 * pitch)); - __m128i p3 = _mm_loadl_epi64((const __m128i *)(pred + 3 * pitch)); - __m128i p4 = _mm_loadl_epi64((const __m128i *)(pred + 4 * pitch)); - __m128i p5 = _mm_loadl_epi64((const __m128i *)(pred + 5 * pitch)); - __m128i p6 = _mm_loadl_epi64((const __m128i *)(pred + 6 * pitch)); - __m128i p7 = _mm_loadl_epi64((const __m128i *)(pred + 7 * pitch)); - - p0 = _mm_unpacklo_epi8(p0, zero); - p1 = _mm_unpacklo_epi8(p1, zero); - p2 = _mm_unpacklo_epi8(p2, zero); - p3 = _mm_unpacklo_epi8(p3, zero); - p4 = _mm_unpacklo_epi8(p4, zero); - p5 = _mm_unpacklo_epi8(p5, zero); - p6 = _mm_unpacklo_epi8(p6, zero); - p7 = _mm_unpacklo_epi8(p7, zero); - - p0 = _mm_add_epi16(p0, d0); - p1 = _mm_add_epi16(p1, d1); - p2 = _mm_add_epi16(p2, d2); - p3 = _mm_add_epi16(p3, d3); - p4 = _mm_add_epi16(p4, d4); - p5 = _mm_add_epi16(p5, d5); - p6 = _mm_add_epi16(p6, d6); - p7 = _mm_add_epi16(p7, d7); - - p0 = _mm_packus_epi16(p0, p1); - p2 = _mm_packus_epi16(p2, p3); - p4 = _mm_packus_epi16(p4, p5); - p6 = _mm_packus_epi16(p6, p7); - - _mm_storel_epi64((__m128i *)(dest + 0 * stride), p0); - p0 = _mm_srli_si128(p0, 8); - _mm_storel_epi64((__m128i *)(dest + 1 * stride), p0); - - _mm_storel_epi64((__m128i *)(dest + 2 * stride), p2); - p2 = _mm_srli_si128(p2, 8); - _mm_storel_epi64((__m128i *)(dest + 3 * stride), p2); - - _mm_storel_epi64((__m128i *)(dest + 4 * stride), p4); - p4 = _mm_srli_si128(p4, 8); - _mm_storel_epi64((__m128i *)(dest + 5 * stride), p4); - - _mm_storel_epi64((__m128i *)(dest + 6 * stride), p6); - p6 = _mm_srli_si128(p6, 8); - _mm_storel_epi64((__m128i *)(dest + 7 * stride), p6); -} - -void vp9_add_residual_16x16_sse2(const 
int16_t *diff, const uint8_t *pred, - int pitch, uint8_t *dest, int stride) { - const int width = 16; - int i = 4; - const __m128i zero = _mm_setzero_si128(); - - // Diff data - __m128i d0, d1, d2, d3, d4, d5, d6, d7; - __m128i p0, p1, p2, p3, p4, p5, p6, p7; - - do { - d0 = _mm_load_si128((const __m128i *)(diff + 0 * width)); - d1 = _mm_load_si128((const __m128i *)(diff + 0 * width + 8)); - d2 = _mm_load_si128((const __m128i *)(diff + 1 * width)); - d3 = _mm_load_si128((const __m128i *)(diff + 1 * width + 8)); - d4 = _mm_load_si128((const __m128i *)(diff + 2 * width)); - d5 = _mm_load_si128((const __m128i *)(diff + 2 * width + 8)); - d6 = _mm_load_si128((const __m128i *)(diff + 3 * width)); - d7 = _mm_load_si128((const __m128i *)(diff + 3 * width + 8)); - - // Prediction data. - p1 = _mm_load_si128((const __m128i *)(pred + 0 * pitch)); - p3 = _mm_load_si128((const __m128i *)(pred + 1 * pitch)); - p5 = _mm_load_si128((const __m128i *)(pred + 2 * pitch)); - p7 = _mm_load_si128((const __m128i *)(pred + 3 * pitch)); - - p0 = _mm_unpacklo_epi8(p1, zero); - p1 = _mm_unpackhi_epi8(p1, zero); - p2 = _mm_unpacklo_epi8(p3, zero); - p3 = _mm_unpackhi_epi8(p3, zero); - p4 = _mm_unpacklo_epi8(p5, zero); - p5 = _mm_unpackhi_epi8(p5, zero); - p6 = _mm_unpacklo_epi8(p7, zero); - p7 = _mm_unpackhi_epi8(p7, zero); - - p0 = _mm_add_epi16(p0, d0); - p1 = _mm_add_epi16(p1, d1); - p2 = _mm_add_epi16(p2, d2); - p3 = _mm_add_epi16(p3, d3); - p4 = _mm_add_epi16(p4, d4); - p5 = _mm_add_epi16(p5, d5); - p6 = _mm_add_epi16(p6, d6); - p7 = _mm_add_epi16(p7, d7); - - p0 = _mm_packus_epi16(p0, p1); - p1 = _mm_packus_epi16(p2, p3); - p2 = _mm_packus_epi16(p4, p5); - p3 = _mm_packus_epi16(p6, p7); - - _mm_store_si128((__m128i *)(dest + 0 * stride), p0); - _mm_store_si128((__m128i *)(dest + 1 * stride), p1); - _mm_store_si128((__m128i *)(dest + 2 * stride), p2); - _mm_store_si128((__m128i *)(dest + 3 * stride), p3); - - diff += 4 * width; - pred += 4 * pitch; - dest += 4 * stride; - } while (--i); -} - -void vp9_add_residual_32x32_sse2(const int16_t *diff, const uint8_t *pred, - int pitch, uint8_t *dest, int stride) { - const int width = 32; - int i = 16; - const __m128i zero = _mm_setzero_si128(); - - // Diff data - __m128i d0, d1, d2, d3, d4, d5, d6, d7; - __m128i p0, p1, p2, p3, p4, p5, p6, p7; - - do { - d0 = _mm_load_si128((const __m128i *)(diff + 0 * width)); - d1 = _mm_load_si128((const __m128i *)(diff + 0 * width + 8)); - d2 = _mm_load_si128((const __m128i *)(diff + 0 * width + 16)); - d3 = _mm_load_si128((const __m128i *)(diff + 0 * width + 24)); - d4 = _mm_load_si128((const __m128i *)(diff + 1 * width)); - d5 = _mm_load_si128((const __m128i *)(diff + 1 * width + 8)); - d6 = _mm_load_si128((const __m128i *)(diff + 1 * width + 16)); - d7 = _mm_load_si128((const __m128i *)(diff + 1 * width + 24)); - - // Prediction data. 
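-    // Two 32-pixel rows are covered per pass (i counts down from 16),
-    // reusing the widen/add/pack-with-unsigned-saturation idiom of the
-    // smaller block sizes.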
- p1 = _mm_load_si128((const __m128i *)(pred + 0 * pitch)); - p3 = _mm_load_si128((const __m128i *)(pred + 0 * pitch + 16)); - p5 = _mm_load_si128((const __m128i *)(pred + 1 * pitch)); - p7 = _mm_load_si128((const __m128i *)(pred + 1 * pitch + 16)); - - p0 = _mm_unpacklo_epi8(p1, zero); - p1 = _mm_unpackhi_epi8(p1, zero); - p2 = _mm_unpacklo_epi8(p3, zero); - p3 = _mm_unpackhi_epi8(p3, zero); - p4 = _mm_unpacklo_epi8(p5, zero); - p5 = _mm_unpackhi_epi8(p5, zero); - p6 = _mm_unpacklo_epi8(p7, zero); - p7 = _mm_unpackhi_epi8(p7, zero); - - p0 = _mm_add_epi16(p0, d0); - p1 = _mm_add_epi16(p1, d1); - p2 = _mm_add_epi16(p2, d2); - p3 = _mm_add_epi16(p3, d3); - p4 = _mm_add_epi16(p4, d4); - p5 = _mm_add_epi16(p5, d5); - p6 = _mm_add_epi16(p6, d6); - p7 = _mm_add_epi16(p7, d7); - - p0 = _mm_packus_epi16(p0, p1); - p1 = _mm_packus_epi16(p2, p3); - p2 = _mm_packus_epi16(p4, p5); - p3 = _mm_packus_epi16(p6, p7); - - _mm_store_si128((__m128i *)(dest + 0 * stride), p0); - _mm_store_si128((__m128i *)(dest + 0 * stride + 16), p1); - _mm_store_si128((__m128i *)(dest + 1 * stride), p2); - _mm_store_si128((__m128i *)(dest + 1 * stride + 16), p3); - - diff += 2 * width; - pred += 2 * pitch; - dest += 2 * stride; - } while (--i); -} - -void vp9_add_constant_residual_8x8_sse2(const int16_t diff, const uint8_t *pred, - int pitch, uint8_t *dest, int stride) { +void vp9_add_constant_residual_8x8_sse2(const int16_t diff, uint8_t *dest, + int stride) { uint8_t abs_diff; __m128i d; // Prediction data. - __m128i p0 = _mm_loadl_epi64((const __m128i *)(pred + 0 * pitch)); - __m128i p1 = _mm_loadl_epi64((const __m128i *)(pred + 1 * pitch)); - __m128i p2 = _mm_loadl_epi64((const __m128i *)(pred + 2 * pitch)); - __m128i p3 = _mm_loadl_epi64((const __m128i *)(pred + 3 * pitch)); - __m128i p4 = _mm_loadl_epi64((const __m128i *)(pred + 4 * pitch)); - __m128i p5 = _mm_loadl_epi64((const __m128i *)(pred + 5 * pitch)); - __m128i p6 = _mm_loadl_epi64((const __m128i *)(pred + 6 * pitch)); - __m128i p7 = _mm_loadl_epi64((const __m128i *)(pred + 7 * pitch)); + __m128i p0 = _mm_loadl_epi64((const __m128i *)(dest + 0 * stride)); + __m128i p1 = _mm_loadl_epi64((const __m128i *)(dest + 1 * stride)); + __m128i p2 = _mm_loadl_epi64((const __m128i *)(dest + 2 * stride)); + __m128i p3 = _mm_loadl_epi64((const __m128i *)(dest + 3 * stride)); + __m128i p4 = _mm_loadl_epi64((const __m128i *)(dest + 4 * stride)); + __m128i p5 = _mm_loadl_epi64((const __m128i *)(dest + 5 * stride)); + __m128i p6 = _mm_loadl_epi64((const __m128i *)(dest + 6 * stride)); + __m128i p7 = _mm_loadl_epi64((const __m128i *)(dest + 7 * stride)); p0 = _mm_unpacklo_epi64(p0, p1); p2 = _mm_unpacklo_epi64(p2, p3); @@ -301,29 +72,28 @@ void vp9_add_constant_residual_8x8_sse2(const int16_t diff, const uint8_t *pred, _mm_storel_epi64((__m128i *)(dest + 7 * stride), p6); } -void vp9_add_constant_residual_16x16_sse2(const int16_t diff, - const uint8_t *pred, int pitch, - uint8_t *dest, int stride) { +void vp9_add_constant_residual_16x16_sse2(const int16_t diff, uint8_t *dest, + int stride) { uint8_t abs_diff; __m128i d; // Prediction data. 
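   // Reconstruction is now done in place: the predictor already sits in
   // dest, so the separate pred/pitch arguments are gone and the rows
   // below are loaded from (and later stored back to) dest.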
- __m128i p0 = _mm_load_si128((const __m128i *)(pred + 0 * pitch)); - __m128i p1 = _mm_load_si128((const __m128i *)(pred + 1 * pitch)); - __m128i p2 = _mm_load_si128((const __m128i *)(pred + 2 * pitch)); - __m128i p3 = _mm_load_si128((const __m128i *)(pred + 3 * pitch)); - __m128i p4 = _mm_load_si128((const __m128i *)(pred + 4 * pitch)); - __m128i p5 = _mm_load_si128((const __m128i *)(pred + 5 * pitch)); - __m128i p6 = _mm_load_si128((const __m128i *)(pred + 6 * pitch)); - __m128i p7 = _mm_load_si128((const __m128i *)(pred + 7 * pitch)); - __m128i p8 = _mm_load_si128((const __m128i *)(pred + 8 * pitch)); - __m128i p9 = _mm_load_si128((const __m128i *)(pred + 9 * pitch)); - __m128i p10 = _mm_load_si128((const __m128i *)(pred + 10 * pitch)); - __m128i p11 = _mm_load_si128((const __m128i *)(pred + 11 * pitch)); - __m128i p12 = _mm_load_si128((const __m128i *)(pred + 12 * pitch)); - __m128i p13 = _mm_load_si128((const __m128i *)(pred + 13 * pitch)); - __m128i p14 = _mm_load_si128((const __m128i *)(pred + 14 * pitch)); - __m128i p15 = _mm_load_si128((const __m128i *)(pred + 15 * pitch)); + __m128i p0 = _mm_load_si128((const __m128i *)(dest + 0 * stride)); + __m128i p1 = _mm_load_si128((const __m128i *)(dest + 1 * stride)); + __m128i p2 = _mm_load_si128((const __m128i *)(dest + 2 * stride)); + __m128i p3 = _mm_load_si128((const __m128i *)(dest + 3 * stride)); + __m128i p4 = _mm_load_si128((const __m128i *)(dest + 4 * stride)); + __m128i p5 = _mm_load_si128((const __m128i *)(dest + 5 * stride)); + __m128i p6 = _mm_load_si128((const __m128i *)(dest + 6 * stride)); + __m128i p7 = _mm_load_si128((const __m128i *)(dest + 7 * stride)); + __m128i p8 = _mm_load_si128((const __m128i *)(dest + 8 * stride)); + __m128i p9 = _mm_load_si128((const __m128i *)(dest + 9 * stride)); + __m128i p10 = _mm_load_si128((const __m128i *)(dest + 10 * stride)); + __m128i p11 = _mm_load_si128((const __m128i *)(dest + 11 * stride)); + __m128i p12 = _mm_load_si128((const __m128i *)(dest + 12 * stride)); + __m128i p13 = _mm_load_si128((const __m128i *)(dest + 13 * stride)); + __m128i p14 = _mm_load_si128((const __m128i *)(dest + 14 * stride)); + __m128i p15 = _mm_load_si128((const __m128i *)(dest + 15 * stride)); // Clip diff value to [0, 255] range. Then, do addition or subtraction // according to its sign. @@ -388,9 +158,8 @@ void vp9_add_constant_residual_16x16_sse2(const int16_t diff, _mm_store_si128((__m128i *)(dest + 15 * stride), p15); } -void vp9_add_constant_residual_32x32_sse2(const int16_t diff, - const uint8_t *pred, int pitch, - uint8_t *dest, int stride) { +void vp9_add_constant_residual_32x32_sse2(const int16_t diff, uint8_t *dest, + int stride) { uint8_t abs_diff; __m128i d; int i = 8; @@ -405,14 +174,14 @@ void vp9_add_constant_residual_32x32_sse2(const int16_t diff, do { // Prediction data. 
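   // A DC-only residual is applied to the whole 32x32 block: |diff| is
   // clipped to [0, 255], kept in d, and added to or subtracted from the
   // prediction according to the sign of diff, four 32-pixel rows per
   // pass.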
- __m128i p0 = _mm_load_si128((const __m128i *)(pred + 0 * pitch)); - __m128i p1 = _mm_load_si128((const __m128i *)(pred + 0 * pitch + 16)); - __m128i p2 = _mm_load_si128((const __m128i *)(pred + 1 * pitch)); - __m128i p3 = _mm_load_si128((const __m128i *)(pred + 1 * pitch + 16)); - __m128i p4 = _mm_load_si128((const __m128i *)(pred + 2 * pitch)); - __m128i p5 = _mm_load_si128((const __m128i *)(pred + 2 * pitch + 16)); - __m128i p6 = _mm_load_si128((const __m128i *)(pred + 3 * pitch)); - __m128i p7 = _mm_load_si128((const __m128i *)(pred + 3 * pitch + 16)); + __m128i p0 = _mm_load_si128((const __m128i *)(dest + 0 * stride)); + __m128i p1 = _mm_load_si128((const __m128i *)(dest + 0 * stride + 16)); + __m128i p2 = _mm_load_si128((const __m128i *)(dest + 1 * stride)); + __m128i p3 = _mm_load_si128((const __m128i *)(dest + 1 * stride + 16)); + __m128i p4 = _mm_load_si128((const __m128i *)(dest + 2 * stride)); + __m128i p5 = _mm_load_si128((const __m128i *)(dest + 2 * stride + 16)); + __m128i p6 = _mm_load_si128((const __m128i *)(dest + 3 * stride)); + __m128i p7 = _mm_load_si128((const __m128i *)(dest + 3 * stride + 16)); // Clip diff value to [0, 255] range. Then, do addition or subtraction // according to its sign. @@ -446,7 +215,6 @@ void vp9_add_constant_residual_32x32_sse2(const int16_t diff, _mm_store_si128((__m128i *)(dest + 3 * stride), p6); _mm_store_si128((__m128i *)(dest + 3 * stride + 16), p7); - pred += 4 * pitch; dest += 4 * stride; } while (--i); } diff --git a/vp9/decoder/x86/vp9_idct_blk_sse2.c b/vp9/decoder/x86/vp9_idct_blk_sse2.c deleted file mode 100644 index badd97f731bf7cce411d32e070bff6a5d74bda1f..0000000000000000000000000000000000000000 --- a/vp9/decoder/x86/vp9_idct_blk_sse2.c +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "./vpx_config.h" -#include "vp9/common/vp9_blockd.h" -#include "vp9/decoder/vp9_dequantize.h" - -void vp9_idct_dequant_dc_0_2x_sse2(short *q, const short *dq, - unsigned char *pre, unsigned char *dst, - int dst_stride, const short *dc); - -void vp9_idct_dequant_dc_full_2x_sse2(short *q, const short *dq, - unsigned char *pre, unsigned char *dst, - int dst_stride, const short *dc); - -void vp9_idct_dequant_0_2x_sse2(short *q, const short *dq, - unsigned char *pre, unsigned char *dst, - int dst_stride, int blk_stride); - -void vp9_idct_dequant_full_2x_sse2(short *q, const short *dq, - unsigned char *pre, unsigned char *dst, - int dst_stride, int blk_stride); - -void vp9_dequant_dc_idct_add_y_block_sse2(short *q, const short *dq, - unsigned char *pre, - unsigned char *dst, - int stride, unsigned short *eobs, - const short *dc) { - int i; - - for (i = 0; i < 4; i++) { - if (((short *)(eobs))[0] & 0xfefe) - vp9_idct_dequant_dc_full_2x_sse2(q, dq, pre, dst, stride, dc); - else - vp9_idct_dequant_dc_0_2x_sse2(q, dq, pre, dst, stride, dc); - - if (((short *)(eobs))[1] & 0xfefe) - vp9_idct_dequant_dc_full_2x_sse2(q + 32, dq, pre + 8, dst + 8, - stride, dc + 2); - else - vp9_idct_dequant_dc_0_2x_sse2(q + 32, dq, pre + 8, dst + 8, - stride, dc + 2); - - q += 64; - dc += 4; - pre += 64; - dst += stride * 4; - eobs += 4; - } -} - -void vp9_dequant_idct_add_y_block_sse2(short *q, const short *dq, - unsigned char *pre, unsigned char *dst, - int stride, unsigned short *eobs) { - int i; - - for (i = 0; i < 4; i++) { - if (((short *)(eobs))[0] & 0xfefe) - vp9_idct_dequant_full_2x_sse2(q, dq, pre, dst, stride, 16); - else - vp9_idct_dequant_0_2x_sse2(q, dq, pre, dst, stride, 16); - - if (((short *)(eobs))[1] & 0xfefe) - vp9_idct_dequant_full_2x_sse2(q + 32, dq, pre + 8, dst + 8, stride, 16); - else - vp9_idct_dequant_0_2x_sse2(q + 32, dq, pre + 8, dst + 8, stride, 16); - - q += 64; - pre += 64; - dst += stride * 4; - eobs += 4; - } -} - -void vp9_dequant_idct_add_uv_block_sse2(short *q, const short *dq, - unsigned char *pre, - unsigned char *dstu, - unsigned char *dstv, - int stride, unsigned short *eobs) { - if (((short *)(eobs))[0] & 0xfefe) - vp9_idct_dequant_full_2x_sse2(q, dq, pre, dstu, stride, 8); - else - vp9_idct_dequant_0_2x_sse2(q, dq, pre, dstu, stride, 8); - - q += 32; - pre += 32; - dstu += stride * 4; - - if (((short *)(eobs))[1] & 0xfefe) - vp9_idct_dequant_full_2x_sse2(q, dq, pre, dstu, stride, 8); - else - vp9_idct_dequant_0_2x_sse2(q, dq, pre, dstu, stride, 8); - - q += 32; - pre += 32; - - if (((short *)(eobs))[2] & 0xfefe) - vp9_idct_dequant_full_2x_sse2(q, dq, pre, dstv, stride, 8); - else - vp9_idct_dequant_0_2x_sse2(q, dq, pre, dstv, stride, 8); - - q += 32; - pre += 32; - dstv += stride * 4; - - if (((short *)(eobs))[3] & 0xfefe) - vp9_idct_dequant_full_2x_sse2(q, dq, pre, dstv, stride, 8); - else - vp9_idct_dequant_0_2x_sse2(q, dq, pre, dstv, stride, 8); -} diff --git a/vp9/encoder/ppc/vp9_csystemdependent.c b/vp9/encoder/ppc/vp9_csystemdependent.c deleted file mode 100644 index cc67625e7dcfc8aa4e74831f2160ea5180b3cfaf..0000000000000000000000000000000000000000 --- a/vp9/encoder/ppc/vp9_csystemdependent.c +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. 
All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vp9/encoder/vp9_variance.h" -#include "vp9/encoder/vp9_onyx_int.h" - -SADFunction *vp9_sad16x16; -SADFunction *vp9_sad16x8; -SADFunction *vp9_sad8x16; -SADFunction *vp9_sad8x8; -SADFunction *vp9_sad4x4; - -variance_function *vp9_variance4x4; -variance_function *vp9_variance8x8; -variance_function *vp9_variance8x16; -variance_function *vp9_variance16x8; -variance_function *vp9_variance16x16; - -variance_function *vp9_mse16x16; - -sub_pixel_variance_function *vp9_sub_pixel_variance4x4; -sub_pixel_variance_function *vp9_sub_pixel_variance8x8; -sub_pixel_variance_function *vp9_sub_pixel_variance8x16; -sub_pixel_variance_function *vp9_sub_pixel_variance16x8; -sub_pixel_variance_function *vp9_sub_pixel_variance16x16; - -int (*vp9_block_error)(short *coeff, short *dqcoeff); -int (*vp9_mbblock_error)(MACROBLOCK *mb, int dc); - -int (*vp9_mbuverror)(MACROBLOCK *mb); -unsigned int (*vp9_get_mb_ss)(short *); -void (*vp9_short_fdct4x4)(short *input, short *output, int pitch); -void (*vp9_short_fdct8x4)(short *input, short *output, int pitch); -void (*vp8_fast_fdct4x4)(short *input, short *output, int pitch); -void (*vp8_fast_fdct8x4)(short *input, short *output, int pitch); -void (*short_walsh4x4)(short *input, short *output, int pitch); - -void (*vp9_subtract_b)(BLOCK *be, BLOCKD *bd, int pitch); -void (*vp9_subtract_mby)(short *diff, unsigned char *src, unsigned char *pred, int stride); -void (*vp9_subtract_mbuv)(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride); -void (*vp8_fast_quantize_b)(BLOCK *b, BLOCKD *d); - -// c imports -extern int block_error_c(short *coeff, short *dqcoeff); -extern int vp9_mbblock_error_c(MACROBLOCK *mb, int dc); - -extern int vp9_mbuverror_c(MACROBLOCK *mb); -extern unsigned int vp8_get8x8var_c(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum); -extern void short_fdct4x4_c(short *input, short *output, int pitch); -extern void short_fdct8x4_c(short *input, short *output, int pitch); -extern void vp9_short_walsh4x4_c(short *input, short *output, int pitch); - -extern void vp9_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch); -extern void subtract_mby_c(short *diff, unsigned char *src, unsigned char *pred, int stride); -extern void subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride); -extern void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d); - -extern SADFunction sad16x16_c; -extern SADFunction sad16x8_c; -extern SADFunction sad8x16_c; -extern SADFunction sad8x8_c; -extern SADFunction sad4x4_c; - -extern variance_function variance16x16_c; -extern variance_function variance8x16_c; -extern variance_function variance16x8_c; -extern variance_function variance8x8_c; -extern variance_function variance4x4_c; -extern variance_function mse16x16_c; - -extern sub_pixel_variance_function sub_pixel_variance4x4_c; -extern sub_pixel_variance_function sub_pixel_variance8x8_c; -extern sub_pixel_variance_function sub_pixel_variance8x16_c; -extern sub_pixel_variance_function sub_pixel_variance16x8_c; -extern sub_pixel_variance_function sub_pixel_variance16x16_c; - -extern unsigned int vp9_get_mb_ss_c(short *); - -// ppc -extern int vp9_block_error_ppc(short *coeff, short *dqcoeff); - -extern void vp9_short_fdct4x4_ppc(short *input, short *output, int pitch); -extern void vp9_short_fdct8x4_ppc(short *input, short *output, int 
pitch); - -extern void vp9_subtract_mby_ppc(short *diff, unsigned char *src, unsigned char *pred, int stride); -extern void vp9_subtract_mbuv_ppc(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride); - -extern SADFunction vp9_sad16x16_ppc; -extern SADFunction vp9_sad16x8_ppc; -extern SADFunction vp9_sad8x16_ppc; -extern SADFunction vp9_sad8x8_ppc; -extern SADFunction vp9_sad4x4_ppc; - -extern variance_function vp9_variance16x16_ppc; -extern variance_function vp9_variance8x16_ppc; -extern variance_function vp9_variance16x8_ppc; -extern variance_function vp9_variance8x8_ppc; -extern variance_function vp9_variance4x4_ppc; -extern variance_function vp9_mse16x16_ppc; - -extern sub_pixel_variance_function vp9_sub_pixel_variance4x4_ppc; -extern sub_pixel_variance_function vp9_sub_pixel_variance8x8_ppc; -extern sub_pixel_variance_function vp9_sub_pixel_variance8x16_ppc; -extern sub_pixel_variance_function vp9_sub_pixel_variance16x8_ppc; -extern sub_pixel_variance_function vp9_sub_pixel_variance16x16_ppc; - -extern unsigned int vp8_get8x8var_ppc(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum); -extern unsigned int vp8_get16x16var_ppc(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum); - -void vp9_cmachine_specific_config(void) { - // Pure C: - vp9_mbuverror = vp9_mbuverror_c; - vp8_fast_quantize_b = vp8_fast_quantize_b_c; - vp9_short_fdct4x4 = vp9_short_fdct4x4_ppc; - vp9_short_fdct8x4 = vp9_short_fdct8x4_ppc; - vp8_fast_fdct4x4 = vp9_short_fdct4x4_ppc; - vp8_fast_fdct8x4 = vp9_short_fdct8x4_ppc; - short_walsh4x4 = vp9_short_walsh4x4_c; - - vp9_variance4x4 = vp9_variance4x4_ppc; - vp9_variance8x8 = vp9_variance8x8_ppc; - vp9_variance8x16 = vp9_variance8x16_ppc; - vp9_variance16x8 = vp9_variance16x8_ppc; - vp9_variance16x16 = vp9_variance16x16_ppc; - vp9_mse16x16 = vp9_mse16x16_ppc; - - vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_ppc; - vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_ppc; - vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_ppc; - vp9_sub_pixel_variance16x8 = vp9_sub_pixel_variance16x8_ppc; - vp9_sub_pixel_variance16x16 = vp9_sub_pixel_variance16x16_ppc; - - vp9_get_mb_ss = vp9_get_mb_ss_c; - - vp9_sad16x16 = vp9_sad16x16_ppc; - vp9_sad16x8 = vp9_sad16x8_ppc; - vp9_sad8x16 = vp9_sad8x16_ppc; - vp9_sad8x8 = vp9_sad8x8_ppc; - vp9_sad4x4 = vp9_sad4x4_ppc; - - vp9_block_error = vp9_block_error_ppc; - vp9_mbblock_error = vp9_mbblock_error_c; - - vp9_subtract_b = vp9_subtract_b_c; - vp9_subtract_mby = vp9_subtract_mby_ppc; - vp9_subtract_mbuv = vp9_subtract_mbuv_ppc; -} diff --git a/vp9/encoder/ppc/vp9_encodemb_altivec.asm b/vp9/encoder/ppc/vp9_encodemb_altivec.asm deleted file mode 100644 index 6e0099ddc885859ef8412ab85ce621eb36b6b636..0000000000000000000000000000000000000000 --- a/vp9/encoder/ppc/vp9_encodemb_altivec.asm +++ /dev/null @@ -1,153 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
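Aside for reviewers: the C file deleted above is the hand-rolled PowerPC dispatch shim. A minimal sketch of the pattern it implemented, with the declarations trimmed down (the shape mirrors the deleted code, but the helper names here are illustrative, not the library's current API):

```c
/* Function-pointer dispatch as used by the deleted
 * vp9_cmachine_specific_config(): each hot kernel is a global pointer,
 * bound once at startup to either the portable C routine or the
 * platform (here AltiVec) routine. */
typedef unsigned int (*sad_fn)(const unsigned char *src, int src_stride,
                               const unsigned char *ref, int ref_stride);

unsigned int sad16x16_c(const unsigned char *, int,
                        const unsigned char *, int);    /* generic fallback */
unsigned int sad16x16_ppc(const unsigned char *, int,
                          const unsigned char *, int);  /* AltiVec kernel */

sad_fn vp9_sad16x16_dispatch;

void machine_specific_config(int have_altivec) {
  vp9_sad16x16_dispatch = have_altivec ? sad16x16_ppc : sad16x16_c;
}
```

With the PPC port removed there is nothing left to bind but the C kernels, so the shim presumably goes away in favor of the generated RTCD tables used elsewhere in the tree.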
-; - - - .globl vp8_subtract_mbuv_ppc - .globl vp8_subtract_mby_ppc - -;# r3 short *diff -;# r4 unsigned char *usrc -;# r5 unsigned char *vsrc -;# r6 unsigned char *pred -;# r7 int stride -vp8_subtract_mbuv_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xf000 - mtspr 256, r12 ;# set VRSAVE - - li r9, 256 - add r3, r3, r9 - add r3, r3, r9 - add r6, r6, r9 - - li r10, 16 - li r9, 4 - mtctr r9 - - vspltisw v0, 0 - -mbu_loop: - lvsl v5, 0, r4 ;# permutate value for alignment - lvx v1, 0, r4 ;# src - lvx v2, 0, r6 ;# pred - - add r4, r4, r7 - addi r6, r6, 16 - - vperm v1, v1, v0, v5 - - vmrghb v3, v0, v1 ;# unpack high src to short - vmrghb v4, v0, v2 ;# unpack high pred to short - - lvsl v5, 0, r4 ;# permutate value for alignment - lvx v1, 0, r4 ;# src - - add r4, r4, r7 - - vsubshs v3, v3, v4 - - stvx v3, 0, r3 ;# store out diff - - vperm v1, v1, v0, v5 - - vmrghb v3, v0, v1 ;# unpack high src to short - vmrglb v4, v0, v2 ;# unpack high pred to short - - vsubshs v3, v3, v4 - - stvx v3, r10, r3 ;# store out diff - - addi r3, r3, 32 - - bdnz mbu_loop - - mtctr r9 - -mbv_loop: - lvsl v5, 0, r5 ;# permutate value for alignment - lvx v1, 0, r5 ;# src - lvx v2, 0, r6 ;# pred - - add r5, r5, r7 - addi r6, r6, 16 - - vperm v1, v1, v0, v5 - - vmrghb v3, v0, v1 ;# unpack high src to short - vmrghb v4, v0, v2 ;# unpack high pred to short - - lvsl v5, 0, r5 ;# permutate value for alignment - lvx v1, 0, r5 ;# src - - add r5, r5, r7 - - vsubshs v3, v3, v4 - - stvx v3, 0, r3 ;# store out diff - - vperm v1, v1, v0, v5 - - vmrghb v3, v0, v1 ;# unpack high src to short - vmrglb v4, v0, v2 ;# unpack high pred to short - - vsubshs v3, v3, v4 - - stvx v3, r10, r3 ;# store out diff - - addi r3, r3, 32 - - bdnz mbv_loop - - mtspr 256, r11 ;# reset old VRSAVE - - blr - -;# r3 short *diff -;# r4 unsigned char *src -;# r5 unsigned char *pred -;# r6 int stride -vp8_subtract_mby_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xf800 - mtspr 256, r12 ;# set VRSAVE - - li r10, 16 - mtctr r10 - - vspltisw v0, 0 - -mby_loop: - lvx v1, 0, r4 ;# src - lvx v2, 0, r5 ;# pred - - add r4, r4, r6 - addi r5, r5, 16 - - vmrghb v3, v0, v1 ;# unpack high src to short - vmrghb v4, v0, v2 ;# unpack high pred to short - - vsubshs v3, v3, v4 - - stvx v3, 0, r3 ;# store out diff - - vmrglb v3, v0, v1 ;# unpack low src to short - vmrglb v4, v0, v2 ;# unpack low pred to short - - vsubshs v3, v3, v4 - - stvx v3, r10, r3 ;# store out diff - - addi r3, r3, 32 - - bdnz mby_loop - - mtspr 256, r11 ;# reset old VRSAVE - - blr diff --git a/vp9/encoder/ppc/vp9_fdct_altivec.asm b/vp9/encoder/ppc/vp9_fdct_altivec.asm deleted file mode 100644 index 935d0cb097743755da5a427aef6db9efb16888f4..0000000000000000000000000000000000000000 --- a/vp9/encoder/ppc/vp9_fdct_altivec.asm +++ /dev/null @@ -1,205 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
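For reference while reviewing the deletion, vp8_subtract_mby_ppc above computes a plain residual. A scalar sketch of the same operation (note the asm walks src by `stride` but pred by a packed 16 bytes per row, matching the `addi r5, r5, 16`):

```c
/* Scalar model of the deleted AltiVec luma subtract:
 * 16x16 residual diff = src - pred; src strided, pred packed. */
void subtract_mby_sketch(short *diff, const unsigned char *src,
                         const unsigned char *pred, int stride) {
  int r, c;
  for (r = 0; r < 16; ++r) {
    for (c = 0; c < 16; ++c)
      diff[c] = (short)(src[c] - pred[c]);
    diff += 16;   /* 16 shorts = the two 16-byte stvx stores per row */
    src += stride;
    pred += 16;
  }
}
```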
-; - - - .globl vp8_short_fdct4x4_ppc - .globl vp8_short_fdct8x4_ppc - -.macro load_c V, LABEL, OFF, R0, R1 - lis \R0, \LABEL@ha - la \R1, \LABEL@l(\R0) - lvx \V, \OFF, \R1 -.endm - -;# Forward and inverse DCTs are nearly identical; only differences are -;# in normalization (fwd is twice unitary, inv is half unitary) -;# and that they are of course transposes of each other. -;# -;# The following three accomplish most of implementation and -;# are used only by ppc_idct.c and ppc_fdct.c. -.macro prologue - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xfffc - mtspr 256, r12 ;# set VRSAVE - - stwu r1,-32(r1) ;# create space on the stack - - li r6, 16 - - load_c v0, dct_tab, 0, r9, r10 - lvx v1, r6, r10 - addi r10, r10, 32 - lvx v2, 0, r10 - lvx v3, r6, r10 - - load_c v4, ppc_dctperm_tab, 0, r9, r10 - load_c v5, ppc_dctperm_tab, r6, r9, r10 - - load_c v6, round_tab, 0, r10, r9 -.endm - -.macro epilogue - addi r1, r1, 32 ;# recover stack - - mtspr 256, r11 ;# reset old VRSAVE -.endm - -;# Do horiz xf on two rows of coeffs v8 = a0 a1 a2 a3 b0 b1 b2 b3. -;# a/A are the even rows 0,2 b/B are the odd rows 1,3 -;# For fwd transform, indices are horizontal positions, then frequencies. -;# For inverse transform, frequencies then positions. -;# The two resulting A0..A3 B0..B3 are later combined -;# and vertically transformed. - -.macro two_rows_horiz Dst - vperm v9, v8, v8, v4 ;# v9 = a2 a3 a0 a1 b2 b3 b0 b1 - - vmsumshm v10, v0, v8, v6 - vmsumshm v10, v1, v9, v10 - vsraw v10, v10, v7 ;# v10 = A0 A1 B0 B1 - - vmsumshm v11, v2, v8, v6 - vmsumshm v11, v3, v9, v11 - vsraw v11, v11, v7 ;# v11 = A2 A3 B2 B3 - - vpkuwum v10, v10, v11 ;# v10 = A0 A1 B0 B1 A2 A3 B2 B3 - vperm \Dst, v10, v10, v5 ;# Dest = A0 B0 A1 B1 A2 B2 A3 B3 -.endm - -;# Vertical xf on two rows. DCT values in comments are for inverse transform; -;# forward transform uses transpose. 
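One fixed-point idiom recurs throughout this file: vmsumshm starts its accumulation from a preloaded round constant and vsraw shifts the sum back down. In scalar terms (a hypothetical helper, not code from the tree):

```c
/* Rounded multiply-accumulate-shift, the vmsumshm + vsraw pattern:
 * seeding the accumulator with 1 << (shift - 1) (the round_tab entries)
 * turns the final arithmetic right shift into round-to-nearest division. */
static int mul_acc_round_shift(const short *taps, const short *px,
                               int n, int shift) {
  int acc = 1 << (shift - 1);
  int i;
  for (i = 0; i < n; ++i)
    acc += taps[i] * px[i];
  return acc >> shift;
}
```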
- -.macro two_rows_vert Ceven, Codd - vspltw v8, \Ceven, 0 ;# v8 = c00 c10 or c02 c12 four times - vspltw v9, \Codd, 0 ;# v9 = c20 c30 or c22 c32 "" - vmsumshm v8, v8, v12, v6 - vmsumshm v8, v9, v13, v8 - vsraw v10, v8, v7 - - vspltw v8, \Codd, 1 ;# v8 = c01 c11 or c03 c13 - vspltw v9, \Ceven, 1 ;# v9 = c21 c31 or c23 c33 - vmsumshm v8, v8, v12, v6 - vmsumshm v8, v9, v13, v8 - vsraw v8, v8, v7 - - vpkuwum v8, v10, v8 ;# v8 = rows 0,1 or 2,3 -.endm - -.macro two_rows_h Dest - stw r0, 0(r8) - lwz r0, 4(r3) - stw r0, 4(r8) - lwzux r0, r3,r5 - stw r0, 8(r8) - lwz r0, 4(r3) - stw r0, 12(r8) - lvx v8, 0,r8 - two_rows_horiz \Dest -.endm - - .align 2 -;# r3 short *input -;# r4 short *output -;# r5 int pitch -vp8_short_fdct4x4_ppc: - - prologue - - vspltisw v7, 14 ;# == 14, fits in 5 signed bits - addi r8, r1, 0 - - - lwz r0, 0(r3) - two_rows_h v12 ;# v12 = H00 H10 H01 H11 H02 H12 H03 H13 - - lwzux r0, r3, r5 - two_rows_h v13 ;# v13 = H20 H30 H21 H31 H22 H32 H23 H33 - - lvx v6, r6, r9 ;# v6 = Vround - vspltisw v7, -16 ;# == 16 == -16, only low 5 bits matter - - two_rows_vert v0, v1 - stvx v8, 0, r4 - two_rows_vert v2, v3 - stvx v8, r6, r4 - - epilogue - - blr - - .align 2 -;# r3 short *input -;# r4 short *output -;# r5 int pitch -vp8_short_fdct8x4_ppc: - prologue - - vspltisw v7, 14 ;# == 14, fits in 5 signed bits - addi r8, r1, 0 - addi r10, r3, 0 - - lwz r0, 0(r3) - two_rows_h v12 ;# v12 = H00 H10 H01 H11 H02 H12 H03 H13 - - lwzux r0, r3, r5 - two_rows_h v13 ;# v13 = H20 H30 H21 H31 H22 H32 H23 H33 - - lvx v6, r6, r9 ;# v6 = Vround - vspltisw v7, -16 ;# == 16 == -16, only low 5 bits matter - - two_rows_vert v0, v1 - stvx v8, 0, r4 - two_rows_vert v2, v3 - stvx v8, r6, r4 - - ;# Next block - addi r3, r10, 8 - addi r4, r4, 32 - lvx v6, 0, r9 ;# v6 = Hround - - vspltisw v7, 14 ;# == 14, fits in 5 signed bits - addi r8, r1, 0 - - lwz r0, 0(r3) - two_rows_h v12 ;# v12 = H00 H10 H01 H11 H02 H12 H03 H13 - - lwzux r0, r3, r5 - two_rows_h v13 ;# v13 = H20 H30 H21 H31 H22 H32 H23 H33 - - lvx v6, r6, r9 ;# v6 = Vround - vspltisw v7, -16 ;# == 16 == -16, only low 5 bits matter - - two_rows_vert v0, v1 - stvx v8, 0, r4 - two_rows_vert v2, v3 - stvx v8, r6, r4 - - epilogue - - blr - - .data - .align 4 -ppc_dctperm_tab: - .byte 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 - .byte 0,1,4,5, 2,3,6,7, 8,9,12,13, 10,11,14,15 - - .align 4 -dct_tab: - .short 23170, 23170,-12540,-30274, 23170, 23170,-12540,-30274 - .short 23170, 23170, 30274, 12540, 23170, 23170, 30274, 12540 - - .short 23170,-23170, 30274,-12540, 23170,-23170, 30274,-12540 - .short -23170, 23170, 12540,-30274,-23170, 23170, 12540,-30274 - - .align 4 -round_tab: - .long (1 << (14-1)), (1 << (14-1)), (1 << (14-1)), (1 << (14-1)) - .long (1 << (16-1)), (1 << (16-1)), (1 << (16-1)), (1 << (16-1)) diff --git a/vp9/encoder/ppc/vp9_rdopt_altivec.asm b/vp9/encoder/ppc/vp9_rdopt_altivec.asm deleted file mode 100644 index ba482300973ca6fdd1eaa57621383b2957e1dad4..0000000000000000000000000000000000000000 --- a/vp9/encoder/ppc/vp9_rdopt_altivec.asm +++ /dev/null @@ -1,51 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
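The dct_tab constants above are 15-bit fixed-point (Q15) DCT-II basis values, and round_tab holds the matching 1 << (shift - 1) biases. A quick check of where the magic numbers come from (M_PI assumes a POSIX <math.h>; link with -lm):

```c
#include <math.h>
#include <stdio.h>

int main(void) {
  printf("%.0f\n", round(cos(M_PI / 4.0) * 32768));  /* 23170 */
  printf("%.0f\n", round(cos(M_PI / 8.0) * 32768));  /* 30274 */
  printf("%.0f\n", round(sin(M_PI / 8.0) * 32768));  /* 12540 */
  return 0;
}
```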
-; - - - .globl vp8_block_error_ppc - - .align 2 -;# r3 short *Coeff -;# r4 short *dqcoeff -vp8_block_error_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xf800 - mtspr 256, r12 ;# set VRSAVE - - stwu r1,-32(r1) ;# create space on the stack - - stw r5, 12(r1) ;# tranfer dc to vector register - - lvx v0, 0, r3 ;# Coeff - lvx v1, 0, r4 ;# dqcoeff - - li r10, 16 - - vspltisw v3, 0 - - vsubshs v0, v0, v1 - - vmsumshm v2, v0, v0, v3 ;# multiply differences - - lvx v0, r10, r3 ;# Coeff - lvx v1, r10, r4 ;# dqcoeff - - vsubshs v0, v0, v1 - - vmsumshm v1, v0, v0, v2 ;# multiply differences - vsumsws v1, v1, v3 ;# sum up - - stvx v1, 0, r1 - lwz r3, 12(r1) ;# return value - - addi r1, r1, 32 ;# recover stack - mtspr 256, r11 ;# reset old VRSAVE - - blr diff --git a/vp9/encoder/ppc/vp9_sad_altivec.asm b/vp9/encoder/ppc/vp9_sad_altivec.asm deleted file mode 100644 index e5f26380f96be5c0dc615db4113a8dff88d0d119..0000000000000000000000000000000000000000 --- a/vp9/encoder/ppc/vp9_sad_altivec.asm +++ /dev/null @@ -1,277 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - .globl vp8_sad16x16_ppc - .globl vp8_sad16x8_ppc - .globl vp8_sad8x16_ppc - .globl vp8_sad8x8_ppc - .globl vp8_sad4x4_ppc - -.macro load_aligned_16 V R O - lvsl v3, 0, \R ;# permutate value for alignment - - lvx v1, 0, \R - lvx v2, \O, \R - - vperm \V, v1, v2, v3 -.endm - -.macro prologue - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffc0 - mtspr 256, r12 ;# set VRSAVE - - stwu r1, -32(r1) ;# create space on the stack - - li r10, 16 ;# load offset and loop counter - - vspltisw v8, 0 ;# zero out total to start -.endm - -.macro epilogue - addi r1, r1, 32 ;# recover stack - - mtspr 256, r11 ;# reset old VRSAVE -.endm - -.macro SAD_16 - ;# v6 = abs (v4 - v5) - vsububs v6, v4, v5 - vsububs v7, v5, v4 - vor v6, v6, v7 - - ;# v8 += abs (v4 - v5) - vsum4ubs v8, v6, v8 -.endm - -.macro sad_16_loop loop_label - lvsl v3, 0, r5 ;# only needs to be done once per block - - ;# preload a line of data before getting into the loop - lvx v4, 0, r3 - lvx v1, 0, r5 - lvx v2, r10, r5 - - add r5, r5, r6 - add r3, r3, r4 - - vperm v5, v1, v2, v3 - - .align 4 -\loop_label: - ;# compute difference on first row - vsububs v6, v4, v5 - vsububs v7, v5, v4 - - ;# load up next set of data - lvx v9, 0, r3 - lvx v1, 0, r5 - lvx v2, r10, r5 - - ;# perform abs() of difference - vor v6, v6, v7 - add r3, r3, r4 - - ;# add to the running tally - vsum4ubs v8, v6, v8 - - ;# now onto the next line - vperm v5, v1, v2, v3 - add r5, r5, r6 - lvx v4, 0, r3 - - ;# compute difference on second row - vsububs v6, v9, v5 - lvx v1, 0, r5 - vsububs v7, v5, v9 - lvx v2, r10, r5 - vor v6, v6, v7 - add r3, r3, r4 - vsum4ubs v8, v6, v8 - vperm v5, v1, v2, v3 - add r5, r5, r6 - - bdnz \loop_label - - vspltisw v7, 0 - - vsumsws v8, v8, v7 - - stvx v8, 0, r1 - lwz r3, 12(r1) -.endm - -.macro sad_8_loop loop_label - .align 4 -\loop_label: - ;# only one of the inputs should need to be aligned. - load_aligned_16 v4, r3, r10 - load_aligned_16 v5, r5, r10 - - ;# move onto the next line - add r3, r3, r4 - add r5, r5, r6 - - ;# only one of the inputs should need to be aligned. 
- load_aligned_16 v6, r3, r10 - load_aligned_16 v7, r5, r10 - - ;# move onto the next line - add r3, r3, r4 - add r5, r5, r6 - - vmrghb v4, v4, v6 - vmrghb v5, v5, v7 - - SAD_16 - - bdnz \loop_label - - vspltisw v7, 0 - - vsumsws v8, v8, v7 - - stvx v8, 0, r1 - lwz r3, 12(r1) -.endm - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int src_stride -;# r5 unsigned char *ref_ptr -;# r6 int ref_stride -;# -;# r3 return value -vp8_sad16x16_ppc: - - prologue - - li r9, 8 - mtctr r9 - - sad_16_loop sad16x16_loop - - epilogue - - blr - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int src_stride -;# r5 unsigned char *ref_ptr -;# r6 int ref_stride -;# -;# r3 return value -vp8_sad16x8_ppc: - - prologue - - li r9, 4 - mtctr r9 - - sad_16_loop sad16x8_loop - - epilogue - - blr - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int src_stride -;# r5 unsigned char *ref_ptr -;# r6 int ref_stride -;# -;# r3 return value -vp8_sad8x16_ppc: - - prologue - - li r9, 8 - mtctr r9 - - sad_8_loop sad8x16_loop - - epilogue - - blr - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int src_stride -;# r5 unsigned char *ref_ptr -;# r6 int ref_stride -;# -;# r3 return value -vp8_sad8x8_ppc: - - prologue - - li r9, 4 - mtctr r9 - - sad_8_loop sad8x8_loop - - epilogue - - blr - -.macro transfer_4x4 I P - lwz r0, 0(\I) - add \I, \I, \P - - lwz r7, 0(\I) - add \I, \I, \P - - lwz r8, 0(\I) - add \I, \I, \P - - lwz r9, 0(\I) - - stw r0, 0(r1) - stw r7, 4(r1) - stw r8, 8(r1) - stw r9, 12(r1) -.endm - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int src_stride -;# r5 unsigned char *ref_ptr -;# r6 int ref_stride -;# -;# r3 return value -vp8_sad4x4_ppc: - - prologue - - transfer_4x4 r3, r4 - lvx v4, 0, r1 - - transfer_4x4 r5, r6 - lvx v5, 0, r1 - - vspltisw v8, 0 ;# zero out total to start - - ;# v6 = abs (v4 - v5) - vsububs v6, v4, v5 - vsububs v7, v5, v4 - vor v6, v6, v7 - - ;# v8 += abs (v4 - v5) - vsum4ubs v7, v6, v8 - vsumsws v7, v7, v8 - - stvx v7, 0, r1 - lwz r3, 12(r1) - - epilogue - - blr diff --git a/vp9/encoder/ppc/vp9_variance_altivec.asm b/vp9/encoder/ppc/vp9_variance_altivec.asm deleted file mode 100644 index ad2664143098418b87e3d748524b29ad95686c48..0000000000000000000000000000000000000000 --- a/vp9/encoder/ppc/vp9_variance_altivec.asm +++ /dev/null @@ -1,375 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
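The SAD kernels deleted above all compute the value sketched below; the vector versions form |a - b| from two saturating unsigned subtracts OR'd together (vsububs/vor) and accumulate four bytes at a time with vsum4ubs. A scalar reference sketch:

```c
#include <stdlib.h>

/* Sum of absolute differences over a width x height block. */
unsigned int sad_sketch(const unsigned char *src, int src_stride,
                        const unsigned char *ref, int ref_stride,
                        int width, int height) {
  unsigned int sad = 0;
  int r, c;
  for (r = 0; r < height; ++r) {
    for (c = 0; c < width; ++c)
      sad += (unsigned int)abs(src[c] - ref[c]);
    src += src_stride;
    ref += ref_stride;
  }
  return sad;
}
```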
-; - - - .globl vp8_get8x8var_ppc - .globl vp8_get16x16var_ppc - .globl vp8_mse16x16_ppc - .globl vp9_variance16x16_ppc - .globl vp9_variance16x8_ppc - .globl vp9_variance8x16_ppc - .globl vp9_variance8x8_ppc - .globl vp9_variance4x4_ppc - -.macro load_aligned_16 V R O - lvsl v3, 0, \R ;# permutate value for alignment - - lvx v1, 0, \R - lvx v2, \O, \R - - vperm \V, v1, v2, v3 -.endm - -.macro prologue - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffc0 - mtspr 256, r12 ;# set VRSAVE - - stwu r1, -32(r1) ;# create space on the stack - - li r10, 16 ;# load offset and loop counter - - vspltisw v7, 0 ;# zero for merging - vspltisw v8, 0 ;# zero out total to start - vspltisw v9, 0 ;# zero out total for dif^2 -.endm - -.macro epilogue - addi r1, r1, 32 ;# recover stack - - mtspr 256, r11 ;# reset old VRSAVE -.endm - -.macro compute_sum_sse - ;# Compute sum first. Unpack to so signed subract - ;# can be used. Only have a half word signed - ;# subract. Do high, then low. - vmrghb v2, v7, v4 - vmrghb v3, v7, v5 - vsubshs v2, v2, v3 - vsum4shs v8, v2, v8 - - vmrglb v2, v7, v4 - vmrglb v3, v7, v5 - vsubshs v2, v2, v3 - vsum4shs v8, v2, v8 - - ;# Now compute sse. - vsububs v2, v4, v5 - vsububs v3, v5, v4 - vor v2, v2, v3 - - vmsumubm v9, v2, v2, v9 -.endm - -.macro variance_16 DS loop_label store_sum -\loop_label: - ;# only one of the inputs should need to be aligned. - load_aligned_16 v4, r3, r10 - load_aligned_16 v5, r5, r10 - - ;# move onto the next line - add r3, r3, r4 - add r5, r5, r6 - - compute_sum_sse - - bdnz \loop_label - - vsumsws v8, v8, v7 - vsumsws v9, v9, v7 - - stvx v8, 0, r1 - lwz r3, 12(r1) - - stvx v9, 0, r1 - lwz r4, 12(r1) - -.if \store_sum - stw r3, 0(r8) ;# sum -.endif - stw r4, 0(r7) ;# sse - - mullw r3, r3, r3 ;# sum*sum - srawi r3, r3, \DS ;# (sum*sum) >> DS - subf r3, r3, r4 ;# sse - ((sum*sum) >> DS) -.endm - -.macro variance_8 DS loop_label store_sum -\loop_label: - ;# only one of the inputs should need to be aligned. - load_aligned_16 v4, r3, r10 - load_aligned_16 v5, r5, r10 - - ;# move onto the next line - add r3, r3, r4 - add r5, r5, r6 - - ;# only one of the inputs should need to be aligned. 
- load_aligned_16 v6, r3, r10 - load_aligned_16 v0, r5, r10 - - ;# move onto the next line - add r3, r3, r4 - add r5, r5, r6 - - vmrghb v4, v4, v6 - vmrghb v5, v5, v0 - - compute_sum_sse - - bdnz \loop_label - - vsumsws v8, v8, v7 - vsumsws v9, v9, v7 - - stvx v8, 0, r1 - lwz r3, 12(r1) - - stvx v9, 0, r1 - lwz r4, 12(r1) - -.if \store_sum - stw r3, 0(r8) ;# sum -.endif - stw r4, 0(r7) ;# sse - - mullw r3, r3, r3 ;# sum*sum - srawi r3, r3, \DS ;# (sum*sum) >> 8 - subf r3, r3, r4 ;# sse - ((sum*sum) >> 8) -.endm - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int source_stride -;# r5 unsigned char *ref_ptr -;# r6 int recon_stride -;# r7 unsigned int *SSE -;# r8 int *Sum -;# -;# r3 return value -vp8_get8x8var_ppc: - - prologue - - li r9, 4 - mtctr r9 - - variance_8 6, get8x8var_loop, 1 - - epilogue - - blr - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int source_stride -;# r5 unsigned char *ref_ptr -;# r6 int recon_stride -;# r7 unsigned int *SSE -;# r8 int *Sum -;# -;# r3 return value -vp8_get16x16var_ppc: - - prologue - - mtctr r10 - - variance_16 8, get16x16var_loop, 1 - - epilogue - - blr - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int source_stride -;# r5 unsigned char *ref_ptr -;# r6 int recon_stride -;# r7 unsigned int *sse -;# -;# r 3 return value -vp8_mse16x16_ppc: - prologue - - mtctr r10 - -mse16x16_loop: - ;# only one of the inputs should need to be aligned. - load_aligned_16 v4, r3, r10 - load_aligned_16 v5, r5, r10 - - ;# move onto the next line - add r3, r3, r4 - add r5, r5, r6 - - ;# Now compute sse. - vsububs v2, v4, v5 - vsububs v3, v5, v4 - vor v2, v2, v3 - - vmsumubm v9, v2, v2, v9 - - bdnz mse16x16_loop - - vsumsws v9, v9, v7 - - stvx v9, 0, r1 - lwz r3, 12(r1) - - stvx v9, 0, r1 - lwz r3, 12(r1) - - stw r3, 0(r7) ;# sse - - epilogue - - blr - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int source_stride -;# r5 unsigned char *ref_ptr -;# r6 int recon_stride -;# r7 unsigned int *sse -;# -;# r3 return value -vp9_variance16x16_ppc: - - prologue - - mtctr r10 - - variance_16 8, variance16x16_loop, 0 - - epilogue - - blr - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int source_stride -;# r5 unsigned char *ref_ptr -;# r6 int recon_stride -;# r7 unsigned int *sse -;# -;# r3 return value -vp9_variance16x8_ppc: - - prologue - - li r9, 8 - mtctr r9 - - variance_16 7, variance16x8_loop, 0 - - epilogue - - blr - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int source_stride -;# r5 unsigned char *ref_ptr -;# r6 int recon_stride -;# r7 unsigned int *sse -;# -;# r3 return value -vp9_variance8x16_ppc: - - prologue - - li r9, 8 - mtctr r9 - - variance_8 7, variance8x16_loop, 0 - - epilogue - - blr - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int source_stride -;# r5 unsigned char *ref_ptr -;# r6 int recon_stride -;# r7 unsigned int *sse -;# -;# r3 return value -vp9_variance8x8_ppc: - - prologue - - li r9, 4 - mtctr r9 - - variance_8 6, variance8x8_loop, 0 - - epilogue - - blr - -.macro transfer_4x4 I P - lwz r0, 0(\I) - add \I, \I, \P - - lwz r10,0(\I) - add \I, \I, \P - - lwz r8, 0(\I) - add \I, \I, \P - - lwz r9, 0(\I) - - stw r0, 0(r1) - stw r10, 4(r1) - stw r8, 8(r1) - stw r9, 12(r1) -.endm - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int source_stride -;# r5 unsigned char *ref_ptr -;# r6 int recon_stride -;# r7 unsigned int *sse -;# -;# r3 return value -vp9_variance4x4_ppc: - - prologue - - transfer_4x4 r3, r4 - lvx v4, 0, r1 - - transfer_4x4 r5, r6 - lvx v5, 0, r1 - - compute_sum_sse - - vsumsws v8, v8, v7 - vsumsws v9, v9, v7 - - stvx v8, 0, r1 - 
lwz r3, 12(r1) - - stvx v9, 0, r1 - lwz r4, 12(r1) - - stw r4, 0(r7) ;# sse - - mullw r3, r3, r3 ;# sum*sum - srawi r3, r3, 4 ;# (sum*sum) >> 4 - subf r3, r3, r4 ;# sse - ((sum*sum) >> 4) - - epilogue - - blr diff --git a/vp9/encoder/ppc/vp9_variance_subpixel_altivec.asm b/vp9/encoder/ppc/vp9_variance_subpixel_altivec.asm deleted file mode 100644 index 26cc76f73fa9d6fe69b503645439ce9ec08cbc6e..0000000000000000000000000000000000000000 --- a/vp9/encoder/ppc/vp9_variance_subpixel_altivec.asm +++ /dev/null @@ -1,865 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - .globl vp9_sub_pixel_variance4x4_ppc - .globl vp9_sub_pixel_variance8x8_ppc - .globl vp9_sub_pixel_variance8x16_ppc - .globl vp9_sub_pixel_variance16x8_ppc - .globl vp9_sub_pixel_variance16x16_ppc - -.macro load_c V, LABEL, OFF, R0, R1 - lis \R0, \LABEL@ha - la \R1, \LABEL@l(\R0) - lvx \V, \OFF, \R1 -.endm - -.macro load_vfilter V0, V1 - load_c \V0, vfilter_b, r6, r12, r10 - - addi r6, r6, 16 - lvx \V1, r6, r10 -.endm - -.macro HProlog jump_label - ;# load up horizontal filter - slwi. r5, r5, 4 ;# index into horizontal filter array - - ;# index to the next set of vectors in the row. - li r10, 16 - - ;# downshift by 7 ( divide by 128 ) at the end - vspltish v19, 7 - - ;# If there isn't any filtering to be done for the horizontal, then - ;# just skip to the second pass. - beq \jump_label - - load_c v20, hfilter_b, r5, r12, r0 - - ;# setup constants - ;# v14 permutation value for alignment - load_c v28, b_hperm_b, 0, r12, r0 - - ;# index to the next set of vectors in the row. - li r12, 32 - - ;# rounding added in on the multiply - vspltisw v21, 8 - vspltisw v18, 3 - vslw v18, v21, v18 ;# 0x00000040000000400000004000000040 - - slwi. r6, r6, 5 ;# index into vertical filter array -.endm - -;# Filters a horizontal line -;# expects: -;# r3 src_ptr -;# r4 pitch -;# r10 16 -;# r12 32 -;# v17 perm intput -;# v18 rounding -;# v19 shift -;# v20 filter taps -;# v21 tmp -;# v22 tmp -;# v23 tmp -;# v24 tmp -;# v25 tmp -;# v26 tmp -;# v27 tmp -;# v28 perm output -;# - -.macro hfilter_8 V, hp, lp, increment_counter - lvsl v17, 0, r3 ;# permutate value for alignment - - ;# input to filter is 9 bytes wide, output is 8 bytes. 
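Both the plain variance kernels above and the sub-pixel variants that follow end with the same mullw/srawi/subf reduction: variance = SSE - sum*sum / N, with the division a shift because N is a power of two (shift 8 for 16x16, 7 for 16x8 and 8x16, 6 for 8x8, 4 for 4x4). A sketch, widening the product to 64 bits (the deleted asm keeps the product in 32 bits via mullw, as the reference C code also did):

```c
#include <stdint.h>

/* variance = SSE - (sum^2 >> log2(N)); N = pixels in the block. */
unsigned int variance_sketch(int sum, unsigned int sse, int log2_count) {
  return sse - (unsigned int)(((int64_t)sum * sum) >> log2_count);
}
```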
- lvx v21, 0, r3 - lvx v22, r10, r3 - -.if \increment_counter - add r3, r3, r4 -.endif - vperm v21, v21, v22, v17 - - vperm v24, v21, v21, \hp ;# v20 = 0123 1234 2345 3456 - vperm v25, v21, v21, \lp ;# v21 = 4567 5678 6789 789A - - vmsummbm v24, v20, v24, v18 - vmsummbm v25, v20, v25, v18 - - vpkswus v24, v24, v25 ;# v24 = 0 4 8 C 1 5 9 D (16-bit) - - vsrh v24, v24, v19 ;# divide v0, v1 by 128 - - vpkuhus \V, v24, v24 ;# \V = scrambled 8-bit result -.endm - -.macro vfilter_16 P0 P1 - vmuleub v22, \P0, v20 ;# 64 + 4 positive taps - vadduhm v22, v18, v22 - vmuloub v23, \P0, v20 - vadduhm v23, v18, v23 - - vmuleub v24, \P1, v21 - vadduhm v22, v22, v24 ;# Re = evens, saturation unnecessary - vmuloub v25, \P1, v21 - vadduhm v23, v23, v25 ;# Ro = odds - - vsrh v22, v22, v19 ;# divide by 128 - vsrh v23, v23, v19 ;# v16 v17 = evens, odds - vmrghh \P0, v22, v23 ;# v18 v19 = 16-bit result in order - vmrglh v23, v22, v23 - vpkuhus \P0, \P0, v23 ;# P0 = 8-bit result -.endm - -.macro compute_sum_sse src, ref, sum, sse, t1, t2, z0 - ;# Compute sum first. Unpack to so signed subract - ;# can be used. Only have a half word signed - ;# subract. Do high, then low. - vmrghb \t1, \z0, \src - vmrghb \t2, \z0, \ref - vsubshs \t1, \t1, \t2 - vsum4shs \sum, \t1, \sum - - vmrglb \t1, \z0, \src - vmrglb \t2, \z0, \ref - vsubshs \t1, \t1, \t2 - vsum4shs \sum, \t1, \sum - - ;# Now compute sse. - vsububs \t1, \src, \ref - vsububs \t2, \ref, \src - vor \t1, \t1, \t2 - - vmsumubm \sse, \t1, \t1, \sse -.endm - -.macro variance_final sum, sse, z0, DS - vsumsws \sum, \sum, \z0 - vsumsws \sse, \sse, \z0 - - stvx \sum, 0, r1 - lwz r3, 12(r1) - - stvx \sse, 0, r1 - lwz r4, 12(r1) - - stw r4, 0(r9) ;# sse - - mullw r3, r3, r3 ;# sum*sum - srawi r3, r3, \DS ;# (sum*sum) >> 8 - subf r3, r3, r4 ;# sse - ((sum*sum) >> 8) -.endm - -.macro compute_sum_sse_16 V, increment_counter - load_and_align_16 v16, r7, r8, \increment_counter - compute_sum_sse \V, v16, v18, v19, v20, v21, v23 -.endm - -.macro load_and_align_16 V, R, P, increment_counter - lvsl v17, 0, \R ;# permutate value for alignment - - ;# input to filter is 21 bytes wide, output is 16 bytes. - ;# input will can span three vectors if not aligned correctly. - lvx v21, 0, \R - lvx v22, r10, \R - -.if \increment_counter - add \R, \R, \P -.endif - - vperm \V, v21, v22, v17 -.endm - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int src_pixels_per_line -;# r5 int xoffset -;# r6 int yoffset -;# r7 unsigned char *dst_ptr -;# r8 int dst_pixels_per_line -;# r9 unsigned int *sse -;# -;# r3 return value -vp9_sub_pixel_variance4x4_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xf830 - ori r12, r12, 0xfff8 - mtspr 256, r12 ;# set VRSAVE - - stwu r1,-32(r1) ;# create space on the stack - - HProlog second_pass_4x4_pre_copy_b - - ;# Load up permutation constants - load_c v10, b_0123_b, 0, r12, r0 - load_c v11, b_4567_b, 0, r12, r0 - - hfilter_8 v0, v10, v11, 1 - hfilter_8 v1, v10, v11, 1 - hfilter_8 v2, v10, v11, 1 - hfilter_8 v3, v10, v11, 1 - - ;# Finished filtering main horizontal block. If there is no - ;# vertical filtering, jump to storing the data. Otherwise - ;# load up and filter the additional line that is needed - ;# for the vertical filter. 
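Context for the control flow above: these routines optionally interpolate the source with a separable two-tap bilinear filter, horizontal pass then vertical, which is why one extra filtered row is produced whenever the vertical pass will run. Each tap pair in hfilter_b/vfilter_b sums to 128 and the +64 bias rounds the final >> 7, so one filter step is, in scalar form:

```c
/* One bilinear tap step; f in {0, 16, ..., 112} picks the
 * (128 - f, f) pair from the filter tables at the end of this file. */
static unsigned char bilinear_step(unsigned char a, unsigned char b, int f) {
  return (unsigned char)(((128 - f) * a + f * b + 64) >> 7);
}
```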
- beq compute_sum_sse_4x4_b - - hfilter_8 v4, v10, v11, 0 - - b second_pass_4x4_b - -second_pass_4x4_pre_copy_b: - slwi r6, r6, 5 ;# index into vertical filter array - - load_and_align_16 v0, r3, r4, 1 - load_and_align_16 v1, r3, r4, 1 - load_and_align_16 v2, r3, r4, 1 - load_and_align_16 v3, r3, r4, 1 - load_and_align_16 v4, r3, r4, 0 - -second_pass_4x4_b: - vspltish v20, 8 - vspltish v18, 3 - vslh v18, v20, v18 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040 - - load_vfilter v20, v21 - - vfilter_16 v0, v1 - vfilter_16 v1, v2 - vfilter_16 v2, v3 - vfilter_16 v3, v4 - -compute_sum_sse_4x4_b: - vspltish v18, 0 ;# sum - vspltish v19, 0 ;# sse - vspltish v23, 0 ;# unpack - li r10, 16 - - load_and_align_16 v4, r7, r8, 1 - load_and_align_16 v5, r7, r8, 1 - load_and_align_16 v6, r7, r8, 1 - load_and_align_16 v7, r7, r8, 1 - - vmrghb v0, v0, v1 - vmrghb v1, v2, v3 - - vmrghb v2, v4, v5 - vmrghb v3, v6, v7 - - load_c v10, b_hilo_b, 0, r12, r0 - - vperm v0, v0, v1, v10 - vperm v1, v2, v3, v10 - - compute_sum_sse v0, v1, v18, v19, v20, v21, v23 - - variance_final v18, v19, v23, 4 - - addi r1, r1, 32 ;# recover stack - mtspr 256, r11 ;# reset old VRSAVE - - blr - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int src_pixels_per_line -;# r5 int xoffset -;# r6 int yoffset -;# r7 unsigned char *dst_ptr -;# r8 int dst_pixels_per_line -;# r9 unsigned int *sse -;# -;# r3 return value -vp9_sub_pixel_variance8x8_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xfff0 - ori r12, r12, 0xffff - mtspr 256, r12 ;# set VRSAVE - - stwu r1,-32(r1) ;# create space on the stack - - HProlog second_pass_8x8_pre_copy_b - - ;# Load up permutation constants - load_c v10, b_0123_b, 0, r12, r0 - load_c v11, b_4567_b, 0, r12, r0 - - hfilter_8 v0, v10, v11, 1 - hfilter_8 v1, v10, v11, 1 - hfilter_8 v2, v10, v11, 1 - hfilter_8 v3, v10, v11, 1 - hfilter_8 v4, v10, v11, 1 - hfilter_8 v5, v10, v11, 1 - hfilter_8 v6, v10, v11, 1 - hfilter_8 v7, v10, v11, 1 - - ;# Finished filtering main horizontal block. If there is no - ;# vertical filtering, jump to storing the data. Otherwise - ;# load up and filter the additional line that is needed - ;# for the vertical filter. - beq compute_sum_sse_8x8_b - - hfilter_8 v8, v10, v11, 0 - - b second_pass_8x8_b - -second_pass_8x8_pre_copy_b: - slwi. 
r6, r6, 5 ;# index into vertical filter array - - load_and_align_16 v0, r3, r4, 1 - load_and_align_16 v1, r3, r4, 1 - load_and_align_16 v2, r3, r4, 1 - load_and_align_16 v3, r3, r4, 1 - load_and_align_16 v4, r3, r4, 1 - load_and_align_16 v5, r3, r4, 1 - load_and_align_16 v6, r3, r4, 1 - load_and_align_16 v7, r3, r4, 1 - load_and_align_16 v8, r3, r4, 0 - - beq compute_sum_sse_8x8_b - -second_pass_8x8_b: - vspltish v20, 8 - vspltish v18, 3 - vslh v18, v20, v18 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040 - - load_vfilter v20, v21 - - vfilter_16 v0, v1 - vfilter_16 v1, v2 - vfilter_16 v2, v3 - vfilter_16 v3, v4 - vfilter_16 v4, v5 - vfilter_16 v5, v6 - vfilter_16 v6, v7 - vfilter_16 v7, v8 - -compute_sum_sse_8x8_b: - vspltish v18, 0 ;# sum - vspltish v19, 0 ;# sse - vspltish v23, 0 ;# unpack - li r10, 16 - - vmrghb v0, v0, v1 - vmrghb v1, v2, v3 - vmrghb v2, v4, v5 - vmrghb v3, v6, v7 - - load_and_align_16 v4, r7, r8, 1 - load_and_align_16 v5, r7, r8, 1 - load_and_align_16 v6, r7, r8, 1 - load_and_align_16 v7, r7, r8, 1 - load_and_align_16 v8, r7, r8, 1 - load_and_align_16 v9, r7, r8, 1 - load_and_align_16 v10, r7, r8, 1 - load_and_align_16 v11, r7, r8, 0 - - vmrghb v4, v4, v5 - vmrghb v5, v6, v7 - vmrghb v6, v8, v9 - vmrghb v7, v10, v11 - - compute_sum_sse v0, v4, v18, v19, v20, v21, v23 - compute_sum_sse v1, v5, v18, v19, v20, v21, v23 - compute_sum_sse v2, v6, v18, v19, v20, v21, v23 - compute_sum_sse v3, v7, v18, v19, v20, v21, v23 - - variance_final v18, v19, v23, 6 - - addi r1, r1, 32 ;# recover stack - mtspr 256, r11 ;# reset old VRSAVE - blr - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int src_pixels_per_line -;# r5 int xoffset -;# r6 int yoffset -;# r7 unsigned char *dst_ptr -;# r8 int dst_pixels_per_line -;# r9 unsigned int *sse -;# -;# r3 return value -vp9_sub_pixel_variance8x16_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffff - ori r12, r12, 0xfffc - mtspr 256, r12 ;# set VRSAVE - - stwu r1,-32(r1) ;# create space on the stack - - HProlog second_pass_8x16_pre_copy_b - - ;# Load up permutation constants - load_c v29, b_0123_b, 0, r12, r0 - load_c v30, b_4567_b, 0, r12, r0 - - hfilter_8 v0, v29, v30, 1 - hfilter_8 v1, v29, v30, 1 - hfilter_8 v2, v29, v30, 1 - hfilter_8 v3, v29, v30, 1 - hfilter_8 v4, v29, v30, 1 - hfilter_8 v5, v29, v30, 1 - hfilter_8 v6, v29, v30, 1 - hfilter_8 v7, v29, v30, 1 - hfilter_8 v8, v29, v30, 1 - hfilter_8 v9, v29, v30, 1 - hfilter_8 v10, v29, v30, 1 - hfilter_8 v11, v29, v30, 1 - hfilter_8 v12, v29, v30, 1 - hfilter_8 v13, v29, v30, 1 - hfilter_8 v14, v29, v30, 1 - hfilter_8 v15, v29, v30, 1 - - ;# Finished filtering main horizontal block. If there is no - ;# vertical filtering, jump to storing the data. Otherwise - ;# load up and filter the additional line that is needed - ;# for the vertical filter. - beq compute_sum_sse_8x16_b - - hfilter_8 v16, v29, v30, 0 - - b second_pass_8x16_b - -second_pass_8x16_pre_copy_b: - slwi. 
r6, r6, 5 ;# index into vertical filter array - - load_and_align_16 v0, r3, r4, 1 - load_and_align_16 v1, r3, r4, 1 - load_and_align_16 v2, r3, r4, 1 - load_and_align_16 v3, r3, r4, 1 - load_and_align_16 v4, r3, r4, 1 - load_and_align_16 v5, r3, r4, 1 - load_and_align_16 v6, r3, r4, 1 - load_and_align_16 v7, r3, r4, 1 - load_and_align_16 v8, r3, r4, 1 - load_and_align_16 v9, r3, r4, 1 - load_and_align_16 v10, r3, r4, 1 - load_and_align_16 v11, r3, r4, 1 - load_and_align_16 v12, r3, r4, 1 - load_and_align_16 v13, r3, r4, 1 - load_and_align_16 v14, r3, r4, 1 - load_and_align_16 v15, r3, r4, 1 - load_and_align_16 v16, r3, r4, 0 - - beq compute_sum_sse_8x16_b - -second_pass_8x16_b: - vspltish v20, 8 - vspltish v18, 3 - vslh v18, v20, v18 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040 - - load_vfilter v20, v21 - - vfilter_16 v0, v1 - vfilter_16 v1, v2 - vfilter_16 v2, v3 - vfilter_16 v3, v4 - vfilter_16 v4, v5 - vfilter_16 v5, v6 - vfilter_16 v6, v7 - vfilter_16 v7, v8 - vfilter_16 v8, v9 - vfilter_16 v9, v10 - vfilter_16 v10, v11 - vfilter_16 v11, v12 - vfilter_16 v12, v13 - vfilter_16 v13, v14 - vfilter_16 v14, v15 - vfilter_16 v15, v16 - -compute_sum_sse_8x16_b: - vspltish v18, 0 ;# sum - vspltish v19, 0 ;# sse - vspltish v23, 0 ;# unpack - li r10, 16 - - vmrghb v0, v0, v1 - vmrghb v1, v2, v3 - vmrghb v2, v4, v5 - vmrghb v3, v6, v7 - vmrghb v4, v8, v9 - vmrghb v5, v10, v11 - vmrghb v6, v12, v13 - vmrghb v7, v14, v15 - - load_and_align_16 v8, r7, r8, 1 - load_and_align_16 v9, r7, r8, 1 - load_and_align_16 v10, r7, r8, 1 - load_and_align_16 v11, r7, r8, 1 - load_and_align_16 v12, r7, r8, 1 - load_and_align_16 v13, r7, r8, 1 - load_and_align_16 v14, r7, r8, 1 - load_and_align_16 v15, r7, r8, 1 - - vmrghb v8, v8, v9 - vmrghb v9, v10, v11 - vmrghb v10, v12, v13 - vmrghb v11, v14, v15 - - compute_sum_sse v0, v8, v18, v19, v20, v21, v23 - compute_sum_sse v1, v9, v18, v19, v20, v21, v23 - compute_sum_sse v2, v10, v18, v19, v20, v21, v23 - compute_sum_sse v3, v11, v18, v19, v20, v21, v23 - - load_and_align_16 v8, r7, r8, 1 - load_and_align_16 v9, r7, r8, 1 - load_and_align_16 v10, r7, r8, 1 - load_and_align_16 v11, r7, r8, 1 - load_and_align_16 v12, r7, r8, 1 - load_and_align_16 v13, r7, r8, 1 - load_and_align_16 v14, r7, r8, 1 - load_and_align_16 v15, r7, r8, 0 - - vmrghb v8, v8, v9 - vmrghb v9, v10, v11 - vmrghb v10, v12, v13 - vmrghb v11, v14, v15 - - compute_sum_sse v4, v8, v18, v19, v20, v21, v23 - compute_sum_sse v5, v9, v18, v19, v20, v21, v23 - compute_sum_sse v6, v10, v18, v19, v20, v21, v23 - compute_sum_sse v7, v11, v18, v19, v20, v21, v23 - - variance_final v18, v19, v23, 7 - - addi r1, r1, 32 ;# recover stack - mtspr 256, r11 ;# reset old VRSAVE - blr - -;# Filters a horizontal line -;# expects: -;# r3 src_ptr -;# r4 pitch -;# r10 16 -;# r12 32 -;# v17 perm intput -;# v18 rounding -;# v19 shift -;# v20 filter taps -;# v21 tmp -;# v22 tmp -;# v23 tmp -;# v24 tmp -;# v25 tmp -;# v26 tmp -;# v27 tmp -;# v28 perm output -;# -.macro hfilter_16 V, increment_counter - - lvsl v17, 0, r3 ;# permutate value for alignment - - ;# input to filter is 21 bytes wide, output is 16 bytes. - ;# input will can span three vectors if not aligned correctly. 
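The lvsl/lvx/vperm triples in these macros are the standard AltiVec unaligned-load idiom: the ISA only loads 16-byte-aligned vectors, so an unaligned read is stitched together from the aligned blocks that cover it (hence the comment about 21 bytes of filter input spanning up to three vectors). Conceptually, ignoring the SIMD:

```c
#include <stdint.h>

/* Scalar model of lvsl + two lvx + vperm: merge the two aligned
 * 16-byte blocks covering an unaligned address into one vector. */
static void load_unaligned_16(const uint8_t *p, uint8_t out[16]) {
  const uint8_t *base = (const uint8_t *)((uintptr_t)p & ~(uintptr_t)15);
  unsigned off = (unsigned)((uintptr_t)p & 15);  /* what lvsl encodes */
  unsigned i;
  for (i = 0; i < 16; ++i)
    out[i] = base[off + i];  /* spans base[0..15] and base[16..31] */
}
```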
- lvx v21, 0, r3 - lvx v22, r10, r3 - lvx v23, r12, r3 - -.if \increment_counter - add r3, r3, r4 -.endif - vperm v21, v21, v22, v17 - vperm v22, v22, v23, v17 ;# v8 v9 = 21 input pixels left-justified - - ;# set 0 - vmsummbm v24, v20, v21, v18 ;# taps times elements - - ;# set 1 - vsldoi v23, v21, v22, 1 - vmsummbm v25, v20, v23, v18 - - ;# set 2 - vsldoi v23, v21, v22, 2 - vmsummbm v26, v20, v23, v18 - - ;# set 3 - vsldoi v23, v21, v22, 3 - vmsummbm v27, v20, v23, v18 - - vpkswus v24, v24, v25 ;# v24 = 0 4 8 C 1 5 9 D (16-bit) - vpkswus v25, v26, v27 ;# v25 = 2 6 A E 3 7 B F - - vsrh v24, v24, v19 ;# divide v0, v1 by 128 - vsrh v25, v25, v19 - - vpkuhus \V, v24, v25 ;# \V = scrambled 8-bit result - vperm \V, \V, v0, v28 ;# \V = correctly-ordered result -.endm - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int src_pixels_per_line -;# r5 int xoffset -;# r6 int yoffset -;# r7 unsigned char *dst_ptr -;# r8 int dst_pixels_per_line -;# r9 unsigned int *sse -;# -;# r3 return value -vp9_sub_pixel_variance16x8_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffff - ori r12, r12, 0xfff8 - mtspr 256, r12 ;# set VRSAVE - - stwu r1, -32(r1) ;# create space on the stack - - HProlog second_pass_16x8_pre_copy_b - - hfilter_16 v0, 1 - hfilter_16 v1, 1 - hfilter_16 v2, 1 - hfilter_16 v3, 1 - hfilter_16 v4, 1 - hfilter_16 v5, 1 - hfilter_16 v6, 1 - hfilter_16 v7, 1 - - ;# Finished filtering main horizontal block. If there is no - ;# vertical filtering, jump to storing the data. Otherwise - ;# load up and filter the additional line that is needed - ;# for the vertical filter. - beq compute_sum_sse_16x8_b - - hfilter_16 v8, 0 - - b second_pass_16x8_b - -second_pass_16x8_pre_copy_b: - slwi. r6, r6, 5 ;# index into vertical filter array - - load_and_align_16 v0, r3, r4, 1 - load_and_align_16 v1, r3, r4, 1 - load_and_align_16 v2, r3, r4, 1 - load_and_align_16 v3, r3, r4, 1 - load_and_align_16 v4, r3, r4, 1 - load_and_align_16 v5, r3, r4, 1 - load_and_align_16 v6, r3, r4, 1 - load_and_align_16 v7, r3, r4, 1 - load_and_align_16 v8, r3, r4, 1 - - beq compute_sum_sse_16x8_b - -second_pass_16x8_b: - vspltish v20, 8 - vspltish v18, 3 - vslh v18, v20, v18 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040 - - load_vfilter v20, v21 - - vfilter_16 v0, v1 - vfilter_16 v1, v2 - vfilter_16 v2, v3 - vfilter_16 v3, v4 - vfilter_16 v4, v5 - vfilter_16 v5, v6 - vfilter_16 v6, v7 - vfilter_16 v7, v8 - -compute_sum_sse_16x8_b: - vspltish v18, 0 ;# sum - vspltish v19, 0 ;# sse - vspltish v23, 0 ;# unpack - li r10, 16 - - compute_sum_sse_16 v0, 1 - compute_sum_sse_16 v1, 1 - compute_sum_sse_16 v2, 1 - compute_sum_sse_16 v3, 1 - compute_sum_sse_16 v4, 1 - compute_sum_sse_16 v5, 1 - compute_sum_sse_16 v6, 1 - compute_sum_sse_16 v7, 0 - - variance_final v18, v19, v23, 7 - - addi r1, r1, 32 ;# recover stack - - mtspr 256, r11 ;# reset old VRSAVE - - blr - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int src_pixels_per_line -;# r5 int xoffset -;# r6 int yoffset -;# r7 unsigned char *dst_ptr -;# r8 int dst_pixels_per_line -;# r9 unsigned int *sse -;# -;# r3 return value -vp9_sub_pixel_variance16x16_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffff - ori r12, r12, 0xfff8 - mtspr 256, r12 ;# set VRSAVE - - stwu r1, -32(r1) ;# create space on the stack - - HProlog second_pass_16x16_pre_copy_b - - hfilter_16 v0, 1 - hfilter_16 v1, 1 - hfilter_16 v2, 1 - hfilter_16 v3, 1 - hfilter_16 v4, 1 - hfilter_16 v5, 1 - hfilter_16 v6, 1 - hfilter_16 v7, 1 - hfilter_16 v8, 1 - hfilter_16 v9, 1 - hfilter_16 v10, 1 - hfilter_16 
v11, 1 - hfilter_16 v12, 1 - hfilter_16 v13, 1 - hfilter_16 v14, 1 - hfilter_16 v15, 1 - - ;# Finished filtering main horizontal block. If there is no - ;# vertical filtering, jump to storing the data. Otherwise - ;# load up and filter the additional line that is needed - ;# for the vertical filter. - beq compute_sum_sse_16x16_b - - hfilter_16 v16, 0 - - b second_pass_16x16_b - -second_pass_16x16_pre_copy_b: - slwi. r6, r6, 5 ;# index into vertical filter array - - load_and_align_16 v0, r3, r4, 1 - load_and_align_16 v1, r3, r4, 1 - load_and_align_16 v2, r3, r4, 1 - load_and_align_16 v3, r3, r4, 1 - load_and_align_16 v4, r3, r4, 1 - load_and_align_16 v5, r3, r4, 1 - load_and_align_16 v6, r3, r4, 1 - load_and_align_16 v7, r3, r4, 1 - load_and_align_16 v8, r3, r4, 1 - load_and_align_16 v9, r3, r4, 1 - load_and_align_16 v10, r3, r4, 1 - load_and_align_16 v11, r3, r4, 1 - load_and_align_16 v12, r3, r4, 1 - load_and_align_16 v13, r3, r4, 1 - load_and_align_16 v14, r3, r4, 1 - load_and_align_16 v15, r3, r4, 1 - load_and_align_16 v16, r3, r4, 0 - - beq compute_sum_sse_16x16_b - -second_pass_16x16_b: - vspltish v20, 8 - vspltish v18, 3 - vslh v18, v20, v18 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040 - - load_vfilter v20, v21 - - vfilter_16 v0, v1 - vfilter_16 v1, v2 - vfilter_16 v2, v3 - vfilter_16 v3, v4 - vfilter_16 v4, v5 - vfilter_16 v5, v6 - vfilter_16 v6, v7 - vfilter_16 v7, v8 - vfilter_16 v8, v9 - vfilter_16 v9, v10 - vfilter_16 v10, v11 - vfilter_16 v11, v12 - vfilter_16 v12, v13 - vfilter_16 v13, v14 - vfilter_16 v14, v15 - vfilter_16 v15, v16 - -compute_sum_sse_16x16_b: - vspltish v18, 0 ;# sum - vspltish v19, 0 ;# sse - vspltish v23, 0 ;# unpack - li r10, 16 - - compute_sum_sse_16 v0, 1 - compute_sum_sse_16 v1, 1 - compute_sum_sse_16 v2, 1 - compute_sum_sse_16 v3, 1 - compute_sum_sse_16 v4, 1 - compute_sum_sse_16 v5, 1 - compute_sum_sse_16 v6, 1 - compute_sum_sse_16 v7, 1 - compute_sum_sse_16 v8, 1 - compute_sum_sse_16 v9, 1 - compute_sum_sse_16 v10, 1 - compute_sum_sse_16 v11, 1 - compute_sum_sse_16 v12, 1 - compute_sum_sse_16 v13, 1 - compute_sum_sse_16 v14, 1 - compute_sum_sse_16 v15, 0 - - variance_final v18, v19, v23, 8 - - addi r1, r1, 32 ;# recover stack - - mtspr 256, r11 ;# reset old VRSAVE - - blr - - .data - - .align 4 -hfilter_b: - .byte 128, 0, 0, 0,128, 0, 0, 0,128, 0, 0, 0,128, 0, 0, 0 - .byte 112, 16, 0, 0,112, 16, 0, 0,112, 16, 0, 0,112, 16, 0, 0 - .byte 96, 32, 0, 0, 96, 32, 0, 0, 96, 32, 0, 0, 96, 32, 0, 0 - .byte 80, 48, 0, 0, 80, 48, 0, 0, 80, 48, 0, 0, 80, 48, 0, 0 - .byte 64, 64, 0, 0, 64, 64, 0, 0, 64, 64, 0, 0, 64, 64, 0, 0 - .byte 48, 80, 0, 0, 48, 80, 0, 0, 48, 80, 0, 0, 48, 80, 0, 0 - .byte 32, 96, 0, 0, 32, 96, 0, 0, 32, 96, 0, 0, 32, 96, 0, 0 - .byte 16,112, 0, 0, 16,112, 0, 0, 16,112, 0, 0, 16,112, 0, 0 - - .align 4 -vfilter_b: - .byte 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128 - .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112 - .byte 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16 - .byte 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96 - .byte 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32 - .byte 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80 - .byte 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48 - .byte 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 - .byte 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 - .byte 48, 48, 48, 48, 48, 48, 48, 48, 48, 
48, 48, 48, 48, 48, 48, 48 - .byte 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80 - .byte 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32 - .byte 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96 - .byte 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16 - .byte 112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112 - - .align 4 -b_hperm_b: - .byte 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 - - .align 4 -b_0123_b: - .byte 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6 - - .align 4 -b_4567_b: - .byte 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10 - -b_hilo_b: - .byte 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 diff --git a/vp9/encoder/vp9_asm_enc_offsets.c b/vp9/encoder/vp9_asm_enc_offsets.c index e174a894a06c9c3b15eeb55d97e35598959c960f..921e8f063c91ef47d0e688d8c8df4308805bccff 100644 --- a/vp9/encoder/vp9_asm_enc_offsets.c +++ b/vp9/encoder/vp9_asm_enc_offsets.c @@ -10,31 +10,8 @@ #include "vpx_ports/asm_offsets.h" -#include "vpx_config.h" -#include "vp9/encoder/vp9_block.h" -#include "vp9/common/vp9_blockd.h" -#include "vp9/encoder/vp9_onyx_int.h" -#include "vp9/encoder/vp9_treewriter.h" -#include "vp9/encoder/vp9_tokenize.h" BEGIN -/* regular quantize */ -DEFINE(vp9_block_coeff, offsetof(BLOCK, coeff)); -DEFINE(vp9_block_zbin, offsetof(BLOCK, zbin)); -DEFINE(vp9_block_round, offsetof(BLOCK, round)); -DEFINE(vp9_block_quant, offsetof(BLOCK, quant)); -DEFINE(vp9_block_quant_fast, offsetof(BLOCK, quant_fast)); -DEFINE(vp9_block_zbin_extra, offsetof(BLOCK, zbin_extra)); -DEFINE(vp9_block_zrun_zbin_boost, offsetof(BLOCK, zrun_zbin_boost)); -DEFINE(vp9_block_quant_shift, offsetof(BLOCK, quant_shift)); - -DEFINE(vp9_blockd_qcoeff, offsetof(BLOCKD, qcoeff)); -DEFINE(vp9_blockd_dequant, offsetof(BLOCKD, dequant)); -DEFINE(vp9_blockd_dqcoeff, offsetof(BLOCKD, dqcoeff)); END - -/* add asserts for any offset that is not supported by assembly code - * add asserts for any size that is not supported by assembly code - */ diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 7128b70c8612e115aa33dd7e2703b3dc9b3e9ea8..86cd81031273d77b08dba4db1b03a275855365c8 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -8,352 +8,298 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ - -#include "vp9/common/vp9_header.h" -#include "vp9/encoder/vp9_encodemv.h" -#include "vp9/common/vp9_entropymode.h" -#include "vp9/common/vp9_entropymv.h" -#include "vp9/common/vp9_findnearmv.h" -#include "vp9/common/vp9_tile_common.h" -#include "vp9/encoder/vp9_mcomp.h" -#include "vp9/common/vp9_systemdependent.h" #include <assert.h> #include <stdio.h> #include <limits.h> -#include "vp9/common/vp9_pragmas.h" + #include "vpx/vpx_encoder.h" #include "vpx_mem/vpx_mem.h" -#include "vp9/encoder/vp9_bitstream.h" -#include "vp9/encoder/vp9_segmentation.h" +#include "vp9/common/vp9_entropymode.h" +#include "vp9/common/vp9_entropymv.h" +#include "vp9/common/vp9_findnearmv.h" +#include "vp9/common/vp9_tile_common.h" #include "vp9/common/vp9_seg_common.h" #include "vp9/common/vp9_pred_common.h" #include "vp9/common/vp9_entropy.h" -#include "vp9/encoder/vp9_encodemv.h" #include "vp9/common/vp9_entropymv.h" #include "vp9/common/vp9_mvref_common.h" #include "vp9/common/vp9_treecoder.h" +#include "vp9/common/vp9_systemdependent.h" +#include "vp9/common/vp9_pragmas.h" + +#include "vp9/encoder/vp9_mcomp.h" +#include "vp9/encoder/vp9_encodemv.h" +#include "vp9/encoder/vp9_bitstream.h" +#include "vp9/encoder/vp9_segmentation.h" +#include "vp9/encoder/vp9_write_bit_buffer.h" + #if defined(SECTIONBITS_OUTPUT) unsigned __int64 Sectionbits[500]; #endif #ifdef ENTROPY_STATS -int intra_mode_stats[VP9_KF_BINTRAMODES] - [VP9_KF_BINTRAMODES] - [VP9_KF_BINTRAMODES]; -vp9_coeff_stats tree_update_hist_4x4[BLOCK_TYPES]; -vp9_coeff_stats tree_update_hist_8x8[BLOCK_TYPES]; -vp9_coeff_stats tree_update_hist_16x16[BLOCK_TYPES]; -vp9_coeff_stats tree_update_hist_32x32[BLOCK_TYPES]; +int intra_mode_stats[VP9_INTRA_MODES] + [VP9_INTRA_MODES] + [VP9_INTRA_MODES]; +vp9_coeff_stats tree_update_hist[TX_SIZE_MAX_SB][BLOCK_TYPES]; extern unsigned int active_section; #endif -#if CONFIG_CODE_NONZEROCOUNT -#ifdef NZC_STATS -unsigned int nzc_stats_4x4[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES] - [NZC4X4_TOKENS]; -unsigned int nzc_stats_8x8[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES] - [NZC8X8_TOKENS]; -unsigned int nzc_stats_16x16[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES] - [NZC16X16_TOKENS]; -unsigned int nzc_stats_32x32[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES] - [NZC32X32_TOKENS]; -unsigned int nzc_pcat_stats[MAX_NZC_CONTEXTS][NZC_TOKENS_EXTRA] - [NZC_BITS_EXTRA][2]; -void init_nzcstats(); -void update_nzcstats(VP9_COMMON *const cm); -void print_nzcstats(); -#endif -#endif - -#ifdef MODE_STATS -int count_mb_seg[4] = { 0, 0, 0, 0 }; -#endif - #define vp9_cost_upd ((int)(vp9_cost_one(upd) - vp9_cost_zero(upd)) >> 8) #define vp9_cost_upd256 ((int)(vp9_cost_one(upd) - vp9_cost_zero(upd))) -#define SEARCH_NEWP -static int update_bits[255]; - -static void compute_update_table() { - int i; - for (i = 0; i < 255; i++) - update_bits[i] = vp9_count_term_subexp(i, SUBEXP_PARAM, 255); +#ifdef MODE_STATS +int64_t tx_count_32x32p_stats[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB]; +int64_t tx_count_16x16p_stats[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB - 1]; +int64_t tx_count_8x8p_stats[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB - 2]; +int64_t switchable_interp_stats[VP9_SWITCHABLE_FILTERS+1] + [VP9_SWITCHABLE_FILTERS]; + +void init_tx_count_stats() { + vp9_zero(tx_count_32x32p_stats); + vp9_zero(tx_count_16x16p_stats); + vp9_zero(tx_count_8x8p_stats); } -static int split_index(int i, int n, int modulus) { - int max1 = (n - 1 - modulus / 2) / modulus + 1; - if (i % modulus == modulus / 2) i = i / modulus; - else i = max1 + i - (i + modulus - modulus / 2) / modulus; - return 
i; +void init_switchable_interp_stats() { + vp9_zero(switchable_interp_stats); } -static int remap_prob(int v, int m) { - const int n = 256; - const int modulus = MODULUS_PARAM; - int i; - if ((m << 1) <= n) - i = vp9_recenter_nonneg(v, m) - 1; - else - i = vp9_recenter_nonneg(n - 1 - v, n - 1 - m) - 1; - - i = split_index(i, n - 1, modulus); - return i; +static void update_tx_count_stats(VP9_COMMON *cm) { + int i, j; + for (i = 0; i < TX_SIZE_CONTEXTS; i++) { + for (j = 0; j < TX_SIZE_MAX_SB; j++) { + tx_count_32x32p_stats[i][j] += cm->fc.tx_count_32x32p[i][j]; + } + } + for (i = 0; i < TX_SIZE_CONTEXTS; i++) { + for (j = 0; j < TX_SIZE_MAX_SB - 1; j++) { + tx_count_16x16p_stats[i][j] += cm->fc.tx_count_16x16p[i][j]; + } + } + for (i = 0; i < TX_SIZE_CONTEXTS; i++) { + for (j = 0; j < TX_SIZE_MAX_SB - 2; j++) { + tx_count_8x8p_stats[i][j] += cm->fc.tx_count_8x8p[i][j]; + } + } } -static void write_prob_diff_update(vp9_writer *const bc, - vp9_prob newp, vp9_prob oldp) { - int delp = remap_prob(newp, oldp); - vp9_encode_term_subexp(bc, delp, SUBEXP_PARAM, 255); +static void update_switchable_interp_stats(VP9_COMMON *cm) { + int i, j; + for (i = 0; i < VP9_SWITCHABLE_FILTERS+1; ++i) + for (j = 0; j < VP9_SWITCHABLE_FILTERS; ++j) { + switchable_interp_stats[i][j] += cm->fc.switchable_interp_count[i][j]; + } } -static int prob_diff_update_cost(vp9_prob newp, vp9_prob oldp) { - int delp = remap_prob(newp, oldp); - return update_bits[delp] * 256; +void write_tx_count_stats() { + int i, j; + FILE *fp = fopen("tx_count.bin", "wb"); + fwrite(tx_count_32x32p_stats, sizeof(tx_count_32x32p_stats), 1, fp); + fwrite(tx_count_16x16p_stats, sizeof(tx_count_16x16p_stats), 1, fp); + fwrite(tx_count_8x8p_stats, sizeof(tx_count_8x8p_stats), 1, fp); + fclose(fp); + + printf( + "vp9_default_tx_count_32x32p[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB] = {\n"); + for (i = 0; i < TX_SIZE_CONTEXTS; i++) { + printf(" { "); + for (j = 0; j < TX_SIZE_MAX_SB; j++) { + printf("%"PRId64", ", tx_count_32x32p_stats[i][j]); + } + printf("},\n"); + } + printf("};\n"); + printf( + "vp9_default_tx_count_16x16p[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB-1] = {\n"); + for (i = 0; i < TX_SIZE_CONTEXTS; i++) { + printf(" { "); + for (j = 0; j < TX_SIZE_MAX_SB - 1; j++) { + printf("%"PRId64", ", tx_count_16x16p_stats[i][j]); + } + printf("},\n"); + } + printf("};\n"); + printf( + "vp9_default_tx_count_8x8p[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB-2] = {\n"); + for (i = 0; i < TX_SIZE_CONTEXTS; i++) { + printf(" { "); + for (j = 0; j < TX_SIZE_MAX_SB - 2; j++) { + printf("%"PRId64", ", tx_count_8x8p_stats[i][j]); + } + printf("},\n"); + } + printf("};\n"); } -static void update_mode( - vp9_writer *const bc, - int n, - vp9_token tok [/* n */], - vp9_tree tree, - vp9_prob Pnew [/* n-1 */], - vp9_prob Pcur [/* n-1 */], - unsigned int bct [/* n-1 */] [2], - const unsigned int num_events[/* n */] -) { - unsigned int new_b = 0, old_b = 0; - int i = 0; +void write_switchable_interp_stats() { + int i, j; + FILE *fp = fopen("switchable_interp.bin", "wb"); + fwrite(switchable_interp_stats, sizeof(switchable_interp_stats), 1, fp); + fclose(fp); - vp9_tree_probs_from_distribution(tree, Pnew, bct, num_events, 0); - n--; + printf( + "vp9_default_switchable_filter_count[VP9_SWITCHABLE_FILTERS+1]" + "[VP9_SWITCHABLE_FILTERS] = {\n"); + for (i = 0; i < VP9_SWITCHABLE_FILTERS+1; i++) { + printf(" { "); + for (j = 0; j < VP9_SWITCHABLE_FILTERS; j++) { + printf("%"PRId64", ", switchable_interp_stats[i][j]); + } + printf("},\n"); + } + printf("};\n"); +} +#endif - do { - new_b += 
cost_branch(bct[i], Pnew[i]); - old_b += cost_branch(bct[i], Pcur[i]); - } while (++i < n); +static int update_bits[255]; - if (new_b + (n << 8) < old_b) { - int i = 0; +static INLINE void write_be32(uint8_t *p, int value) { + p[0] = value >> 24; + p[1] = value >> 16; + p[2] = value >> 8; + p[3] = value; +} - vp9_write_bit(bc, 1); - do { - const vp9_prob p = Pnew[i]; - vp9_write_literal(bc, Pcur[i] = p ? p : 1, 8); - } while (++i < n); - } else - vp9_write_bit(bc, 0); +int recenter_nonneg(int v, int m) { + if (v > (m << 1)) + return v; + else if (v >= m) + return ((v - m) << 1); + else + return ((m - v) << 1) - 1; } -static void update_mbintra_mode_probs(VP9_COMP* const cpi, - vp9_writer* const bc) { - VP9_COMMON *const cm = &cpi->common; - - { - vp9_prob Pnew [VP9_YMODES - 1]; - unsigned int bct [VP9_YMODES - 1] [2]; - - update_mode( - bc, VP9_YMODES, vp9_ymode_encodings, vp9_ymode_tree, - Pnew, cm->fc.ymode_prob, bct, (unsigned int *)cpi->ymode_count - ); - update_mode(bc, VP9_I32X32_MODES, vp9_sb_ymode_encodings, - vp9_sb_ymode_tree, Pnew, cm->fc.sb_ymode_prob, bct, - (unsigned int *)cpi->sb_ymode_count); +static int get_unsigned_bits(unsigned num_values) { + int cat = 0; + if ((num_values--) <= 1) return 0; + while (num_values > 0) { + cat++; + num_values >>= 1; } + return cat; } -void vp9_update_skip_probs(VP9_COMP *cpi) { - VP9_COMMON *const pc = &cpi->common; - int k; - - for (k = 0; k < MBSKIP_CONTEXTS; ++k) { - pc->mbskip_pred_probs[k] = get_binary_prob(cpi->skip_false_count[k], - cpi->skip_true_count[k]); - } +void vp9_encode_unsigned_max(struct vp9_write_bit_buffer *wb, + int data, int max) { + vp9_wb_write_literal(wb, data, get_unsigned_bits(max)); } -static void update_switchable_interp_probs(VP9_COMP *cpi, - vp9_writer* const bc) { - VP9_COMMON *const pc = &cpi->common; - unsigned int branch_ct[32][2]; - int i, j; - for (j = 0; j <= VP9_SWITCHABLE_FILTERS; ++j) { - vp9_tree_probs_from_distribution( - vp9_switchable_interp_tree, - pc->fc.switchable_interp_prob[j], branch_ct, - cpi->switchable_interp_count[j], 0); - for (i = 0; i < VP9_SWITCHABLE_FILTERS - 1; ++i) { - if (pc->fc.switchable_interp_prob[j][i] < 1) - pc->fc.switchable_interp_prob[j][i] = 1; - vp9_write_literal(bc, pc->fc.switchable_interp_prob[j][i], 8); - } +void encode_uniform(vp9_writer *w, int v, int n) { + int l = get_unsigned_bits(n); + int m; + if (l == 0) + return; + m = (1 << l) - n; + if (v < m) { + vp9_write_literal(w, v, l - 1); + } else { + vp9_write_literal(w, m + ((v - m) >> 1), l - 1); + vp9_write_literal(w, (v - m) & 1, 1); } } -// This function updates the reference frame prediction stats -static void update_refpred_stats(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; - int i; - vp9_prob new_pred_probs[PREDICTION_PROBS]; - int old_cost, new_cost; - - // Set the prediction probability structures to defaults - if (cm->frame_type != KEY_FRAME) { - // From the prediction counts set the probabilities for each context - for (i = 0; i < PREDICTION_PROBS; i++) { - new_pred_probs[i] = get_binary_prob(cpi->ref_pred_count[i][0], - cpi->ref_pred_count[i][1]); - - // Decide whether or not to update the reference frame probs. - // Returned costs are in 1/256 bit units. 
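A note on the units in this bookkeeping: the vp9_cost_zero/vp9_cost_one values are scaled so that one bit equals 256, which is why the saving thresholds used below are plain integers such as 2048 (8 bits) or 14 << 8 (14 bits). A floating-point model of what the precomputed cost tables approximate (a sketch only; the library uses a lookup table, not log2 at run time):

```c
#include <math.h>

/* Cost, in 1/256-bit units, of coding an event of probability p. */
static int cost_256(double p) {   /* 0 < p < 1 */
  return (int)(-log2(p) * 256.0 + 0.5);
}
```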
- old_cost = - (cpi->ref_pred_count[i][0] * vp9_cost_zero(cm->ref_pred_probs[i])) + - (cpi->ref_pred_count[i][1] * vp9_cost_one(cm->ref_pred_probs[i])); - - new_cost = - (cpi->ref_pred_count[i][0] * vp9_cost_zero(new_pred_probs[i])) + - (cpi->ref_pred_count[i][1] * vp9_cost_one(new_pred_probs[i])); - - // Cost saving must be >= 8 bits (2048 in these units) - if ((old_cost - new_cost) >= 2048) { - cpi->ref_pred_probs_update[i] = 1; - cm->ref_pred_probs[i] = new_pred_probs[i]; - } else - cpi->ref_pred_probs_update[i] = 0; - } - } +int count_uniform(int v, int n) { + int l = get_unsigned_bits(n); + int m; + if (l == 0) return 0; + m = (1 << l) - n; + if (v < m) + return l - 1; + else + return l; } -// This function is called to update the mode probability context used to encode -// inter modes. It assumes the branch counts table has already been populated -// prior to the actual packing of the bitstream (in rd stage or dummy pack) -// -// The branch counts table is re-populated during the actual pack stage and in -// the decoder to facilitate backwards update of the context. -static void update_inter_mode_probs(VP9_COMMON *cm, - int mode_context[INTER_MODE_CONTEXTS][4]) { - int i, j; - unsigned int (*mv_ref_ct)[4][2]; - - vpx_memcpy(mode_context, cm->fc.vp9_mode_contexts, - sizeof(cm->fc.vp9_mode_contexts)); - - mv_ref_ct = cm->fc.mv_ref_ct; - - for (i = 0; i < INTER_MODE_CONTEXTS; i++) { - for (j = 0; j < 4; j++) { - int new_prob, old_cost, new_cost; - - // Work out cost of coding branches with the old and optimal probability - old_cost = cost_branch256(mv_ref_ct[i][j], mode_context[i][j]); - new_prob = get_binary_prob(mv_ref_ct[i][j][0], mv_ref_ct[i][j][1]); - new_cost = cost_branch256(mv_ref_ct[i][j], new_prob); - - // If cost saving is >= 14 bits then update the mode probability. - // This is the approximate net cost of updating one probability given - // that the no update case ismuch more common than the update case. - if (new_cost <= (old_cost - (14 << 8))) { - mode_context[i][j] = new_prob; +void encode_term_subexp(vp9_writer *w, int word, int k, int num_syms) { + int i = 0; + int mk = 0; + while (1) { + int b = (i ? k + i - 1 : k); + int a = (1 << b); + if (num_syms <= mk + 3 * a) { + encode_uniform(w, word - mk, num_syms - mk); + break; + } else { + int t = (word >= mk + a); + vp9_write_literal(w, t, 1); + if (t) { + i = i + 1; + mk += a; + } else { + vp9_write_literal(w, word - mk, b); + break; } } } } -#if CONFIG_NEW_MVREF -static void update_mv_ref_probs(VP9_COMP *cpi, - int mvref_probs[MAX_REF_FRAMES] - [MAX_MV_REF_CANDIDATES-1]) { - MACROBLOCKD *xd = &cpi->mb.e_mbd; - int rf; // Reference frame - int ref_c; // Motion reference candidate - int node; // Probability node index - - for (rf = 0; rf < MAX_REF_FRAMES; ++rf) { - int count = 0; - - // Skip the dummy entry for intra ref frame. 
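encode_term_subexp() above emits the subexponential code, and count_term_subexp() in the next hunk is its non-writing twin: it returns exactly the number of bits the encoder would write, which compute_update_table() then caches in update_bits[] to price probability deltas. A standalone harness around the counting routines from this section, runnable as-is; k = 4 is used for illustration (the encoder's actual parameter is SUBEXP_PARAM, defined elsewhere):

```c
#include <stdio.h>

static int get_unsigned_bits(unsigned num_values) {
  int cat = 0;
  if ((num_values--) <= 1) return 0;
  while (num_values > 0) {
    cat++;
    num_values >>= 1;
  }
  return cat;
}

/* Bits used by the near-uniform tail code for v in [0, n). */
static int count_uniform(int v, int n) {
  int l = get_unsigned_bits(n);
  int m;
  if (l == 0) return 0;
  m = (1 << l) - n;
  return v < m ? l - 1 : l;
}

/* Bits used by the terminated subexponential code for word. */
static int count_term_subexp(int word, int k, int num_syms) {
  int count = 0;
  int i = 0;
  int mk = 0;
  while (1) {
    const int b = i ? k + i - 1 : k;
    const int a = 1 << b;
    if (num_syms <= mk + 3 * a) {
      count += count_uniform(word - mk, num_syms - mk);
      break;
    }
    count++;               /* the "keep going" flag bit */
    if (word >= mk + a) {
      i++;
      mk += a;
    } else {
      count += b;          /* fixed-width literal payload */
      break;
    }
  }
  return count;
}

int main(void) {
  int delta;
  /* Small deltas are cheap, large ones progressively dearer. */
  for (delta = 0; delta < 255; delta += 42)
    printf("delta %3d -> %d bits\n", delta, count_term_subexp(delta, 4, 255));
  return 0;
}
```

The cost grows by one flag bit per doubling of the delta range, which is what makes small probability adjustments cheap to signal.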
- if (rf == INTRA_FRAME) { - continue; - } - - // Sum the counts for all candidates - for (ref_c = 0; ref_c < MAX_MV_REF_CANDIDATES; ++ref_c) { - count += cpi->mb_mv_ref_count[rf][ref_c]; - } - - // Calculate the tree node probabilities - for (node = 0; node < MAX_MV_REF_CANDIDATES-1; ++node) { - int new_prob, old_cost, new_cost; - unsigned int branch_cnts[2]; - - // How many hits on each branch at this node - branch_cnts[0] = cpi->mb_mv_ref_count[rf][node]; - branch_cnts[1] = count - cpi->mb_mv_ref_count[rf][node]; - - // Work out cost of coding branches with the old and optimal probability - old_cost = cost_branch256(branch_cnts, xd->mb_mv_ref_probs[rf][node]); - new_prob = get_prob(branch_cnts[0], count); - new_cost = cost_branch256(branch_cnts, new_prob); - - // Take current 0 branch cases out of residual count - count -= cpi->mb_mv_ref_count[rf][node]; - - if ((new_cost + VP9_MV_REF_UPDATE_COST) <= old_cost) { - mvref_probs[rf][node] = new_prob; +int count_term_subexp(int word, int k, int num_syms) { + int count = 0; + int i = 0; + int mk = 0; + while (1) { + int b = (i ? k + i - 1 : k); + int a = (1 << b); + if (num_syms <= mk + 3 * a) { + count += count_uniform(word - mk, num_syms - mk); + break; + } else { + int t = (word >= mk + a); + count++; + if (t) { + i = i + 1; + mk += a; } else { - mvref_probs[rf][node] = xd->mb_mv_ref_probs[rf][node]; + count += b; + break; } } } -} -#endif - -static void write_ymode(vp9_writer *bc, int m, const vp9_prob *p) { - write_token(bc, vp9_ymode_tree, p, vp9_ymode_encodings + m); -} - -static void kfwrite_ymode(vp9_writer *bc, int m, const vp9_prob *p) { - write_token(bc, vp9_kf_ymode_tree, p, vp9_kf_ymode_encodings + m); -} - -static void write_sb_ymode(vp9_writer *bc, int m, const vp9_prob *p) { - write_token(bc, vp9_sb_ymode_tree, p, vp9_sb_ymode_encodings + m); -} - -static void sb_kfwrite_ymode(vp9_writer *bc, int m, const vp9_prob *p) { - write_token(bc, vp9_uv_mode_tree, p, vp9_sb_kf_ymode_encodings + m); + return count; } -static void write_i8x8_mode(vp9_writer *bc, int m, const vp9_prob *p) { - write_token(bc, vp9_i8x8_mode_tree, p, vp9_i8x8_mode_encodings + m); +static void compute_update_table() { + int i; + for (i = 0; i < 254; i++) + update_bits[i] = count_term_subexp(i, SUBEXP_PARAM, 255); } -static void write_uv_mode(vp9_writer *bc, int m, const vp9_prob *p) { - write_token(bc, vp9_uv_mode_tree, p, vp9_uv_mode_encodings + m); +static int split_index(int i, int n, int modulus) { + int max1 = (n - 1 - modulus / 2) / modulus + 1; + if (i % modulus == modulus / 2) i = i / modulus; + else i = max1 + i - (i + modulus - modulus / 2) / modulus; + return i; } +static int remap_prob(int v, int m) { + const int n = 255; + const int modulus = MODULUS_PARAM; + int i; + v--; + m--; + if ((m << 1) <= n) + i = recenter_nonneg(v, m) - 1; + else + i = recenter_nonneg(n - 1 - v, n - 1 - m) - 1; -static void write_bmode(vp9_writer *bc, int m, const vp9_prob *p) { -#if CONFIG_NEWBINTRAMODES - assert(m < B_CONTEXT_PRED - CONTEXT_PRED_REPLACEMENTS || m == B_CONTEXT_PRED); - if (m == B_CONTEXT_PRED) m -= CONTEXT_PRED_REPLACEMENTS; -#endif - write_token(bc, vp9_bmode_tree, p, vp9_bmode_encodings + m); + i = split_index(i, n - 1, modulus); + return i; } -static void write_kf_bmode(vp9_writer *bc, int m, const vp9_prob *p) { - write_token(bc, vp9_kf_bmode_tree, p, vp9_kf_bmode_encodings + m); +static void write_prob_diff_update(vp9_writer *w, + vp9_prob newp, vp9_prob oldp) { + int delp = remap_prob(newp, oldp); + encode_term_subexp(w, delp, SUBEXP_PARAM, 
255); } -static void write_split(vp9_writer *bc, int x, const vp9_prob *p) { - write_token( - bc, vp9_mbsplit_tree, p, vp9_mbsplit_encodings + x); +static int prob_diff_update_cost(vp9_prob newp, vp9_prob oldp) { + int delp = remap_prob(newp, oldp); + return update_bits[delp] * 256; } static int prob_update_savings(const unsigned int *ct, @@ -362,17 +308,7 @@ static int prob_update_savings(const unsigned int *ct, const int old_b = cost_branch256(ct, oldp); const int new_b = cost_branch256(ct, newp); const int update_b = 2048 + vp9_cost_upd256; - return (old_b - new_b - update_b); -} - -static int prob_diff_update_savings(const unsigned int *ct, - const vp9_prob oldp, const vp9_prob newp, - const vp9_prob upd) { - const int old_b = cost_branch256(ct, oldp); - const int new_b = cost_branch256(ct, newp); - const int update_b = (newp == oldp ? 0 : - prob_diff_update_cost(newp, oldp) + vp9_cost_upd256); - return (old_b - new_b - update_b); + return old_b - new_b - update_b; } static int prob_diff_update_savings_search(const unsigned int *ct, @@ -399,7 +335,6 @@ static int prob_diff_update_savings_search(const unsigned int *ct, return bestsavings; } -#if CONFIG_MODELCOEFPROB && MODEL_BASED_UPDATE static int prob_diff_update_savings_search_model(const unsigned int *ct, const vp9_prob *oldp, vp9_prob *bestp, @@ -407,23 +342,26 @@ static int prob_diff_update_savings_search_model(const unsigned int *ct, int b, int r) { int i, old_b, new_b, update_b, savings, bestsavings, step; int newp; - vp9_prob bestnewp, newplist[ENTROPY_NODES]; - for (i = UNCONSTRAINED_NODES - 1, old_b = 0; i < ENTROPY_NODES; ++i) - old_b += cost_branch256(ct + 2 * i, oldp[i]); + vp9_prob bestnewp, newplist[ENTROPY_NODES], oldplist[ENTROPY_NODES]; + vp9_model_to_full_probs(oldp, oldplist); + vpx_memcpy(newplist, oldp, sizeof(vp9_prob) * UNCONSTRAINED_NODES); + for (i = UNCONSTRAINED_NODES, old_b = 0; i < ENTROPY_NODES; ++i) + old_b += cost_branch256(ct + 2 * i, oldplist[i]); + old_b += cost_branch256(ct + 2 * PIVOT_NODE, oldplist[PIVOT_NODE]); bestsavings = 0; - bestnewp = oldp[UNCONSTRAINED_NODES - 1]; + bestnewp = oldp[PIVOT_NODE]; - step = (*bestp > oldp[UNCONSTRAINED_NODES - 1] ? -1 : 1); + step = (*bestp > oldp[PIVOT_NODE] ? 
-1 : 1); newp = *bestp; - // newp = *bestp - step * (abs(*bestp - oldp[UNCONSTRAINED_NODES - 1]) >> 1); - for (; newp != oldp[UNCONSTRAINED_NODES - 1]; newp += step) { + for (; newp != oldp[PIVOT_NODE]; newp += step) { if (newp < 1 || newp > 255) continue; - newplist[UNCONSTRAINED_NODES - 1] = newp; - vp9_get_model_distribution(newp, newplist, b, r); - for (i = UNCONSTRAINED_NODES - 1, new_b = 0; i < ENTROPY_NODES; ++i) + newplist[PIVOT_NODE] = newp; + vp9_model_to_full_probs(newplist, newplist); + for (i = UNCONSTRAINED_NODES, new_b = 0; i < ENTROPY_NODES; ++i) new_b += cost_branch256(ct + 2 * i, newplist[i]); - update_b = prob_diff_update_cost(newp, oldp[UNCONSTRAINED_NODES - 1]) + + new_b += cost_branch256(ct + 2 * PIVOT_NODE, newplist[PIVOT_NODE]); + update_b = prob_diff_update_cost(newp, oldp[PIVOT_NODE]) + vp9_cost_upd256; savings = old_b - new_b - update_b; if (savings > bestsavings) { @@ -434,73 +372,190 @@ static int prob_diff_update_savings_search_model(const unsigned int *ct, *bestp = bestnewp; return bestsavings; } -#endif static void vp9_cond_prob_update(vp9_writer *bc, vp9_prob *oldp, vp9_prob upd, unsigned int *ct) { vp9_prob newp; int savings; newp = get_binary_prob(ct[0], ct[1]); + assert(newp >= 1); savings = prob_update_savings(ct, *oldp, newp, upd); if (savings > 0) { vp9_write(bc, 1, upd); - vp9_write_literal(bc, newp, 8); + vp9_write_prob(bc, newp); + *oldp = newp; + } else { + vp9_write(bc, 0, upd); + } +} + +static void vp9_cond_prob_diff_update(vp9_writer *bc, vp9_prob *oldp, + vp9_prob upd, + unsigned int *ct) { + vp9_prob newp; + int savings; + newp = get_binary_prob(ct[0], ct[1]); + assert(newp >= 1); + savings = prob_diff_update_savings_search(ct, *oldp, &newp, upd); + if (savings > 0) { + vp9_write(bc, 1, upd); + write_prob_diff_update(bc, newp, *oldp); *oldp = newp; } else { vp9_write(bc, 0, upd); } } +static void update_mode( + vp9_writer *w, + int n, + const struct vp9_token tok[/* n */], + vp9_tree tree, + vp9_prob Pnew[/* n-1 */], + vp9_prob Pcur[/* n-1 */], + unsigned int bct[/* n-1 */] [2], + const unsigned int num_events[/* n */] +) { + int i = 0; + + vp9_tree_probs_from_distribution(tree, Pnew, bct, num_events, 0); + n--; + + for (i = 0; i < n; ++i) { + vp9_cond_prob_diff_update(w, &Pcur[i], VP9_MODE_UPDATE_PROB, bct[i]); + } +} + +static void update_mbintra_mode_probs(VP9_COMP* const cpi, + vp9_writer* const bc) { + VP9_COMMON *const cm = &cpi->common; + int j; + vp9_prob pnew[VP9_INTRA_MODES - 1]; + unsigned int bct[VP9_INTRA_MODES - 1][2]; + + for (j = 0; j < BLOCK_SIZE_GROUPS; j++) + update_mode(bc, VP9_INTRA_MODES, vp9_intra_mode_encodings, + vp9_intra_mode_tree, pnew, + cm->fc.y_mode_prob[j], bct, + (unsigned int *)cpi->y_mode_count[j]); +} + +void vp9_update_skip_probs(VP9_COMP *cpi, vp9_writer *bc) { + VP9_COMMON *const pc = &cpi->common; + int k; + + for (k = 0; k < MBSKIP_CONTEXTS; ++k) { + vp9_cond_prob_diff_update(bc, &pc->fc.mbskip_probs[k], + VP9_MODE_UPDATE_PROB, pc->fc.mbskip_count[k]); + } +} + +static void write_intra_mode(vp9_writer *bc, int m, const vp9_prob *p) { + write_token(bc, vp9_intra_mode_tree, p, vp9_intra_mode_encodings + m); +} + +static void update_switchable_interp_probs(VP9_COMP *const cpi, + vp9_writer* const bc) { + VP9_COMMON *const pc = &cpi->common; + unsigned int branch_ct[VP9_SWITCHABLE_FILTERS + 1] + [VP9_SWITCHABLE_FILTERS - 1][2]; + vp9_prob new_prob[VP9_SWITCHABLE_FILTERS + 1][VP9_SWITCHABLE_FILTERS - 1]; + int i, j; + for (j = 0; j <= VP9_SWITCHABLE_FILTERS; ++j) { + vp9_tree_probs_from_distribution( + 
vp9_switchable_interp_tree, + new_prob[j], branch_ct[j], + pc->fc.switchable_interp_count[j], 0); + } + for (j = 0; j <= VP9_SWITCHABLE_FILTERS; ++j) { + for (i = 0; i < VP9_SWITCHABLE_FILTERS - 1; ++i) { + vp9_cond_prob_diff_update(bc, &pc->fc.switchable_interp_prob[j][i], + VP9_MODE_UPDATE_PROB, branch_ct[j][i]); + } + } +#ifdef MODE_STATS + if (!cpi->dummy_packing) + update_switchable_interp_stats(pc); +#endif +} + +static void update_inter_mode_probs(VP9_COMMON *pc, vp9_writer* const bc) { + int i, j; + + for (i = 0; i < INTER_MODE_CONTEXTS; i++) { + for (j = 0; j < VP9_INTER_MODES - 1; j++) { + vp9_cond_prob_diff_update(bc, &pc->fc.inter_mode_probs[i][j], + VP9_MODE_UPDATE_PROB, + pc->fc.inter_mode_counts[i][j]); + } + } +} + static void pack_mb_tokens(vp9_writer* const bc, TOKENEXTRA **tp, const TOKENEXTRA *const stop) { TOKENEXTRA *p = *tp; while (p < stop) { - const int t = p->Token; - vp9_token *const a = vp9_coef_encodings + t; - const vp9_extra_bit_struct *const b = vp9_extra_bits + t; + const int t = p->token; + const struct vp9_token *const a = vp9_coef_encodings + t; + const vp9_extra_bit *const b = vp9_extra_bits + t; int i = 0; - const unsigned char *pp = p->context_tree; + const vp9_prob *pp; int v = a->value; - int n = a->Len; + int n = a->len; + vp9_prob probs[ENTROPY_NODES]; - if (t == EOSB_TOKEN) - { + if (t == EOSB_TOKEN) { ++p; break; } + if (t >= TWO_TOKEN) { + vp9_model_to_full_probs(p->context_tree, probs); + pp = probs; + } else { + pp = p->context_tree; + } + assert(pp != 0); /* skip one or two nodes */ +#if !CONFIG_BALANCED_COEFTREE if (p->skip_eob_node) { n -= p->skip_eob_node; i = 2 * p->skip_eob_node; } +#endif do { const int bb = (v >> --n) & 1; - encode_bool(bc, bb, pp[i >> 1]); +#if CONFIG_BALANCED_COEFTREE + if (i == 2 && p->skip_eob_node) { + i += 2; + assert(bb == 1); + continue; + } +#endif + vp9_write(bc, bb, pp[i >> 1]); i = vp9_coef_tree[i + bb]; } while (n); - if (b->base_val) { - const int e = p->Extra, L = b->Len; + const int e = p->extra, l = b->len; - if (L) { - const unsigned char *pp = b->prob; + if (l) { + const unsigned char *pb = b->prob; int v = e >> 1; - int n = L; /* number of bits in v, assumed nonzero */ + int n = l; /* number of bits in v, assumed nonzero */ int i = 0; do { const int bb = (v >> --n) & 1; - encode_bool(bc, bb, pp[i >> 1]); + vp9_write(bc, bb, pb[i >> 1]); i = b->tree[i + bb]; } while (n); } - encode_bool(bc, e & 1, 128); + vp9_write_bit(bc, e & 1); } ++p; } @@ -508,276 +563,81 @@ static void pack_mb_tokens(vp9_writer* const bc, *tp = p; } -static void write_partition_size(unsigned char *cx_data, int size) { - signed char csize; - - csize = size & 0xff; - *cx_data = csize; - csize = (size >> 8) & 0xff; - *(cx_data + 1) = csize; - csize = (size >> 16) & 0xff; - *(cx_data + 2) = csize; - -} - -static void write_mv_ref -( - vp9_writer *bc, MB_PREDICTION_MODE m, const vp9_prob *p -) { -#if CONFIG_DEBUG - assert(NEARESTMV <= m && m <= SPLITMV); -#endif - write_token(bc, vp9_mv_ref_tree, p, - vp9_mv_ref_encoding_array - NEARESTMV + m); -} - static void write_sb_mv_ref(vp9_writer *bc, MB_PREDICTION_MODE m, const vp9_prob *p) { #if CONFIG_DEBUG - assert(NEARESTMV <= m && m < SPLITMV); + assert(NEARESTMV <= m && m <= NEWMV); #endif write_token(bc, vp9_sb_mv_ref_tree, p, vp9_sb_mv_ref_encoding_array - NEARESTMV + m); } -static void write_sub_mv_ref -( - vp9_writer *bc, B_PREDICTION_MODE m, const vp9_prob *p -) { -#if CONFIG_DEBUG - assert(LEFT4X4 <= m && m <= NEW4X4); -#endif - write_token(bc, vp9_sub_mv_ref_tree, p, - 
vp9_sub_mv_ref_encoding_array - LEFT4X4 + m);
-}
-
-static void write_nmv(VP9_COMP *cpi, vp9_writer *bc,
-                      const MV *mv, const int_mv *ref,
-                      const nmv_context *nmvc, int usehp) {
-  MV e;
-  e.row = mv->row - ref->as_mv.row;
-  e.col = mv->col - ref->as_mv.col;
-
-  vp9_encode_nmv(bc, &e, &ref->as_mv, nmvc);
-  vp9_encode_nmv_fp(bc, &e, &ref->as_mv, nmvc, usehp);
-}
-
-#if CONFIG_NEW_MVREF
-static void vp9_write_mv_ref_id(vp9_writer *w,
-                                vp9_prob * ref_id_probs,
-                                int mv_ref_id) {
-  // Encode the index for the MV reference.
-  switch (mv_ref_id) {
-    case 0:
-      vp9_write(w, 0, ref_id_probs[0]);
-      break;
-    case 1:
-      vp9_write(w, 1, ref_id_probs[0]);
-      vp9_write(w, 0, ref_id_probs[1]);
-      break;
-    case 2:
-      vp9_write(w, 1, ref_id_probs[0]);
-      vp9_write(w, 1, ref_id_probs[1]);
-      vp9_write(w, 0, ref_id_probs[2]);
-      break;
-    case 3:
-      vp9_write(w, 1, ref_id_probs[0]);
-      vp9_write(w, 1, ref_id_probs[1]);
-      vp9_write(w, 1, ref_id_probs[2]);
-      break;
-
-    // TRAP.. This should not happen
-    default:
-      assert(0);
-      break;
-  }
-}
-#endif
-
 // This function writes the current macro block's segment id to the bitstream.
 // It should only be called if a segment map update is indicated.
 static void write_mb_segid(vp9_writer *bc, const MB_MODE_INFO *mi,
                            const MACROBLOCKD *xd) {
-  // Encode the MB segment id.
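The hand-rolled switches in the removed coders above, and in the write_mb_segid() body just below, spell out what is really a generic walk over a vp9_tree, which is how the replacement writes the segment id via treed_write(bc, vp9_segment_tree, ..., 3). A self-contained sketch of that walk, under the usual vp9_tree layout (positive entries index back into the table, entries less than or equal to zero are negated leaf tokens); write_bool() is a printing stand-in for the arithmetic coder, and the four-leaf tree stands in for the larger vp9_segment_tree:

```c
#include <stdio.h>

typedef int tree_index;

/* Stand-in for the arithmetic coder: just log each binary decision. */
static void write_bool(int bit, int prob) {
  printf("  bit %d with p(0) = %d/256\n", bit, prob);
}

/* Four-leaf balanced tree: node 0 branches to nodes 2 and 4,
 * whose children are the leaf tokens 0..3. */
static const tree_index seg_tree[6] = { 2, 4, 0, -1, -2, -3 };

/* One probability per internal node, indexed by node/2. */
static const unsigned char seg_probs[3] = { 128, 128, 128 };

/* Write value as nbits path bits, MSB first, walking the tree for
 * the probability context of each decision. */
static void treed_write(const tree_index *tree, const unsigned char *probs,
                        int value, int nbits) {
  tree_index i = 0;
  do {
    const int bit = (value >> --nbits) & 1;
    write_bool(bit, probs[i >> 1]);
    i = tree[i + bit];
  } while (nbits);
}

int main(void) {
  int seg;
  for (seg = 0; seg < 4; ++seg) {
    printf("segment %d:\n", seg);
    treed_write(seg_tree, seg_probs, seg, 2);
  }
  return 0;
}
```

For a balanced tree the path bits are just the binary digits of the symbol, MSB first, which is why the real call can pass mi->segment_id and a fixed length directly.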
- int seg_id = mi->segment_id; - int pred_seg_id = vp9_get_pred_mb_segid(cm, xd, - mb_row * cm->mb_cols + mb_col); - const vp9_prob *p = xd->mb_segment_tree_probs; - const vp9_prob p1 = xd->mb_segment_mispred_tree_probs[pred_seg_id]; - - if (xd->segmentation_enabled && xd->update_mb_segmentation_map) { - vp9_write(bc, seg_id >= 2, p1); - if (pred_seg_id >= 2 && seg_id < 2) { - vp9_write(bc, seg_id == 1, p[1]); - } else if (pred_seg_id < 2 && seg_id >= 2) { - vp9_write(bc, seg_id == 3, p[2]); - } - } + if (xd->segmentation_enabled && xd->update_mb_segmentation_map) + treed_write(bc, vp9_segment_tree, xd->mb_segment_tree_probs, + mi->segment_id, 3); } // This function encodes the reference frame -static void encode_ref_frame(vp9_writer *const bc, - VP9_COMMON *const cm, - MACROBLOCKD *xd, - int segment_id, - MV_REFERENCE_FRAME rf) { - int seg_ref_active; - int seg_ref_count = 0; - seg_ref_active = vp9_segfeature_active(xd, - segment_id, - SEG_LVL_REF_FRAME); - - if (seg_ref_active) { - seg_ref_count = vp9_check_segref(xd, segment_id, INTRA_FRAME) + - vp9_check_segref(xd, segment_id, LAST_FRAME) + - vp9_check_segref(xd, segment_id, GOLDEN_FRAME) + - vp9_check_segref(xd, segment_id, ALTREF_FRAME); - } - +static void encode_ref_frame(VP9_COMP *cpi, vp9_writer *bc) { + VP9_COMMON *const pc = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *mi = &xd->mode_info_context->mbmi; + const int segment_id = mi->segment_id; + int seg_ref_active = vp9_segfeature_active(xd, segment_id, + SEG_LVL_REF_FRAME); // If segment level coding of this signal is disabled... // or the segment allows multiple reference frame options - if (!seg_ref_active || (seg_ref_count > 1)) { - // Values used in prediction model coding - unsigned char prediction_flag; - vp9_prob pred_prob; - MV_REFERENCE_FRAME pred_rf; - - // Get the context probability the prediction flag - pred_prob = vp9_get_pred_prob(cm, xd, PRED_REF); - - // Get the predicted value. - pred_rf = vp9_get_pred_ref(cm, xd); - - // Did the chosen reference frame match its predicted value. - prediction_flag = - (xd->mode_info_context->mbmi.ref_frame == pred_rf); - - vp9_set_pred_flag(xd, PRED_REF, prediction_flag); - vp9_write(bc, prediction_flag, pred_prob); - - // If not predicted correctly then code value explicitly - if (!prediction_flag) { - vp9_prob mod_refprobs[PREDICTION_PROBS]; - - vpx_memcpy(mod_refprobs, - cm->mod_refprobs[pred_rf], sizeof(mod_refprobs)); - - // If segment coding enabled blank out options that cant occur by - // setting the branch probability to 0. 
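Where the removed code masks mod_refprobs entries to rule out impossible references, the replacement encode_ref_frame() above signals the reference with a fixed decision tree of predicted contexts: compound-vs-single when the frame allows both, then either which compound pair, or LAST versus the rest followed by GOLDEN versus ALTREF. A sketch of the single-reference branch only; the enum numbering and write_bool() logger are local to the sketch, and the PRED_* strings merely label which vp9_get_pred_prob() context the real code consults for each bit:

```c
#include <stdio.h>

/* Local numbering for the sketch; the library's enum differs. */
enum { LAST_FRAME, GOLDEN_FRAME, ALTREF_FRAME };

static void write_bool(int bit, const char *ctx) {
  printf("  %-20s -> %d\n", ctx, bit);
}

/* Single-prediction path: one bit separates LAST from the other two
 * references, and a second bit is spent only when needed. */
static void encode_single_ref(int ref) {
  write_bool(ref != LAST_FRAME, "PRED_SINGLE_REF_P1");
  if (ref != LAST_FRAME)
    write_bool(ref != GOLDEN_FRAME, "PRED_SINGLE_REF_P2");
}

int main(void) {
  const char *names[] = { "LAST", "GOLDEN", "ALTREF" };
  int ref;
  for (ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref) {
    printf("%s:\n", names[ref]);
    encode_single_ref(ref);
  }
  return 0;
}
```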
-      if (seg_ref_active) {
-        mod_refprobs[INTRA_FRAME] *=
-          vp9_check_segref(xd, segment_id, INTRA_FRAME);
-        mod_refprobs[LAST_FRAME] *=
-          vp9_check_segref(xd, segment_id, LAST_FRAME);
-        mod_refprobs[GOLDEN_FRAME] *=
-          (vp9_check_segref(xd, segment_id, GOLDEN_FRAME) *
-           vp9_check_segref(xd, segment_id, ALTREF_FRAME));
-      }
-
-      if (mod_refprobs[0]) {
-        vp9_write(bc, (rf != INTRA_FRAME), mod_refprobs[0]);
-      }
-
-      // Inter coded
-      if (rf != INTRA_FRAME) {
-        if (mod_refprobs[1]) {
-          vp9_write(bc, (rf != LAST_FRAME), mod_refprobs[1]);
-        }
+  if (!seg_ref_active) {
+    // does the feature use compound prediction or not
+    // (if not specified at the frame/segment level)
+    if (pc->comp_pred_mode == HYBRID_PREDICTION) {
+      vp9_write(bc, mi->ref_frame[1] > INTRA_FRAME,
+                vp9_get_pred_prob(pc, xd, PRED_COMP_INTER_INTER));
+    } else {
+      assert((mi->ref_frame[1] <= INTRA_FRAME) ==
+             (pc->comp_pred_mode == SINGLE_PREDICTION_ONLY));
+    }

-        if (rf != LAST_FRAME) {
-          if (mod_refprobs[2]) {
-            vp9_write(bc, (rf != GOLDEN_FRAME), mod_refprobs[2]);
-          }
-        }
-      }
-    }
+    if (mi->ref_frame[1] > INTRA_FRAME) {
+      vp9_write(bc, mi->ref_frame[0] == GOLDEN_FRAME,
+                vp9_get_pred_prob(pc, xd, PRED_COMP_REF_P));
+    } else {
+      vp9_write(bc, mi->ref_frame[0] != LAST_FRAME,
+                vp9_get_pred_prob(pc, xd, PRED_SINGLE_REF_P1));
+      if (mi->ref_frame[0] != LAST_FRAME)
+        vp9_write(bc, mi->ref_frame[0] != GOLDEN_FRAME,
+                  vp9_get_pred_prob(pc, xd, PRED_SINGLE_REF_P2));
+    }
+  } else {
+    assert(mi->ref_frame[1] <= INTRA_FRAME);
+    assert(vp9_get_segdata(xd, segment_id, SEG_LVL_REF_FRAME) ==
+           mi->ref_frame[0]);
  }

  // if using the prediction model we have nothing further to do because
  // the reference frame is fully coded by the segment
 }

-// Update the probabilities used to encode reference frame data
-static void update_ref_probs(VP9_COMP *const cpi) {
-  VP9_COMMON *const cm = &cpi->common;
-
-  const int *const rfct = cpi->count_mb_ref_frame_usage;
-  const int rf_intra = rfct[INTRA_FRAME];
-  const int rf_inter = rfct[LAST_FRAME] +
-                       rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME];
-
-  cm->prob_intra_coded = get_binary_prob(rf_intra, rf_inter);
-  cm->prob_last_coded = get_prob(rfct[LAST_FRAME], rf_inter);
-  cm->prob_gf_coded = get_binary_prob(rfct[GOLDEN_FRAME], rfct[ALTREF_FRAME]);
-
-  // Compute a modified set of probabilities to use when prediction of the
-  // reference frame fails
-  vp9_compute_mod_refprobs(cm);
-}
-
 static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
-                                vp9_writer *bc,
-                                int mb_rows_left, int mb_cols_left) {
+                                vp9_writer *bc, int mi_row, int mi_col) {
  VP9_COMMON *const pc = &cpi->common;
  const nmv_context *nmvc = &pc->fc.nmvc;
  MACROBLOCK *const x = &cpi->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
-  const int mis = pc->mode_info_stride;
  MB_MODE_INFO *const mi = &m->mbmi;
-  const MV_REFERENCE_FRAME rf = mi->ref_frame;
+  const MV_REFERENCE_FRAME rf = mi->ref_frame[0];
  const MB_PREDICTION_MODE mode = mi->mode;
  const int segment_id = mi->segment_id;
-  const int mb_size = 1 << mi->sb_type;
  int skip_coeff;

-  int mb_row = pc->mb_rows - mb_rows_left;
-  int mb_col = pc->mb_cols - mb_cols_left;
  xd->prev_mode_info_context = pc->prev_mi + (m - pc->mi);
  x->partition_info = x->pi + (m - pc->mi);

-  // Distance of Mb to the various image edges.
- // These specified to 8th pel as they are always compared to MV - // values that are in 1/8th pel units - - set_mb_row(pc, xd, mb_row, mb_size); - set_mb_col(pc, xd, mb_col, mb_size); - #ifdef ENTROPY_STATS active_section = 9; #endif @@ -793,16 +653,14 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, // If the mb segment id wasn't predicted code explicitly if (!prediction_flag) - write_mb_segid_except(pc, bc, mi, &cpi->mb.e_mbd, mb_row, mb_col); + write_mb_segid(bc, mi, &cpi->mb.e_mbd); } else { // Normal unpredicted coding write_mb_segid(bc, mi, &cpi->mb.e_mbd); } } - if (!pc->mb_no_coeff_skip) { - skip_coeff = 0; - } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) { + if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) { skip_coeff = 1; } else { skip_coeff = m->mbmi.mb_skip_coeff; @@ -810,41 +668,49 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, vp9_get_pred_prob(pc, xd, PRED_MBSKIP)); } - // Encode the reference frame. - encode_ref_frame(bc, pc, xd, segment_id, rf); + if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME)) + vp9_write(bc, rf != INTRA_FRAME, + vp9_get_pred_prob(pc, xd, PRED_INTRA_INTER)); + + if (mi->sb_type >= BLOCK_SIZE_SB8X8 && pc->txfm_mode == TX_MODE_SELECT && + !(rf != INTRA_FRAME && + (skip_coeff || vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) { + TX_SIZE sz = mi->txfm_size; + const vp9_prob *tx_probs = vp9_get_pred_probs(pc, xd, PRED_TX_SIZE); + vp9_write(bc, sz != TX_4X4, tx_probs[0]); + if (mi->sb_type >= BLOCK_SIZE_MB16X16 && sz != TX_4X4) { + vp9_write(bc, sz != TX_8X8, tx_probs[1]); + if (mi->sb_type >= BLOCK_SIZE_SB32X32 && sz != TX_8X8) + vp9_write(bc, sz != TX_16X16, tx_probs[2]); + } + } if (rf == INTRA_FRAME) { #ifdef ENTROPY_STATS active_section = 6; #endif - if (m->mbmi.sb_type) - write_sb_ymode(bc, mode, pc->fc.sb_ymode_prob); - else - write_ymode(bc, mode, pc->fc.ymode_prob); - - if (mode == B_PRED) { - int j = 0; - do { - write_bmode(bc, m->bmi[j].as_mode.first, - pc->fc.bmode_prob); - } while (++j < 16); - } - if (mode == I8X8_PRED) { - write_i8x8_mode(bc, m->bmi[0].as_mode.first, - pc->fc.i8x8_mode_prob); - write_i8x8_mode(bc, m->bmi[2].as_mode.first, - pc->fc.i8x8_mode_prob); - write_i8x8_mode(bc, m->bmi[8].as_mode.first, - pc->fc.i8x8_mode_prob); - write_i8x8_mode(bc, m->bmi[10].as_mode.first, - pc->fc.i8x8_mode_prob); + if (m->mbmi.sb_type >= BLOCK_SIZE_SB8X8) { + const BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type; + const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); + const int bsl = MIN(bwl, bhl); + write_intra_mode(bc, mode, pc->fc.y_mode_prob[MIN(3, bsl)]); } else { - write_uv_mode(bc, mi->uv_mode, - pc->fc.uv_mode_prob[mode]); + int idx, idy; + int bw = 1 << b_width_log2(mi->sb_type); + int bh = 1 << b_height_log2(mi->sb_type); + for (idy = 0; idy < 2; idy += bh) + for (idx = 0; idx < 2; idx += bw) { + MB_PREDICTION_MODE bm = m->bmi[idy * 2 + idx].as_mode.first; + write_intra_mode(bc, bm, pc->fc.y_mode_prob[0]); + } } + write_intra_mode(bc, mi->uv_mode, + pc->fc.uv_mode_prob[mode]); } else { - vp9_prob mv_ref_p[VP9_MVREFS - 1]; + vp9_prob mv_ref_p[VP9_INTER_MODES - 1]; + + encode_ref_frame(cpi, bc); vp9_mv_ref_probs(&cpi->common, mv_ref_p, mi->mb_mode_context[rf]); @@ -854,882 +720,269 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, // If segment skip is not enabled code the mode. 
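The tx_probs writes added above encode the transform size as a truncated unary code: each bit asks "larger than the current size?", and the block size caps how many questions can be asked, so an 8x8 block spends at most one bit while a 64x64 block may spend three. A standalone sketch of the same decision structure (write_tx_size() and max_tx are local names; the real code derives the cap from mi->sb_type and fetches context-dependent probabilities via PRED_TX_SIZE):

```c
#include <stdio.h>

typedef enum { TX_4X4, TX_8X8, TX_16X16, TX_32X32 } TX_SIZE;

/* Stand-in for the arithmetic coder: print each raw decision. */
static void write_bool(int bit) { printf("%d", bit); }

/* Truncated unary coding of sz: one "larger than this" bit per step,
 * stopping early once max_tx rules out any bigger size. */
static void write_tx_size(TX_SIZE sz, TX_SIZE max_tx) {
  write_bool(sz != TX_4X4);
  if (max_tx >= TX_16X16 && sz != TX_4X4) {
    write_bool(sz != TX_8X8);
    if (max_tx >= TX_32X32 && sz != TX_8X8)
      write_bool(sz != TX_16X16);
  }
}

int main(void) {
  TX_SIZE sz;
  for (sz = TX_4X4; sz <= TX_32X32; ++sz) {
    printf("tx size %d in a 64x64 block: bits ", (int)sz);
    write_tx_size(sz, TX_32X32);
    printf("\n");
  }
  return 0;
}
```

The decoder necessarily mirrors the same loop, stopping as soon as it reads a zero bit or reaches the cap.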
if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) { - if (mi->sb_type) { + if (mi->sb_type >= BLOCK_SIZE_SB8X8) { write_sb_mv_ref(bc, mode, mv_ref_p); - } else { - write_mv_ref(bc, mode, mv_ref_p); + vp9_accum_mv_refs(&cpi->common, mode, mi->mb_mode_context[rf]); } - vp9_accum_mv_refs(&cpi->common, mode, mi->mb_mode_context[rf]); } - if (mode >= NEARESTMV && mode <= SPLITMV) { - if (cpi->common.mcomp_filter_type == SWITCHABLE) { - write_token(bc, vp9_switchable_interp_tree, - vp9_get_pred_probs(&cpi->common, xd, - PRED_SWITCHABLE_INTERP), - vp9_switchable_interp_encodings + - vp9_switchable_interp_map[mi->interp_filter]); - } else { - assert(mi->interp_filter == cpi->common.mcomp_filter_type); - } - } - - // does the feature use compound prediction or not - // (if not specified at the frame/segment level) - if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) { - vp9_write(bc, mi->second_ref_frame > INTRA_FRAME, - vp9_get_pred_prob(pc, xd, PRED_COMP)); - } -#if CONFIG_COMP_INTERINTRA_PRED - if (cpi->common.use_interintra && - mode >= NEARESTMV && mode < SPLITMV && - mi->second_ref_frame <= INTRA_FRAME) { - vp9_write(bc, mi->second_ref_frame == INTRA_FRAME, - pc->fc.interintra_prob); - // if (!cpi->dummy_packing) - // printf("-- %d (%d)\n", mi->second_ref_frame == INTRA_FRAME, - // pc->fc.interintra_prob); - if (mi->second_ref_frame == INTRA_FRAME) { - // if (!cpi->dummy_packing) - // printf("** %d %d\n", mi->interintra_mode, - // mi->interintra_uv_mode); - write_ymode(bc, mi->interintra_mode, pc->fc.ymode_prob); -#if SEPARATE_INTERINTRA_UV - write_uv_mode(bc, mi->interintra_uv_mode, - pc->fc.uv_mode_prob[mi->interintra_mode]); -#endif - } - } -#endif - -#if CONFIG_NEW_MVREF - // if ((mode == NEWMV) || (mode == SPLITMV)) { - if (mode == NEWMV) { - // Encode the index of the choice. - vp9_write_mv_ref_id(bc, - xd->mb_mv_ref_probs[rf], mi->best_index); - - if (mi->second_ref_frame > 0) { - // Encode the index of the choice. 
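The sub-8x8 loops added above step an idy/idx grid in strides of bw and bh, the block's width and height in 4-pel units, so each prediction unit is visited exactly once: an 8x4 block visits j = 0 and j = 2, a 4x8 block visits j = 0 and j = 1, and a 4x4 block visits all four positions. A small sketch of just that visiting order (visit_subblocks() is a local name):

```c
#include <stdio.h>

/* Enumerate the prediction units of a sub-8x8 block whose dimensions,
 * in 4-pel units, are bw wide and bh tall. */
static void visit_subblocks(int bw, int bh) {
  int idx, idy;
  for (idy = 0; idy < 2; idy += bh)
    for (idx = 0; idx < 2; idx += bw)
      printf("  block j = %d at (row %d, col %d)\n",
             idy * 2 + idx, idy, idx);
}

int main(void) {
  printf("8x4 (bw=2, bh=1):\n");
  visit_subblocks(2, 1);
  printf("4x8 (bw=1, bh=2):\n");
  visit_subblocks(1, 2);
  printf("4x4 (bw=1, bh=1):\n");
  visit_subblocks(1, 1);
  return 0;
}
```

The same geometry drives both the mode writes and the per-block motion vectors in the loop above.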
- vp9_write_mv_ref_id( - bc, xd->mb_mv_ref_probs[mi->second_ref_frame], - mi->best_second_index); - } - } -#endif - - switch (mode) { /* new, split require MVs */ - case NEWMV: -#ifdef ENTROPY_STATS - active_section = 5; -#endif - write_nmv(cpi, bc, &mi->mv[0].as_mv, &mi->best_mv, - (const nmv_context*) nmvc, - xd->allow_high_precision_mv); - - if (mi->second_ref_frame > 0) { - write_nmv(cpi, bc, &mi->mv[1].as_mv, &mi->best_second_mv, - (const nmv_context*) nmvc, - xd->allow_high_precision_mv); - } - break; - case SPLITMV: { - int j = 0; - -#ifdef MODE_STATS - ++count_mb_seg[mi->partitioning]; -#endif - - write_split(bc, mi->partitioning, cpi->common.fc.mbsplit_prob); - cpi->mbsplit_count[mi->partitioning]++; - - do { - B_PREDICTION_MODE blockmode; - int_mv blockmv; - const int *const L = vp9_mbsplits[mi->partitioning]; - int k = -1; /* first block in subset j */ - int mv_contz; - int_mv leftmv, abovemv; - + if (cpi->common.mcomp_filter_type == SWITCHABLE) { + write_token(bc, vp9_switchable_interp_tree, + vp9_get_pred_probs(&cpi->common, xd, + PRED_SWITCHABLE_INTERP), + vp9_switchable_interp_encodings + + vp9_switchable_interp_map[mi->interp_filter]); + } else { + assert(mi->interp_filter == cpi->common.mcomp_filter_type); + } + + if (xd->mode_info_context->mbmi.sb_type < BLOCK_SIZE_SB8X8) { + int j; + MB_PREDICTION_MODE blockmode; + int_mv blockmv; + int bwl = b_width_log2(mi->sb_type), bw = 1 << bwl; + int bhl = b_height_log2(mi->sb_type), bh = 1 << bhl; + int idx, idy; + for (idy = 0; idy < 2; idy += bh) { + for (idx = 0; idx < 2; idx += bw) { + j = idy * 2 + idx; blockmode = cpi->mb.partition_info->bmi[j].mode; blockmv = cpi->mb.partition_info->bmi[j].mv; -#if CONFIG_DEBUG - while (j != L[++k]) - if (k >= 16) - assert(0); -#else - while (j != L[++k]); -#endif - leftmv.as_int = left_block_mv(xd, m, k); - abovemv.as_int = above_block_mv(m, k, mis); - mv_contz = vp9_mv_cont(&leftmv, &abovemv); - - write_sub_mv_ref(bc, blockmode, - cpi->common.fc.sub_mv_ref_prob[mv_contz]); - cpi->sub_mv_ref_count[mv_contz][blockmode - LEFT4X4]++; - if (blockmode == NEW4X4) { + write_sb_mv_ref(bc, blockmode, mv_ref_p); + vp9_accum_mv_refs(&cpi->common, blockmode, mi->mb_mode_context[rf]); + if (blockmode == NEWMV) { #ifdef ENTROPY_STATS active_section = 11; #endif - write_nmv(cpi, bc, &blockmv.as_mv, &mi->best_mv, - (const nmv_context*) nmvc, - xd->allow_high_precision_mv); - - if (mi->second_ref_frame > 0) { - write_nmv(cpi, bc, - &cpi->mb.partition_info->bmi[j].second_mv.as_mv, - &mi->best_second_mv, - (const nmv_context*) nmvc, - xd->allow_high_precision_mv); - } + vp9_encode_mv(bc, &blockmv.as_mv, &mi->best_mv.as_mv, + nmvc, xd->allow_high_precision_mv); + + if (mi->ref_frame[1] > INTRA_FRAME) + vp9_encode_mv(bc, + &cpi->mb.partition_info->bmi[j].second_mv.as_mv, + &mi->best_second_mv.as_mv, + nmvc, xd->allow_high_precision_mv); } - } while (++j < cpi->mb.partition_info->count); - break; + } } - default: - break; - } - } + } else if (mode == NEWMV) { +#ifdef ENTROPY_STATS + active_section = 5; +#endif + vp9_encode_mv(bc, + &mi->mv[0].as_mv, &mi->best_mv.as_mv, + nmvc, xd->allow_high_precision_mv); - if (((rf == INTRA_FRAME && mode <= I8X8_PRED) || - (rf != INTRA_FRAME && !(mode == SPLITMV && - mi->partitioning == PARTITIONING_4X4))) && - pc->txfm_mode == TX_MODE_SELECT && - !((pc->mb_no_coeff_skip && skip_coeff) || - (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) { - TX_SIZE sz = mi->txfm_size; - // FIXME(rbultje) code ternary symbol once all experiments are merged - vp9_write(bc, sz != 
TX_4X4, pc->prob_tx[0]); - if (sz != TX_4X4 && mode != I8X8_PRED && mode != SPLITMV) { - vp9_write(bc, sz != TX_8X8, pc->prob_tx[1]); - if (mi->sb_type && sz != TX_8X8) - vp9_write(bc, sz != TX_16X16, pc->prob_tx[2]); + if (mi->ref_frame[1] > INTRA_FRAME) + vp9_encode_mv(bc, + &mi->mv[1].as_mv, &mi->best_second_mv.as_mv, + nmvc, xd->allow_high_precision_mv); } } } static void write_mb_modes_kf(const VP9_COMP *cpi, MODE_INFO *m, - vp9_writer *bc, - int mb_rows_left, int mb_cols_left) { + vp9_writer *bc, int mi_row, int mi_col) { const VP9_COMMON *const c = &cpi->common; const MACROBLOCKD *const xd = &cpi->mb.e_mbd; - const int mis = c->mode_info_stride; const int ym = m->mbmi.mode; + const int mis = c->mode_info_stride; const int segment_id = m->mbmi.segment_id; int skip_coeff; - if (xd->update_mb_segmentation_map) { + if (xd->update_mb_segmentation_map) write_mb_segid(bc, &m->mbmi, xd); - } - if (!c->mb_no_coeff_skip) { - skip_coeff = 0; - } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) { + if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) { skip_coeff = 1; } else { skip_coeff = m->mbmi.mb_skip_coeff; - vp9_write(bc, skip_coeff, - vp9_get_pred_prob(c, xd, PRED_MBSKIP)); - } - - if (m->mbmi.sb_type) { - sb_kfwrite_ymode(bc, ym, - c->sb_kf_ymode_prob[c->kf_ymode_probs_index]); - } else { - kfwrite_ymode(bc, ym, - c->kf_ymode_prob[c->kf_ymode_probs_index]); + vp9_write(bc, skip_coeff, vp9_get_pred_prob(c, xd, PRED_MBSKIP)); } - if (ym == B_PRED) { - int i = 0; - do { - const B_PREDICTION_MODE A = above_block_mode(m, i, mis); - const B_PREDICTION_MODE L = (xd->left_available || (i & 3)) ? - left_block_mode(m, i) : B_DC_PRED; - const int bm = m->bmi[i].as_mode.first; - -#ifdef ENTROPY_STATS - ++intra_mode_stats [A] [L] [bm]; -#endif - - write_kf_bmode(bc, bm, c->kf_bmode_prob[A][L]); - } while (++i < 16); - } - if (ym == I8X8_PRED) { - write_i8x8_mode(bc, m->bmi[0].as_mode.first, - c->fc.i8x8_mode_prob); - // printf(" mode: %d\n", m->bmi[0].as_mode.first); fflush(stdout); - write_i8x8_mode(bc, m->bmi[2].as_mode.first, - c->fc.i8x8_mode_prob); - // printf(" mode: %d\n", m->bmi[2].as_mode.first); fflush(stdout); - write_i8x8_mode(bc, m->bmi[8].as_mode.first, - c->fc.i8x8_mode_prob); - // printf(" mode: %d\n", m->bmi[8].as_mode.first); fflush(stdout); - write_i8x8_mode(bc, m->bmi[10].as_mode.first, - c->fc.i8x8_mode_prob); - // printf(" mode: %d\n", m->bmi[10].as_mode.first); fflush(stdout); - } else - write_uv_mode(bc, m->mbmi.uv_mode, c->kf_uv_mode_prob[ym]); - - if (ym <= I8X8_PRED && c->txfm_mode == TX_MODE_SELECT && - !((c->mb_no_coeff_skip && skip_coeff) || - (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) { + if (m->mbmi.sb_type >= BLOCK_SIZE_SB8X8 && c->txfm_mode == TX_MODE_SELECT) { TX_SIZE sz = m->mbmi.txfm_size; - // FIXME(rbultje) code ternary symbol once all experiments are merged - vp9_write(bc, sz != TX_4X4, c->prob_tx[0]); - if (sz != TX_4X4 && ym <= TM_PRED) { - vp9_write(bc, sz != TX_8X8, c->prob_tx[1]); - if (m->mbmi.sb_type && sz != TX_8X8) - vp9_write(bc, sz != TX_16X16, c->prob_tx[2]); + const vp9_prob *tx_probs = vp9_get_pred_probs(c, xd, PRED_TX_SIZE); + vp9_write(bc, sz != TX_4X4, tx_probs[0]); + if (m->mbmi.sb_type >= BLOCK_SIZE_MB16X16 && sz != TX_4X4) { + vp9_write(bc, sz != TX_8X8, tx_probs[1]); + if (m->mbmi.sb_type >= BLOCK_SIZE_SB32X32 && sz != TX_8X8) + vp9_write(bc, sz != TX_16X16, tx_probs[2]); } } -} -#if CONFIG_CODE_NONZEROCOUNT -static void write_nzc(VP9_COMMON *const cm, - uint16_t nzc, - int nzc_context, - TX_SIZE tx_size, - 
int ref, - int type, - vp9_writer* const bc) { - int c, e; - c = codenzc(nzc); - if (tx_size == TX_32X32) { - write_token(bc, vp9_nzc32x32_tree, - cm->fc.nzc_probs_32x32[nzc_context][ref][type], - vp9_nzc32x32_encodings + c); - // cm->fc.nzc_counts_32x32[nzc_context][ref][type][c]++; - } else if (tx_size == TX_16X16) { - write_token(bc, vp9_nzc16x16_tree, - cm->fc.nzc_probs_16x16[nzc_context][ref][type], - vp9_nzc16x16_encodings + c); - // cm->fc.nzc_counts_16x16[nzc_context][ref][type][c]++; - } else if (tx_size == TX_8X8) { - write_token(bc, vp9_nzc8x8_tree, - cm->fc.nzc_probs_8x8[nzc_context][ref][type], - vp9_nzc8x8_encodings + c); - // cm->fc.nzc_counts_8x8[nzc_context][ref][type][c]++; - } else if (tx_size == TX_4X4) { - write_token(bc, vp9_nzc4x4_tree, - cm->fc.nzc_probs_4x4[nzc_context][ref][type], - vp9_nzc4x4_encodings + c); - // cm->fc.nzc_counts_4x4[nzc_context][ref][type][c]++; + if (m->mbmi.sb_type >= BLOCK_SIZE_SB8X8) { + const MB_PREDICTION_MODE A = above_block_mode(m, 0, mis); + const MB_PREDICTION_MODE L = xd->left_available ? + left_block_mode(m, 0) : DC_PRED; + write_intra_mode(bc, ym, c->kf_y_mode_prob[A][L]); } else { - assert(0); - } - - if ((e = vp9_extranzcbits[c])) { - int x = nzc - vp9_basenzcvalue[c]; - while (e--) { - int b = (x >> e) & 1; - vp9_write(bc, b, - cm->fc.nzc_pcat_probs[nzc_context][c - NZC_TOKENS_NOEXTRA][e]); - // cm->fc.nzc_pcat_counts[nzc_context][c - NZC_TOKENS_NOEXTRA][e][b]++; - } - } -} - -static void write_nzcs_sb64(VP9_COMP *cpi, - MACROBLOCKD *xd, - int mb_row, - int mb_col, - vp9_writer* const bc) { - VP9_COMMON *const cm = &cpi->common; - MODE_INFO *m = xd->mode_info_context; - MB_MODE_INFO *const mi = &m->mbmi; - int j, nzc_context; - const int ref = m->mbmi.ref_frame != INTRA_FRAME; - - assert(mb_col == get_mb_col(xd)); - assert(mb_row == get_mb_row(xd)); - - if (mi->mb_skip_coeff) - return; - - switch (mi->txfm_size) { - case TX_32X32: - for (j = 0; j < 256; j += 64) { - nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j); - write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_32X32, ref, 0, bc); - } - for (j = 256; j < 384; j += 64) { - nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j); - write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_32X32, ref, 1, bc); - } - break; - - case TX_16X16: - for (j = 0; j < 256; j += 16) { - nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j); - write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 0, bc); - } - for (j = 256; j < 384; j += 16) { - nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j); - write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 1, bc); - } - break; - - case TX_8X8: - for (j = 0; j < 256; j += 4) { - nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j); - write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 0, bc); - } - for (j = 256; j < 384; j += 4) { - nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j); - write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1, bc); - } - break; - - case TX_4X4: - for (j = 0; j < 256; ++j) { - nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j); - write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 0, bc); - } - for (j = 256; j < 384; ++j) { - nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j); - write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1, bc); - } - break; - - default: - break; - } -} - -static void write_nzcs_sb32(VP9_COMP *cpi, - MACROBLOCKD *xd, - int mb_row, - int mb_col, - 
vp9_writer* const bc) { - VP9_COMMON *const cm = &cpi->common; - MODE_INFO *m = xd->mode_info_context; - MB_MODE_INFO *const mi = &m->mbmi; - int j, nzc_context; - const int ref = m->mbmi.ref_frame != INTRA_FRAME; - - assert(mb_col == get_mb_col(xd)); - assert(mb_row == get_mb_row(xd)); - - if (mi->mb_skip_coeff) - return; - - switch (mi->txfm_size) { - case TX_32X32: - for (j = 0; j < 64; j += 64) { - nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j); - write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_32X32, ref, 0, bc); - } - for (j = 64; j < 96; j += 16) { - nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j); - write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 1, bc); - } - break; - - case TX_16X16: - for (j = 0; j < 64; j += 16) { - nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j); - write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 0, bc); - } - for (j = 64; j < 96; j += 16) { - nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j); - write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 1, bc); - } - break; - - case TX_8X8: - for (j = 0; j < 64; j += 4) { - nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j); - write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 0, bc); - } - for (j = 64; j < 96; j += 4) { - nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j); - write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1, bc); - } - break; - - case TX_4X4: - for (j = 0; j < 64; ++j) { - nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j); - write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 0, bc); - } - for (j = 64; j < 96; ++j) { - nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j); - write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1, bc); - } - break; - - default: - break; - } -} - -static void write_nzcs_mb16(VP9_COMP *cpi, - MACROBLOCKD *xd, - int mb_row, - int mb_col, - vp9_writer* const bc) { - VP9_COMMON *const cm = &cpi->common; - MODE_INFO *m = xd->mode_info_context; - MB_MODE_INFO *const mi = &m->mbmi; - int j, nzc_context; - const int ref = m->mbmi.ref_frame != INTRA_FRAME; - - assert(mb_col == get_mb_col(xd)); - assert(mb_row == get_mb_row(xd)); - - if (mi->mb_skip_coeff) - return; - - switch (mi->txfm_size) { - case TX_16X16: - for (j = 0; j < 16; j += 16) { - nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j); - write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 0, bc); - } - for (j = 16; j < 24; j += 4) { - nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j); - write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1, bc); - } - break; - - case TX_8X8: - for (j = 0; j < 16; j += 4) { - nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j); - write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 0, bc); - } - if (mi->mode == I8X8_PRED || mi->mode == SPLITMV) { - for (j = 16; j < 24; ++j) { - nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j); - write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1, bc); - } - } else { - for (j = 16; j < 24; j += 4) { - nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j); - write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1, bc); - } - } - break; - - case TX_4X4: - for (j = 0; j < 16; ++j) { - nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j); - write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 0, bc); - } - for (j = 16; j < 24; ++j) { - 
nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j); - write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1, bc); - } - break; - - default: - break; - } -} - -#ifdef NZC_STATS -void init_nzcstats() { - vp9_zero(nzc_stats_4x4); - vp9_zero(nzc_stats_8x8); - vp9_zero(nzc_stats_16x16); - vp9_zero(nzc_stats_32x32); - vp9_zero(nzc_pcat_stats); -} - -void update_nzcstats(VP9_COMMON *const cm) { - int c, r, b, t; - - for (c = 0; c < MAX_NZC_CONTEXTS; ++c) { - for (r = 0; r < REF_TYPES; ++r) { - for (b = 0; b < BLOCK_TYPES; ++b) { - for (t = 0; t < NZC4X4_TOKENS; ++t) { - nzc_stats_4x4[c][r][b][t] += cm->fc.nzc_counts_4x4[c][r][b][t]; - } - } - } - } - for (c = 0; c < MAX_NZC_CONTEXTS; ++c) { - for (r = 0; r < REF_TYPES; ++r) { - for (b = 0; b < BLOCK_TYPES; ++b) { - for (t = 0; t < NZC8X8_TOKENS; ++t) { - nzc_stats_8x8[c][r][b][t] += cm->fc.nzc_counts_8x8[c][r][b][t]; - } - } - } - } - for (c = 0; c < MAX_NZC_CONTEXTS; ++c) { - for (r = 0; r < REF_TYPES; ++r) { - for (b = 0; b < BLOCK_TYPES; ++b) { - for (t = 0; t < NZC16X16_TOKENS; ++t) { - nzc_stats_16x16[c][r][b][t] += cm->fc.nzc_counts_16x16[c][r][b][t]; - } - } - } - } - for (c = 0; c < MAX_NZC_CONTEXTS; ++c) { - for (r = 0; r < REF_TYPES; ++r) { - for (b = 0; b < BLOCK_TYPES; ++b) { - for (t = 0; t < NZC32X32_TOKENS; ++t) { - nzc_stats_32x32[c][r][b][t] += cm->fc.nzc_counts_32x32[c][r][b][t]; - } - } - } - } - for (c = 0; c < MAX_NZC_CONTEXTS; ++c) { - for (t = 0; t < NZC_TOKENS_EXTRA; ++t) { - int bits = vp9_extranzcbits[t + NZC_TOKENS_NOEXTRA]; - for (b = 0; b < bits; ++b) { - nzc_pcat_stats[c][t][b][0] += cm->fc.nzc_pcat_counts[c][t][b][0]; - nzc_pcat_stats[c][t][b][1] += cm->fc.nzc_pcat_counts[c][t][b][1]; - } - } - } -} - -void print_nzcstats() { - int c, r, b, t; - FILE *f; - - printf( - "static const unsigned int default_nzc_counts_4x4[MAX_NZC_CONTEXTS]\n" - " [REF_TYPES]\n" - " [BLOCK_TYPES]\n" - " [NZC4X4_TOKENS] = {\n"); - for (c = 0; c < MAX_NZC_CONTEXTS; ++c) { - printf(" {\n"); - for (r = 0; r < REF_TYPES; ++r) { - printf(" {\n"); - for (b = 0; b < BLOCK_TYPES; ++b) { - printf(" {"); - for (t = 0; t < NZC4X4_TOKENS; ++t) { - printf(" %-3d,", nzc_stats_4x4[c][r][b][t]); - } - printf(" },\n"); - } - printf(" },\n"); - } - printf(" },\n"); - } - printf("};\n"); - - printf( - "static const unsigned int default_nzc_counts_8x8[MAX_NZC_CONTEXTS]\n" - " [REF_TYPES]\n" - " [BLOCK_TYPES]\n" - " [NZC8X8_TOKENS] = {\n"); - for (c = 0; c < MAX_NZC_CONTEXTS; ++c) { - printf(" {\n"); - for (r = 0; r < REF_TYPES; ++r) { - printf(" {\n"); - for (b = 0; b < BLOCK_TYPES; ++b) { - printf(" {"); - for (t = 0; t < NZC8X8_TOKENS; ++t) { - printf(" %-3d,", nzc_stats_8x8[c][r][b][t]); - } - printf(" },\n"); - } - printf(" },\n"); - } - printf(" },\n"); - } - printf("};\n"); - - printf( - "static const unsigned int default_nzc_counts_16x16[MAX_NZC_CONTEXTS]\n" - " [REF_TYPES]\n" - " [BLOCK_TYPES]\n" - " [NZC16X16_TOKENS] = {" - "\n"); - for (c = 0; c < MAX_NZC_CONTEXTS; ++c) { - printf(" {\n"); - for (r = 0; r < REF_TYPES; ++r) { - printf(" {\n"); - for (b = 0; b < BLOCK_TYPES; ++b) { - printf(" {"); - for (t = 0; t < NZC16X16_TOKENS; ++t) { - printf(" %-3d,", nzc_stats_16x16[c][r][b][t]); - } - printf(" },\n"); - } - printf(" },\n"); - } - printf(" },\n"); - } - printf("};\n"); - - printf( - "static const unsigned int default_nzc_counts_32x32[MAX_NZC_CONTEXTS]\n" - " [REF_TYPES]\n" - " [BLOCK_TYPES]\n" - " [NZC32X32_TOKENS] = {" - "\n"); - for (c = 0; c < MAX_NZC_CONTEXTS; ++c) { - printf(" {\n"); - for (r = 0; r < REF_TYPES; ++r) 
{ - printf(" {\n"); - for (b = 0; b < BLOCK_TYPES; ++b) { - printf(" {"); - for (t = 0; t < NZC32X32_TOKENS; ++t) { - printf(" %-3d,", nzc_stats_32x32[c][r][b][t]); - } - printf(" },\n"); - } - printf(" },\n"); - } - printf(" },\n"); - } - printf("};\n"); - - printf( - "static const vp9_prob default_nzc_pcat_counts[MAX_NZC_CONTEXTS]\n" - " [NZC_TOKENS_EXTRA]\n" - " [NZC_BITS_EXTRA] = {\n"); - for (c = 0; c < MAX_NZC_CONTEXTS; ++c) { - printf(" {\n"); - for (t = 0; t < NZC_TOKENS_EXTRA; ++t) { - printf(" {"); - for (b = 0; b < NZC_BITS_EXTRA; ++b) { - printf(" %d/%d,", - nzc_pcat_stats[c][t][b][0], nzc_pcat_stats[c][t][b][1]); - } - printf(" },\n"); - } - printf(" },\n"); - } - printf("};\n"); - - printf( - "static const vp9_prob default_nzc_probs_4x4[MAX_NZC_CONTEXTS]\n" - " [REF_TYPES]\n" - " [BLOCK_TYPES]\n" - " [NZC4X4_TOKENS] = {\n"); - for (c = 0; c < MAX_NZC_CONTEXTS; ++c) { - printf(" {\n"); - for (r = 0; r < REF_TYPES; ++r) { - printf(" {\n"); - for (b = 0; b < BLOCK_TYPES; ++b) { - vp9_prob probs[NZC4X4_NODES]; - unsigned int branch_ct[NZC4X4_NODES][2]; - vp9_tree_probs_from_distribution(vp9_nzc4x4_tree, - probs, branch_ct, - nzc_stats_4x4[c][r][b], 0); - printf(" {"); - for (t = 0; t < NZC4X4_NODES; ++t) { - printf(" %-3d,", probs[t]); - } - printf(" },\n"); - } - printf(" },\n"); - } - printf(" },\n"); - } - printf("};\n"); - - printf( - "static const vp9_prob default_nzc_probs_8x8[MAX_NZC_CONTEXTS]\n" - " [REF_TYPES]\n" - " [BLOCK_TYPES]\n" - " [NZC8X8_TOKENS] = {\n"); - for (c = 0; c < MAX_NZC_CONTEXTS; ++c) { - printf(" {\n"); - for (r = 0; r < REF_TYPES; ++r) { - printf(" {\n"); - for (b = 0; b < BLOCK_TYPES; ++b) { - vp9_prob probs[NZC8X8_NODES]; - unsigned int branch_ct[NZC8X8_NODES][2]; - vp9_tree_probs_from_distribution(vp9_nzc8x8_tree, - probs, branch_ct, - nzc_stats_8x8[c][r][b], 0); - printf(" {"); - for (t = 0; t < NZC8X8_NODES; ++t) { - printf(" %-3d,", probs[t]); - } - printf(" },\n"); - } - printf(" },\n"); - } - printf(" },\n"); - } - printf("};\n"); - - printf( - "static const vp9_prob default_nzc_probs_16x16[MAX_NZC_CONTEXTS]\n" - " [REF_TYPES]\n" - " [BLOCK_TYPES]\n" - " [NZC16X16_TOKENS] = {\n"); - for (c = 0; c < MAX_NZC_CONTEXTS; ++c) { - printf(" {\n"); - for (r = 0; r < REF_TYPES; ++r) { - printf(" {\n"); - for (b = 0; b < BLOCK_TYPES; ++b) { - vp9_prob probs[NZC16X16_NODES]; - unsigned int branch_ct[NZC16X16_NODES][2]; - vp9_tree_probs_from_distribution(vp9_nzc16x16_tree, - probs, branch_ct, - nzc_stats_16x16[c][r][b], 0); - printf(" {"); - for (t = 0; t < NZC16X16_NODES; ++t) { - printf(" %-3d,", probs[t]); - } - printf(" },\n"); - } - printf(" },\n"); - } - printf(" },\n"); - } - printf("};\n"); - - printf( - "static const vp9_prob default_nzc_probs_32x32[MAX_NZC_CONTEXTS]\n" - " [REF_TYPES]\n" - " [BLOCK_TYPES]\n" - " [NZC32X32_TOKENS] = {\n"); - for (c = 0; c < MAX_NZC_CONTEXTS; ++c) { - printf(" {\n"); - for (r = 0; r < REF_TYPES; ++r) { - printf(" {\n"); - for (b = 0; b < BLOCK_TYPES; ++b) { - vp9_prob probs[NZC32X32_NODES]; - unsigned int branch_ct[NZC32X32_NODES][2]; - vp9_tree_probs_from_distribution(vp9_nzc32x32_tree, - probs, branch_ct, - nzc_stats_32x32[c][r][b], 0); - printf(" {"); - for (t = 0; t < NZC32X32_NODES; ++t) { - printf(" %-3d,", probs[t]); - } - printf(" },\n"); - } - printf(" },\n"); - } - printf(" },\n"); - } - printf("};\n"); - - printf( - "static const vp9_prob default_nzc_pcat_probs[MAX_NZC_CONTEXTS]\n" - " [NZC_TOKENS_EXTRA]\n" - " [NZC_BITS_EXTRA] = {\n"); - for (c = 0; c < MAX_NZC_CONTEXTS; ++c) { - printf(" {\n"); - for 
(t = 0; t < NZC_TOKENS_EXTRA; ++t) { - printf(" {"); - for (b = 0; b < NZC_BITS_EXTRA; ++b) { - vp9_prob prob = get_binary_prob(nzc_pcat_stats[c][t][b][0], - nzc_pcat_stats[c][t][b][1]); - printf(" %-3d,", prob); + int idx, idy; + int bw = 1 << b_width_log2(m->mbmi.sb_type); + int bh = 1 << b_height_log2(m->mbmi.sb_type); + for (idy = 0; idy < 2; idy += bh) { + for (idx = 0; idx < 2; idx += bw) { + int i = idy * 2 + idx; + const MB_PREDICTION_MODE A = above_block_mode(m, i, mis); + const MB_PREDICTION_MODE L = (xd->left_available || idx) ? + left_block_mode(m, i) : DC_PRED; + const int bm = m->bmi[i].as_mode.first; +#ifdef ENTROPY_STATS + ++intra_mode_stats[A][L][bm]; +#endif + write_intra_mode(bc, bm, c->kf_y_mode_prob[A][L]); } - printf(" },\n"); } - printf(" },\n"); } - printf("};\n"); - f = fopen("nzcstats.bin", "wb"); - fwrite(nzc_stats_4x4, sizeof(nzc_stats_4x4), 1, f); - fwrite(nzc_stats_8x8, sizeof(nzc_stats_8x8), 1, f); - fwrite(nzc_stats_16x16, sizeof(nzc_stats_16x16), 1, f); - fwrite(nzc_stats_32x32, sizeof(nzc_stats_32x32), 1, f); - fwrite(nzc_pcat_stats, sizeof(nzc_pcat_stats), 1, f); - fclose(f); + write_intra_mode(bc, m->mbmi.uv_mode, c->kf_uv_mode_prob[ym]); } -#endif - -#endif // CONFIG_CODE_NONZEROCOUNT static void write_modes_b(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc, TOKENEXTRA **tok, TOKENEXTRA *tok_end, - int mb_row, int mb_col) { + int mi_row, int mi_col) { VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->mb.e_mbd; + if (m->mbmi.sb_type < BLOCK_SIZE_SB8X8) + if (xd->ab_index > 0) + return; xd->mode_info_context = m; - set_mb_row(&cpi->common, xd, mb_row, (1 << m->mbmi.sb_type)); - set_mb_col(&cpi->common, xd, mb_col, (1 << m->mbmi.sb_type)); - if (cm->frame_type == KEY_FRAME) { - write_mb_modes_kf(cpi, m, bc, - cm->mb_rows - mb_row, cm->mb_cols - mb_col); + set_mi_row_col(&cpi->common, xd, mi_row, + 1 << mi_height_log2(m->mbmi.sb_type), + mi_col, 1 << mi_width_log2(m->mbmi.sb_type)); + if ((cm->frame_type == KEY_FRAME) || cm->intra_only) { + write_mb_modes_kf(cpi, m, bc, mi_row, mi_col); #ifdef ENTROPY_STATS active_section = 8; #endif } else { - pack_inter_mode_mvs(cpi, m, bc, - cm->mb_rows - mb_row, cm->mb_cols - mb_col); + pack_inter_mode_mvs(cpi, m, bc, mi_row, mi_col); #ifdef ENTROPY_STATS active_section = 1; #endif } -#if CONFIG_CODE_NONZEROCOUNT - if (m->mbmi.sb_type == BLOCK_SIZE_SB64X64) - write_nzcs_sb64(cpi, xd, mb_row, mb_col, bc); - else if (m->mbmi.sb_type == BLOCK_SIZE_SB32X32) - write_nzcs_sb32(cpi, xd, mb_row, mb_col, bc); - else - write_nzcs_mb16(cpi, xd, mb_row, mb_col, bc); -#endif assert(*tok < tok_end); pack_mb_tokens(bc, tok, tok_end); } +static void write_modes_sb(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc, + TOKENEXTRA **tok, TOKENEXTRA *tok_end, + int mi_row, int mi_col, + BLOCK_SIZE_TYPE bsize) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *xd = &cpi->mb.e_mbd; + const int mis = cm->mode_info_stride; + int bwl, bhl; + int bsl = b_width_log2(bsize); + int bs = (1 << bsl) / 4; // mode_info step for subsize + int n; + PARTITION_TYPE partition; + BLOCK_SIZE_TYPE subsize; + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; + + bwl = b_width_log2(m->mbmi.sb_type); + bhl = b_height_log2(m->mbmi.sb_type); + + // parse the partition type + if ((bwl == bsl) && (bhl == bsl)) + partition = PARTITION_NONE; + else if ((bwl == bsl) && (bhl < bsl)) + partition = PARTITION_HORZ; + else if ((bwl < bsl) && (bhl == bsl)) + partition = PARTITION_VERT; + else if ((bwl < bsl) && (bhl < bsl)) + partition = 
PARTITION_SPLIT; + else + assert(0); + + if (bsize < BLOCK_SIZE_SB8X8) + if (xd->ab_index > 0) + return; + + if (bsize >= BLOCK_SIZE_SB8X8) { + int pl; + int idx = check_bsize_coverage(cm, xd, mi_row, mi_col, bsize); + xd->left_seg_context = cm->left_seg_context + (mi_row & MI_MASK); + xd->above_seg_context = cm->above_seg_context + mi_col; + pl = partition_plane_context(xd, bsize); + // encode the partition information + if (idx == 0) + write_token(bc, vp9_partition_tree, + cm->fc.partition_prob[cm->frame_type][pl], + vp9_partition_encodings + partition); + else if (idx > 0) + vp9_write(bc, partition == PARTITION_SPLIT, + cm->fc.partition_prob[cm->frame_type][pl][idx]); + } + + subsize = get_subsize(bsize, partition); + *(get_sb_index(xd, subsize)) = 0; + + switch (partition) { + case PARTITION_NONE: + write_modes_b(cpi, m, bc, tok, tok_end, mi_row, mi_col); + break; + case PARTITION_HORZ: + write_modes_b(cpi, m, bc, tok, tok_end, mi_row, mi_col); + *(get_sb_index(xd, subsize)) = 1; + if ((mi_row + bs) < cm->mi_rows) + write_modes_b(cpi, m + bs * mis, bc, tok, tok_end, mi_row + bs, mi_col); + break; + case PARTITION_VERT: + write_modes_b(cpi, m, bc, tok, tok_end, mi_row, mi_col); + *(get_sb_index(xd, subsize)) = 1; + if ((mi_col + bs) < cm->mi_cols) + write_modes_b(cpi, m + bs, bc, tok, tok_end, mi_row, mi_col + bs); + break; + case PARTITION_SPLIT: + for (n = 0; n < 4; n++) { + int j = n >> 1, i = n & 0x01; + *(get_sb_index(xd, subsize)) = n; + write_modes_sb(cpi, m + j * bs * mis + i * bs, bc, tok, tok_end, + mi_row + j * bs, mi_col + i * bs, subsize); + } + break; + default: + assert(0); + } + + // update partition context + if (bsize >= BLOCK_SIZE_SB8X8 && + (bsize == BLOCK_SIZE_SB8X8 || partition != PARTITION_SPLIT)) { + set_partition_seg_context(cm, xd, mi_row, mi_col); + update_partition_context(xd, subsize, bsize); + } +} + static void write_modes(VP9_COMP *cpi, vp9_writer* const bc, TOKENEXTRA **tok, TOKENEXTRA *tok_end) { VP9_COMMON *const c = &cpi->common; const int mis = c->mode_info_stride; MODE_INFO *m, *m_ptr = c->mi; - int i, mb_row, mb_col; + int mi_row, mi_col; - m_ptr += c->cur_tile_mb_col_start + c->cur_tile_mb_row_start * mis; - for (mb_row = c->cur_tile_mb_row_start; - mb_row < c->cur_tile_mb_row_end; mb_row += 4, m_ptr += 4 * mis) { - m = m_ptr; - for (mb_col = c->cur_tile_mb_col_start; - mb_col < c->cur_tile_mb_col_end; mb_col += 4, m += 4) { - vp9_write(bc, m->mbmi.sb_type == BLOCK_SIZE_SB64X64, c->sb64_coded); - if (m->mbmi.sb_type == BLOCK_SIZE_SB64X64) { - write_modes_b(cpi, m, bc, tok, tok_end, mb_row, mb_col); - } else { - int j; - - for (j = 0; j < 4; j++) { - const int x_idx_sb = (j & 1) << 1, y_idx_sb = j & 2; - MODE_INFO *sb_m = m + y_idx_sb * mis + x_idx_sb; - - if (mb_col + x_idx_sb >= c->mb_cols || - mb_row + y_idx_sb >= c->mb_rows) - continue; + m_ptr += c->cur_tile_mi_col_start + c->cur_tile_mi_row_start * mis; + vpx_memset(c->above_seg_context, 0, sizeof(PARTITION_CONTEXT) * + mi_cols_aligned_to_sb(c)); - vp9_write(bc, sb_m->mbmi.sb_type, c->sb32_coded); - if (sb_m->mbmi.sb_type) { - assert(sb_m->mbmi.sb_type == BLOCK_SIZE_SB32X32); - write_modes_b(cpi, sb_m, bc, tok, tok_end, - mb_row + y_idx_sb, mb_col + x_idx_sb); - } else { - // Process the 4 MBs in the order: - // top-left, top-right, bottom-left, bottom-right - for (i = 0; i < 4; i++) { - const int x_idx = x_idx_sb + (i & 1), y_idx = y_idx_sb + (i >> 1); - MODE_INFO *mb_m = m + x_idx + y_idx * mis; - - if (mb_row + y_idx >= c->mb_rows || - mb_col + x_idx >= c->mb_cols) { - // MB lies outside 
frame, move on - continue; - } - - assert(mb_m->mbmi.sb_type == BLOCK_SIZE_MB16X16); - write_modes_b(cpi, mb_m, bc, tok, tok_end, - mb_row + y_idx, mb_col + x_idx); - } - } - } - } - } + for (mi_row = c->cur_tile_mi_row_start; + mi_row < c->cur_tile_mi_row_end; + mi_row += 8, m_ptr += 8 * mis) { + m = m_ptr; + vpx_memset(c->left_seg_context, 0, sizeof(c->left_seg_context)); + for (mi_col = c->cur_tile_mi_col_start; + mi_col < c->cur_tile_mi_col_end; + mi_col += 64 / MI_SIZE, m += 64 / MI_SIZE) + write_modes_sb(cpi, m, bc, tok, tok_end, mi_row, mi_col, + BLOCK_SIZE_SB64X64); } } - /* This function is used for debugging probability trees. */ static void print_prob_tree(vp9_coeff_probs *coef_probs, int block_types) { /* print coef probability tree */ @@ -1759,42 +1012,47 @@ static void print_prob_tree(vp9_coeff_probs *coef_probs, int block_types) { fclose(f); } -static void build_tree_distribution(vp9_coeff_probs *coef_probs, - vp9_coeff_count *coef_counts, - unsigned int (*eob_branch_ct)[REF_TYPES] - [COEF_BANDS] - [PREV_COEF_CONTEXTS], -#ifdef ENTROPY_STATS - VP9_COMP *cpi, - vp9_coeff_accum *context_counters, -#endif - vp9_coeff_stats *coef_branch_ct, - int block_types) { +static void build_tree_distribution(VP9_COMP *cpi, TX_SIZE txfm_size) { + vp9_coeff_probs_model *coef_probs = cpi->frame_coef_probs[txfm_size]; + vp9_coeff_count *coef_counts = cpi->coef_counts[txfm_size]; + unsigned int (*eob_branch_ct)[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS] = + cpi->common.fc.eob_branch_counts[txfm_size]; + vp9_coeff_stats *coef_branch_ct = cpi->frame_branch_ct[txfm_size]; + vp9_prob full_probs[ENTROPY_NODES]; int i, j, k, l; -#ifdef ENTROPY_STATS - int t = 0; -#endif - for (i = 0; i < block_types; ++i) { + for (i = 0; i < BLOCK_TYPES; ++i) { for (j = 0; j < REF_TYPES; ++j) { for (k = 0; k < COEF_BANDS; ++k) { for (l = 0; l < PREV_COEF_CONTEXTS; ++l) { if (l >= 3 && k == 0) continue; vp9_tree_probs_from_distribution(vp9_coef_tree, - coef_probs[i][j][k][l], + full_probs, coef_branch_ct[i][j][k][l], coef_counts[i][j][k][l], 0); + vpx_memcpy(coef_probs[i][j][k][l], full_probs, + sizeof(vp9_prob) * UNCONSTRAINED_NODES); +#if CONFIG_BALANCED_COEFTREE + coef_branch_ct[i][j][k][l][1][1] = eob_branch_ct[i][j][k][l] - + coef_branch_ct[i][j][k][l][1][0]; + coef_probs[i][j][k][l][1] = + get_binary_prob(coef_branch_ct[i][j][k][l][1][0], + coef_branch_ct[i][j][k][l][1][1]); +#else coef_branch_ct[i][j][k][l][0][1] = eob_branch_ct[i][j][k][l] - coef_branch_ct[i][j][k][l][0][0]; coef_probs[i][j][k][l][0] = get_binary_prob(coef_branch_ct[i][j][k][l][0][0], coef_branch_ct[i][j][k][l][0][1]); +#endif #ifdef ENTROPY_STATS if (!cpi->dummy_packing) { + int t; for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) - context_counters[i][j][k][l][t] += coef_counts[i][j][k][l][t]; - context_counters[i][j][k][l][MAX_ENTROPY_TOKENS] += + context_counters[txfm_size][i][j][k][l][t] += + coef_counts[i][j][k][l][t]; + context_counters[txfm_size][i][j][k][l][MAX_ENTROPY_TOKENS] += eob_branch_ct[i][j][k][l]; } #endif @@ -1805,301 +1063,45 @@ static void build_tree_distribution(vp9_coeff_probs *coef_probs, } static void build_coeff_contexts(VP9_COMP *cpi) { - build_tree_distribution(cpi->frame_coef_probs_4x4, - cpi->coef_counts_4x4, - cpi->common.fc.eob_branch_counts[TX_4X4], -#ifdef ENTROPY_STATS - cpi, context_counters_4x4, -#endif - cpi->frame_branch_ct_4x4, BLOCK_TYPES); - build_tree_distribution(cpi->frame_coef_probs_8x8, - cpi->coef_counts_8x8, - cpi->common.fc.eob_branch_counts[TX_8X8], -#ifdef ENTROPY_STATS - cpi, context_counters_8x8, 
-#endif - cpi->frame_branch_ct_8x8, BLOCK_TYPES); - build_tree_distribution(cpi->frame_coef_probs_16x16, - cpi->coef_counts_16x16, - cpi->common.fc.eob_branch_counts[TX_16X16], -#ifdef ENTROPY_STATS - cpi, context_counters_16x16, -#endif - cpi->frame_branch_ct_16x16, BLOCK_TYPES); - build_tree_distribution(cpi->frame_coef_probs_32x32, - cpi->coef_counts_32x32, - cpi->common.fc.eob_branch_counts[TX_32X32], -#ifdef ENTROPY_STATS - cpi, context_counters_32x32, -#endif - cpi->frame_branch_ct_32x32, BLOCK_TYPES); -} - -#if CONFIG_CODE_NONZEROCOUNT -static void update_nzc_probs_common(VP9_COMP* cpi, - vp9_writer* const bc, - int block_size) { - VP9_COMMON *cm = &cpi->common; - int c, r, b, t; - int update[2] = {0, 0}; - int savings = 0; - int tokens, nodes; - const vp9_tree_index *nzc_tree; - vp9_prob *new_nzc_probs; - vp9_prob *old_nzc_probs; - unsigned int *nzc_counts; - unsigned int (*nzc_branch_ct)[2]; - vp9_prob upd; - - if (block_size == 32) { - tokens = NZC32X32_TOKENS; - nzc_tree = vp9_nzc32x32_tree; - old_nzc_probs = cm->fc.nzc_probs_32x32[0][0][0]; - new_nzc_probs = cpi->frame_nzc_probs_32x32[0][0][0]; - nzc_counts = cm->fc.nzc_counts_32x32[0][0][0]; - nzc_branch_ct = cpi->frame_nzc_branch_ct_32x32[0][0][0]; - upd = NZC_UPDATE_PROB_32X32; - } else if (block_size == 16) { - tokens = NZC16X16_TOKENS; - nzc_tree = vp9_nzc16x16_tree; - old_nzc_probs = cm->fc.nzc_probs_16x16[0][0][0]; - new_nzc_probs = cpi->frame_nzc_probs_16x16[0][0][0]; - nzc_counts = cm->fc.nzc_counts_16x16[0][0][0]; - nzc_branch_ct = cpi->frame_nzc_branch_ct_16x16[0][0][0]; - upd = NZC_UPDATE_PROB_16X16; - } else if (block_size == 8) { - tokens = NZC8X8_TOKENS; - nzc_tree = vp9_nzc8x8_tree; - old_nzc_probs = cm->fc.nzc_probs_8x8[0][0][0]; - new_nzc_probs = cpi->frame_nzc_probs_8x8[0][0][0]; - nzc_counts = cm->fc.nzc_counts_8x8[0][0][0]; - nzc_branch_ct = cpi->frame_nzc_branch_ct_8x8[0][0][0]; - upd = NZC_UPDATE_PROB_8X8; - } else { - nzc_tree = vp9_nzc4x4_tree; - tokens = NZC4X4_TOKENS; - old_nzc_probs = cm->fc.nzc_probs_4x4[0][0][0]; - new_nzc_probs = cpi->frame_nzc_probs_4x4[0][0][0]; - nzc_counts = cm->fc.nzc_counts_4x4[0][0][0]; - nzc_branch_ct = cpi->frame_nzc_branch_ct_4x4[0][0][0]; - upd = NZC_UPDATE_PROB_4X4; - } - nodes = tokens - 1; - // Get the new probabilities and the branch counts - for (c = 0; c < MAX_NZC_CONTEXTS; ++c) { - for (r = 0; r < REF_TYPES; ++r) { - for (b = 0; b < BLOCK_TYPES; ++b) { - int offset = c * REF_TYPES * BLOCK_TYPES + r * BLOCK_TYPES + b; - int offset_nodes = offset * nodes; - int offset_tokens = offset * tokens; - vp9_tree_probs_from_distribution(nzc_tree, - new_nzc_probs + offset_nodes, - nzc_branch_ct + offset_nodes, - nzc_counts + offset_tokens, 0); - } - } - } - - for (c = 0; c < MAX_NZC_CONTEXTS; ++c) { - for (r = 0; r < REF_TYPES; ++r) { - for (b = 0; b < BLOCK_TYPES; ++b) { - int offset = c * REF_TYPES * BLOCK_TYPES + r * BLOCK_TYPES + b; - int offset_nodes = offset * nodes; - for (t = 0; t < nodes; ++t) { - vp9_prob newp = new_nzc_probs[offset_nodes + t]; - vp9_prob oldp = old_nzc_probs[offset_nodes + t]; - int s, u = 0; -#if defined(SEARCH_NEWP) - s = prob_diff_update_savings_search(nzc_branch_ct[offset_nodes], - oldp, &newp, upd); - if (s > 0 && newp != oldp) - u = 1; - if (u) - savings += s - (int)(vp9_cost_zero(upd)); - else - savings -= (int)(vp9_cost_zero(upd)); -#else - s = prob_update_savings(nzc_branch_ct[offset_nodes], - oldp, newp, upd); - if (s > 0) - u = 1; - if (u) - savings += s; -#endif - update[u]++; - } - } - } - } - if (update[1] == 0 || savings < 0) { - 
vp9_write_bit(bc, 0); - } else { - vp9_write_bit(bc, 1); - for (c = 0; c < MAX_NZC_CONTEXTS; ++c) { - for (r = 0; r < REF_TYPES; ++r) { - for (b = 0; b < BLOCK_TYPES; ++b) { - int offset = c * REF_TYPES * BLOCK_TYPES + r * BLOCK_TYPES + b; - int offset_nodes = offset * nodes; - for (t = 0; t < nodes; ++t) { - vp9_prob newp = new_nzc_probs[offset_nodes + t]; - vp9_prob *oldp = &old_nzc_probs[offset_nodes + t]; - int s, u = 0; -#if defined(SEARCH_NEWP) - s = prob_diff_update_savings_search(nzc_branch_ct[offset_nodes], - *oldp, &newp, upd); - if (s > 0 && newp != *oldp) - u = 1; -#else - s = prob_update_savings(nzc_branch_ct[offset_nodes], - *oldp, newp, upd); - if (s > 0) - u = 1; -#endif - vp9_write(bc, u, upd); - if (u) { - /* send/use new probability */ - write_prob_diff_update(bc, newp, *oldp); - *oldp = newp; - } - } - } - } - } - } + TX_SIZE t; + for (t = TX_4X4; t <= TX_32X32; t++) + build_tree_distribution(cpi, t); } -static void update_nzc_pcat_probs(VP9_COMP *cpi, vp9_writer* const bc) { - VP9_COMMON *cm = &cpi->common; - int c, t, b; - int update[2] = {0, 0}; - int savings = 0; - vp9_prob upd = NZC_UPDATE_PROB_PCAT; - for (c = 0; c < MAX_NZC_CONTEXTS; ++c) { - for (t = 0; t < NZC_TOKENS_EXTRA; ++t) { - int bits = vp9_extranzcbits[t + NZC_TOKENS_NOEXTRA]; - for (b = 0; b < bits; ++b) { - vp9_prob newp = get_binary_prob(cm->fc.nzc_pcat_counts[c][t][b][0], - cm->fc.nzc_pcat_counts[c][t][b][1]); - vp9_prob oldp = cm->fc.nzc_pcat_probs[c][t][b]; - int s, u = 0; -#if defined(SEARCH_NEWP) - s = prob_diff_update_savings_search(cm->fc.nzc_pcat_counts[c][t][b], - oldp, &newp, upd); - if (s > 0 && newp != oldp) - u = 1; - if (u) - savings += s - (int)(vp9_cost_zero(upd)); - else - savings -= (int)(vp9_cost_zero(upd)); -#else - s = prob_update_savings(cm->fc.nzc_pcat_counts[c][t][b], - oldp, newp, upd); - if (s > 0) - u = 1; - if (u) - savings += s; -#endif - update[u]++; - } - } - } - if (update[1] == 0 || savings < 0) { - vp9_write_bit(bc, 0); - } else { - vp9_write_bit(bc, 1); - for (c = 0; c < MAX_NZC_CONTEXTS; ++c) { - for (t = 0; t < NZC_TOKENS_EXTRA; ++t) { - int bits = vp9_extranzcbits[t + NZC_TOKENS_NOEXTRA]; - for (b = 0; b < bits; ++b) { - vp9_prob newp = get_binary_prob(cm->fc.nzc_pcat_counts[c][t][b][0], - cm->fc.nzc_pcat_counts[c][t][b][1]); - vp9_prob *oldp = &cm->fc.nzc_pcat_probs[c][t][b]; - int s, u = 0; -#if defined(SEARCH_NEWP) - s = prob_diff_update_savings_search(cm->fc.nzc_pcat_counts[c][t][b], - *oldp, &newp, upd); - if (s > 0 && newp != *oldp) - u = 1; -#else - s = prob_update_savings(cm->fc.nzc_pcat_counts[c][t][b], - *oldp, newp, upd); - if (s > 0) - u = 1; -#endif - vp9_write(bc, u, upd); - if (u) { - /* send/use new probability */ - write_prob_diff_update(bc, newp, *oldp); - *oldp = newp; - } - } - } - } - } -} - -static void update_nzc_probs(VP9_COMP* cpi, - vp9_writer* const bc) { - update_nzc_probs_common(cpi, bc, 4); - if (cpi->common.txfm_mode != ONLY_4X4) - update_nzc_probs_common(cpi, bc, 8); - if (cpi->common.txfm_mode > ALLOW_8X8) - update_nzc_probs_common(cpi, bc, 16); - if (cpi->common.txfm_mode > ALLOW_16X16) - update_nzc_probs_common(cpi, bc, 32); -#ifdef NZC_PCAT_UPDATE - update_nzc_pcat_probs(cpi, bc); -#endif -#ifdef NZC_STATS - if (!cpi->dummy_packing) - update_nzcstats(&cpi->common); -#endif -} -#endif // CONFIG_CODE_NONZEROCOUNT - -static void update_coef_probs_common(vp9_writer* const bc, -#ifdef ENTROPY_STATS - VP9_COMP *cpi, - vp9_coeff_stats *tree_update_hist, -#endif - vp9_coeff_probs *new_frame_coef_probs, - vp9_coeff_probs 
*old_frame_coef_probs,
-                                     vp9_coeff_stats *frame_branch_ct,
-                                     int block_types) {
+static void update_coef_probs_common(vp9_writer* const bc, VP9_COMP *cpi,
+                                     TX_SIZE tx_size) {
+  vp9_coeff_probs_model *new_frame_coef_probs = cpi->frame_coef_probs[tx_size];
+  vp9_coeff_probs_model *old_frame_coef_probs =
+      cpi->common.fc.coef_probs[tx_size];
+  vp9_coeff_stats *frame_branch_ct = cpi->frame_branch_ct[tx_size];
   int i, j, k, l, t;
   int update[2] = {0, 0};
   int savings;
-#if CONFIG_MODELCOEFPROB && MODEL_BASED_UPDATE
-  const int entropy_nodes_update = UNCONSTRAINED_UPDATE_NODES;
-#else
-  const int entropy_nodes_update = ENTROPY_NODES;
-#endif
-  // vp9_prob bestupd = find_coef_update_prob(cpi);
+  const int entropy_nodes_update = UNCONSTRAINED_NODES;
+
+  const int tstart = 0;
   /* dry run to see if there is any update at all needed */
   savings = 0;
-  for (i = 0; i < block_types; ++i) {
+  for (i = 0; i < BLOCK_TYPES; ++i) {
     for (j = 0; j < REF_TYPES; ++j) {
       for (k = 0; k < COEF_BANDS; ++k) {
         // int prev_coef_savings[ENTROPY_NODES] = {0};
         for (l = 0; l < PREV_COEF_CONTEXTS; ++l) {
-          for (t = CONFIG_CODE_NONZEROCOUNT; t < entropy_nodes_update; ++t) {
+          for (t = tstart; t < entropy_nodes_update; ++t) {
            vp9_prob newp = new_frame_coef_probs[i][j][k][l][t];
            const vp9_prob oldp = old_frame_coef_probs[i][j][k][l][t];
            const vp9_prob upd = vp9_coef_update_prob[t];
-            int s;  // = prev_coef_savings[t];
+            int s;
            int u = 0;
            if (l >= 3 && k == 0)
              continue;
-#if defined(SEARCH_NEWP)
-#if CONFIG_MODELCOEFPROB && MODEL_BASED_UPDATE
-            if (t == UNCONSTRAINED_NODES - 1)
+            if (t == PIVOT_NODE)
              s = prob_diff_update_savings_search_model(
                  frame_branch_ct[i][j][k][l][0],
                  old_frame_coef_probs[i][j][k][l], &newp, upd, i, j);
            else
-#endif
              s = prob_diff_update_savings_search(
                  frame_branch_ct[i][j][k][l][t], oldp, &newp, upd);
            if (s > 0 && newp != oldp)
@@ -2108,15 +1110,6 @@ static void update_coef_probs_common(vp9_writer* const bc,
              savings += s - (int)(vp9_cost_zero(upd));
            else
              savings -= (int)(vp9_cost_zero(upd));
-#else
-            s = prob_update_savings(frame_branch_ct[i][j][k][l][t],
-                                    oldp, newp, upd);
-            if (s > 0)
-              u = 1;
-            if (u)
-              savings += s;
-#endif
-
            update[u]++;
          }
        }
@@ -2131,54 +1124,39 @@ static void update_coef_probs_common(vp9_writer* const bc,
    return;
  }
  vp9_write_bit(bc, 1);
-  for (i = 0; i < block_types; ++i) {
+  for (i = 0; i < BLOCK_TYPES; ++i) {
    for (j = 0; j < REF_TYPES; ++j) {
      for (k = 0; k < COEF_BANDS; ++k) {
        // int prev_coef_savings[ENTROPY_NODES] = {0};
        for (l = 0; l < PREV_COEF_CONTEXTS; ++l) {
          // calc probs and branch cts for this frame only
-          for (t = CONFIG_CODE_NONZEROCOUNT; t < entropy_nodes_update; ++t) {
+          for (t = tstart; t < entropy_nodes_update; ++t) {
            vp9_prob newp = new_frame_coef_probs[i][j][k][l][t];
            vp9_prob *oldp = old_frame_coef_probs[i][j][k][l] + t;
            const vp9_prob upd = vp9_coef_update_prob[t];
-            int s;  // = prev_coef_savings[t];
+            int s;
            int u = 0;
            if (l >= 3 && k == 0)
              continue;
-
-#if defined(SEARCH_NEWP)
-#if CONFIG_MODELCOEFPROB && MODEL_BASED_UPDATE
-            if (t == UNCONSTRAINED_NODES - 1)
+            if (t == PIVOT_NODE)
              s = prob_diff_update_savings_search_model(
                  frame_branch_ct[i][j][k][l][0],
                  old_frame_coef_probs[i][j][k][l], &newp, upd, i, j);
            else
-#endif
              s = prob_diff_update_savings_search(
                  frame_branch_ct[i][j][k][l][t], *oldp, &newp, upd);
            if (s > 0 && newp != *oldp)
              u = 1;
-#else
-            s = prob_update_savings(frame_branch_ct[i][j][k][l][t],
-                                    *oldp, newp, upd);
-            if (s > 0)
-              u = 1;
-#endif
            vp9_write(bc, u, upd);
 #ifdef ENTROPY_STATS
            if (!cpi->dummy_packing)
-              ++tree_update_hist[i][j][k][l][t][u];
+              ++tree_update_hist[tx_size][i][j][k][l][t][u];
 #endif
            if (u) {
              /* send/use new probability */
              write_prob_diff_update(bc, newp, *oldp);
              *oldp = newp;
-#if CONFIG_MODELCOEFPROB && MODEL_BASED_UPDATE
-              if (t == UNCONSTRAINED_NODES - 1)
-                vp9_get_model_distribution(
-                    newp, old_frame_coef_probs[i][j][k][l], i, j);
-#endif
            }
          }
        }
@@ -2188,737 +1166,564 @@ static void update_coef_probs_common(vp9_writer* const bc,
 }
 
 static void update_coef_probs(VP9_COMP* const cpi, vp9_writer* const bc) {
+  const TXFM_MODE txfm_mode = cpi->common.txfm_mode;
+  vp9_clear_system_state();
  // Build the coefficient contexts based on counts collected in encode loop
  build_coeff_contexts(cpi);
-  update_coef_probs_common(bc,
-#ifdef ENTROPY_STATS
-                           cpi,
-                           tree_update_hist_4x4,
-#endif
-                           cpi->frame_coef_probs_4x4,
-                           cpi->common.fc.coef_probs_4x4,
-                           cpi->frame_branch_ct_4x4,
-                           BLOCK_TYPES);
-
-  /* do not do this if not even allowed */
-  if (cpi->common.txfm_mode != ONLY_4X4) {
-    update_coef_probs_common(bc,
-#ifdef ENTROPY_STATS
-                             cpi,
-                             tree_update_hist_8x8,
-#endif
-                             cpi->frame_coef_probs_8x8,
-                             cpi->common.fc.coef_probs_8x8,
-                             cpi->frame_branch_ct_8x8,
-                             BLOCK_TYPES);
-  }
+  update_coef_probs_common(bc, cpi, TX_4X4);
-  if (cpi->common.txfm_mode > ALLOW_8X8) {
-    update_coef_probs_common(bc,
-#ifdef ENTROPY_STATS
-                             cpi,
-                             tree_update_hist_16x16,
-#endif
-                             cpi->frame_coef_probs_16x16,
-                             cpi->common.fc.coef_probs_16x16,
-                             cpi->frame_branch_ct_16x16,
-                             BLOCK_TYPES);
-  }
+  // do not update probabilities for transform sizes the mode does not allow
+  if (txfm_mode > ONLY_4X4)
+    update_coef_probs_common(bc, cpi, TX_8X8);
-  if (cpi->common.txfm_mode > ALLOW_16X16) {
-    update_coef_probs_common(bc,
-#ifdef ENTROPY_STATS
-                             cpi,
-                             tree_update_hist_32x32,
-#endif
-                             cpi->frame_coef_probs_32x32,
-                             cpi->common.fc.coef_probs_32x32,
-                             cpi->frame_branch_ct_32x32,
-                             BLOCK_TYPES);
-  }
+  if (txfm_mode > ALLOW_8X8)
+    update_coef_probs_common(bc, cpi, TX_16X16);
+
+  if (txfm_mode > ALLOW_16X16)
+    update_coef_probs_common(bc, cpi, TX_32X32);
 }
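
A note on the two-pass shape of update_coef_probs_common() above: the dry run prices every candidate node update and only arms the frame-level update bit when the total saving is positive. A minimal sketch of that accounting, assuming vp9_cost_zero(p) returns the cost of the always-transmitted per-node flag in the same fractional-bit units as the savings reported by the search helpers; this is illustrative only and not part of the patch:

/* Net saving contributed by one tree node during the dry run. `s` comes
 * from one of the prob_diff_update_savings_search*() helpers and `upd` is
 * the per-node update probability from vp9_coef_update_prob[]. */
static int net_node_savings(int s, vp9_prob upd, int updated) {
  /* The update/no-update flag is coded with probability `upd` either way,
   * so its cost is charged against every node. */
  return updated ? s - (int)vp9_cost_zero(upd)
                 : -(int)vp9_cost_zero(upd);
}
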
-#ifdef PACKET_TESTING
-FILE *vpxlogc = 0;
-#endif
+static void encode_loopfilter(VP9_COMMON *pc, MACROBLOCKD *xd,
+                              struct vp9_write_bit_buffer *wb) {
+  int i;
-static void put_delta_q(vp9_writer *bc, int delta_q) {
-  if (delta_q != 0) {
-    vp9_write_bit(bc, 1);
-    vp9_write_literal(bc, abs(delta_q), 4);
-
-    if (delta_q < 0)
-      vp9_write_bit(bc, 1);
-    else
-      vp9_write_bit(bc, 0);
-  } else
-    vp9_write_bit(bc, 0);
-}
+  // Encode the loop filter level and type
+  vp9_wb_write_literal(wb, pc->filter_level, 6);
+  vp9_wb_write_literal(wb, pc->sharpness_level, 3);
-static void decide_kf_ymode_entropy(VP9_COMP *cpi) {
+  // Write out loop filter deltas applied at the MB level based on mode or
+  // ref frame (if they are enabled).
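
The new encode_loopfilter(), like the rest of the uncompressed header below, writes through struct vp9_write_bit_buffer rather than the arithmetic vp9_writer. A sketch of the assumed contract, with field names taken from the {dest, 0} initializer in vp9_pack_bitstream() further down; the real helpers live elsewhere in the tree:

#include <stddef.h>
#include <stdint.h>

/* Illustrative only: raw bits packed most-significant-bit first. */
struct vp9_write_bit_buffer {
  uint8_t *bit_buffer;
  size_t bit_offset;
};

static void wb_write_bit(struct vp9_write_bit_buffer *wb, int bit) {
  const int off = (int)wb->bit_offset;
  const int byte = off / 8, shift = 7 - off % 8;
  if (shift == 7)
    wb->bit_buffer[byte] = bit << shift;   /* first bit of a fresh byte */
  else
    wb->bit_buffer[byte] |= bit << shift;  /* OR into a partial byte */
  wb->bit_offset = off + 1;
}

static void wb_write_literal(struct vp9_write_bit_buffer *wb, int data,
                             int bits) {
  int bit;
  for (bit = bits - 1; bit >= 0; bit--)  /* MSB first */
    wb_write_bit(wb, (data >> bit) & 1);
}
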
+ vp9_wb_write_bit(wb, xd->mode_ref_lf_delta_enabled); - int mode_cost[MB_MODE_COUNT]; - int cost; - int bestcost = INT_MAX; - int bestindex = 0; - int i, j; + if (xd->mode_ref_lf_delta_enabled) { + // Do the deltas need to be updated + vp9_wb_write_bit(wb, xd->mode_ref_lf_delta_update); + if (xd->mode_ref_lf_delta_update) { + // Send update + for (i = 0; i < MAX_REF_LF_DELTAS; i++) { + const int delta = xd->ref_lf_deltas[i]; - for (i = 0; i < 8; i++) { - vp9_cost_tokens(mode_cost, cpi->common.kf_ymode_prob[i], vp9_kf_ymode_tree); - cost = 0; - for (j = 0; j < VP9_YMODES; j++) { - cost += mode_cost[j] * cpi->ymode_count[j]; - } - vp9_cost_tokens(mode_cost, cpi->common.sb_kf_ymode_prob[i], - vp9_sb_ymode_tree); - for (j = 0; j < VP9_I32X32_MODES; j++) { - cost += mode_cost[j] * cpi->sb_ymode_count[j]; - } - if (cost < bestcost) { - bestindex = i; - bestcost = cost; - } - } - cpi->common.kf_ymode_probs_index = bestindex; + // Frame level data + if (delta != xd->last_ref_lf_deltas[i]) { + xd->last_ref_lf_deltas[i] = delta; + vp9_wb_write_bit(wb, 1); -} -static void segment_reference_frames(VP9_COMP *cpi) { - VP9_COMMON *oci = &cpi->common; - MODE_INFO *mi = oci->mi; - int ref[MAX_MB_SEGMENTS] = {0}; - int i, j; - int mb_index = 0; - MACROBLOCKD *const xd = &cpi->mb.e_mbd; + assert(delta != 0); + vp9_wb_write_literal(wb, abs(delta) & 0x3F, 6); + vp9_wb_write_bit(wb, delta < 0); + } else { + vp9_wb_write_bit(wb, 0); + } + } - for (i = 0; i < oci->mb_rows; i++) { - for (j = 0; j < oci->mb_cols; j++, mb_index++) { - ref[mi[mb_index].mbmi.segment_id] |= (1 << mi[mb_index].mbmi.ref_frame); + // Send update + for (i = 0; i < MAX_MODE_LF_DELTAS; i++) { + const int delta = xd->mode_lf_deltas[i]; + if (delta != xd->last_mode_lf_deltas[i]) { + xd->last_mode_lf_deltas[i] = delta; + vp9_wb_write_bit(wb, 1); + + assert(delta != 0); + vp9_wb_write_literal(wb, abs(delta) & 0x3F, 6); + vp9_wb_write_bit(wb, delta < 0); + } else { + vp9_wb_write_bit(wb, 0); + } + } } - mb_index++; - } - for (i = 0; i < MAX_MB_SEGMENTS; i++) { - vp9_enable_segfeature(xd, i, SEG_LVL_REF_FRAME); - vp9_set_segdata(xd, i, SEG_LVL_REF_FRAME, ref[i]); } } -void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, - unsigned long *size) { - int i, j; - VP9_HEADER oh; - VP9_COMMON *const pc = &cpi->common; - vp9_writer header_bc, residual_bc; - MACROBLOCKD *const xd = &cpi->mb.e_mbd; - int extra_bytes_packed = 0; - - unsigned char *cx_data = dest; +static void write_delta_q(struct vp9_write_bit_buffer *wb, int delta_q) { + if (delta_q != 0) { + vp9_wb_write_bit(wb, 1); + vp9_wb_write_literal(wb, abs(delta_q), 4); + vp9_wb_write_bit(wb, delta_q < 0); + } else { + vp9_wb_write_bit(wb, 0); + } +} - oh.show_frame = (int) pc->show_frame; - oh.type = (int)pc->frame_type; - oh.version = pc->version; - oh.first_partition_length_in_bytes = 0; +static void encode_quantization(VP9_COMMON *cm, + struct vp9_write_bit_buffer *wb) { + vp9_wb_write_literal(wb, cm->base_qindex, QINDEX_BITS); + write_delta_q(wb, cm->y_dc_delta_q); + write_delta_q(wb, cm->uv_dc_delta_q); + write_delta_q(wb, cm->uv_ac_delta_q); +} - cx_data += 3; -#if defined(SECTIONBITS_OUTPUT) - Sectionbits[active_section = 1] += sizeof(VP9_HEADER) * 8 * 256; -#endif +static void encode_segmentation(VP9_COMP *cpi, + struct vp9_write_bit_buffer *wb) { + int i, j; + VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &cpi->mb.e_mbd; - compute_update_table(); + vp9_wb_write_bit(wb, xd->segmentation_enabled); + if (!xd->segmentation_enabled) + return; - /* 
vp9_kf_default_bmode_probs() is called in vp9_setup_key_frame() once - * for each K frame before encode frame. pc->kf_bmode_prob doesn't get - * changed anywhere else. No need to call it again here. --yw - * vp9_kf_default_bmode_probs( pc->kf_bmode_prob); - */ - - /* every keyframe send startcode, width, height, scale factor, clamp - * and color type. - */ - if (oh.type == KEY_FRAME) { - // Start / synch code - cx_data[0] = 0x9D; - cx_data[1] = 0x01; - cx_data[2] = 0x2a; - extra_bytes_packed = 3; - cx_data += extra_bytes_packed; - } - { - int v; - - if (pc->width != pc->display_width || pc->height != pc->display_height) { - v = pc->display_width; - cx_data[0] = v; - cx_data[1] = v >> 8; - - v = pc->display_height; - cx_data[2] = v; - cx_data[3] = v >> 8; - cx_data += 4; - extra_bytes_packed += 4; + // Segmentation map + vp9_wb_write_bit(wb, xd->update_mb_segmentation_map); + if (xd->update_mb_segmentation_map) { + // Select the coding strategy (temporal or spatial) + vp9_choose_segmap_coding_method(cpi); + // Write out probabilities used to decode unpredicted macro-block segments + for (i = 0; i < MB_SEG_TREE_PROBS; i++) { + const int prob = xd->mb_segment_tree_probs[i]; + const int update = prob != MAX_PROB; + vp9_wb_write_bit(wb, update); + if (update) + vp9_wb_write_literal(wb, prob, 8); + } + + // Write out the chosen coding method. + vp9_wb_write_bit(wb, cm->temporal_update); + if (cm->temporal_update) { + for (i = 0; i < PREDICTION_PROBS; i++) { + const int prob = cm->segment_pred_probs[i]; + const int update = prob != MAX_PROB; + vp9_wb_write_bit(wb, update); + if (update) + vp9_wb_write_literal(wb, prob, 8); + } + } + } + + // Segmentation data + vp9_wb_write_bit(wb, xd->update_mb_segmentation_data); + if (xd->update_mb_segmentation_data) { + vp9_wb_write_bit(wb, xd->mb_segment_abs_delta); + + for (i = 0; i < MAX_MB_SEGMENTS; i++) { + for (j = 0; j < SEG_LVL_MAX; j++) { + const int active = vp9_segfeature_active(xd, i, j); + vp9_wb_write_bit(wb, active); + if (active) { + const int data = vp9_get_segdata(xd, i, j); + const int data_max = vp9_seg_feature_data_max(j); + + if (vp9_is_segfeature_signed(j)) { + vp9_encode_unsigned_max(wb, abs(data), data_max); + vp9_wb_write_bit(wb, data < 0); + } else { + vp9_encode_unsigned_max(wb, data, data_max); + } + } + } } + } +} - v = pc->width; - cx_data[0] = v; - cx_data[1] = v >> 8; - - v = pc->height; - cx_data[2] = v; - cx_data[3] = v >> 8; - extra_bytes_packed += 4; - cx_data += 4; - } +static void encode_txfm_probs(VP9_COMP *cpi, vp9_writer *w) { + VP9_COMMON *const cm = &cpi->common; - vp9_start_encode(&header_bc, cx_data); + // Mode + vp9_write_literal(w, MIN(cm->txfm_mode, ALLOW_32X32), 2); + if (cm->txfm_mode >= ALLOW_32X32) + vp9_write_bit(w, cm->txfm_mode == TX_MODE_SELECT); - // TODO(jkoleszar): remove these two unused bits? - vp9_write_bit(&header_bc, pc->clr_type); - vp9_write_bit(&header_bc, pc->clamp_type); - - // error resilient mode - vp9_write_bit(&header_bc, pc->error_resilient_mode); - - // Signal whether or not Segmentation is enabled - vp9_write_bit(&header_bc, (xd->segmentation_enabled) ? 1 : 0); - - // Indicate which features are enabled - if (xd->segmentation_enabled) { - // Indicate whether or not the segmentation map is being updated. - vp9_write_bit(&header_bc, (xd->update_mb_segmentation_map) ? 1 : 0); - - // If it is, then indicate the method that will be used. 
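
encode_segmentation() above leans on a single idiom for every probability it sends: MAX_PROB (255) acts as a "no update" sentinel costing one 0 bit, while any other value costs nine bits. A hypothetical factoring of that idiom, shown only to make the wire format explicit:

/* Hypothetical helper mirroring the pattern used for
 * mb_segment_tree_probs[] and segment_pred_probs[] above. */
static void wb_write_prob_update(struct vp9_write_bit_buffer *wb, int prob) {
  const int update = prob != MAX_PROB;  /* 255 = keep the default */
  vp9_wb_write_bit(wb, update);
  if (update)
    vp9_wb_write_literal(wb, prob, 8);  /* 1 flag bit + 8 prob bits */
}
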
- if (xd->update_mb_segmentation_map) { - // Select the coding strategy (temporal or spatial) - vp9_choose_segmap_coding_method(cpi); - // Send the tree probabilities used to decode unpredicted - // macro-block segments - for (i = 0; i < MB_FEATURE_TREE_PROBS; i++) { - int data = xd->mb_segment_tree_probs[i]; - - if (data != 255) { - vp9_write_bit(&header_bc, 1); - vp9_write_literal(&header_bc, data, 8); - } else { - vp9_write_bit(&header_bc, 0); - } - } + // Probabilities + if (cm->txfm_mode == TX_MODE_SELECT) { + int i, j; + unsigned int ct_8x8p[TX_SIZE_MAX_SB - 3][2]; + unsigned int ct_16x16p[TX_SIZE_MAX_SB - 2][2]; + unsigned int ct_32x32p[TX_SIZE_MAX_SB - 1][2]; - // Write out the chosen coding method. - vp9_write_bit(&header_bc, (pc->temporal_update) ? 1 : 0); - if (pc->temporal_update) { - for (i = 0; i < PREDICTION_PROBS; i++) { - int data = pc->segment_pred_probs[i]; - if (data != 255) { - vp9_write_bit(&header_bc, 1); - vp9_write_literal(&header_bc, data, 8); - } else { - vp9_write_bit(&header_bc, 0); - } - } + for (i = 0; i < TX_SIZE_CONTEXTS; i++) { + tx_counts_to_branch_counts_8x8(cm->fc.tx_count_8x8p[i], + ct_8x8p); + for (j = 0; j < TX_SIZE_MAX_SB - 3; j++) { + vp9_cond_prob_diff_update(w, &cm->fc.tx_probs_8x8p[i][j], + VP9_MODE_UPDATE_PROB, ct_8x8p[j]); + } + } + for (i = 0; i < TX_SIZE_CONTEXTS; i++) { + tx_counts_to_branch_counts_16x16(cm->fc.tx_count_16x16p[i], + ct_16x16p); + for (j = 0; j < TX_SIZE_MAX_SB - 2; j++) { + vp9_cond_prob_diff_update(w, &cm->fc.tx_probs_16x16p[i][j], + VP9_MODE_UPDATE_PROB, ct_16x16p[j]); + } + } + for (i = 0; i < TX_SIZE_CONTEXTS; i++) { + tx_counts_to_branch_counts_32x32(cm->fc.tx_count_32x32p[i], + ct_32x32p); + for (j = 0; j < TX_SIZE_MAX_SB - 1; j++) { + vp9_cond_prob_diff_update(w, &cm->fc.tx_probs_32x32p[i][j], + VP9_MODE_UPDATE_PROB, ct_32x32p[j]); } } +#ifdef MODE_STATS + if (!cpi->dummy_packing) + update_tx_count_stats(cm); +#endif + } +} - vp9_write_bit(&header_bc, (xd->update_mb_segmentation_data) ? 1 : 0); - - // segment_reference_frames(cpi); - - if (xd->update_mb_segmentation_data) { - signed char Data; - - vp9_write_bit(&header_bc, (xd->mb_segment_abs_delta) ? 1 : 0); - - // For each segments id... - for (i = 0; i < MAX_MB_SEGMENTS; i++) { - // For each segmentation codable feature... - for (j = 0; j < SEG_LVL_MAX; j++) { - Data = vp9_get_segdata(xd, i, j); - - // If the feature is enabled... - if (vp9_segfeature_active(xd, i, j)) { - vp9_write_bit(&header_bc, 1); - - // Is the segment data signed.. 
- if (vp9_is_segfeature_signed(j)) { - // Encode the relevant feature data - if (Data < 0) { - Data = - Data; - vp9_encode_unsigned_max(&header_bc, Data, - vp9_seg_feature_data_max(j)); - vp9_write_bit(&header_bc, 1); - } else { - vp9_encode_unsigned_max(&header_bc, Data, - vp9_seg_feature_data_max(j)); - vp9_write_bit(&header_bc, 0); - } - } - // Unsigned data element so no sign bit needed - else - vp9_encode_unsigned_max(&header_bc, Data, - vp9_seg_feature_data_max(j)); - } else - vp9_write_bit(&header_bc, 0); +static void write_interp_filter_type(INTERPOLATIONFILTERTYPE type, + struct vp9_write_bit_buffer *wb) { + vp9_wb_write_bit(wb, type == SWITCHABLE); + if (type != SWITCHABLE) + vp9_wb_write_literal(wb, type, 2); +} + +static void fix_mcomp_filter_type(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + + if (cm->mcomp_filter_type == SWITCHABLE) { + // Check to see if only one of the filters is actually used + int count[VP9_SWITCHABLE_FILTERS]; + int i, j, c = 0; + for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) { + count[i] = 0; + for (j = 0; j <= VP9_SWITCHABLE_FILTERS; ++j) + count[i] += cm->fc.switchable_interp_count[j][i]; + c += (count[i] > 0); + } + if (c == 1) { + // Only one filter is used. So set the filter at frame level + for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) { + if (count[i]) { + cm->mcomp_filter_type = vp9_switchable_interp[i]; + break; } } } } +} - // Encode the common prediction model status flag probability updates for - // the reference frame - update_refpred_stats(cpi); - if (pc->frame_type != KEY_FRAME) { - for (i = 0; i < PREDICTION_PROBS; i++) { - if (cpi->ref_pred_probs_update[i]) { - vp9_write_bit(&header_bc, 1); - vp9_write_literal(&header_bc, pc->ref_pred_probs[i], 8); - } else { - vp9_write_bit(&header_bc, 0); - } +static void write_tile_info(VP9_COMMON *cm, struct vp9_write_bit_buffer *wb) { + int min_log2_tiles, delta_log2_tiles, n_tile_bits, n; + vp9_get_tile_n_bits(cm, &min_log2_tiles, &delta_log2_tiles); + n_tile_bits = cm->log2_tile_columns - min_log2_tiles; + for (n = 0; n < delta_log2_tiles; n++) { + if (n_tile_bits--) { + vp9_wb_write_bit(wb, 1); + } else { + vp9_wb_write_bit(wb, 0); + break; } } - pc->sb64_coded = get_binary_prob(cpi->sb64_count[0], cpi->sb64_count[1]); - vp9_write_literal(&header_bc, pc->sb64_coded, 8); - pc->sb32_coded = get_binary_prob(cpi->sb32_count[0], cpi->sb32_count[1]); - vp9_write_literal(&header_bc, pc->sb32_coded, 8); + vp9_wb_write_bit(wb, cm->log2_tile_rows != 0); + if (cm->log2_tile_rows != 0) + vp9_wb_write_bit(wb, cm->log2_tile_rows != 1); +} - vp9_write_bit(&header_bc, cpi->mb.e_mbd.lossless); - if (cpi->mb.e_mbd.lossless) { - pc->txfm_mode = ONLY_4X4; - } else { - if (pc->txfm_mode == TX_MODE_SELECT) { - pc->prob_tx[0] = get_prob(cpi->txfm_count_32x32p[TX_4X4] + - cpi->txfm_count_16x16p[TX_4X4] + - cpi->txfm_count_8x8p[TX_4X4], - cpi->txfm_count_32x32p[TX_4X4] + - cpi->txfm_count_32x32p[TX_8X8] + - cpi->txfm_count_32x32p[TX_16X16] + - cpi->txfm_count_32x32p[TX_32X32] + - cpi->txfm_count_16x16p[TX_4X4] + - cpi->txfm_count_16x16p[TX_8X8] + - cpi->txfm_count_16x16p[TX_16X16] + - cpi->txfm_count_8x8p[TX_4X4] + - cpi->txfm_count_8x8p[TX_8X8]); - pc->prob_tx[1] = get_prob(cpi->txfm_count_32x32p[TX_8X8] + - cpi->txfm_count_16x16p[TX_8X8], - cpi->txfm_count_32x32p[TX_8X8] + - cpi->txfm_count_32x32p[TX_16X16] + - cpi->txfm_count_32x32p[TX_32X32] + - cpi->txfm_count_16x16p[TX_8X8] + - cpi->txfm_count_16x16p[TX_16X16]); - pc->prob_tx[2] = get_prob(cpi->txfm_count_32x32p[TX_16X16], - 
cpi->txfm_count_32x32p[TX_16X16] + - cpi->txfm_count_32x32p[TX_32X32]); +static int get_refresh_mask(VP9_COMP *cpi) { + // Should the GF or ARF be updated using the transmitted frame or buffer +#if CONFIG_MULTIPLE_ARF + if (!cpi->multi_arf_enabled && cpi->refresh_golden_frame && + !cpi->refresh_alt_ref_frame) { +#else + if (cpi->refresh_golden_frame && !cpi->refresh_alt_ref_frame) { +#endif + // Preserve the previously existing golden frame and update the frame in + // the alt ref slot instead. This is highly specific to the use of + // alt-ref as a forward reference, and this needs to be generalized as + // other uses are implemented (like RTC/temporal scaling) + // + // gld_fb_idx and alt_fb_idx need to be swapped for future frames, but + // that happens in vp9_onyx_if.c:update_reference_frames() so that it can + // be done outside of the recode loop. + return (cpi->refresh_last_frame << cpi->lst_fb_idx) | + (cpi->refresh_golden_frame << cpi->alt_fb_idx); } else { - pc->prob_tx[0] = 128; - pc->prob_tx[1] = 128; - pc->prob_tx[2] = 128; - } - vp9_write_literal(&header_bc, pc->txfm_mode <= 3 ? pc->txfm_mode : 3, 2); - if (pc->txfm_mode > ALLOW_16X16) { - vp9_write_bit(&header_bc, pc->txfm_mode == TX_MODE_SELECT); - } - if (pc->txfm_mode == TX_MODE_SELECT) { - vp9_write_literal(&header_bc, pc->prob_tx[0], 8); - vp9_write_literal(&header_bc, pc->prob_tx[1], 8); - vp9_write_literal(&header_bc, pc->prob_tx[2], 8); + int arf_idx = cpi->alt_fb_idx; +#if CONFIG_MULTIPLE_ARF + // Determine which ARF buffer to use to encode this ARF frame. + if (cpi->multi_arf_enabled) { + int sn = cpi->sequence_number; + arf_idx = (cpi->frame_coding_order[sn] < 0) ? + cpi->arf_buffer_idx[sn + 1] : + cpi->arf_buffer_idx[sn]; + } +#endif + return (cpi->refresh_last_frame << cpi->lst_fb_idx) | + (cpi->refresh_golden_frame << cpi->gld_fb_idx) | + (cpi->refresh_alt_ref_frame << arf_idx); } - } +} - // Encode the loop filter level and type - vp9_write_bit(&header_bc, pc->filter_type); - vp9_write_literal(&header_bc, pc->filter_level, 6); - vp9_write_literal(&header_bc, pc->sharpness_level, 3); -#if CONFIG_LOOP_DERING - if (pc->dering_enabled) { - vp9_write_bit(&header_bc, 1); - vp9_write_literal(&header_bc, pc->dering_enabled - 1, 4); - } else { - vp9_write_bit(&header_bc, 0); +static void write_display_size(VP9_COMP *cpi, struct vp9_write_bit_buffer *wb) { + VP9_COMMON *const cm = &cpi->common; + + const int scaling_active = cm->width != cm->display_width || + cm->height != cm->display_height; + vp9_wb_write_bit(wb, scaling_active); + if (scaling_active) { + vp9_wb_write_literal(wb, cm->display_width - 1, 16); + vp9_wb_write_literal(wb, cm->display_height - 1, 16); } -#endif +} - // Write out loop filter deltas applied at the MB level based on mode or ref frame (if they are enabled). - vp9_write_bit(&header_bc, (xd->mode_ref_lf_delta_enabled) ? 
1 : 0); +static void write_frame_size(VP9_COMP *cpi, + struct vp9_write_bit_buffer *wb) { + VP9_COMMON *const cm = &cpi->common; + vp9_wb_write_literal(wb, cm->width - 1, 16); + vp9_wb_write_literal(wb, cm->height - 1, 16); - if (xd->mode_ref_lf_delta_enabled) { - // Do the deltas need to be updated - int send_update = xd->mode_ref_lf_delta_update; + write_display_size(cpi, wb); +} - vp9_write_bit(&header_bc, send_update); - if (send_update) { - int Data; +static void write_frame_size_with_refs(VP9_COMP *cpi, + struct vp9_write_bit_buffer *wb) { + VP9_COMMON *const cm = &cpi->common; + int refs[ALLOWED_REFS_PER_FRAME] = {cpi->lst_fb_idx, cpi->gld_fb_idx, + cpi->alt_fb_idx}; + int i, found = 0; + + for (i = 0; i < ALLOWED_REFS_PER_FRAME; ++i) { + YV12_BUFFER_CONFIG *cfg = &cm->yv12_fb[cm->ref_frame_map[refs[i]]]; + found = cm->width == cfg->y_crop_width && + cm->height == cfg->y_crop_height; + vp9_wb_write_bit(wb, found); + if (found) + break; + } - // Send update - for (i = 0; i < MAX_REF_LF_DELTAS; i++) { - Data = xd->ref_lf_deltas[i]; + if (!found) { + vp9_wb_write_literal(wb, cm->width - 1, 16); + vp9_wb_write_literal(wb, cm->height - 1, 16); + } - // Frame level data - if (xd->ref_lf_deltas[i] != xd->last_ref_lf_deltas[i]) { - xd->last_ref_lf_deltas[i] = xd->ref_lf_deltas[i]; - vp9_write_bit(&header_bc, 1); + write_display_size(cpi, wb); +} - if (Data > 0) { - vp9_write_literal(&header_bc, (Data & 0x3F), 6); - vp9_write_bit(&header_bc, 0); // sign - } else { - Data = -Data; - vp9_write_literal(&header_bc, (Data & 0x3F), 6); - vp9_write_bit(&header_bc, 1); // sign - } - } else { - vp9_write_bit(&header_bc, 0); - } - } +static void write_sync_code(struct vp9_write_bit_buffer *wb) { + vp9_wb_write_literal(wb, SYNC_CODE_0, 8); + vp9_wb_write_literal(wb, SYNC_CODE_1, 8); + vp9_wb_write_literal(wb, SYNC_CODE_2, 8); +} - // Send update - for (i = 0; i < MAX_MODE_LF_DELTAS; i++) { - Data = xd->mode_lf_deltas[i]; +static void write_uncompressed_header(VP9_COMP *cpi, + struct vp9_write_bit_buffer *wb) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &cpi->mb.e_mbd; - if (xd->mode_lf_deltas[i] != xd->last_mode_lf_deltas[i]) { - xd->last_mode_lf_deltas[i] = xd->mode_lf_deltas[i]; - vp9_write_bit(&header_bc, 1); + // frame marker bits + vp9_wb_write_literal(wb, 0x2, 2); - if (Data > 0) { - vp9_write_literal(&header_bc, (Data & 0x3F), 6); - vp9_write_bit(&header_bc, 0); // sign - } else { - Data = -Data; - vp9_write_literal(&header_bc, (Data & 0x3F), 6); - vp9_write_bit(&header_bc, 1); // sign - } - } else { - vp9_write_bit(&header_bc, 0); - } + // bitstream version. + // 00 - profile 0. 4:2:0 only + // 10 - profile 1. adds 4:4:4, 4:2:2, alpha + vp9_wb_write_bit(wb, cm->version); + vp9_wb_write_bit(wb, 0); + + vp9_wb_write_bit(wb, 0); + vp9_wb_write_bit(wb, cm->frame_type); + vp9_wb_write_bit(wb, cm->show_frame); + vp9_wb_write_bit(wb, cm->error_resilient_mode); + + if (cm->frame_type == KEY_FRAME) { + write_sync_code(wb); + // colorspaces + // 000 - Unknown + // 001 - BT.601 + // 010 - BT.709 + // 011 - SMPTE-170 + // 100 - SMPTE-240 + // 101 - Reserved + // 110 - Reserved + // 111 - sRGB (RGB) + vp9_wb_write_literal(wb, 0, 3); + if (1 /* colorspace != sRGB */) { + vp9_wb_write_bit(wb, 0); // 0: [16, 235] (i.e. 
xvYCC), 1: [0, 255] + if (cm->version == 1) { + vp9_wb_write_bit(wb, cm->subsampling_x); + vp9_wb_write_bit(wb, cm->subsampling_y); + vp9_wb_write_bit(wb, 0); // has extra plane } + } else { + assert(cm->version == 1); + vp9_wb_write_bit(wb, 0); // has extra plane } - } - // signal here is multi token partition is enabled - // vp9_write_literal(&header_bc, pc->multi_token_partition, 2); - vp9_write_literal(&header_bc, 0, 2); - - // Frame Q baseline quantizer index - vp9_write_literal(&header_bc, pc->base_qindex, QINDEX_BITS); + write_frame_size(cpi, wb); + } else { + const int refs[ALLOWED_REFS_PER_FRAME] = {cpi->lst_fb_idx, cpi->gld_fb_idx, + cpi->alt_fb_idx}; + if (!cm->show_frame) + vp9_wb_write_bit(wb, cm->intra_only); - // Transmit Dc, Second order and Uv quantizer delta information - put_delta_q(&header_bc, pc->y1dc_delta_q); - put_delta_q(&header_bc, pc->uvdc_delta_q); - put_delta_q(&header_bc, pc->uvac_delta_q); + if (!cm->error_resilient_mode) + vp9_wb_write_literal(wb, cm->reset_frame_context, 2); - // When there is a key frame all reference buffers are updated using the new key frame - if (pc->frame_type != KEY_FRAME) { - int refresh_mask; + if (cm->intra_only) { + write_sync_code(wb); - // Should the GF or ARF be updated using the transmitted frame or buffer - if (cpi->refresh_golden_frame && !cpi->refresh_alt_ref_frame) { - /* Preserve the previously existing golden frame and update the frame in - * the alt ref slot instead. This is highly specific to the use of - * alt-ref as a forward reference, and this needs to be generalized as - * other uses are implemented (like RTC/temporal scaling) - * - * gld_fb_idx and alt_fb_idx need to be swapped for future frames, but - * that happens in vp9_onyx_if.c:update_reference_frames() so that it can - * be done outside of the recode loop. - */ - refresh_mask = (cpi->refresh_last_frame << cpi->lst_fb_idx) | - (cpi->refresh_golden_frame << cpi->alt_fb_idx); + vp9_wb_write_literal(wb, get_refresh_mask(cpi), NUM_REF_FRAMES); + write_frame_size(cpi, wb); } else { - refresh_mask = (cpi->refresh_last_frame << cpi->lst_fb_idx) | - (cpi->refresh_golden_frame << cpi->gld_fb_idx) | - (cpi->refresh_alt_ref_frame << cpi->alt_fb_idx); - } - vp9_write_literal(&header_bc, refresh_mask, NUM_REF_FRAMES); - vp9_write_literal(&header_bc, cpi->lst_fb_idx, NUM_REF_FRAMES_LG2); - vp9_write_literal(&header_bc, cpi->gld_fb_idx, NUM_REF_FRAMES_LG2); - vp9_write_literal(&header_bc, cpi->alt_fb_idx, NUM_REF_FRAMES_LG2); - - // Indicate reference frame sign bias for Golden and ARF frames (always 0 for last frame buffer) - vp9_write_bit(&header_bc, pc->ref_frame_sign_bias[GOLDEN_FRAME]); - vp9_write_bit(&header_bc, pc->ref_frame_sign_bias[ALTREF_FRAME]); - - // Signal whether to allow high MV precision - vp9_write_bit(&header_bc, (xd->allow_high_precision_mv) ? 1 : 0); - if (pc->mcomp_filter_type == SWITCHABLE) { - /* Check to see if only one of the filters is actually used */ - int count[VP9_SWITCHABLE_FILTERS]; - int i, j, c = 0; - for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) { - count[i] = 0; - for (j = 0; j <= VP9_SWITCHABLE_FILTERS; ++j) { - count[i] += cpi->switchable_interp_count[j][i]; - } - c += (count[i] > 0); - } - if (c == 1) { - /* Only one filter is used. 
So set the filter at frame level */ - for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) { - if (count[i]) { - pc->mcomp_filter_type = vp9_switchable_interp[i]; - break; - } - } + int i; + vp9_wb_write_literal(wb, get_refresh_mask(cpi), NUM_REF_FRAMES); + for (i = 0; i < ALLOWED_REFS_PER_FRAME; ++i) { + vp9_wb_write_literal(wb, refs[i], NUM_REF_FRAMES_LG2); + vp9_wb_write_bit(wb, cm->ref_frame_sign_bias[LAST_FRAME + i]); } + + write_frame_size_with_refs(cpi, wb); + + vp9_wb_write_bit(wb, xd->allow_high_precision_mv); + + fix_mcomp_filter_type(cpi); + write_interp_filter_type(cm->mcomp_filter_type, wb); } - // Signal the type of subpel filter to use - vp9_write_bit(&header_bc, (pc->mcomp_filter_type == SWITCHABLE)); - if (pc->mcomp_filter_type != SWITCHABLE) - vp9_write_literal(&header_bc, (pc->mcomp_filter_type), 2); -#if CONFIG_COMP_INTERINTRA_PRED - // printf("Counts: %d %d\n", cpi->interintra_count[0], - // cpi->interintra_count[1]); - if (!cpi->dummy_packing && pc->use_interintra) - pc->use_interintra = (cpi->interintra_count[1] > 0); - vp9_write_bit(&header_bc, pc->use_interintra); - if (!pc->use_interintra) - vp9_zero(cpi->interintra_count); -#endif } - if (!pc->error_resilient_mode) { - vp9_write_bit(&header_bc, pc->refresh_entropy_probs); - vp9_write_bit(&header_bc, pc->frame_parallel_decoding_mode); + if (!cm->error_resilient_mode) { + vp9_wb_write_bit(wb, cm->refresh_frame_context); + vp9_wb_write_bit(wb, cm->frame_parallel_decoding_mode); } - vp9_write_literal(&header_bc, pc->frame_context_idx, - NUM_FRAME_CONTEXTS_LG2); + vp9_wb_write_literal(wb, cm->frame_context_idx, NUM_FRAME_CONTEXTS_LG2); -#ifdef ENTROPY_STATS - if (pc->frame_type == INTER_FRAME) - active_section = 0; - else - active_section = 7; -#endif + encode_loopfilter(cm, xd, wb); + encode_quantization(cm, wb); + encode_segmentation(cpi, wb); - // If appropriate update the inter mode probability context and code the - // changes in the bitstream. - if (pc->frame_type != KEY_FRAME) { - int i, j; - int new_context[INTER_MODE_CONTEXTS][4]; - if (!cpi->dummy_packing) { - update_inter_mode_probs(pc, new_context); - } else { - // In dummy pack assume context unchanged. - vpx_memcpy(new_context, pc->fc.vp9_mode_contexts, - sizeof(pc->fc.vp9_mode_contexts)); - } + write_tile_info(cm, wb); +} - for (i = 0; i < INTER_MODE_CONTEXTS; i++) { - for (j = 0; j < 4; j++) { - if (new_context[i][j] != pc->fc.vp9_mode_contexts[i][j]) { - vp9_write(&header_bc, 1, 252); - vp9_write_literal(&header_bc, new_context[i][j], 8); +void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, unsigned long *size) { + int i, bytes_packed; + VP9_COMMON *const pc = &cpi->common; + vp9_writer header_bc, residual_bc; + MACROBLOCKD *const xd = &cpi->mb.e_mbd; - // Only update the persistent copy if this is the "real pack" - if (!cpi->dummy_packing) { - pc->fc.vp9_mode_contexts[i][j] = new_context[i][j]; - } - } else { - vp9_write(&header_bc, 0, 252); - } - } - } - } + uint8_t *cx_data = dest; + struct vp9_write_bit_buffer wb = {dest, 0}; + struct vp9_write_bit_buffer first_partition_size_wb; -#if CONFIG_NEW_MVREF - if ((pc->frame_type != KEY_FRAME)) { - int new_mvref_probs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES-1]; - int i, j; + write_uncompressed_header(cpi, &wb); + first_partition_size_wb = wb; + vp9_wb_write_literal(&wb, 0, 16); // don't know in advance first part. 
size - update_mv_ref_probs(cpi, new_mvref_probs); + bytes_packed = vp9_rb_bytes_written(&wb); + cx_data += bytes_packed; - for (i = 0; i < MAX_REF_FRAMES; ++i) { - // Skip the dummy entry for intra ref frame. - if (i == INTRA_FRAME) { - continue; - } + compute_update_table(); - // Encode any mandated updates to probabilities - for (j = 0; j < MAX_MV_REF_CANDIDATES - 1; ++j) { - if (new_mvref_probs[i][j] != xd->mb_mv_ref_probs[i][j]) { - vp9_write(&header_bc, 1, VP9_MVREF_UPDATE_PROB); - vp9_write_literal(&header_bc, new_mvref_probs[i][j], 8); + vp9_start_encode(&header_bc, cx_data); - // Only update the persistent copy if this is the "real pack" - if (!cpi->dummy_packing) { - xd->mb_mv_ref_probs[i][j] = new_mvref_probs[i][j]; - } - } else { - vp9_write(&header_bc, 0, VP9_MVREF_UPDATE_PROB); - } - } - } - } +#ifdef ENTROPY_STATS + if (pc->frame_type == INTER_FRAME) + active_section = 0; + else + active_section = 7; #endif vp9_clear_system_state(); // __asm emms; - vp9_copy(cpi->common.fc.pre_coef_probs_4x4, - cpi->common.fc.coef_probs_4x4); - vp9_copy(cpi->common.fc.pre_coef_probs_8x8, - cpi->common.fc.coef_probs_8x8); - vp9_copy(cpi->common.fc.pre_coef_probs_16x16, - cpi->common.fc.coef_probs_16x16); - vp9_copy(cpi->common.fc.pre_coef_probs_32x32, - cpi->common.fc.coef_probs_32x32); -#if CONFIG_CODE_NONZEROCOUNT - vp9_copy(cpi->common.fc.pre_nzc_probs_4x4, - cpi->common.fc.nzc_probs_4x4); - vp9_copy(cpi->common.fc.pre_nzc_probs_8x8, - cpi->common.fc.nzc_probs_8x8); - vp9_copy(cpi->common.fc.pre_nzc_probs_16x16, - cpi->common.fc.nzc_probs_16x16); - vp9_copy(cpi->common.fc.pre_nzc_probs_32x32, - cpi->common.fc.nzc_probs_32x32); - vp9_copy(cpi->common.fc.pre_nzc_pcat_probs, - cpi->common.fc.nzc_pcat_probs); - // NOTE that if the counts are reset, we also need to uncomment - // the count updates in the write_nzc function - /* - vp9_zero(cpi->common.fc.nzc_counts_4x4); - vp9_zero(cpi->common.fc.nzc_counts_8x8); - vp9_zero(cpi->common.fc.nzc_counts_16x16); - vp9_zero(cpi->common.fc.nzc_counts_32x32); - vp9_zero(cpi->common.fc.nzc_pcat_counts); - */ -#endif - vp9_copy(cpi->common.fc.pre_sb_ymode_prob, cpi->common.fc.sb_ymode_prob); - vp9_copy(cpi->common.fc.pre_ymode_prob, cpi->common.fc.ymode_prob); - vp9_copy(cpi->common.fc.pre_uv_mode_prob, cpi->common.fc.uv_mode_prob); - vp9_copy(cpi->common.fc.pre_bmode_prob, cpi->common.fc.bmode_prob); - vp9_copy(cpi->common.fc.pre_sub_mv_ref_prob, cpi->common.fc.sub_mv_ref_prob); - vp9_copy(cpi->common.fc.pre_mbsplit_prob, cpi->common.fc.mbsplit_prob); - vp9_copy(cpi->common.fc.pre_i8x8_mode_prob, cpi->common.fc.i8x8_mode_prob); - cpi->common.fc.pre_nmvc = cpi->common.fc.nmvc; -#if CONFIG_COMP_INTERINTRA_PRED - cpi->common.fc.pre_interintra_prob = cpi->common.fc.interintra_prob; -#endif - vp9_zero(cpi->sub_mv_ref_count); - vp9_zero(cpi->mbsplit_count); - vp9_zero(cpi->common.fc.mv_ref_ct) + vp9_copy(pc->fc.pre_coef_probs, pc->fc.coef_probs); + vp9_copy(pc->fc.pre_y_mode_prob, pc->fc.y_mode_prob); + vp9_copy(pc->fc.pre_uv_mode_prob, pc->fc.uv_mode_prob); + vp9_copy(pc->fc.pre_partition_prob, pc->fc.partition_prob[INTER_FRAME]); + pc->fc.pre_nmvc = pc->fc.nmvc; + vp9_copy(pc->fc.pre_switchable_interp_prob, pc->fc.switchable_interp_prob); + vp9_copy(pc->fc.pre_inter_mode_probs, pc->fc.inter_mode_probs); + vp9_copy(pc->fc.pre_intra_inter_prob, pc->fc.intra_inter_prob); + vp9_copy(pc->fc.pre_comp_inter_prob, pc->fc.comp_inter_prob); + vp9_copy(pc->fc.pre_comp_ref_prob, pc->fc.comp_ref_prob); + vp9_copy(pc->fc.pre_single_ref_prob, pc->fc.single_ref_prob); + 
vp9_copy(pc->fc.pre_tx_probs_8x8p, pc->fc.tx_probs_8x8p); + vp9_copy(pc->fc.pre_tx_probs_16x16p, pc->fc.tx_probs_16x16p); + vp9_copy(pc->fc.pre_tx_probs_32x32p, pc->fc.tx_probs_32x32p); + vp9_copy(pc->fc.pre_mbskip_probs, pc->fc.mbskip_probs); + + if (xd->lossless) { + pc->txfm_mode = ONLY_4X4; + } else { + encode_txfm_probs(cpi, &header_bc); + } update_coef_probs(cpi, &header_bc); -#if CONFIG_CODE_NONZEROCOUNT - update_nzc_probs(cpi, &header_bc); -#endif #ifdef ENTROPY_STATS active_section = 2; #endif - // Write out the mb_no_coeff_skip flag - vp9_write_bit(&header_bc, pc->mb_no_coeff_skip); - if (pc->mb_no_coeff_skip) { - int k; - - vp9_update_skip_probs(cpi); - for (k = 0; k < MBSKIP_CONTEXTS; ++k) { - vp9_write_literal(&header_bc, pc->mbskip_pred_probs[k], 8); - } - } - - if (pc->frame_type == KEY_FRAME) { - if (!pc->kf_ymode_probs_update) { - vp9_write_literal(&header_bc, pc->kf_ymode_probs_index, 3); - } - } else { - // Update the probabilities used to encode reference frame data - update_ref_probs(cpi); + vp9_update_skip_probs(cpi, &header_bc); + if (pc->frame_type != KEY_FRAME) { #ifdef ENTROPY_STATS active_section = 1; #endif + update_inter_mode_probs(pc, &header_bc); + vp9_zero(cpi->common.fc.inter_mode_counts); + if (pc->mcomp_filter_type == SWITCHABLE) update_switchable_interp_probs(cpi, &header_bc); -#if CONFIG_COMP_INTERINTRA_PRED - if (pc->use_interintra) { - vp9_cond_prob_update(&header_bc, - &pc->fc.interintra_prob, - VP9_UPD_INTERINTRA_PROB, - cpi->interintra_count); - } -#endif - - vp9_write_literal(&header_bc, pc->prob_intra_coded, 8); - vp9_write_literal(&header_bc, pc->prob_last_coded, 8); - vp9_write_literal(&header_bc, pc->prob_gf_coded, 8); + for (i = 0; i < INTRA_INTER_CONTEXTS; i++) + vp9_cond_prob_diff_update(&header_bc, &pc->fc.intra_inter_prob[i], + VP9_MODE_UPDATE_PROB, + cpi->intra_inter_count[i]); - { + if (pc->allow_comp_inter_inter) { const int comp_pred_mode = cpi->common.comp_pred_mode; const int use_compound_pred = (comp_pred_mode != SINGLE_PREDICTION_ONLY); const int use_hybrid_pred = (comp_pred_mode == HYBRID_PREDICTION); - vp9_write(&header_bc, use_compound_pred, 128); + vp9_write_bit(&header_bc, use_compound_pred); if (use_compound_pred) { - vp9_write(&header_bc, use_hybrid_pred, 128); + vp9_write_bit(&header_bc, use_hybrid_pred); if (use_hybrid_pred) { - for (i = 0; i < COMP_PRED_CONTEXTS; i++) { - pc->prob_comppred[i] = get_binary_prob(cpi->single_pred_count[i], - cpi->comp_pred_count[i]); - vp9_write_literal(&header_bc, pc->prob_comppred[i], 8); - } + for (i = 0; i < COMP_INTER_CONTEXTS; i++) + vp9_cond_prob_diff_update(&header_bc, &pc->fc.comp_inter_prob[i], + VP9_MODE_UPDATE_PROB, + cpi->comp_inter_count[i]); } } } - update_mbintra_mode_probs(cpi, &header_bc); - - vp9_write_nmv_probs(cpi, xd->allow_high_precision_mv, &header_bc); - } - - /* tiling */ - { - int min_log2_tiles, delta_log2_tiles, n_tile_bits, n; - vp9_get_tile_n_bits(pc, &min_log2_tiles, &delta_log2_tiles); - n_tile_bits = pc->log2_tile_columns - min_log2_tiles; - for (n = 0; n < delta_log2_tiles; n++) { - if (n_tile_bits--) { - vp9_write_bit(&header_bc, 1); - } else { - vp9_write_bit(&header_bc, 0); - break; + if (pc->comp_pred_mode != COMP_PREDICTION_ONLY) { + for (i = 0; i < REF_CONTEXTS; i++) { + vp9_cond_prob_diff_update(&header_bc, &pc->fc.single_ref_prob[i][0], + VP9_MODE_UPDATE_PROB, + cpi->single_ref_count[i][0]); + vp9_cond_prob_diff_update(&header_bc, &pc->fc.single_ref_prob[i][1], + VP9_MODE_UPDATE_PROB, + cpi->single_ref_count[i][1]); } } - vp9_write_bit(&header_bc, 
pc->log2_tile_rows != 0); - if (pc->log2_tile_rows != 0) - vp9_write_bit(&header_bc, pc->log2_tile_rows != 1); - } - vp9_stop_encode(&header_bc); + if (pc->comp_pred_mode != SINGLE_PREDICTION_ONLY) { + for (i = 0; i < REF_CONTEXTS; i++) + vp9_cond_prob_diff_update(&header_bc, &pc->fc.comp_ref_prob[i], + VP9_MODE_UPDATE_PROB, + cpi->comp_ref_count[i]); + } - oh.first_partition_length_in_bytes = header_bc.pos; + update_mbintra_mode_probs(cpi, &header_bc); - /* update frame tag */ - { - int scaling = (pc->width != pc->display_width || - pc->height != pc->display_height); - int v = (oh.first_partition_length_in_bytes << 8) | - (scaling << 5) | - (oh.show_frame << 4) | - (oh.version << 1) | - oh.type; - - assert(oh.first_partition_length_in_bytes <= 0xffff); - dest[0] = v; - dest[1] = v >> 8; - dest[2] = v >> 16; + for (i = 0; i < NUM_PARTITION_CONTEXTS; ++i) { + vp9_prob Pnew[PARTITION_TYPES - 1]; + unsigned int bct[PARTITION_TYPES - 1][2]; + update_mode(&header_bc, PARTITION_TYPES, vp9_partition_encodings, + vp9_partition_tree, Pnew, + pc->fc.partition_prob[pc->frame_type][i], bct, + (unsigned int *)cpi->partition_count[i]); + } + + vp9_write_nmv_probs(cpi, xd->allow_high_precision_mv, &header_bc); } - *size = VP9_HEADER_SIZE + extra_bytes_packed + header_bc.pos; - if (pc->frame_type == KEY_FRAME) { - decide_kf_ymode_entropy(cpi); - } else { - /* This is not required if the counts in cpi are consistent with the - * final packing pass */ - // if (!cpi->dummy_packing) vp9_zero(cpi->NMVcount); - } + vp9_stop_encode(&header_bc); + + + // first partition size + assert(header_bc.pos <= 0xffff); + vp9_wb_write_literal(&first_partition_size_wb, header_bc.pos, 16); + *size = bytes_packed + header_bc.pos; { int tile_row, tile_col, total_size = 0; @@ -2943,11 +1748,8 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, write_modes(cpi, &residual_bc, &tok[tile_col], tok_end); vp9_stop_encode(&residual_bc); if (tile_col < pc->tile_columns - 1 || tile_row < pc->tile_rows - 1) { - /* size of this tile */ - data_ptr[total_size + 0] = residual_bc.pos; - data_ptr[total_size + 1] = residual_bc.pos >> 8; - data_ptr[total_size + 2] = residual_bc.pos >> 16; - data_ptr[total_size + 3] = residual_bc.pos >> 24; + // size of this tile + write_be32(data_ptr + total_size, residual_bc.pos); total_size += 4; } @@ -2999,21 +1801,18 @@ void print_tree_update_probs() { FILE *f = fopen("coefupdprob.h", "w"); fprintf(f, "\n/* Update probabilities for token entropy tree. 
*/\n\n"); - print_tree_update_for_type(f, tree_update_hist_4x4, BLOCK_TYPES, + print_tree_update_for_type(f, tree_update_hist[TX_4X4], BLOCK_TYPES, "vp9_coef_update_probs_4x4[BLOCK_TYPES]"); - print_tree_update_for_type(f, tree_update_hist_8x8, BLOCK_TYPES, + print_tree_update_for_type(f, tree_update_hist[TX_8X8], BLOCK_TYPES, "vp9_coef_update_probs_8x8[BLOCK_TYPES]"); - print_tree_update_for_type(f, tree_update_hist_16x16, BLOCK_TYPES, + print_tree_update_for_type(f, tree_update_hist[TX_16X16], BLOCK_TYPES, "vp9_coef_update_probs_16x16[BLOCK_TYPES]"); - print_tree_update_for_type(f, tree_update_hist_32x32, BLOCK_TYPES, + print_tree_update_for_type(f, tree_update_hist[TX_32X32], BLOCK_TYPES, "vp9_coef_update_probs_32x32[BLOCK_TYPES]"); fclose(f); f = fopen("treeupdate.bin", "wb"); - fwrite(tree_update_hist_4x4, sizeof(tree_update_hist_4x4), 1, f); - fwrite(tree_update_hist_8x8, sizeof(tree_update_hist_8x8), 1, f); - fwrite(tree_update_hist_16x16, sizeof(tree_update_hist_16x16), 1, f); - fwrite(tree_update_hist_32x32, sizeof(tree_update_hist_32x32), 1, f); + fwrite(tree_update_hist, sizeof(tree_update_hist), 1, f); fclose(f); } #endif diff --git a/vp9/encoder/vp9_bitstream.h b/vp9/encoder/vp9_bitstream.h index f7a8ece64ceaaf6cec26152bc2e28dde04d5df74..b3dbee1a772528078e405d29ad2a17f43d4df9c9 100644 --- a/vp9/encoder/vp9_bitstream.h +++ b/vp9/encoder/vp9_bitstream.h @@ -12,6 +12,6 @@ #ifndef VP9_ENCODER_VP9_BITSTREAM_H_ #define VP9_ENCODER_VP9_BITSTREAM_H_ -void vp9_update_skip_probs(VP9_COMP *cpi); +void vp9_update_skip_probs(VP9_COMP *cpi, vp9_writer *bc); #endif // VP9_ENCODER_VP9_BITSTREAM_H_ diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 491ea62b5e183dae08a3fb1179f888cbb42d2678..e78f54eb4913e589b65136f506033b81633ae1f1 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -23,43 +23,13 @@ typedef struct { int offset; } search_site; -typedef struct block { - // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries - int16_t *src_diff; - int16_t *coeff; - - // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries - int16_t *quant; - int16_t *quant_fast; // fast quant deprecated for now - uint8_t *quant_shift; - int16_t *zbin; - int16_t *zbin_8x8; - int16_t *zbin_16x16; - int16_t *zbin_32x32; - int16_t *zrun_zbin_boost; - int16_t *zrun_zbin_boost_8x8; - int16_t *zrun_zbin_boost_16x16; - int16_t *zrun_zbin_boost_32x32; - int16_t *round; - - // Zbin Over Quant value - short zbin_extra; - - uint8_t **base_src; - uint8_t **base_second_src; - int src; - int src_stride; - - int skip_block; -} BLOCK; - typedef struct { int count; struct { - B_PREDICTION_MODE mode; + MB_PREDICTION_MODE mode; int_mv mv; int_mv second_mv; - } bmi[16]; + } bmi[4]; } PARTITION_INFO; // Structure to hold snapshot of coding context during the mode picking process @@ -81,18 +51,36 @@ typedef struct { int comp_pred_diff; int single_pred_diff; int64_t txfm_rd_diff[NB_TXFM_MODES]; + + // Bit flag for each mode whether it has high error in comparison to others. + unsigned int modes_with_high_error; + + // Bit flag for each ref frame whether it has high error compared to others. 
+  unsigned int frames_with_high_error;
 } PICK_MODE_CONTEXT;
 
+struct macroblock_plane {
+  DECLARE_ALIGNED(16, int16_t, src_diff[64*64]);
+  DECLARE_ALIGNED(16, int16_t, coeff[64*64]);
+  struct buf_2d src;
+
+  // Quantizer settings
+  int16_t *quant;
+  uint8_t *quant_shift;
+  int16_t *zbin;
+  int16_t *zrun_zbin_boost;
+  int16_t *round;
+
+  // Zbin Over Quant value
+  int16_t zbin_extra;
+};
+
 typedef struct macroblock MACROBLOCK;
 struct macroblock {
-  DECLARE_ALIGNED(16, int16_t, src_diff[64*64+32*32*2]);
-  DECLARE_ALIGNED(16, int16_t, coeff[64*64+32*32*2]);
-  // 16 Y blocks, 4 U blocks, 4 V blocks,
-  BLOCK block[24];
-
-  YV12_BUFFER_CONFIG src;
+  struct macroblock_plane plane[MAX_MB_PLANE];
  MACROBLOCKD e_mbd;
+  int skip_block;
  PARTITION_INFO *partition_info; /* work pointer */
  PARTITION_INFO *pi;  /* Corresponds to upper left visible macroblock */
  PARTITION_INFO *pip; /* Base of allocated array */
@@ -126,11 +114,9 @@ struct macroblock {
  int *nmvsadcost_hp[2];
  int **mvsadcost;
-  int mbmode_cost[2][MB_MODE_COUNT];
+  int mbmode_cost[MB_MODE_COUNT];
  int intra_uv_mode_cost[2][MB_MODE_COUNT];
-  int bmode_costs[VP9_KF_BINTRAMODES][VP9_KF_BINTRAMODES][VP9_KF_BINTRAMODES];
-  int i8x8_mode_costs[MB_MODE_COUNT];
-  int inter_bmode_costs[B_MODE_COUNT];
+  int y_mode_costs[VP9_INTRA_MODES][VP9_INTRA_MODES][VP9_INTRA_MODES];
  int switchable_interp_costs[VP9_SWITCHABLE_FILTERS + 1]
                             [VP9_SWITCHABLE_FILTERS];
@@ -145,36 +131,43 @@ struct macroblock {
  int encode_breakout;
-  // char * gf_active_ptr;
-  signed char *gf_active_ptr;
-  unsigned char *active_ptr;
+  // note that token_costs is the cost when the eob node is skipped
  vp9_coeff_count token_costs[TX_SIZE_MAX_SB][BLOCK_TYPES];
-#if CONFIG_CODE_NONZEROCOUNT
-  unsigned int nzc_costs_4x4[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][17];
-  unsigned int nzc_costs_8x8[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][65];
-  unsigned int nzc_costs_16x16[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][257];
-  unsigned int nzc_costs_32x32[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][1025];
-#endif
+  vp9_coeff_count token_costs_noskip[TX_SIZE_MAX_SB][BLOCK_TYPES];
  int optimize;
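
The block of PICK_MODE_CONTEXT arrays added just below is indexed by recursion depth: one slot for the 64x64 superblock, per-child slots for each split level beneath it. A hypothetical selector in the spirit of the encoder's get_block_context(), assuming the sb_index/mb_index/b_index fields implied by the get_sb_index() calls in vp9_bitstream.c track the current position at each level:

/* Hypothetical sketch only; the real lookup lives in the encoder and the
 * index fields are assumptions, not guaranteed by this patch. */
static PICK_MODE_CONTEXT *block_context_sketch(MACROBLOCK *x,
                                               BLOCK_SIZE_TYPE bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;
  switch (bsize) {
    case BLOCK_SIZE_SB64X64: return &x->sb64_context;
    case BLOCK_SIZE_SB32X32: return &x->sb32_context[xd->sb_index];
    case BLOCK_SIZE_MB16X16:
      return &x->mb_context[xd->sb_index][xd->mb_index];
    case BLOCK_SIZE_SB8X8:
      return &x->sb8x8_context[xd->sb_index][xd->mb_index][xd->b_index];
    default: return NULL;  /* remaining sizes follow the same scheme */
  }
}
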
+ PICK_MODE_CONTEXT ab4x4_context[4][4][4]; + PICK_MODE_CONTEXT sb8x4_context[4][4][4]; + PICK_MODE_CONTEXT sb4x8_context[4][4][4]; + PICK_MODE_CONTEXT sb8x8_context[4][4][4]; + PICK_MODE_CONTEXT sb8x16_context[4][4][2]; + PICK_MODE_CONTEXT sb16x8_context[4][4][2]; PICK_MODE_CONTEXT mb_context[4][4]; + PICK_MODE_CONTEXT sb32x16_context[4][2]; + PICK_MODE_CONTEXT sb16x32_context[4][2]; // when 4 MBs share coding parameters: PICK_MODE_CONTEXT sb32_context[4]; + PICK_MODE_CONTEXT sb32x64_context[2]; + PICK_MODE_CONTEXT sb64x32_context[2]; PICK_MODE_CONTEXT sb64_context; + int partition_cost[NUM_PARTITION_CONTEXTS][PARTITION_TYPES]; + + BLOCK_SIZE_TYPE b_partitioning[4][4][4]; + BLOCK_SIZE_TYPE mb_partitioning[4][4]; + BLOCK_SIZE_TYPE sb_partitioning[4]; + BLOCK_SIZE_TYPE sb64_partitioning; void (*fwd_txm4x4)(int16_t *input, int16_t *output, int pitch); void (*fwd_txm8x4)(int16_t *input, int16_t *output, int pitch); void (*fwd_txm8x8)(int16_t *input, int16_t *output, int pitch); void (*fwd_txm16x16)(int16_t *input, int16_t *output, int pitch); - void (*quantize_b_4x4)(MACROBLOCK *x, int b_idx); - void (*quantize_b_4x4_pair)(MACROBLOCK *x, int b_idx1, int b_idx2); - void (*quantize_b_16x16)(MACROBLOCK *x, int b_idx, TX_TYPE tx_type); - void (*quantize_b_8x8)(MACROBLOCK *x, int b_idx, TX_TYPE tx_type); + void (*quantize_b_4x4)(MACROBLOCK *x, int b_idx, TX_TYPE tx_type, + int y_blocks); }; #endif // VP9_ENCODER_VP9_BLOCK_H_ diff --git a/vp9/encoder/vp9_boolhuff.c b/vp9/encoder/vp9_boolhuff.c index 2137421827562fd991237270785d258d96c0f6ab..0f1aa594e94582beb752acb348c4cbfead93605a 100644 --- a/vp9/encoder/vp9_boolhuff.c +++ b/vp9/encoder/vp9_boolhuff.c @@ -10,6 +10,7 @@ #include <assert.h> #include "vp9/encoder/vp9_boolhuff.h" +#include "vp9/common/vp9_entropy.h" #if defined(SECTIONBITS_OUTPUT) unsigned __int64 Sectionbits[500]; @@ -39,127 +40,24 @@ const unsigned int vp9_prob_cost[256] = { 22, 21, 19, 18, 16, 15, 13, 12, 10, 9, 7, 6, 4, 3, 1, 1 }; -void vp9_start_encode(BOOL_CODER *br, unsigned char *source) { +void vp9_start_encode(vp9_writer *br, uint8_t *source) { br->lowvalue = 0; br->range = 255; br->value = 0; br->count = -24; br->buffer = source; br->pos = 0; + vp9_write_bit(br, 0); } -void vp9_stop_encode(BOOL_CODER *br) { +void vp9_stop_encode(vp9_writer *br) { int i; for (i = 0; i < 32; i++) - encode_bool(br, 0, 128); + vp9_write_bit(br, 0); // Ensure there's no ambigous collision with any index marker bytes if ((br->buffer[br->pos - 1] & 0xe0) == 0xc0) br->buffer[br->pos++] = 0; } - -void vp9_encode_value(BOOL_CODER *br, int data, int bits) { - int bit; - - for (bit = bits - 1; bit >= 0; bit--) - encode_bool(br, (1 & (data >> bit)), 0x80); -} - -void vp9_encode_unsigned_max(BOOL_CODER *br, int data, int max) { - assert(data <= max); - while (max) { - encode_bool(br, data & 1, 128); - data >>= 1; - max >>= 1; - } -} - -int vp9_recenter_nonneg(int v, int m) { - if (v > (m << 1)) return v; - else if (v >= m) return ((v - m) << 1); - else return ((m - v) << 1) - 1; -} - -static int get_unsigned_bits(unsigned num_values) { - int cat = 0; - if ((num_values--) <= 1) return 0; - while (num_values > 0) { - cat++; - num_values >>= 1; - } - return cat; -} - -void vp9_encode_uniform(BOOL_CODER *br, int v, int n) { - int l = get_unsigned_bits(n); - int m; - if (l == 0) return; - m = (1 << l) - n; - if (v < m) - vp9_encode_value(br, v, l - 1); - else { - vp9_encode_value(br, m + ((v - m) >> 1), l - 1); - vp9_encode_value(br, (v - m) & 1, 1); - } -} - -int vp9_count_uniform(int v, int n) { - int l = 
get_unsigned_bits(n); - int m; - if (l == 0) return 0; - m = (1 << l) - n; - if (v < m) - return l - 1; - else - return l; -} - -void vp9_encode_term_subexp(BOOL_CODER *br, int word, int k, int num_syms) { - int i = 0; - int mk = 0; - while (1) { - int b = (i ? k + i - 1 : k); - int a = (1 << b); - if (num_syms <= mk + 3 * a) { - vp9_encode_uniform(br, word - mk, num_syms - mk); - break; - } else { - int t = (word >= mk + a); - vp9_encode_value(br, t, 1); - if (t) { - i = i + 1; - mk += a; - } else { - vp9_encode_value(br, word - mk, b); - break; - } - } - } -} - -int vp9_count_term_subexp(int word, int k, int num_syms) { - int count = 0; - int i = 0; - int mk = 0; - while (1) { - int b = (i ? k + i - 1 : k); - int a = (1 << b); - if (num_syms <= mk + 3 * a) { - count += vp9_count_uniform(word - mk, num_syms - mk); - break; - } else { - int t = (word >= mk + a); - count++; - if (t) { - i = i + 1; - mk += a; - } else { - count += b; - break; - } - } - } - return count; -} diff --git a/vp9/encoder/vp9_boolhuff.h b/vp9/encoder/vp9_boolhuff.h index 0be4b53c14d7a2e8fa1cdc9c36dbb6cc00351248..c3f340d1bdf24f4df6c1f0c613cb8c798349dc39 100644 --- a/vp9/encoder/vp9_boolhuff.h +++ b/vp9/encoder/vp9_boolhuff.h @@ -27,30 +27,21 @@ typedef struct { unsigned int value; int count; unsigned int pos; - unsigned char *buffer; + uint8_t *buffer; // Variables used to track bit costs without outputing to the bitstream unsigned int measure_cost; unsigned long bit_counter; -} BOOL_CODER; +} vp9_writer; -extern void vp9_start_encode(BOOL_CODER *bc, unsigned char *buffer); - -extern void vp9_encode_value(BOOL_CODER *br, int data, int bits); -extern void vp9_encode_unsigned_max(BOOL_CODER *br, int data, int max); -extern void vp9_stop_encode(BOOL_CODER *bc); extern const unsigned int vp9_prob_cost[256]; -extern void vp9_encode_uniform(BOOL_CODER *bc, int v, int n); -extern void vp9_encode_term_subexp(BOOL_CODER *bc, int v, int k, int n); -extern int vp9_count_uniform(int v, int n); -extern int vp9_count_term_subexp(int v, int k, int n); -extern int vp9_recenter_nonneg(int v, int m); +void vp9_start_encode(vp9_writer *bc, uint8_t *buffer); +void vp9_stop_encode(vp9_writer *bc); DECLARE_ALIGNED(16, extern const unsigned char, vp9_norm[256]); - -static void encode_bool(BOOL_CODER *br, int bit, int probability) { +static void vp9_write(vp9_writer *br, int bit, int probability) { unsigned int split; int count = br->count; unsigned int range = br->range; @@ -89,7 +80,7 @@ static void encode_bool(BOOL_CODER *br, int bit, int probability) { int x = br->pos - 1; while (x >= 0 && br->buffer[x] == 0xff) { - br->buffer[x] = (unsigned char)0; + br->buffer[x] = 0; x--; } @@ -109,4 +100,16 @@ static void encode_bool(BOOL_CODER *br, int bit, int probability) { br->range = range; } +static void vp9_write_bit(vp9_writer *w, int bit) { + vp9_write(w, bit, 128); // vp9_prob_half +} + +static void vp9_write_literal(vp9_writer *w, int data, int bits) { + int bit; + + for (bit = bits - 1; bit >= 0; bit--) + vp9_write_bit(w, 1 & (data >> bit)); +} + + #endif // VP9_ENCODER_VP9_BOOLHUFF_H_ diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c index ebf40e4e6a5f753054a4aa1bbee8a03d733afb33..8d4eec1394bf36e8bcce7ab24e3bf189696a02fa 100644 --- a/vp9/encoder/vp9_dct.c +++ b/vp9/encoder/vp9_dct.c @@ -591,23 +591,32 @@ void vp9_short_fht8x8_c(int16_t *input, int16_t *output, } } +/* 4-point reversible, orthonormal Walsh-Hadamard in 3.5 adds, 0.5 shifts per + pixel. 
*/ void vp9_short_walsh4x4_c(short *input, short *output, int pitch) { int i; - int a1, b1, c1, d1; + int a1, b1, c1, d1, e1; short *ip = input; short *op = output; int pitch_short = pitch >> 1; for (i = 0; i < 4; i++) { - a1 = ip[0 * pitch_short] + ip[3 * pitch_short]; - b1 = ip[1 * pitch_short] + ip[2 * pitch_short]; - c1 = ip[1 * pitch_short] - ip[2 * pitch_short]; - d1 = ip[0 * pitch_short] - ip[3 * pitch_short]; - - op[0] = (a1 + b1 + 1) >> 1; - op[4] = (c1 + d1) >> 1; - op[8] = (a1 - b1) >> 1; - op[12] = (d1 - c1) >> 1; + a1 = ip[0 * pitch_short]; + b1 = ip[1 * pitch_short]; + c1 = ip[2 * pitch_short]; + d1 = ip[3 * pitch_short]; + + a1 += b1; + d1 = d1 - c1; + e1 = (a1 - d1) >> 1; + b1 = e1 - b1; + c1 = e1 - c1; + a1 -= c1; + d1 += b1; + op[0] = a1; + op[4] = c1; + op[8] = d1; + op[12] = b1; ip++; op++; @@ -616,15 +625,22 @@ void vp9_short_walsh4x4_c(short *input, short *output, int pitch) { op = output; for (i = 0; i < 4; i++) { - a1 = ip[0] + ip[3]; - b1 = ip[1] + ip[2]; - c1 = ip[1] - ip[2]; - d1 = ip[0] - ip[3]; - - op[0] = ((a1 + b1 + 1) >> 1) << WHT_UPSCALE_FACTOR; - op[1] = ((c1 + d1) >> 1) << WHT_UPSCALE_FACTOR; - op[2] = ((a1 - b1) >> 1) << WHT_UPSCALE_FACTOR; - op[3] = ((d1 - c1) >> 1) << WHT_UPSCALE_FACTOR; + a1 = ip[0]; + b1 = ip[1]; + c1 = ip[2]; + d1 = ip[3]; + + a1 += b1; + d1 -= c1; + e1 = (a1 - d1) >> 1; + b1 = e1 - b1; + c1 = e1 - c1; + a1 -= c1; + d1 += b1; + op[0] = a1 << WHT_UPSCALE_FACTOR; + op[1] = c1 << WHT_UPSCALE_FACTOR; + op[2] = d1 << WHT_UPSCALE_FACTOR; + op[3] = b1 << WHT_UPSCALE_FACTOR; ip += 4; op += 4; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index f2a13de99a20b32f46a6933a127474a579296bd7..213a9c72a89005296d1f6ff345947393a46e4056 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -10,6 +10,7 @@ #include "./vpx_config.h" +#include "./vp9_rtcd.h" #include "vp9/encoder/vp9_encodeframe.h" #include "vp9/encoder/vp9_encodemb.h" #include "vp9/encoder/vp9_encodemv.h" @@ -20,7 +21,6 @@ #include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_quant_common.h" #include "vp9/encoder/vp9_segmentation.h" -#include "vp9/common/vp9_setupintrarecon.h" #include "vp9/encoder/vp9_encodeintra.h" #include "vp9/common/vp9_reconinter.h" #include "vp9/common/vp9_invtrans.h" @@ -47,29 +47,12 @@ int enc_debug = 0; void vp9_select_interp_filter_type(VP9_COMP *cpi); -static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, - int output_enabled, int mb_row, int mb_col); - -static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t, - int output_enabled, int mb_row, int mb_col); - -static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t, - int output_enabled, int mb_row, int mb_col); +static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, + int output_enabled, int mi_row, int mi_col, + BLOCK_SIZE_TYPE bsize); static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x); -#ifdef MODE_STATS -unsigned int inter_y_modes[MB_MODE_COUNT]; -unsigned int inter_uv_modes[VP9_UV_MODES]; -unsigned int inter_b_modes[B_MODE_COUNT]; -unsigned int y_modes[VP9_YMODES]; -unsigned int i8x8_modes[VP9_I8X8_MODES]; -unsigned int uv_modes[VP9_UV_MODES]; -unsigned int uv_modes_y[VP9_YMODES][VP9_UV_MODES]; -unsigned int b_modes[B_MODE_COUNT]; -#endif - - /* activity_avg must be positive, or flat regions could get a zero weight * (infinite lambda), which confounds analysis. 
* This also avoids the need for divide by zero checks in @@ -98,8 +81,8 @@ static unsigned int tt_activity_measure(VP9_COMP *cpi, MACROBLOCK *x) { * lambda using a non-linear combination (e.g., the smallest, or second * smallest, etc.). */ - act = vp9_variance16x16(x->src.y_buffer, x->src.y_stride, VP9_VAR_OFFS, 0, - &sse); + act = vp9_variance16x16(x->plane[0].src.buf, x->plane[0].src.stride, + VP9_VAR_OFFS, 0, &sse); act <<= 4; /* If the region is flat, lower the activity some more. */ @@ -115,6 +98,8 @@ static unsigned int alt_activity_measure(VP9_COMP *cpi, return vp9_encode_intra(cpi, x, use_dc_pred); } +DECLARE_ALIGNED(16, static const uint8_t, vp9_64x64_zeros[64*64]) = { 0 }; + // Measure the activity of the current macroblock // What we measure here is TBD so abstracted to this function @@ -280,7 +265,7 @@ static void build_activity_map(VP9_COMP *cpi) { // for each macroblock col in image for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { #if ALT_ACT_MEASURE - xd->dst.y_buffer = new_yv12->y_buffer + recon_yoffset; + xd->plane[0].dst.buf = new_yv12->y_buffer + recon_yoffset; xd->left_available = (mb_col != 0); recon_yoffset += 16; #endif @@ -298,19 +283,12 @@ static void build_activity_map(VP9_COMP *cpi) { x->mb_activity_ptr++; // adjust to the next column of source macroblocks - x->src.y_buffer += 16; + x->plane[0].src.buf += 16; } // adjust to the next row of mbs - x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols; - -#if ALT_ACT_MEASURE - // extend the recon for intra prediction - vp9_extend_mb_row(new_yv12, xd->dst.y_buffer + 16, - xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); -#endif - + x->plane[0].src.buf += 16 * x->plane[0].src.stride - 16 * cm->mb_cols; } // Calculate an "average" MB activity @@ -347,156 +325,66 @@ void vp9_activity_masking(VP9_COMP *cpi, MACROBLOCK *x) { adjust_act_zbin(cpi, x); } -#if CONFIG_NEW_MVREF -static int vp9_cost_mv_ref_id(vp9_prob * ref_id_probs, int mv_ref_id) { - int cost; - - // Encode the index for the MV reference. - switch (mv_ref_id) { - case 0: - cost = vp9_cost_zero(ref_id_probs[0]); - break; - case 1: - cost = vp9_cost_one(ref_id_probs[0]); - cost += vp9_cost_zero(ref_id_probs[1]); - break; - case 2: - cost = vp9_cost_one(ref_id_probs[0]); - cost += vp9_cost_one(ref_id_probs[1]); - cost += vp9_cost_zero(ref_id_probs[2]); - break; - case 3: - cost = vp9_cost_one(ref_id_probs[0]); - cost += vp9_cost_one(ref_id_probs[1]); - cost += vp9_cost_one(ref_id_probs[2]); - break; - - // TRAP.. This should not happen - default: - assert(0); - break; - } - return cost; -} - -// Estimate the cost of each coding the vector using each reference candidate -static unsigned int pick_best_mv_ref(MACROBLOCK *x, - MV_REFERENCE_FRAME ref_frame, - int_mv target_mv, - int_mv * mv_ref_list, - int_mv * best_ref) { - int i; - int best_index = 0; - int cost, cost2; - int zero_seen = (mv_ref_list[0].as_int) ? FALSE : TRUE; - MACROBLOCKD *xd = &x->e_mbd; - int max_mv = MV_MAX; - - cost = vp9_cost_mv_ref_id(xd->mb_mv_ref_probs[ref_frame], 0) + - vp9_mv_bit_cost(&target_mv, &mv_ref_list[0], x->nmvjointcost, - x->mvcost, 96, xd->allow_high_precision_mv); - - for (i = 1; i < MAX_MV_REF_CANDIDATES; ++i) { - // If we see a 0,0 reference vector for a second time we have reached - // the end of the list of valid candidate vectors. - if (!mv_ref_list[i].as_int) { - if (zero_seen) - break; - else - zero_seen = TRUE; - } - - // Check for cases where the reference choice would give rise to an - // uncodable/out of range residual for row or col. 
- if ((abs(target_mv.as_mv.row - mv_ref_list[i].as_mv.row) > max_mv) || - (abs(target_mv.as_mv.col - mv_ref_list[i].as_mv.col) > max_mv)) { - continue; - } - - cost2 = vp9_cost_mv_ref_id(xd->mb_mv_ref_probs[ref_frame], i) + - vp9_mv_bit_cost(&target_mv, &mv_ref_list[i], x->nmvjointcost, - x->mvcost, 96, xd->allow_high_precision_mv); - - if (cost2 < cost) { - cost = cost2; - best_index = i; - } - } - best_ref->as_int = mv_ref_list[best_index].as_int; - - return best_index; -} -#endif - static void update_state(VP9_COMP *cpi, - PICK_MODE_CONTEXT *ctx, int block_size, + PICK_MODE_CONTEXT *ctx, + BLOCK_SIZE_TYPE bsize, int output_enabled) { int i, x_idx, y; MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *mi = &ctx->mic; MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; - int mb_mode = mi->mbmi.mode; +#if CONFIG_DEBUG || CONFIG_INTERNAL_STATS + MB_PREDICTION_MODE mb_mode = mi->mbmi.mode; +#endif int mb_mode_index = ctx->best_mode_index; const int mis = cpi->common.mode_info_stride; - int mb_block_size = 1 << mi->mbmi.sb_type; + const int bh = 1 << mi_height_log2(bsize), bw = 1 << mi_width_log2(bsize); #if CONFIG_DEBUG assert(mb_mode < MB_MODE_COUNT); assert(mb_mode_index < MAX_MODES); - assert(mi->mbmi.ref_frame < MAX_REF_FRAMES); + assert(mi->mbmi.ref_frame[0] < MAX_REF_FRAMES); + assert(mi->mbmi.ref_frame[1] < MAX_REF_FRAMES); #endif - assert(mi->mbmi.sb_type == (block_size >> 5)); + assert(mi->mbmi.sb_type == bsize); // Restore the coding context of the MB to that that was in place // when the mode was picked for it - for (y = 0; y < mb_block_size; y++) { - for (x_idx = 0; x_idx < mb_block_size; x_idx++) { - if ((xd->mb_to_right_edge >> 7) + mb_block_size > x_idx && - (xd->mb_to_bottom_edge >> 7) + mb_block_size > y) { + for (y = 0; y < bh; y++) { + for (x_idx = 0; x_idx < bw; x_idx++) { + if ((xd->mb_to_right_edge >> (3 + LOG2_MI_SIZE)) + bw > x_idx && + (xd->mb_to_bottom_edge >> (3 + LOG2_MI_SIZE)) + bh > y) { MODE_INFO *mi_addr = xd->mode_info_context + x_idx + y * mis; - - vpx_memcpy(mi_addr, mi, sizeof(MODE_INFO)); + *mi_addr = *mi; } } } - if (block_size == 16) { + if (bsize < BLOCK_SIZE_SB32X32) { + if (bsize < BLOCK_SIZE_MB16X16) + ctx->txfm_rd_diff[ALLOW_16X16] = ctx->txfm_rd_diff[ALLOW_8X8]; ctx->txfm_rd_diff[ALLOW_32X32] = ctx->txfm_rd_diff[ALLOW_16X16]; } - if (mb_mode == B_PRED) { - for (i = 0; i < 16; i++) { - xd->block[i].bmi.as_mode = xd->mode_info_context->bmi[i].as_mode; - assert(xd->block[i].bmi.as_mode.first < B_MODE_COUNT); - } - } else if (mb_mode == I8X8_PRED) { - for (i = 0; i < 16; i++) { - xd->block[i].bmi = xd->mode_info_context->bmi[i]; - } - } else if (mb_mode == SPLITMV) { - vpx_memcpy(x->partition_info, &ctx->partition_info, - sizeof(PARTITION_INFO)); - - mbmi->mv[0].as_int = x->partition_info->bmi[15].mv.as_int; - mbmi->mv[1].as_int = x->partition_info->bmi[15].second_mv.as_int; + if (mbmi->ref_frame[0] != INTRA_FRAME && mbmi->sb_type < BLOCK_SIZE_SB8X8) { + *x->partition_info = ctx->partition_info; + mbmi->mv[0].as_int = x->partition_info->bmi[3].mv.as_int; + mbmi->mv[1].as_int = x->partition_info->bmi[3].second_mv.as_int; } x->skip = ctx->skip; if (!output_enabled) return; - { - int segment_id = mbmi->segment_id; - if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) { - for (i = 0; i < NB_TXFM_MODES; i++) { - cpi->rd_tx_select_diff[i] += ctx->txfm_rd_diff[i]; - } + if (!vp9_segfeature_active(xd, mbmi->segment_id, SEG_LVL_SKIP)) { + for (i = 0; i < NB_TXFM_MODES; i++) { + cpi->rd_tx_select_diff[i] += 
ctx->txfm_rd_diff[i]; } } if (cpi->common.frame_type == KEY_FRAME) { // Restore the coding modes to that held in the coding context - // if (mb_mode == B_PRED) + // if (mb_mode == I4X4_PRED) // for (i = 0; i < 16; i++) // { // xd->block[i].bmi.as_mode = @@ -515,8 +403,7 @@ static void update_state(VP9_COMP *cpi, THR_D27_PRED /*D27_PRED*/, THR_D63_PRED /*D63_PRED*/, THR_TM /*TM_PRED*/, - THR_I8X8_PRED /*I8X8_PRED*/, - THR_B_PRED /*B_PRED*/, + THR_B_PRED /*I4X4_PRED*/, }; cpi->mode_chosen_counts[kf_mode_index[mb_mode]]++; #endif @@ -541,57 +428,34 @@ static void update_state(VP9_COMP *cpi, */ // Note how often each mode chosen as best cpi->mode_chosen_counts[mb_mode_index]++; - if (mbmi->mode == SPLITMV || mbmi->mode == NEWMV) { + if (mbmi->ref_frame[0] != INTRA_FRAME && + (mbmi->sb_type < BLOCK_SIZE_SB8X8 || mbmi->mode == NEWMV)) { int_mv best_mv, best_second_mv; - MV_REFERENCE_FRAME rf = mbmi->ref_frame; -#if CONFIG_NEW_MVREF - unsigned int best_index; - MV_REFERENCE_FRAME sec_ref_frame = mbmi->second_ref_frame; -#endif + const MV_REFERENCE_FRAME rf1 = mbmi->ref_frame[0]; + const MV_REFERENCE_FRAME rf2 = mbmi->ref_frame[1]; best_mv.as_int = ctx->best_ref_mv.as_int; best_second_mv.as_int = ctx->second_best_ref_mv.as_int; if (mbmi->mode == NEWMV) { - best_mv.as_int = mbmi->ref_mvs[rf][0].as_int; - best_second_mv.as_int = mbmi->ref_mvs[mbmi->second_ref_frame][0].as_int; -#if CONFIG_NEW_MVREF - best_index = pick_best_mv_ref(x, rf, mbmi->mv[0], - mbmi->ref_mvs[rf], &best_mv); - mbmi->best_index = best_index; - ++cpi->mb_mv_ref_count[rf][best_index]; - - if (mbmi->second_ref_frame > 0) { - unsigned int best_index; - best_index = - pick_best_mv_ref(x, sec_ref_frame, mbmi->mv[1], - mbmi->ref_mvs[sec_ref_frame], - &best_second_mv); - mbmi->best_second_index = best_index; - ++cpi->mb_mv_ref_count[sec_ref_frame][best_index]; - } -#endif + best_mv.as_int = mbmi->ref_mvs[rf1][0].as_int; + best_second_mv.as_int = mbmi->ref_mvs[rf2][0].as_int; } mbmi->best_mv.as_int = best_mv.as_int; mbmi->best_second_mv.as_int = best_second_mv.as_int; vp9_update_nmv_count(cpi, x, &best_mv, &best_second_mv); } -#if CONFIG_COMP_INTERINTRA_PRED - if (mbmi->mode >= NEARESTMV && mbmi->mode < SPLITMV && - mbmi->second_ref_frame <= INTRA_FRAME) { - if (mbmi->second_ref_frame == INTRA_FRAME) { - ++cpi->interintra_count[1]; - ++cpi->ymode_count[mbmi->interintra_mode]; -#if SEPARATE_INTERINTRA_UV - ++cpi->y_uv_mode_count[mbmi->interintra_mode][mbmi->interintra_uv_mode]; -#endif - } else { - ++cpi->interintra_count[0]; - } + + if (bsize > BLOCK_SIZE_SB8X8 && mbmi->mode == NEWMV) { + int i, j; + for (j = 0; j < bh; ++j) + for (i = 0; i < bw; ++i) + if ((xd->mb_to_right_edge >> (3 + LOG2_MI_SIZE)) + bw > i && + (xd->mb_to_bottom_edge >> (3 + LOG2_MI_SIZE)) + bh > j) + xd->mode_info_context[mis * j + i].mbmi = *mbmi; } -#endif + if (cpi->common.mcomp_filter_type == SWITCHABLE && - mbmi->mode >= NEARESTMV && - mbmi->mode <= SPLITMV) { - ++cpi->switchable_interp_count + is_inter_mode(mbmi->mode)) { + ++cpi->common.fc.switchable_interp_count [vp9_get_pred_context(&cpi->common, xd, PRED_SWITCHABLE_INTERP)] [vp9_switchable_interp_map[mbmi->interp_filter]]; } @@ -602,14 +466,16 @@ static void update_state(VP9_COMP *cpi, } } -static unsigned find_seg_id(uint8_t *buf, int block_size, +static unsigned find_seg_id(VP9_COMMON *cm, uint8_t *buf, BLOCK_SIZE_TYPE bsize, int start_y, int height, int start_x, int width) { - const int end_x = MIN(start_x + block_size, width); - const int end_y = MIN(start_y + block_size, height); + const int bw = 1 
<< mi_width_log2(bsize), bh = 1 << mi_height_log2(bsize); + const int end_x = MIN(start_x + bw, width); + const int end_y = MIN(start_y + bh, height); int x, y; unsigned seg_id = -1; buf += width * start_y; + assert(start_y < cm->mi_rows && start_x < cm->cur_tile_mi_col_end); for (y = start_y; y < end_y; y++, buf += width) { for (x = start_x; x < end_x; x++) { seg_id = MIN(seg_id, buf[x]); @@ -619,22 +485,48 @@ static unsigned find_seg_id(uint8_t *buf, int block_size, return seg_id; } +void vp9_setup_src_planes(MACROBLOCK *x, + const YV12_BUFFER_CONFIG *src, + int mb_row, int mb_col) { + uint8_t *buffers[4] = {src->y_buffer, src->u_buffer, src->v_buffer, + src->alpha_buffer}; + int strides[4] = {src->y_stride, src->uv_stride, src->uv_stride, + src->alpha_stride}; + int i; + + for (i = 0; i < MAX_MB_PLANE; i++) { + setup_pred_plane(&x->plane[i].src, + buffers[i], strides[i], + mb_row, mb_col, NULL, + x->e_mbd.plane[i].subsampling_x, + x->e_mbd.plane[i].subsampling_y); + } +} + static void set_offsets(VP9_COMP *cpi, - int mb_row, int mb_col, int block_size) { + int mi_row, int mi_col, BLOCK_SIZE_TYPE bsize) { MACROBLOCK *const x = &cpi->mb; VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *mbmi; const int dst_fb_idx = cm->new_fb_idx; + const int idx_str = xd->mode_info_stride * mi_row + mi_col; + const int bw = 1 << mi_width_log2(bsize), bh = 1 << mi_height_log2(bsize); + const int mb_row = mi_row >> 1; + const int mb_col = mi_col >> 1; const int idx_map = mb_row * cm->mb_cols + mb_col; - const int idx_str = xd->mode_info_stride * mb_row + mb_col; + int i; // entropy context structures - xd->above_context = cm->above_context + mb_col; - xd->left_context = cm->left_context + (mb_row & 3); + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].above_context = cm->above_context[i] + + (mi_col * 2 >> xd->plane[i].subsampling_x); + xd->plane[i].left_context = cm->left_context[i] + + (((mi_row * 2) & 15) >> xd->plane[i].subsampling_y); + } - // GF active flags data structure - x->gf_active_ptr = (signed char *)&cpi->gf_active_flags[idx_map]; + // partition contexts + set_partition_seg_context(cm, xd, mi_row, mi_col); // Activity map pointer x->mb_activity_ptr = &cpi->mb_activity_map[idx_map]; @@ -644,30 +536,29 @@ static void set_offsets(VP9_COMP *cpi, x->partition_info = x->pi + idx_str; xd->mode_info_context = cm->mi + idx_str; mbmi = &xd->mode_info_context->mbmi; - xd->prev_mode_info_context = cm->prev_mi + idx_str; + // Special case: if prev_mi is NULL, the previous mode info context + // cannot be used. + xd->prev_mode_info_context = cm->prev_mi ? 
+ cm->prev_mi + idx_str : NULL; // Set up destination pointers - setup_pred_block(&xd->dst, - &cm->yv12_fb[dst_fb_idx], - mb_row, mb_col, NULL, NULL); + setup_dst_planes(xd, &cm->yv12_fb[dst_fb_idx], mi_row, mi_col); /* Set up limit values for MV components to prevent them from * extending beyond the UMV borders assuming 16x16 block size */ - x->mv_row_min = -((mb_row * 16) + VP9BORDERINPIXELS - VP9_INTERP_EXTEND); - x->mv_col_min = -((mb_col * 16) + VP9BORDERINPIXELS - VP9_INTERP_EXTEND); - x->mv_row_max = ((cm->mb_rows - mb_row) * 16 + - (VP9BORDERINPIXELS - block_size - VP9_INTERP_EXTEND)); - x->mv_col_max = ((cm->mb_cols - mb_col) * 16 + - (VP9BORDERINPIXELS - block_size - VP9_INTERP_EXTEND)); + x->mv_row_min = -((mi_row * MI_SIZE) + VP9BORDERINPIXELS - VP9_INTERP_EXTEND); + x->mv_col_min = -((mi_col * MI_SIZE) + VP9BORDERINPIXELS - VP9_INTERP_EXTEND); + x->mv_row_max = ((cm->mi_rows - mi_row) * MI_SIZE + + (VP9BORDERINPIXELS - MI_SIZE * bh - VP9_INTERP_EXTEND)); + x->mv_col_max = ((cm->mi_cols - mi_col) * MI_SIZE + + (VP9BORDERINPIXELS - MI_SIZE * bw - VP9_INTERP_EXTEND)); // Set up distance of MB to edge of frame in 1/8th pel units - block_size >>= 4; // in macroblock units - assert(!(mb_col & (block_size - 1)) && !(mb_row & (block_size - 1))); - set_mb_row(cm, xd, mb_row, block_size); - set_mb_col(cm, xd, mb_col, block_size); + assert(!(mi_col & (bw - 1)) && !(mi_row & (bh - 1))); + set_mi_row_col(cm, xd, mi_row, bh, mi_col, bw); /* set up source buffers */ - setup_pred_block(&x->src, cpi->Source, mb_row, mb_col, NULL, NULL); + vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col); /* R/D setup */ x->rddiv = cpi->RDDIV; @@ -675,31 +566,27 @@ static void set_offsets(VP9_COMP *cpi, /* segment ID */ if (xd->segmentation_enabled) { - if (xd->update_mb_segmentation_map) { - mbmi->segment_id = find_seg_id(cpi->segmentation_map, block_size, - mb_row, cm->mb_rows, mb_col, cm->mb_cols); - } else { - mbmi->segment_id = find_seg_id(cm->last_frame_seg_map, block_size, - mb_row, cm->mb_rows, mb_col, cm->mb_cols); - } - assert(mbmi->segment_id <= 3); + uint8_t *map = xd->update_mb_segmentation_map ? 
cpi->segmentation_map + : cm->last_frame_seg_map; + mbmi->segment_id = find_seg_id(cm, map, bsize, mi_row, + cm->mi_rows, mi_col, cm->mi_cols); + + assert(mbmi->segment_id <= (MAX_MB_SEGMENTS-1)); vp9_mb_init_quantizer(cpi, x); if (xd->segmentation_enabled && cpi->seg0_cnt > 0 && !vp9_segfeature_active(xd, 0, SEG_LVL_REF_FRAME) && - vp9_segfeature_active(xd, 1, SEG_LVL_REF_FRAME) && - vp9_check_segref(xd, 1, INTRA_FRAME) + - vp9_check_segref(xd, 1, LAST_FRAME) + - vp9_check_segref(xd, 1, GOLDEN_FRAME) + - vp9_check_segref(xd, 1, ALTREF_FRAME) == 1) { + vp9_segfeature_active(xd, 1, SEG_LVL_REF_FRAME)) { cpi->seg0_progress = (cpi->seg0_idx << 16) / cpi->seg0_cnt; } else { const int y = mb_row & ~3; const int x = mb_col & ~3; const int p16 = ((mb_row & 1) << 1) + (mb_col & 1); const int p32 = ((mb_row & 2) << 2) + ((mb_col & 2) << 1); - const int tile_progress = cm->cur_tile_mb_col_start * cm->mb_rows; - const int mb_cols = cm->cur_tile_mb_col_end - cm->cur_tile_mb_col_start; + const int tile_progress = + cm->cur_tile_mi_col_start * cm->mb_rows >> 1; + const int mb_cols = + (cm->cur_tile_mi_col_end - cm->cur_tile_mi_col_start) >> 1; cpi->seg0_progress = ((y * mb_cols + x * 4 + p32 + p16 + tile_progress) << 16) / cm->MBs; @@ -709,451 +596,822 @@ static void set_offsets(VP9_COMP *cpi, } } -static int pick_mb_modes(VP9_COMP *cpi, - int mb_row0, - int mb_col0, - TOKENEXTRA **tp, - int *totalrate, - int *totaldist) { +static void pick_sb_modes(VP9_COMP *cpi, int mi_row, int mi_col, + TOKENEXTRA **tp, int *totalrate, int *totaldist, + BLOCK_SIZE_TYPE bsize, PICK_MODE_CONTEXT *ctx) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; - int i; - int splitmodes_used = 0; - ENTROPY_CONTEXT_PLANES left_context[2]; - ENTROPY_CONTEXT_PLANES above_context[2]; - ENTROPY_CONTEXT_PLANES *initial_above_context_ptr = cm->above_context - + mb_col0; - - /* Function should not modify L & A contexts; save and restore on exit */ - vpx_memcpy(left_context, - cm->left_context + (mb_row0 & 2), - sizeof(left_context)); - vpx_memcpy(above_context, - initial_above_context_ptr, - sizeof(above_context)); - - /* Encode MBs in raster order within the SB */ - for (i = 0; i < 4; i++) { - const int x_idx = i & 1, y_idx = i >> 1; - const int mb_row = mb_row0 + y_idx; - const int mb_col = mb_col0 + x_idx; - MB_MODE_INFO *mbmi; - - if ((mb_row >= cm->mb_rows) || (mb_col >= cm->mb_cols)) { - // MB lies outside frame, move on - continue; - } - - // Index of the MB in the SB 0..3 - xd->mb_index = i; - set_offsets(cpi, mb_row, mb_col, 16); - - if (cpi->oxcf.tuning == VP8_TUNE_SSIM) - vp9_activity_masking(cpi, x); - - mbmi = &xd->mode_info_context->mbmi; - mbmi->sb_type = BLOCK_SIZE_MB16X16; - - // Find best coding mode & reconstruct the MB so it is available - // as a predictor for MBs that follow in the SB - if (cm->frame_type == KEY_FRAME) { - int r, d; -#if 0 // ENC_DEBUG - if (enc_debug) - printf("intra pick_mb_modes %d %d\n", mb_row, mb_col); -#endif - vp9_rd_pick_intra_mode(cpi, x, &r, &d); - *totalrate += r; - *totaldist += d; - // Dummy encode, do not do the tokenization - encode_macroblock(cpi, tp, 0, mb_row, mb_col); + if (bsize < BLOCK_SIZE_SB8X8) + if (xd->ab_index != 0) + return; - // Note the encoder may have changed the segment_id + set_offsets(cpi, mi_row, mi_col, bsize); + xd->mode_info_context->mbmi.sb_type = bsize; + if (cpi->oxcf.tuning == VP8_TUNE_SSIM) + vp9_activity_masking(cpi, x); - // Save the coding context - vpx_memcpy(&x->mb_context[xd->sb_index][i].mic, 
xd->mode_info_context,
-                 sizeof(MODE_INFO));
-    } else {
-      int seg_id, r, d;
+  /* Find best coding mode & reconstruct the MB so it is available
+   * as a predictor for MBs that follow in the SB */
+  if (cm->frame_type == KEY_FRAME) {
+    vp9_rd_pick_intra_mode_sb(cpi, x, totalrate, totaldist, bsize, ctx);
+  } else {
+    vp9_rd_pick_inter_mode_sb(cpi, x, mi_row, mi_col, totalrate, totaldist,
+                              bsize, ctx);
+  }
+}
 
-#if 0 // ENC_DEBUG
-      if (enc_debug)
-        printf("inter pick_mb_modes %d %d\n", mb_row, mb_col);
-#endif
-      vp9_pick_mode_inter_macroblock(cpi, x, mb_row, mb_col, &r, &d);
-      *totalrate += r;
-      *totaldist += d;
+static void update_stats(VP9_COMP *cpi, int mi_row, int mi_col) {
+  VP9_COMMON *const cm = &cpi->common;
+  MACROBLOCK *const x = &cpi->mb;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MODE_INFO *mi = xd->mode_info_context;
+  MB_MODE_INFO *const mbmi = &mi->mbmi;
 
-      splitmodes_used += (mbmi->mode == SPLITMV);
+  if (cm->frame_type != KEY_FRAME) {
+    int segment_id, seg_ref_active;
 
-      // Dummy encode, do not do the tokenization
-      encode_macroblock(cpi, tp, 0, mb_row, mb_col);
+    segment_id = mbmi->segment_id;
+    seg_ref_active = vp9_segfeature_active(xd, segment_id,
+                                           SEG_LVL_REF_FRAME);
 
-      seg_id = mbmi->segment_id;
-      if (cpi->mb.e_mbd.segmentation_enabled && seg_id == 0) {
-        cpi->seg0_idx++;
-      }
-      if (!xd->segmentation_enabled ||
-          !vp9_segfeature_active(xd, seg_id, SEG_LVL_REF_FRAME) ||
-          vp9_check_segref(xd, seg_id, INTRA_FRAME) +
-          vp9_check_segref(xd, seg_id, LAST_FRAME) +
-          vp9_check_segref(xd, seg_id, GOLDEN_FRAME) +
-          vp9_check_segref(xd, seg_id, ALTREF_FRAME) > 1) {
-        // Get the prediction context and status
-        int pred_flag = vp9_get_pred_flag(xd, PRED_REF);
-        int pred_context = vp9_get_pred_context(cm, xd, PRED_REF);
-
-        // Count prediction success
-        cpi->ref_pred_count[pred_context][pred_flag]++;
+    if (!seg_ref_active)
+      cpi->intra_inter_count[vp9_get_pred_context(cm, xd, PRED_INTRA_INTER)]
+                            [mbmi->ref_frame[0] > INTRA_FRAME]++;
+
+    // If the segment reference feature is enabled we have only a single
+    // reference frame allowed for the segment so exclude it from
+    // the reference frame counts used to work out probabilities.
+    if ((mbmi->ref_frame[0] > INTRA_FRAME) && !seg_ref_active) {
+      if (cm->comp_pred_mode == HYBRID_PREDICTION)
+        cpi->comp_inter_count[vp9_get_pred_context(cm, xd,
+                                                   PRED_COMP_INTER_INTER)]
+                             [mbmi->ref_frame[1] > INTRA_FRAME]++;
+
+      if (mbmi->ref_frame[1] > INTRA_FRAME) {
+        cpi->comp_ref_count[vp9_get_pred_context(cm, xd, PRED_COMP_REF_P)]
+                           [mbmi->ref_frame[0] == GOLDEN_FRAME]++;
+      } else {
+        cpi->single_ref_count[vp9_get_pred_context(cm, xd, PRED_SINGLE_REF_P1)]
+                             [0][mbmi->ref_frame[0] != LAST_FRAME]++;
+        if (mbmi->ref_frame[0] != LAST_FRAME)
+          cpi->single_ref_count[vp9_get_pred_context(cm, xd,
+                                                     PRED_SINGLE_REF_P2)]
+                               [1][mbmi->ref_frame[0] != GOLDEN_FRAME]++;
      }
    }
+    // Count of last ref frame 0,0 usage
+    if ((mbmi->mode == ZEROMV) && (mbmi->ref_frame[0] == LAST_FRAME))
+      cpi->inter_zz_count++;
  }
+}
+
+// TODO(jingning): the variables used here are a little complicated. Need
+// further refactoring to organize the temporary buffers, when recursive
+// partition down to 4x4 block size is enabled.
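+// A minimal usage sketch, mirroring the call sites in rd_use_partition() and
+// rd_pick_partition() below: a caller first records which sub-block it is
+// working on, then fetches the matching context to carry the mode decision:
+//   *(get_sb_index(xd, subsize)) = i;
+//   pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, subsize,
+//                 get_block_context(x, subsize));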
+static PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { + MACROBLOCKD *const xd = &x->e_mbd; - /* Restore L & A coding context to those in place on entry */ - vpx_memcpy(cm->left_context + (mb_row0 & 2), - left_context, - sizeof(left_context)); - vpx_memcpy(initial_above_context_ptr, - above_context, - sizeof(above_context)); + switch (bsize) { + case BLOCK_SIZE_SB64X64: + return &x->sb64_context; + case BLOCK_SIZE_SB64X32: + return &x->sb64x32_context[xd->sb_index]; + case BLOCK_SIZE_SB32X64: + return &x->sb32x64_context[xd->sb_index]; + case BLOCK_SIZE_SB32X32: + return &x->sb32_context[xd->sb_index]; + case BLOCK_SIZE_SB32X16: + return &x->sb32x16_context[xd->sb_index][xd->mb_index]; + case BLOCK_SIZE_SB16X32: + return &x->sb16x32_context[xd->sb_index][xd->mb_index]; + case BLOCK_SIZE_MB16X16: + return &x->mb_context[xd->sb_index][xd->mb_index]; + case BLOCK_SIZE_SB16X8: + return &x->sb16x8_context[xd->sb_index][xd->mb_index][xd->b_index]; + case BLOCK_SIZE_SB8X16: + return &x->sb8x16_context[xd->sb_index][xd->mb_index][xd->b_index]; + case BLOCK_SIZE_SB8X8: + return &x->sb8x8_context[xd->sb_index][xd->mb_index][xd->b_index]; + case BLOCK_SIZE_SB8X4: + return &x->sb8x4_context[xd->sb_index][xd->mb_index][xd->b_index]; + case BLOCK_SIZE_SB4X8: + return &x->sb4x8_context[xd->sb_index][xd->mb_index][xd->b_index]; + case BLOCK_SIZE_AB4X4: + return &x->ab4x4_context[xd->sb_index][xd->mb_index][xd->b_index]; + default: + assert(0); + return NULL; + } +} - return splitmodes_used; +static BLOCK_SIZE_TYPE *get_sb_partitioning(MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { + MACROBLOCKD *xd = &x->e_mbd; + switch (bsize) { + case BLOCK_SIZE_SB64X64: + return &x->sb64_partitioning; + case BLOCK_SIZE_SB32X32: + return &x->sb_partitioning[xd->sb_index]; + case BLOCK_SIZE_MB16X16: + return &x->mb_partitioning[xd->sb_index][xd->mb_index]; + case BLOCK_SIZE_SB8X8: + return &x->b_partitioning[xd->sb_index][xd->mb_index][xd->b_index]; + default: + assert(0); + return NULL; + } } -static void pick_sb_modes(VP9_COMP *cpi, - int mb_row, - int mb_col, - TOKENEXTRA **tp, - int *totalrate, - int *totaldist) { +static void restore_context(VP9_COMP *cpi, int mi_row, int mi_col, + ENTROPY_CONTEXT a[16 * MAX_MB_PLANE], + ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], + PARTITION_CONTEXT sa[8], + PARTITION_CONTEXT sl[8], + BLOCK_SIZE_TYPE bsize) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; - - set_offsets(cpi, mb_row, mb_col, 32); - xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_SB32X32; - if (cpi->oxcf.tuning == VP8_TUNE_SSIM) - vp9_activity_masking(cpi, x); - - /* Find best coding mode & reconstruct the MB so it is available - * as a predictor for MBs that follow in the SB */ - if (cm->frame_type == KEY_FRAME) { - vp9_rd_pick_intra_mode_sb32(cpi, x, - totalrate, - totaldist); - - /* Save the coding context */ - vpx_memcpy(&x->sb32_context[xd->sb_index].mic, xd->mode_info_context, - sizeof(MODE_INFO)); - } else { - vp9_rd_pick_inter_mode_sb32(cpi, x, mb_row, mb_col, totalrate, totaldist); + int p; + int bwl = b_width_log2(bsize), bw = 1 << bwl; + int bhl = b_height_log2(bsize), bh = 1 << bhl; + int mwl = mi_width_log2(bsize), mw = 1 << mwl; + int mhl = mi_height_log2(bsize), mh = 1 << mhl; + for (p = 0; p < MAX_MB_PLANE; p++) { + vpx_memcpy(cm->above_context[p] + + ((mi_col * 2) >> xd->plane[p].subsampling_x), + a + bw * p, + sizeof(ENTROPY_CONTEXT) * bw >> xd->plane[p].subsampling_x); + vpx_memcpy(cm->left_context[p] + + ((mi_row & 
MI_MASK) * 2 >> xd->plane[p].subsampling_y), + l + bh * p, + sizeof(ENTROPY_CONTEXT) * bh >> xd->plane[p].subsampling_y); + } + vpx_memcpy(cm->above_seg_context + mi_col, sa, + sizeof(PARTITION_CONTEXT) * mw); + vpx_memcpy(cm->left_seg_context + (mi_row & MI_MASK), sl, + sizeof(PARTITION_CONTEXT) * mh); +} +static void save_context(VP9_COMP *cpi, int mi_row, int mi_col, + ENTROPY_CONTEXT a[16 * MAX_MB_PLANE], + ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], + PARTITION_CONTEXT sa[8], + PARTITION_CONTEXT sl[8], + BLOCK_SIZE_TYPE bsize) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; + int p; + int bwl = b_width_log2(bsize), bw = 1 << bwl; + int bhl = b_height_log2(bsize), bh = 1 << bhl; + int mwl = mi_width_log2(bsize), mw = 1 << mwl; + int mhl = mi_height_log2(bsize), mh = 1 << mhl; + + // buffer the above/left context information of the block in search. + for (p = 0; p < MAX_MB_PLANE; ++p) { + vpx_memcpy(a + bw * p, cm->above_context[p] + + (mi_col * 2 >> xd->plane[p].subsampling_x), + sizeof(ENTROPY_CONTEXT) * bw >> xd->plane[p].subsampling_x); + vpx_memcpy(l + bh * p, cm->left_context[p] + + ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y), + sizeof(ENTROPY_CONTEXT) * bh >> xd->plane[p].subsampling_y); } + vpx_memcpy(sa, cm->above_seg_context + mi_col, + sizeof(PARTITION_CONTEXT) * mw); + vpx_memcpy(sl, cm->left_seg_context + (mi_row & MI_MASK), + sizeof(PARTITION_CONTEXT) * mh); } -static void pick_sb64_modes(VP9_COMP *cpi, - int mb_row, - int mb_col, - TOKENEXTRA **tp, - int *totalrate, - int *totaldist) { +static void encode_b(VP9_COMP *cpi, TOKENEXTRA **tp, + int mi_row, int mi_col, int output_enabled, + BLOCK_SIZE_TYPE bsize, int sub_index) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; - set_offsets(cpi, mb_row, mb_col, 64); - xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_SB64X64; - if (cpi->oxcf.tuning == VP8_TUNE_SSIM) - vp9_activity_masking(cpi, x); + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; - /* Find best coding mode & reconstruct the MB so it is available - * as a predictor for MBs that follow in the SB */ - if (cm->frame_type == KEY_FRAME) { - vp9_rd_pick_intra_mode_sb64(cpi, x, totalrate, totaldist); + if (sub_index != -1) + *(get_sb_index(xd, bsize)) = sub_index; - /* Save the coding context */ - vpx_memcpy(&x->sb64_context.mic, xd->mode_info_context, sizeof(MODE_INFO)); - } else { - vp9_rd_pick_inter_mode_sb64(cpi, x, mb_row, mb_col, totalrate, totaldist); + if (bsize < BLOCK_SIZE_SB8X8) + if (xd->ab_index > 0) + return; + set_offsets(cpi, mi_row, mi_col, bsize); + update_state(cpi, get_block_context(x, bsize), bsize, output_enabled); + encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize); + + if (output_enabled) { + update_stats(cpi, mi_row, mi_col); + + (*tp)->token = EOSB_TOKEN; + (*tp)++; } } -static void update_stats(VP9_COMP *cpi, int mb_row, int mb_col) { +static void encode_sb(VP9_COMP *cpi, TOKENEXTRA **tp, + int mi_row, int mi_col, int output_enabled, + BLOCK_SIZE_TYPE bsize) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; - MODE_INFO *mi = xd->mode_info_context; - MB_MODE_INFO *const mbmi = &mi->mbmi; + BLOCK_SIZE_TYPE c1 = BLOCK_SIZE_SB8X8; + const int bsl = b_width_log2(bsize), bs = (1 << bsl) / 4; + int bwl, bhl; + int UNINITIALIZED_IS_SAFE(pl); - if (cm->frame_type == KEY_FRAME) { -#ifdef MODE_STATS - y_modes[mbmi->mode]++; -#endif + if (mi_row 
>= cm->mi_rows || mi_col >= cm->mi_cols) + return; + + c1 = BLOCK_SIZE_AB4X4; + if (bsize >= BLOCK_SIZE_SB8X8) { + set_partition_seg_context(cm, xd, mi_row, mi_col); + pl = partition_plane_context(xd, bsize); + c1 = *(get_sb_partitioning(x, bsize)); + } + + bwl = b_width_log2(c1), bhl = b_height_log2(c1); + + if (bsl == bwl && bsl == bhl) { + if (output_enabled && bsize >= BLOCK_SIZE_SB8X8) + cpi->partition_count[pl][PARTITION_NONE]++; + encode_b(cpi, tp, mi_row, mi_col, output_enabled, c1, -1); + } else if (bsl == bhl && bsl > bwl) { + if (output_enabled) + cpi->partition_count[pl][PARTITION_VERT]++; + encode_b(cpi, tp, mi_row, mi_col, output_enabled, c1, 0); + encode_b(cpi, tp, mi_row, mi_col + bs, output_enabled, c1, 1); + } else if (bsl == bwl && bsl > bhl) { + if (output_enabled) + cpi->partition_count[pl][PARTITION_HORZ]++; + encode_b(cpi, tp, mi_row, mi_col, output_enabled, c1, 0); + encode_b(cpi, tp, mi_row + bs, mi_col, output_enabled, c1, 1); } else { - int segment_id, seg_ref_active; + BLOCK_SIZE_TYPE subsize; + int i; - if (mbmi->ref_frame) { - int pred_context = vp9_get_pred_context(cm, xd, PRED_COMP); + assert(bwl < bsl && bhl < bsl); + subsize = get_subsize(bsize, PARTITION_SPLIT); - if (mbmi->second_ref_frame <= INTRA_FRAME) - cpi->single_pred_count[pred_context]++; - else - cpi->comp_pred_count[pred_context]++; - } + if (output_enabled) + cpi->partition_count[pl][PARTITION_SPLIT]++; + + for (i = 0; i < 4; i++) { + const int x_idx = i & 1, y_idx = i >> 1; -#ifdef MODE_STATS - inter_y_modes[mbmi->mode]++; + *(get_sb_index(xd, subsize)) = i; + encode_sb(cpi, tp, mi_row + y_idx * bs, mi_col + x_idx * bs, + output_enabled, subsize); + } + } - if (mbmi->mode == SPLITMV) { - int b; + if (bsize >= BLOCK_SIZE_SB8X8 && + (bsize == BLOCK_SIZE_SB8X8 || bsl == bwl || bsl == bhl)) { + set_partition_seg_context(cm, xd, mi_row, mi_col); + update_partition_context(xd, c1, bsize); + } +} - for (b = 0; b < x->partition_info->count; b++) { - inter_b_modes[x->partition_info->bmi[b].mode]++; +static void set_partitioning(VP9_COMP *cpi, MODE_INFO *m, + BLOCK_SIZE_TYPE bsize) { + VP9_COMMON *const cm = &cpi->common; + const int mis = cm->mode_info_stride; + int bsl = b_width_log2(bsize); + int bs = (1 << bsl) / 2; // + int block_row, block_col; + int row, col; + + // this test function sets the entire macroblock to the same bsize + for (block_row = 0; block_row < 8; block_row += bs) { + for (block_col = 0; block_col < 8; block_col += bs) { + for (row = 0; row < bs; row++) { + for (col = 0; col < bs; col++) { + m[(block_row+row)*mis + block_col+col].mbmi.sb_type = bsize; + } } } -#endif + } +} - // If we have just a single reference frame coded for a segment then - // exclude from the reference frame counts used to work out - // probabilities. NOTE: At the moment we dont support custom trees - // for the reference frame coding for each segment but this is a - // possible future action. 
- segment_id = mbmi->segment_id; - seg_ref_active = vp9_segfeature_active(xd, segment_id, - SEG_LVL_REF_FRAME); - if (!seg_ref_active || - ((vp9_check_segref(xd, segment_id, INTRA_FRAME) + - vp9_check_segref(xd, segment_id, LAST_FRAME) + - vp9_check_segref(xd, segment_id, GOLDEN_FRAME) + - vp9_check_segref(xd, segment_id, ALTREF_FRAME)) > 1)) { - cpi->count_mb_ref_frame_usage[mbmi->ref_frame]++; +static void set_block_size(VP9_COMMON *const cm, + MODE_INFO *m, BLOCK_SIZE_TYPE bsize, int mis, + int mi_row, int mi_col) { + int row, col; + int bwl = b_width_log2(bsize); + int bhl = b_height_log2(bsize); + int bsl = (bwl > bhl ? bwl : bhl); + + int bs = (1 << bsl) / 2; // + MODE_INFO *m2 = m + mi_row * mis + mi_col; + for (row = 0; row < bs; row++) { + for (col = 0; col < bs; col++) { + if (mi_row + row >= cm->mi_rows || mi_col + col >= cm->mi_cols) + continue; + m2[row*mis+col].mbmi.sb_type = bsize; } - // Count of last ref frame 0,0 usage - if ((mbmi->mode == ZEROMV) && (mbmi->ref_frame == LAST_FRAME)) - cpi->inter_zz_count++; } -#if CONFIG_CODE_NONZEROCOUNT - vp9_update_nzc_counts(&cpi->common, xd, mb_row, mb_col); -#endif +} +typedef struct { + int64_t sum_square_error; + int64_t sum_error; + int count; + int variance; +} var; + +#define VT(TYPE, BLOCKSIZE) \ + typedef struct { \ + var none; \ + var horz[2]; \ + var vert[2]; \ + BLOCKSIZE split[4]; } TYPE; + +VT(v8x8, var) +VT(v16x16, v8x8) +VT(v32x32, v16x16) +VT(v64x64, v32x32) + +typedef enum { + V16X16, + V32X32, + V64X64, +} TREE_LEVEL; + +// Set variance values given sum square error, sum error, count. +static void fill_variance(var *v, int64_t s2, int64_t s, int c) { + v->sum_square_error = s2; + v->sum_error = s; + v->count = c; + v->variance = 256 + * (v->sum_square_error - v->sum_error * v->sum_error / v->count) + / v->count; } -static void encode_sb(VP9_COMP *cpi, - int mb_row, - int mb_col, - int output_enabled, - TOKENEXTRA **tp, int is_sb) { - VP9_COMMON *const cm = &cpi->common; - MACROBLOCK *const x = &cpi->mb; - MACROBLOCKD *const xd = &x->e_mbd; +// Combine 2 variance structures by summing the sum_error, sum_square_error, +// and counts and then calculating the new variance. +void sum_2_variances(var *r, var *a, var*b) { + fill_variance(r, a->sum_square_error + b->sum_square_error, + a->sum_error + b->sum_error, a->count + b->count); +} +// Fill one level of our variance tree, by summing the split sums into each of +// the horizontal, vertical and none from split and recalculating variance. +#define fill_variance_tree(VT) \ + sum_2_variances(VT.horz[0], VT.split[0].none, VT.split[1].none); \ + sum_2_variances(VT.horz[1], VT.split[2].none, VT.split[3].none); \ + sum_2_variances(VT.vert[0], VT.split[0].none, VT.split[2].none); \ + sum_2_variances(VT.vert[1], VT.split[1].none, VT.split[3].none); \ + sum_2_variances(VT.none, VT.vert[0], VT.vert[1]); + +// Set the blocksize in the macroblock info structure if the variance is less +// than our threshold to one of none, horz, vert. 
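+// For illustration, set_vt_size(vt, BLOCK_SIZE_SB64X64, mi_row, mi_col,
+// return) expands to roughly:
+//   if (vt.none.variance < threshold) {
+//     set_block_size(cm, m, BLOCK_SIZE_SB64X64, mis, mi_row, mi_col);
+//     return;
+//   }
+//   if (vt.horz[0].variance < threshold && vt.horz[1].variance < threshold) {
+//     set_block_size(cm, m, get_subsize(BLOCK_SIZE_SB64X64, PARTITION_HORZ),
+//                    mis, mi_row, mi_col);
+//     return;
+//   }
+//   // ...and the analogous check for the vert[] pair.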
+#define set_vt_size(VT, BLOCKSIZE, R, C, ACTION) \ + if (VT.none.variance < threshold) { \ + set_block_size(cm, m, BLOCKSIZE, mis, R, C); \ + ACTION; \ + } \ + if (VT.horz[0].variance < threshold && VT.horz[1].variance < threshold ) { \ + set_block_size(cm, m, get_subsize(BLOCKSIZE, PARTITION_HORZ), mis, R, C); \ + ACTION; \ + } \ + if (VT.vert[0].variance < threshold && VT.vert[1].variance < threshold ) { \ + set_block_size(cm, m, get_subsize(BLOCKSIZE, PARTITION_VERT), mis, R, C); \ + ACTION; \ + } - cpi->sb32_count[is_sb]++; - if (is_sb) { - set_offsets(cpi, mb_row, mb_col, 32); - update_state(cpi, &x->sb32_context[xd->sb_index], 32, output_enabled); +static void choose_partitioning(VP9_COMP *cpi, MODE_INFO *m, int mi_row, + int mi_col) { + VP9_COMMON * const cm = &cpi->common; + MACROBLOCK *x = &cpi->mb; + MACROBLOCKD *xd = &cpi->mb.e_mbd; + const int mis = cm->mode_info_stride; + // TODO(JBB): More experimentation or testing of this threshold; + int64_t threshold = 4; + int i, j, k; + v64x64 vt; + unsigned char * s; + int sp; + const unsigned char * d = xd->plane[0].pre->buf; + int dp = xd->plane[0].pre->stride; + int pixels_wide = 64, pixels_high = 64; - encode_superblock32(cpi, tp, - output_enabled, mb_row, mb_col); - if (output_enabled) { - update_stats(cpi, mb_row, mb_col); - } + vpx_memset(&vt, 0, sizeof(vt)); - if (output_enabled) { - (*tp)->Token = EOSB_TOKEN; - (*tp)++; - if (mb_row < cm->mb_rows) - cpi->tplist[mb_row].stop = *tp; - } - } else { - int i; + set_offsets(cpi, mi_row, mi_col, BLOCK_SIZE_SB64X64); - for (i = 0; i < 4; i++) { - const int x_idx = i & 1, y_idx = i >> 1; + if (xd->mb_to_right_edge < 0) + pixels_wide += (xd->mb_to_right_edge >> 3); - if ((mb_row + y_idx >= cm->mb_rows) || (mb_col + x_idx >= cm->mb_cols)) { - // MB lies outside frame, move on - continue; - } + if (xd->mb_to_bottom_edge < 0) + pixels_high += (xd->mb_to_bottom_edge >> 3); - set_offsets(cpi, mb_row + y_idx, mb_col + x_idx, 16); - xd->mb_index = i; - update_state(cpi, &x->mb_context[xd->sb_index][i], 16, output_enabled); + s = x->plane[0].src.buf; + sp = x->plane[0].src.stride; - if (cpi->oxcf.tuning == VP8_TUNE_SSIM) - vp9_activity_masking(cpi, x); + // TODO(JBB): Clearly the higher the quantizer the fewer partitions we want + // but this needs more experimentation. + threshold = threshold * cpi->common.base_qindex * cpi->common.base_qindex; - encode_macroblock(cpi, tp, - output_enabled, mb_row + y_idx, mb_col + x_idx); - if (output_enabled) { - update_stats(cpi, mb_row + y_idx, mb_col + x_idx); - } + // if ( cm->frame_type == KEY_FRAME ) { + d = vp9_64x64_zeros; + dp = 64; + // } - if (output_enabled) { - (*tp)->Token = EOSB_TOKEN; - (*tp)++; - if (mb_row + y_idx < cm->mb_rows) - cpi->tplist[mb_row + y_idx].stop = *tp; - } + // Fill in the entire tree of 8x8 variances for splits. 
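+  // Each vp9_get_sse_sum_8x8() call below accumulates the sum of squared
+  // differences (sse) and the sum of differences (sum) for an 8x8 block;
+  // with the all-zero reference selected above, these reduce to raw source
+  // statistics, and fill_variance() then computes
+  //   variance = 256 * (sse - sum * sum / 64) / 64,
+  // a scaled population variance. A perfectly flat 8x8 block of value 5,
+  // for instance, gives sse = 1600 and sum = 320, hence variance = 0,
+  // which favors merging into a larger partition.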
+ for (i = 0; i < 4; i++) { + const int x32_idx = ((i & 1) << 5); + const int y32_idx = ((i >> 1) << 5); + for (j = 0; j < 4; j++) { + const int x_idx = x32_idx + ((j & 1) << 4); + const int y_idx = y32_idx + ((j >> 1) << 4); + const uint8_t *st = s + y_idx * sp + x_idx; + const uint8_t *dt = d + y_idx * dp + x_idx; + unsigned int sse = 0; + int sum = 0; + v16x16 *vst = &vt.split[i].split[j]; + sse = sum = 0; + if (x_idx < pixels_wide && y_idx < pixels_high) + vp9_get_sse_sum_8x8(st, sp, dt, dp, &sse, &sum); + fill_variance(&vst->split[0].none, sse, sum, 64); + sse = sum = 0; + if (x_idx + 8 < pixels_wide && y_idx < pixels_high) + vp9_get_sse_sum_8x8(st + 8, sp, dt + 8, dp, &sse, &sum); + fill_variance(&vst->split[1].none, sse, sum, 64); + sse = sum = 0; + if (x_idx < pixels_wide && y_idx + 8 < pixels_high) + vp9_get_sse_sum_8x8(st + 8 * sp, sp, dt + 8 * dp, dp, &sse, &sum); + fill_variance(&vst->split[2].none, sse, sum, 64); + sse = sum = 0; + if (x_idx + 8 < pixels_wide && y_idx + 8 < pixels_high) + vp9_get_sse_sum_8x8(st + 8 * sp + 8, sp, dt + 8 + 8 * dp, dp, &sse, + &sum); + fill_variance(&vst->split[3].none, sse, sum, 64); } } + // Fill the rest of the variance tree by summing the split partition + // values. + for (i = 0; i < 4; i++) { + for (j = 0; j < 4; j++) { + fill_variance_tree(&vt.split[i].split[j]) + } + fill_variance_tree(&vt.split[i]) + } + fill_variance_tree(&vt) + + // Now go through the entire structure, splitting every blocksize until + // we get to one that's got a variance lower than our threshold, or we + // hit 8x8. + set_vt_size( vt, BLOCK_SIZE_SB64X64, mi_row, mi_col, return); + for (i = 0; i < 4; ++i) { + const int x32_idx = ((i & 1) << 2); + const int y32_idx = ((i >> 1) << 2); + set_vt_size(vt, BLOCK_SIZE_SB32X32, mi_row + y32_idx, mi_col + x32_idx, + continue); - // debug output -#if DBG_PRNT_SEGMAP - { - FILE *statsfile; - statsfile = fopen("segmap2.stt", "a"); - fprintf(statsfile, "\n"); - fclose(statsfile); + for (j = 0; j < 4; ++j) { + const int x16_idx = ((j & 1) << 1); + const int y16_idx = ((j >> 1) << 1); + set_vt_size(vt, BLOCK_SIZE_MB16X16, mi_row + y32_idx + y16_idx, + mi_col+x32_idx+x16_idx, continue); + + for (k = 0; k < 4; ++k) { + const int x8_idx = (k & 1); + const int y8_idx = (k >> 1); + set_block_size(cm, m, BLOCK_SIZE_SB8X8, mis, + mi_row + y32_idx + y16_idx + y8_idx, + mi_col + x32_idx + x16_idx + x8_idx); + } + } } -#endif } +static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp, + int mi_row, int mi_col, BLOCK_SIZE_TYPE bsize, + int *rate, int *dist) { + VP9_COMMON * const cm = &cpi->common; + MACROBLOCK * const x = &cpi->mb; + MACROBLOCKD *xd = &cpi->mb.e_mbd; + const int mis = cm->mode_info_stride; + int bwl = b_width_log2(m->mbmi.sb_type); + int bhl = b_height_log2(m->mbmi.sb_type); + int bsl = b_width_log2(bsize); + int bh = (1 << bhl); + int bs = (1 << bsl); + int bss = (1 << bsl)/4; + int i, pl; + PARTITION_TYPE partition; + BLOCK_SIZE_TYPE subsize; + ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; + PARTITION_CONTEXT sl[8], sa[8]; + int r = 0, d = 0; + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; -static void encode_sb64(VP9_COMP *cpi, - int mb_row, - int mb_col, - TOKENEXTRA **tp, int is_sb[4]) { - VP9_COMMON *const cm = &cpi->common; - MACROBLOCK *const x = &cpi->mb; - MACROBLOCKD *const xd = &x->e_mbd; - cpi->sb64_count[is_sb[0] == 2]++; - if (is_sb[0] == 2) { - set_offsets(cpi, mb_row, mb_col, 64); - update_state(cpi, &x->sb64_context, 64, 1); - encode_superblock64(cpi, tp, - 
1, mb_row, mb_col); - update_stats(cpi, mb_row, mb_col); + // parse the partition type + if ((bwl == bsl) && (bhl == bsl)) + partition = PARTITION_NONE; + else if ((bwl == bsl) && (bhl < bsl)) + partition = PARTITION_HORZ; + else if ((bwl < bsl) && (bhl == bsl)) + partition = PARTITION_VERT; + else if ((bwl < bsl) && (bhl < bsl)) + partition = PARTITION_SPLIT; + else + assert(0); + + subsize = get_subsize(bsize, partition); - (*tp)->Token = EOSB_TOKEN; - (*tp)++; - if (mb_row < cm->mb_rows) - cpi->tplist[mb_row].stop = *tp; - } else { - int i; + // TODO(JBB): this restriction is here because pick_sb_modes can return + // r's that are INT_MAX meaning we can't select a mode / mv for this block. + // when the code is made to work for less than sb8x8 we need to come up with + // a solution to this problem. + assert(subsize >= BLOCK_SIZE_SB8X8); - for (i = 0; i < 4; i++) { - const int x_idx = i & 1, y_idx = i >> 1; + if (bsize >= BLOCK_SIZE_SB8X8) { + xd->left_seg_context = cm->left_seg_context + (mi_row & MI_MASK); + xd->above_seg_context = cm->above_seg_context + mi_col; + *(get_sb_partitioning(x, bsize)) = subsize; + } - if (mb_row + y_idx * 2 >= cm->mb_rows || - mb_col + x_idx * 2 >= cm->mb_cols) { - // MB lies outside frame, move on - continue; + pl = partition_plane_context(xd, bsize); + save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); + switch (partition) { + case PARTITION_NONE: + pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, bsize, + get_block_context(x, bsize)); + r += x->partition_cost[pl][PARTITION_NONE]; + break; + case PARTITION_HORZ: + *(get_sb_index(xd, subsize)) = 0; + pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, subsize, + get_block_context(x, subsize)); + if (mi_row + (bh >> 1) <= cm->mi_rows) { + int rt, dt; + update_state(cpi, get_block_context(x, subsize), subsize, 0); + encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); + *(get_sb_index(xd, subsize)) = 1; + pick_sb_modes(cpi, mi_row + (bs >> 2), mi_col, tp, &rt, &dt, subsize, + get_block_context(x, subsize)); + r += rt; + d += dt; } - xd->sb_index = i; - encode_sb(cpi, mb_row + 2 * y_idx, mb_col + 2 * x_idx, 1, tp, - is_sb[i]); - } + set_partition_seg_context(cm, xd, mi_row, mi_col); + pl = partition_plane_context(xd, bsize); + r += x->partition_cost[pl][PARTITION_HORZ]; + break; + case PARTITION_VERT: + *(get_sb_index(xd, subsize)) = 0; + pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, subsize, + get_block_context(x, subsize)); + if (mi_col + (bs >> 1) <= cm->mi_cols) { + int rt, dt; + update_state(cpi, get_block_context(x, subsize), subsize, 0); + encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); + *(get_sb_index(xd, subsize)) = 1; + pick_sb_modes(cpi, mi_row, mi_col + (bs >> 2), tp, &rt, &dt, subsize, + get_block_context(x, subsize)); + r += rt; + d += dt; + } + set_partition_seg_context(cm, xd, mi_row, mi_col); + pl = partition_plane_context(xd, bsize); + r += x->partition_cost[pl][PARTITION_VERT]; + restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); + break; + case PARTITION_SPLIT: + for (i = 0; i < 4; i++) { + int x_idx = (i & 1) * (bs >> 2); + int y_idx = (i >> 1) * (bs >> 2); + int jj = i >> 1, ii = i & 0x01; + int rt, dt; + + if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) + continue; + + *(get_sb_index(xd, subsize)) = i; + + rd_use_partition(cpi, m + jj * bss * mis + ii * bss, tp, mi_row + y_idx, + mi_col + x_idx, subsize, &rt, &dt); + r += rt; + d += dt; + } + set_partition_seg_context(cm, xd, mi_row, mi_col); + pl = partition_plane_context(xd, bsize); + r += 
x->partition_cost[pl][PARTITION_SPLIT]; + break; + default: + assert(0); + } + + // update partition context +#if CONFIG_AB4X4 + if (bsize >= BLOCK_SIZE_SB8X8 && + (bsize == BLOCK_SIZE_SB8X8 || partition != PARTITION_SPLIT)) { +#else + if (bsize > BLOCK_SIZE_SB8X8 + && (bsize == BLOCK_SIZE_MB16X16 || partition != PARTITION_SPLIT)) { +#endif + set_partition_seg_context(cm, xd, mi_row, mi_col); + update_partition_context(xd, subsize, bsize); } + restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); + + if (r < INT_MAX && d < INT_MAX) + encode_sb(cpi, tp, mi_row, mi_col, bsize == BLOCK_SIZE_SB64X64, bsize); + *rate = r; + *dist = d; } -static void encode_sb_row(VP9_COMP *cpi, - int mb_row, - TOKENEXTRA **tp, - int *totalrate) { + +// TODO(jingning,jimbankoski,rbultje): properly skip partition types that are +// unlikely to be selected depending on previously rate-distortion optimization +// results, for encoding speed-up. +static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, + int mi_row, int mi_col, + BLOCK_SIZE_TYPE bsize, + int *rate, int *dist) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; - int mb_col; - - // Initialize the left context for the new SB row - vpx_memset(cm->left_context, 0, sizeof(cm->left_context)); - - // Code each SB in the row - for (mb_col = cm->cur_tile_mb_col_start; - mb_col < cm->cur_tile_mb_col_end; mb_col += 4) { - int i; - int sb32_rate = 0, sb32_dist = 0; - int is_sb[4]; - int sb64_rate = INT_MAX, sb64_dist; - int sb64_skip = 0; - ENTROPY_CONTEXT_PLANES l[4], a[4]; - TOKENEXTRA *tp_orig = *tp; - - memcpy(&a, cm->above_context + mb_col, sizeof(a)); - memcpy(&l, cm->left_context, sizeof(l)); - for (i = 0; i < 4; i++) { - const int x_idx = (i & 1) << 1, y_idx = i & 2; - int mb_rate = 0, mb_dist = 0; - int sb_rate = INT_MAX, sb_dist; - int splitmodes_used = 0; - int sb32_skip = 0; - - if (mb_row + y_idx >= cm->mb_rows || mb_col + x_idx >= cm->mb_cols) - continue; + int bsl = b_width_log2(bsize), bs = 1 << bsl; + int ms = bs / 2; + ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; + PARTITION_CONTEXT sl[8], sa[8]; + TOKENEXTRA *tp_orig = *tp; + int i, pl; + BLOCK_SIZE_TYPE subsize; + int srate = INT_MAX, sdist = INT_MAX; + + if (bsize < BLOCK_SIZE_SB8X8) + if (xd->ab_index != 0) { + *rate = 0; + *dist = 0; + return; + } + assert(mi_height_log2(bsize) == mi_width_log2(bsize)); - xd->sb_index = i; + save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); - splitmodes_used = pick_mb_modes(cpi, mb_row + y_idx, mb_col + x_idx, - tp, &mb_rate, &mb_dist); + // PARTITION_SPLIT + if (bsize >= BLOCK_SIZE_SB8X8) { + int r4 = 0, d4 = 0; + subsize = get_subsize(bsize, PARTITION_SPLIT); + *(get_sb_partitioning(x, bsize)) = subsize; - mb_rate += vp9_cost_bit(cm->sb32_coded, 0); + for (i = 0; i < 4; ++i) { + int x_idx = (i & 1) * (ms >> 1); + int y_idx = (i >> 1) * (ms >> 1); + int r = 0, d = 0; - if (cpi->sf.splitmode_breakout) { - sb32_skip = splitmodes_used; - sb64_skip += splitmodes_used; - } + if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) + continue; - if ( !sb32_skip && - !(((cm->mb_cols & 1) && mb_col + x_idx == cm->mb_cols - 1) || - ((cm->mb_rows & 1) && mb_row + y_idx == cm->mb_rows - 1))) { - /* Pick a mode assuming that it applies to all 4 of the MBs in the SB */ - pick_sb_modes(cpi, mb_row + y_idx, mb_col + x_idx, - tp, &sb_rate, &sb_dist); - sb_rate += vp9_cost_bit(cm->sb32_coded, 1); - } + *(get_sb_index(xd, subsize)) = i; + rd_pick_partition(cpi, tp, mi_row + 
y_idx, mi_col + x_idx, subsize, + &r, &d); + + r4 += r; + d4 += d; + } + set_partition_seg_context(cm, xd, mi_row, mi_col); + pl = partition_plane_context(xd, bsize); + if (r4 < INT_MAX) + r4 += x->partition_cost[pl][PARTITION_SPLIT]; + assert(r4 >= 0); + assert(d4 >= 0); + srate = r4; + sdist = d4; + restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); + } - /* Decide whether to encode as a SB or 4xMBs */ - if (sb_rate < INT_MAX && - RDCOST(x->rdmult, x->rddiv, sb_rate, sb_dist) < - RDCOST(x->rdmult, x->rddiv, mb_rate, mb_dist)) { - is_sb[i] = 1; - sb32_rate += sb_rate; - sb32_dist += sb_dist; - } else { - is_sb[i] = 0; - sb32_rate += mb_rate; - sb32_dist += mb_dist; + // PARTITION_HORZ + if (bsize >= BLOCK_SIZE_SB8X8 && mi_col + (ms >> 1) < cm->mi_cols) { + int r2, d2; + int r = 0, d = 0; + subsize = get_subsize(bsize, PARTITION_HORZ); + *(get_sb_index(xd, subsize)) = 0; + pick_sb_modes(cpi, mi_row, mi_col, tp, &r2, &d2, subsize, + get_block_context(x, subsize)); + + if (mi_row + (ms >> 1) < cm->mi_rows) { + update_state(cpi, get_block_context(x, subsize), subsize, 0); + encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); + + *(get_sb_index(xd, subsize)) = 1; + pick_sb_modes(cpi, mi_row + (ms >> 1), mi_col, tp, &r, &d, subsize, + get_block_context(x, subsize)); + r2 += r; + d2 += d; + } + set_partition_seg_context(cm, xd, mi_row, mi_col); + pl = partition_plane_context(xd, bsize); + if (r2 < INT_MAX) + r2 += x->partition_cost[pl][PARTITION_HORZ]; + if (RDCOST(x->rdmult, x->rddiv, r2, d2) < + RDCOST(x->rdmult, x->rddiv, srate, sdist)) { + srate = r2; + sdist = d2; + *(get_sb_partitioning(x, bsize)) = subsize; + } + restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); + } - // If we used 16x16 instead of 32x32 then skip 64x64 (if enabled). - if (cpi->sf.mb16_breakout) { - ++sb64_skip; - } - } + // PARTITION_VERT + if (bsize >= BLOCK_SIZE_SB8X8 && mi_row + (ms >> 1) < cm->mi_rows) { + int r2, d2; + subsize = get_subsize(bsize, PARTITION_VERT); + *(get_sb_index(xd, subsize)) = 0; + pick_sb_modes(cpi, mi_row, mi_col, tp, &r2, &d2, subsize, + get_block_context(x, subsize)); + if (mi_col + (ms >> 1) < cm->mi_cols) { + int r = 0, d = 0; + update_state(cpi, get_block_context(x, subsize), subsize, 0); + encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); + + *(get_sb_index(xd, subsize)) = 1; + pick_sb_modes(cpi, mi_row, mi_col + (ms >> 1), tp, &r, &d, subsize, + get_block_context(x, subsize)); + r2 += r; + d2 += d; + } + set_partition_seg_context(cm, xd, mi_row, mi_col); + pl = partition_plane_context(xd, bsize); + if (r2 < INT_MAX) + r2 += x->partition_cost[pl][PARTITION_VERT]; + if (RDCOST(x->rdmult, x->rddiv, r2, d2) < + RDCOST(x->rdmult, x->rddiv, srate, sdist)) { + srate = r2; + sdist = d2; + *(get_sb_partitioning(x, bsize)) = subsize; + } + restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); + } - /* Encode SB using best computed mode(s) */ - // FIXME(rbultje): there really shouldn't be any need to encode_mb/sb - // for each level that we go up, we can just keep tokens and recon - // pixels of the lower level; also, inverting SB/MB order (big->small - // instead of small->big) means we can use as threshold for small, which - // may enable breakouts if RD is not good enough (i.e. 
faster) - encode_sb(cpi, mb_row + y_idx, mb_col + x_idx, 0, tp, is_sb[i]); + // PARTITION_NONE + if ((mi_row + (ms >> 1) < cm->mi_rows) && + (mi_col + (ms >> 1) < cm->mi_cols)) { + int r, d; + pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, bsize, + get_block_context(x, bsize)); + if (bsize >= BLOCK_SIZE_SB8X8) { + set_partition_seg_context(cm, xd, mi_row, mi_col); + pl = partition_plane_context(xd, bsize); + r += x->partition_cost[pl][PARTITION_NONE]; + } + + if (RDCOST(x->rdmult, x->rddiv, r, d) < + RDCOST(x->rdmult, x->rddiv, srate, sdist)) { + srate = r; + sdist = d; + if (bsize >= BLOCK_SIZE_SB8X8) + *(get_sb_partitioning(x, bsize)) = bsize; } + } - memcpy(cm->above_context + mb_col, &a, sizeof(a)); - memcpy(cm->left_context, &l, sizeof(l)); - sb32_rate += vp9_cost_bit(cm->sb64_coded, 0); + *rate = srate; + *dist = sdist; - if (!sb64_skip && - !(((cm->mb_cols & 3) && mb_col + 3 >= cm->mb_cols) || - ((cm->mb_rows & 3) && mb_row + 3 >= cm->mb_rows))) { - pick_sb64_modes(cpi, mb_row, mb_col, tp, &sb64_rate, &sb64_dist); - sb64_rate += vp9_cost_bit(cm->sb64_coded, 1); - } + restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); - /* Decide whether to encode as a SB or 4xMBs */ - if (sb64_rate < INT_MAX && - RDCOST(x->rdmult, x->rddiv, sb64_rate, sb64_dist) < - RDCOST(x->rdmult, x->rddiv, sb32_rate, sb32_dist)) { - is_sb[0] = 2; - *totalrate += sb64_rate; - } else { - *totalrate += sb32_rate; - } + if (srate < INT_MAX && sdist < INT_MAX) + encode_sb(cpi, tp, mi_row, mi_col, bsize == BLOCK_SIZE_SB64X64, bsize); - assert(tp_orig == *tp); - encode_sb64(cpi, mb_row, mb_col, tp, is_sb); + if (bsize == BLOCK_SIZE_SB64X64) { assert(tp_orig < *tp); - } + assert(srate < INT_MAX); + assert(sdist < INT_MAX); + } else { + assert(tp_orig == *tp); + } +} + +static void encode_sb_row(VP9_COMP *cpi, int mi_row, + TOKENEXTRA **tp, int *totalrate) { + VP9_COMMON *const cm = &cpi->common; + int mi_col; + + // Initialize the left context for the new SB row + vpx_memset(&cm->left_context, 0, sizeof(cm->left_context)); + vpx_memset(cm->left_seg_context, 0, sizeof(cm->left_seg_context)); + + // Code each SB in the row + for (mi_col = cm->cur_tile_mi_col_start; + mi_col < cm->cur_tile_mi_col_end; mi_col += 64 / MI_SIZE) { + int dummy_rate, dummy_dist; + if (cpi->speed < 5) { + rd_pick_partition(cpi, tp, mi_row, mi_col, BLOCK_SIZE_SB64X64, + &dummy_rate, &dummy_dist); + } else { + const int idx_str = cm->mode_info_stride * mi_row + mi_col; + MODE_INFO *m = cm->mi + idx_str; + // set_partitioning(cpi, m, BLOCK_SIZE_SB64X64); + choose_partitioning(cpi, cm->mi, mi_row, mi_col); + rd_use_partition(cpi, m, tp, mi_row, mi_col, BLOCK_SIZE_SB64X64, + &dummy_rate, &dummy_dist); + } + } } static void init_encode_frame_mb_context(VP9_COMP *cpi) { @@ -1163,7 +1421,6 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) { x->act_zbin_adj = 0; cpi->seg0_idx = 0; - vpx_memset(cpi->ref_pred_count, 0, sizeof(cpi->ref_pred_count)); xd->mode_info_stride = cm->mode_info_stride; xd->frame_type = cm->frame_type; @@ -1176,78 +1433,70 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) { vp9_init_mbmode_probs(cm); // Copy data over into macro block data structures. - x->src = *cpi->Source; - xd->pre = cm->yv12_fb[cm->ref_frame_map[cpi->lst_fb_idx]]; - xd->dst = cm->yv12_fb[cm->new_fb_idx]; + vp9_setup_src_planes(x, cpi->Source, 0, 0); - // set up frame for intra coded blocks - vp9_setup_intra_recon(&cm->yv12_fb[cm->new_fb_idx]); + // TODO(jkoleszar): are these initializations required? 
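
Note: every partition candidate in rd_pick_partition (split, horz, vert, none) competes on the same scalar rate-distortion cost, so the search above reduces to a handful of RDCOST comparisons. A minimal standalone sketch of that comparison, assuming a simplified fixed-point scaling (the real RDCOST macro lives in the encoder headers and its exact shifts may differ; all lambda and rate/distortion values below are hypothetical):

    /* Sketch: choose between PARTITION_NONE and PARTITION_SPLIT by RD cost. */
    #include <stdint.h>
    #include <stdio.h>

    static int64_t rdcost(int rdmult, int rddiv, int rate, int64_t dist) {
      /* rate is scaled by the multiplier, distortion by the divisor shift */
      return ((128 + (int64_t)rate * rdmult) >> 8) + (dist << rddiv);
    }

    int main(void) {
      const int rdmult = 70, rddiv = 1;      /* hypothetical lambdas */
      const int r_none = 900;                /* bits for the whole block */
      const int64_t d_none = 4200;           /* SSE for the whole block */
      const int r_split = 1400;              /* summed over 4 sub-blocks */
      const int64_t d_split = 2600;
      const int use_split = rdcost(rdmult, rddiv, r_split, d_split) <
                            rdcost(rdmult, rddiv, r_none, d_none);
      printf("best: %s\n", use_split ? "PARTITION_SPLIT" : "PARTITION_NONE");
      return 0;
    }
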
+ setup_pre_planes(xd, &cm->yv12_fb[cm->ref_frame_map[cpi->lst_fb_idx]], NULL, + 0, 0, NULL, NULL); + setup_dst_planes(xd, &cm->yv12_fb[cm->new_fb_idx], 0, 0); vp9_build_block_offsets(x); - vp9_setup_block_dptrs(&x->e_mbd); - - vp9_setup_block_ptrs(x); + vp9_setup_block_dptrs(&x->e_mbd, cm->subsampling_x, cm->subsampling_y); xd->mode_info_context->mbmi.mode = DC_PRED; xd->mode_info_context->mbmi.uv_mode = DC_PRED; - vp9_zero(cpi->count_mb_ref_frame_usage) - vp9_zero(cpi->bmode_count) - vp9_zero(cpi->ymode_count) - vp9_zero(cpi->i8x8_mode_count) + vp9_zero(cpi->y_mode_count) vp9_zero(cpi->y_uv_mode_count) - vp9_zero(cpi->sub_mv_ref_count) - vp9_zero(cpi->mbsplit_count) - vp9_zero(cpi->common.fc.mv_ref_ct) - vp9_zero(cpi->sb_ymode_count) - vp9_zero(cpi->sb32_count); - vp9_zero(cpi->sb64_count); -#if CONFIG_COMP_INTERINTRA_PRED - vp9_zero(cpi->interintra_count); - vp9_zero(cpi->interintra_select_count); -#endif - - vpx_memset(cm->above_context, 0, - sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols); - - xd->fullpixel_mask = cm->full_pixel ? 0xfffffff8 : 0xffffffff; + vp9_zero(cm->fc.inter_mode_counts) + vp9_zero(cpi->partition_count); + vp9_zero(cpi->intra_inter_count); + vp9_zero(cpi->comp_inter_count); + vp9_zero(cpi->single_ref_count); + vp9_zero(cpi->comp_ref_count); + vp9_zero(cm->fc.tx_count_32x32p); + vp9_zero(cm->fc.tx_count_16x16p); + vp9_zero(cm->fc.tx_count_8x8p); + vp9_zero(cm->fc.mbskip_count); + + // Note: this memset assumes above_context[0], [1] and [2] + // are allocated as part of the same buffer. + vpx_memset(cm->above_context[0], 0, sizeof(ENTROPY_CONTEXT) * 2 * + MAX_MB_PLANE * mi_cols_aligned_to_sb(cm)); + vpx_memset(cm->above_seg_context, 0, sizeof(PARTITION_CONTEXT) * + mi_cols_aligned_to_sb(cm)); } static void switch_lossless_mode(VP9_COMP *cpi, int lossless) { if (lossless) { cpi->mb.fwd_txm8x4 = vp9_short_walsh8x4; cpi->mb.fwd_txm4x4 = vp9_short_walsh4x4; - cpi->mb.e_mbd.inv_txm4x4_1 = vp9_short_iwalsh4x4_1; - cpi->mb.e_mbd.inv_txm4x4 = vp9_short_iwalsh4x4; + cpi->mb.e_mbd.inv_txm4x4_1_add = vp9_short_iwalsh4x4_1_add; + cpi->mb.e_mbd.inv_txm4x4_add = vp9_short_iwalsh4x4_add; cpi->mb.optimize = 0; cpi->common.filter_level = 0; - cpi->zbin_mode_boost_enabled = FALSE; + cpi->zbin_mode_boost_enabled = 0; cpi->common.txfm_mode = ONLY_4X4; } else { cpi->mb.fwd_txm8x4 = vp9_short_fdct8x4; cpi->mb.fwd_txm4x4 = vp9_short_fdct4x4; - cpi->mb.e_mbd.inv_txm4x4_1 = vp9_short_idct4x4_1; - cpi->mb.e_mbd.inv_txm4x4 = vp9_short_idct4x4; + cpi->mb.e_mbd.inv_txm4x4_1_add = vp9_short_idct4x4_1_add; + cpi->mb.e_mbd.inv_txm4x4_add = vp9_short_idct4x4_add; } } static void encode_frame_internal(VP9_COMP *cpi) { - int mb_row; + int mi_row; MACROBLOCK *const x = &cpi->mb; VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; int totalrate; -// fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n", -// cpi->common.current_video_frame, cpi->common.show_frame, -// cm->frame_type); - - // Compute a modified set of reference frame probabilities to use when - // prediction fails. These are based on the current general estimates for - // this frame which may be updated with each iteration of the recode loop. - vp9_compute_mod_refprobs(cm); +// fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n", +// cpi->common.current_video_frame, cpi->common.show_frame, +// cm->frame_type); // debug output #if DBG_PRNT_SEGMAP @@ -1264,41 +1513,25 @@ static void encode_frame_internal(VP9_COMP *cpi) { // Reset frame count of inter 0,0 motion vector usage. 
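
Aside on the switch_lossless_mode() change above: it is a pure function-pointer swap. In lossless mode the reversible Walsh-Hadamard transform replaces the DCT, and both trellis optimization and the loop filter are turned off so the reconstruction round-trips exactly. A compilable sketch of the same pattern, with illustrative stand-in transforms (not the libvpx functions):

    #include <stdio.h>

    typedef void (*fwd_txfm_fn)(const short *in, short *out, int pitch);

    static void walsh4x4(const short *in, short *out, int pitch) {
      (void)in; (void)out; (void)pitch;  /* reversible transform stub */
    }
    static void fdct4x4(const short *in, short *out, int pitch) {
      (void)in; (void)out; (void)pitch;  /* lossy DCT stub */
    }

    struct enc_state {
      fwd_txfm_fn fwd_txm4x4;
      int optimize;      /* trellis quantization on/off */
      int filter_level;  /* loop filter strength; 0 disables it */
    };

    static void switch_lossless(struct enc_state *e, int lossless) {
      if (lossless) {
        e->fwd_txm4x4 = walsh4x4;
        e->optimize = 0;      /* coefficients must round-trip exactly */
        e->filter_level = 0;  /* no smoothing of the reconstruction */
      } else {
        e->fwd_txm4x4 = fdct4x4;
      }
    }

    int main(void) {
      struct enc_state e = { fdct4x4, 1, 32 };
      switch_lossless(&e, 1);
      printf("lossless: optimize=%d filter=%d\n", e.optimize, e.filter_level);
      return 0;
    }
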
cpi->inter_zz_count = 0; - cpi->skip_true_count[0] = cpi->skip_true_count[1] = cpi->skip_true_count[2] = 0; - cpi->skip_false_count[0] = cpi->skip_false_count[1] = cpi->skip_false_count[2] = 0; - - vp9_zero(cpi->switchable_interp_count); + vp9_zero(cm->fc.switchable_interp_count); vp9_zero(cpi->best_switchable_interp_count); xd->mode_info_context = cm->mi; xd->prev_mode_info_context = cm->prev_mi; vp9_zero(cpi->NMVcount); - vp9_zero(cpi->coef_counts_4x4); - vp9_zero(cpi->coef_counts_8x8); - vp9_zero(cpi->coef_counts_16x16); - vp9_zero(cpi->coef_counts_32x32); + vp9_zero(cpi->coef_counts); vp9_zero(cm->fc.eob_branch_counts); -#if CONFIG_CODE_NONZEROCOUNT - vp9_zero(cm->fc.nzc_counts_4x4); - vp9_zero(cm->fc.nzc_counts_8x8); - vp9_zero(cm->fc.nzc_counts_16x16); - vp9_zero(cm->fc.nzc_counts_32x32); - vp9_zero(cm->fc.nzc_pcat_counts); -#endif -#if CONFIG_NEW_MVREF - vp9_zero(cpi->mb_mv_ref_count); -#endif - cpi->mb.e_mbd.lossless = (cm->base_qindex == 0 && - cm->y1dc_delta_q == 0 && - cm->uvdc_delta_q == 0 && - cm->uvac_delta_q == 0); + cpi->mb.e_mbd.lossless = cm->base_qindex == 0 && + cm->y_dc_delta_q == 0 && + cm->uv_dc_delta_q == 0 && + cm->uv_ac_delta_q == 0; switch_lossless_mode(cpi, cpi->mb.e_mbd.lossless); vp9_frame_init_quantizer(cpi); - vp9_initialize_rd_consts(cpi, cm->base_qindex + cm->y1dc_delta_q); + vp9_initialize_rd_consts(cpi, cm->base_qindex + cm->y_dc_delta_q); vp9_initialize_me_consts(cpi, cm->base_qindex); if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { @@ -1313,12 +1546,11 @@ static void encode_frame_internal(VP9_COMP *cpi) { init_encode_frame_mb_context(cpi); vpx_memset(cpi->rd_comp_pred_diff, 0, sizeof(cpi->rd_comp_pred_diff)); - vpx_memset(cpi->single_pred_count, 0, sizeof(cpi->single_pred_count)); - vpx_memset(cpi->comp_pred_count, 0, sizeof(cpi->comp_pred_count)); - vpx_memset(cpi->txfm_count_32x32p, 0, sizeof(cpi->txfm_count_32x32p)); - vpx_memset(cpi->txfm_count_16x16p, 0, sizeof(cpi->txfm_count_16x16p)); - vpx_memset(cpi->txfm_count_8x8p, 0, sizeof(cpi->txfm_count_8x8p)); vpx_memset(cpi->rd_tx_select_diff, 0, sizeof(cpi->rd_tx_select_diff)); + vpx_memset(cpi->rd_tx_select_threshes, 0, sizeof(cpi->rd_tx_select_threshes)); + + set_prev_mi(cm); + { struct vpx_usec_timer emr_timer; vpx_usec_timer_start(&emr_timer); @@ -1336,11 +1568,13 @@ static void encode_frame_internal(VP9_COMP *cpi) { // For each row of SBs in the frame vp9_get_tile_col_offsets(cm, tile_col); - for (mb_row = cm->cur_tile_mb_row_start; - mb_row < cm->cur_tile_mb_row_end; mb_row += 4) { - encode_sb_row(cpi, mb_row, &tp, &totalrate); - } + for (mi_row = cm->cur_tile_mi_row_start; + mi_row < cm->cur_tile_mi_row_end; + mi_row += 8) + encode_sb_row(cpi, mi_row, &tp, &totalrate); cpi->tok_count[tile_col] = (unsigned int)(tp - tp_old); + assert(tp - cpi->tok <= + get_token_alloc(cm->mb_rows, cm->mb_cols)); } } } @@ -1365,15 +1599,6 @@ static int check_dual_ref_flags(VP9_COMP *cpi) { int ref_flags = cpi->ref_frame_flags; if (vp9_segfeature_active(xd, 1, SEG_LVL_REF_FRAME)) { - if ((ref_flags & (VP9_LAST_FLAG | VP9_GOLD_FLAG)) == (VP9_LAST_FLAG | VP9_GOLD_FLAG) && - vp9_check_segref(xd, 1, LAST_FRAME)) - return 1; - if ((ref_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) == (VP9_GOLD_FLAG | VP9_ALT_FLAG) && - vp9_check_segref(xd, 1, GOLDEN_FRAME)) - return 1; - if ((ref_flags & (VP9_ALT_FLAG | VP9_LAST_FLAG)) == (VP9_ALT_FLAG | VP9_LAST_FLAG) && - vp9_check_segref(xd, 1, ALTREF_FRAME)) - return 1; return 0; } else { return (!!(ref_flags & VP9_GOLD_FLAG) + @@ -1382,23 +1607,6 @@ static int check_dual_ref_flags(VP9_COMP 
*cpi) { } } -static void reset_skip_txfm_size_mb(VP9_COMP *cpi, - MODE_INFO *mi, TX_SIZE txfm_max) { - MB_MODE_INFO *const mbmi = &mi->mbmi; - - if (mbmi->txfm_size > txfm_max) { - VP9_COMMON *const cm = &cpi->common; - MACROBLOCK *const x = &cpi->mb; - MACROBLOCKD *const xd = &x->e_mbd; - const int segment_id = mbmi->segment_id; - - xd->mode_info_context = mi; - assert((vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) || - (cm->mb_no_coeff_skip && mbmi->mb_skip_coeff)); - mbmi->txfm_size = txfm_max; - } -} - static int get_skip_flag(MODE_INFO *mi, int mis, int ymbs, int xmbs) { int x, y; @@ -1422,96 +1630,120 @@ static void set_txfm_flag(MODE_INFO *mi, int mis, int ymbs, int xmbs, } } -static void reset_skip_txfm_size_sb32(VP9_COMP *cpi, MODE_INFO *mi, - int mis, TX_SIZE txfm_max, - int mb_rows_left, int mb_cols_left) { +static void reset_skip_txfm_size_b(VP9_COMP *cpi, MODE_INFO *mi, + int mis, TX_SIZE txfm_max, + int bw, int bh, int mi_row, int mi_col, + BLOCK_SIZE_TYPE bsize) { + VP9_COMMON *const cm = &cpi->common; MB_MODE_INFO *const mbmi = &mi->mbmi; + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; + if (mbmi->txfm_size > txfm_max) { - VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; const int segment_id = mbmi->segment_id; - const int ymbs = MIN(2, mb_rows_left); - const int xmbs = MIN(2, mb_cols_left); + const int ymbs = MIN(bh, cm->mi_rows - mi_row); + const int xmbs = MIN(bw, cm->mi_cols - mi_col); xd->mode_info_context = mi; - assert((vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) || - (cm->mb_no_coeff_skip && get_skip_flag(mi, mis, ymbs, xmbs))); + assert(vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP) || + get_skip_flag(mi, mis, ymbs, xmbs)); set_txfm_flag(mi, mis, ymbs, xmbs, txfm_max); } } -static void reset_skip_txfm_size_sb64(VP9_COMP *cpi, MODE_INFO *mi, - int mis, TX_SIZE txfm_max, - int mb_rows_left, int mb_cols_left) { - MB_MODE_INFO *const mbmi = &mi->mbmi; +static void reset_skip_txfm_size_sb(VP9_COMP *cpi, MODE_INFO *mi, + TX_SIZE txfm_max, + int mi_row, int mi_col, + BLOCK_SIZE_TYPE bsize) { + VP9_COMMON *const cm = &cpi->common; + const int mis = cm->mode_info_stride; + int bwl, bhl; + const int bsl = mi_width_log2(bsize), bs = 1 << (bsl - 1); - if (mbmi->txfm_size > txfm_max) { - VP9_COMMON *const cm = &cpi->common; - MACROBLOCK *const x = &cpi->mb; - MACROBLOCKD *const xd = &x->e_mbd; - const int segment_id = mbmi->segment_id; - const int ymbs = MIN(4, mb_rows_left); - const int xmbs = MIN(4, mb_cols_left); + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; - xd->mode_info_context = mi; - assert((vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) || - (cm->mb_no_coeff_skip && get_skip_flag(mi, mis, ymbs, xmbs))); - set_txfm_flag(mi, mis, ymbs, xmbs, txfm_max); + bwl = mi_width_log2(mi->mbmi.sb_type); + bhl = mi_height_log2(mi->mbmi.sb_type); + + if (bwl == bsl && bhl == bsl) { + reset_skip_txfm_size_b(cpi, mi, mis, txfm_max, 1 << bsl, 1 << bsl, + mi_row, mi_col, bsize); + } else if (bwl == bsl && bhl < bsl) { + reset_skip_txfm_size_b(cpi, mi, mis, txfm_max, 1 << bsl, bs, + mi_row, mi_col, bsize); + reset_skip_txfm_size_b(cpi, mi + bs * mis, mis, txfm_max, 1 << bsl, bs, + mi_row + bs, mi_col, bsize); + } else if (bwl < bsl && bhl == bsl) { + reset_skip_txfm_size_b(cpi, mi, mis, txfm_max, bs, 1 << bsl, + mi_row, mi_col, bsize); + reset_skip_txfm_size_b(cpi, mi + bs, mis, txfm_max, bs, 1 << bsl, + mi_row, mi_col + bs, bsize); + } else { + BLOCK_SIZE_TYPE subsize; + 
int n;
+
+    assert(bwl < bsl && bhl < bsl);
+    if (bsize == BLOCK_SIZE_SB64X64) {
+      subsize = BLOCK_SIZE_SB32X32;
+    } else if (bsize == BLOCK_SIZE_SB32X32) {
+      subsize = BLOCK_SIZE_MB16X16;
+    } else {
+      assert(bsize == BLOCK_SIZE_MB16X16);
+      subsize = BLOCK_SIZE_SB8X8;
+    }
+
+    for (n = 0; n < 4; n++) {
+      const int y_idx = n >> 1, x_idx = n & 0x01;
+
+      reset_skip_txfm_size_sb(cpi, mi + y_idx * bs * mis + x_idx * bs,
+                              txfm_max, mi_row + y_idx * bs,
+                              mi_col + x_idx * bs, subsize);
+    }
   }
 }
 
 static void reset_skip_txfm_size(VP9_COMP *cpi, TX_SIZE txfm_max) {
   VP9_COMMON *const cm = &cpi->common;
-  int mb_row, mb_col;
+  int mi_row, mi_col;
   const int mis = cm->mode_info_stride;
   MODE_INFO *mi, *mi_ptr = cm->mi;
 
-  for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 4, mi_ptr += 4 * mis) {
+  for (mi_row = 0; mi_row < cm->mi_rows;
+       mi_row += 8, mi_ptr += 8 * mis) {
     mi = mi_ptr;
-    for (mb_col = 0; mb_col < cm->mb_cols; mb_col += 4, mi += 4) {
-      if (mi->mbmi.sb_type == BLOCK_SIZE_SB64X64) {
-        reset_skip_txfm_size_sb64(cpi, mi, mis, txfm_max,
-                                  cm->mb_rows - mb_row, cm->mb_cols - mb_col);
-      } else {
-        int i;
-
-        for (i = 0; i < 4; i++) {
-          const int x_idx_sb = (i & 1) << 1, y_idx_sb = i & 2;
-          MODE_INFO *sb_mi = mi + y_idx_sb * mis + x_idx_sb;
-
-          if (mb_row + y_idx_sb >= cm->mb_rows ||
-              mb_col + x_idx_sb >= cm->mb_cols)
-            continue;
-
-          if (sb_mi->mbmi.sb_type) {
-            reset_skip_txfm_size_sb32(cpi, sb_mi, mis, txfm_max,
-                                      cm->mb_rows - mb_row - y_idx_sb,
-                                      cm->mb_cols - mb_col - x_idx_sb);
-          } else {
-            int m;
-
-            for (m = 0; m < 4; m++) {
-              const int x_idx = x_idx_sb + (m & 1), y_idx = y_idx_sb + (m >> 1);
-              MODE_INFO *mb_mi;
-
-              if (mb_col + x_idx >= cm->mb_cols ||
-                  mb_row + y_idx >= cm->mb_rows)
-                continue;
-
-              mb_mi = mi + y_idx * mis + x_idx;
-              assert(mb_mi->mbmi.sb_type == BLOCK_SIZE_MB16X16);
-              reset_skip_txfm_size_mb(cpi, mb_mi, txfm_max);
-            }
-          }
-        }
-      }
+    for (mi_col = 0; mi_col < cm->mi_cols;
+         mi_col += 8, mi += 8) {
+      reset_skip_txfm_size_sb(cpi, mi, txfm_max,
+                              mi_row, mi_col, BLOCK_SIZE_SB64X64);
     }
   }
 }
 
 void vp9_encode_frame(VP9_COMP *cpi) {
+  VP9_COMMON *const cm = &cpi->common;
+
+  // In the longer term the encoder should be generalized to match the
+  // decoder such that we allow compound where one of the 3 buffers has a
+  // different sign bias and that buffer is then the fixed ref. However, this
+  // requires further work in the rd loop. For now the only supported encoder
+  // side behaviour is where the ALT ref buffer has opposite sign bias to
+ if ((cm->ref_frame_sign_bias[ALTREF_FRAME] == + cm->ref_frame_sign_bias[GOLDEN_FRAME]) || + (cm->ref_frame_sign_bias[ALTREF_FRAME] == + cm->ref_frame_sign_bias[LAST_FRAME])) { + cm->allow_comp_inter_inter = 0; + } else { + cm->allow_comp_inter_inter = 1; + cm->comp_fixed_ref = ALTREF_FRAME; + cm->comp_var_ref[0] = LAST_FRAME; + cm->comp_var_ref[1] = GOLDEN_FRAME; + } + if (cpi->sf.RD) { int i, frame_type, pred_type; TXFM_MODE txfm_type; @@ -1535,7 +1767,7 @@ void vp9_encode_frame(VP9_COMP *cpi) { frame_type = 2; /* prediction (compound, single or hybrid) mode selection */ - if (frame_type == 3) + if (frame_type == 3 || !cm->allow_comp_inter_inter) pred_type = SINGLE_PREDICTION_ONLY; else if (cpi->rd_prediction_type_threshes[frame_type][1] > cpi->rd_prediction_type_threshes[frame_type][0] && @@ -1584,15 +1816,11 @@ void vp9_encode_frame(VP9_COMP *cpi) { } else txfm_type = ALLOW_8X8; #else - txfm_type = cpi->rd_tx_select_threshes[frame_type][ALLOW_32X32] >= + txfm_type = cpi->rd_tx_select_threshes[frame_type][ALLOW_32X32] > cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] ? ALLOW_32X32 : TX_MODE_SELECT; #endif cpi->common.txfm_mode = txfm_type; - if (txfm_type != TX_MODE_SELECT) { - cpi->common.prob_tx[0] = 128; - cpi->common.prob_tx[1] = 128; - } cpi->common.comp_pred_mode = pred_type; encode_frame_internal(cpi); @@ -1617,28 +1845,49 @@ void vp9_encode_frame(VP9_COMP *cpi) { int single_count_zero = 0; int comp_count_zero = 0; - for (i = 0; i < COMP_PRED_CONTEXTS; i++) { - single_count_zero += cpi->single_pred_count[i]; - comp_count_zero += cpi->comp_pred_count[i]; + for (i = 0; i < COMP_INTER_CONTEXTS; i++) { + single_count_zero += cpi->comp_inter_count[i][0]; + comp_count_zero += cpi->comp_inter_count[i][1]; } if (comp_count_zero == 0) { cpi->common.comp_pred_mode = SINGLE_PREDICTION_ONLY; + vp9_zero(cpi->comp_inter_count); } else if (single_count_zero == 0) { cpi->common.comp_pred_mode = COMP_PREDICTION_ONLY; + vp9_zero(cpi->comp_inter_count); } } if (cpi->common.txfm_mode == TX_MODE_SELECT) { - const int count4x4 = cpi->txfm_count_16x16p[TX_4X4] + - cpi->txfm_count_32x32p[TX_4X4] + - cpi->txfm_count_8x8p[TX_4X4]; - const int count8x8_lp = cpi->txfm_count_32x32p[TX_8X8] + - cpi->txfm_count_16x16p[TX_8X8]; - const int count8x8_8x8p = cpi->txfm_count_8x8p[TX_8X8]; - const int count16x16_16x16p = cpi->txfm_count_16x16p[TX_16X16]; - const int count16x16_lp = cpi->txfm_count_32x32p[TX_16X16]; - const int count32x32 = cpi->txfm_count_32x32p[TX_32X32]; + int count4x4 = 0; + int count8x8_lp = 0, count8x8_8x8p = 0; + int count16x16_16x16p = 0, count16x16_lp = 0; + int count32x32 = 0; + + for (i = 0; i < TX_SIZE_CONTEXTS; i++) + count4x4 += cm->fc.tx_count_32x32p[i][TX_4X4]; + for (i = 0; i < TX_SIZE_CONTEXTS; i++) + count4x4 += cm->fc.tx_count_16x16p[i][TX_4X4]; + for (i = 0; i < TX_SIZE_CONTEXTS; i++) + count4x4 += cm->fc.tx_count_8x8p[i][TX_4X4]; + + for (i = 0; i < TX_SIZE_CONTEXTS; i++) + count8x8_lp += cm->fc.tx_count_32x32p[i][TX_8X8]; + for (i = 0; i < TX_SIZE_CONTEXTS; i++) + count8x8_lp += cm->fc.tx_count_16x16p[i][TX_8X8]; + + for (i = 0; i < TX_SIZE_CONTEXTS; i++) + count8x8_8x8p += cm->fc.tx_count_8x8p[i][TX_8X8]; + + for (i = 0; i < TX_SIZE_CONTEXTS; i++) + count16x16_16x16p += cm->fc.tx_count_16x16p[i][TX_16X16]; + + for (i = 0; i < TX_SIZE_CONTEXTS; i++) + count16x16_lp += cm->fc.tx_count_32x32p[i][TX_16X16]; + + for (i = 0; i < TX_SIZE_CONTEXTS; i++) + count32x32 += cm->fc.tx_count_32x32p[i][TX_32X32]; if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 && 
count32x32 == 0) { @@ -1665,70 +1914,7 @@ void vp9_encode_frame(VP9_COMP *cpi) { } -void vp9_setup_block_ptrs(MACROBLOCK *x) { - int r, c; - int i; - - for (r = 0; r < 4; r++) { - for (c = 0; c < 4; c++) - x->block[r * 4 + c].src_diff = x->src_diff + r * 4 * 16 + c * 4; - } - - for (r = 0; r < 2; r++) { - for (c = 0; c < 2; c++) - x->block[16 + r * 2 + c].src_diff = x->src_diff + 256 + r * 4 * 8 + c * 4; - } - - - for (r = 0; r < 2; r++) { - for (c = 0; c < 2; c++) - x->block[20 + r * 2 + c].src_diff = x->src_diff + 320 + r * 4 * 8 + c * 4; - } - - for (i = 0; i < 24; i++) - x->block[i].coeff = x->coeff + i * 16; -} - void vp9_build_block_offsets(MACROBLOCK *x) { - int block = 0; - int br, bc; - - vp9_build_block_doffsets(&x->e_mbd); - - for (br = 0; br < 4; br++) { - for (bc = 0; bc < 4; bc++) { - BLOCK *this_block = &x->block[block]; - // this_block->base_src = &x->src.y_buffer; - // this_block->src_stride = x->src.y_stride; - // this_block->src = 4 * br * this_block->src_stride + 4 * bc; - this_block->base_src = &x->src.y_buffer; - this_block->src_stride = x->src.y_stride; - this_block->src = 4 * br * this_block->src_stride + 4 * bc; - ++block; - } - } - - // u blocks - for (br = 0; br < 2; br++) { - for (bc = 0; bc < 2; bc++) { - BLOCK *this_block = &x->block[block]; - this_block->base_src = &x->src.u_buffer; - this_block->src_stride = x->src.uv_stride; - this_block->src = 4 * br * this_block->src_stride + 4 * bc; - ++block; - } - } - - // v blocks - for (br = 0; br < 2; br++) { - for (bc = 0; bc < 2; bc++) { - BLOCK *this_block = &x->block[block]; - this_block->base_src = &x->src.v_buffer; - this_block->src_stride = x->src.uv_stride; - this_block->src = 4 * br * this_block->src_stride + 4 * bc; - ++block; - } - } } static void sum_intra_stats(VP9_COMP *cpi, MACROBLOCK *x) { @@ -1736,52 +1922,22 @@ static void sum_intra_stats(VP9_COMP *cpi, MACROBLOCK *x) { const MB_PREDICTION_MODE m = xd->mode_info_context->mbmi.mode; const MB_PREDICTION_MODE uvm = xd->mode_info_context->mbmi.uv_mode; -#ifdef MODE_STATS - const int is_key = cpi->common.frame_type == KEY_FRAME; - - ++ (is_key ? uv_modes : inter_uv_modes)[uvm]; - ++ uv_modes_y[m][uvm]; - - if (m == B_PRED) { - unsigned int *const bct = is_key ? 
b_modes : inter_b_modes; - - int b = 0; - - do { - ++ bct[xd->block[b].bmi.as_mode.first]; - } while (++b < 16); - } - - if (m == I8X8_PRED) { - i8x8_modes[xd->block[0].bmi.as_mode.first]++; - i8x8_modes[xd->block[2].bmi.as_mode.first]++; - i8x8_modes[xd->block[8].bmi.as_mode.first]++; - i8x8_modes[xd->block[10].bmi.as_mode.first]++; - } -#endif - - if (xd->mode_info_context->mbmi.sb_type) { - ++cpi->sb_ymode_count[m]; + ++cpi->y_uv_mode_count[m][uvm]; + if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB8X8) { + const BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type; + const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); + const int bsl = MIN(bwl, bhl); + ++cpi->y_mode_count[MIN(bsl, 3)][m]; } else { - ++cpi->ymode_count[m]; - } - if (m != I8X8_PRED) - ++cpi->y_uv_mode_count[m][uvm]; - else { - cpi->i8x8_mode_count[xd->block[0].bmi.as_mode.first]++; - cpi->i8x8_mode_count[xd->block[2].bmi.as_mode.first]++; - cpi->i8x8_mode_count[xd->block[8].bmi.as_mode.first]++; - cpi->i8x8_mode_count[xd->block[10].bmi.as_mode.first]++; - } - if (m == B_PRED) { - int b = 0; - do { - int m = xd->block[b].bmi.as_mode.first; -#if CONFIG_NEWBINTRAMODES - if (m == B_CONTEXT_PRED) m -= CONTEXT_PRED_REPLACEMENTS; -#endif - ++cpi->bmode_count[m]; - } while (++b < 16); + int idx, idy; + int bw = 1 << b_width_log2(xd->mode_info_context->mbmi.sb_type); + int bh = 1 << b_height_log2(xd->mode_info_context->mbmi.sb_type); + for (idy = 0; idy < 2; idy += bh) { + for (idx = 0; idx < 2; idx += bw) { + int m = xd->mode_info_context->bmi[idy * 2 + idx].as_mode.first; + ++cpi->y_mode_count[0][m]; + } + } } } @@ -1806,268 +1962,22 @@ static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x) { #endif } -static void update_sb64_skip_coeff_state(VP9_COMP *cpi, - ENTROPY_CONTEXT_PLANES ta[16], - ENTROPY_CONTEXT_PLANES tl[16], - TOKENEXTRA *t[16], - TOKENEXTRA **tp, - int skip[16], int output_enabled) { - MACROBLOCK *const x = &cpi->mb; - - if (x->e_mbd.mode_info_context->mbmi.txfm_size == TX_32X32) { - TOKENEXTRA tokens[4][1024+512]; - int n_tokens[4], n; - - // if there were no skips, we don't need to do anything - if (!skip[0] && !skip[1] && !skip[2] && !skip[3]) - return; - - // if we don't do coeff skipping for this frame, we don't - // need to do anything here - if (!cpi->common.mb_no_coeff_skip) - return; - - // if all 4 MBs skipped coeff coding, nothing to be done - if (skip[0] && skip[1] && skip[2] && skip[3]) - return; - - // so the situation now is that we want to skip coeffs - // for some MBs, but not all, and we didn't code EOB - // coefficients for them. However, the skip flag for this - // SB will be 0 overall, so we need to insert EOBs in the - // middle of the token tree. Do so here. 
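
For context on the update_sb64_skip_coeff_state() block being removed here: it existed because a superblock could contain a mix of skipped and coded macroblocks, and skipped ones never emitted end-of-block tokens; when the superblock as a whole could not be signalled as skipped, EOB tokens had to be spliced back into the token run so the stream stayed decodable. A simplified sketch of that splice, with a stand-in token type (TOKENEXTRA in the real code):

    #include <string.h>

    typedef struct { int value; } TOKEN;  /* stand-in for TOKENEXTRA */

    /* Re-emit four per-block token runs into out[], substituting a single
     * stuffed EOB token for each block that skipped coefficient coding. */
    static TOKEN *splice_eobs(TOKEN *out, TOKEN *const runs[4],
                              const int run_len[4], const int skip[4],
                              TOKEN eob) {
      int n;
      for (n = 0; n < 4; n++) {
        if (skip[n]) {
          *out++ = eob;
        } else {
          memcpy(out, runs[n], run_len[n] * sizeof(*out));
          out += run_len[n];
        }
      }
      return out;  /* new end of the token buffer */
    }
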
- for (n = 0; n < 4; n++) { - if (n < 3) { - n_tokens[n] = t[n + 1] - t[n]; - } else { - n_tokens[n] = *tp - t[3]; - } - if (n_tokens[n]) { - memcpy(tokens[n], t[n], n_tokens[n] * sizeof(*t[0])); - } - } - - // reset pointer, stuff EOBs where necessary - *tp = t[0]; - for (n = 0; n < 4; n++) { - if (skip[n]) { - x->e_mbd.above_context = &ta[n * 2]; - x->e_mbd.left_context = &tl[n * 2]; - vp9_stuff_sb(cpi, &x->e_mbd, tp, !output_enabled); - } else { - if (n_tokens[n]) { - memcpy(*tp, tokens[n], sizeof(*t[0]) * n_tokens[n]); - } - (*tp) += n_tokens[n]; - } - } - } else { - TOKENEXTRA tokens[16][16 * 25]; - int n_tokens[16], n; - - // if there were no skips, we don't need to do anything - if (!skip[ 0] && !skip[ 1] && !skip[ 2] && !skip[ 3] && - !skip[ 4] && !skip[ 5] && !skip[ 6] && !skip[ 7] && - !skip[ 8] && !skip[ 9] && !skip[10] && !skip[11] && - !skip[12] && !skip[13] && !skip[14] && !skip[15]) - return; - - // if we don't do coeff skipping for this frame, we don't - // need to do anything here - if (!cpi->common.mb_no_coeff_skip) - return; - - // if all 4 MBs skipped coeff coding, nothing to be done - if (skip[ 0] && skip[ 1] && skip[ 2] && skip[ 3] && - skip[ 4] && skip[ 5] && skip[ 6] && skip[ 7] && - skip[ 8] && skip[ 9] && skip[10] && skip[11] && - skip[12] && skip[13] && skip[14] && skip[15]) - return; - - // so the situation now is that we want to skip coeffs - // for some MBs, but not all, and we didn't code EOB - // coefficients for them. However, the skip flag for this - // SB will be 0 overall, so we need to insert EOBs in the - // middle of the token tree. Do so here. - for (n = 0; n < 16; n++) { - if (n < 15) { - n_tokens[n] = t[n + 1] - t[n]; - } else { - n_tokens[n] = *tp - t[15]; - } - if (n_tokens[n]) { - memcpy(tokens[n], t[n], n_tokens[n] * sizeof(*t[0])); - } - } - - // reset pointer, stuff EOBs where necessary - *tp = t[0]; - for (n = 0; n < 16; n++) { - if (skip[n]) { - x->e_mbd.above_context = &ta[n]; - x->e_mbd.left_context = &tl[n]; - vp9_stuff_mb(cpi, &x->e_mbd, tp, !output_enabled); - } else { - if (n_tokens[n]) { - memcpy(*tp, tokens[n], sizeof(*t[0]) * n_tokens[n]); - } - (*tp) += n_tokens[n]; - } - } - } -} - -#if CONFIG_CODE_NONZEROCOUNT -static void gather_nzcs_mb16(VP9_COMMON *const cm, - MACROBLOCKD *xd) { - int i; - vpx_memset(xd->mode_info_context->mbmi.nzcs, 0, - 384 * sizeof(xd->mode_info_context->mbmi.nzcs[0])); - switch (xd->mode_info_context->mbmi.txfm_size) { - case TX_4X4: - for (i = 0; i < 24; ++i) { - xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i]; - } - break; - - case TX_8X8: - for (i = 0; i < 16; i += 4) { - xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i]; - } - if (xd->mode_info_context->mbmi.mode == I8X8_PRED || - xd->mode_info_context->mbmi.mode == SPLITMV) { - for (i = 16; i < 24; ++i) { - xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i]; - } - } else { - for (i = 16; i < 24; i += 4) { - xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i]; - } - } - break; - - case TX_16X16: - xd->mode_info_context->mbmi.nzcs[0] = xd->nzcs[0]; - for (i = 16; i < 24; i += 4) { - xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i]; - } - break; - - default: - break; - } -} - -static void gather_nzcs_sb32(VP9_COMMON *const cm, - MACROBLOCKD *xd) { - int i, j; - MODE_INFO *m = xd->mode_info_context; - int mis = cm->mode_info_stride; - vpx_memset(m->mbmi.nzcs, 0, - 384 * sizeof(xd->mode_info_context->mbmi.nzcs[0])); - switch (xd->mode_info_context->mbmi.txfm_size) { - case TX_4X4: - for (i = 0; i < 96; ++i) { - xd->mode_info_context->mbmi.nzcs[i] = 
xd->nzcs[i]; - } - break; - - case TX_8X8: - for (i = 0; i < 96; i += 4) { - xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i]; - } - break; - - case TX_16X16: - for (i = 0; i < 96; i += 16) { - xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i]; - } - break; - - case TX_32X32: - xd->mode_info_context->mbmi.nzcs[0] = xd->nzcs[0]; - for (i = 64; i < 96; i += 16) { - xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i]; - } - break; - - default: - break; - } - for (i = 0; i < 2; ++i) - for (j = 0; j < 2; ++j) { - if (i == 0 && j == 0) continue; - vpx_memcpy((m + j + mis * i)->mbmi.nzcs, m->mbmi.nzcs, - 384 * sizeof(m->mbmi.nzcs[0])); - } -} - -static void gather_nzcs_sb64(VP9_COMMON *const cm, - MACROBLOCKD *xd) { - int i, j; - MODE_INFO *m = xd->mode_info_context; - int mis = cm->mode_info_stride; - vpx_memset(xd->mode_info_context->mbmi.nzcs, 0, - 384 * sizeof(xd->mode_info_context->mbmi.nzcs[0])); - switch (xd->mode_info_context->mbmi.txfm_size) { - case TX_4X4: - for (i = 0; i < 384; ++i) { - xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i]; - } - break; - - case TX_8X8: - for (i = 0; i < 384; i += 4) { - xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i]; - } - break; - - case TX_16X16: - for (i = 0; i < 384; i += 16) { - xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i]; - } - break; - - case TX_32X32: - for (i = 0; i < 384; i += 64) { - xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i]; - } - break; - - default: - break; - } - for (i = 0; i < 4; ++i) - for (j = 0; j < 4; ++j) { - if (i == 0 && j == 0) continue; - vpx_memcpy((m + j + mis * i)->mbmi.nzcs, m->mbmi.nzcs, - 384 * sizeof(m->mbmi.nzcs[0])); - } -} -#endif - -static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, - int output_enabled, - int mb_row, int mb_col) { +static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, + int output_enabled, int mi_row, int mi_col, + BLOCK_SIZE_TYPE bsize) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; + int n; MODE_INFO *mi = xd->mode_info_context; - MB_MODE_INFO *const mbmi = &mi->mbmi; + MB_MODE_INFO *mbmi = &mi->mbmi; + unsigned int segment_id = mbmi->segment_id; const int mis = cm->mode_info_stride; - unsigned char ref_pred_flag; + const int bwl = mi_width_log2(bsize); + const int bw = 1 << bwl, bh = 1 << mi_height_log2(bsize); - assert(!xd->mode_info_context->mbmi.sb_type); - -#ifdef ENC_DEBUG - enc_debug = (cpi->common.current_video_frame == 11 && cm->show_frame && - mb_row == 8 && mb_col == 0 && output_enabled); - if (enc_debug) - printf("Encode MB %d %d output %d\n", mb_row, mb_col, output_enabled); -#endif if (cm->frame_type == KEY_FRAME) { - if (cpi->oxcf.tuning == VP8_TUNE_SSIM && output_enabled) { - // Adjust the zbin based on this MB rate. 
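
Worth noting about the new encode_superblock() signature introduced below: instead of hard-coded 16x16 assumptions, it derives its loop bounds from the block-size enum through log2 helpers (bw = 1 << mi_width_log2(bsize)), and the skip flag is then propagated to all bw*bh mode-info entries the block covers. A self-contained sketch of that arithmetic; the enum values and mapping are illustrative of the scheme, not copied from the real headers:

    #include <assert.h>

    /* Illustrative subset of the block-size enum, ordered so that the
     * width in 8x8 mode-info units is 1 << mi_width_log2(bsize). */
    enum block_size { B_8X8 = 0, B_16X16 = 1, B_32X32 = 2, B_64X64 = 3 };

    static int mi_width_log2(enum block_size bsize) {
      return (int)bsize;  /* 8x8 -> 0, 16x16 -> 1, 32x32 -> 2, 64x64 -> 3 */
    }

    int main(void) {
      const enum block_size bsize = B_32X32;
      const int bwl = mi_width_log2(bsize);
      const int bw = 1 << bwl;  /* block width in 8x8 units: 4 */
      assert(bw == 4);
      return 0;
    }
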
+ if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { adjust_act_zbin(cpi, x); vp9_update_zbin_extra(cpi, x); } @@ -2083,657 +1993,111 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, // Increase zbin size to suppress noise cpi->zbin_mode_boost = 0; if (cpi->zbin_mode_boost_enabled) { - if (mbmi->ref_frame != INTRA_FRAME) { + if (mbmi->ref_frame[0] != INTRA_FRAME) { if (mbmi->mode == ZEROMV) { - if (mbmi->ref_frame != LAST_FRAME) + if (mbmi->ref_frame[0] != LAST_FRAME) cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; else cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST; - } else if (mbmi->mode == SPLITMV) + } else if (mbmi->sb_type < BLOCK_SIZE_SB8X8) { cpi->zbin_mode_boost = SPLIT_MV_ZBIN_BOOST; - else + } else { cpi->zbin_mode_boost = MV_ZBIN_BOOST; + } } else { cpi->zbin_mode_boost = INTRA_ZBIN_BOOST; } } vp9_update_zbin_extra(cpi, x); - - // SET VARIOUS PREDICTION FLAGS - - // Did the chosen reference frame match its predicted value. - ref_pred_flag = ((mbmi->ref_frame == vp9_get_pred_ref(cm, xd))); - vp9_set_pred_flag(xd, PRED_REF, ref_pred_flag); } - if (mbmi->ref_frame == INTRA_FRAME) { -#if 0 // def ENC_DEBUG - if (enc_debug) { - printf("Mode %d skip %d tx_size %d\n", mbmi->mode, x->skip, - mbmi->txfm_size); - } -#endif - if (mbmi->mode == B_PRED) { - vp9_encode_intra16x16mbuv(cm, x); - vp9_encode_intra4x4mby(x); - } else if (mbmi->mode == I8X8_PRED) { - vp9_encode_intra8x8mby(x); - vp9_encode_intra8x8mbuv(x); - } else { - vp9_encode_intra16x16mbuv(cm, x); - vp9_encode_intra16x16mby(cm, x); - } - + if (mbmi->ref_frame[0] == INTRA_FRAME) { + vp9_encode_intra_block_y(cm, x, (bsize < BLOCK_SIZE_SB8X8) ? + BLOCK_SIZE_SB8X8 : bsize); + vp9_encode_intra_block_uv(cm, x, (bsize < BLOCK_SIZE_SB8X8) ? + BLOCK_SIZE_SB8X8 : bsize); if (output_enabled) sum_intra_stats(cpi, x); } else { - int ref_fb_idx; -#ifdef ENC_DEBUG - if (enc_debug) - printf("Mode %d skip %d tx_size %d ref %d ref2 %d mv %d %d interp %d\n", - mbmi->mode, x->skip, mbmi->txfm_size, - mbmi->ref_frame, mbmi->second_ref_frame, - mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col, - mbmi->interp_filter); -#endif - - assert(cm->frame_type != KEY_FRAME); - - if (mbmi->ref_frame == LAST_FRAME) - ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx]; - else if (mbmi->ref_frame == GOLDEN_FRAME) - ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx]; - else - ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx]; - - setup_pred_block(&xd->pre, - &cpi->common.yv12_fb[ref_fb_idx], - mb_row, mb_col, - &xd->scale_factor[0], &xd->scale_factor_uv[0]); - - if (mbmi->second_ref_frame > 0) { - int second_ref_fb_idx; - - if (mbmi->second_ref_frame == LAST_FRAME) - second_ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx]; - else if (mbmi->second_ref_frame == GOLDEN_FRAME) - second_ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx]; - else - second_ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx]; - - setup_pred_block(&xd->second_pre, - &cpi->common.yv12_fb[second_ref_fb_idx], - mb_row, mb_col, - &xd->scale_factor[1], &xd->scale_factor_uv[1]); - } - - if (!x->skip) { - vp9_encode_inter16x16(cm, x, mb_row, mb_col); - - // Clear mb_skip_coeff if mb_no_coeff_skip is not set - if (!cpi->common.mb_no_coeff_skip) - mbmi->mb_skip_coeff = 0; - - } else { - vp9_build_inter16x16_predictors_mb(xd, - xd->dst.y_buffer, - xd->dst.u_buffer, - xd->dst.v_buffer, - xd->dst.y_stride, - xd->dst.uv_stride, - mb_row, mb_col); -#if CONFIG_COMP_INTERINTRA_PRED - if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) { - 
vp9_build_interintra_16x16_predictors_mb(xd, - xd->dst.y_buffer, - xd->dst.u_buffer, - xd->dst.v_buffer, - xd->dst.y_stride, - xd->dst.uv_stride); - } -#endif - } - } - - if (!x->skip) { -#ifdef ENC_DEBUG - if (enc_debug) { - int i, j; - printf("\n"); - printf("qcoeff\n"); - for (i = 0; i < 384; i++) { - printf("%3d ", xd->qcoeff[i]); - if (i % 16 == 15) printf("\n"); - } - printf("\n"); - printf("predictor\n"); - for (i = 0; i < 384; i++) { - printf("%3d ", xd->predictor[i]); - if (i % 16 == 15) printf("\n"); - } - printf("\n"); - printf("src_diff\n"); - for (i = 0; i < 384; i++) { - printf("%3d ", x->src_diff[i]); - if (i % 16 == 15) printf("\n"); - } - printf("\n"); - printf("diff\n"); - for (i = 0; i < 384; i++) { - printf("%3d ", xd->block[0].diff[i]); - if (i % 16 == 15) printf("\n"); - } - printf("\n"); - printf("final y\n"); - for (i = 0; i < 16; i++) { - for (j = 0; j < 16; j++) - printf("%3d ", xd->dst.y_buffer[i * xd->dst.y_stride + j]); - printf("\n"); - } - printf("\n"); - printf("final u\n"); - for (i = 0; i < 8; i++) { - for (j = 0; j < 8; j++) - printf("%3d ", xd->dst.u_buffer[i * xd->dst.uv_stride + j]); - printf("\n"); - } - printf("\n"); - printf("final v\n"); - for (i = 0; i < 8; i++) { - for (j = 0; j < 8; j++) - printf("%3d ", xd->dst.v_buffer[i * xd->dst.uv_stride + j]); - printf("\n"); - } - fflush(stdout); - } -#endif - -#if CONFIG_CODE_NONZEROCOUNT - gather_nzcs_mb16(cm, xd); -#endif - vp9_tokenize_mb(cpi, xd, t, !output_enabled); - - } else { - // FIXME(rbultje): not tile-aware (mi - 1) - int mb_skip_context = cpi->common.mb_no_coeff_skip ? - (mi - 1)->mbmi.mb_skip_coeff + (mi - mis)->mbmi.mb_skip_coeff : 0; - - if (cm->mb_no_coeff_skip) { - mbmi->mb_skip_coeff = 1; - if (output_enabled) - cpi->skip_true_count[mb_skip_context]++; - vp9_reset_mb_tokens_context(xd); - } else { - vp9_stuff_mb(cpi, xd, t, !output_enabled); - mbmi->mb_skip_coeff = 0; - if (output_enabled) - cpi->skip_false_count[mb_skip_context]++; - } - } - - if (output_enabled) { - int segment_id = mbmi->segment_id; - if (cpi->common.txfm_mode == TX_MODE_SELECT && - !((cpi->common.mb_no_coeff_skip && mbmi->mb_skip_coeff) || - (vp9_segfeature_active(&x->e_mbd, segment_id, SEG_LVL_SKIP)))) { - assert(mbmi->txfm_size <= TX_16X16); - if (mbmi->mode != B_PRED && mbmi->mode != I8X8_PRED && - mbmi->mode != SPLITMV) { - cpi->txfm_count_16x16p[mbmi->txfm_size]++; - } else if (mbmi->mode == I8X8_PRED || - (mbmi->mode == SPLITMV && - mbmi->partitioning != PARTITIONING_4X4)) { - cpi->txfm_count_8x8p[mbmi->txfm_size]++; - } - } else if (mbmi->mode != B_PRED && mbmi->mode != I8X8_PRED && - mbmi->mode != SPLITMV && cpi->common.txfm_mode >= ALLOW_16X16) { - mbmi->txfm_size = TX_16X16; - } else if (mbmi->mode != B_PRED && - !(mbmi->mode == SPLITMV && - mbmi->partitioning == PARTITIONING_4X4) && - cpi->common.txfm_mode >= ALLOW_8X8) { - mbmi->txfm_size = TX_8X8; - } else { - mbmi->txfm_size = TX_4X4; - } - } -} - -static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t, - int output_enabled, int mb_row, int mb_col) { - VP9_COMMON *const cm = &cpi->common; - MACROBLOCK *const x = &cpi->mb; - MACROBLOCKD *const xd = &x->e_mbd; - const uint8_t *src = x->src.y_buffer; - uint8_t *dst = xd->dst.y_buffer; - const uint8_t *usrc = x->src.u_buffer; - uint8_t *udst = xd->dst.u_buffer; - const uint8_t *vsrc = x->src.v_buffer; - uint8_t *vdst = xd->dst.v_buffer; - int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride; - int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride; - unsigned char 
ref_pred_flag; - MODE_INFO *mi = x->e_mbd.mode_info_context; - unsigned int segment_id = mi->mbmi.segment_id; - const int mis = cm->mode_info_stride; - -#ifdef ENC_DEBUG - enc_debug = (cpi->common.current_video_frame == 11 && cm->show_frame && - mb_row == 8 && mb_col == 0 && output_enabled); - if (enc_debug) { - printf("Encode SB32 %d %d output %d\n", mb_row, mb_col, output_enabled); - printf("Mode %d skip %d tx_size %d ref %d ref2 %d mv %d %d interp %d\n", - mi->mbmi.mode, x->skip, mi->mbmi.txfm_size, - mi->mbmi.ref_frame, mi->mbmi.second_ref_frame, - mi->mbmi.mv[0].as_mv.row, mi->mbmi.mv[0].as_mv.col, - mi->mbmi.interp_filter); - } -#endif - if (cm->frame_type == KEY_FRAME) { - if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { - adjust_act_zbin(cpi, x); - vp9_update_zbin_extra(cpi, x); - } - } else { - vp9_setup_interp_filters(xd, xd->mode_info_context->mbmi.interp_filter, cm); - - if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { - // Adjust the zbin based on this MB rate. - adjust_act_zbin(cpi, x); + int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, mbmi->ref_frame[0])]; + YV12_BUFFER_CONFIG *ref_fb = &cm->yv12_fb[idx]; + YV12_BUFFER_CONFIG *second_ref_fb = NULL; + if (mbmi->ref_frame[1] > 0) { + idx = cm->ref_frame_map[get_ref_frame_idx(cpi, mbmi->ref_frame[1])]; + second_ref_fb = &cm->yv12_fb[idx]; } - // Experimental code. Special case for gf and arf zeromv modes. - // Increase zbin size to suppress noise - cpi->zbin_mode_boost = 0; - if (cpi->zbin_mode_boost_enabled) { - if (xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME) { - if (xd->mode_info_context->mbmi.mode == ZEROMV) { - if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) - cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; - else - cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST; - } else if (xd->mode_info_context->mbmi.mode == SPLITMV) - cpi->zbin_mode_boost = SPLIT_MV_ZBIN_BOOST; - else - cpi->zbin_mode_boost = MV_ZBIN_BOOST; - } else { - cpi->zbin_mode_boost = INTRA_ZBIN_BOOST; - } - } - - vp9_update_zbin_extra(cpi, x); - - // SET VARIOUS PREDICTION FLAGS - // Did the chosen reference frame match its predicted value. 
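
The if/else ladders in the superblock encoders being deleted below map each reference frame (LAST/GOLDEN/ALTREF) to its frame-buffer slot; the new encode_superblock replaces them with a single ref_frame_map/get_ref_frame_idx lookup. The same idea as a compilable sketch, with made-up slot values:

    #include <assert.h>

    enum { INTRA_FRAME = 0, LAST_FRAME = 1, GOLDEN_FRAME = 2, ALTREF_FRAME = 3 };

    struct enc { int lst_fb_idx, gld_fb_idx, alt_fb_idx; };

    /* One table lookup replaces a three-way if/else per call site. */
    static int ref_frame_slot(const struct enc *e, int ref_frame) {
      const int slots[4] = { -1, e->lst_fb_idx, e->gld_fb_idx, e->alt_fb_idx };
      assert(ref_frame >= LAST_FRAME && ref_frame <= ALTREF_FRAME);
      return slots[ref_frame];
    }

    int main(void) {
      const struct enc e = { 5, 2, 7 };  /* hypothetical buffer indices */
      assert(ref_frame_slot(&e, GOLDEN_FRAME) == 2);
      return 0;
    }
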
- ref_pred_flag = ((xd->mode_info_context->mbmi.ref_frame == - vp9_get_pred_ref(cm, xd))); - vp9_set_pred_flag(xd, PRED_REF, ref_pred_flag); - } - - - if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { - vp9_build_intra_predictors_sby_s(&x->e_mbd); - vp9_build_intra_predictors_sbuv_s(&x->e_mbd); - if (output_enabled) - sum_intra_stats(cpi, x); - } else { - int ref_fb_idx; - assert(cm->frame_type != KEY_FRAME); - if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME) - ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx]; - else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME) - ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx]; - else - ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx]; - - setup_pred_block(&xd->pre, - &cpi->common.yv12_fb[ref_fb_idx], - mb_row, mb_col, - &xd->scale_factor[0], &xd->scale_factor_uv[0]); - - if (xd->mode_info_context->mbmi.second_ref_frame > 0) { - int second_ref_fb_idx; + setup_pre_planes(xd, ref_fb, second_ref_fb, + mi_row, mi_col, xd->scale_factor, xd->scale_factor_uv); - if (xd->mode_info_context->mbmi.second_ref_frame == LAST_FRAME) - second_ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx]; - else if (xd->mode_info_context->mbmi.second_ref_frame == GOLDEN_FRAME) - second_ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx]; - else - second_ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx]; - - setup_pred_block(&xd->second_pre, - &cpi->common.yv12_fb[second_ref_fb_idx], - mb_row, mb_col, - &xd->scale_factor[1], &xd->scale_factor_uv[1]); - } - - vp9_build_inter32x32_predictors_sb(xd, xd->dst.y_buffer, - xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.y_stride, xd->dst.uv_stride, - mb_row, mb_col); + vp9_build_inter_predictors_sb(xd, mi_row, mi_col, + bsize < BLOCK_SIZE_SB8X8 ? 
BLOCK_SIZE_SB8X8 + : bsize); } - if (!x->skip) { - vp9_subtract_sby_s_c(x->src_diff, src, src_y_stride, - dst, dst_y_stride); - vp9_subtract_sbuv_s_c(x->src_diff, - usrc, vsrc, src_uv_stride, - udst, vdst, dst_uv_stride); - switch (mi->mbmi.txfm_size) { - case TX_32X32: - vp9_transform_sby_32x32(x); - vp9_transform_sbuv_16x16(x); - vp9_quantize_sby_32x32(x); - vp9_quantize_sbuv_16x16(x); - if (x->optimize) { - vp9_optimize_sby_32x32(cm, x); - vp9_optimize_sbuv_16x16(cm, x); - } - vp9_inverse_transform_sby_32x32(xd); - vp9_inverse_transform_sbuv_16x16(xd); - break; - case TX_16X16: - vp9_transform_sby_16x16(x); - vp9_transform_sbuv_16x16(x); - vp9_quantize_sby_16x16(x); - vp9_quantize_sbuv_16x16(x); - if (x->optimize) { - vp9_optimize_sby_16x16(cm, x); - vp9_optimize_sbuv_16x16(cm, x); - } - vp9_inverse_transform_sby_16x16(xd); - vp9_inverse_transform_sbuv_16x16(xd); - break; - case TX_8X8: - vp9_transform_sby_8x8(x); - vp9_transform_sbuv_8x8(x); - vp9_quantize_sby_8x8(x); - vp9_quantize_sbuv_8x8(x); - if (x->optimize) { - vp9_optimize_sby_8x8(cm, x); - vp9_optimize_sbuv_8x8(cm, x); - } - vp9_inverse_transform_sby_8x8(xd); - vp9_inverse_transform_sbuv_8x8(xd); - break; - case TX_4X4: - vp9_transform_sby_4x4(x); - vp9_transform_sbuv_4x4(x); - vp9_quantize_sby_4x4(x); - vp9_quantize_sbuv_4x4(x); - if (x->optimize) { - vp9_optimize_sby_4x4(cm, x); - vp9_optimize_sbuv_4x4(cm, x); - } - vp9_inverse_transform_sby_4x4(xd); - vp9_inverse_transform_sbuv_4x4(xd); - break; - default: assert(0); - } - vp9_recon_sby_s_c(xd, dst); - vp9_recon_sbuv_s_c(xd, udst, vdst); -#if CONFIG_CODE_NONZEROCOUNT - gather_nzcs_sb32(cm, xd); -#endif - - vp9_tokenize_sb(cpi, xd, t, !output_enabled); + if (xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME) { + vp9_tokenize_sb(cpi, xd, t, !output_enabled, + (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 : bsize); + } else if (!x->skip) { + vp9_encode_sb(cm, x, (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 : bsize); + vp9_tokenize_sb(cpi, xd, t, !output_enabled, + (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 : bsize); } else { // FIXME(rbultje): not tile-aware (mi - 1) - int mb_skip_context = cm->mb_no_coeff_skip ? - (mi - 1)->mbmi.mb_skip_coeff + (mi - mis)->mbmi.mb_skip_coeff : 0; - - mi->mbmi.mb_skip_coeff = 1; - if (cm->mb_no_coeff_skip) { - if (output_enabled) - cpi->skip_true_count[mb_skip_context]++; - vp9_reset_sb_tokens_context(xd); - } else { - vp9_stuff_sb(cpi, xd, t, !output_enabled); - if (output_enabled) - cpi->skip_false_count[mb_skip_context]++; - } - } - - // copy skip flag on all mb_mode_info contexts in this SB - // if this was a skip at this txfm size - if (mb_col < cm->mb_cols - 1) - mi[1].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff; - if (mb_row < cm->mb_rows - 1) { - mi[mis].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff; - if (mb_col < cm->mb_cols - 1) - mi[mis + 1].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff; - } - - if (output_enabled) { - if (cm->txfm_mode == TX_MODE_SELECT && - !((cm->mb_no_coeff_skip && mi->mbmi.mb_skip_coeff) || - (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) { - cpi->txfm_count_32x32p[mi->mbmi.txfm_size]++; - } else { - TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ? 
TX_32X32 : cm->txfm_mode; - mi->mbmi.txfm_size = sz; - if (mb_col < cm->mb_cols - 1) - mi[1].mbmi.txfm_size = sz; - if (mb_row < cm->mb_rows - 1) { - mi[mis].mbmi.txfm_size = sz; - if (mb_col < cm->mb_cols - 1) - mi[mis + 1].mbmi.txfm_size = sz; - } - } - } -} - -static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t, - int output_enabled, int mb_row, int mb_col) { - VP9_COMMON *const cm = &cpi->common; - MACROBLOCK *const x = &cpi->mb; - MACROBLOCKD *const xd = &x->e_mbd; - const uint8_t *src = x->src.y_buffer; - uint8_t *dst = xd->dst.y_buffer; - const uint8_t *usrc = x->src.u_buffer; - uint8_t *udst = xd->dst.u_buffer; - const uint8_t *vsrc = x->src.v_buffer; - uint8_t *vdst = xd->dst.v_buffer; - int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride; - int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride; - unsigned char ref_pred_flag; - int n; - MODE_INFO *mi = x->e_mbd.mode_info_context; - unsigned int segment_id = mi->mbmi.segment_id; - const int mis = cm->mode_info_stride; - -#ifdef ENC_DEBUG - enc_debug = (cpi->common.current_video_frame == 11 && cm->show_frame && - mb_row == 8 && mb_col == 0 && output_enabled); - if (enc_debug) - printf("Encode SB64 %d %d output %d\n", mb_row, mb_col, output_enabled); -#endif - if (cm->frame_type == KEY_FRAME) { - if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { - adjust_act_zbin(cpi, x); - vp9_update_zbin_extra(cpi, x); - } - } else { - vp9_setup_interp_filters(xd, xd->mode_info_context->mbmi.interp_filter, cm); - - if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { - // Adjust the zbin based on this MB rate. - adjust_act_zbin(cpi, x); - } + int mb_skip_context = + (mi - 1)->mbmi.mb_skip_coeff + (mi - mis)->mbmi.mb_skip_coeff; - // Experimental code. Special case for gf and arf zeromv modes. - // Increase zbin size to suppress noise - cpi->zbin_mode_boost = 0; - if (cpi->zbin_mode_boost_enabled) { - if (xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME) { - if (xd->mode_info_context->mbmi.mode == ZEROMV) { - if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) - cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; - else - cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST; - } else if (xd->mode_info_context->mbmi.mode == SPLITMV) { - cpi->zbin_mode_boost = SPLIT_MV_ZBIN_BOOST; - } else { - cpi->zbin_mode_boost = MV_ZBIN_BOOST; - } - } else { - cpi->zbin_mode_boost = INTRA_ZBIN_BOOST; - } - } - - vp9_update_zbin_extra(cpi, x); - - // Did the chosen reference frame match its predicted value. 
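
One structural point about the transform-size bookkeeping in this patch: the flat txfm_count_* arrays incremented by the deleted code are replaced by per-context tallies (cm->fc.tx_count_*p[context][size]), which vp9_encode_frame later sums across contexts when settling the frame's txfm_mode. A sketch of that aggregation, with an illustrative context count (the real TX_SIZE_CONTEXTS value may differ):

    #define TX_SIZE_CONTEXTS 2
    enum { TX_4X4, TX_8X8, TX_16X16, TX_32X32, TX_SIZES };

    static unsigned tx_count_32x32p[TX_SIZE_CONTEXTS][TX_SIZES];

    /* Sum one transform size across all prediction contexts, as done when
     * deciding whether any 32x32 transforms were actually used. */
    static unsigned total_count(unsigned counts[TX_SIZE_CONTEXTS][TX_SIZES],
                                int size) {
      unsigned sum = 0;
      int c;
      for (c = 0; c < TX_SIZE_CONTEXTS; c++)
        sum += counts[c][size];
      return sum;
    }

    int main(void) {
      tx_count_32x32p[0][TX_32X32] = 3;
      tx_count_32x32p[1][TX_32X32] = 1;
      return total_count(tx_count_32x32p, TX_32X32) == 4 ? 0 : 1;
    }
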
- ref_pred_flag = ((xd->mode_info_context->mbmi.ref_frame == - vp9_get_pred_ref(cm, xd))); - vp9_set_pred_flag(xd, PRED_REF, ref_pred_flag); - } - - if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { - vp9_build_intra_predictors_sb64y_s(&x->e_mbd); - vp9_build_intra_predictors_sb64uv_s(&x->e_mbd); + mbmi->mb_skip_coeff = 1; if (output_enabled) - sum_intra_stats(cpi, x); - } else { - int ref_fb_idx; - - assert(cm->frame_type != KEY_FRAME); - - if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME) - ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx]; - else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME) - ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx]; - else - ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx]; - - setup_pred_block(&xd->pre, - &cpi->common.yv12_fb[ref_fb_idx], - mb_row, mb_col, - &xd->scale_factor[0], &xd->scale_factor_uv[0]); - - if (xd->mode_info_context->mbmi.second_ref_frame > 0) { - int second_ref_fb_idx; - - if (xd->mode_info_context->mbmi.second_ref_frame == LAST_FRAME) - second_ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx]; - else if (xd->mode_info_context->mbmi.second_ref_frame == GOLDEN_FRAME) - second_ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx]; - else - second_ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx]; - - setup_pred_block(&xd->second_pre, - &cpi->common.yv12_fb[second_ref_fb_idx], - mb_row, mb_col, - &xd->scale_factor[1], &xd->scale_factor_uv[1]); - } - - vp9_build_inter64x64_predictors_sb(xd, xd->dst.y_buffer, - xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.y_stride, xd->dst.uv_stride, - mb_row, mb_col); - } - - if (!x->skip) { - vp9_subtract_sb64y_s_c(x->src_diff, src, src_y_stride, dst, dst_y_stride); - vp9_subtract_sb64uv_s_c(x->src_diff, usrc, vsrc, src_uv_stride, - udst, vdst, dst_uv_stride); - - switch (xd->mode_info_context->mbmi.txfm_size) { - case TX_32X32: - vp9_transform_sb64y_32x32(x); - vp9_transform_sb64uv_32x32(x); - vp9_quantize_sb64y_32x32(x); - vp9_quantize_sb64uv_32x32(x); - if (x->optimize) { - vp9_optimize_sb64y_32x32(cm, x); - vp9_optimize_sb64uv_32x32(cm, x); - } - vp9_inverse_transform_sb64y_32x32(xd); - vp9_inverse_transform_sb64uv_32x32(xd); - break; - case TX_16X16: - vp9_transform_sb64y_16x16(x); - vp9_transform_sb64uv_16x16(x); - vp9_quantize_sb64y_16x16(x); - vp9_quantize_sb64uv_16x16(x); - if (x->optimize) { - vp9_optimize_sb64y_16x16(cm, x); - vp9_optimize_sb64uv_16x16(cm, x); - } - vp9_inverse_transform_sb64y_16x16(xd); - vp9_inverse_transform_sb64uv_16x16(xd); - break; - case TX_8X8: - vp9_transform_sb64y_8x8(x); - vp9_transform_sb64uv_8x8(x); - vp9_quantize_sb64y_8x8(x); - vp9_quantize_sb64uv_8x8(x); - if (x->optimize) { - vp9_optimize_sb64y_8x8(cm, x); - vp9_optimize_sb64uv_8x8(cm, x); - } - vp9_inverse_transform_sb64y_8x8(xd); - vp9_inverse_transform_sb64uv_8x8(xd); - break; - case TX_4X4: - vp9_transform_sb64y_4x4(x); - vp9_transform_sb64uv_4x4(x); - vp9_quantize_sb64y_4x4(x); - vp9_quantize_sb64uv_4x4(x); - if (x->optimize) { - vp9_optimize_sb64y_4x4(cm, x); - vp9_optimize_sb64uv_4x4(cm, x); - } - vp9_inverse_transform_sb64y_4x4(xd); - vp9_inverse_transform_sb64uv_4x4(xd); - break; - default: assert(0); - } - vp9_recon_sb64y_s_c(xd, dst); - vp9_recon_sb64uv_s_c(&x->e_mbd, udst, vdst); -#if CONFIG_CODE_NONZEROCOUNT - gather_nzcs_sb64(cm, &x->e_mbd); -#endif - vp9_tokenize_sb64(cpi, &x->e_mbd, t, !output_enabled); - } else { - // FIXME(rbultje): not tile-aware (mi - 1) - int mb_skip_context = cpi->common.mb_no_coeff_skip ? 
- (mi - 1)->mbmi.mb_skip_coeff + (mi - mis)->mbmi.mb_skip_coeff : 0; - - xd->mode_info_context->mbmi.mb_skip_coeff = 1; - if (cm->mb_no_coeff_skip) { - if (output_enabled) - cpi->skip_true_count[mb_skip_context]++; - vp9_reset_sb64_tokens_context(xd); - } else { - vp9_stuff_sb64(cpi, xd, t, !output_enabled); - if (output_enabled) - cpi->skip_false_count[mb_skip_context]++; - } + cm->fc.mbskip_count[mb_skip_context][1]++; + vp9_reset_sb_tokens_context(xd, + (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 : bsize); } // copy skip flag on all mb_mode_info contexts in this SB // if this was a skip at this txfm size - for (n = 1; n < 16; n++) { - const int x_idx = n & 3, y_idx = n >> 2; - if (mb_col + x_idx < cm->mb_cols && mb_row + y_idx < cm->mb_rows) + for (n = 1; n < bw * bh; n++) { + const int x_idx = n & (bw - 1), y_idx = n >> bwl; + if (mi_col + x_idx < cm->mi_cols && mi_row + y_idx < cm->mi_rows) mi[x_idx + y_idx * mis].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff; } if (output_enabled) { if (cm->txfm_mode == TX_MODE_SELECT && - !((cm->mb_no_coeff_skip && mi->mbmi.mb_skip_coeff) || - (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) { - cpi->txfm_count_32x32p[mi->mbmi.txfm_size]++; + mbmi->sb_type >= BLOCK_SIZE_SB8X8 && + !(mbmi->ref_frame[0] != INTRA_FRAME && (mbmi->mb_skip_coeff || + vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) { + const int context = vp9_get_pred_context(cm, xd, PRED_TX_SIZE); + if (bsize >= BLOCK_SIZE_SB32X32) { + cm->fc.tx_count_32x32p[context][mbmi->txfm_size]++; + } else if (bsize >= BLOCK_SIZE_MB16X16) { + cm->fc.tx_count_16x16p[context][mbmi->txfm_size]++; + } else { + cm->fc.tx_count_8x8p[context][mbmi->txfm_size]++; + } } else { int x, y; TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ? TX_32X32 : cm->txfm_mode; - for (y = 0; y < 4; y++) { - for (x = 0; x < 4; x++) { - if (mb_col + x < cm->mb_cols && mb_row + y < cm->mb_rows) { + // The new intra coding scheme requires no change of transform size + if (mi->mbmi.ref_frame[0] != INTRA_FRAME) { + if (sz == TX_32X32 && bsize < BLOCK_SIZE_SB32X32) + sz = TX_16X16; + if (sz == TX_16X16 && bsize < BLOCK_SIZE_MB16X16) + sz = TX_8X8; + if (sz == TX_8X8 && bsize < BLOCK_SIZE_SB8X8) + sz = TX_4X4; + } else if (bsize >= BLOCK_SIZE_SB8X8) { + sz = mbmi->txfm_size; + } else { + sz = TX_4X4; + } + + for (y = 0; y < bh; y++) { + for (x = 0; x < bw; x++) { + if (mi_col + x < cm->mi_cols && mi_row + y < cm->mi_rows) { mi[mis * y + x].mbmi.txfm_size = sz; } } diff --git a/vp9/encoder/vp9_encodeframe.h b/vp9/encoder/vp9_encodeframe.h index 9f13edcec9ddfaa35e60071930fcabcd9cb53629..d37bdca368a191d3195fe686be593ecc9b3493c9 100644 --- a/vp9/encoder/vp9_encodeframe.h +++ b/vp9/encoder/vp9_encodeframe.h @@ -13,9 +13,12 @@ #define VP9_ENCODER_VP9_ENCODEFRAME_H_ struct macroblock; +struct yv12_buffer_config; void vp9_build_block_offsets(struct macroblock *x); -void vp9_setup_block_ptrs(struct macroblock *x); +void vp9_setup_src_planes(struct macroblock *x, + const struct yv12_buffer_config *src, + int mb_row, int mb_col); #endif // VP9_ENCODER_VP9_ENCODEFRAME_H_ diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c index eddacb872a4378b85b6791e02fd251917f9f1346..fe4e9fd0f909856daf5510d675bd9e50f6eaea42 100644 --- a/vp9/encoder/vp9_encodeintra.c +++ b/vp9/encoder/vp9_encodeintra.c @@ -19,217 +19,15 @@ int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred) { MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; (void) cpi; - + mbmi->mode = DC_PRED; + mbmi->ref_frame[0] = INTRA_FRAME; 
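
The rewritten vp9_encode_intra below returns vp9_get_mb_ss() over plane[0].src_diff, i.e. the raw sum of squared residuals, as its distortion proxy. For reference, the plain-C equivalent of that reduction (libvpx ships optimized/assembly versions; this is only the reference idea):

    #include <stdint.h>
    #include <stdio.h>

    /* Sum of squares over a 16x16 block of residuals. */
    static unsigned int mb_sum_squares(const int16_t *src_diff) {
      unsigned int sum = 0;
      int i;
      for (i = 0; i < 256; i++)
        sum += (unsigned int)(src_diff[i] * src_diff[i]);
      return sum;
    }

    int main(void) {
      int16_t diff[256] = { 3, -4 };  /* remaining entries are zero */
      printf("ss = %u\n", mb_sum_squares(diff));  /* prints 25 */
      return 0;
    }
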
if (use_16x16_pred) { - mbmi->mode = DC_PRED; - mbmi->uv_mode = DC_PRED; - mbmi->ref_frame = INTRA_FRAME; - - vp9_encode_intra16x16mby(&cpi->common, x); - } else { - int i; - - for (i = 0; i < 16; i++) { - x->e_mbd.block[i].bmi.as_mode.first = B_DC_PRED; - vp9_encode_intra4x4block(x, i); - } - } - - return vp9_get_mb_ss(x->src_diff); -} - -void vp9_encode_intra4x4block(MACROBLOCK *x, int ib) { - BLOCKD *b = &x->e_mbd.block[ib]; - BLOCK *be = &x->block[ib]; - TX_TYPE tx_type; - -#if CONFIG_NEWBINTRAMODES - b->bmi.as_mode.context = vp9_find_bpred_context(&x->e_mbd, b); -#endif - - vp9_intra4x4_predict(&x->e_mbd, b, b->bmi.as_mode.first, b->predictor); - vp9_subtract_b(be, b, 16); - - tx_type = get_tx_type_4x4(&x->e_mbd, ib); - if (tx_type != DCT_DCT) { - vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type); - vp9_ht_quantize_b_4x4(x, ib, tx_type); - vp9_short_iht4x4(b->dqcoeff, b->diff, 16, tx_type); - } else { - x->fwd_txm4x4(be->src_diff, be->coeff, 32); - x->quantize_b_4x4(x, ib); - vp9_inverse_transform_b_4x4(&x->e_mbd, x->e_mbd.eobs[ib], - b->dqcoeff, b->diff, 32); - } - - vp9_recon_b(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); -} - -void vp9_encode_intra4x4mby(MACROBLOCK *mb) { - int i; - - for (i = 0; i < 16; i++) - vp9_encode_intra4x4block(mb, i); -} - -void vp9_encode_intra16x16mby(VP9_COMMON *const cm, MACROBLOCK *x) { - MACROBLOCKD *xd = &x->e_mbd; - BLOCK *b = &x->block[0]; - TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; - - vp9_build_intra_predictors_mby(xd); - - vp9_subtract_mby(x->src_diff, *(b->base_src), xd->predictor, b->src_stride); - - switch (tx_size) { - case TX_16X16: - vp9_transform_mby_16x16(x); - vp9_quantize_mby_16x16(x); - if (x->optimize) - vp9_optimize_mby_16x16(cm, x); - vp9_inverse_transform_mby_16x16(xd); - break; - case TX_8X8: - vp9_transform_mby_8x8(x); - vp9_quantize_mby_8x8(x); - if (x->optimize) - vp9_optimize_mby_8x8(cm, x); - vp9_inverse_transform_mby_8x8(xd); - break; - default: - vp9_transform_mby_4x4(x); - vp9_quantize_mby_4x4(x); - if (x->optimize) - vp9_optimize_mby_4x4(cm, x); - vp9_inverse_transform_mby_4x4(xd); - break; - } - - vp9_recon_mby(xd); -} - -void vp9_encode_intra16x16mbuv(VP9_COMMON *const cm, MACROBLOCK *x) { - MACROBLOCKD *xd = &x->e_mbd; - TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; - - vp9_build_intra_predictors_mbuv(xd); - - vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer, - xd->predictor, x->src.uv_stride); - - switch (tx_size) { - case TX_4X4: - vp9_transform_mbuv_4x4(x); - vp9_quantize_mbuv_4x4(x); - if (x->optimize) - vp9_optimize_mbuv_4x4(cm, x); - vp9_inverse_transform_mbuv_4x4(xd); - break; - default: // 16x16 or 8x8 - vp9_transform_mbuv_8x8(x); - vp9_quantize_mbuv_8x8(x); - if (x->optimize) - vp9_optimize_mbuv_8x8(cm, x); - vp9_inverse_transform_mbuv_8x8(xd); - break; - } - - vp9_recon_intra_mbuv(xd); -} - -void vp9_encode_intra8x8(MACROBLOCK *x, int ib) { - MACROBLOCKD *xd = &x->e_mbd; - BLOCKD *b = &xd->block[ib]; - BLOCK *be = &x->block[ib]; - const int iblock[4] = {0, 1, 4, 5}; - int i; - TX_TYPE tx_type; - - vp9_intra8x8_predict(xd, b, b->bmi.as_mode.first, b->predictor); - // generate residual blocks - vp9_subtract_4b_c(be, b, 16); - - if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) { - int idx = (ib & 0x02) ? 
(ib + 2) : ib; - - tx_type = get_tx_type_8x8(xd, ib); - if (tx_type != DCT_DCT) { - vp9_short_fht8x8(be->src_diff, (x->block + idx)->coeff, 16, tx_type); - x->quantize_b_8x8(x, idx, tx_type); - vp9_short_iht8x8(xd->block[idx].dqcoeff, xd->block[ib].diff, - 16, tx_type); - } else { - x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32); - x->quantize_b_8x8(x, idx, DCT_DCT); - vp9_short_idct8x8(xd->block[idx].dqcoeff, xd->block[ib].diff, 32); - } + mbmi->txfm_size = mbmi->sb_type >= BLOCK_SIZE_MB16X16 ? TX_16X16 : TX_8X8; + vp9_encode_intra_block_y(&cpi->common, x, mbmi->sb_type); } else { - for (i = 0; i < 4; i++) { - b = &xd->block[ib + iblock[i]]; - be = &x->block[ib + iblock[i]]; - tx_type = get_tx_type_4x4(xd, ib + iblock[i]); - if (tx_type != DCT_DCT) { - vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type); - vp9_ht_quantize_b_4x4(x, ib + iblock[i], tx_type); - vp9_short_iht4x4(b->dqcoeff, b->diff, 16, tx_type); - } else if (!(i & 1) && - get_tx_type_4x4(xd, ib + iblock[i] + 1) == DCT_DCT) { - x->fwd_txm8x4(be->src_diff, be->coeff, 32); - x->quantize_b_4x4_pair(x, ib + iblock[i], ib + iblock[i] + 1); - vp9_inverse_transform_b_4x4(xd, xd->eobs[ib + iblock[i]], - b->dqcoeff, b->diff, 32); - vp9_inverse_transform_b_4x4(xd, xd->eobs[ib + iblock[i] + 1], - (b + 1)->dqcoeff, (b + 1)->diff, 32); - i++; - } else { - x->fwd_txm4x4(be->src_diff, be->coeff, 32); - x->quantize_b_4x4(x, ib + iblock[i]); - vp9_inverse_transform_b_4x4(xd, xd->eobs[ib + iblock[i]], - b->dqcoeff, b->diff, 32); - } - } + mbmi->txfm_size = TX_4X4; + vp9_encode_intra_block_y(&cpi->common, x, mbmi->sb_type); } - // reconstruct submacroblock - for (i = 0; i < 4; i++) { - b = &xd->block[ib + iblock[i]]; - vp9_recon_b_c(b->predictor, b->diff, *(b->base_dst) + b->dst, - b->dst_stride); - } -} - -void vp9_encode_intra8x8mby(MACROBLOCK *x) { - int i; - - for (i = 0; i < 4; i++) - vp9_encode_intra8x8(x, vp9_i8x8_block[i]); -} - -static void encode_intra_uv4x4(MACROBLOCK *x, int ib, int mode) { - BLOCKD *b = &x->e_mbd.block[ib]; - BLOCK *be = &x->block[ib]; - - vp9_intra_uv4x4_predict(&x->e_mbd, b, mode, b->predictor); - - vp9_subtract_b(be, b, 8); - - x->fwd_txm4x4(be->src_diff, be->coeff, 16); - x->quantize_b_4x4(x, ib); - vp9_inverse_transform_b_4x4(&x->e_mbd, x->e_mbd.eobs[ib], - b->dqcoeff, b->diff, 16); - - vp9_recon_uv_b_c(b->predictor, b->diff, *(b->base_dst) + b->dst, - b->dst_stride); -} - -void vp9_encode_intra8x8mbuv(MACROBLOCK *x) { - int i; - - for (i = 0; i < 4; i++) { - BLOCKD *b = &x->e_mbd.block[vp9_i8x8_block[i]]; - int mode = b->bmi.as_mode.first; - - encode_intra_uv4x4(x, i + 16, mode); // u - encode_intra_uv4x4(x, i + 20, mode); // v - } + return vp9_get_mb_ss(x->plane[0].src_diff); } diff --git a/vp9/encoder/vp9_encodeintra.h b/vp9/encoder/vp9_encodeintra.h index 0b19b5652c95bfe9890bd88d6466cc6909ad337d..14d144b7485679c76f29743faa19d8f60fef6168 100644 --- a/vp9/encoder/vp9_encodeintra.h +++ b/vp9/encoder/vp9_encodeintra.h @@ -14,12 +14,9 @@ #include "vp9/encoder/vp9_onyx_int.h" int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred); -void vp9_encode_intra16x16mby(VP9_COMMON *const cm, MACROBLOCK *x); -void vp9_encode_intra16x16mbuv(VP9_COMMON *const cm, MACROBLOCK *x); -void vp9_encode_intra4x4mby(MACROBLOCK *mb); -void vp9_encode_intra4x4block(MACROBLOCK *x, int ib); -void vp9_encode_intra8x8mby(MACROBLOCK *x); -void vp9_encode_intra8x8mbuv(MACROBLOCK *x); -void vp9_encode_intra8x8(MACROBLOCK *x, int ib); +void vp9_encode_intra_block_y(VP9_COMMON *const cm, MACROBLOCK *mb, + 
BLOCK_SIZE_TYPE bs); +void vp9_encode_intra_block_uv(VP9_COMMON *const cm, MACROBLOCK *mb, + BLOCK_SIZE_TYPE bs); #endif // VP9_ENCODER_VP9_ENCODEINTRA_H_ diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 3ad429a9e543753d4820a270a0baa466d79deb75..90f00d2be14c6672fc51d1d7b35ab1c1ee5daa9a 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -20,480 +20,54 @@ #include "vp9/common/vp9_systemdependent.h" #include "vp9_rtcd.h" -void vp9_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch) { - uint8_t *src_ptr = (*(be->base_src) + be->src); - int16_t *diff_ptr = be->src_diff; - uint8_t *pred_ptr = bd->predictor; - int src_stride = be->src_stride; +DECLARE_ALIGNED(16, extern const uint8_t, + vp9_pt_energy_class[MAX_ENTROPY_TOKENS]); +void vp9_subtract_block(int rows, int cols, + int16_t *diff_ptr, int diff_stride, + const uint8_t *src_ptr, int src_stride, + const uint8_t *pred_ptr, int pred_stride) { int r, c; - for (r = 0; r < 4; r++) { - for (c = 0; c < 4; c++) + for (r = 0; r < rows; r++) { + for (c = 0; c < cols; c++) diff_ptr[c] = src_ptr[c] - pred_ptr[c]; - diff_ptr += pitch; - pred_ptr += pitch; + diff_ptr += diff_stride; + pred_ptr += pred_stride; src_ptr += src_stride; } } -void vp9_subtract_4b_c(BLOCK *be, BLOCKD *bd, int pitch) { - uint8_t *src_ptr = (*(be->base_src) + be->src); - int16_t *diff_ptr = be->src_diff; - uint8_t *pred_ptr = bd->predictor; - int src_stride = be->src_stride; - int r, c; - - for (r = 0; r < 8; r++) { - for (c = 0; c < 8; c++) - diff_ptr[c] = src_ptr[c] - pred_ptr[c]; - - diff_ptr += pitch; - pred_ptr += pitch; - src_ptr += src_stride; - } -} - -void vp9_subtract_mbuv_s_c(int16_t *diff, const uint8_t *usrc, - const uint8_t *vsrc, int src_stride, - const uint8_t *upred, - const uint8_t *vpred, int dst_stride) { - int16_t *udiff = diff + 256; - int16_t *vdiff = diff + 320; - int r, c; - - for (r = 0; r < 8; r++) { - for (c = 0; c < 8; c++) - udiff[c] = usrc[c] - upred[c]; - - udiff += 8; - upred += dst_stride; - usrc += src_stride; - } - - for (r = 0; r < 8; r++) { - for (c = 0; c < 8; c++) { - vdiff[c] = vsrc[c] - vpred[c]; - } - - vdiff += 8; - vpred += dst_stride; - vsrc += src_stride; - } -} - -void vp9_subtract_mbuv_c(int16_t *diff, uint8_t *usrc, - uint8_t *vsrc, uint8_t *pred, int stride) { - uint8_t *upred = pred + 256; - uint8_t *vpred = pred + 320; - - vp9_subtract_mbuv_s_c(diff, usrc, vsrc, stride, upred, vpred, 8); -} - -void vp9_subtract_mby_s_c(int16_t *diff, const uint8_t *src, int src_stride, - const uint8_t *pred, int dst_stride) { - int r, c; - - for (r = 0; r < 16; r++) { - for (c = 0; c < 16; c++) - diff[c] = src[c] - pred[c]; - - diff += 16; - pred += dst_stride; - src += src_stride; - } -} - -void vp9_subtract_sby_s_c(int16_t *diff, const uint8_t *src, int src_stride, - const uint8_t *pred, int dst_stride) { - int r, c; - - for (r = 0; r < 32; r++) { - for (c = 0; c < 32; c++) - diff[c] = src[c] - pred[c]; - - diff += 32; - pred += dst_stride; - src += src_stride; - } -} - -void vp9_subtract_sbuv_s_c(int16_t *diff, const uint8_t *usrc, - const uint8_t *vsrc, int src_stride, - const uint8_t *upred, - const uint8_t *vpred, int dst_stride) { - int16_t *udiff = diff + 1024; - int16_t *vdiff = diff + 1024 + 256; - int r, c; - - for (r = 0; r < 16; r++) { - for (c = 0; c < 16; c++) - udiff[c] = usrc[c] - upred[c]; - - udiff += 16; - upred += dst_stride; - usrc += src_stride; - } - - for (r = 0; r < 16; r++) { - for (c = 0; c < 16; c++) - vdiff[c] = vsrc[c] - vpred[c]; - - vdiff += 16; - vpred += dst_stride; 
- vsrc += src_stride; - } -} - -void vp9_subtract_sb64y_s_c(int16_t *diff, const uint8_t *src, int src_stride, - const uint8_t *pred, int dst_stride) { - int r, c; - - for (r = 0; r < 64; r++) { - for (c = 0; c < 64; c++) { - diff[c] = src[c] - pred[c]; - } - - diff += 64; - pred += dst_stride; - src += src_stride; - } -} - -void vp9_subtract_sb64uv_s_c(int16_t *diff, const uint8_t *usrc, - const uint8_t *vsrc, int src_stride, - const uint8_t *upred, - const uint8_t *vpred, int dst_stride) { - int16_t *udiff = diff + 4096; - int16_t *vdiff = diff + 4096 + 1024; - int r, c; - - for (r = 0; r < 32; r++) { - for (c = 0; c < 32; c++) { - udiff[c] = usrc[c] - upred[c]; - } - - udiff += 32; - upred += dst_stride; - usrc += src_stride; - } - - for (r = 0; r < 32; r++) { - for (c = 0; c < 32; c++) { - vdiff[c] = vsrc[c] - vpred[c]; - } - - vdiff += 32; - vpred += dst_stride; - vsrc += src_stride; - } -} - -void vp9_subtract_mby_c(int16_t *diff, uint8_t *src, - uint8_t *pred, int stride) { - vp9_subtract_mby_s_c(diff, src, stride, pred, 16); -} - -static void subtract_mb(MACROBLOCK *x) { - BLOCK *b = &x->block[0]; - vp9_subtract_mby(x->src_diff, *(b->base_src), x->e_mbd.predictor, - b->src_stride); - vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer, - x->e_mbd.predictor, x->src.uv_stride); -} +static void subtract_plane(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int plane) { + struct macroblock_plane *const p = &x->plane[plane]; + const MACROBLOCKD *const xd = &x->e_mbd; + const struct macroblockd_plane *const pd = &xd->plane[plane]; + const int bw = plane_block_width(bsize, pd); + const int bh = plane_block_height(bsize, pd); -void vp9_transform_mby_4x4(MACROBLOCK *x) { - int i; - MACROBLOCKD *xd = &x->e_mbd; - - for (i = 0; i < 16; i++) { - BLOCK *b = &x->block[i]; - TX_TYPE tx_type = get_tx_type_4x4(xd, i); - if (tx_type != DCT_DCT) { - vp9_short_fht4x4(b->src_diff, b->coeff, 16, tx_type); - } else if (!(i & 1) && get_tx_type_4x4(xd, i + 1) == DCT_DCT) { - x->fwd_txm8x4(x->block[i].src_diff, x->block[i].coeff, 32); - i++; - } else { - x->fwd_txm4x4(x->block[i].src_diff, x->block[i].coeff, 32); - } - } + vp9_subtract_block(bh, bw, p->src_diff, bw, + p->src.buf, p->src.stride, + pd->dst.buf, pd->dst.stride); } -void vp9_transform_mbuv_4x4(MACROBLOCK *x) { - int i; - - for (i = 16; i < 24; i += 2) - x->fwd_txm8x4(x->block[i].src_diff, x->block[i].coeff, 16); -} - -static void transform_mb_4x4(MACROBLOCK *x) { - vp9_transform_mby_4x4(x); - vp9_transform_mbuv_4x4(x); -} - -void vp9_transform_mby_8x8(MACROBLOCK *x) { - int i; - MACROBLOCKD *xd = &x->e_mbd; - TX_TYPE tx_type; - - for (i = 0; i < 9; i += 8) { - BLOCK *b = &x->block[i]; - tx_type = get_tx_type_8x8(xd, i); - if (tx_type != DCT_DCT) { - vp9_short_fht8x8(b->src_diff, b->coeff, 16, tx_type); - } else { - x->fwd_txm8x8(x->block[i].src_diff, x->block[i].coeff, 32); - } - } - for (i = 2; i < 11; i += 8) { - BLOCK *b = &x->block[i]; - tx_type = get_tx_type_8x8(xd, i); - if (tx_type != DCT_DCT) { - vp9_short_fht8x8(b->src_diff, (b + 2)->coeff, 16, tx_type); - } else { - x->fwd_txm8x8(x->block[i].src_diff, x->block[i + 2].coeff, 32); - } - } +void vp9_subtract_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { + subtract_plane(x, bsize, 0); } -void vp9_transform_mbuv_8x8(MACROBLOCK *x) { +void vp9_subtract_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { int i; - for (i = 16; i < 24; i += 4) - x->fwd_txm8x8(x->block[i].src_diff, x->block[i].coeff, 16); -} - -void vp9_transform_mb_8x8(MACROBLOCK *x) { - vp9_transform_mby_8x8(x); - 
vp9_transform_mbuv_8x8(x); -} - -void vp9_transform_mby_16x16(MACROBLOCK *x) { - MACROBLOCKD *xd = &x->e_mbd; - BLOCK *b = &x->block[0]; - TX_TYPE tx_type = get_tx_type_16x16(xd, 0); - vp9_clear_system_state(); - if (tx_type != DCT_DCT) { - vp9_short_fht16x16(b->src_diff, b->coeff, 16, tx_type); - } else { - x->fwd_txm16x16(x->block[0].src_diff, x->block[0].coeff, 32); - } -} - -void vp9_transform_mb_16x16(MACROBLOCK *x) { - vp9_transform_mby_16x16(x); - vp9_transform_mbuv_8x8(x); -} - -void vp9_transform_sby_32x32(MACROBLOCK *x) { - vp9_short_fdct32x32(x->src_diff, x->coeff, 64); -} - -void vp9_transform_sby_16x16(MACROBLOCK *x) { - MACROBLOCKD *const xd = &x->e_mbd; - int n; - - for (n = 0; n < 4; n++) { - const int x_idx = n & 1, y_idx = n >> 1; - const TX_TYPE tx_type = get_tx_type_16x16(xd, (y_idx * 8 + x_idx) * 4); - - if (tx_type != DCT_DCT) { - vp9_short_fht16x16(x->src_diff + y_idx * 32 * 16 + x_idx * 16, - x->coeff + n * 256, 32, tx_type); - } else { - x->fwd_txm16x16(x->src_diff + y_idx * 32 * 16 + x_idx * 16, - x->coeff + n * 256, 64); - } - } -} - -void vp9_transform_sby_8x8(MACROBLOCK *x) { - MACROBLOCKD *const xd = &x->e_mbd; - int n; - - for (n = 0; n < 16; n++) { - const int x_idx = n & 3, y_idx = n >> 2; - const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 8 + x_idx) * 2); - - if (tx_type != DCT_DCT) { - vp9_short_fht8x8(x->src_diff + y_idx * 32 * 8 + x_idx * 8, - x->coeff + n * 64, 32, tx_type); - } else { - x->fwd_txm8x8(x->src_diff + y_idx * 32 * 8 + x_idx * 8, - x->coeff + n * 64, 64); - } - } -} - -void vp9_transform_sby_4x4(MACROBLOCK *x) { - MACROBLOCKD *const xd = &x->e_mbd; - int n; - - for (n = 0; n < 64; n++) { - const int x_idx = n & 7, y_idx = n >> 3; - const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 8 + x_idx); - - if (tx_type != DCT_DCT) { - vp9_short_fht4x4(x->src_diff + y_idx * 32 * 4 + x_idx * 4, - x->coeff + n * 16, 32, tx_type); - } else { - x->fwd_txm4x4(x->src_diff + y_idx * 32 * 4 + x_idx * 4, - x->coeff + n * 16, 64); - } - } -} - -void vp9_transform_sbuv_16x16(MACROBLOCK *x) { - vp9_clear_system_state(); - x->fwd_txm16x16(x->src_diff + 1024, x->coeff + 1024, 32); - x->fwd_txm16x16(x->src_diff + 1280, x->coeff + 1280, 32); + for (i = 1; i < MAX_MB_PLANE; i++) + subtract_plane(x, bsize, i); } -void vp9_transform_sbuv_8x8(MACROBLOCK *x) { - int n; - - vp9_clear_system_state(); - for (n = 0; n < 4; n++) { - const int x_idx = n & 1, y_idx = n >> 1; - - x->fwd_txm8x8(x->src_diff + 1024 + y_idx * 16 * 8 + x_idx * 8, - x->coeff + 1024 + n * 64, 32); - x->fwd_txm8x8(x->src_diff + 1280 + y_idx * 16 * 8 + x_idx * 8, - x->coeff + 1280 + n * 64, 32); - } -} - -void vp9_transform_sbuv_4x4(MACROBLOCK *x) { - int n; - - vp9_clear_system_state(); - for (n = 0; n < 16; n++) { - const int x_idx = n & 3, y_idx = n >> 2; - - x->fwd_txm4x4(x->src_diff + 1024 + y_idx * 16 * 4 + x_idx * 4, - x->coeff + 1024 + n * 16, 32); - x->fwd_txm4x4(x->src_diff + 1280 + y_idx * 16 * 4 + x_idx * 4, - x->coeff + 1280 + n * 16, 32); - } +void vp9_subtract_sb(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { + vp9_subtract_sby(x, bsize); + vp9_subtract_sbuv(x, bsize); } -void vp9_transform_sb64y_32x32(MACROBLOCK *x) { - int n; - - for (n = 0; n < 4; n++) { - const int x_idx = n & 1, y_idx = n >> 1; - - vp9_short_fdct32x32(x->src_diff + y_idx * 64 * 32 + x_idx * 32, - x->coeff + n * 1024, 128); - } -} - -void vp9_transform_sb64y_16x16(MACROBLOCK *x) { - MACROBLOCKD *const xd = &x->e_mbd; - int n; - - for (n = 0; n < 16; n++) { - const int x_idx = n & 3, y_idx = n >> 2; - const TX_TYPE 
tx_type = get_tx_type_16x16(xd, (y_idx * 16 + x_idx) * 4); - - if (tx_type != DCT_DCT) { - vp9_short_fht16x16(x->src_diff + y_idx * 64 * 16 + x_idx * 16, - x->coeff + n * 256, 64, tx_type); - } else { - x->fwd_txm16x16(x->src_diff + y_idx * 64 * 16 + x_idx * 16, - x->coeff + n * 256, 128); - } - } -} - -void vp9_transform_sb64y_8x8(MACROBLOCK *x) { - MACROBLOCKD *const xd = &x->e_mbd; - int n; - - for (n = 0; n < 64; n++) { - const int x_idx = n & 7, y_idx = n >> 3; - const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 16 + x_idx) * 2); - - if (tx_type != DCT_DCT) { - vp9_short_fht8x8(x->src_diff + y_idx * 64 * 8 + x_idx * 8, - x->coeff + n * 64, 64, tx_type); - } else { - x->fwd_txm8x8(x->src_diff + y_idx * 64 * 8 + x_idx * 8, - x->coeff + n * 64, 128); - } - } -} - -void vp9_transform_sb64y_4x4(MACROBLOCK *x) { - MACROBLOCKD *const xd = &x->e_mbd; - int n; - - for (n = 0; n < 256; n++) { - const int x_idx = n & 15, y_idx = n >> 4; - const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 16 + x_idx); - - if (tx_type != DCT_DCT) { - vp9_short_fht8x8(x->src_diff + y_idx * 64 * 4 + x_idx * 4, - x->coeff + n * 16, 64, tx_type); - } else { - x->fwd_txm4x4(x->src_diff + y_idx * 64 * 4 + x_idx * 4, - x->coeff + n * 16, 128); - } - } -} - -void vp9_transform_sb64uv_32x32(MACROBLOCK *x) { - vp9_clear_system_state(); - vp9_short_fdct32x32(x->src_diff + 4096, - x->coeff + 4096, 64); - vp9_short_fdct32x32(x->src_diff + 4096 + 1024, - x->coeff + 4096 + 1024, 64); -} - -void vp9_transform_sb64uv_16x16(MACROBLOCK *x) { - int n; - - vp9_clear_system_state(); - for (n = 0; n < 4; n++) { - const int x_idx = n & 1, y_idx = n >> 1; - - x->fwd_txm16x16(x->src_diff + 4096 + y_idx * 32 * 16 + x_idx * 16, - x->coeff + 4096 + n * 256, 64); - x->fwd_txm16x16(x->src_diff + 4096 + 1024 + y_idx * 32 * 16 + x_idx * 16, - x->coeff + 4096 + 1024 + n * 256, 64); - } -} - -void vp9_transform_sb64uv_8x8(MACROBLOCK *x) { - int n; - - vp9_clear_system_state(); - for (n = 0; n < 16; n++) { - const int x_idx = n & 3, y_idx = n >> 2; - - x->fwd_txm8x8(x->src_diff + 4096 + y_idx * 32 * 8 + x_idx * 8, - x->coeff + 4096 + n * 64, 64); - x->fwd_txm8x8(x->src_diff + 4096 + 1024 + y_idx * 32 * 8 + x_idx * 8, - x->coeff + 4096 + 1024 + n * 64, 64); - } -} - -void vp9_transform_sb64uv_4x4(MACROBLOCK *x) { - int n; - - vp9_clear_system_state(); - for (n = 0; n < 64; n++) { - const int x_idx = n & 7, y_idx = n >> 3; - - x->fwd_txm4x4(x->src_diff + 4096 + y_idx * 32 * 4 + x_idx * 4, - x->coeff + 4096 + n * 16, 64); - x->fwd_txm4x4(x->src_diff + 4096 + 1024 + y_idx * 32 * 4 + x_idx * 4, - x->coeff + 4096 + 1024 + n * 16, 64); - } -} #define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF ) #define RDTRUNC_8x8(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF ) @@ -533,126 +107,84 @@ static int trellis_get_coeff_context(const int *scan, int idx, int token, uint8_t *token_cache, int pad, int l) { - int bak = token_cache[idx], pt; - token_cache[idx] = token; + int bak = token_cache[scan[idx]], pt; + token_cache[scan[idx]] = vp9_pt_energy_class[token]; pt = vp9_get_coef_context(scan, nb, pad, token_cache, idx + 1, l); - token_cache[idx] = bak; + token_cache[scan[idx]] = bak; return pt; } -static void optimize_b(VP9_COMMON *const cm, - MACROBLOCK *mb, int ib, PLANE_TYPE type, - const int16_t *dequant_ptr, +static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb, + int plane, int block, BLOCK_SIZE_TYPE bsize, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, - int tx_size) { - const int ref = mb->e_mbd.mode_info_context->mbmi.ref_frame != INTRA_FRAME; + TX_SIZE tx_size) { 
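+  // Trellis coefficient optimization: the scan is walked in reverse,
+  // keeping two candidate states per position -- tokens[i][0] keeps the
+  // quantizer output, tokens[i][1] tries the magnitude reduced by one
+  // (collapsing to ZERO/EOB at zero) -- and the predecessor with the
+  // cheaper rate-distortion cost wins at each step.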
+ const int ref = mb->e_mbd.mode_info_context->mbmi.ref_frame[0] != INTRA_FRAME; MACROBLOCKD *const xd = &mb->e_mbd; vp9_token_state tokens[1025][2]; unsigned best_index[1025][2]; - const int16_t *coeff_ptr = mb->coeff + ib * 16; - int16_t *qcoeff_ptr = xd->qcoeff + ib * 16; - int16_t *dqcoeff_ptr = xd->dqcoeff + ib * 16; - int eob = xd->eobs[ib], final_eob, sz = 0; + const int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[plane].coeff, + block, 16); + int16_t *qcoeff_ptr; + int16_t *dqcoeff_ptr; + int eob = xd->plane[plane].eobs[block], final_eob, sz = 0; const int i0 = 0; int rc, x, next, i; int64_t rdmult, rddiv, rd_cost0, rd_cost1; int rate0, rate1, error0, error1, t0, t1; int best, band, pt; + PLANE_TYPE type = xd->plane[plane].plane_type; int err_mult = plane_rd_mult[type]; int default_eob, pad; int const *scan, *nb; const int mul = 1 + (tx_size == TX_32X32); uint8_t token_cache[1024]; -#if CONFIG_CODE_NONZEROCOUNT - // TODO(debargha): the dynamic programming approach used in this function - // is not compatible with the true rate cost when nzcs are used. Note - // the total rate is the sum of the nzc rate and the indicvidual token - // rates. The latter part can be optimized in this function, but because - // the nzc rate is a function of all the other tokens without a Markov - // relationship this rate cannot be considered correctly. - // The current implementation uses a suboptimal approach to account for - // the nzc rates somewhat, but in reality the optimization approach needs - // to change substantially. - uint16_t nzc = xd->nzcs[ib]; - uint16_t nzc0, nzc1; - uint16_t final_nzc = 0, final_nzc_exp; - int nzc_context = vp9_get_nzc_context(cm, xd, ib); - unsigned int *nzc_cost; - nzc0 = nzc1 = nzc; -#endif - + const int ib = txfrm_block_to_raster_block(xd, bsize, plane, + block, 2 * tx_size); + const int16_t *dequant_ptr = xd->plane[plane].dequant; + const uint8_t * band_translate; + + assert((!type && !plane) || (type && plane)); + dqcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16); + qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16); switch (tx_size) { default: case TX_4X4: { - const TX_TYPE tx_type = get_tx_type_4x4(xd, ib); + const TX_TYPE tx_type = plane == 0 ? get_tx_type_4x4(xd, ib) : DCT_DCT; default_eob = 16; -#if CONFIG_CODE_NONZEROCOUNT - nzc_cost = mb->nzc_costs_4x4[nzc_context][ref][type]; -#endif - if (tx_type == DCT_ADST) { - scan = vp9_col_scan_4x4; - } else if (tx_type == ADST_DCT) { - scan = vp9_row_scan_4x4; - } else { - scan = vp9_default_zig_zag1d_4x4; - } + scan = get_scan_4x4(tx_type); + band_translate = vp9_coefband_trans_4x4; break; } case TX_8X8: { - const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; - const int sz = 3 + sb_type, x = ib & ((1 << sz) - 1), y = ib - x; - const TX_TYPE tx_type = get_tx_type_8x8(xd, y + (x >> 1)); - if (tx_type == DCT_ADST) { - scan = vp9_col_scan_8x8; - } else if (tx_type == ADST_DCT) { - scan = vp9_row_scan_8x8; - } else { - scan = vp9_default_zig_zag1d_8x8; - } + const TX_TYPE tx_type = plane == 0 ? 
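+          // (only the luma plane selects an ADST variant from the
+          // prediction mode; chroma always uses the plain DCT)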
get_tx_type_8x8(xd, ib) : DCT_DCT; + scan = get_scan_8x8(tx_type); default_eob = 64; -#if CONFIG_CODE_NONZEROCOUNT - nzc_cost = mb->nzc_costs_8x8[nzc_context][ref][type]; -#endif + band_translate = vp9_coefband_trans_8x8plus; break; } case TX_16X16: { - const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; - const int sz = 4 + sb_type, x = ib & ((1 << sz) - 1), y = ib - x; - const TX_TYPE tx_type = get_tx_type_16x16(xd, y + (x >> 2)); - if (tx_type == DCT_ADST) { - scan = vp9_col_scan_16x16; - } else if (tx_type == ADST_DCT) { - scan = vp9_row_scan_16x16; - } else { - scan = vp9_default_zig_zag1d_16x16; - } + const TX_TYPE tx_type = plane == 0 ? get_tx_type_16x16(xd, ib) : DCT_DCT; + scan = get_scan_16x16(tx_type); default_eob = 256; -#if CONFIG_CODE_NONZEROCOUNT - nzc_cost = mb->nzc_costs_16x16[nzc_context][ref][type]; -#endif + band_translate = vp9_coefband_trans_8x8plus; break; } case TX_32X32: - scan = vp9_default_zig_zag1d_32x32; + scan = vp9_default_scan_32x32; default_eob = 1024; -#if CONFIG_CODE_NONZEROCOUNT - nzc_cost = mb->nzc_costs_32x32[nzc_context][ref][type]; -#endif + band_translate = vp9_coefband_trans_8x8plus; break; } + assert(eob <= default_eob); /* Now set up a Viterbi trellis to evaluate alternative roundings. */ rdmult = mb->rdmult * err_mult; - if (mb->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) + if (mb->e_mbd.mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME) rdmult = (rdmult * 9) >> 4; rddiv = mb->rddiv; memset(best_index, 0, sizeof(best_index)); /* Initialize the sentinel node of the trellis. */ -#if CONFIG_CODE_NONZEROCOUNT - tokens[eob][0].rate = nzc_cost[nzc]; -#else tokens[eob][0].rate = 0; -#endif tokens[eob][0].error = 0; tokens[eob][0].next = default_eob; tokens[eob][0].token = DCT_EOB_TOKEN; @@ -660,14 +192,12 @@ static void optimize_b(VP9_COMMON *const cm, *(tokens[eob] + 1) = *(tokens[eob] + 0); next = eob; for (i = 0; i < eob; i++) - token_cache[i] = vp9_dct_value_tokens_ptr[qcoeff_ptr[scan[i]]].Token; + token_cache[scan[i]] = vp9_pt_energy_class[vp9_dct_value_tokens_ptr[ + qcoeff_ptr[scan[i]]].token]; nb = vp9_get_coef_neighbors_handle(scan, &pad); for (i = eob; i-- > i0;) { int base_bits, d2, dx; -#if CONFIG_CODE_NONZEROCOUNT - int new_nzc0, new_nzc1; -#endif rc = scan[i]; x = qcoeff_ptr[rc]; @@ -679,16 +209,18 @@ static void optimize_b(VP9_COMMON *const cm, /* Evaluate the first possibility for this state. */ rate0 = tokens[next][0].rate; rate1 = tokens[next][1].rate; - t0 = (vp9_dct_value_tokens_ptr + x)->Token; + t0 = (vp9_dct_value_tokens_ptr + x)->token; /* Consider both possible successor states. */ if (next < default_eob) { - band = get_coef_band(scan, tx_size, i + 1); + band = get_coef_band(band_translate, i + 1); pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache, pad, default_eob); rate0 += - mb->token_costs[tx_size][type][ref][band][pt][tokens[next][0].token]; + mb->token_costs_noskip[tx_size][type][ref][band][pt] + [tokens[next][0].token]; rate1 += - mb->token_costs[tx_size][type][ref][band][pt][tokens[next][1].token]; + mb->token_costs_noskip[tx_size][type][ref][band][pt] + [tokens[next][1].token]; } UPDATE_RD_COST(); /* And pick the best. */ @@ -702,9 +234,6 @@ static void optimize_b(VP9_COMMON *const cm, tokens[i][0].token = t0; tokens[i][0].qc = x; best_index[i][0] = best; -#if CONFIG_CODE_NONZEROCOUNT - new_nzc0 = (best ? nzc1 : nzc0); -#endif /* Evaluate the second possibility for this state. 
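        The second state carries the same coefficient with its magnitude
        reduced by one; once it reaches zero it is re-tokenized below as
        ZERO_TOKEN or, when it ends the block, as DCT_EOB_TOKEN.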
*/ rate0 = tokens[next][0].rate; @@ -731,28 +260,30 @@ static void optimize_b(VP9_COMMON *const cm, DCT_EOB_TOKEN : ZERO_TOKEN; t1 = tokens[next][1].token == DCT_EOB_TOKEN ? DCT_EOB_TOKEN : ZERO_TOKEN; -#if CONFIG_CODE_NONZEROCOUNT - // Account for rate drop because of the nzc change. - // TODO(debargha): Find a better solution - rate0 -= nzc_cost[nzc0] - nzc_cost[nzc0 - 1]; - rate1 -= nzc_cost[nzc1] - nzc_cost[nzc1 - 1]; -#endif } else { - t0 = t1 = (vp9_dct_value_tokens_ptr + x)->Token; + t0 = t1 = (vp9_dct_value_tokens_ptr + x)->token; } if (next < default_eob) { - band = get_coef_band(scan, tx_size, i + 1); + band = get_coef_band(band_translate, i + 1); if (t0 != DCT_EOB_TOKEN) { pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache, pad, default_eob); - rate0 += mb->token_costs[tx_size][type][ref][band][pt][ - tokens[next][0].token]; + if (!x) + rate0 += mb->token_costs[tx_size][type][ref][band][pt][ + tokens[next][0].token]; + else + rate0 += mb->token_costs_noskip[tx_size][type][ref][band][pt][ + tokens[next][0].token]; } if (t1 != DCT_EOB_TOKEN) { pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache, pad, default_eob); - rate1 += mb->token_costs[tx_size][type][ref][band][pt][ - tokens[next][1].token]; + if (!x) + rate1 += mb->token_costs[tx_size][type][ref][band][pt][ + tokens[next][1].token]; + else + rate1 += mb->token_costs_noskip[tx_size][type][ref][band][pt][ + tokens[next][1].token]; } } @@ -771,11 +302,6 @@ static void optimize_b(VP9_COMMON *const cm, tokens[i][1].token = best ? t1 : t0; tokens[i][1].qc = x; best_index[i][1] = best; -#if CONFIG_CODE_NONZEROCOUNT - new_nzc1 = (best ? nzc1 : nzc0) - (!x); - nzc0 = new_nzc0; - nzc1 = new_nzc1; -#endif /* Finally, make this the new head of the trellis. */ next = i; } @@ -783,7 +309,7 @@ static void optimize_b(VP9_COMMON *const cm, * add a new trellis node, but we do need to update the costs. */ else { - band = get_coef_band(scan, tx_size, i + 1); + band = get_coef_band(band_translate, i + 1); t0 = tokens[next][0].token; t1 = tokens[next][1].token; /* Update the cost of each path if we're past the EOB token. */ @@ -802,29 +328,25 @@ static void optimize_b(VP9_COMMON *const cm, } /* Now pick the best path through the whole trellis. */ - band = get_coef_band(scan, tx_size, i + 1); - VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l); + band = get_coef_band(band_translate, i + 1); + pt = combine_entropy_contexts(*a, *l); rate0 = tokens[next][0].rate; rate1 = tokens[next][1].rate; error0 = tokens[next][0].error; error1 = tokens[next][1].error; t0 = tokens[next][0].token; t1 = tokens[next][1].token; - rate0 += mb->token_costs[tx_size][type][ref][band][pt][t0]; - rate1 += mb->token_costs[tx_size][type][ref][band][pt][t1]; + rate0 += mb->token_costs_noskip[tx_size][type][ref][band][pt][t0]; + rate1 += mb->token_costs_noskip[tx_size][type][ref][band][pt][t1]; UPDATE_RD_COST(); best = rd_cost1 < rd_cost0; -#if CONFIG_CODE_NONZEROCOUNT - final_nzc_exp = (best ? 
nzc1 : nzc0); -#endif final_eob = i0 - 1; + vpx_memset(qcoeff_ptr, 0, sizeof(*qcoeff_ptr) * (16 << (tx_size * 2))); + vpx_memset(dqcoeff_ptr, 0, sizeof(*dqcoeff_ptr) * (16 << (tx_size * 2))); for (i = next; i < eob; i = next) { x = tokens[i][best].qc; if (x) { final_eob = i; -#if CONFIG_CODE_NONZEROCOUNT - ++final_nzc; -#endif } rc = scan[i]; qcoeff_ptr[rc] = x; @@ -835,519 +357,338 @@ static void optimize_b(VP9_COMMON *const cm, } final_eob++; - xd->eobs[ib] = final_eob; + xd->plane[plane].eobs[block] = final_eob; *a = *l = (final_eob > 0); -#if CONFIG_CODE_NONZEROCOUNT - assert(final_nzc == final_nzc_exp); - xd->nzcs[ib] = final_nzc; -#endif } -void vp9_optimize_mby_4x4(VP9_COMMON *const cm, MACROBLOCK *x) { - int b; - ENTROPY_CONTEXT_PLANES t_above, t_left; - ENTROPY_CONTEXT *ta; - ENTROPY_CONTEXT *tl; - - if (!x->e_mbd.above_context || !x->e_mbd.left_context) - return; +struct optimize_block_args { + VP9_COMMON *cm; + MACROBLOCK *x; + struct optimize_ctx *ctx; +}; - vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); +void vp9_optimize_b(int plane, int block, BLOCK_SIZE_TYPE bsize, + int ss_txfrm_size, VP9_COMMON *cm, MACROBLOCK *mb, + struct optimize_ctx *ctx) { + MACROBLOCKD *const xd = &mb->e_mbd; + int x, y; - ta = (ENTROPY_CONTEXT *)&t_above; - tl = (ENTROPY_CONTEXT *)&t_left; + // find current entropy context + txfrm_block_to_raster_xy(xd, bsize, plane, block, ss_txfrm_size, &x, &y); - for (b = 0; b < 16; b++) { - optimize_b(cm, x, b, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[b].dequant, - ta + vp9_block2above[TX_4X4][b], - tl + vp9_block2left[TX_4X4][b], TX_4X4); - } + optimize_b(cm, mb, plane, block, bsize, + &ctx->ta[plane][x], &ctx->tl[plane][y], ss_txfrm_size / 2); } -void vp9_optimize_mbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x) { - int b; - ENTROPY_CONTEXT_PLANES t_above, t_left; - ENTROPY_CONTEXT *ta; - ENTROPY_CONTEXT *tl; - - if (!x->e_mbd.above_context || !x->e_mbd.left_context) - return; +static void optimize_block(int plane, int block, BLOCK_SIZE_TYPE bsize, + int ss_txfrm_size, void *arg) { + const struct optimize_block_args* const args = arg; + vp9_optimize_b(plane, block, bsize, ss_txfrm_size, args->cm, args->x, + args->ctx); +} - vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); +void vp9_optimize_init(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize, + struct optimize_ctx *ctx) { + int p; - ta = (ENTROPY_CONTEXT *)&t_above; - tl = (ENTROPY_CONTEXT *)&t_left; + for (p = 0; p < MAX_MB_PLANE; p++) { + const struct macroblockd_plane* const plane = &xd->plane[p]; + const int bwl = b_width_log2(bsize) - plane->subsampling_x; + const int bhl = b_height_log2(bsize) - plane->subsampling_y; + const MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; + const TX_SIZE tx_size = p ? 
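+        // (chroma planes take their transform size from the luma
+        // transform size via get_uv_tx_size)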
get_uv_tx_size(mbmi) + : mbmi->txfm_size; + int i, j; - for (b = 16; b < 24; b++) { - optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[b].dequant, - ta + vp9_block2above[TX_4X4][b], - tl + vp9_block2left[TX_4X4][b], TX_4X4); + for (i = 0; i < 1 << bwl; i += 1 << tx_size) { + int c = 0; + ctx->ta[p][i] = 0; + for (j = 0; j < 1 << tx_size && !c; j++) { + c = ctx->ta[p][i] |= plane->above_context[i + j]; + } + } + for (i = 0; i < 1 << bhl; i += 1 << tx_size) { + int c = 0; + ctx->tl[p][i] = 0; + for (j = 0; j < 1 << tx_size && !c; j++) { + c = ctx->tl[p][i] |= plane->left_context[i + j]; + } + } } } -static void optimize_mb_4x4(VP9_COMMON *const cm, MACROBLOCK *x) { - vp9_optimize_mby_4x4(cm, x); - vp9_optimize_mbuv_4x4(cm, x); +void vp9_optimize_sby(VP9_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { + struct optimize_ctx ctx; + struct optimize_block_args arg = {cm, x, &ctx}; + vp9_optimize_init(&x->e_mbd, bsize, &ctx); + foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0, optimize_block, &arg); } -void vp9_optimize_mby_8x8(VP9_COMMON *const cm, MACROBLOCK *x) { - int b; - ENTROPY_CONTEXT_PLANES t_above, t_left; - ENTROPY_CONTEXT *ta; - ENTROPY_CONTEXT *tl; - - if (!x->e_mbd.above_context || !x->e_mbd.left_context) - return; - - vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); - - ta = (ENTROPY_CONTEXT *)&t_above; - tl = (ENTROPY_CONTEXT *)&t_left; - for (b = 0; b < 16; b += 4) { - ENTROPY_CONTEXT *const a = ta + vp9_block2above[TX_8X8][b]; - ENTROPY_CONTEXT *const l = tl + vp9_block2left[TX_8X8][b]; - ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0; - ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0; - optimize_b(cm, x, b, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[b].dequant, - &above_ec, &left_ec, TX_8X8); - a[1] = a[0] = above_ec; - l[1] = l[0] = left_ec; - } +void vp9_optimize_sbuv(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { + struct optimize_ctx ctx; + struct optimize_block_args arg = {cm, x, &ctx}; + vp9_optimize_init(&x->e_mbd, bsize, &ctx); + foreach_transformed_block_uv(&x->e_mbd, bsize, optimize_block, &arg); } -void vp9_optimize_mbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x) { - int b; - ENTROPY_CONTEXT *const ta = (ENTROPY_CONTEXT *)x->e_mbd.above_context; - ENTROPY_CONTEXT *const tl = (ENTROPY_CONTEXT *)x->e_mbd.left_context; - - if (!ta || !tl) - return; - - for (b = 16; b < 24; b += 4) { - ENTROPY_CONTEXT *const a = ta + vp9_block2above[TX_8X8][b]; - ENTROPY_CONTEXT *const l = tl + vp9_block2left[TX_8X8][b]; - ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0; - ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0; - optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[b].dequant, - &above_ec, &left_ec, TX_8X8); +struct encode_b_args { + VP9_COMMON *cm; + MACROBLOCK *x; + struct optimize_ctx *ctx; +}; + +static void xform_quant(int plane, int block, BLOCK_SIZE_TYPE bsize, + int ss_txfrm_size, void *arg) { + struct encode_b_args* const args = arg; + MACROBLOCK* const x = args->x; + MACROBLOCKD* const xd = &x->e_mbd; + const int bw = plane_block_width(bsize, &xd->plane[plane]); + const int raster_block = txfrm_block_to_raster_block(xd, bsize, plane, + block, ss_txfrm_size); + int16_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block, 16); + int16_t *const src_diff = raster_block_offset_int16(xd, bsize, plane, + raster_block, + x->plane[plane].src_diff); + TX_TYPE tx_type = DCT_DCT; + + switch (ss_txfrm_size / 2) { + case TX_32X32: + vp9_short_fdct32x32(src_diff, 
coeff, bw * 2); + break; + case TX_16X16: + tx_type = plane == 0 ? get_tx_type_16x16(xd, raster_block) : DCT_DCT; + if (tx_type != DCT_DCT) + vp9_short_fht16x16(src_diff, coeff, bw, tx_type); + else + x->fwd_txm16x16(src_diff, coeff, bw * 2); + break; + case TX_8X8: + tx_type = plane == 0 ? get_tx_type_8x8(xd, raster_block) : DCT_DCT; + if (tx_type != DCT_DCT) + vp9_short_fht8x8(src_diff, coeff, bw, tx_type); + else + x->fwd_txm8x8(src_diff, coeff, bw * 2); + break; + case TX_4X4: + tx_type = plane == 0 ? get_tx_type_4x4(xd, raster_block) : DCT_DCT; + if (tx_type != DCT_DCT) + vp9_short_fht4x4(src_diff, coeff, bw, tx_type); + else + x->fwd_txm4x4(src_diff, coeff, bw * 2); + break; + default: + assert(0); } -} -static void optimize_mb_8x8(VP9_COMMON *const cm, MACROBLOCK *x) { - vp9_optimize_mby_8x8(cm, x); - vp9_optimize_mbuv_8x8(cm, x); + vp9_quantize(x, plane, block, 16 << ss_txfrm_size, tx_type); } -void vp9_optimize_mby_16x16(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT_PLANES *const t_above = x->e_mbd.above_context; - ENTROPY_CONTEXT_PLANES *const t_left = x->e_mbd.left_context; - ENTROPY_CONTEXT ta, tl; +static void encode_block(int plane, int block, BLOCK_SIZE_TYPE bsize, + int ss_txfrm_size, void *arg) { + struct encode_b_args *const args = arg; + MACROBLOCK *const x = args->x; + MACROBLOCKD *const xd = &x->e_mbd; + const int raster_block = txfrm_block_to_raster_block(xd, bsize, plane, + block, ss_txfrm_size); + struct macroblockd_plane *const pd = &xd->plane[plane]; + int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block, 16); + uint8_t *const dst = raster_block_offset_uint8(xd, bsize, plane, + raster_block, + pd->dst.buf, pd->dst.stride); + TX_TYPE tx_type = DCT_DCT; - if (!t_above || !t_left) - return; + xform_quant(plane, block, bsize, ss_txfrm_size, arg); - ta = (t_above->y1[0] + t_above->y1[1] + t_above->y1[2] + t_above->y1[3]) != 0; - tl = (t_left->y1[0] + t_left->y1[1] + t_left->y1[2] + t_left->y1[3]) != 0; - optimize_b(cm, x, 0, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, - &ta, &tl, TX_16X16); -} + if (x->optimize) + vp9_optimize_b(plane, block, bsize, ss_txfrm_size, args->cm, x, args->ctx); -static void optimize_mb_16x16(VP9_COMMON *const cm, MACROBLOCK *x) { - vp9_optimize_mby_16x16(cm, x); - vp9_optimize_mbuv_8x8(cm, x); + switch (ss_txfrm_size / 2) { + case TX_32X32: + vp9_short_idct32x32_add(dqcoeff, dst, pd->dst.stride); + break; + case TX_16X16: + tx_type = plane == 0 ? get_tx_type_16x16(xd, raster_block) : DCT_DCT; + if (tx_type == DCT_DCT) + vp9_short_idct16x16_add(dqcoeff, dst, pd->dst.stride); + else + vp9_short_iht16x16_add(dqcoeff, dst, pd->dst.stride, tx_type); + break; + case TX_8X8: + tx_type = plane == 0 ? get_tx_type_8x8(xd, raster_block) : DCT_DCT; + if (tx_type == DCT_DCT) + vp9_short_idct8x8_add(dqcoeff, dst, pd->dst.stride); + else + vp9_short_iht8x8_add(dqcoeff, dst, pd->dst.stride, tx_type); + break; + case TX_4X4: + tx_type = plane == 0 ? get_tx_type_4x4(xd, raster_block) : DCT_DCT; + if (tx_type == DCT_DCT) + // this is like vp9_short_idct4x4 but has a special case around eob<=1 + // which is significant (not just an optimization) for the lossless + // case. 
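+      // (eob <= 1 means at most the DC coefficient is nonzero, which is
+      // what lets the helper special-case it)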
+ vp9_inverse_transform_b_4x4_add(xd, pd->eobs[block], dqcoeff, + dst, pd->dst.stride); + else + vp9_short_iht4x4_add(dqcoeff, dst, pd->dst.stride, tx_type); + break; + } } -void vp9_optimize_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context; - ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1); - ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context; - ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1); - ENTROPY_CONTEXT ta, tl; - - ta = (a[0] + a[1] + a[2] + a[3] + a1[0] + a1[1] + a1[2] + a1[3]) != 0; - tl = (l[0] + l[1] + l[2] + l[3] + l1[0] + l1[1] + l1[2] + l1[3]) != 0; - optimize_b(cm, x, 0, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, - &ta, &tl, TX_32X32); -} +void vp9_xform_quant_sby(VP9_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { + MACROBLOCKD* const xd = &x->e_mbd; + struct encode_b_args arg = {cm, x, NULL}; -void vp9_optimize_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context; - ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1); - ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context; - ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1); - ENTROPY_CONTEXT ta[2], tl[2]; - int n; - - ta[0] = (a[0] + a[1] + a[2] + a[3]) != 0; - ta[1] = (a1[0] + a1[1] + a1[2] + a1[3]) != 0; - tl[0] = (l[0] + l[1] + l[2] + l[3]) != 0; - tl[1] = (l1[0] + l1[1] + l1[2] + l1[3]) != 0; - for (n = 0; n < 4; n++) { - const int x_idx = n & 1, y_idx = n >> 1; - - optimize_b(cm, x, n * 16, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, - ta + x_idx, tl + y_idx, TX_16X16); - } + foreach_transformed_block_in_plane(xd, bsize, 0, xform_quant, &arg); } -void vp9_optimize_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context; - ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1); - ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context; - ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1); - ENTROPY_CONTEXT ta[4], tl[4]; - int n; - - ta[0] = (a[0] + a[1]) != 0; - ta[1] = (a[2] + a[3]) != 0; - ta[2] = (a1[0] + a1[1]) != 0; - ta[3] = (a1[2] + a1[3]) != 0; - tl[0] = (l[0] + l[1]) != 0; - tl[1] = (l[2] + l[3]) != 0; - tl[2] = (l1[0] + l1[1]) != 0; - tl[3] = (l1[2] + l1[3]) != 0; - for (n = 0; n < 16; n++) { - const int x_idx = n & 3, y_idx = n >> 2; - - optimize_b(cm, x, n * 4, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, - ta + x_idx, tl + y_idx, TX_8X8); - } +void vp9_xform_quant_sbuv(VP9_COMMON *cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { + MACROBLOCKD* const xd = &x->e_mbd; + struct encode_b_args arg = {cm, x, NULL}; + + foreach_transformed_block_uv(xd, bsize, xform_quant, &arg); } -void vp9_optimize_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT ta[8], tl[8]; - int n; +void vp9_encode_sby(VP9_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { + MACROBLOCKD *const xd = &x->e_mbd; + struct optimize_ctx ctx; + struct encode_b_args arg = {cm, x, &ctx}; - vpx_memcpy(ta, x->e_mbd.above_context, 4 * sizeof(ENTROPY_CONTEXT)); - vpx_memcpy(ta + 4, x->e_mbd.above_context + 1, 4 * sizeof(ENTROPY_CONTEXT)); - vpx_memcpy(tl, x->e_mbd.left_context, 4 * sizeof(ENTROPY_CONTEXT)); - vpx_memcpy(tl + 4, x->e_mbd.left_context + 1, 4 * sizeof(ENTROPY_CONTEXT)); - for (n = 0; n < 64; n++) { - const int x_idx = n & 7, y_idx = n >> 3; + vp9_subtract_sby(x, bsize); + if 
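+  // (the entropy contexts are only needed when the trellis pass runs)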
(x->optimize) + vp9_optimize_init(xd, bsize, &ctx); - optimize_b(cm, x, n, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, - ta + x_idx, tl + y_idx, TX_4X4); - } + foreach_transformed_block_in_plane(xd, bsize, 0, encode_block, &arg); } -void vp9_optimize_sbuv_16x16(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) x->e_mbd.above_context; - ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) x->e_mbd.left_context; - ENTROPY_CONTEXT *a, *l, *a1, *l1, above_ec, left_ec; - int b; - - for (b = 64; b < 96; b += 16) { - const int cidx = b >= 80 ? 20 : 16; - a = ta + vp9_block2above_sb[TX_16X16][b]; - l = tl + vp9_block2left_sb[TX_16X16][b]; - a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); - l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); - above_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0; - left_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0; - optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, - &above_ec, &left_ec, TX_16X16); - } -} +void vp9_encode_sbuv(VP9_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { + MACROBLOCKD *const xd = &x->e_mbd; + struct optimize_ctx ctx; + struct encode_b_args arg = {cm, x, &ctx}; -void vp9_optimize_sbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT_PLANES t_above[2], t_left[2]; - ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above; - ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left; - ENTROPY_CONTEXT *a, *l, above_ec, left_ec; - int b; - - vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above)); - vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left)); - for (b = 64; b < 96; b += 4) { - const int cidx = b >= 80 ? 20 : 16; - a = ta + vp9_block2above_sb[TX_8X8][b]; - l = tl + vp9_block2left_sb[TX_8X8][b]; - above_ec = (a[0] + a[1]) != 0; - left_ec = (l[0] + l[1]) != 0; - optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, - &above_ec, &left_ec, TX_8X8); - a[0] = a[1] = above_ec; - l[0] = l[1] = left_ec; - } -} + vp9_subtract_sbuv(x, bsize); + if (x->optimize) + vp9_optimize_init(xd, bsize, &ctx); -void vp9_optimize_sbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT_PLANES t_above[2], t_left[2]; - ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above; - ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left; - ENTROPY_CONTEXT *a, *l; - int b; - - vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above)); - vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left)); - for (b = 64; b < 96; b++) { - const int cidx = b >= 80 ? 
20 : 16; - a = ta + vp9_block2above_sb[TX_4X4][b]; - l = tl + vp9_block2left_sb[TX_4X4][b]; - optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, - a, l, TX_4X4); - } + foreach_transformed_block_uv(xd, bsize, encode_block, &arg); } -void vp9_optimize_sb64y_32x32(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context; - ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1); - ENTROPY_CONTEXT *a2 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 2); - ENTROPY_CONTEXT *a3 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 3); - ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context; - ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1); - ENTROPY_CONTEXT *l2 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 2); - ENTROPY_CONTEXT *l3 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 3); - ENTROPY_CONTEXT ta[2], tl[2]; - int n; - - ta[0] = (a[0] + a[1] + a[2] + a[3] + a1[0] + a1[1] + a1[2] + a1[3]) != 0; - ta[1] = (a2[0] + a2[1] + a2[2] + a2[3] + a3[0] + a3[1] + a3[2] + a3[3]) != 0; - tl[0] = (l[0] + l[1] + l[2] + l[3] + l1[0] + l1[1] + l1[2] + l1[3]) != 0; - tl[1] = (l2[0] + l2[1] + l2[2] + l2[3] + l3[0] + l3[1] + l3[2] + l3[3]) != 0; - for (n = 0; n < 4; n++) { - const int x_idx = n & 1, y_idx = n >> 1; - - optimize_b(cm, x, n * 64, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, - ta + x_idx, tl + y_idx, TX_32X32); - } -} +void vp9_encode_sb(VP9_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { + MACROBLOCKD *const xd = &x->e_mbd; + struct optimize_ctx ctx; + struct encode_b_args arg = {cm, x, &ctx}; -void vp9_optimize_sb64y_16x16(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context; - ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1); - ENTROPY_CONTEXT *a2 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 2); - ENTROPY_CONTEXT *a3 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 3); - ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context; - ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1); - ENTROPY_CONTEXT *l2 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 2); - ENTROPY_CONTEXT *l3 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 3); - ENTROPY_CONTEXT ta[4], tl[4]; - int n; - - ta[0] = (a[0] + a[1] + a[2] + a[3]) != 0; - ta[1] = (a1[0] + a1[1] + a1[2] + a1[3]) != 0; - ta[2] = (a2[0] + a2[1] + a2[2] + a2[3]) != 0; - ta[3] = (a3[0] + a3[1] + a3[2] + a3[3]) != 0; - tl[0] = (l[0] + l[1] + l[2] + l[3]) != 0; - tl[1] = (l1[0] + l1[1] + l1[2] + l1[3]) != 0; - tl[2] = (l2[0] + l2[1] + l2[2] + l2[3]) != 0; - tl[3] = (l3[0] + l3[1] + l3[2] + l3[3]) != 0; - for (n = 0; n < 16; n++) { - const int x_idx = n & 3, y_idx = n >> 2; - - optimize_b(cm, x, n * 16, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, - ta + x_idx, tl + y_idx, TX_16X16); - } -} + vp9_subtract_sb(x, bsize); + if (x->optimize) + vp9_optimize_init(xd, bsize, &ctx); -void vp9_optimize_sb64y_8x8(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context; - ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1); - ENTROPY_CONTEXT *a2 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 2); - ENTROPY_CONTEXT *a3 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 3); - ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context; - ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1); - ENTROPY_CONTEXT *l2 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 2); - ENTROPY_CONTEXT 
*l3 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 3); - ENTROPY_CONTEXT ta[8], tl[8]; - int n; - - ta[0] = (a[0] + a[1]) != 0; - ta[1] = (a[2] + a[3]) != 0; - ta[2] = (a1[0] + a1[1]) != 0; - ta[3] = (a1[2] + a1[3]) != 0; - ta[4] = (a2[0] + a2[1]) != 0; - ta[5] = (a2[2] + a2[3]) != 0; - ta[6] = (a3[0] + a3[1]) != 0; - ta[7] = (a3[2] + a3[3]) != 0; - tl[0] = (l[0] + l[1]) != 0; - tl[1] = (l[2] + l[3]) != 0; - tl[2] = (l1[0] + l1[1]) != 0; - tl[3] = (l1[2] + l1[3]) != 0; - tl[4] = (l2[0] + l2[1]) != 0; - tl[5] = (l2[2] + l2[3]) != 0; - tl[6] = (l3[0] + l3[1]) != 0; - tl[7] = (l3[2] + l3[3]) != 0; - for (n = 0; n < 64; n++) { - const int x_idx = n & 7, y_idx = n >> 3; - - optimize_b(cm, x, n * 4, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, - ta + x_idx, tl + y_idx, TX_8X8); - } + foreach_transformed_block(xd, bsize, encode_block, &arg); } -void vp9_optimize_sb64y_4x4(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT ta[16], tl[16]; - int n; - - vpx_memcpy(ta, x->e_mbd.above_context, 4 * sizeof(ENTROPY_CONTEXT)); - vpx_memcpy(ta + 4, x->e_mbd.above_context + 1, 4 * sizeof(ENTROPY_CONTEXT)); - vpx_memcpy(ta + 8, x->e_mbd.above_context + 2, 4 * sizeof(ENTROPY_CONTEXT)); - vpx_memcpy(ta + 12, x->e_mbd.above_context + 3, 4 * sizeof(ENTROPY_CONTEXT)); - vpx_memcpy(tl, x->e_mbd.left_context, 4 * sizeof(ENTROPY_CONTEXT)); - vpx_memcpy(tl + 4, x->e_mbd.left_context + 1, 4 * sizeof(ENTROPY_CONTEXT)); - vpx_memcpy(tl + 8, x->e_mbd.left_context + 2, 4 * sizeof(ENTROPY_CONTEXT)); - vpx_memcpy(tl + 12, x->e_mbd.left_context + 3, 4 * sizeof(ENTROPY_CONTEXT)); - for (n = 0; n < 256; n++) { - const int x_idx = n & 15, y_idx = n >> 4; - - optimize_b(cm, x, n, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, - ta + x_idx, tl + y_idx, TX_4X4); - } -} +static void encode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize, + int ss_txfrm_size, void *arg) { + struct encode_b_args* const args = arg; + MACROBLOCK *const x = args->x; + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; + const TX_SIZE tx_size = (TX_SIZE)(ss_txfrm_size / 2); + struct macroblock_plane *const p = &x->plane[plane]; + struct macroblockd_plane *const pd = &xd->plane[plane]; + int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block, 16); + const int bw = plane_block_width(bsize, pd); + const int raster_block = txfrm_block_to_raster_block(xd, bsize, plane, + block, ss_txfrm_size); + + uint8_t *const src = raster_block_offset_uint8(xd, bsize, plane, raster_block, + p->src.buf, p->src.stride); + uint8_t *const dst = raster_block_offset_uint8(xd, bsize, plane, raster_block, + pd->dst.buf, pd->dst.stride); + int16_t *const src_diff = raster_block_offset_int16(xd, bsize, plane, + raster_block, + p->src_diff); + + const int txfm_b_size = 4 << tx_size; + int ib = raster_block; + int tx_ib = ib >> tx_size; + int plane_b_size; -void vp9_optimize_sb64uv_32x32(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) x->e_mbd.above_context; - ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) x->e_mbd.left_context; - ENTROPY_CONTEXT *a, *l, *a1, *l1, *a2, *l2, *a3, *l3, a_ec, l_ec; - int b; - - for (b = 256; b < 384; b += 64) { - const int cidx = b >= 320 ? 
20 : 16; - a = ta + vp9_block2above_sb64[TX_32X32][b]; - l = tl + vp9_block2left_sb64[TX_32X32][b]; - a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); - l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); - a2 = a + 2 * sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); - l2 = l + 2 * sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); - a3 = a + 3 * sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); - l3 = l + 3 * sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); - a_ec = (a[0] + a[1] + a1[0] + a1[1] + a2[0] + a2[1] + a3[0] + a3[1]) != 0; - l_ec = (l[0] + l[1] + l1[0] + l1[1] + l2[0] + l2[1] + l3[0] + l3[1]) != 0; - optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, - &a_ec, &l_ec, TX_32X32); - } -} + TX_TYPE tx_type; + int mode, b_mode; -void vp9_optimize_sb64uv_16x16(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT_PLANES t_above[4], t_left[4]; - ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above; - ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left; - ENTROPY_CONTEXT *a, *l, *a1, *l1, above_ec, left_ec; - int b; - - vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above)); - vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left)); - for (b = 256; b < 384; b += 16) { - const int cidx = b >= 320 ? 20 : 16; - a = ta + vp9_block2above_sb64[TX_16X16][b]; - l = tl + vp9_block2left_sb64[TX_16X16][b]; - a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); - l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); - above_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0; - left_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0; - optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, - &above_ec, &left_ec, TX_16X16); - a[0] = a[1] = a1[0] = a1[1] = above_ec; - l[0] = l[1] = l1[0] = l1[1] = left_ec; + if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) { + extend_for_intra(xd, plane, block, bsize, ss_txfrm_size); } -} -void vp9_optimize_sb64uv_8x8(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT_PLANES t_above[4], t_left[4]; - ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above; - ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left; - ENTROPY_CONTEXT *a, *l, above_ec, left_ec; - int b; - - vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above)); - vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left)); - for (b = 256; b < 384; b += 4) { - const int cidx = b >= 320 ? 20 : 16; - a = ta + vp9_block2above_sb64[TX_8X8][b]; - l = tl + vp9_block2left_sb64[TX_8X8][b]; - above_ec = (a[0] + a[1]) != 0; - left_ec = (l[0] + l[1]) != 0; - optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, - &above_ec, &left_ec, TX_8X8); - a[0] = a[1] = above_ec; - l[0] = l[1] = left_ec; - } -} + mode = plane == 0? mbmi->mode: mbmi->uv_mode; + if (plane == 0 && + mbmi->sb_type < BLOCK_SIZE_SB8X8 && + mbmi->ref_frame[0] == INTRA_FRAME) + b_mode = xd->mode_info_context->bmi[ib].as_mode.first; + else + b_mode = mode; -void vp9_optimize_sb64uv_4x4(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT_PLANES t_above[4], t_left[4]; - ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above; - ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left; - ENTROPY_CONTEXT *a, *l; - int b; - - vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above)); - vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left)); - for (b = 256; b < 384; b++) { - const int cidx = b >= 320 ? 
20 : 16; - a = ta + vp9_block2above_sb64[TX_4X4][b]; - l = tl + vp9_block2left_sb64[TX_4X4][b]; - optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, - a, l, TX_4X4); - } -} + assert(b_mode >= DC_PRED && b_mode <= TM_PRED); -void vp9_fidct_mb(VP9_COMMON *const cm, MACROBLOCK *x) { - MACROBLOCKD *const xd = &x->e_mbd; - TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; - - if (tx_size == TX_16X16) { - vp9_transform_mb_16x16(x); - vp9_quantize_mb_16x16(x); - if (x->optimize) - optimize_mb_16x16(cm, x); - vp9_inverse_transform_mb_16x16(xd); - } else if (tx_size == TX_8X8) { - if (xd->mode_info_context->mbmi.mode == SPLITMV) { - assert(xd->mode_info_context->mbmi.partitioning != PARTITIONING_4X4); - vp9_transform_mby_8x8(x); - vp9_transform_mbuv_4x4(x); - vp9_quantize_mby_8x8(x); - vp9_quantize_mbuv_4x4(x); - if (x->optimize) { - vp9_optimize_mby_8x8(cm, x); - vp9_optimize_mbuv_4x4(cm, x); - } - vp9_inverse_transform_mby_8x8(xd); - vp9_inverse_transform_mbuv_4x4(xd); - } else { - vp9_transform_mb_8x8(x); - vp9_quantize_mb_8x8(x); - if (x->optimize) - optimize_mb_8x8(cm, x); - vp9_inverse_transform_mb_8x8(xd); - } - } else { - transform_mb_4x4(x); - vp9_quantize_mb_4x4(x); - if (x->optimize) - optimize_mb_4x4(cm, x); - vp9_inverse_transform_mb_4x4(xd); - } -} + plane_b_size = b_width_log2(bsize) - pd->subsampling_x; + vp9_predict_intra_block(xd, tx_ib, plane_b_size, tx_size, b_mode, + dst, pd->dst.stride); + vp9_subtract_block(txfm_b_size, txfm_b_size, src_diff, bw, + src, p->src.stride, dst, pd->dst.stride); -void vp9_encode_inter16x16(VP9_COMMON *const cm, MACROBLOCK *x, - int mb_row, int mb_col) { - MACROBLOCKD *const xd = &x->e_mbd; + xform_quant(plane, block, bsize, ss_txfrm_size, arg); - vp9_build_inter_predictors_mb(xd, mb_row, mb_col); - subtract_mb(x); - vp9_fidct_mb(cm, x); - vp9_recon_mb(xd); -} - -/* this function is used by first pass only */ -void vp9_encode_inter16x16y(MACROBLOCK *x, int mb_row, int mb_col) { - MACROBLOCKD *xd = &x->e_mbd; - BLOCK *b = &x->block[0]; - vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16, mb_row, mb_col); + // if (x->optimize) + // vp9_optimize_b(plane, block, bsize, ss_txfrm_size, + // args->cm, x, args->ctx); - vp9_subtract_mby(x->src_diff, *(b->base_src), xd->predictor, b->src_stride); + switch (ss_txfrm_size / 2) { + case TX_32X32: + vp9_short_idct32x32_add(dqcoeff, dst, pd->dst.stride); + break; + case TX_16X16: + tx_type = plane == 0 ? get_tx_type_16x16(xd, raster_block) : DCT_DCT; + if (tx_type == DCT_DCT) + vp9_short_idct16x16_add(dqcoeff, dst, pd->dst.stride); + else + vp9_short_iht16x16_add(dqcoeff, dst, pd->dst.stride, tx_type); + break; + case TX_8X8: + tx_type = plane == 0 ? get_tx_type_8x8(xd, raster_block) : DCT_DCT; + if (tx_type == DCT_DCT) + vp9_short_idct8x8_add(dqcoeff, dst, pd->dst.stride); + else + vp9_short_iht8x8_add(dqcoeff, dst, pd->dst.stride, tx_type); + break; + case TX_4X4: + tx_type = plane == 0 ? get_tx_type_4x4(xd, raster_block) : DCT_DCT; + if (tx_type == DCT_DCT) + // this is like vp9_short_idct4x4 but has a special case around eob<=1 + // which is significant (not just an optimization) for the lossless + // case. 
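+      // (same eob <= 1 special case as in encode_block above)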
+ vp9_inverse_transform_b_4x4_add(xd, pd->eobs[block], dqcoeff, + dst, pd->dst.stride); + else + vp9_short_iht4x4_add(dqcoeff, dst, pd->dst.stride, tx_type); + break; + } +} - vp9_transform_mby_4x4(x); - vp9_quantize_mby_4x4(x); - vp9_inverse_transform_mby_4x4(xd); +void vp9_encode_intra_block_y(VP9_COMMON *cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { + MACROBLOCKD* const xd = &x->e_mbd; + struct optimize_ctx ctx; + struct encode_b_args arg = {cm, x, &ctx}; - vp9_recon_mby(xd); + foreach_transformed_block_in_plane(xd, bsize, 0, + encode_block_intra, &arg); +} +void vp9_encode_intra_block_uv(VP9_COMMON *cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { + MACROBLOCKD* const xd = &x->e_mbd; + struct optimize_ctx ctx; + struct encode_b_args arg = {cm, x, &ctx}; + foreach_transformed_block_uv(xd, bsize, encode_block_intra, &arg); } + diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h index 242afbeae9ab87e5761233eaf9513860bd1df162..57969034609acb0d6f29b895f6b7b2bcb5d07d48 100644 --- a/vp9/encoder/vp9_encodemb.h +++ b/vp9/encoder/vp9_encodemb.h @@ -22,82 +22,32 @@ typedef struct { MV_REFERENCE_FRAME second_ref_frame; } MODE_DEFINITION; - -struct VP9_ENCODER_RTCD; -void vp9_encode_inter16x16(VP9_COMMON *const cm, MACROBLOCK *x, - int mb_row, int mb_col); - -void vp9_transform_mbuv_4x4(MACROBLOCK *x); -void vp9_transform_mby_4x4(MACROBLOCK *x); - -void vp9_optimize_mby_4x4(VP9_COMMON *const cm, MACROBLOCK *x); -void vp9_optimize_mbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x); -void vp9_encode_inter16x16y(MACROBLOCK *x, int mb_row, int mb_col); - -void vp9_transform_mb_8x8(MACROBLOCK *mb); -void vp9_transform_mby_8x8(MACROBLOCK *x); -void vp9_transform_mbuv_8x8(MACROBLOCK *x); -void vp9_optimize_mby_8x8(VP9_COMMON *const cm, MACROBLOCK *x); -void vp9_optimize_mbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x); - -void vp9_transform_mb_16x16(MACROBLOCK *mb); -void vp9_transform_mby_16x16(MACROBLOCK *x); -void vp9_optimize_mby_16x16(VP9_COMMON *const cm, MACROBLOCK *x); - -void vp9_transform_sby_32x32(MACROBLOCK *x); -void vp9_optimize_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x); -void vp9_transform_sby_16x16(MACROBLOCK *x); -void vp9_optimize_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x); -void vp9_transform_sby_8x8(MACROBLOCK *x); -void vp9_optimize_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x); -void vp9_transform_sby_4x4(MACROBLOCK *x); -void vp9_optimize_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x); -void vp9_transform_sbuv_16x16(MACROBLOCK *x); -void vp9_optimize_sbuv_16x16(VP9_COMMON *const cm, MACROBLOCK *x); -void vp9_transform_sbuv_8x8(MACROBLOCK *x); -void vp9_optimize_sbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x); -void vp9_transform_sbuv_4x4(MACROBLOCK *x); -void vp9_optimize_sbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x); - -void vp9_transform_sb64y_32x32(MACROBLOCK *x); -void vp9_optimize_sb64y_32x32(VP9_COMMON *const cm, MACROBLOCK *x); -void vp9_transform_sb64y_16x16(MACROBLOCK *x); -void vp9_optimize_sb64y_16x16(VP9_COMMON *const cm, MACROBLOCK *x); -void vp9_transform_sb64y_8x8(MACROBLOCK *x); -void vp9_optimize_sb64y_8x8(VP9_COMMON *const cm, MACROBLOCK *x); -void vp9_transform_sb64y_4x4(MACROBLOCK *x); -void vp9_optimize_sb64y_4x4(VP9_COMMON *const cm, MACROBLOCK *x); -void vp9_transform_sb64uv_32x32(MACROBLOCK *x); -void vp9_optimize_sb64uv_32x32(VP9_COMMON *const cm, MACROBLOCK *x); -void vp9_transform_sb64uv_16x16(MACROBLOCK *x); -void vp9_optimize_sb64uv_16x16(VP9_COMMON *const cm, MACROBLOCK *x); -void vp9_transform_sb64uv_8x8(MACROBLOCK *x); -void 
vp9_optimize_sb64uv_8x8(VP9_COMMON *const cm, MACROBLOCK *x); -void vp9_transform_sb64uv_4x4(MACROBLOCK *x); -void vp9_optimize_sb64uv_4x4(VP9_COMMON *const cm, MACROBLOCK *x); - -void vp9_fidct_mb(VP9_COMMON *const cm, MACROBLOCK *x); - -void vp9_subtract_4b_c(BLOCK *be, BLOCKD *bd, int pitch); - -void vp9_subtract_mbuv_s_c(int16_t *diff, const uint8_t *usrc, - const uint8_t *vsrc, int src_stride, - const uint8_t *upred, - const uint8_t *vpred, int dst_stride); -void vp9_subtract_mby_s_c(int16_t *diff, const uint8_t *src, - int src_stride, const uint8_t *pred, - int dst_stride); -void vp9_subtract_sby_s_c(int16_t *diff, const uint8_t *src, int src_stride, - const uint8_t *pred, int dst_stride); -void vp9_subtract_sbuv_s_c(int16_t *diff, const uint8_t *usrc, - const uint8_t *vsrc, int src_stride, - const uint8_t *upred, - const uint8_t *vpred, int dst_stride); -void vp9_subtract_sb64y_s_c(int16_t *diff, const uint8_t *src, int src_stride, - const uint8_t *pred, int dst_stride); -void vp9_subtract_sb64uv_s_c(int16_t *diff, const uint8_t *usrc, - const uint8_t *vsrc, int src_stride, - const uint8_t *upred, - const uint8_t *vpred, int dst_stride); +struct optimize_ctx { + ENTROPY_CONTEXT ta[MAX_MB_PLANE][16]; + ENTROPY_CONTEXT tl[MAX_MB_PLANE][16]; +}; + +void vp9_optimize_init(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize, + struct optimize_ctx *ctx); +void vp9_optimize_b(int plane, int block, BLOCK_SIZE_TYPE bsize, + int ss_txfrm_size, VP9_COMMON *cm, MACROBLOCK *x, + struct optimize_ctx *ctx); +void vp9_optimize_sby(VP9_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); +void vp9_optimize_sbuv(VP9_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); + +void vp9_encode_sb(VP9_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); +void vp9_encode_sby(VP9_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); +void vp9_encode_sbuv(VP9_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); + +void vp9_xform_quant_sby(VP9_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); +void vp9_xform_quant_sbuv(VP9_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); + +void vp9_subtract_block(int rows, int cols, + int16_t *diff_ptr, int diff_stride, + const uint8_t *src_ptr, int src_stride, + const uint8_t *pred_ptr, int pred_stride); +void vp9_subtract_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); +void vp9_subtract_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); +void vp9_subtract_sb(MACROBLOCK *xd, BLOCK_SIZE_TYPE bsize); #endif // VP9_ENCODER_VP9_ENCODEMB_H_ diff --git a/vp9/encoder/vp9_encodemv.c b/vp9/encoder/vp9_encodemv.c index 9431f078105cb56aa11ae60ccbf633e03f6b7257..a582d183db8751ad40929f9605b44783249cf544 100644 --- a/vp9/encoder/vp9_encodemv.c +++ b/vp9/encoder/vp9_encodemv.c @@ -24,68 +24,48 @@ extern unsigned int active_section; nmv_context_counts tnmvcounts; #endif -static void encode_nmv_component(vp9_writer* const bc, - int v, - int r, - const nmv_component* const mvcomp) { - int s, z, c, o, d; - assert (v != 0); /* should not be zero */ - s = v < 0; - vp9_write(bc, s, mvcomp->sign); - z = (s ? -v : v) - 1; /* magnitude - 1 */ - - c = vp9_get_mv_class(z, &o); - - write_token(bc, vp9_mv_class_tree, mvcomp->classes, - vp9_mv_class_encodings + c); - - d = (o >> 3); /* int mv data */ - - if (c == MV_CLASS_0) { - write_token(bc, vp9_mv_class0_tree, mvcomp->class0, - vp9_mv_class0_encodings + d); +static void encode_mv_component(vp9_writer* w, int comp, + const nmv_component* mvcomp, int usehp) { + int offset; + const int sign = comp < 0; + const int mag = sign ? 
-comp : comp; + const int mv_class = vp9_get_mv_class(mag - 1, &offset); + const int d = offset >> 3; // int mv data + const int fr = (offset >> 1) & 3; // fractional mv data + const int hp = offset & 1; // high precision mv data + + assert(comp != 0); + + // Sign + vp9_write(w, sign, mvcomp->sign); + + // Class + write_token(w, vp9_mv_class_tree, mvcomp->classes, + &vp9_mv_class_encodings[mv_class]); + + // Integer bits + if (mv_class == MV_CLASS_0) { + write_token(w, vp9_mv_class0_tree, mvcomp->class0, + &vp9_mv_class0_encodings[d]); } else { - int i, b; - b = c + CLASS0_BITS - 1; /* number of bits */ - for (i = 0; i < b; ++i) - vp9_write(bc, ((d >> i) & 1), mvcomp->bits[i]); + int i; + const int n = mv_class + CLASS0_BITS - 1; // number of bits + for (i = 0; i < n; ++i) + vp9_write(w, (d >> i) & 1, mvcomp->bits[i]); } -} -static void encode_nmv_component_fp(vp9_writer *bc, - int v, - int r, - const nmv_component* const mvcomp, - int usehp) { - int s, z, c, o, d, f, e; - assert (v != 0); /* should not be zero */ - s = v < 0; - z = (s ? -v : v) - 1; /* magnitude - 1 */ - - c = vp9_get_mv_class(z, &o); - - d = (o >> 3); /* int mv data */ - f = (o >> 1) & 3; /* fractional pel mv data */ - e = (o & 1); /* high precision mv data */ - - /* Code the fractional pel bits */ - if (c == MV_CLASS_0) { - write_token(bc, vp9_mv_fp_tree, mvcomp->class0_fp[d], - vp9_mv_fp_encodings + f); - } else { - write_token(bc, vp9_mv_fp_tree, mvcomp->fp, - vp9_mv_fp_encodings + f); - } - /* Code the high precision bit */ - if (usehp) { - if (c == MV_CLASS_0) { - vp9_write(bc, e, mvcomp->class0_hp); - } else { - vp9_write(bc, e, mvcomp->hp); - } - } + // Fractional bits + write_token(w, vp9_mv_fp_tree, + mv_class == MV_CLASS_0 ? mvcomp->class0_fp[d] : mvcomp->fp, + &vp9_mv_fp_encodings[fr]); + + // High precision bit + if (usehp) + vp9_write(w, hp, + mv_class == MV_CLASS_0 ? 
mvcomp->class0_hp : mvcomp->hp); } + static void build_nmv_component_cost_table(int *mvcost, const nmv_component* const mvcomp, int usehp) { @@ -556,30 +536,19 @@ void vp9_write_nmv_probs(VP9_COMP* const cpi, int usehp, vp9_writer* const bc) { } } -void vp9_encode_nmv(vp9_writer* const bc, const MV* const mv, - const MV* const ref, const nmv_context* const mvctx) { - MV_JOINT_TYPE j = vp9_get_mv_joint(*mv); - write_token(bc, vp9_mv_joint_tree, mvctx->joints, - vp9_mv_joint_encodings + j); - if (j == MV_JOINT_HZVNZ || j == MV_JOINT_HNZVNZ) { - encode_nmv_component(bc, mv->row, ref->col, &mvctx->comps[0]); - } - if (j == MV_JOINT_HNZVZ || j == MV_JOINT_HNZVNZ) { - encode_nmv_component(bc, mv->col, ref->col, &mvctx->comps[1]); - } -} - -void vp9_encode_nmv_fp(vp9_writer* const bc, const MV* const mv, - const MV* const ref, const nmv_context* const mvctx, - int usehp) { - MV_JOINT_TYPE j = vp9_get_mv_joint(*mv); +void vp9_encode_mv(vp9_writer* w, const MV* mv, const MV* ref, + const nmv_context* mvctx, int usehp) { + const MV diff = {mv->row - ref->row, + mv->col - ref->col}; + const MV_JOINT_TYPE j = vp9_get_mv_joint(&diff); usehp = usehp && vp9_use_nmv_hp(ref); - if (j == MV_JOINT_HZVNZ || j == MV_JOINT_HNZVNZ) { - encode_nmv_component_fp(bc, mv->row, ref->row, &mvctx->comps[0], usehp); - } - if (j == MV_JOINT_HNZVZ || j == MV_JOINT_HNZVNZ) { - encode_nmv_component_fp(bc, mv->col, ref->col, &mvctx->comps[1], usehp); - } + + write_token(w, vp9_mv_joint_tree, mvctx->joints, &vp9_mv_joint_encodings[j]); + if (mv_joint_vertical(j)) + encode_mv_component(w, diff.row, &mvctx->comps[0], usehp); + + if (mv_joint_horizontal(j)) + encode_mv_component(w, diff.col, &mvctx->comps[1], usehp); } void vp9_build_nmv_cost_table(int *mvjoint, @@ -600,62 +569,42 @@ void vp9_update_nmv_count(VP9_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv, int_mv *second_best_ref_mv) { MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; MV mv; + int bwl = b_width_log2(mbmi->sb_type), bw = 1 << bwl; + int bhl = b_height_log2(mbmi->sb_type), bh = 1 << bhl; + int idx, idy; - if (mbmi->mode == SPLITMV) { + if (mbmi->sb_type < BLOCK_SIZE_SB8X8) { int i; - - for (i = 0; i < x->partition_info->count; i++) { - if (x->partition_info->bmi[i].mode == NEW4X4) { - if (x->e_mbd.allow_high_precision_mv) { - mv.row = (x->partition_info->bmi[i].mv.as_mv.row - - best_ref_mv->as_mv.row); - mv.col = (x->partition_info->bmi[i].mv.as_mv.col - - best_ref_mv->as_mv.col); - vp9_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount, 1); - if (x->e_mbd.mode_info_context->mbmi.second_ref_frame > 0) { - mv.row = (x->partition_info->bmi[i].second_mv.as_mv.row - - second_best_ref_mv->as_mv.row); - mv.col = (x->partition_info->bmi[i].second_mv.as_mv.col - - second_best_ref_mv->as_mv.col); - vp9_increment_nmv(&mv, &second_best_ref_mv->as_mv, - &cpi->NMVcount, 1); - } - } else { - mv.row = (x->partition_info->bmi[i].mv.as_mv.row - - best_ref_mv->as_mv.row); - mv.col = (x->partition_info->bmi[i].mv.as_mv.col - - best_ref_mv->as_mv.col); - vp9_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount, 0); - if (x->e_mbd.mode_info_context->mbmi.second_ref_frame > 0) { - mv.row = (x->partition_info->bmi[i].second_mv.as_mv.row - - second_best_ref_mv->as_mv.row); - mv.col = (x->partition_info->bmi[i].second_mv.as_mv.col - - second_best_ref_mv->as_mv.col); - vp9_increment_nmv(&mv, &second_best_ref_mv->as_mv, - &cpi->NMVcount, 0); + PARTITION_INFO *pi = x->partition_info; + for (idy = 0; idy < 2; idy += bh) { + for (idx = 0; idx < 2; idx += bw) { + i = idy * 2 + 
idx; + if (pi->bmi[i].mode == NEWMV) { + mv.row = (pi->bmi[i].mv.as_mv.row - best_ref_mv->as_mv.row); + mv.col = (pi->bmi[i].mv.as_mv.col - best_ref_mv->as_mv.col); + vp9_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount, + x->e_mbd.allow_high_precision_mv); + if (x->e_mbd.mode_info_context->mbmi.ref_frame[1] > INTRA_FRAME) { + mv.row = pi->bmi[i].second_mv.as_mv.row - + second_best_ref_mv->as_mv.row; + mv.col = pi->bmi[i].second_mv.as_mv.col - + second_best_ref_mv->as_mv.col; + vp9_increment_nmv(&mv, &second_best_ref_mv->as_mv, &cpi->NMVcount, + x->e_mbd.allow_high_precision_mv); } } } } } else if (mbmi->mode == NEWMV) { - if (x->e_mbd.allow_high_precision_mv) { - mv.row = (mbmi->mv[0].as_mv.row - best_ref_mv->as_mv.row); - mv.col = (mbmi->mv[0].as_mv.col - best_ref_mv->as_mv.col); - vp9_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount, 1); - if (mbmi->second_ref_frame > 0) { - mv.row = (mbmi->mv[1].as_mv.row - second_best_ref_mv->as_mv.row); - mv.col = (mbmi->mv[1].as_mv.col - second_best_ref_mv->as_mv.col); - vp9_increment_nmv(&mv, &second_best_ref_mv->as_mv, &cpi->NMVcount, 1); - } - } else { - mv.row = (mbmi->mv[0].as_mv.row - best_ref_mv->as_mv.row); - mv.col = (mbmi->mv[0].as_mv.col - best_ref_mv->as_mv.col); - vp9_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount, 0); - if (mbmi->second_ref_frame > 0) { - mv.row = (mbmi->mv[1].as_mv.row - second_best_ref_mv->as_mv.row); - mv.col = (mbmi->mv[1].as_mv.col - second_best_ref_mv->as_mv.col); - vp9_increment_nmv(&mv, &second_best_ref_mv->as_mv, &cpi->NMVcount, 0); - } + mv.row = (mbmi->mv[0].as_mv.row - best_ref_mv->as_mv.row); + mv.col = (mbmi->mv[0].as_mv.col - best_ref_mv->as_mv.col); + vp9_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount, + x->e_mbd.allow_high_precision_mv); + if (mbmi->ref_frame[1] > INTRA_FRAME) { + mv.row = (mbmi->mv[1].as_mv.row - second_best_ref_mv->as_mv.row); + mv.col = (mbmi->mv[1].as_mv.col - second_best_ref_mv->as_mv.col); + vp9_increment_nmv(&mv, &second_best_ref_mv->as_mv, &cpi->NMVcount, + x->e_mbd.allow_high_precision_mv); } } } diff --git a/vp9/encoder/vp9_encodemv.h b/vp9/encoder/vp9_encodemv.h index 982c952c1763ab0b8fa520a465663ff1d88c2701..cb25d85ee31dba9792410ccbbd69cb34ffcdc782 100644 --- a/vp9/encoder/vp9_encodemv.h +++ b/vp9/encoder/vp9_encodemv.h @@ -15,11 +15,10 @@ #include "vp9/encoder/vp9_onyx_int.h" void vp9_write_nmv_probs(VP9_COMP* const, int usehp, vp9_writer* const); -void vp9_encode_nmv(vp9_writer* const w, const MV* const mv, - const MV* const ref, const nmv_context* const mvctx); -void vp9_encode_nmv_fp(vp9_writer* const w, const MV* const mv, - const MV* const ref, const nmv_context* const mvctx, - int usehp); + +void vp9_encode_mv(vp9_writer* w, const MV* mv, const MV* ref, + const nmv_context* mvctx, int usehp); + void vp9_build_nmv_cost_table(int *mvjoint, int *mvcost[2], const nmv_context* const mvctx, diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index 70f9e3153a2de5cbb59396624bdb201fea3442b1..5e26cd82a2b40c0d91196ba582bacd2dc44086b4 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -14,7 +14,6 @@ #include "vp9/encoder/vp9_onyx_int.h" #include "vp9/encoder/vp9_variance.h" #include "vp9/encoder/vp9_encodeintra.h" -#include "vp9/common/vp9_setupintrarecon.h" #include "vp9/encoder/vp9_mcomp.h" #include "vp9/encoder/vp9_firstpass.h" #include "vpx_scale/vpx_scale.h" @@ -23,7 +22,7 @@ #include "vp9/common/vp9_extend.h" #include "vp9/common/vp9_systemdependent.h" #include "vpx_mem/vpx_mem.h" -#include 
"vp9/common/vp9_swapyv12buffer.h" +#include "vpx_scale/yv12config.h" #include <stdio.h> #include "vp9/encoder/vp9_quantize.h" #include "vp9/encoder/vp9_rdopt.h" @@ -32,13 +31,15 @@ #include "vp9/common/vp9_entropymv.h" #include "vp9/encoder/vp9_encodemv.h" #include "./vpx_scale_rtcd.h" +// TODO(jkoleszar): for setup_dst_planes +#include "vp9/common/vp9_reconinter.h" #define OUTPUT_FPF 0 #define IIFACTOR 12.5 #define IIKFACTOR1 12.5 #define IIKFACTOR2 15.0 -#define RMAX 128.0 +#define RMAX 512.0 #define GF_RMAX 96.0 #define ERR_DIVISOR 150.0 #define MIN_DECAY_FACTOR 0.1 @@ -46,11 +47,17 @@ #define KF_MB_INTRA_MIN 150 #define GF_MB_INTRA_MIN 100 -#define DOUBLE_DIVIDE_CHECK(X) ((X)<0?(X)-.000001:(X)+.000001) +#define DOUBLE_DIVIDE_CHECK(x) ((x) < 0 ? (x) - 0.000001 : (x) + 0.000001) #define POW1 (double)cpi->oxcf.two_pass_vbrbias/100.0 #define POW2 (double)cpi->oxcf.two_pass_vbrbias/100.0 +static void swap_yv12(YV12_BUFFER_CONFIG *a, YV12_BUFFER_CONFIG *b) { + YV12_BUFFER_CONFIG temp = *a; + *a = *b; + *b = temp; +} + static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame); static int select_cq_level(int qindex) { @@ -71,8 +78,8 @@ static int select_cq_level(int qindex) { // Resets the first pass file to the given position using a relative seek from the current position -static void reset_fpf_position(VP9_COMP *cpi, FIRSTPASS_STATS *Position) { - cpi->twopass.stats_in = Position; +static void reset_fpf_position(VP9_COMP *cpi, FIRSTPASS_STATS *position) { + cpi->twopass.stats_in = position; } static int lookup_next_frame_stats(VP9_COMP *cpi, FIRSTPASS_STATS *next_frame) { @@ -128,7 +135,7 @@ static void output_stats(const VP9_COMP *cpi, FILE *fpfile; fpfile = fopen("firstpass.stt", "a"); - fprintf(fpfile, "%12.0f %12.0f %12.0f %12.0f %12.0f %12.4f %12.4f" + fprintf(stdout, "%12.0f %12.0f %12.0f %12.0f %12.0f %12.4f %12.4f" "%12.4f %12.4f %12.4f %12.4f %12.4f %12.4f %12.4f" "%12.0f %12.0f %12.4f %12.0f %12.0f %12.4f\n", stats->frame, @@ -245,17 +252,11 @@ static void avg_stats(FIRSTPASS_STATS *section) { // Calculate a modified Error used in distributing bits between easier and harder frames static double calculate_modified_err(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { - double av_err = (cpi->twopass.total_stats->ssim_weighted_pred_err / - cpi->twopass.total_stats->count); - double this_err = this_frame->ssim_weighted_pred_err; - double modified_err; - - if (this_err > av_err) - modified_err = av_err * pow((this_err / DOUBLE_DIVIDE_CHECK(av_err)), POW1); - else - modified_err = av_err * pow((this_err / DOUBLE_DIVIDE_CHECK(av_err)), POW2); - - return modified_err; + const FIRSTPASS_STATS *const stats = &cpi->twopass.total_stats; + const double av_err = stats->ssim_weighted_pred_err / stats->count; + const double this_err = this_frame->ssim_weighted_pred_err; + return av_err * pow(this_err / DOUBLE_DIVIDE_CHECK(av_err), + this_err > av_err ? POW1 : POW2); } static const double weight_table[256] = { @@ -317,46 +318,69 @@ static double simple_weight(YV12_BUFFER_CONFIG *source) { } -// This function returns the current per frame maximum bitrate target +// This function returns the current per frame maximum bitrate target. 
static int frame_max_bits(VP9_COMP *cpi) { - // Max allocation for a single frame based on the max section guidelines passed in and how many bits are left - int max_bits; - - // For VBR base this on the bits and frames left plus the two_pass_vbrmax_section rate passed in by the user - max_bits = (int)(((double)cpi->twopass.bits_left / (cpi->twopass.total_stats->count - (double)cpi->common.current_video_frame)) * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0)); - - // Trap case where we are out of bits - if (max_bits < 0) - max_bits = 0; - - return max_bits; + // Max allocation for a single frame based on the max section guidelines + // passed in and how many bits are left. + // For VBR base this on the bits and frames left plus the + // two_pass_vbrmax_section rate passed in by the user. + const double max_bits = (1.0 * cpi->twopass.bits_left / + (cpi->twopass.total_stats.count - cpi->common.current_video_frame)) * + (cpi->oxcf.two_pass_vbrmax_section / 100.0); + + // Trap case where we are out of bits. + return MAX((int)max_bits, 0); } void vp9_init_first_pass(VP9_COMP *cpi) { - zero_stats(cpi->twopass.total_stats); + zero_stats(&cpi->twopass.total_stats); } void vp9_end_first_pass(VP9_COMP *cpi) { - output_stats(cpi, cpi->output_pkt_list, cpi->twopass.total_stats); + output_stats(cpi, cpi->output_pkt_list, &cpi->twopass.total_stats); } static void zz_motion_search(VP9_COMP *cpi, MACROBLOCK *x, YV12_BUFFER_CONFIG *recon_buffer, int *best_motion_err, int recon_yoffset) { MACROBLOCKD *const xd = &x->e_mbd; - BLOCK *b = &x->block[0]; - BLOCKD *d = &x->e_mbd.block[0]; - - uint8_t *src_ptr = (*(b->base_src) + b->src); - int src_stride = b->src_stride; - uint8_t *ref_ptr; - int ref_stride = d->pre_stride; // Set up pointers for this macro block recon buffer - xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset; + xd->plane[0].pre[0].buf = recon_buffer->y_buffer + recon_yoffset; - ref_ptr = (uint8_t *)(*(d->base_pre) + d->pre); + switch (xd->mode_info_context->mbmi.sb_type) { + case BLOCK_SIZE_SB8X8: + vp9_mse8x8(x->plane[0].src.buf, x->plane[0].src.stride, + xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride, + (unsigned int *)(best_motion_err)); + break; + case BLOCK_SIZE_SB16X8: + vp9_mse16x8(x->plane[0].src.buf, x->plane[0].src.stride, + xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride, + (unsigned int *)(best_motion_err)); + break; + case BLOCK_SIZE_SB8X16: + vp9_mse8x16(x->plane[0].src.buf, x->plane[0].src.stride, + xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride, + (unsigned int *)(best_motion_err)); + break; + default: + vp9_mse16x16(x->plane[0].src.buf, x->plane[0].src.stride, + xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride, + (unsigned int *)(best_motion_err)); + break; + } +} - vp9_mse16x16(src_ptr, src_stride, ref_ptr, ref_stride, - (unsigned int *)(best_motion_err)); +static enum BlockSize get_bs(BLOCK_SIZE_TYPE b) { + switch (b) { + case BLOCK_SIZE_SB8X8: + return BLOCK_8X8; + case BLOCK_SIZE_SB16X8: + return BLOCK_16X8; + case BLOCK_SIZE_SB8X16: + return BLOCK_8X16; + default: + return BLOCK_16X16; + } } static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x, @@ -364,8 +388,6 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x, YV12_BUFFER_CONFIG *recon_buffer, int *best_motion_err, int recon_yoffset) { MACROBLOCKD *const xd = &x->e_mbd; - BLOCK *b = &x->block[0]; - BLOCKD *d = &x->e_mbd.block[0]; int num00; int_mv tmp_mv; @@ -375,7 +397,8 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x, int step_param = 3; 
int further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; int n; - vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16]; + vp9_variance_fn_ptr_t v_fn_ptr = + cpi->fn_ptr[get_bs(xd->mode_info_context->mbmi.sb_type)]; int new_mv_mode_penalty = 256; int sr = 0; @@ -392,16 +415,29 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x, further_steps -= sr; // override the default variance function to use MSE - v_fn_ptr.vf = vp9_mse16x16; + switch (xd->mode_info_context->mbmi.sb_type) { + case BLOCK_SIZE_SB8X8: + v_fn_ptr.vf = vp9_mse8x8; + break; + case BLOCK_SIZE_SB16X8: + v_fn_ptr.vf = vp9_mse16x8; + break; + case BLOCK_SIZE_SB8X16: + v_fn_ptr.vf = vp9_mse8x16; + break; + default: + v_fn_ptr.vf = vp9_mse16x16; + break; + } // Set up pointers for this macro block recon buffer - xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset; + xd->plane[0].pre[0].buf = recon_buffer->y_buffer + recon_yoffset; // Initial step/diamond search centred on best mv tmp_mv.as_int = 0; ref_mv_full.as_mv.col = ref_mv->as_mv.col >> 3; ref_mv_full.as_mv.row = ref_mv->as_mv.row >> 3; - tmp_err = cpi->diamond_search_sad(x, b, d, &ref_mv_full, &tmp_mv, step_param, + tmp_err = cpi->diamond_search_sad(x, &ref_mv_full, &tmp_mv, step_param, x->sadperbit16, &num00, &v_fn_ptr, x->nmvjointcost, x->mvcost, ref_mv); @@ -424,7 +460,7 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x, if (num00) num00--; else { - tmp_err = cpi->diamond_search_sad(x, b, d, &ref_mv_full, &tmp_mv, + tmp_err = cpi->diamond_search_sad(x, &ref_mv_full, &tmp_mv, step_param + n, x->sadperbit16, &num00, &v_fn_ptr, x->nmvjointcost, @@ -448,13 +484,13 @@ void vp9_first_pass(VP9_COMP *cpi) { MACROBLOCKD *const xd = &x->e_mbd; int recon_yoffset, recon_uvoffset; - YV12_BUFFER_CONFIG *lst_yv12 = - &cm->yv12_fb[cm->ref_frame_map[cpi->lst_fb_idx]]; - YV12_BUFFER_CONFIG *new_yv12 = &cm->yv12_fb[cm->new_fb_idx]; - YV12_BUFFER_CONFIG *gld_yv12 = - &cm->yv12_fb[cm->ref_frame_map[cpi->gld_fb_idx]]; - int recon_y_stride = lst_yv12->y_stride; - int recon_uv_stride = lst_yv12->uv_stride; + const int lst_yv12_idx = cm->ref_frame_map[cpi->lst_fb_idx]; + const int gld_yv12_idx = cm->ref_frame_map[cpi->gld_fb_idx]; + YV12_BUFFER_CONFIG *const lst_yv12 = &cm->yv12_fb[lst_yv12_idx]; + YV12_BUFFER_CONFIG *const new_yv12 = &cm->yv12_fb[cm->new_fb_idx]; + YV12_BUFFER_CONFIG *const gld_yv12 = &cm->yv12_fb[gld_yv12_idx]; + const int recon_y_stride = lst_yv12->y_stride; + const int recon_uv_stride = lst_yv12->uv_stride; int64_t intra_error = 0; int64_t coded_error = 0; int64_t sr_coded_error = 0; @@ -477,9 +513,9 @@ void vp9_first_pass(VP9_COMP *cpi) { vp9_clear_system_state(); // __asm emms; - x->src = * cpi->Source; - xd->pre = *lst_yv12; - xd->dst = *new_yv12; + vp9_setup_src_planes(x, cpi->Source, 0, 0); + setup_pre_planes(xd, lst_yv12, NULL, 0, 0, NULL, NULL); + setup_dst_planes(xd, new_yv12, 0, 0); x->partition_info = x->pi; @@ -487,12 +523,8 @@ void vp9_first_pass(VP9_COMP *cpi) { vp9_build_block_offsets(x); - vp9_setup_block_dptrs(&x->e_mbd); + vp9_setup_block_dptrs(&x->e_mbd, cm->subsampling_x, cm->subsampling_y); - vp9_setup_block_ptrs(x); - - // set up frame new frame for intra coded blocks - vp9_setup_intra_recon(new_yv12); vp9_frame_init_quantizer(cpi); // Initialise the MV cost table to the defaults @@ -500,7 +532,7 @@ void vp9_first_pass(VP9_COMP *cpi) { // if ( 0 ) { vp9_init_mv_probs(cm); - vp9_initialize_rd_consts(cpi, cm->base_qindex + cm->y1dc_delta_q); + vp9_initialize_rd_consts(cpi, cm->base_qindex + 
cm->y_dc_delta_q);
   }

   // for each macroblock row in image
@@ -515,10 +547,9 @@
     recon_uvoffset = (mb_row * recon_uv_stride * 8);

     // Set up limit values for motion vectors to prevent them extending outside the UMV borders
-    x->mv_row_min = -((mb_row * 16) + (VP9BORDERINPIXELS - 16));
+    x->mv_row_min = -((mb_row * 16) + (VP9BORDERINPIXELS - 8));
     x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16)
-                    + (VP9BORDERINPIXELS - 16);
-
+                    + (VP9BORDERINPIXELS - 8);

     // for each macroblock col in image
     for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
@@ -526,11 +557,31 @@
       int this_error = INT_MAX;
       int gf_motion_error = INT_MAX;
       int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row);

-      xd->dst.y_buffer = new_yv12->y_buffer + recon_yoffset;
-      xd->dst.u_buffer = new_yv12->u_buffer + recon_uvoffset;
-      xd->dst.v_buffer = new_yv12->v_buffer + recon_uvoffset;
+      xd->plane[0].dst.buf = new_yv12->y_buffer + recon_yoffset;
+      xd->plane[1].dst.buf = new_yv12->u_buffer + recon_uvoffset;
+      xd->plane[2].dst.buf = new_yv12->v_buffer + recon_uvoffset;
       xd->left_available = (mb_col != 0);

+      if (mb_col * 2 + 1 < cm->mi_cols) {
+        if (mb_row * 2 + 1 < cm->mi_rows) {
+          xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_MB16X16;
+        } else {
+          xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_SB16X8;
+        }
+      } else {
+        if (mb_row * 2 + 1 < cm->mi_rows) {
+          xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_SB8X16;
+        } else {
+          xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_SB8X8;
+        }
+      }
+      xd->mode_info_context->mbmi.ref_frame[0] = INTRA_FRAME;
+      set_mi_row_col(cm, xd,
+                     mb_row << 1,
+                     1 << mi_height_log2(xd->mode_info_context->mbmi.sb_type),
+                     mb_col << 1,
+                     1 << mi_width_log2(xd->mode_info_context->mbmi.sb_type));
+
       // do intra 16x16 prediction
       this_error = vp9_encode_intra(cpi, x, use_dc_pred);
@@ -544,9 +595,9 @@
       intra_error += (int64_t)this_error;

       // Set up limit values for motion vectors to prevent them extending outside the UMV borders
-      x->mv_col_min = -((mb_col * 16) + (VP9BORDERINPIXELS - 16));
+      x->mv_col_min = -((mb_col * 16) + (VP9BORDERINPIXELS - 8));
       x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16)
-                      + (VP9BORDERINPIXELS - 16);
+                      + (VP9BORDERINPIXELS - 8);

       // Other than for the first frame do a motion search
       if (cm->current_video_frame > 0) {
@@ -592,9 +643,9 @@
         }

         // Reset to last frame as reference buffer
-        xd->pre.y_buffer = lst_yv12->y_buffer + recon_yoffset;
-        xd->pre.u_buffer = lst_yv12->u_buffer + recon_uvoffset;
-        xd->pre.v_buffer = lst_yv12->v_buffer + recon_uvoffset;
+        xd->plane[0].pre[0].buf = lst_yv12->y_buffer + recon_yoffset;
+        xd->plane[1].pre[0].buf = lst_yv12->u_buffer + recon_uvoffset;
+        xd->plane[2].pre[0].buf = lst_yv12->v_buffer + recon_uvoffset;

         // In accumulating a score for the older reference frame
         // take the best of the motion predicted score and
@@ -626,7 +677,12 @@
           this_error = motion_error;
           vp9_set_mbmode_and_mvs(x, NEWMV, &mv);
           xd->mode_info_context->mbmi.txfm_size = TX_4X4;
-          vp9_encode_inter16x16y(x, mb_row, mb_col);
+          xd->mode_info_context->mbmi.ref_frame[0] = LAST_FRAME;
+          xd->mode_info_context->mbmi.ref_frame[1] = NONE;
+          vp9_build_inter_predictors_sby(xd, mb_row << 1,
+                                         mb_col << 1,
+                                         xd->mode_info_context->mbmi.sb_type);
+          vp9_encode_sby(cm, x, xd->mode_info_context->mbmi.sb_type);
           sum_mvr += mv.as_mv.row;
           sum_mvr_abs += abs(mv.as_mv.row);
           sum_mvc += mv.as_mv.col;
@@ -679,22 +735,19 @@ void vp9_first_pass(VP9_COMP *cpi)
{ coded_error += (int64_t)this_error; // adjust to the next column of macroblocks - x->src.y_buffer += 16; - x->src.u_buffer += 8; - x->src.v_buffer += 8; + x->plane[0].src.buf += 16; + x->plane[1].src.buf += 8; + x->plane[2].src.buf += 8; recon_yoffset += 16; recon_uvoffset += 8; } // adjust to the next row of mbs - x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols; - x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols; - x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols; + x->plane[0].src.buf += 16 * x->plane[0].src.stride - 16 * cm->mb_cols; + x->plane[1].src.buf += 8 * x->plane[1].src.stride - 8 * cm->mb_cols; + x->plane[2].src.buf += 8 * x->plane[1].src.stride - 8 * cm->mb_cols; - // extend the recon for intra prediction - vp9_extend_mb_row(new_yv12, xd->dst.y_buffer + 16, - xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); vp9_clear_system_state(); // __asm emms; } @@ -746,25 +799,23 @@ void vp9_first_pass(VP9_COMP *cpi) { } // TODO: handle the case when duration is set to 0, or something less - // than the full time between subsequent cpi->source_time_stamp s . + // than the full time between subsequent values of cpi->source_time_stamp. fps.duration = (double)(cpi->source->ts_end - cpi->source->ts_start); // don't want to do output stats with a stack variable! - memcpy(cpi->twopass.this_frame_stats, - &fps, - sizeof(FIRSTPASS_STATS)); - output_stats(cpi, cpi->output_pkt_list, cpi->twopass.this_frame_stats); - accumulate_stats(cpi->twopass.total_stats, &fps); + cpi->twopass.this_frame_stats = fps; + output_stats(cpi, cpi->output_pkt_list, &cpi->twopass.this_frame_stats); + accumulate_stats(&cpi->twopass.total_stats, &fps); } // Copy the previous Last Frame back into gf and and arf buffers if // the prediction is good enough... but also dont allow it to lag too far if ((cpi->twopass.sr_update_lag > 3) || ((cm->current_video_frame > 0) && - (cpi->twopass.this_frame_stats->pcnt_inter > 0.20) && - ((cpi->twopass.this_frame_stats->intra_error / - DOUBLE_DIVIDE_CHECK(cpi->twopass.this_frame_stats->coded_error)) > + (cpi->twopass.this_frame_stats.pcnt_inter > 0.20) && + ((cpi->twopass.this_frame_stats.intra_error / + DOUBLE_DIVIDE_CHECK(cpi->twopass.this_frame_stats.coded_error)) > 2.0))) { vp8_yv12_copy_frame(lst_yv12, gld_yv12); cpi->twopass.sr_update_lag = 1; @@ -772,14 +823,13 @@ void vp9_first_pass(VP9_COMP *cpi) { cpi->twopass.sr_update_lag++; // swap frame pointers so last frame refers to the frame we just compressed - vp9_swap_yv12_buffer(lst_yv12, new_yv12); - vp8_yv12_extend_frame_borders(lst_yv12); + swap_yv12(lst_yv12, new_yv12); + + vp9_extend_frame_borders(lst_yv12, cm->subsampling_x, cm->subsampling_y); // Special case for the first frame. Copy into the GF buffer as a second reference. - if (cm->current_video_frame == 0) { + if (cm->current_video_frame == 0) vp8_yv12_copy_frame(lst_yv12, gld_yv12); - } - // use this to see what the first pass reconstruction looks like if (0) { @@ -849,38 +899,28 @@ static double calc_correction_factor(double err_per_mb, double err_divisor, double pt_low, double pt_high, - int Q) { - double power_term; - double error_term = err_per_mb / err_divisor; - double correction_factor; + int q) { + const double error_term = err_per_mb / err_divisor; // Adjustment based on actual quantizer to power term. - power_term = (vp9_convert_qindex_to_q(Q) * 0.01) + pt_low; - power_term = (power_term > pt_high) ? 
pt_high : power_term; + const double power_term = MIN(vp9_convert_qindex_to_q(q) * 0.01 + pt_low, + pt_high); // Calculate correction factor if (power_term < 1.0) assert(error_term >= 0.0); - correction_factor = pow(error_term, power_term); - - // Clip range - correction_factor = - (correction_factor < 0.05) - ? 0.05 : (correction_factor > 5.0) ? 5.0 : correction_factor; - return correction_factor; + return fclamp(pow(error_term, power_term), 0.05, 5.0); } // Given a current maxQ value sets a range for future values. // PGW TODO.. -// This code removes direct dependency on QIndex to determin the range +// This code removes direct dependency on QIndex to determine the range // (now uses the actual quantizer) but has not been tuned. static void adjust_maxq_qrange(VP9_COMP *cpi) { int i; - double q; - // Set the max corresponding to cpi->avg_q * 2.0 - q = cpi->avg_q * 2.0; + double q = cpi->avg_q * 2.0; cpi->twopass.maxq_max_limit = cpi->worst_quality; for (i = cpi->best_quality; i <= cpi->worst_quality; i++) { cpi->twopass.maxq_max_limit = i; @@ -901,12 +941,11 @@ static void adjust_maxq_qrange(VP9_COMP *cpi) { static int estimate_max_q(VP9_COMP *cpi, FIRSTPASS_STATS *fpstats, int section_target_bandwitdh) { - int Q; + int q; int num_mbs = cpi->common.MBs; int target_norm_bits_per_mb; - double section_err = (fpstats->coded_error / fpstats->count); - double sr_err_diff; + double section_err = fpstats->coded_error / fpstats->count; double sr_correction; double err_per_mb = section_err / num_mbs; double err_correction_factor; @@ -915,92 +954,74 @@ static int estimate_max_q(VP9_COMP *cpi, if (section_target_bandwitdh <= 0) return cpi->twopass.maxq_max_limit; // Highest value allowed - target_norm_bits_per_mb = - (section_target_bandwitdh < (1 << 20)) - ? (512 * section_target_bandwitdh) / num_mbs - : 512 * (section_target_bandwitdh / num_mbs); + target_norm_bits_per_mb = section_target_bandwitdh < (1 << 20) + ? (512 * section_target_bandwitdh) / num_mbs + : 512 * (section_target_bandwitdh / num_mbs); // Look at the drop in prediction quality between the last frame // and the GF buffer (which contained an older frame). if (fpstats->sr_coded_error > fpstats->coded_error) { - sr_err_diff = - (fpstats->sr_coded_error - fpstats->coded_error) / - (fpstats->count * cpi->common.MBs); - sr_correction = (sr_err_diff / 32.0); - sr_correction = pow(sr_correction, 0.25); - if (sr_correction < 0.75) - sr_correction = 0.75; - else if (sr_correction > 1.25) - sr_correction = 1.25; + double sr_err_diff = (fpstats->sr_coded_error - fpstats->coded_error) / + (fpstats->count * cpi->common.MBs); + sr_correction = fclamp(pow(sr_err_diff / 32.0, 0.25), 0.75, 1.25); } else { sr_correction = 0.75; } // Calculate a corrective factor based on a rolling ratio of bits spent // vs target bits - if ((cpi->rolling_target_bits > 0) && - (cpi->active_worst_quality < cpi->worst_quality)) { - double rolling_ratio; - - rolling_ratio = (double)cpi->rolling_actual_bits / - (double)cpi->rolling_target_bits; + if (cpi->rolling_target_bits > 0 && + cpi->active_worst_quality < cpi->worst_quality) { + double rolling_ratio = (double)cpi->rolling_actual_bits / + (double)cpi->rolling_target_bits; if (rolling_ratio < 0.95) cpi->twopass.est_max_qcorrection_factor -= 0.005; else if (rolling_ratio > 1.05) cpi->twopass.est_max_qcorrection_factor += 0.005; - cpi->twopass.est_max_qcorrection_factor = - (cpi->twopass.est_max_qcorrection_factor < 0.1) - ? 0.1 - : (cpi->twopass.est_max_qcorrection_factor > 10.0) - ? 
10.0 : cpi->twopass.est_max_qcorrection_factor; + cpi->twopass.est_max_qcorrection_factor = fclamp( + cpi->twopass.est_max_qcorrection_factor, 0.1, 10.0); } // Corrections for higher compression speed settings // (reduced compression expected) - if (cpi->compressor_speed == 1) { - if (cpi->oxcf.cpu_used <= 5) - speed_correction = 1.04 + (cpi->oxcf.cpu_used * 0.04); - else - speed_correction = 1.25; - } + if (cpi->compressor_speed == 1) + speed_correction = cpi->oxcf.cpu_used <= 5 ? + 1.04 + (cpi->oxcf.cpu_used * 0.04) : + 1.25; // Try and pick a max Q that will be high enough to encode the // content at the given rate. - for (Q = cpi->twopass.maxq_min_limit; Q < cpi->twopass.maxq_max_limit; Q++) { + for (q = cpi->twopass.maxq_min_limit; q < cpi->twopass.maxq_max_limit; q++) { int bits_per_mb_at_this_q; - err_correction_factor = - calc_correction_factor(err_per_mb, ERR_DIVISOR, 0.4, 0.90, Q) * - sr_correction * speed_correction * - cpi->twopass.est_max_qcorrection_factor; - + err_correction_factor = calc_correction_factor(err_per_mb, + ERR_DIVISOR, 0.4, 0.90, q) * + sr_correction * speed_correction * + cpi->twopass.est_max_qcorrection_factor; - bits_per_mb_at_this_q = - vp9_bits_per_mb(INTER_FRAME, Q, err_correction_factor); + bits_per_mb_at_this_q = vp9_bits_per_mb(INTER_FRAME, q, + err_correction_factor); if (bits_per_mb_at_this_q <= target_norm_bits_per_mb) break; } // Restriction on active max q for constrained quality mode. - if ((cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) && - (Q < cpi->cq_target_quality)) { - Q = cpi->cq_target_quality; - } + if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY && + q < cpi->cq_target_quality) + q = cpi->cq_target_quality; // Adjust maxq_min_limit and maxq_max_limit limits based on - // averaga q observed in clip for non kf/gf/arf frames + // average q observed in clip for non kf/gf/arf frames // Give average a chance to settle though. // PGW TODO.. This code is broken for the extended Q range - if ((cpi->ni_frames > - ((int)cpi->twopass.total_stats->count >> 8)) && - (cpi->ni_frames > 25)) { + if (cpi->ni_frames > ((int)cpi->twopass.total_stats.count >> 8) && + cpi->ni_frames > 25) adjust_maxq_qrange(cpi); - } - return Q; + return q; } // For cq mode estimate a cq level that matches the observed @@ -1008,7 +1029,7 @@ static int estimate_max_q(VP9_COMP *cpi, static int estimate_cq(VP9_COMP *cpi, FIRSTPASS_STATS *fpstats, int section_target_bandwitdh) { - int Q; + int q; int num_mbs = cpi->common.MBs; int target_norm_bits_per_mb; @@ -1052,36 +1073,36 @@ static int estimate_cq(VP9_COMP *cpi, } // II ratio correction factor for clip as a whole - clip_iiratio = cpi->twopass.total_stats->intra_error / - DOUBLE_DIVIDE_CHECK(cpi->twopass.total_stats->coded_error); + clip_iiratio = cpi->twopass.total_stats.intra_error / + DOUBLE_DIVIDE_CHECK(cpi->twopass.total_stats.coded_error); clip_iifactor = 1.0 - ((clip_iiratio - 10.0) * 0.025); if (clip_iifactor < 0.80) clip_iifactor = 0.80; // Try and pick a Q that can encode the content at the given rate. 
- for (Q = 0; Q < MAXQ; Q++) { + for (q = 0; q < MAXQ; q++) { int bits_per_mb_at_this_q; // Error per MB based correction factor err_correction_factor = - calc_correction_factor(err_per_mb, 100.0, 0.4, 0.90, Q) * + calc_correction_factor(err_per_mb, 100.0, 0.4, 0.90, q) * sr_correction * speed_correction * clip_iifactor; bits_per_mb_at_this_q = - vp9_bits_per_mb(INTER_FRAME, Q, err_correction_factor); + vp9_bits_per_mb(INTER_FRAME, q, err_correction_factor); if (bits_per_mb_at_this_q <= target_norm_bits_per_mb) break; } // Clip value to range "best allowed to (worst allowed - 1)" - Q = select_cq_level(Q); - if (Q >= cpi->worst_quality) - Q = cpi->worst_quality - 1; - if (Q < cpi->best_quality) - Q = cpi->best_quality; + q = select_cq_level(q); + if (q >= cpi->worst_quality) + q = cpi->worst_quality - 1; + if (q < cpi->best_quality) + q = cpi->best_quality; - return Q; + return q; } @@ -1098,28 +1119,27 @@ void vp9_init_second_pass(VP9_COMP *cpi) { if (two_pass_min_rate < lower_bounds_min_rate) two_pass_min_rate = lower_bounds_min_rate; - zero_stats(cpi->twopass.total_stats); - zero_stats(cpi->twopass.total_left_stats); + zero_stats(&cpi->twopass.total_stats); + zero_stats(&cpi->twopass.total_left_stats); if (!cpi->twopass.stats_in_end) return; - *cpi->twopass.total_stats = *cpi->twopass.stats_in_end; - *cpi->twopass.total_left_stats = *cpi->twopass.total_stats; + cpi->twopass.total_stats = *cpi->twopass.stats_in_end; + cpi->twopass.total_left_stats = cpi->twopass.total_stats; // each frame can have a different duration, as the frame rate in the source // isn't guaranteed to be constant. The frame rate prior to the first frame // encoded in the second pass is a guess. However the sum duration is not. // Its calculated based on the actual durations of all frames from the first // pass. - vp9_new_frame_rate(cpi, - 10000000.0 * cpi->twopass.total_stats->count / - cpi->twopass.total_stats->duration); + vp9_new_frame_rate(cpi, 10000000.0 * cpi->twopass.total_stats.count / + cpi->twopass.total_stats.duration); cpi->output_frame_rate = cpi->oxcf.frame_rate; - cpi->twopass.bits_left = (int64_t)(cpi->twopass.total_stats->duration * + cpi->twopass.bits_left = (int64_t)(cpi->twopass.total_stats.duration * cpi->oxcf.target_bandwidth / 10000000.0); - cpi->twopass.bits_left -= (int64_t)(cpi->twopass.total_stats->duration * + cpi->twopass.bits_left -= (int64_t)(cpi->twopass.total_stats.duration * two_pass_min_rate / 10000000.0); // Calculate a minimum intra value to be used in determining the IIratio @@ -1145,7 +1165,8 @@ void vp9_init_second_pass(VP9_COMP *cpi) { sum_iiratio += IIRatio; } - cpi->twopass.avg_iiratio = sum_iiratio / DOUBLE_DIVIDE_CHECK((double)cpi->twopass.total_stats->count); + cpi->twopass.avg_iiratio = sum_iiratio / + DOUBLE_DIVIDE_CHECK((double)cpi->twopass.total_stats.count); // Reset file position reset_fpf_position(cpi, start_pos); @@ -1185,9 +1206,8 @@ static double get_prediction_decay_rate(VP9_COMP *cpi, // Look at the observed drop in prediction quality between the last frame // and the GF buffer (which contains an older frame). 
- mb_sr_err_diff = - (next_frame->sr_coded_error - next_frame->coded_error) / - (cpi->common.MBs); + mb_sr_err_diff = (next_frame->sr_coded_error - next_frame->coded_error) / + cpi->common.MBs; if (mb_sr_err_diff <= 512.0) { second_ref_decay = 1.0 - (mb_sr_err_diff / 512.0); second_ref_decay = pow(second_ref_decay, 0.5); @@ -1214,14 +1234,14 @@ static int detect_transition_to_still( int still_interval, double loop_decay_rate, double last_decay_rate) { - int trans_to_still = FALSE; + int trans_to_still = 0; // Break clause to detect very still sections after motion // For example a static image after a fade or other transition // instead of a clean scene cut. - if ((frame_interval > MIN_GF_INTERVAL) && - (loop_decay_rate >= 0.999) && - (last_decay_rate < 0.9)) { + if (frame_interval > MIN_GF_INTERVAL && + loop_decay_rate >= 0.999 && + last_decay_rate < 0.9) { int j; FIRSTPASS_STATS *position = cpi->twopass.stats_in; FIRSTPASS_STATS tmp_next_frame; @@ -1243,7 +1263,7 @@ static int detect_transition_to_still( // Only if it does do we signal a transition to still if (j == still_interval) - trans_to_still = TRUE; + trans_to_still = 1; } return trans_to_still; @@ -1255,7 +1275,7 @@ static int detect_transition_to_still( static int detect_flash(VP9_COMP *cpi, int offset) { FIRSTPASS_STATS next_frame; - int flash_detected = FALSE; + int flash_detected = 0; // Read the frame data. // The return is FALSE (no flash detected) if not a valid frame @@ -1265,10 +1285,9 @@ static int detect_flash(VP9_COMP *cpi, int offset) { // are reasonably well predicted by an earlier (pre flash) frame. // The recovery after a flash is indicated by a high pcnt_second_ref // comapred to pcnt_inter. - if ((next_frame.pcnt_second_ref > next_frame.pcnt_inter) && - (next_frame.pcnt_second_ref >= 0.5)) { - flash_detected = TRUE; - } + if (next_frame.pcnt_second_ref > next_frame.pcnt_inter && + next_frame.pcnt_second_ref >= 0.5) + flash_detected = 1; } return flash_detected; @@ -1350,13 +1369,9 @@ static double calc_frame_boost( return frame_boost; } -static int calc_arf_boost( - VP9_COMP *cpi, - int offset, - int f_frames, - int b_frames, - int *f_boost, - int *b_boost) { +static int calc_arf_boost(VP9_COMP *cpi, int offset, + int f_frames, int b_frames, + int *f_boost, int *b_boost) { FIRSTPASS_STATS this_frame; int i; @@ -1367,7 +1382,7 @@ static int calc_arf_boost( double mv_in_out_accumulator = 0.0; double abs_mv_in_out_accumulator = 0.0; int arf_boost; - int flash_detected = FALSE; + int flash_detected = 0; // Search forward from the proposed arf/next gf position for (i = 0; i < f_frames; i++) { @@ -1379,15 +1394,14 @@ static int calc_arf_boost( &this_frame_mv_in_out, &mv_in_out_accumulator, &abs_mv_in_out_accumulator, &mv_ratio_accumulator); - // We want to discount the the flash frame itself and the recovery + // We want to discount the flash frame itself and the recovery // frame that follows as both will have poor scores. flash_detected = detect_flash(cpi, (i + offset)) || detect_flash(cpi, (i + offset + 1)); // Cumulative effect of prediction quality decay if (!flash_detected) { - decay_accumulator = - decay_accumulator * get_prediction_decay_rate(cpi, &this_frame); + decay_accumulator *= get_prediction_decay_rate(cpi, &this_frame); decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR ? 
MIN_DECAY_FACTOR : decay_accumulator; } @@ -1423,10 +1437,9 @@ static int calc_arf_boost( // Cumulative effect of prediction quality decay if (!flash_detected) { - decay_accumulator = - decay_accumulator * get_prediction_decay_rate(cpi, &this_frame); + decay_accumulator *= get_prediction_decay_rate(cpi, &this_frame); decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR - ? MIN_DECAY_FACTOR : decay_accumulator; + ? MIN_DECAY_FACTOR : decay_accumulator; } boost_score += (decay_accumulator * @@ -1442,80 +1455,144 @@ static int calc_arf_boost( return arf_boost; } -static void configure_arnr_filter(VP9_COMP *cpi, - FIRSTPASS_STATS *this_frame, - int group_boost) { - int half_gf_int; - int frames_after_arf; - int frames_bwd = cpi->oxcf.arnr_max_frames - 1; - int frames_fwd = cpi->oxcf.arnr_max_frames - 1; - int q; - - // Define the arnr filter width for this group of frames: - // We only filter frames that lie within a distance of half - // the GF interval from the ARF frame. We also have to trap - // cases where the filter extends beyond the end of clip. - // Note: this_frame->frame has been updated in the loop - // so it now points at the ARF frame. - half_gf_int = cpi->baseline_gf_interval >> 1; - frames_after_arf = (int)(cpi->twopass.total_stats->count - - this_frame->frame - 1); - - switch (cpi->oxcf.arnr_type) { - case 1: // Backward filter - frames_fwd = 0; - if (frames_bwd > half_gf_int) - frames_bwd = half_gf_int; - break; +#if CONFIG_MULTIPLE_ARF +// Work out the frame coding order for a GF or an ARF group. +// The current implementation codes frames in their natural order for a +// GF group, and inserts additional ARFs into an ARF group using a +// binary split approach. +// NOTE: this function is currently implemented recursively. +static void schedule_frames(VP9_COMP *cpi, const int start, const int end, + const int arf_idx, const int gf_or_arf_group, + const int level) { + int i, abs_end, half_range; + int *cfo = cpi->frame_coding_order; + int idx = cpi->new_frame_coding_order_period; + + // If (end < 0) an ARF should be coded at position (-end). + assert(start >= 0); + + // printf("start:%d end:%d\n", start, end); + + // GF Group: code frames in logical order. + if (gf_or_arf_group == 0) { + assert(end >= start); + for (i = start; i <= end; ++i) { + cfo[idx] = i; + cpi->arf_buffer_idx[idx] = arf_idx; + cpi->arf_weight[idx] = -1; + ++idx; + } + cpi->new_frame_coding_order_period = idx; + return; + } - case 2: // Forward filter - if (frames_fwd > half_gf_int) - frames_fwd = half_gf_int; - if (frames_fwd > frames_after_arf) - frames_fwd = frames_after_arf; - frames_bwd = 0; - break; + // ARF Group: work out the ARF schedule. + // Mark ARF frames as negative. + if (end < 0) { + // printf("start:%d end:%d\n", -end, -end); + // ARF frame is at the end of the range. + cfo[idx] = end; + // What ARF buffer does this ARF use as predictor. + cpi->arf_buffer_idx[idx] = (arf_idx > 2) ? (arf_idx - 1) : 2; + cpi->arf_weight[idx] = level; + ++idx; + abs_end = -end; + } else { + abs_end = end; + } - case 3: // Centered filter - default: - frames_fwd >>= 1; - if (frames_fwd > frames_after_arf) - frames_fwd = frames_after_arf; - if (frames_fwd > half_gf_int) - frames_fwd = half_gf_int; - - frames_bwd = frames_fwd; - - // For even length filter there is one more frame backward - // than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff. 
- if (frames_bwd < half_gf_int) - frames_bwd += (cpi->oxcf.arnr_max_frames + 1) & 0x1; - break; + half_range = (abs_end - start) >> 1; + + // ARFs may not be adjacent, they must be separated by at least + // MIN_GF_INTERVAL non-ARF frames. + if ((start + MIN_GF_INTERVAL) >= (abs_end - MIN_GF_INTERVAL)) { + // printf("start:%d end:%d\n", start, abs_end); + // Update the coding order and active ARF. + for (i = start; i <= abs_end; ++i) { + cfo[idx] = i; + cpi->arf_buffer_idx[idx] = arf_idx; + cpi->arf_weight[idx] = -1; + ++idx; + } + cpi->new_frame_coding_order_period = idx; + } else { + // Place a new ARF at the mid-point of the range. + cpi->new_frame_coding_order_period = idx; + schedule_frames(cpi, start, -(start + half_range), arf_idx + 1, + gf_or_arf_group, level + 1); + schedule_frames(cpi, start + half_range + 1, abs_end, arf_idx, + gf_or_arf_group, level + 1); } +} - cpi->active_arnr_frames = frames_bwd + 1 + frames_fwd; +#define FIXED_ARF_GROUP_SIZE 16 - // Adjust the strength based on active max q - q = ((int)vp9_convert_qindex_to_q(cpi->active_worst_quality) >> 1); - if (q > 8) { - cpi->active_arnr_strength = cpi->oxcf.arnr_strength; +void define_fixed_arf_period(VP9_COMP *cpi) { + int i; + int max_level = INT_MIN; + + assert(cpi->multi_arf_enabled); + assert(cpi->oxcf.lag_in_frames >= FIXED_ARF_GROUP_SIZE); + + // Save the weight of the last frame in the sequence before next + // sequence pattern overwrites it. + cpi->this_frame_weight = cpi->arf_weight[cpi->sequence_number]; + assert(cpi->this_frame_weight >= 0); + + // Initialize frame coding order variables. + cpi->new_frame_coding_order_period = 0; + cpi->next_frame_in_order = 0; + cpi->arf_buffered = 0; + vp9_zero(cpi->frame_coding_order); + vp9_zero(cpi->arf_buffer_idx); + vpx_memset(cpi->arf_weight, -1, sizeof(cpi->arf_weight)); + + if (cpi->twopass.frames_to_key <= (FIXED_ARF_GROUP_SIZE + 8)) { + // Setup a GF group close to the keyframe. + cpi->source_alt_ref_pending = 0; + cpi->baseline_gf_interval = cpi->twopass.frames_to_key; + schedule_frames(cpi, 0, (cpi->baseline_gf_interval - 1), 2, 0, 0); } else { - cpi->active_arnr_strength = cpi->oxcf.arnr_strength - (8 - q); - if (cpi->active_arnr_strength < 0) - cpi->active_arnr_strength = 0; + // Setup a fixed period ARF group. + cpi->source_alt_ref_pending = 1; + cpi->baseline_gf_interval = FIXED_ARF_GROUP_SIZE; + schedule_frames(cpi, 0, -(cpi->baseline_gf_interval - 1), 2, 1, 0); } - // Adjust number of frames in filter and strength based on gf boost level. - if (cpi->active_arnr_frames > (group_boost / 150)) { - cpi->active_arnr_frames = (group_boost / 150); - cpi->active_arnr_frames += !(cpi->active_arnr_frames & 1); + // Replace level indicator of -1 with correct level. 
+ for (i = 0; i < cpi->new_frame_coding_order_period; ++i) { + if (cpi->arf_weight[i] > max_level) { + max_level = cpi->arf_weight[i]; + } + } + ++max_level; + for (i = 0; i < cpi->new_frame_coding_order_period; ++i) { + if (cpi->arf_weight[i] == -1) { + cpi->arf_weight[i] = max_level; + } + } + cpi->max_arf_level = max_level; +#if 0 + printf("\nSchedule: "); + for (i = 0; i < cpi->new_frame_coding_order_period; ++i) { + printf("%4d ", cpi->frame_coding_order[i]); + } + printf("\n"); + printf("ARFref: "); + for (i = 0; i < cpi->new_frame_coding_order_period; ++i) { + printf("%4d ", cpi->arf_buffer_idx[i]); } - if (cpi->active_arnr_strength > (group_boost / 300)) { - cpi->active_arnr_strength = (group_boost / 300); + printf("\n"); + printf("Weight: "); + for (i = 0; i < cpi->new_frame_coding_order_period; ++i) { + printf("%4d ", cpi->arf_weight[i]); } + printf("\n"); +#endif } +#endif -// Analyse and define a gf/arf group . +// Analyse and define a gf/arf group. static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { FIRSTPASS_STATS next_frame; FIRSTPASS_STATS *start_pos; @@ -1619,10 +1696,10 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { } // Break clause to detect very still sections after motion - // (for example a staic image after a fade or other transition). + // (for example a static image after a fade or other transition). if (detect_transition_to_still(cpi, i, 5, loop_decay_rate, last_loop_decay_rate)) { - allow_alt_ref = FALSE; + allow_alt_ref = 0; break; } } @@ -1637,9 +1714,9 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Break at cpi->max_gf_interval unless almost totally static (i >= active_max_gf_interval && (zero_motion_accumulator < 0.995)) || ( - // Dont break out with a very short interval + // Don't break out with a very short interval (i > MIN_GF_INTERVAL) && - // Dont break out very close to a key frame + // Don't break out very close to a key frame ((cpi->twopass.frames_to_key - i) >= MIN_GF_INTERVAL) && ((boost_score > 125.0) || (next_frame.pcnt_inter < 0.75)) && (!flash_detected) && @@ -1652,12 +1729,12 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { break; } - vpx_memcpy(this_frame, &next_frame, sizeof(*this_frame)); + *this_frame = next_frame; old_boost_score = boost_score; } - // Dont allow a gf too near the next kf + // Don't allow a gf too near the next kf if ((cpi->twopass.frames_to_key - i) < MIN_GF_INTERVAL) { while (i < cpi->twopass.frames_to_key) { i++; @@ -1672,10 +1749,22 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { } } - // Set the interval till the next gf or arf. + // Set the interval until the next gf or arf. cpi->baseline_gf_interval = i; - // Should we use the alternate refernce frame +#if CONFIG_MULTIPLE_ARF + if (cpi->multi_arf_enabled) { + // Initialize frame coding order variables. 
+ cpi->new_frame_coding_order_period = 0; + cpi->next_frame_in_order = 0; + cpi->arf_buffered = 0; + vp9_zero(cpi->frame_coding_order); + vp9_zero(cpi->arf_buffer_idx); + vpx_memset(cpi->arf_weight, -1, sizeof(cpi->arf_weight)); + } +#endif + + // Should we use the alternate reference frame if (allow_alt_ref && (i < cpi->oxcf.lag_in_frames) && (i >= MIN_GF_INTERVAL) && @@ -1686,15 +1775,65 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { ((mv_in_out_accumulator / (double)i > -0.2) || (mv_in_out_accumulator > -2.0)) && (boost_score > 100)) { - // Alterrnative boost calculation for alt ref + // Alternative boost calculation for alt ref cpi->gfu_boost = calc_arf_boost(cpi, 0, (i - 1), (i - 1), &f_boost, &b_boost); - cpi->source_alt_ref_pending = TRUE; + cpi->source_alt_ref_pending = 1; - configure_arnr_filter(cpi, this_frame, cpi->gfu_boost); +#if CONFIG_MULTIPLE_ARF + // Set the ARF schedule. + if (cpi->multi_arf_enabled) { + schedule_frames(cpi, 0, -(cpi->baseline_gf_interval - 1), 2, 1, 0); + } +#endif } else { cpi->gfu_boost = (int)boost_score; - cpi->source_alt_ref_pending = FALSE; + cpi->source_alt_ref_pending = 0; +#if CONFIG_MULTIPLE_ARF + // Set the GF schedule. + if (cpi->multi_arf_enabled) { + schedule_frames(cpi, 0, cpi->baseline_gf_interval - 1, 2, 0, 0); + assert(cpi->new_frame_coding_order_period == cpi->baseline_gf_interval); + } +#endif + } + +#if CONFIG_MULTIPLE_ARF + if (cpi->multi_arf_enabled && (cpi->common.frame_type != KEY_FRAME)) { + int max_level = INT_MIN; + // Replace level indicator of -1 with correct level. + for (i = 0; i < cpi->frame_coding_order_period; ++i) { + if (cpi->arf_weight[i] > max_level) { + max_level = cpi->arf_weight[i]; + } + } + ++max_level; + for (i = 0; i < cpi->frame_coding_order_period; ++i) { + if (cpi->arf_weight[i] == -1) { + cpi->arf_weight[i] = max_level; + } + } + cpi->max_arf_level = max_level; + } +#if 0 + if (cpi->multi_arf_enabled) { + printf("\nSchedule: "); + for (i = 0; i < cpi->new_frame_coding_order_period; ++i) { + printf("%4d ", cpi->frame_coding_order[i]); + } + printf("\n"); + printf("ARFref: "); + for (i = 0; i < cpi->new_frame_coding_order_period; ++i) { + printf("%4d ", cpi->arf_buffer_idx[i]); + } + printf("\n"); + printf("Weight: "); + for (i = 0; i < cpi->new_frame_coding_order_period; ++i) { + printf("%4d ", cpi->arf_weight[i]); + } + printf("\n"); } +#endif +#endif // Now decide how many bits should be allocated to the GF group as a // proportion of those remaining in the kf group. @@ -1702,7 +1841,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // where cpi->twopass.kf_group_bits is tied to cpi->twopass.bits_left. // This is also important for short clips where there may only be one // key frame. - if (cpi->twopass.frames_to_key >= (int)(cpi->twopass.total_stats->count - + if (cpi->twopass.frames_to_key >= (int)(cpi->twopass.total_stats.count - cpi->common.current_video_frame)) { cpi->twopass.kf_group_bits = (cpi->twopass.bits_left > 0) ? cpi->twopass.bits_left : 0; @@ -1736,29 +1875,26 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { cpi->twopass.modified_error_used += gf_group_err; // Assign bits to the arf or gf. - for (i = 0; i <= (cpi->source_alt_ref_pending && cpi->common.frame_type != KEY_FRAME); i++) { - int boost; + for (i = 0; + i <= (cpi->source_alt_ref_pending && cpi->common.frame_type != KEY_FRAME); + ++i) { int allocation_chunks; - int Q = (cpi->oxcf.fixed_q < 0) ? 
cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q; + int q = cpi->oxcf.fixed_q < 0 ? cpi->last_q[INTER_FRAME] + : cpi->oxcf.fixed_q; int gf_bits; - boost = (cpi->gfu_boost * vp9_gfboost_qadjust(Q)) / 100; + int boost = (cpi->gfu_boost * vp9_gfboost_qadjust(q)) / 100; // Set max and minimum boost and hence minimum allocation - if (boost > ((cpi->baseline_gf_interval + 1) * 200)) - boost = ((cpi->baseline_gf_interval + 1) * 200); - else if (boost < 125) - boost = 125; + boost = clamp(boost, 125, (cpi->baseline_gf_interval + 1) * 200); if (cpi->source_alt_ref_pending && i == 0) - allocation_chunks = - ((cpi->baseline_gf_interval + 1) * 100) + boost; + allocation_chunks = ((cpi->baseline_gf_interval + 1) * 100) + boost; else - allocation_chunks = - (cpi->baseline_gf_interval * 100) + (boost - 100); + allocation_chunks = (cpi->baseline_gf_interval * 100) + (boost - 100); // Prevent overflow - if (boost > 1028) { + if (boost > 1023) { int divisor = boost >> 10; boost /= divisor; allocation_chunks /= divisor; @@ -1766,59 +1902,55 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Calculate the number of bits to be spent on the gf or arf based on // the boost number - gf_bits = (int)((double)boost * - (cpi->twopass.gf_group_bits / - (double)allocation_chunks)); + gf_bits = (int)((double)boost * (cpi->twopass.gf_group_bits / + (double)allocation_chunks)); // If the frame that is to be boosted is simpler than the average for // the gf/arf group then use an alternative calculation // based on the error score of the frame itself if (mod_frame_err < gf_group_err / (double)cpi->baseline_gf_interval) { - double alt_gf_grp_bits; - int alt_gf_bits; - - alt_gf_grp_bits = + double alt_gf_grp_bits = (double)cpi->twopass.kf_group_bits * (mod_frame_err * (double)cpi->baseline_gf_interval) / DOUBLE_DIVIDE_CHECK(cpi->twopass.kf_group_error_left); - alt_gf_bits = (int)((double)boost * (alt_gf_grp_bits / + int alt_gf_bits = (int)((double)boost * (alt_gf_grp_bits / (double)allocation_chunks)); - if (gf_bits > alt_gf_bits) { + if (gf_bits > alt_gf_bits) gf_bits = alt_gf_bits; - } } // Else if it is harder than other frames in the group make sure it at // least receives an allocation in keeping with its relative error // score, otherwise it may be worse off than an "un-boosted" frame else { - int alt_gf_bits = - (int)((double)cpi->twopass.kf_group_bits * - mod_frame_err / - DOUBLE_DIVIDE_CHECK(cpi->twopass.kf_group_error_left)); + int alt_gf_bits = (int)((double)cpi->twopass.kf_group_bits * + mod_frame_err / + DOUBLE_DIVIDE_CHECK(cpi->twopass.kf_group_error_left)); - if (alt_gf_bits > gf_bits) { + if (alt_gf_bits > gf_bits) gf_bits = alt_gf_bits; - } } // Dont allow a negative value for gf_bits if (gf_bits < 0) gf_bits = 0; - gf_bits += cpi->min_frame_bandwidth; // Add in minimum for a frame + // Add in minimum for a frame + gf_bits += cpi->min_frame_bandwidth; if (i == 0) { cpi->twopass.gf_bits = gf_bits; } - if (i == 1 || (!cpi->source_alt_ref_pending && (cpi->common.frame_type != KEY_FRAME))) { - cpi->per_frame_bandwidth = gf_bits; // Per frame bit target for this frame + if (i == 1 || (!cpi->source_alt_ref_pending + && (cpi->common.frame_type != KEY_FRAME))) { + // Per frame bit target for this frame + cpi->per_frame_bandwidth = gf_bits; } } { - // Adjust KF group bits and error remainin + // Adjust KF group bits and error remaining cpi->twopass.kf_group_error_left -= (int64_t)gf_group_err; cpi->twopass.kf_group_bits -= cpi->twopass.gf_group_bits; @@ -1835,33 +1967,27 @@ static void 
define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { else cpi->twopass.gf_group_error_left = (int64_t)gf_group_err; - cpi->twopass.gf_group_bits -= cpi->twopass.gf_bits - cpi->min_frame_bandwidth; + cpi->twopass.gf_group_bits -= cpi->twopass.gf_bits + - cpi->min_frame_bandwidth; if (cpi->twopass.gf_group_bits < 0) cpi->twopass.gf_group_bits = 0; // This condition could fail if there are two kfs very close together - // despite (MIN_GF_INTERVAL) and would cause a devide by 0 in the - // calculation of cpi->twopass.alt_extra_bits. + // despite (MIN_GF_INTERVAL) and would cause a divide by 0 in the + // calculation of alt_extra_bits. if (cpi->baseline_gf_interval >= 3) { - int boost = (cpi->source_alt_ref_pending) - ? b_boost : cpi->gfu_boost; + const int boost = cpi->source_alt_ref_pending ? b_boost : cpi->gfu_boost; if (boost >= 150) { - int pct_extra; - - pct_extra = (boost - 100) / 50; + int alt_extra_bits; + int pct_extra = (boost - 100) / 50; pct_extra = (pct_extra > 20) ? 20 : pct_extra; - cpi->twopass.alt_extra_bits = (int) - ((cpi->twopass.gf_group_bits * pct_extra) / 100); - cpi->twopass.gf_group_bits -= cpi->twopass.alt_extra_bits; - cpi->twopass.alt_extra_bits /= - ((cpi->baseline_gf_interval - 1) >> 1); - } else - cpi->twopass.alt_extra_bits = 0; - } else - cpi->twopass.alt_extra_bits = 0; + alt_extra_bits = (int)((cpi->twopass.gf_group_bits * pct_extra) / 100); + cpi->twopass.gf_group_bits -= alt_extra_bits; + } + } } if (cpi->common.frame_type != KEY_FRAME) { @@ -1887,24 +2013,28 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Allocate bits to a normal frame that is neither a gf an arf or a key frame. static void assign_std_frame_bits(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { - int target_frame_size; // gf_group_error_left + int target_frame_size; double modified_err; - double err_fraction; // What portion of the remaining GF group error is used by this frame + double err_fraction; - int max_bits = frame_max_bits(cpi); // Max for a single frame + // Max for a single frame. + int max_bits = frame_max_bits(cpi); - // Calculate modified prediction error used in bit allocation + // Calculate modified prediction error used in bit allocation. modified_err = calculate_modified_err(cpi, this_frame); if (cpi->twopass.gf_group_error_left > 0) - err_fraction = modified_err / cpi->twopass.gf_group_error_left; // What portion of the remaining GF group error is used by this frame + // What portion of the remaining GF group error is used by this frame. + err_fraction = modified_err / cpi->twopass.gf_group_error_left; else err_fraction = 0.0; - target_frame_size = (int)((double)cpi->twopass.gf_group_bits * err_fraction); // How many of those bits available for allocation should we give it? + // How many of those bits available for allocation should we give it? + target_frame_size = (int)((double)cpi->twopass.gf_group_bits * err_fraction); - // Clip to target size to 0 - max_bits (or cpi->twopass.gf_group_bits) at the top end. + // Clip target size to 0 - max_bits (or cpi->twopass.gf_group_bits) at + // the top end. if (target_frame_size < 0) target_frame_size = 0; else { @@ -1915,54 +2045,43 @@ static void assign_std_frame_bits(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { target_frame_size = (int)cpi->twopass.gf_group_bits; } - // Adjust error remaining + // Adjust error and bits remaining. 
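// Illustrative sketch, not part of the patch: assign_std_frame_bits() above
// gives an ordinary frame a share of the GF group's remaining bits equal to
// its share of the group's remaining modified error, clamped to the range
// [0, max_bits] and to what the group still has left. The same arithmetic in
// isolation (helper name is hypothetical; int64_t from <stdint.h>):
static int std_frame_target(int64_t group_bits, double group_error_left,
                            double modified_err, int max_bits) {
  const double err_fraction =
      group_error_left > 0.0 ? modified_err / group_error_left : 0.0;
  int target = (int)((double)group_bits * err_fraction);
  if (target < 0)
    target = 0;                   // never a negative allocation
  else if (target > max_bits)
    target = max_bits;            // per-frame ceiling from frame_max_bits()
  if (target > group_bits)
    target = (int)group_bits;     // cannot exceed the group's remaining bits
  return target;
}
// The two updates just below deduct this frame's error and bits from the
// running group totals; min_frame_bandwidth is then added back on top.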
cpi->twopass.gf_group_error_left -= (int64_t)modified_err; - cpi->twopass.gf_group_bits -= target_frame_size; // Adjust bits remaining + cpi->twopass.gf_group_bits -= target_frame_size; if (cpi->twopass.gf_group_bits < 0) cpi->twopass.gf_group_bits = 0; - target_frame_size += cpi->min_frame_bandwidth; // Add in the minimum number of bits that is set aside for every frame. - + // Add in the minimum number of bits that is set aside for every frame. + target_frame_size += cpi->min_frame_bandwidth; - cpi->per_frame_bandwidth = target_frame_size; // Per frame bit target for this frame + // Per frame bit target for this frame. + cpi->per_frame_bandwidth = target_frame_size; } // Make a damped adjustment to the active max q. static int adjust_active_maxq(int old_maxqi, int new_maxqi) { int i; - int ret_val = new_maxqi; - double old_q; - double new_q; - double target_q; - - old_q = vp9_convert_qindex_to_q(old_maxqi); - new_q = vp9_convert_qindex_to_q(new_maxqi); - - target_q = ((old_q * 7.0) + new_q) / 8.0; + const double old_q = vp9_convert_qindex_to_q(old_maxqi); + const double new_q = vp9_convert_qindex_to_q(new_maxqi); + const double target_q = ((old_q * 7.0) + new_q) / 8.0; if (target_q > old_q) { - for (i = old_maxqi; i <= new_maxqi; i++) { - if (vp9_convert_qindex_to_q(i) >= target_q) { - ret_val = i; - break; - } - } + for (i = old_maxqi; i <= new_maxqi; i++) + if (vp9_convert_qindex_to_q(i) >= target_q) + return i; } else { - for (i = old_maxqi; i >= new_maxqi; i--) { - if (vp9_convert_qindex_to_q(i) <= target_q) { - ret_val = i; - break; - } - } + for (i = old_maxqi; i >= new_maxqi; i--) + if (vp9_convert_qindex_to_q(i) <= target_q) + return i; } - return ret_val; + return new_maxqi; } void vp9_second_pass(VP9_COMP *cpi) { int tmp_q; - int frames_left = (int)(cpi->twopass.total_stats->count - + int frames_left = (int)(cpi->twopass.total_stats.count - cpi->common.current_video_frame); FIRSTPASS_STATS this_frame; @@ -1971,9 +2090,8 @@ void vp9_second_pass(VP9_COMP *cpi) { double this_frame_intra_error; double this_frame_coded_error; - if (!cpi->twopass.stats_in) { + if (!cpi->twopass.stats_in) return; - } vp9_clear_system_state(); @@ -1983,12 +2101,8 @@ void vp9_second_pass(VP9_COMP *cpi) { // Set a cq_level in constrained quality mode. if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) { - int est_cq; - - est_cq = - estimate_cq(cpi, - cpi->twopass.total_left_stats, - (int)(cpi->twopass.bits_left / frames_left)); + int est_cq = estimate_cq(cpi, &cpi->twopass.total_left_stats, + (int)(cpi->twopass.bits_left / frames_left)); cpi->cq_target_quality = cpi->oxcf.cq_level; if (est_cq > cpi->cq_target_quality) @@ -1999,14 +2113,12 @@ void vp9_second_pass(VP9_COMP *cpi) { cpi->twopass.maxq_max_limit = cpi->worst_quality; cpi->twopass.maxq_min_limit = cpi->best_quality; - tmp_q = estimate_max_q( - cpi, - cpi->twopass.total_left_stats, - (int)(cpi->twopass.bits_left / frames_left)); + tmp_q = estimate_max_q(cpi, &cpi->twopass.total_left_stats, + (int)(cpi->twopass.bits_left / frames_left)); - cpi->active_worst_quality = tmp_q; - cpi->ni_av_qi = tmp_q; - cpi->avg_q = vp9_convert_qindex_to_q(tmp_q); + cpi->active_worst_quality = tmp_q; + cpi->ni_av_qi = tmp_q; + cpi->avg_q = vp9_convert_qindex_to_q(tmp_q); #ifndef ONE_SHOT_Q_ESTIMATE // Limit the maxq value returned subsequently. @@ -2024,15 +2136,15 @@ void vp9_second_pass(VP9_COMP *cpi) { // radical adjustments to the allowed quantizer range just to use up a // few surplus bits or get beneath the target rate. 
else if ((cpi->common.current_video_frame < - (((unsigned int)cpi->twopass.total_stats->count * 255) >> 8)) && + (((unsigned int)cpi->twopass.total_stats.count * 255) >> 8)) && ((cpi->common.current_video_frame + cpi->baseline_gf_interval) < - (unsigned int)cpi->twopass.total_stats->count)) { + (unsigned int)cpi->twopass.total_stats.count)) { if (frames_left < 1) frames_left = 1; tmp_q = estimate_max_q( cpi, - cpi->twopass.total_left_stats, + &cpi->twopass.total_left_stats, (int)(cpi->twopass.bits_left / frames_left)); // Make a damped adjustment to active max Q @@ -2051,15 +2163,24 @@ void vp9_second_pass(VP9_COMP *cpi) { // keyframe and section processing ! if (cpi->twopass.frames_to_key == 0) { // Define next KF group and assign bits to it - vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); + this_frame_copy = this_frame; find_next_key_frame(cpi, &this_frame_copy); } // Is this a GF / ARF (Note that a KF is always also a GF) if (cpi->frames_till_gf_update_due == 0) { // Define next gf group and assign bits to it - vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); - define_gf_group(cpi, &this_frame_copy); + this_frame_copy = this_frame; + +#if CONFIG_MULTIPLE_ARF + if (cpi->multi_arf_enabled) { + define_fixed_arf_period(cpi); + } else { +#endif + define_gf_group(cpi, &this_frame_copy); +#if CONFIG_MULTIPLE_ARF + } +#endif // If we are going to code an altref frame at the end of the group // and the current frame is not a key frame.... @@ -2071,14 +2192,14 @@ void vp9_second_pass(VP9_COMP *cpi) { // Assign a standard frames worth of bits from those allocated // to the GF group int bak = cpi->per_frame_bandwidth; - vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); + this_frame_copy = this_frame; assign_std_frame_bits(cpi, &this_frame_copy); cpi->per_frame_bandwidth = bak; } } else { // Otherwise this is an ordinary frame // Assign bits from those allocated to the GF group - vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); + this_frame_copy = this_frame; assign_std_frame_bits(cpi, &this_frame_copy); } @@ -2101,15 +2222,15 @@ void vp9_second_pass(VP9_COMP *cpi) { cpi->twopass.frames_to_key--; - // Update the total stats remaining sturcture - subtract_stats(cpi->twopass.total_left_stats, &this_frame); + // Update the total stats remaining structure + subtract_stats(&cpi->twopass.total_left_stats, &this_frame); } static int test_candidate_kf(VP9_COMP *cpi, FIRSTPASS_STATS *last_frame, FIRSTPASS_STATS *this_frame, FIRSTPASS_STATS *next_frame) { - int is_viable_kf = FALSE; + int is_viable_kf = 0; // Does the frame satisfy the primary criteria of a key frame // If so, then examine how well it predicts subsequent frames @@ -2136,7 +2257,7 @@ static int test_candidate_kf(VP9_COMP *cpi, double decay_accumulator = 1.0; double next_iiratio; - vpx_memcpy(&local_next_frame, next_frame, sizeof(*next_frame)); + local_next_frame = *next_frame; // Note the starting file position so we can reset to it start_pos = cpi->twopass.stats_in; @@ -2178,14 +2299,15 @@ static int test_candidate_kf(VP9_COMP *cpi, break; } - // If there is tolerable prediction for at least the next 3 frames then break out else discard this pottential key frame and move on + // If there is tolerable prediction for at least the next 3 frames then + // break out else discard this potential key frame and move on if (boost_score > 30.0 && (i > 3)) - is_viable_kf = TRUE; + is_viable_kf = 1; else { // Reset the file position reset_fpf_position(cpi, start_pos); - is_viable_kf = FALSE; + 
is_viable_kf = 0; } } @@ -2201,7 +2323,6 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { double decay_accumulator = 1.0; double zero_motion_accumulator = 1.0; double boost_score = 0; - double old_boost_score = 0.0; double loop_decay_rate; double kf_mod_err = 0.0; @@ -2221,7 +2342,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { cpi->this_key_frame_forced = cpi->next_key_frame_forced; // Clear the alt ref active flag as this can never be active on a key frame - cpi->source_alt_ref_active = FALSE; + cpi->source_alt_ref_active = 0; // Kf is always a gf so clear frames till next gf counter cpi->frames_till_gf_update_due = 0; @@ -2229,9 +2350,9 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { cpi->twopass.frames_to_key = 1; // Take a copy of the initial frame details - vpx_memcpy(&first_frame, this_frame, sizeof(*this_frame)); + first_frame = *this_frame; - cpi->twopass.kf_group_bits = 0; // Total bits avaialable to kf group + cpi->twopass.kf_group_bits = 0; // Total bits available to kf group cpi->twopass.kf_group_error_left = 0; // Group modified error score. kf_mod_err = calculate_modified_err(cpi, this_frame); @@ -2248,16 +2369,16 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { kf_group_coded_err += this_frame->coded_error; // load a the next frame's stats - vpx_memcpy(&last_frame, this_frame, sizeof(*this_frame)); + last_frame = *this_frame; input_stats(cpi, this_frame); // Provided that we are not at the end of the file... if (cpi->oxcf.auto_key && lookup_next_frame_stats(cpi, &next_frame) != EOF) { // Normal scene cut check - if (test_candidate_kf(cpi, &last_frame, this_frame, &next_frame)) { + if (test_candidate_kf(cpi, &last_frame, this_frame, &next_frame)) break; - } + // How fast is prediction quality decaying loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame); @@ -2267,19 +2388,14 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // quality since the last GF or KF. recent_loop_decay[i % 8] = loop_decay_rate; decay_accumulator = 1.0; - for (j = 0; j < 8; j++) { - decay_accumulator = decay_accumulator * recent_loop_decay[j]; - } + for (j = 0; j < 8; j++) + decay_accumulator *= recent_loop_decay[j]; // Special check for transition or high motion followed by a // to a static scene. 
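// Illustrative sketch, not part of the patch: the loop body above keeps the
// last eight loop_decay_rate samples in a small circular buffer and takes
// their product, so decay_accumulator approximates the compound prediction
// decay over the most recent eight frames. In isolation (names hypothetical):
static double compound_recent_decay(double recent[8], int frame_idx,
                                    double this_rate) {
  double acc = 1.0;
  int j;
  recent[frame_idx % 8] = this_rate;  // overwrite the oldest of 8 samples
  for (j = 0; j < 8; ++j)
    acc *= recent[j];                 // product over the sliding window
  return acc;                         // becomes decay_accumulator
}
// The check just below feeds this accumulator to detect_transition_to_still()
// so a still section arriving after motion ends the key-frame search early.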
- if (detect_transition_to_still(cpi, i, - (cpi->key_frame_frequency - i), - loop_decay_rate, - decay_accumulator)) { + if (detect_transition_to_still(cpi, i, cpi->key_frame_frequency - i, + loop_decay_rate, decay_accumulator)) break; - } - // Step on to the next frame cpi->twopass.frames_to_key++; @@ -2306,7 +2422,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { cpi->twopass.frames_to_key /= 2; // Copy first frame details - vpx_memcpy(&tmp_frame, &first_frame, sizeof(first_frame)); + tmp_frame = first_frame; // Reset to the start of the group reset_fpf_position(cpi, start_position); @@ -2329,9 +2445,9 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Reset to the start of the group reset_fpf_position(cpi, current_pos); - cpi->next_key_frame_forced = TRUE; + cpi->next_key_frame_forced = 1; } else - cpi->next_key_frame_forced = FALSE; + cpi->next_key_frame_forced = 0; // Special case for the last frame of the file if (cpi->twopass.stats_in >= cpi->twopass.stats_in_end) { @@ -2373,22 +2489,13 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { boost_score = 0.0; loop_decay_rate = 1.00; // Starting decay rate + // Scan through the kf group collating various stats. for (i = 0; i < cpi->twopass.frames_to_key; i++) { double r; if (EOF == input_stats(cpi, &next_frame)) break; - if (next_frame.intra_error > cpi->twopass.kf_intra_err_min) - r = (IIKFACTOR2 * next_frame.intra_error / - DOUBLE_DIVIDE_CHECK(next_frame.coded_error)); - else - r = (IIKFACTOR2 * cpi->twopass.kf_intra_err_min / - DOUBLE_DIVIDE_CHECK(next_frame.coded_error)); - - if (r > RMAX) - r = RMAX; - // Monitor for static sections. if ((next_frame.pcnt_inter - next_frame.pcnt_motion) < zero_motion_accumulator) { @@ -2396,22 +2503,28 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { (next_frame.pcnt_inter - next_frame.pcnt_motion); } - // How fast is prediction quality decaying - if (!detect_flash(cpi, 0)) { - loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame); - decay_accumulator = decay_accumulator * loop_decay_rate; - decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR - ? MIN_DECAY_FACTOR : decay_accumulator; - } + // For the first few frames collect data to decide kf boost. + if (i <= (cpi->max_gf_interval * 2)) { + if (next_frame.intra_error > cpi->twopass.kf_intra_err_min) + r = (IIKFACTOR2 * next_frame.intra_error / + DOUBLE_DIVIDE_CHECK(next_frame.coded_error)); + else + r = (IIKFACTOR2 * cpi->twopass.kf_intra_err_min / + DOUBLE_DIVIDE_CHECK(next_frame.coded_error)); - boost_score += (decay_accumulator * r); + if (r > RMAX) + r = RMAX; - if ((i > MIN_GF_INTERVAL) && - ((boost_score - old_boost_score) < 6.25)) { - break; - } + // How fast is prediction quality decaying + if (!detect_flash(cpi, 0)) { + loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame); + decay_accumulator = decay_accumulator * loop_decay_rate; + decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR + ? 
MIN_DECAY_FACTOR : decay_accumulator; + } - old_boost_score = boost_score; + boost_score += (decay_accumulator * r); + } } { @@ -2441,8 +2554,8 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { int allocation_chunks; int alt_kf_bits; - if (kf_boost < (cpi->twopass.frames_to_key * 5)) - kf_boost = (cpi->twopass.frames_to_key * 5); + if (kf_boost < (cpi->twopass.frames_to_key * 3)) + kf_boost = (cpi->twopass.frames_to_key * 3); if (kf_boost < 300) // Min KF boost kf_boost = 300; diff --git a/vp9/encoder/vp9_lookahead.c b/vp9/encoder/vp9_lookahead.c index a89d2547e16b6706677e26fe8e9f000bab228c13..b07d92a442e892c5138e3f2b743cd2b828a7ee96 100644 --- a/vp9/encoder/vp9_lookahead.c +++ b/vp9/encoder/vp9_lookahead.c @@ -46,7 +46,7 @@ void vp9_lookahead_destroy(struct lookahead_ctx *ctx) { unsigned int i; for (i = 0; i < ctx->max_sz; i++) - vp8_yv12_de_alloc_frame_buffer(&ctx->buf[i].img); + vp9_free_frame_buffer(&ctx->buf[i].img); free(ctx->buf); } free(ctx); @@ -56,6 +56,8 @@ void vp9_lookahead_destroy(struct lookahead_ctx *ctx) { struct lookahead_ctx * vp9_lookahead_init(unsigned int width, unsigned int height, + unsigned int subsampling_x, + unsigned int subsampling_y, unsigned int depth) { struct lookahead_ctx *ctx = NULL; @@ -71,8 +73,9 @@ struct lookahead_ctx * vp9_lookahead_init(unsigned int width, if (!ctx->buf) goto bail; for (i = 0; i < depth; i++) - if (vp8_yv12_alloc_frame_buffer(&ctx->buf[i].img, - width, height, VP9BORDERINPIXELS)) + if (vp9_alloc_frame_buffer(&ctx->buf[i].img, + width, height, subsampling_x, subsampling_y, + VP9BORDERINPIXELS)) goto bail; } return ctx; @@ -81,20 +84,27 @@ bail: return NULL; } +#define USE_PARTIAL_COPY 0 int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src, int64_t ts_start, int64_t ts_end, unsigned int flags, unsigned char *active_map) { struct lookahead_entry *buf; +#if USE_PARTIAL_COPY int row, col, active_end; int mb_rows = (src->y_height + 15) >> 4; int mb_cols = (src->y_width + 15) >> 4; +#endif if (ctx->sz + 1 > ctx->max_sz) return 1; ctx->sz++; buf = pop(ctx, &ctx->write_idx); +#if USE_PARTIAL_COPY + // TODO(jkoleszar): This is disabled for now, as + // vp9_copy_and_extend_frame_with_rect is not subsampling/alpha aware. + // Only do this partial copy if the following conditions are all met: // 1. Lookahead queue has has size of 1. // 2. Active map is provided. 
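The partial-copy path guarded by USE_PARTIAL_COPY above walks the active map
in whole 16x16 macroblocks, so vp9_lookahead_push() rounds the luma
dimensions up with (size + 15) >> 4. A standalone check of that arithmetic
(test values chosen purely for illustration):

#include <assert.h>

static int mbs(int pixels) {
  return (pixels + 15) >> 4;  /* == ceil(pixels / 16.0) for pixels >= 0 */
}

int main(void) {
  assert(mbs(144) == 9);      /* exact multiple of 16 */
  assert(mbs(201) == 13);     /* odd width still rounds up */
  assert(mbs(1) == 1);
  return 0;
}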
@@ -137,6 +147,11 @@ int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src, } else { vp9_copy_and_extend_frame(src, &buf->img); } +#else + // Partial copy not implemented yet + vp9_copy_and_extend_frame(src, &buf->img); +#endif + buf->ts_start = ts_start; buf->ts_end = ts_end; buf->flags = flags; diff --git a/vp9/encoder/vp9_lookahead.h b/vp9/encoder/vp9_lookahead.h index 2406618b9c42ba83265ef82a7777df41b2dabf48..81baa2c6fd172f3a601cd1302934cc2316993239 100644 --- a/vp9/encoder/vp9_lookahead.h +++ b/vp9/encoder/vp9_lookahead.h @@ -31,6 +31,8 @@ struct lookahead_ctx; */ struct lookahead_ctx *vp9_lookahead_init(unsigned int width, unsigned int height, + unsigned int subsampling_x, + unsigned int subsampling_y, unsigned int depth); diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c index 715d683778a3dfc596c6ddfb8961b8020e6d8e62..65fdcbe50cf614658a4cfe298dd5104b4ce5cd61 100644 --- a/vp9/encoder/vp9_mbgraph.c +++ b/vp9/encoder/vp9_mbgraph.c @@ -9,13 +9,13 @@ */ #include <limits.h> + +#include <vpx_mem/vpx_mem.h> #include <vp9/encoder/vp9_encodeintra.h> #include <vp9/encoder/vp9_rdopt.h> -#include <vp9/common/vp9_setupintrarecon.h> #include <vp9/common/vp9_blockd.h> #include <vp9/common/vp9_reconinter.h> #include <vp9/common/vp9_systemdependent.h> -#include <vpx_mem/vpx_mem.h> #include <vp9/encoder/vp9_segmentation.h> static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, @@ -25,21 +25,18 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, int mb_col) { MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; - BLOCK *b = &x->block[0]; - BLOCKD *d = &xd->block[0]; vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16]; unsigned int best_err; - - int tmp_col_min = x->mv_col_min; - int tmp_col_max = x->mv_col_max; - int tmp_row_min = x->mv_row_min; - int tmp_row_max = x->mv_row_max; + const int tmp_col_min = x->mv_col_min; + const int tmp_col_max = x->mv_col_max; + const int tmp_row_min = x->mv_row_min; + const int tmp_row_max = x->mv_row_max; int_mv ref_full; // Further step/diamond searches as necessary int step_param = cpi->sf.first_step + - (cpi->Speed < 8 ? (cpi->Speed > 5 ? 1 : 0) : 2); + (cpi->speed < 8 ? (cpi->speed > 5 ? 
1 : 0) : 2); vp9_clamp_mv_min_max(x, ref_mv); @@ -47,15 +44,8 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, ref_full.as_mv.row = ref_mv->as_mv.row >> 3; /*cpi->sf.search_method == HEX*/ - best_err = vp9_hex_search( - x, b, d, - &ref_full, dst_mv, - step_param, - x->errorperbit, - &v_fn_ptr, - NULL, NULL, - NULL, NULL, - ref_mv); + best_err = vp9_hex_search(x, &ref_full, dst_mv, step_param, x->errorperbit, + &v_fn_ptr, NULL, NULL, NULL, NULL, ref_mv); // Try sub-pixel MC // if (bestsme > error_thresh && bestsme < INT_MAX) @@ -63,7 +53,7 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, int distortion; unsigned int sse; best_err = cpi->find_fractional_mv_step( - x, b, d, + x, dst_mv, ref_mv, x->errorperbit, &v_fn_ptr, NULL, NULL, @@ -71,9 +61,10 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, } vp9_set_mbmode_and_mvs(x, NEWMV, dst_mv); - vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16, mb_row, mb_col); - best_err = vp9_sad16x16(xd->dst.y_buffer, xd->dst.y_stride, - xd->predictor, 16, INT_MAX); + vp9_build_inter_predictors_sby(xd, mb_row, mb_col, BLOCK_SIZE_MB16X16); + best_err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, + xd->plane[0].dst.buf, xd->plane[0].dst.stride, + INT_MAX); /* restore UMV window */ x->mv_col_min = tmp_col_min; @@ -84,49 +75,27 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, return best_err; } -static int do_16x16_motion_search -( - VP9_COMP *cpi, - int_mv *ref_mv, - int_mv *dst_mv, - YV12_BUFFER_CONFIG *buf, - int buf_mb_y_offset, - YV12_BUFFER_CONFIG *ref, - int mb_y_offset, - int mb_row, - int mb_col) { - MACROBLOCK *const x = &cpi->mb; +static int do_16x16_motion_search(VP9_COMP *cpi, + int_mv *ref_mv, int_mv *dst_mv, + int buf_mb_y_offset, int mb_y_offset, + int mb_row, int mb_col) { + MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; unsigned int err, tmp_err; int_mv tmp_mv; - int n; - - for (n = 0; n < 16; n++) { - BLOCKD *d = &xd->block[n]; - BLOCK *b = &x->block[n]; - - b->base_src = &buf->y_buffer; - b->src_stride = buf->y_stride; - b->src = buf->y_stride * (n & 12) + (n & 3) * 4 + buf_mb_y_offset; - - d->base_pre = &ref->y_buffer; - d->pre_stride = ref->y_stride; - d->pre = ref->y_stride * (n & 12) + (n & 3) * 4 + mb_y_offset; - } // Try zero MV first // FIXME should really use something like near/nearest MV and/or MV prediction - xd->pre.y_buffer = ref->y_buffer + mb_y_offset; - xd->pre.y_stride = ref->y_stride; - err = vp9_sad16x16(ref->y_buffer + mb_y_offset, ref->y_stride, - xd->dst.y_buffer, xd->dst.y_stride, INT_MAX); + err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, + xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride, + INT_MAX); dst_mv->as_int = 0; // Test last reference frame using the previous best mv as the // starting point (best reference) for the search tmp_err = do_16x16_motion_iteration(cpi, ref_mv, &tmp_mv, mb_row, mb_col); if (tmp_err < err) { - err = tmp_err; + err = tmp_err; dst_mv->as_int = tmp_mv.as_int; } @@ -147,51 +116,26 @@ static int do_16x16_motion_search return err; } -static int do_16x16_zerozero_search -( - VP9_COMP *cpi, - int_mv *dst_mv, - YV12_BUFFER_CONFIG *buf, - int buf_mb_y_offset, - YV12_BUFFER_CONFIG *ref, - int mb_y_offset -) { - MACROBLOCK *const x = &cpi->mb; +static int do_16x16_zerozero_search(VP9_COMP *cpi, + int_mv *dst_mv, + int buf_mb_y_offset, int mb_y_offset) { + MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; unsigned int err; - int n; - - for (n = 0; n < 16; n++) { 
- BLOCKD *d = &xd->block[n]; - BLOCK *b = &x->block[n]; - - b->base_src = &buf->y_buffer; - b->src_stride = buf->y_stride; - b->src = buf->y_stride * (n & 12) + (n & 3) * 4 + buf_mb_y_offset; - - d->base_pre = &ref->y_buffer; - d->pre_stride = ref->y_stride; - d->pre = ref->y_stride * (n & 12) + (n & 3) * 4 + mb_y_offset; - } // Try zero MV first // FIXME should really use something like near/nearest MV and/or MV prediction - xd->pre.y_buffer = ref->y_buffer + mb_y_offset; - xd->pre.y_stride = ref->y_stride; - err = vp9_sad16x16(ref->y_buffer + mb_y_offset, ref->y_stride, - xd->dst.y_buffer, xd->dst.y_stride, INT_MAX); + err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, + xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride, + INT_MAX); dst_mv->as_int = 0; return err; } -static int find_best_16x16_intra -( - VP9_COMP *cpi, - YV12_BUFFER_CONFIG *buf, - int mb_y_offset, - MB_PREDICTION_MODE *pbest_mode -) { +static int find_best_16x16_intra(VP9_COMP *cpi, + int mb_y_offset, + MB_PREDICTION_MODE *pbest_mode) { MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; MB_PREDICTION_MODE best_mode = -1, mode; @@ -201,11 +145,19 @@ static int find_best_16x16_intra // we're intentionally not doing 4x4, we just want a rough estimate for (mode = DC_PRED; mode <= TM_PRED; mode++) { unsigned int err; + const int bwl = b_width_log2(BLOCK_SIZE_MB16X16), bw = 4 << bwl; + const int bhl = b_height_log2(BLOCK_SIZE_MB16X16), bh = 4 << bhl; xd->mode_info_context->mbmi.mode = mode; - vp9_build_intra_predictors_mby(xd); - err = vp9_sad16x16(xd->predictor, 16, buf->y_buffer + mb_y_offset, - buf->y_stride, best_err); + vp9_build_intra_predictors(x->plane[0].src.buf, x->plane[0].src.stride, + xd->plane[0].dst.buf, xd->plane[0].dst.stride, + xd->mode_info_context->mbmi.mode, + bw, bh, + xd->up_available, xd->left_available, + xd->right_available); + err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, + xd->plane[0].dst.buf, xd->plane[0].dst.stride, best_err); + // find best if (err < best_err) { best_err = err; @@ -234,26 +186,35 @@ static void update_mbgraph_mb_stats int mb_row, int mb_col ) { - MACROBLOCK *const x = &cpi->mb; + MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; int intra_error; + VP9_COMMON *cm = &cpi->common; // FIXME in practice we're completely ignoring chroma here - xd->dst.y_buffer = buf->y_buffer + mb_y_offset; + x->plane[0].src.buf = buf->y_buffer + mb_y_offset; + x->plane[0].src.stride = buf->y_stride; + + xd->plane[0].dst.buf = cm->yv12_fb[cm->new_fb_idx].y_buffer + mb_y_offset; + xd->plane[0].dst.stride = cm->yv12_fb[cm->new_fb_idx].y_stride; // do intra 16x16 prediction - intra_error = find_best_16x16_intra(cpi, buf, mb_y_offset, &stats->ref[INTRA_FRAME].m.mode); + intra_error = find_best_16x16_intra(cpi, mb_y_offset, + &stats->ref[INTRA_FRAME].m.mode); if (intra_error <= 0) intra_error = 1; stats->ref[INTRA_FRAME].err = intra_error; // Golden frame MV search, if it exists and is different than last frame if (golden_ref) { - int g_motion_error = do_16x16_motion_search(cpi, prev_golden_ref_mv, - &stats->ref[GOLDEN_FRAME].m.mv, - buf, mb_y_offset, - golden_ref, gld_y_offset, - mb_row, mb_col); + int g_motion_error; + xd->plane[0].pre[0].buf = golden_ref->y_buffer + mb_y_offset; + xd->plane[0].pre[0].stride = golden_ref->y_stride; + g_motion_error = do_16x16_motion_search(cpi, + prev_golden_ref_mv, + &stats->ref[GOLDEN_FRAME].m.mv, + mb_y_offset, gld_y_offset, + mb_row, mb_col); stats->ref[GOLDEN_FRAME].err = g_motion_error; } else { 
stats->ref[GOLDEN_FRAME].err = INT_MAX; @@ -262,16 +223,12 @@ static void update_mbgraph_mb_stats // Alt-ref frame MV search, if it exists and is different than last/golden frame if (alt_ref) { - // int a_motion_error = do_16x16_motion_search(cpi, prev_alt_ref_mv, - // &stats->ref[ALTREF_FRAME].m.mv, - // buf, mb_y_offset, - // alt_ref, arf_y_offset); - - int a_motion_error = - do_16x16_zerozero_search(cpi, - &stats->ref[ALTREF_FRAME].m.mv, - buf, mb_y_offset, - alt_ref, arf_y_offset); + int a_motion_error; + xd->plane[0].pre[0].buf = alt_ref->y_buffer + mb_y_offset; + xd->plane[0].pre[0].stride = alt_ref->y_stride; + a_motion_error = do_16x16_zerozero_search(cpi, + &stats->ref[ALTREF_FRAME].m.mv, + mb_y_offset, arf_y_offset); stats->ref[ALTREF_FRAME].err = a_motion_error; } else { @@ -280,17 +237,15 @@ static void update_mbgraph_mb_stats } } -static void update_mbgraph_frame_stats -( - VP9_COMP *cpi, - MBGRAPH_FRAME_STATS *stats, - YV12_BUFFER_CONFIG *buf, - YV12_BUFFER_CONFIG *golden_ref, - YV12_BUFFER_CONFIG *alt_ref -) { - MACROBLOCK *const x = &cpi->mb; - VP9_COMMON *const cm = &cpi->common; +static void update_mbgraph_frame_stats(VP9_COMP *cpi, + MBGRAPH_FRAME_STATS *stats, + YV12_BUFFER_CONFIG *buf, + YV12_BUFFER_CONFIG *golden_ref, + YV12_BUFFER_CONFIG *alt_ref) { + MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; + VP9_COMMON *const cm = &cpi->common; + int mb_col, mb_row, offset = 0; int mb_y_offset = 0, arf_y_offset = 0, gld_y_offset = 0; int_mv arf_top_mv, gld_top_mv; @@ -302,14 +257,17 @@ static void update_mbgraph_frame_stats // Set up limit values for motion vectors to prevent them extending outside the UMV borders arf_top_mv.as_int = 0; gld_top_mv.as_int = 0; - x->mv_row_min = -(VP9BORDERINPIXELS - 16 - VP9_INTERP_EXTEND); - x->mv_row_max = (cm->mb_rows - 1) * 16 + VP9BORDERINPIXELS - - 16 - VP9_INTERP_EXTEND; + x->mv_row_min = -(VP9BORDERINPIXELS - 8 - VP9_INTERP_EXTEND); + x->mv_row_max = (cm->mb_rows - 1) * 8 + VP9BORDERINPIXELS + - 8 - VP9_INTERP_EXTEND; xd->up_available = 0; - xd->dst.y_stride = buf->y_stride; - xd->pre.y_stride = buf->y_stride; - xd->dst.uv_stride = buf->uv_stride; + xd->plane[0].dst.stride = buf->y_stride; + xd->plane[0].pre[0].stride = buf->y_stride; + xd->plane[1].dst.stride = buf->uv_stride; xd->mode_info_context = &mi_local; + mi_local.mbmi.sb_type = BLOCK_SIZE_MB16X16; + mi_local.mbmi.ref_frame[0] = LAST_FRAME; + mi_local.mbmi.ref_frame[1] = NONE; for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) { int_mv arf_left_mv, gld_left_mv; @@ -320,9 +278,9 @@ static void update_mbgraph_frame_stats // Set up limit values for motion vectors to prevent them extending outside the UMV borders arf_left_mv.as_int = arf_top_mv.as_int; gld_left_mv.as_int = gld_top_mv.as_int; - x->mv_col_min = -(VP9BORDERINPIXELS - 16 - VP9_INTERP_EXTEND); - x->mv_col_max = (cm->mb_cols - 1) * 16 + VP9BORDERINPIXELS - - 16 - VP9_INTERP_EXTEND; + x->mv_col_min = -(VP9BORDERINPIXELS - 8 - VP9_INTERP_EXTEND); + x->mv_col_max = (cm->mb_cols - 1) * 8 + VP9BORDERINPIXELS + - 8 - VP9_INTERP_EXTEND; xd->left_available = 0; for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { @@ -379,17 +337,16 @@ static void separate_arf_mbs(VP9_COMP *cpi) { for (offset = 0, mb_row = 0; mb_row < cm->mb_rows; offset += cm->mb_cols, mb_row++) { for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { - MBGRAPH_MB_STATS *mb_stats = - &frame_stats->mb_stats[offset + mb_col]; + MBGRAPH_MB_STATS *mb_stats = &frame_stats->mb_stats[offset + mb_col]; int altref_err = mb_stats->ref[ALTREF_FRAME].err; int 
intra_err = mb_stats->ref[INTRA_FRAME ].err; int golden_err = mb_stats->ref[GOLDEN_FRAME].err; // Test for altref vs intra and gf and that its mv was 0,0. - if ((altref_err > 1000) || - (altref_err > intra_err) || - (altref_err > golden_err)) { + if (altref_err > 1000 || + altref_err > intra_err || + altref_err > golden_err) { arf_not_zz[offset + mb_col]++; } } @@ -404,10 +361,16 @@ static void separate_arf_mbs(VP9_COMP *cpi) { // goes in segment 0 if (arf_not_zz[offset + mb_col]) { ncnt[0]++; - cpi->segmentation_map[offset + mb_col] = 0; + cpi->segmentation_map[offset * 4 + 2 * mb_col] = 0; + cpi->segmentation_map[offset * 4 + 2 * mb_col + 1] = 0; + cpi->segmentation_map[offset * 4 + 2 * mb_col + cm->mi_cols] = 0; + cpi->segmentation_map[offset * 4 + 2 * mb_col + cm->mi_cols + 1] = 0; } else { + cpi->segmentation_map[offset * 4 + 2 * mb_col] = 1; + cpi->segmentation_map[offset * 4 + 2 * mb_col + 1] = 1; + cpi->segmentation_map[offset * 4 + 2 * mb_col + cm->mi_cols] = 1; + cpi->segmentation_map[offset * 4 + 2 * mb_col + cm->mi_cols + 1] = 1; ncnt[1]++; - cpi->segmentation_map[offset + mb_col] = 1; } } } @@ -425,10 +388,10 @@ static void separate_arf_mbs(VP9_COMP *cpi) { cpi->static_mb_pct = 0; cpi->seg0_cnt = ncnt[0]; - vp9_enable_segmentation((VP9_PTR) cpi); + vp9_enable_segmentation((VP9_PTR)cpi); } else { cpi->static_mb_pct = 0; - vp9_disable_segmentation((VP9_PTR) cpi); + vp9_disable_segmentation((VP9_PTR)cpi); } // Free localy allocated storage @@ -463,8 +426,7 @@ void vp9_update_mbgraph_stats(VP9_COMP *cpi) { // the ARF MC search backwards, to get optimal results for MV caching for (i = 0; i < n_frames; i++) { MBGRAPH_FRAME_STATS *frame_stats = &cpi->mbgraph_stats[i]; - struct lookahead_entry *q_cur = - vp9_lookahead_peek(cpi->lookahead, i); + struct lookahead_entry *q_cur = vp9_lookahead_peek(cpi->lookahead, i); assert(q_cur != NULL); diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index e642b7487b3218a39a2e180e5c9999b5f8bfdbd6..2e99736ce04b7879bc42580f250877e8832cb846 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -56,8 +56,9 @@ int vp9_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2], MV v; v.row = mv->as_mv.row - ref->as_mv.row; v.col = mv->as_mv.col - ref->as_mv.col; - return ((mvjcost[vp9_get_mv_joint(v)] + - mvcost[0][v.row] + mvcost[1][v.col]) * weight) >> 7; + return ROUND_POWER_OF_TWO((mvjcost[vp9_get_mv_joint(&v)] + + mvcost[0][v.row] + + mvcost[1][v.col]) * weight, 7); } static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2], @@ -66,9 +67,9 @@ static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2], MV v; v.row = mv->as_mv.row - ref->as_mv.row; v.col = mv->as_mv.col - ref->as_mv.col; - return ((mvjcost[vp9_get_mv_joint(v)] + - mvcost[0][v.row] + mvcost[1][v.col]) * - error_per_bit + 4096) >> 13; + return ROUND_POWER_OF_TWO((mvjcost[vp9_get_mv_joint(&v)] + + mvcost[0][v.row] + + mvcost[1][v.col]) * error_per_bit, 13); } return 0; } @@ -79,9 +80,9 @@ static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvjsadcost, MV v; v.row = mv->as_mv.row - ref->as_mv.row; v.col = mv->as_mv.col - ref->as_mv.col; - return ((mvjsadcost[vp9_get_mv_joint(v)] + - mvsadcost[0][v.row] + mvsadcost[1][v.col]) * - error_per_bit + 128) >> 8; + return ROUND_POWER_OF_TWO((mvjsadcost[vp9_get_mv_joint(&v)] + + mvsadcost[0][v.row] + + mvsadcost[1][v.col]) * error_per_bit, 8); } return 0; } @@ -222,7 +223,7 @@ void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) { /* returns subpixel variance 
error function */ #define DIST(r, c) \ - vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, b->src_stride, &sse) + vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, src_stride, &sse) /* checks if (r, c) has better score than previous best */ #define CHECK_BETTER(v, r, c) \ @@ -238,14 +239,15 @@ void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) { }, \ v = INT_MAX;) -int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, +int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, int_mv *bestmv, int_mv *ref_mv, int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1) { - uint8_t *z = (*(b->base_src) + b->src); + uint8_t *z = x->plane[0].src.buf; + int src_stride = x->plane[0].src.stride; MACROBLOCKD *xd = &x->e_mbd; int rr, rc, br, bc, hstep; @@ -263,9 +265,11 @@ int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int offset; int usehp = xd->allow_high_precision_mv; - uint8_t *y = *(d->base_pre) + d->pre + - (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col; - y_stride = d->pre_stride; + uint8_t *y = xd->plane[0].pre[0].buf + + (bestmv->as_mv.row) * xd->plane[0].pre[0].stride + + bestmv->as_mv.col; + + y_stride = xd->plane[0].pre[0].stride; rr = ref_mv->as_mv.row; rc = ref_mv->as_mv.col; @@ -288,7 +292,7 @@ int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, bestmv->as_mv.col <<= 3; // calculate central point error - besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1); + besterr = vfp->vf(y, y_stride, z, src_stride, sse1); *distortion = besterr; besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit, xd->allow_high_precision_mv); @@ -409,6 +413,200 @@ int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, return besterr; } + +#undef DIST +/* returns subpixel variance error function */ +#define DIST(r, c) \ + vfp->svaf(PRE(r, c), y_stride, SP(c), SP(r), \ + z, src_stride, &sse, second_pred) + +int vp9_find_best_sub_pixel_comp(MACROBLOCK *x, + int_mv *bestmv, int_mv *ref_mv, + int error_per_bit, + const vp9_variance_fn_ptr_t *vfp, + int *mvjcost, int *mvcost[2], + int *distortion, + unsigned int *sse1, + const uint8_t *second_pred, int w, int h) { + uint8_t *z = x->plane[0].src.buf; + int src_stride = x->plane[0].src.stride; + MACROBLOCKD *xd = &x->e_mbd; + + int rr, rc, br, bc, hstep; + int tr, tc; + unsigned int besterr = INT_MAX; + unsigned int left, right, up, down, diag; + unsigned int sse; + unsigned int whichdir; + unsigned int halfiters = 4; + unsigned int quarteriters = 4; + unsigned int eighthiters = 4; + int thismse; + int maxc, minc, maxr, minr; + int y_stride; + int offset; + int usehp = xd->allow_high_precision_mv; + + uint8_t *comp_pred = vpx_memalign(16, w * h * sizeof(uint8_t)); + uint8_t *y = xd->plane[0].pre[0].buf + + (bestmv->as_mv.row) * xd->plane[0].pre[0].stride + + bestmv->as_mv.col; + + y_stride = xd->plane[0].pre[0].stride; + + rr = ref_mv->as_mv.row; + rc = ref_mv->as_mv.col; + br = bestmv->as_mv.row << 3; + bc = bestmv->as_mv.col << 3; + hstep = 4; + minc = MAX(x->mv_col_min << 3, (ref_mv->as_mv.col) - + ((1 << MV_MAX_BITS) - 1)); + maxc = MIN(x->mv_col_max << 3, (ref_mv->as_mv.col) + + ((1 << MV_MAX_BITS) - 1)); + minr = MAX(x->mv_row_min << 3, (ref_mv->as_mv.row) - + ((1 << MV_MAX_BITS) - 1)); + maxr = MIN(x->mv_row_max << 3, (ref_mv->as_mv.row) + + ((1 << MV_MAX_BITS) - 1)); + + tr = br; + tc = bc; + + + offset = (bestmv->as_mv.row) * y_stride + 
bestmv->as_mv.col; + + // central mv + bestmv->as_mv.row <<= 3; + bestmv->as_mv.col <<= 3; + + // calculate central point error + // TODO(yunqingwang): central pointer error was already calculated in full- + // pixel search, and can be passed in this function. + comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride); + besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); + *distortion = besterr; + besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, + error_per_bit, xd->allow_high_precision_mv); + + // Each subsequent iteration checks at least one point in + // common with the last iteration could be 2 ( if diag selected) + while (--halfiters) { + // 1/2 pel + CHECK_BETTER(left, tr, tc - hstep); + CHECK_BETTER(right, tr, tc + hstep); + CHECK_BETTER(up, tr - hstep, tc); + CHECK_BETTER(down, tr + hstep, tc); + + whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); + + switch (whichdir) { + case 0: + CHECK_BETTER(diag, tr - hstep, tc - hstep); + break; + case 1: + CHECK_BETTER(diag, tr - hstep, tc + hstep); + break; + case 2: + CHECK_BETTER(diag, tr + hstep, tc - hstep); + break; + case 3: + CHECK_BETTER(diag, tr + hstep, tc + hstep); + break; + } + + // no reason to check the same one again. + if (tr == br && tc == bc) + break; + + tr = br; + tc = bc; + } + + // Each subsequent iteration checks at least one point in common with + // the last iteration could be 2 ( if diag selected) 1/4 pel + hstep >>= 1; + while (--quarteriters) { + CHECK_BETTER(left, tr, tc - hstep); + CHECK_BETTER(right, tr, tc + hstep); + CHECK_BETTER(up, tr - hstep, tc); + CHECK_BETTER(down, tr + hstep, tc); + + whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); + + switch (whichdir) { + case 0: + CHECK_BETTER(diag, tr - hstep, tc - hstep); + break; + case 1: + CHECK_BETTER(diag, tr - hstep, tc + hstep); + break; + case 2: + CHECK_BETTER(diag, tr + hstep, tc - hstep); + break; + case 3: + CHECK_BETTER(diag, tr + hstep, tc + hstep); + break; + } + + // no reason to check the same one again. + if (tr == br && tc == bc) + break; + + tr = br; + tc = bc; + } + + if (xd->allow_high_precision_mv) { + usehp = vp9_use_nmv_hp(&ref_mv->as_mv); + } else { + usehp = 0; + } + + if (usehp) { + hstep >>= 1; + while (--eighthiters) { + CHECK_BETTER(left, tr, tc - hstep); + CHECK_BETTER(right, tr, tc + hstep); + CHECK_BETTER(up, tr - hstep, tc); + CHECK_BETTER(down, tr + hstep, tc); + + whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); + + switch (whichdir) { + case 0: + CHECK_BETTER(diag, tr - hstep, tc - hstep); + break; + case 1: + CHECK_BETTER(diag, tr - hstep, tc + hstep); + break; + case 2: + CHECK_BETTER(diag, tr + hstep, tc - hstep); + break; + case 3: + CHECK_BETTER(diag, tr + hstep, tc + hstep); + break; + } + + // no reason to check the same one again. 
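// Illustrative sketch, not part of the patch: each refinement pass above
// probes the four plus-neighbours at the current step size, then one diagonal
// picked from the cheaper horizontal side and the cheaper vertical side via
//   whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2)
// Bit 0 encodes left/right, bit 1 encodes up/down (helper is hypothetical):
static void diagonal_probe(unsigned int left, unsigned int right,
                           unsigned int up, unsigned int down,
                           int hstep, int *row_off, int *col_off) {
  const unsigned int whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
  *row_off = (whichdir & 2) ? hstep : -hstep;  // cases 2, 3 step down
  *col_off = (whichdir & 1) ? hstep : -hstep;  // cases 1, 3 step right
}
// The early exit just below fires when no neighbour beat the current best,
// i.e. (br, bc) did not move during this iteration.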
+ if (tr == br && tc == bc) + break; + + tr = br; + tc = bc; + } + } + bestmv->as_mv.row = br; + bestmv->as_mv.col = bc; + + vpx_free(comp_pred); + + if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) || + (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3))) + return INT_MAX; + + return besterr; +} + + #undef MVC #undef PRE #undef DIST @@ -417,7 +615,7 @@ int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, #undef MIN #undef MAX -int vp9_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, +int vp9_find_best_sub_pixel_step(MACROBLOCK *x, int_mv *bestmv, int_mv *ref_mv, int error_per_bit, const vp9_variance_fn_ptr_t *vfp, @@ -428,7 +626,8 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv this_mv; int_mv orig_mv; int yrow_movedback = 0, ycol_movedback = 0; - uint8_t *z = (*(b->base_src) + b->src); + uint8_t *z = x->plane[0].src.buf; + int src_stride = x->plane[0].src.stride; int left, right, up, down, diag; unsigned int sse; int whichdir; @@ -437,9 +636,10 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MACROBLOCKD *xd = &x->e_mbd; int usehp = xd->allow_high_precision_mv; - uint8_t *y = *(d->base_pre) + d->pre + - (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col; - y_stride = d->pre_stride; + uint8_t *y = xd->plane[0].pre[0].buf + + (bestmv->as_mv.row) * xd->plane[0].pre[0].stride + + bestmv->as_mv.col; + y_stride = xd->plane[0].pre[0].stride; // central mv bestmv->as_mv.row <<= 3; @@ -448,7 +648,7 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, orig_mv = *bestmv; // calculate central point error - bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1); + bestmse = vfp->vf(y, y_stride, z, src_stride, sse1); *distortion = bestmse; bestmse += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit, xd->allow_high_precision_mv); @@ -456,7 +656,7 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, // go left then right and check error this_mv.as_mv.row = startmv.as_mv.row; this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4); - thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse); + thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, src_stride, &sse); left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, xd->allow_high_precision_mv); @@ -468,7 +668,7 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, } this_mv.as_mv.col += 8; - thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse); + thismse = vfp->svf_halfpix_h(y, y_stride, z, src_stride, &sse); right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, xd->allow_high_precision_mv); @@ -482,7 +682,7 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, // go up then down and check error this_mv.as_mv.col = startmv.as_mv.col; this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4); - thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse); + thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, src_stride, &sse); up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, xd->allow_high_precision_mv); @@ -494,7 +694,7 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, } this_mv.as_mv.row += 8; - thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse); + thismse = vfp->svf_halfpix_v(y, y_stride, z, src_stride, &sse); down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, 
error_per_bit, xd->allow_high_precision_mv); @@ -516,23 +716,25 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, case 0: this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; - thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse); + thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, src_stride, + &sse); break; case 1: this_mv.as_mv.col += 4; this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; - thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse); + thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, src_stride, + &sse); break; case 2: this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; this_mv.as_mv.row += 4; - thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse); + thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, src_stride, &sse); break; case 3: default: this_mv.as_mv.col += 4; this_mv.as_mv.row += 4; - thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse); + thismse = vfp->svf_halfpix_hv(y, y_stride, z, src_stride, &sse); break; } @@ -571,11 +773,11 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, this_mv.as_mv.col = startmv.as_mv.col - 2; thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, b->src_stride, &sse); + z, src_stride, &sse); } else { this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, - b->src_stride, &sse); + src_stride, &sse); } left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, @@ -591,7 +793,7 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, this_mv.as_mv.col += 4; thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, b->src_stride, &sse); + z, src_stride, &sse); right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, xd->allow_high_precision_mv); @@ -609,11 +811,11 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, this_mv.as_mv.row = startmv.as_mv.row - 2; thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, b->src_stride, &sse); + z, src_stride, &sse); } else { this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6; thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6), - z, b->src_stride, &sse); + z, src_stride, &sse); } up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, @@ -628,7 +830,7 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, this_mv.as_mv.row += 4; thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, b->src_stride, &sse); + z, src_stride, &sse); down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, xd->allow_high_precision_mv); @@ -655,20 +857,25 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, if (startmv.as_mv.col & 7) { this_mv.as_mv.col -= 2; - thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse); + thismse = vfp->svf(y, y_stride, + SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), + z, src_stride, &sse); } else { this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; - thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, b->src_stride, &sse);; + thismse = vfp->svf(y - 1, y_stride, + SP(6), SP(this_mv.as_mv.row), z, src_stride, &sse); } } else { this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6; if (startmv.as_mv.col & 7) { 
this_mv.as_mv.col -= 2; - thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6), z, b->src_stride, &sse); + thismse = vfp->svf(y - y_stride, y_stride, + SP(this_mv.as_mv.col), SP(6), z, src_stride, &sse); } else { this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; - thismse = vfp->svf(y - y_stride - 1, y_stride, SP(6), SP(6), z, b->src_stride, &sse); + thismse = vfp->svf(y - y_stride - 1, y_stride, + SP(6), SP(6), z, src_stride, &sse); } } @@ -678,10 +885,13 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, if (startmv.as_mv.row & 7) { this_mv.as_mv.row -= 2; - thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse); + thismse = vfp->svf(y, y_stride, + SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), + z, src_stride, &sse); } else { this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6; - thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6), z, b->src_stride, &sse); + thismse = vfp->svf(y - y_stride, y_stride, + SP(this_mv.as_mv.col), SP(6), z, src_stride, &sse); } break; @@ -690,12 +900,13 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, if (startmv.as_mv.col & 7) { this_mv.as_mv.col -= 2; - thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, b->src_stride, &sse); + thismse = vfp->svf(y, y_stride, + SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), + z, src_stride, &sse); } else { this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, - b->src_stride, &sse); + src_stride, &sse); } break; @@ -704,7 +915,7 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, this_mv.as_mv.row += 2; thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, b->src_stride, &sse); + z, src_stride, &sse); break; } @@ -746,11 +957,11 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, this_mv.as_mv.col = startmv.as_mv.col - 1; thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, b->src_stride, &sse); + z, src_stride, &sse); } else { this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7; thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), - z, b->src_stride, &sse); + z, src_stride, &sse); } left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, @@ -765,7 +976,7 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, this_mv.as_mv.col += 2; thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, b->src_stride, &sse); + z, src_stride, &sse); right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, xd->allow_high_precision_mv); @@ -781,10 +992,13 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, if (startmv.as_mv.row & 7) { this_mv.as_mv.row = startmv.as_mv.row - 1; - thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse); + thismse = vfp->svf(y, y_stride, + SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), + z, src_stride, &sse); } else { this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7; - thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, b->src_stride, &sse); + thismse = vfp->svf(y - y_stride, y_stride, + SP(this_mv.as_mv.col), SP(7), z, src_stride, &sse); } up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, @@ -798,7 +1012,9 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, } 
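// Illustrative sketch, not part of the patch: motion vectors in this function
// are in 1/8-pel units. Stepping a quarter-pel left of a full-pel position
// needs both a new sub-pel phase and a predictor pointer anchored one pixel
// earlier, which is why the branches above pair (col - 8) | 6 with y - 1.
// Numerically the result is just col - 2 whenever col sits on a full pixel
// (low three bits zero); a standalone check (needs <assert.h>):
static void check_quarter_pel_left(void) {
  int col;
  for (col = -64; col <= 64; col += 8)   // full-pel positions, 1/8-pel units
    assert(((col - 8) | 6) == col - 2);  // same point, previous-pixel anchor
}
// The eighth-pel probes further down use (v - 8) | 7 in exactly the same way.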
       this_mv.as_mv.row += 2;
-      thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
+      thismse = vfp->svf(y, y_stride,
+                         SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
+                         z, src_stride, &sse);
 
   down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
                                xd->allow_high_precision_mv);
@@ -824,20 +1040,26 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
       if (startmv.as_mv.col & 7) {
         this_mv.as_mv.col -= 1;
-        thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
+        thismse = vfp->svf(y, y_stride,
+                           SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
+                           z, src_stride, &sse);
       } else {
         this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
-        thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), z, b->src_stride, &sse);;
+        thismse = vfp->svf(y - 1, y_stride,
+                           SP(7), SP(this_mv.as_mv.row),
+                           z, src_stride, &sse);
       }
     } else {
       this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7;
 
       if (startmv.as_mv.col & 7) {
         this_mv.as_mv.col -= 1;
-        thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, b->src_stride, &sse);
+        thismse = vfp->svf(y - y_stride, y_stride,
+                           SP(this_mv.as_mv.col), SP(7), z, src_stride, &sse);
       } else {
         this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
-        thismse = vfp->svf(y - y_stride - 1, y_stride, SP(7), SP(7), z, b->src_stride, &sse);
+        thismse = vfp->svf(y - y_stride - 1, y_stride,
+                           SP(7), SP(7), z, src_stride, &sse);
       }
     }
@@ -847,10 +1069,13 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
 
       if (startmv.as_mv.row & 7) {
         this_mv.as_mv.row -= 1;
-        thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
+        thismse = vfp->svf(y, y_stride,
+                           SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
+                           z, src_stride, &sse);
       } else {
         this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7;
-        thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, b->src_stride, &sse);
+        thismse = vfp->svf(y - y_stride, y_stride,
+                           SP(this_mv.as_mv.col), SP(7), z, src_stride, &sse);
       }
 
       break;
@@ -859,17 +1084,22 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
 
       if (startmv.as_mv.col & 7) {
         this_mv.as_mv.col -= 1;
-        thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
+        thismse = vfp->svf(y, y_stride,
+                           SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
+                           z, src_stride, &sse);
       } else {
         this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
-        thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
+        thismse = vfp->svf(y - 1, y_stride,
+                           SP(7), SP(this_mv.as_mv.row), z, src_stride, &sse);
       }
 
       break;
     case 3:
       this_mv.as_mv.col += 1;
       this_mv.as_mv.row += 1;
-      thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
+      thismse = vfp->svf(y, y_stride,
+                         SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
+                         z, src_stride, &sse);
       break;
   }
@@ -888,7 +1118,7 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
 
 #undef SP
 
-int vp9_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
+int vp9_find_best_half_pixel_step(MACROBLOCK *x,
                                   int_mv *bestmv, int_mv *ref_mv,
                                   int error_per_bit,
                                   const vp9_variance_fn_ptr_t *vfp,
@@ -898,7 +1128,8 @@ int vp9_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
   int bestmse = INT_MAX;
   int_mv startmv;
   int_mv this_mv;
-  uint8_t *z = (*(b->base_src) + b->src);
+  uint8_t *z = x->plane[0].src.buf;
+  int src_stride = x->plane[0].src.stride;
   int left, right, up, down, diag;
   unsigned int sse;
   int whichdir;
@@ -906,9 +1137,9 @@ int vp9_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
   int y_stride;
   MACROBLOCKD *xd = &x->e_mbd;
 
-  uint8_t *y = *(d->base_pre) + d->pre +
-               (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
-  y_stride = d->pre_stride;
+  uint8_t *y = xd->plane[0].pre[0].buf +
+               (bestmv->as_mv.row) * xd->plane[0].pre[0].stride + bestmv->as_mv.col;
+  y_stride = xd->plane[0].pre[0].stride;
 
   // central mv
   bestmv->as_mv.row <<= 3;
@@ -916,7 +1147,7 @@ int vp9_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
   startmv = *bestmv;
 
   // calculate central point error
-  bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
+  bestmse = vfp->vf(y, y_stride, z, src_stride, sse1);
   *distortion = bestmse;
   bestmse += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit,
                          xd->allow_high_precision_mv);
@@ -924,7 +1155,7 @@ int vp9_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
   // go left then right and check error
   this_mv.as_mv.row = startmv.as_mv.row;
   this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
-  thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
+  thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, src_stride, &sse);
 
   left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
                                xd->allow_high_precision_mv);
@@ -936,7 +1167,7 @@ int vp9_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
   }
 
   this_mv.as_mv.col += 8;
-  thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
+  thismse = vfp->svf_halfpix_h(y, y_stride, z, src_stride, &sse);
   right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
                                 xd->allow_high_precision_mv);
@@ -950,7 +1181,7 @@ int vp9_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
   // go up then down and check error
   this_mv.as_mv.col = startmv.as_mv.col;
   this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
-  thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
+  thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, src_stride, &sse);
 
   up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
                              xd->allow_high_precision_mv);
@@ -962,7 +1193,7 @@ int vp9_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
   }
 
   this_mv.as_mv.row += 8;
-  thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
+  thismse = vfp->svf_halfpix_v(y, y_stride, z, src_stride, &sse);
   down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
                                xd->allow_high_precision_mv);
@@ -981,23 +1212,25 @@ int vp9_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
     case 0:
       this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
       this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
-      thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse);
+      thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride,
+                                    z, src_stride, &sse);
       break;
     case 1:
       this_mv.as_mv.col += 4;
       this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
-      thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse);
+      thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride,
+                                    z, src_stride, &sse);
       break;
     case 2:
       this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
       this_mv.as_mv.row += 4;
-      thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse);
+      thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, src_stride, &sse);
       break;
     case 3:
     default:
      this_mv.as_mv.col += 4;
      this_mv.as_mv.row += 4;
-      thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse);
+      thismse = vfp->svf_halfpix_hv(y, y_stride, z, src_stride, &sse);
      break;
  }
@@ -1057,8 +1290,6 @@ static const MV next_chkpts[6][3] = {
 int vp9_hex_search
 (
   MACROBLOCK *x,
-  BLOCK *b,
-  BLOCKD *d,
   int_mv *ref_mv,
   int_mv *best_mv,
   int search_param,
@@ -1068,13 +1299,14 @@ int vp9_hex_search
   int *mvjcost, int *mvcost[2],
   int_mv *center_mv
 ) {
+  const MACROBLOCKD* const xd = &x->e_mbd;
   MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} };
   MV neighbors[4] = {{0, -1}, { -1, 0}, {1, 0}, {0, 1}};
   int i, j;
 
-  uint8_t *what = (*(b->base_src) + b->src);
-  int what_stride = b->src_stride;
-  int in_what_stride = d->pre_stride;
+  uint8_t *what = x->plane[0].src.buf;
+  int what_stride = x->plane[0].src.stride;
+  int in_what_stride = xd->plane[0].pre[0].stride;
   int br, bc;
   int_mv this_mv;
   unsigned int bestsad = 0x7fffffff;
@@ -1095,8 +1327,8 @@ int vp9_hex_search
   bc = ref_mv->as_mv.col;
 
   // Work out the start point for the search
-  base_offset = (uint8_t *)(*(d->base_pre) + d->pre);
-  this_offset = base_offset + (br * (d->pre_stride)) + bc;
+  base_offset = (uint8_t *)(xd->plane[0].pre[0].buf);
+  this_offset = base_offset + (br * (xd->plane[0].pre[0].stride)) + bc;
   this_mv.as_mv.row = br;
   this_mv.as_mv.col = bc;
   bestsad = vfp->sdf(what, what_stride, this_offset,
@@ -1211,17 +1443,18 @@ cal_neighbors:
 
 #undef CHECK_POINT
 #undef CHECK_BETTER
 
-int vp9_diamond_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
+int vp9_diamond_search_sad_c(MACROBLOCK *x,
                              int_mv *ref_mv, int_mv *best_mv,
                              int search_param, int sad_per_bit, int *num00,
                              vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost,
                              int *mvcost[2], int_mv *center_mv) {
   int i, j, step;
 
-  uint8_t *what = (*(b->base_src) + b->src);
-  int what_stride = b->src_stride;
+  const MACROBLOCKD* const xd = &x->e_mbd;
+  uint8_t *what = x->plane[0].src.buf;
+  int what_stride = x->plane[0].src.stride;
   uint8_t *in_what;
-  int in_what_stride = d->pre_stride;
+  int in_what_stride = xd->plane[0].pre[0].stride;
   uint8_t *best_address;
 
   int tot_steps;
@@ -1237,7 +1470,6 @@ int vp9_diamond_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
   uint8_t *check_here;
   int thissad;
-  MACROBLOCKD *xd = &x->e_mbd;
   int_mv fcenter_mv;
 
   int *mvjsadcost = x->nmvjointsadcost;
@@ -1254,8 +1486,8 @@ int vp9_diamond_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
   best_mv->as_mv.col = ref_col;
 
   // Work out the start point for the search
-  in_what = (uint8_t *)(*(d->base_pre) + d->pre +
-                        (ref_row * (d->pre_stride)) + ref_col);
+  in_what = (uint8_t *)(xd->plane[0].pre[0].buf +
+                        (ref_row * (xd->plane[0].pre[0].stride)) + ref_col);
   best_address = in_what;
 
   // Check the starting position
@@ -1322,17 +1554,18 @@ int vp9_diamond_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                      xd->allow_high_precision_mv);
 }
 
-int vp9_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
+int vp9_diamond_search_sadx4(MACROBLOCK *x,
                              int_mv *ref_mv, int_mv *best_mv, int search_param,
                              int sad_per_bit, int *num00,
                              vp9_variance_fn_ptr_t *fn_ptr,
                              int *mvjcost, int *mvcost[2],
                              int_mv *center_mv) {
   int i, j, step;
 
-  uint8_t *what = (*(b->base_src) + b->src);
-  int what_stride = b->src_stride;
+  const MACROBLOCKD* const xd = &x->e_mbd;
+  uint8_t *what = x->plane[0].src.buf;
+  int what_stride = x->plane[0].src.stride;
   uint8_t *in_what;
-  int in_what_stride = d->pre_stride;
+  int in_what_stride = xd->plane[0].pre[0].stride;
   uint8_t *best_address;
 
   int tot_steps;
@@ -1350,7 +1583,6 @@ int vp9_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
   uint8_t *check_here;
   unsigned int thissad;
-  MACROBLOCKD *xd = &x->e_mbd;
   int_mv fcenter_mv;
 
   int *mvjsadcost = x->nmvjointsadcost;
@@ -1367,8 +1599,8 @@ int vp9_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
   best_mv->as_mv.col = ref_col;
 
   // Work out the start point for the search
-  in_what = (uint8_t *)(*(d->base_pre) + d->pre +
-                        (ref_row * (d->pre_stride)) + ref_col);
+  in_what = (uint8_t *)(xd->plane[0].pre[0].buf +
+                        (ref_row * (xd->plane[0].pre[0].stride)) + ref_col);
   best_address = in_what;
 
   // Check the starting position
@@ -1472,14 +1704,14 @@ int vp9_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
 
 /* do_refine: If last step (1-away) of n-step search doesn't pick the center
               point as the best match, we will do a final 1-away diamond
               refining search  */
-int vp9_full_pixel_diamond(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *b,
-                           BLOCKD *d, int_mv *mvp_full, int step_param,
+int vp9_full_pixel_diamond(VP9_COMP *cpi, MACROBLOCK *x,
+                           int_mv *mvp_full, int step_param,
                            int sadpb, int further_steps, int do_refine,
                            vp9_variance_fn_ptr_t *fn_ptr,
                            int_mv *ref_mv, int_mv *dst_mv) {
   int_mv temp_mv;
   int thissme, n, num00;
-  int bestsme = cpi->diamond_search_sad(x, b, d, mvp_full, &temp_mv,
+  int bestsme = cpi->diamond_search_sad(x, mvp_full, &temp_mv,
                                         step_param, sadpb, &num00,
                                         fn_ptr, x->nmvjointcost,
                                         x->mvcost, ref_mv);
@@ -1498,7 +1730,7 @@ int vp9_full_pixel_diamond(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *b,
     if (num00)
       num00--;
     else {
-      thissme = cpi->diamond_search_sad(x, b, d, mvp_full, &temp_mv,
+      thissme = cpi->diamond_search_sad(x, mvp_full, &temp_mv,
                                         step_param + n, sadpb, &num00,
                                         fn_ptr, x->nmvjointcost, x->mvcost,
                                         ref_mv);
@@ -1519,7 +1751,7 @@ int vp9_full_pixel_diamond(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *b,
     int search_range = 8;
     int_mv best_mv;
     best_mv.as_int = dst_mv->as_int;
-    thissme = cpi->refining_search_sad(x, b, d, &best_mv, sadpb, search_range,
+    thissme = cpi->refining_search_sad(x, &best_mv, sadpb, search_range,
                                        fn_ptr, x->nmvjointcost, x->mvcost,
                                        ref_mv);
@@ -1531,25 +1763,25 @@ int vp9_full_pixel_diamond(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *b,
   return bestsme;
 }
 
-int vp9_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
+int vp9_full_search_sad_c(MACROBLOCK *x, int_mv *ref_mv,
                           int sad_per_bit, int distance,
                           vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost,
                           int *mvcost[2],
-                          int_mv *center_mv) {
-  uint8_t *what = (*(b->base_src) + b->src);
-  int what_stride = b->src_stride;
+                          int_mv *center_mv, int n) {
+  const MACROBLOCKD* const xd = &x->e_mbd;
+  uint8_t *what = x->plane[0].src.buf;
+  int what_stride = x->plane[0].src.stride;
   uint8_t *in_what;
-  int in_what_stride = d->pre_stride;
-  int mv_stride = d->pre_stride;
+  int in_what_stride = xd->plane[0].pre[0].stride;
+  int mv_stride = xd->plane[0].pre[0].stride;
   uint8_t *bestaddress;
-  int_mv *best_mv = &d->bmi.as_mv[0];
+  int_mv *best_mv = &x->e_mbd.mode_info_context->bmi[n].as_mv[0];
   int_mv this_mv;
   int bestsad = INT_MAX;
   int r, c;
 
   uint8_t *check_here;
   int thissad;
-  MACROBLOCKD *xd = &x->e_mbd;
 
   int ref_row = ref_mv->as_mv.row;
   int ref_col = ref_mv->as_mv.col;
@@ -1567,8 +1799,8 @@ int vp9_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
 
   // Work out the mid point for the search
-  in_what = *(d->base_pre) + d->pre;
-  bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;
+  in_what = xd->plane[0].pre[0].buf;
+  bestaddress = in_what + (ref_row * xd->plane[0].pre[0].stride) + ref_col;
 
   best_mv->as_mv.row = ref_row;
   best_mv->as_mv.col = ref_col;
@@ -1627,24 +1859,24 @@ int vp9_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
     return INT_MAX;
 }
 
-int vp9_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
+int vp9_full_search_sadx3(MACROBLOCK *x, int_mv *ref_mv,
                           int sad_per_bit, int distance,
                           vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost,
-                          int *mvcost[2], int_mv *center_mv) {
-  uint8_t *what = (*(b->base_src) + b->src);
-  int what_stride = b->src_stride;
+                          int *mvcost[2], int_mv *center_mv, int n) {
+  const MACROBLOCKD* const xd = &x->e_mbd;
+  uint8_t *what = x->plane[0].src.buf;
+  int what_stride = x->plane[0].src.stride;
   uint8_t *in_what;
-  int in_what_stride = d->pre_stride;
-  int mv_stride = d->pre_stride;
+  int in_what_stride = xd->plane[0].pre[0].stride;
+  int mv_stride = xd->plane[0].pre[0].stride;
   uint8_t *bestaddress;
-  int_mv *best_mv = &d->bmi.as_mv[0];
+  int_mv *best_mv = &x->e_mbd.mode_info_context->bmi[n].as_mv[0];
   int_mv this_mv;
   unsigned int bestsad = INT_MAX;
   int r, c;
 
   uint8_t *check_here;
   unsigned int thissad;
-  MACROBLOCKD *xd = &x->e_mbd;
 
   int ref_row = ref_mv->as_mv.row;
   int ref_col = ref_mv->as_mv.col;
@@ -1664,8 +1896,8 @@ int vp9_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
 
   // Work out the mid point for the search
-  in_what = *(d->base_pre) + d->pre;
-  bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;
+  in_what = xd->plane[0].pre[0].buf;
+  bestaddress = in_what + (ref_row * xd->plane[0].pre[0].stride) + ref_col;
 
   best_mv->as_mv.row = ref_row;
   best_mv->as_mv.col = ref_col;
@@ -1755,25 +1987,25 @@ int vp9_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
     return INT_MAX;
 }
 
-int vp9_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
+int vp9_full_search_sadx8(MACROBLOCK *x, int_mv *ref_mv,
                           int sad_per_bit, int distance,
                           vp9_variance_fn_ptr_t *fn_ptr,
                           int *mvjcost, int *mvcost[2],
-                          int_mv *center_mv) {
-  uint8_t *what = (*(b->base_src) + b->src);
-  int what_stride = b->src_stride;
+                          int_mv *center_mv, int n) {
+  const MACROBLOCKD* const xd = &x->e_mbd;
+  uint8_t *what = x->plane[0].src.buf;
+  int what_stride = x->plane[0].src.stride;
   uint8_t *in_what;
-  int in_what_stride = d->pre_stride;
-  int mv_stride = d->pre_stride;
+  int in_what_stride = xd->plane[0].pre[0].stride;
+  int mv_stride = xd->plane[0].pre[0].stride;
   uint8_t *bestaddress;
-  int_mv *best_mv = &d->bmi.as_mv[0];
+  int_mv *best_mv = &x->e_mbd.mode_info_context->bmi[n].as_mv[0];
   int_mv this_mv;
   unsigned int bestsad = INT_MAX;
   int r, c;
 
   uint8_t *check_here;
   unsigned int thissad;
-  MACROBLOCKD *xd = &x->e_mbd;
 
   int ref_row = ref_mv->as_mv.row;
   int ref_col = ref_mv->as_mv.col;
@@ -1794,8 +2026,8 @@ int vp9_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
 
   // Work out the mid point for the search
-  in_what = *(d->base_pre) + d->pre;
-  bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;
+  in_what = xd->plane[0].pre[0].buf;
+  bestaddress = in_what + (ref_row * xd->plane[0].pre[0].stride) + ref_col;
 
   best_mv->as_mv.row = ref_row;
   best_mv->as_mv.col = ref_col;
@@ -1909,25 +2141,25 @@ int vp9_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
   else
     return INT_MAX;
 }
 
-int vp9_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
+int vp9_refining_search_sad_c(MACROBLOCK *x,
                               int_mv *ref_mv, int error_per_bit,
                               int search_range, vp9_variance_fn_ptr_t *fn_ptr,
                               int *mvjcost, int *mvcost[2], int_mv *center_mv) {
+  const MACROBLOCKD* const xd = &x->e_mbd;
   MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
   int i, j;
   int this_row_offset, this_col_offset;
 
-  int what_stride = b->src_stride;
-  int in_what_stride = d->pre_stride;
-  uint8_t *what = (*(b->base_src) + b->src);
-  uint8_t *best_address = (uint8_t *)(*(d->base_pre) + d->pre +
-                                      (ref_mv->as_mv.row * (d->pre_stride)) +
-                                      ref_mv->as_mv.col);
+  int what_stride = x->plane[0].src.stride;
+  int in_what_stride = xd->plane[0].pre[0].stride;
+  uint8_t *what = x->plane[0].src.buf;
+  uint8_t *best_address = xd->plane[0].pre[0].buf +
+                          (ref_mv->as_mv.row * xd->plane[0].pre[0].stride) +
+                          ref_mv->as_mv.col;
   uint8_t *check_here;
   unsigned int thissad;
   int_mv this_mv;
   unsigned int bestsad = INT_MAX;
-  MACROBLOCKD *xd = &x->e_mbd;
   int_mv fcenter_mv;
 
   int *mvjsadcost = x->nmvjointsadcost;
@@ -1987,25 +2219,25 @@ int vp9_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
     return INT_MAX;
 }
 
-int vp9_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
+int vp9_refining_search_sadx4(MACROBLOCK *x,
                               int_mv *ref_mv, int error_per_bit,
                               int search_range, vp9_variance_fn_ptr_t *fn_ptr,
                               int *mvjcost, int *mvcost[2], int_mv *center_mv) {
+  const MACROBLOCKD* const xd = &x->e_mbd;
   MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
   int i, j;
   int this_row_offset, this_col_offset;
 
-  int what_stride = b->src_stride;
-  int in_what_stride = d->pre_stride;
-  uint8_t *what = (*(b->base_src) + b->src);
-  uint8_t *best_address = (uint8_t *)(*(d->base_pre) + d->pre +
-                                      (ref_mv->as_mv.row * (d->pre_stride)) +
-                                      ref_mv->as_mv.col);
+  int what_stride = x->plane[0].src.stride;
+  int in_what_stride = xd->plane[0].pre[0].stride;
+  uint8_t *what = x->plane[0].src.buf;
+  uint8_t *best_address = xd->plane[0].pre[0].buf +
+                          (ref_mv->as_mv.row * xd->plane[0].pre[0].stride) +
+                          ref_mv->as_mv.col;
   uint8_t *check_here;
   unsigned int thissad;
   int_mv this_mv;
   unsigned int bestsad = INT_MAX;
-  MACROBLOCKD *xd = &x->e_mbd;
   int_mv fcenter_mv;
 
   int *mvjsadcost = x->nmvjointsadcost;
@@ -2094,33 +2326,104 @@ int vp9_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
     return INT_MAX;
 }
 
+/* This function is called when we do joint motion search in comp_inter_inter
+ * mode.
+ */
+int vp9_refining_search_8p_c(MACROBLOCK *x,
+                             int_mv *ref_mv, int error_per_bit,
+                             int search_range, vp9_variance_fn_ptr_t *fn_ptr,
+                             int *mvjcost, int *mvcost[2], int_mv *center_mv,
+                             const uint8_t *second_pred, int w, int h) {
+  const MACROBLOCKD* const xd = &x->e_mbd;
+  MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0},
+                     {-1, -1}, {1, -1}, {-1, 1}, {1, 1}};
+  int i, j;
+  int this_row_offset, this_col_offset;
+  int what_stride = x->plane[0].src.stride;
+  int in_what_stride = xd->plane[0].pre[0].stride;
+  uint8_t *what = x->plane[0].src.buf;
+  uint8_t *best_address = xd->plane[0].pre[0].buf +
+                          (ref_mv->as_mv.row * xd->plane[0].pre[0].stride) +
+                          ref_mv->as_mv.col;
+  uint8_t *check_here;
+  unsigned int thissad;
+  int_mv this_mv;
+  unsigned int bestsad = INT_MAX;
+  int_mv fcenter_mv;
 
-#ifdef ENTROPY_STATS
-void print_mode_context(VP9_COMMON *pc) {
-  FILE *f = fopen("vp9_modecont.c", "a");
-  int i, j;
+  int *mvjsadcost = x->nmvjointsadcost;
+  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
+
+  /* Compound pred buffer */
+  uint8_t *comp_pred = vpx_memalign(16, w * h * sizeof(uint8_t));
+
+  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
+  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
+
+  /* Get compound pred by averaging two pred blocks. */
+  comp_avg_pred(comp_pred, second_pred, w, h, best_address, in_what_stride);
 
-  fprintf(f, "#include \"vp9_entropy.h\"\n");
-  fprintf(f, "const int vp9_mode_contexts[INTER_MODE_CONTEXTS][4] =");
-  fprintf(f, "{\n");
-  for (j = 0; j < INTER_MODE_CONTEXTS; j++) {
-    fprintf(f, "  {/* %d */ ", j);
-    fprintf(f, "    ");
-    for (i = 0; i < 4; i++) {
-      int this_prob;
+  bestsad = fn_ptr->sdf(what, what_stride, comp_pred, w, 0x7fffffff) +
+      mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
+
+  for (i = 0; i < search_range; i++) {
+    int best_site = -1;
+
+    for (j = 0; j < 8; j++) {
+      this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
+      this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
 
-      // context probs
-      this_prob = get_binary_prob(pc->fc.mv_ref_ct[j][i][0],
-                                  pc->fc.mv_ref_ct[j][i][1]);
+      if ((this_col_offset > x->mv_col_min) &&
+          (this_col_offset < x->mv_col_max) &&
+          (this_row_offset > x->mv_row_min) &&
+          (this_row_offset < x->mv_row_max)) {
+        check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
+            best_address;
 
-      fprintf(f, "%5d, ", this_prob);
+        /* Get compound block and use it to calculate SAD. */
+        comp_avg_pred(comp_pred, second_pred, w, h, check_here,
+                      in_what_stride);
+        thissad = fn_ptr->sdf(what, what_stride, comp_pred, w, bestsad);
+
+        if (thissad < bestsad) {
+          this_mv.as_mv.row = this_row_offset;
+          this_mv.as_mv.col = this_col_offset;
+          thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost,
+                                    mvsadcost, error_per_bit);
+
+          if (thissad < bestsad) {
+            bestsad = thissad;
+            best_site = j;
+          }
+        }
+      }
+    }
+
+    if (best_site == -1) {
+      break;
+    } else {
+      ref_mv->as_mv.row += neighbors[best_site].row;
+      ref_mv->as_mv.col += neighbors[best_site].col;
+      best_address += (neighbors[best_site].row) * in_what_stride +
+          neighbors[best_site].col;
     }
-    fprintf(f, "  },\n");
   }
 
-  fprintf(f, "};\n");
-  fclose(f);
-}
+  this_mv.as_mv.row = ref_mv->as_mv.row << 3;
+  this_mv.as_mv.col = ref_mv->as_mv.col << 3;
 
-#endif/* END MV ref count ENTROPY_STATS stats code */
+  if (bestsad < INT_MAX) {
+    int besterr;
+    comp_avg_pred(comp_pred, second_pred, w, h, best_address, in_what_stride);
+    besterr = fn_ptr->vf(what, what_stride, comp_pred, w,
+                         (unsigned int *)(&thissad)) +
+        mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit,
                    xd->allow_high_precision_mv);
+    vpx_free(comp_pred);
+    return besterr;
+  } else {
+    vpx_free(comp_pred);
+    return INT_MAX;
  }
+}
diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h
index fd1bb2b4e47e48b873d7d410bbf38ed27ae5ff1a..28b2efd288b58b2426d442ef42b32f550a96b7e7 100644
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -15,10 +15,6 @@
 #include "vp9/encoder/vp9_block.h"
 #include "vp9/encoder/vp9_variance.h"
 
-#ifdef ENTROPY_STATS
-void print_mode_context(VP9_COMMON *pc);
-#endif
-
 // The maximum number of steps in a step search given the largest
 // allowed initial step
 #define MAX_MVSEARCH_STEPS 11
@@ -37,13 +33,13 @@ void vp9_init3smotion_compensation(MACROBLOCK *x, int stride);
 // Runs sequence of diamond searches in smaller steps for RD
 struct VP9_COMP;
-int vp9_full_pixel_diamond(struct VP9_COMP *cpi, MACROBLOCK *x, BLOCK *b,
-                           BLOCKD *d, int_mv *mvp_full, int step_param,
+int vp9_full_pixel_diamond(struct VP9_COMP *cpi, MACROBLOCK *x,
+                           int_mv *mvp_full, int step_param,
                            int sadpb, int further_steps, int do_refine,
                            vp9_variance_fn_ptr_t *fn_ptr,
                            int_mv *ref_mv, int_mv *dst_mv);
 
-int vp9_hex_search(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
+int vp9_hex_search(MACROBLOCK *x,
                    int_mv *ref_mv, int_mv *best_mv,
                    int search_param, int error_per_bit,
                    const vp9_variance_fn_ptr_t *vf,
@@ -51,27 +47,27 @@ int vp9_hex_search(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                    int *mvjcost, int *mvcost[2],
                    int_mv *center_mv);
 
-typedef int (fractional_mv_step_fp) (MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv
+typedef int (fractional_mv_step_fp) (MACROBLOCK *x, int_mv
                                      *bestmv, int_mv *ref_mv, int error_per_bit,
                                      const vp9_variance_fn_ptr_t *vfp,
                                      int *mvjcost, int *mvcost[2],
                                      int *distortion, unsigned int *sse);
 
 extern fractional_mv_step_fp vp9_find_best_sub_pixel_step_iteratively;
 extern fractional_mv_step_fp vp9_find_best_sub_pixel_step;
 extern fractional_mv_step_fp vp9_find_best_half_pixel_step;
 
-typedef int (*vp9_full_search_fn_t)(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
+typedef int (*vp9_full_search_fn_t)(MACROBLOCK *x,
                                     int_mv *ref_mv, int sad_per_bit,
                                     int distance, vp9_variance_fn_ptr_t *fn_ptr,
                                     int *mvjcost, int *mvcost[2],
-                                    int_mv *center_mv);
+                                    int_mv *center_mv, int n);
 
-typedef int (*vp9_refining_search_fn_t)(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
+typedef int (*vp9_refining_search_fn_t)(MACROBLOCK *x,
                                         int_mv *ref_mv, int sad_per_bit,
                                         int distance,
                                         vp9_variance_fn_ptr_t *fn_ptr,
                                         int *mvjcost, int *mvcost[2],
                                         int_mv *center_mv);
 
-typedef int (*vp9_diamond_search_fn_t)(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
+typedef int (*vp9_diamond_search_fn_t)(MACROBLOCK *x,
                                        int_mv *ref_mv, int_mv *best_mv,
                                        int search_param, int sad_per_bit,
                                        int *num00,
@@ -79,5 +75,19 @@ typedef int (*vp9_diamond_search_fn_t)(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                        int *mvjcost, int *mvcost[2],
                                        int_mv *center_mv);
 
+int vp9_find_best_sub_pixel_comp(MACROBLOCK *x,
+                                 int_mv *bestmv, int_mv *ref_mv,
+                                 int error_per_bit,
+                                 const vp9_variance_fn_ptr_t *vfp,
+                                 int *mvjcost, int *mvcost[2],
+                                 int *distortion, unsigned int *sse1,
+                                 const uint8_t *second_pred,
+                                 int w, int h);
+int vp9_refining_search_8p_c(MACROBLOCK *x,
+                             int_mv *ref_mv, int error_per_bit,
+                             int search_range, vp9_variance_fn_ptr_t *fn_ptr,
+                             int *mvjcost, int *mvcost[2],
+                             int_mv *center_mv, const uint8_t *second_pred,
+                             int w, int h);
 #endif  // VP9_ENCODER_VP9_MCOMP_H_
diff --git a/vp9/encoder/vp9_modecosts.c b/vp9/encoder/vp9_modecosts.c
index 7d9462f94aecb1d081c14b1eef8a78ade40f1ec3..f2e4ce40f61e0aaf695c73aeedc20cecb9e0e904 100644
--- a/vp9/encoder/vp9_modecosts.c
+++ b/vp9/encoder/vp9_modecosts.c
@@ -17,32 +17,23 @@
 void vp9_init_mode_costs(VP9_COMP *c) {
   VP9_COMMON *x = &c->common;
-  const vp9_tree_p T = vp9_bmode_tree;
-  const vp9_tree_p KT = vp9_kf_bmode_tree;
+  const vp9_tree_p KT = vp9_intra_mode_tree;
   int i, j;
 
-  for (i = 0; i < VP9_KF_BINTRAMODES; i++) {
-    for (j = 0; j < VP9_KF_BINTRAMODES; j++) {
-      vp9_cost_tokens((int *)c->mb.bmode_costs[i][j],
-                      x->kf_bmode_prob[i][j], KT);
+  for (i = 0; i < VP9_INTRA_MODES; i++) {
+    for (j = 0; j < VP9_INTRA_MODES; j++) {
+      vp9_cost_tokens((int *)c->mb.y_mode_costs[i][j],
+                      x->kf_y_mode_prob[i][j], KT);
     }
   }
 
-  vp9_cost_tokens((int *)c->mb.inter_bmode_costs, x->fc.bmode_prob, T);
-  vp9_cost_tokens((int *)c->mb.inter_bmode_costs,
-                  x->fc.sub_mv_ref_prob[0], vp9_sub_mv_ref_tree);
-
   // TODO(rbultje) separate tables for superblock costing?
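/* Reviewer note — not part of the patch. vp9_cost_tokens() fills a cost table
 * by walking a vp9 probability tree from the root and accumulating the cost of
 * each binary branch; costs are in 1/256-bit units, so a p = 128 (0.5) branch
 * costs 256. A minimal sketch of the idea, under those assumptions:
 *
 *   static void cost_walk(int *costs, const vp9_prob *probs,
 *                         vp9_tree tree, vp9_tree_index i, int cost) {
 *     int b;
 *     for (b = 0; b <= 1; ++b) {
 *       const vp9_tree_index next = tree[i + b];
 *       const int c = cost + vp9_cost_bit(probs[i >> 1], b);
 *       if (next <= 0)
 *         costs[-next] = c;                        // leaf: token index
 *       else
 *         cost_walk(costs, probs, tree, next, c);  // internal node
 *     }
 *   }
 *
 * so each table initialized below is simply the per-symbol bit cost implied by
 * the current probability model, ready for the RD mode decision.
 */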
-  vp9_cost_tokens(c->mb.mbmode_cost[1], x->fc.ymode_prob, vp9_ymode_tree);
-  vp9_cost_tokens(c->mb.mbmode_cost[0],
-                  x->kf_ymode_prob[c->common.kf_ymode_probs_index],
-                  vp9_kf_ymode_tree);
+  vp9_cost_tokens(c->mb.mbmode_cost, x->fc.y_mode_prob[1],
+                  vp9_intra_mode_tree);
 
   vp9_cost_tokens(c->mb.intra_uv_mode_cost[1],
-                  x->fc.uv_mode_prob[VP9_YMODES - 1], vp9_uv_mode_tree);
+                  x->fc.uv_mode_prob[VP9_INTRA_MODES - 1], vp9_intra_mode_tree);
   vp9_cost_tokens(c->mb.intra_uv_mode_cost[0],
-                  x->kf_uv_mode_prob[VP9_YMODES - 1], vp9_uv_mode_tree);
-  vp9_cost_tokens(c->mb.i8x8_mode_costs,
-                  x->fc.i8x8_mode_prob, vp9_i8x8_mode_tree);
+                  x->kf_uv_mode_prob[VP9_INTRA_MODES - 1], vp9_intra_mode_tree);
 
   for (i = 0; i <= VP9_SWITCHABLE_FILTERS; ++i)
     vp9_cost_tokens((int *)c->mb.switchable_interp_costs[i],
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 656975aa46700224cf13fce962b6343a496e49c2..92927370a281a08e8425723b265efa516db3ead5 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -32,7 +32,6 @@
 #include "vp9/common/vp9_postproc.h"
 #endif
 #include "vpx_mem/vpx_mem.h"
-#include "vp9/common/vp9_swapyv12buffer.h"
 #include "vpx_ports/vpx_timer.h"
 #include "vp9/common/vp9_seg_common.h"
 
@@ -97,28 +96,22 @@ FILE *kf_list;
 FILE *keyfile;
 #endif
 
-#if 0
-extern int skip_true_count;
-extern int skip_false_count;
-#endif
-
 #ifdef ENTROPY_STATS
-extern int intra_mode_stats[VP9_KF_BINTRAMODES]
-                           [VP9_KF_BINTRAMODES]
-                           [VP9_KF_BINTRAMODES];
+extern int intra_mode_stats[VP9_INTRA_MODES]
+                           [VP9_INTRA_MODES]
+                           [VP9_INTRA_MODES];
 #endif
 
 #ifdef NMV_STATS
 extern void init_nmvstats();
 extern void print_nmvstats();
 #endif
-
-#if CONFIG_CODE_NONZEROCOUNT
-#ifdef NZC_STATS
-extern void init_nzcstats();
-extern void print_nzcstats();
-#endif
+#ifdef MODE_STATS
+extern void init_tx_count_stats();
+extern void write_tx_count_stats();
+extern void init_switchable_interp_stats();
+extern void write_switchable_interp_stats();
 #endif
 
 #ifdef SPEEDSTATS
@@ -128,22 +121,9 @@ unsigned int frames_at_speed[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 #if defined(SECTIONBITS_OUTPUT)
 extern unsigned __int64 Sectionbits[500];
 #endif
-#ifdef MODE_STATS
-extern int64_t Sectionbits[500];
-extern unsigned int y_modes[VP9_YMODES];
-extern unsigned int i8x8_modes[VP9_I8X8_MODES];
-extern unsigned int uv_modes[VP9_UV_MODES];
-extern unsigned int uv_modes_y[VP9_YMODES][VP9_UV_MODES];
-extern unsigned int b_modes[B_MODE_COUNT];
-extern unsigned int inter_y_modes[MB_MODE_COUNT];
-extern unsigned int inter_uv_modes[VP9_UV_MODES];
-extern unsigned int inter_b_modes[B_MODE_COUNT];
-#endif
 
 extern void vp9_init_quantizer(VP9_COMP *cpi);
 
-static int base_skip_false_prob[QINDEX_RANGE][3];
-
 // Tables relating active max Q to active min Q
 static int kf_low_motion_minq[QINDEX_RANGE];
 static int kf_high_motion_minq[QINDEX_RANGE];
@@ -161,6 +141,11 @@ static int calculate_minq_index(double maxq,
   const double minqtarget = MIN(((x3 * maxq + x2) * maxq + x1) * maxq + c,
                                 maxq);
 
+  // Special case handling to deal with the step from q2.0
+  // down to lossless mode represented by q 1.0.
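/* Reviewer note — not part of the patch. The comment above implies the
 * discrete q table jumps from the lossless point (index 0, real q 1.0)
 * straight to roughly q 2.0 at index 1, with no index in between. With only
 * the generic scan below the check, a target such as 1.6 would resolve upward
 * to index 1 (~q 2.0); the added early return resolves any target at or below
 * 2.0 down to the lossless index 0 instead.
 */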
+  if (minqtarget <= 2.0)
+    return 0;
+
   for (i = 0; i < QINDEX_RANGE; i++) {
     if (minqtarget <= vp9_convert_qindex_to_q(i))
       return i;
@@ -177,15 +162,16 @@ static void init_minq_luts(void) {
 
     kf_low_motion_minq[i] = calculate_minq_index(maxq,
-                                                 0.0000003,
-                                                 -0.000015,
-                                                 0.074,
+                                                 0.000001,
+                                                 -0.0004,
+                                                 0.15,
                                                  0.0);
     kf_high_motion_minq[i] = calculate_minq_index(maxq,
-                                                  0.0000004,
-                                                  -0.000125,
-                                                  0.14,
+                                                  0.000002,
+                                                  -0.0012,
+                                                  0.5,
                                                   0.0);
+
     gf_low_motion_minq[i] = calculate_minq_index(maxq,
                                                  0.0000015,
                                                  -0.0009,
@@ -214,51 +200,6 @@ static void set_mvcost(MACROBLOCK *mb) {
     mb->mvsadcost = mb->nmvsadcost;
   }
 }
-static void init_base_skip_probs(void) {
-  int i;
-
-  for (i = 0; i < QINDEX_RANGE; i++) {
-    const double q = vp9_convert_qindex_to_q(i);
-
-    // Exponential decay caluclation of baseline skip prob with clamping
-    // Based on crude best fit of old table.
-    const int t = (int)(564.25 * pow(2.71828, (-0.012 * q)));
-
-    base_skip_false_prob[i][1] = clip_prob(t);
-    base_skip_false_prob[i][2] = clip_prob(t * 3 / 4);
-    base_skip_false_prob[i][0] = clip_prob(t * 5 / 4);
-  }
-}
-
-static void update_base_skip_probs(VP9_COMP *cpi) {
-  VP9_COMMON *cm = &cpi->common;
-
-  if (cm->frame_type != KEY_FRAME) {
-    vp9_update_skip_probs(cpi);
-
-    if (cpi->refresh_alt_ref_frame) {
-      int k;
-      for (k = 0; k < MBSKIP_CONTEXTS; ++k)
-        cpi->last_skip_false_probs[2][k] = cm->mbskip_pred_probs[k];
-      cpi->last_skip_probs_q[2] = cm->base_qindex;
-    } else if (cpi->refresh_golden_frame) {
-      int k;
-      for (k = 0; k < MBSKIP_CONTEXTS; ++k)
-        cpi->last_skip_false_probs[1][k] = cm->mbskip_pred_probs[k];
-      cpi->last_skip_probs_q[1] = cm->base_qindex;
-    } else {
-      int k;
-      for (k = 0; k < MBSKIP_CONTEXTS; ++k)
-        cpi->last_skip_false_probs[0][k] = cm->mbskip_pred_probs[k];
-      cpi->last_skip_probs_q[0] = cm->base_qindex;
-
-      // update the baseline table for the current q
-      for (k = 0; k < MBSKIP_CONTEXTS; ++k)
-        cpi->base_skip_false_prob[cm->base_qindex][k] =
-          cm->mbskip_pred_probs[k];
-    }
-  }
-}
 
 void vp9_initialize_enc() {
   static int init_done = 0;
@@ -269,20 +210,16 @@ void vp9_initialize_enc() {
     vp9_init_quant_tables();
     vp9_init_me_luts();
     init_minq_luts();
-    init_base_skip_probs();
+    // init_base_skip_probs();
     init_done = 1;
   }
 }
 
-#ifdef PACKET_TESTING
-extern FILE *vpxlogc;
-#endif
-
 static void setup_features(VP9_COMP *cpi) {
   MACROBLOCKD *xd = &cpi->mb.e_mbd;
 
   // Set up default state for MB feature flags
-
-  xd->segmentation_enabled = 0;   // Default segmentation disabled
+  xd->segmentation_enabled = 0;
 
   xd->update_mb_segmentation_map = 0;
   xd->update_mb_segmentation_data = 0;
@@ -300,21 +237,7 @@ static void setup_features(VP9_COMP *cpi) {
 
   set_default_lf_deltas(cpi);
 }
-
 static void dealloc_compressor_data(VP9_COMP *cpi) {
-  vpx_free(cpi->tplist);
-  cpi->tplist = NULL;
-
-  // Delete last frame MV storage buffers
-  vpx_free(cpi->lfmv);
-  cpi->lfmv = 0;
-
-  vpx_free(cpi->lf_ref_frame_sign_bias);
-  cpi->lf_ref_frame_sign_bias = 0;
-
-  vpx_free(cpi->lf_ref_frame);
-  cpi->lf_ref_frame = 0;
-
   // Delete sementation map
   vpx_free(cpi->segmentation_map);
   cpi->segmentation_map = 0;
@@ -326,20 +249,16 @@ static void dealloc_compressor_data(VP9_COMP *cpi) {
   vpx_free(cpi->active_map);
   cpi->active_map = 0;
 
-  vp9_de_alloc_frame_buffers(&cpi->common);
+  vp9_free_frame_buffers(&cpi->common);
 
-  vp8_yv12_de_alloc_frame_buffer(&cpi->last_frame_uf);
-  vp8_yv12_de_alloc_frame_buffer(&cpi->scaled_source);
-  vp8_yv12_de_alloc_frame_buffer(&cpi->alt_ref_buffer);
+  vp9_free_frame_buffer(&cpi->last_frame_uf);
+  vp9_free_frame_buffer(&cpi->scaled_source);
+  vp9_free_frame_buffer(&cpi->alt_ref_buffer);
   vp9_lookahead_destroy(cpi->lookahead);
 
   vpx_free(cpi->tok);
   cpi->tok = 0;
 
-  // Structure used to monitor GF usage
-  vpx_free(cpi->gf_active_flags);
-  cpi->gf_active_flags = 0;
-
   // Activity mask based per mb zbin adjustments
   vpx_free(cpi->mb_activity_map);
   cpi->mb_activity_map = 0;
@@ -348,15 +267,6 @@ static void dealloc_compressor_data(VP9_COMP *cpi) {
 
   vpx_free(cpi->mb.pip);
   cpi->mb.pip = 0;
-
-  vpx_free(cpi->twopass.total_stats);
-  cpi->twopass.total_stats = 0;
-
-  vpx_free(cpi->twopass.total_left_stats);
-  cpi->twopass.total_left_stats = 0;
-
-  vpx_free(cpi->twopass.this_frame_stats);
-  cpi->twopass.this_frame_stats = 0;
 }
 
 // Computes a q delta (in "q index" terms) to get from a starting q value
@@ -394,7 +304,7 @@ static void configure_static_seg_features(VP9_COMP *cpi) {
   // Disable and clear down for KF
   if (cm->frame_type == KEY_FRAME) {
     // Clear down the global segmentation map
-    vpx_memset(cpi->segmentation_map, 0, (cm->mb_rows * cm->mb_cols));
+    vpx_memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
     xd->update_mb_segmentation_map = 0;
     xd->update_mb_segmentation_data = 0;
     cpi->static_mb_pct = 0;
@@ -407,7 +317,7 @@ static void configure_static_seg_features(VP9_COMP *cpi) {
   } else if (cpi->refresh_alt_ref_frame) {
     // If this is an alt ref frame
     // Clear down the global segmentation map
-    vpx_memset(cpi->segmentation_map, 0, (cm->mb_rows * cm->mb_cols));
+    vpx_memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
     xd->update_mb_segmentation_map = 0;
     xd->update_mb_segmentation_data = 0;
     cpi->static_mb_pct = 0;
@@ -437,9 +347,9 @@ static void configure_static_seg_features(VP9_COMP *cpi) {
 
       xd->mb_segment_abs_delta = SEGMENT_DELTADATA;
     }
-  }
-  // All other frames if segmentation has been enabled
-  else if (xd->segmentation_enabled) {
+  } else if (xd->segmentation_enabled) {
+    // All other frames if segmentation has been enabled
+
     // First normal frame in a valid gf or alt ref group
     if (cpi->common.frames_since_golden == 0) {
       // Set up segment features for normal frames in an arf group
@@ -451,7 +361,6 @@ static void configure_static_seg_features(VP9_COMP *cpi) {
 
         qi_delta = compute_qdelta(cpi, cpi->avg_q, (cpi->avg_q * 1.125));
         vp9_set_segdata(xd, 1, SEG_LVL_ALT_Q, (qi_delta + 2));
-        vp9_set_segdata(xd, 1, SEG_LVL_ALT_Q, 0);
         vp9_enable_segfeature(xd, 1, SEG_LVL_ALT_Q);
 
         vp9_set_segdata(xd, 1, SEG_LVL_ALT_LF, -2);
@@ -459,39 +368,37 @@ static void configure_static_seg_features(VP9_COMP *cpi) {
 
         // Segment coding disabled for compred testing
         if (high_q || (cpi->static_mb_pct == 100)) {
-          vp9_set_segref(xd, 1, ALTREF_FRAME);
+          vp9_set_segdata(xd, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME);
           vp9_enable_segfeature(xd, 1, SEG_LVL_REF_FRAME);
           vp9_enable_segfeature(xd, 1, SEG_LVL_SKIP);
         }
-      }
-      // Disable segmentation and clear down features if alt ref
-      // is not active for this group
-      else {
+      } else {
+        // Disable segmentation and clear down features if alt ref
+        // is not active for this group
+
         vp9_disable_segmentation((VP9_PTR)cpi);
 
-        vpx_memset(cpi->segmentation_map, 0,
-                   (cm->mb_rows * cm->mb_cols));
+        vpx_memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
 
         xd->update_mb_segmentation_map = 0;
         xd->update_mb_segmentation_data = 0;
 
         vp9_clearall_segfeatures(xd);
       }
-    }
+    } else if (cpi->is_src_frame_alt_ref) {
+      // Special case where we are coding over the top of a previous
+      // alt ref frame.
+      // Segment coding disabled for compred testing
 
-    // Special case where we are coding over the top of a previous
-    // alt ref frame.
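/* Reviewer note — not part of the patch. compute_qdelta(), used just above to
 * derive qi_delta, maps a multiplicative change in real Q onto the discrete
 * q-index scale. A simplified sketch of the search it performs (hypothetical
 * helper; the real function takes the cpi and scans the same q table):
 *
 *   int qdelta_sketch(double qstart, double qtarget) {
 *     int i, start_index = QINDEX_RANGE - 1, target_index = QINDEX_RANGE - 1;
 *     for (i = 0; i < QINDEX_RANGE; ++i)
 *       if (vp9_convert_qindex_to_q(i) >= qstart) { start_index = i; break; }
 *     for (i = 0; i < QINDEX_RANGE; ++i)
 *       if (vp9_convert_qindex_to_q(i) >= qtarget) { target_index = i; break; }
 *     return target_index - start_index;
 *   }
 *
 * With qtarget = 1.125 * avg_q this yields a small positive index delta, which
 * segment 1 then applies through its SEG_LVL_ALT_Q data.
 */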
-    // Segment coding disabled for compred testing
-    else if (cpi->is_src_frame_alt_ref) {
       // Enable ref frame features for segment 0 as well
       vp9_enable_segfeature(xd, 0, SEG_LVL_REF_FRAME);
       vp9_enable_segfeature(xd, 1, SEG_LVL_REF_FRAME);
 
       // All mbs should use ALTREF_FRAME
-      vp9_clear_segref(xd, 0);
-      vp9_set_segref(xd, 0, ALTREF_FRAME);
-      vp9_clear_segref(xd, 1);
-      vp9_set_segref(xd, 1, ALTREF_FRAME);
+      vp9_clear_segdata(xd, 0, SEG_LVL_REF_FRAME);
+      vp9_set_segdata(xd, 0, SEG_LVL_REF_FRAME, ALTREF_FRAME);
+      vp9_clear_segdata(xd, 1, SEG_LVL_REF_FRAME);
+      vp9_set_segdata(xd, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME);
 
       // Skip all MBs if high Q (0,0 mv and skip coeffs)
       if (high_q) {
@@ -500,9 +407,9 @@ static void configure_static_seg_features(VP9_COMP *cpi) {
       }
       // Enable data udpate
       xd->update_mb_segmentation_data = 1;
-    }
-    // All other frames.
-    else {
+    } else {
+      // All other frames.
+
       // No updates.. leave things as they are.
       xd->update_mb_segmentation_map = 0;
       xd->update_mb_segmentation_data = 0;
@@ -510,6 +417,69 @@ static void configure_static_seg_features(VP9_COMP *cpi) {
   }
 }
 
+#ifdef ENTROPY_STATS
+void vp9_update_mode_context_stats(VP9_COMP *cpi) {
+  VP9_COMMON *cm = &cpi->common;
+  int i, j;
+  unsigned int (*inter_mode_counts)[VP9_INTER_MODES - 1][2] =
+      cm->fc.inter_mode_counts;
+  int64_t (*mv_ref_stats)[VP9_INTER_MODES - 1][2] = cpi->mv_ref_stats;
+  FILE *f;
+
+  // Read the past stats counters
+  f = fopen("mode_context.bin", "rb");
+  if (!f) {
+    vpx_memset(cpi->mv_ref_stats, 0, sizeof(cpi->mv_ref_stats));
+  } else {
+    fread(cpi->mv_ref_stats, sizeof(cpi->mv_ref_stats), 1, f);
+    fclose(f);
+  }
+
+  // Add in the values for this frame
+  for (i = 0; i < INTER_MODE_CONTEXTS; i++) {
+    for (j = 0; j < VP9_INTER_MODES - 1; j++) {
+      mv_ref_stats[i][j][0] += (int64_t)inter_mode_counts[i][j][0];
+      mv_ref_stats[i][j][1] += (int64_t)inter_mode_counts[i][j][1];
+    }
+  }
+
+  // Write back the accumulated stats
+  f = fopen("mode_context.bin", "wb");
+  fwrite(cpi->mv_ref_stats, sizeof(cpi->mv_ref_stats), 1, f);
+  fclose(f);
+}
+
+void print_mode_context(VP9_COMP *cpi) {
+  FILE *f = fopen("vp9_modecont.c", "a");
+  int i, j;
+
+  fprintf(f, "#include \"vp9_entropy.h\"\n");
+  fprintf(
+      f,
+      "const int inter_mode_probs[INTER_MODE_CONTEXTS][VP9_INTER_MODES - 1] =");
+  fprintf(f, "{\n");
+  for (j = 0; j < INTER_MODE_CONTEXTS; j++) {
+    fprintf(f, "  {/* %d */ ", j);
+    fprintf(f, "    ");
+    for (i = 0; i < VP9_INTER_MODES - 1; i++) {
+      int this_prob;
+      int64_t count = cpi->mv_ref_stats[j][i][0] + cpi->mv_ref_stats[j][i][1];
+      if (count)
+        this_prob = ((cpi->mv_ref_stats[j][i][0] * 256) + (count >> 1)) / count;
+      else
+        this_prob = 128;
+
+      // context probs
+      fprintf(f, "%5d, ", this_prob);
+    }
+    fprintf(f, "  },\n");
+  }
+
+  fprintf(f, "};\n");
+  fclose(f);
+}
+#endif  // ENTROPY_STATS
+
 // DEBUG: Print out the segment id of each MB in the current frame.
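/* Reviewer note — not part of the patch. The ENTROPY_STATS dump above derives
 * an 8-bit probability from accumulated branch counts with round-to-nearest
 * integer division:
 *
 *   prob = (count0 * 256 + total / 2) / total,  where total = count0 + count1
 *
 * e.g. counts {300, 100}: (300 * 256 + 200) / 400 = 192, i.e. p ~ 0.75, and a
 * neutral 128 (p = 0.5) is substituted when no events were observed.
 */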
 static void print_seg_map(VP9_COMP *cpi) {
   VP9_COMMON *cm = &cpi->common;
@@ -519,8 +489,8 @@ static void print_seg_map(VP9_COMP *cpi) {
 
   fprintf(statsfile, "%10d\n", cm->current_video_frame);
 
-  for (row = 0; row < cpi->common.mb_rows; row++) {
-    for (col = 0; col < cpi->common.mb_cols; col++) {
+  for (row = 0; row < cpi->common.mi_rows; row++) {
+    for (col = 0; col < cpi->common.mi_cols; col++) {
       fprintf(statsfile, "%10d", cpi->segmentation_map[map_index]);
       map_index++;
     }
@@ -537,14 +507,13 @@ static void update_reference_segmentation_map(VP9_COMP *cpi) {
   MODE_INFO *mi, *mi_ptr = cm->mi;
   uint8_t *cache_ptr = cm->last_frame_seg_map, *cache;
 
-  for (row = 0; row < cm->mb_rows; row++) {
+  for (row = 0; row < cm->mi_rows; row++) {
     mi = mi_ptr;
     cache = cache_ptr;
-    for (col = 0; col < cm->mb_cols; col++, mi++, cache++) {
+    for (col = 0; col < cm->mi_cols; col++, mi++, cache++)
       cache[0] = mi->mbmi.segment_id;
-    }
     mi_ptr += cm->mode_info_stride;
-    cache_ptr += cm->mb_cols;
+    cache_ptr += cm->mi_cols;
   }
 }
 
@@ -561,10 +530,8 @@ static void set_default_lf_deltas(VP9_COMP *cpi) {
   cpi->mb.e_mbd.ref_lf_deltas[GOLDEN_FRAME] = -2;
   cpi->mb.e_mbd.ref_lf_deltas[ALTREF_FRAME] = -2;
 
-  cpi->mb.e_mbd.mode_lf_deltas[0] = 4;               // BPRED
-  cpi->mb.e_mbd.mode_lf_deltas[1] = -2;              // Zero
-  cpi->mb.e_mbd.mode_lf_deltas[2] = 2;               // New mv
-  cpi->mb.e_mbd.mode_lf_deltas[3] = 4;               // Split mv
+  cpi->mb.e_mbd.mode_lf_deltas[0] = 0;              // Zero
+  cpi->mb.e_mbd.mode_lf_deltas[1] = 0;              // New mv
 }
 
 static void set_rd_speed_thresholds(VP9_COMP *cpi, int mode, int speed) {
@@ -573,9 +540,8 @@ static void set_rd_speed_thresholds(VP9_COMP *cpi, int mode, int speed) {
   int i;
 
   // Set baseline threshold values
-  for (i = 0; i < MAX_MODES; ++i) {
-    sf->thresh_mult[i] = (mode == 0) ? -500 : 0;
-  }
+  for (i = 0; i < MAX_MODES; ++i)
+    sf->thresh_mult[i] = mode == 0 ? -500 : 0;
 
   sf->thresh_mult[THR_ZEROMV   ] = 0;
   sf->thresh_mult[THR_ZEROG    ] = 0;
@@ -601,7 +567,6 @@ static void set_rd_speed_thresholds(VP9_COMP *cpi, int mode, int speed) {
   sf->thresh_mult[THR_D63_PRED ] += speed_multiplier * 1500;
 
   sf->thresh_mult[THR_B_PRED   ] += speed_multiplier * 2500;
-  sf->thresh_mult[THR_I8X8_PRED] += speed_multiplier * 2500;
 
   sf->thresh_mult[THR_NEWMV    ] += speed_multiplier * 1000;
   sf->thresh_mult[THR_NEWG     ] += speed_multiplier * 1000;
@@ -611,43 +576,39 @@ static void set_rd_speed_thresholds(VP9_COMP *cpi, int mode, int speed) {
   sf->thresh_mult[THR_SPLITG   ] += speed_multiplier * 2500;
   sf->thresh_mult[THR_SPLITA   ] += speed_multiplier * 2500;
 
-  sf->thresh_mult[THR_COMP_ZEROLG   ] += speed_multiplier * 1500;
   sf->thresh_mult[THR_COMP_ZEROLA   ] += speed_multiplier * 1500;
   sf->thresh_mult[THR_COMP_ZEROGA   ] += speed_multiplier * 1500;
 
-  sf->thresh_mult[THR_COMP_NEARESTLG] += speed_multiplier * 1500;
   sf->thresh_mult[THR_COMP_NEARESTLA] += speed_multiplier * 1500;
   sf->thresh_mult[THR_COMP_NEARESTGA] += speed_multiplier * 1500;
 
-  sf->thresh_mult[THR_COMP_NEARLG   ] += speed_multiplier * 1500;
   sf->thresh_mult[THR_COMP_NEARLA   ] += speed_multiplier * 1500;
   sf->thresh_mult[THR_COMP_NEARGA   ] += speed_multiplier * 1500;
 
-  sf->thresh_mult[THR_COMP_NEWLG    ] += speed_multiplier * 2000;
   sf->thresh_mult[THR_COMP_NEWLA    ] += speed_multiplier * 2000;
   sf->thresh_mult[THR_COMP_NEWGA    ] += speed_multiplier * 2000;
 
   sf->thresh_mult[THR_COMP_SPLITLA  ] += speed_multiplier * 4500;
   sf->thresh_mult[THR_COMP_SPLITGA  ] += speed_multiplier * 4500;
-  sf->thresh_mult[THR_COMP_SPLITLG  ] += speed_multiplier * 4500;
-
-#if CONFIG_COMP_INTERINTRA_PRED
-  sf->thresh_mult[THR_COMP_INTERINTRA_ZEROL   ] += speed_multiplier * 1500;
-  sf->thresh_mult[THR_COMP_INTERINTRA_ZEROG   ] += speed_multiplier * 1500;
-  sf->thresh_mult[THR_COMP_INTERINTRA_ZEROA   ] += speed_multiplier * 1500;
-
-  sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTL] += speed_multiplier * 1500;
-  sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTG] += speed_multiplier * 1500;
-  sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTA] += speed_multiplier * 1500;
-
-  sf->thresh_mult[THR_COMP_INTERINTRA_NEARL   ] += speed_multiplier * 1500;
-  sf->thresh_mult[THR_COMP_INTERINTRA_NEARG   ] += speed_multiplier * 1500;
-  sf->thresh_mult[THR_COMP_INTERINTRA_NEARA   ] += speed_multiplier * 1500;
-
-  sf->thresh_mult[THR_COMP_INTERINTRA_NEWL    ] += speed_multiplier * 2000;
-  sf->thresh_mult[THR_COMP_INTERINTRA_NEWG    ] += speed_multiplier * 2000;
-  sf->thresh_mult[THR_COMP_INTERINTRA_NEWA    ] += speed_multiplier * 2000;
-#endif
+  if (speed > 4) {
+    for (i = 0; i < MAX_MODES; ++i)
+      sf->thresh_mult[i] = INT_MAX;
+
+    sf->thresh_mult[THR_DC       ] = 0;
+    sf->thresh_mult[THR_TM       ] = 0;
+    sf->thresh_mult[THR_NEWMV    ] = 4000;
+    sf->thresh_mult[THR_NEWG     ] = 4000;
+    sf->thresh_mult[THR_NEWA     ] = 4000;
+    sf->thresh_mult[THR_NEARESTMV] = 0;
+    sf->thresh_mult[THR_NEARESTG ] = 0;
+    sf->thresh_mult[THR_NEARESTA ] = 0;
+    sf->thresh_mult[THR_NEARMV   ] = 2000;
+    sf->thresh_mult[THR_NEARG    ] = 2000;
+    sf->thresh_mult[THR_NEARA    ] = 2000;
+    sf->thresh_mult[THR_COMP_NEARESTLA] = 2000;
+    sf->recode_loop = 0;
+  }
 
   /* disable frame modes if flags not set */
   if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) {
@@ -656,12 +617,6 @@ static void set_rd_speed_thresholds(VP9_COMP *cpi, int mode, int speed) {
     sf->thresh_mult[THR_ZEROMV  ] = INT_MAX;
     sf->thresh_mult[THR_NEARMV  ] = INT_MAX;
     sf->thresh_mult[THR_SPLITMV ] = INT_MAX;
-#if CONFIG_COMP_INTERINTRA_PRED
-    sf->thresh_mult[THR_COMP_INTERINTRA_ZEROL   ] = INT_MAX;
-    sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTL] = INT_MAX;
-    sf->thresh_mult[THR_COMP_INTERINTRA_NEARL   ] = INT_MAX;
-    sf->thresh_mult[THR_COMP_INTERINTRA_NEWL    ] = INT_MAX;
-#endif
   }
   if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) {
     sf->thresh_mult[THR_NEARESTG ] = INT_MAX;
@@ -669,12 +624,6 @@ static void set_rd_speed_thresholds(VP9_COMP *cpi, int mode, int speed) {
     sf->thresh_mult[THR_NEARG    ] = INT_MAX;
     sf->thresh_mult[THR_NEWG     ] = INT_MAX;
     sf->thresh_mult[THR_SPLITG   ] = INT_MAX;
-#if CONFIG_COMP_INTERINTRA_PRED
-    sf->thresh_mult[THR_COMP_INTERINTRA_ZEROG   ] = INT_MAX;
-    sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTG] = INT_MAX;
-    sf->thresh_mult[THR_COMP_INTERINTRA_NEARG   ] = INT_MAX;
-    sf->thresh_mult[THR_COMP_INTERINTRA_NEWG    ] = INT_MAX;
-#endif
   }
   if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) {
     sf->thresh_mult[THR_NEARESTA ] = INT_MAX;
@@ -682,22 +631,8 @@ static void set_rd_speed_thresholds(VP9_COMP *cpi, int mode, int speed) {
     sf->thresh_mult[THR_NEARA    ] = INT_MAX;
     sf->thresh_mult[THR_NEWA     ] = INT_MAX;
     sf->thresh_mult[THR_SPLITA   ] = INT_MAX;
-#if CONFIG_COMP_INTERINTRA_PRED
-    sf->thresh_mult[THR_COMP_INTERINTRA_ZEROA   ] = INT_MAX;
-    sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTA] = INT_MAX;
-    sf->thresh_mult[THR_COMP_INTERINTRA_NEARA   ] = INT_MAX;
-    sf->thresh_mult[THR_COMP_INTERINTRA_NEWA    ] = INT_MAX;
-#endif
   }
 
-  if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_GOLD_FLAG)) !=
-      (VP9_LAST_FLAG | VP9_GOLD_FLAG)) {
-    sf->thresh_mult[THR_COMP_ZEROLG   ] = INT_MAX;
-    sf->thresh_mult[THR_COMP_NEARESTLG] = INT_MAX;
-    sf->thresh_mult[THR_COMP_NEARLG   ] = INT_MAX;
-    sf->thresh_mult[THR_COMP_NEWLG    ] = INT_MAX;
-    sf->thresh_mult[THR_COMP_SPLITLG  ] = INT_MAX;
-  }
   if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) !=
       (VP9_LAST_FLAG | VP9_ALT_FLAG)) {
     sf->thresh_mult[THR_COMP_ZEROLA   ] = INT_MAX;
@@ -719,7 +654,7 @@ static void set_rd_speed_thresholds(VP9_COMP *cpi, int mode, int speed) {
 void vp9_set_speed_features(VP9_COMP *cpi) {
   SPEED_FEATURES *sf = &cpi->sf;
   int mode = cpi->compressor_speed;
-  int speed = cpi->Speed;
+  int speed = cpi->speed;
   int i;
 
   // Only modes 0 and 1 supported for now in experimental code basae
@@ -736,23 +671,23 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
   // best quality defaults
   sf->RD = 1;
   sf->search_method = NSTEP;
-  sf->improved_dct = 1;
   sf->auto_filter = 1;
   sf->recode_loop = 1;
   sf->quarter_pixel_search = 1;
   sf->half_pixel_search = 1;
   sf->iterative_sub_pixel = 1;
-  sf->no_skip_block4x4_search = 1;
-  if (cpi->oxcf.lossless)
-    sf->optimize_coefficients = 0;
-  else
-    sf->optimize_coefficients = 1;
-
+  sf->optimize_coefficients = !cpi->oxcf.lossless;
   sf->first_step = 0;
   sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
-  sf->static_segmentation = 1;
-  sf->splitmode_breakout = 0;
-  sf->mb16_breakout = 0;
+  sf->comp_inter_joint_search_thresh = BLOCK_SIZE_AB4X4;
+  sf->adpative_rd_thresh = 0;
+
+#if CONFIG_MULTIPLE_ARF
+  // Switch segmentation off.
+  sf->static_segmentation = 0;
+#else
+  sf->static_segmentation = 0;
+#endif
 
   switch (mode) {
     case 0: // best quality mode
@@ -760,52 +695,19 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
       break;
 
     case 1:
-      sf->static_segmentation = 1;
-      sf->splitmode_breakout = 1;
-      sf->mb16_breakout = 0;
-
+#if CONFIG_MULTIPLE_ARF
+      // Switch segmentation off.
+      sf->static_segmentation = 0;
+#else
+      sf->static_segmentation = 0;
+#endif
+      sf->comp_inter_joint_search_thresh = BLOCK_SIZE_SB8X8;
+      sf->adpative_rd_thresh = 1;
       if (speed > 0) {
-        /* Disable coefficient optimization above speed 0 */
+        sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES;
         sf->optimize_coefficients = 0;
-        sf->no_skip_block4x4_search = 0;
-        sf->first_step = 1;
-
-        cpi->mode_check_freq[THR_SPLITG] = 2;
-        cpi->mode_check_freq[THR_SPLITA] = 2;
-        cpi->mode_check_freq[THR_SPLITMV] = 0;
-
-        cpi->mode_check_freq[THR_COMP_SPLITGA] = 2;
-        cpi->mode_check_freq[THR_COMP_SPLITLG] = 2;
-        cpi->mode_check_freq[THR_COMP_SPLITLA] = 0;
       }
-
-      if (speed > 1) {
-        cpi->mode_check_freq[THR_SPLITG] = 4;
-        cpi->mode_check_freq[THR_SPLITA] = 4;
-        cpi->mode_check_freq[THR_SPLITMV] = 2;
-
-        cpi->mode_check_freq[THR_COMP_SPLITGA] = 4;
-        cpi->mode_check_freq[THR_COMP_SPLITLG] = 4;
-        cpi->mode_check_freq[THR_COMP_SPLITLA] = 2;
-      }
-
-      if (speed > 2) {
-        cpi->mode_check_freq[THR_SPLITG] = 15;
-        cpi->mode_check_freq[THR_SPLITA] = 15;
-        cpi->mode_check_freq[THR_SPLITMV] = 7;
-
-        cpi->mode_check_freq[THR_COMP_SPLITGA] = 15;
-        cpi->mode_check_freq[THR_COMP_SPLITLG] = 15;
-        cpi->mode_check_freq[THR_COMP_SPLITLA] = 7;
-
-        sf->improved_dct = 0;
-
-        // Only do recode loop on key frames, golden frames and
-        // alt ref frames
-        sf->recode_loop = 2;
-      }
-
       break;
 
   }; /* switch */
@@ -817,7 +719,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
   // so make sure they are always turned off.
   if (cpi->pass == 1) {
     sf->optimize_coefficients = 0;
-    sf->improved_dct = 0;
   }
 
   cpi->mb.fwd_txm16x16  = vp9_short_fdct16x16;
@@ -830,9 +731,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
   }
 
   cpi->mb.quantize_b_4x4      = vp9_regular_quantize_b_4x4;
-  cpi->mb.quantize_b_4x4_pair = vp9_regular_quantize_b_4x4_pair;
-  cpi->mb.quantize_b_8x8      = vp9_regular_quantize_b_8x8;
-  cpi->mb.quantize_b_16x16    = vp9_regular_quantize_b_16x16;
 
   vp9_init_quantizer(cpi);
 
@@ -844,26 +742,27 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
     cpi->find_fractional_mv_step = vp9_find_best_half_pixel_step;
   }
 
-  if (cpi->sf.optimize_coefficients == 1 && cpi->pass != 1)
-    cpi->mb.optimize = 1;
-  else
-    cpi->mb.optimize = 0;
+  cpi->mb.optimize = cpi->sf.optimize_coefficients == 1 && cpi->pass != 1;
 
 #ifdef SPEEDSTATS
-  frames_at_speed[cpi->Speed]++;
+  frames_at_speed[cpi->speed]++;
#endif
 }
 
 static void alloc_raw_frame_buffers(VP9_COMP *cpi) {
+  VP9_COMMON *cm = &cpi->common;
+
   cpi->lookahead = vp9_lookahead_init(cpi->oxcf.width, cpi->oxcf.height,
+                                      cm->subsampling_x, cm->subsampling_y,
                                       cpi->oxcf.lag_in_frames);
   if (!cpi->lookahead)
     vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
                        "Failed to allocate lag buffers");
 
-  if (vp8_yv12_alloc_frame_buffer(&cpi->alt_ref_buffer,
-                                  cpi->oxcf.width, cpi->oxcf.height,
-                                  VP9BORDERINPIXELS))
+  if (vp9_realloc_frame_buffer(&cpi->alt_ref_buffer,
+                               cpi->oxcf.width, cpi->oxcf.height,
+                               cm->subsampling_x, cm->subsampling_y,
+                               VP9BORDERINPIXELS))
     vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
                        "Failed to allocate altref buffer");
 }
@@ -871,8 +770,8 @@ static void alloc_raw_frame_buffers(VP9_COMP *cpi) {
 static int alloc_partition_data(VP9_COMP *cpi) {
   vpx_free(cpi->mb.pip);
 
-  cpi->mb.pip = vpx_calloc((cpi->common.mb_cols + 1) *
-                           (cpi->common.mb_rows + 1),
+  cpi->mb.pip = vpx_calloc((cpi->common.mode_info_stride) *
+                           (cpi->common.mi_rows + 64 / MI_SIZE),
                            sizeof(PARTITION_INFO));
   if (!cpi->mb.pip)
     return 1;
@@ -893,20 +792,24 @@ void vp9_alloc_compressor_data(VP9_COMP *cpi) {
     vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
                        "Failed to allocate partition data");
 
-  if (vp8_yv12_alloc_frame_buffer(&cpi->last_frame_uf,
-                                  cm->width, cm->height, VP9BORDERINPIXELS))
+  if (vp9_alloc_frame_buffer(&cpi->last_frame_uf,
+                             cm->width, cm->height,
+                             cm->subsampling_x, cm->subsampling_y,
+                             VP9BORDERINPIXELS))
     vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
                        "Failed to allocate last frame buffer");
 
-  if (vp8_yv12_alloc_frame_buffer(&cpi->scaled_source,
-                                  cm->width, cm->height, VP9BORDERINPIXELS))
+  if (vp9_alloc_frame_buffer(&cpi->scaled_source,
+                             cm->width, cm->height,
+                             cm->subsampling_x, cm->subsampling_y,
+                             VP9BORDERINPIXELS))
     vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
                        "Failed to allocate scaled source buffer");
 
   vpx_free(cpi->tok);
 
   {
-    unsigned int tokens = cm->mb_rows * cm->mb_cols * (24 * 16 + 1);
+    unsigned int tokens = get_token_alloc(cm->mb_rows, cm->mb_cols);
 
     CHECK_MEM_ERROR(cpi->tok, vpx_calloc(tokens, sizeof(*cpi->tok)));
   }
@@ -916,13 +819,6 @@ void vp9_alloc_compressor_data(VP9_COMP *cpi) {
   cpi->gf_bad_count = 0;
   cpi->gf_update_recommended = 0;
 
-
-  // Structures used to minitor GF usage
-  vpx_free(cpi->gf_active_flags);
-  CHECK_MEM_ERROR(cpi->gf_active_flags,
-                  vpx_calloc(1, cm->mb_rows * cm->mb_cols));
-  cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
-
   vpx_free(cpi->mb_activity_map);
   CHECK_MEM_ERROR(cpi->mb_activity_map,
                   vpx_calloc(sizeof(unsigned int),
@@ -932,58 +828,26 @@ void vp9_alloc_compressor_data(VP9_COMP *cpi) {
   CHECK_MEM_ERROR(cpi->mb_norm_activity_map,
                   vpx_calloc(sizeof(unsigned int),
                              cm->mb_rows * cm->mb_cols));
-
-  vpx_free(cpi->twopass.total_stats);
-
-  cpi->twopass.total_stats = vpx_calloc(1, sizeof(FIRSTPASS_STATS));
-
-  vpx_free(cpi->twopass.total_left_stats);
-  cpi->twopass.total_left_stats = vpx_calloc(1, sizeof(FIRSTPASS_STATS));
-
-  vpx_free(cpi->twopass.this_frame_stats);
-
-  cpi->twopass.this_frame_stats = vpx_calloc(1, sizeof(FIRSTPASS_STATS));
-
-  if (!cpi->twopass.total_stats ||
-      !cpi->twopass.total_left_stats ||
-      !cpi->twopass.this_frame_stats)
-    vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
-                       "Failed to allocate firstpass stats");
-
-  vpx_free(cpi->tplist);
-
-  CHECK_MEM_ERROR(cpi->tplist,
-                  vpx_malloc(sizeof(TOKENLIST) * (cpi->common.mb_rows)));
 }
 
 static void update_frame_size(VP9_COMP *cpi) {
   VP9_COMMON *cm = &cpi->common;
 
-  /* our internal buffers are always multiples of 16 */
-  int aligned_width = (cm->width + 15) & ~15;
-  int aligned_height = (cm->height + 15) & ~15;
-
-  cm->mb_rows = aligned_height >> 4;
-  cm->mb_cols = aligned_width >> 4;
-  cm->MBs = cm->mb_rows * cm->mb_cols;
-  cm->mode_info_stride = cm->mb_cols + 1;
-  memset(cm->mip, 0,
-         (cm->mb_cols + 1) * (cm->mb_rows + 1) * sizeof(MODE_INFO));
-  vp9_update_mode_info_border(cm, cm->mip);
+  vp9_update_frame_size(cm);
 
-  cm->mi = cm->mip + cm->mode_info_stride + 1;
-  cm->prev_mi = cm->prev_mip + cm->mode_info_stride + 1;
-  vp9_update_mode_info_in_image(cm, cm->mi);
-
-  /* Update size of buffers local to this frame */
-  if (vp8_yv12_realloc_frame_buffer(&cpi->last_frame_uf,
-                                    cm->width, cm->height, VP9BORDERINPIXELS))
+  // Update size of buffers local to this frame
+  if (vp9_realloc_frame_buffer(&cpi->last_frame_uf,
+                               cm->width, cm->height,
+                               cm->subsampling_x, cm->subsampling_y,
+                               VP9BORDERINPIXELS))
     vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
                        "Failed to reallocate last frame buffer");
 
-  if (vp8_yv12_realloc_frame_buffer(&cpi->scaled_source,
-                                    cm->width, cm->height, VP9BORDERINPIXELS))
+  if (vp9_realloc_frame_buffer(&cpi->scaled_source,
+                               cm->width, cm->height,
+                               cm->subsampling_x, cm->subsampling_y,
+                               VP9BORDERINPIXELS))
    vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
                       "Failed to reallocate scaled source buffer");
 
@@ -1026,7 +890,7 @@ int vp9_reverse_trans(int x) {
   return 63;
 };
 void vp9_new_frame_rate(VP9_COMP *cpi, double framerate) {
-  if (framerate < .1)
+  if (framerate < 0.1)
     framerate = 30;
 
   cpi->oxcf.frame_rate = framerate;
@@ -1035,8 +899,8 @@ void vp9_new_frame_rate(VP9_COMP *cpi, double framerate) {
   cpi->av_per_frame_bandwidth = (int)(cpi->oxcf.target_bandwidth / cpi->output_frame_rate);
   cpi->min_frame_bandwidth = (int)(cpi->av_per_frame_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100);
 
-  if (cpi->min_frame_bandwidth < FRAME_OVERHEAD_BITS)
-    cpi->min_frame_bandwidth = FRAME_OVERHEAD_BITS;
+
+  cpi->min_frame_bandwidth = MAX(cpi->min_frame_bandwidth, FRAME_OVERHEAD_BITS);
 
   // Set Maximum gf/arf interval
   cpi->max_gf_interval = 16;
@@ -1074,10 +938,10 @@ static void set_tile_limits(VP9_COMP *cpi) {
   vp9_get_tile_n_bits(cm, &min_log2_tiles, &max_log2_tiles);
   max_log2_tiles += min_log2_tiles;
 
-  if (cm->log2_tile_columns < min_log2_tiles)
-    cm->log2_tile_columns = min_log2_tiles;
-  else if (cm->log2_tile_columns > max_log2_tiles)
-    cm->log2_tile_columns = max_log2_tiles;
+
+  cm->log2_tile_columns = clamp(cm->log2_tile_columns,
+                                min_log2_tiles, max_log2_tiles);
+
   cm->tile_columns = 1 << cm->log2_tile_columns;
   cm->tile_rows = 1 << cm->log2_tile_rows;
 }
@@ -1085,16 +949,18 @@ static void set_tile_limits(VP9_COMP *cpi) {
 static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
   VP9_COMP *cpi = (VP9_COMP *)(ptr);
   VP9_COMMON *const cm = &cpi->common;
+  int i;
 
   cpi->oxcf = *oxcf;
-  cpi->goldfreq = 7;
 
   cm->version = oxcf->version;
-  vp9_setup_version(cm);
 
   cm->width = oxcf->width;
   cm->height = oxcf->height;
+  cm->subsampling_x = 0;
+  cm->subsampling_y = 0;
+  vp9_alloc_compressor_data(cpi);
 
   // change includes all joint functionality
   vp9_change_config(ptr, oxcf);
@@ -1124,12 +990,9 @@ static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
 
   set_tile_limits(cpi);
 
-  {
-    int i;
-    cpi->fixed_divide[0] = 0;
-    for (i = 1; i < 512; i++)
-      cpi->fixed_divide[i] = 0x80000 / i;
-  }
+  cpi->fixed_divide[0] = 0;
+  for (i = 1; i < 512; i++)
+    cpi->fixed_divide[i] = 0x80000 / i;
 }
 
@@ -1142,7 +1005,6 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
 
   if (cm->version != oxcf->version) {
     cm->version = oxcf->version;
-    vp9_setup_version(cm);
   }
 
   cpi->oxcf = *oxcf;
@@ -1157,13 +1019,7 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
     case MODE_SECONDPASS:
      cpi->pass = 2;
      cpi->compressor_speed = 1;
-
-      if (cpi->oxcf.cpu_used < -5) {
-        cpi->oxcf.cpu_used = -5;
-      }
-
-      if (cpi->oxcf.cpu_used > 5)
-        cpi->oxcf.cpu_used = 5;
+      cpi->oxcf.cpu_used = clamp(cpi->oxcf.cpu_used, -5, 5);
      break;
 
    case MODE_SECONDPASS_BEST:
@@ -1178,11 +1034,11 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
   cpi->oxcf.lossless = oxcf->lossless;
   if (cpi->oxcf.lossless) {
-    cpi->mb.e_mbd.inv_txm4x4_1      = vp9_short_iwalsh4x4_1;
-    cpi->mb.e_mbd.inv_txm4x4        = vp9_short_iwalsh4x4;
+    cpi->mb.e_mbd.inv_txm4x4_1_add  = vp9_short_iwalsh4x4_1_add;
+    cpi->mb.e_mbd.inv_txm4x4_add    = vp9_short_iwalsh4x4_add;
   } else {
-    cpi->mb.e_mbd.inv_txm4x4_1      = vp9_short_idct4x4_1;
-    cpi->mb.e_mbd.inv_txm4x4        = vp9_short_idct4x4;
+    cpi->mb.e_mbd.inv_txm4x4_1_add  = vp9_short_idct4x4_1_add;
+    cpi->mb.e_mbd.inv_txm4x4_add    = vp9_short_idct4x4_add;
   }
 
   cpi->baseline_gf_interval = DEFAULT_GF_INTERVAL;
@@ -1193,7 +1049,8 @@
   //  cpi->use_last_frame_only = 0;
   cpi->refresh_golden_frame = 0;
   cpi->refresh_last_frame = 1;
-  cm->refresh_entropy_probs = 1;
+  cm->refresh_frame_context = 1;
+  cm->reset_frame_context = 0;
 
   setup_features(cpi);
   cpi->mb.e_mbd.allow_high_precision_mv = 0;   // Default mv precision adaptation
@@ -1207,8 +1064,7 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
   }
 
   // At the moment the first order values may not be > MAXQ
-  if (cpi->oxcf.fixed_q > MAXQ)
-    cpi->oxcf.fixed_q = MAXQ;
+  cpi->oxcf.fixed_q = MIN(cpi->oxcf.fixed_q, MAXQ);
 
   // local file playback mode == really big buffer
   if (cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK) {
@@ -1244,29 +1100,19 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
   cpi->best_quality = cpi->oxcf.best_allowed_q;
 
   // active values should only be modified if out of new range
-  if (cpi->active_worst_quality > cpi->oxcf.worst_allowed_q) {
-    cpi->active_worst_quality = cpi->oxcf.worst_allowed_q;
-  }
-  // less likely
-  else if (cpi->active_worst_quality < cpi->oxcf.best_allowed_q) {
-    cpi->active_worst_quality = cpi->oxcf.best_allowed_q;
-  }
-  if (cpi->active_best_quality < cpi->oxcf.best_allowed_q) {
-    cpi->active_best_quality = cpi->oxcf.best_allowed_q;
-  }
-  // less likely
-  else if (cpi->active_best_quality > cpi->oxcf.worst_allowed_q) {
-    cpi->active_best_quality = cpi->oxcf.worst_allowed_q;
-  }
+  cpi->active_worst_quality = clamp(cpi->active_worst_quality,
+                                    cpi->oxcf.best_allowed_q,
+                                    cpi->oxcf.worst_allowed_q);
+
+  cpi->active_best_quality = clamp(cpi->active_best_quality,
+                                   cpi->oxcf.best_allowed_q,
+                                   cpi->oxcf.worst_allowed_q);
 
-  cpi->buffered_mode = (cpi->oxcf.optimal_buffer_level > 0) ? TRUE : FALSE;
+  cpi->buffered_mode = cpi->oxcf.optimal_buffer_level > 0;
 
   cpi->cq_target_quality = cpi->oxcf.cq_level;
 
-  if (!cm->use_bilinear_mc_filter)
-    cm->mcomp_filter_type = DEFAULT_INTERP_FILTER;
-  else
-    cm->mcomp_filter_type = BILINEAR;
+  cm->mcomp_filter_type = DEFAULT_INTERP_FILTER;
 
   cpi->target_bandwidth = cpi->oxcf.target_bandwidth;
 
@@ -1274,22 +1120,17 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
   cm->display_height = cpi->oxcf.height;
 
   // VP8 sharpness level mapping 0-7 (vs 0-10 in general VPx dialogs)
-  if (cpi->oxcf.Sharpness > 7)
-    cpi->oxcf.Sharpness = 7;
+  cpi->oxcf.Sharpness = MIN(7, cpi->oxcf.Sharpness);
 
   cm->sharpness_level = cpi->oxcf.Sharpness;
 
-  // Increasing the size of the frame beyond the first seen frame, or some
-  // otherwise signalled maximum size, is not supported.
-  // TODO(jkoleszar): exit gracefully.
-  if (!cpi->initial_width) {
-    alloc_raw_frame_buffers(cpi);
-    vp9_alloc_compressor_data(cpi);
-    cpi->initial_width = cm->width;
-    cpi->initial_height = cm->height;
+  if (cpi->initial_width) {
+    // Increasing the size of the frame beyond the first seen frame, or some
+    // otherwise signalled maximum size, is not supported.
+    // TODO(jkoleszar): exit gracefully.
+    assert(cm->width <= cpi->initial_width);
+    assert(cm->height <= cpi->initial_height);
   }
-  assert(cm->width <= cpi->initial_width);
-  assert(cm->height <= cpi->initial_height);
   update_frame_size(cpi);

   if (cpi->oxcf.fixed_q >= 0) {
@@ -1298,18 +1139,22 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
     cpi->last_boosted_qindex = cpi->oxcf.fixed_q;
   }

-  cpi->Speed = cpi->oxcf.cpu_used;
+  cpi->speed = cpi->oxcf.cpu_used;

-  // force to allowlag to 0 if lag_in_frames is 0;
   if (cpi->oxcf.lag_in_frames == 0) {
+    // Force allow_lag to 0 if lag_in_frames is 0.
     cpi->oxcf.allow_lag = 0;
-  }
-  // Limit on lag buffers as these are not currently dynamically allocated
-  else if (cpi->oxcf.lag_in_frames > MAX_LAG_BUFFERS)
+  } else if (cpi->oxcf.lag_in_frames > MAX_LAG_BUFFERS) {
+    // Limit on lag buffers as these are not currently dynamically allocated
     cpi->oxcf.lag_in_frames = MAX_LAG_BUFFERS;
+  }

   // YX Temp
+#if CONFIG_MULTIPLE_ARF
+  vp9_zero(cpi->alt_ref_source);
+#else
   cpi->alt_ref_source = NULL;
+#endif
   cpi->is_src_frame_alt_ref = 0;

 #if 0
@@ -1396,29 +1241,12 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {

   init_config((VP9_PTR)cpi, oxcf);

-  memcpy(cpi->base_skip_false_prob, base_skip_false_prob, sizeof(base_skip_false_prob));
   cpi->common.current_video_frame = 0;
   cpi->kf_overspend_bits = 0;
   cpi->kf_bitrate_adjustment = 0;
-  cpi->frames_till_gf_update_due = 0;
+  cpi->frames_till_gf_update_due = 0;
   cpi->gf_overspend_bits = 0;
-  cpi->non_gf_bitrate_adjustment = 0;
-  cm->prob_last_coded = 128;
-  cm->prob_gf_coded = 128;
-  cm->prob_intra_coded = 63;
-  cm->sb32_coded = 200;
-  cm->sb64_coded = 200;
-  for (i = 0; i < COMP_PRED_CONTEXTS; i++)
-    cm->prob_comppred[i] = 128;
-  for (i = 0; i < TX_SIZE_MAX_SB - 1; i++)
-    cm->prob_tx[i] = 128;
-
-  // Prime the recent reference frame useage counters.
-  // Hereafter they will be maintained as a sort of moving average
-  cpi->recent_ref_frame_usage[INTRA_FRAME] = 1;
-  cpi->recent_ref_frame_usage[LAST_FRAME] = 1;
-  cpi->recent_ref_frame_usage[GOLDEN_FRAME] = 1;
-  cpi->recent_ref_frame_usage[ALTREF_FRAME] = 1;
+  cpi->non_gf_bitrate_adjustment = 0;

   // Set reference frame sign bias for ALTREF frame to 1 (for now)
   cpi->common.ref_frame_sign_bias[ALTREF_FRAME] = 1;
@@ -1429,22 +1257,18 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
   cpi->alt_is_last = 0;
   cpi->gold_is_alt = 0;

-  // allocate memory for storing last frame's MVs for MV prediction.
- CHECK_MEM_ERROR(cpi->lfmv, vpx_calloc((cpi->common.mb_rows + 2) * (cpi->common.mb_cols + 2), sizeof(int_mv))); - CHECK_MEM_ERROR(cpi->lf_ref_frame_sign_bias, vpx_calloc((cpi->common.mb_rows + 2) * (cpi->common.mb_cols + 2), sizeof(int))); - CHECK_MEM_ERROR(cpi->lf_ref_frame, vpx_calloc((cpi->common.mb_rows + 2) * (cpi->common.mb_cols + 2), sizeof(int))); - // Create the encoder segmentation map and set all entries to 0 - CHECK_MEM_ERROR(cpi->segmentation_map, vpx_calloc((cpi->common.mb_rows * cpi->common.mb_cols), 1)); + CHECK_MEM_ERROR(cpi->segmentation_map, + vpx_calloc(cpi->common.mi_rows * cpi->common.mi_cols, 1)); // And a copy in common for temporal coding CHECK_MEM_ERROR(cm->last_frame_seg_map, - vpx_calloc((cpi->common.mb_rows * cpi->common.mb_cols), 1)); + vpx_calloc(cpi->common.mi_rows * cpi->common.mi_cols, 1)); // And a place holder structure is the coding context // for use if we want to save and restore it CHECK_MEM_ERROR(cpi->coding_context.last_frame_seg_map_copy, - vpx_calloc((cpi->common.mb_rows * cpi->common.mb_cols), 1)); + vpx_calloc(cpi->common.mi_rows * cpi->common.mi_cols, 1)); CHECK_MEM_ERROR(cpi->active_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1)); vpx_memset(cpi->active_map, 1, (cpi->common.mb_rows * cpi->common.mb_cols)); @@ -1462,23 +1286,13 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { if (cpi->pass != 1) init_context_counters(); #endif -#ifdef MODE_STATS - vp9_zero(y_modes); - vp9_zero(i8x8_modes); - vp9_zero(uv_modes); - vp9_zero(uv_modes_y); - vp9_zero(b_modes); - vp9_zero(inter_y_modes); - vp9_zero(inter_uv_modes); - vp9_zero(inter_b_modes); -#endif + #ifdef NMV_STATS init_nmvstats(); #endif -#if CONFIG_CODE_NONZEROCOUNT -#ifdef NZC_STATS - init_nzcstats(); -#endif +#ifdef MODE_STATS + init_tx_count_stats(); + init_switchable_interp_stats(); #endif /*Initialize the feed-forward activity masking.*/ @@ -1486,13 +1300,26 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { cpi->frames_since_key = 8; // Give a sensible default for the first frame. cpi->key_frame_frequency = cpi->oxcf.key_freq; - cpi->this_key_frame_forced = FALSE; - cpi->next_key_frame_forced = FALSE; + cpi->this_key_frame_forced = 0; + cpi->next_key_frame_forced = 0; - cpi->source_alt_ref_pending = FALSE; - cpi->source_alt_ref_active = FALSE; + cpi->source_alt_ref_pending = 0; + cpi->source_alt_ref_active = 0; cpi->refresh_alt_ref_frame = 0; +#if CONFIG_MULTIPLE_ARF + // Turn multiple ARF usage on/off. This is a quick hack for the initial test + // version. It should eventually be set via the codec API. 
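The segmentation-map allocations above move from macroblock units (mb_rows * mb_cols, 16x16 pixels each) to mode-info units (mi_rows * mi_cols, 8x8 pixels each), so the maps now carry one byte per 8x8 block. A sketch of the size relationship for a given frame; seg_map_bytes is an illustrative helper, not part of the patch:

    #include <stddef.h>

    /* MI units are 8x8 pixels, MB units are 16x16, so an MI-indexed map
     * is roughly four times the size of the MB-indexed map it replaces. */
    static size_t seg_map_bytes(int width, int height) {
      const int mi_cols = (width + 7) >> 3;   /* round up to whole 8x8 blocks */
      const int mi_rows = (height + 7) >> 3;
      return (size_t)mi_rows * mi_cols;       /* one byte per MI unit */
    }
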
+ cpi->multi_arf_enabled = 1; + + if (cpi->multi_arf_enabled) { + cpi->sequence_number = 0; + cpi->frame_coding_order_period = 0; + vp9_zero(cpi->frame_coding_order); + vp9_zero(cpi->arf_buffer_idx); + } +#endif + cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS; #if CONFIG_INTERNAL_STATS cpi->b_calculate_ssimg = 0; @@ -1514,6 +1341,8 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { cpi->tot_recode_hits = 0; cpi->summed_quality = 0; cpi->summed_weights = 0; + cpi->summedp_quality = 0; + cpi->summedp_weights = 0; } if (cpi->b_calculate_ssimg) { @@ -1555,9 +1384,8 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { cpi->mb.nmvsadcost_hp[1] = &cpi->mb.nmvsadcosts_hp[1][MV_MAX]; cal_nmvsadcosts_hp(cpi->mb.nmvsadcost_hp); - for (i = 0; i < KEY_FRAME_CONTEXT; i++) { + for (i = 0; i < KEY_FRAME_CONTEXT; i++) cpi->prior_key_frame_distance[i] = (int)cpi->output_frame_rate; - } #ifdef OUTPUT_YUV_SRC yuv_file = fopen("bd.yuv", "ab"); @@ -1589,14 +1417,14 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { vp9_set_speed_features(cpi); // Set starting values of RD threshold multipliers (128 = *1) - for (i = 0; i < MAX_MODES; i++) { + for (i = 0; i < MAX_MODES; i++) cpi->rd_thresh_mult[i] = 128; - } -#define BFP(BT, SDF, VF, SVF, SVFHH, SVFHV, SVFHHV, SDX3F, SDX8F, SDX4DF) \ +#define BFP(BT, SDF, VF, SVF, SVAF, SVFHH, SVFHV, SVFHHV, SDX3F, SDX8F, SDX4DF)\ cpi->fn_ptr[BT].sdf = SDF; \ cpi->fn_ptr[BT].vf = VF; \ cpi->fn_ptr[BT].svf = SVF; \ + cpi->fn_ptr[BT].svaf = SVAF; \ cpi->fn_ptr[BT].svf_halfpix_h = SVFHH; \ cpi->fn_ptr[BT].svf_halfpix_v = SVFHV; \ cpi->fn_ptr[BT].svf_halfpix_hv = SVFHHV; \ @@ -1604,33 +1432,69 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { cpi->fn_ptr[BT].sdx8f = SDX8F; \ cpi->fn_ptr[BT].sdx4df = SDX4DF; + BFP(BLOCK_32X16, vp9_sad32x16, vp9_variance32x16, vp9_sub_pixel_variance32x16, + vp9_sub_pixel_avg_variance32x16, NULL, NULL, + NULL, NULL, NULL, + vp9_sad32x16x4d) + + BFP(BLOCK_16X32, vp9_sad16x32, vp9_variance16x32, vp9_sub_pixel_variance16x32, + vp9_sub_pixel_avg_variance16x32, NULL, NULL, + NULL, NULL, NULL, + vp9_sad16x32x4d) + + BFP(BLOCK_64X32, vp9_sad64x32, vp9_variance64x32, vp9_sub_pixel_variance64x32, + vp9_sub_pixel_avg_variance64x32, NULL, NULL, + NULL, NULL, NULL, + vp9_sad64x32x4d) + + BFP(BLOCK_32X64, vp9_sad32x64, vp9_variance32x64, vp9_sub_pixel_variance32x64, + vp9_sub_pixel_avg_variance32x64, NULL, NULL, + NULL, NULL, NULL, + vp9_sad32x64x4d) BFP(BLOCK_32X32, vp9_sad32x32, vp9_variance32x32, vp9_sub_pixel_variance32x32, - vp9_variance_halfpixvar32x32_h, vp9_variance_halfpixvar32x32_v, + vp9_sub_pixel_avg_variance32x32, vp9_variance_halfpixvar32x32_h, + vp9_variance_halfpixvar32x32_v, vp9_variance_halfpixvar32x32_hv, vp9_sad32x32x3, vp9_sad32x32x8, vp9_sad32x32x4d) BFP(BLOCK_64X64, vp9_sad64x64, vp9_variance64x64, vp9_sub_pixel_variance64x64, - vp9_variance_halfpixvar64x64_h, vp9_variance_halfpixvar64x64_v, + vp9_sub_pixel_avg_variance64x64, vp9_variance_halfpixvar64x64_h, + vp9_variance_halfpixvar64x64_v, vp9_variance_halfpixvar64x64_hv, vp9_sad64x64x3, vp9_sad64x64x8, vp9_sad64x64x4d) BFP(BLOCK_16X16, vp9_sad16x16, vp9_variance16x16, vp9_sub_pixel_variance16x16, - vp9_variance_halfpixvar16x16_h, vp9_variance_halfpixvar16x16_v, - vp9_variance_halfpixvar16x16_hv, vp9_sad16x16x3, vp9_sad16x16x8, - vp9_sad16x16x4d) + vp9_sub_pixel_avg_variance16x16, vp9_variance_halfpixvar16x16_h, + vp9_variance_halfpixvar16x16_v, + vp9_variance_halfpixvar16x16_hv, vp9_sad16x16x3, vp9_sad16x16x8, + vp9_sad16x16x4d) BFP(BLOCK_16X8, vp9_sad16x8, vp9_variance16x8, 
vp9_sub_pixel_variance16x8, - NULL, NULL, NULL, vp9_sad16x8x3, vp9_sad16x8x8, vp9_sad16x8x4d) + vp9_sub_pixel_avg_variance16x8, NULL, NULL, NULL, + vp9_sad16x8x3, vp9_sad16x8x8, vp9_sad16x8x4d) BFP(BLOCK_8X16, vp9_sad8x16, vp9_variance8x16, vp9_sub_pixel_variance8x16, - NULL, NULL, NULL, vp9_sad8x16x3, vp9_sad8x16x8, vp9_sad8x16x4d) + vp9_sub_pixel_avg_variance8x16, NULL, NULL, NULL, + vp9_sad8x16x3, vp9_sad8x16x8, vp9_sad8x16x4d) BFP(BLOCK_8X8, vp9_sad8x8, vp9_variance8x8, vp9_sub_pixel_variance8x8, - NULL, NULL, NULL, vp9_sad8x8x3, vp9_sad8x8x8, vp9_sad8x8x4d) + vp9_sub_pixel_avg_variance8x8, NULL, NULL, NULL, + vp9_sad8x8x3, vp9_sad8x8x8, vp9_sad8x8x4d) + + BFP(BLOCK_8X4, vp9_sad8x4, vp9_variance8x4, vp9_sub_pixel_variance8x4, + vp9_sub_pixel_avg_variance8x4, NULL, NULL, + NULL, NULL, vp9_sad8x4x8, + vp9_sad8x4x4d) + + BFP(BLOCK_4X8, vp9_sad4x8, vp9_variance4x8, vp9_sub_pixel_variance4x8, + vp9_sub_pixel_avg_variance4x8, NULL, NULL, + NULL, NULL, vp9_sad4x8x8, + vp9_sad4x8x4d) BFP(BLOCK_4X4, vp9_sad4x4, vp9_variance4x4, vp9_sub_pixel_variance4x4, - NULL, NULL, NULL, vp9_sad4x4x3, vp9_sad4x4x8, vp9_sad4x4x4d) + vp9_sub_pixel_avg_variance4x4, NULL, NULL, NULL, + vp9_sad4x4x3, vp9_sad4x4x8, vp9_sad4x4x4d) cpi->full_search_sad = vp9_full_search_sad; cpi->diamond_search_sad = vp9_diamond_search_sad; @@ -1651,13 +1515,6 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { cpi->common.error.setjmp = 0; vp9_zero(cpi->y_uv_mode_count) -#if CONFIG_CODE_NONZEROCOUNT - vp9_zero(cm->fc.nzc_counts_4x4); - vp9_zero(cm->fc.nzc_counts_8x8); - vp9_zero(cm->fc.nzc_counts_16x16); - vp9_zero(cm->fc.nzc_counts_32x32); - vp9_zero(cm->fc.nzc_pcat_counts); -#endif return (VP9_PTR) cpi; } @@ -1678,18 +1535,18 @@ void vp9_remove_compressor(VP9_PTR *ptr) { if (cpi->pass != 1) { print_context_counters(); print_tree_update_probs(); - print_mode_context(&cpi->common); + print_mode_context(cpi); } #endif #ifdef NMV_STATS if (cpi->pass != 1) print_nmvstats(); #endif -#if CONFIG_CODE_NONZEROCOUNT -#ifdef NZC_STATS - if (cpi->pass != 1) - print_nzcstats(); -#endif +#ifdef MODE_STATS + if (cpi->pass != 1) { + write_tx_count_stats(); + write_switchable_interp_stats(); + } #endif #if CONFIG_INTERNAL_STATS @@ -1703,24 +1560,29 @@ void vp9_remove_compressor(VP9_PTR *ptr) { - cpi->first_time_stamp_ever) / 10000000.000; double total_encode_time = (cpi->time_receive_data + cpi->time_compress_data) / 1000.000; double dr = (double)cpi->bytes * (double) 8 / (double)1000 / time_encoded; -#if defined(MODE_STATS) - print_mode_contexts(&cpi->common); -#endif + if (cpi->b_calculate_psnr) { YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.ref_frame_map[cpi->lst_fb_idx]]; - double samples = 3.0 / 2 * cpi->count * lst_yv12->y_width * lst_yv12->y_height; + double samples = 3.0 / 2 * cpi->count * + lst_yv12->y_width * lst_yv12->y_height; double total_psnr = vp9_mse2psnr(samples, 255.0, cpi->total_sq_error); double total_psnr2 = vp9_mse2psnr(samples, 255.0, cpi->total_sq_error2); - double total_ssim = 100 * pow(cpi->summed_quality / cpi->summed_weights, 8.0); - - fprintf(f, "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\tGLPsnrP\tVPXSSIM\t Time(ms)\n"); - fprintf(f, "%7.2f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t%8.0f\n", - dr, cpi->total / cpi->count, total_psnr, cpi->totalp / cpi->count, total_psnr2, total_ssim, + double total_ssim = 100 * pow(cpi->summed_quality / + cpi->summed_weights, 8.0); + double total_ssimp = 100 * pow(cpi->summedp_quality / + cpi->summedp_weights, 8.0); + + fprintf(f, "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\tGLPsnrP\t" 
+ "VPXSSIM\tVPSSIMP\t Time(ms)\n"); + fprintf(f, "%7.2f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t%8.0f\n", + dr, cpi->total / cpi->count, total_psnr, + cpi->totalp / cpi->count, total_psnr2, total_ssim, total_ssimp, total_encode_time); -// fprintf(f, "%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t%8.0f %10ld\n", -// dr, cpi->total / cpi->count, total_psnr, cpi->totalp / cpi->count, total_psnr2, total_ssim, -// total_encode_time, cpi->tot_recode_hits); +// fprintf(f, "%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t%8.0f %10ld\n", +// dr, cpi->total / cpi->count, total_psnr, +// cpi->totalp / cpi->count, total_psnr2, total_ssim, +// total_encode_time, cpi->tot_recode_hits); } if (cpi->b_calculate_ssimg) { @@ -1738,88 +1600,6 @@ void vp9_remove_compressor(VP9_PTR *ptr) { #endif - -#ifdef MODE_STATS - { - extern int count_mb_seg[4]; - char modes_stats_file[250]; - FILE *f; - double dr = (double)cpi->oxcf.frame_rate * (double)cpi->bytes * (double)8 / (double)cpi->count / (double)1000; - sprintf(modes_stats_file, "modes_q%03d.stt", cpi->common.base_qindex); - f = fopen(modes_stats_file, "w"); - fprintf(f, "intra_mode in Intra Frames:\n"); - { - int i; - fprintf(f, "Y: "); - for (i = 0; i < VP9_YMODES; i++) fprintf(f, " %8d,", y_modes[i]); - fprintf(f, "\n"); - } - { - int i; - fprintf(f, "I8: "); - for (i = 0; i < VP9_I8X8_MODES; i++) fprintf(f, " %8d,", i8x8_modes[i]); - fprintf(f, "\n"); - } - { - int i; - fprintf(f, "UV: "); - for (i = 0; i < VP9_UV_MODES; i++) fprintf(f, " %8d,", uv_modes[i]); - fprintf(f, "\n"); - } - { - int i, j; - fprintf(f, "KeyFrame Y-UV:\n"); - for (i = 0; i < VP9_YMODES; i++) { - fprintf(f, "%2d:", i); - for (j = 0; j < VP9_UV_MODES; j++) fprintf(f, "%8d, ", uv_modes_y[i][j]); - fprintf(f, "\n"); - } - } - { - int i, j; - fprintf(f, "Inter Y-UV:\n"); - for (i = 0; i < VP9_YMODES; i++) { - fprintf(f, "%2d:", i); - for (j = 0; j < VP9_UV_MODES; j++) fprintf(f, "%8d, ", cpi->y_uv_mode_count[i][j]); - fprintf(f, "\n"); - } - } - { - int i; - - fprintf(f, "B: "); - for (i = 0; i < VP9_NKF_BINTRAMODES; i++) - fprintf(f, "%8d, ", b_modes[i]); - - fprintf(f, "\n"); - - } - - fprintf(f, "Modes in Inter Frames:\n"); - { - int i; - fprintf(f, "Y: "); - for (i = 0; i < MB_MODE_COUNT; i++) fprintf(f, " %8d,", inter_y_modes[i]); - fprintf(f, "\n"); - } - { - int i; - fprintf(f, "UV: "); - for (i = 0; i < VP9_UV_MODES; i++) fprintf(f, " %8d,", inter_uv_modes[i]); - fprintf(f, "\n"); - } - { - int i; - fprintf(f, "B: "); - for (i = 0; i < B_MODE_COUNT; i++) fprintf(f, "%8d, ", inter_b_modes[i]); - fprintf(f, "\n"); - } - fprintf(f, "P:%8d, %8d, %8d, %8d\n", count_mb_seg[0], count_mb_seg[1], count_mb_seg[2], count_mb_seg[3]); - fprintf(f, "PB:%8d, %8d, %8d, %8d\n", inter_b_modes[LEFT4X4], inter_b_modes[ABOVE4X4], inter_b_modes[ZERO4X4], inter_b_modes[NEW4X4]); - fclose(f); - } -#endif - #ifdef ENTROPY_STATS { int i, j, k; @@ -1827,18 +1607,18 @@ void vp9_remove_compressor(VP9_PTR *ptr) { fprintf(fmode, "\n#include \"vp9_entropymode.h\"\n\n"); fprintf(fmode, "const unsigned int vp9_kf_default_bmode_counts "); - fprintf(fmode, "[VP9_KF_BINTRAMODES][VP9_KF_BINTRAMODES]" - "[VP9_KF_BINTRAMODES] =\n{\n"); + fprintf(fmode, "[VP9_INTRA_MODES][VP9_INTRA_MODES]" + "[VP9_INTRA_MODES] =\n{\n"); - for (i = 0; i < VP9_KF_BINTRAMODES; i++) { + for (i = 0; i < VP9_INTRA_MODES; i++) { fprintf(fmode, " { // Above Mode : %d\n", i); - for (j = 0; j < VP9_KF_BINTRAMODES; j++) { + for (j = 0; j < VP9_INTRA_MODES; j++) { fprintf(fmode, " {"); - for (k = 0; k < VP9_KF_BINTRAMODES; k++) { + for (k = 0; k < 
VP9_INTRA_MODES; k++) { if (!intra_mode_stats[i][j][k]) fprintf(fmode, " %5d, ", 1); else @@ -1988,8 +1768,8 @@ static void generate_psnr_packet(VP9_COMP *cpi) { pkt.data.psnr.samples[0] = width * height; pkt.data.psnr.samples[1] = width * height; - width = (width + 1) / 2; - height = (height + 1) / 2; + width = orig->uv_width; + height = orig->uv_height; sse = calc_plane_error(orig->u_buffer, orig->uv_stride, recon->u_buffer, recon->uv_stride, @@ -2098,10 +1878,7 @@ int vp9_set_reference_enc(VP9_PTR ptr, VP9_REFFRAME ref_frame_flag, return 0; } int vp9_update_entropy(VP9_PTR comp, int update) { - VP9_COMP *cpi = (VP9_COMP *) comp; - VP9_COMMON *cm = &cpi->common; - cm->refresh_entropy_probs = update; - + ((VP9_COMP *)comp)->common.refresh_frame_context = update; return 0; } @@ -2146,7 +1923,7 @@ void vp9_write_yuv_rec_frame(VP9_COMMON *cm) { } while (--h); src = s->u_buffer; - h = (cm->height + 1) / 2; + h = s->uv_height; do { fwrite(src, s->uv_width, 1, yuv_rec_file); @@ -2154,12 +1931,24 @@ void vp9_write_yuv_rec_frame(VP9_COMMON *cm) { } while (--h); src = s->v_buffer; - h = (cm->height + 1) / 2; + h = s->uv_height; do { fwrite(src, s->uv_width, 1, yuv_rec_file); src += s->uv_stride; } while (--h); + +#if CONFIG_ALPHA + if (s->alpha_buffer) { + src = s->alpha_buffer; + h = s->alpha_height; + do { + fwrite(src, s->alpha_width, 1, yuv_rec_file); + src += s->alpha_stride; + } while (--h); + } +#endif + fflush(yuv_rec_file); } #endif @@ -2170,56 +1959,35 @@ static void scale_and_extend_frame(YV12_BUFFER_CONFIG *src_fb, const int in_h = src_fb->y_crop_height; const int out_w = dst_fb->y_crop_width; const int out_h = dst_fb->y_crop_height; - int x, y; + int x, y, i; + + uint8_t *srcs[4] = {src_fb->y_buffer, src_fb->u_buffer, src_fb->v_buffer, + src_fb->alpha_buffer}; + int src_strides[4] = {src_fb->y_stride, src_fb->uv_stride, src_fb->uv_stride, + src_fb->alpha_stride}; + + uint8_t *dsts[4] = {dst_fb->y_buffer, dst_fb->u_buffer, dst_fb->v_buffer, + dst_fb->alpha_buffer}; + int dst_strides[4] = {dst_fb->y_stride, dst_fb->uv_stride, dst_fb->uv_stride, + dst_fb->alpha_stride}; for (y = 0; y < out_h; y += 16) { for (x = 0; x < out_w; x += 16) { - int x_q4 = x * 16 * in_w / out_w; - int y_q4 = y * 16 * in_h / out_h; - uint8_t *src, *dst; - int src_stride, dst_stride; - - - src = src_fb->y_buffer + - y * in_h / out_h * src_fb->y_stride + - x * in_w / out_w; - dst = dst_fb->y_buffer + - y * dst_fb->y_stride + - x; - src_stride = src_fb->y_stride; - dst_stride = dst_fb->y_stride; - - vp9_convolve8(src, src_stride, dst, dst_stride, - vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * in_w / out_w, - vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * in_h / out_h, - 16, 16); - - x_q4 >>= 1; - y_q4 >>= 1; - src_stride = src_fb->uv_stride; - dst_stride = dst_fb->uv_stride; - - src = src_fb->u_buffer + - y / 2 * in_h / out_h * src_fb->uv_stride + - x / 2 * in_w / out_w; - dst = dst_fb->u_buffer + - y / 2 * dst_fb->uv_stride + - x / 2; - vp9_convolve8(src, src_stride, dst, dst_stride, - vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * in_w / out_w, - vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * in_h / out_h, - 8, 8); - - src = src_fb->v_buffer + - y / 2 * in_h / out_h * src_fb->uv_stride + - x / 2 * in_w / out_w; - dst = dst_fb->v_buffer + - y / 2 * dst_fb->uv_stride + - x / 2; - vp9_convolve8(src, src_stride, dst, dst_stride, - vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * in_w / out_w, - vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * in_h / out_h, - 8, 8); + for (i = 0; i < MAX_MB_PLANE; ++i) { + const int factor = i == 0 ? 
1 : 2; + const int x_q4 = x * (16 / factor) * in_w / out_w; + const int y_q4 = y * (16 / factor) * in_h / out_h; + const int src_stride = src_strides[i]; + const int dst_stride = dst_strides[i]; + uint8_t *src = srcs[i] + y / factor * in_h / out_h * src_stride + + x / factor * in_w / out_w; + uint8_t *dst = dsts[i] + y * dst_stride + x; + + vp9_convolve8(src, src_stride, dst, dst_stride, + vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * in_w / out_w, + vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * in_h / out_h, + 16 / factor, 16 / factor); + } } } @@ -2228,62 +1996,35 @@ static void scale_and_extend_frame(YV12_BUFFER_CONFIG *src_fb, static void update_alt_ref_frame_stats(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; - - // Update data structure that monitors level of reference to last GF - vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols)); - cpi->gf_active_count = cm->mb_rows * cm->mb_cols; - // this frame refreshes means next frames don't unless specified by user cpi->common.frames_since_golden = 0; - // Clear the alternate reference update pending flag. - cpi->source_alt_ref_pending = FALSE; - - // Set the alternate refernce frame active flag - cpi->source_alt_ref_active = TRUE; - +#if CONFIG_MULTIPLE_ARF + if (!cpi->multi_arf_enabled) +#endif + // Clear the alternate reference update pending flag. + cpi->source_alt_ref_pending = 0; + // Set the alternate reference frame active flag + cpi->source_alt_ref_active = 1; } static void update_golden_frame_stats(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; - // Update the Golden frame usage counts. if (cpi->refresh_golden_frame) { - // Update data structure that monitors level of reference to last GF - vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols)); - cpi->gf_active_count = cm->mb_rows * cm->mb_cols; - // this frame refreshes means next frames don't unless specified by user cpi->refresh_golden_frame = 0; cpi->common.frames_since_golden = 0; - // if ( cm->frame_type == KEY_FRAME ) - // { - cpi->recent_ref_frame_usage[INTRA_FRAME] = 1; - cpi->recent_ref_frame_usage[LAST_FRAME] = 1; - cpi->recent_ref_frame_usage[GOLDEN_FRAME] = 1; - cpi->recent_ref_frame_usage[ALTREF_FRAME] = 1; - // } - // else - // { - // // Carry a potrtion of count over to begining of next gf sequence - // cpi->recent_ref_frame_usage[INTRA_FRAME] >>= 5; - // cpi->recent_ref_frame_usage[LAST_FRAME] >>= 5; - // cpi->recent_ref_frame_usage[GOLDEN_FRAME] >>= 5; - // cpi->recent_ref_frame_usage[ALTREF_FRAME] >>= 5; - // } - // ******** Fixed Q test code only ************ // If we are going to use the ALT reference for the next group of frames set a flag to say so. 
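The rewritten scale_and_extend_frame() above folds the separate Y/U/V code paths into one plane loop: factor is 1 for luma and 2 for the subsampled chroma planes, and x_q4/y_q4 are source positions in 1/16-pel (Q4) precision whose low four bits pick the 8-tap sub-pixel filter phase. A simplified sketch of the coordinate math under the same Q4 convention; src_position is illustrative:

    /* For output column x when scaling in_w -> out_w, split the Q4 source
     * position into an integer column and a 1/16-pel filter phase. */
    static void src_position(int x, int in_w, int out_w,
                             int *src_x, int *phase) {
      const int x_q4 = x * 16 * in_w / out_w;  /* source position, Q4 */
      *src_x = x_q4 >> 4;                      /* integer pixel column */
      *phase = x_q4 & 0xf;                     /* row of vp9_sub_pel_filters_8 */
    }
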
if (cpi->oxcf.fixed_q >= 0 && cpi->oxcf.play_alternate && !cpi->refresh_alt_ref_frame) { - cpi->source_alt_ref_pending = TRUE; + cpi->source_alt_ref_pending = 1; cpi->frames_till_gf_update_due = cpi->baseline_gf_interval; } if (!cpi->source_alt_ref_pending) - cpi->source_alt_ref_active = FALSE; + cpi->source_alt_ref_active = 0; // Decrement count down till next gf if (cpi->frames_till_gf_update_due > 0) @@ -2298,13 +2039,6 @@ static void update_golden_frame_stats(VP9_COMP *cpi) { cpi->common.frames_till_alt_ref_frame--; cpi->common.frames_since_golden++; - - if (cpi->common.frames_since_golden > 1) { - cpi->recent_ref_frame_usage[INTRA_FRAME] += cpi->count_mb_ref_frame_usage[INTRA_FRAME]; - cpi->recent_ref_frame_usage[LAST_FRAME] += cpi->count_mb_ref_frame_usage[LAST_FRAME]; - cpi->recent_ref_frame_usage[GOLDEN_FRAME] += cpi->count_mb_ref_frame_usage[GOLDEN_FRAME]; - cpi->recent_ref_frame_usage[ALTREF_FRAME] += cpi->count_mb_ref_frame_usage[ALTREF_FRAME]; - } } } @@ -2384,7 +2118,8 @@ static double compute_edge_pixel_proportion(YV12_BUFFER_CONFIG *frame) { int h = 2 * (prev[0] - next[0]) + (prev[1] - next[1]) + (prev[-1] - next[-1]); h = (h < 0 ? -h : h); v = (v < 0 ? -v : v); - if (h > EDGE_THRESH || v > EDGE_THRESH) num_edge_pels++; + if (h > EDGE_THRESH || v > EDGE_THRESH) + num_edge_pels++; curr++; prev++; next++; @@ -2393,7 +2128,7 @@ static double compute_edge_pixel_proportion(YV12_BUFFER_CONFIG *frame) { prev += frame->y_stride - frame->y_width + 2; next += frame->y_stride - frame->y_width + 2; } - return (double)num_edge_pels / (double)num_pels; + return (double)num_edge_pels / num_pels; } // Function to test for conditions that indicate we should loop @@ -2401,11 +2136,11 @@ static double compute_edge_pixel_proportion(YV12_BUFFER_CONFIG *frame) { static int recode_loop_test(VP9_COMP *cpi, int high_limit, int low_limit, int q, int maxq, int minq) { - int force_recode = FALSE; + int force_recode = 0; VP9_COMMON *cm = &cpi->common; // Is frame recode allowed at all - // Yes if either recode mode 1 is selected or mode two is selcted + // Yes if either recode mode 1 is selected or mode two is selected // and the frame is a key frame. 
golden frame or alt_ref_frame if ((cpi->sf.recode_loop == 1) || ((cpi->sf.recode_loop == 2) && @@ -2415,21 +2150,19 @@ static int recode_loop_test(VP9_COMP *cpi, // General over and under shoot tests if (((cpi->projected_frame_size > high_limit) && (q < maxq)) || ((cpi->projected_frame_size < low_limit) && (q > minq))) { - force_recode = TRUE; + force_recode = 1; } // Special Constrained quality tests else if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) { // Undershoot and below auto cq level - if ((q > cpi->cq_target_quality) && - (cpi->projected_frame_size < - ((cpi->this_frame_target * 7) >> 3))) { - force_recode = TRUE; - } - // Severe undershoot and between auto and user cq level - else if ((q > cpi->oxcf.cq_level) && - (cpi->projected_frame_size < cpi->min_frame_bandwidth) && - (cpi->active_best_quality > cpi->oxcf.cq_level)) { - force_recode = TRUE; + if (q > cpi->cq_target_quality && + cpi->projected_frame_size < ((cpi->this_frame_target * 7) >> 3)) { + force_recode = 1; + } else if (q > cpi->oxcf.cq_level && + cpi->projected_frame_size < cpi->min_frame_bandwidth && + cpi->active_best_quality > cpi->oxcf.cq_level) { + // Severe undershoot and between auto and user cq level + force_recode = 1; cpi->active_best_quality = cpi->oxcf.cq_level; } } @@ -2448,13 +2181,19 @@ static void update_reference_frames(VP9_COMP * const cpi) { &cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx); ref_cnt_fb(cm->fb_idx_ref_cnt, &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx); - } else if (cpi->refresh_golden_frame && !cpi->refresh_alt_ref_frame) { + } +#if CONFIG_MULTIPLE_ARF + else if (!cpi->multi_arf_enabled && cpi->refresh_golden_frame && + !cpi->refresh_alt_ref_frame) { +#else + else if (cpi->refresh_golden_frame && !cpi->refresh_alt_ref_frame) { +#endif /* Preserve the previously existing golden frame and update the frame in * the alt ref slot instead. This is highly specific to the current use of * alt-ref as a forward reference, and this needs to be generalized as * other uses are implemented (like RTC/temporal scaling) * - * The update to the buffer in the alt ref slot was signalled in + * The update to the buffer in the alt ref slot was signaled in * vp9_pack_bitstream(), now swap the buffer pointers so that it's treated * as the golden frame next time. 
*/ @@ -2466,10 +2205,16 @@ static void update_reference_frames(VP9_COMP * const cpi) { tmp = cpi->alt_fb_idx; cpi->alt_fb_idx = cpi->gld_fb_idx; cpi->gld_fb_idx = tmp; - } else { /* For non key/golden frames */ + } else { /* For non key/golden frames */ if (cpi->refresh_alt_ref_frame) { + int arf_idx = cpi->alt_fb_idx; +#if CONFIG_MULTIPLE_ARF + if (cpi->multi_arf_enabled) { + arf_idx = cpi->arf_buffer_idx[cpi->sequence_number + 1]; + } +#endif ref_cnt_fb(cm->fb_idx_ref_cnt, - &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx); + &cm->ref_frame_map[arf_idx], cm->new_fb_idx); } if (cpi->refresh_golden_frame) { @@ -2485,7 +2230,7 @@ static void update_reference_frames(VP9_COMP * const cpi) { } static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) { - if (cm->no_lpf || cpi->mb.e_mbd.lossless) { + if (cpi->mb.e_mbd.lossless) { cm->filter_level = 0; } else { struct vpx_usec_timer timer; @@ -2493,10 +2238,8 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) { vp9_clear_system_state(); vpx_usec_timer_start(&timer); - if (cpi->sf.auto_filter == 0) - vp9_pick_filter_level_fast(cpi->Source, cpi); - else - vp9_pick_filter_level(cpi->Source, cpi); + + vp9_pick_filter_level(cpi->Source, cpi); vpx_usec_timer_mark(&timer); cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer); @@ -2504,11 +2247,11 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) { if (cm->filter_level > 0) { vp9_set_alt_lf_level(cpi, cm->filter_level); - vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, cm->filter_level, 0, - cm->dering_enabled); + vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, cm->filter_level, 0); } - vp8_yv12_extend_frame_borders(cm->frame_to_show); + vp9_extend_frame_borders(cm->frame_to_show, + cm->subsampling_x, cm->subsampling_y); } @@ -2551,20 +2294,6 @@ void vp9_select_interp_filter_type(VP9_COMP *cpi) { } } -#if CONFIG_COMP_INTERINTRA_PRED -static void select_interintra_mode(VP9_COMP *cpi) { - static const double threshold = 0.01; - VP9_COMMON *cm = &cpi->common; - // FIXME(debargha): Make this RD based - int sum = cpi->interintra_select_count[1] + cpi->interintra_select_count[0]; - if (sum) { - double fraction = (double) cpi->interintra_select_count[1] / sum; - // printf("fraction: %f\n", fraction); - cm->use_interintra = (fraction > threshold); - } -} -#endif - static void scale_references(VP9_COMP *cpi) { VP9_COMMON *cm = &cpi->common; int i; @@ -2576,9 +2305,10 @@ static void scale_references(VP9_COMP *cpi) { ref->y_crop_height != cm->height) { int new_fb = get_free_fb(cm); - vp8_yv12_realloc_frame_buffer(&cm->yv12_fb[new_fb], - cm->width, cm->height, - VP9BORDERINPIXELS); + vp9_realloc_frame_buffer(&cm->yv12_fb[new_fb], + cm->width, cm->height, + cm->subsampling_x, cm->subsampling_y, + VP9BORDERINPIXELS); scale_and_extend_frame(ref, &cm->yv12_fb[new_fb]); cpi->scaled_ref_idx[i] = new_fb; } else { @@ -2592,9 +2322,8 @@ static void release_scaled_references(VP9_COMP *cpi) { VP9_COMMON *cm = &cpi->common; int i; - for (i = 0; i < 3; i++) { + for (i = 0; i < 3; i++) cm->fb_idx_ref_cnt[cpi->scaled_ref_idx[i]]--; - } } static void encode_frame_to_data_rate(VP9_COMP *cpi, @@ -2603,12 +2332,12 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, unsigned int *frame_flags) { VP9_COMMON *cm = &cpi->common; MACROBLOCKD *xd = &cpi->mb.e_mbd; - - int Q; + TX_SIZE t; + int q; int frame_over_shoot_limit; int frame_under_shoot_limit; - int Loop = FALSE; + int loop = 0; int loop_count; int q_low; @@ -2616,10 +2345,10 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, int top_index; int 
bottom_index; - int active_worst_qchanged = FALSE; + int active_worst_qchanged = 0; - int overshoot_seen = FALSE; - int undershoot_seen = FALSE; + int overshoot_seen = 0; + int undershoot_seen = 0; SPEED_FEATURES *sf = &cpi->sf; #if RESET_FOREACH_FILTER @@ -2634,11 +2363,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, /* list of filters to search over */ int mcomp_filters_to_search[] = { -#if CONFIG_ENABLE_6TAP - EIGHTTAP, EIGHTTAP_SHARP, SIXTAP, SWITCHABLE -#else - EIGHTTAP, EIGHTTAP_SHARP, EIGHTTAP_SMOOTH, SWITCHABLE -#endif + EIGHTTAP, EIGHTTAP_SHARP, EIGHTTAP_SMOOTH, SWITCHABLE }; int mcomp_filters = sizeof(mcomp_filters_to_search) / sizeof(*mcomp_filters_to_search); @@ -2646,8 +2371,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, int64_t mcomp_filter_cost[4]; /* Scale the source buffer, if required */ - if (cm->mb_cols * 16 != cpi->un_scaled_source->y_width || - cm->mb_rows * 16 != cpi->un_scaled_source->y_height) { + if (cm->mi_cols * 8 != cpi->un_scaled_source->y_width || + cm->mi_rows * 8 != cpi->un_scaled_source->y_height) { scale_and_extend_frame(cpi->un_scaled_source, &cpi->scaled_source); cpi->Source = &cpi->scaled_source; } else { @@ -2663,7 +2388,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // For an alt ref frame in 2 pass we skip the call to the second // pass function that sets the target bandwidth so must set it here if (cpi->refresh_alt_ref_frame) { - cpi->per_frame_bandwidth = cpi->twopass.gf_bits; // Per frame bit target for the alt ref frame + // Per frame bit target for the alt ref frame + cpi->per_frame_bandwidth = cpi->twopass.gf_bits; // per second target bitrate cpi->target_bandwidth = (int)(cpi->twopass.gf_bits * cpi->output_frame_rate); @@ -2678,17 +2404,14 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, cpi->zbin_mode_boost = 0; // if (cpi->oxcf.lossless) - cpi->zbin_mode_boost_enabled = FALSE; + cpi->zbin_mode_boost_enabled = 0; // else - // cpi->zbin_mode_boost_enabled = TRUE; + // cpi->zbin_mode_boost_enabled = 1; // Current default encoder behaviour for the altref sign bias - if (cpi->source_alt_ref_active) - cpi->common.ref_frame_sign_bias[ALTREF_FRAME] = 1; - else - cpi->common.ref_frame_sign_bias[ALTREF_FRAME] = 0; + cpi->common.ref_frame_sign_bias[ALTREF_FRAME] = cpi->source_alt_ref_active; - // Check to see if a key frame is signalled + // Check to see if a key frame is signaled // For two pass with auto key frame enabled cm->frame_type may already be set, but not for one pass. if ((cm->current_video_frame == 0) || (cm->frame_flags & FRAMEFLAGS_KEY) || @@ -2715,25 +2438,26 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, } // The alternate reference frame cannot be active for a key frame - cpi->source_alt_ref_active = FALSE; + cpi->source_alt_ref_active = 0; // Reset the RD threshold multipliers to default of * 1 (128) - for (i = 0; i < MAX_MODES; i++) { + for (i = 0; i < MAX_MODES; i++) cpi->rd_thresh_mult[i] = 128; - } cm->error_resilient_mode = (cpi->oxcf.error_resilient_mode != 0); cm->frame_parallel_decoding_mode = (cpi->oxcf.frame_parallel_decoding_mode != 0); if (cm->error_resilient_mode) { cm->frame_parallel_decoding_mode = 1; - cm->refresh_entropy_probs = 0; + cm->reset_frame_context = 0; + cm->refresh_frame_context = 0; } } - // Configure use of segmentation for enhanced coding of static regions. + // Configure experimental use of segmentation for enhanced coding of + // static regions if indicated. 
// Only allowed for now in second pass of two pass (as requires lagged coding) - // and if the relevent speed feature flag is set. + // and if the relevant speed feature flag is set. if ((cpi->pass == 2) && (cpi->sf.static_segmentation)) { configure_static_seg_features(cpi); } @@ -2744,45 +2468,64 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, vp9_clear_system_state(); // Set an active best quality and if necessary active worst quality - Q = cpi->active_worst_quality; + q = cpi->active_worst_quality; if (cm->frame_type == KEY_FRAME) { - int high = 2000; - int low = 400; - - if (cpi->kf_boost > high) - cpi->active_best_quality = kf_low_motion_minq[Q]; - else if (cpi->kf_boost < low) - cpi->active_best_quality = kf_high_motion_minq[Q]; - else { - int gap = high - low; - int offset = high - cpi->kf_boost; - int qdiff = kf_high_motion_minq[Q] - kf_low_motion_minq[Q]; - int adjustment = ((offset * qdiff) + (gap >> 1)) / gap; - - cpi->active_best_quality = kf_low_motion_minq[Q] + adjustment; - } - - // Make an adjustment based on the %s static - // The main impact of this is at lower Q to prevent overly large key - // frames unless a lot of the image is static. - if (cpi->kf_zeromotion_pct < 64) - cpi->active_best_quality += 4 - (cpi->kf_zeromotion_pct >> 4); - +#if !CONFIG_MULTIPLE_ARF // Special case for key frames forced because we have reached // the maximum key frame interval. Here force the Q to a range // based on the ambient Q to reduce the risk of popping if (cpi->this_key_frame_forced) { int delta_qindex; int qindex = cpi->last_boosted_qindex; + double last_boosted_q = vp9_convert_qindex_to_q(qindex); + + delta_qindex = compute_qdelta(cpi, last_boosted_q, + (last_boosted_q * 0.75)); + + cpi->active_best_quality = MAX(qindex + delta_qindex, cpi->best_quality); + } else { + int high = 5000; + int low = 400; + double q_adj_factor = 1.0; + double q_val; + + // Baseline value derived from cpi->active_worst_quality and kf boost + if (cpi->kf_boost > high) { + cpi->active_best_quality = kf_low_motion_minq[q]; + } else if (cpi->kf_boost < low) { + cpi->active_best_quality = kf_high_motion_minq[q]; + } else { + const int gap = high - low; + const int offset = high - cpi->kf_boost; + const int qdiff = kf_high_motion_minq[q] - kf_low_motion_minq[q]; + const int adjustment = ((offset * qdiff) + (gap >> 1)) / gap; + + cpi->active_best_quality = kf_low_motion_minq[q] + adjustment; + } - delta_qindex = compute_qdelta(cpi, qindex, - (qindex * 0.75)); - cpi->active_best_quality = qindex + delta_qindex; - if (cpi->active_best_quality < cpi->best_quality) - cpi->active_best_quality = cpi->best_quality; + // Allow somewhat lower kf minq with small image formats. + if ((cm->width * cm->height) <= (352 * 288)) { + q_adj_factor -= 0.25; + } + + // Make a further adjustment based on the kf zero motion measure. + q_adj_factor += 0.05 - (0.001 * (double)cpi->kf_zeromotion_pct); + + // Convert the adjustment factor to a qindex delta on active_best_quality. + q_val = vp9_convert_qindex_to_q(cpi->active_best_quality); + cpi->active_best_quality += + compute_qdelta(cpi, q_val, (q_val * q_adj_factor)); } +#else + double current_q; + + // Force the KF quantizer to be 30% of the active_worst_quality. 
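The adjustment computed above is a rounded linear interpolation between the two minq tables, weighted by where the boost value falls inside [low, high]. Restated as a standalone sketch (interp_minq is an illustrative name):

    /* Blend q_lo (used when boost >= high) toward q_hi (used when
     * boost <= low); adding (gap >> 1) gives round-to-nearest division. */
    static int interp_minq(int boost, int low, int high, int q_lo, int q_hi) {
      const int gap = high - low;
      const int offset = high - boost;
      return q_lo + (offset * (q_hi - q_lo) + (gap >> 1)) / gap;
    }
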
+ current_q = vp9_convert_qindex_to_q(cpi->active_worst_quality); + cpi->active_best_quality = cpi->active_worst_quality + + compute_qdelta(cpi, current_q, current_q * 0.3); +#endif } else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) { int high = 2000; int low = 400; @@ -2790,47 +2533,45 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // Use the lower of cpi->active_worst_quality and recent // average Q as basis for GF/ARF Q limit unless last frame was // a key frame. - if ((cpi->frames_since_key > 1) && - (cpi->avg_frame_qindex < cpi->active_worst_quality)) { - Q = cpi->avg_frame_qindex; + if (cpi->frames_since_key > 1 && + cpi->avg_frame_qindex < cpi->active_worst_quality) { + q = cpi->avg_frame_qindex; } // For constrained quality dont allow Q less than the cq level - if ((cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) && - (Q < cpi->cq_target_quality)) { - Q = cpi->cq_target_quality; + if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY && + q < cpi->cq_target_quality) { + q = cpi->cq_target_quality; } - if (cpi->gfu_boost > high) - cpi->active_best_quality = gf_low_motion_minq[Q]; - else if (cpi->gfu_boost < low) - cpi->active_best_quality = gf_high_motion_minq[Q]; - else { - int gap = high - low; - int offset = high - cpi->gfu_boost; - int qdiff = gf_high_motion_minq[Q] - gf_low_motion_minq[Q]; - int adjustment = ((offset * qdiff) + (gap >> 1)) / gap; - - cpi->active_best_quality = gf_low_motion_minq[Q] + adjustment; + if (cpi->gfu_boost > high) { + cpi->active_best_quality = gf_low_motion_minq[q]; + } else if (cpi->gfu_boost < low) { + cpi->active_best_quality = gf_high_motion_minq[q]; + } else { + const int gap = high - low; + const int offset = high - cpi->gfu_boost; + const int qdiff = gf_high_motion_minq[q] - gf_low_motion_minq[q]; + const int adjustment = ((offset * qdiff) + (gap >> 1)) / gap; + + cpi->active_best_quality = gf_low_motion_minq[q] + adjustment; } // Constrained quality use slightly lower active best. - if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) { - cpi->active_best_quality = - cpi->active_best_quality * 15 / 16; - } + if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) + cpi->active_best_quality = cpi->active_best_quality * 15 / 16; } else { #ifdef ONE_SHOT_Q_ESTIMATE #ifdef STRICT_ONE_SHOT_Q - cpi->active_best_quality = Q; + cpi->active_best_quality = q; #else - cpi->active_best_quality = inter_minq[Q]; + cpi->active_best_quality = inter_minq[q]; #endif #else - cpi->active_best_quality = inter_minq[Q]; + cpi->active_best_quality = inter_minq[q]; #endif - // For the constant/constrained quality mode we dont want + // For the constant/constrained quality mode we don't want // q to fall below the cq level. if ((cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) && (cpi->active_best_quality < cpi->cq_target_quality)) { @@ -2859,22 +2600,45 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // Special case code to try and match quality with forced key frames if ((cm->frame_type == KEY_FRAME) && cpi->this_key_frame_forced) { - Q = cpi->last_boosted_qindex; + q = cpi->last_boosted_qindex; } else { // Determine initial Q to try - Q = vp9_regulate_q(cpi, cpi->this_frame_target); + q = vp9_regulate_q(cpi, cpi->this_frame_target); } vp9_compute_frame_size_bounds(cpi, &frame_under_shoot_limit, &frame_over_shoot_limit); - // Limit Q range for the adaptive loop. 
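compute_qdelta(), called above with real quantizer values (the output of vp9_convert_qindex_to_q) rather than raw indices, returns the qindex offset whose quantizer best matches a target. Its body is not shown in this hunk; one plausible implementation, assuming a monotonic index-to-q mapping and an illustrative maxq bound, is:

    /* Return the index delta that moves the quantizer from qstart to
     * qtarget, given a monotonically increasing mapping convert(). */
    static int compute_qdelta_sketch(double qstart, double qtarget,
                                     double (*convert)(int), int maxq) {
      int start_index = maxq, target_index = maxq, i;
      for (i = 0; i <= maxq; ++i)
        if (convert(i) >= qstart) { start_index = i; break; }
      for (i = 0; i <= maxq; ++i)
        if (convert(i) >= qtarget) { target_index = i; break; }
      return target_index - start_index;
    }
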
- bottom_index = cpi->active_best_quality; - top_index = cpi->active_worst_quality; - q_low = cpi->active_best_quality; - q_high = cpi->active_worst_quality; +#if CONFIG_MULTIPLE_ARF + // Force the quantizer determined by the coding order pattern. + if (cpi->multi_arf_enabled && (cm->frame_type != KEY_FRAME)) { + double new_q; + double current_q = vp9_convert_qindex_to_q(cpi->active_worst_quality); + int level = cpi->this_frame_weight; + assert(level >= 0); + // Set quantizer steps at 10% increments. + new_q = current_q * (1.0 - (0.2 * (cpi->max_arf_level - level))); + q = cpi->active_worst_quality + compute_qdelta(cpi, current_q, new_q); + + bottom_index = q; + top_index = q; + q_low = q; + q_high = q; + + printf("frame:%d q:%d\n", cm->current_video_frame, q); + } else { +#endif + // Limit Q range for the adaptive loop. + bottom_index = cpi->active_best_quality; + top_index = cpi->active_worst_quality; + q_low = cpi->active_best_quality; + q_high = cpi->active_worst_quality; +#if CONFIG_MULTIPLE_ARF + } +#endif loop_count = 0; + vpx_memset(cpi->rd_tx_select_threshes, 0, sizeof(cpi->rd_tx_select_threshes)); if (cm->frame_type != KEY_FRAME) { /* TODO: Decide this more intelligently */ @@ -2885,16 +2649,10 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, cm->mcomp_filter_type = DEFAULT_INTERP_FILTER; } /* TODO: Decide this more intelligently */ - xd->allow_high_precision_mv = (Q < HIGH_PRECISION_MV_QTHRESH); + xd->allow_high_precision_mv = q < HIGH_PRECISION_MV_QTHRESH; set_mvcost(&cpi->mb); } -#if CONFIG_COMP_INTERINTRA_PRED - if (cm->current_video_frame == 0) { - cm->use_interintra = 1; - } -#endif - #if CONFIG_POSTPROC if (cpi->oxcf.noise_sensitivity > 0) { @@ -2919,7 +2677,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, break; } - vp9_denoise(cpi->Source, cpi->Source, l, 1, 0); + vp9_denoise(cpi->Source, cpi->Source, l); } #endif @@ -2942,82 +2700,35 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, do { vp9_clear_system_state(); // __asm emms; - vp9_set_quantizer(cpi, Q); + vp9_set_quantizer(cpi, q); if (loop_count == 0) { - // setup skip prob for costing in mode/mv decision - if (cpi->common.mb_no_coeff_skip) { - int k; - for (k = 0; k < MBSKIP_CONTEXTS; k++) - cm->mbskip_pred_probs[k] = cpi->base_skip_false_prob[Q][k]; - - if (cm->frame_type != KEY_FRAME) { - if (cpi->refresh_alt_ref_frame) { - for (k = 0; k < MBSKIP_CONTEXTS; k++) { - if (cpi->last_skip_false_probs[2][k] != 0) - cm->mbskip_pred_probs[k] = cpi->last_skip_false_probs[2][k]; - } - } else if (cpi->refresh_golden_frame) { - for (k = 0; k < MBSKIP_CONTEXTS; k++) { - if (cpi->last_skip_false_probs[1][k] != 0) - cm->mbskip_pred_probs[k] = cpi->last_skip_false_probs[1][k]; - } - } else { - int k; - for (k = 0; k < MBSKIP_CONTEXTS; k++) { - if (cpi->last_skip_false_probs[0][k] != 0) - cm->mbskip_pred_probs[k] = cpi->last_skip_false_probs[0][k]; - } - } - - // as this is for cost estimate, let's make sure it does not - // get extreme either way - { - int k; - for (k = 0; k < MBSKIP_CONTEXTS; ++k) { - if (cm->mbskip_pred_probs[k] < 5) - cm->mbskip_pred_probs[k] = 5; - - if (cm->mbskip_pred_probs[k] > 250) - cm->mbskip_pred_probs[k] = 250; - - if (cpi->is_src_frame_alt_ref) - cm->mbskip_pred_probs[k] = 1; - } - } - } - } - // Set up entropy depending on frame type. if (cm->frame_type == KEY_FRAME) { /* Choose which entropy context to use. When using a forward reference - * frame, it immediately follows the keyframe, and thus benefits from - * using the same entropy context established by the keyframe. 
Otherwise, - * use the default context 0. - */ + * frame, it immediately follows the keyframe, and thus benefits from + * using the same entropy context established by the keyframe. + * Otherwise, use the default context 0. + */ cm->frame_context_idx = cpi->oxcf.play_alternate; vp9_setup_key_frame(cpi); } else { - /* Choose which entropy context to use. Currently there are only two - * contexts used, one for normal frames and one for alt ref frames. - */ + /* Choose which entropy context to use. Currently there are only two + * contexts used, one for normal frames and one for alt ref frames. + */ cpi->common.frame_context_idx = cpi->refresh_alt_ref_frame; vp9_setup_inter_frame(cpi); } } // transform / motion compensation build reconstruction frame -#if CONFIG_MODELCOEFPROB && ADJUST_KF_COEF_PROBS - if (cm->frame_type == KEY_FRAME) - vp9_adjust_default_coef_probs(cm); -#endif vp9_encode_frame(cpi); // Update the skip mb flag probabilities based on the distribution // seen in the last encoder iteration. - update_base_skip_probs(cpi); + // update_base_skip_probs(cpi); vp9_clear_system_state(); // __asm emms; @@ -3032,138 +2743,133 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, if (frame_over_shoot_limit == 0) frame_over_shoot_limit = 1; - active_worst_qchanged = FALSE; + active_worst_qchanged = 0; // Special case handling for forced key frames if ((cm->frame_type == KEY_FRAME) && cpi->this_key_frame_forced) { - int last_q = Q; + int last_q = q; int kf_err = vp9_calc_ss_err(cpi->Source, &cm->yv12_fb[cm->new_fb_idx]); int high_err_target = cpi->ambient_err; - int low_err_target = (cpi->ambient_err >> 1); + int low_err_target = cpi->ambient_err >> 1; // Prevent possible divide by zero error below for perfect KF - kf_err += (!kf_err); + kf_err += !kf_err; // The key frame is not good enough or we can afford // to make it better without undue risk of popping. - if (((kf_err > high_err_target) && - (cpi->projected_frame_size <= frame_over_shoot_limit)) || - ((kf_err > low_err_target) && - (cpi->projected_frame_size <= frame_under_shoot_limit))) { + if ((kf_err > high_err_target && + cpi->projected_frame_size <= frame_over_shoot_limit) || + (kf_err > low_err_target && + cpi->projected_frame_size <= frame_under_shoot_limit)) { // Lower q_high - q_high = (Q > q_low) ? (Q - 1) : q_low; + q_high = q > q_low ? q - 1 : q_low; // Adjust Q - Q = (Q * high_err_target) / kf_err; - if (Q < ((q_high + q_low) >> 1)) - Q = (q_high + q_low) >> 1; - } - // The key frame is much better than the previous frame - else if ((kf_err < low_err_target) && - (cpi->projected_frame_size >= frame_under_shoot_limit)) { + q = (q * high_err_target) / kf_err; + q = MIN(q, (q_high + q_low) >> 1); + } else if (kf_err < low_err_target && + cpi->projected_frame_size >= frame_under_shoot_limit) { + // The key frame is much better than the previous frame // Raise q_low - q_low = (Q < q_high) ? (Q + 1) : q_high; + q_low = q < q_high ? q + 1 : q_high; // Adjust Q - Q = (Q * low_err_target) / kf_err; - if (Q > ((q_high + q_low + 1) >> 1)) - Q = (q_high + q_low + 1) >> 1; + q = (q * low_err_target) / kf_err; + q = MIN(q, (q_high + q_low + 1) >> 1); } // Clamp Q to upper and lower limits: - if (Q > q_high) - Q = q_high; - else if (Q < q_low) - Q = q_low; + q = clamp(q, q_low, q_high); - Loop = ((Q != last_q)) ? TRUE : FALSE; + loop = q != last_q; } // Is the projected frame size out of range and are we allowed to attempt to recode. 
else if (recode_loop_test(cpi, frame_over_shoot_limit, frame_under_shoot_limit, - Q, top_index, bottom_index)) { - int last_q = Q; - int Retries = 0; + q, top_index, bottom_index)) { + int last_q = q; + int retries = 0; // Frame size out of permitted range: // Update correction factor & compute new Q to try... // Frame is too large if (cpi->projected_frame_size > cpi->this_frame_target) { - q_low = (Q < q_high) ? (Q + 1) : q_high; // Raise Qlow as to at least the current value + // Raise Qlow as to at least the current value + q_low = q < q_high ? q + 1 : q_high; - if (undershoot_seen || (loop_count > 1)) { - // Update rate_correction_factor unless cpi->active_worst_quality has changed. + if (undershoot_seen || loop_count > 1) { + // Update rate_correction_factor unless cpi->active_worst_quality + // has changed. if (!active_worst_qchanged) vp9_update_rate_correction_factors(cpi, 1); - Q = (q_high + q_low + 1) / 2; + q = (q_high + q_low + 1) / 2; } else { // Update rate_correction_factor unless cpi->active_worst_quality has changed. if (!active_worst_qchanged) vp9_update_rate_correction_factors(cpi, 0); - Q = vp9_regulate_q(cpi, cpi->this_frame_target); + q = vp9_regulate_q(cpi, cpi->this_frame_target); - while ((Q < q_low) && (Retries < 10)) { + while (q < q_low && retries < 10) { vp9_update_rate_correction_factors(cpi, 0); - Q = vp9_regulate_q(cpi, cpi->this_frame_target); - Retries++; + q = vp9_regulate_q(cpi, cpi->this_frame_target); + retries++; } } - overshoot_seen = TRUE; - } - // Frame is too small - else { - q_high = (Q > q_low) ? (Q - 1) : q_low; + overshoot_seen = 1; + } else { + // Frame is too small + q_high = q > q_low ? q - 1 : q_low; - if (overshoot_seen || (loop_count > 1)) { + if (overshoot_seen || loop_count > 1) { // Update rate_correction_factor unless cpi->active_worst_quality has changed. if (!active_worst_qchanged) vp9_update_rate_correction_factors(cpi, 1); - Q = (q_high + q_low) / 2; + q = (q_high + q_low) / 2; } else { // Update rate_correction_factor unless cpi->active_worst_quality has changed. if (!active_worst_qchanged) vp9_update_rate_correction_factors(cpi, 0); - Q = vp9_regulate_q(cpi, cpi->this_frame_target); + q = vp9_regulate_q(cpi, cpi->this_frame_target); // Special case reset for qlow for constrained quality. // This should only trigger where there is very substantial // undershoot on a frame and the auto cq level is above // the user passsed in value. 
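The recode logic above narrows [q_low, q_high] like a bisection search: an overshoot raises the floor and, once an undershoot has also been seen, retries at the interval midpoint, and symmetrically for undershoots. A compact sketch of one bracketing step, with illustrative names and the two midpoint roundings merged:

    /* One bracketing step of the recode loop (simplified). */
    static int next_q(int q, int *q_low, int *q_high,
                      int overshot, int seen_both_directions) {
      if (overshot)
        *q_low = q < *q_high ? q + 1 : *q_high;  /* too many bits: raise floor */
      else
        *q_high = q > *q_low ? q - 1 : *q_low;   /* too few bits: lower ceiling */
      return seen_both_directions ? (*q_low + *q_high + 1) / 2 : q;
    }
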
- if ((cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) && - (Q < q_low)) { - q_low = Q; + if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY && q < q_low) { + q_low = q; } - while ((Q > q_high) && (Retries < 10)) { + while (q > q_high && retries < 10) { vp9_update_rate_correction_factors(cpi, 0); - Q = vp9_regulate_q(cpi, cpi->this_frame_target); - Retries++; + q = vp9_regulate_q(cpi, cpi->this_frame_target); + retries++; } } - undershoot_seen = TRUE; + undershoot_seen = 1; } // Clamp Q to upper and lower limits: - Q = clamp(Q, q_low, q_high); + q = clamp(q, q_low, q_high); - Loop = Q != last_q; - } else - Loop = FALSE; + loop = q != last_q; + } else { + loop = 0; + } if (cpi->is_src_frame_alt_ref) - Loop = FALSE; + loop = 0; - if (Loop == FALSE && cm->frame_type != KEY_FRAME && sf->search_best_filter) { + if (!loop && cm->frame_type != KEY_FRAME && sf->search_best_filter) { if (mcomp_filter_index < mcomp_filters) { int64_t err = vp9_calc_ss_err(cpi->Source, &cm->yv12_fb[cm->new_fb_idx]); @@ -3174,7 +2880,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, if (mcomp_filter_index < mcomp_filters) { cm->mcomp_filter_type = mcomp_filters_to_search[mcomp_filter_index]; loop_count = -1; - Loop = TRUE; + loop = 1; } else { int f; int64_t best_cost = mcomp_filter_cost[0]; @@ -3187,7 +2893,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, } if (mcomp_best_filter != mcomp_filters_to_search[mcomp_filters - 1]) { loop_count = -1; - Loop = TRUE; + loop = 1; cm->mcomp_filter_type = mcomp_best_filter; } /* @@ -3197,12 +2903,12 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, */ } #if RESET_FOREACH_FILTER - if (Loop == TRUE) { - overshoot_seen = FALSE; - undershoot_seen = FALSE; + if (loop) { + overshoot_seen = 0; + undershoot_seen = 0; q_low = q_low0; q_high = q_high0; - Q = Q0; + q = Q0; cpi->rate_correction_factor = rate_correction_factor0; cpi->gf_rate_correction_factor = gf_rate_correction_factor0; cpi->active_best_quality = active_best_quality0; @@ -3212,14 +2918,14 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, } } - if (Loop == TRUE) { + if (loop) { loop_count++; #if CONFIG_INTERNAL_STATS cpi->tot_recode_hits++; #endif } - } while (Loop == TRUE); + } while (loop); // Special case code to reduce pulsing when key frames are forced at a // fixed interval. Note the reconstruction error if it is the frame before @@ -3229,51 +2935,9 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, &cm->yv12_fb[cm->new_fb_idx]); } - // This frame's MVs are saved and will be used in next frame's MV - // prediction. Last frame has one more line(add to bottom) and one - // more column(add to right) than cm->mip. The edge elements are - // initialized to 0. - if (cm->show_frame) { // do not save for altref frame - int mb_row; - int mb_col; - MODE_INFO *tmp = cm->mip; - - if (cm->frame_type != KEY_FRAME) { - for (mb_row = 0; mb_row < cm->mb_rows + 1; mb_row ++) { - for (mb_col = 0; mb_col < cm->mb_cols + 1; mb_col ++) { - if (tmp->mbmi.ref_frame != INTRA_FRAME) - cpi->lfmv[mb_col + mb_row * (cm->mode_info_stride + 1)].as_int = tmp->mbmi.mv[0].as_int; - - cpi->lf_ref_frame_sign_bias[mb_col + mb_row * (cm->mode_info_stride + 1)] = cm->ref_frame_sign_bias[tmp->mbmi.ref_frame]; - cpi->lf_ref_frame[mb_col + mb_row * (cm->mode_info_stride + 1)] = tmp->mbmi.ref_frame; - tmp++; - } - } - } - } - - // Update the GF useage maps. - // This is done after completing the compression of a frame when all modes - // etc. 
are finalized but before loop filter - vp9_update_gf_useage_maps(cpi, cm, &cpi->mb); - if (cm->frame_type == KEY_FRAME) cpi->refresh_last_frame = 1; -#if 0 - { - FILE *f = fopen("gfactive.stt", "a"); - fprintf(f, "%8d %8d %8d %8d %8d\n", - cm->current_video_frame, - (100 * cpi->gf_active_count) - / (cpi->common.mb_rows * cpi->common.mb_cols), - cpi->this_iiratio, - cpi->next_iiratio, - cpi->refresh_golden_frame); - fclose(f); - } -#endif - cm->frame_to_show = &cm->yv12_fb[cm->new_fb_idx]; #if WRITE_RECON_BUFFER @@ -3288,38 +2952,42 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // Pick the loop filter level for the frame. loopfilter_frame(cpi, cm); +#if WRITE_RECON_BUFFER + if (cm->show_frame) + write_cx_frame_to_file(cm->frame_to_show, + cm->current_video_frame + 2000); + else + write_cx_frame_to_file(cm->frame_to_show, + cm->current_video_frame + 3000); +#endif + // build the bitstream cpi->dummy_packing = 0; vp9_pack_bitstream(cpi, dest, size); - if (cpi->mb.e_mbd.update_mb_segmentation_map) { + if (xd->update_mb_segmentation_map) { update_reference_segmentation_map(cpi); } release_scaled_references(cpi); update_reference_frames(cpi); - vp9_copy(cpi->common.fc.coef_counts_4x4, cpi->coef_counts_4x4); - vp9_copy(cpi->common.fc.coef_counts_8x8, cpi->coef_counts_8x8); - vp9_copy(cpi->common.fc.coef_counts_16x16, cpi->coef_counts_16x16); - vp9_copy(cpi->common.fc.coef_counts_32x32, cpi->coef_counts_32x32); + + for (t = TX_4X4; t <= TX_32X32; t++) + vp9_full_to_model_counts(cpi->common.fc.coef_counts[t], + cpi->coef_counts[t]); if (!cpi->common.error_resilient_mode && !cpi->common.frame_parallel_decoding_mode) { vp9_adapt_coef_probs(&cpi->common); -#if CONFIG_CODE_NONZEROCOUNT - vp9_adapt_nzc_probs(&cpi->common); -#endif } + if (cpi->common.frame_type != KEY_FRAME) { - vp9_copy(cpi->common.fc.sb_ymode_counts, cpi->sb_ymode_count); - vp9_copy(cpi->common.fc.ymode_counts, cpi->ymode_count); + vp9_copy(cpi->common.fc.y_mode_counts, cpi->y_mode_count); vp9_copy(cpi->common.fc.uv_mode_counts, cpi->y_uv_mode_count); - vp9_copy(cpi->common.fc.bmode_counts, cpi->bmode_count); - vp9_copy(cpi->common.fc.i8x8_mode_counts, cpi->i8x8_mode_count); - vp9_copy(cpi->common.fc.sub_mv_ref_counts, cpi->sub_mv_ref_count); - vp9_copy(cpi->common.fc.mbsplit_counts, cpi->mbsplit_count); -#if CONFIG_COMP_INTERINTRA_PRED - vp9_copy(cpi->common.fc.interintra_counts, cpi->interintra_count); -#endif + vp9_copy(cpi->common.fc.partition_counts, cpi->partition_count); + vp9_copy(cm->fc.intra_inter_count, cpi->intra_inter_count); + vp9_copy(cm->fc.comp_inter_count, cpi->comp_inter_count); + vp9_copy(cm->fc.single_ref_count, cpi->single_ref_count); + vp9_copy(cm->fc.comp_ref_count, cpi->comp_ref_count); cpi->common.fc.NMVcount = cpi->NMVcount; if (!cpi->common.error_resilient_mode && !cpi->common.frame_parallel_decoding_mode) { @@ -3328,9 +2996,9 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, vp9_adapt_nmv_probs(&cpi->common, cpi->mb.e_mbd.allow_high_precision_mv); } } -#if CONFIG_COMP_INTERINTRA_PRED - if (cm->frame_type != KEY_FRAME) - select_interintra_mode(cpi); + +#ifdef ENTROPY_STATS + vp9_update_mode_context_stats(cpi); #endif /* Move storing frame_type out of the above loop since it is also @@ -3368,16 +3036,16 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, cpi->avg_frame_qindex = (2 + 3 * cpi->avg_frame_qindex + cm->base_qindex) >> 2; // Keep a record from which we can calculate the average Q excluding GF updates and key frames - if ((cm->frame_type != KEY_FRAME) - && 
!cpi->refresh_golden_frame && !cpi->refresh_alt_ref_frame) { + if (cm->frame_type != KEY_FRAME && + !cpi->refresh_golden_frame && + !cpi->refresh_alt_ref_frame) { cpi->ni_frames++; - cpi->tot_q += vp9_convert_qindex_to_q(Q); + cpi->tot_q += vp9_convert_qindex_to_q(q); cpi->avg_q = cpi->tot_q / (double)cpi->ni_frames; - // Calculate the average Q for normal inter frames (not key or GFU - // frames). - cpi->ni_tot_qi += Q; - cpi->ni_av_qi = (cpi->ni_tot_qi / cpi->ni_frames); + // Calculate the average Q for normal inter frames (not key or GFU frames). + cpi->ni_tot_qi += q; + cpi->ni_av_qi = cpi->ni_tot_qi / cpi->ni_frames; } // Update the buffer level variable. @@ -3406,7 +3074,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, } // Actual bits spent - cpi->total_actual_bits += cpi->projected_frame_size; + cpi->total_actual_bits += cpi->projected_frame_size; // Debug stats cpi->total_target_vs_actual += (cpi->this_frame_target - cpi->projected_frame_size); @@ -3417,20 +3085,18 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, if (cm->frame_type == KEY_FRAME) { cpi->twopass.kf_group_bits += cpi->this_frame_target - cpi->projected_frame_size; - if (cpi->twopass.kf_group_bits < 0) - cpi->twopass.kf_group_bits = 0; + cpi->twopass.kf_group_bits = MAX(cpi->twopass.kf_group_bits, 0); } else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) { cpi->twopass.gf_group_bits += cpi->this_frame_target - cpi->projected_frame_size; - if (cpi->twopass.gf_group_bits < 0) - cpi->twopass.gf_group_bits = 0; + cpi->twopass.gf_group_bits = MAX(cpi->twopass.gf_group_bits, 0); } // Update the skip mb flag probabilities based on the distribution seen // in this frame. - update_base_skip_probs(cpi); + // update_base_skip_probs(cpi); -#if 0 // 1 && CONFIG_INTERNAL_STATS +#if 0 && CONFIG_INTERNAL_STATS { FILE *f = fopen("tmp.stt", "a"); int recon_err; @@ -3440,7 +3106,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, recon_err = vp9_calc_ss_err(cpi->Source, &cm->yv12_fb[cm->new_fb_idx]); - if (cpi->twopass.total_left_stats->coded_error != 0.0) + if (cpi->twopass.total_left_stats.coded_error != 0.0) fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d" "%7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f" "%6d %6d %5d %5d %5d %8.2f %10d %10.3f" @@ -3463,9 +3129,9 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, cm->frame_type, cpi->gfu_boost, cpi->twopass.est_max_qcorrection_factor, (int)cpi->twopass.bits_left, - cpi->twopass.total_left_stats->coded_error, + cpi->twopass.total_left_stats.coded_error, (double)cpi->twopass.bits_left / - cpi->twopass.total_left_stats->coded_error, + cpi->twopass.total_left_stats.coded_error, cpi->tot_recode_hits, recon_err, cpi->kf_boost, cpi->kf_zeromotion_pct); else @@ -3492,7 +3158,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, cm->frame_type, cpi->gfu_boost, cpi->twopass.est_max_qcorrection_factor, (int)cpi->twopass.bits_left, - cpi->twopass.total_left_stats->coded_error, + cpi->twopass.total_left_stats.coded_error, cpi->tot_recode_hits, recon_err, cpi->kf_boost, cpi->kf_zeromotion_pct); @@ -3577,10 +3243,33 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // Tell the caller that the frame was coded as a key frame *frame_flags = cm->frame_flags | FRAMEFLAGS_KEY; - // As this frame is a key frame the next defaults to an inter frame. +#if CONFIG_MULTIPLE_ARF + // Reset the sequence number. 
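avg_frame_qindex above is kept as an exponential moving average with weight 3/4 on history and 1/4 on the new base_qindex, the +2 providing rounding. As a one-line sketch:

    /* Rounded EMA: new = (3 * old + sample + 2) / 4. */
    static int update_avg_qindex(int avg, int base_qindex) {
      return (2 + 3 * avg + base_qindex) >> 2;
    }
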
+    if (cpi->multi_arf_enabled) {
+      cpi->sequence_number = 0;
+      cpi->frame_coding_order_period = cpi->new_frame_coding_order_period;
+      cpi->new_frame_coding_order_period = -1;
+    }
+#endif
+
+    // As this frame is a key frame the next defaults to an inter frame.
     cm->frame_type = INTER_FRAME;
   } else {
     *frame_flags = cm->frame_flags&~FRAMEFLAGS_KEY;
+
+#if CONFIG_MULTIPLE_ARF
+    /* Increment position in the coded frame sequence. */
+    if (cpi->multi_arf_enabled) {
+      ++cpi->sequence_number;
+      if (cpi->sequence_number >= cpi->frame_coding_order_period) {
+        cpi->sequence_number = 0;
+        cpi->frame_coding_order_period = cpi->new_frame_coding_order_period;
+        cpi->new_frame_coding_order_period = -1;
+      }
+      cpi->this_frame_weight = cpi->arf_weight[cpi->sequence_number];
+      assert(cpi->this_frame_weight >= 0);
+    }
+#endif
   }

   // Clear the one shot update flags for segmentation map and mode/ref loop filter deltas.
@@ -3592,16 +3281,16 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
   cm->last_width = cm->width;
   cm->last_height = cm->height;

-  // Dont increment frame counters if this was an altref buffer update not a real frame
+  // Don't increment frame counters if this was an altref buffer
+  // update, not a real frame
+  cm->last_show_frame = cm->show_frame;
   if (cm->show_frame) {
-    cm->current_video_frame++;
-    cpi->frames_since_key++;
+    ++cm->current_video_frame;
+    ++cpi->frames_since_key;
   }

   // reset to normal state now that we are done.
-
-
 #if 0
   {
     char filename[512];
@@ -3620,11 +3309,15 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,

   if (cm->show_frame) {
     vpx_memcpy(cm->prev_mip, cm->mip,
-               (cm->mb_cols + 1) * (cm->mb_rows + 1)* sizeof(MODE_INFO));
+               cm->mode_info_stride * (cm->mi_rows + 64 / MI_SIZE) *
+               sizeof(MODE_INFO));
   } else {
     vpx_memset(cm->prev_mip, 0,
-               (cm->mb_cols + 1) * (cm->mb_rows + 1)* sizeof(MODE_INFO));
+               cm->mode_info_stride * (cm->mi_rows + 64 / MI_SIZE) *
+               sizeof(MODE_INFO));
   }
+  // restore prev_mi
+  cm->prev_mi = cm->prev_mip + cm->mode_info_stride + 1;
 }

 static void Pass2Encode(VP9_COMP *cpi, unsigned long *size,
@@ -3662,6 +3355,15 @@ int vp9_receive_raw_frame(VP9_PTR ptr, unsigned int frame_flags,
   struct vpx_usec_timer timer;
   int res = 0;

+  if (!cpi->initial_width) {
+    // TODO(jkoleszar): Support 1/4 subsampling?
+    cm->subsampling_x = sd->uv_width < sd->y_width;
+    cm->subsampling_y = sd->uv_height < sd->y_height;
+    alloc_raw_frame_buffers(cpi);
+
+    cpi->initial_width = cm->width;
+    cpi->initial_height = cm->height;
+  }
  vpx_usec_timer_start(&timer);
  if (vp9_lookahead_push(cpi->lookahead, sd, time_stamp, end_time, frame_flags,
                         cpi->active_map_enabled ?
cpi->active_map : NULL))
@@ -3676,15 +3378,24 @@ int vp9_receive_raw_frame(VP9_PTR ptr, unsigned int frame_flags,

 static int frame_is_reference(const VP9_COMP *cpi) {
   const VP9_COMMON *cm = &cpi->common;
-  const MACROBLOCKD *xd = &cpi->mb.e_mbd;
+  const MACROBLOCKD *mb = &cpi->mb.e_mbd;

-  return cm->frame_type == KEY_FRAME || cpi->refresh_last_frame
-      || cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame
-      || cm->refresh_entropy_probs
-      || xd->mode_ref_lf_delta_update
-      || xd->update_mb_segmentation_map || xd->update_mb_segmentation_data;
+  return cm->frame_type == KEY_FRAME ||
+         cpi->refresh_last_frame ||
+         cpi->refresh_golden_frame ||
+         cpi->refresh_alt_ref_frame ||
+         cm->refresh_frame_context ||
+         mb->mode_ref_lf_delta_update ||
+         mb->update_mb_segmentation_map ||
+         mb->update_mb_segmentation_data;
 }

+#if CONFIG_MULTIPLE_ARF
+int is_next_frame_arf(VP9_COMP *cpi) {
+  // Negative entry in frame_coding_order indicates an ARF at this position.
+  return cpi->frame_coding_order[cpi->sequence_number + 1] < 0 ? 1 : 0;
+}
+#endif

 int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
                             unsigned long *size, unsigned char *dest,
@@ -3693,6 +3404,8 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
   VP9_COMMON *cm = &cpi->common;
   struct vpx_usec_timer cmptimer;
   YV12_BUFFER_CONFIG *force_src_buffer = NULL;
+  int i;
+  // FILE *fp_out = fopen("enc_frame_type.txt", "a");

   if (!cpi)
     return -1;
@@ -3704,46 +3417,117 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
   cpi->mb.e_mbd.allow_high_precision_mv = ALTREF_HIGH_PRECISION_MV;
   set_mvcost(&cpi->mb);

-  // Should we code an alternate reference frame
-  if (cpi->oxcf.play_alternate &&
-      cpi->source_alt_ref_pending) {
-    if ((cpi->source = vp9_lookahead_peek(cpi->lookahead,
-                                          cpi->frames_till_gf_update_due))) {
+  // Should we code an alternate reference frame.
+  if (cpi->oxcf.play_alternate && cpi->source_alt_ref_pending) {
+    int frames_to_arf;
+
+#if CONFIG_MULTIPLE_ARF
+    assert(!cpi->multi_arf_enabled ||
+           cpi->frame_coding_order[cpi->sequence_number] < 0);
+
+    if (cpi->multi_arf_enabled && (cpi->pass == 2))
+      frames_to_arf = (-cpi->frame_coding_order[cpi->sequence_number])
+          - cpi->next_frame_in_order;
+    else
+#endif
+      frames_to_arf = cpi->frames_till_gf_update_due;
+
+    assert(frames_to_arf < cpi->twopass.frames_to_key);
+
+    if ((cpi->source = vp9_lookahead_peek(cpi->lookahead, frames_to_arf))) {
+#if CONFIG_MULTIPLE_ARF
+      cpi->alt_ref_source[cpi->arf_buffered] = cpi->source;
+#else
       cpi->alt_ref_source = cpi->source;
+#endif
+
       if (cpi->oxcf.arnr_max_frames > 0) {
-        vp9_temporal_filter_prepare(cpi, cpi->frames_till_gf_update_due);
+        // Produce the filtered ARF frame.
+        // TODO(agrange) merge these two functions.
+        configure_arnr_filter(cpi, cm->current_video_frame + frames_to_arf,
+                              cpi->gfu_boost);
+        vp9_temporal_filter_prepare(cpi, frames_to_arf);
         force_src_buffer = &cpi->alt_ref_buffer;
       }
-      cm->frames_till_alt_ref_frame = cpi->frames_till_gf_update_due;
+
+      cm->show_frame = 0;
+      cm->intra_only = 0;
       cpi->refresh_alt_ref_frame = 1;
       cpi->refresh_golden_frame = 0;
       cpi->refresh_last_frame = 0;
-      cm->show_frame = 0;
-      cpi->source_alt_ref_pending = FALSE;  // Clear Pending altf Ref flag.
       cpi->is_src_frame_alt_ref = 0;
+
+      // TODO(agrange) This needs to vary depending on where the next ARF is.
+      cm->frames_till_alt_ref_frame = frames_to_arf;
+
+#if CONFIG_MULTIPLE_ARF
+      if (!cpi->multi_arf_enabled)
+#endif
+        cpi->source_alt_ref_pending = 0;  // Clear pending alt ref flag.
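[Illustrative sketch — not part of the patch: is_next_frame_arf() above relies on a sign convention in frame_coding_order, where a negative entry marks an ARF slot; judging from the frames_to_arf computation, the magnitude appears to encode the display-order frame the ARF is filtered towards. The concrete order below is hypothetical; only the sign convention is taken from the patch.]

#include <stdio.h>

static int is_arf_slot(const int *coding_order, int pos) {
  /* Negative entry == ARF at this position in coding order. */
  return coding_order[pos] < 0;
}

int main(void) {
  /* Hypothetical coding order: display frames 1..3, an ARF aimed at
   * display frame 7, then frames 4..7 (7 overlays the ARF). */
  const int coding_order[] = { 1, 2, 3, -7, 4, 5, 6, 7 };
  int i;
  for (i = 0; i < 8; ++i)
    printf("slot %d -> %s\n", i,
           is_arf_slot(coding_order, i) ? "ARF" : "regular");
  return 0;
}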
} } if (!cpi->source) { +#if CONFIG_MULTIPLE_ARF + int i; +#endif if ((cpi->source = vp9_lookahead_pop(cpi->lookahead, flush))) { cm->show_frame = 1; +#if CONFIG_MULTIPLE_ARF + // Is this frame the ARF overlay. + cpi->is_src_frame_alt_ref = 0; + for (i = 0; i < cpi->arf_buffered; ++i) { + if (cpi->source == cpi->alt_ref_source[i]) { + cpi->is_src_frame_alt_ref = 1; + cpi->refresh_golden_frame = 1; + break; + } + } +#else cpi->is_src_frame_alt_ref = cpi->alt_ref_source && (cpi->source == cpi->alt_ref_source); - +#endif if (cpi->is_src_frame_alt_ref) { - cpi->refresh_last_frame = 0; + // Current frame is an ARF overlay frame. +#if CONFIG_MULTIPLE_ARF + cpi->alt_ref_source[i] = NULL; +#else cpi->alt_ref_source = NULL; +#endif + // Don't refresh the last buffer for an ARF overlay frame. It will + // become the GF so preserve last as an alternative prediction option. + cpi->refresh_last_frame = 0; } +#if CONFIG_MULTIPLE_ARF + ++cpi->next_frame_in_order; +#endif } } if (cpi->source) { - cpi->un_scaled_source = - cpi->Source = force_src_buffer ? force_src_buffer : &cpi->source->img; + cpi->un_scaled_source = cpi->Source = force_src_buffer ? force_src_buffer + : &cpi->source->img; *time_stamp = cpi->source->ts_start; *time_end = cpi->source->ts_end; *frame_flags = cpi->source->flags; + + // fprintf(fp_out, " Frame:%d", cm->current_video_frame); +#if CONFIG_MULTIPLE_ARF + if (cpi->multi_arf_enabled) { + // fprintf(fp_out, " seq_no:%d this_frame_weight:%d", + // cpi->sequence_number, cpi->this_frame_weight); + } else { + // fprintf(fp_out, "\n"); + } +#else + // fprintf(fp_out, "\n"); +#endif + +#if CONFIG_MULTIPLE_ARF + if ((cm->frame_type != KEY_FRAME) && (cpi->pass == 2)) + cpi->source_alt_ref_pending = is_next_frame_arf(cpi); +#endif } else { *size = 0; if (flush && cpi->pass == 1 && !cpi->twopass.first_pass_done) { @@ -3751,6 +3535,7 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, cpi->twopass.first_pass_done = 1; } + // fclose(fp_out); return -1; } @@ -3768,32 +3553,26 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, this_duration = cpi->source->ts_end - cpi->source->ts_start; step = 1; } else { - int64_t last_duration; + int64_t last_duration = cpi->last_end_time_stamp_seen + - cpi->last_time_stamp_seen; this_duration = cpi->source->ts_end - cpi->last_end_time_stamp_seen; - last_duration = cpi->last_end_time_stamp_seen - - cpi->last_time_stamp_seen; + // do a step update if the duration changes by 10% if (last_duration) step = (int)((this_duration - last_duration) * 10 / last_duration); } if (this_duration) { - if (step) + if (step) { vp9_new_frame_rate(cpi, 10000000.0 / this_duration); - else { - double avg_duration, interval; - - /* Average this frame's rate into the last second's average - * frame rate. If we haven't seen 1 second yet, then average - * over the whole interval seen. - */ - interval = (double)(cpi->source->ts_end - - cpi->first_time_stamp_ever); - if (interval > 10000000.0) - interval = 10000000; - - avg_duration = 10000000.0 / cpi->oxcf.frame_rate; + } else { + // Average this frame's rate into the last second's average + // frame rate. If we haven't seen 1 second yet, then average + // over the whole interval seen. 
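[Illustrative sketch — not part of the patch: the else-branch below folds each new frame duration into a running average weighted over at most one second of media time; the 10000000.0 constants indicate timestamps in 1/10,000,000-second units. A minimal model of that update:]

#include <stdio.h>

/* Blend this_dur into avg over an interval capped at one second
 * (10,000,000 ticks), as in the else-branch below. */
static double blend_duration(double avg, double this_dur, double interval) {
  avg *= (interval - avg + this_dur);
  avg /= interval;
  return avg;
}

int main(void) {
  const double tick_per_sec = 10000000.0;
  double avg = tick_per_sec / 30.0;  /* nominal 30 fps */
  /* One slower (25 fps) frame raises the average duration slightly,
   * so the effective frame rate eases toward 25. */
  avg = blend_duration(avg, tick_per_sec / 25.0, tick_per_sec);
  printf("avg duration %.1f ticks (~%.2f fps)\n", avg, tick_per_sec / avg);
  return 0;
}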
+ const double interval = MIN((double)(cpi->source->ts_end + - cpi->first_time_stamp_ever), 10000000.0); + double avg_duration = 10000000.0 / cpi->oxcf.frame_rate; avg_duration *= (interval - avg_duration + this_duration); avg_duration /= interval; @@ -3811,39 +3590,56 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, // Clear down mmx registers vp9_clear_system_state(); // __asm emms; - cm->frame_type = INTER_FRAME; - cm->frame_flags = *frame_flags; - -#if 0 - - if (cpi->refresh_alt_ref_frame) { - // cpi->refresh_golden_frame = 1; - cpi->refresh_golden_frame = 0; - cpi->refresh_last_frame = 0; - } else { - cpi->refresh_golden_frame = 0; - cpi->refresh_last_frame = 1; - } - -#endif - /* find a free buffer for the new frame, releasing the reference previously * held. */ cm->fb_idx_ref_cnt[cm->new_fb_idx]--; cm->new_fb_idx = get_free_fb(cm); +#if CONFIG_MULTIPLE_ARF + /* Set up the correct ARF frame. */ + if (cpi->refresh_alt_ref_frame) { + ++cpi->arf_buffered; + } + if (cpi->multi_arf_enabled && (cm->frame_type != KEY_FRAME) && + (cpi->pass == 2)) { + cpi->alt_fb_idx = cpi->arf_buffer_idx[cpi->sequence_number]; + } +#endif + /* Get the mapping of L/G/A to the reference buffer pool */ cm->active_ref_idx[0] = cm->ref_frame_map[cpi->lst_fb_idx]; cm->active_ref_idx[1] = cm->ref_frame_map[cpi->gld_fb_idx]; cm->active_ref_idx[2] = cm->ref_frame_map[cpi->alt_fb_idx]; - /* Reset the frame pointers to the current frame size */ - vp8_yv12_realloc_frame_buffer(&cm->yv12_fb[cm->new_fb_idx], - cm->width, cm->height, - VP9BORDERINPIXELS); +#if 0 // CONFIG_MULTIPLE_ARF + if (cpi->multi_arf_enabled) { + fprintf(fp_out, " idx(%d, %d, %d, %d) active(%d, %d, %d)", + cpi->lst_fb_idx, cpi->gld_fb_idx, cpi->alt_fb_idx, cm->new_fb_idx, + cm->active_ref_idx[0], cm->active_ref_idx[1], cm->active_ref_idx[2]); + if (cpi->refresh_alt_ref_frame) + fprintf(fp_out, " type:ARF"); + if (cpi->is_src_frame_alt_ref) + fprintf(fp_out, " type:OVERLAY[%d]", cpi->alt_fb_idx); + fprintf(fp_out, "\n"); + } +#endif + + cm->frame_type = INTER_FRAME; + cm->frame_flags = *frame_flags; + + // Reset the frame pointers to the current frame size + vp9_realloc_frame_buffer(&cm->yv12_fb[cm->new_fb_idx], + cm->width, cm->height, + cm->subsampling_x, cm->subsampling_y, + VP9BORDERINPIXELS); + + // Calculate scaling factors for each of the 3 available references + for (i = 0; i < ALLOWED_REFS_PER_FRAME; ++i) + vp9_setup_scale_factors(cm, i); vp9_setup_interp_filters(&cpi->mb.e_mbd, DEFAULT_INTERP_FILTER, cm); + if (cpi->pass == 1) { Pass1Encode(cpi, size, dest, frame_flags); } else if (cpi->pass == 2) { @@ -3852,30 +3648,27 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, encode_frame_to_data_rate(cpi, size, dest, frame_flags); } - if (cm->refresh_entropy_probs) { - vpx_memcpy(&cm->frame_contexts[cm->frame_context_idx], &cm->fc, - sizeof(cm->fc)); - } + if (cm->refresh_frame_context) + cm->frame_contexts[cm->frame_context_idx] = cm->fc; if (*size > 0) { // if its a dropped frame honor the requests on subsequent frames cpi->droppable = !frame_is_reference(cpi); // return to normal state - cm->refresh_entropy_probs = 1; + cm->reset_frame_context = 0; + cm->refresh_frame_context = 1; cpi->refresh_alt_ref_frame = 0; cpi->refresh_golden_frame = 0; cpi->refresh_last_frame = 1; cm->frame_type = INTER_FRAME; - } vpx_usec_timer_mark(&cmptimer); cpi->time_compress_data += vpx_usec_timer_elapsed(&cmptimer); - if (cpi->b_calculate_psnr && cpi->pass != 1 && cm->show_frame) { + if (cpi->b_calculate_psnr && cpi->pass 
!= 1 && cm->show_frame)
     generate_psnr_packet(cpi);
-  }

 #if CONFIG_INTERNAL_STATS
@@ -3923,7 +3716,7 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
       double weight = 0;
 #if CONFIG_POSTPROC
       vp9_deblock(cm->frame_to_show, &cm->post_proc_buffer,
-                  cm->filter_level * 10 / 6, 1, 0);
+                  cm->filter_level * 10 / 6);
 #endif
       vp9_clear_system_state();
@@ -3950,10 +3743,16 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
       cpi->totalp += frame_psnr2;

       frame_ssim2 = vp9_calc_ssim(cpi->Source,
-                                  &cm->post_proc_buffer, 1, &weight);
+                                  recon, 1, &weight);

       cpi->summed_quality += frame_ssim2 * weight;
       cpi->summed_weights += weight;
+
+      frame_ssim2 = vp9_calc_ssim(cpi->Source,
+                                  &cm->post_proc_buffer, 1, &weight);
+
+      cpi->summedp_quality += frame_ssim2 * weight;
+      cpi->summedp_weights += weight;
 #if 0
       {
         FILE *f = fopen("q_used.stt", "a");
@@ -3975,12 +3774,11 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
         cpi->total_ssimg_v += v;
         cpi->total_ssimg_all += frame_all;
       }
-
     }
   }

 #endif

-
+  // fclose(fp_out);
   return 0;
 }

@@ -4013,8 +3811,9 @@ int vp9_get_preview_raw_frame(VP9_PTR comp, YV12_BUFFER_CONFIG *dest,
 }

 int vp9_set_roimap(VP9_PTR comp, unsigned char *map, unsigned int rows,
-                   unsigned int cols, int delta_q[4], int delta_lf[4],
-                   unsigned int threshold[4]) {
+                   unsigned int cols, int delta_q[MAX_MB_SEGMENTS],
+                   int delta_lf[MAX_MB_SEGMENTS],
+                   unsigned int threshold[MAX_MB_SEGMENTS]) {
   VP9_COMP *cpi = (VP9_COMP *) comp;
   signed char feature_data[SEG_LVL_MAX][MAX_MB_SEGMENTS];
   MACROBLOCKD *xd = &cpi->mb.e_mbd;
@@ -4034,25 +3833,15 @@ int vp9_set_roimap(VP9_PTR comp, unsigned char *map, unsigned int rows,
   // Activate segmentation.
   vp9_enable_segmentation((VP9_PTR)cpi);

-  // Set up the quant segment data
-  feature_data[SEG_LVL_ALT_Q][0] = delta_q[0];
-  feature_data[SEG_LVL_ALT_Q][1] = delta_q[1];
-  feature_data[SEG_LVL_ALT_Q][2] = delta_q[2];
-  feature_data[SEG_LVL_ALT_Q][3] = delta_q[3];
-
-  // Set up the loop segment data s
-  feature_data[SEG_LVL_ALT_LF][0] = delta_lf[0];
-  feature_data[SEG_LVL_ALT_LF][1] = delta_lf[1];
-  feature_data[SEG_LVL_ALT_LF][2] = delta_lf[2];
-  feature_data[SEG_LVL_ALT_LF][3] = delta_lf[3];
-
-  cpi->segment_encode_breakout[0] = threshold[0];
-  cpi->segment_encode_breakout[1] = threshold[1];
-  cpi->segment_encode_breakout[2] = threshold[2];
-  cpi->segment_encode_breakout[3] = threshold[3];
+  // Set up the quant, LF and breakout threshold segment data
+  for (i = 0; i < MAX_MB_SEGMENTS; i++) {
+    feature_data[SEG_LVL_ALT_Q][i] = delta_q[i];
+    feature_data[SEG_LVL_ALT_LF][i] = delta_lf[i];
+    cpi->segment_encode_breakout[i] = threshold[i];
+  }

   // Enable the loop and quant changes in the feature mask
-  for (i = 0; i < 4; i++) {
+  for (i = 0; i < MAX_MB_SEGMENTS; i++) {
     if (delta_q[i])
       vp9_enable_segfeature(xd, i, SEG_LVL_ALT_Q);
     else
@@ -4079,8 +3868,9 @@ int vp9_set_active_map(VP9_PTR comp, unsigned char *map,
     if (map) {
       vpx_memcpy(cpi->active_map, map, rows * cols);
       cpi->active_map_enabled = 1;
-    } else
+    } else {
       cpi->active_map_enabled = 0;
+    }

     return 0;
   } else {
@@ -4095,10 +3885,7 @@ int vp9_set_internal_size(VP9_PTR comp,
   VP9_COMMON *cm = &cpi->common;
   int hr = 0, hs = 0, vr = 0, vs = 0;

-  if (horiz_mode > ONETWO)
-    return -1;
-
-  if (vert_mode > ONETWO)
+  if (horiz_mode > ONETWO || vert_mode > ONETWO)
     return -1;

   Scale2Ratio(horiz_mode, &hr, &hs);
@@ -4141,6 +3928,5 @@ int vp9_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest) {

 int vp9_get_quantizer(VP9_PTR c) {
-  VP9_COMP *cpi = (VP9_COMP *) c;
-  return 
cpi->common.base_qindex; + return ((VP9_COMP *)c)->common.base_qindex; } diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 891cc3f5223fb748e340c7b91f0a7494c72618fd..2b20f009c008b1c385b24bddcbf6063905c961fe 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -30,23 +30,24 @@ #include "vp9/encoder/vp9_lookahead.h" // Experimental rate control switches -// #define ONE_SHOT_Q_ESTIMATE 1 -// #define STRICT_ONE_SHOT_Q 1 -// #define DISABLE_RC_LONG_TERM_MEM 1 +#if CONFIG_ONESHOTQ +#define ONE_SHOT_Q_ESTIMATE 0 +#define STRICT_ONE_SHOT_Q 0 +#define DISABLE_RC_LONG_TERM_MEM 0 +#endif // #define SPEEDSTATS 1 +#if CONFIG_MULTIPLE_ARF +// Set MIN_GF_INTERVAL to 1 for the full decomposition. +#define MIN_GF_INTERVAL 2 +#else #define MIN_GF_INTERVAL 4 +#endif #define DEFAULT_GF_INTERVAL 7 #define KEY_FRAME_CONTEXT 5 -#define MAX_LAG_BUFFERS 25 - -#if CONFIG_COMP_INTERINTRA_PRED -#define MAX_MODES 54 -#else -#define MAX_MODES 42 -#endif +#define MAX_MODES 36 #define MIN_THRESHMULT 32 #define MAX_THRESHMULT 512 @@ -63,63 +64,35 @@ typedef struct { int nmvcosts[2][MV_VALS]; int nmvcosts_hp[2][MV_VALS]; -#ifdef MODE_STATS - // Stats - int y_modes[VP9_YMODES]; - int uv_modes[VP9_UV_MODES]; - int i8x8_modes[VP9_I8X8_MODES]; - int b_modes[B_MODE_COUNT]; - int inter_y_modes[MB_MODE_COUNT]; - int inter_uv_modes[VP9_UV_MODES]; - int inter_b_modes[B_MODE_COUNT]; -#endif - vp9_prob segment_pred_probs[PREDICTION_PROBS]; - unsigned char ref_pred_probs_update[PREDICTION_PROBS]; - vp9_prob ref_pred_probs[PREDICTION_PROBS]; - vp9_prob prob_comppred[COMP_PRED_CONTEXTS]; + vp9_prob intra_inter_prob[INTRA_INTER_CONTEXTS]; + vp9_prob comp_inter_prob[COMP_INTER_CONTEXTS]; + vp9_prob single_ref_prob[REF_CONTEXTS][2]; + vp9_prob comp_ref_prob[REF_CONTEXTS]; unsigned char *last_frame_seg_map_copy; // 0 = Intra, Last, GF, ARF signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS]; - // 0 = BPRED, ZERO_MV, MV, SPLIT + // 0 = ZERO_MV, MV signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS]; - vp9_coeff_probs coef_probs_4x4[BLOCK_TYPES]; - vp9_coeff_probs coef_probs_8x8[BLOCK_TYPES]; - vp9_coeff_probs coef_probs_16x16[BLOCK_TYPES]; - vp9_coeff_probs coef_probs_32x32[BLOCK_TYPES]; + vp9_coeff_probs_model coef_probs[TX_SIZE_MAX_SB][BLOCK_TYPES]; - vp9_prob sb_ymode_prob[VP9_I32X32_MODES - 1]; - vp9_prob ymode_prob[VP9_YMODES - 1]; /* interframe intra mode probs */ - vp9_prob uv_mode_prob[VP9_YMODES][VP9_UV_MODES - 1]; - vp9_prob bmode_prob[VP9_NKF_BINTRAMODES - 1]; - vp9_prob i8x8_mode_prob[VP9_I8X8_MODES - 1]; - vp9_prob sub_mv_ref_prob[SUBMVREF_COUNT][VP9_SUBMVREFS - 1]; - vp9_prob mbsplit_prob[VP9_NUMMBSPLITS - 1]; + vp9_prob y_mode_prob[4][VP9_INTRA_MODES - 1]; + vp9_prob uv_mode_prob[VP9_INTRA_MODES][VP9_INTRA_MODES - 1]; + vp9_prob partition_prob[2][NUM_PARTITION_CONTEXTS][PARTITION_TYPES - 1]; vp9_prob switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1] [VP9_SWITCHABLE_FILTERS - 1]; -#if CONFIG_COMP_INTERINTRA_PRED - vp9_prob interintra_prob; -#endif - int mv_ref_ct[INTER_MODE_CONTEXTS][4][2]; - int vp9_mode_contexts[INTER_MODE_CONTEXTS][4]; - -#if CONFIG_CODE_NONZEROCOUNT - vp9_prob nzc_probs_4x4 - [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC4X4_NODES]; - vp9_prob nzc_probs_8x8 - [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC8X8_NODES]; - vp9_prob nzc_probs_16x16 - [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC16X16_NODES]; - vp9_prob nzc_probs_32x32 - [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC32X32_NODES]; - vp9_prob nzc_pcat_probs[MAX_NZC_CONTEXTS] - 
[NZC_TOKENS_EXTRA][NZC_BITS_EXTRA]; -#endif + int inter_mode_counts[INTER_MODE_CONTEXTS][VP9_INTER_MODES - 1][2]; + vp9_prob inter_mode_probs[INTER_MODE_CONTEXTS][VP9_INTER_MODES - 1]; + + vp9_prob tx_probs_8x8p[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB - 3]; + vp9_prob tx_probs_16x16p[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB - 2]; + vp9_prob tx_probs_32x32p[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB - 1]; + vp9_prob mbskip_probs[MBSKIP_CONTEXTS]; } CODING_CONTEXT; typedef struct { @@ -142,8 +115,7 @@ typedef struct { double new_mv_count; double duration; double count; -} -FIRSTPASS_STATS; +} FIRSTPASS_STATS; typedef struct { int frames_so_far; @@ -155,7 +127,6 @@ typedef struct { double frame_mvr_abs; double frame_mvc; double frame_mvc_abs; - } ONEPASS_FRAMESTATS; typedef struct { @@ -207,11 +178,6 @@ typedef enum { THR_SPLITA, THR_B_PRED, - THR_I8X8_PRED, - - THR_COMP_ZEROLG, - THR_COMP_NEARESTLG, - THR_COMP_NEARLG, THR_COMP_ZEROLA, THR_COMP_NEARESTLA, @@ -221,31 +187,12 @@ typedef enum { THR_COMP_NEARESTGA, THR_COMP_NEARGA, - THR_COMP_NEWLG, THR_COMP_NEWLA, THR_COMP_NEWGA, - THR_COMP_SPLITLG, THR_COMP_SPLITLA, THR_COMP_SPLITGA, -#if CONFIG_COMP_INTERINTRA_PRED - THR_COMP_INTERINTRA_ZEROL, - THR_COMP_INTERINTRA_NEARESTL, - THR_COMP_INTERINTRA_NEARL, - THR_COMP_INTERINTRA_NEWL, - - THR_COMP_INTERINTRA_ZEROG, - THR_COMP_INTERINTRA_NEARESTG, - THR_COMP_INTERINTRA_NEARG, - THR_COMP_INTERINTRA_NEWG, - - THR_COMP_INTERINTRA_ZEROA, - THR_COMP_INTERINTRA_NEARESTA, - THR_COMP_INTERINTRA_NEARA, - THR_COMP_INTERINTRA_NEWA, -#endif -} -THR_MODES; +} THR_MODES; typedef enum { DIAMOND = 0, @@ -256,7 +203,6 @@ typedef enum { typedef struct { int RD; SEARCH_METHODS search_method; - int improved_dct; int auto_filter; int recode_loop; int iterative_sub_pixel; @@ -266,58 +212,50 @@ typedef struct { int max_step_search_steps; int first_step; int optimize_coefficients; - int no_skip_block4x4_search; int search_best_filter; - int splitmode_breakout; - int mb16_breakout; int static_segmentation; + int comp_inter_joint_search_thresh; + int adpative_rd_thresh; } SPEED_FEATURES; -typedef struct { - MACROBLOCK mb; - int totalrate; -} MB_ROW_COMP; - -typedef struct { - TOKENEXTRA *start; - TOKENEXTRA *stop; -} TOKENLIST; - -typedef struct { - int ithread; - void *ptr1; - void *ptr2; -} ENCODETHREAD_DATA; -typedef struct { - int ithread; - void *ptr1; -} LPFTHREAD_DATA; - enum BlockSize { - BLOCK_16X8 = PARTITIONING_16X8, - BLOCK_8X16 = PARTITIONING_8X16, - BLOCK_8X8 = PARTITIONING_8X8, - BLOCK_4X4 = PARTITIONING_4X4, + BLOCK_4X4, + BLOCK_4X8, + BLOCK_8X4, + BLOCK_8X8, + BLOCK_8X16, + BLOCK_16X8, BLOCK_16X16, - BLOCK_MAX_SEGMENTS, - BLOCK_32X32 = BLOCK_MAX_SEGMENTS, + BLOCK_32X32, + BLOCK_32X16, + BLOCK_16X32, + BLOCK_64X32, + BLOCK_32X64, BLOCK_64X64, BLOCK_MAX_SB_SEGMENTS, }; typedef struct VP9_COMP { - DECLARE_ALIGNED(16, short, Y1quant[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, unsigned char, Y1quant_shift[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, short, Y1zbin[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, short, Y1round[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, short, y_quant[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, unsigned char, y_quant_shift[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, short, y_zbin[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, short, y_round[QINDEX_RANGE][16]); + + DECLARE_ALIGNED(16, short, uv_quant[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, unsigned char, uv_quant_shift[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, short, uv_zbin[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, short, uv_round[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, 
short, UVquant[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, unsigned char, UVquant_shift[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, short, UVzbin[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, short, UVround[QINDEX_RANGE][16]); +#if CONFIG_ALPHA + DECLARE_ALIGNED(16, short, a_quant[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, unsigned char, a_quant_shift[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, short, a_zbin[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, short, a_round[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, short, zrun_zbin_boost_y1[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, short, zrun_zbin_boost_a[QINDEX_RANGE][16]); +#endif + DECLARE_ALIGNED(16, short, zrun_zbin_boost_y[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, short, zrun_zbin_boost_uv[QINDEX_RANGE][16]); MACROBLOCK mb; @@ -326,7 +264,11 @@ typedef struct VP9_COMP { struct lookahead_ctx *lookahead; struct lookahead_entry *source; +#if CONFIG_MULTIPLE_ARF + struct lookahead_entry *alt_ref_source[NUM_REF_FRAMES]; +#else struct lookahead_entry *alt_ref_source; +#endif YV12_BUFFER_CONFIG *Source; YV12_BUFFER_CONFIG *un_scaled_source; @@ -345,6 +287,9 @@ typedef struct VP9_COMP { int lst_fb_idx; int gld_fb_idx; int alt_fb_idx; +#if CONFIG_MULTIPLE_ARF + int alt_ref_fb_idx[NUM_REF_FRAMES - 3]; +#endif int refresh_last_frame; int refresh_golden_frame; int refresh_alt_ref_frame; @@ -358,6 +303,12 @@ typedef struct VP9_COMP { unsigned int key_frame_frequency; unsigned int this_key_frame_forced; unsigned int next_key_frame_forced; +#if CONFIG_MULTIPLE_ARF + // Position within a frame coding order (including any additional ARF frames). + unsigned int sequence_number; + // Next frame in naturally occurring order that has not yet been coded. + int next_frame_in_order; +#endif // Ambient reconstruction err target for force key frames int ambient_err; @@ -367,16 +318,19 @@ typedef struct VP9_COMP { unsigned int mode_chosen_counts[MAX_MODES]; int rd_thresh_mult[MAX_MODES]; - int rd_baseline_thresh[MAX_MODES]; - int rd_threshes[MAX_MODES]; + int rd_baseline_thresh[BLOCK_SIZE_TYPES][MAX_MODES]; + int rd_threshes[BLOCK_SIZE_TYPES][MAX_MODES]; + int rd_thresh_freq_fact[BLOCK_SIZE_TYPES][MAX_MODES]; + int64_t rd_comp_pred_diff[NB_PREDICTION_TYPES]; int rd_prediction_type_threshes[4][NB_PREDICTION_TYPES]; - int comp_pred_count[COMP_PRED_CONTEXTS]; - int single_pred_count[COMP_PRED_CONTEXTS]; + unsigned int intra_inter_count[INTRA_INTER_CONTEXTS][2]; + unsigned int comp_inter_count[COMP_INTER_CONTEXTS][2]; + unsigned int single_ref_count[REF_CONTEXTS][2][2]; + unsigned int comp_ref_count[REF_CONTEXTS][2]; + // FIXME contextualize - int txfm_count_32x32p[TX_SIZE_MAX_SB]; - int txfm_count_16x16p[TX_SIZE_MAX_MB]; - int txfm_count_8x8p[TX_SIZE_MAX_MB - 1]; + int64_t rd_tx_select_diff[NB_TXFM_MODES]; int rd_tx_select_threshes[4][NB_TXFM_MODES]; @@ -396,7 +350,6 @@ typedef struct VP9_COMP { double gf_rate_correction_factor; int frames_till_gf_update_due; // Count down till next GF - int current_gf_interval; // GF interval chosen when we coded the last GF int gf_overspend_bits; // Total bits overspent becasue of GF boost (cumulative) @@ -453,56 +406,15 @@ typedef struct VP9_COMP { int cq_target_quality; - int sb32_count[2]; - int sb64_count[2]; - int sb_ymode_count [VP9_I32X32_MODES]; - int ymode_count[VP9_YMODES]; /* intra MB type cts this frame */ - int bmode_count[VP9_NKF_BINTRAMODES]; - int i8x8_mode_count[VP9_I8X8_MODES]; - int sub_mv_ref_count[SUBMVREF_COUNT][VP9_SUBMVREFS]; - int mbsplit_count[VP9_NUMMBSPLITS]; - int y_uv_mode_count[VP9_YMODES][VP9_UV_MODES]; -#if 
CONFIG_COMP_INTERINTRA_PRED - unsigned int interintra_count[2]; - unsigned int interintra_select_count[2]; -#endif + int y_mode_count[4][VP9_INTRA_MODES]; + int y_uv_mode_count[VP9_INTRA_MODES][VP9_INTRA_MODES]; + unsigned int partition_count[NUM_PARTITION_CONTEXTS][PARTITION_TYPES]; nmv_context_counts NMVcount; - vp9_coeff_count coef_counts_4x4[BLOCK_TYPES]; - vp9_coeff_probs frame_coef_probs_4x4[BLOCK_TYPES]; - vp9_coeff_stats frame_branch_ct_4x4[BLOCK_TYPES]; - - vp9_coeff_count coef_counts_8x8[BLOCK_TYPES]; - vp9_coeff_probs frame_coef_probs_8x8[BLOCK_TYPES]; - vp9_coeff_stats frame_branch_ct_8x8[BLOCK_TYPES]; - - vp9_coeff_count coef_counts_16x16[BLOCK_TYPES]; - vp9_coeff_probs frame_coef_probs_16x16[BLOCK_TYPES]; - vp9_coeff_stats frame_branch_ct_16x16[BLOCK_TYPES]; - - vp9_coeff_count coef_counts_32x32[BLOCK_TYPES]; - vp9_coeff_probs frame_coef_probs_32x32[BLOCK_TYPES]; - vp9_coeff_stats frame_branch_ct_32x32[BLOCK_TYPES]; - -#if CONFIG_CODE_NONZEROCOUNT - vp9_prob frame_nzc_probs_4x4 - [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC4X4_NODES]; - unsigned int frame_nzc_branch_ct_4x4 - [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC4X4_NODES][2]; - vp9_prob frame_nzc_probs_8x8 - [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC8X8_NODES]; - unsigned int frame_nzc_branch_ct_8x8 - [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC8X8_NODES][2]; - vp9_prob frame_nzc_probs_16x16 - [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC16X16_NODES]; - unsigned int frame_nzc_branch_ct_16x16 - [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC16X16_NODES][2]; - vp9_prob frame_nzc_probs_32x32 - [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC32X32_NODES]; - unsigned int frame_nzc_branch_ct_32x32 - [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC32X32_NODES][2]; -#endif + vp9_coeff_count coef_counts[TX_SIZE_MAX_SB][BLOCK_TYPES]; + vp9_coeff_probs_model frame_coef_probs[TX_SIZE_MAX_SB][BLOCK_TYPES]; + vp9_coeff_stats frame_branch_ct[TX_SIZE_MAX_SB][BLOCK_TYPES]; int gfu_boost; int last_boost; @@ -521,7 +433,6 @@ typedef struct VP9_COMP { int mbgraph_n_frames; // number of frames filled in the above int static_mb_pct; // % forced skip mbs by segmentation int seg0_progress, seg0_idx, seg0_cnt; - int ref_pred_count[3][2]; int decimation_factor; int decimation_count; @@ -529,7 +440,7 @@ typedef struct VP9_COMP { // for real time encoding int avg_encode_time; // microsecond int avg_pick_mode_time; // microsecond - int Speed; + int speed; unsigned int cpu_freq; // Mhz int compressor_speed; @@ -542,12 +453,8 @@ typedef struct VP9_COMP { vp9_prob last_skip_false_probs[3][MBSKIP_CONTEXTS]; int last_skip_probs_q[3]; - int recent_ref_frame_usage[MAX_REF_FRAMES]; - int count_mb_ref_frame_usage[MAX_REF_FRAMES]; int ref_frame_flags; - unsigned char ref_pred_probs_update[PREDICTION_PROBS]; - SPEED_FEATURES sf; int error_bins[1024]; @@ -555,8 +462,6 @@ typedef struct VP9_COMP { int inter_zz_count; int gf_bad_count; int gf_update_recommended; - int skip_true_count[3]; - int skip_false_count[3]; unsigned char *segmentation_map; @@ -566,8 +471,6 @@ typedef struct VP9_COMP { unsigned char *active_map; unsigned int active_map_enabled; - TOKENLIST *tplist; - fractional_mv_step_fp *find_fractional_mv_step; vp9_full_search_fn_t full_search_sad; vp9_refining_search_fn_t refining_search_sad; @@ -578,16 +481,14 @@ typedef struct VP9_COMP { uint64_t time_pick_lpf; uint64_t time_encode_mb_row; - int base_skip_false_prob[QINDEX_RANGE][3]; - struct twopass_rc { unsigned int section_intra_rating; unsigned int next_iiratio; unsigned int this_iiratio; 
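[Illustrative sketch — not part of the patch: the twopass stats members below change from heap-allocated pointers to embedded structs, which is why earlier hunks rewrite total_left_stats->coded_error as total_left_stats.coded_error. A minimal before/after, with hypothetical names:]

#include <stdio.h>

typedef struct { double coded_error; } first_pass_stats;

typedef struct {
  /* was: first_pass_stats *total_left;  (allocated separately) */
  first_pass_stats total_left;           /* now embedded by value */
} twopass_rc_sketch;

int main(void) {
  twopass_rc_sketch tp = { { 1234.5 } };
  /* accessor changes from tp.total_left->coded_error to: */
  printf("%.1f\n", tp.total_left.coded_error);
  return 0;
}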
- FIRSTPASS_STATS *total_stats; - FIRSTPASS_STATS *this_frame_stats; + FIRSTPASS_STATS total_stats; + FIRSTPASS_STATS this_frame_stats; FIRSTPASS_STATS *stats_in, *stats_in_end, *stats_in_start; - FIRSTPASS_STATS *total_left_stats; + FIRSTPASS_STATS total_left_stats; int first_pass_done; int64_t bits_left; int64_t clip_bits_total; @@ -640,6 +541,8 @@ typedef struct VP9_COMP { int bytes; double summed_quality; double summed_weights; + double summedp_quality; + double summedp_weights; unsigned int tot_recode_hits; @@ -656,19 +559,8 @@ typedef struct VP9_COMP { unsigned int activity_avg; unsigned int *mb_activity_map; int *mb_norm_activity_map; - - // Record of which MBs still refer to last golden frame either - // directly or through 0,0 - unsigned char *gf_active_flags; - int gf_active_count; - int output_partition; - // Store last frame's MV info for next frame MV prediction - int_mv *lfmv; - int *lf_ref_frame_sign_bias; - int *lf_ref_frame; - /* force next frame to intra when kf_auto says so */ int force_next_frame_intra; @@ -680,14 +572,37 @@ typedef struct VP9_COMP { [VP9_SWITCHABLE_FILTERS]; unsigned int best_switchable_interp_count[VP9_SWITCHABLE_FILTERS]; -#if CONFIG_NEW_MVREF - unsigned int mb_mv_ref_count[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES]; -#endif - int initial_width; int initial_height; + +#if CONFIG_MULTIPLE_ARF + // ARF tracking variables. + int multi_arf_enabled; + unsigned int frame_coding_order_period; + unsigned int new_frame_coding_order_period; + int frame_coding_order[MAX_LAG_BUFFERS * 2]; + int arf_buffer_idx[MAX_LAG_BUFFERS * 3 / 2]; + int arf_weight[MAX_LAG_BUFFERS]; + int arf_buffered; + int this_frame_weight; + int max_arf_level; +#endif + +#ifdef ENTROPY_STATS + int64_t mv_ref_stats[INTER_MODE_CONTEXTS][VP9_INTER_MODES - 1][2]; +#endif } VP9_COMP; +static int get_ref_frame_idx(VP9_COMP *cpi, MV_REFERENCE_FRAME ref_frame) { + if (ref_frame == LAST_FRAME) { + return cpi->lst_fb_idx; + } else if (ref_frame == GOLDEN_FRAME) { + return cpi->gld_fb_idx; + } else { + return cpi->alt_fb_idx; + } +} + void vp9_encode_frame(VP9_COMP *cpi); void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, diff --git a/vp9/encoder/vp9_picklpf.c b/vp9/encoder/vp9_picklpf.c index 645d66b258e0453a374cc0b57876b3035654429b..a87d058381b83c29d181f6d316f560841196a416 100644 --- a/vp9/encoder/vp9_picklpf.c +++ b/vp9/encoder/vp9_picklpf.c @@ -120,111 +120,6 @@ static int get_max_filter_level(VP9_COMP *cpi, int base_qindex) { return max_filter_level; } -void vp9_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; - - int best_err = 0; - int filt_err = 0; - int min_filter_level = get_min_filter_level(cpi, cm->base_qindex); - int max_filter_level = get_max_filter_level(cpi, cm->base_qindex); - int filt_val; - int best_filt_val = cm->filter_level; - - // Make a copy of the unfiltered / processed recon buffer - vp9_yv12_copy_partial_frame(cm->frame_to_show, &cpi->last_frame_uf, 3); - - if (cm->frame_type == KEY_FRAME) - cm->sharpness_level = 0; - else - cm->sharpness_level = cpi->oxcf.Sharpness; - - if (cm->sharpness_level != cm->last_sharpness_level) { - vp9_loop_filter_update_sharpness(&cm->lf_info, cm->sharpness_level); - cm->last_sharpness_level = cm->sharpness_level; - } - - // Start the search at the previous frame filter level unless it is now out of range. 
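[Illustrative sketch — not part of the patch: the removed fast search starts from the previous frame's filter level clamped into the legal range, then probes downward with a step that widens above level 10. Both helpers below are hypothetical restatements of the deleted lines that follow.]

#include <stdio.h>

/* Clamp a starting filter level into [min_level, max_level], as the
 * deleted lines below do with two ifs. */
static int clamp_filter_level(int level, int min_level, int max_level) {
  if (level < min_level) return min_level;
  if (level > max_level) return max_level;
  return level;
}

/* Step size used by the deleted search: 1, widening to 2 above level 10. */
static int search_step(int filt_val) {
  return 1 + (filt_val > 10);
}

int main(void) {
  int level = clamp_filter_level(70, 8, 63);  /* hypothetical range */
  while (level >= 8) {                        /* descending probe order */
    printf("%d ", level);
    level -= search_step(level);
  }
  printf("\n");
  return 0;
}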
- if (cm->filter_level < min_filter_level) - cm->filter_level = min_filter_level; - else if (cm->filter_level > max_filter_level) - cm->filter_level = max_filter_level; - - filt_val = cm->filter_level; - best_filt_val = filt_val; - - // Get the err using the previous frame's filter value. - vp9_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val); - - best_err = calc_partial_ssl_err(sd, cm->frame_to_show, 3); - - // Re-instate the unfiltered frame - vp9_yv12_copy_partial_frame(&cpi->last_frame_uf, cm->frame_to_show, 3); - - filt_val -= (1 + ((filt_val > 10) ? 1 : 0)); - - // Search lower filter levels - while (filt_val >= min_filter_level) { - // Apply the loop filter - vp9_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val); - - // Get the err for filtered frame - filt_err = calc_partial_ssl_err(sd, cm->frame_to_show, 3); - - // Re-instate the unfiltered frame - vp9_yv12_copy_partial_frame(&cpi->last_frame_uf, cm->frame_to_show, 3); - - - // Update the best case record or exit loop. - if (filt_err < best_err) { - best_err = filt_err; - best_filt_val = filt_val; - } else - break; - - // Adjust filter level - filt_val -= (1 + ((filt_val > 10) ? 1 : 0)); - } - - // Search up (note that we have already done filt_val = cm->filter_level) - filt_val = cm->filter_level + (1 + ((filt_val > 10) ? 1 : 0)); - - if (best_filt_val == cm->filter_level) { - // Resist raising filter level for very small gains - best_err -= (best_err >> 10); - - while (filt_val < max_filter_level) { - // Apply the loop filter - vp9_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val); - - // Get the err for filtered frame - filt_err = calc_partial_ssl_err(sd, cm->frame_to_show, 3); - - // Re-instate the unfiltered frame - vp9_yv12_copy_partial_frame(&cpi->last_frame_uf, - cm->frame_to_show, 3); - - // Update the best case record or exit loop. - if (filt_err < best_err) { - // Do not raise filter level if improvement is < 1 part in 4096 - best_err = filt_err - (filt_err >> 10); - - best_filt_val = filt_val; - } else - break; - - // Adjust filter level - filt_val += (1 + ((filt_val > 10) ? 
1 : 0)); - } - } - - cm->filter_level = best_filt_val; - - if (cm->filter_level < min_filter_level) - cm->filter_level = min_filter_level; - - if (cm->filter_level > max_filter_level) - cm->filter_level = max_filter_level; -} // Stub function for now Alt LF not used void vp9_set_alt_lf_level(VP9_COMP *cpi, int filt_val) { @@ -268,7 +163,7 @@ void vp9_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi) { // Get baseline error score vp9_set_alt_lf_level(cpi, filt_mid); - vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, filt_mid, 1, 0); + vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, filt_mid, 1); best_err = vp9_calc_ss_err(sd, cm->frame_to_show); filt_best = filt_mid; @@ -293,7 +188,7 @@ void vp9_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi) { if ((filt_direction <= 0) && (filt_low != filt_mid)) { // Get Low filter error score vp9_set_alt_lf_level(cpi, filt_low); - vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, filt_low, 1, 0); + vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, filt_low, 1); filt_err = vp9_calc_ss_err(sd, cm->frame_to_show); @@ -313,7 +208,7 @@ void vp9_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi) { // Now look at filt_high if ((filt_direction >= 0) && (filt_high != filt_mid)) { vp9_set_alt_lf_level(cpi, filt_high); - vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, filt_high, 1, 0); + vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, filt_high, 1); filt_err = vp9_calc_ss_err(sd, cm->frame_to_show); @@ -338,30 +233,4 @@ void vp9_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi) { } cm->filter_level = filt_best; - -#if CONFIG_LOOP_DERING - /* Decide whether to turn on deringing filter */ - { // NOLINT - int best_dering = 0; - int this_dering; - int last_err_diff = INT_MAX; - - for (this_dering = 1; this_dering <= 16; this_dering++) { - vp9_set_alt_lf_level(cpi, filt_best); - vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, filt_high, 1, this_dering); - filt_err = vp9_calc_ss_err(sd, cm->frame_to_show); - vp8_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show); - if (filt_err < best_err) { - best_err = filt_err; - best_dering = this_dering; - last_err_diff = INT_MAX; - } else { - if (filt_err - best_err > last_err_diff) - break; - last_err_diff = filt_err - best_err; - } - } - cm->dering_enabled = best_dering; - } -#endif } diff --git a/vp9/encoder/vp9_picklpf.h b/vp9/encoder/vp9_picklpf.h index ca3cab618009c108c122e9288c474f0c536ce97f..698cb8d0d63e3cb6a7f74d596c50cef156dc0a09 100644 --- a/vp9/encoder/vp9_picklpf.h +++ b/vp9/encoder/vp9_picklpf.h @@ -15,9 +15,6 @@ struct yv12_buffer_config; struct VP9_COMP; -void vp9_pick_filter_level_fast(struct yv12_buffer_config *sd, - struct VP9_COMP *cpi); - void vp9_set_alt_lf_level(struct VP9_COMP *cpi, int filt_val); void vp9_pick_filter_level(struct yv12_buffer_config *sd, diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c index 881fce50f4f11feeef99d96898d89e4366b3623b..53d8be7755abfb086871c4ae907c889c73315f61 100644 --- a/vp9/encoder/vp9_quantize.c +++ b/vp9/encoder/vp9_quantize.c @@ -21,337 +21,9 @@ extern int enc_debug; #endif -static INLINE int plane_idx(MACROBLOCKD *xd, int b_idx) { - const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; - if (b_idx < (16 << (sb_type * 2))) - return 0; // Y - else if (b_idx < (20 << (sb_type * 2))) - return 16; // U - assert(b_idx < (24 << (sb_type * 2))); - return 20; // V -} - -void vp9_ht_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) { - MACROBLOCKD *const xd = &mb->e_mbd; - BLOCK *const b = &mb->block[0]; - BLOCKD *const d = &xd->block[0]; - int i, rc, eob; 
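[Illustrative sketch — not part of the patch: every quantize variant deleted here uses the same branchless sign/magnitude idiom: sz = z >> 31, then x = (z ^ sz) - sz for |z|, with (y ^ sz) - sz restoring the sign afterwards. The right shift of a negative int is implementation-defined in C, but resolves to an arithmetic shift on the two's-complement targets libvpx builds for.]

#include <stdio.h>

static int abs_and_sign(int z, int *sz) {
  *sz = z >> 31;          /* 0 for z >= 0, -1 (all ones) for z < 0 */
  return (z ^ *sz) - *sz; /* == |z| */
}

int main(void) {
  int sz;
  const int x = abs_and_sign(-7, &sz);  /* x = 7, sz = -1 */
  const int y = x >> 1;                 /* stand-in for the quantizer step */
  printf("|z|=%d, requantized with sign: %d\n", x, (y ^ sz) - sz); /* -3 */
  return 0;
}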
- int zbin; - int x, y, z, sz; - int16_t *coeff_ptr = mb->coeff + b_idx * 16; - int16_t *qcoeff_ptr = xd->qcoeff + b_idx * 16; - int16_t *dqcoeff_ptr = xd->dqcoeff + b_idx * 16; - int16_t *zbin_boost_ptr = b->zrun_zbin_boost; - int16_t *zbin_ptr = b->zbin; - int16_t *round_ptr = b->round; - int16_t *quant_ptr = b->quant; - uint8_t *quant_shift_ptr = b->quant_shift; - int16_t *dequant_ptr = d->dequant; - int zbin_oq_value = b->zbin_extra; - const int *pt_scan; -#if CONFIG_CODE_NONZEROCOUNT - int nzc = 0; -#endif - - assert(plane_idx(xd, b_idx) == 0); - switch (tx_type) { - case ADST_DCT: - pt_scan = vp9_row_scan_4x4; - break; - case DCT_ADST: - pt_scan = vp9_col_scan_4x4; - break; - default: - pt_scan = vp9_default_zig_zag1d_4x4; - break; - } - - vpx_memset(qcoeff_ptr, 0, 32); - vpx_memset(dqcoeff_ptr, 0, 32); - - eob = -1; - - if (!b->skip_block) { - for (i = 0; i < 16; i++) { - rc = pt_scan[i]; - z = coeff_ptr[rc]; - - zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value; - zbin_boost_ptr++; - - sz = (z >> 31); // sign of z - x = (z ^ sz) - sz; // x = abs(z) - - if (x >= zbin) { - x += round_ptr[rc]; - y = (((x * quant_ptr[rc]) >> 16) + x) - >> quant_shift_ptr[rc]; // quantize (x) - x = (y ^ sz) - sz; // get the sign back - qcoeff_ptr[rc] = x; // write to destination - dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value - - if (y) { - eob = i; // last nonzero coeffs -#if CONFIG_CODE_NONZEROCOUNT - ++nzc; // number of nonzero coeffs -#endif - zbin_boost_ptr = b->zrun_zbin_boost; // reset zero runlength - } - } - } - } - - xd->eobs[b_idx] = eob + 1; -#if CONFIG_CODE_NONZEROCOUNT - xd->nzcs[b_idx] = nzc; -#endif -} - -void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx) { - MACROBLOCKD *const xd = &mb->e_mbd; - const int c_idx = plane_idx(xd, b_idx); - BLOCK *const b = &mb->block[c_idx]; - BLOCKD *const d = &xd->block[c_idx]; - int i, rc, eob; - int zbin; - int x, y, z, sz; - int16_t *coeff_ptr = mb->coeff + b_idx * 16; - int16_t *qcoeff_ptr = xd->qcoeff + b_idx * 16; - int16_t *dqcoeff_ptr = xd->dqcoeff + b_idx * 16; - int16_t *zbin_boost_ptr = b->zrun_zbin_boost; - int16_t *zbin_ptr = b->zbin; - int16_t *round_ptr = b->round; - int16_t *quant_ptr = b->quant; - uint8_t *quant_shift_ptr = b->quant_shift; - int16_t *dequant_ptr = d->dequant; - int zbin_oq_value = b->zbin_extra; -#if CONFIG_CODE_NONZEROCOUNT - int nzc = 0; -#endif - - vpx_memset(qcoeff_ptr, 0, 32); - vpx_memset(dqcoeff_ptr, 0, 32); - - eob = -1; - - if (!b->skip_block) { - for (i = 0; i < 16; i++) { - rc = vp9_default_zig_zag1d_4x4[i]; - z = coeff_ptr[rc]; - - zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value; - zbin_boost_ptr++; - - sz = (z >> 31); // sign of z - x = (z ^ sz) - sz; // x = abs(z) - - if (x >= zbin) { - x += round_ptr[rc]; - - y = (((x * quant_ptr[rc]) >> 16) + x) - >> quant_shift_ptr[rc]; // quantize (x) - x = (y ^ sz) - sz; // get the sign back - qcoeff_ptr[rc] = x; // write to destination - dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value - - if (y) { - eob = i; // last nonzero coeffs -#if CONFIG_CODE_NONZEROCOUNT - ++nzc; // number of nonzero coeffs -#endif - zbin_boost_ptr = b->zrun_zbin_boost; // reset zero runlength - } - } - } - } - - xd->eobs[b_idx] = eob + 1; -#if CONFIG_CODE_NONZEROCOUNT - xd->nzcs[b_idx] = nzc; -#endif -} - -void vp9_quantize_mby_4x4(MACROBLOCK *x) { - int i; - - for (i = 0; i < 16; i++) { - TX_TYPE tx_type = get_tx_type_4x4(&x->e_mbd, i); - if (tx_type != DCT_DCT) { - vp9_ht_quantize_b_4x4(x, i, tx_type); - } else { - x->quantize_b_4x4(x, i); - } - } 
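[Illustrative sketch — not part of the patch: the core step shared by all the deleted variants quantizes a biased magnitude x with a fixed-point reciprocal and a final shift, then reconstructs through the dequant table. The table entries below are hypothetical values approximating a dequant step of 16 (quant = 1, shift = 4); only the formula is taken from the code.]

#include <stdio.h>

/* One coefficient through the quantize/dequantize step used above:
 * y = (((x * quant) >> 16) + x) >> shift, dq = y * dequant. */
static void quant_one(int x, int quant, int shift, int dequant,
                      int *q, int *dq) {
  const int y = (((x * quant) >> 16) + x) >> shift;
  *q = y;
  *dq = y * dequant;
}

int main(void) {
  int q, dq;
  /* Hypothetical table entries approximating division by 16. */
  quant_one(100, 1, 4, 16, &q, &dq);
  printf("q=%d dq=%d\n", q, dq);  /* q=6 (100/16 truncated), dq=96 */
  return 0;
}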
-} - -void vp9_quantize_mbuv_4x4(MACROBLOCK *x) { - int i; - - for (i = 16; i < 24; i++) - x->quantize_b_4x4(x, i); -} - -void vp9_quantize_mb_4x4(MACROBLOCK *x) { - vp9_quantize_mby_4x4(x); - vp9_quantize_mbuv_4x4(x); -} - -void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) { - MACROBLOCKD *const xd = &mb->e_mbd; - int16_t *qcoeff_ptr = xd->qcoeff + 16 * b_idx; - int16_t *dqcoeff_ptr = xd->dqcoeff + 16 * b_idx; - const int c_idx = plane_idx(xd, b_idx); - BLOCK *const b = &mb->block[c_idx]; - BLOCKD *const d = &xd->block[c_idx]; - const int *pt_scan; - - switch (tx_type) { - case ADST_DCT: - pt_scan = vp9_row_scan_8x8; - break; - case DCT_ADST: - pt_scan = vp9_col_scan_8x8; - break; - default: - pt_scan = vp9_default_zig_zag1d_8x8; - break; - } - - vpx_memset(qcoeff_ptr, 0, 64 * sizeof(int16_t)); - vpx_memset(dqcoeff_ptr, 0, 64 * sizeof(int16_t)); - - if (!b->skip_block) { - int i, rc, eob; - int zbin; - int x, y, z, sz; - int zero_run; - int16_t *zbin_boost_ptr = b->zrun_zbin_boost; - int16_t *coeff_ptr = mb->coeff + 16 * b_idx; - int16_t *zbin_ptr = b->zbin; - int16_t *round_ptr = b->round; - int16_t *quant_ptr = b->quant; - uint8_t *quant_shift_ptr = b->quant_shift; - int16_t *dequant_ptr = d->dequant; - int zbin_oq_value = b->zbin_extra; -#if CONFIG_CODE_NONZEROCOUNT - int nzc = 0; -#endif - - eob = -1; - - // Special case for DC as it is the one triggering access in various - // tables: {zbin, quant, quant_shift, dequant}_ptr[rc != 0] - { - z = coeff_ptr[0]; - zbin = (zbin_ptr[0] + zbin_boost_ptr[0] + zbin_oq_value); - zero_run = 1; - - sz = (z >> 31); // sign of z - x = (z ^ sz) - sz; // x = abs(z) - - if (x >= zbin) { - x += (round_ptr[0]); - y = ((int)(((int)(x * quant_ptr[0]) >> 16) + x)) - >> quant_shift_ptr[0]; // quantize (x) - x = (y ^ sz) - sz; // get the sign back - qcoeff_ptr[0] = x; // write to destination - dqcoeff_ptr[0] = x * dequant_ptr[0]; // dequantized value - - if (y) { - eob = 0; // last nonzero coeffs -#if CONFIG_CODE_NONZEROCOUNT - ++nzc; // number of nonzero coeffs -#endif - zero_run = 0; - } - } - } - for (i = 1; i < 64; i++) { - rc = pt_scan[i]; - z = coeff_ptr[rc]; - zbin = (zbin_ptr[1] + zbin_boost_ptr[zero_run] + zbin_oq_value); - // The original code was incrementing zero_run while keeping it at - // maximum 15 by adding "(zero_run < 15)". The same is achieved by - // removing the opposite of the sign mask of "(zero_run - 15)". 
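[Illustrative sketch — not part of the patch: the deleted comment above describes a branchless "increment, saturating at 15" for the zero-run index into the zbin-boost table. While zero_run < 15 the shifted difference is -1, so subtracting it increments; at 15 the shift yields 0 and the value sticks.]

#include <stdio.h>

int main(void) {
  int zero_run = 13;
  int i;
  for (i = 0; i < 4; ++i) {
    zero_run -= (zero_run - 15) >> 31;  /* ++zero_run, capped at 15 */
    printf("%d ", zero_run);            /* prints 14 15 15 15 */
  }
  printf("\n");
  return 0;
}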
- zero_run -= (zero_run - 15) >> 31; - - sz = (z >> 31); // sign of z - x = (z ^ sz) - sz; // x = abs(z) - - if (x >= zbin) { - x += (round_ptr[rc != 0]); - y = ((int)(((int)(x * quant_ptr[1]) >> 16) + x)) - >> quant_shift_ptr[1]; // quantize (x) - x = (y ^ sz) - sz; // get the sign back - qcoeff_ptr[rc] = x; // write to destination - dqcoeff_ptr[rc] = x * dequant_ptr[1]; // dequantized value - - if (y) { - eob = i; // last nonzero coeffs -#if CONFIG_CODE_NONZEROCOUNT - ++nzc; // number of nonzero coeffs -#endif - zero_run = 0; - } - } - } - xd->eobs[b_idx] = eob + 1; -#if CONFIG_CODE_NONZEROCOUNT - xd->nzcs[b_idx] = nzc; -#endif - } else { - xd->eobs[b_idx] = 0; -#if CONFIG_CODE_NONZEROCOUNT - xd->nzcs[b_idx] = 0; -#endif - } -} - -void vp9_quantize_mby_8x8(MACROBLOCK *x) { - int i; - -#if CONFIG_CODE_NONZEROCOUNT - for (i = 0; i < 16; i ++) { - x->e_mbd.nzcs[i] = 0; - } -#endif - for (i = 0; i < 16; i += 4) { - TX_TYPE tx_type = get_tx_type_8x8(&x->e_mbd, (i & 8) + ((i & 4) >> 1)); - x->quantize_b_8x8(x, i, tx_type); - } -} - -void vp9_quantize_mbuv_8x8(MACROBLOCK *x) { - int i; - -#if CONFIG_CODE_NONZEROCOUNT - for (i = 16; i < 24; i ++) { - x->e_mbd.nzcs[i] = 0; - } -#endif - for (i = 16; i < 24; i += 4) - x->quantize_b_8x8(x, i, DCT_DCT); -} - -void vp9_quantize_mb_8x8(MACROBLOCK *x) { - vp9_quantize_mby_8x8(x); - vp9_quantize_mbuv_8x8(x); -} - -void vp9_quantize_mby_16x16(MACROBLOCK *x) { - TX_TYPE tx_type = get_tx_type_16x16(&x->e_mbd, 0); -#if CONFIG_CODE_NONZEROCOUNT - int i; - for (i = 0; i < 16; i++) { - x->e_mbd.nzcs[i] = 0; - } -#endif - x->quantize_b_16x16(x, 0, tx_type); -} - -void vp9_quantize_mb_16x16(MACROBLOCK *x) { - vp9_quantize_mby_16x16(x); - vp9_quantize_mbuv_8x8(x); +static INLINE int plane_idx(int plane) { + return plane == 0 ? 0 : + plane == 1 ? 16 : 20; } static void quantize(int16_t *zbin_boost_orig_ptr, @@ -361,18 +33,12 @@ static void quantize(int16_t *zbin_boost_orig_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, -#if CONFIG_CODE_NONZEROCOUNT - uint16_t *nzc_ptr, -#endif const int *scan, int mul) { int i, rc, eob; int zbin; int x, y, z, sz; int zero_run = 0; int16_t *zbin_boost_ptr = zbin_boost_orig_ptr; -#if CONFIG_CODE_NONZEROCOUNT - int nzc = 0; -#endif vpx_memset(qcoeff_ptr, 0, n_coeffs*sizeof(int16_t)); vpx_memset(dqcoeff_ptr, 0, n_coeffs*sizeof(int16_t)); @@ -401,207 +67,70 @@ static void quantize(int16_t *zbin_boost_orig_ptr, if (y) { eob = i; // last nonzero coeffs zero_run = 0; -#if CONFIG_CODE_NONZEROCOUNT - ++nzc; // number of nonzero coeffs -#endif } } } } *eob_ptr = eob + 1; -#if CONFIG_CODE_NONZEROCOUNT - *nzc_ptr = nzc; -#endif } -void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) { +void vp9_quantize(MACROBLOCK *mb, int plane, int block, int n_coeffs, + TX_TYPE tx_type) { MACROBLOCKD *const xd = &mb->e_mbd; - const int c_idx = plane_idx(xd, b_idx); - BLOCK *const b = &mb->block[c_idx]; - BLOCKD *const d = &xd->block[c_idx]; - const int *pt_scan; - - switch (tx_type) { - case ADST_DCT: - pt_scan = vp9_row_scan_16x16; + const int mul = n_coeffs == 1024 ? 
2 : 1; + const int *scan; + + // These contexts may be available in the caller + switch (n_coeffs) { + case 4 * 4: + scan = get_scan_4x4(tx_type); + break; + case 8 * 8: + scan = get_scan_8x8(tx_type); break; - case DCT_ADST: - pt_scan = vp9_col_scan_16x16; + case 16 * 16: + scan = get_scan_16x16(tx_type); break; default: - pt_scan = vp9_default_zig_zag1d_16x16; + scan = vp9_default_scan_32x32; break; } - quantize(b->zrun_zbin_boost, - mb->coeff + 16 * b_idx, - 256, b->skip_block, - b->zbin, b->round, b->quant, b->quant_shift, - xd->qcoeff + 16 * b_idx, - xd->dqcoeff + 16 * b_idx, - d->dequant, - b->zbin_extra, - &xd->eobs[b_idx], -#if CONFIG_CODE_NONZEROCOUNT - &xd->nzcs[b_idx], -#endif - pt_scan, 1); -} - -void vp9_regular_quantize_b_32x32(MACROBLOCK *mb, int b_idx) { + quantize(mb->plane[plane].zrun_zbin_boost, + BLOCK_OFFSET(mb->plane[plane].coeff, block, 16), + n_coeffs, mb->skip_block, + mb->plane[plane].zbin, + mb->plane[plane].round, + mb->plane[plane].quant, + mb->plane[plane].quant_shift, + BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16), + BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16), + xd->plane[plane].dequant, + mb->plane[plane].zbin_extra, + &xd->plane[plane].eobs[block], + scan, mul); +} + +void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type, + int y_blocks) { MACROBLOCKD *const xd = &mb->e_mbd; - const int c_idx = plane_idx(xd, b_idx); - BLOCK *const b = &mb->block[c_idx]; - BLOCKD *const d = &xd->block[c_idx]; - - quantize(b->zrun_zbin_boost, - mb->coeff + b_idx * 16, - 1024, b->skip_block, - b->zbin, - b->round, b->quant, b->quant_shift, - xd->qcoeff + b_idx * 16, - xd->dqcoeff + b_idx * 16, - d->dequant, - b->zbin_extra, - &xd->eobs[b_idx], -#if CONFIG_CODE_NONZEROCOUNT - &xd->nzcs[b_idx], -#endif - vp9_default_zig_zag1d_32x32, 2); -} - -void vp9_quantize_sby_32x32(MACROBLOCK *x) { - vp9_regular_quantize_b_32x32(x, 0); -} - -void vp9_quantize_sby_16x16(MACROBLOCK *x) { - int n; - - for (n = 0; n < 4; n++) { - TX_TYPE tx_type = get_tx_type_16x16(&x->e_mbd, - (16 * (n & 2)) + ((n & 1) * 4)); - x->quantize_b_16x16(x, n * 16, tx_type); - } -} - -void vp9_quantize_sby_8x8(MACROBLOCK *x) { - int n; - - for (n = 0; n < 16; n++) { - TX_TYPE tx_type = get_tx_type_8x8(&x->e_mbd, - (4 * (n & 12)) + ((n & 3) * 2)); - x->quantize_b_8x8(x, n * 4, tx_type); - } -} - -void vp9_quantize_sby_4x4(MACROBLOCK *x) { - MACROBLOCKD *const xd = &x->e_mbd; - int n; - - for (n = 0; n < 64; n++) { - const TX_TYPE tx_type = get_tx_type_4x4(xd, n); - if (tx_type != DCT_DCT) { - vp9_ht_quantize_b_4x4(x, n, tx_type); - } else { - x->quantize_b_4x4(x, n); - } - } -} - -void vp9_quantize_sbuv_16x16(MACROBLOCK *x) { - x->quantize_b_16x16(x, 64, DCT_DCT); - x->quantize_b_16x16(x, 80, DCT_DCT); -} - -void vp9_quantize_sbuv_8x8(MACROBLOCK *x) { - int i; - - for (i = 64; i < 96; i += 4) - x->quantize_b_8x8(x, i, DCT_DCT); -} - -void vp9_quantize_sbuv_4x4(MACROBLOCK *x) { - int i; - - for (i = 64; i < 96; i++) - x->quantize_b_4x4(x, i); -} - -void vp9_quantize_sb64y_32x32(MACROBLOCK *x) { - int n; - - for (n = 0; n < 4; n++) - vp9_regular_quantize_b_32x32(x, n * 64); -} - -void vp9_quantize_sb64y_16x16(MACROBLOCK *x) { - int n; - - for (n = 0; n < 16; n++) { - TX_TYPE tx_type = get_tx_type_16x16(&x->e_mbd, - (16 * (n & 12)) + ((n & 3) * 4)); - x->quantize_b_16x16(x, n * 16, tx_type); - } -} - -void vp9_quantize_sb64y_8x8(MACROBLOCK *x) { - int n; - - for (n = 0; n < 64; n++) { - TX_TYPE tx_type = get_tx_type_8x8(&x->e_mbd, - (4 * (n & 56)) + ((n & 7) * 2)); - x->quantize_b_8x8(x, n 
* 4, tx_type); - } -} - -void vp9_quantize_sb64y_4x4(MACROBLOCK *x) { - MACROBLOCKD *const xd = &x->e_mbd; - int n; - - for (n = 0; n < 256; n++) { - const TX_TYPE tx_type = get_tx_type_4x4(xd, n); - if (tx_type != DCT_DCT) { - vp9_ht_quantize_b_4x4(x, n, tx_type); - } else { - x->quantize_b_4x4(x, n); - } - } -} - -void vp9_quantize_sb64uv_32x32(MACROBLOCK *x) { - vp9_regular_quantize_b_32x32(x, 256); - vp9_regular_quantize_b_32x32(x, 320); -} - -void vp9_quantize_sb64uv_16x16(MACROBLOCK *x) { - int i; - - for (i = 256; i < 384; i += 16) - x->quantize_b_16x16(x, i, DCT_DCT); -} - -void vp9_quantize_sb64uv_8x8(MACROBLOCK *x) { - int i; - - for (i = 256; i < 384; i += 4) - x->quantize_b_8x8(x, i, DCT_DCT); -} - -void vp9_quantize_sb64uv_4x4(MACROBLOCK *x) { - int i; - - for (i = 256; i < 384; i++) - x->quantize_b_4x4(x, i); -} - -/* quantize_b_pair function pointer in MACROBLOCK structure is set to one of - * these two C functions if corresponding optimized routine is not available. - * NEON optimized version implements currently the fast quantization for pair - * of blocks. */ -void vp9_regular_quantize_b_4x4_pair(MACROBLOCK *x, int b_idx1, int b_idx2) { - vp9_regular_quantize_b_4x4(x, b_idx1); - vp9_regular_quantize_b_4x4(x, b_idx2); + const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx); + const int *pt_scan = get_scan_4x4(tx_type); + + quantize(mb->plane[pb_idx.plane].zrun_zbin_boost, + BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff, pb_idx.block, 16), + 16, mb->skip_block, + mb->plane[pb_idx.plane].zbin, + mb->plane[pb_idx.plane].round, + mb->plane[pb_idx.plane].quant, + mb->plane[pb_idx.plane].quant_shift, + BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16), + BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, pb_idx.block, 16), + xd->plane[pb_idx.plane].dequant, + mb->plane[pb_idx.plane].zbin_extra, + &xd->plane[pb_idx.plane].eobs[pb_idx.block], + pt_scan, 1); } static void invert_quant(int16_t *quant, uint8_t *shift, int d) { @@ -618,6 +147,10 @@ static void invert_quant(int16_t *quant, uint8_t *shift, int d) { void vp9_init_quantizer(VP9_COMP *cpi) { int i; int quant_val; + int quant_uv_val; +#if CONFIG_ALPHA + int quant_alpha_val; +#endif int q; static const int zbin_boost[16] = { 0, 0, 0, 8, 8, 8, 10, 12, @@ -631,130 +164,126 @@ void vp9_init_quantizer(VP9_COMP *cpi) { qrounding_factor = 64; } // dc values - quant_val = vp9_dc_quant(q, cpi->common.y1dc_delta_q); - invert_quant(cpi->Y1quant[q] + 0, cpi->Y1quant_shift[q] + 0, quant_val); - cpi->Y1zbin[q][0] = ROUND_POWER_OF_TWO(qzbin_factor * quant_val, 7); - cpi->Y1round[q][0] = (qrounding_factor * quant_val) >> 7; - cpi->common.Y1dequant[q][0] = quant_val; - cpi->zrun_zbin_boost_y1[q][0] = (quant_val * zbin_boost[0]) >> 7; - - quant_val = vp9_dc_uv_quant(q, cpi->common.uvdc_delta_q); - invert_quant(cpi->UVquant[q] + 0, cpi->UVquant_shift[q] + 0, quant_val); - cpi->UVzbin[q][0] = ROUND_POWER_OF_TWO(qzbin_factor * quant_val, 7); - cpi->UVround[q][0] = (qrounding_factor * quant_val) >> 7; - cpi->common.UVdequant[q][0] = quant_val; + quant_val = vp9_dc_quant(q, cpi->common.y_dc_delta_q); + invert_quant(cpi->y_quant[q] + 0, cpi->y_quant_shift[q] + 0, quant_val); + cpi->y_zbin[q][0] = ROUND_POWER_OF_TWO(qzbin_factor * quant_val, 7); + cpi->y_round[q][0] = (qrounding_factor * quant_val) >> 7; + cpi->common.y_dequant[q][0] = quant_val; + cpi->zrun_zbin_boost_y[q][0] = (quant_val * zbin_boost[0]) >> 7; + + quant_val = vp9_dc_quant(q, cpi->common.uv_dc_delta_q); + invert_quant(cpi->uv_quant[q] + 0, 
cpi->uv_quant_shift[q] + 0, quant_val); + cpi->uv_zbin[q][0] = ROUND_POWER_OF_TWO(qzbin_factor * quant_val, 7); + cpi->uv_round[q][0] = (qrounding_factor * quant_val) >> 7; + cpi->common.uv_dequant[q][0] = quant_val; cpi->zrun_zbin_boost_uv[q][0] = (quant_val * zbin_boost[0]) >> 7; +#if CONFIG_ALPHA + quant_val = vp9_dc_quant(q, cpi->common.a_dc_delta_q); + invert_quant(cpi->a_quant[q] + 0, cpi->a_quant_shift[q] + 0, quant_val); + cpi->a_zbin[q][0] = ROUND_POWER_OF_TWO(qzbin_factor * quant_val, 7); + cpi->a_round[q][0] = (qrounding_factor * quant_val) >> 7; + cpi->common.a_dequant[q][0] = quant_val; + cpi->zrun_zbin_boost_a[q][0] = (quant_val * zbin_boost[0]) >> 7; +#endif + + quant_val = vp9_ac_quant(q, 0); + cpi->common.y_dequant[q][1] = quant_val; + quant_uv_val = vp9_ac_quant(q, cpi->common.uv_ac_delta_q); + cpi->common.uv_dequant[q][1] = quant_uv_val; +#if CONFIG_ALPHA + quant_alpha_val = vp9_ac_quant(q, cpi->common.a_ac_delta_q); + cpi->common.a_dequant[q][1] = quant_alpha_val; +#endif // all the 4x4 ac values =; for (i = 1; i < 16; i++) { - int rc = vp9_default_zig_zag1d_4x4[i]; - - quant_val = vp9_ac_yquant(q); - invert_quant(cpi->Y1quant[q] + rc, cpi->Y1quant_shift[q] + rc, quant_val); - cpi->Y1zbin[q][rc] = ROUND_POWER_OF_TWO(qzbin_factor * quant_val, 7); - cpi->Y1round[q][rc] = (qrounding_factor * quant_val) >> 7; - cpi->common.Y1dequant[q][rc] = quant_val; - cpi->zrun_zbin_boost_y1[q][i] = + int rc = vp9_default_scan_4x4[i]; + + invert_quant(cpi->y_quant[q] + rc, cpi->y_quant_shift[q] + rc, quant_val); + cpi->y_zbin[q][rc] = ROUND_POWER_OF_TWO(qzbin_factor * quant_val, 7); + cpi->y_round[q][rc] = (qrounding_factor * quant_val) >> 7; + cpi->zrun_zbin_boost_y[q][i] = ROUND_POWER_OF_TWO(quant_val * zbin_boost[i], 7); - quant_val = vp9_ac_uv_quant(q, cpi->common.uvac_delta_q); - invert_quant(cpi->UVquant[q] + rc, cpi->UVquant_shift[q] + rc, quant_val); - cpi->UVzbin[q][rc] = ROUND_POWER_OF_TWO(qzbin_factor * quant_val, 7); - cpi->UVround[q][rc] = (qrounding_factor * quant_val) >> 7; - cpi->common.UVdequant[q][rc] = quant_val; + invert_quant(cpi->uv_quant[q] + rc, cpi->uv_quant_shift[q] + rc, + quant_uv_val); + cpi->uv_zbin[q][rc] = ROUND_POWER_OF_TWO(qzbin_factor * quant_uv_val, 7); + cpi->uv_round[q][rc] = (qrounding_factor * quant_uv_val) >> 7; cpi->zrun_zbin_boost_uv[q][i] = - ROUND_POWER_OF_TWO(quant_val * zbin_boost[i], 7); + ROUND_POWER_OF_TWO(quant_uv_val * zbin_boost[i], 7); + +#if CONFIG_ALPHA + invert_quant(cpi->a_quant[q] + rc, cpi->a_quant_shift[q] + rc, + quant_alpha_val); + cpi->a_zbin[q][rc] = + ROUND_POWER_OF_TWO(qzbin_factor * quant_alpha_val, 7); + cpi->a_round[q][rc] = (qrounding_factor * quant_alpha_val) >> 7; + cpi->zrun_zbin_boost_a[q][i] = + ROUND_POWER_OF_TWO(quant_alpha_val * zbin_boost[i], 7); +#endif } } } void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) { int i; - int QIndex; MACROBLOCKD *xd = &x->e_mbd; int zbin_extra; int segment_id = xd->mode_info_context->mbmi.segment_id; - - // Select the baseline MB Q index allowing for any segment level change. - if (vp9_segfeature_active(xd, segment_id, SEG_LVL_ALT_Q)) { - // Abs Value - if (xd->mb_segment_abs_delta == SEGMENT_ABSDATA) - QIndex = vp9_get_segdata(xd, segment_id, SEG_LVL_ALT_Q); - - // Delta Value - else { - QIndex = cpi->common.base_qindex + - vp9_get_segdata(xd, segment_id, SEG_LVL_ALT_Q); - - // Clamp to valid range - QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? 
QIndex : MAXQ) : 0; - } - } else - QIndex = cpi->common.base_qindex; + const int qindex = vp9_get_qindex(xd, segment_id, cpi->common.base_qindex); // Y - zbin_extra = (cpi->common.Y1dequant[QIndex][1] * - (cpi->zbin_mode_boost + - x->act_zbin_adj)) >> 7; - - for (i = 0; i < 16; i++) { - x->block[i].quant = cpi->Y1quant[QIndex]; - x->block[i].quant_shift = cpi->Y1quant_shift[QIndex]; - x->block[i].zbin = cpi->Y1zbin[QIndex]; - x->block[i].round = cpi->Y1round[QIndex]; - x->e_mbd.block[i].dequant = cpi->common.Y1dequant[QIndex]; - x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_y1[QIndex]; - x->block[i].zbin_extra = (int16_t)zbin_extra; - - // Segment skip feature. - x->block[i].skip_block = - vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP); - } + zbin_extra = (cpi->common.y_dequant[qindex][1] * + (cpi->zbin_mode_boost + x->act_zbin_adj)) >> 7; + + x->plane[0].quant = cpi->y_quant[qindex]; + x->plane[0].quant_shift = cpi->y_quant_shift[qindex]; + x->plane[0].zbin = cpi->y_zbin[qindex]; + x->plane[0].round = cpi->y_round[qindex]; + x->plane[0].zrun_zbin_boost = cpi->zrun_zbin_boost_y[qindex]; + x->plane[0].zbin_extra = (int16_t)zbin_extra; + x->e_mbd.plane[0].dequant = cpi->common.y_dequant[qindex]; // UV - zbin_extra = (cpi->common.UVdequant[QIndex][1] * - (cpi->zbin_mode_boost + - x->act_zbin_adj)) >> 7; - - for (i = 16; i < 24; i++) { - x->block[i].quant = cpi->UVquant[QIndex]; - x->block[i].quant_shift = cpi->UVquant_shift[QIndex]; - x->block[i].zbin = cpi->UVzbin[QIndex]; - x->block[i].round = cpi->UVround[QIndex]; - x->e_mbd.block[i].dequant = cpi->common.UVdequant[QIndex]; - x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_uv[QIndex]; - x->block[i].zbin_extra = (int16_t)zbin_extra; - - // Segment skip feature. - x->block[i].skip_block = - vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP); + zbin_extra = (cpi->common.uv_dequant[qindex][1] * + (cpi->zbin_mode_boost + x->act_zbin_adj)) >> 7; + + for (i = 1; i < 3; i++) { + x->plane[i].quant = cpi->uv_quant[qindex]; + x->plane[i].quant_shift = cpi->uv_quant_shift[qindex]; + x->plane[i].zbin = cpi->uv_zbin[qindex]; + x->plane[i].round = cpi->uv_round[qindex]; + x->plane[i].zrun_zbin_boost = cpi->zrun_zbin_boost_uv[qindex]; + x->plane[i].zbin_extra = (int16_t)zbin_extra; + x->e_mbd.plane[i].dequant = cpi->common.uv_dequant[qindex]; } +#if CONFIG_ALPHA + x->plane[3].quant = cpi->a_quant[qindex]; + x->plane[3].quant_shift = cpi->a_quant_shift[qindex]; + x->plane[3].zbin = cpi->a_zbin[qindex]; + x->plane[3].round = cpi->a_round[qindex]; + x->plane[3].zrun_zbin_boost = cpi->zrun_zbin_boost_a[qindex]; + x->plane[3].zbin_extra = (int16_t)zbin_extra; + x->e_mbd.plane[3].dequant = cpi->common.a_dequant[qindex]; +#endif + + x->skip_block = vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP); + /* save this macroblock QIndex for vp9_update_zbin_extra() */ - x->e_mbd.q_index = QIndex; + x->e_mbd.q_index = qindex; } void vp9_update_zbin_extra(VP9_COMP *cpi, MACROBLOCK *x) { - int i; - int QIndex = x->e_mbd.q_index; - int zbin_extra; + const int qindex = x->e_mbd.q_index; + const int y_zbin_extra = (cpi->common.y_dequant[qindex][1] * + (cpi->zbin_mode_boost + x->act_zbin_adj)) >> 7; + const int uv_zbin_extra = (cpi->common.uv_dequant[qindex][1] * + (cpi->zbin_mode_boost + x->act_zbin_adj)) >> 7; - // Y - zbin_extra = (cpi->common.Y1dequant[QIndex][1] * - (cpi->zbin_mode_boost + - x->act_zbin_adj)) >> 7; - for (i = 0; i < 16; i++) { - x->block[i].zbin_extra = (int16_t)zbin_extra; - } - - // UV - zbin_extra = (cpi->common.UVdequant[QIndex][1] * 
- (cpi->zbin_mode_boost + - x->act_zbin_adj)) >> 7; - - for (i = 16; i < 24; i++) { - x->block[i].zbin_extra = (int16_t)zbin_extra; - } + x->plane[0].zbin_extra = (int16_t)y_zbin_extra; + x->plane[1].zbin_extra = (int16_t)uv_zbin_extra; + x->plane[2].zbin_extra = (int16_t)uv_zbin_extra; } void vp9_frame_init_quantizer(VP9_COMP *cpi) { @@ -770,15 +299,11 @@ void vp9_set_quantizer(struct VP9_COMP *cpi, int Q) { cm->base_qindex = Q; - // Set lossless mode - if (cm->base_qindex <= 4) - cm->base_qindex = 0; - // if any of the delta_q values are changing update flag will // have to be set. - cm->y1dc_delta_q = 0; - cm->uvdc_delta_q = 0; - cm->uvac_delta_q = 0; + cm->y_dc_delta_q = 0; + cm->uv_dc_delta_q = 0; + cm->uv_ac_delta_q = 0; // quantizer has to be reinitialized if any delta_q changes. // As there are not any here for now this is inactive code. diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h index 6ba6cbdd9d1cfc7bac197dba99618b3ecbf663b7..2b1eeabbed54ab68ca3587a9373e00ba1da351d4 100644 --- a/vp9/encoder/vp9_quantize.h +++ b/vp9/encoder/vp9_quantize.h @@ -22,46 +22,15 @@ #define prototype_quantize_mb(sym) \ void (sym)(MACROBLOCK *x) -#if ARCH_X86 || ARCH_X86_64 -#include "x86/vp9_quantize_x86.h" -#endif - -void vp9_ht_quantize_b_4x4(MACROBLOCK *mb, int b_ix, TX_TYPE type); -void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx); -void vp9_regular_quantize_b_4x4_pair(MACROBLOCK *mb, int b_idx1, int b_idx2); -void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type); -void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type); -void vp9_regular_quantize_b_32x32(MACROBLOCK *mb, int b_idx); - -void vp9_quantize_mb_4x4(MACROBLOCK *x); -void vp9_quantize_mb_8x8(MACROBLOCK *x); - -void vp9_quantize_mbuv_4x4(MACROBLOCK *x); -void vp9_quantize_mby_4x4(MACROBLOCK *x); - -void vp9_quantize_mby_8x8(MACROBLOCK *x); -void vp9_quantize_mbuv_8x8(MACROBLOCK *x); - -void vp9_quantize_mb_16x16(MACROBLOCK *x); -void vp9_quantize_mby_16x16(MACROBLOCK *x); - -void vp9_quantize_sby_32x32(MACROBLOCK *x); -void vp9_quantize_sby_16x16(MACROBLOCK *x); -void vp9_quantize_sby_8x8(MACROBLOCK *x); -void vp9_quantize_sby_4x4(MACROBLOCK *x); -void vp9_quantize_sbuv_16x16(MACROBLOCK *x); -void vp9_quantize_sbuv_8x8(MACROBLOCK *x); -void vp9_quantize_sbuv_4x4(MACROBLOCK *x); - -void vp9_quantize_sb64y_32x32(MACROBLOCK *x); -void vp9_quantize_sb64y_16x16(MACROBLOCK *x); -void vp9_quantize_sb64y_8x8(MACROBLOCK *x); -void vp9_quantize_sb64y_4x4(MACROBLOCK *x); -void vp9_quantize_sb64uv_32x32(MACROBLOCK *x); -void vp9_quantize_sb64uv_16x16(MACROBLOCK *x); -void vp9_quantize_sb64uv_8x8(MACROBLOCK *x); -void vp9_quantize_sb64uv_4x4(MACROBLOCK *x); - +void vp9_quantize(MACROBLOCK *mb, int plane, int block, int n_coefs, + TX_TYPE tx_type); + +void vp9_regular_quantize_b_4x4_pair(MACROBLOCK *mb, int b_idx1, int b_idx2, + int y_blocks); +void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type, + int y_blocks); +void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type, + int y_blocks); struct VP9_COMP; extern void vp9_set_quantizer(struct VP9_COMP *cpi, int Q); diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index d26f5ec460d78cddc432be3c934b0a0a3449b5f6..430d3a8d67ad940f6201ba2a4255451ced5d6a8a 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -30,16 +30,6 @@ #define MIN_BPB_FACTOR 0.005 #define MAX_BPB_FACTOR 50 -#ifdef MODE_STATS -extern unsigned int y_modes[VP9_YMODES]; -extern unsigned 
int uv_modes[VP9_UV_MODES]; -extern unsigned int b_modes[B_MODE_COUNT]; - -extern unsigned int inter_y_modes[MB_MODE_COUNT]; -extern unsigned int inter_uv_modes[VP9_UV_MODES]; -extern unsigned int inter_b_modes[B_MODE_COUNT]; -#endif - // Bits Per MB at different Q (Multiplied by 512) #define BPER_MB_NORMBITS 9 @@ -89,7 +79,7 @@ static const unsigned int prior_key_frame_weight[KEY_FRAME_CONTEXT] = { 1, 2, 3, // tables if and when things settle down in the experimental bitstream double vp9_convert_qindex_to_q(int qindex) { // Convert the index to a real Q value (scaled down to match old Q values) - return vp9_ac_yquant(qindex) / 4.0; + return vp9_ac_quant(qindex, 0) / 4.0; } int vp9_gfboost_qadjust(int qindex) { @@ -112,7 +102,7 @@ int vp9_bits_per_mb(FRAME_TYPE frame_type, int qindex, const double q = vp9_convert_qindex_to_q(qindex); int enumerator = frame_type == KEY_FRAME ? 4000000 : 2500000; - // q based adjustment to baseline enumberator + // q based adjustment to baseline enumerator enumerator += (int)(enumerator * q) >> 12; return (int)(0.5 + (enumerator * correction_factor / q)); } @@ -132,52 +122,31 @@ void vp9_save_coding_context(VP9_COMP *cpi) { vp9_copy(cc->nmvcosts, cpi->mb.nmvcosts); vp9_copy(cc->nmvcosts_hp, cpi->mb.nmvcosts_hp); - vp9_copy(cc->vp9_mode_contexts, cm->fc.vp9_mode_contexts); + vp9_copy(cc->inter_mode_probs, cm->fc.inter_mode_probs); - vp9_copy(cc->ymode_prob, cm->fc.ymode_prob); - vp9_copy(cc->sb_ymode_prob, cm->fc.sb_ymode_prob); - vp9_copy(cc->bmode_prob, cm->fc.bmode_prob); + vp9_copy(cc->y_mode_prob, cm->fc.y_mode_prob); vp9_copy(cc->uv_mode_prob, cm->fc.uv_mode_prob); - vp9_copy(cc->i8x8_mode_prob, cm->fc.i8x8_mode_prob); - vp9_copy(cc->sub_mv_ref_prob, cm->fc.sub_mv_ref_prob); - vp9_copy(cc->mbsplit_prob, cm->fc.mbsplit_prob); - - // Stats -#ifdef MODE_STATS - vp9_copy(cc->y_modes, y_modes); - vp9_copy(cc->uv_modes, uv_modes); - vp9_copy(cc->b_modes, b_modes); - vp9_copy(cc->inter_y_modes, inter_y_modes); - vp9_copy(cc->inter_uv_modes, inter_uv_modes); - vp9_copy(cc->inter_b_modes, inter_b_modes); -#endif + vp9_copy(cc->partition_prob, cm->fc.partition_prob); vp9_copy(cc->segment_pred_probs, cm->segment_pred_probs); - vp9_copy(cc->ref_pred_probs_update, cpi->ref_pred_probs_update); - vp9_copy(cc->ref_pred_probs, cm->ref_pred_probs); - vp9_copy(cc->prob_comppred, cm->prob_comppred); + + vp9_copy(cc->intra_inter_prob, cm->fc.intra_inter_prob); + vp9_copy(cc->comp_inter_prob, cm->fc.comp_inter_prob); + vp9_copy(cc->single_ref_prob, cm->fc.single_ref_prob); + vp9_copy(cc->comp_ref_prob, cm->fc.comp_ref_prob); vpx_memcpy(cpi->coding_context.last_frame_seg_map_copy, - cm->last_frame_seg_map, (cm->mb_rows * cm->mb_cols)); + cm->last_frame_seg_map, (cm->mi_rows * cm->mi_cols)); vp9_copy(cc->last_ref_lf_deltas, xd->last_ref_lf_deltas); vp9_copy(cc->last_mode_lf_deltas, xd->last_mode_lf_deltas); - vp9_copy(cc->coef_probs_4x4, cm->fc.coef_probs_4x4); - vp9_copy(cc->coef_probs_8x8, cm->fc.coef_probs_8x8); - vp9_copy(cc->coef_probs_16x16, cm->fc.coef_probs_16x16); - vp9_copy(cc->coef_probs_32x32, cm->fc.coef_probs_32x32); + vp9_copy(cc->coef_probs, cm->fc.coef_probs); vp9_copy(cc->switchable_interp_prob, cm->fc.switchable_interp_prob); -#if CONFIG_COMP_INTERINTRA_PRED - cc->interintra_prob = cm->fc.interintra_prob; -#endif -#if CONFIG_CODE_NONZEROCOUNT - vp9_copy(cc->nzc_probs_4x4, cm->fc.nzc_probs_4x4); - vp9_copy(cc->nzc_probs_8x8, cm->fc.nzc_probs_8x8); - vp9_copy(cc->nzc_probs_16x16, cm->fc.nzc_probs_16x16); - vp9_copy(cc->nzc_probs_32x32, cm->fc.nzc_probs_32x32); 
- vp9_copy(cc->nzc_pcat_probs, cm->fc.nzc_pcat_probs); -#endif + vp9_copy(cc->tx_probs_8x8p, cm->fc.tx_probs_8x8p); + vp9_copy(cc->tx_probs_16x16p, cm->fc.tx_probs_16x16p); + vp9_copy(cc->tx_probs_32x32p, cm->fc.tx_probs_32x32p); + vp9_copy(cc->mbskip_probs, cm->fc.mbskip_probs); } void vp9_restore_coding_context(VP9_COMP *cpi) { @@ -193,53 +162,32 @@ void vp9_restore_coding_context(VP9_COMP *cpi) { vp9_copy(cpi->mb.nmvcosts, cc->nmvcosts); vp9_copy(cpi->mb.nmvcosts_hp, cc->nmvcosts_hp); - vp9_copy(cm->fc.vp9_mode_contexts, cc->vp9_mode_contexts); + vp9_copy(cm->fc.inter_mode_probs, cc->inter_mode_probs); - vp9_copy(cm->fc.ymode_prob, cc->ymode_prob); - vp9_copy(cm->fc.sb_ymode_prob, cc->sb_ymode_prob); - vp9_copy(cm->fc.bmode_prob, cc->bmode_prob); - vp9_copy(cm->fc.i8x8_mode_prob, cc->i8x8_mode_prob); + vp9_copy(cm->fc.y_mode_prob, cc->y_mode_prob); vp9_copy(cm->fc.uv_mode_prob, cc->uv_mode_prob); - vp9_copy(cm->fc.sub_mv_ref_prob, cc->sub_mv_ref_prob); - vp9_copy(cm->fc.mbsplit_prob, cc->mbsplit_prob); - - // Stats -#ifdef MODE_STATS - vp9_copy(y_modes, cc->y_modes); - vp9_copy(uv_modes, cc->uv_modes); - vp9_copy(b_modes, cc->b_modes); - vp9_copy(inter_y_modes, cc->inter_y_modes); - vp9_copy(inter_uv_modes, cc->inter_uv_modes); - vp9_copy(inter_b_modes, cc->inter_b_modes); -#endif + vp9_copy(cm->fc.partition_prob, cc->partition_prob); vp9_copy(cm->segment_pred_probs, cc->segment_pred_probs); - vp9_copy(cpi->ref_pred_probs_update, cc->ref_pred_probs_update); - vp9_copy(cm->ref_pred_probs, cc->ref_pred_probs); - vp9_copy(cm->prob_comppred, cc->prob_comppred); + + vp9_copy(cm->fc.intra_inter_prob, cc->intra_inter_prob); + vp9_copy(cm->fc.comp_inter_prob, cc->comp_inter_prob); + vp9_copy(cm->fc.single_ref_prob, cc->single_ref_prob); + vp9_copy(cm->fc.comp_ref_prob, cc->comp_ref_prob); vpx_memcpy(cm->last_frame_seg_map, cpi->coding_context.last_frame_seg_map_copy, - (cm->mb_rows * cm->mb_cols)); + (cm->mi_rows * cm->mi_cols)); vp9_copy(xd->last_ref_lf_deltas, cc->last_ref_lf_deltas); vp9_copy(xd->last_mode_lf_deltas, cc->last_mode_lf_deltas); - vp9_copy(cm->fc.coef_probs_4x4, cc->coef_probs_4x4); - vp9_copy(cm->fc.coef_probs_8x8, cc->coef_probs_8x8); - vp9_copy(cm->fc.coef_probs_16x16, cc->coef_probs_16x16); - vp9_copy(cm->fc.coef_probs_32x32, cc->coef_probs_32x32); + vp9_copy(cm->fc.coef_probs, cc->coef_probs); vp9_copy(cm->fc.switchable_interp_prob, cc->switchable_interp_prob); -#if CONFIG_COMP_INTERINTRA_PRED - cm->fc.interintra_prob = cc->interintra_prob; -#endif -#if CONFIG_CODE_NONZEROCOUNT - vp9_copy(cm->fc.nzc_probs_4x4, cc->nzc_probs_4x4); - vp9_copy(cm->fc.nzc_probs_8x8, cc->nzc_probs_8x8); - vp9_copy(cm->fc.nzc_probs_16x16, cc->nzc_probs_16x16); - vp9_copy(cm->fc.nzc_probs_32x32, cc->nzc_probs_32x32); - vp9_copy(cm->fc.nzc_pcat_probs, cc->nzc_pcat_probs); -#endif + vp9_copy(cm->fc.tx_probs_8x8p, cc->tx_probs_8x8p); + vp9_copy(cm->fc.tx_probs_16x16p, cc->tx_probs_16x16p); + vp9_copy(cm->fc.tx_probs_32x32p, cc->tx_probs_32x32p); + vp9_copy(cm->fc.mbskip_probs, cc->mbskip_probs); } void vp9_setup_key_frame(VP9_COMP *cpi) { @@ -258,12 +206,11 @@ void vp9_setup_key_frame(VP9_COMP *cpi) { void vp9_setup_inter_frame(VP9_COMP *cpi) { VP9_COMMON *cm = &cpi->common; MACROBLOCKD *xd = &cpi->mb.e_mbd; - if (cm->error_resilient_mode) + if (cm->error_resilient_mode || cm->intra_only) vp9_setup_past_independence(cm, xd); assert(cm->frame_context_idx < NUM_FRAME_CONTEXTS); - vpx_memcpy(&cm->fc, &cm->frame_contexts[cm->frame_context_idx], - sizeof(cm->fc)); + cm->fc = 
cm->frame_contexts[cm->frame_context_idx]; } static int estimate_bits_at_q(int frame_kind, int q, int mbs, @@ -300,7 +247,7 @@ static void calc_iframe_target_size(VP9_COMP *cpi) { } -// Do the best we can to define the parameteres for the next GF based +// Do the best we can to define the parameters for the next GF based // on what information we have available. // // In this experimental code only two pass is supported @@ -358,16 +305,13 @@ static void calc_pframe_target_size(VP9_COMP *cpi) { (estimate_bits_at_q(1, q, cpi->common.MBs, 1.0) * cpi->last_boost) / 100; } - } else { // If there is an active ARF at this location use the minimum - // bits on this frame even if it is a contructed arf. + // bits on this frame even if it is a constructed arf. // The active maximum quantizer insures that an appropriate - // number of bits will be spent if needed for contstructed ARFs. + // number of bits will be spent if needed for constructed ARFs. cpi->this_frame_target = 0; } - - cpi->current_gf_interval = cpi->frames_till_gf_update_due; } } diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 7434e5cdd3831cb7e4616601f7f20f0bc67d9be7..7eff2d04d0c7aec2677e2eaf695ff1a15d0beae4 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -13,8 +13,8 @@ #include <math.h> #include <limits.h> #include <assert.h> -#include "vp9/common/vp9_pragmas.h" +#include "vp9/common/vp9_pragmas.h" #include "vp9/encoder/vp9_tokenize.h" #include "vp9/encoder/vp9_treewriter.h" #include "vp9/encoder/vp9_onyx_int.h" @@ -34,7 +34,6 @@ #include "vpx_mem/vpx_mem.h" #include "vp9/common/vp9_systemdependent.h" #include "vp9/encoder/vp9_encodemv.h" - #include "vp9/common/vp9_seg_common.h" #include "vp9/common/vp9_pred_common.h" #include "vp9/common/vp9_entropy.h" @@ -42,32 +41,16 @@ #include "vp9/common/vp9_mvref_common.h" #include "vp9/common/vp9_common.h" -#define MAXF(a,b) (((a) > (b)) ? 
(a) : (b))
-
 #define INVALID_MV 0x80008000
 
 /* Factor to weigh the rate for switchable interp filters */
 #define SWITCHABLE_INTERP_RATE_FACTOR 1
 
-static const int auto_speed_thresh[17] = {
-  1000,
-  200,
-  150,
-  130,
-  150,
-  125,
-  120,
-  115,
-  115,
-  115,
-  115,
-  115,
-  115,
-  115,
-  115,
-  115,
-  105
-};
+DECLARE_ALIGNED(16, extern const uint8_t,
+                vp9_pt_energy_class[MAX_ENTROPY_TOKENS]);
+
+#define I4X4_PRED 0x8000
+#define SPLITMV 0x10000
 
 const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
   {ZEROMV,    LAST_FRAME,   NONE},
@@ -104,117 +87,62 @@ const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
   {SPLITMV,   GOLDEN_FRAME, NONE},
   {SPLITMV,   ALTREF_FRAME, NONE},
 
-  {B_PRED,    INTRA_FRAME,  NONE},
-  {I8X8_PRED, INTRA_FRAME,  NONE},
+  {I4X4_PRED, INTRA_FRAME,  NONE},
 
   /* compound prediction modes */
-  {ZEROMV,    LAST_FRAME,   GOLDEN_FRAME},
-  {NEARESTMV, LAST_FRAME,   GOLDEN_FRAME},
-  {NEARMV,    LAST_FRAME,   GOLDEN_FRAME},
-
-  {ZEROMV,    ALTREF_FRAME, LAST_FRAME},
-  {NEARESTMV, ALTREF_FRAME, LAST_FRAME},
-  {NEARMV,    ALTREF_FRAME, LAST_FRAME},
+  {ZEROMV,    LAST_FRAME,   ALTREF_FRAME},
+  {NEARESTMV, LAST_FRAME,   ALTREF_FRAME},
+  {NEARMV,    LAST_FRAME,   ALTREF_FRAME},
 
   {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},
   {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
   {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
 
-  {NEWMV,     LAST_FRAME,   GOLDEN_FRAME},
-  {NEWMV,     ALTREF_FRAME, LAST_FRAME  },
+  {NEWMV,     LAST_FRAME,   ALTREF_FRAME},
   {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},
 
-  {SPLITMV,   LAST_FRAME,   GOLDEN_FRAME},
-  {SPLITMV,   ALTREF_FRAME, LAST_FRAME  },
+  {SPLITMV,   LAST_FRAME,   ALTREF_FRAME},
   {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},
-
-#if CONFIG_COMP_INTERINTRA_PRED
-  /* compound inter-intra prediction */
-  {ZEROMV,    LAST_FRAME,   INTRA_FRAME},
-  {NEARESTMV, LAST_FRAME,   INTRA_FRAME},
-  {NEARMV,    LAST_FRAME,   INTRA_FRAME},
-  {NEWMV,     LAST_FRAME,   INTRA_FRAME},
-
-  {ZEROMV,    GOLDEN_FRAME, INTRA_FRAME},
-  {NEARESTMV, GOLDEN_FRAME, INTRA_FRAME},
-  {NEARMV,    GOLDEN_FRAME, INTRA_FRAME},
-  {NEWMV,     GOLDEN_FRAME, INTRA_FRAME},
-
-  {ZEROMV,    ALTREF_FRAME, INTRA_FRAME},
-  {NEARESTMV, ALTREF_FRAME, INTRA_FRAME},
-  {NEARMV,    ALTREF_FRAME, INTRA_FRAME},
-  {NEWMV,     ALTREF_FRAME, INTRA_FRAME},
-#endif
 };
 
-static void fill_token_costs(vp9_coeff_count *c,
-                             vp9_coeff_probs *p,
-                             int block_type_counts) {
-  int i, j, k, l;
-
-  for (i = 0; i < block_type_counts; i++)
-    for (j = 0; j < REF_TYPES; j++)
-      for (k = 0; k < COEF_BANDS; k++)
-        for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
-          vp9_cost_tokens_skip((int *)(c[i][j][k][l]),
-                               p[i][j][k][l],
-                               vp9_coef_tree);
-        }
-}
+// The baseline rd thresholds for breaking out of the rd loop for
+// certain modes are assumed to be based on 8x8 blocks.
+// This table is used to correct for block size.
+// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
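+//
+// Worked example of that scaling: an 8x8 block keeps the baseline
+// (factor 4, i.e. 4 / 4 = x1.0), a 4x4 block uses factor 2 (x0.5) and a
+// 64x64 block uses factor 32 (x8), since the threshold is computed as
+//   rd_threshes[bsize][mode] = thresh_mult[mode] * q *
+//                              rd_thresh_block_size_factor[bsize] / 4
+// (with an extra / 100 on the two-pass RDDIV == 1 path below).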
+static int rd_thresh_block_size_factor[BLOCK_SIZE_TYPES] = + {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32}; -#if CONFIG_CODE_NONZEROCOUNT -static void fill_nzc_costs(VP9_COMP *cpi, int block_size) { - int nzc_context, r, b, nzc, values; - int cost[16]; - values = block_size * block_size + 1; - - for (nzc_context = 0; nzc_context < MAX_NZC_CONTEXTS; ++nzc_context) { - for (r = 0; r < REF_TYPES; ++r) { - for (b = 0; b < BLOCK_TYPES; ++b) { - unsigned int *nzc_costs; - if (block_size == 4) { - vp9_cost_tokens(cost, - cpi->common.fc.nzc_probs_4x4[nzc_context][r][b], - vp9_nzc4x4_tree); - nzc_costs = cpi->mb.nzc_costs_4x4[nzc_context][r][b]; - } else if (block_size == 8) { - vp9_cost_tokens(cost, - cpi->common.fc.nzc_probs_8x8[nzc_context][r][b], - vp9_nzc8x8_tree); - nzc_costs = cpi->mb.nzc_costs_8x8[nzc_context][r][b]; - } else if (block_size == 16) { - vp9_cost_tokens(cost, - cpi->common.fc.nzc_probs_16x16[nzc_context][r][b], - vp9_nzc16x16_tree); - nzc_costs = cpi->mb.nzc_costs_16x16[nzc_context][r][b]; - } else { - vp9_cost_tokens(cost, - cpi->common.fc.nzc_probs_32x32[nzc_context][r][b], - vp9_nzc32x32_tree); - nzc_costs = cpi->mb.nzc_costs_32x32[nzc_context][r][b]; - } +#define BASE_RD_THRESH_FREQ_FACT 16 +#define MAX_RD_THRESH_FREQ_FACT 32 +#define MAX_RD_THRESH_FREQ_INC 1 - for (nzc = 0; nzc < values; ++nzc) { - int e, c, totalcost = 0; - c = codenzc(nzc); - totalcost = cost[c]; - if ((e = vp9_extranzcbits[c])) { - int x = nzc - vp9_basenzcvalue[c]; - while (e--) { - totalcost += vp9_cost_bit( - cpi->common.fc.nzc_pcat_probs[nzc_context] - [c - NZC_TOKENS_NOEXTRA][e], - ((x >> e) & 1)); - } +static void fill_token_costs(vp9_coeff_count (*c)[BLOCK_TYPES], + vp9_coeff_count (*cnoskip)[BLOCK_TYPES], + vp9_coeff_probs_model (*p)[BLOCK_TYPES]) { + int i, j, k, l; + TX_SIZE t; + for (t = TX_4X4; t <= TX_32X32; t++) + for (i = 0; i < BLOCK_TYPES; i++) + for (j = 0; j < REF_TYPES; j++) + for (k = 0; k < COEF_BANDS; k++) + for (l = 0; l < PREV_COEF_CONTEXTS; l++) { + vp9_prob probs[ENTROPY_NODES]; + vp9_model_to_full_probs(p[t][i][j][k][l], probs); + vp9_cost_tokens((int *)cnoskip[t][i][j][k][l], probs, + vp9_coef_tree); +#if CONFIG_BALANCED_COEFTREE + // Replace the eob node prob with a very small value so that the + // cost approximately equals the cost without the eob node + probs[1] = 1; + vp9_cost_tokens((int *)c[t][i][j][k][l], probs, vp9_coef_tree); +#else + vp9_cost_tokens_skip((int *)c[t][i][j][k][l], probs, + vp9_coef_tree); + assert(c[t][i][j][k][l][DCT_EOB_TOKEN] == + cnoskip[t][i][j][k][l][DCT_EOB_TOKEN]); +#endif } - nzc_costs[nzc] = totalcost; - } - } - } - } } -#endif - static int rd_iifactor[32] = { 4, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -236,12 +164,12 @@ void vp9_init_me_luts() { for (i = 0; i < QINDEX_RANGE; i++) { sad_per_bit16lut[i] = (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107); - sad_per_bit4lut[i] = (int)((0.063 * vp9_convert_qindex_to_q(i)) + 2.742); + sad_per_bit4lut[i] = (int)(0.063 * vp9_convert_qindex_to_q(i) + 2.742); } } static int compute_rd_mult(int qindex) { - int q = vp9_dc_quant(qindex, 0); + const int q = vp9_dc_quant(qindex, 0); return (11 * q * q) >> 2; } @@ -252,7 +180,7 @@ void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) { void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) { - int q, i; + int q, i, bsize; vp9_clear_system_state(); // __asm emms; @@ -260,7 +188,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) { // for key frames, golden frames and arf frames. 
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
-  qindex = (qindex < 0) ? 0 : ((qindex > MAXQ) ? MAXQ : qindex);
+  qindex = clamp(qindex, 0, MAXQ);
 
   cpi->RDMULT = compute_rd_mult(qindex);
   if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
@@ -284,44 +212,56 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
     cpi->RDDIV = 1;
     cpi->RDMULT /= 100;
 
-    for (i = 0; i < MAX_MODES; i++) {
-      if (cpi->sf.thresh_mult[i] < INT_MAX) {
-        cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100;
-      } else {
-        cpi->rd_threshes[i] = INT_MAX;
+    for (bsize = 0; bsize < BLOCK_SIZE_TYPES; ++bsize) {
+      for (i = 0; i < MAX_MODES; ++i) {
+        // Thresholds here seem unnecessarily harsh, but they are fine given
+        // the actual range of values used for cpi->sf.thresh_mult[].
+        int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]);
+
+        // *4 relates to the scaling of rd_thresh_block_size_factor[]
+        if ((int64_t)cpi->sf.thresh_mult[i] < thresh_max) {
+          cpi->rd_threshes[bsize][i] =
+              cpi->sf.thresh_mult[i] * q *
+              rd_thresh_block_size_factor[bsize] / (4 * 100);
+        } else {
+          cpi->rd_threshes[bsize][i] = INT_MAX;
+        }
+        cpi->rd_baseline_thresh[bsize][i] = cpi->rd_threshes[bsize][i];
+        cpi->rd_thresh_freq_fact[bsize][i] = BASE_RD_THRESH_FREQ_FACT;
       }
-      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
     }
   } else {
     cpi->RDDIV = 100;
 
-    for (i = 0; i < MAX_MODES; i++) {
-      if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) {
-        cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
-      } else {
-        cpi->rd_threshes[i] = INT_MAX;
+    for (bsize = 0; bsize < BLOCK_SIZE_TYPES; ++bsize) {
+      for (i = 0; i < MAX_MODES; i++) {
+        // Thresholds here seem unnecessarily harsh, but they are fine given
+        // the actual range of values used for cpi->sf.thresh_mult[].
+        int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]);
+
+        if (cpi->sf.thresh_mult[i] < thresh_max) {
+          cpi->rd_threshes[bsize][i] =
+              cpi->sf.thresh_mult[i] * q *
+              rd_thresh_block_size_factor[bsize] / 4;
+        } else {
+          cpi->rd_threshes[bsize][i] = INT_MAX;
+        }
+        cpi->rd_baseline_thresh[bsize][i] = cpi->rd_threshes[bsize][i];
+        cpi->rd_thresh_freq_fact[bsize][i] = BASE_RD_THRESH_FREQ_FACT;
      }
-      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  }
 
-  fill_token_costs(cpi->mb.token_costs[TX_4X4],
-                   cpi->common.fc.coef_probs_4x4, BLOCK_TYPES);
-  fill_token_costs(cpi->mb.token_costs[TX_8X8],
-                   cpi->common.fc.coef_probs_8x8, BLOCK_TYPES);
-  fill_token_costs(cpi->mb.token_costs[TX_16X16],
-                   cpi->common.fc.coef_probs_16x16, BLOCK_TYPES);
-  fill_token_costs(cpi->mb.token_costs[TX_32X32],
-                   cpi->common.fc.coef_probs_32x32, BLOCK_TYPES);
-#if CONFIG_CODE_NONZEROCOUNT
-  fill_nzc_costs(cpi, 4);
-  fill_nzc_costs(cpi, 8);
-  fill_nzc_costs(cpi, 16);
-  fill_nzc_costs(cpi, 32);
-#endif
+  fill_token_costs(cpi->mb.token_costs,
+                   cpi->mb.token_costs_noskip,
+                   cpi->common.fc.coef_probs);
+
+  for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
+    vp9_cost_tokens(cpi->mb.partition_cost[i],
+                    cpi->common.fc.partition_prob[cpi->common.frame_type][i],
+                    vp9_partition_tree);
 
   /*rough estimate for costing*/
-  cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4;
   vp9_init_mode_costs(cpi);
 
   if (cpi->common.frame_type != KEY_FRAME) {
@@ -345,389 +285,136 @@ int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) {
   return error;
 }
 
-int vp9_mbblock_error_c(MACROBLOCK *mb) {
-  BLOCK *be;
-  BLOCKD *bd;
-  int i, j;
-  int berror, error = 0;
-
-  for (i = 0; i < 16; i++) {
-    be = &mb->block[i];
-    bd = &mb->e_mbd.block[i];
-    berror = 0;
-    for (j = 0; j < 16; j++) {
-      int this_diff = 
be->coeff[j] - bd->dqcoeff[j]; - berror += this_diff * this_diff; - } - error += berror; - } - return error; -} - -int vp9_mbuverror_c(MACROBLOCK *mb) { - BLOCK *be; - BLOCKD *bd; - - int i, error = 0; - - for (i = 16; i < 24; i++) { - be = &mb->block[i]; - bd = &mb->e_mbd.block[i]; - - error += vp9_block_error_c(be->coeff, bd->dqcoeff, 16); - } - - return error; -} - -int vp9_uvsse(MACROBLOCK *x) { - uint8_t *uptr, *vptr; - uint8_t *upred_ptr = (*(x->block[16].base_src) + x->block[16].src); - uint8_t *vpred_ptr = (*(x->block[20].base_src) + x->block[20].src); - int uv_stride = x->block[16].src_stride; - - unsigned int sse1 = 0; - unsigned int sse2 = 0; - int mv_row = x->e_mbd.mode_info_context->mbmi.mv[0].as_mv.row; - int mv_col = x->e_mbd.mode_info_context->mbmi.mv[0].as_mv.col; - int offset; - int pre_stride = x->e_mbd.block[16].pre_stride; - - if (mv_row < 0) - mv_row -= 1; - else - mv_row += 1; - - if (mv_col < 0) - mv_col -= 1; - else - mv_col += 1; - - mv_row /= 2; - mv_col /= 2; - - offset = (mv_row >> 3) * pre_stride + (mv_col >> 3); - uptr = x->e_mbd.pre.u_buffer + offset; - vptr = x->e_mbd.pre.v_buffer + offset; - - if ((mv_row | mv_col) & 7) { - vp9_sub_pixel_variance8x8(uptr, pre_stride, (mv_col & 7) << 1, - (mv_row & 7) << 1, upred_ptr, uv_stride, &sse2); - vp9_sub_pixel_variance8x8(vptr, pre_stride, (mv_col & 7) << 1, - (mv_row & 7) << 1, vpred_ptr, uv_stride, &sse1); - sse2 += sse1; - } else { - vp9_variance8x8(uptr, pre_stride, upred_ptr, uv_stride, &sse2); - vp9_variance8x8(vptr, pre_stride, vpred_ptr, uv_stride, &sse1); - sse2 += sse1; - } - return sse2; -} - static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, - int ib, PLANE_TYPE type, - ENTROPY_CONTEXT *a, - ENTROPY_CONTEXT *l, - TX_SIZE tx_size) { + int plane, int block, PLANE_TYPE type, + ENTROPY_CONTEXT *A, + ENTROPY_CONTEXT *L, + TX_SIZE tx_size, + int y_blocks) { MACROBLOCKD *const xd = &mb->e_mbd; MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; int pt; - const int eob = xd->eobs[ib]; int c = 0; int cost = 0, pad; const int *scan, *nb; - const int16_t *qcoeff_ptr = xd->qcoeff + ib * 16; - const int ref = mbmi->ref_frame != INTRA_FRAME; + const int eob = xd->plane[plane].eobs[block]; + const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff, + block, 16); + const int ref = mbmi->ref_frame[0] != INTRA_FRAME; unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] = mb->token_costs[tx_size][type][ref]; - ENTROPY_CONTEXT a_ec, l_ec; - ENTROPY_CONTEXT *const a1 = a + - sizeof(ENTROPY_CONTEXT_PLANES)/sizeof(ENTROPY_CONTEXT); - ENTROPY_CONTEXT *const l1 = l + - sizeof(ENTROPY_CONTEXT_PLANES)/sizeof(ENTROPY_CONTEXT); - -#if CONFIG_CODE_NONZEROCOUNT - int nzc_context = vp9_get_nzc_context(cm, xd, ib); - unsigned int *nzc_cost; -#else + ENTROPY_CONTEXT above_ec, left_ec; + TX_TYPE tx_type = DCT_DCT; + const int segment_id = xd->mode_info_context->mbmi.segment_id; - vp9_prob (*coef_probs)[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS] - [ENTROPY_NODES]; -#endif + unsigned int (*token_costs_noskip)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] = + mb->token_costs_noskip[tx_size][type][ref]; + int seg_eob, default_eob; uint8_t token_cache[1024]; + const uint8_t * band_translate; // Check for consistency of tx_size with mode info + assert((!type && !plane) || (type && plane)); if (type == PLANE_TYPE_Y_WITH_DC) { assert(xd->mode_info_context->mbmi.txfm_size == tx_size); } else { - TX_SIZE tx_size_uv = get_uv_tx_size(xd); + TX_SIZE tx_size_uv = get_uv_tx_size(mbmi); assert(tx_size == tx_size_uv); } 
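+  // The switch below selects, per transform size: the scan order (which for
+  // the 4x4, 8x8 and 16x16 luma cases can depend on the intra transform
+  // type), the segment eob limit, the coefficient-band translation table,
+  // and the above/left entropy contexts, folding the per-4x4 context
+  // arrays A[] / L[] down to single nonzero flags.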
switch (tx_size) { case TX_4X4: { - const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? - get_tx_type_4x4(xd, ib) : DCT_DCT; - a_ec = *a; - l_ec = *l; -#if CONFIG_CODE_NONZEROCOUNT - nzc_cost = mb->nzc_costs_4x4[nzc_context][ref][type]; -#else - coef_probs = cm->fc.coef_probs_4x4; -#endif + tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? + get_tx_type_4x4(xd, block) : DCT_DCT; + above_ec = A[0] != 0; + left_ec = L[0] != 0; seg_eob = 16; - if (tx_type == ADST_DCT) { - scan = vp9_row_scan_4x4; - } else if (tx_type == DCT_ADST) { - scan = vp9_col_scan_4x4; - } else { - scan = vp9_default_zig_zag1d_4x4; - } + scan = get_scan_4x4(tx_type); + band_translate = vp9_coefband_trans_4x4; break; } case TX_8X8: { const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; - const int sz = 3 + sb_type, x = ib & ((1 << sz) - 1), y = ib - x; - const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? - get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT; - a_ec = (a[0] + a[1]) != 0; - l_ec = (l[0] + l[1]) != 0; - if (tx_type == ADST_DCT) { - scan = vp9_row_scan_8x8; - } else if (tx_type == DCT_ADST) { - scan = vp9_col_scan_8x8; - } else { - scan = vp9_default_zig_zag1d_8x8; - } -#if CONFIG_CODE_NONZEROCOUNT - nzc_cost = mb->nzc_costs_8x8[nzc_context][ref][type]; -#else - coef_probs = cm->fc.coef_probs_8x8; -#endif + const int sz = 1 + b_width_log2(sb_type); + const int x = block & ((1 << sz) - 1), y = block - x; + TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? + get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT; + above_ec = (A[0] + A[1]) != 0; + left_ec = (L[0] + L[1]) != 0; + scan = get_scan_8x8(tx_type); seg_eob = 64; + band_translate = vp9_coefband_trans_8x8plus; break; } case TX_16X16: { const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; - const int sz = 4 + sb_type, x = ib & ((1 << sz) - 1), y = ib - x; - const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? - get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT; - if (tx_type == ADST_DCT) { - scan = vp9_row_scan_16x16; - } else if (tx_type == DCT_ADST) { - scan = vp9_col_scan_16x16; - } else { - scan = vp9_default_zig_zag1d_16x16; - } -#if CONFIG_CODE_NONZEROCOUNT - nzc_cost = mb->nzc_costs_16x16[nzc_context][ref][type]; -#else - coef_probs = cm->fc.coef_probs_16x16; -#endif + const int sz = 2 + b_width_log2(sb_type); + const int x = block & ((1 << sz) - 1), y = block - x; + TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? 
+ get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT; + scan = get_scan_16x16(tx_type); seg_eob = 256; - if (type == PLANE_TYPE_UV) { - a_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0; - l_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0; - } else { - a_ec = (a[0] + a[1] + a[2] + a[3]) != 0; - l_ec = (l[0] + l[1] + l[2] + l[3]) != 0; - } + above_ec = (A[0] + A[1] + A[2] + A[3]) != 0; + left_ec = (L[0] + L[1] + L[2] + L[3]) != 0; + band_translate = vp9_coefband_trans_8x8plus; break; } case TX_32X32: - scan = vp9_default_zig_zag1d_32x32; -#if CONFIG_CODE_NONZEROCOUNT - nzc_cost = mb->nzc_costs_32x32[nzc_context][ref][type]; -#else - coef_probs = cm->fc.coef_probs_32x32; -#endif + scan = vp9_default_scan_32x32; seg_eob = 1024; - if (type == PLANE_TYPE_UV) { - ENTROPY_CONTEXT *a2, *a3, *l2, *l3; - a2 = a1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); - a3 = a2 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); - l2 = l1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); - l3 = l2 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); - a_ec = (a[0] + a[1] + a1[0] + a1[1] + - a2[0] + a2[1] + a3[0] + a3[1]) != 0; - l_ec = (l[0] + l[1] + l1[0] + l1[1] + - l2[0] + l2[1] + l3[0] + l3[1]) != 0; - } else { - a_ec = (a[0] + a[1] + a[2] + a[3] + - a1[0] + a1[1] + a1[2] + a1[3]) != 0; - l_ec = (l[0] + l[1] + l[2] + l[3] + - l1[0] + l1[1] + l1[2] + l1[3]) != 0; - } + above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0; + left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0; + band_translate = vp9_coefband_trans_8x8plus; break; default: abort(); break; } + assert(eob <= seg_eob); - VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec); + pt = combine_entropy_contexts(above_ec, left_ec); nb = vp9_get_coef_neighbors_handle(scan, &pad); default_eob = seg_eob; -#if CONFIG_CODE_NONZEROCOUNT == 0 if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) seg_eob = 0; -#endif + + /* sanity check to ensure that we do not have spurious non-zero q values */ + if (eob < seg_eob) + assert(qcoeff_ptr[scan[eob]] == 0); { -#if CONFIG_CODE_NONZEROCOUNT - int nzc = 0; -#endif - for (; c < eob; c++) { + for (c = 0; c < eob; c++) { int v = qcoeff_ptr[scan[c]]; - int t = vp9_dct_value_tokens_ptr[v].Token; -#if CONFIG_CODE_NONZEROCOUNT - nzc += (v != 0); -#endif - token_cache[c] = t; - cost += token_costs[get_coef_band(scan, tx_size, c)][pt][t]; - cost += vp9_dct_value_cost_ptr[v]; -#if !CONFIG_CODE_NONZEROCOUNT - if (!c || token_cache[c - 1]) - cost += vp9_cost_bit(coef_probs[type][ref] - [get_coef_band(scan, tx_size, c)] - [pt][0], 1); -#endif - pt = vp9_get_coef_context(scan, nb, pad, token_cache, c + 1, default_eob); + int t = vp9_dct_value_tokens_ptr[v].token; + int band = get_coef_band(band_translate, c); + if (c) + pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob); + + if (!c || token_cache[scan[c - 1]]) // do not skip eob + cost += token_costs_noskip[band][pt][t] + vp9_dct_value_cost_ptr[v]; + else + cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v]; + token_cache[scan[c]] = vp9_pt_energy_class[t]; + } + if (c < seg_eob) { + if (c) + pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob); + cost += mb->token_costs_noskip[tx_size][type][ref] + [get_coef_band(band_translate, c)] + [pt][DCT_EOB_TOKEN]; } -#if CONFIG_CODE_NONZEROCOUNT - cost += nzc_cost[nzc]; -#else - if (c < seg_eob) - cost += mb->token_costs[tx_size][type][ref] - [get_coef_band(scan, tx_size, c)] - [pt][DCT_EOB_TOKEN]; -#endif } // is eob first coefficient; - pt = (c 
> 0); - *a = *l = pt; - if (tx_size >= TX_8X8) { - a[1] = l[1] = pt; - if (tx_size >= TX_16X16) { - if (type == PLANE_TYPE_UV) { - a1[0] = a1[1] = l1[0] = l1[1] = pt; - } else { - a[2] = a[3] = l[2] = l[3] = pt; - if (tx_size >= TX_32X32) { - a1[0] = a1[1] = a1[2] = a1[3] = pt; - l1[0] = l1[1] = l1[2] = l1[3] = pt; - } - } - } + for (pt = 0; pt < (1 << tx_size); pt++) { + A[pt] = L[pt] = c > 0; } - return cost; -} - -static int rdcost_mby_4x4(VP9_COMMON *const cm, MACROBLOCK *mb) { - int cost = 0; - int b; - MACROBLOCKD *xd = &mb->e_mbd; - ENTROPY_CONTEXT_PLANES t_above, t_left; - ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *)&t_above; - ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *)&t_left; - - vpx_memcpy(&t_above, xd->above_context, sizeof(t_above)); - vpx_memcpy(&t_left, xd->left_context, sizeof(t_left)); - - for (b = 0; b < 16; b++) - cost += cost_coeffs(cm, mb, b, PLANE_TYPE_Y_WITH_DC, - ta + vp9_block2above[TX_4X4][b], - tl + vp9_block2left[TX_4X4][b], - TX_4X4); - - return cost; -} - -static void macro_block_yrd_4x4(VP9_COMMON *const cm, - MACROBLOCK *mb, - int *rate, - int *distortion, - int *skippable) { - MACROBLOCKD *const xd = &mb->e_mbd; - - xd->mode_info_context->mbmi.txfm_size = TX_4X4; - vp9_transform_mby_4x4(mb); - vp9_quantize_mby_4x4(mb); - - *distortion = vp9_mbblock_error(mb) >> 2; - *rate = rdcost_mby_4x4(cm, mb); - *skippable = vp9_mby_is_skippable_4x4(xd); -} - -static int rdcost_mby_8x8(VP9_COMMON *const cm, MACROBLOCK *mb) { - int cost = 0; - int b; - MACROBLOCKD *xd = &mb->e_mbd; - ENTROPY_CONTEXT_PLANES t_above, t_left; - ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *)&t_above; - ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *)&t_left; - - vpx_memcpy(&t_above, xd->above_context, sizeof(t_above)); - vpx_memcpy(&t_left, xd->left_context, sizeof(t_left)); - - for (b = 0; b < 16; b += 4) - cost += cost_coeffs(cm, mb, b, PLANE_TYPE_Y_WITH_DC, - ta + vp9_block2above[TX_8X8][b], - tl + vp9_block2left[TX_8X8][b], - TX_8X8); return cost; } -static void macro_block_yrd_8x8(VP9_COMMON *const cm, - MACROBLOCK *mb, - int *rate, - int *distortion, - int *skippable) { - MACROBLOCKD *const xd = &mb->e_mbd; - - xd->mode_info_context->mbmi.txfm_size = TX_8X8; - vp9_transform_mby_8x8(mb); - vp9_quantize_mby_8x8(mb); - - *distortion = vp9_mbblock_error(mb) >> 2; - *rate = rdcost_mby_8x8(cm, mb); - *skippable = vp9_mby_is_skippable_8x8(xd); -} - -static int rdcost_mby_16x16(VP9_COMMON *const cm, MACROBLOCK *mb) { - MACROBLOCKD *const xd = &mb->e_mbd; - ENTROPY_CONTEXT_PLANES t_above, t_left; - ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *)&t_above; - ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *)&t_left; - - vpx_memcpy(&t_above, xd->above_context, sizeof(t_above)); - vpx_memcpy(&t_left, xd->left_context, sizeof(t_left)); - - return cost_coeffs(cm, mb, 0, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_16X16); -} - -static void macro_block_yrd_16x16(VP9_COMMON *const cm, MACROBLOCK *mb, - int *rate, int *distortion, int *skippable) { - MACROBLOCKD *const xd = &mb->e_mbd; - - xd->mode_info_context->mbmi.txfm_size = TX_16X16; - vp9_transform_mby_16x16(mb); - vp9_quantize_mby_16x16(mb); - // TODO(jingning) is it possible to quickly determine whether to force - // trailing coefficients to be zero, instead of running trellis - // optimization in the rate-distortion optimization loop? 
- if (mb->optimize && - xd->mode_info_context->mbmi.mode < I8X8_PRED) - vp9_optimize_mby_16x16(cm, mb); - - *distortion = vp9_mbblock_error(mb) >> 2; - *rate = rdcost_mby_16x16(cm, mb); - *skippable = vp9_mby_is_skippable_16x16(xd); -} - static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, int (*r)[2], int *rate, int *d, int *distortion, @@ -737,40 +424,33 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; - vp9_prob skip_prob = cm->mb_no_coeff_skip ? - vp9_get_pred_prob(cm, xd, PRED_MBSKIP) : 128; + vp9_prob skip_prob = vp9_get_pred_prob(cm, xd, PRED_MBSKIP); int64_t rd[TX_SIZE_MAX_SB][2]; int n, m; + int s0, s1; + + const vp9_prob *tx_probs = vp9_get_pred_probs(cm, xd, PRED_TX_SIZE); for (n = TX_4X4; n <= max_txfm_size; n++) { r[n][1] = r[n][0]; for (m = 0; m <= n - (n == max_txfm_size); m++) { if (m == n) - r[n][1] += vp9_cost_zero(cm->prob_tx[m]); + r[n][1] += vp9_cost_zero(tx_probs[m]); else - r[n][1] += vp9_cost_one(cm->prob_tx[m]); + r[n][1] += vp9_cost_one(tx_probs[m]); } } - if (cm->mb_no_coeff_skip) { - int s0, s1; + assert(skip_prob > 0); + s0 = vp9_cost_bit(skip_prob, 0); + s1 = vp9_cost_bit(skip_prob, 1); - assert(skip_prob > 0); - s0 = vp9_cost_bit(skip_prob, 0); - s1 = vp9_cost_bit(skip_prob, 1); - - for (n = TX_4X4; n <= max_txfm_size; n++) { - if (s[n]) { - rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]); - } else { - rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]); - rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]); - } - } - } else { - for (n = TX_4X4; n <= max_txfm_size; n++) { - rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0], d[n]); - rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1], d[n]); + for (n = TX_4X4; n <= max_txfm_size; n++) { + if (s[n]) { + rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]); + } else { + rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]); + rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]); } } @@ -780,17 +460,19 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] && rd[TX_32X32][1] < rd[TX_4X4][1]))) { mbmi->txfm_size = TX_32X32; - } else if ( cm->txfm_mode == ALLOW_16X16 || - (max_txfm_size == TX_16X16 && cm->txfm_mode == ALLOW_32X32) || - (cm->txfm_mode == TX_MODE_SELECT && - rd[TX_16X16][1] < rd[TX_8X8][1] && - rd[TX_16X16][1] < rd[TX_4X4][1])) { + } else if (max_txfm_size >= TX_16X16 && + (cm->txfm_mode == ALLOW_16X16 || + cm->txfm_mode == ALLOW_32X32 || + (cm->txfm_mode == TX_MODE_SELECT && + rd[TX_16X16][1] < rd[TX_8X8][1] && + rd[TX_16X16][1] < rd[TX_4X4][1]))) { mbmi->txfm_size = TX_16X16; } else if (cm->txfm_mode == ALLOW_8X8 || + cm->txfm_mode == ALLOW_16X16 || + cm->txfm_mode == ALLOW_32X32 || (cm->txfm_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) { mbmi->txfm_size = TX_8X8; } else { - assert(cm->txfm_mode == ONLY_4X4 || cm->txfm_mode == TX_MODE_SELECT); mbmi->txfm_size = TX_4X4; } @@ -800,522 +482,353 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, txfm_cache[ONLY_4X4] = rd[TX_4X4][0]; txfm_cache[ALLOW_8X8] = rd[TX_8X8][0]; - txfm_cache[ALLOW_16X16] = rd[TX_16X16][0]; - txfm_cache[ALLOW_32X32] = rd[max_txfm_size][0]; + txfm_cache[ALLOW_16X16] = rd[MIN(max_txfm_size, TX_16X16)][0]; + txfm_cache[ALLOW_32X32] = rd[MIN(max_txfm_size, TX_32X32)][0]; if (max_txfm_size == TX_32X32 && rd[TX_32X32][1] < 
rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] && rd[TX_32X32][1] < rd[TX_4X4][1]) txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1]; - else if (rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1]) + else if (max_txfm_size >= TX_16X16 && + rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1]) txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1]; else txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ? rd[TX_4X4][1] : rd[TX_8X8][1]; } -static void macro_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, - int *distortion, int *skippable, - int64_t txfm_cache[NB_TXFM_MODES]) { - VP9_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &x->e_mbd; - int r[TX_SIZE_MAX_MB][2], d[TX_SIZE_MAX_MB], s[TX_SIZE_MAX_MB]; - - vp9_subtract_mby(x->src_diff, *(x->block[0].base_src), xd->predictor, - x->block[0].src_stride); - - macro_block_yrd_16x16(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16]); - macro_block_yrd_8x8(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8]); - macro_block_yrd_4x4(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4]); - - choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skippable, - txfm_cache, TX_16X16); -} - -static void copy_predictor(uint8_t *dst, const uint8_t *predictor) { - const unsigned int *p = (const unsigned int *)predictor; - unsigned int *d = (unsigned int *)dst; - d[0] = p[0]; - d[4] = p[4]; - d[8] = p[8]; - d[12] = p[12]; -} - -static int vp9_sb_block_error_c(int16_t *coeff, int16_t *dqcoeff, - int block_size, int shift) { +static int block_error(int16_t *coeff, int16_t *dqcoeff, + int block_size, int shift) { int i; int64_t error = 0; for (i = 0; i < block_size; i++) { - unsigned int this_diff = coeff[i] - dqcoeff[i]; - error += this_diff * this_diff; + int this_diff = coeff[i] - dqcoeff[i]; + error += (unsigned)this_diff * this_diff; } error >>= shift; return error > INT_MAX ? 
INT_MAX : (int)error; } -static int rdcost_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x) { - int cost = 0, b; - MACROBLOCKD *const xd = &x->e_mbd; - ENTROPY_CONTEXT_PLANES t_above[2], t_left[2]; - ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) &t_above; - ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) &t_left; - - vpx_memcpy(&t_above, xd->above_context, sizeof(t_above)); - vpx_memcpy(&t_left, xd->left_context, sizeof(t_left)); - - for (b = 0; b < 64; b++) - cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC, - ta + vp9_block2above_sb[TX_4X4][b], - tl + vp9_block2left_sb[TX_4X4][b], TX_4X4); - - return cost; +static int block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) { + const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); + return block_error(x->plane[0].coeff, x->e_mbd.plane[0].dqcoeff, + 16 << (bwl + bhl), shift); } -static void super_block_yrd_4x4(VP9_COMMON *const cm, MACROBLOCK *x, - int *rate, int *distortion, int *skippable) { - MACROBLOCKD *const xd = &x->e_mbd; - - xd->mode_info_context->mbmi.txfm_size = TX_4X4; - vp9_transform_sby_4x4(x); - vp9_quantize_sby_4x4(x); +static int block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) { + const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); + int64_t sum = 0; + int plane; - *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 1024, 2); - *rate = rdcost_sby_4x4(cm, x); - *skippable = vp9_sby_is_skippable_4x4(xd); + for (plane = 1; plane < MAX_MB_PLANE; plane++) { + const int subsampling = x->e_mbd.plane[plane].subsampling_x + + x->e_mbd.plane[plane].subsampling_y; + sum += block_error(x->plane[plane].coeff, x->e_mbd.plane[plane].dqcoeff, + 16 << (bwl + bhl - subsampling), 0); + } + sum >>= shift; + return sum > INT_MAX ? INT_MAX : (int)sum; } -static int rdcost_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x) { - int cost = 0, b; - MACROBLOCKD *const xd = &x->e_mbd; - ENTROPY_CONTEXT_PLANES t_above[2], t_left[2]; - ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) &t_above; - ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) &t_left; - - vpx_memcpy(&t_above, xd->above_context, sizeof(t_above)); - vpx_memcpy(&t_left, xd->left_context, sizeof(t_left)); - - for (b = 0; b < 64; b += 4) - cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC, - ta + vp9_block2above_sb[TX_8X8][b], - tl + vp9_block2left_sb[TX_8X8][b], TX_8X8); - - return cost; -} +struct rdcost_block_args { + VP9_COMMON *cm; + MACROBLOCK *x; + ENTROPY_CONTEXT t_above[16]; + ENTROPY_CONTEXT t_left[16]; + TX_SIZE tx_size; + int bw; + int bh; + int cost; +}; -static void super_block_yrd_8x8(VP9_COMMON *const cm, MACROBLOCK *x, - int *rate, int *distortion, int *skippable) { - MACROBLOCKD *const xd = &x->e_mbd; +static void rdcost_block(int plane, int block, BLOCK_SIZE_TYPE bsize, + int ss_txfrm_size, void *arg) { + struct rdcost_block_args* args = arg; + int x_idx, y_idx; + MACROBLOCKD * const xd = &args->x->e_mbd; - xd->mode_info_context->mbmi.txfm_size = TX_8X8; - vp9_transform_sby_8x8(x); - vp9_quantize_sby_8x8(x); + txfrm_block_to_raster_xy(xd, bsize, plane, block, args->tx_size * 2, &x_idx, + &y_idx); - *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 1024, 2); - *rate = rdcost_sby_8x8(cm, x); - *skippable = vp9_sby_is_skippable_8x8(xd); + args->cost += cost_coeffs(args->cm, args->x, plane, block, + xd->plane[plane].plane_type, args->t_above + x_idx, + args->t_left + y_idx, args->tx_size, + args->bw * args->bh); } -static int rdcost_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x) { - int cost = 0, b; - MACROBLOCKD *const xd = &x->e_mbd; - 
ENTROPY_CONTEXT_PLANES t_above[2], t_left[2]; - ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) &t_above; - ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) &t_left; - - vpx_memcpy(&t_above, xd->above_context, sizeof(t_above)); - vpx_memcpy(&t_left, xd->left_context, sizeof(t_left)); - - for (b = 0; b < 64; b += 16) - cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC, - ta + vp9_block2above_sb[TX_16X16][b], - tl + vp9_block2left_sb[TX_16X16][b], TX_16X16); - - return cost; -} +static int rdcost_plane(VP9_COMMON * const cm, MACROBLOCK *x, int plane, + BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) { + MACROBLOCKD * const xd = &x->e_mbd; + const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x; + const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y; + const int bw = 1 << bwl, bh = 1 << bhl; + struct rdcost_block_args args = { cm, x, { 0 }, { 0 }, tx_size, bw, bh, 0 }; -static void super_block_yrd_16x16(VP9_COMMON *const cm, MACROBLOCK *x, - int *rate, int *distortion, int *skippable) { - MACROBLOCKD *const xd = &x->e_mbd; + vpx_memcpy(&args.t_above, xd->plane[plane].above_context, + sizeof(ENTROPY_CONTEXT) * bw); + vpx_memcpy(&args.t_left, xd->plane[plane].left_context, + sizeof(ENTROPY_CONTEXT) * bh); - xd->mode_info_context->mbmi.txfm_size = TX_16X16; - vp9_transform_sby_16x16(x); - vp9_quantize_sby_16x16(x); + foreach_transformed_block_in_plane(xd, bsize, plane, rdcost_block, &args); - *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 1024, 2); - *rate = rdcost_sby_16x16(cm, x); - *skippable = vp9_sby_is_skippable_16x16(xd); + return args.cost; } -static int rdcost_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x) { - MACROBLOCKD * const xd = &x->e_mbd; - ENTROPY_CONTEXT_PLANES t_above[2], t_left[2]; - ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) &t_above; - ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) &t_left; - - vpx_memcpy(&t_above, xd->above_context, sizeof(t_above)); - vpx_memcpy(&t_left, xd->left_context, sizeof(t_left)); +static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) { + int cost = 0, plane; - return cost_coeffs(cm, x, 0, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_32X32); + for (plane = 1; plane < MAX_MB_PLANE; plane++) { + cost += rdcost_plane(cm, x, plane, bsize, tx_size); + } + return cost; } -static void super_block_yrd_32x32(VP9_COMMON *const cm, MACROBLOCK *x, - int *rate, int *distortion, int *skippable) { +static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x, + int *rate, int *distortion, int *skippable, + BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) { MACROBLOCKD *const xd = &x->e_mbd; + xd->mode_info_context->mbmi.txfm_size = tx_size; - xd->mode_info_context->mbmi.txfm_size = TX_32X32; - vp9_transform_sby_32x32(x); - vp9_quantize_sby_32x32(x); + if (xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME) + vp9_encode_intra_block_y(cm, x, bsize); + else + vp9_xform_quant_sby(cm, x, bsize); - *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 1024, 0); - *rate = rdcost_sby_32x32(cm, x); - *skippable = vp9_sby_is_skippable_32x32(xd); + *distortion = block_error_sby(x, bsize, tx_size == TX_32X32 ? 
0 : 2); + *rate = rdcost_plane(cm, x, 0, bsize, tx_size); + *skippable = vp9_sby_is_skippable(xd, bsize); } static void super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int *distortion, - int *skip, + int *skip, BLOCK_SIZE_TYPE bs, int64_t txfm_cache[NB_TXFM_MODES]) { VP9_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &x->e_mbd; int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB]; - const uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer; - int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride; - - vp9_subtract_sby_s_c(x->src_diff, src, src_y_stride, dst, dst_y_stride); - super_block_yrd_32x32(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32]); - super_block_yrd_16x16(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16]); - super_block_yrd_8x8(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8]); - super_block_yrd_4x4(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4]); - - choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache, - TX_SIZE_MAX_SB - 1); -} - -static int rdcost_sb64y_4x4(VP9_COMMON *const cm, MACROBLOCK *x) { - int cost = 0, b; - MACROBLOCKD *const xd = &x->e_mbd; - ENTROPY_CONTEXT_PLANES t_above[4], t_left[4]; - ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) &t_above; - ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) &t_left; + MACROBLOCKD *xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; - vpx_memcpy(&t_above, xd->above_context, sizeof(t_above)); - vpx_memcpy(&t_left, xd->left_context, sizeof(t_left)); + assert(bs == mbmi->sb_type); + if (mbmi->ref_frame[0] > INTRA_FRAME) + vp9_subtract_sby(x, bs); - for (b = 0; b < 256; b++) - cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC, - ta + vp9_block2above_sb64[TX_4X4][b], - tl + vp9_block2left_sb64[TX_4X4][b], TX_4X4); + if (cpi->speed > 4) { + if (bs >= BLOCK_SIZE_SB32X32) { + mbmi->txfm_size = TX_32X32; + } else if (bs >= BLOCK_SIZE_MB16X16) { + mbmi->txfm_size = TX_16X16; + } else if (bs >= BLOCK_SIZE_SB8X8) { + mbmi->txfm_size = TX_8X8; + } else { + mbmi->txfm_size = TX_4X4; + } + vpx_memset(txfm_cache, 0, NB_TXFM_MODES * sizeof(int64_t)); + super_block_yrd_for_txfm(cm, x, rate, distortion, skip, bs, + mbmi->txfm_size); + return; + } + if (bs >= BLOCK_SIZE_SB32X32) + super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32], + bs, TX_32X32); + if (bs >= BLOCK_SIZE_MB16X16) + super_block_yrd_for_txfm(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16], + bs, TX_16X16); + super_block_yrd_for_txfm(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], bs, + TX_8X8); + super_block_yrd_for_txfm(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], bs, + TX_4X4); + + choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, + skip, txfm_cache, + TX_32X32 - (bs < BLOCK_SIZE_SB32X32) + - (bs < BLOCK_SIZE_MB16X16)); +} + +static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, + MB_PREDICTION_MODE *best_mode, + int *bmode_costs, + ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, + int *bestrate, int *bestratey, + int *bestdistortion, + BLOCK_SIZE_TYPE bsize) { + MB_PREDICTION_MODE mode; + MACROBLOCKD *xd = &x->e_mbd; + int64_t best_rd = INT64_MAX; + int rate = 0; + int distortion; + VP9_COMMON *const cm = &cpi->common; + const int src_stride = x->plane[0].src.stride; + uint8_t *src, *dst; + int16_t *src_diff, *coeff; - return cost; -} + ENTROPY_CONTEXT ta[2], tempa[2]; + ENTROPY_CONTEXT tl[2], templ[2]; + TX_TYPE tx_type = DCT_DCT; + TX_TYPE best_tx_type = DCT_DCT; + int bw = 1 << b_width_log2(bsize); + int bh = 1 << b_height_log2(bsize); + int 
idx, idy, block;
+  DECLARE_ALIGNED(16, int16_t, best_dqcoeff[4][16]);
 
-static void super_block64_yrd_4x4(VP9_COMMON *const cm, MACROBLOCK *x,
-                                  int *rate, int *distortion, int *skippable) {
-  MACROBLOCKD *const xd = &x->e_mbd;
+  assert(ib < 4);
+  vpx_memcpy(ta, a, sizeof(ta));
+  vpx_memcpy(tl, l, sizeof(tl));
   xd->mode_info_context->mbmi.txfm_size = TX_4X4;
-  vp9_transform_sb64y_4x4(x);
-  vp9_quantize_sb64y_4x4(x);
 
-  *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 4096, 2);
-  *rate = rdcost_sb64y_4x4(cm, x);
-  *skippable = vp9_sb64y_is_skippable_4x4(xd);
-}
+  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
+    int64_t this_rd;
+    int ratey = 0;
 
-static int rdcost_sb64y_8x8(VP9_COMMON *const cm, MACROBLOCK *x) {
-  int cost = 0, b;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  ENTROPY_CONTEXT_PLANES t_above[4], t_left[4];
-  ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) &t_above;
-  ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) &t_left;
+    rate = bmode_costs[mode];
+    distortion = 0;
+
+    vpx_memcpy(tempa, ta, sizeof(ta));
+    vpx_memcpy(templ, tl, sizeof(tl));
+
+    for (idy = 0; idy < bh; ++idy) {
+      for (idx = 0; idx < bw; ++idx) {
+        block = ib + idy * 2 + idx;
+        xd->mode_info_context->bmi[block].as_mode.first = mode;
+        src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
+                                        x->plane[0].src.buf, src_stride);
+        src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, block,
+                                             x->plane[0].src_diff);
+        coeff = BLOCK_OFFSET(x->plane[0].coeff, block, 16);
+        dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
+                                        xd->plane[0].dst.buf,
+                                        xd->plane[0].dst.stride);
+        vp9_intra4x4_predict(xd, block, BLOCK_SIZE_SB8X8, mode,
+                             dst, xd->plane[0].dst.stride);
+        vp9_subtract_block(4, 4, src_diff, 8,
+                           src, src_stride,
+                           dst, xd->plane[0].dst.stride);
+
+        tx_type = get_tx_type_4x4(xd, block);
+        if (tx_type != DCT_DCT) {
+          vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
+          x->quantize_b_4x4(x, block, tx_type, 16);
+        } else {
+          x->fwd_txm4x4(src_diff, coeff, 16);
+          x->quantize_b_4x4(x, block, tx_type, 16);
+        }
 
-  vpx_memcpy(&t_above, xd->above_context, sizeof(t_above));
-  vpx_memcpy(&t_left, xd->left_context, sizeof(t_left));
+        ratey += cost_coeffs(cm, x, 0, block, PLANE_TYPE_Y_WITH_DC,
+                             tempa + idx, templ + idy, TX_4X4, 16);
+        distortion += vp9_block_error(coeff, BLOCK_OFFSET(xd->plane[0].dqcoeff,
+                                                          block, 16), 16) >> 2;
 
-  for (b = 0; b < 256; b += 4)
-    cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC,
-                        ta + vp9_block2above_sb64[TX_8X8][b],
-                        tl + vp9_block2left_sb64[TX_8X8][b], TX_8X8);
+        // Reconstruct with the transform type of the mode currently being
+        // searched, so later sub-blocks predict from the right pixels.
+        if (tx_type != DCT_DCT)
+          vp9_short_iht4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
+                               dst, xd->plane[0].dst.stride, tx_type);
+        else
+          xd->inv_txm4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
+                             dst, xd->plane[0].dst.stride);
+      }
+    }
 
-  return cost;
-}
+    rate += ratey;
+    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
 
-static void super_block64_yrd_8x8(VP9_COMMON *const cm, MACROBLOCK *x,
-                                  int *rate, int *distortion, int *skippable) {
-  MACROBLOCKD *const xd = &x->e_mbd;
+    if (this_rd < best_rd) {
+      *bestrate = rate;
+      *bestratey = ratey;
+      *bestdistortion = distortion;
+      best_rd = this_rd;
+      *best_mode = mode;
+      best_tx_type = tx_type;
+      vpx_memcpy(a, tempa, sizeof(tempa));
+      vpx_memcpy(l, templ, sizeof(templ));
+      for (idy = 0; idy < bh; ++idy) {
+        for (idx = 0; idx < bw; ++idx) {
+          block = ib + idy * 2 + idx;
+          vpx_memcpy(best_dqcoeff[idy * 2 + idx],
+                     BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
+                     sizeof(best_dqcoeff[0]));
+        }
+      }
    }
  }
 
- 
xd->mode_info_context->mbmi.txfm_size = TX_8X8; - vp9_transform_sb64y_8x8(x); - vp9_quantize_sb64y_8x8(x); + for (idy = 0; idy < bh; ++idy) { + for (idx = 0; idx < bw; ++idx) { + block = ib + idy * 2 + idx; + xd->mode_info_context->bmi[block].as_mode.first = *best_mode; + dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block, + xd->plane[0].dst.buf, + xd->plane[0].dst.stride); - *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 4096, 2); - *rate = rdcost_sb64y_8x8(cm, x); - *skippable = vp9_sb64y_is_skippable_8x8(xd); -} + vp9_intra4x4_predict(xd, block, BLOCK_SIZE_SB8X8, *best_mode, + dst, xd->plane[0].dst.stride); + // inverse transform + if (best_tx_type != DCT_DCT) + vp9_short_iht4x4_add(best_dqcoeff[idy * 2 + idx], dst, + xd->plane[0].dst.stride, best_tx_type); + else + xd->inv_txm4x4_add(best_dqcoeff[idy * 2 + idx], dst, + xd->plane[0].dst.stride); + } + } -static int rdcost_sb64y_16x16(VP9_COMMON *const cm, MACROBLOCK *x) { - int cost = 0, b; - MACROBLOCKD *const xd = &x->e_mbd; - ENTROPY_CONTEXT_PLANES t_above[4], t_left[4]; - ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) &t_above; - ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) &t_left; + return best_rd; +} - vpx_memcpy(&t_above, xd->above_context, sizeof(t_above)); - vpx_memcpy(&t_left, xd->left_context, sizeof(t_left)); +static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, + int *Rate, int *rate_y, + int *Distortion, int64_t best_rd) { + int i, j; + MACROBLOCKD *const xd = &mb->e_mbd; + BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type; + int bw = 1 << b_width_log2(bsize); + int bh = 1 << b_height_log2(bsize); + int idx, idy; + int cost = 0; + int distortion = 0; + int tot_rate_y = 0; + int64_t total_rd = 0; + ENTROPY_CONTEXT t_above[4], t_left[4]; + int *bmode_costs; + MODE_INFO *const mic = xd->mode_info_context; - for (b = 0; b < 256; b += 16) - cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC, - ta + vp9_block2above_sb64[TX_16X16][b], - tl + vp9_block2left_sb64[TX_16X16][b], TX_16X16); + vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above)); + vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left)); - return cost; -} + bmode_costs = mb->mbmode_cost; -static void super_block64_yrd_16x16(VP9_COMMON *const cm, MACROBLOCK *x, - int *rate, int *distortion, - int *skippable) { - MACROBLOCKD *const xd = &x->e_mbd; + for (idy = 0; idy < 2; idy += bh) { + for (idx = 0; idx < 2; idx += bw) { + const int mis = xd->mode_info_stride; + MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode); + int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry); + int UNINITIALIZED_IS_SAFE(d); + i = idy * 2 + idx; - xd->mode_info_context->mbmi.txfm_size = TX_16X16; - vp9_transform_sb64y_16x16(x); - vp9_quantize_sb64y_16x16(x); + if (xd->frame_type == KEY_FRAME) { + const MB_PREDICTION_MODE A = above_block_mode(mic, i, mis); + const MB_PREDICTION_MODE L = (xd->left_available || idx) ? 
+ left_block_mode(mic, i) : DC_PRED; - *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 4096, 2); - *rate = rdcost_sb64y_16x16(cm, x); - *skippable = vp9_sb64y_is_skippable_16x16(xd); -} + bmode_costs = mb->y_mode_costs[A][L]; + } -static int rdcost_sb64y_32x32(VP9_COMMON *const cm, MACROBLOCK *x) { - int cost = 0, b; - MACROBLOCKD * const xd = &x->e_mbd; - ENTROPY_CONTEXT_PLANES t_above[4], t_left[4]; - ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) &t_above; - ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) &t_left; + total_rd += rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs, + t_above + idx, t_left + idy, + &r, &ry, &d, bsize); + cost += r; + distortion += d; + tot_rate_y += ry; - vpx_memcpy(&t_above, xd->above_context, sizeof(t_above)); - vpx_memcpy(&t_left, xd->left_context, sizeof(t_left)); + mic->bmi[i].as_mode.first = best_mode; + for (j = 1; j < bh; ++j) + mic->bmi[i + j * 2].as_mode.first = best_mode; + for (j = 1; j < bw; ++j) + mic->bmi[i + j].as_mode.first = best_mode; - for (b = 0; b < 256; b += 64) - cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC, - ta + vp9_block2above_sb64[TX_32X32][b], - tl + vp9_block2left_sb64[TX_32X32][b], TX_32X32); - - return cost; -} - -static void super_block64_yrd_32x32(VP9_COMMON *const cm, MACROBLOCK *x, - int *rate, int *distortion, - int *skippable) { - MACROBLOCKD *const xd = &x->e_mbd; - - xd->mode_info_context->mbmi.txfm_size = TX_32X32; - vp9_transform_sb64y_32x32(x); - vp9_quantize_sb64y_32x32(x); - - *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 4096, 0); - *rate = rdcost_sb64y_32x32(cm, x); - *skippable = vp9_sb64y_is_skippable_32x32(xd); -} - -static void super_block_64_yrd(VP9_COMP *cpi, - MACROBLOCK *x, int *rate, int *distortion, - int *skip, - int64_t txfm_cache[NB_TXFM_MODES]) { - VP9_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &x->e_mbd; - int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB]; - const uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer; - int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride; - - vp9_subtract_sb64y_s_c(x->src_diff, src, src_y_stride, dst, dst_y_stride); - super_block64_yrd_32x32(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32]); - super_block64_yrd_16x16(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16]); - super_block64_yrd_8x8(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8]); - super_block64_yrd_4x4(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4]); - - choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache, - TX_SIZE_MAX_SB - 1); -} - -static void copy_predictor_8x8(uint8_t *dst, const uint8_t *predictor) { - const unsigned int *p = (const unsigned int *)predictor; - unsigned int *d = (unsigned int *)dst; - d[0] = p[0]; - d[1] = p[1]; - d[4] = p[4]; - d[5] = p[5]; - d[8] = p[8]; - d[9] = p[9]; - d[12] = p[12]; - d[13] = p[13]; - d[16] = p[16]; - d[17] = p[17]; - d[20] = p[20]; - d[21] = p[21]; - d[24] = p[24]; - d[25] = p[25]; - d[28] = p[28]; - d[29] = p[29]; -} - -static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be, - BLOCKD *b, B_PREDICTION_MODE *best_mode, - int *bmode_costs, - ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, - int *bestrate, int *bestratey, - int *bestdistortion) { - B_PREDICTION_MODE mode; - MACROBLOCKD *xd = &x->e_mbd; - int64_t best_rd = INT64_MAX; - int rate = 0; - int distortion; - VP9_COMMON *const cm = &cpi->common; - - ENTROPY_CONTEXT ta = *a, tempa = *a; - ENTROPY_CONTEXT tl = *l, templ = *l; - TX_TYPE tx_type = DCT_DCT; - TX_TYPE best_tx_type = DCT_DCT; - /* - * 
The predictor buffer is a 2d buffer with a stride of 16. Create - * a temp buffer that meets the stride requirements, but we are only - * interested in the left 4x4 block - * */ - DECLARE_ALIGNED_ARRAY(16, uint8_t, best_predictor, 16 * 4); - DECLARE_ALIGNED_ARRAY(16, int16_t, best_dqcoeff, 16); - -#if CONFIG_NEWBINTRAMODES - b->bmi.as_mode.context = vp9_find_bpred_context(xd, b); -#endif - xd->mode_info_context->mbmi.txfm_size = TX_4X4; - for (mode = B_DC_PRED; mode < LEFT4X4; mode++) { - int64_t this_rd; - int ratey; - -#if CONFIG_NEWBINTRAMODES - if (xd->frame_type == KEY_FRAME) { - if (mode == B_CONTEXT_PRED) continue; - } else { - if (mode >= B_CONTEXT_PRED - CONTEXT_PRED_REPLACEMENTS && - mode < B_CONTEXT_PRED) - continue; - } -#endif - - b->bmi.as_mode.first = mode; -#if CONFIG_NEWBINTRAMODES - rate = bmode_costs[ - mode == B_CONTEXT_PRED ? mode - CONTEXT_PRED_REPLACEMENTS : mode]; -#else - rate = bmode_costs[mode]; -#endif - - vp9_intra4x4_predict(xd, b, mode, b->predictor); - vp9_subtract_b(be, b, 16); - - b->bmi.as_mode.first = mode; - tx_type = get_tx_type_4x4(xd, be - x->block); - if (tx_type != DCT_DCT) { - vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type); - vp9_ht_quantize_b_4x4(x, be - x->block, tx_type); - } else { - x->fwd_txm4x4(be->src_diff, be->coeff, 32); - x->quantize_b_4x4(x, be - x->block); - } - - tempa = ta; - templ = tl; - - ratey = cost_coeffs(cm, x, b - xd->block, - PLANE_TYPE_Y_WITH_DC, &tempa, &templ, TX_4X4); - rate += ratey; - distortion = vp9_block_error(be->coeff, b->dqcoeff, 16) >> 2; - - this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); - - if (this_rd < best_rd) { - *bestrate = rate; - *bestratey = ratey; - *bestdistortion = distortion; - best_rd = this_rd; - *best_mode = mode; - best_tx_type = tx_type; - *a = tempa; - *l = templ; - copy_predictor(best_predictor, b->predictor); - vpx_memcpy(best_dqcoeff, b->dqcoeff, 32); - } - } - b->bmi.as_mode.first = (B_PREDICTION_MODE)(*best_mode); - - // inverse transform - if (best_tx_type != DCT_DCT) - vp9_short_iht4x4(best_dqcoeff, b->diff, 16, best_tx_type); - else - xd->inv_txm4x4(best_dqcoeff, b->diff, 32); - - vp9_recon_b(best_predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - - return best_rd; -} - -static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, - int *Rate, int *rate_y, - int *Distortion, int64_t best_rd) { - int i; - MACROBLOCKD *const xd = &mb->e_mbd; - int cost = mb->mbmode_cost [xd->frame_type] [B_PRED]; - int distortion = 0; - int tot_rate_y = 0; - int64_t total_rd = 0; - ENTROPY_CONTEXT_PLANES t_above, t_left; - ENTROPY_CONTEXT *ta, *tl; - int *bmode_costs; - - vpx_memcpy(&t_above, xd->above_context, - sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, xd->left_context, - sizeof(ENTROPY_CONTEXT_PLANES)); - - ta = (ENTROPY_CONTEXT *)&t_above; - tl = (ENTROPY_CONTEXT *)&t_left; - - xd->mode_info_context->mbmi.mode = B_PRED; - bmode_costs = mb->inter_bmode_costs; - - for (i = 0; i < 16; i++) { - MODE_INFO *const mic = xd->mode_info_context; - const int mis = xd->mode_info_stride; - B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode); - int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry), UNINITIALIZED_IS_SAFE(d); - - if (xd->frame_type == KEY_FRAME) { - const B_PREDICTION_MODE A = above_block_mode(mic, i, mis); - const B_PREDICTION_MODE L = left_block_mode(mic, i); - - bmode_costs = mb->bmode_costs[A][L]; + if (total_rd >= best_rd) + break; } -#if CONFIG_NEWBINTRAMODES - mic->bmi[i].as_mode.context = vp9_find_bpred_context(xd, xd->block + i); 
-#endif - - total_rd += rd_pick_intra4x4block( - cpi, mb, mb->block + i, xd->block + i, &best_mode, - bmode_costs, ta + vp9_block2above[TX_4X4][i], - tl + vp9_block2left[TX_4X4][i], &r, &ry, &d); - - cost += r; - distortion += d; - tot_rate_y += ry; - - mic->bmi[i].as_mode.first = best_mode; - -#if 0 // CONFIG_NEWBINTRAMODES - printf("%d %d\n", mic->bmi[i].as_mode.first, mic->bmi[i].as_mode.context); -#endif - - if (total_rd >= best_rd) - break; } if (total_rd >= best_rd) @@ -1324,73 +837,135 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, *Rate = cost; *rate_y = tot_rate_y; *Distortion = distortion; + xd->mode_info_context->mbmi.mode = mic->bmi[3].as_mode.first; return RDCOST(mb->rdmult, mb->rddiv, cost, distortion); } -static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, - MACROBLOCK *x, - int *rate, - int *rate_tokenonly, - int *distortion, - int *skippable, +static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, + int *rate, int *rate_tokenonly, + int *distortion, int *skippable, + BLOCK_SIZE_TYPE bsize, int64_t txfm_cache[NB_TXFM_MODES]) { MB_PREDICTION_MODE mode; MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); + MACROBLOCKD *const xd = &x->e_mbd; int this_rate, this_rate_tokenonly; int this_distortion, s; int64_t best_rd = INT64_MAX, this_rd; + TX_SIZE UNINITIALIZED_IS_SAFE(best_tx); + int i; + int *bmode_costs = x->mbmode_cost; + + if (bsize < BLOCK_SIZE_SB8X8) { + x->e_mbd.mode_info_context->mbmi.txfm_size = TX_4X4; + return best_rd; + } + + for (i = 0; i < NB_TXFM_MODES; i++) + txfm_cache[i] = INT64_MAX; /* Y Search for 32x32 intra prediction mode */ for (mode = DC_PRED; mode <= TM_PRED; mode++) { + int64_t local_txfm_cache[NB_TXFM_MODES]; + MODE_INFO *const mic = xd->mode_info_context; + const int mis = xd->mode_info_stride; + + if (cpi->common.frame_type == KEY_FRAME) { + const MB_PREDICTION_MODE A = above_block_mode(mic, 0, mis); + const MB_PREDICTION_MODE L = xd->left_available ? 
+ left_block_mode(mic, 0) : DC_PRED; + + bmode_costs = x->y_mode_costs[A][L]; + } x->e_mbd.mode_info_context->mbmi.mode = mode; - vp9_build_intra_predictors_sby_s(&x->e_mbd); - super_block_yrd(cpi, x, &this_rate_tokenonly, - &this_distortion, &s, txfm_cache); - this_rate = this_rate_tokenonly + - x->mbmode_cost[x->e_mbd.frame_type] - [x->e_mbd.mode_info_context->mbmi.mode]; + super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, + bsize, local_txfm_cache); + + this_rate = this_rate_tokenonly + bmode_costs[mode]; this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); if (this_rd < best_rd) { mode_selected = mode; best_rd = this_rd; + best_tx = x->e_mbd.mode_info_context->mbmi.txfm_size; *rate = this_rate; *rate_tokenonly = this_rate_tokenonly; *distortion = this_distortion; *skippable = s; } + + for (i = 0; i < NB_TXFM_MODES; i++) { + int64_t adj_rd = this_rd + local_txfm_cache[i] - + local_txfm_cache[cpi->common.txfm_mode]; + if (adj_rd < txfm_cache[i]) { + txfm_cache[i] = adj_rd; + } + } } x->e_mbd.mode_info_context->mbmi.mode = mode_selected; + x->e_mbd.mode_info_context->mbmi.txfm_size = best_tx; return best_rd; } -static int64_t rd_pick_intra_sb64y_mode(VP9_COMP *cpi, - MACROBLOCK *x, - int *rate, - int *rate_tokenonly, - int *distortion, - int *skippable, - int64_t txfm_cache[NB_TXFM_MODES]) { +static void super_block_uvrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x, + int *rate, int *distortion, + int *skippable, BLOCK_SIZE_TYPE bsize, + TX_SIZE uv_tx_size) { + MACROBLOCKD *const xd = &x->e_mbd; + if (xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME) + vp9_encode_intra_block_uv(cm, x, bsize); + else + vp9_xform_quant_sbuv(cm, x, bsize); + + *distortion = block_error_sbuv(x, bsize, uv_tx_size == TX_32X32 ? 0 : 2); + *rate = rdcost_uv(cm, x, bsize, uv_tx_size); + *skippable = vp9_sbuv_is_skippable(xd, bsize); +} + +static void super_block_uvrd(VP9_COMMON *const cm, MACROBLOCK *x, + int *rate, int *distortion, int *skippable, + BLOCK_SIZE_TYPE bsize) { + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; + + if (mbmi->ref_frame[0] > INTRA_FRAME) + vp9_subtract_sbuv(x, bsize); + + if (mbmi->txfm_size >= TX_32X32 && bsize >= BLOCK_SIZE_SB64X64) { + super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize, + TX_32X32); + } else if (mbmi->txfm_size >= TX_16X16 && bsize >= BLOCK_SIZE_SB32X32) { + super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize, + TX_16X16); + } else if (mbmi->txfm_size >= TX_8X8 && bsize >= BLOCK_SIZE_MB16X16) { + super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize, + TX_8X8); + } else { + super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize, + TX_4X4); + } +} + +static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, + int *rate, int *rate_tokenonly, + int *distortion, int *skippable, + BLOCK_SIZE_TYPE bsize) { MB_PREDICTION_MODE mode; MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); - int this_rate, this_rate_tokenonly; - int this_distortion, s; int64_t best_rd = INT64_MAX, this_rd; + int this_rate_tokenonly, this_rate; + int this_distortion, s; - /* Y Search for 32x32 intra prediction mode */ for (mode = DC_PRED; mode <= TM_PRED; mode++) { - x->e_mbd.mode_info_context->mbmi.mode = mode; - vp9_build_intra_predictors_sb64y_s(&x->e_mbd); - - super_block_64_yrd(cpi, x, &this_rate_tokenonly, - &this_distortion, &s, txfm_cache); + x->e_mbd.mode_info_context->mbmi.uv_mode = mode; + super_block_uvrd(&cpi->common, x, 
&this_rate_tokenonly, + &this_distortion, &s, bsize); this_rate = this_rate_tokenonly + - x->mbmode_cost[x->e_mbd.frame_type] - [x->e_mbd.mode_info_context->mbmi.mode]; + x->intra_uv_mode_cost[x->e_mbd.frame_type][mode]; this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); if (this_rd < best_rd) { @@ -1403,3606 +978,1400 @@ static int64_t rd_pick_intra_sb64y_mode(VP9_COMP *cpi, } } - x->e_mbd.mode_info_context->mbmi.mode = mode_selected; + x->e_mbd.mode_info_context->mbmi.uv_mode = mode_selected; return best_rd; } -static int64_t rd_pick_intra16x16mby_mode(VP9_COMP *cpi, - MACROBLOCK *x, - int *Rate, - int *rate_y, - int *Distortion, - int *skippable, - int64_t txfm_cache[NB_TXFM_MODES]) { - MB_PREDICTION_MODE mode; - TX_SIZE txfm_size = 0; - MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; - int rate, ratey; - int distortion, skip; - int64_t best_rd = INT64_MAX; - int64_t this_rd; +int vp9_cost_mv_ref(VP9_COMP *cpi, + MB_PREDICTION_MODE m, + const int mode_context) { + MACROBLOCKD *xd = &cpi->mb.e_mbd; + int segment_id = xd->mode_info_context->mbmi.segment_id; - int i; - for (i = 0; i < NB_TXFM_MODES; i++) - txfm_cache[i] = INT64_MAX; + // Don't account for mode here if segment skip is enabled. + if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) { + VP9_COMMON *pc = &cpi->common; - // Y Search for 16x16 intra prediction mode - for (mode = DC_PRED; mode <= TM_PRED; mode++) { - int64_t local_txfm_cache[NB_TXFM_MODES]; + vp9_prob p[VP9_INTER_MODES - 1]; + assert(NEARESTMV <= m && m <= NEWMV); + vp9_mv_ref_probs(pc, p, mode_context); + return cost_token(vp9_sb_mv_ref_tree, p, + vp9_sb_mv_ref_encoding_array - NEARESTMV + m); + } else + return 0; +} - mbmi->mode = mode; +void vp9_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) { + x->e_mbd.mode_info_context->mbmi.mode = mb; + x->e_mbd.mode_info_context->mbmi.mv[0].as_int = mv->as_int; +} - vp9_build_intra_predictors_mby(xd); +static int labels2mode(MACROBLOCK *x, int i, + MB_PREDICTION_MODE this_mode, + int_mv *this_mv, int_mv *this_second_mv, + int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], + int_mv seg_mvs[MAX_REF_FRAMES], + int_mv *best_ref_mv, + int_mv *second_best_ref_mv, + int *mvjcost, int *mvcost[2], VP9_COMP *cpi) { + MACROBLOCKD *const xd = &x->e_mbd; + MODE_INFO *const mic = xd->mode_info_context; + MB_MODE_INFO * mbmi = &mic->mbmi; + int cost = 0, thismvcost = 0; + int idx, idy; + int bw = 1 << b_width_log2(mbmi->sb_type); + int bh = 1 << b_height_log2(mbmi->sb_type); - macro_block_yrd(cpi, x, &ratey, &distortion, &skip, local_txfm_cache); + /* We have to be careful retrieving previously-encoded motion vectors. + Ones from this macroblock have to be pulled from the BLOCKD array + as they have not yet made it to the bmi array in our MB_MODE_INFO. 
*/ + MB_PREDICTION_MODE m; - // FIXME add compoundmode cost - // FIXME add rate for mode2 - rate = ratey + x->mbmode_cost[xd->frame_type][mbmi->mode]; + // the only time we should do costing for new motion vector or mode + // is when we are on a new label (jbb May 08, 2007) + switch (m = this_mode) { + case NEWMV: + this_mv->as_int = seg_mvs[mbmi->ref_frame[0]].as_int; + thismvcost = vp9_mv_bit_cost(this_mv, best_ref_mv, mvjcost, mvcost, + 102, xd->allow_high_precision_mv); + if (mbmi->ref_frame[1] > 0) { + this_second_mv->as_int = seg_mvs[mbmi->ref_frame[1]].as_int; + thismvcost += vp9_mv_bit_cost(this_second_mv, second_best_ref_mv, + mvjcost, mvcost, 102, + xd->allow_high_precision_mv); + } + break; + case NEARESTMV: + this_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int; + if (mbmi->ref_frame[1] > 0) + this_second_mv->as_int = + frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int; + break; + case NEARMV: + this_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame[0]].as_int; + if (mbmi->ref_frame[1] > 0) + this_second_mv->as_int = + frame_mv[NEARMV][mbmi->ref_frame[1]].as_int; + break; + case ZEROMV: + this_mv->as_int = 0; + if (mbmi->ref_frame[1] > 0) + this_second_mv->as_int = 0; + break; + default: + break; + } - this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); + cost = vp9_cost_mv_ref(cpi, this_mode, + mbmi->mb_mode_context[mbmi->ref_frame[0]]); - if (this_rd < best_rd) { - mode_selected = mode; - txfm_size = mbmi->txfm_size; - best_rd = this_rd; - *Rate = rate; - *rate_y = ratey; - *Distortion = distortion; - *skippable = skip; - } + mic->bmi[i].as_mv[0].as_int = this_mv->as_int; + if (mbmi->ref_frame[1] > 0) + mic->bmi[i].as_mv[1].as_int = this_second_mv->as_int; - for (i = 0; i < NB_TXFM_MODES; i++) { - int64_t adj_rd = this_rd + local_txfm_cache[i] - - local_txfm_cache[cpi->common.txfm_mode]; - if (adj_rd < txfm_cache[i]) { - txfm_cache[i] = adj_rd; - } + x->partition_info->bmi[i].mode = m; + x->partition_info->bmi[i].mv.as_int = this_mv->as_int; + if (mbmi->ref_frame[1] > 0) + x->partition_info->bmi[i].second_mv.as_int = this_second_mv->as_int; + for (idy = 0; idy < bh; ++idy) { + for (idx = 0; idx < bw; ++idx) { + vpx_memcpy(&mic->bmi[i + idy * 2 + idx], + &mic->bmi[i], sizeof(mic->bmi[i])); + vpx_memcpy(&x->partition_info->bmi[i + idy * 2 + idx], + &x->partition_info->bmi[i], + sizeof(x->partition_info->bmi[i])); } } - mbmi->txfm_size = txfm_size; - mbmi->mode = mode_selected; - - return best_rd; + cost += thismvcost; + return cost; } - -static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, - B_PREDICTION_MODE *best_mode, - int *mode_costs, - ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, - int *bestrate, int *bestratey, - int *bestdistortion) { - VP9_COMMON *const cm = &cpi->common; - MB_PREDICTION_MODE mode; +static int64_t encode_inter_mb_segment(VP9_COMMON *const cm, + MACROBLOCK *x, + int i, + int *labelyrate, + int *distortion, + ENTROPY_CONTEXT *ta, + ENTROPY_CONTEXT *tl) { + int k; MACROBLOCKD *xd = &x->e_mbd; - int64_t best_rd = INT64_MAX; - int distortion = 0, rate = 0; - BLOCK *be = x->block + ib; - BLOCKD *b = xd->block + ib; - ENTROPY_CONTEXT_PLANES ta, tl; - ENTROPY_CONTEXT *ta0, *ta1, besta0 = 0, besta1 = 0; - ENTROPY_CONTEXT *tl0, *tl1, bestl0 = 0, bestl1 = 0; + BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type; + int bwl = b_width_log2(bsize), bw = 1 << bwl; + int bhl = b_height_log2(bsize), bh = 1 << bhl; + int idx, idy; + const int src_stride = x->plane[0].src.stride; + uint8_t* const src = + raster_block_offset_uint8(xd, 
BLOCK_SIZE_SB8X8, 0, i, + x->plane[0].src.buf, src_stride); + int16_t* src_diff = + raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, i, + x->plane[0].src_diff); + int16_t* coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, i); + uint8_t* const pre = + raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, + xd->plane[0].pre[0].buf, + xd->plane[0].pre[0].stride); + uint8_t* const dst = + raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, + xd->plane[0].dst.buf, + xd->plane[0].dst.stride); + int thisdistortion = 0; + int thisrate = 0; - /* - * The predictor buffer is a 2d buffer with a stride of 16. Create - * a temp buffer that meets the stride requirements, but we are only - * interested in the left 8x8 block - * */ - DECLARE_ALIGNED_ARRAY(16, uint8_t, best_predictor, 16 * 8); - DECLARE_ALIGNED_ARRAY(16, int16_t, best_dqcoeff, 16 * 4); + *labelyrate = 0; + *distortion = 0; - // perform transformation of dimension 8x8 - // note the input and output index mapping - int idx = (ib & 0x02) ? (ib + 2) : ib; + vp9_build_inter_predictor(pre, + xd->plane[0].pre[0].stride, + dst, + xd->plane[0].dst.stride, + &xd->mode_info_context->bmi[i].as_mv[0], + &xd->scale_factor[0], + 4 * bw, 4 * bh, 0 /* no avg */, &xd->subpix); + + // TODO(debargha): Make this work properly with the + // implicit-compoundinter-weight experiment when implicit + // weighting for splitmv modes is turned on. + if (xd->mode_info_context->mbmi.ref_frame[1] > 0) { + uint8_t* const second_pre = + raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, + xd->plane[0].pre[1].buf, + xd->plane[0].pre[1].stride); + vp9_build_inter_predictor(second_pre, xd->plane[0].pre[1].stride, + dst, xd->plane[0].dst.stride, + &xd->mode_info_context->bmi[i].as_mv[1], + &xd->scale_factor[1], 4 * bw, 4 * bh, 1, + &xd->subpix); + } + + vp9_subtract_block(4 * bh, 4 * bw, src_diff, 8, + src, src_stride, + dst, xd->plane[0].dst.stride); + + k = i; + for (idy = 0; idy < bh; ++idy) { + for (idx = 0; idx < bw; ++idx) { + k += (idy * 2 + idx); + src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, k, + x->plane[0].src_diff); + coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, k); + x->fwd_txm4x4(src_diff, coeff, 16); + x->quantize_b_4x4(x, k, DCT_DCT, 16); + thisdistortion += vp9_block_error(coeff, + BLOCK_OFFSET(xd->plane[0].dqcoeff, + k, 16), 16); + thisrate += cost_coeffs(cm, x, 0, k, PLANE_TYPE_Y_WITH_DC, + ta + (k & 1), + tl + (k >> 1), TX_4X4, 16); + } + } + *distortion += thisdistortion; + *labelyrate += thisrate; - for (mode = DC_PRED; mode <= TM_PRED; mode++) { - int64_t this_rd; - int rate_t = 0; + *distortion >>= 2; + return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion); +} - // FIXME rate for compound mode and second intrapred mode - rate = mode_costs[mode]; - b->bmi.as_mode.first = mode; +typedef struct { + int_mv *ref_mv, *second_ref_mv; + int_mv mvp; - vp9_intra8x8_predict(xd, b, mode, b->predictor); + int64_t segment_rd; + int r; + int d; + int segment_yrate; + MB_PREDICTION_MODE modes[4]; + int_mv mvs[4], second_mvs[4]; + int eobs[4]; + int mvthresh; +} BEST_SEG_INFO; - vp9_subtract_4b_c(be, b, 16); +static INLINE int mv_check_bounds(MACROBLOCK *x, int_mv *mv) { + int r = 0; + r |= (mv->as_mv.row >> 3) < x->mv_row_min; + r |= (mv->as_mv.row >> 3) > x->mv_row_max; + r |= (mv->as_mv.col >> 3) < x->mv_col_min; + r |= (mv->as_mv.col >> 3) > x->mv_col_max; + return r; +} - if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) { - TX_TYPE tx_type = get_tx_type_8x8(xd, ib); - if (tx_type != DCT_DCT) - vp9_short_fht8x8(be->src_diff, (x->block + 
idx)->coeff, 16, tx_type); - else - x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32); - x->quantize_b_8x8(x, idx, tx_type); +static enum BlockSize get_block_size(int bw, int bh) { + if (bw == 4 && bh == 4) + return BLOCK_4X4; - // compute quantization mse of 8x8 block - distortion = vp9_block_error_c((x->block + idx)->coeff, - (xd->block + idx)->dqcoeff, 64); + if (bw == 4 && bh == 8) + return BLOCK_4X8; - vpx_memcpy(&ta, a, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&tl, l, sizeof(ENTROPY_CONTEXT_PLANES)); + if (bw == 8 && bh == 4) + return BLOCK_8X4; - ta0 = ((ENTROPY_CONTEXT*)&ta) + vp9_block2above[TX_8X8][idx]; - tl0 = ((ENTROPY_CONTEXT*)&tl) + vp9_block2left[TX_8X8][idx]; - ta1 = ta0 + 1; - tl1 = tl0 + 1; + if (bw == 8 && bh == 8) + return BLOCK_8X8; - rate_t = cost_coeffs(cm, x, idx, PLANE_TYPE_Y_WITH_DC, - ta0, tl0, TX_8X8); + if (bw == 8 && bh == 16) + return BLOCK_8X16; - rate += rate_t; - } else { - static const int iblock[4] = {0, 1, 4, 5}; - TX_TYPE tx_type; - int i; - vpx_memcpy(&ta, a, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&tl, l, sizeof(ENTROPY_CONTEXT_PLANES)); - ta0 = ((ENTROPY_CONTEXT*)&ta) + vp9_block2above[TX_4X4][ib]; - tl0 = ((ENTROPY_CONTEXT*)&tl) + vp9_block2left[TX_4X4][ib]; - ta1 = ta0 + 1; - tl1 = tl0 + 1; - distortion = 0; - rate_t = 0; - for (i = 0; i < 4; ++i) { - int do_two = 0; - b = &xd->block[ib + iblock[i]]; - be = &x->block[ib + iblock[i]]; - tx_type = get_tx_type_4x4(xd, ib + iblock[i]); - if (tx_type != DCT_DCT) { - vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type); - vp9_ht_quantize_b_4x4(x, ib + iblock[i], tx_type); - } else if (!(i & 1) && - get_tx_type_4x4(xd, ib + iblock[i] + 1) == DCT_DCT) { - x->fwd_txm8x4(be->src_diff, be->coeff, 32); - x->quantize_b_4x4_pair(x, ib + iblock[i], ib + iblock[i] + 1); - do_two = 1; - } else { - x->fwd_txm4x4(be->src_diff, be->coeff, 32); - x->quantize_b_4x4(x, ib + iblock[i]); - } - distortion += vp9_block_error_c(be->coeff, b->dqcoeff, 16 << do_two); - rate_t += cost_coeffs(cm, x, ib + iblock[i], PLANE_TYPE_Y_WITH_DC, - i&1 ? ta1 : ta0, i&2 ? tl1 : tl0, - TX_4X4); - if (do_two) { - i++; - rate_t += cost_coeffs(cm, x, ib + iblock[i], PLANE_TYPE_Y_WITH_DC, - i&1 ? ta1 : ta0, i&2 ? 
tl1 : tl0, - TX_4X4); - } - } - b = &xd->block[ib]; - be = &x->block[ib]; - rate += rate_t; - } + if (bw == 16 && bh == 8) + return BLOCK_16X8; - distortion >>= 2; - this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); - if (this_rd < best_rd) { - *bestrate = rate; - *bestratey = rate_t; - *bestdistortion = distortion; - besta0 = *ta0; - besta1 = *ta1; - bestl0 = *tl0; - bestl1 = *tl1; - best_rd = this_rd; - *best_mode = mode; - copy_predictor_8x8(best_predictor, b->predictor); - vpx_memcpy(best_dqcoeff, b->dqcoeff, 64); - vpx_memcpy(best_dqcoeff + 32, b->dqcoeff + 64, 64); - } - } - b->bmi.as_mode.first = (*best_mode); - vp9_encode_intra8x8(x, ib); - - if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) { - a[vp9_block2above[TX_8X8][idx]] = besta0; - a[vp9_block2above[TX_8X8][idx] + 1] = besta1; - l[vp9_block2left[TX_8X8][idx]] = bestl0; - l[vp9_block2left[TX_8X8][idx] + 1] = bestl1; - } else { - a[vp9_block2above[TX_4X4][ib]] = besta0; - a[vp9_block2above[TX_4X4][ib + 1]] = besta1; - l[vp9_block2left[TX_4X4][ib]] = bestl0; - l[vp9_block2left[TX_4X4][ib + 4]] = bestl1; - } + if (bw == 16 && bh == 16) + return BLOCK_16X16; - return best_rd; -} + if (bw == 32 && bh == 32) + return BLOCK_32X32; -static int64_t rd_pick_intra8x8mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, - int *Rate, int *rate_y, - int *Distortion, int64_t best_rd) { - MACROBLOCKD *const xd = &mb->e_mbd; - int i, ib; - int cost = mb->mbmode_cost [xd->frame_type] [I8X8_PRED]; - int distortion = 0; - int tot_rate_y = 0; - int64_t total_rd = 0; - ENTROPY_CONTEXT_PLANES t_above, t_left; - ENTROPY_CONTEXT *ta, *tl; - int *i8x8mode_costs; + if (bw == 32 && bh == 16) + return BLOCK_32X16; - vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + if (bw == 16 && bh == 32) + return BLOCK_16X32; - ta = (ENTROPY_CONTEXT *)&t_above; - tl = (ENTROPY_CONTEXT *)&t_left; + if (bw == 64 && bh == 32) + return BLOCK_64X32; - xd->mode_info_context->mbmi.mode = I8X8_PRED; - i8x8mode_costs = mb->i8x8_mode_costs; + if (bw == 32 && bh == 64) + return BLOCK_32X64; - for (i = 0; i < 4; i++) { - MODE_INFO *const mic = xd->mode_info_context; - B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode); - int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry), UNINITIALIZED_IS_SAFE(d); - - ib = vp9_i8x8_block[i]; - total_rd += rd_pick_intra8x8block( - cpi, mb, ib, &best_mode, - i8x8mode_costs, ta, tl, &r, &ry, &d); - cost += r; - distortion += d; - tot_rate_y += ry; - mic->bmi[ib].as_mode.first = best_mode; - } + if (bw == 64 && bh == 64) + return BLOCK_64X64; - *Rate = cost; - *rate_y = tot_rate_y; - *Distortion = distortion; - return RDCOST(mb->rdmult, mb->rddiv, cost, distortion); + assert(0); + return -1; } -static int64_t rd_pick_intra8x8mby_modes_and_txsz(VP9_COMP *cpi, MACROBLOCK *x, - int *rate, int *rate_y, - int *distortion, - int *mode8x8, - int64_t best_yrd, - int64_t *txfm_cache) { - VP9_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; - int cost0 = vp9_cost_bit(cm->prob_tx[0], 0); - int cost1 = vp9_cost_bit(cm->prob_tx[0], 1); - int64_t tmp_rd_4x4s, tmp_rd_8x8s; - int64_t tmp_rd_4x4, tmp_rd_8x8, tmp_rd; - int r4x4, tok4x4, d4x4, r8x8, tok8x8, d8x8; - - mbmi->txfm_size = TX_4X4; - tmp_rd_4x4 = rd_pick_intra8x8mby_modes(cpi, x, &r4x4, &tok4x4, - &d4x4, best_yrd); - mode8x8[0] = xd->mode_info_context->bmi[0].as_mode.first; - mode8x8[1] = xd->mode_info_context->bmi[2].as_mode.first; - 
mode8x8[2] = xd->mode_info_context->bmi[8].as_mode.first; - mode8x8[3] = xd->mode_info_context->bmi[10].as_mode.first; - mbmi->txfm_size = TX_8X8; - tmp_rd_8x8 = rd_pick_intra8x8mby_modes(cpi, x, &r8x8, &tok8x8, - &d8x8, best_yrd); - txfm_cache[ONLY_4X4] = tmp_rd_4x4; - txfm_cache[ALLOW_8X8] = tmp_rd_8x8; - txfm_cache[ALLOW_16X16] = tmp_rd_8x8; - tmp_rd_4x4s = tmp_rd_4x4 + RDCOST(x->rdmult, x->rddiv, cost0, 0); - tmp_rd_8x8s = tmp_rd_8x8 + RDCOST(x->rdmult, x->rddiv, cost1, 0); - txfm_cache[TX_MODE_SELECT] = tmp_rd_4x4s < tmp_rd_8x8s ? - tmp_rd_4x4s : tmp_rd_8x8s; - if (cm->txfm_mode == TX_MODE_SELECT) { - if (tmp_rd_4x4s < tmp_rd_8x8s) { - *rate = r4x4 + cost0; - *rate_y = tok4x4 + cost0; - *distortion = d4x4; - mbmi->txfm_size = TX_4X4; - tmp_rd = tmp_rd_4x4s; - } else { - *rate = r8x8 + cost1; - *rate_y = tok8x8 + cost1; - *distortion = d8x8; - mbmi->txfm_size = TX_8X8; - tmp_rd = tmp_rd_8x8s; - - mode8x8[0] = xd->mode_info_context->bmi[0].as_mode.first; - mode8x8[1] = xd->mode_info_context->bmi[2].as_mode.first; - mode8x8[2] = xd->mode_info_context->bmi[8].as_mode.first; - mode8x8[3] = xd->mode_info_context->bmi[10].as_mode.first; - } - } else if (cm->txfm_mode == ONLY_4X4) { - *rate = r4x4; - *rate_y = tok4x4; - *distortion = d4x4; - mbmi->txfm_size = TX_4X4; - tmp_rd = tmp_rd_4x4; - } else { - *rate = r8x8; - *rate_y = tok8x8; - *distortion = d8x8; - mbmi->txfm_size = TX_8X8; - tmp_rd = tmp_rd_8x8; - - mode8x8[0] = xd->mode_info_context->bmi[0].as_mode.first; - mode8x8[1] = xd->mode_info_context->bmi[2].as_mode.first; - mode8x8[2] = xd->mode_info_context->bmi[8].as_mode.first; - mode8x8[3] = xd->mode_info_context->bmi[10].as_mode.first; - } - - return tmp_rd; +static INLINE void mi_buf_shift(MACROBLOCK *x, int i) { + MB_MODE_INFO *mbmi = &x->e_mbd.mode_info_context->mbmi; + x->plane[0].src.buf = + raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, i, + x->plane[0].src.buf, + x->plane[0].src.stride); + assert(((intptr_t)x->e_mbd.plane[0].pre[0].buf & 0x7) == 0); + x->e_mbd.plane[0].pre[0].buf = + raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, i, + x->e_mbd.plane[0].pre[0].buf, + x->e_mbd.plane[0].pre[0].stride); + if (mbmi->ref_frame[1]) + x->e_mbd.plane[0].pre[1].buf = + raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, i, + x->e_mbd.plane[0].pre[1].buf, + x->e_mbd.plane[0].pre[1].stride); } -static int rd_cost_mbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *mb, int backup) { - int b; - int cost = 0; - MACROBLOCKD *xd = &mb->e_mbd; - ENTROPY_CONTEXT_PLANES t_above, t_left; - ENTROPY_CONTEXT *ta, *tl; - - if (backup) { - vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES)); - - ta = (ENTROPY_CONTEXT *)&t_above; - tl = (ENTROPY_CONTEXT *)&t_left; - } else { - ta = (ENTROPY_CONTEXT *)xd->above_context; - tl = (ENTROPY_CONTEXT *)xd->left_context; - } - - for (b = 16; b < 24; b++) - cost += cost_coeffs(cm, mb, b, PLANE_TYPE_UV, - ta + vp9_block2above[TX_4X4][b], - tl + vp9_block2left[TX_4X4][b], - TX_4X4); - - return cost; +static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src, + struct buf_2d orig_pre[2]) { + MB_MODE_INFO *mbmi = &x->e_mbd.mode_info_context->mbmi; + x->plane[0].src = orig_src; + x->e_mbd.plane[0].pre[0] = orig_pre[0]; + if (mbmi->ref_frame[1]) + x->e_mbd.plane[0].pre[1] = orig_pre[1]; } +static void iterative_motion_search(VP9_COMP *cpi, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize, + int_mv *frame_mv, + YV12_BUFFER_CONFIG **scaled_ref_frame, + int 
mi_row, int mi_col, + int_mv single_newmv[MAX_REF_FRAMES]); -static int64_t rd_inter16x16_uv_4x4(VP9_COMP *cpi, MACROBLOCK *x, int *rate, - int *distortion, int fullpixel, int *skip, - int do_ctx_backup) { - vp9_transform_mbuv_4x4(x); - vp9_quantize_mbuv_4x4(x); +static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, + BEST_SEG_INFO *bsi, + int_mv seg_mvs[4][MAX_REF_FRAMES], + int mi_row, int mi_col) { + int i, j; + int br = 0, bd = 0; + MB_PREDICTION_MODE this_mode; + MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; + const int label_count = 4; + int64_t this_segment_rd = 0, other_segment_rd; + int label_mv_thresh; + int rate = 0; + int sbr = 0, sbd = 0; + int segmentyrate = 0; + int best_eobs[4] = { 0 }; + BLOCK_SIZE_TYPE bsize = mbmi->sb_type; + int bwl = b_width_log2(bsize), bw = 1 << bwl; + int bhl = b_height_log2(bsize), bh = 1 << bhl; + int idx, idy; + vp9_variance_fn_ptr_t *v_fn_ptr; + YV12_BUFFER_CONFIG *scaled_ref_frame[2] = {NULL, NULL}; + ENTROPY_CONTEXT t_above[4], t_left[4]; + ENTROPY_CONTEXT t_above_b[4], t_left_b[4]; - *rate = rd_cost_mbuv_4x4(&cpi->common, x, do_ctx_backup); - *distortion = vp9_mbuverror(x) / 4; - *skip = vp9_mbuv_is_skippable_4x4(&x->e_mbd); + vpx_memcpy(t_above, x->e_mbd.plane[0].above_context, sizeof(t_above)); + vpx_memcpy(t_left, x->e_mbd.plane[0].left_context, sizeof(t_left)); - return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); -} + v_fn_ptr = &cpi->fn_ptr[get_block_size(4 << bwl, 4 << bhl)]; -static int rd_cost_mbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *mb, int backup) { - int b; - int cost = 0; - MACROBLOCKD *xd = &mb->e_mbd; - ENTROPY_CONTEXT_PLANES t_above, t_left; - ENTROPY_CONTEXT *ta, *tl; + // 64 makes this threshold really big effectively + // making it so that we very rarely check mvs on + // segments. setting this to 1 would make mv thresh + // roughly equal to what it is for macroblocks + label_mv_thresh = 1 * bsi->mvthresh / label_count; - if (backup) { - vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + // Segmentation method overheads + other_segment_rd = this_segment_rd; - ta = (ENTROPY_CONTEXT *)&t_above; - tl = (ENTROPY_CONTEXT *)&t_left; - } else { - ta = (ENTROPY_CONTEXT *)mb->e_mbd.above_context; - tl = (ENTROPY_CONTEXT *)mb->e_mbd.left_context; - } + for (idy = 0; idy < 2; idy += bh) { + for (idx = 0; idx < 2; idx += bw) { + // TODO(jingning,rbultje): rewrite the rate-distortion optimization + // loop for 4x4/4x8/8x4 block coding. 
to be replaced with new rd loop + int_mv mode_mv[MB_MODE_COUNT], second_mode_mv[MB_MODE_COUNT]; + int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; + int64_t best_label_rd = INT64_MAX, best_other_rd = INT64_MAX; + MB_PREDICTION_MODE mode_selected = ZEROMV; + int bestlabelyrate = 0; + i = idy * 2 + idx; + + frame_mv[ZEROMV][mbmi->ref_frame[0]].as_int = 0; + frame_mv[ZEROMV][mbmi->ref_frame[1]].as_int = 0; + vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd, + &frame_mv[NEARESTMV][mbmi->ref_frame[0]], + &frame_mv[NEARMV][mbmi->ref_frame[0]], + i, 0); + if (mbmi->ref_frame[1] > 0) + vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd, + &frame_mv[NEARESTMV][mbmi->ref_frame[1]], + &frame_mv[NEARMV][mbmi->ref_frame[1]], + i, 1); + + // search for the best motion vector on this segment + for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) { + int64_t this_rd; + int distortion; + int labelyrate; + ENTROPY_CONTEXT t_above_s[4], t_left_s[4]; + const struct buf_2d orig_src = x->plane[0].src; + struct buf_2d orig_pre[2]; + + vpx_memcpy(orig_pre, x->e_mbd.plane[0].pre, sizeof(orig_pre)); + + vpx_memcpy(t_above_s, t_above, sizeof(t_above_s)); + vpx_memcpy(t_left_s, t_left, sizeof(t_left_s)); + + // motion search for newmv (single predictor case only) + if (mbmi->ref_frame[1] <= 0 && this_mode == NEWMV) { + int step_param = 0; + int further_steps; + int thissme, bestsme = INT_MAX; + int sadpb = x->sadperbit4; + int_mv mvp_full; - for (b = 16; b < 24; b += 4) - cost += cost_coeffs(cm, mb, b, PLANE_TYPE_UV, - ta + vp9_block2above[TX_8X8][b], - tl + vp9_block2left[TX_8X8][b], TX_8X8); + /* Is the best so far sufficiently good that we cant justify doing + * and new motion search. */ + if (best_label_rd < label_mv_thresh) + break; + + if (cpi->compressor_speed) { + // use previous block's result as next block's MV predictor. + if (i > 0) { + bsi->mvp.as_int = + x->e_mbd.mode_info_context->bmi[i - 1].as_mv[0].as_int; + if (i == 2) + bsi->mvp.as_int = + x->e_mbd.mode_info_context->bmi[i - 2].as_mv[0].as_int; + step_param = 2; + } + } - return cost; -} + further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; -static int64_t rd_inter16x16_uv_8x8(VP9_COMP *cpi, MACROBLOCK *x, int *rate, - int *distortion, int fullpixel, int *skip, - int do_ctx_backup) { - vp9_transform_mbuv_8x8(x); - vp9_quantize_mbuv_8x8(x); + mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3; + mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3; - *rate = rd_cost_mbuv_8x8(&cpi->common, x, do_ctx_backup); - *distortion = vp9_mbuverror(x) / 4; - *skip = vp9_mbuv_is_skippable_8x8(&x->e_mbd); + // adjust src pointer for this block + mi_buf_shift(x, i); + bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param, + sadpb, further_steps, 0, v_fn_ptr, + bsi->ref_mv, &mode_mv[NEWMV]); - return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); -} + // Should we do a full search (best quality only) + if (cpi->compressor_speed == 0) { + /* Check if mvp_full is within the range. 
*/ + clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, + x->mv_row_min, x->mv_row_max); -static int rd_cost_sbuv_16x16(VP9_COMMON *const cm, MACROBLOCK *x, int backup) { - int b; - int cost = 0; - MACROBLOCKD *const xd = &x->e_mbd; - ENTROPY_CONTEXT_PLANES t_above[2], t_left[2]; - ENTROPY_CONTEXT *ta, *tl; + thissme = cpi->full_search_sad(x, &mvp_full, + sadpb, 16, v_fn_ptr, + x->nmvjointcost, x->mvcost, + bsi->ref_mv, i); - if (backup) { - vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES) * 2); - vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES) * 2); + if (thissme < bestsme) { + bestsme = thissme; + mode_mv[NEWMV].as_int = + x->e_mbd.mode_info_context->bmi[i].as_mv[0].as_int; + } else { + /* The full search result is actually worse so re-instate the + * previous best vector */ + x->e_mbd.mode_info_context->bmi[i].as_mv[0].as_int = + mode_mv[NEWMV].as_int; + } + } - ta = (ENTROPY_CONTEXT *) &t_above; - tl = (ENTROPY_CONTEXT *) &t_left; - } else { - ta = (ENTROPY_CONTEXT *)xd->above_context; - tl = (ENTROPY_CONTEXT *)xd->left_context; - } + if (bestsme < INT_MAX) { + int distortion; + unsigned int sse; + cpi->find_fractional_mv_step(x, &mode_mv[NEWMV], + bsi->ref_mv, x->errorperbit, v_fn_ptr, + x->nmvjointcost, x->mvcost, + &distortion, &sse); - for (b = 16; b < 24; b += 4) - cost += cost_coeffs(cm, x, b * 4, PLANE_TYPE_UV, - ta + vp9_block2above[TX_8X8][b], - tl + vp9_block2left[TX_8X8][b], TX_16X16); + // save motion search result for use in compound prediction + seg_mvs[i][mbmi->ref_frame[0]].as_int = mode_mv[NEWMV].as_int; + } - return cost; -} + // restore src pointers + mi_buf_restore(x, orig_src, orig_pre); + } else if (mbmi->ref_frame[1] > 0 && this_mode == NEWMV) { + if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV || + seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) + continue; + + // adjust src pointers + mi_buf_shift(x, i); + if (cpi->sf.comp_inter_joint_search_thresh < bsize) { + iterative_motion_search(cpi, x, bsize, frame_mv[this_mode], + scaled_ref_frame, + mi_row, mi_col, seg_mvs[i]); + seg_mvs[i][mbmi->ref_frame[0]].as_int = + frame_mv[this_mode][mbmi->ref_frame[0]].as_int; + seg_mvs[i][mbmi->ref_frame[1]].as_int = + frame_mv[this_mode][mbmi->ref_frame[1]].as_int; + } + // restore src pointers + mi_buf_restore(x, orig_src, orig_pre); + } -static void rd_inter32x32_uv_16x16(VP9_COMMON *const cm, MACROBLOCK *x, - int *rate, int *distortion, int *skip, - int backup) { - MACROBLOCKD *const xd = &x->e_mbd; + rate = labels2mode(x, i, this_mode, &mode_mv[this_mode], - vp9_transform_sbuv_16x16(x); - vp9_quantize_sbuv_16x16(x); + &second_mode_mv[this_mode], frame_mv, seg_mvs[i], + bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost, + x->mvcost, cpi); - *rate = rd_cost_sbuv_16x16(cm, x, backup); - *distortion = vp9_sb_block_error_c(x->coeff + 1024, - xd->dqcoeff + 1024, 512, 2); - *skip = vp9_sbuv_is_skippable_16x16(xd); -} + // Trap vectors that reach beyond the UMV borders + if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) || + ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) || + ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) || + ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) { + continue; + } + if (mbmi->ref_frame[1] > 0 && + mv_check_bounds(x, &second_mode_mv[this_mode])) + continue; - this_rd = encode_inter_mb_segment(&cpi->common, + x, i, &labelyrate, + &distortion, t_above_s, t_left_s); + this_rd += RDCOST(x->rdmult, x->rddiv, rate, 0); + rate += labelyrate; + + if (this_rd < best_label_rd) { + 
sbr = rate; + sbd = distortion; + bestlabelyrate = labelyrate; + mode_selected = this_mode; + best_label_rd = this_rd; + best_eobs[i] = x->e_mbd.plane[0].eobs[i]; + vpx_memcpy(t_above_b, t_above_s, sizeof(t_above_s)); + vpx_memcpy(t_left_b, t_left_s, sizeof(t_left_s)); + } + } /*for each 4x4 mode*/ + + vpx_memcpy(t_above, t_above_b, sizeof(t_above)); + vpx_memcpy(t_left, t_left_b, sizeof(t_left)); + + labels2mode(x, i, mode_selected, &mode_mv[mode_selected], + &second_mode_mv[mode_selected], frame_mv, seg_mvs[i], + bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost, + x->mvcost, cpi); + + br += sbr; + bd += sbd; + segmentyrate += bestlabelyrate; + this_segment_rd += best_label_rd; + other_segment_rd += best_other_rd; + + for (j = 1; j < bh; ++j) + vpx_memcpy(&x->partition_info->bmi[i + j * 2], + &x->partition_info->bmi[i], + sizeof(x->partition_info->bmi[i])); + for (j = 1; j < bw; ++j) + vpx_memcpy(&x->partition_info->bmi[i + j], + &x->partition_info->bmi[i], + sizeof(x->partition_info->bmi[i])); + } + } /* for each label */ -static int64_t rd_inter32x32_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate, - int *distortion, int fullpixel, int *skip) { - MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; - const uint8_t *usrc = x->src.u_buffer, *udst = xd->dst.u_buffer; - const uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer; - int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride; - - if (mbmi->txfm_size >= TX_16X16) { - vp9_subtract_sbuv_s_c(x->src_diff, - usrc, vsrc, src_uv_stride, - udst, vdst, dst_uv_stride); - rd_inter32x32_uv_16x16(&cpi->common, x, rate, distortion, skip, 1); - } else { - int n, r = 0, d = 0; - int skippable = 1; - ENTROPY_CONTEXT_PLANES t_above[2], t_left[2]; - ENTROPY_CONTEXT_PLANES *ta = xd->above_context; - ENTROPY_CONTEXT_PLANES *tl = xd->left_context; - - memcpy(t_above, xd->above_context, sizeof(t_above)); - memcpy(t_left, xd->left_context, sizeof(t_left)); - - for (n = 0; n < 4; n++) { - int x_idx = n & 1, y_idx = n >> 1; - int d_tmp, s_tmp, r_tmp; - - xd->above_context = ta + x_idx; - xd->left_context = tl + y_idx; - vp9_subtract_mbuv_s_c(x->src_diff, - usrc + x_idx * 8 + y_idx * 8 * src_uv_stride, - vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride, - src_uv_stride, - udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, - vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride, - dst_uv_stride); - - if (mbmi->txfm_size == TX_4X4) { - rd_inter16x16_uv_4x4(cpi, x, &r_tmp, &d_tmp, fullpixel, &s_tmp, 0); - } else { - rd_inter16x16_uv_8x8(cpi, x, &r_tmp, &d_tmp, fullpixel, &s_tmp, 0); - } + if (this_segment_rd < bsi->segment_rd) { + bsi->r = br; + bsi->d = bd; + bsi->segment_yrate = segmentyrate; + bsi->segment_rd = this_segment_rd; - r += r_tmp; - d += d_tmp; - skippable = skippable && s_tmp; + // store everything needed to come back to this!! 
+ for (i = 0; i < 4; i++) { + bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv; + if (mbmi->ref_frame[1] > 0) + bsi->second_mvs[i].as_mv = x->partition_info->bmi[i].second_mv.as_mv; + bsi->modes[i] = x->partition_info->bmi[i].mode; + bsi->eobs[i] = best_eobs[i]; } - - *rate = r; - *distortion = d; - *skip = skippable; - xd->left_context = tl; - xd->above_context = ta; - memcpy(xd->above_context, t_above, sizeof(t_above)); - memcpy(xd->left_context, t_left, sizeof(t_left)); } - - return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); -} - -static void super_block_64_uvrd(VP9_COMMON *const cm, MACROBLOCK *x, int *rate, - int *distortion, int *skip); -static int64_t rd_inter64x64_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate, - int *distortion, int fullpixel, int *skip) { - super_block_64_uvrd(&cpi->common, x, rate, distortion, skip); - return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); } -static void rd_pick_intra_mbuv_mode(VP9_COMP *cpi, - MACROBLOCK *x, - int *rate, - int *rate_tokenonly, - int *distortion, - int *skippable) { - MB_PREDICTION_MODE mode; - MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); - MACROBLOCKD *xd = &x->e_mbd; +static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, + int_mv *best_ref_mv, + int_mv *second_best_ref_mv, + int64_t best_rd, + int *returntotrate, + int *returnyrate, + int *returndistortion, + int *skippable, int mvthresh, + int_mv seg_mvs[4][MAX_REF_FRAMES], + int mi_row, int mi_col) { + int i; + BEST_SEG_INFO bsi; MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; - int64_t best_rd = INT64_MAX; - int UNINITIALIZED_IS_SAFE(d), UNINITIALIZED_IS_SAFE(r); - int rate_to, UNINITIALIZED_IS_SAFE(skip); - xd->mode_info_context->mbmi.txfm_size = TX_4X4; - for (mode = DC_PRED; mode <= TM_PRED; mode++) { - int rate; - int distortion; - int64_t this_rd; + vpx_memset(&bsi, 0, sizeof(bsi)); - mbmi->uv_mode = mode; - vp9_build_intra_predictors_mbuv(&x->e_mbd); + bsi.segment_rd = best_rd; + bsi.ref_mv = best_ref_mv; + bsi.second_ref_mv = second_best_ref_mv; + bsi.mvp.as_int = best_ref_mv->as_int; + bsi.mvthresh = mvthresh; - vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer, - x->e_mbd.predictor, x->src.uv_stride); - vp9_transform_mbuv_4x4(x); - vp9_quantize_mbuv_4x4(x); + for (i = 0; i < 4; i++) + bsi.modes[i] = ZEROMV; - rate_to = rd_cost_mbuv_4x4(&cpi->common, x, 1); - rate = rate_to - + x->intra_uv_mode_cost[x->e_mbd.frame_type][mbmi->uv_mode]; + rd_check_segment_txsize(cpi, x, &bsi, seg_mvs, mi_row, mi_col); - distortion = vp9_mbuverror(x) / 4; + /* set it to the best */ + for (i = 0; i < 4; i++) { + x->e_mbd.mode_info_context->bmi[i].as_mv[0].as_int = bsi.mvs[i].as_int; + if (mbmi->ref_frame[1] > 0) + x->e_mbd.mode_info_context->bmi[i].as_mv[1].as_int = + bsi.second_mvs[i].as_int; + x->e_mbd.plane[0].eobs[i] = bsi.eobs[i]; + } - this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); + /* save partitions */ + x->partition_info->count = 4; - if (this_rd < best_rd) { - skip = vp9_mbuv_is_skippable_4x4(xd); - best_rd = this_rd; - d = distortion; - r = rate; - *rate_tokenonly = rate_to; - mode_selected = mode; - } + for (i = 0; i < x->partition_info->count; i++) { + x->partition_info->bmi[i].mode = bsi.modes[i]; + x->partition_info->bmi[i].mv.as_mv = bsi.mvs[i].as_mv; + if (mbmi->ref_frame[1] > 0) + x->partition_info->bmi[i].second_mv.as_mv = bsi.second_mvs[i].as_mv; } + /* + * used to set mbmi->mv.as_int + */ + x->partition_info->bmi[3].mv.as_int = bsi.mvs[3].as_int; + if (mbmi->ref_frame[1] > 0) + 
x->partition_info->bmi[3].second_mv.as_int = bsi.second_mvs[3].as_int; - *rate = r; - *distortion = d; - *skippable = skip; + *returntotrate = bsi.r; + *returndistortion = bsi.d; + *returnyrate = bsi.segment_yrate; + *skippable = vp9_sby_is_skippable(&x->e_mbd, BLOCK_SIZE_SB8X8); + mbmi->mode = bsi.modes[3]; - mbmi->uv_mode = mode_selected; + return (int)(bsi.segment_rd); } -static void rd_pick_intra_mbuv_mode_8x8(VP9_COMP *cpi, - MACROBLOCK *x, - int *rate, - int *rate_tokenonly, - int *distortion, - int *skippable) { +static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, + uint8_t *ref_y_buffer, int ref_y_stride, + int ref_frame, enum BlockSize block_size ) { MACROBLOCKD *xd = &x->e_mbd; - MB_PREDICTION_MODE mode; - MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); - MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; - int64_t best_rd = INT64_MAX; - int UNINITIALIZED_IS_SAFE(d), UNINITIALIZED_IS_SAFE(r); - int rate_to, UNINITIALIZED_IS_SAFE(skip); + MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; + int_mv this_mv; + int i; + int zero_seen = 0; + int best_index = 0; + int best_sad = INT_MAX; + int this_sad = INT_MAX; - xd->mode_info_context->mbmi.txfm_size = TX_8X8; - for (mode = DC_PRED; mode <= TM_PRED; mode++) { - int rate; - int distortion; - int64_t this_rd; + uint8_t *src_y_ptr = x->plane[0].src.buf; + uint8_t *ref_y_ptr; + int row_offset, col_offset; - mbmi->uv_mode = mode; - vp9_build_intra_predictors_mbuv(&x->e_mbd); - vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer, - x->e_mbd.predictor, x->src.uv_stride); - vp9_transform_mbuv_8x8(x); + // Get the sad for each candidate reference mv + for (i = 0; i < MAX_MV_REF_CANDIDATES; i++) { + this_mv.as_int = mbmi->ref_mvs[ref_frame][i].as_int; - vp9_quantize_mbuv_8x8(x); + // The list is at an end if we see 0 for a second time. + if (!this_mv.as_int && zero_seen) + break; + zero_seen = zero_seen || !this_mv.as_int; - rate_to = rd_cost_mbuv_8x8(&cpi->common, x, 1); - rate = rate_to + x->intra_uv_mode_cost[x->e_mbd.frame_type][mbmi->uv_mode]; + row_offset = this_mv.as_mv.row >> 3; + col_offset = this_mv.as_mv.col >> 3; + ref_y_ptr = ref_y_buffer + (ref_y_stride * row_offset) + col_offset; - distortion = vp9_mbuverror(x) / 4; - this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); + // Find sad for current vector. + this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride, + ref_y_ptr, ref_y_stride, + 0x7fffffff); - if (this_rd < best_rd) { - skip = vp9_mbuv_is_skippable_8x8(xd); - best_rd = this_rd; - d = distortion; - r = rate; - *rate_tokenonly = rate_to; - mode_selected = mode; + // Note if it is the best so far. + if (this_sad < best_sad) { + best_sad = this_sad; + best_index = i; } } - *rate = r; - *distortion = d; - *skippable = skip; - mbmi->uv_mode = mode_selected; + + // Note the index of the mv that worked best in the reference list. + x->mv_best_ref_index[ref_frame] = best_index; } -// TODO(rbultje) very similar to rd_inter32x32_uv(), merge? 
-static void super_block_uvrd(VP9_COMMON *const cm, - MACROBLOCK *x, - int *rate, - int *distortion, - int *skippable) { - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; - const uint8_t *usrc = x->src.u_buffer, *udst = xd->dst.u_buffer; - const uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer; - int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride; - - if (mbmi->txfm_size >= TX_16X16) { - vp9_subtract_sbuv_s_c(x->src_diff, - usrc, vsrc, src_uv_stride, - udst, vdst, dst_uv_stride); - rd_inter32x32_uv_16x16(cm, x, rate, distortion, skippable, 1); +static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id, + unsigned int *ref_costs_single, + unsigned int *ref_costs_comp, + vp9_prob *comp_mode_p) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &cpi->mb.e_mbd; + int seg_ref_active = vp9_segfeature_active(xd, segment_id, + SEG_LVL_REF_FRAME); + if (seg_ref_active) { + vpx_memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single)); + vpx_memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp)); + *comp_mode_p = 128; } else { - int d = 0, r = 0, n, s = 1; - ENTROPY_CONTEXT_PLANES t_above[2], t_left[2]; - ENTROPY_CONTEXT_PLANES *ta_orig = xd->above_context; - ENTROPY_CONTEXT_PLANES *tl_orig = xd->left_context; - - memcpy(t_above, xd->above_context, sizeof(t_above)); - memcpy(t_left, xd->left_context, sizeof(t_left)); - - for (n = 0; n < 4; n++) { - int x_idx = n & 1, y_idx = n >> 1; - - vp9_subtract_mbuv_s_c(x->src_diff, - usrc + x_idx * 8 + y_idx * 8 * src_uv_stride, - vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride, - src_uv_stride, - udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, - vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride, - dst_uv_stride); - if (mbmi->txfm_size == TX_4X4) { - vp9_transform_mbuv_4x4(x); - vp9_quantize_mbuv_4x4(x); - s &= vp9_mbuv_is_skippable_4x4(xd); - } else { - vp9_transform_mbuv_8x8(x); - vp9_quantize_mbuv_8x8(x); - s &= vp9_mbuv_is_skippable_8x8(xd); - } + vp9_prob intra_inter_p = vp9_get_pred_prob(cm, xd, PRED_INTRA_INTER); + vp9_prob comp_inter_p = 128; - d += vp9_mbuverror(x) >> 2; - xd->above_context = t_above + x_idx; - xd->left_context = t_left + y_idx; - if (mbmi->txfm_size == TX_4X4) { - r += rd_cost_mbuv_4x4(cm, x, 0); - } else { - r += rd_cost_mbuv_8x8(cm, x, 0); - } + if (cm->comp_pred_mode == HYBRID_PREDICTION) { + comp_inter_p = vp9_get_pred_prob(cm, xd, PRED_COMP_INTER_INTER); + *comp_mode_p = comp_inter_p; + } else { + *comp_mode_p = 128; + } + + ref_costs_single[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0); + + if (cm->comp_pred_mode != COMP_PREDICTION_ONLY) { + vp9_prob ref_single_p1 = vp9_get_pred_prob(cm, xd, PRED_SINGLE_REF_P1); + vp9_prob ref_single_p2 = vp9_get_pred_prob(cm, xd, PRED_SINGLE_REF_P2); + unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1); + + if (cm->comp_pred_mode == HYBRID_PREDICTION) + base_cost += vp9_cost_bit(comp_inter_p, 0); + + ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] = + ref_costs_single[ALTREF_FRAME] = base_cost; + ref_costs_single[LAST_FRAME] += vp9_cost_bit(ref_single_p1, 0); + ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1); + ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1); + ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0); + ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1); + } else { + ref_costs_single[LAST_FRAME] = 512; + ref_costs_single[GOLDEN_FRAME] = 512; + ref_costs_single[ALTREF_FRAME] = 512; } + if 
(cm->comp_pred_mode != SINGLE_PREDICTION_ONLY) { + vp9_prob ref_comp_p = vp9_get_pred_prob(cm, xd, PRED_COMP_REF_P); + unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1); - xd->above_context = ta_orig; - xd->left_context = tl_orig; + if (cm->comp_pred_mode == HYBRID_PREDICTION) + base_cost += vp9_cost_bit(comp_inter_p, 1); - *distortion = d; - *rate = r; - *skippable = s; + ref_costs_comp[LAST_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 0); + ref_costs_comp[GOLDEN_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 1); + } else { + ref_costs_comp[LAST_FRAME] = 512; + ref_costs_comp[GOLDEN_FRAME] = 512; + } } } -static int rd_cost_sb64uv_32x32(VP9_COMMON *const cm, MACROBLOCK *x, - int backup) { - int b; - int cost = 0; +static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, + int mode_index, + PARTITION_INFO *partition, + int_mv *ref_mv, + int_mv *second_ref_mv, + int64_t comp_pred_diff[NB_PREDICTION_TYPES], + int64_t txfm_size_diff[NB_TXFM_MODES]) { MACROBLOCKD *const xd = &x->e_mbd; - ENTROPY_CONTEXT_PLANES t_above[4], t_left[4]; - ENTROPY_CONTEXT *ta, *tl; - if (backup) { - vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES) * 4); - vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES) * 4); + // Take a snapshot of the coding context so it can be + // restored if we decide to encode this way + ctx->skip = x->skip; + ctx->best_mode_index = mode_index; + ctx->mic = *xd->mode_info_context; + + if (partition) + ctx->partition_info = *partition; - ta = (ENTROPY_CONTEXT *) &t_above; - tl = (ENTROPY_CONTEXT *) &t_left; - } else { - ta = (ENTROPY_CONTEXT *)xd->above_context; - tl = (ENTROPY_CONTEXT *)xd->left_context; - } + ctx->best_ref_mv.as_int = ref_mv->as_int; + ctx->second_best_ref_mv.as_int = second_ref_mv->as_int; - for (b = 16; b < 24; b += 4) - cost += cost_coeffs(cm, x, b * 16, PLANE_TYPE_UV, - ta + vp9_block2above[TX_8X8][b], - tl + vp9_block2left[TX_8X8][b], TX_32X32); + ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_PREDICTION_ONLY]; + ctx->comp_pred_diff = (int)comp_pred_diff[COMP_PREDICTION_ONLY]; + ctx->hybrid_pred_diff = (int)comp_pred_diff[HYBRID_PREDICTION]; - return cost; + memcpy(ctx->txfm_rd_diff, txfm_size_diff, sizeof(ctx->txfm_rd_diff)); } -static void rd_inter64x64_uv_32x32(VP9_COMMON *const cm, MACROBLOCK *x, - int *rate, int *distortion, int *skip, - int backup) { - MACROBLOCKD *const xd = &x->e_mbd; +static void setup_pred_block(const MACROBLOCKD *xd, + struct buf_2d dst[MAX_MB_PLANE], + const YV12_BUFFER_CONFIG *src, + int mi_row, int mi_col, + const struct scale_factors *scale, + const struct scale_factors *scale_uv) { + int i; - vp9_transform_sb64uv_32x32(x); - vp9_quantize_sb64uv_32x32(x); + dst[0].buf = src->y_buffer; + dst[0].stride = src->y_stride; + dst[1].buf = src->u_buffer; + dst[2].buf = src->v_buffer; + dst[1].stride = dst[2].stride = src->uv_stride; +#if CONFIG_ALPHA + dst[3].buf = src->alpha_buffer; + dst[3].stride = src->alpha_stride; +#endif - *rate = rd_cost_sb64uv_32x32(cm, x, backup); - *distortion = vp9_sb_block_error_c(x->coeff + 4096, - xd->dqcoeff + 4096, 2048, 0); - *skip = vp9_sb64uv_is_skippable_32x32(xd); + // TODO(jkoleszar): Make scale factors per-plane data + for (i = 0; i < MAX_MB_PLANE; i++) { + setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col, + i ? 
scale_uv : scale, + xd->plane[i].subsampling_x, xd->plane[i].subsampling_y); + } } -static void super_block_64_uvrd(VP9_COMMON *const cm, MACROBLOCK *x, - int *rate, - int *distortion, - int *skippable) { +static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, + int idx, MV_REFERENCE_FRAME frame_type, + enum BlockSize block_size, + int mi_row, int mi_col, + int_mv frame_nearest_mv[MAX_REF_FRAMES], + int_mv frame_near_mv[MAX_REF_FRAMES], + struct buf_2d yv12_mb[4][MAX_MB_PLANE], + struct scale_factors scale[MAX_REF_FRAMES]) { + VP9_COMMON *cm = &cpi->common; + YV12_BUFFER_CONFIG *yv12 = &cm->yv12_fb[cpi->common.ref_frame_map[idx]]; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; - const uint8_t *usrc = x->src.u_buffer, *udst = xd->dst.u_buffer; - const uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer; - int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride; - ENTROPY_CONTEXT_PLANES t_above[4], t_left[4]; - ENTROPY_CONTEXT_PLANES *ta_orig = xd->above_context; - ENTROPY_CONTEXT_PLANES *tl_orig = xd->left_context; - int d = 0, r = 0, n, s = 1; - - // FIXME not needed if tx=32x32 - memcpy(t_above, xd->above_context, sizeof(t_above)); - memcpy(t_left, xd->left_context, sizeof(t_left)); - - if (mbmi->txfm_size == TX_32X32) { - vp9_subtract_sb64uv_s_c(x->src_diff, usrc, vsrc, src_uv_stride, - udst, vdst, dst_uv_stride); - rd_inter64x64_uv_32x32(cm, x, &r, &d, &s, 1); - } else if (mbmi->txfm_size == TX_16X16) { - int n; - *rate = 0; - for (n = 0; n < 4; n++) { - int x_idx = n & 1, y_idx = n >> 1; - int r_tmp, d_tmp, s_tmp; - - vp9_subtract_sbuv_s_c(x->src_diff, - usrc + x_idx * 16 + y_idx * 16 * src_uv_stride, - vsrc + x_idx * 16 + y_idx * 16 * src_uv_stride, - src_uv_stride, - udst + x_idx * 16 + y_idx * 16 * dst_uv_stride, - vdst + x_idx * 16 + y_idx * 16 * dst_uv_stride, - dst_uv_stride); - xd->above_context = t_above + x_idx * 2; - xd->left_context = t_left + y_idx * 2; - rd_inter32x32_uv_16x16(cm, x, &r_tmp, &d_tmp, &s_tmp, 0); - r += r_tmp; - d += d_tmp; - s = s && s_tmp; - } - } else { - for (n = 0; n < 16; n++) { - int x_idx = n & 3, y_idx = n >> 2; - - vp9_subtract_mbuv_s_c(x->src_diff, - usrc + x_idx * 8 + y_idx * 8 * src_uv_stride, - vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride, - src_uv_stride, - udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, - vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride, - dst_uv_stride); - if (mbmi->txfm_size == TX_4X4) { - vp9_transform_mbuv_4x4(x); - vp9_quantize_mbuv_4x4(x); - s &= vp9_mbuv_is_skippable_4x4(xd); - } else { - vp9_transform_mbuv_8x8(x); - vp9_quantize_mbuv_8x8(x); - s &= vp9_mbuv_is_skippable_8x8(xd); - } + // set up scaling factors + scale[frame_type] = cpi->common.active_ref_scale[frame_type - 1]; - xd->above_context = t_above + x_idx; - xd->left_context = t_left + y_idx; - d += vp9_mbuverror(x) >> 2; - if (mbmi->txfm_size == TX_4X4) { - r += rd_cost_mbuv_4x4(cm, x, 0); - } else { - r += rd_cost_mbuv_8x8(cm, x, 0); - } - } - } - - *distortion = d; - *rate = r; - *skippable = s; + scale[frame_type].x_offset_q4 = + ROUND_POWER_OF_TWO(mi_col * MI_SIZE * scale[frame_type].x_scale_fp, + VP9_REF_SCALE_SHIFT) & 0xf; + scale[frame_type].y_offset_q4 = + ROUND_POWER_OF_TWO(mi_row * MI_SIZE * scale[frame_type].y_scale_fp, + VP9_REF_SCALE_SHIFT) & 0xf; - xd->left_context = tl_orig; - xd->above_context = ta_orig; -} + // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this + // use the UV scaling factors. 
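
Aside on the added estimate_ref_frame_costs(): each reference's cost is the sum of the bit costs along its path in a small binary signalling tree (intra/inter, then comp/single, then one or two ref bits). A minimal standalone sketch of that composition, assuming the usual libvpx scale of roughly 256 cost units per bit; cost_bit() here is a stand-in for vp9_cost_bit(), and the probabilities are made up:

/* Sketch only: composing ref-frame signalling costs the way
 * estimate_ref_frame_costs() does. 256 units ~= 1 bit is an assumption,
 * consistent with the n * R * 256 scaling used elsewhere in this patch. */
#include <math.h>
#include <stdio.h>

static int cost_bit(int prob, int bit) {
  const double p = bit ? (256 - prob) / 256.0 : prob / 256.0;
  return (int)(-256.0 * log2(p) + 0.5);
}

int main(void) {
  const int intra_inter_p = 200, ref_single_p1 = 120, ref_single_p2 = 90;
  /* LAST: "inter" bit, then first single-ref bit = 0 */
  const int last_cost = cost_bit(intra_inter_p, 1) + cost_bit(ref_single_p1, 0);
  /* GOLDEN: "inter" bit, first ref bit = 1, second ref bit = 0 */
  const int golden_cost = cost_bit(intra_inter_p, 1) +
                          cost_bit(ref_single_p1, 1) +
                          cost_bit(ref_single_p2, 0);
  printf("LAST=%d GOLDEN=%d (cost units)\n", last_cost, golden_cost);
  return 0;
}

Under that scale, the flat 512 fallback used when a prediction type is disabled reads as a two-bit estimate, and comp_mode_p = 128 is the even-odds (one bit) case.
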
+ setup_pred_block(xd, yv12_mb[frame_type], yv12, mi_row, mi_col, + &scale[frame_type], &scale[frame_type]); -static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, - MACROBLOCK *x, - int *rate, - int *rate_tokenonly, - int *distortion, - int *skippable) { - MB_PREDICTION_MODE mode; - MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); - int64_t best_rd = INT64_MAX, this_rd; - int this_rate_tokenonly, this_rate; - int this_distortion, s; + // Gets an initial list of candidate vectors from neighbours and orders them + vp9_find_mv_refs(&cpi->common, xd, xd->mode_info_context, + xd->prev_mode_info_context, + frame_type, + mbmi->ref_mvs[frame_type], + cpi->common.ref_frame_sign_bias); - for (mode = DC_PRED; mode <= TM_PRED; mode++) { - x->e_mbd.mode_info_context->mbmi.uv_mode = mode; - vp9_build_intra_predictors_sbuv_s(&x->e_mbd); + // Candidate refinement carried out at encoder and decoder + vp9_find_best_ref_mvs(xd, + mbmi->ref_mvs[frame_type], + &frame_nearest_mv[frame_type], + &frame_near_mv[frame_type]); - super_block_uvrd(&cpi->common, x, &this_rate_tokenonly, - &this_distortion, &s); - this_rate = this_rate_tokenonly + - x->intra_uv_mode_cost[x->e_mbd.frame_type][mode]; - this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); + // Further refinement that is encode side only to test the top few candidates + // in full and choose the best as the centre point for subsequent searches. + // The current implementation doesn't support scaling. + if (scale[frame_type].x_scale_fp == (1 << VP9_REF_SCALE_SHIFT) && + scale[frame_type].y_scale_fp == (1 << VP9_REF_SCALE_SHIFT)) + mv_pred(cpi, x, yv12_mb[frame_type][0].buf, yv12->y_stride, + frame_type, block_size); +} - if (this_rd < best_rd) { - mode_selected = mode; - best_rd = this_rd; - *rate = this_rate; - *rate_tokenonly = this_rate_tokenonly; - *distortion = this_distortion; - *skippable = s; +static void model_rd_from_var_lapndz(int var, int n, int qstep, + int *rate, int *dist) { + // This function models the rate and distortion for a Laplacian + // source with given variance when quantized with a uniform quantizer + // with given stepsize. The closed form expressions are in: + // Hang and Chen, "Source Model for transform video coder and its + // application - Part I: Fundamental Theory", IEEE Trans. Circ. + // Sys. for Video Tech., April 1997. + // The function is implemented as piecewise approximation to the + // exact computation. 
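
Written out, the piecewise model the code below implements is, per sample, with rate R and distortion D (coefficients rounded from the constants in the code; note 1.4423 is approximately 1/ln 2, so the small-x rate branch is essentially -log2(x) plus a constant):

\[
  s^2 = \frac{\mathrm{var}}{n}, \qquad x = \frac{q_{\mathrm{step}}}{s}
\]
\[
  x > 1,\ y = e^{-x/2}: \quad
  D \approx 2.0700\,y^2 - 2.7643\,y + 1.0040, \qquad
  R \approx 0.9241\,y^2 + 2.7386\,y - 0.0052
\]
\[
  x \le 1: \quad
  D \approx 0.0753\,x^2 + 0.0043\,x - 0.0004, \qquad
  R \approx
  \begin{cases}
    \left(-0.0346\,x^2 + 0.3656\,x + 0.1627\right)^{-1}, & 0.125 < x \le 1,\\[2pt]
    -1.4423\,\ln x + 1.9446, & x \le 0.125,
  \end{cases}
\]
\[
  \mathrm{rate} = \lfloor 256\,n R + 0.5 \rfloor, \qquad
  \mathrm{dist} = \lfloor n D s^2 + 0.5 \rfloor .
\]
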
+ // TODO(debargha): Implement the functions by interpolating from a + // look-up table + vp9_clear_system_state(); + if (var == 0 || n == 0) { + *rate = 0; + *dist = 0; + } else { + double D, R; + double s2 = (double) var / n; + double s = sqrt(s2); + double x = qstep / s; + if (x > 1.0) { + double y = exp(-x / 2); + double y2 = y * y; + D = 2.069981728764738 * y2 - 2.764286806516079 * y + 1.003956960819275; + R = 0.924056758535089 * y2 + 2.738636469814024 * y - 0.005169662030017; + } else { + double x2 = x * x; + D = 0.075303187668830 * x2 + 0.004296954321112 * x - 0.000413209252807; + if (x > 0.125) + R = 1 / (-0.03459733614226 * x2 + 0.36561675733603 * x + + 0.1626989668625); + else + R = -1.442252874826093 * log(x) + 1.944647760719664; + } + if (R < 0) { + *rate = 0; + *dist = var; + } else { + *rate = (n * R * 256 + 0.5); + *dist = (n * D * s2 + 0.5); } } + vp9_clear_system_state(); +} - x->e_mbd.mode_info_context->mbmi.uv_mode = mode_selected; - - return best_rd; +static enum BlockSize get_plane_block_size(BLOCK_SIZE_TYPE bsize, + struct macroblockd_plane *pd) { + return get_block_size(plane_block_width(bsize, pd), + plane_block_height(bsize, pd)); } -static int64_t rd_pick_intra_sb64uv_mode(VP9_COMP *cpi, - MACROBLOCK *x, - int *rate, - int *rate_tokenonly, - int *distortion, - int *skippable) { - MB_PREDICTION_MODE mode; - MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); - int64_t best_rd = INT64_MAX, this_rd; - int this_rate_tokenonly, this_rate; - int this_distortion, s; +static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize, + MACROBLOCK *x, MACROBLOCKD *xd, + int *out_rate_sum, int *out_dist_sum) { + // Note our transform coeffs are 8 times an orthogonal transform. + // Hence quantizer step is also 8 times. To get effective quantizer + // we need to divide by 8 before sending to modeling function. + unsigned int sse, var; + int i, rate_sum = 0, dist_sum = 0; - for (mode = DC_PRED; mode <= TM_PRED; mode++) { - x->e_mbd.mode_info_context->mbmi.uv_mode = mode; - vp9_build_intra_predictors_sb64uv_s(&x->e_mbd); + for (i = 0; i < MAX_MB_PLANE; ++i) { + struct macroblock_plane *const p = &x->plane[i]; + struct macroblockd_plane *const pd = &xd->plane[i]; - super_block_64_uvrd(&cpi->common, x, &this_rate_tokenonly, - &this_distortion, &s); - this_rate = this_rate_tokenonly + - x->intra_uv_mode_cost[x->e_mbd.frame_type][mode]; - this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); + // TODO(dkovalev) the same code in get_plane_block_size + const int bw = plane_block_width(bsize, pd); + const int bh = plane_block_height(bsize, pd); + const enum BlockSize bs = get_block_size(bw, bh); + int rate, dist; + var = cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, + pd->dst.buf, pd->dst.stride, &sse); + model_rd_from_var_lapndz(var, bw * bh, pd->dequant[1] >> 3, &rate, &dist); - if (this_rd < best_rd) { - mode_selected = mode; - best_rd = this_rd; - *rate = this_rate; - *rate_tokenonly = this_rate_tokenonly; - *distortion = this_distortion; - *skippable = s; - } + rate_sum += rate; + dist_sum += dist; } - x->e_mbd.mode_info_context->mbmi.uv_mode = mode_selected; - - return best_rd; + *out_rate_sum = rate_sum; + *out_dist_sum = dist_sum; } -int vp9_cost_mv_ref(VP9_COMP *cpi, - MB_PREDICTION_MODE m, - const int mode_context) { - MACROBLOCKD *xd = &cpi->mb.e_mbd; - int segment_id = xd->mode_info_context->mbmi.segment_id; - - // Dont account for mode here if segment skip is enabled. 
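
To see the scaling convention in model_rd_for_sb() concretely: dequant values sit at 8x the effective quantizer because the transform coefficients are 8x an orthogonal transform, hence the >> 3 before calling the model. A worked numeric instance of the model above, self-contained (the dequant and variance values are invented; x > 1 here, so the exponential branch applies):

/* Sketch only: effective quantizer and the x > 1 branch of the model. */
#include <math.h>
#include <stdio.h>

int main(void) {
  const int dequant_ac = 48;            /* hypothetical pd->dequant[1] */
  const int qstep = dequant_ac >> 3;    /* 8x-scaled coeffs: divide by 8 */
  const int n = 16 * 16, var = 4000;
  const double s2 = (double)var / n, s = sqrt(s2), x = qstep / s;
  const double y = exp(-x / 2);         /* x ~ 1.52 > 1 for these inputs */
  const double D = 2.069981728764738 * y * y - 2.764286806516079 * y +
                   1.003956960819275;
  const double R = 0.924056758535089 * y * y + 2.738636469814024 * y -
                   0.005169662030017;
  printf("qstep=%d x=%.3f rate=%d dist=%d\n", qstep, x,
         (int)(n * R * 256 + 0.5), (int)(n * D * s2 + 0.5));
  return 0;
}
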
- if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) { - VP9_COMMON *pc = &cpi->common; - - vp9_prob p [VP9_MVREFS - 1]; - assert(NEARESTMV <= m && m <= SPLITMV); - vp9_mv_ref_probs(pc, p, mode_context); - return cost_token(vp9_mv_ref_tree, p, - vp9_mv_ref_encoding_array - NEARESTMV + m); - } else - return 0; -} +static INLINE int get_switchable_rate(VP9_COMMON *cm, MACROBLOCK *x) { + MACROBLOCKD *xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; -void vp9_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) { - x->e_mbd.mode_info_context->mbmi.mode = mb; - x->e_mbd.mode_info_context->mbmi.mv[0].as_int = mv->as_int; + const int c = vp9_get_pred_context(cm, xd, PRED_SWITCHABLE_INTERP); + const int m = vp9_switchable_interp_map[mbmi->interp_filter]; + return SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m]; } -static int labels2mode( - MACROBLOCK *x, - int const *labelings, int which_label, - B_PREDICTION_MODE this_mode, - int_mv *this_mv, int_mv *this_second_mv, - int_mv seg_mvs[MAX_REF_FRAMES - 1], - int_mv *best_ref_mv, - int_mv *second_best_ref_mv, - int *mvjcost, int *mvcost[2]) { - MACROBLOCKD *const xd = &x->e_mbd; - MODE_INFO *const mic = xd->mode_info_context; - MB_MODE_INFO * mbmi = &mic->mbmi; - const int mis = xd->mode_info_stride; - - int i, cost = 0, thismvcost = 0; - - /* We have to be careful retrieving previously-encoded motion vectors. - Ones from this macroblock have to be pulled from the BLOCKD array - as they have not yet made it to the bmi array in our MB_MODE_INFO. */ - for (i = 0; i < 16; ++i) { - BLOCKD *const d = xd->block + i; - const int row = i >> 2, col = i & 3; - - B_PREDICTION_MODE m; - - if (labelings[i] != which_label) - continue; - - if (col && labelings[i] == labelings[i - 1]) - m = LEFT4X4; - else if (row && labelings[i] == labelings[i - 4]) - m = ABOVE4X4; - else { - // the only time we should do costing for new motion vector or mode - // is when we are on a new label (jbb May 08, 2007) - switch (m = this_mode) { - case NEW4X4 : - if (mbmi->second_ref_frame > 0) { - this_mv->as_int = seg_mvs[mbmi->ref_frame - 1].as_int; - this_second_mv->as_int = - seg_mvs[mbmi->second_ref_frame - 1].as_int; - } - - thismvcost = vp9_mv_bit_cost(this_mv, best_ref_mv, mvjcost, mvcost, - 102, xd->allow_high_precision_mv); - if (mbmi->second_ref_frame > 0) { - thismvcost += vp9_mv_bit_cost(this_second_mv, second_best_ref_mv, - mvjcost, mvcost, 102, - xd->allow_high_precision_mv); - } - break; - case LEFT4X4: - this_mv->as_int = col ? d[-1].bmi.as_mv[0].as_int : - left_block_mv(xd, mic, i); - if (mbmi->second_ref_frame > 0) - this_second_mv->as_int = col ? d[-1].bmi.as_mv[1].as_int : - left_block_second_mv(xd, mic, i); - break; - case ABOVE4X4: - this_mv->as_int = row ? d[-4].bmi.as_mv[0].as_int : - above_block_mv(mic, i, mis); - if (mbmi->second_ref_frame > 0) - this_second_mv->as_int = row ? 
d[-4].bmi.as_mv[1].as_int : - above_block_second_mv(mic, i, mis); - break; - case ZERO4X4: - this_mv->as_int = 0; - if (mbmi->second_ref_frame > 0) - this_second_mv->as_int = 0; - break; - default: - break; - } - - if (m == ABOVE4X4) { // replace above with left if same - int_mv left_mv, left_second_mv; +static void iterative_motion_search(VP9_COMP *cpi, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize, + int_mv *frame_mv, + YV12_BUFFER_CONFIG **scaled_ref_frame, + int mi_row, int mi_col, + int_mv single_newmv[MAX_REF_FRAMES]) { + int pw = 4 << b_width_log2(bsize), ph = 4 << b_height_log2(bsize); + MACROBLOCKD *xd = &x->e_mbd; + MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; + int refs[2] = { mbmi->ref_frame[0], + (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) }; + int_mv ref_mv[2]; + const enum BlockSize block_size = get_plane_block_size(bsize, &xd->plane[0]); + int ite; + // Prediction buffer from second frame. + uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t)); + + // Do joint motion search in compound mode to get more accurate mv. + struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}}; + struct buf_2d backup_second_yv12[MAX_MB_PLANE] = {{0}}; + struct buf_2d scaled_first_yv12; + int last_besterr[2] = {INT_MAX, INT_MAX}; + + ref_mv[0] = mbmi->ref_mvs[refs[0]][0]; + ref_mv[1] = mbmi->ref_mvs[refs[1]][0]; + + if (scaled_ref_frame[0]) { + int i; + // Swap out the reference frame for a version that's been scaled to + // match the resolution of the current frame, allowing the existing + // motion search code to be used without additional modifications. + for (i = 0; i < MAX_MB_PLANE; i++) + backup_yv12[i] = xd->plane[i].pre[0]; + setup_pre_planes(xd, scaled_ref_frame[0], NULL, mi_row, mi_col, + NULL, NULL); + } + + if (scaled_ref_frame[1]) { + int i; + for (i = 0; i < MAX_MB_PLANE; i++) + backup_second_yv12[i] = xd->plane[i].pre[1]; + + setup_pre_planes(xd, scaled_ref_frame[1], NULL, mi_row, mi_col, + NULL, NULL); + } + + xd->scale_factor[0].set_scaled_offsets(&xd->scale_factor[0], + mi_row, mi_col); + xd->scale_factor[1].set_scaled_offsets(&xd->scale_factor[1], + mi_row, mi_col); + scaled_first_yv12 = xd->plane[0].pre[0]; + + // Initialize mv using single prediction mode result. + frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int; + frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int; + + // Allow joint search multiple times iteratively for each ref frame + // and break out the search loop if it couldn't find better mv. + for (ite = 0; ite < 4; ite++) { + struct buf_2d ref_yv12[2]; + int bestsme = INT_MAX; + int sadpb = x->sadperbit16; + int_mv tmp_mv; + int search_range = 3; + + int tmp_col_min = x->mv_col_min; + int tmp_col_max = x->mv_col_max; + int tmp_row_min = x->mv_row_min; + int tmp_row_max = x->mv_row_max; + int id = ite % 2; + + // Initialized here because of compiler problem in Visual Studio. + ref_yv12[0] = xd->plane[0].pre[0]; + ref_yv12[1] = xd->plane[0].pre[1]; + + // Get pred block from second frame. + vp9_build_inter_predictor(ref_yv12[!id].buf, + ref_yv12[!id].stride, + second_pred, pw, + &frame_mv[refs[!id]], + &xd->scale_factor[!id], + pw, ph, 0, + &xd->subpix); + + // Compound motion search on first ref frame. + if (id) + xd->plane[0].pre[0] = ref_yv12[id]; + vp9_clamp_mv_min_max(x, &ref_mv[id]); + + // Use mv result from single mode as mvp. 
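+      // (MVs are stored in 1/8-pel units, as the >> 3 bounds checks
+      //  elsewhere in this file show; the >> 3 below reduces the single-
+      //  mode result to full-pel as the starting point for the search.)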
+ tmp_mv.as_int = frame_mv[refs[id]].as_int; + + tmp_mv.as_mv.col >>= 3; + tmp_mv.as_mv.row >>= 3; + + // Small-range full-pixel motion search + bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb, + search_range, + &cpi->fn_ptr[block_size], + x->nmvjointcost, x->mvcost, + &ref_mv[id], second_pred, + pw, ph); - left_second_mv.as_int = 0; - left_mv.as_int = col ? d[-1].bmi.as_mv[0].as_int : - left_block_mv(xd, mic, i); - if (mbmi->second_ref_frame > 0) - left_second_mv.as_int = col ? d[-1].bmi.as_mv[1].as_int : - left_block_second_mv(xd, mic, i); + x->mv_col_min = tmp_col_min; + x->mv_col_max = tmp_col_max; + x->mv_row_min = tmp_row_min; + x->mv_row_max = tmp_row_max; - if (left_mv.as_int == this_mv->as_int && - (mbmi->second_ref_frame <= 0 || - left_second_mv.as_int == this_second_mv->as_int)) - m = LEFT4X4; - } + if (bestsme < INT_MAX) { + int dis; /* TODO: use dis in distortion calculation later. */ + unsigned int sse; -#if CONFIG_NEWBINTRAMODES - cost = x->inter_bmode_costs[ - m == B_CONTEXT_PRED ? m - CONTEXT_PRED_REPLACEMENTS : m]; -#else - cost = x->inter_bmode_costs[m]; -#endif + bestsme = vp9_find_best_sub_pixel_comp(x, &tmp_mv, + &ref_mv[id], + x->errorperbit, + &cpi->fn_ptr[block_size], + x->nmvjointcost, x->mvcost, + &dis, &sse, second_pred, + pw, ph); } - d->bmi.as_mv[0].as_int = this_mv->as_int; - if (mbmi->second_ref_frame > 0) - d->bmi.as_mv[1].as_int = this_second_mv->as_int; - - x->partition_info->bmi[i].mode = m; - x->partition_info->bmi[i].mv.as_int = this_mv->as_int; - if (mbmi->second_ref_frame > 0) - x->partition_info->bmi[i].second_mv.as_int = this_second_mv->as_int; - } - - cost += thismvcost; - return cost; -} - -static int64_t encode_inter_mb_segment(VP9_COMMON *const cm, - MACROBLOCK *x, - int const *labels, - int which_label, - int *labelyrate, - int *distortion, - ENTROPY_CONTEXT *ta, - ENTROPY_CONTEXT *tl) { - int i; - MACROBLOCKD *xd = &x->e_mbd; - - *labelyrate = 0; - *distortion = 0; - for (i = 0; i < 16; i++) { - if (labels[i] == which_label) { - BLOCKD *bd = &x->e_mbd.block[i]; - BLOCK *be = &x->block[i]; - int thisdistortion; - - vp9_build_inter_predictor(*(bd->base_pre) + bd->pre, - bd->pre_stride, - bd->predictor, 16, - &bd->bmi.as_mv[0], - &xd->scale_factor[0], - 4, 4, 0 /* no avg */, &xd->subpix); - - // TODO(debargha): Make this work properly with the - // implicit-compoundinter-weight experiment when implicit - // weighting for splitmv modes is turned on. 
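
For orientation, the loop added in iterative_motion_search() above is an alternating, coordinate-descent style refinement: fix one reference's MV, refine the other against the combined predictor, and stop as soon as an iteration fails to improve. A toy standalone illustration of just that control flow (the cost surface below is made up, not codec code):

/* Toy sketch of the alternating pattern in iterative_motion_search(). */
#include <stdio.h>
#include <stdlib.h>

static int err(int mv0, int mv1) {           /* stand-in cost surface */
  return abs(mv0 - 7) + abs(mv1 + 3) + abs(mv0 + mv1 - 4);
}

int main(void) {
  int mv[2] = {0, 0};                        /* "single-mode" init */
  int last_besterr[2] = {1 << 30, 1 << 30};
  for (int ite = 0; ite < 4; ++ite) {
    const int id = ite % 2;                  /* which MV to refine */
    int best_step = 0, best_err = err(mv[0], mv[1]);
    for (int step = -3; step <= 3; ++step) { /* small search range */
      int trial[2] = {mv[0], mv[1]};
      trial[id] += step;
      const int e = err(trial[0], trial[1]);
      if (e < best_err) { best_err = e; best_step = step; }
    }
    if (best_err < last_besterr[id]) {
      mv[id] += best_step;
      last_besterr[id] = best_err;
    } else {
      break;                                 /* no improvement: stop */
    }
  }
  printf("mv = (%d, %d)\n", mv[0], mv[1]);
  return 0;
}
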
- if (xd->mode_info_context->mbmi.second_ref_frame > 0) { - vp9_build_inter_predictor( - *(bd->base_second_pre) + bd->pre, bd->pre_stride, bd->predictor, 16, - &bd->bmi.as_mv[1], &xd->scale_factor[1], 4, 4, - 1 << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT) /* avg */, - &xd->subpix); - } + if (id) + xd->plane[0].pre[0] = scaled_first_yv12; - vp9_subtract_b(be, bd, 16); - x->fwd_txm4x4(be->src_diff, be->coeff, 32); - x->quantize_b_4x4(x, i); - thisdistortion = vp9_block_error(be->coeff, bd->dqcoeff, 16); - *distortion += thisdistortion; - *labelyrate += cost_coeffs(cm, x, i, PLANE_TYPE_Y_WITH_DC, - ta + vp9_block2above[TX_4X4][i], - tl + vp9_block2left[TX_4X4][i], TX_4X4); + if (bestsme < last_besterr[id]) { + frame_mv[refs[id]].as_int = tmp_mv.as_int; + last_besterr[id] = bestsme; + } else { + break; } } - *distortion >>= 2; - return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion); -} -static int64_t encode_inter_mb_segment_8x8(VP9_COMMON *const cm, - MACROBLOCK *x, - int const *labels, - int which_label, - int *labelyrate, - int *distortion, - int64_t *otherrd, - ENTROPY_CONTEXT *ta, - ENTROPY_CONTEXT *tl) { - int i, j; - MACROBLOCKD *xd = &x->e_mbd; - const int iblock[4] = { 0, 1, 4, 5 }; - int othercost = 0, otherdist = 0; - ENTROPY_CONTEXT_PLANES tac, tlc; - ENTROPY_CONTEXT *tacp = (ENTROPY_CONTEXT *) &tac, - *tlcp = (ENTROPY_CONTEXT *) &tlc; - - if (otherrd) { - memcpy(&tac, ta, sizeof(ENTROPY_CONTEXT_PLANES)); - memcpy(&tlc, tl, sizeof(ENTROPY_CONTEXT_PLANES)); + // restore the predictor + if (scaled_ref_frame[0]) { + int i; + for (i = 0; i < MAX_MB_PLANE; i++) + xd->plane[i].pre[0] = backup_yv12[i]; } - *distortion = 0; - *labelyrate = 0; - for (i = 0; i < 4; i++) { - int ib = vp9_i8x8_block[i]; - - if (labels[ib] == which_label) { - const int use_second_ref = - xd->mode_info_context->mbmi.second_ref_frame > 0; - int which_mv; - int idx = (ib & 8) + ((ib & 2) << 1); - BLOCKD *bd = &xd->block[ib], *bd2 = &xd->block[idx]; - BLOCK *be = &x->block[ib], *be2 = &x->block[idx]; - int thisdistortion; - - for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { - uint8_t **base_pre = which_mv ? bd->base_second_pre : bd->base_pre; - - // TODO(debargha): Make this work properly with the - // implicit-compoundinter-weight experiment when implicit - // weighting for splitmv modes is turned on. 
- vp9_build_inter_predictor( - *base_pre + bd->pre, bd->pre_stride, bd->predictor, 16, - &bd->bmi.as_mv[which_mv], &xd->scale_factor[which_mv], 8, 8, - which_mv << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT), - &xd->subpix); - } - - vp9_subtract_4b_c(be, bd, 16); - - if (xd->mode_info_context->mbmi.txfm_size == TX_4X4) { - if (otherrd) { - x->fwd_txm8x8(be->src_diff, be2->coeff, 32); - x->quantize_b_8x8(x, idx, DCT_DCT); - thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64); - otherdist += thisdistortion; - xd->mode_info_context->mbmi.txfm_size = TX_8X8; - othercost += cost_coeffs(cm, x, idx, PLANE_TYPE_Y_WITH_DC, - tacp + vp9_block2above[TX_8X8][idx], - tlcp + vp9_block2left[TX_8X8][idx], - TX_8X8); - xd->mode_info_context->mbmi.txfm_size = TX_4X4; - } - for (j = 0; j < 4; j += 2) { - bd = &xd->block[ib + iblock[j]]; - be = &x->block[ib + iblock[j]]; - x->fwd_txm8x4(be->src_diff, be->coeff, 32); - x->quantize_b_4x4_pair(x, ib + iblock[j], ib + iblock[j] + 1); - thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32); - *distortion += thisdistortion; - *labelyrate += - cost_coeffs(cm, x, ib + iblock[j], PLANE_TYPE_Y_WITH_DC, - ta + vp9_block2above[TX_4X4][ib + iblock[j]], - tl + vp9_block2left[TX_4X4][ib + iblock[j]], - TX_4X4); - *labelyrate += - cost_coeffs(cm, x, ib + iblock[j] + 1, - PLANE_TYPE_Y_WITH_DC, - ta + vp9_block2above[TX_4X4][ib + iblock[j] + 1], - tl + vp9_block2left[TX_4X4][ib + iblock[j]], - TX_4X4); - } - } else /* 8x8 */ { - if (otherrd) { - for (j = 0; j < 4; j += 2) { - BLOCKD *bd = &xd->block[ib + iblock[j]]; - BLOCK *be = &x->block[ib + iblock[j]]; - x->fwd_txm8x4(be->src_diff, be->coeff, 32); - x->quantize_b_4x4_pair(x, ib + iblock[j], ib + iblock[j]); - thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32); - otherdist += thisdistortion; - xd->mode_info_context->mbmi.txfm_size = TX_4X4; - othercost += - cost_coeffs(cm, x, ib + iblock[j], PLANE_TYPE_Y_WITH_DC, - tacp + vp9_block2above[TX_4X4][ib + iblock[j]], - tlcp + vp9_block2left[TX_4X4][ib + iblock[j]], - TX_4X4); - othercost += - cost_coeffs(cm, x, ib + iblock[j] + 1, - PLANE_TYPE_Y_WITH_DC, - tacp + vp9_block2above[TX_4X4][ib + iblock[j] + 1], - tlcp + vp9_block2left[TX_4X4][ib + iblock[j]], - TX_4X4); - xd->mode_info_context->mbmi.txfm_size = TX_8X8; - } - } - x->fwd_txm8x8(be->src_diff, be2->coeff, 32); - x->quantize_b_8x8(x, idx, DCT_DCT); - thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64); - *distortion += thisdistortion; - *labelyrate += cost_coeffs(cm, x, idx, PLANE_TYPE_Y_WITH_DC, - ta + vp9_block2above[TX_8X8][idx], - tl + vp9_block2left[TX_8X8][idx], TX_8X8); - } - } - } - *distortion >>= 2; - if (otherrd) { - otherdist >>= 2; - *otherrd = RDCOST(x->rdmult, x->rddiv, othercost, otherdist); + if (scaled_ref_frame[1]) { + int i; + for (i = 0; i < MAX_MB_PLANE; i++) + xd->plane[i].pre[1] = backup_second_yv12[i]; } - return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion); -} - -static const unsigned int segmentation_to_sseshift[4] = {3, 3, 2, 0}; - -typedef struct { - int_mv *ref_mv, *second_ref_mv; - int_mv mvp; - - int64_t segment_rd; - SPLITMV_PARTITIONING_TYPE segment_num; - TX_SIZE txfm_size; - int r; - int d; - int segment_yrate; - B_PREDICTION_MODE modes[16]; - int_mv mvs[16], second_mvs[16]; - int eobs[16]; - - int mvthresh; - int *mdcounts; - - int_mv sv_mvp[4]; // save 4 mvp from 8x8 - int sv_istep[2]; // save 2 initial step_param for 16x8/8x16 - -} BEST_SEG_INFO; - -static INLINE int mv_check_bounds(MACROBLOCK *x, int_mv *mv) { - int r = 0; - r 
|= (mv->as_mv.row >> 3) < x->mv_row_min; - r |= (mv->as_mv.row >> 3) > x->mv_row_max; - r |= (mv->as_mv.col >> 3) < x->mv_col_min; - r |= (mv->as_mv.col >> 3) > x->mv_col_max; - return r; + vpx_free(second_pred); } -static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, - BEST_SEG_INFO *bsi, - SPLITMV_PARTITIONING_TYPE segmentation, - TX_SIZE tx_size, int64_t *otherrds, - int64_t *rds, int *completed, - /* 16 = n_blocks */ - int_mv seg_mvs[16 /* n_blocks */] - [MAX_REF_FRAMES - 1]) { - int i, j; - int const *labels; - int br = 0, bd = 0; - B_PREDICTION_MODE this_mode; - MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; - - int label_count; - int64_t this_segment_rd = 0, other_segment_rd; - int label_mv_thresh; - int rate = 0; - int sbr = 0, sbd = 0; - int segmentyrate = 0; - int best_eobs[16] = { 0 }; - - vp9_variance_fn_ptr_t *v_fn_ptr; +static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize, + int64_t txfm_cache[], + int *rate2, int *distortion, int *skippable, + int *rate_y, int *distortion_y, + int *rate_uv, int *distortion_uv, + int *mode_excluded, int *disable_skip, + INTERPOLATIONFILTERTYPE *best_filter, + int_mv *frame_mv, + YV12_BUFFER_CONFIG **scaled_ref_frame, + int mi_row, int mi_col, + int_mv single_newmv[MAX_REF_FRAMES]) { + const int bw = 1 << mi_width_log2(bsize), bh = 1 << mi_height_log2(bsize); - ENTROPY_CONTEXT_PLANES t_above, t_left; - ENTROPY_CONTEXT *ta, *tl; - ENTROPY_CONTEXT_PLANES t_above_b, t_left_b; - ENTROPY_CONTEXT *ta_b, *tl_b; + VP9_COMMON *cm = &cpi->common; + MACROBLOCKD *xd = &x->e_mbd; + const enum BlockSize block_size = get_plane_block_size(bsize, &xd->plane[0]); + const enum BlockSize uv_block_size = get_plane_block_size(bsize, + &xd->plane[1]); + MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; + const int is_comp_pred = (mbmi->ref_frame[1] > 0); + const int num_refs = is_comp_pred ? 2 : 1; + const int this_mode = mbmi->mode; + int i; + int refs[2] = { mbmi->ref_frame[0], + (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) }; + int_mv cur_mv[2]; + int_mv ref_mv[2]; + int64_t this_rd = 0; + unsigned char tmp_buf[MAX_MB_PLANE][64 * 64]; + int pred_exists = 0; + int interpolating_intpel_seen = 0; + int intpel_mv; + int64_t rd, best_rd = INT64_MAX; - vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + switch (this_mode) { + case NEWMV: + ref_mv[0] = mbmi->ref_mvs[refs[0]][0]; + ref_mv[1] = mbmi->ref_mvs[refs[1]][0]; - ta = (ENTROPY_CONTEXT *)&t_above; - tl = (ENTROPY_CONTEXT *)&t_left; - ta_b = (ENTROPY_CONTEXT *)&t_above_b; - tl_b = (ENTROPY_CONTEXT *)&t_left_b; + if (is_comp_pred) { + // Initialize mv using single prediction mode result. + frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int; + frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int; - v_fn_ptr = &cpi->fn_ptr[segmentation]; - labels = vp9_mbsplits[segmentation]; - label_count = vp9_mbsplit_count[segmentation]; + if (cpi->sf.comp_inter_joint_search_thresh < bsize) + iterative_motion_search(cpi, x, bsize, frame_mv, scaled_ref_frame, + mi_row, mi_col, single_newmv); - // 64 makes this threshold really big effectively - // making it so that we very rarely check mvs on - // segments. 
setting this to 1 would make mv thresh - // roughly equal to what it is for macroblocks - label_mv_thresh = 1 * bsi->mvthresh / label_count; - - // Segmentation method overheads - rate = cost_token(vp9_mbsplit_tree, vp9_mbsplit_probs, - vp9_mbsplit_encodings + segmentation); - rate += vp9_cost_mv_ref(cpi, SPLITMV, - mbmi->mb_mode_context[mbmi->ref_frame]); - this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0); - br += rate; - other_segment_rd = this_segment_rd; - - mbmi->txfm_size = tx_size; - for (i = 0; i < label_count && this_segment_rd < bsi->segment_rd; i++) { - int_mv mode_mv[B_MODE_COUNT], second_mode_mv[B_MODE_COUNT]; - int64_t best_label_rd = INT64_MAX, best_other_rd = INT64_MAX; - B_PREDICTION_MODE mode_selected = ZERO4X4; - int bestlabelyrate = 0; - - // search for the best motion vector on this segment - for (this_mode = LEFT4X4; this_mode <= NEW4X4; this_mode ++) { - int64_t this_rd, other_rd; - int distortion; - int labelyrate; - ENTROPY_CONTEXT_PLANES t_above_s, t_left_s; - ENTROPY_CONTEXT *ta_s; - ENTROPY_CONTEXT *tl_s; - - vpx_memcpy(&t_above_s, &t_above, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left_s, &t_left, sizeof(ENTROPY_CONTEXT_PLANES)); - - ta_s = (ENTROPY_CONTEXT *)&t_above_s; - tl_s = (ENTROPY_CONTEXT *)&t_left_s; - - // motion search for newmv (single predictor case only) - if (mbmi->second_ref_frame <= 0 && this_mode == NEW4X4) { - int sseshift, n; - int step_param = 0; - int further_steps; - int thissme, bestsme = INT_MAX; - BLOCK *c; - BLOCKD *e; - - /* Is the best so far sufficiently good that we cant justify doing - * and new motion search. */ - if (best_label_rd < label_mv_thresh) - break; - - if (cpi->compressor_speed) { - if (segmentation == PARTITIONING_8X16 || - segmentation == PARTITIONING_16X8) { - bsi->mvp.as_int = bsi->sv_mvp[i].as_int; - if (i == 1 && segmentation == PARTITIONING_16X8) - bsi->mvp.as_int = bsi->sv_mvp[2].as_int; - - step_param = bsi->sv_istep[i]; - } - - // use previous block's result as next block's MV predictor. - if (segmentation == PARTITIONING_4X4 && i > 0) { - bsi->mvp.as_int = x->e_mbd.block[i - 1].bmi.as_mv[0].as_int; - if (i == 4 || i == 8 || i == 12) - bsi->mvp.as_int = x->e_mbd.block[i - 4].bmi.as_mv[0].as_int; - step_param = 2; - } - } - - further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; - - { - int sadpb = x->sadperbit4; - int_mv mvp_full; - - mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3; - mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3; - - // find first label - n = vp9_mbsplit_offset[segmentation][i]; - - c = &x->block[n]; - e = &x->e_mbd.block[n]; - - bestsme = vp9_full_pixel_diamond(cpi, x, c, e, &mvp_full, step_param, - sadpb, further_steps, 0, v_fn_ptr, - bsi->ref_mv, &mode_mv[NEW4X4]); - - sseshift = segmentation_to_sseshift[segmentation]; - - // Should we do a full search (best quality only) - if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) { - /* Check if mvp_full is within the range. 
*/ - clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, - x->mv_row_min, x->mv_row_max); - - thissme = cpi->full_search_sad(x, c, e, &mvp_full, - sadpb, 16, v_fn_ptr, - x->nmvjointcost, x->mvcost, - bsi->ref_mv); - - if (thissme < bestsme) { - bestsme = thissme; - mode_mv[NEW4X4].as_int = e->bmi.as_mv[0].as_int; - } else { - /* The full search result is actually worse so re-instate the - * previous best vector */ - e->bmi.as_mv[0].as_int = mode_mv[NEW4X4].as_int; - } - } - } - - if (bestsme < INT_MAX) { - int distortion; - unsigned int sse; - cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], - bsi->ref_mv, x->errorperbit, v_fn_ptr, - x->nmvjointcost, x->mvcost, - &distortion, &sse); - - // safe motion search result for use in compound prediction - seg_mvs[i][mbmi->ref_frame - 1].as_int = mode_mv[NEW4X4].as_int; - } - } else if (mbmi->second_ref_frame > 0 && this_mode == NEW4X4) { - /* NEW4X4 */ - /* motion search not completed? Then skip newmv for this block with - * comppred */ - if (seg_mvs[i][mbmi->second_ref_frame - 1].as_int == INVALID_MV || - seg_mvs[i][mbmi->ref_frame - 1].as_int == INVALID_MV) { - continue; - } - } - - rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode], - &second_mode_mv[this_mode], seg_mvs[i], - bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost, - x->mvcost); - - // Trap vectors that reach beyond the UMV borders - if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) || - ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) || - ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) || - ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) { - continue; - } - if (mbmi->second_ref_frame > 0 && - mv_check_bounds(x, &second_mode_mv[this_mode])) - continue; - - if (segmentation == PARTITIONING_4X4) { - this_rd = encode_inter_mb_segment(&cpi->common, - x, labels, i, &labelyrate, - &distortion, ta_s, tl_s); - other_rd = this_rd; - } else { - this_rd = encode_inter_mb_segment_8x8(&cpi->common, - x, labels, i, &labelyrate, - &distortion, &other_rd, - ta_s, tl_s); - } - this_rd += RDCOST(x->rdmult, x->rddiv, rate, 0); - rate += labelyrate; - - if (this_rd < best_label_rd) { - sbr = rate; - sbd = distortion; - bestlabelyrate = labelyrate; - mode_selected = this_mode; - best_label_rd = this_rd; - if (x->e_mbd.mode_info_context->mbmi.txfm_size == TX_4X4) { - for (j = 0; j < 16; j++) - if (labels[j] == i) - best_eobs[j] = x->e_mbd.eobs[j]; - } else { - for (j = 0; j < 4; j++) { - int ib = vp9_i8x8_block[j], idx = j * 4; - - if (labels[ib] == i) - best_eobs[idx] = x->e_mbd.eobs[idx]; - } - } - if (other_rd < best_other_rd) - best_other_rd = other_rd; - - vpx_memcpy(ta_b, ta_s, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(tl_b, tl_s, sizeof(ENTROPY_CONTEXT_PLANES)); - - } - } /*for each 4x4 mode*/ - - vpx_memcpy(ta, ta_b, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(tl, tl_b, sizeof(ENTROPY_CONTEXT_PLANES)); - - labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected], - &second_mode_mv[mode_selected], seg_mvs[i], - bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost, x->mvcost); - - br += sbr; - bd += sbd; - segmentyrate += bestlabelyrate; - this_segment_rd += best_label_rd; - other_segment_rd += best_other_rd; - if (rds) - rds[i] = this_segment_rd; - if (otherrds) - otherrds[i] = other_segment_rd; - } /* for each label */ - - if (this_segment_rd < bsi->segment_rd) { - bsi->r = br; - bsi->d = bd; - bsi->segment_yrate = segmentyrate; - bsi->segment_rd = this_segment_rd; - bsi->segment_num = segmentation; - bsi->txfm_size = mbmi->txfm_size; - - // 
store everything needed to come back to this!! - for (i = 0; i < 16; i++) { - bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv; - if (mbmi->second_ref_frame > 0) - bsi->second_mvs[i].as_mv = x->partition_info->bmi[i].second_mv.as_mv; - bsi->modes[i] = x->partition_info->bmi[i].mode; - bsi->eobs[i] = best_eobs[i]; - } - } - - if (completed) { - *completed = i; - } -} - -static void rd_check_segment(VP9_COMP *cpi, MACROBLOCK *x, - BEST_SEG_INFO *bsi, - unsigned int segmentation, - /* 16 = n_blocks */ - int_mv seg_mvs[16][MAX_REF_FRAMES - 1], - int64_t txfm_cache[NB_TXFM_MODES]) { - int i, n, c = vp9_mbsplit_count[segmentation]; - - if (segmentation == PARTITIONING_4X4) { - int64_t rd[16]; - - rd_check_segment_txsize(cpi, x, bsi, segmentation, TX_4X4, NULL, - rd, &n, seg_mvs); - if (n == c) { - for (i = 0; i < NB_TXFM_MODES; i++) { - if (rd[c - 1] < txfm_cache[i]) - txfm_cache[i] = rd[c - 1]; - } - } - } else { - int64_t diff, base_rd; - int cost4x4 = vp9_cost_bit(cpi->common.prob_tx[0], 0); - int cost8x8 = vp9_cost_bit(cpi->common.prob_tx[0], 1); - - if (cpi->common.txfm_mode == TX_MODE_SELECT) { - int64_t rd4x4[4], rd8x8[4]; - int n4x4, n8x8, nmin; - BEST_SEG_INFO bsi4x4, bsi8x8; - - /* factor in cost of cost4x4/8x8 in decision */ - vpx_memcpy(&bsi4x4, bsi, sizeof(*bsi)); - vpx_memcpy(&bsi8x8, bsi, sizeof(*bsi)); - rd_check_segment_txsize(cpi, x, &bsi4x4, segmentation, - TX_4X4, NULL, rd4x4, &n4x4, seg_mvs); - rd_check_segment_txsize(cpi, x, &bsi8x8, segmentation, - TX_8X8, NULL, rd8x8, &n8x8, seg_mvs); - if (bsi4x4.segment_num == segmentation) { - bsi4x4.segment_rd += RDCOST(x->rdmult, x->rddiv, cost4x4, 0); - if (bsi4x4.segment_rd < bsi->segment_rd) - vpx_memcpy(bsi, &bsi4x4, sizeof(*bsi)); - } - if (bsi8x8.segment_num == segmentation) { - bsi8x8.segment_rd += RDCOST(x->rdmult, x->rddiv, cost8x8, 0); - if (bsi8x8.segment_rd < bsi->segment_rd) - vpx_memcpy(bsi, &bsi8x8, sizeof(*bsi)); - } - n = n4x4 > n8x8 ? n4x4 : n8x8; - if (n == c) { - nmin = n4x4 < n8x8 ? 
n4x4 : n8x8; - diff = rd8x8[nmin - 1] - rd4x4[nmin - 1]; - if (n == n4x4) { - base_rd = rd4x4[c - 1]; - } else { - base_rd = rd8x8[c - 1] - diff; - } - } - } else { - int64_t rd[4], otherrd[4]; - - if (cpi->common.txfm_mode == ONLY_4X4) { - rd_check_segment_txsize(cpi, x, bsi, segmentation, TX_4X4, otherrd, - rd, &n, seg_mvs); - if (n == c) { - base_rd = rd[c - 1]; - diff = otherrd[c - 1] - rd[c - 1]; - } - } else /* use 8x8 transform */ { - rd_check_segment_txsize(cpi, x, bsi, segmentation, TX_8X8, otherrd, - rd, &n, seg_mvs); - if (n == c) { - diff = rd[c - 1] - otherrd[c - 1]; - base_rd = otherrd[c - 1]; - } - } - } - - if (n == c) { - if (base_rd < txfm_cache[ONLY_4X4]) { - txfm_cache[ONLY_4X4] = base_rd; - } - if (base_rd + diff < txfm_cache[ALLOW_8X8]) { - txfm_cache[ALLOW_8X8] = txfm_cache[ALLOW_16X16] = - txfm_cache[ALLOW_32X32] = base_rd + diff; - } - if (diff < 0) { - base_rd += diff + RDCOST(x->rdmult, x->rddiv, cost8x8, 0); - } else { - base_rd += RDCOST(x->rdmult, x->rddiv, cost4x4, 0); - } - if (base_rd < txfm_cache[TX_MODE_SELECT]) { - txfm_cache[TX_MODE_SELECT] = base_rd; - } - } - } -} - -static INLINE void cal_step_param(int sr, int *sp) { - int step = 0; - - if (sr > MAX_FIRST_STEP) sr = MAX_FIRST_STEP; - else if (sr < 1) sr = 1; - - while (sr >>= 1) - step++; - - *sp = MAX_MVSEARCH_STEPS - 1 - step; -} - -static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, - int_mv *best_ref_mv, - int_mv *second_best_ref_mv, - int64_t best_rd, - int *mdcounts, - int *returntotrate, - int *returnyrate, - int *returndistortion, - int *skippable, int mvthresh, - int_mv seg_mvs[NB_PARTITIONINGS] - [16 /* n_blocks */] - [MAX_REF_FRAMES - 1], - int64_t txfm_cache[NB_TXFM_MODES]) { - int i; - BEST_SEG_INFO bsi; - MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; - - vpx_memset(&bsi, 0, sizeof(bsi)); - for (i = 0; i < NB_TXFM_MODES; i++) - txfm_cache[i] = INT64_MAX; - - bsi.segment_rd = best_rd; - bsi.ref_mv = best_ref_mv; - bsi.second_ref_mv = second_best_ref_mv; - bsi.mvp.as_int = best_ref_mv->as_int; - bsi.mvthresh = mvthresh; - bsi.mdcounts = mdcounts; - bsi.txfm_size = TX_4X4; - - for (i = 0; i < 16; i++) - bsi.modes[i] = ZERO4X4; - - if (cpi->compressor_speed == 0) { - /* for now, we will keep the original segmentation order - when in best quality mode */ - rd_check_segment(cpi, x, &bsi, PARTITIONING_16X8, - seg_mvs[PARTITIONING_16X8], txfm_cache); - rd_check_segment(cpi, x, &bsi, PARTITIONING_8X16, - seg_mvs[PARTITIONING_8X16], txfm_cache); - rd_check_segment(cpi, x, &bsi, PARTITIONING_8X8, - seg_mvs[PARTITIONING_8X8], txfm_cache); - rd_check_segment(cpi, x, &bsi, PARTITIONING_4X4, - seg_mvs[PARTITIONING_4X4], txfm_cache); - } else { - int sr; - - rd_check_segment(cpi, x, &bsi, PARTITIONING_8X8, - seg_mvs[PARTITIONING_8X8], txfm_cache); - - if (bsi.segment_rd < best_rd) { - int tmp_col_min = x->mv_col_min; - int tmp_col_max = x->mv_col_max; - int tmp_row_min = x->mv_row_min; - int tmp_row_max = x->mv_row_max; - - vp9_clamp_mv_min_max(x, best_ref_mv); - - /* Get 8x8 result */ - bsi.sv_mvp[0].as_int = bsi.mvs[0].as_int; - bsi.sv_mvp[1].as_int = bsi.mvs[2].as_int; - bsi.sv_mvp[2].as_int = bsi.mvs[8].as_int; - bsi.sv_mvp[3].as_int = bsi.mvs[10].as_int; - - /* Use 8x8 result as 16x8/8x16's predictor MV. Adjust search range - * according to the closeness of 2 MV. 
*/ - /* block 8X16 */ - sr = MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[2].as_mv.row)) >> 3, - (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[2].as_mv.col)) >> 3); - cal_step_param(sr, &bsi.sv_istep[0]); - - sr = MAXF((abs(bsi.sv_mvp[1].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3, - (abs(bsi.sv_mvp[1].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3); - cal_step_param(sr, &bsi.sv_istep[1]); - - rd_check_segment(cpi, x, &bsi, PARTITIONING_8X16, - seg_mvs[PARTITIONING_8X16], txfm_cache); - - /* block 16X8 */ - sr = MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[1].as_mv.row)) >> 3, - (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[1].as_mv.col)) >> 3); - cal_step_param(sr, &bsi.sv_istep[0]); - - sr = MAXF((abs(bsi.sv_mvp[2].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3, - (abs(bsi.sv_mvp[2].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3); - cal_step_param(sr, &bsi.sv_istep[1]); - - rd_check_segment(cpi, x, &bsi, PARTITIONING_16X8, - seg_mvs[PARTITIONING_16X8], txfm_cache); - - /* If 8x8 is better than 16x8/8x16, then do 4x4 search */ - /* Not skip 4x4 if speed=0 (good quality) */ - if (cpi->sf.no_skip_block4x4_search || - bsi.segment_num == PARTITIONING_8X8) { - /* || (sv_segment_rd8x8-bsi.segment_rd) < sv_segment_rd8x8>>5) */ - bsi.mvp.as_int = bsi.sv_mvp[0].as_int; - rd_check_segment(cpi, x, &bsi, PARTITIONING_4X4, - seg_mvs[PARTITIONING_4X4], txfm_cache); - } - - /* restore UMV window */ - x->mv_col_min = tmp_col_min; - x->mv_col_max = tmp_col_max; - x->mv_row_min = tmp_row_min; - x->mv_row_max = tmp_row_max; - } - } - - /* set it to the best */ - for (i = 0; i < 16; i++) { - BLOCKD *bd = &x->e_mbd.block[i]; - - bd->bmi.as_mv[0].as_int = bsi.mvs[i].as_int; - if (mbmi->second_ref_frame > 0) - bd->bmi.as_mv[1].as_int = bsi.second_mvs[i].as_int; - x->e_mbd.eobs[i] = bsi.eobs[i]; - } - - *returntotrate = bsi.r; - *returndistortion = bsi.d; - *returnyrate = bsi.segment_yrate; - *skippable = bsi.txfm_size == TX_4X4 ? - vp9_mby_is_skippable_4x4(&x->e_mbd) : - vp9_mby_is_skippable_8x8(&x->e_mbd); - - /* save partitions */ - mbmi->txfm_size = bsi.txfm_size; - mbmi->partitioning = bsi.segment_num; - x->partition_info->count = vp9_mbsplit_count[bsi.segment_num]; - - for (i = 0; i < x->partition_info->count; i++) { - int j; - - j = vp9_mbsplit_offset[bsi.segment_num][i]; - - x->partition_info->bmi[i].mode = bsi.modes[j]; - x->partition_info->bmi[i].mv.as_mv = bsi.mvs[j].as_mv; - if (mbmi->second_ref_frame > 0) - x->partition_info->bmi[i].second_mv.as_mv = bsi.second_mvs[j].as_mv; - } - /* - * used to set mbmi->mv.as_int - */ - x->partition_info->bmi[15].mv.as_int = bsi.mvs[15].as_int; - if (mbmi->second_ref_frame > 0) - x->partition_info->bmi[15].second_mv.as_int = bsi.second_mvs[15].as_int; - - return (int)(bsi.segment_rd); -} - -static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, - uint8_t *ref_y_buffer, int ref_y_stride, - int ref_frame, enum BlockSize block_size ) { - MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; - int_mv this_mv; - int i; - int zero_seen = FALSE; - int best_index = 0; - int best_sad = INT_MAX; - int this_sad = INT_MAX; - - BLOCK *b = &x->block[0]; - uint8_t *src_y_ptr = *(b->base_src); - uint8_t *ref_y_ptr; - int row_offset, col_offset; - - // Get the sad for each candidate reference mv - for (i = 0; i < 4; i++) { - this_mv.as_int = mbmi->ref_mvs[ref_frame][i].as_int; - - // The list is at an end if we see 0 for a second time. 
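
The zero_seen logic in the deleted mv_pred() (the helper itself survives; the new setup_buffer_inter() path above still calls it) treats the candidate list as zero-padded: one zero MV is a legitimate candidate, a second zero marks the end of the real entries. A standalone sketch of that scan, with invented candidate values:

/* Sketch only: stop at the second zero, since the list is zero-padded. */
#include <stdio.h>

int main(void) {
  const int candidates[4] = {64, 0, 0, 33};  /* packed MVs, made up */
  int zero_seen = 0;
  for (int i = 0; i < 4; ++i) {
    if (!candidates[i] && zero_seen)
      break;                   /* second zero: past the real candidates */
    zero_seen = zero_seen || !candidates[i];
    printf("evaluate candidate %d\n", i);    /* SAD check would go here */
  }
  return 0;
}
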
- if (!this_mv.as_int && zero_seen) - break; - zero_seen = zero_seen || !this_mv.as_int; - - row_offset = this_mv.as_mv.row >> 3; - col_offset = this_mv.as_mv.col >> 3; - ref_y_ptr = ref_y_buffer + (ref_y_stride * row_offset) + col_offset; - - // Find sad for current vector. - this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, b->src_stride, - ref_y_ptr, ref_y_stride, - 0x7fffffff); - - // Note if it is the best so far. - if (this_sad < best_sad) { - best_sad = this_sad; - best_index = i; - } - } - - // Note the index of the mv that worked best in the reference list. - x->mv_best_ref_index[ref_frame] = best_index; -} - -static void set_i8x8_block_modes(MACROBLOCK *x, int modes[4]) { - int i; - MACROBLOCKD *xd = &x->e_mbd; - for (i = 0; i < 4; i++) { - int ib = vp9_i8x8_block[i]; - xd->mode_info_context->bmi[ib + 0].as_mode.first = modes[i]; - xd->mode_info_context->bmi[ib + 1].as_mode.first = modes[i]; - xd->mode_info_context->bmi[ib + 4].as_mode.first = modes[i]; - xd->mode_info_context->bmi[ib + 5].as_mode.first = modes[i]; - // printf("%d,%d,%d,%d\n", - // modes[0], modes[1], modes[2], modes[3]); - } - - for (i = 0; i < 16; i++) { - xd->block[i].bmi = xd->mode_info_context->bmi[i]; - } -} - -extern void vp9_calc_ref_probs(int *count, vp9_prob *probs); -static void estimate_curframe_refprobs(VP9_COMP *cpi, vp9_prob mod_refprobs[3], int pred_ref) { - int norm_cnt[MAX_REF_FRAMES]; - const int *const rfct = cpi->count_mb_ref_frame_usage; - int intra_count = rfct[INTRA_FRAME]; - int last_count = rfct[LAST_FRAME]; - int gf_count = rfct[GOLDEN_FRAME]; - int arf_count = rfct[ALTREF_FRAME]; - - // Work out modified reference frame probabilities to use where prediction - // of the reference frame fails - if (pred_ref == INTRA_FRAME) { - norm_cnt[0] = 0; - norm_cnt[1] = last_count; - norm_cnt[2] = gf_count; - norm_cnt[3] = arf_count; - vp9_calc_ref_probs(norm_cnt, mod_refprobs); - mod_refprobs[0] = 0; // This branch implicit - } else if (pred_ref == LAST_FRAME) { - norm_cnt[0] = intra_count; - norm_cnt[1] = 0; - norm_cnt[2] = gf_count; - norm_cnt[3] = arf_count; - vp9_calc_ref_probs(norm_cnt, mod_refprobs); - mod_refprobs[1] = 0; // This branch implicit - } else if (pred_ref == GOLDEN_FRAME) { - norm_cnt[0] = intra_count; - norm_cnt[1] = last_count; - norm_cnt[2] = 0; - norm_cnt[3] = arf_count; - vp9_calc_ref_probs(norm_cnt, mod_refprobs); - mod_refprobs[2] = 0; // This branch implicit - } else { - norm_cnt[0] = intra_count; - norm_cnt[1] = last_count; - norm_cnt[2] = gf_count; - norm_cnt[3] = 0; - vp9_calc_ref_probs(norm_cnt, mod_refprobs); - mod_refprobs[2] = 0; // This branch implicit - } -} - -static INLINE unsigned weighted_cost(vp9_prob *tab0, vp9_prob *tab1, - int idx, int val, int weight) { - unsigned cost0 = tab0[idx] ? vp9_cost_bit(tab0[idx], val) : 0; - unsigned cost1 = tab1[idx] ? 
vp9_cost_bit(tab1[idx], val) : 0; - // weight is 16-bit fixed point, so this basically calculates: - // 0.5 + weight * cost1 + (1.0 - weight) * cost0 - return (0x8000 + weight * cost1 + (0x10000 - weight) * cost0) >> 16; -} - -static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id, unsigned int *ref_costs) { - VP9_COMMON *cm = &cpi->common; - MACROBLOCKD *xd = &cpi->mb.e_mbd; - vp9_prob *mod_refprobs; - - unsigned int cost; - int pred_ref; - int pred_flag; - int pred_ctx; - int i; - - vp9_prob pred_prob, new_pred_prob; - int seg_ref_active; - int seg_ref_count = 0; - seg_ref_active = vp9_segfeature_active(xd, - segment_id, - SEG_LVL_REF_FRAME); - - if (seg_ref_active) { - seg_ref_count = vp9_check_segref(xd, segment_id, INTRA_FRAME) + - vp9_check_segref(xd, segment_id, LAST_FRAME) + - vp9_check_segref(xd, segment_id, GOLDEN_FRAME) + - vp9_check_segref(xd, segment_id, ALTREF_FRAME); - } - - // Get the predicted reference for this mb - pred_ref = vp9_get_pred_ref(cm, xd); - - // Get the context probability for the prediction flag (based on last frame) - pred_prob = vp9_get_pred_prob(cm, xd, PRED_REF); - - // Predict probability for current frame based on stats so far - pred_ctx = vp9_get_pred_context(cm, xd, PRED_REF); - new_pred_prob = get_binary_prob(cpi->ref_pred_count[pred_ctx][0], - cpi->ref_pred_count[pred_ctx][1]); - - // Get the set of probabilities to use if prediction fails - mod_refprobs = cm->mod_refprobs[pred_ref]; - - // For each possible selected reference frame work out a cost. - for (i = 0; i < MAX_REF_FRAMES; i++) { - if (seg_ref_active && seg_ref_count == 1) { - cost = 0; - } else { - pred_flag = (i == pred_ref); - - // Get the prediction for the current mb - cost = weighted_cost(&pred_prob, &new_pred_prob, 0, - pred_flag, cpi->seg0_progress); - if (cost > 1024) cost = 768; // i.e. account for 4 bits max. - - // for incorrectly predicted cases - if (! 
pred_flag) { - vp9_prob curframe_mod_refprobs[3]; - - if (cpi->seg0_progress) { - estimate_curframe_refprobs(cpi, curframe_mod_refprobs, pred_ref); - } else { - vpx_memset(curframe_mod_refprobs, 0, sizeof(curframe_mod_refprobs)); - } - - cost += weighted_cost(mod_refprobs, curframe_mod_refprobs, 0, - (i != INTRA_FRAME), cpi->seg0_progress); - if (i != INTRA_FRAME) { - cost += weighted_cost(mod_refprobs, curframe_mod_refprobs, 1, - (i != LAST_FRAME), cpi->seg0_progress); - if (i != LAST_FRAME) { - cost += weighted_cost(mod_refprobs, curframe_mod_refprobs, 2, - (i != GOLDEN_FRAME), cpi->seg0_progress); - } - } - } - } - - ref_costs[i] = cost; - } -} - -static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, - int mode_index, - PARTITION_INFO *partition, - int_mv *ref_mv, - int_mv *second_ref_mv, - int64_t comp_pred_diff[NB_PREDICTION_TYPES], - int64_t txfm_size_diff[NB_TXFM_MODES]) { - MACROBLOCKD *const xd = &x->e_mbd; - - // Take a snapshot of the coding context so it can be - // restored if we decide to encode this way - ctx->skip = x->skip; - ctx->best_mode_index = mode_index; - vpx_memcpy(&ctx->mic, xd->mode_info_context, - sizeof(MODE_INFO)); - if (partition) - vpx_memcpy(&ctx->partition_info, partition, - sizeof(PARTITION_INFO)); - ctx->best_ref_mv.as_int = ref_mv->as_int; - ctx->second_best_ref_mv.as_int = second_ref_mv->as_int; - - ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_PREDICTION_ONLY]; - ctx->comp_pred_diff = (int)comp_pred_diff[COMP_PREDICTION_ONLY]; - ctx->hybrid_pred_diff = (int)comp_pred_diff[HYBRID_PREDICTION]; - - memcpy(ctx->txfm_rd_diff, txfm_size_diff, sizeof(ctx->txfm_rd_diff)); -} - -static void inter_mode_cost(VP9_COMP *cpi, MACROBLOCK *x, - int *rate2, int *distortion2, int *rate_y, - int *distortion, int* rate_uv, int *distortion_uv, - int *skippable, int64_t txfm_cache[NB_TXFM_MODES]) { - int y_skippable, uv_skippable; - - // Y cost and distortion - macro_block_yrd(cpi, x, rate_y, distortion, &y_skippable, txfm_cache); - - *rate2 += *rate_y; - *distortion2 += *distortion; - - // UV cost and distortion - vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer, - x->e_mbd.predictor, x->src.uv_stride); - if (x->e_mbd.mode_info_context->mbmi.txfm_size != TX_4X4 && - x->e_mbd.mode_info_context->mbmi.mode != I8X8_PRED && - x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) - rd_inter16x16_uv_8x8(cpi, x, rate_uv, distortion_uv, - cpi->common.full_pixel, &uv_skippable, 1); - else - rd_inter16x16_uv_4x4(cpi, x, rate_uv, distortion_uv, - cpi->common.full_pixel, &uv_skippable, 1); - - *rate2 += *rate_uv; - *distortion2 += *distortion_uv; - *skippable = y_skippable && uv_skippable; -} - -static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, - int idx, MV_REFERENCE_FRAME frame_type, - int block_size, - int mb_row, int mb_col, - int_mv frame_nearest_mv[MAX_REF_FRAMES], - int_mv frame_near_mv[MAX_REF_FRAMES], - int frame_mdcounts[4][4], - YV12_BUFFER_CONFIG yv12_mb[4], - struct scale_factors scale[MAX_REF_FRAMES]) { - VP9_COMMON *cm = &cpi->common; - YV12_BUFFER_CONFIG *yv12 = &cm->yv12_fb[cpi->common.ref_frame_map[idx]]; - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; - int use_prev_in_find_mv_refs, use_prev_in_find_best_ref; - - // set up scaling factors - scale[frame_type] = cpi->common.active_ref_scale[frame_type - 1]; - scale[frame_type].x_offset_q4 = - (mb_col * 16 * scale[frame_type].x_num / scale[frame_type].x_den) & 0xf; - scale[frame_type].y_offset_q4 = - (mb_row * 16 * 
scale[frame_type].y_num / scale[frame_type].y_den) & 0xf; - - // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this - // use the UV scaling factors. - setup_pred_block(&yv12_mb[frame_type], yv12, mb_row, mb_col, - &scale[frame_type], &scale[frame_type]); - - // Gets an initial list of candidate vectors from neighbours and orders them - use_prev_in_find_mv_refs = cm->width == cm->last_width && - cm->height == cm->last_height && - !cpi->common.error_resilient_mode; - vp9_find_mv_refs(&cpi->common, xd, xd->mode_info_context, - use_prev_in_find_mv_refs ? xd->prev_mode_info_context : NULL, - frame_type, - mbmi->ref_mvs[frame_type], - cpi->common.ref_frame_sign_bias); - - // Candidate refinement carried out at encoder and decoder - use_prev_in_find_best_ref = - scale[frame_type].x_num == scale[frame_type].x_den && - scale[frame_type].y_num == scale[frame_type].y_den && - !cm->error_resilient_mode && - !cm->frame_parallel_decoding_mode; - vp9_find_best_ref_mvs(xd, - use_prev_in_find_best_ref ? - yv12_mb[frame_type].y_buffer : NULL, - yv12->y_stride, - mbmi->ref_mvs[frame_type], - &frame_nearest_mv[frame_type], - &frame_near_mv[frame_type]); - - // Further refinement that is encode side only to test the top few candidates - // in full and choose the best as the centre point for subsequent searches. - // The current implementation doesn't support scaling. - if (scale[frame_type].x_num == scale[frame_type].x_den && - scale[frame_type].y_num == scale[frame_type].y_den) - mv_pred(cpi, x, yv12_mb[frame_type].y_buffer, yv12->y_stride, - frame_type, block_size); -} - -static void model_rd_from_var_lapndz(int var, int n, int qstep, - int *rate, int *dist) { - // This function models the rate and distortion for a Laplacian - // source with given variance when quantized with a uniform quantizer - // with given stepsize. The closed form expressions are in: - // Hang and Chen, "Source Model for transform video coder and its - // application - Part I: Fundamental Theory", IEEE Trans. Circ. - // Sys. for Video Tech., April 1997. - // The function is implemented as piecewise approximation to the - // exact computation. 
- // TODO(debargha): Implement the functions by interpolating from a - // look-up table - vp9_clear_system_state(); - { - double D, R; - double s2 = (double) var / n; - double s = sqrt(s2); - double x = qstep / s; - if (x > 1.0) { - double y = exp(-x / 2); - double y2 = y * y; - D = 2.069981728764738 * y2 - 2.764286806516079 * y + 1.003956960819275; - R = 0.924056758535089 * y2 + 2.738636469814024 * y - 0.005169662030017; - } else { - double x2 = x * x; - D = 0.075303187668830 * x2 + 0.004296954321112 * x - 0.000413209252807; - if (x > 0.125) - R = 1 / (-0.03459733614226 * x2 + 0.36561675733603 * x + - 0.1626989668625); - else - R = -1.442252874826093 * log(x) + 1.944647760719664; - } - if (R < 0) { - *rate = 0; - *dist = var; - } else { - *rate = (n * R * 256 + 0.5); - *dist = (n * D * s2 + 0.5); - } - } - vp9_clear_system_state(); -} - -static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, - enum BlockSize block_size, - int *saddone, int near_sadidx[], - int mdcounts[4], int64_t txfm_cache[], - int *rate2, int *distortion, int *skippable, - int *compmode_cost, -#if CONFIG_COMP_INTERINTRA_PRED - int *compmode_interintra_cost, -#endif - int *rate_y, int *distortion_y, - int *rate_uv, int *distortion_uv, - int *mode_excluded, int *disable_skip, - int mode_index, - INTERPOLATIONFILTERTYPE *best_filter, - int_mv frame_mv[MB_MODE_COUNT] - [MAX_REF_FRAMES], - YV12_BUFFER_CONFIG *scaled_ref_frame, - int mb_row, int mb_col) { - VP9_COMMON *cm = &cpi->common; - MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; - BLOCK *b = &x->block[0]; - BLOCKD *d = &xd->block[0]; - const int is_comp_pred = (mbmi->second_ref_frame > 0); -#if CONFIG_COMP_INTERINTRA_PRED - const int is_comp_interintra_pred = (mbmi->second_ref_frame == INTRA_FRAME); -#endif - const int num_refs = is_comp_pred ? 2 : 1; - const int this_mode = mbmi->mode; - int i; - int refs[2] = { mbmi->ref_frame, - (mbmi->second_ref_frame < 0 ? 0 : mbmi->second_ref_frame) }; - int_mv cur_mv[2]; - int_mv ref_mv[2]; - int64_t this_rd = 0; - unsigned char tmp_ybuf[64 * 64]; - unsigned char tmp_ubuf[32 * 32]; - unsigned char tmp_vbuf[32 * 32]; - int pred_exists = 0; - int interpolating_intpel_seen = 0; - int intpel_mv; - int64_t rd, best_rd = INT64_MAX; - - switch (this_mode) { - case NEWMV: - ref_mv[0] = mbmi->ref_mvs[refs[0]][0]; - ref_mv[1] = mbmi->ref_mvs[refs[1]][0]; - - if (is_comp_pred) { - if (frame_mv[NEWMV][refs[0]].as_int == INVALID_MV || - frame_mv[NEWMV][refs[1]].as_int == INVALID_MV) - return INT64_MAX; - *rate2 += vp9_mv_bit_cost(&frame_mv[NEWMV][refs[0]], - &ref_mv[0], - x->nmvjointcost, x->mvcost, 96, - x->e_mbd.allow_high_precision_mv); - *rate2 += vp9_mv_bit_cost(&frame_mv[NEWMV][refs[1]], - &ref_mv[1], - x->nmvjointcost, x->mvcost, 96, - x->e_mbd.allow_high_precision_mv); - } else { - YV12_BUFFER_CONFIG backup_yv12 = xd->pre; - int bestsme = INT_MAX; - int further_steps, step_param = cpi->sf.first_step; - int sadpb = x->sadperbit16; - int_mv mvp_full, tmp_mv; - int sr = 0; - - int tmp_col_min = x->mv_col_min; - int tmp_col_max = x->mv_col_max; - int tmp_row_min = x->mv_row_min; - int tmp_row_max = x->mv_row_max; - - if (scaled_ref_frame) { - // Swap out the reference frame for a version that's been scaled to - // match the resolution of the current frame, allowing the existing - // motion search code to be used without additional modifications. 
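
Both the deleted single-buffer code below (xd->pre) and its per-plane replacement earlier in this patch follow the same save/swap/restore discipline around the motion search. A minimal standalone sketch of the pattern; the types and names here are illustrative, not the codec's:

/* Sketch only: save the reference, swap in the scaled version for the
 * search, then restore, so later prediction uses the true reference. */
#include <stdio.h>

struct buf { const unsigned char *data; int stride; };

static void motion_search(const struct buf *ref) {
  printf("searching in buffer with stride %d\n", ref->stride);
}

int main(void) {
  static const unsigned char full[64], scaled[16];
  struct buf pre = {full, 8};
  const struct buf backup = pre;            /* 1. save */
  const struct buf scaled_ref = {scaled, 4};
  pre = scaled_ref;                         /* 2. swap in scaled frame */
  motion_search(&pre);                      /* 3. search at current res */
  pre = backup;                             /* 4. restore */
  motion_search(&pre);
  return 0;
}
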
- xd->pre = *scaled_ref_frame; - xd->pre.y_buffer += mb_row * 16 * xd->pre.y_stride + mb_col * 16; - xd->pre.u_buffer += mb_row * 8 * xd->pre.uv_stride + mb_col * 8; - xd->pre.v_buffer += mb_row * 8 * xd->pre.uv_stride + mb_col * 8; - } - - vp9_clamp_mv_min_max(x, &ref_mv[0]); - - sr = vp9_init_search_range(cpi->common.width, cpi->common.height); - - // mvp_full.as_int = ref_mv[0].as_int; - mvp_full.as_int = - mbmi->ref_mvs[refs[0]][x->mv_best_ref_index[refs[0]]].as_int; - - mvp_full.as_mv.col >>= 3; - mvp_full.as_mv.row >>= 3; - - // adjust search range according to sr from mv prediction - step_param = MAX(step_param, sr); - - // Further step/diamond searches as necessary - further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; - - bestsme = vp9_full_pixel_diamond(cpi, x, b, d, &mvp_full, step_param, - sadpb, further_steps, 1, - &cpi->fn_ptr[block_size], - &ref_mv[0], &tmp_mv); - - x->mv_col_min = tmp_col_min; - x->mv_col_max = tmp_col_max; - x->mv_row_min = tmp_row_min; - x->mv_row_max = tmp_row_max; - - if (bestsme < INT_MAX) { - int dis; /* TODO: use dis in distortion calculation later. */ - unsigned int sse; - cpi->find_fractional_mv_step(x, b, d, &tmp_mv, - &ref_mv[0], - x->errorperbit, - &cpi->fn_ptr[block_size], - x->nmvjointcost, x->mvcost, - &dis, &sse); - } - d->bmi.as_mv[0].as_int = tmp_mv.as_int; - frame_mv[NEWMV][refs[0]].as_int = d->bmi.as_mv[0].as_int; - - // Add the new motion vector cost to our rolling cost variable - *rate2 += vp9_mv_bit_cost(&tmp_mv, &ref_mv[0], - x->nmvjointcost, x->mvcost, - 96, xd->allow_high_precision_mv); - - // restore the predictor, if required - if (scaled_ref_frame) { - xd->pre = backup_yv12; - } - } - break; - case NEARMV: - case NEARESTMV: - case ZEROMV: - default: - break; - } - for (i = 0; i < num_refs; ++i) { - cur_mv[i] = frame_mv[this_mode][refs[i]]; - // Clip "next_nearest" so that it does not extend to far out of image - clamp_mv2(&cur_mv[i], xd); - if (mv_check_bounds(x, &cur_mv[i])) - return INT64_MAX; - mbmi->mv[i].as_int = cur_mv[i].as_int; - } - - - /* We don't include the cost of the second reference here, because there - * are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other - * words if you present them in that order, the second one is always known - * if the first is known */ - *compmode_cost = vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_COMP), - is_comp_pred); - *rate2 += vp9_cost_mv_ref(cpi, this_mode, - mbmi->mb_mode_context[mbmi->ref_frame]); -#if CONFIG_COMP_INTERINTRA_PRED - if (!is_comp_pred) { - *compmode_interintra_cost = vp9_cost_bit(cm->fc.interintra_prob, - is_comp_interintra_pred); - if (is_comp_interintra_pred) { - *compmode_interintra_cost += - x->mbmode_cost[xd->frame_type][mbmi->interintra_mode]; -#if SEPARATE_INTERINTRA_UV - *compmode_interintra_cost += - x->intra_uv_mode_cost[xd->frame_type][mbmi->interintra_uv_mode]; -#endif - } - } -#endif - - pred_exists = 0; - interpolating_intpel_seen = 0; - // Are all MVs integer pel for Y and UV - intpel_mv = (mbmi->mv[0].as_mv.row & 15) == 0 && - (mbmi->mv[0].as_mv.col & 15) == 0; - if (is_comp_pred) - intpel_mv &= (mbmi->mv[1].as_mv.row & 15) == 0 && - (mbmi->mv[1].as_mv.col & 15) == 0; - // Search for best switchable filter by checking the variance of - // pred error irrespective of whether the filter will be used - if (block_size == BLOCK_64X64) { - int switchable_filter_index, newbest; - int tmp_rate_y_i = 0, tmp_rate_u_i = 0, tmp_rate_v_i = 0; - int tmp_dist_y_i = 0, tmp_dist_u_i = 0, tmp_dist_v_i = 0; - for 
(switchable_filter_index = 0; - switchable_filter_index < VP9_SWITCHABLE_FILTERS; - ++switchable_filter_index) { - int rs = 0; - mbmi->interp_filter = vp9_switchable_interp[switchable_filter_index]; - vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); - - if (cpi->common.mcomp_filter_type == SWITCHABLE) { - const int c = vp9_get_pred_context(cm, xd, PRED_SWITCHABLE_INTERP); - const int m = vp9_switchable_interp_map[mbmi->interp_filter]; - rs = SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m]; - } - if (interpolating_intpel_seen && intpel_mv && - vp9_is_interpolating_filter[mbmi->interp_filter]) { - rd = RDCOST(x->rdmult, x->rddiv, - rs + tmp_rate_y_i + tmp_rate_u_i + tmp_rate_v_i, - tmp_dist_y_i + tmp_dist_u_i + tmp_dist_v_i); - } else { - unsigned int sse, var; - int tmp_rate_y, tmp_rate_u, tmp_rate_v; - int tmp_dist_y, tmp_dist_u, tmp_dist_v; - vp9_build_inter64x64_predictors_sb(xd, - xd->dst.y_buffer, - xd->dst.u_buffer, - xd->dst.v_buffer, - xd->dst.y_stride, - xd->dst.uv_stride, - mb_row, mb_col); - var = vp9_variance64x64(*(b->base_src), b->src_stride, - xd->dst.y_buffer, xd->dst.y_stride, &sse); - // Note our transform coeffs are 8 times an orthogonal transform. - // Hence quantizer step is also 8 times. To get effective quantizer - // we need to divide by 8 before sending to modeling function. - model_rd_from_var_lapndz(var, 64 * 64, xd->block[0].dequant[1] >> 3, - &tmp_rate_y, &tmp_dist_y); - var = vp9_variance32x32(x->src.u_buffer, x->src.uv_stride, - xd->dst.u_buffer, xd->dst.uv_stride, &sse); - model_rd_from_var_lapndz(var, 32 * 32, xd->block[16].dequant[1] >> 3, - &tmp_rate_u, &tmp_dist_u); - var = vp9_variance32x32(x->src.v_buffer, x->src.uv_stride, - xd->dst.v_buffer, xd->dst.uv_stride, &sse); - model_rd_from_var_lapndz(var, 32 * 32, xd->block[20].dequant[1] >> 3, - &tmp_rate_v, &tmp_dist_v); - rd = RDCOST(x->rdmult, x->rddiv, - rs + tmp_rate_y + tmp_rate_u + tmp_rate_v, - tmp_dist_y + tmp_dist_u + tmp_dist_v); - if (!interpolating_intpel_seen && intpel_mv && - vp9_is_interpolating_filter[mbmi->interp_filter]) { - tmp_rate_y_i = tmp_rate_y; - tmp_rate_u_i = tmp_rate_u; - tmp_rate_v_i = tmp_rate_v; - tmp_dist_y_i = tmp_dist_y; - tmp_dist_u_i = tmp_dist_u; - tmp_dist_v_i = tmp_dist_v; - } - } - newbest = (switchable_filter_index == 0 || rd < best_rd); - if (newbest) { - best_rd = rd; - *best_filter = mbmi->interp_filter; - } - if ((cm->mcomp_filter_type == SWITCHABLE && newbest) || - (cm->mcomp_filter_type != SWITCHABLE && - cm->mcomp_filter_type == mbmi->interp_filter)) { - int i; - for (i = 0; i < 64; ++i) - vpx_memcpy(tmp_ybuf + i * 64, - xd->dst.y_buffer + i * xd->dst.y_stride, - sizeof(unsigned char) * 64); - for (i = 0; i < 32; ++i) - vpx_memcpy(tmp_ubuf + i * 32, - xd->dst.u_buffer + i * xd->dst.uv_stride, - sizeof(unsigned char) * 32); - for (i = 0; i < 32; ++i) - vpx_memcpy(tmp_vbuf + i * 32, - xd->dst.v_buffer + i * xd->dst.uv_stride, - sizeof(unsigned char) * 32); - pred_exists = 1; - } - interpolating_intpel_seen |= - intpel_mv && vp9_is_interpolating_filter[mbmi->interp_filter]; - } - } else if (block_size == BLOCK_32X32) { - int switchable_filter_index, newbest; - int tmp_rate_y_i = 0, tmp_rate_u_i = 0, tmp_rate_v_i = 0; - int tmp_dist_y_i = 0, tmp_dist_u_i = 0, tmp_dist_v_i = 0; - for (switchable_filter_index = 0; - switchable_filter_index < VP9_SWITCHABLE_FILTERS; - ++switchable_filter_index) { - int rs = 0; - mbmi->interp_filter = vp9_switchable_interp[switchable_filter_index]; - vp9_setup_interp_filters(xd, mbmi->interp_filter, 
&cpi->common); - if (cpi->common.mcomp_filter_type == SWITCHABLE) { - const int c = vp9_get_pred_context(cm, xd, PRED_SWITCHABLE_INTERP); - const int m = vp9_switchable_interp_map[mbmi->interp_filter]; - rs = SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m]; - } - if (interpolating_intpel_seen && intpel_mv && - vp9_is_interpolating_filter[mbmi->interp_filter]) { - rd = RDCOST(x->rdmult, x->rddiv, - rs + tmp_rate_y_i + tmp_rate_u_i + tmp_rate_v_i, - tmp_dist_y_i + tmp_dist_u_i + tmp_dist_v_i); - } else { - unsigned int sse, var; - int tmp_rate_y, tmp_rate_u, tmp_rate_v; - int tmp_dist_y, tmp_dist_u, tmp_dist_v; - vp9_build_inter32x32_predictors_sb(xd, - xd->dst.y_buffer, - xd->dst.u_buffer, - xd->dst.v_buffer, - xd->dst.y_stride, - xd->dst.uv_stride, - mb_row, mb_col); - var = vp9_variance32x32(*(b->base_src), b->src_stride, - xd->dst.y_buffer, xd->dst.y_stride, &sse); - // Note our transform coeffs are 8 times an orthogonal transform. - // Hence quantizer step is also 8 times. To get effective quantizer - // we need to divide by 8 before sending to modeling function. - model_rd_from_var_lapndz(var, 32 * 32, xd->block[0].dequant[1] >> 3, - &tmp_rate_y, &tmp_dist_y); - var = vp9_variance16x16(x->src.u_buffer, x->src.uv_stride, - xd->dst.u_buffer, xd->dst.uv_stride, &sse); - model_rd_from_var_lapndz(var, 16 * 16, xd->block[16].dequant[1] >> 3, - &tmp_rate_u, &tmp_dist_u); - var = vp9_variance16x16(x->src.v_buffer, x->src.uv_stride, - xd->dst.v_buffer, xd->dst.uv_stride, &sse); - model_rd_from_var_lapndz(var, 16 * 16, xd->block[20].dequant[1] >> 3, - &tmp_rate_v, &tmp_dist_v); - rd = RDCOST(x->rdmult, x->rddiv, - rs + tmp_rate_y + tmp_rate_u + tmp_rate_v, - tmp_dist_y + tmp_dist_u + tmp_dist_v); - if (!interpolating_intpel_seen && intpel_mv && - vp9_is_interpolating_filter[mbmi->interp_filter]) { - tmp_rate_y_i = tmp_rate_y; - tmp_rate_u_i = tmp_rate_u; - tmp_rate_v_i = tmp_rate_v; - tmp_dist_y_i = tmp_dist_y; - tmp_dist_u_i = tmp_dist_u; - tmp_dist_v_i = tmp_dist_v; - } - } - newbest = (switchable_filter_index == 0 || rd < best_rd); - if (newbest) { - best_rd = rd; - *best_filter = mbmi->interp_filter; - } - if ((cm->mcomp_filter_type == SWITCHABLE && newbest) || - (cm->mcomp_filter_type != SWITCHABLE && - cm->mcomp_filter_type == mbmi->interp_filter)) { - int i; - for (i = 0; i < 32; ++i) - vpx_memcpy(tmp_ybuf + i * 64, - xd->dst.y_buffer + i * xd->dst.y_stride, - sizeof(unsigned char) * 32); - for (i = 0; i < 16; ++i) - vpx_memcpy(tmp_ubuf + i * 32, - xd->dst.u_buffer + i * xd->dst.uv_stride, - sizeof(unsigned char) * 16); - for (i = 0; i < 16; ++i) - vpx_memcpy(tmp_vbuf + i * 32, - xd->dst.v_buffer + i * xd->dst.uv_stride, - sizeof(unsigned char) * 16); - pred_exists = 1; - } - interpolating_intpel_seen |= - intpel_mv && vp9_is_interpolating_filter[mbmi->interp_filter]; - } - } else { - int switchable_filter_index, newbest; - int tmp_rate_y_i = 0, tmp_rate_u_i = 0, tmp_rate_v_i = 0; - int tmp_dist_y_i = 0, tmp_dist_u_i = 0, tmp_dist_v_i = 0; - assert(block_size == BLOCK_16X16); - for (switchable_filter_index = 0; - switchable_filter_index < VP9_SWITCHABLE_FILTERS; - ++switchable_filter_index) { - int rs = 0; - mbmi->interp_filter = vp9_switchable_interp[switchable_filter_index]; - vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); - if (cpi->common.mcomp_filter_type == SWITCHABLE) { - const int c = vp9_get_pred_context(cm, xd, PRED_SWITCHABLE_INTERP); - const int m = vp9_switchable_interp_map[mbmi->interp_filter]; - rs = SWITCHABLE_INTERP_RATE_FACTOR * 
x->switchable_interp_costs[c][m]; - } - if (interpolating_intpel_seen && intpel_mv && - vp9_is_interpolating_filter[mbmi->interp_filter]) { - rd = RDCOST(x->rdmult, x->rddiv, - rs + tmp_rate_y_i + tmp_rate_u_i + tmp_rate_v_i, - tmp_dist_y_i + tmp_dist_u_i + tmp_dist_v_i); - } else { - unsigned int sse, var; - int tmp_rate_y, tmp_rate_u, tmp_rate_v; - int tmp_dist_y, tmp_dist_u, tmp_dist_v; - vp9_build_inter16x16_predictors_mb(xd, xd->predictor, - xd->predictor + 256, - xd->predictor + 320, - 16, 8, mb_row, mb_col); - var = vp9_variance16x16(*(b->base_src), b->src_stride, - xd->predictor, 16, &sse); - // Note our transform coeffs are 8 times an orthogonal transform. - // Hence quantizer step is also 8 times. To get effective quantizer - // we need to divide by 8 before sending to modeling function. - model_rd_from_var_lapndz(var, 16 * 16, xd->block[0].dequant[1] >> 3, - &tmp_rate_y, &tmp_dist_y); - var = vp9_variance8x8(x->src.u_buffer, x->src.uv_stride, - &xd->predictor[256], 8, &sse); - model_rd_from_var_lapndz(var, 8 * 8, xd->block[16].dequant[1] >> 3, - &tmp_rate_u, &tmp_dist_u); - var = vp9_variance8x8(x->src.v_buffer, x->src.uv_stride, - &xd->predictor[320], 8, &sse); - model_rd_from_var_lapndz(var, 8 * 8, xd->block[20].dequant[1] >> 3, - &tmp_rate_v, &tmp_dist_v); - rd = RDCOST(x->rdmult, x->rddiv, - rs + tmp_rate_y + tmp_rate_u + tmp_rate_v, - tmp_dist_y + tmp_dist_u + tmp_dist_v); - if (!interpolating_intpel_seen && intpel_mv && - vp9_is_interpolating_filter[mbmi->interp_filter]) { - tmp_rate_y_i = tmp_rate_y; - tmp_rate_u_i = tmp_rate_u; - tmp_rate_v_i = tmp_rate_v; - tmp_dist_y_i = tmp_dist_y; - tmp_dist_u_i = tmp_dist_u; - tmp_dist_v_i = tmp_dist_v; - } - } - newbest = (switchable_filter_index == 0 || rd < best_rd); - if (newbest) { - best_rd = rd; - *best_filter = mbmi->interp_filter; - } - if ((cm->mcomp_filter_type == SWITCHABLE && newbest) || - (cm->mcomp_filter_type != SWITCHABLE && - cm->mcomp_filter_type == mbmi->interp_filter)) { - vpx_memcpy(tmp_ybuf, xd->predictor, sizeof(unsigned char) * 256); - vpx_memcpy(tmp_ubuf, xd->predictor + 256, sizeof(unsigned char) * 64); - vpx_memcpy(tmp_vbuf, xd->predictor + 320, sizeof(unsigned char) * 64); - pred_exists = 1; - } - interpolating_intpel_seen |= - intpel_mv && vp9_is_interpolating_filter[mbmi->interp_filter]; - } - } - - // Set the appripriate filter - if (cm->mcomp_filter_type != SWITCHABLE) - mbmi->interp_filter = cm->mcomp_filter_type; - else - mbmi->interp_filter = *best_filter; - vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); - - if (pred_exists) { - if (block_size == BLOCK_64X64) { - for (i = 0; i < 64; ++i) - vpx_memcpy(xd->dst.y_buffer + i * xd->dst.y_stride, tmp_ybuf + i * 64, - sizeof(unsigned char) * 64); - for (i = 0; i < 32; ++i) - vpx_memcpy(xd->dst.u_buffer + i * xd->dst.uv_stride, tmp_ubuf + i * 32, - sizeof(unsigned char) * 32); - for (i = 0; i < 32; ++i) - vpx_memcpy(xd->dst.v_buffer + i * xd->dst.uv_stride, tmp_vbuf + i * 32, - sizeof(unsigned char) * 32); - } else if (block_size == BLOCK_32X32) { - for (i = 0; i < 32; ++i) - vpx_memcpy(xd->dst.y_buffer + i * xd->dst.y_stride, tmp_ybuf + i * 64, - sizeof(unsigned char) * 32); - for (i = 0; i < 16; ++i) - vpx_memcpy(xd->dst.u_buffer + i * xd->dst.uv_stride, tmp_ubuf + i * 32, - sizeof(unsigned char) * 16); - for (i = 0; i < 16; ++i) - vpx_memcpy(xd->dst.v_buffer + i * xd->dst.uv_stride, tmp_vbuf + i * 32, - sizeof(unsigned char) * 16); - } else { - vpx_memcpy(xd->predictor, tmp_ybuf, sizeof(unsigned char) * 256); - 
vpx_memcpy(xd->predictor + 256, tmp_ubuf, sizeof(unsigned char) * 64); - vpx_memcpy(xd->predictor + 320, tmp_vbuf, sizeof(unsigned char) * 64); - } - } else { - // Handles the special case when a filter that is not in the - // switchable list (ex. bilinear, 6-tap) is indicated at the frame level - if (block_size == BLOCK_64X64) { - vp9_build_inter64x64_predictors_sb(xd, - xd->dst.y_buffer, - xd->dst.u_buffer, - xd->dst.v_buffer, - xd->dst.y_stride, - xd->dst.uv_stride, - mb_row, mb_col); - } else if (block_size == BLOCK_32X32) { - vp9_build_inter32x32_predictors_sb(xd, - xd->dst.y_buffer, - xd->dst.u_buffer, - xd->dst.v_buffer, - xd->dst.y_stride, - xd->dst.uv_stride, - mb_row, mb_col); - } else { - vp9_build_inter16x16_predictors_mb(xd, xd->predictor, - xd->predictor + 256, - xd->predictor + 320, - 16, 8, mb_row, mb_col); - } - } - - if (cpi->common.mcomp_filter_type == SWITCHABLE) { - const int c = vp9_get_pred_context(cm, xd, PRED_SWITCHABLE_INTERP); - const int m = vp9_switchable_interp_map[mbmi->interp_filter]; - *rate2 += SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m]; - } - - if (cpi->active_map_enabled && x->active_ptr[0] == 0) - x->skip = 1; - else if (x->encode_breakout) { - unsigned int var, sse; - int threshold = (xd->block[0].dequant[1] - * xd->block[0].dequant[1] >> 4); - - if (threshold < x->encode_breakout) - threshold = x->encode_breakout; - - if (block_size == BLOCK_64X64) { - var = vp9_variance64x64(*(b->base_src), b->src_stride, - xd->dst.y_buffer, xd->dst.y_stride, &sse); - } else if (block_size == BLOCK_32X32) { - var = vp9_variance32x32(*(b->base_src), b->src_stride, - xd->dst.y_buffer, xd->dst.y_stride, &sse); - } else { - assert(block_size == BLOCK_16X16); - var = vp9_variance16x16(*(b->base_src), b->src_stride, - xd->predictor, 16, &sse); - } - - if ((int)sse < threshold) { - unsigned int q2dc = xd->block[0].dequant[0]; - /* If there is no codeable 2nd order dc - or a very small uniform pixel change change */ - if ((sse - var < q2dc * q2dc >> 4) || - (sse / 2 > var && sse - var < 64)) { - // Check u and v to make sure skip is ok - int sse2; - - if (block_size == BLOCK_64X64) { - unsigned int sse2u, sse2v; - var = vp9_variance32x32(x->src.u_buffer, x->src.uv_stride, - xd->dst.u_buffer, xd->dst.uv_stride, &sse2u); - var = vp9_variance32x32(x->src.v_buffer, x->src.uv_stride, - xd->dst.v_buffer, xd->dst.uv_stride, &sse2v); - sse2 = sse2u + sse2v; - } else if (block_size == BLOCK_32X32) { - unsigned int sse2u, sse2v; - var = vp9_variance16x16(x->src.u_buffer, x->src.uv_stride, - xd->dst.u_buffer, xd->dst.uv_stride, &sse2u); - var = vp9_variance16x16(x->src.v_buffer, x->src.uv_stride, - xd->dst.v_buffer, xd->dst.uv_stride, &sse2v); - sse2 = sse2u + sse2v; - } else { - assert(block_size == BLOCK_16X16); - sse2 = vp9_uvsse(x); - } - - if (sse2 * 2 < threshold) { - x->skip = 1; - *distortion = sse + sse2; - *rate2 = 500; - - /* for best_yrd calculation */ - *rate_uv = 0; - *distortion_uv = sse2; - - *disable_skip = 1; - this_rd = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion); - } - } - } - } - - if (!x->skip) { - if (block_size == BLOCK_64X64) { - int skippable_y, skippable_uv; - - // Y cost and distortion - super_block_64_yrd(cpi, x, rate_y, distortion_y, - &skippable_y, txfm_cache); - *rate2 += *rate_y; - *distortion += *distortion_y; - - rd_inter64x64_uv(cpi, x, rate_uv, distortion_uv, - cm->full_pixel, &skippable_uv); - - *rate2 += *rate_uv; - *distortion += *distortion_uv; - *skippable = skippable_y && skippable_uv; - } else if (block_size == 
BLOCK_32X32) { - int skippable_y, skippable_uv; - - // Y cost and distortion - super_block_yrd(cpi, x, rate_y, distortion_y, - &skippable_y, txfm_cache); - *rate2 += *rate_y; - *distortion += *distortion_y; - - rd_inter32x32_uv(cpi, x, rate_uv, distortion_uv, - cm->full_pixel, &skippable_uv); - - *rate2 += *rate_uv; - *distortion += *distortion_uv; - *skippable = skippable_y && skippable_uv; - } else { - assert(block_size == BLOCK_16X16); - inter_mode_cost(cpi, x, rate2, distortion, - rate_y, distortion_y, rate_uv, distortion_uv, - skippable, txfm_cache); - } - } - - if (!(*mode_excluded)) { - if (is_comp_pred) { - *mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY); - } else { - *mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY); - } -#if CONFIG_COMP_INTERINTRA_PRED - if (is_comp_interintra_pred && !cm->use_interintra) *mode_excluded = 1; -#endif - } - - return this_rd; // if 0, this will be re-calculated by caller -} - -static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, - int mb_row, int mb_col, - int *returnrate, int *returndistortion, - int64_t *returnintra) { - static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, - VP9_ALT_FLAG }; - VP9_COMMON *cm = &cpi->common; - MACROBLOCKD *xd = &x->e_mbd; - union b_mode_info best_bmodes[16]; - MB_MODE_INFO best_mbmode; - PARTITION_INFO best_partition; - int_mv best_ref_mv, second_best_ref_mv; - MB_PREDICTION_MODE this_mode; - MB_PREDICTION_MODE best_mode = DC_PRED; - MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; - int i, best_mode_index = 0; - int mode8x8[4]; - unsigned char segment_id = mbmi->segment_id; - - int mode_index; - int mdcounts[4]; - int rate, distortion; - int rate2, distortion2; - int64_t best_txfm_rd[NB_TXFM_MODES]; - int64_t best_txfm_diff[NB_TXFM_MODES]; - int64_t best_pred_diff[NB_PREDICTION_TYPES]; - int64_t best_pred_rd[NB_PREDICTION_TYPES]; - int64_t best_rd = INT64_MAX, best_intra_rd = INT64_MAX; -#if CONFIG_COMP_INTERINTRA_PRED - int is_best_interintra = 0; - int64_t best_intra16_rd = INT64_MAX; - int best_intra16_mode = DC_PRED; -#if SEPARATE_INTERINTRA_UV - int best_intra16_uv_mode = DC_PRED; -#endif -#endif - int64_t best_overall_rd = INT64_MAX; - INTERPOLATIONFILTERTYPE best_filter = SWITCHABLE; - INTERPOLATIONFILTERTYPE tmp_best_filter = SWITCHABLE; - int uv_intra_rate, uv_intra_distortion, uv_intra_rate_tokenonly; - int uv_intra_skippable = 0; - int uv_intra_rate_8x8 = 0, uv_intra_distortion_8x8 = 0, uv_intra_rate_tokenonly_8x8 = 0; - int uv_intra_skippable_8x8 = 0; - int rate_y, UNINITIALIZED_IS_SAFE(rate_uv); - int distortion_uv = INT_MAX; - int64_t best_yrd = INT64_MAX; - - MB_PREDICTION_MODE uv_intra_mode; - MB_PREDICTION_MODE uv_intra_mode_8x8 = 0; - - int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7}; - int saddone = 0; - - int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; - int frame_mdcounts[4][4]; - YV12_BUFFER_CONFIG yv12_mb[4]; - - unsigned int ref_costs[MAX_REF_FRAMES]; - int_mv seg_mvs[NB_PARTITIONINGS][16 /* n_blocks */][MAX_REF_FRAMES - 1]; - - int intra_cost_penalty = 20 * vp9_dc_quant(cpi->common.base_qindex, - cpi->common.y1dc_delta_q); - - struct scale_factors scale_factor[4]; - - vpx_memset(mode8x8, 0, sizeof(mode8x8)); - vpx_memset(&frame_mv, 0, sizeof(frame_mv)); - vpx_memset(&best_mbmode, 0, sizeof(best_mbmode)); - vpx_memset(&best_bmodes, 0, sizeof(best_bmodes)); - vpx_memset(&x->mb_context[xd->sb_index][xd->mb_index], 0, - sizeof(PICK_MODE_CONTEXT)); - - for (i = 0; i < MAX_REF_FRAMES; i++) - frame_mv[NEWMV][i].as_int = INVALID_MV; 
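The mode loop that follows compares candidates by their Lagrangian cost via the fixed-point RDCOST(x->rdmult, x->rddiv, rate, dist) macro and prunes via cpi->rd_threshes. A minimal self-contained sketch of that selection rule, with a floating-point lambda standing in for the fixed-point rdmult/rddiv state; the names picker and consider are illustrative, not from the tree:

    #include <stdint.h>

    /* Illustrative only: the encoder implements this with the
     * fixed-point RDCOST() macro rather than a double lambda. */
    typedef struct {
      int64_t best_rd;
      int best_index;
    } picker;

    void consider(picker *p, int index, int rate, int64_t dist,
                  double lambda) {
      /* Lagrangian cost J = D + lambda * R; keep the minimizer. */
      const int64_t rd = dist + (int64_t)(lambda * rate + 0.5);
      if (rd < p->best_rd) {
        p->best_rd = rd;
        p->best_index = index;
      }
    }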
- for (i = 0; i < NB_PREDICTION_TYPES; ++i) - best_pred_rd[i] = INT64_MAX; - for (i = 0; i < NB_TXFM_MODES; i++) - best_txfm_rd[i] = INT64_MAX; - - for (i = 0; i < NB_PARTITIONINGS; i++) { - int j, k; - - for (j = 0; j < 16; j++) - for (k = 0; k < MAX_REF_FRAMES - 1; k++) - seg_mvs[i][j][k].as_int = INVALID_MV; - } - - if (cpi->ref_frame_flags & VP9_LAST_FLAG) { - setup_buffer_inter(cpi, x, cpi->lst_fb_idx, - LAST_FRAME, BLOCK_16X16, mb_row, mb_col, - frame_mv[NEARESTMV], frame_mv[NEARMV], - frame_mdcounts, yv12_mb, scale_factor); - } - - if (cpi->ref_frame_flags & VP9_GOLD_FLAG) { - setup_buffer_inter(cpi, x, cpi->gld_fb_idx, - GOLDEN_FRAME, BLOCK_16X16, mb_row, mb_col, - frame_mv[NEARESTMV], frame_mv[NEARMV], - frame_mdcounts, yv12_mb, scale_factor); - } - - if (cpi->ref_frame_flags & VP9_ALT_FLAG) { - setup_buffer_inter(cpi, x, cpi->alt_fb_idx, - ALTREF_FRAME, BLOCK_16X16, mb_row, mb_col, - frame_mv[NEARESTMV], frame_mv[NEARMV], - frame_mdcounts, yv12_mb, scale_factor); - } - - *returnintra = INT64_MAX; - - mbmi->ref_frame = INTRA_FRAME; - - /* Initialize zbin mode boost for uv costing */ - cpi->zbin_mode_boost = 0; - vp9_update_zbin_extra(cpi, x); - - xd->mode_info_context->mbmi.mode = DC_PRED; - - rd_pick_intra_mbuv_mode(cpi, x, &uv_intra_rate, - &uv_intra_rate_tokenonly, &uv_intra_distortion, - &uv_intra_skippable); - uv_intra_mode = mbmi->uv_mode; - - /* rough estimate for now */ - if (cpi->common.txfm_mode != ONLY_4X4) { - rd_pick_intra_mbuv_mode_8x8(cpi, x, &uv_intra_rate_8x8, - &uv_intra_rate_tokenonly_8x8, - &uv_intra_distortion_8x8, - &uv_intra_skippable_8x8); - uv_intra_mode_8x8 = mbmi->uv_mode; - } - - // Get estimates of reference frame costs for each reference frame - // that depend on the current prediction etc. - estimate_ref_frame_costs(cpi, segment_id, ref_costs); - - for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) { - int64_t this_rd = INT64_MAX; - int disable_skip = 0, skippable = 0; - int other_cost = 0; - int compmode_cost = 0; -#if CONFIG_COMP_INTERINTRA_PRED - int compmode_interintra_cost = 0; -#endif - int mode_excluded = 0; - int64_t txfm_cache[NB_TXFM_MODES] = { 0 }; - YV12_BUFFER_CONFIG *scaled_ref_frame; - - // These variables hold are rolling total cost and distortion for this mode - rate2 = 0; - distortion2 = 0; - rate_y = 0; - rate_uv = 0; - - x->skip = 0; - - this_mode = vp9_mode_order[mode_index].mode; - mbmi->mode = this_mode; - mbmi->uv_mode = DC_PRED; - mbmi->ref_frame = vp9_mode_order[mode_index].ref_frame; - mbmi->second_ref_frame = vp9_mode_order[mode_index].second_ref_frame; - - mbmi->interp_filter = cm->mcomp_filter_type; - - set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame, - scale_factor); - - vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); - - // Test best rd so far against threshold for trying this mode. - if (best_rd <= cpi->rd_threshes[mode_index]) - continue; - - // Ensure that the references used by this mode are available. - if (mbmi->ref_frame && - !(cpi->ref_frame_flags & flag_list[mbmi->ref_frame])) - continue; - - if (mbmi->second_ref_frame > 0 && - !(cpi->ref_frame_flags & flag_list[mbmi->second_ref_frame])) - continue; - - // only scale on zeromv. 
-    if (mbmi->ref_frame > 0 &&
-        (yv12_mb[mbmi->ref_frame].y_width != cm->mb_cols * 16 ||
-         yv12_mb[mbmi->ref_frame].y_height != cm->mb_rows * 16) &&
-        this_mode != ZEROMV)
-      continue;
-
-    if (mbmi->second_ref_frame > 0 &&
-        (yv12_mb[mbmi->second_ref_frame].y_width != cm->mb_cols * 16 ||
-         yv12_mb[mbmi->second_ref_frame].y_height != cm->mb_rows * 16) &&
-        this_mode != ZEROMV)
-      continue;
-
-    // current coding mode under rate-distortion optimization test loop
-#if CONFIG_COMP_INTERINTRA_PRED
-    mbmi->interintra_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
-    mbmi->interintra_uv_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
-#endif
-
-    // If the segment reference frame feature is enabled....
-    // then do nothing if the current ref frame is not allowed..
-    if (vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
-        !vp9_check_segref(xd, segment_id, mbmi->ref_frame)) {
-      continue;
-      // If the segment skip feature is enabled....
-      // then do nothing if the current mode is not allowed..
-    } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP) &&
-               (this_mode != ZEROMV)) {
-      continue;
-      // Disable this drop out case if the ref frame segment
-      // level feature is enabled for this segment. This is to
-      // prevent the possibility that we end up unable to pick any mode.
-    } else if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME)) {
-      // Only consider ZEROMV/ALTREF_FRAME for alt ref frame overlay,
-      // unless ARNR filtering is enabled in which case we want
-      // an unfiltered alternative
-      if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
-        if (this_mode != ZEROMV ||
-            mbmi->ref_frame != ALTREF_FRAME) {
-          continue;
-        }
-      }
-    }
-
-    /* everything but intra */
-    scaled_ref_frame = NULL;
-    if (mbmi->ref_frame) {
-      int ref = mbmi->ref_frame;
-      int fb;
-
-      xd->pre = yv12_mb[ref];
-      best_ref_mv = mbmi->ref_mvs[ref][0];
-      vpx_memcpy(mdcounts, frame_mdcounts[ref], sizeof(mdcounts));
-
-      if (mbmi->ref_frame == LAST_FRAME) {
-        fb = cpi->lst_fb_idx;
-      } else if (mbmi->ref_frame == GOLDEN_FRAME) {
-        fb = cpi->gld_fb_idx;
-      } else {
-        fb = cpi->alt_fb_idx;
-      }
-
-      if (cpi->scaled_ref_idx[fb] != cm->ref_frame_map[fb])
-        scaled_ref_frame = &cm->yv12_fb[cpi->scaled_ref_idx[fb]];
-    }
-
-    if (mbmi->second_ref_frame > 0) {
-      int ref = mbmi->second_ref_frame;
-
-      xd->second_pre = yv12_mb[ref];
-      second_best_ref_mv = mbmi->ref_mvs[ref][0];
-    }
-
-    // Experimental code. Special case for gf and arf zeromv modes.
- // Increase zbin size to suppress noise - if (cpi->zbin_mode_boost_enabled) { - if (vp9_mode_order[mode_index].ref_frame == INTRA_FRAME) - cpi->zbin_mode_boost = 0; - else { - if (vp9_mode_order[mode_index].mode == ZEROMV) { - if (vp9_mode_order[mode_index].ref_frame != LAST_FRAME) - cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; - else - cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST; - } else if (vp9_mode_order[mode_index].mode == SPLITMV) - cpi->zbin_mode_boost = 0; - else - cpi->zbin_mode_boost = MV_ZBIN_BOOST; - } - - vp9_update_zbin_extra(cpi, x); - } - - // Intra - if (!mbmi->ref_frame) { - switch (this_mode) { - default: - case V_PRED: - case H_PRED: - case D45_PRED: - case D135_PRED: - case D117_PRED: - case D153_PRED: - case D27_PRED: - case D63_PRED: - rate2 += intra_cost_penalty; - case DC_PRED: - case TM_PRED: - mbmi->ref_frame = INTRA_FRAME; - // FIXME compound intra prediction - vp9_build_intra_predictors_mby(&x->e_mbd); - macro_block_yrd(cpi, x, &rate_y, &distortion, &skippable, txfm_cache); - rate2 += rate_y; - distortion2 += distortion; - rate2 += x->mbmode_cost[xd->frame_type][mbmi->mode]; - if (mbmi->txfm_size != TX_4X4) { - rate2 += uv_intra_rate_8x8; - rate_uv = uv_intra_rate_tokenonly_8x8; - distortion2 += uv_intra_distortion_8x8; - distortion_uv = uv_intra_distortion_8x8; - skippable = skippable && uv_intra_skippable_8x8; - } else { - rate2 += uv_intra_rate; - rate_uv = uv_intra_rate_tokenonly; - distortion2 += uv_intra_distortion; - distortion_uv = uv_intra_distortion; - skippable = skippable && uv_intra_skippable; - } - break; - case B_PRED: { - int64_t tmp_rd; - - // Note the rate value returned here includes the cost of coding - // the BPRED mode : x->mbmode_cost[xd->frame_type][BPRED]; - mbmi->txfm_size = TX_4X4; - tmp_rd = rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, - &distortion, best_yrd); - rate2 += rate; - rate2 += intra_cost_penalty; - distortion2 += distortion; - - if (tmp_rd < best_yrd) { - rate2 += uv_intra_rate; - rate_uv = uv_intra_rate_tokenonly; - distortion2 += uv_intra_distortion; - distortion_uv = uv_intra_distortion; - } else { - this_rd = INT64_MAX; - disable_skip = 1; - } - } - break; - case I8X8_PRED: { - int64_t tmp_rd; - - tmp_rd = rd_pick_intra8x8mby_modes_and_txsz(cpi, x, &rate, &rate_y, - &distortion, mode8x8, - best_yrd, txfm_cache); - rate2 += rate; - rate2 += intra_cost_penalty; - distortion2 += distortion; - - /* TODO: uv rate maybe over-estimated here since there is UV intra - mode coded in I8X8_PRED prediction */ - if (tmp_rd < best_yrd) { - rate2 += uv_intra_rate; - rate_uv = uv_intra_rate_tokenonly; - distortion2 += uv_intra_distortion; - distortion_uv = uv_intra_distortion; - } else { - this_rd = INT64_MAX; - disable_skip = 1; - } - } - break; - } - } - // Split MV. The code is very different from the other inter modes so - // special case it. - else if (this_mode == SPLITMV) { - const int is_comp_pred = mbmi->second_ref_frame > 0; - int64_t this_rd_thresh; - int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX; - int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX; - int tmp_best_distortion = INT_MAX, tmp_best_skippable = 0; - int switchable_filter_index; - int_mv *second_ref = is_comp_pred ? &second_best_ref_mv : NULL; - union b_mode_info tmp_best_bmodes[16]; - MB_MODE_INFO tmp_best_mbmode; - PARTITION_INFO tmp_best_partition; - int pred_exists = 0; - - this_rd_thresh = - (mbmi->ref_frame == LAST_FRAME) ? 
- cpi->rd_threshes[THR_NEWMV] : cpi->rd_threshes[THR_NEWA]; - this_rd_thresh = - (mbmi->ref_frame == GOLDEN_FRAME) ? - cpi->rd_threshes[THR_NEWG] : this_rd_thresh; - xd->mode_info_context->mbmi.txfm_size = TX_4X4; - - for (switchable_filter_index = 0; - switchable_filter_index < VP9_SWITCHABLE_FILTERS; - ++switchable_filter_index) { - int newbest; - mbmi->interp_filter = - vp9_switchable_interp[switchable_filter_index]; - vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); - - tmp_rd = rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv, - second_ref, best_yrd, mdcounts, - &rate, &rate_y, &distortion, - &skippable, - (int)this_rd_thresh, seg_mvs, - txfm_cache); - if (cpi->common.mcomp_filter_type == SWITCHABLE) { - int rs = SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs - [vp9_get_pred_context(&cpi->common, xd, - PRED_SWITCHABLE_INTERP)] - [vp9_switchable_interp_map[mbmi->interp_filter]]; - tmp_rd += RDCOST(x->rdmult, x->rddiv, rs, 0); - } - newbest = (tmp_rd < tmp_best_rd); - if (newbest) { - tmp_best_filter = mbmi->interp_filter; - tmp_best_rd = tmp_rd; - } - if ((newbest && cm->mcomp_filter_type == SWITCHABLE) || - (mbmi->interp_filter == cm->mcomp_filter_type && - cm->mcomp_filter_type != SWITCHABLE)) { - tmp_best_rdu = tmp_rd; - tmp_best_rate = rate; - tmp_best_ratey = rate_y; - tmp_best_distortion = distortion; - tmp_best_skippable = skippable; - vpx_memcpy(&tmp_best_mbmode, mbmi, sizeof(MB_MODE_INFO)); - vpx_memcpy(&tmp_best_partition, x->partition_info, - sizeof(PARTITION_INFO)); - for (i = 0; i < 16; i++) { - tmp_best_bmodes[i] = xd->block[i].bmi; - } - pred_exists = 1; - } - } // switchable_filter_index loop - - mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE ? - tmp_best_filter : cm->mcomp_filter_type); - vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); - if (!pred_exists) { - // Handles the special case when a filter that is not in the - // switchable list (bilinear, 6-tap) is indicated at the frame level - tmp_rd = rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv, - second_ref, best_yrd, mdcounts, - &rate, &rate_y, &distortion, - &skippable, - (int)this_rd_thresh, seg_mvs, - txfm_cache); - } else { - if (cpi->common.mcomp_filter_type == SWITCHABLE) { - int rs = SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs - [vp9_get_pred_context(&cpi->common, xd, - PRED_SWITCHABLE_INTERP)] - [vp9_switchable_interp_map[mbmi->interp_filter]]; - tmp_best_rdu -= RDCOST(x->rdmult, x->rddiv, rs, 0); - } - tmp_rd = tmp_best_rdu; - rate = tmp_best_rate; - rate_y = tmp_best_ratey; - distortion = tmp_best_distortion; - skippable = tmp_best_skippable; - vpx_memcpy(mbmi, &tmp_best_mbmode, sizeof(MB_MODE_INFO)); - vpx_memcpy(x->partition_info, &tmp_best_partition, - sizeof(PARTITION_INFO)); - for (i = 0; i < 16; i++) { - xd->block[i].bmi = xd->mode_info_context->bmi[i] = tmp_best_bmodes[i]; - } - } - - rate2 += rate; - distortion2 += distortion; - - if (cpi->common.mcomp_filter_type == SWITCHABLE) - rate2 += SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs - [vp9_get_pred_context(&cpi->common, xd, PRED_SWITCHABLE_INTERP)] - [vp9_switchable_interp_map[mbmi->interp_filter]]; - - // If even the 'Y' rd value of split is higher than best so far - // then dont bother looking at UV - if (tmp_rd < best_yrd) { - int uv_skippable; - - vp9_build_inter4x4_predictors_mbuv(&x->e_mbd, mb_row, mb_col); - vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer, - x->e_mbd.predictor, x->src.uv_stride); - rd_inter16x16_uv_4x4(cpi, x, &rate_uv, 
&distortion_uv, - cpi->common.full_pixel, &uv_skippable, 1); - rate2 += rate_uv; - distortion2 += distortion_uv; - skippable = skippable && uv_skippable; + if (frame_mv[refs[0]].as_int == INVALID_MV || + frame_mv[refs[1]].as_int == INVALID_MV) + return INT64_MAX; + *rate2 += vp9_mv_bit_cost(&frame_mv[refs[0]], + &ref_mv[0], + x->nmvjointcost, x->mvcost, 96, + x->e_mbd.allow_high_precision_mv); + *rate2 += vp9_mv_bit_cost(&frame_mv[refs[1]], + &ref_mv[1], + x->nmvjointcost, x->mvcost, 96, + x->e_mbd.allow_high_precision_mv); } else { - this_rd = INT64_MAX; - disable_skip = 1; - } - - if (!mode_excluded) { - if (is_comp_pred) - mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY; - else - mode_excluded = cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY; - } - - compmode_cost = - vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_COMP), is_comp_pred); - mbmi->mode = this_mode; - } - else { -#if CONFIG_COMP_INTERINTRA_PRED - if (mbmi->second_ref_frame == INTRA_FRAME) { - if (best_intra16_mode == DC_PRED - 1) continue; - mbmi->interintra_mode = best_intra16_mode; -#if SEPARATE_INTERINTRA_UV - mbmi->interintra_uv_mode = best_intra16_uv_mode; -#else - mbmi->interintra_uv_mode = best_intra16_mode; -#endif - } -#endif - this_rd = handle_inter_mode(cpi, x, BLOCK_16X16, - &saddone, near_sadidx, mdcounts, txfm_cache, - &rate2, &distortion2, &skippable, - &compmode_cost, -#if CONFIG_COMP_INTERINTRA_PRED - &compmode_interintra_cost, -#endif - &rate_y, &distortion, - &rate_uv, &distortion_uv, - &mode_excluded, &disable_skip, - mode_index, &tmp_best_filter, frame_mv, - scaled_ref_frame, mb_row, mb_col); - if (this_rd == INT64_MAX) - continue; - } - -#if CONFIG_COMP_INTERINTRA_PRED - if (cpi->common.use_interintra) - rate2 += compmode_interintra_cost; -#endif - - if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) - rate2 += compmode_cost; - - // Estimate the reference frame signaling cost and add it - // to the rolling cost variable. - rate2 += ref_costs[mbmi->ref_frame]; - - if (!disable_skip) { - // Test for the condition where skip block will be activated - // because there are no non zero coefficients and make any - // necessary adjustment for rate. Ignore if skip is coded at - // segment level as the cost wont have been added in. - if (cpi->common.mb_no_coeff_skip) { - int mb_skip_allowed; + struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}}; + int bestsme = INT_MAX; + int further_steps, step_param = cpi->sf.first_step; + int sadpb = x->sadperbit16; + int_mv mvp_full, tmp_mv; + int sr = 0; - // Is Mb level skip allowed (i.e. not coded at segment level). - mb_skip_allowed = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP); + int tmp_col_min = x->mv_col_min; + int tmp_col_max = x->mv_col_max; + int tmp_row_min = x->mv_row_min; + int tmp_row_max = x->mv_row_max; - if (skippable) { - mbmi->mb_skip_coeff = 1; + if (scaled_ref_frame[0]) { + int i; - // Back out the coefficient coding costs - rate2 -= (rate_y + rate_uv); - // for best_yrd calculation - rate_uv = 0; + // Swap out the reference frame for a version that's been scaled to + // match the resolution of the current frame, allowing the existing + // motion search code to be used without additional modifications. 
+ for (i = 0; i < MAX_MB_PLANE; i++) + backup_yv12[i] = xd->plane[i].pre[0]; - if (mb_skip_allowed) { - int prob_skip_cost; + setup_pre_planes(xd, scaled_ref_frame[0], NULL, mi_row, mi_col, + NULL, NULL); + } - // Cost the skip mb case - vp9_prob skip_prob = - vp9_get_pred_prob(cm, &x->e_mbd, PRED_MBSKIP); + vp9_clamp_mv_min_max(x, &ref_mv[0]); - if (skip_prob) { - prob_skip_cost = vp9_cost_bit(skip_prob, 1); - rate2 += prob_skip_cost; - other_cost += prob_skip_cost; - } - } - } - // Add in the cost of the no skip flag. - else { - mbmi->mb_skip_coeff = 0; - if (mb_skip_allowed) { - int prob_skip_cost = vp9_cost_bit( - vp9_get_pred_prob(cm, &x->e_mbd, PRED_MBSKIP), 0); - rate2 += prob_skip_cost; - other_cost += prob_skip_cost; - } - } - } + sr = vp9_init_search_range(cpi->common.width, cpi->common.height); - // Calculate the final RD estimate for this mode. - this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); - } + // mvp_full.as_int = ref_mv[0].as_int; + mvp_full.as_int = + mbmi->ref_mvs[refs[0]][x->mv_best_ref_index[refs[0]]].as_int; - // Keep record of best intra distortion - if ((mbmi->ref_frame == INTRA_FRAME) && - (this_rd < best_intra_rd)) { - best_intra_rd = this_rd; - *returnintra = distortion2; - } -#if CONFIG_COMP_INTERINTRA_PRED - if ((mbmi->ref_frame == INTRA_FRAME) && - (this_mode <= TM_PRED) && - (this_rd < best_intra16_rd)) { - best_intra16_rd = this_rd; - best_intra16_mode = this_mode; -#if SEPARATE_INTERINTRA_UV - best_intra16_uv_mode = (mbmi->txfm_size != TX_4X4 ? - uv_intra_mode_8x8 : uv_intra_mode); -#endif - } -#endif + mvp_full.as_mv.col >>= 3; + mvp_full.as_mv.row >>= 3; - if (!disable_skip && mbmi->ref_frame == INTRA_FRAME) - for (i = 0; i < NB_PREDICTION_TYPES; ++i) - best_pred_rd[i] = MIN(best_pred_rd[i], this_rd); + // adjust search range according to sr from mv prediction + step_param = MAX(step_param, sr); - if (this_rd < best_overall_rd) { - best_overall_rd = this_rd; - best_filter = tmp_best_filter; - best_mode = this_mode; -#if CONFIG_COMP_INTERINTRA_PRED - is_best_interintra = (mbmi->second_ref_frame == INTRA_FRAME); -#endif - } + // Further step/diamond searches as necessary + further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; - // Did this mode help.. i.e. is it the new best mode - if (this_rd < best_rd || x->skip) { - if (!mode_excluded) { - /* - if (mbmi->second_ref_frame == INTRA_FRAME) { - printf("rd %d best %d bestintra16 %d\n", this_rd, best_rd, best_intra16_rd); - } - */ - // Note index of best mode so far - best_mode_index = mode_index; + bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param, + sadpb, further_steps, 1, + &cpi->fn_ptr[block_size], + &ref_mv[0], &tmp_mv); - if (this_mode <= B_PRED) { - if (mbmi->txfm_size != TX_4X4 - && this_mode != B_PRED - && this_mode != I8X8_PRED) - mbmi->uv_mode = uv_intra_mode_8x8; - else - mbmi->uv_mode = uv_intra_mode; - /* required for left and above block mv */ - mbmi->mv[0].as_int = 0; + x->mv_col_min = tmp_col_min; + x->mv_col_max = tmp_col_max; + x->mv_row_min = tmp_row_min; + x->mv_row_max = tmp_row_max; + + if (bestsme < INT_MAX) { + int dis; /* TODO: use dis in distortion calculation later. 
*/
+        unsigned int sse;
+        cpi->find_fractional_mv_step(x, &tmp_mv,
+                                     &ref_mv[0],
+                                     x->errorperbit,
+                                     &cpi->fn_ptr[block_size],
+                                     x->nmvjointcost, x->mvcost,
+                                     &dis, &sse);
       }
+      frame_mv[refs[0]].as_int = tmp_mv.as_int;
+      single_newmv[refs[0]].as_int = tmp_mv.as_int;
-      other_cost += ref_costs[mbmi->ref_frame];
+      // Add the new motion vector cost to our rolling cost variable
+      *rate2 += vp9_mv_bit_cost(&tmp_mv, &ref_mv[0],
+                                x->nmvjointcost, x->mvcost,
+                                96, xd->allow_high_precision_mv);
-      /* Calculate the final y RD estimate for this mode */
-      best_yrd = RDCOST(x->rdmult, x->rddiv, (rate2 - rate_uv - other_cost),
-                        (distortion2 - distortion_uv));
+      // restore the predictor, if required
+      if (scaled_ref_frame[0]) {
+        int i;
-      *returnrate = rate2;
-      *returndistortion = distortion2;
-      best_rd = this_rd;
-      vpx_memcpy(&best_mbmode, mbmi, sizeof(MB_MODE_INFO));
-      vpx_memcpy(&best_partition, x->partition_info, sizeof(PARTITION_INFO));
-
-      if ((this_mode == B_PRED)
-          || (this_mode == I8X8_PRED)
-          || (this_mode == SPLITMV))
-        for (i = 0; i < 16; i++) {
-          best_bmodes[i] = xd->block[i].bmi;
-        }
+        for (i = 0; i < MAX_MB_PLANE; i++)
+          xd->plane[i].pre[0] = backup_yv12[i];
+      }
     }
+      break;
+    case NEARMV:
+    case NEARESTMV:
+    case ZEROMV:
+    default:
+      break;
+  }
+  for (i = 0; i < num_refs; ++i) {
+    cur_mv[i] = frame_mv[refs[i]];
+    // Clip "next_nearest" so that it does not extend too far out of the image
+    if (this_mode == NEWMV)
+      assert(!clamp_mv2(&cur_mv[i], xd));
+    else
+      clamp_mv2(&cur_mv[i], xd);
-    // Testing this mode gave rise to an improvement in best error score.
-    // Lower threshold a bit for next time
-    cpi->rd_thresh_mult[mode_index] =
-        (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ?
-        cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT;
-    cpi->rd_threshes[mode_index] =
-        (cpi->rd_baseline_thresh[mode_index] >> 7) *
-        cpi->rd_thresh_mult[mode_index];
-  } else {
-    // If the mode did not help improve the best error case then raise the
-    // threshold for testing that mode next time around.
- cpi->rd_thresh_mult[mode_index] += 4; + if (mv_check_bounds(x, &cur_mv[i])) + return INT64_MAX; + mbmi->mv[i].as_int = cur_mv[i].as_int; + } - if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT) - cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT; - cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) - * cpi->rd_thresh_mult[mode_index]; - } + /* We don't include the cost of the second reference here, because there + * are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other + * words if you present them in that order, the second one is always known + * if the first is known */ + *rate2 += vp9_cost_mv_ref(cpi, this_mode, + mbmi->mb_mode_context[mbmi->ref_frame[0]]); - /* keep record of best compound/single-only prediction */ - if (!disable_skip && mbmi->ref_frame != INTRA_FRAME) { - int64_t single_rd, hybrid_rd; - int single_rate, hybrid_rate; + pred_exists = 0; + interpolating_intpel_seen = 0; + // Are all MVs integer pel for Y and UV + intpel_mv = (mbmi->mv[0].as_mv.row & 15) == 0 && + (mbmi->mv[0].as_mv.col & 15) == 0; + if (is_comp_pred) + intpel_mv &= (mbmi->mv[1].as_mv.row & 15) == 0 && + (mbmi->mv[1].as_mv.col & 15) == 0; + // Search for best switchable filter by checking the variance of + // pred error irrespective of whether the filter will be used + if (cpi->speed > 4) { + *best_filter = EIGHTTAP; + } else { + int i, newbest; + int tmp_rate_sum = 0, tmp_dist_sum = 0; + for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) { + int rs = 0; + const INTERPOLATIONFILTERTYPE filter = vp9_switchable_interp[i]; + const int is_intpel_interp = intpel_mv && + vp9_is_interpolating_filter[filter]; + mbmi->interp_filter = filter; + vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); - if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) { - single_rate = rate2 - compmode_cost; - hybrid_rate = rate2; + if (cm->mcomp_filter_type == SWITCHABLE) + rs = get_switchable_rate(cm, x); + + if (interpolating_intpel_seen && is_intpel_interp) { + rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate_sum, tmp_dist_sum); } else { - single_rate = rate2; - hybrid_rate = rate2 + compmode_cost; + int rate_sum = 0, dist_sum = 0; + vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); + model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum); + rd = RDCOST(x->rdmult, x->rddiv, rs + rate_sum, dist_sum); + if (!interpolating_intpel_seen && is_intpel_interp) { + tmp_rate_sum = rate_sum; + tmp_dist_sum = dist_sum; + } } + newbest = i == 0 || rd < best_rd; - single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2); - hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2); - - if (mbmi->second_ref_frame <= INTRA_FRAME && - single_rd < best_pred_rd[SINGLE_PREDICTION_ONLY]) { - best_pred_rd[SINGLE_PREDICTION_ONLY] = single_rd; - } else if (mbmi->second_ref_frame > INTRA_FRAME && - single_rd < best_pred_rd[COMP_PREDICTION_ONLY]) { - best_pred_rd[COMP_PREDICTION_ONLY] = single_rd; + if (newbest) { + best_rd = rd; + *best_filter = mbmi->interp_filter; } - if (hybrid_rd < best_pred_rd[HYBRID_PREDICTION]) - best_pred_rd[HYBRID_PREDICTION] = hybrid_rd; - } - /* keep record of best txfm size */ - if (!mode_excluded && this_rd != INT64_MAX) { - for (i = 0; i < NB_TXFM_MODES; i++) { - int64_t adj_rd; - if (this_mode != B_PRED) { - const int64_t txfm_mode_diff = - txfm_cache[i] - txfm_cache[cm->txfm_mode]; - adj_rd = this_rd + txfm_mode_diff; - } else { - adj_rd = this_rd; + if ((cm->mcomp_filter_type == SWITCHABLE && newbest) || + (cm->mcomp_filter_type != SWITCHABLE && + 
cm->mcomp_filter_type == mbmi->interp_filter)) {
+        int p;
+
+        for (p = 0; p < MAX_MB_PLANE; p++) {
+          const int y = (MI_SIZE * bh) >> xd->plane[p].subsampling_y;
+          const int x = (MI_SIZE * bw) >> xd->plane[p].subsampling_x;
+          int i;
+
+          for (i = 0; i < y; i++)
+            vpx_memcpy(&tmp_buf[p][64 * i],
+                       xd->plane[p].dst.buf + i * xd->plane[p].dst.stride, x);
        }
-        if (adj_rd < best_txfm_rd[i])
-          best_txfm_rd[i] = adj_rd;
+        pred_exists = 1;
      }
+      interpolating_intpel_seen |= is_intpel_interp;
    }
-
-    if (x->skip && !mode_excluded)
-      break;
  }
-  assert((cm->mcomp_filter_type == SWITCHABLE) ||
-         (cm->mcomp_filter_type == best_mbmode.interp_filter) ||
-         (best_mbmode.mode <= B_PRED));
+  // Set the appropriate filter
+  mbmi->interp_filter = cm->mcomp_filter_type != SWITCHABLE ?
+      cm->mcomp_filter_type : *best_filter;
+  vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
-#if CONFIG_COMP_INTERINTRA_PRED
-  ++cpi->interintra_select_count[is_best_interintra];
-#endif
-  // Accumulate filter usage stats
-  // TODO(agrange): Use RD criteria to select interpolation filter mode.
-  if ((best_mode >= NEARESTMV) && (best_mode <= SPLITMV))
-    ++cpi->best_switchable_interp_count[vp9_switchable_interp_map[best_filter]];
+  if (pred_exists) {
+    int p;
-  // Reduce the activation RD thresholds for the best choice mode
-  if ((cpi->rd_baseline_thresh[best_mode_index] > 0) &&
-      (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) {
-    int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 2);
+    for (p = 0; p < MAX_MB_PLANE; p++) {
+      const int y = (MI_SIZE * bh) >> xd->plane[p].subsampling_y;
+      const int x = (MI_SIZE * bw) >> xd->plane[p].subsampling_x;
+      int i;
-    cpi->rd_thresh_mult[best_mode_index] =
-        (cpi->rd_thresh_mult[best_mode_index] >=
-         (MIN_THRESHMULT + best_adjustment)) ?
-        cpi->rd_thresh_mult[best_mode_index] - best_adjustment : MIN_THRESHMULT;
-    cpi->rd_threshes[best_mode_index] =
-        (cpi->rd_baseline_thresh[best_mode_index] >> 7) *
-        cpi->rd_thresh_mult[best_mode_index];
+      for (i = 0; i < y; i++)
+        vpx_memcpy(xd->plane[p].dst.buf + i * xd->plane[p].dst.stride,
+                   &tmp_buf[p][64 * i], x);
+    }
+  } else {
+    // Handles the special case when a filter that is not in the
+    // switchable list (ex. bilinear, 6-tap) is indicated at the frame level
+    vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
  }
-  // This code forces Altref,0,0 and skip for the frame that overlays
-  // an altref unless Altref is filtered. However, this is unsafe if
-  // segment level coding of ref frame is enabled for this
-  // segment.
-  if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
-      cpi->is_src_frame_alt_ref &&
-      (cpi->oxcf.arnr_max_frames == 0) &&
-      (best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME)) {
-    mbmi->mode = ZEROMV;
-    if (cm->txfm_mode <= ALLOW_8X8)
-      mbmi->txfm_size = cm->txfm_mode;
-    else
-      mbmi->txfm_size = TX_16X16;
-    mbmi->ref_frame = ALTREF_FRAME;
-    mbmi->mv[0].as_int = 0;
-    mbmi->uv_mode = DC_PRED;
-    mbmi->mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 1 : 0;
-    mbmi->partitioning = 0;
-    set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
-                      scale_factor);
-
-    vpx_memset(best_pred_diff, 0, sizeof(best_pred_diff));
-    vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff));
-    goto end;
-  }
+  if (cpi->common.mcomp_filter_type == SWITCHABLE)
+    *rate2 += get_switchable_rate(cm, x);
-  // macroblock modes
-  vpx_memcpy(mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
-  if (best_mbmode.mode == B_PRED) {
-    for (i = 0; i < 16; i++) {
-      xd->mode_info_context->bmi[i].as_mode = best_bmodes[i].as_mode;
-      xd->block[i].bmi.as_mode = xd->mode_info_context->bmi[i].as_mode;
-    }
-  }
+  if (cpi->active_map_enabled && x->active_ptr[0] == 0)
+    x->skip = 1;
+  else if (x->encode_breakout) {
+    unsigned int var, sse;
+    int threshold = (xd->plane[0].dequant[1]
+                     * xd->plane[0].dequant[1] >> 4);
-  if (best_mbmode.mode == I8X8_PRED)
-    set_i8x8_block_modes(x, mode8x8);
+    if (threshold < x->encode_breakout)
+      threshold = x->encode_breakout;
-  if (best_mbmode.mode == SPLITMV) {
-    for (i = 0; i < 16; i++)
-      xd->mode_info_context->bmi[i].as_mv[0].as_int =
-          best_bmodes[i].as_mv[0].as_int;
-    if (mbmi->second_ref_frame > 0)
-      for (i = 0; i < 16; i++)
-        xd->mode_info_context->bmi[i].as_mv[1].as_int =
-            best_bmodes[i].as_mv[1].as_int;
+    var = cpi->fn_ptr[block_size].vf(x->plane[0].src.buf,
+                                     x->plane[0].src.stride,
+                                     xd->plane[0].dst.buf,
+                                     xd->plane[0].dst.stride,
+                                     &sse);
-    vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO));
+    if ((int)sse < threshold) {
+      unsigned int q2dc = xd->plane[0].dequant[0];
+      /* If there is no codeable 2nd order dc
+         or a very small uniform pixel change */
+      if ((sse - var < q2dc * q2dc >> 4) ||
+          (sse / 2 > var && sse - var < 64)) {
+        // Check u and v to make sure skip is ok
+        int sse2;
+        unsigned int sse2u, sse2v;
+        var = cpi->fn_ptr[uv_block_size].vf(x->plane[1].src.buf,
+                                            x->plane[1].src.stride,
+                                            xd->plane[1].dst.buf,
+                                            xd->plane[1].dst.stride, &sse2u);
+        var = cpi->fn_ptr[uv_block_size].vf(x->plane[2].src.buf,
+                                            x->plane[1].src.stride,
+                                            xd->plane[2].dst.buf,
+                                            xd->plane[1].dst.stride, &sse2v);
+        sse2 = sse2u + sse2v;
-    mbmi->mv[0].as_int = x->partition_info->bmi[15].mv.as_int;
-    mbmi->mv[1].as_int = x->partition_info->bmi[15].second_mv.as_int;
-  }
+        if (sse2 * 2 < threshold) {
+          x->skip = 1;
+          *distortion = sse + sse2;
+          *rate2 = 500;
-  for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
-    if (best_pred_rd[i] == INT64_MAX)
-      best_pred_diff[i] = INT_MIN;
-    else
-      best_pred_diff[i] = best_rd - best_pred_rd[i];
-  }
+          /* for best_yrd calculation */
+          *rate_uv = 0;
+          *distortion_uv = sse2;
-  if (!x->skip) {
-    for (i = 0; i < NB_TXFM_MODES; i++) {
-      if (best_txfm_rd[i] == INT64_MAX)
-        best_txfm_diff[i] = 0;
-      else
-        best_txfm_diff[i] = best_rd - best_txfm_rd[i];
+          *disable_skip = 1;
+          this_rd = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
+        }
+      }
    }
-  } else {
-    vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff));
  }
-end:
-  set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
-                    scale_factor);
-  store_coding_context(x, &x->mb_context[xd->sb_index][xd->mb_index],
-                       best_mode_index, &best_partition,
-                       &mbmi->ref_mvs[mbmi->ref_frame][0],
-                       &mbmi->ref_mvs[mbmi->second_ref_frame < 0 ?
0 : - mbmi->second_ref_frame][0], - best_pred_diff, best_txfm_diff); -} + if (!x->skip) { + int skippable_y, skippable_uv; -void vp9_rd_pick_intra_mode_sb32(VP9_COMP *cpi, MACROBLOCK *x, - int *returnrate, - int *returndist) { - VP9_COMMON *cm = &cpi->common; - MACROBLOCKD *xd = &x->e_mbd; - int rate_y = 0, rate_uv; - int rate_y_tokenonly = 0, rate_uv_tokenonly; - int dist_y = 0, dist_uv; - int y_skip = 0, uv_skip; - int64_t txfm_cache[NB_TXFM_MODES], err; - int i; + // Y cost and distortion + super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, + bsize, txfm_cache); - xd->mode_info_context->mbmi.mode = DC_PRED; - err = rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, - &dist_y, &y_skip, txfm_cache); - rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, - &dist_uv, &uv_skip); + *rate2 += *rate_y; + *distortion += *distortion_y; - if (cpi->common.mb_no_coeff_skip && y_skip && uv_skip) { - *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly + - vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 1); - *returndist = dist_y + (dist_uv >> 2); - memset(x->sb32_context[xd->sb_index].txfm_rd_diff, 0, - sizeof(x->sb32_context[xd->sb_index].txfm_rd_diff)); - } else { - *returnrate = rate_y + rate_uv; - if (cpi->common.mb_no_coeff_skip) - *returnrate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0); - *returndist = dist_y + (dist_uv >> 2); - for (i = 0; i < NB_TXFM_MODES; i++) { - x->sb32_context[xd->sb_index].txfm_rd_diff[i] = err - txfm_cache[i]; + super_block_uvrd(cm, x, rate_uv, distortion_uv, + &skippable_uv, bsize); + + *rate2 += *rate_uv; + *distortion += *distortion_uv; + *skippable = skippable_y && skippable_uv; + } + + if (!(*mode_excluded)) { + if (is_comp_pred) { + *mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY); + } else { + *mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY); } } + + return this_rd; // if 0, this will be re-calculated by caller } -void vp9_rd_pick_intra_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x, - int *returnrate, - int *returndist) { +void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, + int *returnrate, int *returndist, + BLOCK_SIZE_TYPE bsize, + PICK_MODE_CONTEXT *ctx) { VP9_COMMON *cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; int rate_y = 0, rate_uv; @@ -5010,193 +2379,80 @@ void vp9_rd_pick_intra_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x, int dist_y = 0, dist_uv; int y_skip = 0, uv_skip; int64_t txfm_cache[NB_TXFM_MODES], err; + MB_PREDICTION_MODE mode; + TX_SIZE txfm_size; + int rate4x4_y, rate4x4_y_tokenonly, dist4x4_y; + int64_t err4x4 = INT64_MAX; int i; + vpx_memset(&txfm_cache,0,sizeof(txfm_cache)); + ctx->skip = 0; xd->mode_info_context->mbmi.mode = DC_PRED; - err = rd_pick_intra_sb64y_mode(cpi, x, &rate_y, &rate_y_tokenonly, - &dist_y, &y_skip, txfm_cache); - rd_pick_intra_sb64uv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, - &dist_uv, &uv_skip); - - if (cpi->common.mb_no_coeff_skip && y_skip && uv_skip) { + xd->mode_info_context->mbmi.ref_frame[0] = INTRA_FRAME; + err = rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, + &dist_y, &y_skip, bsize, txfm_cache); + mode = xd->mode_info_context->mbmi.mode; + txfm_size = xd->mode_info_context->mbmi.txfm_size; + rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, + &dist_uv, &uv_skip, + (bsize < BLOCK_SIZE_SB8X8) ? 
BLOCK_SIZE_SB8X8 : + bsize); + if (bsize < BLOCK_SIZE_SB8X8) + err4x4 = rd_pick_intra4x4mby_modes(cpi, x, &rate4x4_y, + &rate4x4_y_tokenonly, + &dist4x4_y, err); + + if (y_skip && uv_skip) { *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly + - vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 1); + vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 1); *returndist = dist_y + (dist_uv >> 2); - memset(x->sb64_context.txfm_rd_diff, 0, - sizeof(x->sb64_context.txfm_rd_diff)); + memset(ctx->txfm_rd_diff, 0, sizeof(ctx->txfm_rd_diff)); + xd->mode_info_context->mbmi.mode = mode; + xd->mode_info_context->mbmi.txfm_size = txfm_size; + } else if (bsize < BLOCK_SIZE_SB8X8 && err4x4 < err) { + *returnrate = rate4x4_y + rate_uv + + vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0); + *returndist = dist4x4_y + (dist_uv >> 2); + vpx_memset(ctx->txfm_rd_diff, 0, sizeof(ctx->txfm_rd_diff)); + xd->mode_info_context->mbmi.txfm_size = TX_4X4; } else { - *returnrate = rate_y + rate_uv; - if (cm->mb_no_coeff_skip) - *returnrate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0); + *returnrate = rate_y + rate_uv + + vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0); *returndist = dist_y + (dist_uv >> 2); for (i = 0; i < NB_TXFM_MODES; i++) { - x->sb64_context.txfm_rd_diff[i] = err - txfm_cache[i]; - } - } -} - -void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, - int *returnrate, int *returndist) { - VP9_COMMON *cm = &cpi->common; - MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; - int64_t error4x4, error16x16; - int rate4x4, rate16x16 = 0, rateuv, rateuv8x8; - int dist4x4 = 0, dist16x16 = 0, distuv = 0, distuv8x8 = 0; - int rate; - int rate4x4_tokenonly = 0; - int rate16x16_tokenonly = 0; - int rateuv_tokenonly = 0, rateuv8x8_tokenonly = 0; - int64_t error8x8; - int rate8x8_tokenonly=0; - int rate8x8, dist8x8; - int mode16x16; - int mode8x8[4]; - int dist; - int modeuv, modeuv8x8, uv_intra_skippable, uv_intra_skippable_8x8; - int y_intra16x16_skippable = 0; - int64_t txfm_cache[2][NB_TXFM_MODES]; - TX_SIZE txfm_size_16x16, txfm_size_8x8; - int i; - - mbmi->ref_frame = INTRA_FRAME; - mbmi->mode = DC_PRED; - rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv, - &uv_intra_skippable); - modeuv = mbmi->uv_mode; - if (cpi->common.txfm_mode != ONLY_4X4) { - rd_pick_intra_mbuv_mode_8x8(cpi, x, &rateuv8x8, &rateuv8x8_tokenonly, - &distuv8x8, &uv_intra_skippable_8x8); - modeuv8x8 = mbmi->uv_mode; - } else { - uv_intra_skippable_8x8 = uv_intra_skippable; - rateuv8x8 = rateuv; - distuv8x8 = distuv; - rateuv8x8_tokenonly = rateuv_tokenonly; - modeuv8x8 = modeuv; - } - - // current macroblock under rate-distortion optimization test loop - error16x16 = rd_pick_intra16x16mby_mode(cpi, x, &rate16x16, - &rate16x16_tokenonly, &dist16x16, - &y_intra16x16_skippable, - txfm_cache[1]); - mode16x16 = mbmi->mode; - txfm_size_16x16 = mbmi->txfm_size; - if (cpi->common.mb_no_coeff_skip && y_intra16x16_skippable && - ((cm->txfm_mode == ONLY_4X4 && uv_intra_skippable) || - (cm->txfm_mode != ONLY_4X4 && uv_intra_skippable_8x8))) { - error16x16 -= RDCOST(x->rdmult, x->rddiv, rate16x16_tokenonly, 0); - rate16x16 -= rate16x16_tokenonly; - } - for (i = 0; i < NB_TXFM_MODES; i++) { - txfm_cache[0][i] = error16x16 - txfm_cache[1][cm->txfm_mode] + - txfm_cache[1][i]; - } - - error8x8 = rd_pick_intra8x8mby_modes_and_txsz(cpi, x, &rate8x8, - &rate8x8_tokenonly, - &dist8x8, mode8x8, - error16x16, txfm_cache[1]); - txfm_size_8x8 = 
mbmi->txfm_size; - for (i = 0; i < NB_TXFM_MODES; i++) { - int64_t tmp_rd = error8x8 - txfm_cache[1][cm->txfm_mode] + txfm_cache[1][i]; - if (tmp_rd < txfm_cache[0][i]) - txfm_cache[0][i] = tmp_rd; - } - - mbmi->txfm_size = TX_4X4; - error4x4 = rd_pick_intra4x4mby_modes(cpi, x, - &rate4x4, &rate4x4_tokenonly, - &dist4x4, error16x16); - for (i = 0; i < NB_TXFM_MODES; i++) { - if (error4x4 < txfm_cache[0][i]) - txfm_cache[0][i] = error4x4; - } - - mbmi->mb_skip_coeff = 0; - if (cpi->common.mb_no_coeff_skip && y_intra16x16_skippable && - ((cm->txfm_mode == ONLY_4X4 && uv_intra_skippable) || - (cm->txfm_mode != ONLY_4X4 && uv_intra_skippable_8x8))) { - mbmi->mb_skip_coeff = 1; - mbmi->mode = mode16x16; - mbmi->uv_mode = (cm->txfm_mode == ONLY_4X4) ? modeuv : modeuv8x8; - rate = rate16x16 + vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 1); - dist = dist16x16; - if (cm->txfm_mode == ONLY_4X4) { - rate += rateuv - rateuv_tokenonly; - dist += (distuv >> 2); - } else { - rate += rateuv8x8 - rateuv8x8_tokenonly; - dist += (distuv8x8 >> 2); - } - - mbmi->txfm_size = txfm_size_16x16; - } else if (error8x8 > error16x16) { - if (error4x4 < error16x16) { - rate = rateuv + rate4x4; - mbmi->mode = B_PRED; - mbmi->txfm_size = TX_4X4; - dist = dist4x4 + (distuv >> 2); - } else { - mbmi->txfm_size = txfm_size_16x16; - mbmi->mode = mode16x16; - rate = rate16x16 + rateuv8x8; - dist = dist16x16 + (distuv8x8 >> 2); - } - if (cpi->common.mb_no_coeff_skip) - rate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0); - } else { - if (error4x4 < error8x8) { - rate = rateuv + rate4x4; - mbmi->mode = B_PRED; - mbmi->txfm_size = TX_4X4; - dist = dist4x4 + (distuv >> 2); - } else { - mbmi->mode = I8X8_PRED; - mbmi->txfm_size = txfm_size_8x8; - set_i8x8_block_modes(x, mode8x8); - rate = rate8x8 + rateuv; - dist = dist8x8 + (distuv >> 2); + ctx->txfm_rd_diff[i] = txfm_cache[i] - txfm_cache[cm->txfm_mode]; } - if (cpi->common.mb_no_coeff_skip) - rate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0); - } - - for (i = 0; i < NB_TXFM_MODES; i++) { - x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff[i] = - txfm_cache[0][cm->txfm_mode] - txfm_cache[0][i]; + xd->mode_info_context->mbmi.txfm_size = txfm_size; + xd->mode_info_context->mbmi.mode = mode; } - *returnrate = rate; - *returndist = dist; + ctx->mic = *xd->mode_info_context; } -static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, - int mb_row, int mb_col, - int *returnrate, - int *returndistortion, - int block_size) { +int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, + int mi_row, int mi_col, + int *returnrate, + int *returndistortion, + BLOCK_SIZE_TYPE bsize, + PICK_MODE_CONTEXT *ctx) { VP9_COMMON *cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; + const enum BlockSize block_size = get_plane_block_size(bsize, &xd->plane[0]); MB_PREDICTION_MODE this_mode; MB_PREDICTION_MODE best_mode = DC_PRED; MV_REFERENCE_FRAME ref_frame; unsigned char segment_id = xd->mode_info_context->mbmi.segment_id; int comp_pred, i; int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; - int frame_mdcounts[4][4]; - YV12_BUFFER_CONFIG yv12_mb[4]; + struct buf_2d yv12_mb[4][MAX_MB_PLANE]; + int_mv single_newmv[MAX_REF_FRAMES]; static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, VP9_ALT_FLAG }; int idx_list[4] = {0, cpi->lst_fb_idx, cpi->gld_fb_idx, cpi->alt_fb_idx}; - int mdcounts[4]; - int near_sadidx[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; - int saddone = 0; int64_t best_rd = INT64_MAX; 
int64_t best_txfm_rd[NB_TXFM_MODES];
  int64_t best_txfm_diff[NB_TXFM_MODES];
@@ -5205,119 +2461,105 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
  MB_MODE_INFO best_mbmode;
  int j;
  int mode_index, best_mode_index = 0;
-  unsigned int ref_costs[MAX_REF_FRAMES];
-#if CONFIG_COMP_INTERINTRA_PRED
-  int is_best_interintra = 0;
-  int64_t best_intra16_rd = INT64_MAX;
-  int best_intra16_mode = DC_PRED;
-#if SEPARATE_INTERINTRA_UV
-  int best_intra16_uv_mode = DC_PRED;
-#endif
-#endif
+  unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
+  vp9_prob comp_mode_p;
  int64_t best_overall_rd = INT64_MAX;
  INTERPOLATIONFILTERTYPE best_filter = SWITCHABLE;
  INTERPOLATIONFILTERTYPE tmp_best_filter = SWITCHABLE;
-  int rate_uv_4x4 = 0, rate_uv_8x8 = 0, rate_uv_tokenonly_4x4 = 0,
-      rate_uv_tokenonly_8x8 = 0;
-  int dist_uv_4x4 = 0, dist_uv_8x8 = 0, uv_skip_4x4 = 0, uv_skip_8x8 = 0;
-  MB_PREDICTION_MODE mode_uv_4x4 = NEARESTMV, mode_uv_8x8 = NEARESTMV;
-  int rate_uv_16x16 = 0, rate_uv_tokenonly_16x16 = 0;
-  int dist_uv_16x16 = 0, uv_skip_16x16 = 0;
-  MB_PREDICTION_MODE mode_uv_16x16 = NEARESTMV;
+  int rate_uv_intra[TX_SIZE_MAX_SB], rate_uv_tokenonly[TX_SIZE_MAX_SB];
+  int dist_uv[TX_SIZE_MAX_SB], skip_uv[TX_SIZE_MAX_SB];
+  MB_PREDICTION_MODE mode_uv[TX_SIZE_MAX_SB];
  struct scale_factors scale_factor[4];
  unsigned int ref_frame_mask = 0;
  unsigned int mode_mask = 0;
+  int64_t mode_distortions[MB_MODE_COUNT];
+  int64_t frame_distortions[MAX_REF_FRAMES];
+  int intra_cost_penalty = 20 * vp9_dc_quant(cpi->common.base_qindex,
+                                             cpi->common.y_dc_delta_q);
+  int_mv seg_mvs[4][MAX_REF_FRAMES];
+  union b_mode_info best_bmodes[4];
+  PARTITION_INFO best_partition;
+  int bwsl = b_width_log2(bsize);
+  int bws = (1 << bwsl) / 4;  // mode_info step for subsize
+  int bhsl = b_height_log2(bsize);
+  int bhs = (1 << bhsl) / 4;  // mode_info step for subsize
+
+  for (i = 0; i < 4; i++) {
+    int j;
+
+    for (j = 0; j < MAX_REF_FRAMES; j++)
+      seg_mvs[i][j].as_int = INVALID_MV;
+  }
+  // Fill every entry with the -1 sentinel explicitly; a braced {-1}
+  // initializer would only set the first element.
+  for (i = 0; i < MB_MODE_COUNT; i++)
+    mode_distortions[i] = -1;
+  for (i = 0; i < MAX_REF_FRAMES; i++)
+    frame_distortions[i] = -1;
+  // A set flag bit marks a mode or frame whose error was much higher than
+  // the best found at this level.
+  ctx->frames_with_high_error = 0;
+  ctx->modes_with_high_error = 0;

  xd->mode_info_context->mbmi.segment_id = segment_id;
-  estimate_ref_frame_costs(cpi, segment_id, ref_costs);
+  estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp,
+                           &comp_mode_p);

  vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
+  vpx_memset(&single_newmv, 0, sizeof(single_newmv));

  for (i = 0; i < NB_PREDICTION_TYPES; ++i)
    best_pred_rd[i] = INT64_MAX;
  for (i = 0; i < NB_TXFM_MODES; i++)
    best_txfm_rd[i] = INT64_MAX;

-  // Create a mask set to 1 for each frame used by a smaller resolution.p
-  if (cpi->Speed > 0) {
+  // Create a mask set to 1 for each frame used by a smaller resolution.
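+  // Each sub-block context contributes its high-error flags; the masks are
+  // inverted below so that a set bit afterwards means the frame or mode is
+  // still worth searching at this size.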
+  if (cpi->speed > 0) {
    switch (block_size) {
      case BLOCK_64X64:
        for (i = 0; i < 4; i++) {
          for (j = 0; j < 4; j++) {
-            ref_frame_mask |= (1 << x->mb_context[i][j].mic.mbmi.ref_frame);
-            mode_mask |= (1 << x->mb_context[i][j].mic.mbmi.mode);
+            ref_frame_mask |= x->mb_context[i][j].frames_with_high_error;
+            mode_mask |= x->mb_context[i][j].modes_with_high_error;
          }
        }
        for (i = 0; i < 4; i++) {
-          ref_frame_mask |= (1 << x->sb32_context[i].mic.mbmi.ref_frame);
-          mode_mask |= (1 << x->sb32_context[i].mic.mbmi.mode);
+          ref_frame_mask |= x->sb32_context[i].frames_with_high_error;
+          mode_mask |= x->sb32_context[i].modes_with_high_error;
        }
        break;
      case BLOCK_32X32:
        for (i = 0; i < 4; i++) {
-          ref_frame_mask |= (1
-              << x->mb_context[xd->sb_index][i].mic.mbmi.ref_frame);
-          mode_mask |= (1 << x->mb_context[xd->sb_index][i].mic.mbmi.mode);
+          ref_frame_mask |=
+              x->mb_context[xd->sb_index][i].frames_with_high_error;
+          mode_mask |= x->mb_context[xd->sb_index][i].modes_with_high_error;
        }
        break;
+      default:
+        // Until we handle all block sizes, mark every frame and mode
+        // as usable.
+        ref_frame_mask = 0;
+        mode_mask = 0;
+        break;
    }
+    ref_frame_mask = ~ref_frame_mask;
+    mode_mask = ~mode_mask;
  }

  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
    if (cpi->ref_frame_flags & flag_list[ref_frame]) {
      setup_buffer_inter(cpi, x, idx_list[ref_frame], ref_frame, block_size,
-                         mb_row, mb_col, frame_mv[NEARESTMV], frame_mv[NEARMV],
-                         frame_mdcounts, yv12_mb, scale_factor);
+                         mi_row, mi_col, frame_mv[NEARESTMV], frame_mv[NEARMV],
+                         yv12_mb, scale_factor);
    }
    frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
    frame_mv[ZEROMV][ref_frame].as_int = 0;
  }

-  // Disallow intra if none of the smaller prediction sizes used intra and
-  // speed > 0 ;
-  if (cpi->Speed == 0
-      || ( cpi->Speed > 0 && (ref_frame_mask & (1 << INTRA_FRAME)))) {
-    if (block_size == BLOCK_64X64) {
-      mbmi->mode = DC_PRED;
-      if (cm->txfm_mode == ONLY_4X4 || cm->txfm_mode == TX_MODE_SELECT) {
-        mbmi->txfm_size = TX_4X4;
-        rd_pick_intra_sb64uv_mode(cpi, x, &rate_uv_4x4, &rate_uv_tokenonly_4x4,
-                                  &dist_uv_4x4, &uv_skip_4x4);
-        mode_uv_4x4 = mbmi->uv_mode;
-      }
-      if (cm->txfm_mode != ONLY_4X4) {
-        mbmi->txfm_size = TX_8X8;
-        rd_pick_intra_sb64uv_mode(cpi, x, &rate_uv_8x8, &rate_uv_tokenonly_8x8,
-                                  &dist_uv_8x8, &uv_skip_8x8);
-        mode_uv_8x8 = mbmi->uv_mode;
-      }
-      if (cm->txfm_mode >= ALLOW_32X32) {
-        mbmi->txfm_size = TX_32X32;
-        rd_pick_intra_sb64uv_mode(cpi, x, &rate_uv_16x16,
-                                  &rate_uv_tokenonly_16x16, &dist_uv_16x16,
-                                  &uv_skip_16x16);
-        mode_uv_16x16 = mbmi->uv_mode;
-      }
-    } else {
-      assert(block_size == BLOCK_32X32);
-      mbmi->mode = DC_PRED;
-      if (cm->txfm_mode == ONLY_4X4 || cm->txfm_mode == TX_MODE_SELECT) {
-        mbmi->txfm_size = TX_4X4;
-        rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_4x4, &rate_uv_tokenonly_4x4,
-                                &dist_uv_4x4, &uv_skip_4x4);
-        mode_uv_4x4 = mbmi->uv_mode;
-      }
-      if (cm->txfm_mode != ONLY_4X4) {
-        mbmi->txfm_size = TX_8X8;
-        rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_8x8, &rate_uv_tokenonly_8x8,
-                                &dist_uv_8x8, &uv_skip_8x8);
-        mode_uv_8x8 = mbmi->uv_mode;
-      }
-      if (cm->txfm_mode >= ALLOW_32X32) {
-        mbmi->txfm_size = TX_32X32;
-        rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_16x16,
-                                &rate_uv_tokenonly_16x16, &dist_uv_16x16,
-                                &uv_skip_16x16);
-        mode_uv_16x16 = mbmi->uv_mode;
-      }
+  if (cpi->speed == 0
+      || (cpi->speed > 0 && (ref_frame_mask & (1 << INTRA_FRAME)))) {
+    mbmi->mode = DC_PRED;
+    mbmi->ref_frame[0] = INTRA_FRAME;
+    for (i = 0; i <= (bsize < BLOCK_SIZE_MB16X16 ? TX_4X4 :
+                      (bsize < BLOCK_SIZE_SB32X32 ? TX_8X8 :
+                       (bsize < BLOCK_SIZE_SB64X64 ?
TX_16X16 : TX_32X32))); + i++) { + mbmi->txfm_size = i; + rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_intra[i], &rate_uv_tokenonly[i], + &dist_uv[i], &skip_uv[i], + (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 : + bsize); + mode_uv[i] = mbmi->uv_mode; } } @@ -5325,33 +2567,39 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int mode_excluded = 0; int64_t this_rd = INT64_MAX; int disable_skip = 0; - int other_cost = 0; int compmode_cost = 0; int rate2 = 0, rate_y = 0, rate_uv = 0; int distortion2 = 0, distortion_y = 0, distortion_uv = 0; int skippable; int64_t txfm_cache[NB_TXFM_MODES]; -#if CONFIG_COMP_INTERINTRA_PRED - int compmode_interintra_cost = 0; -#endif + int i; + + for (i = 0; i < NB_TXFM_MODES; ++i) + txfm_cache[i] = INT64_MAX; // Test best rd so far against threshold for trying this mode. - if (best_rd <= cpi->rd_threshes[mode_index] || - cpi->rd_threshes[mode_index] == INT_MAX) { + if ((best_rd < ((cpi->rd_threshes[bsize][mode_index] * + cpi->rd_thresh_freq_fact[bsize][mode_index]) >> 4)) || + cpi->rd_threshes[bsize][mode_index] == INT_MAX) + continue; + + // Do not allow compound prediction if the segment level reference + // frame feature is in use as in this case there can only be one reference. + if ((vp9_mode_order[mode_index].second_ref_frame > INTRA_FRAME) && + vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME)) continue; - } x->skip = 0; this_mode = vp9_mode_order[mode_index].mode; ref_frame = vp9_mode_order[mode_index].ref_frame; - if (!(ref_frame == INTRA_FRAME - || (cpi->ref_frame_flags & flag_list[ref_frame]))) { - continue; - } - if (cpi->Speed > 0) { + + if (cpi->speed > 0 && bsize >= BLOCK_SIZE_SB8X8) { if (!(ref_frame_mask & (1 << ref_frame))) { continue; } + if (!(mode_mask & (1 << this_mode))) { + continue; + } if (vp9_mode_order[mode_index].second_ref_frame != NONE && !(ref_frame_mask & (1 << vp9_mode_order[mode_index].second_ref_frame))) { @@ -5359,75 +2607,89 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } } - mbmi->ref_frame = ref_frame; - mbmi->second_ref_frame = vp9_mode_order[mode_index].second_ref_frame; - set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame, + mbmi->ref_frame[0] = ref_frame; + mbmi->ref_frame[1] = vp9_mode_order[mode_index].second_ref_frame; + + if (!(ref_frame == INTRA_FRAME + || (cpi->ref_frame_flags & flag_list[ref_frame]))) { + continue; + } + if (!(mbmi->ref_frame[1] == NONE + || (cpi->ref_frame_flags & flag_list[mbmi->ref_frame[1]]))) { + continue; + } + + // TODO(jingning, jkoleszar): scaling reference frame not supported for + // SPLITMV. 
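+  // The sub-block motion search assumes unit-scale references, so SPLITMV
+  // is skipped below whenever either reference frame is scaled.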
+ if (mbmi->ref_frame[0] > 0 && + (scale_factor[mbmi->ref_frame[0]].x_scale_fp != + (1 << VP9_REF_SCALE_SHIFT) || + scale_factor[mbmi->ref_frame[0]].y_scale_fp != + (1 << VP9_REF_SCALE_SHIFT)) && + this_mode == SPLITMV) + continue; + + if (mbmi->ref_frame[1] > 0 && + (scale_factor[mbmi->ref_frame[1]].x_scale_fp != + (1 << VP9_REF_SCALE_SHIFT) || + scale_factor[mbmi->ref_frame[1]].y_scale_fp != + (1 << VP9_REF_SCALE_SHIFT)) && + this_mode == SPLITMV) + continue; + + set_scale_factors(xd, mbmi->ref_frame[0], mbmi->ref_frame[1], scale_factor); - comp_pred = mbmi->second_ref_frame > INTRA_FRAME; + comp_pred = mbmi->ref_frame[1] > INTRA_FRAME; mbmi->mode = this_mode; mbmi->uv_mode = DC_PRED; -#if CONFIG_COMP_INTERINTRA_PRED - mbmi->interintra_mode = (MB_PREDICTION_MODE)(DC_PRED - 1); - mbmi->interintra_uv_mode = (MB_PREDICTION_MODE)(DC_PRED - 1); -#endif // Evaluate all sub-pel filters irrespective of whether we can use // them for this frame. mbmi->interp_filter = cm->mcomp_filter_type; vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); - // if (!(cpi->ref_frame_flags & flag_list[ref_frame])) - // continue; - - if (this_mode == I8X8_PRED || this_mode == B_PRED || this_mode == SPLITMV) + if (bsize >= BLOCK_SIZE_SB8X8 && + (this_mode == I4X4_PRED || this_mode == SPLITMV)) + continue; + if (bsize < BLOCK_SIZE_SB8X8 && + !(this_mode == I4X4_PRED || this_mode == SPLITMV)) continue; - // if (vp9_mode_order[mode_index].second_ref_frame == INTRA_FRAME) - // continue; if (comp_pred) { - int second_ref; - - if (ref_frame == ALTREF_FRAME) { - second_ref = LAST_FRAME; - } else { - second_ref = ref_frame + 1; - } - if (!(cpi->ref_frame_flags & flag_list[second_ref])) + if (!(cpi->ref_frame_flags & flag_list[mbmi->ref_frame[1]])) continue; - mbmi->second_ref_frame = second_ref; - set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame, + set_scale_factors(xd, mbmi->ref_frame[0], mbmi->ref_frame[1], scale_factor); - xd->second_pre = yv12_mb[second_ref]; mode_excluded = mode_excluded ? mode_excluded : cm->comp_pred_mode == SINGLE_PREDICTION_ONLY; } else { - // mbmi->second_ref_frame = vp9_mode_order[mode_index].second_ref_frame; + // mbmi->ref_frame[1] = vp9_mode_order[mode_index].ref_frame[1]; if (ref_frame != INTRA_FRAME) { - if (mbmi->second_ref_frame != INTRA_FRAME) + if (mbmi->ref_frame[1] != INTRA_FRAME) mode_excluded = mode_excluded ? mode_excluded : cm->comp_pred_mode == COMP_PREDICTION_ONLY; -#if CONFIG_COMP_INTERINTRA_PRED - else - mode_excluded = mode_excluded ? mode_excluded : !cm->use_interintra; -#endif } } - xd->pre = yv12_mb[ref_frame]; - vpx_memcpy(mdcounts, frame_mdcounts[ref_frame], sizeof(mdcounts)); + // Select predictors + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].pre[0] = yv12_mb[ref_frame][i]; + if (comp_pred) + xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i]; + } // If the segment reference frame feature is enabled.... // then do nothing if the current ref frame is not allowed.. if (vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) && - !vp9_check_segref(xd, segment_id, ref_frame)) { + vp9_get_segdata(xd, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) { continue; // If the segment skip feature is enabled.... // then do nothing if the current mode is not allowed.. } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP) && - (this_mode != ZEROMV)) { + (this_mode != ZEROMV && ref_frame != INTRA_FRAME)) { continue; // Disable this drop out case if the ref frame // segment level feature is enabled for this segment. 
This is to @@ -5442,130 +2704,249 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } } } + // TODO(JBB): This is to make up for the fact that we don't have sad + // functions that work when the block size reads outside the umv. We + // should fix this either by making the motion search just work on + // a representative block in the boundary ( first ) and then implement a + // function that does sads when inside the border.. + if (((mi_row + bhs) > cm->mi_rows || (mi_col + bws) > cm->mi_cols) && + this_mode == NEWMV) { + continue; + } - if (ref_frame == INTRA_FRAME) { - if (block_size == BLOCK_64X64) { - vp9_build_intra_predictors_sb64y_s(xd); - super_block_64_yrd(cpi, x, &rate_y, &distortion_y, - &skippable, txfm_cache); - } else { - assert(block_size == BLOCK_32X32); - vp9_build_intra_predictors_sby_s(xd); - super_block_yrd(cpi, x, &rate_y, &distortion_y, - &skippable, txfm_cache); - } - if (mbmi->txfm_size == TX_4X4) { - rate_uv = rate_uv_4x4; - distortion_uv = dist_uv_4x4; - skippable = skippable && uv_skip_4x4; - mbmi->uv_mode = mode_uv_4x4; - } else if (mbmi->txfm_size == TX_32X32) { - rate_uv = rate_uv_16x16; - distortion_uv = dist_uv_16x16; - skippable = skippable && uv_skip_16x16; - mbmi->uv_mode = mode_uv_16x16; - } else { - rate_uv = rate_uv_8x8; - distortion_uv = dist_uv_8x8; - skippable = skippable && uv_skip_8x8; - mbmi->uv_mode = mode_uv_8x8; - } + if (this_mode == I4X4_PRED) { + int rate; - rate2 = rate_y + x->mbmode_cost[cm->frame_type][mbmi->mode] + rate_uv; + mbmi->txfm_size = TX_4X4; + rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, + &distortion_y, INT64_MAX); + rate2 += rate; + rate2 += intra_cost_penalty; + distortion2 += distortion_y; + + rate2 += rate_uv_intra[TX_4X4]; + rate_uv = rate_uv_intra[TX_4X4]; + distortion2 += dist_uv[TX_4X4]; + distortion_uv = dist_uv[TX_4X4]; + mbmi->uv_mode = mode_uv[TX_4X4]; + txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); + for (i = 0; i < NB_TXFM_MODES; ++i) + txfm_cache[i] = txfm_cache[ONLY_4X4]; + } else if (ref_frame == INTRA_FRAME) { + TX_SIZE uv_tx; + super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, + bsize, txfm_cache); + + uv_tx = mbmi->txfm_size; + if (bsize < BLOCK_SIZE_MB16X16 && uv_tx == TX_8X8) + uv_tx = TX_4X4; + if (bsize < BLOCK_SIZE_SB32X32 && uv_tx == TX_16X16) + uv_tx = TX_8X8; + else if (bsize < BLOCK_SIZE_SB64X64 && uv_tx == TX_32X32) + uv_tx = TX_16X16; + + rate_uv = rate_uv_intra[uv_tx]; + distortion_uv = dist_uv[uv_tx]; + skippable = skippable && skip_uv[uv_tx]; + mbmi->uv_mode = mode_uv[uv_tx]; + + rate2 = rate_y + x->mbmode_cost[mbmi->mode] + rate_uv; + if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED) + rate2 += intra_cost_penalty; distortion2 = distortion_y + distortion_uv; - } else { - YV12_BUFFER_CONFIG *scaled_ref_frame = NULL; - int fb; + } else if (this_mode == SPLITMV) { + const int is_comp_pred = mbmi->ref_frame[1] > 0; + int rate, distortion; + int64_t this_rd_thresh; + int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX; + int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX; + int tmp_best_distortion = INT_MAX, tmp_best_skippable = 0; + int switchable_filter_index; + int_mv *second_ref = is_comp_pred ? + &mbmi->ref_mvs[mbmi->ref_frame[1]][0] : NULL; + union b_mode_info tmp_best_bmodes[16]; + MB_MODE_INFO tmp_best_mbmode; + PARTITION_INFO tmp_best_partition; + int pred_exists = 0; + int uv_skippable; + + this_rd_thresh = (mbmi->ref_frame[0] == LAST_FRAME) ? 
+          cpi->rd_threshes[bsize][THR_NEWMV] :
+          cpi->rd_threshes[bsize][THR_NEWA];
+      this_rd_thresh = (mbmi->ref_frame[0] == GOLDEN_FRAME) ?
+          cpi->rd_threshes[bsize][THR_NEWG] : this_rd_thresh;
+      xd->mode_info_context->mbmi.txfm_size = TX_4X4;
+
+      for (switchable_filter_index = 0;
+           switchable_filter_index < VP9_SWITCHABLE_FILTERS;
+           ++switchable_filter_index) {
+        int newbest;
+        mbmi->interp_filter =
+            vp9_switchable_interp[switchable_filter_index];
+        vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
+
+        tmp_rd = rd_pick_best_mbsegmentation(cpi, x,
+                     &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
+                     second_ref, INT64_MAX,
+                     &rate, &rate_y, &distortion,
+                     &skippable,
+                     (int)this_rd_thresh, seg_mvs,
+                     mi_row, mi_col);
+        if (cpi->common.mcomp_filter_type == SWITCHABLE) {
+          const int rs = get_switchable_rate(cm, x);
+          tmp_rd += RDCOST(x->rdmult, x->rddiv, rs, 0);
+        }
+        newbest = (tmp_rd < tmp_best_rd);
+        if (newbest) {
+          tmp_best_filter = mbmi->interp_filter;
+          tmp_best_rd = tmp_rd;
+        }
+        if ((newbest && cm->mcomp_filter_type == SWITCHABLE) ||
+            (mbmi->interp_filter == cm->mcomp_filter_type &&
+             cm->mcomp_filter_type != SWITCHABLE)) {
+          tmp_best_rdu = tmp_rd;
+          tmp_best_rate = rate;
+          tmp_best_ratey = rate_y;
+          tmp_best_distortion = distortion;
+          tmp_best_skippable = skippable;
+          tmp_best_mbmode = *mbmi;
+          tmp_best_partition = *x->partition_info;
+          for (i = 0; i < 4; i++)
+            tmp_best_bmodes[i] = xd->mode_info_context->bmi[i];
+          pred_exists = 1;
+        }
+      }  // switchable_filter_index loop

-      if (mbmi->ref_frame == LAST_FRAME) {
-        fb = cpi->lst_fb_idx;
-      } else if (mbmi->ref_frame == GOLDEN_FRAME) {
-        fb = cpi->gld_fb_idx;
+      mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE ?
+                             tmp_best_filter : cm->mcomp_filter_type);
+      vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
+      if (!pred_exists) {
+        // Handles the special case when a filter that is not in the
+        // switchable list (bilinear, 6-tap) is indicated at the frame level
+        tmp_rd = rd_pick_best_mbsegmentation(cpi, x,
+                     &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
+                     second_ref, INT64_MAX,
+                     &rate, &rate_y, &distortion,
+                     &skippable,
+                     (int)this_rd_thresh, seg_mvs,
+                     mi_row, mi_col);
      } else {
-        fb = cpi->alt_fb_idx;
+        if (cpi->common.mcomp_filter_type == SWITCHABLE) {
+          int rs = get_switchable_rate(cm, x);
+          tmp_best_rdu -= RDCOST(x->rdmult, x->rddiv, rs, 0);
+        }
+        tmp_rd = tmp_best_rdu;
+        rate = tmp_best_rate;
+        rate_y = tmp_best_ratey;
+        distortion = tmp_best_distortion;
+        skippable = tmp_best_skippable;
+        *mbmi = tmp_best_mbmode;
+        *x->partition_info = tmp_best_partition;
+        for (i = 0; i < 4; i++)
+          xd->mode_info_context->bmi[i] = tmp_best_bmodes[i];
+      }
+
+      rate2 += rate;
+      distortion2 += distortion;
+
+      if (cpi->common.mcomp_filter_type == SWITCHABLE)
+        rate2 += get_switchable_rate(cm, x);
+
+      // If even the 'Y' rd value of split is higher than the best so far,
+      // don't bother looking at UV
+      vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
+                                      BLOCK_SIZE_SB8X8);
+      vp9_subtract_sbuv(x, BLOCK_SIZE_SB8X8);
+      super_block_uvrd_for_txfm(cm, x, &rate_uv, &distortion_uv,
+                                &uv_skippable, BLOCK_SIZE_SB8X8, TX_4X4);
+      rate2 += rate_uv;
+      distortion2 += distortion_uv;
+      skippable = skippable && uv_skippable;
+
+      txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
+      for (i = 0; i < NB_TXFM_MODES; ++i)
+        txfm_cache[i] = txfm_cache[ONLY_4X4];
+
+      if (!mode_excluded) {
+        if (is_comp_pred)
+          mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY;
+        else
+          mode_excluded = cpi->common.comp_pred_mode ==
COMP_PREDICTION_ONLY; } + compmode_cost = vp9_cost_bit(comp_mode_p, is_comp_pred); + } else { + YV12_BUFFER_CONFIG *scaled_ref_frame[2] = {NULL, NULL}; + int fb = get_ref_frame_idx(cpi, mbmi->ref_frame[0]); if (cpi->scaled_ref_idx[fb] != cm->ref_frame_map[fb]) - scaled_ref_frame = &cm->yv12_fb[cpi->scaled_ref_idx[fb]]; - -#if CONFIG_COMP_INTERINTRA_PRED - if (mbmi->second_ref_frame == INTRA_FRAME) { - if (best_intra16_mode == DC_PRED - 1) continue; - mbmi->interintra_mode = best_intra16_mode; -#if SEPARATE_INTERINTRA_UV - mbmi->interintra_uv_mode = best_intra16_uv_mode; -#else - mbmi->interintra_uv_mode = best_intra16_mode; -#endif + scaled_ref_frame[0] = &cm->yv12_fb[cpi->scaled_ref_idx[fb]]; + + if (comp_pred) { + fb = get_ref_frame_idx(cpi, mbmi->ref_frame[1]); + if (cpi->scaled_ref_idx[fb] != cm->ref_frame_map[fb]) + scaled_ref_frame[1] = &cm->yv12_fb[cpi->scaled_ref_idx[fb]]; } -#endif - this_rd = handle_inter_mode(cpi, x, block_size, - &saddone, near_sadidx, mdcounts, txfm_cache, + + compmode_cost = vp9_cost_bit(comp_mode_p, + mbmi->ref_frame[1] > INTRA_FRAME); + this_rd = handle_inter_mode(cpi, x, bsize, + txfm_cache, &rate2, &distortion2, &skippable, - &compmode_cost, -#if CONFIG_COMP_INTERINTRA_PRED - &compmode_interintra_cost, -#endif &rate_y, &distortion_y, &rate_uv, &distortion_uv, &mode_excluded, &disable_skip, - mode_index, &tmp_best_filter, frame_mv, - scaled_ref_frame, mb_row, mb_col); + &tmp_best_filter, frame_mv[this_mode], + scaled_ref_frame, mi_row, mi_col, + single_newmv); if (this_rd == INT64_MAX) continue; } -#if CONFIG_COMP_INTERINTRA_PRED - if (cpi->common.use_interintra) { - rate2 += compmode_interintra_cost; - } -#endif if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) { rate2 += compmode_cost; } // Estimate the reference frame signaling cost and add it // to the rolling cost variable. - rate2 += ref_costs[xd->mode_info_context->mbmi.ref_frame]; + if (mbmi->ref_frame[1] > INTRA_FRAME) { + rate2 += ref_costs_comp[mbmi->ref_frame[0]]; + } else { + rate2 += ref_costs_single[mbmi->ref_frame[0]]; + } if (!disable_skip) { // Test for the condition where skip block will be activated // because there are no non zero coefficients and make any // necessary adjustment for rate. Ignore if skip is coded at // segment level as the cost wont have been added in. - if (cpi->common.mb_no_coeff_skip) { - int mb_skip_allowed; + int mb_skip_allowed; - // Is Mb level skip allowed (i.e. not coded at segment level). - mb_skip_allowed = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP); + // Is Mb level skip allowed (i.e. not coded at segment level). 
+ mb_skip_allowed = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP); - if (skippable) { - // Back out the coefficient coding costs - rate2 -= (rate_y + rate_uv); - // for best_yrd calculation - rate_uv = 0; + if (skippable && bsize >= BLOCK_SIZE_SB8X8) { + // Back out the coefficient coding costs + rate2 -= (rate_y + rate_uv); + // for best_yrd calculation + rate_uv = 0; - if (mb_skip_allowed) { - int prob_skip_cost; + if (mb_skip_allowed) { + int prob_skip_cost; - // Cost the skip mb case - vp9_prob skip_prob = - vp9_get_pred_prob(cm, xd, PRED_MBSKIP); + // Cost the skip mb case + vp9_prob skip_prob = + vp9_get_pred_prob(cm, xd, PRED_MBSKIP); - if (skip_prob) { - prob_skip_cost = vp9_cost_bit(skip_prob, 1); - rate2 += prob_skip_cost; - other_cost += prob_skip_cost; - } + if (skip_prob) { + prob_skip_cost = vp9_cost_bit(skip_prob, 1); + rate2 += prob_skip_cost; } } + } else if (mb_skip_allowed) { // Add in the cost of the no skip flag. - else if (mb_skip_allowed) { - int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob(cm, xd, - PRED_MBSKIP), 0); - rate2 += prob_skip_cost; - other_cost += prob_skip_cost; - } + int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob(cm, xd, + PRED_MBSKIP), 0); + rate2 += prob_skip_cost; } // Calculate the final RD estimate for this mode. @@ -5574,26 +2955,14 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, #if 0 // Keep record of best intra distortion - if ((xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) && + if ((xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME) && (this_rd < best_intra_rd)) { best_intra_rd = this_rd; *returnintra = distortion2; } #endif -#if CONFIG_COMP_INTERINTRA_PRED - if ((mbmi->ref_frame == INTRA_FRAME) && - (this_mode <= TM_PRED) && - (this_rd < best_intra16_rd)) { - best_intra16_rd = this_rd; - best_intra16_mode = this_mode; -#if SEPARATE_INTERINTRA_UV - best_intra16_uv_mode = (mbmi->txfm_size != TX_4X4 ? - mode_uv_8x8 : mode_uv_4x4); -#endif - } -#endif - if (!disable_skip && mbmi->ref_frame == INTRA_FRAME) + if (!disable_skip && mbmi->ref_frame[0] == INTRA_FRAME) for (i = 0; i < NB_PREDICTION_TYPES; ++i) best_pred_rd[i] = MIN(best_pred_rd[i], this_rd); @@ -5601,9 +2970,18 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, best_overall_rd = this_rd; best_filter = tmp_best_filter; best_mode = this_mode; -#if CONFIG_COMP_INTERINTRA_PRED - is_best_interintra = (mbmi->second_ref_frame == INTRA_FRAME); -#endif + } + + if (this_mode != I4X4_PRED && this_mode != SPLITMV) { + // Store the respective mode distortions for later use. + if (mode_distortions[this_mode] == -1 + || distortion2 < mode_distortions[this_mode]) { + mode_distortions[this_mode] = distortion2; + } + if (frame_distortions[mbmi->ref_frame[0]] == -1 + || distortion2 < frame_distortions[mbmi->ref_frame[0]]) { + frame_distortions[mbmi->ref_frame[0]] = distortion2; + } } // Did this mode help.. i.e. 
is it the new best mode
@@ -5612,16 +2990,20 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
      // Note index of best mode so far
      best_mode_index = mode_index;

-      if (this_mode <= B_PRED) {
+      if (ref_frame == INTRA_FRAME) {
        /* required for left and above block mv */
        mbmi->mv[0].as_int = 0;
      }

-      other_cost += ref_costs[xd->mode_info_context->mbmi.ref_frame];
      *returnrate = rate2;
      *returndistortion = distortion2;
      best_rd = this_rd;
-      vpx_memcpy(&best_mbmode, mbmi, sizeof(MB_MODE_INFO));
+      best_mbmode = *mbmi;
+      best_partition = *x->partition_info;
+
+      if (this_mode == I4X4_PRED || this_mode == SPLITMV)
+        for (i = 0; i < 4; i++)
+          best_bmodes[i] = xd->mode_info_context->bmi[i];
    }

#if 0
    // Testing this mode gave rise to an improvement in best error score.
@@ -5649,7 +3031,7 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
    }

    /* keep record of best compound/single-only prediction */
-    if (!disable_skip && mbmi->ref_frame != INTRA_FRAME) {
+    if (!disable_skip && mbmi->ref_frame[0] != INTRA_FRAME) {
      int single_rd, hybrid_rd, single_rate, hybrid_rate;

      if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
@@ -5663,10 +3045,10 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
      single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
      hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);

-      if (mbmi->second_ref_frame <= INTRA_FRAME &&
+      if (mbmi->ref_frame[1] <= INTRA_FRAME &&
          single_rd < best_pred_rd[SINGLE_PREDICTION_ONLY]) {
        best_pred_rd[SINGLE_PREDICTION_ONLY] = single_rd;
-      } else if (mbmi->second_ref_frame > INTRA_FRAME &&
+      } else if (mbmi->ref_frame[1] > INTRA_FRAME &&
                 single_rd < best_pred_rd[COMP_PREDICTION_ONLY]) {
        best_pred_rd[COMP_PREDICTION_ONLY] = single_rd;
      }
@@ -5675,14 +3057,23 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
    }

    /* keep record of best txfm size */
+    if (bsize < BLOCK_SIZE_SB32X32) {
+      if (bsize < BLOCK_SIZE_MB16X16) {
+        if (this_mode == SPLITMV || this_mode == I4X4_PRED)
+          txfm_cache[ALLOW_8X8] = txfm_cache[ONLY_4X4];
+        txfm_cache[ALLOW_16X16] = txfm_cache[ALLOW_8X8];
+      }
+      txfm_cache[ALLOW_32X32] = txfm_cache[ALLOW_16X16];
+    }
    if (!mode_excluded && this_rd != INT64_MAX) {
      for (i = 0; i < NB_TXFM_MODES; i++) {
-        int64_t adj_rd;
-        if (this_mode != B_PRED) {
+        int64_t adj_rd = INT64_MAX;
+        if (this_mode != I4X4_PRED) {
          adj_rd = this_rd + txfm_cache[i] - txfm_cache[cm->txfm_mode];
        } else {
          adj_rd = this_rd;
        }
+
        if (adj_rd < best_txfm_rd[i])
          best_txfm_rd[i] = adj_rd;
      }
@@ -5691,22 +3082,61 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
    if (x->skip && !mode_excluded)
      break;
  }
+  // Flag all modes that have a distortion that is more than 2x the best
+  // we found at this level.
+  for (mode_index = 0; mode_index < MB_MODE_COUNT; ++mode_index) {
+    if (mode_index == NEARESTMV || mode_index == NEARMV || mode_index == NEWMV)
+      continue;
+
+    if (mode_distortions[mode_index] > 2 * *returndistortion) {
+      ctx->modes_with_high_error |= (1 << mode_index);
+    }
+  }
+
+  // Flag all ref frames that have a distortion that is more than 2x the best
+  // we found at this level.
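+  // The enclosing partition reads these flags back through its speed > 0
+  // mask setup to prune its own mode and reference frame search.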
+  for (ref_frame = INTRA_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
+    if (frame_distortions[ref_frame] > 2 * *returndistortion) {
+      ctx->frames_with_high_error |= (1 << ref_frame);
+    }
+  }
+
+  if (best_rd == INT64_MAX && bsize < BLOCK_SIZE_SB8X8) {
+    *returnrate = INT_MAX;
+    *returndistortion = INT_MAX;
+    return best_rd;
+  }

  assert((cm->mcomp_filter_type == SWITCHABLE) ||
         (cm->mcomp_filter_type == best_mbmode.interp_filter) ||
-         (best_mbmode.mode <= B_PRED));
-
-#if CONFIG_COMP_INTERINTRA_PRED
-  ++cpi->interintra_select_count[is_best_interintra];
-  // if (is_best_interintra)  printf("best_interintra\n");
-#endif
+         (best_mbmode.ref_frame[0] == INTRA_FRAME));

  // Accumulate filter usage stats
  // TODO(agrange): Use RD criteria to select interpolation filter mode.
-  if ((best_mode >= NEARESTMV) && (best_mode <= SPLITMV))
+  if (is_inter_mode(best_mode))
    ++cpi->best_switchable_interp_count[vp9_switchable_interp_map[best_filter]];

-  // TODO(rbultje) integrate with RD thresholding
+  // Updating rd_thresh_freq_fact[] here means that the different
+  // partition/block sizes are handled independently based on the best
+  // choice for the current partition. It may well be better to keep a scaled
+  // best rd so far value and update rd_thresh_freq_fact based on the mode/size
+  // combination that wins out.
+  if (cpi->sf.adpative_rd_thresh) {
+    for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
+      if (mode_index == best_mode_index) {
+        cpi->rd_thresh_freq_fact[bsize][mode_index] = BASE_RD_THRESH_FREQ_FACT;
+      } else {
+        cpi->rd_thresh_freq_fact[bsize][mode_index] += MAX_RD_THRESH_FREQ_INC;
+        if (cpi->rd_thresh_freq_fact[bsize][mode_index] >
+            (cpi->sf.adpative_rd_thresh * MAX_RD_THRESH_FREQ_FACT)) {
+          cpi->rd_thresh_freq_fact[bsize][mode_index] =
+              cpi->sf.adpative_rd_thresh * MAX_RD_THRESH_FREQ_FACT;
+        }
+      }
+    }
+  }
+
+  // TODO(rbultje) integrate with RD thresholding
#if 0
  // Reduce the activation RD thresholds for the best choice mode
  if ((cpi->rd_baseline_thresh[best_mode_index] > 0) &&
@@ -5727,16 +3157,22 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
  if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
      cpi->is_src_frame_alt_ref &&
      (cpi->oxcf.arnr_max_frames == 0) &&
-      (best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME)) {
+      (best_mbmode.mode != ZEROMV || best_mbmode.ref_frame[0] != ALTREF_FRAME)
+      && bsize >= BLOCK_SIZE_SB8X8) {
    mbmi->mode = ZEROMV;
-    mbmi->ref_frame = ALTREF_FRAME;
-    mbmi->second_ref_frame = INTRA_FRAME;
+    mbmi->ref_frame[0] = ALTREF_FRAME;
+    mbmi->ref_frame[1] = NONE;
    mbmi->mv[0].as_int = 0;
    mbmi->uv_mode = DC_PRED;
-    mbmi->mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 1 : 0;
-    mbmi->partitioning = 0;
-    mbmi->txfm_size = cm->txfm_mode == TX_MODE_SELECT ?
- TX_32X32 : cm->txfm_mode; + mbmi->mb_skip_coeff = 1; + if (cm->txfm_mode == TX_MODE_SELECT) { + if (bsize >= BLOCK_SIZE_SB32X32) + mbmi->txfm_size = TX_32X32; + else if (bsize >= BLOCK_SIZE_MB16X16) + mbmi->txfm_size = TX_16X16; + else + mbmi->txfm_size = TX_8X8; + } vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff)); vpx_memset(best_pred_diff, 0, sizeof(best_pred_diff)); @@ -5744,7 +3180,29 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } // macroblock modes - vpx_memcpy(mbmi, &best_mbmode, sizeof(MB_MODE_INFO)); + *mbmi = best_mbmode; + if (best_mbmode.ref_frame[0] == INTRA_FRAME && + best_mbmode.sb_type < BLOCK_SIZE_SB8X8) { + for (i = 0; i < 4; i++) + xd->mode_info_context->bmi[i].as_mode = best_bmodes[i].as_mode; + } + + if (best_mbmode.ref_frame[0] != INTRA_FRAME && + best_mbmode.sb_type < BLOCK_SIZE_SB8X8) { + for (i = 0; i < 4; i++) + xd->mode_info_context->bmi[i].as_mv[0].as_int = + best_bmodes[i].as_mv[0].as_int; + + if (mbmi->ref_frame[1] > 0) + for (i = 0; i < 4; i++) + xd->mode_info_context->bmi[i].as_mv[1].as_int = + best_bmodes[i].as_mv[1].as_int; + + *x->partition_info = best_partition; + + mbmi->mv[0].as_int = x->partition_info->bmi[3].mv.as_int; + mbmi->mv[1].as_int = x->partition_info->bmi[3].second_mv.as_int; + } for (i = 0; i < NB_PREDICTION_TYPES; ++i) { if (best_pred_rd[i] == INT64_MAX) @@ -5765,72 +3223,14 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } end: - set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame, + set_scale_factors(xd, mbmi->ref_frame[0], mbmi->ref_frame[1], scale_factor); - { - PICK_MODE_CONTEXT *p = (block_size == BLOCK_32X32) ? - &x->sb32_context[xd->sb_index] : - &x->sb64_context; - store_coding_context(x, p, best_mode_index, NULL, - &mbmi->ref_mvs[mbmi->ref_frame][0], - &mbmi->ref_mvs[mbmi->second_ref_frame < 0 ? 0 : - mbmi->second_ref_frame][0], - best_pred_diff, best_txfm_diff); - } + store_coding_context(x, ctx, best_mode_index, + &best_partition, + &mbmi->ref_mvs[mbmi->ref_frame[0]][0], + &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 
0 : + mbmi->ref_frame[1]][0], + best_pred_diff, best_txfm_diff); return best_rd; } - -int64_t vp9_rd_pick_inter_mode_sb32(VP9_COMP *cpi, MACROBLOCK *x, - int mb_row, int mb_col, - int *returnrate, - int *returndistortion) { - return vp9_rd_pick_inter_mode_sb(cpi, x, mb_row, mb_col, - returnrate, returndistortion, BLOCK_32X32); -} - -int64_t vp9_rd_pick_inter_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x, - int mb_row, int mb_col, - int *returnrate, - int *returndistortion) { - return vp9_rd_pick_inter_mode_sb(cpi, x, mb_row, mb_col, - returnrate, returndistortion, BLOCK_64X64); -} - -void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x, - int mb_row, int mb_col, - int *totalrate, int *totaldist) { - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; - int rate, distortion; - int64_t intra_error = 0; - unsigned char *segment_id = &mbmi->segment_id; - - if (xd->segmentation_enabled) - x->encode_breakout = cpi->segment_encode_breakout[*segment_id]; - else - x->encode_breakout = cpi->oxcf.encode_breakout; - - // if (cpi->sf.RD) - // For now this codebase is limited to a single rd encode path - { - int zbin_mode_boost_enabled = cpi->zbin_mode_boost_enabled; - - rd_pick_inter_mode(cpi, x, mb_row, mb_col, &rate, - &distortion, &intra_error); - - /* restore cpi->zbin_mode_boost_enabled */ - cpi->zbin_mode_boost_enabled = zbin_mode_boost_enabled; - } - // else - // The non rd encode path has been deleted from this code base - // to simplify development - // vp9_pick_inter_mode - - // Store metrics so they can be added in to totals if this mode is picked - x->mb_context[xd->sb_index][xd->mb_index].distortion = distortion; - x->mb_context[xd->sb_index][xd->mb_index].intra_error = intra_error; - - *totalrate = rate; - *totaldist = distortion; -} diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h index d1b4777171505d6ab5ba59dde14cc11f05b58e10..dcf5d00e9f04d97b3b8b49f4476e94815d3b6ace 100644 --- a/vp9/encoder/vp9_rdopt.h +++ b/vp9/encoder/vp9_rdopt.h @@ -19,26 +19,14 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex); void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex); -void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, - int *r, int *d); - -void vp9_rd_pick_intra_mode_sb32(VP9_COMP *cpi, MACROBLOCK *x, - int *r, int *d); - -void vp9_rd_pick_intra_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x, - int *r, int *d); - -void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x, - int mb_row, int mb_col, - int *r, int *d); - -int64_t vp9_rd_pick_inter_mode_sb32(VP9_COMP *cpi, MACROBLOCK *x, - int mb_row, int mb_col, - int *r, int *d); - -int64_t vp9_rd_pick_inter_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x, - int mb_row, int mb_col, - int *r, int *d); +void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, + int *r, int *d, BLOCK_SIZE_TYPE bsize, + PICK_MODE_CONTEXT *ctx); + +int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, + int mi_row, int mi_col, + int *r, int *d, BLOCK_SIZE_TYPE bsize, + PICK_MODE_CONTEXT *ctx); void vp9_init_me_luts(); diff --git a/vp9/encoder/vp9_sad_c.c b/vp9/encoder/vp9_sad_c.c index 96d993863e2095b29c01ad4dd48c7c690fc84582..6b1ba49647f04efaf28f345fe0980dc589b25e11 100644 --- a/vp9/encoder/vp9_sad_c.c +++ b/vp9/encoder/vp9_sad_c.c @@ -23,6 +23,52 @@ unsigned int vp9_sad64x64_c(const uint8_t *src_ptr, return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 64, 64); } +unsigned int vp9_sad64x32_c(const uint8_t *src_ptr, + int src_stride, + const uint8_t *ref_ptr, + int ref_stride, + 
unsigned int max_sad) { + return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 64, 32); +} + +void vp9_sad64x32x4d_c(const uint8_t *src_ptr, + int src_stride, + const uint8_t* const ref_ptr[], + int ref_stride, + unsigned int *sad_array) { + sad_array[0] = vp9_sad64x32(src_ptr, src_stride, + ref_ptr[0], ref_stride, 0x7fffffff); + sad_array[1] = vp9_sad64x32(src_ptr, src_stride, + ref_ptr[1], ref_stride, 0x7fffffff); + sad_array[2] = vp9_sad64x32(src_ptr, src_stride, + ref_ptr[2], ref_stride, 0x7fffffff); + sad_array[3] = vp9_sad64x32(src_ptr, src_stride, + ref_ptr[3], ref_stride, 0x7fffffff); +} + +unsigned int vp9_sad32x64_c(const uint8_t *src_ptr, + int src_stride, + const uint8_t *ref_ptr, + int ref_stride, + unsigned int max_sad) { + return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 32, 64); +} + +void vp9_sad32x64x4d_c(const uint8_t *src_ptr, + int src_stride, + const uint8_t* const ref_ptr[], + int ref_stride, + unsigned int *sad_array) { + sad_array[0] = vp9_sad32x64(src_ptr, src_stride, + ref_ptr[0], ref_stride, 0x7fffffff); + sad_array[1] = vp9_sad32x64(src_ptr, src_stride, + ref_ptr[1], ref_stride, 0x7fffffff); + sad_array[2] = vp9_sad32x64(src_ptr, src_stride, + ref_ptr[2], ref_stride, 0x7fffffff); + sad_array[3] = vp9_sad32x64(src_ptr, src_stride, + ref_ptr[3], ref_stride, 0x7fffffff); +} + unsigned int vp9_sad32x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, @@ -31,6 +77,52 @@ unsigned int vp9_sad32x32_c(const uint8_t *src_ptr, return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 32, 32); } +unsigned int vp9_sad32x16_c(const uint8_t *src_ptr, + int src_stride, + const uint8_t *ref_ptr, + int ref_stride, + unsigned int max_sad) { + return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 32, 16); +} + +void vp9_sad32x16x4d_c(const uint8_t *src_ptr, + int src_stride, + const uint8_t* const ref_ptr[], + int ref_stride, + unsigned int *sad_array) { + sad_array[0] = vp9_sad32x16(src_ptr, src_stride, + ref_ptr[0], ref_stride, 0x7fffffff); + sad_array[1] = vp9_sad32x16(src_ptr, src_stride, + ref_ptr[1], ref_stride, 0x7fffffff); + sad_array[2] = vp9_sad32x16(src_ptr, src_stride, + ref_ptr[2], ref_stride, 0x7fffffff); + sad_array[3] = vp9_sad32x16(src_ptr, src_stride, + ref_ptr[3], ref_stride, 0x7fffffff); +} + +unsigned int vp9_sad16x32_c(const uint8_t *src_ptr, + int src_stride, + const uint8_t *ref_ptr, + int ref_stride, + unsigned int max_sad) { + return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 16, 32); +} + +void vp9_sad16x32x4d_c(const uint8_t *src_ptr, + int src_stride, + const uint8_t* const ref_ptr[], + int ref_stride, + unsigned int *sad_array) { + sad_array[0] = vp9_sad16x32(src_ptr, src_stride, + ref_ptr[0], ref_stride, 0x7fffffff); + sad_array[1] = vp9_sad16x32(src_ptr, src_stride, + ref_ptr[1], ref_stride, 0x7fffffff); + sad_array[2] = vp9_sad16x32(src_ptr, src_stride, + ref_ptr[2], ref_stride, 0x7fffffff); + sad_array[3] = vp9_sad16x32(src_ptr, src_stride, + ref_ptr[3], ref_stride, 0x7fffffff); +} + unsigned int vp9_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, @@ -64,6 +156,21 @@ unsigned int vp9_sad8x16_c(const uint8_t *src_ptr, return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 8, 16); } +unsigned int vp9_sad8x4_c(const uint8_t *src_ptr, + int src_stride, + const uint8_t *ref_ptr, + int ref_stride, + unsigned int max_sad) { + return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 8, 4); +} + +unsigned int vp9_sad4x8_c(const uint8_t *src_ptr, + int src_stride, + 
const uint8_t *ref_ptr, + int ref_stride, + unsigned int max_sad) { + return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 4, 8); +} unsigned int vp9_sad4x4_c(const uint8_t *src_ptr, int src_stride, @@ -471,6 +578,98 @@ void vp9_sad8x16x4d_c(const uint8_t *src_ptr, ref_ptr[3], ref_stride, 0x7fffffff); } +void vp9_sad8x4x4d_c(const uint8_t *src_ptr, + int src_stride, + const uint8_t* const ref_ptr[], + int ref_stride, + unsigned int *sad_array) { + sad_array[0] = vp9_sad8x4(src_ptr, src_stride, + ref_ptr[0], ref_stride, 0x7fffffff); + sad_array[1] = vp9_sad8x4(src_ptr, src_stride, + ref_ptr[1], ref_stride, 0x7fffffff); + sad_array[2] = vp9_sad8x4(src_ptr, src_stride, + ref_ptr[2], ref_stride, 0x7fffffff); + sad_array[3] = vp9_sad8x4(src_ptr, src_stride, + ref_ptr[3], ref_stride, 0x7fffffff); +} + +void vp9_sad8x4x8_c(const uint8_t *src_ptr, + int src_stride, + const uint8_t *ref_ptr, + int ref_stride, + uint32_t *sad_array) { + sad_array[0] = vp9_sad8x4(src_ptr, src_stride, + ref_ptr, ref_stride, + 0x7fffffff); + sad_array[1] = vp9_sad8x4(src_ptr, src_stride, + ref_ptr + 1, ref_stride, + 0x7fffffff); + sad_array[2] = vp9_sad8x4(src_ptr, src_stride, + ref_ptr + 2, ref_stride, + 0x7fffffff); + sad_array[3] = vp9_sad8x4(src_ptr, src_stride, + ref_ptr + 3, ref_stride, + 0x7fffffff); + sad_array[4] = vp9_sad8x4(src_ptr, src_stride, + ref_ptr + 4, ref_stride, + 0x7fffffff); + sad_array[5] = vp9_sad8x4(src_ptr, src_stride, + ref_ptr + 5, ref_stride, + 0x7fffffff); + sad_array[6] = vp9_sad8x4(src_ptr, src_stride, + ref_ptr + 6, ref_stride, + 0x7fffffff); + sad_array[7] = vp9_sad8x4(src_ptr, src_stride, + ref_ptr + 7, ref_stride, + 0x7fffffff); +} + +void vp9_sad4x8x4d_c(const uint8_t *src_ptr, + int src_stride, + const uint8_t* const ref_ptr[], + int ref_stride, + unsigned int *sad_array) { + sad_array[0] = vp9_sad4x8(src_ptr, src_stride, + ref_ptr[0], ref_stride, 0x7fffffff); + sad_array[1] = vp9_sad4x8(src_ptr, src_stride, + ref_ptr[1], ref_stride, 0x7fffffff); + sad_array[2] = vp9_sad4x8(src_ptr, src_stride, + ref_ptr[2], ref_stride, 0x7fffffff); + sad_array[3] = vp9_sad4x8(src_ptr, src_stride, + ref_ptr[3], ref_stride, 0x7fffffff); +} + +void vp9_sad4x8x8_c(const uint8_t *src_ptr, + int src_stride, + const uint8_t *ref_ptr, + int ref_stride, + uint32_t *sad_array) { + sad_array[0] = vp9_sad4x8(src_ptr, src_stride, + ref_ptr, ref_stride, + 0x7fffffff); + sad_array[1] = vp9_sad4x8(src_ptr, src_stride, + ref_ptr + 1, ref_stride, + 0x7fffffff); + sad_array[2] = vp9_sad4x8(src_ptr, src_stride, + ref_ptr + 2, ref_stride, + 0x7fffffff); + sad_array[3] = vp9_sad4x8(src_ptr, src_stride, + ref_ptr + 3, ref_stride, + 0x7fffffff); + sad_array[4] = vp9_sad4x8(src_ptr, src_stride, + ref_ptr + 4, ref_stride, + 0x7fffffff); + sad_array[5] = vp9_sad4x8(src_ptr, src_stride, + ref_ptr + 5, ref_stride, + 0x7fffffff); + sad_array[6] = vp9_sad4x8(src_ptr, src_stride, + ref_ptr + 6, ref_stride, + 0x7fffffff); + sad_array[7] = vp9_sad4x8(src_ptr, src_stride, + ref_ptr + 7, ref_stride, + 0x7fffffff); +} + void vp9_sad4x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], diff --git a/vp9/encoder/vp9_segmentation.c b/vp9/encoder/vp9_segmentation.c index a04a20c29a68683f8f0e3bc3af7a755a577640d8..fe995ad729fabff0d195d7ad1f33420f4e9c46ee 100644 --- a/vp9/encoder/vp9_segmentation.c +++ b/vp9/encoder/vp9_segmentation.c @@ -15,63 +15,16 @@ #include "vp9/common/vp9_pred_common.h" #include "vp9/common/vp9_tile_common.h" -void vp9_update_gf_useage_maps(VP9_COMP *cpi, VP9_COMMON *cm, MACROBLOCK 
*x) { - int mb_row, mb_col; - - MODE_INFO *this_mb_mode_info = cm->mi; - - x->gf_active_ptr = (signed char *)cpi->gf_active_flags; - - if ((cm->frame_type == KEY_FRAME) || (cpi->refresh_golden_frame)) { - // Reset Gf useage monitors - vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols)); - cpi->gf_active_count = cm->mb_rows * cm->mb_cols; - } else { - // for each macroblock row in image - for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) { - // for each macroblock col in image - for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { - - // If using golden then set GF active flag if not already set. - // If using last frame 0,0 mode then leave flag as it is - // else if using non 0,0 motion or intra modes then clear - // flag if it is currently set - if ((this_mb_mode_info->mbmi.ref_frame == GOLDEN_FRAME) || - (this_mb_mode_info->mbmi.ref_frame == ALTREF_FRAME)) { - if (*(x->gf_active_ptr) == 0) { - *(x->gf_active_ptr) = 1; - cpi->gf_active_count++; - } - } else if ((this_mb_mode_info->mbmi.mode != ZEROMV) && - *(x->gf_active_ptr)) { - *(x->gf_active_ptr) = 0; - cpi->gf_active_count--; - } - - x->gf_active_ptr++; // Step onto next entry - this_mb_mode_info++; // skip to next mb - - } - - // this is to account for the border - this_mb_mode_info++; - } - } -} - void vp9_enable_segmentation(VP9_PTR ptr) { - VP9_COMP *cpi = (VP9_COMP *)(ptr); + VP9_COMP *cpi = (VP9_COMP *)ptr; - // Set the appropriate feature bit cpi->mb.e_mbd.segmentation_enabled = 1; cpi->mb.e_mbd.update_mb_segmentation_map = 1; cpi->mb.e_mbd.update_mb_segmentation_data = 1; } void vp9_disable_segmentation(VP9_PTR ptr) { - VP9_COMP *cpi = (VP9_COMP *)(ptr); - - // Clear the appropriate feature bit + VP9_COMP *cpi = (VP9_COMP *)ptr; cpi->mb.e_mbd.segmentation_enabled = 0; } @@ -81,7 +34,7 @@ void vp9_set_segmentation_map(VP9_PTR ptr, // Copy in the new segmentation map vpx_memcpy(cpi->segmentation_map, segmentation_map, - (cpi->common.mb_rows * cpi->common.mb_cols)); + (cpi->common.mi_rows * cpi->common.mi_cols)); // Signal that the map should be updated. 
cpi->mb.e_mbd.update_mb_segmentation_map = 1; @@ -104,104 +57,59 @@ void vp9_set_segment_data(VP9_PTR ptr, } // Based on set of segment counts calculate a probability tree -static void calc_segtree_probs(MACROBLOCKD *xd, - int *segcounts, +static void calc_segtree_probs(MACROBLOCKD *xd, int *segcounts, vp9_prob *segment_tree_probs) { - int count1, count2; - - // Total count for all segments - count1 = segcounts[0] + segcounts[1]; - count2 = segcounts[2] + segcounts[3]; - // Work out probabilities of each segment - segment_tree_probs[0] = get_binary_prob(count1, count2); - segment_tree_probs[1] = get_prob(segcounts[0], count1); - segment_tree_probs[2] = get_prob(segcounts[2], count2); + const int c01 = segcounts[0] + segcounts[1]; + const int c23 = segcounts[2] + segcounts[3]; + const int c45 = segcounts[4] + segcounts[5]; + const int c67 = segcounts[6] + segcounts[7]; + + segment_tree_probs[0] = get_binary_prob(c01 + c23, c45 + c67); + segment_tree_probs[1] = get_binary_prob(c01, c23); + segment_tree_probs[2] = get_binary_prob(c45, c67); + segment_tree_probs[3] = get_binary_prob(segcounts[0], segcounts[1]); + segment_tree_probs[4] = get_binary_prob(segcounts[2], segcounts[3]); + segment_tree_probs[5] = get_binary_prob(segcounts[4], segcounts[5]); + segment_tree_probs[6] = get_binary_prob(segcounts[6], segcounts[7]); } // Based on set of segment counts and probabilities calculate a cost estimate -static int cost_segmap(MACROBLOCKD *xd, - int *segcounts, - vp9_prob *probs) { - int cost; - int count1, count2; +static int cost_segmap(MACROBLOCKD *xd, int *segcounts, vp9_prob *probs) { + const int c01 = segcounts[0] + segcounts[1]; + const int c23 = segcounts[2] + segcounts[3]; + const int c45 = segcounts[4] + segcounts[5]; + const int c67 = segcounts[6] + segcounts[7]; + const int c0123 = c01 + c23; + const int c4567 = c45 + c67; // Cost the top node of the tree - count1 = segcounts[0] + segcounts[1]; - count2 = segcounts[2] + segcounts[3]; - cost = count1 * vp9_cost_zero(probs[0]) + - count2 * vp9_cost_one(probs[0]); - - // Now add the cost of each individual segment branch - if (count1 > 0) - cost += segcounts[0] * vp9_cost_zero(probs[1]) + - segcounts[1] * vp9_cost_one(probs[1]); - - if (count2 > 0) - cost += segcounts[2] * vp9_cost_zero(probs[2]) + - segcounts[3] * vp9_cost_one(probs[2]); - - return cost; -} - -// Based on set of segment counts calculate a probability tree -static void calc_segtree_probs_pred(MACROBLOCKD *xd, - int (*segcounts)[MAX_MB_SEGMENTS], - vp9_prob *segment_tree_probs, - vp9_prob *mod_probs) { - int count[4]; - - assert(!segcounts[0][0] && !segcounts[1][1] && - !segcounts[2][2] && !segcounts[3][3]); - - // Total count for all segments - count[0] = segcounts[3][0] + segcounts[1][0] + segcounts[2][0]; - count[1] = segcounts[2][1] + segcounts[0][1] + segcounts[3][1]; - count[2] = segcounts[0][2] + segcounts[3][2] + segcounts[1][2]; - count[3] = segcounts[1][3] + segcounts[2][3] + segcounts[0][3]; + int cost = c0123 * vp9_cost_zero(probs[0]) + + c4567 * vp9_cost_one(probs[0]); + + // Cost subsequent levels + if (c0123 > 0) { + cost += c01 * vp9_cost_zero(probs[1]) + + c23 * vp9_cost_one(probs[1]); + + if (c01 > 0) + cost += segcounts[0] * vp9_cost_zero(probs[3]) + + segcounts[1] * vp9_cost_one(probs[3]); + if (c23 > 0) + cost += segcounts[2] * vp9_cost_zero(probs[4]) + + segcounts[3] * vp9_cost_one(probs[4]); + } - // Work out probabilities of each segment - segment_tree_probs[0] = get_binary_prob(count[0] + count[1], - count[2] + count[3]); - segment_tree_probs[1] = 
get_binary_prob(count[0], count[1]); - segment_tree_probs[2] = get_binary_prob(count[2], count[3]); - - // now work out modified counts that the decoder would have - count[0] = segment_tree_probs[0] * segment_tree_probs[1]; - count[1] = segment_tree_probs[0] * (256 - segment_tree_probs[1]); - count[2] = (256 - segment_tree_probs[0]) * segment_tree_probs[2]; - count[3] = (256 - segment_tree_probs[0]) * (256 - segment_tree_probs[2]); - - // Work out modified probabilties depending on what segment was predicted - mod_probs[0] = get_binary_prob(count[1], count[2] + count[3]); - mod_probs[1] = get_binary_prob(count[0], count[2] + count[3]); - mod_probs[2] = get_binary_prob(count[0] + count[1], count[3]); - mod_probs[3] = get_binary_prob(count[0] + count[1], count[2]); -} + if (c4567 > 0) { + cost += c45 * vp9_cost_zero(probs[2]) + + c67 * vp9_cost_one(probs[2]); -// Based on set of segment counts and probabilities calculate a cost estimate -static int cost_segmap_pred(MACROBLOCKD *xd, - int (*segcounts)[MAX_MB_SEGMENTS], - vp9_prob *probs, vp9_prob *mod_probs) { - int pred_seg, cost = 0; - - for (pred_seg = 0; pred_seg < MAX_MB_SEGMENTS; pred_seg++) { - int count1, count2; - - // Cost the top node of the tree - count1 = segcounts[pred_seg][0] + segcounts[pred_seg][1]; - count2 = segcounts[pred_seg][2] + segcounts[pred_seg][3]; - cost += count1 * vp9_cost_zero(mod_probs[pred_seg]) + - count2 * vp9_cost_one(mod_probs[pred_seg]); - - // Now add the cost of each individual segment branch - if (pred_seg >= 2 && count1) { - cost += segcounts[pred_seg][0] * vp9_cost_zero(probs[1]) + - segcounts[pred_seg][1] * vp9_cost_one(probs[1]); - } else if (pred_seg < 2 && count2 > 0) { - cost += segcounts[pred_seg][2] * vp9_cost_zero(probs[2]) + - segcounts[pred_seg][3] * vp9_cost_one(probs[2]); - } + if (c45 > 0) + cost += segcounts[4] * vp9_cost_zero(probs[5]) + + segcounts[5] * vp9_cost_one(probs[5]); + if (c67 > 0) + cost += segcounts[6] * vp9_cost_zero(probs[6]) + + segcounts[7] * vp9_cost_one(probs[6]); } return cost; @@ -211,16 +119,18 @@ static void count_segs(VP9_COMP *cpi, MODE_INFO *mi, int *no_pred_segcounts, int (*temporal_predictor_count)[2], - int (*t_unpred_seg_counts)[MAX_MB_SEGMENTS], - int mb_size, int mb_row, int mb_col) { + int *t_unpred_seg_counts, + int bw, int bh, int mi_row, int mi_col) { VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->mb.e_mbd; - const int segmap_index = mb_row * cm->mb_cols + mb_col; - const int segment_id = mi->mbmi.segment_id; + int segment_id; + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; + segment_id = mi->mbmi.segment_id; xd->mode_info_context = mi; - set_mb_row(cm, xd, mb_row, mb_size); - set_mb_col(cm, xd, mb_col, mb_size); + set_mi_row_col(cm, xd, mi_row, bh, mi_col, bw); // Count the number of hits on each segment with no prediction no_pred_segcounts[segment_id]++; @@ -228,7 +138,8 @@ static void count_segs(VP9_COMP *cpi, // Temporal prediction not allowed on key frames if (cm->frame_type != KEY_FRAME) { // Test to see if the segment id matches the predicted value. 
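+    // The predicted id is looked up from the mi grid position and block
+    // size of the current mode info unit.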
- const int pred_seg_id = vp9_get_pred_mb_segid(cm, xd, segmap_index); + const int pred_seg_id = vp9_get_pred_mi_segid(cm, mi->mbmi.sb_type, + mi_row, mi_col); const int seg_predicted = (segment_id == pred_seg_id); // Get the segment id prediction context @@ -241,7 +152,62 @@ static void count_segs(VP9_COMP *cpi, if (!seg_predicted) // Update the "unpredicted" segment count - t_unpred_seg_counts[pred_seg_id][segment_id]++; + t_unpred_seg_counts[segment_id]++; + } +} + +static void count_segs_sb(VP9_COMP *cpi, MODE_INFO *mi, + int *no_pred_segcounts, + int (*temporal_predictor_count)[2], + int *t_unpred_seg_counts, + int mi_row, int mi_col, + BLOCK_SIZE_TYPE bsize) { + VP9_COMMON *const cm = &cpi->common; + const int mis = cm->mode_info_stride; + int bwl, bhl; + const int bsl = mi_width_log2(bsize), bs = 1 << (bsl - 1); + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; + + bwl = mi_width_log2(mi->mbmi.sb_type); + bhl = mi_height_log2(mi->mbmi.sb_type); + + if (bwl == bsl && bhl == bsl) { + count_segs(cpi, mi, no_pred_segcounts, temporal_predictor_count, + t_unpred_seg_counts, 1 << bsl, 1 << bsl, mi_row, mi_col); + } else if (bwl == bsl && bhl < bsl) { + count_segs(cpi, mi, no_pred_segcounts, temporal_predictor_count, + t_unpred_seg_counts, 1 << bsl, bs, mi_row, mi_col); + count_segs(cpi, mi + bs * mis, no_pred_segcounts, temporal_predictor_count, + t_unpred_seg_counts, 1 << bsl, bs, mi_row + bs, mi_col); + } else if (bwl < bsl && bhl == bsl) { + count_segs(cpi, mi, no_pred_segcounts, temporal_predictor_count, + t_unpred_seg_counts, bs, 1 << bsl, mi_row, mi_col); + count_segs(cpi, mi + bs, no_pred_segcounts, temporal_predictor_count, + t_unpred_seg_counts, bs, 1 << bsl, mi_row, mi_col + bs); + } else { + BLOCK_SIZE_TYPE subsize; + int n; + + assert(bwl < bsl && bhl < bsl); + if (bsize == BLOCK_SIZE_SB64X64) { + subsize = BLOCK_SIZE_SB32X32; + } else if (bsize == BLOCK_SIZE_SB32X32) { + subsize = BLOCK_SIZE_MB16X16; + } else { + assert(bsize == BLOCK_SIZE_MB16X16); + subsize = BLOCK_SIZE_SB8X8; + } + + for (n = 0; n < 4; n++) { + const int y_idx = n >> 1, x_idx = n & 0x01; + + count_segs_sb(cpi, mi + y_idx * bs * mis + x_idx * bs, + no_pred_segcounts, temporal_predictor_count, + t_unpred_seg_counts, + mi_row + y_idx * bs, mi_col + x_idx * bs, subsize); + } } } @@ -253,15 +219,14 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) { int t_pred_cost = INT_MAX; int i; - int tile_col, mb_row, mb_col; + int tile_col, mi_row, mi_col; int temporal_predictor_count[PREDICTION_PROBS][2]; int no_pred_segcounts[MAX_MB_SEGMENTS]; - int t_unpred_seg_counts[MAX_MB_SEGMENTS][MAX_MB_SEGMENTS]; + int t_unpred_seg_counts[MAX_MB_SEGMENTS]; - vp9_prob no_pred_tree[MB_FEATURE_TREE_PROBS]; - vp9_prob t_pred_tree[MB_FEATURE_TREE_PROBS]; - vp9_prob t_pred_tree_mod[MAX_MB_SEGMENTS]; + vp9_prob no_pred_tree[MB_SEG_TREE_PROBS]; + vp9_prob t_pred_tree[MB_SEG_TREE_PROBS]; vp9_prob t_nopred_prob[PREDICTION_PROBS]; const int mis = cm->mode_info_stride; @@ -269,10 +234,8 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) { // Set default state for the segment tree probabilities and the // temporal coding probabilities - vpx_memset(xd->mb_segment_tree_probs, 255, - sizeof(xd->mb_segment_tree_probs)); - vpx_memset(cm->segment_pred_probs, 255, - sizeof(cm->segment_pred_probs)); + vpx_memset(xd->mb_segment_tree_probs, 255, sizeof(xd->mb_segment_tree_probs)); + vpx_memset(cm->segment_pred_probs, 255, sizeof(cm->segment_pred_probs)); vpx_memset(no_pred_segcounts, 0, sizeof(no_pred_segcounts)); 
vpx_memset(t_unpred_seg_counts, 0, sizeof(t_unpred_seg_counts)); @@ -280,53 +243,17 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) { // First of all generate stats regarding how well the last segment map // predicts this one - for (tile_col = 0; tile_col < cm->tile_columns; tile_col++) { vp9_get_tile_col_offsets(cm, tile_col); - mi_ptr = cm->mi + cm->cur_tile_mb_col_start; - for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 4, mi_ptr += 4 * mis) { + mi_ptr = cm->mi + cm->cur_tile_mi_col_start; + for (mi_row = 0; mi_row < cm->mi_rows; + mi_row += 8, mi_ptr += 8 * mis) { mi = mi_ptr; - for (mb_col = cm->cur_tile_mb_col_start; - mb_col < cm->cur_tile_mb_col_end; mb_col += 4, mi += 4) { - if (mi->mbmi.sb_type == BLOCK_SIZE_SB64X64) { - count_segs(cpi, mi, no_pred_segcounts, temporal_predictor_count, - t_unpred_seg_counts, 4, mb_row, mb_col); - } else { - for (i = 0; i < 4; i++) { - int x_idx = (i & 1) << 1, y_idx = i & 2; - MODE_INFO *sb_mi = mi + y_idx * mis + x_idx; - - if (mb_col + x_idx >= cm->mb_cols || - mb_row + y_idx >= cm->mb_rows) { - continue; - } - - if (sb_mi->mbmi.sb_type) { - assert(sb_mi->mbmi.sb_type == BLOCK_SIZE_SB32X32); - count_segs(cpi, sb_mi, no_pred_segcounts, - temporal_predictor_count, t_unpred_seg_counts, 2, - mb_row + y_idx, mb_col + x_idx); - } else { - int j; - - for (j = 0; j < 4; j++) { - const int x_idx_mb = x_idx + (j & 1); - const int y_idx_mb = y_idx + (j >> 1); - MODE_INFO *mb_mi = mi + x_idx_mb + y_idx_mb * mis; - - if (mb_col + x_idx_mb >= cm->mb_cols || - mb_row + y_idx_mb >= cm->mb_rows) { - continue; - } - - assert(mb_mi->mbmi.sb_type == BLOCK_SIZE_MB16X16); - count_segs(cpi, mb_mi, no_pred_segcounts, - temporal_predictor_count, t_unpred_seg_counts, - 1, mb_row + y_idx_mb, mb_col + x_idx_mb); - } - } - } - } + for (mi_col = cm->cur_tile_mi_col_start; + mi_col < cm->cur_tile_mi_col_end; + mi_col += 8, mi += 8) { + count_segs_sb(cpi, mi, no_pred_segcounts, temporal_predictor_count, + t_unpred_seg_counts, mi_row, mi_col, BLOCK_SIZE_SB64X64); } } } @@ -340,36 +267,29 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) { if (cm->frame_type != KEY_FRAME) { // Work out probability tree for coding those segments not // predicted using the temporal method and the cost. - calc_segtree_probs_pred(xd, t_unpred_seg_counts, t_pred_tree, - t_pred_tree_mod); - t_pred_cost = cost_segmap_pred(xd, t_unpred_seg_counts, t_pred_tree, - t_pred_tree_mod); + calc_segtree_probs(xd, t_unpred_seg_counts, t_pred_tree); + t_pred_cost = cost_segmap(xd, t_unpred_seg_counts, t_pred_tree); // Add in the cost of the signalling for each prediction context for (i = 0; i < PREDICTION_PROBS; i++) { - t_nopred_prob[i] = get_binary_prob(temporal_predictor_count[i][0], - temporal_predictor_count[i][1]); + const int count0 = temporal_predictor_count[i][0]; + const int count1 = temporal_predictor_count[i][1]; + + t_nopred_prob[i] = get_binary_prob(count0, count1); // Add in the predictor signaling cost - t_pred_cost += (temporal_predictor_count[i][0] * - vp9_cost_zero(t_nopred_prob[i])) + - (temporal_predictor_count[i][1] * - vp9_cost_one(t_nopred_prob[i])); + t_pred_cost += count0 * vp9_cost_zero(t_nopred_prob[i]) + + count1 * vp9_cost_one(t_nopred_prob[i]); } } // Now choose which coding method to use. 
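// Both totals are in the same fixed-point bit units produced by // vp9_cost_zero()/vp9_cost_one(), and t_pred_cost already includes the // per-context prediction-flag signalling accumulated above, so what follows // is a direct rate comparison between the two coding methods.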
if (t_pred_cost < no_pred_cost) { cm->temporal_update = 1; - vpx_memcpy(xd->mb_segment_tree_probs, - t_pred_tree, sizeof(t_pred_tree)); - vpx_memcpy(xd->mb_segment_mispred_tree_probs, - t_pred_tree_mod, sizeof(t_pred_tree_mod)); - vpx_memcpy(&cm->segment_pred_probs, - t_nopred_prob, sizeof(t_nopred_prob)); + vpx_memcpy(xd->mb_segment_tree_probs, t_pred_tree, sizeof(t_pred_tree)); + vpx_memcpy(cm->segment_pred_probs, t_nopred_prob, sizeof(t_nopred_prob)); } else { cm->temporal_update = 0; - vpx_memcpy(xd->mb_segment_tree_probs, - no_pred_tree, sizeof(no_pred_tree)); + vpx_memcpy(xd->mb_segment_tree_probs, no_pred_tree, sizeof(no_pred_tree)); } } diff --git a/vp9/encoder/vp9_segmentation.h b/vp9/encoder/vp9_segmentation.h index 1c90c2f2d9001af15767bad6225910eb5ecdde5a..2183771c45971c8e6bfa5d3fea4373532909a283 100644 --- a/vp9/encoder/vp9_segmentation.h +++ b/vp9/encoder/vp9_segmentation.h @@ -15,8 +15,6 @@ #include "vp9/common/vp9_blockd.h" #include "vp9/encoder/vp9_onyx_int.h" -void vp9_update_gf_useage_maps(VP9_COMP *cpi, VP9_COMMON *cm, MACROBLOCK *x); - void vp9_enable_segmentation(VP9_PTR ptr); void vp9_disable_segmentation(VP9_PTR ptr); diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c index 22a12f4a8558978014a7aaaf1e8cbb44f6f957c4..47792fcc2d47f94a014871c461e15763c0eacd09 100644 --- a/vp9/encoder/vp9_temporal_filter.c +++ b/vp9/encoder/vp9_temporal_filter.c @@ -26,7 +26,6 @@ #include "vp9/common/vp9_quant_common.h" #include "vp9/encoder/vp9_segmentation.h" #include "vpx_mem/vpx_mem.h" -#include "vp9/common/vp9_swapyv12buffer.h" #include "vpx_ports/vpx_timer.h" #define ALT_REF_MC_ENABLED 1 // dis/enable MC in AltRef filtering @@ -41,42 +40,35 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, int mv_col, uint8_t *pred) { const int which_mv = 0; - int_mv subpel_mv; - int_mv fullpel_mv; + int_mv mv; - subpel_mv.as_mv.row = mv_row; - subpel_mv.as_mv.col = mv_col; - // TODO(jkoleszar): Make this rounding consistent with the rest of the code - fullpel_mv.as_mv.row = (mv_row >> 1) & ~7; - fullpel_mv.as_mv.col = (mv_col >> 1) & ~7; + mv.as_mv.row = mv_row; + mv.as_mv.col = mv_col; vp9_build_inter_predictor(y_mb_ptr, stride, &pred[0], 16, - &subpel_mv, + &mv, &xd->scale_factor[which_mv], 16, 16, - which_mv << - (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT), + which_mv, &xd->subpix); stride = (stride + 1) >> 1; vp9_build_inter_predictor_q4(u_mb_ptr, stride, &pred[256], 8, - &fullpel_mv, &subpel_mv, + &mv, &xd->scale_factor_uv[which_mv], 8, 8, - which_mv << - (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT), + which_mv, &xd->subpix); vp9_build_inter_predictor_q4(v_mb_ptr, stride, &pred[320], 8, - &fullpel_mv, &subpel_mv, + &mv, &xd->scale_factor_uv[which_mv], 8, 8, - which_mv << - (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT), + which_mv, &xd->subpix); } @@ -126,53 +118,45 @@ void vp9_temporal_filter_apply_c(uint8_t *frame1, #if ALT_REF_MC_ENABLED static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi, - YV12_BUFFER_CONFIG *arf_frame, - YV12_BUFFER_CONFIG *frame_ptr, - int mb_offset, + uint8_t *arf_frame_buf, + uint8_t *frame_ptr_buf, + int stride, int error_thresh) { MACROBLOCK *x = &cpi->mb; + MACROBLOCKD* const xd = &x->e_mbd; int step_param; int sadpb = x->sadperbit16; int bestsme = INT_MAX; - BLOCK *b = &x->block[0]; - BLOCKD *d = &x->e_mbd.block[0]; int_mv best_ref_mv1; int_mv best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */ + int_mv *ref_mv; // Save input state - uint8_t **base_src = b->base_src; - int src = b->src; - int src_stride = 
b->src_stride; - uint8_t **base_pre = d->base_pre; - int pre = d->pre; - int pre_stride = d->pre_stride; + struct buf_2d src = x->plane[0].src; + struct buf_2d pre = xd->plane[0].pre[0]; best_ref_mv1.as_int = 0; best_ref_mv1_full.as_mv.col = best_ref_mv1.as_mv.col >> 3; best_ref_mv1_full.as_mv.row = best_ref_mv1.as_mv.row >> 3; // Setup frame pointers - b->base_src = &arf_frame->y_buffer; - b->src_stride = arf_frame->y_stride; - b->src = mb_offset; - - d->base_pre = &frame_ptr->y_buffer; - d->pre_stride = frame_ptr->y_stride; - d->pre = mb_offset; + x->plane[0].src.buf = arf_frame_buf; + x->plane[0].src.stride = stride; + xd->plane[0].pre[0].buf = frame_ptr_buf; + xd->plane[0].pre[0].stride = stride; // Further step/diamond searches as necessary - if (cpi->Speed < 8) { - step_param = cpi->sf.first_step + - ((cpi->Speed > 5) ? 1 : 0); - } else { + if (cpi->speed < 8) + step_param = cpi->sf.first_step + ((cpi->speed > 5) ? 1 : 0); + else step_param = cpi->sf.first_step + 2; - } /*cpi->sf.search_method == HEX*/ // TODO Check that the 16x16 vf & sdf are selected here // Ignore mv costing by sending NULL pointer instead of cost arrays - bestsme = vp9_hex_search(x, b, d, &best_ref_mv1_full, &d->bmi.as_mv[0], + ref_mv = &x->e_mbd.mode_info_context->bmi[0].as_mv[0]; + bestsme = vp9_hex_search(x, &best_ref_mv1_full, ref_mv, step_param, sadpb, &cpi->fn_ptr[BLOCK_16X16], NULL, NULL, NULL, NULL, &best_ref_mv1); @@ -184,7 +168,7 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi, int distortion; unsigned int sse; // Ignore mv costing by sending NULL pointer instead of cost array - bestsme = cpi->find_fractional_mv_step(x, b, d, &d->bmi.as_mv[0], + bestsme = cpi->find_fractional_mv_step(x, ref_mv, &best_ref_mv1, x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], @@ -193,13 +177,9 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi, } #endif - // Save input state - b->base_src = base_src; - b->src = src; - b->src_stride = src_stride; - d->base_pre = base_pre; - d->pre = pre; - d->pre_stride = pre_stride; + // Restore input state + x->plane[0].src = src; + xd->plane[0].pre[0] = pre; return bestsme; } @@ -225,9 +205,11 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, DECLARE_ALIGNED_ARRAY(16, uint8_t, predictor, 16 * 16 + 8 * 8 + 8 * 8); // Save input state - uint8_t *y_buffer = mbd->pre.y_buffer; - uint8_t *u_buffer = mbd->pre.u_buffer; - uint8_t *v_buffer = mbd->pre.v_buffer; + uint8_t* input_buffer[MAX_MB_PLANE]; + int i; + + for (i = 0; i < MAX_MB_PLANE; i++) + input_buffer[i] = mbd->plane[i].pre[0].buf; for (mb_row = 0; mb_row < mb_rows; mb_row++) { #if ALT_REF_MC_ENABLED @@ -264,8 +246,8 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, if (cpi->frames[frame] == NULL) continue; - mbd->block[0].bmi.as_mv[0].as_mv.row = 0; - mbd->block[0].bmi.as_mv[0].as_mv.col = 0; + mbd->mode_info_context->bmi[0].as_mv[0].as_mv.row = 0; + mbd->mode_info_context->bmi[0].as_mv[0].as_mv.col = 0; if (frame == alt_ref_index) { filter_weight = 2; @@ -278,9 +260,9 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, // Find best match in this frame by MC err = temporal_filter_find_matching_mb_c (cpi, - cpi->frames[alt_ref_index], - cpi->frames[frame], - mb_y_offset, + cpi->frames[alt_ref_index]->y_buffer + mb_y_offset, + cpi->frames[frame]->y_buffer + mb_y_offset, + cpi->frames[frame]->y_stride, THRESH_LOW); #endif // Assign higher weight to matching MB if it's error @@ -298,8 +280,8 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, cpi->frames[frame]->u_buffer + mb_uv_offset, 
cpi->frames[frame]->v_buffer + mb_uv_offset, cpi->frames[frame]->y_stride, - mbd->block[0].bmi.as_mv[0].as_mv.row, - mbd->block[0].bmi.as_mv[0].as_mv.col, + mbd->mode_info_context->bmi[0].as_mv[0].as_mv.row, + mbd->mode_info_context->bmi[0].as_mv[0].as_mv.col, predictor); // Apply the filter (YUV) @@ -372,16 +354,15 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, } // Restore input state - mbd->pre.y_buffer = y_buffer; - mbd->pre.u_buffer = u_buffer; - mbd->pre.v_buffer = v_buffer; + for (i = 0; i < MAX_MB_PLANE; i++) + mbd->plane[i].pre[0].buf = input_buffer[i]; } void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) { + VP9_COMMON *const cm = &cpi->common; + int frame = 0; - int num_frames_backward = 0; - int num_frames_forward = 0; int frames_to_blur_backward = 0; int frames_to_blur_forward = 0; int frames_to_blur = 0; @@ -391,15 +372,13 @@ void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) { int blur_type = cpi->oxcf.arnr_type; int max_frames = cpi->active_arnr_frames; - num_frames_backward = distance; - num_frames_forward = vp9_lookahead_depth(cpi->lookahead) - - (num_frames_backward + 1); + const int num_frames_backward = distance; + const int num_frames_forward = vp9_lookahead_depth(cpi->lookahead) + - (num_frames_backward + 1); switch (blur_type) { case 1: - ///////////////////////////////////////// // Backward Blur - frames_to_blur_backward = num_frames_backward; if (frames_to_blur_backward >= max_frames) @@ -409,7 +388,6 @@ void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) { break; case 2: - ///////////////////////////////////////// // Forward Blur frames_to_blur_forward = num_frames_forward; @@ -422,7 +400,6 @@ void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) { case 3: default: - ///////////////////////////////////////// // Center Blur frames_to_blur_forward = num_frames_forward; frames_to_blur_backward = num_frames_backward; @@ -462,23 +439,91 @@ void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) { // Setup scaling factors. Scaling on each of the arnr frames is not supported vp9_setup_scale_factors_for_frame(&cpi->mb.e_mbd.scale_factor[0], - &cpi->common.yv12_fb[cpi->common.new_fb_idx], - cpi->common.width, - cpi->common.height); + cm->yv12_fb[cm->new_fb_idx].y_crop_width, + cm->yv12_fb[cm->new_fb_idx].y_crop_height, + cm->width, cm->height); cpi->mb.e_mbd.scale_factor_uv[0] = cpi->mb.e_mbd.scale_factor[0]; // Setup frame pointers, NULL indicates frame not included in filter vpx_memset(cpi->frames, 0, max_frames * sizeof(YV12_BUFFER_CONFIG *)); for (frame = 0; frame < frames_to_blur; frame++) { - int which_buffer = start_frame - frame; + int which_buffer = start_frame - frame; struct lookahead_entry *buf = vp9_lookahead_peek(cpi->lookahead, which_buffer); cpi->frames[frames_to_blur - 1 - frame] = &buf->img; } - temporal_filter_iterate_c( - cpi, - frames_to_blur, - frames_to_blur_backward, - strength); + temporal_filter_iterate_c(cpi, frames_to_blur, frames_to_blur_backward, + strength); +} + +void configure_arnr_filter(VP9_COMP *cpi, const unsigned int this_frame, + const int group_boost) { + int half_gf_int; + int frames_after_arf; + int frames_bwd = cpi->oxcf.arnr_max_frames - 1; + int frames_fwd = cpi->oxcf.arnr_max_frames - 1; + int q; + + // Define the arnr filter width for this group of frames: + // We only filter frames that lie within a distance of half + // the GF interval from the ARF frame. We also have to trap + // cases where the filter extends beyond the end of clip. 
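// As a worked example (illustrative values, not from this patch): with // arnr_max_frames = 7 and baseline_gf_interval = 8, half_gf_int is 4, so the // centered case below starts from frames_fwd = frames_bwd = 3 and, if not // clamped by frames_after_arf, selects a 7-frame filter (bbbAfff).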
+ // Note: this_frame->frame has been updated in the loop + // so it now points at the ARF frame. + half_gf_int = cpi->baseline_gf_interval >> 1; + frames_after_arf = (int)(cpi->twopass.total_stats.count - this_frame - 1); + + switch (cpi->oxcf.arnr_type) { + case 1: // Backward filter + frames_fwd = 0; + if (frames_bwd > half_gf_int) + frames_bwd = half_gf_int; + break; + + case 2: // Forward filter + if (frames_fwd > half_gf_int) + frames_fwd = half_gf_int; + if (frames_fwd > frames_after_arf) + frames_fwd = frames_after_arf; + frames_bwd = 0; + break; + + case 3: // Centered filter + default: + frames_fwd >>= 1; + if (frames_fwd > frames_after_arf) + frames_fwd = frames_after_arf; + if (frames_fwd > half_gf_int) + frames_fwd = half_gf_int; + + frames_bwd = frames_fwd; + + // For an even-length filter there is one more frame backward + // than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff. + if (frames_bwd < half_gf_int) + frames_bwd += (cpi->oxcf.arnr_max_frames + 1) & 0x1; + break; + } + + cpi->active_arnr_frames = frames_bwd + 1 + frames_fwd; + + // Adjust the strength based on the active max q + q = ((int)vp9_convert_qindex_to_q(cpi->active_worst_quality) >> 1); + if (q > 8) { + cpi->active_arnr_strength = cpi->oxcf.arnr_strength; + } else { + cpi->active_arnr_strength = cpi->oxcf.arnr_strength - (8 - q); + if (cpi->active_arnr_strength < 0) + cpi->active_arnr_strength = 0; + } + + // Adjust the number of frames in the filter and the strength based on the + // gf boost level. + if (cpi->active_arnr_frames > (group_boost / 150)) { + cpi->active_arnr_frames = (group_boost / 150); + cpi->active_arnr_frames += !(cpi->active_arnr_frames & 1); + } + if (cpi->active_arnr_strength > (group_boost / 300)) { + cpi->active_arnr_strength = (group_boost / 300); + } } diff --git a/vp9/encoder/vp9_temporal_filter.h b/vp9/encoder/vp9_temporal_filter.h index f3ca8c6169107123325d73914498f69c5812f74d..c5f3b467e5403a09d983eaf2c126147b1c6bb22d 100644 --- a/vp9/encoder/vp9_temporal_filter.h +++ b/vp9/encoder/vp9_temporal_filter.h @@ -12,5 +12,7 @@ #define VP9_ENCODER_VP9_TEMPORAL_FILTER_H_ void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance); +void configure_arnr_filter(VP9_COMP *cpi, const unsigned int this_frame, + const int group_boost); #endif // VP9_ENCODER_VP9_TEMPORAL_FILTER_H_ diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index 21401d1759c598c2ae41c4377a07f730d83a1853..0a290e124c093b823f7e9cce94c7f3c8286f4ae1 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -25,31 +25,12 @@ compressions, then generating vp9_context.c = initial stats.
*/ #ifdef ENTROPY_STATS -vp9_coeff_accum context_counters_4x4[BLOCK_TYPES]; -vp9_coeff_accum context_counters_8x8[BLOCK_TYPES]; -vp9_coeff_accum context_counters_16x16[BLOCK_TYPES]; -vp9_coeff_accum context_counters_32x32[BLOCK_TYPES]; - -extern vp9_coeff_stats tree_update_hist_4x4[BLOCK_TYPES]; -extern vp9_coeff_stats tree_update_hist_8x8[BLOCK_TYPES]; -extern vp9_coeff_stats tree_update_hist_16x16[BLOCK_TYPES]; -extern vp9_coeff_stats tree_update_hist_32x32[BLOCK_TYPES]; +vp9_coeff_accum context_counters[TX_SIZE_MAX_SB][BLOCK_TYPES]; +extern vp9_coeff_stats tree_update_hist[TX_SIZE_MAX_SB][BLOCK_TYPES]; #endif /* ENTROPY_STATS */ -#if CONFIG_CODE_NONZEROCOUNT -#ifdef NZC_STATS -unsigned int nzc_counts_4x4[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES] - [NZC4X4_TOKENS]; -unsigned int nzc_counts_8x8[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES] - [NZC8X8_TOKENS]; -unsigned int nzc_counts_16x16[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES] - [NZC16X16_TOKENS]; -unsigned int nzc_counts_32x32[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES] - [NZC32X32_TOKENS]; -unsigned int nzc_pcat_counts[MAX_NZC_CONTEXTS][NZC_TOKENS_EXTRA] - [NZC_BITS_EXTRA][2]; -#endif -#endif +DECLARE_ALIGNED(16, extern const uint8_t, + vp9_pt_energy_class[MAX_ENTROPY_TOKENS]); static TOKENVALUE dct_value_tokens[DCT_MAX_VALUE * 2]; const TOKENVALUE *vp9_dct_value_tokens_ptr; @@ -59,7 +40,7 @@ const int *vp9_dct_value_cost_ptr; static void fill_value_tokens() { TOKENVALUE *const t = dct_value_tokens + DCT_MAX_VALUE; - vp9_extra_bit_struct *const e = vp9_extra_bits; + vp9_extra_bit *const e = vp9_extra_bits; int i = -DCT_MAX_VALUE; int sign = 1; @@ -77,25 +58,25 @@ static void fill_value_tokens() { while (++j < 11 && e[j].base_val <= a) {} - t[i].Token = --j; + t[i].token = --j; eb |= (a - e[j].base_val) << 1; } else - t[i].Token = a; + t[i].token = a; - t[i].Extra = eb; + t[i].extra = eb; } // initialize the cost for extra bits for all possible coefficient value. { int cost = 0; - vp9_extra_bit_struct *p = vp9_extra_bits + t[i].Token; + vp9_extra_bit *p = vp9_extra_bits + t[i].token; if (p->base_val) { - const int extra = t[i].Extra; - const int Length = p->Len; + const int extra = t[i].extra; + const int length = p->len; - if (Length) - cost += treed_cost(p->tree, p->prob, extra >> 1, Length); + if (length) + cost += treed_cost(p->tree, p->prob, extra >> 1, length); cost += vp9_cost_bit(vp9_prob_half, extra & 1); /* sign */ dct_value_cost[i + DCT_MAX_VALUE] = cost; @@ -111,695 +92,220 @@ static void fill_value_tokens() { extern const int *vp9_get_coef_neighbors_handle(const int *scan, int *pad); -static void tokenize_b(VP9_COMP *cpi, - MACROBLOCKD *xd, - const int ib, - TOKENEXTRA **tp, - PLANE_TYPE type, - TX_SIZE tx_size, - int dry_run) { +struct tokenize_b_args { + VP9_COMP *cpi; + MACROBLOCKD *xd; + TOKENEXTRA **tp; + TX_SIZE tx_size; + int dry_run; +}; + +static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize, + int ss_txfrm_size, void *arg) { + struct tokenize_b_args* const args = arg; + VP9_COMP *cpi = args->cpi; + MACROBLOCKD *xd = args->xd; + TOKENEXTRA **tp = args->tp; + PLANE_TYPE type = plane ? 
PLANE_TYPE_UV : PLANE_TYPE_Y_WITH_DC; + TX_SIZE tx_size = ss_txfrm_size / 2; + int dry_run = args->dry_run; + MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; int pt; /* near block/prev token context index */ - int c = 0; - const int eob = xd->eobs[ib]; /* one beyond last nonzero coeff */ + int c = 0, rc = 0; TOKENEXTRA *t = *tp; /* store tokens starting here */ - int16_t *qcoeff_ptr = xd->qcoeff + 16 * ib; + const int eob = xd->plane[plane].eobs[block]; + const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16); + const BLOCK_SIZE_TYPE sb_type = (mbmi->sb_type < BLOCK_SIZE_SB8X8) ? + BLOCK_SIZE_SB8X8 : mbmi->sb_type; + const int bwl = b_width_log2(sb_type); + const int off = block >> (2 * tx_size); + const int mod = bwl - tx_size - xd->plane[plane].subsampling_x; + const int aoff = (off & ((1 << mod) - 1)) << tx_size; + const int loff = (off >> mod) << tx_size; + ENTROPY_CONTEXT *A = xd->plane[plane].above_context + aoff; + ENTROPY_CONTEXT *L = xd->plane[plane].left_context + loff; int seg_eob, default_eob, pad; const int segment_id = mbmi->segment_id; - const BLOCK_SIZE_TYPE sb_type = mbmi->sb_type; const int *scan, *nb; vp9_coeff_count *counts; - vp9_coeff_probs *probs; - const int ref = mbmi->ref_frame != INTRA_FRAME; - ENTROPY_CONTEXT *a, *l, *a1, *l1, *a2, *l2, *a3, *l3, a_ec, l_ec; + vp9_coeff_probs_model *coef_probs; + const int ref = mbmi->ref_frame[0] != INTRA_FRAME; + ENTROPY_CONTEXT above_ec, left_ec; uint8_t token_cache[1024]; -#if CONFIG_CODE_NONZEROCOUNT - int zerosleft, nzc = 0; - if (eob == 0) - assert(xd->nzcs[ib] == 0); -#endif - - if (sb_type == BLOCK_SIZE_SB64X64) { - a = (ENTROPY_CONTEXT *)xd->above_context + - vp9_block2above_sb64[tx_size][ib]; - l = (ENTROPY_CONTEXT *)xd->left_context + vp9_block2left_sb64[tx_size][ib]; - a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); - l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); - a2 = a1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); - l2 = l1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); - a3 = a2 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); - l3 = l2 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); - } else if (sb_type == BLOCK_SIZE_SB32X32) { - a = (ENTROPY_CONTEXT *)xd->above_context + vp9_block2above_sb[tx_size][ib]; - l = (ENTROPY_CONTEXT *)xd->left_context + vp9_block2left_sb[tx_size][ib]; - a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); - l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); - a2 = a3 = l2 = l3 = NULL; - } else { - a = (ENTROPY_CONTEXT *)xd->above_context + vp9_block2above[tx_size][ib]; - l = (ENTROPY_CONTEXT *)xd->left_context + vp9_block2left[tx_size][ib]; - a1 = l1 = a2 = l2 = a3 = l3 = NULL; - } + TX_TYPE tx_type = DCT_DCT; + const uint8_t * band_translate; + assert((!type && !plane) || (type && plane)); + counts = cpi->coef_counts[tx_size]; + coef_probs = cpi->common.fc.coef_probs[tx_size]; switch (tx_size) { default: case TX_4X4: { - const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? - get_tx_type_4x4(xd, ib) : DCT_DCT; - a_ec = *a; - l_ec = *l; + tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? 
+ get_tx_type_4x4(xd, block) : DCT_DCT; + above_ec = A[0] != 0; + left_ec = L[0] != 0; seg_eob = 16; - scan = vp9_default_zig_zag1d_4x4; - if (tx_type != DCT_DCT) { - if (tx_type == ADST_DCT) { - scan = vp9_row_scan_4x4; - } else if (tx_type == DCT_ADST) { - scan = vp9_col_scan_4x4; - } - } - counts = cpi->coef_counts_4x4; - probs = cpi->common.fc.coef_probs_4x4; + scan = get_scan_4x4(tx_type); + band_translate = vp9_coefband_trans_4x4; break; } case TX_8X8: { - const int sz = 3 + sb_type, x = ib & ((1 << sz) - 1), y = ib - x; - const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? - get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT; - a_ec = (a[0] + a[1]) != 0; - l_ec = (l[0] + l[1]) != 0; + const int sz = 1 + b_width_log2(sb_type); + const int x = block & ((1 << sz) - 1), y = block - x; + tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? + get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT; + above_ec = (A[0] + A[1]) != 0; + left_ec = (L[0] + L[1]) != 0; seg_eob = 64; - scan = vp9_default_zig_zag1d_8x8; - if (tx_type != DCT_DCT) { - if (tx_type == ADST_DCT) { - scan = vp9_row_scan_8x8; - } else if (tx_type == DCT_ADST) { - scan = vp9_col_scan_8x8; - } - } - counts = cpi->coef_counts_8x8; - probs = cpi->common.fc.coef_probs_8x8; + scan = get_scan_8x8(tx_type); + band_translate = vp9_coefband_trans_8x8plus; break; } case TX_16X16: { - const int sz = 4 + sb_type, x = ib & ((1 << sz) - 1), y = ib - x; - const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? - get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT; - if (type != PLANE_TYPE_UV) { - a_ec = (a[0] + a[1] + a[2] + a[3]) != 0; - l_ec = (l[0] + l[1] + l[2] + l[3]) != 0; - } else { - a_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0; - l_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0; - } + const int sz = 2 + b_width_log2(sb_type); + const int x = block & ((1 << sz) - 1), y = block - x; + tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? 
+ get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT; + above_ec = (A[0] + A[1] + A[2] + A[3]) != 0; + left_ec = (L[0] + L[1] + L[2] + L[3]) != 0; seg_eob = 256; - scan = vp9_default_zig_zag1d_16x16; - if (tx_type != DCT_DCT) { - if (tx_type == ADST_DCT) { - scan = vp9_row_scan_16x16; - } else if (tx_type == DCT_ADST) { - scan = vp9_col_scan_16x16; - } - } - counts = cpi->coef_counts_16x16; - probs = cpi->common.fc.coef_probs_16x16; + scan = get_scan_16x16(tx_type); + band_translate = vp9_coefband_trans_8x8plus; break; } case TX_32X32: - if (type != PLANE_TYPE_UV) { - a_ec = (a[0] + a[1] + a[2] + a[3] + - a1[0] + a1[1] + a1[2] + a1[3]) != 0; - l_ec = (l[0] + l[1] + l[2] + l[3] + - l1[0] + l1[1] + l1[2] + l1[3]) != 0; - } else { - a_ec = (a[0] + a[1] + a1[0] + a1[1] + - a2[0] + a2[1] + a3[0] + a3[1]) != 0; - l_ec = (l[0] + l[1] + l1[0] + l1[1] + - l2[0] + l2[1] + l3[0] + l3[1]) != 0; - } + above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0; + left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0; seg_eob = 1024; - scan = vp9_default_zig_zag1d_32x32; - counts = cpi->coef_counts_32x32; - probs = cpi->common.fc.coef_probs_32x32; + scan = vp9_default_scan_32x32; + band_translate = vp9_coefband_trans_8x8plus; break; } - VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec); + pt = combine_entropy_contexts(above_ec, left_ec); nb = vp9_get_coef_neighbors_handle(scan, &pad); default_eob = seg_eob; if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) seg_eob = 0; + c = 0; do { - const int band = get_coef_band(scan, tx_size, c); + const int band = get_coef_band(band_translate, c); int token; int v = 0; -#if CONFIG_CODE_NONZEROCOUNT - zerosleft = seg_eob - xd->nzcs[ib] - c + nzc; -#endif + rc = scan[c]; + if (c) + pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob); if (c < eob) { - const int rc = scan[c]; v = qcoeff_ptr[rc]; assert(-DCT_MAX_VALUE <= v && v < DCT_MAX_VALUE); - t->Extra = vp9_dct_value_tokens_ptr[v].Extra; - token = vp9_dct_value_tokens_ptr[v].Token; + t->extra = vp9_dct_value_tokens_ptr[v].extra; + token = vp9_dct_value_tokens_ptr[v].token; } else { -#if CONFIG_CODE_NONZEROCOUNT - break; -#else token = DCT_EOB_TOKEN; -#endif } - t->Token = token; - t->context_tree = probs[type][ref][band][pt]; -#if CONFIG_CODE_NONZEROCOUNT - // Skip zero node if there are no zeros left - t->skip_eob_node = 1 + (zerosleft == 0); + t->token = token; + t->context_tree = coef_probs[type][ref][band][pt]; + t->skip_eob_node = (c > 0) && (token_cache[scan[c - 1]] == 0); + +#if CONFIG_BALANCED_COEFTREE + assert(token <= ZERO_TOKEN || + vp9_coef_encodings[t->token].len - t->skip_eob_node > 0); #else - t->skip_eob_node = (c > 0) && (token_cache[c - 1] == 0); + assert(vp9_coef_encodings[t->token].len - t->skip_eob_node > 0); #endif - assert(vp9_coef_encodings[t->Token].Len - t->skip_eob_node > 0); + if (!dry_run) { ++counts[type][ref][band][pt][token]; +#if CONFIG_BALANCED_COEFTREE + if (!t->skip_eob_node && token > ZERO_TOKEN) +#else if (!t->skip_eob_node) +#endif ++cpi->common.fc.eob_branch_counts[tx_size][type][ref][band][pt]; } -#if CONFIG_CODE_NONZEROCOUNT - nzc += (v != 0); -#endif - token_cache[c] = token; - - pt = vp9_get_coef_context(scan, nb, pad, token_cache, c + 1, default_eob); + token_cache[scan[c]] = vp9_pt_energy_class[token]; ++t; } while (c < eob && ++c < seg_eob); -#if CONFIG_CODE_NONZEROCOUNT - assert(nzc == xd->nzcs[ib]); -#endif *tp = t; - a_ec = l_ec = (c > 0); /* 0 <-> all coeff data is zero */ - a[0] = a_ec; - l[0] = l_ec; - - if (tx_size == 
TX_8X8) { - a[1] = a_ec; - l[1] = l_ec; - } else if (tx_size == TX_16X16) { - if (type != PLANE_TYPE_UV) { - a[1] = a[2] = a[3] = a_ec; - l[1] = l[2] = l[3] = l_ec; - } else { - a1[0] = a1[1] = a[1] = a_ec; - l1[0] = l1[1] = l[1] = l_ec; - } - } else if (tx_size == TX_32X32) { - if (type != PLANE_TYPE_UV) { - a[1] = a[2] = a[3] = a_ec; - l[1] = l[2] = l[3] = l_ec; - a1[0] = a1[1] = a1[2] = a1[3] = a_ec; - l1[0] = l1[1] = l1[2] = l1[3] = l_ec; - } else { - a[1] = a1[0] = a1[1] = a_ec; - l[1] = l1[0] = l1[1] = l_ec; - a2[0] = a2[1] = a3[0] = a3[1] = a_ec; - l2[0] = l2[1] = l3[0] = l3[1] = l_ec; + if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) { + set_contexts_on_border(xd, bsize, plane, tx_size, c, aoff, loff, A, L); + } else { + for (pt = 0; pt < (1 << tx_size); pt++) { + A[pt] = L[pt] = c > 0; } } } -int vp9_mby_is_skippable_4x4(MACROBLOCKD *xd) { - int skip = 1; - int i = 0; - - for (i = 0; i < 16; i++) - skip &= (!xd->eobs[i]); - - return skip; -} - -int vp9_mbuv_is_skippable_4x4(MACROBLOCKD *xd) { - int skip = 1; - int i; - - for (i = 16; i < 24; i++) - skip &= (!xd->eobs[i]); - return skip; -} - -static int mb_is_skippable_4x4(MACROBLOCKD *xd) { - return (vp9_mby_is_skippable_4x4(xd) & - vp9_mbuv_is_skippable_4x4(xd)); -} - -int vp9_mby_is_skippable_8x8(MACROBLOCKD *xd) { - int skip = 1; - int i = 0; - - for (i = 0; i < 16; i += 4) - skip &= (!xd->eobs[i]); - - return skip; -} - -int vp9_mbuv_is_skippable_8x8(MACROBLOCKD *xd) { - return (!xd->eobs[16]) & (!xd->eobs[20]); -} - -static int mb_is_skippable_8x8(MACROBLOCKD *xd) { - return (vp9_mby_is_skippable_8x8(xd) & - vp9_mbuv_is_skippable_8x8(xd)); -} - -static int mb_is_skippable_8x8_4x4uv(MACROBLOCKD *xd) { - return (vp9_mby_is_skippable_8x8(xd) & - vp9_mbuv_is_skippable_4x4(xd)); +struct is_skippable_args { + MACROBLOCKD *xd; + int *skippable; +}; +static void is_skippable(int plane, int block, + BLOCK_SIZE_TYPE bsize, int ss_txfrm_size, void *argv) { + struct is_skippable_args *args = argv; + args->skippable[0] &= (!args->xd->plane[plane].eobs[block]); } -int vp9_mby_is_skippable_16x16(MACROBLOCKD *xd) { - return (!xd->eobs[0]); +int vp9_sb_is_skippable(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) { + int result = 1; + struct is_skippable_args args = {xd, &result}; + foreach_transformed_block(xd, bsize, is_skippable, &args); + return result; } -static int mb_is_skippable_16x16(MACROBLOCKD *xd) { - return (vp9_mby_is_skippable_16x16(xd) & vp9_mbuv_is_skippable_8x8(xd)); +int vp9_sby_is_skippable(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) { + int result = 1; + struct is_skippable_args args = {xd, &result}; + foreach_transformed_block_in_plane(xd, bsize, 0, + is_skippable, &args); + return result; } -int vp9_sby_is_skippable_32x32(MACROBLOCKD *xd) { - return (!xd->eobs[0]); -} - -int vp9_sbuv_is_skippable_16x16(MACROBLOCKD *xd) { - return (!xd->eobs[64]) & (!xd->eobs[80]); -} - -static int sb_is_skippable_32x32(MACROBLOCKD *xd) { - return vp9_sby_is_skippable_32x32(xd) && - vp9_sbuv_is_skippable_16x16(xd); -} - -int vp9_sby_is_skippable_16x16(MACROBLOCKD *xd) { - int skip = 1; - int i = 0; - - for (i = 0; i < 64; i += 16) - skip &= (!xd->eobs[i]); - - return skip; -} - -static int sb_is_skippable_16x16(MACROBLOCKD *xd) { - return vp9_sby_is_skippable_16x16(xd) & vp9_sbuv_is_skippable_16x16(xd); -} - -int vp9_sby_is_skippable_8x8(MACROBLOCKD *xd) { - int skip = 1; - int i = 0; - - for (i = 0; i < 64; i += 4) - skip &= (!xd->eobs[i]); - - return skip; -} - -int vp9_sbuv_is_skippable_8x8(MACROBLOCKD *xd) { - int skip = 1; - int i = 
0; - - for (i = 64; i < 96; i += 4) - skip &= (!xd->eobs[i]); - - return skip; -} - -static int sb_is_skippable_8x8(MACROBLOCKD *xd) { - return vp9_sby_is_skippable_8x8(xd) & vp9_sbuv_is_skippable_8x8(xd); -} - -int vp9_sby_is_skippable_4x4(MACROBLOCKD *xd) { - int skip = 1; - int i = 0; - - for (i = 0; i < 64; i++) - skip &= (!xd->eobs[i]); - - return skip; -} - -int vp9_sbuv_is_skippable_4x4(MACROBLOCKD *xd) { - int skip = 1; - int i = 0; - - for (i = 64; i < 96; i++) - skip &= (!xd->eobs[i]); - - return skip; -} - -static int sb_is_skippable_4x4(MACROBLOCKD *xd) { - return vp9_sby_is_skippable_4x4(xd) & vp9_sbuv_is_skippable_4x4(xd); +int vp9_sbuv_is_skippable(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) { + int result = 1; + struct is_skippable_args args = {xd, &result}; + foreach_transformed_block_uv(xd, bsize, is_skippable, &args); + return result; } void vp9_tokenize_sb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, - int dry_run) { - VP9_COMMON * const cm = &cpi->common; - MB_MODE_INFO * const mbmi = &xd->mode_info_context->mbmi; - TOKENEXTRA *t_backup = *t; - const int mb_skip_context = vp9_get_pred_context(cm, xd, PRED_MBSKIP); - const int segment_id = mbmi->segment_id; - const int skip_inc = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP); - int b; - - switch (mbmi->txfm_size) { - case TX_32X32: - mbmi->mb_skip_coeff = sb_is_skippable_32x32(xd); - break; - case TX_16X16: - mbmi->mb_skip_coeff = sb_is_skippable_16x16(xd); - break; - case TX_8X8: - mbmi->mb_skip_coeff = sb_is_skippable_8x8(xd); - break; - case TX_4X4: - mbmi->mb_skip_coeff = sb_is_skippable_4x4(xd); - break; - default: assert(0); - } - - if (mbmi->mb_skip_coeff) { - if (!dry_run) - cpi->skip_true_count[mb_skip_context] += skip_inc; - if (!cm->mb_no_coeff_skip) { - vp9_stuff_sb(cpi, xd, t, dry_run); - } else { - vp9_reset_sb_tokens_context(xd); - } - if (dry_run) - *t = t_backup; - return; - } - - if (!dry_run) - cpi->skip_false_count[mb_skip_context] += skip_inc; - - switch (mbmi->txfm_size) { - case TX_32X32: - tokenize_b(cpi, xd, 0, t, PLANE_TYPE_Y_WITH_DC, - TX_32X32, dry_run); - for (b = 64; b < 96; b += 16) - tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, - TX_16X16, dry_run); - break; - case TX_16X16: - for (b = 0; b < 64; b += 16) - tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, - TX_16X16, dry_run); - for (b = 64; b < 96; b += 16) - tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, - TX_16X16, dry_run); - break; - case TX_8X8: - for (b = 0; b < 64; b += 4) - tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, - TX_8X8, dry_run); - for (b = 64; b < 96; b += 4) - tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, - TX_8X8, dry_run); - break; - case TX_4X4: - for (b = 0; b < 64; b++) - tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, - TX_4X4, dry_run); - for (b = 64; b < 96; b++) - tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, - TX_4X4, dry_run); - break; - default: assert(0); - } - - if (dry_run) - *t = t_backup; -} - -int vp9_sb64y_is_skippable_32x32(MACROBLOCKD *xd) { - int skip = 1; - int i = 0; - - for (i = 0; i < 256; i += 64) - skip &= (!xd->eobs[i]); - - return skip; -} - -int vp9_sb64uv_is_skippable_32x32(MACROBLOCKD *xd) { - return (!xd->eobs[256]) & (!xd->eobs[320]); -} - -static int sb64_is_skippable_32x32(MACROBLOCKD *xd) { - return vp9_sb64y_is_skippable_32x32(xd) & vp9_sb64uv_is_skippable_32x32(xd); -} - -int vp9_sb64y_is_skippable_16x16(MACROBLOCKD *xd) { - int skip = 1; - int i = 0; - - for (i = 0; i < 256; i += 16) - skip &= (!xd->eobs[i]); - - return skip; -} - -int vp9_sb64uv_is_skippable_16x16(MACROBLOCKD *xd) { - int 
skip = 1; - int i = 0; - - for (i = 256; i < 384; i += 16) - skip &= (!xd->eobs[i]); - - return skip; -} - -static int sb64_is_skippable_16x16(MACROBLOCKD *xd) { - return vp9_sb64y_is_skippable_16x16(xd) & vp9_sb64uv_is_skippable_16x16(xd); -} - -int vp9_sb64y_is_skippable_8x8(MACROBLOCKD *xd) { - int skip = 1; - int i = 0; - - for (i = 0; i < 256; i += 4) - skip &= (!xd->eobs[i]); - - return skip; -} - -int vp9_sb64uv_is_skippable_8x8(MACROBLOCKD *xd) { - int skip = 1; - int i = 0; - - for (i = 256; i < 384; i += 4) - skip &= (!xd->eobs[i]); - - return skip; -} - -static int sb64_is_skippable_8x8(MACROBLOCKD *xd) { - return vp9_sb64y_is_skippable_8x8(xd) & vp9_sb64uv_is_skippable_8x8(xd); -} - -int vp9_sb64y_is_skippable_4x4(MACROBLOCKD *xd) { - int skip = 1; - int i = 0; - - for (i = 0; i < 256; i++) - skip &= (!xd->eobs[i]); - - return skip; -} - -int vp9_sb64uv_is_skippable_4x4(MACROBLOCKD *xd) { - int skip = 1; - int i = 0; - - for (i = 256; i < 384; i++) - skip &= (!xd->eobs[i]); - - return skip; -} - -static int sb64_is_skippable_4x4(MACROBLOCKD *xd) { - return vp9_sb64y_is_skippable_4x4(xd) & vp9_sb64uv_is_skippable_4x4(xd); -} - -void vp9_tokenize_sb64(VP9_COMP *cpi, - MACROBLOCKD *xd, - TOKENEXTRA **t, - int dry_run) { + int dry_run, BLOCK_SIZE_TYPE bsize) { VP9_COMMON * const cm = &cpi->common; MB_MODE_INFO * const mbmi = &xd->mode_info_context->mbmi; TOKENEXTRA *t_backup = *t; const int mb_skip_context = vp9_get_pred_context(cm, xd, PRED_MBSKIP); const int segment_id = mbmi->segment_id; const int skip_inc = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP); - int b; + const TX_SIZE txfm_size = mbmi->txfm_size; + struct tokenize_b_args arg = { + cpi, xd, t, txfm_size, dry_run + }; - switch (mbmi->txfm_size) { - case TX_32X32: - mbmi->mb_skip_coeff = sb64_is_skippable_32x32(xd); - break; - case TX_16X16: - mbmi->mb_skip_coeff = sb64_is_skippable_16x16(xd); - break; - case TX_8X8: - mbmi->mb_skip_coeff = sb64_is_skippable_8x8(xd); - break; - case TX_4X4: - mbmi->mb_skip_coeff = sb64_is_skippable_4x4(xd); - break; - default: assert(0); - } + mbmi->mb_skip_coeff = vp9_sb_is_skippable(xd, bsize); if (mbmi->mb_skip_coeff) { if (!dry_run) - cpi->skip_true_count[mb_skip_context] += skip_inc; - if (!cm->mb_no_coeff_skip) { - vp9_stuff_sb64(cpi, xd, t, dry_run); - } else { - vp9_reset_sb64_tokens_context(xd); - } + cm->fc.mbskip_count[mb_skip_context][1] += skip_inc; + vp9_reset_sb_tokens_context(xd, bsize); if (dry_run) *t = t_backup; return; } if (!dry_run) - cpi->skip_false_count[mb_skip_context] += skip_inc; - - switch (mbmi->txfm_size) { - case TX_32X32: - for (b = 0; b < 256; b += 64) - tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, - TX_32X32, dry_run); - for (b = 256; b < 384; b += 64) - tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, - TX_32X32, dry_run); - break; - case TX_16X16: - for (b = 0; b < 256; b += 16) - tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, - TX_16X16, dry_run); - for (b = 256; b < 384; b += 16) - tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, - TX_16X16, dry_run); - break; - case TX_8X8: - for (b = 0; b < 256; b += 4) - tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, - TX_8X8, dry_run); - for (b = 256; b < 384; b += 4) - tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, - TX_8X8, dry_run); - break; - case TX_4X4: - for (b = 0; b < 256; b++) - tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, - TX_4X4, dry_run); - for (b = 256; b < 384; b++) - tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, - TX_4X4, dry_run); - break; - default: assert(0); - } - - if (dry_run) - *t = t_backup; -} - 
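A hedged sketch of the call-site migration this consolidation implies; the caller shown is illustrative and not part of the patch. The per-size walkers removed here collapse into the single bsize-parameterized entry point, which visits each transform block via foreach_transformed_block() and tokenizes it with tokenize_b():

  /* Before: a dedicated tokenizer per partition size. */
  vp9_tokenize_mb(cpi, xd, &t, dry_run);    /* 16x16 macroblock */
  vp9_tokenize_sb64(cpi, xd, &t, dry_run);  /* 64x64 superblock */

  /* After: one entry point, parameterized by partition size. */
  vp9_tokenize_sb(cpi, xd, &t, dry_run, BLOCK_SIZE_SB64X64);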
-void vp9_tokenize_mb(VP9_COMP *cpi, - MACROBLOCKD *xd, - TOKENEXTRA **t, - int dry_run) { - int b; - int tx_size = xd->mode_info_context->mbmi.txfm_size; - int mb_skip_context = vp9_get_pred_context(&cpi->common, xd, PRED_MBSKIP); - TOKENEXTRA *t_backup = *t; - - // If the MB is going to be skipped because of a segment level flag - // exclude this from the skip count stats used to calculate the - // transmitted skip probability; - int skip_inc; - int segment_id = xd->mode_info_context->mbmi.segment_id; - - if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) { - skip_inc = 1; - } else - skip_inc = 0; - - switch (tx_size) { - case TX_16X16: - - xd->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable_16x16(xd); - break; - case TX_8X8: - if (xd->mode_info_context->mbmi.mode == I8X8_PRED || - xd->mode_info_context->mbmi.mode == SPLITMV) - xd->mode_info_context->mbmi.mb_skip_coeff = - mb_is_skippable_8x8_4x4uv(xd); - else - xd->mode_info_context->mbmi.mb_skip_coeff = - mb_is_skippable_8x8(xd); - break; - - default: - xd->mode_info_context->mbmi.mb_skip_coeff = - mb_is_skippable_4x4(xd); - break; - } - - if (xd->mode_info_context->mbmi.mb_skip_coeff) { - if (!dry_run) - cpi->skip_true_count[mb_skip_context] += skip_inc; - if (!cpi->common.mb_no_coeff_skip) { - vp9_stuff_mb(cpi, xd, t, dry_run); - } else { - vp9_reset_mb_tokens_context(xd); - } + cm->fc.mbskip_count[mb_skip_context][0] += skip_inc; - if (dry_run) - *t = t_backup; - return; - } - - if (!dry_run) - cpi->skip_false_count[mb_skip_context] += skip_inc; + foreach_transformed_block(xd, bsize, tokenize_b, &arg); - if (tx_size == TX_16X16) { - tokenize_b(cpi, xd, 0, t, PLANE_TYPE_Y_WITH_DC, TX_16X16, dry_run); - for (b = 16; b < 24; b += 4) { - tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_8X8, dry_run); - } - } else if (tx_size == TX_8X8) { - for (b = 0; b < 16; b += 4) { - tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_8X8, dry_run); - } - if (xd->mode_info_context->mbmi.mode == I8X8_PRED || - xd->mode_info_context->mbmi.mode == SPLITMV) { - for (b = 16; b < 24; b++) { - tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_4X4, dry_run); - } - } else { - for (b = 16; b < 24; b += 4) { - tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_8X8, dry_run); - } - } - } else { - for (b = 0; b < 16; b++) - tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_4X4, dry_run); - for (b = 16; b < 24; b++) - tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_4X4, dry_run); - } if (dry_run) *t = t_backup; } @@ -808,29 +314,17 @@ void vp9_tokenize_mb(VP9_COMP *cpi, void init_context_counters(void) { FILE *f = fopen("context.bin", "rb"); if (!f) { - vpx_memset(context_counters_4x4, 0, sizeof(context_counters_4x4)); - vpx_memset(context_counters_8x8, 0, sizeof(context_counters_8x8)); - vpx_memset(context_counters_16x16, 0, sizeof(context_counters_16x16)); - vpx_memset(context_counters_32x32, 0, sizeof(context_counters_32x32)); + vp9_zero(context_counters); } else { - fread(context_counters_4x4, sizeof(context_counters_4x4), 1, f); - fread(context_counters_8x8, sizeof(context_counters_8x8), 1, f); - fread(context_counters_16x16, sizeof(context_counters_16x16), 1, f); - fread(context_counters_32x32, sizeof(context_counters_32x32), 1, f); + fread(context_counters, sizeof(context_counters), 1, f); fclose(f); } f = fopen("treeupdate.bin", "rb"); if (!f) { - vpx_memset(tree_update_hist_4x4, 0, sizeof(tree_update_hist_4x4)); - vpx_memset(tree_update_hist_8x8, 0, sizeof(tree_update_hist_8x8)); - vpx_memset(tree_update_hist_16x16, 0, sizeof(tree_update_hist_16x16)); - 
vpx_memset(tree_update_hist_32x32, 0, sizeof(tree_update_hist_32x32)); + vpx_memset(tree_update_hist, 0, sizeof(tree_update_hist)); } else { - fread(tree_update_hist_4x4, sizeof(tree_update_hist_4x4), 1, f); - fread(tree_update_hist_8x8, sizeof(tree_update_hist_8x8), 1, f); - fread(tree_update_hist_16x16, sizeof(tree_update_hist_16x16), 1, f); - fread(tree_update_hist_32x32, sizeof(tree_update_hist_32x32), 1, f); + fread(tree_update_hist, sizeof(tree_update_hist), 1, f); fclose(f); } } @@ -932,32 +426,29 @@ void print_context_counters() { fprintf(f, "\n/* *** GENERATED FILE: DO NOT EDIT *** */\n\n"); /* print counts */ - print_counter(f, context_counters_4x4, BLOCK_TYPES, + print_counter(f, context_counters[TX_4X4], BLOCK_TYPES, "vp9_default_coef_counts_4x4[BLOCK_TYPES]"); - print_counter(f, context_counters_8x8, BLOCK_TYPES, + print_counter(f, context_counters[TX_8X8], BLOCK_TYPES, "vp9_default_coef_counts_8x8[BLOCK_TYPES]"); - print_counter(f, context_counters_16x16, BLOCK_TYPES, + print_counter(f, context_counters[TX_16X16], BLOCK_TYPES, "vp9_default_coef_counts_16x16[BLOCK_TYPES]"); - print_counter(f, context_counters_32x32, BLOCK_TYPES, + print_counter(f, context_counters[TX_32X32], BLOCK_TYPES, "vp9_default_coef_counts_32x32[BLOCK_TYPES]"); /* print coefficient probabilities */ - print_probs(f, context_counters_4x4, BLOCK_TYPES, + print_probs(f, context_counters[TX_4X4], BLOCK_TYPES, "default_coef_probs_4x4[BLOCK_TYPES]"); - print_probs(f, context_counters_8x8, BLOCK_TYPES, + print_probs(f, context_counters[TX_8X8], BLOCK_TYPES, "default_coef_probs_8x8[BLOCK_TYPES]"); - print_probs(f, context_counters_16x16, BLOCK_TYPES, + print_probs(f, context_counters[TX_16X16], BLOCK_TYPES, "default_coef_probs_16x16[BLOCK_TYPES]"); - print_probs(f, context_counters_32x32, BLOCK_TYPES, + print_probs(f, context_counters[TX_32X32], BLOCK_TYPES, "default_coef_probs_32x32[BLOCK_TYPES]"); fclose(f); f = fopen("context.bin", "wb"); - fwrite(context_counters_4x4, sizeof(context_counters_4x4), 1, f); - fwrite(context_counters_8x8, sizeof(context_counters_8x8), 1, f); - fwrite(context_counters_16x16, sizeof(context_counters_16x16), 1, f); - fwrite(context_counters_32x32, sizeof(context_counters_32x32), 1, f); + fwrite(context_counters, sizeof(context_counters), 1, f); fclose(f); } #endif @@ -965,268 +456,3 @@ void print_context_counters() { void vp9_tokenize_initialize() { fill_value_tokens(); } - -static void stuff_b(VP9_COMP *cpi, - MACROBLOCKD *xd, - const int ib, - TOKENEXTRA **tp, - PLANE_TYPE type, - TX_SIZE tx_size, - int dry_run) { - MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; - const BLOCK_SIZE_TYPE sb_type = mbmi->sb_type; -#if CONFIG_CODE_NONZEROCOUNT == 0 - vp9_coeff_count *counts; - vp9_coeff_probs *probs; - int pt, band; - TOKENEXTRA *t = *tp; - const int ref = mbmi->ref_frame != INTRA_FRAME; -#endif - ENTROPY_CONTEXT *a, *l, *a1, *l1, *a2, *l2, *a3, *l3, a_ec, l_ec; - - if (sb_type == BLOCK_SIZE_SB32X32) { - a = (ENTROPY_CONTEXT *)xd->above_context + - vp9_block2above_sb64[tx_size][ib]; - l = (ENTROPY_CONTEXT *)xd->left_context + vp9_block2left_sb64[tx_size][ib]; - a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); - l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); - a2 = a1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); - l2 = l1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); - a3 = a2 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); - l3 = l2 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); - } else if 
(sb_type == BLOCK_SIZE_SB32X32) { - a = (ENTROPY_CONTEXT *)xd->above_context + vp9_block2above_sb[tx_size][ib]; - l = (ENTROPY_CONTEXT *)xd->left_context + vp9_block2left_sb[tx_size][ib]; - a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); - l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); - a2 = l2 = a3 = l3 = NULL; - } else { - a = (ENTROPY_CONTEXT *)xd->above_context + vp9_block2above[tx_size][ib]; - l = (ENTROPY_CONTEXT *)xd->left_context + vp9_block2left[tx_size][ib]; - a1 = l1 = a2 = l2 = a3 = l3 = NULL; - } - - switch (tx_size) { - default: - case TX_4X4: - a_ec = a[0]; - l_ec = l[0]; -#if CONFIG_CODE_NONZEROCOUNT == 0 - counts = cpi->coef_counts_4x4; - probs = cpi->common.fc.coef_probs_4x4; -#endif - break; - case TX_8X8: - a_ec = (a[0] + a[1]) != 0; - l_ec = (l[0] + l[1]) != 0; -#if CONFIG_CODE_NONZEROCOUNT == 0 - counts = cpi->coef_counts_8x8; - probs = cpi->common.fc.coef_probs_8x8; -#endif - break; - case TX_16X16: - if (type != PLANE_TYPE_UV) { - a_ec = (a[0] + a[1] + a[2] + a[3]) != 0; - l_ec = (l[0] + l[1] + l[2] + l[3]) != 0; - } else { - a_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0; - l_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0; - } -#if CONFIG_CODE_NONZEROCOUNT == 0 - counts = cpi->coef_counts_16x16; - probs = cpi->common.fc.coef_probs_16x16; -#endif - break; - case TX_32X32: - if (type != PLANE_TYPE_UV) { - a_ec = (a[0] + a[1] + a[2] + a[3] + - a1[0] + a1[1] + a1[2] + a1[3]) != 0; - l_ec = (l[0] + l[1] + l[2] + l[3] + - l1[0] + l1[1] + l1[2] + l1[3]) != 0; - } else { - a_ec = (a[0] + a[1] + a1[0] + a1[1] + - a2[0] + a2[1] + a3[0] + a3[1]) != 0; - l_ec = (l[0] + l[1] + l1[0] + l1[1] + - l2[0] + l2[1] + l3[0] + l3[1]) != 0; - } -#if CONFIG_CODE_NONZEROCOUNT == 0 - counts = cpi->coef_counts_32x32; - probs = cpi->common.fc.coef_probs_32x32; -#endif - break; - } - -#if CONFIG_CODE_NONZEROCOUNT == 0 - VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec); - band = 0; - t->Token = DCT_EOB_TOKEN; - t->context_tree = probs[type][ref][band][pt]; - t->skip_eob_node = 0; - ++t; - *tp = t; - if (!dry_run) { - ++counts[type][ref][band][pt][DCT_EOB_TOKEN]; - } -#endif - *a = *l = 0; - if (tx_size == TX_8X8) { - a[1] = 0; - l[1] = 0; - } else if (tx_size == TX_16X16) { - if (type != PLANE_TYPE_UV) { - a[1] = a[2] = a[3] = 0; - l[1] = l[2] = l[3] = 0; - } else { - a1[0] = a1[1] = a[1] = a_ec; - l1[0] = l1[1] = l[1] = l_ec; - } - } else if (tx_size == TX_32X32) { - if (type != PLANE_TYPE_Y_WITH_DC) { - a[1] = a[2] = a[3] = a_ec; - l[1] = l[2] = l[3] = l_ec; - a1[0] = a1[1] = a1[2] = a1[3] = a_ec; - l1[0] = l1[1] = l1[2] = l1[3] = l_ec; - } else { - a[1] = a1[0] = a1[1] = a_ec; - l[1] = l1[0] = l1[1] = l_ec; - a2[0] = a2[1] = a3[0] = a3[1] = a_ec; - l2[0] = l2[1] = l3[0] = l3[1] = l_ec; - } - } -} - -static void stuff_mb_8x8(VP9_COMP *cpi, MACROBLOCKD *xd, - TOKENEXTRA **t, int dry_run) { - int b; - - for (b = 0; b < 16; b += 4) - stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_8X8, dry_run); - for (b = 16; b < 24; b += 4) - stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_8X8, dry_run); -} - -static void stuff_mb_16x16(VP9_COMP *cpi, MACROBLOCKD *xd, - TOKENEXTRA **t, int dry_run) { - int b; - stuff_b(cpi, xd, 0, t, PLANE_TYPE_Y_WITH_DC, TX_16X16, dry_run); - - for (b = 16; b < 24; b += 4) { - stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_8X8, dry_run); - } -} - -static void stuff_mb_4x4(VP9_COMP *cpi, MACROBLOCKD *xd, - TOKENEXTRA **t, int dry_run) { - int b; - - for (b = 0; b < 16; b++) - stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_4X4, dry_run); - for (b = 16; b < 24; b++) - 
stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_4X4, dry_run); -} - -static void stuff_mb_8x8_4x4uv(VP9_COMP *cpi, MACROBLOCKD *xd, - TOKENEXTRA **t, int dry_run) { - int b; - - for (b = 0; b < 16; b += 4) - stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_8X8, dry_run); - for (b = 16; b < 24; b++) - stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_4X4, dry_run); -} - -void vp9_stuff_mb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) { - TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; - TOKENEXTRA * const t_backup = *t; - - if (tx_size == TX_16X16) { - stuff_mb_16x16(cpi, xd, t, dry_run); - } else if (tx_size == TX_8X8) { - if (xd->mode_info_context->mbmi.mode == I8X8_PRED || - xd->mode_info_context->mbmi.mode == SPLITMV) { - stuff_mb_8x8_4x4uv(cpi, xd, t, dry_run); - } else { - stuff_mb_8x8(cpi, xd, t, dry_run); - } - } else { - stuff_mb_4x4(cpi, xd, t, dry_run); - } - - if (dry_run) { - *t = t_backup; - } -} - -void vp9_stuff_sb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) { - TOKENEXTRA * const t_backup = *t; - int b; - - switch (xd->mode_info_context->mbmi.txfm_size) { - case TX_32X32: - stuff_b(cpi, xd, 0, t, PLANE_TYPE_Y_WITH_DC, TX_32X32, dry_run); - for (b = 64; b < 96; b += 16) - stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_16X16, dry_run); - break; - case TX_16X16: - for (b = 0; b < 64; b += 16) - stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_16X16, dry_run); - for (b = 64; b < 96; b += 16) - stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_16X16, dry_run); - break; - case TX_8X8: - for (b = 0; b < 64; b += 4) - stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_8X8, dry_run); - for (b = 64; b < 96; b += 4) - stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_8X8, dry_run); - break; - case TX_4X4: - for (b = 0; b < 64; b++) - stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_4X4, dry_run); - for (b = 64; b < 96; b++) - stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_4X4, dry_run); - break; - default: assert(0); - } - - if (dry_run) { - *t = t_backup; - } -} - -void vp9_stuff_sb64(VP9_COMP *cpi, MACROBLOCKD *xd, - TOKENEXTRA **t, int dry_run) { - TOKENEXTRA * const t_backup = *t; - int b; - - switch (xd->mode_info_context->mbmi.txfm_size) { - case TX_32X32: - for (b = 0; b < 256; b += 64) - stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_32X32, dry_run); - for (b = 256; b < 384; b += 64) - stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_32X32, dry_run); - break; - case TX_16X16: - for (b = 0; b < 256; b += 16) - stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_16X16, dry_run); - for (b = 256; b < 384; b += 16) - stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_16X16, dry_run); - break; - case TX_8X8: - for (b = 0; b < 256; b += 4) - stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_8X8, dry_run); - for (b = 256; b < 384; b += 4) - stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_8X8, dry_run); - break; - case TX_4X4: - for (b = 0; b < 256; b++) - stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_4X4, dry_run); - for (b = 256; b < 384; b++) - stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_4X4, dry_run); - break; - default: assert(0); - } - - if (dry_run) { - *t = t_backup; - } -} diff --git a/vp9/encoder/vp9_tokenize.h b/vp9/encoder/vp9_tokenize.h index 907f814ac1debe966574220eba544dee1a3a1a5a..e7f90c96cffe0c55f14d1e6c2dedb3b3afa54b66 100644 --- a/vp9/encoder/vp9_tokenize.h +++ b/vp9/encoder/vp9_tokenize.h @@ -17,65 +17,33 @@ void vp9_tokenize_initialize(); typedef struct { - int16_t Token; - int16_t Extra; + int16_t token; + int16_t extra; } TOKENVALUE; typedef struct { const vp9_prob *context_tree; - int16_t Extra; - 
uint8_t Token; + int16_t extra; + uint8_t token; uint8_t skip_eob_node; } TOKENEXTRA; typedef int64_t vp9_coeff_accum[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS + 1]; -int vp9_mby_is_skippable_4x4(MACROBLOCKD *xd); -int vp9_mbuv_is_skippable_4x4(MACROBLOCKD *xd); -int vp9_mby_is_skippable_8x8(MACROBLOCKD *xd); -int vp9_mbuv_is_skippable_8x8(MACROBLOCKD *xd); -int vp9_mby_is_skippable_16x16(MACROBLOCKD *xd); -int vp9_sby_is_skippable_32x32(MACROBLOCKD *xd); -int vp9_sby_is_skippable_16x16(MACROBLOCKD *xd); -int vp9_sby_is_skippable_8x8(MACROBLOCKD *xd); -int vp9_sby_is_skippable_4x4(MACROBLOCKD *xd); -int vp9_sbuv_is_skippable_16x16(MACROBLOCKD *xd); -int vp9_sbuv_is_skippable_8x8(MACROBLOCKD *xd); -int vp9_sbuv_is_skippable_4x4(MACROBLOCKD *xd); -int vp9_sb64y_is_skippable_32x32(MACROBLOCKD *xd); -int vp9_sb64y_is_skippable_16x16(MACROBLOCKD *xd); -int vp9_sb64y_is_skippable_8x8(MACROBLOCKD *xd); -int vp9_sb64y_is_skippable_4x4(MACROBLOCKD *xd); -int vp9_sb64uv_is_skippable_32x32(MACROBLOCKD *xd); -int vp9_sb64uv_is_skippable_16x16(MACROBLOCKD *xd); -int vp9_sb64uv_is_skippable_8x8(MACROBLOCKD *xd); -int vp9_sb64uv_is_skippable_4x4(MACROBLOCKD *xd); - +int vp9_sb_is_skippable(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize); +int vp9_sby_is_skippable(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize); +int vp9_sbuv_is_skippable(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize); struct VP9_COMP; -void vp9_tokenize_mb(struct VP9_COMP *cpi, MACROBLOCKD *xd, - TOKENEXTRA **t, int dry_run); void vp9_tokenize_sb(struct VP9_COMP *cpi, MACROBLOCKD *xd, - TOKENEXTRA **t, int dry_run); -void vp9_tokenize_sb64(struct VP9_COMP *cpi, MACROBLOCKD *xd, - TOKENEXTRA **t, int dry_run); - -void vp9_stuff_mb(struct VP9_COMP *cpi, MACROBLOCKD *xd, - TOKENEXTRA **t, int dry_run); -void vp9_stuff_sb(struct VP9_COMP *cpi, MACROBLOCKD *xd, - TOKENEXTRA **t, int dry_run); -void vp9_stuff_sb64(struct VP9_COMP *cpi, MACROBLOCKD *xd, - TOKENEXTRA **t, int dry_run); + TOKENEXTRA **t, int dry_run, BLOCK_SIZE_TYPE bsize); #ifdef ENTROPY_STATS void init_context_counters(); void print_context_counters(); -extern vp9_coeff_accum context_counters_4x4[BLOCK_TYPES]; -extern vp9_coeff_accum context_counters_8x8[BLOCK_TYPES]; -extern vp9_coeff_accum context_counters_16x16[BLOCK_TYPES]; -extern vp9_coeff_accum context_counters_32x32[BLOCK_TYPES]; +extern vp9_coeff_accum context_counters[TX_SIZE_MAX_SB][BLOCK_TYPES]; #endif extern const int *vp9_dct_value_cost_ptr; diff --git a/vp9/encoder/vp9_treewriter.c b/vp9/encoder/vp9_treewriter.c index 52da3c6ce3f3f2a2aaefe7747ba1c468247f3ced..e4aed5374cf44713467adf04dbca0b9019ca48ec 100644 --- a/vp9/encoder/vp9_treewriter.c +++ b/vp9/encoder/vp9_treewriter.c @@ -8,35 +8,31 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ - #include "vp9/encoder/vp9_treewriter.h" -#include "vp9/common/vp9_common.h" -static void cost( - int *const C, - vp9_tree T, - const vp9_prob *const P, - int i, - int c -) { - const vp9_prob p = P [i >> 1]; +static void cost(int *costs, vp9_tree tree, const vp9_prob *probs, + int i, int c) { + const vp9_prob prob = probs[i / 2]; + int b; - do { - const vp9_tree_index j = T[i]; - const int d = c + vp9_cost_bit(p, i & 1); + for (b = 0; b <= 1; ++b) { + const int cc = c + vp9_cost_bit(prob, b); + const vp9_tree_index ii = tree[i + b]; - if (j <= 0) - C[-j] = d; + if (ii <= 0) + costs[-ii] = cc; else - cost(C, T, P, j, d); - } while (++i & 1); + cost(costs, tree, probs, ii, cc); + } } -void vp9_cost_tokens(int *c, const vp9_prob *p, vp9_tree t) { - cost(c, t, p, 0, 0); + +void vp9_cost_tokens(int *costs, const vp9_prob *probs, vp9_tree tree) { + cost(costs, tree, probs, 0, 0); } -void vp9_cost_tokens_skip(int *c, const vp9_prob *p, vp9_tree t) { - assert(t[1] > 0 && t[0] <= 0); - c[-t[0]] = vp9_cost_bit(p[0], 0); - cost(c, t, p, 2, 0); +void vp9_cost_tokens_skip(int *costs, const vp9_prob *probs, vp9_tree tree) { + assert(tree[0] <= 0 && tree[1] > 0); + + costs[-tree[0]] = vp9_cost_bit(probs[0], 0); + cost(costs, tree, probs, 2, 0); } diff --git a/vp9/encoder/vp9_treewriter.h b/vp9/encoder/vp9_treewriter.h index 832471aa80652d780d53f47e1e8df2bc55374b73..eeda5cda7961075bbf480189507563b28340ccc8 100644 --- a/vp9/encoder/vp9_treewriter.h +++ b/vp9/encoder/vp9_treewriter.h @@ -19,11 +19,8 @@ #include "vp9/encoder/vp9_boolhuff.h" /* for now */ -typedef BOOL_CODER vp9_writer; -#define vp9_write encode_bool -#define vp9_write_literal vp9_encode_value -#define vp9_write_bit(W, V) vp9_write(W, V, vp9_prob_half) +#define vp9_write_prob(w, v) vp9_write_literal((w), (v), 8) /* Approximate length of an encoded bool in 256ths of a bit at given prob */ @@ -38,69 +35,53 @@ typedef BOOL_CODER vp9_writer; /* Both of these return bits, not scaled bits. */ static INLINE unsigned int cost_branch256(const unsigned int ct[2], vp9_prob p) { - /* Imitate existing calculation */ return ct[0] * vp9_cost_zero(p) + ct[1] * vp9_cost_one(p); } static INLINE unsigned int cost_branch(const unsigned int ct[2], vp9_prob p) { - /* Imitate existing calculation */ return cost_branch256(ct, p) >> 8; } -/* Small functions to write explicit values and tokens, as well as - estimate their lengths. 
*/ - -static INLINE void treed_write(vp9_writer *const w, - vp9_tree t, - const vp9_prob *const p, - int v, - /* number of bits in v, assumed nonzero */ - int n) { +static INLINE void treed_write(vp9_writer *w, + vp9_tree tree, const vp9_prob *probs, + int bits, int len) { vp9_tree_index i = 0; do { - const int b = (v >> --n) & 1; - vp9_write(w, b, p[i >> 1]); - i = t[i + b]; - } while (n); + const int bit = (bits >> --len) & 1; + vp9_write(w, bit, probs[i >> 1]); + i = tree[i + bit]; + } while (len); } -static INLINE void write_token(vp9_writer *const w, - vp9_tree t, - const vp9_prob *const p, - vp9_token *const x) { - treed_write(w, t, p, x->value, x->Len); +static INLINE void write_token(vp9_writer *w, vp9_tree tree, + const vp9_prob *probs, + const struct vp9_token *token) { + treed_write(w, tree, probs, token->value, token->len); } -static INLINE int treed_cost(vp9_tree t, - const vp9_prob *const p, - int v, - /* number of bits in v, assumed nonzero */ - int n) { - int c = 0; +static INLINE int treed_cost(vp9_tree tree, const vp9_prob *probs, + int bits, int len) { + int cost = 0; vp9_tree_index i = 0; do { - const int b = (v >> --n) & 1; - c += vp9_cost_bit(p[i >> 1], b); - i = t[i + b]; - } while (n); + const int bit = (bits >> --len) & 1; + cost += vp9_cost_bit(probs[i >> 1], bit); + i = tree[i + bit]; + } while (len); - return c; + return cost; } -static INLINE int cost_token(vp9_tree t, - const vp9_prob *const p, - vp9_token *const x) { - return treed_cost(t, p, x->value, x->Len); +static INLINE int cost_token(vp9_tree tree, const vp9_prob *probs, + const struct vp9_token *token) { + return treed_cost(tree, probs, token->value, token->len); } -/* Fill array of costs for all possible token values. */ - -void vp9_cost_tokens(int *Costs, const vp9_prob *, vp9_tree); - -void vp9_cost_tokens_skip(int *c, const vp9_prob *p, vp9_tree t); +void vp9_cost_tokens(int *costs, const vp9_prob *probs, vp9_tree tree); +void vp9_cost_tokens_skip(int *costs, const vp9_prob *probs, vp9_tree tree); #endif // VP9_ENCODER_VP9_TREEWRITER_H_ diff --git a/vp9/encoder/vp9_variance.h b/vp9/encoder/vp9_variance.h index 13dabbda41d711a8dce115cf2892182450083a14..aaa43ef82f6a8da6429258d1b8ca5820fe33c35c 100644 --- a/vp9/encoder/vp9_variance.h +++ b/vp9/encoder/vp9_variance.h @@ -12,6 +12,7 @@ #define VP9_ENCODER_VP9_VARIANCE_H_ #include "vpx/vpx_integer.h" +// #include "./vpx_config.h" typedef unsigned int(*vp9_sad_fn_t)(const uint8_t *src_ptr, int source_stride, @@ -50,6 +51,15 @@ typedef unsigned int (*vp9_subpixvariance_fn_t)(const uint8_t *src_ptr, int Refstride, unsigned int *sse); +typedef unsigned int (*vp9_subp_avg_variance_fn_t)(const uint8_t *src_ptr, + int source_stride, + int xoffset, + int yoffset, + const uint8_t *ref_ptr, + int Refstride, + unsigned int *sse, + const uint8_t *second_pred); + typedef void (*vp9_ssimpf_fn_t)(uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, @@ -64,15 +74,31 @@ typedef unsigned int (*vp9_get16x16prederror_fn_t)(const uint8_t *src_ptr, int ref_stride); typedef struct vp9_variance_vtable { - vp9_sad_fn_t sdf; - vp9_variance_fn_t vf; - vp9_subpixvariance_fn_t svf; - vp9_variance_fn_t svf_halfpix_h; - vp9_variance_fn_t svf_halfpix_v; - vp9_variance_fn_t svf_halfpix_hv; - vp9_sad_multi_fn_t sdx3f; - vp9_sad_multi1_fn_t sdx8f; - vp9_sad_multi_d_fn_t sdx4df; + vp9_sad_fn_t sdf; + vp9_variance_fn_t vf; + vp9_subpixvariance_fn_t svf; + vp9_subp_avg_variance_fn_t svaf; + vp9_variance_fn_t svf_halfpix_h; + 
vp9_variance_fn_t svf_halfpix_v;
+  vp9_variance_fn_t svf_halfpix_hv;
+  vp9_sad_multi_fn_t sdx3f;
+  vp9_sad_multi1_fn_t sdx8f;
+  vp9_sad_multi_d_fn_t sdx4df;
 } vp9_variance_fn_ptr_t;
 
+static void comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width,
+                          int height, uint8_t *ref, int ref_stride) {
+  int i, j;
+
+  for (i = 0; i < height; i++) {
+    for (j = 0; j < width; j++) {
+      const int tmp = pred[j] + ref[j];
+      comp_pred[j] = (tmp + 1) >> 1;  // round-to-nearest average
+    }
+    comp_pred += width;
+    pred += width;
+    ref += ref_stride;
+  }
+}
 
 #endif  // VP9_ENCODER_VP9_VARIANCE_H_
diff --git a/vp9/encoder/vp9_variance_c.c b/vp9/encoder/vp9_variance_c.c
index d07a65b4551c8171c4946ead30d7c5c375fa3e97..3b9d50f34444b8f067811341e7f2c9e6bb9b6cc1 100644
--- a/vp9/encoder/vp9_variance_c.c
+++ b/vp9/encoder/vp9_variance_c.c
@@ -13,6 +13,7 @@
 #include "vp9/common/vp9_filter.h"
 #include "vp9/common/vp9_subpelvar.h"
 #include "vpx/vpx_integer.h"
+#include "vpx_ports/mem.h"
 
 unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) {
   unsigned int i, sum = 0;
@@ -24,6 +25,234 @@ unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) {
   return sum;
 }
 
+unsigned int vp9_variance64x32_c(const uint8_t *src_ptr,
+                                 int source_stride,
+                                 const uint8_t *ref_ptr,
+                                 int recon_stride,
+                                 unsigned int *sse) {
+  unsigned int var;
+  int avg;
+
+  variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 32, &var, &avg);
+  *sse = var;
+  return (var - (((int64_t)avg * avg) >> 11));
+}
+
+unsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr,
+                                           int src_pixels_per_line,
+                                           int xoffset,
+                                           int yoffset,
+                                           const uint8_t *dst_ptr,
+                                           int dst_pixels_per_line,
+                                           unsigned int *sse) {
+  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
+  uint8_t temp2[68 * 64];
+  const int16_t *hfilter, *vfilter;
+
+  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+
+  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
+                                    1, 33, 64, hfilter);
+  var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter);
+
+  return vp9_variance64x32_c(temp2, 64, dst_ptr, dst_pixels_per_line, sse);
+}
+
+unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr,
+                                               int src_pixels_per_line,
+                                               int xoffset,
+                                               int yoffset,
+                                               const uint8_t *dst_ptr,
+                                               int dst_pixels_per_line,
+                                               unsigned int *sse,
+                                               const uint8_t *second_pred) {
+  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
+  uint8_t temp2[68 * 64];
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64);  // compound pred buffer
+  const int16_t *hfilter, *vfilter;
+
+  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+
+  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
+                                    1, 33, 64, hfilter);
+  var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter);
+  comp_avg_pred(temp3, second_pred, 64, 32, temp2, 64);
+  return vp9_variance64x32_c(temp3, 64, dst_ptr, dst_pixels_per_line, sse);
+}
+
+unsigned int vp9_variance32x64_c(const uint8_t *src_ptr,
+                                 int source_stride,
+                                 const uint8_t *ref_ptr,
+                                 int recon_stride,
+                                 unsigned int *sse) {
+  unsigned int var;
+  int avg;
+
+  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 64, &var, &avg);
+  *sse = var;
+  return (var - (((int64_t)avg * avg) >> 11));
+}
+
+unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr,
+                                           int src_pixels_per_line,
+                                           int xoffset,
+                                           int yoffset,
+                                           const uint8_t *dst_ptr,
+                                           int dst_pixels_per_line,
+                                           unsigned int *sse) {
+  uint16_t fdata3[65 * 64];  //
Temp data bufffer used in filtering + uint8_t temp2[68 * 64]; + const int16_t *hfilter, *vfilter; + + hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); + vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, + 1, 65, 32, hfilter); + var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter); + + return vp9_variance32x64_c(temp2, 32, dst_ptr, dst_pixels_per_line, sse); +} + +unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + const uint8_t *dst_ptr, + int dst_pixels_per_line, + unsigned int *sse, + const uint8_t *second_pred) { + uint16_t fdata3[65 * 64]; // Temp data bufffer used in filtering + uint8_t temp2[68 * 64]; + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 64); // compound pred buffer + const int16_t *hfilter, *vfilter; + + hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); + vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, + 1, 65, 32, hfilter); + var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter); + comp_avg_pred(temp3, second_pred, 32, 64, temp2, 32); + return vp9_variance32x64_c(temp3, 32, dst_ptr, dst_pixels_per_line, sse); +} + +unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int recon_stride, + unsigned int *sse) { + unsigned int var; + int avg; + + variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 16, &var, &avg); + *sse = var; + return (var - (((int64_t)avg * avg) >> 9)); +} + +unsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + const uint8_t *dst_ptr, + int dst_pixels_per_line, + unsigned int *sse) { + uint16_t fdata3[33 * 32]; // Temp data bufffer used in filtering + uint8_t temp2[36 * 32]; + const int16_t *hfilter, *vfilter; + + hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); + vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, + 1, 17, 32, hfilter); + var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter); + + return vp9_variance32x16_c(temp2, 32, dst_ptr, dst_pixels_per_line, sse); +} + +unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + const uint8_t *dst_ptr, + int dst_pixels_per_line, + unsigned int *sse, + const uint8_t *second_pred) { + uint16_t fdata3[33 * 32]; // Temp data bufffer used in filtering + uint8_t temp2[36 * 32]; + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 16); // compound pred buffer + const int16_t *hfilter, *vfilter; + + hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); + vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, + 1, 17, 32, hfilter); + var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter); + comp_avg_pred(temp3, second_pred, 32, 16, temp2, 32); + return vp9_variance32x16_c(temp3, 32, dst_ptr, dst_pixels_per_line, sse); +} + +unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int recon_stride, + unsigned int *sse) { + unsigned int var; + int avg; + + variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 32, &var, &avg); + *sse = var; + return (var - (((int64_t)avg * avg) >> 9)); +} + +unsigned int vp9_sub_pixel_variance16x32_c(const 
uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + const uint8_t *dst_ptr, + int dst_pixels_per_line, + unsigned int *sse) { + uint16_t fdata3[33 * 32]; // Temp data bufffer used in filtering + uint8_t temp2[36 * 32]; + const int16_t *hfilter, *vfilter; + + hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); + vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, + 1, 33, 16, hfilter); + var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter); + + return vp9_variance16x32_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse); +} + +unsigned int vp9_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + const uint8_t *dst_ptr, + int dst_pixels_per_line, + unsigned int *sse, + const uint8_t *second_pred) { + uint16_t fdata3[33 * 32]; // Temp data bufffer used in filtering + uint8_t temp2[36 * 32]; + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 32); // compound pred buffer + const int16_t *hfilter, *vfilter; + + hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); + vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, + 1, 33, 16, hfilter); + var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter); + comp_avg_pred(temp3, second_pred, 16, 32, temp2, 16); + return vp9_variance16x32_c(temp3, 16, dst_ptr, dst_pixels_per_line, sse); +} + unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, @@ -89,6 +318,11 @@ unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, return (var - (((unsigned int)avg * avg) >> 7)); } +void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride, + const uint8_t *ref_ptr, int ref_stride, + unsigned int *sse, int *sum) { + variance(src_ptr, source_stride, ref_ptr, ref_stride, 8, 8, sse, sum); +} unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, @@ -103,6 +337,32 @@ unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, return (var - (((unsigned int)avg * avg) >> 6)); } +unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int recon_stride, + unsigned int *sse) { + unsigned int var; + int avg; + + variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 4, &var, &avg); + *sse = var; + return (var - (((unsigned int)avg * avg) >> 5)); +} + +unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int recon_stride, + unsigned int *sse) { + unsigned int var; + int avg; + + variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 8, &var, &avg); + *sse = var; + return (var - (((unsigned int)avg * avg) >> 5)); +} + unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, @@ -130,6 +390,45 @@ unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, return var; } +unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int recon_stride, + unsigned int *sse) { + unsigned int var; + int avg; + + variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg); + *sse = var; + return var; +} + +unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int recon_stride, + unsigned int *sse) { + unsigned int var; + int avg; + + variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg); + *sse = var; + return var; +} + 
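
All of the fixed-size variance functions in this file reduce to one identity: with sse equal to the sum of (src - ref)^2 and sum equal to the sum of (src - ref) over a w x h block, the variance is sse - sum^2 / (w * h). Because w * h is always a power of two here, the division is written as a right shift in the code (>> 11 for 64x32 = 2048 pixels, >> 9 for 32x16 and 16x32, >> 6 for 8x8), and the vp9_mse*_c variants simply return sse unmodified. The following is a minimal stand-alone sketch of that shared pattern; variance_ref is a hypothetical name for illustration and is not part of this patch.

#include <stdint.h>

// Illustrative reference for the vp9_variance{W}x{H}_c wrappers in this
// file; not patch code.
static unsigned int variance_ref(const uint8_t *src, int src_stride,
                                 const uint8_t *ref, int ref_stride,
                                 int w, int h, unsigned int *sse) {
  int64_t sum = 0;  // accumulates signed differences
  int64_t sq = 0;   // accumulates squared differences
  int i, j;

  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int diff = src[i * src_stride + j] - ref[i * ref_stride + j];
      sum += diff;
      sq += diff * diff;
    }
  }
  *sse = (unsigned int)sq;  // the vp9_mse*_c variants return this as-is
  // w * h is a power of two and sum * sum is non-negative, so this
  // division is exactly the >> log2(w * h) used in the functions above.
  return (unsigned int)(sq - (sum * sum) / (w * h));
}
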
+unsigned int vp9_mse8x8_c(const uint8_t *src_ptr,
+                          int source_stride,
+                          const uint8_t *ref_ptr,
+                          int recon_stride,
+                          unsigned int *sse) {
+  unsigned int var;
+  int avg;
+
+  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg);
+  *sse = var;
+  return var;
+}
+
 unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr,
                                          int src_pixels_per_line,
@@ -139,21 +438,47 @@ unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr,
                                          int dst_pixels_per_line,
                                          unsigned int *sse) {
   uint8_t temp2[20 * 16];
-  const int16_t *HFilter, *VFilter;
-  uint16_t FData3[5 * 4];  // Temp data buffer used in filtering
+  const int16_t *hfilter, *vfilter;
+  uint16_t fdata3[5 * 4];  // Temp data buffer used in filtering
 
-  HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
-  VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
 
   // First filter 1d Horizontal
-  var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 5, 4, HFilter);
+  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
+                                    1, 5, 4, hfilter);
 
   // Now filter Vertically
-  var_filter_block2d_bil_second_pass(FData3, temp2, 4, 4, 4, 4, VFilter);
+  var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, vfilter);
 
   return vp9_variance4x4_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse);
 }
 
+unsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr,
+                                             int src_pixels_per_line,
+                                             int xoffset,
+                                             int yoffset,
+                                             const uint8_t *dst_ptr,
+                                             int dst_pixels_per_line,
+                                             unsigned int *sse,
+                                             const uint8_t *second_pred) {
+  uint8_t temp2[20 * 16];
+  const int16_t *hfilter, *vfilter;
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 4);  // compound pred buffer
+  uint16_t fdata3[5 * 4];  // Temp data buffer used in filtering
+
+  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+
+  // First filter 1d Horizontal
+  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
+                                    1, 5, 4, hfilter);
+
+  // Now filter Vertically
+  var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, vfilter);
+  comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4);
+  return vp9_variance4x4_c(temp3, 4, dst_ptr, dst_pixels_per_line, sse);
+}
 
 unsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr,
                                          int src_pixels_per_line,
@@ -162,19 +487,43 @@ unsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr,
                                          const uint8_t *dst_ptr,
                                          int dst_pixels_per_line,
                                          unsigned int *sse) {
-  uint16_t FData3[9 * 8];  // Temp data buffer used in filtering
+  uint16_t fdata3[9 * 8];  // Temp data buffer used in filtering
   uint8_t temp2[20 * 16];
-  const int16_t *HFilter, *VFilter;
+  const int16_t *hfilter, *vfilter;
 
-  HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
-  VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
 
-  var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 8, HFilter);
-  var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 8, 8, VFilter);
+  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
+                                    1, 9, 8, hfilter);
+  var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter);
 
   return vp9_variance8x8_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
 }
 
+unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr,
+                                             int src_pixels_per_line,
+                                             int xoffset,
+                                             int yoffset,
+                                             const uint8_t *dst_ptr,
+                                             int dst_pixels_per_line,
+                                             unsigned int
*sse, + const uint8_t *second_pred) { + uint16_t fdata3[9 * 8]; // Temp data bufffer used in filtering + uint8_t temp2[20 * 16]; + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 8); // compound pred buffer + const int16_t *hfilter, *vfilter; + + hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); + vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, + 1, 9, 8, hfilter); + var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter); + comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8); + return vp9_variance8x8_c(temp3, 8, dst_ptr, dst_pixels_per_line, sse); +} + unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, @@ -182,19 +531,44 @@ unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t FData3[17 * 16]; // Temp data bufffer used in filtering + uint16_t fdata3[17 * 16]; // Temp data bufffer used in filtering uint8_t temp2[20 * 16]; - const int16_t *HFilter, *VFilter; + const int16_t *hfilter, *vfilter; - HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); + vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); - var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 16, HFilter); - var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 16, 16, VFilter); + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, + 1, 17, 16, hfilter); + var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter); return vp9_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse); } +unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + const uint8_t *dst_ptr, + int dst_pixels_per_line, + unsigned int *sse, + const uint8_t *second_pred) { + uint16_t fdata3[17 * 16]; + uint8_t temp2[20 * 16]; + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 16); // compound pred buffer + const int16_t *hfilter, *vfilter; + + hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); + vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, + 1, 17, 16, hfilter); + var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter); + + comp_avg_pred(temp3, second_pred, 16, 16, temp2, 16); + return vp9_variance16x16_c(temp3, 16, dst_ptr, dst_pixels_per_line, sse); +} + unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, @@ -202,20 +576,43 @@ unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t FData3[65 * 64]; // Temp data bufffer used in filtering + uint16_t fdata3[65 * 64]; // Temp data bufffer used in filtering uint8_t temp2[68 * 64]; - const int16_t *HFilter, *VFilter; + const int16_t *hfilter, *vfilter; - HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); + vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); - var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, - 1, 65, 64, HFilter); - var_filter_block2d_bil_second_pass(FData3, temp2, 64, 64, 64, 64, VFilter); + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, + 1, 65, 64, hfilter); + 
var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter); return vp9_variance64x64_c(temp2, 64, dst_ptr, dst_pixels_per_line, sse); } +unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + const uint8_t *dst_ptr, + int dst_pixels_per_line, + unsigned int *sse, + const uint8_t *second_pred) { + uint16_t fdata3[65 * 64]; // Temp data bufffer used in filtering + uint8_t temp2[68 * 64]; + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64); // compound pred buffer + const int16_t *hfilter, *vfilter; + + hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); + vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, + 1, 65, 64, hfilter); + var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter); + comp_avg_pred(temp3, second_pred, 64, 64, temp2, 64); + return vp9_variance64x64_c(temp3, 64, dst_ptr, dst_pixels_per_line, sse); +} + unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, @@ -223,19 +620,43 @@ unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t FData3[33 * 32]; // Temp data bufffer used in filtering + uint16_t fdata3[33 * 32]; // Temp data bufffer used in filtering uint8_t temp2[36 * 32]; - const int16_t *HFilter, *VFilter; + const int16_t *hfilter, *vfilter; - HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); + vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); - var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 33, 32, HFilter); - var_filter_block2d_bil_second_pass(FData3, temp2, 32, 32, 32, 32, VFilter); + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, + 1, 33, 32, hfilter); + var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter); return vp9_variance32x32_c(temp2, 32, dst_ptr, dst_pixels_per_line, sse); } +unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + const uint8_t *dst_ptr, + int dst_pixels_per_line, + unsigned int *sse, + const uint8_t *second_pred) { + uint16_t fdata3[33 * 32]; // Temp data bufffer used in filtering + uint8_t temp2[36 * 32]; + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 32); // compound pred buffer + const int16_t *hfilter, *vfilter; + + hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); + vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, + 1, 33, 32, hfilter); + var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter); + comp_avg_pred(temp3, second_pred, 32, 32, temp2, 32); + return vp9_variance32x32_c(temp3, 32, dst_ptr, dst_pixels_per_line, sse); +} + unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, @@ -363,19 +784,43 @@ unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t FData3[16 * 9]; // Temp data bufffer used in filtering + uint16_t fdata3[16 * 9]; // Temp data bufffer used in filtering uint8_t temp2[20 * 16]; - const int16_t *HFilter, *VFilter; + const int16_t *hfilter, *vfilter; - HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - VFilter = 
VP9_BILINEAR_FILTERS_2TAP(yoffset); + hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); + vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); - var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 16, HFilter); - var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 8, 16, VFilter); + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, + 1, 9, 16, hfilter); + var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter); return vp9_variance16x8_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse); } +unsigned int vp9_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + const uint8_t *dst_ptr, + int dst_pixels_per_line, + unsigned int *sse, + const uint8_t *second_pred) { + uint16_t fdata3[16 * 9]; // Temp data bufffer used in filtering + uint8_t temp2[20 * 16]; + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 8); // compound pred buffer + const int16_t *hfilter, *vfilter; + + hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); + vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, + 1, 9, 16, hfilter); + var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter); + comp_avg_pred(temp3, second_pred, 16, 8, temp2, 16); + return vp9_variance16x8_c(temp3, 16, dst_ptr, dst_pixels_per_line, sse); +} + unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, @@ -383,17 +828,129 @@ unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t FData3[9 * 16]; // Temp data bufffer used in filtering + uint16_t fdata3[9 * 16]; // Temp data bufffer used in filtering uint8_t temp2[20 * 16]; - const int16_t *HFilter, *VFilter; + const int16_t *hfilter, *vfilter; - HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); - VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); + vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); - var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, - 1, 17, 8, HFilter); - var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 16, 8, VFilter); + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, + 1, 17, 8, hfilter); + var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter); return vp9_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse); } +unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + const uint8_t *dst_ptr, + int dst_pixels_per_line, + unsigned int *sse, + const uint8_t *second_pred) { + uint16_t fdata3[9 * 16]; // Temp data bufffer used in filtering + uint8_t temp2[20 * 16]; + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 16); // compound pred buffer + const int16_t *hfilter, *vfilter; + + hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); + vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, + 1, 17, 8, hfilter); + var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter); + comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8); + return vp9_variance8x16_c(temp3, 8, dst_ptr, dst_pixels_per_line, sse); +} + +unsigned int vp9_sub_pixel_variance8x4_c(const uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + const uint8_t *dst_ptr, + int dst_pixels_per_line, + unsigned int *sse) { + uint16_t 
fdata3[8 * 5]; // Temp data bufffer used in filtering + uint8_t temp2[20 * 16]; + const int16_t *hfilter, *vfilter; + + hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); + vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, + 1, 5, 8, hfilter); + var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter); + + return vp9_variance8x4_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse); +} + +unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + const uint8_t *dst_ptr, + int dst_pixels_per_line, + unsigned int *sse, + const uint8_t *second_pred) { + uint16_t fdata3[8 * 5]; // Temp data bufffer used in filtering + uint8_t temp2[20 * 16]; + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 4); // compound pred buffer + const int16_t *hfilter, *vfilter; + + hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); + vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, + 1, 5, 8, hfilter); + var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter); + comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8); + return vp9_variance8x4_c(temp3, 8, dst_ptr, dst_pixels_per_line, sse); +} + +unsigned int vp9_sub_pixel_variance4x8_c(const uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + const uint8_t *dst_ptr, + int dst_pixels_per_line, + unsigned int *sse) { + uint16_t fdata3[5 * 8]; // Temp data bufffer used in filtering + // FIXME(jingning,rbultje): this temp2 buffer probably doesn't need to be + // of this big? same issue appears in all other block size settings. + uint8_t temp2[20 * 16]; + const int16_t *hfilter, *vfilter; + + hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); + vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, + 1, 9, 4, hfilter); + var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter); + + return vp9_variance4x8_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse); +} + +unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + const uint8_t *dst_ptr, + int dst_pixels_per_line, + unsigned int *sse, + const uint8_t *second_pred) { + uint16_t fdata3[5 * 8]; // Temp data bufffer used in filtering + uint8_t temp2[20 * 16]; + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 8); // compound pred buffer + const int16_t *hfilter, *vfilter; + + hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); + vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, + 1, 9, 4, hfilter); + var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter); + comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4); + return vp9_variance4x8_c(temp3, 4, dst_ptr, dst_pixels_per_line, sse); +} diff --git a/vp9/encoder/vp9_write_bit_buffer.h b/vp9/encoder/vp9_write_bit_buffer.h new file mode 100644 index 0000000000000000000000000000000000000000..6f91cfc85c986927fcb585d0eadbb78168b63264 --- /dev/null +++ b/vp9/encoder/vp9_write_bit_buffer.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_BIT_WRITE_BUFFER_H_ +#define VP9_BIT_WRITE_BUFFER_H_ + +#include <limits.h> + +#include "vpx/vpx_integer.h" + +struct vp9_write_bit_buffer { + uint8_t *bit_buffer; + size_t bit_offset; +}; + +static size_t vp9_rb_bytes_written(struct vp9_write_bit_buffer *wb) { + return wb->bit_offset / CHAR_BIT + (wb->bit_offset % CHAR_BIT > 0); +} + +static void vp9_wb_write_bit(struct vp9_write_bit_buffer *wb, int bit) { + const int off = wb->bit_offset; + const int p = off / CHAR_BIT; + const int q = CHAR_BIT - 1 - off % CHAR_BIT; + if (q == CHAR_BIT -1) { + wb->bit_buffer[p] = bit << q; + } else { + wb->bit_buffer[p] &= ~(1 << q); + wb->bit_buffer[p] |= bit << q; + } + wb->bit_offset = off + 1; +} + +static void vp9_wb_write_literal(struct vp9_write_bit_buffer *wb, + int data, int bits) { + int bit; + for (bit = bits - 1; bit >= 0; bit--) + vp9_wb_write_bit(wb, (data >> bit) & 1); +} + + +#endif // VP9_BIT_WRITE_BUFFER_H_ diff --git a/vp9/encoder/x86/vp9_encodeopt.asm b/vp9/encoder/x86/vp9_encodeopt.asm index 90c793d4fb84e871ca036ab20762785bc641d0ff..734cb61cae9fb43acec2a4eaafd2ec586631527f 100644 --- a/vp9/encoder/x86/vp9_encodeopt.asm +++ b/vp9/encoder/x86/vp9_encodeopt.asm @@ -123,254 +123,3 @@ sym(vp9_block_error_mmx): UNSHADOW_ARGS pop rbp ret - - -;int vp9_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr); -global sym(vp9_mbblock_error_mmx_impl) PRIVATE -sym(vp9_mbblock_error_mmx_impl): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 3 - push rsi - push rdi - ; end prolog - - - mov rsi, arg(0) ;coeff_ptr - pxor mm7, mm7 - - mov rdi, arg(1) ;dcoef_ptr - pxor mm2, mm2 - - mov rcx, 16 - -.mberror_loop_mmx: - movq mm3, [rsi] - movq mm4, [rdi] - - movq mm5, [rsi+8] - movq mm6, [rdi+8] - - - psubw mm5, mm6 - pmaddwd mm5, mm5 - - psubw mm3, mm4 - - pmaddwd mm3, mm3 - paddd mm2, mm5 - - paddd mm2, mm3 - movq mm3, [rsi+16] - - movq mm4, [rdi+16] - movq mm5, [rsi+24] - - movq mm6, [rdi+24] - psubw mm5, mm6 - - pmaddwd mm5, mm5 - psubw mm3, mm4 - - pmaddwd mm3, mm3 - paddd mm2, mm5 - - paddd mm2, mm3 - add rsi, 32 - - add rdi, 32 - sub rcx, 1 - - jnz .mberror_loop_mmx - - movq mm0, mm2 - psrlq mm2, 32 - - paddd mm0, mm2 - movq rax, mm0 - - pop rdi - pop rsi - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret - - -;int vp9_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr); -global sym(vp9_mbblock_error_xmm_impl) PRIVATE -sym(vp9_mbblock_error_xmm_impl): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 3 - SAVE_XMM 5 - push rsi - push rdi - ; end prolog - - - mov rsi, arg(0) ;coeff_ptr - pxor xmm5, xmm5 - - mov rdi, arg(1) ;dcoef_ptr - pxor xmm4, xmm4 - - mov rcx, 16 - -.mberror_loop: - movdqa xmm0, [rsi] - movdqa xmm1, [rdi] - - movdqa xmm2, [rsi+16] - movdqa xmm3, [rdi+16] - - - psubw xmm2, xmm3 - pmaddwd xmm2, xmm2 - - psubw xmm0, xmm1 - - pmaddwd xmm0, xmm0 - add rsi, 32 - - add rdi, 32 - - sub rcx, 1 - paddd xmm4, xmm2 - - paddd xmm4, xmm0 - jnz .mberror_loop - - movdqa xmm0, xmm4 - punpckldq xmm0, xmm5 - - punpckhdq xmm4, xmm5 - paddd xmm0, xmm4 - - movdqa xmm1, xmm0 - psrldq xmm0, 8 - - paddd xmm0, xmm1 - movq rax, xmm0 - - pop rdi - pop rsi - ; begin epilog - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -;int vp9_mbuverror_mmx_impl(short *s_ptr, short *d_ptr); -global sym(vp9_mbuverror_mmx_impl) PRIVATE -sym(vp9_mbuverror_mmx_impl): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 2 - push rsi - push rdi - ; end prolog - - - mov rsi, arg(0) ;s_ptr - mov rdi, 
arg(1) ;d_ptr - - mov rcx, 16 - pxor mm7, mm7 - -.mbuverror_loop_mmx: - - movq mm1, [rsi] - movq mm2, [rdi] - - psubw mm1, mm2 - pmaddwd mm1, mm1 - - - movq mm3, [rsi+8] - movq mm4, [rdi+8] - - psubw mm3, mm4 - pmaddwd mm3, mm3 - - - paddd mm7, mm1 - paddd mm7, mm3 - - - add rsi, 16 - add rdi, 16 - - dec rcx - jnz .mbuverror_loop_mmx - - movq mm0, mm7 - psrlq mm7, 32 - - paddd mm0, mm7 - movq rax, mm0 - - pop rdi - pop rsi - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret - - -;int vp9_mbuverror_xmm_impl(short *s_ptr, short *d_ptr); -global sym(vp9_mbuverror_xmm_impl) PRIVATE -sym(vp9_mbuverror_xmm_impl): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 2 - push rsi - push rdi - ; end prolog - - - mov rsi, arg(0) ;s_ptr - mov rdi, arg(1) ;d_ptr - - mov rcx, 16 - pxor xmm3, xmm3 - -.mbuverror_loop: - - movdqa xmm1, [rsi] - movdqa xmm2, [rdi] - - psubw xmm1, xmm2 - pmaddwd xmm1, xmm1 - - paddd xmm3, xmm1 - - add rsi, 16 - add rdi, 16 - - dec rcx - jnz .mbuverror_loop - - pxor xmm0, xmm0 - movdqa xmm1, xmm3 - - movdqa xmm2, xmm1 - punpckldq xmm1, xmm0 - - punpckhdq xmm2, xmm0 - paddd xmm1, xmm2 - - movdqa xmm2, xmm1 - - psrldq xmm1, 8 - paddd xmm1, xmm2 - - movq rax, xmm1 - - pop rdi - pop rsi - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret diff --git a/vp9/encoder/x86/vp9_quantize_mmx.asm b/vp9/encoder/x86/vp9_quantize_mmx.asm deleted file mode 100644 index 22e2356101f593164a6baded7039f0578ea22fed..0000000000000000000000000000000000000000 --- a/vp9/encoder/x86/vp9_quantize_mmx.asm +++ /dev/null @@ -1,286 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - -%include "vpx_ports/x86_abi_support.asm" - -;int vp9_fast_quantize_b_impl_mmx(short *coeff_ptr, short *zbin_ptr, -; short *qcoeff_ptr,short *dequant_ptr, -; short *scan_mask, short *round_ptr, -; short *quant_ptr, short *dqcoeff_ptr); -global sym(vp9_fast_quantize_b_impl_mmx) PRIVATE -sym(vp9_fast_quantize_b_impl_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 8 - push rsi - push rdi - ; end prolog - - - mov rsi, arg(0) ;coeff_ptr - movq mm0, [rsi] - - mov rax, arg(1) ;zbin_ptr - movq mm1, [rax] - - movq mm3, mm0 - psraw mm0, 15 - - pxor mm3, mm0 - psubw mm3, mm0 ; abs - - movq mm2, mm3 - pcmpgtw mm1, mm2 - - pandn mm1, mm2 - movq mm3, mm1 - - mov rdx, arg(6) ;quant_ptr - movq mm1, [rdx] - - mov rcx, arg(5) ;round_ptr - movq mm2, [rcx] - - paddw mm3, mm2 - pmulhuw mm3, mm1 - - pxor mm3, mm0 - psubw mm3, mm0 ;gain the sign back - - mov rdi, arg(2) ;qcoeff_ptr - movq mm0, mm3 - - movq [rdi], mm3 - - mov rax, arg(3) ;dequant_ptr - movq mm2, [rax] - - pmullw mm3, mm2 - mov rax, arg(7) ;dqcoeff_ptr - - movq [rax], mm3 - - ; next 8 - movq mm4, [rsi+8] - - mov rax, arg(1) ;zbin_ptr - movq mm5, [rax+8] - - movq mm7, mm4 - psraw mm4, 15 - - pxor mm7, mm4 - psubw mm7, mm4 ; abs - - movq mm6, mm7 - pcmpgtw mm5, mm6 - - pandn mm5, mm6 - movq mm7, mm5 - - movq mm5, [rdx+8] - movq mm6, [rcx+8] - - paddw mm7, mm6 - pmulhuw mm7, mm5 - - pxor mm7, mm4 - psubw mm7, mm4;gain the sign back - - mov rdi, arg(2) ;qcoeff_ptr - - movq mm1, mm7 - movq [rdi+8], mm7 - - mov rax, arg(3) ;dequant_ptr - movq mm6, [rax+8] - - pmullw mm7, mm6 - mov rax, arg(7) ;dqcoeff_ptr - - movq [rax+8], mm7 - - - ; next 8 - movq mm4, [rsi+16] - - mov rax, arg(1) ;zbin_ptr - movq mm5, [rax+16] - - movq mm7, mm4 - psraw mm4, 15 - - pxor mm7, mm4 - psubw mm7, mm4 ; abs - - movq mm6, mm7 - pcmpgtw mm5, mm6 - - pandn mm5, mm6 - movq mm7, mm5 - - movq mm5, [rdx+16] - movq mm6, [rcx+16] - - paddw mm7, mm6 - pmulhuw mm7, mm5 - - pxor mm7, mm4 - psubw mm7, mm4;gain the sign back - - mov rdi, arg(2) ;qcoeff_ptr - - movq mm1, mm7 - movq [rdi+16], mm7 - - mov rax, arg(3) ;dequant_ptr - movq mm6, [rax+16] - - pmullw mm7, mm6 - mov rax, arg(7) ;dqcoeff_ptr - - movq [rax+16], mm7 - - - ; next 8 - movq mm4, [rsi+24] - - mov rax, arg(1) ;zbin_ptr - movq mm5, [rax+24] - - movq mm7, mm4 - psraw mm4, 15 - - pxor mm7, mm4 - psubw mm7, mm4 ; abs - - movq mm6, mm7 - pcmpgtw mm5, mm6 - - pandn mm5, mm6 - movq mm7, mm5 - - movq mm5, [rdx+24] - movq mm6, [rcx+24] - - paddw mm7, mm6 - pmulhuw mm7, mm5 - - pxor mm7, mm4 - psubw mm7, mm4;gain the sign back - - mov rdi, arg(2) ;qcoeff_ptr - - movq mm1, mm7 - movq [rdi+24], mm7 - - mov rax, arg(3) ;dequant_ptr - movq mm6, [rax+24] - - pmullw mm7, mm6 - mov rax, arg(7) ;dqcoeff_ptr - - movq [rax+24], mm7 - - - - mov rdi, arg(4) ;scan_mask - mov rsi, arg(2) ;qcoeff_ptr - - pxor mm5, mm5 - pxor mm7, mm7 - - movq mm0, [rsi] - movq mm1, [rsi+8] - - movq mm2, [rdi] - movq mm3, [rdi+8]; - - pcmpeqw mm0, mm7 - pcmpeqw mm1, mm7 - - pcmpeqw mm6, mm6 - pxor mm0, mm6 - - pxor mm1, mm6 - psrlw mm0, 15 - - psrlw mm1, 15 - pmaddwd mm0, mm2 - - pmaddwd mm1, mm3 - movq mm5, mm0 - - paddd mm5, mm1 - - movq mm0, [rsi+16] - movq mm1, [rsi+24] - - movq mm2, [rdi+16] - movq mm3, [rdi+24]; - - pcmpeqw mm0, mm7 - pcmpeqw mm1, mm7 - - pcmpeqw mm6, mm6 - pxor mm0, mm6 - - pxor mm1, mm6 - psrlw mm0, 15 - - psrlw mm1, 15 - pmaddwd mm0, mm2 - - pmaddwd mm1, mm3 - paddd mm5, mm0 - - paddd mm5, mm1 - movq mm0, mm5 - - psrlq mm5, 32 - paddd mm0, mm5 - - ; eob adjustment begins here - movq rcx, mm0 - and rcx, 0xffff - - xor rdx, rdx 
- sub rdx, rcx ; rdx=-rcx - - bsr rax, rcx - inc rax - - sar rdx, 31 - and rax, rdx - ; Substitute the sse assembly for the old mmx mixed assembly/C. The - ; following is kept as reference - ; movq rcx, mm0 - ; bsr rax, rcx - ; - ; mov eob, rax - ; mov eee, rcx - ; - ;if(eee==0) - ;{ - ; eob=-1; - ;} - ;else if(eee<0) - ;{ - ; eob=15; - ;} - ;d->eob = eob+1; - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret diff --git a/vp9/encoder/x86/vp9_quantize_sse2.asm b/vp9/encoder/x86/vp9_quantize_sse2.asm deleted file mode 100644 index 2a686f5a87b0fd425f3a11f60af9eebf2742223b..0000000000000000000000000000000000000000 --- a/vp9/encoder/x86/vp9_quantize_sse2.asm +++ /dev/null @@ -1,380 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" -%include "vp9_asm_enc_offsets.asm" - - -; void vp9_regular_quantize_b_sse2 | arg -; (BLOCK *b, | 0 -; BLOCKD *d) | 1 - -global sym(vp9_regular_quantize_b_sse2) PRIVATE -sym(vp9_regular_quantize_b_sse2): - push rbp - mov rbp, rsp - SAVE_XMM 7 - GET_GOT rbx - -%if ABI_IS_32BIT - push rdi - push rsi -%else - %if LIBVPX_YASM_WIN64 - push rdi - push rsi - %endif -%endif - - ALIGN_STACK 16, rax - %define zrun_zbin_boost 0 ; 8 - %define abs_minus_zbin 8 ; 32 - %define temp_qcoeff 40 ; 32 - %define qcoeff 72 ; 32 - %define stack_size 104 - sub rsp, stack_size - ; end prolog - -%if ABI_IS_32BIT - mov rdi, arg(0) ; BLOCK *b - mov rsi, arg(1) ; BLOCKD *d -%else - %if LIBVPX_YASM_WIN64 - mov rdi, rcx ; BLOCK *b - mov rsi, rdx ; BLOCKD *d - %else - ;mov rdi, rdi ; BLOCK *b - ;mov rsi, rsi ; BLOCKD *d - %endif -%endif - - mov rdx, [rdi + vp9_block_coeff] ; coeff_ptr - mov rcx, [rdi + vp9_block_zbin] ; zbin_ptr - movd xmm7, [rdi + vp9_block_zbin_extra] ; zbin_oq_value - - ; z - movdqa xmm0, [rdx] - movdqa xmm4, [rdx + 16] - mov rdx, [rdi + vp9_block_round] ; round_ptr - - pshuflw xmm7, xmm7, 0 - punpcklwd xmm7, xmm7 ; duplicated zbin_oq_value - - movdqa xmm1, xmm0 - movdqa xmm5, xmm4 - - ; sz - psraw xmm0, 15 - psraw xmm4, 15 - - ; (z ^ sz) - pxor xmm1, xmm0 - pxor xmm5, xmm4 - - ; x = abs(z) - psubw xmm1, xmm0 - psubw xmm5, xmm4 - - movdqa xmm2, [rcx] - movdqa xmm3, [rcx + 16] - mov rcx, [rdi + vp9_block_quant] ; quant_ptr - - ; *zbin_ptr + zbin_oq_value - paddw xmm2, xmm7 - paddw xmm3, xmm7 - - ; x - (*zbin_ptr + zbin_oq_value) - psubw xmm1, xmm2 - psubw xmm5, xmm3 - movdqa [rsp + abs_minus_zbin], xmm1 - movdqa [rsp + abs_minus_zbin + 16], xmm5 - - ; add (zbin_ptr + zbin_oq_value) back - paddw xmm1, xmm2 - paddw xmm5, xmm3 - - movdqa xmm2, [rdx] - movdqa xmm6, [rdx + 16] - - movdqa xmm3, [rcx] - movdqa xmm7, [rcx + 16] - - ; x + round - paddw xmm1, xmm2 - paddw xmm5, xmm6 - - ; y = x * quant_ptr >> 16 - pmulhw xmm3, xmm1 - pmulhw xmm7, xmm5 - - ; y += x - paddw xmm1, xmm3 - paddw xmm5, xmm7 - - movdqa [rsp + temp_qcoeff], xmm1 - movdqa [rsp + temp_qcoeff + 16], xmm5 - - pxor xmm6, xmm6 - ; zero qcoeff - movdqa [rsp + qcoeff], xmm6 - movdqa [rsp + qcoeff + 16], xmm6 - - mov rdx, [rdi + vp9_block_zrun_zbin_boost] ; zbin_boost_ptr - mov rax, [rdi + vp9_block_quant_shift] ; quant_shift_ptr - mov [rsp + zrun_zbin_boost], rdx - -%macro ZIGZAG_LOOP 1 - ; x - movsx ecx, WORD PTR[rsp + abs_minus_zbin + %1 * 2] - - ; if (x >= zbin) - sub 
cx, WORD PTR[rdx] ; x - zbin - lea rdx, [rdx + 2] ; zbin_boost_ptr++ - jl .rq_zigzag_loop_%1 ; x < zbin - - movsx edi, WORD PTR[rsp + temp_qcoeff + %1 * 2] - - ; downshift by quant_shift[rc] - movsx cx, BYTE PTR[rax + %1] ; quant_shift_ptr[rc] - sar edi, cl ; also sets Z bit - je .rq_zigzag_loop_%1 ; !y - mov WORD PTR[rsp + qcoeff + %1 * 2], di ;qcoeff_ptr[rc] = temp_qcoeff[rc] - mov rdx, [rsp + zrun_zbin_boost] ; reset to b->zrun_zbin_boost -.rq_zigzag_loop_%1: -%endmacro -; in vp9_default_zig_zag1d order: see vp9/common/vp9_entropy.c -ZIGZAG_LOOP 0 -ZIGZAG_LOOP 1 -ZIGZAG_LOOP 4 -ZIGZAG_LOOP 8 -ZIGZAG_LOOP 5 -ZIGZAG_LOOP 2 -ZIGZAG_LOOP 3 -ZIGZAG_LOOP 6 -ZIGZAG_LOOP 9 -ZIGZAG_LOOP 12 -ZIGZAG_LOOP 13 -ZIGZAG_LOOP 10 -ZIGZAG_LOOP 7 -ZIGZAG_LOOP 11 -ZIGZAG_LOOP 14 -ZIGZAG_LOOP 15 - - movdqa xmm2, [rsp + qcoeff] - movdqa xmm3, [rsp + qcoeff + 16] - - mov rcx, [rsi + vp9_blockd_dequant] ; dequant_ptr - mov rdi, [rsi + vp9_blockd_dqcoeff] ; dqcoeff_ptr - - ; y ^ sz - pxor xmm2, xmm0 - pxor xmm3, xmm4 - ; x = (y ^ sz) - sz - psubw xmm2, xmm0 - psubw xmm3, xmm4 - - ; dequant - movdqa xmm0, [rcx] - movdqa xmm1, [rcx + 16] - - mov rcx, [rsi + vp9_blockd_qcoeff] ; qcoeff_ptr - - pmullw xmm0, xmm2 - pmullw xmm1, xmm3 - - movdqa [rcx], xmm2 ; store qcoeff - movdqa [rcx + 16], xmm3 - movdqa [rdi], xmm0 ; store dqcoeff - movdqa [rdi + 16], xmm1 - - ; select the last value (in zig_zag order) for EOB - pcmpeqw xmm2, xmm6 - pcmpeqw xmm3, xmm6 - ; ! - pcmpeqw xmm6, xmm6 - pxor xmm2, xmm6 - pxor xmm3, xmm6 - ; mask inv_zig_zag - pand xmm2, [GLOBAL(inv_zig_zag)] - pand xmm3, [GLOBAL(inv_zig_zag + 16)] - ; select the max value - pmaxsw xmm2, xmm3 - pshufd xmm3, xmm2, 00001110b - pmaxsw xmm2, xmm3 - pshuflw xmm3, xmm2, 00001110b - pmaxsw xmm2, xmm3 - pshuflw xmm3, xmm2, 00000001b - pmaxsw xmm2, xmm3 - movd eax, xmm2 - and eax, 0xff - mov [rsi + vp9_blockd_eob], eax - - ; begin epilog - add rsp, stack_size - pop rsp -%if ABI_IS_32BIT - pop rsi - pop rdi -%else - %if LIBVPX_YASM_WIN64 - pop rsi - pop rdi - %endif -%endif - RESTORE_GOT - RESTORE_XMM - pop rbp - ret - -; void vp9_fast_quantize_b_sse2 | arg -; (BLOCK *b, | 0 -; BLOCKD *d) | 1 - -global sym(vp9_fast_quantize_b_sse2) PRIVATE -sym(vp9_fast_quantize_b_sse2): - push rbp - mov rbp, rsp - GET_GOT rbx - -%if ABI_IS_32BIT - push rdi - push rsi -%else - %if LIBVPX_YASM_WIN64 - push rdi - push rsi - %else - ; these registers are used for passing arguments - %endif -%endif - - ; end prolog - -%if ABI_IS_32BIT - mov rdi, arg(0) ; BLOCK *b - mov rsi, arg(1) ; BLOCKD *d -%else - %if LIBVPX_YASM_WIN64 - mov rdi, rcx ; BLOCK *b - mov rsi, rdx ; BLOCKD *d - %else - ;mov rdi, rdi ; BLOCK *b - ;mov rsi, rsi ; BLOCKD *d - %endif -%endif - - mov rax, [rdi + vp9_block_coeff] - mov rcx, [rdi + vp9_block_round] - mov rdx, [rdi + vp9_block_quant_fast] - - ; z = coeff - movdqa xmm0, [rax] - movdqa xmm4, [rax + 16] - - ; dup z so we can save sz - movdqa xmm1, xmm0 - movdqa xmm5, xmm4 - - ; sz = z >> 15 - psraw xmm0, 15 - psraw xmm4, 15 - - ; x = abs(z) = (z ^ sz) - sz - pxor xmm1, xmm0 - pxor xmm5, xmm4 - psubw xmm1, xmm0 - psubw xmm5, xmm4 - - ; x += round - paddw xmm1, [rcx] - paddw xmm5, [rcx + 16] - - mov rax, [rsi + vp9_blockd_qcoeff] - mov rcx, [rsi + vp9_blockd_dequant] - mov rdi, [rsi + vp9_blockd_dqcoeff] - - ; y = x * quant >> 16 - pmulhw xmm1, [rdx] - pmulhw xmm5, [rdx + 16] - - ; x = (y ^ sz) - sz - pxor xmm1, xmm0 - pxor xmm5, xmm4 - psubw xmm1, xmm0 - psubw xmm5, xmm4 - - ; qcoeff = x - movdqa [rax], xmm1 - movdqa [rax + 16], xmm5 - - ; x * dequant - movdqa xmm2, 
xmm1 - movdqa xmm3, xmm5 - pmullw xmm2, [rcx] - pmullw xmm3, [rcx + 16] - - ; dqcoeff = x * dequant - movdqa [rdi], xmm2 - movdqa [rdi + 16], xmm3 - - pxor xmm4, xmm4 ;clear all bits - pcmpeqw xmm1, xmm4 - pcmpeqw xmm5, xmm4 - - pcmpeqw xmm4, xmm4 ;set all bits - pxor xmm1, xmm4 - pxor xmm5, xmm4 - - pand xmm1, [GLOBAL(inv_zig_zag)] - pand xmm5, [GLOBAL(inv_zig_zag + 16)] - - pmaxsw xmm1, xmm5 - - ; now down to 8 - pshufd xmm5, xmm1, 00001110b - - pmaxsw xmm1, xmm5 - - ; only 4 left - pshuflw xmm5, xmm1, 00001110b - - pmaxsw xmm1, xmm5 - - ; okay, just 2! - pshuflw xmm5, xmm1, 00000001b - - pmaxsw xmm1, xmm5 - - movd eax, xmm1 - and eax, 0xff - mov [rsi + vp9_blockd_eob], eax - - ; begin epilog -%if ABI_IS_32BIT - pop rsi - pop rdi -%else - %if LIBVPX_YASM_WIN64 - pop rsi - pop rdi - %endif -%endif - - RESTORE_GOT - pop rbp - ret - -SECTION_RODATA -align 16 -inv_zig_zag: - dw 0x0001, 0x0002, 0x0006, 0x0007 - dw 0x0003, 0x0005, 0x0008, 0x000d - dw 0x0004, 0x0009, 0x000c, 0x000e - dw 0x000a, 0x000b, 0x000f, 0x0010 diff --git a/vp9/encoder/x86/vp9_quantize_sse4.asm b/vp9/encoder/x86/vp9_quantize_sse4.asm deleted file mode 100644 index d7779bd0df11519c30739caa96ec186df8c27a5d..0000000000000000000000000000000000000000 --- a/vp9/encoder/x86/vp9_quantize_sse4.asm +++ /dev/null @@ -1,254 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" -%include "vp9_asm_enc_offsets.asm" - - -; void vp9_regular_quantize_b_sse4 | arg -; (BLOCK *b, | 0 -; BLOCKD *d) | 1 - -global sym(vp9_regular_quantize_b_sse4) PRIVATE -sym(vp9_regular_quantize_b_sse4): - -%if ABI_IS_32BIT - push rbp - mov rbp, rsp - GET_GOT rbx - push rdi - push rsi - - ALIGN_STACK 16, rax - %define qcoeff 0 ; 32 - %define stack_size 32 - sub rsp, stack_size -%else - %if LIBVPX_YASM_WIN64 - SAVE_XMM 8, u - push rdi - push rsi - %endif -%endif - ; end prolog - -%if ABI_IS_32BIT - mov rdi, arg(0) ; BLOCK *b - mov rsi, arg(1) ; BLOCKD *d -%else - %if LIBVPX_YASM_WIN64 - mov rdi, rcx ; BLOCK *b - mov rsi, rdx ; BLOCKD *d - %else - ;mov rdi, rdi ; BLOCK *b - ;mov rsi, rsi ; BLOCKD *d - %endif -%endif - - mov rax, [rdi + vp9_block_coeff] - mov rcx, [rdi + vp9_block_zbin] - mov rdx, [rdi + vp9_block_round] - movd xmm7, [rdi + vp9_block_zbin_extra] - - ; z - movdqa xmm0, [rax] - movdqa xmm1, [rax + 16] - - ; duplicate zbin_oq_value - pshuflw xmm7, xmm7, 0 - punpcklwd xmm7, xmm7 - - movdqa xmm2, xmm0 - movdqa xmm3, xmm1 - - ; sz - psraw xmm0, 15 - psraw xmm1, 15 - - ; (z ^ sz) - pxor xmm2, xmm0 - pxor xmm3, xmm1 - - ; x = abs(z) - psubw xmm2, xmm0 - psubw xmm3, xmm1 - - ; zbin - movdqa xmm4, [rcx] - movdqa xmm5, [rcx + 16] - - ; *zbin_ptr + zbin_oq_value - paddw xmm4, xmm7 - paddw xmm5, xmm7 - - movdqa xmm6, xmm2 - movdqa xmm7, xmm3 - - ; x - (*zbin_ptr + zbin_oq_value) - psubw xmm6, xmm4 - psubw xmm7, xmm5 - - ; round - movdqa xmm4, [rdx] - movdqa xmm5, [rdx + 16] - - mov rax, [rdi + vp9_block_quant_shift] - mov rcx, [rdi + vp9_block_quant] - mov rdx, [rdi + vp9_block_zrun_zbin_boost] - - ; x + round - paddw xmm2, xmm4 - paddw xmm3, xmm5 - - ; quant - movdqa xmm4, [rcx] - movdqa xmm5, [rcx + 16] - - ; y = x * quant_ptr >> 16 - pmulhw xmm4, xmm2 - pmulhw xmm5, xmm3 - - ; y += x - paddw xmm2, xmm4 - paddw xmm3, xmm5 - - pxor 
xmm4, xmm4 -%if ABI_IS_32BIT - movdqa [rsp + qcoeff], xmm4 - movdqa [rsp + qcoeff + 16], xmm4 -%else - pxor xmm8, xmm8 -%endif - - ; quant_shift - movdqa xmm5, [rax] - - ; zrun_zbin_boost - mov rax, rdx - -%macro ZIGZAG_LOOP 5 - ; x - pextrw ecx, %4, %2 - - ; if (x >= zbin) - sub cx, WORD PTR[rdx] ; x - zbin - lea rdx, [rdx + 2] ; zbin_boost_ptr++ - jl .rq_zigzag_loop_%1 ; x < zbin - - pextrw edi, %3, %2 ; y - - ; downshift by quant_shift[rc] - pextrb ecx, xmm5, %1 ; quant_shift[rc] - sar edi, cl ; also sets Z bit - je .rq_zigzag_loop_%1 ; !y -%if ABI_IS_32BIT - mov WORD PTR[rsp + qcoeff + %1 *2], di -%else - pinsrw %5, edi, %2 ; qcoeff[rc] -%endif - mov rdx, rax ; reset to b->zrun_zbin_boost -.rq_zigzag_loop_%1: -%endmacro -; in vp9_default_zig_zag1d order: see vp9/common/vp9_entropy.c -ZIGZAG_LOOP 0, 0, xmm2, xmm6, xmm4 -ZIGZAG_LOOP 1, 1, xmm2, xmm6, xmm4 -ZIGZAG_LOOP 4, 4, xmm2, xmm6, xmm4 -ZIGZAG_LOOP 8, 0, xmm3, xmm7, xmm8 -ZIGZAG_LOOP 5, 5, xmm2, xmm6, xmm4 -ZIGZAG_LOOP 2, 2, xmm2, xmm6, xmm4 -ZIGZAG_LOOP 3, 3, xmm2, xmm6, xmm4 -ZIGZAG_LOOP 6, 6, xmm2, xmm6, xmm4 -ZIGZAG_LOOP 9, 1, xmm3, xmm7, xmm8 -ZIGZAG_LOOP 12, 4, xmm3, xmm7, xmm8 -ZIGZAG_LOOP 13, 5, xmm3, xmm7, xmm8 -ZIGZAG_LOOP 10, 2, xmm3, xmm7, xmm8 -ZIGZAG_LOOP 7, 7, xmm2, xmm6, xmm4 -ZIGZAG_LOOP 11, 3, xmm3, xmm7, xmm8 -ZIGZAG_LOOP 14, 6, xmm3, xmm7, xmm8 -ZIGZAG_LOOP 15, 7, xmm3, xmm7, xmm8 - - mov rcx, [rsi + vp9_blockd_dequant] - mov rdi, [rsi + vp9_blockd_dqcoeff] - -%if ABI_IS_32BIT - movdqa xmm4, [rsp + qcoeff] - movdqa xmm5, [rsp + qcoeff + 16] -%else - %define xmm5 xmm8 -%endif - - ; y ^ sz - pxor xmm4, xmm0 - pxor xmm5, xmm1 - ; x = (y ^ sz) - sz - psubw xmm4, xmm0 - psubw xmm5, xmm1 - - ; dequant - movdqa xmm0, [rcx] - movdqa xmm1, [rcx + 16] - - mov rcx, [rsi + vp9_blockd_qcoeff] - - pmullw xmm0, xmm4 - pmullw xmm1, xmm5 - - ; store qcoeff - movdqa [rcx], xmm4 - movdqa [rcx + 16], xmm5 - - ; store dqcoeff - movdqa [rdi], xmm0 - movdqa [rdi + 16], xmm1 - - ; select the last value (in zig_zag order) for EOB - pxor xmm6, xmm6 - pcmpeqw xmm4, xmm6 - pcmpeqw xmm5, xmm6 - - packsswb xmm4, xmm5 - pshufb xmm4, [GLOBAL(zig_zag1d)] - pmovmskb edx, xmm4 - xor rdi, rdi - mov eax, -1 - xor dx, ax - bsr eax, edx - sub edi, edx - sar edi, 31 - add eax, 1 - and eax, edi - - mov [rsi + vp9_blockd_eob], eax - - ; begin epilog -%if ABI_IS_32BIT - add rsp, stack_size - pop rsp - - pop rsi - pop rdi - RESTORE_GOT - pop rbp -%else - %undef xmm5 - %if LIBVPX_YASM_WIN64 - pop rsi - pop rdi - RESTORE_XMM - %endif -%endif - - ret - -SECTION_RODATA -align 16 -; vp9/common/vp9_entropy.c: vp9_default_zig_zag1d -zig_zag1d: - db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 diff --git a/vp9/encoder/x86/vp9_quantize_ssse3.asm b/vp9/encoder/x86/vp9_quantize_ssse3.asm deleted file mode 100644 index e082af1f5c46f65d418a3020874380c7ea0faba9..0000000000000000000000000000000000000000 --- a/vp9/encoder/x86/vp9_quantize_ssse3.asm +++ /dev/null @@ -1,138 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. 
-; - - -%include "vpx_ports/x86_abi_support.asm" -%include "vp9_asm_enc_offsets.asm" - - -; void vp9_fast_quantize_b_ssse3 | arg -; (BLOCK *b, | 0 -; BLOCKD *d) | 1 -; - -global sym(vp9_fast_quantize_b_ssse3) PRIVATE -sym(vp9_fast_quantize_b_ssse3): - push rbp - mov rbp, rsp - GET_GOT rbx - -%if ABI_IS_32BIT - push rdi - push rsi -%else - %if LIBVPX_YASM_WIN64 - push rdi - push rsi - %endif -%endif - ; end prolog - -%if ABI_IS_32BIT - mov rdi, arg(0) ; BLOCK *b - mov rsi, arg(1) ; BLOCKD *d -%else - %if LIBVPX_YASM_WIN64 - mov rdi, rcx ; BLOCK *b - mov rsi, rdx ; BLOCKD *d - %else - ;mov rdi, rdi ; BLOCK *b - ;mov rsi, rsi ; BLOCKD *d - %endif -%endif - - mov rax, [rdi + vp9_block_coeff] - mov rcx, [rdi + vp9_block_round] - mov rdx, [rdi + vp9_block_quant_fast] - - ; coeff - movdqa xmm0, [rax] - movdqa xmm4, [rax + 16] - - ; round - movdqa xmm2, [rcx] - movdqa xmm3, [rcx + 16] - - movdqa xmm1, xmm0 - movdqa xmm5, xmm4 - - ; sz = z >> 15 - psraw xmm0, 15 - psraw xmm4, 15 - - pabsw xmm1, xmm1 - pabsw xmm5, xmm5 - - paddw xmm1, xmm2 - paddw xmm5, xmm3 - - ; quant_fast - pmulhw xmm1, [rdx] - pmulhw xmm5, [rdx + 16] - - mov rax, [rsi + vp9_blockd_qcoeff] - mov rdi, [rsi + vp9_blockd_dequant] - mov rcx, [rsi + vp9_blockd_dqcoeff] - - pxor xmm1, xmm0 - pxor xmm5, xmm4 - psubw xmm1, xmm0 - psubw xmm5, xmm4 - - movdqa [rax], xmm1 - movdqa [rax + 16], xmm5 - - movdqa xmm2, [rdi] - movdqa xmm3, [rdi + 16] - - pxor xmm4, xmm4 - pmullw xmm2, xmm1 - pmullw xmm3, xmm5 - - pcmpeqw xmm1, xmm4 ;non zero mask - pcmpeqw xmm5, xmm4 ;non zero mask - packsswb xmm1, xmm5 - pshufb xmm1, [GLOBAL(zz_shuf)] - - pmovmskb edx, xmm1 - - xor rdi, rdi - mov eax, -1 - xor dx, ax ;flip the bits for bsr - bsr eax, edx - - movdqa [rcx], xmm2 ;store dqcoeff - movdqa [rcx + 16], xmm3 ;store dqcoeff - - sub edi, edx ;check for all zeros in bit mask - sar edi, 31 ;0 or -1 - add eax, 1 - and eax, edi ;if the bit mask was all zero, - ;then eob = 0 - mov [rsi + vp9_blockd_eob], eax - - ; begin epilog -%if ABI_IS_32BIT - pop rsi - pop rdi -%else - %if LIBVPX_YASM_WIN64 - pop rsi - pop rdi - %endif -%endif - - RESTORE_GOT - pop rbp - ret - -SECTION_RODATA -align 16 -zz_shuf: - db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 diff --git a/vp9/encoder/x86/vp9_quantize_x86.h b/vp9/encoder/x86/vp9_quantize_x86.h deleted file mode 100644 index d1db17394a6c43909029a2b60d88988488aa69b1..0000000000000000000000000000000000000000 --- a/vp9/encoder/x86/vp9_quantize_x86.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. - */ - -#ifndef VP9_ENCODER_X86_VP9_QUANTIZE_X86_H_ -#define VP9_ENCODER_X86_VP9_QUANTIZE_X86_H_ - - -/* Note: - * - * This platform is commonly built for runtime CPU detection. 
If you modify - * any of the function mappings present in this file, be sure to also update - * them in the function pointer initialization code - */ -#if HAVE_MMX - -#endif /* HAVE_MMX */ - - -#if HAVE_SSE2 -extern prototype_quantize_block(vp9_regular_quantize_b_sse2); -#if !CONFIG_RUNTIME_CPU_DETECT - -#undef vp9_quantize_quantb -#define vp9_quantize_quantb vp9_regular_quantize_b_sse2 -#endif /* !CONFIG_RUNTIME_CPU_DETECT */ - -#endif /* HAVE_SSE2 */ - - -#if HAVE_SSE4_1 -extern prototype_quantize_block(vp9_regular_quantize_b_sse4); - -#if !CONFIG_RUNTIME_CPU_DETECT - -#undef vp9_quantize_quantb -#define vp9_quantize_quantb vp9_regular_quantize_b_sse4 - -#endif /* !CONFIG_RUNTIME_CPU_DETECT */ - -#endif /* HAVE_SSE4_1 */ - -#endif /* QUANTIZE_X86_H */ diff --git a/vp9/encoder/x86/vp9_sad4d_sse2.asm b/vp9/encoder/x86/vp9_sad4d_sse2.asm index 3716d91ecd04372ad60ca523afff88e99cb9e2c3..25dd064e1983233b24faded840933a8151d54594 100644 --- a/vp9/encoder/x86/vp9_sad4d_sse2.asm +++ b/vp9/encoder/x86/vp9_sad4d_sse2.asm @@ -215,7 +215,11 @@ cglobal sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \ INIT_XMM sse2 SADNXN4D 64, 64 +SADNXN4D 64, 32 +SADNXN4D 32, 64 SADNXN4D 32, 32 +SADNXN4D 32, 16 +SADNXN4D 16, 32 SADNXN4D 16, 16 SADNXN4D 16, 8 SADNXN4D 8, 16 diff --git a/vp9/encoder/x86/vp9_sad_sse2.asm b/vp9/encoder/x86/vp9_sad_sse2.asm index ea482e071a1091e2be52df22b8b37dffdaf87d71..ea92377eefbad1d6ec10b3c2698008b08ad4bbf0 100644 --- a/vp9/encoder/x86/vp9_sad_sse2.asm +++ b/vp9/encoder/x86/vp9_sad_sse2.asm @@ -14,11 +14,11 @@ SECTION .text ; unsigned int vp9_sad64x64_sse2(uint8_t *src, int src_stride, ; uint8_t *ref, int ref_stride); -INIT_XMM sse2 -cglobal sad64x64, 4, 5, 5, src, src_stride, ref, ref_stride, n_rows +%macro SAD64XN 1 +cglobal sad64x%1, 4, 5, 5, src, src_stride, ref, ref_stride, n_rows movsxdifnidn src_strideq, src_strided movsxdifnidn ref_strideq, ref_strided - mov n_rowsd, 64 + mov n_rowsd, %1 pxor m0, m0 .loop: movu m1, [refq] @@ -42,14 +42,19 @@ cglobal sad64x64, 4, 5, 5, src, src_stride, ref, ref_stride, n_rows paddd m0, m1 movd eax, m0 RET +%endmacro + +INIT_XMM sse2 +SAD64XN 64 ; sad64x64_sse2 +SAD64XN 32 ; sad64x32_sse2 ; unsigned int vp9_sad32x32_sse2(uint8_t *src, int src_stride, ; uint8_t *ref, int ref_stride); -INIT_XMM sse2 -cglobal sad32x32, 4, 5, 5, src, src_stride, ref, ref_stride, n_rows +%macro SAD32XN 1 +cglobal sad32x%1, 4, 5, 5, src, src_stride, ref, ref_stride, n_rows movsxdifnidn src_strideq, src_strided movsxdifnidn ref_strideq, ref_strided - mov n_rowsd, 16 + mov n_rowsd, %1/2 pxor m0, m0 .loop: @@ -74,6 +79,12 @@ cglobal sad32x32, 4, 5, 5, src, src_stride, ref, ref_stride, n_rows paddd m0, m1 movd eax, m0 RET +%endmacro + +INIT_XMM sse2 +SAD32XN 64 ; sad32x64_sse2 +SAD32XN 32 ; sad32x32_sse2 +SAD32XN 16 ; sad32x16_sse2 ; unsigned int vp9_sad16x{8,16}_sse2(uint8_t *src, int src_stride, ; uint8_t *ref, int ref_stride); @@ -112,6 +123,7 @@ cglobal sad16x%1, 4, 7, 5, src, src_stride, ref, ref_stride, \ %endmacro INIT_XMM sse2 +SAD16XN 32 ; sad16x32_sse2 SAD16XN 16 ; sad16x16_sse2 SAD16XN 8 ; sad16x8_sse2 diff --git a/vp9/encoder/x86/vp9_variance_sse2.c b/vp9/encoder/x86/vp9_variance_sse2.c index fc363b6b033aa022be2a2d822d3eab489376ebaa..67ca9257c01a0b6d48530dded73aab9daa416f49 100644 --- a/vp9/encoder/x86/vp9_variance_sse2.c +++ b/vp9/encoder/x86/vp9_variance_sse2.c @@ -139,7 +139,37 @@ void vp9_half_vert_variance16x_h_sse2 DECLARE_ALIGNED(16, extern const short, vp9_bilinear_filters_mmx[16][8]); -unsigned int vp9_variance4x4_wmt( +typedef unsigned int 
(*get_var_sse2) ( + const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int recon_stride, + unsigned int *SSE, + int *Sum +); + +static void variance_sse2(const unsigned char *src_ptr, int source_stride, + const unsigned char *ref_ptr, int recon_stride, + int w, int h, unsigned int *sse, int *sum, + get_var_sse2 var_fn, int block_size) { + unsigned int sse0; + int sum0; + int i, j; + + *sse = 0; + *sum = 0; + + for (i = 0; i < h; i += block_size) { + for (j = 0; j < w; j += block_size) { + var_fn(src_ptr + source_stride * i + j, source_stride, + ref_ptr + recon_stride * i + j, recon_stride, &sse0, &sum0); + *sse += sse0; + *sum += sum0; + } + } +} + +unsigned int vp9_variance4x4_sse2( const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, @@ -148,13 +178,41 @@ unsigned int vp9_variance4x4_wmt( unsigned int var; int avg; - vp9_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg); + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, + &var, &avg, vp9_get4x4var_mmx, 4); *sse = var; return (var - (((unsigned int)avg * avg) >> 4)); +} + +unsigned int vp9_variance8x4_sse2(const uint8_t *src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int recon_stride, + unsigned int *sse) { + unsigned int var; + int avg; + + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 8, 4, + &var, &avg, vp9_get4x4var_mmx, 4); + *sse = var; + return (var - (((unsigned int)avg * avg) >> 5)); +} + +unsigned int vp9_variance4x8_sse2(const uint8_t *src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int recon_stride, + unsigned int *sse) { + unsigned int var; + int avg; + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 4, 8, + &var, &avg, vp9_get4x4var_mmx, 4); + *sse = var; + return (var - (((unsigned int)avg * avg) >> 5)); } -unsigned int vp9_variance8x8_wmt +unsigned int vp9_variance8x8_sse2 ( const unsigned char *src_ptr, int source_stride, @@ -164,83 +222,157 @@ unsigned int vp9_variance8x8_wmt unsigned int var; int avg; - vp9_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg); + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, + &var, &avg, vp9_get8x8var_sse2, 8); *sse = var; return (var - (((unsigned int)avg * avg) >> 6)); - } - -unsigned int vp9_variance16x16_wmt +unsigned int vp9_variance16x8_sse2 ( const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int recon_stride, unsigned int *sse) { - unsigned int sse0; - int sum0; - + unsigned int var; + int avg; - vp9_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0); - *sse = sse0; - return (sse0 - (((unsigned int)sum0 * sum0) >> 8)); + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, + &var, &avg, vp9_get8x8var_sse2, 8); + *sse = var; + return (var - (((unsigned int)avg * avg) >> 7)); } -unsigned int vp9_mse16x16_wmt( +unsigned int vp9_variance8x16_sse2 +( const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int recon_stride, unsigned int *sse) { + unsigned int var; + int avg; - unsigned int sse0; - int sum0; - vp9_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0); - *sse = sse0; - return sse0; - + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, + &var, &avg, vp9_get8x8var_sse2, 8); + *sse = var; + return (var - (((unsigned int)avg * avg) >> 7)); } - -unsigned int vp9_variance16x8_wmt +unsigned int vp9_variance16x16_sse2 ( const unsigned char *src_ptr, int 
source_stride, const unsigned char *ref_ptr, int recon_stride, unsigned int *sse) { - unsigned int sse0, sse1, var; - int sum0, sum1, avg; - - vp9_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0); - vp9_get8x8var_sse2(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1); + unsigned int var; + int avg; - var = sse0 + sse1; - avg = sum0 + sum1; + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, + &var, &avg, vp9_get16x16var_sse2, 16); *sse = var; - return (var - (((unsigned int)avg * avg) >> 7)); - + return (var - (((unsigned int)avg * avg) >> 8)); } -unsigned int vp9_variance8x16_wmt -( +unsigned int vp9_mse16x16_wmt( const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int recon_stride, unsigned int *sse) { - unsigned int sse0, sse1, var; - int sum0, sum1, avg; - vp9_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0); - vp9_get8x8var_sse2(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1); + unsigned int sse0; + int sum0; + vp9_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, + &sum0); + *sse = sse0; + return sse0; +} - var = sse0 + sse1; - avg = sum0 + sum1; +unsigned int vp9_variance32x32_sse2(const uint8_t *src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int recon_stride, + unsigned int *sse) { + unsigned int var; + int avg; + + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32, + &var, &avg, vp9_get16x16var_sse2, 16); *sse = var; - return (var - (((unsigned int)avg * avg) >> 7)); + return (var - (((int64_t)avg * avg) >> 10)); +} + +unsigned int vp9_variance32x16_sse2(const uint8_t *src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int recon_stride, + unsigned int *sse) { + unsigned int var; + int avg; + + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 32, 16, + &var, &avg, vp9_get16x16var_sse2, 16); + *sse = var; + return (var - (((int64_t)avg * avg) >> 9)); +} +unsigned int vp9_variance16x32_sse2(const uint8_t *src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int recon_stride, + unsigned int *sse) { + unsigned int var; + int avg; + + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 16, 32, + &var, &avg, vp9_get16x16var_sse2, 16); + *sse = var; + return (var - (((int64_t)avg * avg) >> 9)); +} + +unsigned int vp9_variance64x64_sse2(const uint8_t *src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int recon_stride, + unsigned int *sse) { + unsigned int var; + int avg; + + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 64, 64, + &var, &avg, vp9_get16x16var_sse2, 16); + *sse = var; + return (var - (((int64_t)avg * avg) >> 12)); +} + +unsigned int vp9_variance64x32_sse2(const uint8_t *src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int recon_stride, + unsigned int *sse) { + unsigned int var; + int avg; + + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 64, 32, + &var, &avg, vp9_get16x16var_sse2, 16); + *sse = var; + return (var - (((int64_t)avg * avg) >> 11)); +} + +unsigned int vp9_variance32x64_sse2(const uint8_t *src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int recon_stride, + unsigned int *sse) { + unsigned int var; + int avg; + + variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 32, 64, + &var, &avg, vp9_get16x16var_sse2, 16); + *sse = var; + return (var - (((int64_t)avg * avg) >> 11)); } unsigned int vp9_sub_pixel_variance4x4_wmt diff --git 
a/vp9/encoder/x86/vp9_variance_ssse3.c b/vp9/encoder/x86/vp9_variance_ssse3.c index f95a5423c89896f3d6fb45a91136ea4dc589b553..882acad78dc00817125ed11a99583700c6c636eb 100644 --- a/vp9/encoder/x86/vp9_variance_ssse3.c +++ b/vp9/encoder/x86/vp9_variance_ssse3.c @@ -15,15 +15,6 @@ #define HALFNDX 8 -extern unsigned int vp9_get16x16var_sse2 -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *SSE, - int *Sum -); extern void vp9_half_horiz_vert_variance16x_h_sse2 ( const unsigned char *ref_ptr, diff --git a/vp9/encoder/x86/vp9_x86_csystemdependent.c b/vp9/encoder/x86/vp9_x86_csystemdependent.c index 2bf32c569e7b2d6effca63aa0f7babe6123330f1..6016e14eb14ba167275a01aab3b6940781ac460a 100644 --- a/vp9/encoder/x86/vp9_x86_csystemdependent.c +++ b/vp9/encoder/x86/vp9_x86_csystemdependent.c @@ -17,26 +17,12 @@ // TODO(jimbankoski) Consider rewriting the c to take the same values rather // than going through these pointer conversions -#if HAVE_MMX +#if 0 && HAVE_MMX void vp9_short_fdct8x4_mmx(short *input, short *output, int pitch) { vp9_short_fdct4x4_mmx(input, output, pitch); vp9_short_fdct4x4_mmx(input + 4, output + 16, pitch); } -int vp9_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr); -int vp9_mbblock_error_mmx(MACROBLOCK *mb) { - short *coeff_ptr = mb->block[0].coeff; - short *dcoef_ptr = mb->e_mbd.block[0].dqcoeff; - return vp9_mbblock_error_mmx_impl(coeff_ptr, dcoef_ptr); -} - -int vp9_mbuverror_mmx_impl(short *s_ptr, short *d_ptr); -int vp9_mbuverror_mmx(MACROBLOCK *mb) { - short *s_ptr = &mb->coeff[256]; - short *d_ptr = &mb->e_mbd.dqcoeff[256]; - return vp9_mbuverror_mmx_impl(s_ptr, d_ptr); -} - void vp9_subtract_b_mmx_impl(unsigned char *z, int src_stride, short *diff, unsigned char *predictor, int pitch); @@ -44,27 +30,15 @@ void vp9_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch) { unsigned char *z = *(be->base_src) + be->src; unsigned int src_stride = be->src_stride; short *diff = &be->src_diff[0]; - unsigned char *predictor = &bd->predictor[0]; + unsigned char *predictor = *(bd->base_dst) + bd->dst; + // TODO(jingning): The prototype function in c has been changed. Need to + // modify the mmx and sse versions. vp9_subtract_b_mmx_impl(z, src_stride, diff, predictor, pitch); } #endif -#if HAVE_SSE2 -int vp9_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr); -int vp9_mbblock_error_xmm(MACROBLOCK *mb) { - short *coeff_ptr = mb->block[0].coeff; - short *dcoef_ptr = mb->e_mbd.block[0].dqcoeff; - return vp9_mbblock_error_xmm_impl(coeff_ptr, dcoef_ptr); -} - -int vp9_mbuverror_xmm_impl(short *s_ptr, short *d_ptr); -int vp9_mbuverror_xmm(MACROBLOCK *mb) { - short *s_ptr = &mb->coeff[256]; - short *d_ptr = &mb->e_mbd.dqcoeff[256]; - return vp9_mbuverror_xmm_impl(s_ptr, d_ptr); -} - +#if 0 && HAVE_SSE2 void vp9_subtract_b_sse2_impl(unsigned char *z, int src_stride, short *diff, unsigned char *predictor, int pitch); @@ -72,7 +46,9 @@ void vp9_subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch) { unsigned char *z = *(be->base_src) + be->src; unsigned int src_stride = be->src_stride; short *diff = &be->src_diff[0]; - unsigned char *predictor = &bd->predictor[0]; + unsigned char *predictor = *(bd->base_dst) + bd->dst; + // TODO(jingning): The prototype function in c has been changed. Need to + // modify the mmx and sse versions. 
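The TODO above notes that the C prototype changed; the operation itself has not: the residual is the source block minus the prediction, which this patch now reads from the destination buffer (*(bd->base_dst) + bd->dst) rather than bd->predictor. A minimal sketch of the 4x4 case with illustrative names and element strides, not the patched signatures:

#include <stdint.h>

/* diff = src - pred, row by row, for one 4x4 block. */
static void subtract_4x4(int16_t *diff, int diff_stride,
                         const uint8_t *src, int src_stride,
                         const uint8_t *pred, int pred_stride) {
  int r, c;
  for (r = 0; r < 4; r++) {
    for (c = 0; c < 4; c++)
      diff[c] = (int16_t)src[c] - (int16_t)pred[c];
    diff += diff_stride;
    src += src_stride;
    pred += pred_stride;
  }
}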
vp9_subtract_b_sse2_impl(z, src_stride, diff, predictor, pitch); } diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk index f5a4103f6db5bf5ab6f7ce9c98a2724bf8251cd5..732891449ef2b23ee09a3bc92eba4265da9fc87a 100644 --- a/vp9/vp9_common.mk +++ b/vp9/vp9_common.mk @@ -15,7 +15,6 @@ VP9_COMMON_SRCS-yes += common/vp9_ppflags.h VP9_COMMON_SRCS-yes += common/vp9_onyx.h VP9_COMMON_SRCS-yes += common/vp9_alloccommon.c VP9_COMMON_SRCS-yes += common/vp9_asm_com_offsets.c -VP9_COMMON_SRCS-yes += common/vp9_blockd.c VP9_COMMON_SRCS-yes += common/vp9_coefupdateprobs.h VP9_COMMON_SRCS-yes += common/vp9_convolve.c VP9_COMMON_SRCS-yes += common/vp9_convolve.h @@ -36,9 +35,9 @@ VP9_COMMON_SRCS-yes += common/vp9_common.h VP9_COMMON_SRCS-yes += common/vp9_entropy.h VP9_COMMON_SRCS-yes += common/vp9_entropymode.h VP9_COMMON_SRCS-yes += common/vp9_entropymv.h +VP9_COMMON_SRCS-yes += common/vp9_enums.h VP9_COMMON_SRCS-yes += common/vp9_extend.h VP9_COMMON_SRCS-yes += common/vp9_findnearmv.h -VP9_COMMON_SRCS-yes += common/vp9_header.h VP9_COMMON_SRCS-yes += common/vp9_idct.h VP9_COMMON_SRCS-yes += common/vp9_invtrans.h VP9_COMMON_SRCS-yes += common/vp9_loopfilter.h @@ -56,8 +55,6 @@ VP9_COMMON_SRCS-yes += common/vp9_sadmxn.h VP9_COMMON_SRCS-yes += common/vp9_subpelvar.h VP9_COMMON_SRCS-yes += common/vp9_seg_common.h VP9_COMMON_SRCS-yes += common/vp9_seg_common.c -VP9_COMMON_SRCS-yes += common/vp9_setupintrarecon.h -VP9_COMMON_SRCS-yes += common/vp9_swapyv12buffer.h VP9_COMMON_SRCS-yes += common/vp9_systemdependent.h VP9_COMMON_SRCS-yes += common/vp9_textblit.h VP9_COMMON_SRCS-yes += common/vp9_tile_common.h @@ -72,12 +69,8 @@ VP9_COMMON_SRCS-yes += common/vp9_modecontext.c VP9_COMMON_SRCS-yes += common/vp9_mvref_common.c VP9_COMMON_SRCS-yes += common/vp9_mvref_common.h VP9_COMMON_SRCS-yes += common/vp9_quant_common.c -VP9_COMMON_SRCS-yes += common/vp9_recon.c VP9_COMMON_SRCS-yes += common/vp9_reconinter.c VP9_COMMON_SRCS-yes += common/vp9_reconintra.c -VP9_COMMON_SRCS-yes += common/vp9_reconintra4x4.c -VP9_COMMON_SRCS-yes += common/vp9_setupintrarecon.c -VP9_COMMON_SRCS-yes += common/vp9_swapyv12buffer.c VP9_COMMON_SRCS-$(CONFIG_POSTPROC_VISUALIZER) += common/vp9_textblit.c VP9_COMMON_SRCS-yes += common/vp9_treecoder.c VP9_COMMON_SRCS-$(CONFIG_IMPLICIT_SEGMENTATION) += common/vp9_implicit_segmentation.c @@ -92,7 +85,6 @@ VP9_COMMON_SRCS-$(CONFIG_POSTPROC) += common/vp9_postproc.c VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_iwalsh_mmx.asm VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_recon_mmx.asm VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_loopfilter_mmx.asm -VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_sse2.asm VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_iwalsh_sse2.asm VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_loopfilter_sse2.asm VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_recon_sse2.asm @@ -113,14 +105,6 @@ endif VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_sadmxn_sse2.c -ifeq ($(HAVE_SSE2),yes) -vp9/common/x86/vp9_idct_intrin_sse2.c.o: CFLAGS += -msse2 -vp9/common/x86/vp9_loopfilter_intrin_sse2.c.o: CFLAGS += -msse2 -vp9/common/x86/vp9_sadmxn_sse2.c.o: CFLAGS += -msse2 -vp9/common/x86/vp9_idct_intrin_sse2.c.d: CFLAGS += -msse2 -vp9/common/x86/vp9_loopfilter_intrin_sse2.c.d: CFLAGS += -msse2 -vp9/common/x86/vp9_sadmxn_sse2.c.d: CFLAGS += -msse2 -endif $(eval $(call asm_offsets_template,\ vp9_asm_com_offsets.asm, $(VP9_PREFIX)common/vp9_asm_com_offsets.c)) diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c index 
56453e2496aa4fb0d6bca8bd13ded5be7729f762..e5b50894c9a9abccd6c9ef8861861724cb93d70f 100644 --- a/vp9/vp9_cx_iface.c +++ b/vp9/vp9_cx_iface.c @@ -20,7 +20,7 @@ #include <stdlib.h> #include <string.h> -struct vp8_extracfg { +struct vp9_extracfg { struct vpx_codec_pkt_list *pkt_list; int cpu_used; /** available cpu percentage in 1/16*/ unsigned int enable_auto_alt_ref; /** if encoder decides to uses alternate reference frame */ @@ -42,7 +42,7 @@ struct vp8_extracfg { struct extraconfig_map { int usage; - struct vp8_extracfg cfg; + struct vp9_extracfg cfg; }; static const struct extraconfig_map extracfg_map[] = { @@ -73,7 +73,7 @@ static const struct extraconfig_map extracfg_map[] = { struct vpx_codec_alg_priv { vpx_codec_priv_t base; vpx_codec_enc_cfg_t cfg; - struct vp8_extracfg vp8_cfg; + struct vp9_extracfg vp8_cfg; VP9_CONFIG oxcf; VP9_PTR cpi; unsigned char *cx_data; @@ -131,7 +131,7 @@ update_error_state(vpx_codec_alg_priv_t *ctx, static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, const vpx_codec_enc_cfg_t *cfg, - const struct vp8_extracfg *vp8_cfg) { + const struct vp9_extracfg *vp8_cfg) { RANGE_CHECK(cfg, g_w, 1, 65535); /* 16 bits available */ RANGE_CHECK(cfg, g_h, 1, 65535); /* 16 bits available */ RANGE_CHECK(cfg, g_timebase.den, 1, 1000000000); @@ -211,11 +211,12 @@ static vpx_codec_err_t validate_img(vpx_codec_alg_priv_t *ctx, switch (img->fmt) { case VPX_IMG_FMT_YV12: case VPX_IMG_FMT_I420: - case VPX_IMG_FMT_VPXI420: - case VPX_IMG_FMT_VPXYV12: + case VPX_IMG_FMT_I422: + case VPX_IMG_FMT_I444: break; default: - ERROR("Invalid image format. Only YV12 and I420 images are supported"); + ERROR("Invalid image format. Only YV12, I420, I422, I444 images are " + "supported."); } if ((img->d_w != ctx->cfg.g_w) || (img->d_h != ctx->cfg.g_h)) @@ -225,9 +226,9 @@ static vpx_codec_err_t validate_img(vpx_codec_alg_priv_t *ctx, } -static vpx_codec_err_t set_vp8e_config(VP9_CONFIG *oxcf, +static vpx_codec_err_t set_vp9e_config(VP9_CONFIG *oxcf, vpx_codec_enc_cfg_t cfg, - struct vp8_extracfg vp8_cfg) { + struct vp9_extracfg vp8_cfg) { oxcf->version = cfg.g_profile | (vp8_cfg.experimental ? 
0x4 : 0); oxcf->width = cfg.g_w; oxcf->height = cfg.g_h; @@ -350,7 +351,7 @@ static vpx_codec_err_t set_vp8e_config(VP9_CONFIG *oxcf, return VPX_CODEC_OK; } -static vpx_codec_err_t vp8e_set_config(vpx_codec_alg_priv_t *ctx, +static vpx_codec_err_t vp9e_set_config(vpx_codec_alg_priv_t *ctx, const vpx_codec_enc_cfg_t *cfg) { vpx_codec_err_t res; @@ -369,7 +370,7 @@ static vpx_codec_err_t vp8e_set_config(vpx_codec_alg_priv_t *ctx, if (!res) { ctx->cfg = *cfg; - set_vp8e_config(&ctx->oxcf, ctx->cfg, ctx->vp8_cfg); + set_vp9e_config(&ctx->oxcf, ctx->cfg, ctx->vp8_cfg); vp9_change_config(ctx->cpi, &ctx->oxcf); } @@ -405,7 +406,7 @@ static vpx_codec_err_t set_param(vpx_codec_alg_priv_t *ctx, int ctrl_id, va_list args) { vpx_codec_err_t res = VPX_CODEC_OK; - struct vp8_extracfg xcfg = ctx->vp8_cfg; + struct vp9_extracfg xcfg = ctx->vp8_cfg; #define MAP(id, var) case id: var = CAST(id, args); break; @@ -432,7 +433,7 @@ static vpx_codec_err_t set_param(vpx_codec_alg_priv_t *ctx, if (!res) { ctx->vp8_cfg = xcfg; - set_vp8e_config(&ctx->oxcf, ctx->cfg, ctx->vp8_cfg); + set_vp9e_config(&ctx->oxcf, ctx->cfg, ctx->vp8_cfg); vp9_change_config(ctx->cpi, &ctx->oxcf); } @@ -441,7 +442,7 @@ static vpx_codec_err_t set_param(vpx_codec_alg_priv_t *ctx, } -static vpx_codec_err_t vp8e_common_init(vpx_codec_ctx_t *ctx, +static vpx_codec_err_t vp9e_common_init(vpx_codec_ctx_t *ctx, int experimental) { vpx_codec_err_t res = VPX_CODEC_OK; struct vpx_codec_alg_priv *priv; @@ -486,7 +487,10 @@ static vpx_codec_err_t vp8e_common_init(vpx_codec_ctx_t *ctx, priv->vp8_cfg.pkt_list = &priv->pkt_list.head; priv->vp8_cfg.experimental = experimental; - priv->cx_data_sz = priv->cfg.g_w * priv->cfg.g_h * 3 / 2 * 2; + // TODO(agrange) Check the limits set on this buffer, or the check that is + // applied in vp9e_encode. 
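The sizing rule applied just below budgets eight times one uncompressed I420 frame (w * h * 3/2 bytes) for compressed output, with a 4096-byte floor applied right after. Restated as a standalone sketch (hypothetical helper, mirroring the two statements that follow):

#include <stddef.h>

/* Worst-case compressed-output budget: 8 raw I420 frames, min 4096 bytes. */
static size_t cx_data_budget(unsigned int w, unsigned int h) {
  size_t sz = (size_t)w * h * 3 / 2 * 8;
  return sz < 4096 ? (size_t)4096 : sz;
}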
+ priv->cx_data_sz = priv->cfg.g_w * priv->cfg.g_h * 3 / 2 * 8; +// priv->cx_data_sz = priv->cfg.g_w * priv->cfg.g_h * 3 / 2 * 2; if (priv->cx_data_sz < 4096) priv->cx_data_sz = 4096; @@ -501,7 +505,7 @@ static vpx_codec_err_t vp8e_common_init(vpx_codec_ctx_t *ctx, res = validate_config(priv, &priv->cfg, &priv->vp8_cfg); if (!res) { - set_vp8e_config(&ctx->priv->alg_priv->oxcf, + set_vp9e_config(&ctx->priv->alg_priv->oxcf, ctx->priv->alg_priv->cfg, ctx->priv->alg_priv->vp8_cfg); optr = vp9_create_compressor(&ctx->priv->alg_priv->oxcf); @@ -517,21 +521,21 @@ static vpx_codec_err_t vp8e_common_init(vpx_codec_ctx_t *ctx, } -static vpx_codec_err_t vp8e_init(vpx_codec_ctx_t *ctx, +static vpx_codec_err_t vp9e_init(vpx_codec_ctx_t *ctx, vpx_codec_priv_enc_mr_cfg_t *data) { - return vp8e_common_init(ctx, 0); + return vp9e_common_init(ctx, 0); } #if CONFIG_EXPERIMENTAL -static vpx_codec_err_t vp8e_exp_init(vpx_codec_ctx_t *ctx, +static vpx_codec_err_t vp9e_exp_init(vpx_codec_ctx_t *ctx, vpx_codec_priv_enc_mr_cfg_t *data) { - return vp8e_common_init(ctx, 1); + return vp9e_common_init(ctx, 1); } #endif -static vpx_codec_err_t vp8e_destroy(vpx_codec_alg_priv_t *ctx) { +static vpx_codec_err_t vp9e_destroy(vpx_codec_alg_priv_t *ctx) { free(ctx->cx_data); vp9_remove_compressor(&ctx->cpi); @@ -539,28 +543,6 @@ static vpx_codec_err_t vp8e_destroy(vpx_codec_alg_priv_t *ctx) { return VPX_CODEC_OK; } -static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img, - YV12_BUFFER_CONFIG *yv12) { - vpx_codec_err_t res = VPX_CODEC_OK; - yv12->y_buffer = img->planes[VPX_PLANE_Y]; - yv12->u_buffer = img->planes[VPX_PLANE_U]; - yv12->v_buffer = img->planes[VPX_PLANE_V]; - - yv12->y_crop_width = img->d_w; - yv12->y_crop_height = img->d_h; - yv12->y_width = img->d_w; - yv12->y_height = img->d_h; - yv12->uv_width = (1 + yv12->y_width) / 2; - yv12->uv_height = (1 + yv12->y_height) / 2; - - yv12->y_stride = img->stride[VPX_PLANE_Y]; - yv12->uv_stride = img->stride[VPX_PLANE_U]; - - yv12->border = (img->stride[VPX_PLANE_Y] - img->w) / 2; - yv12->clrtype = (img->fmt == VPX_IMG_FMT_VPXI420 || img->fmt == VPX_IMG_FMT_VPXYV12); // REG_YUV = 0 - return res; -} - static void pick_quickcompress_mode(vpx_codec_alg_priv_t *ctx, unsigned long duration, unsigned long deadline) { @@ -626,7 +608,7 @@ static int write_superframe_index(vpx_codec_alg_priv_t *ctx) { return index_sz; } -static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, +static vpx_codec_err_t vp9e_encode(vpx_codec_alg_priv_t *ctx, const vpx_image_t *img, vpx_codec_pts_t pts, unsigned long duration, @@ -754,7 +736,7 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, vpx_codec_cx_pkt_t pkt; VP9_COMP *cpi = (VP9_COMP *)ctx->cpi; - /* Pack invisible frames with the next visisble frame */ + /* Pack invisible frames with the next visible frame */ if (!cpi->common.show_frame) { if (!ctx->pending_cx_data) ctx->pending_cx_data = cx_data; @@ -849,12 +831,12 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, } -static const vpx_codec_cx_pkt_t *vp8e_get_cxdata(vpx_codec_alg_priv_t *ctx, +static const vpx_codec_cx_pkt_t *vp9e_get_cxdata(vpx_codec_alg_priv_t *ctx, vpx_codec_iter_t *iter) { return vpx_codec_pkt_list_get(&ctx->pkt_list.head, iter); } -static vpx_codec_err_t vp8e_set_reference(vpx_codec_alg_priv_t *ctx, +static vpx_codec_err_t vp9e_set_reference(vpx_codec_alg_priv_t *ctx, int ctr_id, va_list args) { vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *); @@ -871,7 +853,7 @@ static vpx_codec_err_t 
vp8e_set_reference(vpx_codec_alg_priv_t *ctx, } -static vpx_codec_err_t vp8e_copy_reference(vpx_codec_alg_priv_t *ctx, +static vpx_codec_err_t vp9e_copy_reference(vpx_codec_alg_priv_t *ctx, int ctr_id, va_list args) { @@ -904,7 +886,7 @@ static vpx_codec_err_t get_reference(vpx_codec_alg_priv_t *ctx, } } -static vpx_codec_err_t vp8e_set_previewpp(vpx_codec_alg_priv_t *ctx, +static vpx_codec_err_t vp9e_set_previewpp(vpx_codec_alg_priv_t *ctx, int ctr_id, va_list args) { #if CONFIG_POSTPROC @@ -925,7 +907,7 @@ static vpx_codec_err_t vp8e_set_previewpp(vpx_codec_alg_priv_t *ctx, } -static vpx_image_t *vp8e_get_preview(vpx_codec_alg_priv_t *ctx) { +static vpx_image_t *vp9e_get_preview(vpx_codec_alg_priv_t *ctx) { YV12_BUFFER_CONFIG sd; vp9_ppflags_t flags = {0}; @@ -937,45 +919,13 @@ static vpx_image_t *vp8e_get_preview(vpx_codec_alg_priv_t *ctx) { } if (0 == vp9_get_preview_raw_frame(ctx->cpi, &sd, &flags)) { - - /* - vpx_img_wrap(&ctx->preview_img, VPX_IMG_FMT_YV12, - sd.y_width + 2*VP9BORDERINPIXELS, - sd.y_height + 2*VP9BORDERINPIXELS, - 1, - sd.buffer_alloc); - vpx_img_set_rect(&ctx->preview_img, - VP9BORDERINPIXELS, VP9BORDERINPIXELS, - sd.y_width, sd.y_height); - */ - - ctx->preview_img.bps = 12; - ctx->preview_img.planes[VPX_PLANE_Y] = sd.y_buffer; - ctx->preview_img.planes[VPX_PLANE_U] = sd.u_buffer; - ctx->preview_img.planes[VPX_PLANE_V] = sd.v_buffer; - - if (sd.clrtype == REG_YUV) - ctx->preview_img.fmt = VPX_IMG_FMT_I420; - else - ctx->preview_img.fmt = VPX_IMG_FMT_VPXI420; - - ctx->preview_img.x_chroma_shift = 1; - ctx->preview_img.y_chroma_shift = 1; - - ctx->preview_img.d_w = sd.y_width; - ctx->preview_img.d_h = sd.y_height; - ctx->preview_img.stride[VPX_PLANE_Y] = sd.y_stride; - ctx->preview_img.stride[VPX_PLANE_U] = sd.uv_stride; - ctx->preview_img.stride[VPX_PLANE_V] = sd.uv_stride; - ctx->preview_img.w = sd.y_width; - ctx->preview_img.h = sd.y_height; - + yuvconfig2image(&ctx->preview_img, &sd, NULL); return &ctx->preview_img; } else return NULL; } -static vpx_codec_err_t vp8e_update_entropy(vpx_codec_alg_priv_t *ctx, +static vpx_codec_err_t vp9e_update_entropy(vpx_codec_alg_priv_t *ctx, int ctr_id, va_list args) { int update = va_arg(args, int); @@ -984,7 +934,7 @@ static vpx_codec_err_t vp8e_update_entropy(vpx_codec_alg_priv_t *ctx, } -static vpx_codec_err_t vp8e_update_reference(vpx_codec_alg_priv_t *ctx, +static vpx_codec_err_t vp9e_update_reference(vpx_codec_alg_priv_t *ctx, int ctr_id, va_list args) { int update = va_arg(args, int); @@ -992,7 +942,7 @@ static vpx_codec_err_t vp8e_update_reference(vpx_codec_alg_priv_t *ctx, return VPX_CODEC_OK; } -static vpx_codec_err_t vp8e_use_reference(vpx_codec_alg_priv_t *ctx, +static vpx_codec_err_t vp9e_use_reference(vpx_codec_alg_priv_t *ctx, int ctr_id, va_list args) { int reference_flag = va_arg(args, int); @@ -1000,7 +950,7 @@ static vpx_codec_err_t vp8e_use_reference(vpx_codec_alg_priv_t *ctx, return VPX_CODEC_OK; } -static vpx_codec_err_t vp8e_set_roi_map(vpx_codec_alg_priv_t *ctx, +static vpx_codec_err_t vp9e_set_roi_map(vpx_codec_alg_priv_t *ctx, int ctr_id, va_list args) { vpx_roi_map_t *data = va_arg(args, vpx_roi_map_t *); @@ -1018,7 +968,7 @@ static vpx_codec_err_t vp8e_set_roi_map(vpx_codec_alg_priv_t *ctx, } -static vpx_codec_err_t vp8e_set_activemap(vpx_codec_alg_priv_t *ctx, +static vpx_codec_err_t vp9e_set_activemap(vpx_codec_alg_priv_t *ctx, int ctr_id, va_list args) { vpx_active_map_t *data = va_arg(args, vpx_active_map_t *); @@ -1035,7 +985,7 @@ static vpx_codec_err_t vp8e_set_activemap(vpx_codec_alg_priv_t 
*ctx, return VPX_CODEC_INVALID_PARAM; } -static vpx_codec_err_t vp8e_set_scalemode(vpx_codec_alg_priv_t *ctx, +static vpx_codec_err_t vp9e_set_scalemode(vpx_codec_alg_priv_t *ctx, int ctr_id, va_list args) { @@ -1056,16 +1006,16 @@ static vpx_codec_err_t vp8e_set_scalemode(vpx_codec_alg_priv_t *ctx, } -static vpx_codec_ctrl_fn_map_t vp8e_ctf_maps[] = { - {VP8_SET_REFERENCE, vp8e_set_reference}, - {VP8_COPY_REFERENCE, vp8e_copy_reference}, - {VP8_SET_POSTPROC, vp8e_set_previewpp}, - {VP8E_UPD_ENTROPY, vp8e_update_entropy}, - {VP8E_UPD_REFERENCE, vp8e_update_reference}, - {VP8E_USE_REFERENCE, vp8e_use_reference}, - {VP8E_SET_ROI_MAP, vp8e_set_roi_map}, - {VP8E_SET_ACTIVEMAP, vp8e_set_activemap}, - {VP8E_SET_SCALEMODE, vp8e_set_scalemode}, +static vpx_codec_ctrl_fn_map_t vp9e_ctf_maps[] = { + {VP8_SET_REFERENCE, vp9e_set_reference}, + {VP8_COPY_REFERENCE, vp9e_copy_reference}, + {VP8_SET_POSTPROC, vp9e_set_previewpp}, + {VP8E_UPD_ENTROPY, vp9e_update_entropy}, + {VP8E_UPD_REFERENCE, vp9e_update_reference}, + {VP8E_USE_REFERENCE, vp9e_use_reference}, + {VP8E_SET_ROI_MAP, vp9e_set_roi_map}, + {VP8E_SET_ACTIVEMAP, vp9e_set_activemap}, + {VP8E_SET_SCALEMODE, vp9e_set_scalemode}, {VP8E_SET_CPUUSED, set_param}, {VP8E_SET_NOISE_SENSITIVITY, set_param}, {VP8E_SET_ENABLEAUTOALTREF, set_param}, @@ -1086,7 +1036,7 @@ static vpx_codec_ctrl_fn_map_t vp8e_ctf_maps[] = { { -1, NULL}, }; -static vpx_codec_enc_cfg_map_t vp8e_usage_cfg_map[] = { +static vpx_codec_enc_cfg_map_t vp9e_usage_cfg_map[] = { { 0, { @@ -1151,9 +1101,9 @@ CODEC_INTERFACE(vpx_codec_vp9_cx) = { VPX_CODEC_CAP_ENCODER | VPX_CODEC_CAP_PSNR | VPX_CODEC_CAP_OUTPUT_PARTITION, /* vpx_codec_caps_t caps; */ - vp8e_init, /* vpx_codec_init_fn_t init; */ - vp8e_destroy, /* vpx_codec_destroy_fn_t destroy; */ - vp8e_ctf_maps, /* vpx_codec_ctrl_fn_map_t *ctrl_maps; */ + vp9e_init, /* vpx_codec_init_fn_t init; */ + vp9e_destroy, /* vpx_codec_destroy_fn_t destroy; */ + vp9e_ctf_maps, /* vpx_codec_ctrl_fn_map_t *ctrl_maps; */ NOT_IMPLEMENTED, /* vpx_codec_get_mmap_fn_t get_mmap; */ NOT_IMPLEMENTED, /* vpx_codec_set_mmap_fn_t set_mmap; */ { @@ -1163,12 +1113,12 @@ CODEC_INTERFACE(vpx_codec_vp9_cx) = { NOT_IMPLEMENTED, /* vpx_codec_frame_get_fn_t frame_get; */ }, { - vp8e_usage_cfg_map, /* vpx_codec_enc_cfg_map_t peek_si; */ - vp8e_encode, /* vpx_codec_encode_fn_t encode; */ - vp8e_get_cxdata, /* vpx_codec_get_cx_data_fn_t frame_get; */ - vp8e_set_config, + vp9e_usage_cfg_map, /* vpx_codec_enc_cfg_map_t peek_si; */ + vp9e_encode, /* vpx_codec_encode_fn_t encode; */ + vp9e_get_cxdata, /* vpx_codec_get_cx_data_fn_t frame_get; */ + vp9e_set_config, NOT_IMPLEMENTED, - vp8e_get_preview, + vp9e_get_preview, } /* encoder functions */ }; @@ -1180,9 +1130,9 @@ CODEC_INTERFACE(vpx_codec_vp9x_cx) = { VPX_CODEC_INTERNAL_ABI_VERSION, VPX_CODEC_CAP_ENCODER | VPX_CODEC_CAP_PSNR, /* vpx_codec_caps_t caps; */ - vp8e_exp_init, /* vpx_codec_init_fn_t init; */ - vp8e_destroy, /* vpx_codec_destroy_fn_t destroy; */ - vp8e_ctf_maps, /* vpx_codec_ctrl_fn_map_t *ctrl_maps; */ + vp9e_exp_init, /* vpx_codec_init_fn_t init; */ + vp9e_destroy, /* vpx_codec_destroy_fn_t destroy; */ + vp9e_ctf_maps, /* vpx_codec_ctrl_fn_map_t *ctrl_maps; */ NOT_IMPLEMENTED, /* vpx_codec_get_mmap_fn_t get_mmap; */ NOT_IMPLEMENTED, /* vpx_codec_set_mmap_fn_t set_mmap; */ { @@ -1192,12 +1142,12 @@ CODEC_INTERFACE(vpx_codec_vp9x_cx) = { NOT_IMPLEMENTED, /* vpx_codec_frame_get_fn_t frame_get; */ }, { - vp8e_usage_cfg_map, /* vpx_codec_enc_cfg_map_t peek_si; */ - vp8e_encode, /* vpx_codec_encode_fn_t encode; 
*/ - vp8e_get_cxdata, /* vpx_codec_get_cx_data_fn_t frame_get; */ - vp8e_set_config, + vp9e_usage_cfg_map, /* vpx_codec_enc_cfg_map_t peek_si; */ + vp9e_encode, /* vpx_codec_encode_fn_t encode; */ + vp9e_get_cxdata, /* vpx_codec_get_cx_data_fn_t frame_get; */ + vp9e_set_config, NOT_IMPLEMENTED, - vp8e_get_preview, + vp9e_get_preview, } /* encoder functions */ }; #endif diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c index 9dd419d933e9ed6151e2076a16a73fcbfb6222cc..c240a9e614400e6999b1a4b6daf5c90abb2fa5c8 100644 --- a/vp9/vp9_dx_iface.c +++ b/vp9/vp9_dx_iface.c @@ -215,26 +215,19 @@ static vpx_codec_err_t vp8_peek_si(const uint8_t *data, if (data + data_sz <= data) res = VPX_CODEC_INVALID_PARAM; else { - /* Parse uncompresssed part of key frame header. - * 3 bytes:- including version, frame type and an offset - * 3 bytes:- sync code (0x9d, 0x01, 0x2a) - * 4 bytes:- including image width and height in the lowest 14 bits - * of each 2-byte value. - */ si->is_kf = 0; - if (data_sz >= 10 && !(data[0] & 0x01)) { /* I-Frame */ - const uint8_t *c = data + 3; + if (data_sz >= 8 && (data[0] & 0xD8) == 0x80) { /* I-Frame */ + const uint8_t *c = data + 1; si->is_kf = 1; - /* vet via sync code */ - if (c[0] != 0x9d || c[1] != 0x01 || c[2] != 0x2a) + if (c[0] != SYNC_CODE_0 || c[1] != SYNC_CODE_1 || c[2] != SYNC_CODE_2) res = VPX_CODEC_UNSUP_BITSTREAM; - si->w = (c[3] | (c[4] << 8)); - si->h = (c[5] | (c[6] << 8)); + si->w = (c[3] << 8) | c[4]; + si->h = (c[5] << 8) | c[6]; - /*printf("w=%d, h=%d\n", si->w, si->h);*/ + // printf("w=%d, h=%d\n", si->w, si->h); if (!(si->h | si->w)) res = VPX_CODEC_UNSUP_BITSTREAM; } else @@ -242,7 +235,6 @@ static vpx_codec_err_t vp8_peek_si(const uint8_t *data, } return res; - } static vpx_codec_err_t vp8_get_si(vpx_codec_alg_priv_t *ctx, @@ -329,9 +321,9 @@ static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx, vp9_initialize_dec(); - oxcf.Width = ctx->si.w; - oxcf.Height = ctx->si.h; - oxcf.Version = 9; + oxcf.width = ctx->si.w; + oxcf.height = ctx->si.h; + oxcf.version = 9; oxcf.postprocess = 0; oxcf.max_threads = ctx->cfg.threads; oxcf.inv_tile_order = ctx->invert_tile_order; @@ -578,30 +570,6 @@ static vpx_codec_err_t vp8_xma_set_mmap(vpx_codec_ctx_t *ctx, return res; } -static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img, - YV12_BUFFER_CONFIG *yv12) { - vpx_codec_err_t res = VPX_CODEC_OK; - yv12->y_buffer = img->planes[VPX_PLANE_Y]; - yv12->u_buffer = img->planes[VPX_PLANE_U]; - yv12->v_buffer = img->planes[VPX_PLANE_V]; - - yv12->y_crop_width = img->d_w; - yv12->y_crop_height = img->d_h; - yv12->y_width = img->d_w; - yv12->y_height = img->d_h; - yv12->uv_width = yv12->y_width / 2; - yv12->uv_height = yv12->y_height / 2; - - yv12->y_stride = img->stride[VPX_PLANE_Y]; - yv12->uv_stride = img->stride[VPX_PLANE_U]; - - yv12->border = (img->stride[VPX_PLANE_Y] - img->d_w) / 2; - yv12->clrtype = (img->fmt == VPX_IMG_FMT_VPXI420 || - img->fmt == VPX_IMG_FMT_VPXYV12); - - return res; -} - static vpx_codec_err_t vp9_set_reference(vpx_codec_alg_priv_t *ctx, int ctr_id, diff --git a/vp9/vp9_iface_common.h b/vp9/vp9_iface_common.h index 450be7dfda09f655eaa10598d68a28e11caecb31..dc41d77d1206440cdb12256e16b959e8d33620c7 100644 --- a/vp9/vp9_iface_common.h +++ b/vp9/vp9_iface_common.h @@ -10,34 +10,80 @@ #ifndef VP9_VP9_IFACE_COMMON_H_ #define VP9_VP9_IFACE_COMMON_H_ -static void yuvconfig2image(vpx_image_t *img, - const YV12_BUFFER_CONFIG *yv12, - void *user_priv) { +static void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG *yv12, + void 
*user_priv) { /** vpx_img_wrap() doesn't allow specifying independent strides for * the Y, U, and V planes, nor other alignment adjustments that * might be representable by a YV12_BUFFER_CONFIG, so we just * initialize all the fields.*/ - img->fmt = yv12->clrtype == REG_YUV ? - VPX_IMG_FMT_I420 : VPX_IMG_FMT_VPXI420; + int bps = 12; + if (yv12->uv_height == yv12->y_height) { + if (yv12->uv_width == yv12->y_width) { + img->fmt = VPX_IMG_FMT_I444; + bps = 24; + } else { + img->fmt = VPX_IMG_FMT_I422; + bps = 16; + } + } else { + img->fmt = VPX_IMG_FMT_I420; + } img->w = yv12->y_stride; - img->h = (yv12->y_height + 2 * VP9BORDERINPIXELS + 15) & ~15; - img->d_w = yv12->y_width; - img->d_h = yv12->y_height; - img->x_chroma_shift = 1; - img->y_chroma_shift = 1; + img->h = multiple8(yv12->y_height + 2 * VP9BORDERINPIXELS); + img->d_w = yv12->y_crop_width; + img->d_h = yv12->y_crop_height; + img->x_chroma_shift = yv12->uv_width < yv12->y_width; + img->y_chroma_shift = yv12->uv_height < yv12->y_height; img->planes[VPX_PLANE_Y] = yv12->y_buffer; img->planes[VPX_PLANE_U] = yv12->u_buffer; img->planes[VPX_PLANE_V] = yv12->v_buffer; - img->planes[VPX_PLANE_ALPHA] = NULL; + img->planes[VPX_PLANE_ALPHA] = yv12->alpha_buffer; img->stride[VPX_PLANE_Y] = yv12->y_stride; img->stride[VPX_PLANE_U] = yv12->uv_stride; img->stride[VPX_PLANE_V] = yv12->uv_stride; - img->stride[VPX_PLANE_ALPHA] = yv12->y_stride; - img->bps = 12; + img->stride[VPX_PLANE_ALPHA] = yv12->alpha_stride; + img->bps = bps; img->user_priv = user_priv; img->img_data = yv12->buffer_alloc; img->img_data_owner = 0; img->self_allocd = 0; } +static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img, + YV12_BUFFER_CONFIG *yv12) { + yv12->y_buffer = img->planes[VPX_PLANE_Y]; + yv12->u_buffer = img->planes[VPX_PLANE_U]; + yv12->v_buffer = img->planes[VPX_PLANE_V]; + yv12->alpha_buffer = img->planes[VPX_PLANE_ALPHA]; + + yv12->y_crop_width = img->d_w; + yv12->y_crop_height = img->d_h; + yv12->y_width = img->d_w; + yv12->y_height = img->d_h; + + yv12->uv_width = img->x_chroma_shift == 1 ? (1 + yv12->y_width) / 2 + : yv12->y_width; + yv12->uv_height = img->y_chroma_shift == 1 ? (1 + yv12->y_height) / 2 + : yv12->y_height; + + yv12->alpha_width = yv12->alpha_buffer ? img->d_w : 0; + yv12->alpha_height = yv12->alpha_buffer ? img->d_h : 0; + + yv12->y_stride = img->stride[VPX_PLANE_Y]; + yv12->uv_stride = img->stride[VPX_PLANE_U]; + yv12->alpha_stride = yv12->alpha_buffer ? img->stride[VPX_PLANE_ALPHA] : 0; + + yv12->border = (img->stride[VPX_PLANE_Y] - img->w) / 2; + yv12->clrtype = REG_YUV; + +#if CONFIG_ALPHA + // For development purposes, force alpha to hold the same data a Y for now. 
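Both helpers above size the chroma planes from the chroma shifts: a shift of 1 halves the luma dimension with round-up, which is the (1 + dim) / 2 seen in image2yuvconfig, and a shift of 0 leaves 4:4:4 dimensions untouched. The rule in one hypothetical helper:

/* Chroma dimension for a given luma dimension and chroma shift:
 * shift 0 keeps full resolution, shift 1 halves it, rounding up. */
static int chroma_dim(int luma_dim, int chroma_shift) {
  return (luma_dim + (1 << chroma_shift) - 1) >> chroma_shift;
}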
+ yv12->alpha_buffer = yv12->y_buffer; + yv12->alpha_width = yv12->y_width; + yv12->alpha_height = yv12->y_height; + yv12->alpha_stride = yv12->y_stride; #endif + return VPX_CODEC_OK; +} + +#endif // VP9_VP9_IFACE_COMMON_H_ diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk index 51e24b84671cee401cbd990f03b184fd195ea270..86fd0885081f943f81bd1d7b560b53b2ac895c54 100644 --- a/vp9/vp9cx.mk +++ b/vp9/vp9cx.mk @@ -17,16 +17,6 @@ VP9_CX_SRCS_REMOVE-no += $(VP9_COMMON_SRCS_REMOVE-no) VP9_CX_SRCS-yes += vp9_cx_iface.c -# encoder -#INCLUDES += algo/vpx_common/vpx_mem/include -#INCLUDES += common -#INCLUDES += common -#INCLUDES += common -#INCLUDES += algo/vpx_ref/cpu_id/include -#INCLUDES += common -#INCLUDES += encoder - -VP9_CX_SRCS-yes += encoder/vp9_asm_enc_offsets.c VP9_CX_SRCS-yes += encoder/vp9_bitstream.c VP9_CX_SRCS-yes += encoder/vp9_boolhuff.c VP9_CX_SRCS-yes += encoder/vp9_dct.c @@ -38,6 +28,7 @@ VP9_CX_SRCS-yes += encoder/vp9_encodemv.c VP9_CX_SRCS-yes += encoder/vp9_firstpass.c VP9_CX_SRCS-yes += encoder/vp9_block.h VP9_CX_SRCS-yes += encoder/vp9_boolhuff.h +VP9_CX_SRCS-yes += encoder/vp9_write_bit_buffer.h VP9_CX_SRCS-yes += encoder/vp9_bitstream.h VP9_CX_SRCS-yes += encoder/vp9_encodeintra.h VP9_CX_SRCS-yes += encoder/vp9_encodemb.h @@ -82,7 +73,6 @@ VP9_CX_SRCS-yes += encoder/vp9_mbgraph.h VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/vp9_mcomp_x86.h -VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/vp9_quantize_x86.h VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/vp9_x86_csystemdependent.c VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_variance_mmx.c VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_variance_impl_mmx.asm @@ -95,28 +85,16 @@ VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_impl_sse2.asm VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad_sse2.asm VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad4d_sse2.asm VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_fwalsh_sse2.asm -#VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_quantize_sse2.asm VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subtract_sse2.asm VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm VP9_CX_SRCS-$(HAVE_SSE3) += encoder/x86/vp9_sad_sse3.asm VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_sad_ssse3.asm VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_variance_ssse3.c VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_variance_impl_ssse3.asm -#VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_quantize_ssse3.asm VP9_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/vp9_sad_sse4.asm -#VP9_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/vp9_quantize_sse4.asm -VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/vp9_quantize_mmx.asm VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/vp9_encodeopt.asm VP9_CX_SRCS-$(ARCH_X86_64) += encoder/x86/vp9_ssim_opt.asm VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.c -ifeq ($(HAVE_SSE2),yes) -vp9/encoder/x86/vp9_dct_sse2.c.d: CFLAGS += -msse2 -vp9/encoder/x86/vp9_dct_sse2.c.o: CFLAGS += -msse2 -endif - VP9_CX_SRCS-yes := $(filter-out $(VP9_CX_SRCS_REMOVE-yes),$(VP9_CX_SRCS-yes)) - -$(eval $(call asm_offsets_template,\ - vp9_asm_enc_offsets.asm, $(VP9_PREFIX)encoder/vp9_asm_enc_offsets.c)) diff --git a/vp9/vp9dx.mk b/vp9/vp9dx.mk index 5cab6fc1fdc922812a6c1748b7b4e201b9204456..7ae3219ca2845ef307841da999417d0dbbaa5c3b 100644 --- a/vp9/vp9dx.mk +++ b/vp9/vp9dx.mk @@ -22,27 +22,21 @@ VP9_DX_SRCS-yes += decoder/vp9_dboolhuff.c VP9_DX_SRCS-yes += decoder/vp9_decodemv.c VP9_DX_SRCS-yes += decoder/vp9_decodframe.c VP9_DX_SRCS-yes += decoder/vp9_decodframe.h -VP9_DX_SRCS-yes += 
decoder/vp9_dequantize.c VP9_DX_SRCS-yes += decoder/vp9_detokenize.c VP9_DX_SRCS-yes += decoder/vp9_dboolhuff.h +VP9_DX_SRCS-yes += decoder/vp9_read_bit_buffer.h VP9_DX_SRCS-yes += decoder/vp9_decodemv.h -VP9_DX_SRCS-yes += decoder/vp9_dequantize.h VP9_DX_SRCS-yes += decoder/vp9_detokenize.h VP9_DX_SRCS-yes += decoder/vp9_onyxd.h VP9_DX_SRCS-yes += decoder/vp9_onyxd_int.h VP9_DX_SRCS-yes += decoder/vp9_treereader.h VP9_DX_SRCS-yes += decoder/vp9_onyxd_if.c VP9_DX_SRCS-yes += decoder/vp9_idct_blk.c +VP9_DX_SRCS-yes += decoder/vp9_idct_blk.h VP9_DX_SRCS-yes := $(filter-out $(VP9_DX_SRCS_REMOVE-yes),$(VP9_DX_SRCS-yes)) -VP9_DX_SRCS-$(HAVE_SSE2) += decoder/x86/vp9_idct_blk_sse2.c - VP9_DX_SRCS-$(HAVE_SSE2) += decoder/x86/vp9_dequantize_sse2.c -ifeq ($(HAVE_SSE2),yes) -vp9/decoder/x86/vp9_dequantize_sse2.c.o: CFLAGS += -msse2 -vp9/decoder/x86/vp9_dequantize_sse2.c.d: CFLAGS += -msse2 -endif $(eval $(call asm_offsets_template,\ vp9_asm_dec_offsets.asm, $(VP9_PREFIX)decoder/vp9_asm_dec_offsets.c)) diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h index 7f19dd033ae477b2853e7664dc1502521da50375..f8e2ef9c3e9c50971a84863aa17957454c07b149 100644 --- a/vpx/vp8cx.h +++ b/vpx/vp8cx.h @@ -215,9 +215,13 @@ typedef struct vpx_roi_map { unsigned char *roi_map; /**< specify an id between 0 and 3 for each 16x16 region within a frame */ unsigned int rows; /**< number of rows */ unsigned int cols; /**< number of cols */ - int delta_q[4]; /**< quantizer delta [-63, 63] off baseline for regions with id between 0 and 3*/ - int delta_lf[4]; /**< loop filter strength delta [-63, 63] for regions with id between 0 and 3 */ - unsigned int static_threshold[4];/**< threshold for region to be treated as static */ + // TODO(paulwilkins): broken for VP9 which has 8 segments + // q and loop filter deltas for each segment + // (see MAX_MB_SEGMENTS) + int delta_q[4]; + int delta_lf[4]; + // Static breakout threshold for each segment + unsigned int static_threshold[4]; } vpx_roi_map_t; /*!\brief vpx active region map diff --git a/vpx/vpx_image.h b/vpx/vpx_image.h index 809fa38d088394a35aa16133c117360a5391ed05..c304bacd07e18f394cedde3a25a39ac1cd60eeef 100644 --- a/vpx/vpx_image.h +++ b/vpx/vpx_image.h @@ -55,9 +55,11 @@ extern "C" { VPX_IMG_FMT_YV12 = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_UV_FLIP | 1, /**< planar YVU */ VPX_IMG_FMT_I420 = VPX_IMG_FMT_PLANAR | 2, VPX_IMG_FMT_VPXYV12 = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_UV_FLIP | 3, /** < planar 4:2:0 format with vpx color space */ - VPX_IMG_FMT_VPXI420 = VPX_IMG_FMT_PLANAR | 4 /** < planar 4:2:0 format with vpx color space */ - } - vpx_img_fmt_t; /**< alias for enum vpx_img_fmt */ + VPX_IMG_FMT_VPXI420 = VPX_IMG_FMT_PLANAR | 4, + VPX_IMG_FMT_I422 = VPX_IMG_FMT_PLANAR | 5, + VPX_IMG_FMT_I444 = VPX_IMG_FMT_PLANAR | 6, + VPX_IMG_FMT_444A = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_HAS_ALPHA | 7 + } vpx_img_fmt_t; /**< alias for enum vpx_img_fmt */ #if !defined(VPX_CODEC_DISABLE_COMPAT) || !VPX_CODEC_DISABLE_COMPAT #define IMG_FMT_PLANAR VPX_IMG_FMT_PLANAR /**< \deprecated Use #VPX_IMG_FMT_PLANAR */ diff --git a/vpx_scale/generic/yv12config.c b/vpx_scale/generic/yv12config.c index fc7f828814f8c9f0c85c6e1aa8b75e7ab8243552..754a615a8a7bfbe30b6c0600d2fea2ff86e695d2 100644 --- a/vpx_scale/generic/yv12config.c +++ b/vpx_scale/generic/yv12config.c @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ - +#include "./vpx_config.h" #include "vpx_scale/yv12config.h" #include "vpx_mem/vpx_mem.h" @@ -76,12 +76,17 @@ int vp8_yv12_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, ybf->uv_height = uv_height; ybf->uv_stride = uv_stride; + ybf->alpha_width = 0; + ybf->alpha_height = 0; + ybf->alpha_stride = 0; + ybf->border = border; ybf->frame_size = frame_size; ybf->y_buffer = ybf->buffer_alloc + (border * y_stride) + border; ybf->u_buffer = ybf->buffer_alloc + yplane_size + (border / 2 * uv_stride) + border / 2; ybf->v_buffer = ybf->buffer_alloc + yplane_size + uvplane_size + (border / 2 * uv_stride) + border / 2; + ybf->alpha_buffer = NULL; ybf->corrupted = 0; /* assume not currupted by errors */ return 0; @@ -97,3 +102,107 @@ int vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, } return -2; } + +#if CONFIG_VP9 +// TODO(jkoleszar): Maybe replace this with struct vpx_image + +int vp9_free_frame_buffer(YV12_BUFFER_CONFIG *ybf) { + if (ybf) { + vpx_free(ybf->buffer_alloc); + + /* buffer_alloc isn't accessed by most functions. Rather y_buffer, + u_buffer and v_buffer point to buffer_alloc and are used. Clear out + all of this so that a freed pointer isn't inadvertently used */ + vpx_memset(ybf, 0, sizeof(YV12_BUFFER_CONFIG)); + } else { + return -1; + } + + return 0; +} + +int vp9_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, + int width, int height, + int ss_x, int ss_y, int border) { + if (ybf) { + const int aligned_width = (width + 7) & ~7; + const int aligned_height = (height + 7) & ~7; + const int y_stride = ((aligned_width + 2 * border) + 31) & ~31; + const int yplane_size = (aligned_height + 2 * border) * y_stride; + const int uv_width = aligned_width >> ss_x; + const int uv_height = aligned_height >> ss_y; + const int uv_stride = y_stride >> ss_x; + const int uv_border_w = border >> ss_x; + const int uv_border_h = border >> ss_y; + const int uvplane_size = (uv_height + 2 * uv_border_h) * uv_stride; +#if CONFIG_ALPHA + const int alpha_width = aligned_width; + const int alpha_height = aligned_height; + const int alpha_stride = y_stride; + const int alpha_border_w = border; + const int alpha_border_h = border; + const int alpha_plane_size = (alpha_height + 2 * alpha_border_h) * + alpha_stride; + const int frame_size = yplane_size + 2 * uvplane_size + + alpha_plane_size; +#else + const int frame_size = yplane_size + 2 * uvplane_size; +#endif + if (!ybf->buffer_alloc) { + ybf->buffer_alloc = vpx_memalign(32, frame_size); + ybf->buffer_alloc_sz = frame_size; + } + + if (!ybf->buffer_alloc || ybf->buffer_alloc_sz < frame_size) + return -1; + + /* Only support allocating buffers that have a border that's a multiple + * of 32. The border restriction is required to get 16-byte alignment of + * the start of the chroma rows without intoducing an arbitrary gap + * between planes, which would break the semantics of things like + * vpx_img_set_rect(). 
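The geometry computed above follows fixed rounding rules: luma dimensions are aligned up to a multiple of 8, the luma row (including both borders) up to a multiple of 32, and the chroma stride is the luma stride shifted right by the horizontal subsampling factor. A sketch mirroring the expressions in vp9_realloc_frame_buffer:

/* Luma stride rule: width rounds up to a multiple of 8 and the bordered
 * row to a multiple of 32, so 4:2:0 chroma rows (y_stride >> ss_x) keep
 * the 16-byte alignment the comment above requires. */
static int luma_stride(int width, int border) {
  const int aligned_width = (width + 7) & ~7;
  return ((aligned_width + 2 * border) + 31) & ~31;
}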
*/ + if (border & 0x1f) + return -3; + + ybf->y_crop_width = width; + ybf->y_crop_height = height; + ybf->y_width = aligned_width; + ybf->y_height = aligned_height; + ybf->y_stride = y_stride; + + ybf->uv_width = uv_width; + ybf->uv_height = uv_height; + ybf->uv_stride = uv_stride; + + ybf->border = border; + ybf->frame_size = frame_size; + + ybf->y_buffer = ybf->buffer_alloc + (border * y_stride) + border; + ybf->u_buffer = ybf->buffer_alloc + yplane_size + + (uv_border_h * uv_stride) + uv_border_w; + ybf->v_buffer = ybf->buffer_alloc + yplane_size + uvplane_size + + (uv_border_h * uv_stride) + uv_border_w; + +#if CONFIG_ALPHA + ybf->alpha_width = alpha_width; + ybf->alpha_height = alpha_height; + ybf->alpha_stride = alpha_stride; + ybf->alpha_buffer = ybf->buffer_alloc + yplane_size + 2 * uvplane_size + + (alpha_border_h * alpha_stride) + alpha_border_w; +#endif + ybf->corrupted = 0; /* assume not currupted by errors */ + return 0; + } + return -2; +} + +int vp9_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, + int width, int height, + int ss_x, int ss_y, int border) { + if (ybf) { + vp9_free_frame_buffer(ybf); + return vp9_realloc_frame_buffer(ybf, width, height, ss_x, ss_y, border); + } + return -2; +} +#endif diff --git a/vpx_scale/generic/yv12extend.c b/vpx_scale/generic/yv12extend.c index a322e0a2c3452f1df5fff41a8d7b7a299acd7d24..c38fb807a627379a1899ab275c3524ae675d978f 100644 --- a/vpx_scale/generic/yv12extend.c +++ b/vpx_scale/generic/yv12extend.c @@ -9,6 +9,7 @@ */ #include <assert.h> +#include "./vpx_config.h" #include "vpx_scale/yv12config.h" #include "vpx_mem/vpx_mem.h" #include "vpx_scale/vpx_scale.h" @@ -94,6 +95,36 @@ vp8_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf) { (ybf->border + ybf->y_width - ybf->y_crop_width + 1) / 2); } +#if CONFIG_VP9 +void vp9_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf, + int subsampling_x, int subsampling_y) { + const int c_w = (ybf->y_crop_width + subsampling_x) >> subsampling_x; + const int c_h = (ybf->y_crop_height + subsampling_y) >> subsampling_y; + const int c_et = ybf->border >> subsampling_y; + const int c_el = ybf->border >> subsampling_x; + const int c_eb = (ybf->border + ybf->y_height - ybf->y_crop_height + + subsampling_y) >> subsampling_y; + const int c_er = (ybf->border + ybf->y_width - ybf->y_crop_width + + subsampling_x) >> subsampling_x; + + assert(ybf->y_height - ybf->y_crop_height < 16); + assert(ybf->y_width - ybf->y_crop_width < 16); + assert(ybf->y_height - ybf->y_crop_height >= 0); + assert(ybf->y_width - ybf->y_crop_width >= 0); + + extend_plane(ybf->y_buffer, ybf->y_stride, + ybf->y_crop_width, ybf->y_crop_height, + ybf->border, ybf->border, + ybf->border + ybf->y_height - ybf->y_crop_height, + ybf->border + ybf->y_width - ybf->y_crop_width); + + extend_plane(ybf->u_buffer, ybf->uv_stride, + c_w, c_h, c_et, c_el, c_eb, c_er); + + extend_plane(ybf->v_buffer, ybf->uv_stride, + c_w, c_h, c_et, c_el, c_eb, c_er); +} +#endif /**************************************************************************** * diff --git a/vpx_scale/vpx_scale_rtcd.sh b/vpx_scale/vpx_scale_rtcd.sh index e2bade077c583ec69cded2b9a7bdc9d2cb2fd58e..b4f89077da074db9922f2754c20e733de15027c2 100644 --- a/vpx_scale/vpx_scale_rtcd.sh +++ b/vpx_scale/vpx_scale_rtcd.sh @@ -24,3 +24,8 @@ specialize vp8_yv12_copy_frame neon prototype void vp8_yv12_copy_y "struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc" specialize vp8_yv12_copy_y neon + +if [ "$CONFIG_VP9" = "yes" ]; then + prototype void vp9_extend_frame_borders "struct 
yv12_buffer_config *ybf, int subsampling_x, int subsampling_y" + specialize vp9_extend_frame_borders +fi diff --git a/vpx_scale/yv12config.h b/vpx_scale/yv12config.h index 14b6e278b1a052b3714684d736689aa0ff688520..7b8bd850f9e7c17506763ac2e94a07a2f881dc65 100644 --- a/vpx_scale/yv12config.h +++ b/vpx_scale/yv12config.h @@ -18,7 +18,7 @@ extern "C" { #include "vpx/vpx_integer.h" #define VP8BORDERINPIXELS 32 -#define VP9BORDERINPIXELS 64 +#define VP9BORDERINPIXELS 96 #define VP9_INTERP_EXTEND 4 /************************************* @@ -52,9 +52,14 @@ extern "C" { int uv_stride; /* int uvinternal_width; */ + int alpha_width; + int alpha_height; + int alpha_stride; + uint8_t *y_buffer; uint8_t *u_buffer; uint8_t *v_buffer; + uint8_t *alpha_buffer; uint8_t *buffer_alloc; int buffer_alloc_sz; @@ -72,6 +77,14 @@ extern "C" { int width, int height, int border); int vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf); + int vp9_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, + int width, int height, int ss_x, int ss_y, + int border); + int vp9_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, + int width, int height, int ss_x, int ss_y, + int border); + int vp9_free_frame_buffer(YV12_BUFFER_CONFIG *ybf); + #ifdef __cplusplus } #endif diff --git a/vpxdec.c b/vpxdec.c index 5bde0c862e99b4310a52c1feabc9eec371dc2278..deb45d35949e26287679dab87bed021505cd6c2d 100644 --- a/vpxdec.c +++ b/vpxdec.c @@ -12,6 +12,7 @@ /* This is a simple program that reads ivf files and decodes them * using the new interface. Decoded frames are output as YV12 raw. */ +#include <assert.h> #include <stdio.h> #include <stdlib.h> #include <stdarg.h> @@ -891,6 +892,7 @@ int main(int argc, const char **argv_) { if (use_y4m && !noblit) { char buffer[128]; + if (!single_file) { fprintf(stderr, "YUV4MPEG2 not supported with output patterns," " try --i420 or --yv12.\n"); @@ -908,8 +910,8 @@ int main(int argc, const char **argv_) { /*Note: We can't output an aspect ratio here because IVF doesn't store one, and neither does VP8. That will have to wait until these tools support WebM natively.*/ - sprintf(buffer, "YUV4MPEG2 C%s W%u H%u F%u:%u I%c\n", - "420jpeg", width, height, fps_num, fps_den, 'p'); + snprintf(buffer, sizeof(buffer), "YUV4MPEG2 W%u H%u F%u:%u I%c ", + width, height, fps_num, fps_den, 'p'); out_put(out, (unsigned char *)buffer, (unsigned int)strlen(buffer), do_md5); } @@ -1036,6 +1038,17 @@ int main(int argc, const char **argv_) { show_progress(frame_in, frame_out, dx_time); if (!noblit) { + if (frame_out == 1 && img && use_y4m) { + /* Write out the color format to terminate the header line */ + const char *color = + img->fmt == VPX_IMG_FMT_444A ? "C444alpha\n" : + img->fmt == VPX_IMG_FMT_I444 ? "C444\n" : + img->fmt == VPX_IMG_FMT_I422 ? "C422\n" : + "C420jpeg\n"; + + out_put(out, (const unsigned char*)color, strlen(color), do_md5); + } + if (do_scale) { if (img && frame_out == 1) { stream_w = img->d_w; @@ -1044,6 +1057,7 @@ int main(int argc, const char **argv_) { stream_w, stream_h, 16); } if (img && (img->d_w != stream_w || img->d_h != stream_h)) { + assert(img->fmt == VPX_IMG_FMT_I420); I420Scale(img->planes[VPX_PLANE_Y], img->stride[VPX_PLANE_Y], img->planes[VPX_PLANE_U], img->stride[VPX_PLANE_U], img->planes[VPX_PLANE_V], img->stride[VPX_PLANE_V], @@ -1064,6 +1078,12 @@ int main(int argc, const char **argv_) { unsigned int y; char out_fn[PATH_MAX]; uint8_t *buf; + unsigned int c_w = + img->x_chroma_shift ? (1 + img->d_w) >> img->x_chroma_shift + : img->d_w; + unsigned int c_h = + img->y_chroma_shift ? 
(1 + img->d_h) >> img->y_chroma_shift + : img->d_h; if (!single_file) { size_t len = sizeof(out_fn) - 1; @@ -1084,15 +1104,15 @@ int main(int argc, const char **argv_) { buf = img->planes[flipuv ? VPX_PLANE_V : VPX_PLANE_U]; - for (y = 0; y < (1 + img->d_h) / 2; y++) { - out_put(out, buf, (1 + img->d_w) / 2, do_md5); + for (y = 0; y < c_h; y++) { + out_put(out, buf, c_w, do_md5); buf += img->stride[VPX_PLANE_U]; } buf = img->planes[flipuv ? VPX_PLANE_U : VPX_PLANE_V]; - for (y = 0; y < (1 + img->d_h) / 2; y++) { - out_put(out, buf, (1 + img->d_w) / 2, do_md5); + for (y = 0; y < c_h; y++) { + out_put(out, buf, c_w, do_md5); buf += img->stride[VPX_PLANE_V]; } diff --git a/vpxenc.c b/vpxenc.c index 3295fd9a458ca42f47fccc930d7b8b5736706bcf..a60b84d6eb41d73607710de2b36651bb29738fca 100644 --- a/vpxenc.c +++ b/vpxenc.c @@ -326,6 +326,7 @@ struct input_state { unsigned int h; struct vpx_rational framerate; int use_i420; + int only_i420; }; @@ -1481,9 +1482,12 @@ static void show_rate_histogram(struct rate_hist *hist, #define mmin(a, b) ((a) < (b) ? (a) : (b)) static void find_mismatch(vpx_image_t *img1, vpx_image_t *img2, - int yloc[2], int uloc[2], int vloc[2]) { + int yloc[4], int uloc[4], int vloc[4]) { const unsigned int bsize = 64; - const unsigned int bsize2 = bsize >> 1; + const unsigned int bsizey = bsize >> img1->y_chroma_shift; + const unsigned int bsizex = bsize >> img1->x_chroma_shift; + const int c_w = (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift; + const int c_h = (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift; unsigned int match = 1; unsigned int i, j; yloc[0] = yloc[1] = yloc[2] = yloc[3] = -1; @@ -1510,12 +1514,13 @@ static void find_mismatch(vpx_image_t *img1, vpx_image_t *img2, } } } + uloc[0] = uloc[1] = uloc[2] = uloc[3] = -1; - for (i = 0, match = 1; match && i < (img1->d_h + 1) / 2; i += bsize2) { - for (j = 0; j < match && (img1->d_w + 1) / 2; j += bsize2) { + for (i = 0, match = 1; match && i < c_h; i += bsizey) { + for (j = 0; match && j < c_w; j += bsizex) { int k, l; - int si = mmin(i + bsize2, (img1->d_h + 1) / 2) - i; - int sj = mmin(j + bsize2, (img1->d_w + 1) / 2) - j; + int si = mmin(i + bsizey, c_h - i); + int sj = mmin(j + bsizex, c_w - j); for (k = 0; match && k < si; k++) for (l = 0; match && l < sj; l++) { if (*(img1->planes[VPX_PLANE_U] + @@ -1535,11 +1540,11 @@ static void find_mismatch(vpx_image_t *img1, vpx_image_t *img2, } } vloc[0] = vloc[1] = vloc[2] = vloc[3] = -1; - for (i = 0, match = 1; match && i < (img1->d_h + 1) / 2; i += bsize2) { - for (j = 0; j < match && (img1->d_w + 1) / 2; j += bsize2) { + for (i = 0, match = 1; match && i < c_h; i += bsizey) { + for (j = 0; match && j < c_w; j += bsizex) { int k, l; - int si = mmin(i + bsize2, (img1->d_h + 1) / 2) - i; - int sj = mmin(j + bsize2, (img1->d_w + 1) / 2) - j; + int si = mmin(i + bsizey, c_h - i); + int sj = mmin(j + bsizex, c_w - j); for (k = 0; match && k < si; k++) for (l = 0; match && l < sj; l++) { if (*(img1->planes[VPX_PLANE_V] + @@ -1562,6 +1567,8 @@ static void find_mismatch(vpx_image_t *img1, vpx_image_t *img2, static int compare_img(vpx_image_t *img1, vpx_image_t *img2) { + const int c_w = (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift; + const int c_h = (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift; int match = 1; unsigned int i; @@ -1574,15 +1581,15 @@ static int compare_img(vpx_image_t *img1, vpx_image_t *img2) img2->planes[VPX_PLANE_Y]+i*img2->stride[VPX_PLANE_Y], img1->d_w) == 0); - for (i = 0; i < img1->d_h/2; i++) + for (i = 
diff --git a/vpxenc.c b/vpxenc.c
index 3295fd9a458ca42f47fccc930d7b8b5736706bcf..a60b84d6eb41d73607710de2b36651bb29738fca 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -326,6 +326,7 @@ struct input_state {
   unsigned int h;
   struct vpx_rational framerate;
   int use_i420;
+  int only_i420;
 };
 
 
@@ -1481,9 +1482,12 @@ static void show_rate_histogram(struct rate_hist *hist,
 
 #define mmin(a, b) ((a) < (b) ? (a) : (b))
 static void find_mismatch(vpx_image_t *img1, vpx_image_t *img2,
-                          int yloc[2], int uloc[2], int vloc[2]) {
+                          int yloc[4], int uloc[4], int vloc[4]) {
   const unsigned int bsize = 64;
-  const unsigned int bsize2 = bsize >> 1;
+  const unsigned int bsizey = bsize >> img1->y_chroma_shift;
+  const unsigned int bsizex = bsize >> img1->x_chroma_shift;
+  const int c_w = (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift;
+  const int c_h = (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift;
   unsigned int match = 1;
   unsigned int i, j;
   yloc[0] = yloc[1] = yloc[2] = yloc[3] = -1;
@@ -1510,12 +1514,13 @@ static void find_mismatch(vpx_image_t *img1, vpx_image_t *img2,
       }
     }
   }
+
   uloc[0] = uloc[1] = uloc[2] = uloc[3] = -1;
-  for (i = 0, match = 1; match && i < (img1->d_h + 1) / 2; i += bsize2) {
-    for (j = 0; j < match && (img1->d_w + 1) / 2; j += bsize2) {
+  for (i = 0, match = 1; match && i < c_h; i += bsizey) {
+    for (j = 0; match && j < c_w; j += bsizex) {
       int k, l;
-      int si = mmin(i + bsize2, (img1->d_h + 1) / 2) - i;
-      int sj = mmin(j + bsize2, (img1->d_w + 1) / 2) - j;
+      int si = mmin(i + bsizey, c_h) - i;
+      int sj = mmin(j + bsizex, c_w) - j;
       for (k = 0; match && k < si; k++)
         for (l = 0; match && l < sj; l++) {
           if (*(img1->planes[VPX_PLANE_U] +
@@ -1535,11 +1540,11 @@ static void find_mismatch(vpx_image_t *img1, vpx_image_t *img2,
       }
     }
   vloc[0] = vloc[1] = vloc[2] = vloc[3] = -1;
-  for (i = 0, match = 1; match && i < (img1->d_h + 1) / 2; i += bsize2) {
-    for (j = 0; j < match && (img1->d_w + 1) / 2; j += bsize2) {
+  for (i = 0, match = 1; match && i < c_h; i += bsizey) {
+    for (j = 0; match && j < c_w; j += bsizex) {
       int k, l;
-      int si = mmin(i + bsize2, (img1->d_h + 1) / 2) - i;
-      int sj = mmin(j + bsize2, (img1->d_w + 1) / 2) - j;
+      int si = mmin(i + bsizey, c_h) - i;
+      int sj = mmin(j + bsizex, c_w) - j;
       for (k = 0; match && k < si; k++)
         for (l = 0; match && l < sj; l++) {
           if (*(img1->planes[VPX_PLANE_V] +
@@ -1562,6 +1567,8 @@ static int compare_img(vpx_image_t *img1, vpx_image_t *img2)
 {
+  const int c_w = (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift;
+  const int c_h = (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift;
   int match = 1;
   unsigned int i;
@@ -1574,15 +1581,15 @@ static int compare_img(vpx_image_t *img1, vpx_image_t *img2)
                    img2->planes[VPX_PLANE_Y]+i*img2->stride[VPX_PLANE_Y],
                    img1->d_w) == 0);
 
-  for (i = 0; i < img1->d_h/2; i++)
+  for (i = 0; i < c_h; i++)
     match &= (memcmp(img1->planes[VPX_PLANE_U]+i*img1->stride[VPX_PLANE_U],
                      img2->planes[VPX_PLANE_U]+i*img2->stride[VPX_PLANE_U],
-                     (img1->d_w + 1) / 2) == 0);
+                     c_w) == 0);
 
-  for (i = 0; i < img1->d_h/2; i++)
+  for (i = 0; i < c_h; i++)
     match &= (memcmp(img1->planes[VPX_PLANE_V]+i*img1->stride[VPX_PLANE_U],
                      img2->planes[VPX_PLANE_V]+i*img2->stride[VPX_PLANE_U],
-                     (img1->d_w + 1) / 2) == 0);
+                     c_w) == 0);
 
   return match;
 }
@@ -1792,7 +1799,8 @@ void open_input_file(struct input_state *input) {
   if (input->detect.buf_read == 4
       && file_is_y4m(input->file, &input->y4m, input->detect.buf)) {
-    if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4) >= 0) {
+    if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4,
+                       input->only_i420) >= 0) {
       input->file_type = FILE_TYPE_Y4M;
       input->w = input->y4m.pic_w;
       input->h = input->y4m.pic_h;
@@ -2516,6 +2524,7 @@ int main(int argc, const char **argv_) {
   input.framerate.num = 30;
   input.framerate.den = 1;
   input.use_i420 = 1;
+  input.only_i420 = 1;
 
   /* First parse the global configuration values, because we want to apply
    * other parameters on top of the default configuration provided by the
@@ -2550,6 +2559,12 @@ int main(int argc, const char **argv_) {
 
   if (!input.fn)
     usage_exit();
+#if CONFIG_NON420
+  /* Decide if chroma subsamplings other than 4:2:0 are supported */
+  if (global.codec->fourcc == VP9_FOURCC)
+    input.only_i420 = 0;
+#endif
+
   for (pass = global.pass ? global.pass - 1 : 0; pass < global.passes; pass++) {
     int frames_in = 0, seen_frames = 0;
     int64_t estimated_time_left = -1;
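find_mismatch() above now walks each chroma plane in blocks whose size is the 64x64 luma block scaled down by the chroma shifts. A minimal standalone sketch of that per-plane scan, under the assumption that each block is clipped at the plane edge to match the pre-change semantics (plane_matches and MMIN are local stand-ins, not the patch's code):

    /* Sketch of the chroma-aware block scan: step through the plane in
     * bsizex-by-bsizey blocks, clip each block at the plane edge, and
     * record the first differing sample. Returns 1 if the planes match. */
    #define MMIN(a, b) ((a) < (b) ? (a) : (b))

    static int plane_matches(const unsigned char *p1, const unsigned char *p2,
                             int stride, int c_w, int c_h,
                             int bsizex, int bsizey, int loc[2]) {
      int i, j, k, l;
      loc[0] = loc[1] = -1;
      for (i = 0; i < c_h; i += bsizey)
        for (j = 0; j < c_w; j += bsizex) {
          const int si = MMIN(i + bsizey, c_h) - i; /* clipped block height */
          const int sj = MMIN(j + bsizex, c_w) - j; /* clipped block width */
          for (k = 0; k < si; k++)
            for (l = 0; l < sj; l++)
              if (p1[(i + k) * stride + (j + l)] !=
                  p2[(i + k) * stride + (j + l)]) {
                loc[0] = i + k; /* row of first mismatch */
                loc[1] = j + l; /* column of first mismatch */
                return 0;
              }
        }
      return 1;
    }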
diff --git a/y4minput.c b/y4minput.c
index 24f0c154f68a8f0047acb21b4df193420e288093..47f005a950be021325c1a3180033570e74c95f03 100644
--- a/y4minput.c
+++ b/y4minput.c
@@ -659,7 +659,8 @@ static void y4m_convert_null(y4m_input *_y4m, unsigned char *_dst,
                              unsigned char *_aux) {
 }
 
-int y4m_input_open(y4m_input *_y4m, FILE *_fin, char *_skip, int _nskip) {
+int y4m_input_open(y4m_input *_y4m, FILE *_fin, char *_skip, int _nskip,
+                   int only_420) {
   char buffer[80];
   int ret;
   int i;
@@ -701,6 +702,8 @@ int y4m_input_open(y4m_input *_y4m, FILE *_fin, char *_skip, int _nskip) {
             "Only progressive scan handled.\n");
     return -1;
   }
+  _y4m->vpx_fmt = VPX_IMG_FMT_I420;
+  _y4m->vpx_bps = 12;
   if (strcmp(_y4m->chroma_type, "420") == 0 ||
       strcmp(_y4m->chroma_type, "420jpeg") == 0) {
     _y4m->src_c_dec_h = _y4m->dst_c_dec_h = _y4m->src_c_dec_v = _y4m->dst_c_dec_v = 2;
@@ -734,16 +737,30 @@ int y4m_input_open(y4m_input *_y4m, FILE *_fin, char *_skip, int _nskip) {
     _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
     _y4m->convert = y4m_convert_422jpeg_420jpeg;
   } else if (strcmp(_y4m->chroma_type, "422") == 0) {
-    _y4m->src_c_dec_h = _y4m->dst_c_dec_h = 2;
+    _y4m->src_c_dec_h = 2;
     _y4m->src_c_dec_v = 1;
-    _y4m->dst_c_dec_v = 2;
-    _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
-    /*Chroma filter required: read into the aux buf first.
-      We need to make two filter passes, so we need some extra space in the
-      aux buffer.*/
-    _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
-    _y4m->aux_buf_sz = _y4m->aux_buf_read_sz + ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
-    _y4m->convert = y4m_convert_422_420jpeg;
+    if (only_420) {
+      _y4m->dst_c_dec_h = 2;
+      _y4m->dst_c_dec_v = 2;
+      _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
+      /*Chroma filter required: read into the aux buf first.
+        We need to make two filter passes, so we need some extra space in the
+        aux buffer.*/
+      _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
+      _y4m->aux_buf_sz = _y4m->aux_buf_read_sz +
+          ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
+      _y4m->convert = y4m_convert_422_420jpeg;
+    } else {
+      _y4m->vpx_fmt = VPX_IMG_FMT_I422;
+      _y4m->vpx_bps = 16;
+      _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
+      _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
+      _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h +
+          2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
+      /*Natively supported: no conversion required.*/
+      _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
+      _y4m->convert = y4m_convert_null;
+    }
   } else if (strcmp(_y4m->chroma_type, "411") == 0) {
     _y4m->src_c_dec_h = 4;
     _y4m->dst_c_dec_h = 2;
@@ -758,29 +775,52 @@ int y4m_input_open(y4m_input *_y4m, FILE *_fin, char *_skip, int _nskip) {
     _y4m->convert = y4m_convert_411_420jpeg;
   } else if (strcmp(_y4m->chroma_type, "444") == 0) {
     _y4m->src_c_dec_h = 1;
-    _y4m->dst_c_dec_h = 2;
     _y4m->src_c_dec_v = 1;
-    _y4m->dst_c_dec_v = 2;
-    _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
-    /*Chroma filter required: read into the aux buf first.
-      We need to make two filter passes, so we need some extra space in the
-      aux buffer.*/
-    _y4m->aux_buf_read_sz = 2 * _y4m->pic_w * _y4m->pic_h;
-    _y4m->aux_buf_sz = _y4m->aux_buf_read_sz + ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
-    _y4m->convert = y4m_convert_444_420jpeg;
+    if (only_420) {
+      _y4m->dst_c_dec_h = 2;
+      _y4m->dst_c_dec_v = 2;
+      _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
+      /*Chroma filter required: read into the aux buf first.
+        We need to make two filter passes, so we need some extra space in the
+        aux buffer.*/
+      _y4m->aux_buf_read_sz = 2 * _y4m->pic_w * _y4m->pic_h;
+      _y4m->aux_buf_sz = _y4m->aux_buf_read_sz +
+          ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
+      _y4m->convert = y4m_convert_444_420jpeg;
+    } else {
+      _y4m->vpx_fmt = VPX_IMG_FMT_I444;
+      _y4m->vpx_bps = 24;
+      _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
+      _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
+      _y4m->dst_buf_read_sz = 3 * _y4m->pic_w * _y4m->pic_h;
+      /*Natively supported: no conversion required.*/
+      _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
+      _y4m->convert = y4m_convert_null;
+    }
   } else if (strcmp(_y4m->chroma_type, "444alpha") == 0) {
     _y4m->src_c_dec_h = 1;
-    _y4m->dst_c_dec_h = 2;
     _y4m->src_c_dec_v = 1;
-    _y4m->dst_c_dec_v = 2;
-    _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
-    /*Chroma filter required: read into the aux buf first.
-      We need to make two filter passes, so we need some extra space in the
-      aux buffer.
-      The extra plane also gets read into the aux buf.
-      It will be discarded.*/
-    _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 3 * _y4m->pic_w * _y4m->pic_h;
-    _y4m->convert = y4m_convert_444_420jpeg;
+    if (only_420) {
+      _y4m->dst_c_dec_h = 2;
+      _y4m->dst_c_dec_v = 2;
+      _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
+      /*Chroma filter required: read into the aux buf first.
+        We need to make two filter passes, so we need some extra space in the
+        aux buffer.
+        The extra plane also gets read into the aux buf.
+        It will be discarded.*/
+      _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 3 * _y4m->pic_w * _y4m->pic_h;
+      _y4m->convert = y4m_convert_444_420jpeg;
+    } else {
+      _y4m->vpx_fmt = VPX_IMG_FMT_444A;
+      _y4m->vpx_bps = 32;
+      _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
+      _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
+      _y4m->dst_buf_read_sz = 4 * _y4m->pic_w * _y4m->pic_h;
+      /*Natively supported: no conversion required.*/
+      _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
+      _y4m->convert = y4m_convert_null;
+    }
   } else if (strcmp(_y4m->chroma_type, "mono") == 0) {
     _y4m->src_c_dec_h = _y4m->src_c_dec_v = 0;
     _y4m->dst_c_dec_h = _y4m->dst_c_dec_v = 2;
@@ -847,22 +887,23 @@ int y4m_input_fetch_frame(y4m_input *_y4m, FILE *_fin, vpx_image_t *_img) {
     sizes, which would require a separate fread call for every row.*/
   memset(_img, 0, sizeof(*_img));
   /*Y4M has the planes in Y'CbCr order, which libvpx calls Y, U, and V.*/
-  _img->fmt = IMG_FMT_I420;
+  _img->fmt = _y4m->vpx_fmt;
   _img->w = _img->d_w = _y4m->pic_w;
   _img->h = _img->d_h = _y4m->pic_h;
-  /*This is hard-coded to 4:2:0 for now, as that's all VP8 supports.*/
-  _img->x_chroma_shift = 1;
-  _img->y_chroma_shift = 1;
-  _img->bps = 12;
+  _img->x_chroma_shift = _y4m->dst_c_dec_h >> 1;
+  _img->y_chroma_shift = _y4m->dst_c_dec_v >> 1;
+  _img->bps = _y4m->vpx_bps;
+
   /*Set up the buffer pointers.*/
   pic_sz = _y4m->pic_w * _y4m->pic_h;
   c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
   c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
   c_sz = c_w * c_h;
-  _img->stride[PLANE_Y] = _y4m->pic_w;
+  _img->stride[PLANE_Y] = _img->stride[PLANE_ALPHA] = _y4m->pic_w;
   _img->stride[PLANE_U] = _img->stride[PLANE_V] = c_w;
   _img->planes[PLANE_Y] = _y4m->dst_buf;
   _img->planes[PLANE_U] = _y4m->dst_buf + pic_sz;
   _img->planes[PLANE_V] = _y4m->dst_buf + pic_sz + c_sz;
+  _img->planes[PLANE_ALPHA] = _y4m->dst_buf + pic_sz + 2 * c_sz;
   return 1;
 }
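y4m_input_fetch_frame() above exposes each frame as one contiguous buffer: a full-size Y plane, U and V planes at the decimated size, and, for "444alpha" input, a full-size alpha plane at the end. A standalone sketch of that layout math (struct plane_layout and layout() are illustrative, not part of the patch; the destination decimations are 1 or 2 here, so the divisions are well defined):

    #include <stddef.h>

    /* Byte offsets of each plane inside y4m_input's dst_buf, mirroring the
     * pic_sz / c_w / c_h / c_sz computation in y4m_input_fetch_frame(). */
    struct plane_layout {
      size_t y, u, v, alpha;
    };

    static struct plane_layout layout(int pic_w, int pic_h,
                                      int c_dec_h, int c_dec_v) {
      const size_t pic_sz = (size_t)pic_w * pic_h;
      const size_t c_w = (pic_w + c_dec_h - 1) / c_dec_h; /* round up */
      const size_t c_h = (pic_h + c_dec_v - 1) / c_dec_v;
      const size_t c_sz = c_w * c_h;
      struct plane_layout pl;
      pl.y = 0;
      pl.u = pic_sz;
      pl.v = pic_sz + c_sz;
      pl.alpha = pic_sz + 2 * c_sz; /* only meaningful for 444alpha input */
      return pl;
    }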
diff --git a/y4minput.h b/y4minput.h
index 2fa3767c9c746f3091473620004137b2fb65fba4..b2a390cfe81703ca1378a8ab9250ca5745c83449 100644
--- a/y4minput.h
+++ b/y4minput.h
@@ -51,9 +51,12 @@ struct y4m_input {
   y4m_convert_func convert;
   unsigned char *dst_buf;
   unsigned char *aux_buf;
+  enum vpx_img_fmt vpx_fmt;
+  int vpx_bps;
 };
 
-int y4m_input_open(y4m_input *_y4m, FILE *_fin, char *_skip, int _nskip);
+int y4m_input_open(y4m_input *_y4m, FILE *_fin, char *_skip, int _nskip,
+                   int only_420);
 void y4m_input_close(y4m_input *_y4m);
 int y4m_input_fetch_frame(y4m_input *_y4m, FILE *_fin, vpx_image_t *img);
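With the header change above, callers now pass the only_420 flag explicitly. A hedged usage sketch, roughly how vpxenc drives it ("input.y4m" is a placeholder path, error handling trimmed): only_420 = 1 keeps the old always-convert-to-4:2:0 behavior, while 0 lets 4:2:2, 4:4:4, and 4:4:4-alpha frames through untouched.

    #include <stdio.h>
    #include "y4minput.h"

    int main(void) {
      y4m_input y4m;
      vpx_image_t img;
      char skip[4];
      FILE *f = fopen("input.y4m", "rb"); /* placeholder path */
      if (!f) return 1;
      /* Nothing was sniffed from the stream ahead of time, so _nskip is 0. */
      if (y4m_input_open(&y4m, f, skip, 0, 0 /* only_420 */) >= 0) {
        while (y4m_input_fetch_frame(&y4m, f, &img) > 0)
          printf("frame: fmt %d, %ux%u\n", (int)img.fmt, img.d_w, img.d_h);
        y4m_input_close(&y4m);
      }
      fclose(f);
      return 0;
    }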