diff --git a/build/make/Makefile b/build/make/Makefile
index 92113ccb8c24ee29f4169bdfe68a0b896899703e..da7fb03a0bba0244c4f696ce0600740c36eaac8b 100644
--- a/build/make/Makefile
+++ b/build/make/Makefile
@@ -74,7 +74,7 @@ HOSTCC?=gcc
 TGT_ISA:=$(word 1, $(subst -, ,$(TOOLCHAIN)))
 TGT_OS:=$(word 2, $(subst -, ,$(TOOLCHAIN)))
 TGT_CC:=$(word 3, $(subst -, ,$(TOOLCHAIN)))
-quiet:=$(if $(verbose),,yes)
+quiet:=$(if $(or $(verbose), $(V)),, yes)
 qexec=$(if $(quiet),@)
 
 # Cancel built-in implicit rules
@@ -380,6 +380,7 @@ ifneq ($(call enabled,DIST-SRCS),)
     # Include obj_int_extract if we use offsets from asm_*_offsets
     DIST-SRCS-$(ARCH_ARM)$(ARCH_X86)$(ARCH_X86_64) += build/make/obj_int_extract.c
     DIST-SRCS-$(ARCH_ARM) += build/make/ads2gas.pl
+    DIST-SRCS-$(ARCH_ARM) += build/make/ads2gas_apple.pl
     DIST-SRCS-yes += $(target:-$(TOOLCHAIN)=).mk
 endif
 INSTALL-SRCS := $(call cond_enabled,CONFIG_INSTALL_SRCS,INSTALL-SRCS)
diff --git a/build/make/ads2gas_apple.pl b/build/make/ads2gas_apple.pl
index 81280bf7850c5cc6065112cc64f91fd7650decd9..51e6fbcbdb1e413b249335319498abf1320b909f 100755
--- a/build/make/ads2gas_apple.pl
+++ b/build/make/ads2gas_apple.pl
@@ -10,12 +10,12 @@
 ##
 
-# ads2gas.pl
+# ads2gas_apple.pl
 # Author: Eric Fung (efung (at) acm.org)
 #
 # Convert ARM Developer Suite 1.0.1 syntax assembly source to GNU as format
 #
-# Usage: cat inputfile | perl ads2gas.pl > outputfile
+# Usage: cat inputfile | perl ads2gas_apple.pl > outputfile
 #
 print "@ This file was created from a .asm file\n";
 print "@ using the ads2gas_apple.pl script.\n\n";
diff --git a/build/make/configure.sh b/build/make/configure.sh
index e558ff2b8e4f5d48eba81653d65e529b8572b6ac..1a78f272bd4d5efabe08cc73333607a3545b66eb 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -598,8 +598,13 @@ process_common_toolchain() {
       armv6*)
         tgt_isa=armv6
         ;;
+      armv7*-hardfloat*)
+        tgt_isa=armv7
+        float_abi=hard
+        ;;
       armv7*)
         tgt_isa=armv7
+        float_abi=softfp
         ;;
       armv5te*)
         tgt_isa=armv5te
@@ -643,6 +648,9 @@ process_common_toolchain() {
       tgt_isa=x86_64
       tgt_os=darwin12
       ;;
+    x86_64*mingw32*)
+      tgt_os=win64
+      ;;
     *mingw32*|*cygwin*)
       [ -z "$tgt_isa" ] && tgt_isa=x86
       tgt_os=win32
@@ -785,8 +793,9 @@ process_common_toolchain() {
         check_add_asflags --defsym ARCHITECTURE=${arch_int}
         tune_cflags="-mtune="
         if [ ${tgt_isa} == "armv7" ]; then
-            check_add_cflags -march=armv7-a -mfloat-abi=softfp
-            check_add_asflags -march=armv7-a -mfloat-abi=softfp
+            [ -z "${float_abi}" ] && float_abi=softfp
+            check_add_cflags -march=armv7-a -mfloat-abi=${float_abi}
+            check_add_asflags -march=armv7-a -mfloat-abi=${float_abi}
 
             if enabled neon
             then
@@ -1038,7 +1047,7 @@ EOF
         add_ldflags -m${bits}
         link_with_cc=gcc
         tune_cflags="-march="
-        setup_gnu_toolchain
+        setup_gnu_toolchain
         #for 32 bit x86 builds, -O3 did not turn on this flag
         enabled optimizations && check_add_cflags -fomit-frame-pointer
         ;;
@@ -1056,6 +1065,8 @@ EOF
         soft_enable sse2
         soft_enable sse3
         soft_enable ssse3
+        # We can't use 'check_cflags' until the compiler is configured and CC is
+        # populated.
         if enabled gcc && ! disabled sse4_1 && ! check_cflags -msse4; then
             RTCD_OPTIONS="${RTCD_OPTIONS}--disable-sse4_1 "
         else
@@ -1082,7 +1093,7 @@ EOF
             add_asflags -f x64
             enabled debug && add_asflags -g cv8
             ;;
-        linux*|solaris*)
+        linux*|solaris*|android*)
             add_asflags -f elf${bits}
             enabled debug && [ "${AS}" = yasm ] && add_asflags -g dwarf2
             enabled debug && [ "${AS}" = nasm ] && add_asflags -g
diff --git a/build/make/rtcd.sh b/build/make/rtcd.sh
index ddf9e09a4e9674db91cbc79381691a1d7eada0c3..6cc36843b1a6266e8d16e317138e3bb03c525166 100755
--- a/build/make/rtcd.sh
+++ b/build/make/rtcd.sh
@@ -59,13 +59,13 @@ for f in $defs_file; do [ -f "$f" ] || usage; done
 # Routines for the RTCD DSL to call
 #
 prototype() {
-  local rtyp
+  rtyp=""
   case "$1" in
     unsigned) rtyp="$1 "; shift;;
   esac
   rtyp="${rtyp}$1"
-  local fn="$2"
-  local args="$3"
+  fn="$2"
+  args="$3"
 
   eval "${2}_rtyp='$rtyp'"
   eval "${2}_args='$3'"
@@ -74,7 +74,7 @@ prototype() {
 }
 
 specialize() {
-  local fn="$1"
+  fn="$1"
   shift
   for opt in "$@"; do
     eval "${fn}_${opt}=${fn}_${opt}"
@@ -84,13 +84,13 @@ specialize() {
 require() {
   for fn in $ALL_FUNCS; do
     for opt in "$@"; do
-      local ofn=$(eval "echo \$${fn}_${opt}")
+      ofn=$(eval "echo \$${fn}_${opt}")
       [ -z "$ofn" ] && continue
 
       # if we already have a default, then we can disable it, as we know
       # we can do better.
-      local best=$(eval "echo \$${fn}_default")
-      local best_ofn=$(eval "echo \$${best}")
+      best=$(eval "echo \$${fn}_default")
+      best_ofn=$(eval "echo \$${best}")
       [ -n "$best" ] && [ "$best_ofn" != "$ofn" ] && eval "${best}_link=false"
       eval "${fn}_default=${fn}_${opt}"
       eval "${fn}_${opt}_link=true"
@@ -121,15 +121,15 @@ process_forward_decls() {
 determine_indirection() {
   [ "$CONFIG_RUNTIME_CPU_DETECT" = "yes" ] || require $ALL_ARCHS
   for fn in $ALL_FUNCS; do
-    local n=""
-    local rtyp="$(eval "echo \$${fn}_rtyp")"
-    local args="$(eval "echo \"\$${fn}_args\"")"
-    local dfn="$(eval "echo \$${fn}_default")"
+    n=""
+    rtyp="$(eval "echo \$${fn}_rtyp")"
+    args="$(eval "echo \"\$${fn}_args\"")"
+    dfn="$(eval "echo \$${fn}_default")"
     dfn=$(eval "echo \$${dfn}")
     for opt in "$@"; do
-      local ofn=$(eval "echo \$${fn}_${opt}")
+      ofn=$(eval "echo \$${fn}_${opt}")
       [ -z "$ofn" ] && continue
-      local link=$(eval "echo \$${fn}_${opt}_link")
+      link=$(eval "echo \$${fn}_${opt}_link")
       [ "$link" = "false" ] && continue
       n="${n}x"
     done
@@ -143,12 +143,12 @@ determine_indirection() {
 
 declare_function_pointers() {
   for fn in $ALL_FUNCS; do
-    local rtyp="$(eval "echo \$${fn}_rtyp")"
-    local args="$(eval "echo \"\$${fn}_args\"")"
-    local dfn="$(eval "echo \$${fn}_default")"
+    rtyp="$(eval "echo \$${fn}_rtyp")"
+    args="$(eval "echo \"\$${fn}_args\"")"
+    dfn="$(eval "echo \$${fn}_default")"
     dfn=$(eval "echo \$${dfn}")
     for opt in "$@"; do
-      local ofn=$(eval "echo \$${fn}_${opt}")
+      ofn=$(eval "echo \$${fn}_${opt}")
       [ -z "$ofn" ] && continue
       echo "$rtyp ${ofn}($args);"
     done
@@ -163,20 +163,20 @@ declare_function_pointers() {
 
 set_function_pointers() {
   for fn in $ALL_FUNCS; do
-    local n=""
-    local rtyp="$(eval "echo \$${fn}_rtyp")"
-    local args="$(eval "echo \"\$${fn}_args\"")"
-    local dfn="$(eval "echo \$${fn}_default")"
+    n=""
+    rtyp="$(eval "echo \$${fn}_rtyp")"
+    args="$(eval "echo \"\$${fn}_args\"")"
+    dfn="$(eval "echo \$${fn}_default")"
     dfn=$(eval "echo \$${dfn}")
     if $(eval "echo \$${fn}_indirect"); then
       echo "    $fn = $dfn;"
       for opt in "$@"; do
-        local ofn=$(eval "echo \$${fn}_${opt}")
+        ofn=$(eval "echo \$${fn}_${opt}")
         [ -z "$ofn" ] && continue
         [ "$ofn" = "$dfn" ] && continue;
-        local link=$(eval "echo \$${fn}_${opt}_link")
+        link=$(eval "echo \$${fn}_${opt}_link")
         [ "$link" = "false" ] && continue
-        local cond="$(eval "echo \$have_${opt}")"
+        cond="$(eval "echo \$have_${opt}")"
         echo "    if (${cond}) $fn = $ofn;"
       done
     fi
@@ -185,7 +185,7 @@ set_function_pointers() {
 }
 
 filter() {
-  local filtered
+  filtered=""
  for opt in "$@"; do
     [ -z $(eval "echo \$disable_${opt}") ] && filtered="$filtered $opt"
   done
@@ -196,8 +196,9 @@ filter() {
 # Helper functions for generating the arch specific RTCD files
 #
 common_top() {
-  local outfile_basename=$(basename ${symbol:-rtcd.h})
-  local include_guard=$(echo $outfile_basename | tr '[a-z]' '[A-Z]' | tr -c '[A-Z]' _)
+  outfile_basename=$(basename ${symbol:-rtcd})
+  include_guard=$(echo $outfile_basename | tr '[a-z]' '[A-Z]' | \
+    tr -c '[A-Z0-9]' _)H_
   cat <<EOF
 #ifndef ${include_guard}
 #define ${include_guard}
@@ -227,7 +228,7 @@ x86() {
 
   # Assign the helper variable for each enabled extension
   for opt in $ALL_ARCHS; do
-    local uc=$(echo $opt | tr '[a-z]' '[A-Z]')
+    uc=$(echo $opt | tr '[a-z]' '[A-Z]')
     eval "have_${opt}=\"flags & HAS_${uc}\""
   done
 
@@ -254,7 +255,7 @@ arm() {
 
   # Assign the helper variable for each enabled extension
   for opt in $ALL_ARCHS; do
-    local uc=$(echo $opt | tr '[a-z]' '[A-Z]')
+    uc=$(echo $opt | tr '[a-z]' '[A-Z]')
     eval "have_${opt}=\"flags & HAS_${uc}\""
   done
diff --git a/configure b/configure
index f5b45d03591593de1a63a6cf217aed70967f7ba3..f55f798635b2d147a16caa18b7490e65af9c2670 100755
--- a/configure
+++ b/configure
@@ -106,6 +106,7 @@ all_platforms="${all_platforms} ppc64-darwin8-gcc"
 all_platforms="${all_platforms} ppc64-darwin9-gcc"
 all_platforms="${all_platforms} ppc64-linux-gcc"
 all_platforms="${all_platforms} sparc-solaris-gcc"
+all_platforms="${all_platforms} x86-android-gcc"
 all_platforms="${all_platforms} x86-darwin8-gcc"
 all_platforms="${all_platforms} x86-darwin8-icc"
 all_platforms="${all_platforms} x86-darwin9-gcc"
@@ -650,7 +651,7 @@ process_toolchain() {
         enabled postproc || die "postproc_visualizer requires postproc to be enabled"
     fi
 
-    # Enable unit tests if we have a working C++ compiler
+    # Enable unit tests by default if we have a working C++ compiler.
     case "$toolchain" in
         *-vs*)
             soft_enable unit_tests
@@ -663,7 +664,7 @@ process_toolchain() {
             # x86 targets.
             ;;
         *)
-            check_cxx "$@" <<EOF && soft_enable unit_tests
+            enabled pthread_h && check_cxx "$@" <<EOF && soft_enable unit_tests
int z;
EOF
             ;;
diff --git a/libs.mk b/libs.mk
index 1f0ade34d42fbd6613112ea54f8ac4f42ea56ae9..872a16bae0bd22c87dddb3ce22817b974a9ca3bc 100644
--- a/libs.mk
+++ b/libs.mk
@@ -17,7 +17,6 @@ else
   ASM:=.asm
 endif
 
-
 #
 # Calculate platform- and compiler-specific offsets for hand coded assembly
 #
@@ -167,7 +166,9 @@ CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/emmintrin_compat.h
 CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/vpx_once.h
 CODEC_SRCS-$(BUILD_LIBVPX) += $(BUILD_PFX)vpx_config.c
 INSTALL-SRCS-no += $(BUILD_PFX)vpx_config.c
+ifeq ($(ARCH_X86)$(ARCH_X86_64),yes)
 CODEC_SRCS-$(BUILD_LIBVPX) += third_party/x86inc/x86inc.asm
+endif
 CODEC_EXPORTS-$(BUILD_LIBVPX) += vpx/exports_com
 CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_enc
 CODEC_EXPORTS-$(CONFIG_DECODERS) += vpx/exports_dec
@@ -221,7 +222,6 @@ obj_int_extract.vcproj: $(SRC_PATH_BARE)/build/make/obj_int_extract.c
             -I"$(SRC_PATH_BARE)" \
 
 PROJECTS-$(BUILD_LIBVPX) += obj_int_extract.vcproj
-PROJECTS-$(BUILD_LIBVPX) += obj_int_extract.bat
 
 vpx.def: $(call enabled,CODEC_EXPORTS)
 	@echo "    [CREATE] $@"
@@ -325,7 +325,11 @@ vpx.pc: config.mk libs.mk
 	$(qexec)echo 'Requires:' >> $@
 	$(qexec)echo 'Conflicts:' >> $@
 	$(qexec)echo 'Libs: -L$${libdir} -lvpx -lm' >> $@
+ifeq ($(HAVE_PTHREAD_H),yes)
 	$(qexec)echo 'Libs.private: -lm -lpthread' >> $@
+else
+	$(qexec)echo 'Libs.private: -lm' >> $@
+endif
 	$(qexec)echo 'Cflags: -I$${includedir}' >> $@
 INSTALL-LIBS-yes += $(LIBSUBDIR)/pkgconfig/vpx.pc
 INSTALL_MAPS += $(LIBSUBDIR)/pkgconfig/%.pc %.pc
@@ -373,7 +377,7 @@ LIBVPX_TEST_DATA_PATH ?= .
 
 include $(SRC_PATH_BARE)/test/test.mk
 LIBVPX_TEST_SRCS=$(addprefix test/,$(call enabled,LIBVPX_TEST_SRCS))
-LIBVPX_TEST_BINS=./test_libvpx
+LIBVPX_TEST_BINS=./test_libvpx$(EXE_SFX)
 LIBVPX_TEST_DATA=$(addprefix $(LIBVPX_TEST_DATA_PATH)/,\
                      $(call enabled,LIBVPX_TEST_DATA))
 libvpx_test_data_url=http://downloads.webmproject.org/test_data/libvpx/$(1)
diff --git a/test/sad_test.cc b/test/sad_test.cc
index 72741a901e02406ad804809383e3224d249983b1..165e2c8f0c58182513ba2a63105f70848c5ad63f 100644
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
@@ -15,8 +15,13 @@
 extern "C" {
 #include "./vpx_config.h"
+#if CONFIG_VP8_ENCODER
 #include "./vp8_rtcd.h"
-#include "vp8/common/blockd.h"
+//#include "vp8/common/blockd.h"
+#endif
+#if CONFIG_VP9_ENCODER
+#include "./vp9_rtcd.h"
+#endif
 #include "vpx_mem/vpx_mem.h"
 }
 
@@ -32,14 +37,22 @@ typedef unsigned int (*sad_m_by_n_fn_t)(const unsigned char *source_ptr,
                                         int reference_stride,
                                         unsigned int max_sad);
 
+typedef void (*sad_n_by_n_by_4_fn_t)(const uint8_t *src_ptr,
+                                     int src_stride,
+                                     const unsigned char * const ref_ptr[],
+                                     int ref_stride,
+                                     unsigned int *sad_array);
+
 using libvpx_test::ACMRandom;
 
 namespace {
-class SADTest : public PARAMS(int, int, sad_m_by_n_fn_t) {
+class SADTestBase : public ::testing::Test {
  public:
+  SADTestBase(int width, int height) : width_(width), height_(height) {}
+
   static void SetUpTestCase() {
     source_data_ = reinterpret_cast<uint8_t*>(
-        vpx_memalign(kDataAlignment, kDataBufferSize));
+        vpx_memalign(kDataAlignment, kDataBlockSize));
     reference_data_ = reinterpret_cast<uint8_t*>(
         vpx_memalign(kDataAlignment, kDataBufferSize));
   }
@@ -52,36 +65,31 @@
   }
 
 protected:
+  // Handle blocks up to 4 blocks 64x64 with stride up to 128
   static const int kDataAlignment = 16;
-  static const int kDataBufferSize = 16 * 32;
+  static const int kDataBlockSize = 64 * 128;
+  static const int kDataBufferSize = 4 * kDataBlockSize;
 
   virtual void SetUp() {
-    sad_fn_ = GET_PARAM(2);
-    height_ = GET_PARAM(1);
-    width_ = GET_PARAM(0);
-    source_stride_ = width_ * 2;
+    source_stride_ = (width_ + 31) & ~31;
     reference_stride_ = width_ * 2;
     rnd_.Reset(ACMRandom::DeterministicSeed());
  }
 
-  sad_m_by_n_fn_t sad_fn_;
-  virtual unsigned int SAD(unsigned int max_sad) {
-    unsigned int ret;
-    REGISTER_STATE_CHECK(ret = sad_fn_(source_data_, source_stride_,
-                                       reference_data_, reference_stride_,
-                                       max_sad));
-    return ret;
+  virtual uint8_t* GetReference(int block_idx) {
+    return reference_data_ + block_idx * kDataBlockSize;
   }
 
   // Sum of Absolute Differences. Given two blocks, calculate the absolute
   // difference between two pixels in the same relative location; accumulate.
-  unsigned int ReferenceSAD(unsigned int max_sad) {
+  unsigned int ReferenceSAD(unsigned int max_sad, int block_idx = 0) {
     unsigned int sad = 0;
+    const uint8_t* const reference = GetReference(block_idx);
+
     for (int h = 0; h < height_; ++h) {
       for (int w = 0; w < width_; ++w) {
         sad += abs(source_data_[h * source_stride_ + w]
-                   - reference_data_[h * reference_stride_ + w]);
+                   - reference[h * reference_stride_ + w]);
       }
       if (sad > max_sad) {
         break;
@@ -106,6 +114,32 @@
     }
   }
 
+  int width_, height_;
+  static uint8_t* source_data_;
+  int source_stride_;
+  static uint8_t* reference_data_;
+  int reference_stride_;
+
+  ACMRandom rnd_;
+};
+
+class SADTest : public SADTestBase,
+    public ::testing::WithParamInterface<
+        std::tr1::tuple<int, int, sad_m_by_n_fn_t> > {
+ public:
+  SADTest() : SADTestBase(GET_PARAM(0), GET_PARAM(1)) {}
+
+ protected:
+  unsigned int SAD(unsigned int max_sad, int block_idx = 0) {
+    unsigned int ret;
+    const uint8_t* const reference = GetReference(block_idx);
+
+    REGISTER_STATE_CHECK(ret = GET_PARAM(2)(source_data_, source_stride_,
+                                            reference, reference_stride_,
+                                            max_sad));
+    return ret;
+  }
+
   void CheckSad(unsigned int max_sad) {
     unsigned int reference_sad, exp_sad;
 
@@ -119,19 +153,38 @@
       ASSERT_GE(exp_sad, reference_sad);
     }
   }
+};
 
-  // Handle blocks up to 16x16 with stride up to 32
-  int height_, width_;
-  static uint8_t* source_data_;
-  int source_stride_;
-  static uint8_t* reference_data_;
-  int reference_stride_;
+class SADx4Test : public SADTestBase,
+    public ::testing::WithParamInterface<
+        std::tr1::tuple<int, int, sad_n_by_n_by_4_fn_t> > {
+ public:
+  SADx4Test() : SADTestBase(GET_PARAM(0), GET_PARAM(1)) {}
 
-  ACMRandom rnd_;
+ protected:
+  void SADs(unsigned int *results) {
+    const uint8_t* refs[] = {GetReference(0), GetReference(1),
+                             GetReference(2), GetReference(3)};
+
+    REGISTER_STATE_CHECK(GET_PARAM(2)(source_data_, source_stride_,
+                                      refs, reference_stride_,
+                                      results));
+  }
+
+  void CheckSADs() {
+    unsigned int reference_sad, exp_sad[4];
+
+    SADs(exp_sad);
+    for (int block = 0; block < 4; block++) {
+      reference_sad = ReferenceSAD(UINT_MAX, block);
+
+      EXPECT_EQ(exp_sad[block], reference_sad) << "block " << block;
+    }
+  }
 };
 
-uint8_t* SADTest::source_data_ = NULL;
-uint8_t* SADTest::reference_data_ = NULL;
+uint8_t* SADTestBase::source_data_ = NULL;
+uint8_t* SADTestBase::reference_data_ = NULL;
 
 TEST_P(SADTest, MaxRef) {
   FillConstant(source_data_, source_stride_, 0);
@@ -139,12 +192,30 @@ TEST_P(SADTest, MaxRef) {
   CheckSad(UINT_MAX);
 }
 
+TEST_P(SADx4Test, MaxRef) {
+  FillConstant(source_data_, source_stride_, 0);
+  FillConstant(GetReference(0), reference_stride_, 255);
+  FillConstant(GetReference(1), reference_stride_, 255);
+  FillConstant(GetReference(2), reference_stride_, 255);
+  FillConstant(GetReference(3), reference_stride_, 255);
+  CheckSADs();
+}
+
 TEST_P(SADTest, MaxSrc) {
   FillConstant(source_data_, source_stride_, 255);
   FillConstant(reference_data_, reference_stride_, 0);
   CheckSad(UINT_MAX);
 }
 
+TEST_P(SADx4Test, MaxSrc) {
+  FillConstant(source_data_, source_stride_, 255);
+  FillConstant(GetReference(0), reference_stride_, 0);
+  FillConstant(GetReference(1), reference_stride_, 0);
+  FillConstant(GetReference(2), reference_stride_, 0);
+  FillConstant(GetReference(3), reference_stride_, 0);
+  CheckSADs();
+}
+
 TEST_P(SADTest, ShortRef) {
   int tmp_stride = reference_stride_;
   reference_stride_ >>= 1;
@@ -154,6 +225,18 @@
   reference_stride_ = tmp_stride;
 }
 
+TEST_P(SADx4Test, ShortRef) {
+  int tmp_stride = reference_stride_;
+  reference_stride_ >>= 1;
+  FillRandom(source_data_, source_stride_);
+  FillRandom(GetReference(0), reference_stride_);
+  FillRandom(GetReference(1), reference_stride_);
+  FillRandom(GetReference(2), reference_stride_);
+  FillRandom(GetReference(3), reference_stride_);
+  CheckSADs();
+  reference_stride_ = tmp_stride;
+}
+
 TEST_P(SADTest, UnalignedRef) {
   // The reference frame, but not the source frame, may be unaligned for
   // certain types of searches.
@@ -165,6 +248,20 @@
   reference_stride_ = tmp_stride;
 }
 
+TEST_P(SADx4Test, UnalignedRef) {
+  // The reference frame, but not the source frame, may be unaligned for
+  // certain types of searches.
+  int tmp_stride = reference_stride_;
+  reference_stride_ -= 1;
+  FillRandom(source_data_, source_stride_);
+  FillRandom(GetReference(0), reference_stride_);
+  FillRandom(GetReference(1), reference_stride_);
+  FillRandom(GetReference(2), reference_stride_);
+  FillRandom(GetReference(3), reference_stride_);
+  CheckSADs();
+  reference_stride_ = tmp_stride;
+}
+
 TEST_P(SADTest, ShortSrc) {
   int tmp_stride = source_stride_;
   source_stride_ >>= 1;
@@ -174,6 +271,18 @@
   source_stride_ = tmp_stride;
 }
 
+TEST_P(SADx4Test, ShortSrc) {
+  int tmp_stride = source_stride_;
+  source_stride_ >>= 1;
+  FillRandom(source_data_, source_stride_);
+  FillRandom(GetReference(0), reference_stride_);
+  FillRandom(GetReference(1), reference_stride_);
+  FillRandom(GetReference(2), reference_stride_);
+  FillRandom(GetReference(3), reference_stride_);
+  CheckSADs();
+  source_stride_ = tmp_stride;
+}
+
 TEST_P(SADTest, MaxSAD) {
   // Verify that, when max_sad is set, the implementation does not return a
   // value lower than the reference.
@@ -184,17 +293,61 @@
 
 using std::tr1::make_tuple;
 
+#if CONFIG_VP8_ENCODER && CONFIG_VP9_ENCODER
+#define VP8_VP9_SEPARATOR ,
+#else
+#define VP8_VP9_SEPARATOR
+#endif
+
+#if CONFIG_VP8_ENCODER
 const sad_m_by_n_fn_t sad_16x16_c = vp8_sad16x16_c;
 const sad_m_by_n_fn_t sad_8x16_c = vp8_sad8x16_c;
 const sad_m_by_n_fn_t sad_16x8_c = vp8_sad16x8_c;
 const sad_m_by_n_fn_t sad_8x8_c = vp8_sad8x8_c;
 const sad_m_by_n_fn_t sad_4x4_c = vp8_sad4x4_c;
+#endif
+#if CONFIG_VP9_ENCODER
+const sad_m_by_n_fn_t sad_64x64_c_vp9 = vp9_sad64x64_c;
+const sad_m_by_n_fn_t sad_32x32_c_vp9 = vp9_sad32x32_c;
+const sad_m_by_n_fn_t sad_16x16_c_vp9 = vp9_sad16x16_c;
+const sad_m_by_n_fn_t sad_8x16_c_vp9 = vp9_sad8x16_c;
+const sad_m_by_n_fn_t sad_16x8_c_vp9 = vp9_sad16x8_c;
+const sad_m_by_n_fn_t sad_8x8_c_vp9 = vp9_sad8x8_c;
+const sad_m_by_n_fn_t sad_4x4_c_vp9 = vp9_sad4x4_c;
+#endif
 INSTANTIATE_TEST_CASE_P(C, SADTest, ::testing::Values(
+#if CONFIG_VP8_ENCODER
             make_tuple(16, 16, sad_16x16_c),
             make_tuple(8, 16, sad_8x16_c),
             make_tuple(16, 8, sad_16x8_c),
             make_tuple(8, 8, sad_8x8_c),
-            make_tuple(4, 4, sad_4x4_c)));
+            make_tuple(4, 4, sad_4x4_c)
+#endif
+            VP8_VP9_SEPARATOR
+#if CONFIG_VP9_ENCODER
+            make_tuple(64, 64, sad_64x64_c_vp9),
+            make_tuple(32, 32, sad_32x32_c_vp9),
+            make_tuple(16, 16, sad_16x16_c_vp9),
+            make_tuple(8, 16, sad_8x16_c_vp9),
+            make_tuple(16, 8, sad_16x8_c_vp9),
+            make_tuple(8, 8, sad_8x8_c_vp9),
+            make_tuple(4, 4, sad_4x4_c_vp9)
+#endif
+            ));
+
+#if CONFIG_VP9_ENCODER
+const sad_n_by_n_by_4_fn_t sad_64x64x4d_c = vp9_sad64x64x4d_c;
+const sad_n_by_n_by_4_fn_t sad_32x32x4d_c = vp9_sad32x32x4d_c;
+const sad_n_by_n_by_4_fn_t sad_16x16x4d_c = vp9_sad16x16x4d_c;
+const sad_n_by_n_by_4_fn_t sad_8x8x4d_c = vp9_sad8x8x4d_c;
+const sad_n_by_n_by_4_fn_t sad_4x4x4d_c = vp9_sad4x4x4d_c;
+INSTANTIATE_TEST_CASE_P(C, SADx4Test, ::testing::Values(
+            make_tuple(64, 64, sad_64x64x4d_c),
+            make_tuple(32, 32, sad_32x32x4d_c),
+            make_tuple(16, 16, sad_16x16x4d_c),
+            make_tuple(8, 8, sad_8x8x4d_c),
+            make_tuple(4, 4, sad_4x4x4d_c)));
+#endif
 
 // ARM tests
 #if HAVE_MEDIA
@@ -219,31 +372,120 @@ INSTANTIATE_TEST_CASE_P(NEON, SADTest, ::testing::Values(
 
 // X86 tests
 #if HAVE_MMX
+#if CONFIG_VP8_ENCODER
 const sad_m_by_n_fn_t sad_16x16_mmx = vp8_sad16x16_mmx;
 const sad_m_by_n_fn_t sad_8x16_mmx = vp8_sad8x16_mmx;
 const sad_m_by_n_fn_t sad_16x8_mmx = vp8_sad16x8_mmx;
 const sad_m_by_n_fn_t sad_8x8_mmx = vp8_sad8x8_mmx;
 const sad_m_by_n_fn_t sad_4x4_mmx = vp8_sad4x4_mmx;
+#endif
+#if CONFIG_VP9_ENCODER
+const sad_m_by_n_fn_t sad_16x16_mmx_vp9 = vp9_sad16x16_mmx;
+const sad_m_by_n_fn_t sad_8x16_mmx_vp9 = vp9_sad8x16_mmx;
+const sad_m_by_n_fn_t sad_16x8_mmx_vp9 = vp9_sad16x8_mmx;
+const sad_m_by_n_fn_t sad_8x8_mmx_vp9 = vp9_sad8x8_mmx;
+const sad_m_by_n_fn_t sad_4x4_mmx_vp9 = vp9_sad4x4_mmx;
+#endif
+
 INSTANTIATE_TEST_CASE_P(MMX, SADTest, ::testing::Values(
+#if CONFIG_VP8_ENCODER
             make_tuple(16, 16, sad_16x16_mmx),
             make_tuple(8, 16, sad_8x16_mmx),
             make_tuple(16, 8, sad_16x8_mmx),
             make_tuple(8, 8, sad_8x8_mmx),
-            make_tuple(4, 4, sad_4x4_mmx)));
+            make_tuple(4, 4, sad_4x4_mmx)
+#endif
+            VP8_VP9_SEPARATOR
+#if CONFIG_VP9_ENCODER
+            make_tuple(16, 16, sad_16x16_mmx_vp9),
+            make_tuple(8, 16, sad_8x16_mmx_vp9),
+            make_tuple(16, 8, sad_16x8_mmx_vp9),
+            make_tuple(8, 8, sad_8x8_mmx_vp9),
+            make_tuple(4, 4, sad_4x4_mmx_vp9)
+#endif
+            ));
+#endif
+
+#if HAVE_SSE
+#if CONFIG_VP9_ENCODER
+const sad_m_by_n_fn_t sad_4x4_sse_vp9 = vp9_sad4x4_sse;
+INSTANTIATE_TEST_CASE_P(SSE, SADTest, ::testing::Values(
+            make_tuple(4, 4, sad_4x4_sse_vp9)));
+
+const sad_n_by_n_by_4_fn_t sad_4x4x4d_sse = vp9_sad4x4x4d_sse;
+INSTANTIATE_TEST_CASE_P(SSE, SADx4Test, ::testing::Values(
+            make_tuple(4, 4, sad_4x4x4d_sse)));
 #endif
+#endif
+
 #if HAVE_SSE2
+#if CONFIG_VP8_ENCODER
 const sad_m_by_n_fn_t sad_16x16_wmt = vp8_sad16x16_wmt;
 const sad_m_by_n_fn_t sad_8x16_wmt = vp8_sad8x16_wmt;
 const sad_m_by_n_fn_t sad_16x8_wmt = vp8_sad16x8_wmt;
 const sad_m_by_n_fn_t sad_8x8_wmt = vp8_sad8x8_wmt;
 const sad_m_by_n_fn_t sad_4x4_wmt = vp8_sad4x4_wmt;
+#endif
+#if CONFIG_VP9_ENCODER
+const sad_m_by_n_fn_t sad_64x64_sse2_vp9 = vp9_sad64x64_sse2;
+const sad_m_by_n_fn_t sad_32x32_sse2_vp9 = vp9_sad32x32_sse2;
+const sad_m_by_n_fn_t sad_16x16_sse2_vp9 = vp9_sad16x16_sse2;
+const sad_m_by_n_fn_t sad_8x16_sse2_vp9 = vp9_sad8x16_sse2;
+const sad_m_by_n_fn_t sad_16x8_sse2_vp9 = vp9_sad16x8_sse2;
+const sad_m_by_n_fn_t sad_8x8_sse2_vp9 = vp9_sad8x8_sse2;
+#endif
 INSTANTIATE_TEST_CASE_P(SSE2, SADTest, ::testing::Values(
+#if CONFIG_VP8_ENCODER
             make_tuple(16, 16, sad_16x16_wmt),
             make_tuple(8, 16, sad_8x16_wmt),
             make_tuple(16, 8, sad_16x8_wmt),
             make_tuple(8, 8, sad_8x8_wmt),
-            make_tuple(4, 4, sad_4x4_wmt)));
+            make_tuple(4, 4, sad_4x4_wmt)
+#endif
+            VP8_VP9_SEPARATOR
+#if CONFIG_VP9_ENCODER
+            make_tuple(64, 64, sad_64x64_sse2_vp9),
+            make_tuple(32, 32, sad_32x32_sse2_vp9),
+            make_tuple(16, 16, sad_16x16_sse2_vp9),
+            make_tuple(8, 16, sad_8x16_sse2_vp9),
+            make_tuple(16, 8, sad_16x8_sse2_vp9),
+            make_tuple(8, 8, sad_8x8_sse2_vp9)
+#endif
+            ));
+
+#if CONFIG_VP9_ENCODER
+const sad_n_by_n_by_4_fn_t sad_64x64x4d_sse2 = vp9_sad64x64x4d_sse2;
+const sad_n_by_n_by_4_fn_t sad_32x32x4d_sse2 = vp9_sad32x32x4d_sse2;
+const sad_n_by_n_by_4_fn_t sad_16x16x4d_sse2 = vp9_sad16x16x4d_sse2;
+const sad_n_by_n_by_4_fn_t sad_16x8x4d_sse2 = vp9_sad16x8x4d_sse2;
+const sad_n_by_n_by_4_fn_t sad_8x16x4d_sse2 = vp9_sad8x16x4d_sse2;
+const sad_n_by_n_by_4_fn_t sad_8x8x4d_sse2 = vp9_sad8x8x4d_sse2;
+INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::Values(
+            make_tuple(64, 64, sad_64x64x4d_sse2),
+            make_tuple(32, 32, sad_32x32x4d_sse2),
+            make_tuple(16, 16, sad_16x16x4d_sse2),
+            make_tuple(16, 8, sad_16x8x4d_sse2),
+            make_tuple(8, 16, sad_8x16x4d_sse2),
+            make_tuple(8, 8, sad_8x8x4d_sse2)));
 #endif
+#endif
+
+#if HAVE_SSE3
+#if CONFIG_VP8_ENCODER
+const sad_n_by_n_by_4_fn_t sad_16x16x4d_sse3 = vp8_sad16x16x4d_sse3;
+const sad_n_by_n_by_4_fn_t sad_16x8x4d_sse3 = vp8_sad16x8x4d_sse3;
+const sad_n_by_n_by_4_fn_t sad_8x16x4d_sse3 = vp8_sad8x16x4d_sse3;
+const sad_n_by_n_by_4_fn_t sad_8x8x4d_sse3 = vp8_sad8x8x4d_sse3;
+const sad_n_by_n_by_4_fn_t sad_4x4x4d_sse3 = vp8_sad4x4x4d_sse3;
+INSTANTIATE_TEST_CASE_P(SSE3, SADx4Test, ::testing::Values(
+            make_tuple(16, 16, sad_16x16x4d_sse3),
+            make_tuple(16, 8, sad_16x8x4d_sse3),
+            make_tuple(8, 16, sad_8x16x4d_sse3),
+            make_tuple(8, 8, sad_8x8x4d_sse3),
+            make_tuple(4, 4, sad_4x4x4d_sse3)));
+#endif
+#endif
+
 #if HAVE_SSSE3
 const sad_m_by_n_fn_t sad_16x16_sse3 = vp8_sad16x16_sse3;
 INSTANTIATE_TEST_CASE_P(SSE3, SADTest, ::testing::Values(
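Note: SADx4Test exercises the new sad_n_by_n_by_4_fn_t entry points, which score one source block against four reference candidates in a single call (the layout GetReference() provides: four blocks spaced kDataBlockSize apart). A minimal C sketch of the calling convention, using the vp9_sad16x16x4d_c function named above; the helper and variable names are illustrative:

    #include <stdint.h>
    #include "./vp9_rtcd.h"  /* declares vp9_sad16x16x4d_c */

    /* Pick the best of four 16x16 candidates with one x4d call (sketch). */
    static int best_of_four(const uint8_t *src, int src_stride,
                            const unsigned char *const refs[4], int ref_stride)
    {
        unsigned int sads[4];
        int i, best = 0;

        vp9_sad16x16x4d_c(src, src_stride, refs, ref_stride, sads);
        for (i = 1; i < 4; ++i)
            if (sads[i] < sads[best])
                best = i;
        return best;  /* index of the candidate with the smallest SAD */
    }

This is why the tests fill four references and then compare each slot of the result array against ReferenceSAD(UINT_MAX, block).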
diff --git a/test/sixtap_predict_test.cc b/test/sixtap_predict_test.cc
index 2d4581dc07fb0b4413d2a8642ba3938df207c1d5..9ab7a73479d1e184a8e9dc1208321b7cb122c185 100644
--- a/test/sixtap_predict_test.cc
+++ b/test/sixtap_predict_test.cc
@@ -61,9 +61,9 @@ class SixtapPredictTest : public PARAMS(int, int, sixtap_predict_fn_t) {
     width_ = GET_PARAM(0);
     height_ = GET_PARAM(1);
     sixtap_predict_ = GET_PARAM(2);
-    memset(src_, 0, sizeof(src_));
-    memset(dst_, 0, sizeof(dst_));
-    memset(dst_c_, 0, sizeof(dst_c_));
+    memset(src_, 0, kSrcSize);
+    memset(dst_, 0, kDstSize);
+    memset(dst_c_, 0, kDstSize);
   }
 
   int width_;
diff --git a/test/test.mk b/test/test.mk
index 3d56bd838c4a1b032db8b6f32aa8ce69d4c0b4a6..f7a1462673ce7a3f0793a8ceaa65a2ec7451a42c 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -50,10 +50,11 @@ endif
 LIBVPX_TEST_SRCS-yes += idctllm_test.cc
 LIBVPX_TEST_SRCS-yes += intrapred_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_POSTPROC) += pp_filter_test.cc
-LIBVPX_TEST_SRCS-yes += sad_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += sad_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += set_roi.cc
 LIBVPX_TEST_SRCS-yes += sixtap_predict_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += subtract_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += variance_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += vp8_fdct4x4_test.cc
 endif # VP8
diff --git a/test/variance_test.cc b/test/variance_test.cc
index 83f1139949dfb981fc7e25b17e414a7de429e8c2..120df316e0d3cdcf2ab976ae98f57b9ed021f3cd 100644
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -12,11 +12,17 @@
 
 #include "third_party/googletest/src/include/gtest/gtest.h"
 
+#include "vpx/vpx_integer.h"
 #include "vpx_config.h"
 extern "C" {
-#include "vp9/encoder/vp9_variance.h"
-#include "vpx/vpx_integer.h"
-#include "vp9_rtcd.h"
+#if CONFIG_VP8_ENCODER
+# include "vp8/common/variance.h"
+# include "vp8_rtcd.h"
+#endif
+#if CONFIG_VP9_ENCODER
+# include "vp9/encoder/vp9_variance.h"
+# include "vp9_rtcd.h"
+#endif
 }
 
 namespace {
@@ -25,11 +31,12 @@ using ::std::tr1::get;
 using ::std::tr1::make_tuple;
 using ::std::tr1::tuple;
 
-class VP9VarianceTest :
-    public ::testing::TestWithParam<tuple<int, int, vp9_variance_fn_t> > {
+template<typename VarianceFunctionType>
+class VarianceTest :
+    public ::testing::TestWithParam<tuple<int, int, VarianceFunctionType> > {
  public:
   virtual void SetUp() {
-    const tuple<int, int, vp9_variance_fn_t>& params = GetParam();
+    const tuple<int, int, VarianceFunctionType>& params = this->GetParam();
     width_ = get<0>(params);
     height_ = get<1>(params);
     variance_ = get<2>(params);
@@ -47,15 +54,20 @@
   }
 
  protected:
+  void ZeroTest();
+  void OneQuarterTest();
+
   uint8_t* src_;
   uint8_t* ref_;
   int width_;
   int height_;
   int block_size_;
-  vp9_variance_fn_t variance_;
+  VarianceFunctionType variance_;
+
 };
 
-TEST_P(VP9VarianceTest, Zero) {
+template<typename VarianceFunctionType>
+void VarianceTest<VarianceFunctionType>::ZeroTest() {
   for (int i = 0; i <= 255; ++i) {
     memset(src_, i, block_size_);
     for (int j = 0; j <= 255; ++j) {
@@ -67,7 +79,8 @@
   }
 }
 
-TEST_P(VP9VarianceTest, OneQuarter) {
+template<typename VarianceFunctionType>
+void VarianceTest<VarianceFunctionType>::OneQuarterTest() {
   memset(src_, 255, block_size_);
   const int half = block_size_ / 2;
   memset(ref_, 255, half);
@@ -78,6 +91,74 @@
   EXPECT_EQ(expected, var);
 }
 
+// -----------------------------------------------------------------------------
+// VP8 test cases.
+
+namespace vp8 {
+
+#if CONFIG_VP8_ENCODER
+typedef VarianceTest<vp8_variance_fn_t> VP8VarianceTest;
+
+TEST_P(VP8VarianceTest, Zero) { ZeroTest(); }
+TEST_P(VP8VarianceTest, OneQuarter) { OneQuarterTest(); }
+
+const vp8_variance_fn_t variance4x4_c = vp8_variance4x4_c;
+const vp8_variance_fn_t variance8x8_c = vp8_variance8x8_c;
+const vp8_variance_fn_t variance8x16_c = vp8_variance8x16_c;
+const vp8_variance_fn_t variance16x8_c = vp8_variance16x8_c;
+const vp8_variance_fn_t variance16x16_c = vp8_variance16x16_c;
+INSTANTIATE_TEST_CASE_P(
+    C, VP8VarianceTest,
+    ::testing::Values(make_tuple(4, 4, variance4x4_c),
+                      make_tuple(8, 8, variance8x8_c),
+                      make_tuple(8, 16, variance8x16_c),
+                      make_tuple(16, 8, variance16x8_c),
+                      make_tuple(16, 16, variance16x16_c)));
+
+#if HAVE_MMX
+const vp8_variance_fn_t variance4x4_mmx = vp8_variance4x4_mmx;
+const vp8_variance_fn_t variance8x8_mmx = vp8_variance8x8_mmx;
+const vp8_variance_fn_t variance8x16_mmx = vp8_variance8x16_mmx;
+const vp8_variance_fn_t variance16x8_mmx = vp8_variance16x8_mmx;
+const vp8_variance_fn_t variance16x16_mmx = vp8_variance16x16_mmx;
+INSTANTIATE_TEST_CASE_P(
+    MMX, VP8VarianceTest,
+    ::testing::Values(make_tuple(4, 4, variance4x4_mmx),
+                      make_tuple(8, 8, variance8x8_mmx),
+                      make_tuple(8, 16, variance8x16_mmx),
+                      make_tuple(16, 8, variance16x8_mmx),
+                      make_tuple(16, 16, variance16x16_mmx)));
+#endif
+
+#if HAVE_SSE2
+const vp8_variance_fn_t variance4x4_wmt = vp8_variance4x4_wmt;
+const vp8_variance_fn_t variance8x8_wmt = vp8_variance8x8_wmt;
+const vp8_variance_fn_t variance8x16_wmt = vp8_variance8x16_wmt;
+const vp8_variance_fn_t variance16x8_wmt = vp8_variance16x8_wmt;
+const vp8_variance_fn_t variance16x16_wmt = vp8_variance16x16_wmt;
+INSTANTIATE_TEST_CASE_P(
+    SSE2, VP8VarianceTest,
+    ::testing::Values(make_tuple(4, 4, variance4x4_wmt),
+                      make_tuple(8, 8, variance8x8_wmt),
+                      make_tuple(8, 16, variance8x16_wmt),
+                      make_tuple(16, 8, variance16x8_wmt),
+                      make_tuple(16, 16, variance16x16_wmt)));
+#endif
+#endif  // CONFIG_VP8_ENCODER
+
+}  // namespace vp8
+
+// -----------------------------------------------------------------------------
+// VP9 test cases.
+
+namespace vp9 {
+
+#if CONFIG_VP9_ENCODER
+typedef VarianceTest<vp9_variance_fn_t> VP9VarianceTest;
+
+TEST_P(VP9VarianceTest, Zero) { ZeroTest(); }
+TEST_P(VP9VarianceTest, OneQuarter) { OneQuarterTest(); }
+
 const vp9_variance_fn_t variance4x4_c = vp9_variance4x4_c;
 const vp9_variance_fn_t variance8x8_c = vp9_variance8x8_c;
 const vp9_variance_fn_t variance8x16_c = vp9_variance8x16_c;
@@ -120,4 +201,8 @@ INSTANTIATE_TEST_CASE_P(
                       make_tuple(16, 8, variance16x8_wmt),
                       make_tuple(16, 16, variance16x16_wmt)));
 #endif
+#endif  // CONFIG_VP9_ENCODER
+
+}  // namespace vp9
+
 }  // namespace
diff --git a/third_party/x86inc/x86inc.asm b/third_party/x86inc/x86inc.asm
index d9927ecfa9c267fb54c4dfe0f573db12b77c3f69..a66a96bba6273fedb4410360c3acd9abc7082a8f 100644
--- a/third_party/x86inc/x86inc.asm
+++ b/third_party/x86inc/x86inc.asm
@@ -61,6 +61,8 @@
     %define mangle(x) x
 %elifidn __OUTPUT_FORMAT__,x64
     %define mangle(x) x
+%elifidn __OUTPUT_FORMAT__,win64
+    %define mangle(x) x
 %else
     %define mangle(x) _ %+ x
 %endif
@@ -112,7 +114,12 @@
 %endif
 
 ; Always use long nops (reduces 0x90 spam in disassembly on x86_32)
+%ifndef __NASM_VER__
 CPU amdnop
+%else
+%use smartalign
+ALIGNMODE k7
+%endif
 
 ; Macros to eliminate most code duplication between x86_32 and x86_64:
 ; Currently this works only for leaf functions which load all their arguments
diff --git a/vp8/common/mfqe.c b/vp8/common/mfqe.c
index 8a8f92f6818bccd325e8ed1a1e99f8f13bf784e9..069332660e3276c4075fde9cf7c1069efdbf3e3a 100644
--- a/vp8/common/mfqe.c
+++ b/vp8/common/mfqe.c
@@ -280,7 +280,7 @@ void vp8_multiframe_quality_enhance
     FRAME_TYPE frame_type = cm->frame_type;
     /* Point at base of Mb MODE_INFO list has motion vectors etc */
-    const MODE_INFO *mode_info_context = cm->mi;
+    const MODE_INFO *mode_info_context = cm->show_frame_mi;
     int mb_row;
     int mb_col;
     int totmap, map[4];
diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h
index 03c97187a122e4d9aef8aa415404d2585ed432ed..276dd72ead07721a2dc8652a6d7078f7986258e3 100644
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@@ -127,7 +127,8 @@ typedef struct VP8Common
     MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */
     MODE_INFO *prev_mi;  /* 'mi' from last frame (points into prev_mip) */
 #endif
-
+    MODE_INFO *show_frame_mi; /* MODE_INFO for the last decoded frame
+                                 to show */
     LOOPFILTERTYPE filter_type;
 
     loop_filter_info_n lf_info;
diff --git a/vp8/common/onyxd.h b/vp8/common/onyxd.h
index fd7e051ed69b6714551b60be239b617bb28b5f6e..97c81c130a0adc3443ec945a32fc38a36d9e9d4b 100644
--- a/vp8/common/onyxd.h
+++ b/vp8/common/onyxd.h
@@ -34,7 +34,6 @@ extern "C"
         int postprocess;
         int max_threads;
         int error_concealment;
-        int input_fragments;
     } VP8D_CONFIG;
 
     typedef enum
@@ -56,10 +55,6 @@ extern "C"
     vpx_codec_err_t vp8dx_get_reference(struct VP8D_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd);
     vpx_codec_err_t vp8dx_set_reference(struct VP8D_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd);
 
-    struct VP8D_COMP* vp8dx_create_decompressor(VP8D_CONFIG *oxcf);
-
-    void vp8dx_remove_decompressor(struct VP8D_COMP* comp);
-
 #ifdef __cplusplus
 }
 #endif
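Note: the mfqe.c and onyxc_int.h hunks above (and the postproc.c hunk below) switch post-processing from cm->mi to the new cm->show_frame_mi. The motivation: with alt-ref or other non-displayed frames, the most recently decoded mode info in cm->mi can belong to a frame the viewer never sees, so deblocking and MFQE thresholds would be derived from the wrong frame. The decoder-side bookkeeping, paraphrased in C from the onyxd_if.c hunk later in this patch:

    /* After a frame is decoded (see vp8dx_receive_compressed_data below): */
    if (cm->show_frame)
    {
        cm->current_video_frame++;
        cm->show_frame_mi = cm->mi;  /* remember mode info of shown frames only */
    }
    /* vp8_deblock() and vp8_multiframe_quality_enhance() then read
     * cm->show_frame_mi, never the possibly-invisible cm->mi. */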
diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c
index fd313b6fe768fa236917148323fe7c007210233a..e40fb111cd774850dd8594c4602a9512203276d0 100644
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -334,7 +334,7 @@ void vp8_deblock(VP8_COMMON *cm,
     double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065;
     int ppl = (int)(level + .5);
 
-    const MODE_INFO *mode_info_context = cm->mi;
+    const MODE_INFO *mode_info_context = cm->show_frame_mi;
     int mbr, mbc;
 
     /* The pixel thresholds are adjusted according to if or not the macroblock
diff --git a/vp8/common/variance_c.c b/vp8/common/variance_c.c
index da08affb81a2237254ff4fb9a7d08903ff0273a1..773b655efc518128e9ac38c71a025f78ef21e842 100644
--- a/vp8/common/variance_c.c
+++ b/vp8/common/variance_c.c
@@ -75,7 +75,7 @@ unsigned int vp8_variance16x16_c(
 
     variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
     *sse = var;
-    return (var - ((unsigned int)(avg * avg) >> 8));
+    return (var - (((unsigned int)avg * avg) >> 8));
 }
 
 unsigned int vp8_variance8x16_c(
@@ -91,7 +91,7 @@ unsigned int vp8_variance8x16_c(
 
     variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg);
     *sse = var;
-    return (var - ((unsigned int)(avg * avg) >> 7));
+    return (var - (((unsigned int)avg * avg) >> 7));
 }
 
 unsigned int vp8_variance16x8_c(
@@ -107,7 +107,7 @@ unsigned int vp8_variance16x8_c(
 
     variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg);
     *sse = var;
-    return (var - ((unsigned int)(avg * avg) >> 7));
+    return (var - (((unsigned int)avg * avg) >> 7));
 }
 
 
@@ -124,7 +124,7 @@ unsigned int vp8_variance8x8_c(
 
     variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg);
     *sse = var;
-    return (var - ((unsigned int)(avg * avg) >> 6));
+    return (var - (((unsigned int)avg * avg) >> 6));
 }
 
 unsigned int vp8_variance4x4_c(
@@ -140,7 +140,7 @@ unsigned int vp8_variance4x4_c(
 
     variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &var, &avg);
     *sse = var;
-    return (var - ((unsigned int)(avg * avg) >> 4));
+    return (var - (((unsigned int)avg * avg) >> 4));
 }
diff --git a/vp8/common/x86/iwalsh_mmx.asm b/vp8/common/x86/iwalsh_mmx.asm
index 4aac09484de7251d4e59609a9d416d9c878d29c5..158c3b745838ca406bf31d29c0e004ed7000aaa2 100644
--- a/vp8/common/x86/iwalsh_mmx.asm
+++ b/vp8/common/x86/iwalsh_mmx.asm
@@ -24,7 +24,7 @@ sym(vp8_short_inv_walsh4x4_mmx):
 
     movq        mm0, [rdx + 0]    ;ip[0]
     movq        mm1, [rdx + 8]    ;ip[4]
-    movd        mm7, rax
+    movq        mm7, rax
 
     movq        mm2, [rdx + 16]   ;ip[8]
     movq        mm3, [rdx + 24]   ;ip[12]
diff --git a/vp8/common/x86/loopfilter_block_sse2.asm b/vp8/common/x86/loopfilter_block_sse2.asm
index 3d45c617b7ecc051647029667360ce3933544ff0..6d5aaa19db79045ff9a343ce68eb4843770ddc00 100644
--- a/vp8/common/x86/loopfilter_block_sse2.asm
+++ b/vp8/common/x86/loopfilter_block_sse2.asm
@@ -136,7 +136,7 @@ global sym(vp8_loop_filter_bh_y_sse2) PRIVATE
 sym(vp8_loop_filter_bh_y_sse2):
 
-%ifidn __OUTPUT_FORMAT__,x64
+%if LIBVPX_YASM_WIN64
     %define src      rcx ; src_ptr
     %define stride   rdx ; src_pixel_step
     %define blimit   r8
@@ -256,7 +256,7 @@ LF_FILTER xmm0, xmm1, xmm3, xmm8, xmm4, xmm2
     movdqa      i12, xmm3
     movdqa      i13, xmm8
 
-%ifidn __OUTPUT_FORMAT__,x64
+%if LIBVPX_YASM_WIN64
     pop         r13
     pop         r12
     RESTORE_XMM
@@ -278,7 +278,7 @@ LF_FILTER xmm0, xmm1, xmm3, xmm8, xmm4, xmm2
 global sym(vp8_loop_filter_bv_y_sse2) PRIVATE
 sym(vp8_loop_filter_bv_y_sse2):
 
-%ifidn __OUTPUT_FORMAT__,x64
+%if LIBVPX_YASM_WIN64
     %define src      rcx ; src_ptr
     %define stride   rdx ; src_pixel_step
     %define blimit   r8
@@ -779,7 +779,7 @@ LF_FILTER xmm0, xmm1, xmm4, xmm8, xmm3, xmm2
 
     ; un-ALIGN_STACK
     pop         rsp
-%ifidn __OUTPUT_FORMAT__,x64
+%if LIBVPX_YASM_WIN64
     pop         r13
     pop         r12
     RESTORE_XMM
diff --git a/vp8/common/x86/mfqe_sse2.asm b/vp8/common/x86/mfqe_sse2.asm
index c1d21743d95f5633641e0028c2de4f9b70d95cb4..a8a7f568dcdfb09365114b7c02a1e24e37f23588 100644
--- a/vp8/common/x86/mfqe_sse2.asm
+++ b/vp8/common/x86/mfqe_sse2.asm
@@ -271,7 +271,13 @@ sym(vp8_variance_and_sad_16x16_sse2):
 SECTION_RODATA
 align 16
 t128:
+%ifndef __NASM_VER__
     ddq 128
+%elif CONFIG_BIG_ENDIAN
+    dq  0, 128
+%else
+    dq  128, 0
+%endif
 align 16
 tMFQE: ; 1 << MFQE_PRECISION
     times 8 dw 0x10
diff --git a/vp8/common/x86/sad_sse3.asm b/vp8/common/x86/sad_sse3.asm
index f90a589985a4570bd557af0049d641f9fefc8c3e..69c8d376973c424fffa54545e05a93f7649bed22 100644
--- a/vp8/common/x86/sad_sse3.asm
+++ b/vp8/common/x86/sad_sse3.asm
@@ -33,7 +33,7 @@
         movsxd      rax,        dword ptr arg(1) ; src_stride
         movsxd      rdx,        dword ptr arg(3) ; ref_stride
 %else
-  %ifidn __OUTPUT_FORMAT__,x64
+  %if LIBVPX_YASM_WIN64
         SAVE_XMM 7, u
     %define     src_ptr     rcx
     %define     src_stride  rdx
@@ -76,7 +76,7 @@
         pop         rsi
         pop         rbp
 %else
-  %ifidn __OUTPUT_FORMAT__,x64
+  %if LIBVPX_YASM_WIN64
         RESTORE_XMM
     %endif
 %endif
@@ -111,7 +111,7 @@
         xchg        rbx,        rax
 %else
-  %ifidn __OUTPUT_FORMAT__,x64
+  %if LIBVPX_YASM_WIN64
         SAVE_XMM 7, u
     %define     src_ptr     rcx
     %define     src_stride  rdx
@@ -156,7 +156,7 @@
         pop         rsi
         pop         rbp
 %else
-  %ifidn __OUTPUT_FORMAT__,x64
+  %if LIBVPX_YASM_WIN64
         pop         rsi
         RESTORE_XMM
     %endif
diff --git a/vp8/common/x86/variance_mmx.c b/vp8/common/x86/variance_mmx.c
index 0c4dd4a981e021f555d7d7079284366d4a73e34a..36995db9aa7b1d450338e22cba6fe55e8b107bc0 100644
--- a/vp8/common/x86/variance_mmx.c
+++ b/vp8/common/x86/variance_mmx.c
@@ -91,7 +91,7 @@ unsigned int vp8_variance4x4_mmx(
 
     vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
     *sse = var;
-    return (var - ((unsigned int)(avg * avg) >> 4));
+    return (var - (((unsigned int)avg * avg) >> 4));
 }
 
 
@@ -108,7 +108,7 @@ unsigned int vp8_variance8x8_mmx(
 
     vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
     *sse = var;
-    return (var - ((unsigned int)(avg * avg) >> 6));
+    return (var - (((unsigned int)avg * avg) >> 6));
 }
 
 
@@ -153,7 +153,7 @@ unsigned int vp8_variance16x16_mmx(
     var = sse0 + sse1 + sse2 + sse3;
     avg = sum0 + sum1 + sum2 + sum3;
     *sse = var;
-    return (var - ((unsigned int)(avg * avg) >> 8));
+    return (var - (((unsigned int)avg * avg) >> 8));
 }
 
 unsigned int vp8_variance16x8_mmx(
@@ -172,7 +172,7 @@ unsigned int vp8_variance16x8_mmx(
     var = sse0 + sse1;
     avg = sum0 + sum1;
     *sse = var;
-    return (var - ((unsigned int)(avg * avg) >> 7));
+    return (var - (((unsigned int)avg * avg) >> 7));
 }
 
 
@@ -194,7 +194,7 @@ unsigned int vp8_variance8x16_mmx(
     avg = sum0 + sum1;
     *sse = var;
 
-    return (var - ((unsigned int)(avg * avg) >> 7));
+    return (var - (((unsigned int)avg * avg) >> 7));
 }
 
 
@@ -219,7 +219,7 @@ unsigned int vp8_sub_pixel_variance4x4_mmx
         &xsum, &xxsum
     );
     *sse = xxsum;
-    return (xxsum - ((unsigned int)(xsum * xsum) >> 4));
+    return (xxsum - (((unsigned int)xsum * xsum) >> 4));
 }
 
 
@@ -244,7 +244,7 @@ unsigned int vp8_sub_pixel_variance8x8_mmx
         &xsum, &xxsum
     );
     *sse = xxsum;
-    return (xxsum - ((unsigned int)(xsum * xsum) >> 6));
+    return (xxsum - (((unsigned int)xsum * xsum) >> 6));
 }
 
 unsigned int vp8_sub_pixel_variance16x16_mmx
@@ -282,7 +282,7 @@ unsigned int vp8_sub_pixel_variance16x16_mmx
     xxsum0 += xxsum1;
 
     *sse = xxsum0;
-    return (xxsum0 - ((unsigned int)(xsum0 * xsum0) >> 8));
+    return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
 }
 
 
@@ -335,7 +335,7 @@ unsigned int vp8_sub_pixel_variance16x8_mmx
     xxsum0 += xxsum1;
 
     *sse = xxsum0;
-    return (xxsum0 - ((unsigned int)(xsum0 * xsum0) >> 7));
+    return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 7));
 }
 
 unsigned int vp8_sub_pixel_variance8x16_mmx
@@ -358,7 +358,7 @@ unsigned int vp8_sub_pixel_variance8x16_mmx
         &xsum, &xxsum
     );
     *sse = xxsum;
-    return (xxsum - ((unsigned int)(xsum * xsum) >> 7));
+    return (xxsum - (((unsigned int)xsum * xsum) >> 7));
 }
diff --git a/vp8/common/x86/variance_sse2.c b/vp8/common/x86/variance_sse2.c
index afd642915d235d8258ebd818088c10629116c994..7fa5f53dcbf605e835e4874249aa465d63547fd0 100644
--- a/vp8/common/x86/variance_sse2.c
+++ b/vp8/common/x86/variance_sse2.c
@@ -148,7 +148,7 @@ unsigned int vp8_variance4x4_wmt(
 
     vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
     *sse = var;
-    return (var - ((unsigned int)(avg * avg) >> 4));
+    return (var - (((unsigned int)avg * avg) >> 4));
 }
 
 
@@ -165,7 +165,7 @@ unsigned int vp8_variance8x8_wmt
 
     vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
     *sse = var;
-    return (var - ((unsigned int)(avg * avg) >> 6));
+    return (var - (((unsigned int)avg * avg) >> 6));
 }
 
 
@@ -184,7 +184,7 @@ unsigned int vp8_variance16x16_wmt
 
     vp8_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
     *sse = sse0;
-    return (sse0 - ((unsigned int)(sum0 * sum0) >> 8));
+    return (sse0 - (((unsigned int)sum0 * sum0) >> 8));
 }
 unsigned int vp8_mse16x16_wmt(
     const unsigned char *src_ptr,
@@ -220,7 +220,7 @@ unsigned int vp8_variance16x8_wmt
     var = sse0 + sse1;
     avg = sum0 + sum1;
     *sse = var;
-    return (var - ((unsigned int)(avg * avg) >> 7));
+    return (var - (((unsigned int)avg * avg) >> 7));
 }
 
 
@@ -241,7 +241,7 @@ unsigned int vp8_variance8x16_wmt
     var = sse0 + sse1;
     avg = sum0 + sum1;
     *sse = var;
-    return (var - ((unsigned int)(avg * avg) >> 7));
+    return (var - (((unsigned int)avg * avg) >> 7));
 }
 
 
@@ -265,7 +265,7 @@ unsigned int vp8_sub_pixel_variance4x4_wmt
         &xsum, &xxsum
     );
     *sse = xxsum;
-    return (xxsum - ((unsigned int)(xsum * xsum) >> 4));
+    return (xxsum - (((unsigned int)xsum * xsum) >> 4));
 }
 
 
@@ -314,7 +314,7 @@ unsigned int vp8_sub_pixel_variance8x8_wmt
     }
 
     *sse = xxsum;
-    return (xxsum - ((unsigned int)(xsum * xsum) >> 6));
+    return (xxsum - (((unsigned int)xsum * xsum) >> 6));
 }
 
 unsigned int vp8_sub_pixel_variance16x16_wmt
@@ -376,7 +376,7 @@ unsigned int vp8_sub_pixel_variance16x16_wmt
     }
 
     *sse = xxsum0;
-    return (xxsum0 - ((unsigned int)(xsum0 * xsum0) >> 8));
+    return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
 }
 
 unsigned int vp8_sub_pixel_mse16x16_wmt(
@@ -447,7 +447,7 @@ unsigned int vp8_sub_pixel_variance16x8_wmt
     }
 
     *sse = xxsum0;
-    return (xxsum0 - ((unsigned int)(xsum0 * xsum0) >> 7));
+    return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 7));
 }
 
 unsigned int vp8_sub_pixel_variance8x16_wmt
@@ -495,7 +495,7 @@ unsigned int vp8_sub_pixel_variance8x16_wmt
     }
 
     *sse = xxsum;
-    return (xxsum - ((unsigned int)(xsum * xsum) >> 7));
+    return (xxsum - (((unsigned int)xsum * xsum) >> 7));
 }
 
 
@@ -515,7 +515,7 @@ unsigned int vp8_variance_halfpixvar16x16_h_wmt(
         &xsum0, &xxsum0);
 
     *sse = xxsum0;
-    return (xxsum0 - ((unsigned int)(xsum0 * xsum0) >> 8));
+    return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
 }
 
 
@@ -534,7 +534,7 @@ unsigned int vp8_variance_halfpixvar16x16_v_wmt(
         &xsum0, &xxsum0);
 
     *sse = xxsum0;
-    return (xxsum0 - ((unsigned int)(xsum0 * xsum0) >> 8));
+    return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
 }
 
 
@@ -554,5 +554,5 @@ unsigned int vp8_variance_halfpixvar16x16_hv_wmt(
         &xsum0, &xxsum0);
 
     *sse = xxsum0;
-    return (xxsum0 - ((unsigned int)(xsum0 * xsum0) >> 8));
+    return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
 }
diff --git a/vp8/common/x86/variance_ssse3.c b/vp8/common/x86/variance_ssse3.c
index ba2055cc90455e181da4c8373a4e9aa15be0ecbf..f90f8117c4dbdb0c1051af196c06d98b44ad94f9 100644
--- a/vp8/common/x86/variance_ssse3.c
+++ b/vp8/common/x86/variance_ssse3.c
@@ -113,7 +113,7 @@ unsigned int vp8_sub_pixel_variance16x16_ssse3
     }
 
     *sse = xxsum0;
-    return (xxsum0 - ((unsigned int)(xsum0 * xsum0) >> 8));
+    return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
 }
 
 unsigned int vp8_sub_pixel_variance16x8_ssse3
@@ -162,5 +162,5 @@ unsigned int vp8_sub_pixel_variance16x8_ssse3
     }
 
     *sse = xxsum0;
-    return (xxsum0 - ((unsigned int)(xsum0 * xsum0) >> 7));
+    return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 7));
 }
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 463211810cd6cf9dae894e4c1578263e0823f365..6f8282a6436ea38f2c4fd68ebd2e75056024b538 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -828,8 +828,8 @@ static void setup_token_decoder(VP8D_COMP *pbi,
     unsigned int partition_idx;
     unsigned int fragment_idx;
     unsigned int num_token_partitions;
-    const unsigned char *first_fragment_end = pbi->fragments[0] +
-                                          pbi->fragment_sizes[0];
+    const unsigned char *first_fragment_end = pbi->fragments.ptrs[0] +
+                                          pbi->fragments.sizes[0];
 
     TOKEN_PARTITION multi_token_partition =
             (TOKEN_PARTITION)vp8_read_literal(&pbi->mbc[8], 2);
@@ -839,10 +839,10 @@
     /* Check for partitions within the fragments and unpack the fragments
      * so that each fragment pointer points to its corresponding partition. */
-    for (fragment_idx = 0; fragment_idx < pbi->num_fragments; ++fragment_idx)
+    for (fragment_idx = 0; fragment_idx < pbi->fragments.count; ++fragment_idx)
     {
-        unsigned int fragment_size = pbi->fragment_sizes[fragment_idx];
-        const unsigned char *fragment_end = pbi->fragments[fragment_idx] +
+        unsigned int fragment_size = pbi->fragments.sizes[fragment_idx];
+        const unsigned char *fragment_end = pbi->fragments.ptrs[fragment_idx] +
                                             fragment_size;
         /* Special case for handling the first partition since we have already
          * read its size. */
@@ -850,16 +850,16 @@
         {
             /* Size of first partition + token partition sizes element */
             ptrdiff_t ext_first_part_size = token_part_sizes -
-                pbi->fragments[0] + 3 * (num_token_partitions - 1);
+                pbi->fragments.ptrs[0] + 3 * (num_token_partitions - 1);
             fragment_size -= (unsigned int)ext_first_part_size;
             if (fragment_size > 0)
             {
-                pbi->fragment_sizes[0] = (unsigned int)ext_first_part_size;
+                pbi->fragments.sizes[0] = (unsigned int)ext_first_part_size;
                 /* The fragment contains an additional partition. Move to
                  * next. */
                 fragment_idx++;
-                pbi->fragments[fragment_idx] = pbi->fragments[0] +
-                    pbi->fragment_sizes[0];
+                pbi->fragments.ptrs[fragment_idx] = pbi->fragments.ptrs[0] +
+                    pbi->fragments.sizes[0];
             }
         }
         /* Split the chunk into partitions read from the bitstream */
@@ -868,12 +868,12 @@
             ptrdiff_t partition_size = read_available_partition_size(
                                                  pbi,
                                                  token_part_sizes,
-                                                 pbi->fragments[fragment_idx],
+                                                 pbi->fragments.ptrs[fragment_idx],
                                                  first_fragment_end,
                                                  fragment_end,
                                                  fragment_idx - 1,
                                                  num_token_partitions);
-            pbi->fragment_sizes[fragment_idx] = (unsigned int)partition_size;
+            pbi->fragments.sizes[fragment_idx] = (unsigned int)partition_size;
             fragment_size -= (unsigned int)partition_size;
             assert(fragment_idx <= num_token_partitions);
             if (fragment_size > 0)
@@ -881,19 +881,19 @@
                 /* The fragment contains an additional partition.
                  * Move to next. */
                 fragment_idx++;
-                pbi->fragments[fragment_idx] =
-                    pbi->fragments[fragment_idx - 1] + partition_size;
+                pbi->fragments.ptrs[fragment_idx] =
+                    pbi->fragments.ptrs[fragment_idx - 1] + partition_size;
             }
         }
     }
 
-    pbi->num_fragments = num_token_partitions + 1;
+    pbi->fragments.count = num_token_partitions + 1;
 
-    for (partition_idx = 1; partition_idx < pbi->num_fragments; ++partition_idx)
+    for (partition_idx = 1; partition_idx < pbi->fragments.count; ++partition_idx)
     {
         if (vp8dx_start_decode(bool_decoder,
-                               pbi->fragments[partition_idx],
-                               pbi->fragment_sizes[partition_idx]))
+                               pbi->fragments.ptrs[partition_idx],
+                               pbi->fragments.sizes[partition_idx]))
             vpx_internal_error(&pbi->common.error, VPX_CODEC_MEM_ERROR,
                                "Failed to allocate bool decoder %d",
                                partition_idx);
@@ -983,8 +983,8 @@ int vp8_decode_frame(VP8D_COMP *pbi)
     vp8_reader *const bc = & pbi->mbc[8];
     VP8_COMMON *const pc = & pbi->common;
     MACROBLOCKD *const xd  = & pbi->mb;
-    const unsigned char *data = pbi->fragments[0];
-    const unsigned char *data_end =  data + pbi->fragment_sizes[0];
+    const unsigned char *data = pbi->fragments.ptrs[0];
+    const unsigned char *data_end =  data + pbi->fragments.sizes[0];
     ptrdiff_t first_partition_length_in_bytes;
 
     int i, j, k, l;
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index 24fc8783aa9c618e2ba69957bb17d850b8e5b310..2db309658c5b94f3badd6f02fb887032c5769066 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -42,7 +42,16 @@ extern void vp8cx_init_de_quantizer(VP8D_COMP *pbi);
 static int get_free_fb (VP8_COMMON *cm);
 static void ref_cnt_fb (int *buf, int *idx, int new_idx);
 
-struct VP8D_COMP * vp8dx_create_decompressor(VP8D_CONFIG *oxcf)
+static void remove_decompressor(VP8D_COMP *pbi)
+{
+#if CONFIG_ERROR_CONCEALMENT
+    vp8_de_alloc_overlap_lists(pbi);
+#endif
+    vp8_remove_common(&pbi->common);
+    vpx_free(pbi);
+}
+
+static struct VP8D_COMP * create_decompressor(VP8D_CONFIG *oxcf)
 {
     VP8D_COMP *pbi = vpx_memalign(32, sizeof(VP8D_COMP));
 
@@ -54,7 +63,7 @@
     if (setjmp(pbi->common.error.jmp))
     {
         pbi->common.error.setjmp = 0;
-        vp8dx_remove_decompressor(pbi);
+        remove_decompressor(pbi);
         return 0;
     }
 
@@ -65,11 +74,6 @@
     pbi->common.current_video_frame = 0;
     pbi->ready_for_new_data = 1;
 
-#if CONFIG_MULTITHREAD
-    pbi->max_threads = oxcf->max_threads;
-    vp8_decoder_create_threads(pbi);
-#endif
-
     /* vp8cx_init_de_quantizer() is first called here. Add check in frame_init_dequantizer() to avoid
      *  unnecessary calling of vp8cx_init_de_quantizer() for every frame.
      */
@@ -92,9 +96,6 @@
 
     pbi->decoded_key_frame = 0;
 
-    pbi->input_fragments = oxcf->input_fragments;
-    pbi->num_fragments = 0;
-
     /* Independent partitions is activated when a frame updates the
      * token probability table to have equal probabilities over the
      * PREV_COEF context.
@@ -106,25 +107,6 @@ struct VP8D_COMP * vp8dx_create_decompressor(VP8D_CONFIG *oxcf)
 
     return pbi;
 }
 
-
-void vp8dx_remove_decompressor(VP8D_COMP *pbi)
-{
-    if (!pbi)
-        return;
-
-#if CONFIG_MULTITHREAD
-    if (pbi->b_multithreaded_rd)
-        vp8mt_de_alloc_temp_buffers(pbi, pbi->common.mb_rows);
-    vp8_decoder_remove_threads(pbi);
-#endif
-#if CONFIG_ERROR_CONCEALMENT
-    vp8_de_alloc_overlap_lists(pbi);
-#endif
-    vp8_remove_common(&pbi->common);
-    vpx_free(pbi);
-}
-
-
 vpx_codec_err_t vp8dx_get_reference(VP8D_COMP *pbi, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd)
 {
     VP8_COMMON *cm = &pbi->common;
@@ -282,60 +264,13 @@ static int swap_frame_buffers (VP8_COMMON *cm)
     return err;
 }
 
-int vp8dx_receive_compressed_data(VP8D_COMP *pbi, size_t size,
-                                  const uint8_t *source,
-                                  int64_t time_stamp)
+int check_fragments_for_errors(VP8D_COMP *pbi)
 {
-#if HAVE_NEON
-    int64_t dx_store_reg[8];
-#endif
-    VP8_COMMON *cm = &pbi->common;
-    int retcode = -1;
-
-    pbi->common.error.error_code = VPX_CODEC_OK;
-
-    if (pbi->num_fragments == 0)
-    {
-        /* New frame, reset fragment pointers and sizes */
-        vpx_memset((void*)pbi->fragments, 0, sizeof(pbi->fragments));
-        vpx_memset(pbi->fragment_sizes, 0, sizeof(pbi->fragment_sizes));
-    }
-    if (pbi->input_fragments && !(source == NULL && size == 0))
-    {
-        /* Store a pointer to this fragment and return. We haven't
-         * received the complete frame yet, so we will wait with decoding.
-         */
-        assert(pbi->num_fragments < MAX_PARTITIONS);
-        pbi->fragments[pbi->num_fragments] = source;
-        pbi->fragment_sizes[pbi->num_fragments] = size;
-        pbi->num_fragments++;
-        if (pbi->num_fragments > (1 << EIGHT_PARTITION) + 1)
-        {
-            pbi->common.error.error_code = VPX_CODEC_UNSUP_BITSTREAM;
-            pbi->common.error.setjmp = 0;
-            pbi->num_fragments = 0;
-            return -1;
-        }
-        return 0;
-    }
-
-    if (!pbi->input_fragments)
-    {
-        pbi->fragments[0] = source;
-        pbi->fragment_sizes[0] = size;
-        pbi->num_fragments = 1;
-    }
-    assert(pbi->common.multi_token_partition <= EIGHT_PARTITION);
-    if (pbi->num_fragments == 0)
-    {
-        pbi->num_fragments = 1;
-        pbi->fragments[0] = NULL;
-        pbi->fragment_sizes[0] = 0;
-    }
-
     if (!pbi->ec_active &&
-        pbi->num_fragments <= 1 && pbi->fragment_sizes[0] == 0)
+        pbi->fragments.count <= 1 && pbi->fragments.sizes[0] == 0)
     {
+        VP8_COMMON *cm = &pbi->common;
+
         /* If error concealment is disabled we won't signal missing frames
          * to the decoder.
          */
@@ -361,12 +296,29 @@
         /* Signal that we have no frame to show. */
         cm->show_frame = 0;
 
-        pbi->num_fragments = 0;
-
         /* Nothing more to do. */
         return 0;
     }
 
+    return 1;
+}
+
+int vp8dx_receive_compressed_data(VP8D_COMP *pbi, size_t size,
+                                  const uint8_t *source,
+                                  int64_t time_stamp)
+{
+#if HAVE_NEON
+    int64_t dx_store_reg[8];
+#endif
+    VP8_COMMON *cm = &pbi->common;
+    int retcode = -1;
+
+    pbi->common.error.error_code = VPX_CODEC_OK;
+
+    retcode = check_fragments_for_errors(pbi);
+    if(retcode <= 0)
+        return retcode;
+
 #if HAVE_NEON
 #if CONFIG_RUNTIME_CPU_DETECT
     if (cm->cpu_caps & HAS_NEON)
@@ -419,7 +371,13 @@
 
     vp8_clear_system_state();
 
-#if CONFIG_ERROR_CONCEALMENT
+    if (cm->show_frame)
+    {
+        cm->current_video_frame++;
+        cm->show_frame_mi = cm->mi;
+    }
+
+    #if CONFIG_ERROR_CONCEALMENT
     /* swap the mode infos to storage for future error concealment */
     if (pbi->ec_enabled && pbi->common.prev_mi)
     {
@@ -441,9 +399,6 @@
     }
 #endif
 
-    if (cm->show_frame)
-        cm->current_video_frame++;
-
     pbi->ready_for_new_data = 0;
     pbi->last_time_stamp = time_stamp;
 
@@ -458,7 +413,6 @@ decode_exit:
 #endif
 
     pbi->common.error.setjmp = 0;
-    pbi->num_fragments = 0;
     return retcode;
 }
 
 int vp8dx_get_raw_frame(VP8D_COMP *pbi, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags)
@@ -521,3 +475,54 @@ int vp8dx_references_buffer( VP8_COMMON *oci, int ref_frame )
 
     return 0;
 }
+
+int vp8_create_decoder_instances(struct frame_buffers *fb, VP8D_CONFIG *oxcf)
+{
+    if(!fb->use_frame_threads)
+    {
+        /* decoder instance for single thread mode */
+        fb->pbi[0] = create_decompressor(oxcf);
+        if(!fb->pbi[0])
+            return VPX_CODEC_ERROR;
+
+#if CONFIG_MULTITHREAD
+        /* enable row-based threading only when use_frame_threads
+         * is disabled */
+        fb->pbi[0]->max_threads = oxcf->max_threads;
+        vp8_decoder_create_threads(fb->pbi[0]);
+#endif
+    }
+    else
+    {
+        /* TODO : create frame threads and decoder instances for each
+         * thread here */
+    }
+
+    return VPX_CODEC_OK;
+}
+
+int vp8_remove_decoder_instances(struct frame_buffers *fb)
+{
+    if(!fb->use_frame_threads)
+    {
+        VP8D_COMP *pbi = fb->pbi[0];
+
+        if (!pbi)
+            return VPX_CODEC_ERROR;
+#if CONFIG_MULTITHREAD
+        if (pbi->b_multithreaded_rd)
+            vp8mt_de_alloc_temp_buffers(pbi, pbi->common.mb_rows);
+        vp8_decoder_remove_threads(pbi);
+#endif
+
+        /* decoder instance for single thread mode */
+        remove_decompressor(pbi);
+    }
+    else
+    {
+        /* TODO : remove frame threads and decoder instances for each
+         * thread here */
+    }
+
+    return VPX_CODEC_OK;
+}
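Note: onyxd_if.c now keeps create_decompressor()/remove_decompressor() static and exposes vp8_create_decoder_instances()/vp8_remove_decoder_instances() instead, operating on the struct frame_buffers declared in onyxd_int.h below; row-based threading is wired up only when frame-based threading is off. A caller-side lifecycle sketch in C (the codec interface layer is the real caller; error handling abbreviated):

    #include "vp8/common/onyxd.h"       /* VP8D_CONFIG */
    #include "vp8/decoder/onyxd_int.h"  /* struct frame_buffers, new entry points */

    static int decode_session_sketch(VP8D_CONFIG *oxcf)
    {
        struct frame_buffers fb;

        fb.use_frame_threads = 0;  /* frame threading is only scaffolding so far */
        if (vp8_create_decoder_instances(&fb, oxcf) != VPX_CODEC_OK)
            return -1;

        /* ...call vp8dx_receive_compressed_data(fb.pbi[0], ...) per frame,
         * then vp8dx_get_raw_frame() to fetch decoded output... */

        return vp8_remove_decoder_instances(&fb);
    }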
*/ + + /* enable/disable frame-based threading */ + int use_frame_threads; + + /* decoder instances */ + struct VP8D_COMP *pbi[MAX_FB_MT_DEC]; + +}; + typedef struct VP8D_COMP { DECLARE_ALIGNED(16, MACROBLOCKD, mb); @@ -46,10 +71,7 @@ typedef struct VP8D_COMP VP8D_CONFIG oxcf; - - const unsigned char *fragments[MAX_PARTITIONS]; - unsigned int fragment_sizes[MAX_PARTITIONS]; - unsigned int num_fragments; + FRAGMENT_DATA fragments; #if CONFIG_MULTITHREAD /* variable for threading */ @@ -95,7 +117,6 @@ typedef struct VP8D_COMP #endif int ec_enabled; int ec_active; - int input_fragments; int decoded_key_frame; int independent_partitions; int frame_corrupt_residual; @@ -104,6 +125,9 @@ typedef struct VP8D_COMP int vp8_decode_frame(VP8D_COMP *cpi); +int vp8_create_decoder_instances(struct frame_buffers *fb, VP8D_CONFIG *oxcf); +int vp8_remove_decoder_instances(struct frame_buffers *fb); + #if CONFIG_DEBUG #define CHECK_MEM_ERROR(lval,expr) do {\ lval = (expr); \ diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c index 68095ca68347749c3fd88794f69d13c13af38b39..433726df604c9732252a6477eb2f4b1b23289fed 100644 --- a/vp8/encoder/firstpass.c +++ b/vp8/encoder/firstpass.c @@ -858,7 +858,9 @@ skip_motion_search: */ if ((cm->current_video_frame > 0) && (cpi->twopass.this_frame_stats.pcnt_inter > 0.20) && - ((cpi->twopass.this_frame_stats.intra_error / cpi->twopass.this_frame_stats.coded_error) > 2.0)) + ((cpi->twopass.this_frame_stats.intra_error / + DOUBLE_DIVIDE_CHECK(cpi->twopass.this_frame_stats.coded_error)) > + 2.0)) { vp8_yv12_copy_frame(lst_yv12, gld_yv12); } @@ -2116,23 +2118,25 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) (cpi->twopass.kf_group_error_left > 0)) { cpi->twopass.gf_group_bits = - (int)((double)cpi->twopass.kf_group_bits * - (gf_group_err / (double)cpi->twopass.kf_group_error_left)); + (int64_t)(cpi->twopass.kf_group_bits * + (gf_group_err / cpi->twopass.kf_group_error_left)); } else cpi->twopass.gf_group_bits = 0; - cpi->twopass.gf_group_bits = (int)( + cpi->twopass.gf_group_bits = (cpi->twopass.gf_group_bits < 0) ? 0 : (cpi->twopass.gf_group_bits > cpi->twopass.kf_group_bits) - ? cpi->twopass.kf_group_bits : cpi->twopass.gf_group_bits); + ? 
cpi->twopass.kf_group_bits : cpi->twopass.gf_group_bits; /* Clip cpi->twopass.gf_group_bits based on user supplied data rate * variability limit (cpi->oxcf.two_pass_vbrmax_section) */ - if (cpi->twopass.gf_group_bits > max_bits * cpi->baseline_gf_interval) - cpi->twopass.gf_group_bits = max_bits * cpi->baseline_gf_interval; + if (cpi->twopass.gf_group_bits > + (int64_t)max_bits * cpi->baseline_gf_interval) + cpi->twopass.gf_group_bits = + (int64_t)max_bits * cpi->baseline_gf_interval; /* Reset the file position */ reset_fpf_position(cpi, start_pos); @@ -2446,7 +2450,7 @@ void vp8_second_pass(VP8_COMP *cpi) */ if (cpi->oxcf.error_resilient_mode) { - cpi->twopass.gf_group_bits = (int)cpi->twopass.kf_group_bits; + cpi->twopass.gf_group_bits = cpi->twopass.kf_group_bits; cpi->twopass.gf_group_error_left = (int)cpi->twopass.kf_group_error_left; cpi->baseline_gf_interval = cpi->twopass.frames_to_key; diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c index b08c7a5897bacc81cf9d71c100c60c028e6bdc4b..a34af64280702b800485f701860420b370e55d43 100644 --- a/vp8/encoder/mcomp.c +++ b/vp8/encoder/mcomp.c @@ -235,13 +235,12 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MACROBLOCKD *xd = &x->e_mbd; unsigned char *y0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; unsigned char *y; - int buf_r1, buf_r2, buf_c1, buf_c2; + int buf_r1, buf_r2, buf_c1; /* Clamping to avoid out-of-range data access */ buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)?(bestmv->as_mv.row - x->mv_row_min):3; buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)?(x->mv_row_max - bestmv->as_mv.row):3; buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)?(bestmv->as_mv.col - x->mv_col_min):3; - buf_c2 = ((bestmv->as_mv.col + 3) > x->mv_col_max)?(x->mv_col_max - bestmv->as_mv.col):3; y_stride = 32; /* Copy to intermediate buffer before searching. 
*/ diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index 7eb7193bf75f8f1323f391dbc25b5d955dc5d711..92f9818577436368e5e665c480e9da8c92256179 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -5362,6 +5362,7 @@ int vp8_get_preview_raw_frame(VP8_COMP *cpi, YV12_BUFFER_CONFIG *dest, vp8_ppfla #endif #if CONFIG_POSTPROC + cpi->common.show_frame_mi = cpi->common.mi; ret = vp8_post_proc_frame(&cpi->common, dest, flags); #else diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h index fb8ad357c02b5ae0971d745d44ad1fc7d30154c4..378731d0a7a81c1e813cc43e23763b6b71928406 100644 --- a/vp8/encoder/onyx_int.h +++ b/vp8/encoder/onyx_int.h @@ -587,7 +587,7 @@ typedef struct VP8_COMP /* Error score of frames still to be coded in kf group */ int64_t kf_group_error_left; /* Projected Bits available for a group including 1 GF or ARF */ - int gf_group_bits; + int64_t gf_group_bits; /* Bits for the golden frame or ARF */ int gf_bits; int alt_extra_bits; diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c index 673de2b334d09ca58342d398482f5773a9f46bee..4c2527d68ade140a0f2942959d004e77dc77fbe1 100644 --- a/vp8/encoder/pickinter.c +++ b/vp8/encoder/pickinter.c @@ -389,7 +389,7 @@ static void pick_intra_mbuv_mode(MACROBLOCK *mb) } -static void update_mvcount(VP8_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv) +static void update_mvcount(MACROBLOCK *x, int_mv *best_ref_mv) { MACROBLOCKD *xd = &x->e_mbd; /* Split MV modes currently not supported when RD is nopt enabled, @@ -1241,7 +1241,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, != cpi->common.ref_frame_sign_bias[xd->mode_info_context->mbmi.ref_frame]) best_ref_mv.as_int = best_ref_mv_sb[!sign_bias].as_int; - update_mvcount(cpi, x, &best_ref_mv); + update_mvcount(x, &best_ref_mv); } diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c index a399a38771dd223c09786bdf6e41d8cdeee0d707..65fd0c5be58b65def0f4271be793c519a3c27a70 100644 --- a/vp8/encoder/ratectrl.c +++ b/vp8/encoder/ratectrl.c @@ -1360,10 +1360,10 @@ static int estimate_keyframe_frequency(VP8_COMP *cpi) * whichever is smaller. */ int key_freq = cpi->oxcf.key_freq>0 ? cpi->oxcf.key_freq : 1; - av_key_frame_frequency = (int)cpi->output_frame_rate * 2; + av_key_frame_frequency = 1 + (int)cpi->output_frame_rate * 2; if (cpi->oxcf.auto_key && av_key_frame_frequency > key_freq) - av_key_frame_frequency = cpi->oxcf.key_freq; + av_key_frame_frequency = key_freq; cpi->prior_key_frame_distance[KEY_FRAME_CONTEXT - 1] = av_key_frame_frequency; @@ -1393,6 +1393,10 @@ static int estimate_keyframe_frequency(VP8_COMP *cpi) av_key_frame_frequency /= total_weight; } + // TODO (marpan): Given the checks above, |av_key_frame_frequency| + // should always be above 0. But for now we keep the sanity check in. 
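An aside on the rate-control hunks above (the ratectrl.c hunk resumes immediately below): their shared theme is that intermediate bit budgets outgrow a plain int. The sketch below is illustrative only, not part of the patch; it folds the diff's three clamps into one helper and borrows DOUBLE_DIVIDE_CHECK, whose definition is assumed from libvpx's first-pass code. The same widening recurs in the vp9_firstpass.c hunk further down.

    #include <stdint.h>

    /* Assumed to match libvpx's DOUBLE_DIVIDE_CHECK: nudge a divisor away
     * from zero so degenerate first-pass stats cannot divide by zero. */
    #define DOUBLE_DIVIDE_CHECK(x) ((x) < 0 ? (x) - 0.000001 : (x) + 0.000001)

    /* Illustrative helper: with gf_group_bits held in a plain int,
     * kf_group_bits * (err / err_left) and max_bits * interval overflow
     * for high-rate streams; int64_t keeps every intermediate in range,
     * mirroring the clamps added in the hunks above. */
    static int64_t gf_group_bit_allocation(int64_t kf_group_bits,
                                           double gf_group_err,
                                           double kf_group_error_left,
                                           int max_bits, int gf_interval)
    {
        int64_t bits = (int64_t)(kf_group_bits *
            (gf_group_err / DOUBLE_DIVIDE_CHECK(kf_group_error_left)));

        if (bits < 0)
            bits = 0;
        if (bits > kf_group_bits)
            bits = kf_group_bits;
        if (bits > (int64_t)max_bits * gf_interval)
            bits = (int64_t)max_bits * gf_interval;
        return bits;
    }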
+ if (av_key_frame_frequency == 0) + av_key_frame_frequency = 1; return av_key_frame_frequency; } diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c index f0ec7b6e210b980644b3594eefabf635bbd09c34..3d60bebdaf5987b26a7ee0d70027e52e4b9a2f30 100644 --- a/vp8/encoder/rdopt.c +++ b/vp8/encoder/rdopt.c @@ -1733,7 +1733,7 @@ void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffse } } -static void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv) +static void rd_update_mvcount(MACROBLOCK *x, int_mv *best_ref_mv) { if (x->e_mbd.mode_info_context->mbmi.mode == SPLITMV) { @@ -2608,7 +2608,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, != cpi->common.ref_frame_sign_bias[xd->mode_info_context->mbmi.ref_frame]) best_ref_mv.as_int = best_ref_mv_sb[!sign_bias].as_int; - rd_update_mvcount(cpi, x, &best_ref_mv); + rd_update_mvcount(x, &best_ref_mv); } void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate_) diff --git a/vp8/encoder/x86/dct_sse2.asm b/vp8/encoder/x86/dct_sse2.asm index d880ce0c46073949d9a3390ddb4023ef0f1e1112..d06bca5927f2888b5f06b1db9579ca95803b5c60 100644 --- a/vp8/encoder/x86/dct_sse2.asm +++ b/vp8/encoder/x86/dct_sse2.asm @@ -29,7 +29,7 @@ movsxd rax, dword ptr arg(2) lea rcx, [rsi + rax*2] %else - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 %define input rcx %define output rdx %define pitch r8 @@ -53,7 +53,7 @@ RESTORE_GOT pop rbp %else - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 RESTORE_XMM %endif %endif diff --git a/vp8/encoder/x86/quantize_sse2.asm b/vp8/encoder/x86/quantize_sse2.asm index fe9464b3d995cd2b31805e3dbb27a293432e566f..b41768ce083cedd420ef7d548f3928b1b076d8bc 100644 --- a/vp8/encoder/x86/quantize_sse2.asm +++ b/vp8/encoder/x86/quantize_sse2.asm @@ -27,7 +27,7 @@ sym(vp8_regular_quantize_b_sse2): push rdi push rsi %else - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 push rdi push rsi %endif @@ -46,7 +46,7 @@ sym(vp8_regular_quantize_b_sse2): mov rdi, arg(0) ; BLOCK *b mov rsi, arg(1) ; BLOCKD *d %else - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 mov rdi, rcx ; BLOCK *b mov rsi, rdx ; BLOCKD *d %else @@ -226,7 +226,7 @@ ZIGZAG_LOOP 15 pop rsi pop rdi %else - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 pop rsi pop rdi %endif @@ -236,147 +236,6 @@ ZIGZAG_LOOP 15 pop rbp ret -; void vp8_fast_quantize_b_sse2 | arg -; (BLOCK *b, | 0 -; BLOCKD *d) | 1 - -global sym(vp8_fast_quantize_b_sse2) PRIVATE -sym(vp8_fast_quantize_b_sse2): - push rbp - mov rbp, rsp - GET_GOT rbx - -%if ABI_IS_32BIT - push rdi - push rsi -%else - %ifidn __OUTPUT_FORMAT__,x64 - push rdi - push rsi - %else - ; these registers are used for passing arguments - %endif -%endif - - ; end prolog - -%if ABI_IS_32BIT - mov rdi, arg(0) ; BLOCK *b - mov rsi, arg(1) ; BLOCKD *d -%else - %ifidn __OUTPUT_FORMAT__,x64 - mov rdi, rcx ; BLOCK *b - mov rsi, rdx ; BLOCKD *d - %else - ;mov rdi, rdi ; BLOCK *b - ;mov rsi, rsi ; BLOCKD *d - %endif -%endif - - mov rax, [rdi + vp8_block_coeff] - mov rcx, [rdi + vp8_block_round] - mov rdx, [rdi + vp8_block_quant_fast] - - ; z = coeff - movdqa xmm0, [rax] - movdqa xmm4, [rax + 16] - - ; dup z so we can save sz - movdqa xmm1, xmm0 - movdqa xmm5, xmm4 - - ; sz = z >> 15 - psraw xmm0, 15 - psraw xmm4, 15 - - ; x = abs(z) = (z ^ sz) - sz - pxor xmm1, xmm0 - pxor xmm5, xmm4 - psubw xmm1, xmm0 - psubw xmm5, xmm4 - - ; x += round - paddw xmm1, [rcx] - paddw xmm5, [rcx + 16] - - mov rax, [rsi + vp8_blockd_qcoeff] - mov rcx, [rsi + vp8_blockd_dequant] - mov rdi, [rsi 
+ vp8_blockd_dqcoeff] - - ; y = x * quant >> 16 - pmulhw xmm1, [rdx] - pmulhw xmm5, [rdx + 16] - - ; x = (y ^ sz) - sz - pxor xmm1, xmm0 - pxor xmm5, xmm4 - psubw xmm1, xmm0 - psubw xmm5, xmm4 - - ; qcoeff = x - movdqa [rax], xmm1 - movdqa [rax + 16], xmm5 - - ; x * dequant - movdqa xmm2, xmm1 - movdqa xmm3, xmm5 - pmullw xmm2, [rcx] - pmullw xmm3, [rcx + 16] - - ; dqcoeff = x * dequant - movdqa [rdi], xmm2 - movdqa [rdi + 16], xmm3 - - pxor xmm4, xmm4 ;clear all bits - pcmpeqw xmm1, xmm4 - pcmpeqw xmm5, xmm4 - - pcmpeqw xmm4, xmm4 ;set all bits - pxor xmm1, xmm4 - pxor xmm5, xmm4 - - pand xmm1, [GLOBAL(inv_zig_zag)] - pand xmm5, [GLOBAL(inv_zig_zag + 16)] - - pmaxsw xmm1, xmm5 - - mov rcx, [rsi + vp8_blockd_eob] - - ; now down to 8 - pshufd xmm5, xmm1, 00001110b - - pmaxsw xmm1, xmm5 - - ; only 4 left - pshuflw xmm5, xmm1, 00001110b - - pmaxsw xmm1, xmm5 - - ; okay, just 2! - pshuflw xmm5, xmm1, 00000001b - - pmaxsw xmm1, xmm5 - - movd eax, xmm1 - and eax, 0xff - - mov BYTE PTR [rcx], al ; store eob - - ; begin epilog -%if ABI_IS_32BIT - pop rsi - pop rdi -%else - %ifidn __OUTPUT_FORMAT__,x64 - pop rsi - pop rdi - %endif -%endif - - RESTORE_GOT - pop rbp - ret - SECTION_RODATA align 16 inv_zig_zag: diff --git a/vp8/encoder/x86/quantize_sse2.c b/vp8/encoder/x86/quantize_sse2.c new file mode 100644 index 0000000000000000000000000000000000000000..55d57ad62ad6f4590b227a945c1af218499e1ead --- /dev/null +++ b/vp8/encoder/x86/quantize_sse2.c @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + + +#include "vp8/common/blockd.h" +#include "vp8/common/entropy.h" +#include "vp8/encoder/block.h" + +#include <mmintrin.h> //MMX +#include <xmmintrin.h> //SSE +#include <emmintrin.h> //SSE2 + +void vp8_fast_quantize_b_sse2(BLOCK *b, BLOCKD *d) +{ + __m128i z0 = _mm_load_si128((__m128i *)(b->coeff)); + __m128i z1 = _mm_load_si128((__m128i *)(b->coeff + 8)); + __m128i round0 = _mm_load_si128((__m128i *)(b->round)); + __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8)); + __m128i quant_fast0 = _mm_load_si128((__m128i *)(b->quant_fast)); + __m128i quant_fast1 = _mm_load_si128((__m128i *)(b->quant_fast + 8)); + __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant)); + __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8)); + __m128i inv_zig_zag0 = _mm_load_si128((const __m128i *)(vp8_default_inv_zig_zag)); + __m128i inv_zig_zag1 = _mm_load_si128((const __m128i *)(vp8_default_inv_zig_zag + 8)); + + __m128i sz0, sz1, x0, x1, y0, y1, xdq0, xdq1, zeros, ones; + + /* sign of z: z >> 15 */ + sz0 = _mm_srai_epi16(z0, 15); + sz1 = _mm_srai_epi16(z1, 15); + + /* x = abs(z): (z ^ sz) - sz */ + x0 = _mm_xor_si128(z0, sz0); + x1 = _mm_xor_si128(z1, sz1); + x0 = _mm_sub_epi16(x0, sz0); + x1 = _mm_sub_epi16(x1, sz1); + + /* x += round */ + x0 = _mm_add_epi16(x0, round0); + x1 = _mm_add_epi16(x1, round1); + + /* y = (x * quant) >> 16 */ + y0 = _mm_mulhi_epi16(x0, quant_fast0); + y1 = _mm_mulhi_epi16(x1, quant_fast1); + + /* x = abs(y) = (y ^ sz) - sz */ + y0 = _mm_xor_si128(y0, sz0); + y1 = _mm_xor_si128(y1, sz1); + x0 = _mm_sub_epi16(y0, sz0); + x1 = _mm_sub_epi16(y1, sz1); + + /* qcoeff = x */ + _mm_store_si128((__m128i *)(d->qcoeff), x0); + _mm_store_si128((__m128i *)(d->qcoeff + 8), x1); + + /* x * dequant */ + xdq0 = _mm_mullo_epi16(x0, dequant0); + xdq1 = _mm_mullo_epi16(x1, dequant1); + + /* dqcoeff = x * dequant */ + _mm_store_si128((__m128i *)(d->dqcoeff), xdq0); + _mm_store_si128((__m128i *)(d->dqcoeff + 8), xdq1); + + /* build a mask for the zig zag */ + zeros = _mm_setzero_si128(); + + x0 = _mm_cmpeq_epi16(x0, zeros); + x1 = _mm_cmpeq_epi16(x1, zeros); + + ones = _mm_cmpeq_epi16(zeros, zeros); + + x0 = _mm_xor_si128(x0, ones); + x1 = _mm_xor_si128(x1, ones); + + x0 = _mm_and_si128(x0, inv_zig_zag0); + x1 = _mm_and_si128(x1, inv_zig_zag1); + + x0 = _mm_max_epi16(x0, x1); + + /* now down to 8 */ + x1 = _mm_shuffle_epi32(x0, 0xE); // 0b00001110 + + x0 = _mm_max_epi16(x0, x1); + + /* only 4 left */ + x1 = _mm_shufflelo_epi16(x0, 0xE); // 0b00001110 + + x0 = _mm_max_epi16(x0, x1); + + /* okay, just 2! 
*/ + x1 = _mm_shufflelo_epi16(x0, 0x1); // 0b00000001 + + x0 = _mm_max_epi16(x0, x1); + + *d->eob = 0xFF & _mm_cvtsi128_si32(x0); +} diff --git a/vp8/encoder/x86/quantize_sse4.asm b/vp8/encoder/x86/quantize_sse4.asm index f21146457e8295032ade7ce565082c62a834f867..dbd171bfcdeef537040d5ba6e8002fbc30e0b9ca 100644 --- a/vp8/encoder/x86/quantize_sse4.asm +++ b/vp8/encoder/x86/quantize_sse4.asm @@ -31,7 +31,7 @@ sym(vp8_regular_quantize_b_sse4): %define stack_size 32 sub rsp, stack_size %else - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 SAVE_XMM 8, u push rdi push rsi @@ -43,7 +43,7 @@ sym(vp8_regular_quantize_b_sse4): mov rdi, arg(0) ; BLOCK *b mov rsi, arg(1) ; BLOCKD *d %else - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 mov rdi, rcx ; BLOCK *b mov rsi, rdx ; BLOCKD *d %else @@ -240,7 +240,7 @@ ZIGZAG_LOOP 15, 7, xmm3, xmm7, xmm8 pop rbp %else %undef xmm5 - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 pop rsi pop rdi RESTORE_XMM diff --git a/vp8/encoder/x86/quantize_ssse3.asm b/vp8/encoder/x86/quantize_ssse3.asm index 35368894dafde871c3afb8dc2016fafed545e0ce..7b1dc119f080b19b78b3af9ff293404b03e88d07 100644 --- a/vp8/encoder/x86/quantize_ssse3.asm +++ b/vp8/encoder/x86/quantize_ssse3.asm @@ -27,7 +27,7 @@ sym(vp8_fast_quantize_b_ssse3): push rdi push rsi %else - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 push rdi push rsi %endif @@ -38,7 +38,7 @@ sym(vp8_fast_quantize_b_ssse3): mov rdi, arg(0) ; BLOCK *b mov rsi, arg(1) ; BLOCKD *d %else - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 mov rdi, rcx ; BLOCK *b mov rsi, rdx ; BLOCKD *d %else @@ -122,7 +122,7 @@ sym(vp8_fast_quantize_b_ssse3): pop rsi pop rdi %else - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 pop rsi pop rdi %endif diff --git a/vp8/encoder/x86/temporal_filter_apply_sse2.asm b/vp8/encoder/x86/temporal_filter_apply_sse2.asm index ce9d9836bdaf5023b69494df9354cf3327dfac3e..bd92b398a0b1c34205d52131ee53ea67ccdd2790 100644 --- a/vp8/encoder/x86/temporal_filter_apply_sse2.asm +++ b/vp8/encoder/x86/temporal_filter_apply_sse2.asm @@ -50,7 +50,7 @@ sym(vp8_temporal_filter_apply_sse2): ; 0x8000 >> (16 - strength) mov rdx, 16 sub rdx, arg(4) ; 16 - strength - movd xmm4, rdx ; can't use rdx w/ shift + movq xmm4, rdx ; can't use rdx w/ shift movdqa xmm5, [GLOBAL(_const_top_bit)] psrlw xmm5, xmm4 movdqa [rsp + rounding_bit], xmm5 diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c index 01482fcc7471970d3620387c9ac0ae407abc2189..1db61f1614c7a4943ca09e1d4bc73f24f496c468 100644 --- a/vp8/vp8_dx_iface.c +++ b/vp8/vp8_dx_iface.c @@ -64,7 +64,6 @@ struct vpx_codec_alg_priv vp8_stream_info_t si; int defer_alloc; int decoder_init; - struct VP8D_COMP *pbi; int postproc_cfg_set; vp8_postproc_cfg_t postproc_cfg; #if CONFIG_POSTPROC_VISUALIZER @@ -76,7 +75,9 @@ struct vpx_codec_alg_priv #endif vpx_image_t img; int img_setup; + struct frame_buffers yv12_frame_buffers; void *user_priv; + FRAGMENT_DATA fragments; }; static unsigned long vp8_priv_sz(const vpx_codec_dec_cfg_t *si, vpx_codec_flags_t flags) @@ -215,11 +216,36 @@ static vpx_codec_err_t vp8_init(vpx_codec_ctx_t *ctx, { vp8_init_ctx(ctx, &mmap); + /* initialize number of fragments to zero */ + ctx->priv->alg_priv->fragments.count = 0; + /* is input fragments enabled? 
*/ + ctx->priv->alg_priv->fragments.enabled = + (ctx->priv->alg_priv->base.init_flags & + VPX_CODEC_USE_INPUT_FRAGMENTS); + ctx->priv->alg_priv->defer_alloc = 1; /*post processing level initialized to do nothing */ } } + ctx->priv->alg_priv->yv12_frame_buffers.use_frame_threads = + (ctx->priv->alg_priv->base.init_flags & + VPX_CODEC_USE_FRAME_THREADING); + + /* for now, disable frame threading */ + ctx->priv->alg_priv->yv12_frame_buffers.use_frame_threads = 0; + + if(ctx->priv->alg_priv->yv12_frame_buffers.use_frame_threads && + (( ctx->priv->alg_priv->base.init_flags & + VPX_CODEC_USE_ERROR_CONCEALMENT) + || ( ctx->priv->alg_priv->base.init_flags & + VPX_CODEC_USE_INPUT_FRAGMENTS) ) ) + { + /* row-based threading, error concealment, and input fragments will + * not be supported when using frame-based threading */ + res = VPX_CODEC_INVALID_PARAM; + } + return res; } @@ -227,7 +253,7 @@ static vpx_codec_err_t vp8_destroy(vpx_codec_alg_priv_t *ctx) { int i; - vp8dx_remove_decompressor(ctx->pbi); + vp8_remove_decoder_instances(&ctx->yv12_frame_buffers); for (i = NELEMENTS(ctx->mmaps) - 1; i >= 0; i--) { @@ -343,6 +369,47 @@ static void yuvconfig2image(vpx_image_t *img, img->self_allocd = 0; } +static int +update_fragments(vpx_codec_alg_priv_t *ctx, + const uint8_t *data, + unsigned int data_sz, + vpx_codec_err_t *res) +{ + *res = VPX_CODEC_OK; + + if (ctx->fragments.count == 0) + { + /* New frame, reset fragment pointers and sizes */ + vpx_memset((void*)ctx->fragments.ptrs, 0, sizeof(ctx->fragments.ptrs)); + vpx_memset(ctx->fragments.sizes, 0, sizeof(ctx->fragments.sizes)); + } + if (ctx->fragments.enabled && !(data == NULL && data_sz == 0)) + { + /* Store a pointer to this fragment and return. We haven't + * received the complete frame yet, so we will wait with decoding. + */ + ctx->fragments.ptrs[ctx->fragments.count] = data; + ctx->fragments.sizes[ctx->fragments.count] = data_sz; + ctx->fragments.count++; + if (ctx->fragments.count > (1 << EIGHT_PARTITION) + 1) + { + ctx->fragments.count = 0; + *res = VPX_CODEC_INVALID_PARAM; + return -1; + } + return 0; + } + + if (!ctx->fragments.enabled) + { + ctx->fragments.ptrs[0] = data; + ctx->fragments.sizes[0] = data_sz; + ctx->fragments.count = 1; + } + + return 1; +} + static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx, const uint8_t *data, unsigned int data_sz, @@ -353,6 +420,11 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx, unsigned int resolution_change = 0; unsigned int w, h; + + /* Update the input fragment data */ + if(update_fragments(ctx, data, data_sz, &res) <= 0) + return res; + /* Determine the stream parameters. Note that we rely on peek_si to * validate that we have a buffer that does not wrap around the top * of the heap. 
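Before the next hunk, a note on the contract that update_fragments() establishes (check_fragments_for_errors() is its decoder-side counterpart earlier in the series): callers keep feeding fragments until a NULL/0 flush or a non-fragmented frame arrives. The sketch below restates that tri-state protocol under stated assumptions — EIGHT_PARTITION is 3 as in vp8's headers, vpx_memset is replaced by plain memset, and push_fragment is an illustrative name. The vp8_dx_iface.c diff resumes at the next hunk header.

    #include <string.h>

    #define EIGHT_PARTITION 3                             /* assumed, as in vp8 */
    #define MAX_PARTITIONS ((1 << EIGHT_PARTITION) + 1)   /* 9 */

    typedef struct
    {
        int enabled;                   /* fragment input mode requested? */
        unsigned int count;
        const unsigned char *ptrs[MAX_PARTITIONS];
        unsigned int sizes[MAX_PARTITIONS];
    } FRAGMENT_DATA;

    /* Returns 1 when a whole frame is buffered and decoding may proceed,
     * 0 while waiting for more fragments, -1 if too many arrive. Unlike
     * the patch, the bound is tested before the store, so ptrs[] can
     * never be written out of range. */
    static int push_fragment(FRAGMENT_DATA *f,
                             const unsigned char *data, unsigned int sz)
    {
        if (f->count == 0)
        {
            /* new frame: forget the previous frame's fragments */
            memset((void *)f->ptrs, 0, sizeof(f->ptrs));
            memset(f->sizes, 0, sizeof(f->sizes));
        }
        if (f->enabled && !(data == NULL && sz == 0))
        {
            if (f->count >= MAX_PARTITIONS)
            {
                f->count = 0;
                return -1;             /* too many fragments */
            }
            f->ptrs[f->count] = data;  /* stash fragment, wait for more */
            f->sizes[f->count] = sz;
            f->count++;
            return 0;
        }
        if (!f->enabled)               /* whole frame in a single call */
        {
            f->ptrs[0] = data;
            f->sizes[0] = sz;
            f->count = 1;
        }
        return 1;                      /* NULL/0 flushes a buffered frame */
    }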
@@ -360,7 +432,8 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx, w = ctx->si.w; h = ctx->si.h; - res = ctx->base.iface->dec.peek_si(data, data_sz, &ctx->si); + res = ctx->base.iface->dec.peek_si(ctx->fragments.ptrs[0], + ctx->fragments.sizes[0], &ctx->si); if((res == VPX_CODEC_UNSUP_BITSTREAM) && !ctx->si.is_kf) { @@ -412,7 +485,6 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx, if (!res) { VP8D_CONFIG oxcf; - struct VP8D_COMP* optr; oxcf.Width = ctx->si.w; oxcf.Height = ctx->si.h; @@ -421,10 +493,6 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx, oxcf.max_threads = ctx->cfg.threads; oxcf.error_concealment = (ctx->base.init_flags & VPX_CODEC_USE_ERROR_CONCEALMENT); - oxcf.input_fragments = - (ctx->base.init_flags & VPX_CODEC_USE_INPUT_FRAGMENTS); - - optr = vp8dx_create_decompressor(&oxcf); /* If postprocessing was enabled by the application and a * configuration has not been provided, default it. @@ -438,20 +506,17 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx, ctx->postproc_cfg.noise_level = 0; } - if (!optr) - res = VPX_CODEC_ERROR; - else - ctx->pbi = optr; + res = vp8_create_decoder_instances(&ctx->yv12_frame_buffers, &oxcf); } ctx->decoder_init = 1; } - if (!res && ctx->pbi) + if (!res) { + VP8D_COMP *pbi = ctx->yv12_frame_buffers.pbi[0]; if(resolution_change) { - VP8D_COMP *pbi = ctx->pbi; VP8_COMMON *const pc = & pbi->common; MACROBLOCKD *const xd = & pbi->mb; #if CONFIG_MULTITHREAD @@ -541,15 +606,20 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx, pbi->common.error.setjmp = 0; /* required to get past the first get_free_fb() call */ - ctx->pbi->common.fb_idx_ref_cnt[0] = 0; + pbi->common.fb_idx_ref_cnt[0] = 0; } + /* update the pbi fragment data */ + pbi->fragments = ctx->fragments; + ctx->user_priv = user_priv; - if (vp8dx_receive_compressed_data(ctx->pbi, data_sz, data, deadline)) + if (vp8dx_receive_compressed_data(pbi, data_sz, data, deadline)) { - VP8D_COMP *pbi = (VP8D_COMP *)ctx->pbi; res = update_error_state(ctx, &pbi->common.error); } + + /* get ready for the next series of fragments */ + ctx->fragments.count = 0; } return res; @@ -590,7 +660,8 @@ static vpx_image_t *vp8_get_frame(vpx_codec_alg_priv_t *ctx, #endif } - if (0 == vp8dx_get_raw_frame(ctx->pbi, &sd, &time_stamp, &time_end_stamp, &flags)) + if (0 == vp8dx_get_raw_frame(ctx->yv12_frame_buffers.pbi[0], &sd, + &time_stamp, &time_end_stamp, &flags)) { yuvconfig2image(&ctx->img, &sd, ctx->user_priv); @@ -715,14 +786,15 @@ static vpx_codec_err_t vp8_set_reference(vpx_codec_alg_priv_t *ctx, vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *); - if (data) + if (data && !ctx->yv12_frame_buffers.use_frame_threads) { vpx_ref_frame_t *frame = (vpx_ref_frame_t *)data; YV12_BUFFER_CONFIG sd; image2yuvconfig(&frame->img, &sd); - return vp8dx_set_reference(ctx->pbi, frame->frame_type, &sd); + return vp8dx_set_reference(ctx->yv12_frame_buffers.pbi[0], + frame->frame_type, &sd); } else return VPX_CODEC_INVALID_PARAM; @@ -736,14 +808,15 @@ static vpx_codec_err_t vp8_get_reference(vpx_codec_alg_priv_t *ctx, vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *); - if (data) + if (data && !ctx->yv12_frame_buffers.use_frame_threads) { vpx_ref_frame_t *frame = (vpx_ref_frame_t *)data; YV12_BUFFER_CONFIG sd; image2yuvconfig(&frame->img, &sd); - return vp8dx_get_reference(ctx->pbi, frame->frame_type, &sd); + return vp8dx_get_reference(ctx->yv12_frame_buffers.pbi[0], + frame->frame_type, &sd); } else return VPX_CODEC_INVALID_PARAM; @@ -799,10 +872,11 @@ 
static vpx_codec_err_t vp8_get_last_ref_updates(vpx_codec_alg_priv_t *ctx, va_list args) { int *update_info = va_arg(args, int *); - VP8D_COMP *pbi = (VP8D_COMP *)ctx->pbi; - if (update_info) + if (update_info && !ctx->yv12_frame_buffers.use_frame_threads) { + VP8D_COMP *pbi = (VP8D_COMP *)ctx->yv12_frame_buffers.pbi[0]; + *update_info = pbi->common.refresh_alt_ref_frame * (int) VP8_ALTR_FRAME + pbi->common.refresh_golden_frame * (int) VP8_GOLD_FRAME + pbi->common.refresh_last_frame * (int) VP8_LAST_FRAME; @@ -819,11 +893,11 @@ static vpx_codec_err_t vp8_get_last_ref_frame(vpx_codec_alg_priv_t *ctx, va_list args) { int *ref_info = va_arg(args, int *); - VP8D_COMP *pbi = (VP8D_COMP *)ctx->pbi; - VP8_COMMON *oci = &pbi->common; - if (ref_info) + if (ref_info && !ctx->yv12_frame_buffers.use_frame_threads) { + VP8D_COMP *pbi = (VP8D_COMP *)ctx->yv12_frame_buffers.pbi[0]; + VP8_COMMON *oci = &pbi->common; *ref_info = (vp8dx_references_buffer( oci, ALTREF_FRAME )?VP8_ALTR_FRAME:0) | (vp8dx_references_buffer( oci, GOLDEN_FRAME )?VP8_GOLD_FRAME:0) | @@ -844,7 +918,7 @@ static vpx_codec_err_t vp8_get_frame_corrupted(vpx_codec_alg_priv_t *ctx, if (corrupted) { - VP8D_COMP *pbi = (VP8D_COMP *)ctx->pbi; + VP8D_COMP *pbi = (VP8D_COMP *)ctx->yv12_frame_buffers.pbi[0]; *corrupted = pbi->common.frame_to_show->corrupted; return VPX_CODEC_OK; diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk index 0659407adc3b7f8a71d6b5147dce0b091d742598..f6feafb6ea3a690304554ea5a6754cfe9705a8f8 100644 --- a/vp8/vp8cx.mk +++ b/vp8/vp8cx.mk @@ -89,8 +89,15 @@ VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/subtract_mmx.asm VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp8_enc_stubs_mmx.c VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm +VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.c VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.asm +# TODO(johann) make this generic +ifeq ($(HAVE_SSE2),yes) +vp8/encoder/x86/quantize_sse2.c.o: CFLAGS += -msse2 +vp8/encoder/x86/quantize_sse2.c.d: CFLAGS += -msse2 +endif + ifeq ($(CONFIG_TEMPORAL_DENOISING),yes) VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/denoising_sse2.c ifeq ($(HAVE_SSE2),yes) @@ -112,7 +119,6 @@ ifeq ($(CONFIG_REALTIME_ONLY),yes) VP8_CX_SRCS_REMOVE-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm endif - VP8_CX_SRCS-yes := $(filter-out $(VP8_CX_SRCS_REMOVE-yes),$(VP8_CX_SRCS-yes)) $(eval $(call asm_offsets_template,\ diff --git a/vp8_multi_resolution_encoder.c b/vp8_multi_resolution_encoder.c index eae36a4da4f4e7384bbea805b8c83b7f543e5989..4c29056e563e77eeab54941997044accd8740be4 100644 --- a/vp8_multi_resolution_encoder.c +++ b/vp8_multi_resolution_encoder.c @@ -216,7 +216,7 @@ int main(int argc, char **argv) * If target bitrate for highest-resolution level is set to 0, * (i.e. target_bitrate[0]=0), we skip encoding at that level. */ - unsigned int target_bitrate[NUM_ENCODERS]={1400, 500, 100}; + unsigned int target_bitrate[NUM_ENCODERS]={1000, 500, 100}; /* Enter the frame rate of the input video */ int framerate = 30; /* Set down-sampling factor for each resolution level. @@ -351,27 +351,26 @@ int main(int argc, char **argv) if(vpx_codec_control(&codec[i], VP8E_SET_CPUUSED, speed)) die_codec(&codec[i], "Failed to set cpu_used"); } - /* Set static thresh for highest-resolution encoder. Set it to 1000 for - * better performance. 
*/ - { - unsigned int static_thresh = 1000; - if(vpx_codec_control(&codec[0], VP8E_SET_STATIC_THRESHOLD, static_thresh)) - die_codec(&codec[0], "Failed to set static threshold"); - } - /* Set static thresh = 0 for other encoders for better quality */ - for ( i=1; i<NUM_ENCODERS; i++) + + /* Set static threshold. */ + for ( i=0; i<NUM_ENCODERS; i++) { - unsigned int static_thresh = 0; + unsigned int static_thresh = 1; if(vpx_codec_control(&codec[i], VP8E_SET_STATIC_THRESHOLD, static_thresh)) die_codec(&codec[i], "Failed to set static threshold"); } + /* Set NOISE_SENSITIVITY to do TEMPORAL_DENOISING */ - for ( i=0; i< NUM_ENCODERS; i++) + /* Enable denoising for the highest-resolution encoder. */ + if(vpx_codec_control(&codec[0], VP8E_SET_NOISE_SENSITIVITY, 1)) + die_codec(&codec[0], "Failed to set noise_sensitivity"); + for ( i=1; i< NUM_ENCODERS; i++) { if(vpx_codec_control(&codec[i], VP8E_SET_NOISE_SENSITIVITY, 0)) die_codec(&codec[i], "Failed to set noise_sensitivity"); } + frame_avail = 1; got_data = 0; diff --git a/vp9/common/vp9_findnearmv.c b/vp9/common/vp9_findnearmv.c index 77b79395e3fb4979cd8fc8b1e0c672abd2ab4c91..2f709bf58da5bfbab451ee0931947d04a2b34e90 100644 --- a/vp9/common/vp9_findnearmv.c +++ b/vp9/common/vp9_findnearmv.c @@ -57,9 +57,9 @@ unsigned int vp9_sad16x3_c(const uint8_t *src_ptr, unsigned int vp9_variance2x16_c(const uint8_t *src_ptr, - const int source_stride, + int source_stride, const uint8_t *ref_ptr, - const int recon_stride, + int recon_stride, unsigned int *sse) { int sum; variance(src_ptr, source_stride, ref_ptr, recon_stride, 2, 16, sse, &sum); @@ -67,9 +67,9 @@ unsigned int vp9_variance2x16_c(const uint8_t *src_ptr, } unsigned int vp9_variance16x2_c(const uint8_t *src_ptr, - const int source_stride, + int source_stride, const uint8_t *ref_ptr, - const int recon_stride, + int recon_stride, unsigned int *sse) { int sum; variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 2, sse, &sum); @@ -77,11 +77,11 @@ unsigned int vp9_variance16x2_c(const uint8_t *src_ptr, } unsigned int vp9_sub_pixel_variance16x2_c(const uint8_t *src_ptr, - const int src_pixels_per_line, - const int xoffset, - const int yoffset, + int src_pixels_per_line, + int xoffset, + int yoffset, const uint8_t *dst_ptr, - const int dst_pixels_per_line, + int dst_pixels_per_line, unsigned int *sse) { uint16_t FData3[16 * 3]; // Temp data buffer used in filtering uint8_t temp2[2 * 16]; @@ -98,11 +98,11 @@ unsigned int vp9_sub_pixel_variance16x2_c(const uint8_t *src_ptr, } unsigned int vp9_sub_pixel_variance2x16_c(const uint8_t *src_ptr, - const int src_pixels_per_line, - const int xoffset, - const int yoffset, + int src_pixels_per_line, + int xoffset, + int yoffset, const uint8_t *dst_ptr, - const int dst_pixels_per_line, + int dst_pixels_per_line, unsigned int *sse) { uint16_t FData3[2 * 17]; // Temp data buffer used in filtering uint8_t temp2[2 * 16]; diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 9cbf44c464d2ec6f3906368ba65265764d878bdc..e6dcff4d1a16c9a2cbefbd8af7b2d8e34b3c6eef 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -14,7 +14,7 @@ struct loop_filter_info; /* Encoder forward decls */ struct block; struct macroblock; -struct variance_vtable; +struct vp9_variance_vtable; #define DEC_MVCOSTS int *mvjcost, int *mvcost[2] union int_mv; @@ -470,25 +470,25 @@ specialize vp9_sad8x8x8 sse4 prototype void vp9_sad4x4x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array" specialize 
vp9_sad4x4x8 sse4 -prototype void vp9_sad64x64x4d "const uint8_t *src_ptr, int src_stride, const uint8_t **ref_ptr, int ref_stride, unsigned int *sad_array" +prototype void vp9_sad64x64x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" specialize vp9_sad64x64x4d sse2 -prototype void vp9_sad32x32x4d "const uint8_t *src_ptr, int src_stride, const uint8_t **ref_ptr, int ref_stride, unsigned int *sad_array" +prototype void vp9_sad32x32x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" specialize vp9_sad32x32x4d sse2 -prototype void vp9_sad16x16x4d "const uint8_t *src_ptr, int src_stride, const uint8_t **ref_ptr, int ref_stride, unsigned int *sad_array" +prototype void vp9_sad16x16x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" specialize vp9_sad16x16x4d sse2 -prototype void vp9_sad16x8x4d "const uint8_t *src_ptr, int src_stride, const uint8_t **ref_ptr, int ref_stride, unsigned int *sad_array" +prototype void vp9_sad16x8x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" specialize vp9_sad16x8x4d sse2 -prototype void vp9_sad8x16x4d "const uint8_t *src_ptr, int src_stride, const uint8_t **ref_ptr, int ref_stride, unsigned int *sad_array" +prototype void vp9_sad8x16x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" specialize vp9_sad8x16x4d sse2 -prototype void vp9_sad8x8x4d "const uint8_t *src_ptr, int src_stride, const uint8_t **ref_ptr, int ref_stride, unsigned int *sad_array" +prototype void vp9_sad8x8x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" specialize vp9_sad8x8x4d sse2 -prototype void vp9_sad4x4x4d "const uint8_t *src_ptr, int src_stride, const uint8_t **ref_ptr, int ref_stride, unsigned int *sad_array" +prototype void vp9_sad4x4x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" specialize vp9_sad4x4x4d sse prototype unsigned int vp9_sub_pixel_mse16x16 "const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse" specialize vp9_sub_pixel_mse16x16 sse2 mmx @@ -577,16 +577,16 @@ specialize vp9_short_walsh8x4_x8 # # Motion search # -prototype int vp9_full_search_sad "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, DEC_MVCOSTS, union int_mv *center_mv" +prototype int vp9_full_search_sad "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, union int_mv *center_mv" specialize vp9_full_search_sad sse3 sse4_1 vp9_full_search_sad_sse3=vp9_full_search_sadx3 vp9_full_search_sad_sse4_1=vp9_full_search_sadx8 -prototype int vp9_refining_search_sad "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, DEC_MVCOSTS, union int_mv *center_mv" +prototype int vp9_refining_search_sad "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, union int_mv *center_mv" 
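The prototype edits in this stretch are pure const-correctness: the x4d SAD functions receive a fixed set of reference pointers they never reseat, so the parameter becomes const uint8_t* const ref_ptr[] (the specialize line for the prototype above continues just below this aside). A hedged C illustration, with an invented name and a 4x4 block for brevity; the struct variance_vtable rename to vp9_variance_vtable in the same stretch presumably keeps the tag from colliding with vp8's when both codecs link into one binary.

    #include <stdint.h>
    #include <stdlib.h>

    /* "const uint8_t *const ref[]" promises the callee will modify
     * neither the pixels nor the pointer array, yet still accepts an
     * ordinary "const uint8_t *refs[4]" from the caller. */
    static void sad4x4x4d_sketch(const uint8_t *src, int src_stride,
                                 const uint8_t *const ref[], int ref_stride,
                                 unsigned int sad_array[4])
    {
        int i, r, c;
        for (i = 0; i < 4; i++)
        {
            unsigned int acc = 0;
            for (r = 0; r < 4; r++)
                for (c = 0; c < 4; c++)
                    acc += abs(src[r * src_stride + c] -
                               ref[i][r * ref_stride + c]);
            sad_array[i] = acc;
        }
    }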
specialize vp9_refining_search_sad sse3 vp9_refining_search_sad_sse3=vp9_refining_search_sadx4 -prototype int vp9_diamond_search_sad "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, union int_mv *best_mv, int search_param, int sad_per_bit, int *num00, struct variance_vtable *fn_ptr, DEC_MVCOSTS, union int_mv *center_mv" +prototype int vp9_diamond_search_sad "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, union int_mv *best_mv, int search_param, int sad_per_bit, int *num00, struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, union int_mv *center_mv" specialize vp9_diamond_search_sad sse3 vp9_diamond_search_sad_sse3=vp9_diamond_search_sadx4 diff --git a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm index fa24f4cd0da4bc56f13d74ba3f17470b2cc4812c..32f00e2893dade99ee9085866fee80cbc39b893c 100644 --- a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm +++ b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm @@ -113,7 +113,7 @@ mov rcx, 0x0400040 movdqa xmm4, [rdx] ;load filters - movd xmm5, rcx + movq xmm5, rcx packsswb xmm4, xmm4 pshuflw xmm0, xmm4, 0b ;k0_k1 pshuflw xmm1, xmm4, 01010101b ;k2_k3 @@ -198,7 +198,7 @@ mov rcx, 0x0400040 movdqa xmm4, [rdx] ;load filters - movd xmm5, rcx + movq xmm5, rcx packsswb xmm4, xmm4 pshuflw xmm0, xmm4, 0b ;k0_k1 pshuflw xmm1, xmm4, 01010101b ;k2_k3 @@ -542,7 +542,7 @@ sym(vp9_filter_block1d16_v8_avg_ssse3): mov rcx, 0x0400040 movdqa xmm4, [rdx] ;load filters - movd xmm5, rcx + movq xmm5, rcx packsswb xmm4, xmm4 pshuflw xmm0, xmm4, 0b ;k0_k1 pshuflw xmm1, xmm4, 01010101b ;k2_k3 @@ -681,7 +681,7 @@ sym(vp9_filter_block1d16_v8_avg_ssse3): mov rcx, 0x0400040 movdqa xmm4, [rdx] ;load filters - movd xmm5, rcx + movq xmm5, rcx packsswb xmm4, xmm4 pshuflw xmm0, xmm4, 0b ;k0_k1 pshuflw xmm1, xmm4, 01010101b ;k2_k3 diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index d4212a52eea488176762121a634fdf51e1de5a63..86806d2d0143b6906688530ece299628f9090f32 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -565,7 +565,7 @@ static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, }; static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd, - int mb_row, unsigned int mb_col, + int mb_row, int mb_col, BOOL_DECODER* const bc) { int n, eobtotal; TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; @@ -688,7 +688,7 @@ static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd, } static void decode_superblock32(VP9D_COMP *pbi, MACROBLOCKD *xd, - int mb_row, unsigned int mb_col, + int mb_row, int mb_col, BOOL_DECODER* const bc) { int n, eobtotal; TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index 4d0a299e8437325d20aec04dae42e077d3091ce4..337276d5919b81b29e3d28ae633a9c65d71d5ffd 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -751,7 +751,8 @@ void vp9_first_pass(VP9_COMP *cpi) { ((cm->current_video_frame > 0) && (cpi->twopass.this_frame_stats->pcnt_inter > 0.20) && ((cpi->twopass.this_frame_stats->intra_error / - cpi->twopass.this_frame_stats->coded_error) > 2.0))) { + DOUBLE_DIVIDE_CHECK(cpi->twopass.this_frame_stats->coded_error)) > + 2.0))) { vp8_yv12_copy_frame(lst_yv12, gld_yv12); cpi->twopass.sr_update_lag = 1; } else @@ -1650,8 +1651,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { if ((cpi->twopass.kf_group_bits > 0) && (cpi->twopass.kf_group_error_left > 0)) { cpi->twopass.gf_group_bits 
= - (int)((double)cpi->twopass.kf_group_bits * - (gf_group_err / cpi->twopass.kf_group_error_left)); + (int64_t)(cpi->twopass.kf_group_bits * + (gf_group_err / cpi->twopass.kf_group_error_left)); } else cpi->twopass.gf_group_bits = 0; diff --git a/vp9/encoder/vp9_lookahead.c b/vp9/encoder/vp9_lookahead.c index a51c786563d98a55a3749f8f7c4a1a8ef1706d60..1bca9d267c2a300c5e32b62dd4af99b284b94775 100644 --- a/vp9/encoder/vp9_lookahead.c +++ b/vp9/encoder/vp9_lookahead.c @@ -174,7 +174,7 @@ vp9_lookahead_peek(struct lookahead_ctx *ctx, int index) { struct lookahead_entry *buf = NULL; - assert(index < ctx->max_sz); + assert(index < (int)ctx->max_sz); if (index < (int)ctx->sz) { index += ctx->read_idx; if (index >= (int)ctx->max_sz) diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 496be950c63949876a8472fa0398c7edfaac2824..59e33a46480c6fecfe7fa27e7fbfcba89d5a794a 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -756,7 +756,7 @@ static int vp9_sb_block_error_c(int16_t *coeff, int16_t *dqcoeff, error += this_diff * this_diff; } - return error > INT_MAX ? INT_MAX : error; + return error > INT_MAX ? INT_MAX : (int)error; } #define DEBUG_ERROR 0 @@ -3067,9 +3067,9 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, ctx->best_ref_mv.as_int = ref_mv->as_int; ctx->second_best_ref_mv.as_int = second_ref_mv->as_int; - ctx->single_pred_diff = comp_pred_diff[SINGLE_PREDICTION_ONLY]; - ctx->comp_pred_diff = comp_pred_diff[COMP_PREDICTION_ONLY]; - ctx->hybrid_pred_diff = comp_pred_diff[HYBRID_PREDICTION]; + ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_PREDICTION_ONLY]; + ctx->comp_pred_diff = (int)comp_pred_diff[COMP_PREDICTION_ONLY]; + ctx->hybrid_pred_diff = (int)comp_pred_diff[HYBRID_PREDICTION]; memcpy(ctx->txfm_rd_diff, txfm_size_diff, sizeof(ctx->txfm_rd_diff)); } diff --git a/vp9/encoder/vp9_sad_c.c b/vp9/encoder/vp9_sad_c.c index dc21f02f638b804ead98c58fb9b2889ee9fc2791..af5526dce333bf133eff44b957ea3c63951ab766 100644 --- a/vp9/encoder/vp9_sad_c.c +++ b/vp9/encoder/vp9_sad_c.c @@ -383,7 +383,7 @@ void vp9_sad4x4x8_c(const uint8_t *src_ptr, void vp9_sad64x64x4d_c(const uint8_t *src_ptr, int src_stride, - const uint8_t *ref_ptr[], + const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array) { sad_array[0] = vp9_sad64x64(src_ptr, src_stride, @@ -398,7 +398,7 @@ void vp9_sad64x64x4d_c(const uint8_t *src_ptr, void vp9_sad32x32x4d_c(const uint8_t *src_ptr, int src_stride, - const uint8_t *ref_ptr[], + const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array) { sad_array[0] = vp9_sad32x32(src_ptr, src_stride, @@ -413,7 +413,7 @@ void vp9_sad32x32x4d_c(const uint8_t *src_ptr, void vp9_sad16x16x4d_c(const uint8_t *src_ptr, int src_stride, - const uint8_t *ref_ptr[], + const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array) { sad_array[0] = vp9_sad16x16(src_ptr, src_stride, @@ -428,7 +428,7 @@ void vp9_sad16x16x4d_c(const uint8_t *src_ptr, void vp9_sad16x8x4d_c(const uint8_t *src_ptr, int src_stride, - const uint8_t *ref_ptr[], + const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array) { sad_array[0] = vp9_sad16x8(src_ptr, src_stride, @@ -443,7 +443,7 @@ void vp9_sad16x8x4d_c(const uint8_t *src_ptr, void vp9_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, - const uint8_t *ref_ptr[], + const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array) { sad_array[0] = vp9_sad8x8(src_ptr, src_stride, @@ -458,7 +458,7 @@ void vp9_sad8x8x4d_c(const uint8_t *src_ptr, void 
vp9_sad8x16x4d_c(const uint8_t *src_ptr, int src_stride, - const uint8_t *ref_ptr[], + const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array) { sad_array[0] = vp9_sad8x16(src_ptr, src_stride, @@ -473,7 +473,7 @@ void vp9_sad8x16x4d_c(const uint8_t *src_ptr, void vp9_sad4x4x4d_c(const uint8_t *src_ptr, int src_stride, - const uint8_t *ref_ptr[], + const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array) { sad_array[0] = vp9_sad4x4(src_ptr, src_stride, diff --git a/vp9/encoder/vp9_variance.h b/vp9/encoder/vp9_variance.h index 8b32524a24d5eab6d5dc2409c12e281c65617878..13dabbda41d711a8dce115cf2892182450083a14 100644 --- a/vp9/encoder/vp9_variance.h +++ b/vp9/encoder/vp9_variance.h @@ -33,7 +33,7 @@ typedef void (*vp9_sad_multi1_fn_t)(const uint8_t *src_ptr, typedef void (*vp9_sad_multi_d_fn_t)(const uint8_t *src_ptr, int source_stride, - const uint8_t ** ref_ptr, + const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); typedef unsigned int (*vp9_variance_fn_t)(const uint8_t *src_ptr, @@ -63,7 +63,7 @@ typedef unsigned int (*vp9_get16x16prederror_fn_t)(const uint8_t *src_ptr, const uint8_t *ref_ptr, int ref_stride); -typedef struct variance_vtable { +typedef struct vp9_variance_vtable { vp9_sad_fn_t sdf; vp9_variance_fn_t vf; vp9_subpixvariance_fn_t svf; diff --git a/vp9/encoder/x86/vp9_dct_sse2.asm b/vp9/encoder/x86/vp9_dct_sse2.asm index 57b81a566282679b8e573cb3f2bfc0eed4544c3a..bbd6086dafd3b3dd0689ee969f4054f4e430e5ca 100644 --- a/vp9/encoder/x86/vp9_dct_sse2.asm +++ b/vp9/encoder/x86/vp9_dct_sse2.asm @@ -29,7 +29,7 @@ movsxd rax, dword ptr arg(2) lea rcx, [rsi + rax*2] %else - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 %define input rcx %define output rdx %define pitch r8 @@ -53,7 +53,7 @@ RESTORE_GOT pop rbp %else - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 RESTORE_XMM %endif %endif diff --git a/vp9/encoder/x86/vp9_quantize_sse2.asm b/vp9/encoder/x86/vp9_quantize_sse2.asm index 060acc2acfaef6a2afc59e30c602c751086edbc5..2a686f5a87b0fd425f3a11f60af9eebf2742223b 100644 --- a/vp9/encoder/x86/vp9_quantize_sse2.asm +++ b/vp9/encoder/x86/vp9_quantize_sse2.asm @@ -27,7 +27,7 @@ sym(vp9_regular_quantize_b_sse2): push rdi push rsi %else - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 push rdi push rsi %endif @@ -46,7 +46,7 @@ sym(vp9_regular_quantize_b_sse2): mov rdi, arg(0) ; BLOCK *b mov rsi, arg(1) ; BLOCKD *d %else - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 mov rdi, rcx ; BLOCK *b mov rsi, rdx ; BLOCKD *d %else @@ -223,7 +223,7 @@ ZIGZAG_LOOP 15 pop rsi pop rdi %else - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 pop rsi pop rdi %endif @@ -247,7 +247,7 @@ sym(vp9_fast_quantize_b_sse2): push rdi push rsi %else - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 push rdi push rsi %else @@ -261,7 +261,7 @@ sym(vp9_fast_quantize_b_sse2): mov rdi, arg(0) ; BLOCK *b mov rsi, arg(1) ; BLOCKD *d %else - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 mov rdi, rcx ; BLOCK *b mov rsi, rdx ; BLOCKD *d %else @@ -361,7 +361,7 @@ sym(vp9_fast_quantize_b_sse2): pop rsi pop rdi %else - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 pop rsi pop rdi %endif diff --git a/vp9/encoder/x86/vp9_quantize_sse4.asm b/vp9/encoder/x86/vp9_quantize_sse4.asm index 1d43ce95863b60063777a552f1cf0eb950302f4c..d7779bd0df11519c30739caa96ec186df8c27a5d 100644 --- a/vp9/encoder/x86/vp9_quantize_sse4.asm +++ b/vp9/encoder/x86/vp9_quantize_sse4.asm @@ -31,7 +31,7 @@ sym(vp9_regular_quantize_b_sse4): %define stack_size 32 
sub rsp, stack_size %else - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 SAVE_XMM 8, u push rdi push rsi @@ -43,7 +43,7 @@ sym(vp9_regular_quantize_b_sse4): mov rdi, arg(0) ; BLOCK *b mov rsi, arg(1) ; BLOCKD *d %else - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 mov rdi, rcx ; BLOCK *b mov rsi, rdx ; BLOCKD *d %else @@ -238,7 +238,7 @@ ZIGZAG_LOOP 15, 7, xmm3, xmm7, xmm8 pop rbp %else %undef xmm5 - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 pop rsi pop rdi RESTORE_XMM diff --git a/vp9/encoder/x86/vp9_quantize_ssse3.asm b/vp9/encoder/x86/vp9_quantize_ssse3.asm index 41edbc13ef25a3cb56b8406575ec32f4540b8f78..e082af1f5c46f65d418a3020874380c7ea0faba9 100644 --- a/vp9/encoder/x86/vp9_quantize_ssse3.asm +++ b/vp9/encoder/x86/vp9_quantize_ssse3.asm @@ -27,7 +27,7 @@ sym(vp9_fast_quantize_b_ssse3): push rdi push rsi %else - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 push rdi push rsi %endif @@ -38,7 +38,7 @@ sym(vp9_fast_quantize_b_ssse3): mov rdi, arg(0) ; BLOCK *b mov rsi, arg(1) ; BLOCKD *d %else - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 mov rdi, rcx ; BLOCK *b mov rsi, rdx ; BLOCKD *d %else @@ -122,7 +122,7 @@ sym(vp9_fast_quantize_b_ssse3): pop rsi pop rdi %else - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 pop rsi pop rdi %endif diff --git a/vp9/encoder/x86/vp9_sad_sse3.asm b/vp9/encoder/x86/vp9_sad_sse3.asm index 5d84172701c377d94eb62fdbb5cbc1d775417886..2b90a5d54789fa46787e0e2e7efe1dc6c978fdb8 100644 --- a/vp9/encoder/x86/vp9_sad_sse3.asm +++ b/vp9/encoder/x86/vp9_sad_sse3.asm @@ -33,7 +33,7 @@ movsxd rax, dword ptr arg(1) ; src_stride movsxd rdx, dword ptr arg(3) ; ref_stride %else - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 SAVE_XMM 7, u %define src_ptr rcx %define src_stride rdx @@ -76,7 +76,7 @@ pop rsi pop rbp %else - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 RESTORE_XMM %endif %endif diff --git a/vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm b/vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm index a559d5d5ae1b71c5ac7a60747fb751553ef757b7..d2d13b3839425b5d297e1af3823963965b049ea0 100644 --- a/vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm +++ b/vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm @@ -50,7 +50,7 @@ sym(vp9_temporal_filter_apply_sse2): ; 0x8000 >> (16 - strength) mov rdx, 16 sub rdx, arg(4) ; 16 - strength - movd xmm4, rdx ; can't use rdx w/ shift + movq xmm4, rdx ; can't use rdx w/ shift movdqa xmm5, [GLOBAL(_const_top_bit)] psrlw xmm5, xmm4 movdqa [rsp + rounding_bit], xmm5 diff --git a/vpx/vpx_decoder.h b/vpx/vpx_decoder.h index d2dec6f5dd0517375332cfd77ddc60b432337089..ddbc0a6d6b0584a5dbb16878e0928cbb9ac6adaa 100644 --- a/vpx/vpx_decoder.h +++ b/vpx/vpx_decoder.h @@ -65,12 +65,17 @@ extern "C" { * * The available flags are specified by VPX_CODEC_USE_* defines. 
*/ +#define VPX_CODEC_CAP_FRAME_THREADING 0x200000 /**< Can support frame-based + multi-threading */ + #define VPX_CODEC_USE_POSTPROC 0x10000 /**< Postprocess decoded frame */ #define VPX_CODEC_USE_ERROR_CONCEALMENT 0x20000 /**< Conceal errors in decoded frames */ #define VPX_CODEC_USE_INPUT_FRAGMENTS 0x40000 /**< The input frame should be passed to the decoder one fragment at a time */ +#define VPX_CODEC_USE_FRAME_THREADING 0x80000 /**< Enable frame-based + multi-threading */ /*!\brief Stream properties * diff --git a/vpx_ports/emms.asm b/vpx_ports/emms.asm index efad1a503db477b7b02c16f4d7ad4a6a9ba934bd..db8da287375206fad61ad631ba0abcb7c4f3709e 100644 --- a/vpx_ports/emms.asm +++ b/vpx_ports/emms.asm @@ -18,7 +18,7 @@ sym(vpx_reset_mmx_state): ret -%ifidn __OUTPUT_FORMAT__,x64 +%if LIBVPX_YASM_WIN64 global sym(vpx_winx64_fldcw) PRIVATE sym(vpx_winx64_fldcw): sub rsp, 8 diff --git a/vpx_ports/x86_abi_support.asm b/vpx_ports/x86_abi_support.asm index 0c9fe377405693d7fcb406d823519d3c2d623a9e..eccbfa35c2080965025fd4d7c831a89dfe78de19 100644 --- a/vpx_ports/x86_abi_support.asm +++ b/vpx_ports/x86_abi_support.asm @@ -78,6 +78,17 @@ %endif +; LIBVPX_YASM_WIN64 +; Set LIBVPX_YASM_WIN64 if output is Windows 64bit so the code will work if x64 +; or win64 is defined on the Yasm command line. +%ifidn __OUTPUT_FORMAT__,win64 +%define LIBVPX_YASM_WIN64 1 +%elifidn __OUTPUT_FORMAT__,x64 +%define LIBVPX_YASM_WIN64 1 +%else +%define LIBVPX_YASM_WIN64 0 +%endif + ; sym() ; Return the proper symbol name for the target ABI. ; @@ -90,7 +101,7 @@ %define sym(x) x %elifidn __OUTPUT_FORMAT__,elfx32 %define sym(x) x -%elifidn __OUTPUT_FORMAT__,x64 +%elif LIBVPX_YASM_WIN64 %define sym(x) x %else %define sym(x) _ %+ x @@ -114,7 +125,7 @@ %define PRIVATE :hidden %elifidn __OUTPUT_FORMAT__,elfx32 %define PRIVATE :hidden - %elifidn __OUTPUT_FORMAT__,x64 + %elif LIBVPX_YASM_WIN64 %define PRIVATE %else %define PRIVATE :private_extern @@ -131,7 +142,7 @@ %else ; 64 bit ABI passes arguments in registers. This is a workaround to get up ; and running quickly. Relies on SHADOW_ARGS_TO_STACK - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 %define arg(x) [rbp+16+8*x] %else %define arg(x) [rbp-8-8*x] @@ -230,6 +241,12 @@ %elifidn __OUTPUT_FORMAT__,elfx32 %define WRT_PLT wrt ..plt %define HIDDEN_DATA(x) x:data hidden + %elifidn __OUTPUT_FORMAT__,macho64 + %ifdef CHROMIUM + %define HIDDEN_DATA(x) x:private_extern + %else + %define HIDDEN_DATA(x) x + %endif %else %define HIDDEN_DATA(x) x %endif @@ -251,7 +268,7 @@ %endm %define UNSHADOW_ARGS %else -%ifidn __OUTPUT_FORMAT__,x64 +%if LIBVPX_YASM_WIN64 %macro SHADOW_ARGS_TO_STACK 1 ; argc %if %1 > 0 mov arg(0),rcx @@ -307,7 +324,7 @@ ; Win64 ABI requires 16 byte stack alignment, but then pushes an 8 byte return ; value. Typically we follow this up with 'push rbp' - re-aligning the stack - ; but in some cases this is not done and unaligned movs must be used. -%ifidn __OUTPUT_FORMAT__,x64 +%if LIBVPX_YASM_WIN64 %macro SAVE_XMM 1-2 a %if %1 < 6 %error Only xmm registers 6-15 must be preserved diff --git a/vpxenc.c b/vpxenc.c index 2f3ae0f8cd59232a01f066d7e9bcb4849895d7b4..0df0a982ff929805f71bfbc2f605547b739cdde5 100644 --- a/vpxenc.c +++ b/vpxenc.c @@ -10,7 +10,7 @@ #include "vpx_config.h" -#if defined(_WIN32) || !CONFIG_OS_SUPPORT +#if defined(_WIN32) || defined(__OS2__) || !CONFIG_OS_SUPPORT #define USE_POSIX_MMAP 0 #else #define USE_POSIX_MMAP 1
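Taken together, the vpx_decoder.h flags and the vp8_init() checks earlier in the series define how an application is expected to opt in to frame-based threading once it is implemented. A hedged usage sketch follows; open_decoder() is an illustrative wrapper, not libvpx API, while the capability test uses the existing vpx_codec_get_caps() pattern.

    #include "vpx/vp8dx.h"
    #include "vpx/vpx_decoder.h"

    static int open_decoder(vpx_codec_ctx_t *ctx)
    {
        vpx_codec_iface_t *iface = vpx_codec_vp8_dx();
        vpx_codec_flags_t flags = 0;

        /* Request frame threading only if the interface advertises it. */
        if (vpx_codec_get_caps(iface) & VPX_CODEC_CAP_FRAME_THREADING)
            flags |= VPX_CODEC_USE_FRAME_THREADING;

        /* vp8_init() rejects frame threading combined with
         * VPX_CODEC_USE_ERROR_CONCEALMENT or VPX_CODEC_USE_INPUT_FRAGMENTS,
         * so leave those unset here. */
        return vpx_codec_dec_init(ctx, iface, NULL, flags) ? -1 : 0;
    }

Note that vp8_init() currently forces use_frame_threads back to 0, so at this stage of the series the flag is plumbing only.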