diff --git a/build/make/rtcd.pl b/build/make/rtcd.pl
new file mode 100755
index 0000000000000000000000000000000000000000..18ee80d36e7272d9ec34a88e9c2e5c5a71de0474
--- /dev/null
+++ b/build/make/rtcd.pl
@@ -0,0 +1,460 @@
+#!/usr/bin/env perl
+
+no strict 'refs';
+use warnings;
+use Getopt::Long;
+Getopt::Long::Configure("auto_help");
+
+# State built up while the definition files run: %ALL_FUNCS maps a function
+# name to [return-type words..., argument string], @ALL_ARCHS holds the
+# extensions enabled for the target, and @ALL_FORWARD_DECLS names the subs
+# that print forward declarations.
+my %ALL_FUNCS = ();
+my @ALL_ARCHS;
+my @ALL_FORWARD_DECLS;
+my @REQUIRES;
+
+my %opts = ();
+my %disabled = ();
+my %required = ();
+
+# Collect the open-ended --disable-EXT / --require-EXT switches by hand.  The
+# patterns are anchored so that only arguments that start with the switch are
+# consumed; everything else is handed to GetOptions().
+my @argv;
+foreach (@ARGV) {
+  $disabled{$1} = 1, next if /^--disable-(.*)/;
+  $required{$1} = 1, next if /^--require-(.*)/;
+  push @argv, $_;
+}
+
+# NB: use GetOptions() instead of GetOptionsFromArray() for compatibility.
+@ARGV = @argv;
+GetOptions(
+  \%opts,
+  'arch=s',
+  'sym=s',
+  'config=s',
+);
+
+foreach my $opt (qw/arch config/) {
+  if (!defined($opts{$opt})) {
+    warn "--$opt is required!\n";
+    Getopt::Long::HelpMessage('-exit' => 1);
+  }
+}
+
+# --sym is optional: fall back to "rtcd" so that the include guard and the
+# init-function prototype printed by common_top() are always valid C.
+$opts{sym} = 'rtcd' unless defined $opts{sym};
+
+foreach my $defs_file (@ARGV) {
+  if (!-f $defs_file) {
+    warn "$defs_file: $!\n";
+    Getopt::Long::HelpMessage('-exit' => 1);
+  }
+}
+
+# Load the CONFIG_* settings.  Each line is split on the first '=' only, so a
+# value that itself contains '=' is kept whole.
+open my $config_fh, '<', $opts{config} or
+  die "Error opening config file '$opts{config}': $!\n";
+
+my %config = ();
+while (<$config_fh>) {
+  next if !/^CONFIG_/;
+  chomp;
+  my @pair = split /=/, $_, 2;
+  $config{$pair[0]} = $pair[1];
+}
+close $config_fh;
+
+#
+# Routines for the RTCD DSL to call
+#
+
+# Returns the value of a CONFIG_* setting, or "" when it is not set.
+sub vpx_config($) {
+  return (defined $config{$_[0]}) ? $config{$_[0]} : "";
+}
+
+# specialize FN, OPT...: records that FN has an OPT flavour by setting the
+# package variable $FN_OPT to the symbol name "FN_OPT".  A definition file may
+# assign to that variable afterwards to name a different symbol.
+sub specialize {
+  my $fn=$_[0];
+  shift;
+  foreach my $opt (@_) {
+    eval "\$${fn}_${opt}=${fn}_${opt}";
+  }
+}
+
+# add_proto TYPE..., FN, ARGS: registers a prototype.  The function name is
+# the second-to-last argument; a "c" flavour is always recorded.
+sub add_proto {
+  my $fn = splice(@_, -2, 1);
+  $ALL_FUNCS{$fn} = \@_;
+  specialize $fn, "c";
+}
+
+# require OPT...: for every function that has an OPT flavour, makes that
+# flavour the default and marks the default it replaces as not linkable.
+# The name collides with the builtin, hence the &require(...) call syntax.
+sub require {
+  foreach my $fn (keys %ALL_FUNCS) {
+    foreach my $opt (@_) {
+      my $ofn = eval "\$${fn}_${opt}";
+      next if !$ofn;
+
+      # if we already have a default, then we can disable it, as we know
+      # we can do better.
+      my $best = eval "\$${fn}_default";
+      if ($best) {
+        my $best_ofn = eval "\$${best}";
+        if ($best_ofn && "$best_ofn" ne "$ofn") {
+          eval "\$${best}_link = 'false'";
+        }
+      }
+      eval "\$${fn}_default=${fn}_${opt}";
+      eval "\$${fn}_${opt}_link='true'";
+    }
+  }
+}
+
+# forward_decls NAME...: queues subs that print forward declarations.
+sub forward_decls {
+  push @ALL_FORWARD_DECLS, @_;
+}
+
+#
+# Include the user's directives
+#
+foreach my $f (@ARGV) {
+  open my $defs_fh, "<", $f or die "cannot open $f: $!\n";
+  my $contents = join('', <$defs_fh>);
+  close $defs_fh;
+  # The value of the last statement in a definitions file is arbitrary, so
+  # only $@ says whether the eval failed.  A failure is fatal: a partially
+  # evaluated file would otherwise yield an incomplete header.
+  eval $contents;
+  die "$f: eval failed: $@\n" if $@;
+}
+
+#
+# Process the directives according to the command line
+#
+
+# Calls every queued forward-declaration printer, in registration order.
+sub process_forward_decls() {
+  foreach (@ALL_FORWARD_DECLS) {
+    $_->();
+  }
+}
+
+# determine_indirection OPT...: sets $FN_indirect to 'false' when exactly one
+# linkable flavour of FN exists among OPT... (it can then be #defined
+# directly) and to 'true' otherwise.  Unless CONFIG_RUNTIME_CPU_DETECT is
+# "yes", every enabled extension is first treated as required.
+sub determine_indirection {
+  vpx_config("CONFIG_RUNTIME_CPU_DETECT") eq "yes" or &require(@ALL_ARCHS);
+  foreach my $fn (keys %ALL_FUNCS) {
+    my $n = "";
+    foreach my $opt (@_) {
+      my $ofn = eval "\$${fn}_${opt}";
+      next if !$ofn;
+      my $link = eval "\$${fn}_${opt}_link";
+      next if $link && $link eq "false";
+      $n .= "x";
+    }
+    if ($n eq "x") {
+      eval "\$${fn}_indirect = 'false'";
+    } else {
+      eval "\$${fn}_indirect = 'true'";
+    }
+  }
+}
+
+# declare_function_pointers OPT...: prints a prototype for every flavour of
+# every function, then either a #define of the function name (direct call)
+# or an RTCD_EXTERN function-pointer declaration (runtime dispatch).
+sub declare_function_pointers {
+  foreach my $fn (sort keys %ALL_FUNCS) {
+    my @val = @{$ALL_FUNCS{$fn}};
+    my $args = pop @val;
+    my $rtyp = "@val";
+    my $dfn = eval "\$${fn}_default";
+    $dfn = eval "\$${dfn}";
+    foreach my $opt (@_) {
+      my $ofn = eval "\$${fn}_${opt}";
+      next if !$ofn;
+      print "$rtyp ${ofn}($args);\n";
+    }
+    if (eval "\$${fn}_indirect" eq "false") {
+      print "#define ${fn} ${dfn}\n";
+    } else {
+      print "RTCD_EXTERN $rtyp (*${fn})($args);\n";
+    }
+    print "\n";
+  }
+}
+
+# set_function_pointers OPT...: for each runtime-dispatched function, prints
+# the assignment of the default flavour followed by one conditional
+# assignment per other linkable flavour, guarded by $have_OPT.
+sub set_function_pointers {
+  foreach my $fn (sort keys %ALL_FUNCS) {
+    my $dfn = eval "\$${fn}_default";
+    $dfn = eval "\$${dfn}";
+    if (eval "\$${fn}_indirect" eq "true") {
+      print " $fn = $dfn;\n";
+      foreach my $opt (@_) {
+        my $ofn = eval "\$${fn}_${opt}";
+        next if !$ofn;
+        next if "$ofn" eq "$dfn";
+        my $link = eval "\$${fn}_${opt}_link";
+        next if $link && $link eq "false";
+        my $cond = eval "\$have_${opt}";
+        print " if (${cond}) $fn = $ofn;\n"
+      }
+    }
+  }
+}
+
+# Returns the given extensions minus those named by --disable-EXT.
+sub filter {
+  my @filtered;
+  foreach (@_) { push @filtered, $_ unless $disabled{$_}; }
+  return @filtered;
+}
+
+#
+# Helper functions for generating the arch specific RTCD files
+#
+
+# Prints the include guard, the RTCD_EXTERN definition, the forward
+# declarations, the function declarations and the init-function prototype.
+sub common_top() {
+  # Anything that cannot appear in a C identifier becomes '_' in the guard.
+  (my $include_guard = uc($opts{sym})."_H_") =~ tr/A-Z0-9_/_/c;
+  print <<EOF;
+#ifndef ${include_guard}
+#define ${include_guard}
+
+#ifdef RTCD_C
+#define RTCD_EXTERN
+#else
+#define RTCD_EXTERN extern
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+EOF
+
+process_forward_decls();
+print "\n";
+declare_function_pointers("c", @ALL_ARCHS);
+
+print <<EOF;
+void $opts{sym}(void);
+
+EOF
+}
+
+# Closes the extern "C" block and the include guard opened by common_top().
+sub common_bottom() {
+  print <<EOF;
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif
+EOF
+}
+
+# Emits the header for x86 targets; dispatch conditions test the flags
+# returned by x86_simd_caps().
+sub x86() {
+  determine_indirection("c", @ALL_ARCHS);
+
+  # Assign the helper variable for each enabled extension
+  foreach my $opt (@ALL_ARCHS) {
+    my $opt_uc = uc $opt;
+    eval "\$have_${opt}=\"flags & HAS_${opt_uc}\"";
+  }
+
+  common_top;
+  print <<EOF;
+#ifdef RTCD_C
+#include "vpx_ports/x86.h"
+static void setup_rtcd_internal(void)
+{
+ int flags = x86_simd_caps();
+
+ (void)flags;
+
+EOF
+
+  set_function_pointers("c", @ALL_ARCHS);
+
+  print <<EOF;
+}
+#endif
+EOF
+  common_bottom;
+}
+
+# Emits the header for ARM targets; dispatch conditions test the flags
+# returned by arm_cpu_caps().
+sub arm() {
+  determine_indirection("c", @ALL_ARCHS);
+
+  # Assign the helper variable for each enabled extension
+  foreach my $opt (@ALL_ARCHS) {
+    my $opt_uc = uc $opt;
+    eval "\$have_${opt}=\"flags & HAS_${opt_uc}\"";
+  }
+
+  common_top;
+  print <<EOF;
+#include "vpx_config.h"
+
+#ifdef RTCD_C
+#include "vpx_ports/arm.h"
+static void setup_rtcd_internal(void)
+{
+ int flags = arm_cpu_caps();
+
+ (void)flags;
+
+EOF
+
+  set_function_pointers("c", @ALL_ARCHS);
+
+  print <<EOF;
+}
+#endif
+EOF
+  common_bottom;
+}
+
+# Emits the header for MIPS targets, including the DSPR2 static-init calls
+# placed at the end of setup_rtcd_internal().
+sub mips() {
+  determine_indirection("c", @ALL_ARCHS);
+  common_top;
+
+  print <<EOF;
+#include "vpx_config.h"
+
+#ifdef RTCD_C
+static void setup_rtcd_internal(void)
+{
+EOF
+
+  set_function_pointers("c", @ALL_ARCHS);
+
+  print <<EOF;
+#if HAVE_DSPR2
+#if CONFIG_VP8
+void dsputil_static_init();
+dsputil_static_init();
+#endif
+#if CONFIG_VP9
+void vp9_dsputil_static_init();
+vp9_dsputil_static_init();
+#endif
+#endif
+}
+#endif
+EOF
+  common_bottom;
+}
+
+# Emits the header for targets without extensions: only the C flavours.
+sub unoptimized() {
+  determine_indirection "c";
+  common_top;
+  print <<EOF;
+#include "vpx_config.h"
+
+#ifdef RTCD_C
+static void setup_rtcd_internal(void)
+{
+EOF
+
+  set_function_pointers "c";
+
+  print <<EOF;
+}
+#endif
+EOF
+  common_bottom;
+}
+
+#
+# Main Driver
+#
+
+&require("c");
+if ($opts{arch} eq 'x86') {
+  @ALL_ARCHS = filter(qw/mmx sse sse2 sse3 ssse3 sse4_1 avx avx2/);
+  x86;
+} elsif ($opts{arch} eq 'x86_64') {
+  @ALL_ARCHS = filter(qw/mmx sse sse2 sse3 ssse3 sse4_1 avx avx2/);
+  @REQUIRES = filter(keys %required ? keys %required : qw/mmx sse sse2/);
+  &require(@REQUIRES);
+  x86;
+} elsif ($opts{arch} eq 'mips32') {
+  @ALL_ARCHS = filter(qw/mips32/);
+  # HAVE_DSPR2 is not a CONFIG_* line, so %config does not hold it.
+  open my $dspr2_fh, '<', $opts{config} or
+    die "Error opening config file '$opts{config}': $!\n";
+  while (<$dspr2_fh>) {
+    if (/HAVE_DSPR2=yes/) {
+      @ALL_ARCHS = filter(qw/mips32 dspr2/);
+      last;
+    }
+  }
+  close $dspr2_fh;
+  mips;
+} elsif ($opts{arch} eq 'armv5te') {
+  @ALL_ARCHS = filter(qw/edsp/);
+  arm;
+} elsif ($opts{arch} eq 'armv6') {
+  @ALL_ARCHS = filter(qw/edsp media/);
+  arm;
+} elsif ($opts{arch} eq 'armv7') {
+  @ALL_ARCHS = filter(qw/edsp media neon/);
+  arm;
+} else {
+  unoptimized;
+}
+
+__END__
+
+=head1 NAME
+
+rtcd -
+
+=head1 SYNOPSIS
+
+Usage: rtcd.pl [options] FILE
+
+See 'perldoc rtcd.pl' for more details.
+
+=head1 DESCRIPTION
+
+Reads the Run Time CPU Detections definitions from FILE and generates a
+C header file on stdout.
+ +=head1 OPTIONS + +Options: + --arch=ARCH Architecture to generate defs for (required) + --disable-EXT Disable support for EXT extensions + --require-EXT Require support for EXT extensions + --sym=SYMBOL Unique symbol to use for RTCD initialization function + --config=FILE File with CONFIG_FOO=yes lines to parse diff --git a/build/make/rtcd.sh b/build/make/rtcd.sh deleted file mode 100755 index 93c9adcac4c9e2dc2cfbe067a1c831e0bf1ba8c1..0000000000000000000000000000000000000000 --- a/build/make/rtcd.sh +++ /dev/null @@ -1,373 +0,0 @@ -#!/bin/sh -self=$0 - -usage() { - cat <<EOF >&2 -Usage: $self [options] FILE - -Reads the Run Time CPU Detections definitions from FILE and generates a -C header file on stdout. - -Options: - --arch=ARCH Architecture to generate defs for (required) - --disable-EXT Disable support for EXT extensions - --require-EXT Require support for EXT extensions - --sym=SYMBOL Unique symbol to use for RTCD initialization function - --config=FILE File with CONFIG_FOO=yes lines to parse -EOF - exit 1 -} - -die() { - echo "$@" >&2 - exit 1 -} - -die_argument_required() { - die "Option $opt requires argument" -} - -for opt; do - optval="${opt#*=}" - case "$opt" in - --arch) die_argument_required;; - --arch=*) arch=${optval};; - --disable-*) eval "disable_${opt#--disable-}=true";; - --require-*) REQUIRES="${REQUIRES}${opt#--require-} ";; - --sym) die_argument_required;; - --sym=*) symbol=${optval};; - --config=*) config_file=${optval};; - -h|--help) - usage - ;; - -*) - die "Unrecognized option: ${opt%%=*}" - ;; - *) - defs_file="$defs_file $opt" - ;; - esac - shift -done -for f in $defs_file; do [ -f "$f" ] || usage; done -[ -n "$arch" ] || usage - -# Import the configuration -[ -f "$config_file" ] && eval $(grep CONFIG_ "$config_file") - -# -# Routines for the RTCD DSL to call -# -prototype() { - rtyp="" - case "$1" in - unsigned) rtyp="$1 "; shift;; - esac - rtyp="${rtyp}$1" - fn="$2" - args="$3" - - eval "${2}_rtyp='$rtyp'" - eval "${2}_args='$3'" - 
ALL_FUNCS="$ALL_FUNCS $fn" - specialize $fn c -} - -specialize() { - fn="$1" - shift - for opt in "$@"; do - eval "${fn}_${opt}=${fn}_${opt}" - done -} - -require() { - for fn in $ALL_FUNCS; do - for opt in "$@"; do - ofn=$(eval "echo \$${fn}_${opt}") - [ -z "$ofn" ] && continue - - # if we already have a default, then we can disable it, as we know - # we can do better. - best=$(eval "echo \$${fn}_default") - best_ofn=$(eval "echo \$${best}") - [ -n "$best" ] && [ "$best_ofn" != "$ofn" ] && eval "${best}_link=false" - eval "${fn}_default=${fn}_${opt}" - eval "${fn}_${opt}_link=true" - done - done -} - -forward_decls() { - ALL_FORWARD_DECLS="$ALL_FORWARD_DECLS $1" -} - -# -# Include the user's directives -# -for f in $defs_file; do - . $f -done - -# -# Process the directives according to the command line -# -process_forward_decls() { - for fn in $ALL_FORWARD_DECLS; do - eval $fn - done -} - -determine_indirection() { - [ "$CONFIG_RUNTIME_CPU_DETECT" = "yes" ] || require $ALL_ARCHS - for fn in $ALL_FUNCS; do - n="" - rtyp="$(eval "echo \$${fn}_rtyp")" - args="$(eval "echo \"\$${fn}_args\"")" - dfn="$(eval "echo \$${fn}_default")" - dfn=$(eval "echo \$${dfn}") - for opt in "$@"; do - ofn=$(eval "echo \$${fn}_${opt}") - [ -z "$ofn" ] && continue - link=$(eval "echo \$${fn}_${opt}_link") - [ "$link" = "false" ] && continue - n="${n}x" - done - if [ "$n" = "x" ]; then - eval "${fn}_indirect=false" - else - eval "${fn}_indirect=true" - fi - done -} - -declare_function_pointers() { - for fn in $ALL_FUNCS; do - rtyp="$(eval "echo \$${fn}_rtyp")" - args="$(eval "echo \"\$${fn}_args\"")" - dfn="$(eval "echo \$${fn}_default")" - dfn=$(eval "echo \$${dfn}") - for opt in "$@"; do - ofn=$(eval "echo \$${fn}_${opt}") - [ -z "$ofn" ] && continue - echo "$rtyp ${ofn}($args);" - done - if [ "$(eval "echo \$${fn}_indirect")" = "false" ]; then - echo "#define ${fn} ${dfn}" - else - echo "RTCD_EXTERN $rtyp (*${fn})($args);" - fi - echo - done -} - -set_function_pointers() { - for fn in 
$ALL_FUNCS; do - n="" - rtyp="$(eval "echo \$${fn}_rtyp")" - args="$(eval "echo \"\$${fn}_args\"")" - dfn="$(eval "echo \$${fn}_default")" - dfn=$(eval "echo \$${dfn}") - if $(eval "echo \$${fn}_indirect"); then - echo " $fn = $dfn;" - for opt in "$@"; do - ofn=$(eval "echo \$${fn}_${opt}") - [ -z "$ofn" ] && continue - [ "$ofn" = "$dfn" ] && continue; - link=$(eval "echo \$${fn}_${opt}_link") - [ "$link" = "false" ] && continue - cond="$(eval "echo \$have_${opt}")" - echo " if (${cond}) $fn = $ofn;" - done - fi - echo - done -} - -filter() { - filtered="" - for opt in "$@"; do - [ -z $(eval "echo \$disable_${opt}") ] && filtered="$filtered $opt" - done - echo $filtered -} - -# -# Helper functions for generating the arch specific RTCD files -# -common_top() { - outfile_basename=$(basename ${symbol:-rtcd}) - include_guard=$(echo $outfile_basename | tr '[a-z]' '[A-Z]' | \ - tr -c '[A-Z0-9]' _)H_ - cat <<EOF -#ifndef ${include_guard} -#define ${include_guard} - -#ifdef RTCD_C -#define RTCD_EXTERN -#else -#define RTCD_EXTERN extern -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -$(process_forward_decls) - -$(declare_function_pointers c $ALL_ARCHS) - -void ${symbol:-rtcd}(void); -EOF -} - -common_bottom() { - cat <<EOF - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif -EOF -} - -x86() { - determine_indirection c $ALL_ARCHS - - # Assign the helper variable for each enabled extension - for opt in $ALL_ARCHS; do - uc=$(echo $opt | tr '[a-z]' '[A-Z]') - eval "have_${opt}=\"flags & HAS_${uc}\"" - done - - cat <<EOF -$(common_top) - -#ifdef RTCD_C -#include "vpx_ports/x86.h" -static void setup_rtcd_internal(void) -{ - int flags = x86_simd_caps(); - - (void)flags; - -$(set_function_pointers c $ALL_ARCHS) -} -#endif -$(common_bottom) -EOF -} - -arm() { - determine_indirection c $ALL_ARCHS - - # Assign the helper variable for each enabled extension - for opt in $ALL_ARCHS; do - uc=$(echo $opt | tr '[a-z]' '[A-Z]') - eval "have_${opt}=\"flags & HAS_${uc}\"" - done - 
- cat <<EOF -$(common_top) -#include "vpx_config.h" - -#ifdef RTCD_C -#include "vpx_ports/arm.h" -static void setup_rtcd_internal(void) -{ - int flags = arm_cpu_caps(); - - (void)flags; - -$(set_function_pointers c $ALL_ARCHS) -} -#endif -$(common_bottom) -EOF -} - - -mips() { - determine_indirection c $ALL_ARCHS - cat <<EOF -$(common_top) -#include "vpx_config.h" - -#ifdef RTCD_C -static void setup_rtcd_internal(void) -{ -$(set_function_pointers c $ALL_ARCHS) -#if HAVE_DSPR2 -#if CONFIG_VP8 -void dsputil_static_init(); -dsputil_static_init(); -#endif -#if CONFIG_VP9 -void vp9_dsputil_static_init(); -vp9_dsputil_static_init(); -#endif -#endif -} -#endif -$(common_bottom) -EOF -} - -unoptimized() { - determine_indirection c - cat <<EOF -$(common_top) -#include "vpx_config.h" - -#ifdef RTCD_C -static void setup_rtcd_internal(void) -{ -$(set_function_pointers c) -} -#endif -$(common_bottom) -EOF - -} -# -# Main Driver -# -ALL_FUNCS=$(export LC_ALL=C; echo $ALL_FUNCS | tr ' ' '\n' | sort |tr '\n' ' ') -require c -case $arch in - x86) - ALL_ARCHS=$(filter mmx sse sse2 sse3 ssse3 sse4_1 avx avx2) - x86 - ;; - x86_64) - ALL_ARCHS=$(filter mmx sse sse2 sse3 ssse3 sse4_1 avx avx2) - REQUIRES=${REQUIRES:-mmx sse sse2} - require $(filter $REQUIRES) - x86 - ;; - mips32) - ALL_ARCHS=$(filter mips32) - dspr2=$([ -f "$config_file" ] && eval echo $(grep HAVE_DSPR2 "$config_file")) - HAVE_DSPR2="${dspr2#*=}" - if [ "$HAVE_DSPR2" = "yes" ]; then - ALL_ARCHS=$(filter mips32 dspr2) - fi - mips - ;; - armv5te) - ALL_ARCHS=$(filter edsp) - arm - ;; - armv6) - ALL_ARCHS=$(filter edsp media) - arm - ;; - armv7) - ALL_ARCHS=$(filter edsp media neon) - arm - ;; - *) - unoptimized - ;; -esac diff --git a/configure b/configure index 9f5a4352e06ccc896172b0f8d8631a9cf856a248..f37615f72777964bd18aadf9b303e4eae07668bf 100755 --- a/configure +++ b/configure @@ -160,6 +160,10 @@ for t in ${all_targets}; do [ -f ${source_path}/${t}.mk ] && enable_feature ${t} done +if ! 
perl --version >/dev/null; then + die "Perl is required to build" +fi + # check installed doxygen version doxy_version=$(doxygen --version 2>/dev/null) doxy_major=${doxy_version%%.*} diff --git a/libs.mk b/libs.mk index b6f641371fc8ba610420b9d318d34492d3d05074..a5c4b760d9b378cb5a588716b275a4acd0fa2ff0 100644 --- a/libs.mk +++ b/libs.mk @@ -49,7 +49,7 @@ endif # !gcc define rtcd_h_template $$(BUILD_PFX)$(1).h: $$(SRC_PATH_BARE)/$(2) @echo " [CREATE] $$@" - $$(qexec)$$(SRC_PATH_BARE)/build/make/rtcd.sh --arch=$$(TGT_ISA) \ + $$(qexec)$$(SRC_PATH_BARE)/build/make/rtcd.pl --arch=$$(TGT_ISA) \ --sym=$(1) \ --config=$$(CONFIG_DIR)$$(target)$$(if $$(FAT_ARCHS),,-$$(TOOLCHAIN)).mk \ $$(RTCD_OPTIONS) $$^ > $$@ @@ -162,7 +162,7 @@ INSTALL_MAPS += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/% $(p)/Debug/%) endif CODEC_SRCS-$(BUILD_LIBVPX) += build/make/version.sh -CODEC_SRCS-$(BUILD_LIBVPX) += build/make/rtcd.sh +CODEC_SRCS-$(BUILD_LIBVPX) += build/make/rtcd.pl CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/emmintrin_compat.h CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/mem_ops.h CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/mem_ops_aligned.h diff --git a/vp8/common/rtcd_defs.pl b/vp8/common/rtcd_defs.pl new file mode 100644 index 0000000000000000000000000000000000000000..130d965358f2bef54bb8baf739d2508cf44026dd --- /dev/null +++ b/vp8/common/rtcd_defs.pl @@ -0,0 +1,541 @@ +sub vp8_common_forward_decls() { +print <<EOF +/* + * VP8 + */ + +struct blockd; +struct macroblockd; +struct loop_filter_info; + +/* Encoder forward decls */ +struct block; +struct macroblock; +struct variance_vtable; +union int_mv; +struct yv12_buffer_config; +EOF +} +forward_decls qw/vp8_common_forward_decls/; + +# +# system state +# +add_proto qw/void vp8_clear_system_state/, ""; +specialize qw/vp8_clear_system_state mmx/; +$vp8_clear_system_state_mmx=vpx_reset_mmx_state; + +# +# Dequant +# +add_proto qw/void vp8_dequantize_b/, "struct blockd*, short *dqc"; +specialize qw/vp8_dequantize_b mmx media neon/; 
+$vp8_dequantize_b_media=vp8_dequantize_b_v6;
+
+add_proto qw/void vp8_dequant_idct_add/, "short *input, short *dq, unsigned char *output, int stride";
+specialize qw/vp8_dequant_idct_add mmx media neon dspr2/;
+$vp8_dequant_idct_add_media=vp8_dequant_idct_add_v6;
+$vp8_dequant_idct_add_dspr2=vp8_dequant_idct_add_dspr2;
+
+add_proto qw/void vp8_dequant_idct_add_y_block/, "short *q, short *dq, unsigned char *dst, int stride, char *eobs";
+specialize qw/vp8_dequant_idct_add_y_block mmx sse2 media neon dspr2/;
+$vp8_dequant_idct_add_y_block_media=vp8_dequant_idct_add_y_block_v6;
+$vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2;
+
+add_proto qw/void vp8_dequant_idct_add_uv_block/, "short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs";
+specialize qw/vp8_dequant_idct_add_uv_block mmx sse2 media neon dspr2/;
+$vp8_dequant_idct_add_uv_block_media=vp8_dequant_idct_add_uv_block_v6;
+$vp8_dequant_idct_add_uv_block_dspr2=vp8_dequant_idct_add_uv_block_dspr2;  # same name specialize assigned; kept for symmetry with the y_block entry
+
+#
+# Loopfilter
+# ("media" flavours are implemented by the *_armv6 symbols)
+add_proto qw/void vp8_loop_filter_mbv/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
+specialize qw/vp8_loop_filter_mbv mmx sse2 media neon dspr2/;
+$vp8_loop_filter_mbv_media=vp8_loop_filter_mbv_armv6;
+$vp8_loop_filter_mbv_dspr2=vp8_loop_filter_mbv_dspr2;
+
+add_proto qw/void vp8_loop_filter_bv/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
+specialize qw/vp8_loop_filter_bv mmx sse2 media neon dspr2/;
+$vp8_loop_filter_bv_media=vp8_loop_filter_bv_armv6;
+$vp8_loop_filter_bv_dspr2=vp8_loop_filter_bv_dspr2;
+
+add_proto qw/void vp8_loop_filter_mbh/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
+specialize qw/vp8_loop_filter_mbh mmx sse2 media neon dspr2/;
+$vp8_loop_filter_mbh_media=vp8_loop_filter_mbh_armv6;
+$vp8_loop_filter_mbh_dspr2=vp8_loop_filter_mbh_dspr2; + +add_proto qw/void vp8_loop_filter_bh/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"; +specialize qw/vp8_loop_filter_bh mmx sse2 media neon dspr2/; +$vp8_loop_filter_bh_media=vp8_loop_filter_bh_armv6; +$vp8_loop_filter_bh_dspr2=vp8_loop_filter_bh_dspr2; + + +add_proto qw/void vp8_loop_filter_simple_mbv/, "unsigned char *y, int ystride, const unsigned char *blimit"; +specialize qw/vp8_loop_filter_simple_mbv mmx sse2 media neon/; +$vp8_loop_filter_simple_mbv_c=vp8_loop_filter_simple_vertical_edge_c; +$vp8_loop_filter_simple_mbv_mmx=vp8_loop_filter_simple_vertical_edge_mmx; +$vp8_loop_filter_simple_mbv_sse2=vp8_loop_filter_simple_vertical_edge_sse2; +$vp8_loop_filter_simple_mbv_media=vp8_loop_filter_simple_vertical_edge_armv6; +$vp8_loop_filter_simple_mbv_neon=vp8_loop_filter_mbvs_neon; + +add_proto qw/void vp8_loop_filter_simple_mbh/, "unsigned char *y, int ystride, const unsigned char *blimit"; +specialize qw/vp8_loop_filter_simple_mbh mmx sse2 media neon/; +$vp8_loop_filter_simple_mbh_c=vp8_loop_filter_simple_horizontal_edge_c; +$vp8_loop_filter_simple_mbh_mmx=vp8_loop_filter_simple_horizontal_edge_mmx; +$vp8_loop_filter_simple_mbh_sse2=vp8_loop_filter_simple_horizontal_edge_sse2; +$vp8_loop_filter_simple_mbh_media=vp8_loop_filter_simple_horizontal_edge_armv6; +$vp8_loop_filter_simple_mbh_neon=vp8_loop_filter_mbhs_neon; + +add_proto qw/void vp8_loop_filter_simple_bv/, "unsigned char *y, int ystride, const unsigned char *blimit"; +specialize qw/vp8_loop_filter_simple_bv mmx sse2 media neon/; +$vp8_loop_filter_simple_bv_c=vp8_loop_filter_bvs_c; +$vp8_loop_filter_simple_bv_mmx=vp8_loop_filter_bvs_mmx; +$vp8_loop_filter_simple_bv_sse2=vp8_loop_filter_bvs_sse2; +$vp8_loop_filter_simple_bv_media=vp8_loop_filter_bvs_armv6; +$vp8_loop_filter_simple_bv_neon=vp8_loop_filter_bvs_neon; + +add_proto qw/void vp8_loop_filter_simple_bh/, "unsigned char *y, 
int ystride, const unsigned char *blimit"; +specialize qw/vp8_loop_filter_simple_bh mmx sse2 media neon/; +$vp8_loop_filter_simple_bh_c=vp8_loop_filter_bhs_c; +$vp8_loop_filter_simple_bh_mmx=vp8_loop_filter_bhs_mmx; +$vp8_loop_filter_simple_bh_sse2=vp8_loop_filter_bhs_sse2; +$vp8_loop_filter_simple_bh_media=vp8_loop_filter_bhs_armv6; +$vp8_loop_filter_simple_bh_neon=vp8_loop_filter_bhs_neon; + +# +# IDCT +# +#idct16 +add_proto qw/void vp8_short_idct4x4llm/, "short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride"; +specialize qw/vp8_short_idct4x4llm mmx media neon dspr2/; +$vp8_short_idct4x4llm_media=vp8_short_idct4x4llm_v6_dual; +$vp8_short_idct4x4llm_dspr2=vp8_short_idct4x4llm_dspr2; + +#iwalsh1 +add_proto qw/void vp8_short_inv_walsh4x4_1/, "short *input, short *output"; +specialize qw/vp8_short_inv_walsh4x4_1 dspr2/; +$vp8_short_inv_walsh4x4_1_dspr2=vp8_short_inv_walsh4x4_1_dspr2; +# dspr2 is the only optimized flavour so far + +#iwalsh16 +add_proto qw/void vp8_short_inv_walsh4x4/, "short *input, short *output"; +specialize qw/vp8_short_inv_walsh4x4 mmx sse2 media neon dspr2/; +$vp8_short_inv_walsh4x4_media=vp8_short_inv_walsh4x4_v6; +$vp8_short_inv_walsh4x4_dspr2=vp8_short_inv_walsh4x4_dspr2; + +#idct1_scalar_add +add_proto qw/void vp8_dc_only_idct_add/, "short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride"; +specialize qw/vp8_dc_only_idct_add mmx media neon dspr2/; +$vp8_dc_only_idct_add_media=vp8_dc_only_idct_add_v6; +$vp8_dc_only_idct_add_dspr2=vp8_dc_only_idct_add_dspr2; + +# +# RECON +# +add_proto qw/void vp8_copy_mem16x16/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"; +specialize qw/vp8_copy_mem16x16 mmx sse2 media neon dspr2/; +$vp8_copy_mem16x16_media=vp8_copy_mem16x16_v6; +$vp8_copy_mem16x16_dspr2=vp8_copy_mem16x16_dspr2; + +add_proto qw/void vp8_copy_mem8x8/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"; +specialize qw/vp8_copy_mem8x8 mmx media neon dspr2/; 
+$vp8_copy_mem8x8_media=vp8_copy_mem8x8_v6; +$vp8_copy_mem8x8_dspr2=vp8_copy_mem8x8_dspr2; + +add_proto qw/void vp8_copy_mem8x4/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"; +specialize qw/vp8_copy_mem8x4 mmx media neon dspr2/; +$vp8_copy_mem8x4_media=vp8_copy_mem8x4_v6; +$vp8_copy_mem8x4_dspr2=vp8_copy_mem8x4_dspr2; + +add_proto qw/void vp8_build_intra_predictors_mby_s/, "struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride"; +specialize qw/vp8_build_intra_predictors_mby_s sse2 ssse3/; +#TODO: fix assembly for neon + +add_proto qw/void vp8_build_intra_predictors_mbuv_s/, "struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride"; +specialize qw/vp8_build_intra_predictors_mbuv_s sse2 ssse3/; + +add_proto qw/void vp8_intra4x4_predict/, "unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left"; +specialize qw/vp8_intra4x4_predict media/; +$vp8_intra4x4_predict_media=vp8_intra4x4_predict_armv6; + +# +# Postproc +# (declared only when CONFIG_POSTPROC is "yes") +if (vpx_config("CONFIG_POSTPROC") eq "yes") { + add_proto qw/void vp8_mbpost_proc_down/, "unsigned char *dst, int pitch, int rows, int cols,int flimit"; + specialize qw/vp8_mbpost_proc_down mmx sse2/; + $vp8_mbpost_proc_down_sse2=vp8_mbpost_proc_down_xmm; + + add_proto qw/void vp8_mbpost_proc_across_ip/, "unsigned char *dst, int pitch, int rows, int cols,int flimit"; + specialize qw/vp8_mbpost_proc_across_ip sse2/; + $vp8_mbpost_proc_across_ip_sse2=vp8_mbpost_proc_across_ip_xmm; + + add_proto qw/void vp8_post_proc_down_and_across_mb_row/, "unsigned char *src, unsigned char *dst, int src_pitch, int dst_pitch, int cols, unsigned char *flimits, int size"; + specialize qw/vp8_post_proc_down_and_across_mb_row sse2/; + + add_proto 
qw/void vp8_plane_add_noise/, "unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch"; + specialize qw/vp8_plane_add_noise mmx sse2/; + $vp8_plane_add_noise_sse2=vp8_plane_add_noise_wmt; + + add_proto qw/void vp8_blend_mb_inner/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride"; + # no asm yet + + add_proto qw/void vp8_blend_mb_outer/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride"; + # no asm yet + + add_proto qw/void vp8_blend_b/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride"; + # no asm yet + + add_proto qw/void vp8_filter_by_weight16x16/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight"; + specialize qw/vp8_filter_by_weight16x16 sse2/; + + add_proto qw/void vp8_filter_by_weight8x8/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight"; + specialize qw/vp8_filter_by_weight8x8 sse2/; + + add_proto qw/void vp8_filter_by_weight4x4/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight"; + # no asm yet +} + +# +# Subpixel +# (every "media" flavour here is implemented by an *_armv6 symbol) +add_proto qw/void vp8_sixtap_predict16x16/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"; +specialize qw/vp8_sixtap_predict16x16 mmx sse2 ssse3 media neon dspr2/; +$vp8_sixtap_predict16x16_media=vp8_sixtap_predict16x16_armv6; +$vp8_sixtap_predict16x16_dspr2=vp8_sixtap_predict16x16_dspr2; + +add_proto qw/void vp8_sixtap_predict8x8/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"; +specialize qw/vp8_sixtap_predict8x8 mmx sse2 ssse3 media neon dspr2/; +$vp8_sixtap_predict8x8_media=vp8_sixtap_predict8x8_armv6; +$vp8_sixtap_predict8x8_dspr2=vp8_sixtap_predict8x8_dspr2; + +add_proto qw/void 
vp8_sixtap_predict8x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"; +specialize qw/vp8_sixtap_predict8x4 mmx sse2 ssse3 media neon dspr2/; +$vp8_sixtap_predict8x4_media=vp8_sixtap_predict8x4_armv6; +$vp8_sixtap_predict8x4_dspr2=vp8_sixtap_predict8x4_dspr2; + +add_proto qw/void vp8_sixtap_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"; +specialize qw/vp8_sixtap_predict4x4 mmx ssse3 media neon dspr2/; +$vp8_sixtap_predict4x4_media=vp8_sixtap_predict4x4_armv6; +$vp8_sixtap_predict4x4_dspr2=vp8_sixtap_predict4x4_dspr2; + +add_proto qw/void vp8_bilinear_predict16x16/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"; +specialize qw/vp8_bilinear_predict16x16 mmx sse2 ssse3 media neon/; +$vp8_bilinear_predict16x16_media=vp8_bilinear_predict16x16_armv6; + +add_proto qw/void vp8_bilinear_predict8x8/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"; +specialize qw/vp8_bilinear_predict8x8 mmx sse2 ssse3 media neon/; +$vp8_bilinear_predict8x8_media=vp8_bilinear_predict8x8_armv6; + +add_proto qw/void vp8_bilinear_predict8x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"; +specialize qw/vp8_bilinear_predict8x4 mmx media neon/; +$vp8_bilinear_predict8x4_media=vp8_bilinear_predict8x4_armv6; + +add_proto qw/void vp8_bilinear_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"; +specialize qw/vp8_bilinear_predict4x4 mmx media neon/; +$vp8_bilinear_predict4x4_media=vp8_bilinear_predict4x4_armv6; + +# +# Whole-pixel Variance +# (every sse2 flavour here is implemented by a *_wmt symbol) +add_proto qw/unsigned int vp8_variance4x4/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp8_variance4x4 mmx sse2/; +$vp8_variance4x4_sse2=vp8_variance4x4_wmt; + +add_proto 
qw/unsigned int vp8_variance8x8/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp8_variance8x8 mmx sse2 media neon/; +$vp8_variance8x8_sse2=vp8_variance8x8_wmt; +$vp8_variance8x8_media=vp8_variance8x8_armv6; + +add_proto qw/unsigned int vp8_variance8x16/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp8_variance8x16 mmx sse2 neon/; +$vp8_variance8x16_sse2=vp8_variance8x16_wmt; + +add_proto qw/unsigned int vp8_variance16x8/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp8_variance16x8 mmx sse2 neon/; +$vp8_variance16x8_sse2=vp8_variance16x8_wmt; + +add_proto qw/unsigned int vp8_variance16x16/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp8_variance16x16 mmx sse2 media neon/; +$vp8_variance16x16_sse2=vp8_variance16x16_wmt; +$vp8_variance16x16_media=vp8_variance16x16_armv6; + +# +# Sub-pixel Variance +# (every sse2 flavour here is implemented by a *_wmt symbol) +add_proto qw/unsigned int vp8_sub_pixel_variance4x4/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"; +specialize qw/vp8_sub_pixel_variance4x4 mmx sse2/; +$vp8_sub_pixel_variance4x4_sse2=vp8_sub_pixel_variance4x4_wmt; + +add_proto qw/unsigned int vp8_sub_pixel_variance8x8/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"; +specialize qw/vp8_sub_pixel_variance8x8 mmx sse2 media neon/; +$vp8_sub_pixel_variance8x8_sse2=vp8_sub_pixel_variance8x8_wmt; +$vp8_sub_pixel_variance8x8_media=vp8_sub_pixel_variance8x8_armv6; + +add_proto qw/unsigned int vp8_sub_pixel_variance8x16/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, 
const unsigned char *ref_ptr, int Refstride, unsigned int *sse"; +specialize qw/vp8_sub_pixel_variance8x16 mmx sse2/; +$vp8_sub_pixel_variance8x16_sse2=vp8_sub_pixel_variance8x16_wmt; + +add_proto qw/unsigned int vp8_sub_pixel_variance16x8/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"; +specialize qw/vp8_sub_pixel_variance16x8 mmx sse2 ssse3/; +$vp8_sub_pixel_variance16x8_sse2=vp8_sub_pixel_variance16x8_wmt; + +add_proto qw/unsigned int vp8_sub_pixel_variance16x16/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"; +specialize qw/vp8_sub_pixel_variance16x16 mmx sse2 ssse3 media neon/; +$vp8_sub_pixel_variance16x16_sse2=vp8_sub_pixel_variance16x16_wmt; +$vp8_sub_pixel_variance16x16_media=vp8_sub_pixel_variance16x16_armv6; + +add_proto qw/unsigned int vp8_variance_halfpixvar16x16_h/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp8_variance_halfpixvar16x16_h mmx sse2 media neon/; +$vp8_variance_halfpixvar16x16_h_sse2=vp8_variance_halfpixvar16x16_h_wmt; +$vp8_variance_halfpixvar16x16_h_media=vp8_variance_halfpixvar16x16_h_armv6; + +add_proto qw/unsigned int vp8_variance_halfpixvar16x16_v/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp8_variance_halfpixvar16x16_v mmx sse2 media neon/; +$vp8_variance_halfpixvar16x16_v_sse2=vp8_variance_halfpixvar16x16_v_wmt; +$vp8_variance_halfpixvar16x16_v_media=vp8_variance_halfpixvar16x16_v_armv6; + +add_proto qw/unsigned int vp8_variance_halfpixvar16x16_hv/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp8_variance_halfpixvar16x16_hv mmx sse2 media neon/; 
+$vp8_variance_halfpixvar16x16_hv_sse2=vp8_variance_halfpixvar16x16_hv_wmt; +$vp8_variance_halfpixvar16x16_hv_media=vp8_variance_halfpixvar16x16_hv_armv6; + +# +# Single block SAD +# +add_proto qw/unsigned int vp8_sad4x4/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp8_sad4x4 mmx sse2 neon/; +$vp8_sad4x4_sse2=vp8_sad4x4_wmt; + +add_proto qw/unsigned int vp8_sad8x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp8_sad8x8 mmx sse2 neon/; +$vp8_sad8x8_sse2=vp8_sad8x8_wmt; + +add_proto qw/unsigned int vp8_sad8x16/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp8_sad8x16 mmx sse2 neon/; +$vp8_sad8x16_sse2=vp8_sad8x16_wmt; + +add_proto qw/unsigned int vp8_sad16x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp8_sad16x8 mmx sse2 neon/; +$vp8_sad16x8_sse2=vp8_sad16x8_wmt; + +add_proto qw/unsigned int vp8_sad16x16/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp8_sad16x16 mmx sse2 sse3 media neon/; +$vp8_sad16x16_sse2=vp8_sad16x16_wmt; +$vp8_sad16x16_media=vp8_sad16x16_armv6; + +# +# Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally +# +add_proto qw/void vp8_sad4x4x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"; +specialize qw/vp8_sad4x4x3 sse3/; + +add_proto qw/void vp8_sad8x8x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"; +specialize qw/vp8_sad8x8x3 sse3/; + +add_proto qw/void vp8_sad8x16x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, 
int ref_stride, unsigned int *sad_array"; +specialize qw/vp8_sad8x16x3 sse3/; + +add_proto qw/void vp8_sad16x8x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"; +specialize qw/vp8_sad16x8x3 sse3 ssse3/; + +add_proto qw/void vp8_sad16x16x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"; +specialize qw/vp8_sad16x16x3 sse3 ssse3/; + +# Note the only difference in the following prototypes is that they return into +# an array of short +add_proto qw/void vp8_sad4x4x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"; +specialize qw/vp8_sad4x4x8 sse4_1/; +$vp8_sad4x4x8_sse4_1=vp8_sad4x4x8_sse4; + +add_proto qw/void vp8_sad8x8x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"; +specialize qw/vp8_sad8x8x8 sse4_1/; +$vp8_sad8x8x8_sse4_1=vp8_sad8x8x8_sse4; + +add_proto qw/void vp8_sad8x16x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"; +specialize qw/vp8_sad8x16x8 sse4_1/; +$vp8_sad8x16x8_sse4_1=vp8_sad8x16x8_sse4; + +add_proto qw/void vp8_sad16x8x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"; +specialize qw/vp8_sad16x8x8 sse4_1/; +$vp8_sad16x8x8_sse4_1=vp8_sad16x8x8_sse4; + +add_proto qw/void vp8_sad16x16x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"; +specialize qw/vp8_sad16x16x8 sse4_1/; +$vp8_sad16x16x8_sse4_1=vp8_sad16x16x8_sse4; + +# +# Multi-block SAD, comparing a reference to N independent blocks +# +add_proto qw/void vp8_sad4x4x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int 
*sad_array"; +specialize qw/vp8_sad4x4x4d sse3/; + +add_proto qw/void vp8_sad8x8x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp8_sad8x8x4d sse3/; + +add_proto qw/void vp8_sad8x16x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp8_sad8x16x4d sse3/; + +add_proto qw/void vp8_sad16x8x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp8_sad16x8x4d sse3/; + +add_proto qw/void vp8_sad16x16x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp8_sad16x16x4d sse3/; + +# +# Encoder functions below this point. +# +if (vpx_config("CONFIG_VP8_ENCODER") eq "yes") { + +# +# Sum of squares (vector) +# +add_proto qw/unsigned int vp8_get_mb_ss/, "const short *"; +specialize qw/vp8_get_mb_ss mmx sse2/; + +# +# SSE (Sum Squared Error) +# +add_proto qw/unsigned int vp8_sub_pixel_mse16x16/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"; +specialize qw/vp8_sub_pixel_mse16x16 mmx sse2/; +$vp8_sub_pixel_mse16x16_sse2=vp8_sub_pixel_mse16x16_wmt; + +add_proto qw/unsigned int vp8_mse16x16/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp8_mse16x16 mmx sse2 media neon/; +$vp8_mse16x16_sse2=vp8_mse16x16_wmt; +$vp8_mse16x16_media=vp8_mse16x16_armv6; + +add_proto qw/unsigned int vp8_get4x4sse_cs/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride"; +specialize qw/vp8_get4x4sse_cs mmx neon/; + +# +# Block copy +# +if ($opts{arch} =~ /x86/) { + add_proto qw/void vp8_copy32xn/, "const 
unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n"; + specialize qw/vp8_copy32xn sse2 sse3/; +} + +# +# Structured Similarity (SSIM) +# +# sse2 is requested for the SSIM helpers only when --arch is x86_64. +# $sse2_on_x86_64 is set to "sse2" in that case and to "" otherwise, so the +# specialize calls below always interpolate a defined string. +# +if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") { + # Assign unconditionally: leaving this undef on other arches triggers + # "Use of uninitialized value" warnings when it is interpolated below. + $sse2_on_x86_64 = ($opts{arch} eq "x86_64") ? "sse2" : ""; + + add_proto qw/void vp8_ssim_parms_8x8/, "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr"; + specialize qw/vp8_ssim_parms_8x8/, "$sse2_on_x86_64"; + + add_proto qw/void vp8_ssim_parms_16x16/, "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr"; + specialize qw/vp8_ssim_parms_16x16/, "$sse2_on_x86_64"; +} + +# +# Forward DCT +# +add_proto qw/void vp8_short_fdct4x4/, "short *input, short *output, int pitch"; +specialize qw/vp8_short_fdct4x4 mmx sse2 media neon/; +$vp8_short_fdct4x4_media=vp8_short_fdct4x4_armv6; + +add_proto qw/void vp8_short_fdct8x4/, "short *input, short *output, int pitch"; +specialize qw/vp8_short_fdct8x4 mmx sse2 media neon/; +$vp8_short_fdct8x4_media=vp8_short_fdct8x4_armv6; + +add_proto qw/void vp8_short_walsh4x4/, "short *input, short *output, int pitch"; +specialize qw/vp8_short_walsh4x4 sse2 media neon/; +$vp8_short_walsh4x4_media=vp8_short_walsh4x4_armv6; + +# +# Quantizer +# +add_proto qw/void vp8_regular_quantize_b/, "struct block *, struct blockd *"; +specialize qw/vp8_regular_quantize_b sse2/; +# TODO(johann) Update sse4 implementation and re-enable +#$vp8_regular_quantize_b_sse4_1=vp8_regular_quantize_b_sse4; + +add_proto qw/void vp8_fast_quantize_b/, "struct block *, struct blockd *"; +specialize qw/vp8_fast_quantize_b sse2 ssse3 media neon/; +$vp8_fast_quantize_b_media=vp8_fast_quantize_b_armv6; + +add_proto qw/void vp8_regular_quantize_b_pair/, "struct block *b1, struct block *b2, struct blockd *d1, struct
blockd *d2"; +# no asm yet + +add_proto qw/void vp8_fast_quantize_b_pair/, "struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2"; +specialize qw/vp8_fast_quantize_b_pair neon/; + +add_proto qw/void vp8_quantize_mb/, "struct macroblock *"; +specialize qw/vp8_quantize_mb neon/; + +add_proto qw/void vp8_quantize_mby/, "struct macroblock *"; +specialize qw/vp8_quantize_mby neon/; + +add_proto qw/void vp8_quantize_mbuv/, "struct macroblock *"; +specialize qw/vp8_quantize_mbuv neon/; + +# +# Block subtraction +# +add_proto qw/int vp8_block_error/, "short *coeff, short *dqcoeff"; +specialize qw/vp8_block_error mmx sse2/; +$vp8_block_error_sse2=vp8_block_error_xmm; + +add_proto qw/int vp8_mbblock_error/, "struct macroblock *mb, int dc"; +specialize qw/vp8_mbblock_error mmx sse2/; +$vp8_mbblock_error_sse2=vp8_mbblock_error_xmm; + +add_proto qw/int vp8_mbuverror/, "struct macroblock *mb"; +specialize qw/vp8_mbuverror mmx sse2/; +$vp8_mbuverror_sse2=vp8_mbuverror_xmm; + +add_proto qw/void vp8_subtract_b/, "struct block *be, struct blockd *bd, int pitch"; +specialize qw/vp8_subtract_b mmx sse2 media neon/; +$vp8_subtract_b_media=vp8_subtract_b_armv6; + +add_proto qw/void vp8_subtract_mby/, "short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride"; +specialize qw/vp8_subtract_mby mmx sse2 media neon/; +$vp8_subtract_mby_media=vp8_subtract_mby_armv6; + +add_proto qw/void vp8_subtract_mbuv/, "short *diff, unsigned char *usrc, unsigned char *vsrc, int src_stride, unsigned char *upred, unsigned char *vpred, int pred_stride"; +specialize qw/vp8_subtract_mbuv mmx sse2 media neon/; +$vp8_subtract_mbuv_media=vp8_subtract_mbuv_armv6; + +# +# Motion search +# +add_proto qw/int vp8_full_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv"; +specialize qw/vp8_full_search_sad sse3 sse4_1/; 
+$vp8_full_search_sad_sse3=vp8_full_search_sadx3; +$vp8_full_search_sad_sse4_1=vp8_full_search_sadx8; + +add_proto qw/int vp8_refining_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv"; +specialize qw/vp8_refining_search_sad sse3/; +$vp8_refining_search_sad_sse3=vp8_refining_search_sadx4; + +add_proto qw/int vp8_diamond_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, union int_mv *best_mv, int search_param, int sad_per_bit, int *num00, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv"; +$vp8_diamond_search_sad_sse3=vp8_diamond_search_sadx4; + +# +# Alt-ref Noise Reduction (ARNR) +# +if (vpx_config("CONFIG_REALTIME_ONLY") ne "yes") { + add_proto qw/void vp8_temporal_filter_apply/, "unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count"; + specialize qw/vp8_temporal_filter_apply sse2/; +} + +# +# Pick Loopfilter +# +add_proto qw/void vp8_yv12_copy_partial_frame/, "struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc"; +specialize qw/vp8_yv12_copy_partial_frame neon/; + +# +# Denoiser filter +# +if (vpx_config("CONFIG_TEMPORAL_DENOISING") eq "yes") { + add_proto qw/int vp8_denoiser_filter/, "struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset"; + specialize qw/vp8_denoiser_filter sse2 neon/; +} + +# End of encoder only functions +} +1; diff --git a/vp8/common/rtcd_defs.sh b/vp8/common/rtcd_defs.sh deleted file mode 100644 index 28e67547702840cdfa7e3d004787ff12572a1dfb..0000000000000000000000000000000000000000 --- a/vp8/common/rtcd_defs.sh +++ /dev/null @@ -1,542 +0,0 @@ -vp8_common_forward_decls() { -cat 
<<EOF -/* - * VP8 - */ - -struct blockd; -struct macroblockd; -struct loop_filter_info; - -/* Encoder forward decls */ -struct block; -struct macroblock; -struct variance_vtable; -union int_mv; -struct yv12_buffer_config; -EOF -} -forward_decls vp8_common_forward_decls - -# -# system state -# -prototype void vp8_clear_system_state "" -specialize vp8_clear_system_state mmx -vp8_clear_system_state_mmx=vpx_reset_mmx_state - -# -# Dequant -# -prototype void vp8_dequantize_b "struct blockd*, short *dqc" -specialize vp8_dequantize_b mmx media neon -vp8_dequantize_b_media=vp8_dequantize_b_v6 - -prototype void vp8_dequant_idct_add "short *input, short *dq, unsigned char *output, int stride" -specialize vp8_dequant_idct_add mmx media neon dspr2 -vp8_dequant_idct_add_media=vp8_dequant_idct_add_v6 -vp8_dequant_idct_add_dspr2=vp8_dequant_idct_add_dspr2 - -prototype void vp8_dequant_idct_add_y_block "short *q, short *dq, unsigned char *dst, int stride, char *eobs" -specialize vp8_dequant_idct_add_y_block mmx sse2 media neon dspr2 -vp8_dequant_idct_add_y_block_media=vp8_dequant_idct_add_y_block_v6 -vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2 - -prototype void vp8_dequant_idct_add_uv_block "short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs" -specialize vp8_dequant_idct_add_uv_block mmx sse2 media neon dspr2 -vp8_dequant_idct_add_uv_block_media=vp8_dequant_idct_add_uv_block_v6 -vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2 - -# -# Loopfilter -# -prototype void vp8_loop_filter_mbv "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi" -specialize vp8_loop_filter_mbv mmx sse2 media neon dspr2 -vp8_loop_filter_mbv_media=vp8_loop_filter_mbv_armv6 -vp8_loop_filter_mbv_dspr2=vp8_loop_filter_mbv_dspr2 - -prototype void vp8_loop_filter_bv "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info 
*lfi" -specialize vp8_loop_filter_bv mmx sse2 media neon dspr2 -vp8_loop_filter_bv_media=vp8_loop_filter_bv_armv6 -vp8_loop_filter_bv_dspr2=vp8_loop_filter_bv_dspr2 - -prototype void vp8_loop_filter_mbh "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi" -specialize vp8_loop_filter_mbh mmx sse2 media neon dspr2 -vp8_loop_filter_mbh_media=vp8_loop_filter_mbh_armv6 -vp8_loop_filter_mbh_dspr2=vp8_loop_filter_mbh_dspr2 - -prototype void vp8_loop_filter_bh "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi" -specialize vp8_loop_filter_bh mmx sse2 media neon dspr2 -vp8_loop_filter_bh_media=vp8_loop_filter_bh_armv6 -vp8_loop_filter_bh_dspr2=vp8_loop_filter_bh_dspr2 - - -prototype void vp8_loop_filter_simple_mbv "unsigned char *y, int ystride, const unsigned char *blimit" -specialize vp8_loop_filter_simple_mbv mmx sse2 media neon -vp8_loop_filter_simple_mbv_c=vp8_loop_filter_simple_vertical_edge_c -vp8_loop_filter_simple_mbv_mmx=vp8_loop_filter_simple_vertical_edge_mmx -vp8_loop_filter_simple_mbv_sse2=vp8_loop_filter_simple_vertical_edge_sse2 -vp8_loop_filter_simple_mbv_media=vp8_loop_filter_simple_vertical_edge_armv6 -vp8_loop_filter_simple_mbv_neon=vp8_loop_filter_mbvs_neon - -prototype void vp8_loop_filter_simple_mbh "unsigned char *y, int ystride, const unsigned char *blimit" -specialize vp8_loop_filter_simple_mbh mmx sse2 media neon -vp8_loop_filter_simple_mbh_c=vp8_loop_filter_simple_horizontal_edge_c -vp8_loop_filter_simple_mbh_mmx=vp8_loop_filter_simple_horizontal_edge_mmx -vp8_loop_filter_simple_mbh_sse2=vp8_loop_filter_simple_horizontal_edge_sse2 -vp8_loop_filter_simple_mbh_media=vp8_loop_filter_simple_horizontal_edge_armv6 -vp8_loop_filter_simple_mbh_neon=vp8_loop_filter_mbhs_neon - -prototype void vp8_loop_filter_simple_bv "unsigned char *y, int ystride, const unsigned char *blimit" -specialize vp8_loop_filter_simple_bv mmx sse2 media neon 
-vp8_loop_filter_simple_bv_c=vp8_loop_filter_bvs_c -vp8_loop_filter_simple_bv_mmx=vp8_loop_filter_bvs_mmx -vp8_loop_filter_simple_bv_sse2=vp8_loop_filter_bvs_sse2 -vp8_loop_filter_simple_bv_media=vp8_loop_filter_bvs_armv6 -vp8_loop_filter_simple_bv_neon=vp8_loop_filter_bvs_neon - -prototype void vp8_loop_filter_simple_bh "unsigned char *y, int ystride, const unsigned char *blimit" -specialize vp8_loop_filter_simple_bh mmx sse2 media neon -vp8_loop_filter_simple_bh_c=vp8_loop_filter_bhs_c -vp8_loop_filter_simple_bh_mmx=vp8_loop_filter_bhs_mmx -vp8_loop_filter_simple_bh_sse2=vp8_loop_filter_bhs_sse2 -vp8_loop_filter_simple_bh_media=vp8_loop_filter_bhs_armv6 -vp8_loop_filter_simple_bh_neon=vp8_loop_filter_bhs_neon - -# -# IDCT -# -#idct16 -prototype void vp8_short_idct4x4llm "short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride" -specialize vp8_short_idct4x4llm mmx media neon dspr2 -vp8_short_idct4x4llm_media=vp8_short_idct4x4llm_v6_dual -vp8_short_idct4x4llm_dspr2=vp8_short_idct4x4llm_dspr2 - -#iwalsh1 -prototype void vp8_short_inv_walsh4x4_1 "short *input, short *output" -specialize vp8_short_inv_walsh4x4_1 dspr2 -vp8_short_inv_walsh4x4_1_dspr2=vp8_short_inv_walsh4x4_1_dspr2 -# no asm yet - -#iwalsh16 -prototype void vp8_short_inv_walsh4x4 "short *input, short *output" -specialize vp8_short_inv_walsh4x4 mmx sse2 media neon dspr2 -vp8_short_inv_walsh4x4_media=vp8_short_inv_walsh4x4_v6 -vp8_short_inv_walsh4x4_dspr2=vp8_short_inv_walsh4x4_dspr2 - -#idct1_scalar_add -prototype void vp8_dc_only_idct_add "short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride" -specialize vp8_dc_only_idct_add mmx media neon dspr2 -vp8_dc_only_idct_add_media=vp8_dc_only_idct_add_v6 -vp8_dc_only_idct_add_dspr2=vp8_dc_only_idct_add_dspr2 - -# -# RECON -# -prototype void vp8_copy_mem16x16 "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch" -specialize vp8_copy_mem16x16 mmx sse2 media neon dspr2 
-vp8_copy_mem16x16_media=vp8_copy_mem16x16_v6 -vp8_copy_mem16x16_dspr2=vp8_copy_mem16x16_dspr2 - -prototype void vp8_copy_mem8x8 "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch" -specialize vp8_copy_mem8x8 mmx media neon dspr2 -vp8_copy_mem8x8_media=vp8_copy_mem8x8_v6 -vp8_copy_mem8x8_dspr2=vp8_copy_mem8x8_dspr2 - -prototype void vp8_copy_mem8x4 "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch" -specialize vp8_copy_mem8x4 mmx media neon dspr2 -vp8_copy_mem8x4_media=vp8_copy_mem8x4_v6 -vp8_copy_mem8x4_dspr2=vp8_copy_mem8x4_dspr2 - -prototype void vp8_build_intra_predictors_mby_s "struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride" -specialize vp8_build_intra_predictors_mby_s sse2 ssse3 -#TODO: fix assembly for neon - -prototype void vp8_build_intra_predictors_mbuv_s "struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride" -specialize vp8_build_intra_predictors_mbuv_s sse2 ssse3 - -prototype void vp8_intra4x4_predict "unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left" -specialize vp8_intra4x4_predict media -vp8_intra4x4_predict_media=vp8_intra4x4_predict_armv6 - -# -# Postproc -# -if [ "$CONFIG_POSTPROC" = "yes" ]; then - prototype void vp8_mbpost_proc_down "unsigned char *dst, int pitch, int rows, int cols,int flimit" - specialize vp8_mbpost_proc_down mmx sse2 - vp8_mbpost_proc_down_sse2=vp8_mbpost_proc_down_xmm - - prototype void vp8_mbpost_proc_across_ip "unsigned char *dst, int pitch, int rows, int cols,int flimit" - specialize vp8_mbpost_proc_across_ip sse2 - vp8_mbpost_proc_across_ip_sse2=vp8_mbpost_proc_across_ip_xmm - - prototype void vp8_post_proc_down_and_across_mb_row "unsigned char *src, unsigned char 
*dst, int src_pitch, int dst_pitch, int cols, unsigned char *flimits, int size" - specialize vp8_post_proc_down_and_across_mb_row sse2 - - prototype void vp8_plane_add_noise "unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch" - specialize vp8_plane_add_noise mmx sse2 - vp8_plane_add_noise_sse2=vp8_plane_add_noise_wmt - - prototype void vp8_blend_mb_inner "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride" - # no asm yet - - prototype void vp8_blend_mb_outer "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride" - # no asm yet - - prototype void vp8_blend_b "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride" - # no asm yet - - prototype void vp8_filter_by_weight16x16 "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight" - specialize vp8_filter_by_weight16x16 sse2 - - prototype void vp8_filter_by_weight8x8 "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight" - specialize vp8_filter_by_weight8x8 sse2 - - prototype void vp8_filter_by_weight4x4 "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight" - # no asm yet -fi - -# -# Subpixel -# -prototype void vp8_sixtap_predict16x16 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch" -specialize vp8_sixtap_predict16x16 mmx sse2 ssse3 media neon dspr2 -vp8_sixtap_predict16x16_media=vp8_sixtap_predict16x16_armv6 -vp8_sixtap_predict16x16_dspr2=vp8_sixtap_predict16x16_dspr2 - -prototype void vp8_sixtap_predict8x8 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch" -specialize vp8_sixtap_predict8x8 mmx sse2 ssse3 media neon dspr2 -vp8_sixtap_predict8x8_media=vp8_sixtap_predict8x8_armv6 
-vp8_sixtap_predict8x8_dspr2=vp8_sixtap_predict8x8_dspr2 - -prototype void vp8_sixtap_predict8x4 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch" -specialize vp8_sixtap_predict8x4 mmx sse2 ssse3 media neon dspr2 -vp8_sixtap_predict8x4_media=vp8_sixtap_predict8x4_armv6 -vp8_sixtap_predict8x4_dspr2=vp8_sixtap_predict8x4_dspr2 - -prototype void vp8_sixtap_predict4x4 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch" -specialize vp8_sixtap_predict4x4 mmx ssse3 media neon dspr2 -vp8_sixtap_predict4x4_media=vp8_sixtap_predict4x4_armv6 -vp8_sixtap_predict4x4_dspr2=vp8_sixtap_predict4x4_dspr2 - -prototype void vp8_bilinear_predict16x16 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch" -specialize vp8_bilinear_predict16x16 mmx sse2 ssse3 media neon -vp8_bilinear_predict16x16_media=vp8_bilinear_predict16x16_armv6 - -prototype void vp8_bilinear_predict8x8 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch" -specialize vp8_bilinear_predict8x8 mmx sse2 ssse3 media neon -vp8_bilinear_predict8x8_media=vp8_bilinear_predict8x8_armv6 - -prototype void vp8_bilinear_predict8x4 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch" -specialize vp8_bilinear_predict8x4 mmx media neon -vp8_bilinear_predict8x4_media=vp8_bilinear_predict8x4_armv6 - -prototype void vp8_bilinear_predict4x4 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch" -specialize vp8_bilinear_predict4x4 mmx media neon -vp8_bilinear_predict4x4_media=vp8_bilinear_predict4x4_armv6 - -# -# Whole-pixel Variance -# -prototype unsigned int vp8_variance4x4 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp8_variance4x4 mmx sse2 -vp8_variance4x4_sse2=vp8_variance4x4_wmt - -prototype unsigned int 
vp8_variance8x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp8_variance8x8 mmx sse2 media neon -vp8_variance8x8_sse2=vp8_variance8x8_wmt -vp8_variance8x8_media=vp8_variance8x8_armv6 - -prototype unsigned int vp8_variance8x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp8_variance8x16 mmx sse2 neon -vp8_variance8x16_sse2=vp8_variance8x16_wmt - -prototype unsigned int vp8_variance16x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp8_variance16x8 mmx sse2 neon -vp8_variance16x8_sse2=vp8_variance16x8_wmt - -prototype unsigned int vp8_variance16x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp8_variance16x16 mmx sse2 media neon -vp8_variance16x16_sse2=vp8_variance16x16_wmt -vp8_variance16x16_media=vp8_variance16x16_armv6 - -# -# Sub-pixel Variance -# -prototype unsigned int vp8_sub_pixel_variance4x4 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse" -specialize vp8_sub_pixel_variance4x4 mmx sse2 -vp8_sub_pixel_variance4x4_sse2=vp8_sub_pixel_variance4x4_wmt - -prototype unsigned int vp8_sub_pixel_variance8x8 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse" -specialize vp8_sub_pixel_variance8x8 mmx sse2 media neon -vp8_sub_pixel_variance8x8_sse2=vp8_sub_pixel_variance8x8_wmt -vp8_sub_pixel_variance8x8_media=vp8_sub_pixel_variance8x8_armv6 - -prototype unsigned int vp8_sub_pixel_variance8x16 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse" -specialize vp8_sub_pixel_variance8x16 
mmx sse2 -vp8_sub_pixel_variance8x16_sse2=vp8_sub_pixel_variance8x16_wmt - -prototype unsigned int vp8_sub_pixel_variance16x8 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse" -specialize vp8_sub_pixel_variance16x8 mmx sse2 ssse3 -vp8_sub_pixel_variance16x8_sse2=vp8_sub_pixel_variance16x8_wmt - -prototype unsigned int vp8_sub_pixel_variance16x16 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse" -specialize vp8_sub_pixel_variance16x16 mmx sse2 ssse3 media neon -vp8_sub_pixel_variance16x16_sse2=vp8_sub_pixel_variance16x16_wmt -vp8_sub_pixel_variance16x16_media=vp8_sub_pixel_variance16x16_armv6 - -prototype unsigned int vp8_variance_halfpixvar16x16_h "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp8_variance_halfpixvar16x16_h mmx sse2 media neon -vp8_variance_halfpixvar16x16_h_sse2=vp8_variance_halfpixvar16x16_h_wmt -vp8_variance_halfpixvar16x16_h_media=vp8_variance_halfpixvar16x16_h_armv6 - -prototype unsigned int vp8_variance_halfpixvar16x16_v "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp8_variance_halfpixvar16x16_v mmx sse2 media neon -vp8_variance_halfpixvar16x16_v_sse2=vp8_variance_halfpixvar16x16_v_wmt -vp8_variance_halfpixvar16x16_v_media=vp8_variance_halfpixvar16x16_v_armv6 - -prototype unsigned int vp8_variance_halfpixvar16x16_hv "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp8_variance_halfpixvar16x16_hv mmx sse2 media neon -vp8_variance_halfpixvar16x16_hv_sse2=vp8_variance_halfpixvar16x16_hv_wmt -vp8_variance_halfpixvar16x16_hv_media=vp8_variance_halfpixvar16x16_hv_armv6 - -# -# Single block SAD -# -prototype unsigned int vp8_sad4x4 
"const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp8_sad4x4 mmx sse2 neon -vp8_sad4x4_sse2=vp8_sad4x4_wmt - -prototype unsigned int vp8_sad8x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp8_sad8x8 mmx sse2 neon -vp8_sad8x8_sse2=vp8_sad8x8_wmt - -prototype unsigned int vp8_sad8x16 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp8_sad8x16 mmx sse2 neon -vp8_sad8x16_sse2=vp8_sad8x16_wmt - -prototype unsigned int vp8_sad16x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp8_sad16x8 mmx sse2 neon -vp8_sad16x8_sse2=vp8_sad16x8_wmt - -prototype unsigned int vp8_sad16x16 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp8_sad16x16 mmx sse2 sse3 media neon -vp8_sad16x16_sse2=vp8_sad16x16_wmt -vp8_sad16x16_media=vp8_sad16x16_armv6 - -# -# Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally -# -prototype void vp8_sad4x4x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array" -specialize vp8_sad4x4x3 sse3 - -prototype void vp8_sad8x8x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array" -specialize vp8_sad8x8x3 sse3 - -prototype void vp8_sad8x16x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array" -specialize vp8_sad8x16x3 sse3 - -prototype void vp8_sad16x8x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array" -specialize vp8_sad16x8x3 sse3 ssse3 - -prototype void vp8_sad16x16x3 "const unsigned 
char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array" -specialize vp8_sad16x16x3 sse3 ssse3 - -# Note the only difference in the following prototypes is that they return into -# an array of short -prototype void vp8_sad4x4x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array" -specialize vp8_sad4x4x8 sse4_1 -vp8_sad4x4x8_sse4_1=vp8_sad4x4x8_sse4 - -prototype void vp8_sad8x8x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array" -specialize vp8_sad8x8x8 sse4_1 -vp8_sad8x8x8_sse4_1=vp8_sad8x8x8_sse4 - -prototype void vp8_sad8x16x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array" -specialize vp8_sad8x16x8 sse4_1 -vp8_sad8x16x8_sse4_1=vp8_sad8x16x8_sse4 - -prototype void vp8_sad16x8x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array" -specialize vp8_sad16x8x8 sse4_1 -vp8_sad16x8x8_sse4_1=vp8_sad16x8x8_sse4 - -prototype void vp8_sad16x16x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array" -specialize vp8_sad16x16x8 sse4_1 -vp8_sad16x16x8_sse4_1=vp8_sad16x16x8_sse4 - -# -# Multi-block SAD, comparing a reference to N independent blocks -# -prototype void vp8_sad4x4x4d "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp8_sad4x4x4d sse3 - -prototype void vp8_sad8x8x4d "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp8_sad8x8x4d sse3 - -prototype void vp8_sad8x16x4d "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize 
vp8_sad8x16x4d sse3 - -prototype void vp8_sad16x8x4d "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp8_sad16x8x4d sse3 - -prototype void vp8_sad16x16x4d "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp8_sad16x16x4d sse3 - -# -# Encoder functions below this point. -# -if [ "$CONFIG_VP8_ENCODER" = "yes" ]; then - -# -# Sum of squares (vector) -# -prototype unsigned int vp8_get_mb_ss "const short *" -specialize vp8_get_mb_ss mmx sse2 - -# -# SSE (Sum Squared Error) -# -prototype unsigned int vp8_sub_pixel_mse16x16 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse" -specialize vp8_sub_pixel_mse16x16 mmx sse2 -vp8_sub_pixel_mse16x16_sse2=vp8_sub_pixel_mse16x16_wmt - -prototype unsigned int vp8_mse16x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp8_mse16x16 mmx sse2 media neon -vp8_mse16x16_sse2=vp8_mse16x16_wmt -vp8_mse16x16_media=vp8_mse16x16_armv6 - -prototype unsigned int vp8_get4x4sse_cs "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride" -specialize vp8_get4x4sse_cs mmx neon - -# -# Block copy -# -case $arch in - x86*) - prototype void vp8_copy32xn "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n" - specialize vp8_copy32xn sse2 sse3 - ;; -esac - -# -# Structured Similarity (SSIM) -# -if [ "$CONFIG_INTERNAL_STATS" = "yes" ]; then - [ $arch = "x86_64" ] && sse2_on_x86_64=sse2 - - prototype void vp8_ssim_parms_8x8 "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr" - specialize vp8_ssim_parms_8x8 
$sse2_on_x86_64 - - prototype void vp8_ssim_parms_16x16 "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr" - specialize vp8_ssim_parms_16x16 $sse2_on_x86_64 -fi - -# -# Forward DCT -# -prototype void vp8_short_fdct4x4 "short *input, short *output, int pitch" -specialize vp8_short_fdct4x4 mmx sse2 media neon -vp8_short_fdct4x4_media=vp8_short_fdct4x4_armv6 - -prototype void vp8_short_fdct8x4 "short *input, short *output, int pitch" -specialize vp8_short_fdct8x4 mmx sse2 media neon -vp8_short_fdct8x4_media=vp8_short_fdct8x4_armv6 - -prototype void vp8_short_walsh4x4 "short *input, short *output, int pitch" -specialize vp8_short_walsh4x4 sse2 media neon -vp8_short_walsh4x4_media=vp8_short_walsh4x4_armv6 - -# -# Quantizer -# -prototype void vp8_regular_quantize_b "struct block *, struct blockd *" -specialize vp8_regular_quantize_b sse2 #sse4_1 -# TODO(johann) Update sse4 implementation and re-enable -#vp8_regular_quantize_b_sse4_1=vp8_regular_quantize_b_sse4 - -prototype void vp8_fast_quantize_b "struct block *, struct blockd *" -specialize vp8_fast_quantize_b sse2 ssse3 media neon -vp8_fast_quantize_b_media=vp8_fast_quantize_b_armv6 - -prototype void vp8_regular_quantize_b_pair "struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2" -# no asm yet - -prototype void vp8_fast_quantize_b_pair "struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2" -specialize vp8_fast_quantize_b_pair neon - -prototype void vp8_quantize_mb "struct macroblock *" -specialize vp8_quantize_mb neon - -prototype void vp8_quantize_mby "struct macroblock *" -specialize vp8_quantize_mby neon - -prototype void vp8_quantize_mbuv "struct macroblock *" -specialize vp8_quantize_mbuv neon - -# -# Block subtraction -# -prototype int vp8_block_error "short *coeff, short *dqcoeff" -specialize vp8_block_error mmx sse2 
-vp8_block_error_sse2=vp8_block_error_xmm - -prototype int vp8_mbblock_error "struct macroblock *mb, int dc" -specialize vp8_mbblock_error mmx sse2 -vp8_mbblock_error_sse2=vp8_mbblock_error_xmm - -prototype int vp8_mbuverror "struct macroblock *mb" -specialize vp8_mbuverror mmx sse2 -vp8_mbuverror_sse2=vp8_mbuverror_xmm - -prototype void vp8_subtract_b "struct block *be, struct blockd *bd, int pitch" -specialize vp8_subtract_b mmx sse2 media neon -vp8_subtract_b_media=vp8_subtract_b_armv6 - -prototype void vp8_subtract_mby "short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride" -specialize vp8_subtract_mby mmx sse2 media neon -vp8_subtract_mby_media=vp8_subtract_mby_armv6 - -prototype void vp8_subtract_mbuv "short *diff, unsigned char *usrc, unsigned char *vsrc, int src_stride, unsigned char *upred, unsigned char *vpred, int pred_stride" -specialize vp8_subtract_mbuv mmx sse2 media neon -vp8_subtract_mbuv_media=vp8_subtract_mbuv_armv6 - -# -# Motion search -# -prototype int vp8_full_search_sad "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv" -specialize vp8_full_search_sad sse3 sse4_1 -vp8_full_search_sad_sse3=vp8_full_search_sadx3 -vp8_full_search_sad_sse4_1=vp8_full_search_sadx8 - -prototype int vp8_refining_search_sad "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv" -specialize vp8_refining_search_sad sse3 -vp8_refining_search_sad_sse3=vp8_refining_search_sadx4 - -prototype int vp8_diamond_search_sad "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, union int_mv *best_mv, int search_param, int sad_per_bit, int *num00, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv" 
-vp8_diamond_search_sad_sse3=vp8_diamond_search_sadx4 - -# -# Alt-ref Noise Reduction (ARNR) -# -if [ "$CONFIG_REALTIME_ONLY" != "yes" ]; then - prototype void vp8_temporal_filter_apply "unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count" - specialize vp8_temporal_filter_apply sse2 -fi - -# -# Pick Loopfilter -# -prototype void vp8_yv12_copy_partial_frame "struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc" -specialize vp8_yv12_copy_partial_frame neon - -# -# Denoiser filter -# -if [ "$CONFIG_TEMPORAL_DENOISING" = "yes" ]; then - prototype int vp8_denoiser_filter "struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset" - specialize vp8_denoiser_filter sse2 neon -fi - -# End of encoder only functions -fi diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk index ac91d7af5ba63409b9e38b8f1208b7c6d63c7a16..dfb54a52c857cae0ae833826a7472e04ec66da00 100644 --- a/vp8/vp8_common.mk +++ b/vp8/vp8_common.mk @@ -47,7 +47,7 @@ VP8_COMMON_SRCS-yes += common/quant_common.h VP8_COMMON_SRCS-yes += common/reconinter.h VP8_COMMON_SRCS-yes += common/reconintra4x4.h VP8_COMMON_SRCS-yes += common/rtcd.c -VP8_COMMON_SRCS-yes += common/rtcd_defs.sh +VP8_COMMON_SRCS-yes += common/rtcd_defs.pl VP8_COMMON_SRCS-yes += common/setupintrarecon.h VP8_COMMON_SRCS-yes += common/swapyv12buffer.h VP8_COMMON_SRCS-yes += common/systemdependent.h @@ -189,4 +189,4 @@ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dequant_idct_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dequantizeb_neon.c -$(eval $(call rtcd_h_template,vp8_rtcd,vp8/common/rtcd_defs.sh)) +$(eval $(call rtcd_h_template,vp8_rtcd,vp8/common/rtcd_defs.pl)) diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl new file mode 100644 index 
0000000000000000000000000000000000000000..e4cd9d4a0107825c1495c60847641d4eb28e340a --- /dev/null +++ b/vp9/common/vp9_rtcd_defs.pl @@ -0,0 +1,778 @@ +sub vp9_common_forward_decls() { +print <<EOF +/* + * VP9 + */ + +#include "vpx/vpx_integer.h" +#include "vp9/common/vp9_enums.h" + +struct macroblockd; + +/* Encoder forward decls */ +struct macroblock; +struct vp9_variance_vtable; + +#define DEC_MVCOSTS int *mvjcost, int *mvcost[2] +struct mv; +union int_mv; +struct yv12_buffer_config; +EOF +} +forward_decls qw/vp9_common_forward_decls/; + +# x86inc.asm doesn't work if pic is enabled on 32 bit platforms so no assembly. +if (vpx_config("CONFIG_USE_X86INC") eq "yes") { + $mmx_x86inc = 'mmx'; + $sse_x86inc = 'sse'; + $sse2_x86inc = 'sse2'; + $ssse3_x86inc = 'ssse3'; + $avx_x86inc = 'avx'; + $avx2_x86inc = 'avx2'; +} else { + $mmx_x86inc = $sse_x86inc = $sse2_x86inc = $ssse3_x86inc = + $avx_x86inc = $avx2_x86inc = ''; +} + +# this variable is for functions that are 64 bit only. +if ($opts{arch} eq "x86_64") { + $mmx_x86_64 = 'mmx'; + $sse2_x86_64 = 'sse2'; + $ssse3_x86_64 = 'ssse3'; + $avx_x86_64 = 'avx'; + $avx2_x86_64 = 'avx2'; +} else { + $mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 = + $avx_x86_64 = $avx2_x86_64 = ''; +} + +# +# RECON +# +add_proto qw/void vp9_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d207_predictor_4x4/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d45_predictor_4x4/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d63_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d63_predictor_4x4/, "$ssse3_x86inc"; + +add_proto qw/void vp9_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_h_predictor_4x4 neon dspr2/, "$ssse3_x86inc"; + +add_proto 
qw/void vp9_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d117_predictor_4x4/; + +add_proto qw/void vp9_d135_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d135_predictor_4x4/; + +add_proto qw/void vp9_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d153_predictor_4x4/, "$ssse3_x86inc"; + +add_proto qw/void vp9_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_v_predictor_4x4 neon/, "$sse_x86inc"; + +add_proto qw/void vp9_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_tm_predictor_4x4 neon dspr2/, "$sse_x86inc"; + +add_proto qw/void vp9_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_predictor_4x4 dspr2/, "$sse_x86inc"; + +add_proto qw/void vp9_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_top_predictor_4x4/; + +add_proto qw/void vp9_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_left_predictor_4x4/; + +add_proto qw/void vp9_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_128_predictor_4x4/; + +add_proto qw/void vp9_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d207_predictor_8x8/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d45_predictor_8x8/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, 
const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d63_predictor_8x8/, "$ssse3_x86inc"; + +add_proto qw/void vp9_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_h_predictor_8x8 neon dspr2/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d117_predictor_8x8/; + +add_proto qw/void vp9_d135_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d135_predictor_8x8/; + +add_proto qw/void vp9_d153_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d153_predictor_8x8/, "$ssse3_x86inc"; + +add_proto qw/void vp9_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_v_predictor_8x8 neon/, "$sse_x86inc"; + +add_proto qw/void vp9_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_tm_predictor_8x8 neon dspr2/, "$sse2_x86inc"; + +add_proto qw/void vp9_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_predictor_8x8 dspr2/, "$sse_x86inc"; + +add_proto qw/void vp9_dc_top_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_top_predictor_8x8/; + +add_proto qw/void vp9_dc_left_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_left_predictor_8x8/; + +add_proto qw/void vp9_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_128_predictor_8x8/; + +add_proto qw/void vp9_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize 
qw/vp9_d207_predictor_16x16/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d45_predictor_16x16/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d63_predictor_16x16/, "$ssse3_x86inc"; + +add_proto qw/void vp9_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_h_predictor_16x16 neon dspr2/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d117_predictor_16x16/; + +add_proto qw/void vp9_d135_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d135_predictor_16x16/; + +add_proto qw/void vp9_d153_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d153_predictor_16x16/, "$ssse3_x86inc"; + +add_proto qw/void vp9_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_v_predictor_16x16 neon/, "$sse2_x86inc"; + +add_proto qw/void vp9_tm_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_tm_predictor_16x16 neon/, "$sse2_x86inc"; + +add_proto qw/void vp9_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_predictor_16x16 dspr2/, "$sse2_x86inc"; + +add_proto qw/void vp9_dc_top_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_top_predictor_16x16/; + +add_proto qw/void vp9_dc_left_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 
+specialize qw/vp9_dc_left_predictor_16x16/; + +add_proto qw/void vp9_dc_128_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_128_predictor_16x16/; + +add_proto qw/void vp9_d207_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d207_predictor_32x32/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d45_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d45_predictor_32x32/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d63_predictor_32x32/, "$ssse3_x86inc"; + +add_proto qw/void vp9_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_h_predictor_32x32 neon/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d117_predictor_32x32/; + +add_proto qw/void vp9_d135_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d135_predictor_32x32/; + +add_proto qw/void vp9_d153_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d153_predictor_32x32/; + +add_proto qw/void vp9_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_v_predictor_32x32 neon/, "$sse2_x86inc"; + +add_proto qw/void vp9_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_tm_predictor_32x32 neon/, "$sse2_x86_64"; + +add_proto qw/void vp9_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_predictor_32x32/, 
"$sse2_x86inc"; + +add_proto qw/void vp9_dc_top_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_top_predictor_32x32/; + +add_proto qw/void vp9_dc_left_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_left_predictor_32x32/; + +add_proto qw/void vp9_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_128_predictor_32x32/; + +# +# Loopfilter +# +add_proto qw/void vp9_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; +specialize qw/vp9_lpf_vertical_16 sse2 neon dspr2/; + +add_proto qw/void vp9_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; +specialize qw/vp9_lpf_vertical_16_dual sse2 neon dspr2/; + +add_proto qw/void vp9_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; +specialize qw/vp9_lpf_vertical_8 sse2 neon dspr2/; + +add_proto qw/void vp9_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; +specialize qw/vp9_lpf_vertical_8_dual sse2 neon dspr2/; + +add_proto qw/void vp9_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; +specialize qw/vp9_lpf_vertical_4 mmx neon dspr2/; + +add_proto qw/void vp9_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; +specialize qw/vp9_lpf_vertical_4_dual sse2 neon dspr2/; + +add_proto qw/void vp9_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, 
const uint8_t *thresh, int count"; +specialize qw/vp9_lpf_horizontal_16 sse2 avx2 neon dspr2/; + +add_proto qw/void vp9_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; +specialize qw/vp9_lpf_horizontal_8 sse2 neon dspr2/; + +add_proto qw/void vp9_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; +specialize qw/vp9_lpf_horizontal_8_dual sse2 neon dspr2/; + +add_proto qw/void vp9_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; +specialize qw/vp9_lpf_horizontal_4 mmx neon dspr2/; + +add_proto qw/void vp9_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; +specialize qw/vp9_lpf_horizontal_4_dual sse2 neon dspr2/; + +# +# post proc +# +if (vpx_config("CONFIG_VP9_POSTPROC") eq "yes") { +add_proto qw/void vp9_mbpost_proc_down/, "uint8_t *dst, int pitch, int rows, int cols, int flimit"; +specialize qw/vp9_mbpost_proc_down mmx sse2/; +$vp9_mbpost_proc_down_sse2=vp9_mbpost_proc_down_xmm; + +add_proto qw/void vp9_mbpost_proc_across_ip/, "uint8_t *src, int pitch, int rows, int cols, int flimit"; +specialize qw/vp9_mbpost_proc_across_ip sse2/; +$vp9_mbpost_proc_across_ip_sse2=vp9_mbpost_proc_across_ip_xmm; + +add_proto qw/void vp9_post_proc_down_and_across/, "const uint8_t *src_ptr, uint8_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit"; +specialize qw/vp9_post_proc_down_and_across mmx sse2/; +$vp9_post_proc_down_and_across_sse2=vp9_post_proc_down_and_across_xmm; + +add_proto qw/void vp9_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int 
Width, unsigned int Height, int Pitch"; +specialize qw/vp9_plane_add_noise mmx sse2/; +$vp9_plane_add_noise_sse2=vp9_plane_add_noise_wmt; +} + +add_proto qw/void vp9_blend_mb_inner/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride"; +specialize qw/vp9_blend_mb_inner/; + +add_proto qw/void vp9_blend_mb_outer/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride"; +specialize qw/vp9_blend_mb_outer/; + +add_proto qw/void vp9_blend_b/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride"; +specialize qw/vp9_blend_b/; + +# +# Sub Pixel Filters +# +add_proto qw/void vp9_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; +specialize qw/vp9_convolve_copy neon dspr2/, "$sse2_x86inc"; + +add_proto qw/void vp9_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; +specialize qw/vp9_convolve_avg neon dspr2/, "$sse2_x86inc"; + +add_proto qw/void vp9_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; +specialize qw/vp9_convolve8 sse2 ssse3 avx2 neon dspr2/; + +add_proto qw/void vp9_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; +specialize qw/vp9_convolve8_horiz sse2 ssse3 avx2 neon dspr2/; + +add_proto qw/void vp9_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; +specialize qw/vp9_convolve8_vert sse2 
ssse3 avx2 neon dspr2/; + +add_proto qw/void vp9_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; +specialize qw/vp9_convolve8_avg sse2 ssse3 neon dspr2/; + +add_proto qw/void vp9_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; +specialize qw/vp9_convolve8_avg_horiz sse2 ssse3 neon dspr2/; + +add_proto qw/void vp9_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; +specialize qw/vp9_convolve8_avg_vert sse2 ssse3 neon dspr2/; + +# +# dct +# +add_proto qw/void vp9_idct4x4_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; +specialize qw/vp9_idct4x4_1_add sse2 neon dspr2/; + +add_proto qw/void vp9_idct4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; +specialize qw/vp9_idct4x4_16_add sse2 neon dspr2/; + +add_proto qw/void vp9_idct8x8_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; +specialize qw/vp9_idct8x8_1_add sse2 neon dspr2/; + +add_proto qw/void vp9_idct8x8_64_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; +specialize qw/vp9_idct8x8_64_add sse2 neon dspr2/; + +add_proto qw/void vp9_idct8x8_10_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; +specialize qw/vp9_idct8x8_10_add sse2 neon dspr2/; + +add_proto qw/void vp9_idct16x16_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; +specialize qw/vp9_idct16x16_1_add sse2 neon dspr2/; + +add_proto qw/void vp9_idct16x16_256_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; +specialize qw/vp9_idct16x16_256_add sse2 neon dspr2/; + +add_proto qw/void vp9_idct16x16_10_add/, "const int16_t *input, 
uint8_t *dest, int dest_stride"; +specialize qw/vp9_idct16x16_10_add sse2 neon dspr2/; + +add_proto qw/void vp9_idct32x32_1024_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; +specialize qw/vp9_idct32x32_1024_add sse2 neon dspr2/; + +add_proto qw/void vp9_idct32x32_34_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; +specialize qw/vp9_idct32x32_34_add sse2 neon dspr2/; +$vp9_idct32x32_34_add_neon=vp9_idct32x32_1024_add_neon; + +add_proto qw/void vp9_idct32x32_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; +specialize qw/vp9_idct32x32_1_add sse2 neon dspr2/; + +add_proto qw/void vp9_iht4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type"; +specialize qw/vp9_iht4x4_16_add sse2 neon dspr2/; + +add_proto qw/void vp9_iht8x8_64_add/, "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type"; +specialize qw/vp9_iht8x8_64_add sse2 neon dspr2/; + +add_proto qw/void vp9_iht16x16_256_add/, "const int16_t *input, uint8_t *output, int pitch, int tx_type"; +specialize qw/vp9_iht16x16_256_add sse2 dspr2/; + +# dct and add + +add_proto qw/void vp9_iwht4x4_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; +specialize qw/vp9_iwht4x4_1_add/; + +add_proto qw/void vp9_iwht4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; +specialize qw/vp9_iwht4x4_16_add/; + +# +# Encoder functions below this point. 
+# +if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") { + + +# variance +add_proto qw/unsigned int vp9_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance32x16/, "$sse2_x86inc", "$avx2_x86inc"; + +add_proto qw/unsigned int vp9_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance16x32/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance64x32/, "$sse2_x86inc", "$avx2_x86inc"; + +add_proto qw/unsigned int vp9_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance32x64/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance32x32/, "$sse2_x86inc", "$avx2_x86inc"; + +add_proto qw/unsigned int vp9_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance64x64/, "$sse2_x86inc", "$avx2_x86inc"; + +add_proto qw/unsigned int vp9_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance16x16 mmx/, "$sse2_x86inc", "$avx2_x86inc"; + +add_proto qw/unsigned int vp9_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance16x8 mmx/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance8x16 mmx/, "$sse2_x86inc"; + 
+add_proto qw/unsigned int vp9_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance8x8 mmx/, "$sse2_x86inc"; + +add_proto qw/void vp9_get_sse_sum_8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; +specialize qw/vp9_get_sse_sum_8x8 sse2/; +$vp9_get_sse_sum_8x8_sse2=vp9_get8x8var_sse2; + +add_proto qw/unsigned int vp9_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance8x4/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance4x8/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance4x4 mmx/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int 
source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_variance64x32/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance64x32/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_variance32x16/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance32x16/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_variance32x32/, "const uint8_t 
*src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_variance16x16/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance16x16/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_variance8x16/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance8x16/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_variance16x8/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int 
source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance16x8/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_variance8x8/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance8x8/, "$sse2_x86inc", "$ssse3_x86inc"; + +# TODO(jingning): need to convert 8x4/4x8 functions into mmx/sse form +add_proto qw/unsigned int vp9_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_variance8x4/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance8x4/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_variance4x8/, "$sse_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance4x8/, "$sse_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int 
vp9_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_variance4x4/, "$sse_x86inc", "$ssse3_x86inc"; +#vp9_sub_pixel_variance4x4_sse2=vp9_sub_pixel_variance4x4_wmt + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance4x4/, "$sse_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sad64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad64x64/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad32x64/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad64x32/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad32x16/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad16x32/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad32x32/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad16x16 mmx/, "$sse2_x86inc"; + +add_proto 
qw/unsigned int vp9_sad16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad16x8 mmx/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad8x16 mmx/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad8x8 mmx/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad8x4/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad4x8/, "$sse_x86inc"; + +add_proto qw/unsigned int vp9_sad4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad4x4 mmx/, "$sse_x86inc"; + +add_proto qw/unsigned int vp9_sad64x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad64x64_avg/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad32x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad32x64_avg/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad64x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad64x32_avg/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad32x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const 
uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad32x16_avg/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad16x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad16x32_avg/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad32x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad32x32_avg/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad16x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad16x16_avg/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad16x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad16x8_avg/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad8x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad8x16_avg/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad8x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad8x8_avg/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad8x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad8x4_avg/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad4x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad4x8_avg/, "$sse_x86inc"; + +add_proto qw/unsigned int vp9_sad4x4_avg/, "const 
uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad4x4_avg/, "$sse_x86inc"; + +add_proto qw/unsigned int vp9_variance_halfpixvar16x16_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance_halfpixvar16x16_h/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_variance_halfpixvar16x16_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance_halfpixvar16x16_v/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_variance_halfpixvar16x16_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance_halfpixvar16x16_hv/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_variance_halfpixvar64x64_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance_halfpixvar64x64_h/; + +add_proto qw/unsigned int vp9_variance_halfpixvar64x64_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance_halfpixvar64x64_v/; + +add_proto qw/unsigned int vp9_variance_halfpixvar64x64_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance_halfpixvar64x64_hv/; + +add_proto qw/unsigned int vp9_variance_halfpixvar32x32_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance_halfpixvar32x32_h/; + +add_proto qw/unsigned int vp9_variance_halfpixvar32x32_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance_halfpixvar32x32_v/; + +add_proto qw/unsigned int 
vp9_variance_halfpixvar32x32_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance_halfpixvar32x32_hv/; + +add_proto qw/void vp9_sad64x64x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad64x64x3/; + +add_proto qw/void vp9_sad32x32x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad32x32x3/; + +add_proto qw/void vp9_sad16x16x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad16x16x3 sse3 ssse3/; + +add_proto qw/void vp9_sad16x8x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad16x8x3 sse3 ssse3/; + +add_proto qw/void vp9_sad8x16x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad8x16x3 sse3/; + +add_proto qw/void vp9_sad8x8x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad8x8x3 sse3/; + +add_proto qw/void vp9_sad4x4x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad4x4x3 sse3/; + +add_proto qw/void vp9_sad64x64x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; +specialize qw/vp9_sad64x64x8/; + +add_proto qw/void vp9_sad32x32x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; +specialize qw/vp9_sad32x32x8/; + +add_proto qw/void vp9_sad16x16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; +specialize qw/vp9_sad16x16x8 sse4/; + 
+add_proto qw/void vp9_sad16x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; +specialize qw/vp9_sad16x8x8 sse4/; + +add_proto qw/void vp9_sad8x16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; +specialize qw/vp9_sad8x16x8 sse4/; + +add_proto qw/void vp9_sad8x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; +specialize qw/vp9_sad8x8x8 sse4/; + +add_proto qw/void vp9_sad8x4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; +specialize qw/vp9_sad8x4x8/; + +add_proto qw/void vp9_sad4x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; +specialize qw/vp9_sad4x8x8/; + +add_proto qw/void vp9_sad4x4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; +specialize qw/vp9_sad4x4x8 sse4/; + +add_proto qw/void vp9_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad64x64x4d sse2/; + +add_proto qw/void vp9_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad32x64x4d sse2/; + +add_proto qw/void vp9_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad64x32x4d sse2/; + +add_proto qw/void vp9_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad32x16x4d sse2/; + +add_proto qw/void vp9_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad16x32x4d sse2/; + 
+add_proto qw/void vp9_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad32x32x4d sse2/; + +add_proto qw/void vp9_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad16x16x4d sse2/; + +add_proto qw/void vp9_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad16x8x4d sse2/; + +add_proto qw/void vp9_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad8x16x4d sse2/; + +add_proto qw/void vp9_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad8x8x4d sse2/; + +# TODO(jingning): need to convert these 4x8/8x4 functions into sse2 form +add_proto qw/void vp9_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad8x4x4d sse2/; + +add_proto qw/void vp9_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad4x8x4d sse/; + +add_proto qw/void vp9_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad4x4x4d sse/; + +#add_proto qw/unsigned int vp9_sub_pixel_mse16x16/, "const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse"; +#specialize qw/vp9_sub_pixel_mse16x16 sse2 mmx/; + +add_proto qw/unsigned int vp9_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; +specialize 
qw/vp9_mse16x16 mmx/, "$sse2_x86inc", "$avx2_x86inc"; + +add_proto qw/unsigned int vp9_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; +specialize qw/vp9_mse8x16/; + +add_proto qw/unsigned int vp9_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; +specialize qw/vp9_mse16x8/; + +add_proto qw/unsigned int vp9_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; +specialize qw/vp9_mse8x8/; + +add_proto qw/unsigned int vp9_sub_pixel_mse64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_mse64x64/; + +add_proto qw/unsigned int vp9_sub_pixel_mse32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_mse32x32/; + +add_proto qw/unsigned int vp9_get_mb_ss/, "const int16_t *"; +specialize qw/vp9_get_mb_ss mmx sse2/; +# ENCODEMB INVOKE + +add_proto qw/int64_t vp9_block_error/, "const int16_t *coeff, const int16_t *dqcoeff, intptr_t block_size, int64_t *ssz"; +specialize qw/vp9_block_error/, "$sse2_x86inc"; + +add_proto qw/void vp9_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride"; +specialize qw/vp9_subtract_block/, "$sse2_x86inc"; + +add_proto qw/void vp9_quantize_b/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; +specialize qw/vp9_quantize_b/, "$ssse3_x86_64"; + 
+add_proto qw/void vp9_quantize_b_32x32/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; +specialize qw/vp9_quantize_b_32x32/, "$ssse3_x86_64"; + +# +# Structured Similarity (SSIM) +# +if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") { + add_proto qw/void vp9_ssim_parms_8x8/, "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr"; + specialize qw/vp9_ssim_parms_8x8/, "$sse2_x86_64"; + + add_proto qw/void vp9_ssim_parms_16x16/, "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr"; + specialize qw/vp9_ssim_parms_16x16/, "$sse2_x86_64"; +} + +# fdct functions +add_proto qw/void vp9_fht4x4/, "const int16_t *input, int16_t *output, int stride, int tx_type"; +specialize qw/vp9_fht4x4 sse2 avx2/; + +add_proto qw/void vp9_fht8x8/, "const int16_t *input, int16_t *output, int stride, int tx_type"; +specialize qw/vp9_fht8x8 sse2 avx2/; + +add_proto qw/void vp9_fht16x16/, "const int16_t *input, int16_t *output, int stride, int tx_type"; +specialize qw/vp9_fht16x16 sse2 avx2/; + +add_proto qw/void vp9_fwht4x4/, "const int16_t *input, int16_t *output, int stride"; +specialize qw/vp9_fwht4x4/; + +add_proto qw/void vp9_fdct4x4/, "const int16_t *input, int16_t *output, int stride"; +specialize qw/vp9_fdct4x4 sse2 avx2/; + +add_proto qw/void vp9_fdct8x8/, "const int16_t *input, int16_t *output, int stride"; +specialize qw/vp9_fdct8x8 sse2 avx2/; + +add_proto qw/void vp9_fdct16x16/, "const int16_t *input, int16_t *output, int stride"; +specialize qw/vp9_fdct16x16 sse2 avx2/; + +add_proto qw/void 
vp9_fdct32x32/, "const int16_t *input, int16_t *output, int stride"; +specialize qw/vp9_fdct32x32 sse2 avx2/; + +add_proto qw/void vp9_fdct32x32_rd/, "const int16_t *input, int16_t *output, int stride"; +specialize qw/vp9_fdct32x32_rd sse2 avx2/; + +# +# Motion search +# +add_proto qw/int vp9_full_search_sad/, "const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv, struct mv *best_mv"; +specialize qw/vp9_full_search_sad sse3 sse4_1/; +$vp9_full_search_sad_sse3=vp9_full_search_sadx3; +$vp9_full_search_sad_sse4_1=vp9_full_search_sadx8; + +add_proto qw/int vp9_refining_search_sad/, "const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv"; +specialize qw/vp9_refining_search_sad sse3/; +$vp9_refining_search_sad_sse3=vp9_refining_search_sadx4; + +add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv"; +specialize qw/vp9_diamond_search_sad sse3/; +$vp9_diamond_search_sad_sse3=vp9_diamond_search_sadx4; + +add_proto qw/int vp9_full_range_search/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv"; +specialize qw/vp9_full_range_search/; + +add_proto qw/void vp9_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count"; +specialize qw/vp9_temporal_filter_apply sse2/; + +} +# end encoder functions +1; diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh deleted file mode 100644 index 
5b449705af9fd823d09a8a20666f72ca8a8bd25e..0000000000000000000000000000000000000000 --- a/vp9/common/vp9_rtcd_defs.sh +++ /dev/null @@ -1,760 +0,0 @@ -vp9_common_forward_decls() { -cat <<EOF -/* - * VP9 - */ - -#include "vpx/vpx_integer.h" -#include "vp9/common/vp9_enums.h" - -struct macroblockd; - -/* Encoder forward decls */ -struct macroblock; -struct vp9_variance_vtable; - -#define DEC_MVCOSTS int *mvjcost, int *mvcost[2] -struct mv; -union int_mv; -struct yv12_buffer_config; -EOF -} -forward_decls vp9_common_forward_decls - -# x86inc.asm doesn't work if pic is enabled on 32 bit platforms so no assembly. -[ "$CONFIG_USE_X86INC" = "yes" ] && mmx_x86inc=mmx && sse_x86inc=sse && - sse2_x86inc=sse2 && ssse3_x86inc=ssse3 && avx_x86inc=avx && avx2_x86inc=avx2 - -# this variable is for functions that are 64 bit only. -[ $arch = "x86_64" ] && mmx_x86_64=mmx && sse2_x86_64=sse2 && - ssse3_x86_64=ssse3 && avx_x86_64=avx && avx2_x86_64=avx2 - -# -# RECON -# -prototype void vp9_d207_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d207_predictor_4x4 $ssse3_x86inc - -prototype void vp9_d45_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d45_predictor_4x4 $ssse3_x86inc - -prototype void vp9_d63_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d63_predictor_4x4 $ssse3_x86inc - -prototype void vp9_h_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_h_predictor_4x4 $ssse3_x86inc neon dspr2 - -prototype void vp9_d117_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d117_predictor_4x4 - -prototype void vp9_d135_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d135_predictor_4x4 - -prototype void vp9_d153_predictor_4x4 "uint8_t *dst, 
ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d153_predictor_4x4 $ssse3_x86inc - -prototype void vp9_v_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_v_predictor_4x4 $sse_x86inc neon - -prototype void vp9_tm_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_tm_predictor_4x4 $sse_x86inc neon dspr2 - -prototype void vp9_dc_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_predictor_4x4 $sse_x86inc dspr2 - -prototype void vp9_dc_top_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_top_predictor_4x4 - -prototype void vp9_dc_left_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_left_predictor_4x4 - -prototype void vp9_dc_128_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_128_predictor_4x4 - -prototype void vp9_d207_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d207_predictor_8x8 $ssse3_x86inc - -prototype void vp9_d45_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d45_predictor_8x8 $ssse3_x86inc - -prototype void vp9_d63_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d63_predictor_8x8 $ssse3_x86inc - -prototype void vp9_h_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_h_predictor_8x8 $ssse3_x86inc neon dspr2 - -prototype void vp9_d117_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d117_predictor_8x8 - -prototype void vp9_d135_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, 
const uint8_t *above, const uint8_t *left" -specialize vp9_d135_predictor_8x8 - -prototype void vp9_d153_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d153_predictor_8x8 $ssse3_x86inc - -prototype void vp9_v_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_v_predictor_8x8 $sse_x86inc neon - -prototype void vp9_tm_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_tm_predictor_8x8 $sse2_x86inc neon dspr2 - -prototype void vp9_dc_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_predictor_8x8 $sse_x86inc dspr2 - -prototype void vp9_dc_top_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_top_predictor_8x8 - -prototype void vp9_dc_left_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_left_predictor_8x8 - -prototype void vp9_dc_128_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_128_predictor_8x8 - -prototype void vp9_d207_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d207_predictor_16x16 $ssse3_x86inc - -prototype void vp9_d45_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d45_predictor_16x16 $ssse3_x86inc - -prototype void vp9_d63_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d63_predictor_16x16 $ssse3_x86inc - -prototype void vp9_h_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_h_predictor_16x16 $ssse3_x86inc neon dspr2 - -prototype void vp9_d117_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, 
const uint8_t *above, const uint8_t *left" -specialize vp9_d117_predictor_16x16 - -prototype void vp9_d135_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d135_predictor_16x16 - -prototype void vp9_d153_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d153_predictor_16x16 $ssse3_x86inc - -prototype void vp9_v_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_v_predictor_16x16 $sse2_x86inc neon - -prototype void vp9_tm_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_tm_predictor_16x16 $sse2_x86inc neon - -prototype void vp9_dc_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_predictor_16x16 $sse2_x86inc dspr2 - -prototype void vp9_dc_top_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_top_predictor_16x16 - -prototype void vp9_dc_left_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_left_predictor_16x16 - -prototype void vp9_dc_128_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_128_predictor_16x16 - -prototype void vp9_d207_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d207_predictor_32x32 $ssse3_x86inc - -prototype void vp9_d45_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d45_predictor_32x32 $ssse3_x86inc - -prototype void vp9_d63_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d63_predictor_32x32 $ssse3_x86inc - -prototype void vp9_h_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, 
const uint8_t *above, const uint8_t *left" -specialize vp9_h_predictor_32x32 $ssse3_x86inc neon - -prototype void vp9_d117_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d117_predictor_32x32 - -prototype void vp9_d135_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d135_predictor_32x32 - -prototype void vp9_d153_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d153_predictor_32x32 - -prototype void vp9_v_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_v_predictor_32x32 $sse2_x86inc neon - -prototype void vp9_tm_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_tm_predictor_32x32 $sse2_x86_64 neon - -prototype void vp9_dc_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_predictor_32x32 $sse2_x86inc - -prototype void vp9_dc_top_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_top_predictor_32x32 - -prototype void vp9_dc_left_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_left_predictor_32x32 - -prototype void vp9_dc_128_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_128_predictor_32x32 - -# -# Loopfilter -# -prototype void vp9_lpf_vertical_16 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh" -specialize vp9_lpf_vertical_16 sse2 neon dspr2 - -prototype void vp9_lpf_vertical_16_dual "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh" -specialize vp9_lpf_vertical_16_dual sse2 neon dspr2 - -prototype void vp9_lpf_vertical_8 "uint8_t 
*s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" -specialize vp9_lpf_vertical_8 sse2 neon dspr2 - -prototype void vp9_lpf_vertical_8_dual "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1" -specialize vp9_lpf_vertical_8_dual sse2 neon dspr2 - -prototype void vp9_lpf_vertical_4 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" -specialize vp9_lpf_vertical_4 mmx neon dspr2 - -prototype void vp9_lpf_vertical_4_dual "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1" -specialize vp9_lpf_vertical_4_dual sse2 neon dspr2 - -prototype void vp9_lpf_horizontal_16 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" -specialize vp9_lpf_horizontal_16 sse2 avx2 neon dspr2 - -prototype void vp9_lpf_horizontal_8 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" -specialize vp9_lpf_horizontal_8 sse2 neon dspr2 - -prototype void vp9_lpf_horizontal_8_dual "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1" -specialize vp9_lpf_horizontal_8_dual sse2 neon dspr2 - -prototype void vp9_lpf_horizontal_4 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" -specialize vp9_lpf_horizontal_4 mmx neon dspr2 - -prototype void vp9_lpf_horizontal_4_dual "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1" -specialize vp9_lpf_horizontal_4_dual sse2 neon dspr2 - -# -# post proc -# -if [ 
"$CONFIG_VP9_POSTPROC" = "yes" ]; then -prototype void vp9_mbpost_proc_down "uint8_t *dst, int pitch, int rows, int cols, int flimit" -specialize vp9_mbpost_proc_down mmx sse2 -vp9_mbpost_proc_down_sse2=vp9_mbpost_proc_down_xmm - -prototype void vp9_mbpost_proc_across_ip "uint8_t *src, int pitch, int rows, int cols, int flimit" -specialize vp9_mbpost_proc_across_ip sse2 -vp9_mbpost_proc_across_ip_sse2=vp9_mbpost_proc_across_ip_xmm - -prototype void vp9_post_proc_down_and_across "const uint8_t *src_ptr, uint8_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit" -specialize vp9_post_proc_down_and_across mmx sse2 -vp9_post_proc_down_and_across_sse2=vp9_post_proc_down_and_across_xmm - -prototype void vp9_plane_add_noise "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch" -specialize vp9_plane_add_noise mmx sse2 -vp9_plane_add_noise_sse2=vp9_plane_add_noise_wmt -fi - -prototype void vp9_blend_mb_inner "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride" -specialize vp9_blend_mb_inner - -prototype void vp9_blend_mb_outer "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride" -specialize vp9_blend_mb_outer - -prototype void vp9_blend_b "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride" -specialize vp9_blend_b - -# -# Sub Pixel Filters -# -prototype void vp9_convolve_copy "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" -specialize vp9_convolve_copy $sse2_x86inc neon dspr2 - -prototype void vp9_convolve_avg "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" -specialize vp9_convolve_avg $sse2_x86inc neon dspr2 
- -prototype void vp9_convolve8 "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" -specialize vp9_convolve8 sse2 ssse3 avx2 neon dspr2 - -prototype void vp9_convolve8_horiz "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" -specialize vp9_convolve8_horiz sse2 ssse3 avx2 neon dspr2 - -prototype void vp9_convolve8_vert "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" -specialize vp9_convolve8_vert sse2 ssse3 avx2 neon dspr2 - -prototype void vp9_convolve8_avg "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" -specialize vp9_convolve8_avg sse2 ssse3 neon dspr2 - -prototype void vp9_convolve8_avg_horiz "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" -specialize vp9_convolve8_avg_horiz sse2 ssse3 neon dspr2 - -prototype void vp9_convolve8_avg_vert "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" -specialize vp9_convolve8_avg_vert sse2 ssse3 neon dspr2 - -# -# dct -# -prototype void vp9_idct4x4_1_add "const int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_idct4x4_1_add sse2 neon dspr2 - -prototype void vp9_idct4x4_16_add "const int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_idct4x4_16_add sse2 neon dspr2 - -prototype void vp9_idct8x8_1_add "const int16_t *input, uint8_t *dest, int dest_stride" -specialize 
vp9_idct8x8_1_add sse2 neon dspr2 - -prototype void vp9_idct8x8_64_add "const int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_idct8x8_64_add sse2 neon dspr2 - -prototype void vp9_idct8x8_10_add "const int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_idct8x8_10_add sse2 neon dspr2 - -prototype void vp9_idct16x16_1_add "const int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_idct16x16_1_add sse2 neon dspr2 - -prototype void vp9_idct16x16_256_add "const int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_idct16x16_256_add sse2 neon dspr2 - -prototype void vp9_idct16x16_10_add "const int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_idct16x16_10_add sse2 neon dspr2 - -prototype void vp9_idct32x32_1024_add "const int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_idct32x32_1024_add sse2 neon dspr2 - -prototype void vp9_idct32x32_34_add "const int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_idct32x32_34_add sse2 neon dspr2 -vp9_idct32x32_34_add_neon=vp9_idct32x32_1024_add_neon - -prototype void vp9_idct32x32_1_add "const int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_idct32x32_1_add sse2 neon dspr2 - -prototype void vp9_iht4x4_16_add "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type" -specialize vp9_iht4x4_16_add sse2 neon dspr2 - -prototype void vp9_iht8x8_64_add "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type" -specialize vp9_iht8x8_64_add sse2 neon dspr2 - -prototype void vp9_iht16x16_256_add "const int16_t *input, uint8_t *output, int pitch, int tx_type" -specialize vp9_iht16x16_256_add sse2 dspr2 - -# dct and add - -prototype void vp9_iwht4x4_1_add "const int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_iwht4x4_1_add - -prototype void vp9_iwht4x4_16_add "const int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_iwht4x4_16_add - -# -# Encoder functions below this point. 
-# -if [ "$CONFIG_VP9_ENCODER" = "yes" ]; then - - -# variance -prototype unsigned int vp9_variance32x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance32x16 $sse2_x86inc $avx2_x86inc - -prototype unsigned int vp9_variance16x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance16x32 $sse2_x86inc - -prototype unsigned int vp9_variance64x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance64x32 $sse2_x86inc $avx2_x86inc - -prototype unsigned int vp9_variance32x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance32x64 $sse2_x86inc - -prototype unsigned int vp9_variance32x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance32x32 $sse2_x86inc $avx2_x86inc - -prototype unsigned int vp9_variance64x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance64x64 $sse2_x86inc $avx2_x86inc - -prototype unsigned int vp9_variance16x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance16x16 mmx $sse2_x86inc $avx2_x86inc - -prototype unsigned int vp9_variance16x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance16x8 mmx $sse2_x86inc - -prototype unsigned int vp9_variance8x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance8x16 mmx $sse2_x86inc - -prototype unsigned int vp9_variance8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" 
-specialize vp9_variance8x8 mmx $sse2_x86inc - -prototype void vp9_get_sse_sum_8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum" -specialize vp9_get_sse_sum_8x8 sse2 -vp9_get_sse_sum_8x8_sse2=vp9_get8x8var_sse2 - -prototype unsigned int vp9_variance8x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance8x4 $sse2_x86inc - -prototype unsigned int vp9_variance4x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance4x8 $sse2_x86inc - -prototype unsigned int vp9_variance4x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance4x4 mmx $sse2_x86inc - -prototype unsigned int vp9_sub_pixel_variance64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance64x64 $sse2_x86inc $ssse3_x86inc avx2 - -prototype unsigned int vp9_sub_pixel_avg_variance64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" -specialize vp9_sub_pixel_avg_variance64x64 $sse2_x86inc $ssse3_x86inc avx2 - -prototype unsigned int vp9_sub_pixel_variance32x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance32x64 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_avg_variance32x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" -specialize vp9_sub_pixel_avg_variance32x64 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_variance64x32 "const uint8_t 
*src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance64x32 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_avg_variance64x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" -specialize vp9_sub_pixel_avg_variance64x32 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_variance32x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance32x16 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_avg_variance32x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" -specialize vp9_sub_pixel_avg_variance32x16 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_variance16x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance16x32 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_avg_variance16x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" -specialize vp9_sub_pixel_avg_variance16x32 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_variance32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance32x32 $sse2_x86inc $ssse3_x86inc avx2 - -prototype unsigned int vp9_sub_pixel_avg_variance32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t 
*second_pred" -specialize vp9_sub_pixel_avg_variance32x32 $sse2_x86inc $ssse3_x86inc avx2 - -prototype unsigned int vp9_sub_pixel_variance16x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance16x16 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_avg_variance16x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" -specialize vp9_sub_pixel_avg_variance16x16 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_variance8x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance8x16 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_avg_variance8x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" -specialize vp9_sub_pixel_avg_variance8x16 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_variance16x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance16x8 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_avg_variance16x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" -specialize vp9_sub_pixel_avg_variance16x8 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_variance8x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance8x8 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_avg_variance8x8 "const 
uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" -specialize vp9_sub_pixel_avg_variance8x8 $sse2_x86inc $ssse3_x86inc - -# TODO(jingning): need to convert 8x4/4x8 functions into mmx/sse form -prototype unsigned int vp9_sub_pixel_variance8x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance8x4 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_avg_variance8x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" -specialize vp9_sub_pixel_avg_variance8x4 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_variance4x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance4x8 $sse_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_avg_variance4x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" -specialize vp9_sub_pixel_avg_variance4x8 $sse_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_variance4x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance4x4 $sse_x86inc $ssse3_x86inc -#vp9_sub_pixel_variance4x4_sse2=vp9_sub_pixel_variance4x4_wmt - -prototype unsigned int vp9_sub_pixel_avg_variance4x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" -specialize vp9_sub_pixel_avg_variance4x4 $sse_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sad64x64 "const uint8_t *src_ptr, 
int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad64x64 $sse2_x86inc - -prototype unsigned int vp9_sad32x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad32x64 $sse2_x86inc - -prototype unsigned int vp9_sad64x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad64x32 $sse2_x86inc - -prototype unsigned int vp9_sad32x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad32x16 $sse2_x86inc - -prototype unsigned int vp9_sad16x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad16x32 $sse2_x86inc - -prototype unsigned int vp9_sad32x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad32x32 $sse2_x86inc - -prototype unsigned int vp9_sad16x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad16x16 mmx $sse2_x86inc - -prototype unsigned int vp9_sad16x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad16x8 mmx $sse2_x86inc - -prototype unsigned int vp9_sad8x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad8x16 mmx $sse2_x86inc - -prototype unsigned int vp9_sad8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad8x8 mmx $sse2_x86inc - -prototype unsigned int vp9_sad8x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad8x4 $sse2_x86inc - -prototype unsigned int vp9_sad4x8 "const 
uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad4x8 $sse_x86inc - -prototype unsigned int vp9_sad4x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad4x4 mmx $sse_x86inc - -prototype unsigned int vp9_sad64x64_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" -specialize vp9_sad64x64_avg $sse2_x86inc - -prototype unsigned int vp9_sad32x64_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" -specialize vp9_sad32x64_avg $sse2_x86inc - -prototype unsigned int vp9_sad64x32_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" -specialize vp9_sad64x32_avg $sse2_x86inc - -prototype unsigned int vp9_sad32x16_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" -specialize vp9_sad32x16_avg $sse2_x86inc - -prototype unsigned int vp9_sad16x32_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" -specialize vp9_sad16x32_avg $sse2_x86inc - -prototype unsigned int vp9_sad32x32_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" -specialize vp9_sad32x32_avg $sse2_x86inc - -prototype unsigned int vp9_sad16x16_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" -specialize vp9_sad16x16_avg $sse2_x86inc - -prototype unsigned int vp9_sad16x8_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t 
*second_pred, unsigned int max_sad" -specialize vp9_sad16x8_avg $sse2_x86inc - -prototype unsigned int vp9_sad8x16_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" -specialize vp9_sad8x16_avg $sse2_x86inc - -prototype unsigned int vp9_sad8x8_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" -specialize vp9_sad8x8_avg $sse2_x86inc - -prototype unsigned int vp9_sad8x4_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" -specialize vp9_sad8x4_avg $sse2_x86inc - -prototype unsigned int vp9_sad4x8_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" -specialize vp9_sad4x8_avg $sse_x86inc - -prototype unsigned int vp9_sad4x4_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" -specialize vp9_sad4x4_avg $sse_x86inc - -prototype unsigned int vp9_variance_halfpixvar16x16_h "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance_halfpixvar16x16_h $sse2_x86inc - -prototype unsigned int vp9_variance_halfpixvar16x16_v "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance_halfpixvar16x16_v $sse2_x86inc - -prototype unsigned int vp9_variance_halfpixvar16x16_hv "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance_halfpixvar16x16_hv $sse2_x86inc - -prototype unsigned int vp9_variance_halfpixvar64x64_h "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize 
vp9_variance_halfpixvar64x64_h - -prototype unsigned int vp9_variance_halfpixvar64x64_v "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance_halfpixvar64x64_v - -prototype unsigned int vp9_variance_halfpixvar64x64_hv "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance_halfpixvar64x64_hv - -prototype unsigned int vp9_variance_halfpixvar32x32_h "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance_halfpixvar32x32_h - -prototype unsigned int vp9_variance_halfpixvar32x32_v "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance_halfpixvar32x32_v - -prototype unsigned int vp9_variance_halfpixvar32x32_hv "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance_halfpixvar32x32_hv - -prototype void vp9_sad64x64x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array" -specialize vp9_sad64x64x3 - -prototype void vp9_sad32x32x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array" -specialize vp9_sad32x32x3 - -prototype void vp9_sad16x16x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array" -specialize vp9_sad16x16x3 sse3 ssse3 - -prototype void vp9_sad16x8x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array" -specialize vp9_sad16x8x3 sse3 ssse3 - -prototype void vp9_sad8x16x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array" -specialize vp9_sad8x16x3 sse3 - -prototype void vp9_sad8x8x3 "const uint8_t *src_ptr, int source_stride, const 
uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array" -specialize vp9_sad8x8x3 sse3 - -prototype void vp9_sad4x4x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array" -specialize vp9_sad4x4x3 sse3 - -prototype void vp9_sad64x64x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array" -specialize vp9_sad64x64x8 - -prototype void vp9_sad32x32x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array" -specialize vp9_sad32x32x8 - -prototype void vp9_sad16x16x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array" -specialize vp9_sad16x16x8 sse4 - -prototype void vp9_sad16x8x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array" -specialize vp9_sad16x8x8 sse4 - -prototype void vp9_sad8x16x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array" -specialize vp9_sad8x16x8 sse4 - -prototype void vp9_sad8x8x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array" -specialize vp9_sad8x8x8 sse4 - -prototype void vp9_sad8x4x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array" -specialize vp9_sad8x4x8 - -prototype void vp9_sad4x8x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array" -specialize vp9_sad4x8x8 - -prototype void vp9_sad4x4x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array" -specialize vp9_sad4x4x8 sse4 - -prototype void vp9_sad64x64x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad64x64x4d sse2 - -prototype void vp9_sad32x64x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const 
ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad32x64x4d sse2 - -prototype void vp9_sad64x32x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad64x32x4d sse2 - -prototype void vp9_sad32x16x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad32x16x4d sse2 - -prototype void vp9_sad16x32x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad16x32x4d sse2 - -prototype void vp9_sad32x32x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad32x32x4d sse2 - -prototype void vp9_sad16x16x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad16x16x4d sse2 - -prototype void vp9_sad16x8x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad16x8x4d sse2 - -prototype void vp9_sad8x16x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad8x16x4d sse2 - -prototype void vp9_sad8x8x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad8x8x4d sse2 - -# TODO(jingning): need to convert these 4x8/8x4 functions into sse2 form -prototype void vp9_sad8x4x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad8x4x4d sse2 - -prototype void vp9_sad4x8x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad4x8x4d sse - -prototype void vp9_sad4x4x4d "const uint8_t 
*src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad4x4x4d sse - -#prototype unsigned int vp9_sub_pixel_mse16x16 "const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse" -#specialize vp9_sub_pixel_mse16x16 sse2 mmx - -prototype unsigned int vp9_mse16x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse" -specialize vp9_mse16x16 mmx $sse2_x86inc $avx2_x86inc - -prototype unsigned int vp9_mse8x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse" -specialize vp9_mse8x16 - -prototype unsigned int vp9_mse16x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse" -specialize vp9_mse16x8 - -prototype unsigned int vp9_mse8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse" -specialize vp9_mse8x8 - -prototype unsigned int vp9_sub_pixel_mse64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_mse64x64 - -prototype unsigned int vp9_sub_pixel_mse32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_mse32x32 - -prototype unsigned int vp9_get_mb_ss "const int16_t *" -specialize vp9_get_mb_ss mmx sse2 -# ENCODEMB INVOKE - -prototype int64_t vp9_block_error "const int16_t *coeff, const int16_t *dqcoeff, intptr_t block_size, int64_t *ssz" -specialize vp9_block_error $sse2_x86inc - -prototype void vp9_subtract_block "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride" -specialize vp9_subtract_block 
$sse2_x86inc - -prototype void vp9_quantize_b "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan" -specialize vp9_quantize_b $ssse3_x86_64 - -prototype void vp9_quantize_b_32x32 "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan" -specialize vp9_quantize_b_32x32 $ssse3_x86_64 - -# -# Structured Similarity (SSIM) -# -if [ "$CONFIG_INTERNAL_STATS" = "yes" ]; then - prototype void vp9_ssim_parms_8x8 "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr" - specialize vp9_ssim_parms_8x8 $sse2_x86_64 - - prototype void vp9_ssim_parms_16x16 "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr" - specialize vp9_ssim_parms_16x16 $sse2_x86_64 -fi - -# fdct functions -prototype void vp9_fht4x4 "const int16_t *input, int16_t *output, int stride, int tx_type" -specialize vp9_fht4x4 sse2 avx2 - -prototype void vp9_fht8x8 "const int16_t *input, int16_t *output, int stride, int tx_type" -specialize vp9_fht8x8 sse2 avx2 - -prototype void vp9_fht16x16 "const int16_t *input, int16_t *output, int stride, int tx_type" -specialize vp9_fht16x16 sse2 avx2 - -prototype void vp9_fwht4x4 "const int16_t *input, int16_t *output, int stride" -specialize vp9_fwht4x4 - -prototype void vp9_fdct4x4 "const int16_t *input, int16_t *output, int stride" -specialize 
vp9_fdct4x4 sse2 avx2 - -prototype void vp9_fdct8x8 "const int16_t *input, int16_t *output, int stride" -specialize vp9_fdct8x8 sse2 avx2 - -prototype void vp9_fdct16x16 "const int16_t *input, int16_t *output, int stride" -specialize vp9_fdct16x16 sse2 avx2 - -prototype void vp9_fdct32x32 "const int16_t *input, int16_t *output, int stride" -specialize vp9_fdct32x32 sse2 avx2 - -prototype void vp9_fdct32x32_rd "const int16_t *input, int16_t *output, int stride" -specialize vp9_fdct32x32_rd sse2 avx2 - -# -# Motion search -# -prototype int vp9_full_search_sad "const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv, struct mv *best_mv" -specialize vp9_full_search_sad sse3 sse4_1 -vp9_full_search_sad_sse3=vp9_full_search_sadx3 -vp9_full_search_sad_sse4_1=vp9_full_search_sadx8 - -prototype int vp9_refining_search_sad "const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv" -specialize vp9_refining_search_sad sse3 -vp9_refining_search_sad_sse3=vp9_refining_search_sadx4 - -prototype int vp9_diamond_search_sad "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv" -specialize vp9_diamond_search_sad sse3 -vp9_diamond_search_sad_sse3=vp9_diamond_search_sadx4 - -prototype int vp9_full_range_search "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv" -specialize vp9_full_range_search - -prototype void vp9_temporal_filter_apply "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count" 
-specialize vp9_temporal_filter_apply sse2 - -fi -# end encoder functions diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk index 9fb611504f1d8a8c899f18efc30e5e399af150df..b4ab99b7a41ef10a8daaae1bb967b489d7f29585 100644 --- a/vp9/vp9_common.mk +++ b/vp9/vp9_common.mk @@ -45,7 +45,7 @@ VP9_COMMON_SRCS-yes += common/vp9_quant_common.h VP9_COMMON_SRCS-yes += common/vp9_reconinter.h VP9_COMMON_SRCS-yes += common/vp9_reconintra.h VP9_COMMON_SRCS-yes += common/vp9_rtcd.c -VP9_COMMON_SRCS-yes += common/vp9_rtcd_defs.sh +VP9_COMMON_SRCS-yes += common/vp9_rtcd_defs.pl VP9_COMMON_SRCS-yes += common/vp9_scale.h VP9_COMMON_SRCS-yes += common/vp9_scale.c VP9_COMMON_SRCS-yes += common/vp9_seg_common.h @@ -145,4 +145,4 @@ VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_avg_neon$(ASM) VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_save_reg_neon$(ASM) VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_reconintra_neon$(ASM) -$(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.sh)) +$(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.pl)) diff --git a/vpx_scale/vpx_scale.mk b/vpx_scale/vpx_scale.mk index 50d3e9d8ea7007b0ad86f0c2c3e42cd2b0c2e9a9..51a0ec93ce2d7e088da3ec1fe0b7cf44efc312d7 100644 --- a/vpx_scale/vpx_scale.mk +++ b/vpx_scale/vpx_scale.mk @@ -7,7 +7,7 @@ SCALE_SRCS-yes += generic/yv12extend.c SCALE_SRCS-$(CONFIG_SPATIAL_RESAMPLING) += generic/gen_scalers.c SCALE_SRCS-yes += vpx_scale_asm_offsets.c SCALE_SRCS-yes += vpx_scale_rtcd.c -SCALE_SRCS-yes += vpx_scale_rtcd.sh +SCALE_SRCS-yes += vpx_scale_rtcd.pl #neon SCALE_SRCS-$(HAVE_NEON) += arm/neon/vp8_vpxyv12_copyframe_func_neon$(ASM) @@ -24,4 +24,4 @@ SCALE_SRCS-no += $(SCALE_SRCS_REMOVE-yes) $(eval $(call asm_offsets_template,\ vpx_scale_asm_offsets.asm, vpx_scale/vpx_scale_asm_offsets.c)) -$(eval $(call rtcd_h_template,vpx_scale_rtcd,vpx_scale/vpx_scale_rtcd.sh)) +$(eval $(call rtcd_h_template,vpx_scale_rtcd,vpx_scale/vpx_scale_rtcd.pl)) diff --git a/vpx_scale/vpx_scale_rtcd.pl 
b/vpx_scale/vpx_scale_rtcd.pl new file mode 100644 index 0000000000000000000000000000000000000000..28e168e71ccfa636bf6f479aa7ba00a986e3a79c --- /dev/null +++ b/vpx_scale/vpx_scale_rtcd.pl @@ -0,0 +1,35 @@ +sub vpx_scale_forward_decls() { +print <<EOF +struct yv12_buffer_config; +EOF +} +forward_decls qw/vpx_scale_forward_decls/; + +# Scaler functions +if (vpx_config("CONFIG_SPATIAL_RESAMPLING") eq "yes") { + add_proto qw/void vp8_horizontal_line_5_4_scale/, "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"; + add_proto qw/void vp8_vertical_band_5_4_scale/, "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"; + add_proto qw/void vp8_horizontal_line_5_3_scale/, "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"; + add_proto qw/void vp8_vertical_band_5_3_scale/, "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"; + add_proto qw/void vp8_horizontal_line_2_1_scale/, "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"; + add_proto qw/void vp8_vertical_band_2_1_scale/, "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"; + add_proto qw/void vp8_vertical_band_2_1_scale_i/, "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"; +} + +add_proto qw/void vp8_yv12_extend_frame_borders/, "struct yv12_buffer_config *ybf"; +specialize qw/vp8_yv12_extend_frame_borders neon/; + +add_proto qw/void vp8_yv12_copy_frame/, "const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc"; +specialize qw/vp8_yv12_copy_frame neon/; + +add_proto qw/void vpx_yv12_copy_y/, "const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc"; +specialize 
qw/vpx_yv12_copy_y neon/; + +if (vpx_config("CONFIG_VP9") eq "yes") { + add_proto qw/void vp9_extend_frame_borders/, "struct yv12_buffer_config *ybf"; + specialize qw/vp9_extend_frame_borders dspr2/; + + add_proto qw/void vp9_extend_frame_inner_borders/, "struct yv12_buffer_config *ybf"; + specialize qw/vp9_extend_frame_inner_borders dspr2/; +} +1; diff --git a/vpx_scale/vpx_scale_rtcd.sh b/vpx_scale/vpx_scale_rtcd.sh deleted file mode 100644 index c26208c59361887819fa80548f2905491ad56424..0000000000000000000000000000000000000000 --- a/vpx_scale/vpx_scale_rtcd.sh +++ /dev/null @@ -1,34 +0,0 @@ -vpx_scale_forward_decls() { -cat <<EOF -struct yv12_buffer_config; -EOF -} -forward_decls vpx_scale_forward_decls - -# Scaler functions -if [ "$CONFIG_SPATIAL_RESAMPLING" = "yes" ]; then - prototype void vp8_horizontal_line_5_4_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width" - prototype void vp8_vertical_band_5_4_scale "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width" - prototype void vp8_horizontal_line_5_3_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width" - prototype void vp8_vertical_band_5_3_scale "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width" - prototype void vp8_horizontal_line_2_1_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width" - prototype void vp8_vertical_band_2_1_scale "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width" - prototype void vp8_vertical_band_2_1_scale_i "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width" -fi - -prototype void vp8_yv12_extend_frame_borders "struct yv12_buffer_config *ybf" -specialize 
vp8_yv12_extend_frame_borders neon - -prototype void vp8_yv12_copy_frame "const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc" -specialize vp8_yv12_copy_frame neon - -prototype void vpx_yv12_copy_y "const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc" -specialize vpx_yv12_copy_y neon - -if [ "$CONFIG_VP9" = "yes" ]; then - prototype void vp9_extend_frame_borders "struct yv12_buffer_config *ybf" - specialize vp9_extend_frame_borders dspr2 - - prototype void vp9_extend_frame_inner_borders "struct yv12_buffer_config *ybf" - specialize vp9_extend_frame_inner_borders dspr2 -fi