vp9_rtcd_defs.pl 89.5 KB
Newer Older
James Zern's avatar
James Zern committed
1 2 3 4 5 6 7
sub vp9_common_forward_decls() {
print <<EOF
/*
 * VP9
 */

#include "vpx/vpx_integer.h"
8
#include "vp9/common/vp9_common.h"
James Zern's avatar
James Zern committed
9 10 11 12 13 14 15
#include "vp9/common/vp9_enums.h"

struct macroblockd;

/* Encoder forward decls */
struct macroblock;
struct vp9_variance_vtable;
16
struct search_site_config;
James Zern's avatar
James Zern committed
17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
struct mv;
union int_mv;
struct yv12_buffer_config;
EOF
}
forward_decls qw/vp9_common_forward_decls/;

# x86inc.asm doesn't work if pic is enabled on 32 bit platforms so no assembly.
if (vpx_config("CONFIG_USE_X86INC") eq "yes") {
  $mmx_x86inc = 'mmx';
  $sse_x86inc = 'sse';
  $sse2_x86inc = 'sse2';
  $ssse3_x86inc = 'ssse3';
  $avx_x86inc = 'avx';
  $avx2_x86inc = 'avx2';
} else {
  $mmx_x86inc = $sse_x86inc = $sse2_x86inc = $ssse3_x86inc =
  $avx_x86inc = $avx2_x86inc = '';
}

# this variable is for functions that are 64 bit only.
if ($opts{arch} eq "x86_64") {
  $mmx_x86_64 = 'mmx';
  $sse2_x86_64 = 'sse2';
  $ssse3_x86_64 = 'ssse3';
  $avx_x86_64 = 'avx';
  $avx2_x86_64 = 'avx2';
} else {
  $mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 =
  $avx_x86_64 = $avx2_x86_64 = '';
}

Johann's avatar
Johann committed
49 50 51 52 53 54 55
# optimizations which depend on multiple features
if ((vpx_config("HAVE_AVX2") eq "yes") && (vpx_config("HAVE_SSSE3") eq "yes")) {
  $avx2_ssse3 = 'avx2';
} else {
  $avx2_ssse3 = '';
}

James Zern's avatar
James Zern committed
56 57 58 59 60 61 62 63 64 65 66 67 68
#
# RECON
#
add_proto qw/void vp9_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d207_predictor_4x4/, "$ssse3_x86inc";

add_proto qw/void vp9_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d45_predictor_4x4/, "$ssse3_x86inc";

add_proto qw/void vp9_d63_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d63_predictor_4x4/, "$ssse3_x86inc";

add_proto qw/void vp9_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
69
specialize qw/vp9_h_predictor_4x4 neon dspr2/, "$ssse3_x86inc";
James Zern's avatar
James Zern committed
70 71 72 73 74 75 76 77 78 79 80

add_proto qw/void vp9_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d117_predictor_4x4/;

add_proto qw/void vp9_d135_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d135_predictor_4x4/;

add_proto qw/void vp9_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d153_predictor_4x4/, "$ssse3_x86inc";

add_proto qw/void vp9_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
81
specialize qw/vp9_v_predictor_4x4 neon/, "$sse_x86inc";
James Zern's avatar
James Zern committed
82 83

add_proto qw/void vp9_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
84
specialize qw/vp9_tm_predictor_4x4 neon dspr2/, "$sse_x86inc";
James Zern's avatar
James Zern committed
85 86 87 88 89

add_proto qw/void vp9_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_predictor_4x4 dspr2/, "$sse_x86inc";

add_proto qw/void vp9_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
90
specialize qw/vp9_dc_top_predictor_4x4/, "$sse_x86inc";
James Zern's avatar
James Zern committed
91 92

add_proto qw/void vp9_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
93
specialize qw/vp9_dc_left_predictor_4x4/, "$sse_x86inc";
James Zern's avatar
James Zern committed
94 95

add_proto qw/void vp9_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
96
specialize qw/vp9_dc_128_predictor_4x4/, "$sse_x86inc";
James Zern's avatar
James Zern committed
97 98 99 100 101 102 103 104 105 106 107

add_proto qw/void vp9_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d207_predictor_8x8/, "$ssse3_x86inc";

add_proto qw/void vp9_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d45_predictor_8x8/, "$ssse3_x86inc";

add_proto qw/void vp9_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d63_predictor_8x8/, "$ssse3_x86inc";

add_proto qw/void vp9_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
108
specialize qw/vp9_h_predictor_8x8 neon dspr2/, "$ssse3_x86inc";
James Zern's avatar
James Zern committed
109 110 111 112 113 114 115 116 117 118 119

add_proto qw/void vp9_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d117_predictor_8x8/;

add_proto qw/void vp9_d135_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d135_predictor_8x8/;

add_proto qw/void vp9_d153_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d153_predictor_8x8/, "$ssse3_x86inc";

add_proto qw/void vp9_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
120
specialize qw/vp9_v_predictor_8x8 neon/, "$sse_x86inc";
James Zern's avatar
James Zern committed
121 122

add_proto qw/void vp9_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
123
specialize qw/vp9_tm_predictor_8x8 neon dspr2/, "$sse2_x86inc";
James Zern's avatar
James Zern committed
124 125

add_proto qw/void vp9_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
126
specialize qw/vp9_dc_predictor_8x8 dspr2 neon/, "$sse_x86inc";
James Zern's avatar
James Zern committed
127 128

add_proto qw/void vp9_dc_top_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
129
specialize qw/vp9_dc_top_predictor_8x8 neon/, "$sse_x86inc";
James Zern's avatar
James Zern committed
130 131

add_proto qw/void vp9_dc_left_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
132
specialize qw/vp9_dc_left_predictor_8x8 neon/, "$sse_x86inc";
James Zern's avatar
James Zern committed
133 134

add_proto qw/void vp9_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
135
specialize qw/vp9_dc_128_predictor_8x8 neon/, "$sse_x86inc";
James Zern's avatar
James Zern committed
136 137 138 139 140 141 142 143 144 145 146

add_proto qw/void vp9_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d207_predictor_16x16/, "$ssse3_x86inc";

add_proto qw/void vp9_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d45_predictor_16x16/, "$ssse3_x86inc";

add_proto qw/void vp9_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d63_predictor_16x16/, "$ssse3_x86inc";

add_proto qw/void vp9_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
147
specialize qw/vp9_h_predictor_16x16 neon dspr2/, "$ssse3_x86inc";
James Zern's avatar
James Zern committed
148 149 150 151 152 153 154 155 156 157 158

add_proto qw/void vp9_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d117_predictor_16x16/;

add_proto qw/void vp9_d135_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d135_predictor_16x16/;

add_proto qw/void vp9_d153_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d153_predictor_16x16/, "$ssse3_x86inc";

add_proto qw/void vp9_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
159
specialize qw/vp9_v_predictor_16x16 neon/, "$sse2_x86inc";
James Zern's avatar
James Zern committed
160 161

add_proto qw/void vp9_tm_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
162
specialize qw/vp9_tm_predictor_16x16 neon/, "$sse2_x86inc";
James Zern's avatar
James Zern committed
163 164

add_proto qw/void vp9_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
165
specialize qw/vp9_dc_predictor_16x16 dspr2 neon/, "$sse2_x86inc";
James Zern's avatar
James Zern committed
166 167

add_proto qw/void vp9_dc_top_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
168
specialize qw/vp9_dc_top_predictor_16x16 neon/, "$sse2_x86inc";
James Zern's avatar
James Zern committed
169 170

add_proto qw/void vp9_dc_left_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
171
specialize qw/vp9_dc_left_predictor_16x16 neon/, "$sse2_x86inc";
James Zern's avatar
James Zern committed
172 173

add_proto qw/void vp9_dc_128_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
174
specialize qw/vp9_dc_128_predictor_16x16 neon/, "$sse2_x86inc";
James Zern's avatar
James Zern committed
175 176 177 178 179 180 181 182 183 184 185

add_proto qw/void vp9_d207_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d207_predictor_32x32/, "$ssse3_x86inc";

add_proto qw/void vp9_d45_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d45_predictor_32x32/, "$ssse3_x86inc";

add_proto qw/void vp9_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d63_predictor_32x32/, "$ssse3_x86inc";

add_proto qw/void vp9_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
186
specialize qw/vp9_h_predictor_32x32 neon/, "$ssse3_x86inc";
James Zern's avatar
James Zern committed
187 188 189 190 191 192 193 194 195 196 197

add_proto qw/void vp9_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d117_predictor_32x32/;

add_proto qw/void vp9_d135_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d135_predictor_32x32/;

add_proto qw/void vp9_d153_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d153_predictor_32x32/;

add_proto qw/void vp9_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
198
specialize qw/vp9_v_predictor_32x32 neon/, "$sse2_x86inc";
James Zern's avatar
James Zern committed
199 200

add_proto qw/void vp9_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
201
specialize qw/vp9_tm_predictor_32x32 neon/, "$sse2_x86_64";
James Zern's avatar
James Zern committed
202 203 204 205 206

add_proto qw/void vp9_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_predictor_32x32/, "$sse2_x86inc";

add_proto qw/void vp9_dc_top_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
207
specialize qw/vp9_dc_top_predictor_32x32/, "$sse2_x86inc";
James Zern's avatar
James Zern committed
208 209

add_proto qw/void vp9_dc_left_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
210
specialize qw/vp9_dc_left_predictor_32x32/, "$sse2_x86inc";
James Zern's avatar
James Zern committed
211 212

add_proto qw/void vp9_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
213
specialize qw/vp9_dc_128_predictor_32x32/, "$sse2_x86inc";
James Zern's avatar
James Zern committed
214 215 216 217 218

#
# Loopfilter
#
add_proto qw/void vp9_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
219 220
specialize qw/vp9_lpf_vertical_16 sse2 neon_asm dspr2/;
$vp9_lpf_vertical_16_neon_asm=vp9_lpf_vertical_16_neon;
James Zern's avatar
James Zern committed
221 222

add_proto qw/void vp9_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
223 224
specialize qw/vp9_lpf_vertical_16_dual sse2 neon_asm dspr2/;
$vp9_lpf_vertical_16_dual_neon_asm=vp9_lpf_vertical_16_dual_neon;
James Zern's avatar
James Zern committed
225 226

add_proto qw/void vp9_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
Johann's avatar
Johann committed
227 228
specialize qw/vp9_lpf_vertical_8 sse2 neon_asm dspr2/;
$vp9_lpf_vertical_8_neon_asm=vp9_lpf_vertical_8_neon;
James Zern's avatar
James Zern committed
229 230

add_proto qw/void vp9_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
Johann's avatar
Johann committed
231 232
specialize qw/vp9_lpf_vertical_8_dual sse2 neon_asm dspr2/;
$vp9_lpf_vertical_8_dual_neon_asm=vp9_lpf_vertical_8_dual_neon;
James Zern's avatar
James Zern committed
233 234

add_proto qw/void vp9_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
235
specialize qw/vp9_lpf_vertical_4 mmx neon dspr2/;
James Zern's avatar
James Zern committed
236 237

add_proto qw/void vp9_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
238
specialize qw/vp9_lpf_vertical_4_dual sse2 neon dspr2/;
James Zern's avatar
James Zern committed
239 240

add_proto qw/void vp9_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
241 242
specialize qw/vp9_lpf_horizontal_16 sse2 avx2 neon_asm dspr2/;
$vp9_lpf_horizontal_16_neon_asm=vp9_lpf_horizontal_16_neon;
James Zern's avatar
James Zern committed
243 244

add_proto qw/void vp9_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
Johann's avatar
Johann committed
245 246
specialize qw/vp9_lpf_horizontal_8 sse2 neon_asm dspr2/;
$vp9_lpf_horizontal_8_neon_asm=vp9_lpf_horizontal_8_neon;
James Zern's avatar
James Zern committed
247 248

add_proto qw/void vp9_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
Johann's avatar
Johann committed
249 250
specialize qw/vp9_lpf_horizontal_8_dual sse2 neon_asm dspr2/;
$vp9_lpf_horizontal_8_dual_neon_asm=vp9_lpf_horizontal_8_dual_neon;
James Zern's avatar
James Zern committed
251 252

add_proto qw/void vp9_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
253
specialize qw/vp9_lpf_horizontal_4 mmx neon dspr2/;
James Zern's avatar
James Zern committed
254 255

add_proto qw/void vp9_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
256
specialize qw/vp9_lpf_horizontal_4_dual sse2 neon dspr2/;
James Zern's avatar
James Zern committed
257 258 259 260 261 262

#
# post proc
#
if (vpx_config("CONFIG_VP9_POSTPROC") eq "yes") {
add_proto qw/void vp9_mbpost_proc_down/, "uint8_t *dst, int pitch, int rows, int cols, int flimit";
Dmitry Kovalev's avatar
Dmitry Kovalev committed
263
specialize qw/vp9_mbpost_proc_down sse2/;
James Zern's avatar
James Zern committed
264 265 266 267 268 269 270
$vp9_mbpost_proc_down_sse2=vp9_mbpost_proc_down_xmm;

add_proto qw/void vp9_mbpost_proc_across_ip/, "uint8_t *src, int pitch, int rows, int cols, int flimit";
specialize qw/vp9_mbpost_proc_across_ip sse2/;
$vp9_mbpost_proc_across_ip_sse2=vp9_mbpost_proc_across_ip_xmm;

add_proto qw/void vp9_post_proc_down_and_across/, "const uint8_t *src_ptr, uint8_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit";
Dmitry Kovalev's avatar
Dmitry Kovalev committed
271
specialize qw/vp9_post_proc_down_and_across sse2/;
James Zern's avatar
James Zern committed
272 273 274
$vp9_post_proc_down_and_across_sse2=vp9_post_proc_down_and_across_xmm;

add_proto qw/void vp9_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
Dmitry Kovalev's avatar
Dmitry Kovalev committed
275
specialize qw/vp9_plane_add_noise sse2/;
James Zern's avatar
James Zern committed
276
$vp9_plane_add_noise_sse2=vp9_plane_add_noise_wmt;
JackyChen's avatar
JackyChen committed
277 278 279 280 281 282

add_proto qw/void vp9_filter_by_weight16x16/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight";
specialize qw/vp9_filter_by_weight16x16 sse2/;

add_proto qw/void vp9_filter_by_weight8x8/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight";
specialize qw/vp9_filter_by_weight8x8 sse2/;
James Zern's avatar
James Zern committed
283 284 285 286 287 288
}

#
# Sub Pixel Filters
#
add_proto qw/void vp9_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
289
specialize qw/vp9_convolve_copy neon dspr2 msa/, "$sse2_x86inc";
James Zern's avatar
James Zern committed
290 291

add_proto qw/void vp9_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
292
specialize qw/vp9_convolve_avg neon dspr2 msa/, "$sse2_x86inc";
James Zern's avatar
James Zern committed
293 294

add_proto qw/void vp9_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
295
specialize qw/vp9_convolve8 sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";
James Zern's avatar
James Zern committed
296 297

add_proto qw/void vp9_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
298
specialize qw/vp9_convolve8_horiz sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";
James Zern's avatar
James Zern committed
299 300

add_proto qw/void vp9_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
301
specialize qw/vp9_convolve8_vert sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";
James Zern's avatar
James Zern committed
302 303

add_proto qw/void vp9_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
304
specialize qw/vp9_convolve8_avg sse2 ssse3 neon dspr2/;
James Zern's avatar
James Zern committed
305 306

add_proto qw/void vp9_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
307
specialize qw/vp9_convolve8_avg_horiz sse2 ssse3 neon dspr2/;
James Zern's avatar
James Zern committed
308 309

add_proto qw/void vp9_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
310
specialize qw/vp9_convolve8_avg_vert sse2 ssse3 neon dspr2/;
James Zern's avatar
James Zern committed
311 312 313 314

#
# dct
#
315
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
316 317
  # Note as optimized versions of these functions are added we need to add a check to ensure
  # that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only.
318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334
  add_proto qw/void vp9_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vp9_idct4x4_1_add/;

  add_proto qw/void vp9_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vp9_idct4x4_16_add/;

  add_proto qw/void vp9_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vp9_idct8x8_1_add/;

  add_proto qw/void vp9_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vp9_idct8x8_64_add/;

  add_proto qw/void vp9_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vp9_idct8x8_12_add/;

  add_proto qw/void vp9_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vp9_idct16x16_1_add/;
James Zern's avatar
James Zern committed
335

336 337
  add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vp9_idct16x16_256_add/;
James Zern's avatar
James Zern committed
338

339 340
  add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vp9_idct16x16_10_add/;
James Zern's avatar
James Zern committed
341

342 343
  add_proto qw/void vp9_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vp9_idct32x32_1024_add/;
James Zern's avatar
James Zern committed
344

345 346
  add_proto qw/void vp9_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vp9_idct32x32_34_add/;
James Zern's avatar
James Zern committed
347

348 349
  add_proto qw/void vp9_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vp9_idct32x32_1_add/;
James Zern's avatar
James Zern committed
350

351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366
  add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
  specialize qw/vp9_iht4x4_16_add/;

  add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
  specialize qw/vp9_iht8x8_64_add/;

  add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
  specialize qw/vp9_iht16x16_256_add/;

  # dct and add

  add_proto qw/void vp9_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vp9_iwht4x4_1_add/;

  add_proto qw/void vp9_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vp9_iwht4x4_16_add/;
367

368
} else {
369 370 371 372
  # Force C versions if CONFIG_EMULATE_HARDWARE is 1
  if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
    add_proto qw/void vp9_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_idct4x4_1_add/;
James Zern's avatar
James Zern committed
373

374 375
    add_proto qw/void vp9_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_idct4x4_16_add/;
James Zern's avatar
James Zern committed
376

377 378
    add_proto qw/void vp9_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_idct8x8_1_add/;
James Zern's avatar
James Zern committed
379

380 381
    add_proto qw/void vp9_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_idct8x8_64_add/;
James Zern's avatar
James Zern committed
382

383 384
    add_proto qw/void vp9_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_idct8x8_12_add/;
James Zern's avatar
James Zern committed
385

386 387
    add_proto qw/void vp9_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_idct16x16_1_add/;
James Zern's avatar
James Zern committed
388

389 390
    add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_idct16x16_256_add/;
391

392 393
    add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_idct16x16_10_add/;
394

395 396
    add_proto qw/void vp9_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_idct32x32_1024_add/;
397

398 399
    add_proto qw/void vp9_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_idct32x32_34_add/;
400

401 402
    add_proto qw/void vp9_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_idct32x32_1_add/;
403

404 405
    add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
    specialize qw/vp9_iht4x4_16_add/;
406

407 408
    add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
    specialize qw/vp9_iht8x8_64_add/;
409

410 411
    add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
    specialize qw/vp9_iht16x16_256_add/;
412

413
    # dct and add
414

415 416
    add_proto qw/void vp9_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_iwht4x4_1_add/;
417

418 419 420 421
    add_proto qw/void vp9_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_iwht4x4_16_add/;
  } else {
    add_proto qw/void vp9_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
422
    specialize qw/vp9_idct4x4_1_add sse2 neon dspr2/;
423 424

    add_proto qw/void vp9_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
425
    specialize qw/vp9_idct4x4_16_add sse2 neon dspr2/;
426 427

    add_proto qw/void vp9_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
428
    specialize qw/vp9_idct8x8_1_add sse2 neon dspr2 msa/;
429 430

    add_proto qw/void vp9_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
431
    specialize qw/vp9_idct8x8_64_add sse2 neon dspr2 msa/, "$ssse3_x86_64";
432 433

    add_proto qw/void vp9_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
434
    specialize qw/vp9_idct8x8_12_add sse2 neon dspr2 msa/, "$ssse3_x86_64";
435 436

    add_proto qw/void vp9_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
437
    specialize qw/vp9_idct16x16_1_add sse2 neon dspr2 msa/;
438 439

    add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
440
    specialize qw/vp9_idct16x16_256_add sse2 neon dspr2 msa/;
441 442

    add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
443
    specialize qw/vp9_idct16x16_10_add sse2 neon dspr2 msa/;
444 445

    add_proto qw/void vp9_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
446
    specialize qw/vp9_idct32x32_1024_add sse2 neon dspr2 msa/;
447 448

    add_proto qw/void vp9_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
449
    specialize qw/vp9_idct32x32_34_add sse2 neon_asm dspr2 msa/;
450
    #is this a typo?
451 452 453
    $vp9_idct32x32_34_add_neon_asm=vp9_idct32x32_1024_add_neon;

    add_proto qw/void vp9_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
454
    specialize qw/vp9_idct32x32_1_add sse2 neon dspr2 msa/;
455 456

    add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
457
    specialize qw/vp9_iht4x4_16_add sse2 neon dspr2/;
458 459

    add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
460
    specialize qw/vp9_iht8x8_64_add sse2 neon dspr2 msa/;
461 462

    add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
463
    specialize qw/vp9_iht16x16_256_add sse2 dspr2 msa/;
464 465 466 467 468 469 470 471 472

    # dct and add

    add_proto qw/void vp9_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_iwht4x4_1_add/;

    add_proto qw/void vp9_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_iwht4x4_16_add/;
  }
473
}
James Zern's avatar
James Zern committed
474

475 476
# High bitdepth functions
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
477 478 479
  #
  # Intra prediction
  #
480 481
  add_proto qw/void vp9_highbd_d207_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d207_predictor_4x4/;
482

483 484
  add_proto qw/void vp9_highbd_d45_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d45_predictor_4x4/;
485

486 487
  add_proto qw/void vp9_highbd_d63_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d63_predictor_4x4/;
488

489 490
  add_proto qw/void vp9_highbd_h_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_h_predictor_4x4/;
491

492 493
  add_proto qw/void vp9_highbd_d117_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d117_predictor_4x4/;
494

495 496
  add_proto qw/void vp9_highbd_d135_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d135_predictor_4x4/;
497

498 499
  add_proto qw/void vp9_highbd_d153_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d153_predictor_4x4/;
500

501
  add_proto qw/void vp9_highbd_v_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
James Zern's avatar
James Zern committed
502
  specialize qw/vp9_highbd_v_predictor_4x4/, "$sse_x86inc";
503

504 505
  add_proto qw/void vp9_highbd_tm_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_tm_predictor_4x4/, "$sse_x86inc";
506

507 508
  add_proto qw/void vp9_highbd_dc_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_dc_predictor_4x4/, "$sse_x86inc";
509

510 511
  add_proto qw/void vp9_highbd_dc_top_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_dc_top_predictor_4x4/;
512

513 514
  add_proto qw/void vp9_highbd_dc_left_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_dc_left_predictor_4x4/;
515

516 517
  add_proto qw/void vp9_highbd_dc_128_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_dc_128_predictor_4x4/;
518

519 520
  add_proto qw/void vp9_highbd_d207_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d207_predictor_8x8/;
521

522 523
  add_proto qw/void vp9_highbd_d45_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d45_predictor_8x8/;
524

525 526
  add_proto qw/void vp9_highbd_d63_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d63_predictor_8x8/;
527

528 529
  add_proto qw/void vp9_highbd_h_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_h_predictor_8x8/;
530

531 532
  add_proto qw/void vp9_highbd_d117_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d117_predictor_8x8/;
533

534 535
  add_proto qw/void vp9_highbd_d135_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d135_predictor_8x8/;
536

537 538
  add_proto qw/void vp9_highbd_d153_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d153_predictor_8x8/;
539

540 541
  add_proto qw/void vp9_highbd_v_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_v_predictor_8x8/, "$sse2_x86inc";
542

543 544
  add_proto qw/void vp9_highbd_tm_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_tm_predictor_8x8/, "$sse2_x86inc";
545

546 547
  add_proto qw/void vp9_highbd_dc_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_dc_predictor_8x8/, "$sse2_x86inc";;
548

549 550
  add_proto qw/void vp9_highbd_dc_top_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_dc_top_predictor_8x8/;
551

552 553
  add_proto qw/void vp9_highbd_dc_left_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_dc_left_predictor_8x8/;
554

555 556
  add_proto qw/void vp9_highbd_dc_128_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_dc_128_predictor_8x8/;
557

558 559
  add_proto qw/void vp9_highbd_d207_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d207_predictor_16x16/;
560

561 562
  add_proto qw/void vp9_highbd_d45_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d45_predictor_16x16/;
563

564 565
  add_proto qw/void vp9_highbd_d63_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d63_predictor_16x16/;
566

567 568
  add_proto qw/void vp9_highbd_h_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_h_predictor_16x16/;
569

570 571
  add_proto qw/void vp9_highbd_d117_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d117_predictor_16x16/;
572

573 574
  add_proto qw/void vp9_highbd_d135_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d135_predictor_16x16/;
575

576 577
  add_proto qw/void vp9_highbd_d153_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d153_predictor_16x16/;
578

579
  add_proto qw/void vp9_highbd_v_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
James Zern's avatar
James Zern committed
580
  specialize qw/vp9_highbd_v_predictor_16x16/, "$sse2_x86inc";
581

582 583
  add_proto qw/void vp9_highbd_tm_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_tm_predictor_16x16/, "$sse2_x86_64";
584

585 586
  add_proto qw/void vp9_highbd_dc_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_dc_predictor_16x16/, "$sse2_x86inc";
587

588 589
  add_proto qw/void vp9_highbd_dc_top_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_dc_top_predictor_16x16/;
590

591 592
  add_proto qw/void vp9_highbd_dc_left_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_dc_left_predictor_16x16/;
593

594 595
  add_proto qw/void vp9_highbd_dc_128_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_dc_128_predictor_16x16/;
596

597 598
  add_proto qw/void vp9_highbd_d207_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d207_predictor_32x32/;
599

600 601
  add_proto qw/void vp9_highbd_d45_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d45_predictor_32x32/;
602

603 604
  add_proto qw/void vp9_highbd_d63_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d63_predictor_32x32/;
605

606 607
  add_proto qw/void vp9_highbd_h_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_h_predictor_32x32/;
608

609 610
  add_proto qw/void vp9_highbd_d117_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d117_predictor_32x32/;
611

612 613
  add_proto qw/void vp9_highbd_d135_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d135_predictor_32x32/;
614

615 616
  add_proto qw/void vp9_highbd_d153_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d153_predictor_32x32/;
617

618 619
  add_proto qw/void vp9_highbd_v_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_v_predictor_32x32/, "$sse2_x86inc";
620

621 622
  add_proto qw/void vp9_highbd_tm_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_tm_predictor_32x32/, "$sse2_x86_64";
623

624 625
  add_proto qw/void vp9_highbd_dc_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_dc_predictor_32x32/, "$sse2_x86_64";
626

627 628
  add_proto qw/void vp9_highbd_dc_top_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_dc_top_predictor_32x32/;
629

630 631
  add_proto qw/void vp9_highbd_dc_left_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_dc_left_predictor_32x32/;
632

633 634
  add_proto qw/void vp9_highbd_dc_128_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_dc_128_predictor_32x32/;
635

636 637 638
  #
  # Sub Pixel Filters
  #
639 640
  add_proto qw/void vp9_highbd_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/vp9_highbd_convolve_copy/;
641

642 643
  add_proto qw/void vp9_highbd_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/vp9_highbd_convolve_avg/;
644

645 646
  add_proto qw/void vp9_highbd_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/vp9_highbd_convolve8/, "$sse2_x86_64";
647

648 649
  add_proto qw/void vp9_highbd_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/vp9_highbd_convolve8_horiz/, "$sse2_x86_64";
650

651 652
  add_proto qw/void vp9_highbd_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/vp9_highbd_convolve8_vert/, "$sse2_x86_64";
653

654 655
  add_proto qw/void vp9_highbd_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/vp9_highbd_convolve8_avg/, "$sse2_x86_64";
656

657 658
  add_proto qw/void vp9_highbd_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/vp9_highbd_convolve8_avg_horiz/, "$sse2_x86_64";
659

660 661
  add_proto qw/void vp9_highbd_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/vp9_highbd_convolve8_avg_vert/, "$sse2_x86_64";
662

663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715
  #
  # Loopfilter
  #
  add_proto qw/void vp9_highbd_lpf_vertical_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/vp9_highbd_lpf_vertical_16 sse2/;

  add_proto qw/void vp9_highbd_lpf_vertical_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/vp9_highbd_lpf_vertical_16_dual sse2/;

  add_proto qw/void vp9_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
  specialize qw/vp9_highbd_lpf_vertical_8 sse2/;

  add_proto qw/void vp9_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
  specialize qw/vp9_highbd_lpf_vertical_8_dual sse2/;

  add_proto qw/void vp9_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
  specialize qw/vp9_highbd_lpf_vertical_4 sse2/;

  add_proto qw/void vp9_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
  specialize qw/vp9_highbd_lpf_vertical_4_dual sse2/;

  add_proto qw/void vp9_highbd_lpf_horizontal_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
  specialize qw/vp9_highbd_lpf_horizontal_16 sse2/;

  add_proto qw/void vp9_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
  specialize qw/vp9_highbd_lpf_horizontal_8 sse2/;

  add_proto qw/void vp9_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
  specialize qw/vp9_highbd_lpf_horizontal_8_dual sse2/;

  add_proto qw/void vp9_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
  specialize qw/vp9_highbd_lpf_horizontal_4 sse2/;

  add_proto qw/void vp9_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
  specialize qw/vp9_highbd_lpf_horizontal_4_dual sse2/;

  #
  # post proc
  #
  if (vpx_config("CONFIG_VP9_POSTPROC") eq "yes") {
    add_proto qw/void vp9_highbd_mbpost_proc_down/, "uint16_t *dst, int pitch, int rows, int cols, int flimit";
    specialize qw/vp9_highbd_mbpost_proc_down/;

    add_proto qw/void vp9_highbd_mbpost_proc_across_ip/, "uint16_t *src, int pitch, int rows, int cols, int flimit";
    specialize qw/vp9_highbd_mbpost_proc_across_ip/;

    add_proto qw/void vp9_highbd_post_proc_down_and_across/, "const uint16_t *src_ptr, uint16_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit";
    specialize qw/vp9_highbd_post_proc_down_and_across/;

    add_proto qw/void vp9_highbd_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
    specialize qw/vp9_highbd_plane_add_noise/;
  }

716 717 718
  #
  # dct
  #
719 720
  # Note as optimized versions of these functions are added we need to add a check to ensure
  # that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only.
721 722
  add_proto qw/void vp9_highbd_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
  specialize qw/vp9_highbd_idct4x4_1_add/;
723

724 725
  add_proto qw/void vp9_highbd_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
  specialize qw/vp9_highbd_idct8x8_1_add/;
726

727 728
  add_proto qw/void vp9_highbd_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
  specialize qw/vp9_highbd_idct16x16_1_add/;
729

730 731
  add_proto qw/void vp9_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
  specialize qw/vp9_highbd_idct32x32_1024_add/;
732

733 734
  add_proto qw/void vp9_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
  specialize qw/vp9_highbd_idct32x32_34_add/;
735

736 737
  add_proto qw/void vp9_highbd_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
  specialize qw/vp9_highbd_idct32x32_1_add/;
738

739 740
  add_proto qw/void vp9_highbd_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
  specialize qw/vp9_highbd_iht4x4_16_add/;
741

742 743
  add_proto qw/void vp9_highbd_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
  specialize qw/vp9_highbd_iht8x8_64_add/;
744

745 746
  add_proto qw/void vp9_highbd_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type, int bd";
  specialize qw/vp9_highbd_iht16x16_256_add/;
James Zern's avatar
James Zern committed
747

748
  # dct and add
James Zern's avatar
James Zern committed
749

750 751
  add_proto qw/void vp9_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
  specialize qw/vp9_highbd_iwht4x4_1_add/;
James Zern's avatar
James Zern committed
752

753 754
  add_proto qw/void vp9_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
  specialize qw/vp9_highbd_iwht4x4_16_add/;
755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790

  # Force C versions if CONFIG_EMULATE_HARDWARE is 1
  if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {

    add_proto qw/void vp9_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
    specialize qw/vp9_highbd_idct4x4_16_add/;

    add_proto qw/void vp9_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
    specialize qw/vp9_highbd_idct8x8_64_add/;

    add_proto qw/void vp9_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
    specialize qw/vp9_highbd_idct8x8_10_add/;

    add_proto qw/void vp9_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
    specialize qw/vp9_highbd_idct16x16_256_add/;

    add_proto qw/void vp9_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
    specialize qw/vp9_highbd_idct16x16_10_add/;

  } else {

    add_proto qw/void vp9_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
    specialize qw/vp9_highbd_idct4x4_16_add sse2/;

    add_proto qw/void vp9_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
    specialize qw/vp9_highbd_idct8x8_64_add sse2/;

    add_proto qw/void vp9_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
    specialize qw/vp9_highbd_idct8x8_10_add sse2/;

    add_proto qw/void vp9_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
    specialize qw/vp9_highbd_idct16x16_256_add sse2/;

    add_proto qw/void vp9_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
    specialize qw/vp9_highbd_idct16x16_10_add sse2/;
  }
791
}
James Zern's avatar
James Zern committed
792 793 794 795 796 797 798 799 800

#
# Encoder functions below this point.
#
if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {


# variance
add_proto qw/unsigned int vp9_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
801
specialize qw/vp9_sub_pixel_variance64x64 avx2 neon/, "$sse2_x86inc", "$ssse3_x86inc";
James Zern's avatar
James Zern committed
802 803

add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
levytamar82's avatar
levytamar82 committed
804
specialize qw/vp9_sub_pixel_avg_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
James Zern's avatar
James Zern committed
805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830

add_proto qw/unsigned int vp9_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc";

add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_sub_pixel_avg_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc";

add_proto qw/unsigned int vp9_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance64x32/, "$sse2_x86inc", "$ssse3_x86inc";

add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_sub_pixel_avg_variance64x32/, "$sse2_x86inc", "$ssse3_x86inc";

add_proto qw/unsigned int vp9_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance32x16/, "$sse2_x86inc", "$ssse3_x86inc";

add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_sub_pixel_avg_variance32x16/, "$sse2_x86inc", "$ssse3_x86inc";

add_proto qw/unsigned int vp9_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc";

add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_sub_pixel_avg_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc";

add_proto qw/unsigned int vp9_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
levytamar82's avatar
levytamar82 committed
831
specialize qw/vp9_sub_pixel_variance32x32 avx2 neon/, "$sse2_x86inc", "$ssse3_x86inc";
James Zern's avatar
James Zern committed
832 833

add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
levytamar82's avatar
levytamar82 committed
834
specialize qw/vp9_sub_pixel_avg_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
James Zern's avatar
James Zern committed
835 836

add_proto qw/unsigned int vp9_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
837
specialize qw/vp9_sub_pixel_variance16x16 neon/, "$sse2_x86inc", "$ssse3_x86inc";
James Zern's avatar
James Zern committed
838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854

add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_sub_pixel_avg_variance16x16/, "$sse2_x86inc", "$ssse3_x86inc";

add_proto qw/unsigned int vp9_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance8x16/, "$sse2_x86inc", "$ssse3_x86inc";

add_proto qw/unsigned int vp9_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_sub_pixel_avg_variance8x16/, "$sse2_x86inc", "$ssse3_x86inc";

add_proto qw/unsigned int vp9_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance16x8/, "$sse2_x86inc", "$ssse3_x86inc";

add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_sub_pixel_avg_variance16x8/, "$sse2_x86inc", "$ssse3_x86inc";

add_proto qw/unsigned int vp9_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
855
specialize qw/vp9_sub_pixel_variance8x8 neon/, "$sse2_x86inc", "$ssse3_x86inc";
James Zern's avatar
James Zern committed
856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879

add_proto qw/unsigned int vp9_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_sub_pixel_avg_variance8x8/, "$sse2_x86inc", "$ssse3_x86inc";

# TODO(jingning): need to convert 8x4/4x8 functions into mmx/sse form
add_proto qw/unsigned int vp9_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance8x4/, "$sse2_x86inc", "$ssse3_x86inc";

add_proto qw/unsigned int vp9_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_sub_pixel_avg_variance8x4/, "$sse2_x86inc", "$ssse3_x86inc";

add_proto qw/unsigned int vp9_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance4x8/, "$sse_x86inc", "$ssse3_x86inc";

add_proto qw/unsigned int vp9_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_sub_pixel_avg_variance4x8/, "$sse_x86inc", "$ssse3_x86inc";

add_proto qw/unsigned int vp9_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance4x4/, "$sse_x86inc", "$ssse3_x86inc";
#vp9_sub_pixel_variance4x4_sse2=vp9_sub_pixel_variance4x4_wmt

add_proto qw/unsigned int vp9_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_sub_pixel_avg_variance4x4/, "$sse_x86inc", "$ssse3_x86inc";

880
add_proto qw/unsigned int vp9_avg_8x8/, "const uint8_t *, int p";
881
specialize qw/vp9_avg_8x8 sse2 neon/;
882

883 884 885
add_proto qw/unsigned int vp9_avg_4x4/, "const uint8_t *, int p";
specialize qw/vp9_avg_4x4 sse2/;

886 887 888
add_proto qw/void vp9_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
specialize qw/vp9_minmax_8x8 sse2/;

889
add_proto qw/void vp9_hadamard_8x8/, "int16_t const *src_diff, int src_stride, int16_t *coeff";
890
specialize qw/vp9_hadamard_8x8 sse2/, "$ssse3_x86_64";
891

892 893
add_proto qw/void vp9_hadamard_16x16/, "int16_t const *src_diff, int src_stride, int16_t *coeff";
specialize qw/vp9_hadamard_16x16 sse2/;
894 895 896 897

add_proto qw/int16_t vp9_satd/, "const int16_t *coeff, int length";
specialize qw/vp9_satd sse2/;

898 899 900 901 902 903
add_proto qw/void vp9_int_pro_row/, "int16_t *hbuf, uint8_t const *ref, const int ref_stride, const int height";
specialize qw/vp9_int_pro_row sse2/;

add_proto qw/int16_t vp9_int_pro_col/, "uint8_t const *ref, const int width";
specialize qw/vp9_int_pro_col sse2/;

904 905
add_proto qw/int vp9_vector_var/, "int16_t const *ref, int16_t const *src, const int bwl";
specialize qw/vp9_vector_var sse2/;