Commit 789ae447 authored by Yunqing Wang

Fix high bit depth assembly function bugs

The high bit depth build failed when building for a 32-bit target.
The bugs were in the vp9_highbd_subpel_variance.asm and
vp9_highbd_sad4d_sse2.asm functions. This patch fixes the bugs
and makes the 32-bit build work.

Change-Id: Idc8e5e1b7965bb70d4afba140c6583c5d9666b75
parent b3b7645a
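
A note on the first bug, as an inference from the diff rather than something the commit message spells out: x86inc's cglobal line declares how many general-purpose registers the function needs, and a 32-bit x86 target has only seven to offer. The old declarations below request eight (nine on UNIX64) so that a spare register, one, could hold the 0x00010001 constant; the patch drops that register and instead builds the constant in m1 up front by briefly borrowing srcq. A minimal sketch of the borrow pattern, assuming NASM syntax and x86inc register aliases (illustrative, not part of the patch):

; Broadcast packed 16-bit 1s into m1 without dedicating a GPR to the constant.
  push   srcq               ; preserve the argument register we are about to clobber
  mov    srcd, 0x00010001   ; two 16-bit 1s packed into one dword
  movd   m1, srcd           ; copy the dword into the low lane of m1
  pshufd m1, m1, 0x0        ; splat that dword across all four lanes
  pop    srcq               ; restore the argument before it is first used
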
vp9_highbd_sad4d_sse2.asm
@@ -215,13 +215,20 @@ SECTION .text
; where NxN = 64x64, 32x32, 16x16, 16x8, 8x16 or 8x8
%macro HIGH_SADNXN4D 2
%if UNIX64
- cglobal highbd_sad%1x%2x4d, 5, 9, 8, src, src_stride, ref1, ref_stride, \
-                             res, ref2, ref3, ref4, one
+ cglobal highbd_sad%1x%2x4d, 5, 8, 8, src, src_stride, ref1, ref_stride, \
+                             res, ref2, ref3, ref4
%else
- cglobal highbd_sad%1x%2x4d, 4, 8, 8, src, src_stride, ref1, ref_stride, \
-                             ref2, ref3, ref4, one
+ cglobal highbd_sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \
+                             ref2, ref3, ref4
%endif
+ ; set m1
+   push srcq
+   mov srcd, 0x00010001
+   movd m1, srcd
+   pshufd m1, m1, 0x0
+   pop srcq
movsxdifnidn src_strideq, src_strided
movsxdifnidn ref_strideq, ref_strided
mov ref2q, [ref1q+gprsize*1]
@@ -236,10 +243,6 @@ cglobal highbd_sad%1x%2x4d, 4, 8, 8, src, src_stride, ref1, ref_stride, \
shl ref4q, 1
shl ref1q, 1
-   mov oned, 0x00010001
-   movd m1, oned
-   pshufd m1, m1, 0x0
HIGH_PROCESS_%1x2x4 1, 0, 0, src_strideq, ref_strideq, 1
%rep (%2-4)/2
HIGH_PROCESS_%1x2x4 0, 0, 0, src_strideq, ref_strideq, 1
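
The vp9_highbd_subpel_variance.asm changes below all follow one pattern. A plausible reading, hedged because it rests on how x86inc assigns arguments rather than on anything stated in the diff: on a 32-bit target this function has more named arguments than registers, so sec_str can end up in a stack slot, and a scaled index such as [secq + sec_str*2] only encodes when sec_str is a register. The patch therefore doubles sec_str once with shl, which accepts a memory operand, and afterwards advances secq with plain adds; where the old code stepped by sec_str*4, the new code simply adds the doubled stride twice. A minimal sketch under that assumption (not part of the patch):

;   lea secq, [secq + sec_str*2]   ; fails if sec_str is a stack slot: a scaled
;                                  ; index in an effective address must be a register
; the replacement sequence encodes either way:
  shl sec_str, 1                   ; once, up front: element stride -> byte stride
  add secq, sec_str                ; per row: add r32, r/m32 is always encodable
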
vp9_highbd_subpel_variance.asm
@@ -199,6 +199,9 @@ SECTION .text
%if %1 < 16
sar h, 1
%endif
+ %if %2 == 1 ; avg
+   shl sec_str, 1
+ %endif
; FIXME(rbultje) replace by jumptable?
test x_offsetd, x_offsetd
@@ -223,7 +226,7 @@ SECTION .text
lea srcq, [srcq + src_strideq*2]
lea dstq, [dstq + dst_strideq*2]
%if %2 == 1 ; avg
-   lea secq, [secq + sec_str*2]
+   add secq, sec_str
%endif
%else ; %1 < 16
movu m0, [srcq]
@@ -232,14 +235,15 @@ SECTION .text
mova m3, [dstq + dst_strideq*2]
%if %2 == 1 ; avg
pavgw m0, [secq]
-   pavgw m2, [secq + sec_str*2]
+   add secq, sec_str
+   pavgw m2, [secq]
%endif
SUM_SSE m0, m1, m2, m3, m6, m7
lea srcq, [srcq + src_strideq*4]
lea dstq, [dstq + dst_strideq*4]
%if %2 == 1 ; avg
-   lea secq, [secq + sec_str*4]
+   add secq, sec_str
%endif
%endif
dec h
@@ -270,7 +274,7 @@ SECTION .text
lea srcq, [srcq + src_strideq*2]
lea dstq, [dstq + dst_strideq*2]
%if %2 == 1 ; avg
-   lea secq, [secq + sec_str*2]
+   add secq, sec_str
%endif
%else ; %1 < 16
movu m0, [srcq]
@@ -282,14 +286,15 @@ SECTION .text
pavgw m1, m5
%if %2 == 1 ; avg
pavgw m0, [secq]
-   pavgw m1, [secq+sec_str*2]
+   add secq, sec_str
+   pavgw m1, [secq]
%endif
SUM_SSE m0, m2, m1, m3, m6, m7
lea srcq, [srcq + src_strideq*4]
lea dstq, [dstq + dst_strideq*4]
%if %2 == 1 ; avg
-   lea secq, [secq + sec_str*4]
+   add secq, sec_str
%endif
%endif
dec h
@@ -358,7 +363,7 @@ SECTION .text
lea srcq, [srcq + src_strideq*2]
lea dstq, [dstq + dst_strideq*2]
%if %2 == 1 ; avg
-   lea secq, [secq + sec_str*2]
+   add secq, sec_str
%endif
%else ; %1 < 16
movu m0, [srcq]
@@ -379,14 +384,15 @@ SECTION .text
psrlw m0, 4
%if %2 == 1 ; avg
pavgw m0, [secq]
-   pavgw m1, [secq+sec_str*2]
+   add secq, sec_str
+   pavgw m1, [secq]
%endif
SUM_SSE m0, m2, m1, m3, m6, m7
lea srcq, [srcq + src_strideq*4]
lea dstq, [dstq + dst_strideq*4]
%if %2 == 1 ; avg
-   lea secq, [secq + sec_str*4]
+   add secq, sec_str
%endif
%endif
dec h
@@ -423,7 +429,7 @@ SECTION .text
lea srcq, [srcq + src_strideq*2]
lea dstq, [dstq + dst_strideq*2]
%if %2 == 1 ; avg
-   lea secq, [secq + sec_str*2]
+   add secq, sec_str
%endif
%else ; %1 < 16
movu m0, [srcq]
@@ -436,14 +442,15 @@ SECTION .text
pavgw m1, m5
%if %2 == 1 ; avg
pavgw m0, [secq]
-   pavgw m1, [secq+sec_str*2]
+   add secq, sec_str
+   pavgw m1, [secq]
%endif
SUM_SSE m0, m2, m1, m3, m6, m7
lea srcq, [srcq + src_strideq*4]
lea dstq, [dstq + dst_strideq*4]
%if %2 == 1 ; avg
-   lea secq, [secq + sec_str*4]
+   add secq, sec_str
%endif
%endif
dec h
@@ -485,7 +492,7 @@ SECTION .text
lea srcq, [srcq + src_strideq*2]
lea dstq, [dstq + dst_strideq*2]
%if %2 == 1 ; avg
-   lea secq, [secq + sec_str*2]
+   add secq, sec_str
%endif
%else ; %1 < 16
movu m0, [srcq]
@@ -505,7 +512,8 @@ SECTION .text
mova m5, [dstq + dst_strideq*2]
%if %2 == 1 ; avg
pavgw m0, [secq]
-   pavgw m2, [secq+sec_str*2]
+   add secq, sec_str
+   pavgw m2, [secq]
%endif
SUM_SSE m0, m4, m2, m5, m6, m7
mova m0, m3
@@ -513,7 +521,7 @@ SECTION .text
lea srcq, [srcq + src_strideq*4]
lea dstq, [dstq + dst_strideq*4]
%if %2 == 1 ; avg
-   lea secq, [secq + sec_str*4]
+   add secq, sec_str
%endif
%endif
dec h
@@ -590,7 +598,7 @@ SECTION .text
lea srcq, [srcq + src_strideq*2]
lea dstq, [dstq + dst_strideq*2]
%if %2 == 1 ; avg
-   lea secq, [secq + sec_str*2]
+   add secq, sec_str
%endif
%else ; %1 < 16
movu m0, [srcq]
@@ -620,7 +628,8 @@ SECTION .text
mova m3, [dstq+dst_strideq*2]
%if %2 == 1 ; avg
pavgw m0, [secq]
-   pavgw m4, [secq+sec_str*2]
+   add secq, sec_str
+   pavgw m4, [secq]
%endif
SUM_SSE m0, m2, m4, m3, m6, m7
mova m0, m5
@@ -628,7 +637,7 @@ SECTION .text
lea srcq, [srcq + src_strideq*4]
lea dstq, [dstq + dst_strideq*4]
%if %2 == 1 ; avg
-   lea secq, [secq + sec_str*4]
+   add secq, sec_str
%endif
%endif
dec h
@@ -698,7 +707,7 @@ SECTION .text
lea srcq, [srcq+src_strideq*2]
lea dstq, [dstq+dst_strideq*2]
%if %2 == 1 ; avg
-   lea secq, [secq + sec_str*2]
+   add secq, sec_str
%endif
%else ; %1 < 16
movu m0, [srcq]
@@ -719,14 +728,15 @@ SECTION .text
psrlw m0, 4
%if %2 == 1 ; avg
pavgw m0, [secq]
-   pavgw m1, [secq+sec_str*2]
+   add secq, sec_str
+   pavgw m1, [secq]
%endif
SUM_SSE m0, m4, m1, m5, m6, m7
lea srcq, [srcq+src_strideq*4]
lea dstq, [dstq+dst_strideq*4]
%if %2 == 1 ; avg
-   lea secq, [secq + sec_str*4]
+   add secq, sec_str
%endif
%endif
dec h
@@ -815,7 +825,7 @@ SECTION .text
lea srcq, [srcq+src_strideq*2]
lea dstq, [dstq+dst_strideq*2]
%if %2 == 1 ; avg
-   lea secq, [secq + sec_str*2]
+   add secq, sec_str
%endif
%else ; %1 < 16
movu m0, [srcq]
@@ -847,7 +857,8 @@ SECTION .text
pavgw m2, m3
%if %2 == 1 ; avg
pavgw m0, [secq]
-   pavgw m2, [secq+sec_str*2]
+   add secq, sec_str
+   pavgw m2, [secq]
%endif
SUM_SSE m0, m4, m2, m5, m6, m7
mova m0, m3
@@ -855,7 +866,7 @@ SECTION .text
lea srcq, [srcq+src_strideq*4]
lea dstq, [dstq+dst_strideq*4]
%if %2 == 1 ; avg
-   lea secq, [secq + sec_str*4]
+   add secq, sec_str
%endif
%endif
dec h
@@ -969,7 +980,7 @@ SECTION .text
INC_SRC_BY_SRC_STRIDE
lea dstq, [dstq + dst_strideq * 2]
%if %2 == 1 ; avg
-   lea secq, [secq + sec_str*2]
+   add secq, sec_str
%endif
%else ; %1 < 16
movu m0, [srcq]
@@ -1013,7 +1024,8 @@ SECTION .text
mova m3, [dstq+dst_strideq*2]
%if %2 == 1 ; avg
pavgw m0, [secq]
-   pavgw m4, [secq+sec_str*2]
+   add secq, sec_str
+   pavgw m4, [secq]
%endif
SUM_SSE m0, m2, m4, m3, m6, m7
mova m0, m5
@@ -1021,7 +1033,7 @@ SECTION .text
INC_SRC_BY_SRC_2STRIDE
lea dstq, [dstq + dst_strideq * 4]
%if %2 == 1 ; avg
-   lea secq, [secq + sec_str*4]
+   add secq, sec_str
%endif
%endif
dec h