Commit 9d901217 authored by Yunqing Wang

Fix x86inc.asm to build PIC code correctly

The current x86inc.asm didn't handle 32-bit PIC builds properly:
TEXTRELs were seen in the built library. The PIC macros from
libvpx's x86_abi_support.asm were used to fix this problem, and
the assembly code was modified to use those macros.

Note: This fix is needed for building the decoder. Functions in
the encoder will be fixed later.

Change-Id: Ifa548d37b1d0bc7d0528db75009cc18cd5eb1838
parent 84758960
......@@ -97,21 +97,91 @@
%endif
%endmacro
%if WIN64
; The PIC macros are copied from vpx_ports/x86_abi_support.asm. The "define PIC"
; logic from the original code is retained in the 64-bit branch.
; Classify the assembler's output format: ABI_IS_32BIT is defined as 1 for
; elf32, macho32, win32 and aout, and as 0 for every other __OUTPUT_FORMAT__.
%ifidn __OUTPUT_FORMAT__,elf32
%define ABI_IS_32BIT 1
%elifidn __OUTPUT_FORMAT__,macho32
%define ABI_IS_32BIT 1
%elifidn __OUTPUT_FORMAT__,win32
%define ABI_IS_32BIT 1
%elifidn __OUTPUT_FORMAT__,aout
%define ABI_IS_32BIT 1
%else
; Any format not listed above is treated as not 32-bit.
%define ABI_IS_32BIT 0
%endif
%if ABI_IS_32BIT
%if CONFIG_PIC=1
%ifidn __OUTPUT_FORMAT__,elf32
%define GET_GOT_SAVE_ARG 1
%define WRT_PLT wrt ..plt
; GET_GOT reg  (elf32 variant)
;   %1 = register that will hold the base used by GLOBAL(x).
; Saves the current value of %1 on the stack, loads %1 with an address
; computed from the running code location and _GLOBAL_OFFSET_TABLE_, and
; (re)defines GLOBAL(x) and RESTORE_GOT to work with that register.
; The stack holds one extra pushed value until RESTORE_GOT is executed, so
; each GET_GOT must be matched by a RESTORE_GOT before returning.
%macro GET_GOT 1
extern _GLOBAL_OFFSET_TABLE_
; Preserve the caller-visible value of %1; RESTORE_GOT pops it back.
push %1
; The call pushes the address of %%sub_offset as its return address.
call %%get_got
%%sub_offset:
; Reached after the ret below: jump over the helper body.
jmp %%exitGG
%%get_got:
; %1 = return address, i.e. the run-time address of %%sub_offset.
mov %1, [esp]
; Adjust %1 from that code address using the ..gotpc relocation
; (presumably yielding the GOT base, as in NASM's ELF PIC idiom - verify
; against the NASM manual).
add %1, _GLOBAL_OFFSET_TABLE_ + $$ - %%sub_offset wrt ..gotpc
ret
%%exitGG:
; From here on GLOBAL(x) expands to x's ..gotoff offset plus %1.
%undef GLOBAL
%define GLOBAL(x) x + %1 wrt ..gotoff
; RESTORE_GOT undoes the push at the top of this macro.
%undef RESTORE_GOT
%define RESTORE_GOT pop %1
%endmacro
%elifidn __OUTPUT_FORMAT__,macho32
%define GET_GOT_SAVE_ARG 1
; GET_GOT reg  (macho32 variant)
;   %1 = register that will hold the base used by GLOBAL(x).
; Saves the current value of %1 on the stack, loads %1 with the run-time
; address of the local label %%get_got, and (re)defines GLOBAL(x) and
; RESTORE_GOT to work with that register. Each GET_GOT must be matched by a
; RESTORE_GOT, which pops the saved value of %1.
%macro GET_GOT 1
; Preserve the caller-visible value of %1; RESTORE_GOT pops it back.
push %1
; Call the very next instruction: this pushes the address of %%get_got.
call %%get_got
%%get_got:
; %1 = run-time address of %%get_got (removes the pushed return address).
pop %1
; GLOBAL(x) = %1 plus the assembly-time distance from %%get_got to x.
%undef GLOBAL
%define GLOBAL(x) x + %1 - %%get_got
; RESTORE_GOT undoes the push at the top of this macro.
%undef RESTORE_GOT
%define RESTORE_GOT pop %1
%endmacro
%endif
%endif
%if ARCH_X86_64 == 0
%undef PIC
%endif
%else
%macro GET_GOT 1
%endmacro
%define GLOBAL(x) rel x
%define WRT_PLT wrt ..plt
%if WIN64
%define PIC
%elifidn __OUTPUT_FORMAT__,macho64
%elifidn __OUTPUT_FORMAT__,macho64
%define PIC
%elif ARCH_X86_64 == 0
; x86_32 doesn't require PIC.
; Some distros prefer shared objects to be PIC, but nothing breaks if
; the code contains a few textrels, so we'll skip that complexity.
%undef PIC
%elif CONFIG_PIC
%elif CONFIG_PIC
%define PIC
%endif
%endif
; Fallback definitions: supply no-op versions of any PIC helper that was not
; defined earlier, so code can use GET_GOT / GLOBAL / RESTORE_GOT / WRT_PLT
; unconditionally.
%ifnmacro GET_GOT
; No GET_GOT macro exists yet: make it expand to nothing and let GLOBAL(x)
; be the plain symbol x.
%macro GET_GOT 1
%endmacro
%define GLOBAL(x) x
%endif
; RESTORE_GOT defaults to an empty expansion.
%ifndef RESTORE_GOT
%define RESTORE_GOT
%endif
; WRT_PLT defaults to an empty expansion.
%ifndef WRT_PLT
%define WRT_PLT
%endif
; When PIC is defined, switch NASM's default addressing to relative.
%ifdef PIC
default rel
%endif
; Done with PIC macros
; Always use long nops (reduces 0x90 spam in disassembly on x86_32)
%ifndef __NASM_VER__
......
......@@ -19,12 +19,14 @@ pw_32: times 8 dw 32
SECTION .text
INIT_MMX sse
cglobal dc_predictor_4x4, 4, 4, 2, dst, stride, above, left
cglobal dc_predictor_4x4, 4, 5, 2, dst, stride, above, left, goffset
GET_GOT goffsetq
pxor m1, m1
movd m0, [aboveq]
punpckldq m0, [leftq]
psadbw m0, m1
paddw m0, [pw_4]
paddw m0, [GLOBAL(pw_4)]
psraw m0, 3
pshufw m0, m0, 0x0
packuswb m0, m0
......@@ -33,10 +35,14 @@ cglobal dc_predictor_4x4, 4, 4, 2, dst, stride, above, left
lea dstq, [dstq+strideq*2]
movd [dstq ], m0
movd [dstq+strideq], m0
RESTORE_GOT
RET
INIT_MMX sse
cglobal dc_predictor_8x8, 4, 4, 3, dst, stride, above, left
cglobal dc_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset
GET_GOT goffsetq
pxor m1, m1
movq m0, [aboveq]
movq m2, [leftq]
......@@ -45,7 +51,7 @@ cglobal dc_predictor_8x8, 4, 4, 3, dst, stride, above, left
psadbw m0, m1
psadbw m2, m1
paddw m0, m2
paddw m0, [pw_8]
paddw m0, [GLOBAL(pw_8)]
psraw m0, 4
pshufw m0, m0, 0x0
packuswb m0, m0
......@@ -58,10 +64,14 @@ cglobal dc_predictor_8x8, 4, 4, 3, dst, stride, above, left
movq [dstq+strideq ], m0
movq [dstq+strideq*2], m0
movq [dstq+stride3q ], m0
RESTORE_GOT
RET
INIT_XMM sse2
cglobal dc_predictor_16x16, 4, 4, 3, dst, stride, above, left
cglobal dc_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset
GET_GOT goffsetq
pxor m1, m1
mova m0, [aboveq]
mova m2, [leftq]
......@@ -73,7 +83,7 @@ cglobal dc_predictor_16x16, 4, 4, 3, dst, stride, above, left
paddw m0, m2
movhlps m2, m0
paddw m0, m2
paddw m0, [pw_16]
paddw m0, [GLOBAL(pw_16)]
psraw m0, 5
pshuflw m0, m0, 0x0
punpcklqdq m0, m0
......@@ -86,10 +96,14 @@ cglobal dc_predictor_16x16, 4, 4, 3, dst, stride, above, left
lea dstq, [dstq+strideq*4]
dec lines4d
jnz .loop
RESTORE_GOT
REP_RET
INIT_XMM sse2
cglobal dc_predictor_32x32, 4, 4, 5, dst, stride, above, left
cglobal dc_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset
GET_GOT goffsetq
pxor m1, m1
mova m0, [aboveq]
mova m2, [aboveq+16]
......@@ -107,7 +121,7 @@ cglobal dc_predictor_32x32, 4, 4, 5, dst, stride, above, left
paddw m0, m4
movhlps m2, m0
paddw m0, m2
paddw m0, [pw_32]
paddw m0, [GLOBAL(pw_32)]
psraw m0, 6
pshuflw m0, m0, 0x0
punpcklqdq m0, m0
......@@ -124,6 +138,8 @@ cglobal dc_predictor_32x32, 4, 4, 5, dst, stride, above, left
lea dstq, [dstq+strideq*4]
dec lines4d
jnz .loop
RESTORE_GOT
REP_RET
INIT_MMX sse
......
......@@ -112,14 +112,16 @@ cglobal h_predictor_32x32, 2, 4, 3, dst, stride, line, left
REP_RET
INIT_MMX ssse3
cglobal d45_predictor_4x4, 3, 3, 4, dst, stride, above
cglobal d45_predictor_4x4, 3, 4, 4, dst, stride, above, goffset
GET_GOT goffsetq
movq m0, [aboveq]
pshufb m2, m0, [sh_b23456777]
pshufb m1, m0, [sh_b01234577]
pshufb m0, [sh_b12345677]
pshufb m2, m0, [GLOBAL(sh_b23456777)]
pshufb m1, m0, [GLOBAL(sh_b01234577)]
pshufb m0, [GLOBAL(sh_b12345677)]
pavgb m3, m2, m1
pxor m2, m1
pand m2, [pb_1]
pand m2, [GLOBAL(pb_1)]
psubb m3, m2
pavgb m0, m3
......@@ -132,19 +134,23 @@ cglobal d45_predictor_4x4, 3, 3, 4, dst, stride, above
movd [dstq ], m0
psrlq m0, 8
movd [dstq+strideq], m0
RESTORE_GOT
RET
INIT_MMX ssse3
cglobal d45_predictor_8x8, 3, 3, 4, dst, stride, above
cglobal d45_predictor_8x8, 3, 4, 4, dst, stride, above, goffset
GET_GOT goffsetq
movq m0, [aboveq]
mova m1, [sh_b12345677]
DEFINE_ARGS dst, stride, stride3, line
mova m1, [GLOBAL(sh_b12345677)]
DEFINE_ARGS dst, stride, stride3
lea stride3q, [strideq*3]
pshufb m2, m0, [sh_b23456777]
pshufb m2, m0, [GLOBAL(sh_b23456777)]
pavgb m3, m2, m0
pxor m2, m0
pshufb m0, m1
pand m2, [pb_1]
pand m2, [GLOBAL(pb_1)]
psubb m3, m2
pavgb m0, m3
......@@ -167,20 +173,24 @@ cglobal d45_predictor_8x8, 3, 3, 4, dst, stride, above
movq [dstq+strideq*2], m0
pshufb m0, m1
movq [dstq+stride3q ], m0
RESTORE_GOT
RET
INIT_XMM ssse3
cglobal d45_predictor_16x16, 3, 5, 4, dst, stride, above, dst8, line
cglobal d45_predictor_16x16, 3, 6, 4, dst, stride, above, dst8, line, goffset
GET_GOT goffsetq
mova m0, [aboveq]
DEFINE_ARGS dst, stride, stride3, dst8, line
lea stride3q, [strideq*3]
lea dst8q, [dstq+strideq*8]
mova m1, [sh_b123456789abcdeff]
pshufb m2, m0, [sh_b23456789abcdefff]
mova m1, [GLOBAL(sh_b123456789abcdeff)]
pshufb m2, m0, [GLOBAL(sh_b23456789abcdefff)]
pavgb m3, m2, m0
pxor m2, m0
pshufb m0, m1
pand m2, [pb_1]
pand m2, [GLOBAL(pb_1)]
psubb m3, m2
pavgb m0, m3
......@@ -214,29 +224,33 @@ cglobal d45_predictor_16x16, 3, 5, 4, dst, stride, above, dst8, line
movhps [dstq+strideq +8], m0
movhps [dstq+strideq*2+8], m0
movhps [dstq+stride3q +8], m0
RESTORE_GOT
RET
INIT_XMM ssse3
cglobal d45_predictor_32x32, 3, 5, 7, dst, stride, above, dst16, line
cglobal d45_predictor_32x32, 3, 6, 7, dst, stride, above, dst16, line, goffset
GET_GOT goffsetq
mova m0, [aboveq]
mova m4, [aboveq+16]
DEFINE_ARGS dst, stride, stride3, dst16, line
lea stride3q, [strideq*3]
lea dst16q, [dstq +strideq*8]
lea dst16q, [dst16q+strideq*8]
mova m1, [sh_b123456789abcdeff]
pshufb m2, m4, [sh_b23456789abcdefff]
mova m1, [GLOBAL(sh_b123456789abcdeff)]
pshufb m2, m4, [GLOBAL(sh_b23456789abcdefff)]
pavgb m3, m2, m4
pxor m2, m4
palignr m5, m4, m0, 1
palignr m6, m4, m0, 2
pshufb m4, m1
pand m2, [pb_1]
pand m2, [GLOBAL(pb_1)]
psubb m3, m2
pavgb m4, m3
pavgb m3, m0, m6
pxor m0, m6
pand m0, [pb_1]
pand m0, [GLOBAL(pb_1)]
psubb m3, m0
pavgb m5, m3
......@@ -288,4 +302,6 @@ cglobal d45_predictor_32x32, 3, 5, 7, dst, stride, above, dst16, line
mova [dstq +strideq +16], m4
mova [dstq +strideq*2+16], m4
mova [dstq +stride3q +16], m4
RESTORE_GOT
RET
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment