Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
BC
public
external
ffmpeg
Commits
348493db
Commit
348493db
authored
May 24, 2011
by
Daniel Kang
Committed by
Ronald S. Bultje
May 31, 2011
Browse files
Update 8-bit H.264 IDCT function names to reflect bit-depth.
Signed-off-by:
Ronald S. Bultje
<
rbultje@google.com
>
parent
836f47d3
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
57 additions
and
72 deletions
+57
-72
libavcodec/h264dsp.h
libavcodec/h264dsp.h
+0
-1
libavcodec/x86/h264_idct.asm
libavcodec/x86/h264_idct.asm
+19
-19
libavcodec/x86/h264dsp_mmx.c
libavcodec/x86/h264dsp_mmx.c
+38
-52
No files found.
libavcodec/h264dsp.h
View file @
348493db
...
...
@@ -66,7 +66,6 @@ typedef struct H264DSPContext{
void
(
*
h264_idct_dc_add
)(
uint8_t
*
dst
/*align 4*/
,
DCTELEM
*
block
/*align 16*/
,
int
stride
);
void
(
*
h264_idct8_dc_add
)(
uint8_t
*
dst
/*align 8*/
,
DCTELEM
*
block
/*align 16*/
,
int
stride
);
void
(
*
h264_dct
)(
DCTELEM
block
[
4
][
4
]);
void
(
*
h264_idct_add16
)(
uint8_t
*
dst
/*align 16*/
,
const
int
*
blockoffset
,
DCTELEM
*
block
/*align 16*/
,
int
stride
,
const
uint8_t
nnzc
[
6
*
8
]);
void
(
*
h264_idct8_add4
)(
uint8_t
*
dst
/*align 16*/
,
const
int
*
blockoffset
,
DCTELEM
*
block
/*align 16*/
,
int
stride
,
const
uint8_t
nnzc
[
6
*
8
]);
void
(
*
h264_idct_add8
)(
uint8_t
**
dst
/*align 16*/
,
const
int
*
blockoffset
,
DCTELEM
*
block
/*align 16*/
,
int
stride
,
const
uint8_t
nnzc
[
6
*
8
]);
...
...
libavcodec/x86/h264_idct.asm
View file @
348493db
...
...
@@ -73,7 +73,7 @@ SECTION .text
INIT_MMX
; ff_h264_idct_add_mmx(uint8_t *dst, int16_t *block, int stride)
cglobal
h264_idct_add_mmx
,
3
,
3
,
0
cglobal
h264_idct_add_
8_
mmx
,
3
,
3
,
0
IDCT4_ADD
r0
,
r1
,
r2
RET
...
...
@@ -125,7 +125,7 @@ cglobal h264_idct_add_mmx, 3, 3, 0
SUMSUB_BA
w
,
0
,
4
SUMSUB_BA
w
,
3
,
2
SUMSUB_BA
w
,
1
,
5
SWAP
7
,
6
,
4
,
5
,
2
,
3
,
1
,
0
; 70315246 -> 01234567
SWAP
7
,
6
,
4
,
5
,
2
,
3
,
1
,
0
; 70315246 -> 01234567
%endmacro
%macro IDCT8_1D_FULL 1
...
...
@@ -177,7 +177,7 @@ cglobal h264_idct_add_mmx, 3, 3, 0
INIT_MMX
; ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
cglobal
h264_idct8_add_mmx
,
3
,
4
,
0
cglobal
h264_idct8_add_
8_
mmx
,
3
,
4
,
0
%assign pad 128+4-(stack_offset&7)
SUB
rsp
,
pad
...
...
@@ -237,7 +237,7 @@ cglobal h264_idct8_add_mmx, 3, 4, 0
INIT_XMM
; ff_h264_idct8_add_sse2(uint8_t *dst, int16_t *block, int stride)
cglobal
h264_idct8_add_sse2
,
3
,
4
,
10
cglobal
h264_idct8_add_
8_
sse2
,
3
,
4
,
10
IDCT8_ADD_SSE
r0
,
r1
,
r2
,
r3
RET
...
...
@@ -261,7 +261,7 @@ cglobal h264_idct8_add_sse2, 3, 4, 10
packuswb
m1
,
m1
%endmacro
%macro DC_ADD_MMX2_OP
3-
4
%macro DC_ADD_MMX2_OP 4
%1 m2, [%2 ]
%1 m3, [%2+%3 ]
%1 m4, [%2+%3*2]
...
...
@@ -282,13 +282,13 @@ cglobal h264_idct8_add_sse2, 3, 4, 10
INIT_MMX
; ff_h264_idct_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride)
cglobal
h264_idct_dc_add_mmx2
,
3
,
3
,
0
cglobal
h264_idct_dc_add_
8_
mmx2
,
3
,
3
,
0
DC_ADD_MMX2_INIT
r1
,
r2
DC_ADD_MMX2_OP
movh
,
r0
,
r2
,
r1
RET
; ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride)
cglobal
h264_idct8_dc_add_mmx2
,
3
,
3
,
0
cglobal
h264_idct8_dc_add_
8_
mmx2
,
3
,
3
,
0
DC_ADD_MMX2_INIT
r1
,
r2
DC_ADD_MMX2_OP
mova
,
r0
,
r2
,
r1
lea
r0
,
[
r0
+
r2
*
4
]
...
...
@@ -297,7 +297,7 @@ cglobal h264_idct8_dc_add_mmx2, 3, 3, 0
; ff_h264_idct_add16_mmx(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal
h264_idct_add16_mmx
,
5
,
7
,
0
cglobal
h264_idct_add16_
8_
mmx
,
5
,
7
,
0
xor
r5
,
r5
%ifdef PIC
lea
r11
,
[
scan8_mem
]
...
...
@@ -319,7 +319,7 @@ cglobal h264_idct_add16_mmx, 5, 7, 0
; ff_h264_idct8_add4_mmx(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal
h264_idct8_add4_mmx
,
5
,
7
,
0
cglobal
h264_idct8_add4_
8_
mmx
,
5
,
7
,
0
%assign pad 128+4-(stack_offset&7)
SUB
rsp
,
pad
...
...
@@ -351,7 +351,7 @@ cglobal h264_idct8_add4_mmx, 5, 7, 0
; ff_h264_idct_add16_mmx2(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal
h264_idct_add16_mmx2
,
5
,
7
,
0
cglobal
h264_idct_add16_
8_
mmx2
,
5
,
7
,
0
xor
r5
,
r5
%ifdef PIC
lea
r11
,
[
scan8_mem
]
...
...
@@ -398,7 +398,7 @@ cglobal h264_idct_add16_mmx2, 5, 7, 0
; ff_h264_idct_add16intra_mmx(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal
h264_idct_add16intra_mmx
,
5
,
7
,
0
cglobal
h264_idct_add16intra_
8_
mmx
,
5
,
7
,
0
xor
r5
,
r5
%ifdef PIC
lea
r11
,
[
scan8_mem
]
...
...
@@ -421,7 +421,7 @@ cglobal h264_idct_add16intra_mmx, 5, 7, 0
; ff_h264_idct_add16intra_mmx2(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal
h264_idct_add16intra_mmx2
,
5
,
7
,
0
cglobal
h264_idct_add16intra_
8_
mmx2
,
5
,
7
,
0
xor
r5
,
r5
%ifdef PIC
lea
r11
,
[
scan8_mem
]
...
...
@@ -466,7 +466,7 @@ cglobal h264_idct_add16intra_mmx2, 5, 7, 0
; ff_h264_idct8_add4_mmx2(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal
h264_idct8_add4_mmx2
,
5
,
7
,
0
cglobal
h264_idct8_add4_
8_
mmx2
,
5
,
7
,
0
%assign pad 128+4-(stack_offset&7)
SUB
rsp
,
pad
...
...
@@ -529,7 +529,7 @@ cglobal h264_idct8_add4_mmx2, 5, 7, 0
INIT_XMM
; ff_h264_idct8_add4_sse2(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal
h264_idct8_add4_sse2
,
5
,
7
,
10
cglobal
h264_idct8_add4_
8_
sse2
,
5
,
7
,
10
xor
r5
,
r5
%ifdef PIC
lea
r11
,
[
scan8_mem
]
...
...
@@ -607,7 +607,7 @@ h264_idct_add8_mmx_plane:
; ff_h264_idct_add8_mmx(uint8_t **dest, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal
h264_idct_add8_mmx
,
5
,
7
,
0
cglobal
h264_idct_add8_
8_
mmx
,
5
,
7
,
0
mov
r5
,
16
add
r2
,
512
%ifdef PIC
...
...
@@ -668,7 +668,7 @@ h264_idct_add8_mmx2_plane
; ff_h264_idct_add8_mmx2(uint8_t **dest, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal
h264_idct_add8_mmx2
,
5
,
7
,
0
cglobal
h264_idct_add8_
8_
mmx2
,
5
,
7
,
0
mov
r5
,
16
add
r2
,
512
%ifdef ARCH_X86_64
...
...
@@ -744,7 +744,7 @@ x264_add8x4_idct_sse2:
; ff_h264_idct_add16_sse2(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal
h264_idct_add16_sse2
,
5
,
5
,
8
cglobal
h264_idct_add16_
8_
sse2
,
5
,
5
,
8
%ifdef ARCH_X86_64
mov
r10
,
r0
%endif
...
...
@@ -791,7 +791,7 @@ cglobal h264_idct_add16_sse2, 5, 5, 8
; ff_h264_idct_add16intra_sse2(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal
h264_idct_add16intra_sse2
,
5
,
7
,
8
cglobal
h264_idct_add16intra_
8_
sse2
,
5
,
7
,
8
%ifdef ARCH_X86_64
mov
r10
,
r0
%endif
...
...
@@ -840,7 +840,7 @@ cglobal h264_idct_add16intra_sse2, 5, 7, 8
; ff_h264_idct_add8_sse2(uint8_t **dest, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal
h264_idct_add8_sse2
,
5
,
7
,
8
cglobal
h264_idct_add8_
8_
sse2
,
5
,
7
,
8
add
r2
,
512
%ifdef ARCH_X86_64
mov
r10
,
r0
...
...
libavcodec/x86/h264dsp_mmx.c
View file @
348493db
...
...
@@ -30,9 +30,14 @@ DECLARE_ALIGNED(8, static const uint64_t, ff_pb_3_1 ) = 0x0103010301030103ULL;
#define IDCT_ADD_FUNC(NUM, DEPTH, OPT) \
void ff_h264_idct ## NUM ## _add_ ## DEPTH ## _ ## OPT (uint8_t *dst, int16_t *block, int stride);
IDCT_ADD_FUNC
(,
8
,
mmx
)
IDCT_ADD_FUNC
(,
10
,
sse2
)
IDCT_ADD_FUNC
(
_dc
,
8
,
mmx2
)
IDCT_ADD_FUNC
(
_dc
,
10
,
mmx2
)
IDCT_ADD_FUNC
(
8
_dc
,
8
,
mmx2
)
IDCT_ADD_FUNC
(
8
_dc
,
10
,
sse2
)
IDCT_ADD_FUNC
(
8
,
8
,
mmx
)
IDCT_ADD_FUNC
(
8
,
8
,
sse2
)
IDCT_ADD_FUNC
(
8
,
10
,
sse2
)
#if HAVE_AVX
IDCT_ADD_FUNC
(,
10
,
avx
)
...
...
@@ -46,9 +51,18 @@ void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT \
(uint8_t *dst, const int *block_offset, \
DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
IDCT_ADD_REP_FUNC
(
8
,
4
,
8
,
mmx
)
IDCT_ADD_REP_FUNC
(
8
,
4
,
8
,
mmx2
)
IDCT_ADD_REP_FUNC
(
8
,
4
,
8
,
sse2
)
IDCT_ADD_REP_FUNC
(
8
,
4
,
10
,
sse2
)
IDCT_ADD_REP_FUNC
(
8
,
4
,
10
,
avx
)
IDCT_ADD_REP_FUNC
(,
16
,
8
,
mmx
)
IDCT_ADD_REP_FUNC
(,
16
,
8
,
mmx2
)
IDCT_ADD_REP_FUNC
(,
16
,
8
,
sse2
)
IDCT_ADD_REP_FUNC
(,
16
,
10
,
sse2
)
IDCT_ADD_REP_FUNC
(,
16
intra
,
8
,
mmx
)
IDCT_ADD_REP_FUNC
(,
16
intra
,
8
,
mmx2
)
IDCT_ADD_REP_FUNC
(,
16
intra
,
8
,
sse2
)
IDCT_ADD_REP_FUNC
(,
16
intra
,
10
,
sse2
)
#if HAVE_AVX
IDCT_ADD_REP_FUNC
(,
16
,
10
,
avx
)
...
...
@@ -60,42 +74,14 @@ IDCT_ADD_REP_FUNC(, 16intra, 10, avx)
void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT \
(uint8_t **dst, const int *block_offset, \
DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
IDCT_ADD_REP_FUNC2
(,
8
,
8
,
mmx
)
IDCT_ADD_REP_FUNC2
(,
8
,
8
,
mmx2
)
IDCT_ADD_REP_FUNC2
(,
8
,
8
,
sse2
)
IDCT_ADD_REP_FUNC2
(,
8
,
10
,
sse2
)
#if HAVE_AVX
IDCT_ADD_REP_FUNC2
(,
8
,
10
,
avx
)
#endif
void
ff_h264_idct_add_mmx
(
uint8_t
*
dst
,
int16_t
*
block
,
int
stride
);
void
ff_h264_idct8_add_mmx
(
uint8_t
*
dst
,
int16_t
*
block
,
int
stride
);
void
ff_h264_idct8_add_sse2
(
uint8_t
*
dst
,
int16_t
*
block
,
int
stride
);
void
ff_h264_idct_dc_add_mmx2
(
uint8_t
*
dst
,
int16_t
*
block
,
int
stride
);
void
ff_h264_idct8_dc_add_mmx2
(
uint8_t
*
dst
,
int16_t
*
block
,
int
stride
);
void
ff_h264_idct_add16_mmx
(
uint8_t
*
dst
,
const
int
*
block_offset
,
DCTELEM
*
block
,
int
stride
,
const
uint8_t
nnzc
[
6
*
8
]);
void
ff_h264_idct8_add4_mmx
(
uint8_t
*
dst
,
const
int
*
block_offset
,
DCTELEM
*
block
,
int
stride
,
const
uint8_t
nnzc
[
6
*
8
]);
void
ff_h264_idct_add16_mmx2
(
uint8_t
*
dst
,
const
int
*
block_offset
,
DCTELEM
*
block
,
int
stride
,
const
uint8_t
nnzc
[
6
*
8
]);
void
ff_h264_idct_add16intra_mmx
(
uint8_t
*
dst
,
const
int
*
block_offset
,
DCTELEM
*
block
,
int
stride
,
const
uint8_t
nnzc
[
6
*
8
]);
void
ff_h264_idct_add16intra_mmx2
(
uint8_t
*
dst
,
const
int
*
block_offset
,
DCTELEM
*
block
,
int
stride
,
const
uint8_t
nnzc
[
6
*
8
]);
void
ff_h264_idct8_add4_mmx2
(
uint8_t
*
dst
,
const
int
*
block_offset
,
DCTELEM
*
block
,
int
stride
,
const
uint8_t
nnzc
[
6
*
8
]);
void
ff_h264_idct8_add4_sse2
(
uint8_t
*
dst
,
const
int
*
block_offset
,
DCTELEM
*
block
,
int
stride
,
const
uint8_t
nnzc
[
6
*
8
]);
void
ff_h264_idct_add8_mmx
(
uint8_t
**
dest
,
const
int
*
block_offset
,
DCTELEM
*
block
,
int
stride
,
const
uint8_t
nnzc
[
6
*
8
]);
void
ff_h264_idct_add8_mmx2
(
uint8_t
**
dest
,
const
int
*
block_offset
,
DCTELEM
*
block
,
int
stride
,
const
uint8_t
nnzc
[
6
*
8
]);
void
ff_h264_idct_add16_sse2
(
uint8_t
*
dst
,
const
int
*
block_offset
,
DCTELEM
*
block
,
int
stride
,
const
uint8_t
nnzc
[
6
*
8
]);
void
ff_h264_idct_add16intra_sse2
(
uint8_t
*
dst
,
const
int
*
block_offset
,
DCTELEM
*
block
,
int
stride
,
const
uint8_t
nnzc
[
6
*
8
]);
void
ff_h264_idct_add8_sse2
(
uint8_t
**
dest
,
const
int
*
block_offset
,
DCTELEM
*
block
,
int
stride
,
const
uint8_t
nnzc
[
6
*
8
]);
void
ff_h264_luma_dc_dequant_idct_mmx
(
DCTELEM
*
output
,
DCTELEM
*
input
,
int
qmul
);
void
ff_h264_luma_dc_dequant_idct_sse2
(
DCTELEM
*
output
,
DCTELEM
*
input
,
int
qmul
);
...
...
@@ -350,24 +336,24 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
}
#if HAVE_YASM
if
(
mm_flags
&
AV_CPU_FLAG_MMX
)
{
c
->
h264_idct_dc_add
=
c
->
h264_idct_add
=
ff_h264_idct_add_mmx
;
c
->
h264_idct8_dc_add
=
c
->
h264_idct8_add
=
ff_h264_idct8_add_mmx
;
c
->
h264_idct_add16
=
ff_h264_idct_add16_mmx
;
c
->
h264_idct8_add4
=
ff_h264_idct8_add4_mmx
;
c
->
h264_idct_add8
=
ff_h264_idct_add8_mmx
;
c
->
h264_idct_add16intra
=
ff_h264_idct_add16intra_mmx
;
c
->
h264_idct_dc_add
=
c
->
h264_idct_add
=
ff_h264_idct_add_
8_
mmx
;
c
->
h264_idct8_dc_add
=
c
->
h264_idct8_add
=
ff_h264_idct8_add_
8_
mmx
;
c
->
h264_idct_add16
=
ff_h264_idct_add16_
8_
mmx
;
c
->
h264_idct8_add4
=
ff_h264_idct8_add4_
8_
mmx
;
c
->
h264_idct_add8
=
ff_h264_idct_add8_
8_
mmx
;
c
->
h264_idct_add16intra
=
ff_h264_idct_add16intra_
8_
mmx
;
c
->
h264_luma_dc_dequant_idct
=
ff_h264_luma_dc_dequant_idct_mmx
;
if
(
mm_flags
&
AV_CPU_FLAG_MMX2
)
{
c
->
h264_idct_dc_add
=
ff_h264_idct_dc_add_mmx2
;
c
->
h264_idct8_dc_add
=
ff_h264_idct8_dc_add_mmx2
;
c
->
h264_idct_add16
=
ff_h264_idct_add16_mmx2
;
c
->
h264_idct8_add4
=
ff_h264_idct8_add4_mmx2
;
c
->
h264_idct_add8
=
ff_h264_idct_add8_mmx2
;
c
->
h264_idct_add16intra
=
ff_h264_idct_add16intra_mmx2
;
c
->
h264_idct_dc_add
=
ff_h264_idct_dc_add_
8_
mmx2
;
c
->
h264_idct8_dc_add
=
ff_h264_idct8_dc_add_
8_
mmx2
;
c
->
h264_idct_add16
=
ff_h264_idct_add16_
8_
mmx2
;
c
->
h264_idct8_add4
=
ff_h264_idct8_add4_
8_
mmx2
;
c
->
h264_idct_add8
=
ff_h264_idct_add8_
8_
mmx2
;
c
->
h264_idct_add16intra
=
ff_h264_idct_add16intra_
8_
mmx2
;
c
->
h264_v_loop_filter_chroma
=
ff_deblock_v_chroma_8_mmxext
;
c
->
h264_h_loop_filter_chroma
=
ff_deblock_h_chroma_8_mmxext
;
...
...
@@ -398,8 +384,12 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
c
->
biweight_h264_pixels_tab
[
7
]
=
ff_h264_biweight_4x2_mmx2
;
if
(
mm_flags
&
AV_CPU_FLAG_SSE2
)
{
c
->
h264_idct8_add
=
ff_h264_idct8_add_sse2
;
c
->
h264_idct8_add4
=
ff_h264_idct8_add4_sse2
;
c
->
h264_idct8_add
=
ff_h264_idct8_add_8_sse2
;
c
->
h264_idct_add16
=
ff_h264_idct_add16_8_sse2
;
c
->
h264_idct8_add4
=
ff_h264_idct8_add4_8_sse2
;
c
->
h264_idct_add8
=
ff_h264_idct_add8_8_sse2
;
c
->
h264_idct_add16intra
=
ff_h264_idct_add16intra_8_sse2
;
c
->
h264_luma_dc_dequant_idct
=
ff_h264_luma_dc_dequant_idct_sse2
;
c
->
weight_h264_pixels_tab
[
0
]
=
ff_h264_weight_16x16_sse2
;
...
...
@@ -420,10 +410,6 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
c
->
h264_v_loop_filter_luma_intra
=
ff_deblock_v_luma_intra_8_sse2
;
c
->
h264_h_loop_filter_luma_intra
=
ff_deblock_h_luma_intra_8_sse2
;
#endif
c
->
h264_idct_add16
=
ff_h264_idct_add16_sse2
;
c
->
h264_idct_add8
=
ff_h264_idct_add8_sse2
;
c
->
h264_idct_add16intra
=
ff_h264_idct_add16intra_sse2
;
}
if
(
mm_flags
&
AV_CPU_FLAG_SSSE3
)
{
c
->
biweight_h264_pixels_tab
[
0
]
=
ff_h264_biweight_16x16_ssse3
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment