Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
BC
public
external
libvpx
Commits
57e72208
Commit
57e72208
authored
Dec 03, 2012
by
Johann
Committed by
Gerrit Code Review
Dec 03, 2012
Browse files
Merge "Remove ARM optimizations from VP9" into experimental
parents
514e1c93
34591b54
Changes
112
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
0 additions
and
5462 deletions
+0
-5462
libs.mk
libs.mk
+0
-1
vp8/vp8dx.mk
vp8/vp8dx.mk
+0
-24
vp9/common/arm/armv6/vp9_bilinearfilter_v6.asm
vp9/common/arm/armv6/vp9_bilinearfilter_v6.asm
+0
-237
vp9/common/arm/armv6/vp9_copymem16x16_v6.asm
vp9/common/arm/armv6/vp9_copymem16x16_v6.asm
+0
-186
vp9/common/arm/armv6/vp9_copymem8x4_v6.asm
vp9/common/arm/armv6/vp9_copymem8x4_v6.asm
+0
-128
vp9/common/arm/armv6/vp9_copymem8x8_v6.asm
vp9/common/arm/armv6/vp9_copymem8x8_v6.asm
+0
-128
vp9/common/arm/armv6/vp9_dc_only_idct_add_v6.asm
vp9/common/arm/armv6/vp9_dc_only_idct_add_v6.asm
+0
-67
vp9/common/arm/armv6/vp9_filter_v6.asm
vp9/common/arm/armv6/vp9_filter_v6.asm
+0
-624
vp9/common/arm/armv6/vp9_idct_v6.asm
vp9/common/arm/armv6/vp9_idct_v6.asm
+0
-345
vp9/common/arm/armv6/vp9_iwalsh_v6.asm
vp9/common/arm/armv6/vp9_iwalsh_v6.asm
+0
-152
vp9/common/arm/armv6/vp9_loopfilter_v6.asm
vp9/common/arm/armv6/vp9_loopfilter_v6.asm
+0
-1282
vp9/common/arm/armv6/vp9_recon_v6.asm
vp9/common/arm/armv6/vp9_recon_v6.asm
+0
-281
vp9/common/arm/armv6/vp9_simpleloopfilter_v6.asm
vp9/common/arm/armv6/vp9_simpleloopfilter_v6.asm
+0
-286
vp9/common/arm/armv6/vp9_sixtappredict8x4_v6.asm
vp9/common/arm/armv6/vp9_sixtappredict8x4_v6.asm
+0
-273
vp9/common/arm/neon/vp9_bilinearpredict16x16_neon.asm
vp9/common/arm/neon/vp9_bilinearpredict16x16_neon.asm
+0
-357
vp9/common/arm/neon/vp9_bilinearpredict4x4_neon.asm
vp9/common/arm/neon/vp9_bilinearpredict4x4_neon.asm
+0
-130
vp9/common/arm/neon/vp9_bilinearpredict8x4_neon.asm
vp9/common/arm/neon/vp9_bilinearpredict8x4_neon.asm
+0
-135
vp9/common/arm/neon/vp9_bilinearpredict8x8_neon.asm
vp9/common/arm/neon/vp9_bilinearpredict8x8_neon.asm
+0
-183
vp9/common/arm/neon/vp9_buildintrapredictorsmby_neon.asm
vp9/common/arm/neon/vp9_buildintrapredictorsmby_neon.asm
+0
-584
vp9/common/arm/neon/vp9_copymem16x16_neon.asm
vp9/common/arm/neon/vp9_copymem16x16_neon.asm
+0
-59
No files found.
libs.mk
View file @
57e72208
...
...
@@ -109,7 +109,6 @@ ifeq ($(CONFIG_VP9_ENCODER),yes)
CODEC_SRCS-yes
+=
$(
addprefix
$(VP9_PREFIX)
,
$(
call
enabled,VP9_CX_SRCS
))
CODEC_EXPORTS-yes
+=
$(
addprefix
$(VP9_PREFIX)
,
$(VP9_CX_EXPORTS)
)
CODEC_SRCS-yes
+=
$(VP9_PREFIX)
vp9cx.mk vpx/vp8.h vpx/vp8cx.h
CODEC_SRCS-$(ARCH_ARM)
+=
$(VP9_PREFIX)
vp98cx_arm.mk
INSTALL-LIBS-yes
+=
include/vpx/vp8.h include/vpx/vp8cx.h
INSTALL_MAPS
+=
include/vpx/%
$(SRC_PATH_BARE)
/
$(VP9_PREFIX)
/%
CODEC_DOC_SRCS
+=
vpx/vp8.h vpx/vp8cx.h
...
...
vp8/vp8dx.mk
View file @
57e72208
...
...
@@ -18,30 +18,6 @@ VP8_DX_SRCS_REMOVE-no += $(VP8_COMMON_SRCS_REMOVE-no)
VP8_DX_SRCS-yes
+=
vp8_dx_iface.c
# common
#define ARM
#define DISABLE_THREAD
#INCLUDES += algo/vpx_common/vpx_mem/include
#INCLUDES += common
#INCLUDES += common
#INCLUDES += common
#INCLUDES += common
#INCLUDES += decoder
# decoder
#define ARM
#define DISABLE_THREAD
#INCLUDES += algo/vpx_common/vpx_mem/include
#INCLUDES += common
#INCLUDES += common
#INCLUDES += common
#INCLUDES += common
#INCLUDES += decoder
VP8_DX_SRCS-yes
+=
decoder/asm_dec_offsets.c
VP8_DX_SRCS-yes
+=
decoder/dboolhuff.c
VP8_DX_SRCS-yes
+=
decoder/decodemv.c
...
...
vp9/common/arm/armv6/vp9_bilinearfilter_v6.asm
deleted
100644 → 0
View file @
514e1c93
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT
|
vp9_filter_block2d_bil_first_pass_armv6
|
EXPORT
|
vp9_filter_block2d_bil_second_pass_armv6
|
AREA
|
.text
|
,
CODE
,
READONLY
; name this block of code
;-----------------------------------------------------------------------
; vp9_filter_block2d_bil_first_pass_armv6
;
; First (horizontal) pass of the 2-tap bilinear filter.
;
; In:    r0      unsigned char  *src_ptr
;        r1      unsigned short *dst_ptr
;        r2      unsigned int    src_pitch
;        r3      unsigned int    height   (number of source rows processed)
;        [sp,#0] unsigned int    width    (at entry; [sp,#36] after the push)
;        [sp,#4] const short    *vp9_filter (at entry; [sp,#40] after the push)
; Out:   16-bit results written through dst_ptr, transposed: the results
;        of one source row are stored (2*height + 2) bytes apart, and
;        successive source rows start 2 bytes apart. Storing the data
;        transposed makes the second pass a simple linear read.
; Saves: r4-r11 (pushed together with lr, restored on return).
; Clobb: r0-r3, r12, flags.
;
; Pixels are handled four at a time (width >> 2 inner iterations), so
; width is presumably always a non-zero multiple of 4 -- confirm with
; the callers. The filtered path reads one source byte beyond each
; group of four ([r0, #4]).
;
; Both taps are fetched with a single 32-bit load. When that word equals
; 128 the filter is skipped and the source bytes are only widened to
; 16 bits and transposed.
;-----------------------------------------------------------------------
|vp9_filter_block2d_bil_first_pass_armv6| PROC
    stmfd   sp!, {r4 - r11, lr}         ; 9 registers = 36 bytes

    ldr     r11, [sp, #40]              ; r11 = vp9_filter
    ldr     r4, [sp, #36]               ; r4  = width

    mov     r12, r3                     ; r12 = rows left to process

    add     r7, r2, r4                  ; r7 = src_pitch + width
    pld     [r0, r7]                    ; preload ahead, into the next row

    sub     r2, r2, r4                  ; r2 = src_pitch - width: step from
                                        ; the end of one row to the next

    ldr     r5, [r11]                   ; r5 = both filter taps (one word)

    mov     r3, r3, lsl #1              ; r3 = height * 2
    add     r3, r3, #2                  ; r3 = output stride in bytes; the
                                        ; extra 2 keeps it a multiple of 4
                                        ; when height is odd

    mov     r11, r1                     ; r11 = output start for this row

    cmp     r5, #128                    ; tap word == 128: nothing to
    beq     bil_1st_copy_row            ; filter, take the copy path

;-- filtered path --------------------------------------------------------
|bil_1st_row_loop|
    ldrb    r6, [r0]                    ; prime src[0..2] for the first group
    ldrb    r7, [r0, #1]
    ldrb    r8, [r0, #2]
    mov     lr, r4, lsr #2              ; lr = groups of 4 pixels in the row

|bil_1st_quad_loop|
    ldrb    r9, [r0, #3]
    ldrb    r10, [r0, #4]

    pkhbt   r6, r6, r7, lsl #16         ; r6 = src[1]:src[0]
    pkhbt   r7, r7, r8, lsl #16         ; r7 = src[2]:src[1]

    smuad   r6, r6, r5                  ; dual 16x16 multiply-add with taps
    pkhbt   r8, r8, r9, lsl #16         ; r8 = src[3]:src[2]

    smuad   r7, r7, r5
    pkhbt   r9, r9, r10, lsl #16        ; r9 = src[4]:src[3]

    smuad   r8, r8, r5
    smuad   r9, r9, r5

    add     r0, r0, #4                  ; consumed 4 source pixels
    subs    lr, lr, #1                  ; flags stay live until the bne below

    add     r6, r6, #0x40               ; round: + 64 ...
    add     r7, r7, #0x40
    usat    r6, #16, r6, asr #7         ; ... >> 7, saturate to unsigned 16
    usat    r7, #16, r7, asr #7

    strh    r6, [r1], r3                ; transposed store, step one stride
    add     r8, r8, #0x40

    strh    r7, [r1], r3
    add     r9, r9, #0x40
    usat    r8, #16, r8, asr #7
    usat    r9, #16, r9, asr #7

    strh    r8, [r1], r3

    ldrneb  r6, [r0]                    ; more groups left: prime the next
    strh    r9, [r1], r3
    ldrneb  r7, [r0, #1]
    ldrneb  r8, [r0, #2]

    bne     bil_1st_quad_loop

    add     r0, r0, r2                  ; advance to the next source row
    subs    r12, r12, #1

    add     r9, r2, r4, lsl #1          ; r9 = src_pitch + width
    pld     [r0, r9]                    ; preload ahead, into the next row

    add     r11, r11, #2                ; next row starts one halfword on
    mov     r1, r11

    bne     bil_1st_row_loop

    ldmfd   sp!, {r4 - r11, pc}

;-- copy path (filter skipped) -------------------------------------------
|bil_1st_copy_row|
    mov     lr, r4, lsr #2              ; lr = groups of 4 pixels in the row

|bil_1st_copy_quad|
    ldrb    r6, [r0]
    ldrb    r7, [r0, #1]
    ldrb    r8, [r0, #2]
    ldrb    r9, [r0, #3]

    strh    r6, [r1], r3                ; widen to 16 bits, store transposed
    add     r0, r0, #4
    strh    r7, [r1], r3
    subs    lr, lr, #1
    strh    r8, [r1], r3
    strh    r9, [r1], r3

    bne     bil_1st_copy_quad

    subs    r12, r12, #1
    add     r0, r0, r2                  ; advance to the next source row
    add     r11, r11, #2                ; next row starts one halfword on
    mov     r1, r11

    bne     bil_1st_copy_row

    ldmfd   sp!, {r4 - r11, pc}

    ENDP    ; |vp9_filter_block2d_bil_first_pass_armv6|
;-----------------------------------------------------------------------
; vp9_filter_block2d_bil_second_pass_armv6
;
; Second pass of the 2-tap bilinear filter. It reads the transposed
; 16-bit intermediate data linearly and writes 8-bit results back in
; untransposed order.
;
; In:    r0      unsigned short *src_ptr  (loaded with word accesses)
;        r1      unsigned char  *dst_ptr
;        r2      int             dst_pitch
;        r3      unsigned int    height
;        [sp,#0] unsigned int    width    (at entry; [sp,#36] after the push)
;        [sp,#4] const short    *vp9_filter (at entry; [sp,#40] after the push)
; Out:   For each of the `width` source lines, `height` bytes are stored
;        dst_pitch apart; successive source lines start 1 byte apart.
; Saves: r4-r11 (pushed together with lr, restored on return).
; Clobb: r0, r1, r12, flags.
;
; Four outputs are produced per inner iteration (height >> 2 iterations),
; so height is presumably always a non-zero multiple of 4 -- confirm with
; the callers. Each source line advances the source pointer by
; 2*height + 4 bytes in total.
;
; Both taps are fetched with a single 32-bit load. When that word equals
; 128 the filter is skipped and the low byte of every halfword is copied.
;-----------------------------------------------------------------------
|vp9_filter_block2d_bil_second_pass_armv6| PROC
    stmfd   sp!, {r4 - r11, lr}         ; 9 registers = 36 bytes

    ldr     r11, [sp, #40]              ; r11 = vp9_filter
    ldr     r4, [sp, #36]               ; r4  = width
    ldr     r5, [r11]                   ; r5  = both filter taps (one word)
    mov     r12, r4                     ; outer count is width because the
                                        ; input matrix is transposed
    mov     r11, r1                     ; r11 = output start for this line

    cmp     r5, #128                    ; tap word == 128: nothing to
    beq     bil_2nd_copy_line           ; filter, take the copy path

;-- filtered path --------------------------------------------------------
|bil_2nd_line_loop|
    ldr     r6, [r0]                    ; r6  = src[1]:src[0]
    ldr     r8, [r0, #4]                ; r8  = src[3]:src[2]
    ldrh    r10, [r0, #8]               ; r10 = src[4]
    mov     lr, r3, lsr #2              ; lr = groups of 4 outputs

|bil_2nd_quad_loop|
    pkhtb   r7, r6, r8                  ; r7 = src[1]:src[2]
    pkhtb   r9, r8, r10                 ; r9 = src[3]:src[4]

    smuad   r6, r6, r5                  ; dual 16x16 multiply-add with taps
    smuad   r8, r8, r5

    subs    lr, lr, #1                  ; flags stay live until the bne below

    smuadx  r7, r7, r5                  ; halves are swapped in r7/r9, so
    smuadx  r9, r9, r5                  ; use the exchanging form

    add     r0, r0, #8                  ; consumed 4 halfwords

    add     r6, r6, #0x40               ; round: + 64 ...
    add     r7, r7, #0x40
    usat    r6, #8, r6, asr #7          ; ... >> 7, saturate to unsigned 8
    usat    r7, #8, r7, asr #7
    strb    r6, [r1], r2                ; transposed back on store

    add     r8, r8, #0x40
    strb    r7, [r1], r2
    add     r9, r9, #0x40
    usat    r8, #8, r8, asr #7
    usat    r9, #8, r9, asr #7
    strb    r8, [r1], r2

    ldrne   r6, [r0]                    ; more groups left: prime the next
    strb    r9, [r1], r2
    ldrne   r8, [r0, #4]
    ldrneh  r10, [r0, #8]

    bne     bil_2nd_quad_loop

    subs    r12, r12, #1
    add     r0, r0, #4                  ; skip to the start of the next line
    add     r11, r11, #1                ; next line starts one byte on
    mov     r1, r11

    bne     bil_2nd_line_loop

    ldmfd   sp!, {r4 - r11, pc}

;-- copy path (filter skipped) -------------------------------------------
|bil_2nd_copy_line|
    mov     lr, r3, lsr #2              ; lr = groups of 4 outputs

|bil_2nd_copy_quad|
    ldr     r6, [r0], #4                ; two halfwords per load
    subs    lr, lr, #1
    ldr     r8, [r0], #4

    strb    r6, [r1], r2                ; low byte of the low halfword
    mov     r7, r6, lsr #16
    strb    r7, [r1], r2                ; low byte of the high halfword
    mov     r9, r8, lsr #16
    strb    r8, [r1], r2
    strb    r9, [r1], r2

    bne     bil_2nd_copy_quad

    subs    r12, r12, #1
    add     r0, r0, #4                  ; skip to the start of the next line
    add     r11, r11, #1                ; next line starts one byte on
    mov     r1, r11

    bne     bil_2nd_copy_line

    ldmfd   sp!, {r4 - r11, pc}

    ENDP    ; |vp9_filter_block2d_bil_second_pass_armv6|
END
vp9/common/arm/armv6/vp9_copymem16x16_v6.asm
deleted
100644 → 0
View file @
514e1c93
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT
|
vp9_copy_mem16x16_v6
|
; ARM
; REQUIRE8
; PRESERVE8
AREA
Bl
ock
,
CODE
,
READONLY
; name this block of code
;-----------------------------------------------------------------------
; void vp9_copy_mem16x16_v6(unsigned char *src, int src_stride,
;                           unsigned char *dst, int dst_stride)
;
; Copies 16 rows of 16 bytes from src to dst.
;
; In:    r0 = src, r1 = src_stride, r2 = dst, r3 = dst_stride
; Saves: r4-r7 (pushed on entry, popped before every return).
; Clobb: r0-r3, r12, flags.
;
; The low bits of the initial src pointer select one of four copy loops:
;   src & 15 == 0  -> one 4-word ldm/stm per row
;   src &  7 == 0  -> two 2-word ldm/stm per row
;   src &  3 == 0  -> four ldr/str per row
;   otherwise      -> byte by byte
; NOTE(review): only the initial src pointer is tested. The word and
; multi-word loops also store through dst and step by both strides, so
; they appear to rely on dst and the strides being word aligned --
; confirm with the callers.
;
; Every exit returns with `bx lr` rather than `mov pc, lr`: on ARMv6 a
; MOV to pc does not switch instruction-set state, so only BX returns
; correctly to a Thumb caller. ARM-state callers see no difference.
;-----------------------------------------------------------------------
|vp9_copy_mem16x16_v6| PROC
    stmdb   sp!, {r4 - r7}              ; push {r4-r7}

    pld     [r0, #31]                   ; preload ahead of the first row

    ands    r4, r0, #15                 ; r4 is scratch, only flags matter
    beq     copy_mem16x16_fast

    ands    r4, r0, #7
    beq     copy_mem16x16_8

    ands    r4, r0, #3
    beq     copy_mem16x16_4

    ; -- unaligned source: copy one byte at a time ----------------------
    ldrb    r4, [r0]                    ; prime the first 4 bytes
    ldrb    r5, [r0, #1]
    ldrb    r6, [r0, #2]
    ldrb    r7, [r0, #3]

    mov     r12, #16                    ; r12 = rows left

copy_mem16x16_1_loop
    strb    r4, [r2]
    strb    r5, [r2, #1]
    strb    r6, [r2, #2]
    strb    r7, [r2, #3]

    ldrb    r4, [r0, #4]
    ldrb    r5, [r0, #5]
    ldrb    r6, [r0, #6]
    ldrb    r7, [r0, #7]

    subs    r12, r12, #1                ; flags stay live until the bne below

    strb    r4, [r2, #4]
    strb    r5, [r2, #5]
    strb    r6, [r2, #6]
    strb    r7, [r2, #7]

    ldrb    r4, [r0, #8]
    ldrb    r5, [r0, #9]
    ldrb    r6, [r0, #10]
    ldrb    r7, [r0, #11]

    strb    r4, [r2, #8]
    strb    r5, [r2, #9]
    strb    r6, [r2, #10]
    strb    r7, [r2, #11]

    ldrb    r4, [r0, #12]
    ldrb    r5, [r0, #13]
    ldrb    r6, [r0, #14]
    ldrb    r7, [r0, #15]

    add     r0, r0, r1                  ; next source row

    strb    r4, [r2, #12]
    strb    r5, [r2, #13]
    strb    r6, [r2, #14]
    strb    r7, [r2, #15]

    add     r2, r2, r3                  ; next destination row

    ldrneb  r4, [r0]                    ; rows left: prime the next 4 bytes
    ldrneb  r5, [r0, #1]
    ldrneb  r6, [r0, #2]
    ldrneb  r7, [r0, #3]

    pld     [r0, #31]                   ; preload ahead

    bne     copy_mem16x16_1_loop

    ldmia   sp!, {r4 - r7}              ; pop {r4-r7}
    bx      lr                          ; interworking-safe return

    ; -- source word aligned: copy 4 bytes at a time --------------------
copy_mem16x16_4
    ldr     r4, [r0]                    ; prime the first row
    ldr     r5, [r0, #4]
    ldr     r6, [r0, #8]
    ldr     r7, [r0, #12]

    mov     r12, #16                    ; r12 = rows left

copy_mem16x16_4_loop
    subs    r12, r12, #1                ; flags stay live until the bne below
    add     r0, r0, r1                  ; next source row

    str     r4, [r2]
    str     r5, [r2, #4]
    str     r6, [r2, #8]
    str     r7, [r2, #12]

    add     r2, r2, r3                  ; next destination row

    ldrne   r4, [r0]                    ; rows left: load the next row
    ldrne   r5, [r0, #4]
    ldrne   r6, [r0, #8]
    ldrne   r7, [r0, #12]

    pld     [r0, #31]                   ; preload ahead

    bne     copy_mem16x16_4_loop

    ldmia   sp!, {r4 - r7}              ; pop {r4-r7}
    bx      lr                          ; interworking-safe return

    ; -- source 8-byte aligned: copy 8 bytes at a time ------------------
copy_mem16x16_8
    sub     r1, r1, #16                 ; ldm/stm with writeback already
    sub     r3, r3, #16                 ; advance 16 bytes per row

    mov     r12, #16                    ; r12 = rows left

copy_mem16x16_8_loop
    ldmia   r0!, {r4-r5}
    ldmia   r0!, {r6-r7}

    add     r0, r0, r1                  ; next source row

    stmia   r2!, {r4-r5}
    subs    r12, r12, #1                ; flags stay live until the bne below
    stmia   r2!, {r6-r7}

    add     r2, r2, r3                  ; next destination row

    pld     [r0, #31]                   ; preload ahead

    bne     copy_mem16x16_8_loop

    ldmia   sp!, {r4 - r7}              ; pop {r4-r7}
    bx      lr                          ; interworking-safe return

    ; -- source 16-byte aligned: copy 16 bytes at a time ----------------
copy_mem16x16_fast
    ; No stride adjustment here: ldm/stm below do not write back.
    mov     r12, #16                    ; r12 = rows left

copy_mem16x16_fast_loop
    ldmia   r0, {r4-r7}
    add     r0, r0, r1                  ; next source row

    subs    r12, r12, #1                ; flags stay live until the bne below
    stmia   r2, {r4-r7}
    add     r2, r2, r3                  ; next destination row

    pld     [r0, #31]                   ; preload ahead

    bne     copy_mem16x16_fast_loop

    ldmia   sp!, {r4 - r7}              ; pop {r4-r7}
    bx      lr                          ; interworking-safe return

    ENDP    ; |vp9_copy_mem16x16_v6|
END
vp9/common/arm/armv6/vp9_copymem8x4_v6.asm
deleted
100644 → 0
View file @
514e1c93
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT
|
vp9_copy_mem8x4_v6
|
; ARM
; REQUIRE8
; PRESERVE8
AREA
Bl
ock
,
CODE
,
READONLY
; name this block of code
;void vp9_copy_mem8x4_v6( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
vp9_copy_mem8x4_v6
|
PROC
;push {r4-r5}
stmdb
sp
!
,
{
r4
-
r5
}
;preload
pld
[
r0
]
pld
[
r0
,
r1
]
pld
[
r0
,
r1
,
lsl
#
1
]
ands
r4
,
r0
,
#
7
beq
copy_mem8x4_fast
ands
r4
,
r0
,
#
3
beq
copy_mem8x4_4
;copy 1 byte each time
ldrb
r4
,
[
r0
]
ldrb
r5
,
[
r0
,
#
1
]
mov
r12
,
#
4
copy_mem8x4_1_loop
strb
r4
,
[
r2
]
strb
r5
,
[
r2
,
#
1
]
ldrb
r4
,
[
r0
,
#
2
]
ldrb
r5
,
[
r0
,
#
3
]
subs
r12
,
r12
,
#
1
strb
r4
,
[
r2
,
#
2
]
strb
r5
,
[
r2
,
#
3
]
ldrb
r4
,
[
r0
,
#
4
]
ldrb
r5
,
[
r0
,
#
5
]
strb
r4
,
[
r2
,
#
4
]
strb
r5
,
[
r2
,
#
5
]
ldrb
r4
,
[
r0
,
#
6
]
ldrb
r5
,
[
r0
,
#
7
]
add
r0
,
r0
,
r1
strb
r4
,
[
r2
,
#
6
]
strb
r5
,
[
r2
,
#
7
]
add
r2
,
r2
,
r3
ldrneb
r4
,
[
r0
]
ldrneb
r5
,
[
r0
,
#
1
]
bne
copy_mem8x4_1_loop
ldmia
sp
!
,
{
r4
-
r5
}
;pop {r4-r5}
mov
pc
,
lr
;copy 4 bytes each time
copy_mem8x4_4
ldr
r4
,
[
r0
]
ldr
r5
,
[
r0
,
#
4
]
mov
r12
,
#
4
copy_mem8x4_4_loop
subs
r12
,
r12
,
#
1
add
r0
,
r0
,
r1
str
r4
,
[
r2
]
str
r5
,
[
r2
,
#
4
]
add
r2
,
r2
,
r3
ldrne
r4
,
[
r0
]
ldrne
r5
,
[
r0
,
#
4
]
bne
copy_mem8x4_4_loop
ldmia
sp
!
,
{
r4
-
r5
}
;pop {r4-r5}
mov
pc
,
lr
;copy 8 bytes each time
copy_mem8x4_fast
;sub r1, r1, #8
;sub r3, r3, #8
mov
r12
,
#
4
copy_mem8x4_fast_loop
ldmia
r0
,
{
r4
-
r5
}
;ldm r0, {r4-r5}
add
r0
,
r0
,
r1
subs
r12
,
r12
,
#
1
stmia
r2
,
{
r4
-
r5
}
;stm r2, {r4-r5}
add
r2
,
r2
,
r3
bne
copy_mem8x4_fast_loop
ldmia
sp
!
,
{
r4
-
r5
}
;pop {r4-r5}
mov
pc
,
lr