syn_filt_neon.s 3.75 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
@/*
@ ** Copyright 2003-2010, VisualOn, Inc.
@ **
@ ** Licensed under the Apache License, Version 2.0 (the "License");
@ ** you may not use this file except in compliance with the License.
@ ** You may obtain a copy of the License at
@ **
@ **     http://www.apache.org/licenses/LICENSE-2.0
@ **
@ ** Unless required by applicable law or agreed to in writing, software
@ ** distributed under the License is distributed on an "AS IS" BASIS,
@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ ** See the License for the specific language governing permissions and
@ ** limitations under the License.
@ */
@
@void Syn_filt(
@     Word16 a[],                           /* (i) Q12 : a[m+1] prediction coefficients           */
@     Word16 x[],                           /* (i)     : input signal                             */
@     Word16 y[],                           /* (o)     : output signal                            */
@     Word16 mem[],                         /* (i/o)   : memory associated with this filtering.   */
@)
@***********************************************************************
@ a[]    ---   r0
@ x[]    ---   r1
@ y[]    ---   r2
@ mem[]  ---   r3
@ m ---  16  lg --- 80  update --- 1

30
          .section  .text
31 32 33 34
          .global   Syn_filt_asm

Syn_filt_asm:

35
          STMFD   	r13!, {r4 - r12, r14}
36
          SUB           r13, r13, #700                   @ y_buf[L_FRAME16k + M16k]
37

38 39 40 41 42 43
          MOV           r4, r3                           @ copy mem[] address
          MOV           r5, r13                          @ copy yy = y_buf address

          @ for(i = 0@ i < m@ i++)
          @{
          @    *yy++ = mem[i]@
44
          @}
45 46 47 48 49 50 51 52 53 54 55 56
          VLD1.S16      {D0, D1, D2, D3}, [r4]!          @load 16 mems
	  VST1.S16      {D0, D1, D2, D3}, [r5]!          @store 16 mem[] to *yy

          LDRSH         r5, [r0], #2                     @ load a[0]
          MOV           r8, #0                           @ i = 0
          MOV           r5, r5, ASR #1                   @ a0 = a[0] >> 1
          VMOV.S16      D8[0], r5
          @ load all a[]
          VLD1.S16      {D0, D1, D2, D3}, [r0]!          @ load a[1] ~ a[16]
	  VREV64.16     D0, D0
	  VREV64.16     D1, D1
	  VREV64.16     D2, D2
57
	  VREV64.16     D3, D3
58 59 60 61 62 63 64 65 66 67 68 69 70
	  MOV           r8, #0                           @ loop times
	  MOV           r10, r13                         @ temp = y_buf
	  ADD           r4, r13, #32                     @ yy[i] address

          VLD1.S16      {D4, D5, D6, D7}, [r10]!         @ first 16 temp_p

SYN_LOOP:

          LDRSH         r6, [r1], #2                     @ load x[i]
	  MUL           r12, r6, r5                      @ L_tmp = x[i] * a0
	  ADD           r10, r4, r8, LSL #1              @ y[i], yy[i] address

	  VDUP.S32      Q10, r12
71
	  VMULL.S16     Q5, D3, D4
72 73 74 75 76 77 78 79 80 81 82 83 84
          VMLAL.S16     Q5, D2, D5
          VMLAL.S16     Q5, D1, D6
          VMLAL.S16     Q5, D0, D7
          VEXT.8        D4, D4, D5, #2
          VEXT.8        D5, D5, D6, #2
          VEXT.8        D6, D6, D7, #2
          VPADD.S32     D12, D10, D11
          ADD           r8, r8, #1
          VPADD.S32     D10, D12, D12

	  VDUP.S32      Q7, D10[0]

	  VSUB.S32      Q9, Q10, Q7
85
          VQRSHRN.S32   D20, Q9, #12
86 87 88 89
          VMOV.S16      r9, D20[0]
          VEXT.8        D7, D7, D20, #2
          CMP           r8, #80
          STRH          r9, [r10]                        @ yy[i]
90 91
          STRH          r9, [r2], #2                     @ y[i]

92
          BLT           SYN_LOOP
93

94 95 96
          @ update mem[]
          ADD           r5, r13, #160                    @ yy[64] address
	  VLD1.S16      {D0, D1, D2, D3}, [r5]!
97
	  VST1.S16      {D0, D1, D2, D3}, [r3]!
98 99

Syn_filt_asm_end:
100 101 102

          ADD           r13, r13, #700
          LDMFD   	r13!, {r4 - r12, r15}
103
          @ENDFUNC
104
          .end
105

106