quantize_sse2.asm 7.43 KB
Newer Older
1
;
2
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
;
;  Use of this source code is governed by a BSD-style license and patent
;  grant that can be found in the LICENSE file in the root of the source
;  tree. All contributing project authors may be found in the AUTHORS
;  file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"


;int vp8_regular_quantize_b_impl_sse2(short *coeff_ptr, short *zbin_ptr,
;               short *qcoeff_ptr,short *dequant_ptr,
;               const int *default_zig_zag, short *round_ptr,
;               short *quant_ptr, short *dqcoeff_ptr,
;               unsigned short zbin_oq_value,
;               short *zbin_boost_ptr);
;
global sym(vp8_regular_quantize_b_impl_sse2)
sym(vp8_regular_quantize_b_impl_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 10
    push        rsi
    push        rdi
    push        rbx
    ; end prolog

    ALIGN_STACK 16, rax

    %define abs_minus_zbin_lo 0
    %define abs_minus_zbin_hi 16
    %define temp_qcoeff_lo 32
    %define temp_qcoeff_hi 48
    %define save_xmm6 64
    %define save_xmm7 80
    %define eob 96

    %define vp8_regularquantizeb_stack_size eob + 16

    sub         rsp, vp8_regularquantizeb_stack_size

Jan Kratochvil's avatar
Jan Kratochvil committed
45 46
    movdqa      OWORD PTR[rsp + save_xmm6], xmm6
    movdqa      OWORD PTR[rsp + save_xmm7], xmm7
47 48 49 50 51 52 53

    mov         rdx, arg(0)                 ;coeff_ptr
    mov         eax, arg(8)                 ;zbin_oq_value

    mov         rcx, arg(1)                 ;zbin_ptr
    movd        xmm7, eax

Jan Kratochvil's avatar
Jan Kratochvil committed
54 55
    movdqa      xmm0, OWORD PTR[rdx]
    movdqa      xmm4, OWORD PTR[rdx + 16]
56 57 58 59 60 61 62 63 64 65

    movdqa      xmm1, xmm0
    movdqa      xmm5, xmm4

    psraw       xmm0, 15                    ;sign of z (aka sz)
    psraw       xmm4, 15                    ;sign of z (aka sz)

    pxor        xmm1, xmm0
    pxor        xmm5, xmm4

Jan Kratochvil's avatar
Jan Kratochvil committed
66 67
    movdqa      xmm2, OWORD PTR[rcx]        ;load zbin_ptr
    movdqa      xmm3, OWORD PTR[rcx + 16]   ;load zbin_ptr
68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83

    pshuflw     xmm7, xmm7, 0
    psubw       xmm1, xmm0                  ;x = abs(z)

    punpcklwd   xmm7, xmm7                  ;duplicated zbin_oq_value
    psubw       xmm5, xmm4                  ;x = abs(z)

    paddw       xmm2, xmm7
    paddw       xmm3, xmm7

    psubw       xmm1, xmm2                  ;sub (zbin_ptr + zbin_oq_value)
    psubw       xmm5, xmm3                  ;sub (zbin_ptr + zbin_oq_value)

    mov         rdi, arg(5)                 ;round_ptr
    mov         rsi, arg(6)                 ;quant_ptr

Jan Kratochvil's avatar
Jan Kratochvil committed
84 85
    movdqa      OWORD PTR[rsp + abs_minus_zbin_lo], xmm1
    movdqa      OWORD PTR[rsp + abs_minus_zbin_hi], xmm5
86 87 88 89

    paddw       xmm1, xmm2                  ;add (zbin_ptr + zbin_oq_value) back
    paddw       xmm5, xmm3                  ;add (zbin_ptr + zbin_oq_value) back

Jan Kratochvil's avatar
Jan Kratochvil committed
90 91
    movdqa      xmm2, OWORD PTR[rdi]
    movdqa      xmm3, OWORD PTR[rsi]
92

Jan Kratochvil's avatar
Jan Kratochvil committed
93 94
    movdqa      xmm6, OWORD PTR[rdi + 16]
    movdqa      xmm7, OWORD PTR[rsi + 16]
95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110

    paddw       xmm1, xmm2
    paddw       xmm5, xmm6

    pmulhw      xmm1, xmm3
    pmulhw      xmm5, xmm7

    mov         rsi, arg(2)                 ;qcoeff_ptr
    pxor        xmm6, xmm6

    pxor        xmm1, xmm0
    pxor        xmm5, xmm4

    psubw       xmm1, xmm0
    psubw       xmm5, xmm4

Jan Kratochvil's avatar
Jan Kratochvil committed
111 112
    movdqa      OWORD PTR[rsp + temp_qcoeff_lo], xmm1
    movdqa      OWORD PTR[rsp + temp_qcoeff_hi], xmm5
113

Jan Kratochvil's avatar
Jan Kratochvil committed
114 115
    movdqa      OWORD PTR[rsi], xmm6        ;zero qcoeff
    movdqa      OWORD PTR[rsi + 16], xmm6   ;zero qcoeff
116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225

    xor         rax, rax
    mov         rcx, -1

    mov         [rsp + eob], rcx
    mov         rsi, arg(9)                 ;zbin_boost_ptr

    mov         rbx, arg(4)                 ;default_zig_zag

rq_zigzag_loop:
    movsxd      rcx, DWORD PTR[rbx + rax*4] ;now we have rc
    movsx       edi, WORD PTR [rsi]         ;*zbin_boost_ptr aka zbin
    lea         rsi, [rsi + 2]              ;zbin_boost_ptr++

    movsx       edx, WORD PTR[rsp + abs_minus_zbin_lo + rcx *2]

    sub         edx, edi                    ;x - zbin
    jl          rq_zigzag_1

    mov         rdi, arg(2)                 ;qcoeff_ptr

    movsx       edx, WORD PTR[rsp + temp_qcoeff_lo + rcx *2]

    cmp         edx, 0
    je          rq_zigzag_1

    mov         WORD PTR[rdi + rcx * 2], dx ;qcoeff_ptr[rc] = temp_qcoeff[rc]

    mov         rsi, arg(9)                 ;zbin_boost_ptr
    mov         [rsp + eob], rax            ;eob = i

rq_zigzag_1:
    movsxd      rcx, DWORD PTR[rbx + rax*4 + 4]
    movsx       edi, WORD PTR [rsi]         ;*zbin_boost_ptr aka zbin
    lea         rsi, [rsi + 2]              ;zbin_boost_ptr++

    movsx       edx, WORD PTR[rsp + abs_minus_zbin_lo + rcx *2]
    lea         rax, [rax + 1]

    sub         edx, edi                    ;x - zbin
    jl          rq_zigzag_1a

    mov         rdi, arg(2)                 ;qcoeff_ptr

    movsx       edx, WORD PTR[rsp + temp_qcoeff_lo + rcx *2]

    cmp         edx, 0
    je          rq_zigzag_1a

    mov         WORD PTR[rdi + rcx * 2], dx ;qcoeff_ptr[rc] = temp_qcoeff[rc]

    mov         rsi, arg(9)                 ;zbin_boost_ptr
    mov         [rsp + eob], rax            ;eob = i

rq_zigzag_1a:
    movsxd      rcx, DWORD PTR[rbx + rax*4 + 4]
    movsx       edi, WORD PTR [rsi]         ;*zbin_boost_ptr aka zbin
    lea         rsi, [rsi + 2]              ;zbin_boost_ptr++

    movsx       edx, WORD PTR[rsp + abs_minus_zbin_lo + rcx *2]
    lea         rax, [rax + 1]

    sub         edx, edi                    ;x - zbin
    jl          rq_zigzag_1b

    mov         rdi, arg(2)                 ;qcoeff_ptr

    movsx       edx, WORD PTR[rsp + temp_qcoeff_lo + rcx *2]

    cmp         edx, 0
    je          rq_zigzag_1b

    mov         WORD PTR[rdi + rcx * 2], dx ;qcoeff_ptr[rc] = temp_qcoeff[rc]

    mov         rsi, arg(9)                 ;zbin_boost_ptr
    mov         [rsp + eob], rax            ;eob = i

rq_zigzag_1b:
    movsxd      rcx, DWORD PTR[rbx + rax*4 + 4]
    movsx       edi, WORD PTR [rsi]         ;*zbin_boost_ptr aka zbin
    lea         rsi, [rsi + 2]              ;zbin_boost_ptr++

    movsx       edx, WORD PTR[rsp + abs_minus_zbin_lo + rcx *2]
    lea         rax, [rax + 1]

    sub         edx, edi                    ;x - zbin
    jl          rq_zigzag_1c

    mov         rdi, arg(2)                 ;qcoeff_ptr

    movsx       edx, WORD PTR[rsp + temp_qcoeff_lo + rcx *2]

    cmp         edx, 0
    je          rq_zigzag_1c

    mov         WORD PTR[rdi + rcx * 2], dx ;qcoeff_ptr[rc] = temp_qcoeff[rc]

    mov         rsi, arg(9)                 ;zbin_boost_ptr
    mov         [rsp + eob], rax            ;eob = i

rq_zigzag_1c:
    lea         rax, [rax + 1]

    cmp         rax, 16
    jl          rq_zigzag_loop

    mov         rdi, arg(2)                 ;qcoeff_ptr
    mov         rcx, arg(3)                 ;dequant_ptr
    mov         rsi, arg(7)                 ;dqcoeff_ptr

Jan Kratochvil's avatar
Jan Kratochvil committed
226 227
    movdqa      xmm2, OWORD PTR[rdi]
    movdqa      xmm3, OWORD PTR[rdi + 16]
228

Jan Kratochvil's avatar
Jan Kratochvil committed
229 230
    movdqa      xmm0, OWORD PTR[rcx]
    movdqa      xmm1, OWORD PTR[rcx + 16]
231 232 233 234

    pmullw      xmm0, xmm2
    pmullw      xmm1, xmm3

Jan Kratochvil's avatar
Jan Kratochvil committed
235 236
    movdqa      OWORD PTR[rsi], xmm0        ;store dqcoeff
    movdqa      OWORD PTR[rsi + 16], xmm1   ;store dqcoeff
237 238 239

    mov         rax, [rsp + eob]

Jan Kratochvil's avatar
Jan Kratochvil committed
240 241
    movdqa      xmm6, OWORD PTR[rsp + save_xmm6]
    movdqa      xmm7, OWORD PTR[rsp + save_xmm7]
242 243 244 245 246 247 248 249 250 251 252 253 254

    add         rax, 1

    add         rsp, vp8_regularquantizeb_stack_size
    pop         rsp

    ; begin epilog
    pop         rbx
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret