/*
 * software RGB to RGB converter
 * pluralize by software PAL8 to RGB converter
 *              software YUV to YUV converter
 *              software YUV to RGB converter
 * Written by Nick Kurshev.
 * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
 * lot of big-endian byte order fixes by Alex Beregszaszi
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <stddef.h>

/*
 * This template is included once per CPU flavor (C, MMX, MMX2, 3DNow!, ...),
 * so every instruction macro is undefined first and then redefined according
 * to the HAVE_* flags active for the current inclusion.
 */
#undef PREFETCH
#undef MOVNTQ
#undef EMMS
#undef SFENCE
#undef MMREG_SIZE
#undef PAVGB

/* Width in bytes of the vector registers used by this build. */
#if HAVE_SSE2
#define MMREG_SIZE 16
#else
#define MMREG_SIZE 8
#endif

/* Prefetch hint and byte-average instruction for the selected CPU. */
#if HAVE_AMD3DNOW
#define PREFETCH  "prefetch"
#define PAVGB     "pavgusb"
#elif HAVE_MMX2
#define PREFETCH "prefetchnta"
#define PAVGB     "pavgb"
#else
#define PREFETCH  " # nop"
#endif

#if HAVE_AMD3DNOW
/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
#define EMMS     "femms"
#else
#define EMMS     "emms"
#endif

/* Non-temporal store (and the fence that must follow it) when available;
 * plain movq and a no-op fence otherwise. */
#if HAVE_MMX2
#define MOVNTQ "movntq"
#define SFENCE "sfence"
#else
#define MOVNTQ "movq"
#define SFENCE " # nop"
#endif

static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size)
Nick Kurshev's avatar
Nick Kurshev committed
68
{
69 70 71
    uint8_t *dest = dst;
    const uint8_t *s = src;
    const uint8_t *end;
Ramiro Polla's avatar
Indent.  
Ramiro Polla committed
72
#if HAVE_MMX
Ramiro Polla's avatar
Ramiro Polla committed
73
    const uint8_t *mm_end;
Ramiro Polla's avatar
Indent.  
Ramiro Polla committed
74
#endif
75
    end = s + src_size;
Ramiro Polla's avatar
Indent.  
Ramiro Polla committed
76
#if HAVE_MMX
Ramiro Polla's avatar
Ramiro Polla committed
77 78 79
    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
    mm_end = end - 23;
    __asm__ volatile("movq        %0, %%mm7"::"m"(mask32a):"memory");
Ramiro Polla's avatar
Ramiro Polla committed
80
    while (s < mm_end) {
Ramiro Polla's avatar
Ramiro Polla committed
81
        __asm__ volatile(
82 83 84 85 86 87 88 89 90
            PREFETCH"    32%1           \n\t"
            "movd          %1, %%mm0    \n\t"
            "punpckldq    3%1, %%mm0    \n\t"
            "movd         6%1, %%mm1    \n\t"
            "punpckldq    9%1, %%mm1    \n\t"
            "movd        12%1, %%mm2    \n\t"
            "punpckldq   15%1, %%mm2    \n\t"
            "movd        18%1, %%mm3    \n\t"
            "punpckldq   21%1, %%mm3    \n\t"
91 92 93 94
            "por        %%mm7, %%mm0    \n\t"
            "por        %%mm7, %%mm1    \n\t"
            "por        %%mm7, %%mm2    \n\t"
            "por        %%mm7, %%mm3    \n\t"
95 96 97 98 99 100 101
            MOVNTQ"     %%mm0,   %0     \n\t"
            MOVNTQ"     %%mm1,  8%0     \n\t"
            MOVNTQ"     %%mm2, 16%0     \n\t"
            MOVNTQ"     %%mm3, 24%0"
            :"=m"(*dest)
            :"m"(*s)
            :"memory");
Ramiro Polla's avatar
Ramiro Polla committed
102 103 104 105 106
        dest += 32;
        s += 24;
    }
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
Ramiro Polla's avatar
Indent.  
Ramiro Polla committed
107
#endif
Ramiro Polla's avatar
Ramiro Polla committed
108
    while (s < end) {
Ramiro Polla's avatar
Indent.  
Ramiro Polla committed
109
#if HAVE_BIGENDIAN
110
        /* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */
111
        *dest++ = 255;
112 113 114 115
        *dest++ = s[2];
        *dest++ = s[1];
        *dest++ = s[0];
        s+=3;
Ramiro Polla's avatar
Indent.  
Ramiro Polla committed
116
#else
117 118 119
        *dest++ = *s++;
        *dest++ = *s++;
        *dest++ = *s++;
120
        *dest++ = 255;
Ramiro Polla's avatar
Indent.  
Ramiro Polla committed
121
#endif
122
    }
Nick Kurshev's avatar
Nick Kurshev committed
123
}
Nick Kurshev's avatar
Nick Kurshev committed
124

/*
 * Pack the 4x RGB32 qwords in mm0..mm7 (low copies in mm0/mm1/mm4/mm5,
 * duplicates in mm2/mm3/mm6/mm7) down to 24 bytes of RGB24 and store them
 * as three qwords at %0.  Used by the rgb32->bgr24 style converters; relies
 * on the mask24* constants selected via MANGLE().
 */
#define STORE_BGR24_MMX \
            "psrlq         $8, %%mm2    \n\t" \
            "psrlq         $8, %%mm3    \n\t" \
            "psrlq         $8, %%mm6    \n\t" \
            "psrlq         $8, %%mm7    \n\t" \
            "pand "MANGLE(mask24l)", %%mm0\n\t" \
            "pand "MANGLE(mask24l)", %%mm1\n\t" \
            "pand "MANGLE(mask24l)", %%mm4\n\t" \
            "pand "MANGLE(mask24l)", %%mm5\n\t" \
            "pand "MANGLE(mask24h)", %%mm2\n\t" \
            "pand "MANGLE(mask24h)", %%mm3\n\t" \
            "pand "MANGLE(mask24h)", %%mm6\n\t" \
            "pand "MANGLE(mask24h)", %%mm7\n\t" \
            "por        %%mm2, %%mm0    \n\t" \
            "por        %%mm3, %%mm1    \n\t" \
            "por        %%mm6, %%mm4    \n\t" \
            "por        %%mm7, %%mm5    \n\t" \
 \
            "movq       %%mm1, %%mm2    \n\t" \
            "movq       %%mm4, %%mm3    \n\t" \
            "psllq        $48, %%mm2    \n\t" \
            "psllq        $32, %%mm3    \n\t" \
            "pand "MANGLE(mask24hh)", %%mm2\n\t" \
            "pand "MANGLE(mask24hhh)", %%mm3\n\t" \
            "por        %%mm2, %%mm0    \n\t" \
            "psrlq        $16, %%mm1    \n\t" \
            "psrlq        $32, %%mm4    \n\t" \
            "psllq        $16, %%mm5    \n\t" \
            "por        %%mm3, %%mm1    \n\t" \
            "pand  "MANGLE(mask24hhhh)", %%mm5\n\t" \
            "por        %%mm5, %%mm4    \n\t" \
 \
            MOVNTQ"     %%mm0,   %0     \n\t" \
            MOVNTQ"     %%mm1,  8%0     \n\t" \
            MOVNTQ"     %%mm4, 16%0"

static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
Nick Kurshev's avatar
Nick Kurshev committed
163
{
164 165 166
    uint8_t *dest = dst;
    const uint8_t *s = src;
    const uint8_t *end;
167
#if HAVE_MMX
168
    const uint8_t *mm_end;
169
#endif
170
    end = s + src_size;
171
#if HAVE_MMX
172
    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
173
    mm_end = end - 31;
Ramiro Polla's avatar
Ramiro Polla committed
174
    while (s < mm_end) {
175
        __asm__ volatile(
Ramiro Polla's avatar
Ramiro Polla committed
176 177 178 179 180 181 182 183 184
            PREFETCH"    32%1           \n\t"
            "movq          %1, %%mm0    \n\t"
            "movq         8%1, %%mm1    \n\t"
            "movq        16%1, %%mm4    \n\t"
            "movq        24%1, %%mm5    \n\t"
            "movq       %%mm0, %%mm2    \n\t"
            "movq       %%mm1, %%mm3    \n\t"
            "movq       %%mm4, %%mm6    \n\t"
            "movq       %%mm5, %%mm7    \n\t"
185
            STORE_BGR24_MMX
Ramiro Polla's avatar
Ramiro Polla committed
186
            :"=m"(*dest)
187
            :"m"(*s)
Ramiro Polla's avatar
Ramiro Polla committed
188
            :"memory");
189 190 191
        dest += 24;
        s += 32;
    }
192 193
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
194
#endif
Ramiro Polla's avatar
Ramiro Polla committed
195
    while (s < end) {
196
#if HAVE_BIGENDIAN
197 198 199 200 201 202
        /* RGB32 (= A,B,G,R) -> RGB24 (= R,G,B) */
        s++;
        dest[2] = *s++;
        dest[1] = *s++;
        dest[0] = *s++;
        dest += 3;
Alex Beregszaszi's avatar
Alex Beregszaszi committed
203
#else
204 205 206 207
        *dest++ = *s++;
        *dest++ = *s++;
        *dest++ = *s++;
        s++;
Alex Beregszaszi's avatar
Alex Beregszaszi committed
208
#endif
209
    }
Nick Kurshev's avatar
Nick Kurshev committed
210
}
Nick Kurshev's avatar
Nick Kurshev committed
211

/*
 original by Strepto/Astral
 ported to gcc & bugfixed: A'rpi
 MMX2, 3DNOW optimization by Nick Kurshev
 32-bit C version, and and&add trick by Michael Niedermayer
*/
static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, long src_size)
Nick Kurshev's avatar
Nick Kurshev committed
219
{
220 221 222 223 224
    register const uint8_t* s=src;
    register uint8_t* d=dst;
    register const uint8_t *end;
    const uint8_t *mm_end;
    end = s + src_size;
225
#if HAVE_MMX
226 227
    __asm__ volatile(PREFETCH"    %0"::"m"(*s));
    __asm__ volatile("movq        %0, %%mm4"::"m"(mask15s));
228
    mm_end = end - 15;
Ramiro Polla's avatar
Ramiro Polla committed
229
    while (s<mm_end) {
230
        __asm__ volatile(
Ramiro Polla's avatar
Ramiro Polla committed
231 232 233 234 235 236 237 238 239 240 241 242 243
            PREFETCH"  32%1         \n\t"
            "movq        %1, %%mm0  \n\t"
            "movq       8%1, %%mm2  \n\t"
            "movq     %%mm0, %%mm1  \n\t"
            "movq     %%mm2, %%mm3  \n\t"
            "pand     %%mm4, %%mm0  \n\t"
            "pand     %%mm4, %%mm2  \n\t"
            "paddw    %%mm1, %%mm0  \n\t"
            "paddw    %%mm3, %%mm2  \n\t"
            MOVNTQ"   %%mm0,  %0    \n\t"
            MOVNTQ"   %%mm2, 8%0"
            :"=m"(*d)
            :"m"(*s)
244 245 246 247
        );
        d+=16;
        s+=16;
    }
248 249
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
Nick Kurshev's avatar
Nick Kurshev committed
250
#endif
251
    mm_end = end - 3;
Ramiro Polla's avatar
Ramiro Polla committed
252
    while (s < mm_end) {
253
        register unsigned x= *((const uint32_t *)s);
254 255 256
        *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0);
        d+=4;
        s+=4;
Arpi's avatar
Arpi committed
257
    }
Ramiro Polla's avatar
Ramiro Polla committed
258
    if (s < end) {
259
        register unsigned short x= *((const uint16_t *)s);
260
        *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0);
Arpi's avatar
Arpi committed
261
    }
Nick Kurshev's avatar
Nick Kurshev committed
262
}
263

Benoit Fouet's avatar
Benoit Fouet committed
264
static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, long src_size)
265
{
266 267 268 269 270
    register const uint8_t* s=src;
    register uint8_t* d=dst;
    register const uint8_t *end;
    const uint8_t *mm_end;
    end = s + src_size;
271
#if HAVE_MMX
272 273 274
    __asm__ volatile(PREFETCH"    %0"::"m"(*s));
    __asm__ volatile("movq        %0, %%mm7"::"m"(mask15rg));
    __asm__ volatile("movq        %0, %%mm6"::"m"(mask15b));
275
    mm_end = end - 15;
Ramiro Polla's avatar
Ramiro Polla committed
276
    while (s<mm_end) {
277
        __asm__ volatile(
Ramiro Polla's avatar
Ramiro Polla committed
278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294
            PREFETCH"  32%1         \n\t"
            "movq        %1, %%mm0  \n\t"
            "movq       8%1, %%mm2  \n\t"
            "movq     %%mm0, %%mm1  \n\t"
            "movq     %%mm2, %%mm3  \n\t"
            "psrlq       $1, %%mm0  \n\t"
            "psrlq       $1, %%mm2  \n\t"
            "pand     %%mm7, %%mm0  \n\t"
            "pand     %%mm7, %%mm2  \n\t"
            "pand     %%mm6, %%mm1  \n\t"
            "pand     %%mm6, %%mm3  \n\t"
            "por      %%mm1, %%mm0  \n\t"
            "por      %%mm3, %%mm2  \n\t"
            MOVNTQ"   %%mm0,  %0    \n\t"
            MOVNTQ"   %%mm2, 8%0"
            :"=m"(*d)
            :"m"(*s)
295 296 297 298
        );
        d+=16;
        s+=16;
    }
299 300
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
301
#endif
302
    mm_end = end - 3;
Ramiro Polla's avatar
Ramiro Polla committed
303
    while (s < mm_end) {
304
        register uint32_t x= *((const uint32_t*)s);
305 306 307
        *((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F);
        s+=4;
        d+=4;
308
    }
Ramiro Polla's avatar
Ramiro Polla committed
309
    if (s < end) {
310
        register uint16_t x= *((const uint16_t*)s);
311
        *((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F);
312 313 314
    }
}

315
static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_size)
316
{
317 318
    const uint8_t *s = src;
    const uint8_t *end;
319
#if HAVE_MMX
320
    const uint8_t *mm_end;
Arpi's avatar
Arpi committed
321
#endif
322 323
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
324
#if HAVE_MMX
325
    mm_end = end - 15;
Diego Biurrun's avatar
Diego Biurrun committed
326
#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
327
    __asm__ volatile(
Ramiro Polla's avatar
Ramiro Polla committed
328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359
        "movq           %3, %%mm5   \n\t"
        "movq           %4, %%mm6   \n\t"
        "movq           %5, %%mm7   \n\t"
        "jmp 2f                     \n\t"
        ASMALIGN(4)
        "1:                         \n\t"
        PREFETCH"   32(%1)          \n\t"
        "movd         (%1), %%mm0   \n\t"
        "movd        4(%1), %%mm3   \n\t"
        "punpckldq   8(%1), %%mm0   \n\t"
        "punpckldq  12(%1), %%mm3   \n\t"
        "movq        %%mm0, %%mm1   \n\t"
        "movq        %%mm3, %%mm4   \n\t"
        "pand        %%mm6, %%mm0   \n\t"
        "pand        %%mm6, %%mm3   \n\t"
        "pmaddwd     %%mm7, %%mm0   \n\t"
        "pmaddwd     %%mm7, %%mm3   \n\t"
        "pand        %%mm5, %%mm1   \n\t"
        "pand        %%mm5, %%mm4   \n\t"
        "por         %%mm1, %%mm0   \n\t"
        "por         %%mm4, %%mm3   \n\t"
        "psrld          $5, %%mm0   \n\t"
        "pslld         $11, %%mm3   \n\t"
        "por         %%mm3, %%mm0   \n\t"
        MOVNTQ"      %%mm0, (%0)    \n\t"
        "add           $16,  %1     \n\t"
        "add            $8,  %0     \n\t"
        "2:                         \n\t"
        "cmp            %2,  %1     \n\t"
        " jb            1b          \n\t"
        : "+r" (d), "+r"(s)
        : "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216)
360
    );
Michael Niedermayer's avatar
Michael Niedermayer committed
361
#else
362 363
    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
    __asm__ volatile(
364 365 366
        "movq    %0, %%mm7    \n\t"
        "movq    %1, %%mm6    \n\t"
        ::"m"(red_16mask),"m"(green_16mask));
Ramiro Polla's avatar
Ramiro Polla committed
367
    while (s < mm_end) {
368
        __asm__ volatile(
Ramiro Polla's avatar
Ramiro Polla committed
369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397
            PREFETCH"    32%1           \n\t"
            "movd          %1, %%mm0    \n\t"
            "movd         4%1, %%mm3    \n\t"
            "punpckldq    8%1, %%mm0    \n\t"
            "punpckldq   12%1, %%mm3    \n\t"
            "movq       %%mm0, %%mm1    \n\t"
            "movq       %%mm0, %%mm2    \n\t"
            "movq       %%mm3, %%mm4    \n\t"
            "movq       %%mm3, %%mm5    \n\t"
            "psrlq         $3, %%mm0    \n\t"
            "psrlq         $3, %%mm3    \n\t"
            "pand          %2, %%mm0    \n\t"
            "pand          %2, %%mm3    \n\t"
            "psrlq         $5, %%mm1    \n\t"
            "psrlq         $5, %%mm4    \n\t"
            "pand       %%mm6, %%mm1    \n\t"
            "pand       %%mm6, %%mm4    \n\t"
            "psrlq         $8, %%mm2    \n\t"
            "psrlq         $8, %%mm5    \n\t"
            "pand       %%mm7, %%mm2    \n\t"
            "pand       %%mm7, %%mm5    \n\t"
            "por        %%mm1, %%mm0    \n\t"
            "por        %%mm4, %%mm3    \n\t"
            "por        %%mm2, %%mm0    \n\t"
            "por        %%mm5, %%mm3    \n\t"
            "psllq        $16, %%mm3    \n\t"
            "por        %%mm3, %%mm0    \n\t"
            MOVNTQ"     %%mm0, %0       \n\t"
            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
398 399 400 401
        d += 4;
        s += 16;
    }
#endif
402 403
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
404
#endif
Ramiro Polla's avatar
Ramiro Polla committed
405
    while (s < end) {
406
        register int rgb = *(const uint32_t*)s; s += 4;
407 408
        *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8);
    }
409 410
}

411
static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size)
412
{
413 414
    const uint8_t *s = src;
    const uint8_t *end;
415
#if HAVE_MMX
416
    const uint8_t *mm_end;
417
#endif
418 419
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
420
#if HAVE_MMX
421 422
    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
    __asm__ volatile(
423 424 425 426
        "movq          %0, %%mm7    \n\t"
        "movq          %1, %%mm6    \n\t"
        ::"m"(red_16mask),"m"(green_16mask));
    mm_end = end - 15;
Ramiro Polla's avatar
Ramiro Polla committed
427
    while (s < mm_end) {
428
        __asm__ volatile(
Ramiro Polla's avatar
Ramiro Polla committed
429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457
            PREFETCH"    32%1           \n\t"
            "movd          %1, %%mm0    \n\t"
            "movd         4%1, %%mm3    \n\t"
            "punpckldq    8%1, %%mm0    \n\t"
            "punpckldq   12%1, %%mm3    \n\t"
            "movq       %%mm0, %%mm1    \n\t"
            "movq       %%mm0, %%mm2    \n\t"
            "movq       %%mm3, %%mm4    \n\t"
            "movq       %%mm3, %%mm5    \n\t"
            "psllq         $8, %%mm0    \n\t"
            "psllq         $8, %%mm3    \n\t"
            "pand       %%mm7, %%mm0    \n\t"
            "pand       %%mm7, %%mm3    \n\t"
            "psrlq         $5, %%mm1    \n\t"
            "psrlq         $5, %%mm4    \n\t"
            "pand       %%mm6, %%mm1    \n\t"
            "pand       %%mm6, %%mm4    \n\t"
            "psrlq        $19, %%mm2    \n\t"
            "psrlq        $19, %%mm5    \n\t"
            "pand          %2, %%mm2    \n\t"
            "pand          %2, %%mm5    \n\t"
            "por        %%mm1, %%mm0    \n\t"
            "por        %%mm4, %%mm3    \n\t"
            "por        %%mm2, %%mm0    \n\t"
            "por        %%mm5, %%mm3    \n\t"
            "psllq        $16, %%mm3    \n\t"
            "por        %%mm3, %%mm0    \n\t"
            MOVNTQ"     %%mm0, %0       \n\t"
            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
458 459 460
        d += 4;
        s += 16;
    }
461 462
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
463
#endif
Ramiro Polla's avatar
Ramiro Polla committed
464
    while (s < end) {
465
        register int rgb = *(const uint32_t*)s; s += 4;
466 467
        *d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19);
    }
468 469
}

470
static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_size)
471
{
472 473
    const uint8_t *s = src;
    const uint8_t *end;
474
#if HAVE_MMX
475
    const uint8_t *mm_end;
Arpi's avatar
Arpi committed
476
#endif
477 478
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
479
#if HAVE_MMX
480
    mm_end = end - 15;
Diego Biurrun's avatar
Diego Biurrun committed
481
#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
482
    __asm__ volatile(
Ramiro Polla's avatar
Ramiro Polla committed
483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514
        "movq           %3, %%mm5   \n\t"
        "movq           %4, %%mm6   \n\t"
        "movq           %5, %%mm7   \n\t"
        "jmp            2f          \n\t"
        ASMALIGN(4)
        "1:                         \n\t"
        PREFETCH"   32(%1)          \n\t"
        "movd         (%1), %%mm0   \n\t"
        "movd        4(%1), %%mm3   \n\t"
        "punpckldq   8(%1), %%mm0   \n\t"
        "punpckldq  12(%1), %%mm3   \n\t"
        "movq        %%mm0, %%mm1   \n\t"
        "movq        %%mm3, %%mm4   \n\t"
        "pand        %%mm6, %%mm0   \n\t"
        "pand        %%mm6, %%mm3   \n\t"
        "pmaddwd     %%mm7, %%mm0   \n\t"
        "pmaddwd     %%mm7, %%mm3   \n\t"
        "pand        %%mm5, %%mm1   \n\t"
        "pand        %%mm5, %%mm4   \n\t"
        "por         %%mm1, %%mm0   \n\t"
        "por         %%mm4, %%mm3   \n\t"
        "psrld          $6, %%mm0   \n\t"
        "pslld         $10, %%mm3   \n\t"
        "por         %%mm3, %%mm0   \n\t"
        MOVNTQ"      %%mm0, (%0)    \n\t"
        "add           $16,  %1     \n\t"
        "add            $8,  %0     \n\t"
        "2:                         \n\t"
        "cmp            %2,  %1     \n\t"
        " jb            1b          \n\t"
        : "+r" (d), "+r"(s)
        : "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215)
515
    );
Michael Niedermayer's avatar
Michael Niedermayer committed
516
#else
517 518
    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
    __asm__ volatile(
519 520 521
        "movq          %0, %%mm7    \n\t"
        "movq          %1, %%mm6    \n\t"
        ::"m"(red_15mask),"m"(green_15mask));
Ramiro Polla's avatar
Ramiro Polla committed
522
    while (s < mm_end) {
523
        __asm__ volatile(
Ramiro Polla's avatar
Ramiro Polla committed
524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552
            PREFETCH"    32%1           \n\t"
            "movd          %1, %%mm0    \n\t"
            "movd         4%1, %%mm3    \n\t"
            "punpckldq    8%1, %%mm0    \n\t"
            "punpckldq   12%1, %%mm3    \n\t"
            "movq       %%mm0, %%mm1    \n\t"
            "movq       %%mm0, %%mm2    \n\t"
            "movq       %%mm3, %%mm4    \n\t"
            "movq       %%mm3, %%mm5    \n\t"
            "psrlq         $3, %%mm0    \n\t"
            "psrlq         $3, %%mm3    \n\t"
            "pand          %2, %%mm0    \n\t"
            "pand          %2, %%mm3    \n\t"
            "psrlq         $6, %%mm1    \n\t"
            "psrlq         $6, %%mm4    \n\t"
            "pand       %%mm6, %%mm1    \n\t"
            "pand       %%mm6, %%mm4    \n\t"
            "psrlq         $9, %%mm2    \n\t"
            "psrlq         $9, %%mm5    \n\t"
            "pand       %%mm7, %%mm2    \n\t"
            "pand       %%mm7, %%mm5    \n\t"
            "por        %%mm1, %%mm0    \n\t"
            "por        %%mm4, %%mm3    \n\t"
            "por        %%mm2, %%mm0    \n\t"
            "por        %%mm5, %%mm3    \n\t"
            "psllq        $16, %%mm3    \n\t"
            "por        %%mm3, %%mm0    \n\t"
            MOVNTQ"     %%mm0, %0       \n\t"
            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
553 554 555 556
        d += 4;
        s += 16;
    }
#endif
557 558
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
559
#endif
Ramiro Polla's avatar
Ramiro Polla committed
560
    while (s < end) {
561
        register int rgb = *(const uint32_t*)s; s += 4;
562 563
        *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9);
    }
564 565
}

566
static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size)
567
{
568 569
    const uint8_t *s = src;
    const uint8_t *end;
570
#if HAVE_MMX
571
    const uint8_t *mm_end;
572
#endif
573 574
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
575
#if HAVE_MMX
576 577
    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
    __asm__ volatile(
578 579 580 581
        "movq          %0, %%mm7    \n\t"
        "movq          %1, %%mm6    \n\t"
        ::"m"(red_15mask),"m"(green_15mask));
    mm_end = end - 15;
Ramiro Polla's avatar
Ramiro Polla committed
582
    while (s < mm_end) {
583
        __asm__ volatile(
Ramiro Polla's avatar
Ramiro Polla committed
584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612
            PREFETCH"    32%1           \n\t"
            "movd          %1, %%mm0    \n\t"
            "movd         4%1, %%mm3    \n\t"
            "punpckldq    8%1, %%mm0    \n\t"
            "punpckldq   12%1, %%mm3    \n\t"
            "movq       %%mm0, %%mm1    \n\t"
            "movq       %%mm0, %%mm2    \n\t"
            "movq       %%mm3, %%mm4    \n\t"
            "movq       %%mm3, %%mm5    \n\t"
            "psllq         $7, %%mm0    \n\t"
            "psllq         $7, %%mm3    \n\t"
            "pand       %%mm7, %%mm0    \n\t"
            "pand       %%mm7, %%mm3    \n\t"
            "psrlq         $6, %%mm1    \n\t"
            "psrlq         $6, %%mm4    \n\t"
            "pand       %%mm6, %%mm1    \n\t"
            "pand       %%mm6, %%mm4    \n\t"
            "psrlq        $19, %%mm2    \n\t"
            "psrlq        $19, %%mm5    \n\t"
            "pand          %2, %%mm2    \n\t"
            "pand          %2, %%mm5    \n\t"
            "por        %%mm1, %%mm0    \n\t"
            "por        %%mm4, %%mm3    \n\t"
            "por        %%mm2, %%mm0    \n\t"
            "por        %%mm5, %%mm3    \n\t"
            "psllq        $16, %%mm3    \n\t"
            "por        %%mm3, %%mm0    \n\t"
            MOVNTQ"     %%mm0, %0       \n\t"
            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
613 614 615
        d += 4;
        s += 16;
    }
616 617
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
618
#endif
Ramiro Polla's avatar
Ramiro Polla committed
619
    while (s < end) {
620
        register int rgb = *(const uint32_t*)s; s += 4;
621 622
        *d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19);
    }
623 624
}

625
static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size)
626
{
627 628
    const uint8_t *s = src;
    const uint8_t *end;
629
#if HAVE_MMX
630
    const uint8_t *mm_end;
Arpi's avatar
Arpi committed
631
#endif
632 633
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
634
#if HAVE_MMX
635 636
    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
    __asm__ volatile(
637 638 639 640
        "movq         %0, %%mm7     \n\t"
        "movq         %1, %%mm6     \n\t"
        ::"m"(red_16mask),"m"(green_16mask));
    mm_end = end - 11;
Ramiro Polla's avatar
Ramiro Polla committed
641
    while (s < mm_end) {
642
        __asm__ volatile(
Ramiro Polla's avatar
Ramiro Polla committed
643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671
            PREFETCH"    32%1           \n\t"
            "movd          %1, %%mm0    \n\t"
            "movd         3%1, %%mm3    \n\t"
            "punpckldq    6%1, %%mm0    \n\t"
            "punpckldq    9%1, %%mm3    \n\t"
            "movq       %%mm0, %%mm1    \n\t"
            "movq       %%mm0, %%mm2    \n\t"
            "movq       %%mm3, %%mm4    \n\t"
            "movq       %%mm3, %%mm5    \n\t"
            "psrlq         $3, %%mm0    \n\t"
            "psrlq         $3, %%mm3    \n\t"
            "pand          %2, %%mm0    \n\t"
            "pand          %2, %%mm3    \n\t"
            "psrlq         $5, %%mm1    \n\t"
            "psrlq         $5, %%mm4    \n\t"
            "pand       %%mm6, %%mm1    \n\t"
            "pand       %%mm6, %%mm4    \n\t"
            "psrlq         $8, %%mm2    \n\t"
            "psrlq         $8, %%mm5    \n\t"
            "pand       %%mm7, %%mm2    \n\t"
            "pand       %%mm7, %%mm5    \n\t"
            "por        %%mm1, %%mm0    \n\t"
            "por        %%mm4, %%mm3    \n\t"
            "por        %%mm2, %%mm0    \n\t"
            "por        %%mm5, %%mm3    \n\t"
            "psllq        $16, %%mm3    \n\t"
            "por        %%mm3, %%mm0    \n\t"
            MOVNTQ"     %%mm0, %0       \n\t"
            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
672 673 674
        d += 4;
        s += 12;
    }
675 676
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
677
#endif
Ramiro Polla's avatar
Ramiro Polla committed
678
    while (s < end) {
679 680 681 682 683
        const int b = *s++;
        const int g = *s++;
        const int r = *s++;
        *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
    }
684 685
}

686
static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size)
687
{
688 689
    const uint8_t *s = src;
    const uint8_t *end;
690
#if HAVE_MMX
691
    const uint8_t *mm_end;
692
#endif
693 694
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
695
#if HAVE_MMX
696 697
    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
    __asm__ volatile(
698 699 700 701
        "movq         %0, %%mm7     \n\t"
        "movq         %1, %%mm6     \n\t"
        ::"m"(red_16mask),"m"(green_16mask));
    mm_end = end - 15;
Ramiro Polla's avatar
Ramiro Polla committed
702
    while (s < mm_end) {
703
        __asm__ volatile(
Ramiro Polla's avatar
Ramiro Polla committed
704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732
            PREFETCH"    32%1           \n\t"
            "movd          %1, %%mm0    \n\t"
            "movd         3%1, %%mm3    \n\t"
            "punpckldq    6%1, %%mm0    \n\t"
            "punpckldq    9%1, %%mm3    \n\t"
            "movq       %%mm0, %%mm1    \n\t"
            "movq       %%mm0, %%mm2    \n\t"
            "movq       %%mm3, %%mm4    \n\t"
            "movq       %%mm3, %%mm5    \n\t"
            "psllq         $8, %%mm0    \n\t"
            "psllq         $8, %%mm3    \n\t"
            "pand       %%mm7, %%mm0    \n\t"
            "pand       %%mm7, %%mm3    \n\t"
            "psrlq         $5, %%mm1    \n\t"
            "psrlq         $5, %%mm4    \n\t"
            "pand       %%mm6, %%mm1    \n\t"
            "pand       %%mm6, %%mm4    \n\t"
            "psrlq        $19, %%mm2    \n\t"
            "psrlq        $19, %%mm5    \n\t"
            "pand          %2, %%mm2    \n\t"
            "pand          %2, %%mm5    \n\t"
            "por        %%mm1, %%mm0    \n\t"
            "por        %%mm4, %%mm3    \n\t"
            "por        %%mm2, %%mm0    \n\t"
            "por        %%mm5, %%mm3    \n\t"
            "psllq        $16, %%mm3    \n\t"
            "por        %%mm3, %%mm0    \n\t"
            MOVNTQ"     %%mm0, %0       \n\t"
            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
733 734 735
        d += 4;
        s += 12;
    }
736 737
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
738
#endif
Ramiro Polla's avatar
Ramiro Polla committed
739
    while (s < end) {
740 741 742 743 744
        const int r = *s++;
        const int g = *s++;
        const int b = *s++;
        *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
    }
745 746
}

static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size)
748
{
749 750
    const uint8_t *s = src;
    const uint8_t *end;
751
#if HAVE_MMX
752
    const uint8_t *mm_end;
Arpi's avatar
Arpi committed
753
#endif
754 755
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
756
#if HAVE_MMX
757 758
    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
    __asm__ volatile(
759 760 761 762
        "movq          %0, %%mm7    \n\t"
        "movq          %1, %%mm6    \n\t"
        ::"m"(red_15mask),"m"(green_15mask));
    mm_end = end - 11;
Ramiro Polla's avatar
Ramiro Polla committed
763
    while (s < mm_end) {
764
        __asm__ volatile(
Ramiro Polla's avatar
Ramiro Polla committed
765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793
            PREFETCH"    32%1           \n\t"
            "movd          %1, %%mm0    \n\t"
            "movd         3%1, %%mm3    \n\t"
            "punpckldq    6%1, %%mm0    \n\t"
            "punpckldq    9%1, %%mm3    \n\t"
            "movq       %%mm0, %%mm1    \n\t"
            "movq       %%mm0, %%mm2    \n\t"
            "movq       %%mm3, %%mm4    \n\t"
            "movq       %%mm3, %%mm5    \n\t"
            "psrlq         $3, %%mm0    \n\t"
            "psrlq         $3, %%mm3    \n\t"
            "pand          %2, %%mm0    \n\t"
            "pand          %2, %%mm3    \n\t"
            "psrlq         $6, %%mm1    \n\t"
            "psrlq         $6, %%mm4    \n\t"
            "pand       %%mm6, %%mm1    \n\t"
            "pand       %%mm6, %%mm4    \n\t"
            "psrlq         $9, %%mm2    \n\t"
            "psrlq         $9, %%mm5    \n\t"
            "pand       %%mm7, %%mm2    \n\t"
            "pand       %%mm7, %%mm5    \n\t"
            "por        %%mm1, %%mm0    \n\t"
            "por        %%mm4, %%mm3    \n\t"
            "por        %%mm2, %%mm0    \n\t"
            "por        %%mm5, %%mm3    \n\t"
            "psllq        $16, %%mm3    \n\t"
            "por        %%mm3, %%mm0    \n\t"
            MOVNTQ"     %%mm0, %0       \n\t"
            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
794 795 796
        d += 4;
        s += 12;
    }
797 798
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
799
#endif
Ramiro Polla's avatar
Ramiro Polla committed
800
    while (s < end) {
801 802 803 804 805
        const int b = *s++;
        const int g = *s++;
        const int r = *s++;
        *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
    }
Arpi's avatar
Arpi committed
806 807
}

static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, long src_size)
809
{
810 811
    const uint8_t *s = src;
    const uint8_t *end;
812
#if HAVE_MMX
813
    const uint8_t *mm_end;
814
#endif
815 816
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
817
#if HAVE_MMX
818 819
    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
    __asm__ volatile(
820 821 822 823
        "movq         %0, %%mm7     \n\t"
        "movq         %1, %%mm6     \n\t"
        ::"m"(red_15mask),"m"(green_15mask));
    mm_end = end - 15;
Ramiro Polla's avatar
Ramiro Polla committed
824
    while (s < mm_end) {
825
        __asm__ volatile(
Ramiro Polla's avatar
Ramiro Polla committed
826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854
            PREFETCH"   32%1            \n\t"
            "movd         %1, %%mm0     \n\t"
            "movd        3%1, %%mm3     \n\t"
            "punpckldq   6%1, %%mm0     \n\t"
            "punpckldq   9%1, %%mm3     \n\t"
            "movq      %%mm0, %%mm1     \n\t"
            "movq      %%mm0, %%mm2     \n\t"
            "movq      %%mm3, %%mm4     \n\t"
            "movq      %%mm3, %%mm5     \n\t"
            "psllq        $7, %%mm0     \n\t"
            "psllq        $7, %%mm3     \n\t"
            "pand      %%mm7, %%mm0     \n\t"
            "pand      %%mm7, %%mm3     \n\t"
            "psrlq        $6, %%mm1     \n\t"
            "psrlq        $6, %%mm4     \n\t"
            "pand      %%mm6, %%mm1     \n\t"
            "pand      %%mm6, %%mm4     \n\t"
            "psrlq       $19, %%mm2     \n\t"
            "psrlq       $19, %%mm5     \n\t"
            "pand         %2, %%mm2     \n\t"
            "pand         %2, %%mm5     \n\t"
            "por       %%mm1, %%mm0     \n\t"
            "por       %%mm4, %%mm3     \n\t"
            "por       %%mm2, %%mm0     \n\t"
            "por       %%mm5, %%mm3     \n\t"
            "psllq       $16, %%mm3     \n\t"
            "por       %%mm3, %%mm0     \n\t"
            MOVNTQ"    %%mm0, %0        \n\t"
            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
855 856 857
        d += 4;
        s += 12;
    }
858 859
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
860
#endif
Ramiro Polla's avatar
Ramiro Polla committed
861
    while (s < end) {
862 863 864 865 866
        const int r = *s++;
        const int g = *s++;
        const int b = *s++;
        *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
    }
867 868
}

/*
  I use less accurate approximation here by simply left-shifting the input
  value and filling the low order bits with zeroes. This method improves PNG
  compression but this scheme cannot reproduce white exactly, since it does
  not generate an all-ones maximum value; the net effect is to darken the
  image slightly.

  The better method should be "left bit replication":

   4 3 2 1 0
   ---------
   1 1 0 1 1

   7 6 5 4 3  2 1 0
   ----------------
   1 1 0 1 1  1 1 0
   |=======|  |===|
       |      leftmost bits repeated to fill open bits
       |
   original bits
*/
static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
Arpi's avatar
Arpi committed
891
{
892
    const uint16_t *end;
893
#if HAVE_MMX
894
    const uint16_t *mm_end;
Arpi's avatar
Arpi committed
895
#endif
Baptiste Coudurier's avatar
Baptiste Coudurier committed
896
    uint8_t *d = dst;
897
    const uint16_t *s = (const uint16_t*)src;
898
    end = s + src_size/2;
899
#if HAVE_MMX
900
    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
901
    mm_end = end - 7;
Ramiro Polla's avatar
Ramiro Polla committed
902
    while (s < mm_end) {
903
        __asm__ volatile(
Ramiro Polla's avatar
Ramiro Polla committed
904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964
            PREFETCH"    32%1           \n\t"
            "movq          %1, %%mm0    \n\t"
            "movq          %1, %%mm1    \n\t"
            "movq          %1, %%mm2    \n\t"
            "pand          %2, %%mm0    \n\t"
            "pand          %3, %%mm1    \n\t"
            "pand          %4, %%mm2    \n\t"
            "psllq         $3, %%mm0    \n\t"
            "psrlq         $2, %%mm1    \n\t"
            "psrlq         $7, %%mm2    \n\t"
            "movq       %%mm0, %%mm3    \n\t"
            "movq       %%mm1, %%mm4    \n\t"
            "movq       %%mm2, %%mm5    \n\t"
            "punpcklwd     %5, %%mm0    \n\t"
            "punpcklwd     %5, %%mm1    \n\t"
            "punpcklwd     %5, %%mm2    \n\t"
            "punpckhwd     %5, %%mm3    \n\t"
            "punpckhwd     %5, %%mm4    \n\t"
            "punpckhwd     %5, %%mm5    \n\t"
            "psllq         $8, %%mm1    \n\t"
            "psllq        $16, %%mm2    \n\t"
            "por        %%mm1, %%mm0    \n\t"
            "por        %%mm2, %%mm0    \n\t"
            "psllq         $8, %%mm4    \n\t"
            "psllq        $16, %%mm5    \n\t"
            "por        %%mm4, %%mm3    \n\t"
            "por        %%mm5, %%mm3    \n\t"

            "movq       %%mm0, %%mm6    \n\t"
            "movq       %%mm3, %%mm7    \n\t"

            "movq         8%1, %%mm0    \n\t"
            "movq         8%1, %%mm1    \n\t"
            "movq         8%1, %%mm2    \n\t"
            "pand          %2, %%mm0    \n\t"
            "pand          %3, %%mm1    \n\t"
            "pand          %4, %%mm2    \n\t"
            "psllq         $3, %%mm0    \n\t"
            "psrlq         $2, %%mm1    \n\t"
            "psrlq         $7, %%mm2    \n\t"
            "movq       %%mm0, %%mm3    \n\t"
            "movq       %%mm1, %%mm4    \n\t"
            "movq       %%mm2, %%mm5    \n\t"
            "punpcklwd     %5, %%mm0    \n\t"
            "punpcklwd     %5, %%mm1    \n\t"
            "punpcklwd     %5, %%mm2    \n\t"
            "punpckhwd     %5, %%mm3    \n\t"
            "punpckhwd     %5, %%mm4    \n\t"
            "punpckhwd     %5, %%mm5    \n\t"
            "psllq         $8, %%mm1    \n\t"
            "psllq        $16, %%mm2    \n\t"
            "por        %%mm1, %%mm0    \n\t"
            "por        %%mm2, %%mm0    \n\t"
            "psllq         $8, %%mm4    \n\t"
            "psllq        $16, %%mm5    \n\t"
            "por        %%mm4, %%mm3    \n\t"
            "por        %%mm5, %%mm3    \n\t"

            :"=m"(*d)
            :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
            :"memory");
965
        /* borrowed 32 to 24 */
966
        __asm__ volatile(
Ramiro Polla's avatar
Ramiro Polla committed
967 968 969 970 971 972 973 974 975 976
            "movq       %%mm0, %%mm4    \n\t"
            "movq       %%mm3, %%mm5    \n\t"
            "movq       %%mm6, %%mm0    \n\t"
            "movq       %%mm7, %%mm1    \n\t"

            "movq       %%mm4, %%mm6    \n\t"
            "movq       %%mm5, %%mm7    \n\t"
            "movq       %%mm0, %%mm2    \n\t"
            "movq       %%mm1, %%mm3    \n\t"

977
            STORE_BGR24_MMX
Ramiro Polla's avatar
Ramiro Polla committed
978 979

            :"=m"(*d)
980
            :"m"(*s)
Ramiro Polla's avatar
Ramiro Polla committed
981
            :"memory");
982 983 984
        d += 24;
        s += 8;
    }
985