Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
BC
public
external
ffmpeg
Commits
ac4d0aea
Commit
ac4d0aea
authored
Jun 29, 2002
by
Michael Niedermayer
Browse files
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
Originally committed as revision 6607 to
svn://svn.mplayerhq.hu/mplayer/trunk/postproc
parent
d8dad2a5
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
827 additions
and
1 deletion
+827
-1
postproc/rgb2rgb.c
postproc/rgb2rgb.c
+285
-0
postproc/rgb2rgb.h
postproc/rgb2rgb.h
+30
-1
postproc/rgb2rgb_template.c
postproc/rgb2rgb_template.c
+512
-0
No files found.
postproc/rgb2rgb.c
View file @
ac4d0aea
...
...
@@ -244,6 +244,20 @@ void rgb15to16(const uint8_t *src,uint8_t *dst,unsigned src_size)
rgb15to16_C
(
src
,
dst
,
src_size
);
}
/**
 * Public entry point for the rgb16to15 conversion.
 *
 * When CAN_COMPILE_X86_ASM is defined, the capability flags in gCpuCaps are
 * tested in the order MMX2, 3DNow, MMX (fastest first) and the matching
 * variant is called; otherwise, or when no flag is set, the plain C variant
 * runs. All arguments are forwarded unchanged.
 */
void rgb16to15(const uint8_t *src, uint8_t *dst, unsigned src_size)
{
#ifdef CAN_COMPILE_X86_ASM
    /* Probe the variants from fastest to slowest and stop at the first hit. */
    if (gCpuCaps.hasMMX2) {
        rgb16to15_MMX2(src, dst, src_size);
        return;
    }
    if (gCpuCaps.has3DNow) {
        rgb16to15_3DNow(src, dst, src_size);
        return;
    }
    if (gCpuCaps.hasMMX) {
        rgb16to15_MMX(src, dst, src_size);
        return;
    }
#endif
    rgb16to15_C(src, dst, src_size);
}
/**
* Palette is assumed to contain bgr32
*/
...
...
@@ -387,6 +401,61 @@ void rgb32tobgr32(const uint8_t *src, uint8_t *dst, unsigned int src_size)
rgb32tobgr32_C
(
src
,
dst
,
src_size
);
}
/**
 * Converts 4-byte pixels to 3-byte pixels, reversing the order of the first
 * three bytes of each pixel and dropping the fourth.
 *
 * @param src      source buffer, read 4 bytes per pixel
 * @param dst      destination buffer, written 3 bytes per pixel
 * @param src_size size of src in bytes; src_size / 4 pixels are converted
 */
void rgb32tobgr24(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{
    const uint8_t *in = src;
    uint8_t *out = dst;
    unsigned remaining;

    for (remaining = src_size / 4; remaining > 0; remaining--) {
        out[0] = in[2];
        out[1] = in[1];
        out[2] = in[0];
        in  += 4;   /* skip the unused fourth source byte as well */
        out += 3;
    }
}
/**
 * Public entry point for the rgb32tobgr16 conversion.
 *
 * When CAN_COMPILE_X86_ASM is defined, the capability flags in gCpuCaps are
 * tested in the order MMX2, 3DNow, MMX (fastest first) and the matching
 * variant is called; otherwise, or when no flag is set, the plain C variant
 * runs. All arguments are forwarded unchanged.
 */
void rgb32tobgr16(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{
#ifdef CAN_COMPILE_X86_ASM
    /* Probe the variants from fastest to slowest and stop at the first hit. */
    if (gCpuCaps.hasMMX2) {
        rgb32tobgr16_MMX2(src, dst, src_size);
        return;
    }
    if (gCpuCaps.has3DNow) {
        rgb32tobgr16_3DNow(src, dst, src_size);
        return;
    }
    if (gCpuCaps.hasMMX) {
        rgb32tobgr16_MMX(src, dst, src_size);
        return;
    }
#endif
    rgb32tobgr16_C(src, dst, src_size);
}
/**
 * Public entry point for the rgb32tobgr15 conversion.
 *
 * When CAN_COMPILE_X86_ASM is defined, the capability flags in gCpuCaps are
 * tested in the order MMX2, 3DNow, MMX (fastest first) and the matching
 * variant is called; otherwise, or when no flag is set, the plain C variant
 * runs. All arguments are forwarded unchanged.
 */
void rgb32tobgr15(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{
#ifdef CAN_COMPILE_X86_ASM
    /* Probe the variants from fastest to slowest and stop at the first hit. */
    if (gCpuCaps.hasMMX2) {
        rgb32tobgr15_MMX2(src, dst, src_size);
        return;
    }
    if (gCpuCaps.has3DNow) {
        rgb32tobgr15_3DNow(src, dst, src_size);
        return;
    }
    if (gCpuCaps.hasMMX) {
        rgb32tobgr15_MMX(src, dst, src_size);
        return;
    }
#endif
    rgb32tobgr15_C(src, dst, src_size);
}
/**
 * Converts 3-byte pixels to 4-byte pixels, reversing the byte order of each
 * pixel and storing 0 in the fourth destination byte.
 *
 * @param src      source buffer, read 3 bytes per pixel
 * @param dst      destination buffer, written 4 bytes per pixel
 * @param src_size size of src in bytes; src_size / 3 pixels are converted
 */
void rgb24tobgr32(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{
    unsigned i;
    /* The source holds 3 bytes per pixel, so the pixel count is
       src_size / 3. The former "src_size >> 2" undercounted and left the
       last quarter of the input unconverted. */
    unsigned num_pixels = src_size / 3;

    for (i = 0; i < num_pixels; i++) {
        dst[4*i + 0] = src[3*i + 2];
        dst[4*i + 1] = src[3*i + 1];
        dst[4*i + 2] = src[3*i + 0];
        dst[4*i + 3] = 0;
    }
}
void
rgb24tobgr24
(
const
uint8_t
*
src
,
uint8_t
*
dst
,
unsigned
int
src_size
)
{
#ifdef CAN_COMPILE_X86_ASM
...
...
@@ -402,6 +471,186 @@ void rgb24tobgr24(const uint8_t *src, uint8_t *dst, unsigned int src_size)
rgb24tobgr24_C
(
src
,
dst
,
src_size
);
}
/**
 * Public entry point for the rgb24tobgr16 conversion.
 *
 * When CAN_COMPILE_X86_ASM is defined, the capability flags in gCpuCaps are
 * tested in the order MMX2, 3DNow, MMX (fastest first) and the matching
 * variant is called; otherwise, or when no flag is set, the plain C variant
 * runs. All arguments are forwarded unchanged.
 */
void rgb24tobgr16(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{
#ifdef CAN_COMPILE_X86_ASM
    /* Probe the variants from fastest to slowest and stop at the first hit. */
    if (gCpuCaps.hasMMX2) {
        rgb24tobgr16_MMX2(src, dst, src_size);
        return;
    }
    if (gCpuCaps.has3DNow) {
        rgb24tobgr16_3DNow(src, dst, src_size);
        return;
    }
    if (gCpuCaps.hasMMX) {
        rgb24tobgr16_MMX(src, dst, src_size);
        return;
    }
#endif
    rgb24tobgr16_C(src, dst, src_size);
}
/**
 * Public entry point for the rgb24tobgr15 conversion.
 *
 * When CAN_COMPILE_X86_ASM is defined, the capability flags in gCpuCaps are
 * tested in the order MMX2, 3DNow, MMX (fastest first) and the matching
 * variant is called; otherwise, or when no flag is set, the plain C variant
 * runs. All arguments are forwarded unchanged.
 */
void rgb24tobgr15(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{
#ifdef CAN_COMPILE_X86_ASM
    /* Probe the variants from fastest to slowest and stop at the first hit. */
    if (gCpuCaps.hasMMX2) {
        rgb24tobgr15_MMX2(src, dst, src_size);
        return;
    }
    if (gCpuCaps.has3DNow) {
        rgb24tobgr15_3DNow(src, dst, src_size);
        return;
    }
    if (gCpuCaps.hasMMX) {
        rgb24tobgr15_MMX(src, dst, src_size);
        return;
    }
#endif
    rgb24tobgr15_C(src, dst, src_size);
}
/**
 * Expands 16-bit pixels to 4 bytes each.
 *
 * For every 16-bit word read from src, four bytes are written to dst:
 * bits 11-15, bits 5-10 and bits 0-4 of the word, each moved to the top of
 * its byte, followed by a zero byte.
 *
 * @param src      source buffer, read as native-endian uint16_t values
 * @param dst      destination buffer, written 4 bytes per pixel
 * @param src_size size of src in bytes; src_size / 2 pixels are converted
 */
void rgb16tobgr32(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{
    const uint16_t *pixels = (const uint16_t *)src;
    const unsigned count = src_size / 2;
    uint8_t *out = dst;
    unsigned i;

    for (i = 0; i < count; i++) {
        const unsigned px = pixels[i];

        out[0] = (px & 0xF800) >> 8;
        out[1] = (px & 0x07E0) >> 3;
        out[2] = (px & 0x001F) << 3;
        out[3] = 0;
        out += 4;
    }
}
/**
 * Expands 16-bit pixels to 3 bytes each.
 *
 * For every 16-bit word read from src, three bytes are written to dst:
 * bits 11-15, bits 5-10 and bits 0-4 of the word, each moved to the top of
 * its byte.
 *
 * @param src      source buffer, read as native-endian uint16_t values
 * @param dst      destination buffer, written 3 bytes per pixel
 * @param src_size size of src in bytes; src_size / 2 pixels are converted
 */
void rgb16tobgr24(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{
    const uint16_t *in = (const uint16_t *)src;
    const uint16_t *const stop = in + src_size / 2;
    uint8_t *out = dst;

    for (; in < stop; in++, out += 3) {
        const uint16_t px = *in;

        out[0] = (px & 0xF800) >> 8;
        out[1] = (px & 0x7E0) >> 3;
        out[2] = (px & 0x1F) << 3;
    }
}
/**
 * Swaps the two outer 5-bit fields of every 16-bit pixel (bits 0-4 and
 * bits 11-15) and keeps the 6-bit field at bits 5-10 in place.
 *
 * @param src      source buffer, read as native-endian uint16_t values
 * @param dst      destination buffer, written as uint16_t values
 * @param src_size size of src in bytes; src_size / 2 pixels are converted
 */
void rgb16tobgr16(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{
    /* Pixels are 16 bits wide, so both buffers must be accessed as
       uint16_t. Indexing the uint8_t pointers directly (src[2*i],
       dst[2*i]) read one byte and stored one truncated byte per pixel. */
    const uint16_t *s = (const uint16_t *)src;
    uint16_t *d = (uint16_t *)dst;
    unsigned i;
    unsigned num_pixels = src_size >> 1;

    for (i = 0; i < num_pixels; i++) {
        const unsigned rgb = s[i];
        const unsigned r = rgb & 0x1F;
        const unsigned g = (rgb & 0x7E0) >> 5;
        const unsigned b = (rgb & 0xF800) >> 11;

        d[i] = (uint16_t)(b | (g << 5) | (r << 11));
    }
}
/**
 * Converts 16-bit pixels (fields of 5, 6 and 5 bits) to 15-bit pixels
 * (three 5-bit fields) while swapping the two outer fields.
 *
 * @param src      source buffer, read as native-endian uint16_t values
 * @param dst      destination buffer, written as uint16_t values
 * @param src_size size of src in bytes; src_size / 2 pixels are converted
 */
void rgb16tobgr15(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{
    /* Pixels are 16 bits wide, so both buffers must be accessed as
       uint16_t. Indexing the uint8_t pointers directly (src[2*i],
       dst[2*i]) read one byte and stored one truncated byte per pixel. */
    const uint16_t *s = (const uint16_t *)src;
    uint16_t *d = (uint16_t *)dst;
    unsigned i;
    unsigned num_pixels = src_size >> 1;

    for (i = 0; i < num_pixels; i++) {
        const unsigned rgb = s[i];
        const unsigned r = rgb & 0x1F;
        const unsigned g = (rgb & 0x7E0) >> 5;
        const unsigned b = (rgb & 0xF800) >> 11;

        /* Narrow the 6-bit middle field to 5 bits by discarding its least
           significant bit; masking with 0x1F threw away the most
           significant bit instead. */
        d[i] = (uint16_t)(b | ((g >> 1) << 5) | (r << 10));
    }
}
/**
 * Expands 15-bit pixels (stored in 16-bit words) to 4 bytes each.
 *
 * For every 16-bit word read from src, four bytes are written to dst:
 * bits 10-14, bits 5-9 and bits 0-4 of the word, each moved to the top of
 * its byte, followed by a zero byte.
 *
 * @param src      source buffer, read as native-endian uint16_t values
 * @param dst      destination buffer, written 4 bytes per pixel
 * @param src_size size of src in bytes; src_size / 2 pixels are converted
 */
void rgb15tobgr32(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{
    const uint16_t *pixels = (const uint16_t *)src;
    const unsigned count = src_size / 2;
    uint8_t *out = dst;
    unsigned i;

    for (i = 0; i < count; i++) {
        const unsigned px = pixels[i];

        out[0] = (px & 0x7C00) >> 7;
        out[1] = (px & 0x03E0) >> 2;
        out[2] = (px & 0x001F) << 3;
        out[3] = 0;
        out += 4;
    }
}
/**
 * Expands 15-bit pixels (stored in 16-bit words) to 3 bytes each.
 *
 * For every 16-bit word read from src, three bytes are written to dst:
 * bits 10-14, bits 5-9 and bits 0-4 of the word, each moved to the top of
 * its byte.
 *
 * @param src      source buffer, read as native-endian uint16_t values
 * @param dst      destination buffer, written 3 bytes per pixel
 * @param src_size size of src in bytes; src_size / 2 pixels are converted
 */
void rgb15tobgr24(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{
    const uint16_t *in = (const uint16_t *)src;
    const uint16_t *const stop = in + src_size / 2;
    uint8_t *out = dst;

    for (; in < stop; in++, out += 3) {
        const uint16_t px = *in;

        out[0] = (px & 0x7C00) >> 7;
        out[1] = (px & 0x3E0) >> 2;
        out[2] = (px & 0x1F) << 3;
    }
}
/**
 * Converts 15-bit pixels (three 5-bit fields) to 16-bit pixels (fields of
 * 5, 6 and 5 bits) while swapping the two outer fields.
 *
 * @param src      source buffer, read as native-endian uint16_t values
 * @param dst      destination buffer, written as uint16_t values
 * @param src_size size of src in bytes; src_size / 2 pixels are converted
 */
void rgb15tobgr16(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{
    /* Pixels are 16 bits wide, so both buffers must be accessed as
       uint16_t. Indexing the uint8_t pointers directly (src[2*i],
       dst[2*i]) read one byte and stored one truncated byte per pixel. */
    const uint16_t *s = (const uint16_t *)src;
    uint16_t *d = (uint16_t *)dst;
    unsigned i;
    unsigned num_pixels = src_size >> 1;

    for (i = 0; i < num_pixels; i++) {
        const unsigned rgb = s[i];
        const unsigned r = rgb & 0x1F;
        const unsigned g = (rgb & 0x3E0) >> 5;
        const unsigned b = (rgb & 0x7C00) >> 10;

        /* Widen the 5-bit middle field to 6 bits by shifting it up one
           bit; storing it unshifted halved its value relative to the
           6-bit range. */
        d[i] = (uint16_t)(b | ((g << 1) << 5) | (r << 11));
    }
}
/**
 * Swaps the two outer 5-bit fields of every 15-bit pixel (bits 0-4 and
 * bits 10-14) and keeps the field at bits 5-9 in place. Bit 15 of the
 * output is always 0.
 *
 * @param src      source buffer, read as native-endian uint16_t values
 * @param dst      destination buffer, written as uint16_t values
 * @param src_size size of src in bytes; src_size / 2 pixels are converted
 */
void rgb15tobgr15(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{
    /* Pixels are 16 bits wide, so both buffers must be accessed as
       uint16_t. Indexing the uint8_t pointers directly (src[2*i],
       dst[2*i]) read one byte and stored one truncated byte per pixel. */
    const uint16_t *s = (const uint16_t *)src;
    uint16_t *d = (uint16_t *)dst;
    unsigned i;
    unsigned num_pixels = src_size >> 1;

    for (i = 0; i < num_pixels; i++) {
        const unsigned rgb = s[i];
        const unsigned r = rgb & 0x1F;
        const unsigned g = (rgb & 0x3E0) >> 5;
        const unsigned b = (rgb & 0x7C00) >> 10;

        d[i] = (uint16_t)(b | (g << 5) | (r << 10));
    }
}
/**
 * Reorders the fields of 8-bit pixels.
 *
 * Each source byte is split into a 3-bit field (bits 0-2), a 3-bit field
 * (bits 3-5) and a 2-bit field (bits 6-7). The output byte holds the 2-bit
 * field widened to 3 bits in bits 0-2, the middle field unchanged in
 * bits 3-5, and the former low field narrowed to 2 bits in bits 6-7.
 *
 * @param src      source buffer, one byte per pixel
 * @param dst      destination buffer, one byte per pixel
 * @param src_size number of bytes (pixels) to convert
 */
void rgb8tobgr8(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{
    unsigned i;
    unsigned num_pixels = src_size;

    for (i = 0; i < num_pixels; i++) {
        const unsigned rgb = src[i];
        const unsigned r = rgb & 0x07;
        const unsigned g = (rgb & 0x38) >> 3;
        const unsigned b = (rgb & 0xC0) >> 6;

        /* Narrow the 3-bit field to 2 bits by dropping its least
           significant bit (r >> 1). Masking with 0x03 dropped the most
           significant bit, so e.g. a value of 4 became 0. */
        dst[i] = (uint8_t)(((b << 1) & 0x07) | (g << 3) | ((r >> 1) << 6));
    }
}
/**
*
* height should be a multiple of 2 and width should be a multiple of 16 (if this is a
...
...
@@ -564,3 +813,39 @@ void interleaveBytes(uint8_t *src1, uint8_t *src2, uint8_t *dst,
#endif
interleaveBytes_C
(
src1
,
src2
,
dst
,
width
,
height
,
src1Stride
,
src2Stride
,
dstStride
);
}
/**
 * Public entry point for the vu9_to_vu12 conversion.
 *
 * When CAN_COMPILE_X86_ASM is defined, the capability flags in gCpuCaps are
 * tested in the order MMX2, 3DNow, MMX and the matching variant is called;
 * otherwise, or when no flag is set, the plain C variant runs. All
 * arguments are forwarded unchanged.
 */
void vu9_to_vu12(const uint8_t *src1, const uint8_t *src2,
                 uint8_t *dst1, uint8_t *dst2,
                 unsigned width, unsigned height,
                 unsigned srcStride1, unsigned srcStride2,
                 unsigned dstStride1, unsigned dstStride2)
{
#ifdef CAN_COMPILE_X86_ASM
    if (gCpuCaps.hasMMX2) {
        vu9_to_vu12_MMX2(src1, src2, dst1, dst2, width, height,
                         srcStride1, srcStride2, dstStride1, dstStride2);
        return;
    }
    if (gCpuCaps.has3DNow) {
        vu9_to_vu12_3DNow(src1, src2, dst1, dst2, width, height,
                          srcStride1, srcStride2, dstStride1, dstStride2);
        return;
    }
    if (gCpuCaps.hasMMX) {
        vu9_to_vu12_MMX(src1, src2, dst1, dst2, width, height,
                        srcStride1, srcStride2, dstStride1, dstStride2);
        return;
    }
#endif
    vu9_to_vu12_C(src1, src2, dst1, dst2, width, height,
                  srcStride1, srcStride2, dstStride1, dstStride2);
}
/**
 * Public entry point for the yvu9_to_yuy2 conversion.
 *
 * When CAN_COMPILE_X86_ASM is defined, the capability flags in gCpuCaps are
 * tested in the order MMX2, 3DNow, MMX and the matching variant is called;
 * otherwise, or when no flag is set, the plain C variant runs. All
 * arguments are forwarded unchanged.
 */
void yvu9_to_yuy2(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
                  uint8_t *dst,
                  unsigned width, unsigned height,
                  unsigned srcStride1, unsigned srcStride2,
                  unsigned srcStride3, unsigned dstStride)
{
#ifdef CAN_COMPILE_X86_ASM
    if (gCpuCaps.hasMMX2) {
        yvu9_to_yuy2_MMX2(src1, src2, src3, dst, width, height,
                          srcStride1, srcStride2, srcStride3, dstStride);
        return;
    }
    if (gCpuCaps.has3DNow) {
        yvu9_to_yuy2_3DNow(src1, src2, src3, dst, width, height,
                           srcStride1, srcStride2, srcStride3, dstStride);
        return;
    }
    if (gCpuCaps.hasMMX) {
        yvu9_to_yuy2_MMX(src1, src2, src3, dst, width, height,
                         srcStride1, srcStride2, srcStride3, dstStride);
        return;
    }
#endif
    yvu9_to_yuy2_C(src1, src2, src3, dst, width, height,
                   srcStride1, srcStride2, srcStride3, dstStride);
}
postproc/rgb2rgb.h
View file @
ac4d0aea
...
...
@@ -9,6 +9,7 @@
#ifndef RGB2RGB_INCLUDED
#define RGB2RGB_INCLUDED
/* A full collection of rgb to rgb(bgr) convertors */
/* Converters taking a source buffer, a destination buffer and the source
   size (src_size). NOTE(review): the names suggest 24 bpp input and
   32/16/15 bpp output; the definitions are not visible here - confirm. */
extern void rgb24to32(const uint8_t *src, uint8_t *dst, unsigned src_size);
extern void rgb24to16(const uint8_t *src, uint8_t *dst, unsigned src_size);
extern void rgb24to15(const uint8_t *src, uint8_t *dst, unsigned src_size);
...
...
@@ -18,10 +19,26 @@ extern void rgb32to15(const uint8_t *src,uint8_t *dst,unsigned src_size);
/* Same-order converters between 15, 16, 24 and 32 bit pixel layouts.
   Each takes the source buffer, the destination buffer and src_size. */
extern void rgb15to16(const uint8_t *src, uint8_t *dst, unsigned src_size);
extern void rgb15to24(const uint8_t *src, uint8_t *dst, unsigned src_size);
extern void rgb15to32(const uint8_t *src, uint8_t *dst, unsigned src_size);
extern void rgb16to15(const uint8_t *src, uint8_t *dst, unsigned src_size);
extern void rgb16to24(const uint8_t *src, uint8_t *dst, unsigned src_size);
extern void rgb16to32(const uint8_t *src, uint8_t *dst, unsigned src_size);

/* Converters that also swap the component order (rgb <-> bgr), grouped by
   source layout. rgb32tobgr32 is declared once, in the 32-bit group. */
extern void rgb24tobgr32(const uint8_t *src, uint8_t *dst, unsigned src_size);
extern void rgb24tobgr24(const uint8_t *src, uint8_t *dst, unsigned src_size);
extern void rgb24tobgr16(const uint8_t *src, uint8_t *dst, unsigned src_size);
extern void rgb24tobgr15(const uint8_t *src, uint8_t *dst, unsigned src_size);
extern void rgb32tobgr32(const uint8_t *src, uint8_t *dst, unsigned src_size);
extern void rgb32tobgr24(const uint8_t *src, uint8_t *dst, unsigned src_size);
extern void rgb32tobgr16(const uint8_t *src, uint8_t *dst, unsigned src_size);
extern void rgb32tobgr15(const uint8_t *src, uint8_t *dst, unsigned src_size);
extern void rgb16tobgr32(const uint8_t *src, uint8_t *dst, unsigned src_size);
extern void rgb16tobgr24(const uint8_t *src, uint8_t *dst, unsigned src_size);
extern void rgb16tobgr16(const uint8_t *src, uint8_t *dst, unsigned src_size);
extern void rgb16tobgr15(const uint8_t *src, uint8_t *dst, unsigned src_size);
extern void rgb15tobgr32(const uint8_t *src, uint8_t *dst, unsigned src_size);
extern void rgb15tobgr24(const uint8_t *src, uint8_t *dst, unsigned src_size);
extern void rgb15tobgr16(const uint8_t *src, uint8_t *dst, unsigned src_size);
extern void rgb15tobgr15(const uint8_t *src, uint8_t *dst, unsigned src_size);
extern void rgb8tobgr8(const uint8_t *src, uint8_t *dst, unsigned src_size);

/* Palette-based conversion: takes a pixel count and a palette pointer
   instead of a source size. */
extern void palette8torgb32(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette);
...
...
@@ -46,6 +63,18 @@ extern void planar2x(const uint8_t *src, uint8_t *dst, int width, int height, in
/* Plane-based helpers: each takes source and destination plane pointers,
   a width and height, and one stride per plane. */
extern void interleaveBytes(uint8_t *src1, uint8_t *src2, uint8_t *dst,
                            unsigned width, unsigned height, unsigned src1Stride,
                            unsigned src2Stride, unsigned dstStride);
/* Two source planes to two destination planes. */
extern void vu9_to_vu12(const uint8_t *src1, const uint8_t *src2,
                        uint8_t *dst1, uint8_t *dst2,
                        unsigned width, unsigned height,
                        unsigned srcStride1, unsigned srcStride2,
                        unsigned dstStride1, unsigned dstStride2);
/* Three source planes to a single destination buffer. */
extern void yvu9_to_yuy2(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
                         uint8_t *dst,
                         unsigned width, unsigned height,
                         unsigned srcStride1, unsigned srcStride2,
                         unsigned srcStride3, unsigned dstStride);
/* NOTE(review): presumably a mode flag bit; its users are not visible here. */
#define MODE_RGB 0x1
...
...
postproc/rgb2rgb_template.c
View file @
ac4d0aea
...
...
@@ -251,6 +251,61 @@ static inline void RENAME(bgr24torgb24)(const uint8_t *src, uint8_t *dst, unsign
}
}
/**
 * Template body for rgb16to15: for every 16-bit pixel, bits 5-15 are
 * shifted down by one and bits 0-4 are kept, i.e.
 * out = ((in >> 1) & 0x7FE0) | (in & 0x001F).
 *
 * With HAVE_MMX, whole 16-byte groups go through the MMX loop (masks taken
 * from mask15rg and mask15b); the rest is handled two pixels, then one
 * pixel, at a time in C.
 *
 * @param src      source buffer
 * @param dst      destination buffer, same size as the source
 * @param src_size size of src in bytes
 */
static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, unsigned src_size)
{
    register const uint8_t *s = src;
    register uint8_t *d = dst;
    register const uint8_t *end;
#ifdef HAVE_MMX
    const uint8_t *mm_end;
#endif
    end = s + src_size;
#ifdef HAVE_MMX
    __asm __volatile(PREFETCH" %0"::"m"(*s));
    __asm __volatile("movq %0, %%mm7"::"m"(mask15rg));
    __asm __volatile("movq %0, %%mm6"::"m"(mask15b));
    /* Bound the MMX loop by the number of whole 16-byte groups in the
       input. Rounding the absolute address of `end` down to a multiple of
       16 let the loop run past `end` whenever src was not 16-byte aligned. */
    mm_end = s + (src_size & ~15u);
    while (s < mm_end)
    {
        __asm __volatile(
            PREFETCH" 32%1\n\t"
            "movq %1, %%mm0\n\t"
            "movq 8%1, %%mm2\n\t"
            "movq %%mm0, %%mm1\n\t"
            "movq %%mm2, %%mm3\n\t"
            "psrlq $1, %%mm0\n\t"
            "psrlq $1, %%mm2\n\t"
            "pand %%mm7, %%mm0\n\t"
            "pand %%mm7, %%mm2\n\t"
            "pand %%mm6, %%mm1\n\t"
            "pand %%mm6, %%mm3\n\t"
            "por %%mm1, %%mm0\n\t"
            "por %%mm3, %%mm2\n\t"
            MOVNTQ" %%mm0, %0\n\t"
            MOVNTQ" %%mm2, 8%0"
            :"=m"(*d)
            :"m"(*s)
            );
        d += 16;
        s += 16;
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Two pixels per step, only while a full 32-bit word remains. The old
       "s < end" test also fired with 2 bytes left, overrunning both
       buffers and leaving the single-pixel tail below unreachable. */
    while (end - s >= 4)
    {
        register uint32_t x = *((const uint32_t *)s);
        *((uint32_t *)d) = ((x >> 1) & 0x7FE07FE0) | (x & 0x001F001F);
        s += 4;
        d += 4;
    }
    /* One trailing pixel, if any. */
    if (end - s >= 2)
    {
        register uint16_t x = *((const uint16_t *)s);
        *((uint16_t *)d) = ((x >> 1) & 0x7FE0) | (x & 0x001F);
        s += 2;
        d += 2;
    }
}
static
inline
void
RENAME
(
rgb32to16
)(
const
uint8_t
*
src
,
uint8_t
*
dst
,
unsigned
src_size
)
{
const
uint8_t
*
s
=
src
;
...
...
@@ -315,6 +370,70 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, unsigned
}
}
/**
 * Template body for rgb32tobgr16: packs every 4-byte source pixel into one
 * 16-bit value. In the C path byte 2 supplies bits 0-4 (byte >> 3), byte 1
 * supplies bits 5-10 (top six bits) and byte 0 supplies bits 11-15 (top
 * five bits); byte 3 is skipped.
 *
 * With HAVE_MMX, whole 16-byte groups (4 pixels) go through the MMX loop
 * using red_16mask, green_16mask and blue_16mask; remaining pixels are
 * handled by the C loop.
 *
 * @param src      source buffer, 4 bytes per pixel
 * @param dst      destination buffer, written as uint16_t values
 * @param src_size size of src in bytes
 */
static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{
    const uint8_t *s = src;
    const uint8_t *end;
#ifdef HAVE_MMX
    const uint8_t *mm_end;
#endif
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
#ifdef HAVE_MMX
    __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
    __asm __volatile(
        "movq %0, %%mm7\n\t"
        "movq %1, %%mm6\n\t"
        ::"m"(red_16mask),"m"(green_16mask));
    /* Bound the MMX loop by the number of whole 16-byte groups in the
       input. Rounding the absolute address of `end` down to a multiple of
       16 let the loop run past `end` whenever src was not 16-byte aligned
       (e.g. a 4-byte aligned buffer of 7 pixels was processed as 8). */
    mm_end = s + (src_size & ~15u);
    while (s < mm_end)
    {
        __asm __volatile(
            PREFETCH" 32%1\n\t"
            "movd %1, %%mm0\n\t"
            "movd 4%1, %%mm3\n\t"
            "punpckldq 8%1, %%mm0\n\t"
            "punpckldq 12%1, %%mm3\n\t"
            "movq %%mm0, %%mm1\n\t"
            "movq %%mm0, %%mm2\n\t"
            "movq %%mm3, %%mm4\n\t"
            "movq %%mm3, %%mm5\n\t"
            "psllq $8, %%mm0\n\t"
            "psllq $8, %%mm3\n\t"
            "pand %%mm7, %%mm0\n\t"
            "pand %%mm7, %%mm3\n\t"
            "psrlq $5, %%mm1\n\t"
            "psrlq $5, %%mm4\n\t"
            "pand %%mm6, %%mm1\n\t"
            "pand %%mm6, %%mm4\n\t"
            "psrlq $19, %%mm2\n\t"
            "psrlq $19, %%mm5\n\t"
            "pand %2, %%mm2\n\t"
            "pand %2, %%mm5\n\t"
            "por %%mm1, %%mm0\n\t"
            "por %%mm4, %%mm3\n\t"
            "por %%mm2, %%mm0\n\t"
            "por %%mm5, %%mm3\n\t"
            "psllq $16, %%mm3\n\t"
            "por %%mm3, %%mm0\n\t"
            MOVNTQ" %%mm0, %0\n\t"
            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
        d += 4;
        s += 16;
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Only convert complete 4-byte pixels; a partial trailing pixel would
       otherwise be read past the end of the source. */
    while (end - s >= 4)
    {
        const int r = *s++;
        const int g = *s++;
        const int b = *s++;
        *d++ = (b >> 3) | ((g & 0xFC) << 3) | ((r & 0xF8) << 8);
        s++;    /* skip the fourth byte of the source pixel */
    }
}
static
inline
void
RENAME
(
rgb32to15
)(
const
uint8_t
*
src
,
uint8_t
*
dst
,
unsigned
src_size
)
{
const
uint8_t
*
s
=
src
;
...
...
@@ -379,6 +498,70 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, unsigned
}
}
/**
 * Template body for rgb32tobgr15: packs every 4-byte source pixel into one
 * 15-bit value stored in 16 bits. In the C path byte 2 supplies bits 0-4,
 * byte 1 supplies bits 5-9 and byte 0 supplies bits 10-14 (top five bits
 * of each byte); byte 3 is skipped.
 *
 * With HAVE_MMX, whole 16-byte groups (4 pixels) go through the MMX loop
 * using red_15mask, green_15mask and blue_15mask; remaining pixels are
 * handled by the C loop.
 *
 * @param src      source buffer, 4 bytes per pixel
 * @param dst      destination buffer, written as uint16_t values
 * @param src_size size of src in bytes
 */
static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, unsigned src_size)
{
    const uint8_t *s = src;
    const uint8_t *end;
#ifdef HAVE_MMX
    const uint8_t *mm_end;
#endif
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
#ifdef HAVE_MMX
    __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
    __asm __volatile(
        "movq %0, %%mm7\n\t"
        "movq %1, %%mm6\n\t"
        ::"m"(red_15mask),"m"(green_15mask));
    /* Bound the MMX loop by the number of whole 16-byte groups in the
       input. Rounding the absolute address of `end` down to a multiple of
       16 let the loop run past `end` whenever src was not 16-byte aligned
       (e.g. a 4-byte aligned buffer of 7 pixels was processed as 8). */
    mm_end = s + (src_size & ~15u);
    while (s < mm_end)
    {
        __asm __volatile(
            PREFETCH" 32%1\n\t"
            "movd %1, %%mm0\n\t"
            "movd 4%1, %%mm3\n\t"
            "punpckldq 8%1, %%mm0\n\t"
            "punpckldq 12%1, %%mm3\n\t"
            "movq %%mm0, %%mm1\n\t"
            "movq %%mm0, %%mm2\n\t"
            "movq %%mm3, %%mm4\n\t"
            "movq %%mm3, %%mm5\n\t"
            "psllq $7, %%mm0\n\t"
            "psllq $7, %%mm3\n\t"
            "pand %%mm7, %%mm0\n\t"
            "pand %%mm7, %%mm3\n\t"
            "psrlq $6, %%mm1\n\t"
            "psrlq $6, %%mm4\n\t"
            "pand %%mm6, %%mm1\n\t"
            "pand %%mm6, %%mm4\n\t"
            "psrlq $19, %%mm2\n\t"
            "psrlq $19, %%mm5\n\t"
            "pand %2, %%mm2\n\t"
            "pand %2, %%mm5\n\t"
            "por %%mm1, %%mm0\n\t"
            "por %%mm4, %%mm3\n\t"
            "por %%mm2, %%mm0\n\t"
            "por %%mm5, %%mm3\n\t"
            "psllq $16, %%mm3\n\t"
            "por %%mm3, %%mm0\n\t"
            MOVNTQ" %%mm0, %0\n\t"
            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
        d += 4;
        s += 16;
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Only convert complete 4-byte pixels; a partial trailing pixel would
       otherwise be read past the end of the source. */
    while (end - s >= 4)
    {
        const int r = *s++;
        const int g = *s++;
        const int b = *s++;
        *d++ = (b >> 3) | ((g & 0xF8) << 2) | ((r & 0xF8) << 7);
        s++;    /* skip the fourth byte of the source pixel */
    }
}
static
inline
void
RENAME
(
rgb24to16
)(
const
uint8_t
*
src
,
uint8_t
*
dst
,
unsigned
src_size
)
{
const
uint8_t
*
s
=
src
;
...
...
@@ -442,6 +625,69 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, unsigned
}
}
static
inline
void
RENAME
(
rgb24tobgr16
)(
const
uint8_t
*
src
,
uint8_t
*
dst
,
unsigned
int
src_size
)
{
const
uint8_t
*
s
=
src
;
const
uint8_t
*
end
;
#ifdef HAVE_MMX
const
uint8_t
*
mm_end
;
#endif
uint16_t
*
d
=
(
uint16_t
*
)
dst
;
end
=
s
+
src_size
;
#ifdef HAVE_MMX
__asm
__volatile
(
PREFETCH
" %0"
::
"m"
(
*
src
)
:
"memory"
);
__asm
__volatile
(
"movq %0, %%mm7
\n\t
"
"movq %1, %%mm6
\n\t
"
::
"m"
(
red_16mask
),
"m"
(
green_16mask
));
mm_end
=
(
uint8_t
*
)((((
unsigned
long
)
end
)
/
16
)
*
16
);
while
(
s
<
mm_end
)
{
__asm
__volatile
(
PREFETCH
" 32%1
\n\t
"
"movd %1, %%mm0
\n\t
"
"movd 3%1, %%mm3
\n\t
"
"punpckldq 6%1, %%mm0
\n\t
"
"punpckldq 9%1, %%mm3
\n\t
"
"movq %%mm0, %%mm1
\n\t
"