/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/****************************************************************************
*
* Module Title : scaleopt.cpp
*
* Description : Optimized scaling functions
*
****************************************************************************/
#include "pragmas.h"
/****************************************************************************
* Module Statics
****************************************************************************/
/* Rounding term: 128 in each of four 16-bit lanes. The MMX scalers in this
 * file add it to their weighted sums right before shifting right by 8. */
__declspec(align(16)) const static unsigned short round_values[] = { 128, 128, 128, 128 };
#include "vpx_scale/vpx_scale.h"
#include "vpx_mem/vpx_mem.h"
/* Per-lane weights (out of 256) used by horizontal_line_5_4_scale_mmx.
 * const54_1 multiplies source pixels 0..3 of a group, const54_2 multiplies
 * source pixels 1..4; lane k of the two tables sums to 256. */
__declspec(align(16)) const static unsigned short const54_2[] = { 0, 64, 128, 192 };
__declspec(align(16)) const static unsigned short const54_1[] = {256, 192, 128, 64 };
/****************************************************************************
*
* ROUTINE : horizontal_line_5_4_scale_mmx
*
* INPUTS : const unsigned char *source : Pointer to source data.
* unsigned int source_width : Stride of source.
* unsigned char *dest : Pointer to destination data.
* unsigned int dest_width : Stride of destination (NOT USED).
*
* OUTPUTS : None.
*
* RETURNS : void
*
* FUNCTION : Copies horizontal line of pixels from source to
* destination scaling up by 4 to 5.
*
* SPECIAL NOTES : None.
*
****************************************************************************/
static
void horizontal_line_5_4_scale_mmx
(
const unsigned char *source,
unsigned int source_width,
unsigned char *dest,
unsigned int dest_width
) {
/*
unsigned i;
unsigned int a, b, c, d, e;
unsigned char *des = dest;
const unsigned char *src = source;
(void) dest_width;
for ( i=0; i<source_width; i+=5 )
{
a = src[0];
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
b = src[1];
c = src[2];
d = src[3];
e = src[4];
des[0] = a;
des[1] = ((b*192 + c* 64 + 128)>>8);
des[2] = ((c*128 + d*128 + 128)>>8);
des[3] = ((d* 64 + e*192 + 128)>>8);
src += 5;
des += 4;
}
*/
(void) dest_width;
__asm {
mov esi, source;
mov edi, dest;
mov ecx, source_width;
movq mm5, const54_1;
pxor mm7, mm7;
movq mm6, const54_2;
movq mm4, round_values;
lea edx, [esi+ecx];
horizontal_line_5_4_loop:
movq mm0, QWORD PTR [esi];
00 01 02 03 04 05 06 07
movq mm1, mm0;
00 01 02 03 04 05 06 07
psrlq mm0, 8;
01 02 03 04 05 06 07 xx
punpcklbw mm1, mm7;
xx 00 xx 01 xx 02 xx 03
punpcklbw mm0, mm7;
xx 01 xx 02 xx 03 xx 04
pmullw mm1, mm5
pmullw mm0, mm6
add esi, 5
add edi, 4
paddw mm1, mm0
paddw mm1, mm4
psrlw mm1, 8
cmp esi, edx
packuswb mm1, mm7
movd DWORD PTR [edi-4], mm1
jl horizontal_line_5_4_loop
}
}
/* Row weights out of 256 (64 = 1/4, 128 = 2/4, 192 = 3/4), replicated in four
 * 16-bit lanes; used as pmullw operands by vertical_band_5_4_scale_mmx. */
__declspec(align(16)) const static unsigned short one_fourths[] = { 64, 64, 64, 64 };
__declspec(align(16)) const static unsigned short two_fourths[] = { 128, 128, 128, 128 };
__declspec(align(16)) const static unsigned short three_fourths[] = { 192, 192, 192, 192 };
static
void vertical_band_5_4_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) {
141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
__asm {
push ebx
mov esi, source // Get the source and destination pointer
mov ecx, src_pitch // Get the pitch size
mov edi, dest // tow lines below
pxor mm7, mm7 // clear out mm7
mov edx, dest_pitch // Loop counter
mov ebx, dest_width
vs_5_4_loop:
movd mm0, DWORD ptr [esi] // src[0];
movd mm1, DWORD ptr [esi+ecx] // src[1];
movd mm2, DWORD ptr [esi+ecx*2]
lea eax, [esi+ecx*2] //
punpcklbw mm1, mm7
punpcklbw mm2, mm7
movq mm3, mm2
pmullw mm1, three_fourths
pmullw mm2, one_fourths
movd mm4, [eax+ecx]
pmullw mm3, two_fourths
punpcklbw mm4, mm7
movq mm5, mm4
pmullw mm4, two_fourths
paddw mm1, mm2
movd mm6, [eax+ecx*2]
pmullw mm5, one_fourths
paddw mm1, round_values;
paddw mm3, mm4
psrlw mm1, 8
punpcklbw mm6, mm7
paddw mm3, round_values
pmullw mm6, three_fourths
psrlw mm3, 8
packuswb mm1, mm7
packuswb mm3, mm7
movd DWORD PTR [edi], mm0
movd DWORD PTR [edi+edx], mm1
paddw mm5, mm6
movd DWORD PTR [edi+edx*2], mm3
lea eax, [edi+edx*2]
paddw mm5, round_values
psrlw mm5, 8
add edi, 4
packuswb mm5, mm7
movd DWORD PTR [eax+edx], mm5
211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
add esi, 4
sub ebx, 4
jg vs_5_4_loop
pop ebx
}
}
/* Per-lane weights (out of 256) used by horizontal_line_5_3_scale_mmx.
 * const53_2 multiplies the even-indexed bytes of a group, const53_1 the
 * odd-indexed bytes shifted up one lane; lanes 0..2 of the two tables sum to
 * 256 and lane 3 is zero in both. */
__declspec(align(16)) const static unsigned short const53_1[] = { 0, 85, 171, 0 };
__declspec(align(16)) const static unsigned short const53_2[] = {256, 171, 85, 0 };
static
void horizontal_line_5_3_scale_mmx
(
const unsigned char *source,
unsigned int source_width,
unsigned char *dest,
unsigned int dest_width
) {
(void) dest_width;
__asm {
mov esi, source;
mov edi, dest;
mov ecx, source_width;
movq mm5, const53_1;
pxor mm7, mm7;
movq mm6, const53_2;
movq mm4, round_values;
lea edx, [esi+ecx-5];
horizontal_line_5_3_loop:
movq mm0, QWORD PTR [esi];
00 01 02 03 04 05 06 07
movq mm1, mm0;
00 01 02 03 04 05 06 07
psllw mm0, 8;
xx 00 xx 02 xx 04 xx 06
psrlw mm1, 8;
01 xx 03 xx 05 xx 07 xx
psrlw mm0, 8;
00 xx 02 xx 04 xx 06 xx
psllq mm1, 16;
xx xx 01 xx 03 xx 05 xx
pmullw mm0, mm6
pmullw mm1, mm5
add esi, 5
add edi, 3
paddw mm1, mm0
paddw mm1, mm4
psrlw mm1, 8
cmp esi, edx
packuswb mm1, mm7
movd DWORD PTR [edi-3], mm1
jl horizontal_line_5_3_loop
281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350
// exit condition
movq mm0, QWORD PTR [esi];
00 01 02 03 04 05 06 07
movq mm1, mm0;
00 01 02 03 04 05 06 07
psllw mm0, 8;
xx 00 xx 02 xx 04 xx 06
psrlw mm1, 8;
01 xx 03 xx 05 xx 07 xx
psrlw mm0, 8;
00 xx 02 xx 04 xx 06 xx
psllq mm1, 16;
xx xx 01 xx 03 xx 05 xx
pmullw mm0, mm6
pmullw mm1, mm5
paddw mm1, mm0
paddw mm1, mm4
psrlw mm1, 8
packuswb mm1, mm7
movd eax, mm1
mov edx, eax
shr edx, 16
mov WORD PTR[edi], ax
mov BYTE PTR[edi+2], dl
}
}
/* Row weights out of 256 (85 ~ 1/3, 171 ~ 2/3), replicated in four 16-bit
 * lanes; loaded into mm5/mm6 by vertical_band_5_3_scale_mmx. */
__declspec(align(16)) const static unsigned short one_thirds[] = { 85, 85, 85, 85 };
__declspec(align(16)) const static unsigned short two_thirds[] = { 171, 171, 171, 171 };
/*
 * vertical_band_5_3_scale_mmx
 *
 * Scales a band of 5 source rows down to 3 destination rows, 4 columns per
 * loop pass. With s0..s4 the source rows (src_pitch bytes apart) and d0..d2
 * the destination rows (dest_pitch bytes apart):
 *
 *     d0 = s0
 *     d1 = (s1* 85 + s2*171 + 128) >> 8
 *     d2 = (s3*171 + s4* 85 + 128) >> 8
 *
 * dest_width is the column count; the loop subtracts 4 per pass and repeats
 * while the remainder is > 0, so a width that is not a multiple of 4 is
 * rounded up to the next multiple of 4.
 */
static
void vertical_band_5_3_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) {
    __asm {
        push        ebx

        mov         esi,    source              // esi = source row 0
        mov         ecx,    src_pitch           // ecx = source pitch

        mov         edi,    dest                // edi = destination row 0
        pxor        mm7,    mm7                 // mm7 = 0

        mov         edx,    dest_pitch          // edx = destination pitch
        movq        mm5,    one_thirds

        movq        mm6,    two_thirds
        mov         ebx,    dest_width          // ebx = columns remaining (loop counter)

    vs_5_3_loop:
        movd        mm0,    DWORD ptr [esi]         // s0, copied through unchanged
        movd        mm1,    DWORD ptr [esi+ecx]     // s1

        movd        mm2,    DWORD ptr [esi+ecx*2]   // s2
        lea         eax,    [esi+ecx*2]             // eax = &s2

        punpcklbw   mm1,    mm7
        punpcklbw   mm2,    mm7

        pmullw      mm1,    mm5                     // s1 * 85
        pmullw      mm2,    mm6                     // s2 * 171

        movd        mm3,    DWORD ptr [eax+ecx]     // s3
        movd        mm4,    DWORD ptr [eax+ecx*2]   // s4

        punpcklbw   mm3,    mm7
        punpcklbw   mm4,    mm7

        pmullw      mm3,    mm6                     // s3 * 171
        pmullw      mm4,    mm5                     // s4 * 85

        movd        DWORD PTR [edi],        mm0     // d0
        paddw       mm1,    mm2

        paddw       mm1,    round_values
        psrlw       mm1,    8                       // d1

        packuswb    mm1,    mm7
        paddw       mm3,    mm4

        paddw       mm3,    round_values
        movd        DWORD PTR [edi+edx],    mm1     // d1

        psrlw       mm3,    8                       // d2
        packuswb    mm3,    mm7

        movd        DWORD PTR [edi+edx*2],  mm3     // d2

        add         edi,    4
        add         esi,    4

        sub         ebx,    4
        jg          vs_5_3_loop

        pop         ebx
    }
}
/****************************************************************************
 *
 *  ROUTINE       : horizontal_line_2_1_scale_mmx
 *
 *  INPUTS        : const unsigned char *source : Pointer to source data.
 *                  unsigned int source_width   : NOT USED.
 *                  unsigned char *dest         : Pointer to destination data.
 *                  unsigned int dest_width     : Number of destination bytes
 *                                                to produce.
 *
 *  OUTPUTS       : None.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : 2 to 1 down-scaling of a horizontal line of pixels:
 *                  dest[i] = source[2*i] (every second source byte is kept,
 *                  no filtering).
 *
 *  SPECIAL NOTES : Works on 4 destination bytes (8 source bytes) per pass and
 *                  always runs at least one pass, so dest_width is effectively
 *                  rounded up to a multiple of 4 (minimum 4).
 *
 ****************************************************************************/
static
void horizontal_line_2_1_scale_mmx
(
    const unsigned char *source,
    unsigned int source_width,
    unsigned char *dest,
    unsigned int dest_width
) {
    (void) source_width;

    __asm {
        mov         esi,    source
        mov         edi,    dest

        pxor        mm7,    mm7                 // mm7 = 0, pack filler
        mov         ecx,    dest_width

        xor         edx,    edx                 // edx = destination index

    hs_2_1_loop:
        movq        mm0,    [esi+edx*2]         // 8 source bytes

        psllw       mm0,    8                   // drop the odd bytes ...
        psrlw       mm0,    8                   // ... leaving the even bytes as words

        packuswb    mm0,    mm7                 // 4 words -> 4 bytes
        movd        DWORD Ptr [edi+edx], mm0

        add         edx,    4
        cmp         edx,    ecx

        jb          hs_2_1_loop                 // unsigned: index vs. unsigned width
    }
}
/*
 * vertical_band_2_1_scale_mmx
 *
 * Non-interpolating 2:1 vertical band scaler: the output line is a plain
 * copy of dest_width bytes from source to dest. Neither pitch is consulted.
 */
static
void vertical_band_2_1_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) {
    /* Only a single line is touched, so both pitches go unused. */
    (void) src_pitch;
    (void) dest_pitch;

    vpx_memcpy(dest, source, dest_width);
}
/* Filter taps out of 256 (48 = 3/16, 160 = 10/16), replicated in four 16-bit
 * lanes; vertical_band_2_1_scale_i_mmx applies 48/160/48 to three
 * consecutive rows. */
__declspec(align(16)) const static unsigned short three_sixteenths[] = { 48, 48, 48, 48 };
__declspec(align(16)) const static unsigned short ten_sixteenths[] = { 160, 160, 160, 160 };
/*
 * vertical_band_2_1_scale_i_mmx
 *
 * Interpolating 2:1 vertical band scaler. Produces one destination line from
 * three source lines: the line above `source`, `source` itself and the line
 * below it (src_pitch bytes apart):
 *
 *     dest[x] = (above[x]*48 + source[x]*160 + below[x]*48 + 128) >> 8
 *
 * source     : current source line; source - src_pitch is also read
 * src_pitch  : distance in bytes between source lines
 * dest       : destination line
 * dest_pitch : NOT USED
 * dest_width : number of columns; processed 4 per pass, at least one pass
 */
static
void vertical_band_2_1_scale_i_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) {
    (void) dest_pitch;

    __asm {
        mov         esi,    source
        mov         edi,    dest

        mov         eax,    src_pitch
        mov         edx,    dest_width

        pxor        mm7,    mm7                 // mm7 = 0
        sub         esi,    eax                 // back one line

        lea         ecx,    [esi+edx]           // ecx = end of the line above
        movq        mm6,    round_values

        movq        mm5,    three_sixteenths
        movq        mm4,    ten_sixteenths

    vs_2_1_i_loop:
        movd        mm0,    [esi]               // line above
        movd        mm1,    [esi+eax]           // current line

        movd        mm2,    [esi+eax*2]         // line below
        punpcklbw   mm0,    mm7

        pmullw      mm0,    mm5                 // above * 48
        punpcklbw   mm1,    mm7

        pmullw      mm1,    mm4                 // current * 160
        punpcklbw   mm2,    mm7

        pmullw      mm2,    mm5                 // below * 48
        paddw       mm0,    mm6                 // + 128 (round_values, preloaded in mm6)

        paddw       mm1,    mm2
        paddw       mm0,    mm1

        psrlw       mm0,    8
        packuswb    mm0,    mm7

        movd        DWORD PTR [edi], mm0
        add         esi,    4

        add         edi,    4
        cmp         esi,    ecx

        jb          vs_2_1_i_loop               // unsigned: esi/ecx are addresses
    }
}
/*
 * register_mmxscalers
 *
 * Points the vp8_* scaler function pointers at the MMX implementations
 * defined in this file.
 */
void
register_mmxscalers(void) {
    /* Horizontal line scalers. */
    vp8_horizontal_line_5_4_scale = horizontal_line_5_4_scale_mmx;
    vp8_horizontal_line_5_3_scale = horizontal_line_5_3_scale_mmx;
    vp8_horizontal_line_2_1_scale = horizontal_line_2_1_scale_mmx;

    /* Vertical band scalers. */
    vp8_vertical_band_5_4_scale   = vertical_band_5_4_scale_mmx;
    vp8_vertical_band_5_3_scale   = vertical_band_5_3_scale_mmx;
    vp8_vertical_band_2_1_scale   = vertical_band_2_1_scale_mmx;
    vp8_vertical_band_2_1_scale_i = vertical_band_2_1_scale_i_mmx;
}