Commit 80c92b9e authored by Simon Morlat's avatar Simon Morlat

scaler works not so bad

parent c2f19a86
......@@ -27,6 +27,7 @@ void ms_line_rgb2rgb565_4(const int16_t *r, const int16_t *g, const int16_t *b,
void ms_line_rgb2rgb565_8(const int16_t *r, const int16_t *g, const int16_t *b, uint16_t *dst, int width);
void ms_line_scale_8(const uint32_t *grid, const int16_t * const src[], int16_t *dst[], int dst_width, const int16_t *filter);
void ms_line_scale_simple_8(const uint32_t *grid, const int16_t * const src[], int16_t *dst[], int dst_width);
typedef struct AndroidScalerCtx{
MSVideoSize src_size;
......@@ -136,14 +137,16 @@ static inline void yuv2rgb_4x2(const uint8_t *y1, const uint8_t *y2, const uint8
LOAD_UV_PREMULTS(0)
LOAD_UV_PREMULTS(1)
max=vld1q_s32(yuvmax);
/*the following does not work */
/*max=vdupq_n_s32(255);*/
rr1=vaddq_s32(ry1,rvr);
rr2=vaddq_s32(ry2,rvr);
rg1=vaddq_s32(ry1,rvug);
rg2=vaddq_s32(ry2,rvug);
rb1=vaddq_s32(ry1,rub);
rb2=vaddq_s32(ry2,rub);
max=vmovq_n_s32(255);
rr1=vminq_s32(vabsq_s32(vshrq_n_s32(rr1,13)),max);
rr2=vminq_s32(vabsq_s32(vshrq_n_s32(rr2,13)),max);
......@@ -200,6 +203,7 @@ static inline void line_horizontal_scale(AndroidScalerCtx * ctx, int16_t *src_li
dst_lines[2][i]=src_lines[2][pos];
}
#else
//ms_line_scale_simple_8(ctx->hgrid,src_lines,dst_lines,ctx->dst_w_padded);
ms_line_scale_8(ctx->hgrid,src_lines,dst_lines,ctx->dst_w_padded,ctx->hcoeffs);
#endif
}
......
/*
mediastreamer2 library - modular sound and video processing and streaming
Copyright (C) 2006-2010 Belledonne Communications SARL (simon.morlat@linphone.org)
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifdef __ELF__
# define ELF
......@@ -80,13 +99,12 @@ function ms_line_rgb2rgb565_8
bx lr
.endfunc
.macro load_pixels_4 d_reg1, d_reg2, src
mov r12, \src
.macro load_pixels_4_2 d_reg1, d_reg2, src
add r12, \src, #2 /* offset to reach next pixels */
vld1.16 \d_reg1[0], [\src], r4 /* transfer the pixel pointed by r4 into q2 */
vld1.16 \d_reg1[1], [\src], r5
vld1.16 \d_reg1[2], [\src], r6
vld1.16 \d_reg1[3], [\src], r7
add r12 , #2
vld1.16 \d_reg2[0], [r12], r4 /* transfer the pixel pointed by r4 into q2 */
vld1.16 \d_reg2[1], [r12], r5
vld1.16 \d_reg2[2], [r12], r6
......@@ -105,27 +123,26 @@ function ms_line_rgb2rgb565_8
function ms_line_scale_8
push {r4-r12,lr} /* we use lr as a normal register here */
ldr lr , [sp ,#40] /*r4-r12+lr= 10 registers 40=10*4 offset to retrieve filter table*/
ldr r8, [r1,#4]
ldr r9, [r1,#8]
ldr r1, [r1]
ldr r10, [r2,#4]
ldr r11, [r2,#8]
ldr r2, [r2]
/* vmov q10, #255 */
ldm r1, {r8,r9}
ldr r1, [r1,#8]
ldm r2, {r10,r11}
ldr r2, [r2,#8]
1:
ldrd r4,r5, [r0],#8 /* load 2 entries of the grid into r4,r5 */
ldrd r6,r7, [r0],#8 /* load 2 entries of the grid into r6,r7 */
load_pixels_4 d4, d10, r1
load_pixels_4 d6, d12, r8
load_pixels_4 d8, d14, r9
ldm r0!, {r4,r5,r6,r7} /* load 4 entries of the grid into r4,r5,r6,r7 */
ldrd r4,r5, [r0],#8 /* load 2 entries of the grid into r4,r5 */
ldrd r6,r7, [r0],#8 /* load 2 entries of the grid into r6,r7 */
load_pixels_4_2 d4, d10, r1
load_pixels_4_2 d6, d12, r8
load_pixels_4_2 d8, d14, r9
load_pixels_4 d5, d11, r1
load_pixels_4 d7, d13, r8
load_pixels_4 d9, d15, r9
ldm r0!, {r4,r5,r6,r7} /* load 4 more entries of the grid into r4,r5,r6,r7 */
load_pixels_4_2 d5, d11, r1
load_pixels_4_2 d7, d13, r8
load_pixels_4_2 d9, d15, r9
/* x(n)= q2,q3,q4 x(n+1)=q5,q6,q7 */
vld1.16 {q8} , [lr]! /* load the filtering coefficients in q8*/
/* we need to compute (coef*(x(n+1)-x(n)) + (x(n)<<7))>>7 */
......@@ -144,6 +161,47 @@ function ms_line_scale_8
/*
 * load_pixels_4 d_reg, src
 *
 * Gathers four 16-bit values into lanes 0..3 of \d_reg.
 * Each load reads one value at [\src] and then advances \src by the byte
 * offset held in r4, r5, r6 and r7 respectively (register post-increment),
 * so r4-r7 must be loaded by the caller before the macro is expanded.
 * On exit \src has been advanced by r4+r5+r6+r7.
 */
.macro load_pixels_4 d_reg, src
	vld1.16		{\d_reg[0]}, [\src], r4		/* lane 0 <- [src], then src += r4 */
	vld1.16		{\d_reg[1]}, [\src], r5		/* lane 1 <- [src], then src += r5 */
	vld1.16		{\d_reg[2]}, [\src], r6		/* lane 2 <- [src], then src += r6 */
	vld1.16		{\d_reg[3]}, [\src], r7		/* lane 3 <- [src], then src += r7 */
.endm
/*
 * void ms_line_scale_simple_8(const uint32_t *grid, const int16_t * const src[],
 *                             int16_t *dst[], int dst_width);
 *
 * Copies selected pixels from three source lines to three destination lines,
 * 8 pixels per iteration, without filtering.
 *
 * In (AAPCS):
 *   r0 = grid      : one 32-bit entry per output pixel; each entry is the byte
 *                    offset added to the source pointers after that pixel has
 *                    been loaded (see load_pixels_4). The same entries are
 *                    applied to all three lines.
 *   r1 = src       : array of 3 source line pointers
 *   r2 = dst       : array of 3 destination line pointers
 *   r3 = dst_width : number of output pixels; processed in whole groups of 8,
 *                    so the destination lines must have room for dst_width
 *                    rounded up to a multiple of 8.
 *
 * Register roles inside the loop:
 *   r1, r8, r9   = current source pointers (line 0, 1, 2)
 *   r2, r10, r11 = current destination pointers (line 0, 1, 2)
 *   r4-r7        = four grid entries in flight
 *   q2, q3, q8   = 8 gathered pixels of line 0, 1, 2
 *
 * Only caller-saved NEON registers are used: q4-q7 (d8-d15) are callee-saved
 * under the AAPCS and must not be clobbered without being preserved, hence
 * q8 (d16/d17) for the third line.
 */
function ms_line_scale_simple_8
	cmp		r3, #0
	ble		2f			/* nothing to do for dst_width <= 0 */
	push		{r4-r11}
	ldr		r8, [r1,#4]		/* r8  = src[1] */
	ldr		r9, [r1,#8]		/* r9  = src[2] */
	ldr		r1, [r1]		/* r1  = src[0] */
	ldr		r10, [r2,#4]		/* r10 = dst[1] */
	ldr		r11, [r2,#8]		/* r11 = dst[2] */
	ldr		r2, [r2]		/* r2  = dst[0] */
1:
	ldrd		r4,r5, [r0],#8		/* load 2 entries of the grid into r4,r5 */
	ldrd		r6,r7, [r0],#8		/* load 2 entries of the grid into r6,r7 */
	load_pixels_4	d4, r1			/* low half of q2: pixels 0..3 of line 0 */
	load_pixels_4	d6, r8			/* low half of q3: pixels 0..3 of line 1 */
	load_pixels_4	d16, r9			/* low half of q8: pixels 0..3 of line 2 */
	ldrd		r4,r5, [r0],#8		/* load 2 more entries of the grid into r4,r5 */
	ldrd		r6,r7, [r0],#8		/* load 2 more entries of the grid into r6,r7 */
	load_pixels_4	d5, r1			/* high half of q2: pixels 4..7 of line 0 */
	load_pixels_4	d7, r8			/* high half of q3: pixels 4..7 of line 1 */
	load_pixels_4	d17, r9			/* high half of q8: pixels 4..7 of line 2 */
	vst1.16		{q2}, [r2]!		/* write the 8 selected pixels of line 0 */
	vst1.16		{q3}, [r10]!		/* write the 8 selected pixels of line 1 */
	vst1.16		{q8}, [r11]!		/* write the 8 selected pixels of line 2 */
	subs		r3,r3,#8		/* we have processed 8 pixels, decrement width */
	bgt		1b			/* bgt (not bne): also stops when width is not a multiple of 8 */
	pop		{r4-r11}
2:
	bx		lr
.endfunc
.if 0
/* void line_yuv2rgb(uint8_t *y, uint8_t *u, uint8_t *v, int16_t *r, int16_t *g, int16_t *b, int n) */
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment