Commit c2f19a86 authored by Simon Morlat's avatar Simon Morlat
Browse files

unstable horizontal filtering..

parent 8e568062
......@@ -26,7 +26,7 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
void ms_line_rgb2rgb565_4(const int16_t *r, const int16_t *g, const int16_t *b, uint16_t *dst, int width);
void ms_line_rgb2rgb565_8(const int16_t *r, const int16_t *g, const int16_t *b, uint16_t *dst, int width);
void ms_line_scale_8(const uint32_t *grid, const int16_t *src[], int16_t *dst[], int dst_width);
void ms_line_scale_8(const uint32_t *grid, const int16_t * const src[], int16_t *dst[], int dst_width, const int16_t *filter);
typedef struct AndroidScalerCtx{
MSVideoSize src_size;
......@@ -34,6 +34,7 @@ typedef struct AndroidScalerCtx{
int16_t *unscaled_2lines[3];
int16_t *hscaled_img[3];
uint32_t *hgrid;
int16_t *hcoeffs;
int hscaled_img_stride;
int unscaled_stride;
int w_inc;
......@@ -126,7 +127,6 @@ static inline void yuv2rgb_4x2(const uint8_t *y1, const uint8_t *y2, const uint8
int32x4_t rub;
int32x4_t rr1,rg1,rb1,rr2,rg2,rb2;
int32x4_t max;
int16x4_t res1,res2;
LOAD_Y_PREMULTS(0)
LOAD_Y_PREMULTS(1)
......@@ -143,7 +143,7 @@ static inline void yuv2rgb_4x2(const uint8_t *y1, const uint8_t *y2, const uint8
rb1=vaddq_s32(ry1,rub);
rb2=vaddq_s32(ry2,rub);
max=vld1q_s32(yuvmax);
max=vmovq_n_s32(255);
rr1=vminq_s32(vabsq_s32(vshrq_n_s32(rr1,13)),max);
rr2=vminq_s32(vabsq_s32(vshrq_n_s32(rr2,13)),max);
......@@ -152,20 +152,14 @@ static inline void yuv2rgb_4x2(const uint8_t *y1, const uint8_t *y2, const uint8
rb1=vminq_s32(vabsq_s32(vshrq_n_s32(rb1,13)),max);
rb2=vminq_s32(vabsq_s32(vshrq_n_s32(rb2,13)),max);
res1=vmovn_s32(rr1);
res2=vmovn_s32(rr2);
vst1_s16(r1,res1);
vst1_s16(r2,res2);
vst1_s16(r1,vmovn_s32(rr1));
vst1_s16(r2,vmovn_s32(rr2));
res1=vmovn_s32(rg1);
res2=vmovn_s32(rg2);
vst1_s16(g1,res1);
vst1_s16(g2,res2);
vst1_s16(g1,vmovn_s32(rg1));
vst1_s16(g2,vmovn_s32(rg2));
res1=vmovn_s32(rb1);
res2=vmovn_s32(rb2);
vst1_s16(b1,res1);
vst1_s16(b2,res2);
vst1_s16(b1,vmovn_s32(rb1));
vst1_s16(b2,vmovn_s32(rb2));
}
#endif
......@@ -206,7 +200,7 @@ static inline void line_horizontal_scale(AndroidScalerCtx * ctx, int16_t *src_li
dst_lines[2][i]=src_lines[2][pos];
}
#else
ms_line_scale_8(ctx->hgrid,src_lines,dst_lines,ctx->dst_w_padded);
ms_line_scale_8(ctx->hgrid,src_lines,dst_lines,ctx->dst_w_padded,ctx->hcoeffs);
#endif
}
......@@ -316,11 +310,13 @@ static MSScalerContext *android_create_scaler_context(int src_w, int src_h, MSPi
/*compute the grid (map) for original lines into destination lines*/
ctx->dst_w_padded=ROUND_UP(dst_w,PAD);
ctx->hgrid=ms_new0(uint32_t,ctx->dst_w_padded);
ctx->hcoeffs=ms_new0(int16_t,ctx->dst_w_padded);
tmp=0;
prev=0;
for(i=0;i<dst_w;++i){
int offset=(tmp>>16)*2;
ctx->hgrid[i]=offset-prev;
ctx->hcoeffs[i]=(tmp&0xffff)>>9;
prev=offset;
tmp+=ctx->w_inc;
}
......
......@@ -80,44 +80,66 @@ function ms_line_rgb2rgb565_8
bx lr
.endfunc
.macro load_pixels_4 d_reg, src
vld1.16 \d_reg[0], [\src], r4 /* transfer the pixel pointed by r4 into q2 */
vld1.16 \d_reg[1], [\src], r5
vld1.16 \d_reg[2], [\src], r6
vld1.16 \d_reg[3], [\src], r7
/*
 * load_pixels_4 d_reg1, d_reg2, src
 *
 * Gathers four 16-bit elements into lanes 0..3 of d_reg1 and, for each of
 * them, the element located 2 bytes further in memory into the same lane of
 * d_reg2.
 *
 * Implicit inputs : r4, r5, r6, r7 = post-increment amounts applied to the
 *                   pointer after lane 0, 1, 2 and 3 respectively (set by the
 *                   caller before invoking the macro).
 * Side effects    : \src is advanced by r4+r5+r6+r7; r12 is clobbered (it is
 *                   used as the second, +2 byte, pointer).
 */
.macro load_pixels_4 d_reg1, d_reg2, src
mov r12, \src
vld1.16 \d_reg1[0], [\src], r4 /* lane 0 <- 16-bit element at [src]; then src += r4 */
vld1.16 \d_reg1[1], [\src], r5 /* lane 1 <- element at [src]; then src += r5 */
vld1.16 \d_reg1[2], [\src], r6 /* lane 2 <- element at [src]; then src += r6 */
vld1.16 \d_reg1[3], [\src], r7 /* lane 3 <- element at [src]; then src += r7 */
add r12 , #2 /* r12 = initial src + 2 bytes, i.e. one 16-bit element further */
vld1.16 \d_reg2[0], [r12], r4 /* same walk as above, shifted by one element */
vld1.16 \d_reg2[1], [r12], r5
vld1.16 \d_reg2[2], [r12], r6
vld1.16 \d_reg2[3], [r12], r7
.endm
/*void ms_line_scale_8(const uint32_t *grid, const uint16_t **src, uint16_t **dst int dst_width);*/
/*
 * filter_pixels_8 q_srcdst, q_src2
 *
 * In-place blend of 8 signed 16-bit lanes:
 *   q_srcdst = | ((q_srcdst << 7) + q8 * (q_src2 - q_srcdst)) >> 7 |
 *
 * Implicit input : q8 = per-lane coefficients (loaded by the caller). The
 *                  <<7 / >>7 pair means a coefficient is a weight in 1/128
 *                  units.
 * Clobbers       : q9 (scratch for the lane differences).
 * All operations are 16-bit per lane; nothing here saturates.
 */
.macro filter_pixels_8 q_srcdst, q_src2
vsub.s16 q9 , \q_src2, \q_srcdst /* q9=x(n+1)-x(n) */
vshl.s16 \q_srcdst , \q_srcdst , #7 /* (x(n)<<7) */
vmla.s16 \q_srcdst , q9, q8 /* q_srcdst += coef * q9 */
vshr.s16 \q_srcdst , \q_srcdst , #7 /* arithmetic shift back to pixel scale */
vabs.s16 \q_srcdst , \q_srcdst /* take the absolute value of each lane */
.endm
/*
 * void ms_line_scale_8(const uint32_t *grid, const int16_t * const src[],
 *                      int16_t *dst[], int dst_width, const int16_t *filter);
 *
 * Horizontally resamples three planes of 16-bit samples, 8 output samples
 * per plane per loop iteration, blending each selected sample x(n) with its
 * right-hand neighbour x(n+1) using the coefficients read from 'filter'.
 *
 * ABI: ARM AAPCS (32-bit). The fifth argument (filter) is passed on the stack.
 *
 * Register roles:
 *   r0            grid pointer, consumed 4 entries at a time
 *   r1, r8, r9    read pointers for src[0], src[1], src[2]
 *   r2, r10, r11  write pointers for dst[0], dst[1], dst[2]
 *   r3            number of output samples still to produce
 *   r4-r7         the 4 current grid entries (pointer increments used by
 *                 load_pixels_4)
 *   r12           scratch inside load_pixels_4
 *   lr            filter (coefficient) pointer; lr is used as a normal register
 *   q2, q3, q4    x(n)   for plane 0, 1, 2 (and the filtered result)
 *   q5, q6, q7    x(n+1) for plane 0, 1, 2
 *   q8            8 coefficients, q9 scratch inside filter_pixels_8
 *
 * Preconditions: dst_width must be a non-zero multiple of 8, because the loop
 * only terminates when the counter reaches exactly zero.
 *
 * q4-q7 (d8-d15) are callee-saved under AAPCS, so they are saved and
 * restored here around the loop.
 */
function ms_line_scale_8
	push		{r4-r12,lr}		/* we use lr as a normal register here; 10 words keep sp 8-byte aligned */
	ldr		lr , [sp ,#40]		/* r4-r12+lr = 10 registers, 40=10*4: offset of the filter table argument */
	vpush		{d8-d15}		/* q4-q7 are written below and must be preserved for the caller */
	ldr		r8, [r1,#4]		/* r8 = src[1] */
	ldr		r9, [r1,#8]		/* r9 = src[2] */
	ldr		r1, [r1]		/* r1 = src[0] */
	ldr		r10, [r2,#4]		/* r10 = dst[1] */
	ldr		r11, [r2,#8]		/* r11 = dst[2] */
	ldr		r2, [r2]		/* r2 = dst[0] */
	/* vmov q10, #255 */
1:
	ldrd		r4,r5, [r0],#8		/* load 2 entries of the grid into r4,r5 */
	ldrd		r6,r7, [r0],#8		/* load 2 entries of the grid into r6,r7 */
	load_pixels_4	d4, d10, r1		/* plane 0, samples 0..3: x(n) -> d4, x(n+1) -> d10 */
	load_pixels_4	d6, d12, r8		/* plane 1 */
	load_pixels_4	d8, d14, r9		/* plane 2 */
	ldrd		r4,r5, [r0],#8		/* load 2 entries of the grid into r4,r5 */
	ldrd		r6,r7, [r0],#8		/* load 2 entries of the grid into r6,r7 */
	load_pixels_4	d5, d11, r1		/* plane 0, samples 4..7: x(n) -> d5, x(n+1) -> d11 */
	load_pixels_4	d7, d13, r8		/* plane 1 */
	load_pixels_4	d9, d15, r9		/* plane 2 */
	/* x(n)= q2,q3,q4 x(n+1)=q5,q6,q7 */
	vld1.16		{q8} , [lr]!		/* load the filtering coefficients in q8 */
	/* we need to compute (coef*(x(n+1)-x(n)) + (x(n)<<7))>>7 */
	filter_pixels_8	q2 , q5
	filter_pixels_8	q3 , q6
	filter_pixels_8	q4 , q7
	vst1.16		{q2} , [r2]!		/* write q2 (the 8 filtered pixels) into memory pointed by r2 */
	vst1.16		{q3} , [r10]!
	vst1.16		{q4} , [r11]!
	subs		r3,r3,#8		/* we have processed 8 pixels, decrement width */
	bne		1b
	vpop		{d8-d15}		/* restore the caller's q4-q7 */
	pop		{r4-r12,pc}		/* restore core registers and return */
.endfunc
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment