scaler.c 11.1 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
/*
mediastreamer2 library - modular sound and video processing and streaming
Copyright (C) 2006-2010  Belledonne Communications SARL (simon.morlat@linphone.org)

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
*/

#include "mediastreamer2/msvideo.h"
jehan's avatar
jehan committed
21
#ifdef __ARM_NEON__
22
#include <arm_neon.h>
jehan's avatar
jehan committed
23
#endif
24 25 26 27

/* Pack planar int16_t R/G/B lines into a line of RGB565 pixels.
 * No definition of these two functions is visible in this file;
 * ms_line_rgb2rgb565_8 is the variant called by img_yuv2rgb565_scale() when
 * __ARM_NEON__ is defined, where it is given the padded destination width.
 * NOTE(review): the _4/_8 suffix presumably is the number of pixels handled
 * per iteration - confirm against the implementation. */
void ms_line_rgb2rgb565_4(const int16_t *r, const int16_t *g, const int16_t *b, uint16_t *dst, int width);
void ms_line_rgb2rgb565_8(const int16_t *r, const int16_t *g, const int16_t *b, uint16_t *dst, int width);

Simon Morlat's avatar
Simon Morlat committed
28
/* Horizontal scaler for one line of three int16_t planes, called by
 * line_horizontal_scale() on __ARM_NEON__ builds with the grid and filter
 * coefficients prepared by android_create_scaler_context().
 * No definition is visible in this file. */
void ms_line_scale_8(const uint32_t *grid, const int16_t * const src[], int16_t *dst[], int dst_width, const int16_t *filter);
Simon Morlat's avatar
Simon Morlat committed
29
/* Variant of ms_line_scale_8 that takes no filter coefficients.
 * There is no active call to it in the visible code, and no definition here. */
void ms_line_scale_simple_8(const uint32_t *grid, const int16_t * const src[], int16_t *dst[], int dst_width);
Simon Morlat's avatar
Simon Morlat committed
30

31 32 33 34 35
/*
 * Private state of the Android scaler. Instances are created by
 * android_create_scaler_context() and handed around as MSScalerContext*.
 */
typedef struct AndroidScalerCtx{
	/* dimensions of the source picture */
	MSVideoSize src_size;
	/* dimensions of the destination picture */
	MSVideoSize dst_size;
	/* per colour plane: two consecutive unscaled RGB lines, unscaled_stride samples apart */
	int16_t *unscaled_2lines[3];
	/* per colour plane: the horizontally scaled picture, hscaled_img_stride samples per line */
	int16_t *hscaled_img[3];
	/* per destination pixel: step from the previous source offset (used by ms_line_scale_8) */
	uint32_t *hgrid;
	/* per destination pixel: coefficient derived from the fractional source position */
	int16_t *hcoeffs;
	/* line length, in samples, of hscaled_img */
	int hscaled_img_stride;
	/* line length, in samples, of unscaled_2lines */
	int unscaled_stride;
	/* source step per destination column, 16.16 fixed point */
	int w_inc;
	/* source step per destination line, 16.16 fixed point */
	int h_inc;
	/* destination width rounded up to a multiple of PAD */
	int dst_w_padded;
}AndroidScalerCtx;

jehan's avatar
jehan committed
45

46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
/*
 * Round i up to the next multiple of p. p must be a power of two.
 * Both arguments are fully parenthesized so that expression arguments
 * (e.g. ROUND_UP(w, 1<<4)) expand correctly; note that p is still evaluated
 * twice, so it must not have side effects.
 */
#define ROUND_UP(i,p)	(((i)+((p)-1)) & ~((p)-1))

/* Alignment, in samples, applied to line strides and padded widths. */
#define PAD 16

/*
 * Lookup tables indexed by an 8-bit sample value, filled by init_premults():
 * each entry is the (offset-corrected) sample already multiplied by its
 * conversion coefficient, so yuv2rgb_4x2() only has to add and shift.
 */
static int32_t premult_y[256];
static int32_t premult_vr[256];
static int32_t premult_vg[256];
static int32_t premult_ug[256];
static int32_t premult_ub[256];
/* Guards table initialisation; set to TRUE by android_create_scaler_context(). */
static bool_t premult_initd=FALSE;

/*
 * Fill the five premultiplied lookup tables for every possible 8-bit sample.
 * Luma samples are offset by 16 and chroma samples by 128 before being
 * multiplied by their coefficient. The products are fixed-point values with
 * 13 fractional bits (yuv2rgb_4x2() shifts the sums right by 13), i.e. the
 * coefficients are roughly 1.164, 1.596, 0.813, 0.391 and 2.018.
 * This function does not touch premult_initd; callers manage that flag.
 */
static void init_premults(){
	int level=0;
	while(level<256){
		const int luma=level-16;
		const int chroma=level-128;
		premult_y[level]=luma*9535;
		premult_vr[level]=chroma*13074;
		premult_vg[level]=-chroma*6660;
		premult_ug[level]=-chroma*3203;
		premult_ub[level]=chroma*16531;
		++level;
	}
}


jehan's avatar
jehan committed
69
#if !defined (__ARM_NEON__) 
Simon Morlat's avatar
Simon Morlat committed
70

71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89
/*
 * Convert a block of 4x2 pixels from YUV 4:2:0 to planar RGB (portable C path).
 *
 * y1, y2: 4 luma samples of the upper and of the lower line.
 * u, v:   2 chroma samples each; every chroma sample is shared by two
 *         horizontally adjacent pixels on both lines.
 * r1/g1/b1, r2/g2/b2: 4 output samples per plane for the upper and the lower
 *         line, each clamped to [0,255].
 *
 * The premult_* tables must have been filled by init_premults() beforehand.
 */
static inline void yuv2rgb_4x2(const uint8_t *y1, const uint8_t *y2, const uint8_t *u, const uint8_t *v, int16_t *r1, int16_t *g1, int16_t *b1, int16_t *r2, int16_t *g2, int16_t *b2){
	int px;

	for(px=0;px<4;++px){
		/* index of the chroma sample covering this pixel */
		const int c=px>>1;
		const int32_t red_part=premult_vr[v[c]];
		const int32_t green_part=premult_vg[v[c]]+premult_ug[u[c]];
		const int32_t blue_part=premult_ub[u[c]];
		const int32_t luma_up=premult_y[y1[px]];
		const int32_t luma_down=premult_y[y2[px]];

		/* sums carry 13 fractional bits: drop them, then clamp to 8 bits */
		r1[px]=MAX(MIN(((luma_up+red_part)>>13) ,255),0);
		g1[px]=MAX(MIN(((luma_up+green_part)>>13) ,255),0);
		b1[px]=MAX(MIN(((luma_up+blue_part)>>13) ,255),0);
		r2[px]=MAX(MIN(((luma_down+red_part)>>13) ,255),0);
		g2[px]=MAX(MIN(((luma_down+green_part)>>13) ,255),0);
		b2[px]=MAX(MIN(((luma_down+blue_part)>>13) ,255),0);
	}
}

101
#else
Simon Morlat's avatar
Simon Morlat committed
102 103 104

/*
 * Load the premultiplied luma of pixel i of both lines into lane i of the
 * ry1 / ry2 vectors. Expects y1, y2, ry1 and ry2 to be in scope at the point
 * of use; i must be a compile-time constant lane index (0..3).
 * Written as a braced block, so it is used without a trailing semicolon.
 */
#define LOAD_Y_PREMULTS(i) \
{\
	ry1=vld1q_lane_s32(&premult_y[y1[i]],ry1,i); \
	ry2=vld1q_lane_s32(&premult_y[y2[i]],ry2,i); \
}
Simon Morlat's avatar
Simon Morlat committed
106

Simon Morlat's avatar
Simon Morlat committed
107 108 109 110 111 112 113 114 115 116 117 118 119

/*
 * Load the premultiplied chroma contributions of chroma sample i into lanes
 * 2*i and 2*i+1 of rvr, rvug and rub (one chroma sample is shared by two
 * adjacent pixels). Expects u, v, rvr, rvug and rub to be in scope at the
 * point of use; i must be a compile-time constant (0 or 1).
 * Expands to a braced block, so it is used without a trailing semicolon.
 */
#define LOAD_UV_PREMULTS(i) \
{\
		int tmp=premult_vr[v[i]]; \
		rvr=vsetq_lane_s32(tmp,rvr,2*i); \
		rvr=vsetq_lane_s32(tmp,rvr,2*i+1); \
		tmp=premult_vg[v[i]]+premult_ug[u[i]]; \
		rvug=vsetq_lane_s32(tmp,rvug,2*i); \
		rvug=vsetq_lane_s32(tmp,rvug,2*i+1); \
		tmp=premult_ub[u[i]]; \
		rub=vsetq_lane_s32(tmp,rub,2*i); \
		rub=vsetq_lane_s32(tmp,rub,2*i+1); \
}
jehan's avatar
jehan committed
120
#endif
Simon Morlat's avatar
Simon Morlat committed
121

jehan's avatar
jehan committed
122
#if defined (__ARM_NEON__) 
Ghislain MARY's avatar
Ghislain MARY committed
123 124
/* Upper clamp for the fixed-point channel sums: 255 with 13 fractional bits. */
static int32_t yuvmax[4]={255<<13,255<<13,255<<13,255<<13};

/*
 * Convert a block of 4x2 pixels from YUV 4:2:0 to planar RGB (NEON path).
 *
 * y1, y2: 4 luma samples of the upper and of the lower line.
 * u, v:   2 chroma samples each; every chroma sample is shared by two
 *         horizontally adjacent pixels on both lines.
 * r1/g1/b1, r2/g2/b2: 4 output samples per plane for the upper and the lower
 *         line, each clamped to [0,255].
 *
 * The premult_* tables must have been filled by init_premults() beforehand.
 */
static inline void yuv2rgb_4x2(const uint8_t *y1, const uint8_t *y2, const uint8_t *u, const uint8_t *v, int16_t *r1, int16_t *g1, int16_t *b1, int16_t *r2, int16_t *g2, int16_t *b2){
	int32x4_t ry1={0};
	int32x4_t ry2={0};
	int32x4_t rvug={0};
	int32x4_t rvr={0};
	int32x4_t rub={0};
	int32x4_t rr1={0},rg1={0},rb1={0},rr2={0},rg2={0},rb2={0};
	int32x4_t max={0};
	int32x4_t zero=vdupq_n_s32(0);

	LOAD_Y_PREMULTS(0)
	LOAD_Y_PREMULTS(1)
	LOAD_Y_PREMULTS(2)
	LOAD_Y_PREMULTS(3)

	LOAD_UV_PREMULTS(0)
	LOAD_UV_PREMULTS(1)

	/* The clamp is applied before the >>13 below, hence the limit is
	 * 255<<13 and not 255. */
	max=vld1q_s32(yuvmax);

	rr1=vaddq_s32(ry1,rvr);
	rr2=vaddq_s32(ry2,rvr);
	rg1=vaddq_s32(ry1,rvug);
	rg2=vaddq_s32(ry2,rvug);
	rb1=vaddq_s32(ry1,rub);
	rb2=vaddq_s32(ry2,rub);

	/* Clamp to [0, 255<<13]. Negative sums must saturate to 0 like in the
	 * portable C implementation; taking their absolute value would turn
	 * them into a spurious positive intensity. */
	rr1=vminq_s32(vmaxq_s32(rr1,zero),max);
	rr2=vminq_s32(vmaxq_s32(rr2,zero),max);
	rg1=vminq_s32(vmaxq_s32(rg1,zero),max);
	rg2=vminq_s32(vmaxq_s32(rg2,zero),max);
	rb1=vminq_s32(vmaxq_s32(rb1,zero),max);
	rb2=vminq_s32(vmaxq_s32(rb2,zero),max);

	/* drop the 13 fractional bits while narrowing to 16 bits */
	vst1_s16(r1,vqshrn_n_s32(rr1,13));
	vst1_s16(r2,vqshrn_n_s32(rr2,13));

	vst1_s16(g1,vqshrn_n_s32(rg1,13));
	vst1_s16(g2,vqshrn_n_s32(rg2,13));

	vst1_s16(b1,vqshrn_n_s32(rb1,13));
	vst1_s16(b2,vqshrn_n_s32(rb2,13));
}

jehan's avatar
jehan committed
172

Simon Morlat's avatar
Simon Morlat committed
173 174
#endif

jehan's avatar
jehan committed
175 176 177 178
void img_ycrcb420p_to_bgra(uint8_t* src[],unsigned short w,unsigned short h, uint32_t dest[]) {
	unsigned int offset_y=0;
	unsigned int offset_dest=0;
	unsigned int offset_cbcr=0;
jehan's avatar
jehan committed
179 180
	int row;
	int i;
jehan's avatar
jehan committed
181 182 183
	if (premult_initd == FALSE) {
		init_premults();
	}
jehan's avatar
jehan committed
184
	for (row=0;row<h;row+=2) {
jehan's avatar
jehan committed
185 186 187
		offset_y=row*w;
		offset_cbcr=offset_y>>2;
		int col_crcb=0;
jehan's avatar
jehan committed
188 189
		int col_y;
		for (col_y=0;col_y<w;col_y+=4) {
jehan's avatar
jehan committed
190 191 192 193 194 195 196 197 198 199 200
			int16_t r1[4],  g1[4],  b1[4],  r2[4],  g2[4],  b2[4];
			yuv2rgb_4x2(src[0]+offset_y+col_y
						,src[0]+offset_y+w+col_y
						,(src[1]+offset_cbcr+col_crcb)
						,(src[2]+offset_cbcr+col_crcb)
						,r1
						,g1
						,b1
						,r2
						,g2
						,b2);
jehan's avatar
jehan committed
201
			for (i =0;i<4;i++) {
jehan's avatar
jehan committed
202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218
				*(uint8_t*)(dest+offset_dest+i) = b1[i];
				*((uint8_t*)(dest+offset_dest+i)+1) = g1[i];
				*((uint8_t*)(dest+offset_dest+i)+2) = r1[i];
				*((uint8_t*)(dest+offset_dest+i)+3) = 255;
			
				*(uint8_t*)(dest+w+offset_dest+i) = b2[i];
				*((uint8_t*)(dest+w+offset_dest+i)+1) = g2[i];
				*((uint8_t*)(dest+w+offset_dest+i)+2) = r2[i];
				*((uint8_t*)(dest+w+offset_dest+i)+3) = 255;
			}
			col_crcb+=2;
			offset_dest=offset_y+col_y;
			
		}
	}
}

Simon Morlat's avatar
Simon Morlat committed
219
/*
 * Convert two consecutive YUV 4:2:0 lines into two planar RGB lines.
 *
 * src_lines:   current position in the three source planes; the second luma
 *              line is found src_strides[0] bytes after the first one, the
 *              chroma lines are shared by both luma lines.
 * dst_lines:   three output planes; the first output line starts at
 *              dst_lines[n], the second one dst_stride samples further.
 * src_w:       number of pixels to convert; processed 4 at a time, so up to
 *              3 extra pixels are read and written when it is not a multiple
 *              of 4.
 */
static inline void line_yuv2rgb_2(const uint8_t *src_lines[],  int src_strides[], int16_t *dst_lines[], int src_w, int dst_stride ){
	const uint8_t *luma_up=src_lines[0];
	const uint8_t *luma_down=src_lines[0]+src_strides[0];
	const uint8_t *u_in=src_lines[1];
	const uint8_t *v_in=src_lines[2];
	int x;

	for(x=0;x<src_w;x+=4,luma_up+=4,luma_down+=4,u_in+=2,v_in+=2){
		yuv2rgb_4x2(luma_up,
		            luma_down,
		            u_in,
		            v_in,
		            dst_lines[0]+x,
		            dst_lines[1]+x,
		            dst_lines[2]+x,
		            dst_lines[0]+dst_stride+x,
		            dst_lines[1]+dst_stride+x,
		            dst_lines[2]+dst_stride+x);
	}
}

/*horizontal scaling of a single line (with 3 color planes)*/
Simon Morlat's avatar
Simon Morlat committed
249
static inline void line_horizontal_scale(AndroidScalerCtx * ctx, int16_t *src_lines[], int16_t *dst_lines[]){
jehan's avatar
jehan committed
250
#ifndef __ARM_NEON__
Simon Morlat's avatar
cleanup  
Simon Morlat committed
251
	int dst_w=ctx->dst_size.width;
252 253
	int x=0;
	int i,pos;
Simon Morlat's avatar
Simon Morlat committed
254 255
	int inc=ctx->w_inc;

256 257 258 259 260 261 262
	for(i=0;i<dst_w;++i){
		pos=x>>16;
		x+=inc;
		dst_lines[0][i]=src_lines[0][pos];
		dst_lines[1][i]=src_lines[1][pos];
		dst_lines[2][i]=src_lines[2][pos];
	}
Simon Morlat's avatar
Simon Morlat committed
263
#else
Simon Morlat's avatar
Simon Morlat committed
264
	//ms_line_scale_simple_8(ctx->hgrid,src_lines,dst_lines,ctx->dst_w_padded);
265
	ms_line_scale_8(ctx->hgrid,(const int16_t * const*)src_lines,dst_lines,ctx->dst_w_padded,ctx->hcoeffs);
Simon Morlat's avatar
Simon Morlat committed
266
#endif
267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286
}

/*
 * Horizontal scaling of the entire image.
 *
 * The source is consumed two lines at a time: both lines are converted to
 * planar RGB into ctx->unscaled_2lines, then each of them is scaled
 * horizontally into the next line of ctx->hscaled_img.
 *
 * NOTE(review): one output line is written per source line (two per loop
 * iteration), so ctx->hscaled_img has to hold at least src_size.height lines,
 * rounded up to an even number.
 */
static void img_yuv2rgb_hscale(AndroidScalerCtx * ctx, uint8_t *src[], int src_strides[]){
	const uint8_t *in[3]={src[0],src[1],src[2]};
	int16_t *out[3]={ctx->hscaled_img[0],ctx->hscaled_img[1],ctx->hscaled_img[2]};
	int16_t *second_line[3];
	int row;
	int plane;

	for(plane=0;plane<3;++plane){
		second_line[plane]=ctx->unscaled_2lines[plane]+ctx->unscaled_stride;
	}

	for(row=0;row<ctx->src_size.height;row+=2){
		/* convert two lines of yuv into two lines of rgb */
		line_yuv2rgb_2(in,src_strides,ctx->unscaled_2lines,ctx->src_size.width,ctx->unscaled_stride);
		/* two luma lines share one line of each chroma plane */
		in[0]+=2*src_strides[0];
		in[1]+=src_strides[1];
		in[2]+=src_strides[2];

		line_horizontal_scale(ctx,ctx->unscaled_2lines,out);
		for(plane=0;plane<3;++plane){
			out[plane]+=ctx->hscaled_img_stride;
		}

		line_horizontal_scale(ctx,second_line,out);
		for(plane=0;plane<3;++plane){
			out[plane]+=ctx->hscaled_img_stride;
		}
	}
}

jehan's avatar
jehan committed
306
#ifndef __ARM_NEON__
307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324

/*
 * Pack one line of planar R, G and B samples into RGB565 pixels.
 *
 * r, g, b: input planes, one int16_t sample per pixel (expected in 0..255).
 * dst:     output line, one uint16_t per pixel: the top 5 bits of red in bits
 *          15..11, the top 6 bits of green in bits 10..5 and the top 5 bits
 *          of blue in bits 4..0.
 * width:   number of pixels to convert.
 */
void ms_line_rgb2rgb565(const int16_t *r, const int16_t *g, const int16_t *b, uint16_t *dst, int width){
	int px=0;
	while(px<width){
		const uint16_t red5=(uint16_t)r[px]>>3;
		const uint16_t green6=(uint16_t)g[px]>>2;
		const uint16_t blue5=(uint16_t)b[px]>>3;
		dst[px]=(red5<<11)|(green6<<5)|blue5;
		++px;
	}
}

#endif

/*
 * Scale a YUV 4:2:0 picture and write it as RGB565 into dst[0].
 *
 * The picture is first converted and scaled horizontally into the temporary
 * planes of ctx; destination lines are then produced by picking, for each
 * output line, the temporary line at the integer part of a 16.16 fixed-point
 * position advancing by ctx->h_inc. Output lines are dst_strides[0] bytes
 * apart.
 *
 * NOTE(review): with __ARM_NEON__ the padded width (ctx->dst_w_padded) is
 * passed to the packer, so presumably each destination line must have room
 * for that many pixels - confirm against callers.
 */
static void img_yuv2rgb565_scale(AndroidScalerCtx *ctx, uint8_t *src[], int src_strides[], uint8_t *dst[], int dst_strides[]){
	uint8_t *out_line=dst[0];
	int acc=0;
	int row;

	/*scale the entire image horizontally into some temporary buffers*/
	img_yuv2rgb_hscale(ctx,src,src_strides);

	/*write lines as rgb565 format*/
	for(row=0;row<ctx->dst_size.height;++row,acc+=ctx->h_inc,out_line+=dst_strides[0]){
		const int line_off=(acc>>16)*ctx->hscaled_img_stride;
		int16_t *red=ctx->hscaled_img[0]+line_off;
		int16_t *green=ctx->hscaled_img[1]+line_off;
		int16_t *blue=ctx->hscaled_img[2]+line_off;
#ifndef __ARM_NEON__
		ms_line_rgb2rgb565(red,green,blue,(uint16_t*)out_line,ctx->dst_size.width);
#else
		ms_line_rgb2rgb565_8(red,green,blue,(uint16_t*)out_line,ctx->dst_w_padded);
#endif
	}
}

/*
 * Create a scaler context converting a src_w x src_h MS_YUV420P picture into
 * a dst_w x dst_h MS_RGB565 picture.
 *
 * Any other format combination is rejected through ms_fatal() and NULL is
 * returned. dst_w and dst_h must be non-zero (they are used as divisors), and
 * src_w / src_h must be small enough for (size << 16) to fit in an int.
 * flags is currently ignored.
 *
 * Returns the new context, to be released with android_scaler_context_free().
 */
static MSScalerContext *android_create_scaler_context(int src_w, int src_h, MSPixFmt src_fmt, int dst_w, int dst_h, MSPixFmt dst_fmt, int flags){
	AndroidScalerCtx *ctx;
	int hscaled_lines;
	int i;
	int tmp,prev;

	(void)flags;

	/* Only YUV420P -> RGB565 is implemented: reject the request as soon as
	 * either side differs (the check used to require both to differ). */
	if (src_fmt!=MS_YUV420P || dst_fmt!=MS_RGB565){
		ms_fatal("FIXME: unsupported rescaling scheme.");
		return NULL;
	}
	if (!premult_initd){
		init_premults();
		premult_initd=TRUE;
	}

	ctx=ms_new0(AndroidScalerCtx,1);
	ctx->src_size.width=src_w;
	ctx->src_size.height=src_h;
	ctx->dst_size.width=dst_w;
	ctx->dst_size.height=dst_h;

	ctx->hscaled_img_stride=ROUND_UP(dst_w,PAD);
	ctx->unscaled_stride=ROUND_UP(src_w,PAD);
	/* The horizontal pass (img_yuv2rgb_hscale) emits one scaled line per
	 * source line, two at a time, so the intermediate picture is as tall as
	 * the source rounded up to an even number of lines; sizing it with dst_h
	 * overflowed the buffers when scaling down vertically. */
	hscaled_lines=ROUND_UP(src_h,2);
	for(i=0;i<3;++i){
		ctx->unscaled_2lines[i]=ms_new(int16_t,ROUND_UP(src_w,PAD)*2);
		ctx->hscaled_img[i]=ms_new(int16_t,ctx->hscaled_img_stride*hscaled_lines);
	}
	/* source step per destination pixel / line, 16.16 fixed point */
	ctx->w_inc=(src_w<<16)/dst_w;
	ctx->h_inc=(src_h<<16)/dst_h;
	/*compute the grid (map) for original lines into destination lines*/
	ctx->dst_w_padded=ROUND_UP(dst_w,PAD);
	ctx->hgrid=ms_new0(uint32_t,ctx->dst_w_padded);
	ctx->hcoeffs=ms_new0(int16_t,ctx->dst_w_padded);
	tmp=0;
	prev=0;
	for(i=0;i<dst_w;++i){
		/* integer source position times 2 - presumably a byte offset into
		 * the int16_t line; the grid stores the step from the previous one */
		int offset=(tmp>>16)*2;
		ctx->hgrid[i]=offset-prev;
		/* top 7 bits of the fractional source position */
		ctx->hcoeffs[i]=(tmp&0xffff)>>9;
		prev=offset;
		tmp+=ctx->w_inc;
	}

	return (MSScalerContext*)ctx;
}

/*
 * Release a context created by android_create_scaler_context(), including
 * every buffer it owns: the per-plane line and picture buffers, the
 * horizontal grid and the horizontal coefficients (the latter used to be
 * leaked).
 */
static void android_scaler_context_free(MSScalerContext *c){
	AndroidScalerCtx *ctx=(AndroidScalerCtx*)c;
	int i;
	for(i=0;i<3;++i){
		ms_free(ctx->unscaled_2lines[i]);
		ms_free(ctx->hscaled_img[i]);
	}
	ms_free(ctx->hgrid);
	ms_free(ctx->hcoeffs);
	ms_free(ctx);
}

/*
 * MSScalerDesc "process" entry point: scales and converts one picture by
 * forwarding to img_yuv2rgb565_scale(). Always returns 0.
 */
static int android_scaler_process(MSScalerContext *ctx, uint8_t *src[], int src_strides[], uint8_t *dst[], int dst_strides[]){
	AndroidScalerCtx *scaler=(AndroidScalerCtx *)ctx;

	img_yuv2rgb565_scale(scaler,src,src_strides,dst,dst_strides);
	return 0;
}

/* Scaler descriptor exposing this implementation: context creation,
 * processing and context destruction callbacks, in that order. */
MSScalerDesc ms_android_scaler={
	android_create_scaler_context,
	android_scaler_process,
	android_scaler_context_free
};