/*
    Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

/*
			C	MMX	MMX2	3DNow
isVertDC		Ec	Ec
isVertMinMaxOk		Ec	Ec
doVertLowPass		E		e	e
doVertDefFilter		Ec	Ec	e	e
isHorizDC		Ec	Ec
isHorizMinMaxOk		a	E
doHorizLowPass		E		e	e
doHorizDefFilter	Ec	Ec	e	e
deRing			E		e	e*
Vertical RKAlgo1	E		a	a
Horizontal RKAlgo1			a	a
Vertical X1#		a		E	E
Horizontal X1#		a		E	E
LinIpolDeinterlace	e		E	E*
CubicIpolDeinterlace	a		e	e*
LinBlendDeinterlace	e		E	E*
MedianDeinterlace#	E	Ec	Ec
TempDeNoiser#		E		e	e

* I don't have a 3DNow CPU -> it's untested, but no one said it doesn't work, so it seems to work
# more or less self-invented filters, so the exactness isn't too meaningful
E = Exact implementation
e = almost exact implementation (slightly different rounding,...)
a = alternative / approximate impl
c = checked against the other implementations (-vo md5)
*/

/*
TODO:
reduce the time wasted on the mem transfer
unroll stuff if instructions depend too much on the prior one
move YScale thing to the end instead of fixing QP
write a faster and higher quality deblocking filter :)
make the mainloop more flexible (variable number of blocks at once)
	(the if/else stuff per block is slowing things down)
compare the quality & speed of all filters
split this huge file
optimize C versions
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
...
*/

Michael Niedermayer's avatar
Michael Niedermayer committed
63
//Changelog: use the CVS log
64

Michael Niedermayer's avatar
Michael Niedermayer committed
65
#include "config.h"
66 67
#include <inttypes.h>
#include <stdio.h>
68
#include <stdlib.h>
69
#include <string.h>
70 71 72
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif
73
//#undef HAVE_MMX2
74
//#define HAVE_3DNOW
75
//#undef HAVE_MMX
Michael Niedermayer's avatar
Michael Niedermayer committed
76
//#undef ARCH_X86
77
//#define DEBUG_BRIGHTNESS
78 79
#ifdef USE_FASTMEMCPY
#include "libvo/fastmemcpy.h"
80
#endif
81
#include "postprocess.h"
Michael Niedermayer's avatar
cleanup  
Michael Niedermayer committed
82
#include "postprocess_internal.h"
83 84

#include "mangle.h" //FIXME should be supressed
85

86 87 88 89
// Fallback when the platform has no memalign(): plain malloc(), the alignment argument is ignored
#ifndef HAVE_MEMALIGN
#define memalign(a,b) malloc(b)
#endif

// NOTE: these macros evaluate their arguments more than once; do not pass expressions with side effects
#define MIN(a,b) ((a) > (b) ? (b) : (a))
#define MAX(a,b) ((a) < (b) ? (b) : (a))
#define ABS(a) ((a) > 0 ? (a) : (-(a)))
// SIGN(0) is -1
#define SIGN(a) ((a) > 0 ? 1 : -1)

// size of the scratch copy of the filter string in pp_get_mode_by_name_and_quality()
#define GET_MODE_BUFFER_SIZE 500
// max number of per-filter options kept (including the terminating NULL slot)
#define OPTIONS_ARRAY_SIZE 10
#define BLOCK_SIZE 8
#define TEMP_STRIDE 8
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet

#ifdef ARCH_X86
// 8-byte aligned 64-bit constants (w.. = repeated 16-bit words, b.. = repeated bytes)
static uint64_t __attribute__((aligned(8))) w05=		0x0005000500050005LL;
static uint64_t __attribute__((aligned(8))) w20=		0x0020002000200020LL;
static uint64_t __attribute__((aligned(8))) b00= 		0x0000000000000000LL;
static uint64_t __attribute__((aligned(8))) b01= 		0x0101010101010101LL;
static uint64_t __attribute__((aligned(8))) b02= 		0x0202020202020202LL;
static uint64_t __attribute__((aligned(8))) b08= 		0x0808080808080808LL;
static uint64_t __attribute__((aligned(8))) b80= 		0x8080808080808080LL;
#endif

// values > 1 enable the "pp: ..." trace output of the mode parser
static int verbose= 0;

static const int deringThreshold= 20;
114

Michael Niedermayer's avatar
Michael Niedermayer committed
115

116 117 118 119
static struct PPFilter filters[]=
{
	{"hb", "hdeblock", 		1, 1, 3, H_DEBLOCK},
	{"vb", "vdeblock", 		1, 2, 4, V_DEBLOCK},
Michael Niedermayer's avatar
Michael Niedermayer committed
120 121
/*	{"hr", "rkhdeblock", 		1, 1, 3, H_RK1_FILTER},
	{"vr", "rkvdeblock", 		1, 2, 4, V_RK1_FILTER},*/
122 123 124 125
	{"h1", "x1hdeblock", 		1, 1, 3, H_X1_FILTER},
	{"v1", "x1vdeblock", 		1, 2, 4, V_X1_FILTER},
	{"dr", "dering", 		1, 5, 6, DERING},
	{"al", "autolevels", 		0, 1, 2, LEVEL_FIX},
126 127 128 129
	{"lb", "linblenddeint", 	1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
	{"li", "linipoldeint", 		1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
	{"ci", "cubicipoldeint",	1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
	{"md", "mediandeint", 		1, 1, 4, MEDIAN_DEINT_FILTER},
Michael Niedermayer's avatar
Michael Niedermayer committed
130
	{"fd", "ffmpegdeint", 		1, 1, 4, FFMPEG_DEINT_FILTER},
131
	{"tn", "tmpnoise", 		1, 7, 8, TEMP_NOISE_FILTER},
Michael Niedermayer's avatar
Michael Niedermayer committed
132
	{"fq", "forcequant", 		1, 0, 0, FORCE_QUANT},
133 134 135 136 137
	{NULL, NULL,0,0,0,0} //End Marker
};

/*
 * Alias table used by the mode parser: entries come in pairs
 * (alias name, filter string it expands to); a single NULL ends the table.
 */
static char *replaceTable[]=
{
	"default", 	"hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
	"de", 		"hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
	"fast", 	"x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
	"fa", 		"x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
	NULL //End Marker
};

Michael Niedermayer's avatar
Michael Niedermayer committed
145
#ifdef ARCH_X86
146 147
static inline void unusedVariableWarningFixer()
{
Michael Niedermayer's avatar
Michael Niedermayer committed
148
	if(w05 + w20 + b00 + b01 + b02 + b08 + b80 == 0) b00=0;
149
}
150
#endif
151

152

Michael Niedermayer's avatar
Michael Niedermayer committed
153
#ifdef ARCH_X86
154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
/**
 * Issue an x86 "prefetchnta" instruction for the address p.
 * The asm statement has no outputs; p is only passed in a register.
 */
static inline void prefetchnta(void *p)
{
	__asm__ __volatile__(
		"prefetchnta (%0)\n\t"
		: /* no outputs */
		: "r" (p)
	);
}

/**
 * Issue an x86 "prefetcht0" instruction for the address p.
 * The asm statement has no outputs; p is only passed in a register.
 */
static inline void prefetcht0(void *p)
{
	__asm__ __volatile__(
		"prefetcht0 (%0)\n\t"
		: /* no outputs */
		: "r" (p)
	);
}

/**
 * Issue an x86 "prefetcht1" instruction for the address p.
 * The asm statement has no outputs; p is only passed in a register.
 */
static inline void prefetcht1(void *p)
{
	__asm__ __volatile__(
		"prefetcht1 (%0)\n\t"
		: /* no outputs */
		: "r" (p)
	);
}

/**
 * Issue an x86 "prefetcht2" instruction for the address p.
 * The asm statement has no outputs; p is only passed in a register.
 */
static inline void prefetcht2(void *p)
{
	__asm__ __volatile__(
		"prefetcht2 (%0)\n\t"
		: /* no outputs */
		: "r" (p)
	);
}
Arpi's avatar
Arpi committed
181
#endif
182

Michael Niedermayer's avatar
Michael Niedermayer committed
183
// The horizontal Functions exist only in C cuz the MMX code is faster with vertical filters and transposing
184

185 186 187
/**
 * Check if the given 8x8 Block is mostly "flat"
 */
Michael Niedermayer's avatar
Michael Niedermayer committed
188
static inline int isHorizDC(uint8_t src[], int stride, PPContext *c)
189 190 191
{
	int numEq= 0;
	int y;
Michael Niedermayer's avatar
Michael Niedermayer committed
192 193
	const int dcOffset= ((c->QP*c->ppMode.baseDcDiff)>>8) + 1;
	const int dcThreshold= dcOffset*2 + 1;
194 195
	for(y=0; y<BLOCK_SIZE; y++)
	{
Michael Niedermayer's avatar
Michael Niedermayer committed
196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213
		if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
		if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
		if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
		if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
		if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
		if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
		if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
		src+= stride;
	}
	return numEq > c->ppMode.flatnessThreshold;
}

/**
 * Check if the middle 8x8 Block in the given 8x16 block is flat
 *
 * Counts the vertically adjacent pixel pairs (BLOCK_SIZE-1 row pairs, BLOCK_SIZE
 * columns) whose difference lies within +-dcOffset, where dcOffset is derived
 * from c->QP and c->ppMode.baseDcDiff.
 *
 * @param src    first pixel of the 8x16 block (the examined rows start 4 rows below)
 * @param stride distance between two rows of src
 * @param c      context supplying QP, ppMode.baseDcDiff and ppMode.flatnessThreshold
 * @return 1 if the count exceeds c->ppMode.flatnessThreshold, 0 otherwise
 */
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
	int numEq= 0;
	int x, y;
	const int dcOffset= ((c->QP*c->ppMode.baseDcDiff)>>8) + 1;
	const int dcThreshold= dcOffset*2 + 1;

	src+= stride*4; // src points to begin of the 8x8 Block
	for(y=0; y<BLOCK_SIZE-1; y++)
	{
		// single unsigned compare == (-dcOffset <= diff <= dcOffset)
		for(x=0; x<BLOCK_SIZE; x++)
			if(((unsigned)(src[x] - src[x+stride] + dcOffset)) < (unsigned)dcThreshold) numEq++;
		src+= stride;
	}
	return numEq > c->ppMode.flatnessThreshold;
}

/**
 * Cheap range check on one row: compares only the outermost pixels.
 *
 * @param src    row of at least 8 pixels
 * @param stride unused
 * @param QP     quantizer; the allowed difference is 2*QP
 * @return 1 if |src[0] - src[7]| <= 2*QP, 0 otherwise
 */
static inline int isHorizMinMaxOk(uint8_t src[], int stride, int QP)
{
	const int edgeDiff= src[0] - src[7];

	(void)stride;
	return abs(edgeDiff) <= 2*QP;
}

/**
 * Default horizontal deblocking filter (C version).
 *
 * For each of the BLOCK_SIZE rows the block edge lies between dst[3] and dst[4].
 * The "energy" across the edge and in the two neighbouring 4-pixel windows is
 * measured with the same [2 -5 5 -2] kernel; if the edge energy is below 8*QP,
 * dst[3] and dst[4] are moved towards each other by a correction that is
 * limited to at most half of their difference.
 *
 * @param dst    first pixel of the 8 pixel wide area, modified in place
 * @param stride distance between two rows of dst
 * @param QP     quantizer controlling the filter threshold
 */
static inline void doHorizDefFilter(uint8_t dst[], int stride, int QP)
{
	int y;
	for(y=0; y<BLOCK_SIZE; y++)
	{
		// [2 -5 5 -2] applied to dst[2..5], i.e. centered on the edge between dst[3] and dst[4]
		const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);

		if(ABS(middleEnergy) < 8*QP)
		{
			const int q=(dst[3] - dst[4])/2;
			// the same kernel applied to dst[0..3] and dst[4..7]
			const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
			const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);

			int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
			d= MAX(d, 0);

			d= (5*d + 32) >> 6;
			d*= SIGN(-middleEnergy);

			// clamp d to the range between 0 and q so the two pixels never cross
			if(q>0)
			{
				d= d<0 ? 0 : d;
				d= d>q ? q : d;
			}
			else
			{
				d= d>0 ? 0 : d;
				d= d<q ? q : d;
			}

			dst[3]-= d;
			dst[4]+= d;
		}
		dst+= stride;
	}
}

/**
 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
 *
 * Pixels outside dst[0..7] are replaced by a single value per side: the outer
 * neighbour (dst[-1] / dst[8]) if it differs from the border pixel by less than
 * QP, otherwise the border pixel itself.
 *
 * @param dst    first pixel of the 8x8 block, modified in place; dst[-1] and dst[8] are read
 * @param stride distance between two rows of dst
 * @param QP     quantizer used for the border decision
 */
static inline void doHorizLowPass(uint8_t dst[], int stride, int QP)
{
	static const int taps[9]= {1, 1, 2, 2, 4, 2, 2, 1, 1};
	int row;

	for(row=0; row<BLOCK_SIZE; row++)
	{
		int padded[16]; // 4x left border value, the 8 pixels, 4x right border value
		int k, x;
		const int leftPad=  ABS(dst[-1] - dst[0]) < QP ? dst[-1] : dst[0];
		const int rightPad= ABS(dst[8] - dst[7]) < QP ? dst[8] : dst[7];

		// take a private copy first so that every output is computed from unfiltered input
		for(k=0; k<4; k++) padded[k]= leftPad;
		for(k=0; k<8; k++) padded[4+k]= dst[k];
		for(k=0; k<4; k++) padded[12+k]= rightPad;

		for(x=0; x<8; x++)
		{
			int acc= 8; // rounding term for the >>4 below
			for(k=0; k<9; k++) acc+= taps[k]*padded[x+k];
			dst[x]= acc>>4;
		}

		dst+= stride;
	}
}

313
/**
Michael Niedermayer's avatar
Michael Niedermayer committed
314 315 316 317 318 319
 * Experimental Filter 1 (Horizontal)
 * will not damage linear gradients
 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
 * can only smooth blocks at the expected locations (it cant smooth them if they did move)
 * MMX2 version does correct clipping C version doesnt
 * not identical with the vertical one
320
 */
Michael Niedermayer's avatar
Michael Niedermayer committed
321 322
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
{
323
	int y;
Michael Niedermayer's avatar
Michael Niedermayer committed
324 325
	static uint64_t *lut= NULL;
	if(lut==NULL)
326
	{
Michael Niedermayer's avatar
Michael Niedermayer committed
327 328 329
		int i;
		lut= (uint64_t*)memalign(8, 256*8);
		for(i=0; i<256; i++)
330
		{
Michael Niedermayer's avatar
Michael Niedermayer committed
331
			int v= i < 128 ? 2*i : 2*(i-256);
332
/*
Michael Niedermayer's avatar
Michael Niedermayer committed
333 334 335 336 337
//Simulate 112242211 9-Tap filter
			uint64_t a= (v/16) & 0xFF;
			uint64_t b= (v/8) & 0xFF;
			uint64_t c= (v/4) & 0xFF;
			uint64_t d= (3*v/8) & 0xFF;
338
*/
Michael Niedermayer's avatar
Michael Niedermayer committed
339 340 341 342 343 344 345 346 347 348 349 350 351
//Simulate piecewise linear interpolation
			uint64_t a= (v/16) & 0xFF;
			uint64_t b= (v*3/16) & 0xFF;
			uint64_t c= (v*5/16) & 0xFF;
			uint64_t d= (7*v/16) & 0xFF;
			uint64_t A= (0x100 - a)&0xFF;
			uint64_t B= (0x100 - b)&0xFF;
			uint64_t C= (0x100 - c)&0xFF;
			uint64_t D= (0x100 - c)&0xFF;

			lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
				(D<<24) | (C<<16) | (B<<8) | (A);
			//lut[i] = (v<<32) | (v<<24);
352 353
		}
	}
Michael Niedermayer's avatar
Michael Niedermayer committed
354 355

	for(y=0; y<BLOCK_SIZE; y++)
356
	{
Michael Niedermayer's avatar
Michael Niedermayer committed
357 358 359 360 361 362 363
		int a= src[1] - src[2];
		int b= src[3] - src[4];
		int c= src[5] - src[6];

		int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);

		if(d < QP)
364
		{
Michael Niedermayer's avatar
Michael Niedermayer committed
365 366 367 368 369 370 371 372 373
			int v = d * SIGN(-b);

			src[1] +=v/8;
			src[2] +=v/4;
			src[3] +=3*v/8;
			src[4] -=3*v/8;
			src[5] -=v/4;
			src[6] -=v/8;

374
		}
Michael Niedermayer's avatar
Michael Niedermayer committed
375
		src+=stride;
376
	}
Michael Niedermayer's avatar
Michael Niedermayer committed
377 378 379
}


380
//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one

// Decide which variants of postprocess_template.c get compiled:
// with RUNTIME_CPUDETECT all of them, otherwise only the one matching the HAVE_* settings.

//Plain C versions
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
#define COMPILE_C
#endif

#ifdef ARCH_X86

#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX
#endif

#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX2
#endif

#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_3DNOW
#endif
#endif //ARCH_X86

// From here on HAVE_* / ARCH_X86 are redefined per variant right before each
// inclusion of the template; RENAME() appends the variant suffix to its argument.
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef ARCH_X86

#ifdef COMPILE_C
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef ARCH_X86
#define RENAME(a) a ## _C
#include "postprocess_template.c"
#endif

//MMX versions
#ifdef COMPILE_MMX
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _MMX
#include "postprocess_template.c"
#endif

//MMX2 versions
#ifdef COMPILE_MMX2
#undef RENAME
#define HAVE_MMX
#define HAVE_MMX2
#undef HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _MMX2
#include "postprocess_template.c"
#endif

//3DNOW versions
#ifdef COMPILE_3DNOW
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#define HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _3DNow
#include "postprocess_template.c"
#endif

// minor note: the HAVE_xyz macros no longer reflect config.h after this line;
// they hold whatever the last compiled variant above left behind

static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
Michael Niedermayer's avatar
cleanup  
Michael Niedermayer committed
451
	QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
Michael Niedermayer's avatar
Michael Niedermayer committed
452
{
Michael Niedermayer's avatar
Michael Niedermayer committed
453
	PPContext *c= (PPContext *)vc;
Michael Niedermayer's avatar
cleanup  
Michael Niedermayer committed
454
	PPMode *ppMode= (PPMode *)vm;
Michael Niedermayer's avatar
Michael Niedermayer committed
455 456
	c->ppMode= *ppMode; //FIXME

Michael Niedermayer's avatar
Michael Niedermayer committed
457 458 459
	// useing ifs here as they are faster than function pointers allthough the
	// difference wouldnt be messureable here but its much better because
	// someone might exchange the cpu whithout restarting mplayer ;)
460
#ifdef RUNTIME_CPUDETECT
Michael Niedermayer's avatar
Michael Niedermayer committed
461
#ifdef ARCH_X86
Michael Niedermayer's avatar
Michael Niedermayer committed
462
	// ordered per speed fasterst first
463
	if(c->cpuCaps & PP_CPU_CAPS_MMX2)
Michael Niedermayer's avatar
Michael Niedermayer committed
464
		postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
465
	else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
Michael Niedermayer's avatar
Michael Niedermayer committed
466
		postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
467
	else if(c->cpuCaps & PP_CPU_CAPS_MMX)
Michael Niedermayer's avatar
Michael Niedermayer committed
468
		postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
Michael Niedermayer's avatar
Michael Niedermayer committed
469
	else
Michael Niedermayer's avatar
Michael Niedermayer committed
470
		postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
Michael Niedermayer's avatar
Michael Niedermayer committed
471
#else
Michael Niedermayer's avatar
Michael Niedermayer committed
472
		postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
473
#endif
474 475
#else //RUNTIME_CPUDETECT
#ifdef HAVE_MMX2
Michael Niedermayer's avatar
Michael Niedermayer committed
476
		postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
477
#elif defined (HAVE_3DNOW)
Michael Niedermayer's avatar
Michael Niedermayer committed
478
		postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
479
#elif defined (HAVE_MMX)
Michael Niedermayer's avatar
Michael Niedermayer committed
480
		postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
481
#else
Michael Niedermayer's avatar
Michael Niedermayer committed
482
		postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
483 484
#endif
#endif //!RUNTIME_CPUDETECT
485 486
}

Michael Niedermayer's avatar
Michael Niedermayer committed
487 488
//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
//	QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
489

490 491
/* -pp Command line Help
 * Filter names are matched case-sensitively by the parser, so the names printed
 * here must be spelled exactly like the entries of the filter table.
 */
char *pp_help=
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint	default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb					de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"Filters			Options\n"
"short	long name	short	long option	Description\n"
"*	*		a	autoq		cpu power dependant enabler\n"
"			c	chrom		chrominance filtring enabled\n"
"			y	nochrom		chrominance filtring disabled\n"
"hb	hdeblock	(2 Threshold)		horizontal deblocking filter\n"
"	1. difference factor: default=64, higher -> more deblocking\n"
"	2. flatness threshold: default=40, lower -> more deblocking\n"
"			the h & v deblocking filters share these\n"
"			so u cant set different thresholds for h / v\n"
"vb	vdeblock	(2 Threshold)		vertical deblocking filter\n"
"h1	x1hdeblock				Experimental h deblock filter 1\n"
"v1	x1vdeblock				Experimental v deblock filter 1\n"
"dr	dering					Deringing filter\n"
"al	autolevels				automatic brightness / contrast\n"
"			f	fullyrange	stretch luminance to (0..255)\n"
"lb	linblenddeint				linear blend deinterlacer\n"
"li	linipoldeint				linear interpolating deinterlace\n"
"ci	cubicipoldeint				cubic interpolating deinterlacer\n"
"md	mediandeint				median deinterlacer\n"
"fd	ffmpegdeint				ffmpeg deinterlacer\n"
"de	default					hb:a,vb:a,dr:a,al,tn:a:150:200:400\n"
"fa	fast					h1:a,v1:a,dr:a,al,tn:a:150:200:400\n"
"tn	tmpnoise	(3 Thresholds)		Temporal Noise Reducer\n"
"			1. <= 2. <= 3.		larger -> stronger filtering\n"
"fq	forcequant	<quantizer>		Force quantizer\n"
;
527

Michael Niedermayer's avatar
cleanup  
Michael Niedermayer committed
528
pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
529 530 531
{
	char temp[GET_MODE_BUFFER_SIZE];
	char *p= temp;
Michael Niedermayer's avatar
Michael Niedermayer committed
532
	char *filterDelimiters= ",/";
533
	char *optionDelimiters= ":";
Michael Niedermayer's avatar
cleanup  
Michael Niedermayer committed
534
	struct PPMode *ppMode;
535 536
	char *filterToken;

Michael Niedermayer's avatar
cleanup  
Michael Niedermayer committed
537 538 539 540 541 542 543 544 545 546 547 548 549
	ppMode= memalign(8, sizeof(PPMode));
	
	ppMode->lumMode= 0;
	ppMode->chromMode= 0;
	ppMode->maxTmpNoise[0]= 700;
	ppMode->maxTmpNoise[1]= 1500;
	ppMode->maxTmpNoise[2]= 3000;
	ppMode->maxAllowedY= 234;
	ppMode->minAllowedY= 16;
	ppMode->baseDcDiff= 256/4;
	ppMode->flatnessThreshold= 56-16;
	ppMode->maxClippedThreshold= 0.01;
	ppMode->error=0;
Michael Niedermayer's avatar
cleanup  
Michael Niedermayer committed
550

551 552
	strncpy(temp, name, GET_MODE_BUFFER_SIZE);

553
	if(verbose>1) printf("pp: %s\n", name);
554

555 556
	for(;;){
		char *filterName;
Michael Niedermayer's avatar
cleanup  
Michael Niedermayer committed
557
		int q= 1000000; //PP_QUALITY_MAX;
558 559 560 561 562 563 564 565 566 567
		int chrom=-1;
		char *option;
		char *options[OPTIONS_ARRAY_SIZE];
		int i;
		int filterNameOk=0;
		int numOfUnknownOptions=0;
		int enable=1; //does the user want us to enabled or disabled the filter

		filterToken= strtok(p, filterDelimiters);
		if(filterToken == NULL) break;
568
		p+= strlen(filterToken) + 1; // p points to next filterToken
569
		filterName= strtok(filterToken, optionDelimiters);
570
		if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName);
571 572 573 574 575 576

		if(*filterName == '-')
		{
			enable=0;
			filterName++;
		}
577

578 579 580 581
		for(;;){ //for all options
			option= strtok(NULL, optionDelimiters);
			if(option == NULL) break;

582
			if(verbose>1) printf("pp: option: %s\n", option);
583 584 585 586 587 588 589 590 591 592
			if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
			else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
			else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
			else
			{
				options[numOfUnknownOptions] = option;
				numOfUnknownOptions++;
			}
			if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
		}
593
		options[numOfUnknownOptions] = NULL;
594 595 596 597 598 599 600 601 602 603 604 605 606 607

		/* replace stuff from the replace Table */
		for(i=0; replaceTable[2*i]!=NULL; i++)
		{
			if(!strcmp(replaceTable[2*i], filterName))
			{
				int newlen= strlen(replaceTable[2*i + 1]);
				int plen;
				int spaceLeft;

				if(p==NULL) p= temp, *p=0; 	//last filter
				else p--, *p=',';		//not last filter

				plen= strlen(p);
608
				spaceLeft= p - temp + plen;
609 610
				if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE)
				{
Michael Niedermayer's avatar
cleanup  
Michael Niedermayer committed
611
					ppMode->error++;
612 613 614 615 616 617 618 619 620 621
					break;
				}
				memmove(p + newlen, p, plen+1);
				memcpy(p, replaceTable[2*i + 1], newlen);
				filterNameOk=1;
			}
		}

		for(i=0; filters[i].shortName!=NULL; i++)
		{
622
//			printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName);
623 624 625
			if(   !strcmp(filters[i].longName, filterName)
			   || !strcmp(filters[i].shortName, filterName))
			{
Michael Niedermayer's avatar
cleanup  
Michael Niedermayer committed
626 627
				ppMode->lumMode &= ~filters[i].mask;
				ppMode->chromMode &= ~filters[i].mask;
628 629 630 631 632

				filterNameOk=1;
				if(!enable) break; // user wants to disable it

				if(q >= filters[i].minLumQuality)
Michael Niedermayer's avatar
cleanup  
Michael Niedermayer committed
633
					ppMode->lumMode|= filters[i].mask;
634 635
				if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
					if(q >= filters[i].minChromQuality)
Michael Niedermayer's avatar
cleanup  
Michael Niedermayer committed
636
						ppMode->chromMode|= filters[i].mask;
637 638 639 640

				if(filters[i].mask == LEVEL_FIX)
				{
					int o;
Michael Niedermayer's avatar
cleanup  
Michael Niedermayer committed
641 642
					ppMode->minAllowedY= 16;
					ppMode->maxAllowedY= 234;
643
					for(o=0; options[o]!=NULL; o++)
644
					{
645 646 647
						if(  !strcmp(options[o],"fullyrange")
						   ||!strcmp(options[o],"f"))
						{
Michael Niedermayer's avatar
cleanup  
Michael Niedermayer committed
648 649
							ppMode->minAllowedY= 0;
							ppMode->maxAllowedY= 255;
650 651
							numOfUnknownOptions--;
						}
652
					}
653
				}
654 655 656 657 658 659 660 661
				else if(filters[i].mask == TEMP_NOISE_FILTER)
				{
					int o;
					int numOfNoises=0;

					for(o=0; options[o]!=NULL; o++)
					{
						char *tail;
Michael Niedermayer's avatar
cleanup  
Michael Niedermayer committed
662
						ppMode->maxTmpNoise[numOfNoises]=
663 664 665 666 667 668 669 670 671
							strtol(options[o], &tail, 0);
						if(tail!=options[o])
						{
							numOfNoises++;
							numOfUnknownOptions--;
							if(numOfNoises >= 3) break;
						}
					}
				}
672 673 674 675 676 677 678 679 680 681 682
				else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK)
				{
					int o;

					for(o=0; options[o]!=NULL && o<2; o++)
					{
						char *tail;
						int val= strtol(options[o], &tail, 0);
						if(tail==options[o]) break;

						numOfUnknownOptions--;
Michael Niedermayer's avatar
cleanup  
Michael Niedermayer committed
683 684
						if(o==0) ppMode->baseDcDiff= val;
						else ppMode->flatnessThreshold= val;
685 686
					}
				}
Michael Niedermayer's avatar
Michael Niedermayer committed
687 688 689
				else if(filters[i].mask == FORCE_QUANT)
				{
					int o;
Michael Niedermayer's avatar
cleanup  
Michael Niedermayer committed
690
					ppMode->forcedQuant= 15;
Michael Niedermayer's avatar
Michael Niedermayer committed
691 692 693 694 695 696 697 698

					for(o=0; options[o]!=NULL && o<1; o++)
					{
						char *tail;
						int val= strtol(options[o], &tail, 0);
						if(tail==options[o]) break;

						numOfUnknownOptions--;
Michael Niedermayer's avatar
cleanup  
Michael Niedermayer committed
699
						ppMode->forcedQuant= val;
Michael Niedermayer's avatar
Michael Niedermayer committed
700 701
					}
				}
702 703
			}
		}
Michael Niedermayer's avatar
cleanup  
Michael Niedermayer committed
704 705
		if(!filterNameOk) ppMode->error++;
		ppMode->error += numOfUnknownOptions;
706 707
	}

Michael Niedermayer's avatar
cleanup  
Michael Niedermayer committed
708 709 710 711 712 713 714
	if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
	if(ppMode->error)
	{
		fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
		free(ppMode);
		return NULL;
	}
715 716 717
	return ppMode;
}

Michael Niedermayer's avatar
cleanup  
Michael Niedermayer committed
718 719 720 721
/**
 * Release a mode; passing NULL is allowed and does nothing.
 */
void pp_free_mode(pp_mode_t *mode){
    /* free(NULL) is a no-op, so no explicit check is needed */
    free(mode);
}

722 723 724 725 726 727 728
/**
 * Replace the buffer *p by a freshly allocated, zero-filled one.
 * The old contents are NOT preserved.
 *
 * @param p         in: old buffer (may hold NULL); out: new buffer, or NULL if
 *                  the allocation failed
 * @param alignment alignment passed to memalign()
 * @param size      size of the new buffer in bytes
 */
static void reallocAlign(void **p, int alignment, int size){
	free(*p); // free(NULL) is a no-op
	*p= memalign(alignment, size);
	if(*p) memset(*p, 0, size);
}

/**
 * (Re)allocate all work buffers of the context for the given picture size and
 * stride and record the stride in c->stride. Previous buffer contents are
 * discarded; all new buffers start zeroed, except yHistogram which is seeded
 * with a uniform value derived from width*height.
 *
 * NOTE(review): the results of reallocAlign() are not checked here.
 */
static void reallocBuffers(PPContext *c, int width, int height, int stride){
	int mbWidth = (width+15)>>4;   // picture size in 16x16 units, rounded up
	int mbHeight= (height+15)>>4;
	int i;

	c->stride= stride;

	reallocAlign((void **)&c->tempDst, 8, stride*24);
	reallocAlign((void **)&c->tempSrc, 8, stride*24);
	reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
	reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
	for(i=0; i<256; i++)
		c->yHistogram[i]= width*height/64*15/256;

	for(i=0; i<3; i++)
	{
		//Note: the +17*1024 is just there so I don't have to worry about r/w over the end
		reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
		reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
	}

	reallocAlign((void **)&c->deintTemp, 8, width+16);
	reallocAlign((void **)&c->nonBQPTable, 8, mbWidth*mbHeight*sizeof(QP_STORE_T));
	reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
}

pp_context_t *pp_get_context(int width, int height, int cpuCaps){
	PPContext *c= memalign(32, sizeof(PPContext));
	int stride= (width+15)&(~15); //assumed / will realloc if needed
        
	memset(c, 0, sizeof(PPContext));
	c->cpuCaps= cpuCaps;
760 761 762 763 764 765 766
	if(cpuCaps&PP_FORMAT){
		c->hChromaSubSample= cpuCaps&0x3;
		c->vChromaSubSample= (cpuCaps>>4)&0x3;
	}else{
		c->hChromaSubSample= 1;
		c->vChromaSubSample= 1;
	}
767 768 769

	reallocBuffers(c, width, height, stride);
        
Michael Niedermayer's avatar
Michael Niedermayer committed
770
	c->frameNum=-1;
771

Michael Niedermayer's avatar
Michael Niedermayer committed
772
	return c;
773 774
}

Michael Niedermayer's avatar
Michael Niedermayer committed
775
void pp_free_context(void *vc){
Michael Niedermayer's avatar
Michael Niedermayer committed
776 777 778 779 780 781 782 783 784 785 786
	PPContext *c = (PPContext*)vc;
	int i;
	
	for(i=0; i<3; i++) free(c->tempBlured[i]);
	for(i=0; i<3; i++) free(c->tempBluredPast[i]);
	
	free(c->tempBlocks);
	free(c->yHistogram);
	free(c->tempDst);
	free(c->tempSrc);
	free(c->deintTemp);
Michael Niedermayer's avatar
Michael Niedermayer committed
787
	free(c->nonBQPTable);
788 789 790 791
	free(c->forcedQPTable);
        
	memset(c, 0, sizeof(PPContext));

Michael Niedermayer's avatar
Michael Niedermayer committed
792 793 794
	free(c);
}

Michael Niedermayer's avatar
Michael Niedermayer committed
795
void  pp_postprocess(uint8_t * src[3], int srcStride[3],
Michael Niedermayer's avatar
Michael Niedermayer committed
796
                 uint8_t * dst[3], int dstStride[3],
Michael Niedermayer's avatar
Michael Niedermayer committed
797
                 int width, int height,
Michael Niedermayer's avatar
Michael Niedermayer committed
798
                 QP_STORE_T *QP_store,  int QPStride,
Michael Niedermayer's avatar
cleanup  
Michael Niedermayer committed
799
		 pp_mode_t *vm,  void *vc, int pict_type)
800
{
Michael Niedermayer's avatar
Michael Niedermayer committed
801 802
	int mbWidth = (width+15)>>4;
	int mbHeight= (height+15)>>4;
Michael Niedermayer's avatar
cleanup  
Michael Niedermayer committed
803
	PPMode *mode = (PPMode*)vm;
Michael Niedermayer's avatar
Michael Niedermayer committed
804
	PPContext *c = (PPContext*)vc;
805 806 807 808
        int minStride= MAX(srcStride[0], dstStride[0]);
	
	if(c->stride < minStride)
		reallocBuffers(c, width, height, minStride);
Michael Niedermayer's avatar
Michael Niedermayer committed
809

Michael Niedermayer's avatar
Michael Niedermayer committed
810
	if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)) 
811
	{
Michael Niedermayer's avatar
Michael Niedermayer committed
812
		int i;
813
		QP_store= c->forcedQPTable;
Michael Niedermayer's avatar
Michael Niedermayer committed
814
		QPStride= 0;
Michael Niedermayer's avatar
Michael Niedermayer committed
815
		if(mode->lumMode & FORCE_QUANT)
816
			for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
Michael Niedermayer's avatar
Michael Niedermayer committed
817
		else
818
			for(i=0; i<mbWidth; i++) QP_store[i]= 1;
819
	}
Michael Niedermayer's avatar
Michael Niedermayer committed
820 821 822 823 824 825 826 827 828 829 830
if(0){
int x,y;
for(y=0; y<mbHeight; y++){
	for(x=0; x<mbWidth; x++){
		printf("%2d ", QP_store[x + y*QPStride]);
	}
	printf("\n");
}
	printf("\n");
}
//printf("pict_type:%d\n", pict_type);
Michael Niedermayer's avatar
10l  
Michael Niedermayer committed
831

Michael Niedermayer's avatar
Michael Niedermayer committed
832 833 834 835 836 837 838 839 840 841 842 843
	if(pict_type!=3)
	{
		int x,y;
		for(y=0; y<mbHeight; y++){
			for(x=0; x<mbWidth; x++){
				int qscale= QP_store[x + y*QPStride];
				if(qscale&~31)
				    qscale=31;
				c->nonBQPTable[y*mbWidth + x]= qscale;
			}
		}
	}
844

Michael Niedermayer's avatar
cleanup  
Michael Niedermayer committed
845
	if(verbose>2)
846 847 848 849
	{
		printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
	}

Michael Niedermayer's avatar
Michael Niedermayer committed
850
	postProcess(src[0], srcStride[0], dst[0], dstStride[0],
851
		width, height, QP_store, QPStride, 0, mode, c);
852

853 854
	width  = (width )>>c->hChromaSubSample;
	height = (height)>>c->vChromaSubSample;
855

856 857
	if(mode->chromMode)
	{
Michael Niedermayer's avatar
Michael Niedermayer committed
858
		postProcess(src[1], srcStride[1], dst[1], dstStride[1],
859
			width, height, QP_store, QPStride, 1, mode, c);
Michael Niedermayer's avatar
Michael Niedermayer committed
860
		postProcess(src[2], srcStride[2], dst[2], dstStride[2],
861
			width, height, QP_store, QPStride, 2, mode, c);
862
	}
Michael Niedermayer's avatar
Michael Niedermayer committed
863
	else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
864
	{
Michael Niedermayer's avatar
Michael Niedermayer committed
865 866
		memcpy(dst[1], src[1], srcStride[1]*height);
		memcpy(dst[2], src[2], srcStride[2]*height);
867 868 869 870
	}
	else
	{
		int y;
Michael Niedermayer's avatar
Michael Niedermayer committed
871
		for(y=0; y<height; y++)
872
		{
Michael Niedermayer's avatar
Michael Niedermayer committed
873 874
			memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
			memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
875 876
		}
	}
877 878
}