Commit 3ba153f8 authored by Dylan Yudaken's avatar Dylan Yudaken Committed by Diego Biurrun

Replace MPEG group reference DCT implementation by newly rewritten version.

patch by Dylan Yudaken, dyudaken gmail com

Originally committed as revision 18274 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent 41b4dbbb
...@@ -31,7 +31,3 @@ There are a handful of files under other licensing terms, namely: ...@@ -31,7 +31,3 @@ There are a handful of files under other licensing terms, namely:
* The files libavcodec/jfdctfst.c, libavcodec/jfdctint.c, libavcodec/jrevdct.c * The files libavcodec/jfdctfst.c, libavcodec/jfdctint.c, libavcodec/jrevdct.c
are taken from libjpeg, see the top of the files for licensing details. are taken from libjpeg, see the top of the files for licensing details.
* The file libavcodec/fdctref.c is copyrighted by the MPEG Software Simulation
Group with all rights reserved. It is only used to create a DCT test program
and not compiled into libavcodec.
...@@ -81,7 +81,6 @@ unassigned TODO: (unordered) ...@@ -81,7 +81,6 @@ unassigned TODO: (unordered)
- add support for using mplayers video filters to ffmpeg - add support for using mplayers video filters to ffmpeg
- H264 encoder - H264 encoder
- per MB ratecontrol (so VCD and such do work better) - per MB ratecontrol (so VCD and such do work better)
- replace/rewrite libavcodec/fdctref.c
- write a script which iteratively changes all functions between always_inline and noinline and benchmarks the result to find the best set of inlined functions - write a script which iteratively changes all functions between always_inline and noinline and benchmarks the result to find the best set of inlined functions
- convert all the non SIMD asm into small asm vs. C testcases and submit them to the gcc devels so they can improve gcc - convert all the non SIMD asm into small asm vs. C testcases and submit them to the gcc devels so they can improve gcc
- generic audio mixing API - generic audio mixing API
......
...@@ -541,4 +541,4 @@ DIRS = alpha arm bfin mlib ppc ps2 sh4 sparc x86 ...@@ -541,4 +541,4 @@ DIRS = alpha arm bfin mlib ppc ps2 sh4 sparc x86
include $(SUBDIR)../subdir.mak include $(SUBDIR)../subdir.mak
$(SUBDIR)dct-test$(EXESUF): $(SUBDIR)fdctref.o $(SUBDIR)aandcttab.o $(SUBDIR)dct-test$(EXESUF): $(SUBDIR)dctref.o $(SUBDIR)aandcttab.o
/**
* @file libavcodec/fdctref.c
* forward discrete cosine transform, double precision.
*/
/* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */
/*
* Disclaimer of Warranty
*
* These software programs are available to the user without any license fee or
* royalty on an "as is" basis. The MPEG Software Simulation Group disclaims
* any and all warranties, whether express, implied, or statuary, including any
* implied warranties or merchantability or of fitness for a particular
* purpose. In no event shall the copyright-holder be liable for any
* incidental, punitive, or consequential damages of any kind whatsoever
* arising from the use of these programs.
*
* This disclaimer of warranty extends to the user of these programs and user's
* customers, employees, agents, transferees, successors, and assigns.
*
* The MPEG Software Simulation Group does not represent or warrant that the
* programs furnished hereunder are free of infringement of any third-party
* patents.
*
* Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
* are subject to royalty fees to patent holders. Many of these patents are
* general enough such that they are unavoidable regardless of implementation
* design.
*/
#include <math.h>
#ifndef PI
# ifdef M_PI
# define PI M_PI
# else
# define PI 3.14159265358979323846
# endif
#endif
/* global declarations */
void ff_ref_dct_init (void);
void ff_ref_fdct (short *block);
/* private data */
static double c[8][8]; /* transform coefficients */
void ff_ref_dct_init(void)
{
int i, j;
double s;
for (i=0; i<8; i++)
{
s = (i==0) ? sqrt(0.125) : 0.5;
for (j=0; j<8; j++)
c[i][j] = s * cos((PI/8.0)*i*(j+0.5));
}
}
void ff_ref_fdct(block)
short *block;
{
register int i, j;
double s;
double tmp[64];
for(i = 0; i < 8; i++)
for(j = 0; j < 8; j++)
{
s = 0.0;
/*
* for(k = 0; k < 8; k++)
* s += c[j][k] * block[8 * i + k];
*/
s += c[j][0] * block[8 * i + 0];
s += c[j][1] * block[8 * i + 1];
s += c[j][2] * block[8 * i + 2];
s += c[j][3] * block[8 * i + 3];
s += c[j][4] * block[8 * i + 4];
s += c[j][5] * block[8 * i + 5];
s += c[j][6] * block[8 * i + 6];
s += c[j][7] * block[8 * i + 7];
tmp[8 * i + j] = s;
}
for(j = 0; j < 8; j++)
for(i = 0; i < 8; i++)
{
s = 0.0;
/*
* for(k = 0; k < 8; k++)
* s += c[i][k] * tmp[8 * k + j];
*/
s += c[i][0] * tmp[8 * 0 + j];
s += c[i][1] * tmp[8 * 1 + j];
s += c[i][2] * tmp[8 * 2 + j];
s += c[i][3] * tmp[8 * 3 + j];
s += c[i][4] * tmp[8 * 4 + j];
s += c[i][5] * tmp[8 * 5 + j];
s += c[i][6] * tmp[8 * 6 + j];
s += c[i][7] * tmp[8 * 7 + j];
s*=8.0;
block[8 * i + j] = (short)floor(s + 0.499999);
/*
* reason for adding 0.499999 instead of 0.5:
* s is quite often x.5 (at least for i and/or j = 0 or 4)
* and setting the rounding threshold exactly to 0.5 leads to an
* extremely high arithmetic implementation dependency of the result;
* s being between x.5 and x.500001 (which is now incorrectly rounded
* downwards instead of upwards) is assumed to occur less often
* (if at all)
*/
}
}
/* perform IDCT matrix multiply for 8x8 coefficient block */
void ff_ref_idct(block)
short *block;
{
int i, j, k, v;
double partial_product;
double tmp[64];
for (i=0; i<8; i++)
for (j=0; j<8; j++)
{
partial_product = 0.0;
for (k=0; k<8; k++)
partial_product+= c[k][j]*block[8*i+k];
tmp[8*i+j] = partial_product;
}
/* Transpose operation is integrated into address mapping by switching
loop order of i and j */
for (j=0; j<8; j++)
for (i=0; i<8; i++)
{
partial_product = 0.0;
for (k=0; k<8; k++)
partial_product+= c[k][i]*tmp[8*k+j];
v = (int) floor(partial_product+0.5);
block[8*i+j] = v;
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment