idct_mmx.c 21.5 KB
Newer Older
Fabrice Bellard's avatar
Fabrice Bellard committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
/*
 * idct_mmx.c
 * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
 *
 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
 *
 * mpeg2dec is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * mpeg2dec is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with mpeg2dec; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

22
#include "common.h"
23
#include "dsputil.h"
Fabrice Bellard's avatar
Fabrice Bellard committed
24 25 26 27 28 29 30 31 32 33 34 35

#include "mmx.h"

/* Request `align`-byte alignment for the declared object (GCC attribute). */
#define ATTR_ALIGN(align) __attribute__ ((__aligned__ (align)))

/* Right shifts applied to the results of the row and column passes. */
#define ROW_SHIFT 11
#define COL_SHIFT 6

/*
 * round(bias):   (bias + 0.5) scaled by 2^ROW_SHIFT and truncated to int.
 * rounder(bias): brace initializer holding that value twice, i.e. one copy
 *                per 32-bit half of the quadword the row pass adds with paddd.
 * NOTE(review): `round` has the same name as the C99 <math.h> function;
 * this macro hides it for the rest of the translation unit.
 */
#define round(bias) ((int)(((bias)+0.5) * (1<<ROW_SHIFT)))
#define rounder(bias) {round (bias), round (bias)}

#if 0
Diego Biurrun's avatar
Diego Biurrun committed
36
/* C row IDCT - it is just here to document the MMXEXT and MMX versions */
Fabrice Bellard's avatar
Fabrice Bellard committed
37
static inline void idct_row (int16_t * row, int offset,
38
                             int16_t * table, int32_t * rounder)
Fabrice Bellard's avatar
Fabrice Bellard committed
39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76
{
    int C1, C2, C3, C4, C5, C6, C7;
    int a0, a1, a2, a3, b0, b1, b2, b3;

    row += offset;

    C1 = table[1];
    C2 = table[2];
    C3 = table[3];
    C4 = table[4];
    C5 = table[5];
    C6 = table[6];
    C7 = table[7];

    a0 = C4*row[0] + C2*row[2] + C4*row[4] + C6*row[6] + *rounder;
    a1 = C4*row[0] + C6*row[2] - C4*row[4] - C2*row[6] + *rounder;
    a2 = C4*row[0] - C6*row[2] - C4*row[4] + C2*row[6] + *rounder;
    a3 = C4*row[0] - C2*row[2] + C4*row[4] - C6*row[6] + *rounder;

    b0 = C1*row[1] + C3*row[3] + C5*row[5] + C7*row[7];
    b1 = C3*row[1] - C7*row[3] - C1*row[5] - C5*row[7];
    b2 = C5*row[1] - C1*row[3] + C7*row[5] + C3*row[7];
    b3 = C7*row[1] - C5*row[3] + C3*row[5] - C1*row[7];

    row[0] = (a0 + b0) >> ROW_SHIFT;
    row[1] = (a1 + b1) >> ROW_SHIFT;
    row[2] = (a2 + b2) >> ROW_SHIFT;
    row[3] = (a3 + b3) >> ROW_SHIFT;
    row[4] = (a3 - b3) >> ROW_SHIFT;
    row[5] = (a2 - b2) >> ROW_SHIFT;
    row[6] = (a1 - b1) >> ROW_SHIFT;
    row[7] = (a0 - b0) >> ROW_SHIFT;
}
#endif


/* MMXEXT row IDCT */

77 78 79 80 81 82 83 84
/*
 * Expands the seven cosine coefficients into the 32-entry int16_t table
 * consumed by the MMXEXT row pass. Each line below is one quadword:
 * table+0, +4, +8 and +12 are loaded with movq, table+16, +20, +24 and +28
 * are used directly as pmaddwd memory operands in mmxext_row().
 */
#define mmxext_table(c1,c2,c3,c4,c5,c6,c7)      {  c4,  c2, -c4, -c2,   \
                                                   c4,  c6,  c4,  c6,   \
                                                   c1,  c3, -c1, -c5,   \
                                                   c5,  c7,  c3, -c7,   \
                                                   c4, -c6,  c4, -c6,   \
                                                  -c4,  c2,  c4, -c2,   \
                                                   c5, -c1,  c3, -c1,   \
                                                   c7,  c3,  c7, -c5 }
Fabrice Bellard's avatar
Fabrice Bellard committed
85

86
static inline void mmxext_row_head (int16_t * row, int offset, const int16_t * table)
Fabrice Bellard's avatar
Fabrice Bellard committed
87
{
88
    movq_m2r (*(row+offset), mm2);      // mm2 = x6 x4 x2 x0
Fabrice Bellard's avatar
Fabrice Bellard committed
89

90 91
    movq_m2r (*(row+offset+4), mm5);    // mm5 = x7 x5 x3 x1
    movq_r2r (mm2, mm0);                // mm0 = x6 x4 x2 x0
Fabrice Bellard's avatar
Fabrice Bellard committed
92

93 94
    movq_m2r (*table, mm3);             // mm3 = -C2 -C4 C2 C4
    movq_r2r (mm5, mm6);                // mm6 = x7 x5 x3 x1
Fabrice Bellard's avatar
Fabrice Bellard committed
95

96 97
    movq_m2r (*(table+4), mm4);         // mm4 = C6 C4 C6 C4
    pmaddwd_r2r (mm0, mm3);             // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2
Fabrice Bellard's avatar
Fabrice Bellard committed
98

99
    pshufw_r2r (mm2, mm2, 0x4e);        // mm2 = x2 x0 x6 x4
Fabrice Bellard's avatar
Fabrice Bellard committed
100 101
}

102
static inline void mmxext_row (const int16_t * table, const int32_t * rounder)
Fabrice Bellard's avatar
Fabrice Bellard committed
103
{
104 105
    movq_m2r (*(table+8), mm1);         // mm1 = -C5 -C1 C3 C1
    pmaddwd_r2r (mm2, mm4);             // mm4 = C4*x0+C6*x2 C4*x4+C6*x6
Fabrice Bellard's avatar
Fabrice Bellard committed
106

107 108
    pmaddwd_m2r (*(table+16), mm0);     // mm0 = C4*x4-C6*x6 C4*x0-C6*x2
    pshufw_r2r (mm6, mm6, 0x4e);        // mm6 = x3 x1 x7 x5
Fabrice Bellard's avatar
Fabrice Bellard committed
109

110 111
    movq_m2r (*(table+12), mm7);        // mm7 = -C7 C3 C7 C5
    pmaddwd_r2r (mm5, mm1);             // mm1 = -C1*x5-C5*x7 C1*x1+C3*x3
Fabrice Bellard's avatar
Fabrice Bellard committed
112

113 114
    paddd_m2r (*rounder, mm3);          // mm3 += rounder
    pmaddwd_r2r (mm6, mm7);             // mm7 = C3*x1-C7*x3 C5*x5+C7*x7
Fabrice Bellard's avatar
Fabrice Bellard committed
115

116 117
    pmaddwd_m2r (*(table+20), mm2);     // mm2 = C4*x0-C2*x2 -C4*x4+C2*x6
    paddd_r2r (mm4, mm3);               // mm3 = a1 a0 + rounder
Fabrice Bellard's avatar
Fabrice Bellard committed
118

119 120
    pmaddwd_m2r (*(table+24), mm5);     // mm5 = C3*x5-C1*x7 C5*x1-C1*x3
    movq_r2r (mm3, mm4);                // mm4 = a1 a0 + rounder
Fabrice Bellard's avatar
Fabrice Bellard committed
121

122 123
    pmaddwd_m2r (*(table+28), mm6);     // mm6 = C7*x1-C5*x3 C7*x5+C3*x7
    paddd_r2r (mm7, mm1);               // mm1 = b1 b0
Fabrice Bellard's avatar
Fabrice Bellard committed
124

125 126
    paddd_m2r (*rounder, mm0);          // mm0 += rounder
    psubd_r2r (mm1, mm3);               // mm3 = a1-b1 a0-b0 + rounder
Fabrice Bellard's avatar
Fabrice Bellard committed
127

128 129
    psrad_i2r (ROW_SHIFT, mm3);         // mm3 = y6 y7
    paddd_r2r (mm4, mm1);               // mm1 = a1+b1 a0+b0 + rounder
Fabrice Bellard's avatar
Fabrice Bellard committed
130

131 132
    paddd_r2r (mm2, mm0);               // mm0 = a3 a2 + rounder
    psrad_i2r (ROW_SHIFT, mm1);         // mm1 = y1 y0
Fabrice Bellard's avatar
Fabrice Bellard committed
133

134 135
    paddd_r2r (mm6, mm5);               // mm5 = b3 b2
    movq_r2r (mm0, mm4);                // mm4 = a3 a2 + rounder
Fabrice Bellard's avatar
Fabrice Bellard committed
136

137 138
    paddd_r2r (mm5, mm0);               // mm0 = a3+b3 a2+b2 + rounder
    psubd_r2r (mm5, mm4);               // mm4 = a3-b3 a2-b2 + rounder
Fabrice Bellard's avatar
Fabrice Bellard committed
139 140 141 142
}

static inline void mmxext_row_tail (int16_t * row, int store)
{
143
    psrad_i2r (ROW_SHIFT, mm0);         // mm0 = y3 y2
Fabrice Bellard's avatar
Fabrice Bellard committed
144

145
    psrad_i2r (ROW_SHIFT, mm4);         // mm4 = y4 y5
Fabrice Bellard's avatar
Fabrice Bellard committed
146

147
    packssdw_r2r (mm0, mm1);            // mm1 = y3 y2 y1 y0
Fabrice Bellard's avatar
Fabrice Bellard committed
148

149
    packssdw_r2r (mm3, mm4);            // mm4 = y6 y7 y4 y5
Fabrice Bellard's avatar
Fabrice Bellard committed
150

151 152
    movq_r2m (mm1, *(row+store));       // save y3 y2 y1 y0
    pshufw_r2r (mm4, mm4, 0xb1);        // mm4 = y7 y6 y5 y4
Fabrice Bellard's avatar
Fabrice Bellard committed
153 154 155

    /* slot */

156
    movq_r2m (mm4, *(row+store+4));     // save y7 y6 y5 y4
Fabrice Bellard's avatar
Fabrice Bellard committed
157 158 159
}

static inline void mmxext_row_mid (int16_t * row, int store,
160
                                   int offset, const int16_t * table)
Fabrice Bellard's avatar
Fabrice Bellard committed
161
{
162 163
    movq_m2r (*(row+offset), mm2);      // mm2 = x6 x4 x2 x0
    psrad_i2r (ROW_SHIFT, mm0);         // mm0 = y3 y2
Fabrice Bellard's avatar
Fabrice Bellard committed
164

165 166
    movq_m2r (*(row+offset+4), mm5);    // mm5 = x7 x5 x3 x1
    psrad_i2r (ROW_SHIFT, mm4);         // mm4 = y4 y5
Fabrice Bellard's avatar
Fabrice Bellard committed
167

168 169
    packssdw_r2r (mm0, mm1);            // mm1 = y3 y2 y1 y0
    movq_r2r (mm5, mm6);                // mm6 = x7 x5 x3 x1
Fabrice Bellard's avatar
Fabrice Bellard committed
170

171 172
    packssdw_r2r (mm3, mm4);            // mm4 = y6 y7 y4 y5
    movq_r2r (mm2, mm0);                // mm0 = x6 x4 x2 x0
Fabrice Bellard's avatar
Fabrice Bellard committed
173

174 175
    movq_r2m (mm1, *(row+store));       // save y3 y2 y1 y0
    pshufw_r2r (mm4, mm4, 0xb1);        // mm4 = y7 y6 y5 y4
Fabrice Bellard's avatar
Fabrice Bellard committed
176

177 178
    movq_m2r (*table, mm3);             // mm3 = -C2 -C4 C2 C4
    movq_r2m (mm4, *(row+store+4));     // save y7 y6 y5 y4
Fabrice Bellard's avatar
Fabrice Bellard committed
179

180
    pmaddwd_r2r (mm0, mm3);             // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2
Fabrice Bellard's avatar
Fabrice Bellard committed
181

182 183
    movq_m2r (*(table+4), mm4);         // mm4 = C6 C4 C6 C4
    pshufw_r2r (mm2, mm2, 0x4e);        // mm2 = x2 x0 x6 x4
Fabrice Bellard's avatar
Fabrice Bellard committed
184 185 186 187 188
}


/* MMX row IDCT */

189 190 191 192 193 194 195 196
/*
 * Expands the seven cosine coefficients into the 32-entry int16_t table
 * consumed by the plain-MMX row pass. Each line below is one quadword:
 * table+0, +4, +8 and +12 are loaded with movq, table+16, +20, +24 and +28
 * are used directly as pmaddwd memory operands in mmx_row(). The layout
 * differs from mmxext_table because the MMX code duplicates dword halves
 * with punpck{l,h}dq instead of swapping them with pshufw.
 */
#define mmx_table(c1,c2,c3,c4,c5,c6,c7) {  c4,  c2,  c4,  c6,   \
                                           c4,  c6, -c4, -c2,   \
                                           c1,  c3,  c3, -c7,   \
                                           c5,  c7, -c1, -c5,   \
                                           c4, -c6,  c4, -c2,   \
                                          -c4,  c2,  c4, -c6,   \
                                           c5, -c1,  c7, -c5,   \
                                           c7,  c3,  c3, -c1 }
Fabrice Bellard's avatar
Fabrice Bellard committed
197

198
static inline void mmx_row_head (int16_t * row, int offset, const int16_t * table)
Fabrice Bellard's avatar
Fabrice Bellard committed
199
{
200
    movq_m2r (*(row+offset), mm2);      // mm2 = x6 x4 x2 x0
Fabrice Bellard's avatar
Fabrice Bellard committed
201

202 203
    movq_m2r (*(row+offset+4), mm5);    // mm5 = x7 x5 x3 x1
    movq_r2r (mm2, mm0);                // mm0 = x6 x4 x2 x0
Fabrice Bellard's avatar
Fabrice Bellard committed
204

205 206
    movq_m2r (*table, mm3);             // mm3 = C6 C4 C2 C4
    movq_r2r (mm5, mm6);                // mm6 = x7 x5 x3 x1
Fabrice Bellard's avatar
Fabrice Bellard committed
207

208
    punpckldq_r2r (mm0, mm0);           // mm0 = x2 x0 x2 x0
Fabrice Bellard's avatar
Fabrice Bellard committed
209

210 211
    movq_m2r (*(table+4), mm4);         // mm4 = -C2 -C4 C6 C4
    pmaddwd_r2r (mm0, mm3);             // mm3 = C4*x0+C6*x2 C4*x0+C2*x2
Fabrice Bellard's avatar
Fabrice Bellard committed
212

213 214
    movq_m2r (*(table+8), mm1);         // mm1 = -C7 C3 C3 C1
    punpckhdq_r2r (mm2, mm2);           // mm2 = x6 x4 x6 x4
Fabrice Bellard's avatar
Fabrice Bellard committed
215 216
}

217
static inline void mmx_row (const int16_t * table, const int32_t * rounder)
Fabrice Bellard's avatar
Fabrice Bellard committed
218
{
219 220
    pmaddwd_r2r (mm2, mm4);             // mm4 = -C4*x4-C2*x6 C4*x4+C6*x6
    punpckldq_r2r (mm5, mm5);           // mm5 = x3 x1 x3 x1
Fabrice Bellard's avatar
Fabrice Bellard committed
221

222 223
    pmaddwd_m2r (*(table+16), mm0);     // mm0 = C4*x0-C2*x2 C4*x0-C6*x2
    punpckhdq_r2r (mm6, mm6);           // mm6 = x7 x5 x7 x5
Fabrice Bellard's avatar
Fabrice Bellard committed
224

225 226
    movq_m2r (*(table+12), mm7);        // mm7 = -C5 -C1 C7 C5
    pmaddwd_r2r (mm5, mm1);             // mm1 = C3*x1-C7*x3 C1*x1+C3*x3
Fabrice Bellard's avatar
Fabrice Bellard committed
227

228 229
    paddd_m2r (*rounder, mm3);          // mm3 += rounder
    pmaddwd_r2r (mm6, mm7);             // mm7 = -C1*x5-C5*x7 C5*x5+C7*x7
Fabrice Bellard's avatar
Fabrice Bellard committed
230

231 232
    pmaddwd_m2r (*(table+20), mm2);     // mm2 = C4*x4-C6*x6 -C4*x4+C2*x6
    paddd_r2r (mm4, mm3);               // mm3 = a1 a0 + rounder
Fabrice Bellard's avatar
Fabrice Bellard committed
233

234 235
    pmaddwd_m2r (*(table+24), mm5);     // mm5 = C7*x1-C5*x3 C5*x1-C1*x3
    movq_r2r (mm3, mm4);                // mm4 = a1 a0 + rounder
Fabrice Bellard's avatar
Fabrice Bellard committed
236

237 238
    pmaddwd_m2r (*(table+28), mm6);     // mm6 = C3*x5-C1*x7 C7*x5+C3*x7
    paddd_r2r (mm7, mm1);               // mm1 = b1 b0
Fabrice Bellard's avatar
Fabrice Bellard committed
239

240 241
    paddd_m2r (*rounder, mm0);          // mm0 += rounder
    psubd_r2r (mm1, mm3);               // mm3 = a1-b1 a0-b0 + rounder
Fabrice Bellard's avatar
Fabrice Bellard committed
242

243 244
    psrad_i2r (ROW_SHIFT, mm3);         // mm3 = y6 y7
    paddd_r2r (mm4, mm1);               // mm1 = a1+b1 a0+b0 + rounder
Fabrice Bellard's avatar
Fabrice Bellard committed
245

246 247
    paddd_r2r (mm2, mm0);               // mm0 = a3 a2 + rounder
    psrad_i2r (ROW_SHIFT, mm1);         // mm1 = y1 y0
Fabrice Bellard's avatar
Fabrice Bellard committed
248

249 250
    paddd_r2r (mm6, mm5);               // mm5 = b3 b2
    movq_r2r (mm0, mm7);                // mm7 = a3 a2 + rounder
Fabrice Bellard's avatar
Fabrice Bellard committed
251

252 253
    paddd_r2r (mm5, mm0);               // mm0 = a3+b3 a2+b2 + rounder
    psubd_r2r (mm5, mm7);               // mm7 = a3-b3 a2-b2 + rounder
Fabrice Bellard's avatar
Fabrice Bellard committed
254 255 256 257
}

static inline void mmx_row_tail (int16_t * row, int store)
{
258
    psrad_i2r (ROW_SHIFT, mm0);         // mm0 = y3 y2
Fabrice Bellard's avatar
Fabrice Bellard committed
259

260
    psrad_i2r (ROW_SHIFT, mm7);         // mm7 = y4 y5
Fabrice Bellard's avatar
Fabrice Bellard committed
261

262
    packssdw_r2r (mm0, mm1);            // mm1 = y3 y2 y1 y0
Fabrice Bellard's avatar
Fabrice Bellard committed
263

264
    packssdw_r2r (mm3, mm7);            // mm7 = y6 y7 y4 y5
Fabrice Bellard's avatar
Fabrice Bellard committed
265

266 267
    movq_r2m (mm1, *(row+store));       // save y3 y2 y1 y0
    movq_r2r (mm7, mm4);                // mm4 = y6 y7 y4 y5
Fabrice Bellard's avatar
Fabrice Bellard committed
268

269
    pslld_i2r (16, mm7);                // mm7 = y7 0 y5 0
Fabrice Bellard's avatar
Fabrice Bellard committed
270

271
    psrld_i2r (16, mm4);                // mm4 = 0 y6 0 y4
Fabrice Bellard's avatar
Fabrice Bellard committed
272

273
    por_r2r (mm4, mm7);                 // mm7 = y7 y6 y5 y4
Fabrice Bellard's avatar
Fabrice Bellard committed
274 275 276

    /* slot */

277
    movq_r2m (mm7, *(row+store+4));     // save y7 y6 y5 y4
Fabrice Bellard's avatar
Fabrice Bellard committed
278 279 280
}

static inline void mmx_row_mid (int16_t * row, int store,
281
                                int offset, const int16_t * table)
Fabrice Bellard's avatar
Fabrice Bellard committed
282
{
283 284
    movq_m2r (*(row+offset), mm2);      // mm2 = x6 x4 x2 x0
    psrad_i2r (ROW_SHIFT, mm0);         // mm0 = y3 y2
Fabrice Bellard's avatar
Fabrice Bellard committed
285

286 287
    movq_m2r (*(row+offset+4), mm5);    // mm5 = x7 x5 x3 x1
    psrad_i2r (ROW_SHIFT, mm7);         // mm7 = y4 y5
Fabrice Bellard's avatar
Fabrice Bellard committed
288

289 290
    packssdw_r2r (mm0, mm1);            // mm1 = y3 y2 y1 y0
    movq_r2r (mm5, mm6);                // mm6 = x7 x5 x3 x1
Fabrice Bellard's avatar
Fabrice Bellard committed
291

292 293
    packssdw_r2r (mm3, mm7);            // mm7 = y6 y7 y4 y5
    movq_r2r (mm2, mm0);                // mm0 = x6 x4 x2 x0
Fabrice Bellard's avatar
Fabrice Bellard committed
294

295 296
    movq_r2m (mm1, *(row+store));       // save y3 y2 y1 y0
    movq_r2r (mm7, mm1);                // mm1 = y6 y7 y4 y5
Fabrice Bellard's avatar
Fabrice Bellard committed
297

298 299
    punpckldq_r2r (mm0, mm0);           // mm0 = x2 x0 x2 x0
    psrld_i2r (16, mm7);                // mm7 = 0 y6 0 y4
Fabrice Bellard's avatar
Fabrice Bellard committed
300

301 302
    movq_m2r (*table, mm3);             // mm3 = C6 C4 C2 C4
    pslld_i2r (16, mm1);                // mm1 = y7 0 y5 0
Fabrice Bellard's avatar
Fabrice Bellard committed
303

304 305
    movq_m2r (*(table+4), mm4);         // mm4 = -C2 -C4 C6 C4
    por_r2r (mm1, mm7);                 // mm7 = y7 y6 y5 y4
Fabrice Bellard's avatar
Fabrice Bellard committed
306

307 308
    movq_m2r (*(table+8), mm1);         // mm1 = -C7 C3 C3 C1
    punpckhdq_r2r (mm2, mm2);           // mm2 = x6 x4 x6 x4
Fabrice Bellard's avatar
Fabrice Bellard committed
309

310 311
    movq_r2m (mm7, *(row+store+4));     // save y7 y6 y5 y4
    pmaddwd_r2r (mm0, mm3);             // mm3 = C4*x0+C6*x2 C4*x0+C2*x2
Fabrice Bellard's avatar
Fabrice Bellard committed
312 313 314 315
}


#if 0
Diego Biurrun's avatar
Diego Biurrun committed
316
// C column IDCT - it is just here to document the MMXEXT and MMX versions
Fabrice Bellard's avatar
Fabrice Bellard committed
317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394
static inline void idct_col (int16_t * col, int offset)
{
/* multiplication - as implemented on mmx */
#define F(c,x) (((c) * (x)) >> 16)

/* saturation - it helps us handle torture test cases */
#define S(x) (((x)>32767) ? 32767 : ((x)<-32768) ? -32768 : (x))

    int16_t x0, x1, x2, x3, x4, x5, x6, x7;
    int16_t y0, y1, y2, y3, y4, y5, y6, y7;
    int16_t a0, a1, a2, a3, b0, b1, b2, b3;
    int16_t u04, v04, u26, v26, u17, v17, u35, v35, u12, v12;

    col += offset;

    x0 = col[0*8];
    x1 = col[1*8];
    x2 = col[2*8];
    x3 = col[3*8];
    x4 = col[4*8];
    x5 = col[5*8];
    x6 = col[6*8];
    x7 = col[7*8];

    u04 = S (x0 + x4);
    v04 = S (x0 - x4);
    u26 = S (F (T2, x6) + x2);
    v26 = S (F (T2, x2) - x6);

    a0 = S (u04 + u26);
    a1 = S (v04 + v26);
    a2 = S (v04 - v26);
    a3 = S (u04 - u26);

    u17 = S (F (T1, x7) + x1);
    v17 = S (F (T1, x1) - x7);
    u35 = S (F (T3, x5) + x3);
    v35 = S (F (T3, x3) - x5);

    b0 = S (u17 + u35);
    b3 = S (v17 - v35);
    u12 = S (u17 - u35);
    v12 = S (v17 + v35);
    u12 = S (2 * F (C4, u12));
    v12 = S (2 * F (C4, v12));
    b1 = S (u12 + v12);
    b2 = S (u12 - v12);

    y0 = S (a0 + b0) >> COL_SHIFT;
    y1 = S (a1 + b1) >> COL_SHIFT;
    y2 = S (a2 + b2) >> COL_SHIFT;
    y3 = S (a3 + b3) >> COL_SHIFT;

    y4 = S (a3 - b3) >> COL_SHIFT;
    y5 = S (a2 - b2) >> COL_SHIFT;
    y6 = S (a1 - b1) >> COL_SHIFT;
    y7 = S (a0 - b0) >> COL_SHIFT;

    col[0*8] = y0;
    col[1*8] = y1;
    col[2*8] = y2;
    col[3*8] = y3;
    col[4*8] = y4;
    col[5*8] = y5;
    col[6*8] = y6;
    col[7*8] = y7;
}
#endif


// MMX column IDCT
static inline void idct_col (int16_t * col, int offset)
{
#define T1 13036
#define T2 27146
#define T3 43790
#define C4 23170

395 396 397 398
    static const short _T1[] ATTR_ALIGN(8) = {T1,T1,T1,T1};
    static const short _T2[] ATTR_ALIGN(8) = {T2,T2,T2,T2};
    static const short _T3[] ATTR_ALIGN(8) = {T3,T3,T3,T3};
    static const short _C4[] ATTR_ALIGN(8) = {C4,C4,C4,C4};
Fabrice Bellard's avatar
Fabrice Bellard committed
399 400 401 402

    /* column code adapted from peter gubanov */
    /* http://www.elecard.com/peter/idct.shtml */

403
    movq_m2r (*_T1, mm0);               // mm0 = T1
Fabrice Bellard's avatar
Fabrice Bellard committed
404

405 406
    movq_m2r (*(col+offset+1*8), mm1);  // mm1 = x1
    movq_r2r (mm0, mm2);                // mm2 = T1
Fabrice Bellard's avatar
Fabrice Bellard committed
407

408 409
    movq_m2r (*(col+offset+7*8), mm4);  // mm4 = x7
    pmulhw_r2r (mm1, mm0);              // mm0 = T1*x1
Fabrice Bellard's avatar
Fabrice Bellard committed
410

411 412
    movq_m2r (*_T3, mm5);               // mm5 = T3
    pmulhw_r2r (mm4, mm2);              // mm2 = T1*x7
Fabrice Bellard's avatar
Fabrice Bellard committed
413

414 415
    movq_m2r (*(col+offset+5*8), mm6);  // mm6 = x5
    movq_r2r (mm5, mm7);                // mm7 = T3-1
Fabrice Bellard's avatar
Fabrice Bellard committed
416

417 418
    movq_m2r (*(col+offset+3*8), mm3);  // mm3 = x3
    psubsw_r2r (mm4, mm0);              // mm0 = v17
Fabrice Bellard's avatar
Fabrice Bellard committed
419

420 421
    movq_m2r (*_T2, mm4);               // mm4 = T2
    pmulhw_r2r (mm3, mm5);              // mm5 = (T3-1)*x3
Fabrice Bellard's avatar
Fabrice Bellard committed
422

423 424
    paddsw_r2r (mm2, mm1);              // mm1 = u17
    pmulhw_r2r (mm6, mm7);              // mm7 = (T3-1)*x5
Fabrice Bellard's avatar
Fabrice Bellard committed
425 426 427

    /* slot */

428 429
    movq_r2r (mm4, mm2);                // mm2 = T2
    paddsw_r2r (mm3, mm5);              // mm5 = T3*x3
Fabrice Bellard's avatar
Fabrice Bellard committed
430 431

    pmulhw_m2r (*(col+offset+2*8), mm4);// mm4 = T2*x2
432
    paddsw_r2r (mm6, mm7);              // mm7 = T3*x5
Fabrice Bellard's avatar
Fabrice Bellard committed
433

434 435
    psubsw_r2r (mm6, mm5);              // mm5 = v35
    paddsw_r2r (mm3, mm7);              // mm7 = u35
Fabrice Bellard's avatar
Fabrice Bellard committed
436

437 438
    movq_m2r (*(col+offset+6*8), mm3);  // mm3 = x6
    movq_r2r (mm0, mm6);                // mm6 = v17
Fabrice Bellard's avatar
Fabrice Bellard committed
439

440 441
    pmulhw_r2r (mm3, mm2);              // mm2 = T2*x6
    psubsw_r2r (mm5, mm0);              // mm0 = b3
Fabrice Bellard's avatar
Fabrice Bellard committed
442

443 444
    psubsw_r2r (mm3, mm4);              // mm4 = v26
    paddsw_r2r (mm6, mm5);              // mm5 = v12
Fabrice Bellard's avatar
Fabrice Bellard committed
445

446 447
    movq_r2m (mm0, *(col+offset+3*8));  // save b3 in scratch0
    movq_r2r (mm1, mm6);                // mm6 = u17
Fabrice Bellard's avatar
Fabrice Bellard committed
448 449

    paddsw_m2r (*(col+offset+2*8), mm2);// mm2 = u26
450
    paddsw_r2r (mm7, mm6);              // mm6 = b0
Fabrice Bellard's avatar
Fabrice Bellard committed
451

452 453
    psubsw_r2r (mm7, mm1);              // mm1 = u12
    movq_r2r (mm1, mm7);                // mm7 = u12
Fabrice Bellard's avatar
Fabrice Bellard committed
454

455 456
    movq_m2r (*(col+offset+0*8), mm3);  // mm3 = x0
    paddsw_r2r (mm5, mm1);              // mm1 = u12+v12
Fabrice Bellard's avatar
Fabrice Bellard committed
457

458 459
    movq_m2r (*_C4, mm0);               // mm0 = C4/2
    psubsw_r2r (mm5, mm7);              // mm7 = u12-v12
Fabrice Bellard's avatar
Fabrice Bellard committed
460

461 462
    movq_r2m (mm6, *(col+offset+5*8));  // save b0 in scratch1
    pmulhw_r2r (mm0, mm1);              // mm1 = b1/2
Fabrice Bellard's avatar
Fabrice Bellard committed
463

464 465
    movq_r2r (mm4, mm6);                // mm6 = v26
    pmulhw_r2r (mm0, mm7);              // mm7 = b2/2
Fabrice Bellard's avatar
Fabrice Bellard committed
466

467 468
    movq_m2r (*(col+offset+4*8), mm5);  // mm5 = x4
    movq_r2r (mm3, mm0);                // mm0 = x0
Fabrice Bellard's avatar
Fabrice Bellard committed
469

470 471
    psubsw_r2r (mm5, mm3);              // mm3 = v04
    paddsw_r2r (mm5, mm0);              // mm0 = u04
Fabrice Bellard's avatar
Fabrice Bellard committed
472

473 474
    paddsw_r2r (mm3, mm4);              // mm4 = a1
    movq_r2r (mm0, mm5);                // mm5 = u04
Fabrice Bellard's avatar
Fabrice Bellard committed
475

476 477
    psubsw_r2r (mm6, mm3);              // mm3 = a2
    paddsw_r2r (mm2, mm5);              // mm5 = a0
Fabrice Bellard's avatar
Fabrice Bellard committed
478

479 480
    paddsw_r2r (mm1, mm1);              // mm1 = b1
    psubsw_r2r (mm2, mm0);              // mm0 = a3
Fabrice Bellard's avatar
Fabrice Bellard committed
481

482 483
    paddsw_r2r (mm7, mm7);              // mm7 = b2
    movq_r2r (mm3, mm2);                // mm2 = a2
Fabrice Bellard's avatar
Fabrice Bellard committed
484

485 486
    movq_r2r (mm4, mm6);                // mm6 = a1
    paddsw_r2r (mm7, mm3);              // mm3 = a2+b2
Fabrice Bellard's avatar
Fabrice Bellard committed
487

488 489
    psraw_i2r (COL_SHIFT, mm3);         // mm3 = y2
    paddsw_r2r (mm1, mm4);              // mm4 = a1+b1
Fabrice Bellard's avatar
Fabrice Bellard committed
490

491 492
    psraw_i2r (COL_SHIFT, mm4);         // mm4 = y1
    psubsw_r2r (mm1, mm6);              // mm6 = a1-b1
Fabrice Bellard's avatar
Fabrice Bellard committed
493

494 495
    movq_m2r (*(col+offset+5*8), mm1);  // mm1 = b0
    psubsw_r2r (mm7, mm2);              // mm2 = a2-b2
Fabrice Bellard's avatar
Fabrice Bellard committed
496

497 498
    psraw_i2r (COL_SHIFT, mm6);         // mm6 = y6
    movq_r2r (mm5, mm7);                // mm7 = a0
Fabrice Bellard's avatar
Fabrice Bellard committed
499

500 501
    movq_r2m (mm4, *(col+offset+1*8));  // save y1
    psraw_i2r (COL_SHIFT, mm2);         // mm2 = y5
Fabrice Bellard's avatar
Fabrice Bellard committed
502

503 504
    movq_r2m (mm3, *(col+offset+2*8));  // save y2
    paddsw_r2r (mm1, mm5);              // mm5 = a0+b0
Fabrice Bellard's avatar
Fabrice Bellard committed
505

506 507
    movq_m2r (*(col+offset+3*8), mm4);  // mm4 = b3
    psubsw_r2r (mm1, mm7);              // mm7 = a0-b0
Fabrice Bellard's avatar
Fabrice Bellard committed
508

509 510
    psraw_i2r (COL_SHIFT, mm5);         // mm5 = y0
    movq_r2r (mm0, mm3);                // mm3 = a3
Fabrice Bellard's avatar
Fabrice Bellard committed
511

512 513
    movq_r2m (mm2, *(col+offset+5*8));  // save y5
    psubsw_r2r (mm4, mm3);              // mm3 = a3-b3
Fabrice Bellard's avatar
Fabrice Bellard committed
514

515 516
    psraw_i2r (COL_SHIFT, mm7);         // mm7 = y7
    paddsw_r2r (mm0, mm4);              // mm4 = a3+b3
Fabrice Bellard's avatar
Fabrice Bellard committed
517

518 519
    movq_r2m (mm5, *(col+offset+0*8));  // save y0
    psraw_i2r (COL_SHIFT, mm3);         // mm3 = y4
Fabrice Bellard's avatar
Fabrice Bellard committed
520

521 522
    movq_r2m (mm6, *(col+offset+6*8));  // save y6
    psraw_i2r (COL_SHIFT, mm4);         // mm4 = y3
Fabrice Bellard's avatar
Fabrice Bellard committed
523

524
    movq_r2m (mm7, *(col+offset+7*8));  // save y7
Fabrice Bellard's avatar
Fabrice Bellard committed
525

526
    movq_r2m (mm3, *(col+offset+4*8));  // save y4
Fabrice Bellard's avatar
Fabrice Bellard committed
527

528
    movq_r2m (mm4, *(col+offset+3*8));  // save y3
Fabrice Bellard's avatar
Fabrice Bellard committed
529

530 531 532 533 534
#undef T1
#undef T2
#undef T3
#undef C4
}
Fabrice Bellard's avatar
Fabrice Bellard committed
535

536
/*
 * Per-row bias values added to the even-part sums of the row pass; the
 * digit in each name is the row it is paired with in declare_idct().
 * Each array holds the same fixed-point value twice (see rounder()) so one
 * paddd biases both 32-bit accumulators of a register.
 * rounder0 additionally includes 1 << (COL_SHIFT - 1); presumably this
 * pre-applies the rounding for the later column shift via the DC term -
 * NOTE(review): confirm.
 */
static const int32_t rounder0[] ATTR_ALIGN(8) =
    rounder ((1 << (COL_SHIFT - 1)) - 0.5);
static const int32_t rounder4[] ATTR_ALIGN(8) = rounder (0);
static const int32_t rounder1[] ATTR_ALIGN(8) =
    rounder (1.25683487303);        /* C1*(C1/C4+C1+C7)/2 */
static const int32_t rounder7[] ATTR_ALIGN(8) =
    rounder (-0.25);                /* C1*(C7/C4+C7-C1)/2 */
static const int32_t rounder2[] ATTR_ALIGN(8) =
    rounder (0.60355339059);        /* C2 * (C6+C2)/2 */
static const int32_t rounder6[] ATTR_ALIGN(8) =
    rounder (-0.25);                /* C2 * (C6-C2)/2 */
static const int32_t rounder3[] ATTR_ALIGN(8) =
    rounder (0.087788325588);       /* C3*(-C3/C4+C3+C5)/2 */
static const int32_t rounder5[] ATTR_ALIGN(8) =
    rounder (-0.441341716183);      /* C3*(-C5/C4+C5-C3)/2 */
Fabrice Bellard's avatar
Fabrice Bellard committed
551

552 553
#undef COL_SHIFT
#undef ROW_SHIFT
Fabrice Bellard's avatar
Fabrice Bellard committed
554

555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586
/*
 * declare_idct(idct, table, head, row, tail, mid)
 *
 * Defines `void idct (int16_t * block)`, an in-place 8x8 inverse DCT built
 * from the given row helpers and table-layout macro.
 *
 * Row pass: rows are processed in the order 0, 4, 1, 7, 2, 6, 3, 5 so that
 * consecutive rows share a coefficient table (table04, table17, table26,
 * table35), each with its own rounderN bias. `head` starts the first row,
 * `mid` finishes one row while starting the next, `tail` finishes the last.
 *
 * Column pass: idct_col() is called twice, for columns 0-3 and 4-7.
 *
 * The function leaves the CPU in MMX state; no emms is issued here.
 */
#define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \
void idct (int16_t * block)                                             \
{                                                                       \
    static const int16_t table04[] ATTR_ALIGN(16) =                     \
        table (22725, 21407, 19266, 16384, 12873,  8867, 4520);         \
    static const int16_t table17[] ATTR_ALIGN(16) =                     \
        table (31521, 29692, 26722, 22725, 17855, 12299, 6270);         \
    static const int16_t table26[] ATTR_ALIGN(16) =                     \
        table (29692, 27969, 25172, 21407, 16819, 11585, 5906);         \
    static const int16_t table35[] ATTR_ALIGN(16) =                     \
        table (26722, 25172, 22654, 19266, 15137, 10426, 5315);         \
                                                                        \
    idct_row_head (block, 0*8, table04);                                \
    idct_row (table04, rounder0);                                       \
    idct_row_mid (block, 0*8, 4*8, table04);                            \
    idct_row (table04, rounder4);                                       \
    idct_row_mid (block, 4*8, 1*8, table17);                            \
    idct_row (table17, rounder1);                                       \
    idct_row_mid (block, 1*8, 7*8, table17);                            \
    idct_row (table17, rounder7);                                       \
    idct_row_mid (block, 7*8, 2*8, table26);                            \
    idct_row (table26, rounder2);                                       \
    idct_row_mid (block, 2*8, 6*8, table26);                            \
    idct_row (table26, rounder6);                                       \
    idct_row_mid (block, 6*8, 3*8, table35);                            \
    idct_row (table35, rounder3);                                       \
    idct_row_mid (block, 3*8, 5*8, table35);                            \
    idct_row (table35, rounder5);                                       \
    idct_row_tail (block, 5*8);                                         \
                                                                        \
    idct_col (block, 0);                                                \
    idct_col (block, 4);                                                \
}

589 590
void ff_mmx_idct(DCTELEM *block);
void ff_mmxext_idct(DCTELEM *block);
Fabrice Bellard's avatar
Fabrice Bellard committed
591 592

declare_idct (ff_mmxext_idct, mmxext_table,
593
              mmxext_row_head, mmxext_row, mmxext_row_tail, mmxext_row_mid)
Fabrice Bellard's avatar
Fabrice Bellard committed
594 595

declare_idct (ff_mmx_idct, mmx_table,
596
              mmx_row_head, mmx_row, mmx_row_tail, mmx_row_mid)
597