/*
 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
20

21
#include <assert.h>
22
#include <inttypes.h>
23
#include <math.h>
24
#include <stdio.h>
25 26
#include <string.h>

27
#include "libavutil/avutil.h"
28
#include "libavutil/bswap.h"
29 30 31
#include "libavutil/cpu.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/mathematics.h"
32
#include "libavutil/pixdesc.h"
33 34 35 36
#include "config.h"
#include "rgb2rgb.h"
#include "swscale_internal.h"
#include "swscale.h"
Arpi's avatar
Arpi committed
37

38
/**
 * 8x8 dither matrix holding the even values 0..126.
 * swScale() selects row (y & 7) of this table as the luma/chroma dither
 * row when the source format is 9/10-bit or 16-bit.
 */
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
    {  36, 68,  60, 92,  34, 66,  58, 90, },
    { 100,  4, 124, 28,  98,  2, 122, 26, },
    {  52, 84,  44, 76,  50, 82,  42, 74, },
    { 116, 20, 108, 12, 114, 18, 106, 10, },
    {  32, 64,  56, 88,  38, 70,  62, 94, },
    {  96,  0, 120, 24, 102,  6, 126, 30, },
    {  48, 80,  40, 72,  54, 86,  46, 78, },
    { 112, 16, 104,  8, 118, 22, 110, 14, },
};

/**
 * Constant dither row (all 64).
 * swScale() installs it as both the luma and chroma dither row when the
 * source format does not need real dithering.
 */
DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] = {
    64, 64, 64, 64, 64, 64, 64, 64
};
52

53 54
/**
 * Fill a rectangular region of a plane with a constant byte.
 *
 * @param plane  start of the plane
 * @param stride distance in bytes between the starts of consecutive rows
 * @param width  number of bytes written in each row
 * @param height number of rows to fill
 * @param y      index of the first row to fill
 * @param val    byte value to store
 */
static av_always_inline void fillPlane(uint8_t *plane, int stride, int width,
                                       int height, int y, uint8_t val)
{
    int i;
    uint8_t *ptr = plane + stride * y;
    for (i = 0; i < height; i++) {
        memset(ptr, val, width);
        ptr += stride;
    }
}

64 65
/**
 * Horizontal FIR scaler: uint16_t input samples to 19-bit intermediates.
 *
 * For every output sample i, filterSize consecutive input samples starting
 * at filterPos[i] are multiplied by filter[filterSize * i + j] and summed.
 * The sum is shifted right by (depth_minus1 - 4) of the source format's
 * first component and clamped from above to (1 << 19) - 1. There is no
 * lower clamp.
 *
 * @param c          context; only srcFormat is read here
 * @param _dst       output buffer, written as int32_t despite its type
 * @param dstW       number of output samples
 * @param _src       input line, read as uint16_t despite its type
 * @param filter     coefficients, filterSize per output sample
 * @param filterPos  first input index for each output sample
 * @param filterSize taps per output sample
 */
static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW,
                           const uint8_t *_src, const int16_t *filter,
                           const int32_t *filterPos, int filterSize)
{
    int i;
    int32_t *dst        = (int32_t *) _dst;
    const uint16_t *src = (const uint16_t *) _src;
    int bits            = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
    int sh              = bits - 4;

    for (i = 0; i < dstW; i++) {
        int j;
        int srcPos = filterPos[i];
        int val    = 0;

        for (j = 0; j < filterSize; j++) {
            val += src[srcPos + j] * filter[filterSize * i + j];
        }
        // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
        // (sh == 11 only for 16-bit input; it shrinks with the source depth)
        dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
    }
}

87 88
/**
 * Horizontal FIR scaler: uint16_t input samples to 15-bit intermediates.
 *
 * Same accumulation as hScale16To19_c, but the sum is shifted right by the
 * depth_minus1 of the source format's first component and clamped from
 * above to (1 << 15) - 1. There is no lower clamp.
 *
 * @param c          context; only srcFormat is read here
 * @param dst        output buffer (int16_t)
 * @param dstW       number of output samples
 * @param _src       input line, read as uint16_t despite its type
 * @param filter     coefficients, filterSize per output sample
 * @param filterPos  first input index for each output sample
 * @param filterSize taps per output sample
 */
static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW,
                           const uint8_t *_src, const int16_t *filter,
                           const int32_t *filterPos, int filterSize)
{
    int i;
    const uint16_t *src = (const uint16_t *) _src;
    int sh              = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;

    for (i = 0; i < dstW; i++) {
        int j;
        int srcPos = filterPos[i];
        int val    = 0;

        for (j = 0; j < filterSize; j++) {
            val += src[srcPos + j] * filter[filterSize * i + j];
        }
        // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
        // (sh == 15 only for 16-bit input; it shrinks with the source depth)
        dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
    }
}

108
// bilinear / bicubic scaling
109 110 111
/**
 * Horizontal FIR scaler: 8-bit input samples to 15-bit intermediates.
 *
 * For every output sample i, filterSize input bytes starting at
 * filterPos[i] are weighted by filter[filterSize * i + j] and summed; the
 * sum is shifted right by 7 and clamped from above to (1 << 15) - 1.
 *
 * @param c          unused by this implementation
 * @param dst        output buffer (int16_t)
 * @param dstW       number of output samples
 * @param src        input line of 8-bit samples
 * @param filter     coefficients, filterSize per output sample
 * @param filterPos  first input index for each output sample
 * @param filterSize taps per output sample
 */
static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW,
                          const uint8_t *src, const int16_t *filter,
                          const int32_t *filterPos, int filterSize)
{
    int i;
    for (i = 0; i < dstW; i++) {
        int j;
        int srcPos = filterPos[i];
        int val    = 0;
        for (j = 0; j < filterSize; j++) {
            val += ((int)src[srcPos + j]) * filter[filterSize * i + j];
        }
        dst[i] = FFMIN(val >> 7, (1 << 15) - 1); // the cubic equation does overflow ...
    }
}

125 126 127
/**
 * Horizontal FIR scaler: 8-bit input samples to 19-bit intermediates.
 *
 * Same accumulation as hScale8To15_c, but the sum is shifted right by 3,
 * clamped from above to (1 << 19) - 1, and stored as int32_t.
 *
 * @param c          unused by this implementation
 * @param _dst       output buffer, written as int32_t despite its type
 * @param dstW       number of output samples
 * @param src        input line of 8-bit samples
 * @param filter     coefficients, filterSize per output sample
 * @param filterPos  first input index for each output sample
 * @param filterSize taps per output sample
 */
static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW,
                          const uint8_t *src, const int16_t *filter,
                          const int32_t *filterPos, int filterSize)
{
    int i;
    int32_t *dst = (int32_t *) _dst;
    for (i = 0; i < dstW; i++) {
        int j;
        int srcPos = filterPos[i];
        int val    = 0;
        for (j = 0; j < filterSize; j++) {
            val += ((int)src[srcPos + j]) * filter[filterSize * i + j];
        }
        dst[i] = FFMIN(val >> 3, (1 << 19) - 1); // the cubic equation does overflow ...
    }
}

142 143
// FIXME all pal and rgb srcFormats could do this conversion as well
// FIXME all scalers more complex than bilinear could do half of this transform
144
/**
 * In-place chroma range conversion on 15-bit intermediates. This is the
 * variant sws_init_swScale_c() installs when srcRange is 0 and the ranges
 * differ.
 *
 * Each sample is clamped from above to 30775, then mapped with the
 * fixed-point affine transform (x * 4663 - 9289992) >> 12.
 *
 * @param dstU  U samples, converted in place
 * @param dstV  V samples, converted in place
 * @param width number of samples in each buffer
 */
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int i;
    for (i = 0; i < width; i++) {
        dstU[i] = (FFMIN(dstU[i], 30775) * 4663 - 9289992) >> 12; // -264
        dstV[i] = (FFMIN(dstV[i], 30775) * 4663 - 9289992) >> 12; // -264
    }
}
152

153
/**
 * In-place chroma range conversion on 15-bit intermediates. This is the
 * variant sws_init_swScale_c() installs when srcRange is set and the
 * ranges differ.
 *
 * Each sample is mapped with the fixed-point affine transform
 * (x * 1799 + 4081085) >> 11.
 *
 * @param dstU  U samples, converted in place
 * @param dstV  V samples, converted in place
 * @param width number of samples in each buffer
 */
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int i;
    for (i = 0; i < width; i++) {
        dstU[i] = (dstU[i] * 1799 + 4081085) >> 11; // 1469
        dstV[i] = (dstV[i] * 1799 + 4081085) >> 11; // 1469
    }
}
161

162
/**
 * In-place luma range conversion on 15-bit intermediates. This is the
 * variant sws_init_swScale_c() installs when srcRange is 0 and the ranges
 * differ.
 *
 * Each sample is clamped from above to 30189, then mapped with
 * (x * 19077 - 39057361) >> 14.
 *
 * @param dst   luma samples, converted in place
 * @param width number of samples
 */
static void lumRangeToJpeg_c(int16_t *dst, int width)
{
    int i;
    for (i = 0; i < width; i++)
        dst[i] = (FFMIN(dst[i], 30189) * 19077 - 39057361) >> 14;
}
168

169
/**
 * In-place luma range conversion on 15-bit intermediates. This is the
 * variant sws_init_swScale_c() installs when srcRange is set and the
 * ranges differ.
 *
 * Each sample is mapped with (x * 14071 + 33561947) >> 14.
 *
 * @param dst   luma samples, converted in place
 * @param width number of samples
 */
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    int i;
    for (i = 0; i < width; i++)
        dst[i] = (dst[i] * 14071 + 33561947) >> 14;
}

176 177 178 179 180 181
/**
 * In-place chroma range conversion on 19-bit intermediates; the
 * high-bit-depth counterpart of chrRangeToJpeg_c.
 *
 * Each sample is clamped from above to 30775 << 4, then mapped with
 * (x * 4663 - (9289992 << 4)) >> 12.
 *
 * The product is formed in 64 bits: at the clamp limit,
 * (30775 << 4) * 4663 == 2296061200, which does not fit in a signed 32-bit
 * int, although the final result does.
 *
 * @param _dstU U samples, accessed as int32_t despite the type; in place
 * @param _dstV V samples, accessed as int32_t despite the type; in place
 * @param width number of samples in each buffer
 */
static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int i;
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    for (i = 0; i < width; i++) {
        dstU[i] = ((int64_t) FFMIN(dstU[i], 30775 << 4) * 4663 - (9289992 << 4)) >> 12; // -264
        dstV[i] = ((int64_t) FFMIN(dstV[i], 30775 << 4) * 4663 - (9289992 << 4)) >> 12; // -264
    }
}
186

187 188 189 190 191 192
/**
 * In-place chroma range conversion on 19-bit intermediates; the
 * high-bit-depth counterpart of chrRangeFromJpeg_c.
 *
 * Each sample is mapped with (x * 1799 + (4081085 << 4)) >> 11.
 * For inputs up to (1 << 19) - 1 the intermediate stays below 2^31.
 *
 * @param _dstU U samples, accessed as int32_t despite the type; in place
 * @param _dstV V samples, accessed as int32_t despite the type; in place
 * @param width number of samples in each buffer
 */
static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int i;
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    for (i = 0; i < width; i++) {
        dstU[i] = (dstU[i] * 1799 + (4081085 << 4)) >> 11; // 1469
        dstV[i] = (dstV[i] * 1799 + (4081085 << 4)) >> 11; // 1469
    }
}
197

198 199 200 201 202
/**
 * In-place luma range conversion on 19-bit intermediates; the
 * high-bit-depth counterpart of lumRangeToJpeg_c.
 *
 * Each sample is clamped from above to 30189 << 4, then mapped with
 * (x * 4769 - (39057361 << 2)) >> 12.
 *
 * The product is formed in 64 bits: at the clamp limit,
 * (30189 << 4) * 4769 == 2303541456, which does not fit in a signed 32-bit
 * int, although the final result does.
 *
 * @param _dst  luma samples, accessed as int32_t despite the type; in place
 * @param width number of samples
 */
static void lumRangeToJpeg16_c(int16_t *_dst, int width)
{
    int i;
    int32_t *dst = (int32_t *) _dst;
    for (i = 0; i < width; i++)
        dst[i] = ((int64_t) FFMIN(dst[i], 30189 << 4) * 4769 - (39057361 << 2)) >> 12;
}
205

206 207 208 209 210
/**
 * In-place luma range conversion on 19-bit intermediates; the
 * high-bit-depth counterpart of lumRangeFromJpeg_c.
 *
 * Each sample is mapped with (x * 14071 + (33561947 << 4)) >> 14.
 *
 * The arithmetic is done in 64 bits: x * 14071 exceeds INT_MAX for any
 * x above roughly 152617, while inputs here range up to (1 << 19) - 1, so
 * a 32-bit product overflows for bright samples.
 *
 * @param _dst  luma samples, accessed as int32_t despite the type; in place
 * @param width number of samples
 */
static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
{
    int i;
    int32_t *dst = (int32_t *) _dst;
    for (i = 0; i < width; i++)
        dst[i] = ((int64_t) dst[i] * 14071 + (33561947 << 4)) >> 14;
}

Ronald S. Bultje's avatar
Ronald S. Bultje committed
214 215
/**
 * Fast bilinear horizontal scaler for one 8-bit luma line.
 *
 * The source position advances by xInc per output sample in 16.16 fixed
 * point. The integer part selects the left neighbour, and the top 7 bits
 * of the fraction give the weight of the right neighbour. Output is the
 * left sample scaled by 128 plus the weighted difference.
 *
 * Both neighbour indices are clamped to srcW - 1 so the last output
 * samples replicate the edge pixel instead of reading src[srcW].
 *
 * @param c        unused by this implementation
 * @param dst      output line (int16_t)
 * @param dstWidth number of output samples
 * @param src      input line of 8-bit samples
 * @param srcW     number of valid input samples
 * @param xInc     16.16 fixed-point source step per output sample
 */
static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
                           const uint8_t *src, int srcW, int xInc)
{
    int i;
    unsigned int xpos = 0;
    // index of the last readable sample; guards against srcW <= 0
    const unsigned int last = srcW > 0 ? srcW - 1 : 0;
    for (i = 0; i < dstWidth; i++) {
        unsigned int xx     = FFMIN(xpos >> 16, last);
        unsigned int xx1    = FFMIN(xx + 1, last);
        unsigned int xalpha = (xpos & 0xFFFF) >> 9;
        dst[i] = (src[xx] << 7) + (src[xx1] - src[xx]) * xalpha;
        xpos  += xInc;
    }
}

Ronald S. Bultje's avatar
Ronald S. Bultje committed
227
// *** horizontal scale Y line to temp buffer
228
/**
 * Horizontally scale one luma (or alpha) line into a temporary buffer.
 *
 * Steps, in order:
 *  1. If the context has an input converter for this plane (alpToYV12 for
 *     alpha, lumToYV12 otherwise), convert the line into formatConvBuffer
 *     and scale from there. Otherwise, for luma only, use readLumPlanar
 *     if it is set.
 *  2. Scale with hyScale, or with hyscale_fast when that pointer is set.
 *  3. For luma only, apply lumConvertRange if it is set.
 *
 * @param c                scaler context supplying the function pointers
 * @param dst              output line
 * @param dstWidth         number of output samples
 * @param src_in           the four source plane pointers for this line
 * @param srcW             source width passed to the converters/fast scaler
 * @param xInc             source step passed to the fast scaler
 * @param hLumFilter       horizontal filter coefficients
 * @param hLumFilterPos    horizontal filter start positions
 * @param hLumFilterSize   taps per output sample
 * @param formatConvBuffer scratch buffer for the converted line
 * @param pal              palette passed through to the input converter
 * @param isAlpha          nonzero to process plane 3 (alpha) instead of plane 0
 */
static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
                                     const uint8_t *src_in[4],
                                     int srcW, int xInc,
                                     const int16_t *hLumFilter,
                                     const int32_t *hLumFilterPos,
                                     int hLumFilterSize,
                                     uint8_t *formatConvBuffer,
                                     uint32_t *pal, int isAlpha)
{
    void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) =
        isAlpha ? c->alpToYV12 : c->lumToYV12;
    // range conversion is never applied to the alpha plane
    void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
    const uint8_t *src = src_in[isAlpha ? 3 : 0];

    if (toYV12) {
        toYV12(formatConvBuffer, src, srcW, pal);
        src = formatConvBuffer;
    } else if (c->readLumPlanar && !isAlpha) {
        c->readLumPlanar(formatConvBuffer, src_in, srcW);
        src = formatConvBuffer;
    }

    if (!c->hyscale_fast) {
        c->hyScale(c, dst, dstWidth, src, hLumFilter,
                   hLumFilterPos, hLumFilterSize);
    } else { // fast bilinear upscale / crap downscale
        c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
    }

    if (convertRange)
        convertRange(dst, dstWidth);
}

Ronald S. Bultje's avatar
Ronald S. Bultje committed
261 262 263
/**
 * Fast bilinear horizontal scaler for one pair of 8-bit chroma lines.
 *
 * The source position advances by xInc per output sample in 16.16 fixed
 * point. The top 7 bits of the fraction (xalpha) weight the right
 * neighbour, and (xalpha ^ 127) weights the left one.
 *
 * Both neighbour indices are clamped to srcW - 1 so the last output
 * samples replicate the edge pixel instead of reading index srcW.
 *
 * @param c        unused by this implementation
 * @param dst1     output line for the first chroma plane
 * @param dst2     output line for the second chroma plane
 * @param dstWidth number of output samples per plane
 * @param src1     input line of the first chroma plane
 * @param src2     input line of the second chroma plane
 * @param srcW     number of valid input samples per plane
 * @param xInc     16.16 fixed-point source step per output sample
 */
static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
                           int dstWidth, const uint8_t *src1,
                           const uint8_t *src2, int srcW, int xInc)
{
    int i;
    unsigned int xpos = 0;
    // index of the last readable sample; guards against srcW <= 0
    const unsigned int last = srcW > 0 ? srcW - 1 : 0;
    for (i = 0; i < dstWidth; i++) {
        unsigned int xx     = FFMIN(xpos >> 16, last);
        unsigned int xx1    = FFMIN(xx + 1, last);
        unsigned int xalpha = (xpos & 0xFFFF) >> 9;
        dst1[i] = (src1[xx] * (xalpha ^ 127) + src1[xx1] * xalpha);
        dst2[i] = (src2[xx] * (xalpha ^ 127) + src2[xx1] * xalpha);
        xpos   += xInc;
    }
}

276 277
/**
 * Horizontally scale one pair of chroma lines into temporary buffers.
 *
 * Steps, in order:
 *  1. If chrToYV12 (or else readChrPlanar) is set, convert the input into
 *     formatConvBuffer. The second plane is placed after the first at an
 *     offset of the 16-byte-aligned size of one converted line.
 *  2. Scale both planes with hcScale, or with hcscale_fast when that
 *     pointer is set.
 *  3. Apply chrConvertRange if it is set.
 *
 * @param c                scaler context supplying the function pointers
 * @param dst1             output line for the first chroma plane
 * @param dst2             output line for the second chroma plane
 * @param dstWidth         number of output samples per plane
 * @param src_in           the four source plane pointers for this line
 * @param srcW             chroma source width
 * @param xInc             source step passed to the fast scaler
 * @param hChrFilter       horizontal filter coefficients
 * @param hChrFilterPos    horizontal filter start positions
 * @param hChrFilterSize   taps per output sample
 * @param formatConvBuffer scratch buffer for the converted lines
 * @param pal              palette passed through to chrToYV12
 */
static av_always_inline void hcscale(SwsContext *c, int16_t *dst1,
                                     int16_t *dst2, int dstWidth,
                                     const uint8_t *src_in[4],
                                     int srcW, int xInc,
                                     const int16_t *hChrFilter,
                                     const int32_t *hChrFilterPos,
                                     int hChrFilterSize,
                                     uint8_t *formatConvBuffer, uint32_t *pal)
{
    const uint8_t *src1 = src_in[1], *src2 = src_in[2];
    if (c->chrToYV12) {
        // second plane lives right after the first, 16-byte aligned
        uint8_t *buf2 = formatConvBuffer +
                        FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
        c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
        src1 = formatConvBuffer;
        src2 = buf2;
    } else if (c->readChrPlanar) {
        uint8_t *buf2 = formatConvBuffer +
                        FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
        c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW);
        src1 = formatConvBuffer;
        src2 = buf2;
    }

    if (!c->hcscale_fast) {
        c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
        c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
    } else { // fast bilinear upscale / crap downscale
        c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
    }

    if (c->chrConvertRange)
        c->chrConvertRange(dst1, dst2, dstWidth);
}

/* Set to 1 to log the buffer bookkeeping of swScale() at AV_LOG_DEBUG. */
#define DEBUG_SWSCALE_BUFFERS 0
/* Logs through a variable named `c` that must be in scope at the call site.
 * Wrapped in do { } while (0) so the macro is a single statement and cannot
 * capture a following `else`. */
#define DEBUG_BUFFERS(...)                          \
    do {                                            \
        if (DEBUG_SWSCALE_BUFFERS)                  \
            av_log(c, AV_LOG_DEBUG, __VA_ARGS__);   \
    } while (0)
315

316
/**
 * Scale one slice of source lines and emit every destination line whose
 * inputs are available.
 *
 * For each destination line the function works out which source lines the
 * vertical filter needs, horizontally scales any not yet buffered into the
 * luma/chroma/alpha ring buffers, and, if the slice holds enough lines,
 * runs the vertical scaler/output function. When the slice runs out, the
 * remaining lines of the slice are buffered and the loop stops. Progress
 * (dstY, ring-buffer indices, last buffered lines) is stored back into the
 * context so the next slice continues where this one ended.
 *
 * Note: src[] and srcStride[] are modified in place. For packed source
 * formats all entries are set to entry 0, and the chroma strides are
 * shifted left by c->vChrDrop.
 *
 * @param c         scaler context
 * @param src       source plane pointers for the slice (modified, see above)
 * @param srcStride source strides (modified, see above)
 * @param srcSliceY index of the first source line in this slice; 0 resets
 *                  the buffering state
 * @param srcSliceH number of source lines in this slice
 * @param dst       destination plane pointers
 * @param dstStride destination strides
 * @return number of destination lines produced by this call
 */
static int swScale(SwsContext *c, const uint8_t *src[],
                   int srcStride[], int srcSliceY,
                   int srcSliceH, uint8_t *dst[], int dstStride[])
{
    /* load a few things into local vars to make the code more readable?
     * and faster */
    const int srcW                   = c->srcW;
    const int dstW                   = c->dstW;
    const int dstH                   = c->dstH;
    const int chrDstW                = c->chrDstW;
    const int chrSrcW                = c->chrSrcW;
    const int lumXInc                = c->lumXInc;
    const int chrXInc                = c->chrXInc;
    const enum PixelFormat dstFormat = c->dstFormat;
    const int flags                  = c->flags;
    int32_t *vLumFilterPos           = c->vLumFilterPos;
    int32_t *vChrFilterPos           = c->vChrFilterPos;
    int32_t *hLumFilterPos           = c->hLumFilterPos;
    int32_t *hChrFilterPos           = c->hChrFilterPos;
    int16_t *vLumFilter              = c->vLumFilter;
    int16_t *vChrFilter              = c->vChrFilter;
    int16_t *hLumFilter              = c->hLumFilter;
    int16_t *hChrFilter              = c->hChrFilter;
    int32_t *lumMmxFilter            = c->lumMmxFilter;
    int32_t *chrMmxFilter            = c->chrMmxFilter;
    const int vLumFilterSize         = c->vLumFilterSize;
    const int vChrFilterSize         = c->vChrFilterSize;
    const int hLumFilterSize         = c->hLumFilterSize;
    const int hChrFilterSize         = c->hChrFilterSize;
    int16_t **lumPixBuf              = c->lumPixBuf;
    int16_t **chrUPixBuf             = c->chrUPixBuf;
    int16_t **chrVPixBuf             = c->chrVPixBuf;
    int16_t **alpPixBuf              = c->alpPixBuf;
    const int vLumBufSize            = c->vLumBufSize;
    const int vChrBufSize            = c->vChrBufSize;
    uint8_t *formatConvBuffer        = c->formatConvBuffer;
    uint32_t *pal                    = c->pal_yuv;
    yuv2planar1_fn yuv2plane1        = c->yuv2plane1;
    yuv2planarX_fn yuv2planeX        = c->yuv2planeX;
    yuv2interleavedX_fn yuv2nv12cX   = c->yuv2nv12cX;
    yuv2packed1_fn yuv2packed1       = c->yuv2packed1;
    yuv2packed2_fn yuv2packed2       = c->yuv2packed2;
    yuv2packedX_fn yuv2packedX       = c->yuv2packedX;
    const int chrSrcSliceY           =     srcSliceY  >> c->chrSrcVSubSample;
    // -((-x) >> s) rounds the chroma slice height up instead of down
    const int chrSrcSliceH           = -((-srcSliceH) >> c->chrSrcVSubSample);
    int should_dither                = is9_OR_10BPS(c->srcFormat) ||
                                       is16BPS(c->srcFormat);
    int lastDstY;

    /* vars which will change and which we need to store back in the context */
    int dstY         = c->dstY;
    int lumBufIndex  = c->lumBufIndex;
    int chrBufIndex  = c->chrBufIndex;
    int lastInLumBuf = c->lastInLumBuf;
    int lastInChrBuf = c->lastInChrBuf;

    // packed input: every "plane" aliases plane 0
    if (isPacked(c->srcFormat)) {
        src[0] =
        src[1] =
        src[2] =
        src[3] = src[0];
        srcStride[0] =
        srcStride[1] =
        srcStride[2] =
        srcStride[3] = srcStride[0];
    }
    srcStride[1] <<= c->vChrDrop;
    srcStride[2] <<= c->vChrDrop;

    DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
                  src[0], srcStride[0], src[1], srcStride[1],
                  src[2], srcStride[2], src[3], srcStride[3],
                  dst[0], dstStride[0], dst[1], dstStride[1],
                  dst[2], dstStride[2], dst[3], dstStride[3]);
    DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
                  srcSliceY, srcSliceH, dstY, dstH);
    DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
                  vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);

    if (dstStride[0] % 8 != 0 || dstStride[1] % 8 != 0 ||
        dstStride[2] % 8 != 0 || dstStride[3] % 8 != 0) {
        static int warnedAlready = 0; // FIXME maybe move this into the context
        if (flags & SWS_PRINT_INFO && !warnedAlready) {
            av_log(c, AV_LOG_WARNING,
                   "Warning: dstStride is not aligned!\n"
                   "         ->cannot do aligned memory accesses anymore\n");
            warnedAlready = 1;
        }
    }

    /* Note the user might start scaling the picture in the middle so this
     * will not get executed. This is not really intended but works
     * currently, so people might do it. */
    if (srcSliceY == 0) {
        lumBufIndex  = -1;
        chrBufIndex  = -1;
        dstY         = 0;
        lastInLumBuf = -1;
        lastInChrBuf = -1;
    }

    if (!should_dither) {
        c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
    }
    lastDstY = dstY;

    for (; dstY < dstH; dstY++) {
        const int chrDstY = dstY >> c->chrDstVSubSample;
        uint8_t *dest[4]  = {
            dst[0] + dstStride[0] * dstY,
            dst[1] + dstStride[1] * chrDstY,
            dst[2] + dstStride[2] * chrDstY,
            (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
        };

        // First line needed as input
        const int firstLumSrcY  = FFMAX(1 - vLumFilterSize, vLumFilterPos[dstY]);
        // same, for the last luma line that shares this chroma line
        const int firstLumSrcY2 = FFMAX(1 - vLumFilterSize, vLumFilterPos[FFMIN(dstY | ((1 << c->chrDstVSubSample) - 1), dstH - 1)]);
        // First line needed as input
        const int firstChrSrcY  = FFMAX(1 - vChrFilterSize, vChrFilterPos[chrDstY]);

        // Last line needed as input
        int lastLumSrcY  = FFMIN(c->srcH,    firstLumSrcY  + vLumFilterSize) - 1;
        int lastLumSrcY2 = FFMIN(c->srcH,    firstLumSrcY2 + vLumFilterSize) - 1;
        int lastChrSrcY  = FFMIN(c->chrSrcH, firstChrSrcY  + vChrFilterSize) - 1;
        int enough_lines;

        // handle holes (FAST_BILINEAR & weird filters)
        if (firstLumSrcY > lastInLumBuf)
            lastInLumBuf = firstLumSrcY - 1;
        if (firstChrSrcY > lastInChrBuf)
            lastInChrBuf = firstChrSrcY - 1;
        assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
        assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);

        DEBUG_BUFFERS("dstY: %d\n", dstY);
        DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
                      firstLumSrcY, lastLumSrcY, lastInLumBuf);
        DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
                      firstChrSrcY, lastChrSrcY, lastInChrBuf);

        // Do we have enough lines in this slice to output the dstY line
        enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH &&
                       lastChrSrcY < -((-srcSliceY - srcSliceH) >> c->chrSrcVSubSample);

        if (!enough_lines) {
            // not enough input: just buffer everything the slice still has
            lastLumSrcY = srcSliceY + srcSliceH - 1;
            lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
            DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
                          lastLumSrcY, lastChrSrcY);
        }

        // Do horizontal scaling
        while (lastInLumBuf < lastLumSrcY) {
            const uint8_t *src1[4] = {
                src[0] + (lastInLumBuf + 1 - srcSliceY) * srcStride[0],
                src[1] + (lastInLumBuf + 1 - srcSliceY) * srcStride[1],
                src[2] + (lastInLumBuf + 1 - srcSliceY) * srcStride[2],
                src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3],
            };
            lumBufIndex++;
            assert(lumBufIndex < 2 * vLumBufSize);
            assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
            assert(lastInLumBuf + 1 - srcSliceY >= 0);
            hyscale(c, lumPixBuf[lumBufIndex], dstW, src1, srcW, lumXInc,
                    hLumFilter, hLumFilterPos, hLumFilterSize,
                    formatConvBuffer, pal, 0);
            if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
                hyscale(c, alpPixBuf[lumBufIndex], dstW, src1, srcW,
                        lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
                        formatConvBuffer, pal, 1);
            lastInLumBuf++;
            DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
                          lumBufIndex, lastInLumBuf);
        }
        while (lastInChrBuf < lastChrSrcY) {
            const uint8_t *src1[4] = {
                src[0] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[0],
                src[1] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[1],
                src[2] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[2],
                src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3],
            };
            chrBufIndex++;
            assert(chrBufIndex < 2 * vChrBufSize);
            assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
            assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
            // FIXME replace parameters through context struct (some at least)

            if (c->needs_hcscale)
                hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
                        chrDstW, src1, chrSrcW, chrXInc,
                        hChrFilter, hChrFilterPos, hChrFilterSize,
                        formatConvBuffer, pal);
            lastInChrBuf++;
            DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
                          chrBufIndex, lastInChrBuf);
        }
        // wrap buf index around to stay inside the ring buffer
        if (lumBufIndex >= vLumBufSize)
            lumBufIndex -= vLumBufSize;
        if (chrBufIndex >= vChrBufSize)
            chrBufIndex -= vChrBufSize;
        if (!enough_lines)
            break;  // we can't output a dstY line so let's try with the next slice

#if HAVE_MMX && HAVE_INLINE_ASM
        updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex,
                              lastInLumBuf, lastInChrBuf);
#endif
        if (should_dither) {
            c->chrDither8 = dither_8x8_128[chrDstY & 7];
            c->lumDither8 = dither_8x8_128[dstY    & 7];
        }
        if (dstY >= dstH - 2) {
            /* hmm looks like we can't use MMX here without overwriting
             * this array's tail */
            ff_sws_init_output_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
                                     &yuv2packed1, &yuv2packed2, &yuv2packedX);
        }

        {
            // pointers to the first buffered line each vertical filter needs
            const int16_t **lumSrcPtr  = (const int16_t **)lumPixBuf  + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
            const int16_t **chrUSrcPtr = (const int16_t **)chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
            const int16_t **chrVSrcPtr = (const int16_t **)chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
            const int16_t **alpSrcPtr  = (CONFIG_SWSCALE_ALPHA && alpPixBuf) ?
                                         (const int16_t **)alpPixBuf  + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;

            // filter window sticks out of the picture: build a temporary
            // pointer list that repeats the first/last available line
            if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
                const int16_t **tmpY = (const int16_t **)lumPixBuf +
                                       2 * vLumBufSize;
                int neg = -firstLumSrcY, i;
                int end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
                for (i = 0; i < neg; i++)
                    tmpY[i] = lumSrcPtr[neg];
                for (; i < end; i++)
                    tmpY[i] = lumSrcPtr[i];
                for (; i < vLumFilterSize; i++)
                    tmpY[i] = tmpY[i - 1];
                lumSrcPtr = tmpY;

                if (alpSrcPtr) {
                    const int16_t **tmpA = (const int16_t **)alpPixBuf +
                                           2 * vLumBufSize;
                    for (i = 0; i < neg; i++)
                        tmpA[i] = alpSrcPtr[neg];
                    for (; i < end; i++)
                        tmpA[i] = alpSrcPtr[i];
                    for (; i < vLumFilterSize; i++)
                        tmpA[i] = tmpA[i - 1];
                    alpSrcPtr = tmpA;
                }
            }
            if (firstChrSrcY < 0 ||
                firstChrSrcY + vChrFilterSize > c->chrSrcH) {
                const int16_t **tmpU = (const int16_t **)chrUPixBuf + 2 * vChrBufSize,
                **tmpV               = (const int16_t **)chrVPixBuf + 2 * vChrBufSize;
                int neg = -firstChrSrcY, i;
                int end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
                for (i = 0; i < neg; i++) {
                    tmpU[i] = chrUSrcPtr[neg];
                    tmpV[i] = chrVSrcPtr[neg];
                }
                for (; i < end; i++) {
                    tmpU[i] = chrUSrcPtr[i];
                    tmpV[i] = chrVSrcPtr[i];
                }
                for (; i < vChrFilterSize; i++) {
                    tmpU[i] = tmpU[i - 1];
                    tmpV[i] = tmpV[i - 1];
                }
                chrUSrcPtr = tmpU;
                chrVSrcPtr = tmpV;
            }

            if (isPlanarYUV(dstFormat) ||
                (isGray(dstFormat) && !isALPHA(dstFormat))) { // YV12 like
                const int chrSkipMask = (1 << c->chrDstVSubSample) - 1;

                if (vLumFilterSize == 1) {
                    yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
                } else {
                    yuv2planeX(vLumFilter + dstY * vLumFilterSize,
                               vLumFilterSize, lumSrcPtr, dest[0],
                               dstW, c->lumDither8, 0);
                }

                // chroma is written only on luma lines that start a chroma line
                if (!((dstY & chrSkipMask) || isGray(dstFormat))) {
                    if (yuv2nv12cX) {
                        yuv2nv12cX(c, vChrFilter + chrDstY * vChrFilterSize,
                                   vChrFilterSize, chrUSrcPtr, chrVSrcPtr,
                                   dest[1], chrDstW);
                    } else if (vChrFilterSize == 1) {
                        yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
                        yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
                    } else {
                        yuv2planeX(vChrFilter + chrDstY * vChrFilterSize,
                                   vChrFilterSize, chrUSrcPtr, dest[1],
                                   chrDstW, c->chrDither8, 0);
                        yuv2planeX(vChrFilter + chrDstY * vChrFilterSize,
                                   vChrFilterSize, chrVSrcPtr, dest[2],
                                   chrDstW, c->chrDither8, 3);
                    }
                }

                if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
                    if (vLumFilterSize == 1) {
                        yuv2plane1(alpSrcPtr[0], dest[3], dstW,
                                   c->lumDither8, 0);
                    } else {
                        yuv2planeX(vLumFilter + dstY * vLumFilterSize,
                                   vLumFilterSize, alpSrcPtr, dest[3],
                                   dstW, c->lumDither8, 0);
                    }
                }
            } else {
                assert(lumSrcPtr  + vLumFilterSize - 1 < lumPixBuf  + vLumBufSize * 2);
                assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize * 2);
                if (c->yuv2packed1 && vLumFilterSize == 1 &&
                    vChrFilterSize <= 2) { // unscaled RGB
                    int chrAlpha = vChrFilterSize == 1 ? 0 : vChrFilter[2 * dstY + 1];
                    yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
                                alpPixBuf ? *alpSrcPtr : NULL,
                                dest[0], dstW, chrAlpha, dstY);
                } else if (c->yuv2packed2 && vLumFilterSize == 2 &&
                           vChrFilterSize == 2) { // bilinear upscale RGB
                    int lumAlpha = vLumFilter[2 * dstY + 1];
                    int chrAlpha = vChrFilter[2 * dstY + 1];
                    lumMmxFilter[2] =
                    lumMmxFilter[3] = vLumFilter[2 * dstY]    * 0x10001;
                    chrMmxFilter[2] =
                    chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
                    yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
                                alpPixBuf ? alpSrcPtr : NULL,
                                dest[0], dstW, lumAlpha, chrAlpha, dstY);
                } else { // general RGB
                    yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
                                lumSrcPtr, vLumFilterSize,
                                vChrFilter + dstY * vChrFilterSize,
                                chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                                alpSrcPtr, dest[0], dstW, dstY);
                }
            }
        }
    }

    // planar output with alpha but no alpha input: fill the new lines with 255
    if (isPlanar(dstFormat) && isALPHA(dstFormat) && !alpPixBuf)
        fillPlane(dst[3], dstStride[3], dstW, dstY - lastDstY, lastDstY, 255);

#if HAVE_MMX2 && HAVE_INLINE_ASM
    if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
        __asm__ volatile ("sfence" ::: "memory");
#endif
    emms_c();

    /* store changed local vars back in the context */
    c->dstY         = dstY;
    c->lumBufIndex  = lumBufIndex;
    c->chrBufIndex  = chrBufIndex;
    c->lastInLumBuf = lastInLumBuf;
    c->lastInChrBuf = lastInChrBuf;

    return dstY - lastDstY;
}

680
/**
 * Install the C implementations of the scaler function pointers.
 *
 * - Output and input conversion functions come from
 *   ff_sws_init_output_funcs() and ff_sws_init_input_funcs().
 * - The horizontal scaler is chosen by source depth (8-bit or not) and by
 *   destination depth (<= 10 bits gives 15-bit intermediates, otherwise
 *   19-bit). The fast bilinear scalers are installed only for 8-bit input,
 *   <= 10-bit output and the SWS_FAST_BILINEAR flag.
 * - Range conversion functions are installed only when the source and
 *   destination ranges differ and the destination is not RGB.
 * - needs_hcscale is set unless source or destination is gray or the
 *   source is monochrome.
 *
 * @param c scaler context to initialize
 */
static av_cold void sws_init_swScale_c(SwsContext *c)
{
    enum PixelFormat srcFormat = c->srcFormat;

    ff_sws_init_output_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
                             &c->yuv2nv12cX, &c->yuv2packed1,
                             &c->yuv2packed2, &c->yuv2packedX);

    ff_sws_init_input_funcs(c);

    if (c->srcBpc == 8) {
        if (c->dstBpc <= 10) {
            c->hyScale = c->hcScale = hScale8To15_c;
            if (c->flags & SWS_FAST_BILINEAR) {
                c->hyscale_fast = hyscale_fast_c;
                c->hcscale_fast = hcscale_fast_c;
            }
        } else {
            c->hyScale = c->hcScale = hScale8To19_c;
        }
    } else {
        c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c
                                                 : hScale16To15_c;
    }

    if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
        if (c->dstBpc <= 10) {
            if (c->srcRange) {
                c->lumConvertRange = lumRangeFromJpeg_c;
                c->chrConvertRange = chrRangeFromJpeg_c;
            } else {
                c->lumConvertRange = lumRangeToJpeg_c;
                c->chrConvertRange = chrRangeToJpeg_c;
            }
        } else {
            if (c->srcRange) {
                c->lumConvertRange = lumRangeFromJpeg16_c;
                c->chrConvertRange = chrRangeFromJpeg16_c;
            } else {
                c->lumConvertRange = lumRangeToJpeg16_c;
                c->chrConvertRange = chrRangeToJpeg16_c;
            }
        }
    }

    if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
          srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
        c->needs_hcscale = 1;
}
Michael Niedermayer's avatar
Michael Niedermayer committed
729

730
/**
 * Initialize the context's function pointers and return the scaling
 * entry point.
 *
 * The C implementations are installed first. The MMX and AltiVec
 * initializers then run, when enabled at build time, after the C setup.
 *
 * @param c scaler context to initialize
 * @return swScale
 */
SwsFunc ff_getSwsFunc(SwsContext *c)
{
    sws_init_swScale_c(c);

    if (HAVE_MMX)
        ff_sws_init_swScale_mmx(c);
    if (HAVE_ALTIVEC)
        ff_sws_init_swScale_altivec(c);

    return swScale;
}