swscale.c 35.6 KB
Newer Older
1
/*
2
 * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
3 4 5
 *
 * This file is part of FFmpeg.
 *
6 7 8 9
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
10 11 12
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
15
 *
16 17
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
 */
20

21
#include <inttypes.h>
22
#include <math.h>
23
#include <stdio.h>
24 25
#include <string.h>

26
#include "libavutil/avassert.h"
27
#include "libavutil/avutil.h"
28
#include "libavutil/bswap.h"
29
#include "libavutil/cpu.h"
30
#include "libavutil/imgutils.h"
31 32
#include "libavutil/intreadwrite.h"
#include "libavutil/mathematics.h"
33
#include "libavutil/pixdesc.h"
34 35 36 37
#include "config.h"
#include "rgb2rgb.h"
#include "swscale_internal.h"
#include "swscale.h"
Arpi's avatar
Arpi committed
38

39
/*
 * 8-column ordered-dither table; every entry is an even value in 0..126.
 * Rows 0-7 form the 8x8 matrix; row 8 repeats row 0.
 * swscale() selects a row with (line & 7).
 */
DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_128)[9][8] = {
    {  36,  68,  60,  92,  34,  66,  58,  90 },
    { 100,   4, 124,  28,  98,   2, 122,  26 },
    {  52,  84,  44,  76,  50,  82,  42,  74 },
    { 116,  20, 108,  12, 114,  18, 106,  10 },
    {  32,  64,  56,  88,  38,  70,  62,  94 },
    {  96,   0, 120,  24, 102,   6, 126,  30 },
    {  48,  80,  40,  72,  54,  86,  46,  78 },
    { 112,  16, 104,   8, 118,  22, 110,  14 },
    /* copy of row 0 */
    {  36,  68,  60,  92,  34,  66,  58,  90 },
};
50

51
/* Eight bytes of value 64; swscale() installs this as both the luma and the
 * chroma dither row when dithering is not wanted. */
DECLARE_ALIGNED(8, static const uint8_t, sws_pb_64)[8] = {
    64, 64, 64, 64,
    64, 64, 64, 64,
};
54

55 56
/**
 * Fill a rectangle of a byte plane with a constant value.
 *
 * @param plane  start of the plane
 * @param stride distance in bytes between the starts of consecutive rows
 * @param width  number of bytes written in each row
 * @param height number of rows written
 * @param y      index of the first row written
 * @param val    byte value stored
 */
static av_always_inline void fillPlane(uint8_t *plane, int stride, int width,
                                       int height, int y, uint8_t val)
{
    uint8_t *row = plane + stride * y;
    int rows_left;

    for (rows_left = height; rows_left > 0; rows_left--) {
        memset(row, val, width);
        row += stride;
    }
}

66 67
/**
 * Horizontal FIR scaling of one line of 16-bit samples.
 *
 * For every output position i the filterSize taps starting at
 * filter[filterSize * i] are applied to the input samples starting at
 * src[filterPos[i]]. The sum is shifted right and clamped from above to
 * (1 << 19) - 1.
 *
 * @param c          context; only srcFormat is read, to choose the shift
 * @param _dst       output buffer, written as int32_t despite the int16_t type
 * @param dstW       number of output samples
 * @param _src       input line, read as uint16_t
 * @param filter     filter coefficients, filterSize per output sample
 * @param filterPos  index of the first input sample for each output sample
 * @param filterSize number of taps per output sample
 */
static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW,
                           const uint8_t *_src, const int16_t *filter,
                           const int32_t *filterPos, int filterSize)
{
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat);
    int i;
    int32_t *dst        = (int32_t *) _dst;
    const uint16_t *src = (const uint16_t *) _src;
    int bits            = desc->comp[0].depth - 1;
    int sh              = bits - 4;

    /* RGB and PAL8 input below 16 bits uses a fixed shift. */
    if ((isAnyRGB(c->srcFormat) || c->srcFormat == AV_PIX_FMT_PAL8) &&
        desc->comp[0].depth < 16)
        sh = 9;

    for (i = 0; i < dstW; i++) {
        int j;
        int srcPos = filterPos[i];
        int val    = 0;

        for (j = 0; j < filterSize; j++) {
            /* Accumulate in unsigned arithmetic: partial sums of filters
             * with negative lobes may leave the int range, which would be
             * undefined behaviour with a signed multiply-add. */
            val += src[srcPos + j] * (unsigned)filter[filterSize * i + j];
        }
        // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
        dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
    }
}

93 94
/**
 * Horizontal FIR scaling of one line of 16-bit samples into int16_t output.
 *
 * For every output position i the filterSize taps starting at
 * filter[filterSize * i] are applied to the input samples starting at
 * src[filterPos[i]]. The sum is shifted right and clamped from above to
 * (1 << 15) - 1.
 *
 * @param c          context; only srcFormat is read, to choose the shift
 * @param dst        output buffer
 * @param dstW       number of output samples
 * @param _src       input line, read as uint16_t
 * @param filter     filter coefficients, filterSize per output sample
 * @param filterPos  index of the first input sample for each output sample
 * @param filterSize number of taps per output sample
 */
static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW,
                           const uint8_t *_src, const int16_t *filter,
                           const int32_t *filterPos, int filterSize)
{
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat);
    int i;
    const uint16_t *src = (const uint16_t *) _src;
    int sh              = desc->comp[0].depth - 1;

    /* Below 16 bits, RGB and PAL8 input uses a fixed shift; every other
     * format keeps depth - 1. */
    if (sh < 15 &&
        (isAnyRGB(c->srcFormat) || c->srcFormat == AV_PIX_FMT_PAL8))
        sh = 13;

    for (i = 0; i < dstW; i++) {
        int j;
        int srcPos = filterPos[i];
        int val    = 0;

        for (j = 0; j < filterSize; j++) {
            /* Accumulate in unsigned arithmetic so that partial sums which
             * leave the int range wrap instead of being undefined. */
            val += src[srcPos + j] * (unsigned)filter[filterSize * i + j];
        }
        // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
        dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
    }
}

118
// bilinear / bicubic scaling
119 120 121
/**
 * Horizontal FIR scaling of one line of 8-bit samples into int16_t output.
 *
 * Each output sample is the tap-weighted sum of filterSize input bytes
 * starting at src[filterPos[x]], shifted right by 7 and clamped from above
 * to (1 << 15) - 1. The context argument is not used.
 */
static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW,
                          const uint8_t *src, const int16_t *filter,
                          const int32_t *filterPos, int filterSize)
{
    const int16_t *taps = filter;
    int x;

    for (x = 0; x < dstW; x++) {
        const int first = filterPos[x];
        int acc = 0;
        int k;

        for (k = 0; k < filterSize; k++)
            acc += ((int)src[first + k]) * taps[k];

        // the cubic equation does overflow ...
        dst[x] = FFMIN(acc >> 7, (1 << 15) - 1);
        taps += filterSize;
    }
}
134

135 136 137
/**
 * Horizontal FIR scaling of one line of 8-bit samples into int32_t output.
 *
 * Each output sample is the tap-weighted sum of filterSize input bytes
 * starting at src[filterPos[x]], shifted right by 3 and clamped from above
 * to (1 << 19) - 1. _dst is written as int32_t; the context is not used.
 */
static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW,
                          const uint8_t *src, const int16_t *filter,
                          const int32_t *filterPos, int filterSize)
{
    int32_t *out = (int32_t *) _dst;
    const int16_t *taps = filter;
    int x;

    for (x = 0; x < dstW; x++) {
        const int first = filterPos[x];
        int acc = 0;
        int k;

        for (k = 0; k < filterSize; k++)
            acc += ((int)src[first + k]) * taps[k];

        // the cubic equation does overflow ...
        out[x] = FFMIN(acc >> 3, (1 << 19) - 1);
        taps += filterSize;
    }
}

Diego Biurrun's avatar
Diego Biurrun committed
152
// FIXME all pal and rgb srcFormats could do this conversion as well
153
// FIXME all scalers more complex than bilinear could do half of this transform
154
/**
 * In-place chroma range conversion used when the source is not full range.
 *
 * Each sample of both planes is clamped from above to 30775 and then mapped
 * to (s * 4663 - 9289992) >> 12.
 */
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int x;

    for (x = 0; x < width; x++) {
        int s;

        s = dstU[x];
        if (s > 30775)
            s = 30775;
        dstU[x] = (s * 4663 - 9289992) >> 12; // -264

        s = dstV[x];
        if (s > 30775)
            s = 30775;
        dstV[x] = (s * 4663 - 9289992) >> 12; // -264
    }
}
162

163
/**
 * In-place chroma range conversion used when the source is full range.
 *
 * Each sample of both planes is mapped to (s * 1799 + 4081085) >> 11.
 */
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int x = 0;

    while (x < width) {
        dstU[x] = (dstU[x] * 1799 + 4081085) >> 11; // 1469
        dstV[x] = (dstV[x] * 1799 + 4081085) >> 11; // 1469
        x++;
    }
}
171

172
/**
 * In-place luma range conversion used when the source is not full range.
 *
 * Each sample is clamped from above to 30189 and then mapped to
 * (s * 19077 - 39057361) >> 14.
 */
static void lumRangeToJpeg_c(int16_t *dst, int width)
{
    int x;

    for (x = 0; x < width; x++) {
        int s = dst[x];

        if (s > 30189)
            s = 30189;
        dst[x] = (s * 19077 - 39057361) >> 14;
    }
}
178

179
/**
 * In-place luma range conversion used when the source is full range.
 *
 * Each sample is mapped to (s * 14071 + 33561947) >> 14.
 */
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    int16_t *p = dst;
    int left;

    for (left = width; left > 0; left--, p++)
        *p = (*p * 14071 + 33561947) >> 14;
}

186 187 188 189 190 191
/**
 * In-place chroma range conversion for the int32_t intermediate format,
 * used when the source is not full range.
 *
 * Both buffers are accessed as int32_t despite the int16_t parameter type.
 * Each sample is clamped from above to 30775 << 4 and then mapped to
 * (s * 4663 - (9289992 << 4)) >> 12.
 *
 * The product is formed in unsigned arithmetic, the same way
 * lumRangeToJpeg16_c() does it: (30775 << 4) * 4663 is larger than INT_MAX,
 * so a signed multiply would overflow for inputs near the clamp even though
 * the final difference fits in an int.
 */
static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int i;
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    for (i = 0; i < width; i++) {
        int s;

        s = dstU[i];
        if (s > (30775 << 4))
            s = 30775 << 4;
        dstU[i] = ((int)(s * 4663U - (9289992U << 4))) >> 12; // -264

        s = dstV[i];
        if (s > (30775 << 4))
            s = 30775 << 4;
        dstV[i] = ((int)(s * 4663U - (9289992U << 4))) >> 12; // -264
    }
}
196

197 198 199 200 201 202
/**
 * In-place chroma range conversion for the int32_t intermediate format,
 * used when the source is full range.
 *
 * Both buffers are accessed as int32_t despite the int16_t parameter type.
 * Each sample is mapped to (s * 1799 + (4081085 << 4)) >> 11.
 */
static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int32_t *u = (int32_t *) _dstU;
    int32_t *v = (int32_t *) _dstV;
    int x = 0;

    while (x < width) {
        u[x] = (u[x] * 1799 + (4081085 << 4)) >> 11; // 1469
        v[x] = (v[x] * 1799 + (4081085 << 4)) >> 11; // 1469
        x++;
    }
}
207

208 209 210 211
/**
 * In-place luma range conversion for the int32_t intermediate format,
 * used when the source is not full range.
 *
 * The buffer is accessed as int32_t despite the int16_t parameter type.
 * Each sample is clamped from above to 30189 << 4 and then mapped to
 * (s * 4769 - (39057361 << 2)) >> 12, with the product formed in unsigned
 * arithmetic and converted back to int before the shift.
 */
static void lumRangeToJpeg16_c(int16_t *_dst, int width)
{
    int32_t *luma = (int32_t *) _dst;
    int x;

    for (x = 0; x < width; x++) {
        int s = luma[x];

        if (s > (30189 << 4))
            s = 30189 << 4;
        luma[x] = ((int)(s * 4769U - (39057361 << 2))) >> 12;
    }
}
216

217 218 219 220 221
/**
 * In-place luma range conversion for the int32_t intermediate format,
 * used when the source is full range.
 *
 * The buffer is accessed as int32_t despite the int16_t parameter type.
 * Each sample is mapped to (s * (14071/4) + (33561947<<4)/4) >> 12; both
 * divisions are integer divisions evaluated on the constants.
 */
static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
{
    int32_t *luma = (int32_t *) _dst;
    const int gain   = 14071 / 4;
    const int offset = (33561947 << 4) / 4;
    int x;

    for (x = 0; x < width; x++)
        luma[x] = (luma[x] * gain + offset) >> 12;
}

225

226
/* Set to 1 to log buffer bookkeeping from swscale() at AV_LOG_DEBUG. */
#define DEBUG_SWSCALE_BUFFERS 0
/*
 * Conditional av_log() wrapper; expects a variable named `c` in scope.
 * Wrapped in do { } while (0) so that it behaves as a single statement and
 * cannot capture an `else` that follows the call site.
 */
#define DEBUG_BUFFERS(...)                          \
    do {                                            \
        if (DEBUG_SWSCALE_BUFFERS)                  \
            av_log(c, AV_LOG_DEBUG, __VA_ARGS__);   \
    } while (0)
230

231
/**
 * Scale one slice of source lines into as many destination lines as the
 * slice allows.
 *
 * Progress (dstY, ring-buffer indices, last buffered source lines) is loaded
 * from the context, reset when srcSliceY is 0, and stored back before
 * returning, so consecutive calls continue where the previous one stopped.
 * For every destination line the luma and chroma horizontal descriptors are
 * run on the source lines that are still missing, then the vertical
 * descriptors produce the output line. The loop stops early when the slice
 * does not contain all source lines a destination line needs.
 *
 * Note that src[] and srcStride[] are modified in place: for packed source
 * formats all four entries are set to entry 0, and the chroma strides are
 * shifted left by c->vChrDrop.
 *
 * @param c         scaler context
 * @param src       source plane pointers (4 entries are accessed)
 * @param srcStride source strides (4 entries are accessed)
 * @param srcSliceY first source line of the slice
 * @param srcSliceH number of source lines in the slice
 * @param dst       destination plane pointers (4 entries are accessed)
 * @param dstStride destination strides (4 entries are accessed)
 * @return number of destination lines produced by this call
 */
static int swscale(SwsContext *c, const uint8_t *src[],
                   int srcStride[], int srcSliceY,
                   int srcSliceH, uint8_t *dst[], int dstStride[])
{
    /* load a few things into local vars to make the code more readable?
     * and faster */
    const int dstW                   = c->dstW;
    const int dstH                   = c->dstH;

    const enum AVPixelFormat dstFormat = c->dstFormat;
    const int flags                  = c->flags;
    int32_t *vLumFilterPos           = c->vLumFilterPos;
    int32_t *vChrFilterPos           = c->vChrFilterPos;

    const int vLumFilterSize         = c->vLumFilterSize;
    const int vChrFilterSize         = c->vChrFilterSize;

    yuv2planar1_fn yuv2plane1        = c->yuv2plane1;
    yuv2planarX_fn yuv2planeX        = c->yuv2planeX;
    yuv2interleavedX_fn yuv2nv12cX   = c->yuv2nv12cX;
    yuv2packed1_fn yuv2packed1       = c->yuv2packed1;
    yuv2packed2_fn yuv2packed2       = c->yuv2packed2;
    yuv2packedX_fn yuv2packedX       = c->yuv2packedX;
    yuv2anyX_fn yuv2anyX             = c->yuv2anyX;
    const int chrSrcSliceY           =                srcSliceY >> c->chrSrcVSubSample;
    const int chrSrcSliceH           = AV_CEIL_RSHIFT(srcSliceH,   c->chrSrcVSubSample);
    int should_dither                = is9_OR_10BPS(c->srcFormat) ||
                                       is16BPS(c->srcFormat);
    int lastDstY;

    /* vars which will change and which we need to store back in the context */
    int dstY         = c->dstY;
    int lumBufIndex  = c->lumBufIndex;
    int chrBufIndex  = c->chrBufIndex;
    int lastInLumBuf = c->lastInLumBuf;
    int lastInChrBuf = c->lastInChrBuf;

    /* c->desc is split into three consecutive ranges:
     * [lumStart, lumEnd) luma horizontal, [chrStart, chrEnd) chroma
     * horizontal and [vStart, vEnd) vertical processing. */
    int lumStart = 0;
    int lumEnd = c->descIndex[0];
    int chrStart = lumEnd;
    int chrEnd = c->descIndex[1];
    int vStart = chrEnd;
    int vEnd = c->numDesc;
    SwsSlice *src_slice = &c->slice[lumStart];
    SwsSlice *hout_slice = &c->slice[c->numSlice-2];
    SwsSlice *vout_slice = &c->slice[c->numSlice-1];
    SwsFilterDescriptor *desc = c->desc;

    int needAlpha = c->needAlpha;

    int hasLumHoles = 1;
    int hasChrHoles = 1;

    if (isPacked(c->srcFormat)) {
        src[0] =
        src[1] =
        src[2] =
        src[3] = src[0];
        srcStride[0] =
        srcStride[1] =
        srcStride[2] =
        srcStride[3] = srcStride[0];
    }
    srcStride[1] <<= c->vChrDrop;
    srcStride[2] <<= c->vChrDrop;

    DEBUG_BUFFERS("swscale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
                  src[0], srcStride[0], src[1], srcStride[1],
                  src[2], srcStride[2], src[3], srcStride[3],
                  dst[0], dstStride[0], dst[1], dstStride[1],
                  dst[2], dstStride[2], dst[3], dstStride[3]);
    DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
                  srcSliceY, srcSliceH, dstY, dstH);
    DEBUG_BUFFERS("vLumFilterSize: %d vChrFilterSize: %d\n",
                  vLumFilterSize, vChrFilterSize);

    /* Warn once (per process, not per context) about unaligned strides. */
    if (dstStride[0]&15 || dstStride[1]&15 ||
        dstStride[2]&15 || dstStride[3]&15) {
        static int warnedAlready = 0; // FIXME maybe move this into the context
        if (flags & SWS_PRINT_INFO && !warnedAlready) {
            av_log(c, AV_LOG_WARNING,
                   "Warning: dstStride is not aligned!\n"
                   "         ->cannot do aligned memory accesses anymore\n");
            warnedAlready = 1;
        }
    }

    /* Warn once about unaligned pointers or strides when SSE2 is present. */
    if (   (uintptr_t)dst[0]&15 || (uintptr_t)dst[1]&15 || (uintptr_t)dst[2]&15
        || (uintptr_t)src[0]&15 || (uintptr_t)src[1]&15 || (uintptr_t)src[2]&15
        || dstStride[0]&15 || dstStride[1]&15 || dstStride[2]&15 || dstStride[3]&15
        || srcStride[0]&15 || srcStride[1]&15 || srcStride[2]&15 || srcStride[3]&15
    ) {
        static int warnedAlready=0;
        int cpu_flags = av_get_cpu_flags();
        if (HAVE_MMXEXT && (cpu_flags & AV_CPU_FLAG_SSE2) && !warnedAlready){
            av_log(c, AV_LOG_WARNING, "Warning: data is not aligned! This can lead to a speedloss\n");
            warnedAlready=1;
        }
    }

    /* Note the user might start scaling the picture in the middle so this
     * will not get executed. This is not really intended but works
     * currently, so people might do it. */
    if (srcSliceY == 0) {
        lumBufIndex  = -1;
        chrBufIndex  = -1;
        dstY         = 0;
        lastInLumBuf = -1;
        lastInChrBuf = -1;
    }

    if (!should_dither) {
        c->chrDither8 = c->lumDither8 = sws_pb_64;
    }
    /* Remember where this call started; the return value is relative to it. */
    lastDstY = dstY;

    ff_init_vscale_pfn(c, yuv2plane1, yuv2planeX, yuv2nv12cX,
                   yuv2packed1, yuv2packed2, yuv2packedX, yuv2anyX, c->use_mmx_vfilter);

    ff_init_slice_from_src(src_slice, (uint8_t**)src, srcStride, c->srcW,
            srcSliceY, srcSliceH, chrSrcSliceY, chrSrcSliceH, 1);

    ff_init_slice_from_src(vout_slice, (uint8_t**)dst, dstStride, c->dstW,
            dstY, dstH, dstY >> c->chrDstVSubSample,
            AV_CEIL_RSHIFT(dstH, c->chrDstVSubSample), 0);
    if (srcSliceY == 0) {
        /* Start of a picture: the horizontal output slice holds no lines. */
        hout_slice->plane[0].sliceY = lastInLumBuf + 1;
        hout_slice->plane[1].sliceY = lastInChrBuf + 1;
        hout_slice->plane[2].sliceY = lastInChrBuf + 1;
        hout_slice->plane[3].sliceY = lastInLumBuf + 1;

        hout_slice->plane[0].sliceH =
        hout_slice->plane[1].sliceH =
        hout_slice->plane[2].sliceH =
        hout_slice->plane[3].sliceH = 0;
        hout_slice->width = dstW;
    }

    for (; dstY < dstH; dstY++) {
        const int chrDstY = dstY >> c->chrDstVSubSample;
        int use_mmx_vfilter= c->use_mmx_vfilter;

        // First line needed as input
        const int firstLumSrcY  = FFMAX(1 - vLumFilterSize, vLumFilterPos[dstY]);
        const int firstLumSrcY2 = FFMAX(1 - vLumFilterSize, vLumFilterPos[FFMIN(dstY | ((1 << c->chrDstVSubSample) - 1), dstH - 1)]);
        // First line needed as input
        const int firstChrSrcY  = FFMAX(1 - vChrFilterSize, vChrFilterPos[chrDstY]);

        // Last line needed as input
        int lastLumSrcY  = FFMIN(c->srcH,    firstLumSrcY  + vLumFilterSize) - 1;
        int lastLumSrcY2 = FFMIN(c->srcH,    firstLumSrcY2 + vLumFilterSize) - 1;
        int lastChrSrcY  = FFMIN(c->chrSrcH, firstChrSrcY  + vChrFilterSize) - 1;
        int enough_lines;

        int i;
        int posY, cPosY, firstPosY, lastPosY, firstCPosY, lastCPosY;

        // handle holes (FAST_BILINEAR & weird filters)
        if (firstLumSrcY > lastInLumBuf) {

            hasLumHoles = lastInLumBuf != firstLumSrcY - 1;
            if (hasLumHoles) {
                hout_slice->plane[0].sliceY = firstLumSrcY;
                hout_slice->plane[3].sliceY = firstLumSrcY;
                hout_slice->plane[0].sliceH =
                hout_slice->plane[3].sliceH = 0;
            }

            lastInLumBuf = firstLumSrcY - 1;
        }
        if (firstChrSrcY > lastInChrBuf) {

            hasChrHoles = lastInChrBuf != firstChrSrcY - 1;
            if (hasChrHoles) {
                hout_slice->plane[1].sliceY = firstChrSrcY;
                hout_slice->plane[2].sliceY = firstChrSrcY;
                hout_slice->plane[1].sliceH =
                hout_slice->plane[2].sliceH = 0;
            }

            lastInChrBuf = firstChrSrcY - 1;
        }

        DEBUG_BUFFERS("dstY: %d\n", dstY);
        DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
                      firstLumSrcY, lastLumSrcY, lastInLumBuf);
        DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
                      firstChrSrcY, lastChrSrcY, lastInChrBuf);

        // Do we have enough lines in this slice to output the dstY line
        enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH &&
                       lastChrSrcY < AV_CEIL_RSHIFT(srcSliceY + srcSliceH, c->chrSrcVSubSample);

        if (!enough_lines) {
            /* Buffer whatever the slice still offers, then stop below. */
            lastLumSrcY = srcSliceY + srcSliceH - 1;
            lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
            DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
                          lastLumSrcY, lastChrSrcY);
        }

        av_assert0((lastLumSrcY - firstLumSrcY + 1) <= hout_slice->plane[0].available_lines);
        av_assert0((lastChrSrcY - firstChrSrcY + 1) <= hout_slice->plane[1].available_lines);


        /* posY / cPosY: first line not yet present in the horizontal
         * output slice for luma / chroma. */
        posY = hout_slice->plane[0].sliceY + hout_slice->plane[0].sliceH;
        if (posY <= lastLumSrcY && !hasLumHoles) {
            firstPosY = FFMAX(firstLumSrcY, posY);
            lastPosY = FFMIN(firstLumSrcY + hout_slice->plane[0].available_lines - 1, srcSliceY + srcSliceH - 1);
        } else {
            firstPosY = lastInLumBuf + 1;
            lastPosY = lastLumSrcY;
        }

        cPosY = hout_slice->plane[1].sliceY + hout_slice->plane[1].sliceH;
        if (cPosY <= lastChrSrcY && !hasChrHoles) {
            firstCPosY = FFMAX(firstChrSrcY, cPosY);
            lastCPosY = FFMIN(firstChrSrcY + hout_slice->plane[1].available_lines - 1, AV_CEIL_RSHIFT(srcSliceY + srcSliceH, c->chrSrcVSubSample) - 1);
        } else {
            firstCPosY = lastInChrBuf + 1;
            lastCPosY = lastChrSrcY;
        }

        ff_rotate_slice(hout_slice, lastPosY, lastCPosY);

        if (posY < lastLumSrcY + 1) {
            for (i = lumStart; i < lumEnd; ++i)
                desc[i].process(c, &desc[i], firstPosY, lastPosY - firstPosY + 1);
        }

        lumBufIndex += lastLumSrcY - lastInLumBuf;
        lastInLumBuf = lastLumSrcY;

        if (cPosY < lastChrSrcY + 1) {
            for (i = chrStart; i < chrEnd; ++i)
                desc[i].process(c, &desc[i], firstCPosY, lastCPosY - firstCPosY + 1);
        }

        chrBufIndex += lastChrSrcY - lastInChrBuf;
        lastInChrBuf = lastChrSrcY;

        // wrap buf index around to stay inside the ring buffer
        if (lumBufIndex >= vLumFilterSize)
            lumBufIndex -= vLumFilterSize;
        if (chrBufIndex >= vChrFilterSize)
            chrBufIndex -= vChrFilterSize;
        if (!enough_lines)
            break;  // we can't output a dstY line so let's try with the next slice

#if HAVE_MMX_INLINE
        ff_updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex,
                              lastInLumBuf, lastInChrBuf);
#endif
        if (should_dither) {
            c->chrDither8 = ff_dither_8x8_128[chrDstY & 7];
            c->lumDither8 = ff_dither_8x8_128[dstY    & 7];
        }
        if (dstY >= dstH - 2) {
            /* hmm looks like we can't use MMX here without overwriting
             * this array's tail */
            ff_sws_init_output_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
                                     &yuv2packed1, &yuv2packed2, &yuv2packedX, &yuv2anyX);
            use_mmx_vfilter= 0;
            ff_init_vscale_pfn(c, yuv2plane1, yuv2planeX, yuv2nv12cX,
                           yuv2packed1, yuv2packed2, yuv2packedX, yuv2anyX, use_mmx_vfilter);
        }

        {
            for (i = vStart; i < vEnd; ++i)
                desc[i].process(c, &desc[i], dstY, 1);
        }
    }
    /* Destination has an alpha plane but no alpha was produced: fill the
     * lines written by this call with a constant. */
    if (isPlanar(dstFormat) && isALPHA(dstFormat) && !needAlpha) {
        int length = dstW;
        int height = dstY - lastDstY;

        if (is16BPS(dstFormat) || isNBPS(dstFormat)) {
            /* Named dst_desc so it does not shadow the filter-descriptor
             * array `desc` declared at function scope. */
            const AVPixFmtDescriptor *dst_desc = av_pix_fmt_desc_get(dstFormat);
            fillPlane16(dst[3], dstStride[3], length, height, lastDstY,
                    1, dst_desc->comp[3].depth,
                    isBE(dstFormat));
        } else
            fillPlane(dst[3], dstStride[3], length, height, lastDstY, 255);
    }

#if HAVE_MMXEXT_INLINE
    if (av_get_cpu_flags() & AV_CPU_FLAG_MMXEXT)
        __asm__ volatile ("sfence" ::: "memory");
#endif
    emms_c();

    /* store changed local vars back in the context */
    c->dstY         = dstY;
    c->lumBufIndex  = lumBufIndex;
    c->chrBufIndex  = chrBufIndex;
    c->lastInLumBuf = lastInLumBuf;
    c->lastInChrBuf = lastInChrBuf;

    return dstY - lastDstY;
}

534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558
/**
 * Select the luma and chroma range-conversion callbacks of a context.
 *
 * Both callbacks are cleared first. They are set only when source and
 * destination range differ and the destination format is not RGB: the
 * "FromJpeg" pair when c->srcRange is non-zero, the "ToJpeg" pair otherwise.
 * The plain variants are used for dstBpc <= 14, the "16" variants above that.
 */
av_cold void ff_sws_init_range_convert(SwsContext *c)
{
    const int narrow = c->dstBpc <= 14;

    c->lumConvertRange = NULL;
    c->chrConvertRange = NULL;

    if (c->srcRange == c->dstRange || isAnyRGB(c->dstFormat))
        return;

    if (c->srcRange) {
        c->lumConvertRange = narrow ? lumRangeFromJpeg_c : lumRangeFromJpeg16_c;
        c->chrConvertRange = narrow ? chrRangeFromJpeg_c : chrRangeFromJpeg16_c;
    } else {
        c->lumConvertRange = narrow ? lumRangeToJpeg_c : lumRangeToJpeg16_c;
        c->chrConvertRange = narrow ? chrRangeToJpeg_c : chrRangeToJpeg16_c;
    }
}

559
/**
 * Install the C implementations of the scaler callbacks into a context.
 *
 * Sets up the output and input function pointers, picks the horizontal
 * scaler from the source and destination bit depths, selects the range
 * converters, and marks chroma horizontal scaling as needed unless either
 * format is gray or the source is MONOBLACK / MONOWHITE.
 */
static av_cold void sws_init_swscale(SwsContext *c)
{
    enum AVPixelFormat srcFormat = c->srcFormat;
    int wide_output;

    ff_sws_init_output_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
                             &c->yuv2nv12cX, &c->yuv2packed1,
                             &c->yuv2packed2, &c->yuv2packedX, &c->yuv2anyX);

    ff_sws_init_input_funcs(c);

    /* Intermediate samples are 19 bit for dstBpc > 14, 15 bit otherwise. */
    wide_output = c->dstBpc > 14;

    if (c->srcBpc != 8) {
        c->hyScale = c->hcScale = wide_output ? hScale16To19_c
                                              : hScale16To15_c;
    } else if (wide_output) {
        c->hyScale = c->hcScale = hScale8To19_c;
    } else {
        c->hyScale = c->hcScale = hScale8To15_c;
        /* The fast bilinear path is only set for 8-bit in, <= 14-bit out. */
        if (c->flags & SWS_FAST_BILINEAR) {
            c->hyscale_fast = ff_hyscale_fast_c;
            c->hcscale_fast = ff_hcscale_fast_c;
        }
    }

    ff_sws_init_range_convert(c);

    if (!isGray(srcFormat) && !isGray(c->dstFormat) &&
        srcFormat != AV_PIX_FMT_MONOBLACK && srcFormat != AV_PIX_FMT_MONOWHITE)
        c->needs_hcscale = 1;
}
591

592
/**
 * Initialise the scaler callbacks of a context and return the scaling
 * entry point.
 *
 * The C defaults are installed first; afterwards the initialiser of each
 * architecture enabled at build time is called, in the order PPC, x86,
 * AArch64, ARM.
 *
 * @return always the generic swscale() function
 */
SwsFunc ff_getSwsFunc(SwsContext *c)
{
    sws_init_swscale(c);

    if (ARCH_PPC) {
        ff_sws_init_swscale_ppc(c);
    }
    if (ARCH_X86) {
        ff_sws_init_swscale_x86(c);
    }
    if (ARCH_AARCH64) {
        ff_sws_init_swscale_aarch64(c);
    }
    if (ARCH_ARM) {
        ff_sws_init_swscale_arm(c);
    }

    return swscale;
}
607

608
/**
 * Clear the plane pointers that a pixel format does not use.
 *
 * Plane 3 is cleared when the format has no alpha. For non-planar formats
 * planes 2 and 3 are cleared as well, and plane 1 too unless the format
 * uses a palette.
 */
static void reset_ptr(const uint8_t *src[], enum AVPixelFormat format)
{
    if (!isALPHA(format))
        src[3] = NULL;

    if (isPlanar(format))
        return;

    src[2] = NULL;
    src[3] = NULL;
    if (!usePal(format))
        src[1] = NULL;
}

620
static int check_image_pointers(const uint8_t * const data[4], enum AVPixelFormat pix_fmt,
621 622
                                const int linesizes[4])
{
623
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
624 625
    int i;

626 627
    av_assert2(desc);

628 629 630 631 632 633 634 635 636
    for (i = 0; i < 4; i++) {
        int plane = desc->comp[i].plane;
        if (!data[plane] || !linesizes[plane])
            return 0;
    }

    return 1;
}

637 638 639 640 641 642 643 644 645 646
static void xyz12Torgb48(struct SwsContext *c, uint16_t *dst,
                         const uint16_t *src, int stride, int h)
{
    int xp,yp;
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat);

    for (yp=0; yp<h; yp++) {
        for (xp=0; xp+2<stride; xp+=3) {
            int x, y, z, r, g, b;

647
            if (desc->flags & AV_PIX_FMT_FLAG_BE) {
648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668
                x = AV_RB16(src + xp + 0);
                y = AV_RB16(src + xp + 1);
                z = AV_RB16(src + xp + 2);
            } else {
                x = AV_RL16(src + xp + 0);
                y = AV_RL16(src + xp + 1);
                z = AV_RL16(src + xp + 2);
            }

            x = c->xyzgamma[x>>4];
            y = c->xyzgamma[y>>4];
            z = c->xyzgamma[z>>4];

            // convert from XYZlinear to sRGBlinear
            r = c->xyz2rgb_matrix[0][0] * x +
                c->xyz2rgb_matrix[0][1] * y +
                c->xyz2rgb_matrix[0][2] * z >> 12;
            g = c->xyz2rgb_matrix[1][0] * x +
                c->xyz2rgb_matrix[1][1] * y +
                c->xyz2rgb_matrix[1][2] * z >> 12;
            b = c->xyz2rgb_matrix[2][0] * x +
669
                c->xyz2rgb_matrix[2][1] * y +
670 671 672
                c->xyz2rgb_matrix[2][2] * z >> 12;

            // limit values to 12-bit depth
673 674 675
            r = av_clip_uintp2(r, 12);
            g = av_clip_uintp2(g, 12);
            b = av_clip_uintp2(b, 12);
676 677

            // convert from sRGBlinear to RGB and scale from 12bit to 16bit
678
            if (desc->flags & AV_PIX_FMT_FLAG_BE) {
679 680 681 682 683 684 685 686 687 688 689 690 691 692
                AV_WB16(dst + xp + 0, c->rgbgamma[r] << 4);
                AV_WB16(dst + xp + 1, c->rgbgamma[g] << 4);
                AV_WB16(dst + xp + 2, c->rgbgamma[b] << 4);
            } else {
                AV_WL16(dst + xp + 0, c->rgbgamma[r] << 4);
                AV_WL16(dst + xp + 1, c->rgbgamma[g] << 4);
                AV_WL16(dst + xp + 2, c->rgbgamma[b] << 4);
            }
        }
        src += stride;
        dst += stride;
    }
}

693 694 695 696
static void rgb48Toxyz12(struct SwsContext *c, uint16_t *dst,
                         const uint16_t *src, int stride, int h)
{
    int xp,yp;
697
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat);
698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728

    for (yp=0; yp<h; yp++) {
        for (xp=0; xp+2<stride; xp+=3) {
            int x, y, z, r, g, b;

            if (desc->flags & AV_PIX_FMT_FLAG_BE) {
                r = AV_RB16(src + xp + 0);
                g = AV_RB16(src + xp + 1);
                b = AV_RB16(src + xp + 2);
            } else {
                r = AV_RL16(src + xp + 0);
                g = AV_RL16(src + xp + 1);
                b = AV_RL16(src + xp + 2);
            }

            r = c->rgbgammainv[r>>4];
            g = c->rgbgammainv[g>>4];
            b = c->rgbgammainv[b>>4];

            // convert from sRGBlinear to XYZlinear
            x = c->rgb2xyz_matrix[0][0] * r +
                c->rgb2xyz_matrix[0][1] * g +
                c->rgb2xyz_matrix[0][2] * b >> 12;
            y = c->rgb2xyz_matrix[1][0] * r +
                c->rgb2xyz_matrix[1][1] * g +
                c->rgb2xyz_matrix[1][2] * b >> 12;
            z = c->rgb2xyz_matrix[2][0] * r +
                c->rgb2xyz_matrix[2][1] * g +
                c->rgb2xyz_matrix[2][2] * b >> 12;

            // limit values to 12-bit depth
729 730 731
            x = av_clip_uintp2(x, 12);
            y = av_clip_uintp2(y, 12);
            z = av_clip_uintp2(z, 12);
732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748

            // convert from XYZlinear to X'Y'Z' and scale from 12bit to 16bit
            if (desc->flags & AV_PIX_FMT_FLAG_BE) {
                AV_WB16(dst + xp + 0, c->xyzgammainv[x] << 4);
                AV_WB16(dst + xp + 1, c->xyzgammainv[y] << 4);
                AV_WB16(dst + xp + 2, c->xyzgammainv[z] << 4);
            } else {
                AV_WL16(dst + xp + 0, c->xyzgammainv[x] << 4);
                AV_WL16(dst + xp + 1, c->xyzgammainv[y] << 4);
                AV_WL16(dst + xp + 2, c->xyzgammainv[z] << 4);
            }
        }
        src += stride;
        dst += stride;
    }
}

749 750 751 752 753 754 755 756 757 758 759
/**
 * swscale wrapper, so we don't need to export the SwsContext.
 * Assumes planar YUV to be in YUV order instead of YVU.
 */
int attribute_align_arg sws_scale(struct SwsContext *c,
                                  const uint8_t * const srcSlice[],
                                  const int srcStride[], int srcSliceY,
                                  int srcSliceH, uint8_t *const dst[],
                                  const int dstStride[])
{
    int i, ret;
760 761
    const uint8_t *src2[4];
    uint8_t *dst2[4];
762
    uint8_t *rgb0_tmp = NULL;
763
    int macro_height = isBayer(c->srcFormat) ? 2 : (1 << c->chrSrcVSubSample);
764

765
    if (!srcStride || !dstStride || !dst || !srcSlice) {
766 767 768
        av_log(c, AV_LOG_ERROR, "One of the input parameters to sws_scale() is NULL, please check the calling code\n");
        return 0;
    }
769

770 771 772 773 774 775 776
    if ((srcSliceY & (macro_height-1)) ||
        ((srcSliceH& (macro_height-1)) && srcSliceY + srcSliceH != c->srcH) ||
        srcSliceY + srcSliceH > c->srcH) {
        av_log(c, AV_LOG_ERROR, "Slice parameters %d, %d are invalid\n", srcSliceY, srcSliceH);
        return AVERROR(EINVAL);
    }

777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802
    if (c->gamma_flag && c->cascaded_context[0]) {


        ret = sws_scale(c->cascaded_context[0],
                    srcSlice, srcStride, srcSliceY, srcSliceH,
                    c->cascaded_tmp, c->cascaded_tmpStride);

        if (ret < 0)
            return ret;

        if (c->cascaded_context[2])
            ret = sws_scale(c->cascaded_context[1], (const uint8_t * const *)c->cascaded_tmp, c->cascaded_tmpStride, srcSliceY, srcSliceH, c->cascaded1_tmp, c->cascaded1_tmpStride);
        else
            ret = sws_scale(c->cascaded_context[1], (const uint8_t * const *)c->cascaded_tmp, c->cascaded_tmpStride, srcSliceY, srcSliceH, dst, dstStride);

        if (ret < 0)
            return ret;

        if (c->cascaded_context[2]) {
            ret = sws_scale(c->cascaded_context[2],
                        (const uint8_t * const *)c->cascaded1_tmp, c->cascaded1_tmpStride, c->cascaded_context[1]->dstY - ret, c->cascaded_context[1]->dstY,
                        dst, dstStride);
        }
        return ret;
    }

803 804 805 806 807 808 809 810 811 812 813 814
    if (c->cascaded_context[0] && srcSliceY == 0 && srcSliceH == c->cascaded_context[0]->srcH) {
        ret = sws_scale(c->cascaded_context[0],
                        srcSlice, srcStride, srcSliceY, srcSliceH,
                        c->cascaded_tmp, c->cascaded_tmpStride);
        if (ret < 0)
            return ret;
        ret = sws_scale(c->cascaded_context[1],
                        (const uint8_t * const * )c->cascaded_tmp, c->cascaded_tmpStride, 0, c->cascaded_context[0]->dstH,
                        dst, dstStride);
        return ret;
    }

815 816 817
    memcpy(src2, srcSlice, sizeof(src2));
    memcpy(dst2, dst, sizeof(dst2));

818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840
    // do not mess up sliceDir if we have a "trailing" 0-size slice
    if (srcSliceH == 0)
        return 0;

    if (!check_image_pointers(srcSlice, c->srcFormat, srcStride)) {
        av_log(c, AV_LOG_ERROR, "bad src image pointers\n");
        return 0;
    }
    if (!check_image_pointers((const uint8_t* const*)dst, c->dstFormat, dstStride)) {
        av_log(c, AV_LOG_ERROR, "bad dst image pointers\n");
        return 0;
    }

    if (c->sliceDir == 0 && srcSliceY != 0 && srcSliceY + srcSliceH != c->srcH) {
        av_log(c, AV_LOG_ERROR, "Slices start in the middle!\n");
        return 0;
    }
    if (c->sliceDir == 0) {
        if (srcSliceY == 0) c->sliceDir = 1; else c->sliceDir = -1;
    }

    if (usePal(c->srcFormat)) {
        for (i = 0; i < 256; i++) {
841
            int r, g, b, y, u, v, a = 0xff;
842
            if (c->srcFormat == AV_PIX_FMT_PAL8) {
843
                uint32_t p = ((const uint32_t *)(srcSlice[1]))[i];
844 845 846 847
                a = (p >> 24) & 0xFF;
                r = (p >> 16) & 0xFF;
                g = (p >>  8) & 0xFF;
                b =  p        & 0xFF;
848
            } else if (c->srcFormat == AV_PIX_FMT_RGB8) {
849 850 851
                r = ( i >> 5     ) * 36;
                g = ((i >> 2) & 7) * 36;
                b = ( i       & 3) * 85;
852
            } else if (c->srcFormat == AV_PIX_FMT_BGR8) {
853 854 855
                b = ( i >> 6     ) * 85;
                g = ((i >> 3) & 7) * 36;
                r = ( i       & 7) * 36;
856
            } else if (c->srcFormat == AV_PIX_FMT_RGB4_BYTE) {
857 858 859
                r = ( i >> 3     ) * 255;
                g = ((i >> 1) & 3) * 85;
                b = ( i       & 1) * 255;
860
            } else if (c->srcFormat == AV_PIX_FMT_GRAY8 || c->srcFormat == AV_PIX_FMT_GRAY8A) {
861 862
                r = g = b = i;
            } else {
863
                av_assert1(c->srcFormat == AV_PIX_FMT_BGR4_BYTE);
864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881
                b = ( i >> 3     ) * 255;
                g = ((i >> 1) & 3) * 85;
                r = ( i       & 1) * 255;
            }
#define RGB2YUV_SHIFT 15
#define BY ( (int) (0.114 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
#define BV (-(int) (0.081 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
#define BU ( (int) (0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
#define GY ( (int) (0.587 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
#define GV (-(int) (0.419 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
#define GU (-(int) (0.331 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
#define RY ( (int) (0.299 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
#define RV ( (int) (0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
#define RU (-(int) (0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))

            y = av_clip_uint8((RY * r + GY * g + BY * b + ( 33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
            u = av_clip_uint8((RU * r + GU * g + BU * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
            v = av_clip_uint8((RV * r + GV * g + BV * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
882
            c->pal_yuv[i]= y + (u<<8) + (v<<16) + ((unsigned)a<<24);
883 884

            switch (c->dstFormat) {
885
            case AV_PIX_FMT_BGR32:
886
#if !HAVE_BIGENDIAN
887
            case AV_PIX_FMT_RGB24:
888
#endif
889
                c->pal_rgb[i]=  r + (g<<8) + (b<<16) + ((unsigned)a<<24);
890
                break;
891
            case AV_PIX_FMT_BGR32_1:
892
#if HAVE_BIGENDIAN
893
            case AV_PIX_FMT_BGR24:
894
#endif
895
                c->pal_rgb[i]= a + (r<<8) + (g<<16) + ((unsigned)b<<24);
896
                break;
897
            case AV_PIX_FMT_RGB32_1:
898
#if HAVE_BIGENDIAN
899
            case AV_PIX_FMT_RGB24:
900
#endif
901
                c->pal_rgb[i]= a + (b<<8) + (g<<16) + ((unsigned)r<<24);
902
                break;
903
            case AV_PIX_FMT_RGB32:
904
#if !HAVE_BIGENDIAN
905
            case AV_PIX_FMT_BGR24:
906 907
#endif
            default:
908
                c->pal_rgb[i]=  b + (g<<8) + (r<<16) + ((unsigned)a<<24);
909 910 911 912 913 914 915 916
            }
        }
    }

    if (c->src0Alpha && !c->dst0Alpha && isALPHA(c->dstFormat)) {
        uint8_t *base;
        int x,y;
        rgb0_tmp = av_malloc(FFABS(srcStride[0]) * srcSliceH + 32);
917 918 919
        if (!rgb0_tmp)
            return AVERROR(ENOMEM);

920 921 922 923 924 925 926 927 928 929
        base = srcStride[0] < 0 ? rgb0_tmp - srcStride[0] * (srcSliceH-1) : rgb0_tmp;
        for (y=0; y<srcSliceH; y++){
            memcpy(base + srcStride[0]*y, src2[0] + srcStride[0]*y, 4*c->srcW);
            for (x=c->src0Alpha-1; x<4*c->srcW; x+=4) {
                base[ srcStride[0]*y + x] = 0xFF;
            }
        }
        src2[0] = base;
    }

930 931 932
    if (c->srcXYZ && !(c->dstXYZ && c->srcW==c->dstW && c->srcH==c->dstH)) {
        uint8_t *base;
        rgb0_tmp = av_malloc(FFABS(srcStride[0]) * srcSliceH + 32);
933 934 935
        if (!rgb0_tmp)
            return AVERROR(ENOMEM);

936 937
        base = srcStride[0] < 0 ? rgb0_tmp - srcStride[0] * (srcSliceH-1) : rgb0_tmp;

938
        xyz12Torgb48(c, (uint16_t*)base, (const uint16_t*)src2[0], srcStride[0]/2, srcSliceH);
939 940 941
        src2[0] = base;
    }

942
    if (!srcSliceY && (c->flags & SWS_BITEXACT) && c->dither == SWS_DITHER_ED && c->dither_error[0])
943 944 945 946
        for (i = 0; i < 4; i++)
            memset(c->dither_error[i], 0, sizeof(c->dither_error[0][0]) * (c->dstW+2));


947 948 949 950 951 952 953 954 955 956 957 958 959 960 961
    // copy strides, so they can safely be modified
    if (c->sliceDir == 1) {
        // slices go from top to bottom
        int srcStride2[4] = { srcStride[0], srcStride[1], srcStride[2],
                              srcStride[3] };
        int dstStride2[4] = { dstStride[0], dstStride[1], dstStride[2],
                              dstStride[3] };

        reset_ptr(src2, c->srcFormat);
        reset_ptr((void*)dst2, c->dstFormat);

        /* reset slice direction at end of frame */
        if (srcSliceY + srcSliceH == c->srcH)
            c->sliceDir = 0;

962
        ret = c->swscale(c, src2, srcStride2, srcSliceY, srcSliceH, dst2,
963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987
                          dstStride2);
    } else {
        // slices go from bottom to top => we flip the image internally
        int srcStride2[4] = { -srcStride[0], -srcStride[1], -srcStride[2],
                              -srcStride[3] };
        int dstStride2[4] = { -dstStride[0], -dstStride[1], -dstStride[2],
                              -dstStride[3] };

        src2[0] += (srcSliceH - 1) * srcStride[0];
        if (!usePal(c->srcFormat))
            src2[1] += ((srcSliceH >> c->chrSrcVSubSample) - 1) * srcStride[1];
        src2[2] += ((srcSliceH >> c->chrSrcVSubSample) - 1) * srcStride[2];
        src2[3] += (srcSliceH - 1) * srcStride[3];
        dst2[0] += ( c->dstH                         - 1) * dstStride[0];
        dst2[1] += ((c->dstH >> c->chrDstVSubSample) - 1) * dstStride[1];
        dst2[2] += ((c->dstH >> c->chrDstVSubSample) - 1) * dstStride[2];
        dst2[3] += ( c->dstH                         - 1) * dstStride[3];

        reset_ptr(src2, c->srcFormat);
        reset_ptr((void*)dst2, c->dstFormat);

        /* reset slice direction at end of frame */
        if (!srcSliceY)
            c->sliceDir = 0;

988
        ret = c->swscale(c, src2, srcStride2, c->srcH-srcSliceY-srcSliceH,
989 990 991
                          srcSliceH, dst2, dstStride2);
    }

992 993 994 995 996 997

    if (c->dstXYZ && !(c->srcXYZ && c->srcW==c->dstW && c->srcH==c->dstH)) {
        /* replace on the same data */
        rgb48Toxyz12(c, (uint16_t*)dst2[0], (const uint16_t*)dst2[0], dstStride[0]/2, ret);
    }

998 999 1000
    av_free(rgb0_tmp);
    return ret;
}