/*
 *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include <string.h>

#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "vpx/vpx_integer.h"
#include "vpx_dsp/vpx_convolve.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_dsp/vpx_filter.h"
#include "vpx_ports/mem.h"

Dmitry Kovalev's avatar
Dmitry Kovalev committed
22 23
static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride,
                           uint8_t *dst, ptrdiff_t dst_stride,
24
                           const InterpKernel *x_filters,
Dmitry Kovalev's avatar
Dmitry Kovalev committed
25 26 27
                           int x0_q4, int x_step_q4, int w, int h) {
  int x, y;
  src -= SUBPEL_TAPS / 2 - 1;
28
  for (y = 0; y < h; ++y) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
29
    int x_q4 = x0_q4;
30
    for (x = 0; x < w; ++x) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
31 32 33 34 35
      const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
      const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
      int k, sum = 0;
      for (k = 0; k < SUBPEL_TAPS; ++k)
        sum += src_x[k] * x_filter[k];
36
      dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
37 38 39 40 41 42 43
      x_q4 += x_step_q4;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
44 45
static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride,
                               uint8_t *dst, ptrdiff_t dst_stride,
46
                               const InterpKernel *x_filters,
Dmitry Kovalev's avatar
Dmitry Kovalev committed
47 48 49
                               int x0_q4, int x_step_q4, int w, int h) {
  int x, y;
  src -= SUBPEL_TAPS / 2 - 1;
50
  for (y = 0; y < h; ++y) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
51
    int x_q4 = x0_q4;
52
    for (x = 0; x < w; ++x) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
53 54 55 56 57
      const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
      const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
      int k, sum = 0;
      for (k = 0; k < SUBPEL_TAPS; ++k)
        sum += src_x[k] * x_filter[k];
58
      dst[x] = ROUND_POWER_OF_TWO(dst[x] +
Dmitry Kovalev's avatar
Dmitry Kovalev committed
59
          clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
60 61 62 63 64 65 66
      x_q4 += x_step_q4;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
67 68
static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride,
                          uint8_t *dst, ptrdiff_t dst_stride,
69
                          const InterpKernel *y_filters,
Dmitry Kovalev's avatar
Dmitry Kovalev committed
70 71 72
                          int y0_q4, int y_step_q4, int w, int h) {
  int x, y;
  src -= src_stride * (SUBPEL_TAPS / 2 - 1);
73

74
  for (x = 0; x < w; ++x) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
75
    int y_q4 = y0_q4;
76
    for (y = 0; y < h; ++y) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
77 78 79 80 81 82
      const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
      const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
      int k, sum = 0;
      for (k = 0; k < SUBPEL_TAPS; ++k)
        sum += src_y[k * src_stride] * y_filter[k];
      dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
83 84 85 86 87 88 89
      y_q4 += y_step_q4;
    }
    ++src;
    ++dst;
  }
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
90 91
static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride,
                              uint8_t *dst, ptrdiff_t dst_stride,
92
                              const InterpKernel *y_filters,
Dmitry Kovalev's avatar
Dmitry Kovalev committed
93 94 95
                              int y0_q4, int y_step_q4, int w, int h) {
  int x, y;
  src -= src_stride * (SUBPEL_TAPS / 2 - 1);
96

97
  for (x = 0; x < w; ++x) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
98
    int y_q4 = y0_q4;
99
    for (y = 0; y < h; ++y) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
100 101 102 103 104
      const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
      const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
      int k, sum = 0;
      for (k = 0; k < SUBPEL_TAPS; ++k)
        sum += src_y[k * src_stride] * y_filter[k];
105
      dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] +
Dmitry Kovalev's avatar
Dmitry Kovalev committed
106
          clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
107 108 109 110 111 112 113
      y_q4 += y_step_q4;
    }
    ++src;
    ++dst;
  }
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
114 115
static void convolve(const uint8_t *src, ptrdiff_t src_stride,
                     uint8_t *dst, ptrdiff_t dst_stride,
116
                     const InterpKernel *const x_filters,
Dmitry Kovalev's avatar
Dmitry Kovalev committed
117
                     int x0_q4, int x_step_q4,
118
                     const InterpKernel *const y_filters,
Dmitry Kovalev's avatar
Dmitry Kovalev committed
119 120
                     int y0_q4, int y_step_q4,
                     int w, int h) {
121 122 123 124 125 126 127 128 129 130 131 132 133
  // Note: Fixed size intermediate buffer, temp, places limits on parameters.
  // 2d filtering proceeds in 2 steps:
  //   (1) Interpolate horizontally into an intermediate buffer, temp.
  //   (2) Interpolate temp vertically to derive the sub-pixel result.
  // Deriving the maximum number of rows in the temp buffer (135):
  // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
  // --Largest block size is 64x64 pixels.
  // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
  //   original frame (in 1/16th pixel units).
  // --Must round-up because block may be located at sub-pixel position.
  // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
  // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
  uint8_t temp[135 * 64];
134 135
  int intermediate_height =
          (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;
136

137 138
  assert(w <= 64);
  assert(h <= 64);
139 140
  assert(y_step_q4 <= 32);
  assert(x_step_q4 <= 32);
141

Dmitry Kovalev's avatar
Dmitry Kovalev committed
142 143 144 145 146 147
  convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64,
                 x_filters, x0_q4, x_step_q4, w, intermediate_height);
  convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride,
                y_filters, y0_q4, y_step_q4, w, h);
}

148
// Returns the start of the kernel table that contains `filter`, obtained by
// clearing the low 8 bits of the pointer value.
static const InterpKernel *get_filter_base(const int16_t *filter) {
  // NOTE: This assumes that the filter table is 256-byte aligned.
  // TODO(agrange) Modify to make independent of table alignment.
  return (const InterpKernel *)(((intptr_t)filter) & ~((intptr_t)0xFF));
}

154
// Returns the index, counted in InterpKernel elements, of kernel `f` relative
// to the table start `base`.
static int get_filter_offset(const int16_t *f, const InterpKernel *base) {
  return (int)((const InterpKernel *)(intptr_t)f - base);
}

Zoe Liu's avatar
Zoe Liu committed
158
void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
159
                           uint8_t *dst, ptrdiff_t dst_stride,
160 161 162
                           const int16_t *filter_x, int x_step_q4,
                           const int16_t *filter_y, int y_step_q4,
                           int w, int h) {
163
  const InterpKernel *const filters_x = get_filter_base(filter_x);
Dmitry Kovalev's avatar
Dmitry Kovalev committed
164 165
  const int x0_q4 = get_filter_offset(filter_x, filters_x);

166 167 168
  (void)filter_y;
  (void)y_step_q4;

Dmitry Kovalev's avatar
Dmitry Kovalev committed
169 170
  convolve_horiz(src, src_stride, dst, dst_stride, filters_x,
                 x0_q4, x_step_q4, w, h);
171 172
}

Zoe Liu's avatar
Zoe Liu committed
173
void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
174
                               uint8_t *dst, ptrdiff_t dst_stride,
175 176 177
                               const int16_t *filter_x, int x_step_q4,
                               const int16_t *filter_y, int y_step_q4,
                               int w, int h) {
178
  const InterpKernel *const filters_x = get_filter_base(filter_x);
Dmitry Kovalev's avatar
Dmitry Kovalev committed
179 180
  const int x0_q4 = get_filter_offset(filter_x, filters_x);

181 182 183
  (void)filter_y;
  (void)y_step_q4;

Dmitry Kovalev's avatar
Dmitry Kovalev committed
184 185
  convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x,
                     x0_q4, x_step_q4, w, h);
186 187
}

Zoe Liu's avatar
Zoe Liu committed
188
void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
189
                          uint8_t *dst, ptrdiff_t dst_stride,
190 191 192
                          const int16_t *filter_x, int x_step_q4,
                          const int16_t *filter_y, int y_step_q4,
                          int w, int h) {
193
  const InterpKernel *const filters_y = get_filter_base(filter_y);
Dmitry Kovalev's avatar
Dmitry Kovalev committed
194
  const int y0_q4 = get_filter_offset(filter_y, filters_y);
195 196 197 198

  (void)filter_x;
  (void)x_step_q4;

Dmitry Kovalev's avatar
Dmitry Kovalev committed
199 200
  convolve_vert(src, src_stride, dst, dst_stride, filters_y,
                y0_q4, y_step_q4, w, h);
201 202
}

Zoe Liu's avatar
Zoe Liu committed
203
void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
204
                              uint8_t *dst, ptrdiff_t dst_stride,
205 206 207
                              const int16_t *filter_x, int x_step_q4,
                              const int16_t *filter_y, int y_step_q4,
                              int w, int h) {
208
  const InterpKernel *const filters_y = get_filter_base(filter_y);
Dmitry Kovalev's avatar
Dmitry Kovalev committed
209
  const int y0_q4 = get_filter_offset(filter_y, filters_y);
210 211 212 213

  (void)filter_x;
  (void)x_step_q4;

Dmitry Kovalev's avatar
Dmitry Kovalev committed
214 215
  convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y,
                    y0_q4, y_step_q4, w, h);
216 217
}

Zoe Liu's avatar
Zoe Liu committed
218
void vpx_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
219
                     uint8_t *dst, ptrdiff_t dst_stride,
220 221 222
                     const int16_t *filter_x, int x_step_q4,
                     const int16_t *filter_y, int y_step_q4,
                     int w, int h) {
223
  const InterpKernel *const filters_x = get_filter_base(filter_x);
Dmitry Kovalev's avatar
Dmitry Kovalev committed
224 225
  const int x0_q4 = get_filter_offset(filter_x, filters_x);

226
  const InterpKernel *const filters_y = get_filter_base(filter_y);
Dmitry Kovalev's avatar
Dmitry Kovalev committed
227 228 229 230 231
  const int y0_q4 = get_filter_offset(filter_y, filters_y);

  convolve(src, src_stride, dst, dst_stride,
           filters_x, x0_q4, x_step_q4,
           filters_y, y0_q4, y_step_q4, w, h);
232 233
}

Zoe Liu's avatar
Zoe Liu committed
234
void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
235
                         uint8_t *dst, ptrdiff_t dst_stride,
236 237 238
                         const int16_t *filter_x, int x_step_q4,
                         const int16_t *filter_y, int y_step_q4,
                         int w, int h) {
239
  /* Fixed size intermediate buffer places limits on parameters. */
240
  DECLARE_ALIGNED(16, uint8_t, temp[64 * 64]);
241 242
  assert(w <= 64);
  assert(h <= 64);
243

Zoe Liu's avatar
Zoe Liu committed
244
  vpx_convolve8_c(src, src_stride, temp, 64,
Dmitry Kovalev's avatar
Dmitry Kovalev committed
245
                  filter_x, x_step_q4, filter_y, y_step_q4, w, h);
Zoe Liu's avatar
Zoe Liu committed
246
  vpx_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h);
247
}
248

Zoe Liu's avatar
Zoe Liu committed
249
// Copies a w x h block of bytes from src to dst, one row at a time.
//
// src_stride / dst_stride are the row pitches of the two buffers. The filter
// arguments exist only so the signature matches the other convolve entry
// points; they are ignored.
void vpx_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride,
                         uint8_t *dst, ptrdiff_t dst_stride,
                         const int16_t *filter_x, int filter_x_stride,
                         const int16_t *filter_y, int filter_y_stride,
                         int w, int h) {
  int r;

  (void)filter_x;
  (void)filter_x_stride;
  (void)filter_y;
  (void)filter_y_stride;

  for (r = h; r > 0; --r) {
    // memcpy takes a size_t length; make the conversion from int explicit.
    memcpy(dst, src, (size_t)w);
    src += src_stride;
    dst += dst_stride;
  }
}

Zoe Liu's avatar
Zoe Liu committed
266
// Averages a w x h block of src into dst:
//   dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1).
//
// The filter arguments exist only so the signature matches the other convolve
// entry points; they are ignored.
void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride,
                        uint8_t *dst, ptrdiff_t dst_stride,
                        const int16_t *filter_x, int filter_x_stride,
                        const int16_t *filter_y, int filter_y_stride,
                        int w, int h) {
  int x, y;

  (void)filter_x;
  (void)filter_x_stride;
  (void)filter_y;
  (void)filter_y_stride;

  for (y = 0; y < h; ++y) {
    for (x = 0; x < w; ++x) {
      dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);
    }

    src += src_stride;
    dst += dst_stride;
  }
}
284

285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338
// Scaled horizontal filter entry point; forwards every argument unchanged to
// vpx_convolve8_horiz_c().
void vpx_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                        uint8_t *dst, ptrdiff_t dst_stride,
                        const int16_t *filter_x, int x_step_q4,
                        const int16_t *filter_y, int y_step_q4,
                        int w, int h) {
  vpx_convolve8_horiz_c(src, src_stride,
                        dst, dst_stride,
                        filter_x, x_step_q4,
                        filter_y, y_step_q4,
                        w, h);
}

// Scaled vertical filter entry point; forwards every argument unchanged to
// vpx_convolve8_vert_c().
void vpx_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                       uint8_t *dst, ptrdiff_t dst_stride,
                       const int16_t *filter_x, int x_step_q4,
                       const int16_t *filter_y, int y_step_q4,
                       int w, int h) {
  vpx_convolve8_vert_c(src, src_stride,
                       dst, dst_stride,
                       filter_x, x_step_q4,
                       filter_y, y_step_q4,
                       w, h);
}

// Scaled 2-D filter entry point; forwards every argument unchanged to
// vpx_convolve8_c().
void vpx_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride,
                     uint8_t *dst, ptrdiff_t dst_stride,
                     const int16_t *filter_x, int x_step_q4,
                     const int16_t *filter_y, int y_step_q4,
                     int w, int h) {
  vpx_convolve8_c(src, src_stride,
                  dst, dst_stride,
                  filter_x, x_step_q4,
                  filter_y, y_step_q4,
                  w, h);
}

// Scaled, averaging horizontal filter entry point; forwards every argument
// unchanged to vpx_convolve8_avg_horiz_c().
void vpx_scaled_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                            uint8_t *dst, ptrdiff_t dst_stride,
                            const int16_t *filter_x, int x_step_q4,
                            const int16_t *filter_y, int y_step_q4,
                            int w, int h) {
  vpx_convolve8_avg_horiz_c(src, src_stride,
                            dst, dst_stride,
                            filter_x, x_step_q4,
                            filter_y, y_step_q4,
                            w, h);
}

// Scaled, averaging vertical filter entry point; forwards every argument
// unchanged to vpx_convolve8_avg_vert_c().
void vpx_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                           uint8_t *dst, ptrdiff_t dst_stride,
                           const int16_t *filter_x, int x_step_q4,
                           const int16_t *filter_y, int y_step_q4,
                           int w, int h) {
  vpx_convolve8_avg_vert_c(src, src_stride,
                           dst, dst_stride,
                           filter_x, x_step_q4,
                           filter_y, y_step_q4,
                           w, h);
}

// Scaled, averaging 2-D filter entry point; forwards every argument unchanged
// to vpx_convolve8_avg_c().
void vpx_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride,
                         uint8_t *dst, ptrdiff_t dst_stride,
                         const int16_t *filter_x, int x_step_q4,
                         const int16_t *filter_y, int y_step_q4,
                         int w, int h) {
  vpx_convolve8_avg_c(src, src_stride,
                      dst, dst_stride,
                      filter_x, x_step_q4,
                      filter_y, y_step_q4,
                      w, h);
}

339
#if CONFIG_VP9_HIGHBITDEPTH
340 341 342 343 344
static void highbd_convolve_horiz(const uint8_t *src8, ptrdiff_t src_stride,
                                  uint8_t *dst8, ptrdiff_t dst_stride,
                                  const InterpKernel *x_filters,
                                  int x0_q4, int x_step_q4,
                                  int w, int h, int bd) {
345 346 347 348 349 350 351 352 353 354 355 356
  int x, y;
  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  src -= SUBPEL_TAPS / 2 - 1;
  for (y = 0; y < h; ++y) {
    int x_q4 = x0_q4;
    for (x = 0; x < w; ++x) {
      const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
      const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
      int k, sum = 0;
      for (k = 0; k < SUBPEL_TAPS; ++k)
        sum += src_x[k] * x_filter[k];
357
      dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
358 359 360 361 362 363 364
      x_q4 += x_step_q4;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

365 366 367 368 369
static void highbd_convolve_avg_horiz(const uint8_t *src8, ptrdiff_t src_stride,
                                      uint8_t *dst8, ptrdiff_t dst_stride,
                                      const InterpKernel *x_filters,
                                      int x0_q4, int x_step_q4,
                                      int w, int h, int bd) {
370 371 372 373 374 375 376 377 378 379 380 381 382
  int x, y;
  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  src -= SUBPEL_TAPS / 2 - 1;
  for (y = 0; y < h; ++y) {
    int x_q4 = x0_q4;
    for (x = 0; x < w; ++x) {
      const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
      const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
      int k, sum = 0;
      for (k = 0; k < SUBPEL_TAPS; ++k)
        sum += src_x[k] * x_filter[k];
      dst[x] = ROUND_POWER_OF_TWO(dst[x] +
383
          clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1);
384 385 386 387 388 389 390
      x_q4 += x_step_q4;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

391 392 393 394 395
static void highbd_convolve_vert(const uint8_t *src8, ptrdiff_t src_stride,
                                 uint8_t *dst8, ptrdiff_t dst_stride,
                                 const InterpKernel *y_filters,
                                 int y0_q4, int y_step_q4, int w, int h,
                                 int bd) {
396 397 398 399 400 401 402 403 404 405 406 407
  int x, y;
  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  src -= src_stride * (SUBPEL_TAPS / 2 - 1);
  for (x = 0; x < w; ++x) {
    int y_q4 = y0_q4;
    for (y = 0; y < h; ++y) {
      const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
      const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
      int k, sum = 0;
      for (k = 0; k < SUBPEL_TAPS; ++k)
        sum += src_y[k * src_stride] * y_filter[k];
408
      dst[y * dst_stride] = clip_pixel_highbd(
409 410 411 412 413 414 415 416
          ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
      y_q4 += y_step_q4;
    }
    ++src;
    ++dst;
  }
}

417 418 419 420 421
static void highbd_convolve_avg_vert(const uint8_t *src8, ptrdiff_t src_stride,
                                     uint8_t *dst8, ptrdiff_t dst_stride,
                                     const InterpKernel *y_filters,
                                     int y0_q4, int y_step_q4, int w, int h,
                                     int bd) {
422 423 424 425 426 427 428 429 430 431 432 433 434
  int x, y;
  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  src -= src_stride * (SUBPEL_TAPS / 2 - 1);
  for (x = 0; x < w; ++x) {
    int y_q4 = y0_q4;
    for (y = 0; y < h; ++y) {
      const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
      const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
      int k, sum = 0;
      for (k = 0; k < SUBPEL_TAPS; ++k)
        sum += src_y[k * src_stride] * y_filter[k];
      dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] +
435
          clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1);
436 437 438 439 440 441 442
      y_q4 += y_step_q4;
    }
    ++src;
    ++dst;
  }
}

443 444 445 446 447 448 449
static void highbd_convolve(const uint8_t *src, ptrdiff_t src_stride,
                            uint8_t *dst, ptrdiff_t dst_stride,
                            const InterpKernel *const x_filters,
                            int x0_q4, int x_step_q4,
                            const InterpKernel *const y_filters,
                            int y0_q4, int y_step_q4,
                            int w, int h, int bd) {
450 451 452 453 454 455 456 457 458 459 460 461 462
  // Note: Fixed size intermediate buffer, temp, places limits on parameters.
  // 2d filtering proceeds in 2 steps:
  //   (1) Interpolate horizontally into an intermediate buffer, temp.
  //   (2) Interpolate temp vertically to derive the sub-pixel result.
  // Deriving the maximum number of rows in the temp buffer (135):
  // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
  // --Largest block size is 64x64 pixels.
  // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
  //   original frame (in 1/16th pixel units).
  // --Must round-up because block may be located at sub-pixel position.
  // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
  // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
  uint16_t temp[64 * 135];
463 464
  int intermediate_height =
          (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;
465 466 467 468 469 470

  assert(w <= 64);
  assert(h <= 64);
  assert(y_step_q4 <= 32);
  assert(x_step_q4 <= 32);

471 472 473 474 475 476 477
  highbd_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1),
                        src_stride, CONVERT_TO_BYTEPTR(temp), 64,
                        x_filters, x0_q4, x_step_q4, w,
                        intermediate_height, bd);
  highbd_convolve_vert(CONVERT_TO_BYTEPTR(temp) + 64 * (SUBPEL_TAPS / 2 - 1),
                       64, dst, dst_stride, y_filters, y0_q4, y_step_q4,
                       w, h, bd);
478 479 480
}


Zoe Liu's avatar
Zoe Liu committed
481
void vpx_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
482 483 484 485
                                  uint8_t *dst, ptrdiff_t dst_stride,
                                  const int16_t *filter_x, int x_step_q4,
                                  const int16_t *filter_y, int y_step_q4,
                                  int w, int h, int bd) {
486 487 488 489 490
  const InterpKernel *const filters_x = get_filter_base(filter_x);
  const int x0_q4 = get_filter_offset(filter_x, filters_x);
  (void)filter_y;
  (void)y_step_q4;

491 492
  highbd_convolve_horiz(src, src_stride, dst, dst_stride, filters_x,
                        x0_q4, x_step_q4, w, h, bd);
493 494
}

Zoe Liu's avatar
Zoe Liu committed
495
void vpx_highbd_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
496 497 498 499
                                      uint8_t *dst, ptrdiff_t dst_stride,
                                      const int16_t *filter_x, int x_step_q4,
                                      const int16_t *filter_y, int y_step_q4,
                                      int w, int h, int bd) {
500 501 502 503 504
  const InterpKernel *const filters_x = get_filter_base(filter_x);
  const int x0_q4 = get_filter_offset(filter_x, filters_x);
  (void)filter_y;
  (void)y_step_q4;

505 506
  highbd_convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x,
                            x0_q4, x_step_q4, w, h, bd);
507 508
}

Zoe Liu's avatar
Zoe Liu committed
509
void vpx_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
510 511 512 513
                                 uint8_t *dst, ptrdiff_t dst_stride,
                                 const int16_t *filter_x, int x_step_q4,
                                 const int16_t *filter_y, int y_step_q4,
                                 int w, int h, int bd) {
514 515 516 517 518
  const InterpKernel *const filters_y = get_filter_base(filter_y);
  const int y0_q4 = get_filter_offset(filter_y, filters_y);
  (void)filter_x;
  (void)x_step_q4;

519 520
  highbd_convolve_vert(src, src_stride, dst, dst_stride, filters_y,
                       y0_q4, y_step_q4, w, h, bd);
521 522
}

Zoe Liu's avatar
Zoe Liu committed
523
void vpx_highbd_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
524 525 526 527
                                     uint8_t *dst, ptrdiff_t dst_stride,
                                     const int16_t *filter_x, int x_step_q4,
                                     const int16_t *filter_y, int y_step_q4,
                                     int w, int h, int bd) {
528 529 530 531 532
  const InterpKernel *const filters_y = get_filter_base(filter_y);
  const int y0_q4 = get_filter_offset(filter_y, filters_y);
  (void)filter_x;
  (void)x_step_q4;

533 534
  highbd_convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y,
                           y0_q4, y_step_q4, w, h, bd);
535 536
}

Zoe Liu's avatar
Zoe Liu committed
537
void vpx_highbd_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
538 539 540 541
                            uint8_t *dst, ptrdiff_t dst_stride,
                            const int16_t *filter_x, int x_step_q4,
                            const int16_t *filter_y, int y_step_q4,
                            int w, int h, int bd) {
542 543 544 545 546 547
  const InterpKernel *const filters_x = get_filter_base(filter_x);
  const int x0_q4 = get_filter_offset(filter_x, filters_x);

  const InterpKernel *const filters_y = get_filter_base(filter_y);
  const int y0_q4 = get_filter_offset(filter_y, filters_y);

548 549 550
  highbd_convolve(src, src_stride, dst, dst_stride,
                  filters_x, x0_q4, x_step_q4,
                  filters_y, y0_q4, y_step_q4, w, h, bd);
551 552
}

Zoe Liu's avatar
Zoe Liu committed
553
void vpx_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
554 555 556 557
                                uint8_t *dst, ptrdiff_t dst_stride,
                                const int16_t *filter_x, int x_step_q4,
                                const int16_t *filter_y, int y_step_q4,
                                int w, int h, int bd) {
558
  // Fixed size intermediate buffer places limits on parameters.
559
  DECLARE_ALIGNED(16, uint16_t, temp[64 * 64]);
560 561 562
  assert(w <= 64);
  assert(h <= 64);

Zoe Liu's avatar
Zoe Liu committed
563
  vpx_highbd_convolve8_c(src, src_stride, CONVERT_TO_BYTEPTR(temp), 64,
564
                         filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd);
Zoe Liu's avatar
Zoe Liu committed
565
  vpx_highbd_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), 64, dst, dst_stride,
566
                            NULL, 0, NULL, 0, w, h, bd);
567 568
}

Zoe Liu's avatar
Zoe Liu committed
569
// High-bitdepth block copy: copies w uint16_t samples per row for h rows.
//
// src8 / dst8 are converted with CONVERT_TO_SHORTPTR and then accessed as
// uint16_t samples; the strides are applied to those uint16_t pointers. The
// filter arguments and bd are ignored.
void vpx_highbd_convolve_copy_c(const uint8_t *src8, ptrdiff_t src_stride,
                                uint8_t *dst8, ptrdiff_t dst_stride,
                                const int16_t *filter_x, int filter_x_stride,
                                const int16_t *filter_y, int filter_y_stride,
                                int w, int h, int bd) {
  int r;
  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  (void)filter_x;
  (void)filter_y;
  (void)filter_x_stride;
  (void)filter_y_stride;
  (void)bd;

  for (r = h; r > 0; --r) {
    memcpy(dst, src, w * sizeof(uint16_t));
    src += src_stride;
    dst += dst_stride;
  }
}

Zoe Liu's avatar
Zoe Liu committed
590
// High-bitdepth block average:
//   dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1) over a w x h block.
//
// src8 / dst8 are converted with CONVERT_TO_SHORTPTR and then accessed as
// uint16_t samples; the strides are applied to those uint16_t pointers. The
// filter arguments and bd are ignored.
void vpx_highbd_convolve_avg_c(const uint8_t *src8, ptrdiff_t src_stride,
                               uint8_t *dst8, ptrdiff_t dst_stride,
                               const int16_t *filter_x, int filter_x_stride,
                               const int16_t *filter_y, int filter_y_stride,
                               int w, int h, int bd) {
  int x, y;
  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  (void)filter_x;
  (void)filter_y;
  (void)filter_x_stride;
  (void)filter_y_stride;
  (void)bd;

  for (y = 0; y < h; ++y) {
    for (x = 0; x < w; ++x) {
      dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);
    }
    src += src_stride;
    dst += dst_stride;
  }
}
#endif