dct16x16_test.cc 29.9 KB
Newer Older
Daniel Kang's avatar
Daniel Kang committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <math.h>
#include <stdlib.h>
#include <string.h>

#include "third_party/googletest/src/include/gtest/gtest.h"
16 17 18 19
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
Daniel Kang's avatar
Daniel Kang committed
20

Yaowu Xu's avatar
Yaowu Xu committed
21
#include "./vp9_rtcd.h"
22
#include "vp9/common/vp9_entropy.h"
Scott LaVarnway's avatar
Scott LaVarnway committed
23
#include "vp9/common/vp9_scan.h"
24
#include "vpx/vpx_codec.h"
25
#include "vpx/vpx_integer.h"
26
#include "vpx_ports/mem.h"
27

Daniel Kang's avatar
Daniel Kang committed
28 29 30
using libvpx_test::ACMRandom;

namespace {
31 32 33 34

#ifdef _MSC_VER
// Local round() used when building with MSVC (presumably because older MSVC
// runtimes lack round(); confirm before removing). Rounds x to the nearest
// integer, with halfway cases rounded away from zero.
static int round(double x) {
  return static_cast<int>(x < 0 ? ceil(x - 0.5) : floor(x + 0.5));
}
#endif
Daniel Kang's avatar
Daniel Kang committed
40

41
// Number of coefficients (and pixels) in one 16x16 block.
const int kNumCoeffs = 256;
const double PI = 3.1415926535898;

// Floating-point reference inverse transform for a 16x16 block, evaluated
// directly from the cosine-basis sum.
//
// input:  256 coefficients, row-major (input[i * 16 + j]).
// output: 256 results, row-major (output[k * 16 + l]).
//
// Every term is scaled by 1/256, and by an extra sqrt(2) for each of the two
// frequency indices that is non-zero.
void reference2_16x16_idct_2d(double *input, double *output) {
  const double kSqrt2 = sqrt(2.0);
  for (int l = 0; l < 16; ++l) {
    for (int k = 0; k < 16; ++k) {
      double s = 0;
      for (int i = 0; i < 16; ++i) {
        for (int j = 0; j < 16; ++j) {
          double x = cos(PI * j * (l + 0.5) / 16.0) *
                     cos(PI * i * (k + 0.5) / 16.0) *
                     input[i * 16 + j] / 256;
          if (i != 0)
            x *= kSqrt2;
          if (j != 0)
            x *= kSqrt2;
          s += x;
        }
      }
      output[k * 16 + l] = s;
    }
  }
}

65

66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
// Rotation constants used by the butterfly below; Cn matches cos(n * pi / 32).
const double C1 = 0.995184726672197;
const double C2 = 0.98078528040323;
const double C3 = 0.956940335732209;
const double C4 = 0.923879532511287;
const double C5 = 0.881921264348355;
const double C6 = 0.831469612302545;
const double C7 = 0.773010453362737;
const double C8 = 0.707106781186548;
const double C9 = 0.634393284163646;
const double C10 = 0.555570233019602;
const double C11 = 0.471396736825998;
const double C12 = 0.38268343236509;
const double C13 = 0.290284677254462;
const double C14 = 0.195090322016128;
const double C15 = 0.098017140329561;

// Floating-point 16-point 1-D forward DCT implemented as a four-stage
// butterfly network.
//
// input:  16 samples (fully consumed before anything is written to output).
// output: 16 coefficients. output[0] is the plain sum of the samples.
//
// The rotation products are deliberately kept in separate temporaries so the
// floating-point evaluation matches the original statement-by-statement.
void butterfly_16x16_dct_1d(double input[16], double output[16]) {
  double step[16];
  double intermediate[16];  // only entries 8, 9 and 11..15 are used
  double temp1, temp2;

  // step 1: mirrored sums in step[0..7], mirrored differences in step[8..15].
  for (int i = 0; i < 8; ++i) {
    step[i] = input[i] + input[15 - i];
    step[15 - i] = input[i] - input[15 - i];
  }

  // step 2: second mirrored add/sub on the sum half ...
  for (int i = 0; i < 4; ++i) {
    output[i] = step[i] + step[7 - i];
    output[7 - i] = step[i] - step[7 - i];
  }

  // ... and plane rotations on the difference half.
  temp1 = step[ 8] * C7;
  temp2 = step[15] * C9;
  output[ 8] = temp1 + temp2;

  temp1 = step[ 9] * C11;
  temp2 = step[14] * C5;
  output[ 9] = temp1 - temp2;

  temp1 = step[10] * C3;
  temp2 = step[13] * C13;
  output[10] = temp1 + temp2;

  temp1 = step[11] * C15;
  temp2 = step[12] * C1;
  output[11] = temp1 - temp2;

  temp1 = step[11] * C1;
  temp2 = step[12] * C15;
  output[12] = temp2 + temp1;

  temp1 = step[10] * C13;
  temp2 = step[13] * C3;
  output[13] = temp2 - temp1;

  temp1 = step[ 9] * C5;
  temp2 = step[14] * C11;
  output[14] = temp2 + temp1;

  temp1 = step[ 8] * C9;
  temp2 = step[15] * C7;
  output[15] = temp2 - temp1;

  // step 3
  step[ 0] = output[0] + output[3];
  step[ 1] = output[1] + output[2];
  step[ 2] = output[1] - output[2];
  step[ 3] = output[0] - output[3];

  temp1 = output[4] * C14;
  temp2 = output[7] * C2;
  step[ 4] = temp1 + temp2;

  temp1 = output[5] * C10;
  temp2 = output[6] * C6;
  step[ 5] = temp1 + temp2;

  temp1 = output[5] * C6;
  temp2 = output[6] * C10;
  step[ 6] = temp2 - temp1;

  temp1 = output[4] * C2;
  temp2 = output[7] * C14;
  step[ 7] = temp2 - temp1;

  step[ 8] = output[ 8] + output[11];
  step[ 9] = output[ 9] + output[10];
  step[10] = output[ 9] - output[10];
  step[11] = output[ 8] - output[11];

  step[12] = output[12] + output[15];
  step[13] = output[13] + output[14];
  step[14] = output[13] - output[14];
  step[15] = output[12] - output[15];

  // step 4: final coefficients, written in their output order slots.
  output[ 0] = (step[ 0] + step[ 1]);
  output[ 8] = (step[ 0] - step[ 1]);

  temp1 = step[2] * C12;
  temp2 = step[3] * C4;
  temp1 = temp1 + temp2;
  output[ 4] = 2 * (temp1 * C8);

  temp1 = step[2] * C4;
  temp2 = step[3] * C12;
  temp1 = temp2 - temp1;
  output[12] = 2 * (temp1 * C8);

  output[ 2] = 2 * ((step[4] + step[ 5]) * C8);
  output[14] = 2 * ((step[7] - step[ 6]) * C8);

  temp1 = step[4] - step[5];
  temp2 = step[6] + step[7];
  output[ 6] = (temp1 + temp2);
  output[10] = (temp1 - temp2);

  intermediate[8] = step[8] + step[14];
  intermediate[9] = step[9] + step[15];

  temp1 = intermediate[8] * C12;
  temp2 = intermediate[9] * C4;
  temp1 = temp1 - temp2;
  output[3] = 2 * (temp1 * C8);

  temp1 = intermediate[8] * C4;
  temp2 = intermediate[9] * C12;
  temp1 = temp2 + temp1;
  output[13] = 2 * (temp1 * C8);

  output[ 9] = 2 * ((step[10] + step[11]) * C8);

  intermediate[11] = step[10] - step[11];
  intermediate[12] = step[12] + step[13];
  intermediate[13] = step[12] - step[13];
  intermediate[14] = step[ 8] - step[14];
  intermediate[15] = step[ 9] - step[15];

  output[15] = (intermediate[11] + intermediate[12]);
  output[ 1] = -(intermediate[11] - intermediate[12]);

  output[ 7] = 2 * (intermediate[13] * C8);

  temp1 = intermediate[14] * C12;
  temp2 = intermediate[15] * C4;
  temp1 = temp1 - temp2;
  output[11] = -2 * (temp1 * C8);

  temp1 = intermediate[14] * C4;
  temp2 = intermediate[15] * C12;
  temp1 = temp2 + temp1;
  output[ 5] = 2 * (temp1 * C8);
}

238
void reference_16x16_dct_2d(int16_t input[256], double output[256]) {
Daniel Kang's avatar
Daniel Kang committed
239 240 241 242
  // First transform columns
  for (int i = 0; i < 16; ++i) {
    double temp_in[16], temp_out[16];
    for (int j = 0; j < 16; ++j)
243
      temp_in[j] = input[j * 16 + i];
Daniel Kang's avatar
Daniel Kang committed
244 245
    butterfly_16x16_dct_1d(temp_in, temp_out);
    for (int j = 0; j < 16; ++j)
246
      output[j * 16 + i] = temp_out[j];
Daniel Kang's avatar
Daniel Kang committed
247 248 249 250 251
  }
  // Then transform rows
  for (int i = 0; i < 16; ++i) {
    double temp_in[16], temp_out[16];
    for (int j = 0; j < 16; ++j)
252
      temp_in[j] = output[j + i * 16];
Daniel Kang's avatar
Daniel Kang committed
253 254 255
    butterfly_16x16_dct_1d(temp_in, temp_out);
    // Scale by some magic number
    for (int j = 0; j < 16; ++j)
256
      output[j + i * 16] = temp_out[j]/2;
Daniel Kang's avatar
Daniel Kang committed
257 258 259
  }
}

260 261 262
// Signatures of the transforms exercised by the fixtures below. The "Fht" and
// "Iht" variants take an additional transform-type argument.
typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
                        int tx_type);
typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
                        int tx_type);

// Test parameters: (forward transform, inverse transform, tx_type, bit depth).
typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct16x16Param;
typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht16x16Param;
// Test parameters: (reference inverse, inverse under test, coefficient
// threshold, bit depth).
typedef std::tr1::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t>
    Idct16x16Param;
271

272
void fdct16x16_ref(const int16_t *in, tran_low_t *out, int stride,
273
                   int /*tx_type*/) {
274
  vp9_fdct16x16_c(in, out, stride);
275 276
}

277
void idct16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,
278
                   int /*tx_type*/) {
279 280 281
  vp9_idct16x16_256_add_c(in, dest, stride);
}

282 283
void fht16x16_ref(const int16_t *in, tran_low_t *out, int stride,
                  int tx_type) {
284
  vp9_fht16x16_c(in, out, stride, tx_type);
285 286
}

287 288
void iht16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,
                  int tx_type) {
289 290 291
  vp9_iht16x16_256_add_c(in, dest, stride, tx_type);
}

292 293
#if CONFIG_VP9_HIGHBITDEPTH
// Adapters that bind a fixed bit depth (10 or 12) to the high-bit-depth
// transforms so they fit the IdctFunc / IhtFunc signatures used by the tests.

void idct16x16_10(const tran_low_t *in, uint8_t *out, int stride) {
  vp9_highbd_idct16x16_256_add_c(in, out, stride, 10);
}

void idct16x16_12(const tran_low_t *in, uint8_t *out, int stride) {
  vp9_highbd_idct16x16_256_add_c(in, out, stride, 12);
}

// IhtFunc-shaped variants of the two adapters above; the transform type is
// ignored.
void idct16x16_10_ref(const tran_low_t *in, uint8_t *out, int stride,
                      int /*tx_type*/) {
  idct16x16_10(in, out, stride);
}

void idct16x16_12_ref(const tran_low_t *in, uint8_t *out, int stride,
                      int /*tx_type*/) {
  idct16x16_12(in, out, stride);
}

void iht16x16_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
  vp9_highbd_iht16x16_256_add_c(in, out, stride, tx_type, 10);
}

void iht16x16_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
  vp9_highbd_iht16x16_256_add_c(in, out, stride, tx_type, 12);
}

void idct16x16_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
  vp9_highbd_idct16x16_10_add_c(in, out, stride, 10);
}

void idct16x16_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
  vp9_highbd_idct16x16_10_add_c(in, out, stride, 12);
}

#if HAVE_SSE2
void idct16x16_256_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vp9_highbd_idct16x16_256_add_sse2(in, out, stride, 10);
}

void idct16x16_256_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vp9_highbd_idct16x16_256_add_sse2(in, out, stride, 12);
}

void idct16x16_10_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vp9_highbd_idct16x16_10_add_sse2(in, out, stride, 10);
}

void idct16x16_10_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vp9_highbd_idct16x16_10_add_sse2(in, out, stride, 12);
}
#endif  // HAVE_SSE2
#endif  // CONFIG_VP9_HIGHBITDEPTH
345

346
class Trans16x16TestBase {
347
 public:
348
  virtual ~Trans16x16TestBase() {}
349

350
 protected:
351
  virtual void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) = 0;
352

353
  virtual void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) = 0;
354 355 356

  void RunAccuracyCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
357 358
    uint32_t max_error = 0;
    int64_t total_error = 0;
359 360
    const int count_test_block = 10000;
    for (int i = 0; i < count_test_block; ++i) {
361 362 363 364
      DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
      DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
      DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
      DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
365
#if CONFIG_VP9_HIGHBITDEPTH
366 367
      DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
      DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
368
#endif
369

370
      // Initialize a test block with input range [-mask_, mask_].
371
      for (int j = 0; j < kNumCoeffs; ++j) {
372 373 374 375 376 377 378 379 380 381 382
        if (bit_depth_ == VPX_BITS_8) {
          src[j] = rnd.Rand8();
          dst[j] = rnd.Rand8();
          test_input_block[j] = src[j] - dst[j];
#if CONFIG_VP9_HIGHBITDEPTH
        } else {
          src16[j] = rnd.Rand16() & mask_;
          dst16[j] = rnd.Rand16() & mask_;
          test_input_block[j] = src16[j] - dst16[j];
#endif
        }
383 384
      }

385 386
      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(test_input_block,
                                          test_temp_block, pitch_));
387 388 389 390 391 392 393 394 395
      if (bit_depth_ == VPX_BITS_8) {
        ASM_REGISTER_STATE_CHECK(
            RunInvTxfm(test_temp_block, dst, pitch_));
#if CONFIG_VP9_HIGHBITDEPTH
      } else {
        ASM_REGISTER_STATE_CHECK(
            RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
#endif
      }
396 397

      for (int j = 0; j < kNumCoeffs; ++j) {
398 399 400 401
#if CONFIG_VP9_HIGHBITDEPTH
        const uint32_t diff =
            bit_depth_ == VPX_BITS_8 ?  dst[j] - src[j] : dst16[j] - src16[j];
#else
402
        const uint32_t diff = dst[j] - src[j];
403
#endif
404
        const uint32_t error = diff * diff;
405 406 407 408
        if (max_error < error)
          max_error = error;
        total_error += error;
      }
409
    }
Daniel Kang's avatar
Daniel Kang committed
410

411
    EXPECT_GE(1u  << 2 * (bit_depth_ - 8), max_error)
412 413
        << "Error: 16x16 FHT/IHT has an individual round trip error > 1";

414
    EXPECT_GE(count_test_block << 2 * (bit_depth_ - 8), total_error)
415
        << "Error: 16x16 FHT/IHT has average round trip error > 1 per block";
416 417
  }

418
  void RunCoeffCheck() {
419 420
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
421 422 423
    DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
424

425
    for (int i = 0; i < count_test_block; ++i) {
426
      // Initialize a test block with input range [-mask_, mask_].
427
      for (int j = 0; j < kNumCoeffs; ++j)
428
        input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
429 430

      fwd_txfm_ref(input_block, output_ref_block, pitch_, tx_type_);
431
      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, pitch_));
432 433 434 435 436 437 438 439 440 441

      // The minimum quant value is 4.
      for (int j = 0; j < kNumCoeffs; ++j)
        EXPECT_EQ(output_block[j], output_ref_block[j]);
    }
  }

  void RunMemCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
442 443 444
    DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
445

446
    for (int i = 0; i < count_test_block; ++i) {
447
      // Initialize a test block with input range [-mask_, mask_].
448
      for (int j = 0; j < kNumCoeffs; ++j) {
449
        input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
450
      }
451
      if (i == 0) {
452
        for (int j = 0; j < kNumCoeffs; ++j)
453
          input_extreme_block[j] = mask_;
454
      } else if (i == 1) {
455
        for (int j = 0; j < kNumCoeffs; ++j)
456
          input_extreme_block[j] = -mask_;
457
      }
458

459
      fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);
460 461
      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_extreme_block,
                                          output_block, pitch_));
462 463 464

      // The minimum quant value is 4.
      for (int j = 0; j < kNumCoeffs; ++j) {
465
        EXPECT_EQ(output_block[j], output_ref_block[j]);
466
        EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_block[j]))
467 468
            << "Error: 16x16 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
      }
469
    }
470 471
  }

472 473
  void RunQuantCheck(int dc_thred, int ac_thred) {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
474
    const int count_test_block = 100000;
475 476
    DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
477

478 479
    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
480
#if CONFIG_VP9_HIGHBITDEPTH
481 482
    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
483
#endif
484 485

    for (int i = 0; i < count_test_block; ++i) {
486
      // Initialize a test block with input range [-mask_, mask_].
487
      for (int j = 0; j < kNumCoeffs; ++j) {
488
        input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
489 490 491
      }
      if (i == 0)
        for (int j = 0; j < kNumCoeffs; ++j)
492
          input_extreme_block[j] = mask_;
493 494
      if (i == 1)
        for (int j = 0; j < kNumCoeffs; ++j)
495
          input_extreme_block[j] = -mask_;
496 497 498 499

      fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);

      // clear reconstructed pixel buffers
James Zern's avatar
James Zern committed
500 501
      memset(dst, 0, kNumCoeffs * sizeof(uint8_t));
      memset(ref, 0, kNumCoeffs * sizeof(uint8_t));
502
#if CONFIG_VP9_HIGHBITDEPTH
James Zern's avatar
James Zern committed
503 504
      memset(dst16, 0, kNumCoeffs * sizeof(uint16_t));
      memset(ref16, 0, kNumCoeffs * sizeof(uint16_t));
505
#endif
506 507 508 509 510

      // quantization with maximum allowed step sizes
      output_ref_block[0] = (output_ref_block[0] / dc_thred) * dc_thred;
      for (int j = 1; j < kNumCoeffs; ++j)
        output_ref_block[j] = (output_ref_block[j] / ac_thred) * ac_thred;
511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530
      if (bit_depth_ == VPX_BITS_8) {
        inv_txfm_ref(output_ref_block, ref, pitch_, tx_type_);
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(output_ref_block, dst, pitch_));
#if CONFIG_VP9_HIGHBITDEPTH
      } else {
        inv_txfm_ref(output_ref_block, CONVERT_TO_BYTEPTR(ref16), pitch_,
                     tx_type_);
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(output_ref_block,
                                            CONVERT_TO_BYTEPTR(dst16), pitch_));
#endif
      }
      if (bit_depth_ == VPX_BITS_8) {
        for (int j = 0; j < kNumCoeffs; ++j)
          EXPECT_EQ(ref[j], dst[j]);
#if CONFIG_VP9_HIGHBITDEPTH
      } else {
        for (int j = 0; j < kNumCoeffs; ++j)
          EXPECT_EQ(ref16[j], dst16[j]);
#endif
      }
531 532 533
    }
  }

534 535 536
  void RunInvAccuracyCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
537 538 539 540
    DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
541
#if CONFIG_VP9_HIGHBITDEPTH
542 543
    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
544
#endif  // CONFIG_VP9_HIGHBITDEPTH
Daniel Kang's avatar
Daniel Kang committed
545

546 547
    for (int i = 0; i < count_test_block; ++i) {
      double out_r[kNumCoeffs];
Daniel Kang's avatar
Daniel Kang committed
548

549 550
      // Initialize a test block with input range [-255, 255].
      for (int j = 0; j < kNumCoeffs; ++j) {
551 552 553 554 555 556 557 558 559
        if (bit_depth_ == VPX_BITS_8) {
          src[j] = rnd.Rand8();
          dst[j] = rnd.Rand8();
          in[j] = src[j] - dst[j];
#if CONFIG_VP9_HIGHBITDEPTH
        } else {
          src16[j] = rnd.Rand16() & mask_;
          dst16[j] = rnd.Rand16() & mask_;
          in[j] = src16[j] - dst16[j];
560
#endif  // CONFIG_VP9_HIGHBITDEPTH
561
        }
562 563 564 565
      }

      reference_16x16_dct_2d(in, out_r);
      for (int j = 0; j < kNumCoeffs; ++j)
566
        coeff[j] = static_cast<tran_low_t>(round(out_r[j]));
567

568 569 570 571 572 573
      if (bit_depth_ == VPX_BITS_8) {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, 16));
#if CONFIG_VP9_HIGHBITDEPTH
      } else {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16),
                                            16));
574
#endif  // CONFIG_VP9_HIGHBITDEPTH
575
      }
576 577

      for (int j = 0; j < kNumCoeffs; ++j) {
578 579 580 581
#if CONFIG_VP9_HIGHBITDEPTH
        const uint32_t diff =
            bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
#else
582
        const uint32_t diff = dst[j] - src[j];
583
#endif  // CONFIG_VP9_HIGHBITDEPTH
584 585
        const uint32_t error = diff * diff;
        EXPECT_GE(1u, error)
586 587 588
            << "Error: 16x16 IDCT has error " << error
            << " at index " << j;
      }
Daniel Kang's avatar
Daniel Kang committed
589 590
    }
  }
591 592 593 594 595 596

  void CompareInvReference(IdctFunc ref_txfm, int thresh) {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 10000;
    const int eob = 10;
    const int16_t *scan = vp9_default_scan_orders[TX_16X16].scan;
597 598 599
    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
600
#if CONFIG_VP9_HIGHBITDEPTH
601 602
    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648
#endif  // CONFIG_VP9_HIGHBITDEPTH

    for (int i = 0; i < count_test_block; ++i) {
      for (int j = 0; j < kNumCoeffs; ++j) {
        if (j < eob) {
          // Random values less than the threshold, either positive or negative
          coeff[scan[j]] = rnd(thresh) * (1 - 2 * (i % 2));
        } else {
          coeff[scan[j]] = 0;
        }
        if (bit_depth_ == VPX_BITS_8) {
          dst[j] = 0;
          ref[j] = 0;
#if CONFIG_VP9_HIGHBITDEPTH
        } else {
          dst16[j] = 0;
          ref16[j] = 0;
#endif  // CONFIG_VP9_HIGHBITDEPTH
        }
      }
      if (bit_depth_ == VPX_BITS_8) {
        ref_txfm(coeff, ref, pitch_);
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
      } else {
#if CONFIG_VP9_HIGHBITDEPTH
        ref_txfm(coeff, CONVERT_TO_BYTEPTR(ref16), pitch_);
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16),
                                 pitch_));
#endif  // CONFIG_VP9_HIGHBITDEPTH
      }

      for (int j = 0; j < kNumCoeffs; ++j) {
#if CONFIG_VP9_HIGHBITDEPTH
        const uint32_t diff =
            bit_depth_ == VPX_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
#else
        const uint32_t diff = dst[j] - ref[j];
#endif  // CONFIG_VP9_HIGHBITDEPTH
        const uint32_t error = diff * diff;
        EXPECT_EQ(0u, error)
            << "Error: 16x16 IDCT Comparison has error " << error
            << " at index " << j;
      }
    }
  }

649 650
  int pitch_;
  int tx_type_;
651 652
  vpx_bit_depth_t bit_depth_;
  int mask_;
653 654
  FhtFunc fwd_txfm_ref;
  IhtFunc inv_txfm_ref;
655
};
Daniel Kang's avatar
Daniel Kang committed
656

657 658
class Trans16x16DCT
    : public Trans16x16TestBase,
659
      public ::testing::TestWithParam<Dct16x16Param> {
660 661
 public:
  virtual ~Trans16x16DCT() {}
Daniel Kang's avatar
Daniel Kang committed
662

663 664 665 666
  virtual void SetUp() {
    fwd_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
    tx_type_  = GET_PARAM(2);
667
    bit_depth_ = GET_PARAM(3);
668
    pitch_    = 16;
669
    fwd_txfm_ref = fdct16x16_ref;
670
    inv_txfm_ref = idct16x16_ref;
671 672 673
    mask_ = (1 << bit_depth_) - 1;
#if CONFIG_VP9_HIGHBITDEPTH
    switch (bit_depth_) {
674
      case VPX_BITS_10:
675 676
        inv_txfm_ref = idct16x16_10_ref;
        break;
677
      case VPX_BITS_12:
678 679 680 681 682 683 684 685 686
        inv_txfm_ref = idct16x16_12_ref;
        break;
      default:
        inv_txfm_ref = idct16x16_ref;
        break;
    }
#else
    inv_txfm_ref = idct16x16_ref;
#endif
687 688
  }
  virtual void TearDown() { libvpx_test::ClearSystemState(); }
Daniel Kang's avatar
Daniel Kang committed
689

690
 protected:
691
  void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
692 693
    fwd_txfm_(in, out, stride);
  }
694
  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
695
    inv_txfm_(out, dst, stride);
Daniel Kang's avatar
Daniel Kang committed
696
  }
697

698 699
  FdctFunc fwd_txfm_;
  IdctFunc inv_txfm_;
700 701 702 703
};

// Each test below runs the shared check of the same name from
// Trans16x16TestBase against the DCT parameters.
TEST_P(Trans16x16DCT, AccuracyCheck) {
  RunAccuracyCheck();
}

TEST_P(Trans16x16DCT, CoeffCheck) {
  RunCoeffCheck();
}

TEST_P(Trans16x16DCT, MemCheck) {
  RunMemCheck();
}

TEST_P(Trans16x16DCT, QuantCheck) {
  // Use maximally allowed quantization step sizes for DC and AC
  // coefficients respectively.
  RunQuantCheck(1336, 1828);
}

TEST_P(Trans16x16DCT, InvAccuracyCheck) {
  RunInvAccuracyCheck();
}

724 725
class Trans16x16HT
    : public Trans16x16TestBase,
726
      public ::testing::TestWithParam<Ht16x16Param> {
727 728 729 730 731 732 733
 public:
  virtual ~Trans16x16HT() {}

  virtual void SetUp() {
    fwd_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
    tx_type_  = GET_PARAM(2);
734
    bit_depth_ = GET_PARAM(3);
735 736
    pitch_    = 16;
    fwd_txfm_ref = fht16x16_ref;
737
    inv_txfm_ref = iht16x16_ref;
738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753
    mask_ = (1 << bit_depth_) - 1;
#if CONFIG_VP9_HIGHBITDEPTH
    switch (bit_depth_) {
      case VPX_BITS_10:
        inv_txfm_ref = iht16x16_10;
        break;
      case VPX_BITS_12:
        inv_txfm_ref = iht16x16_12;
        break;
      default:
        inv_txfm_ref = iht16x16_ref;
        break;
    }
#else
    inv_txfm_ref = iht16x16_ref;
#endif
754
  }
755 756 757
  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
758
  void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
759
    fwd_txfm_(in, out, stride, tx_type_);
760
  }
761
  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
762
    inv_txfm_(out, dst, stride, tx_type_);
763 764
  }

765 766
  FhtFunc fwd_txfm_;
  IhtFunc inv_txfm_;
767 768 769 770 771 772
};

// Each test below runs the shared check of the same name from
// Trans16x16TestBase against the hybrid-transform parameters.
TEST_P(Trans16x16HT, AccuracyCheck) {
  RunAccuracyCheck();
}

TEST_P(Trans16x16HT, CoeffCheck) {
  RunCoeffCheck();
}

TEST_P(Trans16x16HT, MemCheck) {
  RunMemCheck();
}

TEST_P(Trans16x16HT, QuantCheck) {
  // The encoder skips any non-DC intra prediction modes,
  // when the quantization step size goes beyond 988.
  // NOTE(review): the step sizes passed below (429 DC, 729 AC) are not
  // explained by the sentence above; confirm how they were derived.
  RunQuantCheck(429, 729);
}

787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817
// Fixture that compares an inverse DCT against a reference inverse DCT.
// Parameters: (reference inverse, inverse under test, coefficient threshold,
// bit depth).
class InvTrans16x16DCT
    : public Trans16x16TestBase,
      public ::testing::TestWithParam<Idct16x16Param> {
 public:
  virtual ~InvTrans16x16DCT() {}

  virtual void SetUp() {
    ref_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
    thresh_ = GET_PARAM(2);
    bit_depth_ = GET_PARAM(3);
    pitch_ = 16;
    mask_ = (1 << bit_depth_) - 1;
  }
  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
  // No forward transform takes part in this fixture; the override only
  // satisfies the base-class interface.
  void RunFwdTxfm(int16_t * /*in*/, tran_low_t * /*out*/, int /*stride*/) {}
  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
    inv_txfm_(out, dst, stride);
  }

  IdctFunc ref_txfm_;  // reference inverse transform
  IdctFunc inv_txfm_;  // inverse transform under test
  int thresh_;         // bound handed to CompareInvReference
};

// Runs the inverse transform under test and the reference inverse transform
// on the same coefficient blocks and expects identical output.
TEST_P(InvTrans16x16DCT, CompareReference) {
  CompareInvReference(ref_txfm_, thresh_);
}

818 819
using std::tr1::make_tuple;

820 821 822 823
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
    C, Trans16x16DCT,
    ::testing::Values(
824 825
        make_tuple(&vp9_highbd_fdct16x16_c, &idct16x16_10, 0, VPX_BITS_10),
        make_tuple(&vp9_highbd_fdct16x16_c, &idct16x16_12, 0, VPX_BITS_12),
826 827
        make_tuple(&vp9_fdct16x16_c, &vp9_idct16x16_256_add_c, 0, VPX_BITS_8)));
#else
828 829 830
INSTANTIATE_TEST_CASE_P(
    C, Trans16x16DCT,
    ::testing::Values(
831
        make_tuple(&vp9_fdct16x16_c, &vp9_idct16x16_256_add_c, 0, VPX_BITS_8)));
832
#endif  // CONFIG_VP9_HIGHBITDEPTH
833 834

// C implementations of the hybrid transform, one entry per transform type
// (0..3) and bit depth.
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
    C, Trans16x16HT,
    ::testing::Values(
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 0, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 1, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 2, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 3, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 0, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 1, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 2, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 3, VPX_BITS_12),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8)));
#else
INSTANTIATE_TEST_CASE_P(
    C, Trans16x16HT,
    ::testing::Values(
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8)));
#endif  // CONFIG_VP9_HIGHBITDEPTH
// NEON Trans16x16DCT: the C forward DCT is paired with the NEON inverse DCT at
// VPX_BITS_8. Compiled only when high bitdepth and hardware emulation are off.
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    NEON, Trans16x16DCT,
    ::testing::Values(
        make_tuple(&vp9_fdct16x16_c,
                   &vp9_idct16x16_256_add_neon, 0, VPX_BITS_8)));
#endif  // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

// SSE2 instantiations for builds without high bitdepth and without hardware
// emulation. Both the forward and the inverse transform are the SSE2 versions,
// all at VPX_BITS_8. The Trans16x16HT list repeats the pair with the third
// tuple element set to 0..3.
// NOTE(review): that element is presumably the hybrid transform type; confirm
// against the fixture's parameter tuple.
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16DCT,
    ::testing::Values(
        make_tuple(&vp9_fdct16x16_sse2,
                   &vp9_idct16x16_256_add_sse2, 0, VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16HT,
    ::testing::Values(
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 0,
                   VPX_BITS_8),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 1,
                   VPX_BITS_8),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 2,
                   VPX_BITS_8),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 3,
                   VPX_BITS_8)));
#endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
// SSE2 Trans16x16DCT with high bitdepth enabled. At VPX_BITS_10 and
// VPX_BITS_12 the list alternates between the SSE2 forward DCT with
// idct16x16_10 / idct16x16_12 and the C forward DCT with the SSE2 inverse; the
// last entry pairs the SSE2 forward DCT with the C inverse at VPX_BITS_8.
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16DCT,
    ::testing::Values(
        make_tuple(&vp9_highbd_fdct16x16_sse2, &idct16x16_10, 0, VPX_BITS_10),
        make_tuple(&vp9_highbd_fdct16x16_c, &idct16x16_256_add_10_sse2, 0,
                   VPX_BITS_10),
        make_tuple(&vp9_highbd_fdct16x16_sse2, &idct16x16_12, 0, VPX_BITS_12),
        make_tuple(&vp9_highbd_fdct16x16_c, &idct16x16_256_add_12_sse2, 0,
                   VPX_BITS_12),
        make_tuple(&vp9_fdct16x16_sse2, &vp9_idct16x16_256_add_c, 0,
                   VPX_BITS_8)));
// SSE2 Trans16x16HT with high bitdepth enabled. vp9_highbd_fht16x16_sse2 is
// paired with iht16x16_10 / iht16x16_12 at VPX_BITS_10 / VPX_BITS_12, and
// vp9_fht16x16_sse2 with the C inverse at VPX_BITS_8. Each pairing is listed
// with the third tuple element set to 0, 1, 2 and 3.
// NOTE(review): that element is presumably the hybrid transform type; confirm
// against the fixture's parameter tuple.
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16HT,
    ::testing::Values(
        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 0, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 1, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 2, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 3, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 0, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 1, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 2, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 3, VPX_BITS_12),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 3,
                   VPX_BITS_8)));
// The optimizations kick in at a threshold of 3155. The value 3167 used below
// is close to that threshold so that both branches are tested.
INSTANTIATE_TEST_CASE_P(
    SSE2, InvTrans16x16DCT,
    ::testing::Values(
        make_tuple(&idct16x16_10_add_10_c, &idct16x16_10_add_10_sse2, 3167,
                   VPX_BITS_10),
        make_tuple(&idct16x16_10, &idct16x16_256_add_10_sse2, 3167,
                   VPX_BITS_10),
        make_tuple(&idct16x16_10_add_12_c, &idct16x16_10_add_12_sse2, 3167,
                   VPX_BITS_12),
        make_tuple(&idct16x16_12, &idct16x16_256_add_12_sse2, 3167,
                   VPX_BITS_12)));
#endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
// MSA instantiations for builds without high bitdepth and without hardware
// emulation. The C forward transforms are paired with the MSA inverse
// transforms at VPX_BITS_8. The Trans16x16HT list repeats the pair with the
// third tuple element set to 0..3.
// NOTE(review): that element is presumably the hybrid transform type; confirm
// against the fixture's parameter tuple.
#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    MSA, Trans16x16DCT,
    ::testing::Values(
        make_tuple(&vp9_fdct16x16_c,
                   &vp9_idct16x16_256_add_msa, 0, VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
    MSA, Trans16x16HT,
    ::testing::Values(
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_msa, 0, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_msa, 1, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_msa, 2, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_msa, 3, VPX_BITS_8)));
#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
}  // namespace