Commit 25c588b1 authored by Ronald S. Bultje

Add subtract_block SSE2 version and unit test.

3% faster overall (3min35.0 to 3min28.5).

Change-Id: I5ff8a5c2c91586b6632ca5009ad1ea51ce94af5e
parent 1e6a32f1
......@@ -66,6 +66,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += sad_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += set_roi.cc
LIBVPX_TEST_SRCS-yes += sixtap_predict_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += subtract_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_subtract_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += variance_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += vp8_decrypt_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += vp8_fdct4x4_test.cc
......
/*
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
extern "C" {
#include "./vpx_config.h"
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_blockd.h"
}
// Pointer to a block-subtraction routine under test. The test below calls it
// as fn(rows, cols, diff, diff_stride, src, src_stride, pred, pred_stride)
// and expects, for every r < rows and c < cols:
//   diff[r * diff_stride + c] == src[r * src_stride + c] -
//                                pred[r * pred_stride + c]
typedef void (*subtract_fn_t)(int rows, int cols,
int16_t *diff_ptr, ptrdiff_t diff_stride,
const uint8_t *src_ptr, ptrdiff_t src_stride,
const uint8_t *pred_ptr, ptrdiff_t pred_stride);
namespace vp9 {
// Value-parameterized fixture: the test parameter is the subtract_fn_t
// implementation to exercise.
class VP9SubtractBlockTest : public ::testing::TestWithParam<subtract_fn_t> {
 public:
  // Calls libvpx_test::ClearSystemState() once each test case has finished.
  virtual void TearDown() { libvpx_test::ClearSystemState(); }
};
using libvpx_test::ACMRandom;
// Verifies that the subtract function under test computes diff = src - pred
// for every block size, first with tightly packed rows (stride equal to the
// block width) and then with padded rows (stride equal to twice the block
// width). Inputs are re-randomized 100 times per block size.
TEST_P(VP9SubtractBlockTest, SimpleSubtract) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());

  // The difference of two uint8_t values always lies in [-255, 255], so a
  // correct implementation can never produce this value.
  const int16_t kPoison = 0x7fff;

  // FIXME(rbultje) split in its own file
  for (BLOCK_SIZE_TYPE bsize = BLOCK_SIZE_AB4X4; bsize < BLOCK_SIZE_TYPES;
       bsize = static_cast<BLOCK_SIZE_TYPE>(static_cast<int>(bsize) + 1)) {
    const int block_width = 4 << b_width_log2(bsize);
    const int block_height = 4 << b_height_log2(bsize);
    // Buffers are sized for the padded layout so both layouts fit.
    const int wide_stride = block_width * 2;
    const int buf_size = block_height * wide_stride;
    int16_t *diff = new int16_t[buf_size];
    uint8_t *pred = new uint8_t[buf_size];
    uint8_t *src = new uint8_t[buf_size];

    for (int n = 0; n < 100; n++) {
      // Fill the whole source and prediction buffers with random bytes.
      for (int r = 0; r < block_height; ++r) {
        for (int c = 0; c < wide_stride; ++c) {
          src[r * wide_stride + c] = rnd.Rand8();
          pred[r * wide_stride + c] = rnd.Rand8();
        }
      }

      // Packed layout. Poison the output first so that any element the
      // function fails to write is reported instead of being read as an
      // indeterminate or stale value.
      for (int i = 0; i < buf_size; ++i) diff[i] = kPoison;
      GetParam()(block_height, block_width, diff, block_width,
                 src, block_width, pred, block_width);

      for (int r = 0; r < block_height; ++r) {
        for (int c = 0; c < block_width; ++c) {
          EXPECT_EQ(diff[r * block_width + c],
                    (src[r * block_width + c] -
                     pred[r * block_width + c])) << "r = " << r
                                                 << ", c = " << c
                                                 << ", bs = " << bsize;
        }
      }

      // Padded layout. The packed call above leaves diff[i] == src[i] -
      // pred[i] at exactly the flat indices checked below for the top half
      // of the rows, so without re-poisoning a no-op implementation would
      // still pass for those rows.
      for (int i = 0; i < buf_size; ++i) diff[i] = kPoison;
      GetParam()(block_height, block_width, diff, wide_stride,
                 src, wide_stride, pred, wide_stride);

      for (int r = 0; r < block_height; ++r) {
        for (int c = 0; c < block_width; ++c) {
          EXPECT_EQ(diff[r * wide_stride + c],
                    (src[r * wide_stride + c] -
                     pred[r * wide_stride + c])) << "r = " << r
                                                 << ", c = " << c
                                                 << ", bs = " << bsize;
        }
      }
    }

    delete[] diff;
    delete[] pred;
    delete[] src;
  }
}
// Always run the test suite against the C implementation.
INSTANTIATE_TEST_CASE_P(C, VP9SubtractBlockTest,
::testing::Values(vp9_subtract_block_c));
// Additionally run it against the SSE2 implementation, but only in builds
// where HAVE_SSE2 is non-zero.
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(SSE2, VP9SubtractBlockTest,
::testing::Values(vp9_subtract_block_sse2));
#endif
} // namespace vp9
......@@ -533,6 +533,9 @@ prototype int vp9_block_error "int16_t *coeff, int16_t *dqcoeff, int block_size"
specialize vp9_block_error mmx sse2
vp9_block_error_sse2=vp9_block_error_xmm
prototype void vp9_subtract_block "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride"
specialize vp9_subtract_block sse2
#
# Structured Similarity (SSIM)
#
......
......@@ -22,10 +22,10 @@
DECLARE_ALIGNED(16, extern const uint8_t,
vp9_pt_energy_class[MAX_ENTROPY_TOKENS]);
void vp9_subtract_block(int rows, int cols,
int16_t *diff_ptr, int diff_stride,
const uint8_t *src_ptr, int src_stride,
const uint8_t *pred_ptr, int pred_stride) {
void vp9_subtract_block_c(int rows, int cols,
int16_t *diff_ptr, ptrdiff_t diff_stride,
const uint8_t *src_ptr, ptrdiff_t src_stride,
const uint8_t *pred_ptr, ptrdiff_t pred_stride) {
int r, c;
for (r = 0; r < rows; r++) {
......
......@@ -42,10 +42,6 @@ void vp9_encode_sbuv(VP9_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize);
void vp9_xform_quant_sby(VP9_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize);
void vp9_xform_quant_sbuv(VP9_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize);
void vp9_subtract_block(int rows, int cols,
int16_t *diff_ptr, int diff_stride,
const uint8_t *src_ptr, int src_stride,
const uint8_t *pred_ptr, int pred_stride);
void vp9_subtract_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize);
void vp9_subtract_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize);
void vp9_subtract_sb(MACROBLOCK *xd, BLOCK_SIZE_TYPE bsize);
......
This diff is collapsed.
This diff is collapsed.
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vpx_config.h"
#include "vpx_ports/x86.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/x86/vp9_dct_mmx.h"
// TODO(jimbankoski) Consider rewriting the c to take the same values rather
// than going through these pointer conversions
#if 0 && HAVE_MMX
// Runs vp9_short_fdct4x4_mmx twice with the same pitch: once on input/output
// as given, and once with input advanced by 4 and output advanced by 16.
void vp9_short_fdct8x4_mmx(short *input, short *output, int pitch) {
  int half;
  for (half = 0; half < 2; ++half) {
    vp9_short_fdct4x4_mmx(input + 4 * half, output + 16 * half, pitch);
  }
}
void vp9_subtract_b_mmx_impl(unsigned char *z, int src_stride,
short *diff, unsigned char *predictor,
int pitch);
// Unpacks the source pointer, source stride, residual buffer and predictor
// pointer from the block descriptors and forwards them, together with pitch,
// to vp9_subtract_b_mmx_impl.
void vp9_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch) {
  // TODO(jingning): The prototype function in c has been changed. Need to
  // modify the mmx and sse versions.
  unsigned int stride = be->src_stride;
  vp9_subtract_b_mmx_impl(*(be->base_src) + be->src,  // source pixels
                          stride,
                          &be->src_diff[0],           // residual output
                          *(bd->base_dst) + bd->dst,  // predictor pixels
                          pitch);
}
#endif
#if 0 && HAVE_SSE2
void vp9_subtract_b_sse2_impl(unsigned char *z, int src_stride,
short *diff, unsigned char *predictor,
int pitch);
// Unpacks the source pointer, source stride, residual buffer and predictor
// pointer from the block descriptors and forwards them, together with pitch,
// to vp9_subtract_b_sse2_impl.
void vp9_subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch) {
  // TODO(jingning): The prototype function in c has been changed. Need to
  // modify the mmx and sse versions.
  unsigned int stride = be->src_stride;
  vp9_subtract_b_sse2_impl(*(be->base_src) + be->src,  // source pixels
                           stride,
                           &be->src_diff[0],           // residual output
                           *(bd->base_dst) + bd->dst,  // predictor pixels
                           pitch);
}
#endif
......@@ -73,13 +73,11 @@ VP9_CX_SRCS-yes += encoder/vp9_mbgraph.h
VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/vp9_mcomp_x86.h
VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/vp9_x86_csystemdependent.c
VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_variance_mmx.c
VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_variance_impl_mmx.asm
VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_sad_mmx.asm
VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_dct_mmx.asm
VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_dct_mmx.h
VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_subtract_mmx.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_sse2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_impl_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad_sse2.asm
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment