Commit 54d48955 authored by Ronald S. Bultje

vp8: change build_intra_predictors_mby_s to use vpx_dsp.

Change-Id: I2000820e0c04de2c975d370a0cf7145330289bb2
parent aeae7fc9
......@@ -216,95 +216,6 @@ class IntraPredBase {
int num_planes_;
};
// Signature shared by every vp8_build_intra_predictors_mby_s_* function that
// IntraPredYTest is instantiated with: the macroblock descriptor, a pointer
// to the row above the block, a pointer to the column left of the block and
// its stride, and the destination buffer with its stride.
typedef void (*IntraPredYFunc)(MACROBLOCKD *x,
uint8_t *yabove_row,
uint8_t *yleft,
int left_stride,
uint8_t *ypred_ptr,
int y_stride);
// Value-parameterized fixture for the 16x16 luma (Y) intra predictors.
//
// The test parameter is the predictor implementation under test (an
// IntraPredYFunc). The macroblock descriptor, the mode info and the pixel
// buffer are allocated once per test case and shared by every test in it.
class IntraPredYTest
    : public IntraPredBase,
      public ::testing::TestWithParam<IntraPredYFunc> {
 public:
  // Allocates the buffers shared by all tests of this test case.
  static void SetUpTestCase() {
    mb_ = reinterpret_cast<MACROBLOCKD*>(
        vpx_memalign(32, sizeof(MACROBLOCKD)));
    mi_ = reinterpret_cast<MODE_INFO*>(
        vpx_memalign(32, sizeof(MODE_INFO)));
    data_array_ = reinterpret_cast<uint8_t*>(
        vpx_memalign(kDataAlignment, kDataBufferSize));
  }

  // Releases the shared buffers.
  static void TearDownTestCase() {
    vpx_free(data_array_);
    vpx_free(mi_);
    vpx_free(mb_);
    // Reset all three pointers (not only data_array_) so that none of the
    // static members is left pointing at freed memory.
    data_array_ = NULL;
    mi_ = NULL;
    mb_ = NULL;
  }

 protected:
  static const int kBlockSize = 16;
  static const int kDataAlignment = 16;
  // We use 48 so that the data pointer of the first pixel in each row of
  // each macroblock is 16-byte aligned, and this gives us access to the
  // top-left and top-right corner pixels belonging to the top-left/right
  // macroblocks.
  static const int kStride = kBlockSize * 3;
  // We use 17 lines so we have one line above us for top-prediction.
  static const int kDataBufferSize = kStride * (kBlockSize + 1);

  // Picks up the predictor under test and lets the base class wire the
  // shared buffers into the macroblock descriptor.
  // NOTE(review): the trailing 1 is presumably the number of planes --
  // confirm against IntraPredBase::SetupMacroblock.
  virtual void SetUp() {
    pred_fn_ = GetParam();
    SetupMacroblock(mb_, mi_, data_array_, kBlockSize, kStride, 1);
  }

  // Stores |mode| in the macroblock's mode info and runs the predictor under
  // test. The above row is the line directly above the block
  // (data_ptr_[0] - kStride), the left column starts one pixel to the left
  // of the block (data_ptr_[0] - 1), and the prediction is written in place
  // at data_ptr_[0].
  virtual void Predict(MB_PREDICTION_MODE mode) {
    mbptr_->mode_info_context->mbmi.mode = mode;
    ASM_REGISTER_STATE_CHECK(pred_fn_(mbptr_,
                                      data_ptr_[0] - kStride,
                                      data_ptr_[0] - 1, kStride,
                                      data_ptr_[0], kStride));
  }

  IntraPredYFunc pred_fn_;
  static uint8_t* data_array_;
  static MACROBLOCKD* mb_;
  static MODE_INFO* mi_;
};
// Out-of-class definitions of IntraPredYTest's static members. They start
// out NULL and are assigned in IntraPredYTest::SetUpTestCase().
MACROBLOCKD* IntraPredYTest::mb_ = NULL;
MODE_INFO* IntraPredYTest::mi_ = NULL;
uint8_t* IntraPredYTest::data_array_ = NULL;
// Runs RunTest() (not defined in IntraPredYTest itself) once for every
// predictor implementation the fixture is instantiated with.
TEST_P(IntraPredYTest, IntraPredTests) {
RunTest();
}
// Instantiate the luma predictor test for the C implementation and for each
// SIMD implementation whose HAVE_* build macro is enabled.
INSTANTIATE_TEST_CASE_P(C, IntraPredYTest,
::testing::Values(
vp8_build_intra_predictors_mby_s_c));
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(SSE2, IntraPredYTest,
::testing::Values(
vp8_build_intra_predictors_mby_s_sse2));
#endif
#if HAVE_SSSE3
INSTANTIATE_TEST_CASE_P(SSSE3, IntraPredYTest,
::testing::Values(
vp8_build_intra_predictors_mby_s_ssse3));
#endif
#if HAVE_NEON
INSTANTIATE_TEST_CASE_P(NEON, IntraPredYTest,
::testing::Values(
vp8_build_intra_predictors_mby_s_neon));
#endif
#if HAVE_MSA
INSTANTIATE_TEST_CASE_P(MSA, IntraPredYTest,
::testing::Values(
vp8_build_intra_predictors_mby_s_msa));
#endif
typedef void (*IntraPredUvFunc)(MACROBLOCKD *x,
uint8_t *uabove_row,
uint8_t *vabove_row,
......
......@@ -12,92 +12,6 @@
#include "vp8/common/blockd.h"
/*
 * NEON implementation of the 16x16 luma intra predictor.
 *
 * Writes 16 rows of 16 predicted pixels to ypred_ptr (rows are y_stride
 * bytes apart) according to x->mode_info_context->mbmi.mode:
 *   DC_PRED: every pixel is the rounded average of the 16 pixels at
 *            yabove_row (when x->up_available) and of the 16 left-column
 *            pixels (when x->left_available); 128 when neither is available.
 *   V_PRED:  every row is a copy of the 16 pixels at yabove_row.
 *   H_PRED:  row i is filled with the i-th left-column pixel.
 *   TM_PRED: pixel (i, c) is left[i] + above[c] - yabove_row[-1], saturated
 *            to the range [0, 255].
 * Any other mode leaves the destination untouched.
 *
 * The left column is read from yleft, one pixel every left_stride bytes.
 */
void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x,
                                           unsigned char *yabove_row,
                                           unsigned char *yleft,
                                           int left_stride,
                                           unsigned char *ypred_ptr,
                                           int y_stride) {
  const int mode = x->mode_info_context->mbmi.mode;
  int i;

  switch (mode) {
    case DC_PRED:
    {
      /* Number of available edges: 0, 1 or 2. */
      int shift = x->up_available + x->left_available;
      uint8x16_t v_expected_dc = vdupq_n_u8(128);

      if (shift) {
        unsigned int average = 0;
        int expected_dc;

        if (x->up_available) {
          /* Sum the 16 above pixels with a chain of widening pairwise adds,
           * then fold the two remaining 64-bit partial sums together. */
          const uint8x16_t v_above = vld1q_u8(yabove_row);
          const uint16x8_t a = vpaddlq_u8(v_above);
          const uint32x4_t b = vpaddlq_u16(a);
          const uint64x2_t c = vpaddlq_u32(b);
          const uint32x2_t d = vadd_u32(vreinterpret_u32_u64(vget_low_u64(c)),
                                        vreinterpret_u32_u64(vget_high_u64(c)));
          average = vget_lane_u32(d, 0);
        }

        if (x->left_available) {
          for (i = 0; i < 16; ++i) {
            average += yleft[0];
            yleft += left_stride;
          }
        }

        /* 16 samples -> divide by 16 (shift 4); 32 samples -> divide by 32
         * (shift 5). Half the divisor is added first for rounding. */
        shift += 3;
        expected_dc = (average + (1 << (shift - 1))) >> shift;
        v_expected_dc = vmovq_n_u8((uint8_t)expected_dc);
      }

      for (i = 0; i < 16; ++i) {
        vst1q_u8(ypred_ptr, v_expected_dc);
        ypred_ptr += y_stride;
      }
    }
    break;

    case V_PRED:
    {
      const uint8x16_t v_above = vld1q_u8(yabove_row);

      for (i = 0; i < 16; ++i) {
        vst1q_u8(ypred_ptr, v_above);
        ypred_ptr += y_stride;
      }
    }
    break;

    case H_PRED:
    {
      for (i = 0; i < 16; ++i) {
        const uint8x16_t v_yleft = vmovq_n_u8((uint8_t)yleft[0]);
        yleft += left_stride;
        vst1q_u8(ypred_ptr, v_yleft);
        ypred_ptr += y_stride;
      }
    }
    break;

    case TM_PRED:
    {
      /* The pixel values are unsigned; keep them unsigned when broadcasting
       * instead of routing them through signed narrowing casts. */
      const uint16x8_t v_ytop_left = vmovq_n_u16((uint16_t)yabove_row[-1]);
      const uint8x16_t v_above = vld1q_u8(yabove_row);

      for (i = 0; i < 16; ++i) {
        const uint8x8_t v_yleft = vmov_n_u8((uint8_t)yleft[0]);
        /* above + left, widened to 16 bits (at most 510). */
        const uint16x8_t a_lo = vaddl_u8(vget_low_u8(v_above), v_yleft);
        const uint16x8_t a_hi = vaddl_u8(vget_high_u8(v_above), v_yleft);
        /* Subtract the top-left pixel as signed 16-bit; the result may be
         * negative or exceed 255. */
        const int16x8_t b_lo = vsubq_s16(vreinterpretq_s16_u16(a_lo),
                                         vreinterpretq_s16_u16(v_ytop_left));
        const int16x8_t b_hi = vsubq_s16(vreinterpretq_s16_u16(a_hi),
                                         vreinterpretq_s16_u16(v_ytop_left));
        /* Saturating narrow back to unsigned 8 bits clamps to [0, 255]. */
        const uint8x8_t pred_lo = vqmovun_s16(b_lo);
        const uint8x8_t pred_hi = vqmovun_s16(b_hi);

        vst1q_u8(ypred_ptr, vcombine_u8(pred_lo, pred_hi));
        ypred_ptr += y_stride;
        yleft += left_stride;
      }
    }
    break;

    default:
      /* Not a 16x16 intra prediction mode handled here: nothing to build. */
      break;
  }
}
void vp8_build_intra_predictors_mbuv_s_neon(MACROBLOCKD *x,
unsigned char * uabove_row,
unsigned char * vabove_row,
......
......@@ -22,16 +22,6 @@ static void intra_predict_vert_8x8_msa(uint8_t *src, uint8_t *dst,
SD4(out, out, out, out, dst, dst_stride);
}
/* Vertical prediction for a 16x16 block: loads one vector from src and
 * stores that same vector to dst in two batches of eight ST_UB8 operands,
 * advancing dst by eight rows between the batches. */
static void intra_predict_vert_16x16_msa(uint8_t *src, uint8_t *dst,
                                         int32_t dst_stride)
{
    v16u8 above = LD_UB(src);
    int32_t batch;

    for (batch = 0; batch < 2; ++batch)
    {
        ST_UB8(above, above, above, above, above, above, above, above,
               dst, dst_stride);
        dst += (8 * dst_stride);
    }
}
static void intra_predict_horiz_8x8_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride)
{
......@@ -51,34 +41,6 @@ static void intra_predict_horiz_8x8_msa(uint8_t *src, int32_t src_stride,
SD4(out4, out5, out6, out7, dst, dst_stride);
}
static void intra_predict_horiz_16x16_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride)
{
uint32_t row;
uint8_t inp0, inp1, inp2, inp3;
v16u8 src0, src1, src2, src3;
for (row = 4; row--;)
{
inp0 = src[0];
src += src_stride;
inp1 = src[0];
src += src_stride;
inp2 = src[0];
src += src_stride;
inp3 = src[0];
src += src_stride;
src0 = (v16u8)__msa_fill_b(inp0);
src1 = (v16u8)__msa_fill_b(inp1);
src2 = (v16u8)__msa_fill_b(inp2);
src3 = (v16u8)__msa_fill_b(inp3);
ST_UB4(src0, src1, src2, src3, dst, dst_stride);
dst += (4 * dst_stride);
}
}
static void intra_predict_dc_8x8_msa(uint8_t *src_top, uint8_t *src_left,
int32_t src_stride_left,
uint8_t *dst, int32_t dst_stride,
......@@ -140,128 +102,6 @@ static void intra_predict_dc_8x8_msa(uint8_t *src_top, uint8_t *src_left,
SD4(out, out, out, out, dst, dst_stride);
}
static void intra_predict_dc_16x16_msa(uint8_t *src_top, uint8_t *src_left,
int32_t src_stride_left,
uint8_t *dst, int32_t dst_stride,
uint8_t is_above, uint8_t is_left)
{
uint32_t row;
uint32_t addition = 0;
v16u8 src_above, out;
v8u16 sum_above;
v4u32 sum_top;
v2u64 sum;
if (is_left && is_above)
{
src_above = LD_UB(src_top);
sum_above = __msa_hadd_u_h(src_above, src_above);
sum_top = __msa_hadd_u_w(sum_above, sum_above);
sum = __msa_hadd_u_d(sum_top, sum_top);
sum_top = (v4u32)__msa_pckev_w((v4i32)sum, (v4i32)sum);
sum = __msa_hadd_u_d(sum_top, sum_top);
addition = __msa_copy_u_w((v4i32)sum, 0);
for (row = 0; row < 16; ++row)
{
addition += src_left[row * src_stride_left];
}
addition = (addition + 16) >> 5;
out = (v16u8)__msa_fill_b(addition);
}
else if (is_left)
{
for (row = 0; row < 16; ++row)
{
addition += src_left[row * src_stride_left];
}
addition = (addition + 8) >> 4;
out = (v16u8)__msa_fill_b(addition);
}
else if (is_above)
{
src_above = LD_UB(src_top);
sum_above = __msa_hadd_u_h(src_above, src_above);
sum_top = __msa_hadd_u_w(sum_above, sum_above);
sum = __msa_hadd_u_d(sum_top, sum_top);
sum_top = (v4u32)__msa_pckev_w((v4i32)sum, (v4i32)sum);
sum = __msa_hadd_u_d(sum_top, sum_top);
sum = (v2u64)__msa_srari_d((v2i64)sum, 4);
out = (v16u8)__msa_splati_b((v16i8)sum, 0);
}
else
{
out = (v16u8)__msa_ldi_b(128);
}
ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
dst += (8 * dst_stride);
ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
}
/*
 * MSA implementation of the 16x16 luma intra predictor.
 *
 * Dispatches on x->mode_info_context->mbmi.mode and writes the prediction
 * to ypred_ptr (rows are y_stride bytes apart):
 *   DC_PRED: intra_predict_dc_16x16_msa(), honouring x->up_available and
 *            x->left_available.
 *   V_PRED:  intra_predict_vert_16x16_msa() from yabove_row.
 *   H_PRED:  intra_predict_horiz_16x16_msa() from the left column.
 *   TM_PRED: pixel (row, col) is left[row] + above[col] - yabove_row[-1],
 *            clamped to [0, 255].
 * All remaining modes leave the destination untouched.
 *
 * The left column is read from yleft, one pixel every left_stride bytes.
 */
void vp8_build_intra_predictors_mby_s_msa(struct macroblockd *x,
                                          unsigned char *yabove_row,
                                          unsigned char *yleft,
                                          int left_stride,
                                          unsigned char *ypred_ptr,
                                          int y_stride)
{
    uint32_t row, col;

    switch (x->mode_info_context->mbmi.mode)
    {
        case DC_PRED:
            intra_predict_dc_16x16_msa(yabove_row, yleft, left_stride,
                                       ypred_ptr, y_stride,
                                       x->up_available, x->left_available);
            break;

        case V_PRED:
            intra_predict_vert_16x16_msa(yabove_row, ypred_ptr, y_stride);
            break;

        case H_PRED:
            intra_predict_horiz_16x16_msa(yleft, left_stride, ypred_ptr,
                                          y_stride);
            break;

        case TM_PRED:
        {
            /* Only this mode uses the above-left pixel, so it is read here
             * rather than unconditionally for every mode. */
            const int ytop_left = yabove_row[-1];

            for (row = 0; row < 16; ++row)
            {
                /* Step the left-column pointer instead of indexing with an
                 * unsigned row times a signed stride. */
                const int left = yleft[0];

                for (col = 0; col < 16; ++col)
                {
                    int pred = left + yabove_row[col] - ytop_left;

                    if (pred < 0)
                        pred = 0;

                    if (pred > 255)
                        pred = 255;

                    ypred_ptr[col] = pred;
                }

                yleft += left_stride;
                ypred_ptr += y_stride;
            }
            break;
        }

        case B_PRED:
        case NEARESTMV:
        case NEARMV:
        case ZEROMV:
        case NEWMV:
        case SPLITMV:
        case MB_MODE_COUNT:
            /* Not a 16x16 intra mode: nothing to build. */
            break;
    }
}
void vp8_build_intra_predictors_mbuv_s_msa(struct macroblockd *x,
unsigned char *uabove_row,
unsigned char *vabove_row,
......
......@@ -9,132 +9,56 @@
*/
#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "./vp8_rtcd.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/vpx_once.h"
#include "blockd.h"
#include "vp8/common/reconintra.h"
void vp8_build_intra_predictors_mby_s_c(MACROBLOCKD *x,
unsigned char * yabove_row,
unsigned char * yleft,
int left_stride,
unsigned char * ypred_ptr,
int y_stride)
/* Signature of the vpx_dsp predictors stored in the dispatch tables below:
 * destination and its stride, then the above row and the left column. */
typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left);
/* Predictors indexed by prediction mode. Only the V_PRED, H_PRED and
 * TM_PRED entries are assigned by vp8_init_intra_predictors_internal(). */
static intra_pred_fn pred[4];
/* DC predictors, indexed as dc_pred[left_available][up_available]. */
static intra_pred_fn dc_pred[2][2];
/* Fills the pred[] and dc_pred[][] dispatch tables with the vpx_dsp 16x16
 * predictor functions. */
static void vp8_init_intra_predictors_internal(void)
{
    /* DC variants, addressed as dc_pred[left_available][up_available]. */
    dc_pred[1][1] = vpx_dc_predictor_16x16;
    dc_pred[1][0] = vpx_dc_left_predictor_16x16;
    dc_pred[0][1] = vpx_dc_top_predictor_16x16;
    dc_pred[0][0] = vpx_dc_128_predictor_16x16;

    /* Remaining modes, addressed by prediction mode. */
    pred[TM_PRED] = vpx_tm_predictor_16x16;
    pred[H_PRED] = vpx_h_predictor_16x16;
    pred[V_PRED] = vpx_v_predictor_16x16;
}
void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x,
unsigned char * yabove_row,
unsigned char * yleft,
int left_stride,
unsigned char * ypred_ptr,
int y_stride)
{
MB_PREDICTION_MODE mode = x->mode_info_context->mbmi.mode;
unsigned char yleft_col[16];
unsigned char ytop_left = yabove_row[-1];
int r, c, i;
int i;
for (i = 0; i < 16; i++)
{
yleft_col[i] = yleft[i* left_stride];
}
/* for Y */
switch (x->mode_info_context->mbmi.mode)
{
case DC_PRED:
{
int expected_dc;
int shift;
int average = 0;
if (x->up_available || x->left_available)
{
if (x->up_available)
{
for (i = 0; i < 16; i++)
{
average += yabove_row[i];
}
}
if (x->left_available)
{
for (i = 0; i < 16; i++)
{
average += yleft_col[i];
}
}
shift = 3 + x->up_available + x->left_available;
expected_dc = (average + (1 << (shift - 1))) >> shift;
}
else
{
expected_dc = 128;
}
/*memset(ypred_ptr, expected_dc, 256);*/
for (r = 0; r < 16; r++)
{
memset(ypred_ptr, expected_dc, 16);
ypred_ptr += y_stride;
}
}
break;
case V_PRED:
{
for (r = 0; r < 16; r++)
{
((int *)ypred_ptr)[0] = ((int *)yabove_row)[0];
((int *)ypred_ptr)[1] = ((int *)yabove_row)[1];
((int *)ypred_ptr)[2] = ((int *)yabove_row)[2];
((int *)ypred_ptr)[3] = ((int *)yabove_row)[3];
ypred_ptr += y_stride;
}
}
break;
case H_PRED:
if (mode == DC_PRED)
{
for (r = 0; r < 16; r++)
{
memset(ypred_ptr, yleft_col[r], 16);
ypred_ptr += y_stride;
}
dc_pred[x->left_available][x->up_available](ypred_ptr, y_stride,
yabove_row, yleft_col);
}
break;
case TM_PRED:
else
{
for (r = 0; r < 16; r++)
{
for (c = 0; c < 16; c++)
{
int pred = yleft_col[r] + yabove_row[ c] - ytop_left;
if (pred < 0)
pred = 0;
if (pred > 255)
pred = 255;
ypred_ptr[c] = pred;
}
ypred_ptr += y_stride;
}
}
break;
case B_PRED:
case NEARESTMV:
case NEARMV:
case ZEROMV:
case NEWMV:
case SPLITMV:
case MB_MODE_COUNT:
break;
pred[mode](ypred_ptr, y_stride, yabove_row, yleft_col);
}
}
......@@ -278,3 +202,8 @@ void vp8_build_intra_predictors_mbuv_s_c(MACROBLOCKD *x,
break;
}
}
/* Public entry point for setting up the intra predictor dispatch tables.
 * The actual setup routine is handed to once().
 * NOTE(review): once() (vpx_ports/vpx_once.h) presumably guarantees the
 * routine runs a single time even with concurrent callers -- confirm. */
void vp8_init_intra_predictors(void)
{
once(vp8_init_intra_predictors_internal);
}
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_RECONINTRA_H_
#define VP8_COMMON_RECONINTRA_H_
#include "vp8/common/blockd.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Builds the 16x16 luma intra prediction for macroblock x into ypred_ptr
 * (stride y_stride), using the row at yabove_row and the left column read
 * from yleft every left_stride bytes. */
void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x,
unsigned char *yabove_row,
unsigned char *yleft,
int left_stride,
unsigned char *ypred_ptr,
int y_stride);
/* Sets up the predictor tables used by the function above.
 * NOTE(review): presumably must be called before the first prediction --
 * confirm against the callers. */
void vp8_init_intra_predictors(void);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP8_COMMON_RECONINTRA_H_
......@@ -152,9 +152,6 @@ specialize qw/vp8_copy_mem8x4 mmx media neon dspr2 msa/;
$vp8_copy_mem8x4_media=vp8_copy_mem8x4_v6;
$vp8_copy_mem8x4_dspr2=vp8_copy_mem8x4_dspr2;
# Run-time dispatch entries for the macroblock intra predictors: luma (mby)
# first, then chroma (mbuv).
# NOTE(review): add_proto/specialize are helpers defined outside this
# excerpt; presumably they declare the prototype and list the optimized
# variants available for it -- confirm.
add_proto qw/void vp8_build_intra_predictors_mby_s/, "struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride";
specialize qw/vp8_build_intra_predictors_mby_s sse2 ssse3 neon msa/;
add_proto qw/void vp8_build_intra_predictors_mbuv_s/, "struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride";
specialize qw/vp8_build_intra_predictors_mbuv_s sse2 ssse3 neon msa/;
......
This diff is collapsed.
......@@ -109,78 +109,3 @@ void vp8_build_intra_predictors_mbuv_s_ssse3(MACROBLOCKD *x,
vp8_intra_pred_uv_tm_ssse3,
vp8_intra_pred_uv_ho_ssse3);
}
/* Expands to the declaration of a luma predictor named sym that takes the
 * destination and its stride, the above row, and the left column with its
 * stride. Passing (*name) as sym yields a function pointer declarator,
 * which is how the typedef below is formed. */
#define build_intra_predictors_mby_prototype(sym) \
void sym(unsigned char *dst, int dst_stride, \
const unsigned char *above, \
const unsigned char *left, int left_stride)
/* Pointer type for any of the luma predictors declared below. */
typedef build_intra_predictors_mby_prototype((*build_intra_predictors_mby_fn_t));
/* Luma predictors defined outside this file, one per prediction mode / DC
 * edge-availability combination, plus an SSSE3 variant of TM. */
extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_dc_sse2);
extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_dctop_sse2);
extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_dcleft_sse2);
extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_dc128_sse2);
extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_ho_sse2);
extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_ve_sse2);
extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_tm_sse2);
extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_tm_ssse3);
/*
 * Shared x86 dispatcher for the 16x16 luma intra predictors.
 *
 * Selects a predictor from x->mode_info_context->mbmi.mode and calls it
 * with (dst_y, dst_stride, yabove_row, yleft, left_stride):
 *   V_PRED / H_PRED: the SSE2 vertical / horizontal predictor.
 *   TM_PRED:         tm_func, supplied by the caller so that the SSE2 and
 *                    SSSE3 entry points can share this function.
 *   DC_PRED:         one of four SSE2 DC predictors, chosen from
 *                    x->up_available and x->left_available.
 * Any other mode returns without writing anything.
 */
static void vp8_build_intra_predictors_mby_x86(MACROBLOCKD *x,
                                               unsigned char * yabove_row,
                                               unsigned char *dst_y,
                                               int dst_stride,
                                               unsigned char * yleft,
                                               int left_stride,
                                               build_intra_predictors_mby_fn_t tm_func)
{
    const int mode = x->mode_info_context->mbmi.mode;
    /* Luma predictor pointer: use the mby typedef that matches tm_func and
     * the vp8_intra_pred_y_* prototypes, not the chroma (mbuv) one. */
    build_intra_predictors_mby_fn_t fn;

    switch (mode) {
        case V_PRED:
            fn = vp8_intra_pred_y_ve_sse2;
            break;
        case H_PRED:
            fn = vp8_intra_pred_y_ho_sse2;
            break;
        case TM_PRED:
            fn = tm_func;
            break;
        case DC_PRED:
            if (x->up_available) {
                fn = x->left_available ? vp8_intra_pred_y_dc_sse2
                                       : vp8_intra_pred_y_dctop_sse2;
            } else {
                fn = x->left_available ? vp8_intra_pred_y_dcleft_sse2
                                       : vp8_intra_pred_y_dc128_sse2;
            }
            break;
        default:
            /* Not a 16x16 intra mode: nothing to predict. */
            return;
    }

    fn(dst_y, dst_stride, yabove_row, yleft, left_stride);
}
/* SSE2 entry point for the 16x16 luma intra predictor: forwards to the
 * shared x86 dispatcher, selecting the SSE2 TM predictor. Note that the
 * dispatcher takes the destination before the left column, so the
 * arguments are reordered here. */
void vp8_build_intra_predictors_mby_s_sse2(MACROBLOCKD *x,
unsigned char * yabove_row,
unsigned char * yleft,
int left_stride,
unsigned char * ypred_ptr,
int y_stride)
{
vp8_build_intra_predictors_mby_x86(x, yabove_row, ypred_ptr,
y_stride, yleft, left_stride,
vp8_intra_pred_y_tm_sse2);
}
/* SSSE3 entry point for the 16x16 luma intra predictor: forwards to the
 * shared x86 dispatcher, selecting the SSSE3 TM predictor. Note that the
 * dispatcher takes the destination before the left column, so the
 * arguments are reordered here. */
void vp8_build_intra_predictors_mby_s_ssse3(MACROBLOCKD *x,
unsigned char * yabove_row,
unsigned char * yleft,
int left_stride,
unsigned char * ypred_ptr,
int y_stride)
{
vp8_build_intra_predictors_mby_x86(x, yabove_row, ypred_ptr,
y_stride, yleft, left_stride,
vp8_intra_pred_y_tm_ssse3);
}
......@@ -23,6 +23,7 @@
#include "vp8/common/entropymode.h"
#include "vp8/common/quant_common.h"
#include "vpx_scale/vpx_scale.h"
#include "vp8/common/reconintra.h"
#include "vp8/common/setupintrarecon.h"
#include "decodemv.h"
......
......@@ -25,9 +25,12 @@
#include <assert.h>
#include "vp8/common/quant_common.h"
#include "vp8/common/reconintra.h"
#include "./vpx_dsp_rtcd.h"
#include "./vpx_scale_rtcd.h"
#include "vpx_scale/vpx_scale.h"
#include "vp8/common/systemdependent.h"
#include "vpx_ports/vpx_once.h"
#include "vpx_ports/vpx_timer.h"
#include "detokenize.h"
#if CONFIG_ERROR_CONCEALMENT
......@@ -42,6 +45,17 @@ extern void vp8cx_init_de_quantizer(VP8D_COMP *pbi);
static int get_free_fb (VP8_COMMON *cm);
static void ref_cnt_fb (int *buf, int *idx, int new_idx);
/* Decoder-wide initialization: sets up the vpx_dsp run-time dispatch and
 * the intra predictor tables. A function-local flag turns every call after
 * the first completed one into a no-op. */
static void initialize_dec(void) {
    static volatile int init_done = 0;

    if (init_done)
    {
        return;
    }

    vpx_dsp_rtcd();
    vp8_init_intra_predictors();
    init_done = 1;
}
static void remove_decompressor(VP8D_COMP *pbi)
{
#if CONFIG_ERROR_CONCEALMENT
......@@ -105,6 +119,8 @@ static struct VP8D_COMP * create_decompressor(VP8D_CONFIG *oxcf)
vp8_setup_block_dptrs(&pbi->mb);
once(initialize_dec);
return pbi;
}
......