Commit 8fd3f9a2 authored by Marco's avatar Marco

Enable non-rd mode coding on key frame, for speed 6.

For key frame at speed 6: enable the non-rd mode selection in speed setting
and use the (non-rd) variance_based partition.

Adjust some logic/thresholds in variance partition selection for key frame only (no change to delta frames),
mainly to bias to selecting smaller prediction blocks, and also set max tx size of 16x16.

Loss in key frame quality (~0.6-0.7dB) compared to rd coding,
but speeds up key frame encoding by at least 6x.
Average PNSR/SSIM metrics over RTC clips go down by ~1-2% for speed 6.

Change-Id: Ie4845e0127e876337b9c105aa37e93b286193405
parent 99874f55
......@@ -57,7 +57,7 @@ class AverageTestBase : public ::testing::Test {
}
// Sum Pixels
unsigned int ReferenceAverage(const uint8_t* source, int pitch ) {
unsigned int ReferenceAverage8x8(const uint8_t* source, int pitch ) {
unsigned int average = 0;
for (int h = 0; h < 8; ++h)
for (int w = 0; w < 8; ++w)
......@@ -65,6 +65,14 @@ class AverageTestBase : public ::testing::Test {
return ((average + 32) >> 6);
}
unsigned int ReferenceAverage4x4(const uint8_t* source, int pitch ) {
unsigned int average = 0;
for (int h = 0; h < 4; ++h)
for (int w = 0; w < 4; ++w)
average += source[h * source_stride_ + w];
return ((average + 8) >> 4);
}
void FillConstant(uint8_t fill_constant) {
for (int i = 0; i < width_ * height_; ++i) {
source_data_[i] = fill_constant;
......@@ -85,7 +93,7 @@ class AverageTestBase : public ::testing::Test {
};
typedef unsigned int (*AverageFunction)(const uint8_t* s, int pitch);
typedef std::tr1::tuple<int, int, int, AverageFunction> AvgFunc;
typedef std::tr1::tuple<int, int, int, int, AverageFunction> AvgFunc;
class AverageTest
: public AverageTestBase,
......@@ -95,12 +103,18 @@ class AverageTest
protected:
void CheckAverages() {
unsigned int expected = ReferenceAverage(source_data_+ GET_PARAM(2),
source_stride_);
unsigned int expected = 0;
if (GET_PARAM(3) == 8) {
expected = ReferenceAverage8x8(source_data_+ GET_PARAM(2),
source_stride_);
} else if (GET_PARAM(3) == 4) {
expected = ReferenceAverage4x4(source_data_+ GET_PARAM(2),
source_stride_);
}
ASM_REGISTER_STATE_CHECK(GET_PARAM(3)(source_data_+ GET_PARAM(2),
ASM_REGISTER_STATE_CHECK(GET_PARAM(4)(source_data_+ GET_PARAM(2),
source_stride_));
unsigned int actual = GET_PARAM(3)(source_data_+ GET_PARAM(2),
unsigned int actual = GET_PARAM(4)(source_data_+ GET_PARAM(2),
source_stride_);
EXPECT_EQ(expected, actual);
......@@ -134,16 +148,20 @@ using std::tr1::make_tuple;
INSTANTIATE_TEST_CASE_P(
C, AverageTest,
::testing::Values(
make_tuple(16, 16, 1, &vp9_avg_8x8_c)));
make_tuple(16, 16, 1, 8, &vp9_avg_8x8_c),
make_tuple(16, 16, 1, 4, &vp9_avg_4x4_c)));
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(
SSE2, AverageTest,
::testing::Values(
make_tuple(16, 16, 0, &vp9_avg_8x8_sse2),
make_tuple(16, 16, 5, &vp9_avg_8x8_sse2),
make_tuple(32, 32, 15, &vp9_avg_8x8_sse2)));
make_tuple(16, 16, 0, 8, &vp9_avg_8x8_sse2),
make_tuple(16, 16, 5, 8, &vp9_avg_8x8_sse2),
make_tuple(32, 32, 15, 8, &vp9_avg_8x8_sse2),
make_tuple(16, 16, 0, 4, &vp9_avg_4x4_sse2),
make_tuple(16, 16, 5, 4, &vp9_avg_4x4_sse2),
make_tuple(32, 32, 15, 4, &vp9_avg_4x4_sse2)));
#endif
......
......@@ -1135,9 +1135,14 @@ specialize qw/vp9_get_mb_ss/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_avg_8x8/, "const uint8_t *, int p";
specialize qw/vp9_avg_8x8 sse2/;
add_proto qw/unsigned int vp9_avg_4x4/, "const uint8_t *, int p";
specialize qw/vp9_avg_4x4 sse2/;
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/unsigned int vp9_highbd_avg_8x8/, "const uint8_t *, int p";
specialize qw/vp9_highbd_avg_8x8/;
add_proto qw/unsigned int vp9_highbd_avg_4x4/, "const uint8_t *, int p";
specialize qw/vp9_highbd_avg_4x4/;
}
# ENCODEMB INVOKE
......
......@@ -19,6 +19,15 @@ unsigned int vp9_avg_8x8_c(const uint8_t *s, int p) {
return (sum + 32) >> 6;
}
unsigned int vp9_avg_4x4_c(const uint8_t *s, int p) {
int i, j;
int sum = 0;
for (i = 0; i < 4; ++i, s+=p)
for (j = 0; j < 4; sum += s[j], ++j) {}
return (sum + 8) >> 4;
}
#if CONFIG_VP9_HIGHBITDEPTH
unsigned int vp9_highbd_avg_8x8_c(const uint8_t *s8, int p) {
int i, j;
......@@ -29,5 +38,16 @@ unsigned int vp9_highbd_avg_8x8_c(const uint8_t *s8, int p) {
return (sum + 32) >> 6;
}
unsigned int vp9_highbd_avg_4x4_c(const uint8_t *s8, int p) {
int i, j;
int sum = 0;
const uint16_t* s = CONVERT_TO_SHORTPTR(s8);
for (i = 0; i < 4; ++i, s+=p)
for (j = 0; j < 4; sum += s[j], ++j) {}
return (sum + 8) >> 4;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
This diff is collapsed.
......@@ -321,7 +321,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
sf->partition_search_type = VAR_BASED_PARTITION;
// Turn on this to use non-RD key frame coding mode.
// sf->use_nonrd_pick_mode = 1;
sf->use_nonrd_pick_mode = 1;
sf->mv.search_method = NSTEP;
sf->tx_size_search_method = is_keyframe ? USE_LARGESTALL : USE_TX_8X8;
sf->mv.reduce_first_step_size = 1;
......
......@@ -38,3 +38,21 @@ unsigned int vp9_avg_8x8_sse2(const uint8_t *s, int p) {
avg = _mm_extract_epi16(s0, 0);
return (avg + 32) >> 6;
}
unsigned int vp9_avg_4x4_sse2(const uint8_t *s, int p) {
__m128i s0, s1, u0;
unsigned int avg = 0;
u0 = _mm_setzero_si128();
s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s)), u0);
s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + p)), u0);
s0 = _mm_adds_epu16(s0, s1);
s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 2 * p)), u0);
s0 = _mm_adds_epu16(s0, s1);
s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 3 * p)), u0);
s0 = _mm_adds_epu16(s0, s1);
s0 = _mm_adds_epu16(s0, _mm_srli_si128(s0, 4));
s0 = _mm_adds_epu16(s0, _mm_srli_epi64(s0, 16));
avg = _mm_extract_epi16(s0, 0);
return (avg + 8) >> 4;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment