From 6253cc9279652fd7550bc103f85bb45525ff20f1 Mon Sep 17 00:00:00 2001
From: Paul Wilkins <paulwilkins@google.com>
Date: Tue, 1 Oct 2013 16:57:18 +0100
Subject: [PATCH] Speed setting review.

Substantial reworking of the speed vs quality trade-offs for
speed 1 and 2.

In this patch I am attempting to freeze the "quality" meaning of
speeds 1 and 2 relative to speed 0 so that in future we can
better evaluate progress.

I am targeting:
Speed 1 quality ~-5% vs speed 0.
Speed 2 quality ~-10% vs speed 0.

It is inevitable that quality will still fluctuate a little as we adjust
settings and add new features, but we will attempt to keep as
close as possible to these values. Above speed 2 things will remain
a bit more fluid for now.

In this patch speed 1 is approximately 4-5x as fast as speed 0. This
is similar to before but the quality hit is a lot less. Likewise speed 2
is approximately 2x as fast as speed 1 but is similar in quality to the
previous speed 1 configuration.

Also a slight change to the behavior of FLAG_EARLY_TERMINATE to ensure
all reference frames get at least one rd test. This is important for very
low variance regions.

WIP: Added a new speed level, with the old speed 4 becoming speed 5.
The speed 3 and 4 trade-offs are still WIP.

Change-Id: Ic7a38dd7b5b63ab1501f9352411972f480ac6264
---
 vp9/encoder/vp9_encodeframe.c |   4 +-
 vp9/encoder/vp9_onyx_if.c     | 107 +++++++++++++++++-----------------
 vp9/encoder/vp9_onyx_int.h    |  10 +++-
 vp9/encoder/vp9_rdopt.c       |   5 +-
 4 files changed, 70 insertions(+), 56 deletions(-)

diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 0c1c30d681..957c57793f 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1781,7 +1781,9 @@ static void encode_sb_row(VP9_COMP *cpi, int mi_row, TOKENEXTRA **tp,
             || cpi->common.show_frame == 0
             || cpi->common.frame_type == KEY_FRAME
             || cpi->is_src_frame_alt_ref
-            || sb_has_motion(cpi, prev_mi_8x8)) {
+            || ((cpi->sf.use_lastframe_partitioning ==
+                 LAST_FRAME_PARTITION_LOW_MOTION) &&
+                 sb_has_motion(cpi, prev_mi_8x8))) {
           // If required set upper and lower partition size limits
           if (cpi->sf.auto_min_max_partition_size) {
             set_offsets(cpi, mi_row, mi_col, BLOCK_64X64);
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index acad8e161a..f7d9869962 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -714,7 +714,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
   sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
   sf->comp_inter_joint_search_thresh = BLOCK_4X4;
   sf->adaptive_rd_thresh = 0;
-  sf->use_lastframe_partitioning = 0;
+  sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_OFF;
   sf->tx_size_search_method = USE_FULL_RD;
   sf->use_lp32x32fdct = 0;
   sf->adaptive_motion_search = 0;
@@ -761,87 +761,88 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
       sf->static_segmentation = 0;
 #endif
       sf->use_avoid_tested_higherror = 1;
-      sf->adaptive_rd_thresh = MIN((speed + 1), 4);
+      sf->adaptive_rd_thresh = 1;
 
       if (speed == 1) {
-        sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
-        sf->less_rectangular_check  = 1;
+        sf->use_square_partition_only = !(cpi->common.frame_type == KEY_FRAME ||
+                                          cpi->common.intra_only);
         sf->tx_size_search_method = ((cpi->common.frame_type == KEY_FRAME ||
-                                      cpi->common.intra_only ||
-                                      cpi->common.show_frame == 0) ?
-                                     USE_FULL_RD :
-                                     USE_LARGESTALL);
+                                      cpi->common.intra_only)
+                                     ? USE_FULL_RD : USE_LARGESTALL);
+        sf->disable_splitmv =
+          (MIN(cpi->common.width, cpi->common.height) >= 720)? 1 : 0;
+
+        sf->use_rd_breakout = 1;
+        sf->adaptive_motion_search = 1;
+        sf->auto_mv_step_size = 1;
+        sf->adaptive_rd_thresh = 2;
+      }
+      if (speed == 2) {
         sf->use_square_partition_only = !(cpi->common.frame_type == KEY_FRAME ||
-                                   cpi->common.intra_only ||
-                                   cpi->common.show_frame == 0);
+                                          cpi->common.intra_only);
+        sf->tx_size_search_method = ((cpi->common.frame_type == KEY_FRAME ||
+                                      cpi->common.intra_only)
+                                     ? USE_FULL_RD : USE_LARGESTALL);
         sf->disable_splitmv =
-            (MIN(cpi->common.width, cpi->common.height) >= 720)? 1 : 0;
+          (MIN(cpi->common.width, cpi->common.height) >= 720)? 1 : 0;
+
         sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH |
                                      FLAG_SKIP_INTRA_BESTINTER |
                                      FLAG_SKIP_COMP_BESTINTRA |
                                      FLAG_SKIP_INTRA_LOWVAR;
-        sf->use_uv_intra_rd_estimate = 1;
+
         sf->use_rd_breakout = 1;
-        sf->skip_encode_sb = 1;
-        sf->use_lp32x32fdct = 1;
         sf->adaptive_motion_search = 1;
         sf->auto_mv_step_size = 1;
 
-        sf->auto_min_max_partition_size = 1;
-        // FIXME(jingning): temporarily turn off disable_split_var_thresh
-        // during refactoring process. will get this back after finishing
-        // the main framework of partition search type.
-        sf->disable_split_var_thresh = 0;
         sf->disable_filter_search_var_thresh = 16;
+        sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
+
+        sf->auto_min_max_partition_size = 1;
+        sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION;
+        sf->adjust_partitioning_from_last_frame = 1;
+        sf->last_partitioning_redo_frequency = 3;
 
-        sf->intra_y_mode_mask = INTRA_DC_TM_H_V;
-        sf->intra_uv_mode_mask = INTRA_DC_TM_H_V;
-        sf->use_fast_coef_updates = 1;
+        sf->adaptive_rd_thresh = 2;
         sf->mode_skip_start = 11;
       }
-      if (speed == 2) {
-        sf->less_rectangular_check  = 1;
+      if (speed == 3) {
         sf->use_square_partition_only = 1;
-        sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
-        sf->use_lastframe_partitioning = 1;
-        sf->adjust_partitioning_from_last_frame = 1;
-        sf->last_partitioning_redo_frequency = 3;
-        sf->tx_size_search_method = ((cpi->common.frame_type == KEY_FRAME ||
-                                      cpi->common.intra_only ||
-                                      cpi->common.show_frame == 0) ?
-                                     USE_FULL_RD :
-                                     USE_LARGESTALL);
+        sf->tx_size_search_method = USE_LARGESTALL;
+        sf->disable_splitmv =
+          (MIN(cpi->common.width, cpi->common.height) >= 720)? 1 : 0;
+
         sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH |
                                      FLAG_SKIP_INTRA_BESTINTER |
                                      FLAG_SKIP_COMP_BESTINTRA |
-                                     FLAG_SKIP_COMP_REFMISMATCH |
-                                     FLAG_SKIP_INTRA_LOWVAR |
-                                     FLAG_EARLY_TERMINATE;
-        sf->intra_y_mode_mask = INTRA_DC_TM;
-        sf->intra_uv_mode_mask = INTRA_DC_TM;
-        sf->use_uv_intra_rd_estimate = 1;
+                                     FLAG_SKIP_INTRA_LOWVAR;
+
         sf->use_rd_breakout = 1;
-        sf->skip_encode_sb = 1;
-        sf->use_lp32x32fdct = 1;
         sf->adaptive_motion_search = 1;
-        sf->using_small_partition_info = 0;
-        sf->disable_splitmv =
-            (MIN(cpi->common.width, cpi->common.height) >= 720)? 1 : 0;
         sf->auto_mv_step_size = 1;
-        sf->search_method = SQUARE;
-        sf->subpel_iters_per_step = 1;
-        sf->use_fast_lpf_pick = 1;
+
+        sf->disable_filter_search_var_thresh = 16;
+        sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
+
         sf->auto_min_max_partition_size = 1;
-        sf->disable_split_var_thresh = 32;
-        sf->disable_filter_search_var_thresh = 32;
+        sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL;
+        sf->adjust_partitioning_from_last_frame = 1;
+        sf->last_partitioning_redo_frequency = 3;
+
+        sf->use_uv_intra_rd_estimate = 1;
+        sf->skip_encode_sb = 1;
+        sf->use_lp32x32fdct = 1;
+        sf->subpel_iters_per_step = 1;
         sf->use_fast_coef_updates = 2;
+
+        sf->adaptive_rd_thresh = 4;
         sf->mode_skip_start = 6;
       }
-      if (speed == 3) {
+      if (speed == 4) {
         sf->less_rectangular_check  = 1;
         sf->use_square_partition_only = 1;
         sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
-        sf->use_lastframe_partitioning = 1;
+        sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL;
         sf->adjust_partitioning_from_last_frame = 1;
         sf->last_partitioning_redo_frequency = 3;
         sf->tx_size_search_method = ((cpi->common.frame_type == KEY_FRAME ||
@@ -872,9 +873,10 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
         sf->disable_split_var_thresh = 64;
         sf->disable_filter_search_var_thresh = 64;
         sf->use_fast_coef_updates = 2;
+        sf->adaptive_rd_thresh = 4;
         sf->mode_skip_start = 6;
       }
-      if (speed == 4) {
+      if (speed == 5) {
         sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
         sf->use_one_partition_size_always = 1;
         sf->always_this_block_size = BLOCK_16X16;
@@ -904,6 +906,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
         sf->intra_y_mode_mask = INTRA_DC_ONLY;
         sf->intra_uv_mode_mask = INTRA_DC_ONLY;
         sf->use_fast_coef_updates = 2;
+        sf->adaptive_rd_thresh = 4;
         sf->mode_skip_start = 6;
       }
       break;
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 3dbeb98e2a..db8b484cc8 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -36,7 +36,7 @@
 #define DISABLE_RC_LONG_TERM_MEM 0
 #endif
 
-// #define MODE_TEST_HIT_STATS
+#define MODE_TEST_HIT_STATS
 
 // #define SPEEDSTATS 1
 #if CONFIG_MULTIPLE_ARF
@@ -229,6 +229,12 @@ typedef enum {
 #define INTRA_DC_TM ((1 << TM_PRED) | (1 << DC_PRED))
 #define INTRA_DC_TM_H_V (INTRA_DC_TM | (1 << V_PRED) | (1 << H_PRED))
 
+typedef enum {
+  LAST_FRAME_PARTITION_OFF = 0,
+  LAST_FRAME_PARTITION_LOW_MOTION = 1,
+  LAST_FRAME_PARTITION_ALL = 2
+} LAST_FRAME_PARTITION_METHOD;
+
 typedef struct {
   int RD;
   SEARCH_METHODS search_method;
@@ -246,7 +252,7 @@ typedef struct {
   int adaptive_rd_thresh;
   int skip_encode_sb;
   int skip_encode_frame;
-  int use_lastframe_partitioning;
+  LAST_FRAME_PARTITION_METHOD use_lastframe_partitioning;
   TX_SIZE_SEARCH_METHOD tx_size_search_method;
   int use_lp32x32fdct;
   int use_avoid_tested_higherror;
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 83cd612261..96ed6f7952 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -52,6 +52,8 @@ DECLARE_ALIGNED(16, extern const uint8_t,
 #define GOLDEN_FRAME_MODE_MASK  0xFFB5A3BB0
 #define ALT_REF_MODE_MASK       0xFF8C648D0
 
+#define MIN_EARLY_TERM_INDEX    3
+
 const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
   {RD_NEARESTMV, LAST_FRAME,   NONE},
   {RD_NEARESTMV, ALTREF_FRAME, NONE},
@@ -3851,7 +3853,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
 
         // TODO(debargha): enhance this test with a better distortion prediction
         // based on qp, activity mask and history
-        if (cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) {
+        if ((cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
+            (mode_index > MIN_EARLY_TERM_INDEX)) {
           const int qstep = xd->plane[0].dequant[1];
           // TODO(debargha): Enhance this by specializing for each mode_index
           int scale = 4;
-- 
GitLab