From 8b3faccb9e950c01daa3d9aa011970674e1ec0fc Mon Sep 17 00:00:00 2001
From: Deb Mukherjee <debargha@google.com>
Date: Wed, 31 Jul 2013 09:33:58 -0700
Subject: [PATCH] Add variance-based mode skipping

Adds a speed feature to skip all intra modes other than
DC_PRED if the source variance is small. This feature is
made part of speed 1 and up.

Results on derf300: PSNR -0.07%, speedup about 1-2%

Also uses the source variance to fine-tune the early
termination criteria when FLAG_EARLY_TERMINATE is on.
This feature is made part of speed 2 and up.

Results on derf300: PSNR -0.52%, speedup about 5-7%

Change-Id: I59e38aa836557cfa5405ae706fc64815cbfe4232
---
 vp9/encoder/vp9_block.h       |  1 +
 vp9/encoder/vp9_encodeframe.c |  3 +++
 vp9/encoder/vp9_onyx_if.c     | 15 +++++++++++----
 vp9/encoder/vp9_onyx_int.h    |  8 ++++++--
 vp9/encoder/vp9_rdopt.c       | 30 ++++++++++++++++++++++++------
 5 files changed, 45 insertions(+), 12 deletions(-)

diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index 634c0b44ed..ebab85942a 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -102,6 +102,7 @@ struct macroblock {
 
   int mv_best_ref_index[MAX_REF_FRAMES];
   unsigned int max_mv_context[MAX_REF_FRAMES];
+  unsigned int source_variance;
 
   int nmvjointcost[MV_JOINTS];
   int nmvcosts[2][MV_VALS];
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 4825378372..272e38c51b 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -577,6 +577,8 @@ static void pick_sb_modes(VP9_COMP *cpi, int mi_row, int mi_col,
 
   set_offsets(cpi, mi_row, mi_col, bsize);
   xd->mode_info_context->mbmi.sb_type = bsize;
+
+  x->source_variance = get_sb_variance(cpi, x, bsize);
   if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
     vp9_activity_masking(cpi, x);
 
@@ -1907,6 +1909,7 @@ static void encode_sb_row(VP9_COMP *cpi, int mi_row, TOKENEXTRA **tp,
       MODE_INFO *m = cm->mi + idx_str;
       MODE_INFO *p = cm->prev_mi + idx_str;
 
+      cpi->mb.source_variance = UINT_MAX;
       if (cpi->sf.use_one_partition_size_always) {
         set_offsets(cpi, mi_row, mi_col, BLOCK_SIZE_SB64X64);
         set_partitioning(cpi, m, cpi->sf.always_this_block_size);
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 78992cfff1..c70198d1e3 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -788,7 +788,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
         sf->unused_mode_skip_lvl = BLOCK_SIZE_SB32X32;
         sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH |
                                      FLAG_SKIP_INTRA_BESTINTER |
-                                     FLAG_SKIP_COMP_BESTINTRA;
+                                     FLAG_SKIP_COMP_BESTINTRA |
+                                     FLAG_SKIP_INTRA_LOWVAR;
         sf->use_uv_intra_rd_estimate = 1;
         sf->use_rd_breakout = 1;
         sf->skip_encode_sb = 1;
@@ -816,7 +817,9 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
         sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH |
                                      FLAG_SKIP_INTRA_BESTINTER |
                                      FLAG_SKIP_COMP_BESTINTRA |
-                                     FLAG_SKIP_COMP_REFMISMATCH;
+                                     FLAG_SKIP_COMP_REFMISMATCH |
+                                     FLAG_SKIP_INTRA_LOWVAR |
+                                     FLAG_EARLY_TERMINATE;
         sf->last_chroma_intra_mode = DC_PRED;
         sf->use_uv_intra_rd_estimate = 1;
         sf->use_rd_breakout = 1;
@@ -837,7 +840,9 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
         sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH |
                                      FLAG_SKIP_INTRA_BESTINTER |
                                      FLAG_SKIP_COMP_BESTINTRA |
-                                     FLAG_SKIP_COMP_REFMISMATCH;
+                                     FLAG_SKIP_COMP_REFMISMATCH |
+                                     FLAG_SKIP_INTRA_LOWVAR |
+                                     FLAG_EARLY_TERMINATE;
         sf->use_rd_breakout = 1;
         sf->skip_encode_sb = 1;
         sf->disable_splitmv = 1;
@@ -855,7 +860,9 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
         sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH |
                                      FLAG_SKIP_INTRA_BESTINTER |
                                      FLAG_SKIP_COMP_BESTINTRA |
-                                     FLAG_SKIP_COMP_REFMISMATCH;
+                                     FLAG_SKIP_COMP_REFMISMATCH |
+                                     FLAG_SKIP_INTRA_LOWVAR |
+                                     FLAG_EARLY_TERMINATE;
         sf->use_rd_breakout = 1;
         sf->optimize_coefficients = 0;
         sf->auto_mv_step_size = 1;
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index f4d8db4e1a..3091975abc 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -145,6 +145,8 @@ typedef struct {
 // const MODE_DEFINITION vp9_mode_order[MAX_MODES] used in the rd code.
 typedef enum {
   THR_NEARESTMV,
+  THR_DC,
+
   THR_NEARESTA,
   THR_NEARESTG,
   THR_NEWMV,
@@ -152,8 +154,6 @@ typedef enum {
   THR_NEARMV,
   THR_COMP_NEARESTGA,
 
-  THR_DC,
-
   THR_NEWG,
   THR_NEWA,
   THR_NEARA,
@@ -224,6 +224,10 @@ typedef enum {
   // skips oblique intra modes  at angles 27, 63, 117, 153 if the best
   // intra so far is not one of the neighboring directions
   FLAG_SKIP_INTRA_DIRMISMATCH = 16,
+
+  // skips intra modes other than DC_PRED if the source variance
+  // is small
+  FLAG_SKIP_INTRA_LOWVAR = 32,
 } MODE_SEARCH_SKIP_LOGIC;
 
 typedef struct {
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 8d822b39b3..d44b4e75c5 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -54,6 +54,8 @@ DECLARE_ALIGNED(16, extern const uint8_t,
 
 const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
   {NEARESTMV, LAST_FRAME,   NONE},
+  {DC_PRED,   INTRA_FRAME,  NONE},
+
   {NEARESTMV, ALTREF_FRAME, NONE},
   {NEARESTMV, GOLDEN_FRAME, NONE},
   {NEWMV,     LAST_FRAME,   NONE},
@@ -61,8 +63,6 @@ const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
   {NEARMV,    LAST_FRAME,   NONE},
   {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
 
-  {DC_PRED,   INTRA_FRAME,  NONE},
-
   {NEWMV,     GOLDEN_FRAME, NONE},
   {NEWMV,     ALTREF_FRAME, NONE},
   {NEARMV,    ALTREF_FRAME, NONE},
@@ -3525,6 +3525,16 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
         txfm_cache[i] = txfm_cache[ONLY_4X4];
     } else if (ref_frame == INTRA_FRAME) {
       TX_SIZE uv_tx;
+      // Disable intra modes other than DC_PRED for blocks with low variance.
+      // Threshold for intra skipping based on source variance, per block size.
+      // TODO(debargha): Specialize the threshold for super block sizes
+      static const int skip_intra_var_thresh[BLOCK_SIZE_TYPES] = {
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+      };
+      if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
+          this_mode != DC_PRED &&
+          x->source_variance < skip_intra_var_thresh[mbmi->sb_type])
+        continue;
       // Only search the oblique modes if the best so far is
       // one of the neighboring directional modes
       if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
@@ -3851,8 +3861,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
     if (this_rd < best_rd || x->skip) {
       if (!mode_excluded) {
         // Note index of best mode so far
-        const int qstep = xd->plane[0].dequant[1];
-
         best_mode_index = mode_index;
 
         if (ref_frame == INTRA_FRAME) {
@@ -3875,9 +3883,19 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
 
         // TODO(debargha): enhance this test with a better distortion prediction
         // based on qp, activity mask and history
-        if (cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE)
-          if (ref_frame > INTRA_FRAME && distortion2 * 4 < qstep * qstep)
+        if (cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) {
+          const int qstep = xd->plane[0].dequant[1];
+          // TODO(debargha): Enhance this by specializing for each mode_index
+          int scale = 4;
+          if (x->source_variance < UINT_MAX) {
+            const int var_adjust = (x->source_variance < 16);
+            scale -= var_adjust;
+          }
+          if (ref_frame > INTRA_FRAME &&
+              distortion2 * scale < qstep * qstep) {
             early_term = 1;
+          }
+        }
       }
 #if 0
       // Testing this mode gave rise to an improvement in best error score.
-- 
GitLab