From 1374a06bd87e2e2c41e17738bf5215d53e5223a0 Mon Sep 17 00:00:00 2001
From: Yaowu Xu <yaowu@google.com>
Date: Thu, 27 Jun 2013 12:07:07 -0700
Subject: [PATCH] Optimize partition search order

This commit changes the partition search order so that rectangular
partitions are checked after the square partitions. It also adds a
speed feature that skips the rectangular partition check when
NONE is better than SPLIT in the RD sense.

This feature speeds up the encoder by roughly 1.5X, at a compression
loss of:
-0.91% on cif set
-0.56% on stdhd set

Change-Id: I0d2d06993041aa9ea9073fcc39c54f73a127dfa4
---
 vp9/encoder/vp9_encodeframe.c | 133 +++++++++++++++++-----------------
 vp9/encoder/vp9_onyx_if.c     |  13 +++-
 vp9/encoder/vp9_onyx_int.h    |   1 +
 3 files changed, 78 insertions(+), 69 deletions(-)

diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 168b460127..33e532dfad 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1351,71 +1351,7 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
   if (!cpi->sf.use_partitions_less_than
       || (cpi->sf.use_partitions_less_than
           && bsize <= cpi->sf.less_than_block_size)) {
-    // PARTITION_HORZ
-    if (bsize >= BLOCK_SIZE_SB8X8 && mi_col + (ms >> 1) < cm->mi_cols) {
-      int r2, r = 0;
-      int64_t d2, d = 0;
-      subsize = get_subsize(bsize, PARTITION_HORZ);
-      *(get_sb_index(xd, subsize)) = 0;
-      pick_sb_modes(cpi, mi_row, mi_col, tp, &r2, &d2, subsize,
-                    get_block_context(x, subsize));
-
-      if (mi_row + (ms >> 1) < cm->mi_rows) {
-        update_state(cpi, get_block_context(x, subsize), subsize, 0);
-        encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
-
-        *(get_sb_index(xd, subsize)) = 1;
-        pick_sb_modes(cpi, mi_row + (ms >> 1), mi_col, tp, &r, &d, subsize,
-                      get_block_context(x, subsize));
-        r2 += r;
-        d2 += d;
-      }
-      set_partition_seg_context(cm, xd, mi_row, mi_col);
-      pl = partition_plane_context(xd, bsize);
-      if (r2 < INT_MAX)
-        r2 += x->partition_cost[pl][PARTITION_HORZ];
-      if (RDCOST(x->rdmult, x->rddiv, r2, d2)
-          < RDCOST(x->rdmult, x->rddiv, srate, sdist)) {
-        srate = r2;
-        sdist = d2;
-        *(get_sb_partitioning(x, bsize)) = subsize;
-      }
-      restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
-    }
-
-    // PARTITION_VERT
-    if (bsize >= BLOCK_SIZE_SB8X8 && mi_row + (ms >> 1) < cm->mi_rows) {
-      int r2;
-      int64_t d2;
-      subsize = get_subsize(bsize, PARTITION_VERT);
-      *(get_sb_index(xd, subsize)) = 0;
-      pick_sb_modes(cpi, mi_row, mi_col, tp, &r2, &d2, subsize,
-                    get_block_context(x, subsize));
-      if (mi_col + (ms >> 1) < cm->mi_cols) {
-        int r = 0;
-        int64_t d = 0;
-        update_state(cpi, get_block_context(x, subsize), subsize, 0);
-        encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
-
-        *(get_sb_index(xd, subsize)) = 1;
-        pick_sb_modes(cpi, mi_row, mi_col + (ms >> 1), tp, &r, &d, subsize,
-                      get_block_context(x, subsize));
-        r2 += r;
-        d2 += d;
-      }
-      set_partition_seg_context(cm, xd, mi_row, mi_col);
-      pl = partition_plane_context(xd, bsize);
-      if (r2 < INT_MAX)
-        r2 += x->partition_cost[pl][PARTITION_VERT];
-      if (RDCOST(x->rdmult, x->rddiv, r2, d2)
-          < RDCOST(x->rdmult, x->rddiv, srate, sdist)) {
-        srate = r2;
-        sdist = d2;
-        *(get_sb_partitioning(x, bsize)) = subsize;
-      }
-      restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
-    }
-
+    int larger_is_better = 0;
     // PARTITION_NONE
     if ((mi_row + (ms >> 1) < cm->mi_rows) &&
         (mi_col + (ms >> 1) < cm->mi_cols)) {
@@ -1433,10 +1369,77 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
           < RDCOST(x->rdmult, x->rddiv, srate, sdist)) {
         srate = r;
         sdist = d;
+        larger_is_better = 1;
         if (bsize >= BLOCK_SIZE_SB8X8)
           *(get_sb_partitioning(x, bsize)) = bsize;
       }
     }
+    if (!cpi->sf.less_rectangular_check || !larger_is_better) {
+      // PARTITION_HORZ
+      if (bsize >= BLOCK_SIZE_SB8X8 && mi_col + (ms >> 1) < cm->mi_cols) {
+        int r2, r = 0;
+        int64_t d2, d = 0;
+        subsize = get_subsize(bsize, PARTITION_HORZ);
+        *(get_sb_index(xd, subsize)) = 0;
+        pick_sb_modes(cpi, mi_row, mi_col, tp, &r2, &d2, subsize,
+                      get_block_context(x, subsize));
+
+        if (mi_row + (ms >> 1) < cm->mi_rows) {
+          update_state(cpi, get_block_context(x, subsize), subsize, 0);
+          encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
+
+          *(get_sb_index(xd, subsize)) = 1;
+          pick_sb_modes(cpi, mi_row + (ms >> 1), mi_col, tp, &r, &d, subsize,
+                        get_block_context(x, subsize));
+          r2 += r;
+          d2 += d;
+        }
+        set_partition_seg_context(cm, xd, mi_row, mi_col);
+        pl = partition_plane_context(xd, bsize);
+        if (r2 < INT_MAX)
+          r2 += x->partition_cost[pl][PARTITION_HORZ];
+        if (RDCOST(x->rdmult, x->rddiv, r2, d2)
+            < RDCOST(x->rdmult, x->rddiv, srate, sdist)) {
+          srate = r2;
+          sdist = d2;
+          *(get_sb_partitioning(x, bsize)) = subsize;
+        }
+        restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
+      }
+
+      // PARTITION_VERT
+      if (bsize >= BLOCK_SIZE_SB8X8 && mi_row + (ms >> 1) < cm->mi_rows) {
+        int r2;
+        int64_t d2;
+        subsize = get_subsize(bsize, PARTITION_VERT);
+        *(get_sb_index(xd, subsize)) = 0;
+        pick_sb_modes(cpi, mi_row, mi_col, tp, &r2, &d2, subsize,
+                      get_block_context(x, subsize));
+        if (mi_col + (ms >> 1) < cm->mi_cols) {
+          int r = 0;
+          int64_t d = 0;
+          update_state(cpi, get_block_context(x, subsize), subsize, 0);
+          encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
+
+          *(get_sb_index(xd, subsize)) = 1;
+          pick_sb_modes(cpi, mi_row, mi_col + (ms >> 1), tp, &r, &d, subsize,
+                        get_block_context(x, subsize));
+          r2 += r;
+          d2 += d;
+        }
+        set_partition_seg_context(cm, xd, mi_row, mi_col);
+        pl = partition_plane_context(xd, bsize);
+        if (r2 < INT_MAX)
+          r2 += x->partition_cost[pl][PARTITION_VERT];
+        if (RDCOST(x->rdmult, x->rddiv, r2, d2)
+            < RDCOST(x->rdmult, x->rddiv, srate, sdist)) {
+          srate = r2;
+          sdist = d2;
+          *(get_sb_partitioning(x, bsize)) = subsize;
+        }
+        restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
+      }
+    }
   }
   *rate = srate;
   *dist = sdist;
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 3b09b9f11f..b7635ce77f 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -659,7 +659,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
   int mode = cpi->compressor_speed;
   int speed = cpi->speed;
   int i;
-
   // Only modes 0 and 1 supported for now in experimental code basae
   if (mode > 1)
     mode = 1;
@@ -699,6 +698,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
   sf->adjust_thresholds_by_speed = 0;
   sf->partition_by_variance = 0;
   sf->use_one_partition_size_always = 0;
+  sf->less_rectangular_check = 0;
   sf->use_partitions_less_than = 0;
   sf->less_than_block_size = BLOCK_SIZE_MB16X16;
   sf->use_partitions_greater_than = 0;
@@ -730,12 +730,17 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
 
       if (speed == 1) {
         sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES;
-        sf->use_largest_txform = !(cpi->common.frame_type == KEY_FRAME ||
-                                   cpi->common.intra_only ||
-                                   cpi->common.show_frame == 0);
+        sf->less_rectangular_check  = 1;
+        sf->use_largest_txform        = !(cpi->common.frame_type == KEY_FRAME ||
+                                          cpi->common.intra_only ||
+                                          cpi->common.show_frame == 0);
       }
       if (speed == 2) {
+        sf->use_largest_txform        = !(cpi->common.frame_type == KEY_FRAME ||
+                                          cpi->common.intra_only ||
+                                          cpi->common.show_frame == 0);
         sf->adjust_thresholds_by_speed = 1;
+        sf->less_rectangular_check  = 1;
         sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES;
         sf->reduce_first_step_size = 1;
         sf->optimize_coefficients = 0;
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 63b0155491..a1f567aedb 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -226,6 +226,7 @@ typedef struct {
   int adjust_thresholds_by_speed;
   int partition_by_variance;
   int use_one_partition_size_always;
+  int less_rectangular_check;
   BLOCK_SIZE_TYPE always_this_block_size;
   int use_partitions_greater_than;
   BLOCK_SIZE_TYPE greater_than_block_size;
-- 
GitLab