From 360c80cfb95559b3a49fc4624a24dee01e1abb6a Mon Sep 17 00:00:00 2001
From: Yunqing Wang <yunqingwang@google.com>
Date: Fri, 14 Feb 2014 15:55:49 -0800
Subject: [PATCH] Use fast HEX search in real time mode

In good quality mode motion search, the best matches are normally
found after searching in a large area. In real time mode, to make
encoding fast, a center-biased fast HEX search is used, which
converges quickly most of the time. A 4-point diamond search is
also carried out as the following refining search, which gives more
precise results, and maintains good motion search quality.

At speed 5, the borg test on rtc set showed an overall PSNR loss of
0.936%. The encoding speed gain is 4% - 5%.

Change-Id: I42cd68bb56a09ca1b86293c99d5f7312225ca7ae
---
 vp9/encoder/vp9_mcomp.c    | 178 +++++++++++++++++++++++++++++++++++++
 vp9/encoder/vp9_mcomp.h    |   8 ++
 vp9/encoder/vp9_onyx_if.c  |   2 +
 vp9/encoder/vp9_onyx_int.h |   3 +-
 vp9/encoder/vp9_rdopt.c    |   6 +-
 5 files changed, 195 insertions(+), 2 deletions(-)

diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 94e49bd4a7..10dee52eda 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -855,6 +855,184 @@ int vp9_square_search(const MACROBLOCK *x,
                             square_num_candidates, square_candidates);
 };
 
+// Number of candidates in first hex search
+#define FIRST_HEX_CANDIDATES 6
+// Index of previous hex search's best match
+#define PRE_BEST_CANDIDATE 6
+// Number of candidates in following hex search
+#define NEXT_HEX_CANDIDATES 3
+// Number of candidates in refining search
+#define REFINE_CANDIDATES 4
+
+int vp9_fast_hex_search(const MACROBLOCK *x,
+                        MV *ref_mv,
+                        int search_param,
+                        int sad_per_bit,
+                        const vp9_variance_fn_ptr_t *vfp,
+                        int use_mvcost,
+                        const MV *center_mv,
+                        MV *best_mv) {
+  const MACROBLOCKD* const xd = &x->e_mbd;
+  static const MV hex[FIRST_HEX_CANDIDATES] = {
+    { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0}
+  };
+  static const MV next_chkpts[PRE_BEST_CANDIDATE][NEXT_HEX_CANDIDATES] = {
+    {{ -2, 0}, { -1, -2}, {1, -2}},
+    {{ -1, -2}, {1, -2}, {2, 0}},
+    {{1, -2}, {2, 0}, {1, 2}},
+    {{2, 0}, {1, 2}, { -1, 2}},
+    {{1, 2}, { -1, 2}, { -2, 0}},
+    {{ -1, 2}, { -2, 0}, { -1, -2}}
+  };
+  static const MV neighbors[REFINE_CANDIDATES] = {
+      {0, -1}, { -1, 0}, {1, 0}, {0, 1}
+  };
+  int i, j;
+
+  const uint8_t *what = x->plane[0].src.buf;
+  const int what_stride = x->plane[0].src.stride;
+  const int in_what_stride = xd->plane[0].pre[0].stride;
+  int br, bc;
+  MV this_mv;
+  unsigned int bestsad = 0x7fffffff;
+  unsigned int thissad;
+  const uint8_t *base_offset;
+  const uint8_t *this_offset;
+  int k = -1;
+  int best_site = -1;
+  const int max_hex_search = 512;
+  const int max_dia_search = 32;
+
+  const int *mvjsadcost = x->nmvjointsadcost;
+  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
+
+  const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
+
+  // Adjust ref_mv to make sure it is within MV range
+  clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
+  br = ref_mv->row;
+  bc = ref_mv->col;
+
+  // Check the start point
+  base_offset = xd->plane[0].pre[0].buf;
+  this_offset = base_offset + (br * in_what_stride) + bc;
+  this_mv.row = br;
+  this_mv.col = bc;
+  bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 0x7fffffff)
+            + mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost,
+                             sad_per_bit);
+
+  // Initial 6-point hex search
+  if (check_bounds(x, br, bc, 2)) {
+    for (i = 0; i < FIRST_HEX_CANDIDATES; i++) {
+      this_mv.row = br + hex[i].row;
+      this_mv.col = bc + hex[i].col;
+      this_offset = base_offset + (this_mv.row * in_what_stride) + this_mv.col;
+      thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
+                         bestsad);
+      CHECK_BETTER
+    }
+  } else {
+    for (i = 0; i < FIRST_HEX_CANDIDATES; i++) {
+      this_mv.row = br + hex[i].row;
+      this_mv.col = bc + hex[i].col;
+      if (!is_mv_in(x, &this_mv))
+        continue;
+      this_offset = base_offset + (this_mv.row * in_what_stride) + this_mv.col;
+      thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
+                         bestsad);
+      CHECK_BETTER
+    }
+  }
+
+  // Continue hex search if we find a better match in first round
+  if (best_site != -1) {
+    br += hex[best_site].row;
+    bc += hex[best_site].col;
+    k = best_site;
+
+    // Allow search covering maximum MV range
+    for (j = 1; j < max_hex_search; j++) {
+      best_site = -1;
+
+      if (check_bounds(x, br, bc, 2)) {
+        for (i = 0; i < 3; i++) {
+          this_mv.row = br + next_chkpts[k][i].row;
+          this_mv.col = bc + next_chkpts[k][i].col;
+          this_offset = base_offset + (this_mv.row * in_what_stride) +
+              this_mv.col;
+          thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
+                             bestsad);
+          CHECK_BETTER
+        }
+      } else {
+        for (i = 0; i < 3; i++) {
+          this_mv.row = br + next_chkpts[k][i].row;
+          this_mv.col = bc + next_chkpts[k][i].col;
+          if (!is_mv_in(x, &this_mv))
+            continue;
+          this_offset = base_offset + (this_mv.row * in_what_stride) +
+              this_mv.col;
+          thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
+                             bestsad);
+          CHECK_BETTER
+        }
+      }
+
+      if (best_site == -1) {
+        break;
+      } else {
+        br += next_chkpts[k][best_site].row;
+        bc += next_chkpts[k][best_site].col;
+        k += 5 + best_site;
+        if (k >= 12) k -= 12;
+        else if (k >= 6) k -= 6;
+      }
+    }
+  }
+
+  // Check 4 1-away neighbors
+  for (j = 0; j < max_dia_search; j++) {
+    best_site = -1;
+
+    if (check_bounds(x, br, bc, 1)) {
+      for (i = 0; i < REFINE_CANDIDATES; i++) {
+        this_mv.row = br + neighbors[i].row;
+        this_mv.col = bc + neighbors[i].col;
+        this_offset = base_offset + (this_mv.row * in_what_stride) +
+            this_mv.col;
+        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
+                           bestsad);
+        CHECK_BETTER
+      }
+    } else {
+      for (i = 0; i < REFINE_CANDIDATES; i++) {
+        this_mv.row = br + neighbors[i].row;
+        this_mv.col = bc + neighbors[i].col;
+        if (!is_mv_in(x, &this_mv))
+          continue;
+        this_offset = base_offset + (this_mv.row * in_what_stride) +
+            this_mv.col;
+        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
+                           bestsad);
+        CHECK_BETTER
+      }
+    }
+
+    if (best_site == -1) {
+      break;
+    } else {
+      br += neighbors[best_site].row;
+      bc += neighbors[best_site].col;
+    }
+  }
+
+  best_mv->row = br;
+  best_mv->col = bc;
+
+  return bestsad;
+}
+
 #undef CHECK_BETTER
 
 int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h
index 4414f3d49a..ff4b1df755 100644
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -75,6 +75,14 @@ int vp9_square_search(const MACROBLOCK *x,
                       int use_mvcost,
                       const MV *center_mv,
                       MV *best_mv);
+int vp9_fast_hex_search(const MACROBLOCK *x,
+                        MV *ref_mv,
+                        int search_param,
+                        int sad_per_bit,
+                        const vp9_variance_fn_ptr_t *vfp,
+                        int use_mvcost,
+                        const MV *center_mv,
+                        MV *best_mv);
 
 typedef int (fractional_mv_step_fp) (
     const MACROBLOCK *x,
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 84a0e6069a..b83c1f98c0 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -861,6 +861,8 @@ static void set_rt_speed_feature(VP9_COMMON *cm,
     }
     sf->frame_parameter_update = 0;
     sf->encode_breakout_thresh = 1000;
+
+    sf->search_method = FAST_HEX;
   }
   if (speed >= 6) {
     sf->always_this_block_size = BLOCK_16X16;
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 4831b6f727..1d15f09040 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -136,7 +136,8 @@ typedef enum {
   NSTEP = 1,
   HEX = 2,
   BIGDIA = 3,
-  SQUARE = 4
+  SQUARE = 4,
+  FAST_HEX = 5
 } SEARCH_METHODS;
 
 typedef enum {
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index d617f783d1..11b2ae8745 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -2437,7 +2437,11 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
   // Further step/diamond searches as necessary
   further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
 
-  if (cpi->sf.search_method == HEX) {
+  if (cpi->sf.search_method == FAST_HEX) {
+    bestsme = vp9_fast_hex_search(x, &mvp_full, step_param, sadpb,
+                                  &cpi->fn_ptr[bsize], 1,
+                                  &ref_mv.as_mv, &tmp_mv->as_mv);
+  } else if (cpi->sf.search_method == HEX) {
     bestsme = vp9_hex_search(x, &mvp_full, step_param, sadpb, 1,
                              &cpi->fn_ptr[bsize], 1,
                              &ref_mv.as_mv, &tmp_mv->as_mv);
-- 
GitLab