diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 014f54a49d5cd05144c03ea12323e52a2acedaa4..96e66f076ff4459c7dbb790d151f9d86bc44145b 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -245,6 +245,71 @@ void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) {
     },                                                                   \
     v = INT_MAX;)
 
+#define FIRST_LEVEL_CHECKS                              \
+  {                                                     \
+    unsigned int left, right, up, down, diag;           \
+    CHECK_BETTER(left, tr, tc - hstep);                 \
+    CHECK_BETTER(right, tr, tc + hstep);                \
+    CHECK_BETTER(up, tr - hstep, tc);                   \
+    CHECK_BETTER(down, tr + hstep, tc);                 \
+    whichdir = (left < right ? 0 : 1) +                 \
+               (up < down ? 0 : 2);                     \
+    switch (whichdir) {                                 \
+      case 0:                                           \
+        CHECK_BETTER(diag, tr - hstep, tc - hstep);     \
+        break;                                          \
+      case 1:                                           \
+        CHECK_BETTER(diag, tr - hstep, tc + hstep);     \
+        break;                                          \
+      case 2:                                           \
+        CHECK_BETTER(diag, tr + hstep, tc - hstep);     \
+        break;                                          \
+      case 3:                                           \
+        CHECK_BETTER(diag, tr + hstep, tc + hstep);     \
+        break;                                          \
+    }                                                   \
+  }
+
+#define SECOND_LEVEL_CHECKS                             \
+  {                                                     \
+    int kr, kc;                                         \
+    unsigned int second;                                \
+    if (tr != br && tc != bc) {                         \
+      kr = br - tr;                                     \
+      kc = bc - tc;                                     \
+      CHECK_BETTER(second, tr + kr, tc + 2 * kc);       \
+      CHECK_BETTER(second, tr + 2 * kr, tc + kc);       \
+    } else if (tr == br && tc != bc) {                  \
+      kc = bc - tc;                                     \
+      CHECK_BETTER(second, tr + hstep, tc + 2 * kc);    \
+      CHECK_BETTER(second, tr - hstep, tc + 2 * kc);    \
+      switch (whichdir) {                               \
+        case 0:                                         \
+        case 1:                                         \
+          CHECK_BETTER(second, tr + hstep, tc + kc);    \
+          break;                                        \
+        case 2:                                         \
+        case 3:                                         \
+          CHECK_BETTER(second, tr - hstep, tc + kc);    \
+          break;                                        \
+      }                                                 \
+    } else if (tr != br && tc == bc) {                  \
+      kr = br - tr;                                     \
+      CHECK_BETTER(second, tr + 2 * kr, tc + hstep);    \
+      CHECK_BETTER(second, tr + 2 * kr, tc - hstep);    \
+      switch (whichdir) {                               \
+        case 0:                                         \
+        case 2:                                         \
+          CHECK_BETTER(second, tr + kr, tc + hstep);    \
+          break;                                        \
+        case 1:                                         \
+        case 3:                                         \
+          CHECK_BETTER(second, tr + kr, tc - hstep);    \
+          break;                                        \
+      }                                                 \
+    }                                                   \
+  }
+
 int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x,
                                       int_mv *bestmv, int_mv *ref_mv,
                                       int error_per_bit,
@@ -261,7 +326,6 @@ int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x,
   int rr, rc, br, bc, hstep;
   int tr, tc;
   unsigned int besterr = INT_MAX;
-  unsigned int left, right, up, down, diag;
   unsigned int sse;
   unsigned int whichdir;
   unsigned int halfiters = iters_per_step;
@@ -306,32 +370,10 @@ int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x,
   // common with the last iteration could be 2 ( if diag selected)
   while (halfiters--) {
     // 1/2 pel
-    CHECK_BETTER(left, tr, tc - hstep);
-    CHECK_BETTER(right, tr, tc + hstep);
-    CHECK_BETTER(up, tr - hstep, tc);
-    CHECK_BETTER(down, tr + hstep, tc);
-
-    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
-
-    switch (whichdir) {
-      case 0:
-        CHECK_BETTER(diag, tr - hstep, tc - hstep);
-        break;
-      case 1:
-        CHECK_BETTER(diag, tr - hstep, tc + hstep);
-        break;
-      case 2:
-        CHECK_BETTER(diag, tr + hstep, tc - hstep);
-        break;
-      case 3:
-        CHECK_BETTER(diag, tr + hstep, tc + hstep);
-        break;
-    }
-
+    FIRST_LEVEL_CHECKS;
     // no reason to check the same one again.
     if (tr == br && tc == bc)
       break;
-
     tr = br;
     tc = bc;
   }
@@ -343,32 +385,10 @@ int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x,
   if (forced_stop != 2) {
     hstep >>= 1;
     while (quarteriters--) {
-      CHECK_BETTER(left, tr, tc - hstep);
-      CHECK_BETTER(right, tr, tc + hstep);
-      CHECK_BETTER(up, tr - hstep, tc);
-      CHECK_BETTER(down, tr + hstep, tc);
-
-      whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
-
-      switch (whichdir) {
-        case 0:
-          CHECK_BETTER(diag, tr - hstep, tc - hstep);
-          break;
-        case 1:
-          CHECK_BETTER(diag, tr - hstep, tc + hstep);
-          break;
-        case 2:
-          CHECK_BETTER(diag, tr + hstep, tc - hstep);
-          break;
-        case 3:
-          CHECK_BETTER(diag, tr + hstep, tc + hstep);
-          break;
-      }
-
+      FIRST_LEVEL_CHECKS;
       // no reason to check the same one again.
       if (tr == br && tc == bc)
         break;
-
       tr = br;
       tc = bc;
     }
@@ -378,32 +398,10 @@ int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x,
       forced_stop == 0) {
     hstep >>= 1;
     while (eighthiters--) {
-      CHECK_BETTER(left, tr, tc - hstep);
-      CHECK_BETTER(right, tr, tc + hstep);
-      CHECK_BETTER(up, tr - hstep, tc);
-      CHECK_BETTER(down, tr + hstep, tc);
-
-      whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
-
-      switch (whichdir) {
-        case 0:
-          CHECK_BETTER(diag, tr - hstep, tc - hstep);
-          break;
-        case 1:
-          CHECK_BETTER(diag, tr - hstep, tc + hstep);
-          break;
-        case 2:
-          CHECK_BETTER(diag, tr + hstep, tc - hstep);
-          break;
-        case 3:
-          CHECK_BETTER(diag, tr + hstep, tc + hstep);
-          break;
-      }
-
+      FIRST_LEVEL_CHECKS;
       // no reason to check the same one again.
       if (tr == br && tc == bc)
         break;
-
       tr = br;
       tc = bc;
     }
@@ -419,6 +417,105 @@ int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x,
   return besterr;
 }
 
+int vp9_find_best_sub_pixel_tree(MACROBLOCK *x,
+                                 int_mv *bestmv, int_mv *ref_mv,
+                                 int error_per_bit,
+                                 const vp9_variance_fn_ptr_t *vfp,
+                                 int forced_stop,
+                                 int iters_per_step,
+                                 int *mvjcost, int *mvcost[2],
+                                 int *distortion,
+                                 unsigned int *sse1) {
+  uint8_t *z = x->plane[0].src.buf;
+  int src_stride = x->plane[0].src.stride;
+  MACROBLOCKD *xd = &x->e_mbd;
+  int rr, rc, br, bc, hstep;
+  int tr, tc;
+  unsigned int besterr = INT_MAX;
+  unsigned int sse;
+  unsigned int whichdir;
+  int thismse;
+  int maxc, minc, maxr, minr;
+  int y_stride;
+  int offset;
+  unsigned int halfiters = iters_per_step;
+  unsigned int quarteriters = iters_per_step;
+  unsigned int eighthiters = iters_per_step;
+
+  uint8_t *y = xd->plane[0].pre[0].buf +
+               (bestmv->as_mv.row) * xd->plane[0].pre[0].stride +
+               bestmv->as_mv.col;
+
+  y_stride = xd->plane[0].pre[0].stride;
+
+  rr = ref_mv->as_mv.row;
+  rc = ref_mv->as_mv.col;
+  br = bestmv->as_mv.row << 3;
+  bc = bestmv->as_mv.col << 3;
+  hstep = 4;
+  minc = MAX(x->mv_col_min << 3,
+             (ref_mv->as_mv.col) - ((1 << MV_MAX_BITS) - 1));
+  maxc = MIN(x->mv_col_max << 3,
+             (ref_mv->as_mv.col) + ((1 << MV_MAX_BITS) - 1));
+  minr = MAX(x->mv_row_min << 3,
+             (ref_mv->as_mv.row) - ((1 << MV_MAX_BITS) - 1));
+  maxr = MIN(x->mv_row_max << 3,
+             (ref_mv->as_mv.row) + ((1 << MV_MAX_BITS) - 1));
+
+  tr = br;
+  tc = bc;
+
+  offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
+
+  // central mv
+  bestmv->as_mv.row <<= 3;
+  bestmv->as_mv.col <<= 3;
+
+  // calculate central point error
+  besterr = vfp->vf(y, y_stride, z, src_stride, sse1);
+  *distortion = besterr;
+  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
+
+  // 1/2 pel
+  FIRST_LEVEL_CHECKS;
+  if (halfiters > 1) {
+    SECOND_LEVEL_CHECKS;
+  }
+  tr = br;
+  tc = bc;
+
+  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
+  if (forced_stop != 2) {
+    hstep >>= 1;
+    FIRST_LEVEL_CHECKS;
+    if (quarteriters > 1) {
+      SECOND_LEVEL_CHECKS;
+    }
+    tr = br;
+    tc = bc;
+  }
+
+  if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) &&
+      forced_stop == 0) {
+    hstep >>= 1;
+    FIRST_LEVEL_CHECKS;
+    if (eighthiters > 1) {
+      SECOND_LEVEL_CHECKS;
+    }
+    tr = br;
+    tc = bc;
+  }
+
+  bestmv->as_mv.row = br;
+  bestmv->as_mv.col = bc;
+
+  if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
+      (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3)))
+    return INT_MAX;
+
+  return besterr;
+}
+
 #undef DIST
 /* returns subpixel variance error function */
 #define DIST(r, c) \
@@ -443,7 +540,6 @@ int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x,
   int rr, rc, br, bc, hstep;
   int tr, tc;
   unsigned int besterr = INT_MAX;
-  unsigned int left, right, up, down, diag;
   unsigned int sse;
   unsigned int whichdir;
   unsigned int halfiters = iters_per_step;
@@ -478,7 +574,6 @@ int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x,
   tr = br;
   tc = bc;
 
-
   offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
 
   // central mv
@@ -497,32 +592,10 @@ int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x,
   // common with the last iteration could be 2 ( if diag selected)
   while (halfiters--) {
     // 1/2 pel
-    CHECK_BETTER(left, tr, tc - hstep);
-    CHECK_BETTER(right, tr, tc + hstep);
-    CHECK_BETTER(up, tr - hstep, tc);
-    CHECK_BETTER(down, tr + hstep, tc);
-
-    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
-
-    switch (whichdir) {
-      case 0:
-        CHECK_BETTER(diag, tr - hstep, tc - hstep);
-        break;
-      case 1:
-        CHECK_BETTER(diag, tr - hstep, tc + hstep);
-        break;
-      case 2:
-        CHECK_BETTER(diag, tr + hstep, tc - hstep);
-        break;
-      case 3:
-        CHECK_BETTER(diag, tr + hstep, tc + hstep);
-        break;
-    }
-
+    FIRST_LEVEL_CHECKS;
     // no reason to check the same one again.
     if (tr == br && tc == bc)
       break;
-
     tr = br;
     tc = bc;
   }
@@ -534,32 +607,10 @@ int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x,
   if (forced_stop != 2) {
     hstep >>= 1;
     while (quarteriters--) {
-      CHECK_BETTER(left, tr, tc - hstep);
-      CHECK_BETTER(right, tr, tc + hstep);
-      CHECK_BETTER(up, tr - hstep, tc);
-      CHECK_BETTER(down, tr + hstep, tc);
-
-      whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
-
-      switch (whichdir) {
-        case 0:
-          CHECK_BETTER(diag, tr - hstep, tc - hstep);
-          break;
-        case 1:
-          CHECK_BETTER(diag, tr - hstep, tc + hstep);
-          break;
-        case 2:
-          CHECK_BETTER(diag, tr + hstep, tc - hstep);
-          break;
-        case 3:
-          CHECK_BETTER(diag, tr + hstep, tc + hstep);
-          break;
-      }
-
+      FIRST_LEVEL_CHECKS;
       // no reason to check the same one again.
       if (tr == br && tc == bc)
         break;
-
       tr = br;
       tc = bc;
     }
@@ -569,32 +620,10 @@ int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x,
       forced_stop == 0) {
     hstep >>= 1;
     while (eighthiters--) {
-      CHECK_BETTER(left, tr, tc - hstep);
-      CHECK_BETTER(right, tr, tc + hstep);
-      CHECK_BETTER(up, tr - hstep, tc);
-      CHECK_BETTER(down, tr + hstep, tc);
-
-      whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
-
-      switch (whichdir) {
-        case 0:
-          CHECK_BETTER(diag, tr - hstep, tc - hstep);
-          break;
-        case 1:
-          CHECK_BETTER(diag, tr - hstep, tc + hstep);
-          break;
-        case 2:
-          CHECK_BETTER(diag, tr + hstep, tc - hstep);
-          break;
-        case 3:
-          CHECK_BETTER(diag, tr + hstep, tc + hstep);
-          break;
-      }
-
+      FIRST_LEVEL_CHECKS;
       // no reason to check the same one again.
       if (tr == br && tc == bc)
         break;
-
       tr = br;
       tc = bc;
     }
@@ -609,6 +638,116 @@ int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x,
   return besterr;
 }
 
+int vp9_find_best_sub_pixel_comp_tree(MACROBLOCK *x,
+                                      int_mv *bestmv, int_mv *ref_mv,
+                                      int error_per_bit,
+                                      const vp9_variance_fn_ptr_t *vfp,
+                                      int forced_stop,
+                                      int iters_per_step,
+                                      int *mvjcost, int *mvcost[2],
+                                      int *distortion,
+                                      unsigned int *sse1,
+                                      const uint8_t *second_pred,
+                                      int w, int h) {
+  uint8_t *z = x->plane[0].src.buf;
+  int src_stride = x->plane[0].src.stride;
+  MACROBLOCKD *xd = &x->e_mbd;
+  int rr, rc, br, bc, hstep;
+  int tr, tc;
+  unsigned int besterr = INT_MAX;
+  unsigned int sse;
+  unsigned int whichdir;
+  int thismse;
+  int maxc, minc, maxr, minr;
+  int y_stride;
+  int offset;
+  unsigned int halfiters = iters_per_step;
+  unsigned int quarteriters = iters_per_step;
+  unsigned int eighthiters = iters_per_step;
+
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
+  uint8_t *y = xd->plane[0].pre[0].buf +
+               (bestmv->as_mv.row) * xd->plane[0].pre[0].stride +
+               bestmv->as_mv.col;
+
+  y_stride = xd->plane[0].pre[0].stride;
+
+  rr = ref_mv->as_mv.row;
+  rc = ref_mv->as_mv.col;
+  br = bestmv->as_mv.row << 3;
+  bc = bestmv->as_mv.col << 3;
+  hstep = 4;
+  minc = MAX(x->mv_col_min << 3, (ref_mv->as_mv.col) -
+             ((1 << MV_MAX_BITS) - 1));
+  maxc = MIN(x->mv_col_max << 3, (ref_mv->as_mv.col) +
+             ((1 << MV_MAX_BITS) - 1));
+  minr = MAX(x->mv_row_min << 3, (ref_mv->as_mv.row) -
+             ((1 << MV_MAX_BITS) - 1));
+  maxr = MIN(x->mv_row_max << 3, (ref_mv->as_mv.row) +
+             ((1 << MV_MAX_BITS) - 1));
+
+  tr = br;
+  tc = bc;
+
+
+  offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
+
+  // central mv
+  bestmv->as_mv.row <<= 3;
+  bestmv->as_mv.col <<= 3;
+
+  // calculate central point error
+  // TODO(yunqingwang): central pointer error was already calculated in full-
+  // pixel search, and can be passed in this function.
+  comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride);
+  besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
+  *distortion = besterr;
+  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
+
+  // Each subsequent iteration checks at least one point in
+  // common with the last iteration could be 2 ( if diag selected)
+  // 1/2 pel
+  FIRST_LEVEL_CHECKS;
+  if (halfiters > 1) {
+    SECOND_LEVEL_CHECKS;
+  }
+  tr = br;
+  tc = bc;
+
+  // Each subsequent iteration checks at least one point in common with
+  // the last iteration could be 2 ( if diag selected) 1/4 pel
+
+  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
+  if (forced_stop != 2) {
+    hstep >>= 1;
+    FIRST_LEVEL_CHECKS;
+    if (quarteriters > 1) {
+      SECOND_LEVEL_CHECKS;
+    }
+    tr = br;
+    tc = bc;
+  }
+
+  if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) &&
+      forced_stop == 0) {
+    hstep >>= 1;
+    FIRST_LEVEL_CHECKS;
+    if (eighthiters > 1) {
+      SECOND_LEVEL_CHECKS;
+    }
+    tr = br;
+    tc = bc;
+  }
+  bestmv->as_mv.row = br;
+  bestmv->as_mv.col = bc;
+
+  if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
+      (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3)))
+    return INT_MAX;
+
+  return besterr;
+}
+
 #undef MVC
 #undef PRE
 #undef DIST
diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h
index b91e6fd346e1a8fdd51a00132ede24870af65891..35ef483c6c87567abc205ee8879b4763d91677e4 100644
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -80,6 +80,21 @@ typedef int (fractional_mv_step_fp) (
     int *distortion,
     unsigned int *sse);
 extern fractional_mv_step_fp vp9_find_best_sub_pixel_iterative;
+extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree;
+
+typedef int (fractional_mv_step_comp_fp) (
+    MACROBLOCK *x,
+    int_mv *bestmv, int_mv *ref_mv,
+    int error_per_bit,
+    const vp9_variance_fn_ptr_t *vfp,
+    int forced_stop,  // 0 - full, 1 - qtr only, 2 - half only
+    int iters_per_step,
+    int *mvjcost, int *mvcost[2],
+    int *distortion, unsigned int *sse1,
+    const uint8_t *second_pred,
+    int w, int h);
+extern fractional_mv_step_comp_fp vp9_find_best_sub_pixel_comp_iterative;
+extern fractional_mv_step_comp_fp vp9_find_best_sub_pixel_comp_tree;
 
 typedef int (*vp9_full_search_fn_t)(MACROBLOCK *x,
                                     int_mv *ref_mv, int sad_per_bit,
@@ -102,18 +117,6 @@ typedef int (*vp9_diamond_search_fn_t)(MACROBLOCK *x,
                                        int *mvjcost, int *mvcost[2],
                                        int_mv *center_mv);
 
-int vp9_find_best_sub_pixel_comp_iterative(
-    MACROBLOCK *x,
-    int_mv *bestmv, int_mv *ref_mv,
-    int error_per_bit,
-    const vp9_variance_fn_ptr_t *vfp,
-    int forced_stop,  // 0 - full, 1 - qtr only, 2 - half only
-    int iters_per_step,
-    int *mvjcost, int *mvcost[2],
-    int *distortion, unsigned int *sse1,
-    const uint8_t *second_pred,
-    int w, int h);
-
 int vp9_refining_search_8p_c(MACROBLOCK *x,
                              int_mv *ref_mv, int error_per_bit,
                              int search_range, vp9_variance_fn_ptr_t *fn_ptr,
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 2bfee63d6524b9a97b978e0e1d1c34df8e5ede26..914476c6e9b4b7a390b6fadac7c65da731758f1e 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -709,8 +709,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
   sf->search_method = NSTEP;
   sf->auto_filter = 1;
   sf->recode_loop = 1;
-  sf->subpel_search_method = SUBPEL_ITERATIVE;
-  sf->subpel_iters_per_step = 3;
+  sf->subpel_search_method = SUBPEL_TREE;
+  sf->subpel_iters_per_step = 2;
   sf->optimize_coefficients = !cpi->oxcf.lossless;
   sf->reduce_first_step_size = 0;
   sf->auto_mv_step_size = 0;
@@ -829,7 +829,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
             (MIN(cpi->common.width, cpi->common.height) >= 720)? 1 : 0;
         sf->auto_mv_step_size = 1;
         sf->search_method = SQUARE;
-        sf->subpel_iters_per_step = 2;
+        sf->subpel_iters_per_step = 1;
       }
       if (speed == 3) {
         sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES;
@@ -923,9 +923,10 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
 
   if (cpi->sf.subpel_search_method == SUBPEL_ITERATIVE) {
     cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_iterative;
-  } else {
-    // TODO(debargha): Other methods to come
-    assert(0);
+    cpi->find_fractional_mv_step_comp = vp9_find_best_sub_pixel_comp_iterative;
+  } else if (cpi->sf.subpel_search_method == SUBPEL_TREE) {
+    cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree;
+    cpi->find_fractional_mv_step_comp = vp9_find_best_sub_pixel_comp_tree;
   }
 
   cpi->mb.optimize = cpi->sf.optimize_coefficients == 1 && cpi->pass != 1;
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index fdc1081355731d29b22b39b3c5b1988dfe697345..083ff71e5e7b19446d60beb03dbb0fb2345aae51 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -234,6 +234,7 @@ typedef enum {
 
 typedef enum {
   SUBPEL_ITERATIVE = 0,
+  SUBPEL_TREE = 1,
   // Other methods to come
 } SUBPEL_SEARCH_METHODS;
 
@@ -534,6 +535,7 @@ typedef struct VP9_COMP {
   unsigned int active_map_enabled;
 
   fractional_mv_step_fp *find_fractional_mv_step;
+  fractional_mv_step_comp_fp *find_fractional_mv_step_comp;
   vp9_full_search_fn_t full_search_sad;
   vp9_refining_search_fn_t refining_search_sad;
   vp9_diamond_search_fn_t diamond_search_sad;
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 3cc526d03eca3848dfb384faee5c13ae69a93f4e..9e4ce711b593a72a8ac151a7a18d4a8e36c8674f 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -2663,7 +2663,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
       int dis; /* TODO: use dis in distortion calculation later. */
       unsigned int sse;
 
-      bestsme = vp9_find_best_sub_pixel_comp_iterative(
+      bestsme = cpi->find_fractional_mv_step_comp(
           x, &tmp_mv,
           &ref_mv[id],
           x->errorperbit,