diff --git a/vp9/common/vp9_findnearmv.h b/vp9/common/vp9_findnearmv.h
index 6887b044f6d6559b965d550c7426b82eb92fe265..1c96f0ad0fb79f5a55f75d69767bf768c3357dc7 100644
--- a/vp9/common/vp9_findnearmv.h
+++ b/vp9/common/vp9_findnearmv.h
@@ -17,8 +17,8 @@
 #include "vp9/common/vp9_treecoder.h"
 #include "vp9/common/vp9_onyxc_int.h"
 
-#define LEFT_TOP_MARGIN (16 << 3)
-#define RIGHT_BOTTOM_MARGIN (16 << 3)
+#define LEFT_TOP_MARGIN     ((VP9BORDERINPIXELS - VP9_INTERP_EXTEND) << 3)
+#define RIGHT_BOTTOM_MARGIN ((VP9BORDERINPIXELS - VP9_INTERP_EXTEND) << 3)
 
 /* check a list of motion vectors by sad score using a number rows of pixels
  * above and a number cols of pixels in the left to select the one with best
@@ -43,7 +43,7 @@ static void mv_bias(int refmb_ref_frame_sign_bias, int refframe,
   mvp->as_mv = xmv;
 }
 
-
+// TODO(jingning): this mv clamping function should be block size dependent.
 static void clamp_mv(int_mv *mv,
                      int mb_to_left_edge,
                      int mb_to_right_edge,
@@ -59,12 +59,19 @@ static void clamp_mv(int_mv *mv,
                   mb_to_bottom_edge : mv->as_mv.row;
 }
 
-static void clamp_mv2(int_mv *mv, const MACROBLOCKD *xd) {
+// Clamp *mv to the xd->mb_to_*_edge limits pushed outward by the margins.
+// Returns 1 when the clamp changed the vector, 0 when it was left untouched.
+static int clamp_mv2(int_mv *mv, const MACROBLOCKD *xd) {
+  int_mv before;
+  before.as_int = mv->as_int;
   clamp_mv(mv,
            xd->mb_to_left_edge - LEFT_TOP_MARGIN,
            xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
            xd->mb_to_top_edge - LEFT_TOP_MARGIN,
            xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
+
+  // Compare the as_int views taken before and after the clamp.
+  return before.as_int != mv->as_int;
 }
 
 static unsigned int check_mv_bounds(int_mv *mv,
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index e033a2435bf8334c02cb87d130f72fa026e3147d..f081e71743811ae47e6ef73a01b83efe83c6dd83 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -1026,11 +1026,6 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
       case NEWMV:
         process_mv(bc, &mv->as_mv, &best_mv.as_mv, nmvc, &cm->fc.NMVcount,
                    xd->allow_high_precision_mv);
-
-        // Don't need to check this on NEARMV and NEARESTMV modes
-        // since those modes clamp the MV. The NEWMV mode does not,
-        // so signal to the prediction stage whether special
-        // handling may be required.
         mbmi->need_to_clamp_mvs = check_mv_bounds(mv,
                                                   mb_to_left_edge,
                                                   mb_to_right_edge,
@@ -1040,11 +1035,11 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
         if (mbmi->second_ref_frame > 0) {
           process_mv(bc, &mbmi->mv[1].as_mv, &best_mv_second.as_mv, nmvc,
                      &cm->fc.NMVcount, xd->allow_high_precision_mv);
-          mbmi->need_to_clamp_secondmv |= check_mv_bounds(&mbmi->mv[1],
-                                                          mb_to_left_edge,
-                                                          mb_to_right_edge,
-                                                          mb_to_top_edge,
-                                                          mb_to_bottom_edge);
+          mbmi->need_to_clamp_secondmv = check_mv_bounds(&mbmi->mv[1],
+                                                         mb_to_left_edge,
+                                                         mb_to_right_edge,
+                                                         mb_to_top_edge,
+                                                         mb_to_bottom_edge);
         }
         break;
       default:
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 35e204afffe93ed0890ad1b1da68e76548deeba7..97a5cb9948292d7f495c2c6beda9caf3c2bab49b 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -3706,7 +3706,11 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
   for (i = 0; i < num_refs; ++i) {
     cur_mv[i] = frame_mv[this_mode][refs[i]];
     // Clip "next_nearest" so that it does not extend to far out of image
-    clamp_mv2(&cur_mv[i], xd);
+    if (this_mode == NEWMV)
+      assert(!clamp_mv2(&cur_mv[i], xd));
+    else
+      clamp_mv2(&cur_mv[i], xd);
+
     if (mv_check_bounds(x, &cur_mv[i]))
       return INT64_MAX;
     mbmi->mv[i].as_int = cur_mv[i].as_int;
diff --git a/vpxenc.c b/vpxenc.c
index 3295fd9a458ca42f47fccc930d7b8b5736706bcf..95c6cf2b19e016a235330db4c0b49a51dc049c53 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -1481,7 +1481,7 @@ static void show_rate_histogram(struct rate_hist          *hist,
 
 #define mmin(a, b)  ((a) < (b) ? (a) : (b))
 static void find_mismatch(vpx_image_t *img1, vpx_image_t *img2,
-                          int yloc[2], int uloc[2], int vloc[2]) {
+                          int yloc[4], int uloc[4], int vloc[4]) {
   const unsigned int bsize = 64;
   const unsigned int bsize2 = bsize >> 1;
   unsigned int match = 1;
@@ -1510,6 +1510,9 @@ static void find_mismatch(vpx_image_t *img1, vpx_image_t *img2,
         }
     }
   }
+
   uloc[0] = uloc[1] = uloc[2] = uloc[3] = -1;
   for (i = 0, match = 1; match && i < (img1->d_h + 1) / 2; i += bsize2) {
-    for (j = 0; j < match && (img1->d_w + 1) / 2; j += bsize2) {
+    // Stop on the first mismatch or once j passes the half-width, mirroring
+    // the row loop above.
+    for (j = 0; match && j < (img1->d_w + 1) / 2; j += bsize2) {