diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index 8a94fa369511407e2177cfc7c954ccb46b9d95eb..539a28f4d06312eda5da8b93627b94d13558b506 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -472,7 +472,7 @@ void vp8_first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, MV *ref_mv, MV *
     xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset;
 
     // Initial step/diamond search centred on best mv
-    tmp_err = cpi->diamond_search_sad(x, b, d, ref_mv, &tmp_mv, step_param, x->errorperbit, &num00, &v_fn_ptr, x->mvsadcost, x->mvcost);
+    tmp_err = cpi->diamond_search_sad(x, b, d, ref_mv, &tmp_mv, step_param, x->errorperbit, &num00, &v_fn_ptr, x->mvsadcost, x->mvcost, ref_mv);
     if ( tmp_err < INT_MAX-new_mv_mode_penalty )
         tmp_err += new_mv_mode_penalty;
 
@@ -495,7 +495,7 @@ void vp8_first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, MV *ref_mv, MV *
             num00--;
         else
         {
-            tmp_err = cpi->diamond_search_sad(x, b, d, ref_mv, &tmp_mv, step_param + n, x->errorperbit, &num00, &v_fn_ptr, x->mvsadcost, x->mvcost);
+            tmp_err = cpi->diamond_search_sad(x, b, d, ref_mv, &tmp_mv, step_param + n, x->errorperbit, &num00, &v_fn_ptr, x->mvsadcost, x->mvcost, ref_mv);
             if ( tmp_err < INT_MAX-new_mv_mode_penalty )
                 tmp_err += new_mv_mode_penalty;
 
diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c
index bb85afa6f11c0f9d159c9cf25ae7e950dd4fd7c6..b9d86a20e2c5e99c5cc836849c73f610d11bfebe 100644
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -913,7 +913,8 @@ int vp8_diamond_search_sad
     int *num00,
     vp8_variance_fn_ptr_t *fn_ptr,
     int *mvsadcost[2],
-    int *mvcost[2]
+    int *mvcost[2],
+    MV *center_mv
 )
 {
     int i, j, step;
@@ -949,7 +950,7 @@ int vp8_diamond_search_sad
     (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
     {
         // Check the starting position
-        bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
+        bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
     }
 
     // search_param determines the length of the initial step and hence the number of iterations
@@ -982,7 +983,7 @@ int vp8_diamond_search_sad
                 {
                     this_mv.row = this_row_offset << 3;
                     this_mv.col = this_col_offset << 3;
-                    thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
+                    thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
 
                     if (thissad < bestsad)
                     {
@@ -1013,7 +1014,7 @@ int vp8_diamond_search_sad
         return INT_MAX;
 
     return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
-    + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+    + vp8_mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
 }
 
 int vp8_diamond_search_sadx4
@@ -1028,7 +1029,8 @@ int vp8_diamond_search_sadx4
     int *num00,
     vp8_variance_fn_ptr_t *fn_ptr,
     int *mvsadcost[2],
-    int *mvcost[2]
+    int *mvcost[2],
+    MV *center_mv
 )
 {
     int i, j, step;
@@ -1064,7 +1066,7 @@ int vp8_diamond_search_sadx4
     (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
     {
         // Check the starting position
-        bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
+        bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
     }
 
     // search_param determines the length of the initial step and hence the number of iterations
@@ -1108,7 +1110,7 @@ int vp8_diamond_search_sadx4
                     {
                         this_mv.row = (best_mv->row + ss[i].mv.row) << 3;
                         this_mv.col = (best_mv->col + ss[i].mv.col) << 3;
-                        sad_array[t] += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
+                        sad_array[t] += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
 
                         if (sad_array[t] < bestsad)
                         {
@@ -1137,7 +1139,7 @@ int vp8_diamond_search_sadx4
                     {
                         this_mv.row = this_row_offset << 3;
                         this_mv.col = this_col_offset << 3;
-                        thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
+                        thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
 
                         if (thissad < bestsad)
                         {
@@ -1168,12 +1170,12 @@ int vp8_diamond_search_sadx4
         return INT_MAX;
 
     return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
-    + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+    + vp8_mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
 }
 
 
 #if !(CONFIG_REALTIME_ONLY)
-int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2])
+int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2], MV *center_mv)
 {
     unsigned char *what = (*(b->base_src) + b->src);
     int what_stride = b->src_stride;
@@ -1211,7 +1213,7 @@ int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int erro
         // Baseline value at the centre
 
         //bestsad = fn_ptr->sf( what,what_stride,bestaddress,in_what_stride) + (int)sqrt(vp8_mv_err_cost(ref_mv,ref_mv, mvcost,error_per_bit*14));
-        bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
+        bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
     }
 
     // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
@@ -1239,7 +1241,7 @@ int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int erro
             this_mv.col = c << 3;
             //thissad += (int)sqrt(vp8_mv_err_cost(&this_mv,ref_mv, mvcost,error_per_bit*14));
             //thissad  += error_per_bit * mv_bits_sadcost[mv_bits(&this_mv, ref_mv, mvcost)];
-            thissad  += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit); //mv_bits(error_per_bit, &this_mv, ref_mv, mvsadcost);
+            thissad  += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit); //mv_bits(error_per_bit, &this_mv, ref_mv, mvsadcost);
 
             if (thissad < bestsad)
             {
@@ -1258,12 +1260,12 @@ int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int erro
 
     if (bestsad < INT_MAX)
         return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
-        + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+        + vp8_mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
     else
         return INT_MAX;
 }
 
-int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2])
+int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2], MV *center_mv)
 {
     unsigned char *what = (*(b->base_src) + b->src);
     int what_stride = b->src_stride;
@@ -1301,7 +1303,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
     (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
     {
         // Baseline value at the centre
-        bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
+        bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
     }
 
     // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
@@ -1336,7 +1338,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
                 if (thissad < bestsad)
                 {
                     this_mv.col = c << 3;
-                    thissad  += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
+                    thissad  += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
 
                     if (thissad < bestsad)
                     {
@@ -1359,7 +1361,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
             if (thissad < bestsad)
             {
                 this_mv.col = c << 3;
-                thissad  += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
+                thissad  += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
 
                 if (thissad < bestsad)
                 {
@@ -1381,14 +1383,14 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
 
     if (bestsad < INT_MAX)
         return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
-        + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+        + vp8_mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
     else
         return INT_MAX;
 }
 #endif
 
 
-int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2])
+int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2], MV *center_mv)
 {
     unsigned char *what = (*(b->base_src) + b->src);
     int what_stride = b->src_stride;
@@ -1427,7 +1429,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
     (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
     {
         // Baseline value at the centre
-        bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
+        bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
     }
 
     // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
@@ -1462,7 +1464,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
                 if (thissad < bestsad)
                 {
                     this_mv.col = c << 3;
-                    thissad  += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
+                    thissad  += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
 
                     if (thissad < bestsad)
                     {
@@ -1491,7 +1493,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
                 if (thissad < bestsad)
                 {
                     this_mv.col = c << 3;
-                    thissad  += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
+                    thissad  += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
 
                     if (thissad < bestsad)
                     {
@@ -1514,7 +1516,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
             if (thissad < bestsad)
             {
                 this_mv.col = c << 3;
-                thissad  += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
+                thissad  += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
 
                 if (thissad < bestsad)
                 {
@@ -1535,7 +1537,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
 
     if (bestsad < INT_MAX)
         return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
-        + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+        + vp8_mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
     else
         return INT_MAX;
 }
diff --git a/vp8/encoder/mcomp.h b/vp8/encoder/mcomp.h
index 7d60362482f20bae552e08433f7f8784ebc3ce77..b54fe7377ad03cefb52830f171fdc3a58b6ee979 100644
--- a/vp8/encoder/mcomp.h
+++ b/vp8/encoder/mcomp.h
@@ -67,7 +67,8 @@ extern fractional_mv_step_fp vp8_skip_fractional_mv_step;
      int distance, \
      vp8_variance_fn_ptr_t *fn_ptr, \
      int *mvcost[2], \
-     int *mvsadcost[2] \
+     int *mvsadcost[2], \
+     MV *center_mv \
     )
 
 #define prototype_diamond_search_sad(sym)\
@@ -83,7 +84,8 @@ extern fractional_mv_step_fp vp8_skip_fractional_mv_step;
      int *num00, \
      vp8_variance_fn_ptr_t *fn_ptr, \
      int *mvsadcost[2], \
-     int *mvcost[2] \
+     int *mvcost[2], \
+     MV *center_mv \
     )
 
 #if ARCH_X86 || ARCH_X86_64
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 5f02a5a0292bbf336a94d33482404ba571a4ba44..fd7b5668a3ffcee70e47b0efd8e099c952176e6f 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -283,6 +283,21 @@ static void setup_features(VP8_COMP *cpi)
 
 void vp8_dealloc_compressor_data(VP8_COMP *cpi)
 {
+    // Delete last frame MV storage buffers
+    if (cpi->lfmv != 0)
+        vpx_free(cpi->lfmv);
+
+    cpi->lfmv = 0;
+
+    if (cpi->lf_ref_frame_sign_bias != 0)
+        vpx_free(cpi->lf_ref_frame_sign_bias);
+
+    cpi->lf_ref_frame_sign_bias = 0;
+
+    if (cpi->lf_ref_frame != 0)
+        vpx_free(cpi->lf_ref_frame);
+
+    cpi->lf_ref_frame = 0;
 
     // Delete sementation map
     if (cpi->segmentation_map != 0)
@@ -2145,7 +2160,10 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
     cpi->alt_is_last  = 0 ;
     cpi->gold_is_alt  = 0 ;
 
-
+    // allocate memory for storing last frame's MVs for MV prediction.
+    CHECK_MEM_ERROR(cpi->lfmv, vpx_calloc((cpi->common.mb_rows+1) * (cpi->common.mb_cols+1), sizeof(int_mv)));
+    CHECK_MEM_ERROR(cpi->lf_ref_frame_sign_bias, vpx_calloc((cpi->common.mb_rows+1) * (cpi->common.mb_cols+1), sizeof(int)));
+    CHECK_MEM_ERROR(cpi->lf_ref_frame, vpx_calloc((cpi->common.mb_rows+1) * (cpi->common.mb_cols+1), sizeof(int)));
 
     // Create the encoder segmentation map and set all entries to 0
     CHECK_MEM_ERROR(cpi->segmentation_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1));
@@ -4165,6 +4183,60 @@ static void encode_frame_to_data_rate
     }
 #endif
 
+
+
+
+////////////////////////////////
+////////////////////////////////
+    // This frame's MVs are saved and will be used in next frame's MV prediction.
+    if(cm->show_frame)   //do not save for altref frame
+    {
+      int mb_row;
+      int mb_col;
+      MODE_INFO *tmp = cm->mip; //point to beginning of allocated MODE_INFO arrays.
+      //static int last_video_frame = 0;
+
+      /*
+      if (cm->current_video_frame == 0)   //first frame: set to 0
+      {
+        for (mb_row = 0; mb_row < cm->mb_rows+1; mb_row ++)
+        {
+            for (mb_col = 0; mb_col < cm->mb_cols+1; mb_col ++)
+            {
+                cpi->lfmv[mb_col + mb_row*(cm->mode_info_stride)].as_int = 0;
+                cpi->lf_ref_frame_sign_bias[mb_col + mb_row*(cm->mode_info_stride)] = 0;
+                cpi->lf_ref_frame[mb_col + mb_row*(cm->mode_info_stride)] = 0;
+            }
+        }
+      }else
+      */
+
+      if(cm->frame_type != KEY_FRAME)
+      {
+        for (mb_row = 0; mb_row < cm->mb_rows+1; mb_row ++)
+        {
+          for (mb_col = 0; mb_col < cm->mb_cols+1; mb_col ++)
+          {
+              if(tmp->mbmi.ref_frame != INTRA_FRAME)
+                cpi->lfmv[mb_col + mb_row*(cm->mode_info_stride)].as_int = tmp->mbmi.mv.as_int;
+
+              cpi->lf_ref_frame_sign_bias[mb_col + mb_row*(cm->mode_info_stride)] = cm->ref_frame_sign_bias[tmp->mbmi.ref_frame];
+              cpi->lf_ref_frame[mb_col + mb_row*(cm->mode_info_stride)] = tmp->mbmi.ref_frame;
+              //printf("[%d, %d]  ", cpi->lfmv[mb_col + mb_row*(cm->mode_info_stride-1)].as_mv.row, cpi->lfmv[mb_col + mb_row*(cm->mode_info_stride-1)].as_mv.col);
+              tmp++;
+          }
+        }
+
+      //last_video_frame = cm->current_video_frame;
+      }
+    }
+
+//printf("after: %d   %d \n", cm->current_video_frame, cm->show_frame );
+
+
+
+
+
     // Update the GF useage maps.
     // This is done after completing the compression of a frame when all modes etc. are finalized but before loop filter
     vp8_update_gf_useage_maps(cpi, cm, &cpi->mb);
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index 81e32f031d845995c03a043cdcddab04dc553b55..31e627b5cdcc3f025a94fdeccfc8e7471a1167a3 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -239,6 +239,12 @@ enum
     BLOCK_MAX_SEGMENTS
 };
 
+typedef union
+{
+    unsigned int as_int;   /* the same storage accessed as a single integer */
+    MV           as_mv;    /* the motion vector itself */
+} int_mv;        /* facilitates rapid equality tests; presumably MV is no wider than unsigned int -- TODO confirm */
+
 typedef struct
 {
 
@@ -661,6 +667,10 @@ typedef struct
     unsigned char *gf_active_flags;   // Record of which MBs still refer to last golden frame either directly or through 0,0
     int gf_active_count;
 
+    //Store last frame's MV info for next frame MV prediction
+    int_mv *lfmv;
+    int *lf_ref_frame_sign_bias;
+    int *lf_ref_frame;
 
 } VP8_COMP;
 
diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index 2f7dd9c7c47f6b4fe623d1a6183545a64440c578..8dfca351c6d48fd3bccc046b24cf5691d9b3ebee 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -685,7 +685,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
 #if 0
 
             // Initial step Search
-            bestsme = vp8_diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, x->errorperbit, &num00, &cpi->fn_ptr, cpi->mb.mvsadcost, cpi->mb.mvcost);
+            bestsme = vp8_diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, x->errorperbit, &num00, &cpi->fn_ptr, cpi->mb.mvsadcost, cpi->mb.mvcost, &best_ref_mv1);
             mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
             mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
 
@@ -698,7 +698,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
                     num00--;
                 else
                 {
-                    thissme = vp8_diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param + n, x->errorperbit, &num00, &cpi->fn_ptr, cpi->mb.mvsadcost, x->mvcost);
+                    thissme = vp8_diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param + n, x->errorperbit, &num00, &cpi->fn_ptr, cpi->mb.mvsadcost, x->mvcost, &best_ref_mv1);
 
                     if (thissme < bestsme)
                     {
@@ -724,7 +724,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
             }
             else
             {
-                bestsme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb < 9
+                bestsme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv1); //sadpb < 9
                 mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
                 mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
 
@@ -743,7 +743,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
                         num00--;
                     else
                     {
-                        thissme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb = 9
+                        thissme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv1); //sadpb = 9
 
                         if (thissme < bestsme)
                         {
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 8a753fd4403f8ac2f1289f13dc1365aa64e80672..99129ace00a7c2e54d42c541c418aa933e2e2a92 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -1236,7 +1236,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
                             bestsme = vp8_hex_search(x, c, e, best_ref_mv, &mode_mv[NEW4X4], step_param, sadpb/*x->errorperbit*/, &num00, v_fn_ptr, x->mvsadcost, mvcost);
                         else
                         {
-                            bestsme = cpi->diamond_search_sad(x, c, e, best_ref_mv, &mode_mv[NEW4X4], step_param, sadpb / 2/*x->errorperbit*/, &num00, v_fn_ptr, x->mvsadcost, mvcost);
+                            bestsme = cpi->diamond_search_sad(x, c, e, best_ref_mv, &mode_mv[NEW4X4], step_param, sadpb / 2/*x->errorperbit*/, &num00, v_fn_ptr, x->mvsadcost, mvcost, best_ref_mv);
 
                             n = num00;
                             num00 = 0;
@@ -1249,7 +1249,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
                                     num00--;
                                 else
                                 {
-                                    thissme = cpi->diamond_search_sad(x, c, e, best_ref_mv, &temp_mv, step_param + n, sadpb / 2/*x->errorperbit*/, &num00, v_fn_ptr, x->mvsadcost, mvcost);
+                                    thissme = cpi->diamond_search_sad(x, c, e, best_ref_mv, &temp_mv, step_param + n, sadpb / 2/*x->errorperbit*/, &num00, v_fn_ptr, x->mvsadcost, mvcost, best_ref_mv);
 
                                     if (thissme < bestsme)
                                     {
@@ -1264,7 +1264,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
                         // Should we do a full search (best quality only)
                         if ((compressor_speed == 0) && (bestsme >> sseshift) > 4000)
                         {
-                            thissme = cpi->full_search_sad(x, c, e, best_ref_mv, sadpb / 4, 16, v_fn_ptr, x->mvcost, x->mvsadcost);
+                            thissme = cpi->full_search_sad(x, c, e, best_ref_mv, sadpb / 4, 16, v_fn_ptr, x->mvcost, x->mvsadcost, best_ref_mv);
 
                             if (thissme < bestsme)
                             {
@@ -1385,6 +1385,273 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
 }
 
 
+
+/////////////////////////
+// Store x's MV in *mvp, negated when x's reference frame and refframe differ in sign bias.
+static void mv_bias(const MODE_INFO *x, int refframe, int_mv *mvp, const int *ref_frame_sign_bias)
+{
+    const int own_bias = ref_frame_sign_bias[x->mbmi.ref_frame];
+    MV biased = x->mbmi.mv.as_mv;
+
+    if (own_bias != ref_frame_sign_bias[refframe])
+    {
+        biased.row = -biased.row;
+        biased.col = -biased.col;
+    }
+    mvp->as_mv = biased;
+}
+
+// Sign-bias correction for an MV that is already stored in *mvp: both components
+// are negated when lf_ref_frame_sign_bias differs from ref_frame_sign_bias[refframe].
+static void lf_mv_bias(const int lf_ref_frame_sign_bias, int refframe, int_mv *mvp, const int *ref_frame_sign_bias)
+{
+    MV flipped = mvp->as_mv;
+
+    if (lf_ref_frame_sign_bias == ref_frame_sign_bias[refframe])
+        return;   // biases agree: the vector keeps its value
+
+    flipped.row = -flipped.row;
+    flipped.col = -flipped.col;
+    mvp->as_mv = flipped;
+}
+
+// Clamp mv->col to [mb_to_left_edge - LEFT_TOP_MARGIN, mb_to_right_edge + RIGHT_BOTTOM_MARGIN]
+// and mv->row to the matching top/bottom range; the lower bound is tested first.
+static void vp8_clamp_mv(MV *mv, const MACROBLOCKD *xd)
+{
+    const int col_lo = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
+    const int col_hi = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
+    const int row_lo = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
+    const int row_hi = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
+
+    mv->col = (mv->col < col_lo) ? col_lo : ((mv->col > col_hi) ? col_hi : mv->col);
+    mv->row = (mv->row < row_lo) ? row_lo : ((mv->row > row_hi) ? row_hi : mv->row);
+}
+
+// Exchange the two ints that x and y point to (harmless when x == y).
+static void swap(int *x,int *y)
+{
+   const int held = *y;
+
+   *y = *x;
+   *x = held;
+}
+
+static void quicksortmv(int arr[],int left, int right)
+{
+   int lidx,ridx,pivot;
+   // Recursive in-place quicksort of arr[left..right] (both ends inclusive), ascending.
+   lidx = left;
+   ridx = right;
+   // Ranges with fewer than two elements (left >= right) are left as they are.
+   if( left < right)
+   {
+      pivot = (left + right)/2;
+      // 'pivot' is the index of the pivot value and is updated whenever a swap moves that value.
+      while(lidx <=pivot && ridx >=pivot)
+      {
+          while(arr[lidx] < arr[pivot] && lidx <= pivot)   // stops at the pivot at the latest
+              lidx++;
+          while(arr[ridx] > arr[pivot] && ridx >= pivot)   // stops at the pivot at the latest
+              ridx--;
+          swap(&arr[lidx], &arr[ridx]);
+          lidx++;
+          ridx--;
+          if(lidx-1 == pivot)   // left cursor was on the pivot: its value now sits at the old ridx
+          {
+              ridx++;
+              pivot = ridx;
+          }
+          else if(ridx+1 == pivot)   // right cursor was on the pivot: its value now sits at the old lidx
+          {
+              lidx--;
+              pivot = lidx;
+          }
+      }
+      quicksortmv(arr, left, pivot - 1);    // sort the part left of the pivot
+      quicksortmv(arr, pivot + 1, right);   // sort the part right of the pivot
+   }
+}
+
+static void quicksortsad(int arr[],int idx[], int left, int right)
+{
+   int lidx,ridx,pivot;
+   // Recursive in-place quicksort of arr[left..right] (inclusive), ascending; idx[] is permuted identically.
+   lidx = left;
+   ridx = right;
+   // Ranges with fewer than two elements (left >= right) are left as they are.
+   if( left < right)
+   {
+      pivot = (left + right)/2;
+      // 'pivot' is the index of the pivot value and is updated whenever a swap moves that value.
+      while(lidx <=pivot && ridx >=pivot)
+      {
+          while(arr[lidx] < arr[pivot] && lidx <= pivot)   // stops at the pivot at the latest
+              lidx++;
+          while(arr[ridx] > arr[pivot] && ridx >= pivot)   // stops at the pivot at the latest
+              ridx--;
+          swap(&arr[lidx], &arr[ridx]);
+          swap(&idx[lidx], &idx[ridx]);   // keep each index paired with its value
+          lidx++;
+          ridx--;
+          if(lidx-1 == pivot)   // left cursor was on the pivot: its value now sits at the old ridx
+          {
+              ridx++;
+              pivot = ridx;
+          }
+          else if(ridx+1 == pivot)   // right cursor was on the pivot: its value now sits at the old lidx
+          {
+              lidx--;
+              pivot = lidx;
+          }
+      }
+      quicksortsad(arr, idx, left, pivot - 1);    // sort the part left of the pivot
+      quicksortsad(arr, idx, pivot + 1, right);   // sort the part right of the pivot
+   }
+}
+
+// Improved MV prediction: derive a predicted MV for 'here' from neighbouring MBs and write it, clamped, to *mvp.
+static void vp8_mv_pred
+(
+    VP8_COMP *cpi,                 // supplies lfmv, lf_ref_frame, lf_ref_frame_sign_bias and common.last_frame_type
+    MACROBLOCKD *xd,               // supplies mode_info_stride and the mb_to_*_edge distances
+    const MODE_INFO *here,         // mode info of the MB being predicted
+    MV *mvp,                       // out: predicted MV after vp8_clamp_mv()
+    int refframe,                  // reference frame used in the sign-bias comparisons
+    int *ref_frame_sign_bias,      // sign-bias table indexed by reference frame
+    int *sr,                       // out: set on a reference-frame match, 0 on the median fallback; not written for intra 'here'
+    int near_sadidx[]              // candidate indices in the order they are tried
+)
+{
+    const MODE_INFO *above = here - xd->mode_info_stride;
+    const MODE_INFO *left = here - 1;
+    const MODE_INFO *aboveleft = above - 1;
+    int_mv           near_mvs[7];   // candidates: 0-2 from the current frame, 3-6 from the last frame
+    int              near_ref[7];   // reference frame of each candidate (0 when the slot was not filled)
+    int_mv           mv;
+    int              vcnt=0;        // number of candidate slots consumed so far
+    int              find=0;        // set once a candidate with a matching reference frame is taken
+    int              mb_offset;     // index of the current MB in the cpi->lf* arrays
+
+    int              mvx[7];        // row components of the candidates (sorted for the median)
+    int              mvy[7];        // col components of the candidates (sorted for the median)
+    int              i;
+
+    mv.as_int = 0;
+    // When 'here' is intra the whole prediction is skipped: mv stays zero and *sr is not written.
+    if(here->mbmi.ref_frame != INTRA_FRAME)
+    {
+        near_mvs[0].as_int = near_mvs[1].as_int = near_mvs[2].as_int = near_mvs[3].as_int = near_mvs[4].as_int = near_mvs[5].as_int = near_mvs[6].as_int = 0;
+        near_ref[0] = near_ref[1] = near_ref[2] = near_ref[3] = near_ref[4] = near_ref[5] = near_ref[6] = 0;
+
+        // Read the MVs of 3 neighbouring MBs (above, left, above-left) in the current frame as candidates.
+        if (above->mbmi.ref_frame != INTRA_FRAME)
+        {
+            near_mvs[vcnt].as_int = above->mbmi.mv.as_int;
+            mv_bias(above, refframe, &near_mvs[vcnt], ref_frame_sign_bias);
+            near_ref[vcnt] =  above->mbmi.ref_frame;
+        }
+        vcnt++;   // the slot is consumed even when the neighbour is intra (its entry stays zero)
+        if (left->mbmi.ref_frame != INTRA_FRAME)
+        {
+            near_mvs[vcnt].as_int = left->mbmi.mv.as_int;
+            mv_bias(left, refframe, &near_mvs[vcnt], ref_frame_sign_bias);
+            near_ref[vcnt] =  left->mbmi.ref_frame;
+        }
+        vcnt++;
+        if (aboveleft->mbmi.ref_frame != INTRA_FRAME)
+        {
+            near_mvs[vcnt].as_int = aboveleft->mbmi.mv.as_int;
+            mv_bias(aboveleft, refframe, &near_mvs[vcnt], ref_frame_sign_bias);
+            near_ref[vcnt] =  aboveleft->mbmi.ref_frame;
+        }
+        vcnt++;
+
+        // Read 4 more candidates from the stored last-frame data, unless the last frame was a key frame.
+        if(cpi->common.last_frame_type != KEY_FRAME)
+        {
+            mb_offset = (-xd->mb_to_top_edge/128 + 1) * (xd->mode_info_stride) + (-xd->mb_to_left_edge/128 +1) ;
+            // NOTE(review): the +1 terms look like they skip a one-entry border row/column in the lf* arrays -- confirm
+            // current in last frame
+            if (cpi->lf_ref_frame[mb_offset] != INTRA_FRAME)
+            {
+                near_mvs[vcnt].as_int = cpi->lfmv[mb_offset].as_int;
+                lf_mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
+                near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset];
+            }
+            vcnt++;
+
+            // above in last frame
+            if (cpi->lf_ref_frame[mb_offset - xd->mode_info_stride] != INTRA_FRAME)
+            {
+                near_mvs[vcnt].as_int = cpi->lfmv[mb_offset - xd->mode_info_stride].as_int;
+                lf_mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset - xd->mode_info_stride], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
+                near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset - xd->mode_info_stride];
+            }
+            vcnt++;
+
+            // left in last frame
+            if (cpi->lf_ref_frame[mb_offset-1] != INTRA_FRAME)
+            {
+                near_mvs[vcnt].as_int = cpi->lfmv[mb_offset -1].as_int;
+                lf_mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset -1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
+                near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset - 1];
+            }
+            vcnt++;
+
+            // aboveleft in last frame
+            if (cpi->lf_ref_frame[mb_offset - xd->mode_info_stride -1] != INTRA_FRAME)
+            {
+                near_mvs[vcnt].as_int = cpi->lfmv[mb_offset - xd->mode_info_stride -1].as_int;
+                lf_mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset - xd->mode_info_stride -1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
+                near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset - xd->mode_info_stride -1];
+            }
+            vcnt++;
+        }
+        // Take the first candidate, in near_sadidx order, whose reference frame equals that of 'here'.
+        for(i=0; i< vcnt; i++)
+        {
+            if(near_ref[near_sadidx[i]] != INTRA_FRAME)
+            {
+                if(here->mbmi.ref_frame == near_ref[near_sadidx[i]])
+                {
+                    mv.as_int = near_mvs[near_sadidx[i]].as_int;
+                    find = 1;
+                    if(vcnt<2)   // NOTE(review): vcnt is 3 or 7 on every path above, so this branch is never taken -- confirm intent
+                        *sr = 4;
+                    else if (vcnt<4)
+                        *sr = 3;
+                    else
+                        *sr = 2;
+                    break;
+                }
+            }
+        }
+        // No matching candidate: fall back to the component-wise median over all vcnt slots.
+        if(!find)
+        {
+            for(i=0; i<vcnt; i++)
+            {
+                mvx[i] = near_mvs[i].as_mv.row;
+                mvy[i] = near_mvs[i].as_mv.col;
+            }
+
+            quicksortmv (mvx, 0, vcnt-1);
+            quicksortmv (mvy, 0, vcnt-1);
+            mv.as_mv.row = mvx[vcnt/2];
+            mv.as_mv.col = mvy[vcnt/2];
+
+            find = 1;   // not read again after this point
+            //sr is set to 0 to allow calling function to decide the search range.
+            *sr = 0;
+        }
+    }
+
+    /* Return the prediction through mvp, clamped by vp8_clamp_mv(). */
+    *mvp = mv.as_mv;
+    vp8_clamp_mv(mvp, xd);
+}
+
 int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra)
 {
     BLOCK *b = &x->block[0];
@@ -1422,6 +1689,12 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
     int tteob = 0;
     int force_no_skip = 0;
 
+    MV mvp;
+    int near_sad[7]; // 0-cf above, 1-cf left, 2-cf aboveleft, 3-lf current, 4-lf above, 5-lf left, 6-lf aboveleft
+    int near_sadidx[7] = {0, 1, 2, 3, 4, 5, 6};
+    int saddone=0;
+    int sr=0;    //search range got from mv_pred(). It uses step_param levels. (0-7)
+
     *returnintra = INT_MAX;
 
     vpx_memset(&best_mbmode, 0, sizeof(best_mbmode)); // clean
@@ -1589,6 +1862,72 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
                           &mode_mv[NEARESTMV], &mode_mv[NEARMV], &best_ref_mv,
                           mdcounts, x->e_mbd.mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias);
 
+        if(x->e_mbd.mode_info_context->mbmi.mode == NEWMV)
+        {
+            if(!saddone)
+            {
+                //calculate sad for current frame 3 nearby MBs.
+                if( xd->mb_to_top_edge==0 && xd->mb_to_left_edge ==0)
+                {
+                    near_sad[0] = near_sad[1] = near_sad[2] = INT_MAX;
+                }else if(xd->mb_to_top_edge==0)
+                {   //only has left MB for sad calculation.
+                    near_sad[0] = near_sad[2] = INT_MAX;
+                    near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, 0x7fffffff);
+                }else if(xd->mb_to_left_edge ==0)
+                {   //only has above MB for sad calculation.
+                    near_sad[1] = near_sad[2] = INT_MAX;
+                    near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, 0x7fffffff);
+                }else
+                {
+                    near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, 0x7fffffff);
+                    near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, 0x7fffffff);
+                    near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, xd->dst.y_buffer - xd->dst.y_stride *16 -16,xd->dst.y_stride, 0x7fffffff);
+                }
+
+                if(cpi->common.last_frame_type != KEY_FRAME)
+                {
+                    //calculate sad for last frame 4 nearby MBs.
+                    unsigned char *pre_y_buffer = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_buffer + recon_yoffset;
+                    int pre_y_stride = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_stride;
+
+                    if( xd->mb_to_top_edge==0 && xd->mb_to_left_edge ==0)
+                    {
+                        near_sad[4] = near_sad[5] = near_sad[6] = INT_MAX;
+                        near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer, pre_y_stride, 0x7fffffff);
+                    }else if(xd->mb_to_top_edge==0)
+                    {   //only has left MB for sad calculation.
+                        near_sad[4] = near_sad[6] = INT_MAX;
+                        near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer, pre_y_stride, 0x7fffffff);
+                        near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer - 16, pre_y_stride, 0x7fffffff);
+                    }else if(xd->mb_to_left_edge ==0)
+                    {   //only has current and above MBs for sad calculation.
+                        near_sad[5] = near_sad[6] = INT_MAX;
+                        near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer, pre_y_stride, 0x7fffffff);
+                        near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer - pre_y_stride *16, pre_y_stride, 0x7fffffff);
+                    }else
+                    {
+                        near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer, pre_y_stride, 0x7fffffff);
+                        near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer - pre_y_stride *16, pre_y_stride, 0x7fffffff);
+                        near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer - 16, pre_y_stride, 0x7fffffff);
+                        near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer - pre_y_stride *16 -16, pre_y_stride, 0x7fffffff);
+                    }
+                }
+
+                if(cpi->common.last_frame_type != KEY_FRAME)
+                {
+                    quicksortsad(near_sad, near_sadidx, 0, 6);
+                }else
+                {
+                    quicksortsad(near_sad, near_sadidx, 0, 2);
+                }
+
+                saddone = 1;
+            }
+
+            vp8_mv_pred(cpi, &x->e_mbd, x->e_mbd.mode_info_context, &mvp,
+                        x->e_mbd.mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias, &sr, &near_sadidx[0]);
+        }
 
         // Estimate the reference frame signaling cost and add it to the rolling cost variable.
         frame_cost = ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
@@ -1764,6 +2103,10 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
                 int further_steps;
                 int n;
 
+                //adjust search range according to sr from mv prediction
+                if(sr > step_param)
+                    step_param = sr;
+
                 // Work out how long a search we should do
                 search_range = MAXF(abs(best_ref_mv.col), abs(best_ref_mv.row)) >> 3;
 
@@ -1784,7 +2127,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
                     }
                     else
                     {
-                        bestsme = cpi->diamond_search_sad(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb < 9
+                        bestsme = cpi->diamond_search_sad(x, b, d, &mvp, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv); //sadpb < 9
                         mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
                         mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
 
@@ -1803,7 +2146,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
                                 num00--;
                             else
                             {
-                                thissme = cpi->diamond_search_sad(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb = 9
+                                thissme = cpi->diamond_search_sad(x, b, d, &mvp, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv); //sadpb = 9
 
                                 if (thissme < bestsme)
                                 {
@@ -1827,9 +2170,14 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
                 {
                     int thissme;
                     int full_flag_thresh = 0;
+                    MV full_mvp;
+
+                    full_mvp.row = d->bmi.mv.as_mv.row <<3;    // use diamond search result as full search starting point
+                    full_mvp.col = d->bmi.mv.as_mv.col <<3;
 
                     // Update x->vector_range based on best vector found in step search
-                    search_range = MAXF(abs(d->bmi.mv.as_mv.row), abs(d->bmi.mv.as_mv.col));
+                    search_range = MAXF(abs((mvp.row>>3) - d->bmi.mv.as_mv.row), abs((mvp.col>>3) - d->bmi.mv.as_mv.col));
+                    //search_range *= 1.4;  //didn't improve PSNR
 
                     if (search_range > x->vector_range)
                         x->vector_range = search_range;
@@ -1838,9 +2186,20 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
 
                     // Apply limits
                     search_range = (search_range > cpi->sf.max_fs_radius) ? cpi->sf.max_fs_radius : search_range;
+
+                    //add this to reduce full search range.
+                    if(sr<=3 && search_range > 8) search_range = 8;
+
                     {
                         int sadpb = x->sadperbit16 >> 2;
-                        thissme = cpi->full_search_sad(x, b, d, &best_ref_mv, sadpb, search_range, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, x->mvsadcost);
+                        thissme = cpi->full_search_sad(x, b, d, &full_mvp, sadpb, search_range, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, x->mvsadcost,&best_ref_mv);
+                        /*
+                        MV dia_ref_mv;
+                        dia_ref_mv.row = d->bmi.mv.as_mv.row << 3;
+                        dia_ref_mv.col = d->bmi.mv.as_mv.col << 3;
+                        thissme = cpi->full_search_sad(x, b, d, &dia_ref_mv, sadpb, search_range, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, x->mvsadcost,&best_ref_mv);
+                        */
+
                     }
 
                     // Barrier threshold to initiating full search
diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c
index fd5dd7edeb2be96e555b0bc90b533febca28f824..745dc31fe8a3209e88d5a9ec30de6c309c3356bd 100644
--- a/vp8/encoder/temporal_filter.c
+++ b/vp8/encoder/temporal_filter.c
@@ -246,7 +246,7 @@ static int find_matching_mb
             step_param,
             sadpb / 2/*x->errorperbit*/,
             &num00, &cpi->fn_ptr[BLOCK_16X16],
-            mvsadcost, mvcost); //sadpb < 9
+            mvsadcost, mvcost, &best_ref_mv1); //sadpb < 9
 
         // Further step/diamond searches as necessary
         n = 0;
@@ -268,7 +268,7 @@ static int find_matching_mb
                     step_param + n,
                     sadpb / 4/*x->errorperbit*/,
                     &num00, &cpi->fn_ptr[BLOCK_16X16],
-                    mvsadcost, mvcost); //sadpb = 9
+                    mvsadcost, mvcost, &best_ref_mv1); //sadpb = 9
 
                 if (thissme < bestsme)
                 {