diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c
index 566fd5059c06d4729f3e614f26e46782fe667a30..96b27bf7c889ef0f37815a52a44485a42d71a483 100644
--- a/vp9/common/vp9_alloccommon.c
+++ b/vp9/common/vp9_alloccommon.c
@@ -95,9 +95,8 @@ static void setup_mi(VP9_COMMON *cm) {
 int vp9_alloc_frame_buffers(VP9_COMMON *oci, int width, int height) {
   int i, mi_cols;
 
-  // Our internal buffers are always multiples of 16
-  const int aligned_width = multiple8(width);
-  const int aligned_height = multiple8(height);
+  const int aligned_width = ALIGN_POWER_OF_TWO(width, LOG2_MI_SIZE);
+  const int aligned_height = ALIGN_POWER_OF_TWO(height, LOG2_MI_SIZE);
   const int ss_x = oci->subsampling_x;
   const int ss_y = oci->subsampling_y;
   int mi_size;
@@ -147,7 +146,7 @@ int vp9_alloc_frame_buffers(VP9_COMMON *oci, int width, int height) {
 
   // FIXME(jkoleszar): allocate subsampled arrays for U/V once subsampling
   // information is exposed at this level
-  mi_cols = mi_cols_aligned_to_sb(oci);
+  mi_cols = mi_cols_aligned_to_sb(oci->mi_cols);
 
   // 2 contexts per 'mi unit', so that we have one context per 4x4 txfm
   // block where mi unit size is 8x8.
@@ -198,8 +197,8 @@ void vp9_initialize_common() {
 }
 
 void vp9_update_frame_size(VP9_COMMON *cm) {
-  const int aligned_width = multiple8(cm->width);
-  const int aligned_height = multiple8(cm->height);
+  const int aligned_width = ALIGN_POWER_OF_TWO(cm->width, LOG2_MI_SIZE);
+  const int aligned_height = ALIGN_POWER_OF_TWO(cm->height, LOG2_MI_SIZE);
 
   set_mb_mi(cm, aligned_width, aligned_height);
   setup_mi(cm);
diff --git a/vp9/common/vp9_common.h b/vp9/common/vp9_common.h
index a1dca1bf481f808bd1cbba827c557753a400fdc5..179690652493a2d0671a639c55e5564be3aad85f 100644
--- a/vp9/common/vp9_common.h
+++ b/vp9/common/vp9_common.h
@@ -22,12 +22,11 @@
 #define MIN(x, y) (((x) < (y)) ? (x) : (y))
 #define MAX(x, y) (((x) > (y)) ? (x) : (y))
 
-#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))
+#define ROUND_POWER_OF_TWO(value, n) \
+    (((value) + (1 << ((n) - 1))) >> (n))
 
-/* If we don't want to use ROUND_POWER_OF_TWO macro
-static INLINE int16_t round_power_of_two(int16_t value, int n) {
-  return (value + (1 << (n - 1))) >> n;
-}*/
+#define ALIGN_POWER_OF_TWO(value, n) \
+    (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1))  // round up to 2^n
 
 // Only need this for fixed-size arrays, for structs just assign.
 #define vp9_copy(dest, src) {            \
@@ -56,10 +55,6 @@ static INLINE double fclamp(double value, double low, double high) {
   return value < low ? low : (value > high ? high : value);
 }
 
-static INLINE int multiple8(int value) {
-  return (value + 7) & ~7;
-}
-
 static int get_unsigned_bits(unsigned int num_values) {
   int cat = 0;
   if (num_values <= 1)
diff --git a/vp9/common/vp9_enums.h b/vp9/common/vp9_enums.h
index 89a9253972460b91488b595bbb78a4c78299eac6..dfd6a4a3f4e8db58583b82fc190776ef9dbfc668 100644
--- a/vp9/common/vp9_enums.h
+++ b/vp9/common/vp9_enums.h
@@ -14,10 +14,12 @@
 #include "./vpx_config.h"
 
 #define LOG2_MI_SIZE 3
+#define LOG2_MI_BLOCK_SIZE (6 - LOG2_MI_SIZE)  // 64 = 2^6
 
-#define MI_SIZE (1 << LOG2_MI_SIZE)
-#define MI_MASK ((64 >> LOG2_MI_SIZE) - 1)
-#define MI_BLOCK_SIZE (64 / MI_SIZE)
+#define MI_SIZE (1 << LOG2_MI_SIZE)  // pixels per mi-unit
+#define MI_BLOCK_SIZE (1 << LOG2_MI_BLOCK_SIZE)  // mi-units per max block
+
+#define MI_MASK (MI_BLOCK_SIZE - 1)
 
 typedef enum BLOCK_SIZE_TYPE {
   BLOCK_SIZE_AB4X4,
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index abbf73f1962404fdc42f958d4724f9c097e5fb6a..f31d5a21c1464aa310c933829565378e3b116737 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -301,8 +301,8 @@ static void ref_cnt_fb(int *buf, int *idx, int new_idx) {
   buf[new_idx]++;
 }
 
-static int mi_cols_aligned_to_sb(VP9_COMMON *cm) {
-  return 2 * ((cm->mb_cols + 3) & ~3);
+static int mi_cols_aligned_to_sb(int n_mis) {  // round up to MI_BLOCK_SIZE
+  return ALIGN_POWER_OF_TWO(n_mis, LOG2_MI_BLOCK_SIZE);
 }
 
 static INLINE void set_partition_seg_context(VP9_COMMON *cm,
diff --git a/vp9/common/vp9_tile_common.c b/vp9/common/vp9_tile_common.c
index 95296ad6f2a4b7a64a67ce7eda12045395f1ceb5..208cbb9579babf1fea75e494251978b48e8df0ab 100644
--- a/vp9/common/vp9_tile_common.c
+++ b/vp9/common/vp9_tile_common.c
@@ -15,10 +15,14 @@
 #define MIN_TILE_WIDTH_SBS (MIN_TILE_WIDTH >> 6)
 #define MAX_TILE_WIDTH_SBS (MAX_TILE_WIDTH >> 6)
 
+static int to_sbs(int n_mis) {  // mi units -> max-size blocks, rounded up
+  return mi_cols_aligned_to_sb(n_mis) >> LOG2_MI_BLOCK_SIZE;
+}
+
 static void vp9_get_tile_offsets(VP9_COMMON *cm, int *min_tile_off,
                                  int *max_tile_off, int tile_idx,
                                  int log2_n_tiles, int n_mis) {
-  const int n_sbs = (n_mis + 7) >> 3;
+  const int n_sbs = to_sbs(n_mis);
   const int sb_off1 =  (tile_idx      * n_sbs) >> log2_n_tiles;
   const int sb_off2 = ((tile_idx + 1) * n_sbs) >> log2_n_tiles;
 
@@ -43,7 +47,7 @@ void vp9_get_tile_row_offsets(VP9_COMMON *cm, int tile_row_idx) {
 
 void vp9_get_tile_n_bits(VP9_COMMON *cm, int *min_log2_n_tiles_ptr,
                          int *delta_log2_n_tiles) {
-  const int sb_cols = (cm->mb_cols + 3) >> 2;
+  const int sb_cols = to_sbs(cm->mi_cols);
   int min_log2_n_tiles, max_log2_n_tiles;
 
   for (max_log2_n_tiles = 0;
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index 5ea75973d5cb66e19506fe673d9f6f2ee2b6e976..c0c74f6cc038dbf3c408b23f5ff2a3c2747f1eff 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -694,16 +694,17 @@ static void decode_tiles(VP9D_COMP *pbi,
   VP9_COMMON *const pc = &pbi->common;
 
   const uint8_t *data_ptr = data + first_partition_size;
-  const uint8_t* const data_end = pbi->source + pbi->source_sz;
+  const uint8_t *const data_end = pbi->source + pbi->source_sz;
+  const int aligned_mi_cols = mi_cols_aligned_to_sb(pc->mi_cols);
   int tile_row, tile_col;
 
   // Note: this memset assumes above_context[0], [1] and [2]
   // are allocated as part of the same buffer.
-  vpx_memset(pc->above_context[0], 0, sizeof(ENTROPY_CONTEXT) * 2 *
-                                      MAX_MB_PLANE * mi_cols_aligned_to_sb(pc));
+  vpx_memset(pc->above_context[0], 0,
+             sizeof(ENTROPY_CONTEXT) * 2 * MAX_MB_PLANE * aligned_mi_cols);
 
-  vpx_memset(pc->above_seg_context, 0, sizeof(PARTITION_CONTEXT) *
-                                       mi_cols_aligned_to_sb(pc));
+  vpx_memset(pc->above_seg_context, 0,
+             sizeof(PARTITION_CONTEXT) * aligned_mi_cols);
 
   if (pbi->oxcf.inv_tile_order) {
     const int n_cols = pc->tile_columns;
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 26456e021be409adb05f7ea70fed987e0774fee1..530c9b10706203311d08666c9c86bc47b56b2f68 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -1476,8 +1476,8 @@ void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, unsigned long *size) {
     unsigned char *data_ptr = cx_data + header_bc.pos;
     TOKENEXTRA *tok[4][1 << 6], *tok_end;
 
-    vpx_memset(cpi->common.above_seg_context, 0, sizeof(PARTITION_CONTEXT) *
-               mi_cols_aligned_to_sb(&cpi->common));
+    vpx_memset(pc->above_seg_context, 0, sizeof(PARTITION_CONTEXT) *
+               mi_cols_aligned_to_sb(pc->mi_cols));
     tok[0][0] = cpi->tok;
     for (tile_row = 0; tile_row < pc->tile_rows; tile_row++) {
       if (tile_row) {
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 1a42961b5b6cda990e5e3a87e1c28a55845fd3da..3d3079a9f9892ddc0eedbd710bf6b3e5546a346e 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1690,9 +1690,10 @@ static void encode_sb_row(VP9_COMP *cpi, int mi_row, TOKENEXTRA **tp,
 }
 
 static void init_encode_frame_mb_context(VP9_COMP *cpi) {
-  MACROBLOCK * const x = &cpi->mb;
-  VP9_COMMON * const cm = &cpi->common;
-  MACROBLOCKD * const xd = &x->e_mbd;
+  MACROBLOCK *const x = &cpi->mb;
+  VP9_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
 
   x->act_zbin_adj = 0;
   cpi->seg0_idx = 0;
@@ -1735,11 +1736,10 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) {
 
   // Note: this memset assumes above_context[0], [1] and [2]
   // are allocated as part of the same buffer.
-  vpx_memset(
-      cm->above_context[0], 0,
-      sizeof(ENTROPY_CONTEXT) * 2 * MAX_MB_PLANE * mi_cols_aligned_to_sb(cm));
+  vpx_memset(cm->above_context[0], 0,
+             sizeof(ENTROPY_CONTEXT) * 2 * MAX_MB_PLANE * aligned_mi_cols);
   vpx_memset(cm->above_seg_context, 0,
-             sizeof(PARTITION_CONTEXT) * mi_cols_aligned_to_sb(cm));
+             sizeof(PARTITION_CONTEXT) * aligned_mi_cols);
 }
 
 static void switch_lossless_mode(VP9_COMP *cpi, int lossless) {
diff --git a/vp9/vp9_iface_common.h b/vp9/vp9_iface_common.h
index dc41d77d1206440cdb12256e16b959e8d33620c7..0c1f373688c393b7fd0d97dc400a6c3d0059e571 100644
--- a/vp9/vp9_iface_common.h
+++ b/vp9/vp9_iface_common.h
@@ -29,7 +29,7 @@ static void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG  *yv12,
     img->fmt = VPX_IMG_FMT_I420;
   }
   img->w = yv12->y_stride;
-  img->h = multiple8(yv12->y_height + 2 * VP9BORDERINPIXELS);
+  img->h = ALIGN_POWER_OF_TWO(yv12->y_height + 2 * VP9BORDERINPIXELS, 3);
   img->d_w = yv12->y_crop_width;
   img->d_h = yv12->y_crop_height;
   img->x_chroma_shift = yv12->uv_width < yv12->y_width;