Commit c3941665 authored by Ronald S. Bultje

64x64 blocksize support.

3.2% gains on std/hd, 1.0% gains on hd.

Change-Id: I481d5df23d8a4fc650a5bcba956554490b2bd200
parent 81d1171f
@@ -240,6 +240,7 @@ EXPERIMENT_LIST="
csm
comp_intra_pred
superblocks
superblocks64
pred_filter
lossless
subpelrefmv
......
@@ -226,6 +226,16 @@ typedef enum {
MAX_REF_FRAMES = 4
} MV_REFERENCE_FRAME;
#if CONFIG_SUPERBLOCKS
typedef enum {
BLOCK_SIZE_MB16X16 = 0,
BLOCK_SIZE_SB32X32 = 1,
#if CONFIG_SUPERBLOCKS64
BLOCK_SIZE_SB64X64 = 2,
#endif
} BLOCK_SIZE_TYPE;
#endif
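For orientation: sb_type doubles as a log2 size code, which is why later hunks in this commit compute a superblock's span as 1 << sb_type macroblocks. A minimal sketch, not part of the patch (the helper name is made up); as with any EXPERIMENT_LIST entry, the new paths build with --enable-experimental --enable-superblocks64:

/* Hypothetical helper: edge length of a block, in 16x16 macroblocks. */
static int mb_span(BLOCK_SIZE_TYPE sb_type) {
  return 1 << sb_type;  /* MB16X16 -> 1, SB32X32 -> 2, SB64X64 -> 4 */
}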
typedef struct {
MB_PREDICTION_MODE mode, uv_mode;
#if CONFIG_COMP_INTRA_PRED
@@ -268,8 +278,8 @@ typedef struct {
#if CONFIG_SUPERBLOCKS
// FIXME need a SB array of 4 MB_MODE_INFOs that
- // only needs one encoded_as_sb.
- unsigned char encoded_as_sb;
+ // only needs one sb_type.
+ BLOCK_SIZE_TYPE sb_type;
#endif
} MB_MODE_INFO;
@@ -415,6 +425,7 @@ typedef struct macroblockd {
DECLARE_ALIGNED(32, uint8_t, y_buf[22 * 32]);
#endif
int sb_index;
int mb_index; // Index of the MB in the SB (0..3)
int q_index;
@@ -519,7 +530,7 @@ static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, const BLOCKD *b) {
return tx_type;
#if CONFIG_SUPERBLOCKS
// TODO(rbultje, debargha): Explore ADST usage for superblocks
- if (xd->mode_info_context->mbmi.encoded_as_sb)
+ if (xd->mode_info_context->mbmi.sb_type)
return tx_type;
#endif
if (xd->mode_info_context->mbmi.mode == B_PRED &&
@@ -576,7 +587,7 @@ static TX_TYPE get_tx_type_8x8(const MACROBLOCKD *xd, const BLOCKD *b) {
return tx_type;
#if CONFIG_SUPERBLOCKS
// TODO(rbultje, debargha): Explore ADST usage for superblocks
- if (xd->mode_info_context->mbmi.encoded_as_sb)
+ if (xd->mode_info_context->mbmi.sb_type)
return tx_type;
#endif
if (xd->mode_info_context->mbmi.mode == I8X8_PRED &&
@@ -611,7 +622,7 @@ static TX_TYPE get_tx_type_16x16(const MACROBLOCKD *xd, const BLOCKD *b) {
return tx_type;
#if CONFIG_SUPERBLOCKS
// TODO(rbultje, debargha): Explore ADST usage for superblocks
- if (xd->mode_info_context->mbmi.encoded_as_sb)
+ if (xd->mode_info_context->mbmi.sb_type)
return tx_type;
#endif
if (xd->mode_info_context->mbmi.mode < I8X8_PRED &&
......
@@ -21,6 +21,9 @@
#define TRUE 1
#define FALSE 0
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
/* Only need this for fixed-size arrays, for structs just assign. */
#define vp9_copy(Dest, Src) { \
......
@@ -191,7 +191,7 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd,
above_src, xd->dst.y_stride, &sse);
score += sse;
#if CONFIG_SUPERBLOCKS
- if (xd->mode_info_context->mbmi.encoded_as_sb) {
+ if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB32X32) {
vp9_sub_pixel_variance16x2_c(above_ref + offset + 16,
ref_y_stride,
SP(this_mv.as_mv.col),
@@ -199,6 +199,22 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd,
above_src + 16, xd->dst.y_stride, &sse);
score += sse;
}
#if CONFIG_SUPERBLOCKS64
if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB64X64) {
vp9_sub_pixel_variance16x2_c(above_ref + offset + 32,
ref_y_stride,
SP(this_mv.as_mv.col),
SP(this_mv.as_mv.row),
above_src + 32, xd->dst.y_stride, &sse);
score += sse;
vp9_sub_pixel_variance16x2_c(above_ref + offset + 48,
ref_y_stride,
SP(this_mv.as_mv.col),
SP(this_mv.as_mv.row),
above_src + 48, xd->dst.y_stride, &sse);
score += sse;
}
#endif
#endif
}
if (xd->left_available) {
@@ -208,7 +224,7 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd,
left_src, xd->dst.y_stride, &sse);
score += sse;
#if CONFIG_SUPERBLOCKS
- if (xd->mode_info_context->mbmi.encoded_as_sb) {
+ if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB32X32) {
vp9_sub_pixel_variance2x16_c(left_ref + offset + ref_y_stride * 16,
ref_y_stride,
SP(this_mv.as_mv.col),
@@ -217,6 +233,24 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd,
xd->dst.y_stride, &sse);
score += sse;
}
#if CONFIG_SUPERBLOCKS64
if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB64X64) {
vp9_sub_pixel_variance2x16_c(left_ref + offset + ref_y_stride * 32,
ref_y_stride,
SP(this_mv.as_mv.col),
SP(this_mv.as_mv.row),
left_src + xd->dst.y_stride * 32,
xd->dst.y_stride, &sse);
score += sse;
vp9_sub_pixel_variance2x16_c(left_ref + offset + ref_y_stride * 48,
ref_y_stride,
SP(this_mv.as_mv.col),
SP(this_mv.as_mv.row),
left_src + xd->dst.y_stride * 48,
xd->dst.y_stride, &sse);
score += sse;
}
#endif
#endif
}
#else
@@ -230,22 +264,42 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd,
score += vp9_sad16x3(above_src, xd->dst.y_stride,
above_ref + offset, ref_y_stride);
#if CONFIG_SUPERBLOCKS
- if (xd->mode_info_context->mbmi.encoded_as_sb) {
+ if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB32X32) {
score += vp9_sad16x3(above_src + 16, xd->dst.y_stride,
above_ref + offset + 16, ref_y_stride);
}
#if CONFIG_SUPERBLOCKS64
if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB64X64) {
score += vp9_sad16x3(above_src + 32, xd->dst.y_stride,
above_ref + offset + 32, ref_y_stride);
score += vp9_sad16x3(above_src + 48, xd->dst.y_stride,
above_ref + offset + 48, ref_y_stride);
}
#endif
#endif
}
if (xd->left_available) {
score += vp9_sad3x16(left_src, xd->dst.y_stride,
left_ref + offset, ref_y_stride);
#if CONFIG_SUPERBLOCKS
- if (xd->mode_info_context->mbmi.encoded_as_sb) {
+ if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB32X32) {
score += vp9_sad3x16(left_src + xd->dst.y_stride * 16,
xd->dst.y_stride,
left_ref + offset + ref_y_stride * 16,
ref_y_stride);
}
#if CONFIG_SUPERBLOCKS64
if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB64X64) {
score += vp9_sad3x16(left_src + xd->dst.y_stride * 32,
xd->dst.y_stride,
left_ref + offset + ref_y_stride * 32,
ref_y_stride);
score += vp9_sad3x16(left_src + xd->dst.y_stride * 48,
xd->dst.y_stride,
left_ref + offset + ref_y_stride * 48,
ref_y_stride);
}
#endif
#endif
}
#endif
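Both branches above share one pattern: a candidate MV is scored on a thin strip of previously reconstructed pixels (3 rows above, 3 columns left of the block), and wider blocks simply add 16-wide (or 16-tall) segments. A condensed sketch of the row case, assuming the vp9_sad16x3 signature implied by the calls above:

#include <stdint.h>

extern unsigned int vp9_sad16x3(const uint8_t *src, int src_stride,
                                const uint8_t *ref, int ref_stride);

/* Sketch: score the top border in 16-pixel segments;
 * bw is the block width in pixels (16, 32 or 64). */
static unsigned int above_border_sad(const uint8_t *above_src, int src_stride,
                                     const uint8_t *above_ref, int ref_stride,
                                     int bw) {
  unsigned int score = 0;
  int n;
  for (n = 0; n < bw; n += 16)
    score += vp9_sad16x3(above_src + n, src_stride,
                         above_ref + n, ref_stride);
  return score;
}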
......
@@ -228,7 +228,7 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd) {
if (mb_col > 0
#if CONFIG_SUPERBLOCKS
- && !((mb_col & 1) && mode_info_context->mbmi.encoded_as_sb &&
+ && !((mb_col & 1) && mode_info_context->mbmi.sb_type &&
((mode_info_context[0].mbmi.mb_skip_coeff &&
mode_info_context[-1].mbmi.mb_skip_coeff)
#if CONFIG_TX32X32
@@ -253,7 +253,7 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd) {
/* don't apply across umv border */
if (mb_row > 0
#if CONFIG_SUPERBLOCKS
- && !((mb_row & 1) && mode_info_context->mbmi.encoded_as_sb &&
+ && !((mb_row & 1) && mode_info_context->mbmi.sb_type &&
((mode_info_context[0].mbmi.mb_skip_coeff &&
mode_info_context[-mis].mbmi.mb_skip_coeff)
#if CONFIG_TX32X32
@@ -277,7 +277,7 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd) {
// FIXME: Not 8x8 aware
if (mb_col > 0
#if CONFIG_SUPERBLOCKS
- && !((mb_col & 1) && mode_info_context->mbmi.encoded_as_sb &&
+ && !((mb_col & 1) && mode_info_context->mbmi.sb_type &&
mode_info_context[0].mbmi.mb_skip_coeff &&
mode_info_context[-1].mbmi.mb_skip_coeff)
#endif
@@ -292,7 +292,7 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd) {
/* don't apply across umv border */
if (mb_row > 0
#if CONFIG_SUPERBLOCKS
- && !((mb_row & 1) && mode_info_context->mbmi.encoded_as_sb &&
+ && !((mb_row & 1) && mode_info_context->mbmi.sb_type &&
mode_info_context[0].mbmi.mb_skip_coeff &&
mode_info_context[-cm->mode_info_stride].mbmi.mb_skip_coeff)
#endif
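All four hunks in this file encode the same rule: an odd MB column (or row) inside a superblock is an interior edge, and when the MBs on both sides coded no residual (and, under TX32X32, share one 32x32 transform) there is nothing to filter across it. Reduced to a sketch with the names used above:

/* Sketch: skip test for a vertical edge interior to a superblock. */
const int interior_edge = (mb_col & 1) && mode_info_context->mbmi.sb_type;
const int both_skipped = mode_info_context[0].mbmi.mb_skip_coeff &&
                         mode_info_context[-1].mbmi.mb_skip_coeff;
if (mb_col > 0 && !(interior_edge && both_skipped)) {
  /* ... filter the left MB edge ... */
}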
......
@@ -237,7 +237,7 @@ void vp9_find_mv_refs(
vpx_memset(candidate_scores, 0, sizeof(candidate_scores));
#if CONFIG_SUPERBLOCKS
- if (mbmi->encoded_as_sb) {
+ if (mbmi->sb_type) {
mv_ref_search = sb_mv_ref_search;
ref_distance_weight = sb_ref_distance_weight;
} else {
......
@@ -229,7 +229,7 @@ typedef struct VP9Common {
/* Y,U,V,Y2 */
ENTROPY_CONTEXT_PLANES *above_context; /* row of context for each plane */
- ENTROPY_CONTEXT_PLANES left_context[2]; /* (up to) 4 contexts "" */
+ ENTROPY_CONTEXT_PLANES left_context[4]; /* (up to) 4 contexts "" */
/* keyframe block modes are predicted by their above, left neighbors */
@@ -248,7 +248,10 @@ typedef struct VP9Common {
vp9_prob prob_last_coded;
vp9_prob prob_gf_coded;
#if CONFIG_SUPERBLOCKS
- vp9_prob sb_coded;
+ vp9_prob sb32_coded;
#if CONFIG_SUPERBLOCKS64
vp9_prob sb64_coded;
#endif // CONFIG_SUPERBLOCKS64
#endif
// Context probabilities when using predictive coding of segment id
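Splitting the old sb_coded probability in two suggests a two-level size signal: one bit at sb64_coded for whether a 64x64 region is coded whole, then one bit at sb32_coded per remaining 32x32 quadrant. A decoder-side sketch under that assumption (nothing here is in the hunk; the reader type and vp9_read call are assumed):

/* Hypothetical: recover the block size for one 64x64 region. */
static BLOCK_SIZE_TYPE read_sb_type(vp9_reader *bc, const VP9_COMMON *cm) {
  if (vp9_read(bc, cm->sb64_coded))
    return BLOCK_SIZE_SB64X64;
  return vp9_read(bc, cm->sb32_coded) ? BLOCK_SIZE_SB32X32
                                      : BLOCK_SIZE_MB16X16;
}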
......
@@ -9,6 +9,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_treecoder.h"
@@ -230,13 +231,18 @@ void vp9_set_pred_flag(MACROBLOCKD *const xd,
case PRED_SEG_ID:
xd->mode_info_context->mbmi.seg_id_predicted = pred_flag;
#if CONFIG_SUPERBLOCKS
- if (xd->mode_info_context->mbmi.encoded_as_sb) {
- if (xd->mb_to_right_edge >= 0)
- xd->mode_info_context[1].mbmi.seg_id_predicted = pred_flag;
- if (xd->mb_to_bottom_edge >= 0) {
- xd->mode_info_context[mis].mbmi.seg_id_predicted = pred_flag;
- if (xd->mb_to_right_edge >= 0)
- xd->mode_info_context[mis + 1].mbmi.seg_id_predicted = pred_flag;
+ if (xd->mode_info_context->mbmi.sb_type) {
+ #define sub(a, b) (b) < 0 ? (a) + (b) : (a)
+ const int n_mbs = 1 << xd->mode_info_context->mbmi.sb_type;
+ const int x_mbs = sub(n_mbs, xd->mb_to_right_edge >> 7);
+ const int y_mbs = sub(n_mbs, xd->mb_to_bottom_edge >> 7);
+ int x, y;
+ for (y = 0; y < y_mbs; y++) {
+ for (x = !y; x < x_mbs; x++) {
+ xd->mode_info_context[y * mis + x].mbmi.seg_id_predicted =
+ pred_flag;
+ }
+ }
+ }
#endif
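The arithmetic deserves a note: mb_to_right_edge and mb_to_bottom_edge are signed distances in 1/8-pel units, so >> 7 converts them to whole 16-pixel macroblocks, and sub() clips the superblock's 1/2/4-MB span where it overhangs the frame (n_mbs = 4 with mb_to_right_edge >> 7 == -1 gives x_mbs = 3). The x = !y start skips (0, 0), which was written just before the loop. The clip as a standalone sketch:

/* Sketch: visible MB span of a superblock against one frame edge;
 * mb_to_edge is in 1/8-pel units, as in MACROBLOCKD. */
static int visible_mbs(int n_mbs, int mb_to_edge) {
  const int d = mb_to_edge >> 7;  /* 1/8 pel -> 16-pel macroblocks */
  return d < 0 ? n_mbs + d : n_mbs;
}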
@@ -245,13 +251,16 @@ void vp9_set_pred_flag(MACROBLOCKD *const xd,
case PRED_REF:
xd->mode_info_context->mbmi.ref_predicted = pred_flag;
#if CONFIG_SUPERBLOCKS
- if (xd->mode_info_context->mbmi.encoded_as_sb) {
- if (xd->mb_to_right_edge >= 0)
- xd->mode_info_context[1].mbmi.ref_predicted = pred_flag;
- if (xd->mb_to_bottom_edge >= 0) {
- xd->mode_info_context[mis].mbmi.ref_predicted = pred_flag;
- if (xd->mb_to_right_edge >= 0)
- xd->mode_info_context[mis + 1].mbmi.ref_predicted = pred_flag;
+ if (xd->mode_info_context->mbmi.sb_type) {
+ const int n_mbs = 1 << xd->mode_info_context->mbmi.sb_type;
+ const int x_mbs = sub(n_mbs, xd->mb_to_right_edge >> 7);
+ const int y_mbs = sub(n_mbs, xd->mb_to_bottom_edge >> 7);
+ int x, y;
+ for (y = 0; y < y_mbs; y++) {
+ for (x = !y; x < x_mbs; x++) {
+ xd->mode_info_context[y * mis + x].mbmi.ref_predicted = pred_flag;
+ }
+ }
+ }
#endif
@@ -260,13 +269,16 @@ void vp9_set_pred_flag(MACROBLOCKD *const xd,
case PRED_MBSKIP:
xd->mode_info_context->mbmi.mb_skip_coeff = pred_flag;
#if CONFIG_SUPERBLOCKS
- if (xd->mode_info_context->mbmi.encoded_as_sb) {
- if (xd->mb_to_right_edge >= 0)
- xd->mode_info_context[1].mbmi.mb_skip_coeff = pred_flag;
- if (xd->mb_to_bottom_edge >= 0) {
- xd->mode_info_context[mis].mbmi.mb_skip_coeff = pred_flag;
- if (xd->mb_to_right_edge >= 0)
- xd->mode_info_context[mis + 1].mbmi.mb_skip_coeff = pred_flag;
+ if (xd->mode_info_context->mbmi.sb_type) {
+ const int n_mbs = 1 << xd->mode_info_context->mbmi.sb_type;
+ const int x_mbs = sub(n_mbs, xd->mb_to_right_edge >> 7);
+ const int y_mbs = sub(n_mbs, xd->mb_to_bottom_edge >> 7);
+ int x, y;
+ for (y = 0; y < y_mbs; y++) {
+ for (x = !y; x < x_mbs; x++) {
+ xd->mode_info_context[y * mis + x].mbmi.mb_skip_coeff = pred_flag;
+ }
+ }
+ }
#endif
@@ -288,21 +300,25 @@ unsigned char vp9_get_pred_mb_segid(const VP9_COMMON *const cm,
// Currently the prediction for the macroblock segment ID is
// the value stored for this macroblock in the previous frame.
#if CONFIG_SUPERBLOCKS
- if (!xd->mode_info_context->mbmi.encoded_as_sb) {
+ if (!xd->mode_info_context->mbmi.sb_type) {
#endif
return cm->last_frame_seg_map[MbIndex];
#if CONFIG_SUPERBLOCKS
} else {
- int seg_id = cm->last_frame_seg_map[MbIndex];
- int mb_col = MbIndex % cm->mb_cols;
- int mb_row = MbIndex / cm->mb_cols;
- if (mb_col + 1 < cm->mb_cols)
- seg_id = seg_id && cm->last_frame_seg_map[MbIndex + 1];
- if (mb_row + 1 < cm->mb_rows) {
- seg_id = seg_id && cm->last_frame_seg_map[MbIndex + cm->mb_cols];
- if (mb_col + 1 < cm->mb_cols)
- seg_id = seg_id && cm->last_frame_seg_map[MbIndex + cm->mb_cols + 1];
+ const int n_mbs = 1 << xd->mode_info_context->mbmi.sb_type;
+ const int mb_col = MbIndex % cm->mb_cols;
+ const int mb_row = MbIndex / cm->mb_cols;
+ const int x_mbs = MIN(n_mbs, cm->mb_cols - mb_col);
+ const int y_mbs = MIN(n_mbs, cm->mb_rows - mb_row);
+ int x, y;
+ unsigned seg_id = -1;
+ for (y = mb_row; y < mb_row + y_mbs; y++) {
+ for (x = mb_col; x < mb_col + x_mbs; x++) {
+ seg_id = MIN(seg_id, cm->last_frame_seg_map[cm->mb_cols * y + x]);
+ }
+ }
+ return seg_id;
}
#endif
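The aggregation rule changes meaningfully here: the old code &&-ed neighbouring map entries, collapsing every prediction to 0 or 1, while taking the minimum over all covered MBs scales to the 4x4-MB case and still yields a usable segment id. For example, co-located ids {2, 5, 3, 2} now predict 2, where && would have produced 1.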
......
@@ -780,6 +780,70 @@ void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x,
}
#endif
}
void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x,
uint8_t *dst_y,
uint8_t *dst_u,
uint8_t *dst_v,
int dst_ystride,
int dst_uvstride) {
uint8_t *y1 = x->pre.y_buffer, *u1 = x->pre.u_buffer, *v1 = x->pre.v_buffer;
uint8_t *y2 = x->second_pre.y_buffer, *u2 = x->second_pre.u_buffer,
*v2 = x->second_pre.v_buffer;
int edge[4], n;
edge[0] = x->mb_to_top_edge;
edge[1] = x->mb_to_bottom_edge;
edge[2] = x->mb_to_left_edge;
edge[3] = x->mb_to_right_edge;
for (n = 0; n < 4; n++) {
const int x_idx = n & 1, y_idx = n >> 1;
x->mb_to_top_edge = edge[0] - ((y_idx * 32) << 3);
x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 32) << 3);
x->mb_to_left_edge = edge[2] - ((x_idx * 32) << 3);
x->mb_to_right_edge = edge[3] + (((1 - x_idx) * 32) << 3);
x->pre.y_buffer = y1 + y_idx * 32 * x->pre.y_stride + x_idx * 32;
x->pre.u_buffer = u1 + y_idx * 16 * x->pre.uv_stride + x_idx * 16;
x->pre.v_buffer = v1 + y_idx * 16 * x->pre.uv_stride + x_idx * 16;
if (x->mode_info_context->mbmi.second_ref_frame > 0) {
x->second_pre.y_buffer = y2 + y_idx * 32 * x->pre.y_stride + x_idx * 32;
x->second_pre.u_buffer = u2 + y_idx * 16 * x->pre.uv_stride + x_idx * 16;
x->second_pre.v_buffer = v2 + y_idx * 16 * x->pre.uv_stride + x_idx * 16;
}
vp9_build_inter32x32_predictors_sb(x,
dst_y + y_idx * 32 * dst_ystride + x_idx * 32,
dst_u + y_idx * 16 * dst_uvstride + x_idx * 16,
dst_v + y_idx * 16 * dst_uvstride + x_idx * 16,
dst_ystride, dst_uvstride);
}
x->mb_to_top_edge = edge[0];
x->mb_to_bottom_edge = edge[1];
x->mb_to_left_edge = edge[2];
x->mb_to_right_edge = edge[3];
x->pre.y_buffer = y1;
x->pre.u_buffer = u1;
x->pre.v_buffer = v1;
if (x->mode_info_context->mbmi.second_ref_frame > 0) {
x->second_pre.y_buffer = y2;
x->second_pre.u_buffer = u2;
x->second_pre.v_buffer = v2;
}
#if CONFIG_COMP_INTERINTRA_PRED
if (x->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) {
vp9_build_interintra_64x64_predictors_sb(x, dst_y, dst_u, dst_v,
dst_ystride, dst_uvstride);
}
#endif
}
#endif
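The 64x64 inter predictor is purely compositional: four vp9_build_inter32x32_predictors_sb() calls, each with pre/second_pre advanced by 32 luma (16 chroma) pixels and the 1/8-pel edge distances rebased by (32 << 3), so edge clamping inside each quadrant stays correct; all pointers and edges are restored before returning. A hypothetical call site, mirroring how the 32x32 variant is used:

/* Sketch: build the full 64x64 inter prediction in place. */
vp9_build_inter64x64_predictors_sb(xd,
                                   xd->dst.y_buffer,
                                   xd->dst.u_buffer,
                                   xd->dst.v_buffer,
                                   xd->dst.y_stride,
                                   xd->dst.uv_stride);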
/*
......
@@ -54,6 +54,13 @@ extern void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x,
uint8_t *dst_v,
int dst_ystride,
int dst_uvstride);
extern void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x,
uint8_t *dst_y,
uint8_t *dst_u,
uint8_t *dst_v,
int dst_ystride,
int dst_uvstride);
#endif
extern void vp9_build_inter_predictors_mb(MACROBLOCKD *xd);
......
@@ -254,7 +254,7 @@ void vp9_build_intra_predictors_internal(uint8_t *src, int src_stride,
int up_available, int left_available) {
uint8_t *yabove_row = src - src_stride;
- uint8_t yleft_col[32];
+ uint8_t yleft_col[64];
uint8_t ytop_left = yabove_row[-1];
int r, c, i;
@@ -271,15 +271,19 @@ void vp9_build_intra_predictors_internal(uint8_t *src, int src_stride,
int average = 0;
int log2_bsize_minus_1;
- assert(bsize == 4 || bsize == 8 || bsize == 16 || bsize == 32);
+ assert(bsize == 4 || bsize == 8 || bsize == 16 || bsize == 32 ||
+ bsize == 64);
if (bsize == 4) {
log2_bsize_minus_1 = 1;
} else if (bsize == 8) {
log2_bsize_minus_1 = 2;
} else if (bsize == 16) {
log2_bsize_minus_1 = 3;
- } else /* bsize == 32 */ {
+ } else if (bsize == 32) {
log2_bsize_minus_1 = 4;
} else {
assert(bsize == 64);
log2_bsize_minus_1 = 5;
}
if (up_available || left_available) {
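The extended ladder above is just log2(bsize) - 1; an equivalent closed form, as a sketch (not in the patch):

/* bsize is a power of two in {4, 8, 16, 32, 64}. */
static int log2_bsize_minus_1_of(int bsize) {
  int v = 0;
  while (bsize > 2) {  /* 4 -> 1, 8 -> 2, ..., 64 -> 5 */
    bsize >>= 1;
    ++v;
  }
  return v;
}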
@@ -517,16 +521,17 @@ static void combine_interintra(MB_PREDICTION_MODE mode,
71, 70, 70, 70, 69, 69, 69, 68,
68, 68, 68, 68, 67, 67, 67, 67,
};
- int size_scale = (size == 32 ? 1 :
+ int size_scale = (size >= 32 ? 1 :
size == 16 ? 2 :
size == 8 ? 4 : 8);
int size_shift = size == 64 ? 1 : 0;
int i, j;
switch (mode) {
case V_PRED:
for (i = 0; i < size; ++i) {
for (j = 0; j < size; ++j) {
int k = i * interstride + j;
- int scale = weights1d[i * size_scale];
+ int scale = weights1d[i * size_scale >> size_shift];
interpred[k] =
((scale_max - scale) * interpred[k] +
scale * intrapred[i * intrastride + j] + scale_round)
@@ -539,7 +544,7 @@ static void combine_interintra(MB_PREDICTION_MODE mode,
for (i = 0; i < size; ++i) {
for (j = 0; j < size; ++j) {
int k = i * interstride + j;
- int scale = weights1d[j * size_scale];
+ int scale = weights1d[j * size_scale >> size_shift];
interpred[k] =
((scale_max - scale) * interpred[k] +
scale * intrapred[i * intrastride + j] + scale_round)
@@ -553,8 +558,9 @@ static void combine_interintra(MB_PREDICTION_MODE mode,
for (i = 0; i < size; ++i) {
for (j = 0; j < size; ++j) {
int k = i * interstride + j;
- int scale = (weights2d[i * size_scale * 32 + j * size_scale] +
- weights1d[i * size_scale]) >> 1;
+ int scale = (weights2d[(i * size_scale * 32 +
+ j * size_scale) >> size_shift] +
+ weights1d[i * size_scale >> size_shift]) >> 1;
interpred[k] =
((scale_max - scale) * interpred[k] +
scale * intrapred[i * intrastride + j] + scale_round)
@@ -568,8 +574,9 @@ static void combine_interintra(MB_PREDICTION_MODE mode,
for (i = 0; i < size; ++i) {
for (j = 0; j < size; ++j) {
int k = i * interstride + j;
- int scale = (weights2d[i * size_scale * 32 + j * size_scale] +
- weights1d[j * size_scale]) >> 1;
+ int scale = (weights2d[(i * size_scale * 32 +
+ j * size_scale) >> size_shift] +
+ weights1d[j * size_scale >> size_shift]) >> 1;
interpred[k] =
((scale_max - scale) * interpred[k] +
scale * intrapred[i * intrastride + j] + scale_round)
@@ -582,7 +589,8 @@ static void combine_interintra(MB_PREDICTION_MODE mode,
for (i = 0; i < size; ++i) {
for (j = 0; j < size; ++j) {
int k = i * interstride + j;
- int scale = weights2d[i * size_scale * 32 + j * size_scale];
+ int scale = weights2d[(i * size_scale * 32 +
+ j * size_scale) >> size_shift];
interpred[k] =
((scale_max - scale) * interpred[k] +
scale * intrapred[i * intrastride + j] + scale_round)
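The weight tables appear sized for a 32-wide block: size_scale stretches them over 4/8/16-wide blocks, and the new size_shift compresses a 64-wide block into them, so adjacent rows or columns of a 64x64 block share a blending weight. The effective 1-D index, factored out as a sketch:

/* Sketch: blending-weight index for a block edge of 'size' pixels,
 * assuming a 32-entry weights1d table. */
static int weight1d_index(int i, int size) {
  const int size_scale = size >= 32 ? 1 :
                         size == 16 ? 2 :
                         size == 8  ? 4 : 8;
  const int size_shift = size == 64 ? 1 : 0;
  return (i * size_scale) >> size_shift;  /* always lands in 0..31 */
}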
@@ -695,6 +703,47 @@ void vp9_build_interintra_32x32_predictors_sb(MACROBLOCKD *xd,
vp9_build_interintra_32x32_predictors_sby(xd, ypred, ystride);
vp9_build_interintra_32x32_predictors_sbuv(xd, upred, vpred, uvstride);
}
void vp9_build_interintra_64x64_predictors_sby(MACROBLOCKD *xd,
uint8_t *ypred,
int ystride) {
uint8_t intrapredictor[4096];
const int mode = xd->mode_info_context->mbmi.interintra_mode;
vp9_build_intra_predictors_internal(xd->dst.y_buffer, xd->dst.y_stride,
intrapredictor, 64, mode, 64,
xd->up_available, xd->left_available);
combine_interintra(xd->mode_info_context->mbmi.interintra_mode,
ypred, ystride, intrapredictor, 64, 64);
}
void vp9_build_interintra_64x64_predictors_sbuv(MACROBLOCKD *xd,
uint8_t *upred,
uint8_t *vpred,
int uvstride) {
uint8_t uintrapredictor[1024];
uint8_t vintrapredictor[1024];
const int mode = xd->mode_info_context->mbmi.interintra_uv_mode;
vp9_build_intra_predictors_internal(xd->dst.u_buffer, xd->dst.uv_stride,
uintrapredictor, 32, mode, 32,
xd->up_available, xd->left_available);
vp9_build_intra_predictors_internal(xd->dst.v_buffer, xd->dst.uv_stride,
vintrapredictor, 32, mode, 32,
xd->up_available, xd->left_available);
combine_interintra(xd->mode_info_context->mbmi.interintra_uv_mode,
upred, uvstride, uintrapredictor, 32, 32);
combine_interintra(xd->mode_info_context->mbmi.interintra_uv_mode,
vpred, uvstride, vintrapredictor, 32, 32);
}
void vp9_build_interintra_64x64_predictors_sb(MACROBLOCKD *xd,
uint8_t *ypred,
uint8_t *upred,
uint8_t *vpred,
int ystride,
int uvstride) {
vp9_build_interintra_64x64_predictors_sby(xd, ypred, ystride);
vp9_build_interintra_64x64_predictors_sbuv(xd, upred, vpred, uvstride);
}
#endif
#endif
@@ -719,6 +768,13 @@ void vp9_build_intra_predictors_sby_s(MACROBLOCKD *xd) {
xd->mode_info_context->mbmi.mode, 32,
xd->up_available, xd->left_available);
}
void vp9_build_intra_predictors_sb64y_s(MACROBLOCKD *xd) {
vp9_build_intra_predictors_internal(xd->dst.y_buffer, xd->dst.y_stride,
xd->dst.y_buffer, xd->dst.y_stride,
xd->mode_info_context->mbmi.mode, 64,
xd->up_available, xd->left_available);
}
#endif
#if CONFIG_COMP_INTRA_PRED
@@ -778,6 +834,13 @@ void vp9_build_intra_predictors_sbuv_s(MACROBLOCKD *xd) {
xd->mode_info_context->mbmi.uv_mode,
16);
}
void vp9_build_intra_predictors_sb64uv_s(MACROBLOCKD *xd) {
vp9_build_intra_predictors_mbuv_internal(xd, xd->dst.u_buffer,
xd->dst.v_buffer, xd->dst.uv_stride,
xd->mode_info_context->mbmi.uv_mode,
32);
}
#endif
#if CONFIG_COMP_INTRA_PRED
......
@@ -41,6 +41,12 @@ extern void vp9_build_interintra_32x32_predictors_sb(MACROBLOCKD *xd,
uint8_t *vpred,
int ystride,
int uvstride);
extern void vp9_build_interintra_64x64_predictors_sb(MACROBLOCKD *xd,
uint8_t *ypred,
uint8_t *upred,
uint8_t *vpred,
int ystride,
int uvstride);
#endif // CONFIG_SUPERBLOCKS
#endif // VP9_COMMON_VP9_RECONINTRA_H_
@@ -434,12 +434,9 @@ void vp9_comp_intra4x4_predict_c(BLOCKD *x,
* to the right prediction have filled in pixels to use.
*/
void vp9_intra_prediction_down_copy(MACROBLOCKD *xd) {
- int extend_edge = (xd->mb_to_right_edge == 0 && xd->mb_index < 2);
+ int extend_edge = xd->mb_to_right_edge == 0 && xd->mb_index < 2;
uint8_t *above_right = *(xd->block[0].base_dst) + xd->block[0].dst -
xd->block[0].dst_stride + 16;
- uint32_t *src_ptr = (uint32_t *)
- (above_right - (xd->mb_index == 3 ? 16 * xd->block[0].dst_stride : 0));
uint32_t *dst_ptr0 = (uint32_t *)above_right;
uint32_t *dst_ptr1 =
(uint32_t *)(above_right + 4 * xd->block[0].dst_stride);
@@ -448,6 +445,17 @@ void vp9_intra_prediction_down_copy(MACROBLOCKD *xd) {
uint32_t *dst_ptr3 =
(uint32_t *)(above_right + 12 * xd->block[0].dst_stride);
uint32_t *src_ptr = (uint32_t *) above_right;
if ((xd->sb_index >= 2 && xd->mb_to_right_edge == 0) ||
(xd->sb_index == 3 && xd->mb_index & 1))
src_ptr = (uint32_t *) (((uint8_t *) src_ptr) - 32 *
xd->block[0].dst_stride);
if (xd->mb_index == 3 ||
(xd->mb_to_right_edge == 0 && xd->mb_index == 2))
src_ptr = (uint32_t *) (((uint8_t *) src_ptr) - 16 *
xd->block[0].dst_stride);
if (extend_edge) {
*src_ptr = ((uint8_t *) src_ptr)[-1] * 0x01010101U;
}
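What the rewritten offsets do: above_right points at the 4x4 row just above and right of the current MB, and the two conditionals step the source up 16 or 32 rows (up to 48 combined) whenever the true above-right pixels are unavailable, either past the frame edge or not yet reconstructed at this point in the superblock scan; sb_index is the 32x32's position inside a 64x64 and mb_index the MB's position inside a 32x32. Consolidated as a sketch with the same names:

/* Sketch: total upward displacement of the above-right source row. */
int rows_up = 0;
if ((xd->sb_index >= 2 && xd->mb_to_right_edge == 0) ||
    (xd->sb_index == 3 && (xd->mb_index & 1)))
  rows_up += 32;
if (xd->mb_index == 3 ||
    (xd->mb_to_right_edge == 0 && xd->mb_index == 2))
  rows_up += 16;
src_ptr = (uint32_t *)((uint8_t *)src_ptr - rows_up * xd->block[0].dst_stride);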
......
@@ -418,6 +418,9 @@ if [ "$CONFIG_VP9_ENCODER" = "yes" ]; then
prototype unsigned int vp9_variance32x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_variance32x32
prototype unsigned int vp9_variance64x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_variance64x64
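specialize with no architecture list wires up only the C version. A hypothetical C reference for the new 64x64 variance, following the usual libvpx definition, where *sse returns the sum of squared differences and the squared mean is removed (N = 64 * 64 = 2^12):

#include <stdint.h>

unsigned int variance64x64_sketch(const uint8_t *src_ptr, int source_stride,
                                  const uint8_t *ref_ptr, int ref_stride,
                                  unsigned int *sse) {
  unsigned int ssd = 0;  /* fits: 64 * 64 * 255^2 < 2^32 */
  int sum = 0, i, j;
  for (i = 0; i < 64; i++) {
    for (j = 0; j < 64; j++) {
      const int diff = src_ptr[j] - ref_ptr[j];
      sum += diff;
      ssd += diff * diff;
    }
    src_ptr += source_stride;
    ref_ptr += ref_stride;
  }
  *sse = ssd;
  return ssd - (unsigned int)(((int64_t)sum * sum) >> 12);
}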
prototype unsigned int vp9_variance16x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_variance16x16 mmx sse2
vp9_variance16x16_sse2=vp9_variance16x16_wmt
@@ -443,6 +446,9 @@ specialize vp9_variance4x4 mmx sse2
vp9_variance4x4_sse2=vp9_variance4x4_wmt
vp9_variance4x4_mmx=vp9_variance4x4_mmx
prototype unsigned int vp9_sub_pixel_variance64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int Refstride, unsigned int *sse"
specialize vp9_sub_pixel_variance64x64
prototype unsigned int vp9_sub_pixel_variance32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int Refstride, unsigned int *sse"
specialize vp9_sub_pixel_variance32x32
@@ -467,6 +473,9 @@ prototype unsigned int vp9_sub_pixel_variance4x4 "const uint8_t *src_ptr, int so
specialize vp9_sub_pixel_variance4x4 sse2 mmx
vp9_sub_pixel_variance4x4_sse2=vp9_sub_pixel_variance4x4_wmt
prototype unsigned int vp9_sad64x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
specialize vp9_sad64x64
prototype unsigned int vp9_sad32x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
specialize vp9_sad32x32
@@ -502,6 +511,15 @@ prototype unsigned int vp9_variance_halfpixvar16x16_hv "const uint8_t *src_ptr,