diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index fe768046b161c063370cebe97fc1a4490fbc466c..e40a6096eb17f999a0c043b9f4d5cb8a7a7ecba9 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -26,7 +26,8 @@ typedef struct {
 // Structure to hold snapshot of coding context during the mode picking process
 typedef struct {
   MODE_INFO mic;
-  uint8_t zcoeff_blk[256];
+  uint8_t *zcoeff_blk;
+  int num_4x4_blk;
   int skip;
   int_mv best_ref_mv;
   int_mv second_best_ref_mv;
@@ -177,6 +178,45 @@ struct macroblock {
                          int y_blocks);
 };
 
+// TODO(jingning): the variables used here are little complicated. need further
+// refactoring on organizing the temporary buffers, when recursive
+// partition down to 4x4 block size is enabled.
+static PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x, BLOCK_SIZE bsize) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+
+  switch (bsize) {
+    case BLOCK_64X64:
+      return &x->sb64_context;
+    case BLOCK_64X32:
+      return &x->sb64x32_context[xd->sb_index];
+    case BLOCK_32X64:
+      return &x->sb32x64_context[xd->sb_index];
+    case BLOCK_32X32:
+      return &x->sb32_context[xd->sb_index];
+    case BLOCK_32X16:
+      return &x->sb32x16_context[xd->sb_index][xd->mb_index];
+    case BLOCK_16X32:
+      return &x->sb16x32_context[xd->sb_index][xd->mb_index];
+    case BLOCK_16X16:
+      return &x->mb_context[xd->sb_index][xd->mb_index];
+    case BLOCK_16X8:
+      return &x->sb16x8_context[xd->sb_index][xd->mb_index][xd->b_index];
+    case BLOCK_8X16:
+      return &x->sb8x16_context[xd->sb_index][xd->mb_index][xd->b_index];
+    case BLOCK_8X8:
+      return &x->sb8x8_context[xd->sb_index][xd->mb_index][xd->b_index];
+    case BLOCK_8X4:
+      return &x->sb8x4_context[xd->sb_index][xd->mb_index][xd->b_index];
+    case BLOCK_4X8:
+      return &x->sb4x8_context[xd->sb_index][xd->mb_index][xd->b_index];
+    case BLOCK_4X4:
+      return &x->ab4x4_context[xd->sb_index][xd->mb_index][xd->b_index];
+    default:
+      assert(0);
+      return NULL;
+  }
+}
+
 struct rdcost_block_args {
   MACROBLOCK *x;
   ENTROPY_CONTEXT t_above[16];
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 6e8e1d13d3a071d161f5ef138c26d017a80d94b4..0515db2be3f47c250f17ce9100ec12deeba082d0 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -410,7 +410,7 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
 
   x->skip = ctx->skip;
   vpx_memcpy(x->zcoeff_blk[mbmi->tx_size], ctx->zcoeff_blk,
-             sizeof(ctx->zcoeff_blk));
+             sizeof(uint8_t) * ctx->num_4x4_blk);
 
   if (!output_enabled)
     return;
@@ -690,45 +690,6 @@ static void update_stats(VP9_COMP *cpi) {
   }
 }
 
-// TODO(jingning): the variables used here are little complicated. need further
-// refactoring on organizing the temporary buffers, when recursive
-// partition down to 4x4 block size is enabled.
-static PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x, BLOCK_SIZE bsize) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-
-  switch (bsize) {
-    case BLOCK_64X64:
-      return &x->sb64_context;
-    case BLOCK_64X32:
-      return &x->sb64x32_context[xd->sb_index];
-    case BLOCK_32X64:
-      return &x->sb32x64_context[xd->sb_index];
-    case BLOCK_32X32:
-      return &x->sb32_context[xd->sb_index];
-    case BLOCK_32X16:
-      return &x->sb32x16_context[xd->sb_index][xd->mb_index];
-    case BLOCK_16X32:
-      return &x->sb16x32_context[xd->sb_index][xd->mb_index];
-    case BLOCK_16X16:
-      return &x->mb_context[xd->sb_index][xd->mb_index];
-    case BLOCK_16X8:
-      return &x->sb16x8_context[xd->sb_index][xd->mb_index][xd->b_index];
-    case BLOCK_8X16:
-      return &x->sb8x16_context[xd->sb_index][xd->mb_index][xd->b_index];
-    case BLOCK_8X8:
-      return &x->sb8x8_context[xd->sb_index][xd->mb_index][xd->b_index];
-    case BLOCK_8X4:
-      return &x->sb8x4_context[xd->sb_index][xd->mb_index][xd->b_index];
-    case BLOCK_4X8:
-      return &x->sb4x8_context[xd->sb_index][xd->mb_index][xd->b_index];
-    case BLOCK_4X4:
-      return &x->ab4x4_context[xd->sb_index][xd->mb_index][xd->b_index];
-    default:
-      assert(0);
-      return NULL;
-  }
-}
-
 static BLOCK_SIZE *get_sb_partitioning(MACROBLOCK *x, BLOCK_SIZE bsize) {
   MACROBLOCKD *const xd = &x->e_mbd;
   switch (bsize) {
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index db74df75d8844c81d0e3bdfdac25357118f17734..f6b2a287653e9fdee65d5f6eaf5248b00a46a96d 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -1416,6 +1416,94 @@ static void cal_nmvsadcosts_hp(int *mvsadcost[2]) {
   } while (++i <= MV_MAX);
 }
 
+static void init_pick_mode_context(VP9_COMP *cpi) {
+  int i;
+  MACROBLOCK *x = &cpi->mb;
+  MACROBLOCKD *xd = &x->e_mbd;
+  VP9_COMMON *cm = &cpi->common;
+
+  for (i = 0; i < BLOCK_SIZES; ++i) {
+    const int num_4x4_w = num_4x4_blocks_wide_lookup[i];
+    const int num_4x4_h = num_4x4_blocks_high_lookup[i];
+    const int num_4x4_blk = MAX(4, num_4x4_w * num_4x4_h);
+    if (i < BLOCK_16X16) {
+      for (xd->sb_index = 0; xd->sb_index < 4; ++xd->sb_index) {
+        for (xd->mb_index = 0; xd->mb_index < 4; ++xd->mb_index) {
+          for (xd->b_index = 0; xd->b_index < 16 / num_4x4_blk; ++xd->b_index) {
+            PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
+            ctx->num_4x4_blk = num_4x4_blk;
+            CHECK_MEM_ERROR(cm, ctx->zcoeff_blk,
+                            vpx_calloc(num_4x4_blk, sizeof(uint8_t)));
+          }
+        }
+      }
+    } else if (i < BLOCK_32X32) {
+      for (xd->sb_index = 0; xd->sb_index < 4; ++xd->sb_index) {
+        for (xd->mb_index = 0; xd->mb_index < 64 / num_4x4_blk;
+             ++xd->mb_index) {
+          PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
+          ctx->num_4x4_blk = num_4x4_blk;
+          CHECK_MEM_ERROR(cm, ctx->zcoeff_blk,
+                          vpx_calloc(num_4x4_blk, sizeof(uint8_t)));
+        }
+      }
+    } else if (i < BLOCK_64X64) {
+      for (xd->sb_index = 0; xd->sb_index < 256 / num_4x4_blk; ++xd->sb_index) {
+        PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
+        ctx->num_4x4_blk = num_4x4_blk;
+        CHECK_MEM_ERROR(cm, ctx->zcoeff_blk,
+                        vpx_calloc(num_4x4_blk, sizeof(uint8_t)));
+      }
+    } else {
+      PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
+      ctx->num_4x4_blk = num_4x4_blk;
+      CHECK_MEM_ERROR(cm, ctx->zcoeff_blk,
+                      vpx_calloc(num_4x4_blk, sizeof(uint8_t)));
+    }
+  }
+}
+
+static void free_pick_mode_context(MACROBLOCK *x) {
+  int i;
+  MACROBLOCKD *xd = &x->e_mbd;
+
+  for (i = 0; i < BLOCK_SIZES; ++i) {
+    const int num_4x4_w = num_4x4_blocks_wide_lookup[i];
+    const int num_4x4_h = num_4x4_blocks_high_lookup[i];
+    const int num_4x4_blk = MAX(4, num_4x4_w * num_4x4_h);
+    if (i < BLOCK_16X16) {
+      for (xd->sb_index = 0; xd->sb_index < 4; ++xd->sb_index) {
+        for (xd->mb_index = 0; xd->mb_index < 4; ++xd->mb_index) {
+          for (xd->b_index = 0; xd->b_index < 16 / num_4x4_blk; ++xd->b_index) {
+            PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
+            vpx_free(ctx->zcoeff_blk);
+            ctx->zcoeff_blk = 0;
+          }
+        }
+      }
+    } else if (i < BLOCK_32X32) {
+      for (xd->sb_index = 0; xd->sb_index < 4; ++xd->sb_index) {
+        for (xd->mb_index = 0; xd->mb_index < 64 / num_4x4_blk;
+             ++xd->mb_index) {
+          PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
+          vpx_free(ctx->zcoeff_blk);
+          ctx->zcoeff_blk = 0;
+        }
+      }
+    } else if (i < BLOCK_64X64) {
+      for (xd->sb_index = 0; xd->sb_index < 256 / num_4x4_blk; ++xd->sb_index) {
+        PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
+        vpx_free(ctx->zcoeff_blk);
+        ctx->zcoeff_blk = 0;
+      }
+    } else {
+      PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
+      vpx_free(ctx->zcoeff_blk);
+      ctx->zcoeff_blk = 0;
+    }
+  }
+}
+
 VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
   int i, j;
   volatile union {
@@ -1452,6 +1540,8 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
 
   init_config((VP9_PTR)cpi, oxcf);
 
+  init_pick_mode_context(cpi);
+
   cm->current_video_frame = 0;
   cpi->kf_overspend_bits = 0;
   cpi->kf_bitrate_adjustment = 0;
@@ -1915,6 +2005,7 @@ void vp9_remove_compressor(VP9_PTR *ptr) {
 #endif
   }
 
+  free_pick_mode_context(&cpi->mb);
   dealloc_compressor_data(cpi);
   vpx_free(cpi->mb.ss);
   vpx_free(cpi->tok);
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 0fc715299b4baaf58e8527fcd71569b9701010cd..bdc51527b85f5a09925c23002a2f4cb9a1dfcb73 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -3577,7 +3577,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
       best_mbmode = *mbmi;
       best_skip2 = this_skip2;
       vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
-                 sizeof(ctx->zcoeff_blk));
+                 sizeof(uint8_t) * ctx->num_4x4_blk);
 
       // TODO(debargha): enhance this test with a better distortion prediction
       // based on qp, activity mask and history
@@ -4317,7 +4317,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
       best_mbmode = *mbmi;
      best_skip2 = this_skip2;
       vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
-                 sizeof(ctx->zcoeff_blk));
+                 sizeof(uint8_t) * ctx->num_4x4_blk);
 
       for (i = 0; i < 4; i++)
         best_bmodes[i] = xd->mi_8x8[0]->bmi[i];
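
Note on the sizeof() changes in vp9_encodeframe.c and vp9_rdopt.c: once PICK_MODE_CONTEXT's zcoeff_blk becomes a heap-allocated pointer, sizeof(ctx->zcoeff_blk) evaluates to the size of the pointer (4 or 8 bytes) rather than the old 256-byte array, so each copy site has to switch to the explicit count carried in ctx->num_4x4_blk. The standalone sketch below is not part of the patch; OldCtx and NewCtx are made-up names used only to illustrate that pitfall.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Old layout: fixed-size member, sizeof() yields the full buffer (256 bytes). */
typedef struct {
  uint8_t zcoeff_blk[256];
} OldCtx;

/* New layout: heap buffer sized per block size, so the element count must be
 * stored next to the pointer and used explicitly in every copy. */
typedef struct {
  uint8_t *zcoeff_blk;
  int num_4x4_blk;
} NewCtx;

int main(void) {
  uint8_t src[256] = { 0 };
  OldCtx old_ctx;
  NewCtx new_ctx;

  new_ctx.num_4x4_blk = 64;  /* e.g. a 32x32 block: 8 * 8 4x4 blocks */
  new_ctx.zcoeff_blk = calloc(new_ctx.num_4x4_blk, sizeof(uint8_t));
  if (new_ctx.zcoeff_blk == NULL)
    return 1;

  /* Array member: sizeof() is the buffer size, so this copies 256 bytes. */
  memcpy(old_ctx.zcoeff_blk, src, sizeof(old_ctx.zcoeff_blk));

  /* Pointer member: sizeof(new_ctx.zcoeff_blk) would silently copy only the
   * pointer width, so the stored element count is used instead. */
  memcpy(new_ctx.zcoeff_blk, src, sizeof(uint8_t) * new_ctx.num_4x4_blk);

  printf("sizeof array member: %zu, sizeof pointer member: %zu\n",
         sizeof(old_ctx.zcoeff_blk), sizeof(new_ctx.zcoeff_blk));

  free(new_ctx.zcoeff_blk);
  return 0;
}

Keeping num_4x4_blk inside the context, rather than recomputing it from the block size, lets update_state() and the rd loops copy the buffer without needing to know which block size the context belongs to.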
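Note on buffer sizing in init_pick_mode_context(): each context's zcoeff_blk holds one flag per 4x4 block of its block size, clamped to at least four entries via MAX(4, num_4x4_w * num_4x4_h) so the sub-8x8 sizes still get an 8x8-sized buffer. The loop bounds 16 / num_4x4_blk, 64 / num_4x4_blk and 256 / num_4x4_blk then enumerate the contexts of a given size inside a 16x16 block, a 32x32 block and the 64x64 superblock respectively, which in every branch comes to 256 / num_4x4_blk contexts per superblock. The sketch below only prints those per-block-size numbers; its lookup tables are local stand-ins derived from the block dimensions, not the encoder's actual num_4x4_blocks_wide_lookup / num_4x4_blocks_high_lookup arrays.

#include <stdio.h>

#define MAX(a, b) ((a) > (b) ? (a) : (b))

/* Stand-in tables: one entry per block size, width/height in 4x4 units. */
static const char *const names[] = {
  "4x4",  "4x8",  "8x4",  "8x8",  "8x16", "16x8", "16x16",
  "16x32", "32x16", "32x32", "32x64", "64x32", "64x64"
};
static const int num_4x4_w[] = { 1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16 };
static const int num_4x4_h[] = { 1, 2, 1, 2, 4, 2, 4, 8, 4, 8, 16, 8, 16 };

int main(void) {
  int i;
  for (i = 0; i < 13; ++i) {
    /* Per-context buffer size: one byte per 4x4 block, minimum of four. */
    const int num_4x4_blk = MAX(4, num_4x4_w[i] * num_4x4_h[i]);
    /* A 64x64 superblock spans 16 * 16 = 256 4x4 blocks, so this is the
     * number of contexts of this size allocated per superblock. */
    const int contexts_per_sb = 256 / num_4x4_blk;
    printf("%-6s  zcoeff_blk bytes: %3d  contexts per 64x64 SB: %3d\n",
           names[i], num_4x4_blk, contexts_per_sb);
  }
  return 0;
}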