From 829e2af0e10f9b25f321d962c93614ea2628e5b9 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Mon, 12 Jul 2010 16:36:53 -0400
Subject: [PATCH] dixie: add normal loopfilter

This is the more naive implementation as described in the bitstream
guide, rather than the masking version implemented in the reference
code. However, the core function prototypes were left as-is to make it
easy to plug in the reference assembly code.

Verified loopfiltered output matches reference decoder for 500 frames.

Change-Id: Ib4f197e864f07dbb918b6d5e742c6110d57c1f40
---
Review fixups folded in: dropped the "#define static" debugging leftover,
moved the stray <assert.h> include to the top of dixie_loopfilter.c,
removed an unused local, clamped the segment-adjusted filter level before
the deltas are applied, forward-declared struct vp8_decoder_ctx in the
header, and made the bit-31 shift in tokens.c unsigned. Output has not
been re-verified against the reference decoder since these changes.

 vp8/dixie/dixie.c            |   7 +
 vp8/dixie/dixie_loopfilter.c | 447 +++++++++++++++++++++++++++++++++++
 vp8/dixie/dixie_loopfilter.h |  25 ++
 vp8/dixie/tokens.c           |   1 +
 vp8/vp8dx.mk                 |   2 +
 5 files changed, 482 insertions(+)
 create mode 100644 vp8/dixie/dixie_loopfilter.c
 create mode 100644 vp8/dixie/dixie_loopfilter.h

diff --git a/vp8/dixie/dixie.c b/vp8/dixie/dixie.c
index cd61ff50e2..6328163340 100644
--- a/vp8/dixie/dixie.c
+++ b/vp8/dixie/dixie.c
@@ -15,6 +15,7 @@
 #include "modemv.h"
 #include "tokens.h"
 #include "predict.h"
+#include "dixie_loopfilter.h"
 #include <string.h>
 #include <assert.h>
 
@@ -384,10 +385,16 @@ decode_frame(struct vp8_decoder_ctx *ctx,
         vp8_dixie_tokens_process_row(ctx, partition, row, 0, ctx->mb_cols);
         vp8_dixie_predict_process_row(ctx, row, 0, ctx->mb_cols);
 
+        if (ctx->loopfilter_hdr.level && row)
+            vp8_dixie_loopfilter_process_row(ctx, row - 1, 0, ctx->mb_cols);
+
         if (++partition == ctx->token_hdr.partitions)
             partition = 0;
     }
 
+    if (ctx->loopfilter_hdr.level)
+        vp8_dixie_loopfilter_process_row(ctx, row - 1, 0, ctx->mb_cols);
+
     ctx->frame_cnt++;
 
     if (!ctx->reference_hdr.refresh_entropy)
diff --git a/vp8/dixie/dixie_loopfilter.c b/vp8/dixie/dixie_loopfilter.c
new file mode 100644
index 0000000000..5aec36c571
--- /dev/null
+++ b/vp8/dixie/dixie_loopfilter.c
@@ -0,0 +1,447 @@
+/*
+ * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "dixie.h"
+#include "dixie_loopfilter.h"
+#include <assert.h>
+
+#define ABS(x) ((x) >= 0 ? (x) : -(x))
+
+/* Pixel taps relative to the edge being filtered. These macros expect
+ * `pixels` and `stride` to be in scope: p3..p0 sit before `pixels`,
+ * q0..q3 start at it, each `stride` apart.
+ */
+#define p3 pixels[-4*stride]
+#define p2 pixels[-3*stride]
+#define p1 pixels[-2*stride]
+#define p0 pixels[-1*stride]
+#define q0 pixels[ 0*stride]
+#define q1 pixels[ 1*stride]
+#define q2 pixels[ 2*stride]
+#define q3 pixels[ 3*stride]
+
+
+/* Clamp x to the signed 8-bit range [-128, 127]. */
+static int
+saturate_int8(int x)
+{
+    if (x < -128)
+        return -128;
+
+    if (x > 127)
+        return 127;
+
+    return x;
+}
+
+
+/* Clamp x to the unsigned 8-bit range [0, 255]. */
+static int
+saturate_uint8(int x)
+{
+    if (x < 0)
+        return 0;
+
+    if (x > 255)
+        return 255;
+
+    return x;
+}
+
+
+/* Nonzero when either inner tap difference next to the edge (p1/p0 or
+ * q1/q0) exceeds hev_threshold.
+ */
+static int
+high_edge_variance(unsigned char *pixels,
+                   int            stride,
+                   int            hev_threshold)
+{
+    return ABS(p1 - p0) > hev_threshold || ABS(q1 - q0) > hev_threshold;
+}
+
+
+/* Nonzero when the edge at `pixels` should be filtered: the step across
+ * the edge is within 2 * edge_limit + interior_limit and every
+ * neighboring tap difference is within interior_limit.
+ */
+static int
+normal_threshold(unsigned char *pixels,
+                 int            stride,
+                 int            edge_limit,
+                 int            interior_limit)
+{
+    int E = edge_limit;
+    int I = interior_limit;
+
+    /* Note: Deviates from spec */
+    return (ABS(p0 - q0) * 2 + (ABS(p1 - q1) >> 1)) <= 2 * E + I
+           && ABS(p3 - p2) <= I && ABS(p2 - p1) <= I && ABS(p1 - p0) <= I
+           && ABS(q3 - q2) <= I && ABS(q2 - q1) <= I && ABS(q1 - q0) <= I;
+}
+
+
+/* Adjust p0/q0 across the edge. With use_outer_taps set, p1 - q1
+ * contributes to the filter value; otherwise p1/q1 are adjusted too.
+ */
+static void
+filter_common(unsigned char *pixels,
+              int            stride,
+              int            use_outer_taps)
+{
+    int a, f1, f2;
+
+    /* This logic cribbed from ffvp8, which incorporates a number of
+     * deviations from the bitstream guide to match what the libvpx
+     * reference code does.
+     */
+
+    a = 3 * (q0 - p0);
+
+    if (use_outer_taps)
+        a += saturate_int8(p1 - q1);
+
+    a = saturate_int8(a);
+
+    f1 = ((a + 4 > 127) ? 127 : a + 4) >> 3;
+    f2 = ((a + 3 > 127) ? 127 : a + 3) >> 3;
+
+    p0 = saturate_uint8(p0 + f2);
+    q0 = saturate_uint8(q0 - f1);
+
+    if (!use_outer_taps)
+    {
+        /* This handles the case of subblock_filter() (from the bitstream
+         * guide).
+         */
+        a = (f1 + 1) >> 1;
+        p1 = saturate_uint8(p1 + a);
+        q1 = saturate_uint8(q1 - a);
+    }
+}
+
+
+/* Macroblock-edge filter: adjusts three taps on each side of the edge
+ * (p2..q2) using 27/128, 18/128 and 9/128 of the filter value w.
+ */
+static void
+filter_mb_edge(unsigned char *pixels,
+               int            stride)
+{
+    int w, a;
+
+    w = saturate_int8(saturate_int8(p1 - q1) + 3 * (q0 - p0));
+
+    a = (27 * w + 63) >> 7;
+    p0 = saturate_uint8(p0 + a);
+    q0 = saturate_uint8(q0 - a);
+
+    a = (18 * w + 63) >> 7;
+    p1 = saturate_uint8(p1 + a);
+    q1 = saturate_uint8(q1 - a);
+
+    a = (9 * w + 63) >> 7;
+    p2 = saturate_uint8(p2 + a);
+    q2 = saturate_uint8(q2 - a);
+}
+
+
+/* Filter a vertical macroblock edge at src: 8 * size rows, with the taps
+ * running horizontally (tap stride 1).
+ */
+static void
+filter_mb_v_edge(unsigned char *src,
+                 int            stride,
+                 int            edge_limit,
+                 int            interior_limit,
+                 int            hev_threshold,
+                 int            size)
+{
+    int i;
+
+    for (i = 0; i < 8 * size; i++)
+    {
+        if (normal_threshold(src, 1, edge_limit, interior_limit))
+        {
+            if (high_edge_variance(src, 1, hev_threshold))
+                filter_common(src, 1, 1);
+            else
+                filter_mb_edge(src, 1);
+        }
+
+        src += stride;
+    }
+}
+
+
+/* Filter a vertical subblock edge at src: 8 * size rows, with the taps
+ * running horizontally (tap stride 1).
+ */
+static void
+filter_subblock_v_edge(unsigned char *src,
+                       int            stride,
+                       int            edge_limit,
+                       int            interior_limit,
+                       int            hev_threshold,
+                       int            size)
+{
+    int i;
+
+    for (i = 0; i < 8 * size; i++)
+    {
+        if (normal_threshold(src, 1, edge_limit, interior_limit))
+            filter_common(src, 1, high_edge_variance(src, 1, hev_threshold));
+
+        src += stride;
+    }
+}
+
+
+/* Filter a horizontal macroblock edge at src: 8 * size columns, with the
+ * taps running vertically (tap stride `stride`).
+ */
+static void
+filter_mb_h_edge(unsigned char *src,
+                 int            stride,
+                 int            edge_limit,
+                 int            interior_limit,
+                 int            hev_threshold,
+                 int            size)
+{
+    int i;
+
+    for (i = 0; i < 8 * size; i++)
+    {
+        if (normal_threshold(src, stride, edge_limit, interior_limit))
+        {
+            if (high_edge_variance(src, stride, hev_threshold))
+                filter_common(src, stride, 1);
+            else
+                filter_mb_edge(src, stride);
+        }
+
+        src += 1;
+    }
+}
+
+
+/* Filter a horizontal subblock edge at src: 8 * size columns, with the
+ * taps running vertically (tap stride `stride`).
+ */
+static void
+filter_subblock_h_edge(unsigned char *src,
+                       int            stride,
+                       int            edge_limit,
+                       int            interior_limit,
+                       int            hev_threshold,
+                       int            size)
+{
+    int i;
+
+    for (i = 0; i < 8 * size; i++)
+    {
+        if (normal_threshold(src, stride, edge_limit, interior_limit))
+            filter_common(src, stride,
+                          high_edge_variance(src, stride, hev_threshold));
+
+        src += 1;
+    }
+}
+
+
+/* Clamp a loop filter level to the range [0, 63]. */
+static int
+clamp_filter_level(int level)
+{
+    if (level > 63)
+        return 63;
+
+    if (level < 0)
+        return 0;
+
+    return level;
+}
+
+
+/* Derive the per-macroblock edge limit (the filter level), interior limit
+ * and high edge variance threshold from the frame, segment and delta
+ * settings in ctx. Results are written through the three out-pointers.
+ */
+static void
+calculate_filter_parameters(struct vp8_decoder_ctx *ctx,
+                            struct mb_info         *mbi,
+                            int                    *edge_limit_,
+                            int                    *interior_limit_,
+                            int                    *hev_threshold_)
+{
+    int filter_level, interior_limit, hev_threshold;
+
+    /* Reference code/spec seems to conflate filter_level and edge_limit */
+
+    filter_level = ctx->loopfilter_hdr.level;
+
+    if (ctx->segment_hdr.enabled)
+    {
+        if (!ctx->segment_hdr.abs)
+            filter_level += ctx->segment_hdr.lf_level[mbi->base.segment_id];
+        else
+            filter_level = ctx->segment_hdr.lf_level[mbi->base.segment_id];
+    }
+
+    /* Clamp the segment-adjusted level before the reference/mode deltas
+     * are applied, to match the reference decoder.
+     */
+    filter_level = clamp_filter_level(filter_level);
+
+    if (ctx->loopfilter_hdr.delta_enabled)
+    {
+        filter_level += ctx->loopfilter_hdr.ref_delta[mbi->base.ref_frame];
+
+        if (mbi->base.ref_frame == CURRENT_FRAME)
+        {
+            if (mbi->base.y_mode == B_PRED)
+                filter_level += ctx->loopfilter_hdr.mode_delta[0];
+        }
+        else if (mbi->base.y_mode == ZEROMV)
+            filter_level += ctx->loopfilter_hdr.mode_delta[1];
+        else if (mbi->base.y_mode == SPLITMV)
+            filter_level += ctx->loopfilter_hdr.mode_delta[3];
+        else
+            filter_level += ctx->loopfilter_hdr.mode_delta[2];
+    }
+
+    filter_level = clamp_filter_level(filter_level);
+
+    interior_limit = filter_level;
+
+    if (ctx->loopfilter_hdr.sharpness)
+    {
+        interior_limit >>= ctx->loopfilter_hdr.sharpness > 4 ? 2 : 1;
+
+        if (interior_limit > 9 - ctx->loopfilter_hdr.sharpness)
+            interior_limit = 9 - ctx->loopfilter_hdr.sharpness;
+    }
+
+    if (interior_limit < 1)
+        interior_limit = 1;
+
+    hev_threshold = (filter_level >= 15);
+
+    if (filter_level >= 40)
+        hev_threshold++;
+
+    if (filter_level >= 20 && !ctx->frame_hdr.is_keyframe)
+        hev_threshold++;
+
+    *edge_limit_ = filter_level;
+    *interior_limit_ = interior_limit;
+    *hev_threshold_ = hev_threshold;
+}
+
+
+/* Loop filter num_cols macroblocks of macroblock row `row` of the current
+ * frame, starting at column start_col. Macroblocks whose computed filter
+ * level is zero are left untouched.
+ */
+void
+vp8_dixie_loopfilter_process_row(struct vp8_decoder_ctx *ctx,
+                                 unsigned int            row,
+                                 unsigned int            start_col,
+                                 unsigned int            num_cols)
+{
+    unsigned char *y, *u, *v;
+    int stride, uv_stride;
+    struct mb_info *mbi;
+    unsigned int col;
+
+    /* Adjust pointers based on row, start_col */
+    stride = ctx->ref_frames[CURRENT_FRAME]->img.stride[PLANE_Y];
+    uv_stride = ctx->ref_frames[CURRENT_FRAME]->img.stride[PLANE_U];
+    y = ctx->ref_frames[CURRENT_FRAME]->img.planes[PLANE_Y];
+    u = ctx->ref_frames[CURRENT_FRAME]->img.planes[PLANE_U];
+    v = ctx->ref_frames[CURRENT_FRAME]->img.planes[PLANE_V];
+    y += (stride * row + start_col) * 16;
+    u += (uv_stride * row + start_col) * 8;
+    v += (uv_stride * row + start_col) * 8;
+    mbi = ctx->mb_info_rows[row] + start_col;
+
+    for (col = start_col; col < start_col + num_cols; col++)
+    {
+        int edge_limit, interior_limit, hev_threshold;
+
+        /* TODO: only need to recalculate every MB if segmentation is
+         * enabled.
+         */
+        calculate_filter_parameters(ctx, mbi, &edge_limit, &interior_limit,
+                                    &hev_threshold);
+
+        if (edge_limit)
+        {
+            if (col)
+            {
+                filter_mb_v_edge(y, stride, edge_limit + 2, interior_limit,
+                                 hev_threshold, 2);
+                filter_mb_v_edge(u, uv_stride, edge_limit + 2, interior_limit,
+                                 hev_threshold, 1);
+                filter_mb_v_edge(v, uv_stride, edge_limit + 2, interior_limit,
+                                 hev_threshold, 1);
+            }
+
+            /* NOTE: This conditional is actually dependent on the number
+             * of coefficients decoded, not the skip flag as coded in the
+             * bitstream. The tokens task is expected to set bit 31 if there
+             * is *any* non-zero data.
+             */
+            if (mbi->base.eob_mask
+                || mbi->base.y_mode == SPLITMV || mbi->base.y_mode == B_PRED)
+            {
+                filter_subblock_v_edge(y + 4, stride, edge_limit,
+                                       interior_limit, hev_threshold, 2);
+                filter_subblock_v_edge(y + 8, stride, edge_limit,
+                                       interior_limit, hev_threshold, 2);
+                filter_subblock_v_edge(y + 12, stride, edge_limit,
+                                       interior_limit, hev_threshold, 2);
+                filter_subblock_v_edge(u + 4, uv_stride, edge_limit,
+                                       interior_limit, hev_threshold, 1);
+                filter_subblock_v_edge(v + 4, uv_stride, edge_limit,
+                                       interior_limit, hev_threshold, 1);
+            }
+
+            if (row)
+            {
+                filter_mb_h_edge(y, stride, edge_limit + 2, interior_limit,
+                                 hev_threshold, 2);
+                filter_mb_h_edge(u, uv_stride, edge_limit + 2, interior_limit,
+                                 hev_threshold, 1);
+                filter_mb_h_edge(v, uv_stride, edge_limit + 2, interior_limit,
+                                 hev_threshold, 1);
+            }
+
+            if (mbi->base.eob_mask
+                || mbi->base.y_mode == SPLITMV || mbi->base.y_mode == B_PRED)
+            {
+                filter_subblock_h_edge(y + 4 * stride, stride, edge_limit,
+                                       interior_limit, hev_threshold, 2);
+                filter_subblock_h_edge(y + 8 * stride, stride, edge_limit,
+                                       interior_limit, hev_threshold, 2);
+                filter_subblock_h_edge(y + 12 * stride, stride, edge_limit,
+                                       interior_limit, hev_threshold, 2);
+                filter_subblock_h_edge(u + 4 * uv_stride, uv_stride, edge_limit,
+                                       interior_limit, hev_threshold, 1);
+                filter_subblock_h_edge(v + 4 * uv_stride, uv_stride, edge_limit,
+                                       interior_limit, hev_threshold, 1);
+            }
+        }
+
+        y += 16;
+        u += 8;
+        v += 8;
+        mbi++;
+    }
+}
diff --git a/vp8/dixie/dixie_loopfilter.h b/vp8/dixie/dixie_loopfilter.h
new file mode 100644
index 0000000000..fa8324b306
--- /dev/null
+++ b/vp8/dixie/dixie_loopfilter.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef DIXIE_LOOPFILTER_H
+#define DIXIE_LOOPFILTER_H
+
+/* Forward declaration so this header can be included on its own. */
+struct vp8_decoder_ctx;
+
+/* Apply the loop filter to num_cols macroblocks of macroblock row `row`
+ * of the current frame, starting at column start_col.
+ */
+void
+vp8_dixie_loopfilter_process_row(struct vp8_decoder_ctx *ctx,
+                                 unsigned int            row,
+                                 unsigned int            start_col,
+                                 unsigned int            num_cols);
+
+#endif
diff --git a/vp8/dixie/tokens.c b/vp8/dixie/tokens.c
index c55195152b..ecbacf0f07 100644
--- a/vp8/dixie/tokens.c
+++ b/vp8/dixie/tokens.c
@@ -330,6 +330,7 @@ ONE_CONTEXT_NODE_0_:
 BLOCK_FINISHED:
         eob_mask |= (c > 1) << i;
         t = (c != !type); // any nonzero data?
+        eob_mask |= (unsigned int)t << 31;
         left[left_context_index[i]] = above[above_context_index[i]] = t;
         b_tokens += 16;
 
diff --git a/vp8/vp8dx.mk b/vp8/vp8dx.mk
index 1ac10956ea..7ae3875aed 100644
--- a/vp8/vp8dx.mk
+++ b/vp8/vp8dx.mk
@@ -38,6 +38,8 @@ VP8_DX_SRCS-$(CONFIG_DIXIE) += dixie/predict.c
 VP8_DX_SRCS-$(CONFIG_DIXIE) += dixie/predict.h
 VP8_DX_SRCS-$(CONFIG_DIXIE) += dixie/idct_add.c
 VP8_DX_SRCS-$(CONFIG_DIXIE) += dixie/idct_add.h
+VP8_DX_SRCS-$(CONFIG_DIXIE) += dixie/dixie_loopfilter.c
+VP8_DX_SRCS-$(CONFIG_DIXIE) += dixie/dixie_loopfilter.h
 
 CFLAGS+=-I$(SRC_PATH_BARE)/$(VP8_PREFIX)decoder
 
-- 
GitLab