• Paul Wilkins's avatar
    New inter mode context. · 6ff3eb16
    Paul Wilkins authored
    This patch creates a new inter mode context that avoids
    a dependence on the reconstructed motion vectors from
    neighboring blocks. This was a change requested by
    a hardware vendor to improve decode performance.
    
    As part of this change I have also made some modifications
    to the stats output code (under a flag) to allow accumulation
    of inter mode context flags over multiple clips.
    
    Some further changes will be required to accommodate the
    deprecation of the split mv mode over the next few days.
    
    Performance as it stands is around -0.25% on derf and
    std-hd, but up on the YT and YT-HD sets. With further tuning
    or some adjustment to the context criteria it should be
    possible to make this change broadly neutral.
    
    Change-Id: Ia15cb4470969b9e87332a59c546ae0bd40676f6c
    6ff3eb16
dct.c 2.52 KiB
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include <math.h>
/*
 * Forward 4x4 transform of one block of 16-bit samples.
 *
 * input  - first sample of the block; consecutive rows are pitch / 2
 *          shorts apart.
 * output - receives 16 coefficients, stored contiguously row by row.
 * pitch  - input row stride; it is halved before being added to a
 *          short pointer (presumably a stride in bytes - confirm
 *          against callers).
 *
 * The transform runs in two passes: the first processes the four input
 * rows and writes scaled intermediates to output, the second processes
 * the four columns of output in place.
 */
void vp8_short_fdct4x4_c(short *input, short *output, int pitch)
{
    int i;
    int a1, b1, c1, d1;
    short *ip = input;
    short *op = output;

    /* Pass 1: horizontal transform of each input row. */
    for (i = 0; i < 4; i++)
    {
        /*
         * Scale by 8 with a multiply rather than "<< 3": the operands
         * can be negative and left-shifting a negative int is undefined.
         */
        a1 = (ip[0] + ip[3]) * 8;
        b1 = (ip[1] + ip[2]) * 8;
        c1 = (ip[1] - ip[2]) * 8;
        d1 = (ip[0] - ip[3]) * 8;

        op[0] = a1 + b1;
        op[2] = a1 - b1;

        /* Odd outputs: fixed-point rotation with rounding, 12-bit shift. */
        op[1] = (c1 * 2217 + d1 * 5352 +  14500) >> 12;
        op[3] = (d1 * 2217 - c1 * 5352 +   7500) >> 12;

        ip += pitch / 2;
        op += 4;
    }

    /* Pass 2: vertical transform, reading and writing output in place. */
    ip = output;
    op = output;

    for (i = 0; i < 4; i++)
    {
        /* All four column inputs are read before any is overwritten. */
        a1 = ip[0] + ip[12];
        b1 = ip[4] + ip[8];
        c1 = ip[4] - ip[8];
        d1 = ip[0] - ip[12];

        op[0]  = (a1 + b1 + 7) >> 4;
        op[8]  = (a1 - b1 + 7) >> 4;

        /* The (d1 != 0) term adds one whenever d1 is non-zero. */
        op[4]  = ((c1 * 2217 + d1 * 5352 +  12000) >> 16) + (d1 != 0);
        op[12] = (d1 * 2217 - c1 * 5352 +  51000) >> 16;

        ip++;
        op++;
    }
}
/*
 * Forward transform of two horizontally adjacent 4x4 blocks.
 *
 * The left block starts at input and its 16 coefficients go to
 * output[0..15]; the right block starts four samples further along
 * each row (input + 4) and its coefficients go to output[16..31].
 * pitch is passed through unchanged to vp8_short_fdct4x4_c.
 */
void vp8_short_fdct8x4_c(short *input, short *output, int pitch)
{
    vp8_short_fdct4x4_c(input,     output,      pitch);
    vp8_short_fdct4x4_c(input + 4, output + 16, pitch);
}
/*
 * Forward 4x4 Walsh-style transform (add/subtract butterflies only).
 *
 * input  - first sample of the block; consecutive rows are pitch / 2
 *          shorts apart.
 * output - receives 16 coefficients, stored contiguously row by row.
 * pitch  - input row stride; it is halved before being added to a
 *          short pointer (presumably a stride in bytes - confirm
 *          against callers).
 *
 * The first pass transforms the four input rows into output; the
 * second pass transforms the four columns of output in place and
 * scales the results down by 8 with rounding.
 */
void vp8_short_walsh4x4_c(short *input, short *output, int pitch)
{
    int i;
    int a1, b1, c1, d1;
    int a2, b2, c2, d2;
    short *ip = input;
    short *op = output;

    /* Pass 1: horizontal butterflies on each input row. */
    for (i = 0; i < 4; i++)
    {
        /*
         * Scale by 4 with a multiply rather than "<< 2": the operands
         * can be negative and left-shifting a negative int is undefined.
         */
        a1 = (ip[0] + ip[2]) * 4;
        d1 = (ip[1] + ip[3]) * 4;
        c1 = (ip[1] - ip[3]) * 4;
        b1 = (ip[0] - ip[2]) * 4;

        /* The first output gets an extra +1 whenever a1 is non-zero. */
        op[0] = a1 + d1 + (a1 != 0);
        op[1] = b1 + c1;
        op[2] = b1 - c1;
        op[3] = a1 - d1;

        ip += pitch / 2;
        op += 4;
    }

    /* Pass 2: vertical butterflies, reading and writing output in place. */
    ip = output;
    op = output;

    for (i = 0; i < 4; i++)
    {
        /* All four column inputs are read before any is overwritten. */
        a1 = ip[0] + ip[8];
        d1 = ip[4] + ip[12];
        c1 = ip[4] - ip[12];
        b1 = ip[0] - ip[8];

        a2 = a1 + d1;
        b2 = b1 + c1;
        c2 = b1 - c1;
        d2 = a1 - d1;

        /* Add one to negative values before the rounding shift below. */
        a2 += a2 < 0;
        b2 += b2 < 0;
        c2 += c2 < 0;
        d2 += d2 < 0;

        op[0]  = (a2 + 3) >> 3;
        op[4]  = (b2 + 3) >> 3;
        op[8]  = (c2 + 3) >> 3;
        op[12] = (d2 + 3) >> 3;

        ip++;
        op++;
    }
}