An error occurred while loading the file. Please try again.
-
Paul Wilkins authored
This patch creates a new inter mode context that avoids a dependence on the reconstructed motion vectors from neighboring blocks. This was a change requested by a hardware vendor to improve decode performance. As part of this change I have also made some modifications to stats output code (under a flag) to allow accumulation of inter mode context flags over multiple clips. Some further changes will be required to accommodate the deprecation of the split mv mode over the next few days. Performance as it stands is around -0.25% on derf and std-hd but up on the YT and YT-HD sets. With further tuning or some adjustment to the context criteria it should be possible to make this change broadly neutral. Change-Id: Ia15cb4470969b9e87332a59c546ae0bd40676f6c
6ff3eb16
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <math.h>
/*
 * Forward 4x4 transform of a block of 16-bit samples.
 *
 * input  - first sample of the block; successive rows are pitch / 2
 *          shorts apart (pitch is presumably a byte stride - confirm
 *          against callers).
 * output - receives 16 coefficients, stored 4 per row.
 * pitch  - input row stride; only pitch / 2 is used.
 *
 * The work is done in two 1-D passes: the first transforms each input row
 * into output, the second transforms each column of output in place.
 * 2217 and 5352 are close to sqrt(2)*sin(pi/8) and sqrt(2)*cos(pi/8)
 * scaled by 4096.
 *
 * The first pass scales by 8 with a multiplication rather than "<< 3":
 * the sums and differences can be negative, and left-shifting a negative
 * value is undefined behaviour in C.  The right shifts still rely on the
 * compiler performing an arithmetic shift on negative values.
 */
void vp8_short_fdct4x4_c(short *input, short *output, int pitch)
{
    int i;
    int a1, b1, c1, d1;
    short *ip = input;
    short *op = output;

    /* Pass 1: transform each of the four input rows. */
    for (i = 0; i < 4; i++)
    {
        a1 = (ip[0] + ip[3]) * 8;
        b1 = (ip[1] + ip[2]) * 8;
        c1 = (ip[1] - ip[2]) * 8;
        d1 = (ip[0] - ip[3]) * 8;

        op[0] = a1 + b1;
        op[2] = a1 - b1;

        op[1] = (c1 * 2217 + d1 * 5352 + 14500) >> 12;
        op[3] = (d1 * 2217 - c1 * 5352 + 7500) >> 12;

        ip += pitch / 2;
        op += 4;
    }

    /*
     * Pass 2: transform each column of the intermediate result in place.
     * All four inputs of a column are read before any of them is
     * overwritten.
     */
    ip = output;
    op = output;

    for (i = 0; i < 4; i++)
    {
        a1 = ip[0] + ip[12];
        b1 = ip[4] + ip[8];
        c1 = ip[4] - ip[8];
        d1 = ip[0] - ip[12];

        op[0] = (a1 + b1 + 7) >> 4;
        op[8] = (a1 - b1 + 7) >> 4;

        /* The extra +1 whenever d1 is non-zero is part of the exact output. */
        op[4] = ((c1 * 2217 + d1 * 5352 + 12000) >> 16) + (d1 != 0);
        op[12] = (d1 * 2217 - c1 * 5352 + 51000) >> 16;

        ip++;
        op++;
    }
}
/*
 * Run vp8_short_fdct4x4_c on two 4x4 blocks.
 *
 * The first block starts at input and writes to output; the second starts
 * 4 samples further along at input + 4 and writes its 16 coefficients
 * directly after the first block's, at output + 16.  pitch is passed
 * through unchanged to both calls.
 */
void vp8_short_fdct8x4_c(short *input, short *output, int pitch)
{
    int blk;

    for (blk = 0; blk < 2; blk++)
    {
        vp8_short_fdct4x4_c(input + 4 * blk, output + 16 * blk, pitch);
    }
}
/*
 * Forward 4x4 Walsh-type transform built from sums and differences only.
 *
 * input  - first sample of the block; successive rows are pitch / 2
 *          shorts apart (pitch is presumably a byte stride - confirm
 *          against callers).
 * output - receives 16 coefficients, stored 4 per row.
 * pitch  - input row stride; only pitch / 2 is used.
 *
 * The first pass transforms each input row into output, the second
 * transforms each column of output in place.
 *
 * The first pass scales by 4 with a multiplication rather than "<< 2":
 * the sums and differences can be negative, and left-shifting a negative
 * value is undefined behaviour in C.  The right shifts still rely on the
 * compiler performing an arithmetic shift on negative values.
 */
void vp8_short_walsh4x4_c(short *input, short *output, int pitch)
{
    int i;
    int a1, b1, c1, d1;
    int a2, b2, c2, d2;
    short *ip = input;
    short *op = output;

    /* Pass 1: transform each of the four input rows. */
    for (i = 0; i < 4; i++)
    {
        a1 = (ip[0] + ip[2]) * 4;
        d1 = (ip[1] + ip[3]) * 4;
        c1 = (ip[1] - ip[3]) * 4;
        b1 = (ip[0] - ip[2]) * 4;

        /* The first output of a row gets an extra +1 whenever a1 != 0. */
        op[0] = a1 + d1 + (a1 != 0);
        op[1] = b1 + c1;
        op[2] = b1 - c1;
        op[3] = a1 - d1;

        ip += pitch / 2;
        op += 4;
    }

    /*
     * Pass 2: transform each column of the intermediate result in place.
     * All four inputs of a column are read before any of them is
     * overwritten.
     */
    ip = output;
    op = output;

    for (i = 0; i < 4; i++)
    {
        a1 = ip[0] + ip[8];
        d1 = ip[4] + ip[12];
        c1 = ip[4] - ip[12];
        b1 = ip[0] - ip[8];

        a2 = a1 + d1;
        b2 = b1 + c1;
        c2 = b1 - c1;
        d2 = a1 - d1;

        /* Negative values are bumped by one before the rounding shift. */
        a2 += a2 < 0;
        b2 += b2 < 0;
        c2 += c2 < 0;
        d2 += d2 < 0;

        op[0] = (a2 + 3) >> 3;
        op[4] = (b2 + 3) >> 3;
        op[8] = (c2 + 3) >> 3;
        op[12] = (d2 + 3) >> 3;

        ip++;
        op++;
    }
}