Commit c1661484 authored by Diego Biurrun's avatar Diego Biurrun

dsputil: Move pix_sum, pix_norm1, shrink function pointers to mpegvideoenc

parent 8d686ca5
......@@ -21,6 +21,7 @@ OBJS-$(CONFIG_IDCTDSP) += arm/idctdsp_init_arm.o \
arm/simple_idct_arm.o
OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_init_arm.o
OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_arm.o
OBJS-$(CONFIG_MPEGVIDEOENC) += arm/mpegvideoencdsp_init_arm.o
OBJS-$(CONFIG_NEON_CLOBBER_TEST) += arm/neontest.o
OBJS-$(CONFIG_VIDEODSP) += arm/videodsp_init_arm.o
OBJS-$(CONFIG_VP3DSP) += arm/vp3dsp_init_arm.o
......@@ -60,6 +61,7 @@ ARMV6-OBJS-$(CONFIG_IDCTDSP) += arm/idctdsp_init_armv6.o \
arm/idctdsp_armv6.o \
arm/simple_idct_armv6.o
ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o
ARMV6-OBJS-$(CONFIG_MPEGVIDEOENC) += arm/mpegvideoencdsp_armv6.o
ARMV6-OBJS-$(CONFIG_MLP_DECODER) += arm/mlpdsp_armv6.o
ARMV6-OBJS-$(CONFIG_VP7_DECODER) += arm/vp8_armv6.o \
......
......@@ -297,58 +297,3 @@ function ff_sse16_armv6, export=1
pop {r4-r9, pc}
endfunc
function ff_pix_norm1_armv6, export=1
push {r4-r6, lr}
mov r12, #16
mov lr, #0
1:
ldm r0, {r2-r5}
uxtb16 r6, r2
uxtb16 r2, r2, ror #8
smlad lr, r6, r6, lr
uxtb16 r6, r3
smlad lr, r2, r2, lr
uxtb16 r3, r3, ror #8
smlad lr, r6, r6, lr
uxtb16 r6, r4
smlad lr, r3, r3, lr
uxtb16 r4, r4, ror #8
smlad lr, r6, r6, lr
uxtb16 r6, r5
smlad lr, r4, r4, lr
uxtb16 r5, r5, ror #8
smlad lr, r6, r6, lr
subs r12, r12, #1
add r0, r0, r1
smlad lr, r5, r5, lr
bgt 1b
mov r0, lr
pop {r4-r6, pc}
endfunc
function ff_pix_sum_armv6, export=1
push {r4-r7, lr}
mov r12, #16
mov r2, #0
mov r3, #0
mov lr, #0
ldr r4, [r0]
1:
subs r12, r12, #1
ldr r5, [r0, #4]
usada8 r2, r4, lr, r2
ldr r6, [r0, #8]
usada8 r3, r5, lr, r3
ldr r7, [r0, #12]
usada8 r2, r6, lr, r2
beq 2f
ldr_pre r4, r0, r1
usada8 r3, r7, lr, r3
bgt 1b
2:
usada8 r3, r7, lr, r3
add r0, r2, r3
pop {r4-r7, pc}
endfunc
......@@ -43,9 +43,6 @@ int ff_pix_abs8_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2,
int ff_sse16_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2,
int line_size, int h);
int ff_pix_norm1_armv6(uint8_t *pix, int line_size);
int ff_pix_sum_armv6(uint8_t *pix, int line_size);
av_cold void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth)
{
......@@ -63,7 +60,4 @@ av_cold void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx,
c->sad[1] = ff_pix_abs8_armv6;
c->sse[0] = ff_sse16_armv6;
c->pix_norm1 = ff_pix_norm1_armv6;
c->pix_sum = ff_pix_sum_armv6;
}
/*
* Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/arm/asm.S"
function ff_pix_norm1_armv6, export=1
push {r4-r6, lr}
mov r12, #16
mov lr, #0
1:
ldm r0, {r2-r5}
uxtb16 r6, r2
uxtb16 r2, r2, ror #8
smlad lr, r6, r6, lr
uxtb16 r6, r3
smlad lr, r2, r2, lr
uxtb16 r3, r3, ror #8
smlad lr, r6, r6, lr
uxtb16 r6, r4
smlad lr, r3, r3, lr
uxtb16 r4, r4, ror #8
smlad lr, r6, r6, lr
uxtb16 r6, r5
smlad lr, r4, r4, lr
uxtb16 r5, r5, ror #8
smlad lr, r6, r6, lr
subs r12, r12, #1
add r0, r0, r1
smlad lr, r5, r5, lr
bgt 1b
mov r0, lr
pop {r4-r6, pc}
endfunc
function ff_pix_sum_armv6, export=1
push {r4-r7, lr}
mov r12, #16
mov r2, #0
mov r3, #0
mov lr, #0
ldr r4, [r0]
1:
subs r12, r12, #1
ldr r5, [r0, #4]
usada8 r2, r4, lr, r2
ldr r6, [r0, #8]
usada8 r3, r5, lr, r3
ldr r7, [r0, #12]
usada8 r2, r6, lr, r2
beq 2f
ldr_pre r4, r0, r1
usada8 r3, r7, lr, r3
bgt 1b
2:
usada8 r3, r7, lr, r3
add r0, r2, r3
pop {r4-r7, pc}
endfunc
/*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdint.h>
#include "libavutil/cpu.h"
#include "libavutil/arm/cpu.h"
#include "libavcodec/avcodec.h"
#include "libavcodec/mpegvideoencdsp.h"
int ff_pix_norm1_armv6(uint8_t *pix, int line_size);
int ff_pix_sum_armv6(uint8_t *pix, int line_size);
av_cold void ff_mpegvideoencdsp_init_arm(MpegvideoEncDSPContext *c,
AVCodecContext *avctx)
{
int cpu_flags = av_get_cpu_flags();
if (have_armv6(cpu_flags)) {
c->pix_norm1 = ff_pix_norm1_armv6;
c->pix_sum = ff_pix_sum_armv6;
}
}
......@@ -309,6 +309,7 @@ static av_cold int dnxhd_encode_init(AVCodecContext *avctx)
ff_blockdsp_init(&ctx->bdsp, avctx);
ff_dsputil_init(&ctx->m.dsp, avctx);
ff_idctdsp_init(&ctx->m.idsp, avctx);
ff_mpegvideoencdsp_init(&ctx->m.mpvencdsp, avctx);
ff_dct_common_init(&ctx->m);
if (!ctx->m.dct_quantize)
ctx->m.dct_quantize = ff_dct_quantize_c;
......@@ -719,8 +720,8 @@ static int dnxhd_mb_var_thread(AVCodecContext *avctx, void *arg,
int varc;
if (!partial_last_row && mb_x * 16 <= avctx->width - 16) {
sum = ctx->m.dsp.pix_sum(pix, ctx->m.linesize);
varc = ctx->m.dsp.pix_norm1(pix, ctx->m.linesize);
sum = ctx->m.mpvencdsp.pix_sum(pix, ctx->m.linesize);
varc = ctx->m.mpvencdsp.pix_norm1(pix, ctx->m.linesize);
} else {
int bw = FFMIN(avctx->width - 16 * mb_x, 16);
int bh = FFMIN((avctx->height >> ctx->interlaced) - 16 * mb_y, 16);
......
......@@ -26,15 +26,12 @@
*/
#include "libavutil/attributes.h"
#include "libavutil/imgutils.h"
#include "avcodec.h"
#include "copy_block.h"
#include "dct.h"
#include "dsputil.h"
#include "simple_idct.h"
#include "faandct.h"
#include "imgconvert.h"
#include "mathops.h"
#include "mpegvideo.h"
#include "config.h"
......@@ -47,74 +44,6 @@ uint32_t ff_square_tab[512] = { 0, };
#define BIT_DEPTH 8
#include "dsputilenc_template.c"
static int pix_sum_c(uint8_t *pix, int line_size)
{
int s = 0, i, j;
for (i = 0; i < 16; i++) {
for (j = 0; j < 16; j += 8) {
s += pix[0];
s += pix[1];
s += pix[2];
s += pix[3];
s += pix[4];
s += pix[5];
s += pix[6];
s += pix[7];
pix += 8;
}
pix += line_size - 16;
}
return s;
}
static int pix_norm1_c(uint8_t *pix, int line_size)
{
int s = 0, i, j;
uint32_t *sq = ff_square_tab + 256;
for (i = 0; i < 16; i++) {
for (j = 0; j < 16; j += 8) {
#if 0
s += sq[pix[0]];
s += sq[pix[1]];
s += sq[pix[2]];
s += sq[pix[3]];
s += sq[pix[4]];
s += sq[pix[5]];
s += sq[pix[6]];
s += sq[pix[7]];
#else
#if HAVE_FAST_64BIT
register uint64_t x = *(uint64_t *) pix;
s += sq[x & 0xff];
s += sq[(x >> 8) & 0xff];
s += sq[(x >> 16) & 0xff];
s += sq[(x >> 24) & 0xff];
s += sq[(x >> 32) & 0xff];
s += sq[(x >> 40) & 0xff];
s += sq[(x >> 48) & 0xff];
s += sq[(x >> 56) & 0xff];
#else
register uint32_t x = *(uint32_t *) pix;
s += sq[x & 0xff];
s += sq[(x >> 8) & 0xff];
s += sq[(x >> 16) & 0xff];
s += sq[(x >> 24) & 0xff];
x = *(uint32_t *) (pix + 4);
s += sq[x & 0xff];
s += sq[(x >> 8) & 0xff];
s += sq[(x >> 16) & 0xff];
s += sq[(x >> 24) & 0xff];
#endif
#endif
pix += 8;
}
pix += line_size - 16;
}
return s;
}
static int sse4_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
int line_size, int h)
{
......@@ -1055,9 +984,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
c->sum_abs_dctelem = sum_abs_dctelem_c;
c->pix_sum = pix_sum_c;
c->pix_norm1 = pix_norm1_c;
/* TODO [0] 16 [1] 8 */
c->pix_abs[0][0] = pix_abs16_c;
c->pix_abs[0][1] = pix_abs16_x2_c;
......@@ -1097,11 +1023,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
c->nsse[0] = nsse16_c;
c->nsse[1] = nsse8_c;
c->shrink[0] = av_image_copy_plane;
c->shrink[1] = ff_shrink22;
c->shrink[2] = ff_shrink44;
c->shrink[3] = ff_shrink88;
c->draw_edges = draw_edges_8_c;
switch (avctx->bits_per_raw_sample) {
......
......@@ -58,9 +58,6 @@ typedef struct DSPContext {
int stride);
int (*sum_abs_dctelem)(int16_t *block /* align 16 */);
int (*pix_sum)(uint8_t *pix, int line_size);
int (*pix_norm1)(uint8_t *pix, int line_size);
me_cmp_func sad[6]; /* identical to pix_absAxA except additional void * */
me_cmp_func sse[6];
me_cmp_func hadamard8_diff[6];
......@@ -92,9 +89,6 @@ typedef struct DSPContext {
#define EDGE_WIDTH 16
#define EDGE_TOP 1
#define EDGE_BOTTOM 2
void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src,
int src_wrap, int width, int height);
} DSPContext;
void ff_dsputil_static_init(void);
......
......@@ -881,8 +881,9 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
/* intra / predictive decision */
pix = c->src[0][0];
sum = s->dsp.pix_sum(pix, s->linesize);
varc = s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500;
sum = s->mpvencdsp.pix_sum(pix, s->linesize);
varc = s->mpvencdsp.pix_norm1(pix, s->linesize) -
(((unsigned) sum * sum) >> 8) + 500;
pic->mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
pic->mb_var [s->mb_stride * mb_y + mb_x] = (varc+128)>>8;
......
......@@ -895,7 +895,7 @@ static int get_intra_count(MpegEncContext *s, uint8_t *src,
int offset = x + y * stride;
int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
16);
int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
int sae = get_sae(src + offset, mean, stride);
acc += sae + 500 < sad;
......@@ -1138,15 +1138,21 @@ static int estimate_best_b_count(MpegEncContext *s)
pre_input.f->data[2] += INPLACE_OFFSET;
}
s->dsp.shrink[scale](s->tmp_frames[i]->data[0], s->tmp_frames[i]->linesize[0],
pre_input.f->data[0], pre_input.f->linesize[0],
c->width, c->height);
s->dsp.shrink[scale](s->tmp_frames[i]->data[1], s->tmp_frames[i]->linesize[1],
pre_input.f->data[1], pre_input.f->linesize[1],
c->width >> 1, c->height >> 1);
s->dsp.shrink[scale](s->tmp_frames[i]->data[2], s->tmp_frames[i]->linesize[2],
pre_input.f->data[2], pre_input.f->linesize[2],
c->width >> 1, c->height >> 1);
s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
s->tmp_frames[i]->linesize[0],
pre_input.f->data[0],
pre_input.f->linesize[0],
c->width, c->height);
s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
s->tmp_frames[i]->linesize[1],
pre_input.f->data[1],
pre_input.f->linesize[1],
c->width >> 1, c->height >> 1);
s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
s->tmp_frames[i]->linesize[2],
pre_input.f->data[2],
pre_input.f->linesize[2],
c->width >> 1, c->height >> 1);
}
}
......@@ -2420,9 +2426,10 @@ static int mb_var_thread(AVCodecContext *c, void *arg){
int yy = mb_y * 16;
uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
int varc;
int sum = s->dsp.pix_sum(pix, s->linesize);
int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
(((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
......
......@@ -21,7 +21,10 @@
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/imgutils.h"
#include "avcodec.h"
#include "dsputil.h"
#include "imgconvert.h"
#include "mpegvideoencdsp.h"
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64],
......@@ -53,12 +56,92 @@ static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale)
(BASIS_SHIFT - RECON_SHIFT);
}
static int pix_sum_c(uint8_t *pix, int line_size)
{
int s = 0, i, j;
for (i = 0; i < 16; i++) {
for (j = 0; j < 16; j += 8) {
s += pix[0];
s += pix[1];
s += pix[2];
s += pix[3];
s += pix[4];
s += pix[5];
s += pix[6];
s += pix[7];
pix += 8;
}
pix += line_size - 16;
}
return s;
}
static int pix_norm1_c(uint8_t *pix, int line_size)
{
int s = 0, i, j;
uint32_t *sq = ff_square_tab + 256;
for (i = 0; i < 16; i++) {
for (j = 0; j < 16; j += 8) {
#if 0
s += sq[pix[0]];
s += sq[pix[1]];
s += sq[pix[2]];
s += sq[pix[3]];
s += sq[pix[4]];
s += sq[pix[5]];
s += sq[pix[6]];
s += sq[pix[7]];
#else
#if HAVE_FAST_64BIT
register uint64_t x = *(uint64_t *) pix;
s += sq[x & 0xff];
s += sq[(x >> 8) & 0xff];
s += sq[(x >> 16) & 0xff];
s += sq[(x >> 24) & 0xff];
s += sq[(x >> 32) & 0xff];
s += sq[(x >> 40) & 0xff];
s += sq[(x >> 48) & 0xff];
s += sq[(x >> 56) & 0xff];
#else
register uint32_t x = *(uint32_t *) pix;
s += sq[x & 0xff];
s += sq[(x >> 8) & 0xff];
s += sq[(x >> 16) & 0xff];
s += sq[(x >> 24) & 0xff];
x = *(uint32_t *) (pix + 4);
s += sq[x & 0xff];
s += sq[(x >> 8) & 0xff];
s += sq[(x >> 16) & 0xff];
s += sq[(x >> 24) & 0xff];
#endif
#endif
pix += 8;
}
pix += line_size - 16;
}
return s;
}
av_cold void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c,
AVCodecContext *avctx)
{
c->try_8x8basis = try_8x8basis_c;
c->add_8x8basis = add_8x8basis_c;
c->shrink[0] = av_image_copy_plane;
c->shrink[1] = ff_shrink22;
c->shrink[2] = ff_shrink44;
c->shrink[3] = ff_shrink88;
c->pix_sum = pix_sum_c;
c->pix_norm1 = pix_norm1_c;
if (ARCH_ARM)
ff_mpegvideoencdsp_init_arm(c, avctx);
if (ARCH_PPC)
ff_mpegvideoencdsp_init_ppc(c, avctx);
if (ARCH_X86)
ff_mpegvideoencdsp_init_x86(c, avctx);
}
......@@ -31,10 +31,19 @@ typedef struct MpegvideoEncDSPContext {
int16_t basis[64], int scale);
void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale);
int (*pix_sum)(uint8_t *pix, int line_size);
int (*pix_norm1)(uint8_t *pix, int line_size);
void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src,
int src_wrap, int width, int height);
} MpegvideoEncDSPContext;
void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c,
AVCodecContext *avctx);
void ff_mpegvideoencdsp_init_arm(MpegvideoEncDSPContext *c,
AVCodecContext *avctx);
void ff_mpegvideoencdsp_init_ppc(MpegvideoEncDSPContext *c,
AVCodecContext *avctx);
void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
AVCodecContext *avctx);
......
......@@ -13,6 +13,7 @@ OBJS-$(CONFIG_IDCTDSP) += ppc/idctdsp.o
OBJS-$(CONFIG_MPEGAUDIODSP) += ppc/mpegaudiodsp_altivec.o
OBJS-$(CONFIG_MPEGVIDEO) += ppc/mpegvideo_altivec.o \
ppc/mpegvideodsp.o
OBJS-$(CONFIG_MPEGVIDEOENC) += ppc/mpegvideoencdsp.o
OBJS-$(CONFIG_VIDEODSP) += ppc/videodsp_ppc.o
OBJS-$(CONFIG_VP3DSP) += ppc/vp3dsp_altivec.o
......
......@@ -308,34 +308,6 @@ static int sad8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
return s;
}
static int pix_norm1_altivec(uint8_t *pix, int line_size)
{
int i, s = 0;
const vector unsigned int zero =
(const vector unsigned int) vec_splat_u32(0);
vector unsigned char perm = vec_lvsl(0, pix);
vector unsigned int sv = (vector unsigned int) vec_splat_u32(0);
vector signed int sum;
for (i = 0; i < 16; i++) {
/* Read the potentially unaligned pixels. */
vector unsigned char pixl = vec_ld(0, pix);
vector unsigned char pixr = vec_ld(15, pix);
vector unsigned char pixv = vec_perm(pixl, pixr, perm);
/* Square the values, and add them to our sum. */
sv = vec_msum(pixv, pixv, sv);
pix += line_size;
}
/* Sum up the four partial sums, and put the result into s. */
sum = vec_sums((vector signed int) sv, (vector signed int) zero);
sum = vec_splat(sum, 3);
vec_ste(sum, 0, &s);
return s;
}