/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./vp8_rtcd.h"
#include "vpx_config.h"
#include "vp8/common/variance.h"
#include "vpx_ports/mem.h"
#include "vp8/common/x86/filter_x86.h"

/*
 * Prototype for a routine that is defined outside this file.
 * NOTE(review): the name suggests a six-tap horizontal filter pass
 * implemented in the MMX assembly sources -- confirm against the
 * definition. It reads 8-bit samples from src_ptr and writes 16-bit values
 * to output_ptr (per the pointer types); no function visible in this file
 * calls it.
 */
extern void filter_block1d_h6_mmx
(
    const unsigned char *src_ptr,
    unsigned short *output_ptr,
    unsigned int src_pixels_per_line,
    unsigned int pixel_step,
    unsigned int output_height,
    unsigned int output_width,
    short *filter
);
/*
 * Prototype for a routine that is defined outside this file.
 * NOTE(review): the name suggests a six-tap vertical filter pass
 * implemented in the MMX assembly sources -- confirm against the
 * definition. It reads 16-bit values from src_ptr and writes 8-bit samples
 * to output_ptr (per the pointer types); no function visible in this file
 * calls it.
 */
extern void filter_block1d_v6_mmx
(
    const short *src_ptr,
    unsigned char *output_ptr,
    unsigned int pixels_per_line,
    unsigned int pixel_step,
    unsigned int output_height,
    unsigned int output_width,
    short *filter
);

/*
 * Prototype for the helper used by vp8_sub_pixel_variance4x4_mmx; the
 * definition is outside this file.
 *
 * Takes two 8-bit blocks with their strides, a horizontal and a vertical
 * filter (arrays of short), and two output pointers. The caller in this
 * file treats *sum as a signed total and *sumsquared as the value it stores
 * into its sse output.
 * NOTE(review): presumably it bilinearly filters one block and accumulates
 * the differences against the other -- confirm against the definition.
 */
extern void vp8_filter_block2d_bil4x4_var_mmx
(
    const unsigned char *ref_ptr,
    int ref_pixels_per_line,
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    const short *HFilter,
    const short *VFilter,
    int *sum,
    unsigned int *sumsquared
);
/*
 * Prototype for the helper used by the 8x8, 8x16, 16x8 and 16x16 sub-pixel
 * variance functions in this file; the definition is outside this file.
 *
 * Same argument layout as vp8_filter_block2d_bil4x4_var_mmx plus a Height
 * argument. Callers in this file pass 8 or 16 for Height and invoke the
 * helper twice (second time at column offset +8) for 16-wide blocks.
 * NOTE(review): this implies each call handles an 8-pixel-wide column of
 * Height rows -- confirm against the definition.
 */
extern void vp8_filter_block2d_bil_var_mmx
(
    const unsigned char *ref_ptr,
    int ref_pixels_per_line,
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    unsigned int Height,
    const short *HFilter,
    const short *VFilter,
    int *sum,
    unsigned int *sumsquared
);

unsigned int vp8_sub_pixel_variance4x4_mmx
(
64
    const unsigned char  *src_ptr,
John Koleszar's avatar
John Koleszar committed
65 66 67
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
68
    const unsigned char *dst_ptr,
John Koleszar's avatar
John Koleszar committed
69 70 71 72 73 74 75 76 77
    int dst_pixels_per_line,
    unsigned int *sse)

{
    int xsum;
    unsigned int xxsum;
    vp8_filter_block2d_bil4x4_var_mmx(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line,
Johann's avatar
Johann committed
78
        vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
John Koleszar's avatar
John Koleszar committed
79 80 81
        &xsum, &xxsum
    );
    *sse = xxsum;
82
    return (xxsum - (((unsigned int)xsum * xsum) >> 4));
John Koleszar's avatar
John Koleszar committed
83 84 85 86 87
}


unsigned int vp8_sub_pixel_variance8x8_mmx
(
88
    const unsigned char  *src_ptr,
John Koleszar's avatar
John Koleszar committed
89 90 91
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
92
    const unsigned char *dst_ptr,
John Koleszar's avatar
John Koleszar committed
93 94 95 96 97 98 99 100 101 102
    int dst_pixels_per_line,
    unsigned int *sse
)
{

    int xsum;
    unsigned int xxsum;
    vp8_filter_block2d_bil_var_mmx(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 8,
Johann's avatar
Johann committed
103
        vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
John Koleszar's avatar
John Koleszar committed
104 105 106
        &xsum, &xxsum
    );
    *sse = xxsum;
107
    return (xxsum - (((unsigned int)xsum * xsum) >> 6));
John Koleszar's avatar
John Koleszar committed
108 109 110 111
}

unsigned int vp8_sub_pixel_variance16x16_mmx
(
112
    const unsigned char  *src_ptr,
John Koleszar's avatar
John Koleszar committed
113 114 115
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
116
    const unsigned char *dst_ptr,
John Koleszar's avatar
John Koleszar committed
117 118 119 120 121 122 123 124 125 126 127 128
    int dst_pixels_per_line,
    unsigned int *sse
)
{

    int xsum0, xsum1;
    unsigned int xxsum0, xxsum1;


    vp8_filter_block2d_bil_var_mmx(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 16,
Johann's avatar
Johann committed
129
        vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
John Koleszar's avatar
John Koleszar committed
130 131 132 133 134 135 136
        &xsum0, &xxsum0
    );


    vp8_filter_block2d_bil_var_mmx(
        src_ptr + 8, src_pixels_per_line,
        dst_ptr + 8, dst_pixels_per_line, 16,
Johann's avatar
Johann committed
137
        vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
John Koleszar's avatar
John Koleszar committed
138 139 140 141 142 143 144
        &xsum1, &xxsum1
    );

    xsum0 += xsum1;
    xxsum0 += xxsum1;

    *sse = xxsum0;
145
    return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
John Koleszar's avatar
John Koleszar committed
146 147 148 149 150 151


}

unsigned int vp8_sub_pixel_variance16x8_mmx
(
152
    const unsigned char  *src_ptr,
John Koleszar's avatar
John Koleszar committed
153 154 155
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
156
    const unsigned char *dst_ptr,
John Koleszar's avatar
John Koleszar committed
157 158 159 160 161 162 163 164 165 166 167
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    int xsum0, xsum1;
    unsigned int xxsum0, xxsum1;


    vp8_filter_block2d_bil_var_mmx(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 8,
Johann's avatar
Johann committed
168
        vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
John Koleszar's avatar
John Koleszar committed
169 170 171 172 173 174 175
        &xsum0, &xxsum0
    );


    vp8_filter_block2d_bil_var_mmx(
        src_ptr + 8, src_pixels_per_line,
        dst_ptr + 8, dst_pixels_per_line, 8,
Johann's avatar
Johann committed
176
        vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
John Koleszar's avatar
John Koleszar committed
177 178 179 180 181 182 183
        &xsum1, &xxsum1
    );

    xsum0 += xsum1;
    xxsum0 += xxsum1;

    *sse = xxsum0;
184
    return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 7));
John Koleszar's avatar
John Koleszar committed
185 186 187 188
}

unsigned int vp8_sub_pixel_variance8x16_mmx
(
189
    const unsigned char  *src_ptr,
John Koleszar's avatar
John Koleszar committed
190 191 192
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
193
    const unsigned char *dst_ptr,
John Koleszar's avatar
John Koleszar committed
194
    int dst_pixels_per_line,
195
    unsigned int *sse
John Koleszar's avatar
John Koleszar committed
196 197 198 199 200 201 202
)
{
    int xsum;
    unsigned int xxsum;
    vp8_filter_block2d_bil_var_mmx(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 16,
Johann's avatar
Johann committed
203
        vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
John Koleszar's avatar
John Koleszar committed
204 205 206
        &xsum, &xxsum
    );
    *sse = xxsum;
207
    return (xxsum - (((unsigned int)xsum * xsum) >> 7));
John Koleszar's avatar
John Koleszar committed
208
}
209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244


/*
 * 16x16 variance with a fixed horizontal offset of 4 and no vertical
 * offset. Thin wrapper around vp8_sub_pixel_variance16x16_mmx; sse and the
 * return value are exactly what that function produces.
 * NOTE(review): offset 4 is presumably the half-pixel entry of the bilinear
 * filter table -- confirm against the table definition.
 */
unsigned int vp8_variance_halfpixvar16x16_h_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    const int x_off = 4;
    const int y_off = 0;
    unsigned int result;

    result = vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride,
                                             x_off, y_off,
                                             ref_ptr, recon_stride, sse);
    return result;
}


/*
 * 16x16 variance with no horizontal offset and a fixed vertical offset of
 * 4. Thin wrapper around vp8_sub_pixel_variance16x16_mmx; sse and the
 * return value are exactly what that function produces.
 * NOTE(review): offset 4 is presumably the half-pixel entry of the bilinear
 * filter table -- confirm against the table definition.
 */
unsigned int vp8_variance_halfpixvar16x16_v_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    const int x_off = 0;
    const int y_off = 4;
    unsigned int result;

    result = vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride,
                                             x_off, y_off,
                                             ref_ptr, recon_stride, sse);
    return result;
}


/*
 * 16x16 variance with a fixed offset of 4 both horizontally and vertically.
 * Thin wrapper around vp8_sub_pixel_variance16x16_mmx; sse and the return
 * value are exactly what that function produces.
 * NOTE(review): offset 4 is presumably the half-pixel entry of the bilinear
 * filter table -- confirm against the table definition.
 */
unsigned int vp8_variance_halfpixvar16x16_hv_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    const int x_off = 4;
    const int y_off = 4;
    unsigned int result;

    result = vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride,
                                             x_off, y_off,
                                             ref_ptr, recon_stride, sse);
    return result;
}