Commit f14c323b authored by James Bankoski's avatar James Bankoski Committed by Gerrit Code Review

Merge "libyuv: update to b8ddb5a2"

parents 74bb78df b8f83282
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1456
Version: b8ddb5a2
License: BSD
License File: LICENSE
......@@ -13,3 +13,13 @@ which down-samples the original input video (f.g. 1280x720) a number of times
in order to encode multiple resolution bit streams.
Local Modifications:
Remove files unnecessary to libvpx build.
rm -rf .gitignore .gn AUTHORS Android.mk BUILD.gn CMakeLists.txt DEPS LICENSE \
LICENSE_THIRD_PARTY OWNERS PATENTS PRESUBMIT.py README.chromium README.md \
all.gyp build_overrides/ chromium/ codereview.settings docs/ \
download_vs_toolchain.py gyp_libyuv gyp_libyuv.py include/libyuv.h \
include/libyuv/compare_row.h libyuv.gyp libyuv.gypi libyuv_nacl.gyp \
libyuv_test.gyp linux.mk public.mk setup_links.py sync_chromium.py \
third_party/ tools/ unit_test/ util/ winarm.mk
......@@ -12,10 +12,8 @@
#define INCLUDE_LIBYUV_CONVERT_H_
#include "libyuv/basic_types.h"
// TODO(fbarchard): Remove the following headers includes.
#include "libyuv/convert_from.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"
#include "libyuv/rotate.h" // For enum RotationMode.
#ifdef __cplusplus
namespace libyuv {
......
......@@ -12,10 +12,8 @@
#define INCLUDE_LIBYUV_CONVERT_ARGB_H_
#include "libyuv/basic_types.h"
// TODO(fbarchard): Remove the following headers includes
#include "libyuv/convert_from.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"
#include "libyuv/rotate.h" // For enum RotationMode.
// TODO(fbarchard): This set of functions should exactly match convert.h
// TODO(fbarchard): Add tests. Create random content of right size and convert
......@@ -60,6 +58,22 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert J444 to ARGB.
LIBYUV_API
int J444ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I444 to ABGR.
LIBYUV_API
int I444ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height);
// Convert I411 to ARGB.
LIBYUV_API
int I411ToARGB(const uint8* src_y, int src_stride_y,
......@@ -68,6 +82,24 @@ int I411ToARGB(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I420 with Alpha to preattenuated ARGB.
LIBYUV_API
int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
const uint8* src_a, int src_stride_a,
uint8* dst_argb, int dst_stride_argb,
int width, int height, int attenuate);
// Convert I420 with Alpha to preattenuated ABGR.
LIBYUV_API
int I420AlphaToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
const uint8* src_a, int src_stride_a,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height, int attenuate);
// Convert I400 (grey) to ARGB. Reverse of ARGBToI400.
LIBYUV_API
int I400ToARGB(const uint8* src_y, int src_stride_y,
......@@ -131,6 +163,54 @@ int J422ToARGB(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert J420 to ABGR.
LIBYUV_API
int J420ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height);
// Convert J422 to ABGR.
LIBYUV_API
int J422ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height);
// Convert H420 to ARGB.
LIBYUV_API
int H420ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert H422 to ARGB.
LIBYUV_API
int H422ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert H420 to ABGR.
LIBYUV_API
int H420ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height);
// Convert H422 to ABGR.
LIBYUV_API
int H422ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height);
// BGRA little endian (argb in memory) to ARGB.
LIBYUV_API
int BGRAToARGB(const uint8* src_frame, int src_stride_frame,
......
......@@ -56,8 +56,6 @@ int I400Copy(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
int width, int height);
// TODO(fbarchard): I420ToM420
LIBYUV_API
int I420ToNV12(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
......
......@@ -18,9 +18,8 @@ namespace libyuv {
extern "C" {
#endif
// TODO(fbarchard): Consider overlapping bits for different architectures.
// Internal flag to indicate cpuid requires initialization.
#define kCpuInit 0x1
static const int kCpuInitialized = 0x1;
// These flags are only valid on ARM processors.
static const int kCpuHasARM = 0x2;
......@@ -37,12 +36,12 @@ static const int kCpuHasAVX = 0x200;
static const int kCpuHasAVX2 = 0x400;
static const int kCpuHasERMS = 0x800;
static const int kCpuHasFMA3 = 0x1000;
static const int kCpuHasAVX3 = 0x2000;
// 0x2000, 0x4000, 0x8000 reserved for future X86 flags.
// These flags are only valid on MIPS processors.
static const int kCpuHasMIPS = 0x10000;
static const int kCpuHasMIPS_DSP = 0x20000;
static const int kCpuHasMIPS_DSPR2 = 0x40000;
static const int kCpuHasDSPR2 = 0x20000;
// Internal function used to auto-init.
LIBYUV_API
......@@ -57,13 +56,13 @@ int ArmCpuCaps(const char* cpuinfo_name);
// returns non-zero if instruction set is detected
static __inline int TestCpuFlag(int test_flag) {
LIBYUV_API extern int cpu_info_;
return (cpu_info_ == kCpuInit ? InitCpuFlags() : cpu_info_) & test_flag;
return (!cpu_info_ ? InitCpuFlags() : cpu_info_) & test_flag;
}
// For testing, allow CPU flags to be disabled.
// ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3.
// MaskCpuFlags(-1) to enable all cpu specific optimizations.
// MaskCpuFlags(0) to disable all cpu specific optimizations.
// MaskCpuFlags(1) to disable all cpu specific optimizations.
LIBYUV_API
void MaskCpuFlags(int enable_flags);
......
......@@ -145,13 +145,6 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
uint8* dst_rgb565, int dst_stride_rgb565,
int width, int height);
// Convert NV21 to RGB565.
LIBYUV_API
int NV21ToRGB565(const uint8* src_y, int src_stride_y,
const uint8* src_uv, int src_stride_uv,
uint8* dst_rgb565, int dst_stride_rgb565,
int width, int height);
// I422ToARGB is in convert_argb.h
// Convert I422 to BGRA.
LIBYUV_API
......@@ -177,6 +170,14 @@ int I422ToRGBA(const uint8* src_y, int src_stride_y,
uint8* dst_rgba, int dst_stride_rgba,
int width, int height);
// Alias
#define RGB24ToRAW RAWToRGB24
LIBYUV_API
int RAWToRGB24(const uint8* src_raw, int src_stride_raw,
uint8* dst_rgb24, int dst_stride_rgb24,
int width, int height);
// Draw a rectangle into I420.
LIBYUV_API
int I420Rect(uint8* dst_y, int dst_stride_y,
......@@ -281,13 +282,19 @@ int ARGBCopy(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Copy ARGB to ARGB.
// Copy Alpha channel of ARGB to alpha of ARGB.
LIBYUV_API
int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Copy ARGB to ARGB.
// Extract the alpha channel from ARGB.
LIBYUV_API
int ARGBExtractAlpha(const uint8* src_argb, int src_stride_argb,
uint8* dst_a, int dst_stride_a,
int width, int height);
// Copy Y channel to Alpha of ARGB.
LIBYUV_API
int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
......@@ -301,6 +308,7 @@ LIBYUV_API
ARGBBlendRow GetARGBBlend();
// Alpha Blend ARGB images and store to destination.
// Source is pre-multiplied by alpha using ARGBAttenuate.
// Alpha of destination is set to 255.
LIBYUV_API
int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
......@@ -308,6 +316,31 @@ int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Alpha Blend plane and store to destination.
// Source is not pre-multiplied by alpha.
LIBYUV_API
int BlendPlane(const uint8* src_y0, int src_stride_y0,
const uint8* src_y1, int src_stride_y1,
const uint8* alpha, int alpha_stride,
uint8* dst_y, int dst_stride_y,
int width, int height);
// Alpha Blend YUV images and store to destination.
// Source is not pre-multiplied by alpha.
// Alpha is full width x height and subsampled to half size to apply to UV.
LIBYUV_API
int I420Blend(const uint8* src_y0, int src_stride_y0,
const uint8* src_u0, int src_stride_u0,
const uint8* src_v0, int src_stride_v0,
const uint8* src_y1, int src_stride_y1,
const uint8* src_u1, int src_stride_u1,
const uint8* src_v1, int src_stride_v1,
const uint8* alpha, int alpha_stride,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Multiply ARGB image by ARGB image. Shifted down by 8. Saturates to 255.
LIBYUV_API
int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
......@@ -357,12 +390,6 @@ int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert MJPG to ARGB.
LIBYUV_API
int MJPGToARGB(const uint8* sample, size_t sample_size,
uint8* argb, int argb_stride,
int w, int h, int dw, int dh);
// Internal function - do not call directly.
// Computes table of cumulative sum for image where the value is the sum
// of all values above and to the left of the entry. Used by ARGBBlur.
......@@ -389,22 +416,49 @@ int ARGBShade(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height, uint32 value);
// Interpolate between two ARGB images using specified amount of interpolation
// Interpolate between two images using specified amount of interpolation
// (0 to 255) and store to destination.
// 'interpolation' is specified as 8 bit fraction where 0 means 100% src_argb0
// and 255 means 1% src_argb0 and 99% src_argb1.
// Internally uses ARGBScale bilinear filtering.
// Caveat: This function will write up to 16 bytes beyond the end of dst_argb.
// 'interpolation' is specified as 8 bit fraction where 0 means 100% src0
// and 255 means 1% src0 and 99% src1.
LIBYUV_API
int InterpolatePlane(const uint8* src0, int src_stride0,
const uint8* src1, int src_stride1,
uint8* dst, int dst_stride,
int width, int height, int interpolation);
// Interpolate between two ARGB images using specified amount of interpolation
// Internally calls InterpolatePlane with width * 4 (bpp).
LIBYUV_API
int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
const uint8* src_argb1, int src_stride_argb1,
uint8* dst_argb, int dst_stride_argb,
int width, int height, int interpolation);
// Interpolate between two YUV images using specified amount of interpolation
// Internally calls InterpolatePlane on each plane where the U and V planes
// are half width and half height.
LIBYUV_API
int I420Interpolate(const uint8* src0_y, int src0_stride_y,
const uint8* src0_u, int src0_stride_u,
const uint8* src0_v, int src0_stride_v,
const uint8* src1_y, int src1_stride_y,
const uint8* src1_u, int src1_stride_u,
const uint8* src1_v, int src1_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height, int interpolation);
#if defined(__pnacl__) || defined(__CLR_VER) || \
(defined(__i386__) && !defined(__SSE2__))
#define LIBYUV_DISABLE_X86
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
#define LIBYUV_DISABLE_X86
#endif
#endif
// The following are available on all x86 platforms:
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
......
......@@ -22,53 +22,24 @@ extern "C" {
(defined(__i386__) && !defined(__SSE2__))
#define LIBYUV_DISABLE_X86
#endif
// Visual C 2012 required for AVX2.
#if defined(_M_IX86) && !defined(__clang__) && \
defined(_MSC_VER) && _MSC_VER >= 1700
#define VISUALC_HAS_AVX2 1
#endif // VisualStudio >= 2012
// TODO(fbarchard): switch to standard form of inline; fails on clangcl.
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
#if defined(__APPLE__) && defined(__i386__)
#define DECLARE_FUNCTION(name) \
".text \n" \
".private_extern _" #name " \n" \
".align 4,0x90 \n" \
"_" #name ": \n"
#elif defined(__MINGW32__) || defined(__CYGWIN__) && defined(__i386__)
#define DECLARE_FUNCTION(name) \
".text \n" \
".align 4,0x90 \n" \
"_" #name ": \n"
#else
#define DECLARE_FUNCTION(name) \
".text \n" \
".align 4,0x90 \n" \
#name ": \n"
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
#define LIBYUV_DISABLE_X86
#endif
#endif
// The following are available for Visual C:
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
defined(_MSC_VER) && !defined(__clang__)
// The following are available for Visual C and clangcl 32 bit:
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
#define HAS_TRANSPOSEWX8_SSSE3
#define HAS_TRANSPOSEUVWX8_SSE2
#endif
// The following are available for GCC but not NaCL:
// The following are available for GCC 32 or 64 bit but not NaCL for 64 bit:
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__i386__) || (defined(__x86_64__) && !defined(__native_client__)))
#define HAS_TRANSPOSEWX8_SSSE3
#endif
// The following are available for 32 bit GCC:
#if !defined(LIBYUV_DISABLE_X86) && defined(__i386__) && !defined(__clang__)
#define HAS_TRANSPOSEUVWX8_SSE2
#endif
// The following are available for 64 bit GCC but not NaCL:
#if !defined(LIBYUV_DISABLE_X86) && !defined(__native_client__) && \
defined(__x86_64__)
......@@ -85,8 +56,8 @@ extern "C" {
#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \
defined(__mips__) && \
defined(__mips_dsp) && (__mips_dsp_rev >= 2)
#define HAS_TRANSPOSEWX8_MIPS_DSPR2
#define HAS_TRANSPOSEUVWx8_MIPS_DSPR2
#define HAS_TRANSPOSEWX8_DSPR2
#define HAS_TRANSPOSEUVWX8_DSPR2
#endif // defined(__mips__)
void TransposeWxH_C(const uint8* src, int src_stride,
......@@ -100,7 +71,9 @@ void TransposeWx8_SSSE3(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
void TransposeWx8_Fast_SSSE3(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride,
void TransposeWx8_DSPR2(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
void TransposeWx8_Fast_DSPR2(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
void TransposeWx8_Any_NEON(const uint8* src, int src_stride,
......@@ -109,8 +82,8 @@ void TransposeWx8_Any_SSSE3(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
void TransposeWx8_Fast_Any_SSSE3(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
void TransposeWx8_Any_MIPS_DSPR2(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
void TransposeWx8_Any_DSPR2(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
void TransposeUVWxH_C(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
......@@ -126,9 +99,19 @@ void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
void TransposeUVWx8_NEON(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b, int width);
void TransposeUVWx8_MIPS_DSPR2(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b, int width);
void TransposeUVWx8_DSPR2(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b, int width);
void TransposeUVWx8_Any_SSE2(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b, int width);
void TransposeUVWx8_Any_NEON(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b, int width);
void TransposeUVWx8_Any_DSPR2(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b, int width);
#ifdef __cplusplus
} // extern "C"
......
This diff is collapsed.
......@@ -35,7 +35,6 @@ int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
int clip_x, int clip_y, int clip_width, int clip_height,
enum FilterMode filtering);
// TODO(fbarchard): Implement this.
// Scale with YUV conversion to ARGB and clipping.
LIBYUV_API
int YUVToARGBScaleClip(const uint8* src_y, int src_stride_y,
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1456
#define LIBYUV_VERSION 1601
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -62,7 +62,7 @@ enum FourCC {
// 2 Secondary YUV formats: row biplanar.
FOURCC_M420 = FOURCC('M', '4', '2', '0'),
FOURCC_Q420 = FOURCC('Q', '4', '2', '0'), // deprecated.
FOURCC_Q420 = FOURCC('Q', '4', '2', '0'), // deprecated.
// 9 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp.
FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
......@@ -90,7 +90,8 @@ enum FourCC {
FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'),
FOURCC_YU12 = FOURCC('Y', 'U', '1', '2'), // Linux version of I420.
FOURCC_J420 = FOURCC('J', '4', '2', '0'),
FOURCC_J400 = FOURCC('J', '4', '0', '0'),
FOURCC_J400 = FOURCC('J', '4', '0', '0'), // unofficial fourcc
FOURCC_H420 = FOURCC('H', '4', '2', '0'), // unofficial fourcc
// 14 Auxiliary aliases. CanonicalFourCC() maps these to canonical fourcc.
FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'), // Alias for I420.
......@@ -150,6 +151,7 @@ enum FourCCBpp {
FOURCC_BPP_YU12 = 12,
FOURCC_BPP_J420 = 12,
FOURCC_BPP_J400 = 8,
FOURCC_BPP_H420 = 12,
FOURCC_BPP_MJPG = 0, // 0 means unknown.
FOURCC_BPP_H264 = 0,
FOURCC_BPP_IYUV = 12,
......
......@@ -17,6 +17,7 @@
#endif
#include "libyuv/basic_types.h"
#include "libyuv/compare_row.h"
#include "libyuv/cpu_id.h"
#include "libyuv/row.h"
#include "libyuv/video_common.h"
......@@ -26,30 +27,13 @@ namespace libyuv {
extern "C" {
#endif
// hash seed of 5381 recommended.
// Internal C version of HashDjb2 with int sized count for efficiency.
uint32 HashDjb2_C(const uint8* src, int count, uint32 seed);
// This module is for Visual C x86
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || \
(defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))))
#define HAS_HASHDJB2_SSE41
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed);
#ifdef VISUALC_HAS_AVX2
#define HAS_HASHDJB2_AVX2
uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed);
#endif
#endif // HAS_HASHDJB2_SSE41
// hash seed of 5381 recommended.
LIBYUV_API
uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
const int kBlockSize = 1 << 15; // 32768;
int remainder;
uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) = HashDjb2_C;
uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) =
HashDjb2_C;
#if defined(HAS_HASHDJB2_SSE41)
if (TestCpuFlag(kCpuHasSSE41)) {
HashDjb2_SSE = HashDjb2_SSE41;
......@@ -127,23 +111,6 @@ uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height) {
return fourcc;
}
uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count);
#if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
#define HAS_SUMSQUAREERROR_NEON
uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count);
#endif
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
#define HAS_SUMSQUAREERROR_SSE2
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count);
#endif
#ifdef VISUALC_HAS_AVX2
#define HAS_SUMSQUAREERROR_AVX2
uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count);
#endif
// TODO(fbarchard): Refactor into row function.
LIBYUV_API
uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
......
......@@ -10,6 +10,8 @@
#include "libyuv/basic_types.h"
#include "libyuv/compare_row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
......
......@@ -9,6 +9,8 @@
*/
#include "libyuv/basic_types.h"
#include "libyuv/compare_row.h"
#include "libyuv/row.h"
#ifdef __cplusplus
......@@ -16,11 +18,13 @@ namespace libyuv {
extern "C" {
#endif
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
// This module is for GCC x86 and x64.
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
uint32 sse;
asm volatile ( // NOLINT
asm volatile (
"pxor %%xmm0,%%xmm0 \n"
"pxor %%xmm5,%%xmm5 \n"
LABELALIGN
......@@ -54,15 +58,10 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
"+r"(count), // %2
"=g"(sse) // %3
:: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
); // NOLINT
);
return sse;
}
#endif // defined(__x86_64__) || defined(__i386__)
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))
#define HAS_HASHDJB2_SSE41
static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16
static uvec32 kHashMul0 = {
0x0c3525e1, // 33 ^ 15
......@@ -91,7 +90,7 @@ static uvec32 kHashMul3 = {
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
uint32 hash;
asm volatile ( // NOLINT
asm volatile (
"movd %2,%%xmm0 \n"
"pxor %%xmm7,%%xmm7 \n"
"movdqa %4,%%xmm6 \n"
......@@ -140,7 +139,7 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
"m"(kHashMul3) // %8
: "memory", "cc"
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
); // NOLINT
);
return hash;
}
#endif // defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))
......
......@@ -9,6 +9,8 @@
*/
#include "libyuv/basic_types.h"
#include "libyuv/compare_row.h"
#include "libyuv/row.h"
#ifdef __cplusplus
......@@ -27,7 +29,6 @@ uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
"vmov.u8 q9, #0 \n"
"vmov.u8 q11, #0 \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {q0}, [%0]! \n"
......
......@@ -9,6 +9,8 @@
*/
#include "libyuv/basic_types.h"
#include "libyuv/compare_row.h"
#include "libyuv/row.h"
#ifdef __cplusplus
......@@ -26,7 +28,6 @@ uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
"eor v17.16b, v17.16b, v17.16b \n"
"eor v19.16b, v19.16b, v19.16b \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"ld1 {v0.16b}, [%0], #16 \n"
......
......@@ -9,6 +9,8 @@
*/
#include "libyuv/basic_types.h"
#include "libyuv/compare_row.h"
#include "libyuv/row.h"
#ifdef __cplusplus
......@@ -16,9 +18,8 @@ namespace libyuv {
extern "C" {
#endif
// This module is for Visual C x86.