Commit 29103303 authored by James Bankoski's avatar James Bankoski

Revert "libyuv: update to b8ddb5a2"

This reverts commit b8f83282.

Update was to wrong version and still has: 

BUG=webm:1252

Change-Id: I80f3a7c0581ab5e2dd1a84f7840e51d7c362afac
parent b8f83282
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: b8ddb5a2
Version: 1456
License: BSD
License File: LICENSE
......@@ -13,13 +13,3 @@ which down-samples the original input video (f.g. 1280x720) a number of times
in order to encode multiple resolution bit streams.
Local Modifications:
Remove files unnecessary to libvpx build.
rm -rf .gitignore .gn AUTHORS Android.mk BUILD.gn CMakeLists.txt DEPS LICENSE \
LICENSE_THIRD_PARTY OWNERS PATENTS PRESUBMIT.py README.chromium README.md \
all.gyp build_overrides/ chromium/ codereview.settings docs/ \
download_vs_toolchain.py gyp_libyuv gyp_libyuv.py include/libyuv.h \
include/libyuv/compare_row.h libyuv.gyp libyuv.gypi libyuv_nacl.gyp \
libyuv_test.gyp linux.mk public.mk setup_links.py sync_chromium.py \
third_party/ tools/ unit_test/ util/ winarm.mk
......@@ -12,8 +12,10 @@
#define INCLUDE_LIBYUV_CONVERT_H_
#include "libyuv/basic_types.h"
#include "libyuv/rotate.h" // For enum RotationMode.
// TODO(fbarchard): Remove the following headers includes.
#include "libyuv/convert_from.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"
#ifdef __cplusplus
namespace libyuv {
......
......@@ -12,8 +12,10 @@
#define INCLUDE_LIBYUV_CONVERT_ARGB_H_
#include "libyuv/basic_types.h"
#include "libyuv/rotate.h" // For enum RotationMode.
// TODO(fbarchard): Remove the following headers includes
#include "libyuv/convert_from.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"
// TODO(fbarchard): This set of functions should exactly match convert.h
// TODO(fbarchard): Add tests. Create random content of right size and convert
......@@ -58,22 +60,6 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert J444 to ARGB.
LIBYUV_API
int J444ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I444 to ABGR.
LIBYUV_API
int I444ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height);
// Convert I411 to ARGB.
LIBYUV_API
int I411ToARGB(const uint8* src_y, int src_stride_y,
......@@ -82,24 +68,6 @@ int I411ToARGB(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I420 with Alpha to preattenuated ARGB.
LIBYUV_API
int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
const uint8* src_a, int src_stride_a,
uint8* dst_argb, int dst_stride_argb,
int width, int height, int attenuate);
// Convert I420 with Alpha to preattenuated ABGR.
LIBYUV_API
int I420AlphaToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
const uint8* src_a, int src_stride_a,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height, int attenuate);
// Convert I400 (grey) to ARGB. Reverse of ARGBToI400.
LIBYUV_API
int I400ToARGB(const uint8* src_y, int src_stride_y,
......@@ -163,54 +131,6 @@ int J422ToARGB(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert J420 to ABGR.
LIBYUV_API
int J420ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height);
// Convert J422 to ABGR.
LIBYUV_API
int J422ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height);
// Convert H420 to ARGB.
LIBYUV_API
int H420ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert H422 to ARGB.
LIBYUV_API
int H422ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert H420 to ABGR.
LIBYUV_API
int H420ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height);
// Convert H422 to ABGR.
LIBYUV_API
int H422ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height);
// BGRA little endian (argb in memory) to ARGB.
LIBYUV_API
int BGRAToARGB(const uint8* src_frame, int src_stride_frame,
......
......@@ -56,6 +56,8 @@ int I400Copy(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
int width, int height);
// TODO(fbarchard): I420ToM420
LIBYUV_API
int I420ToNV12(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
......
......@@ -18,8 +18,9 @@ namespace libyuv {
extern "C" {
#endif
// TODO(fbarchard): Consider overlapping bits for different architectures.
// Internal flag to indicate cpuid requires initialization.
static const int kCpuInitialized = 0x1;
#define kCpuInit 0x1
// These flags are only valid on ARM processors.
static const int kCpuHasARM = 0x2;
......@@ -36,12 +37,12 @@ static const int kCpuHasAVX = 0x200;
static const int kCpuHasAVX2 = 0x400;
static const int kCpuHasERMS = 0x800;
static const int kCpuHasFMA3 = 0x1000;
static const int kCpuHasAVX3 = 0x2000;
// 0x2000, 0x4000, 0x8000 reserved for future X86 flags.
// These flags are only valid on MIPS processors.
static const int kCpuHasMIPS = 0x10000;
static const int kCpuHasDSPR2 = 0x20000;
static const int kCpuHasMIPS_DSP = 0x20000;
static const int kCpuHasMIPS_DSPR2 = 0x40000;
// Internal function used to auto-init.
LIBYUV_API
......@@ -56,13 +57,13 @@ int ArmCpuCaps(const char* cpuinfo_name);
// returns non-zero if instruction set is detected
static __inline int TestCpuFlag(int test_flag) {
LIBYUV_API extern int cpu_info_;
return (!cpu_info_ ? InitCpuFlags() : cpu_info_) & test_flag;
return (cpu_info_ == kCpuInit ? InitCpuFlags() : cpu_info_) & test_flag;
}
// For testing, allow CPU flags to be disabled.
// ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3.
// MaskCpuFlags(-1) to enable all cpu specific optimizations.
// MaskCpuFlags(1) to disable all cpu specific optimizations.
// MaskCpuFlags(0) to disable all cpu specific optimizations.
LIBYUV_API
void MaskCpuFlags(int enable_flags);
......
......@@ -145,6 +145,13 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
uint8* dst_rgb565, int dst_stride_rgb565,
int width, int height);
// Convert NV21 to RGB565.
LIBYUV_API
int NV21ToRGB565(const uint8* src_y, int src_stride_y,
const uint8* src_uv, int src_stride_uv,
uint8* dst_rgb565, int dst_stride_rgb565,
int width, int height);
// I422ToARGB is in convert_argb.h
// Convert I422 to BGRA.
LIBYUV_API
......@@ -170,14 +177,6 @@ int I422ToRGBA(const uint8* src_y, int src_stride_y,
uint8* dst_rgba, int dst_stride_rgba,
int width, int height);
// Alias
#define RGB24ToRAW RAWToRGB24
LIBYUV_API
int RAWToRGB24(const uint8* src_raw, int src_stride_raw,
uint8* dst_rgb24, int dst_stride_rgb24,
int width, int height);
// Draw a rectangle into I420.
LIBYUV_API
int I420Rect(uint8* dst_y, int dst_stride_y,
......@@ -282,19 +281,13 @@ int ARGBCopy(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Copy Alpha channel of ARGB to alpha of ARGB.
// Copy ARGB to ARGB.
LIBYUV_API
int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Extract the alpha channel from ARGB.
LIBYUV_API
int ARGBExtractAlpha(const uint8* src_argb, int src_stride_argb,
uint8* dst_a, int dst_stride_a,
int width, int height);
// Copy Y channel to Alpha of ARGB.
// Copy ARGB to ARGB.
LIBYUV_API
int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
......@@ -308,7 +301,6 @@ LIBYUV_API
ARGBBlendRow GetARGBBlend();
// Alpha Blend ARGB images and store to destination.
// Source is pre-multiplied by alpha using ARGBAttenuate.
// Alpha of destination is set to 255.
LIBYUV_API
int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
......@@ -316,31 +308,6 @@ int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Alpha Blend plane and store to destination.
// Source is not pre-multiplied by alpha.
LIBYUV_API
int BlendPlane(const uint8* src_y0, int src_stride_y0,
const uint8* src_y1, int src_stride_y1,
const uint8* alpha, int alpha_stride,
uint8* dst_y, int dst_stride_y,
int width, int height);
// Alpha Blend YUV images and store to destination.
// Source is not pre-multiplied by alpha.
// Alpha is full width x height and subsampled to half size to apply to UV.
LIBYUV_API
int I420Blend(const uint8* src_y0, int src_stride_y0,
const uint8* src_u0, int src_stride_u0,
const uint8* src_v0, int src_stride_v0,
const uint8* src_y1, int src_stride_y1,
const uint8* src_u1, int src_stride_u1,
const uint8* src_v1, int src_stride_v1,
const uint8* alpha, int alpha_stride,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Multiply ARGB image by ARGB image. Shifted down by 8. Saturates to 255.
LIBYUV_API
int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
......@@ -390,6 +357,12 @@ int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert MJPG to ARGB.
LIBYUV_API
int MJPGToARGB(const uint8* sample, size_t sample_size,
uint8* argb, int argb_stride,
int w, int h, int dw, int dh);
// Internal function - do not call directly.
// Computes table of cumulative sum for image where the value is the sum
// of all values above and to the left of the entry. Used by ARGBBlur.
......@@ -416,49 +389,22 @@ int ARGBShade(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height, uint32 value);
// Interpolate between two images using specified amount of interpolation
// (0 to 255) and store to destination.
// 'interpolation' is specified as 8 bit fraction where 0 means 100% src0
// and 255 means 1% src0 and 99% src1.
LIBYUV_API
int InterpolatePlane(const uint8* src0, int src_stride0,
const uint8* src1, int src_stride1,
uint8* dst, int dst_stride,
int width, int height, int interpolation);
// Interpolate between two ARGB images using specified amount of interpolation
// Internally calls InterpolatePlane with width * 4 (bpp).
// (0 to 255) and store to destination.
// 'interpolation' is specified as 8 bit fraction where 0 means 100% src_argb0
// and 255 means 1% src_argb0 and 99% src_argb1.
// Internally uses ARGBScale bilinear filtering.
// Caveat: This function will write up to 16 bytes beyond the end of dst_argb.
LIBYUV_API
int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
const uint8* src_argb1, int src_stride_argb1,
uint8* dst_argb, int dst_stride_argb,
int width, int height, int interpolation);
// Interpolate between two YUV images using specified amount of interpolation
// Internally calls InterpolatePlane on each plane where the U and V planes
// are half width and half height.
LIBYUV_API
int I420Interpolate(const uint8* src0_y, int src0_stride_y,
const uint8* src0_u, int src0_stride_u,
const uint8* src0_v, int src0_stride_v,
const uint8* src1_y, int src1_stride_y,
const uint8* src1_u, int src1_stride_u,
const uint8* src1_v, int src1_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height, int interpolation);
#if defined(__pnacl__) || defined(__CLR_VER) || \
(defined(__i386__) && !defined(__SSE2__))
#define LIBYUV_DISABLE_X86
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
#define LIBYUV_DISABLE_X86
#endif
#endif
// The following are available on all x86 platforms:
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
......
......@@ -22,24 +22,53 @@ extern "C" {
(defined(__i386__) && !defined(__SSE2__))
#define LIBYUV_DISABLE_X86
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
#define LIBYUV_DISABLE_X86
// Visual C 2012 required for AVX2.
#if defined(_M_IX86) && !defined(__clang__) && \
defined(_MSC_VER) && _MSC_VER >= 1700
#define VISUALC_HAS_AVX2 1
#endif // VisualStudio >= 2012
// TODO(fbarchard): switch to standard form of inline; fails on clangcl.
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
#if defined(__APPLE__) && defined(__i386__)
#define DECLARE_FUNCTION(name) \
".text \n" \
".private_extern _" #name " \n" \
".align 4,0x90 \n" \
"_" #name ": \n"
#elif defined(__MINGW32__) || defined(__CYGWIN__) && defined(__i386__)
#define DECLARE_FUNCTION(name) \
".text \n" \
".align 4,0x90 \n" \
"_" #name ": \n"
#else
#define DECLARE_FUNCTION(name) \
".text \n" \
".align 4,0x90 \n" \
#name ": \n"
#endif
#endif
// The following are available for Visual C and clangcl 32 bit:
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
// The following are available for Visual C:
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
defined(_MSC_VER) && !defined(__clang__)
#define HAS_TRANSPOSEWX8_SSSE3
#define HAS_TRANSPOSEUVWX8_SSE2
#endif
// The following are available for GCC 32 or 64 bit but not NaCL for 64 bit:
// The following are available for GCC but not NaCL:
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__i386__) || (defined(__x86_64__) && !defined(__native_client__)))
#define HAS_TRANSPOSEWX8_SSSE3
#endif
// The following are available for 32 bit GCC:
#if !defined(LIBYUV_DISABLE_X86) && defined(__i386__) && !defined(__clang__)
#define HAS_TRANSPOSEUVWX8_SSE2
#endif
// The following are available for 64 bit GCC but not NaCL:
#if !defined(LIBYUV_DISABLE_X86) && !defined(__native_client__) && \
defined(__x86_64__)
......@@ -56,8 +85,8 @@ extern "C" {
#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \
defined(__mips__) && \
defined(__mips_dsp) && (__mips_dsp_rev >= 2)
#define HAS_TRANSPOSEWX8_DSPR2
#define HAS_TRANSPOSEUVWX8_DSPR2
#define HAS_TRANSPOSEWX8_MIPS_DSPR2
#define HAS_TRANSPOSEUVWx8_MIPS_DSPR2
#endif // defined(__mips__)
void TransposeWxH_C(const uint8* src, int src_stride,
......@@ -71,9 +100,7 @@ void TransposeWx8_SSSE3(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
void TransposeWx8_Fast_SSSE3(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
void TransposeWx8_DSPR2(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
void TransposeWx8_Fast_DSPR2(const uint8* src, int src_stride,
void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
void TransposeWx8_Any_NEON(const uint8* src, int src_stride,
......@@ -82,8 +109,8 @@ void TransposeWx8_Any_SSSE3(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
void TransposeWx8_Fast_Any_SSSE3(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
void TransposeWx8_Any_DSPR2(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
void TransposeWx8_Any_MIPS_DSPR2(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
void TransposeUVWxH_C(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
......@@ -99,19 +126,9 @@ void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
void TransposeUVWx8_NEON(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b, int width);
void TransposeUVWx8_DSPR2(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b, int width);
void TransposeUVWx8_Any_SSE2(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b, int width);
void TransposeUVWx8_Any_NEON(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b, int width);
void TransposeUVWx8_Any_DSPR2(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b, int width);
void TransposeUVWx8_MIPS_DSPR2(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b, int width);
#ifdef __cplusplus
} // extern "C"
......
This diff is collapsed.
......@@ -35,6 +35,7 @@ int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
int clip_x, int clip_y, int clip_width, int clip_height,
enum FilterMode filtering);
// TODO(fbarchard): Implement this.
// Scale with YUV conversion to ARGB and clipping.
LIBYUV_API
int YUVToARGBScaleClip(const uint8* src_y, int src_stride_y,
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1601
#define LIBYUV_VERSION 1456
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -62,7 +62,7 @@ enum FourCC {
// 2 Secondary YUV formats: row biplanar.
FOURCC_M420 = FOURCC('M', '4', '2', '0'),
FOURCC_Q420 = FOURCC('Q', '4', '2', '0'), // deprecated.
FOURCC_Q420 = FOURCC('Q', '4', '2', '0'), // deprecated.
// 9 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp.
FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
......@@ -90,8 +90,7 @@ enum FourCC {
FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'),
FOURCC_YU12 = FOURCC('Y', 'U', '1', '2'), // Linux version of I420.
FOURCC_J420 = FOURCC('J', '4', '2', '0'),
FOURCC_J400 = FOURCC('J', '4', '0', '0'), // unofficial fourcc
FOURCC_H420 = FOURCC('H', '4', '2', '0'), // unofficial fourcc
FOURCC_J400 = FOURCC('J', '4', '0', '0'),
// 14 Auxiliary aliases. CanonicalFourCC() maps these to canonical fourcc.
FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'), // Alias for I420.
......@@ -151,7 +150,6 @@ enum FourCCBpp {
FOURCC_BPP_YU12 = 12,
FOURCC_BPP_J420 = 12,
FOURCC_BPP_J400 = 8,
FOURCC_BPP_H420 = 12,
FOURCC_BPP_MJPG = 0, // 0 means unknown.
FOURCC_BPP_H264 = 0,
FOURCC_BPP_IYUV = 12,
......
......@@ -17,7 +17,6 @@
#endif
#include "libyuv/basic_types.h"
#include "libyuv/compare_row.h"
#include "libyuv/cpu_id.h"
#include "libyuv/row.h"
#include "libyuv/video_common.h"
......@@ -27,13 +26,30 @@ namespace libyuv {
extern "C" {
#endif
// hash seed of 5381 recommended.
// Internal C version of HashDjb2 with int sized count for efficiency.
uint32 HashDjb2_C(const uint8* src, int count, uint32 seed);
// This module is for Visual C x86
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || \
(defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))))
#define HAS_HASHDJB2_SSE41
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed);
#ifdef VISUALC_HAS_AVX2
#define HAS_HASHDJB2_AVX2
uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed);
#endif
#endif // HAS_HASHDJB2_SSE41
// hash seed of 5381 recommended.
LIBYUV_API
uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
const int kBlockSize = 1 << 15; // 32768;
int remainder;
uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) =
HashDjb2_C;
uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) = HashDjb2_C;
#if defined(HAS_HASHDJB2_SSE41)
if (TestCpuFlag(kCpuHasSSE41)) {
HashDjb2_SSE = HashDjb2_SSE41;
......@@ -111,6 +127,23 @@ uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height) {
return fourcc;
}
uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count);
#if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
#define HAS_SUMSQUAREERROR_NEON
uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count);
#endif
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
#define HAS_SUMSQUAREERROR_SSE2
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count);
#endif
#ifdef VISUALC_HAS_AVX2
#define HAS_SUMSQUAREERROR_AVX2
uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count);
#endif
// TODO(fbarchard): Refactor into row function.
LIBYUV_API
uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
......
......@@ -10,8 +10,6 @@
#include "libyuv/basic_types.h"
#include "libyuv/compare_row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
......
......@@ -9,8 +9,6 @@
*/
#include "libyuv/basic_types.h"
#include "libyuv/compare_row.h"
#include "libyuv/row.h"
#ifdef __cplusplus
......@@ -18,13 +16,11 @@ namespace libyuv {
extern "C" {
#endif
// This module is for GCC x86 and x64.
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
uint32 sse;
asm volatile (
asm volatile ( // NOLINT
"pxor %%xmm0,%%xmm0 \n"
"pxor %%xmm5,%%xmm5 \n"
LABELALIGN
......@@ -58,10 +54,15 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
"+r"(count), // %2
"=g"(sse) // %3
:: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
); // NOLINT
return sse;
}
#endif // defined(__x86_64__) || defined(__i386__)
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))
#define HAS_HASHDJB2_SSE41
static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16
static uvec32 kHashMul0 = {
0x0c3525e1, // 33 ^ 15
......@@ -90,7 +91,7 @@ static uvec32 kHashMul3 = {
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
uint32 hash;
asm volatile (
asm volatile ( // NOLINT
"movd %2,%%xmm0 \n"
"pxor %%xmm7,%%xmm7 \n"
"movdqa %4,%%xmm6 \n"
......@@ -139,7 +140,7 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
"m"(kHashMul3) // %8
: "memory", "cc"
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
);
); // NOLINT
return hash;
}
#endif // defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))
......
......@@ -9,8 +9,6 @@
*/
#include "libyuv/basic_types.h"
#include "libyuv/compare_row.h"
#include "libyuv/row.h"
#ifdef __cplusplus
......@@ -29,6 +27,7 @@ uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
"vmov.u8 q9, #0 \n"
"vmov.u8 q11, #0 \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {q0}, [%0]! \n"
......
......@@ -9,8 +9,6 @@
*/
#include "libyuv/basic_types.h"
#include "libyuv/compare_row.h"
#include "libyuv/row.h"
#ifdef __cplusplus
......@@ -28,6 +26,7 @@ uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
"eor v17.16b, v17.16b, v17.16b \n"
"eor v19.16b, v19.16b, v19.16b \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"ld1 {v0.16b}, [%0], #16 \n"
......
......@@ -9,8 +9,6 @@
*/
#include "libyuv/basic_types.h"
#include "libyuv/compare_row.h"
#include "libyuv/row.h"
#ifdef __cplusplus
......@@ -18,8 +16,9 @@ namespace libyuv {
extern "C" {
#endif
// This module is for 32 bit Visual C x86 and clangcl