Commit 66b2266a authored by Jim Bankoski

libyuv: update to de944ed8c74909ea6fbd743a22efe1e55e851b83

Fixes windows build issue:
==> tests::VS10_x64 is broken
         LINK : warning C4742: 'kYvuI601Constants' has different alignment in 'third_party\libyuv\source\row_common.cc' and 'third_party\libyuv\source\planar_functions.cc': 32 and 2 [.build-x86_64-win64-vs10\vpxdec.vcxproj]
         LINK : warning C4744: 'kYvuI601Constants' has different type in 'third_party\libyuv\source\row_common.cc' and 'third_party\libyuv\source\planar_functions.cc': '__declspec(align(32)) struct (224 bytes)' and 'struct (224 bytes)' [.build-x86_64-win64-vs10\vpxdec.vcxproj]
         LINK : warning C4742: 'kYuvI601Constants' has different alignment in 'third_party\libyuv\source\row_common.cc' and 'third_party\libyuv\source\planar_functions.cc': 32 and 2 [.build-x86_64-win64-vs10\vpxdec.vcxproj]
         LINK : warning C4744: 'kYuvI601Constants' has different type in 'third_party\libyuv\source\row_common.cc' and 'third_party\libyuv\source\planar_functions.cc': '__declspec(align(32)) struct (224 bytes)' and 'struct (224 bytes)' [.build-x86_64-win64-vs10\vpxdec.vcxproj]
         LINK : warning C4742: 'kYvuI601Constants' has different alignment in 'third_party\libyuv\source\row_common.cc' and 'third_party\libyuv\source\planar_functions.cc': 32 and 2 [.build-x86_64-win64-vs10\vpxenc.vcxproj]
         LINK : warning C4744: 'kYvuI601Constants' has different type in 'third_party\libyuv\source\row_common.cc' and 'third_party\libyuv\source\planar_functions.cc': '__declspec(align(32)) struct (224 bytes)' and 'struct (224 bytes)' [.build-x86_64-win64-vs10\vpxenc.vcxproj]
         LINK : warning C4742: 'kYuvI601Constants' has different alignment in 'third_party\libyuv\source\row_common.cc' and 'third_party\libyuv\source\planar_functions.cc': 32 and 2 [.build-x86_64-win64-vs10\vpxenc.vcxproj]
         LINK : warning C4744: 'kYuvI601Constants' has different type in 'third_party\libyuv\source\row_common.cc' and 'third_party\libyuv\source\planar_functions.cc': '__declspec(align(32)) struct (224 bytes)' and 'struct (224 bytes)' [.build-x86_64-win64-vs10\vpxenc.vcxproj]
         LINK : error C2220: warning treated as error - no 'executable' file generated [.build-x86_64-win64-vs10\vpxdec.vcxproj]
         LINK : error C2220: warning treated as error - no 'executable' file generated [.build-x86_64-win64-vs10\vpxenc.vcxproj]

Change-Id: Ic3c4fff9209f5a52ff8f8ff321548d49ba09ec06
parent 129814fc
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: c244a3e9
URL: https://chromium.googlesource.com/libyuv/libyuv
Version: de944ed8c74909ea6fbd743a22efe1e55e851b83
License: BSD
License File: LICENSE
......
......@@ -373,8 +373,11 @@ extern "C" {
#endif
#if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__)
#if defined(VISUALC_HAS_AVX2)
#define SIMD_ALIGNED(var) __declspec(align(32)) var
#else
#define SIMD_ALIGNED(var) __declspec(align(16)) var
#define SIMD_ALIGNED32(var) __declspec(align(32)) var
#endif
typedef __declspec(align(16)) int16 vec16[8];
typedef __declspec(align(16)) int32 vec32[4];
typedef __declspec(align(16)) int8 vec8[16];
......@@ -389,8 +392,11 @@ typedef __declspec(align(32)) uint32 ulvec32[8];
typedef __declspec(align(32)) uint8 ulvec8[32];
#elif !defined(__pnacl__) && (defined(__GNUC__) || defined(__clang__))
// Caveat GCC 4.2 to 4.7 have a known issue using vectors with const.
#if defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)
#define SIMD_ALIGNED(var) var __attribute__((aligned(32)))
#else
#define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
#define SIMD_ALIGNED32(var) var __attribute__((aligned(32)))
#endif
typedef int16 __attribute__((vector_size(16))) vec16;
typedef int32 __attribute__((vector_size(16))) vec32;
typedef int8 __attribute__((vector_size(16))) vec8;
......@@ -405,7 +411,6 @@ typedef uint32 __attribute__((vector_size(32))) ulvec32;
typedef uint8 __attribute__((vector_size(32))) ulvec8;
#else
#define SIMD_ALIGNED(var) var
#define SIMD_ALIGNED32(var) var
typedef int16 vec16[8];
typedef int32 vec32[4];
typedef int8 vec8[16];
......@@ -461,14 +466,14 @@ struct YuvConstants {
#endif
// Conversion matrix for YUV to RGB
extern const struct YuvConstants kYuvI601Constants; // BT.601
extern const struct YuvConstants kYuvJPEGConstants; // JPeg color space
extern const struct YuvConstants kYuvH709Constants; // BT.709
extern const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants); // BT.601
extern const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants); // JPeg
extern const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants); // BT.709
// Conversion matrix for YVU to BGR
extern const struct YuvConstants kYvuI601Constants; // BT.601
extern const struct YuvConstants kYvuJPEGConstants; // JPeg color space
extern const struct YuvConstants kYvuH709Constants; // BT.709
extern const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants); // BT.601
extern const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants); // JPeg
extern const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants); // BT.709
#if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__)
#define OMITFP
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1614
#define LIBYUV_VERSION 1616
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -1019,7 +1019,7 @@ const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
{ 0x0101 * YG, 0, 0, 0 }
};
#else
const struct YuvConstants SIMD_ALIGNED32(kYuvI601Constants) = {
const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
{ UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0 },
{ UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
......@@ -1031,7 +1031,7 @@ const struct YuvConstants SIMD_ALIGNED32(kYuvI601Constants) = {
{ BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR },
{ YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG }
};
const struct YuvConstants SIMD_ALIGNED32(kYvuI601Constants) = {
const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
{ VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0 },
{ VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
......@@ -1106,7 +1106,7 @@ const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
{ 0x0101 * YG, 0, 0, 0 }
};
#else
const struct YuvConstants SIMD_ALIGNED32(kYuvJPEGConstants) = {
const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
{ UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0 },
{ UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
......@@ -1118,7 +1118,7 @@ const struct YuvConstants SIMD_ALIGNED32(kYuvJPEGConstants) = {
{ BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR },
{ YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG }
};
const struct YuvConstants SIMD_ALIGNED32(kYvuJPEGConstants) = {
const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
{ VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0 },
{ VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
......@@ -1194,7 +1194,7 @@ const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
{ 0x0101 * YG, 0, 0, 0 }
};
#else
const struct YuvConstants SIMD_ALIGNED32(kYuvH709Constants) = {
const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
{ UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0 },
{ UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
......@@ -1206,7 +1206,7 @@ const struct YuvConstants SIMD_ALIGNED32(kYuvH709Constants) = {
{ BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR },
{ YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG }
};
const struct YuvConstants SIMD_ALIGNED32(kYvuH709Constants) = {
const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
{ VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0 },
{ VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
......@@ -2504,7 +2504,7 @@ void I422ToRGB565Row_AVX2(const uint8* src_y,
uint8* dst_rgb565,
const struct YuvConstants* yuvconstants,
int width) {
SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
while (width > 0) {
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
......@@ -2530,7 +2530,7 @@ void I422ToARGB1555Row_AVX2(const uint8* src_y,
const struct YuvConstants* yuvconstants,
int width) {
// Row buffer for intermediate ARGB pixels.
SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
while (width > 0) {
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
......@@ -2556,7 +2556,7 @@ void I422ToARGB4444Row_AVX2(const uint8* src_y,
const struct YuvConstants* yuvconstants,
int width) {
// Row buffer for intermediate ARGB pixels.
SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
while (width > 0) {
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
......@@ -2582,7 +2582,7 @@ void I422ToRGB24Row_AVX2(const uint8* src_y,
const struct YuvConstants* yuvconstants,
int width) {
// Row buffer for intermediate ARGB pixels.
SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
while (width > 0) {
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
......@@ -2604,7 +2604,7 @@ void NV12ToRGB565Row_AVX2(const uint8* src_y,
const struct YuvConstants* yuvconstants,
int width) {
// Row buffer for intermediate ARGB pixels.
SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
while (width > 0) {
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, twidth);
......
;
; Copyright 2012 The LibYuv Project Authors. All rights reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
%ifdef __YASM_VERSION_ID__
%if __YASM_VERSION_ID__ < 01020000h
%error AVX2 is supported only by yasm 1.2.0 or later.
%endif
%endif
%include "x86inc.asm"
SECTION .text
; cglobal numeric constants are parameters, gpr regs, mm regs
; void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix)
;
; YUY2TOYROW format, mov_suffix [, name_suffix]
;   format      - YUY2 or UYVY; selects how the Y bytes are isolated.
;   mov_suffix  - appended to "mov" for every load and store (the
;                 instantiations below pass a or u).
;   name_suffix - optional text appended to the function name.
;
; Extracts the Y bytes from a packed row. Each loop iteration reads two
; vectors (2 * mmsize bytes) from src_yuy2, writes one vector (mmsize bytes)
; to dst_y and subtracts mmsize from pix. The loop body always runs at least
; once and repeats while the remaining count is greater than zero, so whole
; vectors are always read and written.
; NOTE(review): presumably callers pass a positive pix that is a multiple of
; mmsize, or pad their buffers - confirm against the C callers.
%macro YUY2TOYROW 2-3
cglobal %1ToYRow%3, 3, 3, 3, src_yuy2, dst_y, pix
%ifidn %1,YUY2
; Only the YUY2 variant needs a mask; UYVY uses a shift instead.
pcmpeqb m2, m2, m2 ; generate mask 0x00ff00ff
psrlw m2, m2, 8
%endif
ALIGN 4
.convertloop:
mov%2 m0, [src_yuy2q]
mov%2 m1, [src_yuy2q + mmsize]
lea src_yuy2q, [src_yuy2q + mmsize * 2]
%ifidn %1,YUY2
pand m0, m0, m2 ; YUY2 even bytes are Y
pand m1, m1, m2
%else
psrlw m0, m0, 8 ; UYVY odd bytes are Y
psrlw m1, m1, 8
%endif
; Both vectors now hold Y as 16-bit words; pack them down to bytes.
packuswb m0, m0, m1
%if cpuflag(AVX2)
; The 256-bit pack works per 128-bit lane, so the quadwords come out in
; 0,2,1,3 order; 0xd8 swaps the middle two to restore linear order.
vpermq m0, m0, 0xd8
%endif
sub pixd, mmsize
; The store and lea below do not modify flags, so jg still tests the
; result of the sub above.
mov%2 [dst_yq], m0
lea dst_yq, [dst_yq + mmsize]
jg .convertloop
REP_RET
%endmacro
; TODO(fbarchard): Remove MMX. Add SSSE3 pshufb version.
; Instantiations: for MMX and SSE2 both formats are emitted with the a and
; u move suffixes (the u variants get the _Unaligned name suffix); for AVX2
; only the a variants are emitted.
; NOTE(review): the final exported symbol names are produced by cglobal in
; x86inc.asm - confirm any prefix/suffix it adds there.
INIT_MMX MMX
YUY2TOYROW YUY2,a,
YUY2TOYROW YUY2,u,_Unaligned
YUY2TOYROW UYVY,a,
YUY2TOYROW UYVY,u,_Unaligned
INIT_XMM SSE2
YUY2TOYROW YUY2,a,
YUY2TOYROW YUY2,u,_Unaligned
YUY2TOYROW UYVY,a,
YUY2TOYROW UYVY,u,_Unaligned
INIT_YMM AVX2
YUY2TOYROW YUY2,a,
YUY2TOYROW UYVY,a,
; void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix)
;
; SplitUVRow mov_suffix [, name_suffix]
;   mov_suffix  - appended to "mov" for every load and store (the
;                 instantiations below pass a or u).
;   name_suffix - optional text appended to the function name.
;
; De-interleaves a row of byte pairs: the even bytes of src_uv go to dst_u
; and the odd bytes go to dst_v. Each loop iteration reads two vectors
; (2 * mmsize bytes), writes one vector (mmsize bytes) to each destination
; and subtracts mmsize from pix. The loop body always runs at least once and
; repeats while the remaining count is greater than zero.
; NOTE(review): presumably callers pass a positive pix that is a multiple of
; mmsize, or pad their buffers - confirm against the C callers.
%macro SplitUVRow 1-2
cglobal SplitUVRow%2, 4, 4, 5, src_uv, dst_u, dst_v, pix
pcmpeqb m4, m4, m4 ; generate mask 0x00ff00ff
psrlw m4, m4, 8
; Turn dst_v into an offset from dst_u so that advancing dst_u alone moves
; both output positions.
sub dst_vq, dst_uq
ALIGN 4
.convertloop:
mov%1 m0, [src_uvq]
mov%1 m1, [src_uvq + mmsize]
lea src_uvq, [src_uvq + mmsize * 2]
psrlw m2, m0, 8 ; odd bytes
psrlw m3, m1, 8
pand m0, m0, m4 ; even bytes
pand m1, m1, m4
; Pack the 16-bit words back to bytes: m0 = even bytes, m2 = odd bytes.
packuswb m0, m0, m1
packuswb m2, m2, m3
%if cpuflag(AVX2)
; The 256-bit pack works per 128-bit lane, so the quadwords come out in
; 0,2,1,3 order; 0xd8 swaps the middle two to restore linear order.
vpermq m0, m0, 0xd8
vpermq m2, m2, 0xd8
%endif
mov%1 [dst_uq], m0
mov%1 [dst_uq + dst_vq], m2 ; dst_uq + offset = current dst_v position
lea dst_uq, [dst_uq + mmsize]
sub pixd, mmsize
jg .convertloop
REP_RET
%endmacro
; Instantiations: MMX and SSE2 get both the a and u (_Unaligned) variants;
; AVX2 gets only the a variant.
INIT_MMX MMX
SplitUVRow a,
SplitUVRow u,_Unaligned
INIT_XMM SSE2
SplitUVRow a,
SplitUVRow u,_Unaligned
INIT_YMM AVX2
SplitUVRow a,
; void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
; int width);
;
; MergeUVRow_ mov_suffix [, name_suffix]
;   mov_suffix  - appended to "mov" for every load and store (the
;                 instantiations below pass a or u).
;   name_suffix - optional text appended to the function name.
;
; Interleaves a row of U bytes with a row of V bytes into U,V byte pairs.
; Each loop iteration reads one vector (mmsize bytes) from each source,
; writes two vectors (2 * mmsize bytes) to dst_uv and subtracts mmsize from
; the width. The loop body always runs at least once and repeats while the
; remaining count is greater than zero.
; NOTE(review): presumably callers pass a positive width that is a multiple
; of mmsize, or pad their buffers - confirm against the C callers.
%macro MergeUVRow_ 1-2
cglobal MergeUVRow_%2, 4, 4, 3, src_u, src_v, dst_uv, pix
; Turn src_v into an offset from src_u so that advancing src_u alone moves
; both input positions.
sub src_vq, src_uq
ALIGN 4
.convertloop:
mov%1 m0, [src_uq]
; src_vq is an offset here, not a pointer, so it must be added to src_uq.
mov%1 m1, [src_uq + src_vq]
lea src_uq, [src_uq + mmsize]
punpcklbw m2, m0, m1 ; UV pairs from the low half (of each 128-bit lane)
punpckhbw m0, m0, m1 ; UV pairs from the high half (of each 128-bit lane)
%if cpuflag(AVX2)
; The 256-bit unpacks interleave per 128-bit lane, so regroup the lanes to
; get the pairs back into linear order before storing.
vperm2i128 m1, m2, m0, 0x20 ; low 128 of ymm2 and low 128 of ymm0
vperm2i128 m2, m2, m0, 0x31 ; high 128 of ymm2 and high 128 of ymm0
mov%1 [dst_uvq], m1
mov%1 [dst_uvq + mmsize], m2
%else
mov%1 [dst_uvq], m2
mov%1 [dst_uvq + mmsize], m0
%endif
lea dst_uvq, [dst_uvq + mmsize * 2]
sub pixd, mmsize
jg .convertloop
REP_RET
%endmacro
; Instantiations: MMX and SSE2 get both the a and u (_Unaligned) variants;
; AVX2 gets only the a variant.
INIT_MMX MMX
MergeUVRow_ a,
MergeUVRow_ u,_Unaligned
INIT_XMM SSE2
MergeUVRow_ a,
MergeUVRow_ u,_Unaligned
INIT_YMM AVX2
MergeUVRow_ a,
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment