Commit 153eec46 authored by Yunqing Wang

Align image buffer in multiple-resolution encoder

Aligned the image buffer and its stride to 32 bytes. This enables
calling the optimized scaler functions in libyuv and improves
performance.

Tested the libyuv scaler (x86 optimizations) on Linux and Windows,
including Linux 32/64-bit, Visual Studio 32/64-bit, Cygwin, and
MinGW32.

Also fixed an off-by-one context pointer in vpx_codec_encode().

Change-Id: Ibe97d7a0a745f82c43852fa4ed719be5a4db6abc
parent 254889cd
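
For reference, the 32-byte alignment is requested through the last argument of vpx_img_alloc(); a minimal sketch of the allocation pattern the example now uses (the 640x480 size is an arbitrary placeholder):

#include <stdio.h>
#include <stdlib.h>
#include "vpx/vpx_image.h"

/* Sketch: align both the image buffer and each row (stride) to 32 bytes
 * via the align argument, matching the change in the encoder example. */
int main(void) {
    vpx_image_t raw;
    if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, 640, 480, 32)) {
        fprintf(stderr, "Failed to allocate image\n");
        return EXIT_FAILURE;
    }
    /* With alignment padding, stride[VPX_PLANE_Y] may exceed d_w. */
    printf("Y stride %d vs display width %u\n",
           raw.stride[VPX_PLANE_Y], raw.d_w);
    vpx_img_free(&raw);
    return 0;
}
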
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 90
Version: 102
License: BSD
License File: LICENSE
......
@@ -13,21 +13,12 @@
#include <stddef.h> // for NULL, size_t
#ifndef WIN32
#if !(defined(_MSC_VER) && (_MSC_VER < 1600))
#include <stdint.h> // for uintptr_t
#endif
#ifndef INT_TYPES_DEFINED
#define INT_TYPES_DEFINED
#ifdef COMPILER_MSVC
typedef __int64 int64;
#else
typedef long long int64;
#endif /* COMPILER_MSVC */
typedef int int32;
typedef short int16;
typedef char int8;
#ifdef COMPILER_MSVC
typedef unsigned __int64 uint64;
typedef __int64 int64;
@@ -38,9 +29,20 @@ typedef __int64 int64;
#define UINT64_C(x) x ## UI64
#endif
#define INT64_F "I64"
#else
#else // COMPILER_MSVC
#ifdef __LP64__
typedef unsigned long uint64;
typedef long int64;
#ifndef INT64_C
#define INT64_C(x) x ## L
#endif
#ifndef UINT64_C
#define UINT64_C(x) x ## UL
#endif
#define INT64_F "l"
#else // __LP64__
typedef unsigned long long uint64;
//typedef long long int64;
typedef long long int64;
#ifndef INT64_C
#define INT64_C(x) x ## LL
#endif
@@ -48,10 +50,14 @@ typedef unsigned long long uint64;
#define UINT64_C(x) x ## ULL
#endif
#define INT64_F "ll"
#endif /* COMPILER_MSVC */
#endif // __LP64__
#endif // COMPILER_MSVC
typedef unsigned int uint32;
typedef int int32;
typedef unsigned short uint16;
typedef short int16;
typedef unsigned char uint8;
typedef char int8;
#endif // INT_TYPES_DEFINED
// Detect compiler is for x86 or x64.
@@ -60,7 +66,6 @@ typedef unsigned char uint8;
#define CPU_X86 1
#endif
#define IS_ALIGNED(p, a) (0==((uintptr_t)(p) & ((a)-1)))
#define ALIGNP(p, t) \
((uint8*)((((uintptr_t)(p) + \
((t)-1)) & ~((t)-1))))
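
A tiny standalone sketch of what these two macros compute (uint8_t stands in for libyuv's uint8 typedef; ALIGNP rounds a pointer up to the next multiple of t, IS_ALIGNED tests for a zero remainder):

#include <stdint.h>
#include <stdio.h>

#define IS_ALIGNED(p, a) (0 == ((uintptr_t)(p) & ((a)-1)))
#define ALIGNP(p, t) \
    ((uint8_t*)((((uintptr_t)(p) + ((t)-1)) & ~((t)-1))))

int main(void) {
    uint8_t buf[64 + 31];              /* over-allocate by t-1 bytes */
    uint8_t* p = ALIGNP(buf, 32);      /* first 32-byte boundary in buf */
    printf("%d\n", IS_ALIGNED(p, 32)); /* prints 1 */
    return 0;
}
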
......
@@ -11,21 +11,39 @@
#ifndef INCLUDE_LIBYUV_CPU_ID_H_
#define INCLUDE_LIBYUV_CPU_ID_H_
//namespace libyuv {
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// These flags are only valid on x86 processors
static const int kCpuHasSSE2 = 1;
static const int kCpuHasSSSE3 = 2;
// SIMD support on ARM processors
// These flags are only valid on ARM processors
static const int kCpuHasNEON = 4;
// Internal flag to indicate cpuid is initialized.
static const int kCpuInitialized = 8;
// Detect CPU has SSE2 etc.
int TestCpuFlag(int flag);
// test_flag parameter should be one of kCpuHas constants above
// returns non-zero if instruction set is detected
static __inline int TestCpuFlag(int test_flag) {
extern int cpu_info_;
extern int InitCpuFlags();
return (cpu_info_ ? cpu_info_ : InitCpuFlags()) & test_flag;
}
// For testing, allow CPU flags to be disabled.
void MaskCpuFlagsForTest(int enable_flags);
//} // namespace libyuv
// ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3.
// -1 to enable all cpu specific optimizations.
// 0 to disable all cpu specific optimizations.
void MaskCpuFlags(int enable_flags);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_CPU_ID_H_
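
A hedged usage sketch of the new inline API: TestCpuFlag() initializes cpu_info_ lazily on first call, and MaskCpuFlags() narrows the detected set for testing (the function name pick_path is illustrative only):

#include "third_party/libyuv/include/libyuv/cpu_id.h"

/* Illustrative runtime dispatch on the detected flags. */
static const char* pick_path(void) {
    if (TestCpuFlag(kCpuHasSSSE3)) return "SSSE3";
    if (TestCpuFlag(kCpuHasSSE2)) return "SSE2";
    return "C";
}
/* Per the header comment: MaskCpuFlags(~kCpuHasSSSE3) disables SSSE3,
 * MaskCpuFlags(-1) enables all optimizations, MaskCpuFlags(0) disables
 * them all. */
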
@@ -13,7 +13,10 @@
#include "third_party/libyuv/include/libyuv/basic_types.h"
//namespace libyuv {
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Supported filtering
typedef enum {
@@ -42,16 +45,8 @@ int I420Scale(const uint8* src_y, int src_stride_y,
int dst_width, int dst_height,
FilterMode filtering);
// Legacy API
// If dst_height_offset is non-zero, the image is offset by that many pixels
// and stretched to (dst_height - dst_height_offset * 2) pixels high,
// instead of dst_height.
int Scale_1(const uint8* src, int src_width, int src_height,
uint8* dst, int dst_width, int dst_height, int dst_height_offset,
int interpolate);
// Same, but specified src terms of each plane location and stride.
int Scale_2(const uint8* src_y, const uint8* src_u, const uint8* src_v,
// Legacy API. Deprecated
int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v,
int src_stride_y, int src_stride_u, int src_stride_v,
int src_width, int src_height,
uint8* dst_y, uint8* dst_u, uint8* dst_v,
@@ -59,9 +54,17 @@ int Scale_2(const uint8* src_y, const uint8* src_u, const uint8* src_v,
int dst_width, int dst_height,
int interpolate);
// Legacy API. Deprecated
int ScaleOffset(const uint8* src, int src_width, int src_height,
uint8* dst, int dst_width, int dst_height, int dst_yoffset,
int interpolate);
// For testing, allow disabling of optimizations.
void SetUseReferenceImpl(int use);
//} // namespace libyuv
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_SCALE_H_
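
For illustration, a sketch of calling the stride-aware I420Scale() to halve a frame, assuming the full parameter order matches the fragment above (source plane/stride triples, source size, destination triples, destination size, filter mode):

#include "third_party/libyuv/include/libyuv/scale.h"

/* Sketch: downscale I420 by 2 in each dimension. Strides are passed
 * explicitly, so 32-byte-padded buffers work without repacking. */
static int halve_i420(const uint8* src_y, int src_stride_y,
                      const uint8* src_u, int src_stride_u,
                      const uint8* src_v, int src_stride_v,
                      int w, int h,
                      uint8* dst_y, int dst_stride_y,
                      uint8* dst_u, int dst_stride_u,
                      uint8* dst_v, int dst_stride_v) {
    /* Filter mode 1 (bilinear); the encoder change below notes that
     * modes 1 and 2 give better PSNR than 0. */
    return I420Scale(src_y, src_stride_y, src_u, src_stride_u,
                     src_v, src_stride_v, w, h,
                     dst_y, dst_stride_y, dst_u, dst_stride_u,
                     dst_v, dst_stride_v,
                     (w + 1) / 2, (h + 1) / 2, 1);
}
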
@@ -9,66 +9,73 @@
*/
#include "third_party/libyuv/include/libyuv/cpu_id.h"
#include "third_party/libyuv/include/libyuv/basic_types.h" // for CPU_X86
#ifdef _MSC_VER
#include <intrin.h>
#endif
#ifdef __ANDROID__
#include <cpu-features.h>
#endif
#include "third_party/libyuv/include/libyuv/basic_types.h" // for CPU_X86
// TODO(fbarchard): Use cpuid.h when gcc 4.4 is used on OSX and Linux.
#if (defined(__pic__) || defined(__APPLE__)) && defined(__i386__)
static inline void __cpuid(int cpu_info[4], int info_type) {
__asm__ volatile (
"mov %%ebx, %%edi\n"
"cpuid\n"
"xchg %%edi, %%ebx\n"
asm volatile (
"mov %%ebx, %%edi \n"
"cpuid \n"
"xchg %%edi, %%ebx \n"
: "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
: "a"(info_type)
);
}
#elif defined(__i386__) || defined(__x86_64__)
static inline void __cpuid(int cpu_info[4], int info_type) {
__asm__ volatile (
"cpuid\n"
asm volatile (
"cpuid \n"
: "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
: "a"(info_type)
);
}
#endif
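
The first variant saves and restores %ebx around cpuid because, in 32-bit PIC code, %ebx holds the GOT pointer and may not be clobbered. A sketch of how the detection below consumes the result (leaf 1: EDX bit 26 = SSE2, ECX bit 9 = SSSE3); it assumes the __cpuid() defined above and the kCpuHas constants from cpu_id.h:

/* Mirrors the masks used in InitCpuFlags() below. */
static int detect_x86_features(void) {
    int cpu_info[4];
    __cpuid(cpu_info, 1);
    return (cpu_info[3] & 0x04000000 ? kCpuHasSSE2 : 0) |
           (cpu_info[2] & 0x00000200 ? kCpuHasSSSE3 : 0);
}
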
//namespace libyuv {
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// CPU detect function for SIMD instruction sets.
static int cpu_info_initialized_ = 0;
static int cpu_info_ = 0;
int cpu_info_ = 0;
// Global lock for cpu initialization.
static void InitCpuFlags() {
int InitCpuFlags() {
#ifdef CPU_X86
int cpu_info[4];
__cpuid(cpu_info, 1);
cpu_info_ = (cpu_info[2] & 0x00000200 ? kCpuHasSSSE3 : 0) |
(cpu_info[3] & 0x04000000 ? kCpuHasSSE2 : 0);
cpu_info_ = (cpu_info[3] & 0x04000000 ? kCpuHasSSE2 : 0) |
(cpu_info[2] & 0x00000200 ? kCpuHasSSSE3 : 0) |
kCpuInitialized;
#elif defined(__ANDROID__) && defined(__ARM_NEON__)
uint64_t features = android_getCpuFeatures();
cpu_info_ = ((features & ANDROID_CPU_ARM_FEATURE_NEON) ? kCpuHasNEON : 0) |
kCpuInitialized;
#elif defined(__ARM_NEON__)
// gcc -mfpu=neon defines __ARM_NEON__
// if code is specifically built for Neon-only, enable the flag.
cpu_info_ |= kCpuHasNEON;
// Enable Neon if you want support for Neon and Arm, and use MaskCpuFlags
// to disable Neon on devices that do not have it.
cpu_info_ = kCpuHasNEON | kCpuInitialized;
#else
cpu_info_ = 0;
cpu_info_ = kCpuInitialized;
#endif
cpu_info_initialized_ = 1;
return cpu_info_;
}
void MaskCpuFlagsForTest(int enable_flags) {
void MaskCpuFlags(int enable_flags) {
InitCpuFlags();
cpu_info_ &= enable_flags;
}
int TestCpuFlag(int flag) {
if (!cpu_info_initialized_) {
InitCpuFlags();
}
return cpu_info_ & flag ? 1 : 0;
cpu_info_ = (cpu_info_ & enable_flags) | kCpuInitialized;
}
//} // namespace libyuv
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
@@ -14,7 +14,7 @@
#include "third_party/libyuv/include/libyuv/basic_types.h"
#define kMaxStride (2048 * 4)
//#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a) - 1)))
#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a) - 1)))
#if defined(COVERAGE_ENABLED) || defined(TARGET_IPHONE_SIMULATOR)
#define YUV_DISABLE_ASM
@@ -72,7 +72,10 @@ void FastConvertYUVToABGRRow_NEON(const uint8* y_buf,
#define HAS_REVERSE_ROW_NEON
#endif
//extern "C" {
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
#ifdef HAS_ARGBTOYROW_SSSE3
void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
@@ -253,6 +256,9 @@ void FastConvertYToARGBRow_SSE2(const uint8* y_buf,
#endif
//} // extern "C"
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // LIBYUV_SOURCE_ROW_H_
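
For context, these row-function declarations feed a runtime-dispatch pattern; a hedged sketch, assuming the portable ARGBToYRow_C fallback that row.h also declares (the 16-byte alignment requirement of the SSSE3 path is an assumption here, and argb_to_y_plane is an illustrative name):

#ifdef HAS_ARGBTOYROW_SSSE3
/* Sketch: choose the SSSE3 row function only when the CPU supports it
 * and the buffers are 16-byte aligned; fall back to the C row otherwise. */
static void argb_to_y_plane(const uint8* src_argb, int src_stride,
                            uint8* dst_y, int dst_stride,
                            int width, int height) {
    void (*row_fn)(const uint8*, uint8*, int) = ARGBToYRow_C;
    int y;
    if (TestCpuFlag(kCpuHasSSSE3) &&
        IS_ALIGNED(src_argb, 16) && IS_ALIGNED(dst_y, 16)) {
        row_fn = ARGBToYRow_SSSE3;
    }
    for (y = 0; y < height; ++y) {
        row_fn(src_argb, dst_y, width);
        src_argb += src_stride;
        dst_y += dst_stride;
    }
}
#endif  // HAS_ARGBTOYROW_SSSE3
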
This diff is collapsed.
@@ -78,6 +78,8 @@ static void die_codec(vpx_codec_ctx_t *ctx, const char *s) {
exit(EXIT_FAILURE);
}
int (*read_frame_p)(FILE *f, vpx_image_t *img);
static int read_frame(FILE *f, vpx_image_t *img) {
size_t nbytes, to_read;
int res = 1;
@@ -92,6 +94,55 @@ static int read_frame(FILE *f, vpx_image_t *img) {
return res;
}
static int read_frame_by_row(FILE *f, vpx_image_t *img) {
size_t nbytes, to_read;
int res = 1;
int plane;
for (plane = 0; plane < 3; plane++)
{
unsigned char *ptr;
int w = (plane ? (1 + img->d_w) / 2 : img->d_w);
int h = (plane ? (1 + img->d_h) / 2 : img->d_h);
int r;
/* Determine the correct plane based on the image format. The for-loop
* always counts in Y,U,V order, but this may not match the order of
* the data on disk.
*/
switch (plane)
{
case 1:
ptr = img->planes[img->fmt==VPX_IMG_FMT_YV12? VPX_PLANE_V : VPX_PLANE_U];
break;
case 2:
ptr = img->planes[img->fmt==VPX_IMG_FMT_YV12?VPX_PLANE_U : VPX_PLANE_V];
break;
default:
ptr = img->planes[plane];
}
for (r = 0; r < h; r++)
{
to_read = w;
nbytes = fread(ptr, 1, to_read, f);
if(nbytes != to_read) {
res = 0;
if(nbytes > 0)
printf("Warning: Read partial frame. Check your width & height!\n");
break;
}
ptr += img->stride[plane];
}
if (!res)
break;
}
return res;
}
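
Why the row reader exists: with align=32 the stride can exceed the display width, so a single contiguous whole-frame fread() would shear every row after the first. A small illustration of the padding arithmetic, with hypothetical numbers:

#include <stdio.h>

/* Hypothetical example: a 100-pixel-wide Y plane aligned to 32 bytes gets
 * a 128-byte stride, i.e. 28 padding bytes per row that the file does not
 * contain; hence each row is read separately and ptr advances by stride. */
int main(void) {
    unsigned int d_w = 100, align = 32;
    unsigned int stride = (d_w + align - 1) & ~(align - 1);
    printf("width %u -> stride %u (%u pad bytes per row)\n",
           d_w, stride, stride - d_w);
    return 0;
}
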
static void write_ivf_file_header(FILE *outfile,
const vpx_codec_enc_cfg_t *cfg,
int frame_cnt) {
@@ -262,9 +313,14 @@ int main(int argc, char **argv)
/* Allocate image for each encoder */
for (i=0; i< NUM_ENCODERS; i++)
if(!vpx_img_alloc(&raw[i], VPX_IMG_FMT_I420, cfg[i].g_w, cfg[i].g_h, 1))
if(!vpx_img_alloc(&raw[i], VPX_IMG_FMT_I420, cfg[i].g_w, cfg[i].g_h, 32))
die("Failed to allocate image", cfg[i].g_w, cfg[i].g_h);
if (raw[0].stride[VPX_PLANE_Y] == raw[0].d_w)
read_frame_p = read_frame;
else
read_frame_p = read_frame_by_row;
for (i=0; i< NUM_ENCODERS; i++)
write_ivf_file_header(outfile[i], &cfg[i], 0);
@@ -305,35 +361,22 @@
const vpx_codec_cx_pkt_t *pkt[NUM_ENCODERS];
flags = 0;
frame_avail = read_frame(infile, &raw[0]);
frame_avail = read_frame_p(infile, &raw[0]);
for ( i=1; i<NUM_ENCODERS; i++)
if(frame_avail)
{
if(frame_avail)
for ( i=1; i<NUM_ENCODERS; i++)
{
/*Scale the image down a number of times by downsampling factor*/
int src_uvwidth = (raw[i-1].d_w + 1) >> 1;
int src_uvheight = (raw[i-1].d_h + 1) >> 1;
const unsigned char* src_y = raw[i-1].planes[VPX_PLANE_Y];
const unsigned char* src_u = raw[i-1].planes[VPX_PLANE_Y]
+ raw[i-1].d_w*raw[i-1].d_h;
const unsigned char* src_v = raw[i-1].planes[VPX_PLANE_Y]
+ raw[i-1].d_w*raw[i-1].d_h
+ src_uvwidth*src_uvheight;
int dst_uvwidth = (raw[i].d_w + 1) >> 1;
int dst_uvheight = (raw[i].d_h + 1) >> 1;
unsigned char* dst_y = raw[i].planes[VPX_PLANE_Y];
unsigned char* dst_u = raw[i].planes[VPX_PLANE_Y]
+ raw[i].d_w*raw[i].d_h;
unsigned char* dst_v = raw[i].planes[VPX_PLANE_Y]
+ raw[i].d_w*raw[i].d_h
+ dst_uvwidth*dst_uvheight;
/* FilterMode 1 or 2 give better psnr than FilterMode 0. */
I420Scale(src_y, raw[i-1].d_w, src_u, src_uvwidth, src_v,
src_uvwidth, raw[i-1].d_w, raw[i-1].d_h,
dst_y, raw[i].d_w, dst_u, dst_uvwidth,
dst_v, dst_uvwidth, raw[i].d_w, raw[i].d_h, 1);
I420Scale(raw[i-1].planes[VPX_PLANE_Y], raw[i-1].stride[VPX_PLANE_Y],
raw[i-1].planes[VPX_PLANE_U], raw[i-1].stride[VPX_PLANE_U],
raw[i-1].planes[VPX_PLANE_V], raw[i-1].stride[VPX_PLANE_V],
raw[i-1].d_w, raw[i-1].d_h,
raw[i].planes[VPX_PLANE_Y], raw[i].stride[VPX_PLANE_Y],
raw[i].planes[VPX_PLANE_U], raw[i].stride[VPX_PLANE_U],
raw[i].planes[VPX_PLANE_V], raw[i].stride[VPX_PLANE_V],
raw[i].d_w, raw[i].d_h, 1);
}
}
......
@@ -243,6 +243,7 @@ vpx_codec_err_t vpx_codec_encode(vpx_codec_ctx_t *ctx,
ctx--;
if (img) img--;
}
ctx++;
}
FLOATING_POINT_RESTORE();
......
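
The one-line vpx_codec_encode() fix above restores the context pointer after the multi-resolution loop has walked it backwards; a toy model of the off-by-one (names are illustrative):

#include <assert.h>

/* Toy model: advance to the last element, iterate backwards with ctx--,
 * then the single ctx++ (the fix) leaves ctx at a valid element again. */
static void walk_like_encode(int *arr, int n) {
    int *ctx = arr + n - 1;   /* encode levels in reverse order */
    int i;
    for (i = 0; i < n; i++)
        ctx--;                /* after the loop, ctx == arr - 1 */
    ctx++;                    /* the fix: ctx == arr, safe to use again */
    assert(ctx == arr);
}
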