/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


12 13
#ifndef VPX_PORTS_X86_H_
#define VPX_PORTS_X86_H_
John Koleszar's avatar
John Koleszar committed
14
#include <stdlib.h>
15
#include "vpx_config.h"
James Zern's avatar
James Zern committed
16
#include "vpx/vpx_integer.h"
John Koleszar's avatar
John Koleszar committed
17

18 19 20 21
#ifdef __cplusplus
extern "C" {
#endif

John Koleszar's avatar
John Koleszar committed
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
/*
 * CPU identifiers. VPX_CPU_UNKNOWN is -1, the named entries count up from 0,
 * and VPX_CPU_LAST is one past the final named entry.
 */
typedef enum {
  VPX_CPU_UNKNOWN       = -1,
  VPX_CPU_AMD           = 0,
  VPX_CPU_AMD_OLD       = 1,
  VPX_CPU_CENTAUR       = 2,
  VPX_CPU_CYRIX         = 3,
  VPX_CPU_INTEL         = 4,
  VPX_CPU_NEXGEN        = 5,
  VPX_CPU_NSC           = 6,
  VPX_CPU_RISE          = 7,
  VPX_CPU_SIS           = 8,
  VPX_CPU_TRANSMETA     = 9,
  VPX_CPU_TRANSMETA_OLD = 10,
  VPX_CPU_UMC           = 11,
  VPX_CPU_VIA           = 12,

  VPX_CPU_LAST          = 13  /* number of named entries above */
} vpx_cpu_t;

41
/*
 * cpuid(func, func2, ax, bx, cx, dx)
 *
 * Executes the CPUID instruction with eax = func and ecx = func2 and stores
 * the resulting eax/ebx/ecx/edx values into the last four arguments, which
 * must be assignable lvalues.
 *
 * One definition is selected per toolchain and target. Note that the
 * GCC/Android and Sun variants expand to a complete statement that already
 * ends in ';', while the 64-bit MSVC variants expand to a do { } while (0)
 * block with no trailing ';'.
 */
#if defined(__GNUC__) && __GNUC__ || defined(__ANDROID__)
#if ARCH_X86_64
/* 64-bit GCC: all four registers are bound directly as asm outputs. */
#define cpuid(func, func2, ax, bx, cx, dx)\
  __asm__ __volatile__ (\
                        "cpuid           \n\t" \
                        : "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) \
                        : "a" (func), "c" (func2));
#else
/*
 * 32-bit GCC: ebx is copied to edi before cpuid and exchanged back
 * afterwards, so ebx holds its original value on exit and the cpuid ebx
 * result is delivered through edi ("=D").
 * NOTE(review): presumably done because ebx is reserved (e.g. as the PIC
 * register) and cannot be named as an output -- confirm.
 */
#define cpuid(func, func2, ax, bx, cx, dx)\
  __asm__ __volatile__ (\
                        "mov %%ebx, %%edi   \n\t" \
                        "cpuid              \n\t" \
                        "xchg %%edi, %%ebx  \n\t" \
                        : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \
                        : "a" (func), "c" (func2));
#endif
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) /* end __GNUC__ or __ANDROID__*/
#if ARCH_X86_64
/*
 * 64-bit Sun compiler: rbx is parked in rsi around cpuid and the cpuid ebx
 * result is copied to edi ("=D").
 * NOTE(review): rsi is overwritten by this sequence but is neither an output
 * nor listed as a clobber -- confirm whether that is safe with this compiler.
 */
#define cpuid(func, func2, ax, bx, cx, dx)\
  asm volatile (\
                "xchg %rsi, %rbx \n\t" \
                "cpuid           \n\t" \
                "movl %ebx, %edi \n\t" \
                "xchg %rsi, %rbx \n\t" \
                : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \
                : "a" (func), "c" (func2));
#else
/* 32-bit Sun compiler: ebx is saved/restored on the stack around cpuid and
 * the cpuid ebx result is copied to edi ("=D"). */
#define cpuid(func, func2, ax, bx, cx, dx)\
  asm volatile (\
                "pushl %ebx       \n\t" \
                "cpuid            \n\t" \
                "movl %ebx, %edi  \n\t" \
                "popl %ebx        \n\t" \
                : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \
                : "a" (func), "c" (func2));
#endif
#else /* end __SUNPRO__ */
#if ARCH_X86_64
#if defined(_MSC_VER) && _MSC_VER > 1500
/* 64-bit MSVC newer than _MSC_VER 1500: use the __cpuidex intrinsic, which
 * takes both the leaf (func) and the ecx value (func2). */
void __cpuidex(int CPUInfo[4], int info_type, int ecxvalue);
#pragma intrinsic(__cpuidex)
#define cpuid(func, func2, a, b, c, d) do {\
    int regs[4];\
    __cpuidex(regs, func, func2); \
    a = regs[0];  b = regs[1];  c = regs[2];  d = regs[3];\
  } while(0)
#else
/* Other 64-bit non-GCC/non-Sun builds: use the __cpuid intrinsic. func2 is
 * accepted but not passed on, so ecx is not set by this variant. */
void __cpuid(int CPUInfo[4], int info_type);
#pragma intrinsic(__cpuid)
#define cpuid(func, func2, a, b, c, d) do {\
    int regs[4];\
    __cpuid(regs, func); \
    a = regs[0];  b = regs[1];  c = regs[2];  d = regs[3];\
  } while (0)
#endif
#else
/* 32-bit non-GCC/non-Sun builds: a sequence of __asm statements loads
 * eax/ecx, runs cpuid and copies the four result registers out. */
#define cpuid(func, func2, a, b, c, d)\
  __asm mov eax, func\
  __asm mov ecx, func2\
  __asm cpuid\
  __asm mov a, eax\
  __asm mov b, ebx\
  __asm mov c, ecx\
  __asm mov d, edx
#endif
#endif /* end others */
John Koleszar's avatar
John Koleszar committed
107

James Zern's avatar
James Zern committed
108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138
// xgetbv(): returns the 64-bit value produced by the XGETBV instruction
// executed with ecx = 0 (edx in the high 32 bits, eax in the low 32 bits).
// Depending on the toolchain this is an inline function, a macro wrapping
// the _xgetbv intrinsic, or -- when no implementation is available -- a
// macro that evaluates to 0U, which makes the AVX check in x86_simd_caps()
// fail.
//
// NaCl has no support for xgetbv or the raw opcode.
#if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__))
static INLINE uint64_t xgetbv(void) {
  const uint32_t ecx = 0;
  uint32_t eax, edx;
  // Use the raw opcode for xgetbv for compatibility with older toolchains.
  __asm__ volatile (
    ".byte 0x0f, 0x01, 0xd0\n"
    : "=a"(eax), "=d"(edx) : "c" (ecx));
  // Combine the two 32-bit halves into one 64-bit result.
  return ((uint64_t)edx << 32) | eax;
}
#elif (defined(_M_X64) || defined(_M_IX86)) && \
      defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219  // >= VS2010 SP1
// MSVC with the _xgetbv intrinsic available: forward to it with index 0.
#include <immintrin.h>
#define xgetbv() _xgetbv(0)
#elif defined(_MSC_VER) && defined(_M_IX86)
// Older 32-bit MSVC: emit the opcode bytes by hand inside an __asm block.
static INLINE uint64_t xgetbv(void) {
  uint32_t eax_, edx_;
  __asm {
    xor ecx, ecx  // ecx = 0
    // Use the raw opcode for xgetbv for compatibility with older toolchains.
    __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0
    mov eax_, eax
    mov edx_, edx
  }
  return ((uint64_t)edx_ << 32) | eax_;
}
#else
#define xgetbv() 0U  // no AVX for older x64 or unrecognized toolchains.
#endif

139
#if defined(_MSC_VER) && _MSC_VER >= 1700
140 141 142 143 144 145
#include <windows.h>
#if WINAPI_FAMILY_PARTITION(WINAPI_FAMILY_APP)
#define getenv(x) NULL
#endif
#endif

146 147 148 149 150 151 152 153
/*
 * SIMD capability flags, one bit per instruction-set extension. These are
 * the values OR-ed together by x86_simd_caps().
 */
#define HAS_MMX     (1 << 0)  /* 0x01 */
#define HAS_SSE     (1 << 1)  /* 0x02 */
#define HAS_SSE2    (1 << 2)  /* 0x04 */
#define HAS_SSE3    (1 << 3)  /* 0x08 */
#define HAS_SSSE3   (1 << 4)  /* 0x10 */
#define HAS_SSE4_1  (1 << 5)  /* 0x20 */
#define HAS_AVX     (1 << 6)  /* 0x40 */
#define HAS_AVX2    (1 << 7)  /* 0x80 */
John Koleszar's avatar
John Koleszar committed
154 155 156 157
/*
 * BIT(n): an int with only bit n set. The argument is parenthesized so that
 * expressions containing operators of lower precedence than '<<' (for
 * example BIT(a | b)) shift by the whole expression. Defined only if no
 * earlier header already provided BIT.
 */
#ifndef BIT
#define BIT(n) (1 << (n))
#endif

158
static INLINE int
John Koleszar's avatar
John Koleszar committed
159 160 161
x86_simd_caps(void) {
  unsigned int flags = 0;
  unsigned int mask = ~0;
162
  unsigned int max_cpuid_val, reg_eax, reg_ebx, reg_ecx, reg_edx;
John Koleszar's avatar
John Koleszar committed
163 164
  char *env;
  (void)reg_ebx;
John Koleszar's avatar
John Koleszar committed
165

John Koleszar's avatar
John Koleszar committed
166 167
  /* See if the CPU capabilities are being overridden by the environment */
  env = getenv("VPX_SIMD_CAPS");
John Koleszar's avatar
John Koleszar committed
168

John Koleszar's avatar
John Koleszar committed
169 170
  if (env && *env)
    return (int)strtol(env, NULL, 0);
John Koleszar's avatar
John Koleszar committed
171

John Koleszar's avatar
John Koleszar committed
172
  env = getenv("VPX_SIMD_CAPS_MASK");
John Koleszar's avatar
John Koleszar committed
173

John Koleszar's avatar
John Koleszar committed
174 175
  if (env && *env)
    mask = strtol(env, NULL, 0);
John Koleszar's avatar
John Koleszar committed
176

John Koleszar's avatar
John Koleszar committed
177
  /* Ensure that the CPUID instruction supports extended features */
178
  cpuid(0, 0, max_cpuid_val, reg_ebx, reg_ecx, reg_edx);
John Koleszar's avatar
John Koleszar committed
179

180
  if (max_cpuid_val < 1)
John Koleszar's avatar
John Koleszar committed
181
    return 0;
John Koleszar's avatar
John Koleszar committed
182

John Koleszar's avatar
John Koleszar committed
183
  /* Get the standard feature flags */
184
  cpuid(1, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
John Koleszar's avatar
John Koleszar committed
185

John Koleszar's avatar
John Koleszar committed
186
  if (reg_edx & BIT(23)) flags |= HAS_MMX;
John Koleszar's avatar
John Koleszar committed
187

John Koleszar's avatar
John Koleszar committed
188
  if (reg_edx & BIT(25)) flags |= HAS_SSE; /* aka xmm */
John Koleszar's avatar
John Koleszar committed
189

John Koleszar's avatar
John Koleszar committed
190
  if (reg_edx & BIT(26)) flags |= HAS_SSE2; /* aka wmt */
John Koleszar's avatar
John Koleszar committed
191

192
  if (reg_ecx & BIT(0)) flags |= HAS_SSE3;
John Koleszar's avatar
John Koleszar committed
193

194
  if (reg_ecx & BIT(9)) flags |= HAS_SSSE3;
John Koleszar's avatar
John Koleszar committed
195

John Koleszar's avatar
John Koleszar committed
196
  if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1;
197

James Zern's avatar
James Zern committed
198
  // bits 27 (OSXSAVE) & 28 (256-bit AVX)
199
  if ((reg_ecx & (BIT(27) | BIT(28))) == (BIT(27) | BIT(28))) {
James Zern's avatar
James Zern committed
200 201
    if ((xgetbv() & 0x6) == 0x6) {
      flags |= HAS_AVX;
202

203 204 205
      if (max_cpuid_val >= 7) {
        /* Get the leaf 7 feature flags. Needed to check for AVX2 support */
        cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
206

207 208
        if (reg_ebx & BIT(5)) flags |= HAS_AVX2;
      }
James Zern's avatar
James Zern committed
209 210
    }
  }
211

John Koleszar's avatar
John Koleszar committed
212
  return flags & mask;
John Koleszar's avatar
John Koleszar committed
213 214 215 216 217 218
}

/* On 64-bit MSVC builds, declare __rdtsc and request its intrinsic form; it
 * is what x86_readtsc() calls in that configuration. */
#if ARCH_X86_64 && defined(_MSC_VER)
unsigned __int64 __rdtsc(void);
#pragma intrinsic(__rdtsc)
#endif
219
/*
 * Returns the low 32 bits of the CPU time-stamp counter (rdtsc).
 *
 * The rdtsc instruction writes both eax (low half) and edx (high half). The
 * inline-asm variants bind edx to a discarded output so the compiler knows
 * that register is overwritten; only the eax half is returned.
 */
static INLINE unsigned int
x86_readtsc(void) {
#if defined(__GNUC__) && __GNUC__
  unsigned int tsc_lo, tsc_hi;
  __asm__ __volatile__("rdtsc\n\t" : "=a"(tsc_lo), "=d"(tsc_hi) :);
  (void)tsc_hi;
  return tsc_lo;
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
  unsigned int tsc_lo, tsc_hi;
  asm volatile("rdtsc\n\t" : "=a"(tsc_lo), "=d"(tsc_hi) :);
  (void)tsc_hi;
  return tsc_lo;
#else
#if ARCH_X86_64
  /* Truncate the 64-bit intrinsic result to its low 32 bits. */
  return (unsigned int)__rdtsc();
#else
  /* No explicit return statement: NOTE(review) this presumably relies on the
   * compiler using the value rdtsc leaves in eax as the function result --
   * confirm for the targeted compiler. */
  __asm  rdtsc;
#endif
#endif
}


/*
 * x86_pause_hint(): issues the x86 `pause` instruction.
 *
 * Every variant expands WITHOUT a trailing semicolon, so the caller writes
 * `x86_pause_hint();` and the macro is safe in an unbraced if/else.
 * The 64-bit non-GCC/non-Sun variant uses the _mm_pause() intrinsic.
 * NOTE(review): this header does not include the header that declares
 * _mm_pause; presumably the including file provides it -- confirm.
 */
#if defined(__GNUC__) && __GNUC__
#define x86_pause_hint()\
  __asm__ __volatile__ ("pause \n\t")
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
#define x86_pause_hint()\
  asm volatile ("pause \n\t")
#else
#if ARCH_X86_64
#define x86_pause_hint()\
  _mm_pause()
#else
#define x86_pause_hint()\
  __asm pause
#endif
#endif

/*
 * x87 FPU control word accessors.
 *
 *   x87_set_control_word(mode)  loads `mode` as the control word (fldcw).
 *   x87_get_control_word()      returns the current control word (fstcw).
 *
 * GCC and Sun builds use inline asm with a memory operand. Other 64-bit
 * builds map both names onto external functions (vpx_winx64_fldcw /
 * vpx_winx64_fstcw) that are defined outside this header. Remaining builds
 * use an __asm block.
 */
#if defined(__GNUC__) && __GNUC__
static void
x87_set_control_word(unsigned short mode) {
  __asm__ __volatile__("fldcw %0" : : "m"(*&mode));
}
static unsigned short
x87_get_control_word(void) {
  unsigned short mode;
  __asm__ __volatile__("fstcw %0\n\t":"=m"(*&mode):);
    return mode;
}
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
static void
x87_set_control_word(unsigned short mode) {
  asm volatile("fldcw %0" : : "m"(*&mode));
}
static unsigned short
x87_get_control_word(void) {
  unsigned short mode;
  asm volatile("fstcw %0\n\t":"=m"(*&mode):);
  return mode;
}
#elif ARCH_X86_64
/* No fldcw intrinsics on Windows x64, punt to external asm */
extern void           vpx_winx64_fldcw(unsigned short mode);
extern unsigned short vpx_winx64_fstcw(void);
#define x87_set_control_word vpx_winx64_fldcw
#define x87_get_control_word vpx_winx64_fstcw
#else
static void
x87_set_control_word(unsigned short mode) {
  __asm { fldcw mode }
}
static unsigned short
x87_get_control_word(void) {
  unsigned short mode;
  __asm { fstcw mode }
  return mode;
}
#endif

296
/*
 * Rewrites the x87 control word with bits 0x300 replaced by 0x200 and leaves
 * every other bit untouched.
 *
 * Returns the control word as it was before the change, so the caller can
 * hand it back to x87_set_control_word() later to restore the old setting.
 */
static INLINE unsigned int
x87_set_double_precision(void) {
  const unsigned int saved_cw = x87_get_control_word();
  const unsigned int updated_cw = (saved_cw & ~0x300) | 0x200;

  x87_set_control_word(updated_cw);
  return saved_cw;
}


/* Declared here, defined outside this header.
 * NOTE(review): presumably resets the MMX register state so that x87
 * floating-point code can run afterwards -- confirm against the
 * implementation. */
extern void vpx_reset_mmx_state(void);

306 307 308 309 310
#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VPX_PORTS_X86_H_