[go: up one dir, main page]

blob: 089b048d57e464d2ff3832def5484f647e7cc84c [file] [log] [blame]
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
James Zernd2f49402013-12-16 02:36:0011#ifndef VPX_PORTS_X86_H_
12#define VPX_PORTS_X86_H_
John Koleszar0ea50ce2010-05-18 15:58:3313#include <stdlib.h>
John Koleszar807acf12012-11-02 22:39:1414#include "vpx_config.h"
James Zern7e515c42015-01-15 06:51:4915#include "vpx/vpx_integer.h"
John Koleszar0ea50ce2010-05-18 15:58:3316
James Zerna5d23f52014-01-18 20:16:1117#ifdef __cplusplus
18extern "C" {
19#endif
20
/*
 * Identifiers for x86 CPU vendors (going by the enumerator names; nothing in
 * this header consumes them).
 *
 * VPX_CPU_UNKNOWN is -1, the named vendors count up from 0, and VPX_CPU_LAST
 * is therefore the number of named vendors (one past the last valid value).
 */
typedef enum {
  VPX_CPU_UNKNOWN = -1,
  VPX_CPU_AMD,
  VPX_CPU_AMD_OLD,
  VPX_CPU_CENTAUR,
  VPX_CPU_CYRIX,
  VPX_CPU_INTEL,
  VPX_CPU_NEXGEN,
  VPX_CPU_NSC,
  VPX_CPU_RISE,
  VPX_CPU_SIS,
  VPX_CPU_TRANSMETA,
  VPX_CPU_TRANSMETA_OLD,
  VPX_CPU_UMC,
  VPX_CPU_VIA,

  VPX_CPU_LAST
} vpx_cpu_t;
Fritz Koenig0f5c63e2010-10-12 21:55:3139
/*
 * cpuid(func, func2, a, b, c, d)
 *
 * Executes the CPUID instruction with EAX = func and ECX = func2 and stores
 * the resulting EAX, EBX, ECX and EDX values into the lvalues a, b, c and d.
 * One variant is selected per toolchain / target word size.
 *
 * NOTE(review): the GCC and SunPro variants expand to a statement that
 * already ends in ';', while the MSVC x64 variants are do { } while (0)
 * blocks. Always invoke as `cpuid(...);` on its own line (or inside braces)
 * so every variant behaves the same.
 */
#if (defined(__GNUC__) && __GNUC__) || defined(__ANDROID__)
#if ARCH_X86_64
#define cpuid(func, func2, ax, bx, cx, dx)                      \
  __asm__ __volatile__("cpuid \n\t"                             \
                       : "=a"(ax), "=b"(bx), "=c"(cx), "=d"(dx) \
                       : "a"(func), "c"(func2));
#else
/*
 * 32-bit: ebx is not named as an operand. Its incoming value is parked in
 * edi across the instruction and swapped back afterwards, which leaves the
 * CPUID ebx result in edi ("=D"). Presumably this is because ebx may be
 * reserved by the compiler (e.g. for PIC) -- confirm.
 */
#define cpuid(func, func2, ax, bx, cx, dx)     \
  __asm__ __volatile__(                        \
      "mov %%ebx, %%edi \n\t"                  \
      "cpuid \n\t"                             \
      "xchg %%edi, %%ebx \n\t"                 \
      : "=a"(ax), "=D"(bx), "=c"(cx), "=d"(dx) \
      : "a"(func), "c"(func2));
#endif
#elif defined(__SUNPRO_C) || \
    defined(__SUNPRO_CC) /* end __GNUC__ or __ANDROID__*/
#if ARCH_X86_64
/* rbx is swapped out to rsi around cpuid; the ebx result is copied to edi. */
#define cpuid(func, func2, ax, bx, cx, dx)     \
  asm volatile(                                \
      "xchg %rsi, %rbx \n\t"                   \
      "cpuid \n\t"                             \
      "movl %ebx, %edi \n\t"                   \
      "xchg %rsi, %rbx \n\t"                   \
      : "=a"(ax), "=D"(bx), "=c"(cx), "=d"(dx) \
      : "a"(func), "c"(func2));
#else
/* ebx is saved on the stack around cpuid; the ebx result is copied to edi. */
#define cpuid(func, func2, ax, bx, cx, dx)     \
  asm volatile(                                \
      "pushl %ebx \n\t"                        \
      "cpuid \n\t"                             \
      "movl %ebx, %edi \n\t"                   \
      "popl %ebx \n\t"                         \
      : "=a"(ax), "=D"(bx), "=c"(cx), "=d"(dx) \
      : "a"(func), "c"(func2));
#endif
#else /* end __SUNPRO__ */
#if ARCH_X86_64
#if defined(_MSC_VER) && _MSC_VER > 1500
/* Newer MSVC: __cpuidex takes the sub-leaf (ECX) value explicitly. */
void __cpuidex(int CPUInfo[4], int info_type, int ecxvalue);
#pragma intrinsic(__cpuidex)
#define cpuid(func, func2, a, b, c, d) \
  do {                                 \
    int regs[4];                       \
    __cpuidex(regs, func, func2);      \
    a = regs[0];                       \
    b = regs[1];                       \
    c = regs[2];                       \
    d = regs[3];                       \
  } while (0)
#else
/* Fallback: __cpuid has no sub-leaf parameter, so func2 is ignored here. */
void __cpuid(int CPUInfo[4], int info_type);
#pragma intrinsic(__cpuid)
#define cpuid(func, func2, a, b, c, d) \
  do {                                 \
    int regs[4];                       \
    __cpuid(regs, func);               \
    a = regs[0];                       \
    b = regs[1];                       \
    c = regs[2];                       \
    d = regs[3];                       \
  } while (0)
#endif
#else
/* 32-bit inline-assembler variant using the __asm keyword. */
/* clang-format off */
#define cpuid(func, func2, a, b, c, d)\
    __asm mov eax, func\
    __asm mov ecx, func2\
    __asm cpuid\
    __asm mov a, eax\
    __asm mov b, ebx\
    __asm mov c, ecx\
    __asm mov d, edx
#endif
/* clang-format on */
#endif /* end others */
John Koleszar0ea50ce2010-05-18 15:58:33116
James Zern7e515c42015-01-15 06:51:49117// NaCl has no support for xgetbv or the raw opcode.
118#if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__))
119static INLINE uint64_t xgetbv(void) {
120 const uint32_t ecx = 0;
121 uint32_t eax, edx;
122 // Use the raw opcode for xgetbv for compatibility with older toolchains.
clang-format99e28b82016-01-27 20:42:45123 __asm__ volatile(".byte 0x0f, 0x01, 0xd0\n"
124 : "=a"(eax), "=d"(edx)
125 : "c"(ecx));
James Zern7e515c42015-01-15 06:51:49126 return ((uint64_t)edx << 32) | eax;
127}
clang-format99e28b82016-01-27 20:42:45128#elif(defined(_M_X64) || defined(_M_IX86)) && defined(_MSC_FULL_VER) && \
129 _MSC_FULL_VER >= 160040219 // >= VS2010 SP1
James Zern7e515c42015-01-15 06:51:49130#include <immintrin.h>
131#define xgetbv() _xgetbv(0)
132#elif defined(_MSC_VER) && defined(_M_IX86)
133static INLINE uint64_t xgetbv(void) {
134 uint32_t eax_, edx_;
135 __asm {
136 xor ecx, ecx // ecx = 0
137 // Use the raw opcode for xgetbv for compatibility with older toolchains.
138 __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0
139 mov eax_, eax
140 mov edx_, edx
141 }
142 return ((uint64_t)edx_ << 32) | eax_;
143}
144#else
145#define xgetbv() 0U // no AVX for older x64 or unrecognized toolchains.
146#endif
147
/*
 * With MSVC 2012 and later (_MSC_VER >= 1700), builds targeting the
 * WINAPI_FAMILY_APP partition replace getenv() with a macro that always
 * yields NULL, so the environment overrides read by x86_simd_caps() are
 * never seen there. Presumably getenv() is unavailable to such builds --
 * confirm.
 */
#if defined(_MSC_VER) && _MSC_VER >= 1700
#include <windows.h>
#if WINAPI_FAMILY_PARTITION(WINAPI_FAMILY_APP)
#define getenv(x) NULL
#endif
#endif
154
/* Capability bits returned by x86_simd_caps(); one bit per SIMD extension. */
#define HAS_MMX 0x01
#define HAS_SSE 0x02
#define HAS_SSE2 0x04
#define HAS_SSE3 0x08
#define HAS_SSSE3 0x10
#define HAS_SSE4_1 0x20
#define HAS_AVX 0x40
#define HAS_AVX2 0x80
#ifndef BIT
/*
 * Mask with only bit n set. The argument is parenthesized so that compound
 * expressions such as BIT(a & b) shift by the whole expression.
 */
#define BIT(n) (1 << (n))
#endif
166
clang-format99e28b82016-01-27 20:42:45167static INLINE int x86_simd_caps(void) {
John Koleszarc6b90392012-07-13 22:21:29168 unsigned int flags = 0;
169 unsigned int mask = ~0;
James Zernabff8b22015-04-25 02:21:53170 unsigned int max_cpuid_val, reg_eax, reg_ebx, reg_ecx, reg_edx;
John Koleszarc6b90392012-07-13 22:21:29171 char *env;
172 (void)reg_ebx;
John Koleszar0ea50ce2010-05-18 15:58:33173
John Koleszarc6b90392012-07-13 22:21:29174 /* See if the CPU capabilities are being overridden by the environment */
175 env = getenv("VPX_SIMD_CAPS");
John Koleszar0ea50ce2010-05-18 15:58:33176
clang-format99e28b82016-01-27 20:42:45177 if (env && *env) return (int)strtol(env, NULL, 0);
John Koleszar0ea50ce2010-05-18 15:58:33178
John Koleszarc6b90392012-07-13 22:21:29179 env = getenv("VPX_SIMD_CAPS_MASK");
John Koleszar0ea50ce2010-05-18 15:58:33180
clang-format99e28b82016-01-27 20:42:45181 if (env && *env) mask = strtol(env, NULL, 0);
John Koleszar0ea50ce2010-05-18 15:58:33182
John Koleszarc6b90392012-07-13 22:21:29183 /* Ensure that the CPUID instruction supports extended features */
James Zernabff8b22015-04-25 02:21:53184 cpuid(0, 0, max_cpuid_val, reg_ebx, reg_ecx, reg_edx);
John Koleszar0ea50ce2010-05-18 15:58:33185
clang-format99e28b82016-01-27 20:42:45186 if (max_cpuid_val < 1) return 0;
John Koleszar0ea50ce2010-05-18 15:58:33187
John Koleszarc6b90392012-07-13 22:21:29188 /* Get the standard feature flags */
Erik Niemeyer9f268612013-11-20 04:11:57189 cpuid(1, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
John Koleszar0ea50ce2010-05-18 15:58:33190
John Koleszarc6b90392012-07-13 22:21:29191 if (reg_edx & BIT(23)) flags |= HAS_MMX;
John Koleszar0ea50ce2010-05-18 15:58:33192
John Koleszarc6b90392012-07-13 22:21:29193 if (reg_edx & BIT(25)) flags |= HAS_SSE; /* aka xmm */
John Koleszar0ea50ce2010-05-18 15:58:33194
John Koleszarc6b90392012-07-13 22:21:29195 if (reg_edx & BIT(26)) flags |= HAS_SSE2; /* aka wmt */
John Koleszar0ea50ce2010-05-18 15:58:33196
Erik Niemeyere6863ef2013-10-29 15:48:12197 if (reg_ecx & BIT(0)) flags |= HAS_SSE3;
John Koleszar0ea50ce2010-05-18 15:58:33198
Erik Niemeyere6863ef2013-10-29 15:48:12199 if (reg_ecx & BIT(9)) flags |= HAS_SSSE3;
John Koleszar0ea50ce2010-05-18 15:58:33200
John Koleszarc6b90392012-07-13 22:21:29201 if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1;
Yunqing Wang71ecb5d2010-10-27 12:45:24202
James Zern7e515c42015-01-15 06:51:49203 // bits 27 (OSXSAVE) & 28 (256-bit AVX)
James Zern4ed1bda2015-01-23 22:13:51204 if ((reg_ecx & (BIT(27) | BIT(28))) == (BIT(27) | BIT(28))) {
James Zern7e515c42015-01-15 06:51:49205 if ((xgetbv() & 0x6) == 0x6) {
206 flags |= HAS_AVX;
Erik Niemeyere6863ef2013-10-29 15:48:12207
James Zernabff8b22015-04-25 02:21:53208 if (max_cpuid_val >= 7) {
209 /* Get the leaf 7 feature flags. Needed to check for AVX2 support */
210 cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
Erik Niemeyer9f268612013-11-20 04:11:57211
James Zernabff8b22015-04-25 02:21:53212 if (reg_ebx & BIT(5)) flags |= HAS_AVX2;
213 }
James Zern7e515c42015-01-15 06:51:49214 }
215 }
Erik Niemeyere6863ef2013-10-29 15:48:12216
John Koleszarc6b90392012-07-13 22:21:29217 return flags & mask;
John Koleszar0ea50ce2010-05-18 15:58:33218}
219
John Koleszar0ea50ce2010-05-18 15:58:33220#if ARCH_X86_64 && defined(_MSC_VER)
221unsigned __int64 __rdtsc(void);
222#pragma intrinsic(__rdtsc)
223#endif
clang-format99e28b82016-01-27 20:42:45224static INLINE unsigned int x86_readtsc(void) {
John Koleszar0ea50ce2010-05-18 15:58:33225#if defined(__GNUC__) && __GNUC__
John Koleszarc6b90392012-07-13 22:21:29226 unsigned int tsc;
clang-format99e28b82016-01-27 20:42:45227 __asm__ __volatile__("rdtsc\n\t" : "=a"(tsc) :);
John Koleszarc6b90392012-07-13 22:21:29228 return tsc;
John Koleszar807acf12012-11-02 22:39:14229#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
230 unsigned int tsc;
clang-format99e28b82016-01-27 20:42:45231 asm volatile("rdtsc\n\t" : "=a"(tsc) :);
John Koleszar807acf12012-11-02 22:39:14232 return tsc;
John Koleszar0ea50ce2010-05-18 15:58:33233#else
234#if ARCH_X86_64
John Koleszar807acf12012-11-02 22:39:14235 return (unsigned int)__rdtsc();
John Koleszar0ea50ce2010-05-18 15:58:33236#else
clang-format99e28b82016-01-27 20:42:45237 __asm rdtsc;
John Koleszar0ea50ce2010-05-18 15:58:33238#endif
239#endif
240}
241
/*
 * x86_pause_hint()
 *
 * Emits the PAUSE instruction. Every variant expands to an expression or
 * statement WITHOUT a trailing ';', so callers write `x86_pause_hint();`
 * and the macro is safe as the sole body of an if/else on all toolchains.
 */
#if defined(__GNUC__) && __GNUC__
#define x86_pause_hint() __asm__ __volatile__("pause \n\t")
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
#define x86_pause_hint() asm volatile("pause \n\t")
#else
#if ARCH_X86_64
#define x86_pause_hint() _mm_pause()
#else
#define x86_pause_hint() __asm pause
#endif
#endif
253
/*
 * x87_set_control_word(mode) loads `mode` into the x87 FPU control word
 * (fldcw); x87_get_control_word() stores the current control word (fstcw)
 * and returns it. One implementation pair is selected per toolchain.
 */
#if defined(__GNUC__) && __GNUC__
static void x87_set_control_word(unsigned short mode) {
  __asm__ __volatile__("fldcw %0" : : "m"(*&mode));
}
static unsigned short x87_get_control_word(void) {
  unsigned short mode;
  __asm__ __volatile__("fstcw %0\n\t" : "=m"(*&mode) :);
  return mode;
}
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
static void x87_set_control_word(unsigned short mode) {
  asm volatile("fldcw %0" : : "m"(*&mode));
}
static unsigned short x87_get_control_word(void) {
  unsigned short mode;
  asm volatile("fstcw %0\n\t" : "=m"(*&mode) :);
  return mode;
}
#elif ARCH_X86_64
/* No fldcw intrinsics on Windows x64, punt to external asm */
extern void vpx_winx64_fldcw(unsigned short mode);
extern unsigned short vpx_winx64_fstcw(void);
#define x87_set_control_word vpx_winx64_fldcw
#define x87_get_control_word vpx_winx64_fstcw
#else
static void x87_set_control_word(unsigned short mode) {
  __asm { fldcw mode }
}
static unsigned short x87_get_control_word(void) {
  unsigned short mode;
  __asm { fstcw mode }
  return mode;
}
#endif
288
clang-format99e28b82016-01-27 20:42:45289static INLINE unsigned int x87_set_double_precision(void) {
Jim Bankoskif4529612014-08-12 23:51:07290 unsigned int mode = x87_get_control_word();
clang-format99e28b82016-01-27 20:42:45291 x87_set_control_word((mode & ~0x300) | 0x200);
John Koleszarc6b90392012-07-13 22:21:29292 return mode;
John Koleszar0ea50ce2010-05-18 15:58:33293}
294
/*
 * Defined outside this header. NOTE(review): judging by the name it clears
 * the MMX state (e.g. via emms) so x87 code can run afterwards -- confirm
 * against the implementation.
 */
extern void vpx_reset_mmx_state(void);
John Koleszar0ea50ce2010-05-18 15:58:33296
James Zerna5d23f52014-01-18 20:16:11297#ifdef __cplusplus
298} // extern "C"
299#endif
300
301#endif // VPX_PORTS_X86_H_