/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
| 10 | |
| James Zern | d2f4940 | 2013-12-16 02:36:00 | [diff] [blame] | 11 | #ifndef VPX_PORTS_X86_H_ |
| 12 | #define VPX_PORTS_X86_H_ |
| John Koleszar | 0ea50ce | 2010-05-18 15:58:33 | [diff] [blame] | 13 | #include <stdlib.h> |
| John Koleszar | 807acf1 | 2012-11-02 22:39:14 | [diff] [blame] | 14 | #include "vpx_config.h" |
| James Zern | 7e515c4 | 2015-01-15 06:51:49 | [diff] [blame] | 15 | #include "vpx/vpx_integer.h" |
| John Koleszar | 0ea50ce | 2010-05-18 15:58:33 | [diff] [blame] | 16 | |
| James Zern | a5d23f5 | 2014-01-18 20:16:11 | [diff] [blame] | 17 | #ifdef __cplusplus |
| 18 | extern "C" { |
| 19 | #endif |
| 20 | |
/*
 * CPU identifiers. Values are consecutive starting at 0; VPX_CPU_UNKNOWN is
 * the only negative value.
 */
typedef enum {
  VPX_CPU_UNKNOWN = -1,
  VPX_CPU_AMD = 0,
  VPX_CPU_AMD_OLD = 1,
  VPX_CPU_CENTAUR = 2,
  VPX_CPU_CYRIX = 3,
  VPX_CPU_INTEL = 4,
  VPX_CPU_NEXGEN = 5,
  VPX_CPU_NSC = 6,
  VPX_CPU_RISE = 7,
  VPX_CPU_SIS = 8,
  VPX_CPU_TRANSMETA = 9,
  VPX_CPU_TRANSMETA_OLD = 10,
  VPX_CPU_UMC = 11,
  VPX_CPU_VIA = 12,

  /* One past the last identifier above, i.e. the number of known entries. */
  VPX_CPU_LAST = 13
} vpx_cpu_t;
| Fritz Koenig | 0f5c63e | 2010-10-12 21:55:31 | [diff] [blame] | 39 | |
/*
 * cpuid(func, func2, ax, bx, cx, dx)
 *
 * Executes the CPUID instruction with eax = func and ecx = func2 and stores
 * the resulting eax, ebx, ecx and edx values in the four output lvalues.
 *
 * Every variant expands to exactly one statement, so the invocation must be
 * terminated with a semicolon and may be used as the unbraced body of an
 * if/else.
 */
#if defined(__GNUC__) && __GNUC__ || defined(__ANDROID__)
#if ARCH_X86_64
#define cpuid(func, func2, ax, bx, cx, dx)                        \
  do {                                                            \
    __asm__ __volatile__("cpuid           \n\t"                   \
                         : "=a"(ax), "=b"(bx), "=c"(cx), "=d"(dx) \
                         : "a"(func), "c"(func2));                \
  } while (0)
#else
/*
 * ebx is copied to edi before cpuid and swapped back afterwards, so the
 * cpuid ebx result is delivered through edi ("=D") and ebx itself ends up
 * unchanged. Presumably this is because ebx may be reserved by the compiler
 * (e.g. as the PIC register) on 32-bit targets.
 */
#define cpuid(func, func2, ax, bx, cx, dx)         \
  do {                                             \
    __asm__ __volatile__(                          \
        "mov %%ebx, %%edi   \n\t"                  \
        "cpuid              \n\t"                  \
        "xchg %%edi, %%ebx  \n\t"                  \
        : "=a"(ax), "=D"(bx), "=c"(cx), "=d"(dx)   \
        : "a"(func), "c"(func2));                  \
  } while (0)
#endif
#elif defined(__SUNPRO_C) || \
    defined(__SUNPRO_CC) /* end __GNUC__ or __ANDROID__*/
#if ARCH_X86_64
/*
 * rbx is parked in rsi around cpuid and the ebx result is handed back in edi.
 * NOTE(review): after the second xchg rsi holds the cpuid rbx value, yet rsi
 * is not listed as an output or clobber -- confirm this is safe with the Sun
 * toolchain.
 */
#define cpuid(func, func2, ax, bx, cx, dx)         \
  do {                                             \
    asm volatile(                                  \
        "xchg %rsi, %rbx \n\t"                     \
        "cpuid           \n\t"                     \
        "movl %ebx, %edi \n\t"                     \
        "xchg %rsi, %rbx \n\t"                     \
        : "=a"(ax), "=D"(bx), "=c"(cx), "=d"(dx)   \
        : "a"(func), "c"(func2));                  \
  } while (0)
#else
/* ebx is saved on the stack around cpuid; its result is returned in edi. */
#define cpuid(func, func2, ax, bx, cx, dx)         \
  do {                                             \
    asm volatile(                                  \
        "pushl %ebx       \n\t"                    \
        "cpuid            \n\t"                    \
        "movl %ebx, %edi  \n\t"                    \
        "popl %ebx        \n\t"                    \
        : "=a"(ax), "=D"(bx), "=c"(cx), "=d"(dx)   \
        : "a"(func), "c"(func2));                  \
  } while (0)
#endif
#else /* end __SUNPRO__ */
#if ARCH_X86_64
#if defined(_MSC_VER) && _MSC_VER > 1500
void __cpuidex(int CPUInfo[4], int info_type, int ecxvalue);
#pragma intrinsic(__cpuidex)
#define cpuid(func, func2, a, b, c, d) \
  do {                                 \
    int regs[4];                       \
    __cpuidex(regs, func, func2);      \
    (a) = regs[0];                     \
    (b) = regs[1];                     \
    (c) = regs[2];                     \
    (d) = regs[3];                     \
  } while (0)
#else
/* __cpuid() takes no ecx value, so func2 is ignored by this variant. */
void __cpuid(int CPUInfo[4], int info_type);
#pragma intrinsic(__cpuid)
#define cpuid(func, func2, a, b, c, d) \
  do {                                 \
    int regs[4];                       \
    __cpuid(regs, func);               \
    (a) = regs[0];                     \
    (b) = regs[1];                     \
    (c) = regs[2];                     \
    (d) = regs[3];                     \
  } while (0)
#endif
#else
/*
 * Inline-assembly variant. The instructions are wrapped in a single
 * __asm { } block so the whole sequence forms one statement.
 */
/* clang-format off */
#define cpuid(func, func2, a, b, c, d)\
  __asm {\
    __asm mov eax, func\
    __asm mov ecx, func2\
    __asm cpuid\
    __asm mov a, eax\
    __asm mov b, ebx\
    __asm mov c, ecx\
    __asm mov d, edx\
  }
/* clang-format on */
#endif
#endif /* end others */
| John Koleszar | 0ea50ce | 2010-05-18 15:58:33 | [diff] [blame] | 116 | |
| James Zern | 7e515c4 | 2015-01-15 06:51:49 | [diff] [blame] | 117 | // NaCl has no support for xgetbv or the raw opcode. |
| 118 | #if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__)) |
| 119 | static INLINE uint64_t xgetbv(void) { |
| 120 | const uint32_t ecx = 0; |
| 121 | uint32_t eax, edx; |
| 122 | // Use the raw opcode for xgetbv for compatibility with older toolchains. |
| clang-format | 99e28b8 | 2016-01-27 20:42:45 | [diff] [blame] | 123 | __asm__ volatile(".byte 0x0f, 0x01, 0xd0\n" |
| 124 | : "=a"(eax), "=d"(edx) |
| 125 | : "c"(ecx)); |
| James Zern | 7e515c4 | 2015-01-15 06:51:49 | [diff] [blame] | 126 | return ((uint64_t)edx << 32) | eax; |
| 127 | } |
| clang-format | 99e28b8 | 2016-01-27 20:42:45 | [diff] [blame] | 128 | #elif(defined(_M_X64) || defined(_M_IX86)) && defined(_MSC_FULL_VER) && \ |
| 129 | _MSC_FULL_VER >= 160040219 // >= VS2010 SP1 |
| James Zern | 7e515c4 | 2015-01-15 06:51:49 | [diff] [blame] | 130 | #include <immintrin.h> |
| 131 | #define xgetbv() _xgetbv(0) |
| 132 | #elif defined(_MSC_VER) && defined(_M_IX86) |
| 133 | static INLINE uint64_t xgetbv(void) { |
| 134 | uint32_t eax_, edx_; |
| 135 | __asm { |
| 136 | xor ecx, ecx // ecx = 0 |
| 137 | // Use the raw opcode for xgetbv for compatibility with older toolchains. |
| 138 | __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0 |
| 139 | mov eax_, eax |
| 140 | mov edx_, edx |
| 141 | } |
| 142 | return ((uint64_t)edx_ << 32) | eax_; |
| 143 | } |
| 144 | #else |
| 145 | #define xgetbv() 0U // no AVX for older x64 or unrecognized toolchains. |
| 146 | #endif |
| 147 | |
/*
 * When building with MSVC 2012 or newer for the WINAPI_FAMILY_APP partition,
 * every getenv() call is replaced by NULL, so code below never sees an
 * environment override (presumably because getenv() is unavailable there).
 */
#if defined(_MSC_VER)
#if _MSC_VER >= 1700
#include <windows.h>
#if WINAPI_FAMILY_PARTITION(WINAPI_FAMILY_APP)
#define getenv(x) NULL
#endif
#endif
#endif
| 154 | |
/* Capability flags; x86_simd_caps() returns a bitwise OR of these. */
#define HAS_MMX 0x01
#define HAS_SSE 0x02
#define HAS_SSE2 0x04
#define HAS_SSE3 0x08
#define HAS_SSSE3 0x10
#define HAS_SSE4_1 0x20
#define HAS_AVX 0x40
#define HAS_AVX2 0x80
#ifndef BIT
/*
 * Value with only bit n set. The argument is parenthesized so that
 * expressions with lower precedence than << (e.g. BIT(a | b)) expand
 * correctly.
 */
#define BIT(n) (1 << (n))
#endif
| 166 | |
| clang-format | 99e28b8 | 2016-01-27 20:42:45 | [diff] [blame] | 167 | static INLINE int x86_simd_caps(void) { |
| John Koleszar | c6b9039 | 2012-07-13 22:21:29 | [diff] [blame] | 168 | unsigned int flags = 0; |
| 169 | unsigned int mask = ~0; |
| James Zern | abff8b2 | 2015-04-25 02:21:53 | [diff] [blame] | 170 | unsigned int max_cpuid_val, reg_eax, reg_ebx, reg_ecx, reg_edx; |
| John Koleszar | c6b9039 | 2012-07-13 22:21:29 | [diff] [blame] | 171 | char *env; |
| 172 | (void)reg_ebx; |
| John Koleszar | 0ea50ce | 2010-05-18 15:58:33 | [diff] [blame] | 173 | |
| John Koleszar | c6b9039 | 2012-07-13 22:21:29 | [diff] [blame] | 174 | /* See if the CPU capabilities are being overridden by the environment */ |
| 175 | env = getenv("VPX_SIMD_CAPS"); |
| John Koleszar | 0ea50ce | 2010-05-18 15:58:33 | [diff] [blame] | 176 | |
| clang-format | 99e28b8 | 2016-01-27 20:42:45 | [diff] [blame] | 177 | if (env && *env) return (int)strtol(env, NULL, 0); |
| John Koleszar | 0ea50ce | 2010-05-18 15:58:33 | [diff] [blame] | 178 | |
| John Koleszar | c6b9039 | 2012-07-13 22:21:29 | [diff] [blame] | 179 | env = getenv("VPX_SIMD_CAPS_MASK"); |
| John Koleszar | 0ea50ce | 2010-05-18 15:58:33 | [diff] [blame] | 180 | |
| clang-format | 99e28b8 | 2016-01-27 20:42:45 | [diff] [blame] | 181 | if (env && *env) mask = strtol(env, NULL, 0); |
| John Koleszar | 0ea50ce | 2010-05-18 15:58:33 | [diff] [blame] | 182 | |
| John Koleszar | c6b9039 | 2012-07-13 22:21:29 | [diff] [blame] | 183 | /* Ensure that the CPUID instruction supports extended features */ |
| James Zern | abff8b2 | 2015-04-25 02:21:53 | [diff] [blame] | 184 | cpuid(0, 0, max_cpuid_val, reg_ebx, reg_ecx, reg_edx); |
| John Koleszar | 0ea50ce | 2010-05-18 15:58:33 | [diff] [blame] | 185 | |
| clang-format | 99e28b8 | 2016-01-27 20:42:45 | [diff] [blame] | 186 | if (max_cpuid_val < 1) return 0; |
| John Koleszar | 0ea50ce | 2010-05-18 15:58:33 | [diff] [blame] | 187 | |
| John Koleszar | c6b9039 | 2012-07-13 22:21:29 | [diff] [blame] | 188 | /* Get the standard feature flags */ |
| Erik Niemeyer | 9f26861 | 2013-11-20 04:11:57 | [diff] [blame] | 189 | cpuid(1, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); |
| John Koleszar | 0ea50ce | 2010-05-18 15:58:33 | [diff] [blame] | 190 | |
| John Koleszar | c6b9039 | 2012-07-13 22:21:29 | [diff] [blame] | 191 | if (reg_edx & BIT(23)) flags |= HAS_MMX; |
| John Koleszar | 0ea50ce | 2010-05-18 15:58:33 | [diff] [blame] | 192 | |
| John Koleszar | c6b9039 | 2012-07-13 22:21:29 | [diff] [blame] | 193 | if (reg_edx & BIT(25)) flags |= HAS_SSE; /* aka xmm */ |
| John Koleszar | 0ea50ce | 2010-05-18 15:58:33 | [diff] [blame] | 194 | |
| John Koleszar | c6b9039 | 2012-07-13 22:21:29 | [diff] [blame] | 195 | if (reg_edx & BIT(26)) flags |= HAS_SSE2; /* aka wmt */ |
| John Koleszar | 0ea50ce | 2010-05-18 15:58:33 | [diff] [blame] | 196 | |
| Erik Niemeyer | e6863ef | 2013-10-29 15:48:12 | [diff] [blame] | 197 | if (reg_ecx & BIT(0)) flags |= HAS_SSE3; |
| John Koleszar | 0ea50ce | 2010-05-18 15:58:33 | [diff] [blame] | 198 | |
| Erik Niemeyer | e6863ef | 2013-10-29 15:48:12 | [diff] [blame] | 199 | if (reg_ecx & BIT(9)) flags |= HAS_SSSE3; |
| John Koleszar | 0ea50ce | 2010-05-18 15:58:33 | [diff] [blame] | 200 | |
| John Koleszar | c6b9039 | 2012-07-13 22:21:29 | [diff] [blame] | 201 | if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1; |
| Yunqing Wang | 71ecb5d | 2010-10-27 12:45:24 | [diff] [blame] | 202 | |
| James Zern | 7e515c4 | 2015-01-15 06:51:49 | [diff] [blame] | 203 | // bits 27 (OSXSAVE) & 28 (256-bit AVX) |
| James Zern | 4ed1bda | 2015-01-23 22:13:51 | [diff] [blame] | 204 | if ((reg_ecx & (BIT(27) | BIT(28))) == (BIT(27) | BIT(28))) { |
| James Zern | 7e515c4 | 2015-01-15 06:51:49 | [diff] [blame] | 205 | if ((xgetbv() & 0x6) == 0x6) { |
| 206 | flags |= HAS_AVX; |
| Erik Niemeyer | e6863ef | 2013-10-29 15:48:12 | [diff] [blame] | 207 | |
| James Zern | abff8b2 | 2015-04-25 02:21:53 | [diff] [blame] | 208 | if (max_cpuid_val >= 7) { |
| 209 | /* Get the leaf 7 feature flags. Needed to check for AVX2 support */ |
| 210 | cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); |
| Erik Niemeyer | 9f26861 | 2013-11-20 04:11:57 | [diff] [blame] | 211 | |
| James Zern | abff8b2 | 2015-04-25 02:21:53 | [diff] [blame] | 212 | if (reg_ebx & BIT(5)) flags |= HAS_AVX2; |
| 213 | } |
| James Zern | 7e515c4 | 2015-01-15 06:51:49 | [diff] [blame] | 214 | } |
| 215 | } |
| Erik Niemeyer | e6863ef | 2013-10-29 15:48:12 | [diff] [blame] | 216 | |
| John Koleszar | c6b9039 | 2012-07-13 22:21:29 | [diff] [blame] | 217 | return flags & mask; |
| John Koleszar | 0ea50ce | 2010-05-18 15:58:33 | [diff] [blame] | 218 | } |
| 219 | |
| John Koleszar | 0ea50ce | 2010-05-18 15:58:33 | [diff] [blame] | 220 | #if ARCH_X86_64 && defined(_MSC_VER) |
| 221 | unsigned __int64 __rdtsc(void); |
| 222 | #pragma intrinsic(__rdtsc) |
| 223 | #endif |
| clang-format | 99e28b8 | 2016-01-27 20:42:45 | [diff] [blame] | 224 | static INLINE unsigned int x86_readtsc(void) { |
| John Koleszar | 0ea50ce | 2010-05-18 15:58:33 | [diff] [blame] | 225 | #if defined(__GNUC__) && __GNUC__ |
| John Koleszar | c6b9039 | 2012-07-13 22:21:29 | [diff] [blame] | 226 | unsigned int tsc; |
| clang-format | 99e28b8 | 2016-01-27 20:42:45 | [diff] [blame] | 227 | __asm__ __volatile__("rdtsc\n\t" : "=a"(tsc) :); |
| John Koleszar | c6b9039 | 2012-07-13 22:21:29 | [diff] [blame] | 228 | return tsc; |
| John Koleszar | 807acf1 | 2012-11-02 22:39:14 | [diff] [blame] | 229 | #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) |
| 230 | unsigned int tsc; |
| clang-format | 99e28b8 | 2016-01-27 20:42:45 | [diff] [blame] | 231 | asm volatile("rdtsc\n\t" : "=a"(tsc) :); |
| John Koleszar | 807acf1 | 2012-11-02 22:39:14 | [diff] [blame] | 232 | return tsc; |
| John Koleszar | 0ea50ce | 2010-05-18 15:58:33 | [diff] [blame] | 233 | #else |
| 234 | #if ARCH_X86_64 |
| John Koleszar | 807acf1 | 2012-11-02 22:39:14 | [diff] [blame] | 235 | return (unsigned int)__rdtsc(); |
| John Koleszar | 0ea50ce | 2010-05-18 15:58:33 | [diff] [blame] | 236 | #else |
| clang-format | 99e28b8 | 2016-01-27 20:42:45 | [diff] [blame] | 237 | __asm rdtsc; |
| John Koleszar | 0ea50ce | 2010-05-18 15:58:33 | [diff] [blame] | 238 | #endif |
| 239 | #endif |
| 240 | } |
| 241 | |
/*
 * x86_pause_hint(): emits the x86 "pause" instruction.
 *
 * Every variant expands to an expression or statement WITHOUT a trailing
 * semicolon, so callers write `x86_pause_hint();` and the macro is safe as
 * the unbraced body of an if/else.
 */
#if defined(__GNUC__) && __GNUC__
#define x86_pause_hint() __asm__ __volatile__("pause \n\t")
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
#define x86_pause_hint() asm volatile("pause \n\t")
#else
#if ARCH_X86_64
/* NOTE(review): _mm_pause() is not declared by anything this header
 * includes directly -- confirm the intrinsic header is in scope at use. */
#define x86_pause_hint() _mm_pause()
#else
#define x86_pause_hint() __asm pause
#endif
#endif
| 253 | |
/*
 * x87_get_control_word() / x87_set_control_word(mode)
 *
 * Read (fstcw) and load (fldcw) the 16-bit x87 control word. Each toolchain
 * gets its own implementation below.
 */
#if defined(__GNUC__) && __GNUC__
static unsigned short x87_get_control_word(void) {
  unsigned short control_word;
  __asm__ __volatile__("fstcw %0\n\t" : "=m"(*&control_word) :);
  return control_word;
}
static void x87_set_control_word(unsigned short mode) {
  __asm__ __volatile__("fldcw %0" : : "m"(*&mode));
}
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
static unsigned short x87_get_control_word(void) {
  unsigned short control_word;
  asm volatile("fstcw %0\n\t" : "=m"(*&control_word) :);
  return control_word;
}
static void x87_set_control_word(unsigned short mode) {
  asm volatile("fldcw %0" : : "m"(*&mode));
}
#elif ARCH_X86_64
/* No fldcw intrinsics on Windows x64, punt to external asm */
extern unsigned short vpx_winx64_fstcw(void);
extern void vpx_winx64_fldcw(unsigned short mode);
#define x87_get_control_word vpx_winx64_fstcw
#define x87_set_control_word vpx_winx64_fldcw
#else
static unsigned short x87_get_control_word(void) {
  unsigned short control_word;
  __asm { fstcw control_word }
  return control_word;
}
static void x87_set_control_word(unsigned short mode) {
  __asm { fldcw mode }
}
#endif
| 288 | |
| clang-format | 99e28b8 | 2016-01-27 20:42:45 | [diff] [blame] | 289 | static INLINE unsigned int x87_set_double_precision(void) { |
| Jim Bankoski | f452961 | 2014-08-12 23:51:07 | [diff] [blame] | 290 | unsigned int mode = x87_get_control_word(); |
| clang-format | 99e28b8 | 2016-01-27 20:42:45 | [diff] [blame] | 291 | x87_set_control_word((mode & ~0x300) | 0x200); |
| John Koleszar | c6b9039 | 2012-07-13 22:21:29 | [diff] [blame] | 292 | return mode; |
| John Koleszar | 0ea50ce | 2010-05-18 15:58:33 | [diff] [blame] | 293 | } |
| 294 | |
/* Defined outside this header. NOTE(review): presumably clears the MMX state
 * so x87 code can run afterwards -- confirm against the implementation. */
void vpx_reset_mmx_state(void);
| John Koleszar | 0ea50ce | 2010-05-18 15:58:33 | [diff] [blame] | 296 | |
| James Zern | a5d23f5 | 2014-01-18 20:16:11 | [diff] [blame] | 297 | #ifdef __cplusplus |
| 298 | } // extern "C" |
| 299 | #endif |
| 300 | |
| 301 | #endif // VPX_PORTS_X86_H_ |