| // |
| // Copyright (c) 2017 The Khronos Group Inc. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| #ifndef _fpcontrol_h |
| #define _fpcontrol_h |
| |
// In order to get tests for correctly rounded operations (e.g. multiply) to work properly we need to be able to set the reference hardware
// to FTZ mode if the device hardware is running in that mode. We have explored all other options short of writing correctly rounded operations
// in integer code, and have found this is the only way to correctly verify operation.
//
// Non-Apple implementations will need to provide their own implementation for these features. If the reference hardware and device are both
// running in the same state (either FTZ or IEEE compliant modes) then these functions may be empty. If the device is running in a non-default
// rounding mode (e.g. round toward zero), then these functions should also set the reference device into that rounding mode.
| #if defined( __APPLE__ ) || defined( _MSC_VER ) || defined( __linux__ ) || defined (__MINGW32__) |
// Integer snapshot of the host floating-point control word. ForceFTZ() and
// DisableFTZ() store the previous state in one of these, and RestoreFPState()
// writes it back.
typedef int FPU_mode_type;

// Per-architecture support needed by the helpers below.
#if defined( __i386__ ) || defined( __x86_64__ ) \
    || defined( _MSC_VER ) || defined( __MINGW32__ )
    // Provides _mm_getcsr() / _mm_setcsr().
#   include <xmmintrin.h>
#elif defined( __PPC__ )
    // Provides fpu_control_t and _FPU_MASK_NI.
#   include <fpu_control.h>
    // Thread-local control word; only declared here, its definition is not
    // part of this header.
    extern __thread fpu_control_t fpu_control;
#endif
| // Set the reference hardware floating point unit to FTZ mode |
| static inline void ForceFTZ( FPU_mode_type *mode ) |
| { |
| #if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__) |
| *mode = _mm_getcsr(); |
| _mm_setcsr( *mode | 0x8040); |
| #elif defined( __PPC__ ) |
| *mode = fpu_control; |
| fpu_control |= _FPU_MASK_NI; |
| #elif defined ( __arm__ ) |
| unsigned fpscr; |
| __asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr)); |
| *mode = fpscr; |
| __asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr | (1U << 24))); |
| // Add 64 bit support |
| #elif defined (__aarch64__) |
| unsigned fpscr; |
| __asm__ volatile ("mrs %0, fpcr" : "=r"(fpscr)); |
| *mode = fpscr; |
| __asm__ volatile ("msr fpcr, %0" :: "r"(fpscr | (1U << 24))); |
| #else |
| #error ForceFTZ needs an implentation |
| #endif |
| } |
| |
| // Disable the denorm flush to zero |
| static inline void DisableFTZ( FPU_mode_type *mode ) |
| { |
| #if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__) |
| *mode = _mm_getcsr(); |
| _mm_setcsr( *mode & ~0x8040); |
| #elif defined( __PPC__ ) |
| *mode = fpu_control; |
| fpu_control &= ~_FPU_MASK_NI; |
| #elif defined ( __arm__ ) |
| unsigned fpscr; |
| __asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr)); |
| *mode = fpscr; |
| __asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr & ~(1U << 24))); |
| // Add 64 bit support |
| #elif defined (__aarch64__) |
| unsigned fpscr; |
| __asm__ volatile ("mrs %0, fpcr" : "=r"(fpscr)); |
| *mode = fpscr; |
| __asm__ volatile ("msr fpcr, %0" :: "r"(fpscr & ~(1U << 24))); |
| #else |
| #error DisableFTZ needs an implentation |
| #endif |
| } |
| |
| // Restore the reference hardware to floating point state indicated by *mode |
| static inline void RestoreFPState( FPU_mode_type *mode ) |
| { |
| #if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__) |
| _mm_setcsr( *mode ); |
| #elif defined( __PPC__) |
| fpu_control = *mode; |
| #elif defined (__arm__) |
| __asm__ volatile ("fmxr fpscr, %0" :: "r"(*mode)); |
| // Add 64 bit support |
| #elif defined (__aarch64__) |
| __asm__ volatile ("msr fpcr, %0" :: "r"(*mode)); |
| #else |
| #error RestoreFPState needs an implementation |
| #endif |
| } |
| #else |
| #error ForceFTZ and RestoreFPState need implentations |
| #endif |
| |
| #endif |