test_common/harness/fpcontrol.h - platform/external/OpenCL-CTS - Git at Google

 //
 // Copyright (c) 2017 The Khronos Group Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //    http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
 #ifndef _fpcontrol_h
 #define _fpcontrol_h

 // In order to get tests for correctly rounded operations (e.g. multiply) to work properly we need to be able to set the reference hardware
 // to FTZ mode if the device hardware is running in that mode.  We have explored all other options short of writing correctly rounded operations
 // in integer code, and have found this is the only way to correctly verify operation.
 //
 // Non-Apple implementations will need to provide their own implementation for these features.  If the reference hardware and device are both
 // running in the same state (either FTZ or IEEE compliant modes) then these functions may be empty.  If the device is running in non-default
 // rounding mode (e.g. round toward zero), then these functions should also set the reference device into that rounding mode.
 #if defined( __APPLE__ ) || defined( _MSC_VER ) || defined( __linux__ ) || defined (__MINGW32__)
     // Saved floating-point control word.  ForceFTZ() and DisableFTZ() store the
     // state that was in effect before their change here; RestoreFPState()
     // writes it back.
     typedef int     FPU_mode_type;
 #if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined( __MINGW32__ )
     #include <xmmintrin.h>
 #elif defined( __PPC__ )
     #include <fpu_control.h>
     extern __thread fpu_control_t fpu_control;
 #endif
     // Set the reference hardware floating point unit to FTZ mode.
     //   mode - out: receives the control state that was active before the change,
     //          suitable for passing to RestoreFPState().
     static inline void ForceFTZ( FPU_mode_type *mode )
     {
 #if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
         // 0x8040 = MXCSR bit 15 (flush-to-zero) | bit 6 (denormals-are-zero).
         *mode = _mm_getcsr();
         _mm_setcsr( *mode | 0x8040);
 #elif defined( __PPC__ )
         // NOTE(review): only the fpu_control variable declared above is updated;
         // whether that value reaches the hardware register is not visible in
         // this file - confirm.
         *mode = fpu_control;
         fpu_control |= _FPU_MASK_NI;
 #elif defined ( __arm__ )
         // 32-bit ARM: set bit 24 of FPSCR.
         unsigned fpscr;
         __asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
         *mode = fpscr;
         __asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr | (1U << 24)));
 #elif defined (__aarch64__)
         // 64-bit ARM: set bit 24 of FPCR.  MRS/MSR transfer a whole 64-bit X
         // register, so the temporary is 64 bits wide; a 32-bit operand would
         // leave the upper half of the register unspecified on the write.
         unsigned long long fpcr;
         __asm__ volatile ("mrs %0, fpcr" : "=r"(fpcr));
         *mode = (FPU_mode_type)(unsigned)fpcr;
         __asm__ volatile ("msr fpcr, %0" :: "r"(fpcr | (1ULL << 24)));
 #else
         #error ForceFTZ needs an implentation
 #endif
     }

     // Disable the denorm flush to zero.
     //   mode - out: receives the control state that was active before the change,
     //          suitable for passing to RestoreFPState().
     static inline void DisableFTZ( FPU_mode_type *mode )
     {
 #if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
         // Clear the same two MXCSR bits (15 and 6) that ForceFTZ() sets.
         *mode = _mm_getcsr();
         _mm_setcsr( *mode & ~0x8040);
 #elif defined( __PPC__ )
         // NOTE(review): only the fpu_control variable declared above is updated;
         // whether that value reaches the hardware register is not visible in
         // this file - confirm.
         *mode = fpu_control;
         fpu_control &= ~_FPU_MASK_NI;
 #elif defined ( __arm__ )
         // 32-bit ARM: clear bit 24 of FPSCR.
         unsigned fpscr;
         __asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
         *mode = fpscr;
         __asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr & ~(1U << 24)));
 #elif defined (__aarch64__)
         // 64-bit ARM: clear bit 24 of FPCR using a full-width (64-bit) operand,
         // as MRS/MSR require.
         unsigned long long fpcr;
         __asm__ volatile ("mrs %0, fpcr" : "=r"(fpcr));
         *mode = (FPU_mode_type)(unsigned)fpcr;
         __asm__ volatile ("msr fpcr, %0" :: "r"(fpcr & ~(1ULL << 24)));
 #else
     #error DisableFTZ needs an implentation
 #endif
     }

     // Restore the reference hardware to floating point state indicated by *mode.
     //   mode - in: a value previously stored by ForceFTZ() or DisableFTZ().
     static inline void RestoreFPState( FPU_mode_type *mode )
     {
 #if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
         _mm_setcsr( *mode );
 #elif defined( __PPC__)
         fpu_control = *mode;
 #elif defined (__arm__)
         __asm__ volatile ("fmxr fpscr, %0" :: "r"(*mode));
 #elif defined (__aarch64__)
         // Zero-extend the saved 32-bit word to the 64-bit operand MSR expects,
         // so the upper half of the register written to FPCR is well defined.
         unsigned long long fpcr = (unsigned)*mode;
         __asm__ volatile ("msr fpcr, %0" :: "r"(fpcr));
 #else
         #error RestoreFPState needs an implementation
 #endif
     }
 #else
         #error ForceFTZ and RestoreFPState need implentations
 #endif

 #endif
	//
	// Copyright (c) 2017 The Khronos Group Inc.
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.
	//
	#ifndef _fpcontrol_h
	#define _fpcontrol_h

	// In order to get tests for correctly rounded operations (e.g. multiply) to work properly we need to be able to set the reference hardware
	// to FTZ mode if the device hardware is running in that mode. We have explored all other options short of writing correctly rounded operations
	// in integer code, and have found this is the only way to correctly verify operation.
	//
	// Non-Apple implementations will need to provide their own implementation for these features. If the reference hardware and device are both
	// running in the same state (either FTZ or IEEE compliant modes) then these functions may be empty. If the device is running in non-default
	// rounding mode (e.g. round toward zero), then these functions should also set the reference device into that rounding mode.
	#if defined( __APPLE__ ) || defined( _MSC_VER ) || defined( __linux__ ) || defined (__MINGW32__)
	// Saved floating-point control word.  ForceFTZ() and DisableFTZ() store the
	// state that was in effect before their change here; RestoreFPState()
	// writes it back.
	typedef int FPU_mode_type;
	#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined( __MINGW32__ )
	#include <xmmintrin.h>
	#elif defined( __PPC__ )
	#include <fpu_control.h>
	extern __thread fpu_control_t fpu_control;
	#endif
	// Set the reference hardware floating point unit to FTZ mode.
	//   mode - out: receives the control state that was active before the change,
	//          suitable for passing to RestoreFPState().
	static inline void ForceFTZ( FPU_mode_type *mode )
	{
	#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
	    // 0x8040 = MXCSR bit 15 (flush-to-zero) | bit 6 (denormals-are-zero).
	    *mode = _mm_getcsr();
	    _mm_setcsr( *mode | 0x8040);
	#elif defined( __PPC__ )
	    // NOTE(review): only the fpu_control variable declared above is updated;
	    // whether that value reaches the hardware register is not visible in
	    // this file - confirm.
	    *mode = fpu_control;
	    fpu_control |= _FPU_MASK_NI;
	#elif defined ( __arm__ )
	    // 32-bit ARM: set bit 24 of FPSCR.
	    unsigned fpscr;
	    __asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
	    *mode = fpscr;
	    __asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr | (1U << 24)));
	#elif defined (__aarch64__)
	    // 64-bit ARM: set bit 24 of FPCR.  MRS/MSR transfer a whole 64-bit X
	    // register, so the temporary is 64 bits wide; a 32-bit operand would
	    // leave the upper half of the register unspecified on the write.
	    unsigned long long fpcr;
	    __asm__ volatile ("mrs %0, fpcr" : "=r"(fpcr));
	    *mode = (FPU_mode_type)(unsigned)fpcr;
	    __asm__ volatile ("msr fpcr, %0" :: "r"(fpcr | (1ULL << 24)));
	#else
	#error ForceFTZ needs an implentation
	#endif
	}

	// Disable the denorm flush to zero.
	//   mode - out: receives the control state that was active before the change,
	//          suitable for passing to RestoreFPState().
	static inline void DisableFTZ( FPU_mode_type *mode )
	{
	#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
	    // Clear the same two MXCSR bits (15 and 6) that ForceFTZ() sets.
	    *mode = _mm_getcsr();
	    _mm_setcsr( *mode & ~0x8040);
	#elif defined( __PPC__ )
	    // NOTE(review): only the fpu_control variable declared above is updated;
	    // whether that value reaches the hardware register is not visible in
	    // this file - confirm.
	    *mode = fpu_control;
	    fpu_control &= ~_FPU_MASK_NI;
	#elif defined ( __arm__ )
	    // 32-bit ARM: clear bit 24 of FPSCR.
	    unsigned fpscr;
	    __asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
	    *mode = fpscr;
	    __asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr & ~(1U << 24)));
	#elif defined (__aarch64__)
	    // 64-bit ARM: clear bit 24 of FPCR using a full-width (64-bit) operand,
	    // as MRS/MSR require.
	    unsigned long long fpcr;
	    __asm__ volatile ("mrs %0, fpcr" : "=r"(fpcr));
	    *mode = (FPU_mode_type)(unsigned)fpcr;
	    __asm__ volatile ("msr fpcr, %0" :: "r"(fpcr & ~(1ULL << 24)));
	#else
	#error DisableFTZ needs an implentation
	#endif
	}

	// Restore the reference hardware to floating point state indicated by *mode.
	//   mode - in: a value previously stored by ForceFTZ() or DisableFTZ().
	static inline void RestoreFPState( FPU_mode_type *mode )
	{
	#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
	    _mm_setcsr( *mode );
	#elif defined( __PPC__)
	    fpu_control = *mode;
	#elif defined (__arm__)
	    __asm__ volatile ("fmxr fpscr, %0" :: "r"(*mode));
	#elif defined (__aarch64__)
	    // Zero-extend the saved 32-bit word to the 64-bit operand MSR expects,
	    // so the upper half of the register written to FPCR is well defined.
	    unsigned long long fpcr = (unsigned)*mode;
	    __asm__ volatile ("msr fpcr, %0" :: "r"(fpcr));
	#else
	#error RestoreFPState needs an implementation
	#endif
	}
	#else
	#error ForceFTZ and RestoreFPState need implentations
	#endif

	#endif