/* blob: 267a1dbda7c5f60a60ae2d868a20d0551ff25062 [file] [log] [blame] */
/*******************************************************************************
* Copyright 2001-2018 Intel Corporation
* All Rights Reserved.
*
* If this software was obtained under the Intel Simplified Software License,
* the following terms apply:
*
* The source code, information and material ("Material") contained herein is
* owned by Intel Corporation or its suppliers or licensors, and title to such
* Material remains with Intel Corporation or its suppliers or licensors. The
* Material contains proprietary information of Intel or its suppliers and
* licensors. The Material is protected by worldwide copyright laws and treaty
* provisions. No part of the Material may be used, copied, reproduced,
* modified, published, uploaded, posted, transmitted, distributed or disclosed
* in any way without Intel's prior express written permission. No license under
* any patent, copyright or other intellectual property rights in the Material
* is granted to or conferred upon you, either expressly, by implication,
* inducement, estoppel or otherwise. Any license under such intellectual
* property rights must be express and approved by Intel in writing.
*
* Unless otherwise agreed by Intel in writing, you may not remove or alter this
* notice or any other notice embedded in Materials by Intel or Intel's
* suppliers or licensors in any way.
*
*
* If this software was obtained under the Apache License, Version 2.0 (the
* "License"), the following terms apply:
*
* You may not use this file except in compliance with the License. You may
* obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
*
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#if defined( _OPENMP )
#include <omp.h>
#endif
#include "owndefs.h"
#include "ippcpdefs.h"
#include "ippcp.h"
#ifdef _PCS
#undef _PCS
#define _MY_PCS_DISABLED
#endif
#include "dispatcher.h"
#ifdef _MY_PCS_DISABLED
#define _PCS
#endif
#if defined( _IPP_DATA )
/* feature bits of the CPU; 0 until cpGetFeatures() or a NOCHECK request in owncpSetCpuFeaturesAndIdx() fills it in */
static Ipp64u cpFeatures = 0;
/* feature bits currently enabled for the library; this is what the cpGetFeature() family tests */
static Ipp64u cpFeaturesMask = 0;
/* forward declaration: CPUID-based detection, defined further down in this file */
static int cpGetFeatures( Ipp64u* pFeaturesMask );
/* helpers defined outside of this file */
/* NOTE(review): cpGetReg() is used below as "run CPUID with EAX/ECX, store EAX..EDX in buf[0..3]" - confirm against its definition */
extern void IPP_CDECL cpGetReg( int* buf, int valEAX, int valECX );
/* bit 0 of the result is treated below as "Intel(R) AVX state is enabled by the OS" */
extern int IPP_CDECL cp_is_avx_extension();
/* a non-zero result is treated below as "Intel(R) AVX-512 state is enabled by the OS" */
extern int IPP_CDECL cp_is_avx512_extension();
/* forward declaration: defined near the end of this file */
IppStatus owncpSetCpuFeaturesAndIdx( Ipp64u cpuFeatures, int* index );
/*
// Name:    ippcpGetEnabledCpuFeatures
// Purpose: reports the feature bits that are currently enabled for the library
// Returns: the current value of the static cpFeaturesMask
*/
IPPFUN( Ipp64u, ippcpGetEnabledCpuFeatures, ( void ))
{
return cpFeaturesMask;
}
/*===================================================================*/
/*
// Name:    ippcpGetCpuFeatures
// Purpose: reports the CPU feature bits, detecting them on first use
// Returns:
//    ippStsNoErr             *pFeaturesMask has been filled in
//    ippStsNotSupportedCpu   cpGetFeatures() reported a failure (returned 0)
//    (a bad pFeaturesMask is handled by IPP_BAD_PTR1_RET)
// Parameters:
//    pFeaturesMask   receives the feature bits
*/
IPPFUN( IppStatus, ippcpGetCpuFeatures, ( Ipp64u* pFeaturesMask ))
{
   IPP_BAD_PTR1_RET( pFeaturesMask )

   /* features are already known: hand out the cached value, not filtered by cpFeaturesMask */
   if( cpFeatures != 0 ) {
      *pFeaturesMask = cpFeatures;
      return ippStsNoErr;
   }

   /* nothing cached yet: run the detection, which also writes *pFeaturesMask */
   return cpGetFeatures( pFeaturesMask ) ? ippStsNoErr : ippStsNotSupportedCpu;
}
/*===================================================================*/
/* Returns 1 when every bit of Feature is set in the enabled-features mask (cpFeaturesMask), otherwise 0. */
int cpGetFeature( Ipp64u Feature )
{
   return (( cpFeaturesMask & Feature ) == Feature ) ? 1 : 0;
}
/* Same test as cpGetFeature() under the k0_ name: 1 if all bits of Feature are enabled in cpFeaturesMask, else 0. */
/* NOTE(review): the prefix presumably matches a CPU-specific library build - confirm. */
int k0_cpGetFeature( Ipp64u Feature )
{
   return (( cpFeaturesMask & Feature ) == Feature ) ? 1 : 0;
}
/* Same test as cpGetFeature() under the n0_ name: 1 if all bits of Feature are enabled in cpFeaturesMask, else 0. */
/* NOTE(review): the prefix presumably matches a CPU-specific library build - confirm. */
int n0_cpGetFeature( Ipp64u Feature )
{
   return (( cpFeaturesMask & Feature ) == Feature ) ? 1 : 0;
}
/* Same test as cpGetFeature() under the l9_ name: 1 if all bits of Feature are enabled in cpFeaturesMask, else 0. */
/* NOTE(review): the prefix presumably matches a CPU-specific library build - confirm. */
int l9_cpGetFeature( Ipp64u Feature )
{
   return (( cpFeaturesMask & Feature ) == Feature ) ? 1 : 0;
}
/* Same test as cpGetFeature() under the e9_ name: 1 if all bits of Feature are enabled in cpFeaturesMask, else 0. */
/* NOTE(review): the prefix presumably matches a CPU-specific library build - confirm. */
int e9_cpGetFeature( Ipp64u Feature )
{
   return (( cpFeaturesMask & Feature ) == Feature ) ? 1 : 0;
}
/* Same test as cpGetFeature() under the y8_ name: 1 if all bits of Feature are enabled in cpFeaturesMask, else 0. */
/* NOTE(review): the prefix presumably matches a CPU-specific library build - confirm. */
int y8_cpGetFeature( Ipp64u Feature )
{
   return (( cpFeaturesMask & Feature ) == Feature ) ? 1 : 0;
}
/* Same test as cpGetFeature() under the h9_ name: 1 if all bits of Feature are enabled in cpFeaturesMask, else 0. */
/* NOTE(review): the prefix presumably matches a CPU-specific library build - confirm. */
int h9_cpGetFeature( Ipp64u Feature )
{
   return (( cpFeaturesMask & Feature ) == Feature ) ? 1 : 0;
}
/* Same test as cpGetFeature() under the g9_ name: 1 if all bits of Feature are enabled in cpFeaturesMask, else 0. */
/* NOTE(review): the prefix presumably matches a CPU-specific library build - confirm. */
int g9_cpGetFeature( Ipp64u Feature )
{
   return (( cpFeaturesMask & Feature ) == Feature ) ? 1 : 0;
}
/* Same test as cpGetFeature() under the p8_ name: 1 if all bits of Feature are enabled in cpFeaturesMask, else 0. */
/* NOTE(review): the prefix presumably matches a CPU-specific library build - confirm. */
int p8_cpGetFeature( Ipp64u Feature )
{
   return (( cpFeaturesMask & Feature ) == Feature ) ? 1 : 0;
}
/*===================================================================*/
/* Single-bit masks: BITnn has only bit nn set (bit 0 is the least significant one). */
/* cpGetFeatures() uses them to test individual bits of the 32-bit CPUID output registers. */
#define BIT00 0x00000001
#define BIT01 0x00000002
#define BIT02 0x00000004
#define BIT03 0x00000008
#define BIT04 0x00000010
#define BIT05 0x00000020
#define BIT06 0x00000040
#define BIT07 0x00000080
#define BIT08 0x00000100
#define BIT09 0x00000200
#define BIT10 0x00000400
#define BIT11 0x00000800
#define BIT12 0x00001000
#define BIT13 0x00002000
#define BIT14 0x00004000
#define BIT15 0x00008000
#define BIT16 0x00010000
#define BIT17 0x00020000
#define BIT18 0x00040000
#define BIT19 0x00080000
#define BIT20 0x00100000
#define BIT21 0x00200000
#define BIT22 0x00400000
#define BIT23 0x00800000
#define BIT24 0x01000000
#define BIT25 0x02000000
#define BIT26 0x04000000
#define BIT27 0x08000000
#define BIT28 0x10000000
#define BIT29 0x20000000
#define BIT30 0x40000000
#define BIT31 0x80000000
/*
// Name:    cpGetFeatures
// Purpose: builds the CPU feature mask from CPUID leaves 1, 7 and 0x80000001 and
//          stores it in the static cpFeatures and cpFeaturesMask and in *pFeaturesMask
// Returns: always 1 (see the note at the return statement)
// Parameters:
//    pFeaturesMask   receives the detected feature mask
//
// Notes:   cpGetReg() is assumed to execute CPUID with the given EAX/ECX values and to
//          store EAX, EBX, ECX, EDX in buf[0..3] - confirm against its definition.
//          ecx_ and edx_ are overwritten by the later leaves, so the order of the
//          checks below matters.
*/
static int cpGetFeatures( Ipp64u* pFeaturesMask )
{
Ipp32u buf[4];
Ipp32u eax_, ebx_, ecx_, edx_, tmp;
Ipp64u mask;
int flgFMA=0, flgINT=0, flgGPR=0; // for avx2
Ipp32u idBaseMax, idExtdMax;
cpGetReg((int*)buf, 0, 0); //get max value for basic info.
idBaseMax = buf[0];
cpGetReg((int*)buf, 0x80000000, 0); //get max value for extended info.
idExtdMax = buf[0];
// leaf 1: basic feature flags (ecx, edx) and the processor signature (eax)
cpGetReg( (int*)buf, 1, 0 );
eax_ = (Ipp32u)buf[0];
ecx_ = (Ipp32u)buf[2];
edx_ = (Ipp32u)buf[3];
mask = 0;
if( edx_ & BIT23 ) mask |= ippCPUID_MMX; // edx[23] - MMX(TM) Technology
if( edx_ & BIT25 ) mask |= ippCPUID_SSE; // edx[25] - Intel(R) Streaming SIMD Extensions (Intel(R) SSE)
if( edx_ & BIT26 ) mask |= ippCPUID_SSE2; // edx[26] - Intel(R) Streaming SIMD Extensions 2 (Intel(R) SSE2)
if( ecx_ & BIT00 ) mask |= ippCPUID_SSE3; // ecx[0] - Intel(R) Streaming SIMD Extensions 3 (Intel(R) SSE3) (formerly codenamed Prescott)
if( ecx_ & BIT09 ) mask |= ippCPUID_SSSE3; // ecx[9] - Supplemental Streaming SIMD Extensions 3 (SSSE3) (formerly codenamed Merom)
if( ecx_ & BIT22 ) mask |= ippCPUID_MOVBE; // ecx[22] - Intel(R) instruction MOVBE (Intel Atom(R) processor)
if( ecx_ & BIT19 ) mask |= ippCPUID_SSE41; // ecx[19] - Intel(R) Streaming SIMD Extensions 4.1 (Intel(R) SSE4.1) (formerly codenamed Penryn)
if( ecx_ & BIT20 ) mask |= ippCPUID_SSE42; // ecx[20] - Intel(R) Streaming SIMD Extensions 4.2 (Intel(R) SSE4.2) (formerly codenamed Nehalem)
if( ecx_ & BIT28 ) mask |= ippCPUID_AVX; // ecx[28] - Intel(R) Advanced Vector Extensions (Intel(R) AVX) (formerly codenamed Sandy Bridge)
// 0x18000000 == BIT27 | BIT28: ask the helper about OS support only when both
// ecx[27] (OSXSAVE per the Intel SDM) and ecx[28] (AVX) are set
if(( ecx_ & 0x18000000 ) == 0x18000000 ){
tmp = (Ipp32u)cp_is_avx_extension();
if( tmp & BIT00 ) mask |= ippAVX_ENABLEDBYOS; // Intel(R) AVX is supported by OS
}
if( ecx_ & BIT25 ) mask |= ippCPUID_AES; // ecx[25] - Intel(R) AES New Instructions
if( ecx_ & BIT01 ) mask |= ippCPUID_CLMUL; // ecx[1] - Intel(R) instruction PCLMULQDQ
if( ecx_ & BIT30 ) mask |= ippCPUID_RDRAND; // ecx[30] - Intel(R) instruction RDRAND
if( ecx_ & BIT29 ) mask |= ippCPUID_F16C; // ecx[29] - Intel(R) instruction F16C
// Intel(R) AVX2 instructions extension: only if 3 features are enabled at once:
// FMA, Intel(R) AVX 256 int & GPR BMI (bit-manipulation);
// the FMA bit must be read here, because ecx_ is reloaded from leaf 7 below
if( ecx_ & BIT12 ) flgFMA = 1; else flgFMA = 0; // ecx[12] - FMA 128 & 256 bit
if( idBaseMax >= 7 ){ // get CPUID.eax = 7
cpGetReg( (int*)buf, 0x7, 0 );
ebx_ = (Ipp32u)buf[1];
ecx_ = (Ipp32u)buf[2];
edx_ = (Ipp32u)buf[3];
if( ebx_ & BIT05 ) flgINT = 1;
else flgINT = 0; //ebx[5], Intel(R) Advanced Vector Extensions 2 (Intel(R) AVX2) (int 256bits)
// ebx[3] - enabled ANDN, BEXTR, BLSI, BLSMK, BLSR, TZCNT
// ebx[8] - enabled BZHI, MULX, PDEP, PEXT, RORX, SARX, SHLX, SHRX
if(( ebx_ & BIT03 )&&( ebx_ & BIT08 )) flgGPR = 1;
else flgGPR = 0; // VEX-encoded GPR instructions (GPR BMI)
// Intel(R) architecture formerly codenamed Broadwell instructions extension
if( ebx_ & BIT19 ) mask |= ippCPUID_ADCOX; // eax[0x7] -->> ebx:: Bit 19: Intel(R) instructions ADOX/ADCX
if( ebx_ & BIT18 ) mask |= ippCPUID_RDSEED; // eax[0x7] -->> ebx:: Bit 18: Intel(R) instruction RDSEED
if( ebx_ & BIT29 ) mask |= ippCPUID_SHA; // eax[0x7] -->> ebx:: Bit 29: Intel(R) Secure Hash Algorithm Extensions
// Intel(R) Advanced Vector Extensions 512 (Intel(R) AVX-512) extension
if(cp_is_avx512_extension()){
mask |= ippAVX512_ENABLEDBYOS; // Intel(R) AVX-512 is supported by OS
}
if( ebx_ & BIT16 ) mask |= ippCPUID_AVX512F; // ebx[16] - Intel(R) AVX-512 Foundation
if( ebx_ & BIT26 ) mask |= ippCPUID_AVX512PF; // ebx[26] - Intel(R) AVX-512 Prefetch instructions
if( ebx_ & BIT27 ) mask |= ippCPUID_AVX512ER; // ebx[27] - Intel(R) AVX-512 Exponential and Reciprocal instructions
if( ebx_ & BIT28 ) mask |= ippCPUID_AVX512CD; // ebx[28] - Intel(R) AVX-512 Conflict Detection
if( ebx_ & BIT17 ) mask |= ippCPUID_AVX512DQ; // ebx[17] - Intel(R) AVX-512 Dword & Quadword
if( ebx_ & BIT30 ) mask |= ippCPUID_AVX512BW; // ebx[30] - Intel(R) AVX-512 Byte & Word
if( ebx_ & BIT31 ) mask |= ippCPUID_AVX512VL; // ebx[31] - Intel(R) AVX-512 Vector Length extensions
if( ecx_ & BIT01 ) mask |= ippCPUID_AVX512VBMI; // ecx[01] - Intel(R) AVX-512 Vector Byte Manipulation Instructions
if( edx_ & BIT02 ) mask |= ippCPUID_AVX512_4VNNIW; // edx[02] - Intel(R) AVX-512 Vector instructions for deep learning enhanced word variable precision
if( edx_ & BIT03 ) mask |= ippCPUID_AVX512_4FMADDPS; // edx[03] - Intel(R) AVX-512 Vector instructions for deep learning floating-point single precision
// bitwise OR between ippCPUID_MPX & ippCPUID_AVX flags can be used to define that arch is GE than formerly codenamed Skylake
if( ebx_ & BIT14 ) mask |= ippCPUID_MPX; // ebx[14] - Intel(R) Memory Protection Extensions (Intel(R) MPX)
if( ebx_ & BIT21 ) mask |= ippCPUID_AVX512IFMA; // ebx[21] - Intel(R) AVX-512 IFMA PMADD52
}
// flgINT and flgGPR stay 0 when leaf 7 is not available, so AVX2 is not reported then
mask = ( flgFMA && flgINT && flgGPR ) ? (mask | ippCPUID_AVX2) : mask; // to separate Intel(R) AVX2 flags here
if( idExtdMax >= 0x80000001 ){ // get CPUID.eax=0x80000001
cpGetReg( (int*)buf, 0x80000001, 0 );
ecx_ = (Ipp32u)buf[2];
// Intel(R) architecture formerly codenamed Broadwell instructions extension
if( ecx_ & BIT08 ) mask |= ippCPUID_PREFETCHW; // eax[0x80000001] -->> ecx:: Bit 8: Intel(R) instruction PREFETCHW
}
// Intel(R) architecture formerly codenamed Knights Corner
// ( eax_ << 20 ) >> 24 keeps bits 11:4 of the leaf 1 EAX value (the model and family
// fields per the Intel SDM); the XOR is zero only when they equal 0xb1
if(((( eax_ << 20 ) >> 24 ) ^ 0xb1 ) == 0 ){
mask = mask | ippCPUID_KNC;
}
cpFeatures = mask;
cpFeaturesMask = mask; /* all CPU features are enabled by default */
*pFeaturesMask = cpFeatures;
return 1; /* if somebody need to check for cpuid support - do it at the top of function and return 0 if it's not supported */
}
/* library index chosen by the non-dynamic ippcpSetCpuFeatures(); -1 until that function has run */
int ippcpJumpIndexForMergedLibs = -1;
/* thread count remembered by ippcpSetNumThreads() and reported by ippcpGetEnabledNumThreads(); 1 by default */
static int cpthreads_omp_of_n_ipp = 1;
/*
// Name:    ippcpGetEnabledNumThreads
// Purpose: reports the thread count stored by the last successful ippcpSetNumThreads()
// Returns: the current value of the static cpthreads_omp_of_n_ipp (1 if never set)
*/
IPPFUN( int, ippcpGetEnabledNumThreads,( void ))
{
return cpthreads_omp_of_n_ipp;
}
/* Feature sets that must be completely present for owncpFeaturesToIdx() to pick an Intel(R) AVX-512 library: */
#define AVX3X_FEATURES ( ippCPUID_AVX512F|ippCPUID_AVX512CD|ippCPUID_AVX512VL|ippCPUID_AVX512BW|ippCPUID_AVX512DQ )
#define AVX3M_FEATURES ( ippCPUID_AVX512F|ippCPUID_AVX512CD|ippCPUID_AVX512PF|ippCPUID_AVX512ER )
// AVX3X_FEATURES (F, CD, VL, BW, DQ) means Intel(R) Xeon(R) processor
// AVX3M_FEATURES (F, CD, PF, ER) means Intel(R) Many Integrated Core Architecture
/*
// Name:    owncpFeaturesToIdx
// Purpose: picks the most capable optimized library whose key feature is requested
// Returns:
//    ippStsNoErr                 the request contains every bit of the chosen library mask
//    ippStsFeaturesCombination   a library was chosen, but the request lacks some bits of its mask
//    ippStsNotSupportedCpu       only the SSE level (x64 builds) or the MMX level matched
// Parameters:
//    cpuFeatures   in:  requested feature bits
//                  out: requested bits OR-ed with the mask of the chosen library
//    index         out: LIB_xxx index of the chosen library
//
// Notes:   the AVX and AVX-512 levels also require the matching "enabled by OS" flag
//          in the static cpFeatures (the detected CPU), not in the requested set.
*/
IppStatus owncpFeaturesToIdx( Ipp64u* cpuFeatures, int* index )
{
   const Ipp64u wanted   = *cpuFeatures;
   const int    osAvx512 = ( cpFeatures & ippAVX512_ENABLEDBYOS ) != 0;
   const int    osAvx    = ( cpFeatures & ippAVX_ENABLEDBYOS ) != 0;
   IppStatus    sts      = ippStsNoErr;
   Ipp64u       libMask  = 0;
   int          libIdx   = 0;

   if( osAvx512 && (( wanted & AVX3X_FEATURES ) == AVX3X_FEATURES )) {
      /* Intel(R) architecture formerly codenamed Skylake ia32=S0, x64=K0 */
      libMask = AVX3X_MSK;
      libIdx  = LIB_AVX3X;
   } else if( osAvx512 && (( wanted & AVX3M_FEATURES ) == AVX3M_FEATURES )) {
      /* Intel(R) architecture formerly codenamed Knights Landing ia32=i0, x64=N0 */
      libMask = AVX3M_MSK;
      libIdx  = LIB_AVX3M;
   } else if( osAvx && (( wanted & ippCPUID_AVX2 ) == ippCPUID_AVX2 )) {
      /* Intel(R) architecture formerly codenamed Haswell ia32=H9, x64=L9 */
      libMask = AVX2_MSK;
      libIdx  = LIB_AVX2;
   } else if( osAvx && (( wanted & ippCPUID_AVX ) == ippCPUID_AVX )) {
      /* Intel(R) architecture formerly codenamed Sandy Bridge ia32=G9, x64=E9 */
      libMask = AVX_MSK;
      libIdx  = LIB_AVX;
   } else if(( wanted & ippCPUID_SSE42 ) == ippCPUID_SSE42 ) {
      /* Intel(R) architecture formerly codenamed Nehalem or Westmere (Penryn + Intel(R) SSE4.2, */
      /* possibly with PCLMULQDQ / AES-NI / SHA), or Intel Atom(R) processor formerly codenamed Silvermont */
      libMask = SSE42_MSK;
      libIdx  = LIB_SSE42;
   } else if(( wanted & ippCPUID_SSE41 ) == ippCPUID_SSE41 ) {
      /* Intel(R) architecture formerly codenamed Penryn ia32=P8, x64=Y8 */
      libMask = SSE41_MSK;
      libIdx  = LIB_SSE41;
   } else if(( wanted & ippCPUID_MOVBE ) == ippCPUID_MOVBE ) {
      /* Intel Atom(R) processor formerly codenamed Silverthorne ia32=S8, x64=N8 */
      libMask = ATOM_MSK;
      libIdx  = LIB_ATOM;
   } else if(( wanted & ippCPUID_SSSE3 ) == ippCPUID_SSSE3 ) {
      /* Intel(R) architecture formerly codenamed Merom ia32=V8, x64=U8 */
      libMask = SSSE3_MSK;
      libIdx  = LIB_SSSE3;
   } else if(( wanted & ippCPUID_SSE3 ) == ippCPUID_SSE3 ) {
      /* Intel(R) architecture formerly codenamed Prescott ia32=W7, x64=M7 */
      libMask = SSE3_MSK;
      libIdx  = LIB_SSE3;
   } else if(( wanted & ippCPUID_SSE2 ) == ippCPUID_SSE2 ) {
      /* Intel(R) architecture formerly codenamed Willamette ia32=W7, x64=PX */
      libMask = SSE2_MSK;
      libIdx  = LIB_SSE2;
   } else if(( wanted & ippCPUID_SSE ) == ippCPUID_SSE ) {
      /* Intel(R) Pentium(R) processor III ia32=PX only */
      libMask = SSE_MSK;
      libIdx  = LIB_SSE;
#if (defined( _WIN32E ) || defined( linux32e ) || defined( OSXEM64T )) && !(defined( _ARCH_LRB2 ))
      /* x64 builds need at least Intel(R) SSE2 */
      sts = ippStsNotSupportedCpu;
#endif
   } else if(( wanted & ippCPUID_MMX ) <= ippCPUID_MMX ) {
      /* not supported, PX dispatched: the lowest supported CPU must have Intel(R) SSE (ia32) or Intel(R) SSE2 (x64) */
      /* NOTE(review): (x & M) <= M holds for every x, so this branch is a catch-all and the */
      /* _IPP_QUARK branch below can never be taken - confirm that this is intended */
      libMask = MMX_MSK;
      libIdx  = LIB_MMX;
      sts = ippStsNotSupportedCpu;
   }
#if defined ( _IPP_QUARK)
   else {
      libMask = PX_MSK;
      libIdx  = LIB_PX;
      sts = ippStsNoErr;
   }
#endif

   /* warning if combination of features is incomplete (an error status set above is kept) */
   if(( sts == ippStsNoErr )&&(( wanted & libMask ) != libMask ))
      sts = ippStsFeaturesCombination;

   *index = libIdx;
   *cpuFeatures = wanted | libMask;
   return sts;
}
#ifdef _PCS
/* Optional hooks defined outside of this file; a null pointer means "no hook installed". */
/* The code below forwards the CPU features and the thread count through them (to the "Waterfall dll"). */
extern IppStatus (IPP_STDCALL *pcpSetCpuFeatures)( Ipp64u cpuFeatures );
extern IppStatus (IPP_STDCALL *pcpSetNumThreads)( int numThr );
extern IppStatus (IPP_STDCALL *pcpGetNumThreads)( int* pNumThr );
/*
// Name:    ippcpSetNumThreads (_PCS variant)
// Purpose: forwards the thread count to the pcpSetNumThreads hook
// Returns: ippStsNoErr when no hook is installed, otherwise the status of the hook
// Parameters:
//    numThr   requested number of threads; remembered only when the hook returns ippStsNoErr
*/
IPPFUN( IppStatus, ippcpSetNumThreads, ( int numThr ))
{
   IppStatus hookSts;

   /* no hook installed: nothing is stored, success is reported */
   if( 0 == pcpSetNumThreads )
      return ippStsNoErr;

   hookSts = pcpSetNumThreads( numThr );
   if( ippStsNoErr == hookSts )
      cpthreads_omp_of_n_ipp = numThr;
   return hookSts;
}
/*
// Name:    ippcpGetNumThreads (_PCS variant)
// Purpose: asks the pcpGetNumThreads hook for the thread count
// Returns: ippStsNoErr when no hook is installed, otherwise the status of the hook
//          (a bad pNumThr is handled by IPP_BAD_PTR1_RET)
// Parameters:
//    pNumThr   filled in by the hook; left untouched when no hook is installed
*/
IPPFUN( IppStatus, ippcpGetNumThreads, (int* pNumThr) )
{
   IPP_BAD_PTR1_RET( pNumThr )

   return ( pcpGetNumThreads != 0 ) ? pcpGetNumThreads( pNumThr ) : ippStsNoErr;
}
#else
/*
// Name:    ippcpSetNumThreads (variant without _PCS)
// Purpose: remembers the requested thread count when the library is built with OpenMP
// Returns:
//    ippStsNoErr         _OPENMP build, the value has been stored
//    ippStsNoOperation   build without _OPENMP, the argument is ignored
//    (numThr is validated by IPP_BAD_SIZE_RET in the _OPENMP build)
// Parameters:
//    numThr   requested number of threads
*/
IPPFUN( IppStatus, ippcpSetNumThreads, ( int numThr ))
{
#if defined( _OPENMP )
   IPP_BAD_SIZE_RET( numThr )
   cpthreads_omp_of_n_ipp = numThr;
   return ippStsNoErr;
#else
   UNREFERENCED_PARAMETER(numThr);
   return ippStsNoOperation;
#endif
}
/*
// Name:    ippcpGetNumThreads (variant without _PCS)
// Purpose: reports the thread count the library is set up to use
// Returns:
//    ippStsNoErr         _OPENMP build, *pNumThr holds the stored thread count
//    ippStsNoOperation   build without _OPENMP, *pNumThr is set to 1
//    (a bad pNumThr is handled by IPP_BAD_PTR1_RET)
// Parameters:
//    pNumThr   receives the thread count
*/
IPPFUN( IppStatus, ippcpGetNumThreads, (int* pNumThr) )
{
   IPP_BAD_PTR1_RET( pNumThr )
#if defined( _OPENMP )
   *pNumThr = cpthreads_omp_of_n_ipp;
   return ippStsNoErr;
#else
   *pNumThr = 1;
   return ippStsNoOperation;
#endif
}
#endif /* #ifdef _PCS */
#ifdef _IPP_DYNAMIC
/* reload callback: it is called with the library index chosen for the CPU */
typedef IppStatus (IPP_STDCALL *DYN_RELOAD)( int );
/* callback registered through owncpRegisterLib(); null while nothing is registered */
static DYN_RELOAD IppDispatcher; /* ippCP only */
/* index passed to the last IppDispatcher call, -1 when there was none */
static int currentCpu = -1; /* control for disabling the same DLL re-loading */
/*
// Name:    owncpRegisterLib
// Purpose: installs the reload callback and clears the three pcp* hooks
// Parameters:
//    reload   function DynReload() that is defined in ippmain.gen;
//             each domain therefore has its own DynReload() function
*/
void owncpRegisterLib( DYN_RELOAD reload )
{
   IppDispatcher = reload;

   /* start without any hooks */
   pcpGetNumThreads  = 0;
   pcpSetNumThreads  = 0;
   pcpSetCpuFeatures = 0;
}
/*
// Name:    owncpUnregisterLib
// Purpose: removes the reload callback, forgets the last dispatched index
//          and clears the three pcp* hooks
*/
void owncpUnregisterLib( void )
{
   /* clear the hooks */
   pcpGetNumThreads  = 0;
   pcpSetNumThreads  = 0;
   pcpSetCpuFeatures = 0;

   /* back to the "nothing registered, nothing loaded" state */
   currentCpu    = -1;
   IppDispatcher = 0;
}
/*
// Name:    ippcpSetCpuFeatures (_IPP_DYNAMIC variant)
// Purpose: applies the requested feature set and reloads the matching library when its index changes
// Returns: the dispatcher/hook status if it is neither ippStsNoErr nor ippStsNoOperation,
//          otherwise the status of owncpSetCpuFeaturesAndIdx()
// Parameters:
//    cpuFeatures   requested feature bits
*/
IPPFUN( IppStatus, ippcpSetCpuFeatures,( Ipp64u cpuFeatures ))
{
   int       libIdx  = 0;
   IppStatus loadSts = ippStsNoErr;
   IppStatus featSts;

   featSts = owncpSetCpuFeaturesAndIdx( cpuFeatures, &libIdx );

   /* reload only when a dispatcher is registered and the index differs from the last one; */
   /* the new index is remembered whatever the dispatcher returns */
   if( IppDispatcher && ( libIdx != currentCpu )) {
      loadSts = IppDispatcher( libIdx );
      currentCpu = libIdx;
   }
#ifdef _PCS
   /* Pass down features to Waterfall dll */
   if(( 0 != pcpSetCpuFeatures )&&( loadSts >= ippStsNoErr ))
      loadSts = pcpSetCpuFeatures( cpuFeatures );
   /* Pass down the stored thread count to Waterfall dll */
   if(( 0 != pcpSetNumThreads )&&( loadSts >= ippStsNoErr ))
      loadSts = pcpSetNumThreads( cpthreads_omp_of_n_ipp );
#endif
   if(( ippStsNoErr == loadSts )||( ippStsNoOperation == loadSts ))
      return featSts;
   return loadSts;
}
/*
// Name:    ippcpInit (_IPP_DYNAMIC variant)
// Purpose: detects the CPU features, applies them and calls the registered dispatcher unconditionally
// Returns: the first status that is not ippStsNoErr, taken in this order:
//          feature detection, feature selection, dispatcher; ippStsNoErr otherwise
*/
IPPFUN( IppStatus, ippcpInit,( void ))
{
   Ipp64u    detected;
   int       libIdx = 0;
   IppStatus detectSts, selectSts, loadSts;

   detectSts = ippcpGetCpuFeatures( &detected );
   /* owncpSetCpuFeaturesAndIdx() instead of ippcpSetCpuFeatures() because unconditional initialization is needed */
   selectSts = owncpSetCpuFeaturesAndIdx( detected, &libIdx );
   /* call DynReload() function for each domain */
   loadSts = IppDispatcher ? IppDispatcher( libIdx ) : ippStsNoErr;
   currentCpu = libIdx;

   if( detectSts != ippStsNoErr ) return detectSts;
   if( selectSts != ippStsNoErr ) return selectSts;
   return loadSts;
}
#else /* _IPP_DYNAMIC */
/*
// Name:    ippcpInit (variant without _IPP_DYNAMIC)
// Purpose: detects the CPU features and applies them through ippcpSetCpuFeatures();
//          in an _OPENMP build the thread count is set up first
// Returns: the status of ippcpSetCpuFeatures(); the status of the detection call is not checked
*/
IPPFUN( IppStatus, ippcpInit,( void ))
{
   Ipp64u detected;

#if defined( _OPENMP )
   ippcpSetNumThreads( IPP_MIN( omp_get_num_procs(), omp_get_max_threads()));
#endif
   (void)ippcpGetCpuFeatures( &detected );
   return ippcpSetCpuFeatures( detected );
}
/*
// Name:    ippcpSetCpuFeatures (variant without _IPP_DYNAMIC)
// Purpose: applies the requested feature set and publishes the chosen library index
//          in ippcpJumpIndexForMergedLibs
// Returns: the status of owncpSetCpuFeaturesAndIdx()
// Parameters:
//    cpuFeatures   requested feature bits
*/
IPPFUN( IppStatus, ippcpSetCpuFeatures,( Ipp64u cpuFeatures ))
{
   int       libIdx = 0;
   IppStatus sts;

#if defined( _OPENMP )
   ippcpSetNumThreads( IPP_MIN( omp_get_num_procs(), omp_get_max_threads()));
#endif
   sts = owncpSetCpuFeaturesAndIdx( cpuFeatures, &libIdx );
   /* the mask stored by owncpSetCpuFeaturesAndIdx() is replaced by the caller's value as passed in */
   cpFeaturesMask = cpuFeatures;
   ippcpJumpIndexForMergedLibs = libIdx;
   return sts;
}
#endif
/*
// Name:    owncpSetCpuFeaturesAndIdx
// Purpose: refreshes the static cpFeatures, maps the requested features to a library
//          index and stores the resulting feature set in cpFeaturesMask
// Returns: the status of owncpFeaturesToIdx()
// Parameters:
//    cpuFeatures   requested feature bits, optionally with ippCPUID_NOCHECK
//    index         receives the library index
*/
IppStatus owncpSetCpuFeaturesAndIdx( Ipp64u cpuFeatures, int* index )
{
   IppStatus sts;

   *index = 0;
   if( 0 == ( cpuFeatures & ippCPUID_NOCHECK )) {
      /* the usual way: cpFeatures is re-read from CPUID on every call, */
      /* which also undoes a possible previous NOCHECK override */
      Ipp64u detected;
      cpGetFeatures( &detected );
   } else {
      /* if NOCHECK is set - static variable cpFeatures is initialized unconditionally and real CPU features from CPUID are ignored; */
      /* the one who uses this method of initialization must understand what and why it does and the possible unpredictable consequences. */
      /* the only one known purpose for this approach - environments where CPUID instruction is disabled (for example Intel(R) Software Guard Extensions). */
      cpuFeatures &= ( IPP_MAX_64U ^ ippCPUID_NOCHECK );
      cpFeatures = cpuFeatures;
   }

   sts = owncpFeaturesToIdx( &cpuFeatures, index );
   /* cpuFeatures now also contains the mask of the chosen library */
   cpFeaturesMask = cpuFeatures;
   return sts;
}
static struct {
int sts;
const char *msg;
} ippcpMsg[] = {
/* ippStatus */
/* -9999 */ ippStsCpuNotSupportedErr, "ippStsCpuNotSupportedErr: The target CPU is not supported",
/* -9702 */ MSG_NO_SHARED, "No shared libraries were found in the Waterfall procedure",
/* -9701 */ MSG_NO_DLL, "No DLLs were found in the Waterfall procedure",
/* -9700 */ MSG_LOAD_DLL_ERR, "Error at loading of %s library",
/* -1016 */ ippStsQuadraticNonResidueErr, "ippStsQuadraticNonResidueErr: SQRT operation on quadratic non-residue value",
/* -1015 */ ippStsPointAtInfinity, "ippStsPointAtInfinity: Point at infinity is detected",
/* -1014 */ ippStsOFBSizeErr, "ippStsOFBSizeErr: Incorrect value for crypto OFB block size",
/* -1013 */ ippStsIncompleteContextErr, "ippStsIncompleteContextErr: Crypto: set up of context is not complete",
/* -1012 */ ippStsCTRSizeErr, "ippStsCTRSizeErr: Incorrect value for crypto CTR block size",
/* -1011 */ ippStsEphemeralKeyErr, "ippStsEphemeralKeyErr: ECC: Invalid ephemeral key",
/* -1010 */ ippStsMessageErr, "ippStsMessageErr: ECC: Invalid message digest",
/* -1009 */ ippStsShareKeyErr, "ippStsShareKeyErr: ECC: Invalid share key",
/* -1008 */ ippStsIvalidPrivateKey, "ippStsIvalidPrivateKey ECC: Invalid private key",
/* -1007 */ ippStsOutOfECErr, "ippStsOutOfECErr: ECC: Point out of EC",
/* -1006 */ ippStsECCInvalidFlagErr, "ippStsECCInvalidFlagErr: ECC: Invalid Flag",
/* -1005 */ ippStsUnderRunErr, "ippStsUnderRunErr: Error in data under run",
/* -1004 */ ippStsPaddingErr, "ippStsPaddingErr: Detected padding error indicates the possible data corruption",
/* -1003 */ ippStsCFBSizeErr, "ippStsCFBSizeErr: Incorrect value for crypto CFB block size",
/* -1002 */ ippStsPaddingSchemeErr, "ippStsPaddingSchemeErr: Invalid padding scheme",
/* -1001 */ ippStsBadModulusErr, "ippStsBadModulusErr: Bad modulus caused a failure in module inversion",
/* -216 */ ippStsUnknownStatusCodeErr, "ippStsUnknownStatusCodeErr: Unknown status code",
/* -221 */ ippStsLoadDynErr, "ippStsLoadDynErr: Error when loading the dynamic library",
/* -15 */ ippStsLengthErr, "ippStsLengthErr: Incorrect value for string length",
/* -14 */ ippStsNotSupportedModeErr, "ippStsNotSupportedModeErr: The requested mode is currently not supported",
/* -13 */ ippStsContextMatchErr, "ippStsContextMatchErr: Context parameter does not match the operation",
/* -12 */ ippStsScaleRangeErr, "ippStsScaleRangeErr: Scale bounds are out of range",
/* -11 */ ippStsOutOfRangeErr, "ippStsOutOfRangeErr: Argument is out of range, or point is outside the image",
/* -10 */ ippStsDivByZeroErr, "ippStsDivByZeroErr: An attempt to divide by zero",
/* -9 */ ippStsMemAllocErr, "ippStsMemAllocErr: Memory allocated for the operation is not enough",
/* -8 */ ippStsNullPtrErr, "ippStsNullPtrErr: Null pointer error",
/* -7 */ ippStsRangeErr, "ippStsRangeErr: Incorrect values for bounds: the lower bound is greater than the upper bound",
/* -6 */ ippStsSizeErr, "ippStsSizeErr: Incorrect value for data size",
/* -5 */ ippStsBadArgErr, "ippStsBadArgErr: Incorrect arg/param of the function",
/* -4 */ ippStsNoMemErr, "ippStsNoMemErr: Not enough memory for the operation",
/* -2 */ ippStsErr, "ippStsErr: Unknown/unspecified error, -2",
/* 0 */ ippStsNoErr, "ippStsNoErr: No errors",
/* 1 */ ippStsNoOperation, "ippStsNoOperation: No operation has been executed",
/* 2 */ ippStsDivByZero, "ippStsDivByZero: Zero value(s) for the divisor in the Div function",
/* 25 */ ippStsInsufficientEntropy, "ippStsInsufficientEntropy: Generation of the prime/key failed due to insufficient entropy in the random seed and stimulus bit string",
/* 36 */ ippStsNotSupportedCpu, "The CPU is not supported",
/* 36 */ ippStsFeaturesCombination, "Wrong combination of features",
};
/* /////////////////////////////////////////////////////////////////////////////
// Name: ippcpGetStatusString
// Purpose: converts an Intel(R) IPP status code to its message string
// Returns: the message of the first ippcpMsg entry whose code equals StsCode;
//          for a code that is not in the table, the message found for
//          ippStsUnknownStatusCodeErr
// Parameters:
// StsCode Intel(R) IPP status code
//
// Notes: the returned string belongs to the static table and must not be released;
//        the fallback is a recursive call, so it relies on ippStsUnknownStatusCodeErr
//        being present in ippcpMsg
*/
IPPFUN( const char*, ippcpGetStatusString, ( IppStatus StsCode ) )
{
   unsigned int pos = 0;

   while( pos < IPP_COUNT_OF( ippcpMsg )) {
      if( ippcpMsg[pos].sts == StsCode )
         return ippcpMsg[pos].msg;
      ++pos;
   }
   /* unknown code: look up the "unknown status code" entry instead */
   return ippcpGetStatusString( ippStsUnknownStatusCodeErr );
}
/* counter-reading helper defined outside of this file */
extern Ipp64u IPP_CDECL cp_get_pentium_counter (void);
/* /////////////////////////////////////////////////////////////////////////////
// Name: ippcpGetCpuClocks
// Purpose: time stamp counter (TSC) register reading
// Returns: TSC value, i.e. whatever cp_get_pentium_counter() returns
//
// Note: A hardware exception is possible if TSC reading is not supported by
// the current chipset
*/
IPPFUN( Ipp64u, ippcpGetCpuClocks, (void) )
{
return (Ipp64u)cp_get_pentium_counter();
}
#endif /* _IPP_DATA */