blob: 092857cc81d06898f28be9d20968338c1853a66d [file] [log] [blame]
/*
* Copyright 1993-2017 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
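// When one of the INIT_CUDA_* flags (INIT_CUDA_GL, INIT_CUDA_D3D9,
// INIT_CUDA_D3D10, INIT_CUDA_D3D11) is defined to a non-zero value, this
// source file also dynamically loads the corresponding interop functions.
// Only INIT_CUDA_GL is enabled in this file; the D3D flags stay off unless
// they are defined externally.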
#define __CUDA_API_VERSION 4000
#include <stdio.h>
#include <string.h>
#define INIT_CUDA_GL 1
#include "host-common/dynlink_cuda.h"
#if INIT_CUDA_GL
#include "dynlink_cudaGL.h"
#endif
#if INIT_CUDA_D3D9
#include "../inc/dynlink_cudaD3D9.h"
#endif
#if INIT_CUDA_D3D11
#include "../inc/dynlink_cudaD3D11.h"
#endif
// Function pointers for the core CUDA driver API entry points.
//
// Every pointer below is a zero-initialized global that cuInit() (defined at
// the end of this file) fills in by looking the symbol up in the driver
// library. Some lookups are gated on the requested or reported driver
// version, and a failed lookup only logs a message, so callers should be
// prepared for a pointer to still be NULL after cuInit() returns.

// The driver's own cuInit symbol is stored as _cuInit because the name cuInit
// is used by the loader function defined in this file.
tcuInit *_cuInit;
tcuDriverGetVersion *cuDriverGetVersion;
// cuDevice* entry points
tcuDeviceGet *cuDeviceGet;
tcuDeviceGetCount *cuDeviceGetCount;
tcuDeviceGetName *cuDeviceGetName;
tcuDeviceComputeCapability *cuDeviceComputeCapability;
tcuDeviceTotalMem *cuDeviceTotalMem;
tcuDeviceGetProperties *cuDeviceGetProperties;
tcuDeviceGetAttribute *cuDeviceGetAttribute;
// cuCtx* entry points
tcuCtxCreate *cuCtxCreate;
tcuCtxDestroy *cuCtxDestroy;
tcuCtxAttach *cuCtxAttach;
tcuCtxDetach *cuCtxDetach;
tcuCtxPushCurrent *cuCtxPushCurrent;
tcuCtxPopCurrent *cuCtxPopCurrent;
tcuCtxGetCurrent *cuCtxGetCurrent;
tcuCtxSetCurrent *cuCtxSetCurrent;
tcuCtxGetDevice *cuCtxGetDevice;
tcuCtxSynchronize *cuCtxSynchronize;
// cuModule* entry points
tcuModuleLoad *cuModuleLoad;
tcuModuleLoadData *cuModuleLoadData;
tcuModuleLoadDataEx *cuModuleLoadDataEx;
tcuModuleLoadFatBinary *cuModuleLoadFatBinary;
tcuModuleUnload *cuModuleUnload;
tcuModuleGetFunction *cuModuleGetFunction;
tcuModuleGetGlobal *cuModuleGetGlobal;
tcuModuleGetTexRef *cuModuleGetTexRef;
tcuModuleGetSurfRef *cuModuleGetSurfRef;
// cuMem* allocation and host-memory entry points
tcuMemGetInfo *cuMemGetInfo;
tcuMemAlloc *cuMemAlloc;
tcuMemAllocPitch *cuMemAllocPitch;
tcuMemFree *cuMemFree;
tcuMemGetAddressRange *cuMemGetAddressRange;
tcuMemAllocHost *cuMemAllocHost;
tcuMemFreeHost *cuMemFreeHost;
tcuMemHostAlloc *cuMemHostAlloc;
tcuMemHostGetDevicePointer *cuMemHostGetDevicePointer;
tcuMemHostRegister *cuMemHostRegister;
tcuMemHostUnregister *cuMemHostUnregister;
// cuMemcpy* entry points
tcuMemcpyHtoD *cuMemcpyHtoD;
tcuMemcpyDtoH *cuMemcpyDtoH;
tcuMemcpyDtoD *cuMemcpyDtoD;
tcuMemcpyDtoA *cuMemcpyDtoA;
tcuMemcpyAtoD *cuMemcpyAtoD;
tcuMemcpyHtoA *cuMemcpyHtoA;
tcuMemcpyAtoH *cuMemcpyAtoH;
tcuMemcpyAtoA *cuMemcpyAtoA;
tcuMemcpy2D *cuMemcpy2D;
tcuMemcpy2DUnaligned *cuMemcpy2DUnaligned;
tcuMemcpy3D *cuMemcpy3D;
tcuMemcpyHtoDAsync *cuMemcpyHtoDAsync;
tcuMemcpyDtoHAsync *cuMemcpyDtoHAsync;
tcuMemcpyDtoDAsync *cuMemcpyDtoDAsync;
tcuMemcpyHtoAAsync *cuMemcpyHtoAAsync;
tcuMemcpyAtoHAsync *cuMemcpyAtoHAsync;
tcuMemcpy2DAsync *cuMemcpy2DAsync;
tcuMemcpy3DAsync *cuMemcpy3DAsync;
tcuMemcpy *cuMemcpy;
tcuMemcpyPeer *cuMemcpyPeer;
// cuMemset* entry points
tcuMemsetD8 *cuMemsetD8;
tcuMemsetD16 *cuMemsetD16;
tcuMemsetD32 *cuMemsetD32;
tcuMemsetD2D8 *cuMemsetD2D8;
tcuMemsetD2D16 *cuMemsetD2D16;
tcuMemsetD2D32 *cuMemsetD2D32;
// cuFunc* entry points and cuLaunchKernel
tcuFuncSetBlockShape *cuFuncSetBlockShape;
tcuFuncSetSharedSize *cuFuncSetSharedSize;
tcuFuncGetAttribute *cuFuncGetAttribute;
tcuFuncSetCacheConfig *cuFuncSetCacheConfig;
tcuLaunchKernel *cuLaunchKernel;
// cuArray* entry points
tcuArrayCreate *cuArrayCreate;
tcuArrayGetDescriptor *cuArrayGetDescriptor;
tcuArrayDestroy *cuArrayDestroy;
tcuArray3DCreate *cuArray3DCreate;
tcuArray3DGetDescriptor *cuArray3DGetDescriptor;
// cuTexRef* entry points
tcuTexRefCreate *cuTexRefCreate;
tcuTexRefDestroy *cuTexRefDestroy;
tcuTexRefSetArray *cuTexRefSetArray;
tcuTexRefSetAddress *cuTexRefSetAddress;
tcuTexRefSetAddress2D *cuTexRefSetAddress2D;
tcuTexRefSetFormat *cuTexRefSetFormat;
tcuTexRefSetAddressMode *cuTexRefSetAddressMode;
tcuTexRefSetFilterMode *cuTexRefSetFilterMode;
tcuTexRefSetFlags *cuTexRefSetFlags;
tcuTexRefGetAddress *cuTexRefGetAddress;
tcuTexRefGetArray *cuTexRefGetArray;
tcuTexRefGetAddressMode *cuTexRefGetAddressMode;
tcuTexRefGetFilterMode *cuTexRefGetFilterMode;
tcuTexRefGetFormat *cuTexRefGetFormat;
tcuTexRefGetFlags *cuTexRefGetFlags;
// cuSurfRef* entry points
tcuSurfRefSetArray *cuSurfRefSetArray;
tcuSurfRefGetArray *cuSurfRefGetArray;
// cuParam* and cuLaunch* entry points
tcuParamSetSize *cuParamSetSize;
tcuParamSeti *cuParamSeti;
tcuParamSetf *cuParamSetf;
tcuParamSetv *cuParamSetv;
tcuParamSetTexRef *cuParamSetTexRef;
tcuLaunch *cuLaunch;
tcuLaunchGrid *cuLaunchGrid;
tcuLaunchGridAsync *cuLaunchGridAsync;
// cuEvent* entry points
tcuEventCreate *cuEventCreate;
tcuEventRecord *cuEventRecord;
tcuEventQuery *cuEventQuery;
tcuEventSynchronize *cuEventSynchronize;
tcuEventDestroy *cuEventDestroy;
tcuEventElapsedTime *cuEventElapsedTime;
// cuStream* entry points
tcuStreamCreate *cuStreamCreate;
tcuStreamQuery *cuStreamQuery;
tcuStreamSynchronize *cuStreamSynchronize;
tcuStreamDestroy *cuStreamDestroy;
// cuGraphics* entry points
tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
tcuGraphicsResourceGetMappedPointer *cuGraphicsResourceGetMappedPointer;
tcuGraphicsResourceSetMapFlags *cuGraphicsResourceSetMapFlags;
tcuGraphicsMapResources *cuGraphicsMapResources;
tcuGraphicsUnmapResources *cuGraphicsUnmapResources;
// Remaining entry points
tcuGetExportTable *cuGetExportTable;
tcuCtxSetLimit *cuCtxSetLimit;
tcuCtxGetLimit *cuCtxGetLimit;
tcuMemHostGetFlags *cuMemHostGetFlags;
#if INIT_CUDA_GL
// GL/CUDA interop
//
// Function pointers for the OpenGL interop entry points, compiled in only
// when INIT_CUDA_GL is non-zero. They are zero-initialized globals; the ones
// cuInitGL() looks up are filled in there. cuGLCtxCreate_v2,
// cuGLMapBufferObject_v2 and cuGLInit are declared here but are not assigned
// by cuInitGL() in this file.
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
tcuWGLGetDevice *cuWGLGetDevice;
#endif
//#if __CUDA_API_VERSION >= 3020
tcuGLCtxCreate *cuGLCtxCreate;
tcuGLCtxCreate *cuGLCtxCreate_v2;
tcuGLMapBufferObject *cuGLMapBufferObject;
tcuGLMapBufferObject *cuGLMapBufferObject_v2;
tcuGLMapBufferObjectAsync *cuGLMapBufferObjectAsync;
//#endif
tcuGLInit *cuGLInit; // deprecated in CUDA 3.0
tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer;
tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage;
tcuGLSetBufferObjectMapFlags *cuGLSetBufferObjectMapFlags;
tcuGLRegisterBufferObject *cuGLRegisterBufferObject;
tcuGLUnmapBufferObject *cuGLUnmapBufferObject;
tcuGLUnmapBufferObjectAsync *cuGLUnmapBufferObjectAsync;
tcuGLUnregisterBufferObject *cuGLUnregisterBufferObject;
// Defined exactly once and unconditionally. A second definition used to sit
// behind "#if __CUDA_API_VERSION >= 6050", which made this a redefinition
// error as soon as the API version was raised to 6050 or later.
tcuGLGetDevices *cuGLGetDevices; // CUDA 6.5 only
#endif
#if INIT_CUDA_D3D9
// Function pointers for the Direct3D 9 interop entry points, compiled in only
// when INIT_CUDA_D3D9 is non-zero. They are zero-initialized globals; the
// ones cuInitD3D9() looks up are filled in there. cuD3D9ResourceGetMappedArray,
// cuD3D9GetDevices and the *_v2 pointers declared below are not assigned by
// cuInitD3D9() in this file.
//
// D3D9/CUDA interop (CUDA 1.x compatible API). These functions
// are deprecated; please use the ones below
tcuD3D9Begin *cuD3D9Begin;
tcuD3D9End *cuD3D9End;
// D3D9/CUDA interop (CUDA 2.x compatible)
tcuD3D9GetDirect3DDevice *cuD3D9GetDirect3DDevice;
tcuD3D9RegisterResource *cuD3D9RegisterResource;
tcuD3D9UnregisterResource *cuD3D9UnregisterResource;
tcuD3D9MapResources *cuD3D9MapResources;
tcuD3D9UnmapResources *cuD3D9UnmapResources;
tcuD3D9ResourceSetMapFlags *cuD3D9ResourceSetMapFlags;
tcuD3D9ResourceGetSurfaceDimensions *cuD3D9ResourceGetSurfaceDimensions;
tcuD3D9ResourceGetMappedArray *cuD3D9ResourceGetMappedArray;
tcuD3D9ResourceGetMappedPointer *cuD3D9ResourceGetMappedPointer;
tcuD3D9ResourceGetMappedSize *cuD3D9ResourceGetMappedSize;
tcuD3D9ResourceGetMappedPitch *cuD3D9ResourceGetMappedPitch;
// D3D9/CUDA interop (CUDA 2.0+)
tcuD3D9GetDevice *cuD3D9GetDevice;
// NOTE(review): cuD3D9GetDevices is declared with the tcuD3D9GetDevice type;
// confirm whether a distinct tcuD3D9GetDevices signature was intended.
tcuD3D9GetDevice *cuD3D9GetDevices;
tcuD3D9GetDevice *cuD3D9GetDevice_v2;
tcuD3D9CtxCreate *cuD3D9CtxCreate;
tcuD3D9CtxCreate *cuD3D9CtxCreate_v2;
tcuGraphicsD3D9RegisterResource *cuGraphicsD3D9RegisterResource;
tcuGraphicsD3D9RegisterResource *cuGraphicsD3D9RegisterResource_v2;
#endif
// Function pointers for the Direct3D 11 interop entry points.
//
// Guarded with #if (not #ifdef) so that it agrees with the
// "#if INIT_CUDA_D3D11" guard around the dynlink_cudaD3D11.h include: defining
// the flag to 0 now disables these definitions together with the header that
// declares their types, instead of leaving them to fail to compile.
#if INIT_CUDA_D3D11
tcuD3D11GetDevice *cuD3D11GetDevice;
tcuD3D11GetDevices *cuD3D11GetDevices;
tcuGraphicsD3D11RegisterResource *cuGraphicsD3D11RegisterResource;
tcuD3D11CtxCreate *cuD3D11CtxCreate;
tcuD3D11CtxCreateOnDevice *cuD3D11CtxCreateOnDevice;
tcuD3D11GetDirect3DDevice *cuD3D11GetDirect3DDevice;
#endif
// Turns its argument into a string literal. The argument is stringized as
// written (it is not macro-expanded first); GET_PROC_EX_V2 uses this to build
// the "<name>_v2" symbol name from the pasted token name##_v2.
#define STRINGIFY(X) #X
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
#include <Windows.h>
// Name of the CUDA driver library. It is kept as a narrow string even in
// UNICODE builds because it is printed with "%s" through the narrow printf()
// both here and in the GET_PROC_EX macros; a wide string there would be
// printed incorrectly.
static LPCSTR __CudaLibName = "nvcuda.dll";
typedef HMODULE CUDADRIVER;
// Loads the CUDA driver library and stores its module handle in *pInstance.
//
// pInstance: receives the handle returned by LoadLibraryA (NULL on failure).
// Returns CUDA_SUCCESS when the library was loaded, otherwise prints a
// diagnostic and returns CUDA_ERROR_UNKNOWN.
static CUresult LOAD_LIBRARY(CUDADRIVER *pInstance)
{
    // Use the ANSI entry point explicitly so the call matches the narrow
    // library name regardless of whether UNICODE is defined.
    *pInstance = LoadLibraryA(__CudaLibName);
    if (*pInstance == NULL)
    {
        printf("LoadLibrary \"%s\" failed!\n", __CudaLibName);
        return CUDA_ERROR_UNKNOWN;
    }
    return CUDA_SUCCESS;
}
// Looks up the driver symbol `name` with GetProcAddress and stores it in
// `alias` (cast to the t<name> function type). Expects a variable named
// CudaDrvLib holding the library handle to be in scope at the call site.
// When the symbol is missing, `alias` becomes NULL; if `required` is non-zero
// a diagnostic is printed as well. A missing symbol is only logged: the macro
// does not return from the enclosing function.
//
// The body is wrapped in do { } while (0) so that an invocation is a single
// statement and stays correct inside an unbraced if/else.
#define GET_PROC_EX(name, alias, required)                                  \
    do {                                                                    \
        alias = (t##name *)GetProcAddress(CudaDrvLib, #name);               \
        if (alias == NULL && (required)) {                                  \
            printf("Failed to find required function \"%s\" in %s\n",       \
                   #name, __CudaLibName);                                   \
        }                                                                   \
    } while (0)
// Same as GET_PROC_EX, but looks up the "<name>_v2" export and stores it in
// `alias`, which keeps the unversioned t<name> type.
#define GET_PROC_EX_V2(name, alias, required)                               \
    do {                                                                    \
        alias = (t##name *)GetProcAddress(CudaDrvLib,                       \
                                          STRINGIFY(name##_v2));            \
        if (alias == NULL && (required)) {                                  \
            printf("Failed to find required function \"%s\" in %s\n",       \
                   STRINGIFY(name##_v2), __CudaLibName);                    \
        }                                                                   \
    } while (0)
#elif defined(__unix__) || defined(__APPLE__) || defined(__MACOSX)
#include <dlfcn.h>
// Name of the CUDA driver library that is tried first; it is also the name
// used in diagnostics.
#if defined(__APPLE__) || defined(__MACOSX)
static char __CudaLibName[] = "/usr/local/cuda/lib/libcuda.dylib";
#else
static char __CudaLibName[] = "libcuda.so";
#endif
typedef void *CUDADRIVER;
// Loads the CUDA driver library and stores its handle in *pInstance.
//
// pInstance: receives the handle returned by dlopen (NULL on failure).
// Returns CUDA_SUCCESS when the library was loaded, otherwise prints a
// diagnostic and returns CUDA_ERROR_UNKNOWN.
static CUresult LOAD_LIBRARY(CUDADRIVER *pInstance)
{
    *pInstance = dlopen(__CudaLibName, RTLD_NOW);
#if !(defined(__APPLE__) || defined(__MACOSX))
    if (*pInstance == NULL)
    {
        // The unversioned "libcuda.so" link is often shipped only with
        // development packages; fall back to the versioned name so that
        // runtime-only driver installs can still be loaded.
        *pInstance = dlopen("libcuda.so.1", RTLD_NOW);
    }
#endif
    if (*pInstance == NULL)
    {
        printf("dlopen \"%s\" failed!\n", __CudaLibName);
        return CUDA_ERROR_UNKNOWN;
    }
    return CUDA_SUCCESS;
}
// Looks up the driver symbol `name` with dlsym and stores it in `alias` (cast
// to the t<name> function type). Expects a variable named CudaDrvLib holding
// the library handle to be in scope at the call site. When the symbol is
// missing, `alias` becomes NULL; if `required` is non-zero a diagnostic is
// printed as well. A missing symbol is only logged: the macro does not return
// from the enclosing function.
//
// The body is wrapped in do { } while (0) so that an invocation is a single
// statement and stays correct inside an unbraced if/else.
#define GET_PROC_EX(name, alias, required)                                  \
    do {                                                                    \
        alias = (t##name *)dlsym(CudaDrvLib, #name);                        \
        if (alias == NULL && (required)) {                                  \
            printf("Failed to find required function \"%s\" in %s\n",       \
                   #name, __CudaLibName);                                   \
        }                                                                   \
    } while (0)
// Same as GET_PROC_EX, but looks up the "<name>_v2" export and stores it in
// `alias`, which keeps the unversioned t<name> type.
#define GET_PROC_EX_V2(name, alias, required)                               \
    do {                                                                    \
        alias = (t##name *)dlsym(CudaDrvLib, STRINGIFY(name##_v2));         \
        if (alias == NULL && (required)) {                                  \
            printf("Failed to find required function \"%s\" in %s\n",       \
                   STRINGIFY(name##_v2), __CudaLibName);                    \
        }                                                                   \
    } while (0)
#else
#error unsupported platform
#endif
// Evaluates `call` exactly once and, if it did not yield CUDA_SUCCESS,
// returns that CUresult from the enclosing function. Only usable inside
// functions that return CUresult.
#define CHECKED_CALL(call)                          \
    do {                                            \
        const CUresult status_ = (call);            \
        if (status_ != CUDA_SUCCESS) {              \
            return status_;                         \
        }                                           \
    } while (0)
// Convenience wrappers around GET_PROC_EX / GET_PROC_EX_V2 that store the
// looked-up symbol in the global pointer of the same name.
// "Required" lookups print a diagnostic when the symbol is missing;
// "optional" lookups leave the pointer NULL silently.
#define GET_PROC_REQUIRED(name) GET_PROC_EX(name,name,1)
#define GET_PROC_OPTIONAL(name) GET_PROC_EX(name,name,0)
// Default lookup: treated as required.
#define GET_PROC(name) GET_PROC_REQUIRED(name)
// Required lookup of the "<name>_v2" export, stored in the pointer `name`.
#define GET_PROC_V2(name) GET_PROC_EX_V2(name,name,1)
#if INIT_CUDA_GL
// Resolves the OpenGL interop entry points from the already-loaded driver
// library.
//
// Flags:       unused.
// cudaVersion: API version that selects which entry points are looked up.
// CudaDrvLib:  handle of the driver library (used by the GET_PROC macros).
// Returns CUDA_SUCCESS unconditionally; a missing symbol is reported by the
// GET_PROC macros and leaves the corresponding pointer NULL.
//
// Each symbol is looked up once per version tier. The former ">= 2030" tier
// and the second cuWGLGetDevice lookup only repeated lookups already done in
// the ">= 2010" tier (which every later version also satisfies), so they
// were dropped; the resulting pointer values are the same.
inline CUresult CUDAAPI cuInitGL(unsigned int Flags, int cudaVersion, CUDADRIVER &CudaDrvLib)
{
    (void)Flags;
    if (cudaVersion >= 2010)
    {
        GET_PROC(cuGLCtxCreate);
        GET_PROC(cuGraphicsGLRegisterBuffer);
        GET_PROC(cuGraphicsGLRegisterImage);
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
        GET_PROC(cuWGLGetDevice);
#endif
    }
    if (cudaVersion >= 3000)
    {
        GET_PROC(cuGLGetDevices);
        // The _v2 lookups overwrite the unversioned pointers fetched above.
        GET_PROC_V2(cuGLCtxCreate);
        GET_PROC_V2(cuGLMapBufferObject);
        GET_PROC(cuGLUnmapBufferObject);
        GET_PROC(cuGLMapBufferObjectAsync);
        GET_PROC(cuGLUnmapBufferObjectAsync);
        GET_PROC(cuGLRegisterBufferObject);
        GET_PROC(cuGLUnregisterBufferObject);
        GET_PROC(cuGLSetBufferObjectMapFlags);
    }
    return CUDA_SUCCESS;
}
#endif
// Guarded with #if (not #ifdef) so that it agrees with the
// "#if INIT_CUDA_D3D9" guards on the D3D9 header include, the D3D9 pointer
// definitions and the call in cuInit(): defining the flag to 0 now disables
// this function together with the declarations it depends on.
#if INIT_CUDA_D3D9
// Resolves the Direct3D 9 interop entry points from the already-loaded
// driver library.
//
// Flags:       unused.
// cudaVersion: unused; every lookup below is unconditional.
// CudaDrvLib:  handle of the driver library (used by the GET_PROC macros).
// Returns CUDA_SUCCESS unconditionally; a missing symbol is reported by the
// GET_PROC macros and leaves the corresponding pointer NULL.
inline CUresult CUDAAPI cuInitD3D9(unsigned int Flags, int cudaVersion, CUDADRIVER &CudaDrvLib)
{
    (void)Flags;
    (void)cudaVersion;
    // D3D9/CUDA (CUDA 1.x compatible API)
    GET_PROC(cuD3D9Begin);
    GET_PROC(cuD3D9End);
    // D3D9/CUDA (CUDA 2.x compatible API)
    GET_PROC(cuD3D9GetDirect3DDevice);
    GET_PROC(cuD3D9RegisterResource);
    GET_PROC(cuD3D9UnregisterResource);
    GET_PROC(cuD3D9MapResources);
    GET_PROC(cuD3D9UnmapResources);
    GET_PROC(cuD3D9ResourceSetMapFlags);
    // D3D9/CUDA (CUDA 2.0+ compatible API)
    GET_PROC(cuD3D9GetDevice);
    GET_PROC(cuGraphicsD3D9RegisterResource);
    GET_PROC_V2(cuD3D9CtxCreate);
    GET_PROC_V2(cuD3D9ResourceGetSurfaceDimensions);
    GET_PROC_V2(cuD3D9ResourceGetMappedPointer);
    GET_PROC_V2(cuD3D9ResourceGetMappedSize);
    GET_PROC_V2(cuD3D9ResourceGetMappedPitch);
    // GET_PROC_V2(cuD3D9ResourceGetMappedArray);
    return CUDA_SUCCESS;
}
#endif
// Guarded with #if (not #ifdef) so that it agrees with the
// "#if INIT_CUDA_D3D10" guard on the call in cuInit(): defining the flag to 0
// now disables both the function and its call site.
// NOTE(review): no D3D10 header include or cuD3D10* pointer definitions are
// visible in this file; confirm they exist before enabling INIT_CUDA_D3D10.
#if INIT_CUDA_D3D10
// Resolves the Direct3D 10 interop entry points from the already-loaded
// driver library.
//
// Flags:       unused.
// cudaVersion: API version; the lookups are done only when it is >= 2030.
// CudaDrvLib:  handle of the driver library (used by the GET_PROC macros).
// Returns CUDA_SUCCESS unconditionally; a missing symbol is reported by the
// GET_PROC macros and leaves the corresponding pointer NULL.
inline CUresult CUDAAPI cuInitD3D10(unsigned int Flags, int cudaVersion, CUDADRIVER &CudaDrvLib)
{
    (void)Flags;
    if (cudaVersion >= 2030)
    {
        GET_PROC(cuD3D10GetDevice);
        GET_PROC(cuD3D10CtxCreate);
        GET_PROC(cuGraphicsD3D10RegisterResource);
    }
    return CUDA_SUCCESS;
}
#endif
// Guarded with #if (not #ifdef) so that it agrees with the
// "#if INIT_CUDA_D3D11" guards on the D3D11 header include and on the call in
// cuInit(): defining the flag to 0 now disables this function together with
// the header that declares the types it uses.
#if INIT_CUDA_D3D11
// Resolves the Direct3D 11 interop entry points from the already-loaded
// driver library.
//
// Flags:       unused.
// cudaVersion: API version; the lookups are done only when it is >= 3000.
// CudaDrvLib:  handle of the driver library (used by the GET_PROC macros).
// Returns CUDA_SUCCESS unconditionally; a missing symbol is reported by the
// GET_PROC macros and leaves the corresponding pointer NULL.
inline CUresult CUDAAPI cuInitD3D11(unsigned int Flags, int cudaVersion, CUDADRIVER &CudaDrvLib)
{
    (void)Flags;
    if (cudaVersion >= 3000)
    {
        GET_PROC(cuD3D11GetDevice);
        GET_PROC(cuD3D11CtxCreate);
        GET_PROC(cuGraphicsD3D11RegisterResource);
    }
    return CUDA_SUCCESS;
}
#endif
// Loads the CUDA driver library, initializes the driver and resolves the
// driver API function pointers defined at the top of this file.
//
// Flags:         passed through to the driver's own cuInit.
// cudaVersion:   API version requested by the caller; selects between the
//                "_v2" and the unversioned exports for entry points that have
//                both.
// pHandleDriver: if non-NULL, receives a copy of the library handle
//                (sizeof(CUDADRIVER) bytes are written).
//
// Returns CUDA_SUCCESS on success. Returns the error reported by
// LOAD_LIBRARY, the driver's cuInit or cuDriverGetVersion when one of them
// fails, CUDA_ERROR_UNKNOWN when the driver library does not export cuInit,
// and CUDA_ERROR_INVALID_DEVICE when an interop initializer reports failure.
//
// Apart from cuInit itself, a missing symbol is only logged by the GET_PROC
// macros: the matching function pointer stays NULL and initialization
// continues, so callers must not assume every pointer is set afterwards.
CUresult CUDAAPI cuInit(unsigned int Flags, int cudaVersion, void *pHandleDriver)
{
    CUDADRIVER CudaDrvLib;
    int driverVer = 1000;
    CHECKED_CALL(LOAD_LIBRARY(&CudaDrvLib));
    if (pHandleDriver != NULL)
    {
        memcpy(pHandleDriver, &CudaDrvLib, sizeof(CUDADRIVER));
    }
    // cuInit is required; alias it to _cuInit
    GET_PROC_EX(cuInit, _cuInit, 1);
    if (_cuInit == NULL)
    {
        // GET_PROC_EX only logs a missing symbol; calling through a NULL
        // pointer below would crash, so fail the initialization instead.
        return CUDA_ERROR_UNKNOWN;
    }
    CHECKED_CALL(_cuInit(Flags));
    // available since 2.2. if not present, version 1.0 is assumed
    GET_PROC_OPTIONAL(cuDriverGetVersion);
    if (cuDriverGetVersion)
    {
        CHECKED_CALL(cuDriverGetVersion(&driverVer));
    }
    // fetch all function pointers
    GET_PROC(cuDeviceGet);
    GET_PROC(cuDeviceGetCount);
    GET_PROC(cuDeviceGetName);
    GET_PROC(cuDeviceComputeCapability);
    GET_PROC(cuDeviceGetProperties);
    GET_PROC(cuDeviceGetAttribute);
    GET_PROC(cuCtxDestroy);
    GET_PROC(cuCtxAttach);
    GET_PROC(cuCtxDetach);
    GET_PROC(cuCtxPushCurrent);
    GET_PROC(cuCtxPopCurrent);
    GET_PROC(cuCtxGetDevice);
    GET_PROC(cuCtxSynchronize);
    GET_PROC(cuModuleLoad);
    GET_PROC(cuModuleLoadData);
    GET_PROC(cuModuleUnload);
    GET_PROC(cuModuleGetFunction);
    GET_PROC(cuModuleGetTexRef);
    GET_PROC(cuMemFreeHost);
    GET_PROC(cuMemHostAlloc);
    GET_PROC(cuFuncSetBlockShape);
    GET_PROC(cuFuncSetSharedSize);
    GET_PROC(cuFuncGetAttribute);
    GET_PROC(cuArrayDestroy);
    GET_PROC(cuTexRefCreate);
    GET_PROC(cuTexRefDestroy);
    GET_PROC(cuTexRefSetArray);
    GET_PROC(cuTexRefSetFormat);
    GET_PROC(cuTexRefSetAddressMode);
    GET_PROC(cuTexRefSetFilterMode);
    GET_PROC(cuTexRefSetFlags);
    GET_PROC(cuTexRefGetArray);
    GET_PROC(cuTexRefGetAddressMode);
    GET_PROC(cuTexRefGetFilterMode);
    GET_PROC(cuTexRefGetFormat);
    GET_PROC(cuTexRefGetFlags);
    GET_PROC(cuParamSetSize);
    GET_PROC(cuParamSeti);
    GET_PROC(cuParamSetf);
    GET_PROC(cuParamSetv);
    GET_PROC(cuParamSetTexRef);
    GET_PROC(cuLaunch);
    GET_PROC(cuLaunchGrid);
    GET_PROC(cuLaunchGridAsync);
    GET_PROC(cuEventCreate);
    GET_PROC(cuEventRecord);
    GET_PROC(cuEventQuery);
    GET_PROC(cuEventSynchronize);
    GET_PROC(cuEventDestroy);
    GET_PROC(cuEventElapsedTime);
    GET_PROC(cuStreamCreate);
    GET_PROC(cuStreamQuery);
    GET_PROC(cuStreamSynchronize);
    GET_PROC(cuStreamDestroy);
    // These could be _v2 interfaces
    // (the _v2 lookups overwrite the unversioned pointers fetched above)
    if (cudaVersion >= 4000)
    {
        GET_PROC_V2(cuCtxDestroy);
        GET_PROC_V2(cuCtxPopCurrent);
        GET_PROC_V2(cuCtxPushCurrent);
        GET_PROC_V2(cuStreamDestroy);
        GET_PROC_V2(cuEventDestroy);
    }
    // Entry points that exist both as "_v2" and unversioned exports: pick
    // one set according to the API version the caller asked for.
    if (cudaVersion >= 3020)
    {
        GET_PROC_V2(cuDeviceTotalMem);
        GET_PROC_V2(cuCtxCreate);
        GET_PROC_V2(cuModuleGetGlobal);
        GET_PROC_V2(cuMemGetInfo);
        GET_PROC_V2(cuMemAlloc);
        GET_PROC_V2(cuMemAllocPitch);
        GET_PROC_V2(cuMemFree);
        GET_PROC_V2(cuMemGetAddressRange);
        GET_PROC_V2(cuMemAllocHost);
        GET_PROC_V2(cuMemHostGetDevicePointer);
        GET_PROC_V2(cuMemcpyHtoD);
        GET_PROC_V2(cuMemcpyDtoH);
        GET_PROC_V2(cuMemcpyDtoD);
        GET_PROC_V2(cuMemcpyDtoA);
        GET_PROC_V2(cuMemcpyAtoD);
        GET_PROC_V2(cuMemcpyHtoA);
        GET_PROC_V2(cuMemcpyAtoH);
        GET_PROC_V2(cuMemcpyAtoA);
        GET_PROC_V2(cuMemcpy2D);
        GET_PROC_V2(cuMemcpy2DUnaligned);
        GET_PROC_V2(cuMemcpy3D);
        GET_PROC_V2(cuMemcpyHtoDAsync);
        GET_PROC_V2(cuMemcpyDtoHAsync);
        GET_PROC_V2(cuMemcpyHtoAAsync);
        GET_PROC_V2(cuMemcpyAtoHAsync);
        GET_PROC_V2(cuMemcpy2DAsync);
        GET_PROC_V2(cuMemcpy3DAsync);
        GET_PROC_V2(cuMemsetD8);
        GET_PROC_V2(cuMemsetD16);
        GET_PROC_V2(cuMemsetD32);
        GET_PROC_V2(cuMemsetD2D8);
        GET_PROC_V2(cuMemsetD2D16);
        GET_PROC_V2(cuMemsetD2D32);
        GET_PROC_V2(cuArrayCreate);
        GET_PROC_V2(cuArrayGetDescriptor);
        GET_PROC_V2(cuArray3DCreate);
        GET_PROC_V2(cuArray3DGetDescriptor);
        GET_PROC_V2(cuTexRefSetAddress);
        GET_PROC_V2(cuTexRefSetAddress2D);
        GET_PROC_V2(cuTexRefGetAddress);
    }
    else
    {
        GET_PROC(cuDeviceTotalMem);
        GET_PROC(cuCtxCreate);
        GET_PROC(cuModuleGetGlobal);
        GET_PROC(cuMemGetInfo);
        GET_PROC(cuMemAlloc);
        GET_PROC(cuMemAllocPitch);
        GET_PROC(cuMemFree);
        GET_PROC(cuMemGetAddressRange);
        GET_PROC(cuMemAllocHost);
        GET_PROC(cuMemHostGetDevicePointer);
        GET_PROC(cuMemcpyHtoD);
        GET_PROC(cuMemcpyDtoH);
        GET_PROC(cuMemcpyDtoD);
        GET_PROC(cuMemcpyDtoA);
        GET_PROC(cuMemcpyAtoD);
        GET_PROC(cuMemcpyHtoA);
        GET_PROC(cuMemcpyAtoH);
        GET_PROC(cuMemcpyAtoA);
        GET_PROC(cuMemcpy2D);
        GET_PROC(cuMemcpy2DUnaligned);
        GET_PROC(cuMemcpy3D);
        GET_PROC(cuMemcpyHtoDAsync);
        GET_PROC(cuMemcpyDtoHAsync);
        GET_PROC(cuMemcpyHtoAAsync);
        GET_PROC(cuMemcpyAtoHAsync);
        GET_PROC(cuMemcpy2DAsync);
        GET_PROC(cuMemcpy3DAsync);
        GET_PROC(cuMemsetD8);
        GET_PROC(cuMemsetD16);
        GET_PROC(cuMemsetD32);
        GET_PROC(cuMemsetD2D8);
        GET_PROC(cuMemsetD2D16);
        GET_PROC(cuMemsetD2D32);
        GET_PROC(cuArrayCreate);
        GET_PROC(cuArrayGetDescriptor);
        GET_PROC(cuArray3DCreate);
        GET_PROC(cuArray3DGetDescriptor);
        GET_PROC(cuTexRefSetAddress);
        GET_PROC(cuTexRefSetAddress2D);
        GET_PROC(cuTexRefGetAddress);
    }
    // The following functions are specific to CUDA versions
    // (gated on the version reported by the driver, not on cudaVersion)
    if (driverVer >= 2010)
    {
        GET_PROC(cuModuleLoadDataEx);
        GET_PROC(cuModuleLoadFatBinary);
    }
    if (driverVer >= 2030)
    {
        GET_PROC(cuMemHostGetFlags);
    }
    if (driverVer >= 3000)
    {
        GET_PROC(cuMemcpyDtoDAsync);
        GET_PROC(cuFuncSetCacheConfig);
        GET_PROC(cuGraphicsUnregisterResource);
        GET_PROC(cuGraphicsSubResourceGetMappedArray);
#if (__CUDA_API_VERSION >= 3020)
        if (cudaVersion >= 3020)
        {
            GET_PROC_V2(cuGraphicsResourceGetMappedPointer);
        }
        else
        {
            GET_PROC(cuGraphicsResourceGetMappedPointer);
        }
#endif
        GET_PROC(cuGraphicsResourceSetMapFlags);
        GET_PROC(cuGraphicsMapResources);
        GET_PROC(cuGraphicsUnmapResources);
        GET_PROC(cuGetExportTable);
    }
    if (driverVer >= 3010)
    {
        GET_PROC(cuModuleGetSurfRef);
        GET_PROC(cuSurfRefSetArray);
        GET_PROC(cuSurfRefGetArray);
        GET_PROC(cuCtxSetLimit);
        GET_PROC(cuCtxGetLimit);
    }
    if (driverVer >= 4000)
    {
        GET_PROC(cuCtxSetCurrent);
        GET_PROC(cuCtxGetCurrent);
        GET_PROC(cuMemHostRegister);
        GET_PROC(cuMemHostUnregister);
        GET_PROC(cuMemcpy);
        GET_PROC(cuMemcpyPeer);
        GET_PROC(cuLaunchKernel);
    }
    // Interop initializers are given the compile-time __CUDA_API_VERSION,
    // not the cudaVersion argument.
#if INIT_CUDA_GL
    if (cuInitGL(0, __CUDA_API_VERSION, CudaDrvLib) != CUDA_SUCCESS)
        return CUDA_ERROR_INVALID_DEVICE;
#endif
#if INIT_CUDA_D3D9
    if (cuInitD3D9(0, __CUDA_API_VERSION, CudaDrvLib) != CUDA_SUCCESS)
        return CUDA_ERROR_INVALID_DEVICE;
#endif
#if INIT_CUDA_D3D10
    if (cuInitD3D10(0, __CUDA_API_VERSION, CudaDrvLib) != CUDA_SUCCESS)
        return CUDA_ERROR_INVALID_DEVICE;
#endif
#if INIT_CUDA_D3D11
    if (cuInitD3D11(0, __CUDA_API_VERSION, CudaDrvLib) != CUDA_SUCCESS)
        return CUDA_ERROR_INVALID_DEVICE;
#endif
    return CUDA_SUCCESS;
}