| // |
| // Copyright (c) 2017, 2020 The Khronos Group Inc. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| #include "harness/compat.h" |
| |
| // Bug: Missing in spec: atomic_intptr_t is always supported if device is 32-bits. |
| // Bug: Missing in spec: CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE |
| |
| #define FLUSH fflush(stdout) |
| |
| #define MAX_STR 16*1024 |
| |
| #define ALIGNMENT 128 |
| |
| #define OPTIONS "-cl-std=CL2.0" |
| |
| // NUM_ROUNDS must be at least 1. |
| // It determines how many sets of random data we push through the global |
| // variables. |
| #define NUM_ROUNDS 1 |
| |
| // This is a shared property of the writer and reader kernels. |
| #define NUM_TESTED_VALUES 5 |
| |
| // TODO: pointer-to-half (and its vectors) |
| // TODO: union of... |
| |
| #include <algorithm> |
| #include <cstdio> |
| #include <cstdlib> |
| #include <cstring> |
| #include <string> |
| #include <vector> |
| #include <cassert> |
| #include <sys/types.h> |
| #include <sys/stat.h> |
| #include "harness/typeWrappers.h" |
| #include "harness/errorHelpers.h" |
| #include "harness/mt19937.h" |
| #include "procs.h" |
| |
| |
| //////////////////// |
| // Device capabilities |
| static int l_has_double = 0; |
| static int l_has_half = 0; |
| static int l_64bit_device = 0; |
| static int l_has_int64_atomics = 0; |
| static int l_has_intptr_atomics = 0; |
| static int l_has_cles_int64 = 0; |
| |
| static int l_host_is_big_endian = 1; |
| |
| static size_t l_max_global_id0 = 0; |
| static cl_bool l_linker_available = false; |
| |
| #define check_error(errCode,msg,...) ((errCode != CL_SUCCESS) ? (log_error("ERROR: " msg "! (%s:%d)\n", ## __VA_ARGS__, __FILE__, __LINE__), 1) : 0) |
| |
| //////////////////// |
| // Info about types we can use for program scope variables. |
| |
| |
| class TypeInfo { |
| |
| public: |
| TypeInfo() : |
| name(""), |
| m_buf_elem_type(""), |
| m_is_vecbase(false), |
| m_is_atomic(false), |
| m_is_like_size_t(false), |
| m_is_bool(false), |
| m_elem_type(0), m_num_elem(0), |
| m_size(0), |
| m_value_size(0) |
| {} |
| TypeInfo(const char* name_arg) : |
| name(name_arg), |
| m_buf_elem_type(name_arg), |
| m_is_vecbase(false), |
| m_is_atomic(false), |
| m_is_like_size_t(false), |
| m_is_bool(false), |
| m_elem_type(0), m_num_elem(0), |
| m_size(0), |
| m_value_size(0) |
| { } |
| |
| // Vectors |
| TypeInfo( TypeInfo* elem_type, int num_elem ) : |
| m_is_vecbase(false), |
| m_is_atomic(false), |
| m_is_like_size_t(false), |
| m_is_bool(false), |
| m_elem_type(elem_type), |
| m_num_elem(num_elem) |
| { |
| char the_name[10]; // long enough for longest vector type name "double16" |
| snprintf(the_name,sizeof(the_name),"%s%d",elem_type->get_name_c_str(),m_num_elem); |
| this->name = std::string(the_name); |
| this->m_buf_elem_type = std::string(the_name); |
| this->m_value_size = num_elem * elem_type->get_size(); |
| if ( m_num_elem == 3 ) { |
| this->m_size = 4 * elem_type->get_size(); |
| } else { |
| this->m_size = num_elem * elem_type->get_size(); |
| } |
| } |
| const std::string& get_name(void) const { return name; } |
| const char* get_name_c_str(void) const { return name.c_str(); } |
| TypeInfo& set_vecbase(void) { this->m_is_vecbase = true; return *this; } |
| TypeInfo& set_atomic(void) { this->m_is_atomic = true; return *this; } |
| TypeInfo& set_like_size_t(void) { |
| this->m_is_like_size_t = true; |
| this->set_size( l_64bit_device ? 8 : 4 ); |
| this->m_buf_elem_type = l_64bit_device ? "ulong" : "uint"; |
| return *this; |
| } |
| TypeInfo& set_bool(void) { this->m_is_bool = true; return *this; } |
| TypeInfo& set_size(size_t n) { this->m_value_size = this->m_size = n; return *this; } |
| TypeInfo& set_buf_elem_type( const char* name ) { this->m_buf_elem_type = std::string(name); return *this; } |
| |
| const TypeInfo* elem_type(void) const { return m_elem_type; } |
| int num_elem(void) const { return m_num_elem; } |
| |
| bool is_vecbase(void) const {return m_is_vecbase;} |
| bool is_atomic(void) const {return m_is_atomic;} |
| bool is_atomic_64bit(void) const {return m_is_atomic && m_size == 8;} |
| bool is_like_size_t(void) const {return m_is_like_size_t;} |
| bool is_bool(void) const {return m_is_bool;} |
| size_t get_size(void) const {return m_size;} |
| size_t get_value_size(void) const {return m_value_size;} |
| |
| // When passing values of this type to a kernel, what buffer type |
| // should be used? |
| const char* get_buf_elem_type(void) const { return m_buf_elem_type.c_str(); } |
| |
| std::string as_string(const cl_uchar* value_ptr) const { |
| // This method would be shorter if I had a real handle to element |
| // vector type. |
| if ( this->is_bool() ) { |
| std::string result( name ); |
| result += "<"; |
| result += (*value_ptr ? "true" : "false"); |
| result += ", "; |
| char buf[10]; |
| sprintf(buf,"%02x",*value_ptr); |
| result += buf; |
| result += ">"; |
| return result; |
| } else if ( this->num_elem() ) { |
| std::string result( name ); |
| result += "<"; |
| for ( unsigned ielem = 0 ; ielem < this->num_elem() ; ielem++ ) { |
| char buf[MAX_STR]; |
| if ( ielem ) result += ", "; |
| for ( unsigned ibyte = 0; ibyte < this->m_elem_type->get_size() ; ibyte++ ) { |
| sprintf(buf + 2*ibyte,"%02x", value_ptr[ ielem * this->m_elem_type->get_size() + ibyte ] ); |
| } |
| result += buf; |
| } |
| result += ">"; |
| return result; |
| } else { |
| std::string result( name ); |
| result += "<"; |
| char buf[MAX_STR]; |
| for ( unsigned ibyte = 0; ibyte < this->get_size() ; ibyte++ ) { |
| sprintf(buf + 2*ibyte,"%02x", value_ptr[ ibyte ] ); |
| } |
| result += buf; |
| result += ">"; |
| return result; |
| } |
| } |
| |
| // Initialize the given buffer to a constant value initialized as if it |
| // were from the INIT_VAR macro below. |
| // Only needs to support values 0 and 1. |
| void init( cl_uchar* buf, cl_uchar val) const { |
| if ( this->num_elem() ) { |
| for ( unsigned ielem = 0 ; ielem < this->num_elem() ; ielem++ ) { |
| // Delegate! |
| this->init_elem( buf + ielem * this->get_value_size()/this->num_elem(), val ); |
| } |
| } else { |
| init_elem( buf, val ); |
| } |
| } |
| |
| private: |
| void init_elem( cl_uchar* buf, cl_uchar val ) const { |
| size_t elem_size = this->num_elem() ? this->get_value_size()/this->num_elem() : this->get_size(); |
| memset(buf,0,elem_size); |
| if ( val ) { |
| if ( strstr( name.c_str(), "float" ) ) { |
| *(float*)buf = (float)val; |
| return; |
| } |
| if ( strstr( name.c_str(), "double" ) ) { |
| *(double*)buf = (double)val; |
| return; |
| } |
| if ( this->is_bool() ) { *buf = (bool)val; return; } |
| |
| // Write a single character value to the correct spot, |
| // depending on host endianness. |
| if ( l_host_is_big_endian ) *(buf + elem_size-1) = (cl_uchar)val; |
| else *buf = (cl_uchar)val; |
| } |
| } |
| public: |
| |
| void dump(FILE* fp) const { |
| fprintf(fp,"Type %s : <%d,%d,%s> ", name.c_str(), |
| (int)m_size, |
| (int)m_value_size, |
| m_buf_elem_type.c_str() ); |
| if ( this->m_elem_type ) fprintf(fp, " vec(%s,%d)", this->m_elem_type->get_name_c_str(), this->num_elem() ); |
| if ( this->m_is_vecbase ) fprintf(fp, " vecbase"); |
| if ( this->m_is_bool ) fprintf(fp, " bool"); |
| if ( this->m_is_like_size_t ) fprintf(fp, " like-size_t"); |
| if ( this->m_is_atomic ) fprintf(fp, " atomic"); |
| fprintf(fp,"\n"); |
| fflush(fp); |
| } |
| |
| private: |
| std::string name; |
| TypeInfo* m_elem_type; |
| int m_num_elem; |
| bool m_is_vecbase; |
| bool m_is_atomic; |
| bool m_is_like_size_t; |
| bool m_is_bool; |
| size_t m_size; // Number of bytes of storage occupied by this type. |
| size_t m_value_size; // Number of bytes of value significant for this type. Differs for vec3. |
| |
| // When passing values of this type to a kernel, what buffer type |
| // should be used? |
| // For most types, it's just itself. |
| // Use a std::string so I don't have to make a copy constructor. |
| std::string m_buf_elem_type; |
| }; |
| |
| |
| #define NUM_SCALAR_TYPES (8+2) // signed and unsigned integral types, float and double |
| #define NUM_VECTOR_SIZES (5) // 2,3,4,8,16 |
| #define NUM_PLAIN_TYPES \ |
| 5 /*boolean and size_t family */ \ |
| + NUM_SCALAR_TYPES \ |
| + NUM_SCALAR_TYPES*NUM_VECTOR_SIZES \ |
| + 10 /* atomic types */ |
| |
| // Need room for plain, array, pointer, struct |
| #define MAX_TYPES (4*NUM_PLAIN_TYPES) |
| |
| static TypeInfo type_info[MAX_TYPES]; |
| static int num_type_info = 0; // Number of valid entries in type_info[] |
| |
| |
| |
| |
| // A helper class to form kernel source arguments for clCreateProgramWithSource. |
| class StringTable { |
| public: |
| StringTable() : m_c_strs(NULL), m_lengths(NULL), m_frozen(false), m_strings() {} |
| ~StringTable() { release_frozen(); } |
| |
| void add(std::string s) { release_frozen(); m_strings.push_back(s); } |
| |
| const size_t num_str() { freeze(); return m_strings.size(); } |
| const char** strs() { freeze(); return m_c_strs; } |
| const size_t* lengths() { freeze(); return m_lengths; } |
| |
| private: |
| void freeze(void) { |
| if ( !m_frozen ) { |
| release_frozen(); |
| |
| m_c_strs = (const char**) malloc(sizeof(const char*) * m_strings.size()); |
| m_lengths = (size_t*) malloc(sizeof(size_t) * m_strings.size()); |
| assert( m_c_strs ); |
| assert( m_lengths ); |
| |
| for ( size_t i = 0; i < m_strings.size() ; i++ ) { |
| m_c_strs[i] = m_strings[i].c_str(); |
| m_lengths[i] = strlen(m_c_strs[i]); |
| } |
| |
| m_frozen = true; |
| } |
| } |
| void release_frozen(void) { |
| if ( m_c_strs ) { free(m_c_strs); m_c_strs = 0; } |
| if ( m_lengths ) { free(m_lengths); m_lengths = 0; } |
| m_frozen = false; |
| } |
| |
| typedef std::vector<std::string> strlist_t; |
| strlist_t m_strings; |
| const char** m_c_strs; |
| size_t* m_lengths; |
| bool m_frozen; |
| }; |
| |
| |
| //////////////////// |
| // File scope function declarations |
| |
| static void l_load_abilities(cl_device_id device); |
| static const char* l_get_fp64_pragma(void); |
| static const char* l_get_cles_int64_pragma(void); |
| static int l_build_type_table(cl_device_id device); |
| |
| static int l_get_device_info(cl_device_id device, size_t* max_size_ret, size_t* pref_size_ret); |
| |
| static void l_set_randomly( cl_uchar* buf, size_t buf_size, RandomSeed& rand_state ); |
| static int l_compare( const cl_uchar* expected, const cl_uchar* received, unsigned num_values, const TypeInfo&ti ); |
| static int l_copy( cl_uchar* dest, unsigned dest_idx, const cl_uchar* src, unsigned src_idx, const TypeInfo&ti ); |
| |
| static std::string conversion_functions(const TypeInfo& ti); |
| static std::string global_decls(const TypeInfo& ti, bool with_init); |
| static std::string global_check_function(const TypeInfo& ti); |
| static std::string writer_function(const TypeInfo& ti); |
| static std::string reader_function(const TypeInfo& ti); |
| |
| static int l_write_read( cl_device_id device, cl_context context, cl_command_queue queue ); |
| static int l_write_read_for_type( cl_device_id device, cl_context context, cl_command_queue queue, const TypeInfo& ti, RandomSeed& rand_state ); |
| |
| static int l_init_write_read( cl_device_id device, cl_context context, cl_command_queue queue ); |
| static int l_init_write_read_for_type( cl_device_id device, cl_context context, cl_command_queue queue, const TypeInfo& ti, RandomSeed& rand_state ); |
| |
| static int l_capacity( cl_device_id device, cl_context context, cl_command_queue queue, size_t max_size ); |
| static int l_user_type( cl_device_id device, cl_context context, cl_command_queue queue, size_t max_size, bool separate_compilation ); |
| |
| |
| |
| //////////////////// |
| // File scope function definitions |
| |
| static cl_int print_build_log(cl_program program, cl_uint num_devices, cl_device_id *device_list, cl_uint count, const char **strings, const size_t *lengths, const char* options) |
| { |
| cl_uint i; |
| cl_int error; |
| BufferOwningPtr<cl_device_id> devices; |
| |
| if(num_devices == 0 || device_list == NULL) |
| { |
| error = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES, sizeof(num_devices), &num_devices, NULL); |
| test_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed"); |
| |
| device_list = (cl_device_id*)malloc(sizeof(cl_device_id)*num_devices); |
| devices.reset(device_list); |
| |
| memset(device_list, 0, sizeof(cl_device_id) * num_devices); |
| |
| error = clGetProgramInfo(program, CL_PROGRAM_DEVICES, sizeof(cl_device_id) * num_devices, device_list, NULL); |
| test_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed"); |
| } |
| |
| cl_uint z; |
| bool sourcePrinted = false; |
| |
| for(z = 0; z < num_devices; z++) |
| { |
| char deviceName[4096] = ""; |
| error = clGetDeviceInfo(device_list[z], CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL); |
| check_error(error, "Device \"%d\" failed to return a name. clGetDeviceInfo CL_DEVICE_NAME failed", z); |
| |
| cl_build_status buildStatus; |
| error = clGetProgramBuildInfo(program, device_list[z], CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL); |
| check_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed"); |
| |
| if(buildStatus != CL_BUILD_SUCCESS) |
| { |
| if(!sourcePrinted) |
| { |
| log_error("Build options: %s\n", options); |
| if(count && strings) |
| { |
| log_error("Original source is: ------------\n"); |
| for(i = 0; i < count; i++) log_error("%s", strings[i]); |
| } |
| sourcePrinted = true; |
| } |
| |
| char statusString[64] = ""; |
| if (buildStatus == (cl_build_status)CL_BUILD_SUCCESS) |
| sprintf(statusString, "CL_BUILD_SUCCESS"); |
| else if (buildStatus == (cl_build_status)CL_BUILD_NONE) |
| sprintf(statusString, "CL_BUILD_NONE"); |
| else if (buildStatus == (cl_build_status)CL_BUILD_ERROR) |
| sprintf(statusString, "CL_BUILD_ERROR"); |
| else if (buildStatus == (cl_build_status)CL_BUILD_IN_PROGRESS) |
| sprintf(statusString, "CL_BUILD_IN_PROGRESS"); |
| else |
| sprintf(statusString, "UNKNOWN (%d)", buildStatus); |
| |
| log_error("Build not successful for device \"%s\", status: %s\n", deviceName, statusString); |
| |
| size_t paramSize = 0; |
| error = clGetProgramBuildInfo(program, device_list[z], CL_PROGRAM_BUILD_LOG, 0, NULL, ¶mSize); |
| if(check_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed")) break; |
| |
| std::string log; |
| log.resize(paramSize/sizeof(char)); |
| |
| error = clGetProgramBuildInfo(program, device_list[z], CL_PROGRAM_BUILD_LOG, paramSize, &log[0], NULL); |
| if(check_error(error, "Device %d (%s) failed to return a build log", z, deviceName)) break; |
| if(log[0] == 0) log_error("clGetProgramBuildInfo returned an empty log.\n"); |
| else |
| { |
| log_error("Build log:\n", deviceName); |
| log_error("%s\n", log.c_str()); |
| } |
| } |
| } |
| return error; |
| } |
| |
| static void l_load_abilities(cl_device_id device) |
| { |
| l_has_half = is_extension_available(device,"cl_khr_fp16"); |
| l_has_double = is_extension_available(device,"cl_khr_fp64"); |
| l_has_cles_int64 = is_extension_available(device,"cles_khr_int64"); |
| |
| l_has_int64_atomics |
| = is_extension_available(device,"cl_khr_int64_base_atomics") |
| && is_extension_available(device,"cl_khr_int64_extended_atomics"); |
| |
| { |
| int status = CL_SUCCESS; |
| cl_uint addr_bits = 32; |
| status = clGetDeviceInfo(device,CL_DEVICE_ADDRESS_BITS,sizeof(addr_bits),&addr_bits,0); |
| l_64bit_device = ( status == CL_SUCCESS && addr_bits == 64 ); |
| } |
| |
| // 32-bit devices always have intptr atomics. |
| l_has_intptr_atomics = !l_64bit_device || l_has_int64_atomics; |
| |
| union { char c[4]; int i; } probe; |
| probe.i = 1; |
| l_host_is_big_endian = !probe.c[0]; |
| |
| // Determine max global id. |
| { |
| int status = CL_SUCCESS; |
| cl_uint max_dim = 0; |
| status = clGetDeviceInfo(device,CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,sizeof(max_dim),&max_dim,0); |
| assert( status == CL_SUCCESS ); |
| assert( max_dim > 0 ); |
| size_t max_id[3]; |
| max_id[0] = 0; |
| status = clGetDeviceInfo(device,CL_DEVICE_MAX_WORK_ITEM_SIZES,max_dim*sizeof(size_t),&max_id[0],0); |
| assert( status == CL_SUCCESS ); |
| l_max_global_id0 = max_id[0]; |
| } |
| |
| { // Is separate compilation supported? |
| int status = CL_SUCCESS; |
| l_linker_available = false; |
| status = clGetDeviceInfo(device,CL_DEVICE_LINKER_AVAILABLE,sizeof(l_linker_available),&l_linker_available,0); |
| assert( status == CL_SUCCESS ); |
| } |
| } |
| |
| |
| static const char* l_get_fp64_pragma(void) |
| { |
| return l_has_double ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" : ""; |
| } |
| |
| static const char* l_get_cles_int64_pragma(void) |
| { |
| return l_has_cles_int64 ? "#pragma OPENCL EXTENSION cles_khr_int64 : enable\n" : ""; |
| } |
| |
| static const char* l_get_int64_atomic_pragma(void) |
| { |
| return "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n" |
| "#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n"; |
| } |
| |
| static int l_build_type_table(cl_device_id device) |
| { |
| int status = CL_SUCCESS; |
| size_t iscalar = 0; |
| size_t ivecsize = 0; |
| int vecsizes[] = { 2, 3, 4, 8, 16 }; |
| const char* vecbase[] = { |
| "uchar", "char", |
| "ushort", "short", |
| "uint", "int", |
| "ulong", "long", |
| "float", |
| "double" |
| }; |
| int vecbase_size[] = { |
| 1, 1, |
| 2, 2, |
| 4, 4, |
| 8, 8, |
| 4, |
| 8 |
| }; |
| const char* like_size_t[] = { |
| "intptr_t", |
| "uintptr_t", |
| "size_t", |
| "ptrdiff_t" |
| }; |
| const char* atomics[] = { |
| "atomic_int", "atomic_uint", |
| "atomic_long", "atomic_ulong", |
| "atomic_float", |
| "atomic_double", |
| }; |
| int atomics_size[] = { |
| 4, 4, |
| 8, 8, |
| 4, |
| 8 |
| }; |
| const char* intptr_atomics[] = { |
| "atomic_intptr_t", |
| "atomic_uintptr_t", |
| "atomic_size_t", |
| "atomic_ptrdiff_t" |
| }; |
| |
| l_load_abilities(device); |
| num_type_info = 0; |
| |
| // Boolean. |
| type_info[ num_type_info++ ] = TypeInfo( "bool" ).set_bool().set_size(1).set_buf_elem_type("uchar"); |
| |
| // Vector types, and the related scalar element types. |
| for ( iscalar=0; iscalar < sizeof(vecbase)/sizeof(vecbase[0]) ; ++iscalar ) { |
| if ( !gHasLong && strstr(vecbase[iscalar],"long") ) continue; |
| if ( !l_has_double && strstr(vecbase[iscalar],"double") ) continue; |
| |
| // Scalar |
| TypeInfo* elem_type = type_info + num_type_info++; |
| *elem_type = TypeInfo( vecbase[iscalar] ).set_vecbase().set_size( vecbase_size[iscalar] ); |
| |
| // Vector |
| for ( ivecsize=0; ivecsize < sizeof(vecsizes)/sizeof(vecsizes[0]) ; ivecsize++ ) { |
| type_info[ num_type_info++ ] = TypeInfo( elem_type, vecsizes[ivecsize] ); |
| } |
| } |
| |
| // Size_t-like types |
| for ( iscalar=0; iscalar < sizeof(like_size_t)/sizeof(like_size_t[0]) ; ++iscalar ) { |
| type_info[ num_type_info++ ] = TypeInfo( like_size_t[iscalar] ).set_like_size_t(); |
| } |
| |
| // Atomic types. |
| for ( iscalar=0; iscalar < sizeof(atomics)/sizeof(atomics[0]) ; ++iscalar ) { |
| if ( !l_has_int64_atomics && strstr(atomics[iscalar],"long") ) continue; |
| if ( !(l_has_int64_atomics && l_has_double) && strstr(atomics[iscalar],"double") ) continue; |
| |
| // The +7 is used to skip over the "atomic_" prefix. |
| const char* buf_type = atomics[iscalar] + 7; |
| type_info[ num_type_info++ ] = TypeInfo( atomics[iscalar] ).set_atomic().set_size( atomics_size[iscalar] ).set_buf_elem_type( buf_type ); |
| } |
| if ( l_has_intptr_atomics ) { |
| for ( iscalar=0; iscalar < sizeof(intptr_atomics)/sizeof(intptr_atomics[0]) ; ++iscalar ) { |
| type_info[ num_type_info++ ] = TypeInfo( intptr_atomics[iscalar] ).set_atomic().set_like_size_t(); |
| } |
| } |
| |
| assert( num_type_info <= MAX_TYPES ); // or increase MAX_TYPES |
| |
| #if 0 |
| for ( size_t i = 0 ; i < num_type_info ; i++ ) { |
| type_info[ i ].dump(stdout); |
| } |
| exit(0); |
| #endif |
| |
| return status; |
| } |
| |
| static const TypeInfo& l_find_type( const char* name ) |
| { |
| auto itr = |
| std::find_if(type_info, type_info + num_type_info, |
| [name](TypeInfo& ti) { return ti.get_name() == name; }); |
| assert(itr != type_info + num_type_info); |
| return *itr; |
| } |
| |
| |
| |
| // Populate return parameters for max program variable size, preferred program variable size. |
| |
| static int l_get_device_info(cl_device_id device, size_t* max_size_ret, size_t* pref_size_ret) |
| { |
| int err = CL_SUCCESS; |
| size_t return_size = 0; |
| |
| err = clGetDeviceInfo(device, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, sizeof(*max_size_ret), max_size_ret, &return_size); |
| if ( err != CL_SUCCESS ) { |
| log_error("Error: Failed to get device info for CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n"); |
| return err; |
| } |
| if ( return_size != sizeof(size_t) ) { |
| log_error("Error: Invalid size %d returned for CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n", (int)return_size ); |
| return 1; |
| } |
| if ( return_size != sizeof(size_t) ) { |
| log_error("Error: Invalid size %d returned for CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n", (int)return_size ); |
| return 1; |
| } |
| |
| return_size = 0; |
| err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, sizeof(*pref_size_ret), pref_size_ret, &return_size); |
| if ( err != CL_SUCCESS ) { |
| log_error("Error: Failed to get device info for CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE: %d\n",err); |
| return err; |
| } |
| if ( return_size != sizeof(size_t) ) { |
| log_error("Error: Invalid size %d returned for CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE\n", (int)return_size ); |
| return 1; |
| } |
| |
| return CL_SUCCESS; |
| } |
| |
| |
| static void l_set_randomly( cl_uchar* buf, size_t buf_size, RandomSeed& rand_state ) |
| { |
| assert( 0 == (buf_size % sizeof(cl_uint) ) ); |
| for ( size_t i = 0; i < buf_size ; i += sizeof(cl_uint) ) { |
| *( (cl_uint*)(buf + i) ) = genrand_int32( rand_state ); |
| } |
| #if 0 |
| for ( size_t i = 0; i < buf_size ; i++ ) { |
| printf("%02x",buf[i]); |
| } |
| printf("\n"); |
| #endif |
| } |
| |
| // Return num_value values of the given type. |
| // Returns CL_SUCCESS if they compared as equal. |
| static int l_compare( const char* test_name, const cl_uchar* expected, const cl_uchar* received, size_t num_values, const TypeInfo&ti ) |
| { |
| // Compare only the valid returned bytes. |
| for ( unsigned value_idx = 0; value_idx < num_values; value_idx++ ) { |
| const cl_uchar* expv = expected + value_idx * ti.get_size(); |
| const cl_uchar* gotv = received + value_idx * ti.get_size(); |
| if ( memcmp( expv, gotv, ti.get_value_size() ) ) { |
| std::string exp_str = ti.as_string( expv ); |
| std::string got_str = ti.as_string( gotv ); |
| log_error("Error: %s test for type %s, at index %d: Expected %s got %s\n", |
| test_name, |
| ti.get_name_c_str(), value_idx, |
| exp_str.c_str(), |
| got_str.c_str() ); |
| return 1; |
| } |
| } |
| return CL_SUCCESS; |
| } |
| |
| // Copy a target value from src[idx] to dest[idx] |
| static int l_copy( cl_uchar* dest, unsigned dest_idx, const cl_uchar* src, unsigned src_idx, const TypeInfo&ti ) |
| { |
| cl_uchar* raw_dest = dest + dest_idx * ti.get_size(); |
| const cl_uchar* raw_src = src + src_idx * ti.get_size(); |
| memcpy( raw_dest, raw_src, ti.get_value_size() ); |
| |
| return 0; |
| } |
| |
| |
| static std::string conversion_functions(const TypeInfo& ti) |
| { |
| std::string result; |
| static char buf[MAX_STR]; |
| int num_printed = 0; |
| // The atomic types just use the base type. |
| if ( ti.is_atomic() || 0 == strcmp( ti.get_buf_elem_type(), ti.get_name_c_str() ) ) { |
| // The type is represented in a buffer by itself. |
| num_printed = snprintf(buf,MAX_STR, |
| "%s from_buf(%s a) { return a; }\n" |
| "%s to_buf(%s a) { return a; }\n", |
| ti.get_buf_elem_type(), ti.get_buf_elem_type(), |
| ti.get_buf_elem_type(), ti.get_buf_elem_type() ); |
| } else { |
| // Just use C-style cast. |
| num_printed = snprintf(buf,MAX_STR, |
| "%s from_buf(%s a) { return (%s)a; }\n" |
| "%s to_buf(%s a) { return (%s)a; }\n", |
| ti.get_name_c_str(), ti.get_buf_elem_type(), ti.get_name_c_str(), |
| ti.get_buf_elem_type(), ti.get_name_c_str(), ti.get_buf_elem_type() ); |
| } |
| // Add initializations. |
| if ( ti.is_atomic() ) { |
| num_printed += snprintf( buf + num_printed, MAX_STR-num_printed, |
| "#define INIT_VAR(a) ATOMIC_VAR_INIT(a)\n" ); |
| } else { |
| // This cast works even if the target type is a vector type. |
| num_printed += snprintf( buf + num_printed, MAX_STR-num_printed, |
| "#define INIT_VAR(a) ((%s)(a))\n", ti.get_name_c_str()); |
| } |
| assert( num_printed < MAX_STR ); // or increase MAX_STR |
| result = buf; |
| return result; |
| } |
| |
| static std::string global_decls(const TypeInfo& ti, bool with_init ) |
| { |
| const char* tn = ti.get_name_c_str(); |
| const char* vol = (ti.is_atomic() ? " volatile " : " "); |
| static char decls[MAX_STR]; |
| int num_printed = 0; |
| if ( with_init ) { |
| const char *decls_template_with_init = |
| "%s %s var = INIT_VAR(0);\n" |
| "global %s %s g_var = INIT_VAR(1);\n" |
| "%s %s a_var[2] = { INIT_VAR(1), INIT_VAR(1) };\n" |
| "volatile global %s %s* p_var = &a_var[1];\n\n"; |
| num_printed = snprintf(decls,sizeof(decls),decls_template_with_init, |
| vol,tn,vol,tn,vol,tn,vol,tn); |
| } else { |
| const char *decls_template_no_init = |
| "%s %s var;\n" |
| "global %s %s g_var;\n" |
| "%s %s a_var[2];\n" |
| "global %s %s* p_var;\n\n"; |
| num_printed = snprintf(decls,sizeof(decls),decls_template_no_init, |
| vol,tn,vol,tn,vol,tn,vol,tn); |
| } |
| assert( num_printed < sizeof(decls) ); |
| return std::string(decls); |
| } |
| |
| // Return the source code for the "global_check" function for the given type. |
| // This function checks that all program-scope variables have appropriate |
| // initial values when no explicit initializer is used. If all tests pass the |
| // kernel writes a non-zero value to its output argument, otherwise it writes |
| // zero. |
| static std::string global_check_function(const TypeInfo& ti) |
| { |
| const std::string type_name = ti.get_buf_elem_type(); |
| |
| // all() should only be used on vector inputs. For scalar comparison, the |
| // result of the equality operator can be used as a bool value. |
| const bool is_scalar = ti.num_elem() == 0; // 0 is used to represent scalar types, not 1. |
| const std::string is_equality_true = is_scalar ? "" : "all"; |
| |
| std::string code = "kernel void global_check(global int* out) {\n"; |
| code += " const " + type_name + " zero = ((" + type_name + ")0);\n"; |
| code += " bool status = true;\n"; |
| if (ti.is_atomic()) { |
| code += " status &= " + is_equality_true + "(atomic_load(&var) == zero);\n"; |
| code += " status &= " + is_equality_true + "(atomic_load(&g_var) == zero);\n"; |
| code += " status &= " + is_equality_true + "(atomic_load(&a_var[0]) == zero);\n"; |
| code += " status &= " + is_equality_true + "(atomic_load(&a_var[1]) == zero);\n"; |
| } else { |
| code += " status &= " + is_equality_true + "(var == zero);\n"; |
| code += " status &= " + is_equality_true + "(g_var == zero);\n"; |
| code += " status &= " + is_equality_true + "(a_var[0] == zero);\n"; |
| code += " status &= " + is_equality_true + "(a_var[1] == zero);\n"; |
| } |
| code += " status &= (p_var == NULL);\n"; |
| code += " *out = status ? 1 : 0;\n"; |
| code += "}\n\n"; |
| |
| return code; |
| } |
| |
| // Return the source text for the writer function for the given type. |
| // For types that can't be passed as pointer-to-type as a kernel argument, |
| // use a substitute base type of the same size. |
| static std::string writer_function(const TypeInfo& ti) |
| { |
| static char writer_src[MAX_STR]; |
| int num_printed = 0; |
| if ( !ti.is_atomic() ) { |
| const char* writer_template_normal = |
| "kernel void writer( global %s* src, uint idx ) {\n" |
| " var = from_buf(src[0]);\n" |
| " g_var = from_buf(src[1]);\n" |
| " a_var[0] = from_buf(src[2]);\n" |
| " a_var[1] = from_buf(src[3]);\n" |
| " p_var = a_var + idx;\n" |
| "}\n\n"; |
| num_printed = snprintf(writer_src,sizeof(writer_src),writer_template_normal,ti.get_buf_elem_type()); |
| } else { |
| const char* writer_template_atomic = |
| "kernel void writer( global %s* src, uint idx ) {\n" |
| " atomic_store( &var, from_buf(src[0]) );\n" |
| " atomic_store( &g_var, from_buf(src[1]) );\n" |
| " atomic_store( &a_var[0], from_buf(src[2]) );\n" |
| " atomic_store( &a_var[1], from_buf(src[3]) );\n" |
| " p_var = a_var + idx;\n" |
| "}\n\n"; |
| num_printed = snprintf(writer_src,sizeof(writer_src),writer_template_atomic,ti.get_buf_elem_type()); |
| } |
| assert( num_printed < sizeof(writer_src) ); |
| std::string result = writer_src; |
| return result; |
| } |
| |
| |
| // Return source text for teh reader function for the given type. |
| // For types that can't be passed as pointer-to-type as a kernel argument, |
| // use a substitute base type of the same size. |
| static std::string reader_function(const TypeInfo& ti) |
| { |
| static char reader_src[MAX_STR]; |
| int num_printed = 0; |
| if ( !ti.is_atomic() ) { |
| const char* reader_template_normal = |
| "kernel void reader( global %s* dest, %s ptr_write_val ) {\n" |
| " *p_var = from_buf(ptr_write_val);\n" |
| " dest[0] = to_buf(var);\n" |
| " dest[1] = to_buf(g_var);\n" |
| " dest[2] = to_buf(a_var[0]);\n" |
| " dest[3] = to_buf(a_var[1]);\n" |
| "}\n\n"; |
| num_printed = snprintf(reader_src,sizeof(reader_src),reader_template_normal,ti.get_buf_elem_type(),ti.get_buf_elem_type()); |
| } else { |
| const char* reader_template_atomic = |
| "kernel void reader( global %s* dest, %s ptr_write_val ) {\n" |
| " atomic_store( p_var, from_buf(ptr_write_val) );\n" |
| " dest[0] = to_buf( atomic_load( &var ) );\n" |
| " dest[1] = to_buf( atomic_load( &g_var ) );\n" |
| " dest[2] = to_buf( atomic_load( &a_var[0] ) );\n" |
| " dest[3] = to_buf( atomic_load( &a_var[1] ) );\n" |
| "}\n\n"; |
| num_printed = snprintf(reader_src,sizeof(reader_src),reader_template_atomic,ti.get_buf_elem_type(),ti.get_buf_elem_type()); |
| } |
| assert( num_printed < sizeof(reader_src) ); |
| std::string result = reader_src; |
| return result; |
| } |
| |
| // Check that all globals where appropriately default-initialized. |
| static int check_global_initialization(cl_context context, cl_program program, cl_command_queue queue) |
| { |
| int status = CL_SUCCESS; |
| |
| // Create a buffer on device to store a unique integer. |
| cl_int is_init_valid = 0; |
| clMemWrapper buffer(clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(is_init_valid), &is_init_valid, &status)); |
| test_error_ret(status, "Failed to allocate buffer", status); |
| |
| // Create, setup and invoke kernel. |
| clKernelWrapper global_check(clCreateKernel(program, "global_check", &status)); |
| test_error_ret(status, "Failed to create global_check kernel", status); |
| status = clSetKernelArg(global_check, 0, sizeof(cl_mem), &buffer); |
| test_error_ret(status, "Failed to set up argument for the global_check kernel", status); |
| const cl_uint work_dim = 1; |
| const size_t global_work_offset[] = { 0 }; |
| const size_t global_work_size[] = { 1 }; |
| status = clEnqueueNDRangeKernel(queue, global_check, work_dim, global_work_offset, global_work_size, nullptr, 0, nullptr, nullptr); |
| test_error_ret(status, "Failed to run global_check kernel", status); |
| status = clFinish(queue); |
| test_error_ret(status, "clFinish() failed", status); |
| |
| // Read back the memory buffer from the device. |
| status = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, sizeof(is_init_valid), &is_init_valid, 0, nullptr, nullptr); |
| test_error_ret(status, "Failed to read buffer from device", status); |
| if (is_init_valid == 0) { |
| log_error("Unexpected default values were detected"); |
| return 1; |
| } |
| |
| return CL_SUCCESS; |
| } |
| |
| // Check write-then-read. |
| static int l_write_read( cl_device_id device, cl_context context, cl_command_queue queue ) |
| { |
| int status = CL_SUCCESS; |
| int itype; |
| |
| RandomSeed rand_state( gRandomSeed ); |
| |
| for ( itype = 0; itype < num_type_info ; itype++ ) { |
| status = status | l_write_read_for_type(device,context,queue,type_info[itype], rand_state ); |
| FLUSH; |
| } |
| |
| return status; |
| } |
| |
| static int l_write_read_for_type( cl_device_id device, cl_context context, cl_command_queue queue, const TypeInfo& ti, RandomSeed& rand_state ) |
| { |
| int err = CL_SUCCESS; |
| std::string type_name( ti.get_name() ); |
| const char* tn = type_name.c_str(); |
| log_info(" %s ",tn); |
| |
| StringTable ksrc; |
| ksrc.add( l_get_fp64_pragma() ); |
| ksrc.add( l_get_cles_int64_pragma() ); |
| if (ti.is_atomic_64bit()) |
| ksrc.add( l_get_int64_atomic_pragma() ); |
| ksrc.add( conversion_functions(ti) ); |
| ksrc.add( global_decls(ti,false) ); |
| ksrc.add( global_check_function(ti) ); |
| ksrc.add( writer_function(ti) ); |
| ksrc.add( reader_function(ti) ); |
| |
| int status = CL_SUCCESS; |
| clProgramWrapper program; |
| clKernelWrapper writer; |
| |
| status = create_single_kernel_helper_with_build_options(context, &program, &writer, ksrc.num_str(), ksrc.strs(), "writer", OPTIONS); |
| test_error_ret(status,"Failed to create program for read-after-write test",status); |
| |
| clKernelWrapper reader( clCreateKernel( program, "reader", &status ) ); |
| test_error_ret(status,"Failed to create reader kernel for read-after-write test",status); |
| |
| // Check size query. |
| size_t used_bytes = 0; |
| status = clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, sizeof(used_bytes), &used_bytes, 0 ); |
| test_error_ret(status,"Failed to query global variable total size",status); |
| size_t expected_used_bytes = |
| (NUM_TESTED_VALUES-1)*ti.get_size() // Two regular variables and an array of 2 elements. |
| + ( l_64bit_device ? 8 : 4 ); // The pointer |
| if ( used_bytes < expected_used_bytes ) { |
| log_error("Error program query for global variable total size query failed: Expected at least %llu but got %llu\n", (unsigned long long)expected_used_bytes, (unsigned long long)used_bytes ); |
| err |= 1; |
| } |
| |
| err |= check_global_initialization(context, program, queue); |
| |
| // We need to create 5 random values of the given type, |
| // and read 4 of them back. |
| const size_t write_data_size = NUM_TESTED_VALUES * sizeof(cl_ulong16); |
| const size_t read_data_size = (NUM_TESTED_VALUES - 1) * sizeof(cl_ulong16); |
| cl_uchar* write_data = (cl_uchar*)align_malloc(write_data_size, ALIGNMENT); |
| cl_uchar* read_data = (cl_uchar*)align_malloc(read_data_size, ALIGNMENT); |
| |
| clMemWrapper write_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, write_data_size, write_data, &status ) ); |
| test_error_ret(status,"Failed to allocate write buffer",status); |
| clMemWrapper read_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, read_data_size, read_data, &status ) ); |
| test_error_ret(status,"Failed to allocate read buffer",status); |
| |
| status = clSetKernelArg(writer,0,sizeof(cl_mem),&write_mem); test_error_ret(status,"set arg",status); |
| status = clSetKernelArg(reader,0,sizeof(cl_mem),&read_mem); test_error_ret(status,"set arg",status); |
| |
| // Boolean random data needs to be massaged a bit more. |
| const int num_rounds = ti.is_bool() ? (1 << NUM_TESTED_VALUES ) : NUM_ROUNDS; |
| unsigned bool_iter = 0; |
| |
| for ( int iround = 0; iround < num_rounds ; iround++ ) { |
| for ( cl_uint iptr_idx = 0; iptr_idx < 2 ; iptr_idx++ ) { // Index into array, to write via pointer |
| // Generate new random data to push through. |
| // Generate 5 * 128 bytes all the time, even though the test for many types use less than all that. |
| |
| cl_uchar *write_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, write_data_size, 0, 0, 0, 0); |
| |
| if ( ti.is_bool() ) { |
| // For boolean, random data cast to bool isn't very random. |
| // So use the bottom bit of bool_value_iter to get true |
| // diversity. |
| for ( unsigned value_idx = 0; value_idx < NUM_TESTED_VALUES ; value_idx++ ) { |
| write_data[value_idx] = (1<<value_idx) & bool_iter; |
| //printf(" %s", (write_data[value_idx] ? "true" : "false" )); |
| } |
| bool_iter++; |
| } else { |
| l_set_randomly( write_data, write_data_size, rand_state ); |
| } |
| status = clSetKernelArg(writer,1,sizeof(cl_uint),&iptr_idx); test_error_ret(status,"set arg",status); |
| |
| // The value to write via the pointer should be taken from the |
| // 5th typed slot of the write_data. |
| status = clSetKernelArg(reader,1,ti.get_size(),write_data + (NUM_TESTED_VALUES-1)*ti.get_size()); test_error_ret(status,"set arg",status); |
| |
| // Determine the expected values. |
| cl_uchar expected[read_data_size]; |
| memset( expected, -1, sizeof(expected) ); |
| l_copy( expected, 0, write_data, 0, ti ); |
| l_copy( expected, 1, write_data, 1, ti ); |
| l_copy( expected, 2, write_data, 2, ti ); |
| l_copy( expected, 3, write_data, 3, ti ); |
| // But we need to take into account the value from the pointer write. |
| // The 2 represents where the "a" array values begin in our read-back. |
| l_copy( expected, 2 + iptr_idx, write_data, 4, ti ); |
| |
| clEnqueueUnmapMemObject(queue, write_mem, write_ptr, 0, 0, 0); |
| |
| if ( ti.is_bool() ) { |
| // Collapse down to one bit. |
| for ( unsigned i = 0; i < NUM_TESTED_VALUES-1 ; i++ ) expected[i] = (bool)expected[i]; |
| } |
| |
| cl_uchar *read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0); |
| memset(read_data, -1, read_data_size); |
| clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0); |
| |
| // Now run the kernel |
| const size_t one = 1; |
| status = clEnqueueNDRangeKernel(queue,writer,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue writer",status); |
| status = clEnqueueNDRangeKernel(queue,reader,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue reader",status); |
| status = clFinish(queue); test_error_ret(status,"finish",status); |
| |
| read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0); |
| |
| if ( ti.is_bool() ) { |
| // Collapse down to one bit. |
| for ( unsigned i = 0; i < NUM_TESTED_VALUES-1 ; i++ ) read_data[i] = (bool)read_data[i]; |
| } |
| |
| // Compare only the valid returned bytes. |
| int compare_result = l_compare( "read-after-write", expected, read_data, NUM_TESTED_VALUES-1, ti ); |
| // log_info("Compared %d values each of size %llu. Result %d\n", NUM_TESTED_VALUES-1, (unsigned long long)ti.get_value_size(), compare_result ); |
| err |= compare_result; |
| |
| clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0); |
| |
| if ( err ) break; |
| } |
| } |
| |
| if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; } |
| align_free(write_data); |
| align_free(read_data); |
| return err; |
| } |
| |
| |
| // Check initialization, then, read, then write, then read. |
| static int l_init_write_read( cl_device_id device, cl_context context, cl_command_queue queue ) |
| { |
| int status = CL_SUCCESS; |
| int itype; |
| |
| RandomSeed rand_state( gRandomSeed ); |
| |
| for ( itype = 0; itype < num_type_info ; itype++ ) { |
| status = status | l_init_write_read_for_type(device,context,queue,type_info[itype], rand_state ); |
| } |
| return status; |
| } |
| static int l_init_write_read_for_type( cl_device_id device, cl_context context, cl_command_queue queue, const TypeInfo& ti, RandomSeed& rand_state ) |
| { |
| int err = CL_SUCCESS; |
| std::string type_name( ti.get_name() ); |
| const char* tn = type_name.c_str(); |
| log_info(" %s ",tn); |
| |
| StringTable ksrc; |
| ksrc.add( l_get_fp64_pragma() ); |
| ksrc.add( l_get_cles_int64_pragma() ); |
| if (ti.is_atomic_64bit()) |
| ksrc.add( l_get_int64_atomic_pragma() ); |
| ksrc.add( conversion_functions(ti) ); |
| ksrc.add( global_decls(ti,true) ); |
| ksrc.add( writer_function(ti) ); |
| ksrc.add( reader_function(ti) ); |
| |
| int status = CL_SUCCESS; |
| clProgramWrapper program; |
| clKernelWrapper writer; |
| |
| status = create_single_kernel_helper_with_build_options(context, &program, &writer, ksrc.num_str(), ksrc.strs(), "writer", OPTIONS); |
| test_error_ret(status,"Failed to create program for init-read-after-write test",status); |
| |
| clKernelWrapper reader( clCreateKernel( program, "reader", &status ) ); |
| test_error_ret(status,"Failed to create reader kernel for init-read-after-write test",status); |
| |
| // Check size query. |
| size_t used_bytes = 0; |
| status = clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, sizeof(used_bytes), &used_bytes, 0 ); |
| test_error_ret(status,"Failed to query global variable total size",status); |
| size_t expected_used_bytes = |
| (NUM_TESTED_VALUES-1)*ti.get_size() // Two regular variables and an array of 2 elements. |
| + ( l_64bit_device ? 8 : 4 ); // The pointer |
| if ( used_bytes < expected_used_bytes ) { |
| log_error("Error: program query for global variable total size query failed: Expected at least %llu but got %llu\n", (unsigned long long)expected_used_bytes, (unsigned long long)used_bytes ); |
| err |= 1; |
| } |
| |
| // We need to create 5 random values of the given type, |
| // and read 4 of them back. |
| const size_t write_data_size = NUM_TESTED_VALUES * sizeof(cl_ulong16); |
| const size_t read_data_size = (NUM_TESTED_VALUES-1) * sizeof(cl_ulong16); |
| |
| cl_uchar* write_data = (cl_uchar*)align_malloc(write_data_size, ALIGNMENT); |
| cl_uchar* read_data = (cl_uchar*)align_malloc(read_data_size, ALIGNMENT); |
| clMemWrapper write_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, write_data_size, write_data, &status ) ); |
| test_error_ret(status,"Failed to allocate write buffer",status); |
| clMemWrapper read_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, read_data_size, read_data, &status ) ); |
| test_error_ret(status,"Failed to allocate read buffer",status); |
| |
| status = clSetKernelArg(writer,0,sizeof(cl_mem),&write_mem); test_error_ret(status,"set arg",status); |
| status = clSetKernelArg(reader,0,sizeof(cl_mem),&read_mem); test_error_ret(status,"set arg",status); |
| |
| // Boolean random data needs to be massaged a bit more. |
| const int num_rounds = ti.is_bool() ? (1 << NUM_TESTED_VALUES ) : NUM_ROUNDS; |
| unsigned bool_iter = 0; |
| |
| // We need to count iterations. We do something *different on the |
| // first iteration, to ensure we actually pick up the initialized |
| // values. |
| unsigned iteration = 0; |
| |
| for ( int iround = 0; iround < num_rounds ; iround++ ) { |
| for ( cl_uint iptr_idx = 0; iptr_idx < 2 ; iptr_idx++ ) { // Index into array, to write via pointer |
| // Generate new random data to push through. |
| // Generate 5 * 128 bytes all the time, even though the test for many types use less than all that. |
| |
| cl_uchar *write_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, write_data_size, 0, 0, 0, 0); |
| |
| if ( ti.is_bool() ) { |
| // For boolean, random data cast to bool isn't very random. |
| // So use the bottom bit of bool_value_iter to get true |
| // diversity. |
| for ( unsigned value_idx = 0; value_idx < NUM_TESTED_VALUES ; value_idx++ ) { |
| write_data[value_idx] = (1<<value_idx) & bool_iter; |
| //printf(" %s", (write_data[value_idx] ? "true" : "false" )); |
| } |
| bool_iter++; |
| } else { |
| l_set_randomly( write_data, write_data_size, rand_state ); |
| } |
| status = clSetKernelArg(writer,1,sizeof(cl_uint),&iptr_idx); test_error_ret(status,"set arg",status); |
| |
| if ( !iteration ) { |
| // On first iteration, the value we write via the last arg |
| // to the "reader" function is 0. |
| // It's way easier to code the test this way. |
| ti.init( write_data + (NUM_TESTED_VALUES-1)*ti.get_size(), 0 ); |
| } |
| |
| // The value to write via the pointer should be taken from the |
| // 5th typed slot of the write_data. |
| status = clSetKernelArg(reader,1,ti.get_size(),write_data + (NUM_TESTED_VALUES-1)*ti.get_size()); test_error_ret(status,"set arg",status); |
| |
| // Determine the expected values. |
| cl_uchar expected[read_data_size]; |
| memset( expected, -1, sizeof(expected) ); |
| if ( iteration ) { |
| l_copy( expected, 0, write_data, 0, ti ); |
| l_copy( expected, 1, write_data, 1, ti ); |
| l_copy( expected, 2, write_data, 2, ti ); |
| l_copy( expected, 3, write_data, 3, ti ); |
| // But we need to take into account the value from the pointer write. |
| // The 2 represents where the "a" array values begin in our read-back. |
| // But we need to take into account the value from the pointer write. |
| l_copy( expected, 2 + iptr_idx, write_data, 4, ti ); |
| } else { |
| // On first iteration, expect these initialized values! |
| // See the decls_template_with_init above. |
| ti.init( expected, 0 ); |
| ti.init( expected + ti.get_size(), 1 ); |
| ti.init( expected + 2*ti.get_size(), 1 ); |
| // Emulate the effect of the write via the pointer. |
| // The value is 0, not 1 (see above). |
| // The pointer is always initialized to the second element |
| // of the array. So it goes into slot 3 of the "expected" array. |
| ti.init( expected + 3*ti.get_size(), 0 ); |
| } |
| |
| if ( ti.is_bool() ) { |
| // Collapse down to one bit. |
| for ( unsigned i = 0; i < NUM_TESTED_VALUES-1 ; i++ ) expected[i] = (bool)expected[i]; |
| } |
| |
| clEnqueueUnmapMemObject(queue, write_mem, write_ptr, 0, 0, 0); |
| |
| cl_uchar *read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0); |
| memset( read_data, -1, read_data_size ); |
| clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0); |
| |
| // Now run the kernel |
| const size_t one = 1; |
| if ( iteration ) { |
| status = clEnqueueNDRangeKernel(queue,writer,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue writer",status); |
| } else { |
| // On first iteration, we should be picking up the |
| // initialized value. So don't enqueue the writer. |
| } |
| status = clEnqueueNDRangeKernel(queue,reader,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue reader",status); |
| status = clFinish(queue); test_error_ret(status,"finish",status); |
| |
| read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0); |
| |
| if ( ti.is_bool() ) { |
| // Collapse down to one bit. |
| for ( unsigned i = 0; i < NUM_TESTED_VALUES-1 ; i++ ) read_data[i] = (bool)read_data[i]; |
| } |
| |
| // Compare only the valid returned bytes. |
| //log_info(" Round %d ptr_idx %u\n", iround, iptr_idx ); |
| int compare_result = l_compare( "init-write-read", expected, read_data, NUM_TESTED_VALUES-1, ti ); |
| //log_info("Compared %d values each of size %llu. Result %d\n", NUM_TESTED_VALUES-1, (unsigned long long)ti.get_value_size(), compare_result ); |
| err |= compare_result; |
| |
| clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0); |
| |
| if ( err ) break; |
| |
| iteration++; |
| } |
| } |
| |
| if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; } |
| align_free(write_data); |
| align_free(read_data); |
| |
| return err; |
| } |
| |
| |
| // Check that we can make at least one variable with size |
| // max_size which is returned from the device info property : CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE. |
| static int l_capacity( cl_device_id device, cl_context context, cl_command_queue queue, size_t max_size ) |
| { |
| int err = CL_SUCCESS; |
| // Just test one type. |
| const TypeInfo ti( l_find_type("uchar") ); |
| log_info(" l_capacity..."); |
| |
| const char prog_src_template[] = |
| #if defined(_WIN32) |
| "uchar var[%Iu];\n\n" |
| #else |
| "uchar var[%zu];\n\n" |
| #endif |
| "kernel void get_max_size( global ulong* size_ret ) {\n" |
| #if defined(_WIN32) |
| " *size_ret = (ulong)%Iu;\n" |
| #else |
| " *size_ret = (ulong)%zu;\n" |
| #endif |
| "}\n\n" |
| "kernel void writer( global uchar* src ) {\n" |
| " var[get_global_id(0)] = src[get_global_linear_id()];\n" |
| "}\n\n" |
| "kernel void reader( global uchar* dest ) {\n" |
| " dest[get_global_linear_id()] = var[get_global_id(0)];\n" |
| "}\n\n"; |
| char prog_src[MAX_STR]; |
| int num_printed = snprintf(prog_src,sizeof(prog_src),prog_src_template,max_size, max_size); |
| assert( num_printed < MAX_STR ); // or increase MAX_STR |
| |
| StringTable ksrc; |
| ksrc.add( prog_src ); |
| |
| int status = CL_SUCCESS; |
| clProgramWrapper program; |
| clKernelWrapper get_max_size; |
| |
| status = create_single_kernel_helper_with_build_options(context, &program, &get_max_size, ksrc.num_str(), ksrc.strs(), "get_max_size", OPTIONS); |
| test_error_ret(status,"Failed to create program for capacity test",status); |
| |
| // Check size query. |
| size_t used_bytes = 0; |
| status = clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, sizeof(used_bytes), &used_bytes, 0 ); |
| test_error_ret(status,"Failed to query global variable total size",status); |
| if ( used_bytes < max_size ) { |
| log_error("Error: program query for global variable total size query failed: Expected at least %llu but got %llu\n", (unsigned long long)max_size, (unsigned long long)used_bytes ); |
| err |= 1; |
| } |
| |
| // Prepare to execute |
| clKernelWrapper writer( clCreateKernel( program, "writer", &status ) ); |
| test_error_ret(status,"Failed to create writer kernel for capacity test",status); |
| clKernelWrapper reader( clCreateKernel( program, "reader", &status ) ); |
| test_error_ret(status,"Failed to create reader kernel for capacity test",status); |
| |
| cl_ulong max_size_ret = 0; |
| const size_t arr_size = 10*1024*1024; |
| cl_uchar* buffer = (cl_uchar*) align_malloc( arr_size, ALIGNMENT ); |
| |
| if ( !buffer ) { log_error("Failed to allocate buffer\n"); return 1; } |
| |
| clMemWrapper max_size_ret_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(max_size_ret), &max_size_ret, &status ) ); |
| test_error_ret(status,"Failed to allocate size query buffer",status); |
| clMemWrapper buffer_mem( clCreateBuffer( context, CL_MEM_READ_WRITE, arr_size, 0, &status ) ); |
| test_error_ret(status,"Failed to allocate write buffer",status); |
| |
| status = clSetKernelArg(get_max_size,0,sizeof(cl_mem),&max_size_ret_mem); test_error_ret(status,"set arg",status); |
| status = clSetKernelArg(writer,0,sizeof(cl_mem),&buffer_mem); test_error_ret(status,"set arg",status); |
| status = clSetKernelArg(reader,0,sizeof(cl_mem),&buffer_mem); test_error_ret(status,"set arg",status); |
| |
| // Check the macro value of CL_DEVICE_MAX_GLOBAL_VARIABLE |
| const size_t one = 1; |
| status = clEnqueueNDRangeKernel(queue,get_max_size,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue size query",status); |
| status = clFinish(queue); test_error_ret(status,"finish",status); |
| |
| cl_uchar *max_size_ret_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, max_size_ret_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(max_size_ret), 0, 0, 0, 0); |
| if ( max_size_ret != max_size ) { |
| log_error("Error: preprocessor definition for CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE is %llu and does not match device query value %llu\n", |
| (unsigned long long) max_size_ret, |
| (unsigned long long) max_size ); |
| err |= 1; |
| } |
| clEnqueueUnmapMemObject(queue, max_size_ret_mem, max_size_ret_ptr, 0, 0, 0); |
| |
| RandomSeed rand_state_write( gRandomSeed ); |
| for ( size_t offset = 0; offset < max_size ; offset += arr_size ) { |
| size_t curr_size = (max_size - offset) < arr_size ? (max_size - offset) : arr_size; |
| l_set_randomly( buffer, curr_size, rand_state_write ); |
| status = clEnqueueWriteBuffer (queue, buffer_mem, CL_TRUE, 0, curr_size, buffer, 0, 0, 0);test_error_ret(status,"populate buffer_mem object",status); |
| status = clEnqueueNDRangeKernel(queue,writer,1,&offset,&curr_size,0,0,0,0); test_error_ret(status,"enqueue writer",status); |
| status = clFinish(queue); test_error_ret(status,"finish",status); |
| } |
| |
| RandomSeed rand_state_read( gRandomSeed ); |
| for ( size_t offset = 0; offset < max_size ; offset += arr_size ) { |
| size_t curr_size = (max_size - offset) < arr_size ? (max_size - offset) : arr_size; |
| status = clEnqueueNDRangeKernel(queue,reader,1,&offset,&curr_size,0,0,0,0); test_error_ret(status,"enqueue reader",status); |
| cl_uchar* read_mem_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, buffer_mem, CL_TRUE, CL_MAP_READ, 0, curr_size, 0, 0, 0, &status);test_error_ret(status,"map read data",status); |
| l_set_randomly( buffer, curr_size, rand_state_read ); |
| err |= l_compare( "capacity", buffer, read_mem_ptr, curr_size, ti ); |
| clEnqueueUnmapMemObject(queue, buffer_mem, read_mem_ptr, 0, 0, 0); |
| } |
| |
| if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; } |
| align_free(buffer); |
| |
| return err; |
| } |
| |
| |
| // Check operation on a user type. |
| static int l_user_type( cl_device_id device, cl_context context, cl_command_queue queue, bool separate_compile ) |
| { |
| int err = CL_SUCCESS; |
| // Just test one type. |
| const TypeInfo ti( l_find_type("uchar") ); |
| log_info(" l_user_type %s...", separate_compile ? "separate compilation" : "single source compilation" ); |
| |
| if ( separate_compile && ! l_linker_available ) { |
| log_info("Separate compilation is not supported. Skipping test\n"); |
| return err; |
| } |
| |
| const char type_src[] = |
| "typedef struct { uchar c; uint i; } my_struct_t;\n\n"; |
| const char def_src[] = |
| "my_struct_t var = { 'a', 42 };\n\n"; |
| const char decl_src[] = |
| "extern my_struct_t var;\n\n"; |
| |
| // Don't use a host struct. We can't guarantee that the host |
| // compiler has the same structure layout as the device compiler. |
| const char writer_src[] = |
| "kernel void writer( uchar c, uint i ) {\n" |
| " var.c = c;\n" |
| " var.i = i;\n" |
| "}\n\n"; |
| const char reader_src[] = |
| "kernel void reader( global uchar* C, global uint* I ) {\n" |
| " *C = var.c;\n" |
| " *I = var.i;\n" |
| "}\n\n"; |
| |
| clProgramWrapper program; |
| |
| if ( separate_compile ) { |
| // Separate compilation flow. |
| StringTable wksrc; |
| wksrc.add( type_src ); |
| wksrc.add( def_src ); |
| wksrc.add( writer_src ); |
| |
| StringTable rksrc; |
| rksrc.add( type_src ); |
| rksrc.add( decl_src ); |
| rksrc.add( reader_src ); |
| |
| int status = CL_SUCCESS; |
| clProgramWrapper writer_program( clCreateProgramWithSource( context, wksrc.num_str(), wksrc.strs(), wksrc.lengths(), &status ) ); |
| test_error_ret(status,"Failed to create writer program for user type test",status); |
| |
| status = clCompileProgram( writer_program, 1, &device, OPTIONS, 0, 0, 0, 0, 0 ); |
| if(check_error(status, "Failed to compile writer program for user type test (%s)", IGetErrorString(status))) |
| { |
| print_build_log(writer_program, 1, &device, wksrc.num_str(), wksrc.strs(), wksrc.lengths(), OPTIONS); |
| return status; |
| } |
| |
| clProgramWrapper reader_program( clCreateProgramWithSource( context, rksrc.num_str(), rksrc.strs(), rksrc.lengths(), &status ) ); |
| test_error_ret(status,"Failed to create reader program for user type test",status); |
| |
| status = clCompileProgram( reader_program, 1, &device, OPTIONS, 0, 0, 0, 0, 0 ); |
| if(check_error(status, "Failed to compile reader program for user type test (%s)", IGetErrorString(status))) |
| { |
| print_build_log(reader_program, 1, &device, rksrc.num_str(), rksrc.strs(), rksrc.lengths(), OPTIONS); |
| return status; |
| } |
| |
| cl_program progs[2]; |
| progs[0] = writer_program; |
| progs[1] = reader_program; |
| |
| program = clLinkProgram( context, 1, &device, "", 2, progs, 0, 0, &status ); |
| if(check_error(status, "Failed to link program for user type test (%s)", IGetErrorString(status))) |
| { |
| print_build_log(program, 1, &device, 0, NULL, NULL, ""); |
| return status; |
| } |
| } else { |
| // Single compilation flow. |
| StringTable ksrc; |
| ksrc.add( type_src ); |
| ksrc.add( def_src ); |
| ksrc.add( writer_src ); |
| ksrc.add( reader_src ); |
| |
| int status = CL_SUCCESS; |
| |
| status = create_single_kernel_helper_create_program(context, &program, ksrc.num_str(), ksrc.strs(), OPTIONS); |
| if(check_error(status, "Failed to build program for user type test (%s)", IGetErrorString(status))) |
| { |
| print_build_log(program, 1, &device, ksrc.num_str(), ksrc.strs(), ksrc.lengths(), OPTIONS); |
| return status; |
| } |
| |
| status = clBuildProgram(program, 1, &device, OPTIONS, 0, 0); |
| if(check_error(status, "Failed to compile program for user type test (%s)", IGetErrorString(status))) |
| { |
| print_build_log(program, 1, &device, ksrc.num_str(), ksrc.strs(), ksrc.lengths(), OPTIONS); |
| return status; |
| } |
| } |
| |
| |
| // Check size query. |
| size_t used_bytes = 0; |
| int status = clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, sizeof(used_bytes), &used_bytes, 0 ); |
| test_error_ret(status,"Failed to query global variable total size",status); |
| size_t expected_size = sizeof(cl_uchar) + sizeof(cl_uint); |
| if ( used_bytes < expected_size ) { |
| log_error("Error: program query for global variable total size query failed: Expected at least %llu but got %llu\n", (unsigned long long)expected_size, (unsigned long long)used_bytes ); |
| err |= 1; |
| } |
| |
| // Prepare to execute |
| clKernelWrapper writer( clCreateKernel( program, "writer", &status ) ); |
| test_error_ret(status,"Failed to create writer kernel for user type test",status); |
| clKernelWrapper reader( clCreateKernel( program, "reader", &status ) ); |
| test_error_ret(status,"Failed to create reader kernel for user type test",status); |
| |
| // Set up data. |
| cl_uchar* uchar_data = (cl_uchar*)align_malloc(sizeof(cl_uchar), ALIGNMENT); |
| cl_uint* uint_data = (cl_uint*)align_malloc(sizeof(cl_uint), ALIGNMENT); |
| |
| clMemWrapper uchar_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(cl_uchar), uchar_data, &status ) ); |
| test_error_ret(status,"Failed to allocate uchar buffer",status); |
| clMemWrapper uint_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(cl_uint), uint_data, &status ) ); |
| test_error_ret(status,"Failed to allocate uint buffer",status); |
| |
| status = clSetKernelArg(reader,0,sizeof(cl_mem),&uchar_mem); test_error_ret(status,"set arg",status); |
| status = clSetKernelArg(reader,1,sizeof(cl_mem),&uint_mem); test_error_ret(status,"set arg",status); |
| |
| cl_uchar expected_uchar = 'a'; |
| cl_uint expected_uint = 42; |
| for ( unsigned iter = 0; iter < 5 ; iter++ ) { // Must go around at least twice |
| // Read back data |
| *uchar_data = -1; |
| *uint_data = -1; |
| const size_t one = 1; |
| status = clEnqueueNDRangeKernel(queue,reader,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue reader",status); |
| status = clFinish(queue); test_error_ret(status,"finish",status); |
| |
| cl_uchar *uint_data_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, uint_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(cl_uint), 0, 0, 0, 0); |
| cl_uchar *uchar_data_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, uchar_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(cl_uchar), 0, 0, 0, 0); |
| |
| if ( expected_uchar != *uchar_data || expected_uint != *uint_data ) { |
| log_error("FAILED: Iteration %d Got (0x%2x,%d) but expected (0x%2x,%d)\n", |
| iter, (int)*uchar_data, *uint_data, (int)expected_uchar, expected_uint ); |
| err |= 1; |
| } |
| |
| clEnqueueUnmapMemObject(queue, uint_mem, uint_data_ptr, 0, 0, 0); |
| clEnqueueUnmapMemObject(queue, uchar_mem, uchar_data_ptr, 0, 0, 0); |
| |
| // Mutate the data. |
| expected_uchar++; |
| expected_uint++; |
| |
| // Write the new values into persistent store. |
| *uchar_data = expected_uchar; |
| *uint_data = expected_uint; |
| status = clSetKernelArg(writer,0,sizeof(cl_uchar),uchar_data); test_error_ret(status,"set arg",status); |
| status = clSetKernelArg(writer,1,sizeof(cl_uint),uint_data); test_error_ret(status,"set arg",status); |
| status = clEnqueueNDRangeKernel(queue,writer,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue writer",status); |
| status = clFinish(queue); test_error_ret(status,"finish",status); |
| } |
| |
| if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; } |
| align_free(uchar_data); |
| align_free(uint_data); |
| return err; |
| } |
| |
| // Determines whether its valid to skip this test based on the driver version |
| // and the features it optionally supports. |
| // Whether the test should be skipped is writen into the out paramter skip. |
| // The check returns an error code for the clDeviceInfo query. |
| static cl_int should_skip(cl_device_id device, cl_bool& skip) |
| { |
| // Assume we can't skip to begin with. |
| skip = CL_FALSE; |
| |
| // Progvar tests are already skipped for OpenCL < 2.0, so here we only need |
| // to test for 3.0 since that is when program scope global variables become |
| // optional. |
| if (get_device_cl_version(device) >= Version(3, 0)) |
| { |
| size_t max_global_variable_size{}; |
| test_error(clGetDeviceInfo(device, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, |
| sizeof(max_global_variable_size), |
| &max_global_variable_size, nullptr), |
| "clGetDeviceInfo failed"); |
| skip = (max_global_variable_size != 0) ? CL_FALSE : CL_TRUE; |
| } |
| return CL_SUCCESS; |
| } |
| |
| //////////////////// |
| // Global functions |
| |
| |
| // Test support for variables at program scope. Miscellaneous |
| int test_progvar_prog_scope_misc(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) |
| { |
| cl_bool skip{ CL_FALSE }; |
| auto error = should_skip(device, skip); |
| if (CL_SUCCESS != error) |
| { |
| return TEST_FAIL; |
| } |
| if (skip) |
| { |
| log_info("Skipping progvar_prog_scope_misc since it is optionally not " |
| "supported on this device\n"); |
| return TEST_SKIPPED_ITSELF; |
| } |
| size_t max_size = 0; |
| size_t pref_size = 0; |
| |
| cl_int err = CL_SUCCESS; |
| |
| err = l_get_device_info( device, &max_size, &pref_size ); |
| err |= l_build_type_table( device ); |
| |
| err |= l_capacity( device, context, queue, max_size ); |
| err |= l_user_type( device, context, queue, false ); |
| err |= l_user_type( device, context, queue, true ); |
| |
| return err; |
| } |
| |
| |
| // Test support for variables at program scope. Unitialized data |
| int test_progvar_prog_scope_uninit(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) |
| { |
| cl_bool skip{ CL_FALSE }; |
| auto error = should_skip(device, skip); |
| if (CL_SUCCESS != error) |
| { |
| return TEST_FAIL; |
| } |
| if (skip) |
| { |
| log_info( |
| "Skipping progvar_prog_scope_uninit since it is optionally not " |
| "supported on this device\n"); |
| return TEST_SKIPPED_ITSELF; |
| } |
| size_t max_size = 0; |
| size_t pref_size = 0; |
| |
| cl_int err = CL_SUCCESS; |
| |
| err = l_get_device_info( device, &max_size, &pref_size ); |
| err |= l_build_type_table( device ); |
| |
| err |= l_write_read( device, context, queue ); |
| |
| return err; |
| } |
| |
| // Test support for variables at program scope. Initialized data. |
| int test_progvar_prog_scope_init(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) |
| { |
| cl_bool skip{ CL_FALSE }; |
| auto error = should_skip(device, skip); |
| if (CL_SUCCESS != error) |
| { |
| return TEST_FAIL; |
| } |
| if (skip) |
| { |
| log_info("Skipping progvar_prog_scope_init since it is optionally not " |
| "supported on this device\n"); |
| return TEST_SKIPPED_ITSELF; |
| } |
| size_t max_size = 0; |
| size_t pref_size = 0; |
| |
| cl_int err = CL_SUCCESS; |
| |
| err = l_get_device_info( device, &max_size, &pref_size ); |
| err |= l_build_type_table( device ); |
| |
| err |= l_init_write_read( device, context, queue ); |
| |
| return err; |
| } |
| |
| |
| // A simple test for support of static variables inside a kernel. |
| int test_progvar_func_scope(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) |
| { |
| cl_bool skip{ CL_FALSE }; |
| auto error = should_skip(device, skip); |
| if (CL_SUCCESS != error) |
| { |
| return TEST_FAIL; |
| } |
| if (skip) |
| { |
| log_info("Skipping progvar_func_scope since it is optionally not " |
| "supported on this device\n"); |
| return TEST_SKIPPED_ITSELF; |
| } |
| size_t max_size = 0; |
| size_t pref_size = 0; |
| |
| cl_int err = CL_SUCCESS; |
| |
| // Deliberately have two variables with the same name but in different |
| // scopes. |
| // Also, use a large initialized structure in both cases. |
| const char prog_src[] = |
| "typedef struct { char c; int16 i; } mystruct_t;\n" |
| "kernel void test_bump( global int* value, int which ) {\n" |
| " if ( which ) {\n" |
| // Explicit address space. |
| // Last element set to 0 |
| " static global mystruct_t persistent = {'a',(int16)(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,0) };\n" |
| " *value = persistent.i.sf++;\n" |
| " } else {\n" |
| // Implicitly global |
| // Last element set to 100 |
| " static mystruct_t persistent = {'b',(int16)(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,100) };\n" |
| " *value = persistent.i.sf++;\n" |
| " }\n" |
| "}\n"; |
| |
| StringTable ksrc; |
| ksrc.add( prog_src ); |
| |
| int status = CL_SUCCESS; |
| clProgramWrapper program; |
| clKernelWrapper test_bump; |
| |
| status = create_single_kernel_helper_with_build_options(context, &program, &test_bump, ksrc.num_str(), ksrc.strs(), "test_bump", OPTIONS); |
| test_error_ret(status, "Failed to create program for function static variable test", status); |
| |
| // Check size query. |
| size_t used_bytes = 0; |
| status = clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, sizeof(used_bytes), &used_bytes, 0 ); |
| test_error_ret(status,"Failed to query global variable total size",status); |
| size_t expected_size = 2 * sizeof(cl_int); // Two ints. |
| if ( used_bytes < expected_size ) { |
| log_error("Error: program query for global variable total size query failed: Expected at least %llu but got %llu\n", (unsigned long long)expected_size, (unsigned long long)used_bytes ); |
| err |= 1; |
| } |
| |
| // Prepare the data. |
| cl_int counter_value = 0; |
| clMemWrapper counter_value_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(counter_value), &counter_value, &status ) ); |
| test_error_ret(status,"Failed to allocate counter query buffer",status); |
| |
| status = clSetKernelArg(test_bump,0,sizeof(cl_mem),&counter_value_mem); test_error_ret(status,"set arg",status); |
| |
| // Go a few rounds, alternating between the two counters in the kernel. |
| |
| // Same as initial values in kernel. |
| // But "true" which increments the 0-based counter, and "false" which |
| // increments the 100-based counter. |
| cl_int expected_counter[2] = { 100, 0 }; |
| |
| const size_t one = 1; |
| for ( int iround = 0; iround < 5 ; iround++ ) { // Must go at least twice around |
| for ( int iwhich = 0; iwhich < 2 ; iwhich++ ) { // Cover both counters |
| status = clSetKernelArg(test_bump,1,sizeof(iwhich),&iwhich); test_error_ret(status,"set arg",status); |
| status = clEnqueueNDRangeKernel(queue,test_bump,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue test_bump",status); |
| status = clFinish(queue); test_error_ret(status,"finish",status); |
| |
| cl_uchar *counter_value_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, counter_value_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(counter_value), 0, 0, 0, 0); |
| |
| if ( counter_value != expected_counter[iwhich] ) { |
| log_error("Error: Round %d on counter %d: Expected %d but got %d\n", |
| iround, iwhich, expected_counter[iwhich], counter_value ); |
| err |= 1; |
| } |
| expected_counter[iwhich]++; // Emulate behaviour of the kernel. |
| |
| clEnqueueUnmapMemObject(queue, counter_value_mem, counter_value_ptr, 0, 0, 0); |
| } |
| } |
| |
| if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; } |
| |
| return err; |
| } |