| // |
| // Copyright (c) 2017 The Khronos Group Inc. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| #ifndef TEST_CONFORMANCE_CLCPP_WI_TEST_WORKITEMS_HPP |
| #define TEST_CONFORMANCE_CLCPP_WI_TEST_WORKITEMS_HPP |
| |
| #include <vector> |
| #include <algorithm> |
| #include <random> |
| |
| // Common for all OpenCL C++ tests |
| #include "../common.hpp" |
| |
| |
| namespace test_workitems { |
| |
| struct test_options |
| { |
| bool uniform_work_group_size; |
| size_t max_count; |
| size_t num_tests; |
| }; |
| |
| struct output_type |
| { |
| cl_uint work_dim; |
| cl_ulong global_size[3]; |
| cl_ulong global_id[3]; |
| cl_ulong local_size[3]; |
| cl_ulong enqueued_local_size[3]; |
| cl_ulong local_id[3]; |
| cl_ulong num_groups[3]; |
| cl_ulong group_id[3]; |
| cl_ulong global_offset[3]; |
| cl_ulong global_linear_id; |
| cl_ulong local_linear_id; |
| cl_ulong sub_group_size; |
| cl_ulong max_sub_group_size; |
| cl_ulong num_sub_groups; |
| cl_ulong enqueued_num_sub_groups; |
| cl_ulong sub_group_id; |
| cl_ulong sub_group_local_id; |
| }; |
| |
| const std::string source_common = R"( |
| struct output_type |
| { |
| uint work_dim; |
| ulong global_size[3]; |
| ulong global_id[3]; |
| ulong local_size[3]; |
| ulong enqueued_local_size[3]; |
| ulong local_id[3]; |
| ulong num_groups[3]; |
| ulong group_id[3]; |
| ulong global_offset[3]; |
| ulong global_linear_id; |
| ulong local_linear_id; |
| ulong sub_group_size; |
| ulong max_sub_group_size; |
| ulong num_sub_groups; |
| ulong enqueued_num_sub_groups; |
| ulong sub_group_id; |
| ulong sub_group_local_id; |
| }; |
| )"; |
| |
| // ----------------------------------------------------------------------------------- |
| // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ |
| // ----------------------------------------------------------------------------------- |
| #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) |
| const std::string source = |
| source_common + |
| R"( |
| #ifdef cl_khr_subgroups |
| #pragma OPENCL EXTENSION cl_khr_subgroups : enable |
| #endif |
| |
| kernel void test(global struct output_type *output) |
| { |
| const ulong gid = get_global_linear_id(); |
| output[gid].work_dim = get_work_dim(); |
| for (uint dimindx = 0; dimindx < 3; dimindx++) |
| { |
| output[gid].global_size[dimindx] = get_global_size(dimindx); |
| output[gid].global_id[dimindx] = get_global_id(dimindx); |
| output[gid].local_size[dimindx] = get_local_size(dimindx); |
| output[gid].enqueued_local_size[dimindx] = get_enqueued_local_size(dimindx); |
| output[gid].local_id[dimindx] = get_local_id(dimindx); |
| output[gid].num_groups[dimindx] = get_num_groups(dimindx); |
| output[gid].group_id[dimindx] = get_group_id(dimindx); |
| output[gid].global_offset[dimindx] = get_global_offset(dimindx); |
| } |
| output[gid].global_linear_id = get_global_linear_id(); |
| output[gid].local_linear_id = get_local_linear_id(); |
| #ifdef cl_khr_subgroups |
| output[gid].sub_group_size = get_sub_group_size(); |
| output[gid].max_sub_group_size = get_max_sub_group_size(); |
| output[gid].num_sub_groups = get_num_sub_groups(); |
| output[gid].enqueued_num_sub_groups = get_enqueued_num_sub_groups(); |
| output[gid].sub_group_id = get_sub_group_id(); |
| output[gid].sub_group_local_id = get_sub_group_local_id(); |
| #endif |
| } |
| )"; |
| #else |
| const std::string source = |
| R"( |
| #include <opencl_memory> |
| #include <opencl_work_item> |
| using namespace cl; |
| )" + |
| source_common + |
| R"( |
| |
| kernel void test(global_ptr<output_type[]> output) |
| { |
| const size_t gid = get_global_linear_id(); |
| output[gid].work_dim = get_work_dim(); |
| for (uint dimindx = 0; dimindx < 3; dimindx++) |
| { |
| output[gid].global_size[dimindx] = get_global_size(dimindx); |
| output[gid].global_id[dimindx] = get_global_id(dimindx); |
| output[gid].local_size[dimindx] = get_local_size(dimindx); |
| output[gid].enqueued_local_size[dimindx] = get_enqueued_local_size(dimindx); |
| output[gid].local_id[dimindx] = get_local_id(dimindx); |
| output[gid].num_groups[dimindx] = get_num_groups(dimindx); |
| output[gid].group_id[dimindx] = get_group_id(dimindx); |
| output[gid].global_offset[dimindx] = get_global_offset(dimindx); |
| } |
| output[gid].global_linear_id = get_global_linear_id(); |
| output[gid].local_linear_id = get_local_linear_id(); |
| output[gid].sub_group_size = get_sub_group_size(); |
| output[gid].max_sub_group_size = get_max_sub_group_size(); |
| output[gid].num_sub_groups = get_num_sub_groups(); |
| output[gid].enqueued_num_sub_groups = get_enqueued_num_sub_groups(); |
| output[gid].sub_group_id = get_sub_group_id(); |
| output[gid].sub_group_local_id = get_sub_group_local_id(); |
| } |
| |
| )"; |
| #endif |
| |
| #define CHECK_EQUAL(result, expected, func_name) \ |
| if (result != expected) \ |
| { \ |
| RETURN_ON_ERROR_MSG(-1, \ |
| "Function %s failed. Expected: %s, got: %s", func_name, \ |
| format_value(expected).c_str(), format_value(result).c_str() \ |
| ); \ |
| } |
| |
| #define CHECK(expression, func_name) \ |
| if (expression) \ |
| { \ |
| RETURN_ON_ERROR_MSG(-1, \ |
| "Function %s returned incorrect result", func_name \ |
| ); \ |
| } |
| |
| int test_workitems(cl_device_id device, cl_context context, cl_command_queue queue, test_options options) |
| { |
| int error = CL_SUCCESS; |
| |
| cl_program program; |
| cl_kernel kernel; |
| |
| std::string kernel_name = "test"; |
| |
| // ----------------------------------------------------------------------------------- |
| // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ |
| // ----------------------------------------------------------------------------------- |
| // Only OpenCL C++ to SPIR-V compilation |
| #if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) |
| error = create_opencl_kernel( |
| context, &program, &kernel, |
| source, kernel_name |
| ); |
| RETURN_ON_ERROR(error) |
| return error; |
| // Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) |
| #elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) |
| error = create_opencl_kernel( |
| context, &program, &kernel, |
| source, kernel_name, "-cl-std=CL2.0", false |
| ); |
| RETURN_ON_ERROR(error) |
| // Normal run |
| #else |
| error = create_opencl_kernel( |
| context, &program, &kernel, |
| source, kernel_name |
| ); |
| RETURN_ON_ERROR(error) |
| #endif |
| |
| size_t max_work_group_size; |
| size_t max_local_sizes[3]; |
| error = get_max_allowed_work_group_size(context, kernel, &max_work_group_size, max_local_sizes); |
| RETURN_ON_ERROR(error) |
| |
| bool check_sub_groups = true; |
| bool check_sub_groups_limits = true; |
| #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) |
| check_sub_groups = false; |
| check_sub_groups_limits = false; |
| if (is_extension_available(device, "cl_khr_subgroups")) |
| { |
| Version version = get_device_cl_version(device); |
| RETURN_ON_ERROR(error) |
| check_sub_groups_limits = (version >= Version(2,1)); // clGetKernelSubGroupInfo is from 2.1 |
| check_sub_groups = true; |
| } |
| #endif |
| |
| std::random_device rd; |
| std::mt19937 gen(rd()); |
| std::uniform_int_distribution<size_t> count_dis(1, options.max_count); |
| |
| for (int test = 0; test < options.num_tests; test++) |
| { |
| for (size_t dim = 1; dim <= 3; dim++) |
| { |
| size_t global_size[3] = { 1, 1, 1 }; |
| size_t global_offset[3] = { 0, 0, 0 }; |
| size_t enqueued_local_size[3] = { 1, 1, 1 }; |
| size_t count = count_dis(gen); |
| std::uniform_int_distribution<size_t> global_size_dis(1, static_cast<size_t>(pow(count, 1.0 / dim))); |
| for (int d = 0; d < dim; d++) |
| { |
| std::uniform_int_distribution<size_t> enqueued_local_size_dis(1, max_local_sizes[d]); |
| global_size[d] = global_size_dis(gen); |
| global_offset[d] = global_size_dis(gen); |
| enqueued_local_size[d] = enqueued_local_size_dis(gen); |
| } |
| // Local work size must not exceed CL_KERNEL_WORK_GROUP_SIZE for this kernel |
| while (enqueued_local_size[0] * enqueued_local_size[1] * enqueued_local_size[2] > max_work_group_size) |
| { |
| // otherwise decrease it until it fits |
| for (int d = 0; d < dim; d++) |
| { |
| enqueued_local_size[d] = (std::max)((size_t)1, enqueued_local_size[d] / 2); |
| } |
| } |
| if (options.uniform_work_group_size) |
| { |
| for (int d = 0; d < dim; d++) |
| { |
| global_size[d] = get_uniform_global_size(global_size[d], enqueued_local_size[d]); |
| } |
| } |
| count = global_size[0] * global_size[1] * global_size[2]; |
| |
| cl_mem output_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(output_type) * count, NULL, &error); |
| RETURN_ON_CL_ERROR(error, "clCreateBuffer") |
| |
| const char pattern = 0; |
| error = clEnqueueFillBuffer(queue, output_buffer, &pattern, sizeof(pattern), 0, sizeof(output_type) * count, 0, NULL, NULL); |
| RETURN_ON_CL_ERROR(error, "clEnqueueFillBuffer") |
| |
| error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer); |
| RETURN_ON_CL_ERROR(error, "clSetKernelArg") |
| |
| error = clEnqueueNDRangeKernel(queue, kernel, dim, global_offset, global_size, enqueued_local_size, 0, NULL, NULL); |
| RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") |
| |
| std::vector<output_type> output(count); |
| error = clEnqueueReadBuffer( |
| queue, output_buffer, CL_TRUE, |
| 0, sizeof(output_type) * count, |
| static_cast<void *>(output.data()), |
| 0, NULL, NULL |
| ); |
| RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") |
| |
| error = clReleaseMemObject(output_buffer); |
| RETURN_ON_CL_ERROR(error, "clReleaseMemObject") |
| |
| size_t sub_group_count_for_ndrange = 0; |
| size_t max_sub_group_size_for_ndrange = 0; |
| if (check_sub_groups_limits) |
| { |
| error = clGetKernelSubGroupInfo(kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE, |
| sizeof(size_t) * dim, enqueued_local_size, |
| sizeof(size_t), &sub_group_count_for_ndrange, NULL); |
| RETURN_ON_CL_ERROR(error, "clGetKernelSubGroupInfo") |
| |
| error = clGetKernelSubGroupInfo(kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, |
| sizeof(size_t) * dim, enqueued_local_size, |
| sizeof(size_t), &max_sub_group_size_for_ndrange, NULL); |
| RETURN_ON_CL_ERROR(error, "clGetKernelSubGroupInfo") |
| } |
| |
| size_t num_groups[3]; |
| for (int d = 0; d < 3; d++) |
| num_groups[d] = static_cast<size_t>(std::ceil(static_cast<double>(global_size[d]) / enqueued_local_size[d])); |
| |
| size_t group_id[3]; |
| for (group_id[0] = 0; group_id[0] < num_groups[0]; group_id[0]++) |
| for (group_id[1] = 0; group_id[1] < num_groups[1]; group_id[1]++) |
| for (group_id[2] = 0; group_id[2] < num_groups[2]; group_id[2]++) |
| { |
| size_t local_size[3]; |
| for (int d = 0; d < 3; d++) |
| { |
| if (group_id[d] == num_groups[d] - 1) |
| local_size[d] = global_size[d] - group_id[d] * enqueued_local_size[d]; |
| else |
| local_size[d] = enqueued_local_size[d]; |
| } |
| |
| size_t local_id[3]; |
| for (local_id[0] = 0; local_id[0] < local_size[0]; local_id[0]++) |
| for (local_id[1] = 0; local_id[1] < local_size[1]; local_id[1]++) |
| for (local_id[2] = 0; local_id[2] < local_size[2]; local_id[2]++) |
| { |
| size_t global_id_wo_offset[3]; |
| size_t global_id[3]; |
| for (int d = 0; d < 3; d++) |
| { |
| global_id_wo_offset[d] = group_id[d] * enqueued_local_size[d] + local_id[d]; |
| global_id[d] = global_id_wo_offset[d] + global_offset[d]; |
| } |
| |
| // Ignore if the current work-item is outside of global work size (i.e. the work-group is non-uniform) |
| if (global_id_wo_offset[0] >= global_size[0] || |
| global_id_wo_offset[1] >= global_size[1] || |
| global_id_wo_offset[2] >= global_size[2]) break; |
| |
| const size_t global_linear_id = |
| global_id_wo_offset[2] * global_size[1] * global_size[0] + |
| global_id_wo_offset[1] * global_size[0] + |
| global_id_wo_offset[0]; |
| const size_t local_linear_id = |
| local_id[2] * local_size[1] * local_size[0] + |
| local_id[1] * local_size[0] + |
| local_id[0]; |
| |
| const output_type &o = output[global_linear_id]; |
| |
| CHECK_EQUAL(o.work_dim, dim, "get_work_dim") |
| for (int d = 0; d < 3; d++) |
| { |
| CHECK_EQUAL(o.global_size[d], global_size[d], "get_global_size") |
| CHECK_EQUAL(o.global_id[d], global_id[d], "get_global_id") |
| CHECK_EQUAL(o.local_size[d], local_size[d], "get_local_size") |
| CHECK_EQUAL(o.enqueued_local_size[d], enqueued_local_size[d], "get_enqueued_local_size") |
| CHECK_EQUAL(o.local_id[d], local_id[d], "get_local_id") |
| CHECK_EQUAL(o.num_groups[d], num_groups[d], "get_num_groups") |
| CHECK_EQUAL(o.group_id[d], group_id[d], "get_group_id") |
| CHECK_EQUAL(o.global_offset[d], global_offset[d], "get_global_offset") |
| } |
| |
| CHECK_EQUAL(o.global_linear_id, global_linear_id, "get_global_linear_id") |
| CHECK_EQUAL(o.local_linear_id, local_linear_id, "get_local_linear_id") |
| |
| // A few (but not all possible) sub-groups related checks |
| if (check_sub_groups) |
| { |
| if (check_sub_groups_limits) |
| { |
| CHECK_EQUAL(o.max_sub_group_size, max_sub_group_size_for_ndrange, "get_max_sub_group_size") |
| CHECK_EQUAL(o.enqueued_num_sub_groups, sub_group_count_for_ndrange, "get_enqueued_num_sub_groups") |
| } |
| CHECK(o.sub_group_size == 0 || o.sub_group_size > o.max_sub_group_size, "get_sub_group_size or get_max_sub_group_size") |
| CHECK(o.num_sub_groups == 0 || o.num_sub_groups > o.enqueued_num_sub_groups, "get_enqueued_num_sub_groups") |
| CHECK(o.sub_group_id >= o.num_sub_groups, "get_sub_group_id or get_num_sub_groups") |
| CHECK(o.sub_group_local_id >= o.sub_group_size, "get_sub_group_local_id or get_sub_group_size") |
| } |
| } |
| } |
| } |
| } |
| |
| clReleaseKernel(kernel); |
| clReleaseProgram(program); |
| return error; |
| } |
| |
| #undef CHECK_EQUAL |
| #undef CHECK |
| |
| AUTO_TEST_CASE(test_workitems_uniform) |
| (cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) |
| { |
| test_options options; |
| options.uniform_work_group_size = true; |
| options.max_count = num_elements; |
| options.num_tests = 1000; |
| return test_workitems(device, context, queue, options); |
| } |
| |
| AUTO_TEST_CASE(test_workitems_non_uniform) |
| (cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) |
| { |
| test_options options; |
| options.uniform_work_group_size = false; |
| options.max_count = num_elements; |
| options.num_tests = 1000; |
| return test_workitems(device, context, queue, options); |
| } |
| |
| } // namespace |
| |
| #endif // TEST_CONFORMANCE_CLCPP_WI_TEST_WORKITEMS_HPP |