blob: 2380eafe39bd8511d50dcfb84e2319ed1265b2d3 [file] [log] [blame]
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_REQUIRED_NUM_SUB_GROUPS_HPP
#define TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_REQUIRED_NUM_SUB_GROUPS_HPP
#include <sstream>
#include <string>
#include <vector>
#include <random>
// Common for all OpenCL C++ tests
#include "../common.hpp"
namespace test_required_num_sub_groups {
struct test_options
{
size_t num_sub_groups;
bool spec_const;
size_t max_count;
size_t num_tests;
};
struct output_type
{
cl_ulong num_sub_groups;
cl_ulong enqueued_num_sub_groups;
};
const std::string source_common = R"(
struct output_type
{
ulong num_sub_groups;
ulong enqueued_num_sub_groups;
};
)";
// -----------------------------------------------------------------------------------
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
// -----------------------------------------------------------------------------------
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
std::string generate_source(test_options options)
{
std::stringstream s;
s << source_common;
s << R"(
#pragma OPENCL EXTENSION cl_khr_subgroups : enable
kernel void test(global struct output_type *output)
{
const ulong gid = get_global_linear_id();
output[gid].num_sub_groups = get_num_sub_groups();
output[gid].enqueued_num_sub_groups = get_enqueued_num_sub_groups();
}
)";
return s.str();
}
#else
std::string generate_source(test_options options)
{
std::stringstream s;
s << R"(
#include <opencl_memory>
#include <opencl_work_item>
using namespace cl;
)";
if (options.spec_const)
{
s << "#include <opencl_spec_constant>" << std::endl;
s << "cl::spec_constant<uint, 1> num_sub_groups_spec{ 1234567890 };" << std::endl;
}
s << source_common << std::endl;
s << "[[cl::required_num_sub_groups(" << (options.spec_const ? "num_sub_groups_spec" : std::to_string(options.num_sub_groups)) << ")]]";
s << R"(
kernel void test(global_ptr<output_type[]> output)
{
const ulong gid = get_global_linear_id();
output[gid].num_sub_groups = get_num_sub_groups();
output[gid].enqueued_num_sub_groups = get_enqueued_num_sub_groups();
}
)";
return s.str();
}
#endif
int test(cl_device_id device, cl_context context, cl_command_queue queue, test_options options)
{
int error = CL_SUCCESS;
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
if (!is_extension_available(device, "cl_khr_subgroups"))
{
log_info("SKIPPED: Extension `cl_khr_subgroups` is not supported. Skipping tests.\n");
return CL_SUCCESS;
}
#endif
cl_program program;
cl_kernel kernel;
std::string kernel_name = "test";
std::string source = generate_source(options);
// -----------------------------------------------------------------------------------
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
// -----------------------------------------------------------------------------------
// Only OpenCL C++ to SPIR-V compilation
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
error = create_opencl_kernel(
context, &program, &kernel,
source, kernel_name
);
RETURN_ON_ERROR(error)
return error;
// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
error = create_opencl_kernel(
context, &program, &kernel,
source, kernel_name, "-cl-std=CL2.0", false
);
RETURN_ON_ERROR(error)
// Normal run
#else
const char *source_c_str = source.c_str();
error = create_openclcpp_program(context, &program, 1, &source_c_str, "");
RETURN_ON_ERROR(error)
if (options.spec_const)
{
cl_uint spec_num_sub_groups = static_cast<cl_uint>(options.num_sub_groups);
error = clSetProgramSpecializationConstant(program, 1, sizeof(cl_uint), &spec_num_sub_groups);
RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
}
error = build_program_create_kernel_helper(
context, &program, &kernel, 1, &source_c_str, kernel_name.c_str()
);
RETURN_ON_ERROR(error)
#endif
size_t compile_num_sub_groups;
error = clGetKernelSubGroupInfo(kernel, device, CL_KERNEL_COMPILE_NUM_SUB_GROUPS,
0, NULL,
sizeof(size_t), &compile_num_sub_groups, NULL);
RETURN_ON_CL_ERROR(error, "clGetKernelSubGroupInfo")
if (compile_num_sub_groups != options.num_sub_groups)
{
RETURN_ON_ERROR_MSG(-1,
"CL_KERNEL_COMPILE_NUM_SUB_GROUPS did not return correct value (expected %lu, got %lu)",
options.num_sub_groups, compile_num_sub_groups
)
}
cl_mem output_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(output_type) * options.max_count, NULL, &error);
RETURN_ON_CL_ERROR(error, "clCreateBuffer")
error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
RETURN_ON_CL_ERROR(error, "clSetKernelArg")
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_int_distribution<size_t> count_dis(1, options.max_count);
for (size_t test = 0; test < options.num_tests; test++)
{
for (size_t dim = 1; dim <= 3; dim++)
{
size_t global_size[3] = { 1, 1, 1 };
size_t count = count_dis(gen);
std::uniform_int_distribution<size_t> global_size_dis(1, static_cast<size_t>(pow(count, 1.0 / dim)));
for (size_t d = 0; d < dim; d++)
{
global_size[d] = global_size_dis(gen);
}
count = global_size[0] * global_size[1] * global_size[2];
size_t local_size[3] = { 1, 1, 1 };
error = clGetKernelSubGroupInfo(kernel, device, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT,
sizeof(size_t), &options.num_sub_groups,
sizeof(size_t) * dim, local_size, NULL);
RETURN_ON_CL_ERROR(error, "clGetKernelSubGroupInfo")
if (local_size[0] == 0 || local_size[1] != 1 || local_size[2] != 1)
{
RETURN_ON_ERROR_MSG(-1,
"CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT did not return correct value"
)
}
size_t sub_group_count_for_ndrange;
error = clGetKernelSubGroupInfo(kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE,
sizeof(size_t) * dim, local_size,
sizeof(size_t), &sub_group_count_for_ndrange, NULL);
RETURN_ON_CL_ERROR(error, "clGetKernelSubGroupInfo")
if (sub_group_count_for_ndrange != options.num_sub_groups)
{
RETURN_ON_ERROR_MSG(-1,
"CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE did not return correct value (expected %lu, got %lu)",
options.num_sub_groups, sub_group_count_for_ndrange
)
}
const char pattern = 0;
error = clEnqueueFillBuffer(queue, output_buffer, &pattern, sizeof(pattern), 0, sizeof(output_type) * count, 0, NULL, NULL);
RETURN_ON_CL_ERROR(error, "clEnqueueFillBuffer")
error = clEnqueueNDRangeKernel(queue, kernel, dim, NULL, global_size, local_size, 0, NULL, NULL);
RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
std::vector<output_type> output(count);
error = clEnqueueReadBuffer(
queue, output_buffer, CL_TRUE,
0, sizeof(output_type) * count,
static_cast<void *>(output.data()),
0, NULL, NULL
);
RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
for (size_t gid = 0; gid < count; gid++)
{
const output_type &o = output[gid];
if (o.enqueued_num_sub_groups != options.num_sub_groups)
{
RETURN_ON_ERROR_MSG(-1, "get_enqueued_num_sub_groups does not equal to required_num_sub_groups")
}
if (o.num_sub_groups > options.num_sub_groups)
{
RETURN_ON_ERROR_MSG(-1, "get_num_sub_groups did not return correct value")
}
}
}
}
clReleaseMemObject(output_buffer);
clReleaseKernel(kernel);
clReleaseProgram(program);
return error;
}
AUTO_TEST_CASE(test_required_num_sub_groups)
(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
int error = CL_SUCCESS;
cl_uint max_num_sub_groups;
error = clGetDeviceInfo(device, CL_DEVICE_MAX_NUM_SUB_GROUPS, sizeof(max_num_sub_groups), &max_num_sub_groups, NULL);
RETURN_ON_CL_ERROR(error, "clGetDeviceInfo")
for (bool spec_const : { false, true })
for (size_t num_sub_groups = 1; num_sub_groups <= max_num_sub_groups; num_sub_groups++)
{
test_options options;
options.spec_const = spec_const;
options.num_sub_groups = num_sub_groups;
options.num_tests = 100;
options.max_count = num_elements;
error = test(device, context, queue, options);
RETURN_ON_ERROR(error)
}
return error;
}
} // namespace
#endif // TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_REQUIRED_NUM_SUB_GROUPS_HPP