| // |
| // Copyright 2012 Francisco Jerez |
| // |
| // Permission is hereby granted, free of charge, to any person obtaining a |
| // copy of this software and associated documentation files (the "Software"), |
| // to deal in the Software without restriction, including without limitation |
| // the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| // and/or sell copies of the Software, and to permit persons to whom the |
| // Software is furnished to do so, subject to the following conditions: |
| // |
| // The above copyright notice and this permission notice shall be included in |
| // all copies or substantial portions of the Software. |
| // |
| // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| // THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
| // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF |
| // OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| // SOFTWARE. |
| // |
| |
| #include "api/util.hpp" |
| #include "core/kernel.hpp" |
| #include "core/event.hpp" |
| |
| using namespace clover; |
| |
| PUBLIC cl_kernel |
| clCreateKernel(cl_program prog, const char *name, |
| cl_int *errcode_ret) try { |
| if (!prog) |
| throw error(CL_INVALID_PROGRAM); |
| |
| if (!name) |
| throw error(CL_INVALID_VALUE); |
| |
| if (prog->binaries().empty()) |
| throw error(CL_INVALID_PROGRAM_EXECUTABLE); |
| |
| auto sym = prog->binaries().begin()->second.sym(name); |
| |
| ret_error(errcode_ret, CL_SUCCESS); |
| return new kernel(*prog, name, { sym.args.begin(), sym.args.end() }); |
| |
| } catch (module::noent_error &e) { |
| ret_error(errcode_ret, CL_INVALID_KERNEL_NAME); |
| return NULL; |
| |
| } catch(error &e) { |
| ret_error(errcode_ret, e); |
| return NULL; |
| } |
| |
| PUBLIC cl_int |
| clCreateKernelsInProgram(cl_program prog, cl_uint count, |
| cl_kernel *kerns, cl_uint *count_ret) { |
| if (!prog) |
| throw error(CL_INVALID_PROGRAM); |
| |
| if (prog->binaries().empty()) |
| throw error(CL_INVALID_PROGRAM_EXECUTABLE); |
| |
| auto &syms = prog->binaries().begin()->second.syms; |
| |
| if (kerns && count < syms.size()) |
| throw error(CL_INVALID_VALUE); |
| |
| if (kerns) |
| std::transform(syms.begin(), syms.end(), kerns, |
| [=](const module::symbol &sym) { |
| return new kernel(*prog, compat::string(sym.name), |
| { sym.args.begin(), sym.args.end() }); |
| }); |
| |
| if (count_ret) |
| *count_ret = syms.size(); |
| |
| return CL_SUCCESS; |
| } |
| |
| PUBLIC cl_int |
| clRetainKernel(cl_kernel kern) { |
| if (!kern) |
| return CL_INVALID_KERNEL; |
| |
| kern->retain(); |
| return CL_SUCCESS; |
| } |
| |
| PUBLIC cl_int |
| clReleaseKernel(cl_kernel kern) { |
| if (!kern) |
| return CL_INVALID_KERNEL; |
| |
| if (kern->release()) |
| delete kern; |
| |
| return CL_SUCCESS; |
| } |
| |
| PUBLIC cl_int |
| clSetKernelArg(cl_kernel kern, cl_uint idx, size_t size, |
| const void *value) try { |
| if (!kern) |
| throw error(CL_INVALID_KERNEL); |
| |
| if (idx >= kern->args.size()) |
| throw error(CL_INVALID_ARG_INDEX); |
| |
| kern->args[idx]->set(size, value); |
| |
| return CL_SUCCESS; |
| |
| } catch(error &e) { |
| return e.get(); |
| } |
| |
| PUBLIC cl_int |
| clGetKernelInfo(cl_kernel kern, cl_kernel_info param, |
| size_t size, void *buf, size_t *size_ret) { |
| if (!kern) |
| return CL_INVALID_KERNEL; |
| |
| switch (param) { |
| case CL_KERNEL_FUNCTION_NAME: |
| return string_property(buf, size, size_ret, kern->name()); |
| |
| case CL_KERNEL_NUM_ARGS: |
| return scalar_property<cl_uint>(buf, size, size_ret, |
| kern->args.size()); |
| |
| case CL_KERNEL_REFERENCE_COUNT: |
| return scalar_property<cl_uint>(buf, size, size_ret, |
| kern->ref_count()); |
| |
| case CL_KERNEL_CONTEXT: |
| return scalar_property<cl_context>(buf, size, size_ret, |
| &kern->prog.ctx); |
| |
| case CL_KERNEL_PROGRAM: |
| return scalar_property<cl_program>(buf, size, size_ret, |
| &kern->prog); |
| |
| default: |
| return CL_INVALID_VALUE; |
| } |
| } |
| |
| PUBLIC cl_int |
| clGetKernelWorkGroupInfo(cl_kernel kern, cl_device_id dev, |
| cl_kernel_work_group_info param, |
| size_t size, void *buf, size_t *size_ret) { |
| if (!kern) |
| return CL_INVALID_KERNEL; |
| |
| if ((!dev && kern->prog.binaries().size() != 1) || |
| (dev && !kern->prog.binaries().count(dev))) |
| return CL_INVALID_DEVICE; |
| |
| switch (param) { |
| case CL_KERNEL_WORK_GROUP_SIZE: |
| return scalar_property<size_t>(buf, size, size_ret, |
| kern->max_block_size()); |
| |
| case CL_KERNEL_COMPILE_WORK_GROUP_SIZE: |
| return vector_property<size_t>(buf, size, size_ret, |
| kern->block_size()); |
| |
| case CL_KERNEL_LOCAL_MEM_SIZE: |
| return scalar_property<cl_ulong>(buf, size, size_ret, |
| kern->mem_local()); |
| |
| case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: |
| return scalar_property<size_t>(buf, size, size_ret, 1); |
| |
| case CL_KERNEL_PRIVATE_MEM_SIZE: |
| return scalar_property<cl_ulong>(buf, size, size_ret, |
| kern->mem_private()); |
| |
| default: |
| return CL_INVALID_VALUE; |
| } |
| } |
| |
| namespace { |
| /// |
| /// Common argument checking shared by kernel invocation commands. |
| /// |
| void |
| kernel_validate(cl_command_queue q, cl_kernel kern, |
| cl_uint dims, const size_t *grid_offset, |
| const size_t *grid_size, const size_t *block_size, |
| cl_uint num_deps, const cl_event *deps, |
| cl_event *ev) { |
| if (!q) |
| throw error(CL_INVALID_COMMAND_QUEUE); |
| |
| if (!kern) |
| throw error(CL_INVALID_KERNEL); |
| |
| if (&kern->prog.ctx != &q->ctx || |
| any_of([&](const cl_event ev) { |
| return &ev->ctx != &q->ctx; |
| }, deps, deps + num_deps)) |
| throw error(CL_INVALID_CONTEXT); |
| |
| if (bool(num_deps) != bool(deps) || |
| any_of(is_zero<cl_event>(), deps, deps + num_deps)) |
| throw error(CL_INVALID_EVENT_WAIT_LIST); |
| |
| if (any_of([](std::unique_ptr<kernel::argument> &arg) { |
| return !arg->set(); |
| }, kern->args.begin(), kern->args.end())) |
| throw error(CL_INVALID_KERNEL_ARGS); |
| |
| if (!kern->prog.binaries().count(&q->dev)) |
| throw error(CL_INVALID_PROGRAM_EXECUTABLE); |
| |
| if (dims < 1 || dims > q->dev.max_block_size().size()) |
| throw error(CL_INVALID_WORK_DIMENSION); |
| |
| if (!grid_size || any_of(is_zero<size_t>(), grid_size, grid_size + dims)) |
| throw error(CL_INVALID_GLOBAL_WORK_SIZE); |
| |
| if (block_size) { |
| if (any_of([](size_t b, size_t max) { |
| return b == 0 || b > max; |
| }, block_size, block_size + dims, |
| q->dev.max_block_size().begin())) |
| throw error(CL_INVALID_WORK_ITEM_SIZE); |
| |
| if (any_of([](size_t b, size_t g) { |
| return g % b; |
| }, block_size, block_size + dims, grid_size)) |
| throw error(CL_INVALID_WORK_GROUP_SIZE); |
| |
| if (fold(std::multiplies<size_t>(), 1u, |
| block_size, block_size + dims) > |
| q->dev.max_threads_per_block()) |
| throw error(CL_INVALID_WORK_GROUP_SIZE); |
| } |
| } |
| |
| /// |
| /// Common event action shared by kernel invocation commands. |
| /// |
| std::function<void (event &)> |
| kernel_op(cl_command_queue q, cl_kernel kern, |
| const std::vector<size_t> &grid_offset, |
| const std::vector<size_t> &grid_size, |
| const std::vector<size_t> &block_size) { |
| const std::vector<size_t> reduced_grid_size = map( |
| std::divides<size_t>(), grid_size.begin(), grid_size.end(), |
| block_size.begin()); |
| |
| return [=](event &) { |
| kern->launch(*q, grid_offset, reduced_grid_size, block_size); |
| }; |
| } |
| |
| template<typename T, typename S> |
| std::vector<T> |
| opt_vector(const T *p, S n) { |
| if (p) |
| return { p, p + n }; |
| else |
| return { n }; |
| } |
| } |
| |
| PUBLIC cl_int |
| clEnqueueNDRangeKernel(cl_command_queue q, cl_kernel kern, |
| cl_uint dims, const size_t *pgrid_offset, |
| const size_t *pgrid_size, const size_t *pblock_size, |
| cl_uint num_deps, const cl_event *deps, |
| cl_event *ev) try { |
| const std::vector<size_t> grid_offset = opt_vector(pgrid_offset, dims); |
| const std::vector<size_t> grid_size = opt_vector(pgrid_size, dims); |
| const std::vector<size_t> block_size = opt_vector(pblock_size, dims); |
| |
| kernel_validate(q, kern, dims, pgrid_offset, pgrid_size, pblock_size, |
| num_deps, deps, ev); |
| |
| hard_event *hev = new hard_event( |
| *q, CL_COMMAND_NDRANGE_KERNEL, { deps, deps + num_deps }, |
| kernel_op(q, kern, grid_offset, grid_size, block_size)); |
| |
| ret_object(ev, hev); |
| return CL_SUCCESS; |
| |
| } catch(error &e) { |
| return e.get(); |
| } |
| |
| PUBLIC cl_int |
| clEnqueueTask(cl_command_queue q, cl_kernel kern, |
| cl_uint num_deps, const cl_event *deps, |
| cl_event *ev) try { |
| const std::vector<size_t> grid_offset = { 0 }; |
| const std::vector<size_t> grid_size = { 1 }; |
| const std::vector<size_t> block_size = { 1 }; |
| |
| kernel_validate(q, kern, 1, grid_offset.data(), grid_size.data(), |
| block_size.data(), num_deps, deps, ev); |
| |
| hard_event *hev = new hard_event( |
| *q, CL_COMMAND_TASK, { deps, deps + num_deps }, |
| kernel_op(q, kern, grid_offset, grid_size, block_size)); |
| |
| ret_object(ev, hev); |
| return CL_SUCCESS; |
| |
| } catch(error &e) { |
| return e.get(); |
| } |
| |
| PUBLIC cl_int |
| clEnqueueNativeKernel(cl_command_queue q, void (*func)(void *), |
| void *args, size_t args_size, |
| cl_uint obj_count, const cl_mem *obj_list, |
| const void **obj_args, cl_uint num_deps, |
| const cl_event *deps, cl_event *ev) { |
| return CL_INVALID_OPERATION; |
| } |