blob: 6fa8bd63453c86f3d9e5aa7a840c5a6b92afadd6 [file] [log] [blame]
//
// Copyright 2012 Francisco Jerez
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
#include "core/kernel.hpp"
#include "core/resource.hpp"
#include "pipe/p_context.h"
using namespace clover;
_cl_kernel::_cl_kernel(clover::program &prog,
const std::string &name,
const std::vector<clover::module::argument> &args) :
prog(prog), __name(name), exec(*this) {
for (auto arg : args) {
if (arg.type == module::argument::scalar)
this->args.emplace_back(new scalar_argument(arg.size));
else if (arg.type == module::argument::global)
this->args.emplace_back(new global_argument(arg.size));
else if (arg.type == module::argument::local)
this->args.emplace_back(new local_argument());
else if (arg.type == module::argument::constant)
this->args.emplace_back(new constant_argument());
else if (arg.type == module::argument::image2d_rd ||
arg.type == module::argument::image3d_rd)
this->args.emplace_back(new image_rd_argument());
else if (arg.type == module::argument::image2d_wr ||
arg.type == module::argument::image3d_wr)
this->args.emplace_back(new image_wr_argument());
else if (arg.type == module::argument::sampler)
this->args.emplace_back(new sampler_argument());
else
throw error(CL_INVALID_KERNEL_DEFINITION);
}
}
template<typename T, typename V>
static inline std::vector<T>
pad_vector(clover::command_queue &q, const V &v, T x) {
std::vector<T> w { v.begin(), v.end() };
w.resize(q.dev.max_block_size().size(), x);
return w;
}
void
_cl_kernel::launch(clover::command_queue &q,
const std::vector<size_t> &grid_offset,
const std::vector<size_t> &grid_size,
const std::vector<size_t> &block_size) {
void *st = exec.bind(&q);
auto g_handles = map([&](size_t h) { return (uint32_t *)&exec.input[h]; },
exec.g_handles.begin(), exec.g_handles.end());
q.pipe->bind_compute_state(q.pipe, st);
q.pipe->bind_compute_sampler_states(q.pipe, 0, exec.samplers.size(),
exec.samplers.data());
q.pipe->set_compute_sampler_views(q.pipe, 0, exec.sviews.size(),
exec.sviews.data());
q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(),
exec.resources.data());
q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(),
exec.g_buffers.data(), g_handles.data());
q.pipe->launch_grid(q.pipe,
pad_vector<uint>(q, block_size, 1).data(),
pad_vector<uint>(q, grid_size, 1).data(),
module(q).sym(__name).offset,
exec.input.data());
q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(), NULL, NULL);
q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(), NULL);
q.pipe->set_compute_sampler_views(q.pipe, 0, exec.sviews.size(), NULL);
q.pipe->bind_compute_sampler_states(q.pipe, 0, exec.samplers.size(), NULL);
exec.unbind();
}
size_t
_cl_kernel::mem_local() const {
size_t sz = 0;
for (auto &arg : args) {
if (dynamic_cast<local_argument *>(arg.get()))
sz += arg->storage();
}
return sz;
}
size_t
_cl_kernel::mem_private() const {
return 0;
}
size_t
_cl_kernel::max_block_size() const {
return SIZE_MAX;
}
const std::string &
_cl_kernel::name() const {
return __name;
}
std::vector<size_t>
_cl_kernel::block_size() const {
return { 0, 0, 0 };
}
const clover::module &
_cl_kernel::module(const clover::command_queue &q) const {
return prog.binaries().find(&q.dev)->second;
}
_cl_kernel::exec_context::exec_context(clover::kernel &kern) :
kern(kern), q(NULL), mem_local(0), st(NULL) {
}
_cl_kernel::exec_context::~exec_context() {
if (st)
q->pipe->delete_compute_state(q->pipe, st);
}
void *
_cl_kernel::exec_context::bind(clover::command_queue *__q) {
std::swap(q, __q);
for (auto &arg : kern.args)
arg->bind(*this);
// Create a new compute state if anything changed.
if (!st || q != __q ||
cs.req_local_mem != mem_local ||
cs.req_input_mem != input.size()) {
if (st)
__q->pipe->delete_compute_state(__q->pipe, st);
cs.prog = kern.module(*q).sec(module::section::text).data.begin();
cs.req_local_mem = mem_local;
cs.req_input_mem = input.size();
st = q->pipe->create_compute_state(q->pipe, &cs);
}
return st;
}
void
_cl_kernel::exec_context::unbind() {
for (auto &arg : kern.args)
arg->unbind(*this);
input.clear();
samplers.clear();
sviews.clear();
resources.clear();
g_buffers.clear();
g_handles.clear();
mem_local = 0;
}
_cl_kernel::argument::argument(size_t size) :
__size(size), __set(false) {
}
bool
_cl_kernel::argument::set() const {
return __set;
}
size_t
_cl_kernel::argument::storage() const {
return 0;
}
_cl_kernel::scalar_argument::scalar_argument(size_t size) :
argument(size) {
}
void
_cl_kernel::scalar_argument::set(size_t size, const void *value) {
if (size != __size)
throw error(CL_INVALID_ARG_SIZE);
v = { (uint8_t *)value, (uint8_t *)value + size };
__set = true;
}
void
_cl_kernel::scalar_argument::bind(exec_context &ctx) {
ctx.input.insert(ctx.input.end(), v.begin(), v.end());
}
void
_cl_kernel::scalar_argument::unbind(exec_context &ctx) {
}
_cl_kernel::global_argument::global_argument(size_t size) :
argument(size) {
}
void
_cl_kernel::global_argument::set(size_t size, const void *value) {
if (size != sizeof(cl_mem))
throw error(CL_INVALID_ARG_SIZE);
obj = dynamic_cast<clover::buffer *>(*(cl_mem *)value);
__set = true;
}
void
_cl_kernel::global_argument::bind(exec_context &ctx) {
size_t offset = ctx.input.size();
size_t idx = ctx.g_buffers.size();
ctx.input.resize(offset + __size);
ctx.g_buffers.resize(idx + 1);
ctx.g_buffers[idx] = obj->resource(ctx.q).pipe;
ctx.g_handles.resize(idx + 1);
ctx.g_handles[idx] = offset;
}
void
_cl_kernel::global_argument::unbind(exec_context &ctx) {
}
_cl_kernel::local_argument::local_argument() :
argument(sizeof(uint32_t)) {
}
size_t
_cl_kernel::local_argument::storage() const {
return __storage;
}
void
_cl_kernel::local_argument::set(size_t size, const void *value) {
if (value)
throw error(CL_INVALID_ARG_VALUE);
__storage = size;
__set = true;
}
void
_cl_kernel::local_argument::bind(exec_context &ctx) {
size_t offset = ctx.input.size();
size_t ptr = ctx.mem_local;
ctx.input.resize(offset + sizeof(uint32_t));
*(uint32_t *)&ctx.input[offset] = ptr;
ctx.mem_local += __storage;
}
void
_cl_kernel::local_argument::unbind(exec_context &ctx) {
}
_cl_kernel::constant_argument::constant_argument() :
argument(sizeof(uint32_t)) {
}
void
_cl_kernel::constant_argument::set(size_t size, const void *value) {
if (size != sizeof(cl_mem))
throw error(CL_INVALID_ARG_SIZE);
obj = dynamic_cast<clover::buffer *>(*(cl_mem *)value);
__set = true;
}
void
_cl_kernel::constant_argument::bind(exec_context &ctx) {
size_t offset = ctx.input.size();
size_t idx = ctx.resources.size();
ctx.input.resize(offset + sizeof(uint32_t));
*(uint32_t *)&ctx.input[offset] = idx << 24;
ctx.resources.resize(idx + 1);
ctx.resources[idx] = st = obj->resource(ctx.q).bind_surface(*ctx.q, false);
}
void
_cl_kernel::constant_argument::unbind(exec_context &ctx) {
obj->resource(ctx.q).unbind_surface(*ctx.q, st);
}
_cl_kernel::image_rd_argument::image_rd_argument() :
argument(sizeof(uint32_t)) {
}
void
_cl_kernel::image_rd_argument::set(size_t size, const void *value) {
if (size != sizeof(cl_mem))
throw error(CL_INVALID_ARG_SIZE);
obj = dynamic_cast<clover::image *>(*(cl_mem *)value);
__set = true;
}
void
_cl_kernel::image_rd_argument::bind(exec_context &ctx) {
size_t offset = ctx.input.size();
size_t idx = ctx.sviews.size();
ctx.input.resize(offset + sizeof(uint32_t));
*(uint32_t *)&ctx.input[offset] = idx;
ctx.sviews.resize(idx + 1);
ctx.sviews[idx] = st = obj->resource(ctx.q).bind_sampler_view(*ctx.q);
}
void
_cl_kernel::image_rd_argument::unbind(exec_context &ctx) {
obj->resource(ctx.q).unbind_sampler_view(*ctx.q, st);
}
_cl_kernel::image_wr_argument::image_wr_argument() :
argument(sizeof(uint32_t)) {
}
void
_cl_kernel::image_wr_argument::set(size_t size, const void *value) {
if (size != sizeof(cl_mem))
throw error(CL_INVALID_ARG_SIZE);
obj = dynamic_cast<clover::image *>(*(cl_mem *)value);
__set = true;
}
void
_cl_kernel::image_wr_argument::bind(exec_context &ctx) {
size_t offset = ctx.input.size();
size_t idx = ctx.resources.size();
ctx.input.resize(offset + sizeof(uint32_t));
*(uint32_t *)&ctx.input[offset] = idx;
ctx.resources.resize(idx + 1);
ctx.resources[idx] = st = obj->resource(ctx.q).bind_surface(*ctx.q, true);
}
void
_cl_kernel::image_wr_argument::unbind(exec_context &ctx) {
obj->resource(ctx.q).unbind_surface(*ctx.q, st);
}
_cl_kernel::sampler_argument::sampler_argument() :
argument(0) {
}
void
_cl_kernel::sampler_argument::set(size_t size, const void *value) {
if (size != sizeof(cl_sampler))
throw error(CL_INVALID_ARG_SIZE);
obj = *(cl_sampler *)value;
__set = true;
}
void
_cl_kernel::sampler_argument::bind(exec_context &ctx) {
size_t idx = ctx.samplers.size();
ctx.samplers.resize(idx + 1);
ctx.samplers[idx] = st = obj->bind(*ctx.q);
}
void
_cl_kernel::sampler_argument::unbind(exec_context &ctx) {
obj->unbind(*ctx.q, st);
}