| // |
| // Copyright 2012 Francisco Jerez |
| // |
| // Permission is hereby granted, free of charge, to any person obtaining a |
| // copy of this software and associated documentation files (the "Software"), |
| // to deal in the Software without restriction, including without limitation |
| // the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| // and/or sell copies of the Software, and to permit persons to whom the |
| // Software is furnished to do so, subject to the following conditions: |
| // |
| // The above copyright notice and this permission notice shall be included in |
| // all copies or substantial portions of the Software. |
| // |
| // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| // THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
| // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF |
| // OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| // SOFTWARE. |
| // |
| |
| #include <cstring> |
| |
| #include "api/util.hpp" |
| #include "core/event.hpp" |
| #include "core/resource.hpp" |
| |
| using namespace clover; |
| |
| namespace { |
| typedef resource::point point; |
| |
| /// |
| /// Common argument checking shared by memory transfer commands. |
| /// |
| void |
| validate_base(cl_command_queue q, cl_uint num_deps, const cl_event *deps) { |
| if (!q) |
| throw error(CL_INVALID_COMMAND_QUEUE); |
| |
| if (bool(num_deps) != bool(deps) || |
| any_of(is_zero<cl_event>(), deps, deps + num_deps)) |
| throw error(CL_INVALID_EVENT_WAIT_LIST); |
| |
| if (any_of([&](const cl_event ev) { |
| return &ev->ctx != &q->ctx; |
| }, deps, deps + num_deps)) |
| throw error(CL_INVALID_CONTEXT); |
| } |
| |
| /// |
| /// Memory object-specific argument checking shared by most memory |
| /// transfer commands. |
| /// |
| void |
| validate_obj(cl_command_queue q, cl_mem obj) { |
| if (!obj) |
| throw error(CL_INVALID_MEM_OBJECT); |
| |
| if (&obj->ctx != &q->ctx) |
| throw error(CL_INVALID_CONTEXT); |
| } |
| |
| /// |
| /// Class that encapsulates the task of mapping an object of type |
| /// \a T. The return value of get() should be implicitly |
| /// convertible to \a void *. |
| /// |
| template<typename T> struct __map; |
| |
| template<> struct __map<void *> { |
| static void * |
| get(cl_command_queue q, void *obj, cl_map_flags flags, |
| size_t offset, size_t size) { |
| return (char *)obj + offset; |
| } |
| }; |
| |
| template<> struct __map<const void *> { |
| static const void * |
| get(cl_command_queue q, const void *obj, cl_map_flags flags, |
| size_t offset, size_t size) { |
| return (const char *)obj + offset; |
| } |
| }; |
| |
| template<> struct __map<memory_obj *> { |
| static mapping |
| get(cl_command_queue q, memory_obj *obj, cl_map_flags flags, |
| size_t offset, size_t size) { |
| return { *q, obj->resource(q), flags, true, { offset }, { size }}; |
| } |
| }; |
| |
| /// |
| /// Software copy from \a src_obj to \a dst_obj. They can be |
| /// either pointers or memory objects. |
| /// |
| template<typename T, typename S> |
| std::function<void (event &)> |
| soft_copy_op(cl_command_queue q, |
| T dst_obj, const point &dst_orig, const point &dst_pitch, |
| S src_obj, const point &src_orig, const point &src_pitch, |
| const point ®ion) { |
| return [=](event &) { |
| auto dst = __map<T>::get(q, dst_obj, CL_MAP_WRITE, |
| dst_pitch(dst_orig), dst_pitch(region)); |
| auto src = __map<S>::get(q, src_obj, CL_MAP_READ, |
| src_pitch(src_orig), src_pitch(region)); |
| point p; |
| |
| for (p[2] = 0; p[2] < region[2]; ++p[2]) { |
| for (p[1] = 0; p[1] < region[1]; ++p[1]) { |
| std::memcpy(static_cast<char *>(dst) + dst_pitch(p), |
| static_cast<const char *>(src) + src_pitch(p), |
| src_pitch[0] * region[0]); |
| } |
| } |
| }; |
| } |
| |
| /// |
| /// Hardware copy from \a src_obj to \a dst_obj. |
| /// |
| template<typename T, typename S> |
| std::function<void (event &)> |
| hard_copy_op(cl_command_queue q, T dst_obj, const point &dst_orig, |
| S src_obj, const point &src_orig, const point ®ion) { |
| return [=](event &) { |
| dst_obj->resource(q).copy(*q, dst_orig, region, |
| src_obj->resource(q), src_orig); |
| }; |
| } |
| } |
| |
| PUBLIC cl_int |
| clEnqueueReadBuffer(cl_command_queue q, cl_mem obj, cl_bool blocking, |
| size_t offset, size_t size, void *ptr, |
| cl_uint num_deps, const cl_event *deps, |
| cl_event *ev) try { |
| validate_base(q, num_deps, deps); |
| validate_obj(q, obj); |
| |
| if (!ptr || offset > obj->size() || offset + size > obj->size()) |
| throw error(CL_INVALID_VALUE); |
| |
| hard_event *hev = new hard_event( |
| *q, CL_COMMAND_READ_BUFFER, { deps, deps + num_deps }, |
| soft_copy_op(q, |
| ptr, { 0 }, { 1 }, |
| obj, { offset }, { 1 }, |
| { size, 1, 1 })); |
| |
| ret_object(ev, hev); |
| return CL_SUCCESS; |
| |
| } catch (error &e) { |
| return e.get(); |
| } |
| |
| PUBLIC cl_int |
| clEnqueueWriteBuffer(cl_command_queue q, cl_mem obj, cl_bool blocking, |
| size_t offset, size_t size, const void *ptr, |
| cl_uint num_deps, const cl_event *deps, |
| cl_event *ev) try { |
| validate_base(q, num_deps, deps); |
| validate_obj(q, obj); |
| |
| if (!ptr || offset > obj->size() || offset + size > obj->size()) |
| throw error(CL_INVALID_VALUE); |
| |
| hard_event *hev = new hard_event( |
| *q, CL_COMMAND_WRITE_BUFFER, { deps, deps + num_deps }, |
| soft_copy_op(q, |
| obj, { offset }, { 1 }, |
| ptr, { 0 }, { 1 }, |
| { size, 1, 1 })); |
| |
| ret_object(ev, hev); |
| return CL_SUCCESS; |
| |
| } catch (error &e) { |
| return e.get(); |
| } |
| |
| PUBLIC cl_int |
| clEnqueueReadBufferRect(cl_command_queue q, cl_mem obj, cl_bool blocking, |
| const size_t *obj_origin, const size_t *host_origin, |
| const size_t *region, |
| size_t obj_row_pitch, size_t obj_slice_pitch, |
| size_t host_row_pitch, size_t host_slice_pitch, |
| void *ptr, |
| cl_uint num_deps, const cl_event *deps, |
| cl_event *ev) try { |
| validate_base(q, num_deps, deps); |
| validate_obj(q, obj); |
| |
| if (!ptr) |
| throw error(CL_INVALID_VALUE); |
| |
| hard_event *hev = new hard_event( |
| *q, CL_COMMAND_READ_BUFFER_RECT, { deps, deps + num_deps }, |
| soft_copy_op(q, |
| ptr, host_origin, |
| { 1, host_row_pitch, host_slice_pitch }, |
| obj, obj_origin, |
| { 1, obj_row_pitch, obj_slice_pitch }, |
| region)); |
| |
| ret_object(ev, hev); |
| return CL_SUCCESS; |
| |
| } catch (error &e) { |
| return e.get(); |
| } |
| |
| PUBLIC cl_int |
| clEnqueueWriteBufferRect(cl_command_queue q, cl_mem obj, cl_bool blocking, |
| const size_t *obj_origin, const size_t *host_origin, |
| const size_t *region, |
| size_t obj_row_pitch, size_t obj_slice_pitch, |
| size_t host_row_pitch, size_t host_slice_pitch, |
| const void *ptr, |
| cl_uint num_deps, const cl_event *deps, |
| cl_event *ev) try { |
| validate_base(q, num_deps, deps); |
| validate_obj(q, obj); |
| |
| if (!ptr) |
| throw error(CL_INVALID_VALUE); |
| |
| hard_event *hev = new hard_event( |
| *q, CL_COMMAND_WRITE_BUFFER_RECT, { deps, deps + num_deps }, |
| soft_copy_op(q, |
| obj, obj_origin, |
| { 1, obj_row_pitch, obj_slice_pitch }, |
| ptr, host_origin, |
| { 1, host_row_pitch, host_slice_pitch }, |
| region)); |
| |
| ret_object(ev, hev); |
| return CL_SUCCESS; |
| |
| } catch (error &e) { |
| return e.get(); |
| } |
| |
| PUBLIC cl_int |
| clEnqueueCopyBuffer(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj, |
| size_t src_offset, size_t dst_offset, size_t size, |
| cl_uint num_deps, const cl_event *deps, |
| cl_event *ev) try { |
| validate_base(q, num_deps, deps); |
| validate_obj(q, src_obj); |
| validate_obj(q, dst_obj); |
| |
| hard_event *hev = new hard_event( |
| *q, CL_COMMAND_COPY_BUFFER, { deps, deps + num_deps }, |
| hard_copy_op(q, dst_obj, { dst_offset }, |
| src_obj, { src_offset }, |
| { size, 1, 1 })); |
| |
| ret_object(ev, hev); |
| return CL_SUCCESS; |
| |
| } catch (error &e) { |
| return e.get(); |
| } |
| |
| PUBLIC cl_int |
| clEnqueueCopyBufferRect(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj, |
| const size_t *src_origin, const size_t *dst_origin, |
| const size_t *region, |
| size_t src_row_pitch, size_t src_slice_pitch, |
| size_t dst_row_pitch, size_t dst_slice_pitch, |
| cl_uint num_deps, const cl_event *deps, |
| cl_event *ev) try { |
| validate_base(q, num_deps, deps); |
| validate_obj(q, src_obj); |
| validate_obj(q, dst_obj); |
| |
| hard_event *hev = new hard_event( |
| *q, CL_COMMAND_COPY_BUFFER_RECT, { deps, deps + num_deps }, |
| soft_copy_op(q, |
| dst_obj, dst_origin, |
| { 1, dst_row_pitch, dst_slice_pitch }, |
| src_obj, src_origin, |
| { 1, src_row_pitch, src_slice_pitch }, |
| region)); |
| |
| ret_object(ev, hev); |
| return CL_SUCCESS; |
| |
| } catch (error &e) { |
| return e.get(); |
| } |
| |
| PUBLIC cl_int |
| clEnqueueReadImage(cl_command_queue q, cl_mem obj, cl_bool blocking, |
| const size_t *origin, const size_t *region, |
| size_t row_pitch, size_t slice_pitch, void *ptr, |
| cl_uint num_deps, const cl_event *deps, |
| cl_event *ev) try { |
| image *img = dynamic_cast<image *>(obj); |
| |
| validate_base(q, num_deps, deps); |
| validate_obj(q, img); |
| |
| if (!ptr) |
| throw error(CL_INVALID_VALUE); |
| |
| hard_event *hev = new hard_event( |
| *q, CL_COMMAND_READ_IMAGE, { deps, deps + num_deps }, |
| soft_copy_op(q, |
| ptr, {}, |
| { 1, row_pitch, slice_pitch }, |
| obj, origin, |
| { 1, img->row_pitch(), img->slice_pitch() }, |
| region)); |
| |
| ret_object(ev, hev); |
| return CL_SUCCESS; |
| |
| } catch (error &e) { |
| return e.get(); |
| } |
| |
| PUBLIC cl_int |
| clEnqueueWriteImage(cl_command_queue q, cl_mem obj, cl_bool blocking, |
| const size_t *origin, const size_t *region, |
| size_t row_pitch, size_t slice_pitch, const void *ptr, |
| cl_uint num_deps, const cl_event *deps, |
| cl_event *ev) try { |
| image *img = dynamic_cast<image *>(obj); |
| |
| validate_base(q, num_deps, deps); |
| validate_obj(q, img); |
| |
| if (!ptr) |
| throw error(CL_INVALID_VALUE); |
| |
| hard_event *hev = new hard_event( |
| *q, CL_COMMAND_WRITE_IMAGE, { deps, deps + num_deps }, |
| soft_copy_op(q, |
| obj, origin, |
| { 1, img->row_pitch(), img->slice_pitch() }, |
| ptr, {}, |
| { 1, row_pitch, slice_pitch }, |
| region)); |
| |
| ret_object(ev, hev); |
| return CL_SUCCESS; |
| |
| } catch (error &e) { |
| return e.get(); |
| } |
| |
| PUBLIC cl_int |
| clEnqueueCopyImage(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj, |
| const size_t *src_origin, const size_t *dst_origin, |
| const size_t *region, |
| cl_uint num_deps, const cl_event *deps, |
| cl_event *ev) try { |
| image *src_img = dynamic_cast<image *>(src_obj); |
| image *dst_img = dynamic_cast<image *>(dst_obj); |
| |
| validate_base(q, num_deps, deps); |
| validate_obj(q, src_img); |
| validate_obj(q, dst_img); |
| |
| hard_event *hev = new hard_event( |
| *q, CL_COMMAND_COPY_IMAGE, { deps, deps + num_deps }, |
| hard_copy_op(q, dst_obj, dst_origin, src_obj, src_origin, region)); |
| |
| ret_object(ev, hev); |
| return CL_SUCCESS; |
| |
| } catch (error &e) { |
| return e.get(); |
| } |
| |
| PUBLIC cl_int |
| clEnqueueCopyImageToBuffer(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj, |
| const size_t *src_origin, const size_t *region, |
| size_t dst_offset, |
| cl_uint num_deps, const cl_event *deps, |
| cl_event *ev) try { |
| image *src_img = dynamic_cast<image *>(src_obj); |
| |
| validate_base(q, num_deps, deps); |
| validate_obj(q, src_img); |
| validate_obj(q, dst_obj); |
| |
| hard_event *hev = new hard_event( |
| *q, CL_COMMAND_COPY_IMAGE_TO_BUFFER, { deps, deps + num_deps }, |
| soft_copy_op(q, |
| dst_obj, { dst_offset }, |
| { 0, 0, 0 }, |
| src_obj, src_origin, |
| { 1, src_img->row_pitch(), src_img->slice_pitch() }, |
| region)); |
| |
| ret_object(ev, hev); |
| return CL_SUCCESS; |
| |
| } catch (error &e) { |
| return e.get(); |
| } |
| |
| PUBLIC cl_int |
| clEnqueueCopyBufferToImage(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj, |
| size_t src_offset, |
| const size_t *dst_origin, const size_t *region, |
| cl_uint num_deps, const cl_event *deps, |
| cl_event *ev) try { |
| image *dst_img = dynamic_cast<image *>(src_obj); |
| |
| validate_base(q, num_deps, deps); |
| validate_obj(q, src_obj); |
| validate_obj(q, dst_img); |
| |
| hard_event *hev = new hard_event( |
| *q, CL_COMMAND_COPY_BUFFER_TO_IMAGE, { deps, deps + num_deps }, |
| soft_copy_op(q, |
| dst_obj, dst_origin, |
| { 1, dst_img->row_pitch(), dst_img->slice_pitch() }, |
| src_obj, { src_offset }, |
| { 0, 0, 0 }, |
| region)); |
| |
| ret_object(ev, hev); |
| return CL_SUCCESS; |
| |
| } catch (error &e) { |
| return e.get(); |
| } |
| |
| PUBLIC void * |
| clEnqueueMapBuffer(cl_command_queue q, cl_mem obj, cl_bool blocking, |
| cl_map_flags flags, size_t offset, size_t size, |
| cl_uint num_deps, const cl_event *deps, |
| cl_event *ev, cl_int *errcode_ret) try { |
| validate_base(q, num_deps, deps); |
| validate_obj(q, obj); |
| |
| if (offset > obj->size() || offset + size > obj->size()) |
| throw error(CL_INVALID_VALUE); |
| |
| void *map = obj->resource(q).add_map( |
| *q, flags, blocking, { offset }, { size }); |
| |
| ret_object(ev, new hard_event(*q, CL_COMMAND_MAP_BUFFER, |
| { deps, deps + num_deps })); |
| ret_error(errcode_ret, CL_SUCCESS); |
| return map; |
| |
| } catch (error &e) { |
| ret_error(errcode_ret, e); |
| return NULL; |
| } |
| |
| PUBLIC void * |
| clEnqueueMapImage(cl_command_queue q, cl_mem obj, cl_bool blocking, |
| cl_map_flags flags, |
| const size_t *origin, const size_t *region, |
| size_t *row_pitch, size_t *slice_pitch, |
| cl_uint num_deps, const cl_event *deps, |
| cl_event *ev, cl_int *errcode_ret) try { |
| image *img = dynamic_cast<image *>(obj); |
| |
| validate_base(q, num_deps, deps); |
| validate_obj(q, img); |
| |
| void *map = obj->resource(q).add_map( |
| *q, flags, blocking, origin, region); |
| |
| ret_object(ev, new hard_event(*q, CL_COMMAND_MAP_IMAGE, |
| { deps, deps + num_deps })); |
| ret_error(errcode_ret, CL_SUCCESS); |
| return map; |
| |
| } catch (error &e) { |
| ret_error(errcode_ret, e); |
| return NULL; |
| } |
| |
| PUBLIC cl_int |
| clEnqueueUnmapMemObject(cl_command_queue q, cl_mem obj, void *ptr, |
| cl_uint num_deps, const cl_event *deps, |
| cl_event *ev) try { |
| validate_base(q, num_deps, deps); |
| validate_obj(q, obj); |
| |
| hard_event *hev = new hard_event( |
| *q, CL_COMMAND_UNMAP_MEM_OBJECT, { deps, deps + num_deps }, |
| [=](event &) { |
| obj->resource(q).del_map(ptr); |
| }); |
| |
| ret_object(ev, hev); |
| return CL_SUCCESS; |
| |
| } catch (error &e) { |
| return e.get(); |
| } |