clover: implements clEnqueueFillBuffer

Reviewed-by: Francisco Jerez <currojerez@riseup.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5897>
diff --git a/src/gallium/frontends/clover/api/device.cpp b/src/gallium/frontends/clover/api/device.cpp
index 042f2ed..ce9ae75 100644
--- a/src/gallium/frontends/clover/api/device.cpp
+++ b/src/gallium/frontends/clover/api/device.cpp
@@ -205,8 +205,7 @@
       break;
 
    case CL_DEVICE_MEM_BASE_ADDR_ALIGN:
-      buf.as_scalar<cl_uint>() = 8 *
-         std::max(dev.mem_base_addr_align(), (cl_uint) sizeof(cl_long) * 16);
+      buf.as_scalar<cl_uint>() = 8 * dev.mem_base_addr_align();
       break;
 
    case CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE:
diff --git a/src/gallium/frontends/clover/api/memory.cpp b/src/gallium/frontends/clover/api/memory.cpp
index 84c1aaf..d069a09 100644
--- a/src/gallium/frontends/clover/api/memory.cpp
+++ b/src/gallium/frontends/clover/api/memory.cpp
@@ -425,17 +425,6 @@
 }
 
 CLOVER_API cl_int
-clEnqueueFillBuffer(cl_command_queue command_queue, cl_mem buffer,
-                    const void *pattern, size_t pattern_size,
-                    size_t offset, size_t size,
-                    cl_uint num_events_in_wait_list,
-                    const cl_event *event_wait_list,
-                    cl_event *event) {
-   CLOVER_NOT_SUPPORTED_UNTIL("1.2");
-   return CL_INVALID_VALUE;
-}
-
-CLOVER_API cl_int
 clEnqueueFillImage(cl_command_queue command_queue, cl_mem image,
                    const void *fill_color,
                    const size_t *origin, const size_t *region,
diff --git a/src/gallium/frontends/clover/api/transfer.cpp b/src/gallium/frontends/clover/api/transfer.cpp
index fa8741e..0e39d97 100644
--- a/src/gallium/frontends/clover/api/transfer.cpp
+++ b/src/gallium/frontends/clover/api/transfer.cpp
@@ -422,6 +422,50 @@
 }
 
 CLOVER_API cl_int
+clEnqueueFillBuffer(cl_command_queue d_queue, cl_mem d_mem,
+                    const void *pattern, size_t pattern_size,
+                    size_t offset, size_t size,
+                    cl_uint num_deps, const cl_event *d_deps,
+                    cl_event *rd_ev) try {
+   auto &q = obj(d_queue);
+   auto &mem = obj<buffer>(d_mem);
+   auto deps = objs<wait_list_tag>(d_deps, num_deps);
+   vector_t region = { size, 1, 1 };
+   vector_t dst_origin = { offset };
+   auto dst_pitch = pitch(region, {{ 1 }});
+
+   validate_common(q, deps);
+   validate_object(q, mem, dst_origin, dst_pitch, region);
+
+   if (!pattern)
+      return CL_INVALID_VALUE;
+
+   if (!util_is_power_of_two_nonzero(pattern_size) ||
+      pattern_size > 128 || size % pattern_size
+      || offset % pattern_size) {
+      return CL_INVALID_VALUE;
+   }
+
+   auto sub = dynamic_cast<sub_buffer *>(&mem);
+   if (sub && sub->offset() % q.device().mem_base_addr_align()) {
+      return CL_MISALIGNED_SUB_BUFFER_OFFSET;
+   }
+
+   std::string data = std::string((char *)pattern, pattern_size);
+   auto hev = create<hard_event>(
+      q, CL_COMMAND_FILL_BUFFER, deps,
+      [=, &q, &mem](event &) {
+         mem.resource(q).clear(q, offset, size, &data[0], data.size());
+      });
+
+   ret_object(rd_ev, hev);
+   return CL_SUCCESS;
+
+} catch (error &e) {
+   return e.get();
+}
+
+CLOVER_API cl_int
 clEnqueueCopyBuffer(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,
                     size_t src_offset, size_t dst_offset, size_t size,
                     cl_uint num_deps, const cl_event *d_deps,
diff --git a/src/gallium/frontends/clover/core/device.cpp b/src/gallium/frontends/clover/core/device.cpp
index 609885c..ca2d951 100644
--- a/src/gallium/frontends/clover/core/device.cpp
+++ b/src/gallium/frontends/clover/core/device.cpp
@@ -20,6 +20,7 @@
 // OTHER DEALINGS IN THE SOFTWARE.
 //
 
+#include <algorithm>
 #include <unistd.h>
 #include "core/device.hpp"
 #include "core/platform.hpp"
@@ -215,9 +216,9 @@
    return pipe->get_param(pipe, PIPE_CAP_UMA);
 }
 
-cl_uint
+size_t
 device::mem_base_addr_align() const {
-   return sysconf(_SC_PAGESIZE);
+   return std::max((size_t)sysconf(_SC_PAGESIZE), sizeof(cl_long) * 16);
 }
 
 cl_device_svm_capabilities
diff --git a/src/gallium/frontends/clover/core/device.hpp b/src/gallium/frontends/clover/core/device.hpp
index 597f948..7c8cf13 100644
--- a/src/gallium/frontends/clover/core/device.hpp
+++ b/src/gallium/frontends/clover/core/device.hpp
@@ -70,7 +70,7 @@
       bool has_halves() const;
       bool has_int64_atomics() const;
       bool has_unified_memory() const;
-      cl_uint mem_base_addr_align() const;
+      size_t mem_base_addr_align() const;
       cl_device_svm_capabilities svm_support() const;
 
       std::vector<size_t> max_block_size() const;
diff --git a/src/gallium/frontends/clover/core/resource.cpp b/src/gallium/frontends/clover/core/resource.cpp
index dd20798..b8e257d 100644
--- a/src/gallium/frontends/clover/core/resource.cpp
+++ b/src/gallium/frontends/clover/core/resource.cpp
@@ -64,6 +64,14 @@
                                 box(src_res.offset + src_origin, region));
 }
 
+void
+resource::clear(command_queue &q, const size_t origin, const size_t size,
+                const void *pattern, const size_t pattern_size) {
+   auto p = offset[0] + origin;
+
+   q.pipe->clear_buffer(q.pipe, pipe, p, size, pattern, pattern_size);
+}
+
 void *
 resource::add_map(command_queue &q, cl_map_flags flags, bool blocking,
                   const vector &origin, const vector &region) {
diff --git a/src/gallium/frontends/clover/core/resource.hpp b/src/gallium/frontends/clover/core/resource.hpp
index 3b994b4..208016d 100644
--- a/src/gallium/frontends/clover/core/resource.hpp
+++ b/src/gallium/frontends/clover/core/resource.hpp
@@ -50,6 +50,9 @@
       void copy(command_queue &q, const vector &origin, const vector &region,
                 resource &src_resource, const vector &src_origin);
 
+      void clear(command_queue &q, const size_t origin, const size_t size,
+                 const void *pattern, const size_t pattern_size);
+
       void *add_map(command_queue &q, cl_map_flags flags, bool blocking,
                     const vector &origin, const vector &region);
       void del_map(void *p);