CL: device partitioning for front end and passthrough

Partitioning is the creation of sub-devices. Also add reference
counting for CL objects, which is needed now for sub-devices.

Also fix CL print format strings, since cl_ulong is actually
always 64 bit and not unsigned long.

Bug: angleproject:5904
Change-Id: I006699fad2f953ce312bca87c9b6362b5d77a18a
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2880665
Commit-Queue: John Plate <jplate@google.com>
Reviewed-by: Cody Northrop <cnorthrop@google.com>
Reviewed-by: Jamie Madill <jmadill@chromium.org>
diff --git a/include/angle_cl.h b/include/angle_cl.h
index 955f8f9..7b4ace0 100644
--- a/include/angle_cl.h
+++ b/include/angle_cl.h
@@ -37,6 +37,9 @@
 
     constexpr const cl_icd_dispatch &getDispatch() { return *mDispatch; }
 
+  protected:
+    bool isCompatible(void *ptr) const { return ptr == &mDispatch; }
+
   private:
     // This has to be the first member to be OpenCL ICD compatible
     const cl_icd_dispatch *const mDispatch;
diff --git a/scripts/code_generation_hashes/GL_EGL_entry_points.json b/scripts/code_generation_hashes/GL_EGL_entry_points.json
index 929b37b..5f4ce34 100644
--- a/scripts/code_generation_hashes/GL_EGL_entry_points.json
+++ b/scripts/code_generation_hashes/GL_EGL_entry_points.json
@@ -10,7 +10,7 @@
   "scripts/entry_point_packed_gl_enums.json":
     "4f7b43863a5e61991bba4010db463679",
   "scripts/generate_entry_points.py":
-    "de7a2201b2a550bf1e890b9941e9ad57",
+    "a749fa006d3da248f415b07e4b9ecf35",
   "scripts/gl.xml":
     "2a73a58a7e26d8676a2c0af6d528cae6",
   "scripts/gl_angle_ext.xml":
@@ -130,7 +130,7 @@
   "src/libGLESv2/egl_stubs_autogen.h":
     "6439daa350c1663e71dd0af37dcc91df",
   "src/libGLESv2/entry_points_cl_autogen.cpp":
-    "4e7af65ebb7f126992adcf932a5b3060",
+    "2b2176bb17ed88bdb5aa2d6e9424608f",
   "src/libGLESv2/entry_points_cl_autogen.h":
     "dde2f94c3004874a7da995dae69da811",
   "src/libGLESv2/entry_points_egl_autogen.cpp":
diff --git a/scripts/generate_entry_points.py b/scripts/generate_entry_points.py
index e09c0d6..19132e4 100755
--- a/scripts/generate_entry_points.py
+++ b/scripts/generate_entry_points.py
@@ -699,8 +699,8 @@
     "cl_ushort": "%hu",
     "cl_int": "%d",
     "cl_uint": "%u",
-    "cl_long": "%ld",
-    "cl_ulong": "%lu",
+    "cl_long": "%lld",
+    "cl_ulong": "%llu",
     "cl_half": "%hu",
     "cl_float": "%f",
     "cl_double": "%f",
@@ -714,37 +714,37 @@
     "cl_event": POINTER_FORMAT,
     "cl_sampler": POINTER_FORMAT,
     "cl_bool": "%u",
-    "cl_bitfield": "%lu",
-    "cl_properties": "%lu",
-    "cl_device_type": "%lu",
+    "cl_bitfield": "%llu",
+    "cl_properties": "%llu",
+    "cl_device_type": "%llu",
     "cl_platform_info": "%u",
     "cl_device_info": "%u",
-    "cl_device_fp_config": "%lu",
+    "cl_device_fp_config": "%llu",
     "cl_device_mem_cache_type": "%u",
     "cl_device_local_mem_type": "%u",
-    "cl_device_exec_capabilities": "%lu",
-    "cl_device_svm_capabilities": "%lu",
-    "cl_command_queue_properties": "%lu",
+    "cl_device_exec_capabilities": "%llu",
+    "cl_device_svm_capabilities": "%llu",
+    "cl_command_queue_properties": "%llu",
     "cl_device_partition_property": "%zu",
-    "cl_device_affinity_domain": "%lu",
+    "cl_device_affinity_domain": "%llu",
     "cl_context_properties": "%zu",
     "cl_context_info": "%u",
-    "cl_queue_properties": "%lu",
+    "cl_queue_properties": "%llu",
     "cl_command_queue_info": "%u",
     "cl_channel_order": "%u",
     "cl_channel_type": "%u",
-    "cl_mem_flags": "%lu",
-    "cl_svm_mem_flags": "%lu",
+    "cl_mem_flags": "%llu",
+    "cl_svm_mem_flags": "%llu",
     "cl_mem_object_type": "%u",
     "cl_mem_info": "%u",
-    "cl_mem_migration_flags": "%lu",
-    "cl_mem_properties": "%lu",
+    "cl_mem_migration_flags": "%llu",
+    "cl_mem_properties": "%llu",
     "cl_image_info": "%u",
     "cl_buffer_create_type": "%u",
     "cl_addressing_mode": "%u",
     "cl_filter_mode": "%u",
     "cl_sampler_info": "%u",
-    "cl_map_flags": "%lu",
+    "cl_map_flags": "%llu",
     "cl_pipe_properties": "%zu",
     "cl_pipe_info": "%u",
     "cl_program_info": "%u",
@@ -755,18 +755,18 @@
     "cl_kernel_arg_info": "%u",
     "cl_kernel_arg_address_qualifier": "%u",
     "cl_kernel_arg_access_qualifier": "%u",
-    "cl_kernel_arg_type_qualifier": "%lu",
+    "cl_kernel_arg_type_qualifier": "%llu",
     "cl_kernel_work_group_info": "%u",
     "cl_kernel_sub_group_info": "%u",
     "cl_event_info": "%u",
     "cl_command_type": "%u",
     "cl_profiling_info": "%u",
-    "cl_sampler_properties": "%lu",
+    "cl_sampler_properties": "%llu",
     "cl_kernel_exec_info": "%u",
-    "cl_device_atomic_capabilities": "%lu",
+    "cl_device_atomic_capabilities": "%llu",
     "cl_khronos_vendor_id": "%u",
     "cl_version": "%u",
-    "cl_device_device_enqueue_capabilities": "%lu",
+    "cl_device_device_enqueue_capabilities": "%llu",
 }
 
 TEMPLATE_HEADER_INCLUDES = """\
diff --git a/src/libANGLE/CLDevice.cpp b/src/libANGLE/CLDevice.cpp
index 08b7577..2fcc078 100644
--- a/src/libANGLE/CLDevice.cpp
+++ b/src/libANGLE/CLDevice.cpp
@@ -8,12 +8,31 @@
 #include "libANGLE/CLDevice.h"
 
 #include "libANGLE/CLPlatform.h"
-#include "libANGLE/Debug.h"
 
 namespace cl
 {
 
-Device::~Device() = default;
+Device::~Device()
+{
+    if (isRoot())
+    {
+        removeRef();
+    }
+}
+
+bool Device::release()
+{
+    if (isRoot())
+    {
+        return false;
+    }
+    const bool released = removeRef();
+    if (released)
+    {
+        mParent->destroySubDevice(this);
+    }
+    return released;
+}
 
 cl_int Device::getInfo(DeviceInfo name, size_t valueSize, void *value, size_t *valueSizeRet)
 {
@@ -94,7 +113,6 @@
         case DeviceInfo::NumericVersion:
         case DeviceInfo::PreferredInteropUserSync:
         case DeviceInfo::PartitionMaxSubDevices:
-        case DeviceInfo::ReferenceCount:
         case DeviceInfo::PreferredPlatformAtomicAlignment:
         case DeviceInfo::PreferredGlobalAtomicAlignment:
         case DeviceInfo::PreferredLocalAtomicAlignment:
@@ -161,7 +179,6 @@
         case DeviceInfo::Profile:
         case DeviceInfo::Version:
         case DeviceInfo::OpenCL_C_Version:
-        case DeviceInfo::Extensions:
         case DeviceInfo::LatestConformanceVersionPassed:
             result = mImpl->getInfoStringLength(name, &copySize);
             if (result != CL_SUCCESS)
@@ -173,7 +190,7 @@
             copyValue = valString.data();
             break;
 
-        // Handle all array types
+        // Handle all cached values
         case DeviceInfo::MaxWorkItemDimensions:
             valUInt   = static_cast<cl_uint>(mInfo.mMaxWorkItemSizes.size());
             copyValue = &valUInt;
@@ -220,6 +237,10 @@
             copySize  = mInfo.mOpenCL_C_Features.size() *
                        sizeof(decltype(mInfo.mOpenCL_C_Features)::value_type);
             break;
+        case DeviceInfo::Extensions:
+            copyValue = mInfo.mExtensions.c_str();
+            copySize  = mInfo.mExtensions.length() + 1u;
+            break;
         case DeviceInfo::ExtensionsWithVersion:
             if (!mInfo.mIsSupportedExtensionsWithVersion)
             {
@@ -240,7 +261,7 @@
                 mInfo.mPartitionType.size() * sizeof(decltype(mInfo.mPartitionType)::value_type);
             break;
 
-        // Handle all special types
+        // Handle all mapped values
         case DeviceInfo::Platform:
             valPointer = &mPlatform;
             copyValue  = &valPointer;
@@ -250,6 +271,10 @@
             copyValue = &mParent;
             copySize  = sizeof(mParent);
             break;
+        case DeviceInfo::ReferenceCount:
+            copyValue = getRefCountPtr();
+            copySize  = sizeof(*getRefCountPtr());
+            break;
 
         default:
             WARN() << "CL device info " << name << " is not (yet) supported";
@@ -278,6 +303,29 @@
     return CL_SUCCESS;
 }
 
+cl_int Device::createSubDevices(const cl_device_partition_property *properties,
+                                cl_uint numDevices,
+                                Device **devices,
+                                cl_uint *numDevicesRet)
+{
+    if (devices == nullptr)
+    {
+        numDevices = 0u;
+    }
+    rx::CLDeviceImpl::InitList initList;
+    const cl_int result = mImpl->createSubDevices(properties, numDevices, initList, numDevicesRet);
+    if (result == CL_SUCCESS)
+    {
+        while (!initList.empty())
+        {
+            mSubDevices.emplace_back(new Device(mPlatform, this, initList.front()));
+            *devices++ = mSubDevices.back().get();
+            initList.pop_front();
+        }
+    }
+    return result;
+}
+
 Device::PtrList Device::CreateDevices(Platform &platform, rx::CLDeviceImpl::InitList &&initList)
 {
     PtrList devices;
@@ -305,4 +353,22 @@
       mInfo(std::move(initData.second))
 {}
 
+void Device::destroySubDevice(Device *device)
+{
+    auto deviceIt = mSubDevices.cbegin();
+    while (deviceIt != mSubDevices.cend() && deviceIt->get() != device)
+    {
+        ++deviceIt;
+    }
+    if (deviceIt != mSubDevices.cend())
+    {
+        mSubDevices.erase(deviceIt);
+        release();
+    }
+    else
+    {
+        ERR() << "Sub-device not found";
+    }
+}
+
 }  // namespace cl
diff --git a/src/libANGLE/CLDevice.h b/src/libANGLE/CLDevice.h
index df03289..368b965 100644
--- a/src/libANGLE/CLDevice.h
+++ b/src/libANGLE/CLDevice.h
@@ -24,11 +24,20 @@
     ~Device();
 
     Platform &getPlatform() const;
+    bool isRoot() const;
+    bool hasSubDevice(const Device *device) const;
+
+    void retain();
+    bool release();
 
     cl_int getInfoULong(DeviceInfo name, cl_ulong *value) const;
-
     cl_int getInfo(DeviceInfo name, size_t valueSize, void *value, size_t *valueSizeRet);
 
+    cl_int createSubDevices(const cl_device_partition_property *properties,
+                            cl_uint numDevices,
+                            Device **devices,
+                            cl_uint *numDevicesRet);
+
     static PtrList CreateDevices(Platform &platform, rx::CLDeviceImpl::InitList &&initList);
 
     static bool IsValid(const Device *device);
@@ -37,10 +46,14 @@
   private:
     Device(Platform &platform, Device *parent, rx::CLDeviceImpl::InitData &initData);
 
+    void destroySubDevice(Device *device);
+
     Platform &mPlatform;
     Device *const mParent;
     const rx::CLDeviceImpl::Ptr mImpl;
     const rx::CLDeviceImpl::Info mInfo;
+
+    PtrList mSubDevices;
 };
 
 inline Platform &Device::getPlatform() const
@@ -48,6 +61,26 @@
     return mPlatform;
 }
 
+inline bool Device::isRoot() const
+{
+    return mParent == nullptr;
+}
+
+inline bool Device::hasSubDevice(const Device *device) const
+{
+    return std::find_if(mSubDevices.cbegin(), mSubDevices.cend(), [=](const Device::Ptr &ptr) {
+               return ptr.get() == device || ptr->hasSubDevice(device);
+           }) != mSubDevices.cend();
+}
+
+inline void Device::retain()
+{
+    if (!isRoot())
+    {
+        addRef();
+    }
+}
+
 inline cl_int Device::getInfoULong(DeviceInfo name, cl_ulong *value) const
 {
     return mImpl->getInfoULong(name, value);
diff --git a/src/libANGLE/CLObject.h b/src/libANGLE/CLObject.h
index 1cbef50..26ac705 100644
--- a/src/libANGLE/CLObject.h
+++ b/src/libANGLE/CLObject.h
@@ -10,14 +10,32 @@
 
 #include "libANGLE/renderer/CLtypes.h"
 
+#include "libANGLE/Debug.h"
+
 namespace cl
 {
 
 class Object
 {
   public:
-    constexpr Object() {}
-    ~Object() = default;
+    // This class cannot be virtual as its derived classes need to have standard layout
+    Object() = default;
+    ~Object() { ASSERT(mRefCount == 0u); }
+
+    cl_uint getRefCount() { return mRefCount; }
+
+    const cl_uint *getRefCountPtr() { return &mRefCount; }
+
+  protected:
+    void addRef() { ++mRefCount; }
+    bool removeRef()
+    {
+        ASSERT(mRefCount > 0u);
+        return --mRefCount == 0u;
+    }
+
+  private:
+    cl_uint mRefCount = 1u;
 };
 
 }  // namespace cl
diff --git a/src/libANGLE/CLPlatform.cpp b/src/libANGLE/CLPlatform.cpp
index 91d0151..39317b1 100644
--- a/src/libANGLE/CLPlatform.cpp
+++ b/src/libANGLE/CLPlatform.cpp
@@ -7,6 +7,7 @@
 
 #include "libANGLE/CLPlatform.h"
 
+#include <cstdint>
 #include <cstring>
 
 namespace cl
@@ -24,7 +25,10 @@
 }
 }  // namespace
 
-Platform::~Platform() = default;
+Platform::~Platform()
+{
+    removeRef();
+}
 
 cl_int Platform::getInfo(PlatformInfo name, size_t valueSize, void *value, size_t *sizeRet)
 {
@@ -139,7 +143,9 @@
       mImpl(std::move(initData.first)),
       mInfo(std::move(initData.second)),
       mDevices(Device::CreateDevices(*this, std::move(deviceInitList)))
-{}
+{
+    ASSERT(isCompatible(this));
+}
 
 constexpr char Platform::kVendor[];
 constexpr char Platform::kIcdSuffix[];
diff --git a/src/libANGLE/CLPlatform.h b/src/libANGLE/CLPlatform.h
index 1939593..cd974e3 100644
--- a/src/libANGLE/CLPlatform.h
+++ b/src/libANGLE/CLPlatform.h
@@ -62,7 +62,7 @@
 inline bool Platform::hasDevice(const Device *device) const
 {
     return std::find_if(mDevices.cbegin(), mDevices.cend(), [=](const Device::Ptr &ptr) {
-               return ptr.get() == device;
+               return ptr.get() == device || ptr->hasSubDevice(device);
            }) != mDevices.cend();
 }
 
diff --git a/src/libANGLE/renderer/CLDeviceImpl.h b/src/libANGLE/renderer/CLDeviceImpl.h
index 9168a33..0a87ad8 100644
--- a/src/libANGLE/renderer/CLDeviceImpl.h
+++ b/src/libANGLE/renderer/CLDeviceImpl.h
@@ -34,6 +34,7 @@
         NameVersionVector mBuiltInKernelsWithVersion;
         NameVersionVector mOpenCL_C_AllVersions;
         NameVersionVector mOpenCL_C_Features;
+        std::string mExtensions;
         NameVersionVector mExtensionsWithVersion;
         std::vector<cl_device_partition_property> mPartitionProperties;
         std::vector<cl_device_partition_property> mPartitionType;
@@ -57,6 +58,11 @@
     virtual cl_int getInfoSizeT(cl::DeviceInfo name, size_t *value) const             = 0;
     virtual cl_int getInfoStringLength(cl::DeviceInfo name, size_t *value) const      = 0;
     virtual cl_int getInfoString(cl::DeviceInfo name, size_t size, char *value) const = 0;
+
+    virtual cl_int createSubDevices(const cl_device_partition_property *properties,
+                                    cl_uint numDevices,
+                                    InitList &deviceInitList,
+                                    cl_uint *numDevicesRet) = 0;
 };
 
 }  // namespace rx
diff --git a/src/libANGLE/renderer/cl/BUILD.gn b/src/libANGLE/renderer/cl/BUILD.gn
index c938031..9e1aeb5 100644
--- a/src/libANGLE/renderer/cl/BUILD.gn
+++ b/src/libANGLE/renderer/cl/BUILD.gn
@@ -15,6 +15,8 @@
   "CLDeviceCL.h",
   "CLPlatformCL.cpp",
   "CLPlatformCL.h",
+  "cl_util.cpp",
+  "cl_util.h",
 ]
 
 config("angle_cl_backend_config") {
diff --git a/src/libANGLE/renderer/cl/CLDeviceCL.cpp b/src/libANGLE/renderer/cl/CLDeviceCL.cpp
index 6a88e44..88adb39 100644
--- a/src/libANGLE/renderer/cl/CLDeviceCL.cpp
+++ b/src/libANGLE/renderer/cl/CLDeviceCL.cpp
@@ -7,14 +7,21 @@
 
 #include "libANGLE/renderer/cl/CLDeviceCL.h"
 
+#include "libANGLE/renderer/cl/cl_util.h"
+
 #include "libANGLE/Debug.h"
 
 namespace rx
 {
 
-CLDeviceCL::CLDeviceCL(cl_device_id device) : mDevice(device) {}
-
-CLDeviceCL::~CLDeviceCL() = default;
+CLDeviceCL::~CLDeviceCL()
+{
+    if (mVersion >= CL_MAKE_VERSION(1, 2, 0) &&
+        mDevice->getDispatch().clReleaseDevice(mDevice) != CL_SUCCESS)
+    {
+        ERR() << "Error while releasing CL device";
+    }
+}
 
 cl_int CLDeviceCL::getInfoUInt(cl::DeviceInfo name, cl_uint *value) const
 {
@@ -45,9 +52,55 @@
                                                   nullptr);
 }
 
+cl_int CLDeviceCL::createSubDevices(const cl_device_partition_property *properties,
+                                    cl_uint numDevices,
+                                    InitList &deviceInitList,
+                                    cl_uint *numDevicesRet)
+{
+    if (mVersion < CL_MAKE_VERSION(1, 2, 0))
+    {
+        return CL_INVALID_VALUE;
+    }
+    if (numDevices == 0u)
+    {
+        return mDevice->getDispatch().clCreateSubDevices(mDevice, properties, 0u, nullptr,
+                                                         numDevicesRet);
+    }
+    std::vector<cl_device_id> devices(numDevices, nullptr);
+    const cl_int result = mDevice->getDispatch().clCreateSubDevices(mDevice, properties, numDevices,
+                                                                    devices.data(), nullptr);
+    if (result == CL_SUCCESS)
+    {
+        for (cl_device_id device : devices)
+        {
+            CLDeviceImpl::Ptr impl(CLDeviceCL::Create(device));
+            CLDeviceImpl::Info info = CLDeviceCL::GetInfo(device);
+            if (impl && info.isValid())
+            {
+                deviceInitList.emplace_back(std::move(impl), std::move(info));
+            }
+        }
+        if (deviceInitList.size() != devices.size())
+        {
+            return CL_INVALID_VALUE;
+        }
+    }
+    return result;
+}
+
 #define ANGLE_GET_INFO_SIZE(name, size_ret) \
     device->getDispatch().clGetDeviceInfo(device, name, 0u, nullptr, size_ret)
 
+#define ANGLE_GET_INFO_SIZE_RET(name, size_ret)                     \
+    do                                                              \
+    {                                                               \
+        if (ANGLE_GET_INFO_SIZE(name, size_ret) != CL_SUCCESS)      \
+        {                                                           \
+            ERR() << "Failed to query CL device info for " << name; \
+            return info;                                            \
+        }                                                           \
+    } while (0)
+
 #define ANGLE_GET_INFO(name, size, param) \
     device->getDispatch().clGetDeviceInfo(device, name, size, param, nullptr)
 
@@ -61,10 +114,29 @@
         }                                                           \
     } while (0)
 
+CLDeviceCL *CLDeviceCL::Create(cl_device_id device)
+{
+    size_t valueSize = 0u;
+    if (ANGLE_GET_INFO_SIZE(CL_DEVICE_VERSION, &valueSize) == CL_SUCCESS)
+    {
+        std::vector<char> valString(valueSize, '\0');
+        if (ANGLE_GET_INFO(CL_DEVICE_VERSION, valueSize, valString.data()) == CL_SUCCESS)
+        {
+            const cl_version version = ExtractCLVersion(valString.data());
+            if (version != 0u)
+            {
+                return new CLDeviceCL(device, version);
+            }
+        }
+    }
+    return nullptr;
+}
+
 CLDeviceImpl::Info CLDeviceCL::GetInfo(cl_device_id device)
 {
     Info info;
     size_t valueSize = 0u;
+    std::vector<char> valString;
 
     if (ANGLE_GET_INFO_SIZE(CL_DEVICE_ILS_WITH_VERSION, &valueSize) == CL_SUCCESS &&
         (valueSize % sizeof(decltype(info.mILsWithVersion)::value_type)) == 0u)
@@ -103,6 +175,12 @@
         info.mIsSupportedOpenCL_C_Features = true;
     }
 
+    ANGLE_GET_INFO_SIZE_RET(CL_DEVICE_EXTENSIONS, &valueSize);
+    valString.resize(valueSize, '\0');
+    ANGLE_GET_INFO_RET(CL_DEVICE_EXTENSIONS, valueSize, valString.data());
+    info.mExtensions.assign(valString.data());
+    RemoveUnsupportedCLExtensions(info.mExtensions);
+
     if (ANGLE_GET_INFO_SIZE(CL_DEVICE_EXTENSIONS_WITH_VERSION, &valueSize) == CL_SUCCESS &&
         (valueSize % sizeof(decltype(info.mExtensionsWithVersion)::value_type)) == 0u)
     {
@@ -110,6 +188,7 @@
             valueSize / sizeof(decltype(info.mExtensionsWithVersion)::value_type));
         ANGLE_GET_INFO_RET(CL_DEVICE_EXTENSIONS_WITH_VERSION, valueSize,
                            info.mExtensionsWithVersion.data());
+        RemoveUnsupportedCLExtensions(info.mExtensionsWithVersion);
         info.mIsSupportedExtensionsWithVersion = true;
     }
 
@@ -141,4 +220,7 @@
     return info;
 }
 
+CLDeviceCL::CLDeviceCL(cl_device_id device, cl_version version) : mDevice(device), mVersion(version)
+{}
+
 }  // namespace rx
diff --git a/src/libANGLE/renderer/cl/CLDeviceCL.h b/src/libANGLE/renderer/cl/CLDeviceCL.h
index 4e52bfb..91af173 100644
--- a/src/libANGLE/renderer/cl/CLDeviceCL.h
+++ b/src/libANGLE/renderer/cl/CLDeviceCL.h
@@ -16,7 +16,6 @@
 class CLDeviceCL : public CLDeviceImpl
 {
   public:
-    explicit CLDeviceCL(cl_device_id device);
     ~CLDeviceCL() override;
 
     cl_device_id getNative();
@@ -27,10 +26,19 @@
     cl_int getInfoStringLength(cl::DeviceInfo name, size_t *value) const override;
     cl_int getInfoString(cl::DeviceInfo name, size_t size, char *value) const override;
 
+    cl_int createSubDevices(const cl_device_partition_property *properties,
+                            cl_uint numDevices,
+                            InitList &deviceInitList,
+                            cl_uint *numDevicesRet) override;
+
+    static CLDeviceCL *Create(cl_device_id device);
     static Info GetInfo(cl_device_id device);
 
   private:
+    CLDeviceCL(cl_device_id device, cl_version version);
+
     const cl_device_id mDevice;
+    const cl_version mVersion;
 };
 
 inline cl_device_id CLDeviceCL::getNative()
diff --git a/src/libANGLE/renderer/cl/CLPlatformCL.cpp b/src/libANGLE/renderer/cl/CLPlatformCL.cpp
index 52fc999..2f9e68c 100644
--- a/src/libANGLE/renderer/cl/CLPlatformCL.cpp
+++ b/src/libANGLE/renderer/cl/CLPlatformCL.cpp
@@ -8,6 +8,7 @@
 #include "libANGLE/renderer/cl/CLPlatformCL.h"
 
 #include "libANGLE/renderer/cl/CLDeviceCL.h"
+#include "libANGLE/renderer/cl/cl_util.h"
 
 #include "libANGLE/CLPlatform.h"
 #include "libANGLE/Debug.h"
@@ -19,24 +20,9 @@
 #include "icd.h"
 }  // extern "C"
 
-#include <cstdlib>
-#include <unordered_set>
-
 namespace rx
 {
 
-namespace
-{
-using ExtensionSet = std::unordered_set<std::string>;
-
-const ExtensionSet &GetSupportedExtensions()
-{
-    static angle::base::NoDestructor<ExtensionSet> sExtensions(
-        {"cl_khr_extended_versioning", "cl_khr_icd"});
-    return *sExtensions;
-}
-}  // namespace
-
 CLPlatformCL::~CLPlatformCL() = default;
 
 CLDeviceImpl::InitList CLPlatformCL::getDevices()
@@ -56,10 +42,11 @@
         {
             for (cl_device_id device : devices)
             {
+                CLDeviceImpl::Ptr impl(CLDeviceCL::Create(device));
                 CLDeviceImpl::Info info = CLDeviceCL::GetInfo(device);
-                if (info.isValid())
+                if (impl && info.isValid())
                 {
-                    initList.emplace_back(new CLDeviceCL(device), std::move(info));
+                    initList.emplace_back(std::move(impl), std::move(info));
                 }
             }
         }
@@ -152,10 +139,75 @@
     std::vector<char> valString;
 
     // Verify that the platform is valid
-    ASSERT(platform != nullptr);
-    ASSERT(platform->getDispatch().clGetPlatformInfo != nullptr);
-    ASSERT(platform->getDispatch().clGetDeviceIDs != nullptr);
-    ASSERT(platform->getDispatch().clGetDeviceInfo != nullptr);
+    if (platform == nullptr || platform->getDispatch().clGetPlatformIDs == nullptr ||
+        platform->getDispatch().clGetPlatformInfo == nullptr ||
+        platform->getDispatch().clGetDeviceIDs == nullptr ||
+        platform->getDispatch().clGetDeviceInfo == nullptr ||
+        platform->getDispatch().clCreateContext == nullptr ||
+        platform->getDispatch().clCreateContextFromType == nullptr ||
+        platform->getDispatch().clRetainContext == nullptr ||
+        platform->getDispatch().clReleaseContext == nullptr ||
+        platform->getDispatch().clGetContextInfo == nullptr ||
+        platform->getDispatch().clCreateCommandQueue == nullptr ||
+        platform->getDispatch().clRetainCommandQueue == nullptr ||
+        platform->getDispatch().clReleaseCommandQueue == nullptr ||
+        platform->getDispatch().clGetCommandQueueInfo == nullptr ||
+        platform->getDispatch().clSetCommandQueueProperty == nullptr ||
+        platform->getDispatch().clCreateBuffer == nullptr ||
+        platform->getDispatch().clCreateImage2D == nullptr ||
+        platform->getDispatch().clCreateImage3D == nullptr ||
+        platform->getDispatch().clRetainMemObject == nullptr ||
+        platform->getDispatch().clReleaseMemObject == nullptr ||
+        platform->getDispatch().clGetSupportedImageFormats == nullptr ||
+        platform->getDispatch().clGetMemObjectInfo == nullptr ||
+        platform->getDispatch().clGetImageInfo == nullptr ||
+        platform->getDispatch().clCreateSampler == nullptr ||
+        platform->getDispatch().clRetainSampler == nullptr ||
+        platform->getDispatch().clReleaseSampler == nullptr ||
+        platform->getDispatch().clGetSamplerInfo == nullptr ||
+        platform->getDispatch().clCreateProgramWithSource == nullptr ||
+        platform->getDispatch().clCreateProgramWithBinary == nullptr ||
+        platform->getDispatch().clRetainProgram == nullptr ||
+        platform->getDispatch().clReleaseProgram == nullptr ||
+        platform->getDispatch().clBuildProgram == nullptr ||
+        platform->getDispatch().clUnloadCompiler == nullptr ||
+        platform->getDispatch().clGetProgramInfo == nullptr ||
+        platform->getDispatch().clGetProgramBuildInfo == nullptr ||
+        platform->getDispatch().clCreateKernel == nullptr ||
+        platform->getDispatch().clCreateKernelsInProgram == nullptr ||
+        platform->getDispatch().clRetainKernel == nullptr ||
+        platform->getDispatch().clReleaseKernel == nullptr ||
+        platform->getDispatch().clSetKernelArg == nullptr ||
+        platform->getDispatch().clGetKernelInfo == nullptr ||
+        platform->getDispatch().clGetKernelWorkGroupInfo == nullptr ||
+        platform->getDispatch().clWaitForEvents == nullptr ||
+        platform->getDispatch().clGetEventInfo == nullptr ||
+        platform->getDispatch().clRetainEvent == nullptr ||
+        platform->getDispatch().clReleaseEvent == nullptr ||
+        platform->getDispatch().clGetEventProfilingInfo == nullptr ||
+        platform->getDispatch().clFlush == nullptr || platform->getDispatch().clFinish == nullptr ||
+        platform->getDispatch().clEnqueueReadBuffer == nullptr ||
+        platform->getDispatch().clEnqueueWriteBuffer == nullptr ||
+        platform->getDispatch().clEnqueueCopyBuffer == nullptr ||
+        platform->getDispatch().clEnqueueReadImage == nullptr ||
+        platform->getDispatch().clEnqueueWriteImage == nullptr ||
+        platform->getDispatch().clEnqueueCopyImage == nullptr ||
+        platform->getDispatch().clEnqueueCopyImageToBuffer == nullptr ||
+        platform->getDispatch().clEnqueueCopyBufferToImage == nullptr ||
+        platform->getDispatch().clEnqueueMapBuffer == nullptr ||
+        platform->getDispatch().clEnqueueMapImage == nullptr ||
+        platform->getDispatch().clEnqueueUnmapMemObject == nullptr ||
+        platform->getDispatch().clEnqueueNDRangeKernel == nullptr ||
+        platform->getDispatch().clEnqueueTask == nullptr ||
+        platform->getDispatch().clEnqueueNativeKernel == nullptr ||
+        platform->getDispatch().clEnqueueMarker == nullptr ||
+        platform->getDispatch().clEnqueueWaitForEvents == nullptr ||
+        platform->getDispatch().clEnqueueBarrier == nullptr ||
+        platform->getDispatch().clGetExtensionFunctionAddress == nullptr)
+    {
+        ERR() << "Missing entry points for OpenCL 1.0";
+        return info;
+    }
 
     // Skip ANGLE CL implementation to prevent passthrough loop
     ANGLE_GET_INFO_SIZE_RET(CL_PLATFORM_VENDOR, &valueSize);
@@ -172,44 +224,13 @@
     valString.resize(valueSize, '\0');
     ANGLE_GET_INFO_RET(CL_PLATFORM_EXTENSIONS, valueSize, valString.data());
     info.mExtensions.assign(valString.data());
+    RemoveUnsupportedCLExtensions(info.mExtensions);
     if (info.mExtensions.find("cl_khr_icd") == std::string::npos)
     {
         WARN() << "CL platform is not ICD compatible";
         return info;
     }
 
-    // Filter out extensions which are not (yet) supported to be passed through
-    if (!info.mExtensions.empty())
-    {
-        const ExtensionSet &supported   = GetSupportedExtensions();
-        std::string::size_type extStart = 0u;
-        do
-        {
-            const std::string::size_type spacePos = info.mExtensions.find(' ', extStart);
-            const bool foundSpace                 = spacePos != std::string::npos;
-            const std::string::size_type length =
-                (foundSpace ? spacePos : info.mExtensions.length()) - extStart;
-            if (supported.find(info.mExtensions.substr(extStart, length)) != supported.cend())
-            {
-                extStart = foundSpace && spacePos + 1u < info.mExtensions.length()
-                               ? spacePos + 1u
-                               : std::string::npos;
-            }
-            else
-            {
-                info.mExtensions.erase(extStart, length + (foundSpace ? 1u : 0u));
-                if (extStart >= info.mExtensions.length())
-                {
-                    extStart = std::string::npos;
-                }
-            }
-        } while (extStart != std::string::npos);
-        while (!info.mExtensions.empty() && info.mExtensions.back() == ' ')
-        {
-            info.mExtensions.pop_back();
-        }
-    }
-
     // Fetch common platform info
     ANGLE_GET_INFO_SIZE_RET(CL_PLATFORM_VERSION, &valueSize);
     valString.resize(valueSize, '\0');
@@ -217,29 +238,19 @@
     info.mVersionStr.assign(valString.data());
     info.mVersionStr += " (ANGLE " ANGLE_VERSION_STRING ")";
 
-    const std::string::size_type spacePos = info.mVersionStr.find(' ');
-    const std::string::size_type dotPos   = info.mVersionStr.find('.');
-    if (spacePos == std::string::npos || dotPos == std::string::npos)
+    const cl_version version = ExtractCLVersion(info.mVersionStr);
+    if (version == 0u)
     {
-        ERR() << "Failed to extract version from OpenCL version string: " << info.mVersionStr;
-        return info;
-    }
-    const cl_uint major =
-        static_cast<cl_uint>(std::strtol(&info.mVersionStr[spacePos + 1u], nullptr, 10));
-    const cl_uint minor =
-        static_cast<cl_uint>(std::strtol(&info.mVersionStr[dotPos + 1u], nullptr, 10));
-    if (major == 0)
-    {
-        ERR() << "Failed to extract version from OpenCL version string: " << info.mVersionStr;
         return info;
     }
 
     if (ANGLE_GET_INFO(CL_PLATFORM_NUMERIC_VERSION, sizeof(info.mVersion), &info.mVersion) !=
         CL_SUCCESS)
     {
-        info.mVersion = CL_MAKE_VERSION(major, minor, 0);
+        info.mVersion = version;
     }
-    else if (CL_VERSION_MAJOR(info.mVersion) != major || CL_VERSION_MINOR(info.mVersion) != minor)
+    else if (CL_VERSION_MAJOR(info.mVersion) != CL_VERSION_MAJOR(version) ||
+             CL_VERSION_MINOR(info.mVersion) != CL_VERSION_MINOR(version))
     {
         WARN() << "CL_PLATFORM_NUMERIC_VERSION = " << CL_VERSION_MAJOR(info.mVersion) << '.'
                << CL_VERSION_MINOR(info.mVersion)
@@ -259,26 +270,96 @@
             valueSize / sizeof(decltype(info.mExtensionsWithVersion)::value_type));
         ANGLE_GET_INFO_RET(CL_PLATFORM_EXTENSIONS_WITH_VERSION, valueSize,
                            info.mExtensionsWithVersion.data());
-
-        // Filter out extensions which are not (yet) supported to be passed through
-        const ExtensionSet &supported = GetSupportedExtensions();
-        auto extIt                    = info.mExtensionsWithVersion.cbegin();
-        while (extIt != info.mExtensionsWithVersion.cend())
-        {
-            if (supported.find(extIt->name) != supported.cend())
-            {
-                ++extIt;
-            }
-            else
-            {
-                extIt = info.mExtensionsWithVersion.erase(extIt);
-            }
-        }
+        RemoveUnsupportedCLExtensions(info.mExtensionsWithVersion);
     }
 
     ANGLE_GET_INFO(CL_PLATFORM_HOST_TIMER_RESOLUTION, sizeof(info.mHostTimerRes),
                    &info.mHostTimerRes);
 
+    if (info.mVersion >= CL_MAKE_VERSION(1, 1, 0) &&
+        (platform->getDispatch().clSetEventCallback == nullptr ||
+         platform->getDispatch().clCreateSubBuffer == nullptr ||
+         platform->getDispatch().clSetMemObjectDestructorCallback == nullptr ||
+         platform->getDispatch().clCreateUserEvent == nullptr ||
+         platform->getDispatch().clSetUserEventStatus == nullptr ||
+         platform->getDispatch().clEnqueueReadBufferRect == nullptr ||
+         platform->getDispatch().clEnqueueWriteBufferRect == nullptr ||
+         platform->getDispatch().clEnqueueCopyBufferRect == nullptr))
+    {
+        ERR() << "Missing entry points for OpenCL 1.1";
+        return info;
+    }
+
+    if (info.mVersion >= CL_MAKE_VERSION(1, 2, 0) &&
+        (platform->getDispatch().clCreateSubDevices == nullptr ||
+         platform->getDispatch().clRetainDevice == nullptr ||
+         platform->getDispatch().clReleaseDevice == nullptr ||
+         platform->getDispatch().clCreateImage == nullptr ||
+         platform->getDispatch().clCreateProgramWithBuiltInKernels == nullptr ||
+         platform->getDispatch().clCompileProgram == nullptr ||
+         platform->getDispatch().clLinkProgram == nullptr ||
+         platform->getDispatch().clUnloadPlatformCompiler == nullptr ||
+         platform->getDispatch().clGetKernelArgInfo == nullptr ||
+         platform->getDispatch().clEnqueueFillBuffer == nullptr ||
+         platform->getDispatch().clEnqueueFillImage == nullptr ||
+         platform->getDispatch().clEnqueueMigrateMemObjects == nullptr ||
+         platform->getDispatch().clEnqueueMarkerWithWaitList == nullptr ||
+         platform->getDispatch().clEnqueueBarrierWithWaitList == nullptr ||
+         platform->getDispatch().clGetExtensionFunctionAddressForPlatform == nullptr))
+    {
+        ERR() << "Missing entry points for OpenCL 1.2";
+        return info;
+    }
+
+    if (info.mVersion >= CL_MAKE_VERSION(2, 0, 0) &&
+        (platform->getDispatch().clCreateCommandQueueWithProperties == nullptr ||
+         platform->getDispatch().clCreatePipe == nullptr ||
+         platform->getDispatch().clGetPipeInfo == nullptr ||
+         platform->getDispatch().clSVMAlloc == nullptr ||
+         platform->getDispatch().clSVMFree == nullptr ||
+         platform->getDispatch().clEnqueueSVMFree == nullptr ||
+         platform->getDispatch().clEnqueueSVMMemcpy == nullptr ||
+         platform->getDispatch().clEnqueueSVMMemFill == nullptr ||
+         platform->getDispatch().clEnqueueSVMMap == nullptr ||
+         platform->getDispatch().clEnqueueSVMUnmap == nullptr ||
+         platform->getDispatch().clCreateSamplerWithProperties == nullptr ||
+         platform->getDispatch().clSetKernelArgSVMPointer == nullptr ||
+         platform->getDispatch().clSetKernelExecInfo == nullptr))
+    {
+        ERR() << "Missing entry points for OpenCL 2.0";
+        return info;
+    }
+
+    if (info.mVersion >= CL_MAKE_VERSION(2, 1, 0) &&
+        (platform->getDispatch().clCloneKernel == nullptr ||
+         platform->getDispatch().clCreateProgramWithIL == nullptr ||
+         platform->getDispatch().clEnqueueSVMMigrateMem == nullptr ||
+         platform->getDispatch().clGetDeviceAndHostTimer == nullptr ||
+         platform->getDispatch().clGetHostTimer == nullptr ||
+         platform->getDispatch().clGetKernelSubGroupInfo == nullptr ||
+         platform->getDispatch().clSetDefaultDeviceCommandQueue == nullptr))
+    {
+        ERR() << "Missing entry points for OpenCL 2.1";
+        return info;
+    }
+
+    if (info.mVersion >= CL_MAKE_VERSION(2, 2, 0) &&
+        (platform->getDispatch().clSetProgramReleaseCallback == nullptr ||
+         platform->getDispatch().clSetProgramSpecializationConstant == nullptr))
+    {
+        ERR() << "Missing entry points for OpenCL 2.2";
+        return info;
+    }
+
+    if (info.mVersion >= CL_MAKE_VERSION(3, 0, 0) &&
+        (platform->getDispatch().clCreateBufferWithProperties == nullptr ||
+         platform->getDispatch().clCreateImageWithProperties == nullptr ||
+         platform->getDispatch().clSetContextDestructorCallback == nullptr))
+    {
+        ERR() << "Missing entry points for OpenCL 3.0";
+        return info;
+    }
+
     // Get this last, so the info is invalid if anything before fails
     ANGLE_GET_INFO_SIZE_RET(CL_PLATFORM_PROFILE, &valueSize);
     valString.resize(valueSize, '\0');
diff --git a/src/libANGLE/renderer/cl/cl_util.cpp b/src/libANGLE/renderer/cl/cl_util.cpp
new file mode 100644
index 0000000..168d16d
--- /dev/null
+++ b/src/libANGLE/renderer/cl/cl_util.cpp
@@ -0,0 +1,95 @@
+//
+// Copyright 2021 The ANGLE Project Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+//
+// cl_utils.cpp: Helper functions for the CL back end
+
+#include "libANGLE/renderer/cl/cl_util.h"
+
+#include "libANGLE/Debug.h"
+
+#include <cstdlib>
+
+namespace rx
+{
+
+cl_version ExtractCLVersion(const std::string &version)
+{
+    const std::string::size_type spacePos = version.find(' ');
+    const std::string::size_type dotPos   = version.find('.');
+    if (spacePos == std::string::npos || dotPos == std::string::npos)
+    {
+        ERR() << "Failed to extract version from OpenCL version string: " << version;
+        return 0u;
+    }
+
+    const long major = std::strtol(&version[spacePos + 1u], nullptr, 10);
+    const long minor = std::strtol(&version[dotPos + 1u], nullptr, 10);
+    if (major < 1 || major > 9 || minor < 0 || minor > 9)
+    {
+        ERR() << "Failed to extract version from OpenCL version string: " << version;
+        return 0u;
+    }
+    return CL_MAKE_VERSION(static_cast<cl_uint>(major), static_cast<cl_uint>(minor), 0);
+}
+
+void RemoveUnsupportedCLExtensions(std::string &extensions)
+{
+    if (extensions.empty())
+    {
+        return;
+    }
+    using SizeT    = std::string::size_type;
+    SizeT extStart = 0u;
+    SizeT spacePos = extensions.find(' ');
+
+    // Remove all unsupported extensions which are terminated by a space
+    while (spacePos != std::string::npos)
+    {
+        const SizeT length = spacePos - extStart;
+        if (IsCLExtensionSupported(extensions.substr(extStart, length)))
+        {
+            extStart = spacePos + 1u;
+        }
+        else
+        {
+            extensions.erase(extStart, length + 1u);
+        }
+        spacePos = extensions.find(' ', extStart);
+    }
+
+    // Remove last extension in string, if exists and unsupported
+    if (extStart < extensions.length())
+    {
+        const SizeT length = extensions.length() - extStart;
+        if (!IsCLExtensionSupported(extensions.substr(extStart, length)))
+        {
+            extensions.erase(extStart, length);
+        }
+    }
+
+    // Remove trailing spaces
+    while (!extensions.empty() && extensions.back() == ' ')
+    {
+        extensions.pop_back();
+    }
+}
+
+void RemoveUnsupportedCLExtensions(NameVersionVector &extensions)
+{
+    auto extIt = extensions.cbegin();
+    while (extIt != extensions.cend())
+    {
+        if (IsCLExtensionSupported(extIt->name))
+        {
+            ++extIt;
+        }
+        else
+        {
+            extIt = extensions.erase(extIt);
+        }
+    }
+}
+
+}  // namespace rx
diff --git a/src/libANGLE/renderer/cl/cl_util.h b/src/libANGLE/renderer/cl/cl_util.h
new file mode 100644
index 0000000..f1c6522
--- /dev/null
+++ b/src/libANGLE/renderer/cl/cl_util.h
@@ -0,0 +1,49 @@
+//
+// Copyright 2021 The ANGLE Project Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+//
+// cl_util.h: Helper functions for the CL back end
+
+#ifndef LIBANGLE_RENDERER_CL_CL_UTIL_H_
+#define LIBANGLE_RENDERER_CL_CL_UTIL_H_
+
+#include "libANGLE/renderer/CLtypes.h"
+
+#include "anglebase/no_destructor.h"
+
+#include <string>
+#include <unordered_set>
+
+#define ANGLE_SUPPORTED_OPENCL_EXTENSIONS "cl_khr_extended_versioning", "cl_khr_icd"
+
+namespace rx
+{
+
+// Extract numeric version from OpenCL version string
+cl_version ExtractCLVersion(const std::string &version);
+
+using CLExtensionSet = std::unordered_set<std::string>;
+
+// Get a set of OpenCL extensions which are supported to be passed through
+inline const CLExtensionSet &GetSupportedCLExtensions()
+{
+    static angle::base::NoDestructor<CLExtensionSet> sExtensions(
+        {ANGLE_SUPPORTED_OPENCL_EXTENSIONS});
+    return *sExtensions;
+}
+
+// Check if a specific OpenCL extensions is supported to be passed through
+inline bool IsCLExtensionSupported(const std::string &extension)
+{
+    const CLExtensionSet &supported = GetSupportedCLExtensions();
+    return supported.find(extension) != supported.cend();
+}
+
+// Filter out extensions which are not (yet) supported to be passed through
+void RemoveUnsupportedCLExtensions(std::string &extensions);
+void RemoveUnsupportedCLExtensions(NameVersionVector &extensions);
+
+}  // namespace rx
+
+#endif  // LIBANGLE_RENDERER_CL_CL_UTIL_H_
diff --git a/src/libANGLE/renderer/vulkan/CLDeviceVk.cpp b/src/libANGLE/renderer/vulkan/CLDeviceVk.cpp
index 2b121fa..027d629 100644
--- a/src/libANGLE/renderer/vulkan/CLDeviceVk.cpp
+++ b/src/libANGLE/renderer/vulkan/CLDeviceVk.cpp
@@ -39,6 +39,14 @@
     return CL_INVALID_VALUE;
 }
 
+cl_int CLDeviceVk::createSubDevices(const cl_device_partition_property *properties,
+                                    cl_uint numDevices,
+                                    InitList &deviceInitList,
+                                    cl_uint *numDevicesRet)
+{
+    return CL_INVALID_VALUE;
+}
+
 CLDeviceImpl::Info CLDeviceVk::GetInfo()
 {
     CLDeviceImpl::Info info;
diff --git a/src/libANGLE/renderer/vulkan/CLDeviceVk.h b/src/libANGLE/renderer/vulkan/CLDeviceVk.h
index ddb3eb7..dce61be 100644
--- a/src/libANGLE/renderer/vulkan/CLDeviceVk.h
+++ b/src/libANGLE/renderer/vulkan/CLDeviceVk.h
@@ -25,6 +25,11 @@
     cl_int getInfoStringLength(cl::DeviceInfo name, size_t *value) const override;
     cl_int getInfoString(cl::DeviceInfo name, size_t size, char *value) const override;
 
+    cl_int createSubDevices(const cl_device_partition_property *properties,
+                            cl_uint numDevices,
+                            InitList &deviceInitList,
+                            cl_uint *numDevicesRet) override;
+
     static Info GetInfo();
 };
 
diff --git a/src/libANGLE/validationCL.cpp b/src/libANGLE/validationCL.cpp
index fee729f..261afec 100644
--- a/src/libANGLE/validationCL.cpp
+++ b/src/libANGLE/validationCL.cpp
@@ -715,23 +715,33 @@
 }
 
 // CL 1.2
-cl_int ValidateCreateSubDevices(const Device *in_devicePacked,
+cl_int ValidateCreateSubDevices(const Device *in_device,
                                 const cl_device_partition_property *properties,
                                 cl_uint num_devices,
-                                Device *const *out_devicesPacked,
+                                Device *const *out_devices,
                                 const cl_uint *num_devices_ret)
 {
+    if (!Device::IsValid(in_device))
+    {
+        return CL_INVALID_DEVICE;
+    }
+    if (properties == nullptr || (*properties != CL_DEVICE_PARTITION_EQUALLY &&
+                                  *properties != CL_DEVICE_PARTITION_BY_COUNTS &&
+                                  *properties != CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN))
+    {
+        return CL_INVALID_VALUE;
+    }
     return CL_SUCCESS;
 }
 
-cl_int ValidateRetainDevice(const Device *devicePacked)
+cl_int ValidateRetainDevice(const Device *device)
 {
-    return CL_SUCCESS;
+    return Device::IsValid(device) ? CL_SUCCESS : CL_INVALID_DEVICE;
 }
 
-cl_int ValidateReleaseDevice(const Device *devicePacked)
+cl_int ValidateReleaseDevice(const Device *device)
 {
-    return CL_SUCCESS;
+    return Device::IsValid(device) ? CL_SUCCESS : CL_INVALID_DEVICE;
 }
 
 bool ValidateCreateImage(const Context *contextPacked,
diff --git a/src/libGLESv2/cl_stubs.cpp b/src/libGLESv2/cl_stubs.cpp
index 0cb46e4..f348060 100644
--- a/src/libGLESv2/cl_stubs.cpp
+++ b/src/libGLESv2/cl_stubs.cpp
@@ -132,20 +132,19 @@
                         Device **out_devices,
                         cl_uint *num_devices_ret)
 {
-    WARN_NOT_SUPPORTED(CreateSubDevices);
-    return 0;
+    return in_device->createSubDevices(properties, num_devices, out_devices, num_devices_ret);
 }
 
 cl_int RetainDevice(Device *device)
 {
-    WARN_NOT_SUPPORTED(RetainDevice);
-    return 0;
+    device->retain();
+    return CL_SUCCESS;
 }
 
 cl_int ReleaseDevice(Device *device)
 {
-    WARN_NOT_SUPPORTED(ReleaseDevice);
-    return 0;
+    device->release();
+    return CL_SUCCESS;
 }
 
 cl_int SetDefaultDeviceCommandQueue(Context *context, Device *device, CommandQueue *command_queue)
diff --git a/src/libGLESv2/entry_points_cl_autogen.cpp b/src/libGLESv2/entry_points_cl_autogen.cpp
index e2d6358..5089a34 100644
--- a/src/libGLESv2/entry_points_cl_autogen.cpp
+++ b/src/libGLESv2/entry_points_cl_autogen.cpp
@@ -68,10 +68,10 @@
     ANGLE_SCOPED_GLOBAL_LOCK();
     CL_EVENT(GetDeviceIDs,
              "platform = 0x%016" PRIxPTR
-             ", device_type = %lu, num_entries = %u, devices = 0x%016" PRIxPTR
+             ", device_type = %llu, num_entries = %u, devices = 0x%016" PRIxPTR
              ", num_devices = 0x%016" PRIxPTR "",
-             (uintptr_t)platform, device_type, num_entries, (uintptr_t)devices,
-             (uintptr_t)num_devices);
+             (uintptr_t)platform, static_cast<unsigned long long>(device_type), num_entries,
+             (uintptr_t)devices, (uintptr_t)num_devices);
 
     Platform *platformPacked = PackParam<Platform *>(platform);
     Device **devicesPacked   = PackParam<Device **>(devices);
@@ -145,10 +145,10 @@
 {
     ANGLE_SCOPED_GLOBAL_LOCK();
     CL_EVENT(CreateContextFromType,
-             "properties = 0x%016" PRIxPTR ", device_type = %lu, pfn_notify = 0x%016" PRIxPTR
+             "properties = 0x%016" PRIxPTR ", device_type = %llu, pfn_notify = 0x%016" PRIxPTR
              ", user_data = 0x%016" PRIxPTR ", errcode_ret = 0x%016" PRIxPTR "",
-             (uintptr_t)properties, device_type, (uintptr_t)pfn_notify, (uintptr_t)user_data,
-             (uintptr_t)errcode_ret);
+             (uintptr_t)properties, static_cast<unsigned long long>(device_type),
+             (uintptr_t)pfn_notify, (uintptr_t)user_data, (uintptr_t)errcode_ret);
 
     ANGLE_CL_VALIDATE_POINTER(CreateContextFromType, properties, device_type, pfn_notify, user_data,
                               errcode_ret);
@@ -260,9 +260,10 @@
 {
     ANGLE_SCOPED_GLOBAL_LOCK();
     CL_EVENT(CreateBuffer,
-             "context = 0x%016" PRIxPTR ", flags = %lu, size = %zu, host_ptr = 0x%016" PRIxPTR
+             "context = 0x%016" PRIxPTR ", flags = %llu, size = %zu, host_ptr = 0x%016" PRIxPTR
              ", errcode_ret = 0x%016" PRIxPTR "",
-             (uintptr_t)context, flags, size, (uintptr_t)host_ptr, (uintptr_t)errcode_ret);
+             (uintptr_t)context, static_cast<unsigned long long>(flags), size, (uintptr_t)host_ptr,
+             (uintptr_t)errcode_ret);
 
     Context *contextPacked = PackParam<Context *>(context);
 
@@ -305,10 +306,10 @@
     ANGLE_SCOPED_GLOBAL_LOCK();
     CL_EVENT(GetSupportedImageFormats,
              "context = 0x%016" PRIxPTR
-             ", flags = %lu, image_type = %u, num_entries = %u, image_formats = 0x%016" PRIxPTR
+             ", flags = %llu, image_type = %u, num_entries = %u, image_formats = 0x%016" PRIxPTR
              ", num_image_formats = 0x%016" PRIxPTR "",
-             (uintptr_t)context, flags, image_type, num_entries, (uintptr_t)image_formats,
-             (uintptr_t)num_image_formats);
+             (uintptr_t)context, static_cast<unsigned long long>(flags), image_type, num_entries,
+             (uintptr_t)image_formats, (uintptr_t)num_image_formats);
 
     Context *contextPacked         = PackParam<Context *>(context);
     MemObjectType image_typePacked = PackParam<MemObjectType>(image_type);
@@ -1096,12 +1097,12 @@
     ANGLE_SCOPED_GLOBAL_LOCK();
     CL_EVENT(EnqueueMapBuffer,
              "command_queue = 0x%016" PRIxPTR ", buffer = 0x%016" PRIxPTR
-             ", blocking_map = %u, map_flags = %lu, offset = %zu, size = %zu, "
+             ", blocking_map = %u, map_flags = %llu, offset = %zu, size = %zu, "
              "num_events_in_wait_list = %u, event_wait_list = 0x%016" PRIxPTR
              ", event = 0x%016" PRIxPTR ", errcode_ret = 0x%016" PRIxPTR "",
-             (uintptr_t)command_queue, (uintptr_t)buffer, blocking_map, map_flags, offset, size,
-             num_events_in_wait_list, (uintptr_t)event_wait_list, (uintptr_t)event,
-             (uintptr_t)errcode_ret);
+             (uintptr_t)command_queue, (uintptr_t)buffer, blocking_map,
+             static_cast<unsigned long long>(map_flags), offset, size, num_events_in_wait_list,
+             (uintptr_t)event_wait_list, (uintptr_t)event, (uintptr_t)errcode_ret);
 
     CommandQueue *command_queuePacked   = PackParam<CommandQueue *>(command_queue);
     Memory *bufferPacked                = PackParam<Memory *>(buffer);
@@ -1133,15 +1134,15 @@
     ANGLE_SCOPED_GLOBAL_LOCK();
     CL_EVENT(EnqueueMapImage,
              "command_queue = 0x%016" PRIxPTR ", image = 0x%016" PRIxPTR
-             ", blocking_map = %u, map_flags = %lu, origin = 0x%016" PRIxPTR
+             ", blocking_map = %u, map_flags = %llu, origin = 0x%016" PRIxPTR
              ", region = 0x%016" PRIxPTR ", image_row_pitch = 0x%016" PRIxPTR
              ", image_slice_pitch = 0x%016" PRIxPTR
              ", num_events_in_wait_list = %u, event_wait_list = 0x%016" PRIxPTR
              ", event = 0x%016" PRIxPTR ", errcode_ret = 0x%016" PRIxPTR "",
-             (uintptr_t)command_queue, (uintptr_t)image, blocking_map, map_flags, (uintptr_t)origin,
-             (uintptr_t)region, (uintptr_t)image_row_pitch, (uintptr_t)image_slice_pitch,
-             num_events_in_wait_list, (uintptr_t)event_wait_list, (uintptr_t)event,
-             (uintptr_t)errcode_ret);
+             (uintptr_t)command_queue, (uintptr_t)image, blocking_map,
+             static_cast<unsigned long long>(map_flags), (uintptr_t)origin, (uintptr_t)region,
+             (uintptr_t)image_row_pitch, (uintptr_t)image_slice_pitch, num_events_in_wait_list,
+             (uintptr_t)event_wait_list, (uintptr_t)event, (uintptr_t)errcode_ret);
 
     CommandQueue *command_queuePacked   = PackParam<CommandQueue *>(command_queue);
     Memory *imagePacked                 = PackParam<Memory *>(image);
@@ -1266,8 +1267,9 @@
     ANGLE_SCOPED_GLOBAL_LOCK();
     CL_EVENT(SetCommandQueueProperty,
              "command_queue = 0x%016" PRIxPTR
-             ", properties = %lu, enable = %u, old_properties = 0x%016" PRIxPTR "",
-             (uintptr_t)command_queue, properties, enable, (uintptr_t)old_properties);
+             ", properties = %llu, enable = %u, old_properties = 0x%016" PRIxPTR "",
+             (uintptr_t)command_queue, static_cast<unsigned long long>(properties), enable,
+             (uintptr_t)old_properties);
 
     CommandQueue *command_queuePacked = PackParam<CommandQueue *>(command_queue);
 
@@ -1289,11 +1291,11 @@
     ANGLE_SCOPED_GLOBAL_LOCK();
     CL_EVENT(
         CreateImage2D,
-        "context = 0x%016" PRIxPTR ", flags = %lu, image_format = 0x%016" PRIxPTR
+        "context = 0x%016" PRIxPTR ", flags = %llu, image_format = 0x%016" PRIxPTR
         ", image_width = %zu, image_height = %zu, image_row_pitch = %zu, host_ptr = 0x%016" PRIxPTR
         ", errcode_ret = 0x%016" PRIxPTR "",
-        (uintptr_t)context, flags, (uintptr_t)image_format, image_width, image_height,
-        image_row_pitch, (uintptr_t)host_ptr, (uintptr_t)errcode_ret);
+        (uintptr_t)context, static_cast<unsigned long long>(flags), (uintptr_t)image_format,
+        image_width, image_height, image_row_pitch, (uintptr_t)host_ptr, (uintptr_t)errcode_ret);
 
     Context *contextPacked = PackParam<Context *>(context);
 
@@ -1316,13 +1318,14 @@
                                    cl_int *errcode_ret)
 {
     ANGLE_SCOPED_GLOBAL_LOCK();
-    CL_EVENT(
-        CreateImage3D,
-        "context = 0x%016" PRIxPTR ", flags = %lu, image_format = 0x%016" PRIxPTR
-        ", image_width = %zu, image_height = %zu, image_depth = %zu, image_row_pitch = %zu, "
-        "image_slice_pitch = %zu, host_ptr = 0x%016" PRIxPTR ", errcode_ret = 0x%016" PRIxPTR "",
-        (uintptr_t)context, flags, (uintptr_t)image_format, image_width, image_height, image_depth,
-        image_row_pitch, image_slice_pitch, (uintptr_t)host_ptr, (uintptr_t)errcode_ret);
+    CL_EVENT(CreateImage3D,
+             "context = 0x%016" PRIxPTR ", flags = %llu, image_format = 0x%016" PRIxPTR
+             ", image_width = %zu, image_height = %zu, image_depth = %zu, image_row_pitch = %zu, "
+             "image_slice_pitch = %zu, host_ptr = 0x%016" PRIxPTR ", errcode_ret = 0x%016" PRIxPTR
+             "",
+             (uintptr_t)context, static_cast<unsigned long long>(flags), (uintptr_t)image_format,
+             image_width, image_height, image_depth, image_row_pitch, image_slice_pitch,
+             (uintptr_t)host_ptr, (uintptr_t)errcode_ret);
 
     Context *contextPacked = PackParam<Context *>(context);
 
@@ -1406,8 +1409,9 @@
     ANGLE_SCOPED_GLOBAL_LOCK();
     CL_EVENT(CreateCommandQueue,
              "context = 0x%016" PRIxPTR ", device = 0x%016" PRIxPTR
-             ", properties = %lu, errcode_ret = 0x%016" PRIxPTR "",
-             (uintptr_t)context, (uintptr_t)device, properties, (uintptr_t)errcode_ret);
+             ", properties = %llu, errcode_ret = 0x%016" PRIxPTR "",
+             (uintptr_t)context, (uintptr_t)device, static_cast<unsigned long long>(properties),
+             (uintptr_t)errcode_ret);
 
     Context *contextPacked = PackParam<Context *>(context);
     Device *devicePacked   = PackParam<Device *>(device);
@@ -1479,10 +1483,10 @@
     ANGLE_SCOPED_GLOBAL_LOCK();
     CL_EVENT(CreateSubBuffer,
              "buffer = 0x%016" PRIxPTR
-             ", flags = %lu, buffer_create_type = %u, buffer_create_info = 0x%016" PRIxPTR
+             ", flags = %llu, buffer_create_type = %u, buffer_create_info = 0x%016" PRIxPTR
              ", errcode_ret = 0x%016" PRIxPTR "",
-             (uintptr_t)buffer, flags, buffer_create_type, (uintptr_t)buffer_create_info,
-             (uintptr_t)errcode_ret);
+             (uintptr_t)buffer, static_cast<unsigned long long>(flags), buffer_create_type,
+             (uintptr_t)buffer_create_info, (uintptr_t)errcode_ret);
 
     Memory *bufferPacked = PackParam<Memory *>(buffer);
 
@@ -1751,11 +1755,11 @@
 {
     ANGLE_SCOPED_GLOBAL_LOCK();
     CL_EVENT(CreateImage,
-             "context = 0x%016" PRIxPTR ", flags = %lu, image_format = 0x%016" PRIxPTR
+             "context = 0x%016" PRIxPTR ", flags = %llu, image_format = 0x%016" PRIxPTR
              ", image_desc = 0x%016" PRIxPTR ", host_ptr = 0x%016" PRIxPTR
              ", errcode_ret = 0x%016" PRIxPTR "",
-             (uintptr_t)context, flags, (uintptr_t)image_format, (uintptr_t)image_desc,
-             (uintptr_t)host_ptr, (uintptr_t)errcode_ret);
+             (uintptr_t)context, static_cast<unsigned long long>(flags), (uintptr_t)image_format,
+             (uintptr_t)image_desc, (uintptr_t)host_ptr, (uintptr_t)errcode_ret);
 
     Context *contextPacked = PackParam<Context *>(context);
 
@@ -1967,10 +1971,11 @@
     ANGLE_SCOPED_GLOBAL_LOCK();
     CL_EVENT(EnqueueMigrateMemObjects,
              "command_queue = 0x%016" PRIxPTR ", num_mem_objects = %u, mem_objects = 0x%016" PRIxPTR
-             ", flags = %lu, num_events_in_wait_list = %u, event_wait_list = 0x%016" PRIxPTR
+             ", flags = %llu, num_events_in_wait_list = %u, event_wait_list = 0x%016" PRIxPTR
              ", event = 0x%016" PRIxPTR "",
-             (uintptr_t)command_queue, num_mem_objects, (uintptr_t)mem_objects, flags,
-             num_events_in_wait_list, (uintptr_t)event_wait_list, (uintptr_t)event);
+             (uintptr_t)command_queue, num_mem_objects, (uintptr_t)mem_objects,
+             static_cast<unsigned long long>(flags), num_events_in_wait_list,
+             (uintptr_t)event_wait_list, (uintptr_t)event);
 
     CommandQueue *command_queuePacked   = PackParam<CommandQueue *>(command_queue);
     Memory *const *mem_objectsPacked    = PackParam<Memory *const *>(mem_objects);
@@ -2081,10 +2086,10 @@
     CL_EVENT(
         CreatePipe,
         "context = 0x%016" PRIxPTR
-        ", flags = %lu, pipe_packet_size = %u, pipe_max_packets = %u, properties = 0x%016" PRIxPTR
+        ", flags = %llu, pipe_packet_size = %u, pipe_max_packets = %u, properties = 0x%016" PRIxPTR
         ", errcode_ret = 0x%016" PRIxPTR "",
-        (uintptr_t)context, flags, pipe_packet_size, pipe_max_packets, (uintptr_t)properties,
-        (uintptr_t)errcode_ret);
+        (uintptr_t)context, static_cast<unsigned long long>(flags), pipe_packet_size,
+        pipe_max_packets, (uintptr_t)properties, (uintptr_t)errcode_ret);
 
     Context *contextPacked = PackParam<Context *>(context);
 
@@ -2125,8 +2130,8 @@
                              cl_uint alignment)
 {
     ANGLE_SCOPED_GLOBAL_LOCK();
-    CL_EVENT(SVMAlloc, "context = 0x%016" PRIxPTR ", flags = %lu, size = %zu, alignment = %u",
-             (uintptr_t)context, flags, size, alignment);
+    CL_EVENT(SVMAlloc, "context = 0x%016" PRIxPTR ", flags = %llu, size = %zu, alignment = %u",
+             (uintptr_t)context, static_cast<unsigned long long>(flags), size, alignment);
 
     Context *contextPacked = PackParam<Context *>(context);
 
@@ -2308,11 +2313,12 @@
     ANGLE_SCOPED_GLOBAL_LOCK();
     CL_EVENT(EnqueueSVMMap,
              "command_queue = 0x%016" PRIxPTR
-             ", blocking_map = %u, flags = %lu, svm_ptr = 0x%016" PRIxPTR
+             ", blocking_map = %u, flags = %llu, svm_ptr = 0x%016" PRIxPTR
              ", size = %zu, num_events_in_wait_list = %u, event_wait_list = 0x%016" PRIxPTR
              ", event = 0x%016" PRIxPTR "",
-             (uintptr_t)command_queue, blocking_map, flags, (uintptr_t)svm_ptr, size,
-             num_events_in_wait_list, (uintptr_t)event_wait_list, (uintptr_t)event);
+             (uintptr_t)command_queue, blocking_map, static_cast<unsigned long long>(flags),
+             (uintptr_t)svm_ptr, size, num_events_in_wait_list, (uintptr_t)event_wait_list,
+             (uintptr_t)event);
 
     CommandQueue *command_queuePacked   = PackParam<CommandQueue *>(command_queue);
     Event *const *event_wait_listPacked = PackParam<Event *const *>(event_wait_list);
@@ -2476,10 +2482,11 @@
     CL_EVENT(EnqueueSVMMigrateMem,
              "command_queue = 0x%016" PRIxPTR
              ", num_svm_pointers = %u, svm_pointers = 0x%016" PRIxPTR ", sizes = 0x%016" PRIxPTR
-             ", flags = %lu, num_events_in_wait_list = %u, event_wait_list = 0x%016" PRIxPTR
+             ", flags = %llu, num_events_in_wait_list = %u, event_wait_list = 0x%016" PRIxPTR
              ", event = 0x%016" PRIxPTR "",
              (uintptr_t)command_queue, num_svm_pointers, (uintptr_t)svm_pointers, (uintptr_t)sizes,
-             flags, num_events_in_wait_list, (uintptr_t)event_wait_list, (uintptr_t)event);
+             static_cast<unsigned long long>(flags), num_events_in_wait_list,
+             (uintptr_t)event_wait_list, (uintptr_t)event);
 
     CommandQueue *command_queuePacked   = PackParam<CommandQueue *>(command_queue);
     Event *const *event_wait_listPacked = PackParam<Event *const *>(event_wait_list);
@@ -2560,10 +2567,10 @@
     ANGLE_SCOPED_GLOBAL_LOCK();
     CL_EVENT(CreateBufferWithProperties,
              "context = 0x%016" PRIxPTR ", properties = 0x%016" PRIxPTR
-             ", flags = %lu, size = %zu, host_ptr = 0x%016" PRIxPTR ", errcode_ret = 0x%016" PRIxPTR
-             "",
-             (uintptr_t)context, (uintptr_t)properties, flags, size, (uintptr_t)host_ptr,
-             (uintptr_t)errcode_ret);
+             ", flags = %llu, size = %zu, host_ptr = 0x%016" PRIxPTR
+             ", errcode_ret = 0x%016" PRIxPTR "",
+             (uintptr_t)context, (uintptr_t)properties, static_cast<unsigned long long>(flags),
+             size, (uintptr_t)host_ptr, (uintptr_t)errcode_ret);
 
     Context *contextPacked = PackParam<Context *>(context);
 
@@ -2585,10 +2592,11 @@
     ANGLE_SCOPED_GLOBAL_LOCK();
     CL_EVENT(CreateImageWithProperties,
              "context = 0x%016" PRIxPTR ", properties = 0x%016" PRIxPTR
-             ", flags = %lu, image_format = 0x%016" PRIxPTR ", image_desc = 0x%016" PRIxPTR
+             ", flags = %llu, image_format = 0x%016" PRIxPTR ", image_desc = 0x%016" PRIxPTR
              ", host_ptr = 0x%016" PRIxPTR ", errcode_ret = 0x%016" PRIxPTR "",
-             (uintptr_t)context, (uintptr_t)properties, flags, (uintptr_t)image_format,
-             (uintptr_t)image_desc, (uintptr_t)host_ptr, (uintptr_t)errcode_ret);
+             (uintptr_t)context, (uintptr_t)properties, static_cast<unsigned long long>(flags),
+             (uintptr_t)image_format, (uintptr_t)image_desc, (uintptr_t)host_ptr,
+             (uintptr_t)errcode_ret);
 
     Context *contextPacked = PackParam<Context *>(context);
 
diff --git a/src/libGLESv2/entry_points_cl_utils.h b/src/libGLESv2/entry_points_cl_utils.h
index e09d350..b0c0c64 100644
--- a/src/libGLESv2/entry_points_cl_utils.h
+++ b/src/libGLESv2/entry_points_cl_utils.h
@@ -9,11 +9,13 @@
 #ifndef LIBGLESV2_ENTRY_POINTS_CL_UTILS_H_
 #define LIBGLESV2_ENTRY_POINTS_CL_UTILS_H_
 
+#include "libANGLE/Debug.h"
+
 #include <cinttypes>
 #include <cstdio>
 #include <type_traits>
 
-#if defined(ANGLE_TRACE_ENABLED)
+#if defined(ANGLE_ENABLE_DEBUG_TRACE)
 #    define CL_EVENT(entryPoint, ...)                    \
         std::printf("CL " #entryPoint ": " __VA_ARGS__); \
         std::printf("\n")