| // |
| // Copyright 2012 Francisco Jerez |
| // |
| // Permission is hereby granted, free of charge, to any person obtaining a |
| // copy of this software and associated documentation files (the "Software"), |
| // to deal in the Software without restriction, including without limitation |
| // the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| // and/or sell copies of the Software, and to permit persons to whom the |
| // Software is furnished to do so, subject to the following conditions: |
| // |
| // The above copyright notice and this permission notice shall be included in |
| // all copies or substantial portions of the Software. |
| // |
| // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
| // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
| // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
| // OTHER DEALINGS IN THE SOFTWARE. |
| // |
| |
| #include <algorithm> |
| #include <unistd.h> |
| #include "core/device.hpp" |
| #include "core/platform.hpp" |
| #include "pipe/p_screen.h" |
| #include "pipe/p_state.h" |
| #include "util/bitscan.h" |
| #include "util/u_debug.h" |
| |
| using namespace clover; |
| |
| namespace { |
| template<typename T> |
| std::vector<T> |
| get_compute_param(pipe_screen *pipe, pipe_shader_ir ir_format, |
| pipe_compute_cap cap) { |
| int sz = pipe->get_compute_param(pipe, ir_format, cap, NULL); |
| std::vector<T> v(sz / sizeof(T)); |
| |
| pipe->get_compute_param(pipe, ir_format, cap, &v.front()); |
| return v; |
| } |
| } |
| |
| device::device(clover::platform &platform, pipe_loader_device *ldev) : |
| platform(platform), ldev(ldev) { |
| pipe = pipe_loader_create_screen(ldev); |
| if (pipe && pipe->get_param(pipe, PIPE_CAP_COMPUTE)) { |
| if (supports_ir(PIPE_SHADER_IR_NATIVE)) |
| return; |
| #ifdef HAVE_CLOVER_SPIRV |
| if (supports_ir(PIPE_SHADER_IR_NIR_SERIALIZED)) |
| return; |
| #endif |
| } |
| if (pipe) |
| pipe->destroy(pipe); |
| throw error(CL_INVALID_DEVICE); |
| } |
| |
device::~device() {
   // Destroy the screen before releasing the loader device that created it.
   if (pipe)
      pipe->destroy(pipe);
   if (ldev)
      pipe_loader_release(&ldev, 1);
}
| |
| bool |
| device::operator==(const device &dev) const { |
| return this == &dev; |
| } |
| |
| cl_device_type |
| device::type() const { |
| switch (ldev->type) { |
| case PIPE_LOADER_DEVICE_SOFTWARE: |
| return CL_DEVICE_TYPE_CPU; |
| case PIPE_LOADER_DEVICE_PCI: |
| case PIPE_LOADER_DEVICE_PLATFORM: |
| return CL_DEVICE_TYPE_GPU; |
| default: |
| unreachable("Unknown device type."); |
| } |
| } |
| |
| cl_uint |
| device::vendor_id() const { |
| switch (ldev->type) { |
| case PIPE_LOADER_DEVICE_SOFTWARE: |
| case PIPE_LOADER_DEVICE_PLATFORM: |
| return 0; |
| case PIPE_LOADER_DEVICE_PCI: |
| return ldev->u.pci.vendor_id; |
| default: |
| unreachable("Unknown device type."); |
| } |
| } |
| |
| size_t |
| device::max_images_read() const { |
| return PIPE_MAX_SHADER_IMAGES; |
| } |
| |
| size_t |
| device::max_images_write() const { |
| return PIPE_MAX_SHADER_IMAGES; |
| } |
| |
| size_t |
| device::max_image_buffer_size() const { |
| return pipe->get_param(pipe, PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE); |
| } |
| |
| cl_uint |
| device::max_image_levels_2d() const { |
| return util_last_bit(pipe->get_param(pipe, PIPE_CAP_MAX_TEXTURE_2D_SIZE)); |
| } |
| |
| cl_uint |
| device::max_image_levels_3d() const { |
| return pipe->get_param(pipe, PIPE_CAP_MAX_TEXTURE_3D_LEVELS); |
| } |
| |
| size_t |
| device::max_image_array_number() const { |
| return pipe->get_param(pipe, PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS); |
| } |
| |
| cl_uint |
| device::max_samplers() const { |
| return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE, |
| PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS); |
| } |
| |
| cl_ulong |
| device::max_mem_global() const { |
| return get_compute_param<uint64_t>(pipe, ir_format(), |
| PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE)[0]; |
| } |
| |
| cl_ulong |
| device::max_mem_local() const { |
| return get_compute_param<uint64_t>(pipe, ir_format(), |
| PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE)[0]; |
| } |
| |
| cl_ulong |
| device::max_mem_input() const { |
| return get_compute_param<uint64_t>(pipe, ir_format(), |
| PIPE_COMPUTE_CAP_MAX_INPUT_SIZE)[0]; |
| } |
| |
| cl_ulong |
| device::max_const_buffer_size() const { |
| return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE, |
| PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE); |
| } |
| |
| cl_uint |
| device::max_const_buffers() const { |
| return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE, |
| PIPE_SHADER_CAP_MAX_CONST_BUFFERS); |
| } |
| |
| size_t |
| device::max_threads_per_block() const { |
| return get_compute_param<uint64_t>( |
| pipe, ir_format(), PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK)[0]; |
| } |
| |
| cl_ulong |
| device::max_mem_alloc_size() const { |
| return get_compute_param<uint64_t>(pipe, ir_format(), |
| PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE)[0]; |
| } |
| |
| cl_uint |
| device::max_clock_frequency() const { |
| return get_compute_param<uint32_t>(pipe, ir_format(), |
| PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY)[0]; |
| } |
| |
| cl_uint |
| device::max_compute_units() const { |
| return get_compute_param<uint32_t>(pipe, ir_format(), |
| PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS)[0]; |
| } |
| |
| bool |
| device::image_support() const { |
| return get_compute_param<uint32_t>(pipe, ir_format(), |
| PIPE_COMPUTE_CAP_IMAGES_SUPPORTED)[0]; |
| } |
| |
| bool |
| device::has_doubles() const { |
| return pipe->get_param(pipe, PIPE_CAP_DOUBLES); |
| } |
| |
| bool |
| device::has_halves() const { |
| return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE, |
| PIPE_SHADER_CAP_FP16); |
| } |
| |
| bool |
| device::has_int64_atomics() const { |
| return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE, |
| PIPE_SHADER_CAP_INT64_ATOMICS); |
| } |
| |
| bool |
| device::has_unified_memory() const { |
| return pipe->get_param(pipe, PIPE_CAP_UMA); |
| } |
| |
| size_t |
| device::mem_base_addr_align() const { |
| return std::max((size_t)sysconf(_SC_PAGESIZE), sizeof(cl_long) * 16); |
| } |
| |
| cl_device_svm_capabilities |
| device::svm_support() const { |
| // Without CAP_RESOURCE_FROM_USER_MEMORY SVM and CL_MEM_USE_HOST_PTR |
| // interactions won't work according to spec as clover manages a GPU side |
| // copy of the host data. |
| // |
| // The biggest problem are memory buffers created with CL_MEM_USE_HOST_PTR, |
| // but the application and/or the kernel updates the memory via SVM and not |
| // the cl_mem buffer. |
| // We can't even do proper tracking on what memory might have been accessed |
| // as the host ptr to the buffer could be within a SVM region, where through |
| // the CL API there is no reliable way of knowing if a certain cl_mem buffer |
| // was accessed by a kernel or not and the runtime can't reliably know from |
| // which side the GPU buffer content needs to be updated. |
| // |
| // Another unsolvable scenario is a cl_mem object passed by cl_mem reference |
| // and SVM pointer into the same kernel at the same time. |
| if (pipe->get_param(pipe, PIPE_CAP_RESOURCE_FROM_USER_MEMORY) && |
| pipe->get_param(pipe, PIPE_CAP_SYSTEM_SVM)) |
| // we can emulate all lower levels if we support fine grain system |
| return CL_DEVICE_SVM_FINE_GRAIN_SYSTEM | |
| CL_DEVICE_SVM_COARSE_GRAIN_BUFFER | |
| CL_DEVICE_SVM_FINE_GRAIN_BUFFER; |
| return 0; |
| } |
| |
| std::vector<size_t> |
| device::max_block_size() const { |
| auto v = get_compute_param<uint64_t>(pipe, ir_format(), |
| PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE); |
| return { v.begin(), v.end() }; |
| } |
| |
| cl_uint |
| device::subgroup_size() const { |
| return get_compute_param<uint32_t>(pipe, ir_format(), |
| PIPE_COMPUTE_CAP_SUBGROUP_SIZE)[0]; |
| } |
| |
| cl_uint |
| device::address_bits() const { |
| return get_compute_param<uint32_t>(pipe, ir_format(), |
| PIPE_COMPUTE_CAP_ADDRESS_BITS)[0]; |
| } |
| |
| std::string |
| device::device_name() const { |
| return pipe->get_name(pipe); |
| } |
| |
| std::string |
| device::vendor_name() const { |
| return pipe->get_device_vendor(pipe); |
| } |
| |
| enum pipe_shader_ir |
| device::ir_format() const { |
| if (supports_ir(PIPE_SHADER_IR_NATIVE)) |
| return PIPE_SHADER_IR_NATIVE; |
| |
| assert(supports_ir(PIPE_SHADER_IR_NIR_SERIALIZED)); |
| return PIPE_SHADER_IR_NIR_SERIALIZED; |
| } |
| |
| std::string |
| device::ir_target() const { |
| std::vector<char> target = get_compute_param<char>( |
| pipe, ir_format(), PIPE_COMPUTE_CAP_IR_TARGET); |
| return { target.data() }; |
| } |
| |
| enum pipe_endian |
| device::endianness() const { |
| return (enum pipe_endian)pipe->get_param(pipe, PIPE_CAP_ENDIANNESS); |
| } |
| |
| std::string |
| device::device_version() const { |
| static const std::string device_version = |
| debug_get_option("CLOVER_DEVICE_VERSION_OVERRIDE", "1.1"); |
| return device_version; |
| } |
| |
| std::string |
| device::device_clc_version() const { |
| static const std::string device_clc_version = |
| debug_get_option("CLOVER_DEVICE_CLC_VERSION_OVERRIDE", "1.1"); |
| return device_clc_version; |
| } |
| |
| bool |
| device::supports_ir(enum pipe_shader_ir ir) const { |
| return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE, |
| PIPE_SHADER_CAP_SUPPORTED_IRS) & (1 << ir); |
| } |
| |
| std::string |
| device::supported_extensions() const { |
| return |
| "cl_khr_byte_addressable_store" |
| " cl_khr_global_int32_base_atomics" |
| " cl_khr_global_int32_extended_atomics" |
| " cl_khr_local_int32_base_atomics" |
| " cl_khr_local_int32_extended_atomics" |
| + std::string(has_int64_atomics() ? " cl_khr_int64_base_atomics" : "") |
| + std::string(has_int64_atomics() ? " cl_khr_int64_extended_atomics" : "") |
| + std::string(has_doubles() ? " cl_khr_fp64" : "") |
| + std::string(has_halves() ? " cl_khr_fp16" : "") |
| + std::string(svm_support() ? " cl_arm_shared_virtual_memory" : ""); |
| } |
| |
| const void * |
| device::get_compiler_options(enum pipe_shader_ir ir) const { |
| return pipe->get_compiler_options(pipe, ir, PIPE_SHADER_COMPUTE); |
| } |