/*
 * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */

#include "nvk_edb_bview_cache.h"
#include "nil.h"
#include "nvk_device.h"
#include "nvk_descriptor_types.h"
#include "nvk_physical_device.h"
#include "util/format/u_format.h"
#include "util/hash_table.h"
PRAGMA_DIAGNOSTIC_PUSH
PRAGMA_DIAGNOSTIC_ERROR(-Wpadded)
struct bvdesc_key {
   uint16_t format;
   uint16_t chunk : 12;
   uint16_t rgb_offset : 4;
};
PRAGMA_DIAGNOSTIC_POP
static_assert(sizeof(struct bvdesc_key) == 4, "bvdesc_key has no holes");
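
/* Size in bytes of the hardware buffer view used for the given format */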
static uint64_t
view_size_B(enum pipe_format format)
{
   const uint8_t el_size_B = util_format_get_blocksize(format);
   if (util_is_power_of_two_nonzero(el_size_B)) {
      return 4ull << 30;
   } else {
      /* On Ampere (but not Turing or Maxwell for some reason), we're limited
       * to 3GB for RGB32 buffers.
       */
      assert(util_format_get_nr_components(format) == 3);
      return 3ull << 30;
   }
}

/* Stride in VA between successive views.  Views are spaced at half their
 * size, so any client view of at most half the view size lands entirely
 * within some chunk.
 */
static uint64_t
view_stride_B(enum pipe_format format)
{
   return view_size_B(format) / 2;
}

/* Usable size in elements of the hardware buffer view */
static uint32_t
view_size_el(enum pipe_format format)
{
   /* Drop the last element: any client view that would use the last
    * element of this chunk is a maximum-size view starting at the middle
    * of the chunk, and such a view belongs in the next chunk instead.
    */
   return (view_size_B(format) / util_format_get_blocksize(format)) - 1;
}
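
/* Base VA of the given chunk's view */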
static uint64_t
base_addr_for_chunk(struct nvk_device *dev, uint16_t chunk,
                    enum pipe_format format)
{
   return dev->nvkmd->va_start + chunk * view_stride_B(format);
}

/* Index of the last chunk whose base address is at or below addr */
static uint64_t
chunk_for_addr(struct nvk_device *dev, uint64_t addr, enum pipe_format format)
{
   assert(addr >= dev->nvkmd->va_start);
   return (addr - dev->nvkmd->va_start) / view_stride_B(format);
}
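
/* Bakes the hardware buffer view described by key and records its
 * descriptor table index in the cache.
 */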
static VkResult
nvk_edb_bview_cache_add_bview(struct nvk_device *dev,
                              struct nvk_edb_bview_cache *cache,
                              struct bvdesc_key key)
{
   /* Pack the key bits into a pointer-sized value so we can use the
    * pointer hash table directly, without allocating key storage.
    */
   void *void_key = NULL;
   STATIC_ASSERT(sizeof(key) <= sizeof(void_key));
   memcpy(&void_key, &key, sizeof(key));

   const uint64_t base_addr =
      base_addr_for_chunk(dev, key.chunk, key.format) + key.rgb_offset;

   /* Clamp views near the end of the VA space so they don't extend past
    * va_end.
    */
   uint32_t size_el = view_size_el(key.format);
   const uint8_t el_size_B = util_format_get_blocksize(key.format);
   if (base_addr + (uint64_t)size_el * el_size_B > dev->nvkmd->va_end) {
      const uint64_t size_B = dev->nvkmd->va_end - base_addr;
      size_el = size_B / el_size_B;
   }

   uint32_t desc[8];
   nil_buffer_fill_tic(&nvk_device_physical(dev)->info, base_addr,
                       nil_format(key.format), size_el, &desc);

   uint32_t index;
   VkResult result = nvk_descriptor_table_add(dev, &dev->images,
                                              desc, sizeof(desc), &index);
   if (result != VK_SUCCESS)
      return result;

   _mesa_hash_table_insert(cache->cache, void_key, (void *)(uintptr_t)index);

   return VK_SUCCESS;
}
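
/* Returns the descriptor table index of the cached view for key, or 0 if
 * the cache contains no such view.
 */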
static uint32_t
nvk_edb_bview_cache_lookup_bview(struct nvk_device *dev,
                                 struct nvk_edb_bview_cache *cache,
                                 struct bvdesc_key key)
{
   void *void_key = NULL;
   STATIC_ASSERT(sizeof(key) <= sizeof(void_key));
   memcpy(&void_key, &key, sizeof(key));

   struct hash_entry *entry = _mesa_hash_table_search(cache->cache, void_key);
   if (entry != NULL) {
      return (uintptr_t)entry->data;
   } else {
      return 0;
   }
}
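
/* Pre-bakes every buffer view the cache will ever need: one view per
 * (format, chunk) pair, or three per pair for RGB32 formats, covering the
 * device's whole usable VA range.  After this,
 * nvk_edb_bview_cache_get_descriptor() never has to allocate.
 */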
VkResult
nvk_edb_bview_cache_init(struct nvk_device *dev,
                         struct nvk_edb_bview_cache *cache)
{
   struct nvk_physical_device *pdev = nvk_device_physical(dev);
   VkResult result;

   cache->cache = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                          _mesa_key_pointer_equal);
   if (cache->cache == NULL)
      return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);

   for (uint32_t format = 0; format < PIPE_FORMAT_COUNT; format++) {
      if (!nil_format_supports_buffer(&pdev->info, format))
         continue;

      const uint8_t el_size_B = util_format_get_blocksize(format);

      for (uint16_t chunk = 0;; chunk++) {
         if (base_addr_for_chunk(dev, chunk, format) >= dev->nvkmd->va_end)
            break;

         assert(format <= UINT16_MAX);
         assert(chunk < (1u << 12));

         if (!util_is_power_of_two_nonzero(el_size_B)) {
            /* For RGB32 formats, bake three views per chunk, one per
             * channel offset, so client views that are only aligned to the
             * channel size still start on an element boundary of some view.
             */
            assert(util_format_get_nr_components(format) == 3);
            assert(el_size_B % 3 == 0);
            const uint8_t chan_size_B = el_size_B / 3;

            for (uint8_t chan = 0; chan < 3; chan++) {
               struct bvdesc_key key = {
                  .format = format,
                  .chunk = chunk,
                  .rgb_offset = chan * chan_size_B,
               };
               result = nvk_edb_bview_cache_add_bview(dev, cache, key);
               if (result != VK_SUCCESS)
                  goto fail;
            }
         } else {
            struct bvdesc_key key = {
               .format = format,
               .chunk = chunk,
            };
            result = nvk_edb_bview_cache_add_bview(dev, cache, key);
            if (result != VK_SUCCESS)
               goto fail;
         }
      }
   }

   return VK_SUCCESS;

fail:
   _mesa_hash_table_destroy(cache->cache, NULL);
   return result;
}

void
nvk_edb_bview_cache_finish(struct nvk_device *dev,
                           struct nvk_edb_bview_cache *cache)
{
   /* We don't bother freeing the descriptors as those will be cleaned up
    * automatically when the device is destroyed.
    */
   if (cache->cache)
      _mesa_hash_table_destroy(cache->cache, NULL);
}
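
/* Returns a descriptor for the client buffer view [base_addr, base_addr +
 * size_B) in the given format.  The descriptor references one of the
 * pre-baked hardware views by descriptor table index, together with the
 * client view's offset and size within that view, in elements.  Shader
 * code consuming the descriptor is then expected to rebase each access by
 * offset_el, bounds-check it against size_el, and substitute oob_alpha in
 * the alpha channel when out-of-bounds; roughly (a sketch of the idea,
 * not the exact lowering):
 *
 *    bool oob = el >= desc.size_el;
 *    texel = txf(desc.index, desc.offset_el + min(el, desc.size_el));
 *    if (oob)
 *       texel = (0, 0, 0, desc.oob_alpha);
 */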
struct nvk_edb_buffer_view_descriptor
nvk_edb_bview_cache_get_descriptor(struct nvk_device *dev,
                                   struct nvk_edb_bview_cache *cache,
                                   uint64_t base_addr, uint64_t size_B,
                                   enum pipe_format format)
{
   /* The hardware limit for buffer image/texture descriptors is 4GB (3GB
    * for RGB32 on Ampere).  This cache works by covering the address space
    * with maximum-size buffer descriptors spaced at half that size.  In
    * order for this to work properly, the size of the client's buffer view
    * must be at most half the maximum descriptor size.
    */
   assert(size_B <= view_stride_B(format));

   const uint8_t el_size_B = util_format_get_blocksize(format);
   const uint64_t size_el = size_B / el_size_B;

   const uint64_t chunk = chunk_for_addr(dev, base_addr, format);
   const uint64_t desc_base_addr = base_addr_for_chunk(dev, chunk, format);
   const uint32_t offset_B = base_addr - desc_base_addr;
   const uint32_t offset_el = offset_B / el_size_B;

   uint16_t rgb_offset = 0;
   if (!util_is_power_of_two_nonzero(el_size_B)) {
      /* RGB32 client views may only be aligned to the channel size, so
       * pick the pre-baked view whose base matches the misalignment.
       */
      assert(util_format_get_nr_components(format) == 3);
      assert(el_size_B % 3 == 0);
      rgb_offset = offset_B % el_size_B;
   } else {
      assert(offset_B % el_size_B == 0);
   }

   /* The client view must be non-empty and fit within its chunk */
   assert(offset_el + size_el > offset_el);
   assert(offset_el + size_el <= view_size_el(format));

   assert(format <= UINT16_MAX);
   assert(chunk < (1u << 12));
   assert(rgb_offset < (1u << 4));
   const struct bvdesc_key key = {
      .format = format,
      .chunk = chunk,
      .rgb_offset = rgb_offset,
   };
   uint32_t index = nvk_edb_bview_cache_lookup_bview(dev, cache, key);

   uint32_t oob_alpha;
   if (util_format_has_alpha(format)) {
      /* Out-of-bounds reads return 0 in every channel, so a format with
       * alpha reads (0, 0, 0, 0) out-of-bounds.
       */
      oob_alpha = 0;
   } else if (util_format_is_pure_integer(format)) {
      /* OOB reads return 0 texture data which then gets extended to
       * (0, 0, 0, 1)
       */
      oob_alpha = 1;
   } else {
      /* OOB reads return 0 texture data which then gets extended to
       * (0.0, 0.0, 0.0, 1.0)
       */
      oob_alpha = 0x3f800000;
   }

   return (struct nvk_edb_buffer_view_descriptor) {
      .index = index,
      .offset_el = offset_el,
      .size_el = size_el,
      .oob_alpha = oob_alpha,
   };
}