/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
 */

#include "util/mesa-sha1.h"
#include "util/hash_table.h"
#include "util/debug.h"
#include "anv_private.h"
static size_t
anv_shader_bin_size(uint32_t prog_data_size, uint32_t nr_params,
uint32_t key_size,
uint32_t surface_count, uint32_t sampler_count)
{
const uint32_t binding_data_size =
(surface_count + sampler_count) * sizeof(struct anv_pipeline_binding);
return align_u32(sizeof(struct anv_shader_bin), 8) +
align_u32(prog_data_size, 8) +
align_u32(nr_params * sizeof(void *), 8) +
align_u32(sizeof(uint32_t) + key_size, 8) +
align_u32(binding_data_size, 8);
}
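
/* Create a refcounted shader binary.  The kernel is uploaded into the
 * device's instruction state pool; prog_data, params, key, and bind map
 * are copied into the trailing storage described above.
 */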
struct anv_shader_bin *
anv_shader_bin_create(struct anv_device *device,
const void *key_data, uint32_t key_size,
const void *kernel_data, uint32_t kernel_size,
const struct brw_stage_prog_data *prog_data,
uint32_t prog_data_size, const void *prog_data_param,
const struct anv_pipeline_bind_map *bind_map)
{
const size_t size =
anv_shader_bin_size(prog_data_size, prog_data->nr_params, key_size,
bind_map->surface_count, bind_map->sampler_count);
struct anv_shader_bin *shader =
vk_alloc(&device->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
if (!shader)
return NULL;
shader->ref_cnt = 1;
shader->kernel =
anv_state_pool_alloc(&device->instruction_state_pool, kernel_size, 64);
memcpy(shader->kernel.map, kernel_data, kernel_size);
shader->kernel_size = kernel_size;
shader->bind_map = *bind_map;
shader->prog_data_size = prog_data_size;
   /* Now we fill out the trailing data at the end of the struct */
void *data = shader;
data += align_u32(sizeof(struct anv_shader_bin), 8);
shader->prog_data = data;
struct brw_stage_prog_data *new_prog_data = data;
memcpy(data, prog_data, prog_data_size);
data += align_u32(prog_data_size, 8);
   /* We only serialize the param array; shaders with pull params or image
    * params cannot go in the cache.
    */
   assert(prog_data->nr_pull_params == 0);
   assert(prog_data->nr_image_params == 0);
new_prog_data->param = data;
uint32_t param_size = prog_data->nr_params * sizeof(void *);
memcpy(data, prog_data_param, param_size);
data += align_u32(param_size, 8);
shader->key = data;
struct anv_shader_bin_key *key = data;
key->size = key_size;
memcpy(key->data, key_data, key_size);
data += align_u32(sizeof(*key) + key_size, 8);
shader->bind_map.surface_to_descriptor = data;
memcpy(data, bind_map->surface_to_descriptor,
bind_map->surface_count * sizeof(struct anv_pipeline_binding));
data += bind_map->surface_count * sizeof(struct anv_pipeline_binding);
shader->bind_map.sampler_to_descriptor = data;
memcpy(data, bind_map->sampler_to_descriptor,
bind_map->sampler_count * sizeof(struct anv_pipeline_binding));
return shader;
}

void
anv_shader_bin_destroy(struct anv_device *device,
struct anv_shader_bin *shader)
{
assert(shader->ref_cnt == 0);
anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
vk_free(&device->alloc, shader);
}
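
/* Serialized size of a shader bin: the in-memory blob measured by
 * anv_shader_bin_size() plus the kernel itself, 8-byte aligned.
 */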
static size_t
anv_shader_bin_data_size(const struct anv_shader_bin *shader)
{
return anv_shader_bin_size(shader->prog_data_size,
shader->prog_data->nr_params, shader->key->size,
shader->bind_map.surface_count,
shader->bind_map.sampler_count) +
align_u32(shader->kernel_size, 8);
}

static void
anv_shader_bin_write_data(const struct anv_shader_bin *shader, void *data)
{
size_t struct_size =
anv_shader_bin_size(shader->prog_data_size,
shader->prog_data->nr_params, shader->key->size,
shader->bind_map.surface_count,
shader->bind_map.sampler_count);
memcpy(data, shader, struct_size);
data += struct_size;
memcpy(data, shader->kernel.map, shader->kernel_size);
}

/* Remaining work:
 *
 * - Compact binding table layout so it's tight and not dependent on
 *   descriptor set layout.
 *
 * - Review prog_data for size and cacheability: struct brw_stage_prog_data
 *   has a binding_table that uses a lot of uint32_t for 8-bit quantities,
 *   etc.; param, pull_param, and image_params are pointers, and we just
 *   need the compaction map.  Use bit fields for all bools, e.g.
 *   dual_src_blend.
 */
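
/* The cache table maps an anv_shader_bin_key (a size-prefixed blob of
 * everything that went into compiling the shader) to its anv_shader_bin.
 */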
static uint32_t
shader_bin_key_hash_func(const void *void_key)
{
const struct anv_shader_bin_key *key = void_key;
return _mesa_hash_data(key->data, key->size);
}

static bool
shader_bin_key_compare_func(const void *void_a, const void *void_b)
{
const struct anv_shader_bin_key *a = void_a, *b = void_b;
if (a->size != b->size)
return false;
return memcmp(a->data, b->data, a->size) == 0;
}
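
/* When the pipeline cache is disabled (see pipeline_cache_enabled()
 * below), cache->cache stays NULL and lookups and uploads bypass the
 * table entirely.
 */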
void
anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
struct anv_device *device,
bool cache_enabled)
{
cache->device = device;
pthread_mutex_init(&cache->mutex, NULL);
if (cache_enabled) {
cache->cache = _mesa_hash_table_create(NULL, shader_bin_key_hash_func,
shader_bin_key_compare_func);
} else {
cache->cache = NULL;
}
}

void
anv_pipeline_cache_finish(struct anv_pipeline_cache *cache)
{
pthread_mutex_destroy(&cache->mutex);
if (cache->cache) {
/* This is a bit unfortunate. In order to keep things from randomly
* going away, the shader cache has to hold a reference to all shader
* binaries it contains. We unref them when we destroy the cache.
*/
struct hash_entry *entry;
hash_table_foreach(cache->cache, entry)
anv_shader_bin_unref(cache->device, entry->data);
_mesa_hash_table_destroy(cache->cache, NULL);
}
}
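
/* Hash everything that affects the compiled code: the stage-specific key,
 * the SPIR-V module, the entrypoint name, the pipeline layout, and any
 * specialization constants.
 */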
void
anv_hash_shader(unsigned char *hash, const void *key, size_t key_size,
struct anv_shader_module *module,
const char *entrypoint,
const struct anv_pipeline_layout *pipeline_layout,
const VkSpecializationInfo *spec_info)
{
struct mesa_sha1 *ctx;
ctx = _mesa_sha1_init();
_mesa_sha1_update(ctx, key, key_size);
_mesa_sha1_update(ctx, module->sha1, sizeof(module->sha1));
_mesa_sha1_update(ctx, entrypoint, strlen(entrypoint));
if (pipeline_layout) {
_mesa_sha1_update(ctx, pipeline_layout->sha1,
sizeof(pipeline_layout->sha1));
}
   /* TODO: hash in the shader stage as well? */
if (spec_info) {
_mesa_sha1_update(ctx, spec_info->pMapEntries,
spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
_mesa_sha1_update(ctx, spec_info->pData, spec_info->dataSize);
}
_mesa_sha1_final(ctx, hash);
}

static struct anv_shader_bin *
anv_pipeline_cache_search_locked(struct anv_pipeline_cache *cache,
const void *key_data, uint32_t key_size)
{
   /* Build a temporary anv_shader_bin_key on the stack: one uint32_t for
    * the size field plus enough uint32_ts to hold the key data.
    */
   uint32_t vla[1 + DIV_ROUND_UP(key_size, sizeof(uint32_t))];
   struct anv_shader_bin_key *key = (void *)vla;
   key->size = key_size;
   memcpy(key->data, key_data, key_size);
struct hash_entry *entry = _mesa_hash_table_search(cache->cache, key);
if (entry)
return entry->data;
else
return NULL;
}

struct anv_shader_bin *
anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
const void *key_data, uint32_t key_size)
{
if (!cache->cache)
return NULL;
pthread_mutex_lock(&cache->mutex);
struct anv_shader_bin *shader =
anv_pipeline_cache_search_locked(cache, key_data, key_size);
pthread_mutex_unlock(&cache->mutex);
/* We increment refcount before handing it to the caller */
if (shader)
anv_shader_bin_ref(shader);
return shader;
}

static struct anv_shader_bin *
anv_pipeline_cache_add_shader(struct anv_pipeline_cache *cache,
const void *key_data, uint32_t key_size,
const void *kernel_data, uint32_t kernel_size,
const struct brw_stage_prog_data *prog_data,
uint32_t prog_data_size,
const void *prog_data_param,
const struct anv_pipeline_bind_map *bind_map)
{
struct anv_shader_bin *shader =
anv_pipeline_cache_search_locked(cache, key_data, key_size);
if (shader)
return shader;
struct anv_shader_bin *bin =
anv_shader_bin_create(cache->device, key_data, key_size,
kernel_data, kernel_size,
prog_data, prog_data_size, prog_data_param,
bind_map);
if (!bin)
return NULL;
_mesa_hash_table_insert(cache->cache, bin->key, bin);
return bin;
}
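
/* Upload a kernel and return a reference owned by the caller.  With a live
 * cache, the cache keeps its own reference to the (possibly pre-existing)
 * bin; without one, the caller owns the bin outright.
 */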
struct anv_shader_bin *
anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
const void *key_data, uint32_t key_size,
const void *kernel_data, uint32_t kernel_size,
const struct brw_stage_prog_data *prog_data,
uint32_t prog_data_size,
const struct anv_pipeline_bind_map *bind_map)
{
if (cache->cache) {
pthread_mutex_lock(&cache->mutex);
struct anv_shader_bin *bin =
anv_pipeline_cache_add_shader(cache, key_data, key_size,
kernel_data, kernel_size,
prog_data, prog_data_size,
prog_data->param, bind_map);
pthread_mutex_unlock(&cache->mutex);
      /* We increment the refcount before handing the bin to the caller.
       * Creation may have failed, in which case we hand back NULL.
       */
      if (bin)
         anv_shader_bin_ref(bin);

      return bin;
} else {
/* In this case, we're not caching it so the caller owns it entirely */
return anv_shader_bin_create(cache->device, key_data, key_size,
kernel_data, kernel_size,
prog_data, prog_data_size,
prog_data->param, bind_map);
}
}
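
/* This must match the pipeline cache header layout defined by the Vulkan
 * spec for VK_PIPELINE_CACHE_HEADER_VERSION_ONE.
 */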
struct cache_header {
uint32_t header_size;
uint32_t header_version;
uint32_t vendor_id;
uint32_t device_id;
uint8_t uuid[VK_UUID_SIZE];
};

static void
anv_pipeline_cache_load(struct anv_pipeline_cache *cache,
const void *data, size_t size)
{
struct anv_device *device = cache->device;
struct anv_physical_device *pdevice = &device->instance->physicalDevice;
struct cache_header header;
if (cache->cache == NULL)
return;
if (size < sizeof(header))
return;
memcpy(&header, data, sizeof(header));
if (header.header_size < sizeof(header))
return;
if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
return;
if (header.vendor_id != 0x8086)
return;
if (header.device_id != device->chipset_id)
return;
if (memcmp(header.uuid, pdevice->uuid, VK_UUID_SIZE) != 0)
return;
const void *end = data + size;
const void *p = data + header.header_size;
/* Count is the total number of valid entries */
uint32_t count;
   if (p + sizeof(count) > end)
return;
memcpy(&count, p, sizeof(count));
p += align_u32(sizeof(count), 8);
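   /* Each entry mirrors the layout written by anv_shader_bin_write_data():
    * the anv_shader_bin struct, prog_data, params, key, and binding tables,
    * followed by the kernel, each piece 8-byte aligned.
    */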
for (uint32_t i = 0; i < count; i++) {
struct anv_shader_bin bin;
if (p + sizeof(bin) > end)
break;
memcpy(&bin, p, sizeof(bin));
p += align_u32(sizeof(struct anv_shader_bin), 8);
const struct brw_stage_prog_data *prog_data = p;
p += align_u32(bin.prog_data_size, 8);
if (p > end)
break;
uint32_t param_size = prog_data->nr_params * sizeof(void *);
const void *prog_data_param = p;
p += align_u32(param_size, 8);
struct anv_shader_bin_key key;
if (p + sizeof(key) > end)
break;
memcpy(&key, p, sizeof(key));
const void *key_data = p + sizeof(key);
p += align_u32(sizeof(key) + key.size, 8);
/* We're going to memcpy this so getting rid of const is fine */
struct anv_pipeline_binding *bindings = (void *)p;
p += align_u32((bin.bind_map.surface_count + bin.bind_map.sampler_count) *
sizeof(struct anv_pipeline_binding), 8);
bin.bind_map.surface_to_descriptor = bindings;
bin.bind_map.sampler_to_descriptor = bindings + bin.bind_map.surface_count;
const void *kernel_data = p;
p += align_u32(bin.kernel_size, 8);
if (p > end)
break;
anv_pipeline_cache_add_shader(cache, key_data, key.size,
kernel_data, bin.kernel_size,
prog_data, bin.prog_data_size,
prog_data_param, &bin.bind_map);
}
}

static bool
pipeline_cache_enabled(void)
{
static int enabled = -1;
if (enabled < 0)
enabled = env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true);
return enabled;
}

VkResult anv_CreatePipelineCache(
VkDevice _device,
const VkPipelineCacheCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkPipelineCache* pPipelineCache)
{
ANV_FROM_HANDLE(anv_device, device, _device);
struct anv_pipeline_cache *cache;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
assert(pCreateInfo->flags == 0);
cache = vk_alloc2(&device->alloc, pAllocator,
sizeof(*cache), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (cache == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
anv_pipeline_cache_init(cache, device, pipeline_cache_enabled());
if (pCreateInfo->initialDataSize > 0)
anv_pipeline_cache_load(cache,
pCreateInfo->pInitialData,
pCreateInfo->initialDataSize);
*pPipelineCache = anv_pipeline_cache_to_handle(cache);
return VK_SUCCESS;
}

void anv_DestroyPipelineCache(
VkDevice _device,
VkPipelineCache _cache,
const VkAllocationCallbacks* pAllocator)
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
if (!cache)
return;
anv_pipeline_cache_finish(cache);
vk_free2(&device->alloc, pAllocator, cache);
}

VkResult anv_GetPipelineCacheData(
VkDevice _device,
VkPipelineCache _cache,
size_t* pDataSize,
void* pData)
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
struct anv_physical_device *pdevice = &device->instance->physicalDevice;
struct cache_header *header;
if (pData == NULL) {
size_t size = align_u32(sizeof(*header), 8) +
align_u32(sizeof(uint32_t), 8);
if (cache->cache) {
struct hash_entry *entry;
hash_table_foreach(cache->cache, entry)
size += anv_shader_bin_data_size(entry->data);
}
*pDataSize = size;
return VK_SUCCESS;
}
   /* We unconditionally write the entry count below, so the buffer must
    * have room for the header and the count.
    */
   if (*pDataSize < align_u32(sizeof(*header), 8) +
                    align_u32(sizeof(uint32_t), 8)) {
      *pDataSize = 0;
      return VK_INCOMPLETE;
   }
void *p = pData, *end = pData + *pDataSize;
header = p;
header->header_size = sizeof(*header);
header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
header->vendor_id = 0x8086;
header->device_id = device->chipset_id;
memcpy(header->uuid, pdevice->uuid, VK_UUID_SIZE);
p += align_u32(header->header_size, 8);
uint32_t *count = p;
p += align_u32(sizeof(*count), 8);
*count = 0;
VkResult result = VK_SUCCESS;
if (cache->cache) {
struct hash_entry *entry;
hash_table_foreach(cache->cache, entry) {
struct anv_shader_bin *shader = entry->data;
size_t data_size = anv_shader_bin_data_size(entry->data);
if (p + data_size > end) {
result = VK_INCOMPLETE;
break;
}
anv_shader_bin_write_data(shader, p);
p += data_size;
(*count)++;
}
}
*pDataSize = p - pData;
return result;
}
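
/* Merging only re-references each source bin into the destination table;
 * shader bins are immutable and refcounted, so nothing is copied.
 */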
VkResult anv_MergePipelineCaches(
VkDevice _device,
VkPipelineCache destCache,
uint32_t srcCacheCount,
const VkPipelineCache* pSrcCaches)
{
ANV_FROM_HANDLE(anv_pipeline_cache, dst, destCache);
if (!dst->cache)
return VK_SUCCESS;
for (uint32_t i = 0; i < srcCacheCount; i++) {
ANV_FROM_HANDLE(anv_pipeline_cache, src, pSrcCaches[i]);
if (!src->cache)
continue;
struct hash_entry *entry;
hash_table_foreach(src->cache, entry) {
struct anv_shader_bin *bin = entry->data;
if (_mesa_hash_table_search(dst->cache, bin->key))
continue;
anv_shader_bin_ref(bin);
_mesa_hash_table_insert(dst->cache, bin->key, bin);
}
}
return VK_SUCCESS;
}