/*
* Copyright © 2024 Collabora, Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "vk_shader.h"
#include "vk_alloc.h"
#include "vk_command_buffer.h"
#include "vk_common_entrypoints.h"
#include "vk_descriptor_set_layout.h"
#include "vk_device.h"
#include "vk_nir.h"
#include "vk_physical_device.h"
#include "vk_physical_device_features.h"
#include "vk_pipeline.h"
#include "util/mesa-sha1.h"
#include "nir.h"
static void
vk_shader_init(struct vk_shader *shader,
struct vk_device *device,
const struct vk_shader_ops *ops,
gl_shader_stage stage)
{
vk_object_base_init(device, &shader->base, VK_OBJECT_TYPE_SHADER_EXT);
shader->ops = ops;
shader->stage = stage;
}
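
/* Allocate and zero `size` bytes for a driver shader object and initialize
* the common vk_shader fields. The driver's shader struct is expected to
* embed struct vk_shader as its first member. Returns NULL on allocation
* failure.
*/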
void *
vk_shader_zalloc(struct vk_device *device,
const struct vk_shader_ops *ops,
gl_shader_stage stage,
const VkAllocationCallbacks *alloc,
size_t size)
{
/* For internal allocations, we need to allocate from the device scope
* because they might be put in pipeline caches. Importantly, it is
* impossible for the client to get at this pointer and we apply this
* heuristic before we account for allocation fallbacks so this will only
* ever happen for internal shader objects.
*/
const VkSystemAllocationScope alloc_scope =
alloc == &device->alloc ? VK_SYSTEM_ALLOCATION_SCOPE_DEVICE
: VK_SYSTEM_ALLOCATION_SCOPE_OBJECT;
struct vk_shader *shader = vk_zalloc2(&device->alloc, alloc, size, 8,
alloc_scope);
if (shader == NULL)
return NULL;
vk_shader_init(shader, device, ops, stage);
return shader;
}
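
/* Like vk_shader_zalloc() but allocates through a vk_multialloc so the
* driver can put trailing data in the same allocation. The struct vk_shader
* is expected to be the first member added to `ma`.
*/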
void *
vk_shader_multizalloc(struct vk_device *device,
struct vk_multialloc *ma,
const struct vk_shader_ops *ops,
gl_shader_stage stage,
const VkAllocationCallbacks *alloc)
{
struct vk_shader *shader =
vk_multialloc_zalloc2(ma, &device->alloc, alloc,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
if (!shader)
return NULL;
vk_shader_init(shader, device, ops, stage);
return shader;
}
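
/* Finish the base object and free memory allocated with vk_shader_zalloc()
* or vk_shader_multizalloc().
*/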
void
vk_shader_free(struct vk_device *device,
const VkAllocationCallbacks *alloc,
struct vk_shader *shader)
{
vk_object_base_finish(&shader->base);
vk_free2(&device->alloc, alloc, shader);
}
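
/* qsort-style comparator that orders graphics stages in pipeline order:
* vertex, tessellation control, tessellation evaluation, geometry, task,
* mesh, fragment. Both stages must be graphics stages.
*/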
int
vk_shader_cmp_graphics_stages(gl_shader_stage a, gl_shader_stage b)
{
static const int stage_order[MESA_SHADER_MESH + 1] = {
[MESA_SHADER_VERTEX] = 1,
[MESA_SHADER_TESS_CTRL] = 2,
[MESA_SHADER_TESS_EVAL] = 3,
[MESA_SHADER_GEOMETRY] = 4,
[MESA_SHADER_TASK] = 5,
[MESA_SHADER_MESH] = 6,
[MESA_SHADER_FRAGMENT] = 7,
};
assert(a < ARRAY_SIZE(stage_order) && stage_order[a] > 0);
assert(b < ARRAY_SIZE(stage_order) && stage_order[b] > 0);
return stage_order[a] - stage_order[b];
}
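
/* Pairs a shader stage with its index into pCreateInfos/pShaders so that
* linked shaders can be sorted into pipeline order without losing track of
* where the resulting handle goes.
*/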
struct stage_idx {
gl_shader_stage stage;
uint32_t idx;
};
static int
cmp_stage_idx(const void *_a, const void *_b)
{
const struct stage_idx *a = _a, *b = _b;
return vk_shader_cmp_graphics_stages(a->stage, b->stage);
}
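
/* Translate the SPIR-V in a VkShaderCreateInfoEXT to NIR using the driver's
* compiler and SPIR-V options, then run the driver's optional preprocess_nir
* hook. Returns NULL if SPIR-V to NIR translation fails.
*/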
static nir_shader *
vk_shader_to_nir(struct vk_device *device,
const VkShaderCreateInfoEXT *info,
const struct vk_pipeline_robustness_state *rs)
{
const struct vk_device_shader_ops *ops = device->shader_ops;
const gl_shader_stage stage = vk_to_mesa_shader_stage(info->stage);
const nir_shader_compiler_options *nir_options =
ops->get_nir_options(device->physical, stage, rs);
struct spirv_to_nir_options spirv_options =
ops->get_spirv_options(device->physical, stage, rs);
enum gl_subgroup_size subgroup_size = vk_get_subgroup_size(
vk_spirv_version(info->pCode, info->codeSize),
stage, info->pNext,
info->flags & VK_SHADER_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT,
info->flags & VK_SHADER_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT);
nir_shader *nir = vk_spirv_to_nir(device,
info->pCode, info->codeSize,
stage, info->pName,
subgroup_size,
info->pSpecializationInfo,
&spirv_options, nir_options,
false /* internal */, NULL);
if (nir == NULL)
return NULL;
if (ops->preprocess_nir != NULL)
ops->preprocess_nir(device->physical, nir, rs);
return nir;
}
struct set_layouts {
struct vk_descriptor_set_layout *set_layouts[MESA_VK_MAX_DESCRIPTOR_SETS];
};
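
/* Fill out a vk_shader_compile_info from a VkShaderCreateInfoEXT. The
* descriptor set layout handles are unwrapped into the caller-provided
* set_layouts storage, which must live at least as long as the compile.
*/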
static void
vk_shader_compile_info_init(struct vk_shader_compile_info *info,
struct set_layouts *set_layouts,
const VkShaderCreateInfoEXT *vk_info,
const struct vk_pipeline_robustness_state *rs,
nir_shader *nir)
{
for (uint32_t sl = 0; sl < vk_info->setLayoutCount; sl++) {
set_layouts->set_layouts[sl] =
vk_descriptor_set_layout_from_handle(vk_info->pSetLayouts[sl]);
}
*info = (struct vk_shader_compile_info) {
.stage = nir->info.stage,
.flags = vk_info->flags,
.next_stage_mask = vk_info->nextStage,
.nir = nir,
.robustness = rs,
.set_layout_count = vk_info->setLayoutCount,
.set_layouts = set_layouts->set_layouts,
.push_constant_range_count = vk_info->pushConstantRangeCount,
.push_constant_ranges = vk_info->pPushConstantRanges,
};
}
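
/* Header prepended to every shader binary we return from
* vkGetShaderBinaryDataEXT: a fixed magic string, the driverID,
* shaderBinaryUUID, and shaderBinaryVersion used for compatibility checks,
* the total binary size, and a SHA1 of the whole binary computed with this
* sha1 field zeroed. The -Wpadded pragma and the static_assert below ensure
* the struct has no implicit padding.
*/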
PRAGMA_DIAGNOSTIC_PUSH
PRAGMA_DIAGNOSTIC_ERROR(-Wpadded)
struct vk_shader_bin_header {
char mesavkshaderbin[16];
VkDriverId driver_id;
uint8_t uuid[VK_UUID_SIZE];
uint32_t version;
uint64_t size;
uint8_t sha1[SHA1_DIGEST_LENGTH];
uint32_t _pad;
};
PRAGMA_DIAGNOSTIC_POP
static_assert(sizeof(struct vk_shader_bin_header) == 72,
"This struct has no holes");
static void
vk_shader_bin_header_init(struct vk_shader_bin_header *header,
struct vk_physical_device *device)
{
*header = (struct vk_shader_bin_header) {
.mesavkshaderbin = "MesaVkShaderBin",
.driver_id = device->properties.driverID,
};
memcpy(header->uuid, device->properties.shaderBinaryUUID, VK_UUID_SIZE);
header->version = device->properties.shaderBinaryVersion;
}
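
/* Serialize `shader` into `blob` as a vk_shader_bin_header followed by the
* driver's payload. The SHA1 is computed over the header (with its sha1
* field still zeroed) plus the payload and then patched into the header.
* When the blob has no backing storage (size query), hashing and the header
* write are skipped. Returns VK_INCOMPLETE if the driver fails to serialize
* or the blob runs out of space.
*/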
static VkResult
vk_shader_serialize(struct vk_device *device,
struct vk_shader *shader,
struct blob *blob)
{
struct vk_shader_bin_header header;
vk_shader_bin_header_init(&header, device->physical);
ASSERTED intptr_t header_offset = blob_reserve_bytes(blob, sizeof(header));
assert(header_offset == 0);
bool success = shader->ops->serialize(device, shader, blob);
if (!success || blob->out_of_memory)
return VK_INCOMPLETE;
/* Finalize and write the header */
header.size = blob->size;
if (blob->data != NULL) {
assert(sizeof(header) <= blob->size);
struct mesa_sha1 sha1_ctx;
_mesa_sha1_init(&sha1_ctx);
/* Hash the header with its sha1 field still zeroed */
_mesa_sha1_update(&sha1_ctx, &header, sizeof(header));
/* Hash the serialized data */
_mesa_sha1_update(&sha1_ctx, blob->data + sizeof(header),
blob->size - sizeof(header));
_mesa_sha1_final(&sha1_ctx, header.sha1);
blob_overwrite_bytes(blob, header_offset, &header, sizeof(header));
}
return VK_SUCCESS;
}
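
/* Validate a shader binary passed to vkCreateShadersEXT and, if it checks
* out, hand the payload to the driver's deserialize callback. The magic
* string, driverID, shaderBinaryUUID, shaderBinaryVersion, total size, and
* SHA1 are all verified and any mismatch returns
* VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT.
*/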
static VkResult
vk_shader_deserialize(struct vk_device *device,
size_t data_size, const void *data,
const VkAllocationCallbacks *pAllocator,
struct vk_shader **shader_out)
{
const struct vk_device_shader_ops *ops = device->shader_ops;
struct blob_reader blob;
blob_reader_init(&blob, data, data_size);
struct vk_shader_bin_header header, ref_header;
blob_copy_bytes(&blob, &header, sizeof(header));
if (blob.overrun)
return vk_error(device, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT);
vk_shader_bin_header_init(&ref_header, device->physical);
if (memcmp(header.mesavkshaderbin, ref_header.mesavkshaderbin,
sizeof(header.mesavkshaderbin)))
return vk_error(device, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT);
if (header.driver_id != ref_header.driver_id)
return vk_error(device, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT);
if (memcmp(header.uuid, ref_header.uuid, sizeof(header.uuid)))
return vk_error(device, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT);
/* From the Vulkan 1.3.276 spec:
*
* "Guaranteed compatibility of shader binaries is expressed through a
* combination of the shaderBinaryUUID and shaderBinaryVersion members
* of the VkPhysicalDeviceShaderObjectPropertiesEXT structure queried
* from a physical device. Binary shaders retrieved from a physical
* device with a certain shaderBinaryUUID are guaranteed to be
* compatible with all other physical devices reporting the same
* shaderBinaryUUID and the same or higher shaderBinaryVersion."
*
* We handle the version check here on behalf of the driver and then pass
* the version into the driver's deserialize callback.
*
* If a driver doesn't want to mess with versions, it can make the UUID a
* hash and always report version 0, which makes this check effectively a
* no-op.
*/
if (header.version > ref_header.version)
return vk_error(device, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT);
/* Reject shader binaries that are the wrong size. */
if (header.size != data_size)
return vk_error(device, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT);
assert(blob.current == (uint8_t *)data + sizeof(header));
blob.end = (uint8_t *)data + data_size;
struct mesa_sha1 sha1_ctx;
_mesa_sha1_init(&sha1_ctx);
/* Hash the header with its sha1 field zeroed out */
struct vk_shader_bin_header sha1_header = header;
memset(sha1_header.sha1, 0, sizeof(sha1_header.sha1));
_mesa_sha1_update(&sha1_ctx, &sha1_header, sizeof(sha1_header));
/* Hash the serialized data */
_mesa_sha1_update(&sha1_ctx, (uint8_t *)data + sizeof(header),
data_size - sizeof(header));
_mesa_sha1_final(&sha1_ctx, ref_header.sha1);
if (memcmp(header.sha1, ref_header.sha1, sizeof(header.sha1)))
return vk_error(device, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT);
/* We've now verified that the header matches and that the data has the
* right SHA1 hash so it's safe to call into the driver.
*/
return ops->deserialize(device, &blob, header.version,
pAllocator, shader_out);
}
VKAPI_ATTR VkResult VKAPI_CALL
vk_common_GetShaderBinaryDataEXT(VkDevice _device,
VkShaderEXT _shader,
size_t *pDataSize,
void *pData)
{
VK_FROM_HANDLE(vk_device, device, _device);
VK_FROM_HANDLE(vk_shader, shader, _shader);
VkResult result;
/* From the Vulkan 1.3.275 spec:
*
* "If pData is NULL, then the size of the binary shader code of the
* shader object, in bytes, is returned in pDataSize. Otherwise,
* pDataSize must point to a variable set by the user to the size of the
* buffer, in bytes, pointed to by pData, and on return the variable is
* overwritten with the amount of data actually written to pData. If
* pDataSize is less than the size of the binary shader code, nothing is
* written to pData, and VK_INCOMPLETE will be returned instead of
* VK_SUCCESS."
*
* This is annoying. Unlike basically every other Vulkan data return
* method, we're not allowed to overwrite the client-provided memory region
* on VK_INCOMPLETE. This means we either need to query the blob size
* up-front by serializing twice or we need to serialize into temporary
* memory and memcpy into the client-provided region. We choose the first
* approach.
*
* In the common case, this means that vk_shader_ops::serialize will get
* called 3 times: Once for the client to get the size, once for us to
* validate the client's size, and once to actually write the data. It's a
* bit heavy-weight but this shouldn't be in a hot path and this is better
* for memory efficiency. Also, the vk_shader_ops::serialize should be
* pretty fast on a null blob.
*/
struct blob blob;
blob_init_fixed(&blob, NULL, SIZE_MAX);
result = vk_shader_serialize(device, shader, &blob);
assert(result == VK_SUCCESS);
if (result != VK_SUCCESS) {
*pDataSize = 0;
return result;
} else if (pData == NULL) {
*pDataSize = blob.size;
return VK_SUCCESS;
} else if (blob.size > *pDataSize) {
/* No data written */
*pDataSize = 0;
return VK_INCOMPLETE;
}
blob_init_fixed(&blob, pData, *pDataSize);
result = vk_shader_serialize(device, shader, &blob);
assert(result == VK_SUCCESS);
*pDataSize = blob.size;
return result;
}
/* The only place where we have "real" linking is graphics shaders, and there
* is a limit on how many of them can be linked together at one time.
*/
#define VK_MAX_LINKED_SHADER_STAGES MESA_VK_MAX_GRAPHICS_PIPELINE_STAGES
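
/* Robustness state used when compiling shader objects from SPIR-V: robust
* buffer, image, and vertex-input access are all disabled and null UBO/SSBO
* descriptors are treated as supported.
*/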
const struct vk_pipeline_robustness_state vk_robustness_disabled = {
.storage_buffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT,
.uniform_buffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT,
.vertex_inputs = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT,
.images = VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_DISABLED_EXT,
/* From the Vulkan 1.3.292 spec:
*
* "This extension [VK_EXT_robustness2] also adds support for “null
* descriptors”, where VK_NULL_HANDLE can be used instead of a valid
* handle. Accesses to null descriptors have well-defined behavior,
* and do not rely on robustness."
*
* For now, default these to true.
*/
.null_uniform_buffer_descriptor = true,
.null_storage_buffer_descriptor = true,
};
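
/* Common implementation of vkCreateShadersEXT on top of vk_device_shader_ops.
* Binary shaders are deserialized and unlinked SPIR-V shaders are compiled
* immediately, one at a time. SPIR-V shaders created with
* VK_SHADER_CREATE_LINK_STAGE_BIT_EXT are collected, sorted into pipeline
* order, and handed to a single ops->compile call so the driver can link
* across stages. A failure on one shader does not stop creation of the
* others; the first failure (if any) is what gets returned.
*/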
VKAPI_ATTR VkResult VKAPI_CALL
vk_common_CreateShadersEXT(VkDevice _device,
uint32_t createInfoCount,
const VkShaderCreateInfoEXT *pCreateInfos,
const VkAllocationCallbacks *pAllocator,
VkShaderEXT *pShaders)
{
VK_FROM_HANDLE(vk_device, device, _device);
const struct vk_device_shader_ops *ops = device->shader_ops;
VkResult first_fail_or_success = VK_SUCCESS;
/* From the Vulkan 1.3.274 spec:
*
* "When this function returns, whether or not it succeeds, it is
* guaranteed that every element of pShaders will have been overwritten
* by either VK_NULL_HANDLE or a valid VkShaderEXT handle."
*
* Zeroing up-front makes the error path easier.
*/
memset(pShaders, 0, createInfoCount * sizeof(*pShaders));
bool has_linked_spirv = false;
for (uint32_t i = 0; i < createInfoCount; i++) {
if (pCreateInfos[i].codeType == VK_SHADER_CODE_TYPE_SPIRV_EXT &&
(pCreateInfos[i].flags & VK_SHADER_CREATE_LINK_STAGE_BIT_EXT))
has_linked_spirv = true;
}
uint32_t linked_count = 0;
struct stage_idx linked[VK_MAX_LINKED_SHADER_STAGES];
for (uint32_t i = 0; i < createInfoCount; i++) {
const VkShaderCreateInfoEXT *vk_info = &pCreateInfos[i];
VkResult result = VK_SUCCESS;
switch (vk_info->codeType) {
case VK_SHADER_CODE_TYPE_BINARY_EXT: {
/* This isn't required by Vulkan but we're allowed to fail binary
* import for basically any reason. This seems like a pretty good
* reason.
*/
if (has_linked_spirv &&
(vk_info->flags & VK_SHADER_CREATE_LINK_STAGE_BIT_EXT)) {
result = vk_errorf(device, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT,
"Cannot mix linked binary and SPIR-V");
break;
}
struct vk_shader *shader;
result = vk_shader_deserialize(device, vk_info->codeSize,
vk_info->pCode, pAllocator,
&shader);
if (result != VK_SUCCESS)
break;
pShaders[i] = vk_shader_to_handle(shader);
break;
}
case VK_SHADER_CODE_TYPE_SPIRV_EXT: {
if (vk_info->flags & VK_SHADER_CREATE_LINK_STAGE_BIT_EXT) {
/* Stash it and compile later */
assert(linked_count < ARRAY_SIZE(linked));
linked[linked_count++] = (struct stage_idx) {
.stage = vk_to_mesa_shader_stage(vk_info->stage),
.idx = i,
};
} else {
nir_shader *nir = vk_shader_to_nir(device, vk_info,
&vk_robustness_disabled);
if (nir == NULL) {
result = vk_errorf(device, VK_ERROR_UNKNOWN,
"Failed to compile shader to NIR");
break;
}
struct vk_shader_compile_info info;
struct set_layouts set_layouts;
vk_shader_compile_info_init(&info, &set_layouts,
vk_info, &vk_robustness_disabled, nir);
struct vk_shader *shader;
result = ops->compile(device, 1, &info, NULL /* state */,
NULL /* features */, pAllocator, &shader);
if (result != VK_SUCCESS)
break;
pShaders[i] = vk_shader_to_handle(shader);
}
break;
}
default:
unreachable("Unknown shader code type");
}
if (first_fail_or_success == VK_SUCCESS)
first_fail_or_success = result;
}
if (linked_count > 0) {
struct set_layouts set_layouts[VK_MAX_LINKED_SHADER_STAGES];
struct vk_shader_compile_info infos[VK_MAX_LINKED_SHADER_STAGES];
VkResult result = VK_SUCCESS;
/* Sort so we guarantee the driver always gets them in-order */
qsort(linked, linked_count, sizeof(*linked), cmp_stage_idx);
/* Memset for easy error handling */
memset(infos, 0, sizeof(infos));
for (uint32_t l = 0; l < linked_count; l++) {
const VkShaderCreateInfoEXT *vk_info = &pCreateInfos[linked[l].idx];
nir_shader *nir = vk_shader_to_nir(device, vk_info,
&vk_robustness_disabled);
if (nir == NULL) {
result = vk_errorf(device, VK_ERROR_UNKNOWN,
"Failed to compile shader to NIR");
break;
}
vk_shader_compile_info_init(&infos[l], &set_layouts[l],
vk_info, &vk_robustness_disabled, nir);
}
if (result == VK_SUCCESS) {
struct vk_shader *shaders[VK_MAX_LINKED_SHADER_STAGES];
result = ops->compile(device, linked_count, infos, NULL /* state */,
NULL /* features */, pAllocator, shaders);
if (result == VK_SUCCESS) {
for (uint32_t l = 0; l < linked_count; l++)
pShaders[linked[l].idx] = vk_shader_to_handle(shaders[l]);
}
} else {
for (uint32_t l = 0; l < linked_count; l++) {
if (infos[l].nir != NULL)
ralloc_free(infos[l].nir);
}
}
if (first_fail_or_success == VK_SUCCESS)
first_fail_or_success = result;
}
return first_fail_or_success;
}
VKAPI_ATTR void VKAPI_CALL
vk_common_DestroyShaderEXT(VkDevice _device,
VkShaderEXT _shader,
const VkAllocationCallbacks *pAllocator)
{
VK_FROM_HANDLE(vk_device, device, _device);
VK_FROM_HANDLE(vk_shader, shader, _shader);
if (shader == NULL)
return;
vk_shader_destroy(device, shader, pAllocator);
}
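
/* Common implementation of vkCmdBindShadersEXT: translate the Vulkan stage
* bits to gl_shader_stages, unbind any pipelines currently bound to those
* stages, reset the tracked render pass attachment mask to "all" whenever a
* non-compute stage is bound, and forward the binding to the driver's
* cmd_bind_shaders hook.
*/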
VKAPI_ATTR void VKAPI_CALL
vk_common_CmdBindShadersEXT(VkCommandBuffer commandBuffer,
uint32_t stageCount,
const VkShaderStageFlagBits *pStages,
const VkShaderEXT *pShaders)
{
VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer);
struct vk_device *device = cmd_buffer->base.device;
const struct vk_device_shader_ops *ops = device->shader_ops;
STACK_ARRAY(gl_shader_stage, stages, stageCount);
STACK_ARRAY(struct vk_shader *, shaders, stageCount);
VkShaderStageFlags vk_stages = 0;
for (uint32_t i = 0; i < stageCount; i++) {
vk_stages |= pStages[i];
stages[i] = vk_to_mesa_shader_stage(pStages[i]);
shaders[i] = pShaders != NULL ? vk_shader_from_handle(pShaders[i]) : NULL;
}
vk_cmd_unbind_pipelines_for_stages(cmd_buffer, vk_stages);
if (vk_stages & ~VK_SHADER_STAGE_COMPUTE_BIT)
vk_cmd_set_rp_attachments(cmd_buffer, ~0);
ops->cmd_bind_shaders(cmd_buffer, stageCount, stages, shaders);
/* Release the scratch arrays in case STACK_ARRAY fell back to the heap */
STACK_ARRAY_FINISH(stages);
STACK_ARRAY_FINISH(shaders);
}