/*
 * Copyright © 2024 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */
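
/* nvk_upload_queue is NVK's internal, driver-managed upload path: small
 * CPU-to-GPU copies and fills are batched into a shared push buffer and
 * executed on a dedicated copy-engine context, with completion tracked by a
 * timeline vk_sync.  All public entry points take the queue mutex, so
 * uploads may be issued from multiple threads.
 */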

#include "nvk_upload_queue.h"

#include "nvk_device.h"
#include "nvk_physical_device.h"
#include "nvkmd/nvkmd.h"
#include "vk_alloc.h"

#include "nv_push.h"
#include "nv_push_cl90b5.h"

#define NVK_UPLOAD_MEM_SIZE (64 * 1024)
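
/* Each upload BO is carved up from both ends: copy-engine commands are
 * packed from the start of the buffer (mem_push_start..mem_push_end grow
 * upward) while the data being uploaded is packed from the end
 * (mem_data_start grows downward).  When the two would meet, the BO is
 * flushed and recycled.
 */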

struct nvk_upload_mem {
   struct nvkmd_mem *mem;

   /** Link in nvk_upload_queue::recycle */
   struct list_head link;

   /** Time point at which this BO will be idle */
   uint64_t idle_time_point;
};

static VkResult
nvk_upload_mem_create(struct nvk_device *dev,
                      struct nvk_upload_mem **mem_out)
{
   struct nvk_upload_mem *mem;
   VkResult result;

   mem = vk_zalloc(&dev->vk.alloc, sizeof(*mem), 8,
                   VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (mem == NULL)
      return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);

   result = nvkmd_dev_alloc_mapped_mem(dev->nvkmd, &dev->vk.base,
                                       NVK_UPLOAD_MEM_SIZE, 0, NVKMD_MEM_GART,
                                       NVKMD_MEM_MAP_WR, &mem->mem);
   if (result != VK_SUCCESS) {
      vk_free(&dev->vk.alloc, mem);
      return result;
   }

   *mem_out = mem;

   return VK_SUCCESS;
}

static void
nvk_upload_mem_destroy(struct nvk_device *dev,
                       struct nvk_upload_mem *mem)
{
   nvkmd_mem_unref(mem->mem);
   vk_free(&dev->vk.alloc, mem);
}

VkResult
nvk_upload_queue_init(struct nvk_device *dev,
                      struct nvk_upload_queue *queue)
{
   struct nvk_physical_device *pdev = nvk_device_physical(dev);
   VkResult result;

   memset(queue, 0, sizeof(*queue));

   simple_mtx_init(&queue->mutex, mtx_plain);

   result = nvkmd_dev_create_ctx(dev->nvkmd, &dev->vk.base,
                                 NVKMD_ENGINE_COPY, &queue->ctx);
   if (result != VK_SUCCESS)
      goto fail_mutex;

   const struct vk_sync_type *sync_type = pdev->nvkmd->sync_types[0];
   assert(sync_type->features & VK_SYNC_FEATURE_TIMELINE);

   result = vk_sync_create(&dev->vk, sync_type, VK_SYNC_IS_TIMELINE,
                           0, &queue->sync);
   if (result != VK_SUCCESS)
      goto fail_ctx;

   list_inithead(&queue->recycle);

   return VK_SUCCESS;

fail_ctx:
   nvkmd_ctx_destroy(queue->ctx);
fail_mutex:
   simple_mtx_destroy(&queue->mutex);

   return result;
}

void
nvk_upload_queue_finish(struct nvk_device *dev,
                        struct nvk_upload_queue *queue)
{
   list_for_each_entry_safe(struct nvk_upload_mem, mem, &queue->recycle, link)
      nvk_upload_mem_destroy(dev, mem);

   if (queue->mem != NULL)
      nvk_upload_mem_destroy(dev, queue->mem);

   vk_sync_destroy(&dev->vk, queue->sync);
   nvkmd_ctx_destroy(queue->ctx);
   simple_mtx_destroy(&queue->mutex);
}

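/* Submits everything recorded so far on the queue's copy context and signals
 * queue->sync with a new time point.  If nothing is pending, no work is
 * submitted.  The returned time point (when requested) can later be waited
 * on to know when the uploads have landed in memory.
 */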
static VkResult
nvk_upload_queue_flush_locked(struct nvk_device *dev,
                              struct nvk_upload_queue *queue,
                              uint64_t *time_point_out)
{
   VkResult result;

   if (queue->mem == NULL || queue->mem_push_start == queue->mem_push_end) {
      if (time_point_out != NULL)
         *time_point_out = queue->last_time_point;
      return VK_SUCCESS;
   }

   uint64_t time_point = queue->last_time_point + 1;
   if (time_point == UINT64_MAX)
      abort();

   const struct nvkmd_ctx_exec exec = {
      .addr = queue->mem->mem->va->addr + queue->mem_push_start,
      .size_B = queue->mem_push_end - queue->mem_push_start,
   };
   result = nvkmd_ctx_exec(queue->ctx, &dev->vk.base, 1, &exec);
   if (result != VK_SUCCESS)
      return result;

   const struct vk_sync_signal signal = {
      .sync = queue->sync,
      .stage_mask = ~0,
      .signal_value = time_point,
   };
   result = nvkmd_ctx_signal(queue->ctx, &dev->vk.base, 1, &signal);
   if (result != VK_SUCCESS)
      return result;

   /* Wait until now to update last_time_point so that, if we do fail and lose
    * the device, nvk_upload_queue_sync won't wait forever on a time point
    * that will never signal.
    */
   queue->last_time_point = time_point;

   queue->mem->idle_time_point = time_point;
   queue->mem_push_start = queue->mem_push_end;

   if (time_point_out != NULL)
      *time_point_out = time_point;

   return VK_SUCCESS;
}

VkResult
nvk_upload_queue_flush(struct nvk_device *dev,
                       struct nvk_upload_queue *queue,
                       uint64_t *time_point_out)
{
   VkResult result;

   simple_mtx_lock(&queue->mutex);
   result = nvk_upload_queue_flush_locked(dev, queue, time_point_out);
   simple_mtx_unlock(&queue->mutex);

   return result;
}

static VkResult
nvk_upload_queue_sync_locked(struct nvk_device *dev,
                             struct nvk_upload_queue *queue)
{
   VkResult result;

   result = nvk_upload_queue_flush_locked(dev, queue, NULL);
   if (result != VK_SUCCESS)
      return result;

   if (queue->last_time_point == 0)
      return VK_SUCCESS;

   return vk_sync_wait(&dev->vk, queue->sync, queue->last_time_point,
                       VK_SYNC_WAIT_COMPLETE, UINT64_MAX);
}

VkResult
nvk_upload_queue_sync(struct nvk_device *dev,
                      struct nvk_upload_queue *queue)
{
   VkResult result;

   simple_mtx_lock(&queue->mutex);
   result = nvk_upload_queue_sync_locked(dev, queue);
   simple_mtx_unlock(&queue->mutex);

   return result;
}

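/* Ensures the current upload BO has at least min_mem_size bytes free between
 * the end of the recorded commands and the start of the packed data.  If it
 * does not, the current BO is flushed and put on the recycle list, and a new
 * one is either popped off that list (if the GPU is already done with it,
 * judging by queue->sync) or freshly allocated.
 */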
static VkResult
nvk_upload_queue_reserve(struct nvk_device *dev,
                         struct nvk_upload_queue *queue,
                         uint32_t min_mem_size)
{
   VkResult result;

   assert(min_mem_size <= NVK_UPLOAD_MEM_SIZE);
   assert(queue->mem_push_end <= queue->mem_data_start);

   if (queue->mem != NULL) {
      if (queue->mem_data_start - queue->mem_push_end >= min_mem_size)
         return VK_SUCCESS;

      /* Not enough room in the BO.  Flush and add it to the recycle list. */
      result = nvk_upload_queue_flush_locked(dev, queue, NULL);
      if (result != VK_SUCCESS)
         return result;

      assert(queue->mem_push_start == queue->mem_push_end);
      list_addtail(&queue->mem->link, &queue->recycle);
      queue->mem = NULL;
   }

   assert(queue->mem == NULL);
   queue->mem_push_start = queue->mem_push_end = 0;
   queue->mem_data_start = NVK_UPLOAD_MEM_SIZE;

   /* Try to pop an idle BO off the recycle list */
   if (!list_is_empty(&queue->recycle)) {
      uint64_t time_point_passed = 0;
      result = vk_sync_get_value(&dev->vk, queue->sync, &time_point_passed);
      if (result != VK_SUCCESS)
         return result;

      struct nvk_upload_mem *mem =
         list_first_entry(&queue->recycle, struct nvk_upload_mem, link);
      if (time_point_passed >= mem->idle_time_point) {
         list_del(&mem->link);
         queue->mem = mem;
         return VK_SUCCESS;
      }
   }

   return nvk_upload_mem_create(dev, &queue->mem);
}

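/* Uploads size bytes from src to the GPU address dst_addr.  The data is
 * memcpy'd into the back of the current upload BO and a 90B5 (copy engine)
 * pitch-linear copy from that staging location to dst_addr is recorded after
 * the previous commands.  Uploads larger than the space remaining in the
 * current BO are split across multiple copies.
 */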
static VkResult
nvk_upload_queue_upload_locked(struct nvk_device *dev,
                               struct nvk_upload_queue *queue,
                               uint64_t dst_addr,
                               const void *src, size_t size)
{
   VkResult result;

   assert(dst_addr % 4 == 0);
   assert(size % 4 == 0);

   while (size > 0) {
      const uint32_t cmd_size_dw = 12;
      const uint32_t cmd_size = cmd_size_dw * 4;

      /* Don't split the upload for small stuff.  If it's under 1KB and we
       * can't fit it in the current buffer, just get another.
       */
      const uint32_t min_size = cmd_size + MIN2(size, 1024);
      result = nvk_upload_queue_reserve(dev, queue, min_size);
      if (result != VK_SUCCESS)
         return result;

      assert(queue->mem != NULL);
      assert(queue->mem_data_start > queue->mem_push_end);
      const uint32_t avail = queue->mem_data_start - queue->mem_push_end;
      assert(avail >= min_size);

      const uint32_t data_size = MIN2(size, avail - cmd_size);

      const uint32_t data_mem_offset = queue->mem_data_start - data_size;
      assert(queue->mem_push_end + cmd_size <= data_mem_offset);
      const uint64_t data_addr = queue->mem->mem->va->addr + data_mem_offset;
      memcpy(queue->mem->mem->map + data_mem_offset, src, data_size);
      queue->mem_data_start = data_mem_offset;

      struct nv_push p;
      nv_push_init(&p, queue->mem->mem->map + queue->mem_push_end, cmd_size_dw);

      assert(data_size <= (1 << 17));

      P_MTHD(&p, NV90B5, OFFSET_IN_UPPER);
      P_NV90B5_OFFSET_IN_UPPER(&p, data_addr >> 32);
      P_NV90B5_OFFSET_IN_LOWER(&p, data_addr & 0xffffffff);
      P_NV90B5_OFFSET_OUT_UPPER(&p, dst_addr >> 32);
      P_NV90B5_OFFSET_OUT_LOWER(&p, dst_addr & 0xffffffff);
      P_NV90B5_PITCH_IN(&p, data_size);
      P_NV90B5_PITCH_OUT(&p, data_size);
      P_NV90B5_LINE_LENGTH_IN(&p, data_size);
      P_NV90B5_LINE_COUNT(&p, 1);

      P_IMMD(&p, NV90B5, LAUNCH_DMA, {
         .data_transfer_type = DATA_TRANSFER_TYPE_NON_PIPELINED,
         .multi_line_enable = MULTI_LINE_ENABLE_FALSE,
         .flush_enable = FLUSH_ENABLE_TRUE,
         .src_memory_layout = SRC_MEMORY_LAYOUT_PITCH,
         .dst_memory_layout = DST_MEMORY_LAYOUT_PITCH,
      });

      assert(nv_push_dw_count(&p) <= cmd_size_dw);
      queue->mem_push_end += nv_push_dw_count(&p) * 4;

      dst_addr += data_size;
      src += data_size;
      size -= data_size;
   }

   return VK_SUCCESS;
}

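/* Illustrative usage only (not taken from this file; assumes the device
 * embeds the queue as dev->upload and that dst_va is a 4-byte-aligned GPU VA
 * owned by the caller):
 *
 *    uint32_t zero = 0;
 *    uint64_t time_point;
 *    result = nvk_upload_queue_upload(dev, &dev->upload, dst_va, &zero, 4);
 *    if (result == VK_SUCCESS)
 *       result = nvk_upload_queue_flush(dev, &dev->upload, &time_point);
 *
 * The write is only guaranteed to have landed once queue->sync reaches
 * time_point, or after a call to nvk_upload_queue_sync().
 */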
VkResult
nvk_upload_queue_upload(struct nvk_device *dev,
                        struct nvk_upload_queue *queue,
                        uint64_t dst_addr,
                        const void *src, size_t size)
{
   VkResult result;

   simple_mtx_lock(&queue->mutex);
   result = nvk_upload_queue_upload_locked(dev, queue, dst_addr, src, size);
   simple_mtx_unlock(&queue->mutex);

   return result;
}

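/* Fills size bytes at dst_addr with the 32-bit pattern data.  No staging
 * data is needed: the copy engine's remap unit is programmed to replicate
 * SET_REMAP_CONST_A as the source, so only the commands themselves are
 * written to the upload BO.  Large fills are expressed as a multi-line
 * pitch-linear copy with lines of at most 1 << 17 bytes.
 */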
static VkResult
nvk_upload_queue_fill_locked(struct nvk_device *dev,
                             struct nvk_upload_queue *queue,
                             uint64_t dst_addr, uint32_t data, size_t size)
{
   VkResult result;

   assert(dst_addr % 4 == 0);
   assert(size % 4 == 0);

   while (size > 0) {
      const uint32_t cmd_size_dw = 14;
      const uint32_t cmd_size = cmd_size_dw * 4;

      result = nvk_upload_queue_reserve(dev, queue, cmd_size);
      if (result != VK_SUCCESS)
         return result;

      const uint32_t max_dim = 1 << 17;
      uint32_t width_B, height;
      if (size > max_dim) {
         width_B = max_dim;
         height = MIN2(max_dim, size / width_B);
      } else {
         width_B = size;
         height = 1;
      }
      assert(width_B * height <= size);

      struct nv_push p;
      nv_push_init(&p, queue->mem->mem->map + queue->mem_push_end, cmd_size_dw);

      P_MTHD(&p, NV90B5, OFFSET_OUT_UPPER);
      P_NV90B5_OFFSET_OUT_UPPER(&p, dst_addr >> 32);
      P_NV90B5_OFFSET_OUT_LOWER(&p, dst_addr & 0xffffffff);
      P_NV90B5_PITCH_IN(&p, width_B);
      P_NV90B5_PITCH_OUT(&p, width_B);
      P_NV90B5_LINE_LENGTH_IN(&p, width_B / 4);
      P_NV90B5_LINE_COUNT(&p, height);

      P_IMMD(&p, NV90B5, SET_REMAP_CONST_A, data);
      P_IMMD(&p, NV90B5, SET_REMAP_COMPONENTS, {
         .dst_x = DST_X_CONST_A,
         .dst_y = DST_Y_CONST_A,
         .dst_z = DST_Z_CONST_A,
         .dst_w = DST_W_CONST_A,
         .component_size = COMPONENT_SIZE_FOUR,
         .num_src_components = NUM_SRC_COMPONENTS_ONE,
         .num_dst_components = NUM_DST_COMPONENTS_ONE,
      });

      P_IMMD(&p, NV90B5, LAUNCH_DMA, {
         .data_transfer_type = DATA_TRANSFER_TYPE_NON_PIPELINED,
         .multi_line_enable = height > 1,
         .flush_enable = FLUSH_ENABLE_TRUE,
         .src_memory_layout = SRC_MEMORY_LAYOUT_PITCH,
         .dst_memory_layout = DST_MEMORY_LAYOUT_PITCH,
         .remap_enable = REMAP_ENABLE_TRUE,
      });

      assert(nv_push_dw_count(&p) <= cmd_size_dw);
      queue->mem_push_end += nv_push_dw_count(&p) * 4;

      dst_addr += width_B * height;
      size -= width_B * height;
   }

   return VK_SUCCESS;
}

VkResult
nvk_upload_queue_fill(struct nvk_device *dev,
                      struct nvk_upload_queue *queue,
                      uint64_t dst_addr, uint32_t data, size_t size)
{
   VkResult result;

   simple_mtx_lock(&queue->mutex);
   result = nvk_upload_queue_fill_locked(dev, queue, dst_addr, data, size);
   simple_mtx_unlock(&queue->mutex);

   return result;
}