| /* |
| * Copyright © 2021 Intel Corporation |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| */ |
| |
| #include "anv_private.h" |
| |
| #include "perf/intel_perf.h" |
| |
| static uint32_t |
| command_buffers_count_utraces(struct anv_device *device, |
| uint32_t cmd_buffer_count, |
| struct anv_cmd_buffer **cmd_buffers, |
| uint32_t *utrace_copies) |
| { |
| if (!u_trace_context_actively_tracing(&device->ds.trace_context)) |
| return 0; |
| |
| uint32_t utraces = 0; |
| for (uint32_t i = 0; i < cmd_buffer_count; i++) { |
| if (u_trace_has_points(&cmd_buffers[i]->trace)) { |
| utraces++; |
| if (!(cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT)) |
| *utrace_copies += list_length(&cmd_buffers[i]->trace.trace_chunks); |
| } |
| } |
| |
| return utraces; |
| } |
| |
| static void |
| anv_utrace_delete_flush_data(struct u_trace_context *utctx, |
| void *flush_data) |
| { |
| struct anv_device *device = |
| container_of(utctx, struct anv_device, ds.trace_context); |
| struct anv_utrace_flush_copy *flush = flush_data; |
| |
| intel_ds_flush_data_fini(&flush->ds); |
| |
| if (flush->trace_bo) { |
| assert(flush->batch_bo); |
| anv_reloc_list_finish(&flush->relocs, &device->vk.alloc); |
| anv_device_release_bo(device, flush->batch_bo); |
| anv_device_release_bo(device, flush->trace_bo); |
| } |
| |
| vk_sync_destroy(&device->vk, flush->sync); |
| |
| vk_free(&device->vk.alloc, flush); |
| } |
| |
| static void |
| anv_device_utrace_emit_copy_ts_buffer(struct u_trace_context *utctx, |
| void *cmdstream, |
| void *ts_from, uint32_t from_offset, |
| void *ts_to, uint32_t to_offset, |
| uint32_t count) |
| { |
| struct anv_device *device = |
| container_of(utctx, struct anv_device, ds.trace_context); |
| struct anv_utrace_flush_copy *flush = cmdstream; |
| struct anv_address from_addr = (struct anv_address) { |
| .bo = ts_from, .offset = from_offset * sizeof(uint64_t) }; |
| struct anv_address to_addr = (struct anv_address) { |
| .bo = ts_to, .offset = to_offset * sizeof(uint64_t) }; |
| |
| anv_genX(device->info, emit_so_memcpy)(&flush->memcpy_state, |
| to_addr, from_addr, count * sizeof(uint64_t)); |
| } |
| |
| VkResult |
| anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue, |
| uint32_t cmd_buffer_count, |
| struct anv_cmd_buffer **cmd_buffers, |
| struct anv_utrace_flush_copy **out_flush_data) |
| { |
| struct anv_device *device = queue->device; |
| uint32_t utrace_copies = 0; |
| uint32_t utraces = command_buffers_count_utraces(device, |
| cmd_buffer_count, |
| cmd_buffers, |
| &utrace_copies); |
| if (!utraces) { |
| *out_flush_data = NULL; |
| return VK_SUCCESS; |
| } |
| |
| VkResult result; |
| struct anv_utrace_flush_copy *flush = |
| vk_zalloc(&device->vk.alloc, sizeof(struct anv_utrace_flush_copy), |
| 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); |
| if (!flush) |
| return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); |
| |
| intel_ds_flush_data_init(&flush->ds, queue->ds, queue->ds->submission_id); |
| |
| result = vk_sync_create(&device->vk, &device->physical->sync_syncobj_type, |
| 0, 0, &flush->sync); |
| if (result != VK_SUCCESS) |
| goto error_sync; |
| |
| if (utrace_copies > 0) { |
| result = anv_bo_pool_alloc(&device->utrace_bo_pool, |
| utrace_copies * 4096, |
| &flush->trace_bo); |
| if (result != VK_SUCCESS) |
| goto error_trace_buf; |
| |
| result = anv_bo_pool_alloc(&device->utrace_bo_pool, |
| /* 128 dwords of setup + 64 dwords per copy */ |
| align_u32(512 + 64 * utrace_copies, 4096), |
| &flush->batch_bo); |
| if (result != VK_SUCCESS) |
| goto error_batch_buf; |
| |
| result = anv_reloc_list_init(&flush->relocs, &device->vk.alloc); |
| if (result != VK_SUCCESS) |
| goto error_reloc_list; |
| |
| flush->batch.alloc = &device->vk.alloc; |
| flush->batch.relocs = &flush->relocs; |
| anv_batch_set_storage(&flush->batch, |
| (struct anv_address) { .bo = flush->batch_bo, }, |
| flush->batch_bo->map, flush->batch_bo->size); |
| |
| /* Emit the copies */ |
| anv_genX(device->info, emit_so_memcpy_init)(&flush->memcpy_state, |
| device, |
| &flush->batch); |
| for (uint32_t i = 0; i < cmd_buffer_count; i++) { |
| if (cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT) { |
| u_trace_flush(&cmd_buffers[i]->trace, flush, false); |
| } else { |
| u_trace_clone_append(u_trace_begin_iterator(&cmd_buffers[i]->trace), |
| u_trace_end_iterator(&cmd_buffers[i]->trace), |
| &flush->ds.trace, |
| flush, |
| anv_device_utrace_emit_copy_ts_buffer); |
| } |
| } |
| anv_genX(device->info, emit_so_memcpy_fini)(&flush->memcpy_state); |
| |
| u_trace_flush(&flush->ds.trace, flush, true); |
| |
| if (flush->batch.status != VK_SUCCESS) { |
| result = flush->batch.status; |
| goto error_batch; |
| } |
| } else { |
| for (uint32_t i = 0; i < cmd_buffer_count; i++) { |
| assert(cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT); |
| u_trace_flush(&cmd_buffers[i]->trace, flush, i == (cmd_buffer_count - 1)); |
| } |
| } |
| |
| flush->queue = queue; |
| |
| *out_flush_data = flush; |
| |
| return VK_SUCCESS; |
| |
| error_batch: |
| anv_reloc_list_finish(&flush->relocs, &device->vk.alloc); |
| error_reloc_list: |
| anv_bo_pool_free(&device->utrace_bo_pool, flush->batch_bo); |
| error_batch_buf: |
| anv_bo_pool_free(&device->utrace_bo_pool, flush->trace_bo); |
| error_trace_buf: |
| vk_sync_destroy(&device->vk, flush->sync); |
| error_sync: |
| vk_free(&device->vk.alloc, flush); |
| return result; |
| } |
| |
| static void * |
| anv_utrace_create_ts_buffer(struct u_trace_context *utctx, uint32_t size_b) |
| { |
| struct anv_device *device = |
| container_of(utctx, struct anv_device, ds.trace_context); |
| |
| struct anv_bo *bo = NULL; |
| UNUSED VkResult result = |
| anv_bo_pool_alloc(&device->utrace_bo_pool, |
| align_u32(size_b, 4096), |
| &bo); |
| assert(result == VK_SUCCESS); |
| |
| return bo; |
| } |
| |
| static void |
| anv_utrace_destroy_ts_buffer(struct u_trace_context *utctx, void *timestamps) |
| { |
| struct anv_device *device = |
| container_of(utctx, struct anv_device, ds.trace_context); |
| struct anv_bo *bo = timestamps; |
| |
| anv_bo_pool_free(&device->utrace_bo_pool, bo); |
| } |
| |
| static void |
| anv_utrace_record_ts(struct u_trace *ut, void *cs, |
| void *timestamps, unsigned idx, |
| bool end_of_pipe) |
| { |
| struct anv_cmd_buffer *cmd_buffer = |
| container_of(ut, struct anv_cmd_buffer, trace); |
| struct anv_device *device = cmd_buffer->device; |
| struct anv_bo *bo = timestamps; |
| |
| device->physical->cmd_emit_timestamp(&cmd_buffer->batch, device, |
| (struct anv_address) { |
| .bo = bo, |
| .offset = idx * sizeof(uint64_t) }, |
| end_of_pipe); |
| } |
| |
| static uint64_t |
| anv_utrace_read_ts(struct u_trace_context *utctx, |
| void *timestamps, unsigned idx, void *flush_data) |
| { |
| struct anv_device *device = |
| container_of(utctx, struct anv_device, ds.trace_context); |
| struct anv_bo *bo = timestamps; |
| struct anv_utrace_flush_copy *flush = flush_data; |
| |
| /* Only need to stall on results for the first entry: */ |
| if (idx == 0) { |
| UNUSED VkResult result = |
| vk_sync_wait(&device->vk, |
| flush->sync, |
| 0, |
| VK_SYNC_WAIT_COMPLETE, |
| os_time_get_absolute_timeout(OS_TIMEOUT_INFINITE)); |
| assert(result == VK_SUCCESS); |
| } |
| |
| uint64_t *ts = bo->map; |
| |
| /* Don't translate the no-timestamp marker: */ |
| if (ts[idx] == U_TRACE_NO_TIMESTAMP) |
| return U_TRACE_NO_TIMESTAMP; |
| |
| return intel_device_info_timebase_scale(device->info, ts[idx]); |
| } |
| |
| static const char * |
| queue_family_to_name(const struct anv_queue_family *family) |
| { |
| switch (family->engine_class) { |
| case I915_ENGINE_CLASS_RENDER: |
| return "render"; |
| case I915_ENGINE_CLASS_COPY: |
| return "copy"; |
| case I915_ENGINE_CLASS_VIDEO: |
| return "video"; |
| case I915_ENGINE_CLASS_VIDEO_ENHANCE: |
| return "video-enh"; |
| default: |
| return "unknown"; |
| } |
| } |
| |
| void |
| anv_device_utrace_init(struct anv_device *device) |
| { |
| anv_bo_pool_init(&device->utrace_bo_pool, device, "utrace"); |
| intel_ds_device_init(&device->ds, device->info, device->fd, |
| device->physical->local_minor - 128, |
| INTEL_DS_API_VULKAN); |
| u_trace_context_init(&device->ds.trace_context, |
| &device->ds, |
| anv_utrace_create_ts_buffer, |
| anv_utrace_destroy_ts_buffer, |
| anv_utrace_record_ts, |
| anv_utrace_read_ts, |
| anv_utrace_delete_flush_data); |
| |
| for (uint32_t q = 0; q < device->queue_count; q++) { |
| struct anv_queue *queue = &device->queues[q]; |
| |
| queue->ds = |
| intel_ds_device_add_queue(&device->ds, "%s%u", |
| queue_family_to_name(queue->family), |
| queue->index_in_family); |
| } |
| } |
| |
/* Tear down the device's utrace state. Order matters: process any pending
 * trace events first (flushing them eagerly), then finalize the datasource
 * (which owns the trace context), and finally release the BO pool the
 * timestamp buffers were allocated from.
 */
void
anv_device_utrace_finish(struct anv_device *device)
{
   u_trace_context_process(&device->ds.trace_context, true);
   intel_ds_device_fini(&device->ds);
   anv_bo_pool_finish(&device->utrace_bo_pool);
}
| |
| enum intel_ds_stall_flag |
| anv_pipe_flush_bit_to_ds_stall_flag(enum anv_pipe_bits bits) |
| { |
| static const struct { |
| enum anv_pipe_bits anv; |
| enum intel_ds_stall_flag ds; |
| } anv_to_ds_flags[] = { |
| { .anv = ANV_PIPE_DEPTH_CACHE_FLUSH_BIT, .ds = INTEL_DS_DEPTH_CACHE_FLUSH_BIT, }, |
| { .anv = ANV_PIPE_DATA_CACHE_FLUSH_BIT, .ds = INTEL_DS_DATA_CACHE_FLUSH_BIT, }, |
| { .anv = ANV_PIPE_TILE_CACHE_FLUSH_BIT, .ds = INTEL_DS_TILE_CACHE_FLUSH_BIT, }, |
| { .anv = ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT, .ds = INTEL_DS_RENDER_TARGET_CACHE_FLUSH_BIT, }, |
| { .anv = ANV_PIPE_STATE_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_STATE_CACHE_INVALIDATE_BIT, }, |
| { .anv = ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_CONST_CACHE_INVALIDATE_BIT, }, |
| { .anv = ANV_PIPE_VF_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_VF_CACHE_INVALIDATE_BIT, }, |
| { .anv = ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_TEXTURE_CACHE_INVALIDATE_BIT, }, |
| { .anv = ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_INST_CACHE_INVALIDATE_BIT, }, |
| { .anv = ANV_PIPE_DEPTH_STALL_BIT, .ds = INTEL_DS_DEPTH_STALL_BIT, }, |
| { .anv = ANV_PIPE_CS_STALL_BIT, .ds = INTEL_DS_CS_STALL_BIT, }, |
| { .anv = ANV_PIPE_HDC_PIPELINE_FLUSH_BIT, .ds = INTEL_DS_HDC_PIPELINE_FLUSH_BIT, }, |
| { .anv = ANV_PIPE_STALL_AT_SCOREBOARD_BIT, .ds = INTEL_DS_STALL_AT_SCOREBOARD_BIT, }, |
| { .anv = ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT, .ds = INTEL_DS_UNTYPED_DATAPORT_CACHE_FLUSH_BIT, }, |
| }; |
| |
| enum intel_ds_stall_flag ret = 0; |
| for (uint32_t i = 0; i < ARRAY_SIZE(anv_to_ds_flags); i++) { |
| if (anv_to_ds_flags[i].anv & bits) |
| ret |= anv_to_ds_flags[i].ds; |
| } |
| |
| return ret; |
| } |