| /************************************************************************** |
| * |
| * Copyright 2012 Marek Olšák <maraeo@gmail.com> |
| * All Rights Reserved. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the |
| * "Software"), to deal in the Software without restriction, including |
| * without limitation the rights to use, copy, modify, merge, publish, |
| * distribute, sub license, and/or sell copies of the Software, and to |
| * permit persons to whom the Software is furnished to do so, subject to |
| * the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the |
| * next paragraph) shall be included in all copies or substantial portions |
| * of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
| * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
| * IN NO EVENT SHALL THE AUTHORS AND/OR THEIR SUPPLIERS BE LIABLE FOR |
| * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
| * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
| * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| * |
| **************************************************************************/ |
| |
| #include "util/u_cpu_detect.h" |
| #include "util/u_helpers.h" |
| #include "util/u_inlines.h" |
| #include "util/u_upload_mgr.h" |
| #include "util/u_thread.h" |
| #include "util/os_time.h" |
| #include <inttypes.h> |
| |
| /** |
| * This function is used to copy an array of pipe_vertex_buffer structures, |
| * while properly referencing the pipe_vertex_buffer::buffer member. |
| * |
| * enabled_buffers is updated such that the bits corresponding to the indices |
| * of disabled buffers are set to 0 and the enabled ones are set to 1. |
| * |
| * \sa util_copy_framebuffer_state |
| */ |
| void util_set_vertex_buffers_mask(struct pipe_vertex_buffer *dst, |
| uint32_t *enabled_buffers, |
| const struct pipe_vertex_buffer *src, |
| unsigned start_slot, unsigned count) |
| { |
| unsigned i; |
| uint32_t bitmask = 0; |
| |
| dst += start_slot; |
| |
| if (src) { |
| for (i = 0; i < count; i++) { |
| if (src[i].buffer.resource) |
| bitmask |= 1 << i; |
| |
| pipe_vertex_buffer_unreference(&dst[i]); |
| |
| if (!src[i].is_user_buffer) |
| pipe_resource_reference(&dst[i].buffer.resource, src[i].buffer.resource); |
| } |
| |
| /* Copy over the other members of pipe_vertex_buffer. */ |
| memcpy(dst, src, count * sizeof(struct pipe_vertex_buffer)); |
| |
| *enabled_buffers &= ~(((1ull << count) - 1) << start_slot); |
| *enabled_buffers |= bitmask << start_slot; |
| } |
| else { |
| /* Unreference the buffers. */ |
| for (i = 0; i < count; i++) |
| pipe_vertex_buffer_unreference(&dst[i]); |
| |
| *enabled_buffers &= ~(((1ull << count) - 1) << start_slot); |
| } |
| } |
| |
| /** |
| * Same as util_set_vertex_buffers_mask, but it only returns the number |
| * of bound buffers. |
| */ |
| void util_set_vertex_buffers_count(struct pipe_vertex_buffer *dst, |
| unsigned *dst_count, |
| const struct pipe_vertex_buffer *src, |
| unsigned start_slot, unsigned count) |
| { |
| unsigned i; |
| uint32_t enabled_buffers = 0; |
| |
| for (i = 0; i < *dst_count; i++) { |
| if (dst[i].buffer.resource) |
| enabled_buffers |= (1ull << i); |
| } |
| |
| util_set_vertex_buffers_mask(dst, &enabled_buffers, src, start_slot, |
| count); |
| |
| *dst_count = util_last_bit(enabled_buffers); |
| } |
| |
| /** |
| * This function is used to copy an array of pipe_shader_buffer structures, |
| * while properly referencing the pipe_shader_buffer::buffer member. |
| * |
| * \sa util_set_vertex_buffer_mask |
| */ |
| void util_set_shader_buffers_mask(struct pipe_shader_buffer *dst, |
| uint32_t *enabled_buffers, |
| const struct pipe_shader_buffer *src, |
| unsigned start_slot, unsigned count) |
| { |
| unsigned i; |
| |
| dst += start_slot; |
| |
| if (src) { |
| for (i = 0; i < count; i++) { |
| pipe_resource_reference(&dst[i].buffer, src[i].buffer); |
| |
| if (src[i].buffer) |
| *enabled_buffers |= (1ull << (start_slot + i)); |
| else |
| *enabled_buffers &= ~(1ull << (start_slot + i)); |
| } |
| |
| /* Copy over the other members of pipe_shader_buffer. */ |
| memcpy(dst, src, count * sizeof(struct pipe_shader_buffer)); |
| } |
| else { |
| /* Unreference the buffers. */ |
| for (i = 0; i < count; i++) |
| pipe_resource_reference(&dst[i].buffer, NULL); |
| |
| *enabled_buffers &= ~(((1ull << count) - 1) << start_slot); |
| } |
| } |
| |
| /** |
| * Given a user index buffer, save the structure to "saved", and upload it. |
| */ |
| bool |
| util_upload_index_buffer(struct pipe_context *pipe, |
| const struct pipe_draw_info *info, |
| struct pipe_resource **out_buffer, |
| unsigned *out_offset) |
| { |
| unsigned start_offset = info->start * info->index_size; |
| |
| u_upload_data(pipe->stream_uploader, start_offset, |
| info->count * info->index_size, 4, |
| (char*)info->index.user + start_offset, |
| out_offset, out_buffer); |
| u_upload_unmap(pipe->stream_uploader); |
| *out_offset -= start_offset; |
| return *out_buffer != NULL; |
| } |
| |
| /** |
| * Called by MakeCurrent. Used to notify the driver that the application |
| * thread may have been changed. |
| * |
| * The function pins the current thread and driver threads to a group of |
| * CPU cores that share the same L3 cache. This is needed for good multi- |
| * threading performance on AMD Zen CPUs. |
| * |
| * \param upper_thread thread in the state tracker that also needs to be |
| * pinned. |
| */ |
| void |
| util_pin_driver_threads_to_random_L3(struct pipe_context *ctx, |
| thrd_t *upper_thread) |
| { |
| /* If pinning has no effect, don't do anything. */ |
| if (util_cpu_caps.nr_cpus == util_cpu_caps.cores_per_L3) |
| return; |
| |
| unsigned num_L3_caches = util_cpu_caps.nr_cpus / |
| util_cpu_caps.cores_per_L3; |
| |
| /* Get a semi-random number. */ |
| int64_t t = os_time_get_nano(); |
| unsigned cache = (t ^ (t >> 8) ^ (t >> 16)) % num_L3_caches; |
| |
| /* Tell the driver to pin its threads to the selected L3 cache. */ |
| if (ctx->set_context_param) { |
| ctx->set_context_param(ctx, PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE, |
| cache); |
| } |
| |
| /* Do the same for the upper level thread if there is any (e.g. glthread) */ |
| if (upper_thread) |
| util_pin_thread_to_L3(*upper_thread, cache, util_cpu_caps.cores_per_L3); |
| } |
| |
| /* This is a helper for hardware bring-up. Don't remove. */ |
| struct pipe_query * |
| util_begin_pipestat_query(struct pipe_context *ctx) |
| { |
| struct pipe_query *q = |
| ctx->create_query(ctx, PIPE_QUERY_PIPELINE_STATISTICS, 0); |
| if (!q) |
| return NULL; |
| |
| ctx->begin_query(ctx, q); |
| return q; |
| } |
| |
| /* This is a helper for hardware bring-up. Don't remove. */ |
| void |
| util_end_pipestat_query(struct pipe_context *ctx, struct pipe_query *q, |
| FILE *f) |
| { |
| static unsigned counter; |
| struct pipe_query_data_pipeline_statistics stats; |
| |
| ctx->end_query(ctx, q); |
| ctx->get_query_result(ctx, q, true, (void*)&stats); |
| ctx->destroy_query(ctx, q); |
| |
| fprintf(f, |
| "Draw call %u:\n" |
| " ia_vertices = %"PRIu64"\n" |
| " ia_primitives = %"PRIu64"\n" |
| " vs_invocations = %"PRIu64"\n" |
| " gs_invocations = %"PRIu64"\n" |
| " gs_primitives = %"PRIu64"\n" |
| " c_invocations = %"PRIu64"\n" |
| " c_primitives = %"PRIu64"\n" |
| " ps_invocations = %"PRIu64"\n" |
| " hs_invocations = %"PRIu64"\n" |
| " ds_invocations = %"PRIu64"\n" |
| " cs_invocations = %"PRIu64"\n", |
| (unsigned)p_atomic_inc_return(&counter), |
| stats.ia_vertices, |
| stats.ia_primitives, |
| stats.vs_invocations, |
| stats.gs_invocations, |
| stats.gs_primitives, |
| stats.c_invocations, |
| stats.c_primitives, |
| stats.ps_invocations, |
| stats.hs_invocations, |
| stats.ds_invocations, |
| stats.cs_invocations); |
| } |
| |
| /* This is a helper for hardware bring-up. Don't remove. */ |
| void |
| util_wait_for_idle(struct pipe_context *ctx) |
| { |
| struct pipe_fence_handle *fence = NULL; |
| |
| ctx->flush(ctx, &fence, 0); |
| ctx->screen->fence_finish(ctx->screen, NULL, fence, PIPE_TIMEOUT_INFINITE); |
| } |
| |
| void |
| util_throttle_init(struct util_throttle *t, uint64_t max_mem_usage) |
| { |
| t->max_mem_usage = max_mem_usage; |
| } |
| |
| void |
| util_throttle_deinit(struct pipe_screen *screen, struct util_throttle *t) |
| { |
| for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++) |
| screen->fence_reference(screen, &t->ring[i].fence, NULL); |
| } |
| |
| static uint64_t |
| util_get_throttle_total_memory_usage(struct util_throttle *t) |
| { |
| uint64_t total_usage = 0; |
| |
| for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++) |
| total_usage += t->ring[i].mem_usage; |
| return total_usage; |
| } |
| |
| static void util_dump_throttle_ring(struct util_throttle *t) |
| { |
| printf("Throttle:\n"); |
| for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++) { |
| printf(" ring[%u]: fence = %s, mem_usage = %"PRIu64"%s%s\n", |
| i, t->ring[i].fence ? "yes" : " no", |
| t->ring[i].mem_usage, |
| t->flush_index == i ? " [flush]" : "", |
| t->wait_index == i ? " [wait]" : ""); |
| } |
| } |
| |
| /** |
| * Notify util_throttle that the next operation allocates memory. |
| * util_throttle tracks memory usage and waits for fences until its tracked |
| * memory usage decreases. |
| * |
| * Example: |
| * util_throttle_memory_usage(..., w*h*d*Bpp); |
| * TexSubImage(..., w, h, d, ...); |
| * |
| * This means that TexSubImage can't allocate more memory its maximum limit |
| * set during initialization. |
| */ |
| void |
| util_throttle_memory_usage(struct pipe_context *pipe, |
| struct util_throttle *t, uint64_t memory_size) |
| { |
| (void)util_dump_throttle_ring; /* silence warning */ |
| |
| if (!t->max_mem_usage) |
| return; |
| |
| struct pipe_screen *screen = pipe->screen; |
| struct pipe_fence_handle **fence = NULL; |
| unsigned ring_size = ARRAY_SIZE(t->ring); |
| uint64_t total = util_get_throttle_total_memory_usage(t); |
| |
| /* If there is not enough memory, walk the list of fences and find |
| * the latest one that we need to wait for. |
| */ |
| while (t->wait_index != t->flush_index && |
| total && total + memory_size > t->max_mem_usage) { |
| assert(t->ring[t->wait_index].fence); |
| |
| /* Release an older fence if we need to wait for a newer one. */ |
| if (fence) |
| screen->fence_reference(screen, fence, NULL); |
| |
| fence = &t->ring[t->wait_index].fence; |
| t->ring[t->wait_index].mem_usage = 0; |
| t->wait_index = (t->wait_index + 1) % ring_size; |
| |
| total = util_get_throttle_total_memory_usage(t); |
| } |
| |
| /* Wait for the fence to decrease memory usage. */ |
| if (fence) { |
| screen->fence_finish(screen, pipe, *fence, PIPE_TIMEOUT_INFINITE); |
| screen->fence_reference(screen, fence, NULL); |
| } |
| |
| /* Flush and get a fence if we've exhausted memory usage for the current |
| * slot. |
| */ |
| if (t->ring[t->flush_index].mem_usage && |
| t->ring[t->flush_index].mem_usage + memory_size > |
| t->max_mem_usage / (ring_size / 2)) { |
| struct pipe_fence_handle **fence = |
| &t->ring[t->flush_index].fence; |
| |
| /* Expect that the current flush slot doesn't have a fence yet. */ |
| assert(!*fence); |
| |
| pipe->flush(pipe, fence, PIPE_FLUSH_ASYNC); |
| t->flush_index = (t->flush_index + 1) % ring_size; |
| |
| /* Vacate the next slot if it's occupied. This should be rare. */ |
| if (t->flush_index == t->wait_index) { |
| struct pipe_fence_handle **fence = |
| &t->ring[t->wait_index].fence; |
| |
| t->ring[t->wait_index].mem_usage = 0; |
| t->wait_index = (t->wait_index + 1) % ring_size; |
| |
| assert(*fence); |
| screen->fence_finish(screen, pipe, *fence, PIPE_TIMEOUT_INFINITE); |
| screen->fence_reference(screen, fence, NULL); |
| } |
| |
| assert(!t->ring[t->flush_index].mem_usage); |
| assert(!t->ring[t->flush_index].fence); |
| } |
| |
| t->ring[t->flush_index].mem_usage += memory_size; |
| } |