| /************************************************************************** |
| * |
| * Copyright 2017 Advanced Micro Devices, Inc. |
| * All Rights Reserved. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * on the rights to use, copy, modify, merge, publish, distribute, sub |
| * license, and/or sell copies of the Software, and to permit persons to whom |
| * the Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
| * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
| * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
| * USE OR OTHER DEALINGS IN THE SOFTWARE. |
| * |
| **************************************************************************/ |
| |
| #include "util/u_threaded_context.h" |
| #include "util/u_cpu_detect.h" |
| #include "util/format/u_format.h" |
| #include "util/u_inlines.h" |
| #include "util/u_memory.h" |
| #include "util/u_upload_mgr.h" |
| #include "driver_trace/tr_context.h" |
| #include "util/log.h" |
| #include "compiler/shader_info.h" |
| |
| #if TC_DEBUG >= 1 |
| #define tc_assert assert |
| #else |
| #define tc_assert(x) |
| #endif |
| |
| #if TC_DEBUG >= 2 |
| #define tc_printf mesa_logi |
| #define tc_asprintf asprintf |
| #define tc_strcmp strcmp |
| #else |
| #define tc_printf(...) |
| #define tc_asprintf(...) 0 |
| #define tc_strcmp(...) 0 |
| #endif |
| |
| #define TC_SENTINEL 0x5ca1ab1e |
| |
| enum tc_call_id { |
| #define CALL(name) TC_CALL_##name, |
| #include "u_threaded_context_calls.h" |
| #undef CALL |
| TC_NUM_CALLS, |
| }; |
| |
| #if TC_DEBUG >= 3 |
| static const char *tc_call_names[] = { |
| #define CALL(name) #name, |
| #include "u_threaded_context_calls.h" |
| #undef CALL |
| }; |
| #endif |
| |
| typedef uint16_t (*tc_execute)(struct pipe_context *pipe, void *call, uint64_t *last); |
| |
| static const tc_execute execute_func[TC_NUM_CALLS]; |
| |
| static void |
| tc_buffer_subdata(struct pipe_context *_pipe, |
| struct pipe_resource *resource, |
| unsigned usage, unsigned offset, |
| unsigned size, const void *data); |
| |
| static void |
| tc_batch_check(UNUSED struct tc_batch *batch) |
| { |
| tc_assert(batch->sentinel == TC_SENTINEL); |
| tc_assert(batch->num_total_slots <= TC_SLOTS_PER_BATCH); |
| } |
| |
| static void |
| tc_debug_check(struct threaded_context *tc) |
| { |
| for (unsigned i = 0; i < TC_MAX_BATCHES; i++) { |
| tc_batch_check(&tc->batch_slots[i]); |
| tc_assert(tc->batch_slots[i].tc == tc); |
| } |
| } |
| |
| static void |
| tc_set_driver_thread(struct threaded_context *tc) |
| { |
| #ifndef NDEBUG |
| tc->driver_thread = util_get_thread_id(); |
| #endif |
| } |
| |
| static void |
| tc_clear_driver_thread(struct threaded_context *tc) |
| { |
| #ifndef NDEBUG |
| memset(&tc->driver_thread, 0, sizeof(tc->driver_thread)); |
| #endif |
| } |
| |
| static void * |
| to_call_check(void *ptr, unsigned num_slots) |
| { |
| #if TC_DEBUG >= 1 |
| struct tc_call_base *call = ptr; |
| tc_assert(call->num_slots == num_slots); |
| #endif |
| return ptr; |
| } |
| #define to_call(ptr, type) ((struct type *)to_call_check((void *)(ptr), call_size(type))) |
| |
| #define size_to_slots(size) DIV_ROUND_UP(size, 8) |
| #define call_size(type) size_to_slots(sizeof(struct type)) |
| #define call_size_with_slots(type, num_slots) size_to_slots( \ |
| sizeof(struct type) + sizeof(((struct type*)NULL)->slot[0]) * (num_slots)) |
| #define get_next_call(ptr, type) ((struct type*)((uint64_t*)ptr + call_size(type))) |
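| /* Worked example of the slot math above: calls are stored in 8-byte slots, |
| * so a 20-byte call struct occupies size_to_slots(20) = 3 slots, and a |
| * variable-sized call such as tc_sampler_states with N trailing void* |
| * entries occupies size_to_slots(sizeof(struct tc_sampler_states) + |
| * N * sizeof(void *)) slots, which is what call_size_with_slots computes. |
| */ |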
| |
| /* Assign src to dst while dst is uninitialized. */ |
| static inline void |
| tc_set_resource_reference(struct pipe_resource **dst, struct pipe_resource *src) |
| { |
| *dst = src; |
| pipe_reference(NULL, &src->reference); /* only increment refcount */ |
| } |
| |
| /* Assign src to dst while dst is uninitialized. */ |
| static inline void |
| tc_set_vertex_state_reference(struct pipe_vertex_state **dst, |
| struct pipe_vertex_state *src) |
| { |
| *dst = src; |
| pipe_reference(NULL, &src->reference); /* only increment refcount */ |
| } |
| |
| /* Unreference dst but don't touch the dst pointer. */ |
| static inline void |
| tc_drop_resource_reference(struct pipe_resource *dst) |
| { |
| if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */ |
| pipe_resource_destroy(dst); |
| } |
| |
| /* Unreference dst but don't touch the dst pointer. */ |
| static inline void |
| tc_drop_surface_reference(struct pipe_surface *dst) |
| { |
| if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */ |
| dst->context->surface_destroy(dst->context, dst); |
| } |
| |
| /* Unreference dst but don't touch the dst pointer. */ |
| static inline void |
| tc_drop_so_target_reference(struct pipe_stream_output_target *dst) |
| { |
| if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */ |
| dst->context->stream_output_target_destroy(dst->context, dst); |
| } |
| |
| /** |
| * Subtract the given number of references. |
| */ |
| static inline void |
| tc_drop_vertex_state_references(struct pipe_vertex_state *dst, int num_refs) |
| { |
| int count = p_atomic_add_return(&dst->reference.count, -num_refs); |
| |
| assert(count >= 0); |
| /* Underflows shouldn't happen, but let's be safe. */ |
| if (count <= 0) |
| dst->screen->vertex_state_destroy(dst->screen, dst); |
| } |
| |
| /* We don't want to read or write min_index and max_index, because |
| * they shouldn't be needed by drivers at this point. |
| */ |
| #define DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX \ |
| offsetof(struct pipe_draw_info, min_index) |
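| /* Copying DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX bytes copies every field |
| * declared before min_index, so this relies on min_index and max_index |
| * being the trailing members of pipe_draw_info. |
| */ |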
| |
| static void |
| tc_batch_execute(void *job, UNUSED void *gdata, int thread_index) |
| { |
| struct tc_batch *batch = job; |
| struct pipe_context *pipe = batch->tc->pipe; |
| uint64_t *last = &batch->slots[batch->num_total_slots]; |
| |
| tc_batch_check(batch); |
| tc_set_driver_thread(batch->tc); |
| |
| assert(!batch->token); |
| |
| for (uint64_t *iter = batch->slots; iter != last;) { |
| struct tc_call_base *call = (struct tc_call_base *)iter; |
| |
| tc_assert(call->sentinel == TC_SENTINEL); |
| |
| #if TC_DEBUG >= 3 |
| tc_printf("CALL: %s", tc_call_names[call->call_id]); |
| #endif |
| |
| iter += execute_func[call->call_id](pipe, call, last); |
| } |
| |
| /* Add the fence to the list of fences for the driver to signal at the next |
| * flush, which we use for tracking which buffers are referenced by |
| * an unflushed command buffer. |
| */ |
| struct threaded_context *tc = batch->tc; |
| struct util_queue_fence *fence = |
| &tc->buffer_lists[batch->buffer_list_index].driver_flushed_fence; |
| |
| if (tc->options.driver_calls_flush_notify) { |
| tc->signal_fences_next_flush[tc->num_signal_fences_next_flush++] = fence; |
| |
| /* Since our buffer lists are chained as a ring, we need to flush |
| * the context twice as we go around the ring to make the driver signal |
| * the buffer list fences, so that the producer thread can reuse the buffer |
| * list structures for the next batches without waiting. |
| */ |
| unsigned half_ring = TC_MAX_BUFFER_LISTS / 2; |
| if (batch->buffer_list_index % half_ring == half_ring - 1) |
| pipe->flush(pipe, NULL, PIPE_FLUSH_ASYNC); |
| } else { |
| util_queue_fence_signal(fence); |
| } |
| |
| tc_clear_driver_thread(batch->tc); |
| tc_batch_check(batch); |
| batch->num_total_slots = 0; |
| } |
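| /* Illustration of the half-ring flush above, assuming for the sake of the |
| * example that TC_MAX_BUFFER_LISTS is 8: batches using buffer lists 3 and 7 |
| * trigger an async flush, so by the time the ring wraps around, the driver |
| * has had a chance to signal driver_flushed_fence for the first half of the |
| * lists and the producer thread can reuse them without waiting (see the |
| * assertion in tc_begin_next_buffer_list). |
| */ |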
| |
| static void |
| tc_begin_next_buffer_list(struct threaded_context *tc) |
| { |
| tc->next_buf_list = (tc->next_buf_list + 1) % TC_MAX_BUFFER_LISTS; |
| |
| tc->batch_slots[tc->next].buffer_list_index = tc->next_buf_list; |
| |
| /* Clear the buffer list in the new empty batch. */ |
| struct tc_buffer_list *buf_list = &tc->buffer_lists[tc->next_buf_list]; |
| assert(util_queue_fence_is_signalled(&buf_list->driver_flushed_fence)); |
| util_queue_fence_reset(&buf_list->driver_flushed_fence); /* set to unsignalled */ |
| BITSET_ZERO(buf_list->buffer_list); |
| |
| tc->add_all_gfx_bindings_to_buffer_list = true; |
| tc->add_all_compute_bindings_to_buffer_list = true; |
| } |
| |
| static void |
| tc_batch_flush(struct threaded_context *tc) |
| { |
| struct tc_batch *next = &tc->batch_slots[tc->next]; |
| |
| tc_assert(next->num_total_slots != 0); |
| tc_batch_check(next); |
| tc_debug_check(tc); |
| tc->bytes_mapped_estimate = 0; |
| p_atomic_add(&tc->num_offloaded_slots, next->num_total_slots); |
| |
| if (next->token) { |
| next->token->tc = NULL; |
| tc_unflushed_batch_token_reference(&next->token, NULL); |
| } |
| |
| util_queue_add_job(&tc->queue, next, &next->fence, tc_batch_execute, |
| NULL, 0); |
| tc->last = tc->next; |
| tc->next = (tc->next + 1) % TC_MAX_BATCHES; |
| tc_begin_next_buffer_list(tc); |
| } |
| |
| /* This is the function that adds variable-sized calls into the current |
| * batch. It also flushes the batch if there is not enough space there. |
| * All other higher-level "add" functions use it. |
| */ |
| static void * |
| tc_add_sized_call(struct threaded_context *tc, enum tc_call_id id, |
| unsigned num_slots) |
| { |
| struct tc_batch *next = &tc->batch_slots[tc->next]; |
| assert(num_slots <= TC_SLOTS_PER_BATCH); |
| tc_debug_check(tc); |
| |
| if (unlikely(next->num_total_slots + num_slots > TC_SLOTS_PER_BATCH)) { |
| tc_batch_flush(tc); |
| next = &tc->batch_slots[tc->next]; |
| tc_assert(next->num_total_slots == 0); |
| } |
| |
| tc_assert(util_queue_fence_is_signalled(&next->fence)); |
| |
| struct tc_call_base *call = (struct tc_call_base*)&next->slots[next->num_total_slots]; |
| next->num_total_slots += num_slots; |
| |
| #if !defined(NDEBUG) && TC_DEBUG >= 1 |
| call->sentinel = TC_SENTINEL; |
| #endif |
| call->call_id = id; |
| call->num_slots = num_slots; |
| |
| #if TC_DEBUG >= 3 |
| tc_printf("ENQUEUE: %s", tc_call_names[id]); |
| #endif |
| |
| tc_debug_check(tc); |
| return call; |
| } |
| |
| #define tc_add_call(tc, execute, type) \ |
| ((struct type*)tc_add_sized_call(tc, execute, call_size(type))) |
| |
| #define tc_add_slot_based_call(tc, execute, type, num_slots) \ |
| ((struct type*)tc_add_sized_call(tc, execute, \ |
| call_size_with_slots(type, num_slots))) |
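| /* Typical enqueue pattern built on these helpers (a sketch mirroring real |
| * entry points below, e.g. tc_destroy_query): |
| * |
| *    struct tc_query_call *p = |
| *       tc_add_call(tc, TC_CALL_destroy_query, tc_query_call); |
| *    p->query = query; |
| * |
| * Nothing executes yet; the matching tc_call_destroy_query() runs later in |
| * the driver thread and returns how many slots to advance the iterator by. |
| */ |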
| |
| static bool |
| tc_is_sync(struct threaded_context *tc) |
| { |
| struct tc_batch *last = &tc->batch_slots[tc->last]; |
| struct tc_batch *next = &tc->batch_slots[tc->next]; |
| |
| return util_queue_fence_is_signalled(&last->fence) && |
| !next->num_total_slots; |
| } |
| |
| static void |
| _tc_sync(struct threaded_context *tc, UNUSED const char *info, UNUSED const char *func) |
| { |
| struct tc_batch *last = &tc->batch_slots[tc->last]; |
| struct tc_batch *next = &tc->batch_slots[tc->next]; |
| bool synced = false; |
| |
| tc_debug_check(tc); |
| |
| /* Only wait for queued calls... */ |
| if (!util_queue_fence_is_signalled(&last->fence)) { |
| util_queue_fence_wait(&last->fence); |
| synced = true; |
| } |
| |
| tc_debug_check(tc); |
| |
| if (next->token) { |
| next->token->tc = NULL; |
| tc_unflushed_batch_token_reference(&next->token, NULL); |
| } |
| |
| /* ... and execute unflushed calls directly. */ |
| if (next->num_total_slots) { |
| p_atomic_add(&tc->num_direct_slots, next->num_total_slots); |
| tc->bytes_mapped_estimate = 0; |
| tc_batch_execute(next, NULL, 0); |
| tc_begin_next_buffer_list(tc); |
| synced = true; |
| } |
| |
| if (synced) { |
| p_atomic_inc(&tc->num_syncs); |
| |
| if (tc_strcmp(func, "tc_destroy") != 0) { |
| tc_printf("sync %s %s", func, info); |
| } |
| } |
| |
| tc_debug_check(tc); |
| } |
| |
| #define tc_sync(tc) _tc_sync(tc, "", __func__) |
| #define tc_sync_msg(tc, info) _tc_sync(tc, info, __func__) |
| |
| /** |
| * Call this from fence_finish for same-context fence waits of deferred fences |
| * that haven't been flushed yet. |
| * |
| * The passed pipe_context must be the one passed to pipe_screen::fence_finish, |
| * i.e., the wrapped one. |
| */ |
| void |
| threaded_context_flush(struct pipe_context *_pipe, |
| struct tc_unflushed_batch_token *token, |
| bool prefer_async) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| |
| /* This is called from the gallium frontend / application thread. */ |
| if (token->tc && token->tc == tc) { |
| struct tc_batch *last = &tc->batch_slots[tc->last]; |
| |
| /* Prefer to do the flush in the driver thread if it is already |
| * running. That should be better for cache locality. |
| */ |
| if (prefer_async || !util_queue_fence_is_signalled(&last->fence)) |
| tc_batch_flush(tc); |
| else |
| tc_sync(token->tc); |
| } |
| } |
| |
| static void |
| tc_add_to_buffer_list(struct tc_buffer_list *next, struct pipe_resource *buf) |
| { |
| uint32_t id = threaded_resource(buf)->buffer_id_unique; |
| BITSET_SET(next->buffer_list, id & TC_BUFFER_ID_MASK); |
| } |
| |
| /* Set a buffer binding and add it to the buffer list. */ |
| static void |
| tc_bind_buffer(uint32_t *binding, struct tc_buffer_list *next, struct pipe_resource *buf) |
| { |
| uint32_t id = threaded_resource(buf)->buffer_id_unique; |
| *binding = id; |
| BITSET_SET(next->buffer_list, id & TC_BUFFER_ID_MASK); |
| } |
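| /* Note that buffer_id_unique is hashed into a fixed-size bitset by |
| * "id & TC_BUFFER_ID_MASK", so two distinct buffers can alias the same bit. |
| * Aliasing only causes false positives in tc_is_buffer_busy (a buffer may be |
| * reported busy when it isn't), which is conservative and therefore safe. |
| */ |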
| |
| /* Reset a buffer binding. */ |
| static void |
| tc_unbind_buffer(uint32_t *binding) |
| { |
| *binding = 0; |
| } |
| |
| /* Reset a range of buffer binding slots. */ |
| static void |
| tc_unbind_buffers(uint32_t *binding, unsigned count) |
| { |
| if (count) |
| memset(binding, 0, sizeof(*binding) * count); |
| } |
| |
| static void |
| tc_add_bindings_to_buffer_list(BITSET_WORD *buffer_list, const uint32_t *bindings, |
| unsigned count) |
| { |
| for (unsigned i = 0; i < count; i++) { |
| if (bindings[i]) |
| BITSET_SET(buffer_list, bindings[i] & TC_BUFFER_ID_MASK); |
| } |
| } |
| |
| static unsigned |
| tc_rebind_bindings(uint32_t old_id, uint32_t new_id, uint32_t *bindings, |
| unsigned count) |
| { |
| unsigned rebind_count = 0; |
| |
| for (unsigned i = 0; i < count; i++) { |
| if (bindings[i] == old_id) { |
| bindings[i] = new_id; |
| rebind_count++; |
| } |
| } |
| return rebind_count; |
| } |
| |
| static void |
| tc_add_shader_bindings_to_buffer_list(struct threaded_context *tc, |
| BITSET_WORD *buffer_list, |
| enum pipe_shader_type shader) |
| { |
| tc_add_bindings_to_buffer_list(buffer_list, tc->const_buffers[shader], |
| tc->max_const_buffers); |
| if (tc->seen_shader_buffers[shader]) { |
| tc_add_bindings_to_buffer_list(buffer_list, tc->shader_buffers[shader], |
| tc->max_shader_buffers); |
| } |
| if (tc->seen_image_buffers[shader]) { |
| tc_add_bindings_to_buffer_list(buffer_list, tc->image_buffers[shader], |
| tc->max_images); |
| } |
| if (tc->seen_sampler_buffers[shader]) { |
| tc_add_bindings_to_buffer_list(buffer_list, tc->sampler_buffers[shader], |
| tc->max_samplers); |
| } |
| } |
| |
| static unsigned |
| tc_rebind_shader_bindings(struct threaded_context *tc, uint32_t old_id, |
| uint32_t new_id, enum pipe_shader_type shader, uint32_t *rebind_mask) |
| { |
| unsigned ubo = 0, ssbo = 0, img = 0, sampler = 0; |
| |
| ubo = tc_rebind_bindings(old_id, new_id, tc->const_buffers[shader], |
| tc->max_const_buffers); |
| if (ubo) |
| *rebind_mask |= BITFIELD_BIT(TC_BINDING_UBO_VS) << shader; |
| if (tc->seen_shader_buffers[shader]) { |
| ssbo = tc_rebind_bindings(old_id, new_id, tc->shader_buffers[shader], |
| tc->max_shader_buffers); |
| if (ssbo) |
| *rebind_mask |= BITFIELD_BIT(TC_BINDING_SSBO_VS) << shader; |
| } |
| if (tc->seen_image_buffers[shader]) { |
| img = tc_rebind_bindings(old_id, new_id, tc->image_buffers[shader], |
| tc->max_images); |
| if (img) |
| *rebind_mask |= BITFIELD_BIT(TC_BINDING_IMAGE_VS) << shader; |
| } |
| if (tc->seen_sampler_buffers[shader]) { |
| sampler = tc_rebind_bindings(old_id, new_id, tc->sampler_buffers[shader], |
| tc->max_samplers); |
| if (sampler) |
| *rebind_mask |= BITFIELD_BIT(TC_BINDING_SAMPLERVIEW_VS) << shader; |
| } |
| return ubo + ssbo + img + sampler; |
| } |
| |
| /* Add all bound buffers used by VS/TCS/TES/GS/FS to the buffer list. |
| * This is called by the first draw call in a batch when we want to inherit |
| * all bindings set by the previous batch. |
| */ |
| static void |
| tc_add_all_gfx_bindings_to_buffer_list(struct threaded_context *tc) |
| { |
| BITSET_WORD *buffer_list = tc->buffer_lists[tc->next_buf_list].buffer_list; |
| |
| tc_add_bindings_to_buffer_list(buffer_list, tc->vertex_buffers, tc->max_vertex_buffers); |
| if (tc->seen_streamout_buffers) |
| tc_add_bindings_to_buffer_list(buffer_list, tc->streamout_buffers, PIPE_MAX_SO_BUFFERS); |
| |
| tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_VERTEX); |
| tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_FRAGMENT); |
| |
| if (tc->seen_tcs) |
| tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_TESS_CTRL); |
| if (tc->seen_tes) |
| tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_TESS_EVAL); |
| if (tc->seen_gs) |
| tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_GEOMETRY); |
| |
| tc->add_all_gfx_bindings_to_buffer_list = false; |
| } |
| |
| /* Add all bound buffers used by compute to the buffer list. |
| * This is called by the first compute call in a batch when we want to inherit |
| * all bindings set by the previous batch. |
| */ |
| static void |
| tc_add_all_compute_bindings_to_buffer_list(struct threaded_context *tc) |
| { |
| BITSET_WORD *buffer_list = tc->buffer_lists[tc->next_buf_list].buffer_list; |
| |
| tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_COMPUTE); |
| tc->add_all_compute_bindings_to_buffer_list = false; |
| } |
| |
| static unsigned |
| tc_rebind_buffer(struct threaded_context *tc, uint32_t old_id, uint32_t new_id, uint32_t *rebind_mask) |
| { |
| unsigned vbo = 0, so = 0; |
| |
| vbo = tc_rebind_bindings(old_id, new_id, tc->vertex_buffers, |
| tc->max_vertex_buffers); |
| if (vbo) |
| *rebind_mask |= BITFIELD_BIT(TC_BINDING_VERTEX_BUFFER); |
| |
| if (tc->seen_streamout_buffers) { |
| so = tc_rebind_bindings(old_id, new_id, tc->streamout_buffers, |
| PIPE_MAX_SO_BUFFERS); |
| if (so) |
| *rebind_mask |= BITFIELD_BIT(TC_BINDING_STREAMOUT_BUFFER); |
| } |
| unsigned rebound = vbo + so; |
| |
| rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_VERTEX, rebind_mask); |
| rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_FRAGMENT, rebind_mask); |
| |
| if (tc->seen_tcs) |
| rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_TESS_CTRL, rebind_mask); |
| if (tc->seen_tes) |
| rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_TESS_EVAL, rebind_mask); |
| if (tc->seen_gs) |
| rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_GEOMETRY, rebind_mask); |
| |
| rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_COMPUTE, rebind_mask); |
| |
| if (rebound) |
| BITSET_SET(tc->buffer_lists[tc->next_buf_list].buffer_list, new_id & TC_BUFFER_ID_MASK); |
| return rebound; |
| } |
| |
| static bool |
| tc_is_buffer_bound_with_mask(uint32_t id, uint32_t *bindings, unsigned binding_mask) |
| { |
| while (binding_mask) { |
| if (bindings[u_bit_scan(&binding_mask)] == id) |
| return true; |
| } |
| return false; |
| } |
| |
| static bool |
| tc_is_buffer_shader_bound_for_write(struct threaded_context *tc, uint32_t id, |
| enum pipe_shader_type shader) |
| { |
| if (tc->seen_shader_buffers[shader] && |
| tc_is_buffer_bound_with_mask(id, tc->shader_buffers[shader], |
| tc->shader_buffers_writeable_mask[shader])) |
| return true; |
| |
| if (tc->seen_image_buffers[shader] && |
| tc_is_buffer_bound_with_mask(id, tc->image_buffers[shader], |
| tc->image_buffers_writeable_mask[shader])) |
| return true; |
| |
| return false; |
| } |
| |
| static bool |
| tc_is_buffer_bound_for_write(struct threaded_context *tc, uint32_t id) |
| { |
| if (tc->seen_streamout_buffers && |
| tc_is_buffer_bound_with_mask(id, tc->streamout_buffers, |
| BITFIELD_MASK(PIPE_MAX_SO_BUFFERS))) |
| return true; |
| |
| if (tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_VERTEX) || |
| tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_FRAGMENT) || |
| tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_COMPUTE)) |
| return true; |
| |
| if (tc->seen_tcs && |
| tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_TESS_CTRL)) |
| return true; |
| |
| if (tc->seen_tes && |
| tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_TESS_EVAL)) |
| return true; |
| |
| if (tc->seen_gs && |
| tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_GEOMETRY)) |
| return true; |
| |
| return false; |
| } |
| |
| static bool |
| tc_is_buffer_busy(struct threaded_context *tc, struct threaded_resource *tbuf, |
| unsigned map_usage) |
| { |
| if (!tc->options.is_resource_busy) |
| return true; |
| |
| uint32_t id_hash = tbuf->buffer_id_unique & TC_BUFFER_ID_MASK; |
| |
| for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++) { |
| struct tc_buffer_list *buf_list = &tc->buffer_lists[i]; |
| |
| /* If the buffer is referenced by a batch that hasn't been flushed (by tc or the driver), |
| * then the buffer is considered busy. */ |
| if (!util_queue_fence_is_signalled(&buf_list->driver_flushed_fence) && |
| BITSET_TEST(buf_list->buffer_list, id_hash)) |
| return true; |
| } |
| |
| /* The buffer isn't referenced by any unflushed batch: we can safely ask |
| * the driver whether this buffer is busy or not. */ |
| return tc->options.is_resource_busy(tc->pipe->screen, tbuf->latest, map_usage); |
| } |
| |
| /** |
| * allow_cpu_storage should be false for user memory and imported buffers. |
| */ |
| void |
| threaded_resource_init(struct pipe_resource *res, bool allow_cpu_storage) |
| { |
| struct threaded_resource *tres = threaded_resource(res); |
| |
| tres->latest = &tres->b; |
| tres->cpu_storage = NULL; |
| util_range_init(&tres->valid_buffer_range); |
| tres->is_shared = false; |
| tres->is_user_ptr = false; |
| tres->buffer_id_unique = 0; |
| tres->pending_staging_uploads = 0; |
| util_range_init(&tres->pending_staging_uploads_range); |
| |
| if (allow_cpu_storage && |
| !(res->flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT | |
| PIPE_RESOURCE_FLAG_SPARSE | |
| PIPE_RESOURCE_FLAG_ENCRYPTED)) && |
| /* We need buffer invalidation and buffer busyness tracking for the CPU |
| * storage, which aren't supported with pipe_vertex_state. */ |
| !(res->bind & PIPE_BIND_VERTEX_STATE)) |
| tres->allow_cpu_storage = true; |
| else |
| tres->allow_cpu_storage = false; |
| } |
| |
| void |
| threaded_resource_deinit(struct pipe_resource *res) |
| { |
| struct threaded_resource *tres = threaded_resource(res); |
| |
| if (tres->latest != &tres->b) |
| pipe_resource_reference(&tres->latest, NULL); |
| util_range_destroy(&tres->valid_buffer_range); |
| util_range_destroy(&tres->pending_staging_uploads_range); |
| align_free(tres->cpu_storage); |
| } |
| |
| struct pipe_context * |
| threaded_context_unwrap_sync(struct pipe_context *pipe) |
| { |
| if (!pipe || !pipe->priv) |
| return pipe; |
| |
| tc_sync(threaded_context(pipe)); |
| return (struct pipe_context*)pipe->priv; |
| } |
| |
| |
| /******************************************************************** |
| * simple functions |
| */ |
| |
| #define TC_FUNC1(func, qualifier, type, deref, addr, ...) \ |
| struct tc_call_##func { \ |
| struct tc_call_base base; \ |
| type state; \ |
| }; \ |
| \ |
| static uint16_t \ |
| tc_call_##func(struct pipe_context *pipe, void *call, uint64_t *last) \ |
| { \ |
| pipe->func(pipe, addr(to_call(call, tc_call_##func)->state)); \ |
| return call_size(tc_call_##func); \ |
| } \ |
| \ |
| static void \ |
| tc_##func(struct pipe_context *_pipe, qualifier type deref param) \ |
| { \ |
| struct threaded_context *tc = threaded_context(_pipe); \ |
| struct tc_call_##func *p = (struct tc_call_##func*) \ |
| tc_add_call(tc, TC_CALL_##func, tc_call_##func); \ |
| p->state = deref(param); \ |
| __VA_ARGS__; \ |
| } |
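| /* For illustration, TC_FUNC1(set_sample_mask, , unsigned, , ) expands to |
| * roughly: |
| * |
| *    struct tc_call_set_sample_mask { |
| *       struct tc_call_base base; |
| *       unsigned state; |
| *    }; |
| * |
| *    static uint16_t |
| *    tc_call_set_sample_mask(struct pipe_context *pipe, void *call, |
| *                            uint64_t *last) |
| *    { |
| *       pipe->set_sample_mask(pipe, |
| *                             to_call(call, tc_call_set_sample_mask)->state); |
| *       return call_size(tc_call_set_sample_mask); |
| *    } |
| * |
| * plus a tc_set_sample_mask() entry point that enqueues the call and stores |
| * the parameter in p->state. The "deref" and "addr" arguments turn this into |
| * pass-by-pointer for large state structs such as pipe_blend_color. |
| */ |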
| |
| TC_FUNC1(set_active_query_state, , bool, , ) |
| |
| TC_FUNC1(set_blend_color, const, struct pipe_blend_color, *, &) |
| TC_FUNC1(set_stencil_ref, const, struct pipe_stencil_ref, , ) |
| TC_FUNC1(set_clip_state, const, struct pipe_clip_state, *, &) |
| TC_FUNC1(set_sample_mask, , unsigned, , ) |
| TC_FUNC1(set_min_samples, , unsigned, , ) |
| TC_FUNC1(set_polygon_stipple, const, struct pipe_poly_stipple, *, &) |
| |
| TC_FUNC1(texture_barrier, , unsigned, , ) |
| TC_FUNC1(memory_barrier, , unsigned, , ) |
| TC_FUNC1(delete_texture_handle, , uint64_t, , ) |
| TC_FUNC1(delete_image_handle, , uint64_t, , ) |
| TC_FUNC1(set_frontend_noop, , bool, , ) |
| |
| |
| /******************************************************************** |
| * queries |
| */ |
| |
| static struct pipe_query * |
| tc_create_query(struct pipe_context *_pipe, unsigned query_type, |
| unsigned index) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct pipe_context *pipe = tc->pipe; |
| |
| return pipe->create_query(pipe, query_type, index); |
| } |
| |
| static struct pipe_query * |
| tc_create_batch_query(struct pipe_context *_pipe, unsigned num_queries, |
| unsigned *query_types) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct pipe_context *pipe = tc->pipe; |
| |
| return pipe->create_batch_query(pipe, num_queries, query_types); |
| } |
| |
| struct tc_query_call { |
| struct tc_call_base base; |
| struct pipe_query *query; |
| }; |
| |
| static uint16_t |
| tc_call_destroy_query(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct pipe_query *query = to_call(call, tc_query_call)->query; |
| struct threaded_query *tq = threaded_query(query); |
| |
| if (list_is_linked(&tq->head_unflushed)) |
| list_del(&tq->head_unflushed); |
| |
| pipe->destroy_query(pipe, query); |
| return call_size(tc_query_call); |
| } |
| |
| static void |
| tc_destroy_query(struct pipe_context *_pipe, struct pipe_query *query) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| |
| tc_add_call(tc, TC_CALL_destroy_query, tc_query_call)->query = query; |
| } |
| |
| static uint16_t |
| tc_call_begin_query(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| pipe->begin_query(pipe, to_call(call, tc_query_call)->query); |
| return call_size(tc_query_call); |
| } |
| |
| static bool |
| tc_begin_query(struct pipe_context *_pipe, struct pipe_query *query) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| |
| tc_add_call(tc, TC_CALL_begin_query, tc_query_call)->query = query; |
| return true; /* we don't care about the return value for this call */ |
| } |
| |
| struct tc_end_query_call { |
| struct tc_call_base base; |
| struct threaded_context *tc; |
| struct pipe_query *query; |
| }; |
| |
| static uint16_t |
| tc_call_end_query(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_end_query_call *p = to_call(call, tc_end_query_call); |
| struct threaded_query *tq = threaded_query(p->query); |
| |
| if (!list_is_linked(&tq->head_unflushed)) |
| list_add(&tq->head_unflushed, &p->tc->unflushed_queries); |
| |
| pipe->end_query(pipe, p->query); |
| return call_size(tc_end_query_call); |
| } |
| |
| static bool |
| tc_end_query(struct pipe_context *_pipe, struct pipe_query *query) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct threaded_query *tq = threaded_query(query); |
| struct tc_end_query_call *call = |
| tc_add_call(tc, TC_CALL_end_query, tc_end_query_call); |
| |
| call->tc = tc; |
| call->query = query; |
| |
| tq->flushed = false; |
| |
| return true; /* we don't care about the return value for this call */ |
| } |
| |
| static bool |
| tc_get_query_result(struct pipe_context *_pipe, |
| struct pipe_query *query, bool wait, |
| union pipe_query_result *result) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct threaded_query *tq = threaded_query(query); |
| struct pipe_context *pipe = tc->pipe; |
| bool flushed = tq->flushed; |
| |
| if (!flushed) { |
| tc_sync_msg(tc, wait ? "wait" : "nowait"); |
| tc_set_driver_thread(tc); |
| } |
| |
| bool success = pipe->get_query_result(pipe, query, wait, result); |
| |
| if (!flushed) |
| tc_clear_driver_thread(tc); |
| |
| if (success) { |
| tq->flushed = true; |
| if (list_is_linked(&tq->head_unflushed)) { |
| /* This is safe because it can only happen after we sync'd. */ |
| list_del(&tq->head_unflushed); |
| } |
| } |
| return success; |
| } |
| |
| struct tc_query_result_resource { |
| struct tc_call_base base; |
| enum pipe_query_flags flags:8; |
| enum pipe_query_value_type result_type:8; |
| int8_t index; /* it can be -1 */ |
| unsigned offset; |
| struct pipe_query *query; |
| struct pipe_resource *resource; |
| }; |
| |
| static uint16_t |
| tc_call_get_query_result_resource(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_query_result_resource *p = to_call(call, tc_query_result_resource); |
| |
| pipe->get_query_result_resource(pipe, p->query, p->flags, p->result_type, |
| p->index, p->resource, p->offset); |
| tc_drop_resource_reference(p->resource); |
| return call_size(tc_query_result_resource); |
| } |
| |
| static void |
| tc_get_query_result_resource(struct pipe_context *_pipe, |
| struct pipe_query *query, |
| enum pipe_query_flags flags, |
| enum pipe_query_value_type result_type, int index, |
| struct pipe_resource *resource, unsigned offset) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| |
| tc_buffer_disable_cpu_storage(resource); |
| |
| struct tc_query_result_resource *p = |
| tc_add_call(tc, TC_CALL_get_query_result_resource, |
| tc_query_result_resource); |
| p->query = query; |
| p->flags = flags; |
| p->result_type = result_type; |
| p->index = index; |
| tc_set_resource_reference(&p->resource, resource); |
| tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], resource); |
| p->offset = offset; |
| } |
| |
| struct tc_render_condition { |
| struct tc_call_base base; |
| bool condition; |
| unsigned mode; |
| struct pipe_query *query; |
| }; |
| |
| static uint16_t |
| tc_call_render_condition(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_render_condition *p = to_call(call, tc_render_condition); |
| pipe->render_condition(pipe, p->query, p->condition, p->mode); |
| return call_size(tc_render_condition); |
| } |
| |
| static void |
| tc_render_condition(struct pipe_context *_pipe, |
| struct pipe_query *query, bool condition, |
| enum pipe_render_cond_flag mode) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct tc_render_condition *p = |
| tc_add_call(tc, TC_CALL_render_condition, tc_render_condition); |
| |
| p->query = query; |
| p->condition = condition; |
| p->mode = mode; |
| } |
| |
| |
| /******************************************************************** |
| * constant (immutable) states |
| */ |
| |
| #define TC_CSO_CREATE(name, sname) \ |
| static void * \ |
| tc_create_##name##_state(struct pipe_context *_pipe, \ |
| const struct pipe_##sname##_state *state) \ |
| { \ |
| struct pipe_context *pipe = threaded_context(_pipe)->pipe; \ |
| return pipe->create_##name##_state(pipe, state); \ |
| } |
| |
| #define TC_CSO_BIND(name, ...) TC_FUNC1(bind_##name##_state, , void *, , , ##__VA_ARGS__) |
| #define TC_CSO_DELETE(name) TC_FUNC1(delete_##name##_state, , void *, , ) |
| |
| #define TC_CSO(name, sname, ...) \ |
| TC_CSO_CREATE(name, sname) \ |
| TC_CSO_BIND(name, ##__VA_ARGS__) \ |
| TC_CSO_DELETE(name) |
| |
| #define TC_CSO_WHOLE(name) TC_CSO(name, name) |
| #define TC_CSO_SHADER(name) TC_CSO(name, shader) |
| #define TC_CSO_SHADER_TRACK(name) TC_CSO(name, shader, tc->seen_##name = true;) |
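| /* For example, TC_CSO_WHOLE(blend) generates tc_create_blend_state (which |
| * calls the driver directly rather than enqueuing), tc_bind_blend_state |
| * (enqueued via TC_FUNC1) and tc_delete_blend_state. TC_CSO_SHADER_TRACK(gs) |
| * additionally records tc->seen_gs = true at bind time, which the |
| * buffer-list and rebind code uses to skip shader stages that were never |
| * bound. |
| */ |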
| |
| TC_CSO_WHOLE(blend) |
| TC_CSO_WHOLE(rasterizer) |
| TC_CSO_WHOLE(depth_stencil_alpha) |
| TC_CSO_WHOLE(compute) |
| TC_CSO_SHADER(fs) |
| TC_CSO_SHADER(vs) |
| TC_CSO_SHADER_TRACK(gs) |
| TC_CSO_SHADER_TRACK(tcs) |
| TC_CSO_SHADER_TRACK(tes) |
| TC_CSO_CREATE(sampler, sampler) |
| TC_CSO_DELETE(sampler) |
| TC_CSO_BIND(vertex_elements) |
| TC_CSO_DELETE(vertex_elements) |
| |
| static void * |
| tc_create_vertex_elements_state(struct pipe_context *_pipe, unsigned count, |
| const struct pipe_vertex_element *elems) |
| { |
| struct pipe_context *pipe = threaded_context(_pipe)->pipe; |
| |
| return pipe->create_vertex_elements_state(pipe, count, elems); |
| } |
| |
| struct tc_sampler_states { |
| struct tc_call_base base; |
| ubyte shader, start, count; |
| void *slot[0]; /* more will be allocated if needed */ |
| }; |
| |
| static uint16_t |
| tc_call_bind_sampler_states(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_sampler_states *p = (struct tc_sampler_states *)call; |
| |
| pipe->bind_sampler_states(pipe, p->shader, p->start, p->count, p->slot); |
| return p->base.num_slots; |
| } |
| |
| static void |
| tc_bind_sampler_states(struct pipe_context *_pipe, |
| enum pipe_shader_type shader, |
| unsigned start, unsigned count, void **states) |
| { |
| if (!count) |
| return; |
| |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct tc_sampler_states *p = |
| tc_add_slot_based_call(tc, TC_CALL_bind_sampler_states, tc_sampler_states, count); |
| |
| p->shader = shader; |
| p->start = start; |
| p->count = count; |
| memcpy(p->slot, states, count * sizeof(states[0])); |
| } |
| |
| |
| /******************************************************************** |
| * immediate states |
| */ |
| |
| struct tc_framebuffer { |
| struct tc_call_base base; |
| struct pipe_framebuffer_state state; |
| }; |
| |
| static uint16_t |
| tc_call_set_framebuffer_state(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct pipe_framebuffer_state *p = &to_call(call, tc_framebuffer)->state; |
| |
| pipe->set_framebuffer_state(pipe, p); |
| |
| unsigned nr_cbufs = p->nr_cbufs; |
| for (unsigned i = 0; i < nr_cbufs; i++) |
| tc_drop_surface_reference(p->cbufs[i]); |
| tc_drop_surface_reference(p->zsbuf); |
| return call_size(tc_framebuffer); |
| } |
| |
| static void |
| tc_set_framebuffer_state(struct pipe_context *_pipe, |
| const struct pipe_framebuffer_state *fb) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct tc_framebuffer *p = |
| tc_add_call(tc, TC_CALL_set_framebuffer_state, tc_framebuffer); |
| unsigned nr_cbufs = fb->nr_cbufs; |
| |
| p->state.width = fb->width; |
| p->state.height = fb->height; |
| p->state.samples = fb->samples; |
| p->state.layers = fb->layers; |
| p->state.nr_cbufs = nr_cbufs; |
| |
| for (unsigned i = 0; i < nr_cbufs; i++) { |
| p->state.cbufs[i] = NULL; |
| pipe_surface_reference(&p->state.cbufs[i], fb->cbufs[i]); |
| } |
| p->state.zsbuf = NULL; |
| pipe_surface_reference(&p->state.zsbuf, fb->zsbuf); |
| } |
| |
| struct tc_tess_state { |
| struct tc_call_base base; |
| float state[6]; |
| }; |
| |
| static uint16_t |
| tc_call_set_tess_state(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| float *p = to_call(call, tc_tess_state)->state; |
| |
| pipe->set_tess_state(pipe, p, p + 4); |
| return call_size(tc_tess_state); |
| } |
| |
| static void |
| tc_set_tess_state(struct pipe_context *_pipe, |
| const float default_outer_level[4], |
| const float default_inner_level[2]) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| float *p = tc_add_call(tc, TC_CALL_set_tess_state, tc_tess_state)->state; |
| |
| memcpy(p, default_outer_level, 4 * sizeof(float)); |
| memcpy(p + 4, default_inner_level, 2 * sizeof(float)); |
| } |
| |
| struct tc_patch_vertices { |
| struct tc_call_base base; |
| ubyte patch_vertices; |
| }; |
| |
| static uint16_t |
| tc_call_set_patch_vertices(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| uint8_t patch_vertices = to_call(call, tc_patch_vertices)->patch_vertices; |
| |
| pipe->set_patch_vertices(pipe, patch_vertices); |
| return call_size(tc_patch_vertices); |
| } |
| |
| static void |
| tc_set_patch_vertices(struct pipe_context *_pipe, uint8_t patch_vertices) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| |
| tc_add_call(tc, TC_CALL_set_patch_vertices, |
| tc_patch_vertices)->patch_vertices = patch_vertices; |
| } |
| |
| struct tc_constant_buffer_base { |
| struct tc_call_base base; |
| ubyte shader, index; |
| bool is_null; |
| }; |
| |
| struct tc_constant_buffer { |
| struct tc_constant_buffer_base base; |
| struct pipe_constant_buffer cb; |
| }; |
| |
| static uint16_t |
| tc_call_set_constant_buffer(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_constant_buffer *p = (struct tc_constant_buffer *)call; |
| |
| if (unlikely(p->base.is_null)) { |
| pipe->set_constant_buffer(pipe, p->base.shader, p->base.index, false, NULL); |
| return call_size(tc_constant_buffer_base); |
| } |
| |
| pipe->set_constant_buffer(pipe, p->base.shader, p->base.index, true, &p->cb); |
| return call_size(tc_constant_buffer); |
| } |
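| /* Note that the size returned must match what tc_set_constant_buffer |
| * enqueued: a null binding is stored as the smaller tc_constant_buffer_base, |
| * a real one as the full tc_constant_buffer, and the return value is what |
| * keeps the slot iterator in tc_batch_execute() in sync with the stream. |
| */ |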
| |
| static void |
| tc_set_constant_buffer(struct pipe_context *_pipe, |
| enum pipe_shader_type shader, uint index, |
| bool take_ownership, |
| const struct pipe_constant_buffer *cb) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| |
| if (unlikely(!cb || (!cb->buffer && !cb->user_buffer))) { |
| struct tc_constant_buffer_base *p = |
| tc_add_call(tc, TC_CALL_set_constant_buffer, tc_constant_buffer_base); |
| p->shader = shader; |
| p->index = index; |
| p->is_null = true; |
| tc_unbind_buffer(&tc->const_buffers[shader][index]); |
| return; |
| } |
| |
| struct pipe_resource *buffer; |
| unsigned offset; |
| |
| if (cb->user_buffer) { |
| /* This must be done before adding set_constant_buffer, because the upload |
| * could generate e.g. transfer_unmap and flush a partially-initialized |
| * set_constant_buffer call to the driver if it were done afterwards. |
| */ |
| buffer = NULL; |
| u_upload_data(tc->base.const_uploader, 0, cb->buffer_size, |
| tc->ubo_alignment, cb->user_buffer, &offset, &buffer); |
| u_upload_unmap(tc->base.const_uploader); |
| take_ownership = true; |
| } else { |
| buffer = cb->buffer; |
| offset = cb->buffer_offset; |
| } |
| |
| struct tc_constant_buffer *p = |
| tc_add_call(tc, TC_CALL_set_constant_buffer, tc_constant_buffer); |
| p->base.shader = shader; |
| p->base.index = index; |
| p->base.is_null = false; |
| p->cb.user_buffer = NULL; |
| p->cb.buffer_offset = offset; |
| p->cb.buffer_size = cb->buffer_size; |
| |
| if (take_ownership) |
| p->cb.buffer = buffer; |
| else |
| tc_set_resource_reference(&p->cb.buffer, buffer); |
| |
| if (buffer) { |
| tc_bind_buffer(&tc->const_buffers[shader][index], |
| &tc->buffer_lists[tc->next_buf_list], buffer); |
| } else { |
| tc_unbind_buffer(&tc->const_buffers[shader][index]); |
| } |
| } |
| |
| struct tc_inlinable_constants { |
| struct tc_call_base base; |
| ubyte shader; |
| ubyte num_values; |
| uint32_t values[MAX_INLINABLE_UNIFORMS]; |
| }; |
| |
| static uint16_t |
| tc_call_set_inlinable_constants(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_inlinable_constants *p = to_call(call, tc_inlinable_constants); |
| |
| pipe->set_inlinable_constants(pipe, p->shader, p->num_values, p->values); |
| return call_size(tc_inlinable_constants); |
| } |
| |
| static void |
| tc_set_inlinable_constants(struct pipe_context *_pipe, |
| enum pipe_shader_type shader, |
| uint num_values, uint32_t *values) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct tc_inlinable_constants *p = |
| tc_add_call(tc, TC_CALL_set_inlinable_constants, tc_inlinable_constants); |
| p->shader = shader; |
| p->num_values = num_values; |
| memcpy(p->values, values, num_values * 4); |
| } |
| |
| struct tc_sample_locations { |
| struct tc_call_base base; |
| uint16_t size; |
| uint8_t slot[0]; |
| }; |
| |
| |
| static uint16_t |
| tc_call_set_sample_locations(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_sample_locations *p = (struct tc_sample_locations *)call; |
| |
| pipe->set_sample_locations(pipe, p->size, p->slot); |
| return p->base.num_slots; |
| } |
| |
| static void |
| tc_set_sample_locations(struct pipe_context *_pipe, size_t size, const uint8_t *locations) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct tc_sample_locations *p = |
| tc_add_slot_based_call(tc, TC_CALL_set_sample_locations, |
| tc_sample_locations, size); |
| |
| p->size = size; |
| memcpy(p->slot, locations, size); |
| } |
| |
| struct tc_scissors { |
| struct tc_call_base base; |
| ubyte start, count; |
| struct pipe_scissor_state slot[0]; /* more will be allocated if needed */ |
| }; |
| |
| static uint16_t |
| tc_call_set_scissor_states(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_scissors *p = (struct tc_scissors *)call; |
| |
| pipe->set_scissor_states(pipe, p->start, p->count, p->slot); |
| return p->base.num_slots; |
| } |
| |
| static void |
| tc_set_scissor_states(struct pipe_context *_pipe, |
| unsigned start, unsigned count, |
| const struct pipe_scissor_state *states) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct tc_scissors *p = |
| tc_add_slot_based_call(tc, TC_CALL_set_scissor_states, tc_scissors, count); |
| |
| p->start = start; |
| p->count = count; |
| memcpy(&p->slot, states, count * sizeof(states[0])); |
| } |
| |
| struct tc_viewports { |
| struct tc_call_base base; |
| ubyte start, count; |
| struct pipe_viewport_state slot[0]; /* more will be allocated if needed */ |
| }; |
| |
| static uint16_t |
| tc_call_set_viewport_states(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_viewports *p = (struct tc_viewports *)call; |
| |
| pipe->set_viewport_states(pipe, p->start, p->count, p->slot); |
| return p->base.num_slots; |
| } |
| |
| static void |
| tc_set_viewport_states(struct pipe_context *_pipe, |
| unsigned start, unsigned count, |
| const struct pipe_viewport_state *states) |
| { |
| if (!count) |
| return; |
| |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct tc_viewports *p = |
| tc_add_slot_based_call(tc, TC_CALL_set_viewport_states, tc_viewports, count); |
| |
| p->start = start; |
| p->count = count; |
| memcpy(&p->slot, states, count * sizeof(states[0])); |
| } |
| |
| struct tc_window_rects { |
| struct tc_call_base base; |
| bool include; |
| ubyte count; |
| struct pipe_scissor_state slot[0]; /* more will be allocated if needed */ |
| }; |
| |
| static uint16_t |
| tc_call_set_window_rectangles(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_window_rects *p = (struct tc_window_rects *)call; |
| |
| pipe->set_window_rectangles(pipe, p->include, p->count, p->slot); |
| return p->base.num_slots; |
| } |
| |
| static void |
| tc_set_window_rectangles(struct pipe_context *_pipe, bool include, |
| unsigned count, |
| const struct pipe_scissor_state *rects) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct tc_window_rects *p = |
| tc_add_slot_based_call(tc, TC_CALL_set_window_rectangles, tc_window_rects, count); |
| |
| p->include = include; |
| p->count = count; |
| memcpy(p->slot, rects, count * sizeof(rects[0])); |
| } |
| |
| struct tc_sampler_views { |
| struct tc_call_base base; |
| ubyte shader, start, count, unbind_num_trailing_slots; |
| struct pipe_sampler_view *slot[0]; /* more will be allocated if needed */ |
| }; |
| |
| static uint16_t |
| tc_call_set_sampler_views(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_sampler_views *p = (struct tc_sampler_views *)call; |
| |
| pipe->set_sampler_views(pipe, p->shader, p->start, p->count, |
| p->unbind_num_trailing_slots, true, p->slot); |
| return p->base.num_slots; |
| } |
| |
| static void |
| tc_set_sampler_views(struct pipe_context *_pipe, |
| enum pipe_shader_type shader, |
| unsigned start, unsigned count, |
| unsigned unbind_num_trailing_slots, bool take_ownership, |
| struct pipe_sampler_view **views) |
| { |
| if (!count && !unbind_num_trailing_slots) |
| return; |
| |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct tc_sampler_views *p = |
| tc_add_slot_based_call(tc, TC_CALL_set_sampler_views, tc_sampler_views, |
| views ? count : 0); |
| |
| p->shader = shader; |
| p->start = start; |
| |
| if (views) { |
| struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list]; |
| |
| p->count = count; |
| p->unbind_num_trailing_slots = unbind_num_trailing_slots; |
| |
| if (take_ownership) { |
| memcpy(p->slot, views, sizeof(*views) * count); |
| |
| for (unsigned i = 0; i < count; i++) { |
| if (views[i] && views[i]->target == PIPE_BUFFER) { |
| tc_bind_buffer(&tc->sampler_buffers[shader][start + i], next, |
| views[i]->texture); |
| } else { |
| tc_unbind_buffer(&tc->sampler_buffers[shader][start + i]); |
| } |
| } |
| } else { |
| for (unsigned i = 0; i < count; i++) { |
| p->slot[i] = NULL; |
| pipe_sampler_view_reference(&p->slot[i], views[i]); |
| |
| if (views[i] && views[i]->target == PIPE_BUFFER) { |
| tc_bind_buffer(&tc->sampler_buffers[shader][start + i], next, |
| views[i]->texture); |
| } else { |
| tc_unbind_buffer(&tc->sampler_buffers[shader][start + i]); |
| } |
| } |
| } |
| |
| tc_unbind_buffers(&tc->sampler_buffers[shader][start + count], |
| unbind_num_trailing_slots); |
| tc->seen_sampler_buffers[shader] = true; |
| } else { |
| p->count = 0; |
| p->unbind_num_trailing_slots = count + unbind_num_trailing_slots; |
| |
| tc_unbind_buffers(&tc->sampler_buffers[shader][start], |
| count + unbind_num_trailing_slots); |
| } |
| } |
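| /* Note on ownership above: with take_ownership, the caller's references are |
| * moved into the call by a plain memcpy (no refcount change); otherwise |
| * pipe_sampler_view_reference() takes new references. Either way, |
| * tc_call_set_sampler_views passes take_ownership = true to the driver, |
| * which then consumes the references held by the call. |
| */ |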
| |
| struct tc_shader_images { |
| struct tc_call_base base; |
| ubyte shader, start, count; |
| ubyte unbind_num_trailing_slots; |
| struct pipe_image_view slot[0]; /* more will be allocated if needed */ |
| }; |
| |
| static uint16_t |
| tc_call_set_shader_images(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_shader_images *p = (struct tc_shader_images *)call; |
| unsigned count = p->count; |
| |
| if (!p->count) { |
| pipe->set_shader_images(pipe, p->shader, p->start, 0, |
| p->unbind_num_trailing_slots, NULL); |
| return call_size(tc_shader_images); |
| } |
| |
| pipe->set_shader_images(pipe, p->shader, p->start, p->count, |
| p->unbind_num_trailing_slots, p->slot); |
| |
| for (unsigned i = 0; i < count; i++) |
| tc_drop_resource_reference(p->slot[i].resource); |
| |
| return p->base.num_slots; |
| } |
| |
| static void |
| tc_set_shader_images(struct pipe_context *_pipe, |
| enum pipe_shader_type shader, |
| unsigned start, unsigned count, |
| unsigned unbind_num_trailing_slots, |
| const struct pipe_image_view *images) |
| { |
| if (!count && !unbind_num_trailing_slots) |
| return; |
| |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct tc_shader_images *p = |
| tc_add_slot_based_call(tc, TC_CALL_set_shader_images, tc_shader_images, |
| images ? count : 0); |
| unsigned writable_buffers = 0; |
| |
| p->shader = shader; |
| p->start = start; |
| |
| if (images) { |
| p->count = count; |
| p->unbind_num_trailing_slots = unbind_num_trailing_slots; |
| |
| struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list]; |
| |
| for (unsigned i = 0; i < count; i++) { |
| struct pipe_resource *resource = images[i].resource; |
| |
| tc_set_resource_reference(&p->slot[i].resource, resource); |
| |
| if (resource && resource->target == PIPE_BUFFER) { |
| tc_bind_buffer(&tc->image_buffers[shader][start + i], next, resource); |
| |
| if (images[i].access & PIPE_IMAGE_ACCESS_WRITE) { |
| struct threaded_resource *tres = threaded_resource(resource); |
| |
| tc_buffer_disable_cpu_storage(resource); |
| util_range_add(&tres->b, &tres->valid_buffer_range, |
| images[i].u.buf.offset, |
| images[i].u.buf.offset + images[i].u.buf.size); |
| writable_buffers |= BITFIELD_BIT(start + i); |
| } |
| } else { |
| tc_unbind_buffer(&tc->image_buffers[shader][start + i]); |
| } |
| } |
| memcpy(p->slot, images, count * sizeof(images[0])); |
| |
| tc_unbind_buffers(&tc->image_buffers[shader][start + count], |
| unbind_num_trailing_slots); |
| tc->seen_image_buffers[shader] = true; |
| } else { |
| p->count = 0; |
| p->unbind_num_trailing_slots = count + unbind_num_trailing_slots; |
| |
| tc_unbind_buffers(&tc->image_buffers[shader][start], |
| count + unbind_num_trailing_slots); |
| } |
| |
| tc->image_buffers_writeable_mask[shader] &= ~BITFIELD_RANGE(start, count); |
| tc->image_buffers_writeable_mask[shader] |= writable_buffers; |
| } |
| |
| struct tc_shader_buffers { |
| struct tc_call_base base; |
| ubyte shader, start, count; |
| bool unbind; |
| unsigned writable_bitmask; |
| struct pipe_shader_buffer slot[0]; /* more will be allocated if needed */ |
| }; |
| |
| static uint16_t |
| tc_call_set_shader_buffers(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_shader_buffers *p = (struct tc_shader_buffers *)call; |
| unsigned count = p->count; |
| |
| if (p->unbind) { |
| pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, NULL, 0); |
| return call_size(tc_shader_buffers); |
| } |
| |
| pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, p->slot, |
| p->writable_bitmask); |
| |
| for (unsigned i = 0; i < count; i++) |
| tc_drop_resource_reference(p->slot[i].buffer); |
| |
| return p->base.num_slots; |
| } |
| |
| static void |
| tc_set_shader_buffers(struct pipe_context *_pipe, |
| enum pipe_shader_type shader, |
| unsigned start, unsigned count, |
| const struct pipe_shader_buffer *buffers, |
| unsigned writable_bitmask) |
| { |
| if (!count) |
| return; |
| |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct tc_shader_buffers *p = |
| tc_add_slot_based_call(tc, TC_CALL_set_shader_buffers, tc_shader_buffers, |
| buffers ? count : 0); |
| |
| p->shader = shader; |
| p->start = start; |
| p->count = count; |
| p->unbind = buffers == NULL; |
| p->writable_bitmask = writable_bitmask; |
| |
| if (buffers) { |
| struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list]; |
| |
| for (unsigned i = 0; i < count; i++) { |
| struct pipe_shader_buffer *dst = &p->slot[i]; |
| const struct pipe_shader_buffer *src = buffers + i; |
| |
| tc_set_resource_reference(&dst->buffer, src->buffer); |
| dst->buffer_offset = src->buffer_offset; |
| dst->buffer_size = src->buffer_size; |
| |
| if (src->buffer) { |
| struct threaded_resource *tres = threaded_resource(src->buffer); |
| |
| tc_bind_buffer(&tc->shader_buffers[shader][start + i], next, &tres->b); |
| |
| if (writable_bitmask & BITFIELD_BIT(i)) { |
| tc_buffer_disable_cpu_storage(src->buffer); |
| util_range_add(&tres->b, &tres->valid_buffer_range, |
| src->buffer_offset, |
| src->buffer_offset + src->buffer_size); |
| } |
| } else { |
| tc_unbind_buffer(&tc->shader_buffers[shader][start + i]); |
| } |
| } |
| tc->seen_shader_buffers[shader] = true; |
| } else { |
| tc_unbind_buffers(&tc->shader_buffers[shader][start], count); |
| } |
| |
| tc->shader_buffers_writeable_mask[shader] &= ~BITFIELD_RANGE(start, count); |
| tc->shader_buffers_writeable_mask[shader] |= writable_bitmask << start; |
| } |
| |
| struct tc_vertex_buffers { |
| struct tc_call_base base; |
| ubyte start, count; |
| ubyte unbind_num_trailing_slots; |
| struct pipe_vertex_buffer slot[0]; /* more will be allocated if needed */ |
| }; |
| |
| static uint16_t |
| tc_call_set_vertex_buffers(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_vertex_buffers *p = (struct tc_vertex_buffers *)call; |
| unsigned count = p->count; |
| |
| if (!count) { |
| pipe->set_vertex_buffers(pipe, p->start, 0, |
| p->unbind_num_trailing_slots, false, NULL); |
| return call_size(tc_vertex_buffers); |
| } |
| |
| for (unsigned i = 0; i < count; i++) |
| tc_assert(!p->slot[i].is_user_buffer); |
| |
| pipe->set_vertex_buffers(pipe, p->start, count, |
| p->unbind_num_trailing_slots, true, p->slot); |
| return p->base.num_slots; |
| } |
| |
| static void |
| tc_set_vertex_buffers(struct pipe_context *_pipe, |
| unsigned start, unsigned count, |
| unsigned unbind_num_trailing_slots, |
| bool take_ownership, |
| const struct pipe_vertex_buffer *buffers) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| |
| if (!count && !unbind_num_trailing_slots) |
| return; |
| |
| if (count && buffers) { |
| struct tc_vertex_buffers *p = |
| tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, count); |
| p->start = start; |
| p->count = count; |
| p->unbind_num_trailing_slots = unbind_num_trailing_slots; |
| |
| struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list]; |
| |
| if (take_ownership) { |
| memcpy(p->slot, buffers, count * sizeof(struct pipe_vertex_buffer)); |
| |
| for (unsigned i = 0; i < count; i++) { |
| struct pipe_resource *buf = buffers[i].buffer.resource; |
| |
| if (buf) { |
| tc_bind_buffer(&tc->vertex_buffers[start + i], next, buf); |
| } else { |
| tc_unbind_buffer(&tc->vertex_buffers[start + i]); |
| } |
| } |
| } else { |
| for (unsigned i = 0; i < count; i++) { |
| struct pipe_vertex_buffer *dst = &p->slot[i]; |
| const struct pipe_vertex_buffer *src = buffers + i; |
| struct pipe_resource *buf = src->buffer.resource; |
| |
| tc_assert(!src->is_user_buffer); |
| dst->stride = src->stride; |
| dst->is_user_buffer = false; |
| tc_set_resource_reference(&dst->buffer.resource, buf); |
| dst->buffer_offset = src->buffer_offset; |
| |
| if (buf) { |
| tc_bind_buffer(&tc->vertex_buffers[start + i], next, buf); |
| } else { |
| tc_unbind_buffer(&tc->vertex_buffers[start + i]); |
| } |
| } |
| } |
| |
| tc_unbind_buffers(&tc->vertex_buffers[start + count], |
| unbind_num_trailing_slots); |
| } else { |
| struct tc_vertex_buffers *p = |
| tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, 0); |
| p->start = start; |
| p->count = 0; |
| p->unbind_num_trailing_slots = count + unbind_num_trailing_slots; |
| |
| tc_unbind_buffers(&tc->vertex_buffers[start], |
| count + unbind_num_trailing_slots); |
| } |
| } |
| |
| struct tc_stream_outputs { |
| struct tc_call_base base; |
| unsigned count; |
| struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS]; |
| unsigned offsets[PIPE_MAX_SO_BUFFERS]; |
| }; |
| |
| static uint16_t |
| tc_call_set_stream_output_targets(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_stream_outputs *p = to_call(call, tc_stream_outputs); |
| unsigned count = p->count; |
| |
| pipe->set_stream_output_targets(pipe, count, p->targets, p->offsets); |
| for (unsigned i = 0; i < count; i++) |
| tc_drop_so_target_reference(p->targets[i]); |
| |
| return call_size(tc_stream_outputs); |
| } |
| |
| static void |
| tc_set_stream_output_targets(struct pipe_context *_pipe, |
| unsigned count, |
| struct pipe_stream_output_target **tgs, |
| const unsigned *offsets) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct tc_stream_outputs *p = |
| tc_add_call(tc, TC_CALL_set_stream_output_targets, tc_stream_outputs); |
| struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list]; |
| |
| for (unsigned i = 0; i < count; i++) { |
| p->targets[i] = NULL; |
| pipe_so_target_reference(&p->targets[i], tgs[i]); |
| if (tgs[i]) { |
| tc_buffer_disable_cpu_storage(tgs[i]->buffer); |
| tc_bind_buffer(&tc->streamout_buffers[i], next, tgs[i]->buffer); |
| } else { |
| tc_unbind_buffer(&tc->streamout_buffers[i]); |
| } |
| } |
| p->count = count; |
| memcpy(p->offsets, offsets, count * sizeof(unsigned)); |
| |
| tc_unbind_buffers(&tc->streamout_buffers[count], PIPE_MAX_SO_BUFFERS - count); |
| if (count) |
| tc->seen_streamout_buffers = true; |
| } |
| |
| static void |
| tc_set_compute_resources(struct pipe_context *_pipe, unsigned start, |
| unsigned count, struct pipe_surface **resources) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct pipe_context *pipe = tc->pipe; |
| |
| tc_sync(tc); |
| pipe->set_compute_resources(pipe, start, count, resources); |
| } |
| |
| static void |
| tc_set_global_binding(struct pipe_context *_pipe, unsigned first, |
| unsigned count, struct pipe_resource **resources, |
| uint32_t **handles) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct pipe_context *pipe = tc->pipe; |
| |
| tc_sync(tc); |
| pipe->set_global_binding(pipe, first, count, resources, handles); |
| } |
| |
| |
| /******************************************************************** |
| * views |
| */ |
| |
| static struct pipe_surface * |
| tc_create_surface(struct pipe_context *_pipe, |
| struct pipe_resource *resource, |
| const struct pipe_surface *surf_tmpl) |
| { |
| struct pipe_context *pipe = threaded_context(_pipe)->pipe; |
| struct pipe_surface *view = |
| pipe->create_surface(pipe, resource, surf_tmpl); |
| |
| if (view) |
| view->context = _pipe; |
| return view; |
| } |
| |
| static void |
| tc_surface_destroy(struct pipe_context *_pipe, |
| struct pipe_surface *surf) |
| { |
| struct pipe_context *pipe = threaded_context(_pipe)->pipe; |
| |
| pipe->surface_destroy(pipe, surf); |
| } |
| |
| static struct pipe_sampler_view * |
| tc_create_sampler_view(struct pipe_context *_pipe, |
| struct pipe_resource *resource, |
| const struct pipe_sampler_view *templ) |
| { |
| struct pipe_context *pipe = threaded_context(_pipe)->pipe; |
| struct pipe_sampler_view *view = |
| pipe->create_sampler_view(pipe, resource, templ); |
| |
| if (view) |
| view->context = _pipe; |
| return view; |
| } |
| |
| static void |
| tc_sampler_view_destroy(struct pipe_context *_pipe, |
| struct pipe_sampler_view *view) |
| { |
| struct pipe_context *pipe = threaded_context(_pipe)->pipe; |
| |
| pipe->sampler_view_destroy(pipe, view); |
| } |
| |
| static struct pipe_stream_output_target * |
| tc_create_stream_output_target(struct pipe_context *_pipe, |
| struct pipe_resource *res, |
| unsigned buffer_offset, |
| unsigned buffer_size) |
| { |
| struct pipe_context *pipe = threaded_context(_pipe)->pipe; |
| struct threaded_resource *tres = threaded_resource(res); |
| struct pipe_stream_output_target *view; |
| |
| util_range_add(&tres->b, &tres->valid_buffer_range, buffer_offset, |
| buffer_offset + buffer_size); |
| |
| view = pipe->create_stream_output_target(pipe, res, buffer_offset, |
| buffer_size); |
| if (view) |
| view->context = _pipe; |
| return view; |
| } |
| |
| static void |
| tc_stream_output_target_destroy(struct pipe_context *_pipe, |
| struct pipe_stream_output_target *target) |
| { |
| struct pipe_context *pipe = threaded_context(_pipe)->pipe; |
| |
| pipe->stream_output_target_destroy(pipe, target); |
| } |
| |
| |
| /******************************************************************** |
| * bindless |
| */ |
| |
| static uint64_t |
| tc_create_texture_handle(struct pipe_context *_pipe, |
| struct pipe_sampler_view *view, |
| const struct pipe_sampler_state *state) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct pipe_context *pipe = tc->pipe; |
| |
| tc_sync(tc); |
| return pipe->create_texture_handle(pipe, view, state); |
| } |
| |
| struct tc_make_texture_handle_resident { |
| struct tc_call_base base; |
| bool resident; |
| uint64_t handle; |
| }; |
| |
| static uint16_t |
| tc_call_make_texture_handle_resident(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_make_texture_handle_resident *p = |
| to_call(call, tc_make_texture_handle_resident); |
| |
| pipe->make_texture_handle_resident(pipe, p->handle, p->resident); |
| return call_size(tc_make_texture_handle_resident); |
| } |
| |
| static void |
| tc_make_texture_handle_resident(struct pipe_context *_pipe, uint64_t handle, |
| bool resident) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct tc_make_texture_handle_resident *p = |
| tc_add_call(tc, TC_CALL_make_texture_handle_resident, |
| tc_make_texture_handle_resident); |
| |
| p->handle = handle; |
| p->resident = resident; |
| } |
| |
| static uint64_t |
| tc_create_image_handle(struct pipe_context *_pipe, |
| const struct pipe_image_view *image) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct pipe_context *pipe = tc->pipe; |
| |
| if (image->resource->target == PIPE_BUFFER) |
| tc_buffer_disable_cpu_storage(image->resource); |
| |
| tc_sync(tc); |
| return pipe->create_image_handle(pipe, image); |
| } |
| |
| struct tc_make_image_handle_resident { |
| struct tc_call_base base; |
| bool resident; |
| unsigned access; |
| uint64_t handle; |
| }; |
| |
| static uint16_t |
| tc_call_make_image_handle_resident(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_make_image_handle_resident *p = |
| to_call(call, tc_make_image_handle_resident); |
| |
| pipe->make_image_handle_resident(pipe, p->handle, p->access, p->resident); |
| return call_size(tc_make_image_handle_resident); |
| } |
| |
| static void |
| tc_make_image_handle_resident(struct pipe_context *_pipe, uint64_t handle, |
| unsigned access, bool resident) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct tc_make_image_handle_resident *p = |
| tc_add_call(tc, TC_CALL_make_image_handle_resident, |
| tc_make_image_handle_resident); |
| |
| p->handle = handle; |
| p->access = access; |
| p->resident = resident; |
| } |
| |
| |
| /******************************************************************** |
| * transfer |
| */ |
| |
| struct tc_replace_buffer_storage { |
| struct tc_call_base base; |
| uint16_t num_rebinds; |
| uint32_t rebind_mask; |
| uint32_t delete_buffer_id; |
| struct pipe_resource *dst; |
| struct pipe_resource *src; |
| tc_replace_buffer_storage_func func; |
| }; |
| |
| static uint16_t |
| tc_call_replace_buffer_storage(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_replace_buffer_storage *p = to_call(call, tc_replace_buffer_storage); |
| |
| p->func(pipe, p->dst, p->src, p->num_rebinds, p->rebind_mask, p->delete_buffer_id); |
| |
| tc_drop_resource_reference(p->dst); |
| tc_drop_resource_reference(p->src); |
| return call_size(tc_replace_buffer_storage); |
| } |
| |
| /* Return true if the buffer has been invalidated or is idle. */ |
| static bool |
| tc_invalidate_buffer(struct threaded_context *tc, |
| struct threaded_resource *tbuf) |
| { |
| if (!tc_is_buffer_busy(tc, tbuf, PIPE_MAP_READ_WRITE)) { |
      /* The buffer is idle, so the reallocation would be a no-op, but we
       * can still clear the valid range because this is logically an
       * invalidation; we merely skip the useless reallocation.
       *
       * If the buffer is bound for write, we can't clear the valid range.
       */
| if (!tc_is_buffer_bound_for_write(tc, tbuf->buffer_id_unique)) |
| util_range_set_empty(&tbuf->valid_buffer_range); |
| return true; |
| } |
| |
| struct pipe_screen *screen = tc->base.screen; |
| struct pipe_resource *new_buf; |
| |
   /* Shared, pinned, sparse, and unmappable buffers can't be reallocated. */
| if (tbuf->is_shared || |
| tbuf->is_user_ptr || |
| tbuf->b.flags & (PIPE_RESOURCE_FLAG_SPARSE | PIPE_RESOURCE_FLAG_UNMAPPABLE)) |
| return false; |
| |
| /* Allocate a new one. */ |
| new_buf = screen->resource_create(screen, &tbuf->b); |
| if (!new_buf) |
| return false; |
| |
| /* Replace the "latest" pointer. */ |
| if (tbuf->latest != &tbuf->b) |
| pipe_resource_reference(&tbuf->latest, NULL); |
| |
| tbuf->latest = new_buf; |
| |
| uint32_t delete_buffer_id = tbuf->buffer_id_unique; |
| |
| /* Enqueue storage replacement of the original buffer. */ |
| struct tc_replace_buffer_storage *p = |
| tc_add_call(tc, TC_CALL_replace_buffer_storage, |
| tc_replace_buffer_storage); |
| |
| p->func = tc->replace_buffer_storage; |
| tc_set_resource_reference(&p->dst, &tbuf->b); |
| tc_set_resource_reference(&p->src, new_buf); |
| p->delete_buffer_id = delete_buffer_id; |
| p->rebind_mask = 0; |
| |
| /* Treat the current buffer as the new buffer. */ |
| bool bound_for_write = tc_is_buffer_bound_for_write(tc, tbuf->buffer_id_unique); |
| p->num_rebinds = tc_rebind_buffer(tc, tbuf->buffer_id_unique, |
| threaded_resource(new_buf)->buffer_id_unique, |
| &p->rebind_mask); |
| |
| /* If the buffer is not bound for write, clear the valid range. */ |
| if (!bound_for_write) |
| util_range_set_empty(&tbuf->valid_buffer_range); |
| |
| tbuf->buffer_id_unique = threaded_resource(new_buf)->buffer_id_unique; |
| threaded_resource(new_buf)->buffer_id_unique = 0; |
| |
| return true; |
| } |
| |
| static unsigned |
| tc_improve_map_buffer_flags(struct threaded_context *tc, |
| struct threaded_resource *tres, unsigned usage, |
| unsigned offset, unsigned size) |
| { |
| /* Never invalidate inside the driver and never infer "unsynchronized". */ |
| unsigned tc_flags = TC_TRANSFER_MAP_NO_INVALIDATE | |
| TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED; |
| |
| /* Prevent a reentry. */ |
| if (usage & tc_flags) |
| return usage; |
| |
| /* Use the staging upload if it's preferred. */ |
| if (usage & (PIPE_MAP_DISCARD_RANGE | |
| PIPE_MAP_DISCARD_WHOLE_RESOURCE) && |
| !(usage & PIPE_MAP_PERSISTENT) && |
| tres->b.flags & PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY && |
| tc->use_forced_staging_uploads) { |
| usage &= ~(PIPE_MAP_DISCARD_WHOLE_RESOURCE | |
| PIPE_MAP_UNSYNCHRONIZED); |
| |
| return usage | tc_flags | PIPE_MAP_DISCARD_RANGE; |
| } |
| |
| /* Sparse buffers can't be mapped directly and can't be reallocated |
| * (fully invalidated). That may just be a radeonsi limitation, but |
| * the threaded context must obey it with radeonsi. |
| */ |
| if (tres->b.flags & (PIPE_RESOURCE_FLAG_SPARSE | PIPE_RESOURCE_FLAG_UNMAPPABLE)) { |
| /* We can use DISCARD_RANGE instead of full discard. This is the only |
| * fast path for sparse buffers that doesn't need thread synchronization. |
| */ |
| if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE) |
| usage |= PIPE_MAP_DISCARD_RANGE; |
| |
      /* Allow DISCARD_WHOLE_RESOURCE and inferring UNSYNCHRONIZED in
       * drivers. The threaded context doesn't do unsynchronized mappings
       * and invalidations of sparse buffers, so correct driver behavior
       * can't become incorrect behavior under the threaded context.
       */
| */ |
| return usage; |
| } |
| |
| usage |= tc_flags; |
| |
| /* Handle CPU reads trivially. */ |
| if (usage & PIPE_MAP_READ) { |
| if (usage & PIPE_MAP_UNSYNCHRONIZED) |
| usage |= TC_TRANSFER_MAP_THREADED_UNSYNC; /* don't sync */ |
| |
| /* Drivers aren't allowed to do buffer invalidations. */ |
| return usage & ~PIPE_MAP_DISCARD_WHOLE_RESOURCE; |
| } |
| |
| /* See if the buffer range being mapped has never been initialized or |
| * the buffer is idle, in which case it can be mapped unsynchronized. */ |
| if (!(usage & PIPE_MAP_UNSYNCHRONIZED) && |
| ((!tres->is_shared && |
| !util_ranges_intersect(&tres->valid_buffer_range, offset, offset + size)) || |
| !tc_is_buffer_busy(tc, tres, usage))) |
| usage |= PIPE_MAP_UNSYNCHRONIZED; |
| |
| if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) { |
| /* If discarding the entire range, discard the whole resource instead. */ |
| if (usage & PIPE_MAP_DISCARD_RANGE && |
| offset == 0 && size == tres->b.width0) |
| usage |= PIPE_MAP_DISCARD_WHOLE_RESOURCE; |
| |
| /* Discard the whole resource if needed. */ |
| if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE) { |
| if (tc_invalidate_buffer(tc, tres)) |
| usage |= PIPE_MAP_UNSYNCHRONIZED; |
| else |
| usage |= PIPE_MAP_DISCARD_RANGE; /* fallback */ |
| } |
| } |
| |
| /* We won't need this flag anymore. */ |
| /* TODO: We might not need TC_TRANSFER_MAP_NO_INVALIDATE with this. */ |
| usage &= ~PIPE_MAP_DISCARD_WHOLE_RESOURCE; |
| |
| /* GL_AMD_pinned_memory and persistent mappings can't use staging |
| * buffers. */ |
| if (usage & (PIPE_MAP_UNSYNCHRONIZED | |
| PIPE_MAP_PERSISTENT) || |
| tres->is_user_ptr) |
| usage &= ~PIPE_MAP_DISCARD_RANGE; |
| |
   /* Unsynchronized buffer mappings don't have to synchronize the thread. */
| if (usage & PIPE_MAP_UNSYNCHRONIZED) { |
| usage &= ~PIPE_MAP_DISCARD_RANGE; |
| usage |= TC_TRANSFER_MAP_THREADED_UNSYNC; /* notify the driver */ |
| } |
| |
| return usage; |
| } |
| |
| static void * |
| tc_buffer_map(struct pipe_context *_pipe, |
| struct pipe_resource *resource, unsigned level, |
| unsigned usage, const struct pipe_box *box, |
| struct pipe_transfer **transfer) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct threaded_resource *tres = threaded_resource(resource); |
| struct pipe_context *pipe = tc->pipe; |
| |
   /* PIPE_MAP_THREAD_SAFE is used by glthread, which shouldn't use the CPU
    * storage; this shouldn't normally be necessary because glthread only
    * uses large buffers.
    */
| if (usage & PIPE_MAP_THREAD_SAFE) |
| tc_buffer_disable_cpu_storage(resource); |
| |
| usage = tc_improve_map_buffer_flags(tc, tres, usage, box->x, box->width); |
| |
| /* If the CPU storage is enabled, return it directly. */ |
| if (tres->allow_cpu_storage && !(usage & TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE)) { |
| /* We can't let resource_copy_region disable the CPU storage. */ |
| assert(!(tres->b.flags & PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY)); |
| |
| if (!tres->cpu_storage) |
| tres->cpu_storage = align_malloc(resource->width0, tc->map_buffer_alignment); |
| |
| if (tres->cpu_storage) { |
| struct threaded_transfer *ttrans = slab_zalloc(&tc->pool_transfers); |
| ttrans->b.resource = resource; |
| ttrans->b.usage = usage; |
| ttrans->b.box = *box; |
| ttrans->valid_buffer_range = &tres->valid_buffer_range; |
| ttrans->cpu_storage_mapped = true; |
| *transfer = &ttrans->b; |
| |
| return (uint8_t*)tres->cpu_storage + box->x; |
| } else { |
| tres->allow_cpu_storage = false; |
| } |
| } |
| |
| /* Do a staging transfer within the threaded context. The driver should |
| * only get resource_copy_region. |
| */ |
| if (usage & PIPE_MAP_DISCARD_RANGE) { |
| struct threaded_transfer *ttrans = slab_zalloc(&tc->pool_transfers); |
| uint8_t *map; |
| |
| u_upload_alloc(tc->base.stream_uploader, 0, |
| box->width + (box->x % tc->map_buffer_alignment), |
| tc->map_buffer_alignment, &ttrans->b.offset, |
| &ttrans->staging, (void**)&map); |
| if (!map) { |
| slab_free(&tc->pool_transfers, ttrans); |
| return NULL; |
| } |
| |
| ttrans->b.resource = resource; |
| ttrans->b.level = 0; |
| ttrans->b.usage = usage; |
| ttrans->b.box = *box; |
| ttrans->b.stride = 0; |
| ttrans->b.layer_stride = 0; |
| ttrans->valid_buffer_range = &tres->valid_buffer_range; |
| ttrans->cpu_storage_mapped = false; |
| *transfer = &ttrans->b; |
| |
| p_atomic_inc(&tres->pending_staging_uploads); |
| util_range_add(resource, &tres->pending_staging_uploads_range, |
| box->x, box->x + box->width); |
| |
| return map + (box->x % tc->map_buffer_alignment); |
| } |
| |
| if (usage & PIPE_MAP_UNSYNCHRONIZED && |
| p_atomic_read(&tres->pending_staging_uploads) && |
| util_ranges_intersect(&tres->pending_staging_uploads_range, box->x, box->x + box->width)) { |
| /* Write conflict detected between a staging transfer and the direct mapping we're |
| * going to do. Resolve the conflict by ignoring UNSYNCHRONIZED so the direct mapping |
| * will have to wait for the staging transfer completion. |
| * Note: The conflict detection is only based on the mapped range, not on the actual |
| * written range(s). |
| */ |
| usage &= ~PIPE_MAP_UNSYNCHRONIZED & ~TC_TRANSFER_MAP_THREADED_UNSYNC; |
| tc->use_forced_staging_uploads = false; |
| } |
| |
   /* Unsynchronized buffer mappings don't have to synchronize the thread. */
| if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC)) { |
| tc_sync_msg(tc, usage & PIPE_MAP_DISCARD_RANGE ? " discard_range" : |
| usage & PIPE_MAP_READ ? " read" : " staging conflict"); |
| tc_set_driver_thread(tc); |
| } |
| |
| tc->bytes_mapped_estimate += box->width; |
| |
| void *ret = pipe->buffer_map(pipe, tres->latest ? tres->latest : resource, |
| level, usage, box, transfer); |
| threaded_transfer(*transfer)->valid_buffer_range = &tres->valid_buffer_range; |
| threaded_transfer(*transfer)->cpu_storage_mapped = false; |
| |
| if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC)) |
| tc_clear_driver_thread(tc); |
| |
| return ret; |
| } |
| |
| static void * |
| tc_texture_map(struct pipe_context *_pipe, |
| struct pipe_resource *resource, unsigned level, |
| unsigned usage, const struct pipe_box *box, |
| struct pipe_transfer **transfer) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct threaded_resource *tres = threaded_resource(resource); |
| struct pipe_context *pipe = tc->pipe; |
| |
| tc_sync_msg(tc, "texture"); |
| tc_set_driver_thread(tc); |
| |
| tc->bytes_mapped_estimate += box->width; |
| |
| void *ret = pipe->texture_map(pipe, tres->latest ? tres->latest : resource, |
| level, usage, box, transfer); |
| |
| if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC)) |
| tc_clear_driver_thread(tc); |
| |
| return ret; |
| } |
| |
| struct tc_transfer_flush_region { |
| struct tc_call_base base; |
| struct pipe_box box; |
| struct pipe_transfer *transfer; |
| }; |
| |
| static uint16_t |
| tc_call_transfer_flush_region(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_transfer_flush_region *p = to_call(call, tc_transfer_flush_region); |
| |
| pipe->transfer_flush_region(pipe, p->transfer, &p->box); |
| return call_size(tc_transfer_flush_region); |
| } |
| |
| struct tc_resource_copy_region { |
| struct tc_call_base base; |
| unsigned dst_level; |
| unsigned dstx, dsty, dstz; |
| unsigned src_level; |
| struct pipe_box src_box; |
| struct pipe_resource *dst; |
| struct pipe_resource *src; |
| }; |
| |
| static void |
| tc_resource_copy_region(struct pipe_context *_pipe, |
| struct pipe_resource *dst, unsigned dst_level, |
| unsigned dstx, unsigned dsty, unsigned dstz, |
| struct pipe_resource *src, unsigned src_level, |
| const struct pipe_box *src_box); |
| |
| static void |
| tc_buffer_do_flush_region(struct threaded_context *tc, |
| struct threaded_transfer *ttrans, |
| const struct pipe_box *box) |
| { |
| struct threaded_resource *tres = threaded_resource(ttrans->b.resource); |
| |
| if (ttrans->staging) { |
| struct pipe_box src_box; |
| |
| u_box_1d(ttrans->b.offset + ttrans->b.box.x % tc->map_buffer_alignment + |
| (box->x - ttrans->b.box.x), |
| box->width, &src_box); |
| |
| /* Copy the staging buffer into the original one. */ |
| tc_resource_copy_region(&tc->base, ttrans->b.resource, 0, box->x, 0, 0, |
| ttrans->staging, 0, &src_box); |
| } |
| |
| /* Don't update the valid range when we're uploading the CPU storage |
| * because it includes the uninitialized range too. |
| */ |
| if (!(ttrans->b.usage & TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE)) { |
| util_range_add(&tres->b, ttrans->valid_buffer_range, |
| box->x, box->x + box->width); |
| } |
| } |
| |
| static void |
| tc_transfer_flush_region(struct pipe_context *_pipe, |
| struct pipe_transfer *transfer, |
| const struct pipe_box *rel_box) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct threaded_transfer *ttrans = threaded_transfer(transfer); |
| struct threaded_resource *tres = threaded_resource(transfer->resource); |
| unsigned required_usage = PIPE_MAP_WRITE | |
| PIPE_MAP_FLUSH_EXPLICIT; |
| |
| if (tres->b.target == PIPE_BUFFER) { |
| if ((transfer->usage & required_usage) == required_usage) { |
| struct pipe_box box; |
| |
| u_box_1d(transfer->box.x + rel_box->x, rel_box->width, &box); |
| tc_buffer_do_flush_region(tc, ttrans, &box); |
| } |
| |
| /* Staging transfers don't send the call to the driver. */ |
| if (ttrans->staging) |
| return; |
| } |
| |
| struct tc_transfer_flush_region *p = |
| tc_add_call(tc, TC_CALL_transfer_flush_region, tc_transfer_flush_region); |
| p->transfer = transfer; |
| p->box = *rel_box; |
| } |
| |
| static void |
| tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence, |
| unsigned flags); |
| |
| struct tc_buffer_unmap { |
| struct tc_call_base base; |
| bool was_staging_transfer; |
| union { |
| struct pipe_transfer *transfer; |
| struct pipe_resource *resource; |
| }; |
| }; |
| |
| static uint16_t |
| tc_call_buffer_unmap(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_buffer_unmap *p = to_call(call, tc_buffer_unmap); |
| |
| if (p->was_staging_transfer) { |
| struct threaded_resource *tres = threaded_resource(p->resource); |
| /* Nothing to do except keeping track of staging uploads */ |
| assert(tres->pending_staging_uploads > 0); |
| p_atomic_dec(&tres->pending_staging_uploads); |
| tc_drop_resource_reference(p->resource); |
| } else { |
| pipe->buffer_unmap(pipe, p->transfer); |
| } |
| |
| return call_size(tc_buffer_unmap); |
| } |
| |
| static void |
| tc_buffer_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct threaded_transfer *ttrans = threaded_transfer(transfer); |
| struct threaded_resource *tres = threaded_resource(transfer->resource); |
| |
| /* PIPE_MAP_THREAD_SAFE is only valid with UNSYNCHRONIZED. It can be |
| * called from any thread and bypasses all multithreaded queues. |
| */ |
| if (transfer->usage & PIPE_MAP_THREAD_SAFE) { |
| assert(transfer->usage & PIPE_MAP_UNSYNCHRONIZED); |
| assert(!(transfer->usage & (PIPE_MAP_FLUSH_EXPLICIT | |
| PIPE_MAP_DISCARD_RANGE))); |
| |
| struct pipe_context *pipe = tc->pipe; |
| util_range_add(&tres->b, ttrans->valid_buffer_range, |
| transfer->box.x, transfer->box.x + transfer->box.width); |
| |
| pipe->buffer_unmap(pipe, transfer); |
| return; |
| } |
| |
| if (transfer->usage & PIPE_MAP_WRITE && |
| !(transfer->usage & PIPE_MAP_FLUSH_EXPLICIT)) |
| tc_buffer_do_flush_region(tc, ttrans, &transfer->box); |
| |
| if (ttrans->cpu_storage_mapped) { |
| /* GL allows simultaneous GPU stores with mapped buffers as long as GPU stores don't |
| * touch the mapped range. That's a problem because GPU stores free the CPU storage. |
| * If that happens, we just ignore the unmap call and don't upload anything to prevent |
| * a crash. |
| * |
| * Disallow the CPU storage in the driver to work around this. |
| */ |
| assert(tres->cpu_storage); |
| |
| if (tres->cpu_storage) { |
| tc_invalidate_buffer(tc, tres); |
| tc_buffer_subdata(&tc->base, &tres->b, |
| PIPE_MAP_UNSYNCHRONIZED | |
| TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE, |
| 0, tres->b.width0, tres->cpu_storage); |
| /* This shouldn't have been freed by buffer_subdata. */ |
| assert(tres->cpu_storage); |
| } else { |
| static bool warned_once = false; |
| if (!warned_once) { |
| fprintf(stderr, "This application is incompatible with cpu_storage.\n"); |
| fprintf(stderr, "Use tc_max_cpu_storage_size=0 to disable it and report this issue to Mesa.\n"); |
| warned_once = true; |
| } |
| } |
| |
| tc_drop_resource_reference(ttrans->staging); |
| slab_free(&tc->pool_transfers, ttrans); |
| return; |
| } |
| |
| bool was_staging_transfer = false; |
| |
| if (ttrans->staging) { |
| was_staging_transfer = true; |
| |
| tc_drop_resource_reference(ttrans->staging); |
| slab_free(&tc->pool_transfers, ttrans); |
| } |
| |
| struct tc_buffer_unmap *p = tc_add_call(tc, TC_CALL_buffer_unmap, |
| tc_buffer_unmap); |
| if (was_staging_transfer) { |
| tc_set_resource_reference(&p->resource, &tres->b); |
| p->was_staging_transfer = true; |
| } else { |
| p->transfer = transfer; |
| p->was_staging_transfer = false; |
| } |
| |
   /* tc_buffer_map maps buffers directly, but tc_buffer_unmap defers the
    * unmap operation to batch execution.
    * bytes_mapped_estimate is an estimate of the map/unmap bytes delta,
    * and if it exceeds an optional limit, the current batch is flushed
    * to reclaim some RAM. */
| if (!ttrans->staging && tc->bytes_mapped_limit && |
| tc->bytes_mapped_estimate > tc->bytes_mapped_limit) { |
| tc_flush(_pipe, NULL, PIPE_FLUSH_ASYNC); |
| } |
| } |
| |
| struct tc_texture_unmap { |
| struct tc_call_base base; |
| struct pipe_transfer *transfer; |
| }; |
| |
| static uint16_t |
| tc_call_texture_unmap(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_texture_unmap *p = (struct tc_texture_unmap *) call; |
| |
| pipe->texture_unmap(pipe, p->transfer); |
| return call_size(tc_texture_unmap); |
| } |
| |
| static void |
| tc_texture_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct threaded_transfer *ttrans = threaded_transfer(transfer); |
| |
| tc_add_call(tc, TC_CALL_texture_unmap, tc_texture_unmap)->transfer = transfer; |
| |
   /* tc_texture_map maps textures directly, but tc_texture_unmap defers the
    * unmap operation to batch execution.
    * bytes_mapped_estimate is an estimate of the map/unmap bytes delta,
    * and if it exceeds an optional limit, the current batch is flushed
    * to reclaim some RAM. */
| if (!ttrans->staging && tc->bytes_mapped_limit && |
| tc->bytes_mapped_estimate > tc->bytes_mapped_limit) { |
| tc_flush(_pipe, NULL, PIPE_FLUSH_ASYNC); |
| } |
| } |
| |
| struct tc_buffer_subdata { |
| struct tc_call_base base; |
| unsigned usage, offset, size; |
| struct pipe_resource *resource; |
| char slot[0]; /* more will be allocated if needed */ |
| }; |
| |
| static uint16_t |
| tc_call_buffer_subdata(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_buffer_subdata *p = (struct tc_buffer_subdata *)call; |
| |
| pipe->buffer_subdata(pipe, p->resource, p->usage, p->offset, p->size, |
| p->slot); |
| tc_drop_resource_reference(p->resource); |
| return p->base.num_slots; |
| } |
| |
| static void |
| tc_buffer_subdata(struct pipe_context *_pipe, |
| struct pipe_resource *resource, |
| unsigned usage, unsigned offset, |
| unsigned size, const void *data) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct threaded_resource *tres = threaded_resource(resource); |
| |
| if (!size) |
| return; |
| |
| usage |= PIPE_MAP_WRITE; |
| |
   /* PIPE_MAP_DIRECTLY suppresses implicit DISCARD_RANGE. */
| if (!(usage & PIPE_MAP_DIRECTLY)) |
| usage |= PIPE_MAP_DISCARD_RANGE; |
| |
| usage = tc_improve_map_buffer_flags(tc, tres, usage, offset, size); |
| |
   /* Unsynchronized and big transfers should use transfer_map. Also handle
| * full invalidations, because drivers aren't allowed to do them. |
| */ |
| if (usage & (PIPE_MAP_UNSYNCHRONIZED | |
| PIPE_MAP_DISCARD_WHOLE_RESOURCE) || |
| size > TC_MAX_SUBDATA_BYTES || |
| tres->cpu_storage) { |
| struct pipe_transfer *transfer; |
| struct pipe_box box; |
| uint8_t *map = NULL; |
| |
| u_box_1d(offset, size, &box); |
| |
| map = tc_buffer_map(_pipe, resource, 0, usage, &box, &transfer); |
| if (map) { |
| memcpy(map, data, size); |
| tc_buffer_unmap(_pipe, transfer); |
| } |
| return; |
| } |
| |
| util_range_add(&tres->b, &tres->valid_buffer_range, offset, offset + size); |
| |
| /* The upload is small. Enqueue it. */ |
| struct tc_buffer_subdata *p = |
| tc_add_slot_based_call(tc, TC_CALL_buffer_subdata, tc_buffer_subdata, size); |
| |
| tc_set_resource_reference(&p->resource, resource); |
   /* The buffer will always be busy here: if it weren't,
    * tc_improve_map_buffer_flags would have set UNSYNCHRONIZED and we
    * wouldn't get here.
    */
| tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], resource); |
| p->usage = usage; |
| p->offset = offset; |
| p->size = size; |
| memcpy(p->slot, data, size); |
| } |
| |
| struct tc_texture_subdata { |
| struct tc_call_base base; |
| unsigned level, usage, stride, layer_stride; |
| struct pipe_box box; |
| struct pipe_resource *resource; |
| char slot[0]; /* more will be allocated if needed */ |
| }; |
| |
| static uint16_t |
| tc_call_texture_subdata(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_texture_subdata *p = (struct tc_texture_subdata *)call; |
| |
| pipe->texture_subdata(pipe, p->resource, p->level, p->usage, &p->box, |
| p->slot, p->stride, p->layer_stride); |
| tc_drop_resource_reference(p->resource); |
| return p->base.num_slots; |
| } |
| |
| static void |
| tc_texture_subdata(struct pipe_context *_pipe, |
| struct pipe_resource *resource, |
| unsigned level, unsigned usage, |
| const struct pipe_box *box, |
| const void *data, unsigned stride, |
| unsigned layer_stride) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| unsigned size; |
| |
| assert(box->height >= 1); |
| assert(box->depth >= 1); |
| |
| size = (box->depth - 1) * layer_stride + |
| (box->height - 1) * stride + |
| box->width * util_format_get_blocksize(resource->format); |
| if (!size) |
| return; |
| |
| /* Small uploads can be enqueued, big uploads must sync. */ |
| if (size <= TC_MAX_SUBDATA_BYTES) { |
| struct tc_texture_subdata *p = |
| tc_add_slot_based_call(tc, TC_CALL_texture_subdata, tc_texture_subdata, size); |
| |
| tc_set_resource_reference(&p->resource, resource); |
| p->level = level; |
| p->usage = usage; |
| p->box = *box; |
| p->stride = stride; |
| p->layer_stride = layer_stride; |
| memcpy(p->slot, data, size); |
| } else { |
| struct pipe_context *pipe = tc->pipe; |
| |
| tc_sync(tc); |
| tc_set_driver_thread(tc); |
| pipe->texture_subdata(pipe, resource, level, usage, box, data, |
| stride, layer_stride); |
| tc_clear_driver_thread(tc); |
| } |
| } |
| |
| |
| /******************************************************************** |
| * miscellaneous |
| */ |
| |
| #define TC_FUNC_SYNC_RET0(ret_type, func) \ |
| static ret_type \ |
| tc_##func(struct pipe_context *_pipe) \ |
| { \ |
| struct threaded_context *tc = threaded_context(_pipe); \ |
| struct pipe_context *pipe = tc->pipe; \ |
| tc_sync(tc); \ |
| return pipe->func(pipe); \ |
| } |
| |
| TC_FUNC_SYNC_RET0(uint64_t, get_timestamp) |
| |
| static void |
| tc_get_sample_position(struct pipe_context *_pipe, |
| unsigned sample_count, unsigned sample_index, |
| float *out_value) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct pipe_context *pipe = tc->pipe; |
| |
| tc_sync(tc); |
| pipe->get_sample_position(pipe, sample_count, sample_index, |
| out_value); |
| } |
| |
| static enum pipe_reset_status |
| tc_get_device_reset_status(struct pipe_context *_pipe) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct pipe_context *pipe = tc->pipe; |
| |
| if (!tc->options.unsynchronized_get_device_reset_status) |
| tc_sync(tc); |
| |
| return pipe->get_device_reset_status(pipe); |
| } |
| |
| static void |
| tc_set_device_reset_callback(struct pipe_context *_pipe, |
| const struct pipe_device_reset_callback *cb) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct pipe_context *pipe = tc->pipe; |
| |
| tc_sync(tc); |
| pipe->set_device_reset_callback(pipe, cb); |
| } |
| |
| struct tc_string_marker { |
| struct tc_call_base base; |
| int len; |
| char slot[0]; /* more will be allocated if needed */ |
| }; |
| |
| static uint16_t |
| tc_call_emit_string_marker(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_string_marker *p = (struct tc_string_marker *)call; |
| pipe->emit_string_marker(pipe, p->slot, p->len); |
| return p->base.num_slots; |
| } |
| |
| static void |
| tc_emit_string_marker(struct pipe_context *_pipe, |
| const char *string, int len) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| |
| if (len <= TC_MAX_STRING_MARKER_BYTES) { |
| struct tc_string_marker *p = |
| tc_add_slot_based_call(tc, TC_CALL_emit_string_marker, tc_string_marker, len); |
| |
| memcpy(p->slot, string, len); |
| p->len = len; |
| } else { |
| struct pipe_context *pipe = tc->pipe; |
| |
| tc_sync(tc); |
| tc_set_driver_thread(tc); |
| pipe->emit_string_marker(pipe, string, len); |
| tc_clear_driver_thread(tc); |
| } |
| } |
| |
| static void |
| tc_dump_debug_state(struct pipe_context *_pipe, FILE *stream, |
| unsigned flags) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct pipe_context *pipe = tc->pipe; |
| |
| tc_sync(tc); |
| pipe->dump_debug_state(pipe, stream, flags); |
| } |
| |
| static void |
| tc_set_debug_callback(struct pipe_context *_pipe, |
| const struct pipe_debug_callback *cb) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct pipe_context *pipe = tc->pipe; |
| |
| /* Drop all synchronous debug callbacks. Drivers are expected to be OK |
| * with this. shader-db will use an environment variable to disable |
| * the threaded context. |
| */ |
| if (cb && cb->debug_message && !cb->async) |
| return; |
| |
| tc_sync(tc); |
| pipe->set_debug_callback(pipe, cb); |
| } |
| |
| static void |
| tc_set_log_context(struct pipe_context *_pipe, struct u_log_context *log) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct pipe_context *pipe = tc->pipe; |
| |
| tc_sync(tc); |
| pipe->set_log_context(pipe, log); |
| } |
| |
| static void |
| tc_create_fence_fd(struct pipe_context *_pipe, |
| struct pipe_fence_handle **fence, int fd, |
| enum pipe_fd_type type) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct pipe_context *pipe = tc->pipe; |
| |
| tc_sync(tc); |
| pipe->create_fence_fd(pipe, fence, fd, type); |
| } |
| |
| struct tc_fence_call { |
| struct tc_call_base base; |
| struct pipe_fence_handle *fence; |
| }; |
| |
| static uint16_t |
| tc_call_fence_server_sync(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct pipe_fence_handle *fence = to_call(call, tc_fence_call)->fence; |
| |
| pipe->fence_server_sync(pipe, fence); |
| pipe->screen->fence_reference(pipe->screen, &fence, NULL); |
| return call_size(tc_fence_call); |
| } |
| |
| static void |
| tc_fence_server_sync(struct pipe_context *_pipe, |
| struct pipe_fence_handle *fence) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct pipe_screen *screen = tc->pipe->screen; |
| struct tc_fence_call *call = tc_add_call(tc, TC_CALL_fence_server_sync, |
| tc_fence_call); |
| |
| call->fence = NULL; |
| screen->fence_reference(screen, &call->fence, fence); |
| } |
| |
| static uint16_t |
| tc_call_fence_server_signal(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct pipe_fence_handle *fence = to_call(call, tc_fence_call)->fence; |
| |
| pipe->fence_server_signal(pipe, fence); |
| pipe->screen->fence_reference(pipe->screen, &fence, NULL); |
| return call_size(tc_fence_call); |
| } |
| |
| static void |
| tc_fence_server_signal(struct pipe_context *_pipe, |
| struct pipe_fence_handle *fence) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct pipe_screen *screen = tc->pipe->screen; |
| struct tc_fence_call *call = tc_add_call(tc, TC_CALL_fence_server_signal, |
| tc_fence_call); |
| |
| call->fence = NULL; |
| screen->fence_reference(screen, &call->fence, fence); |
| } |
| |
| static struct pipe_video_codec * |
| tc_create_video_codec(UNUSED struct pipe_context *_pipe, |
| UNUSED const struct pipe_video_codec *templ) |
| { |
| unreachable("Threaded context should not be enabled for video APIs"); |
| return NULL; |
| } |
| |
| static struct pipe_video_buffer * |
| tc_create_video_buffer(UNUSED struct pipe_context *_pipe, |
| UNUSED const struct pipe_video_buffer *templ) |
| { |
| unreachable("Threaded context should not be enabled for video APIs"); |
| return NULL; |
| } |
| |
| struct tc_context_param { |
| struct tc_call_base base; |
| enum pipe_context_param param; |
| unsigned value; |
| }; |
| |
| static uint16_t |
| tc_call_set_context_param(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_context_param *p = to_call(call, tc_context_param); |
| |
| if (pipe->set_context_param) |
| pipe->set_context_param(pipe, p->param, p->value); |
| |
| return call_size(tc_context_param); |
| } |
| |
| static void |
| tc_set_context_param(struct pipe_context *_pipe, |
| enum pipe_context_param param, |
| unsigned value) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| |
| if (param == PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE) { |
| /* Pin the gallium thread as requested. */ |
| util_set_thread_affinity(tc->queue.threads[0], |
| util_get_cpu_caps()->L3_affinity_mask[value], |
| NULL, util_get_cpu_caps()->num_cpu_mask_bits); |
| |
| /* Execute this immediately (without enqueuing). |
| * It's required to be thread-safe. |
| */ |
| struct pipe_context *pipe = tc->pipe; |
| if (pipe->set_context_param) |
| pipe->set_context_param(pipe, param, value); |
| return; |
| } |
| |
| if (tc->pipe->set_context_param) { |
| struct tc_context_param *call = |
| tc_add_call(tc, TC_CALL_set_context_param, tc_context_param); |
| |
| call->param = param; |
| call->value = value; |
| } |
| } |
| |
| |
| /******************************************************************** |
| * draw, launch, clear, blit, copy, flush |
| */ |
| |
| struct tc_flush_call { |
| struct tc_call_base base; |
| unsigned flags; |
| struct threaded_context *tc; |
| struct pipe_fence_handle *fence; |
| }; |
| |
| static void |
| tc_flush_queries(struct threaded_context *tc) |
| { |
| struct threaded_query *tq, *tmp; |
| LIST_FOR_EACH_ENTRY_SAFE(tq, tmp, &tc->unflushed_queries, head_unflushed) { |
| list_del(&tq->head_unflushed); |
| |
| /* Memory release semantics: due to a possible race with |
| * tc_get_query_result, we must ensure that the linked list changes |
| * are visible before setting tq->flushed. |
| */ |
| p_atomic_set(&tq->flushed, true); |
| } |
| } |
| |
| static uint16_t |
| tc_call_flush(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_flush_call *p = to_call(call, tc_flush_call); |
| struct pipe_screen *screen = pipe->screen; |
| |
| pipe->flush(pipe, p->fence ? &p->fence : NULL, p->flags); |
| screen->fence_reference(screen, &p->fence, NULL); |
| |
| if (!(p->flags & PIPE_FLUSH_DEFERRED)) |
| tc_flush_queries(p->tc); |
| |
| return call_size(tc_flush_call); |
| } |
| |
| static void |
| tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence, |
| unsigned flags) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct pipe_context *pipe = tc->pipe; |
| struct pipe_screen *screen = pipe->screen; |
| bool async = flags & (PIPE_FLUSH_DEFERRED | PIPE_FLUSH_ASYNC); |
| |
| if (async && tc->options.create_fence) { |
| if (fence) { |
| struct tc_batch *next = &tc->batch_slots[tc->next]; |
| |
| if (!next->token) { |
| next->token = malloc(sizeof(*next->token)); |
| if (!next->token) |
| goto out_of_memory; |
| |
| pipe_reference_init(&next->token->ref, 1); |
| next->token->tc = tc; |
| } |
| |
| screen->fence_reference(screen, fence, |
| tc->options.create_fence(pipe, next->token)); |
| if (!*fence) |
| goto out_of_memory; |
| } |
| |
| struct tc_flush_call *p = tc_add_call(tc, TC_CALL_flush, tc_flush_call); |
| p->tc = tc; |
| p->fence = fence ? *fence : NULL; |
| p->flags = flags | TC_FLUSH_ASYNC; |
| |
| if (!(flags & PIPE_FLUSH_DEFERRED)) |
| tc_batch_flush(tc); |
| return; |
| } |
| |
| out_of_memory: |
| tc_sync_msg(tc, flags & PIPE_FLUSH_END_OF_FRAME ? "end of frame" : |
| flags & PIPE_FLUSH_DEFERRED ? "deferred fence" : "normal"); |
| |
| if (!(flags & PIPE_FLUSH_DEFERRED)) |
| tc_flush_queries(tc); |
| tc_set_driver_thread(tc); |
| pipe->flush(pipe, fence, flags); |
| tc_clear_driver_thread(tc); |
| } |
| |
| struct tc_draw_single { |
| struct tc_call_base base; |
| unsigned index_bias; |
| struct pipe_draw_info info; |
| }; |
| |
| struct tc_draw_single_drawid { |
| struct tc_draw_single base; |
| unsigned drawid_offset; |
| }; |
| |
| static uint16_t |
| tc_call_draw_single_drawid(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_draw_single_drawid *info_drawid = to_call(call, tc_draw_single_drawid); |
| struct tc_draw_single *info = &info_drawid->base; |
| |
| /* u_threaded_context stores start/count in min/max_index for single draws. */ |
| /* Drivers using u_threaded_context shouldn't use min/max_index. */ |
| struct pipe_draw_start_count_bias draw; |
| |
| draw.start = info->info.min_index; |
| draw.count = info->info.max_index; |
| draw.index_bias = info->index_bias; |
| |
| info->info.index_bounds_valid = false; |
| info->info.has_user_indices = false; |
| info->info.take_index_buffer_ownership = false; |
| |
| pipe->draw_vbo(pipe, &info->info, info_drawid->drawid_offset, NULL, &draw, 1); |
| if (info->info.index_size) |
| tc_drop_resource_reference(info->info.index.resource); |
| |
| return call_size(tc_draw_single_drawid); |
| } |
| |
| static void |
| simplify_draw_info(struct pipe_draw_info *info) |
| { |
| /* Clear these fields to facilitate draw merging. |
| * Drivers shouldn't use them. |
| */ |
| info->has_user_indices = false; |
| info->index_bounds_valid = false; |
| info->take_index_buffer_ownership = false; |
| info->index_bias_varies = false; |
| info->_pad = 0; |
| |
| /* This shouldn't be set when merging single draws. */ |
| info->increment_draw_id = false; |
| |
| if (info->index_size) { |
| if (!info->primitive_restart) |
| info->restart_index = 0; |
| } else { |
| assert(!info->primitive_restart); |
| info->primitive_restart = false; |
| info->restart_index = 0; |
| info->index.resource = NULL; |
| } |
| } |
| |
| static bool |
| is_next_call_a_mergeable_draw(struct tc_draw_single *first, |
| struct tc_draw_single *next) |
| { |
| if (next->base.call_id != TC_CALL_draw_single) |
| return false; |
| |
| simplify_draw_info(&next->info); |
| |
| STATIC_ASSERT(offsetof(struct pipe_draw_info, min_index) == |
| sizeof(struct pipe_draw_info) - 8); |
| STATIC_ASSERT(offsetof(struct pipe_draw_info, max_index) == |
| sizeof(struct pipe_draw_info) - 4); |
| /* All fields must be the same except start and count. */ |
| /* u_threaded_context stores start/count in min/max_index for single draws. */ |
| return memcmp((uint32_t*)&first->info, (uint32_t*)&next->info, |
| DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX) == 0; |
| } |
| |
| static uint16_t |
| tc_call_draw_single(struct pipe_context *pipe, void *call, uint64_t *last_ptr) |
| { |
| /* Draw call merging. */ |
| struct tc_draw_single *first = to_call(call, tc_draw_single); |
| struct tc_draw_single *last = (struct tc_draw_single *)last_ptr; |
| struct tc_draw_single *next = get_next_call(first, tc_draw_single); |
| |
| /* If at least 2 consecutive draw calls can be merged... */ |
| if (next != last && |
| next->base.call_id == TC_CALL_draw_single) { |
| simplify_draw_info(&first->info); |
| |
| if (is_next_call_a_mergeable_draw(first, next)) { |
| /* The maximum number of merged draws is given by the batch size. */ |
| struct pipe_draw_start_count_bias multi[TC_SLOTS_PER_BATCH / call_size(tc_draw_single)]; |
| unsigned num_draws = 2; |
| bool index_bias_varies = first->index_bias != next->index_bias; |
| |
| /* u_threaded_context stores start/count in min/max_index for single draws. */ |
| multi[0].start = first->info.min_index; |
| multi[0].count = first->info.max_index; |
| multi[0].index_bias = first->index_bias; |
| multi[1].start = next->info.min_index; |
| multi[1].count = next->info.max_index; |
| multi[1].index_bias = next->index_bias; |
| |
| /* Find how many other draws can be merged. */ |
| next = get_next_call(next, tc_draw_single); |
| for (; next != last && is_next_call_a_mergeable_draw(first, next); |
| next = get_next_call(next, tc_draw_single), num_draws++) { |
| /* u_threaded_context stores start/count in min/max_index for single draws. */ |
| multi[num_draws].start = next->info.min_index; |
| multi[num_draws].count = next->info.max_index; |
| multi[num_draws].index_bias = next->index_bias; |
| index_bias_varies |= first->index_bias != next->index_bias; |
| } |
| |
| first->info.index_bias_varies = index_bias_varies; |
| pipe->draw_vbo(pipe, &first->info, 0, NULL, multi, num_draws); |
| |
| /* Since all draws use the same index buffer, drop all references at once. */ |
| if (first->info.index_size) |
| pipe_drop_resource_references(first->info.index.resource, num_draws); |
| |
| return call_size(tc_draw_single) * num_draws; |
| } |
| } |
| |
| /* u_threaded_context stores start/count in min/max_index for single draws. */ |
| /* Drivers using u_threaded_context shouldn't use min/max_index. */ |
| struct pipe_draw_start_count_bias draw; |
| |
| draw.start = first->info.min_index; |
| draw.count = first->info.max_index; |
| draw.index_bias = first->index_bias; |
| |
| first->info.index_bounds_valid = false; |
| first->info.has_user_indices = false; |
| first->info.take_index_buffer_ownership = false; |
| |
| pipe->draw_vbo(pipe, &first->info, 0, NULL, &draw, 1); |
| if (first->info.index_size) |
| tc_drop_resource_reference(first->info.index.resource); |
| |
| return call_size(tc_draw_single); |
| } |
| |
| struct tc_draw_indirect { |
| struct tc_call_base base; |
| struct pipe_draw_start_count_bias draw; |
| struct pipe_draw_info info; |
| struct pipe_draw_indirect_info indirect; |
| }; |
| |
| static uint16_t |
| tc_call_draw_indirect(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_draw_indirect *info = to_call(call, tc_draw_indirect); |
| |
| info->info.index_bounds_valid = false; |
| info->info.take_index_buffer_ownership = false; |
| |
| pipe->draw_vbo(pipe, &info->info, 0, &info->indirect, &info->draw, 1); |
| if (info->info.index_size) |
| tc_drop_resource_reference(info->info.index.resource); |
| |
| tc_drop_resource_reference(info->indirect.buffer); |
| tc_drop_resource_reference(info->indirect.indirect_draw_count); |
| tc_drop_so_target_reference(info->indirect.count_from_stream_output); |
| return call_size(tc_draw_indirect); |
| } |
| |
| struct tc_draw_multi { |
| struct tc_call_base base; |
| unsigned num_draws; |
| struct pipe_draw_info info; |
| struct pipe_draw_start_count_bias slot[]; /* variable-sized array */ |
| }; |
| |
| static uint16_t |
| tc_call_draw_multi(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_draw_multi *info = (struct tc_draw_multi*)call; |
| |
| info->info.has_user_indices = false; |
| info->info.index_bounds_valid = false; |
| info->info.take_index_buffer_ownership = false; |
| |
| pipe->draw_vbo(pipe, &info->info, 0, NULL, info->slot, info->num_draws); |
| if (info->info.index_size) |
| tc_drop_resource_reference(info->info.index.resource); |
| |
| return info->base.num_slots; |
| } |
| |
| #define DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX \ |
| offsetof(struct pipe_draw_info, index) |
| |
| void |
| tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info, |
| unsigned drawid_offset, |
| const struct pipe_draw_indirect_info *indirect, |
| const struct pipe_draw_start_count_bias *draws, |
| unsigned num_draws) |
| { |
| STATIC_ASSERT(DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX + |
| sizeof(intptr_t) == offsetof(struct pipe_draw_info, min_index)); |
| |
| struct threaded_context *tc = threaded_context(_pipe); |
| unsigned index_size = info->index_size; |
| bool has_user_indices = info->has_user_indices; |
| |
| if (unlikely(tc->add_all_gfx_bindings_to_buffer_list)) |
| tc_add_all_gfx_bindings_to_buffer_list(tc); |
| |
| if (unlikely(indirect)) { |
| assert(!has_user_indices); |
| assert(num_draws == 1); |
| |
| struct tc_draw_indirect *p = |
| tc_add_call(tc, TC_CALL_draw_indirect, tc_draw_indirect); |
| struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list]; |
| |
| if (index_size) { |
| if (!info->take_index_buffer_ownership) { |
| tc_set_resource_reference(&p->info.index.resource, |
| info->index.resource); |
| } |
| tc_add_to_buffer_list(next, info->index.resource); |
| } |
| memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX); |
| |
| tc_set_resource_reference(&p->indirect.buffer, indirect->buffer); |
| tc_set_resource_reference(&p->indirect.indirect_draw_count, |
| indirect->indirect_draw_count); |
| p->indirect.count_from_stream_output = NULL; |
| pipe_so_target_reference(&p->indirect.count_from_stream_output, |
| indirect->count_from_stream_output); |
| |
| if (indirect->buffer) |
| tc_add_to_buffer_list(next, indirect->buffer); |
| if (indirect->indirect_draw_count) |
| tc_add_to_buffer_list(next, indirect->indirect_draw_count); |
| if (indirect->count_from_stream_output) |
| tc_add_to_buffer_list(next, indirect->count_from_stream_output->buffer); |
| |
| memcpy(&p->indirect, indirect, sizeof(*indirect)); |
| p->draw.start = draws[0].start; |
| return; |
| } |
| |
| if (num_draws == 1) { |
| /* Single draw. */ |
| if (index_size && has_user_indices) { |
| unsigned size = draws[0].count * index_size; |
| struct pipe_resource *buffer = NULL; |
| unsigned offset; |
| |
| if (!size) |
| return; |
| |
      /* This must be done before adding draw_vbo, because the upload could
       * itself generate calls (e.g. transfer_unmap) and flush a partially
       * uninitialized draw_vbo to the driver if it were done afterwards.
       */
| u_upload_data(tc->base.stream_uploader, 0, size, 4, |
| (uint8_t*)info->index.user + draws[0].start * index_size, |
| &offset, &buffer); |
| if (unlikely(!buffer)) |
| return; |
| |
| struct tc_draw_single *p = drawid_offset > 0 ? |
| &tc_add_call(tc, TC_CALL_draw_single_drawid, tc_draw_single_drawid)->base : |
| tc_add_call(tc, TC_CALL_draw_single, tc_draw_single); |
| memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX); |
| p->info.index.resource = buffer; |
| if (drawid_offset > 0) |
| ((struct tc_draw_single_drawid*)p)->drawid_offset = drawid_offset; |
| /* u_threaded_context stores start/count in min/max_index for single draws. */ |
| p->info.min_index = offset >> util_logbase2(index_size); |
| p->info.max_index = draws[0].count; |
| p->index_bias = draws[0].index_bias; |
| } else { |
| /* Non-indexed call or indexed with a real index buffer. */ |
| struct tc_draw_single *p = drawid_offset > 0 ? |
| &tc_add_call(tc, TC_CALL_draw_single_drawid, tc_draw_single_drawid)->base : |
| tc_add_call(tc, TC_CALL_draw_single, tc_draw_single); |
| if (index_size) { |
| if (!info->take_index_buffer_ownership) { |
| tc_set_resource_reference(&p->info.index.resource, |
| info->index.resource); |
| } |
| tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->index.resource); |
| } |
| if (drawid_offset > 0) |
| ((struct tc_draw_single_drawid*)p)->drawid_offset = drawid_offset; |
| memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX); |
| /* u_threaded_context stores start/count in min/max_index for single draws. */ |
| p->info.min_index = draws[0].start; |
| p->info.max_index = draws[0].count; |
| p->index_bias = draws[0].index_bias; |
| } |
| return; |
| } |
| |
| const int draw_overhead_bytes = sizeof(struct tc_draw_multi); |
| const int one_draw_slot_bytes = sizeof(((struct tc_draw_multi*)NULL)->slot[0]); |
| const int slots_for_one_draw = DIV_ROUND_UP(draw_overhead_bytes + one_draw_slot_bytes, |
| sizeof(struct tc_call_base)); |
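   /* Example with hypothetical sizes: if sizeof(struct tc_call_base) == 8,
    * sizeof(struct tc_draw_multi) == 72 and one slot entry is 12 bytes,
    * then slots_for_one_draw = DIV_ROUND_UP(72 + 12, 8) = 11 slots must be
    * free in a batch to record a tc_draw_multi with a single draw.
    */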
| /* Multi draw. */ |
| if (index_size && has_user_indices) { |
| struct pipe_resource *buffer = NULL; |
| unsigned buffer_offset, total_count = 0; |
| unsigned index_size_shift = util_logbase2(index_size); |
| uint8_t *ptr = NULL; |
| |
| /* Get the total count. */ |
| for (unsigned i = 0; i < num_draws; i++) |
| total_count += draws[i].count; |
| |
| if (!total_count) |
| return; |
| |
| /* Allocate space for all index buffers. |
| * |
       * This must be done before adding draw_vbo, because the upload could
       * itself generate calls (e.g. transfer_unmap) and flush a partially
       * uninitialized draw_vbo to the driver if it were done afterwards.
| */ |
| u_upload_alloc(tc->base.stream_uploader, 0, |
| total_count << index_size_shift, 4, |
| &buffer_offset, &buffer, (void**)&ptr); |
| if (unlikely(!buffer)) |
| return; |
| |
| int total_offset = 0; |
| while (num_draws) { |
| struct tc_batch *next = &tc->batch_slots[tc->next]; |
| |
| int nb_slots_left = TC_SLOTS_PER_BATCH - next->num_total_slots; |
      /* If there isn't enough room for one draw, start filling the next batch */
| if (nb_slots_left < slots_for_one_draw) |
| nb_slots_left = TC_SLOTS_PER_BATCH; |
| const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base); |
| |
| /* How many draws can we fit in the current batch */ |
| const int dr = MIN2(num_draws, (size_left_bytes - draw_overhead_bytes) / one_draw_slot_bytes); |
| |
| struct tc_draw_multi *p = |
| tc_add_slot_based_call(tc, TC_CALL_draw_multi, tc_draw_multi, |
| dr); |
| memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX); |
| p->info.index.resource = buffer; |
| p->num_draws = dr; |
| |
| /* Upload index buffers. */ |
| for (unsigned i = 0, offset = 0; i < dr; i++) { |
| unsigned count = draws[i + total_offset].count; |
| |
| if (!count) { |
| p->slot[i].start = 0; |
| p->slot[i].count = 0; |
| p->slot[i].index_bias = 0; |
| continue; |
| } |
| |
| unsigned size = count << index_size_shift; |
| memcpy(ptr + offset, |
| (uint8_t*)info->index.user + |
| (draws[i + total_offset].start << index_size_shift), size); |
| p->slot[i].start = (buffer_offset + offset) >> index_size_shift; |
| p->slot[i].count = count; |
| p->slot[i].index_bias = draws[i + total_offset].index_bias; |
| offset += size; |
| } |
| |
| total_offset += dr; |
| num_draws -= dr; |
| } |
| } else { |
| int total_offset = 0; |
| bool take_index_buffer_ownership = info->take_index_buffer_ownership; |
| while (num_draws) { |
| struct tc_batch *next = &tc->batch_slots[tc->next]; |
| |
| int nb_slots_left = TC_SLOTS_PER_BATCH - next->num_total_slots; |
      /* If there isn't enough room for one draw, start filling the next batch */
| if (nb_slots_left < slots_for_one_draw) |
| nb_slots_left = TC_SLOTS_PER_BATCH; |
| const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base); |
| |
| /* How many draws can we fit in the current batch */ |
| const int dr = MIN2(num_draws, (size_left_bytes - draw_overhead_bytes) / one_draw_slot_bytes); |
| |
| /* Non-indexed call or indexed with a real index buffer. */ |
| struct tc_draw_multi *p = |
| tc_add_slot_based_call(tc, TC_CALL_draw_multi, tc_draw_multi, |
| dr); |
| if (index_size) { |
| if (!take_index_buffer_ownership) { |
| tc_set_resource_reference(&p->info.index.resource, |
| info->index.resource); |
| } |
| tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->index.resource); |
| } |
| take_index_buffer_ownership = false; |
| memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX); |
| p->num_draws = dr; |
| memcpy(p->slot, &draws[total_offset], sizeof(draws[0]) * dr); |
| num_draws -= dr; |
| |
| total_offset += dr; |
| } |
| } |
| } |
| |
| struct tc_draw_vstate_single { |
| struct tc_call_base base; |
| struct pipe_draw_start_count_bias draw; |
| |
| /* The following states must be together without holes because they are |
| * compared by draw merging. |
| */ |
| struct pipe_vertex_state *state; |
| uint32_t partial_velem_mask; |
| struct pipe_draw_vertex_state_info info; |
| }; |
| |
| static bool |
| is_next_call_a_mergeable_draw_vstate(struct tc_draw_vstate_single *first, |
| struct tc_draw_vstate_single *next) |
| { |
| if (next->base.call_id != TC_CALL_draw_vstate_single) |
| return false; |
| |
| return !memcmp(&first->state, &next->state, |
| offsetof(struct tc_draw_vstate_single, info) + |
| sizeof(struct pipe_draw_vertex_state_info) - |
| offsetof(struct tc_draw_vstate_single, state)); |
| } |
| |
| static uint16_t |
| tc_call_draw_vstate_single(struct pipe_context *pipe, void *call, uint64_t *last_ptr) |
| { |
| /* Draw call merging. */ |
| struct tc_draw_vstate_single *first = to_call(call, tc_draw_vstate_single); |
| struct tc_draw_vstate_single *last = (struct tc_draw_vstate_single *)last_ptr; |
| struct tc_draw_vstate_single *next = get_next_call(first, tc_draw_vstate_single); |
| |
| /* If at least 2 consecutive draw calls can be merged... */ |
| if (next != last && |
| is_next_call_a_mergeable_draw_vstate(first, next)) { |
| /* The maximum number of merged draws is given by the batch size. */ |
| struct pipe_draw_start_count_bias draws[TC_SLOTS_PER_BATCH / |
| call_size(tc_draw_vstate_single)]; |
| unsigned num_draws = 2; |
| |
| draws[0] = first->draw; |
| draws[1] = next->draw; |
| |
| /* Find how many other draws can be merged. */ |
| next = get_next_call(next, tc_draw_vstate_single); |
| for (; next != last && |
| is_next_call_a_mergeable_draw_vstate(first, next); |
| next = get_next_call(next, tc_draw_vstate_single), |
| num_draws++) |
| draws[num_draws] = next->draw; |
| |
| pipe->draw_vertex_state(pipe, first->state, first->partial_velem_mask, |
| first->info, draws, num_draws); |
| /* Since all draws use the same state, drop all references at once. */ |
| tc_drop_vertex_state_references(first->state, num_draws); |
| |
| return call_size(tc_draw_vstate_single) * num_draws; |
| } |
| |
| pipe->draw_vertex_state(pipe, first->state, first->partial_velem_mask, |
| first->info, &first->draw, 1); |
| tc_drop_vertex_state_references(first->state, 1); |
| return call_size(tc_draw_vstate_single); |
| } |
| |
| struct tc_draw_vstate_multi { |
| struct tc_call_base base; |
| uint32_t partial_velem_mask; |
| struct pipe_draw_vertex_state_info info; |
| unsigned num_draws; |
| struct pipe_vertex_state *state; |
| struct pipe_draw_start_count_bias slot[0]; |
| }; |
| |
| static uint16_t |
| tc_call_draw_vstate_multi(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_draw_vstate_multi *info = (struct tc_draw_vstate_multi*)call; |
| |
| pipe->draw_vertex_state(pipe, info->state, info->partial_velem_mask, |
| info->info, info->slot, info->num_draws); |
| tc_drop_vertex_state_references(info->state, 1); |
| return info->base.num_slots; |
| } |
| |
| static void |
| tc_draw_vertex_state(struct pipe_context *_pipe, |
| struct pipe_vertex_state *state, |
| uint32_t partial_velem_mask, |
| struct pipe_draw_vertex_state_info info, |
| const struct pipe_draw_start_count_bias *draws, |
| unsigned num_draws) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| |
| if (unlikely(tc->add_all_gfx_bindings_to_buffer_list)) |
| tc_add_all_gfx_bindings_to_buffer_list(tc); |
| |
| if (num_draws == 1) { |
| /* Single draw. */ |
| struct tc_draw_vstate_single *p = |
| tc_add_call(tc, TC_CALL_draw_vstate_single, tc_draw_vstate_single); |
| p->partial_velem_mask = partial_velem_mask; |
| p->draw = draws[0]; |
| p->info.mode = info.mode; |
| p->info.take_vertex_state_ownership = false; |
| |
      /* This should always be 0 for simplicity because we assume that
       * index_bias doesn't vary.
       */
| assert(draws[0].index_bias == 0); |
| |
| if (!info.take_vertex_state_ownership) |
| tc_set_vertex_state_reference(&p->state, state); |
| else |
| p->state = state; |
| return; |
| } |
| |
| const int draw_overhead_bytes = sizeof(struct tc_draw_vstate_multi); |
| const int one_draw_slot_bytes = sizeof(((struct tc_draw_vstate_multi*)NULL)->slot[0]); |
| const int slots_for_one_draw = DIV_ROUND_UP(draw_overhead_bytes + one_draw_slot_bytes, |
| sizeof(struct tc_call_base)); |
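   /* Illustration with made-up sizes: a 24-byte multi-draw header, a 12-byte
    * pipe_draw_start_count_bias and an 8-byte tc_call_base would give
    * slots_for_one_draw = DIV_ROUND_UP(24 + 12, 8) = 5.
    */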
| /* Multi draw. */ |
| int total_offset = 0; |
| bool take_vertex_state_ownership = info.take_vertex_state_ownership; |
| while (num_draws) { |
| struct tc_batch *next = &tc->batch_slots[tc->next]; |
| |
| int nb_slots_left = TC_SLOTS_PER_BATCH - next->num_total_slots; |
      /* If there isn't enough room for one draw, start filling the next batch */
| if (nb_slots_left < slots_for_one_draw) |
| nb_slots_left = TC_SLOTS_PER_BATCH; |
| const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base); |
| |
      /* How many draws can we fit in the current batch? */
| const int dr = MIN2(num_draws, (size_left_bytes - draw_overhead_bytes) / one_draw_slot_bytes); |
| |
      /* Add the next chunk of draws to the current batch. */
| struct tc_draw_vstate_multi *p = |
| tc_add_slot_based_call(tc, TC_CALL_draw_vstate_multi, tc_draw_vstate_multi, dr); |
| |
| if (!take_vertex_state_ownership) |
| tc_set_vertex_state_reference(&p->state, state); |
| else |
| p->state = state; |
| |
| take_vertex_state_ownership = false; |
| p->partial_velem_mask = partial_velem_mask; |
| p->info.mode = info.mode; |
| p->info.take_vertex_state_ownership = false; |
| p->num_draws = dr; |
| memcpy(p->slot, &draws[total_offset], sizeof(draws[0]) * dr); |
| num_draws -= dr; |
| |
| total_offset += dr; |
| } |
| } |
| |
| struct tc_launch_grid_call { |
| struct tc_call_base base; |
| struct pipe_grid_info info; |
| }; |
| |
| static uint16_t |
| tc_call_launch_grid(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct pipe_grid_info *p = &to_call(call, tc_launch_grid_call)->info; |
| |
| pipe->launch_grid(pipe, p); |
| tc_drop_resource_reference(p->indirect); |
| return call_size(tc_launch_grid_call); |
| } |
| |
| static void |
| tc_launch_grid(struct pipe_context *_pipe, |
| const struct pipe_grid_info *info) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct tc_launch_grid_call *p = tc_add_call(tc, TC_CALL_launch_grid, |
| tc_launch_grid_call); |
| assert(info->input == NULL); |
| |
| if (unlikely(tc->add_all_compute_bindings_to_buffer_list)) |
| tc_add_all_compute_bindings_to_buffer_list(tc); |
| |
| tc_set_resource_reference(&p->info.indirect, info->indirect); |
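   /* The memcpy below rewrites p->info.indirect with the same pointer value;
    * the reference-taking call above exists only to bump the refcount.
    */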
| memcpy(&p->info, info, sizeof(*info)); |
| |
| if (info->indirect) |
| tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->indirect); |
| } |
| |
| static uint16_t |
| tc_call_resource_copy_region(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_resource_copy_region *p = to_call(call, tc_resource_copy_region); |
| |
| pipe->resource_copy_region(pipe, p->dst, p->dst_level, p->dstx, p->dsty, |
| p->dstz, p->src, p->src_level, &p->src_box); |
| tc_drop_resource_reference(p->dst); |
| tc_drop_resource_reference(p->src); |
| return call_size(tc_resource_copy_region); |
| } |
| |
| static void |
| tc_resource_copy_region(struct pipe_context *_pipe, |
| struct pipe_resource *dst, unsigned dst_level, |
| unsigned dstx, unsigned dsty, unsigned dstz, |
| struct pipe_resource *src, unsigned src_level, |
| const struct pipe_box *src_box) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct threaded_resource *tdst = threaded_resource(dst); |
| struct tc_resource_copy_region *p = |
| tc_add_call(tc, TC_CALL_resource_copy_region, |
| tc_resource_copy_region); |
| |
| if (dst->target == PIPE_BUFFER) |
| tc_buffer_disable_cpu_storage(dst); |
| |
| tc_set_resource_reference(&p->dst, dst); |
| p->dst_level = dst_level; |
| p->dstx = dstx; |
| p->dsty = dsty; |
| p->dstz = dstz; |
| tc_set_resource_reference(&p->src, src); |
| p->src_level = src_level; |
| p->src_box = *src_box; |
| |
| if (dst->target == PIPE_BUFFER) { |
| struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list]; |
| |
| tc_add_to_buffer_list(next, src); |
| tc_add_to_buffer_list(next, dst); |
| |
| util_range_add(&tdst->b, &tdst->valid_buffer_range, |
| dstx, dstx + src_box->width); |
| } |
| } |
| |
| struct tc_blit_call { |
| struct tc_call_base base; |
| struct pipe_blit_info info; |
| }; |
| |
| static uint16_t |
| tc_call_blit(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct pipe_blit_info *blit = &to_call(call, tc_blit_call)->info; |
| |
| pipe->blit(pipe, blit); |
| tc_drop_resource_reference(blit->dst.resource); |
| tc_drop_resource_reference(blit->src.resource); |
| return call_size(tc_blit_call); |
| } |
| |
| static void |
| tc_blit(struct pipe_context *_pipe, const struct pipe_blit_info *info) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct tc_blit_call *blit = tc_add_call(tc, TC_CALL_blit, tc_blit_call); |
| |
| tc_set_resource_reference(&blit->info.dst.resource, info->dst.resource); |
| tc_set_resource_reference(&blit->info.src.resource, info->src.resource); |
| memcpy(&blit->info, info, sizeof(*info)); |
| } |
| |
| struct tc_generate_mipmap { |
| struct tc_call_base base; |
| enum pipe_format format; |
| unsigned base_level; |
| unsigned last_level; |
| unsigned first_layer; |
| unsigned last_layer; |
| struct pipe_resource *res; |
| }; |
| |
| static uint16_t |
| tc_call_generate_mipmap(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_generate_mipmap *p = to_call(call, tc_generate_mipmap); |
| ASSERTED bool result = pipe->generate_mipmap(pipe, p->res, p->format, |
| p->base_level, |
| p->last_level, |
| p->first_layer, |
| p->last_layer); |
| assert(result); |
| tc_drop_resource_reference(p->res); |
| return call_size(tc_generate_mipmap); |
| } |
| |
| static bool |
| tc_generate_mipmap(struct pipe_context *_pipe, |
| struct pipe_resource *res, |
| enum pipe_format format, |
| unsigned base_level, |
| unsigned last_level, |
| unsigned first_layer, |
| unsigned last_layer) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct pipe_context *pipe = tc->pipe; |
| struct pipe_screen *screen = pipe->screen; |
| unsigned bind = PIPE_BIND_SAMPLER_VIEW; |
| |
| if (util_format_is_depth_or_stencil(format)) |
| bind = PIPE_BIND_DEPTH_STENCIL; |
| else |
| bind = PIPE_BIND_RENDER_TARGET; |
| |
| if (!screen->is_format_supported(screen, format, res->target, |
| res->nr_samples, res->nr_storage_samples, |
| bind)) |
| return false; |
| |
| struct tc_generate_mipmap *p = |
| tc_add_call(tc, TC_CALL_generate_mipmap, tc_generate_mipmap); |
| |
| tc_set_resource_reference(&p->res, res); |
| p->format = format; |
| p->base_level = base_level; |
| p->last_level = last_level; |
| p->first_layer = first_layer; |
| p->last_layer = last_layer; |
| return true; |
| } |
| |
| struct tc_resource_call { |
| struct tc_call_base base; |
| struct pipe_resource *resource; |
| }; |
| |
| static uint16_t |
| tc_call_flush_resource(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct pipe_resource *resource = to_call(call, tc_resource_call)->resource; |
| |
| pipe->flush_resource(pipe, resource); |
| tc_drop_resource_reference(resource); |
| return call_size(tc_resource_call); |
| } |
| |
| static void |
| tc_flush_resource(struct pipe_context *_pipe, struct pipe_resource *resource) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct tc_resource_call *call = tc_add_call(tc, TC_CALL_flush_resource, |
| tc_resource_call); |
| |
| tc_set_resource_reference(&call->resource, resource); |
| } |
| |
| static uint16_t |
| tc_call_invalidate_resource(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct pipe_resource *resource = to_call(call, tc_resource_call)->resource; |
| |
| pipe->invalidate_resource(pipe, resource); |
| tc_drop_resource_reference(resource); |
| return call_size(tc_resource_call); |
| } |
| |
| static void |
| tc_invalidate_resource(struct pipe_context *_pipe, |
| struct pipe_resource *resource) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| |
| if (resource->target == PIPE_BUFFER) { |
| tc_invalidate_buffer(tc, threaded_resource(resource)); |
| return; |
| } |
| |
| struct tc_resource_call *call = tc_add_call(tc, TC_CALL_invalidate_resource, |
| tc_resource_call); |
| tc_set_resource_reference(&call->resource, resource); |
| } |
| |
| struct tc_clear { |
| struct tc_call_base base; |
| bool scissor_state_set; |
| uint8_t stencil; |
| uint16_t buffers; |
| float depth; |
| struct pipe_scissor_state scissor_state; |
| union pipe_color_union color; |
| }; |
| |
| static uint16_t |
| tc_call_clear(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_clear *p = to_call(call, tc_clear); |
| |
   pipe->clear(pipe, p->buffers,
               p->scissor_state_set ? &p->scissor_state : NULL,
               &p->color, p->depth, p->stencil);
| return call_size(tc_clear); |
| } |
| |
| static void |
tc_clear(struct pipe_context *_pipe, unsigned buffers,
         const struct pipe_scissor_state *scissor_state,
         const union pipe_color_union *color, double depth,
         unsigned stencil)
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct tc_clear *p = tc_add_call(tc, TC_CALL_clear, tc_clear); |
| |
| p->buffers = buffers; |
| if (scissor_state) |
| p->scissor_state = *scissor_state; |
| p->scissor_state_set = !!scissor_state; |
| p->color = *color; |
| p->depth = depth; |
| p->stencil = stencil; |
| } |
| |
| struct tc_clear_render_target { |
| struct tc_call_base base; |
| bool render_condition_enabled; |
| unsigned dstx; |
| unsigned dsty; |
| unsigned width; |
| unsigned height; |
| union pipe_color_union color; |
| struct pipe_surface *dst; |
| }; |
| |
| static uint16_t |
| tc_call_clear_render_target(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_clear_render_target *p = to_call(call, tc_clear_render_target); |
| |
   pipe->clear_render_target(pipe, p->dst, &p->color, p->dstx, p->dsty,
                             p->width, p->height,
                             p->render_condition_enabled);
| tc_drop_surface_reference(p->dst); |
| return call_size(tc_clear_render_target); |
| } |
| |
| static void |
| tc_clear_render_target(struct pipe_context *_pipe, |
| struct pipe_surface *dst, |
| const union pipe_color_union *color, |
| unsigned dstx, unsigned dsty, |
| unsigned width, unsigned height, |
| bool render_condition_enabled) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
   struct tc_clear_render_target *p =
      tc_add_call(tc, TC_CALL_clear_render_target, tc_clear_render_target);
| p->dst = NULL; |
| pipe_surface_reference(&p->dst, dst); |
| p->color = *color; |
| p->dstx = dstx; |
| p->dsty = dsty; |
| p->width = width; |
| p->height = height; |
| p->render_condition_enabled = render_condition_enabled; |
| } |
| |
| |
| struct tc_clear_depth_stencil { |
| struct tc_call_base base; |
| bool render_condition_enabled; |
| float depth; |
| unsigned clear_flags; |
| unsigned stencil; |
| unsigned dstx; |
| unsigned dsty; |
| unsigned width; |
| unsigned height; |
| struct pipe_surface *dst; |
| }; |
| |
| |
| static uint16_t |
| tc_call_clear_depth_stencil(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_clear_depth_stencil *p = to_call(call, tc_clear_depth_stencil); |
| |
| pipe->clear_depth_stencil(pipe, p->dst, p->clear_flags, p->depth, p->stencil, |
| p->dstx, p->dsty, p->width, p->height, |
| p->render_condition_enabled); |
| tc_drop_surface_reference(p->dst); |
| return call_size(tc_clear_depth_stencil); |
| } |
| |
| static void |
| tc_clear_depth_stencil(struct pipe_context *_pipe, |
| struct pipe_surface *dst, unsigned clear_flags, |
| double depth, unsigned stencil, unsigned dstx, |
| unsigned dsty, unsigned width, unsigned height, |
| bool render_condition_enabled) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
   struct tc_clear_depth_stencil *p =
      tc_add_call(tc, TC_CALL_clear_depth_stencil, tc_clear_depth_stencil);
| p->dst = NULL; |
| pipe_surface_reference(&p->dst, dst); |
| p->clear_flags = clear_flags; |
| p->depth = depth; |
| p->stencil = stencil; |
| p->dstx = dstx; |
| p->dsty = dsty; |
| p->width = width; |
| p->height = height; |
| p->render_condition_enabled = render_condition_enabled; |
| } |
| |
| struct tc_clear_buffer { |
| struct tc_call_base base; |
| uint8_t clear_value_size; |
| unsigned offset; |
| unsigned size; |
| char clear_value[16]; |
| struct pipe_resource *res; |
| }; |
| |
| static uint16_t |
| tc_call_clear_buffer(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_clear_buffer *p = to_call(call, tc_clear_buffer); |
| |
| pipe->clear_buffer(pipe, p->res, p->offset, p->size, p->clear_value, |
| p->clear_value_size); |
| tc_drop_resource_reference(p->res); |
| return call_size(tc_clear_buffer); |
| } |
| |
| static void |
| tc_clear_buffer(struct pipe_context *_pipe, struct pipe_resource *res, |
| unsigned offset, unsigned size, |
| const void *clear_value, int clear_value_size) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct threaded_resource *tres = threaded_resource(res); |
| struct tc_clear_buffer *p = |
| tc_add_call(tc, TC_CALL_clear_buffer, tc_clear_buffer); |
| |
| tc_buffer_disable_cpu_storage(res); |
| |
| tc_set_resource_reference(&p->res, res); |
| tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], res); |
| p->offset = offset; |
| p->size = size; |
| memcpy(p->clear_value, clear_value, clear_value_size); |
| p->clear_value_size = clear_value_size; |
| |
| util_range_add(&tres->b, &tres->valid_buffer_range, offset, offset + size); |
| } |
| |
| struct tc_clear_texture { |
| struct tc_call_base base; |
| unsigned level; |
| struct pipe_box box; |
| char data[16]; |
| struct pipe_resource *res; |
| }; |
| |
| static uint16_t |
| tc_call_clear_texture(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_clear_texture *p = to_call(call, tc_clear_texture); |
| |
| pipe->clear_texture(pipe, p->res, p->level, &p->box, p->data); |
| tc_drop_resource_reference(p->res); |
| return call_size(tc_clear_texture); |
| } |
| |
| static void |
| tc_clear_texture(struct pipe_context *_pipe, struct pipe_resource *res, |
| unsigned level, const struct pipe_box *box, const void *data) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct tc_clear_texture *p = |
| tc_add_call(tc, TC_CALL_clear_texture, tc_clear_texture); |
| |
| tc_set_resource_reference(&p->res, res); |
| p->level = level; |
| p->box = *box; |
   memcpy(p->data, data, util_format_get_blocksize(res->format));
| } |
| |
| struct tc_resource_commit { |
| struct tc_call_base base; |
| bool commit; |
| unsigned level; |
| struct pipe_box box; |
| struct pipe_resource *res; |
| }; |
| |
| static uint16_t |
| tc_call_resource_commit(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_resource_commit *p = to_call(call, tc_resource_commit); |
| |
| pipe->resource_commit(pipe, p->res, p->level, &p->box, p->commit); |
| tc_drop_resource_reference(p->res); |
| return call_size(tc_resource_commit); |
| } |
| |
| static bool |
| tc_resource_commit(struct pipe_context *_pipe, struct pipe_resource *res, |
| unsigned level, struct pipe_box *box, bool commit) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct tc_resource_commit *p = |
| tc_add_call(tc, TC_CALL_resource_commit, tc_resource_commit); |
| |
| tc_set_resource_reference(&p->res, res); |
| p->level = level; |
| p->box = *box; |
| p->commit = commit; |
| return true; /* we don't care about the return value for this call */ |
| } |
| |
| static unsigned |
| tc_init_intel_perf_query_info(struct pipe_context *_pipe) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct pipe_context *pipe = tc->pipe; |
| |
| return pipe->init_intel_perf_query_info(pipe); |
| } |
| |
| static void |
| tc_get_intel_perf_query_info(struct pipe_context *_pipe, |
| unsigned query_index, |
| const char **name, |
| uint32_t *data_size, |
| uint32_t *n_counters, |
| uint32_t *n_active) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct pipe_context *pipe = tc->pipe; |
| |
| tc_sync(tc); /* n_active vs begin/end_intel_perf_query */ |
| pipe->get_intel_perf_query_info(pipe, query_index, name, data_size, |
| n_counters, n_active); |
| } |
| |
| static void |
| tc_get_intel_perf_query_counter_info(struct pipe_context *_pipe, |
| unsigned query_index, |
| unsigned counter_index, |
| const char **name, |
| const char **desc, |
| uint32_t *offset, |
| uint32_t *data_size, |
| uint32_t *type_enum, |
| uint32_t *data_type_enum, |
| uint64_t *raw_max) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct pipe_context *pipe = tc->pipe; |
| |
| pipe->get_intel_perf_query_counter_info(pipe, query_index, counter_index, |
| name, desc, offset, data_size, type_enum, data_type_enum, raw_max); |
| } |
| |
| static struct pipe_query * |
| tc_new_intel_perf_query_obj(struct pipe_context *_pipe, unsigned query_index) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct pipe_context *pipe = tc->pipe; |
| |
| return pipe->new_intel_perf_query_obj(pipe, query_index); |
| } |
| |
| static uint16_t |
| tc_call_begin_intel_perf_query(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| (void)pipe->begin_intel_perf_query(pipe, to_call(call, tc_query_call)->query); |
| return call_size(tc_query_call); |
| } |
| |
| static bool |
| tc_begin_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| |
| tc_add_call(tc, TC_CALL_begin_intel_perf_query, tc_query_call)->query = q; |
| |
   /* Assume success; a failed begin can be signaled later from
    * get_intel_perf_query_data.
    */
| return true; |
| } |
| |
| static uint16_t |
| tc_call_end_intel_perf_query(struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| pipe->end_intel_perf_query(pipe, to_call(call, tc_query_call)->query); |
| return call_size(tc_query_call); |
| } |
| |
| static void |
| tc_end_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| |
| tc_add_call(tc, TC_CALL_end_intel_perf_query, tc_query_call)->query = q; |
| } |
| |
| static void |
| tc_delete_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct pipe_context *pipe = tc->pipe; |
| |
| tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */ |
| pipe->delete_intel_perf_query(pipe, q); |
| } |
| |
| static void |
| tc_wait_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct pipe_context *pipe = tc->pipe; |
| |
| tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */ |
| pipe->wait_intel_perf_query(pipe, q); |
| } |
| |
| static bool |
| tc_is_intel_perf_query_ready(struct pipe_context *_pipe, struct pipe_query *q) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct pipe_context *pipe = tc->pipe; |
| |
| tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */ |
| return pipe->is_intel_perf_query_ready(pipe, q); |
| } |
| |
| static bool |
| tc_get_intel_perf_query_data(struct pipe_context *_pipe, |
| struct pipe_query *q, |
| size_t data_size, |
| uint32_t *data, |
| uint32_t *bytes_written) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct pipe_context *pipe = tc->pipe; |
| |
| tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */ |
| return pipe->get_intel_perf_query_data(pipe, q, data_size, data, bytes_written); |
| } |
| |
| /******************************************************************** |
| * callback |
| */ |
| |
| struct tc_callback_call { |
| struct tc_call_base base; |
| void (*fn)(void *data); |
| void *data; |
| }; |
| |
| static uint16_t |
| tc_call_callback(UNUSED struct pipe_context *pipe, void *call, uint64_t *last) |
| { |
| struct tc_callback_call *p = to_call(call, tc_callback_call); |
| |
| p->fn(p->data); |
| return call_size(tc_callback_call); |
| } |
| |
| static void |
| tc_callback(struct pipe_context *_pipe, void (*fn)(void *), void *data, |
| bool asap) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| |
| if (asap && tc_is_sync(tc)) { |
| fn(data); |
| return; |
| } |
| |
| struct tc_callback_call *p = |
| tc_add_call(tc, TC_CALL_callback, tc_callback_call); |
| p->fn = fn; |
| p->data = data; |
| } |
| |
| |
| /******************************************************************** |
| * create & destroy |
| */ |
| |
| static void |
| tc_destroy(struct pipe_context *_pipe) |
| { |
| struct threaded_context *tc = threaded_context(_pipe); |
| struct pipe_context *pipe = tc->pipe; |
| |
| if (tc->base.const_uploader && |
| tc->base.stream_uploader != tc->base.const_uploader) |
| u_upload_destroy(tc->base.const_uploader); |
| |
| if (tc->base.stream_uploader) |
| u_upload_destroy(tc->base.stream_uploader); |
| |
| tc_sync(tc); |
| |
| if (util_queue_is_initialized(&tc->queue)) { |
| util_queue_destroy(&tc->queue); |
| |
| for (unsigned i = 0; i < TC_MAX_BATCHES; i++) { |
| util_queue_fence_destroy(&tc->batch_slots[i].fence); |
| assert(!tc->batch_slots[i].token); |
| } |
| } |
| |
| slab_destroy_child(&tc->pool_transfers); |
| assert(tc->batch_slots[tc->next].num_total_slots == 0); |
| pipe->destroy(pipe); |
| |
| for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++) { |
| if (!util_queue_fence_is_signalled(&tc->buffer_lists[i].driver_flushed_fence)) |
| util_queue_fence_signal(&tc->buffer_lists[i].driver_flushed_fence); |
| util_queue_fence_destroy(&tc->buffer_lists[i].driver_flushed_fence); |
| } |
| |
| FREE(tc); |
| } |
| |
| static const tc_execute execute_func[TC_NUM_CALLS] = { |
| #define CALL(name) tc_call_##name, |
| #include "u_threaded_context_calls.h" |
| #undef CALL |
| }; |
| |
| void tc_driver_internal_flush_notify(struct threaded_context *tc) |
| { |
| /* Allow drivers to call this function even for internal contexts that |
| * don't have tc. It simplifies drivers. |
| */ |
| if (!tc) |
| return; |
| |
| /* Signal fences set by tc_batch_execute. */ |
| for (unsigned i = 0; i < tc->num_signal_fences_next_flush; i++) |
| util_queue_fence_signal(tc->signal_fences_next_flush[i]); |
| |
| tc->num_signal_fences_next_flush = 0; |
| } |
| |
| /** |
| * Wrap an existing pipe_context into a threaded_context. |
| * |
| * \param pipe pipe_context to wrap |
 * \param parent_transfer_pool parent slab pool set up for creating
 *                             pipe_transfer objects; the driver should have
 *                             one in pipe_screen.
| * \param replace_buffer callback for replacing a pipe_resource's storage |
| * with another pipe_resource's storage. |
| * \param options optional TC options/callbacks |
 * \param out if non-NULL and creation succeeds, the threaded_context is
 *            also returned here in addition to the return value
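 *
 * Usage sketch (hypothetical "mydrv_*" names; note that the return value may
 * be the original pipe if threading is disabled):
 *
 *    return threaded_context_create(pipe, &mydrv_screen->transfer_pool,
 *                                   mydrv_replace_buffer_storage,
 *                                   NULL, &mydrv_ctx->tc);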
| */ |
| struct pipe_context * |
| threaded_context_create(struct pipe_context *pipe, |
| struct slab_parent_pool *parent_transfer_pool, |
| tc_replace_buffer_storage_func replace_buffer, |
| const struct threaded_context_options *options, |
| struct threaded_context **out) |
| { |
| struct threaded_context *tc; |
| |
| if (!pipe) |
| return NULL; |
| |
| util_cpu_detect(); |
| |
| if (!debug_get_bool_option("GALLIUM_THREAD", util_get_cpu_caps()->nr_cpus > 1)) |
| return pipe; |
| |
| tc = CALLOC_STRUCT(threaded_context); |
| if (!tc) { |
| pipe->destroy(pipe); |
| return NULL; |
| } |
| |
| if (options) |
| tc->options = *options; |
| |
| pipe = trace_context_create_threaded(pipe->screen, pipe, &replace_buffer, &tc->options); |
| |
| /* The driver context isn't wrapped, so set its "priv" to NULL. */ |
| pipe->priv = NULL; |
| |
| tc->pipe = pipe; |
| tc->replace_buffer_storage = replace_buffer; |
| tc->map_buffer_alignment = |
| pipe->screen->get_param(pipe->screen, PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT); |
| tc->ubo_alignment = |
| MAX2(pipe->screen->get_param(pipe->screen, PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT), 64); |
| tc->base.priv = pipe; /* priv points to the wrapped driver context */ |
| tc->base.screen = pipe->screen; |
| tc->base.destroy = tc_destroy; |
| tc->base.callback = tc_callback; |
| |
| tc->base.stream_uploader = u_upload_clone(&tc->base, pipe->stream_uploader); |
| if (pipe->stream_uploader == pipe->const_uploader) |
| tc->base.const_uploader = tc->base.stream_uploader; |
| else |
| tc->base.const_uploader = u_upload_clone(&tc->base, pipe->const_uploader); |
| |
| if (!tc->base.stream_uploader || !tc->base.const_uploader) |
| goto fail; |
| |
| tc->use_forced_staging_uploads = true; |
| |
| /* The queue size is the number of batches "waiting". Batches are removed |
| * from the queue before being executed, so keep one tc_batch slot for that |
| * execution. Also, keep one unused slot for an unflushed batch. |
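    * E.g. if TC_MAX_BATCHES were 4, the queue would be sized to hold 2
    * waiting batches.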
| */ |
| if (!util_queue_init(&tc->queue, "gdrv", TC_MAX_BATCHES - 2, 1, 0, NULL)) |
| goto fail; |
| |
| for (unsigned i = 0; i < TC_MAX_BATCHES; i++) { |
| #if !defined(NDEBUG) && TC_DEBUG >= 1 |
| tc->batch_slots[i].sentinel = TC_SENTINEL; |
| #endif |
| tc->batch_slots[i].tc = tc; |
| util_queue_fence_init(&tc->batch_slots[i].fence); |
| } |
| for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++) |
| util_queue_fence_init(&tc->buffer_lists[i].driver_flushed_fence); |
| |
| list_inithead(&tc->unflushed_queries); |
| |
| slab_create_child(&tc->pool_transfers, parent_transfer_pool); |
| |
| /* If you have different limits in each shader stage, set the maximum. */ |
   struct pipe_screen *screen = pipe->screen;
| tc->max_vertex_buffers = |
| screen->get_param(screen, PIPE_CAP_MAX_VERTEX_BUFFERS); |
| tc->max_const_buffers = |
| screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT, |
| PIPE_SHADER_CAP_MAX_CONST_BUFFERS); |
| tc->max_shader_buffers = |
| screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT, |
| PIPE_SHADER_CAP_MAX_SHADER_BUFFERS); |
| tc->max_images = |
| screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT, |
| PIPE_SHADER_CAP_MAX_SHADER_IMAGES); |
| tc->max_samplers = |
| screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT, |
| PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS); |
| |
| tc->base.set_context_param = tc_set_context_param; /* always set this */ |
| |
| #define CTX_INIT(_member) \ |
| tc->base._member = tc->pipe->_member ? tc_##_member : NULL |
| |
| CTX_INIT(flush); |
| CTX_INIT(draw_vbo); |
| CTX_INIT(draw_vertex_state); |
| CTX_INIT(launch_grid); |
| CTX_INIT(resource_copy_region); |
| CTX_INIT(blit); |
| CTX_INIT(clear); |
| CTX_INIT(clear_render_target); |
| CTX_INIT(clear_depth_stencil); |
| CTX_INIT(clear_buffer); |
| CTX_INIT(clear_texture); |
| CTX_INIT(flush_resource); |
| CTX_INIT(generate_mipmap); |
| CTX_INIT(render_condition); |
| CTX_INIT(create_query); |
| CTX_INIT(create_batch_query); |
| CTX_INIT(destroy_query); |
| CTX_INIT(begin_query); |
| CTX_INIT(end_query); |
| CTX_INIT(get_query_result); |
| CTX_INIT(get_query_result_resource); |
| CTX_INIT(set_active_query_state); |
| CTX_INIT(create_blend_state); |
| CTX_INIT(bind_blend_state); |
| CTX_INIT(delete_blend_state); |
| CTX_INIT(create_sampler_state); |
| CTX_INIT(bind_sampler_states); |
| CTX_INIT(delete_sampler_state); |
| CTX_INIT(create_rasterizer_state); |
| CTX_INIT(bind_rasterizer_state); |
| CTX_INIT(delete_rasterizer_state); |
| CTX_INIT(create_depth_stencil_alpha_state); |
| CTX_INIT(bind_depth_stencil_alpha_state); |
| CTX_INIT(delete_depth_stencil_alpha_state); |
| CTX_INIT(create_fs_state); |
| CTX_INIT(bind_fs_state); |
| CTX_INIT(delete_fs_state); |
| CTX_INIT(create_vs_state); |
| CTX_INIT(bind_vs_state); |
| CTX_INIT(delete_vs_state); |
| CTX_INIT(create_gs_state); |
| CTX_INIT(bind_gs_state); |
| CTX_INIT(delete_gs_state); |
| CTX_INIT(create_tcs_state); |
| CTX_INIT(bind_tcs_state); |
| CTX_INIT(delete_tcs_state); |
| CTX_INIT(create_tes_state); |
| CTX_INIT(bind_tes_state); |
| CTX_INIT(delete_tes_state); |
| CTX_INIT(create_compute_state); |
| CTX_INIT(bind_compute_state); |
| CTX_INIT(delete_compute_state); |
| CTX_INIT(create_vertex_elements_state); |
| CTX_INIT(bind_vertex_elements_state); |
| CTX_INIT(delete_vertex_elements_state); |
| CTX_INIT(set_blend_color); |
| CTX_INIT(set_stencil_ref); |
| CTX_INIT(set_sample_mask); |
| CTX_INIT(set_min_samples); |
| CTX_INIT(set_clip_state); |
| CTX_INIT(set_constant_buffer); |
| CTX_INIT(set_inlinable_constants); |
| CTX_INIT(set_framebuffer_state); |
| CTX_INIT(set_polygon_stipple); |
| CTX_INIT(set_sample_locations); |
| CTX_INIT(set_scissor_states); |
| CTX_INIT(set_viewport_states); |
| CTX_INIT(set_window_rectangles); |
| CTX_INIT(set_sampler_views); |
| CTX_INIT(set_tess_state); |
| CTX_INIT(set_patch_vertices); |
| CTX_INIT(set_shader_buffers); |
| CTX_INIT(set_shader_images); |
| CTX_INIT(set_vertex_buffers); |
| CTX_INIT(create_stream_output_target); |
| CTX_INIT(stream_output_target_destroy); |
| CTX_INIT(set_stream_output_targets); |
| CTX_INIT(create_sampler_view); |
| CTX_INIT(sampler_view_destroy); |
| CTX_INIT(create_surface); |
| CTX_INIT(surface_destroy); |
| CTX_INIT(buffer_map); |
| CTX_INIT(texture_map); |
| CTX_INIT(transfer_flush_region); |
| CTX_INIT(buffer_unmap); |
| CTX_INIT(texture_unmap); |
| CTX_INIT(buffer_subdata); |
| CTX_INIT(texture_subdata); |
| CTX_INIT(texture_barrier); |
| CTX_INIT(memory_barrier); |
| CTX_INIT(resource_commit); |
| CTX_INIT(create_video_codec); |
| CTX_INIT(create_video_buffer); |
| CTX_INIT(set_compute_resources); |
| CTX_INIT(set_global_binding); |
| CTX_INIT(get_sample_position); |
| CTX_INIT(invalidate_resource); |
| CTX_INIT(get_device_reset_status); |
| CTX_INIT(set_device_reset_callback); |
| CTX_INIT(dump_debug_state); |
| CTX_INIT(set_log_context); |
| CTX_INIT(emit_string_marker); |
| CTX_INIT(set_debug_callback); |
| CTX_INIT(create_fence_fd); |
| CTX_INIT(fence_server_sync); |
| CTX_INIT(fence_server_signal); |
| CTX_INIT(get_timestamp); |
| CTX_INIT(create_texture_handle); |
| CTX_INIT(delete_texture_handle); |
| CTX_INIT(make_texture_handle_resident); |
| CTX_INIT(create_image_handle); |
| CTX_INIT(delete_image_handle); |
| CTX_INIT(make_image_handle_resident); |
| CTX_INIT(set_frontend_noop); |
| CTX_INIT(init_intel_perf_query_info); |
| CTX_INIT(get_intel_perf_query_info); |
| CTX_INIT(get_intel_perf_query_counter_info); |
| CTX_INIT(new_intel_perf_query_obj); |
| CTX_INIT(begin_intel_perf_query); |
| CTX_INIT(end_intel_perf_query); |
| CTX_INIT(delete_intel_perf_query); |
| CTX_INIT(wait_intel_perf_query); |
| CTX_INIT(is_intel_perf_query_ready); |
| CTX_INIT(get_intel_perf_query_data); |
| #undef CTX_INIT |
| |
| if (out) |
| *out = tc; |
| |
| tc_begin_next_buffer_list(tc); |
| return &tc->base; |
| |
| fail: |
| tc_destroy(&tc->base); |
| return NULL; |
| } |
| |
| void |
| threaded_context_init_bytes_mapped_limit(struct threaded_context *tc, unsigned divisor) |
| { |
| uint64_t total_ram; |
| if (os_get_total_physical_memory(&total_ram)) { |
| tc->bytes_mapped_limit = total_ram / divisor; |
| if (sizeof(void*) == 4) |
| tc->bytes_mapped_limit = MIN2(tc->bytes_mapped_limit, 512*1024*1024UL); |
| } |
| } |