| /********************************************************** |
| * Copyright 1998-2013 VMware, Inc. All rights reserved. |
| * |
| * Permission is hereby granted, free of charge, to any person |
| * obtaining a copy of this software and associated documentation |
| * files (the "Software"), to deal in the Software without |
| * restriction, including without limitation the rights to use, copy, |
| * modify, merge, publish, distribute, sublicense, and/or sell copies |
| * of the Software, and to permit persons to whom the Software is |
| * furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice shall be |
| * included in all copies or substantial portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
| * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
| * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
| * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| * SOFTWARE. |
| * |
| **********************************************************/ |
| |
| /** |
| * @file svga_tgsi_vgpu10.c |
| * |
| * TGSI -> VGPU10 shader translation. |
| * |
| * \author Mingcheng Chen |
| * \author Brian Paul |
| */ |
| |
| #include "pipe/p_compiler.h" |
| #include "pipe/p_shader_tokens.h" |
| #include "pipe/p_defines.h" |
| #include "tgsi/tgsi_build.h" |
| #include "tgsi/tgsi_dump.h" |
| #include "tgsi/tgsi_info.h" |
| #include "tgsi/tgsi_parse.h" |
| #include "tgsi/tgsi_scan.h" |
| #include "tgsi/tgsi_strings.h" |
| #include "tgsi/tgsi_two_side.h" |
| #include "tgsi/tgsi_aa_point.h" |
| #include "tgsi/tgsi_util.h" |
| #include "util/u_math.h" |
| #include "util/u_memory.h" |
| #include "util/u_bitmask.h" |
| #include "util/u_debug.h" |
| #include "util/u_pstipple.h" |
| |
| #include "svga_context.h" |
| #include "svga_debug.h" |
| #include "svga_link.h" |
| #include "svga_shader.h" |
| #include "svga_tgsi.h" |
| |
| #include "VGPU10ShaderTokens.h" |
| |
| |
/* NOTE(review): looks like an "index not assigned" sentinel; its users are
 * outside the part of the file visible here -- confirm.
 */
#define INVALID_INDEX 99999

/* NOTE(review): presumably the cap on simultaneously allocated internal
 * temporaries -- confirm against the allocator.
 */
#define MAX_INTERNAL_TEMPS 3

/* Capacity of svga_shader_emitter_v10::system_value_indexes[] */
#define MAX_SYSTEM_VALUES 4

/* Capacity of svga_shader_emitter_v10::immediates[] (four components each) */
#define MAX_IMMEDIATE_COUNT (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT/4)

/* Capacity of svga_shader_emitter_v10::temp_arrays[].  The original author
 * was unsure whether 64 is enough.
 */
#define MAX_TEMP_ARRAYS 64
| |
| |
/**
 * Clipping is complicated.  There are four different cases which we
 * handle during VS/GS shader translation:
 */
enum clipping_mode
{
   /** No clipping enabled. */
   CLIP_NONE,

   /**
    * The shader has no clipping declarations or code, but one or more
    * user-defined clip planes are enabled.  We generate extra code to
    * emit clip distances.
    */
   CLIP_LEGACY,

   /**
    * The shader already declares clip distance output registers and has
    * code to write to them.
    */
   CLIP_DISTANCE,

   /**
    * The shader declares a clip vertex output register and has code that
    * writes to the register.  We convert the clipvertex position into one
    * or more clip distances.
    */
   CLIP_VERTEX
};
| |
| |
| /* Shader signature info */ |
| struct svga_shader_signature |
| { |
| SVGA3dDXShaderSignatureHeader header; |
| SVGA3dDXShaderSignatureEntry inputs[PIPE_MAX_SHADER_INPUTS]; |
| SVGA3dDXShaderSignatureEntry outputs[PIPE_MAX_SHADER_OUTPUTS]; |
| SVGA3dDXShaderSignatureEntry patchConstants[PIPE_MAX_SHADER_OUTPUTS]; |
| }; |
| |
| static inline void |
| set_shader_signature_entry(SVGA3dDXShaderSignatureEntry *e, |
| unsigned index, |
| SVGA3dDXSignatureSemanticName sgnName, |
| unsigned mask, |
| SVGA3dDXSignatureRegisterComponentType compType, |
| SVGA3dDXSignatureMinPrecision minPrecision) |
| { |
| e->registerIndex = index; |
| e->semanticName = sgnName; |
| e->mask = mask; |
| e->componentType = compType; |
| e->minPrecision = minPrecision; |
| }; |
| |
| static const SVGA3dDXSignatureSemanticName |
| tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_COUNT] = { |
| SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_IS_FRONT_FACE, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_RENDER_TARGET_ARRAY_INDEX, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_SAMPLE_INDEX, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED |
| }; |
| |
| |
| /** |
| * Map tgsi semantic name to SVGA signature semantic name |
| */ |
| static inline SVGA3dDXSignatureSemanticName |
| map_tgsi_semantic_to_sgn_name(enum tgsi_semantic name) |
| { |
| assert(name < TGSI_SEMANTIC_COUNT); |
| |
| /* Do a few asserts here to spot check the mapping */ |
| assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_PRIMID] == |
| SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID); |
| assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_VIEWPORT_INDEX] == |
| SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX); |
| assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_INVOCATIONID] == |
| SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID); |
| |
| return tgsi_semantic_to_sgn_name[name]; |
| } |
| |
| |
| struct svga_shader_emitter_v10 |
| { |
| /* The token output buffer */ |
| unsigned size; |
| char *buf; |
| char *ptr; |
| |
| /* Information about the shader and state (does not change) */ |
| struct svga_compile_key key; |
| struct tgsi_shader_info info; |
| unsigned unit; |
| unsigned version; /**< Either 40 or 41 at this time */ |
| |
| unsigned cur_tgsi_token; /**< current tgsi token position */ |
| unsigned inst_start_token; |
| boolean discard_instruction; /**< throw away current instruction? */ |
| boolean reemit_instruction; /**< reemit current instruction */ |
| boolean skip_instruction; /**< skip current instruction */ |
| |
| union tgsi_immediate_data immediates[MAX_IMMEDIATE_COUNT][4]; |
| double (*immediates_dbl)[2]; |
| unsigned num_immediates; /**< Number of immediates emitted */ |
| unsigned common_immediate_pos[10]; /**< literals for common immediates */ |
| unsigned num_common_immediates; |
| boolean immediates_emitted; |
| |
| unsigned num_outputs; /**< include any extra outputs */ |
| /** The first extra output is reserved for |
| * non-adjusted vertex position for |
| * stream output purpose |
| */ |
| |
| /* Temporary Registers */ |
| unsigned num_shader_temps; /**< num of temps used by original shader */ |
| unsigned internal_temp_count; /**< currently allocated internal temps */ |
| struct { |
| unsigned start, size; |
| } temp_arrays[MAX_TEMP_ARRAYS]; |
| unsigned num_temp_arrays; |
| |
| /** Map TGSI temp registers to VGPU10 temp array IDs and indexes */ |
| struct { |
| unsigned arrayId, index; |
| boolean initialized; |
| } temp_map[VGPU10_MAX_TEMPS]; /**< arrayId, element */ |
| |
| unsigned initialize_temp_index; |
| |
| /** Number of constants used by original shader for each constant buffer. |
| * The size should probably always match with that of svga_state.constbufs. |
| */ |
| unsigned num_shader_consts[SVGA_MAX_CONST_BUFS]; |
| |
| /* Samplers */ |
| unsigned num_samplers; |
| boolean sampler_view[PIPE_MAX_SAMPLERS]; /**< True if sampler view exists*/ |
| ubyte sampler_target[PIPE_MAX_SAMPLERS]; /**< TGSI_TEXTURE_x */ |
| ubyte sampler_return_type[PIPE_MAX_SAMPLERS]; /**< TGSI_RETURN_TYPE_x */ |
| |
| /* Index Range declaration */ |
| struct { |
| unsigned start_index; |
| unsigned count; |
| boolean required; |
| unsigned operandType; |
| unsigned size; |
| unsigned dim; |
| } index_range; |
| |
| /* Address regs (really implemented with temps) */ |
| unsigned num_address_regs; |
| unsigned address_reg_index[MAX_VGPU10_ADDR_REGS]; |
| |
| /* Output register usage masks */ |
| ubyte output_usage_mask[PIPE_MAX_SHADER_OUTPUTS]; |
| |
| /* To map TGSI system value index to VGPU shader input indexes */ |
| ubyte system_value_indexes[MAX_SYSTEM_VALUES]; |
| |
| struct { |
| /* vertex position scale/translation */ |
| unsigned out_index; /**< the real position output reg */ |
| unsigned tmp_index; /**< the fake/temp position output reg */ |
| unsigned so_index; /**< the non-adjusted position output reg */ |
| unsigned prescale_cbuf_index; /* index to the const buf for prescale */ |
| unsigned prescale_scale_index, prescale_trans_index; |
| unsigned num_prescale; /* number of prescale factor in const buf */ |
| unsigned viewport_index; |
| unsigned need_prescale:1; |
| unsigned have_prescale:1; |
| } vposition; |
| |
| /* For vertex shaders only */ |
| struct { |
| /* viewport constant */ |
| unsigned viewport_index; |
| |
| unsigned vertex_id_bias_index; |
| unsigned vertex_id_sys_index; |
| unsigned vertex_id_tmp_index; |
| |
| /* temp index of adjusted vertex attributes */ |
| unsigned adjusted_input[PIPE_MAX_SHADER_INPUTS]; |
| } vs; |
| |
| /* For fragment shaders only */ |
| struct { |
| unsigned color_out_index[PIPE_MAX_COLOR_BUFS]; /**< the real color output regs */ |
| unsigned num_color_outputs; |
| unsigned color_tmp_index; /**< fake/temp color output reg */ |
| unsigned alpha_ref_index; /**< immediate constant for alpha ref */ |
| |
| /* front-face */ |
| unsigned face_input_index; /**< real fragment shader face reg (bool) */ |
| unsigned face_tmp_index; /**< temp face reg converted to -1 / +1 */ |
| |
| unsigned pstipple_sampler_unit; |
| |
| unsigned fragcoord_input_index; /**< real fragment position input reg */ |
| unsigned fragcoord_tmp_index; /**< 1/w modified position temp reg */ |
| |
| unsigned sample_id_sys_index; /**< TGSI index of sample id sys value */ |
| |
| unsigned sample_pos_sys_index; /**< TGSI index of sample pos sys value */ |
| unsigned sample_pos_tmp_index; /**< which temp reg has the sample pos */ |
| |
| /** TGSI index of sample mask input sys value */ |
| unsigned sample_mask_in_sys_index; |
| |
| /** Which texture units are doing shadow comparison in the FS code */ |
| unsigned shadow_compare_units; |
| |
| /* layer */ |
| unsigned layer_input_index; /**< TGSI index of layer */ |
| unsigned layer_imm_index; /**< immediate for default layer 0 */ |
| } fs; |
| |
| /* For geometry shaders only */ |
| struct { |
| VGPU10_PRIMITIVE prim_type;/**< VGPU10 primitive type */ |
| VGPU10_PRIMITIVE_TOPOLOGY prim_topology; /**< VGPU10 primitive topology */ |
| unsigned input_size; /**< size of input arrays */ |
| unsigned prim_id_index; /**< primitive id register index */ |
| unsigned max_out_vertices; /**< maximum number of output vertices */ |
| unsigned invocations; |
| unsigned invocation_id_sys_index; |
| |
| unsigned viewport_index_out_index; |
| unsigned viewport_index_tmp_index; |
| } gs; |
| |
| /* For tessellation control shaders only */ |
| struct { |
| unsigned vertices_per_patch_index; /**< vertices_per_patch system value index */ |
| unsigned imm_index; /**< immediate for tcs */ |
| unsigned invocation_id_sys_index; /**< invocation id */ |
| unsigned invocation_id_tmp_index; |
| unsigned instruction_token_pos; /* token pos for the first instruction */ |
| unsigned control_point_input_index; /* control point input register index */ |
| unsigned control_point_addr_index; /* control point input address register */ |
| unsigned control_point_out_index; /* control point output register index */ |
| unsigned control_point_tmp_index; /* control point temporary register */ |
| unsigned control_point_out_count; /* control point output count */ |
| boolean control_point_phase; /* true if in control point phase */ |
| boolean fork_phase_add_signature; /* true if needs to add signature in fork phase */ |
| unsigned patch_generic_out_count; /* per-patch generic output count */ |
| unsigned patch_generic_out_index; /* per-patch generic output register index*/ |
| unsigned patch_generic_tmp_index; /* per-patch generic temporary register index*/ |
| unsigned prim_id_index; /* primitive id */ |
| struct { |
| unsigned out_index; /* real tessinner output register */ |
| unsigned temp_index; /* tessinner temp register */ |
| unsigned tgsi_index; /* tgsi tessinner output register */ |
| } inner; |
| struct { |
| unsigned out_index; /* real tessouter output register */ |
| unsigned temp_index; /* tessouter temp register */ |
| unsigned tgsi_index; /* tgsi tessouter output register */ |
| } outer; |
| } tcs; |
| |
| /* For tessellation evaluation shaders only */ |
| struct { |
| enum pipe_prim_type prim_mode; |
| enum pipe_tess_spacing spacing; |
| boolean vertices_order_cw; |
| boolean point_mode; |
| unsigned tesscoord_sys_index; |
| unsigned prim_id_index; /* primitive id */ |
| struct { |
| unsigned in_index; /* real tessinner input register */ |
| unsigned temp_index; /* tessinner temp register */ |
| unsigned tgsi_index; /* tgsi tessinner input register */ |
| } inner; |
| struct { |
| unsigned in_index; /* real tessouter input register */ |
| unsigned temp_index; /* tessouter temp register */ |
| unsigned tgsi_index; /* tgsi tessouter input register */ |
| } outer; |
| } tes; |
| |
| /* For vertex or geometry shaders */ |
| enum clipping_mode clip_mode; |
| unsigned clip_dist_out_index; /**< clip distance output register index */ |
| unsigned clip_dist_tmp_index; /**< clip distance temporary register */ |
| unsigned clip_dist_so_index; /**< clip distance shadow copy */ |
| |
| /** Index of temporary holding the clipvertex coordinate */ |
| unsigned clip_vertex_out_index; /**< clip vertex output register index */ |
| unsigned clip_vertex_tmp_index; /**< clip vertex temporary index */ |
| |
| /* user clip plane constant slot indexes */ |
| unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES]; |
| |
| unsigned num_output_writes; |
| boolean constant_color_output; |
| |
| boolean uses_flat_interp; |
| |
| unsigned reserved_token; /* index to the reserved token */ |
| boolean uses_precise_qualifier; |
| |
| /* For all shaders: const reg index for RECT coord scaling */ |
| unsigned texcoord_scale_index[PIPE_MAX_SAMPLERS]; |
| |
| /* For all shaders: const reg index for texture buffer size */ |
| unsigned texture_buffer_size_index[PIPE_MAX_SAMPLERS]; |
| |
| /* VS/TCS/TES/GS/FS Linkage info */ |
| struct shader_linkage linkage; |
| struct tgsi_shader_info *prevShaderInfo; |
| |
| /* Shader signature */ |
| struct svga_shader_signature signature; |
| |
| bool register_overflow; /**< Set if we exceed a VGPU10 register limit */ |
| |
| /* For pipe_debug_message */ |
| struct pipe_debug_callback svga_debug_callback; |
| |
| /* current loop depth in shader */ |
| unsigned current_loop_depth; |
| }; |
| |
| |
| static void emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit); |
| static void emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit); |
| static boolean emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit); |
| static boolean emit_constant_declaration(struct svga_shader_emitter_v10 *emit); |
| static boolean emit_sampler_declarations(struct svga_shader_emitter_v10 *emit); |
| static boolean emit_resource_declarations(struct svga_shader_emitter_v10 *emit); |
| static boolean emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit); |
| static boolean emit_index_range_declaration(struct svga_shader_emitter_v10 *emit); |
| static void emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit); |
| |
| static boolean |
| emit_post_helpers(struct svga_shader_emitter_v10 *emit); |
| |
| static boolean |
| emit_vertex(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst); |
| |
| static boolean |
| emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, |
| unsigned inst_number, |
| const struct tgsi_full_instruction *inst); |
| |
| static void |
| emit_input_declaration(struct svga_shader_emitter_v10 *emit, |
| unsigned opcodeType, unsigned operandType, |
| unsigned dim, unsigned index, unsigned size, |
| unsigned name, unsigned numComp, |
| unsigned selMode, unsigned usageMask, |
| unsigned interpMode, |
| boolean addSignature, |
| SVGA3dDXSignatureSemanticName sgnName); |
| |
| static void |
| create_temp_array(struct svga_shader_emitter_v10 *emit, |
| unsigned arrayID, unsigned first, unsigned count, |
| unsigned startIndex); |
| |
| static char err_buf[128]; |
| |
| static boolean |
| expand(struct svga_shader_emitter_v10 *emit) |
| { |
| char *new_buf; |
| unsigned newsize = emit->size * 2; |
| |
| if (emit->buf != err_buf) |
| new_buf = REALLOC(emit->buf, emit->size, newsize); |
| else |
| new_buf = NULL; |
| |
| if (!new_buf) { |
| emit->ptr = err_buf; |
| emit->buf = err_buf; |
| emit->size = sizeof(err_buf); |
| return FALSE; |
| } |
| |
| emit->size = newsize; |
| emit->ptr = new_buf + (emit->ptr - emit->buf); |
| emit->buf = new_buf; |
| return TRUE; |
| } |
| |
| /** |
| * Create and initialize a new svga_shader_emitter_v10 object. |
| */ |
| static struct svga_shader_emitter_v10 * |
| alloc_emitter(void) |
| { |
| struct svga_shader_emitter_v10 *emit = CALLOC(1, sizeof(*emit)); |
| |
| if (!emit) |
| return NULL; |
| |
| /* to initialize the output buffer */ |
| emit->size = 512; |
| if (!expand(emit)) { |
| FREE(emit); |
| return NULL; |
| } |
| return emit; |
| } |
| |
| /** |
| * Free an svga_shader_emitter_v10 object. |
| */ |
| static void |
| free_emitter(struct svga_shader_emitter_v10 *emit) |
| { |
| assert(emit); |
| FREE(emit->buf); /* will be NULL if translation succeeded */ |
| FREE(emit); |
| } |
| |
| static inline boolean |
| reserve(struct svga_shader_emitter_v10 *emit, |
| unsigned nr_dwords) |
| { |
| while (emit->ptr - emit->buf + nr_dwords * sizeof(uint32) >= emit->size) { |
| if (!expand(emit)) |
| return FALSE; |
| } |
| |
| return TRUE; |
| } |
| |
| static boolean |
| emit_dword(struct svga_shader_emitter_v10 *emit, uint32 dword) |
| { |
| if (!reserve(emit, 1)) |
| return FALSE; |
| |
| *(uint32 *)emit->ptr = dword; |
| emit->ptr += sizeof dword; |
| return TRUE; |
| } |
| |
| static boolean |
| emit_dwords(struct svga_shader_emitter_v10 *emit, |
| const uint32 *dwords, |
| unsigned nr) |
| { |
| if (!reserve(emit, nr)) |
| return FALSE; |
| |
| memcpy(emit->ptr, dwords, nr * sizeof *dwords); |
| emit->ptr += nr * sizeof *dwords; |
| return TRUE; |
| } |
| |
| /** Return the number of tokens in the emitter's buffer */ |
| static unsigned |
| emit_get_num_tokens(const struct svga_shader_emitter_v10 *emit) |
| { |
| return (emit->ptr - emit->buf) / sizeof(unsigned); |
| } |
| |
| |
| /** |
| * Check for register overflow. If we overflow we'll set an |
| * error flag. This function can be called for register declarations |
| * or use as src/dst instruction operands. |
| * \param type register type. One of VGPU10_OPERAND_TYPE_x |
| or VGPU10_OPCODE_DCL_x |
| * \param index the register index |
| */ |
| static void |
| check_register_index(struct svga_shader_emitter_v10 *emit, |
| unsigned operandType, unsigned index) |
| { |
| bool overflow_before = emit->register_overflow; |
| |
| switch (operandType) { |
| case VGPU10_OPERAND_TYPE_TEMP: |
| case VGPU10_OPERAND_TYPE_INDEXABLE_TEMP: |
| case VGPU10_OPCODE_DCL_TEMPS: |
| if (index >= VGPU10_MAX_TEMPS) { |
| emit->register_overflow = TRUE; |
| } |
| break; |
| case VGPU10_OPERAND_TYPE_CONSTANT_BUFFER: |
| case VGPU10_OPCODE_DCL_CONSTANT_BUFFER: |
| if (index >= VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) { |
| emit->register_overflow = TRUE; |
| } |
| break; |
| case VGPU10_OPERAND_TYPE_INPUT: |
| case VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID: |
| case VGPU10_OPCODE_DCL_INPUT: |
| case VGPU10_OPCODE_DCL_INPUT_SGV: |
| case VGPU10_OPCODE_DCL_INPUT_SIV: |
| case VGPU10_OPCODE_DCL_INPUT_PS: |
| case VGPU10_OPCODE_DCL_INPUT_PS_SGV: |
| case VGPU10_OPCODE_DCL_INPUT_PS_SIV: |
| if ((emit->unit == PIPE_SHADER_VERTEX && |
| index >= VGPU10_MAX_VS_INPUTS) || |
| (emit->unit == PIPE_SHADER_GEOMETRY && |
| index >= VGPU10_MAX_GS_INPUTS) || |
| (emit->unit == PIPE_SHADER_FRAGMENT && |
| index >= VGPU10_MAX_FS_INPUTS) || |
| (emit->unit == PIPE_SHADER_TESS_CTRL && |
| index >= VGPU11_MAX_HS_INPUT_CONTROL_POINTS) || |
| (emit->unit == PIPE_SHADER_TESS_EVAL && |
| index >= VGPU11_MAX_DS_INPUT_CONTROL_POINTS)) { |
| emit->register_overflow = TRUE; |
| } |
| break; |
| case VGPU10_OPERAND_TYPE_OUTPUT: |
| case VGPU10_OPCODE_DCL_OUTPUT: |
| case VGPU10_OPCODE_DCL_OUTPUT_SGV: |
| case VGPU10_OPCODE_DCL_OUTPUT_SIV: |
| /* Note: we are skipping two output indices in tcs for |
| * tessinner/outer levels. Implementation will not exceed |
| * number of output count but it allows index to go beyond |
| * VGPU11_MAX_HS_OUTPUTS. |
| * Index will never be >= index >= VGPU11_MAX_HS_OUTPUTS + 2 |
| */ |
| if ((emit->unit == PIPE_SHADER_VERTEX && |
| index >= VGPU10_MAX_VS_OUTPUTS) || |
| (emit->unit == PIPE_SHADER_GEOMETRY && |
| index >= VGPU10_MAX_GS_OUTPUTS) || |
| (emit->unit == PIPE_SHADER_FRAGMENT && |
| index >= VGPU10_MAX_FS_OUTPUTS) || |
| (emit->unit == PIPE_SHADER_TESS_CTRL && |
| index >= VGPU11_MAX_HS_OUTPUTS + 2) || |
| (emit->unit == PIPE_SHADER_TESS_EVAL && |
| index >= VGPU11_MAX_DS_OUTPUTS)) { |
| emit->register_overflow = TRUE; |
| } |
| break; |
| case VGPU10_OPERAND_TYPE_SAMPLER: |
| case VGPU10_OPCODE_DCL_SAMPLER: |
| if (index >= VGPU10_MAX_SAMPLERS) { |
| emit->register_overflow = TRUE; |
| } |
| break; |
| case VGPU10_OPERAND_TYPE_RESOURCE: |
| case VGPU10_OPCODE_DCL_RESOURCE: |
| if (index >= VGPU10_MAX_RESOURCES) { |
| emit->register_overflow = TRUE; |
| } |
| break; |
| case VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: |
| if (index >= MAX_IMMEDIATE_COUNT) { |
| emit->register_overflow = TRUE; |
| } |
| break; |
| case VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK: |
| /* nothing */ |
| break; |
| default: |
| assert(0); |
| ; /* nothing */ |
| } |
| |
| if (emit->register_overflow && !overflow_before) { |
| debug_printf("svga: vgpu10 register overflow (reg %u, index %u)\n", |
| operandType, index); |
| } |
| } |
| |
| |
| /** |
| * Examine misc state to determine the clipping mode. |
| */ |
| static void |
| determine_clipping_mode(struct svga_shader_emitter_v10 *emit) |
| { |
| /* num_written_clipdistance in the shader info for tessellation |
| * control shader is always 0 because the TGSI_PROPERTY_NUM_CLIPDIST_ENABLED |
| * is not defined for this shader. So we go through all the output declarations |
| * to set the num_written_clipdistance. This is just to determine the |
| * clipping mode. |
| */ |
| if (emit->unit == PIPE_SHADER_TESS_CTRL) { |
| unsigned i; |
| for (i = 0; i < emit->info.num_outputs; i++) { |
| if (emit->info.output_semantic_name[i] == TGSI_SEMANTIC_CLIPDIST) { |
| emit->info.num_written_clipdistance = |
| 4 * (emit->info.output_semantic_index[i] + 1); |
| } |
| } |
| } |
| |
| if (emit->info.num_written_clipdistance > 0) { |
| emit->clip_mode = CLIP_DISTANCE; |
| } |
| else if (emit->info.writes_clipvertex) { |
| emit->clip_mode = CLIP_VERTEX; |
| } |
| else if (emit->key.clip_plane_enable && emit->key.last_vertex_stage) { |
| /* |
| * Only the last shader in the vertex processing stage needs to |
| * handle the legacy clip mode. |
| */ |
| emit->clip_mode = CLIP_LEGACY; |
| } |
| else { |
| emit->clip_mode = CLIP_NONE; |
| } |
| } |
| |
| |
| /** |
| * For clip distance register declarations and clip distance register |
| * writes we need to mask the declaration usage or instruction writemask |
| * (respectively) against the set of the really-enabled clipping planes. |
| * |
| * The piglit test spec/glsl-1.30/execution/clipping/vs-clip-distance-enables |
| * has a VS that writes to all 8 clip distance registers, but the plane enable |
| * flags are a subset of that. |
| * |
| * This function is used to apply the plane enable flags to the register |
| * declaration or instruction writemask. |
| * |
| * \param writemask the declaration usage mask or instruction writemask |
| * \param clip_reg_index which clip plane register is being declared/written. |
| * The legal values are 0 and 1 (two clip planes per |
| * register, for a total of 8 clip planes) |
| */ |
| static unsigned |
| apply_clip_plane_mask(struct svga_shader_emitter_v10 *emit, |
| unsigned writemask, unsigned clip_reg_index) |
| { |
| unsigned shift; |
| |
| assert(clip_reg_index < 2); |
| |
| /* four clip planes per clip register: */ |
| shift = clip_reg_index * 4; |
| writemask &= ((emit->key.clip_plane_enable >> shift) & 0xf); |
| |
| return writemask; |
| } |
| |
| |
| /** |
| * Translate gallium shader type into VGPU10 type. |
| */ |
| static VGPU10_PROGRAM_TYPE |
| translate_shader_type(unsigned type) |
| { |
| switch (type) { |
| case PIPE_SHADER_VERTEX: |
| return VGPU10_VERTEX_SHADER; |
| case PIPE_SHADER_GEOMETRY: |
| return VGPU10_GEOMETRY_SHADER; |
| case PIPE_SHADER_FRAGMENT: |
| return VGPU10_PIXEL_SHADER; |
| case PIPE_SHADER_TESS_CTRL: |
| return VGPU10_HULL_SHADER; |
| case PIPE_SHADER_TESS_EVAL: |
| return VGPU10_DOMAIN_SHADER; |
| case PIPE_SHADER_COMPUTE: |
| return VGPU10_COMPUTE_SHADER; |
| default: |
| assert(!"Unexpected shader type"); |
| return VGPU10_VERTEX_SHADER; |
| } |
| } |
| |
| |
| /** |
| * Translate a TGSI_OPCODE_x into a VGPU10_OPCODE_x |
| * Note: we only need to translate the opcodes for "simple" instructions, |
| * as seen below. All other opcodes are handled/translated specially. |
| */ |
| static VGPU10_OPCODE_TYPE |
| translate_opcode(enum tgsi_opcode opcode) |
| { |
| switch (opcode) { |
| case TGSI_OPCODE_MOV: |
| return VGPU10_OPCODE_MOV; |
| case TGSI_OPCODE_MUL: |
| return VGPU10_OPCODE_MUL; |
| case TGSI_OPCODE_ADD: |
| return VGPU10_OPCODE_ADD; |
| case TGSI_OPCODE_DP3: |
| return VGPU10_OPCODE_DP3; |
| case TGSI_OPCODE_DP4: |
| return VGPU10_OPCODE_DP4; |
| case TGSI_OPCODE_MIN: |
| return VGPU10_OPCODE_MIN; |
| case TGSI_OPCODE_MAX: |
| return VGPU10_OPCODE_MAX; |
| case TGSI_OPCODE_MAD: |
| return VGPU10_OPCODE_MAD; |
| case TGSI_OPCODE_SQRT: |
| return VGPU10_OPCODE_SQRT; |
| case TGSI_OPCODE_FRC: |
| return VGPU10_OPCODE_FRC; |
| case TGSI_OPCODE_FLR: |
| return VGPU10_OPCODE_ROUND_NI; |
| case TGSI_OPCODE_FSEQ: |
| return VGPU10_OPCODE_EQ; |
| case TGSI_OPCODE_FSGE: |
| return VGPU10_OPCODE_GE; |
| case TGSI_OPCODE_FSNE: |
| return VGPU10_OPCODE_NE; |
| case TGSI_OPCODE_DDX: |
| return VGPU10_OPCODE_DERIV_RTX; |
| case TGSI_OPCODE_DDY: |
| return VGPU10_OPCODE_DERIV_RTY; |
| case TGSI_OPCODE_RET: |
| return VGPU10_OPCODE_RET; |
| case TGSI_OPCODE_DIV: |
| return VGPU10_OPCODE_DIV; |
| case TGSI_OPCODE_IDIV: |
| return VGPU10_OPCODE_VMWARE; |
| case TGSI_OPCODE_DP2: |
| return VGPU10_OPCODE_DP2; |
| case TGSI_OPCODE_BRK: |
| return VGPU10_OPCODE_BREAK; |
| case TGSI_OPCODE_IF: |
| return VGPU10_OPCODE_IF; |
| case TGSI_OPCODE_ELSE: |
| return VGPU10_OPCODE_ELSE; |
| case TGSI_OPCODE_ENDIF: |
| return VGPU10_OPCODE_ENDIF; |
| case TGSI_OPCODE_CEIL: |
| return VGPU10_OPCODE_ROUND_PI; |
| case TGSI_OPCODE_I2F: |
| return VGPU10_OPCODE_ITOF; |
| case TGSI_OPCODE_NOT: |
| return VGPU10_OPCODE_NOT; |
| case TGSI_OPCODE_TRUNC: |
| return VGPU10_OPCODE_ROUND_Z; |
| case TGSI_OPCODE_SHL: |
| return VGPU10_OPCODE_ISHL; |
| case TGSI_OPCODE_AND: |
| return VGPU10_OPCODE_AND; |
| case TGSI_OPCODE_OR: |
| return VGPU10_OPCODE_OR; |
| case TGSI_OPCODE_XOR: |
| return VGPU10_OPCODE_XOR; |
| case TGSI_OPCODE_CONT: |
| return VGPU10_OPCODE_CONTINUE; |
| case TGSI_OPCODE_EMIT: |
| return VGPU10_OPCODE_EMIT; |
| case TGSI_OPCODE_ENDPRIM: |
| return VGPU10_OPCODE_CUT; |
| case TGSI_OPCODE_BGNLOOP: |
| return VGPU10_OPCODE_LOOP; |
| case TGSI_OPCODE_ENDLOOP: |
| return VGPU10_OPCODE_ENDLOOP; |
| case TGSI_OPCODE_ENDSUB: |
| return VGPU10_OPCODE_RET; |
| case TGSI_OPCODE_NOP: |
| return VGPU10_OPCODE_NOP; |
| case TGSI_OPCODE_END: |
| return VGPU10_OPCODE_RET; |
| case TGSI_OPCODE_F2I: |
| return VGPU10_OPCODE_FTOI; |
| case TGSI_OPCODE_IMAX: |
| return VGPU10_OPCODE_IMAX; |
| case TGSI_OPCODE_IMIN: |
| return VGPU10_OPCODE_IMIN; |
| case TGSI_OPCODE_UDIV: |
| case TGSI_OPCODE_UMOD: |
| case TGSI_OPCODE_MOD: |
| return VGPU10_OPCODE_UDIV; |
| case TGSI_OPCODE_IMUL_HI: |
| return VGPU10_OPCODE_IMUL; |
| case TGSI_OPCODE_INEG: |
| return VGPU10_OPCODE_INEG; |
| case TGSI_OPCODE_ISHR: |
| return VGPU10_OPCODE_ISHR; |
| case TGSI_OPCODE_ISGE: |
| return VGPU10_OPCODE_IGE; |
| case TGSI_OPCODE_ISLT: |
| return VGPU10_OPCODE_ILT; |
| case TGSI_OPCODE_F2U: |
| return VGPU10_OPCODE_FTOU; |
| case TGSI_OPCODE_UADD: |
| return VGPU10_OPCODE_IADD; |
| case TGSI_OPCODE_U2F: |
| return VGPU10_OPCODE_UTOF; |
| case TGSI_OPCODE_UCMP: |
| return VGPU10_OPCODE_MOVC; |
| case TGSI_OPCODE_UMAD: |
| return VGPU10_OPCODE_UMAD; |
| case TGSI_OPCODE_UMAX: |
| return VGPU10_OPCODE_UMAX; |
| case TGSI_OPCODE_UMIN: |
| return VGPU10_OPCODE_UMIN; |
| case TGSI_OPCODE_UMUL: |
| case TGSI_OPCODE_UMUL_HI: |
| return VGPU10_OPCODE_UMUL; |
| case TGSI_OPCODE_USEQ: |
| return VGPU10_OPCODE_IEQ; |
| case TGSI_OPCODE_USGE: |
| return VGPU10_OPCODE_UGE; |
| case TGSI_OPCODE_USHR: |
| return VGPU10_OPCODE_USHR; |
| case TGSI_OPCODE_USLT: |
| return VGPU10_OPCODE_ULT; |
| case TGSI_OPCODE_USNE: |
| return VGPU10_OPCODE_INE; |
| case TGSI_OPCODE_SWITCH: |
| return VGPU10_OPCODE_SWITCH; |
| case TGSI_OPCODE_CASE: |
| return VGPU10_OPCODE_CASE; |
| case TGSI_OPCODE_DEFAULT: |
| return VGPU10_OPCODE_DEFAULT; |
| case TGSI_OPCODE_ENDSWITCH: |
| return VGPU10_OPCODE_ENDSWITCH; |
| case TGSI_OPCODE_FSLT: |
| return VGPU10_OPCODE_LT; |
| case TGSI_OPCODE_ROUND: |
| return VGPU10_OPCODE_ROUND_NE; |
| /* Begin SM5 opcodes */ |
| case TGSI_OPCODE_F2D: |
| return VGPU10_OPCODE_FTOD; |
| case TGSI_OPCODE_D2F: |
| return VGPU10_OPCODE_DTOF; |
| case TGSI_OPCODE_DMUL: |
| return VGPU10_OPCODE_DMUL; |
| case TGSI_OPCODE_DADD: |
| return VGPU10_OPCODE_DADD; |
| case TGSI_OPCODE_DMAX: |
| return VGPU10_OPCODE_DMAX; |
| case TGSI_OPCODE_DMIN: |
| return VGPU10_OPCODE_DMIN; |
| case TGSI_OPCODE_DSEQ: |
| return VGPU10_OPCODE_DEQ; |
| case TGSI_OPCODE_DSGE: |
| return VGPU10_OPCODE_DGE; |
| case TGSI_OPCODE_DSLT: |
| return VGPU10_OPCODE_DLT; |
| case TGSI_OPCODE_DSNE: |
| return VGPU10_OPCODE_DNE; |
| case TGSI_OPCODE_IBFE: |
| return VGPU10_OPCODE_IBFE; |
| case TGSI_OPCODE_UBFE: |
| return VGPU10_OPCODE_UBFE; |
| case TGSI_OPCODE_BFI: |
| return VGPU10_OPCODE_BFI; |
| case TGSI_OPCODE_BREV: |
| return VGPU10_OPCODE_BFREV; |
| case TGSI_OPCODE_POPC: |
| return VGPU10_OPCODE_COUNTBITS; |
| case TGSI_OPCODE_LSB: |
| return VGPU10_OPCODE_FIRSTBIT_LO; |
| case TGSI_OPCODE_IMSB: |
| return VGPU10_OPCODE_FIRSTBIT_SHI; |
| case TGSI_OPCODE_UMSB: |
| return VGPU10_OPCODE_FIRSTBIT_HI; |
| case TGSI_OPCODE_INTERP_CENTROID: |
| return VGPU10_OPCODE_EVAL_CENTROID; |
| case TGSI_OPCODE_INTERP_SAMPLE: |
| return VGPU10_OPCODE_EVAL_SAMPLE_INDEX; |
| case TGSI_OPCODE_BARRIER: |
| return VGPU10_OPCODE_SYNC; |
| |
| /* DX11.1 Opcodes */ |
| case TGSI_OPCODE_DDIV: |
| return VGPU10_OPCODE_DDIV; |
| case TGSI_OPCODE_DRCP: |
| return VGPU10_OPCODE_DRCP; |
| case TGSI_OPCODE_D2I: |
| return VGPU10_OPCODE_DTOI; |
| case TGSI_OPCODE_D2U: |
| return VGPU10_OPCODE_DTOU; |
| case TGSI_OPCODE_I2D: |
| return VGPU10_OPCODE_ITOD; |
| case TGSI_OPCODE_U2D: |
| return VGPU10_OPCODE_UTOD; |
| |
| case TGSI_OPCODE_SAMPLE_POS: |
| /* Note: we never actually get this opcode because there's no GLSL |
| * function to query multisample resource sample positions. There's |
| * only the TGSI_SEMANTIC_SAMPLEPOS system value which contains the |
| * position of the current sample in the render target. |
| */ |
| /* FALL-THROUGH */ |
| case TGSI_OPCODE_SAMPLE_INFO: |
| /* NOTE: we never actually get this opcode because the GLSL compiler |
| * implements the gl_NumSamples variable with a simple constant in the |
| * constant buffer. |
| */ |
| /* FALL-THROUGH */ |
| default: |
| assert(!"Unexpected TGSI opcode in translate_opcode()"); |
| return VGPU10_OPCODE_NOP; |
| } |
| } |
| |
| |
| /** |
| * Translate a TGSI register file type into a VGPU10 operand type. |
| * \param array is the TGSI_FILE_TEMPORARY register an array? |
| */ |
| static VGPU10_OPERAND_TYPE |
| translate_register_file(enum tgsi_file_type file, boolean array) |
| { |
| switch (file) { |
| case TGSI_FILE_CONSTANT: |
| return VGPU10_OPERAND_TYPE_CONSTANT_BUFFER; |
| case TGSI_FILE_INPUT: |
| return VGPU10_OPERAND_TYPE_INPUT; |
| case TGSI_FILE_OUTPUT: |
| return VGPU10_OPERAND_TYPE_OUTPUT; |
| case TGSI_FILE_TEMPORARY: |
| return array ? VGPU10_OPERAND_TYPE_INDEXABLE_TEMP |
| : VGPU10_OPERAND_TYPE_TEMP; |
| case TGSI_FILE_IMMEDIATE: |
| /* all immediates are 32-bit values at this time so |
| * VGPU10_OPERAND_TYPE_IMMEDIATE64 is not possible at this time. |
| */ |
| return VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER; |
| case TGSI_FILE_SAMPLER: |
| return VGPU10_OPERAND_TYPE_SAMPLER; |
| case TGSI_FILE_SYSTEM_VALUE: |
| return VGPU10_OPERAND_TYPE_INPUT; |
| |
| /* XXX TODO more cases to finish */ |
| |
| default: |
| assert(!"Bad tgsi register file!"); |
| return VGPU10_OPERAND_TYPE_NULL; |
| } |
| } |
| |
| |
| /** |
| * Emit a null dst register |
| */ |
| static void |
| emit_null_dst_register(struct svga_shader_emitter_v10 *emit) |
| { |
| VGPU10OperandToken0 operand; |
| |
| operand.value = 0; |
| operand.operandType = VGPU10_OPERAND_TYPE_NULL; |
| operand.numComponents = VGPU10_OPERAND_0_COMPONENT; |
| |
| emit_dword(emit, operand.value); |
| } |
| |
| |
| /** |
| * If the given register is a temporary, return the array ID. |
| * Else return zero. |
| */ |
| static unsigned |
| get_temp_array_id(const struct svga_shader_emitter_v10 *emit, |
| enum tgsi_file_type file, unsigned index) |
| { |
| if (file == TGSI_FILE_TEMPORARY) { |
| return emit->temp_map[index].arrayId; |
| } |
| else { |
| return 0; |
| } |
| } |
| |
| |
| /** |
| * If the given register is a temporary, convert the index from a TGSI |
| * TEMPORARY index to a VGPU10 temp index. |
| */ |
| static unsigned |
| remap_temp_index(const struct svga_shader_emitter_v10 *emit, |
| enum tgsi_file_type file, unsigned index) |
| { |
| if (file == TGSI_FILE_TEMPORARY) { |
| return emit->temp_map[index].index; |
| } |
| else { |
| return index; |
| } |
| } |
| |
| |
| /** |
| * Setup the operand0 fields related to indexing (1D, 2D, relative, etc). |
| * Note: the operandType field must already be initialized. |
| * \param file the register file being accessed |
| * \param indirect using indirect addressing of the register file? |
| * \param index2D if true, 2-D indexing is being used (const or temp registers) |
| * \param indirect2D if true, 2-D indirect indexing being used (for const buf) |
| */ |
| static VGPU10OperandToken0 |
| setup_operand0_indexing(struct svga_shader_emitter_v10 *emit, |
| VGPU10OperandToken0 operand0, |
| enum tgsi_file_type file, |
| boolean indirect, |
| boolean index2D, bool indirect2D) |
| { |
| VGPU10_OPERAND_INDEX_REPRESENTATION index0Rep, index1Rep; |
| VGPU10_OPERAND_INDEX_DIMENSION indexDim; |
| |
| /* |
| * Compute index dimensions |
| */ |
| if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32 || |
| operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID || |
| operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID || |
| operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID || |
| operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP || |
| operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) { |
| /* there's no swizzle for in-line immediates */ |
| indexDim = VGPU10_OPERAND_INDEX_0D; |
| assert(operand0.selectionMode == 0); |
| } |
| else if (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT) { |
| indexDim = VGPU10_OPERAND_INDEX_0D; |
| } |
| else { |
| indexDim = index2D ? VGPU10_OPERAND_INDEX_2D : VGPU10_OPERAND_INDEX_1D; |
| } |
| |
| /* |
| * Compute index representation(s) (immediate vs relative). |
| */ |
| if (indexDim == VGPU10_OPERAND_INDEX_2D) { |
| index0Rep = indirect2D ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE |
| : VGPU10_OPERAND_INDEX_IMMEDIATE32; |
| |
| index1Rep = indirect ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE |
| : VGPU10_OPERAND_INDEX_IMMEDIATE32; |
| } |
| else if (indexDim == VGPU10_OPERAND_INDEX_1D) { |
| index0Rep = indirect ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE |
| : VGPU10_OPERAND_INDEX_IMMEDIATE32; |
| |
| index1Rep = 0; |
| } |
| else { |
| index0Rep = 0; |
| index1Rep = 0; |
| } |
| |
| operand0.indexDimension = indexDim; |
| operand0.index0Representation = index0Rep; |
| operand0.index1Representation = index1Rep; |
| |
| return operand0; |
| } |
| |
| |
| /** |
| * Emit the operand for expressing an address register for indirect indexing. |
| * Note that the address register is really just a temp register. |
| * \param addr_reg_index which address register to use |
| */ |
| static void |
| emit_indirect_register(struct svga_shader_emitter_v10 *emit, |
| unsigned addr_reg_index) |
| { |
| unsigned tmp_reg_index; |
| VGPU10OperandToken0 operand0; |
| |
| assert(addr_reg_index < MAX_VGPU10_ADDR_REGS); |
| |
| tmp_reg_index = emit->address_reg_index[addr_reg_index]; |
| |
| /* operand0 is a simple temporary register, selecting one component */ |
| operand0.value = 0; |
| operand0.operandType = VGPU10_OPERAND_TYPE_TEMP; |
| operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; |
| operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; |
| operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; |
| operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE; |
| operand0.swizzleX = 0; |
| operand0.swizzleY = 1; |
| operand0.swizzleZ = 2; |
| operand0.swizzleW = 3; |
| |
| emit_dword(emit, operand0.value); |
| emit_dword(emit, remap_temp_index(emit, TGSI_FILE_TEMPORARY, tmp_reg_index)); |
| } |
| |
| |
| /** |
| * Translate the dst register of a TGSI instruction and emit VGPU10 tokens. |
| * \param emit the emitter context |
| * \param reg the TGSI dst register to translate |
| */ |
| static void |
| emit_dst_register(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_dst_register *reg) |
| { |
| enum tgsi_file_type file = reg->Register.File; |
| unsigned index = reg->Register.Index; |
| const enum tgsi_semantic sem_name = emit->info.output_semantic_name[index]; |
| const unsigned sem_index = emit->info.output_semantic_index[index]; |
| unsigned writemask = reg->Register.WriteMask; |
| const boolean indirect = reg->Register.Indirect; |
| unsigned tempArrayId = get_temp_array_id(emit, file, index); |
| boolean index2d = reg->Register.Dimension || tempArrayId > 0; |
| VGPU10OperandToken0 operand0; |
| |
| if (file == TGSI_FILE_TEMPORARY) { |
| emit->temp_map[index].initialized = TRUE; |
| } |
| |
| if (file == TGSI_FILE_OUTPUT) { |
| if (emit->unit == PIPE_SHADER_VERTEX || |
| emit->unit == PIPE_SHADER_GEOMETRY || |
| emit->unit == PIPE_SHADER_TESS_EVAL) { |
| if (index == emit->vposition.out_index && |
| emit->vposition.tmp_index != INVALID_INDEX) { |
| /* replace OUTPUT[POS] with TEMP[POS]. We need to store the |
| * vertex position result in a temporary so that we can modify |
| * it in the post_helper() code. |
| */ |
| file = TGSI_FILE_TEMPORARY; |
| index = emit->vposition.tmp_index; |
| } |
| else if (sem_name == TGSI_SEMANTIC_CLIPDIST && |
| emit->clip_dist_tmp_index != INVALID_INDEX) { |
| /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST]. |
| * We store the clip distance in a temporary first, then |
| * we'll copy it to the shadow copy and to CLIPDIST with the |
| * enabled planes mask in emit_clip_distance_instructions(). |
| */ |
| file = TGSI_FILE_TEMPORARY; |
| index = emit->clip_dist_tmp_index + sem_index; |
| } |
| else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX && |
| emit->clip_vertex_tmp_index != INVALID_INDEX) { |
| /* replace the CLIPVERTEX output register with a temporary */ |
| assert(emit->clip_mode == CLIP_VERTEX); |
| assert(sem_index == 0); |
| file = TGSI_FILE_TEMPORARY; |
| index = emit->clip_vertex_tmp_index; |
| } |
| else if (sem_name == TGSI_SEMANTIC_COLOR && |
| emit->key.clamp_vertex_color) { |
| |
| /* set the saturate modifier of the instruction |
| * to clamp the vertex color. |
| */ |
| VGPU10OpcodeToken0 *token = |
| (VGPU10OpcodeToken0 *)emit->buf + emit->inst_start_token; |
| token->saturate = TRUE; |
| } |
| else if (sem_name == TGSI_SEMANTIC_VIEWPORT_INDEX && |
| emit->gs.viewport_index_out_index != INVALID_INDEX) { |
| file = TGSI_FILE_TEMPORARY; |
| index = emit->gs.viewport_index_tmp_index; |
| } |
| } |
| else if (emit->unit == PIPE_SHADER_FRAGMENT) { |
| if (sem_name == TGSI_SEMANTIC_POSITION) { |
| /* Fragment depth output register */ |
| operand0.value = 0; |
| operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH; |
| operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; |
| operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; |
| emit_dword(emit, operand0.value); |
| return; |
| } |
| else if (sem_name == TGSI_SEMANTIC_SAMPLEMASK) { |
| /* Fragment sample mask output */ |
| operand0.value = 0; |
| operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK; |
| operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; |
| operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; |
| emit_dword(emit, operand0.value); |
| return; |
| } |
| else if (index == emit->fs.color_out_index[0] && |
| emit->fs.color_tmp_index != INVALID_INDEX) { |
| /* replace OUTPUT[COLOR] with TEMP[COLOR]. We need to store the |
| * fragment color result in a temporary so that we can read it |
| * it in the post_helper() code. |
| */ |
| file = TGSI_FILE_TEMPORARY; |
| index = emit->fs.color_tmp_index; |
| } |
| else { |
| /* Typically, for fragment shaders, the output register index |
| * matches the color semantic index. But not when we write to |
| * the fragment depth register. In that case, OUT[0] will be |
| * fragdepth and OUT[1] will be the 0th color output. We need |
| * to use the semantic index for color outputs. |
| */ |
| assert(sem_name == TGSI_SEMANTIC_COLOR); |
| index = emit->info.output_semantic_index[index]; |
| |
| emit->num_output_writes++; |
| } |
| } |
| else if (emit->unit == PIPE_SHADER_TESS_CTRL) { |
| if (index == emit->tcs.inner.tgsi_index) { |
| /* replace OUTPUT[TESSLEVEL] with temp. We are storing it |
| * in temporary for now so that will be store into appropriate |
| * registers in post_helper() in patch constant phase. |
| */ |
| if (emit->tcs.control_point_phase) { |
| /* Discard writing into tessfactor in control point phase */ |
| emit->discard_instruction = TRUE; |
| } |
| else { |
| file = TGSI_FILE_TEMPORARY; |
| index = emit->tcs.inner.temp_index; |
| } |
| } |
| else if (index == emit->tcs.outer.tgsi_index) { |
| /* replace OUTPUT[TESSLEVEL] with temp. We are storing it |
| * in temporary for now so that will be store into appropriate |
| * registers in post_helper(). |
| */ |
| if (emit->tcs.control_point_phase) { |
| /* Discard writing into tessfactor in control point phase */ |
| emit->discard_instruction = TRUE; |
| } |
| else { |
| file = TGSI_FILE_TEMPORARY; |
| index = emit->tcs.outer.temp_index; |
| } |
| } |
| else if (index >= emit->tcs.patch_generic_out_index && |
| index < (emit->tcs.patch_generic_out_index + |
| emit->tcs.patch_generic_out_count)) { |
| if (emit->tcs.control_point_phase) { |
| /* Discard writing into generic patch constant outputs in |
| control point phase */ |
| emit->discard_instruction = TRUE; |
| } |
| else { |
| if (emit->reemit_instruction) { |
| /* Store results of reemitted instruction in temporary register. */ |
| file = TGSI_FILE_TEMPORARY; |
| index = emit->tcs.patch_generic_tmp_index + |
| (index - emit->tcs.patch_generic_out_index); |
| /** |
| * Temporaries for patch constant data can be done |
| * as indexable temporaries. |
| */ |
| tempArrayId = get_temp_array_id(emit, file, index); |
| index2d = tempArrayId > 0; |
| |
| emit->reemit_instruction = FALSE; |
| } |
| else { |
| /* If per-patch outputs is been read in shader, we |
| * reemit instruction and store results in temporaries in |
| * patch constant phase. */ |
| if (emit->info.reads_perpatch_outputs) { |
| emit->reemit_instruction = TRUE; |
| } |
| } |
| } |
| } |
| else if (reg->Register.Dimension) { |
| /* Only control point outputs are declared 2D in tgsi */ |
| if (emit->tcs.control_point_phase) { |
| if (emit->reemit_instruction) { |
| /* Store results of reemitted instruction in temporary register. */ |
| index2d = FALSE; |
| file = TGSI_FILE_TEMPORARY; |
| index = emit->tcs.control_point_tmp_index + |
| (index - emit->tcs.control_point_out_index); |
| emit->reemit_instruction = FALSE; |
| } |
| else { |
| /* The mapped control point outputs are 1-D */ |
| index2d = FALSE; |
| if (emit->info.reads_pervertex_outputs) { |
| /* If per-vertex outputs is been read in shader, we |
| * reemit instruction and store results in temporaries |
| * control point phase. */ |
| emit->reemit_instruction = TRUE; |
| } |
| } |
| |
| if (sem_name == TGSI_SEMANTIC_CLIPDIST && |
| emit->clip_dist_tmp_index != INVALID_INDEX) { |
| /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST]. |
| * We store the clip distance in a temporary first, then |
| * we'll copy it to the shadow copy and to CLIPDIST with the |
| * enabled planes mask in emit_clip_distance_instructions(). |
| */ |
| file = TGSI_FILE_TEMPORARY; |
| index = emit->clip_dist_tmp_index + sem_index; |
| } |
| else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX && |
| emit->clip_vertex_tmp_index != INVALID_INDEX) { |
| /* replace the CLIPVERTEX output register with a temporary */ |
| assert(emit->clip_mode == CLIP_VERTEX); |
| assert(sem_index == 0); |
| file = TGSI_FILE_TEMPORARY; |
| index = emit->clip_vertex_tmp_index; |
| } |
| } |
| else { |
| /* Discard writing into control point outputs in |
| patch constant phase */ |
| emit->discard_instruction = TRUE; |
| } |
| } |
| } |
| } |
| |
| /* init operand tokens to all zero */ |
| operand0.value = 0; |
| |
| operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; |
| |
| /* the operand has a writemask */ |
| operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; |
| |
| /* Which of the four dest components to write to. Note that we can use a |
| * simple assignment here since TGSI writemasks match VGPU10 writemasks. |
| */ |
| STATIC_ASSERT(TGSI_WRITEMASK_X == VGPU10_OPERAND_4_COMPONENT_MASK_X); |
| operand0.mask = writemask; |
| |
| /* translate TGSI register file type to VGPU10 operand type */ |
| operand0.operandType = translate_register_file(file, tempArrayId > 0); |
| |
| check_register_index(emit, operand0.operandType, index); |
| |
| operand0 = setup_operand0_indexing(emit, operand0, file, indirect, |
| index2d, FALSE); |
| |
| /* Emit tokens */ |
| emit_dword(emit, operand0.value); |
| if (tempArrayId > 0) { |
| emit_dword(emit, tempArrayId); |
| } |
| |
| emit_dword(emit, remap_temp_index(emit, file, index)); |
| |
| if (indirect) { |
| emit_indirect_register(emit, reg->Indirect.Index); |
| } |
| } |
| |
| |
| /** |
| * Check if temporary register needs to be initialize when |
| * shader is not using indirect addressing for temporary and uninitialized |
| * temporary is not used in loop. In these two scenarios, we cannot |
| * determine if temporary is initialized or not. |
| */ |
| static boolean |
| need_temp_reg_initialization(struct svga_shader_emitter_v10 *emit, |
| unsigned index) |
| { |
| if (!(emit->info.indirect_files && (1u << TGSI_FILE_TEMPORARY)) |
| && emit->current_loop_depth == 0) { |
| if (!emit->temp_map[index].initialized && |
| emit->temp_map[index].index < emit->num_shader_temps) { |
| return TRUE; |
| } |
| } |
| |
| return FALSE; |
| } |
| |
| |
| /** |
| * Translate a src register of a TGSI instruction and emit VGPU10 tokens. |
| * In quite a few cases, we do register substitution. For example, if |
| * the TGSI register is the front/back-face register, we replace that with |
| * a temp register containing a value we computed earlier. |
| */ |
| static void |
| emit_src_register(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_src_register *reg) |
| { |
| enum tgsi_file_type file = reg->Register.File; |
| unsigned index = reg->Register.Index; |
| const boolean indirect = reg->Register.Indirect; |
| unsigned tempArrayId = get_temp_array_id(emit, file, index); |
| boolean index2d = (reg->Register.Dimension || |
| tempArrayId > 0 || |
| file == TGSI_FILE_CONSTANT); |
| unsigned index2 = tempArrayId > 0 ? tempArrayId : reg->Dimension.Index; |
| boolean indirect2d = reg->Dimension.Indirect; |
| unsigned swizzleX = reg->Register.SwizzleX; |
| unsigned swizzleY = reg->Register.SwizzleY; |
| unsigned swizzleZ = reg->Register.SwizzleZ; |
| unsigned swizzleW = reg->Register.SwizzleW; |
| const boolean absolute = reg->Register.Absolute; |
| const boolean negate = reg->Register.Negate; |
| VGPU10OperandToken0 operand0; |
| VGPU10OperandToken1 operand1; |
| |
| operand0.value = operand1.value = 0; |
| |
| if (emit->unit == PIPE_SHADER_FRAGMENT){ |
| if (file == TGSI_FILE_INPUT) { |
| if (index == emit->fs.face_input_index) { |
| /* Replace INPUT[FACE] with TEMP[FACE] */ |
| file = TGSI_FILE_TEMPORARY; |
| index = emit->fs.face_tmp_index; |
| } |
| else if (index == emit->fs.fragcoord_input_index) { |
| /* Replace INPUT[POSITION] with TEMP[POSITION] */ |
| file = TGSI_FILE_TEMPORARY; |
| index = emit->fs.fragcoord_tmp_index; |
| } |
| else if (index == emit->fs.layer_input_index) { |
| /* Replace INPUT[LAYER] with zero.x */ |
| file = TGSI_FILE_IMMEDIATE; |
| index = emit->fs.layer_imm_index; |
| swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X; |
| } |
| else { |
| /* We remap fragment shader inputs to that FS input indexes |
| * match up with VS/GS output indexes. |
| */ |
| index = emit->linkage.input_map[index]; |
| } |
| } |
| else if (file == TGSI_FILE_SYSTEM_VALUE) { |
| if (index == emit->fs.sample_pos_sys_index) { |
| assert(emit->version >= 41); |
| /* Current sample position is in a temp register */ |
| file = TGSI_FILE_TEMPORARY; |
| index = emit->fs.sample_pos_tmp_index; |
| } |
| else if (index == emit->fs.sample_mask_in_sys_index) { |
| /* Emitted as vCoverage0.x */ |
| /* According to GLSL spec, the gl_SampleMaskIn array has ceil(s / 32) |
| * elements where s is the maximum number of color samples supported |
| * by the implementation. With current implementation, we should not |
| * have more than one element. So assert if Index != 0 |
| */ |
| assert((!reg->Register.Indirect && reg->Register.Index == 0) || |
| reg->Register.Indirect); |
| operand0.value = 0; |
| operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK; |
| operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; |
| operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; |
| operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE; |
| emit_dword(emit, operand0.value); |
| return; |
| } |
| else { |
| /* Map the TGSI system value to a VGPU10 input register */ |
| assert(index < ARRAY_SIZE(emit->system_value_indexes)); |
| file = TGSI_FILE_INPUT; |
| index = emit->system_value_indexes[index]; |
| } |
| } |
| } |
| else if (emit->unit == PIPE_SHADER_GEOMETRY) { |
| if (file == TGSI_FILE_INPUT) { |
| if (index == emit->gs.prim_id_index) { |
| operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; |
| operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID; |
| } |
| index = emit->linkage.input_map[index]; |
| } |
| else if (file == TGSI_FILE_SYSTEM_VALUE && |
| index == emit->gs.invocation_id_sys_index) { |
| /* Emitted as vGSInstanceID0.x */ |
| operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; |
| operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID; |
| index = 0; |
| } |
| } |
| else if (emit->unit == PIPE_SHADER_VERTEX) { |
| if (file == TGSI_FILE_INPUT) { |
| /* if input is adjusted... */ |
| if ((emit->key.vs.adjust_attrib_w_1 | |
| emit->key.vs.adjust_attrib_itof | |
| emit->key.vs.adjust_attrib_utof | |
| emit->key.vs.attrib_is_bgra | |
| emit->key.vs.attrib_puint_to_snorm | |
| emit->key.vs.attrib_puint_to_uscaled | |
| emit->key.vs.attrib_puint_to_sscaled) & (1 << index)) { |
| file = TGSI_FILE_TEMPORARY; |
| index = emit->vs.adjusted_input[index]; |
| } |
| } |
| else if (file == TGSI_FILE_SYSTEM_VALUE) { |
| if (index == emit->vs.vertex_id_sys_index && |
| emit->vs.vertex_id_tmp_index != INVALID_INDEX) { |
| file = TGSI_FILE_TEMPORARY; |
| index = emit->vs.vertex_id_tmp_index; |
| swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X; |
| } |
| else { |
| /* Map the TGSI system value to a VGPU10 input register */ |
| assert(index < ARRAY_SIZE(emit->system_value_indexes)); |
| file = TGSI_FILE_INPUT; |
| index = emit->system_value_indexes[index]; |
| } |
| } |
| } |
| else if (emit->unit == PIPE_SHADER_TESS_CTRL) { |
| |
| if (file == TGSI_FILE_SYSTEM_VALUE) { |
| if (index == emit->tcs.vertices_per_patch_index) { |
| /** |
| * if source register is the system value for vertices_per_patch, |
| * replace it with the immediate. |
| */ |
| file = TGSI_FILE_IMMEDIATE; |
| index = emit->tcs.imm_index; |
| swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X; |
| } |
| else if (index == emit->tcs.invocation_id_sys_index) { |
| if (emit->tcs.control_point_phase) { |
| /** |
| * Emitted as vOutputControlPointID.x |
| */ |
| operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; |
| operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID; |
| operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; |
| operand0.mask = 0; |
| emit_dword(emit, operand0.value); |
| return; |
| } |
| else { |
| /* There is no control point ID input declaration in |
| * the patch constant phase in hull shader. |
| * Since for now we are emitting all instructions in |
| * the patch constant phase, we are replacing the |
| * control point ID reference with the immediate 0. |
| */ |
| file = TGSI_FILE_IMMEDIATE; |
| index = emit->tcs.imm_index; |
| swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_W; |
| } |
| } |
| else if (index == emit->tcs.prim_id_index) { |
| /** |
| * Emitted as vPrim.x |
| */ |
| operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; |
| operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID; |
| index = 0; |
| } |
| } |
| else if (file == TGSI_FILE_INPUT) { |
| index = emit->linkage.input_map[index]; |
| if (!emit->tcs.control_point_phase) { |
| /* Emitted as vicp */ |
| operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; |
| operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT; |
| assert(reg->Register.Dimension); |
| } |
| } |
| else if (file == TGSI_FILE_OUTPUT) { |
| if ((index >= emit->tcs.patch_generic_out_index && |
| index < (emit->tcs.patch_generic_out_index + |
| emit->tcs.patch_generic_out_count)) || |
| index == emit->tcs.inner.tgsi_index || |
| index == emit->tcs.outer.tgsi_index) { |
| if (emit->tcs.control_point_phase) { |
| emit->discard_instruction = TRUE; |
| } |
| else { |
| /* Device doesn't allow reading from output so |
| * use corresponding temporary register as source */ |
| file = TGSI_FILE_TEMPORARY; |
| if (index == emit->tcs.inner.tgsi_index) { |
| index = emit->tcs.inner.temp_index; |
| } |
| else if (index == emit->tcs.outer.tgsi_index) { |
| index = emit->tcs.outer.temp_index; |
| } |
| else { |
| index = emit->tcs.patch_generic_tmp_index + |
| (index - emit->tcs.patch_generic_out_index); |
| } |
| |
| /** |
| * Temporaries for patch constant data can be done |
| * as indexable temporaries. |
| */ |
| tempArrayId = get_temp_array_id(emit, file, index); |
| index2d = tempArrayId > 0; |
| index2 = tempArrayId > 0 ? tempArrayId : reg->Dimension.Index; |
| } |
| } |
| else if (index2d) { |
| if (emit->tcs.control_point_phase) { |
| /* Device doesn't allow reading from output so |
| * use corresponding temporary register as source */ |
| file = TGSI_FILE_TEMPORARY; |
| index2d = FALSE; |
| index = emit->tcs.control_point_tmp_index + |
| (index - emit->tcs.control_point_out_index); |
| } |
| else { |
| emit->discard_instruction = TRUE; |
| } |
| } |
| } |
| } |
| else if (emit->unit == PIPE_SHADER_TESS_EVAL) { |
| if (file == TGSI_FILE_SYSTEM_VALUE) { |
| if (index == emit->tes.tesscoord_sys_index) { |
| /** |
| * Emitted as vDomain |
| */ |
| operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; |
| operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT; |
| index = 0; |
| } |
| else if (index == emit->tes.inner.tgsi_index) { |
| file = TGSI_FILE_TEMPORARY; |
| index = emit->tes.inner.temp_index; |
| } |
| else if (index == emit->tes.outer.tgsi_index) { |
| file = TGSI_FILE_TEMPORARY; |
| index = emit->tes.outer.temp_index; |
| } |
| else if (index == emit->tes.prim_id_index) { |
| /** |
| * Emitted as vPrim.x |
| */ |
| operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; |
| operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID; |
| index = 0; |
| } |
| |
| } |
| else if (file == TGSI_FILE_INPUT) { |
| if (index2d) { |
| /* 2D input is emitted as vcp (input control point). */ |
| operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT; |
| operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; |
| |
| /* index specifies the element index and is remapped |
| * to align with the tcs output index. |
| */ |
| index = emit->linkage.input_map[index]; |
| |
| assert(index2 < emit->key.tes.vertices_per_patch); |
| } |
| else { |
| if (index < emit->key.tes.tessfactor_index) |
| /* index specifies the generic patch index. |
| * Remapped to match up with the tcs output index. |
| */ |
| index = emit->linkage.input_map[index]; |
| |
| operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT; |
| operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; |
| } |
| } |
| } |
| |
| if (file == TGSI_FILE_ADDRESS) { |
| index = emit->address_reg_index[index]; |
| file = TGSI_FILE_TEMPORARY; |
| } |
| |
| if (file == TGSI_FILE_TEMPORARY) { |
| if (need_temp_reg_initialization(emit, index)) { |
| emit->initialize_temp_index = index; |
| emit->discard_instruction = TRUE; |
| } |
| } |
| |
| if (operand0.value == 0) { |
| /* if operand0 was not set above for a special case, do the general |
| * case now. |
| */ |
| operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; |
| operand0.operandType = translate_register_file(file, tempArrayId > 0); |
| } |
| operand0 = setup_operand0_indexing(emit, operand0, file, indirect, |
| index2d, indirect2d); |
| |
| if (operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32 && |
| operand0.operandType != VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) { |
| /* there's no swizzle for in-line immediates */ |
| if (swizzleX == swizzleY && |
| swizzleX == swizzleZ && |
| swizzleX == swizzleW) { |
| operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE; |
| } |
| else { |
| operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE; |
| } |
| |
| operand0.swizzleX = swizzleX; |
| operand0.swizzleY = swizzleY; |
| operand0.swizzleZ = swizzleZ; |
| operand0.swizzleW = swizzleW; |
| |
| if (absolute || negate) { |
| operand0.extended = 1; |
| operand1.extendedOperandType = VGPU10_EXTENDED_OPERAND_MODIFIER; |
| if (absolute && !negate) |
| operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABS; |
| if (!absolute && negate) |
| operand1.operandModifier = VGPU10_OPERAND_MODIFIER_NEG; |
| if (absolute && negate) |
| operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABSNEG; |
| } |
| } |
| |
| /* Emit the operand tokens */ |
| emit_dword(emit, operand0.value); |
| if (operand0.extended) |
| emit_dword(emit, operand1.value); |
| |
| if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32) { |
| /* Emit the four float/int in-line immediate values */ |
| unsigned *c; |
| assert(index < ARRAY_SIZE(emit->immediates)); |
| assert(file == TGSI_FILE_IMMEDIATE); |
| assert(swizzleX < 4); |
| assert(swizzleY < 4); |
| assert(swizzleZ < 4); |
| assert(swizzleW < 4); |
| c = (unsigned *) emit->immediates[index]; |
| emit_dword(emit, c[swizzleX]); |
| emit_dword(emit, c[swizzleY]); |
| emit_dword(emit, c[swizzleZ]); |
| emit_dword(emit, c[swizzleW]); |
| } |
| else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_1D) { |
| /* Emit the register index(es) */ |
| if (index2d) { |
| emit_dword(emit, index2); |
| |
| if (indirect2d) { |
| emit_indirect_register(emit, reg->DimIndirect.Index); |
| } |
| } |
| |
| emit_dword(emit, remap_temp_index(emit, file, index)); |
| |
| if (indirect) { |
| emit_indirect_register(emit, reg->Indirect.Index); |
| } |
| } |
| } |
| |
| |
| /** |
| * Emit a resource operand (for use with a SAMPLE instruction). |
| */ |
| static void |
| emit_resource_register(struct svga_shader_emitter_v10 *emit, |
| unsigned resource_number) |
| { |
| VGPU10OperandToken0 operand0; |
| |
| check_register_index(emit, VGPU10_OPERAND_TYPE_RESOURCE, resource_number); |
| |
| /* init */ |
| operand0.value = 0; |
| |
| operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE; |
| operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; |
| operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; |
| operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE; |
| operand0.swizzleX = VGPU10_COMPONENT_X; |
| operand0.swizzleY = VGPU10_COMPONENT_Y; |
| operand0.swizzleZ = VGPU10_COMPONENT_Z; |
| operand0.swizzleW = VGPU10_COMPONENT_W; |
| |
| emit_dword(emit, operand0.value); |
| emit_dword(emit, resource_number); |
| } |
| |
| |
| /** |
| * Emit a sampler operand (for use with a SAMPLE instruction). |
| */ |
| static void |
| emit_sampler_register(struct svga_shader_emitter_v10 *emit, |
| unsigned sampler_number) |
| { |
| VGPU10OperandToken0 operand0; |
| |
| check_register_index(emit, VGPU10_OPERAND_TYPE_SAMPLER, sampler_number); |
| |
| /* init */ |
| operand0.value = 0; |
| |
| operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER; |
| operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; |
| |
| emit_dword(emit, operand0.value); |
| emit_dword(emit, sampler_number); |
| } |
| |
| |
| /** |
| * Emit an operand which reads the IS_FRONT_FACING register. |
| */ |
| static void |
| emit_face_register(struct svga_shader_emitter_v10 *emit) |
| { |
| VGPU10OperandToken0 operand0; |
| unsigned index = emit->linkage.input_map[emit->fs.face_input_index]; |
| |
| /* init */ |
| operand0.value = 0; |
| |
| operand0.operandType = VGPU10_OPERAND_TYPE_INPUT; |
| operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; |
| operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE; |
| operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; |
| |
| operand0.swizzleX = VGPU10_COMPONENT_X; |
| operand0.swizzleY = VGPU10_COMPONENT_X; |
| operand0.swizzleZ = VGPU10_COMPONENT_X; |
| operand0.swizzleW = VGPU10_COMPONENT_X; |
| |
| emit_dword(emit, operand0.value); |
| emit_dword(emit, index); |
| } |
| |
| |
| /** |
| * Emit tokens for the "rasterizer" register used by the SAMPLE_POS |
| * instruction. |
| */ |
| static void |
| emit_rasterizer_register(struct svga_shader_emitter_v10 *emit) |
| { |
| VGPU10OperandToken0 operand0; |
| |
| /* init */ |
| operand0.value = 0; |
| |
| /* No register index for rasterizer index (there's only one) */ |
| operand0.operandType = VGPU10_OPERAND_TYPE_RASTERIZER; |
| operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; |
| operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; |
| operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE; |
| operand0.swizzleX = VGPU10_COMPONENT_X; |
| operand0.swizzleY = VGPU10_COMPONENT_Y; |
| operand0.swizzleZ = VGPU10_COMPONENT_Z; |
| operand0.swizzleW = VGPU10_COMPONENT_W; |
| |
| emit_dword(emit, operand0.value); |
| } |
| |
| |
| /** |
| * Emit tokens for the "stream" register used by the |
| * DCL_STREAM, CUT_STREAM, EMIT_STREAM instructions. |
| */ |
| static void |
| emit_stream_register(struct svga_shader_emitter_v10 *emit, unsigned index) |
| { |
| VGPU10OperandToken0 operand0; |
| |
| /* init */ |
| operand0.value = 0; |
| |
| /* No register index for rasterizer index (there's only one) */ |
| operand0.operandType = VGPU10_OPERAND_TYPE_STREAM; |
| operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; |
| operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; |
| |
| emit_dword(emit, operand0.value); |
| emit_dword(emit, index); |
| } |
| |
| |
| /** |
| * Emit the token for a VGPU10 opcode, with precise parameter. |
| * \param saturate clamp result to [0,1]? |
| */ |
| static void |
| emit_opcode_precise(struct svga_shader_emitter_v10 *emit, |
| unsigned vgpu10_opcode, boolean saturate, boolean precise) |
| { |
| VGPU10OpcodeToken0 token0; |
| |
| token0.value = 0; /* init all fields to zero */ |
| token0.opcodeType = vgpu10_opcode; |
| token0.instructionLength = 0; /* Filled in by end_emit_instruction() */ |
| token0.saturate = saturate; |
| |
| /* Mesa's GLSL IR -> TGSI translator will set the TGSI precise flag for |
| * 'invariant' declarations. Only set preciseValues=1 if we have SM5. |
| */ |
| token0.preciseValues = precise && emit->version >= 50; |
| |
| emit_dword(emit, token0.value); |
| |
| emit->uses_precise_qualifier |= token0.preciseValues; |
| } |
| |
| |
| /** |
| * Emit the token for a VGPU10 opcode. |
| * \param saturate clamp result to [0,1]? |
| */ |
| static void |
| emit_opcode(struct svga_shader_emitter_v10 *emit, |
| unsigned vgpu10_opcode, boolean saturate) |
| { |
| emit_opcode_precise(emit, vgpu10_opcode, saturate, FALSE); |
| } |
| |
| |
| /** |
| * Emit the token for a VGPU10 resinfo instruction. |
| * \param modifier return type modifier, _uint or _rcpFloat. |
| * TODO: We may want to remove this parameter if it will |
| * only ever be used as _uint. |
| */ |
| static void |
| emit_opcode_resinfo(struct svga_shader_emitter_v10 *emit, |
| VGPU10_RESINFO_RETURN_TYPE modifier) |
| { |
| VGPU10OpcodeToken0 token0; |
| |
| token0.value = 0; /* init all fields to zero */ |
| token0.opcodeType = VGPU10_OPCODE_RESINFO; |
| token0.instructionLength = 0; /* Filled in by end_emit_instruction() */ |
| token0.resinfoReturnType = modifier; |
| |
| emit_dword(emit, token0.value); |
| } |
| |
| |
| /** |
| * Emit opcode tokens for a texture sample instruction. Texture instructions |
| * can be rather complicated (texel offsets, etc) so we have this specialized |
| * function. |
| */ |
| static void |
| emit_sample_opcode(struct svga_shader_emitter_v10 *emit, |
| unsigned vgpu10_opcode, boolean saturate, |
| const int offsets[3]) |
| { |
| VGPU10OpcodeToken0 token0; |
| VGPU10OpcodeToken1 token1; |
| |
| token0.value = 0; /* init all fields to zero */ |
| token0.opcodeType = vgpu10_opcode; |
| token0.instructionLength = 0; /* Filled in by end_emit_instruction() */ |
| token0.saturate = saturate; |
| |
| if (offsets[0] || offsets[1] || offsets[2]) { |
| assert(offsets[0] >= VGPU10_MIN_TEXEL_FETCH_OFFSET); |
| assert(offsets[1] >= VGPU10_MIN_TEXEL_FETCH_OFFSET); |
| assert(offsets[2] >= VGPU10_MIN_TEXEL_FETCH_OFFSET); |
| assert(offsets[0] <= VGPU10_MAX_TEXEL_FETCH_OFFSET); |
| assert(offsets[1] <= VGPU10_MAX_TEXEL_FETCH_OFFSET); |
| assert(offsets[2] <= VGPU10_MAX_TEXEL_FETCH_OFFSET); |
| |
| token0.extended = 1; |
| token1.value = 0; |
| token1.opcodeType = VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS; |
| token1.offsetU = offsets[0]; |
| token1.offsetV = offsets[1]; |
| token1.offsetW = offsets[2]; |
| } |
| |
| emit_dword(emit, token0.value); |
| if (token0.extended) { |
| emit_dword(emit, token1.value); |
| } |
| } |
| |
| |
| /** |
| * Emit a DISCARD opcode token. |
| * If nonzero is set, we'll discard the fragment if the X component is not 0. |
| * Otherwise, we'll discard the fragment if the X component is 0. |
| */ |
| static void |
| emit_discard_opcode(struct svga_shader_emitter_v10 *emit, boolean nonzero) |
| { |
| VGPU10OpcodeToken0 opcode0; |
| |
| opcode0.value = 0; |
| opcode0.opcodeType = VGPU10_OPCODE_DISCARD; |
| if (nonzero) |
| opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO; |
| |
| emit_dword(emit, opcode0.value); |
| } |
| |
| |
| /** |
| * We need to call this before we begin emitting a VGPU10 instruction. |
| */ |
| static void |
| begin_emit_instruction(struct svga_shader_emitter_v10 *emit) |
| { |
| assert(emit->inst_start_token == 0); |
| /* Save location of the instruction's VGPU10OpcodeToken0 token. |
| * Note, we can't save a pointer because it would become invalid if |
| * we have to realloc the output buffer. |
| */ |
| emit->inst_start_token = emit_get_num_tokens(emit); |
| } |
| |
| |
| /** |
| * We need to call this after we emit the last token of a VGPU10 instruction. |
| * This function patches in the opcode token's instructionLength field. |
| */ |
| static void |
| end_emit_instruction(struct svga_shader_emitter_v10 *emit) |
| { |
| VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf; |
| unsigned inst_length; |
| |
| assert(emit->inst_start_token > 0); |
| |
| if (emit->discard_instruction) { |
| /* Back up the emit->ptr to where this instruction started so |
| * that we discard the current instruction. |
| */ |
| emit->ptr = (char *) (tokens + emit->inst_start_token); |
| } |
| else { |
| /* Compute instruction length and patch that into the start of |
| * the instruction. |
| */ |
| inst_length = emit_get_num_tokens(emit) - emit->inst_start_token; |
| |
| assert(inst_length > 0); |
| |
| tokens[emit->inst_start_token].instructionLength = inst_length; |
| } |
| |
| emit->inst_start_token = 0; /* reset to zero for error checking */ |
| emit->discard_instruction = FALSE; |
| } |
| |
| |
| /** |
| * Return index for a free temporary register. |
| */ |
| static unsigned |
| get_temp_index(struct svga_shader_emitter_v10 *emit) |
| { |
| assert(emit->internal_temp_count < MAX_INTERNAL_TEMPS); |
| return emit->num_shader_temps + emit->internal_temp_count++; |
| } |
| |
| |
| /** |
| * Release the temporaries which were generated by get_temp_index(). |
| */ |
| static void |
| free_temp_indexes(struct svga_shader_emitter_v10 *emit) |
| { |
| emit->internal_temp_count = 0; |
| } |
| |
| |
| /** |
| * Create a tgsi_full_src_register. |
| */ |
| static struct tgsi_full_src_register |
| make_src_reg(enum tgsi_file_type file, unsigned index) |
| { |
| struct tgsi_full_src_register reg; |
| |
| memset(®, 0, sizeof(reg)); |
| reg.Register.File = file; |
| reg.Register.Index = index; |
| reg.Register.SwizzleX = TGSI_SWIZZLE_X; |
| reg.Register.SwizzleY = TGSI_SWIZZLE_Y; |
| reg.Register.SwizzleZ = TGSI_SWIZZLE_Z; |
| reg.Register.SwizzleW = TGSI_SWIZZLE_W; |
| return reg; |
| } |
| |
| |
| /** |
| * Create a tgsi_full_src_register with a swizzle such that all four |
| * vector components have the same scalar value. |
| */ |
| static struct tgsi_full_src_register |
| make_src_scalar_reg(enum tgsi_file_type file, unsigned index, unsigned component) |
| { |
| struct tgsi_full_src_register reg; |
| |
| assert(component >= TGSI_SWIZZLE_X); |
| assert(component <= TGSI_SWIZZLE_W); |
| |
| memset(®, 0, sizeof(reg)); |
| reg.Register.File = file; |
| reg.Register.Index = index; |
| reg.Register.SwizzleX = |
| reg.Register.SwizzleY = |
| reg.Register.SwizzleZ = |
| reg.Register.SwizzleW = component; |
| return reg; |
| } |
| |
| |
| /** |
| * Create a tgsi_full_src_register for a temporary. |
| */ |
| static struct tgsi_full_src_register |
| make_src_temp_reg(unsigned index) |
| { |
| return make_src_reg(TGSI_FILE_TEMPORARY, index); |
| } |
| |
| |
| /** |
| * Create a tgsi_full_src_register for a constant. |
| */ |
| static struct tgsi_full_src_register |
| make_src_const_reg(unsigned index) |
| { |
| return make_src_reg(TGSI_FILE_CONSTANT, index); |
| } |
| |
| |
| /** |
| * Create a tgsi_full_src_register for an immediate constant. |
| */ |
| static struct tgsi_full_src_register |
| make_src_immediate_reg(unsigned index) |
| { |
| return make_src_reg(TGSI_FILE_IMMEDIATE, index); |
| } |
| |
| |
| /** |
| * Create a tgsi_full_dst_register. |
| */ |
| static struct tgsi_full_dst_register |
| make_dst_reg(enum tgsi_file_type file, unsigned index) |
| { |
| struct tgsi_full_dst_register reg; |
| |
| memset(®, 0, sizeof(reg)); |
| reg.Register.File = file; |
| reg.Register.Index = index; |
| reg.Register.WriteMask = TGSI_WRITEMASK_XYZW; |
| return reg; |
| } |
| |
| |
| /** |
| * Create a tgsi_full_dst_register for a temporary. |
| */ |
| static struct tgsi_full_dst_register |
| make_dst_temp_reg(unsigned index) |
| { |
| return make_dst_reg(TGSI_FILE_TEMPORARY, index); |
| } |
| |
| |
| /** |
| * Create a tgsi_full_dst_register for an output. |
| */ |
| static struct tgsi_full_dst_register |
| make_dst_output_reg(unsigned index) |
| { |
| return make_dst_reg(TGSI_FILE_OUTPUT, index); |
| } |
| |
| |
| /** |
| * Create negated tgsi_full_src_register. |
| */ |
| static struct tgsi_full_src_register |
| negate_src(const struct tgsi_full_src_register *reg) |
| { |
| struct tgsi_full_src_register neg = *reg; |
| neg.Register.Negate = !reg->Register.Negate; |
| return neg; |
| } |
| |
| /** |
| * Create absolute value of a tgsi_full_src_register. |
| */ |
| static struct tgsi_full_src_register |
| absolute_src(const struct tgsi_full_src_register *reg) |
| { |
| struct tgsi_full_src_register absolute = *reg; |
| absolute.Register.Absolute = 1; |
| return absolute; |
| } |
| |
| |
| /** Return the named swizzle term from the src register */ |
| static inline unsigned |
| get_swizzle(const struct tgsi_full_src_register *reg, enum tgsi_swizzle term) |
| { |
| switch (term) { |
| case TGSI_SWIZZLE_X: |
| return reg->Register.SwizzleX; |
| case TGSI_SWIZZLE_Y: |
| return reg->Register.SwizzleY; |
| case TGSI_SWIZZLE_Z: |
| return reg->Register.SwizzleZ; |
| case TGSI_SWIZZLE_W: |
| return reg->Register.SwizzleW; |
| default: |
| assert(!"Bad swizzle"); |
| return TGSI_SWIZZLE_X; |
| } |
| } |
| |
| |
| /** |
| * Create swizzled tgsi_full_src_register. |
| */ |
| static struct tgsi_full_src_register |
| swizzle_src(const struct tgsi_full_src_register *reg, |
| enum tgsi_swizzle swizzleX, enum tgsi_swizzle swizzleY, |
| enum tgsi_swizzle swizzleZ, enum tgsi_swizzle swizzleW) |
| { |
| struct tgsi_full_src_register swizzled = *reg; |
| /* Note: we swizzle the current swizzle */ |
| swizzled.Register.SwizzleX = get_swizzle(reg, swizzleX); |
| swizzled.Register.SwizzleY = get_swizzle(reg, swizzleY); |
| swizzled.Register.SwizzleZ = get_swizzle(reg, swizzleZ); |
| swizzled.Register.SwizzleW = get_swizzle(reg, swizzleW); |
| return swizzled; |
| } |
| |
| |
| /** |
| * Create swizzled tgsi_full_src_register where all the swizzle |
| * terms are the same. |
| */ |
| static struct tgsi_full_src_register |
| scalar_src(const struct tgsi_full_src_register *reg, enum tgsi_swizzle swizzle) |
| { |
| struct tgsi_full_src_register swizzled = *reg; |
| /* Note: we swizzle the current swizzle */ |
| swizzled.Register.SwizzleX = |
| swizzled.Register.SwizzleY = |
| swizzled.Register.SwizzleZ = |
| swizzled.Register.SwizzleW = get_swizzle(reg, swizzle); |
| return swizzled; |
| } |
| |
| |
| /** |
| * Create new tgsi_full_dst_register with writemask. |
| * \param mask bitmask of TGSI_WRITEMASK_[XYZW] |
| */ |
| static struct tgsi_full_dst_register |
| writemask_dst(const struct tgsi_full_dst_register *reg, unsigned mask) |
| { |
| struct tgsi_full_dst_register masked = *reg; |
| masked.Register.WriteMask = mask; |
| return masked; |
| } |
| |
| |
| /** |
| * Check if the register's swizzle is XXXX, YYYY, ZZZZ, or WWWW. |
| */ |
| static boolean |
| same_swizzle_terms(const struct tgsi_full_src_register *reg) |
| { |
| return (reg->Register.SwizzleX == reg->Register.SwizzleY && |
| reg->Register.SwizzleY == reg->Register.SwizzleZ && |
| reg->Register.SwizzleZ == reg->Register.SwizzleW); |
| } |
| |
| |
| /** |
| * Search the vector for the value 'x' and return its position. |
| */ |
| static int |
| find_imm_in_vec4(const union tgsi_immediate_data vec[4], |
| union tgsi_immediate_data x) |
| { |
| unsigned i; |
| for (i = 0; i < 4; i++) { |
| if (vec[i].Int == x.Int) |
| return i; |
| } |
| return -1; |
| } |
| |
| |
| /** |
| * Helper used by make_immediate_reg(), make_immediate_reg_4(). |
| */ |
| static int |
| find_immediate(struct svga_shader_emitter_v10 *emit, |
| union tgsi_immediate_data x, unsigned startIndex) |
| { |
| const unsigned endIndex = emit->num_immediates; |
| unsigned i; |
| |
| assert(emit->immediates_emitted); |
| |
| /* Search immediates for x, y, z, w */ |
| for (i = startIndex; i < endIndex; i++) { |
| if (x.Int == emit->immediates[i][0].Int || |
| x.Int == emit->immediates[i][1].Int || |
| x.Int == emit->immediates[i][2].Int || |
| x.Int == emit->immediates[i][3].Int) { |
| return i; |
| } |
| } |
| /* Should never try to use an immediate value that wasn't pre-declared */ |
| assert(!"find_immediate() failed!"); |
| return -1; |
| } |
| |
| |
| /** |
| * As above, but search for a double[2] pair. |
| */ |
| static int |
| find_immediate_dbl(struct svga_shader_emitter_v10 *emit, |
| double x, double y) |
| { |
| const unsigned endIndex = emit->num_immediates; |
| unsigned i; |
| |
| assert(emit->immediates_emitted); |
| |
| /* Search immediates for x, y, z, w */ |
| for (i = 0; i < endIndex; i++) { |
| if (x == emit->immediates_dbl[i][0] && |
| y == emit->immediates_dbl[i][1]) { |
| return i; |
| } |
| } |
| /* Should never try to use an immediate value that wasn't pre-declared */ |
| assert(!"find_immediate_dbl() failed!"); |
| return -1; |
| } |
| |
| |
| |
| /** |
| * Return a tgsi_full_src_register for an immediate/literal |
| * union tgsi_immediate_data[4] value. |
| * Note: the values must have been previously declared/allocated in |
| * emit_pre_helpers(). And, all of x,y,z,w must be located in the same |
| * vec4 immediate. |
| */ |
| static struct tgsi_full_src_register |
| make_immediate_reg_4(struct svga_shader_emitter_v10 *emit, |
| const union tgsi_immediate_data imm[4]) |
| { |
| struct tgsi_full_src_register reg; |
| unsigned i; |
| |
| for (i = 0; i < emit->num_common_immediates; i++) { |
| /* search for first component value */ |
| int immpos = find_immediate(emit, imm[0], i); |
| int x, y, z, w; |
| |
| assert(immpos >= 0); |
| |
| /* find remaining components within the immediate vector */ |
| x = find_imm_in_vec4(emit->immediates[immpos], imm[0]); |
| y = find_imm_in_vec4(emit->immediates[immpos], imm[1]); |
| z = find_imm_in_vec4(emit->immediates[immpos], imm[2]); |
| w = find_imm_in_vec4(emit->immediates[immpos], imm[3]); |
| |
| if (x >=0 && y >= 0 && z >= 0 && w >= 0) { |
| /* found them all */ |
| memset(®, 0, sizeof(reg)); |
| reg.Register.File = TGSI_FILE_IMMEDIATE; |
| reg.Register.Index = immpos; |
| reg.Register.SwizzleX = x; |
| reg.Register.SwizzleY = y; |
| reg.Register.SwizzleZ = z; |
| reg.Register.SwizzleW = w; |
| return reg; |
| } |
| /* else, keep searching */ |
| } |
| |
| assert(!"Failed to find immediate register!"); |
| |
| /* Just return IMM[0].xxxx */ |
| memset(®, 0, sizeof(reg)); |
| reg.Register.File = TGSI_FILE_IMMEDIATE; |
| return reg; |
| } |
| |
| |
| /** |
| * Return a tgsi_full_src_register for an immediate/literal |
| * union tgsi_immediate_data value of the form {value, value, value, value}. |
| * \sa make_immediate_reg_4() regarding allowed values. |
| */ |
| static struct tgsi_full_src_register |
| make_immediate_reg(struct svga_shader_emitter_v10 *emit, |
| union tgsi_immediate_data value) |
| { |
| struct tgsi_full_src_register reg; |
| int immpos = find_immediate(emit, value, 0); |
| |
| assert(immpos >= 0); |
| |
| memset(®, 0, sizeof(reg)); |
| reg.Register.File = TGSI_FILE_IMMEDIATE; |
| reg.Register.Index = immpos; |
| reg.Register.SwizzleX = |
| reg.Register.SwizzleY = |
| reg.Register.SwizzleZ = |
| reg.Register.SwizzleW = find_imm_in_vec4(emit->immediates[immpos], value); |
| |
| return reg; |
| } |
| |
| |
| /** |
| * Return a tgsi_full_src_register for an immediate/literal float[4] value. |
| * \sa make_immediate_reg_4() regarding allowed values. |
| */ |
| static struct tgsi_full_src_register |
| make_immediate_reg_float4(struct svga_shader_emitter_v10 *emit, |
| float x, float y, float z, float w) |
| { |
| union tgsi_immediate_data imm[4]; |
| imm[0].Float = x; |
| imm[1].Float = y; |
| imm[2].Float = z; |
| imm[3].Float = w; |
| return make_immediate_reg_4(emit, imm); |
| } |
| |
| |
| /** |
| * Return a tgsi_full_src_register for an immediate/literal float value |
| * of the form {value, value, value, value}. |
| * \sa make_immediate_reg_4() regarding allowed values. |
| */ |
| static struct tgsi_full_src_register |
| make_immediate_reg_float(struct svga_shader_emitter_v10 *emit, float value) |
| { |
| union tgsi_immediate_data imm; |
| imm.Float = value; |
| return make_immediate_reg(emit, imm); |
| } |
| |
| |
| /** |
| * Return a tgsi_full_src_register for an immediate/literal int[4] vector. |
| */ |
| static struct tgsi_full_src_register |
| make_immediate_reg_int4(struct svga_shader_emitter_v10 *emit, |
| int x, int y, int z, int w) |
| { |
| union tgsi_immediate_data imm[4]; |
| imm[0].Int = x; |
| imm[1].Int = y; |
| imm[2].Int = z; |
| imm[3].Int = w; |
| return make_immediate_reg_4(emit, imm); |
| } |
| |
| |
| /** |
| * Return a tgsi_full_src_register for an immediate/literal int value |
| * of the form {value, value, value, value}. |
| * \sa make_immediate_reg_4() regarding allowed values. |
| */ |
| static struct tgsi_full_src_register |
| make_immediate_reg_int(struct svga_shader_emitter_v10 *emit, int value) |
| { |
| union tgsi_immediate_data imm; |
| imm.Int = value; |
| return make_immediate_reg(emit, imm); |
| } |
| |
| |
| static struct tgsi_full_src_register |
| make_immediate_reg_double(struct svga_shader_emitter_v10 *emit, double value) |
| { |
| struct tgsi_full_src_register reg; |
| int immpos = find_immediate_dbl(emit, value, value); |
| |
| assert(immpos >= 0); |
| |
| memset(®, 0, sizeof(reg)); |
| reg.Register.File = TGSI_FILE_IMMEDIATE; |
| reg.Register.Index = immpos; |
| reg.Register.SwizzleX = TGSI_SWIZZLE_X; |
| reg.Register.SwizzleY = TGSI_SWIZZLE_Y; |
| reg.Register.SwizzleZ = TGSI_SWIZZLE_Z; |
| reg.Register.SwizzleW = TGSI_SWIZZLE_W; |
| |
| return reg; |
| } |
| |
| |
| /** |
| * Allocate space for a union tgsi_immediate_data[4] immediate. |
| * \return the index/position of the immediate. |
| */ |
| static unsigned |
| alloc_immediate_4(struct svga_shader_emitter_v10 *emit, |
| const union tgsi_immediate_data imm[4]) |
| { |
| unsigned n = emit->num_immediates++; |
| assert(!emit->immediates_emitted); |
| assert(n < ARRAY_SIZE(emit->immediates)); |
| emit->immediates[n][0] = imm[0]; |
| emit->immediates[n][1] = imm[1]; |
| emit->immediates[n][2] = imm[2]; |
| emit->immediates[n][3] = imm[3]; |
| return n; |
| } |
| |
| |
| /** |
| * Allocate space for a float[4] immediate. |
| * \return the index/position of the immediate. |
| */ |
| static unsigned |
| alloc_immediate_float4(struct svga_shader_emitter_v10 *emit, |
| float x, float y, float z, float w) |
| { |
| union tgsi_immediate_data imm[4]; |
| imm[0].Float = x; |
| imm[1].Float = y; |
| imm[2].Float = z; |
| imm[3].Float = w; |
| return alloc_immediate_4(emit, imm); |
| } |
| |
| |
| /** |
| * Allocate space for an int[4] immediate. |
| * \return the index/position of the immediate. |
| */ |
| static unsigned |
| alloc_immediate_int4(struct svga_shader_emitter_v10 *emit, |
| int x, int y, int z, int w) |
| { |
| union tgsi_immediate_data imm[4]; |
| imm[0].Int = x; |
| imm[1].Int = y; |
| imm[2].Int = z; |
| imm[3].Int = w; |
| return alloc_immediate_4(emit, imm); |
| } |
| |
| |
| static unsigned |
| alloc_immediate_double2(struct svga_shader_emitter_v10 *emit, |
| double x, double y) |
| { |
| unsigned n = emit->num_immediates++; |
| assert(!emit->immediates_emitted); |
| assert(n < ARRAY_SIZE(emit->immediates)); |
| emit->immediates_dbl[n][0] = x; |
| emit->immediates_dbl[n][1] = y; |
| return n; |
| |
| } |
| |
| |
| /** |
| * Allocate a shader input to store a system value. |
| */ |
| static unsigned |
| alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index) |
| { |
| const unsigned n = emit->linkage.input_map_max + 1 + index; |
| assert(index < ARRAY_SIZE(emit->system_value_indexes)); |
| emit->system_value_indexes[index] = n; |
| return n; |
| } |
| |
| |
| /** |
| * Translate a TGSI immediate value (union tgsi_immediate_data[4]) to VGPU10. |
| */ |
| static boolean |
| emit_vgpu10_immediate(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_immediate *imm) |
| { |
| /* We don't actually emit any code here. We just save the |
| * immediate values and emit them later. |
| */ |
| alloc_immediate_4(emit, imm->u); |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit a VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER block |
| * containing all the immediate values previously allocated |
| * with alloc_immediate_4(). |
| */ |
| static boolean |
| emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit) |
| { |
| VGPU10OpcodeToken0 token; |
| |
| assert(!emit->immediates_emitted); |
| |
| token.value = 0; |
| token.opcodeType = VGPU10_OPCODE_CUSTOMDATA; |
| token.customDataClass = VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER; |
| |
| /* Note: no begin/end_emit_instruction() calls */ |
| emit_dword(emit, token.value); |
| emit_dword(emit, 2 + 4 * emit->num_immediates); |
| emit_dwords(emit, (unsigned *) emit->immediates, 4 * emit->num_immediates); |
| |
| emit->immediates_emitted = TRUE; |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Translate a fragment shader's TGSI_INTERPOLATE_x mode to a vgpu10 |
| * interpolation mode. |
| * \return a VGPU10_INTERPOLATION_x value |
| */ |
| static unsigned |
| translate_interpolation(const struct svga_shader_emitter_v10 *emit, |
| enum tgsi_interpolate_mode interp, |
| enum tgsi_interpolate_loc interpolate_loc) |
| { |
| if (interp == TGSI_INTERPOLATE_COLOR) { |
| interp = emit->key.fs.flatshade ? |
| TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE; |
| } |
| |
| switch (interp) { |
| case TGSI_INTERPOLATE_CONSTANT: |
| return VGPU10_INTERPOLATION_CONSTANT; |
| case TGSI_INTERPOLATE_LINEAR: |
| if (interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID) { |
| return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID; |
| } else if (interpolate_loc == TGSI_INTERPOLATE_LOC_SAMPLE && |
| emit->version >= 41) { |
| return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE; |
| } else { |
| return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE; |
| } |
| break; |
| case TGSI_INTERPOLATE_PERSPECTIVE: |
| if (interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID) { |
| return VGPU10_INTERPOLATION_LINEAR_CENTROID; |
| } else if (interpolate_loc == TGSI_INTERPOLATE_LOC_SAMPLE && |
| emit->version >= 41) { |
| return VGPU10_INTERPOLATION_LINEAR_SAMPLE; |
| } else { |
| return VGPU10_INTERPOLATION_LINEAR; |
| } |
| break; |
| default: |
| assert(!"Unexpected interpolation mode"); |
| return VGPU10_INTERPOLATION_CONSTANT; |
| } |
| } |
| |
| |
| /** |
| * Translate a TGSI property to VGPU10. |
| * Don't emit any instructions yet, only need to gather the primitive property |
| * information. The output primitive topology might be changed later. The |
| * final property instructions will be emitted as part of the pre-helper code. |
| */ |
| static boolean |
| emit_vgpu10_property(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_property *prop) |
| { |
| static const VGPU10_PRIMITIVE primType[] = { |
| VGPU10_PRIMITIVE_POINT, /* PIPE_PRIM_POINTS */ |
| VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINES */ |
| VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINE_LOOP */ |
| VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINE_STRIP */ |
| VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLES */ |
| VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLE_STRIP */ |
| VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLE_FAN */ |
| VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_QUADS */ |
| VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_QUAD_STRIP */ |
| VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_POLYGON */ |
| VGPU10_PRIMITIVE_LINE_ADJ, /* PIPE_PRIM_LINES_ADJACENCY */ |
| VGPU10_PRIMITIVE_LINE_ADJ, /* PIPE_PRIM_LINE_STRIP_ADJACENCY */ |
| VGPU10_PRIMITIVE_TRIANGLE_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */ |
| VGPU10_PRIMITIVE_TRIANGLE_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */ |
| }; |
| |
| static const VGPU10_PRIMITIVE_TOPOLOGY primTopology[] = { |
| VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST, /* PIPE_PRIM_POINTS */ |
| VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* PIPE_PRIM_LINES */ |
| VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* PIPE_PRIM_LINE_LOOP */ |
| VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP, /* PIPE_PRIM_LINE_STRIP */ |
| VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST, /* PIPE_PRIM_TRIANGLES */ |
| VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_STRIP */ |
| VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_FAN */ |
| VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_QUADS */ |
| VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_QUAD_STRIP */ |
| VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_POLYGON */ |
| VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* PIPE_PRIM_LINES_ADJACENCY */ |
| VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* PIPE_PRIM_LINE_STRIP_ADJACENCY */ |
| VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */ |
| VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */ |
| }; |
| |
| static const unsigned inputArraySize[] = { |
| 0, /* VGPU10_PRIMITIVE_UNDEFINED */ |
| 1, /* VGPU10_PRIMITIVE_POINT */ |
| 2, /* VGPU10_PRIMITIVE_LINE */ |
| 3, /* VGPU10_PRIMITIVE_TRIANGLE */ |
| 0, |
| 0, |
| 4, /* VGPU10_PRIMITIVE_LINE_ADJ */ |
| 6 /* VGPU10_PRIMITIVE_TRIANGLE_ADJ */ |
| }; |
| |
| switch (prop->Property.PropertyName) { |
| case TGSI_PROPERTY_GS_INPUT_PRIM: |
| assert(prop->u[0].Data < ARRAY_SIZE(primType)); |
| emit->gs.prim_type = primType[prop->u[0].Data]; |
| assert(emit->gs.prim_type != VGPU10_PRIMITIVE_UNDEFINED); |
| emit->gs.input_size = inputArraySize[emit->gs.prim_type]; |
| break; |
| |
| case TGSI_PROPERTY_GS_OUTPUT_PRIM: |
| assert(prop->u[0].Data < ARRAY_SIZE(primTopology)); |
| emit->gs.prim_topology = primTopology[prop->u[0].Data]; |
| assert(emit->gs.prim_topology != VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED); |
| break; |
| |
| case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES: |
| emit->gs.max_out_vertices = prop->u[0].Data; |
| break; |
| |
| case TGSI_PROPERTY_GS_INVOCATIONS: |
| emit->gs.invocations = prop->u[0].Data; |
| break; |
| |
| case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS: |
| case TGSI_PROPERTY_NEXT_SHADER: |
| case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED: |
| /* no-op */ |
| break; |
| |
| case TGSI_PROPERTY_TCS_VERTICES_OUT: |
| /* This info is already captured in the shader key */ |
| break; |
| |
| case TGSI_PROPERTY_TES_PRIM_MODE: |
| emit->tes.prim_mode = prop->u[0].Data; |
| break; |
| |
| case TGSI_PROPERTY_TES_SPACING: |
| emit->tes.spacing = prop->u[0].Data; |
| break; |
| |
| case TGSI_PROPERTY_TES_VERTEX_ORDER_CW: |
| emit->tes.vertices_order_cw = prop->u[0].Data; |
| break; |
| |
| case TGSI_PROPERTY_TES_POINT_MODE: |
| emit->tes.point_mode = prop->u[0].Data; |
| break; |
| |
| default: |
| debug_printf("Unexpected TGSI property %s\n", |
| tgsi_property_names[prop->Property.PropertyName]); |
| } |
| |
| return TRUE; |
| } |
| |
| |
| static void |
| emit_property_instruction(struct svga_shader_emitter_v10 *emit, |
| VGPU10OpcodeToken0 opcode0, unsigned nData, |
| unsigned data) |
| { |
| begin_emit_instruction(emit); |
| emit_dword(emit, opcode0.value); |
| if (nData) |
| emit_dword(emit, data); |
| end_emit_instruction(emit); |
| } |
| |
| |
| /** |
| * Emit property instructions |
| */ |
| static void |
| emit_property_instructions(struct svga_shader_emitter_v10 *emit) |
| { |
| VGPU10OpcodeToken0 opcode0; |
| |
| assert(emit->unit == PIPE_SHADER_GEOMETRY); |
| |
| /* emit input primitive type declaration */ |
| opcode0.value = 0; |
| opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE; |
| opcode0.primitive = emit->gs.prim_type; |
| emit_property_instruction(emit, opcode0, 0, 0); |
| |
| /* emit max output vertices */ |
| opcode0.value = 0; |
| opcode0.opcodeType = VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT; |
| emit_property_instruction(emit, opcode0, 1, emit->gs.max_out_vertices); |
| |
| if (emit->version >= 50 && emit->gs.invocations > 0) { |
| opcode0.value = 0; |
| opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT; |
| emit_property_instruction(emit, opcode0, 1, emit->gs.invocations); |
| } |
| } |
| |
| |
| /** |
| * A helper function to declare tessellator domain in a hull shader or |
| * in the domain shader. |
| */ |
| static void |
| emit_tessellator_domain(struct svga_shader_emitter_v10 *emit, |
| enum pipe_prim_type prim_mode) |
| { |
| VGPU10OpcodeToken0 opcode0; |
| |
| opcode0.value = 0; |
| opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_DOMAIN; |
| switch (prim_mode) { |
| case PIPE_PRIM_QUADS: |
| case PIPE_PRIM_LINES: |
| opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_QUAD; |
| break; |
| case PIPE_PRIM_TRIANGLES: |
| opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_TRI; |
| break; |
| default: |
| debug_printf("Invalid tessellator prim mode %d\n", prim_mode); |
| opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_UNDEFINED; |
| } |
| begin_emit_instruction(emit); |
| emit_dword(emit, opcode0.value); |
| end_emit_instruction(emit); |
| } |
| |
| |
| /** |
| * Emit domain shader declarations. |
| */ |
| static void |
| emit_domain_shader_declarations(struct svga_shader_emitter_v10 *emit) |
| { |
| VGPU10OpcodeToken0 opcode0; |
| |
| assert(emit->unit == PIPE_SHADER_TESS_EVAL); |
| |
| /* Emit the input control point count */ |
| assert(emit->key.tes.vertices_per_patch >= 0 && |
| emit->key.tes.vertices_per_patch <= 32); |
| |
| opcode0.value = 0; |
| opcode0.opcodeType = VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT; |
| opcode0.controlPointCount = emit->key.tes.vertices_per_patch; |
| begin_emit_instruction(emit); |
| emit_dword(emit, opcode0.value); |
| end_emit_instruction(emit); |
| |
| emit_tessellator_domain(emit, emit->tes.prim_mode); |
| } |
| |
| |
| /** |
| * Some common values like 0.0, 1.0, 0.5, etc. are frequently needed |
| * to implement some instructions. We pre-allocate those values here |
| * in the immediate constant buffer. |
| */ |
| static void |
| alloc_common_immediates(struct svga_shader_emitter_v10 *emit) |
| { |
| unsigned n = 0; |
| |
| emit->common_immediate_pos[n++] = |
| alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f); |
| |
| if (emit->info.opcode_count[TGSI_OPCODE_LIT] > 0) { |
| emit->common_immediate_pos[n++] = |
| alloc_immediate_float4(emit, 128.0f, -128.0f, 0.0f, 0.0f); |
| } |
| |
| emit->common_immediate_pos[n++] = |
| alloc_immediate_int4(emit, 0, 1, 0, -1); |
| |
| if (emit->info.opcode_count[TGSI_OPCODE_IMSB] > 0 || |
| emit->info.opcode_count[TGSI_OPCODE_UMSB] > 0) { |
| emit->common_immediate_pos[n++] = |
| alloc_immediate_int4(emit, 31, 0, 0, 0); |
| } |
| |
| if (emit->info.opcode_count[TGSI_OPCODE_UBFE] > 0 || |
| emit->info.opcode_count[TGSI_OPCODE_IBFE] > 0 || |
| emit->info.opcode_count[TGSI_OPCODE_BFI] > 0) { |
| emit->common_immediate_pos[n++] = |
| alloc_immediate_int4(emit, 32, 0, 0, 0); |
| } |
| |
| if (emit->key.vs.attrib_puint_to_snorm) { |
| emit->common_immediate_pos[n++] = |
| alloc_immediate_float4(emit, -2.0f, 2.0f, 3.0f, -1.66666f); |
| } |
| |
| if (emit->key.vs.attrib_puint_to_uscaled) { |
| emit->common_immediate_pos[n++] = |
| alloc_immediate_float4(emit, 1023.0f, 3.0f, 0.0f, 0.0f); |
| } |
| |
| if (emit->key.vs.attrib_puint_to_sscaled) { |
| emit->common_immediate_pos[n++] = |
| alloc_immediate_int4(emit, 22, 12, 2, 0); |
| |
| emit->common_immediate_pos[n++] = |
| alloc_immediate_int4(emit, 22, 30, 0, 0); |
| } |
| |
| if (emit->vposition.num_prescale > 1) { |
| unsigned i; |
| for (i = 0; i < emit->vposition.num_prescale; i+=4) { |
| emit->common_immediate_pos[n++] = |
| alloc_immediate_int4(emit, i, i+1, i+2, i+3); |
| } |
| } |
| |
| emit->immediates_dbl = (double (*)[2]) emit->immediates; |
| |
| if (emit->info.opcode_count[TGSI_OPCODE_DNEG] > 0) { |
| emit->common_immediate_pos[n++] = |
| alloc_immediate_double2(emit, -1.0, -1.0); |
| } |
| |
| if (emit->info.opcode_count[TGSI_OPCODE_DSQRT] > 0) { |
| emit->common_immediate_pos[n++] = |
| alloc_immediate_double2(emit, 0.0, 0.0); |
| emit->common_immediate_pos[n++] = |
| alloc_immediate_double2(emit, 1.0, 1.0); |
| } |
| |
| if (emit->info.opcode_count[TGSI_OPCODE_INTERP_OFFSET] > 0) { |
| emit->common_immediate_pos[n++] = |
| alloc_immediate_float4(emit, 16.0f, -16.0f, 0.0, 0.0); |
| } |
| |
| assert(n <= ARRAY_SIZE(emit->common_immediate_pos)); |
| |
| unsigned i; |
| |
| for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { |
| if (emit->key.tex[i].texel_bias) { |
| /* Replace 0.0f if more immediate float value is needed */ |
| emit->common_immediate_pos[n++] = |
| alloc_immediate_float4(emit, 0.0001f, 0.0f, 0.0f, 0.0f); |
| break; |
| } |
| } |
| |
| assert(n <= ARRAY_SIZE(emit->common_immediate_pos)); |
| emit->num_common_immediates = n; |
| } |
| |
| |
| /** |
| * Emit hull shader declarations. |
| */ |
| static void |
| emit_hull_shader_declarations(struct svga_shader_emitter_v10 *emit) |
| { |
| VGPU10OpcodeToken0 opcode0; |
| |
| /* Emit the input control point count */ |
| assert(emit->key.tcs.vertices_per_patch > 0 && |
| emit->key.tcs.vertices_per_patch <= 32); |
| |
| opcode0.value = 0; |
| opcode0.opcodeType = VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT; |
| opcode0.controlPointCount = emit->key.tcs.vertices_per_patch; |
| begin_emit_instruction(emit); |
| emit_dword(emit, opcode0.value); |
| end_emit_instruction(emit); |
| |
| /* Emit the output control point count */ |
| assert(emit->key.tcs.vertices_out >= 0 && emit->key.tcs.vertices_out <= 32); |
| |
| opcode0.value = 0; |
| opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT; |
| opcode0.controlPointCount = emit->key.tcs.vertices_out; |
| begin_emit_instruction(emit); |
| emit_dword(emit, opcode0.value); |
| end_emit_instruction(emit); |
| |
| /* Emit tessellator domain */ |
| emit_tessellator_domain(emit, emit->key.tcs.prim_mode); |
| |
| /* Emit tessellator output primitive */ |
| opcode0.value = 0; |
| opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE; |
| if (emit->key.tcs.point_mode) { |
| opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_POINT; |
| } |
| else if (emit->key.tcs.prim_mode == PIPE_PRIM_LINES) { |
| opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_LINE; |
| } |
| else { |
| assert(emit->key.tcs.prim_mode == PIPE_PRIM_QUADS || |
| emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES); |
| |
| if (emit->key.tcs.vertices_order_cw) |
| opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_TRIANGLE_CCW; |
| else |
| opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_TRIANGLE_CW; |
| } |
| begin_emit_instruction(emit); |
| emit_dword(emit, opcode0.value); |
| end_emit_instruction(emit); |
| |
| /* Emit tessellator partitioning */ |
| opcode0.value = 0; |
| opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_PARTITIONING; |
| switch (emit->key.tcs.spacing) { |
| case PIPE_TESS_SPACING_FRACTIONAL_ODD: |
| opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD; |
| break; |
| case PIPE_TESS_SPACING_FRACTIONAL_EVEN: |
| opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN; |
| break; |
| case PIPE_TESS_SPACING_EQUAL: |
| opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_INTEGER; |
| break; |
| default: |
| debug_printf("invalid tessellator spacing %d\n", emit->key.tcs.spacing); |
| opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_UNDEFINED; |
| } |
| begin_emit_instruction(emit); |
| emit_dword(emit, opcode0.value); |
| end_emit_instruction(emit); |
| |
| /* Declare constant registers */ |
| emit_constant_declaration(emit); |
| |
| /* Declare samplers and resources */ |
| emit_sampler_declarations(emit); |
| emit_resource_declarations(emit); |
| |
| alloc_common_immediates(emit); |
| |
| int nVertices = emit->key.tcs.vertices_per_patch; |
| emit->tcs.imm_index = |
| alloc_immediate_int4(emit, nVertices, nVertices, nVertices, 0); |
| |
| /* Now, emit the constant block containing all the immediates |
| * declared by shader, as well as the extra ones seen above. |
| */ |
| emit_vgpu10_immediates_block(emit); |
| |
| } |
| |
| |
| /** |
| * A helper function to determine if control point phase is needed. |
| * Returns TRUE if there is control point output. |
| */ |
| static boolean |
| needs_control_point_phase(struct svga_shader_emitter_v10 *emit) |
| { |
| unsigned i; |
| |
| assert(emit->unit == PIPE_SHADER_TESS_CTRL); |
| |
| /* If output control point count does not match the input count, |
| * we need a control point phase to explicitly set the output control |
| * points. |
| */ |
| if ((emit->key.tcs.vertices_per_patch != emit->key.tcs.vertices_out) && |
| emit->key.tcs.vertices_out) |
| return TRUE; |
| |
| for (i = 0; i < emit->info.num_outputs; i++) { |
| switch (emit->info.output_semantic_name[i]) { |
| case TGSI_SEMANTIC_PATCH: |
| case TGSI_SEMANTIC_TESSOUTER: |
| case TGSI_SEMANTIC_TESSINNER: |
| break; |
| default: |
| return TRUE; |
| } |
| } |
| return FALSE; |
| } |
| |
| |
| /** |
| * A helper function to add shader signature for passthrough control point |
| * phase. This signature is also generated for passthrough control point |
| * phase from HLSL compiler and is needed by Metal Renderer. |
| */ |
| static void |
| emit_passthrough_control_point_signature(struct svga_shader_emitter_v10 *emit) |
| { |
| struct svga_shader_signature *sgn = &emit->signature; |
| SVGA3dDXShaderSignatureEntry *sgnEntry; |
| unsigned i; |
| |
| for (i = 0; i < emit->info.num_inputs; i++) { |
| unsigned index = emit->linkage.input_map[i]; |
| enum tgsi_semantic sem_name = emit->info.input_semantic_name[i]; |
| |
| sgnEntry = &sgn->inputs[sgn->header.numInputSignatures++]; |
| |
| set_shader_signature_entry(sgnEntry, index, |
| tgsi_semantic_to_sgn_name[sem_name], |
| VGPU10_OPERAND_4_COMPONENT_MASK_ALL, |
| SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN, |
| SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT); |
| |
| sgnEntry = &sgn->outputs[sgn->header.numOutputSignatures++]; |
| |
| set_shader_signature_entry(sgnEntry, i, |
| tgsi_semantic_to_sgn_name[sem_name], |
| VGPU10_OPERAND_4_COMPONENT_MASK_ALL, |
| SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN, |
| SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT); |
| } |
| } |
| |
| |
| /** |
| * A helper function to emit an instruction to start the control point phase |
| * in the hull shader. |
| */ |
| static void |
| emit_control_point_phase_instruction(struct svga_shader_emitter_v10 *emit) |
| { |
| VGPU10OpcodeToken0 opcode0; |
| |
| opcode0.value = 0; |
| opcode0.opcodeType = VGPU10_OPCODE_HS_CONTROL_POINT_PHASE; |
| begin_emit_instruction(emit); |
| emit_dword(emit, opcode0.value); |
| end_emit_instruction(emit); |
| } |
| |
| |
| /** |
| * Start the hull shader control point phase |
| */ |
| static boolean |
| emit_hull_shader_control_point_phase(struct svga_shader_emitter_v10 *emit) |
| { |
| /* If there is no control point output, skip the control point phase. */ |
| if (!needs_control_point_phase(emit)) { |
| if (!emit->key.tcs.vertices_out) { |
| /** |
| * If the tcs does not explicitly generate any control point output |
| * and the tes does not use any input control point, then |
| * emit an empty control point phase with zero output control |
| * point count. |
| */ |
| emit_control_point_phase_instruction(emit); |
| |
| /** |
| * Since this is an empty control point phase, we will need to |
| * add input signatures when we parse the tcs again in the |
| * patch constant phase. |
| */ |
| emit->tcs.fork_phase_add_signature = TRUE; |
| } |
| else { |
| /** |
| * Before skipping the control point phase, add the signature for |
| * the passthrough control point. |
| */ |
| emit_passthrough_control_point_signature(emit); |
| } |
| return FALSE; |
| } |
| |
| /* Start the control point phase in the hull shader */ |
| emit_control_point_phase_instruction(emit); |
| |
| /* Declare the output control point ID */ |
| if (emit->tcs.invocation_id_sys_index == INVALID_INDEX) { |
| /* Add invocation id declaration if it does not exist */ |
| emit->tcs.invocation_id_sys_index = emit->info.num_system_values + 1; |
| } |
| |
| emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, |
| VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID, |
| VGPU10_OPERAND_INDEX_0D, |
| 0, 1, |
| VGPU10_NAME_UNDEFINED, |
| VGPU10_OPERAND_0_COMPONENT, 0, |
| 0, |
| VGPU10_INTERPOLATION_CONSTANT, TRUE, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); |
| |
| if (emit->tcs.prim_id_index != INVALID_INDEX) { |
| emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, |
| VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID, |
| VGPU10_OPERAND_INDEX_0D, |
| 0, 1, |
| VGPU10_NAME_UNDEFINED, |
| VGPU10_OPERAND_0_COMPONENT, |
| VGPU10_OPERAND_4_COMPONENT_MASK_MODE, |
| 0, |
| VGPU10_INTERPOLATION_UNDEFINED, TRUE, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID); |
| } |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Start the hull shader patch constant phase and |
| * do the second pass of the tcs translation and emit |
| * the relevant declarations and instructions for this phase. |
| */ |
| static boolean |
| emit_hull_shader_patch_constant_phase(struct svga_shader_emitter_v10 *emit, |
| struct tgsi_parse_context *parse) |
| { |
| unsigned inst_number = 0; |
| boolean ret = TRUE; |
| VGPU10OpcodeToken0 opcode0; |
| |
| emit->skip_instruction = FALSE; |
| |
| /* Start the patch constant phase */ |
| opcode0.value = 0; |
| opcode0.opcodeType = VGPU10_OPCODE_HS_FORK_PHASE; |
| begin_emit_instruction(emit); |
| emit_dword(emit, opcode0.value); |
| end_emit_instruction(emit); |
| |
| /* Set the current phase to patch constant phase */ |
| emit->tcs.control_point_phase = FALSE; |
| |
| if (emit->tcs.prim_id_index != INVALID_INDEX) { |
| emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, |
| VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID, |
| VGPU10_OPERAND_INDEX_0D, |
| 0, 1, |
| VGPU10_NAME_UNDEFINED, |
| VGPU10_OPERAND_0_COMPONENT, |
| VGPU10_OPERAND_4_COMPONENT_MASK_MODE, |
| 0, |
| VGPU10_INTERPOLATION_UNDEFINED, TRUE, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID); |
| } |
| |
| /* Emit declarations for this phase */ |
| emit->index_range.required = |
| emit->info.indirect_files & (1 << TGSI_FILE_INPUT) ? TRUE : FALSE; |
| emit_tcs_input_declarations(emit); |
| |
| if (emit->index_range.start_index != INVALID_INDEX) { |
| emit_index_range_declaration(emit); |
| } |
| |
| emit->index_range.required = |
| emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT) ? TRUE : FALSE; |
| emit_tcs_output_declarations(emit); |
| |
| if (emit->index_range.start_index != INVALID_INDEX) { |
| emit_index_range_declaration(emit); |
| } |
| emit->index_range.required = FALSE; |
| |
| emit_temporaries_declaration(emit); |
| |
| /* Reset the token position to the first instruction token |
| * in preparation for the second pass of the shader |
| */ |
| parse->Position = emit->tcs.instruction_token_pos; |
| |
| while (!tgsi_parse_end_of_tokens(parse)) { |
| tgsi_parse_token(parse); |
| |
| assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION); |
| ret = emit_vgpu10_instruction(emit, inst_number++, |
| &parse->FullToken.FullInstruction); |
| |
| /* Usually this applies to TCS only. If shader is reading output of |
| * patch constant in fork phase, we should reemit all instructions |
| * which are writting into ouput of patch constant in fork phase |
| * to store results into temporaries. |
| */ |
| if (emit->reemit_instruction) { |
| assert(emit->unit == PIPE_SHADER_TESS_CTRL); |
| ret = emit_vgpu10_instruction(emit, inst_number, |
| &parse->FullToken.FullInstruction); |
| } |
| |
| if (!ret) |
| return FALSE; |
| } |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit index range declaration. |
| */ |
| static boolean |
| emit_index_range_declaration(struct svga_shader_emitter_v10 *emit) |
| { |
| if (emit->version < 50) |
| return TRUE; |
| |
| assert(emit->index_range.start_index != INVALID_INDEX); |
| assert(emit->index_range.count != 0); |
| assert(emit->index_range.required); |
| assert(emit->index_range.operandType != VGPU10_NUM_OPERANDS); |
| assert(emit->index_range.dim != 0); |
| assert(emit->index_range.size != 0); |
| |
| VGPU10OpcodeToken0 opcode0; |
| VGPU10OperandToken0 operand0; |
| |
| opcode0.value = 0; |
| opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEX_RANGE; |
| |
| operand0.value = 0; |
| operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; |
| operand0.indexDimension = emit->index_range.dim; |
| operand0.operandType = emit->index_range.operandType; |
| operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL; |
| operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; |
| |
| if (emit->index_range.dim == VGPU10_OPERAND_INDEX_2D) |
| operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; |
| |
| begin_emit_instruction(emit); |
| emit_dword(emit, opcode0.value); |
| emit_dword(emit, operand0.value); |
| |
| if (emit->index_range.dim == VGPU10_OPERAND_INDEX_2D) { |
| emit_dword(emit, emit->index_range.size); |
| emit_dword(emit, emit->index_range.start_index); |
| emit_dword(emit, emit->index_range.count); |
| } |
| else { |
| emit_dword(emit, emit->index_range.start_index); |
| emit_dword(emit, emit->index_range.count); |
| } |
| |
| end_emit_instruction(emit); |
| |
| /* Reset fields in emit->index_range struct except |
| * emit->index_range.required which will be reset afterwards |
| */ |
| emit->index_range.count = 0; |
| emit->index_range.operandType = VGPU10_NUM_OPERANDS; |
| emit->index_range.start_index = INVALID_INDEX; |
| emit->index_range.size = 0; |
| emit->index_range.dim = 0; |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit a vgpu10 declaration "instruction". |
| * \param index the register index |
| * \param size array size of the operand. In most cases, it is 1, |
| * but for inputs to geometry shader, the array size varies |
| * depending on the primitive type. |
| */ |
| static void |
| emit_decl_instruction(struct svga_shader_emitter_v10 *emit, |
| VGPU10OpcodeToken0 opcode0, |
| VGPU10OperandToken0 operand0, |
| VGPU10NameToken name_token, |
| unsigned index, unsigned size) |
| { |
| assert(opcode0.opcodeType); |
| assert(operand0.mask || |
| (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT) || |
| (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_DEPTH) || |
| (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK) || |
| (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) || |
| (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) || |
| (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID) || |
| (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK) || |
| (operand0.operandType == VGPU10_OPERAND_TYPE_STREAM)); |
| |
| begin_emit_instruction(emit); |
| emit_dword(emit, opcode0.value); |
| |
| emit_dword(emit, operand0.value); |
| |
| if (operand0.indexDimension == VGPU10_OPERAND_INDEX_1D) { |
| /* Next token is the index of the register to declare */ |
| emit_dword(emit, index); |
| } |
| else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_2D) { |
| /* Next token is the size of the register */ |
| emit_dword(emit, size); |
| |
| /* Followed by the index of the register */ |
| emit_dword(emit, index); |
| } |
| |
| if (name_token.value) { |
| emit_dword(emit, name_token.value); |
| } |
| |
| end_emit_instruction(emit); |
| } |
| |
| |
| /** |
| * Emit the declaration for a shader input. |
| * \param opcodeType opcode type, one of VGPU10_OPCODE_DCL_INPUTx |
| * \param operandType operand type, one of VGPU10_OPERAND_TYPE_INPUT_x |
| * \param dim index dimension |
| * \param index the input register index |
| * \param size array size of the operand. In most cases, it is 1, |
| * but for inputs to geometry shader, the array size varies |
| * depending on the primitive type. For tessellation control |
| * shader, the array size is the vertex count per patch. |
| * \param name one of VGPU10_NAME_x |
| * \parma numComp number of components |
| * \param selMode component selection mode |
| * \param usageMask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values |
| * \param interpMode interpolation mode |
| */ |
| static void |
| emit_input_declaration(struct svga_shader_emitter_v10 *emit, |
| VGPU10_OPCODE_TYPE opcodeType, |
| VGPU10_OPERAND_TYPE operandType, |
| VGPU10_OPERAND_INDEX_DIMENSION dim, |
| unsigned index, unsigned size, |
| VGPU10_SYSTEM_NAME name, |
| VGPU10_OPERAND_NUM_COMPONENTS numComp, |
| VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE selMode, |
| unsigned usageMask, |
| VGPU10_INTERPOLATION_MODE interpMode, |
| boolean addSignature, |
| SVGA3dDXSignatureSemanticName sgnName) |
| { |
| VGPU10OpcodeToken0 opcode0; |
| VGPU10OperandToken0 operand0; |
| VGPU10NameToken name_token; |
| |
| assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL); |
| assert(opcodeType == VGPU10_OPCODE_DCL_INPUT || |
| opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV || |
| opcodeType == VGPU10_OPCODE_DCL_INPUT_SGV || |
| opcodeType == VGPU10_OPCODE_DCL_INPUT_PS || |
| opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SIV || |
| opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV); |
| assert(operandType == VGPU10_OPERAND_TYPE_INPUT || |
| operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID || |
| operandType == VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK || |
| operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID || |
| operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID || |
| operandType == VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT || |
| operandType == VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT || |
| operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT || |
| operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID || |
| operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID || |
| operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP); |
| |
| assert(numComp <= VGPU10_OPERAND_4_COMPONENT); |
| assert(selMode <= VGPU10_OPERAND_4_COMPONENT_MASK_MODE); |
| assert(dim <= VGPU10_OPERAND_INDEX_3D); |
| assert(name == VGPU10_NAME_UNDEFINED || |
| name == VGPU10_NAME_POSITION || |
| name == VGPU10_NAME_INSTANCE_ID || |
| name == VGPU10_NAME_VERTEX_ID || |
| name == VGPU10_NAME_PRIMITIVE_ID || |
| name == VGPU10_NAME_IS_FRONT_FACE || |
| name == VGPU10_NAME_SAMPLE_INDEX || |
| name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX || |
| name == VGPU10_NAME_VIEWPORT_ARRAY_INDEX); |
| |
| assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED || |
| interpMode == VGPU10_INTERPOLATION_CONSTANT || |
| interpMode == VGPU10_INTERPOLATION_LINEAR || |
| interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID || |
| interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE || |
| interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID || |
| interpMode == VGPU10_INTERPOLATION_LINEAR_SAMPLE || |
| interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE); |
| |
| check_register_index(emit, opcodeType, index); |
| |
| opcode0.value = operand0.value = name_token.value = 0; |
| |
| opcode0.opcodeType = opcodeType; |
| opcode0.interpolationMode = interpMode; |
| |
| operand0.operandType = operandType; |
| operand0.numComponents = numComp; |
| operand0.selectionMode = selMode; |
| operand0.mask = usageMask; |
| operand0.indexDimension = dim; |
| operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; |
| if (dim == VGPU10_OPERAND_INDEX_2D) |
| operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; |
| |
| name_token.name = name; |
| |
| emit_decl_instruction(emit, opcode0, operand0, name_token, index, size); |
| |
| if (addSignature) { |
| struct svga_shader_signature *sgn = &emit->signature; |
| if (operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT) { |
| /* Set patch constant signature */ |
| SVGA3dDXShaderSignatureEntry *sgnEntry = |
| &sgn->patchConstants[sgn->header.numPatchConstantSignatures++]; |
| set_shader_signature_entry(sgnEntry, index, |
| sgnName, usageMask, |
| SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN, |
| SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT); |
| |
| } else if (operandType == VGPU10_OPERAND_TYPE_INPUT || |
| operandType == VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT) { |
| /* Set input signature */ |
| SVGA3dDXShaderSignatureEntry *sgnEntry = |
| &sgn->inputs[sgn->header.numInputSignatures++]; |
| set_shader_signature_entry(sgnEntry, index, |
| sgnName, usageMask, |
| SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN, |
| SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT); |
| } |
| } |
| |
| if (emit->index_range.required) { |
| /* Here, index_range declaration is only applicable for opcodeType |
| * VGPU10_OPCODE_DCL_INPUT and VGPU10_OPCODE_DCL_INPUT_PS and |
| * for operandType VGPU10_OPERAND_TYPE_INPUT, |
| * VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT and |
| * VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT. |
| */ |
| if ((opcodeType != VGPU10_OPCODE_DCL_INPUT && |
| opcodeType != VGPU10_OPCODE_DCL_INPUT_PS) || |
| (operandType != VGPU10_OPERAND_TYPE_INPUT && |
| operandType != VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT && |
| operandType != VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT)) { |
| if (emit->index_range.start_index != INVALID_INDEX) { |
| emit_index_range_declaration(emit); |
| } |
| return; |
| } |
| |
| if (emit->index_range.operandType == VGPU10_NUM_OPERANDS) { |
| /* Need record new index_range */ |
| emit->index_range.count = 1; |
| emit->index_range.operandType = operandType; |
| emit->index_range.start_index = index; |
| emit->index_range.size = size; |
| emit->index_range.dim = dim; |
| } |
| else if (index != |
| (emit->index_range.start_index + emit->index_range.count) || |
| emit->index_range.operandType != operandType) { |
| /* Input index is not contiguous with index range or operandType is |
| * different from index range's operandType. We need to emit current |
| * index_range first and then start recording next index range. |
| */ |
| emit_index_range_declaration(emit); |
| |
| emit->index_range.count = 1; |
| emit->index_range.operandType = operandType; |
| emit->index_range.start_index = index; |
| emit->index_range.size = size; |
| emit->index_range.dim = dim; |
| } |
| else if (emit->index_range.operandType == operandType) { |
| /* Since input index is contiguous with index range and operandType |
| * is same as index range's operandType, increment index range count. |
| */ |
| emit->index_range.count++; |
| } |
| } |
| } |
| |
| |
| /** |
| * Emit the declaration for a shader output. |
| * \param type one of VGPU10_OPCODE_DCL_OUTPUTx |
| * \param index the output register index |
| * \param name one of VGPU10_NAME_x |
| * \param usageMask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values |
| */ |
| static void |
| emit_output_declaration(struct svga_shader_emitter_v10 *emit, |
| VGPU10_OPCODE_TYPE type, unsigned index, |
| VGPU10_SYSTEM_NAME name, |
| unsigned writemask, |
| boolean addSignature, |
| SVGA3dDXSignatureSemanticName sgnName) |
| { |
| VGPU10OpcodeToken0 opcode0; |
| VGPU10OperandToken0 operand0; |
| VGPU10NameToken name_token; |
| |
| assert(writemask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL); |
| assert(type == VGPU10_OPCODE_DCL_OUTPUT || |
| type == VGPU10_OPCODE_DCL_OUTPUT_SGV || |
| type == VGPU10_OPCODE_DCL_OUTPUT_SIV); |
| assert(name == VGPU10_NAME_UNDEFINED || |
| name == VGPU10_NAME_POSITION || |
| name == VGPU10_NAME_PRIMITIVE_ID || |
| name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX || |
| name == VGPU10_NAME_VIEWPORT_ARRAY_INDEX || |
| name == VGPU10_NAME_CLIP_DISTANCE); |
| |
| check_register_index(emit, type, index); |
| |
| opcode0.value = operand0.value = name_token.value = 0; |
| |
| opcode0.opcodeType = type; |
| operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT; |
| operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; |
| operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; |
| operand0.mask = writemask; |
| operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; |
| operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; |
| |
| name_token.name = name; |
| |
| emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1); |
| |
| /* Capture output signature */ |
| if (addSignature) { |
| struct svga_shader_signature *sgn = &emit->signature; |
| SVGA3dDXShaderSignatureEntry *sgnEntry = |
| &sgn->outputs[sgn->header.numOutputSignatures++]; |
| set_shader_signature_entry(sgnEntry, index, |
| sgnName, writemask, |
| SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN, |
| SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT); |
| } |
| |
| if (emit->index_range.required) { |
| /* Here, index_range declaration is only applicable for opcodeType |
| * VGPU10_OPCODE_DCL_OUTPUT and for operandType |
| * VGPU10_OPERAND_TYPE_OUTPUT. |
| */ |
| if (type != VGPU10_OPCODE_DCL_OUTPUT) { |
| if (emit->index_range.start_index != INVALID_INDEX) { |
| emit_index_range_declaration(emit); |
| } |
| return; |
| } |
| |
| if (emit->index_range.operandType == VGPU10_NUM_OPERANDS) { |
| /* Need record new index_range */ |
| emit->index_range.count = 1; |
| emit->index_range.operandType = VGPU10_OPERAND_TYPE_OUTPUT; |
| emit->index_range.start_index = index; |
| emit->index_range.size = 1; |
| emit->index_range.dim = VGPU10_OPERAND_INDEX_1D; |
| } |
| else if (index != |
| (emit->index_range.start_index + emit->index_range.count)) { |
| /* Output index is not contiguous with index range. We need to |
| * emit current index_range first and then start recording next |
| * index range. |
| */ |
| emit_index_range_declaration(emit); |
| |
| emit->index_range.count = 1; |
| emit->index_range.operandType = VGPU10_OPERAND_TYPE_OUTPUT; |
| emit->index_range.start_index = index; |
| emit->index_range.size = 1; |
| emit->index_range.dim = VGPU10_OPERAND_INDEX_1D; |
| } |
| else { |
| /* Since output index is contiguous with index range, increment |
| * index range count. |
| */ |
| emit->index_range.count++; |
| } |
| } |
| } |
| |
| |
| /** |
| * Emit the declaration for the fragment depth output. |
| */ |
| static void |
| emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit) |
| { |
| VGPU10OpcodeToken0 opcode0; |
| VGPU10OperandToken0 operand0; |
| VGPU10NameToken name_token; |
| |
| assert(emit->unit == PIPE_SHADER_FRAGMENT); |
| |
| opcode0.value = operand0.value = name_token.value = 0; |
| |
| opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT; |
| operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH; |
| operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; |
| operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; |
| operand0.mask = 0; |
| |
| emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1); |
| } |
| |
| |
| /** |
| * Emit the declaration for the fragment sample mask/coverage output. |
| */ |
| static void |
| emit_samplemask_output_declaration(struct svga_shader_emitter_v10 *emit) |
| { |
| VGPU10OpcodeToken0 opcode0; |
| VGPU10OperandToken0 operand0; |
| VGPU10NameToken name_token; |
| |
| assert(emit->unit == PIPE_SHADER_FRAGMENT); |
| assert(emit->version >= 41); |
| |
| opcode0.value = operand0.value = name_token.value = 0; |
| |
| opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT; |
| operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK; |
| operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; |
| operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; |
| operand0.mask = 0; |
| |
| emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1); |
| } |
| |
| |
| /** |
| * Emit output declarations for fragment shader. |
| */ |
| static void |
| emit_fs_output_declarations(struct svga_shader_emitter_v10 *emit) |
| { |
| unsigned int i; |
| |
| for (i = 0; i < emit->info.num_outputs; i++) { |
| /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/ |
| const enum tgsi_semantic semantic_name = |
| emit->info.output_semantic_name[i]; |
| const unsigned semantic_index = emit->info.output_semantic_index[i]; |
| unsigned index = i; |
| |
| if (semantic_name == TGSI_SEMANTIC_COLOR) { |
| assert(semantic_index < ARRAY_SIZE(emit->fs.color_out_index)); |
| |
| emit->fs.color_out_index[semantic_index] = index; |
| |
| emit->fs.num_color_outputs = MAX2(emit->fs.num_color_outputs, |
| index + 1); |
| |
| /* The semantic index is the shader's color output/buffer index */ |
| emit_output_declaration(emit, |
| VGPU10_OPCODE_DCL_OUTPUT, semantic_index, |
| VGPU10_NAME_UNDEFINED, |
| VGPU10_OPERAND_4_COMPONENT_MASK_ALL, |
| TRUE, |
| map_tgsi_semantic_to_sgn_name(semantic_name)); |
| |
| if (semantic_index == 0) { |
| if (emit->key.fs.write_color0_to_n_cbufs > 1) { |
| /* Emit declarations for the additional color outputs |
| * for broadcasting. |
| */ |
| unsigned j; |
| for (j = 1; j < emit->key.fs.write_color0_to_n_cbufs; j++) { |
| /* Allocate a new output index */ |
| unsigned idx = emit->info.num_outputs + j - 1; |
| emit->fs.color_out_index[j] = idx; |
| emit_output_declaration(emit, |
| VGPU10_OPCODE_DCL_OUTPUT, idx, |
| VGPU10_NAME_UNDEFINED, |
| VGPU10_OPERAND_4_COMPONENT_MASK_ALL, |
| TRUE, |
| map_tgsi_semantic_to_sgn_name(semantic_name)); |
| emit->info.output_semantic_index[idx] = j; |
| } |
| |
| emit->fs.num_color_outputs = |
| emit->key.fs.write_color0_to_n_cbufs; |
| } |
| } |
| } |
| else if (semantic_name == TGSI_SEMANTIC_POSITION) { |
| /* Fragment depth output */ |
| emit_fragdepth_output_declaration(emit); |
| } |
| else if (semantic_name == TGSI_SEMANTIC_SAMPLEMASK) { |
| /* Sample mask output */ |
| emit_samplemask_output_declaration(emit); |
| } |
| else { |
| assert(!"Bad output semantic name"); |
| } |
| } |
| } |
| |
| |
| /** |
| * Emit common output declaration for vertex processing. |
| */ |
| static void |
| emit_vertex_output_declaration(struct svga_shader_emitter_v10 *emit, |
| unsigned index, unsigned writemask, |
| boolean addSignature) |
| { |
| const enum tgsi_semantic semantic_name = |
| emit->info.output_semantic_name[index]; |
| const unsigned semantic_index = emit->info.output_semantic_index[index]; |
| unsigned name, type; |
| unsigned final_mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL; |
| |
| assert(emit->unit != PIPE_SHADER_FRAGMENT && |
| emit->unit != PIPE_SHADER_COMPUTE); |
| |
| switch (semantic_name) { |
| case TGSI_SEMANTIC_POSITION: |
| if (emit->unit == PIPE_SHADER_TESS_CTRL) { |
| /* position will be declared in control point only */ |
| assert(emit->tcs.control_point_phase); |
| type = VGPU10_OPCODE_DCL_OUTPUT; |
| name = VGPU10_NAME_UNDEFINED; |
| emit_output_declaration(emit, type, index, name, final_mask, TRUE, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); |
| return; |
| } |
| else { |
| type = VGPU10_OPCODE_DCL_OUTPUT_SIV; |
| name = VGPU10_NAME_POSITION; |
| } |
| /* Save the index of the vertex position output register */ |
| emit->vposition.out_index = index; |
| break; |
| case TGSI_SEMANTIC_CLIPDIST: |
| type = VGPU10_OPCODE_DCL_OUTPUT_SIV; |
| name = VGPU10_NAME_CLIP_DISTANCE; |
| /* save the starting index of the clip distance output register */ |
| if (semantic_index == 0) |
| emit->clip_dist_out_index = index; |
| final_mask = apply_clip_plane_mask(emit, writemask, semantic_index); |
| if (final_mask == 0x0) |
| return; /* discard this do-nothing declaration */ |
| break; |
| case TGSI_SEMANTIC_CLIPVERTEX: |
| type = VGPU10_OPCODE_DCL_OUTPUT; |
| name = VGPU10_NAME_UNDEFINED; |
| emit->clip_vertex_out_index = index; |
| break; |
| default: |
| /* generic output */ |
| type = VGPU10_OPCODE_DCL_OUTPUT; |
| name = VGPU10_NAME_UNDEFINED; |
| } |
| |
| emit_output_declaration(emit, type, index, name, final_mask, addSignature, |
| map_tgsi_semantic_to_sgn_name(semantic_name)); |
| } |
| |
| |
| /** |
| * Emit declaration for outputs in vertex shader. |
| */ |
| static void |
| emit_vs_output_declarations(struct svga_shader_emitter_v10 *emit) |
| { |
| unsigned i; |
| for (i = 0; i < emit->info.num_outputs; i++) { |
| emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], TRUE); |
| } |
| } |
| |
| |
| /** |
| * A helper function to determine the writemask for an output |
| * for the specified stream. |
| */ |
| static unsigned |
| output_writemask_for_stream(unsigned stream, ubyte output_streams, |
| ubyte output_usagemask) |
| { |
| unsigned i; |
| unsigned writemask = 0; |
| |
| for (i = 0; i < 4; i++) { |
| if ((output_streams & 0x3) == stream) |
| writemask |= (VGPU10_OPERAND_4_COMPONENT_MASK_X << i); |
| output_streams >>= 2; |
| } |
| return writemask & output_usagemask; |
| } |
| |
| |
| /** |
| * Emit declaration for outputs in geometry shader. |
| */ |
| static void |
| emit_gs_output_declarations(struct svga_shader_emitter_v10 *emit) |
| { |
| unsigned i; |
| VGPU10OpcodeToken0 opcode0; |
| unsigned numStreamsSupported = 1; |
| int s; |
| |
| if (emit->version >= 50) { |
| numStreamsSupported = ARRAY_SIZE(emit->info.num_stream_output_components); |
| } |
| |
| /** |
| * Start emitting from the last stream first, so we end with |
| * stream 0, so any of the auxiliary output declarations will |
| * go to stream 0. |
| */ |
| for (s = numStreamsSupported-1; s >= 0; s--) { |
| |
| if (emit->info.num_stream_output_components[s] == 0) |
| continue; |
| |
| if (emit->version >= 50) { |
| /* DCL_STREAM stream */ |
| begin_emit_instruction(emit); |
| emit_opcode(emit, VGPU10_OPCODE_DCL_STREAM, FALSE); |
| emit_stream_register(emit, s); |
| end_emit_instruction(emit); |
| } |
| |
| /* emit output primitive topology declaration */ |
| opcode0.value = 0; |
| opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY; |
| opcode0.primitiveTopology = emit->gs.prim_topology; |
| emit_property_instruction(emit, opcode0, 0, 0); |
| |
| for (i = 0; i < emit->info.num_outputs; i++) { |
| unsigned writemask; |
| |
| /* find out the writemask for this stream */ |
| writemask = output_writemask_for_stream(s, emit->info.output_streams[i], |
| emit->output_usage_mask[i]); |
| |
| if (writemask) { |
| enum tgsi_semantic semantic_name = |
| emit->info.output_semantic_name[i]; |
| |
| /* TODO: Still need to take care of a special case where a |
| * single varying spans across multiple output registers. |
| */ |
| switch(semantic_name) { |
| case TGSI_SEMANTIC_PRIMID: |
| emit_output_declaration(emit, |
| VGPU10_OPCODE_DCL_OUTPUT_SGV, i, |
| VGPU10_NAME_PRIMITIVE_ID, |
| VGPU10_OPERAND_4_COMPONENT_MASK_ALL, |
| FALSE, |
| map_tgsi_semantic_to_sgn_name(semantic_name)); |
| break; |
| case TGSI_SEMANTIC_LAYER: |
| emit_output_declaration(emit, |
| VGPU10_OPCODE_DCL_OUTPUT_SIV, i, |
| VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX, |
| VGPU10_OPERAND_4_COMPONENT_MASK_X, |
| FALSE, |
| map_tgsi_semantic_to_sgn_name(semantic_name)); |
| break; |
| case TGSI_SEMANTIC_VIEWPORT_INDEX: |
| emit_output_declaration(emit, |
| VGPU10_OPCODE_DCL_OUTPUT_SIV, i, |
| VGPU10_NAME_VIEWPORT_ARRAY_INDEX, |
| VGPU10_OPERAND_4_COMPONENT_MASK_X, |
| FALSE, |
| map_tgsi_semantic_to_sgn_name(semantic_name)); |
| emit->gs.viewport_index_out_index = i; |
| break; |
| default: |
| emit_vertex_output_declaration(emit, i, writemask, FALSE); |
| } |
| } |
| } |
| } |
| |
| /* For geometry shader outputs, it is possible the same register is |
| * declared multiple times for different streams. So to avoid |
| * redundant signature entries, geometry shader output signature is done |
| * outside of the declaration. |
| */ |
| struct svga_shader_signature *sgn = &emit->signature; |
| SVGA3dDXShaderSignatureEntry *sgnEntry; |
| |
| for (i = 0; i < emit->info.num_outputs; i++) { |
| if (emit->output_usage_mask[i]) { |
| enum tgsi_semantic sem_name = emit->info.output_semantic_name[i]; |
| |
| sgnEntry = &sgn->outputs[sgn->header.numOutputSignatures++]; |
| set_shader_signature_entry(sgnEntry, i, |
| map_tgsi_semantic_to_sgn_name(sem_name), |
| emit->output_usage_mask[i], |
| SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN, |
| SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT); |
| } |
| } |
| } |
| |
| |
| /** |
| * Emit the declaration for the tess inner/outer output. |
| * \param opcodeType either VGPU10_OPCODE_DCL_OUTPUT_SIV or _INPUT_SIV |
| * \param operandType either VGPU10_OPERAND_TYPE_OUTPUT or _INPUT |
| * \param name VGPU10_NAME_FINAL_*_TESSFACTOR value |
| */ |
| static void |
| emit_tesslevel_declaration(struct svga_shader_emitter_v10 *emit, |
| unsigned index, unsigned opcodeType, |
| unsigned operandType, VGPU10_SYSTEM_NAME name, |
| SVGA3dDXSignatureSemanticName sgnName) |
| { |
| VGPU10OpcodeToken0 opcode0; |
| VGPU10OperandToken0 operand0; |
| VGPU10NameToken name_token; |
| |
| assert(emit->version >= 50); |
| assert(name >= VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR || |
| (emit->key.tcs.prim_mode == PIPE_PRIM_LINES && |
| name == VGPU10_NAME_UNDEFINED)); |
| assert(name <= VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR); |
| |
| assert(operandType == VGPU10_OPERAND_TYPE_OUTPUT || |
| operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT); |
| |
| opcode0.value = operand0.value = name_token.value = 0; |
| |
| opcode0.opcodeType = opcodeType; |
| operand0.operandType = operandType; |
| operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; |
| operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; |
| operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_X; |
| operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; |
| operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; |
| |
| name_token.name = name; |
| emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1); |
| |
| /* Capture patch constant signature */ |
| struct svga_shader_signature *sgn = &emit->signature; |
| SVGA3dDXShaderSignatureEntry *sgnEntry = |
| &sgn->patchConstants[sgn->header.numPatchConstantSignatures++]; |
| set_shader_signature_entry(sgnEntry, index, |
| sgnName, VGPU10_OPERAND_4_COMPONENT_MASK_X, |
| SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN, |
| SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT); |
| } |
| |
| |
| /** |
| * Emit output declarations for tessellation control shader. |
| */ |
| static void |
| emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit) |
| { |
| unsigned int i; |
| unsigned outputIndex = emit->num_outputs; |
| struct svga_shader_signature *sgn = &emit->signature; |
| |
| /** |
| * Initialize patch_generic_out_count so it won't be counted twice |
| * since this function is called twice, one for control point phase |
| * and another time for patch constant phase. |
| */ |
| emit->tcs.patch_generic_out_count = 0; |
| |
| for (i = 0; i < emit->info.num_outputs; i++) { |
| unsigned index = i; |
| const enum tgsi_semantic semantic_name = |
| emit->info.output_semantic_name[i]; |
| |
| switch (semantic_name) { |
| case TGSI_SEMANTIC_TESSINNER: |
| emit->tcs.inner.tgsi_index = i; |
| |
| /* skip per-patch output declarations in control point phase */ |
| if (emit->tcs.control_point_phase) |
| break; |
| |
| emit->tcs.inner.out_index = outputIndex; |
| switch (emit->key.tcs.prim_mode) { |
| case PIPE_PRIM_QUADS: |
| emit_tesslevel_declaration(emit, outputIndex++, |
| VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, |
| VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR); |
| |
| emit_tesslevel_declaration(emit, outputIndex++, |
| VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, |
| VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR); |
| break; |
| case PIPE_PRIM_TRIANGLES: |
| emit_tesslevel_declaration(emit, outputIndex++, |
| VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, |
| VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR); |
| break; |
| case PIPE_PRIM_LINES: |
| break; |
| default: |
| debug_printf("Unsupported primitive type"); |
| } |
| break; |
| |
| case TGSI_SEMANTIC_TESSOUTER: |
| emit->tcs.outer.tgsi_index = i; |
| |
| /* skip per-patch output declarations in control point phase */ |
| if (emit->tcs.control_point_phase) |
| break; |
| |
| emit->tcs.outer.out_index = outputIndex; |
| switch (emit->key.tcs.prim_mode) { |
| case PIPE_PRIM_QUADS: |
| for (int j = 0; j < 4; j++) { |
| emit_tesslevel_declaration(emit, outputIndex++, |
| VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, |
| VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + j, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + j); |
| } |
| break; |
| case PIPE_PRIM_TRIANGLES: |
| for (int j = 0; j < 3; j++) { |
| emit_tesslevel_declaration(emit, outputIndex++, |
| VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, |
| VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + j, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + j); |
| } |
| break; |
| case PIPE_PRIM_LINES: |
| for (int j = 0; j < 2; j++) { |
| emit_tesslevel_declaration(emit, outputIndex++, |
| VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, |
| VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR + j, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR + j); |
| } |
| break; |
| default: |
| debug_printf("Unsupported primitive type"); |
| } |
| break; |
| |
| case TGSI_SEMANTIC_PATCH: |
| if (emit->tcs.patch_generic_out_index == INVALID_INDEX) |
| emit->tcs.patch_generic_out_index= i; |
| emit->tcs.patch_generic_out_count++; |
| |
| /* skip per-patch output declarations in control point phase */ |
| if (emit->tcs.control_point_phase) |
| break; |
| |
| emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, index, |
| VGPU10_NAME_UNDEFINED, |
| VGPU10_OPERAND_4_COMPONENT_MASK_ALL, |
| FALSE, |
| map_tgsi_semantic_to_sgn_name(semantic_name)); |
| |
| SVGA3dDXShaderSignatureEntry *sgnEntry = |
| &sgn->patchConstants[sgn->header.numPatchConstantSignatures++]; |
| set_shader_signature_entry(sgnEntry, index, |
| map_tgsi_semantic_to_sgn_name(semantic_name), |
| VGPU10_OPERAND_4_COMPONENT_MASK_ALL, |
| SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN, |
| SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT); |
| |
| break; |
| |
| default: |
| /* save the starting index of control point outputs */ |
| if (emit->tcs.control_point_out_index == INVALID_INDEX) |
| emit->tcs.control_point_out_index = i; |
| emit->tcs.control_point_out_count++; |
| |
| /* skip control point output declarations in patch constant phase */ |
| if (!emit->tcs.control_point_phase) |
| break; |
| |
| emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], |
| TRUE); |
| |
| } |
| } |
| |
| if (emit->tcs.control_point_phase) { |
| /** |
| * Add missing control point output in control point phase. |
| */ |
| if (emit->tcs.control_point_out_index == INVALID_INDEX) { |
| /* use register index after tessellation factors */ |
| switch (emit->key.tcs.prim_mode) { |
| case PIPE_PRIM_QUADS: |
| emit->tcs.control_point_out_index = outputIndex + 6; |
| break; |
| case PIPE_PRIM_TRIANGLES: |
| emit->tcs.control_point_out_index = outputIndex + 4; |
| break; |
| default: |
| emit->tcs.control_point_out_index = outputIndex + 2; |
| break; |
| } |
| emit->tcs.control_point_out_count++; |
| emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, |
| emit->tcs.control_point_out_index, |
| VGPU10_NAME_POSITION, |
| VGPU10_OPERAND_4_COMPONENT_MASK_ALL, |
| TRUE, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION); |
| |
| /* If tcs does not output any control point output, |
| * we can end the hull shader control point phase here |
| * after emitting the default control point output. |
| */ |
| emit->skip_instruction = TRUE; |
| } |
| } |
| else { |
| if (emit->tcs.outer.out_index == INVALID_INDEX) { |
| /* since the TCS did not declare out outer tess level output register, |
| * we declare it here for patch constant phase only. |
| */ |
| emit->tcs.outer.out_index = outputIndex; |
| if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) { |
| for (int i = 0; i < 4; i++) { |
| emit_tesslevel_declaration(emit, outputIndex++, |
| VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, |
| VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i); |
| } |
| } |
| else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) { |
| for (int i = 0; i < 3; i++) { |
| emit_tesslevel_declaration(emit, outputIndex++, |
| VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, |
| VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i); |
| } |
| } |
| } |
| |
| if (emit->tcs.inner.out_index == INVALID_INDEX) { |
| /* since the TCS did not declare out inner tess level output register, |
| * we declare it here |
| */ |
| emit->tcs.inner.out_index = outputIndex; |
| if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) { |
| emit_tesslevel_declaration(emit, outputIndex++, |
| VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, |
| VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR); |
| emit_tesslevel_declaration(emit, outputIndex++, |
| VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, |
| VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR); |
| } |
| else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) { |
| emit_tesslevel_declaration(emit, outputIndex++, |
| VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, |
| VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR); |
| } |
| } |
| } |
| emit->num_outputs = outputIndex; |
| } |
| |
| |
| /** |
| * Emit output declarations for tessellation evaluation shader. |
| */ |
| static void |
| emit_tes_output_declarations(struct svga_shader_emitter_v10 *emit) |
| { |
| unsigned int i; |
| |
| for (i = 0; i < emit->info.num_outputs; i++) { |
| emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], TRUE); |
| } |
| } |
| |
| |
| /** |
| * Emit the declaration for a system value input/output. |
| */ |
| static void |
| emit_system_value_declaration(struct svga_shader_emitter_v10 *emit, |
| enum tgsi_semantic semantic_name, unsigned index) |
| { |
| switch (semantic_name) { |
| case TGSI_SEMANTIC_INSTANCEID: |
| index = alloc_system_value_index(emit, index); |
| emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV, |
| VGPU10_OPERAND_TYPE_INPUT, |
| VGPU10_OPERAND_INDEX_1D, |
| index, 1, |
| VGPU10_NAME_INSTANCE_ID, |
| VGPU10_OPERAND_4_COMPONENT, |
| VGPU10_OPERAND_4_COMPONENT_MASK_MODE, |
| VGPU10_OPERAND_4_COMPONENT_MASK_X, |
| VGPU10_INTERPOLATION_UNDEFINED, TRUE, |
| map_tgsi_semantic_to_sgn_name(semantic_name)); |
| break; |
| case TGSI_SEMANTIC_VERTEXID: |
| emit->vs.vertex_id_sys_index = index; |
| index = alloc_system_value_index(emit, index); |
| emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV, |
| VGPU10_OPERAND_TYPE_INPUT, |
| VGPU10_OPERAND_INDEX_1D, |
| index, 1, |
| VGPU10_NAME_VERTEX_ID, |
| VGPU10_OPERAND_4_COMPONENT, |
| VGPU10_OPERAND_4_COMPONENT_MASK_MODE, |
| VGPU10_OPERAND_4_COMPONENT_MASK_X, |
| VGPU10_INTERPOLATION_UNDEFINED, TRUE, |
| map_tgsi_semantic_to_sgn_name(semantic_name)); |
| break; |
| case TGSI_SEMANTIC_SAMPLEID: |
| assert(emit->unit == PIPE_SHADER_FRAGMENT); |
| emit->fs.sample_id_sys_index = index; |
| index = alloc_system_value_index(emit, index); |
| emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_PS_SIV, |
| VGPU10_OPERAND_TYPE_INPUT, |
| VGPU10_OPERAND_INDEX_1D, |
| index, 1, |
| VGPU10_NAME_SAMPLE_INDEX, |
| VGPU10_OPERAND_4_COMPONENT, |
| VGPU10_OPERAND_4_COMPONENT_MASK_MODE, |
| VGPU10_OPERAND_4_COMPONENT_MASK_X, |
| VGPU10_INTERPOLATION_CONSTANT, TRUE, |
| map_tgsi_semantic_to_sgn_name(semantic_name)); |
| break; |
| case TGSI_SEMANTIC_SAMPLEPOS: |
| /* This system value contains the position of the current sample |
| * when using per-sample shading. We implement this by calling |
| * the VGPU10_OPCODE_SAMPLE_POS instruction with the current sample |
| * index as the argument. See emit_sample_position_instructions(). |
| */ |
| assert(emit->version >= 41); |
| emit->fs.sample_pos_sys_index = index; |
| index = alloc_system_value_index(emit, index); |
| break; |
| case TGSI_SEMANTIC_INVOCATIONID: |
| /* Note: invocation id input is mapped to different register depending |
| * on the shader type. In GS, it will be mapped to vGSInstanceID#. |
| * In TCS, it will be mapped to vOutputControlPointID#. |
| * Since in both cases, the mapped name is unique rather than |
| * just a generic input name ("v#"), so there is no need to remap |
| * the index value. |
| */ |
| assert(emit->unit == PIPE_SHADER_GEOMETRY || |
| emit->unit == PIPE_SHADER_TESS_CTRL); |
| assert(emit->version >= 50); |
| |
| if (emit->unit == PIPE_SHADER_GEOMETRY) { |
| emit->gs.invocation_id_sys_index = index; |
| emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, |
| VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID, |
| VGPU10_OPERAND_INDEX_0D, |
| index, 1, |
| VGPU10_NAME_UNDEFINED, |
| VGPU10_OPERAND_0_COMPONENT, |
| VGPU10_OPERAND_4_COMPONENT_MASK_MODE, |
| 0, |
| VGPU10_INTERPOLATION_UNDEFINED, TRUE, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); |
| } else if (emit->unit == PIPE_SHADER_TESS_CTRL) { |
| /* The emission of the control point id will be done |
| * in the control point phase in emit_hull_shader_control_point_phase(). |
| */ |
| emit->tcs.invocation_id_sys_index = index; |
| } |
| break; |
| case TGSI_SEMANTIC_SAMPLEMASK: |
| /* Note: the PS sample mask input has a unique name ("vCoverage#") |
| * rather than just a generic input name ("v#") so no need to remap the |
| * index value. |
| */ |
| assert(emit->unit == PIPE_SHADER_FRAGMENT); |
| assert(emit->version >= 50); |
| emit->fs.sample_mask_in_sys_index = index; |
| emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, |
| VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK, |
| VGPU10_OPERAND_INDEX_0D, |
| index, 1, |
| VGPU10_NAME_UNDEFINED, |
| VGPU10_OPERAND_1_COMPONENT, |
| VGPU10_OPERAND_4_COMPONENT_MASK_MODE, |
| 0, |
| VGPU10_INTERPOLATION_CONSTANT, TRUE, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); |
| break; |
| case TGSI_SEMANTIC_TESSCOORD: |
| assert(emit->version >= 50); |
| |
| unsigned usageMask = 0; |
| |
| if (emit->tes.prim_mode == PIPE_PRIM_TRIANGLES) { |
| usageMask = VGPU10_OPERAND_4_COMPONENT_MASK_XYZ; |
| } |
| else if (emit->tes.prim_mode == PIPE_PRIM_LINES || |
| emit->tes.prim_mode == PIPE_PRIM_QUADS) { |
| usageMask = VGPU10_OPERAND_4_COMPONENT_MASK_XY; |
| } |
| |
| emit->tes.tesscoord_sys_index = index; |
| emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, |
| VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT, |
| VGPU10_OPERAND_INDEX_0D, |
| index, 1, |
| VGPU10_NAME_UNDEFINED, |
| VGPU10_OPERAND_4_COMPONENT, |
| VGPU10_OPERAND_4_COMPONENT_MASK_MODE, |
| usageMask, |
| VGPU10_INTERPOLATION_UNDEFINED, TRUE, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); |
| break; |
| case TGSI_SEMANTIC_TESSINNER: |
| assert(emit->version >= 50); |
| emit->tes.inner.tgsi_index = index; |
| break; |
| case TGSI_SEMANTIC_TESSOUTER: |
| assert(emit->version >= 50); |
| emit->tes.outer.tgsi_index = index; |
| break; |
| case TGSI_SEMANTIC_VERTICESIN: |
| assert(emit->unit == PIPE_SHADER_TESS_CTRL); |
| assert(emit->version >= 50); |
| |
| /* save the system value index */ |
| emit->tcs.vertices_per_patch_index = index; |
| break; |
| case TGSI_SEMANTIC_PRIMID: |
| assert(emit->version >= 50); |
| if (emit->unit == PIPE_SHADER_TESS_CTRL) { |
| emit->tcs.prim_id_index = index; |
| } |
| else if (emit->unit == PIPE_SHADER_TESS_EVAL) { |
| emit->tes.prim_id_index = index; |
| emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, |
| VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID, |
| VGPU10_OPERAND_INDEX_0D, |
| index, 1, |
| VGPU10_NAME_UNDEFINED, |
| VGPU10_OPERAND_0_COMPONENT, |
| VGPU10_OPERAND_4_COMPONENT_MASK_MODE, |
| 0, |
| VGPU10_INTERPOLATION_UNDEFINED, TRUE, |
| map_tgsi_semantic_to_sgn_name(semantic_name)); |
| } |
| break; |
| default: |
| debug_printf("unexpected system value semantic index %u / %s\n", |
| semantic_name, tgsi_semantic_names[semantic_name]); |
| } |
| } |
| |
| /** |
| * Translate a TGSI declaration to VGPU10. |
| */ |
| static boolean |
| emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_declaration *decl) |
| { |
| switch (decl->Declaration.File) { |
| case TGSI_FILE_INPUT: |
| /* do nothing - see emit_input_declarations() */ |
| return TRUE; |
| |
| case TGSI_FILE_OUTPUT: |
| assert(decl->Range.First == decl->Range.Last); |
| emit->output_usage_mask[decl->Range.First] = decl->Declaration.UsageMask; |
| return TRUE; |
| |
| case TGSI_FILE_TEMPORARY: |
| /* Don't declare the temps here. Just keep track of how many |
| * and emit the declaration later. |
| */ |
| if (decl->Declaration.Array) { |
| /* Indexed temporary array. Save the start index of the array |
| * and the size of the array. |
| */ |
| const unsigned arrayID = MIN2(decl->Array.ArrayID, MAX_TEMP_ARRAYS); |
| assert(arrayID < ARRAY_SIZE(emit->temp_arrays)); |
| |
| /* Save this array so we can emit the declaration for it later */ |
| create_temp_array(emit, arrayID, decl->Range.First, |
| decl->Range.Last - decl->Range.First + 1, |
| decl->Range.First); |
| } |
| |
| /* for all temps, indexed or not, keep track of highest index */ |
| emit->num_shader_temps = MAX2(emit->num_shader_temps, |
| decl->Range.Last + 1); |
| return TRUE; |
| |
| case TGSI_FILE_CONSTANT: |
| /* Don't declare constants here. Just keep track and emit later. */ |
| { |
| unsigned constbuf = 0, num_consts; |
| if (decl->Declaration.Dimension) { |
| constbuf = decl->Dim.Index2D; |
| } |
| /* We throw an assertion here when, in fact, the shader should never |
| * have linked due to constbuf index out of bounds, so we shouldn't |
| * have reached here. |
| */ |
| assert(constbuf < ARRAY_SIZE(emit->num_shader_consts)); |
| |
| num_consts = MAX2(emit->num_shader_consts[constbuf], |
| decl->Range.Last + 1); |
| |
| if (num_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) { |
| debug_printf("Warning: constant buffer is declared to size [%u]" |
| " but [%u] is the limit.\n", |
| num_consts, |
| VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT); |
| } |
| /* The linker doesn't enforce the max UBO size so we clamp here */ |
| emit->num_shader_consts[constbuf] = |
| MIN2(num_consts, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT); |
| } |
| return TRUE; |
| |
| case TGSI_FILE_IMMEDIATE: |
| assert(!"TGSI_FILE_IMMEDIATE not handled yet!"); |
| return FALSE; |
| |
| case TGSI_FILE_SYSTEM_VALUE: |
| emit_system_value_declaration(emit, decl->Semantic.Name, |
| decl->Range.First); |
| return TRUE; |
| |
| case TGSI_FILE_SAMPLER: |
| /* Don't declare samplers here. Just keep track and emit later. */ |
| emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1); |
| return TRUE; |
| |
| #if 0 |
| case TGSI_FILE_RESOURCE: |
| /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/ |
| /* XXX more, VGPU10_RETURN_TYPE_FLOAT */ |
| assert(!"TGSI_FILE_RESOURCE not handled yet"); |
| return FALSE; |
| #endif |
| |
| case TGSI_FILE_ADDRESS: |
| emit->num_address_regs = MAX2(emit->num_address_regs, |
| decl->Range.Last + 1); |
| return TRUE; |
| |
| case TGSI_FILE_SAMPLER_VIEW: |
| { |
| unsigned unit = decl->Range.First; |
| assert(decl->Range.First == decl->Range.Last); |
| emit->sampler_target[unit] = decl->SamplerView.Resource; |
| /* Note: we can ignore YZW return types for now */ |
| emit->sampler_return_type[unit] = decl->SamplerView.ReturnTypeX; |
| emit->sampler_view[unit] = TRUE; |
| } |
| return TRUE; |
| |
| default: |
| assert(!"Unexpected type of declaration"); |
| return FALSE; |
| } |
| } |
| |
| |
| |
| /** |
| * Emit input declarations for fragment shader. |
| */ |
| static void |
| emit_fs_input_declarations(struct svga_shader_emitter_v10 *emit) |
| { |
| unsigned i; |
| |
| for (i = 0; i < emit->linkage.num_inputs; i++) { |
| enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i]; |
| unsigned usage_mask = emit->info.input_usage_mask[i]; |
| unsigned index = emit->linkage.input_map[i]; |
| unsigned type, interpolationMode, name; |
| unsigned mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL; |
| |
| if (usage_mask == 0) |
| continue; /* register is not actually used */ |
| |
| if (semantic_name == TGSI_SEMANTIC_POSITION) { |
| /* fragment position input */ |
| type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; |
| interpolationMode = VGPU10_INTERPOLATION_LINEAR; |
| name = VGPU10_NAME_POSITION; |
| if (usage_mask & TGSI_WRITEMASK_W) { |
| /* we need to replace use of 'w' with '1/w' */ |
| emit->fs.fragcoord_input_index = i; |
| } |
| } |
| else if (semantic_name == TGSI_SEMANTIC_FACE) { |
| /* fragment front-facing input */ |
| type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; |
| interpolationMode = VGPU10_INTERPOLATION_CONSTANT; |
| name = VGPU10_NAME_IS_FRONT_FACE; |
| emit->fs.face_input_index = i; |
| } |
| else if (semantic_name == TGSI_SEMANTIC_PRIMID) { |
| /* primitive ID */ |
| type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; |
| interpolationMode = VGPU10_INTERPOLATION_CONSTANT; |
| name = VGPU10_NAME_PRIMITIVE_ID; |
| } |
| else if (semantic_name == TGSI_SEMANTIC_SAMPLEID) { |
| /* sample index / ID */ |
| type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; |
| interpolationMode = VGPU10_INTERPOLATION_CONSTANT; |
| name = VGPU10_NAME_SAMPLE_INDEX; |
| } |
| else if (semantic_name == TGSI_SEMANTIC_LAYER) { |
| /* render target array index */ |
| if (emit->key.fs.layer_to_zero) { |
| /** |
| * The shader from the previous stage does not write to layer, |
| * so reading the layer index in fragment shader should return 0. |
| */ |
| emit->fs.layer_input_index = i; |
| continue; |
| } else { |
| type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; |
| interpolationMode = VGPU10_INTERPOLATION_CONSTANT; |
| name = VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX; |
| mask = VGPU10_OPERAND_4_COMPONENT_MASK_X; |
| } |
| } |
| else if (semantic_name == TGSI_SEMANTIC_VIEWPORT_INDEX) { |
| /* viewport index */ |
| type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; |
| interpolationMode = VGPU10_INTERPOLATION_CONSTANT; |
| name = VGPU10_NAME_VIEWPORT_ARRAY_INDEX; |
| mask = VGPU10_OPERAND_4_COMPONENT_MASK_X; |
| } |
| else { |
| /* general fragment input */ |
| type = VGPU10_OPCODE_DCL_INPUT_PS; |
| interpolationMode = |
| translate_interpolation(emit, |
| emit->info.input_interpolate[i], |
| emit->info.input_interpolate_loc[i]); |
| |
| /* keeps track if flat interpolation mode is being used */ |
| emit->uses_flat_interp = emit->uses_flat_interp || |
| (interpolationMode == VGPU10_INTERPOLATION_CONSTANT); |
| |
| name = VGPU10_NAME_UNDEFINED; |
| } |
| |
| emit_input_declaration(emit, type, |
| VGPU10_OPERAND_TYPE_INPUT, |
| VGPU10_OPERAND_INDEX_1D, index, 1, |
| name, |
| VGPU10_OPERAND_4_COMPONENT, |
| VGPU10_OPERAND_4_COMPONENT_MASK_MODE, |
| mask, |
| interpolationMode, TRUE, |
| map_tgsi_semantic_to_sgn_name(semantic_name)); |
| } |
| } |
| |
| |
| /** |
| * Emit input declarations for vertex shader. |
| */ |
| static void |
| emit_vs_input_declarations(struct svga_shader_emitter_v10 *emit) |
| { |
| unsigned i; |
| |
| for (i = 0; i < emit->info.file_max[TGSI_FILE_INPUT] + 1; i++) { |
| unsigned usage_mask = emit->info.input_usage_mask[i]; |
| unsigned index = i; |
| |
| if (usage_mask == 0) |
| continue; /* register is not actually used */ |
| |
| emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, |
| VGPU10_OPERAND_TYPE_INPUT, |
| VGPU10_OPERAND_INDEX_1D, index, 1, |
| VGPU10_NAME_UNDEFINED, |
| VGPU10_OPERAND_4_COMPONENT, |
| VGPU10_OPERAND_4_COMPONENT_MASK_MODE, |
| VGPU10_OPERAND_4_COMPONENT_MASK_ALL, |
| VGPU10_INTERPOLATION_UNDEFINED, TRUE, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); |
| } |
| } |
| |
| |
| /** |
| * Emit input declarations for geometry shader. |
| */ |
| static void |
| emit_gs_input_declarations(struct svga_shader_emitter_v10 *emit) |
| { |
| unsigned i; |
| |
| for (i = 0; i < emit->info.num_inputs; i++) { |
| enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i]; |
| unsigned usage_mask = emit->info.input_usage_mask[i]; |
| unsigned index = emit->linkage.input_map[i]; |
| unsigned opcodeType, operandType; |
| unsigned numComp, selMode; |
| unsigned name; |
| unsigned dim; |
| |
| if (usage_mask == 0) |
| continue; /* register is not actually used */ |
| |
| opcodeType = VGPU10_OPCODE_DCL_INPUT; |
| operandType = VGPU10_OPERAND_TYPE_INPUT; |
| numComp = VGPU10_OPERAND_4_COMPONENT; |
| selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; |
| name = VGPU10_NAME_UNDEFINED; |
| |
| /* all geometry shader inputs are two dimensional except |
| * gl_PrimitiveID |
| */ |
| dim = VGPU10_OPERAND_INDEX_2D; |
| |
| if (semantic_name == TGSI_SEMANTIC_PRIMID) { |
| /* Primitive ID */ |
| operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID; |
| dim = VGPU10_OPERAND_INDEX_0D; |
| numComp = VGPU10_OPERAND_0_COMPONENT; |
| selMode = 0; |
| |
| /* also save the register index so we can check for |
| * primitive id when emit src register. We need to modify the |
| * operand type, index dimension when emit primitive id src reg. |
| */ |
| emit->gs.prim_id_index = i; |
| } |
| else if (semantic_name == TGSI_SEMANTIC_POSITION) { |
| /* vertex position input */ |
| opcodeType = VGPU10_OPCODE_DCL_INPUT_SIV; |
| name = VGPU10_NAME_POSITION; |
| } |
| |
| emit_input_declaration(emit, opcodeType, operandType, |
| dim, index, |
| emit->gs.input_size, |
| name, |
| numComp, selMode, |
| VGPU10_OPERAND_4_COMPONENT_MASK_ALL, |
| VGPU10_INTERPOLATION_UNDEFINED, TRUE, |
| map_tgsi_semantic_to_sgn_name(semantic_name)); |
| } |
| } |
| |
| |
| /** |
| * Emit input declarations for tessellation control shader. |
| */ |
| static void |
| emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit) |
| { |
| unsigned i; |
| unsigned size = emit->key.tcs.vertices_per_patch; |
| unsigned indicesMask = 0; |
| boolean addSignature = TRUE; |
| |
| if (!emit->tcs.control_point_phase) |
| addSignature = emit->tcs.fork_phase_add_signature; |
| |
| for (i = 0; i < emit->info.num_inputs; i++) { |
| unsigned usage_mask = emit->info.input_usage_mask[i]; |
| unsigned index = emit->linkage.input_map[i]; |
| enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i]; |
| VGPU10_SYSTEM_NAME name = VGPU10_NAME_UNDEFINED; |
| VGPU10_OPERAND_TYPE operandType = VGPU10_OPERAND_TYPE_INPUT; |
| SVGA3dDXSignatureSemanticName sgn_name = |
| map_tgsi_semantic_to_sgn_name(semantic_name); |
| |
| /* indices that are declared */ |
| indicesMask |= 1 << index; |
| |
| if (semantic_name == TGSI_SEMANTIC_POSITION || |
| index == emit->linkage.position_index) { |
| /* save the input control point index for later use */ |
| emit->tcs.control_point_input_index = i; |
| } |
| else if (usage_mask == 0) { |
| continue; /* register is not actually used */ |
| } |
| else if (semantic_name == TGSI_SEMANTIC_CLIPDIST) { |
| /* The shadow copy is being used here. So set the signature name |
| * to UNDEFINED. |
| */ |
| sgn_name = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED; |
| } |
| |
| /* input control points in the patch constant phase are emitted in the |
| * vicp register rather than the v register. |
| */ |
| if (!emit->tcs.control_point_phase) { |
| operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT; |
| } |
| |
| /* Tessellation control shader inputs are two dimensional. |
| * The array size is determined by the patch vertex count. |
| */ |
| emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, |
| operandType, |
| VGPU10_OPERAND_INDEX_2D, |
| index, size, name, |
| VGPU10_OPERAND_4_COMPONENT, |
| VGPU10_OPERAND_4_COMPONENT_MASK_MODE, |
| VGPU10_OPERAND_4_COMPONENT_MASK_ALL, |
| VGPU10_INTERPOLATION_UNDEFINED, |
| addSignature, sgn_name); |
| } |
| |
| if (emit->tcs.control_point_phase) { |
| if (emit->tcs.control_point_input_index == INVALID_INDEX) { |
| |
| /* Add input control point declaration if it does not exist */ |
| if ((indicesMask & (1 << emit->linkage.position_index)) == 0) { |
| emit->linkage.input_map[emit->linkage.num_inputs] = |
| emit->linkage.position_index; |
| emit->tcs.control_point_input_index = emit->linkage.num_inputs++; |
| |
| emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, |
| VGPU10_OPERAND_TYPE_INPUT, |
| VGPU10_OPERAND_INDEX_2D, |
| emit->linkage.position_index, |
| emit->key.tcs.vertices_per_patch, |
| VGPU10_NAME_UNDEFINED, |
| VGPU10_OPERAND_4_COMPONENT, |
| VGPU10_OPERAND_4_COMPONENT_MASK_MODE, |
| VGPU10_OPERAND_4_COMPONENT_MASK_ALL, |
| VGPU10_INTERPOLATION_UNDEFINED, TRUE, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION); |
| } |
| } |
| |
| /* Also add an address register for the indirection to the |
| * input control points |
| */ |
| emit->tcs.control_point_addr_index = emit->num_address_regs++; |
| } |
| } |
| |
| |
| static void |
| emit_tessfactor_input_declarations(struct svga_shader_emitter_v10 *emit) |
| { |
| |
| /* In tcs, tess factors are emitted as extra outputs. |
| * The starting register index for the tess factors is captured |
| * in the compile key. |
| */ |
| unsigned inputIndex = emit->key.tes.tessfactor_index; |
| |
| if (emit->tes.prim_mode == PIPE_PRIM_QUADS) { |
| if (emit->key.tes.need_tessouter) { |
| emit->tes.outer.in_index = inputIndex; |
| for (int i = 0; i < 4; i++) { |
| emit_tesslevel_declaration(emit, inputIndex++, |
| VGPU10_OPCODE_DCL_INPUT_SIV, |
| VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT, |
| VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i); |
| } |
| } |
| |
| if (emit->key.tes.need_tessinner) { |
| emit->tes.inner.in_index = inputIndex; |
| emit_tesslevel_declaration(emit, inputIndex++, |
| VGPU10_OPCODE_DCL_INPUT_SIV, |
| VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT, |
| VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR); |
| |
| emit_tesslevel_declaration(emit, inputIndex++, |
| VGPU10_OPCODE_DCL_INPUT_SIV, |
| VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT, |
| VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR); |
| } |
| } |
| else if (emit->tes.prim_mode == PIPE_PRIM_TRIANGLES) { |
| if (emit->key.tes.need_tessouter) { |
| emit->tes.outer.in_index = inputIndex; |
| for (int i = 0; i < 3; i++) { |
| emit_tesslevel_declaration(emit, inputIndex++, |
| VGPU10_OPCODE_DCL_INPUT_SIV, |
| VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT, |
| VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i); |
| } |
| } |
| |
| if (emit->key.tes.need_tessinner) { |
| emit->tes.inner.in_index = inputIndex; |
| emit_tesslevel_declaration(emit, inputIndex++, |
| VGPU10_OPCODE_DCL_INPUT_SIV, |
| VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT, |
| VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR); |
| } |
| } |
| else if (emit->tes.prim_mode == PIPE_PRIM_LINES) { |
| if (emit->key.tes.need_tessouter) { |
| emit->tes.outer.in_index = inputIndex; |
| emit_tesslevel_declaration(emit, inputIndex++, |
| VGPU10_OPCODE_DCL_INPUT_SIV, |
| VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT, |
| VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR); |
| |
| emit_tesslevel_declaration(emit, inputIndex++, |
| VGPU10_OPCODE_DCL_INPUT_SIV, |
| VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT, |
| VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DENSITY_TESSFACTOR); |
| } |
| } |
| } |
| |
| |
| /** |
| * Emit input declarations for tessellation evaluation shader. |
| */ |
| static void |
| emit_tes_input_declarations(struct svga_shader_emitter_v10 *emit) |
| { |
| unsigned i; |
| |
| for (i = 0; i < emit->info.num_inputs; i++) { |
| unsigned usage_mask = emit->info.input_usage_mask[i]; |
| unsigned index = emit->linkage.input_map[i]; |
| unsigned size; |
| const enum tgsi_semantic semantic_name = |
| emit->info.input_semantic_name[i]; |
| SVGA3dDXSignatureSemanticName sgn_name; |
| VGPU10_OPERAND_TYPE operandType; |
| VGPU10_OPERAND_INDEX_DIMENSION dim; |
| |
| if (usage_mask == 0) |
| usage_mask = 1; /* at least set usage mask to one */ |
| |
| if (semantic_name == TGSI_SEMANTIC_PATCH) { |
| operandType = VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT; |
| dim = VGPU10_OPERAND_INDEX_1D; |
| size = 1; |
| sgn_name = map_tgsi_semantic_to_sgn_name(semantic_name); |
| } |
| else { |
| operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT; |
| dim = VGPU10_OPERAND_INDEX_2D; |
| size = emit->key.tes.vertices_per_patch; |
| sgn_name = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED; |
| } |
| |
| emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, operandType, |
| dim, index, size, VGPU10_NAME_UNDEFINED, |
| VGPU10_OPERAND_4_COMPONENT, |
| VGPU10_OPERAND_4_COMPONENT_MASK_MODE, |
| VGPU10_OPERAND_4_COMPONENT_MASK_ALL, |
| VGPU10_INTERPOLATION_UNDEFINED, |
| TRUE, sgn_name); |
| } |
| |
| emit_tessfactor_input_declarations(emit); |
| |
| /* DX spec requires DS input controlpoint/patch-constant signatures to match |
| * the HS output controlpoint/patch-constant signatures exactly. |
| * Add missing input declarations even if they are not used in the shader. |
| */ |
| if (emit->linkage.num_inputs < emit->linkage.prevShader.num_outputs) { |
| struct tgsi_shader_info *prevInfo = emit->prevShaderInfo; |
| for (i = 0; i < emit->linkage.prevShader.num_outputs; i++) { |
| |
| /* If a tcs output does not have a corresponding input register in |
| * tes, add one. |
| */ |
| if (emit->linkage.prevShader.output_map[i] > |
| emit->linkage.input_map_max) { |
| const enum tgsi_semantic sem_name = prevInfo->output_semantic_name[i]; |
| |
| if (sem_name == TGSI_SEMANTIC_PATCH) { |
| emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, |
| VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT, |
| VGPU10_OPERAND_INDEX_1D, |
| i, 1, VGPU10_NAME_UNDEFINED, |
| VGPU10_OPERAND_4_COMPONENT, |
| VGPU10_OPERAND_4_COMPONENT_MASK_MODE, |
| VGPU10_OPERAND_4_COMPONENT_MASK_ALL, |
| VGPU10_INTERPOLATION_UNDEFINED, |
| TRUE, |
| map_tgsi_semantic_to_sgn_name(sem_name)); |
| |
| } else if (sem_name != TGSI_SEMANTIC_TESSINNER && |
| sem_name != TGSI_SEMANTIC_TESSOUTER) { |
| emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, |
| VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT, |
| VGPU10_OPERAND_INDEX_2D, |
| i, emit->key.tes.vertices_per_patch, |
| VGPU10_NAME_UNDEFINED, |
| VGPU10_OPERAND_4_COMPONENT, |
| VGPU10_OPERAND_4_COMPONENT_MASK_MODE, |
| VGPU10_OPERAND_4_COMPONENT_MASK_ALL, |
| VGPU10_INTERPOLATION_UNDEFINED, |
| TRUE, |
| map_tgsi_semantic_to_sgn_name(sem_name)); |
| } |
| /* tessellation factors are taken care of in |
| * emit_tessfactor_input_declarations(). |
| */ |
| } |
| } |
| } |
| } |
| |
| |
| /** |
| * Emit all input declarations. |
| */ |
| static boolean |
| emit_input_declarations(struct svga_shader_emitter_v10 *emit) |
| { |
| emit->index_range.required = |
| emit->info.indirect_files & (1 << TGSI_FILE_INPUT) ? TRUE : FALSE; |
| |
| switch (emit->unit) { |
| case PIPE_SHADER_FRAGMENT: |
| emit_fs_input_declarations(emit); |
| break; |
| case PIPE_SHADER_GEOMETRY: |
| emit_gs_input_declarations(emit); |
| break; |
| case PIPE_SHADER_VERTEX: |
| emit_vs_input_declarations(emit); |
| break; |
| case PIPE_SHADER_TESS_CTRL: |
| emit_tcs_input_declarations(emit); |
| break; |
| case PIPE_SHADER_TESS_EVAL: |
| emit_tes_input_declarations(emit); |
| break; |
| case PIPE_SHADER_COMPUTE: |
| //XXX emit_cs_input_declarations(emit); |
| break; |
| default: |
| assert(0); |
| } |
| |
| if (emit->index_range.start_index != INVALID_INDEX) { |
| emit_index_range_declaration(emit); |
| } |
| emit->index_range.required = FALSE; |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit all output declarations. |
| */ |
| static boolean |
| emit_output_declarations(struct svga_shader_emitter_v10 *emit) |
| { |
| emit->index_range.required = |
| emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT) ? TRUE : FALSE; |
| |
| switch (emit->unit) { |
| case PIPE_SHADER_FRAGMENT: |
| emit_fs_output_declarations(emit); |
| break; |
| case PIPE_SHADER_GEOMETRY: |
| emit_gs_output_declarations(emit); |
| break; |
| case PIPE_SHADER_VERTEX: |
| emit_vs_output_declarations(emit); |
| break; |
| case PIPE_SHADER_TESS_CTRL: |
| emit_tcs_output_declarations(emit); |
| break; |
| case PIPE_SHADER_TESS_EVAL: |
| emit_tes_output_declarations(emit); |
| break; |
| case PIPE_SHADER_COMPUTE: |
| //XXX emit_cs_output_declarations(emit); |
| break; |
| default: |
| assert(0); |
| } |
| |
| if (emit->vposition.so_index != INVALID_INDEX && |
| emit->vposition.out_index != INVALID_INDEX) { |
| |
| assert(emit->unit != PIPE_SHADER_FRAGMENT); |
| |
| /* Emit the declaration for the non-adjusted vertex position |
| * for stream output purpose |
| */ |
| emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, |
| emit->vposition.so_index, |
| VGPU10_NAME_UNDEFINED, |
| VGPU10_OPERAND_4_COMPONENT_MASK_ALL, |
| TRUE, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION); |
| } |
| |
| if (emit->clip_dist_so_index != INVALID_INDEX && |
| emit->clip_dist_out_index != INVALID_INDEX) { |
| |
| assert(emit->unit != PIPE_SHADER_FRAGMENT); |
| |
| /* Emit the declaration for the clip distance shadow copy which |
| * will be used for stream output purpose and for clip distance |
| * varying variable |
| */ |
| emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, |
| emit->clip_dist_so_index, |
| VGPU10_NAME_UNDEFINED, |
| emit->output_usage_mask[emit->clip_dist_out_index], |
| TRUE, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); |
| |
| if (emit->info.num_written_clipdistance > 4) { |
| /* for the second clip distance register, each handles 4 planes */ |
| emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, |
| emit->clip_dist_so_index + 1, |
| VGPU10_NAME_UNDEFINED, |
| emit->output_usage_mask[emit->clip_dist_out_index+1], |
| TRUE, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); |
| } |
| } |
| |
| if (emit->index_range.start_index != INVALID_INDEX) { |
| emit_index_range_declaration(emit); |
| } |
| emit->index_range.required = FALSE; |
| return TRUE; |
| } |
| |
| |
| /** |
| * A helper function to create a temporary indexable array |
| * and initialize the corresponding entries in the temp_map array. |
| */ |
| static void |
| create_temp_array(struct svga_shader_emitter_v10 *emit, |
| unsigned arrayID, unsigned first, unsigned count, |
| unsigned startIndex) |
| { |
| unsigned i, tempIndex = startIndex; |
| |
| emit->num_temp_arrays = MAX2(emit->num_temp_arrays, arrayID + 1); |
| assert(emit->num_temp_arrays <= MAX_TEMP_ARRAYS); |
| emit->num_temp_arrays = MIN2(emit->num_temp_arrays, MAX_TEMP_ARRAYS); |
| |
| emit->temp_arrays[arrayID].start = first; |
| emit->temp_arrays[arrayID].size = count; |
| |
| /* Fill in the temp_map entries for this temp array */ |
| for (i = 0; i < count; i++, tempIndex++) { |
| emit->temp_map[tempIndex].arrayId = arrayID; |
| emit->temp_map[tempIndex].index = i; |
| } |
| } |
| |
| |
| /** |
| * Emit the declaration for the temporary registers. |
| */ |
| static boolean |
| emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit) |
| { |
| unsigned total_temps, reg, i; |
| |
| total_temps = emit->num_shader_temps; |
| |
| /* If there is indirect access to non-indexable temps in the shader, |
| * convert those temps to indexable temps. This works around a bug |
| * in the GLSL->TGSI translator exposed in piglit test |
| * glsl-1.20/execution/fs-const-array-of-struct-of-array.shader_test. |
| * Internal temps added by the driver remain as non-indexable temps. |
| */ |
| if ((emit->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) && |
| emit->num_temp_arrays == 0) { |
| create_temp_array(emit, 1, 0, total_temps, 0); |
| } |
| |
| /* Allocate extra temps for specially-implemented instructions, |
| * such as LIT. |
| */ |
| total_temps += MAX_INTERNAL_TEMPS; |
| |
| /* Allocate extra temps for clip distance or clip vertex. |
| */ |
| if (emit->clip_mode == CLIP_DISTANCE) { |
| /* We need to write the clip distance to a temporary register |
| * first. Then it will be copied to the shadow copy for |
| * the clip distance varying variable and stream output purpose. |
| * It will also be copied to the actual CLIPDIST register |
| * according to the enabled clip planes |
| */ |
| emit->clip_dist_tmp_index = total_temps++; |
| if (emit->info.num_written_clipdistance > 4) |
| total_temps++; /* second clip register */ |
| } |
| else if (emit->clip_mode == CLIP_VERTEX && emit->key.last_vertex_stage) { |
| /* If the current shader is in the last vertex processing stage, |
| * We need to convert the TGSI CLIPVERTEX output to one or more |
| * clip distances. Allocate a temp reg for the clipvertex here. |
| */ |
| assert(emit->info.writes_clipvertex > 0); |
| emit->clip_vertex_tmp_index = total_temps; |
| total_temps++; |
| } |
| |
| if (emit->info.uses_vertexid) { |
| assert(emit->unit == PIPE_SHADER_VERTEX); |
| emit->vs.vertex_id_tmp_index = total_temps++; |
| } |
| |
| if (emit->unit == PIPE_SHADER_VERTEX || emit->unit == PIPE_SHADER_GEOMETRY) { |
| if (emit->vposition.need_prescale || emit->key.vs.undo_viewport || |
| emit->key.clip_plane_enable || |
| emit->vposition.so_index != INVALID_INDEX) { |
| emit->vposition.tmp_index = total_temps; |
| total_temps += 1; |
| } |
| |
| if (emit->vposition.need_prescale) { |
| emit->vposition.prescale_scale_index = total_temps++; |
| emit->vposition.prescale_trans_index = total_temps++; |
| } |
| |
| if (emit->unit == PIPE_SHADER_VERTEX) { |
| unsigned attrib_mask = (emit->key.vs.adjust_attrib_w_1 | |
| emit->key.vs.adjust_attrib_itof | |
| emit->key.vs.adjust_attrib_utof | |
| emit->key.vs.attrib_is_bgra | |
| emit->key.vs.attrib_puint_to_snorm | |
| emit->key.vs.attrib_puint_to_uscaled | |
| emit->key.vs.attrib_puint_to_sscaled); |
| while (attrib_mask) { |
| unsigned index = u_bit_scan(&attrib_mask); |
| emit->vs.adjusted_input[index] = total_temps++; |
| } |
| } |
| else if (emit->unit == PIPE_SHADER_GEOMETRY) { |
| if (emit->key.gs.writes_viewport_index) |
| emit->gs.viewport_index_tmp_index = total_temps++; |
| } |
| } |
| else if (emit->unit == PIPE_SHADER_FRAGMENT) { |
| if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS || |
| emit->key.fs.write_color0_to_n_cbufs > 1) { |
| /* Allocate a temp to hold the output color */ |
| emit->fs.color_tmp_index = total_temps; |
| total_temps += 1; |
| } |
| |
| if (emit->fs.face_input_index != INVALID_INDEX) { |
| /* Allocate a temp for the +/-1 face register */ |
| emit->fs.face_tmp_index = total_temps; |
| total_temps += 1; |
| } |
| |
| if (emit->fs.fragcoord_input_index != INVALID_INDEX) { |
| /* Allocate a temp for modified fragment position register */ |
| emit->fs.fragcoord_tmp_index = total_temps; |
| total_temps += 1; |
| } |
| |
| if (emit->fs.sample_pos_sys_index != INVALID_INDEX) { |
| /* Allocate a temp for the sample position */ |
| emit->fs.sample_pos_tmp_index = total_temps++; |
| } |
| } |
| else if (emit->unit == PIPE_SHADER_TESS_EVAL) { |
| if (emit->vposition.need_prescale) { |
| emit->vposition.tmp_index = total_temps++; |
| emit->vposition.prescale_scale_index = total_temps++; |
| emit->vposition.prescale_trans_index = total_temps++; |
| } |
| |
| if (emit->tes.inner.tgsi_index) { |
| emit->tes.inner.temp_index = total_temps; |
| total_temps += 1; |
| } |
| |
| if (emit->tes.outer.tgsi_index) { |
| emit->tes.outer.temp_index = total_temps; |
| total_temps += 1; |
| } |
| } |
| else if (emit->unit == PIPE_SHADER_TESS_CTRL) { |
| if (emit->tcs.inner.tgsi_index != INVALID_INDEX) { |
| if (!emit->tcs.control_point_phase) { |
| emit->tcs.inner.temp_index = total_temps; |
| total_temps += 1; |
| } |
| } |
| if (emit->tcs.outer.tgsi_index != INVALID_INDEX) { |
| if (!emit->tcs.control_point_phase) { |
| emit->tcs.outer.temp_index = total_temps; |
| total_temps += 1; |
| } |
| } |
| |
| if (emit->tcs.control_point_phase && |
| emit->info.reads_pervertex_outputs) { |
| emit->tcs.control_point_tmp_index = total_temps; |
| total_temps += emit->tcs.control_point_out_count; |
| } |
| else if (!emit->tcs.control_point_phase && |
| emit->info.reads_perpatch_outputs) { |
| |
| /* If there is indirect access to the patch constant outputs |
| * in the control point phase, then an indexable temporary array |
| * will be created for these patch constant outputs. |
| * Note, indirect access can only be applicable to |
| * patch constant outputs in the control point phase. |
| */ |
| if (emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT)) { |
| unsigned arrayID = |
| emit->num_temp_arrays ? emit->num_temp_arrays : 1; |
| create_temp_array(emit, arrayID, 0, |
| emit->tcs.patch_generic_out_count, total_temps); |
| } |
| emit->tcs.patch_generic_tmp_index = total_temps; |
| total_temps += emit->tcs.patch_generic_out_count; |
| } |
| |
| emit->tcs.invocation_id_tmp_index = total_temps++; |
| } |
| |
| for (i = 0; i < emit->num_address_regs; i++) { |
| emit->address_reg_index[i] = total_temps++; |
| } |
| |
| /* Initialize the temp_map array which maps TGSI temp indexes to VGPU10 |
| * temp indexes. Basically, we compact all the non-array temp register |
| * indexes into a consecutive series. |
| * |
| * Before, we may have some TGSI declarations like: |
| * DCL TEMP[0..1], LOCAL |
| * DCL TEMP[2..4], ARRAY(1), LOCAL |
| * DCL TEMP[5..7], ARRAY(2), LOCAL |
| * plus, some extra temps, like TEMP[8], TEMP[9] for misc things |
| * |
| * After, we'll have a map like this: |
| * temp_map[0] = { array 0, index 0 } |
| * temp_map[1] = { array 0, index 1 } |
| * temp_map[2] = { array 1, index 0 } |
| * temp_map[3] = { array 1, index 1 } |
| * temp_map[4] = { array 1, index 2 } |
| * temp_map[5] = { array 2, index 0 } |
| * temp_map[6] = { array 2, index 1 } |
| * temp_map[7] = { array 2, index 2 } |
| * temp_map[8] = { array 0, index 2 } |
| * temp_map[9] = { array 0, index 3 } |
| * |
| * We'll declare two arrays of 3 elements, plus a set of four non-indexed |
| * temps numbered 0..3 |
| * |
| * Any time we emit a temporary register index, we'll have to use the |
| * temp_map[] table to convert the TGSI index to the VGPU10 index. |
| * |
| * Finally, we recompute the total_temps value here. |
| */ |
| reg = 0; |
| for (i = 0; i < total_temps; i++) { |
| if (emit->temp_map[i].arrayId == 0) { |
| emit->temp_map[i].index = reg++; |
| } |
| } |
| |
| if (0) { |
| debug_printf("total_temps %u\n", total_temps); |
| for (i = 0; i < total_temps; i++) { |
| debug_printf("temp %u -> array %u index %u\n", |
| i, emit->temp_map[i].arrayId, emit->temp_map[i].index); |
| } |
| } |
| |
| total_temps = reg; |
| |
| /* Emit declaration of ordinary temp registers */ |
| if (total_temps > 0) { |
| VGPU10OpcodeToken0 opcode0; |
| |
| opcode0.value = 0; |
| opcode0.opcodeType = VGPU10_OPCODE_DCL_TEMPS; |
| |
| begin_emit_instruction(emit); |
| emit_dword(emit, opcode0.value); |
| emit_dword(emit, total_temps); |
| end_emit_instruction(emit); |
| } |
| |
| /* Emit declarations for indexable temp arrays. Skip 0th entry since |
| * it's unused. |
| */ |
| for (i = 1; i < emit->num_temp_arrays; i++) { |
| unsigned num_temps = emit->temp_arrays[i].size; |
| |
| if (num_temps > 0) { |
| VGPU10OpcodeToken0 opcode0; |
| |
| opcode0.value = 0; |
| opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEXABLE_TEMP; |
| |
| begin_emit_instruction(emit); |
| emit_dword(emit, opcode0.value); |
| emit_dword(emit, i); /* which array */ |
| emit_dword(emit, num_temps); |
| emit_dword(emit, 4); /* num components */ |
| end_emit_instruction(emit); |
| |
| total_temps += num_temps; |
| } |
| } |
| |
| /* Check that the grand total of all regular and indexed temps is |
| * under the limit. |
| */ |
| check_register_index(emit, VGPU10_OPCODE_DCL_TEMPS, total_temps - 1); |
| |
| return TRUE; |
| } |
| |
| |
| static boolean |
| emit_constant_declaration(struct svga_shader_emitter_v10 *emit) |
| { |
| VGPU10OpcodeToken0 opcode0; |
| VGPU10OperandToken0 operand0; |
| unsigned total_consts, i; |
| |
| opcode0.value = 0; |
| opcode0.opcodeType = VGPU10_OPCODE_DCL_CONSTANT_BUFFER; |
| opcode0.accessPattern = VGPU10_CB_IMMEDIATE_INDEXED; |
| /* XXX or, access pattern = VGPU10_CB_DYNAMIC_INDEXED */ |
| |
| operand0.value = 0; |
| operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; |
| operand0.indexDimension = VGPU10_OPERAND_INDEX_2D; |
| operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; |
| operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; |
| operand0.operandType = VGPU10_OPERAND_TYPE_CONSTANT_BUFFER; |
| operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE; |
| operand0.swizzleX = 0; |
| operand0.swizzleY = 1; |
| operand0.swizzleZ = 2; |
| operand0.swizzleW = 3; |
| |
| /** |
| * Emit declaration for constant buffer [0]. We also allocate |
| * room for the extra constants here. |
| */ |
| total_consts = emit->num_shader_consts[0]; |
| |
| /* Now, allocate constant slots for the "extra" constants. |
| * Note: it's critical that these extra constant locations |
| * exactly match what's emitted by the "extra" constants code |
| * in svga_state_constants.c |
| */ |
| |
| /* Vertex position scale/translation */ |
| if (emit->vposition.need_prescale) { |
| emit->vposition.prescale_cbuf_index = total_consts; |
| total_consts += (2 * emit->vposition.num_prescale); |
| } |
| |
| if (emit->unit == PIPE_SHADER_VERTEX) { |
| if (emit->key.vs.undo_viewport) { |
| emit->vs.viewport_index = total_consts++; |
| } |
| if (emit->key.vs.need_vertex_id_bias) { |
| emit->vs.vertex_id_bias_index = total_consts++; |
| } |
| } |
| |
| /* user-defined clip planes */ |
| if (emit->key.clip_plane_enable) { |
| unsigned n = util_bitcount(emit->key.clip_plane_enable); |
| assert(emit->unit != PIPE_SHADER_FRAGMENT && |
| emit->unit != PIPE_SHADER_COMPUTE); |
| for (i = 0; i < n; i++) { |
| emit->clip_plane_const[i] = total_consts++; |
| } |
| } |
| |
| for (i = 0; i < emit->num_samplers; i++) { |
| |
| if (emit->sampler_view[i]) { |
| |
| /* Texcoord scale factors for RECT textures */ |
| if (emit->key.tex[i].unnormalized) { |
| emit->texcoord_scale_index[i] = total_consts++; |
| } |
| |
| /* Texture buffer sizes */ |
| if (emit->sampler_target[i] == TGSI_TEXTURE_BUFFER) { |
| emit->texture_buffer_size_index[i] = total_consts++; |
| } |
| } |
| } |
| |
| if (total_consts > 0) { |
| begin_emit_instruction(emit); |
| emit_dword(emit, opcode0.value); |
| emit_dword(emit, operand0.value); |
| emit_dword(emit, 0); /* which const buffer slot */ |
| emit_dword(emit, total_consts); |
| end_emit_instruction(emit); |
| } |
| |
| /* Declare remaining constant buffers (UBOs) */ |
| for (i = 1; i < ARRAY_SIZE(emit->num_shader_consts); i++) { |
| if (emit->num_shader_consts[i] > 0) { |
| begin_emit_instruction(emit); |
| emit_dword(emit, opcode0.value); |
| emit_dword(emit, operand0.value); |
| emit_dword(emit, i); /* which const buffer slot */ |
| emit_dword(emit, emit->num_shader_consts[i]); |
| end_emit_instruction(emit); |
| } |
| } |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit declarations for samplers. |
| */ |
| static boolean |
| emit_sampler_declarations(struct svga_shader_emitter_v10 *emit) |
| { |
| unsigned i; |
| |
| for (i = 0; i < emit->num_samplers; i++) { |
| VGPU10OpcodeToken0 opcode0; |
| VGPU10OperandToken0 operand0; |
| |
| opcode0.value = 0; |
| opcode0.opcodeType = VGPU10_OPCODE_DCL_SAMPLER; |
| opcode0.samplerMode = VGPU10_SAMPLER_MODE_DEFAULT; |
| |
| operand0.value = 0; |
| operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; |
| operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER; |
| operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; |
| operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; |
| |
| begin_emit_instruction(emit); |
| emit_dword(emit, opcode0.value); |
| emit_dword(emit, operand0.value); |
| emit_dword(emit, i); |
| end_emit_instruction(emit); |
| } |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Translate TGSI_TEXTURE_x to VGPU10_RESOURCE_DIMENSION_x. |
| */ |
| static unsigned |
| tgsi_texture_to_resource_dimension(enum tgsi_texture_type target, |
| unsigned num_samples, |
| boolean is_array) |
| { |
| if (target == TGSI_TEXTURE_2D_MSAA && num_samples < 2) { |
| target = TGSI_TEXTURE_2D; |
| } |
| else if (target == TGSI_TEXTURE_2D_ARRAY_MSAA && num_samples < 2) { |
| target = TGSI_TEXTURE_2D_ARRAY; |
| } |
| |
| switch (target) { |
| case TGSI_TEXTURE_BUFFER: |
| return VGPU10_RESOURCE_DIMENSION_BUFFER; |
| case TGSI_TEXTURE_1D: |
| return VGPU10_RESOURCE_DIMENSION_TEXTURE1D; |
| case TGSI_TEXTURE_2D: |
| case TGSI_TEXTURE_RECT: |
| return VGPU10_RESOURCE_DIMENSION_TEXTURE2D; |
| case TGSI_TEXTURE_3D: |
| return VGPU10_RESOURCE_DIMENSION_TEXTURE3D; |
| case TGSI_TEXTURE_CUBE: |
| case TGSI_TEXTURE_SHADOWCUBE: |
| return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE; |
| case TGSI_TEXTURE_SHADOW1D: |
| return VGPU10_RESOURCE_DIMENSION_TEXTURE1D; |
| case TGSI_TEXTURE_SHADOW2D: |
| case TGSI_TEXTURE_SHADOWRECT: |
| return VGPU10_RESOURCE_DIMENSION_TEXTURE2D; |
| case TGSI_TEXTURE_1D_ARRAY: |
| case TGSI_TEXTURE_SHADOW1D_ARRAY: |
| return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY |
| : VGPU10_RESOURCE_DIMENSION_TEXTURE1D; |
| case TGSI_TEXTURE_2D_ARRAY: |
| case TGSI_TEXTURE_SHADOW2D_ARRAY: |
| return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY |
| : VGPU10_RESOURCE_DIMENSION_TEXTURE2D; |
| case TGSI_TEXTURE_2D_MSAA: |
| return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS; |
| case TGSI_TEXTURE_2D_ARRAY_MSAA: |
| return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY |
| : VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS; |
| case TGSI_TEXTURE_CUBE_ARRAY: |
| case TGSI_TEXTURE_SHADOWCUBE_ARRAY: |
| return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY |
| : VGPU10_RESOURCE_DIMENSION_TEXTURECUBE; |
| default: |
| assert(!"Unexpected resource type"); |
| return VGPU10_RESOURCE_DIMENSION_TEXTURE2D; |
| } |
| } |
| |
| |
| /** |
| * Given a tgsi_return_type, return true iff it is an integer type. |
| */ |
| static boolean |
| is_integer_type(enum tgsi_return_type type) |
| { |
| switch (type) { |
| case TGSI_RETURN_TYPE_SINT: |
| case TGSI_RETURN_TYPE_UINT: |
| return TRUE; |
| case TGSI_RETURN_TYPE_FLOAT: |
| case TGSI_RETURN_TYPE_UNORM: |
| case TGSI_RETURN_TYPE_SNORM: |
| return FALSE; |
| case TGSI_RETURN_TYPE_COUNT: |
| default: |
| assert(!"is_integer_type: Unknown tgsi_return_type"); |
| return FALSE; |
| } |
| } |
| |
| |
| /** |
| * Emit declarations for resources. |
| * XXX When we're sure that all TGSI shaders will be generated with |
| * sampler view declarations (Ex: DCL SVIEW[n], 2D, UINT) we may |
| * rework this code. |
| */ |
| static boolean |
| emit_resource_declarations(struct svga_shader_emitter_v10 *emit) |
| { |
| unsigned i; |
| |
| /* Emit resource decl for each sampler */ |
| for (i = 0; i < emit->num_samplers; i++) { |
| VGPU10OpcodeToken0 opcode0; |
| VGPU10OperandToken0 operand0; |
| VGPU10ResourceReturnTypeToken return_type; |
| VGPU10_RESOURCE_RETURN_TYPE rt; |
| |
| opcode0.value = 0; |
| opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE; |
| opcode0.resourceDimension = |
| tgsi_texture_to_resource_dimension(emit->sampler_target[i], |
| emit->key.tex[i].num_samples, |
| emit->key.tex[i].is_array); |
| opcode0.sampleCount = emit->key.tex[i].num_samples; |
| operand0.value = 0; |
| operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; |
| operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE; |
| operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; |
| operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; |
| |
| #if 1 |
| /* convert TGSI_RETURN_TYPE_x to VGPU10_RETURN_TYPE_x */ |
| STATIC_ASSERT(VGPU10_RETURN_TYPE_UNORM == TGSI_RETURN_TYPE_UNORM + 1); |
| STATIC_ASSERT(VGPU10_RETURN_TYPE_SNORM == TGSI_RETURN_TYPE_SNORM + 1); |
| STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT == TGSI_RETURN_TYPE_SINT + 1); |
| STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT == TGSI_RETURN_TYPE_UINT + 1); |
| STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT == TGSI_RETURN_TYPE_FLOAT + 1); |
| assert(emit->sampler_return_type[i] <= TGSI_RETURN_TYPE_FLOAT); |
| rt = emit->sampler_return_type[i] + 1; |
| #else |
| switch (emit->sampler_return_type[i]) { |
| case TGSI_RETURN_TYPE_UNORM: rt = VGPU10_RETURN_TYPE_UNORM; break; |
| case TGSI_RETURN_TYPE_SNORM: rt = VGPU10_RETURN_TYPE_SNORM; break; |
| case TGSI_RETURN_TYPE_SINT: rt = VGPU10_RETURN_TYPE_SINT; break; |
| case TGSI_RETURN_TYPE_UINT: rt = VGPU10_RETURN_TYPE_UINT; break; |
| case TGSI_RETURN_TYPE_FLOAT: rt = VGPU10_RETURN_TYPE_FLOAT; break; |
| case TGSI_RETURN_TYPE_COUNT: |
| default: |
| rt = VGPU10_RETURN_TYPE_FLOAT; |
| assert(!"emit_resource_declarations: Unknown tgsi_return_type"); |
| } |
| #endif |
| |
| return_type.value = 0; |
| return_type.component0 = rt; |
| return_type.component1 = rt; |
| return_type.component2 = rt; |
| return_type.component3 = rt; |
| |
| begin_emit_instruction(emit); |
| emit_dword(emit, opcode0.value); |
| emit_dword(emit, operand0.value); |
| emit_dword(emit, i); |
| emit_dword(emit, return_type.value); |
| end_emit_instruction(emit); |
| } |
| |
| return TRUE; |
| } |
| |
| /** |
| * Emit instruction with n=1, 2 or 3 source registers. |
| */ |
| static void |
| emit_instruction_opn(struct svga_shader_emitter_v10 *emit, |
| unsigned opcode, |
| const struct tgsi_full_dst_register *dst, |
| const struct tgsi_full_src_register *src1, |
| const struct tgsi_full_src_register *src2, |
| const struct tgsi_full_src_register *src3, |
| boolean saturate, bool precise) |
| { |
| begin_emit_instruction(emit); |
| emit_opcode_precise(emit, opcode, saturate, precise); |
| emit_dst_register(emit, dst); |
| emit_src_register(emit, src1); |
| if (src2) { |
| emit_src_register(emit, src2); |
| } |
| if (src3) { |
| emit_src_register(emit, src3); |
| } |
| end_emit_instruction(emit); |
| } |
| |
| static void |
| emit_instruction_op1(struct svga_shader_emitter_v10 *emit, |
| unsigned opcode, |
| const struct tgsi_full_dst_register *dst, |
| const struct tgsi_full_src_register *src) |
| { |
| emit_instruction_opn(emit, opcode, dst, src, NULL, NULL, FALSE, FALSE); |
| } |
| |
| static void |
| emit_instruction_op2(struct svga_shader_emitter_v10 *emit, |
| VGPU10_OPCODE_TYPE opcode, |
| const struct tgsi_full_dst_register *dst, |
| const struct tgsi_full_src_register *src1, |
| const struct tgsi_full_src_register *src2) |
| { |
| emit_instruction_opn(emit, opcode, dst, src1, src2, NULL, FALSE, FALSE); |
| } |
| |
| static void |
| emit_instruction_op3(struct svga_shader_emitter_v10 *emit, |
| VGPU10_OPCODE_TYPE opcode, |
| const struct tgsi_full_dst_register *dst, |
| const struct tgsi_full_src_register *src1, |
| const struct tgsi_full_src_register *src2, |
| const struct tgsi_full_src_register *src3) |
| { |
| emit_instruction_opn(emit, opcode, dst, src1, src2, src3, FALSE, FALSE); |
| } |
| |
| static void |
| emit_instruction_op0(struct svga_shader_emitter_v10 *emit, |
| VGPU10_OPCODE_TYPE opcode) |
| { |
| begin_emit_instruction(emit); |
| emit_opcode(emit, opcode, FALSE); |
| end_emit_instruction(emit); |
| } |
| |
| /** |
| * Tessellation inner/outer levels needs to be store into its |
| * appropriate registers depending on prim_mode. |
| */ |
| static void |
| store_tesslevels(struct svga_shader_emitter_v10 *emit) |
| { |
| int i; |
| |
| /* tessellation levels are required input/out in hull shader. |
| * emitting the inner/outer tessellation levels, either from |
| * values provided in tcs or fallback default values which is 1.0 |
| */ |
| if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) { |
| struct tgsi_full_src_register temp_src; |
| |
| if (emit->tcs.inner.tgsi_index != INVALID_INDEX) |
| temp_src = make_src_temp_reg(emit->tcs.inner.temp_index); |
| else |
| temp_src = make_immediate_reg_float(emit, 1.0f); |
| |
| for (i = 0; i < 2; i++) { |
| struct tgsi_full_src_register src = |
| scalar_src(&temp_src, TGSI_SWIZZLE_X + i); |
| struct tgsi_full_dst_register dst = |
| make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.inner.out_index + i); |
| dst = writemask_dst(&dst, TGSI_WRITEMASK_X); |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); |
| } |
| |
| if (emit->tcs.outer.tgsi_index != INVALID_INDEX) |
| temp_src = make_src_temp_reg(emit->tcs.outer.temp_index); |
| else |
| temp_src = make_immediate_reg_float(emit, 1.0f); |
| |
| for (i = 0; i < 4; i++) { |
| struct tgsi_full_src_register src = |
| scalar_src(&temp_src, TGSI_SWIZZLE_X + i); |
| struct tgsi_full_dst_register dst = |
| make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.outer.out_index + i); |
| dst = writemask_dst(&dst, TGSI_WRITEMASK_X); |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); |
| } |
| } |
| else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) { |
| struct tgsi_full_src_register temp_src; |
| |
| if (emit->tcs.inner.tgsi_index != INVALID_INDEX) |
| temp_src = make_src_temp_reg(emit->tcs.inner.temp_index); |
| else |
| temp_src = make_immediate_reg_float(emit, 1.0f); |
| |
| struct tgsi_full_src_register src = |
| scalar_src(&temp_src, TGSI_SWIZZLE_X); |
| struct tgsi_full_dst_register dst = |
| make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.inner.out_index); |
| dst = writemask_dst(&dst, TGSI_WRITEMASK_X); |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); |
| |
| if (emit->tcs.outer.tgsi_index != INVALID_INDEX) |
| temp_src = make_src_temp_reg(emit->tcs.outer.temp_index); |
| else |
| temp_src = make_immediate_reg_float(emit, 1.0f); |
| |
| for (i = 0; i < 3; i++) { |
| struct tgsi_full_src_register src = |
| scalar_src(&temp_src, TGSI_SWIZZLE_X + i); |
| struct tgsi_full_dst_register dst = |
| make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.outer.out_index + i); |
| dst = writemask_dst(&dst, TGSI_WRITEMASK_X); |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); |
| } |
| } |
| else if (emit->key.tcs.prim_mode == PIPE_PRIM_LINES) { |
| if (emit->tcs.outer.tgsi_index != INVALID_INDEX) { |
| struct tgsi_full_src_register temp_src = |
| make_src_temp_reg(emit->tcs.outer.temp_index); |
| for (i = 0; i < 2; i++) { |
| struct tgsi_full_src_register src = |
| scalar_src(&temp_src, TGSI_SWIZZLE_X + i); |
| struct tgsi_full_dst_register dst = |
| make_dst_reg(TGSI_FILE_OUTPUT, |
| emit->tcs.outer.out_index + i); |
| dst = writemask_dst(&dst, TGSI_WRITEMASK_X); |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); |
| } |
| } |
| } |
| else { |
| debug_printf("Unsupported primitive type"); |
| } |
| } |
| |
| |
| /** |
| * Emit the actual clip distance instructions to be used for clipping |
| * by copying the clip distance from the temporary registers to the |
| * CLIPDIST registers written with the enabled planes mask. |
| * Also copy the clip distance from the temporary to the clip distance |
| * shadow copy register which will be referenced by the input shader |
| */ |
| static void |
| emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit) |
| { |
| struct tgsi_full_src_register tmp_clip_dist_src; |
| struct tgsi_full_dst_register clip_dist_dst; |
| |
| unsigned i; |
| unsigned clip_plane_enable = emit->key.clip_plane_enable; |
| unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index; |
| int num_written_clipdist = emit->info.num_written_clipdistance; |
| |
| assert(emit->clip_dist_out_index != INVALID_INDEX); |
| assert(emit->clip_dist_tmp_index != INVALID_INDEX); |
| |
| /** |
| * Temporary reset the temporary clip dist register index so |
| * that the copy to the real clip dist register will not |
| * attempt to copy to the temporary register again |
| */ |
| emit->clip_dist_tmp_index = INVALID_INDEX; |
| |
| for (i = 0; i < 2 && num_written_clipdist > 0; i++, num_written_clipdist-=4) { |
| |
| tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i); |
| |
| /** |
| * copy to the shadow copy for use by varying variable and |
| * stream output. All clip distances |
| * will be written regardless of the enabled clipping planes. |
| */ |
| clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT, |
| emit->clip_dist_so_index + i); |
| |
| /* MOV clip_dist_so, tmp_clip_dist */ |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst, |
| &tmp_clip_dist_src); |
| |
| /** |
| * copy those clip distances to enabled clipping planes |
| * to CLIPDIST registers for clipping |
| */ |
| if (clip_plane_enable & 0xf) { |
| clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT, |
| emit->clip_dist_out_index + i); |
| clip_dist_dst = writemask_dst(&clip_dist_dst, clip_plane_enable & 0xf); |
| |
| /* MOV CLIPDIST, tmp_clip_dist */ |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst, |
| &tmp_clip_dist_src); |
| } |
| /* four clip planes per clip register */ |
| clip_plane_enable >>= 4; |
| } |
| /** |
| * set the temporary clip dist register index back to the |
| * temporary index for the next vertex |
| */ |
| emit->clip_dist_tmp_index = clip_dist_tmp_index; |
| } |
| |
| /* Declare clip distance output registers for user-defined clip planes |
| * or the TGSI_CLIPVERTEX output. |
| */ |
| static void |
| emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit) |
| { |
| unsigned num_clip_planes = util_bitcount(emit->key.clip_plane_enable); |
| unsigned index = emit->num_outputs; |
| unsigned plane_mask; |
| |
| assert(emit->unit != PIPE_SHADER_FRAGMENT); |
| assert(num_clip_planes <= 8); |
| |
| if (emit->clip_mode != CLIP_LEGACY && |
| emit->clip_mode != CLIP_VERTEX) { |
| return; |
| } |
| |
| if (num_clip_planes == 0) |
| return; |
| |
| /* Convert clip vertex to clip distances only in the last vertex stage */ |
| if (!emit->key.last_vertex_stage) |
| return; |
| |
| /* Declare one or two clip output registers. The number of components |
| * in the mask reflects the number of clip planes. For example, if 5 |
| * clip planes are needed, we'll declare outputs similar to: |
| * dcl_output_siv o2.xyzw, clip_distance |
| * dcl_output_siv o3.x, clip_distance |
| */ |
| emit->clip_dist_out_index = index; /* save the starting clip dist reg index */ |
| |
| plane_mask = (1 << num_clip_planes) - 1; |
| if (plane_mask & 0xf) { |
| unsigned cmask = plane_mask & VGPU10_OPERAND_4_COMPONENT_MASK_ALL; |
| emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index, |
| VGPU10_NAME_CLIP_DISTANCE, cmask, TRUE, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE); |
| emit->num_outputs++; |
| } |
| if (plane_mask & 0xf0) { |
| unsigned cmask = (plane_mask >> 4) & VGPU10_OPERAND_4_COMPONENT_MASK_ALL; |
| emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index + 1, |
| VGPU10_NAME_CLIP_DISTANCE, cmask, TRUE, |
| SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE); |
| emit->num_outputs++; |
| } |
| } |
| |
| |
| /** |
| * Emit the instructions for writing to the clip distance registers |
| * to handle legacy/automatic clip planes. |
| * For each clip plane, the distance is the dot product of the vertex |
| * position (found in TEMP[vpos_tmp_index]) and the clip plane coefficients. |
| * This is not used when the shader has an explicit CLIPVERTEX or CLIPDISTANCE |
| * output registers already declared. |
| */ |
| static void |
| emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 *emit, |
| unsigned vpos_tmp_index) |
| { |
| unsigned i, num_clip_planes = util_bitcount(emit->key.clip_plane_enable); |
| |
| assert(emit->clip_mode == CLIP_LEGACY); |
| assert(num_clip_planes <= 8); |
| |
| assert(emit->unit == PIPE_SHADER_VERTEX || |
| emit->unit == PIPE_SHADER_GEOMETRY || |
| emit->unit == PIPE_SHADER_TESS_EVAL); |
| |
| for (i = 0; i < num_clip_planes; i++) { |
| struct tgsi_full_dst_register dst; |
| struct tgsi_full_src_register plane_src, vpos_src; |
| unsigned reg_index = emit->clip_dist_out_index + i / 4; |
| unsigned comp = i % 4; |
| unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp; |
| |
| /* create dst, src regs */ |
| dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index); |
| dst = writemask_dst(&dst, writemask); |
| |
| plane_src = make_src_const_reg(emit->clip_plane_const[i]); |
| vpos_src = make_src_temp_reg(vpos_tmp_index); |
| |
| /* DP4 clip_dist, plane, vpos */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst, |
| &plane_src, &vpos_src); |
| } |
| } |
| |
| |
| /** |
| * Emit the instructions for computing the clip distance results from |
| * the clip vertex temporary. |
| * For each clip plane, the distance is the dot product of the clip vertex |
| * position (found in a temp reg) and the clip plane coefficients. |
| */ |
| static void |
| emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit) |
| { |
| const unsigned num_clip = util_bitcount(emit->key.clip_plane_enable); |
| unsigned i; |
| struct tgsi_full_dst_register dst; |
| struct tgsi_full_src_register clipvert_src; |
| const unsigned clip_vertex_tmp = emit->clip_vertex_tmp_index; |
| |
| assert(emit->unit == PIPE_SHADER_VERTEX || |
| emit->unit == PIPE_SHADER_GEOMETRY || |
| emit->unit == PIPE_SHADER_TESS_EVAL); |
| |
| assert(emit->clip_mode == CLIP_VERTEX); |
| |
| clipvert_src = make_src_temp_reg(clip_vertex_tmp); |
| |
| for (i = 0; i < num_clip; i++) { |
| struct tgsi_full_src_register plane_src; |
| unsigned reg_index = emit->clip_dist_out_index + i / 4; |
| unsigned comp = i % 4; |
| unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp; |
| |
| /* create dst, src regs */ |
| dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index); |
| dst = writemask_dst(&dst, writemask); |
| |
| plane_src = make_src_const_reg(emit->clip_plane_const[i]); |
| |
| /* DP4 clip_dist, plane, vpos */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst, |
| &plane_src, &clipvert_src); |
| } |
| |
| /* copy temporary clip vertex register to the clip vertex register */ |
| |
| assert(emit->clip_vertex_out_index != INVALID_INDEX); |
| |
| /** |
| * temporary reset the temporary clip vertex register index so |
| * that copy to the clip vertex register will not attempt |
| * to copy to the temporary register again |
| */ |
| emit->clip_vertex_tmp_index = INVALID_INDEX; |
| |
| /* MOV clip_vertex, clip_vertex_tmp */ |
| dst = make_dst_reg(TGSI_FILE_OUTPUT, emit->clip_vertex_out_index); |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, |
| &dst, &clipvert_src); |
| |
| /** |
| * set the temporary clip vertex register index back to the |
| * temporary index for the next vertex |
| */ |
| emit->clip_vertex_tmp_index = clip_vertex_tmp; |
| } |
| |
| /** |
| * Emit code to convert RGBA to BGRA |
| */ |
| static void |
| emit_swap_r_b(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_dst_register *dst, |
| const struct tgsi_full_src_register *src) |
| { |
| struct tgsi_full_src_register bgra_src = |
| swizzle_src(src, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W); |
| |
| begin_emit_instruction(emit); |
| emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); |
| emit_dst_register(emit, dst); |
| emit_src_register(emit, &bgra_src); |
| end_emit_instruction(emit); |
| } |
| |
| |
| /** Convert from 10_10_10_2 normalized to 10_10_10_2_snorm */ |
| static void |
| emit_puint_to_snorm(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_dst_register *dst, |
| const struct tgsi_full_src_register *src) |
| { |
| struct tgsi_full_src_register half = make_immediate_reg_float(emit, 0.5f); |
| struct tgsi_full_src_register two = |
| make_immediate_reg_float4(emit, 2.0f, 2.0f, 2.0f, 3.0f); |
| struct tgsi_full_src_register neg_two = |
| make_immediate_reg_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f); |
| |
| unsigned val_tmp = get_temp_index(emit); |
| struct tgsi_full_dst_register val_dst = make_dst_temp_reg(val_tmp); |
| struct tgsi_full_src_register val_src = make_src_temp_reg(val_tmp); |
| |
| unsigned bias_tmp = get_temp_index(emit); |
| struct tgsi_full_dst_register bias_dst = make_dst_temp_reg(bias_tmp); |
| struct tgsi_full_src_register bias_src = make_src_temp_reg(bias_tmp); |
| |
| /* val = src * 2.0 */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &val_dst, src, &two); |
| |
| /* bias = src > 0.5 */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_GE, &bias_dst, src, &half); |
| |
| /* bias = bias & -2.0 */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_AND, &bias_dst, |
| &bias_src, &neg_two); |
| |
| /* dst = val + bias */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_ADD, dst, |
| &val_src, &bias_src); |
| |
| free_temp_indexes(emit); |
| } |
| |
| |
| /** Convert from 10_10_10_2_unorm to 10_10_10_2_uscaled */ |
| static void |
| emit_puint_to_uscaled(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_dst_register *dst, |
| const struct tgsi_full_src_register *src) |
| { |
| struct tgsi_full_src_register scale = |
| make_immediate_reg_float4(emit, 1023.0f, 1023.0f, 1023.0f, 3.0f); |
| |
| /* dst = src * scale */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_MUL, dst, src, &scale); |
| } |
| |
| |
| /** Convert from R32_UINT to 10_10_10_2_sscaled */ |
| static void |
| emit_puint_to_sscaled(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_dst_register *dst, |
| const struct tgsi_full_src_register *src) |
| { |
| struct tgsi_full_src_register lshift = |
| make_immediate_reg_int4(emit, 22, 12, 2, 0); |
| struct tgsi_full_src_register rshift = |
| make_immediate_reg_int4(emit, 22, 22, 22, 30); |
| |
| struct tgsi_full_src_register src_xxxx = scalar_src(src, TGSI_SWIZZLE_X); |
| |
| unsigned tmp = get_temp_index(emit); |
| struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); |
| struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); |
| |
| /* |
| * r = (pixel << 22) >> 22; # signed int in [511, -512] |
| * g = (pixel << 12) >> 22; # signed int in [511, -512] |
| * b = (pixel << 2) >> 22; # signed int in [511, -512] |
| * a = (pixel << 0) >> 30; # signed int in [1, -2] |
| * dst = i_to_f(r,g,b,a); # convert to float |
| */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &tmp_dst, |
| &src_xxxx, &lshift); |
| emit_instruction_op2(emit, VGPU10_OPCODE_ISHR, &tmp_dst, |
| &tmp_src, &rshift); |
| emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, dst, &tmp_src); |
| |
| free_temp_indexes(emit); |
| } |
| |
| |
| /** |
| * Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction. |
| */ |
| static boolean |
| emit_arl_uarl(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| unsigned index = inst->Dst[0].Register.Index; |
| struct tgsi_full_dst_register dst; |
| VGPU10_OPCODE_TYPE opcode; |
| |
| assert(index < MAX_VGPU10_ADDR_REGS); |
| dst = make_dst_temp_reg(emit->address_reg_index[index]); |
| dst = writemask_dst(&dst, inst->Dst[0].Register.WriteMask); |
| |
| /* ARL dst, s0 |
| * Translates into: |
| * FTOI address_tmp, s0 |
| * |
| * UARL dst, s0 |
| * Translates into: |
| * MOV address_tmp, s0 |
| */ |
| if (inst->Instruction.Opcode == TGSI_OPCODE_ARL) |
| opcode = VGPU10_OPCODE_FTOI; |
| else |
| opcode = VGPU10_OPCODE_MOV; |
| |
| emit_instruction_op1(emit, opcode, &dst, &inst->Src[0]); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit code for TGSI_OPCODE_CAL instruction. |
| */ |
| static boolean |
| emit_cal(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| unsigned label = inst->Label.Label; |
| VGPU10OperandToken0 operand; |
| operand.value = 0; |
| operand.operandType = VGPU10_OPERAND_TYPE_LABEL; |
| |
| begin_emit_instruction(emit); |
| emit_dword(emit, operand.value); |
| emit_dword(emit, label); |
| end_emit_instruction(emit); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit code for TGSI_OPCODE_IABS instruction. |
| */ |
| static boolean |
| emit_iabs(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| /* dst.x = (src0.x < 0) ? -src0.x : src0.x |
| * dst.y = (src0.y < 0) ? -src0.y : src0.y |
| * dst.z = (src0.z < 0) ? -src0.z : src0.z |
| * dst.w = (src0.w < 0) ? -src0.w : src0.w |
| * |
| * Translates into |
| * IMAX dst, src, neg(src) |
| */ |
| struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]); |
| emit_instruction_op2(emit, VGPU10_OPCODE_IMAX, &inst->Dst[0], |
| &inst->Src[0], &neg_src); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit code for TGSI_OPCODE_CMP instruction. |
| */ |
| static boolean |
| emit_cmp(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| /* dst.x = (src0.x < 0) ? src1.x : src2.x |
| * dst.y = (src0.y < 0) ? src1.y : src2.y |
| * dst.z = (src0.z < 0) ? src1.z : src2.z |
| * dst.w = (src0.w < 0) ? src1.w : src2.w |
| * |
| * Translates into |
| * LT tmp, src0, 0.0 |
| * MOVC dst, tmp, src1, src2 |
| */ |
| struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); |
| unsigned tmp = get_temp_index(emit); |
| struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); |
| struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); |
| |
| emit_instruction_opn(emit, VGPU10_OPCODE_LT, &tmp_dst, |
| &inst->Src[0], &zero, NULL, FALSE, |
| inst->Instruction.Precise); |
| emit_instruction_opn(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], |
| &tmp_src, &inst->Src[1], &inst->Src[2], |
| inst->Instruction.Saturate, FALSE); |
| |
| free_temp_indexes(emit); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit code for TGSI_OPCODE_DST instruction. |
| */ |
| static boolean |
| emit_dst(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| /* |
| * dst.x = 1 |
| * dst.y = src0.y * src1.y |
| * dst.z = src0.z |
| * dst.w = src1.w |
| */ |
| |
| struct tgsi_full_src_register s0_yyyy = |
| scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y); |
| struct tgsi_full_src_register s0_zzzz = |
| scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z); |
| struct tgsi_full_src_register s1_yyyy = |
| scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y); |
| struct tgsi_full_src_register s1_wwww = |
| scalar_src(&inst->Src[1], TGSI_SWIZZLE_W); |
| |
| /* |
| * If dst and either src0 and src1 are the same we need |
| * to create a temporary for it and insert a extra move. |
| */ |
| unsigned tmp_move = get_temp_index(emit); |
| struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move); |
| struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move); |
| |
| /* MOV dst.x, 1.0 */ |
| if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { |
| struct tgsi_full_dst_register dst_x = |
| writemask_dst(&move_dst, TGSI_WRITEMASK_X); |
| struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); |
| |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one); |
| } |
| |
| /* MUL dst.y, s0.y, s1.y */ |
| if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { |
| struct tgsi_full_dst_register dst_y = |
| writemask_dst(&move_dst, TGSI_WRITEMASK_Y); |
| |
| emit_instruction_opn(emit, VGPU10_OPCODE_MUL, &dst_y, &s0_yyyy, |
| &s1_yyyy, NULL, inst->Instruction.Saturate, |
| inst->Instruction.Precise); |
| } |
| |
| /* MOV dst.z, s0.z */ |
| if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { |
| struct tgsi_full_dst_register dst_z = |
| writemask_dst(&move_dst, TGSI_WRITEMASK_Z); |
| |
| emit_instruction_opn(emit, VGPU10_OPCODE_MOV, |
| &dst_z, &s0_zzzz, NULL, NULL, |
| inst->Instruction.Saturate, |
| inst->Instruction.Precise); |
| } |
| |
| /* MOV dst.w, s1.w */ |
| if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { |
| struct tgsi_full_dst_register dst_w = |
| writemask_dst(&move_dst, TGSI_WRITEMASK_W); |
| |
| emit_instruction_opn(emit, VGPU10_OPCODE_MOV, |
| &dst_w, &s1_wwww, NULL, NULL, |
| inst->Instruction.Saturate, |
| inst->Instruction.Precise); |
| } |
| |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src); |
| free_temp_indexes(emit); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * A helper function to return the stream index as specified in |
| * the immediate register |
| */ |
| static inline unsigned |
| find_stream_index(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_src_register *src) |
| { |
| return emit->immediates[src->Register.Index][src->Register.SwizzleX].Int; |
| } |
| |
| |
| /** |
| * Emit code for TGSI_OPCODE_ENDPRIM (GS only) |
| */ |
| static boolean |
| emit_endprim(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| assert(emit->unit == PIPE_SHADER_GEOMETRY); |
| |
| begin_emit_instruction(emit); |
| if (emit->version >= 50) { |
| unsigned streamIndex = find_stream_index(emit, &inst->Src[0]); |
| |
| if (emit->info.num_stream_output_components[streamIndex] == 0) { |
| /** |
| * If there is no output for this stream, discard this instruction. |
| */ |
| emit->discard_instruction = TRUE; |
| } |
| else { |
| emit_opcode(emit, VGPU10_OPCODE_CUT_STREAM, FALSE); |
| assert(inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE); |
| emit_stream_register(emit, streamIndex); |
| } |
| } |
| else { |
| emit_opcode(emit, VGPU10_OPCODE_CUT, FALSE); |
| } |
| end_emit_instruction(emit); |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit code for TGSI_OPCODE_EX2 (2^x) instruction. |
| */ |
| static boolean |
| emit_ex2(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| /* Note that TGSI_OPCODE_EX2 computes only one value from src.x |
| * while VGPU10 computes four values. |
| * |
| * dst = EX2(src): |
| * dst.xyzw = 2.0 ^ src.x |
| */ |
| |
| struct tgsi_full_src_register src_xxxx = |
| swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, |
| TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); |
| |
| /* EXP tmp, s0.xxxx */ |
| emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], &src_xxxx, |
| NULL, NULL, |
| inst->Instruction.Saturate, |
| inst->Instruction.Precise); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit code for TGSI_OPCODE_EXP instruction. |
| */ |
| static boolean |
| emit_exp(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| /* |
| * dst.x = 2 ^ floor(s0.x) |
| * dst.y = s0.x - floor(s0.x) |
| * dst.z = 2 ^ s0.x |
| * dst.w = 1.0 |
| */ |
| |
| struct tgsi_full_src_register src_xxxx = |
| scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); |
| unsigned tmp = get_temp_index(emit); |
| struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); |
| struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); |
| |
| /* |
| * If dst and src are the same we need to create |
| * a temporary for it and insert a extra move. |
| */ |
| unsigned tmp_move = get_temp_index(emit); |
| struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move); |
| struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move); |
| |
| /* only use X component of temp reg */ |
| tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); |
| tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X); |
| |
| /* ROUND_NI tmp.x, s0.x */ |
| emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst, |
| &src_xxxx); /* round to -infinity */ |
| |
| /* EXP dst.x, tmp.x */ |
| if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { |
| struct tgsi_full_dst_register dst_x = |
| writemask_dst(&move_dst, TGSI_WRITEMASK_X); |
| |
| emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &dst_x, &tmp_src, |
| NULL, NULL, |
| inst->Instruction.Saturate, |
| inst->Instruction.Precise); |
| } |
| |
| /* ADD dst.y, s0.x, -tmp */ |
| if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { |
| struct tgsi_full_dst_register dst_y = |
| writemask_dst(&move_dst, TGSI_WRITEMASK_Y); |
| struct tgsi_full_src_register neg_tmp_src = negate_src(&tmp_src); |
| |
| emit_instruction_opn(emit, VGPU10_OPCODE_ADD, &dst_y, &src_xxxx, |
| &neg_tmp_src, NULL, |
| inst->Instruction.Saturate, |
| inst->Instruction.Precise); |
| } |
| |
| /* EXP dst.z, s0.x */ |
| if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { |
| struct tgsi_full_dst_register dst_z = |
| writemask_dst(&move_dst, TGSI_WRITEMASK_Z); |
| |
| emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &dst_z, &src_xxxx, |
| NULL, NULL, |
| inst->Instruction.Saturate, |
| inst->Instruction.Precise); |
| } |
| |
| /* MOV dst.w, 1.0 */ |
| if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { |
| struct tgsi_full_dst_register dst_w = |
| writemask_dst(&move_dst, TGSI_WRITEMASK_W); |
| struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); |
| |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one); |
| } |
| |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src); |
| |
| free_temp_indexes(emit); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit code for TGSI_OPCODE_IF instruction. |
| */ |
| static boolean |
| emit_if(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_src_register *src) |
| { |
| VGPU10OpcodeToken0 opcode0; |
| |
| /* The src register should be a scalar */ |
| assert(src->Register.SwizzleX == src->Register.SwizzleY && |
| src->Register.SwizzleX == src->Register.SwizzleZ && |
| src->Register.SwizzleX == src->Register.SwizzleW); |
| |
| /* The only special thing here is that we need to set the |
| * VGPU10_INSTRUCTION_TEST_NONZERO flag since we want to test if |
| * src.x is non-zero. |
| */ |
| opcode0.value = 0; |
| opcode0.opcodeType = VGPU10_OPCODE_IF; |
| opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO; |
| |
| begin_emit_instruction(emit); |
| emit_dword(emit, opcode0.value); |
| emit_src_register(emit, src); |
| end_emit_instruction(emit); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit code for TGSI_OPCODE_KILL_IF instruction (kill fragment if any of |
| * the register components are negative). |
| */ |
| static boolean |
| emit_kill_if(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| unsigned tmp = get_temp_index(emit); |
| struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); |
| struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); |
| |
| struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); |
| |
| struct tgsi_full_dst_register tmp_dst_x = |
| writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); |
| struct tgsi_full_src_register tmp_src_xxxx = |
| scalar_src(&tmp_src, TGSI_SWIZZLE_X); |
| |
| /* tmp = src[0] < 0.0 */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0], &zero); |
| |
| if (!same_swizzle_terms(&inst->Src[0])) { |
| /* If the swizzle is not XXXX, YYYY, ZZZZ or WWWW we need to |
| * logically OR the swizzle terms. Most uses of KILL_IF only |
| * test one channel so it's good to avoid these extra steps. |
| */ |
| struct tgsi_full_src_register tmp_src_yyyy = |
| scalar_src(&tmp_src, TGSI_SWIZZLE_Y); |
| struct tgsi_full_src_register tmp_src_zzzz = |
| scalar_src(&tmp_src, TGSI_SWIZZLE_Z); |
| struct tgsi_full_src_register tmp_src_wwww = |
| scalar_src(&tmp_src, TGSI_SWIZZLE_W); |
| |
| emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx, |
| &tmp_src_yyyy); |
| emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx, |
| &tmp_src_zzzz); |
| emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx, |
| &tmp_src_wwww); |
| } |
| |
| begin_emit_instruction(emit); |
| emit_discard_opcode(emit, TRUE); /* discard if src0.x is non-zero */ |
| emit_src_register(emit, &tmp_src_xxxx); |
| end_emit_instruction(emit); |
| |
| free_temp_indexes(emit); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit code for TGSI_OPCODE_KILL instruction (unconditional discard). |
| */ |
| static boolean |
| emit_kill(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); |
| |
| /* DISCARD if 0.0 is zero */ |
| begin_emit_instruction(emit); |
| emit_discard_opcode(emit, FALSE); |
| emit_src_register(emit, &zero); |
| end_emit_instruction(emit); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit code for TGSI_OPCODE_LG2 instruction. |
| */ |
| static boolean |
| emit_lg2(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| /* Note that TGSI_OPCODE_LG2 computes only one value from src.x |
| * while VGPU10 computes four values. |
| * |
| * dst = LG2(src): |
| * dst.xyzw = log2(src.x) |
| */ |
| |
| struct tgsi_full_src_register src_xxxx = |
| swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, |
| TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); |
| |
| /* LOG tmp, s0.xxxx */ |
| emit_instruction_opn(emit, VGPU10_OPCODE_LOG, |
| &inst->Dst[0], &src_xxxx, NULL, NULL, |
| inst->Instruction.Saturate, |
| inst->Instruction.Precise); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit code for TGSI_OPCODE_LIT instruction. |
| */ |
| static boolean |
| emit_lit(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); |
| |
| /* |
| * If dst and src are the same we need to create |
| * a temporary for it and insert a extra move. |
| */ |
| unsigned tmp_move = get_temp_index(emit); |
| struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move); |
| struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move); |
| |
| /* |
| * dst.x = 1 |
| * dst.y = max(src.x, 0) |
| * dst.z = (src.x > 0) ? max(src.y, 0)^{clamp(src.w, -128, 128))} : 0 |
| * dst.w = 1 |
| */ |
| |
| /* MOV dst.x, 1.0 */ |
| if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { |
| struct tgsi_full_dst_register dst_x = |
| writemask_dst(&move_dst, TGSI_WRITEMASK_X); |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one); |
| } |
| |
| /* MOV dst.w, 1.0 */ |
| if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { |
| struct tgsi_full_dst_register dst_w = |
| writemask_dst(&move_dst, TGSI_WRITEMASK_W); |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one); |
| } |
| |
| /* MAX dst.y, src.x, 0.0 */ |
| if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { |
| struct tgsi_full_dst_register dst_y = |
| writemask_dst(&move_dst, TGSI_WRITEMASK_Y); |
| struct tgsi_full_src_register zero = |
| make_immediate_reg_float(emit, 0.0f); |
| struct tgsi_full_src_register src_xxxx = |
| swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, |
| TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); |
| |
| emit_instruction_opn(emit, VGPU10_OPCODE_MAX, &dst_y, &src_xxxx, |
| &zero, NULL, inst->Instruction.Saturate, FALSE); |
| } |
| |
| /* |
| * tmp1 = clamp(src.w, -128, 128); |
| * MAX tmp1, src.w, -128 |
| * MIN tmp1, tmp1, 128 |
| * |
| * tmp2 = max(tmp2, 0); |
| * MAX tmp2, src.y, 0 |
| * |
| * tmp1 = pow(tmp2, tmp1); |
| * LOG tmp2, tmp2 |
| * MUL tmp1, tmp2, tmp1 |
| * EXP tmp1, tmp1 |
| * |
| * tmp1 = (src.w == 0) ? 1 : tmp1; |
| * EQ tmp2, 0, src.w |
| * MOVC tmp1, tmp2, 1.0, tmp1 |
| * |
| * dst.z = (0 < src.x) ? tmp1 : 0; |
| * LT tmp2, 0, src.x |
| * MOVC dst.z, tmp2, tmp1, 0.0 |
| */ |
| if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { |
| struct tgsi_full_dst_register dst_z = |
| writemask_dst(&move_dst, TGSI_WRITEMASK_Z); |
| |
| unsigned tmp1 = get_temp_index(emit); |
| struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); |
| struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); |
| unsigned tmp2 = get_temp_index(emit); |
| struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); |
| struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); |
| |
| struct tgsi_full_src_register src_xxxx = |
| scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); |
| struct tgsi_full_src_register src_yyyy = |
| scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y); |
| struct tgsi_full_src_register src_wwww = |
| scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); |
| |
| struct tgsi_full_src_register zero = |
| make_immediate_reg_float(emit, 0.0f); |
| struct tgsi_full_src_register lowerbound = |
| make_immediate_reg_float(emit, -128.0f); |
| struct tgsi_full_src_register upperbound = |
| make_immediate_reg_float(emit, 128.0f); |
| |
| emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp1_dst, &src_wwww, |
| &lowerbound); |
| emit_instruction_op2(emit, VGPU10_OPCODE_MIN, &tmp1_dst, &tmp1_src, |
| &upperbound); |
| emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp2_dst, &src_yyyy, |
| &zero); |
| |
| /* POW tmp1, tmp2, tmp1 */ |
| /* LOG tmp2, tmp2 */ |
| emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp2_dst, &tmp2_src); |
| |
| /* MUL tmp1, tmp2, tmp1 */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &tmp2_src, |
| &tmp1_src); |
| |
| /* EXP tmp1, tmp1 */ |
| emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp1_dst, &tmp1_src); |
| |
| /* EQ tmp2, 0, src.w */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp2_dst, &zero, &src_wwww); |
| /* MOVC tmp1.z, tmp2, tmp1, 1.0 */ |
| emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp1_dst, |
| &tmp2_src, &one, &tmp1_src); |
| |
| /* LT tmp2, 0, src.x */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp2_dst, &zero, &src_xxxx); |
| /* MOVC dst.z, tmp2, tmp1, 0.0 */ |
| emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &dst_z, |
| &tmp2_src, &tmp1_src, &zero); |
| } |
| |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src); |
| free_temp_indexes(emit); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit code for a Level Of Detail Query (LODQ) instruction. |
| * |
| * Src[0] is the texture coordinate. Src[1] supplies the unit index, |
| * which is used for both the resource operand and the sampler operand. |
| * The emitter version is asserted to be at least 41. |
| * |
| * \param emit the shader emitter |
| * \param inst the TGSI instruction being translated |
| * \return TRUE always |
| */ |
| static boolean |
| emit_lodq(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| const struct tgsi_full_src_register *coord = &inst->Src[0]; |
| const uint tex_unit = inst->Src[1].Register.Index; |
| |
| assert(emit->version >= 41); |
| |
| /* Operand order: LOD dst, coord, resource, sampler */ |
| begin_emit_instruction(emit); |
| emit_opcode(emit, VGPU10_OPCODE_LOD, FALSE); |
| emit_dst_register(emit, &inst->Dst[0]); |
| emit_src_register(emit, coord); |
| emit_resource_register(emit, tex_unit); |
| emit_sampler_register(emit, tex_unit); |
| end_emit_instruction(emit); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit code for TGSI_OPCODE_LOG instruction. |
| * |
| * Results, computed only for components enabled in the dst writemask: |
| * dst.x = floor(lg2(abs(s0.x))) |
| * dst.y = abs(s0.x) / (2 ^ floor(lg2(abs(s0.x)))) |
| * dst.z = lg2(abs(s0.x)) |
| * dst.w = 1.0 |
| * |
| * The X component of one temporary holds lg2(), then its floor, then |
| * 2^floor in turn, so the steps below depend on being emitted in this |
| * order. |
| * |
| * \return TRUE always |
| */ |
| static boolean |
| emit_log(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| const struct tgsi_full_dst_register *dst = &inst->Dst[0]; |
| const unsigned mask = dst->Register.WriteMask; |
| struct tgsi_full_src_register s0_x = |
| scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); |
| unsigned scratch = get_temp_index(emit); |
| struct tgsi_full_src_register scratch_src = make_src_temp_reg(scratch); |
| struct tgsi_full_dst_register scratch_dst = make_dst_temp_reg(scratch); |
| struct tgsi_full_src_register abs_s0_x = absolute_src(&s0_x); |
| |
| /* restrict the temporary to its X component for reads and writes */ |
| scratch_dst = writemask_dst(&scratch_dst, TGSI_WRITEMASK_X); |
| scratch_src = scalar_src(&scratch_src, TGSI_SWIZZLE_X); |
| |
| /* LOG scratch.x, abs(s0.x) -- needed by x, y and z */ |
| if (mask & TGSI_WRITEMASK_XYZ) { |
| emit_instruction_op1(emit, VGPU10_OPCODE_LOG, |
| &scratch_dst, &abs_s0_x); |
| } |
| |
| /* MOV dst.z, scratch.x -- the un-floored logarithm */ |
| if (mask & TGSI_WRITEMASK_Z) { |
| struct tgsi_full_dst_register out_z = |
| writemask_dst(dst, TGSI_WRITEMASK_Z); |
| |
| emit_instruction_opn(emit, VGPU10_OPCODE_MOV, |
| &out_z, &scratch_src, NULL, NULL, |
| inst->Instruction.Saturate, FALSE); |
| } |
| |
| /* ROUND_NI scratch.x, scratch.x -- floor, needed by x and y */ |
| if (mask & TGSI_WRITEMASK_XY) { |
| emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, |
| &scratch_dst, &scratch_src); |
| } |
| |
| /* MOV dst.x, scratch.x */ |
| if (mask & TGSI_WRITEMASK_X) { |
| struct tgsi_full_dst_register out_x = |
| writemask_dst(dst, TGSI_WRITEMASK_X); |
| |
| emit_instruction_opn(emit, VGPU10_OPCODE_MOV, |
| &out_x, &scratch_src, NULL, NULL, |
| inst->Instruction.Saturate, FALSE); |
| } |
| |
| /* EXP scratch.x, scratch.x |
| * DIV dst.y, abs(s0.x), scratch.x |
| */ |
| if (mask & TGSI_WRITEMASK_Y) { |
| struct tgsi_full_dst_register out_y = |
| writemask_dst(dst, TGSI_WRITEMASK_Y); |
| |
| emit_instruction_op1(emit, VGPU10_OPCODE_EXP, |
| &scratch_dst, &scratch_src); |
| emit_instruction_opn(emit, VGPU10_OPCODE_DIV, &out_y, &abs_s0_x, |
| &scratch_src, NULL, |
| inst->Instruction.Saturate, FALSE); |
| } |
| |
| /* MOV dst.w, 1.0 */ |
| if (mask & TGSI_WRITEMASK_W) { |
| struct tgsi_full_dst_register out_w = |
| writemask_dst(dst, TGSI_WRITEMASK_W); |
| struct tgsi_full_src_register f_one = |
| make_immediate_reg_float(emit, 1.0f); |
| |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &out_w, &f_one); |
| } |
| |
| free_temp_indexes(emit); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit code for TGSI_OPCODE_LRP (linear interpolate) instruction. |
| * |
| * dst = LRP(s0, s1, s2) = s0 * (s1 - s2) + s2 |
| * |
| * Emitted as: |
| * ADD tmp, s1, -s2 tmp = s1 - s2 |
| * MAD dst, s0, tmp, s2 dst = s0 * tmp + s2 |
| * |
| * Saturation is applied only on the final MAD; the Precise flag is |
| * passed to both instructions. |
| * |
| * \return TRUE always |
| */ |
| static boolean |
| emit_lrp(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| const struct tgsi_full_src_register *s0 = &inst->Src[0]; |
| const struct tgsi_full_src_register *s1 = &inst->Src[1]; |
| const struct tgsi_full_src_register *s2 = &inst->Src[2]; |
| unsigned diff = get_temp_index(emit); |
| struct tgsi_full_src_register diff_src = make_src_temp_reg(diff); |
| struct tgsi_full_dst_register diff_dst = make_dst_temp_reg(diff); |
| struct tgsi_full_src_register minus_s2 = negate_src(s2); |
| |
| /* ADD diff, s1, -s2 */ |
| emit_instruction_opn(emit, VGPU10_OPCODE_ADD, &diff_dst, |
| s1, &minus_s2, NULL, |
| FALSE, inst->Instruction.Precise); |
| |
| /* MAD dst, s0, diff, s2 */ |
| emit_instruction_opn(emit, VGPU10_OPCODE_MAD, &inst->Dst[0], |
| s0, &diff_src, s2, |
| inst->Instruction.Saturate, |
| inst->Instruction.Precise); |
| |
| free_temp_indexes(emit); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit code for TGSI_OPCODE_POW instruction. |
| * |
| * TGSI_OPCODE_POW produces a single value from src0.x and src1.x, |
| * whereas the VGPU10 instructions work on four components, so both |
| * sources are replicated from their X component: |
| * dst.xyzw = src0.x ^ src1.x |
| * |
| * Emitted as: |
| * LOG tmp, s0.xxxx |
| * MUL tmp, tmp, s1.xxxx |
| * EXP dst, tmp |
| * |
| * \return TRUE always |
| */ |
| static boolean |
| emit_pow(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| unsigned acc = get_temp_index(emit); |
| struct tgsi_full_src_register acc_src = make_src_temp_reg(acc); |
| struct tgsi_full_dst_register acc_dst = make_dst_temp_reg(acc); |
| struct tgsi_full_src_register base_xxxx = |
| swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, |
| TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); |
| struct tgsi_full_src_register exponent_xxxx = |
| swizzle_src(&inst->Src[1], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, |
| TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); |
| |
| /* LOG acc, s0.xxxx */ |
| emit_instruction_opn(emit, VGPU10_OPCODE_LOG, |
| &acc_dst, &base_xxxx, NULL, NULL, |
| FALSE, inst->Instruction.Precise); |
| |
| /* MUL acc, acc, s1.xxxx */ |
| emit_instruction_opn(emit, VGPU10_OPCODE_MUL, |
| &acc_dst, &acc_src, &exponent_xxxx, NULL, |
| FALSE, inst->Instruction.Precise); |
| |
| /* EXP dst, acc -- saturation applies to the final result only */ |
| emit_instruction_opn(emit, VGPU10_OPCODE_EXP, |
| &inst->Dst[0], &acc_src, NULL, NULL, |
| inst->Instruction.Saturate, |
| inst->Instruction.Precise); |
| |
| /* release the temporary */ |
| free_temp_indexes(emit); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit code for TGSI_OPCODE_RCP (reciprocal) instruction. |
| * |
| * TGSI_OPCODE_RCP computes dst.xyzw = 1.0 / src.x. |
| * |
| * For emitter versions below 50 this is done with a DIV into the X |
| * component of a temporary followed by a broadcasting MOV. For version |
| * 50 and later the VGPU10 RCP opcode is used instead. |
| * |
| * \return TRUE always |
| */ |
| static boolean |
| emit_rcp(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| if (emit->version < 50) { |
| struct tgsi_full_src_register f_one = |
| make_immediate_reg_float(emit, 1.0f); |
| |
| unsigned quot = get_temp_index(emit); |
| struct tgsi_full_src_register quot_src = make_src_temp_reg(quot); |
| struct tgsi_full_dst_register quot_dst = make_dst_temp_reg(quot); |
| |
| struct tgsi_full_dst_register quot_dst_x = |
| writemask_dst(&quot_dst, TGSI_WRITEMASK_X); |
| struct tgsi_full_src_register quot_src_xxxx = |
| scalar_src(&quot_src, TGSI_SWIZZLE_X); |
| |
| /* DIV quot.x, 1.0, s0 */ |
| emit_instruction_opn(emit, VGPU10_OPCODE_DIV, |
| &quot_dst_x, &f_one, &inst->Src[0], NULL, |
| FALSE, inst->Instruction.Precise); |
| |
| /* MOV dst, quot.xxxx */ |
| emit_instruction_opn(emit, VGPU10_OPCODE_MOV, |
| &inst->Dst[0], &quot_src_xxxx, NULL, NULL, |
| inst->Instruction.Saturate, |
| inst->Instruction.Precise); |
| |
| free_temp_indexes(emit); |
| } |
| else { |
| /* VGPU10_OPCODE_RCP works per component, but TGSI wants the |
| * reciprocal of the X-selected component in every channel, so |
| * point the Y, Z and W selectors of a copy of the source at |
| * whatever its X selector reads. |
| */ |
| struct tgsi_full_src_register src_xxxx = inst->Src[0]; |
| |
| src_xxxx.Register.SwizzleW = src_xxxx.Register.SwizzleX; |
| src_xxxx.Register.SwizzleZ = src_xxxx.Register.SwizzleX; |
| src_xxxx.Register.SwizzleY = src_xxxx.Register.SwizzleX; |
| |
| begin_emit_instruction(emit); |
| emit_opcode_precise(emit, VGPU10_OPCODE_RCP, |
| inst->Instruction.Saturate, |
| inst->Instruction.Precise); |
| emit_dst_register(emit, &inst->Dst[0]); |
| emit_src_register(emit, &src_xxxx); |
| end_emit_instruction(emit); |
| } |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit code for TGSI_OPCODE_RSQ (reciprocal square root) instruction. |
| * |
| * dst.xyzw = 1 / sqrt(src.x) |
| * |
| * Emitted as: |
| * RSQ tmp.x, src |
| * MOV dst, tmp.xxxx |
| * |
| * \return TRUE always |
| */ |
| static boolean |
| emit_rsq(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| unsigned result = get_temp_index(emit); |
| struct tgsi_full_src_register result_src = make_src_temp_reg(result); |
| struct tgsi_full_dst_register result_dst = make_dst_temp_reg(result); |
| |
| struct tgsi_full_dst_register result_dst_x = |
| writemask_dst(&result_dst, TGSI_WRITEMASK_X); |
| struct tgsi_full_src_register result_src_xxxx = |
| scalar_src(&result_src, TGSI_SWIZZLE_X); |
| |
| /* RSQ result.x, src -- only the X channel of the temp is written */ |
| emit_instruction_opn(emit, VGPU10_OPCODE_RSQ, |
| &result_dst_x, &inst->Src[0], NULL, NULL, |
| FALSE, inst->Instruction.Precise); |
| |
| /* MOV dst, result.xxxx -- broadcast, saturating if requested */ |
| emit_instruction_opn(emit, VGPU10_OPCODE_MOV, |
| &inst->Dst[0], &result_src_xxxx, NULL, NULL, |
| inst->Instruction.Saturate, |
| inst->Instruction.Precise); |
| |
| /* release the temporary */ |
| free_temp_indexes(emit); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction. |
| * |
| * Per component: dst = (s0 == s1) ? 1.0 : 0.0 |
| * |
| * Emitted as: |
| * EQ cond, s0, s1 cond = (s0 == s1) ? 0xffffffff : 0 |
| * MOVC dst, cond, 1.0, 0.0 dst = cond ? 1.0 : 0.0 |
| * |
| * \return TRUE always |
| */ |
| static boolean |
| emit_seq(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| unsigned cond = get_temp_index(emit); |
| struct tgsi_full_src_register cond_src = make_src_temp_reg(cond); |
| struct tgsi_full_dst_register cond_dst = make_dst_temp_reg(cond); |
| struct tgsi_full_src_register f_zero = make_immediate_reg_float(emit, 0.0f); |
| struct tgsi_full_src_register f_one = make_immediate_reg_float(emit, 1.0f); |
| |
| /* EQ cond, s0, s1 */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &cond_dst, |
| &inst->Src[0], &inst->Src[1]); |
| |
| /* MOVC dst, cond, 1.0, 0.0 */ |
| emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], |
| &cond_src, &f_one, &f_zero); |
| |
| free_temp_indexes(emit); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit code for TGSI_OPCODE_SGE (Set Greater than or Equal) instruction. |
| * |
| * Per component: dst = (s0 >= s1) ? 1.0 : 0.0 |
| * |
| * Emitted as: |
| * GE cond, s0, s1 cond = (s0 >= s1) ? 0xffffffff : 0 |
| * MOVC dst, cond, 1.0, 0.0 dst = cond ? 1.0 : 0.0 |
| * |
| * \return TRUE always |
| */ |
| static boolean |
| emit_sge(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| unsigned cond = get_temp_index(emit); |
| struct tgsi_full_src_register cond_src = make_src_temp_reg(cond); |
| struct tgsi_full_dst_register cond_dst = make_dst_temp_reg(cond); |
| struct tgsi_full_src_register f_zero = make_immediate_reg_float(emit, 0.0f); |
| struct tgsi_full_src_register f_one = make_immediate_reg_float(emit, 1.0f); |
| |
| /* GE cond, s0, s1 */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_GE, &cond_dst, |
| &inst->Src[0], &inst->Src[1]); |
| |
| /* MOVC dst, cond, 1.0, 0.0 */ |
| emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], |
| &cond_src, &f_one, &f_zero); |
| |
| free_temp_indexes(emit); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit code for TGSI_OPCODE_SGT (Set Greater than) instruction. |
| * |
| * Per component: dst = (s0 > s1) ? 1.0 : 0.0 |
| * |
| * The test is expressed as "s1 < s0", i.e. an LT with the operands |
| * swapped: |
| * LT cond, s1, s0 cond = (s1 < s0) ? 0xffffffff : 0 |
| * MOVC dst, cond, 1.0, 0.0 dst = cond ? 1.0 : 0.0 |
| * |
| * \return TRUE always |
| */ |
| static boolean |
| emit_sgt(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| unsigned cond = get_temp_index(emit); |
| struct tgsi_full_src_register cond_src = make_src_temp_reg(cond); |
| struct tgsi_full_dst_register cond_dst = make_dst_temp_reg(cond); |
| struct tgsi_full_src_register f_zero = make_immediate_reg_float(emit, 0.0f); |
| struct tgsi_full_src_register f_one = make_immediate_reg_float(emit, 1.0f); |
| |
| /* LT cond, s1, s0 -- note the swapped sources */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_LT, &cond_dst, |
| &inst->Src[1], &inst->Src[0]); |
| |
| /* MOVC dst, cond, 1.0, 0.0 */ |
| emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], |
| &cond_src, &f_one, &f_zero); |
| |
| free_temp_indexes(emit); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit code for TGSI_OPCODE_SIN and TGSI_OPCODE_COS instructions. |
| * |
| * Both map to VGPU10_OPCODE_SINCOS, which takes two destination |
| * operands. The slot for the requested function gets the X channel of |
| * a temporary and the other slot gets a null destination: the first |
| * slot is used for TGSI_OPCODE_SIN, the second for any other opcode. |
| * The temporary's X value is then broadcast to dst by a MOV. |
| * |
| * \return TRUE always |
| */ |
| static boolean |
| emit_sincos(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| unsigned result = get_temp_index(emit); |
| struct tgsi_full_src_register result_src = make_src_temp_reg(result); |
| struct tgsi_full_dst_register result_dst = make_dst_temp_reg(result); |
| |
| struct tgsi_full_src_register result_src_xxxx = |
| scalar_src(&result_src, TGSI_SWIZZLE_X); |
| struct tgsi_full_dst_register result_dst_x = |
| writemask_dst(&result_dst, TGSI_WRITEMASK_X); |
| |
| begin_emit_instruction(emit); |
| emit_opcode(emit, VGPU10_OPCODE_SINCOS, FALSE); |
| |
| if (inst->Instruction.Opcode != TGSI_OPCODE_SIN) { |
| /* result wanted from the second destination slot */ |
| emit_null_dst_register(emit); |
| emit_dst_register(emit, &result_dst_x); |
| } |
| else { |
| /* result wanted from the first destination slot */ |
| emit_dst_register(emit, &result_dst_x); |
| emit_null_dst_register(emit); |
| } |
| |
| emit_src_register(emit, &inst->Src[0]); |
| end_emit_instruction(emit); |
| |
| /* MOV dst, result.xxxx */ |
| emit_instruction_opn(emit, VGPU10_OPCODE_MOV, |
| &inst->Dst[0], &result_src_xxxx, NULL, NULL, |
| inst->Instruction.Saturate, |
| inst->Instruction.Precise); |
| |
| free_temp_indexes(emit); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit code for TGSI_OPCODE_SLE (Set Less than or Equal) instruction. |
| * |
| * Per component: dst = (s0 <= s1) ? 1.0 : 0.0 |
| * |
| * The test is expressed as "s1 >= s0", i.e. a GE with the operands |
| * swapped: |
| * GE cond, s1, s0 cond = (s1 >= s0) ? 0xffffffff : 0 |
| * MOVC dst, cond, 1.0, 0.0 dst = cond ? 1.0 : 0.0 |
| * |
| * \return TRUE always |
| */ |
| static boolean |
| emit_sle(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| unsigned cond = get_temp_index(emit); |
| struct tgsi_full_src_register cond_src = make_src_temp_reg(cond); |
| struct tgsi_full_dst_register cond_dst = make_dst_temp_reg(cond); |
| struct tgsi_full_src_register f_zero = make_immediate_reg_float(emit, 0.0f); |
| struct tgsi_full_src_register f_one = make_immediate_reg_float(emit, 1.0f); |
| |
| /* GE cond, s1, s0 -- note the swapped sources */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_GE, &cond_dst, |
| &inst->Src[1], &inst->Src[0]); |
| |
| /* MOVC dst, cond, 1.0, 0.0 */ |
| emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], |
| &cond_src, &f_one, &f_zero); |
| |
| free_temp_indexes(emit); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit code for TGSI_OPCODE_SLT (Set Less than) instruction. |
| * |
| * Per component: dst = (s0 < s1) ? 1.0 : 0.0 |
| * |
| * Emitted as: |
| * LT cond, s0, s1 cond = (s0 < s1) ? 0xffffffff : 0 |
| * MOVC dst, cond, 1.0, 0.0 dst = cond ? 1.0 : 0.0 |
| * |
| * \return TRUE always |
| */ |
| static boolean |
| emit_slt(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| unsigned cond = get_temp_index(emit); |
| struct tgsi_full_src_register cond_src = make_src_temp_reg(cond); |
| struct tgsi_full_dst_register cond_dst = make_dst_temp_reg(cond); |
| struct tgsi_full_src_register f_zero = make_immediate_reg_float(emit, 0.0f); |
| struct tgsi_full_src_register f_one = make_immediate_reg_float(emit, 1.0f); |
| |
| /* LT cond, s0, s1 */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_LT, &cond_dst, |
| &inst->Src[0], &inst->Src[1]); |
| |
| /* MOVC dst, cond, 1.0, 0.0 */ |
| emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], |
| &cond_src, &f_one, &f_zero); |
| |
| free_temp_indexes(emit); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit code for TGSI_OPCODE_SNE (Set Not Equal) instruction. |
| * |
| * Per component: dst = (s0 != s1) ? 1.0 : 0.0 |
| * |
| * Emitted as: |
| * NE cond, s0, s1 cond = (s0 != s1) ? 0xffffffff : 0 |
| * MOVC dst, cond, 1.0, 0.0 dst = cond ? 1.0 : 0.0 |
| * |
| * \return TRUE always |
| */ |
| static boolean |
| emit_sne(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| unsigned cond = get_temp_index(emit); |
| struct tgsi_full_src_register cond_src = make_src_temp_reg(cond); |
| struct tgsi_full_dst_register cond_dst = make_dst_temp_reg(cond); |
| struct tgsi_full_src_register f_zero = make_immediate_reg_float(emit, 0.0f); |
| struct tgsi_full_src_register f_one = make_immediate_reg_float(emit, 1.0f); |
| |
| /* NE cond, s0, s1 */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_NE, &cond_dst, |
| &inst->Src[0], &inst->Src[1]); |
| |
| /* MOVC dst, cond, 1.0, 0.0 */ |
| emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], |
| &cond_src, &f_one, &f_zero); |
| |
| free_temp_indexes(emit); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit code for TGSI_OPCODE_SSG (Set Sign) instruction. |
| * |
| * Per component: |
| * dst = (src > 0.0) ? 1.0 : (src < 0.0) ? -1.0 : 0.0 |
| * |
| * Emitted as: |
| * LT cmp, src, 0.0 cmp = (src < 0) ? 0xffffffff : 0 |
| * MOVC neg, cmp, -1.0, 0.0 neg = cmp ? -1.0 : 0.0 |
| * LT cmp, 0.0, src cmp = (0 < src) ? 0xffffffff : 0 |
| * MOVC dst, cmp, 1.0, neg dst = cmp ? 1.0 : neg |
| * |
| * \return TRUE always |
| */ |
| static boolean |
| emit_ssg(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| const struct tgsi_full_src_register *src = &inst->Src[0]; |
| struct tgsi_full_src_register f_zero = |
| make_immediate_reg_float(emit, 0.0f); |
| struct tgsi_full_src_register f_one = |
| make_immediate_reg_float(emit, 1.0f); |
| struct tgsi_full_src_register f_minus_one = |
| make_immediate_reg_float(emit, -1.0f); |
| |
| /* comparison mask, reused for both tests */ |
| unsigned cmp = get_temp_index(emit); |
| struct tgsi_full_src_register cmp_src = make_src_temp_reg(cmp); |
| struct tgsi_full_dst_register cmp_dst = make_dst_temp_reg(cmp); |
| |
| /* holds -1.0 where src is negative, else 0.0 */ |
| unsigned neg = get_temp_index(emit); |
| struct tgsi_full_src_register neg_src = make_src_temp_reg(neg); |
| struct tgsi_full_dst_register neg_dst = make_dst_temp_reg(neg); |
| |
| /* LT cmp, src, 0.0 */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_LT, &cmp_dst, src, &f_zero); |
| /* MOVC neg, cmp, -1.0, 0.0 */ |
| emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &neg_dst, &cmp_src, |
| &f_minus_one, &f_zero); |
| /* LT cmp, 0.0, src */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_LT, &cmp_dst, &f_zero, src); |
| /* MOVC dst, cmp, 1.0, neg */ |
| emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &cmp_src, |
| &f_one, &neg_src); |
| |
| free_temp_indexes(emit); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit code for TGSI_OPCODE_ISSG (Integer Set Sign) instruction. |
| * |
| * Per component: |
| * dst = (src > 0) ? 1 : (src < 0) ? -1 : 0 |
| * |
| * Emitted as: |
| * ILT is_neg, src, 0 is_neg = (src < 0) ? -1 : 0 |
| * ILT is_pos, 0, src is_pos = (0 < src) ? -1 : 0 |
| * IADD dst, is_neg, -is_pos dst = is_neg - is_pos |
| * |
| * The zero operand is obtained with make_immediate_reg_float(0.0f). |
| * |
| * \return TRUE always |
| */ |
| static boolean |
| emit_issg(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| const struct tgsi_full_src_register *src = &inst->Src[0]; |
| struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); |
| |
| unsigned is_neg = get_temp_index(emit); |
| struct tgsi_full_src_register is_neg_src = make_src_temp_reg(is_neg); |
| struct tgsi_full_dst_register is_neg_dst = make_dst_temp_reg(is_neg); |
| |
| unsigned is_pos = get_temp_index(emit); |
| struct tgsi_full_src_register is_pos_src = make_src_temp_reg(is_pos); |
| struct tgsi_full_dst_register is_pos_dst = make_dst_temp_reg(is_pos); |
| |
| struct tgsi_full_src_register minus_is_pos = negate_src(&is_pos_src); |
| |
| /* ILT is_neg, src, 0 */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &is_neg_dst, src, &zero); |
| /* ILT is_pos, 0, src */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &is_pos_dst, &zero, src); |
| /* IADD dst, is_neg, -is_pos */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &inst->Dst[0], |
| &is_neg_src, &minus_is_pos); |
| |
| free_temp_indexes(emit); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit a comparison instruction. The dest register will get |
| * 0 or ~0 values depending on the outcome of comparing src0 to src1. |
| * |
| * NEVER and ALWAYS need no comparison: a MOV of the integer immediate |
| * 0 or -1 is emitted instead. LESSEQUAL and GREATER are emitted as GE |
| * and LT with the two sources exchanged. |
| * |
| * \param func the SVGA3D comparison function |
| * \param dst register receiving the result |
| * \param src0 left-hand operand of the comparison |
| * \param src1 right-hand operand of the comparison |
| */ |
| static void |
| emit_comparison(struct svga_shader_emitter_v10 *emit, |
| SVGA3dCmpFunc func, |
| const struct tgsi_full_dst_register *dst, |
| const struct tgsi_full_src_register *src0, |
| const struct tgsi_full_src_register *src1) |
| { |
| VGPU10OpcodeToken0 token; |
| const struct tgsi_full_src_register *first = src0; |
| const struct tgsi_full_src_register *second = src1; |
| |
| /* Sanity checks for svga vs. gallium enums */ |
| STATIC_ASSERT(SVGA3D_CMP_LESS == (PIPE_FUNC_LESS + 1)); |
| STATIC_ASSERT(SVGA3D_CMP_GREATEREQUAL == (PIPE_FUNC_GEQUAL + 1)); |
| |
| if (func == SVGA3D_CMP_NEVER || func == SVGA3D_CMP_ALWAYS) { |
| /* constant outcome: MOV dst, {0} or MOV dst, {-1} */ |
| struct tgsi_full_src_register constant = |
| make_immediate_reg_int(emit, func == SVGA3D_CMP_NEVER ? 0 : -1); |
| |
| begin_emit_instruction(emit); |
| emit_dword(emit, VGPU10_OPCODE_MOV); |
| emit_dst_register(emit, dst); |
| emit_src_register(emit, &constant); |
| end_emit_instruction(emit); |
| return; |
| } |
| |
| token.value = 0; |
| |
| switch (func) { |
| case SVGA3D_CMP_EQUAL: |
| token.opcodeType = VGPU10_OPCODE_EQ; |
| break; |
| case SVGA3D_CMP_NOTEQUAL: |
| token.opcodeType = VGPU10_OPCODE_NE; |
| break; |
| case SVGA3D_CMP_LESS: |
| token.opcodeType = VGPU10_OPCODE_LT; |
| break; |
| case SVGA3D_CMP_GREATEREQUAL: |
| token.opcodeType = VGPU10_OPCODE_GE; |
| break; |
| case SVGA3D_CMP_GREATER: |
| /* a > b is emitted as b < a */ |
| token.opcodeType = VGPU10_OPCODE_LT; |
| first = src1; |
| second = src0; |
| break; |
| case SVGA3D_CMP_LESSEQUAL: |
| /* a <= b is emitted as b >= a */ |
| token.opcodeType = VGPU10_OPCODE_GE; |
| first = src1; |
| second = src0; |
| break; |
| default: |
| assert(!"Unexpected comparison mode"); |
| token.opcodeType = VGPU10_OPCODE_EQ; |
| } |
| |
| begin_emit_instruction(emit); |
| emit_dword(emit, token.value); |
| emit_dst_register(emit, dst); |
| emit_src_register(emit, first); |
| emit_src_register(emit, second); |
| end_emit_instruction(emit); |
| } |
| |
| |
| /** |
| * Get texel/address offsets for a texture instruction. |
| * |
| * When the instruction carries exactly one texture offset, the three |
| * integer offsets are read from the emitter's immediates table using |
| * the offset's index and its X/Y/Z swizzle selectors. Otherwise all |
| * three offsets are set to zero. |
| * |
| * \param emit the shader emitter (source of the immediates) |
| * \param inst the TGSI texture instruction |
| * \param offsets returns the x, y, z offsets |
| */ |
| static void |
| get_texel_offsets(const struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst, int offsets[3]) |
| { |
| if (inst->Texture.NumOffsets != 1) { |
| offsets[0] = 0; |
| offsets[1] = 0; |
| offsets[2] = 0; |
| } |
| else { |
| /* According to OpenGL Shader Language spec the offsets are only |
| * fetched from a previously-declared immediate/literal. |
| */ |
| const struct tgsi_texture_offset *tex_off = inst->TexOffsets; |
| const unsigned imm_index = tex_off[0].Index; |
| const unsigned sel_x = tex_off[0].SwizzleX; |
| const unsigned sel_y = tex_off[0].SwizzleY; |
| const unsigned sel_z = tex_off[0].SwizzleZ; |
| const union tgsi_immediate_data *values = emit->immediates[imm_index]; |
| |
| assert(inst->TexOffsets[0].File == TGSI_FILE_IMMEDIATE); |
| |
| offsets[0] = values[sel_x].Int; |
| offsets[1] = values[sel_y].Int; |
| offsets[2] = values[sel_z].Int; |
| } |
| } |
| |
| |
| /** |
| * Set up the coordinate register for texture sampling. |
| * |
| * Sampling with unnormalized coordinates (RECT textures) requires |
| * scaling the coordinate to a normalized one. That is done here by |
| * multiplying the coordinate by an "extra" constant; querying the |
| * texture size with the RESINFO instruction would be an alternative. |
| * |
| * \param unit the texture unit |
| * \param coord the incoming texture coordinate |
| * \return the coordinate register to sample with: a scaled temporary |
| * when the unit has a sampler view and its key is marked |
| * unnormalized, otherwise a copy of \p coord |
| */ |
| static struct tgsi_full_src_register |
| setup_texcoord(struct svga_shader_emitter_v10 *emit, |
| unsigned unit, |
| const struct tgsi_full_src_register *coord) |
| { |
| if (!emit->sampler_view[unit] || !emit->key.tex[unit].unnormalized) { |
| /* use texcoord as-is */ |
| return *coord; |
| } |
| else { |
| unsigned scale_const = emit->texcoord_scale_index[unit]; |
| unsigned scaled = get_temp_index(emit); |
| struct tgsi_full_src_register scaled_src = make_src_temp_reg(scaled); |
| struct tgsi_full_dst_register scaled_dst = make_dst_temp_reg(scaled); |
| struct tgsi_full_src_register scale = make_src_const_reg(scale_const); |
| const struct tgsi_full_src_register *mul_input = coord; |
| |
| if (emit->key.tex[unit].texel_bias) { |
| /* A 0.0001 offset is added first to work around a texture |
| * coordinate rounding issue. This fixes piglit test |
| * fbo-blit-scaled-linear. |
| */ |
| struct tgsi_full_src_register bias = |
| make_immediate_reg_float(emit, 0.0001f); |
| |
| /* ADD scaled, coord, bias */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &scaled_dst, |
| coord, &bias); |
| mul_input = &scaled_src; |
| } |
| |
| /* MUL scaled, (coord or scaled), const[] */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &scaled_dst, |
| mul_input, &scale); |
| |
| return scaled_src; |
| } |
| } |
| |
| |
| /** |
| * For SAMPLE_C instructions, emit the extra src register which holds |
| * the reference/comparison value. |
| * |
| * The component of \p coord that carries the reference value is chosen |
| * from tgsi_util_get_shadow_ref_src_index() for the given target, |
| * taken modulo 4. |
| * |
| * \param target the texture target; asserted to be a shadow target |
| * \param coord the texture coordinate register |
| */ |
| static void |
| emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit, |
| enum tgsi_texture_type target, |
| const struct tgsi_full_src_register *coord) |
| { |
| int ref_comp; |
| struct tgsi_full_src_register ref_value; |
| |
| assert(tgsi_is_shadow_target(target)); |
| |
| ref_comp = tgsi_util_get_shadow_ref_src_index(target) % 4; |
| assert(ref_comp >= 0); |
| |
| /* replicate the reference component across the operand */ |
| ref_value = scalar_src(coord, ref_comp); |
| emit_src_register(emit, &ref_value); |
| } |
| |
| |
| /** |
| * Info for implementing texture swizzles. |
| * The begin_tex_swizzle(), get_tex_swizzle_dst() and end_tex_swizzle() |
| * functions use this to encapsulate the extra steps needed to perform |
| * a texture swizzle, or shadow/depth comparisons. |
| * The shadow/depth comparison is only done here for the cases where |
| * there's no VGPU10 opcode (like texture bias lookup w/ shadow compare). |
| * |
| * Note: begin_tex_swizzle() fills in unit, tmp_src and tmp_dst only |
| * when swizzled or shadow_compare is set. |
| */ |
| struct tex_swizzle_info |
| { |
| boolean swizzled; /**< the unit's key swizzle is not the identity XYZW */ |
| boolean shadow_compare; /**< do the shadow/depth comparison in end_tex_swizzle() */ |
| unsigned unit; /**< the texture unit */ |
| enum tgsi_texture_type texture_target; /**< TGSI_TEXTURE_x */ |
| struct tgsi_full_src_register tmp_src; /**< temp holding the sample result, as a source */ |
| struct tgsi_full_dst_register tmp_dst; /**< the same temp, as a destination */ |
| const struct tgsi_full_dst_register *inst_dst; /**< the instruction's Dst[0] */ |
| const struct tgsi_full_src_register *coord_src; /**< the instruction's Src[0] */ |
| }; |
| |
| |
| /** |
| * Do setup for handling texture swizzles or shadow compares. |
| * |
| * A temporary register is allocated for the sample result only when |
| * the unit's key swizzle is not the identity or a shadow comparison is |
| * requested; swz->unit, swz->tmp_src and swz->tmp_dst are left untouched |
| * otherwise. The unit's bit in emit->fs.shadow_compare_units is set |
| * when \p shadow_compare is true. |
| * |
| * \param unit the texture unit |
| * \param inst the TGSI texture instruction |
| * \param shadow_compare do shadow/depth comparison? |
| * \param swz returns the swizzle info |
| */ |
| static void |
| begin_tex_swizzle(struct svga_shader_emitter_v10 *emit, |
| unsigned unit, |
| const struct tgsi_full_instruction *inst, |
| boolean shadow_compare, |
| struct tex_swizzle_info *swz) |
| { |
| swz->swizzled = (emit->key.tex[unit].swizzle_r != TGSI_SWIZZLE_X || |
| emit->key.tex[unit].swizzle_g != TGSI_SWIZZLE_Y || |
| emit->key.tex[unit].swizzle_b != TGSI_SWIZZLE_Z || |
| emit->key.tex[unit].swizzle_a != TGSI_SWIZZLE_W); |
| |
| swz->shadow_compare = shadow_compare; |
| swz->texture_target = inst->Texture.Texture; |
| |
| if (swz->swizzled || shadow_compare) { |
| /* Allocate temp register for the result of the SAMPLE instruction |
| * and the source of the MOV/compare/swizzle instructions. |
| */ |
| unsigned tmp = get_temp_index(emit); |
| swz->tmp_src = make_src_temp_reg(tmp); |
| swz->tmp_dst = make_dst_temp_reg(tmp); |
| |
| swz->unit = unit; |
| } |
| swz->inst_dst = &inst->Dst[0]; |
| swz->coord_src = &inst->Src[0]; |
| |
| /* Shift in unsigned arithmetic: a type narrower than int would be |
| * promoted to signed int, and left-shifting 1 into the sign bit |
| * (unit == 31) is undefined behavior. |
| */ |
| emit->fs.shadow_compare_units |= (unsigned) shadow_compare << unit; |
| } |
| |
| |
| /** |
| * Returns the register to put the SAMPLE instruction results into. |
| * |
| * That is the temporary register when a swizzle or a shadow comparison |
| * still has to be applied to the sampled value, and the original |
| * instruction's dst register otherwise. |
| */ |
| static const struct tgsi_full_dst_register * |
| get_tex_swizzle_dst(const struct tex_swizzle_info *swz) |
| { |
| if (!swz->swizzled && !swz->shadow_compare) { |
| /* no post-processing: sample straight into the instruction dst */ |
| return swz->inst_dst; |
| } |
| |
| return &swz->tmp_dst; |
| } |
| |
| |
| /** |
| * This emits the instructions that actually implement a texture swizzle |
| * and/or shadow comparison on the value sampled into the temporary. |
| * |
| * Shadow comparison: the X component of the fetched texel is compared |
| * against the reference component of the texture coordinate, and the |
| * 0 / ~0 outcome is ANDed with the 1.0 immediate so the result is |
| * 0.0 or 1.0. |
| * |
| * Swizzle: a MOV applies the X/Y/Z/W part of the unit's key swizzle, |
| * then up to two more MOVs write the constant zero and one terms to |
| * the affected, write-enabled components of the instruction dst. |
| * |
| * \param swz the swizzle info filled in by begin_tex_swizzle() |
| */ |
| static void |
| end_tex_swizzle(struct svga_shader_emitter_v10 *emit, |
| const struct tex_swizzle_info *swz) |
| { |
| if (swz->shadow_compare) { |
| struct tgsi_full_src_register ref_src; |
| struct tgsi_full_src_register fetched = |
| scalar_src(&swz->tmp_src, TGSI_SWIZZLE_X); |
| struct tgsi_full_src_register f_one = |
| make_immediate_reg_float(emit, 1.0f); |
| /* gallium comparison func + 1 gives the SVGA comparison func */ |
| SVGA3dCmpFunc cmp_func = emit->key.tex[swz->unit].compare_func + 1; |
| int ref_comp = |
| tgsi_util_get_shadow_ref_src_index(swz->texture_target) % 4; |
| /* with a swizzle still pending the result stays in the temp */ |
| const struct tgsi_full_dst_register *and_dst = |
| swz->swizzled ? &swz->tmp_dst : swz->inst_dst; |
| |
| assert(ref_comp >= 0); |
| ref_src = scalar_src(swz->coord_src, ref_comp); |
| |
| /* COMPARE tmp, coord, texel */ |
| emit_comparison(emit, cmp_func, |
| &swz->tmp_dst, &ref_src, &fetched); |
| |
| /* AND and_dst, tmp, {1.0} */ |
| begin_emit_instruction(emit); |
| emit_opcode(emit, VGPU10_OPCODE_AND, FALSE); |
| emit_dst_register(emit, and_dst); |
| emit_src_register(emit, &swz->tmp_src); |
| emit_src_register(emit, &f_one); |
| end_emit_instruction(emit); |
| } |
| |
| if (swz->swizzled) { |
| unsigned sel[4]; |
| unsigned zero_mask = 0, one_mask = 0; |
| unsigned i; |
| boolean int_tex; |
| struct tgsi_full_src_register color; |
| |
| sel[0] = emit->key.tex[swz->unit].swizzle_r; |
| sel[1] = emit->key.tex[swz->unit].swizzle_g; |
| sel[2] = emit->key.tex[swz->unit].swizzle_b; |
| sel[3] = emit->key.tex[swz->unit].swizzle_a; |
| int_tex = is_integer_type(emit->sampler_return_type[swz->unit]); |
| |
| /* Swizzle without the zero/one terms: those channels keep their |
| * identity selector here and are overwritten below. |
| */ |
| color = swizzle_src(&swz->tmp_src, |
| sel[0] < PIPE_SWIZZLE_0 ? sel[0] : PIPE_SWIZZLE_X, |
| sel[1] < PIPE_SWIZZLE_0 ? sel[1] : PIPE_SWIZZLE_Y, |
| sel[2] < PIPE_SWIZZLE_0 ? sel[2] : PIPE_SWIZZLE_Z, |
| sel[3] < PIPE_SWIZZLE_0 ? sel[3] : PIPE_SWIZZLE_W); |
| |
| /* MOV dst, color(tmp).<swizzle> */ |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, |
| swz->inst_dst, &color); |
| |
| /* gather which channels want a constant zero or one */ |
| for (i = 0; i < 4; i++) { |
| if (sel[i] == PIPE_SWIZZLE_0) { |
| zero_mask |= 1u << i; |
| } |
| if (sel[i] == PIPE_SWIZZLE_1) { |
| one_mask |= 1u << i; |
| } |
| } |
| |
| /* handle swizzle zero terms */ |
| zero_mask &= swz->inst_dst->Register.WriteMask; |
| if (zero_mask) { |
| struct tgsi_full_src_register zero_imm = int_tex ? |
| make_immediate_reg_int(emit, 0) : |
| make_immediate_reg_float(emit, 0.0f); |
| struct tgsi_full_dst_register zero_dst = |
| writemask_dst(swz->inst_dst, zero_mask); |
| |
| /* MOV dst.zero_mask, {0,0,0,0} */ |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &zero_dst, &zero_imm); |
| } |
| |
| /* handle swizzle one terms */ |
| one_mask &= swz->inst_dst->Register.WriteMask; |
| if (one_mask) { |
| struct tgsi_full_src_register one_imm = int_tex ? |
| make_immediate_reg_int(emit, 1) : |
| make_immediate_reg_float(emit, 1.0f); |
| struct tgsi_full_dst_register one_dst = |
| writemask_dst(swz->inst_dst, one_mask); |
| |
| /* MOV dst.one_mask, {1,1,1,1} */ |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &one_dst, &one_imm); |
| } |
| } |
| } |
| |
| |
| /** |
| * Emit code for TGSI_OPCODE_SAMPLE instruction. |
| */ |
| static boolean |
| emit_sample(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| const unsigned resource_unit = inst->Src[1].Register.Index; |
| const unsigned sampler_unit = inst->Src[2].Register.Index; |
| struct tgsi_full_src_register coord; |
| int offsets[3]; |
| struct tex_swizzle_info swz_info; |
| |
| begin_tex_swizzle(emit, sampler_unit, inst, FALSE, &swz_info); |
| |
| get_texel_offsets(emit, inst, offsets); |
| |
| coord = setup_texcoord(emit, resource_unit, &inst->Src[0]); |
| |
| /* SAMPLE dst, coord(s0), resource, sampler */ |
| begin_emit_instruction(emit); |
| |
| /* NOTE: for non-fragment shaders, we should use VGPU10_OPCODE_SAMPLE_L |
| * with LOD=0. But our virtual GPU accepts this as-is. |
| */ |
| emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE, |
| inst->Instruction.Saturate, offsets); |
| emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); |
| emit_src_register(emit, &coord); |
| emit_resource_register(emit, resource_unit); |
| emit_sampler_register(emit, sampler_unit); |
| end_emit_instruction(emit); |
| |
| end_tex_swizzle(emit, &swz_info); |
| |
| free_temp_indexes(emit); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Check if a texture instruction is valid. |
| * An example of an invalid texture instruction is doing shadow comparison |
| * with an integer-valued texture. |
| * If we detect an invalid texture instruction, we replace it with: |
| * MOV dst, {1,1,1,1}; |
| * \return TRUE if valid, FALSE if invalid. |
| */ |
| static boolean |
| is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| const unsigned unit = inst->Src[1].Register.Index; |
| const enum tgsi_texture_type target = inst->Texture.Texture; |
| boolean valid = TRUE; |
| |
| if (tgsi_is_shadow_target(target) && |
| is_integer_type(emit->sampler_return_type[unit])) { |
| debug_printf("Invalid SAMPLE_C with an integer texture!\n"); |
| valid = FALSE; |
| } |
| /* XXX might check for other conditions in the future here */ |
| |
| if (!valid) { |
| /* emit a MOV dst, {1,1,1,1} instruction. */ |
| struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); |
| begin_emit_instruction(emit); |
| emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); |
| emit_dst_register(emit, &inst->Dst[0]); |
| emit_src_register(emit, &one); |
| end_emit_instruction(emit); |
| } |
| |
| return valid; |
| } |
| |
| |
| /** |
| * Emit code for TGSI_OPCODE_TEX (simple texture lookup) |
| */ |
| static boolean |
| emit_tex(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| const uint unit = inst->Src[1].Register.Index; |
| const enum tgsi_texture_type target = inst->Texture.Texture; |
| VGPU10_OPCODE_TYPE opcode; |
| struct tgsi_full_src_register coord; |
| int offsets[3]; |
| struct tex_swizzle_info swz_info; |
| |
| /* check that the sampler returns a float */ |
| if (!is_valid_tex_instruction(emit, inst)) |
| return TRUE; |
| |
| begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); |
| |
| get_texel_offsets(emit, inst, offsets); |
| |
| coord = setup_texcoord(emit, unit, &inst->Src[0]); |
| |
| /* SAMPLE dst, coord(s0), resource, sampler */ |
| begin_emit_instruction(emit); |
| |
| if (tgsi_is_shadow_target(target)) |
| opcode = VGPU10_OPCODE_SAMPLE_C; |
| else |
| opcode = VGPU10_OPCODE_SAMPLE; |
| |
| emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); |
| emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); |
| emit_src_register(emit, &coord); |
| emit_resource_register(emit, unit); |
| emit_sampler_register(emit, unit); |
| if (opcode == VGPU10_OPCODE_SAMPLE_C) { |
| emit_tex_compare_refcoord(emit, target, &coord); |
| } |
| end_emit_instruction(emit); |
| |
| end_tex_swizzle(emit, &swz_info); |
| |
| free_temp_indexes(emit); |
| |
| return TRUE; |
| } |
| |
| /** |
| * Emit code for TGSI_OPCODE_TG4 (texture lookup for texture gather) |
| */ |
| static boolean |
| emit_tg4(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| const uint unit = inst->Src[2].Register.Index; |
| struct tgsi_full_src_register src; |
| struct tgsi_full_src_register offset_src, sampler, ref; |
| int offsets[3]; |
| |
| /* check that the sampler returns a float */ |
| if (!is_valid_tex_instruction(emit, inst)) |
| return TRUE; |
| |
| if (emit->version >= 50) { |
| unsigned target = inst->Texture.Texture; |
| int index = inst->Src[1].Register.Index; |
| const union tgsi_immediate_data *imm = emit->immediates[index]; |
| int select_comp = imm[inst->Src[1].Register.SwizzleX].Int; |
| unsigned select_swizzle = PIPE_SWIZZLE_X; |
| |
| if (!tgsi_is_shadow_target(target)) { |
| switch (select_comp) { |
| case 0: |
| select_swizzle = emit->key.tex[unit].swizzle_r; |
| break; |
| case 1: |
| select_swizzle = emit->key.tex[unit].swizzle_g; |
| break; |
| case 2: |
| select_swizzle = emit->key.tex[unit].swizzle_b; |
| break; |
| case 3: |
| select_swizzle = emit->key.tex[unit].swizzle_a; |
| break; |
| default: |
| assert(!"Unexpected component in texture gather swizzle"); |
| } |
| } |
| else { |
| select_swizzle = emit->key.tex[unit].swizzle_r; |
| } |
| |
| if (select_swizzle == PIPE_SWIZZLE_1) { |
| src = make_immediate_reg_float(emit, 1.0); |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src); |
| return TRUE; |
| } |
| else if (select_swizzle == PIPE_SWIZZLE_0) { |
| src = make_immediate_reg_float(emit, 0.0); |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src); |
| return TRUE; |
| } |
| |
| src = setup_texcoord(emit, unit, &inst->Src[0]); |
| |
| /* GATHER4 dst, coord, resource, sampler */ |
| /* GATHER4_C dst, coord, resource, sampler ref */ |
| /* GATHER4_PO dst, coord, offset resource, sampler */ |
| /* GATHER4_PO_C dst, coord, offset resource, sampler, ref */ |
| begin_emit_instruction(emit); |
| if (inst->Texture.NumOffsets == 1) { |
| if (tgsi_is_shadow_target(target)) { |
| emit_opcode(emit, VGPU10_OPCODE_GATHER4_PO_C, |
| inst->Instruction.Saturate); |
| } |
| else { |
| emit_opcode(emit, VGPU10_OPCODE_GATHER4_PO, |
| inst->Instruction.Saturate); |
| } |
| } |
| else { |
| if (tgsi_is_shadow_target(target)) { |
| emit_opcode(emit, VGPU10_OPCODE_GATHER4_C, |
| inst->Instruction.Saturate); |
| } |
| else { |
| emit_opcode(emit, VGPU10_OPCODE_GATHER4, |
| inst->Instruction.Saturate); |
| } |
| } |
| |
| emit_dst_register(emit, &inst->Dst[0]); |
| emit_src_register(emit, &src); |
| if (inst->Texture.NumOffsets == 1) { |
| /* offset */ |
| offset_src = make_src_reg(inst->TexOffsets[0].File, |
| inst->TexOffsets[0].Index); |
| offset_src = swizzle_src(&offset_src, inst->TexOffsets[0].SwizzleX, |
| inst->TexOffsets[0].SwizzleY, |
| inst->TexOffsets[0].SwizzleZ, |
| TGSI_SWIZZLE_W); |
| emit_src_register(emit, &offset_src); |
| } |
| |
| /* resource */ |
| emit_resource_register(emit, unit); |
| |
| /* sampler */ |
| sampler = make_src_reg(TGSI_FILE_SAMPLER, unit); |
| sampler.Register.SwizzleX = |
| sampler.Register.SwizzleY = |
| sampler.Register.SwizzleZ = |
| sampler.Register.SwizzleW = select_swizzle; |
| emit_src_register(emit, &sampler); |
| |
| if (tgsi_is_shadow_target(target)) { |
| /* ref */ |
| if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { |
| ref = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); |
| emit_tex_compare_refcoord(emit, target, &ref); |
| } |
| else { |
| emit_tex_compare_refcoord(emit, target, &src); |
| } |
| } |
| |
| end_emit_instruction(emit); |
| free_temp_indexes(emit); |
| } |
| else { |
| /* Only a single channel is supported in SM4_1 and we report |
| * PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS = 1. |
| * Only the 0th component will be gathered. |
| */ |
| switch (emit->key.tex[unit].swizzle_r) { |
| case PIPE_SWIZZLE_X: |
| get_texel_offsets(emit, inst, offsets); |
| src = setup_texcoord(emit, unit, &inst->Src[0]); |
| |
| /* Gather dst, coord, resource, sampler */ |
| begin_emit_instruction(emit); |
| emit_sample_opcode(emit, VGPU10_OPCODE_GATHER4, |
| inst->Instruction.Saturate, offsets); |
| emit_dst_register(emit, &inst->Dst[0]); |
| emit_src_register(emit, &src); |
| emit_resource_register(emit, unit); |
| |
| /* sampler */ |
| sampler = make_src_reg(TGSI_FILE_SAMPLER, unit); |
| sampler.Register.SwizzleX = |
| sampler.Register.SwizzleY = |
| sampler.Register.SwizzleZ = |
| sampler.Register.SwizzleW = PIPE_SWIZZLE_X; |
| emit_src_register(emit, &sampler); |
| |
| end_emit_instruction(emit); |
| break; |
| case PIPE_SWIZZLE_W: |
| case PIPE_SWIZZLE_1: |
| src = make_immediate_reg_float(emit, 1.0); |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src); |
| break; |
| case PIPE_SWIZZLE_Y: |
| case PIPE_SWIZZLE_Z: |
| case PIPE_SWIZZLE_0: |
| default: |
| src = make_immediate_reg_float(emit, 0.0); |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src); |
| break; |
| } |
| } |
| |
| return TRUE; |
| } |
| |
| |
| |
| /** |
| * Emit code for TGSI_OPCODE_TEX2 (texture lookup for shadow cube map arrays) |
| */ |
| static boolean |
| emit_tex2(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| const uint unit = inst->Src[2].Register.Index; |
| unsigned target = inst->Texture.Texture; |
| struct tgsi_full_src_register coord, ref; |
| int offsets[3]; |
| struct tex_swizzle_info swz_info; |
| |
| /* check that the sampler returns a float */ |
| if (!is_valid_tex_instruction(emit, inst)) |
| return TRUE; |
| |
| begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); |
| |
| get_texel_offsets(emit, inst, offsets); |
| |
| coord = setup_texcoord(emit, unit, &inst->Src[0]); |
| ref = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); |
| |
| /* SAMPLE_C dst, coord, resource, sampler, ref */ |
| begin_emit_instruction(emit); |
| emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_C, |
| inst->Instruction.Saturate, offsets); |
| emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); |
| emit_src_register(emit, &coord); |
| emit_resource_register(emit, unit); |
| emit_sampler_register(emit, unit); |
| emit_tex_compare_refcoord(emit, target, &ref); |
| end_emit_instruction(emit); |
| |
| end_tex_swizzle(emit, &swz_info); |
| |
| free_temp_indexes(emit); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit code for TGSI_OPCODE_TXP (projective texture) |
| */ |
| static boolean |
| emit_txp(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| const uint unit = inst->Src[1].Register.Index; |
| const enum tgsi_texture_type target = inst->Texture.Texture; |
| VGPU10_OPCODE_TYPE opcode; |
| int offsets[3]; |
| unsigned tmp = get_temp_index(emit); |
| struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); |
| struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); |
| struct tgsi_full_src_register src0_wwww = |
| scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); |
| struct tgsi_full_src_register coord; |
| struct tex_swizzle_info swz_info; |
| |
| /* check that the sampler returns a float */ |
| if (!is_valid_tex_instruction(emit, inst)) |
| return TRUE; |
| |
| begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); |
| |
| get_texel_offsets(emit, inst, offsets); |
| |
| coord = setup_texcoord(emit, unit, &inst->Src[0]); |
| |
| /* DIV tmp, coord, coord.wwww */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst, |
| &coord, &src0_wwww); |
| |
| /* SAMPLE dst, coord(tmp), resource, sampler */ |
| begin_emit_instruction(emit); |
| |
| if (tgsi_is_shadow_target(target)) |
| /* NOTE: for non-fragment shaders, we should use |
| * VGPU10_OPCODE_SAMPLE_C_LZ, but our virtual GPU accepts this as-is. |
| */ |
| opcode = VGPU10_OPCODE_SAMPLE_C; |
| else |
| opcode = VGPU10_OPCODE_SAMPLE; |
| |
| emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); |
| emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); |
| emit_src_register(emit, &tmp_src); /* projected coord */ |
| emit_resource_register(emit, unit); |
| emit_sampler_register(emit, unit); |
| if (opcode == VGPU10_OPCODE_SAMPLE_C) { |
| emit_tex_compare_refcoord(emit, target, &tmp_src); |
| } |
| end_emit_instruction(emit); |
| |
| end_tex_swizzle(emit, &swz_info); |
| |
| free_temp_indexes(emit); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit code for TGSI_OPCODE_TXD (explicit derivatives) |
| */ |
| static boolean |
| emit_txd(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| const uint unit = inst->Src[3].Register.Index; |
| const enum tgsi_texture_type target = inst->Texture.Texture; |
| int offsets[3]; |
| struct tgsi_full_src_register coord; |
| struct tex_swizzle_info swz_info; |
| |
| begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target), |
| &swz_info); |
| |
| get_texel_offsets(emit, inst, offsets); |
| |
| coord = setup_texcoord(emit, unit, &inst->Src[0]); |
| |
| /* SAMPLE_D dst, coord(s0), resource, sampler, Xderiv(s1), Yderiv(s2) */ |
| begin_emit_instruction(emit); |
| emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_D, |
| inst->Instruction.Saturate, offsets); |
| emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); |
| emit_src_register(emit, &coord); |
| emit_resource_register(emit, unit); |
| emit_sampler_register(emit, unit); |
| emit_src_register(emit, &inst->Src[1]); /* Xderiv */ |
| emit_src_register(emit, &inst->Src[2]); /* Yderiv */ |
| end_emit_instruction(emit); |
| |
| end_tex_swizzle(emit, &swz_info); |
| |
| free_temp_indexes(emit); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit code for TGSI_OPCODE_TXF (texel fetch) |
| */ |
| static boolean |
| emit_txf(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| const uint unit = inst->Src[1].Register.Index; |
| const boolean msaa = tgsi_is_msaa_target(inst->Texture.Texture) |
| && emit->key.tex[unit].num_samples > 1; |
| int offsets[3]; |
| struct tex_swizzle_info swz_info; |
| |
| begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); |
| |
| get_texel_offsets(emit, inst, offsets); |
| |
| if (msaa) { |
| assert(emit->key.tex[unit].num_samples > 1); |
| |
| /* Fetch one sample from an MSAA texture */ |
| struct tgsi_full_src_register sampleIndex = |
| scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); |
| /* LD_MS dst, coord(s0), resource, sampleIndex */ |
| begin_emit_instruction(emit); |
| emit_sample_opcode(emit, VGPU10_OPCODE_LD_MS, |
| inst->Instruction.Saturate, offsets); |
| emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); |
| emit_src_register(emit, &inst->Src[0]); |
| emit_resource_register(emit, unit); |
| emit_src_register(emit, &sampleIndex); |
| end_emit_instruction(emit); |
| } |
| else { |
| /* Fetch one texel specified by integer coordinate */ |
| /* LD dst, coord(s0), resource */ |
| begin_emit_instruction(emit); |
| emit_sample_opcode(emit, VGPU10_OPCODE_LD, |
| inst->Instruction.Saturate, offsets); |
| emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); |
| emit_src_register(emit, &inst->Src[0]); |
| emit_resource_register(emit, unit); |
| end_emit_instruction(emit); |
| } |
| |
| end_tex_swizzle(emit, &swz_info); |
| |
| free_temp_indexes(emit); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit code for TGSI_OPCODE_TXL (explicit LOD) or TGSI_OPCODE_TXB (LOD bias) |
| * or TGSI_OPCODE_TXB2 (for cube shadow maps). |
| */ |
| static boolean |
| emit_txl_txb(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| const enum tgsi_texture_type target = inst->Texture.Texture; |
| VGPU10_OPCODE_TYPE opcode; |
| unsigned unit; |
| int offsets[3]; |
| struct tgsi_full_src_register coord, lod_bias; |
| struct tex_swizzle_info swz_info; |
| |
| assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL || |
| inst->Instruction.Opcode == TGSI_OPCODE_TXB || |
| inst->Instruction.Opcode == TGSI_OPCODE_TXB2); |
| |
| if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2) { |
| lod_bias = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); |
| unit = inst->Src[2].Register.Index; |
| } |
| else { |
| lod_bias = scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); |
| unit = inst->Src[1].Register.Index; |
| } |
| |
| begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target), |
| &swz_info); |
| |
| get_texel_offsets(emit, inst, offsets); |
| |
| coord = setup_texcoord(emit, unit, &inst->Src[0]); |
| |
| /* SAMPLE_L/B dst, coord(s0), resource, sampler, lod(s3) */ |
| begin_emit_instruction(emit); |
| if (inst->Instruction.Opcode == TGSI_OPCODE_TXL) { |
| opcode = VGPU10_OPCODE_SAMPLE_L; |
| } |
| else { |
| opcode = VGPU10_OPCODE_SAMPLE_B; |
| } |
| emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); |
| emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); |
| emit_src_register(emit, &coord); |
| emit_resource_register(emit, unit); |
| emit_sampler_register(emit, unit); |
| emit_src_register(emit, &lod_bias); |
| end_emit_instruction(emit); |
| |
| end_tex_swizzle(emit, &swz_info); |
| |
| free_temp_indexes(emit); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit code for TGSI_OPCODE_TXL2 (explicit LOD) for cubemap array. |
| */ |
| static boolean |
| emit_txl2(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| unsigned target = inst->Texture.Texture; |
| unsigned opcode, unit; |
| int offsets[3]; |
| struct tgsi_full_src_register coord, lod; |
| struct tex_swizzle_info swz_info; |
| |
| assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL2); |
| |
| lod = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); |
| unit = inst->Src[2].Register.Index; |
| |
| begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target), |
| &swz_info); |
| |
| get_texel_offsets(emit, inst, offsets); |
| |
| coord = setup_texcoord(emit, unit, &inst->Src[0]); |
| |
| /* SAMPLE_L dst, coord(s0), resource, sampler, lod(s3) */ |
| begin_emit_instruction(emit); |
| opcode = VGPU10_OPCODE_SAMPLE_L; |
| emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); |
| emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); |
| emit_src_register(emit, &coord); |
| emit_resource_register(emit, unit); |
| emit_sampler_register(emit, unit); |
| emit_src_register(emit, &lod); |
| end_emit_instruction(emit); |
| |
| end_tex_swizzle(emit, &swz_info); |
| |
| free_temp_indexes(emit); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit code for TGSI_OPCODE_TXQ (texture query) instruction. |
| */ |
| static boolean |
| emit_txq(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| const uint unit = inst->Src[1].Register.Index; |
| |
| if (emit->sampler_target[unit] == TGSI_TEXTURE_BUFFER) { |
| /* RESINFO does not support querying texture buffers, so we instead |
| * store texture buffer sizes in shader constants, then copy them to |
| * implement TXQ instead of emitting RESINFO. |
| * MOV dst, const[texture_buffer_size_index[unit]] |
| */ |
| struct tgsi_full_src_register size_src = |
| make_src_const_reg(emit->texture_buffer_size_index[unit]); |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &size_src); |
| } else { |
| /* RESINFO dst, srcMipLevel, resource */ |
| begin_emit_instruction(emit); |
| emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT); |
| emit_dst_register(emit, &inst->Dst[0]); |
| emit_src_register(emit, &inst->Src[0]); |
| emit_resource_register(emit, unit); |
| end_emit_instruction(emit); |
| } |
| |
| free_temp_indexes(emit); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Does this opcode produce a double-precision result? |
| * XXX perhaps move this to a TGSI utility. |
| */ |
| static bool |
| opcode_has_dbl_dst(unsigned opcode) |
| { |
| switch (opcode) { |
| case TGSI_OPCODE_F2D: |
| case TGSI_OPCODE_DABS: |
| case TGSI_OPCODE_DADD: |
| case TGSI_OPCODE_DFRAC: |
| case TGSI_OPCODE_DMAX: |
| case TGSI_OPCODE_DMIN: |
| case TGSI_OPCODE_DMUL: |
| case TGSI_OPCODE_DNEG: |
| case TGSI_OPCODE_I2D: |
| case TGSI_OPCODE_U2D: |
| // XXX more TBD |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| |
| /** |
| * Does this opcode use double-precision source registers? |
| */ |
| static bool |
| opcode_has_dbl_src(unsigned opcode) |
| { |
| switch (opcode) { |
| case TGSI_OPCODE_D2F: |
| case TGSI_OPCODE_DABS: |
| case TGSI_OPCODE_DADD: |
| case TGSI_OPCODE_DFRAC: |
| case TGSI_OPCODE_DMAX: |
| case TGSI_OPCODE_DMIN: |
| case TGSI_OPCODE_DMUL: |
| case TGSI_OPCODE_DNEG: |
| case TGSI_OPCODE_D2I: |
| case TGSI_OPCODE_D2U: |
| // XXX more TBD |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| |
| /** |
| * Check that the swizzle for reading from a double-precision register |
| * is valid. |
| */ |
| static void |
| check_double_src_swizzle(const struct tgsi_full_src_register *reg) |
| { |
| assert((reg->Register.SwizzleX == PIPE_SWIZZLE_X && |
| reg->Register.SwizzleY == PIPE_SWIZZLE_Y) || |
| (reg->Register.SwizzleX == PIPE_SWIZZLE_Z && |
| reg->Register.SwizzleY == PIPE_SWIZZLE_W)); |
| |
| assert((reg->Register.SwizzleZ == PIPE_SWIZZLE_X && |
| reg->Register.SwizzleW == PIPE_SWIZZLE_Y) || |
| (reg->Register.SwizzleZ == PIPE_SWIZZLE_Z && |
| reg->Register.SwizzleW == PIPE_SWIZZLE_W)); |
| } |
| |
| |
| /** |
| * Check that the writemask for a double-precision instruction is valid. |
| */ |
| static void |
| check_double_dst_writemask(const struct tgsi_full_instruction *inst) |
| { |
| ASSERTED unsigned writemask = inst->Dst[0].Register.WriteMask; |
| |
| switch (inst->Instruction.Opcode) { |
| case TGSI_OPCODE_DABS: |
| case TGSI_OPCODE_DADD: |
| case TGSI_OPCODE_DFRAC: |
| case TGSI_OPCODE_DNEG: |
| case TGSI_OPCODE_DMAD: |
| case TGSI_OPCODE_DMAX: |
| case TGSI_OPCODE_DMIN: |
| case TGSI_OPCODE_DMUL: |
| case TGSI_OPCODE_DRCP: |
| case TGSI_OPCODE_DSQRT: |
| case TGSI_OPCODE_F2D: |
| assert(writemask == TGSI_WRITEMASK_XYZW || |
| writemask == TGSI_WRITEMASK_XY || |
| writemask == TGSI_WRITEMASK_ZW); |
| break; |
| case TGSI_OPCODE_DSEQ: |
| case TGSI_OPCODE_DSGE: |
| case TGSI_OPCODE_DSNE: |
| case TGSI_OPCODE_DSLT: |
| case TGSI_OPCODE_D2I: |
| case TGSI_OPCODE_D2U: |
| /* Write to 1 or 2 components only */ |
| assert(util_bitcount(writemask) <= 2); |
| break; |
| default: |
| /* XXX this list may be incomplete */ |
| ; |
| } |
| } |
| |
| |
| /** |
| * Double-precision absolute value. |
| */ |
| static boolean |
| emit_dabs(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| assert(emit->version >= 50); |
| check_double_src_swizzle(&inst->Src[0]); |
| check_double_dst_writemask(inst); |
| |
| struct tgsi_full_src_register abs_src = absolute_src(&inst->Src[0]); |
| |
| /* DMOV dst, |src| */ |
| emit_instruction_op1(emit, VGPU10_OPCODE_DMOV, &inst->Dst[0], &abs_src); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Double-precision negation |
| */ |
| static boolean |
| emit_dneg(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| assert(emit->version >= 50); |
| check_double_src_swizzle(&inst->Src[0]); |
| check_double_dst_writemask(inst); |
| |
| struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]); |
| |
| /* DMOV dst, -src */ |
| emit_instruction_op1(emit, VGPU10_OPCODE_DMOV, &inst->Dst[0], &neg_src); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * SM5 has no DMAD opcode. Implement negation with DMUL/DADD. |
| */ |
| static boolean |
| emit_dmad(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| assert(emit->version >= 50); |
| check_double_src_swizzle(&inst->Src[0]); |
| check_double_src_swizzle(&inst->Src[1]); |
| check_double_src_swizzle(&inst->Src[2]); |
| check_double_dst_writemask(inst); |
| |
| unsigned tmp = get_temp_index(emit); |
| struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); |
| struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); |
| |
| /* DMUL tmp, src[0], src[1] */ |
| emit_instruction_opn(emit, VGPU10_OPCODE_DMUL, |
| &tmp_dst, &inst->Src[0], &inst->Src[1], NULL, |
| FALSE, inst->Instruction.Precise); |
| |
| /* DADD dst, tmp, src[2] */ |
| emit_instruction_opn(emit, VGPU10_OPCODE_DADD, |
| &inst->Dst[0], &tmp_src, &inst->Src[2], NULL, |
| inst->Instruction.Saturate, inst->Instruction.Precise); |
| free_temp_indexes(emit); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Double precision reciprocal square root |
| */ |
| static boolean |
| emit_drsq(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_dst_register *dst, |
| const struct tgsi_full_src_register *src) |
| { |
| assert(emit->version >= 50); |
| |
| VGPU10OpcodeToken0 token0; |
| begin_emit_instruction(emit); |
| |
| token0.value = 0; |
| token0.opcodeType = VGPU10_OPCODE_VMWARE; |
| token0.vmwareOpcodeType = VGPU10_VMWARE_OPCODE_DRSQ; |
| emit_dword(emit, token0.value); |
| |
| emit_dst_register(emit, dst); |
| |
| check_double_src_swizzle(src); |
| emit_src_register(emit, src); |
| |
| end_emit_instruction(emit); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * There is no SM5 opcode for double precision square root. |
| * It will be implemented with DRSQ. |
| * dst = src * DRSQ(src) |
| */ |
| static boolean |
| emit_dsqrt(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| assert(emit->version >= 50); |
| |
| check_double_src_swizzle(&inst->Src[0]); |
| |
| /* temporary register to hold the source */ |
| unsigned tmp = get_temp_index(emit); |
| struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); |
| struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); |
| |
| /* temporary register to hold the DEQ result */ |
| unsigned tmp_cond = get_temp_index(emit); |
| struct tgsi_full_dst_register tmp_cond_dst = make_dst_temp_reg(tmp_cond); |
| struct tgsi_full_dst_register tmp_cond_dst_xy = |
| writemask_dst(&tmp_cond_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y); |
| struct tgsi_full_src_register tmp_cond_src = make_src_temp_reg(tmp_cond); |
| struct tgsi_full_src_register tmp_cond_src_xy = |
| swizzle_src(&tmp_cond_src, |
| PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, |
| PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y); |
| |
| /* The reciprocal square root of zero yields INF. |
| * So if the source is 0, we replace it with 1 in the tmp register. |
| * The later multiplication of zero in the original source will yield 0 |
| * in the result. |
| */ |
| |
| /* tmp1 = (src == 0) ? 1 : src; |
| * EQ tmp1, 0, src |
| * MOVC tmp, tmp1, 1.0, src |
| */ |
| struct tgsi_full_src_register zero = |
| make_immediate_reg_double(emit, 0); |
| |
| struct tgsi_full_src_register one = |
| make_immediate_reg_double(emit, 1.0); |
| |
| emit_instruction_op2(emit, VGPU10_OPCODE_DEQ, &tmp_cond_dst_xy, |
| &zero, &inst->Src[0]); |
| emit_instruction_op3(emit, VGPU10_OPCODE_DMOVC, &tmp_dst, |
| &tmp_cond_src_xy, &one, &inst->Src[0]); |
| |
| struct tgsi_full_dst_register tmp_rsq_dst = make_dst_temp_reg(tmp); |
| struct tgsi_full_src_register tmp_rsq_src = make_src_temp_reg(tmp); |
| |
| /* DRSQ tmp_rsq, tmp */ |
| emit_drsq(emit, &tmp_rsq_dst, &tmp_src); |
| |
| /* DMUL dst, tmp_rsq, src[0] */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_DMUL, &inst->Dst[0], |
| &tmp_rsq_src, &inst->Src[0]); |
| |
| free_temp_indexes(emit); |
| |
| return TRUE; |
| } |
| |
| |
| static boolean |
| emit_interp_offset(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| assert(emit->version >= 50); |
| |
| /* The src1.xy offset is a float with values in the range [-0.5, 0.5] |
| * where (0,0) is the center of the pixel. We need to translate that |
| * into an integer offset on a 16x16 grid in the range [-8/16, 7/16]. |
| * Also need to flip the Y axis (I think). |
| */ |
| unsigned tmp = get_temp_index(emit); |
| struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); |
| struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); |
| struct tgsi_full_dst_register tmp_dst_xy = |
| writemask_dst(&tmp_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y); |
| struct tgsi_full_src_register const16 = |
| make_immediate_reg_float4(emit, 16.0f, -16.0, 0, 0); |
| |
| /* MUL tmp.xy, src1, {16, -16, 0, 0} */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_MUL, |
| &tmp_dst_xy, &inst->Src[1], &const16); |
| |
| /* FTOI tmp.xy, tmp */ |
| emit_instruction_op1(emit, VGPU10_OPCODE_FTOI, &tmp_dst_xy, &tmp_src); |
| |
| /* EVAL_SNAPPED dst, src0, tmp */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_EVAL_SNAPPED, |
| &inst->Dst[0], &inst->Src[0], &tmp_src); |
| |
| free_temp_indexes(emit); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit a simple instruction (like ADD, MUL, MIN, etc). |
| */ |
| static boolean |
| emit_simple(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| const enum tgsi_opcode opcode = inst->Instruction.Opcode; |
| const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode); |
| const bool dbl_dst = opcode_has_dbl_dst(inst->Instruction.Opcode); |
| const bool dbl_src = opcode_has_dbl_src(inst->Instruction.Opcode); |
| unsigned i; |
| |
| if (inst->Instruction.Opcode == TGSI_OPCODE_BGNLOOP) { |
| emit->current_loop_depth++; |
| } |
| else if (inst->Instruction.Opcode == TGSI_OPCODE_ENDLOOP) { |
| emit->current_loop_depth--; |
| } |
| |
| begin_emit_instruction(emit); |
| emit_opcode_precise(emit, translate_opcode(inst->Instruction.Opcode), |
| inst->Instruction.Saturate, |
| inst->Instruction.Precise); |
| for (i = 0; i < op->num_dst; i++) { |
| if (dbl_dst) { |
| check_double_dst_writemask(inst); |
| } |
| emit_dst_register(emit, &inst->Dst[i]); |
| } |
| for (i = 0; i < op->num_src; i++) { |
| if (dbl_src) { |
| check_double_src_swizzle(&inst->Src[i]); |
| } |
| emit_src_register(emit, &inst->Src[i]); |
| } |
| end_emit_instruction(emit); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit MSB instruction (like IMSB, UMSB). |
| * |
| * GLSL returns the index starting from the LSB; |
| * whereas in SM5, firstbit_hi/shi returns the index starting from the MSB. |
| * To get correct location as per glsl from SM5 device, we should |
| * return (31 - index) if returned index is not -1. |
| */ |
| static boolean |
| emit_msb(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| const struct tgsi_full_dst_register *index_dst = &inst->Dst[0]; |
| |
| assert(index_dst->Register.File != TGSI_FILE_OUTPUT); |
| |
| struct tgsi_full_src_register index_src = |
| make_src_reg(index_dst->Register.File, index_dst->Register.Index); |
| struct tgsi_full_src_register imm31 = |
| make_immediate_reg_int(emit, 31); |
| imm31 = scalar_src(&imm31, TGSI_SWIZZLE_X); |
| struct tgsi_full_src_register neg_one = |
| make_immediate_reg_int(emit, -1); |
| neg_one = scalar_src(&neg_one, TGSI_SWIZZLE_X); |
| unsigned tmp = get_temp_index(emit); |
| const struct tgsi_full_dst_register tmp_dst = |
| make_dst_temp_reg(tmp); |
| const struct tgsi_full_dst_register tmp_dst_x = |
| writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); |
| const struct tgsi_full_src_register tmp_src_x = |
| make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp, TGSI_SWIZZLE_X); |
| int writemask = TGSI_WRITEMASK_X; |
| int src_swizzle = TGSI_SWIZZLE_X; |
| int dst_writemask = index_dst->Register.WriteMask; |
| |
| emit_simple(emit, inst); |
| |
| /* index conversion from SM5 to GLSL */ |
| while (writemask & dst_writemask) { |
| struct tgsi_full_src_register index_src_comp = |
| scalar_src(&index_src, src_swizzle); |
| struct tgsi_full_dst_register index_dst_comp = |
| writemask_dst(index_dst, writemask); |
| |
| /* check if index_src_comp != -1 */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_INE, |
| &tmp_dst_x, &index_src_comp, &neg_one); |
| |
| /* if */ |
| emit_if(emit, &tmp_src_x); |
| |
| index_src_comp = negate_src(&index_src_comp); |
| /* SUB DST, IMM{31}, DST */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_IADD, |
| &index_dst_comp, &imm31, &index_src_comp); |
| |
| /* endif */ |
| emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF); |
| |
| writemask = writemask << 1; |
| src_swizzle = src_swizzle + 1; |
| } |
| free_temp_indexes(emit); |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit a BFE instruction (like UBFE, IBFE). |
| * tgsi representation: |
| * U/IBFE dst, value, offset, width |
| * SM5 representation: |
| * U/IBFE dst, width, offset, value |
| * Note: SM5 has width & offset range (0-31); |
| * whereas GLSL has width & offset range (0-32) |
| */ |
| static boolean |
| emit_bfe(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| const enum tgsi_opcode opcode = inst->Instruction.Opcode; |
| struct tgsi_full_src_register imm32 = make_immediate_reg_int(emit, 32); |
| imm32 = scalar_src(&imm32, TGSI_SWIZZLE_X); |
| struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0); |
| zero = scalar_src(&zero, TGSI_SWIZZLE_X); |
| |
| unsigned tmp1 = get_temp_index(emit); |
| const struct tgsi_full_dst_register cond1_dst = make_dst_temp_reg(tmp1); |
| const struct tgsi_full_dst_register cond1_dst_x = |
| writemask_dst(&cond1_dst, TGSI_WRITEMASK_X); |
| const struct tgsi_full_src_register cond1_src_x = |
| make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp1, TGSI_SWIZZLE_X); |
| |
| unsigned tmp2 = get_temp_index(emit); |
| const struct tgsi_full_dst_register cond2_dst = make_dst_temp_reg(tmp2); |
| const struct tgsi_full_dst_register cond2_dst_x = |
| writemask_dst(&cond2_dst, TGSI_WRITEMASK_X); |
| const struct tgsi_full_src_register cond2_src_x = |
| make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp2, TGSI_SWIZZLE_X); |
| |
| /** |
| * In SM5, when width = 32 and offset = 0, it returns 0. |
| * On the other hand GLSL, expects value to be copied as it is, to dst. |
| */ |
| |
| /* cond1 = width ! = 32 */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_IEQ, |
| &cond1_dst_x, &inst->Src[2], &imm32); |
| |
| /* cond2 = offset ! = 0 */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_IEQ, |
| &cond2_dst_x, &inst->Src[1], &zero); |
| |
| /* cond 2 = cond1 & cond 2 */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_AND, &cond2_dst_x, |
| &cond2_src_x, |
| &cond1_src_x); |
| /* IF */ |
| emit_if(emit, &cond2_src_x); |
| |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], |
| &inst->Src[0]); |
| |
| /* ELSE */ |
| emit_instruction_op0(emit, VGPU10_OPCODE_ELSE); |
| |
| /* U/IBFE dst, width, offset, value */ |
| emit_instruction_op3(emit, translate_opcode(opcode), &inst->Dst[0], |
| &inst->Src[2], &inst->Src[1], &inst->Src[0]); |
| |
| /* ENDIF */ |
| emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF); |
| |
| free_temp_indexes(emit); |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit BFI instruction |
| * tgsi representation: |
| * BFI dst, base, insert, offset, width |
| * SM5 representation: |
| * BFI dst, width, offset, insert, base |
| * Note: SM5 has width & offset range (0-31); |
| * whereas GLSL has width & offset range (0-32) |
| */ |
| static boolean |
| emit_bfi(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| const enum tgsi_opcode opcode = inst->Instruction.Opcode; |
| struct tgsi_full_src_register imm32 = make_immediate_reg_int(emit, 32); |
| imm32 = scalar_src(&imm32, TGSI_SWIZZLE_X); |
| |
| struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0); |
| zero = scalar_src(&zero, TGSI_SWIZZLE_X); |
| |
| unsigned tmp1 = get_temp_index(emit); |
| const struct tgsi_full_dst_register cond1_dst = make_dst_temp_reg(tmp1); |
| const struct tgsi_full_dst_register cond1_dst_x = |
| writemask_dst(&cond1_dst, TGSI_WRITEMASK_X); |
| const struct tgsi_full_src_register cond1_src_x = |
| make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp1, TGSI_SWIZZLE_X); |
| |
| unsigned tmp2 = get_temp_index(emit); |
| const struct tgsi_full_dst_register cond2_dst = make_dst_temp_reg(tmp2); |
| const struct tgsi_full_dst_register cond2_dst_x = |
| writemask_dst(&cond2_dst, TGSI_WRITEMASK_X); |
| const struct tgsi_full_src_register cond2_src_x = |
| make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp2, TGSI_SWIZZLE_X); |
| |
| /** |
| * In SM5, when width = 32 and offset = 0, it returns 0. |
| * On the other hand GLSL, expects insert to be copied as it is, to dst. |
| */ |
| |
| /* cond1 = width == 32 */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_IEQ, |
| &cond1_dst_x, &inst->Src[3], &imm32); |
| |
| /* cond1 = offset == 0 */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_IEQ, |
| &cond2_dst_x, &inst->Src[2], &zero); |
| |
| /* cond2 = cond1 & cond2 */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_AND, |
| &cond2_dst_x, &cond2_src_x, &cond1_src_x); |
| |
| /* if */ |
| emit_if(emit, &cond2_src_x); |
| |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], |
| &inst->Src[1]); |
| |
| /* else */ |
| emit_instruction_op0(emit, VGPU10_OPCODE_ELSE); |
| |
| /* BFI dst, width, offset, insert, base */ |
| begin_emit_instruction(emit); |
| emit_opcode(emit, translate_opcode(opcode), inst->Instruction.Saturate); |
| emit_dst_register(emit, &inst->Dst[0]); |
| emit_src_register(emit, &inst->Src[3]); |
| emit_src_register(emit, &inst->Src[2]); |
| emit_src_register(emit, &inst->Src[1]); |
| emit_src_register(emit, &inst->Src[0]); |
| end_emit_instruction(emit); |
| |
| /* endif */ |
| emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF); |
| |
| free_temp_indexes(emit); |
| return TRUE; |
| } |
| |
| |
| /** |
| * We only special case the MOV instruction to try to detect constant |
| * color writes in the fragment shader. |
| */ |
| static boolean |
| emit_mov(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| const struct tgsi_full_src_register *src = &inst->Src[0]; |
| const struct tgsi_full_dst_register *dst = &inst->Dst[0]; |
| |
| if (emit->unit == PIPE_SHADER_FRAGMENT && |
| dst->Register.File == TGSI_FILE_OUTPUT && |
| dst->Register.Index == 0 && |
| src->Register.File == TGSI_FILE_CONSTANT && |
| !src->Register.Indirect) { |
| emit->constant_color_output = TRUE; |
| } |
| |
| return emit_simple(emit, inst); |
| } |
| |
| |
| /** |
| * Emit a simple VGPU10 instruction which writes to multiple dest registers, |
| * where TGSI only uses one dest register. |
| */ |
| static boolean |
| emit_simple_1dst(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst, |
| unsigned dst_count, |
| unsigned dst_index) |
| { |
| const enum tgsi_opcode opcode = inst->Instruction.Opcode; |
| const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode); |
| unsigned i; |
| |
| begin_emit_instruction(emit); |
| emit_opcode(emit, translate_opcode(opcode), inst->Instruction.Saturate); |
| |
| for (i = 0; i < dst_count; i++) { |
| if (i == dst_index) { |
| emit_dst_register(emit, &inst->Dst[0]); |
| } else { |
| emit_null_dst_register(emit); |
| } |
| } |
| |
| for (i = 0; i < op->num_src; i++) { |
| emit_src_register(emit, &inst->Src[i]); |
| } |
| end_emit_instruction(emit); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit a vmware specific VGPU10 instruction. |
| */ |
| static boolean |
| emit_vmware(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst, |
| VGPU10_VMWARE_OPCODE_TYPE subopcode) |
| { |
| VGPU10OpcodeToken0 token0; |
| const enum tgsi_opcode opcode = inst->Instruction.Opcode; |
| const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode); |
| const bool dbl_dst = opcode_has_dbl_dst(inst->Instruction.Opcode); |
| const bool dbl_src = opcode_has_dbl_src(inst->Instruction.Opcode); |
| |
| unsigned i; |
| |
| begin_emit_instruction(emit); |
| |
| assert((subopcode > 0 && emit->version >= 50) || subopcode == 0); |
| |
| token0.value = 0; |
| token0.opcodeType = VGPU10_OPCODE_VMWARE; |
| token0.vmwareOpcodeType = subopcode; |
| emit_dword(emit, token0.value); |
| |
| if (subopcode == VGPU10_VMWARE_OPCODE_IDIV) { |
| /* IDIV only uses the first dest register. */ |
| emit_dst_register(emit, &inst->Dst[0]); |
| emit_null_dst_register(emit); |
| } else { |
| for (i = 0; i < op->num_dst; i++) { |
| if (dbl_dst) { |
| check_double_dst_writemask(inst); |
| } |
| emit_dst_register(emit, &inst->Dst[i]); |
| } |
| } |
| |
| for (i = 0; i < op->num_src; i++) { |
| if (dbl_src) { |
| check_double_src_swizzle(&inst->Src[i]); |
| } |
| emit_src_register(emit, &inst->Src[i]); |
| } |
| end_emit_instruction(emit); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Translate a single TGSI instruction to VGPU10. |
| */ |
| static boolean |
| emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, |
| unsigned inst_number, |
| const struct tgsi_full_instruction *inst) |
| { |
| const enum tgsi_opcode opcode = inst->Instruction.Opcode; |
| |
| if (emit->skip_instruction) |
| return TRUE; |
| |
| switch (opcode) { |
| case TGSI_OPCODE_ADD: |
| case TGSI_OPCODE_AND: |
| case TGSI_OPCODE_BGNLOOP: |
| case TGSI_OPCODE_BRK: |
| case TGSI_OPCODE_CEIL: |
| case TGSI_OPCODE_CONT: |
| case TGSI_OPCODE_DDX: |
| case TGSI_OPCODE_DDY: |
| case TGSI_OPCODE_DIV: |
| case TGSI_OPCODE_DP2: |
| case TGSI_OPCODE_DP3: |
| case TGSI_OPCODE_DP4: |
| case TGSI_OPCODE_ELSE: |
| case TGSI_OPCODE_ENDIF: |
| case TGSI_OPCODE_ENDLOOP: |
| case TGSI_OPCODE_ENDSUB: |
| case TGSI_OPCODE_F2I: |
| case TGSI_OPCODE_F2U: |
| case TGSI_OPCODE_FLR: |
| case TGSI_OPCODE_FRC: |
| case TGSI_OPCODE_FSEQ: |
| case TGSI_OPCODE_FSGE: |
| case TGSI_OPCODE_FSLT: |
| case TGSI_OPCODE_FSNE: |
| case TGSI_OPCODE_I2F: |
| case TGSI_OPCODE_IMAX: |
| case TGSI_OPCODE_IMIN: |
| case TGSI_OPCODE_INEG: |
| case TGSI_OPCODE_ISGE: |
| case TGSI_OPCODE_ISHR: |
| case TGSI_OPCODE_ISLT: |
| case TGSI_OPCODE_MAD: |
| case TGSI_OPCODE_MAX: |
| case TGSI_OPCODE_MIN: |
| case TGSI_OPCODE_MUL: |
| case TGSI_OPCODE_NOP: |
| case TGSI_OPCODE_NOT: |
| case TGSI_OPCODE_OR: |
| case TGSI_OPCODE_UADD: |
| case TGSI_OPCODE_USEQ: |
| case TGSI_OPCODE_USGE: |
| case TGSI_OPCODE_USLT: |
| case TGSI_OPCODE_UMIN: |
| case TGSI_OPCODE_UMAD: |
| case TGSI_OPCODE_UMAX: |
| case TGSI_OPCODE_ROUND: |
| case TGSI_OPCODE_SQRT: |
| case TGSI_OPCODE_SHL: |
| case TGSI_OPCODE_TRUNC: |
| case TGSI_OPCODE_U2F: |
| case TGSI_OPCODE_UCMP: |
| case TGSI_OPCODE_USHR: |
| case TGSI_OPCODE_USNE: |
| case TGSI_OPCODE_XOR: |
| /* Begin SM5 opcodes */ |
| case TGSI_OPCODE_F2D: |
| case TGSI_OPCODE_D2F: |
| case TGSI_OPCODE_DADD: |
| case TGSI_OPCODE_DMUL: |
| case TGSI_OPCODE_DMAX: |
| case TGSI_OPCODE_DMIN: |
| case TGSI_OPCODE_DSGE: |
| case TGSI_OPCODE_DSLT: |
| case TGSI_OPCODE_DSEQ: |
| case TGSI_OPCODE_DSNE: |
| case TGSI_OPCODE_BREV: |
| case TGSI_OPCODE_POPC: |
| case TGSI_OPCODE_LSB: |
| case TGSI_OPCODE_INTERP_CENTROID: |
| case TGSI_OPCODE_INTERP_SAMPLE: |
| /* simple instructions */ |
| return emit_simple(emit, inst); |
| case TGSI_OPCODE_RET: |
| if (emit->unit == PIPE_SHADER_TESS_CTRL && |
| !emit->tcs.control_point_phase) { |
| |
| /* store the tessellation levels in the patch constant phase only */ |
| store_tesslevels(emit); |
| } |
| return emit_simple(emit, inst); |
| |
| case TGSI_OPCODE_IMSB: |
| case TGSI_OPCODE_UMSB: |
| return emit_msb(emit, inst); |
| case TGSI_OPCODE_IBFE: |
| case TGSI_OPCODE_UBFE: |
| return emit_bfe(emit, inst); |
| case TGSI_OPCODE_BFI: |
| return emit_bfi(emit, inst); |
| case TGSI_OPCODE_MOV: |
| return emit_mov(emit, inst); |
| case TGSI_OPCODE_EMIT: |
| return emit_vertex(emit, inst); |
| case TGSI_OPCODE_ENDPRIM: |
| return emit_endprim(emit, inst); |
| case TGSI_OPCODE_IABS: |
| return emit_iabs(emit, inst); |
| case TGSI_OPCODE_ARL: |
| /* fall-through */ |
| case TGSI_OPCODE_UARL: |
| return emit_arl_uarl(emit, inst); |
| case TGSI_OPCODE_BGNSUB: |
| /* no-op */ |
| return TRUE; |
| case TGSI_OPCODE_CAL: |
| return emit_cal(emit, inst); |
| case TGSI_OPCODE_CMP: |
| return emit_cmp(emit, inst); |
| case TGSI_OPCODE_COS: |
| return emit_sincos(emit, inst); |
| case TGSI_OPCODE_DST: |
| return emit_dst(emit, inst); |
| case TGSI_OPCODE_EX2: |
| return emit_ex2(emit, inst); |
| case TGSI_OPCODE_EXP: |
| return emit_exp(emit, inst); |
| case TGSI_OPCODE_IF: |
| return emit_if(emit, &inst->Src[0]); |
| case TGSI_OPCODE_KILL: |
| return emit_kill(emit, inst); |
| case TGSI_OPCODE_KILL_IF: |
| return emit_kill_if(emit, inst); |
| case TGSI_OPCODE_LG2: |
| return emit_lg2(emit, inst); |
| case TGSI_OPCODE_LIT: |
| return emit_lit(emit, inst); |
| case TGSI_OPCODE_LODQ: |
| return emit_lodq(emit, inst); |
| case TGSI_OPCODE_LOG: |
| return emit_log(emit, inst); |
| case TGSI_OPCODE_LRP: |
| return emit_lrp(emit, inst); |
| case TGSI_OPCODE_POW: |
| return emit_pow(emit, inst); |
| case TGSI_OPCODE_RCP: |
| return emit_rcp(emit, inst); |
| case TGSI_OPCODE_RSQ: |
| return emit_rsq(emit, inst); |
| case TGSI_OPCODE_SAMPLE: |
| return emit_sample(emit, inst); |
| case TGSI_OPCODE_SEQ: |
| return emit_seq(emit, inst); |
| case TGSI_OPCODE_SGE: |
| return emit_sge(emit, inst); |
| case TGSI_OPCODE_SGT: |
| return emit_sgt(emit, inst); |
| case TGSI_OPCODE_SIN: |
| return emit_sincos(emit, inst); |
| case TGSI_OPCODE_SLE: |
| return emit_sle(emit, inst); |
| case TGSI_OPCODE_SLT: |
| return emit_slt(emit, inst); |
| case TGSI_OPCODE_SNE: |
| return emit_sne(emit, inst); |
| case TGSI_OPCODE_SSG: |
| return emit_ssg(emit, inst); |
| case TGSI_OPCODE_ISSG: |
| return emit_issg(emit, inst); |
| case TGSI_OPCODE_TEX: |
| return emit_tex(emit, inst); |
| case TGSI_OPCODE_TG4: |
| return emit_tg4(emit, inst); |
| case TGSI_OPCODE_TEX2: |
| return emit_tex2(emit, inst); |
| case TGSI_OPCODE_TXP: |
| return emit_txp(emit, inst); |
| case TGSI_OPCODE_TXB: |
| case TGSI_OPCODE_TXB2: |
| case TGSI_OPCODE_TXL: |
| return emit_txl_txb(emit, inst); |
| case TGSI_OPCODE_TXD: |
| return emit_txd(emit, inst); |
| case TGSI_OPCODE_TXF: |
| return emit_txf(emit, inst); |
| case TGSI_OPCODE_TXL2: |
| return emit_txl2(emit, inst); |
| case TGSI_OPCODE_TXQ: |
| return emit_txq(emit, inst); |
| case TGSI_OPCODE_UIF: |
| return emit_if(emit, &inst->Src[0]); |
| case TGSI_OPCODE_UMUL_HI: |
| case TGSI_OPCODE_IMUL_HI: |
| case TGSI_OPCODE_UDIV: |
| /* These cases use only the FIRST of two destination registers */ |
| return emit_simple_1dst(emit, inst, 2, 0); |
| case TGSI_OPCODE_IDIV: |
| return emit_vmware(emit, inst, VGPU10_VMWARE_OPCODE_IDIV); |
| case TGSI_OPCODE_UMUL: |
| case TGSI_OPCODE_UMOD: |
| case TGSI_OPCODE_MOD: |
| /* These cases use only the SECOND of two destination registers */ |
| return emit_simple_1dst(emit, inst, 2, 1); |
| |
| /* Begin SM5 opcodes */ |
| case TGSI_OPCODE_DABS: |
| return emit_dabs(emit, inst); |
| case TGSI_OPCODE_DNEG: |
| return emit_dneg(emit, inst); |
| case TGSI_OPCODE_DRCP: |
| return emit_simple(emit, inst); |
| case TGSI_OPCODE_DSQRT: |
| return emit_dsqrt(emit, inst); |
| case TGSI_OPCODE_DMAD: |
| return emit_dmad(emit, inst); |
| case TGSI_OPCODE_DFRAC: |
| return emit_vmware(emit, inst, VGPU10_VMWARE_OPCODE_DFRC); |
| case TGSI_OPCODE_D2I: |
| case TGSI_OPCODE_D2U: |
| return emit_simple(emit, inst); |
| case TGSI_OPCODE_I2D: |
| case TGSI_OPCODE_U2D: |
| return emit_simple(emit, inst); |
| case TGSI_OPCODE_DRSQ: |
| return emit_drsq(emit, &inst->Dst[0], &inst->Src[0]); |
| case TGSI_OPCODE_DDIV: |
| return emit_simple(emit, inst); |
| case TGSI_OPCODE_INTERP_OFFSET: |
| return emit_interp_offset(emit, inst); |
| |
| /* The following opcodes should never be seen here. We return zero |
| * for all the PIPE_CAP_TGSI_DROUND_SUPPORTED, DFRACEXP_DLDEXP_SUPPORTED, |
| * FMA_SUPPORTED, LDEXP_SUPPORTED queries. |
| */ |
| case TGSI_OPCODE_FMA: |
| case TGSI_OPCODE_LDEXP: |
| case TGSI_OPCODE_DSSG: |
| case TGSI_OPCODE_DFRACEXP: |
| case TGSI_OPCODE_DLDEXP: |
| case TGSI_OPCODE_DTRUNC: |
| case TGSI_OPCODE_DCEIL: |
| case TGSI_OPCODE_DFLR: |
| debug_printf("Unexpected TGSI opcode %s. " |
| "Should have been translated away by the GLSL compiler.\n", |
| tgsi_get_opcode_name(opcode)); |
| return FALSE; |
| |
| case TGSI_OPCODE_LOAD: |
| case TGSI_OPCODE_STORE: |
| case TGSI_OPCODE_ATOMAND: |
| case TGSI_OPCODE_ATOMCAS: |
| case TGSI_OPCODE_ATOMIMAX: |
| case TGSI_OPCODE_ATOMIMIN: |
| case TGSI_OPCODE_ATOMOR: |
| case TGSI_OPCODE_ATOMUADD: |
| case TGSI_OPCODE_ATOMUMAX: |
| case TGSI_OPCODE_ATOMUMIN: |
| case TGSI_OPCODE_ATOMXCHG: |
| case TGSI_OPCODE_ATOMXOR: |
| return FALSE; |
| case TGSI_OPCODE_BARRIER: |
| if (emit->unit == PIPE_SHADER_TESS_CTRL) { |
| /* SM5 device doesn't support BARRIER in tcs . If barrier is used |
| * in shader, don't do anything for this opcode and continue rest |
| * of shader translation |
| */ |
| pipe_debug_message(&emit->svga_debug_callback, INFO, |
| "barrier instruction is not supported in tessellation control shader\n"); |
| return TRUE; |
| } |
| else { |
| return emit_simple(emit, inst); |
| } |
| |
| case TGSI_OPCODE_END: |
| if (!emit_post_helpers(emit)) |
| return FALSE; |
| return emit_simple(emit, inst); |
| |
| default: |
| debug_printf("Unimplemented tgsi instruction %s\n", |
| tgsi_get_opcode_name(opcode)); |
| return FALSE; |
| } |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Emit the extra instructions to adjust the vertex position. |
| * There are two possible adjustments: |
| * 1. Converting from Gallium to VGPU10 coordinate space by applying the |
| * "prescale" and "pretranslate" values. |
| * 2. Undoing the viewport transformation when we use the swtnl/draw path. |
| * \param vs_pos_tmp_index which temporary register contains the vertex pos. |
| */ |
| static void |
| emit_vpos_instructions(struct svga_shader_emitter_v10 *emit) |
| { |
| struct tgsi_full_src_register tmp_pos_src; |
| struct tgsi_full_dst_register pos_dst; |
| const unsigned vs_pos_tmp_index = emit->vposition.tmp_index; |
| |
| /* Don't bother to emit any extra vertex instructions if vertex position is |
| * not written out |
| */ |
| if (emit->vposition.out_index == INVALID_INDEX) |
| return; |
| |
| /** |
| * Reset the temporary vertex position register index |
| * so that emit_dst_register() will use the real vertex position output |
| */ |
| emit->vposition.tmp_index = INVALID_INDEX; |
| |
| tmp_pos_src = make_src_temp_reg(vs_pos_tmp_index); |
| pos_dst = make_dst_output_reg(emit->vposition.out_index); |
| |
| /* If non-adjusted vertex position register index |
| * is valid, copy the vertex position from the temporary |
| * vertex position register before it is modified by the |
| * prescale computation. |
| */ |
| if (emit->vposition.so_index != INVALID_INDEX) { |
| struct tgsi_full_dst_register pos_so_dst = |
| make_dst_output_reg(emit->vposition.so_index); |
| |
| /* MOV pos_so, tmp_pos */ |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_so_dst, &tmp_pos_src); |
| } |
| |
| if (emit->vposition.need_prescale) { |
| /* This code adjusts the vertex position to match the VGPU10 convention. |
| * If p is the position computed by the shader (usually by applying the |
| * modelview and projection matrices), the new position q is computed by: |
| * |
| * q.x = p.w * trans.x + p.x * scale.x |
| * q.y = p.w * trans.y + p.y * scale.y |
| * q.z = p.w * trans.z + p.z * scale.z; |
| * q.w = p.w * trans.w + p.w; |
| */ |
| struct tgsi_full_src_register tmp_pos_src_w = |
| scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W); |
| struct tgsi_full_dst_register tmp_pos_dst = |
| make_dst_temp_reg(vs_pos_tmp_index); |
| struct tgsi_full_dst_register tmp_pos_dst_xyz = |
| writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XYZ); |
| |
| struct tgsi_full_src_register prescale_scale = |
| make_src_temp_reg(emit->vposition.prescale_scale_index); |
| struct tgsi_full_src_register prescale_trans = |
| make_src_temp_reg(emit->vposition.prescale_trans_index); |
| |
| /* MUL tmp_pos.xyz, tmp_pos, prescale.scale */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xyz, |
| &tmp_pos_src, &prescale_scale); |
| |
| /* MAD pos, tmp_pos.wwww, prescale.trans, tmp_pos */ |
| emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &pos_dst, &tmp_pos_src_w, |
| &prescale_trans, &tmp_pos_src); |
| } |
| else if (emit->key.vs.undo_viewport) { |
| /* This code computes the final vertex position from the temporary |
| * vertex position by undoing the viewport transformation and the |
| * divide-by-W operation (we convert window coords back to clip coords). |
| * This is needed when we use the 'draw' module for fallbacks. |
| * If p is the temp pos in window coords, then the NDC coord q is: |
| * q.x = (p.x - vp.x_trans) / vp.x_scale * p.w |
| * q.y = (p.y - vp.y_trans) / vp.y_scale * p.w |
| * q.z = p.z * p.w |
| * q.w = p.w |
| * CONST[vs_viewport_index] contains: |
| * { 1/vp.x_scale, 1/vp.y_scale, -vp.x_trans, -vp.y_trans } |
| */ |
| struct tgsi_full_dst_register tmp_pos_dst = |
| make_dst_temp_reg(vs_pos_tmp_index); |
| struct tgsi_full_dst_register tmp_pos_dst_xy = |
| writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XY); |
| struct tgsi_full_src_register tmp_pos_src_wwww = |
| scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W); |
| |
| struct tgsi_full_dst_register pos_dst_xyz = |
| writemask_dst(&pos_dst, TGSI_WRITEMASK_XYZ); |
| struct tgsi_full_dst_register pos_dst_w = |
| writemask_dst(&pos_dst, TGSI_WRITEMASK_W); |
| |
| struct tgsi_full_src_register vp_xyzw = |
| make_src_const_reg(emit->vs.viewport_index); |
| struct tgsi_full_src_register vp_zwww = |
| swizzle_src(&vp_xyzw, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, |
| TGSI_SWIZZLE_W, TGSI_SWIZZLE_W); |
| |
| /* ADD tmp_pos.xy, tmp_pos.xy, viewport.zwww */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_pos_dst_xy, |
| &tmp_pos_src, &vp_zwww); |
| |
| /* MUL tmp_pos.xy, tmp_pos.xyzw, viewport.xyzy */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xy, |
| &tmp_pos_src, &vp_xyzw); |
| |
| /* MUL pos.xyz, tmp_pos.xyz, tmp_pos.www */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &pos_dst_xyz, |
| &tmp_pos_src, &tmp_pos_src_wwww); |
| |
| /* MOV pos.w, tmp_pos.w */ |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_dst_w, &tmp_pos_src); |
| } |
| else if (vs_pos_tmp_index != INVALID_INDEX) { |
| /* This code is to handle the case where the temporary vertex |
| * position register is created when the vertex shader has stream |
| * output and prescale is disabled because rasterization is to be |
| * discarded. |
| */ |
| struct tgsi_full_dst_register pos_dst = |
| make_dst_output_reg(emit->vposition.out_index); |
| |
| /* MOV pos, tmp_pos */ |
| begin_emit_instruction(emit); |
| emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); |
| emit_dst_register(emit, &pos_dst); |
| emit_src_register(emit, &tmp_pos_src); |
| end_emit_instruction(emit); |
| } |
| |
| /* Restore original vposition.tmp_index value for the next GS vertex. |
| * It doesn't matter for VS. |
| */ |
| emit->vposition.tmp_index = vs_pos_tmp_index; |
| } |
| |
| static void |
| emit_clipping_instructions(struct svga_shader_emitter_v10 *emit) |
| { |
| if (emit->clip_mode == CLIP_DISTANCE) { |
| /* Copy from copy distance temporary to CLIPDIST & the shadow copy */ |
| emit_clip_distance_instructions(emit); |
| |
| } else if (emit->clip_mode == CLIP_VERTEX && |
| emit->key.last_vertex_stage) { |
| /* Convert TGSI CLIPVERTEX to CLIPDIST */ |
| emit_clip_vertex_instructions(emit); |
| } |
| |
| /** |
| * Emit vertex position and take care of legacy user planes only if |
| * there is a valid vertex position register index. |
| * This is to take care of the case |
| * where the shader doesn't output vertex position. Then in |
| * this case, don't bother to emit more vertex instructions. |
| */ |
| if (emit->vposition.out_index == INVALID_INDEX) |
| return; |
| |
| /** |
| * Emit per-vertex clipping instructions for legacy user defined clip planes. |
| * NOTE: we must emit the clip distance instructions before the |
| * emit_vpos_instructions() call since the later function will change |
| * the TEMP[vs_pos_tmp_index] value. |
| */ |
| if (emit->clip_mode == CLIP_LEGACY && emit->key.last_vertex_stage) { |
| /* Emit CLIPDIST for legacy user defined clip planes */ |
| emit_clip_distance_from_vpos(emit, emit->vposition.tmp_index); |
| } |
| } |
| |
| |
/**
 * Emit extra per-vertex instructions: first the clip distance
 * computation for the current clipping mode, then the clip-coordinate
 * space conversion of the vertex position.
 *
 * This is called for each GS emit-vertex instruction and at the end of
 * VS translation.
 */
static void
emit_vertex_instructions(struct svga_shader_emitter_v10 *emit)
{
   /* Clipping comes first; it is emitted before the position fix-up */
   emit_clipping_instructions(emit);

   /* Then the vertex position adjustment */
   emit_vpos_instructions(emit);
}
| |
| |
| /** |
| * Translate the TGSI_OPCODE_EMIT GS instruction. |
| */ |
| static boolean |
| emit_vertex(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_full_instruction *inst) |
| { |
| unsigned ret = TRUE; |
| |
| assert(emit->unit == PIPE_SHADER_GEOMETRY); |
| |
| /** |
| * Emit the viewport array index for the first vertex. |
| */ |
| if (emit->gs.viewport_index_out_index != INVALID_INDEX) { |
| struct tgsi_full_dst_register viewport_index_out = |
| make_dst_output_reg(emit->gs.viewport_index_out_index); |
| struct tgsi_full_dst_register viewport_index_out_x = |
| writemask_dst(&viewport_index_out, TGSI_WRITEMASK_X); |
| struct tgsi_full_src_register viewport_index_tmp = |
| make_src_temp_reg(emit->gs.viewport_index_tmp_index); |
| |
| /* Set the out index to INVALID_INDEX, so it will not |
| * be assigned to a temp again in emit_dst_register, and |
| * the viewport index will not be assigned again in the |
| * subsequent vertices. |
| */ |
| emit->gs.viewport_index_out_index = INVALID_INDEX; |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, |
| &viewport_index_out_x, &viewport_index_tmp); |
| } |
| |
| /** |
| * Find the stream index associated with this emit vertex instruction. |
| */ |
| assert(inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE); |
| unsigned streamIndex = find_stream_index(emit, &inst->Src[0]); |
| |
| /** |
| * According to the ARB_gpu_shader5 spec, the built-in geometry shader |
| * outputs are always associated with vertex stream zero. |
| * So emit the extra vertex instructions for position or clip distance |
| * for stream zero only. |
| */ |
| if (streamIndex == 0) { |
| /** |
| * Before emitting vertex instructions, emit the temporaries for |
| * the prescale constants based on the viewport index if needed. |
| */ |
| if (emit->vposition.need_prescale && !emit->vposition.have_prescale) |
| emit_temp_prescale_instructions(emit); |
| |
| emit_vertex_instructions(emit); |
| } |
| |
| begin_emit_instruction(emit); |
| if (emit->version >= 50) { |
| if (emit->info.num_stream_output_components[streamIndex] == 0) { |
| /** |
| * If there is no output for this stream, discard this instruction. |
| */ |
| emit->discard_instruction = TRUE; |
| } |
| else { |
| emit_opcode(emit, VGPU10_OPCODE_EMIT_STREAM, FALSE); |
| emit_stream_register(emit, streamIndex); |
| } |
| } |
| else { |
| emit_opcode(emit, VGPU10_OPCODE_EMIT, FALSE); |
| } |
| end_emit_instruction(emit); |
| |
| return ret; |
| } |
| |
| |
| /** |
| * Emit the extra code to convert from VGPU10's boolean front-face |
| * register to TGSI's signed front-face register. |
| * |
| * TODO: Make temporary front-face register a scalar. |
| */ |
| static void |
| emit_frontface_instructions(struct svga_shader_emitter_v10 *emit) |
| { |
| assert(emit->unit == PIPE_SHADER_FRAGMENT); |
| |
| if (emit->fs.face_input_index != INVALID_INDEX) { |
| /* convert vgpu10 boolean face register to gallium +/-1 value */ |
| struct tgsi_full_dst_register tmp_dst = |
| make_dst_temp_reg(emit->fs.face_tmp_index); |
| struct tgsi_full_src_register one = |
| make_immediate_reg_float(emit, 1.0f); |
| struct tgsi_full_src_register neg_one = |
| make_immediate_reg_float(emit, -1.0f); |
| |
| /* MOVC face_tmp, IS_FRONT_FACE.x, 1.0, -1.0 */ |
| begin_emit_instruction(emit); |
| emit_opcode(emit, VGPU10_OPCODE_MOVC, FALSE); |
| emit_dst_register(emit, &tmp_dst); |
| emit_face_register(emit); |
| emit_src_register(emit, &one); |
| emit_src_register(emit, &neg_one); |
| end_emit_instruction(emit); |
| } |
| } |
| |
| |
| /** |
| * Emit the extra code to convert from VGPU10's fragcoord.w value to 1/w. |
| */ |
| static void |
| emit_fragcoord_instructions(struct svga_shader_emitter_v10 *emit) |
| { |
| assert(emit->unit == PIPE_SHADER_FRAGMENT); |
| |
| if (emit->fs.fragcoord_input_index != INVALID_INDEX) { |
| struct tgsi_full_dst_register tmp_dst = |
| make_dst_temp_reg(emit->fs.fragcoord_tmp_index); |
| struct tgsi_full_dst_register tmp_dst_xyz = |
| writemask_dst(&tmp_dst, TGSI_WRITEMASK_XYZ); |
| struct tgsi_full_dst_register tmp_dst_w = |
| writemask_dst(&tmp_dst, TGSI_WRITEMASK_W); |
| struct tgsi_full_src_register one = |
| make_immediate_reg_float(emit, 1.0f); |
| struct tgsi_full_src_register fragcoord = |
| make_src_reg(TGSI_FILE_INPUT, emit->fs.fragcoord_input_index); |
| |
| /* save the input index */ |
| unsigned fragcoord_input_index = emit->fs.fragcoord_input_index; |
| /* set to invalid to prevent substitution in emit_src_register() */ |
| emit->fs.fragcoord_input_index = INVALID_INDEX; |
| |
| /* MOV fragcoord_tmp.xyz, fragcoord.xyz */ |
| begin_emit_instruction(emit); |
| emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); |
| emit_dst_register(emit, &tmp_dst_xyz); |
| emit_src_register(emit, &fragcoord); |
| end_emit_instruction(emit); |
| |
| /* DIV fragcoord_tmp.w, 1.0, fragcoord.w */ |
| begin_emit_instruction(emit); |
| emit_opcode(emit, VGPU10_OPCODE_DIV, FALSE); |
| emit_dst_register(emit, &tmp_dst_w); |
| emit_src_register(emit, &one); |
| emit_src_register(emit, &fragcoord); |
| end_emit_instruction(emit); |
| |
| /* restore saved value */ |
| emit->fs.fragcoord_input_index = fragcoord_input_index; |
| } |
| } |
| |
| |
| /** |
| * Emit the extra code to get the current sample position value and |
| * put it into a temp register. |
| */ |
| static void |
| emit_sample_position_instructions(struct svga_shader_emitter_v10 *emit) |
| { |
| assert(emit->unit == PIPE_SHADER_FRAGMENT); |
| |
| if (emit->fs.sample_pos_sys_index != INVALID_INDEX) { |
| assert(emit->version >= 41); |
| |
| struct tgsi_full_dst_register tmp_dst = |
| make_dst_temp_reg(emit->fs.sample_pos_tmp_index); |
| struct tgsi_full_src_register half = |
| make_immediate_reg_float4(emit, 0.5, 0.5, 0.0, 0.0); |
| |
| struct tgsi_full_src_register tmp_src = |
| make_src_temp_reg(emit->fs.sample_pos_tmp_index); |
| struct tgsi_full_src_register sample_index_reg = |
| make_src_scalar_reg(TGSI_FILE_SYSTEM_VALUE, |
| emit->fs.sample_id_sys_index, TGSI_SWIZZLE_X); |
| |
| /* The first src register is a shader resource (if we want a |
| * multisampled resource sample position) or the rasterizer register |
| * (if we want the current sample position in the color buffer). We |
| * want the later. |
| */ |
| |
| /* SAMPLE_POS dst, RASTERIZER, sampleIndex */ |
| begin_emit_instruction(emit); |
| emit_opcode(emit, VGPU10_OPCODE_SAMPLE_POS, FALSE); |
| emit_dst_register(emit, &tmp_dst); |
| emit_rasterizer_register(emit); |
| emit_src_register(emit, &sample_index_reg); |
| end_emit_instruction(emit); |
| |
| /* Convert from D3D coords to GL coords by adding 0.5 bias */ |
| /* ADD dst, dst, half */ |
| begin_emit_instruction(emit); |
| emit_opcode(emit, VGPU10_OPCODE_ADD, FALSE); |
| emit_dst_register(emit, &tmp_dst); |
| emit_src_register(emit, &tmp_src); |
| emit_src_register(emit, &half); |
| end_emit_instruction(emit); |
| } |
| } |
| |
| |
| /** |
| * Emit extra instructions to adjust VS inputs/attributes. This can |
| * mean casting a vertex attribute from int to float or setting the |
| * W component to 1, or both. |
| */ |
| static void |
| emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit) |
| { |
| const unsigned save_w_1_mask = emit->key.vs.adjust_attrib_w_1; |
| const unsigned save_itof_mask = emit->key.vs.adjust_attrib_itof; |
| const unsigned save_utof_mask = emit->key.vs.adjust_attrib_utof; |
| const unsigned save_is_bgra_mask = emit->key.vs.attrib_is_bgra; |
| const unsigned save_puint_to_snorm_mask = emit->key.vs.attrib_puint_to_snorm; |
| const unsigned save_puint_to_uscaled_mask = emit->key.vs.attrib_puint_to_uscaled; |
| const unsigned save_puint_to_sscaled_mask = emit->key.vs.attrib_puint_to_sscaled; |
| |
| unsigned adjust_mask = (save_w_1_mask | |
| save_itof_mask | |
| save_utof_mask | |
| save_is_bgra_mask | |
| save_puint_to_snorm_mask | |
| save_puint_to_uscaled_mask | |
| save_puint_to_sscaled_mask); |
| |
| assert(emit->unit == PIPE_SHADER_VERTEX); |
| |
| if (adjust_mask) { |
| struct tgsi_full_src_register one = |
| make_immediate_reg_float(emit, 1.0f); |
| |
| struct tgsi_full_src_register one_int = |
| make_immediate_reg_int(emit, 1); |
| |
| /* We need to turn off these bitmasks while emitting the |
| * instructions below, then restore them afterward. |
| */ |
| emit->key.vs.adjust_attrib_w_1 = 0; |
| emit->key.vs.adjust_attrib_itof = 0; |
| emit->key.vs.adjust_attrib_utof = 0; |
| emit->key.vs.attrib_is_bgra = 0; |
| emit->key.vs.attrib_puint_to_snorm = 0; |
| emit->key.vs.attrib_puint_to_uscaled = 0; |
| emit->key.vs.attrib_puint_to_sscaled = 0; |
| |
| while (adjust_mask) { |
| unsigned index = u_bit_scan(&adjust_mask); |
| |
| /* skip the instruction if this vertex attribute is not being used */ |
| if (emit->info.input_usage_mask[index] == 0) |
| continue; |
| |
| unsigned tmp = emit->vs.adjusted_input[index]; |
| struct tgsi_full_src_register input_src = |
| make_src_reg(TGSI_FILE_INPUT, index); |
| |
| struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); |
| struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); |
| struct tgsi_full_dst_register tmp_dst_w = |
| writemask_dst(&tmp_dst, TGSI_WRITEMASK_W); |
| |
| /* ITOF/UTOF/MOV tmp, input[index] */ |
| if (save_itof_mask & (1 << index)) { |
| emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, |
| &tmp_dst, &input_src); |
| } |
| else if (save_utof_mask & (1 << index)) { |
| emit_instruction_op1(emit, VGPU10_OPCODE_UTOF, |
| &tmp_dst, &input_src); |
| } |
| else if (save_puint_to_snorm_mask & (1 << index)) { |
| emit_puint_to_snorm(emit, &tmp_dst, &input_src); |
| } |
| else if (save_puint_to_uscaled_mask & (1 << index)) { |
| emit_puint_to_uscaled(emit, &tmp_dst, &input_src); |
| } |
| else if (save_puint_to_sscaled_mask & (1 << index)) { |
| emit_puint_to_sscaled(emit, &tmp_dst, &input_src); |
| } |
| else { |
| assert((save_w_1_mask | save_is_bgra_mask) & (1 << index)); |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, |
| &tmp_dst, &input_src); |
| } |
| |
| if (save_is_bgra_mask & (1 << index)) { |
| emit_swap_r_b(emit, &tmp_dst, &tmp_src); |
| } |
| |
| if (save_w_1_mask & (1 << index)) { |
| /* MOV tmp.w, 1.0 */ |
| if (emit->key.vs.attrib_is_pure_int & (1 << index)) { |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, |
| &tmp_dst_w, &one_int); |
| } |
| else { |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, |
| &tmp_dst_w, &one); |
| } |
| } |
| } |
| |
| emit->key.vs.adjust_attrib_w_1 = save_w_1_mask; |
| emit->key.vs.adjust_attrib_itof = save_itof_mask; |
| emit->key.vs.adjust_attrib_utof = save_utof_mask; |
| emit->key.vs.attrib_is_bgra = save_is_bgra_mask; |
| emit->key.vs.attrib_puint_to_snorm = save_puint_to_snorm_mask; |
| emit->key.vs.attrib_puint_to_uscaled = save_puint_to_uscaled_mask; |
| emit->key.vs.attrib_puint_to_sscaled = save_puint_to_sscaled_mask; |
| } |
| } |
| |
| |
| /* Find zero-value immedate for default layer index */ |
| static void |
| emit_default_layer_instructions(struct svga_shader_emitter_v10 *emit) |
| { |
| assert(emit->unit == PIPE_SHADER_FRAGMENT); |
| |
| /* immediate for default layer index 0 */ |
| if (emit->fs.layer_input_index != INVALID_INDEX) { |
| union tgsi_immediate_data imm; |
| imm.Int = 0; |
| emit->fs.layer_imm_index = find_immediate(emit, imm, 0); |
| } |
| } |
| |
| |
| static void |
| emit_temp_prescale_from_cbuf(struct svga_shader_emitter_v10 *emit, |
| unsigned cbuf_index, |
| struct tgsi_full_dst_register *scale, |
| struct tgsi_full_dst_register *translate) |
| { |
| struct tgsi_full_src_register scale_cbuf = make_src_const_reg(cbuf_index); |
| struct tgsi_full_src_register trans_cbuf = make_src_const_reg(cbuf_index+1); |
| |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, scale, &scale_cbuf); |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, translate, &trans_cbuf); |
| } |
| |
| |
| /** |
| * A recursive helper function to find the prescale from the constant buffer |
| */ |
| static void |
| find_prescale_from_cbuf(struct svga_shader_emitter_v10 *emit, |
| unsigned index, unsigned num_prescale, |
| struct tgsi_full_src_register *vp_index, |
| struct tgsi_full_dst_register *scale, |
| struct tgsi_full_dst_register *translate, |
| struct tgsi_full_src_register *tmp_src, |
| struct tgsi_full_dst_register *tmp_dst) |
| { |
| if (num_prescale == 0) |
| return; |
| |
| if (index > 0) { |
| /* ELSE */ |
| emit_instruction_op0(emit, VGPU10_OPCODE_ELSE); |
| } |
| |
| struct tgsi_full_src_register index_src = |
| make_immediate_reg_int(emit, index); |
| |
| if (index == 0) { |
| /* GE tmp, vp_index, index */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_GE, tmp_dst, |
| vp_index, &index_src); |
| } else { |
| /* EQ tmp, vp_index, index */ |
| emit_instruction_op2(emit, VGPU10_OPCODE_EQ, tmp_dst, |
| vp_index, &index_src); |
| } |
| |
| /* IF tmp */ |
| emit_if(emit, tmp_src); |
| emit_temp_prescale_from_cbuf(emit, |
| emit->vposition.prescale_cbuf_index + 2 * index, |
| scale, translate); |
| |
| find_prescale_from_cbuf(emit, index+1, num_prescale-1, |
| vp_index, scale, translate, |
| tmp_src, tmp_dst); |
| |
| /* ENDIF */ |
| emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF); |
| } |
| |
| |
| /** |
| * This helper function emits instructions to set the prescale |
| * and translate temporaries to the correct constants from the |
| * constant buffer according to the designated viewport. |
| */ |
| static void |
| emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit) |
| { |
| struct tgsi_full_dst_register prescale_scale = |
| make_dst_temp_reg(emit->vposition.prescale_scale_index); |
| struct tgsi_full_dst_register prescale_translate = |
| make_dst_temp_reg(emit->vposition.prescale_trans_index); |
| |
| unsigned prescale_cbuf_index = emit->vposition.prescale_cbuf_index; |
| |
| if (emit->vposition.num_prescale == 1) { |
| emit_temp_prescale_from_cbuf(emit, |
| prescale_cbuf_index, |
| &prescale_scale, &prescale_translate); |
| } else { |
| /** |
| * Since SM5 device does not support dynamic indexing, we need |
| * to do the if-else to find the prescale constants for the |
| * specified viewport. |
| */ |
| struct tgsi_full_src_register vp_index_src = |
| make_src_temp_reg(emit->gs.viewport_index_tmp_index); |
| |
| struct tgsi_full_src_register vp_index_src_x = |
| scalar_src(&vp_index_src, TGSI_SWIZZLE_X); |
| |
| unsigned tmp = get_temp_index(emit); |
| struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); |
| struct tgsi_full_src_register tmp_src_x = |
| scalar_src(&tmp_src, TGSI_SWIZZLE_X); |
| struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); |
| |
| find_prescale_from_cbuf(emit, 0, emit->vposition.num_prescale, |
| &vp_index_src_x, |
| &prescale_scale, &prescale_translate, |
| &tmp_src_x, &tmp_dst); |
| } |
| |
| /* Mark prescale temporaries are emitted */ |
| emit->vposition.have_prescale = 1; |
| } |
| |
| |
| /** |
| * A helper function to emit an instruction in a vertex shader to add a bias |
| * to the VertexID system value. This patches the VertexID in the SVGA vertex |
| * shader to include the base vertex of an indexed primitive or the start index |
| * of a non-indexed primitive. |
| */ |
| static void |
| emit_vertex_id_nobase_instruction(struct svga_shader_emitter_v10 *emit) |
| { |
| struct tgsi_full_src_register vertex_id_bias_index = |
| make_src_const_reg(emit->vs.vertex_id_bias_index); |
| struct tgsi_full_src_register vertex_id_sys_src = |
| make_src_reg(TGSI_FILE_SYSTEM_VALUE, emit->vs.vertex_id_sys_index); |
| struct tgsi_full_src_register vertex_id_sys_src_x = |
| scalar_src(&vertex_id_sys_src, TGSI_SWIZZLE_X); |
| struct tgsi_full_dst_register vertex_id_tmp_dst = |
| make_dst_temp_reg(emit->vs.vertex_id_tmp_index); |
| |
| /* IADD vertex_id_tmp, vertex_id_sys, vertex_id_bias */ |
| unsigned vertex_id_tmp_index = emit->vs.vertex_id_tmp_index; |
| emit->vs.vertex_id_tmp_index = INVALID_INDEX; |
| emit_instruction_opn(emit, VGPU10_OPCODE_IADD, &vertex_id_tmp_dst, |
| &vertex_id_sys_src_x, &vertex_id_bias_index, NULL, FALSE, |
| FALSE); |
| emit->vs.vertex_id_tmp_index = vertex_id_tmp_index; |
| } |
| |
| /** |
| * Hull Shader must have control point outputs. But tessellation |
| * control shader can return without writing to control point output. |
| * In this case, the control point output is assumed to be passthrough |
| * from the control point input. |
| * This helper function is to write out a control point output first in case |
| * the tessellation control shader returns before writing a |
| * control point output. |
| */ |
| static void |
| emit_tcs_default_control_point_output(struct svga_shader_emitter_v10 *emit) |
| { |
| assert(emit->unit == PIPE_SHADER_TESS_CTRL); |
| assert(emit->tcs.control_point_phase); |
| assert(emit->tcs.control_point_input_index != INVALID_INDEX); |
| assert(emit->tcs.control_point_out_index != INVALID_INDEX); |
| assert(emit->tcs.invocation_id_sys_index != INVALID_INDEX); |
| |
| /* UARL ADDR[INDEX].x INVOCATION.xxxx */ |
| |
| struct tgsi_full_src_register invocation_src; |
| struct tgsi_full_dst_register addr_dst; |
| struct tgsi_full_dst_register addr_dst_x; |
| unsigned addr_tmp; |
| |
| addr_tmp = emit->address_reg_index[emit->tcs.control_point_addr_index]; |
| addr_dst = make_dst_temp_reg(addr_tmp); |
| addr_dst_x = writemask_dst(&addr_dst, TGSI_WRITEMASK_X); |
| |
| invocation_src = make_src_reg(TGSI_FILE_SYSTEM_VALUE, |
| emit->tcs.invocation_id_sys_index); |
| |
| begin_emit_instruction(emit); |
| emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE); |
| emit_dst_register(emit, &addr_dst_x); |
| emit_src_register(emit, &invocation_src); |
| end_emit_instruction(emit); |
| |
| |
| /* MOV OUTPUT INPUT[ADDR[INDEX].x][POSITION] */ |
| |
| struct tgsi_full_src_register input_control_point; |
| struct tgsi_full_dst_register output_control_point; |
| |
| input_control_point = make_src_reg(TGSI_FILE_INPUT, |
| emit->tcs.control_point_input_index); |
| input_control_point.Register.Dimension = 1; |
| input_control_point.Dimension.Indirect = 1; |
| input_control_point.DimIndirect.File = TGSI_FILE_ADDRESS; |
| input_control_point.DimIndirect.Index = emit->tcs.control_point_addr_index; |
| output_control_point = |
| make_dst_output_reg(emit->tcs.control_point_out_index); |
| |
| begin_emit_instruction(emit); |
| emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE); |
| emit_dst_register(emit, &output_control_point); |
| emit_src_register(emit, &input_control_point); |
| end_emit_instruction(emit); |
| } |
| |
| /** |
| * This functions constructs temporary tessfactor from VGPU10*_TESSFACTOR |
| * values in domain shader. SM5 has tessfactors as floating point values where |
| * as tgsi emit them as vector. This function allows to construct temp |
| * tessfactor vector similar to TGSI_SEMANTIC_TESSINNER/OUTER filled with |
| * values from VGPU10*_TESSFACTOR. Use this constructed vector whenever |
| * TGSI_SEMANTIC_TESSINNER/OUTER is used in shader. |
| */ |
| static void |
| emit_temp_tessfactor_instructions(struct svga_shader_emitter_v10 *emit) |
| { |
| struct tgsi_full_src_register src; |
| struct tgsi_full_dst_register dst; |
| |
| if (emit->tes.inner.tgsi_index != INVALID_INDEX) { |
| dst = make_dst_temp_reg(emit->tes.inner.temp_index); |
| |
| switch (emit->tes.prim_mode) { |
| case PIPE_PRIM_QUADS: |
| src = make_src_scalar_reg(TGSI_FILE_INPUT, |
| emit->tes.inner.in_index + 1, TGSI_SWIZZLE_X); |
| dst = writemask_dst(&dst, TGSI_WRITEMASK_Y); |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); |
| /* fallthrough */ |
| case PIPE_PRIM_TRIANGLES: |
| src = make_src_scalar_reg(TGSI_FILE_INPUT, |
| emit->tes.inner.in_index, TGSI_SWIZZLE_X); |
| dst = writemask_dst(&dst, TGSI_WRITEMASK_X); |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); |
| break; |
| case PIPE_PRIM_LINES: |
| /** |
| * As per SM5 spec, InsideTessFactor for isolines are unused. |
| * In fact glsl tessInnerLevel for isolines doesn't mean anything but if |
| * any application try to read tessInnerLevel in TES when primitive type |
| * is isolines, then instead of driver throwing segfault for accesing it, |
| * return atleast vec(1.0f) |
| */ |
| src = make_immediate_reg_float(emit, 1.0f); |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); |
| break; |
| default: |
| break; |
| } |
| } |
| |
| if (emit->tes.outer.tgsi_index != INVALID_INDEX) { |
| dst = make_dst_temp_reg(emit->tes.outer.temp_index); |
| |
| switch (emit->tes.prim_mode) { |
| case PIPE_PRIM_QUADS: |
| src = make_src_scalar_reg(TGSI_FILE_INPUT, |
| emit->tes.outer.in_index + 3, TGSI_SWIZZLE_X); |
| dst = writemask_dst(&dst, TGSI_WRITEMASK_W); |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); |
| /* fallthrough */ |
| case PIPE_PRIM_TRIANGLES: |
| src = make_src_scalar_reg(TGSI_FILE_INPUT, |
| emit->tes.outer.in_index + 2, TGSI_SWIZZLE_X); |
| dst = writemask_dst(&dst, TGSI_WRITEMASK_Z); |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); |
| /* fallthrough */ |
| case PIPE_PRIM_LINES: |
| src = make_src_scalar_reg(TGSI_FILE_INPUT, |
| emit->tes.outer.in_index + 1, TGSI_SWIZZLE_X); |
| dst = writemask_dst(&dst, TGSI_WRITEMASK_Y); |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); |
| |
| src = make_src_scalar_reg(TGSI_FILE_INPUT, |
| emit->tes.outer.in_index , TGSI_SWIZZLE_X); |
| dst = writemask_dst(&dst, TGSI_WRITEMASK_X); |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); |
| |
| break; |
| default: |
| break; |
| } |
| } |
| } |
| |
| |
| static void |
| emit_initialize_temp_instruction(struct svga_shader_emitter_v10 *emit) |
| { |
| struct tgsi_full_src_register src; |
| struct tgsi_full_dst_register dst; |
| unsigned vgpu10_temp_index = remap_temp_index(emit, TGSI_FILE_TEMPORARY, |
| emit->initialize_temp_index); |
| src = make_immediate_reg_float(emit, 0.0f); |
| dst = make_dst_temp_reg(vgpu10_temp_index); |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); |
| emit->temp_map[emit->initialize_temp_index].initialized = TRUE; |
| emit->initialize_temp_index = INVALID_INDEX; |
| } |
| |
| |
| /** |
| * Emit any extra/helper declarations/code that we might need between |
| * the declaration section and code section. |
| */ |
| static boolean |
| emit_pre_helpers(struct svga_shader_emitter_v10 *emit) |
| { |
| /* Properties */ |
| if (emit->unit == PIPE_SHADER_GEOMETRY) |
| emit_property_instructions(emit); |
| else if (emit->unit == PIPE_SHADER_TESS_CTRL) { |
| emit_hull_shader_declarations(emit); |
| |
| /* Save the position of the first instruction token so that we can |
| * do a second pass of the instructions for the patch constant phase. |
| */ |
| emit->tcs.instruction_token_pos = emit->cur_tgsi_token; |
| emit->tcs.fork_phase_add_signature = FALSE; |
| |
| if (!emit_hull_shader_control_point_phase(emit)) { |
| emit->skip_instruction = TRUE; |
| return TRUE; |
| } |
| |
| /* Set the current tcs phase to control point phase */ |
| emit->tcs.control_point_phase = TRUE; |
| } |
| else if (emit->unit == PIPE_SHADER_TESS_EVAL) { |
| emit_domain_shader_declarations(emit); |
| } |
| |
| /* Declare inputs */ |
| if (!emit_input_declarations(emit)) |
| return FALSE; |
| |
| /* Declare outputs */ |
| if (!emit_output_declarations(emit)) |
| return FALSE; |
| |
| /* Declare temporary registers */ |
| emit_temporaries_declaration(emit); |
| |
| /* For PIPE_SHADER_TESS_CTRL, constants, samplers, resources and immediates |
| * will already be declared in hs_decls (emit_hull_shader_declarations) |
| */ |
| if (emit->unit != PIPE_SHADER_TESS_CTRL) { |
| /* Declare constant registers */ |
| emit_constant_declaration(emit); |
| |
| /* Declare samplers and resources */ |
| emit_sampler_declarations(emit); |
| emit_resource_declarations(emit); |
| |
| alloc_common_immediates(emit); |
| /* Now, emit the constant block containing all the immediates |
| * declared by shader, as well as the extra ones seen above. |
| */ |
| } |
| |
| if (emit->unit != PIPE_SHADER_FRAGMENT) { |
| /* |
| * Declare clip distance output registers for ClipVertex or |
| * user defined planes |
| */ |
| emit_clip_distance_declarations(emit); |
| } |
| |
| if (emit->unit == PIPE_SHADER_FRAGMENT && |
| emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) { |
| float alpha = emit->key.fs.alpha_ref; |
| emit->fs.alpha_ref_index = |
| alloc_immediate_float4(emit, alpha, alpha, alpha, alpha); |
| } |
| |
| if (emit->unit != PIPE_SHADER_TESS_CTRL) { |
| /** |
| * For PIPE_SHADER_TESS_CTRL, immediates are already declared in |
| * hs_decls |
| */ |
| emit_vgpu10_immediates_block(emit); |
| } |
| else { |
| emit_tcs_default_control_point_output(emit); |
| } |
| |
| if (emit->unit == PIPE_SHADER_FRAGMENT) { |
| emit_frontface_instructions(emit); |
| emit_fragcoord_instructions(emit); |
| emit_sample_position_instructions(emit); |
| emit_default_layer_instructions(emit); |
| } |
| else if (emit->unit == PIPE_SHADER_VERTEX) { |
| emit_vertex_attrib_instructions(emit); |
| |
| if (emit->info.uses_vertexid) |
| emit_vertex_id_nobase_instruction(emit); |
| } |
| else if (emit->unit == PIPE_SHADER_TESS_EVAL) { |
| emit_temp_tessfactor_instructions(emit); |
| } |
| |
| /** |
| * For geometry shader that writes to viewport index, the prescale |
| * temporaries will be done at the first vertex emission. |
| */ |
| if (emit->vposition.need_prescale && emit->vposition.num_prescale == 1) |
| emit_temp_prescale_instructions(emit); |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * The device has no direct support for the pipe_blend_state::alpha_to_one |
| * option so we implement it here with shader code. |
| * |
| * Note that this is kind of pointless, actually. Here we're clobbering |
| * the alpha value with 1.0. So if alpha-to-coverage is enabled, we'll wind |
| * up with 100% coverage. That's almost certainly not what the user wants. |
| * The work-around is to add extra shader code to compute coverage from alpha |
| * and write it to the coverage output register (if the user's shader doesn't |
| * do so already). We'll probably do that in the future. |
| */ |
| static void |
| emit_alpha_to_one_instructions(struct svga_shader_emitter_v10 *emit, |
| unsigned fs_color_tmp_index) |
| { |
| struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); |
| unsigned i; |
| |
| /* Note: it's not 100% clear from the spec if we're supposed to clobber |
| * the alpha for all render targets. But that's what NVIDIA does and |
| * that's what Piglit tests. |
| */ |
| for (i = 0; i < emit->fs.num_color_outputs; i++) { |
| struct tgsi_full_dst_register color_dst; |
| |
| if (fs_color_tmp_index != INVALID_INDEX && i == 0) { |
| /* write to the temp color register */ |
| color_dst = make_dst_temp_reg(fs_color_tmp_index); |
| } |
| else { |
| /* write directly to the color[i] output */ |
| color_dst = make_dst_output_reg(emit->fs.color_out_index[i]); |
| } |
| |
| color_dst = writemask_dst(&color_dst, TGSI_WRITEMASK_W); |
| |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &one); |
| } |
| } |
| |
| |
| /** |
| * Emit alpha test code. This compares TEMP[fs_color_tmp_index].w |
| * against the alpha reference value and discards the fragment if the |
| * comparison fails. |
| */ |
| static void |
| emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit, |
| unsigned fs_color_tmp_index) |
| { |
| /* compare output color's alpha to alpha ref and kill */ |
| unsigned tmp = get_temp_index(emit); |
| struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); |
| struct tgsi_full_src_register tmp_src_x = |
| scalar_src(&tmp_src, TGSI_SWIZZLE_X); |
| struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); |
| struct tgsi_full_src_register color_src = |
| make_src_temp_reg(fs_color_tmp_index); |
| struct tgsi_full_src_register color_src_w = |
| scalar_src(&color_src, TGSI_SWIZZLE_W); |
| struct tgsi_full_src_register ref_src = |
| make_src_immediate_reg(emit->fs.alpha_ref_index); |
| struct tgsi_full_dst_register color_dst = |
| make_dst_output_reg(emit->fs.color_out_index[0]); |
| |
| assert(emit->unit == PIPE_SHADER_FRAGMENT); |
| |
| /* dst = src0 'alpha_func' src1 */ |
| emit_comparison(emit, emit->key.fs.alpha_func, &tmp_dst, |
| &color_src_w, &ref_src); |
| |
| /* DISCARD if dst.x == 0 */ |
| begin_emit_instruction(emit); |
| emit_discard_opcode(emit, FALSE); /* discard if src0.x is zero */ |
| emit_src_register(emit, &tmp_src_x); |
| end_emit_instruction(emit); |
| |
| /* If we don't need to broadcast the color below, emit the final color here. |
| */ |
| if (emit->key.fs.write_color0_to_n_cbufs <= 1) { |
| /* MOV output.color, tempcolor */ |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &color_src); |
| } |
| |
| free_temp_indexes(emit); |
| } |
| |
| |
| /** |
| * Emit instructions for writing a single color output to multiple |
| * color buffers. |
| * This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS (or |
| * when key.fs.white_fragments is true). |
| * property is set and the number of render targets is greater than one. |
| * \param fs_color_tmp_index index of the temp register that holds the |
| * color to broadcast. |
| */ |
| static void |
| emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit, |
| unsigned fs_color_tmp_index) |
| { |
| const unsigned n = emit->key.fs.write_color0_to_n_cbufs; |
| unsigned i; |
| struct tgsi_full_src_register color_src; |
| |
| if (emit->key.fs.white_fragments) { |
| /* set all color outputs to white */ |
| color_src = make_immediate_reg_float(emit, 1.0f); |
| } |
| else { |
| /* set all color outputs to TEMP[fs_color_tmp_index] */ |
| assert(fs_color_tmp_index != INVALID_INDEX); |
| color_src = make_src_temp_reg(fs_color_tmp_index); |
| } |
| |
| assert(emit->unit == PIPE_SHADER_FRAGMENT); |
| |
| for (i = 0; i < n; i++) { |
| unsigned output_reg = emit->fs.color_out_index[i]; |
| struct tgsi_full_dst_register color_dst = |
| make_dst_output_reg(output_reg); |
| |
| /* Fill in this semantic here since we'll use it later in |
| * emit_dst_register(). |
| */ |
| emit->info.output_semantic_name[output_reg] = TGSI_SEMANTIC_COLOR; |
| |
| /* MOV output.color[i], tempcolor */ |
| emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &color_src); |
| } |
| } |
| |
| |
| /** |
| * Emit extra helper code after the original shader code, but before the |
| * last END/RET instruction. |
| * For vertex shaders this means emitting the extra code to apply the |
| * prescale scale/translation. |
| */ |
| static boolean |
| emit_post_helpers(struct svga_shader_emitter_v10 *emit) |
| { |
| if (emit->unit == PIPE_SHADER_VERTEX) { |
| emit_vertex_instructions(emit); |
| } |
| else if (emit->unit == PIPE_SHADER_FRAGMENT) { |
| const unsigned fs_color_tmp_index = emit->fs.color_tmp_index; |
| |
| assert(!(emit->key.fs.white_fragments && |
| emit->key.fs.write_color0_to_n_cbufs == 0)); |
| |
| /* We no longer want emit_dst_register() to substitute the |
| * temporary fragment color register for the real color output. |
| */ |
| emit->fs.color_tmp_index = INVALID_INDEX; |
| |
| if (emit->key.fs.alpha_to_one) { |
| emit_alpha_to_one_instructions(emit, fs_color_tmp_index); |
| } |
| if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) { |
| emit_alpha_test_instructions(emit, fs_color_tmp_index); |
| } |
| if (emit->key.fs.write_color0_to_n_cbufs > 1 || |
| emit->key.fs.white_fragments) { |
| emit_broadcast_color_instructions(emit, fs_color_tmp_index); |
| } |
| } |
| else if (emit->unit == PIPE_SHADER_TESS_CTRL) { |
| if (!emit->tcs.control_point_phase) { |
| /* store the tessellation levels in the patch constant phase only */ |
| store_tesslevels(emit); |
| } |
| else { |
| emit_clipping_instructions(emit); |
| } |
| } |
| else if (emit->unit == PIPE_SHADER_TESS_EVAL) { |
| emit_vertex_instructions(emit); |
| } |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Translate the TGSI tokens into VGPU10 tokens. |
| */ |
| static boolean |
| emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_token *tokens) |
| { |
| struct tgsi_parse_context parse; |
| boolean ret = TRUE; |
| boolean pre_helpers_emitted = FALSE; |
| unsigned inst_number = 0; |
| |
| tgsi_parse_init(&parse, tokens); |
| |
| while (!tgsi_parse_end_of_tokens(&parse)) { |
| |
| /* Save the current tgsi token starting position */ |
| emit->cur_tgsi_token = parse.Position; |
| |
| tgsi_parse_token(&parse); |
| |
| switch (parse.FullToken.Token.Type) { |
| case TGSI_TOKEN_TYPE_IMMEDIATE: |
| ret = emit_vgpu10_immediate(emit, &parse.FullToken.FullImmediate); |
| if (!ret) |
| goto done; |
| break; |
| |
| case TGSI_TOKEN_TYPE_DECLARATION: |
| ret = emit_vgpu10_declaration(emit, &parse.FullToken.FullDeclaration); |
| if (!ret) |
| goto done; |
| break; |
| |
| case TGSI_TOKEN_TYPE_INSTRUCTION: |
| if (!pre_helpers_emitted) { |
| ret = emit_pre_helpers(emit); |
| if (!ret) |
| goto done; |
| pre_helpers_emitted = TRUE; |
| } |
| ret = emit_vgpu10_instruction(emit, inst_number++, |
| &parse.FullToken.FullInstruction); |
| |
| /* Usually this applies to TCS only. If shader is reading control |
| * point outputs in control point phase, we should reemit all |
| * instructions which are writting into control point output in |
| * control phase to store results into temporaries. |
| */ |
| if (emit->reemit_instruction) { |
| assert(emit->unit == PIPE_SHADER_TESS_CTRL); |
| ret = emit_vgpu10_instruction(emit, inst_number, |
| &parse.FullToken.FullInstruction); |
| } |
| else if (emit->initialize_temp_index != INVALID_INDEX) { |
| emit_initialize_temp_instruction(emit); |
| emit->initialize_temp_index = INVALID_INDEX; |
| ret = emit_vgpu10_instruction(emit, inst_number - 1, |
| &parse.FullToken.FullInstruction); |
| } |
| |
| if (!ret) |
| goto done; |
| break; |
| |
| case TGSI_TOKEN_TYPE_PROPERTY: |
| ret = emit_vgpu10_property(emit, &parse.FullToken.FullProperty); |
| if (!ret) |
| goto done; |
| break; |
| |
| default: |
| break; |
| } |
| } |
| |
| if (emit->unit == PIPE_SHADER_TESS_CTRL) { |
| ret = emit_hull_shader_patch_constant_phase(emit, &parse); |
| } |
| |
| done: |
| tgsi_parse_free(&parse); |
| return ret; |
| } |
| |
| |
| /** |
| * Emit the first VGPU10 shader tokens. |
| */ |
| static boolean |
| emit_vgpu10_header(struct svga_shader_emitter_v10 *emit) |
| { |
| VGPU10ProgramToken ptoken; |
| |
| /* First token: VGPU10ProgramToken (version info, program type (VS,GS,PS)) */ |
| ptoken.value = 0; /* init whole token to zero */ |
| ptoken.majorVersion = emit->version / 10; |
| ptoken.minorVersion = emit->version % 10; |
| ptoken.programType = translate_shader_type(emit->unit); |
| if (!emit_dword(emit, ptoken.value)) |
| return FALSE; |
| |
| /* Second token: total length of shader, in tokens. We can't fill this |
| * in until we're all done. Emit zero for now. |
| */ |
| if (!emit_dword(emit, 0)) |
| return FALSE; |
| |
| if (emit->version >= 50) { |
| VGPU10OpcodeToken0 token; |
| |
| if (emit->unit == PIPE_SHADER_TESS_CTRL) { |
| /* For hull shader, we need to start the declarations phase first before |
| * emitting any declarations including the global flags. |
| */ |
| token.value = 0; |
| token.opcodeType = VGPU10_OPCODE_HS_DECLS; |
| begin_emit_instruction(emit); |
| emit_dword(emit, token.value); |
| end_emit_instruction(emit); |
| } |
| |
| /* Emit global flags */ |
| token.value = 0; /* init whole token to zero */ |
| token.opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS; |
| token.enableDoublePrecisionFloatOps = 1; /* set bit */ |
| token.instructionLength = 1; |
| if (!emit_dword(emit, token.value)) |
| return FALSE; |
| } |
| |
| if (emit->version >= 40) { |
| VGPU10OpcodeToken0 token; |
| |
| /* Reserved for global flag such as refactoringAllowed. |
| * If the shader does not use the precise qualifier, we will set the |
| * refactoringAllowed global flag; otherwise, we will leave the reserved |
| * token to NOP. |
| */ |
| emit->reserved_token = (emit->ptr - emit->buf) / sizeof(VGPU10OpcodeToken0); |
| token.value = 0; |
| token.opcodeType = VGPU10_OPCODE_NOP; |
| token.instructionLength = 1; |
| if (!emit_dword(emit, token.value)) |
| return FALSE; |
| } |
| |
| return TRUE; |
| } |
| |
| |
| static boolean |
| emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit) |
| { |
| VGPU10ProgramToken *tokens; |
| |
| /* Replace the second token with total shader length */ |
| tokens = (VGPU10ProgramToken *) emit->buf; |
| tokens[1].value = emit_get_num_tokens(emit); |
| |
| if (emit->version >= 40 && !emit->uses_precise_qualifier) { |
| /* Replace the reserved token with the RefactoringAllowed global flag */ |
| VGPU10OpcodeToken0 *ptoken; |
| |
| ptoken = (VGPU10OpcodeToken0 *)&tokens[emit->reserved_token]; |
| assert(ptoken->opcodeType == VGPU10_OPCODE_NOP); |
| ptoken->opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS; |
| ptoken->refactoringAllowed = 1; |
| } |
| |
| return TRUE; |
| } |
| |
| |
/**
 * Modify the FS to read the BCOLORs and use the FACE register
 * to choose between the front/back colors.
 *
 * \return the token array produced by tgsi_add_two_side()
 */
static const struct tgsi_token *
transform_fs_twoside(const struct tgsi_token *tokens)
{
   const struct tgsi_token *two_sided_tokens;

   if (0) {
      /* debug: dump the shader before the transformation */
      debug_printf("Before tgsi_add_two_side ------------------\n");
      tgsi_dump(tokens,0);
   }

   two_sided_tokens = tgsi_add_two_side(tokens);

   if (0) {
      /* debug: dump the shader after the transformation */
      debug_printf("After tgsi_add_two_side ------------------\n");
      tgsi_dump(two_sided_tokens, 0);
   }

   return two_sided_tokens;
}
| |
| |
| /** |
| * Modify the FS to do polygon stipple. |
| */ |
| static const struct tgsi_token * |
| transform_fs_pstipple(struct svga_shader_emitter_v10 *emit, |
| const struct tgsi_token *tokens) |
| { |
| const struct tgsi_token *new_tokens; |
| unsigned unit; |
| |
| if (0) { |
| debug_printf("Before pstipple ------------------\n"); |
| tgsi_dump(tokens,0); |
| } |
| |
| new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0, |
| TGSI_FILE_INPUT); |
| |
| emit->fs.pstipple_sampler_unit = unit; |
| |
| /* Setup texture state for stipple */ |
| emit->sampler_target[unit] = TGSI_TEXTURE_2D; |
| emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X; |
| emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y; |
| emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z; |
| emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W; |
| |
| if (0) { |
| debug_printf("After pstipple ------------------\n"); |
| tgsi_dump(new_tokens, 0); |
| } |
| |
| return new_tokens; |
| } |
| |
/**
 * Rewrite a fragment shader so that it supports anti-aliased points.
 *
 * \param tokens          the fragment shader's TGSI tokens
 * \param aa_coord_index  index passed through to tgsi_add_aa_point()
 * \return the token string produced by tgsi_add_aa_point()
 */
static const struct tgsi_token *
transform_fs_aapoint(const struct tgsi_token *tokens,
                     int aa_coord_index)
{
   const struct tgsi_token *aa_tokens;

   /* Change the 0 to 1 to dump the shader before the transform */
   if (0) {
      debug_printf("Before tgsi_add_aa_point ------------------\n");
      tgsi_dump(tokens, 0);
   }

   aa_tokens = tgsi_add_aa_point(tokens, aa_coord_index);

   /* Change the 0 to 1 to dump the shader after the transform */
   if (0) {
      debug_printf("After tgsi_add_aa_point ------------------\n");
      tgsi_dump(aa_tokens, 0);
   }

   return aa_tokens;
}
| |
| |
| /** |
| * A helper function to determine the shader in the previous stage and |
| * then call the linker function to determine the input mapping for this |
| * shader to match the output indices from the shader in the previous stage. |
| */ |
| static void |
| compute_input_mapping(struct svga_context *svga, |
| struct svga_shader_emitter_v10 *emit, |
| enum pipe_shader_type unit) |
| { |
| struct svga_shader *prevShader = NULL; /* shader in the previous stage */ |
| |
| if (unit == PIPE_SHADER_FRAGMENT) { |
| prevShader = svga->curr.gs ? |
| &svga->curr.gs->base : (svga->curr.tes ? |
| &svga->curr.tes->base : &svga->curr.vs->base); |
| } else if (unit == PIPE_SHADER_GEOMETRY) { |
| prevShader = svga->curr.tes ? &svga->curr.tes->base : &svga->curr.vs->base; |
| } else if (unit == PIPE_SHADER_TESS_EVAL) { |
| assert(svga->curr.tcs); |
| prevShader = &svga->curr.tcs->base; |
| } else if (unit == PIPE_SHADER_TESS_CTRL) { |
| assert(svga->curr.vs); |
| prevShader = &svga->curr.vs->base; |
| } |
| |
| if (prevShader != NULL) { |
| svga_link_shaders(&prevShader->info, &emit->info, &emit->linkage); |
| emit->prevShaderInfo = &prevShader->info; |
| } |
| else { |
| /** |
| * Since vertex shader does not need to go through the linker to |
| * establish the input map, we need to make sure the highest index |
| * of input registers is set properly here. |
| */ |
| emit->linkage.input_map_max = MAX2((int)emit->linkage.input_map_max, |
| emit->info.file_max[TGSI_FILE_INPUT]); |
| } |
| } |
| |
| |
| /** |
| * Copies the shader signature info to the shader variant |
| */ |
| static void |
| copy_shader_signature(struct svga_shader_signature *sgn, |
| struct svga_shader_variant *variant) |
| { |
| SVGA3dDXShaderSignatureHeader *header = &sgn->header; |
| |
| /* Calculate the signature length */ |
| variant->signatureLen = sizeof(SVGA3dDXShaderSignatureHeader) + |
| (header->numInputSignatures + |
| header->numOutputSignatures + |
| header->numPatchConstantSignatures) * |
| sizeof(SVGA3dDXShaderSignatureEntry); |
| |
| /* Allocate buffer for the signature info */ |
| variant->signature = |
| (SVGA3dDXShaderSignatureHeader *)CALLOC(1, variant->signatureLen); |
| |
| char *sgnBuf = (char *)variant->signature; |
| unsigned sgnLen; |
| |
| /* Copy the signature info to the shader variant structure */ |
| memcpy(sgnBuf, &sgn->header, sizeof(SVGA3dDXShaderSignatureHeader)); |
| sgnBuf += sizeof(SVGA3dDXShaderSignatureHeader); |
| |
| if (header->numInputSignatures) { |
| sgnLen = |
| header->numInputSignatures * sizeof(SVGA3dDXShaderSignatureEntry); |
| memcpy(sgnBuf, &sgn->inputs[0], sgnLen); |
| sgnBuf += sgnLen; |
| } |
| |
| if (header->numOutputSignatures) { |
| sgnLen = |
| header->numOutputSignatures * sizeof(SVGA3dDXShaderSignatureEntry); |
| memcpy(sgnBuf, &sgn->outputs[0], sgnLen); |
| sgnBuf += sgnLen; |
| } |
| |
| if (header->numPatchConstantSignatures) { |
| sgnLen = |
| header->numPatchConstantSignatures * sizeof(SVGA3dDXShaderSignatureEntry); |
| memcpy(sgnBuf, &sgn->patchConstants[0], sgnLen); |
| } |
| } |
| |
| |
| /** |
| * This is the main entrypoint for the TGSI -> VPGU10 translator. |
| */ |
| struct svga_shader_variant * |
| svga_tgsi_vgpu10_translate(struct svga_context *svga, |
| const struct svga_shader *shader, |
| const struct svga_compile_key *key, |
| enum pipe_shader_type unit) |
| { |
| struct svga_shader_variant *variant = NULL; |
| struct svga_shader_emitter_v10 *emit; |
| const struct tgsi_token *tokens = shader->tokens; |
| |
| (void) make_immediate_reg_double; /* unused at this time */ |
| |
| assert(unit == PIPE_SHADER_VERTEX || |
| unit == PIPE_SHADER_GEOMETRY || |
| unit == PIPE_SHADER_FRAGMENT || |
| unit == PIPE_SHADER_TESS_CTRL || |
| unit == PIPE_SHADER_TESS_EVAL || |
| unit == PIPE_SHADER_COMPUTE); |
| |
| /* These two flags cannot be used together */ |
| assert(key->vs.need_prescale + key->vs.undo_viewport <= 1); |
| |
| SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_TGSIVGPU10TRANSLATE); |
| /* |
| * Setup the code emitter |
| */ |
| emit = alloc_emitter(); |
| if (!emit) |
| goto done; |
| |
| emit->unit = unit; |
| if (svga_have_sm5(svga)) { |
| emit->version = 50; |
| } else if (svga_have_sm4_1(svga)) { |
| emit->version = 41; |
| } else { |
| emit->version = 40; |
| } |
| |
| emit->signature.header.headerVersion = SVGADX_SIGNATURE_HEADER_VERSION_0; |
| |
| emit->key = *key; |
| |
| emit->vposition.need_prescale = (emit->key.vs.need_prescale || |
| emit->key.gs.need_prescale || |
| emit->key.tes.need_prescale); |
| |
| /* Determine how many prescale factors in the constant buffer */ |
| emit->vposition.num_prescale = 1; |
| if (emit->vposition.need_prescale && emit->key.gs.writes_viewport_index) { |
| assert(emit->unit == PIPE_SHADER_GEOMETRY); |
| emit->vposition.num_prescale = emit->key.gs.num_prescale; |
| } |
| |
| emit->vposition.tmp_index = INVALID_INDEX; |
| emit->vposition.so_index = INVALID_INDEX; |
| emit->vposition.out_index = INVALID_INDEX; |
| |
| emit->vs.vertex_id_sys_index = INVALID_INDEX; |
| emit->vs.vertex_id_tmp_index = INVALID_INDEX; |
| emit->vs.vertex_id_bias_index = INVALID_INDEX; |
| |
| emit->fs.color_tmp_index = INVALID_INDEX; |
| emit->fs.face_input_index = INVALID_INDEX; |
| emit->fs.fragcoord_input_index = INVALID_INDEX; |
| emit->fs.sample_id_sys_index = INVALID_INDEX; |
| emit->fs.sample_pos_sys_index = INVALID_INDEX; |
| emit->fs.sample_mask_in_sys_index = INVALID_INDEX; |
| emit->fs.layer_input_index = INVALID_INDEX; |
| emit->fs.layer_imm_index = INVALID_INDEX; |
| |
| emit->gs.prim_id_index = INVALID_INDEX; |
| emit->gs.invocation_id_sys_index = INVALID_INDEX; |
| emit->gs.viewport_index_out_index = INVALID_INDEX; |
| emit->gs.viewport_index_tmp_index = INVALID_INDEX; |
| |
| emit->tcs.vertices_per_patch_index = INVALID_INDEX; |
| emit->tcs.invocation_id_sys_index = INVALID_INDEX; |
| emit->tcs.control_point_input_index = INVALID_INDEX; |
| emit->tcs.control_point_addr_index = INVALID_INDEX; |
| emit->tcs.control_point_out_index = INVALID_INDEX; |
| emit->tcs.control_point_tmp_index = INVALID_INDEX; |
| emit->tcs.control_point_out_count = 0; |
| emit->tcs.inner.out_index = INVALID_INDEX; |
| emit->tcs.inner.out_index = INVALID_INDEX; |
| emit->tcs.inner.temp_index = INVALID_INDEX; |
| emit->tcs.inner.tgsi_index = INVALID_INDEX; |
| emit->tcs.outer.out_index = INVALID_INDEX; |
| emit->tcs.outer.temp_index = INVALID_INDEX; |
| emit->tcs.outer.tgsi_index = INVALID_INDEX; |
| emit->tcs.patch_generic_out_count = 0; |
| emit->tcs.patch_generic_out_index = INVALID_INDEX; |
| emit->tcs.patch_generic_tmp_index = INVALID_INDEX; |
| emit->tcs.prim_id_index = INVALID_INDEX; |
| |
| emit->tes.tesscoord_sys_index = INVALID_INDEX; |
| emit->tes.inner.in_index = INVALID_INDEX; |
| emit->tes.inner.temp_index = INVALID_INDEX; |
| emit->tes.inner.tgsi_index = INVALID_INDEX; |
| emit->tes.outer.in_index = INVALID_INDEX; |
| emit->tes.outer.temp_index = INVALID_INDEX; |
| emit->tes.outer.tgsi_index = INVALID_INDEX; |
| emit->tes.prim_id_index = INVALID_INDEX; |
| |
| emit->clip_dist_out_index = INVALID_INDEX; |
| emit->clip_dist_tmp_index = INVALID_INDEX; |
| emit->clip_dist_so_index = INVALID_INDEX; |
| emit->clip_vertex_out_index = INVALID_INDEX; |
| emit->clip_vertex_tmp_index = INVALID_INDEX; |
| emit->svga_debug_callback = svga->debug.callback; |
| |
| emit->index_range.start_index = INVALID_INDEX; |
| emit->index_range.count = 0; |
| emit->index_range.required = FALSE; |
| emit->index_range.operandType = VGPU10_NUM_OPERANDS; |
| emit->index_range.dim = 0; |
| emit->index_range.size = 0; |
| |
| emit->current_loop_depth = 0; |
| |
| emit->initialize_temp_index = INVALID_INDEX; |
| |
| if (emit->key.fs.alpha_func == SVGA3D_CMP_INVALID) { |
| emit->key.fs.alpha_func = SVGA3D_CMP_ALWAYS; |
| } |
| |
| if (unit == PIPE_SHADER_FRAGMENT) { |
| if (key->fs.light_twoside) { |
| tokens = transform_fs_twoside(tokens); |
| } |
| if (key->fs.pstipple) { |
| const struct tgsi_token *new_tokens = |
| transform_fs_pstipple(emit, tokens); |
| if (tokens != shader->tokens) { |
| /* free the two-sided shader tokens */ |
| tgsi_free_tokens(tokens); |
| } |
| tokens = new_tokens; |
| } |
| if (key->fs.aa_point) { |
| tokens = transform_fs_aapoint(tokens, key->fs.aa_point_coord_index); |
| } |
| } |
| |
| if (SVGA_DEBUG & DEBUG_TGSI) { |
| debug_printf("#####################################\n"); |
| debug_printf("### TGSI Shader %u\n", shader->id); |
| tgsi_dump(tokens, 0); |
| } |
| |
| /** |
| * Rescan the header if the token string is different from the one |
| * included in the shader; otherwise, the header info is already up-to-date |
| */ |
| if (tokens != shader->tokens) { |
| tgsi_scan_shader(tokens, &emit->info); |
| } else { |
| emit->info = shader->info; |
| } |
| |
| emit->num_outputs = emit->info.num_outputs; |
| |
| /** |
| * Compute input mapping to match the outputs from shader |
| * in the previous stage |
| */ |
| compute_input_mapping(svga, emit, unit); |
| |
| determine_clipping_mode(emit); |
| |
| if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX || |
| unit == PIPE_SHADER_TESS_CTRL || unit == PIPE_SHADER_TESS_EVAL) { |
| if (shader->stream_output != NULL || emit->clip_mode == CLIP_DISTANCE) { |
| /* if there is stream output declarations associated |
| * with this shader or the shader writes to ClipDistance |
| * then reserve extra registers for the non-adjusted vertex position |
| * and the ClipDistance shadow copy. |
| */ |
| emit->vposition.so_index = emit->num_outputs++; |
| |
| if (emit->clip_mode == CLIP_DISTANCE) { |
| emit->clip_dist_so_index = emit->num_outputs++; |
| if (emit->info.num_written_clipdistance > 4) |
| emit->num_outputs++; |
| } |
| } |
| } |
| |
| /* |
| * Do actual shader translation. |
| */ |
| if (!emit_vgpu10_header(emit)) { |
| debug_printf("svga: emit VGPU10 header failed\n"); |
| goto cleanup; |
| } |
| |
| if (!emit_vgpu10_instructions(emit, tokens)) { |
| debug_printf("svga: emit VGPU10 instructions failed\n"); |
| goto cleanup; |
| } |
| |
| if (!emit_vgpu10_tail(emit)) { |
| debug_printf("svga: emit VGPU10 tail failed\n"); |
| goto cleanup; |
| } |
| |
| if (emit->register_overflow) { |
| goto cleanup; |
| } |
| |
| /* |
| * Create, initialize the 'variant' object. |
| */ |
| variant = svga_new_shader_variant(svga, unit); |
| if (!variant) |
| goto cleanup; |
| |
| variant->shader = shader; |
| variant->nr_tokens = emit_get_num_tokens(emit); |
| variant->tokens = (const unsigned *)emit->buf; |
| |
| /* Copy shader signature info to the shader variant */ |
| if (svga_have_sm5(svga)) { |
| copy_shader_signature(&emit->signature, variant); |
| } |
| |
| emit->buf = NULL; /* buffer is no longer owed by emitter context */ |
| memcpy(&variant->key, key, sizeof(*key)); |
| variant->id = UTIL_BITMASK_INVALID_INDEX; |
| |
| /* The extra constant starting offset starts with the number of |
| * shader constants declared in the shader. |
| */ |
| variant->extra_const_start = emit->num_shader_consts[0]; |
| if (key->gs.wide_point) { |
| /** |
| * The extra constant added in the transformed shader |
| * for inverse viewport scale is to be supplied by the driver. |
| * So the extra constant starting offset needs to be reduced by 1. |
| */ |
| assert(variant->extra_const_start > 0); |
| variant->extra_const_start--; |
| } |
| |
| if (unit == PIPE_SHADER_FRAGMENT) { |
| struct svga_fs_variant *fs_variant = svga_fs_variant(variant); |
| |
| fs_variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit; |
| |
| /* If there was exactly one write to a fragment shader output register |
| * and it came from a constant buffer, we know all fragments will have |
| * the same color (except for blending). |
| */ |
| fs_variant->constant_color_output = |
| emit->constant_color_output && emit->num_output_writes == 1; |
| |
| /** keep track in the variant if flat interpolation is used |
| * for any of the varyings. |
| */ |
| fs_variant->uses_flat_interp = emit->uses_flat_interp; |
| |
| fs_variant->fs_shadow_compare_units = emit->fs.shadow_compare_units; |
| } |
| else if (unit == PIPE_SHADER_TESS_EVAL) { |
| struct svga_tes_variant *tes_variant = svga_tes_variant(variant); |
| |
| /* Keep track in the tes variant some of the layout parameters. |
| * These parameters will be referenced by the tcs to emit |
| * the necessary declarations for the hull shader. |
| */ |
| tes_variant->prim_mode = emit->tes.prim_mode; |
| tes_variant->spacing = emit->tes.spacing; |
| tes_variant->vertices_order_cw = emit->tes.vertices_order_cw; |
| tes_variant->point_mode = emit->tes.point_mode; |
| } |
| |
| |
| if (tokens != shader->tokens) { |
| tgsi_free_tokens(tokens); |
| } |
| |
| cleanup: |
| free_emitter(emit); |
| |
| done: |
| SVGA_STATS_TIME_POP(svga_sws(svga)); |
| return variant; |
| } |