| /* -*- mesa-c++ -*- |
| * |
| * Copyright (c) 2018 Collabora LTD |
| * |
| * Author: Gert Wollny <gert.wollny@collabora.com> |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * on the rights to use, copy, modify, merge, publish, distribute, sub |
| * license, and/or sell copies of the Software, and to permit persons to whom |
| * the Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
| * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
| * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
| * USE OR OTHER DEALINGS IN THE SOFTWARE. |
| */ |
| |
| |
| #include "pipe/p_defines.h" |
| #include "tgsi/tgsi_from_mesa.h" |
| #include "sfn_shader_vertex.h" |
| |
| #include <queue> |
| |
| |
| namespace r600 { |
| |
| using std::priority_queue; |
| |
| VertexShaderFromNir::VertexShaderFromNir(r600_pipe_shader *sh, |
| r600_pipe_shader_selector& sel, |
| const r600_shader_key& key): |
| ShaderFromNirProcessor (PIPE_SHADER_VERTEX, sel, sh->shader, |
| sh->scratch_space_needed), |
| m_num_clip_dist(0), |
| m_last_param_export(nullptr), |
| m_last_pos_export(nullptr), |
| m_pipe_shader(sh), |
| m_enabled_stream_buffers_mask(0), |
| m_so_info(&sel.so), |
| m_cur_param(0), |
| m_cur_clip_pos(1), |
| m_vertex_id(), |
| m_key(key) |
| { |
| // reg 0 is used in the fetch shader |
| increment_reserved_registers(); |
| |
| sh_info().atomic_base = key.vs.first_atomic_counter; |
| } |
| |
| bool VertexShaderFromNir::do_process_inputs(nir_variable *input) |
| { |
| ++sh_info().ninput; |
| |
| if (input->data.location < VERT_ATTRIB_MAX) { |
| increment_reserved_registers(); |
| return true; |
| } |
| fprintf(stderr, "r600-NIR-VS: Unimplemented process_inputs for %d\n", input->data.location); |
| return false; |
| } |
| |
| bool VertexShaderFromNir::allocate_reserved_registers() |
| { |
| /* Since the vertex ID is nearly always used, we add it here as an input so |
| * that the registers used for vertex attributes don't get clobbered by the |
| * register merge step */ |
| auto R0x = new GPRValue(0,0); |
| R0x->set_as_input(); |
| m_vertex_id.reset(R0x); |
| inject_register(0, 0, m_vertex_id, false); |
| |
| if (m_sv_values.test(es_instanceid)) { |
| auto R0w = new GPRValue(0,3); |
| R0w->set_as_input(); |
| m_instance_id.reset(R0w); |
| inject_register(0, 3, m_instance_id, false); |
| } |
| |
| priority_queue<int, std::vector<int>, std::greater<int>> q; |
| for (auto a: m_param_map) { |
| q.push(a.first); |
| } |
| |
| int next_param = 0; |
| while (!q.empty()) { |
| int loc = q.top(); |
| q.pop(); |
| m_param_map[loc] = next_param++; |
| } |
| return true; |
| } |
| |
| bool VertexShaderFromNir::scan_sysvalue_access(nir_instr *instr) |
| { |
| switch (instr->type) { |
| case nir_instr_type_intrinsic: { |
| nir_intrinsic_instr *ii = nir_instr_as_intrinsic(instr); |
| switch (ii->intrinsic) { |
| case nir_intrinsic_load_vertex_id: |
| m_sv_values.set(es_vertexid); |
| break; |
| case nir_intrinsic_load_instance_id: |
| m_sv_values.set(es_instanceid); |
| break; |
| default: |
| ; |
| } |
| } |
| default: |
| ; |
| } |
| return true; |
| } |
| |
| bool VertexShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) |
| { |
| switch (instr->intrinsic) { |
| case nir_intrinsic_load_vertex_id: |
| return load_preloaded_value(instr->dest, 0, m_vertex_id); |
| case nir_intrinsic_load_instance_id: |
| return load_preloaded_value(instr->dest, 0, m_instance_id); |
| default: |
| return false; |
| } |
| } |
| |
| bool VertexShaderFromNir::do_process_outputs(nir_variable *output) |
| { |
| if (output->data.location == VARYING_SLOT_COL0 || |
| output->data.location == VARYING_SLOT_COL1 || |
| (output->data.location >= VARYING_SLOT_VAR0 && |
| output->data.location <= VARYING_SLOT_VAR31) || |
| (output->data.location >= VARYING_SLOT_TEX0 && |
| output->data.location <= VARYING_SLOT_TEX7) || |
| output->data.location == VARYING_SLOT_BFC0 || |
| output->data.location == VARYING_SLOT_BFC1 || |
| output->data.location == VARYING_SLOT_CLIP_VERTEX || |
| output->data.location == VARYING_SLOT_CLIP_DIST0 || |
| output->data.location == VARYING_SLOT_CLIP_DIST1 || |
| output->data.location == VARYING_SLOT_POS || |
| output->data.location == VARYING_SLOT_PSIZ || |
| output->data.location == VARYING_SLOT_FOGC || |
| output->data.location == VARYING_SLOT_LAYER || |
| output->data.location == VARYING_SLOT_EDGE || |
| output->data.location == VARYING_SLOT_VIEWPORT |
| ) { |
| |
| r600_shader_io& io = sh_info().output[output->data.driver_location]; |
| tgsi_get_gl_varying_semantic(static_cast<gl_varying_slot>( output->data.location), |
| true, &io.name, &io.sid); |
| if (! m_key.vs.as_es) |
| evaluate_spi_sid(io); |
| ++sh_info().noutput; |
| |
| if (output->data.location == VARYING_SLOT_PSIZ || |
| output->data.location == VARYING_SLOT_EDGE || |
| output->data.location == VARYING_SLOT_LAYER) |
| m_cur_clip_pos = 2; |
| |
| if (output->data.location != VARYING_SLOT_POS && |
| output->data.location != VARYING_SLOT_EDGE && |
| output->data.location != VARYING_SLOT_PSIZ && |
| output->data.location != VARYING_SLOT_CLIP_VERTEX) |
| m_param_map[output->data.location] = m_cur_param++; |
| |
| return true; |
| } |
| return false; |
| } |
| |
| bool VertexShaderFromNir::do_emit_load_deref(const nir_variable *in_var, nir_intrinsic_instr* instr) |
| { |
| if (in_var->data.location < VERT_ATTRIB_MAX) { |
| for (int i = 0; i < instr->num_components ; ++i) { |
| auto s = new GPRValue(in_var->data.driver_location + 1, i); |
| s->set_as_input(); |
| auto src = PValue(s); |
| inject_register(in_var->data.driver_location + 1, i, src, false); |
| |
| if (i == 0) |
| set_input(in_var->data.driver_location, src); |
| |
| load_preloaded_value(instr->dest, i, src, i == instr->num_components - 1); |
| } |
| return true; |
| } |
| fprintf(stderr, "r600-NIR: Unimplemented load_deref for %d\n", in_var->data.location); |
| return false; |
| } |
| |
| bool VertexShaderFromNir::emit_clip_vertices(const nir_variable *out_var, nir_intrinsic_instr* instr) |
| { |
| sh_info().cc_dist_mask = 0xff; |
| sh_info().clip_dist_write = 0xff; |
| |
| std::unique_ptr<GPRVector> clip_vertex(vec_from_nir_with_fetch_constant(instr->src[1], 0xf, {0,1,2,3})); |
| |
| for (int i = 0; i < 4; ++i) |
| sh_info().output[out_var->data.driver_location].write_mask |= 1 << i; |
| |
| GPRVector clip_dist[2] = { get_temp_vec4(), get_temp_vec4()}; |
| |
| for (int i = 0; i < 8; i++) { |
| int oreg = i >> 2; |
| int ochan = i & 3; |
| AluInstruction *ir = nullptr; |
| for (int j = 0; j < 4; j++) { |
| ir = new AluInstruction(op2_dot4_ieee, clip_dist[oreg].reg_i(j), clip_vertex->reg_i(j), |
| PValue(new UniformValue(512 + i, j, R600_BUFFER_INFO_CONST_BUFFER)), |
| (j == ochan) ? EmitInstruction::write : EmitInstruction::empty); |
| emit_instruction(ir); |
| } |
| ir->set_flag(alu_last_instr); |
| } |
| |
| m_last_pos_export = new ExportInstruction(m_cur_clip_pos++, clip_dist[0], ExportInstruction::et_pos); |
| emit_export_instruction(m_last_pos_export); |
| |
| m_last_pos_export = new ExportInstruction(m_cur_clip_pos, clip_dist[1], ExportInstruction::et_pos); |
| emit_export_instruction(m_last_pos_export); |
| |
| return true; |
| } |
| |
| bool VertexShaderFromNir::emit_varying_pos(const nir_variable *out_var, nir_intrinsic_instr* instr, |
| std::array<uint32_t, 4> *swizzle_override) |
| { |
| std::array<uint32_t,4> swizzle; |
| uint32_t write_mask = 0; |
| |
| if (swizzle_override) { |
| swizzle = *swizzle_override; |
| for (int i = 0; i < 4; ++i) { |
| if (swizzle[i] < 6) |
| write_mask |= 1 << i; |
| } |
| } else { |
| write_mask = nir_intrinsic_write_mask(instr) << out_var->data.location_frac; |
| for (int i = 0; i < 4; ++i) |
| swizzle[i] = ((1 << i) & write_mask) ? i - out_var->data.location_frac : 7; |
| } |
| |
| sh_info().output[out_var->data.driver_location].write_mask = write_mask; |
| |
| GPRVector *value = vec_from_nir_with_fetch_constant(instr->src[1], write_mask, swizzle); |
| set_output(out_var->data.driver_location, PValue(value)); |
| |
| int export_slot = 0; |
| |
| switch (out_var->data.location) { |
| case VARYING_SLOT_EDGE: { |
| sh_info().vs_out_misc_write = 1; |
| sh_info().vs_out_edgeflag = 1; |
| emit_instruction(op1_mov, value->reg_i(1), {value->reg_i(1)}, {alu_write, alu_dst_clamp, alu_last_instr}); |
| emit_instruction(op1_flt_to_int, value->reg_i(1), {value->reg_i(1)}, {alu_write, alu_last_instr}); |
| sh_info().output[out_var->data.driver_location].write_mask = 0xf; |
| } |
| /* fallthrough */ |
| case VARYING_SLOT_PSIZ: |
| case VARYING_SLOT_LAYER: |
| export_slot = 1; |
| break; |
| case VARYING_SLOT_POS: |
| break; |
| case VARYING_SLOT_CLIP_DIST0: |
| case VARYING_SLOT_CLIP_DIST1: |
| export_slot = m_cur_clip_pos++; |
| break; |
| default: |
| sfn_log << SfnLog::err << __func__ << "Unsupported location " |
| << out_var->data.location << "\n"; |
| return false; |
| } |
| |
| m_last_pos_export = new ExportInstruction(export_slot, *value, ExportInstruction::et_pos); |
| emit_export_instruction(m_last_pos_export); |
| add_param_output_reg(out_var->data.driver_location, m_last_pos_export->gpr_ptr()); |
| return true; |
| } |
| |
| bool VertexShaderFromNir::emit_varying_param(const nir_variable *out_var, nir_intrinsic_instr* instr) |
| { |
| assert(out_var->data.driver_location < sh_info().noutput); |
| sfn_log << SfnLog::io << __func__ << ": emit DDL: " << out_var->data.driver_location << "\n"; |
| |
| int write_mask = nir_intrinsic_write_mask(instr) << out_var->data.location_frac; |
| std::array<uint32_t,4> swizzle; |
| for (int i = 0; i < 4; ++i) |
| swizzle[i] = ((1 << i) & write_mask) ? i - out_var->data.location_frac : 7; |
| |
| sh_info().output[out_var->data.driver_location].write_mask = write_mask; |
| |
| GPRVector *value = vec_from_nir_with_fetch_constant(instr->src[1], write_mask, swizzle); |
| sh_info().output[out_var->data.driver_location].gpr = value->sel(); |
| |
| /* This should use the registers!! */ |
| set_output(out_var->data.driver_location, PValue(value)); |
| |
| auto param_loc = m_param_map.find(out_var->data.location); |
| assert(param_loc != m_param_map.end()); |
| |
| m_last_param_export = new ExportInstruction(param_loc->second, *value, ExportInstruction::et_param); |
| emit_export_instruction(m_last_param_export); |
| add_param_output_reg(out_var->data.driver_location, m_last_param_export->gpr_ptr()); |
| return true; |
| } |
| |
| bool VertexShaderFromNir::emit_stream(int stream) |
| { |
| assert(m_so_info); |
| if (m_so_info->num_outputs > PIPE_MAX_SO_OUTPUTS) { |
| R600_ERR("Too many stream outputs: %d\n", m_so_info->num_outputs); |
| return false; |
| } |
| for (unsigned i = 0; i < m_so_info->num_outputs; i++) { |
| if (m_so_info->output[i].output_buffer >= 4) { |
| R600_ERR("Exceeded the max number of stream output buffers, got: %d\n", |
| m_so_info->output[i].output_buffer); |
| return false; |
| } |
| } |
| const GPRVector *so_gpr[PIPE_MAX_SHADER_OUTPUTS]; |
| unsigned start_comp[PIPE_MAX_SHADER_OUTPUTS]; |
| std::vector<GPRVector> tmp(m_so_info->num_outputs); |
| |
| /* Initialize locations where the outputs are stored. */ |
| for (unsigned i = 0; i < m_so_info->num_outputs; i++) { |
| if (stream != -1 && stream != m_so_info->output[i].stream) |
| continue; |
| |
| sfn_log << SfnLog::instr << "Emit stream " << i |
| << " with register index " << m_so_info->output[i].register_index << " so_gpr:"; |
| |
| |
| so_gpr[i] = output_register(m_so_info->output[i].register_index); |
| |
| if (!so_gpr[i]) { |
| sfn_log << SfnLog::err << "\nERR: register index " |
| << m_so_info->output[i].register_index |
| << " doesn't correspond to an output register\n"; |
| return false; |
| } |
| start_comp[i] = m_so_info->output[i].start_component; |
| /* Lower outputs with dst_offset < start_component. |
| * |
| * We can only output 4D vectors with a write mask, e.g. we can |
| * only output the W component at offset 3, etc. If we want |
| * to store Y, Z, or W at buffer offset 0, we need to use MOV |
| * to move it to X and output X. */ |
| if (m_so_info->output[i].dst_offset < m_so_info->output[i].start_component) { |
| int tmp_index = allocate_temp_register(); |
| int sc = m_so_info->output[i].start_component; |
| AluInstruction *alu = nullptr; |
| for (int j = 0; j < m_so_info->output[i].num_components; j++) { |
| PValue dst(new GPRValue(tmp_index, j)); |
| alu = new AluInstruction(op1_mov, dst, so_gpr[i]->reg_i(j + sc), {alu_write}); |
| tmp[i].set_reg_i(j, dst); |
| emit_instruction(alu); |
| } |
| if (alu) |
| alu->set_flag(alu_last_instr); |
| |
| /* Fill the vector with masked values */ |
| PValue dst_blank(new GPRValue(tmp_index, 7)); |
| for (int j = m_so_info->output[i].num_components; j < 4; j++) |
| tmp[i].set_reg_i(j, dst_blank); |
| |
| start_comp[i] = 0; |
| so_gpr[i] = &tmp[i]; |
| } |
| sfn_log << SfnLog::instr << *so_gpr[i] << "\n"; |
| } |
| |
| /* Write outputs to buffers. */ |
| for (unsigned i = 0; i < m_so_info->num_outputs; i++) { |
| sfn_log << SfnLog::instr << "Write output buffer " << i |
| << " with register index " << m_so_info->output[i].register_index << "\n"; |
| |
| StreamOutIntruction *out_stream = |
| new StreamOutIntruction(*so_gpr[i], |
| m_so_info->output[i].num_components, |
| m_so_info->output[i].dst_offset - start_comp[i], |
| ((1 << m_so_info->output[i].num_components) - 1) << start_comp[i], |
| m_so_info->output[i].output_buffer, |
| m_so_info->output[i].stream); |
| emit_export_instruction(out_stream); |
| m_enabled_stream_buffers_mask |= (1 << m_so_info->output[i].output_buffer) << m_so_info->output[i].stream * 4; |
| } |
| return true; |
| } |
| |
| void VertexShaderFromNir::do_finalize() |
| { |
| if (m_key.vs.as_gs_a) { |
| PValue o(new GPRValue(0,PIPE_SWIZZLE_0)); |
| GPRVector primid({PValue(new GPRValue(0,2)), o,o,o}); |
| m_last_param_export = new ExportInstruction(m_cur_param, primid, ExportInstruction::et_param); |
| emit_export_instruction(m_last_param_export); |
| int i; |
| i = sh_info().noutput++; |
| auto& io = sh_info().output[i]; |
| io.name = TGSI_SEMANTIC_PRIMID; |
| io.sid = 0; |
| io.gpr = 0; |
| io.interpolate = TGSI_INTERPOLATE_CONSTANT; |
| io.write_mask = 0x4; |
| io.spi_sid = m_key.vs.prim_id_out; |
| sh_info().vs_as_gs_a = 1; |
| } |
| |
| finalize_exports(); |
| } |
| |
| |
| bool VertexShaderFromNirForFS::do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) |
| { |
| |
| switch (out_var->data.location) { |
| case VARYING_SLOT_PSIZ: |
| sh_info().vs_out_point_size = 1; |
| sh_info().vs_out_misc_write = 1; |
| /* fallthrough */ |
| case VARYING_SLOT_POS: |
| return emit_varying_pos(out_var, instr); |
| case VARYING_SLOT_EDGE: { |
| std::array<uint32_t, 4> swizzle_override = {7 ,0, 7, 7}; |
| return emit_varying_pos(out_var, instr, &swizzle_override); |
| } |
| case VARYING_SLOT_CLIP_VERTEX: |
| return emit_clip_vertices(out_var, instr); |
| case VARYING_SLOT_CLIP_DIST0: |
| case VARYING_SLOT_CLIP_DIST1: |
| m_num_clip_dist += 4; |
| return emit_varying_param(out_var, instr) && emit_varying_pos(out_var, instr); |
| case VARYING_SLOT_LAYER: { |
| sh_info().vs_out_misc_write = 1; |
| sh_info().vs_out_layer = 1; |
| std::array<uint32_t, 4> swz = {7,7,0,7}; |
| return emit_varying_pos(out_var, instr, &swz) && |
| emit_varying_param(out_var, instr); |
| } |
| case VARYING_SLOT_VIEW_INDEX: |
| return emit_varying_pos(out_var, instr) && |
| emit_varying_param(out_var, instr); |
| |
| default: |
| if (out_var->data.location <= VARYING_SLOT_VAR31 || |
| (out_var->data.location >= VARYING_SLOT_TEX0 && |
| out_var->data.location <= VARYING_SLOT_TEX7)) |
| return emit_varying_param(out_var, instr); |
| } |
| |
| fprintf(stderr, "r600-NIR: Unimplemented store_deref for %d\n", |
| out_var->data.location); |
| return false; |
| } |
| |
| void VertexShaderFromNirForFS::finalize_exports() |
| { |
| if (m_so_info && m_so_info->num_outputs) |
| emit_stream(-1); |
| |
| m_pipe_shader->enabled_stream_buffers_mask = m_enabled_stream_buffers_mask; |
| |
| if (!m_last_param_export) { |
| GPRVector value(0,{7,7,7,7}); |
| m_last_param_export = new ExportInstruction(0, value, ExportInstruction::et_param); |
| emit_export_instruction(m_last_param_export); |
| } |
| m_last_param_export->set_last(); |
| |
| if (!m_last_pos_export) { |
| GPRVector value(0,{7,7,7,7}); |
| m_last_pos_export = new ExportInstruction(0, value, ExportInstruction::et_pos); |
| emit_export_instruction(m_last_pos_export); |
| } |
| m_last_pos_export->set_last(); |
| |
| } |
| |
| VertexShaderFromNirForGS::VertexShaderFromNirForGS(r600_pipe_shader *sh, |
| r600_pipe_shader_selector& sel, |
| const r600_shader_key &key, |
| const r600_shader *gs_shader): |
| VertexShaderFromNir(sh, sel, key), |
| m_gs_shader(gs_shader) |
| { |
| sh->shader.vs_as_es = true; |
| } |
| |
| bool VertexShaderFromNirForGS::do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) |
| { |
| |
| assert(m_gs_shader); |
| |
| int ring_offset = -1; |
| const r600_shader_io& out_io = sh_info().output[out_var->data.driver_location]; |
| |
| sfn_log << SfnLog::io << "check output " << out_var->data.driver_location |
| << " name=" << out_io.name<< " sid=" << out_io.sid << "\n"; |
| for (unsigned k = 0; k < m_gs_shader->ninput; ++k) { |
| auto& in_io = m_gs_shader->input[k]; |
| sfn_log << SfnLog::io << " against " << k << " name=" << in_io.name<< " sid=" << in_io.sid << "\n"; |
| |
| if (in_io.name == out_io.name && |
| in_io.sid == out_io.sid) { |
| ring_offset = in_io.ring_offset; |
| break; |
| } |
| } |
| |
| if (out_var->data.location == VARYING_SLOT_VIEWPORT) |
| return true; |
| |
| if (ring_offset == -1) { |
| sfn_log << SfnLog::err << "VS defines output at " |
| << out_var->data.driver_location << "name=" << out_io.name |
| << " sid=" << out_io.sid << " that is not consumed as GS input\n"; |
| return true; |
| } |
| |
| uint32_t write_mask = (1 << instr->num_components) - 1; |
| |
| std::unique_ptr<GPRVector> value(vec_from_nir_with_fetch_constant(instr->src[1], write_mask, |
| swizzle_from_mask(instr->num_components))); |
| |
| auto ir = new MemRingOutIntruction(cf_mem_ring, mem_write, *value, |
| ring_offset >> 2, 4, PValue()); |
| emit_export_instruction(ir); |
| |
| sh_info().output[out_var->data.driver_location].write_mask |= write_mask; |
| if (out_var->data.location == VARYING_SLOT_CLIP_DIST0 || |
| out_var->data.location == VARYING_SLOT_CLIP_DIST1) |
| m_num_clip_dist += 4; |
| |
| return true; |
| } |
| |
| void VertexShaderFromNirForGS::finalize_exports() |
| { |
| } |
| |
| } |