| /* |
| * Copyright © 2013 Intel Corporation |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
| * DEALINGS IN THE SOFTWARE. |
| */ |
| |
| /** |
| * \file brw_vec4_tes.cpp |
| * |
| * Tessellation evaluation shader specific code derived from the vec4_visitor class. |
| */ |
| |
| #include "brw_vec4_tes.h" |
| #include "brw_cfg.h" |
| |
| namespace brw { |
| |
| vec4_tes_visitor::vec4_tes_visitor(const struct brw_compiler *compiler, |
| void *log_data, |
| const struct brw_tes_prog_key *key, |
| struct brw_tes_prog_data *prog_data, |
| const nir_shader *shader, |
| void *mem_ctx, |
| int shader_time_index) |
| : vec4_visitor(compiler, log_data, &key->tex, &prog_data->base, |
| shader, mem_ctx, false, shader_time_index) |
| { |
| } |
| |
| |
| dst_reg * |
| vec4_tes_visitor::make_reg_for_system_value(int location) |
| { |
| return NULL; |
| } |
| |
| void |
| vec4_tes_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr) |
| { |
| switch (instr->intrinsic) { |
| case nir_intrinsic_load_tess_level_outer: |
| case nir_intrinsic_load_tess_level_inner: |
| break; |
| default: |
| vec4_visitor::nir_setup_system_value_intrinsic(instr); |
| } |
| } |
| |
| |
| void |
| vec4_tes_visitor::setup_payload() |
| { |
| int reg = 0; |
| |
| /* The payload always contains important data in r0 and r1, which contains |
| * the URB handles that are passed on to the URB write at the end |
| * of the thread. |
| */ |
| reg += 2; |
| |
| reg = setup_uniforms(reg); |
| |
| foreach_block_and_inst(block, vec4_instruction, inst, cfg) { |
| for (int i = 0; i < 3; i++) { |
| if (inst->src[i].file != ATTR) |
| continue; |
| |
| bool is_64bit = type_sz(inst->src[i].type) == 8; |
| |
| struct brw_reg grf = |
| brw_vec4_grf(reg + inst->src[i].nr / 2, 4 * (inst->src[i].nr % 2)); |
| grf = stride(grf, 0, is_64bit ? 2 : 4, 1); |
| grf.swizzle = inst->src[i].swizzle; |
| grf.type = inst->src[i].type; |
| grf.abs = inst->src[i].abs; |
| grf.negate = inst->src[i].negate; |
| |
| /* For 64-bit attributes we can end up with components XY in the |
| * second half of a register and components ZW in the first half |
| * of the next. Fix it up here. |
| */ |
| if (is_64bit && grf.subnr > 0) { |
| /* We can't do swizzles that mix XY and ZW channels in this case. |
| * Such cases should have been handled by the scalarization pass. |
| */ |
| assert((brw_mask_for_swizzle(grf.swizzle) & 0x3) ^ |
| (brw_mask_for_swizzle(grf.swizzle) & 0xc)); |
| if (brw_mask_for_swizzle(grf.swizzle) & 0xc) { |
| grf.subnr = 0; |
| grf.nr++; |
| grf.swizzle -= BRW_SWIZZLE_ZZZZ; |
| } |
| } |
| |
| inst->src[i] = grf; |
| } |
| } |
| |
| reg += 8 * prog_data->urb_read_length; |
| |
| this->first_non_payload_grf = reg; |
| } |
| |
| |
| void |
| vec4_tes_visitor::emit_prolog() |
| { |
| input_read_header = src_reg(this, glsl_type::uvec4_type); |
| emit(TES_OPCODE_CREATE_INPUT_READ_HEADER, dst_reg(input_read_header)); |
| |
| this->current_annotation = NULL; |
| } |
| |
| |
| void |
| vec4_tes_visitor::emit_urb_write_header(int mrf) |
| { |
| /* No need to do anything for DS; an implied write to this MRF will be |
| * performed by VS_OPCODE_URB_WRITE. |
| */ |
| (void) mrf; |
| } |
| |
| |
| vec4_instruction * |
| vec4_tes_visitor::emit_urb_write_opcode(bool complete) |
| { |
| /* For DS, the URB writes end the thread. */ |
| if (complete) { |
| if (INTEL_DEBUG & DEBUG_SHADER_TIME) |
| emit_shader_time_end(); |
| } |
| |
| vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE); |
| inst->urb_write_flags = complete ? |
| BRW_URB_WRITE_EOT_COMPLETE : BRW_URB_WRITE_NO_FLAGS; |
| |
| return inst; |
| } |
| |
| void |
| vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) |
| { |
| const struct brw_tes_prog_data *tes_prog_data = |
| (const struct brw_tes_prog_data *) prog_data; |
| |
| switch (instr->intrinsic) { |
| case nir_intrinsic_load_tess_coord: |
| /* gl_TessCoord is part of the payload in g1 channels 0-2 and 4-6. */ |
| emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), |
| src_reg(brw_vec8_grf(1, 0)))); |
| break; |
| case nir_intrinsic_load_tess_level_outer: |
| if (tes_prog_data->domain == BRW_TESS_DOMAIN_ISOLINE) { |
| emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), |
| swizzle(src_reg(ATTR, 1, glsl_type::vec4_type), |
| BRW_SWIZZLE_ZWZW))); |
| } else { |
| emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), |
| swizzle(src_reg(ATTR, 1, glsl_type::vec4_type), |
| BRW_SWIZZLE_WZYX))); |
| } |
| break; |
| case nir_intrinsic_load_tess_level_inner: |
| if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) { |
| emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), |
| swizzle(src_reg(ATTR, 0, glsl_type::vec4_type), |
| BRW_SWIZZLE_WZYX))); |
| } else { |
| emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), |
| src_reg(ATTR, 1, glsl_type::float_type))); |
| } |
| break; |
| case nir_intrinsic_load_primitive_id: |
| emit(TES_OPCODE_GET_PRIMITIVE_ID, |
| get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD)); |
| break; |
| |
| case nir_intrinsic_load_input: |
| case nir_intrinsic_load_per_vertex_input: { |
| src_reg indirect_offset = get_indirect_offset(instr); |
| unsigned imm_offset = instr->const_index[0]; |
| src_reg header = input_read_header; |
| bool is_64bit = nir_dest_bit_size(instr->dest) == 64; |
| unsigned first_component = nir_intrinsic_component(instr); |
| if (is_64bit) |
| first_component /= 2; |
| |
| if (indirect_offset.file != BAD_FILE) { |
| header = src_reg(this, glsl_type::uvec4_type); |
| emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header), |
| input_read_header, indirect_offset); |
| } else { |
| /* Arbitrarily only push up to 24 vec4 slots worth of data, |
| * which is 12 registers (since each holds 2 vec4 slots). |
| */ |
| const unsigned max_push_slots = 24; |
| if (imm_offset < max_push_slots) { |
| const glsl_type *src_glsl_type = |
| is_64bit ? glsl_type::dvec4_type : glsl_type::ivec4_type; |
| src_reg src = src_reg(ATTR, imm_offset, src_glsl_type); |
| src.swizzle = BRW_SWZ_COMP_INPUT(first_component); |
| |
| const brw_reg_type dst_reg_type = |
| is_64bit ? BRW_REGISTER_TYPE_DF : BRW_REGISTER_TYPE_D; |
| emit(MOV(get_nir_dest(instr->dest, dst_reg_type), src)); |
| |
| prog_data->urb_read_length = |
| MAX2(prog_data->urb_read_length, |
| DIV_ROUND_UP(imm_offset + (is_64bit ? 2 : 1), 2)); |
| break; |
| } |
| } |
| |
| if (!is_64bit) { |
| dst_reg temp(this, glsl_type::ivec4_type); |
| vec4_instruction *read = |
| emit(VEC4_OPCODE_URB_READ, temp, src_reg(header)); |
| read->offset = imm_offset; |
| read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET; |
| |
| src_reg src = src_reg(temp); |
| src.swizzle = BRW_SWZ_COMP_INPUT(first_component); |
| |
| /* Copy to target. We might end up with some funky writemasks landing |
| * in here, but we really don't want them in the above pseudo-ops. |
| */ |
| dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D); |
| dst.writemask = brw_writemask_for_size(instr->num_components); |
| emit(MOV(dst, src)); |
| } else { |
| /* For 64-bit we need to load twice as many 32-bit components, and for |
| * dvec3/4 we need to emit 2 URB Read messages |
| */ |
| dst_reg temp(this, glsl_type::dvec4_type); |
| dst_reg temp_d = retype(temp, BRW_REGISTER_TYPE_D); |
| |
| vec4_instruction *read = |
| emit(VEC4_OPCODE_URB_READ, temp_d, src_reg(header)); |
| read->offset = imm_offset; |
| read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET; |
| |
| if (instr->num_components > 2) { |
| read = emit(VEC4_OPCODE_URB_READ, byte_offset(temp_d, REG_SIZE), |
| src_reg(header)); |
| read->offset = imm_offset + 1; |
| read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET; |
| } |
| |
| src_reg temp_as_src = src_reg(temp); |
| temp_as_src.swizzle = BRW_SWZ_COMP_INPUT(first_component); |
| |
| dst_reg shuffled(this, glsl_type::dvec4_type); |
| shuffle_64bit_data(shuffled, temp_as_src, false); |
| |
| dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_DF); |
| dst.writemask = brw_writemask_for_size(instr->num_components); |
| emit(MOV(dst, src_reg(shuffled))); |
| } |
| break; |
| } |
| default: |
| vec4_visitor::nir_emit_intrinsic(instr); |
| } |
| } |
| |
| |
| void |
| vec4_tes_visitor::emit_thread_end() |
| { |
| /* For DS, we always end the thread by emitting a single vertex. |
| * emit_urb_write_opcode() will take care of setting the eot flag on the |
| * SEND instruction. |
| */ |
| emit_vertex(); |
| } |
| |
| } /* namespace brw */ |