intel/compiler: Make more functions in NIR conversion static
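
Convert a batch of fs_visitor methods that are only used by the
NIR -> fs translation into file-local static functions in
brw_fs_nir.cpp, shrinking the fs_visitor interface in brw_fs.h.

The conversion is mechanical and takes one of two shapes
(abbreviated from the hunks below): helpers either take the
visitor explicitly,

    static fs_reg
    emit_work_group_id_setup(fs_visitor *s)
    {
       const fs_builder &bld = s->bld;
       ...
    }

or, when they already receive a builder, recover the visitor
from it:

    static fs_reg
    prepare_alu_destination_and_sources(const fs_builder &bld,
                                        nir_alu_instr *instr,
                                        fs_reg *op,
                                        bool need_dest)
    {
       fs_visitor *s = (fs_visitor *)bld.shader;
       ...
    }
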
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26323>
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 4497bf8..15bcb0c 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -310,21 +310,8 @@
bool opt_combine_constants();
void emit_repclear_shader();
- void emit_fragcoord_interpolation(fs_reg wpos);
- void emit_is_helper_invocation(fs_reg result);
- fs_reg emit_frontfacing_interpolation();
- fs_reg emit_samplepos_setup();
- fs_reg emit_sampleid_setup();
- fs_reg emit_samplemaskin_setup();
- fs_reg emit_shading_rate_setup();
void emit_interpolation_setup_gfx4();
void emit_interpolation_setup_gfx6();
- fs_reg emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
- const fs_reg &texture,
- const fs_reg &texture_handle);
- fs_reg resolve_source_modifiers(const brw::fs_builder &bld, const fs_reg &src);
- void emit_fsign(const class brw::fs_builder &, const nir_alu_instr *instr,
- fs_reg result, fs_reg *op, unsigned fsign_src);
void emit_shader_float_controls_execution_mode();
bool opt_peephole_sel();
bool opt_saturate_propagation();
@@ -345,17 +332,12 @@
void nir_emit_instr(nir_instr *instr);
void nir_emit_alu(const brw::fs_builder &bld, nir_alu_instr *instr,
bool need_dest);
- bool try_emit_b2fi_of_inot(const brw::fs_builder &bld, fs_reg result,
- nir_alu_instr *instr);
void nir_emit_load_const(const brw::fs_builder &bld,
nir_load_const_instr *instr);
fs_reg get_nir_image_intrinsic_image(const brw::fs_builder &bld,
nir_intrinsic_instr *instr);
fs_reg get_nir_buffer_intrinsic_index(const brw::fs_builder &bld,
nir_intrinsic_instr *instr);
- fs_reg swizzle_nir_scratch_addr(const brw::fs_builder &bld,
- const fs_reg &addr,
- bool in_dwords);
void nir_emit_surface_atomic(const brw::fs_builder &bld,
nir_intrinsic_instr *instr,
fs_reg surface,
@@ -373,18 +355,7 @@
fs_reg get_nir_def(const nir_def &def);
nir_component_mask_t get_nir_write_mask(const nir_def &def);
fs_reg get_resource_nir_src(const nir_src &src);
- fs_reg try_rebuild_resource(const brw::fs_builder &bld,
- nir_def *resource_def);
fs_reg get_indirect_offset(nir_intrinsic_instr *instr);
- fs_reg get_tcs_single_patch_icp_handle(const brw::fs_builder &bld,
- nir_intrinsic_instr *instr);
- fs_reg get_tcs_multi_patch_icp_handle(const brw::fs_builder &bld,
- nir_intrinsic_instr *instr);
-
- bool optimize_extract_to_float(nir_alu_instr *instr,
- const fs_reg &result);
- bool optimize_frontfacing_ternary(nir_alu_instr *instr,
- const fs_reg &result);
void emit_alpha_test();
fs_inst *emit_single_fb_write(const brw::fs_builder &bld,
@@ -392,36 +363,13 @@
fs_reg src0_alpha, unsigned components);
void do_emit_fb_writes(int nr_color_regions, bool replicate_alpha);
void emit_fb_writes();
- fs_inst *emit_non_coherent_fb_read(const brw::fs_builder &bld,
- const fs_reg &dst, unsigned target);
void emit_urb_writes(const fs_reg &gs_vertex_count = fs_reg());
- void set_gs_stream_control_data_bits(const fs_reg &vertex_count,
- unsigned stream_id);
void emit_gs_control_data_bits(const fs_reg &vertex_count);
- void emit_gs_end_primitive(const nir_src &vertex_count_nir_src);
- void emit_gs_vertex(const nir_src &vertex_count_nir_src,
- unsigned stream_id);
void emit_gs_thread_end();
- void emit_gs_input_load(const fs_reg &dst, const nir_src &vertex_src,
- unsigned base_offset, const nir_src &offset_src,
- unsigned num_components, unsigned first_component);
bool mark_last_urb_write_with_eot();
void emit_tcs_thread_end();
void emit_urb_fence();
void emit_cs_terminate();
- fs_reg emit_work_group_id_setup();
-
- void emit_task_mesh_store(const brw::fs_builder &bld,
- nir_intrinsic_instr *instr,
- const fs_reg &urb_handle);
- void emit_task_mesh_load(const brw::fs_builder &bld,
- nir_intrinsic_instr *instr,
- const fs_reg &urb_handle);
-
- void emit_barrier();
- void emit_tcs_barrier();
-
- fs_reg get_timestamp(const brw::fs_builder &bld);
fs_reg interp_reg(int location, int channel);
fs_reg per_primitive_reg(int location, unsigned comp);
@@ -543,13 +491,6 @@
brw::fs_builder bld;
- fs_reg prepare_alu_destination_and_sources(const brw::fs_builder &bld,
- nir_alu_instr *instr,
- fs_reg *op,
- bool need_dest);
-
- void resolve_inot_sources(const brw::fs_builder &bld, nir_alu_instr *instr,
- fs_reg *op);
void lower_mul_dword_inst(fs_inst *inst, bblock_t *block);
void lower_mul_qword_inst(fs_inst *inst, bblock_t *block);
void lower_mulh_inst(fs_inst *inst, bblock_t *block);
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 8e5a344..dc2b403 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -35,6 +35,10 @@
using namespace brw;
static void fs_nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr);
+static fs_reg emit_samplepos_setup(fs_visitor *s);
+static fs_reg emit_sampleid_setup(fs_visitor *s);
+static fs_reg emit_samplemaskin_setup(fs_visitor *s);
+static fs_reg emit_shading_rate_setup(fs_visitor *s);
void
fs_visitor::emit_nir_code()
@@ -124,10 +128,12 @@
}
}
-fs_reg
-fs_visitor::emit_work_group_id_setup()
+static fs_reg
+emit_work_group_id_setup(fs_visitor *s)
{
- assert(gl_shader_stage_is_compute(stage));
+ const fs_builder &bld = s->bld;
+
+ assert(gl_shader_stage_is_compute(s->stage));
fs_reg id = bld.vgrf(BRW_REGISTER_TYPE_UD, 3);
@@ -192,14 +198,14 @@
assert(v->stage == MESA_SHADER_FRAGMENT);
reg = &v->nir_system_values[SYSTEM_VALUE_SAMPLE_POS];
if (reg->file == BAD_FILE)
- *reg = v->emit_samplepos_setup();
+ *reg = emit_samplepos_setup(v);
break;
case nir_intrinsic_load_sample_id:
assert(v->stage == MESA_SHADER_FRAGMENT);
reg = &v->nir_system_values[SYSTEM_VALUE_SAMPLE_ID];
if (reg->file == BAD_FILE)
- *reg = v->emit_sampleid_setup();
+ *reg = emit_sampleid_setup(v);
break;
case nir_intrinsic_load_sample_mask_in:
@@ -207,7 +213,7 @@
assert(v->devinfo->ver >= 7);
reg = &v->nir_system_values[SYSTEM_VALUE_SAMPLE_MASK_IN];
if (reg->file == BAD_FILE)
- *reg = v->emit_samplemaskin_setup();
+ *reg = emit_samplemaskin_setup(v);
break;
case nir_intrinsic_load_workgroup_id:
@@ -217,7 +223,7 @@
assert(gl_shader_stage_is_compute(v->stage));
reg = &v->nir_system_values[SYSTEM_VALUE_WORKGROUP_ID];
if (reg->file == BAD_FILE)
- *reg = v->emit_work_group_id_setup();
+ *reg = emit_work_group_id_setup(v);
break;
case nir_intrinsic_load_helper_invocation:
@@ -278,7 +284,7 @@
case nir_intrinsic_load_frag_shading_rate:
reg = &v->nir_system_values[SYSTEM_VALUE_FRAG_SHADING_RATE];
if (reg->file == BAD_FILE)
- *reg = v->emit_shading_rate_setup();
+ *reg = emit_shading_rate_setup(v);
break;
default:
@@ -423,10 +429,12 @@
* Recognizes a parent instruction of nir_op_extract_* and changes the type to
* match instr.
*/
-bool
-fs_visitor::optimize_extract_to_float(nir_alu_instr *instr,
- const fs_reg &result)
+static bool
+optimize_extract_to_float(fs_visitor *s, nir_alu_instr *instr,
+ const fs_reg &result)
{
+ const intel_device_info *devinfo = s->devinfo;
+
if (!instr->src[0].src.ssa->parent_instr)
return false;
@@ -447,20 +455,23 @@
src0->op == nir_op_extract_u16 || src0->op == nir_op_extract_i16 ? 2 : 1,
src0->op == nir_op_extract_i16 || src0->op == nir_op_extract_i8);
- fs_reg op0 = get_nir_src(src0->src[0].src);
+ fs_reg op0 = s->get_nir_src(src0->src[0].src);
op0.type = brw_type_for_nir_type(devinfo,
(nir_alu_type)(nir_op_infos[src0->op].input_types[0] |
nir_src_bit_size(src0->src[0].src)));
- op0 = offset(op0, bld, src0->src[0].swizzle[0]);
+ op0 = offset(op0, s->bld, src0->src[0].swizzle[0]);
- bld.MOV(result, subscript(op0, type, element));
+ s->bld.MOV(result, subscript(op0, type, element));
return true;
}
-bool
-fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
- const fs_reg &result)
+static bool
+optimize_frontfacing_ternary(fs_visitor *s,
+ nir_alu_instr *instr,
+ const fs_reg &result)
{
+ const intel_device_info *devinfo = s->devinfo;
+
nir_intrinsic_instr *src0 = nir_src_as_intrinsic(instr->src[0].src);
if (src0 == NULL || src0->intrinsic != nir_intrinsic_load_front_face)
return false;
@@ -477,7 +488,7 @@
/* nir_opt_algebraic should have gotten rid of bcsel(b, a, a) */
assert(value1 == -value2);
- fs_reg tmp = vgrf(glsl_type::int_type);
+ fs_reg tmp = s->vgrf(glsl_type::int_type);
if (devinfo->ver >= 12) {
/* Bit 15 of g1.1 is 0 if the polygon is front facing. */
@@ -493,7 +504,7 @@
if (value1 == -1.0f)
g1.negate = true;
- bld.OR(subscript(tmp, BRW_REGISTER_TYPE_W, 1),
+ s->bld.OR(subscript(tmp, BRW_REGISTER_TYPE_W, 1),
g1, brw_imm_uw(0x3f80));
} else if (devinfo->ver >= 6) {
/* Bit 15 of g0.0 is 0 if the polygon is front facing. */
@@ -514,7 +525,7 @@
g0.negate = true;
}
- bld.OR(subscript(tmp, BRW_REGISTER_TYPE_W, 1),
+ s->bld.OR(subscript(tmp, BRW_REGISTER_TYPE_W, 1),
g0, brw_imm_uw(0x3f80));
} else {
/* Bit 31 of g1.6 is 0 if the polygon is front facing. */
@@ -535,9 +546,9 @@
g1_6.negate = true;
}
- bld.OR(tmp, g1_6, brw_imm_d(0x3f800000));
+ s->bld.OR(tmp, g1_6, brw_imm_d(0x3f800000));
}
- bld.AND(retype(result, BRW_REGISTER_TYPE_D), tmp, brw_imm_d(0xbf800000));
+ s->bld.AND(retype(result, BRW_REGISTER_TYPE_D), tmp, brw_imm_d(0xbf800000));
return true;
}
@@ -564,21 +575,24 @@
return BRW_RND_MODE_UNSPECIFIED;
}
-fs_reg
-fs_visitor::prepare_alu_destination_and_sources(const fs_builder &bld,
- nir_alu_instr *instr,
- fs_reg *op,
- bool need_dest)
+static fs_reg
+prepare_alu_destination_and_sources(const fs_builder &bld,
+ nir_alu_instr *instr,
+ fs_reg *op,
+ bool need_dest)
{
+ fs_visitor *s = (fs_visitor *)bld.shader;
+ const intel_device_info *devinfo = s->devinfo;
+
fs_reg result =
- need_dest ? get_nir_def(instr->def) : bld.null_reg_ud();
+ need_dest ? s->get_nir_def(instr->def) : bld.null_reg_ud();
result.type = brw_type_for_nir_type(devinfo,
(nir_alu_type)(nir_op_infos[instr->op].output_type |
instr->def.bit_size));
for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
- op[i] = get_nir_src(instr->src[i].src);
+ op[i] = s->get_nir_src(instr->src[i].src);
op[i].type = brw_type_for_nir_type(devinfo,
(nir_alu_type)(nir_op_infos[instr->op].input_types[i] |
nir_src_bit_size(instr->src[i].src)));
@@ -610,7 +624,7 @@
/* Since NIR is doing the scalarizing for us, we should only ever see
* vectorized operations with a single channel.
*/
- nir_component_mask_t write_mask = get_nir_write_mask(instr->def);
+ nir_component_mask_t write_mask = s->get_nir_write_mask(instr->def);
assert(util_bitcount(write_mask) == 1);
channel = ffs(write_mask) - 1;
@@ -625,9 +639,21 @@
return result;
}
-void
-fs_visitor::resolve_inot_sources(const fs_builder &bld, nir_alu_instr *instr,
- fs_reg *op)
+static fs_reg
+resolve_source_modifiers(const fs_builder &bld, const fs_reg &src)
+{
+ if (!src.abs && !src.negate)
+ return src;
+
+ fs_reg temp = bld.vgrf(src.type);
+ bld.MOV(temp, src);
+
+ return temp;
+}
+
+static void
+resolve_inot_sources(const fs_builder &bld, nir_alu_instr *instr,
+ fs_reg *op)
{
for (unsigned i = 0; i < 2; i++) {
nir_alu_instr *inot_instr = nir_src_as_alu_instr(instr->src[i].src);
@@ -644,11 +670,13 @@
}
}
-bool
-fs_visitor::try_emit_b2fi_of_inot(const fs_builder &bld,
- fs_reg result,
- nir_alu_instr *instr)
+static bool
+try_emit_b2fi_of_inot(const fs_builder &bld,
+ fs_reg result,
+ nir_alu_instr *instr)
{
+ const intel_device_info *devinfo = bld.shader->devinfo;
+
if (devinfo->ver < 6 || devinfo->verx10 >= 125)
return false;
@@ -688,10 +716,13 @@
* If \c instr is not the \c nir_op_fsign, then \c fsign_src is the index of
* the source of \c instr that is a \c nir_op_fsign.
*/
-void
-fs_visitor::emit_fsign(const fs_builder &bld, const nir_alu_instr *instr,
- fs_reg result, fs_reg *op, unsigned fsign_src)
+static void
+emit_fsign(const fs_builder &bld, const nir_alu_instr *instr,
+ fs_reg result, fs_reg *op, unsigned fsign_src)
{
+ fs_visitor *s = (fs_visitor *)bld.shader;
+ const intel_device_info *devinfo = s->devinfo;
+
fs_inst *inst;
assert(instr->op == nir_op_fsign || instr->op == nir_op_fmul);
@@ -709,7 +740,7 @@
if (fsign_src != 0)
op[1] = op[0];
- op[0] = get_nir_src(fsign_instr->src[0].src);
+ op[0] = s->get_nir_src(fsign_instr->src[0].src);
const nir_alu_type t =
(nir_alu_type)(nir_op_infos[instr->op].input_types[0] |
@@ -722,7 +753,7 @@
/* Since NIR is doing the scalarizing for us, we should only ever see
* vectorized operations with a single channel.
*/
- nir_component_mask_t write_mask = get_nir_write_mask(instr->def);
+ nir_component_mask_t write_mask = s->get_nir_write_mask(instr->def);
assert(util_bitcount(write_mask) == 1);
channel = ffs(write_mask) - 1;
}
@@ -778,7 +809,7 @@
* - We need to produce a DF result.
*/
- fs_reg zero = vgrf(glsl_type::double_type);
+ fs_reg zero = s->vgrf(glsl_type::double_type);
bld.MOV(zero, setup_imm_df(bld, 0.0));
bld.CMP(bld.null_reg_df(), op[0], zero, BRW_CONDITIONAL_NZ);
@@ -959,7 +990,7 @@
case nir_op_i2f32:
case nir_op_u2f32:
- if (optimize_extract_to_float(instr, result))
+ if (optimize_extract_to_float(this, instr, result))
return;
inst = bld.MOV(result, op[0]);
break;
@@ -1783,7 +1814,7 @@
break;
case nir_op_b32csel:
- if (optimize_frontfacing_ternary(instr, result))
+ if (optimize_frontfacing_ternary(this, instr, result))
return;
bld.CMP(bld.null_reg_d(), op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ);
@@ -2067,14 +2098,14 @@
return result;
}
-void
-fs_visitor::emit_gs_end_primitive(const nir_src &vertex_count_nir_src)
+static void
+emit_gs_end_primitive(fs_visitor *s, const nir_src &vertex_count_nir_src)
{
- assert(stage == MESA_SHADER_GEOMETRY);
+ assert(s->stage == MESA_SHADER_GEOMETRY);
- struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(prog_data);
+ struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(s->prog_data);
- if (gs_compile->control_data_header_size_bits == 0)
+ if (s->gs_compile->control_data_header_size_bits == 0)
return;
/* We can only do EndPrimitive() functionality when the control data
@@ -2087,9 +2118,9 @@
}
/* Cut bits use one bit per vertex. */
- assert(gs_compile->control_data_bits_per_vertex == 1);
+ assert(s->gs_compile->control_data_bits_per_vertex == 1);
- fs_reg vertex_count = get_nir_src(vertex_count_nir_src);
+ fs_reg vertex_count = s->get_nir_src(vertex_count_nir_src);
vertex_count.type = BRW_REGISTER_TYPE_UD;
/* Cut bit n should be set to 1 if EndPrimitive() was called after emitting
@@ -2113,10 +2144,10 @@
* control_data_bits register to 0 when the first vertex is emitted.
*/
- const fs_builder abld = bld.annotate("end primitive");
+ const fs_builder abld = s->bld.annotate("end primitive");
/* control_data_bits |= 1 << ((vertex_count - 1) % 32) */
- fs_reg prev_count = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+ fs_reg prev_count = s->bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
abld.ADD(prev_count, vertex_count, brw_imm_ud(0xffffffffu));
fs_reg mask = intexp2(abld, prev_count);
/* Note: we're relying on the fact that the GEN SHL instruction only pays
@@ -2124,7 +2155,7 @@
* architecture, 1 << (vertex_count - 1) is equivalent to 1 <<
* ((vertex_count - 1) % 32).
*/
- abld.OR(this->control_data_bits, this->control_data_bits, mask);
+ abld.OR(s->control_data_bits, s->control_data_bits, mask);
}
void
@@ -2230,9 +2261,9 @@
inst->offset = 2;
}
-void
-fs_visitor::set_gs_stream_control_data_bits(const fs_reg &vertex_count,
- unsigned stream_id)
+static void
+set_gs_stream_control_data_bits(fs_visitor *s, const fs_reg &vertex_count,
+ unsigned stream_id)
{
/* control_data_bits |= stream_id << ((2 * (vertex_count - 1)) % 32) */
@@ -2241,7 +2272,7 @@
*/
/* Stream mode uses 2 bits per vertex */
- assert(gs_compile->control_data_bits_per_vertex == 2);
+ assert(s->gs_compile->control_data_bits_per_vertex == 2);
/* Must be a valid stream */
assert(stream_id < 4); /* MAX_VERTEX_STREAMS */
@@ -2252,14 +2283,14 @@
if (stream_id == 0)
return;
- const fs_builder abld = bld.annotate("set stream control data bits", NULL);
+ const fs_builder abld = s->bld.annotate("set stream control data bits", NULL);
/* reg::sid = stream_id */
- fs_reg sid = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+ fs_reg sid = s->bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
abld.MOV(sid, brw_imm_ud(stream_id));
/* reg:shift_count = 2 * (vertex_count - 1) */
- fs_reg shift_count = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+ fs_reg shift_count = s->bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
abld.SHL(shift_count, vertex_count, brw_imm_ud(1u));
/* Note: we're relying on the fact that the GEN SHL instruction only pays
@@ -2267,20 +2298,20 @@
* architecture, stream_id << 2 * (vertex_count - 1) is equivalent to
* stream_id << ((2 * (vertex_count - 1)) % 32).
*/
- fs_reg mask = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+ fs_reg mask = s->bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
abld.SHL(mask, sid, shift_count);
- abld.OR(this->control_data_bits, this->control_data_bits, mask);
+ abld.OR(s->control_data_bits, s->control_data_bits, mask);
}
-void
-fs_visitor::emit_gs_vertex(const nir_src &vertex_count_nir_src,
- unsigned stream_id)
+static void
+emit_gs_vertex(fs_visitor *s, const nir_src &vertex_count_nir_src,
+ unsigned stream_id)
{
- assert(stage == MESA_SHADER_GEOMETRY);
+ assert(s->stage == MESA_SHADER_GEOMETRY);
- struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(prog_data);
+ struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(s->prog_data);
- fs_reg vertex_count = get_nir_src(vertex_count_nir_src);
+ fs_reg vertex_count = s->get_nir_src(vertex_count_nir_src);
vertex_count.type = BRW_REGISTER_TYPE_UD;
/* Haswell and later hardware ignores the "Render Stream Select" bits
@@ -2293,7 +2324,7 @@
* be recorded by transform feedback, we can simply discard all geometry
* bound to these streams when transform feedback is disabled.
*/
- if (stream_id > 0 && !nir->info.has_transform_feedback_varyings)
+ if (stream_id > 0 && !s->nir->info.has_transform_feedback_varyings)
return;
/* If we're outputting 32 control data bits or less, then we can wait
@@ -2303,9 +2334,9 @@
* control data bits associated with the (vertex_count - 1)th vertex are
* correct.
*/
- if (gs_compile->control_data_header_size_bits > 32) {
+ if (s->gs_compile->control_data_header_size_bits > 32) {
const fs_builder abld =
- bld.annotate("emit vertex: emit control data bits");
+ s->bld.annotate("emit vertex: emit control data bits");
/* Only emit control data bits if we've finished accumulating a batch
* of 32 bits. This is the case when:
@@ -2329,18 +2360,18 @@
* at compile time...
*/
fs_inst *inst =
- abld.AND(bld.null_reg_d(), vertex_count,
- brw_imm_ud(32u / gs_compile->control_data_bits_per_vertex - 1u));
+ abld.AND(s->bld.null_reg_d(), vertex_count,
+ brw_imm_ud(32u / s->gs_compile->control_data_bits_per_vertex - 1u));
inst->conditional_mod = BRW_CONDITIONAL_Z;
abld.IF(BRW_PREDICATE_NORMAL);
/* If vertex_count is 0, then no control data bits have been
* accumulated yet, so we can skip emitting them.
*/
- abld.CMP(bld.null_reg_d(), vertex_count, brw_imm_ud(0u),
+ abld.CMP(s->bld.null_reg_d(), vertex_count, brw_imm_ud(0u),
BRW_CONDITIONAL_NEQ);
abld.IF(BRW_PREDICATE_NORMAL);
- emit_gs_control_data_bits(vertex_count);
+ s->emit_gs_control_data_bits(vertex_count);
abld.emit(BRW_OPCODE_ENDIF);
/* Reset control_data_bits to 0 so we can start accumulating a new
@@ -2350,34 +2381,37 @@
* effect of any call to EndPrimitive() that the shader may have
* made before outputting its first vertex.
*/
- inst = abld.MOV(this->control_data_bits, brw_imm_ud(0u));
+ inst = abld.MOV(s->control_data_bits, brw_imm_ud(0u));
inst->force_writemask_all = true;
abld.emit(BRW_OPCODE_ENDIF);
}
- emit_urb_writes(vertex_count);
+ s->emit_urb_writes(vertex_count);
/* In stream mode we have to set control data bits for all vertices
* unless we have disabled control data bits completely (which we do
* do for MESA_PRIM_POINTS outputs that don't use streams).
*/
- if (gs_compile->control_data_header_size_bits > 0 &&
+ if (s->gs_compile->control_data_header_size_bits > 0 &&
gs_prog_data->control_data_format ==
GFX7_GS_CONTROL_DATA_FORMAT_GSCTL_SID) {
- set_gs_stream_control_data_bits(vertex_count, stream_id);
+ set_gs_stream_control_data_bits(s, vertex_count, stream_id);
}
}
-void
-fs_visitor::emit_gs_input_load(const fs_reg &dst,
- const nir_src &vertex_src,
- unsigned base_offset,
- const nir_src &offset_src,
- unsigned num_components,
- unsigned first_component)
+static void
+emit_gs_input_load(fs_visitor *s, const fs_reg &dst,
+ const nir_src &vertex_src,
+ unsigned base_offset,
+ const nir_src &offset_src,
+ unsigned num_components,
+ unsigned first_component)
{
+ const intel_device_info *devinfo = s->devinfo;
+ const fs_builder &bld = s->bld;
+
assert(type_sz(dst.type) == 4);
- struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(prog_data);
+ struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(s->prog_data);
const unsigned push_reg_count = gs_prog_data->base.urb_read_length * 8;
/* TODO: figure out push input layout for invocations == 1 */
@@ -2387,7 +2421,7 @@
int imm_offset = (base_offset + nir_src_as_uint(offset_src)) * 4 +
nir_src_as_uint(vertex_src) * push_reg_count;
for (unsigned i = 0; i < num_components; i++) {
- bld.MOV(offset(dst, bld, i),
+ s->bld.MOV(offset(dst, s->bld, i),
fs_reg(ATTR, imm_offset + i + first_component, dst.type));
}
return;
@@ -2396,13 +2430,13 @@
/* Resort to the pull model. Ensure the VUE handles are provided. */
assert(gs_prog_data->base.include_vue_handles);
- fs_reg start = gs_payload().icp_handle_start;
- fs_reg icp_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+ fs_reg start = s->gs_payload().icp_handle_start;
+ fs_reg icp_handle = s->bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
if (gs_prog_data->invocations == 1) {
if (nir_src_is_const(vertex_src)) {
/* The vertex index is constant; just select the proper URB handle. */
- icp_handle = offset(start, bld, nir_src_as_uint(vertex_src));
+ icp_handle = offset(start, s->bld, nir_src_as_uint(vertex_src));
} else {
/* The vertex index is non-constant. We need to use indirect
* addressing to fetch the proper URB handle.
@@ -2416,7 +2450,7 @@
* the final indirect byte offset.
*/
fs_reg sequence =
- nir_system_values[SYSTEM_VALUE_SUBGROUP_INVOCATION];
+ s->nir_system_values[SYSTEM_VALUE_SUBGROUP_INVOCATION];
fs_reg channel_offsets = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
fs_reg vertex_offset_bytes = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
fs_reg icp_offset_bytes = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
@@ -2425,7 +2459,7 @@
bld.SHL(channel_offsets, sequence, brw_imm_ud(2u));
/* Convert vertex_index to bytes (multiply by 32) */
bld.SHL(vertex_offset_bytes,
- retype(get_nir_src(vertex_src), BRW_REGISTER_TYPE_UD),
+ retype(s->get_nir_src(vertex_src), BRW_REGISTER_TYPE_UD),
brw_imm_ud(5u));
bld.ADD(icp_offset_bytes, vertex_offset_bytes, channel_offsets);
@@ -2435,7 +2469,7 @@
*/
bld.emit(SHADER_OPCODE_MOV_INDIRECT, icp_handle, start,
fs_reg(icp_offset_bytes),
- brw_imm_ud(nir->info.gs.vertices_in * REG_SIZE));
+ brw_imm_ud(s->nir->info.gs.vertices_in * REG_SIZE));
}
} else {
assert(gs_prog_data->invocations > 1);
@@ -2453,7 +2487,7 @@
/* Convert vertex_index to bytes (multiply by 4) */
bld.SHL(icp_offset_bytes,
- retype(get_nir_src(vertex_src), BRW_REGISTER_TYPE_UD),
+ retype(s->get_nir_src(vertex_src), BRW_REGISTER_TYPE_UD),
brw_imm_ud(2u));
/* Use first_icp_handle as the base offset. There is one DWord
@@ -2462,13 +2496,13 @@
*/
bld.emit(SHADER_OPCODE_MOV_INDIRECT, icp_handle, start,
fs_reg(icp_offset_bytes),
- brw_imm_ud(DIV_ROUND_UP(nir->info.gs.vertices_in, 8) *
+ brw_imm_ud(DIV_ROUND_UP(s->nir->info.gs.vertices_in, 8) *
REG_SIZE));
}
}
fs_inst *inst;
- fs_reg indirect_offset = get_nir_src(offset_src);
+ fs_reg indirect_offset = s->get_nir_src(offset_src);
if (nir_src_is_const(offset_src)) {
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
@@ -2579,15 +2613,17 @@
}
}
-fs_reg
-fs_visitor::get_tcs_single_patch_icp_handle(const fs_builder &bld,
- nir_intrinsic_instr *instr)
+static fs_reg
+get_tcs_single_patch_icp_handle(const fs_builder &bld,
+ nir_intrinsic_instr *instr)
{
- struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(prog_data);
+ fs_visitor *s = (fs_visitor *)bld.shader;
+
+ struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(s->prog_data);
const nir_src &vertex_src = instr->src[0];
nir_intrinsic_instr *vertex_intrin = nir_src_as_intrinsic(vertex_src);
- const fs_reg start = tcs_payload().icp_handle_start;
+ const fs_reg start = s->tcs_payload().icp_handle_start;
fs_reg icp_handle;
@@ -2612,7 +2648,7 @@
/* Each ICP handle is a single DWord (4 bytes) */
fs_reg vertex_offset_bytes = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
bld.SHL(vertex_offset_bytes,
- retype(get_nir_src(vertex_src), BRW_REGISTER_TYPE_UD),
+ retype(s->get_nir_src(vertex_src), BRW_REGISTER_TYPE_UD),
brw_imm_ud(2u));
/* We might read up to 4 registers. */
@@ -2624,15 +2660,18 @@
return icp_handle;
}
-fs_reg
-fs_visitor::get_tcs_multi_patch_icp_handle(const fs_builder &bld,
- nir_intrinsic_instr *instr)
+static fs_reg
+get_tcs_multi_patch_icp_handle(const fs_builder &bld,
+ nir_intrinsic_instr *instr)
{
- struct brw_tcs_prog_key *tcs_key = (struct brw_tcs_prog_key *) key;
+ fs_visitor *s = (fs_visitor *)bld.shader;
+ const intel_device_info *devinfo = s->devinfo;
+
+ struct brw_tcs_prog_key *tcs_key = (struct brw_tcs_prog_key *) s->key;
const nir_src &vertex_src = instr->src[0];
const unsigned grf_size_bytes = REG_SIZE * reg_unit(devinfo);
- const fs_reg start = tcs_payload().icp_handle_start;
+ const fs_reg start = s->tcs_payload().icp_handle_start;
if (nir_src_is_const(vertex_src))
return byte_offset(start, nir_src_as_uint(vertex_src) * grf_size_bytes);
@@ -2649,7 +2688,7 @@
* the final indirect byte offset.
*/
fs_reg icp_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
- fs_reg sequence = nir_system_values[SYSTEM_VALUE_SUBGROUP_INVOCATION];
+ fs_reg sequence = s->nir_system_values[SYSTEM_VALUE_SUBGROUP_INVOCATION];
fs_reg channel_offsets = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
fs_reg vertex_offset_bytes = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
fs_reg icp_offset_bytes = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
@@ -2659,7 +2698,7 @@
/* Convert vertex_index to bytes (multiply by 32) */
assert(util_is_power_of_two_nonzero(grf_size_bytes)); /* for ffs() */
bld.SHL(vertex_offset_bytes,
- retype(get_nir_src(vertex_src), BRW_REGISTER_TYPE_UD),
+ retype(s->get_nir_src(vertex_src), BRW_REGISTER_TYPE_UD),
brw_imm_ud(ffs(grf_size_bytes) - 1));
bld.ADD(icp_offset_bytes, vertex_offset_bytes, channel_offsets);
@@ -2689,13 +2728,16 @@
bld.exec_all().group(2, 0).MOV(m0_10ub, r0_11ub);
}
-void
-fs_visitor::emit_barrier()
+static void
+emit_barrier(fs_visitor *s)
{
- /* We are getting the barrier ID from the compute shader header */
- assert(gl_shader_stage_uses_workgroup(stage));
+ const intel_device_info *devinfo = s->devinfo;
+ const fs_builder &bld = s->bld;
- fs_reg payload = fs_reg(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
+ /* We are getting the barrier ID from the compute shader header */
+ assert(gl_shader_stage_uses_workgroup(s->stage));
+
+ fs_reg payload = fs_reg(VGRF, s->alloc.allocate(1), BRW_REGISTER_TYPE_UD);
/* Clear the message payload */
bld.exec_all().group(8, 0).MOV(payload, brw_imm_ud(0u));
@@ -2703,7 +2745,7 @@
if (devinfo->verx10 >= 125) {
setup_barrier_message_payload_gfx125(bld, payload);
} else {
- assert(gl_shader_stage_is_compute(stage));
+ assert(gl_shader_stage_is_compute(s->stage));
uint32_t barrier_id_mask;
switch (devinfo->ver) {
@@ -2731,11 +2773,14 @@
bld.exec_all().emit(SHADER_OPCODE_BARRIER, reg_undef, payload);
}
-void
-fs_visitor::emit_tcs_barrier()
+static void
+emit_tcs_barrier(fs_visitor *s)
{
- assert(stage == MESA_SHADER_TESS_CTRL);
- struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(prog_data);
+ const intel_device_info *devinfo = s->devinfo;
+ const fs_builder &bld = s->bld;
+
+ assert(s->stage == MESA_SHADER_TESS_CTRL);
+ struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(s->prog_data);
fs_reg m0 = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
fs_reg m0_2 = component(m0, 2);
@@ -2798,7 +2843,7 @@
fs_nir_emit_intrinsic(bld, instr);
if (nir_intrinsic_execution_scope(instr) == SCOPE_WORKGROUP) {
if (tcs_prog_data->instances != 1)
- s->emit_tcs_barrier();
+ emit_tcs_barrier(s);
}
break;
@@ -2816,8 +2861,8 @@
vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_MULTI_PATCH;
fs_reg icp_handle = multi_patch ?
- s->get_tcs_multi_patch_icp_handle(bld, instr) :
- s->get_tcs_single_patch_icp_handle(bld, instr);
+ get_tcs_multi_patch_icp_handle(bld, instr) :
+ get_tcs_single_patch_icp_handle(bld, instr);
/* We can only read two double components with each URB read, so
* we send two read messages in that case, each one loading up to
@@ -3134,17 +3179,17 @@
unreachable("load_input intrinsics are invalid for the GS stage");
case nir_intrinsic_load_per_vertex_input:
- s->emit_gs_input_load(dest, instr->src[0], nir_intrinsic_base(instr),
+ emit_gs_input_load(s, dest, instr->src[0], nir_intrinsic_base(instr),
instr->src[1], instr->num_components,
nir_intrinsic_component(instr));
break;
case nir_intrinsic_emit_vertex_with_counter:
- s->emit_gs_vertex(instr->src[0], nir_intrinsic_stream_id(instr));
+ emit_gs_vertex(s, instr->src[0], nir_intrinsic_stream_id(instr));
break;
case nir_intrinsic_end_primitive_with_counter:
- s->emit_gs_end_primitive(instr->src[0]);
+ emit_gs_end_primitive(s, instr->src[0]);
break;
case nir_intrinsic_set_vertex_and_primitive_count:
@@ -3196,12 +3241,14 @@
}
/* Sample from the MCS surface attached to this multisample texture. */
-fs_reg
-fs_visitor::emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
- const fs_reg &texture,
- const fs_reg &texture_handle)
+static fs_reg
+emit_mcs_fetch(fs_visitor *s, const fs_reg &coordinate, unsigned components,
+ const fs_reg &texture,
+ const fs_reg &texture_handle)
{
- const fs_reg dest = vgrf(glsl_type::uvec4_type);
+ const fs_builder &bld = s->bld;
+
+ const fs_reg dest = s->vgrf(glsl_type::uvec4_type);
fs_reg srcs[TEX_LOGICAL_NUM_SRCS];
srcs[TEX_LOGICAL_SRC_COORDINATE] = coordinate;
@@ -3227,21 +3274,22 @@
* Fake non-coherent framebuffer read implemented using TXF to fetch from the
* framebuffer at the current fragment coordinates and sample index.
*/
-fs_inst *
-fs_visitor::emit_non_coherent_fb_read(const fs_builder &bld, const fs_reg &dst,
- unsigned target)
+static fs_inst *
+emit_non_coherent_fb_read(const fs_builder &bld, const fs_reg &dst,
+ unsigned target)
{
- const struct intel_device_info *devinfo = bld.shader->devinfo;
+ fs_visitor *s = (fs_visitor *)bld.shader;
+ const struct intel_device_info *devinfo = s->devinfo;
assert(bld.shader->stage == MESA_SHADER_FRAGMENT);
const brw_wm_prog_key *wm_key =
- reinterpret_cast<const brw_wm_prog_key *>(key);
+ reinterpret_cast<const brw_wm_prog_key *>(s->key);
assert(!wm_key->coherent_fb_fetch);
/* Calculate the fragment coordinates. */
const fs_reg coords = bld.vgrf(BRW_REGISTER_TYPE_UD, 3);
- bld.MOV(offset(coords, bld, 0), pixel_x);
- bld.MOV(offset(coords, bld, 1), pixel_y);
+ bld.MOV(offset(coords, bld, 0), s->pixel_x);
+ bld.MOV(offset(coords, bld, 1), s->pixel_y);
bld.MOV(offset(coords, bld, 2), fetch_render_target_array_index(bld));
/* Calculate the sample index and MCS payload when multisampling. Luckily
@@ -3252,12 +3300,12 @@
assert(wm_key->multisample_fbo == BRW_ALWAYS ||
wm_key->multisample_fbo == BRW_NEVER);
if (wm_key->multisample_fbo &&
- nir_system_values[SYSTEM_VALUE_SAMPLE_ID].file == BAD_FILE)
- nir_system_values[SYSTEM_VALUE_SAMPLE_ID] = emit_sampleid_setup();
+ s->nir_system_values[SYSTEM_VALUE_SAMPLE_ID].file == BAD_FILE)
+ s->nir_system_values[SYSTEM_VALUE_SAMPLE_ID] = emit_sampleid_setup(s);
- const fs_reg sample = nir_system_values[SYSTEM_VALUE_SAMPLE_ID];
+ const fs_reg sample = s->nir_system_values[SYSTEM_VALUE_SAMPLE_ID];
const fs_reg mcs = wm_key->multisample_fbo ?
- emit_mcs_fetch(coords, 3, brw_imm_ud(target), fs_reg()) : fs_reg();
+ emit_mcs_fetch(s, coords, 3, brw_imm_ud(target), fs_reg()) : fs_reg();
/* Use either a normal or a CMS texel fetch message depending on whether
* the framebuffer is single or multisample. On SKL+ use the wide CMS
@@ -3365,9 +3413,11 @@
unreachable("Invalid location");
}
-void
-fs_visitor::emit_is_helper_invocation(fs_reg result)
+static void
+emit_is_helper_invocation(fs_visitor *s, fs_reg result)
{
+ const fs_builder &bld = s->bld;
+
/* Unlike the regular gl_HelperInvocation, which is defined at dispatch,
* the helperInvocationEXT() (aka SpvOpIsHelperInvocationEXT) takes into
* consideration demoted invocations.
@@ -3392,36 +3442,42 @@
}
}
-void
-fs_visitor::emit_fragcoord_interpolation(fs_reg wpos)
+static void
+emit_fragcoord_interpolation(fs_visitor *s, fs_reg wpos)
{
- assert(stage == MESA_SHADER_FRAGMENT);
+ const intel_device_info *devinfo = s->devinfo;
+ const fs_builder &bld = s->bld;
+
+ assert(s->stage == MESA_SHADER_FRAGMENT);
/* gl_FragCoord.x */
- bld.MOV(wpos, this->pixel_x);
+ bld.MOV(wpos, s->pixel_x);
wpos = offset(wpos, bld, 1);
/* gl_FragCoord.y */
- bld.MOV(wpos, this->pixel_y);
+ bld.MOV(wpos, s->pixel_y);
wpos = offset(wpos, bld, 1);
/* gl_FragCoord.z */
if (devinfo->ver >= 6) {
- bld.MOV(wpos, this->pixel_z);
+ bld.MOV(wpos, s->pixel_z);
} else {
bld.emit(FS_OPCODE_LINTERP, wpos,
- this->delta_xy[BRW_BARYCENTRIC_PERSPECTIVE_PIXEL],
- component(interp_reg(VARYING_SLOT_POS, 2), 0));
+ s->delta_xy[BRW_BARYCENTRIC_PERSPECTIVE_PIXEL],
+ component(s->interp_reg(VARYING_SLOT_POS, 2), 0));
}
wpos = offset(wpos, bld, 1);
/* gl_FragCoord.w: Already set up in emit_interpolation */
- bld.MOV(wpos, this->wpos_w);
+ bld.MOV(wpos, s->wpos_w);
}
-fs_reg
-fs_visitor::emit_frontfacing_interpolation()
+static fs_reg
+emit_frontfacing_interpolation(fs_visitor *s)
{
+ const intel_device_info *devinfo = s->devinfo;
+ const fs_builder &bld = s->bld;
+
fs_reg ff = bld.vgrf(BRW_REGISTER_TYPE_D);
if (devinfo->ver >= 12) {
@@ -3466,11 +3522,14 @@
return ff;
}
-fs_reg
-fs_visitor::emit_samplepos_setup()
+static fs_reg
+emit_samplepos_setup(fs_visitor *s)
{
- assert(stage == MESA_SHADER_FRAGMENT);
- struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(this->prog_data);
+ const intel_device_info *devinfo = s->devinfo;
+ const fs_builder &bld = s->bld;
+
+ assert(s->stage == MESA_SHADER_FRAGMENT);
+ struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(s->prog_data);
assert(devinfo->ver >= 6);
const fs_builder abld = bld.annotate("compute sample position");
@@ -3499,7 +3558,7 @@
* the positions using vstride=16, width=8, hstride=2.
*/
const fs_reg sample_pos_reg =
- fetch_payload_reg(abld, fs_payload().sample_pos_reg, BRW_REGISTER_TYPE_W);
+ fetch_payload_reg(abld, s->fs_payload().sample_pos_reg, BRW_REGISTER_TYPE_W);
for (unsigned i = 0; i < 2; i++) {
fs_reg tmp_d = bld.vgrf(BRW_REGISTER_TYPE_D);
@@ -3524,12 +3583,15 @@
return pos;
}
-fs_reg
-fs_visitor::emit_sampleid_setup()
+static fs_reg
+emit_sampleid_setup(fs_visitor *s)
{
- assert(stage == MESA_SHADER_FRAGMENT);
- ASSERTED brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
- struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(this->prog_data);
+ const intel_device_info *devinfo = s->devinfo;
+ const fs_builder &bld = s->bld;
+
+ assert(s->stage == MESA_SHADER_FRAGMENT);
+ ASSERTED brw_wm_prog_key *key = (brw_wm_prog_key*) s->key;
+ struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(s->prog_data);
assert(devinfo->ver >= 6);
const fs_builder abld = bld.annotate("compute sample id");
@@ -3568,8 +3630,8 @@
*/
const fs_reg tmp = abld.vgrf(BRW_REGISTER_TYPE_UW);
- for (unsigned i = 0; i < DIV_ROUND_UP(dispatch_width, 16); i++) {
- const fs_builder hbld = abld.group(MIN2(16, dispatch_width), i);
+ for (unsigned i = 0; i < DIV_ROUND_UP(s->dispatch_width, 16); i++) {
+ const fs_builder hbld = abld.group(MIN2(16, s->dispatch_width), i);
hbld.SHR(offset(tmp, hbld, i),
stride(retype(brw_vec1_grf(1 + i, 0), BRW_REGISTER_TYPE_UB),
1, 8, 0),
@@ -3616,7 +3678,7 @@
* actually works on gfx7.
*/
if (devinfo->ver >= 7)
- limit_dispatch_width(16, "gl_SampleId is unsupported in SIMD32 on gfx7");
+ s->limit_dispatch_width(16, "gl_SampleId is unsupported in SIMD32 on gfx7");
abld.exec_all().group(8, 0).MOV(t2, brw_imm_v(0x32103210));
/* This special instruction takes care of setting vstride=1,
@@ -3635,18 +3697,21 @@
return sample_id;
}
-fs_reg
-fs_visitor::emit_samplemaskin_setup()
+static fs_reg
+emit_samplemaskin_setup(fs_visitor *s)
{
- assert(stage == MESA_SHADER_FRAGMENT);
- struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(this->prog_data);
+ const intel_device_info *devinfo = s->devinfo;
+ const fs_builder &bld = s->bld;
+
+ assert(s->stage == MESA_SHADER_FRAGMENT);
+ struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(s->prog_data);
assert(devinfo->ver >= 6);
/* The HW doesn't provide us with expected values. */
assert(wm_prog_data->coarse_pixel_dispatch != BRW_ALWAYS);
fs_reg coverage_mask =
- fetch_payload_reg(bld, fs_payload().sample_mask_in_reg, BRW_REGISTER_TYPE_D);
+ fetch_payload_reg(bld, s->fs_payload().sample_mask_in_reg, BRW_REGISTER_TYPE_D);
if (wm_prog_data->persample_dispatch == BRW_NEVER)
return coverage_mask;
@@ -3663,13 +3728,13 @@
*/
const fs_builder abld = bld.annotate("compute gl_SampleMaskIn");
- if (nir_system_values[SYSTEM_VALUE_SAMPLE_ID].file == BAD_FILE)
- nir_system_values[SYSTEM_VALUE_SAMPLE_ID] = emit_sampleid_setup();
+ if (s->nir_system_values[SYSTEM_VALUE_SAMPLE_ID].file == BAD_FILE)
+ s->nir_system_values[SYSTEM_VALUE_SAMPLE_ID] = emit_sampleid_setup(s);
- fs_reg one = vgrf(glsl_type::int_type);
- fs_reg enabled_mask = vgrf(glsl_type::int_type);
+ fs_reg one = s->vgrf(glsl_type::int_type);
+ fs_reg enabled_mask = s->vgrf(glsl_type::int_type);
abld.MOV(one, brw_imm_d(1));
- abld.SHL(enabled_mask, one, nir_system_values[SYSTEM_VALUE_SAMPLE_ID]);
+ abld.SHL(enabled_mask, one, s->nir_system_values[SYSTEM_VALUE_SAMPLE_ID]);
fs_reg mask = bld.vgrf(BRW_REGISTER_TYPE_D);
abld.AND(mask, enabled_mask, coverage_mask);
@@ -3683,9 +3748,12 @@
return mask;
}
-fs_reg
-fs_visitor::emit_shading_rate_setup()
+static fs_reg
+emit_shading_rate_setup(fs_visitor *s)
{
+ const intel_device_info *devinfo = s->devinfo;
+ const fs_builder &bld = s->bld;
+
assert(devinfo->ver >= 11);
struct brw_wm_prog_data *wm_prog_data =
@@ -3730,18 +3798,6 @@
return rate;
}
-fs_reg
-fs_visitor::resolve_source_modifiers(const fs_builder &bld, const fs_reg &src)
-{
- if (!src.abs && !src.negate)
- return src;
-
- fs_reg temp = bld.vgrf(src.type);
- bld.MOV(temp, src);
-
- return temp;
-}
-
static void
fs_nir_emit_fs_intrinsic(const fs_builder &bld,
nir_intrinsic_instr *instr)
@@ -3758,7 +3814,7 @@
switch (instr->intrinsic) {
case nir_intrinsic_load_front_face:
bld.MOV(retype(dest, BRW_REGISTER_TYPE_D),
- s->emit_frontfacing_interpolation());
+ emit_frontfacing_interpolation(s));
break;
case nir_intrinsic_load_sample_pos:
@@ -3777,7 +3833,7 @@
break;
case nir_intrinsic_is_helper_invocation:
- s->emit_is_helper_invocation(dest);
+ emit_is_helper_invocation(s, dest);
break;
case nir_intrinsic_load_helper_invocation:
@@ -3818,7 +3874,7 @@
if (reinterpret_cast<const brw_wm_prog_key *>(s->key)->coherent_fb_fetch)
emit_coherent_fb_read(bld, tmp, target);
else
- s->emit_non_coherent_fb_read(bld, tmp, target);
+ emit_non_coherent_fb_read(bld, tmp, target);
for (unsigned j = 0; j < instr->num_components; j++) {
bld.MOV(offset(dest, bld, j),
@@ -4081,7 +4137,7 @@
}
case nir_intrinsic_load_frag_coord:
- s->emit_fragcoord_interpolation(dest);
+ emit_fragcoord_interpolation(s, dest);
break;
case nir_intrinsic_load_interpolated_input: {
@@ -4157,7 +4213,7 @@
break;
}
- s->emit_barrier();
+ emit_barrier(s);
cs_prog_data->uses_barrier = true;
}
break;
@@ -4467,9 +4523,11 @@
return true;
}
-fs_reg
-fs_visitor::try_rebuild_resource(const brw::fs_builder &bld, nir_def *resource_def)
+static fs_reg
+try_rebuild_resource(const brw::fs_builder &bld, nir_def *resource_def)
{
+ fs_visitor *s = (fs_visitor *)bld.shader;
+
/* Create a builder at the location of the resource_intel intrinsic */
fs_builder ubld8 = bld.exec_all().group(8, 0);
@@ -4510,7 +4568,7 @@
nir_load_const_instr *load_const =
nir_instr_as_load_const(instr);
fs_reg dst = ubld8.vgrf(BRW_REGISTER_TYPE_UD);
- nir_resource_insts[def->index] =
+ s->nir_resource_insts[def->index] =
ubld8.MOV(dst, brw_imm_ud(load_const->value[0].i32));
break;
}
@@ -4535,11 +4593,11 @@
switch (alu->op) {
case nir_op_iadd: {
fs_reg dst = ubld8.vgrf(BRW_REGISTER_TYPE_UD);
- fs_reg src0 = nir_resource_insts[alu->src[0].src.ssa->index]->dst;
- fs_reg src1 = nir_resource_insts[alu->src[1].src.ssa->index]->dst;
+ fs_reg src0 = s->nir_resource_insts[alu->src[0].src.ssa->index]->dst;
+ fs_reg src1 = s->nir_resource_insts[alu->src[1].src.ssa->index]->dst;
assert(src0.file != BAD_FILE && src1.file != BAD_FILE);
assert(src0.type == BRW_REGISTER_TYPE_UD);
- nir_resource_insts[def->index] =
+ s->nir_resource_insts[def->index] =
ubld8.ADD(dst,
src0.file != IMM ? src0 : src1,
src0.file != IMM ? src1 : src0);
@@ -4547,12 +4605,12 @@
}
case nir_op_iadd3: {
fs_reg dst = ubld8.vgrf(BRW_REGISTER_TYPE_UD);
- fs_reg src0 = nir_resource_insts[alu->src[0].src.ssa->index]->dst;
- fs_reg src1 = nir_resource_insts[alu->src[1].src.ssa->index]->dst;
- fs_reg src2 = nir_resource_insts[alu->src[2].src.ssa->index]->dst;
+ fs_reg src0 = s->nir_resource_insts[alu->src[0].src.ssa->index]->dst;
+ fs_reg src1 = s->nir_resource_insts[alu->src[1].src.ssa->index]->dst;
+ fs_reg src2 = s->nir_resource_insts[alu->src[2].src.ssa->index]->dst;
assert(src0.file != BAD_FILE && src1.file != BAD_FILE && src2.file != BAD_FILE);
assert(src0.type == BRW_REGISTER_TYPE_UD);
- nir_resource_insts[def->index] =
+ s->nir_resource_insts[def->index] =
ubld8.ADD3(dst,
src1.file == IMM ? src1 : src0,
src1.file == IMM ? src0 : src1,
@@ -4561,20 +4619,20 @@
}
case nir_op_ushr: {
fs_reg dst = ubld8.vgrf(BRW_REGISTER_TYPE_UD);
- fs_reg src0 = nir_resource_insts[alu->src[0].src.ssa->index]->dst;
- fs_reg src1 = nir_resource_insts[alu->src[1].src.ssa->index]->dst;
+ fs_reg src0 = s->nir_resource_insts[alu->src[0].src.ssa->index]->dst;
+ fs_reg src1 = s->nir_resource_insts[alu->src[1].src.ssa->index]->dst;
assert(src0.file != BAD_FILE && src1.file != BAD_FILE);
assert(src0.type == BRW_REGISTER_TYPE_UD);
- nir_resource_insts[def->index] = ubld8.SHR(dst, src0, src1);
+ s->nir_resource_insts[def->index] = ubld8.SHR(dst, src0, src1);
break;
}
case nir_op_ishl: {
fs_reg dst = ubld8.vgrf(BRW_REGISTER_TYPE_UD);
- fs_reg src0 = nir_resource_insts[alu->src[0].src.ssa->index]->dst;
- fs_reg src1 = nir_resource_insts[alu->src[1].src.ssa->index]->dst;
+ fs_reg src0 = s->nir_resource_insts[alu->src[0].src.ssa->index]->dst;
+ fs_reg src1 = s->nir_resource_insts[alu->src[1].src.ssa->index]->dst;
assert(src0.file != BAD_FILE && src1.file != BAD_FILE);
assert(src0.type == BRW_REGISTER_TYPE_UD);
- nir_resource_insts[def->index] = ubld8.SHL(dst, src0, src1);
+ s->nir_resource_insts[def->index] = ubld8.SHL(dst, src0, src1);
break;
}
case nir_op_mov: {
@@ -4590,8 +4648,8 @@
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
switch (intrin->intrinsic) {
case nir_intrinsic_resource_intel:
- nir_resource_insts[def->index] =
- nir_resource_insts[intrin->src[1].ssa->index];
+ s->nir_resource_insts[def->index] =
+ s->nir_resource_insts[intrin->src[1].ssa->index];
break;
case nir_intrinsic_load_uniform: {
@@ -4603,7 +4661,7 @@
fs_reg dst = ubld8.vgrf(BRW_REGISTER_TYPE_UD);
fs_reg src(UNIFORM, base_offset / 4, BRW_REGISTER_TYPE_UD);
src.offset = load_offset + base_offset % 4;
- nir_resource_insts[def->index] = ubld8.MOV(dst, src);
+ s->nir_resource_insts[def->index] = ubld8.MOV(dst, src);
break;
}
@@ -4617,12 +4675,12 @@
break;
}
- if (nir_resource_insts[def->index] == NULL)
+ if (s->nir_resource_insts[def->index] == NULL)
return fs_reg();
}
- assert(nir_resource_insts[resource_def->index] != NULL);
- return component(nir_resource_insts[resource_def->index]->dst, 0);
+ assert(s->nir_resource_insts[resource_def->index] != NULL);
+ return component(s->nir_resource_insts[resource_def->index]->dst, 0);
}
fs_reg
@@ -4684,14 +4742,16 @@
* at the same logical offset, the scratch read/write instruction acts on
* contiguous elements and we get good cache locality.
*/
-fs_reg
-fs_visitor::swizzle_nir_scratch_addr(const brw::fs_builder &bld,
- const fs_reg &nir_addr,
- bool in_dwords)
+static fs_reg
+swizzle_nir_scratch_addr(const brw::fs_builder &bld,
+ const fs_reg &nir_addr,
+ bool in_dwords)
{
+ fs_visitor *s = (fs_visitor *) bld.shader;
+
const fs_reg &chan_index =
- nir_system_values[SYSTEM_VALUE_SUBGROUP_INVOCATION];
- const unsigned chan_index_bits = ffs(dispatch_width) - 1;
+ s->nir_system_values[SYSTEM_VALUE_SUBGROUP_INVOCATION];
+ const unsigned chan_index_bits = ffs(s->dispatch_width) - 1;
fs_reg addr = bld.vgrf(BRW_REGISTER_TYPE_UD);
if (in_dwords) {
@@ -4803,9 +4863,12 @@
/**
* Create a MOV to read the timestamp register.
*/
-fs_reg
-fs_visitor::get_timestamp(const fs_builder &bld)
+static fs_reg
+get_timestamp(const fs_builder &bld)
{
+ fs_visitor *s = (fs_visitor *)bld.shader;
+ const intel_device_info *devinfo = s->devinfo;
+
assert(devinfo->ver >= 7);
fs_reg ts = fs_reg(retype(brw_vec4_reg(BRW_ARCHITECTURE_REGISTER_FILE,
@@ -4813,7 +4876,7 @@
0),
BRW_REGISTER_TYPE_UD));
- fs_reg dst = fs_reg(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
+ fs_reg dst = fs_reg(VGRF, s->alloc.allocate(1), BRW_REGISTER_TYPE_UD);
/* We want to read the 3 fields we care about even if it's not enabled in
* the dispatch.
@@ -5354,11 +5417,13 @@
}
}
-void
-fs_visitor::emit_task_mesh_store(const fs_builder &bld, nir_intrinsic_instr *instr,
- const fs_reg &urb_handle)
+static void
+emit_task_mesh_store(const fs_builder &bld, nir_intrinsic_instr *instr,
+ const fs_reg &urb_handle)
{
- fs_reg src = get_nir_src(instr->src[0]);
+ fs_visitor *s = (fs_visitor *)bld.shader;
+
+ fs_reg src = s->get_nir_src(instr->src[0]);
nir_src *offset_nir_src = nir_get_io_offset_src(instr);
if (nir_src_is_const(*offset_nir_src)) {
@@ -5368,7 +5433,7 @@
emit_urb_direct_writes(bld, instr, src, urb_handle);
} else {
if (bld.shader->devinfo->ver >= 20) {
- emit_urb_indirect_writes_xe2(bld, instr, src, get_nir_src(*offset_nir_src), urb_handle);
+ emit_urb_indirect_writes_xe2(bld, instr, src, s->get_nir_src(*offset_nir_src), urb_handle);
return;
}
bool use_mod = false;
@@ -5384,18 +5449,20 @@
}
if (use_mod) {
- emit_urb_indirect_writes_mod(bld, instr, src, get_nir_src(*offset_nir_src), urb_handle, mod);
+ emit_urb_indirect_writes_mod(bld, instr, src, s->get_nir_src(*offset_nir_src), urb_handle, mod);
} else {
- emit_urb_indirect_writes(bld, instr, src, get_nir_src(*offset_nir_src), urb_handle);
+ emit_urb_indirect_writes(bld, instr, src, s->get_nir_src(*offset_nir_src), urb_handle);
}
}
}
-void
-fs_visitor::emit_task_mesh_load(const fs_builder &bld, nir_intrinsic_instr *instr,
- const fs_reg &urb_handle)
+static void
+emit_task_mesh_load(const fs_builder &bld, nir_intrinsic_instr *instr,
+ const fs_reg &urb_handle)
{
- fs_reg dest = get_nir_def(instr->def);
+ fs_visitor *s = (fs_visitor *)bld.shader;
+
+ fs_reg dest = s->get_nir_def(instr->def);
nir_src *offset_nir_src = nir_get_io_offset_src(instr);
/* TODO(mesh): for per_vertex and per_primitive, if we could keep around
@@ -5410,9 +5477,9 @@
emit_urb_direct_reads(bld, instr, dest, urb_handle);
} else {
if (bld.shader->devinfo->ver >= 20)
- emit_urb_indirect_reads_xe2(bld, instr, dest, get_nir_src(*offset_nir_src), urb_handle);
+ emit_urb_indirect_reads_xe2(bld, instr, dest, s->get_nir_src(*offset_nir_src), urb_handle);
else
- emit_urb_indirect_reads(bld, instr, dest, get_nir_src(*offset_nir_src), urb_handle);
+ emit_urb_indirect_reads(bld, instr, dest, s->get_nir_src(*offset_nir_src), urb_handle);
}
}
@@ -5480,12 +5547,12 @@
switch (instr->intrinsic) {
case nir_intrinsic_store_output:
case nir_intrinsic_store_task_payload:
- s->emit_task_mesh_store(bld, instr, payload.urb_output);
+ emit_task_mesh_store(bld, instr, payload.urb_output);
break;
case nir_intrinsic_load_output:
case nir_intrinsic_load_task_payload:
- s->emit_task_mesh_load(bld, instr, payload.urb_output);
+ emit_task_mesh_load(bld, instr, payload.urb_output);
break;
default:
@@ -5507,17 +5574,17 @@
case nir_intrinsic_store_per_primitive_output:
case nir_intrinsic_store_per_vertex_output:
case nir_intrinsic_store_output:
- s->emit_task_mesh_store(bld, instr, payload.urb_output);
+ emit_task_mesh_store(bld, instr, payload.urb_output);
break;
case nir_intrinsic_load_per_vertex_output:
case nir_intrinsic_load_per_primitive_output:
case nir_intrinsic_load_output:
- s->emit_task_mesh_load(bld, instr, payload.urb_output);
+ emit_task_mesh_load(bld, instr, payload.urb_output);
break;
case nir_intrinsic_load_task_payload:
- s->emit_task_mesh_load(bld, instr, payload.task_urb_input);
+ emit_task_mesh_load(bld, instr, payload.task_urb_input);
break;
default:
@@ -5570,7 +5637,7 @@
s->nir_resource_values[instr->def.index] = fs_reg();
} else {
s->nir_resource_values[instr->def.index] =
- s->try_rebuild_resource(bld, instr->src[1].ssa);
+ try_rebuild_resource(bld, instr->src[1].ssa);
}
s->nir_ssa_values[instr->def.index] =
s->nir_ssa_values[instr->src[1].ssa->index];
@@ -5994,7 +6061,7 @@
case nir_intrinsic_shader_clock: {
/* We cannot do anything if there is an event, so ignore it for now */
- const fs_reg shader_clock = s->get_timestamp(bld);
+ const fs_reg shader_clock = get_timestamp(bld);
const fs_reg srcs[] = { component(shader_clock, 0),
component(shader_clock, 1) };
bld.LOAD_PAYLOAD(dest, srcs, ARRAY_SIZE(srcs), 0);
@@ -6683,7 +6750,7 @@
nir_intrinsic_align(instr) >= 4);
srcs[SURFACE_LOGICAL_SRC_ADDRESS] =
- s->swizzle_nir_scratch_addr(bld, nir_addr, false);
+ swizzle_nir_scratch_addr(bld, nir_addr, false);
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(1);
bld.emit(SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL,
@@ -6691,14 +6758,14 @@
} else {
/* The offset for a DWORD scattered message is in dwords. */
srcs[SURFACE_LOGICAL_SRC_ADDRESS] =
- s->swizzle_nir_scratch_addr(bld, nir_addr, true);
+ swizzle_nir_scratch_addr(bld, nir_addr, true);
bld.emit(SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL,
dest, srcs, SURFACE_LOGICAL_NUM_SRCS);
}
} else {
srcs[SURFACE_LOGICAL_SRC_ADDRESS] =
- s->swizzle_nir_scratch_addr(bld, nir_addr, false);
+ swizzle_nir_scratch_addr(bld, nir_addr, false);
fs_reg read_result = bld.vgrf(BRW_REGISTER_TYPE_UD);
bld.emit(SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL,
@@ -6756,7 +6823,7 @@
srcs[SURFACE_LOGICAL_SRC_DATA] = data;
srcs[SURFACE_LOGICAL_SRC_ADDRESS] =
- s->swizzle_nir_scratch_addr(bld, nir_addr, false);
+ swizzle_nir_scratch_addr(bld, nir_addr, false);
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(1);
bld.emit(SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL,
@@ -6766,7 +6833,7 @@
/* The offset for a DWORD scattered message is in dwords. */
srcs[SURFACE_LOGICAL_SRC_ADDRESS] =
- s->swizzle_nir_scratch_addr(bld, nir_addr, true);
+ swizzle_nir_scratch_addr(bld, nir_addr, true);
bld.emit(SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL,
fs_reg(), srcs, SURFACE_LOGICAL_NUM_SRCS);
@@ -6776,7 +6843,7 @@
bld.MOV(srcs[SURFACE_LOGICAL_SRC_DATA], data);
srcs[SURFACE_LOGICAL_SRC_ADDRESS] =
- s->swizzle_nir_scratch_addr(bld, nir_addr, false);
+ swizzle_nir_scratch_addr(bld, nir_addr, false);
bld.emit(SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL,
fs_reg(), srcs, SURFACE_LOGICAL_NUM_SRCS);
@@ -7790,7 +7857,7 @@
instr->op == nir_texop_samples_identical)) {
if (devinfo->ver >= 7) {
srcs[TEX_LOGICAL_SRC_MCS] =
- emit_mcs_fetch(srcs[TEX_LOGICAL_SRC_COORDINATE],
+ emit_mcs_fetch(this, srcs[TEX_LOGICAL_SRC_COORDINATE],
instr->coord_components,
srcs[TEX_LOGICAL_SRC_SURFACE],
srcs[TEX_LOGICAL_SRC_SURFACE_HANDLE]);