blob: 2c052ec84392932f729dbb9514a8c0064f60af3d [file] [log] [blame]
/* -*- mesa-c++ -*-
*
* Copyright (c) 2018 Collabora LTD
*
* Author: Gert Wollny <gert.wollny@collabora.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "pipe/p_defines.h"
#include "tgsi/tgsi_from_mesa.h"
#include "sfn_shader_fragment.h"
#include "sfn_instruction_fetch.h"
namespace r600 {
/* Construct the fragment shader translator.
 *
 * The values that depend on the shader key (number of color buffers,
 * two-sided color, sample mask handling, dual source blending) are stored
 * in members; all counters start at zero and all interpolators start out
 * disabled. */
FragmentShaderFromNir::FragmentShaderFromNir(const nir_shader& nir,
                                             r600_shader& sh,
                                             r600_pipe_shader_selector &sel,
                                             const r600_shader_key &key,
                                             enum chip_class chip_class):
   ShaderFromNirProcessor(PIPE_SHADER_FRAGMENT, sel, sh, nir.scratch_size, chip_class, 0),
   /* at least one color export slot is always accounted for */
   m_max_color_exports(MAX2(key.ps.nr_cbufs,1)),
   m_max_counted_color_exports(0),
   m_two_sided_color(key.ps.color_two_side),
   m_last_pixel_export(nullptr),
   m_nir(nir),
   m_reserved_registers(0),
   m_frag_pos_index(0),
   m_need_back_color(false),
   m_front_face_loaded(false),
   m_depth_exports(0),
   m_enable_centroid_interpolators(false),
   m_enable_sample_interpolators(false),
   m_apply_sample_mask(key.ps.apply_sample_id_mask),
   m_dual_source_blend(key.ps.dual_source_blend)
{
   /* Interpolators get enabled later, when the reserved registers are
    * allocated */
   for (auto& interp: m_interpolator) {
      interp.ij_index = 0;
      interp.enabled = false;
   }

   auto& info = sh_info();
   info.rat_base = key.ps.nr_cbufs;
   info.atomic_base = key.ps.first_atomic_counter;
}
/* Register one input variable of the fragment shader.
 *
 * Front face and position only mark the corresponding system value as used.
 * Colors are added as color inputs (and request back color handling if
 * two-sided color is enabled); the remaining handled semantics are added as
 * varyings unless a matching varying already exists.
 *
 * Returns false if the semantic of the variable is not handled. */
bool FragmentShaderFromNir::do_process_inputs(nir_variable *input)
{
   sfn_log << SfnLog::io << "Parse input variable "
           << input->name << " location:" << input->data.location
           << " driver-loc:" << input->data.driver_location
           << " interpolation:" << input->data.interpolation
           << "\n";

   if (input->data.location == VARYING_SLOT_FACE) {
      m_sv_values.set(es_face);
      return true;
   }

   const auto semantic = r600_get_varying_semantic(input->data.location);
   const unsigned name = semantic.first;
   const unsigned sid = semantic.second;
   const tgsi_semantic sname = static_cast<tgsi_semantic>(name);

   if (sname == TGSI_SEMANTIC_POSITION) {
      m_sv_values.set(es_pos);
      return true;
   }

   if (sname == TGSI_SEMANTIC_COLOR) {
      m_shaderio.add_input(new ShaderInputColor(sname, sid, input));
      m_need_back_color = m_two_sided_color;
      return true;
   }

   /* The primitive ID is recorded in the shader info (using the input index
    * it is about to get) and then handled like any other varying */
   if (sname == TGSI_SEMANTIC_PRIMID) {
      sh_info().gs_prim_id_input = true;
      sh_info().ps_prim_id_input = m_shaderio.inputs().size();
   }

   switch (sname) {
   case TGSI_SEMANTIC_PRIMID:
   case TGSI_SEMANTIC_FOG:
   case TGSI_SEMANTIC_GENERIC:
   case TGSI_SEMANTIC_TEXCOORD:
   case TGSI_SEMANTIC_LAYER:
   case TGSI_SEMANTIC_PCOORD:
   case TGSI_SEMANTIC_VIEWPORT_INDEX:
   case TGSI_SEMANTIC_CLIPDIST: {
      const bool already_known =
            m_shaderio.find_varying(sname, sid, input->data.location_frac);
      if (!already_known)
         m_shaderio.add_input(new ShaderInputVarying(sname, sid, input));
      return true;
   }
   default:
      return false;
   }
}
/* Inspect one instruction and record which system values and which extra
 * interpolators the shader needs. Instructions that are not intrinsics are
 * ignored. Always returns true. */
bool FragmentShaderFromNir::scan_sysvalue_access(nir_instr *instr)
{
   if (instr->type != nir_instr_type_intrinsic)
      return true;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

   switch (intr->intrinsic) {
   case nir_intrinsic_load_front_face:
      m_sv_values.set(es_face);
      break;
   case nir_intrinsic_load_sample_mask_in:
      m_sv_values.set(es_sample_mask_in);
      break;
   case nir_intrinsic_load_sample_pos:
      /* loading the sample position also marks the sample ID as used */
      m_sv_values.set(es_sample_pos);
      m_sv_values.set(es_sample_id);
      break;
   case nir_intrinsic_load_sample_id:
      m_sv_values.set(es_sample_id);
      break;
   case nir_intrinsic_interp_deref_at_centroid:
      /* This is not a sysvalue, should go elsewhere */
      m_enable_centroid_interpolators = true;
      break;
   case nir_intrinsic_interp_deref_at_sample:
      m_enable_sample_interpolators = true;
      break;
   case nir_intrinsic_load_helper_invocation:
      m_sv_values.set(es_helper_invocation);
      break;
   default:
      break;
   }

   return true;
}
bool FragmentShaderFromNir::do_allocate_reserved_registers()
{
assert(!m_reserved_registers);
int face_reg_index = -1;
int sample_id_index = -1;
// enabled interpolators based on inputs
for (auto& i: m_shaderio.inputs()) {
int ij = i->ij_index();
if (ij >= 0) {
m_interpolator[ij].enabled = true;
}
}
/* Lazy, enable both possible interpolators,
* TODO: check which ones are really needed */
if (m_enable_centroid_interpolators) {
m_interpolator[2].enabled = true; /* perspective */
m_interpolator[5].enabled = true; /* linear */
}
if (m_enable_sample_interpolators)
m_interpolator[1].enabled = true; /* perspective */
// sort the varying inputs
m_shaderio.sort_varying_inputs();
// handle interpolators
int num_baryc = 0;
for (int i = 0; i < 6; ++i) {
if (m_interpolator[i].enabled) {
sfn_log << SfnLog::io << "Interpolator " << i << " is enabled\n";
m_interpolator[i].ij_index = num_baryc;
unsigned sel = num_baryc / 2;
unsigned chan = 2 * (num_baryc % 2);
auto ip_i = new GPRValue(sel, chan + 1);
ip_i->set_as_input();
m_interpolator[i].i.reset(ip_i);
inject_register(sel, chan + 1, m_interpolator[i].i, false);
auto ip_j = new GPRValue(sel, chan);
ip_j->set_as_input();
m_interpolator[i].j.reset(ip_j);
inject_register(sel, chan, m_interpolator[i].j, false);
++num_baryc;
}
}
m_reserved_registers += (num_baryc + 1) >> 1;
if (m_sv_values.test(es_pos)) {
m_frag_pos_index = m_reserved_registers++;
m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_POSITION, m_frag_pos_index));
}
// handle system values
if (m_sv_values.test(es_face) || m_need_back_color) {
face_reg_index = m_reserved_registers++;
auto ffr = new GPRValue(face_reg_index,0);
ffr->set_as_input();
m_front_face_reg.reset(ffr);
sfn_log << SfnLog::io << "Set front_face register to " << *m_front_face_reg << "\n";
inject_register(ffr->sel(), ffr->chan(), m_front_face_reg, false);
m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_FACE, face_reg_index));
load_front_face();
}
if (m_sv_values.test(es_sample_mask_in)) {
if (face_reg_index < 0)
face_reg_index = m_reserved_registers++;
auto smi = new GPRValue(face_reg_index,2);
smi->set_as_input();
m_sample_mask_reg.reset(smi);
sfn_log << SfnLog::io << "Set sample mask in register to " << *m_sample_mask_reg << "\n";
//inject_register(smi->sel(), smi->chan(), m_sample_mask_reg, false);
sh_info().nsys_inputs = 1;
m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEMASK, face_reg_index));
}
if (m_sv_values.test(es_sample_id) ||
m_sv_values.test(es_sample_mask_in)) {
if (sample_id_index < 0)
sample_id_index = m_reserved_registers++;
auto smi = new GPRValue(sample_id_index, 3);
smi->set_as_input();
m_sample_id_reg.reset(smi);
sfn_log << SfnLog::io << "Set sample id register to " << *m_sample_id_reg << "\n";
sh_info().nsys_inputs++;
m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEID, sample_id_index));
}
// The back color handling is not emmited in the code, so we have
// to add the inputs here and later we also need to inject the code to set
// the right color
if (m_need_back_color) {
size_t ninputs = m_shaderio.inputs().size();
for (size_t k = 0; k < ninputs; ++k) {
ShaderInput& i = m_shaderio.input(k);
if (i.name() != TGSI_SEMANTIC_COLOR)
continue;
ShaderInputColor& col = static_cast<ShaderInputColor&>(i);
size_t next_pos = m_shaderio.size();
auto bcol = new ShaderInputVarying(TGSI_SEMANTIC_BCOLOR, col, next_pos);
m_shaderio.add_input(bcol);
col.set_back_color(next_pos);
}
m_shaderio.set_two_sided();
}
m_shaderio.update_lds_pos();
set_reserved_registers(m_reserved_registers);
return true;
}
void FragmentShaderFromNir::emit_shader_start()
{
if (m_sv_values.test(es_face))
load_front_face();
if (m_sv_values.test(es_pos)) {
for (int i = 0; i < 4; ++i) {
auto v = new GPRValue(m_frag_pos_index, i);
v->set_as_input();
auto reg = PValue(v);
if (i == 3)
emit_instruction(new AluInstruction(op1_recip_ieee, reg, reg, {alu_write, alu_last_instr}));
m_frag_pos[i] = reg;
}
}
if (m_sv_values.test(es_helper_invocation)) {
m_helper_invocation = get_temp_register();
auto dummy = PValue(new GPRValue(m_helper_invocation->sel(), 7));
emit_instruction(new AluInstruction(op1_mov, m_helper_invocation, literal(-1), {alu_write, alu_last_instr}));
GPRVector dst({m_helper_invocation, dummy, dummy, dummy});
auto vtx = new FetchInstruction(dst, m_helper_invocation,
R600_BUFFER_INFO_CONST_BUFFER, bim_none);
vtx->set_flag(vtx_vpm);
vtx->set_flag(vtx_use_tc);
vtx->set_dest_swizzle({4,7,7,7});
emit_instruction(vtx);
}
}
/* Emit the export for a store to a fragment shader output.
 *
 * FRAG_RESULT_COLOR is exported once with dual source blending and
 * m_max_color_exports times otherwise; the data, depth, stencil and sample
 * mask outputs are exported once. Any other location is reported as
 * unimplemented and false is returned. */
bool FragmentShaderFromNir::do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
{
   const int loc = out_var->data.location;

   if (loc == FRAG_RESULT_COLOR)
      return emit_export_pixel(out_var, instr, m_dual_source_blend ? 1 : m_max_color_exports);

   const bool is_data_output = loc >= FRAG_RESULT_DATA0 &&
                               loc <= FRAG_RESULT_DATA7;
   const bool is_aux_output = loc == FRAG_RESULT_DEPTH ||
                              loc == FRAG_RESULT_STENCIL ||
                              loc == FRAG_RESULT_SAMPLE_MASK;

   if (is_data_output || is_aux_output)
      return emit_export_pixel(out_var, instr, 1);

   sfn_log << SfnLog::err << "r600-NIR: Unimplemented store_deref for " <<
              out_var->data.location << "(" << out_var->data.driver_location << ")\n";
   return false;
}
/* Register one output variable of the fragment shader in the shader info.
 *
 * The output count is incremented, the semantic and the write mask of the
 * output slot are filled in, and the color exports are counted. Returns
 * false if the output location is not handled. */
bool FragmentShaderFromNir::do_process_outputs(nir_variable *output)
{
   sfn_log << SfnLog::io << "Parse output variable "
           << output->name << " @" << output->data.location
           << "@dl:" << output->data.driver_location
           << " dual source idx: " << output->data.index
           << "\n";

   ++sh_info().noutput;
   r600_shader_io& io = sh_info().output[output->data.driver_location];
   tgsi_get_gl_frag_result_semantic(static_cast<gl_frag_result>( output->data.location),
                                    &io.name, &io.sid);

   /* Check whether this code has become obsolete by the IO vectorization */
   const unsigned vector_elements =
         glsl_get_vector_elements(glsl_without_array(output->type));
   /* fall back to four components if the type reports none */
   const unsigned num_components = vector_elements ? vector_elements : 4;

   const unsigned first_comp = output->data.location_frac;
   for (unsigned c = first_comp; c < num_components + first_comp; c++)
      io.write_mask |= 1 << c;

   const int loc = output->data.location;
   const bool is_frag_color = loc == FRAG_RESULT_COLOR;

   if (is_frag_color &&
       (m_nir.info.outputs_written & (1ull << loc)) &&
       !m_dual_source_blend) {
      sh_info().fs_write_all = true;
   }

   const bool is_data_output = loc >= FRAG_RESULT_DATA0 &&
                               loc <= FRAG_RESULT_DATA7;
   if (is_frag_color || is_data_output) {
      ++m_max_counted_color_exports;

      /* with more than one color output "write all" is dropped again */
      if (m_max_counted_color_exports > 1)
         sh_info().fs_write_all = false;
      return true;
   }

   const bool is_aux_output = loc == FRAG_RESULT_DEPTH ||
                              loc == FRAG_RESULT_STENCIL ||
                              loc == FRAG_RESULT_SAMPLE_MASK;
   if (is_aux_output) {
      io.write_mask = 15;
      return true;
   }

   return false;
}
/* Load the sample mask restricted to the current sample:
 * dest = (1 << sample_id) & sample_mask.
 * Requires that the sample ID and sample mask registers were set up. */
bool FragmentShaderFromNir::emit_load_sample_mask_in(nir_intrinsic_instr* instr)
{
   assert(m_sample_id_reg);
   assert(m_sample_mask_reg);

   auto dest = from_nir(instr->dest, 0);

   auto shift = new AluInstruction(op2_lshl_int, dest, Value::one_i, m_sample_id_reg,
                                   EmitInstruction::last_write);
   emit_instruction(shift);

   auto mask = new AluInstruction(op2_and_int, dest, dest, m_sample_mask_reg,
                                  EmitInstruction::last_write);
   emit_instruction(mask);

   return true;
}
/* Handle the intrinsics that are specific to fragment shaders.
 * Returns false for intrinsics that are not handled here, otherwise the
 * result of the respective emit function. */
bool FragmentShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_sample_mask_in:
      /* Depending on the shader key the mask is either combined with the
       * sample ID or passed on as preloaded */
      return m_apply_sample_mask ?
               emit_load_sample_mask_in(instr) :
               load_preloaded_value(instr->dest, 0, m_sample_mask_reg);

   case nir_intrinsic_load_sample_id:
      return load_preloaded_value(instr->dest, 0, m_sample_id_reg);

   case nir_intrinsic_load_front_face:
      return load_preloaded_value(instr->dest, 0, m_front_face_reg);

   case nir_intrinsic_interp_deref_at_sample:
      return emit_interp_deref_at_sample(instr);

   case nir_intrinsic_interp_deref_at_offset:
      return emit_interp_deref_at_offset(instr);

   case nir_intrinsic_interp_deref_at_centroid:
      return emit_interp_deref_at_centroid(instr);

   case nir_intrinsic_load_sample_pos:
      return emit_load_sample_pos(instr);

   case nir_intrinsic_load_helper_invocation:
      return load_preloaded_value(instr->dest, 0, m_helper_invocation);

   default:
      return false;
   }
}
/* Convert the front face register in place with a "greater or equal zero"
 * comparison. The instruction is emitted only once; later calls do
 * nothing. */
void FragmentShaderFromNir::load_front_face()
{
   assert(m_front_face_reg);

   if (!m_front_face_loaded) {
      auto compare = new AluInstruction(op2_setge_dx10, m_front_face_reg, m_front_face_reg,
                                        Value::zero, {alu_write, alu_last_instr});
      m_front_face_loaded = true;
      emit_instruction(compare);
   }
}
/* Load the sample position with a fetch from the buffer info constant
 * buffer that uses the sample ID register as source. Always returns true. */
bool FragmentShaderFromNir::emit_load_sample_pos(nir_intrinsic_instr* instr)
{
   const auto ncomp = nir_dest_num_components(instr->dest);
   GPRVector dest = vec_from_nir(instr->dest, ncomp);

   auto fetch = new FetchInstruction(vc_fetch, no_index_offset,
                                     fmt_32_32_32_32_float, vtx_nf_scaled, vtx_es_none,
                                     m_sample_id_reg, dest,
                                     0, false, 0xf,
                                     R600_BUFFER_INFO_CONST_BUFFER, 0, bim_none,
                                     false, false,
                                     0, 0, 0,
                                     PValue(),
                                     {0,1,2,3});
   fetch->set_flag(vtx_srf_mode);
   emit_instruction(fetch);

   return true;
}
/* Emit the interpolation of an input at a given sample.
 *
 * A vec4 is fetched from the buffer info constant buffer using the second
 * intrinsic source as index. The horizontal and vertical gradients of the
 * (j, i) pair of interpolator 1 are then combined with components 2 and 3
 * of the fetched data:
 *    j' = grad_h.x * data.z + grad_v.x * data.w + j
 *    i' = grad_h.y * data.z + grad_v.y * data.w + i
 * and the input is interpolated with (i', j'). Always returns true. */
bool FragmentShaderFromNir::emit_interp_deref_at_sample(nir_intrinsic_instr* instr)
{
   GPRVector work = get_temp_vec4();

   auto fetch = new FetchInstruction(vc_fetch, no_index_offset, work,
                                     from_nir_with_fetch_constant(instr->src[1], 0),
                                     0, R600_BUFFER_INFO_CONST_BUFFER, PValue(), bim_none);
   fetch->set_flag(vtx_srf_mode);
   emit_instruction(fetch);

   GPRVector gradients = get_temp_vec4();

   auto var = get_deref_location(instr->src[0]);
   assert(var);

   auto& io = m_shaderio.input(var->data.driver_location, var->data.location_frac);

   auto sample_ip = m_interpolator[1];
   assert(sample_ip.enabled);

   PValue unused(new GPRValue(sample_ip.i->sel(), 0));
   GPRVector ij_src({sample_ip.j, sample_ip.i, unused, unused});

   /* all four coordinates are flagged as unnormalized */
   auto mark_unnormalized = [](TexInstruction *t) {
      t->set_flag(TexInstruction::x_unnormalized);
      t->set_flag(TexInstruction::y_unnormalized);
      t->set_flag(TexInstruction::z_unnormalized);
      t->set_flag(TexInstruction::w_unnormalized);
   };

   /* horizontal gradients go to x and y ... */
   auto tex = new TexInstruction(TexInstruction::get_gradient_h, gradients, ij_src, 0, 0, PValue());
   tex->set_flag(TexInstruction::grad_fine);
   mark_unnormalized(tex);
   tex->set_dest_swizzle({0,1,7,7});
   emit_instruction(tex);

   /* ... and vertical gradients to z and w */
   tex = new TexInstruction(TexInstruction::get_gradient_v, gradients, ij_src, 0, 0, PValue());
   mark_unnormalized(tex);
   tex->set_flag(TexInstruction::grad_fine);
   tex->set_dest_swizzle({7,7,0,1});
   emit_instruction(tex);

   emit_instruction(new AluInstruction(op3_muladd, work.reg_i(0),
                                       {gradients.reg_i(0), work.reg_i(2), sample_ip.j},
                                       {alu_write}));
   emit_instruction(new AluInstruction(op3_muladd, work.reg_i(1),
                                       {gradients.reg_i(1), work.reg_i(2), sample_ip.i},
                                       {alu_write, alu_last_instr}));

   emit_instruction(new AluInstruction(op3_muladd, work.reg_i(0),
                                       {gradients.reg_i(2), work.reg_i(3), work.reg_i(0)},
                                       {alu_write}));
   emit_instruction(new AluInstruction(op3_muladd, work.reg_i(1),
                                       {gradients.reg_i(3), work.reg_i(3), work.reg_i(1)},
                                       {alu_write, alu_last_instr}));

   /* temporary interpolator that uses the adjusted i and j */
   Interpolator ip = {true, 0, work.reg_i(1), work.reg_i(0)};

   auto dst = vec_from_nir(instr->dest, 4);
   int num_components = instr->dest.is_ssa ?
                           instr->dest.ssa.num_components:
                           instr->dest.reg.reg->num_components;

   load_interpolated(dst, io, ip, num_components, var->data.location_frac);

   return true;
}
/* Emit the interpolation of an input at an offset.
 *
 * The horizontal and vertical gradients of the (j, i) pair of the input's
 * interpolator are combined with the offset given by the second intrinsic
 * source:
 *    j' = grad_h.x * ofs.x + grad_v.x * ofs.y + j
 *    i' = grad_h.y * ofs.x + grad_v.y * ofs.y + i
 * and the input is interpolated with (i', j').
 *
 * The input must use one of the six interpolators and that interpolator
 * must be enabled; this is asserted because the i/j registers of the
 * interpolator are dereferenced below. Always returns true. */
bool FragmentShaderFromNir::emit_interp_deref_at_offset(nir_intrinsic_instr* instr)
{
   int temp = allocate_temp_register();

   GPRVector help(temp, {0,1,2,3});

   auto var = get_deref_location(instr->src[0]);
   assert(var);

   auto& io = m_shaderio.input(var->data.driver_location, var->data.location_frac);

   /* Inputs may report a negative interpolator index, which must not be
    * used to index m_interpolator */
   const int ij = io.ij_index();
   assert(ij >= 0 && ij < 6);

   auto interpolator = m_interpolator[ij];
   /* a disabled interpolator has no i/j registers assigned */
   assert(interpolator.enabled);

   PValue dummy(new GPRValue(interpolator.i->sel(), 0));

   GPRVector interp({interpolator.j, interpolator.i, dummy, dummy});

   /* horizontal gradients go to x and y ... */
   auto getgradh = new TexInstruction(TexInstruction::get_gradient_h, help, interp, 0, 0, PValue());
   getgradh->set_dest_swizzle({0,1,7,7});
   getgradh->set_flag(TexInstruction::x_unnormalized);
   getgradh->set_flag(TexInstruction::y_unnormalized);
   getgradh->set_flag(TexInstruction::z_unnormalized);
   getgradh->set_flag(TexInstruction::w_unnormalized);
   getgradh->set_flag(TexInstruction::grad_fine);
   emit_instruction(getgradh);

   /* ... and vertical gradients to z and w */
   auto getgradv = new TexInstruction(TexInstruction::get_gradient_v, help, interp, 0, 0, PValue());
   getgradv->set_dest_swizzle({7,7,0,1});
   getgradv->set_flag(TexInstruction::x_unnormalized);
   getgradv->set_flag(TexInstruction::y_unnormalized);
   getgradv->set_flag(TexInstruction::z_unnormalized);
   getgradv->set_flag(TexInstruction::w_unnormalized);
   getgradv->set_flag(TexInstruction::grad_fine);
   emit_instruction(getgradv);

   PValue ofs_x = from_nir(instr->src[1], 0);
   PValue ofs_y = from_nir(instr->src[1], 1);

   emit_instruction(new AluInstruction(op3_muladd, help.reg_i(0), help.reg_i(0), ofs_x, interpolator.j, {alu_write}));
   emit_instruction(new AluInstruction(op3_muladd, help.reg_i(1), help.reg_i(1), ofs_x, interpolator.i, {alu_write, alu_last_instr}));
   emit_instruction(new AluInstruction(op3_muladd, help.reg_i(0), help.reg_i(2), ofs_y, help.reg_i(0), {alu_write}));
   emit_instruction(new AluInstruction(op3_muladd, help.reg_i(1), help.reg_i(3), ofs_y, help.reg_i(1), {alu_write, alu_last_instr}));

   /* temporary interpolator that uses the adjusted i and j */
   Interpolator ip = {true, 0, help.reg_i(1), help.reg_i(0)};

   auto dst = vec_from_nir(instr->dest, 4);
   load_interpolated(dst, io, ip, nir_dest_num_components(instr->dest),
                     var->data.location_frac);

   return true;
}
/* Emit the interpolation of an input at the centroid.
 *
 * The input is flagged as using centroid interpolation. Interpolator 5 is
 * used if the interpolator index of the input is 3 or higher, interpolator
 * 2 otherwise. Always returns true. */
bool FragmentShaderFromNir::emit_interp_deref_at_centroid(nir_intrinsic_instr* instr)
{
   auto var = get_deref_location(instr->src[0]);
   assert(var);

   auto& io = m_shaderio.input(var->data.driver_location, var->data.location_frac);
   io.set_uses_interpolate_at_centroid();

   const int centroid_index = (io.ij_index() >= 3) ? 5 : 2;
   assert (m_interpolator[centroid_index].enabled);
   auto centroid_ip = m_interpolator[centroid_index];

   const int num_components = nir_dest_num_components(instr->dest);

   auto dst = vec_from_nir(instr->dest, 4);
   load_interpolated(dst, io, centroid_ip, num_components, var->data.location_frac);

   return true;
}
bool FragmentShaderFromNir::do_emit_load_deref(const nir_variable *in_var, nir_intrinsic_instr* instr)
{
if (in_var->data.location == VARYING_SLOT_POS) {
assert(instr->dest.is_ssa);
for (int i = 0; i < instr->dest.ssa.num_components; ++i) {
inject_register(instr->dest.ssa.index, i, m_frag_pos[i], true);
}
return true;
}
if (in_var->data.location == VARYING_SLOT_FACE)
return load_preloaded_value(instr->dest, 0, m_front_face_reg);
// todo: replace io with ShaderInputVarying
auto& io = m_shaderio.input(in_var->data.driver_location, in_var->data.location_frac);
unsigned num_components = 4;
if (instr->dest.is_ssa) {
num_components = instr->dest.ssa.num_components;
} else {
num_components = instr->dest.reg.reg->num_components;
}
auto dst = vec_from_nir(instr->dest, 4);
sfn_log << SfnLog::io << "Set input[" << in_var->data.driver_location
<< "].gpr=" << dst.sel()
<< " interp=" << io.ij_index()
<< "\n";
io.set_gpr(dst.sel());
auto& ip = io.interpolate() ? m_interpolator[io.ij_index()] : m_interpolator[0];
load_interpolated(dst, io, ip, num_components, in_var->data.location_frac);
/* These results are expected starting in slot x..*/
if (in_var->data.location_frac > 0) {
int n = instr->dest.is_ssa ? instr->dest.ssa.num_components :
instr->dest.reg.reg->num_components;
AluInstruction *ir = nullptr;
for (int i = 0; i < n; ++i) {
ir = new AluInstruction(op1_mov, dst[i],
dst[i + in_var->data.location_frac], {alu_write});
emit_instruction(ir);
}
if (ir)
ir->set_flag(alu_last_instr);
}
if (m_need_back_color && io.name() == TGSI_SEMANTIC_COLOR) {
auto & color_input = static_cast<ShaderInputColor&> (io);
auto& bgio = m_shaderio.input(color_input.back_color_input_index());
bgio.set_gpr(allocate_temp_register());
GPRVector bgcol(bgio.gpr(), {0,1,2,3});
load_interpolated(bgcol, bgio, ip, num_components, 0);
load_front_face();
AluInstruction *ir = nullptr;
for (unsigned i = 0; i < 4 ; ++i) {
ir = new AluInstruction(op3_cnde, dst[i], m_front_face_reg, bgcol[i], dst[i], {alu_write});
emit_instruction(ir);
}
if (ir)
ir->set_flag(alu_last_instr);
}
return true;
}
/* Load an input into dest.
 *
 * Inputs that are not interpolated are loaded with four interp_load_p0
 * instructions. Interpolated inputs use the interp_xy/interp_zw/interp_x/
 * interp_z helpers; the combination is picked based on the number of
 * components and the first component. Combinations without a special case
 * use the generic zw + xy path with the write mask derived from
 * num_components and start_comp. */
bool FragmentShaderFromNir::load_interpolated(GPRVector &dest,
                                              ShaderInput& io, const Interpolator &ip,
                                              int num_components, int start_comp)
{
   // replace io with ShaderInputVarying
   if (!(io.interpolate() > 0)) {
      AluInstruction *load = nullptr;
      for (unsigned chan = 0; chan < 4 ; ++chan) {
         load = new AluInstruction(op1_interp_load_p0, dest[chan],
                                   PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), chan)),
                                   EmitInstruction::write);
         emit_instruction(load);
      }
      /* the last load closes the instruction group */
      load->set_flag(alu_last_instr);
      return true;
   }

   sfn_log << SfnLog::io << "Using Interpolator " << io.ij_index() << "\n";

   switch (num_components) {
   case 1:
      switch (start_comp) {
      case 0: return load_interpolated_one_comp(dest, io, ip, op2_interp_x);
      case 1: return load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_xy, 0, 1);
      case 2: return load_interpolated_one_comp(dest, io, ip, op2_interp_z);
      case 3: return load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_zw, 2, 3);
      default:
         assert(0);
      }
      break;
   case 2:
      switch (start_comp) {
      case 0: return load_interpolated_two_comp(dest, io, ip, op2_interp_xy, 0x3);
      case 2: return load_interpolated_two_comp(dest, io, ip, op2_interp_zw, 0xc);
      case 1: return load_interpolated_one_comp(dest, io, ip, op2_interp_z) &&
                     load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_xy, 0, 1);
      default:
         assert(0);
      }
      break;
   case 3:
      if (start_comp == 0)
         return load_interpolated_two_comp(dest, io, ip, op2_interp_xy, 0x3) &&
                load_interpolated_one_comp(dest, io, ip, op2_interp_z);
      break;
   default:
      break;
   }

   /* generic case: both halves are always emitted, zw first */
   const int full_write_mask = ((1 << num_components) - 1) << start_comp;

   const bool zw_ok = load_interpolated_two_comp(dest, io, ip, op2_interp_zw, full_write_mask & 0xc);
   const bool xy_ok = load_interpolated_two_comp(dest, io, ip, op2_interp_xy, full_write_mask & 0x3);
   return zw_ok && xy_ok;
}
/* Emit the two-instruction sequence that interpolates one component.
 *
 * The instructions target channels 0 and 1, or 2 and 3 if op is
 * op2_interp_z. Only the first instruction gets the write flags, the second
 * one closes the group. Always returns true. */
bool FragmentShaderFromNir::load_interpolated_one_comp(GPRVector &dest,
                                                       ShaderInput& io, const Interpolator& ip, EAluOp op)
{
   const int first_chan = (op == op2_interp_z) ? 2 : 0;

   for (unsigned step = 0; step < 2 ; ++step) {
      const int chan = first_chan + step;

      auto interp = new AluInstruction(op, dest[chan], (step & 1) ? ip.j : ip.i,
                                       PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), 0)),
                                       step == 0 ? EmitInstruction::write : EmitInstruction::last);
      dest.pin_to_channel(chan);
      interp->set_bank_swizzle(alu_vec_210);
      emit_instruction(interp);
   }
   return true;
}
/* Emit the four-instruction sequence for op, one instruction per channel.
 * Only the channels that are set in writemask get the write flags; the last
 * instruction closes the group. Always returns true. */
bool FragmentShaderFromNir::load_interpolated_two_comp(GPRVector &dest, ShaderInput& io,
                                                       const Interpolator& ip, EAluOp op, int writemask)
{
   AluInstruction *interp = nullptr;

   for (unsigned chan = 0; chan < 4 ; ++chan) {
      const bool is_written = (writemask & (1 << chan)) != 0;

      interp = new AluInstruction(op, dest[chan], (chan & 1) ? ip.j : ip.i,
                                  PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), 0)),
                                  is_written ? EmitInstruction::write : EmitInstruction::empty);
      dest.pin_to_channel(chan);
      interp->set_bank_swizzle(alu_vec_210);
      emit_instruction(interp);
   }
   interp->set_flag(alu_last_instr);

   return true;
}
/* Emit the four-instruction sequence for op, but give the write flags only
 * to the instruction for channel comp. The start parameter is not used. The
 * last instruction closes the group. Always returns true. */
bool FragmentShaderFromNir::load_interpolated_two_comp_for_one(GPRVector &dest,
                                                               ShaderInput& io, const Interpolator& ip,
                                                               EAluOp op, UNUSED int start, int comp)
{
   AluInstruction *interp = nullptr;

   for (int chan = 0; chan < 4 ; ++chan) {
      interp = new AluInstruction(op, dest[chan], (chan & 1) ? ip.j : ip.i,
                                  PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), 0)),
                                  chan == comp ? EmitInstruction::write : EmitInstruction::empty);
      interp->set_bank_swizzle(alu_vec_210);
      dest.pin_to_channel(chan);
      emit_instruction(interp);
   }
   interp->set_flag(alu_last_instr);

   return true;
}
/* Emit the pixel export(s) for one output store.
 *
 * Depth, stencil and sample mask are exported to location 61, using the
 * x, y and z component respectively. Color outputs are exported "outputs"
 * times to consecutive locations; locations at or beyond
 * m_max_color_exports are skipped. The shader info (highest export, export
 * count and export mask) is updated for every color export emitted.
 * Returns false if the output location is not handled. */
bool FragmentShaderFromNir::emit_export_pixel(const nir_variable *out_var, nir_intrinsic_instr* instr, int outputs)
{
   const int loc = out_var->data.location;

   std::array<uint32_t,4> swizzle;
   unsigned writemask = nir_intrinsic_write_mask(instr);

   switch (loc) {
   case FRAG_RESULT_DEPTH:
      writemask = 1;
      swizzle = {0,7,7,7};
      break;
   case FRAG_RESULT_STENCIL:
      writemask = 2;
      swizzle = {7,0,7,7};
      break;
   case FRAG_RESULT_SAMPLE_MASK:
      writemask = 4;
      swizzle = {7,7,0,7};
      break;
   default:
      /* identity for the stored components, 7 for the rest */
      for (int c = 0; c < 4; ++c) {
         swizzle[c] = (c < instr->num_components) ? c : 7;
      }
   }

   auto value = vec_from_nir_with_fetch_constant(instr->src[1], writemask, swizzle);

   set_output(out_var->data.driver_location, value.sel());

   const bool is_color = loc == FRAG_RESULT_COLOR ||
                         (loc >= FRAG_RESULT_DATA0 &&
                          loc <= FRAG_RESULT_DATA7);
   const bool is_aux = loc == FRAG_RESULT_DEPTH ||
                       loc == FRAG_RESULT_STENCIL ||
                       loc == FRAG_RESULT_SAMPLE_MASK;

   if (is_color) {
      for (int k = 0 ; k < outputs; ++k) {

         /* the number of depth-type exports emitted so far is subtracted
          * from the export location */
         unsigned location = (m_dual_source_blend ? out_var->data.index : out_var->data.driver_location) + k - m_depth_exports;

         sfn_log << SfnLog::io << "Pixel output " << out_var->name << " at loc:" << location << "\n";

         if (location >= m_max_color_exports) {
            sfn_log << SfnLog::io << "Pixel output loc:" << location
                    << " dl:" << out_var->data.location
                    << " skipped because we have only " << m_max_color_exports << " CBs\n";
            continue;
         }

         m_last_pixel_export = new ExportInstruction(location, value, ExportInstruction::et_pixel);

         if (sh_info().ps_export_highest < location)
            sh_info().ps_export_highest = location;

         sh_info().nr_ps_color_exports++;

         /* four mask bits per export location */
         unsigned mask = (0xfu << (location * 4));
         sh_info().ps_color_export_mask |= mask;

         emit_export_instruction(m_last_pixel_export);
      }
      return true;
   }

   if (is_aux) {
      m_depth_exports++;
      emit_export_instruction(new ExportInstruction(61, value, ExportInstruction::et_pixel));
      return true;
   }

   return false;
}
/* Finish the shader: copy the input information into the shader info, set
 * the two-side, LDS and color export counts, make sure there is at least
 * one pixel export, and flag the last pixel export as last. */
void FragmentShaderFromNir::do_finalize()
{
   // update shader io info and set LDS etc.
   sh_info().ninput = m_shaderio.inputs().size();

   sfn_log << SfnLog::io << "Have " << sh_info().ninput << " inputs\n";
   for (size_t k = 0; k < sh_info().ninput; ++k) {
      auto& in = m_shaderio.input(k);

      /* inputs with an interpolator index outside 0..5 use the register
       * index of interpolator 0 */
      const auto raw_ij = in.ij_index();
      const int ij_idx = (raw_ij < 6 && raw_ij >= 0) ? raw_ij : 0;

      in.set_ioinfo(sh_info().input[k], m_interpolator[ij_idx].ij_index);
   }

   sh_info().two_side = m_shaderio.two_sided();
   sh_info().nlds = m_shaderio.nlds();

   sh_info().nr_ps_max_color_exports = m_max_counted_color_exports;

   if (sh_info().fs_write_all) {
      sh_info().nr_ps_max_color_exports = m_max_color_exports;
   }

   /* If the shader did not export anything, add an export to location 0
    * with all components swizzled to 7 */
   if (!m_last_pixel_export) {
      GPRVector no_value(0, {7,7,7,7});

      m_last_pixel_export = new ExportInstruction(0, no_value, ExportInstruction::et_pixel);
      sh_info().nr_ps_color_exports++;
      sh_info().ps_color_export_mask = 0xf;
      emit_export_instruction(m_last_pixel_export);
   }

   m_last_pixel_export->set_last();

   /* with "write all" the maximum number of color exports is finally set
    * to 8 */
   if (sh_info().fs_write_all)
      sh_info().nr_ps_max_color_exports = 8;
}
}