/* blob: 6123e3d70cd4c3c3e6184968e679bbaf770d6e5b [file] [log] [blame] */
#include "ir3_nir.h"
#include "nir.h"
#include "nir_builder.h"
#include "nir_search.h"
#include "nir_search_helpers.h"
#ifndef NIR_OPT_ALGEBRAIC_STRUCT_DEFS
#define NIR_OPT_ALGEBRAIC_STRUCT_DEFS
/* One rewrite rule: a pattern to look for and the value that is built in
 * place of a match. Both pointers are handed to nir_replace_instr() by the
 * per-block rewrite function in this file.
 */
struct transform {
/* Expression pattern the instruction must match. */
const nir_search_expression *search;
/* Value tree used as the replacement when the pattern matches. */
const nir_search_value *replace;
/* Index into the pass's condition_flags array; the rule is only attempted
 * when that flag is true.
 */
unsigned condition_offset;
};
/* Per-opcode data for the state-labelling pre-pass, which assigns a state to
 * the result of each ALU instruction based on the states of its sources.
 */
struct per_op_table {
/* Indexed by the state of a source operand; gives the "filtered" state that
 * is folded into the transition-table index.
 */
const uint16_t *filter;
/* Radix used when combining the filtered states of all sources into one
 * index. An opcode whose entry has 0 here is skipped by the pre-pass.
 */
unsigned num_filtered_states;
/* Indexed by the combined source index; gives the state to record for the
 * instruction's destination.
 */
const uint16_t *table;
};
/* Note: these must match the start states created in
 * TreeAutomaton._build_table()
 */
/* WILDCARD_STATE = 0 is set by zeroing the state array */
/* State recorded for the result of every load_const instruction. */
static const uint16_t CONST_STATE = 1;
#endif
/* Pattern variable "x" (variable index 0). It is the single operand of both
 * search patterns in this file and is also referenced from the replacement
 * trees, so the replacement operates on whatever value the search captured.
 */
static const nir_search_variable search0_0 = {
{ nir_search_value_variable, -1 },
0, /* x */
false,
nir_type_invalid,
NULL,
};
/* Search pattern 0: fsin(x). */
static const nir_search_expression search0 = {
{ nir_search_value_expression, -1 },
false,
-1, 0,
nir_op_fsin,
{ &search0_0.value },
NULL,
};
/* Replacement tree for search pattern 0. Read from the leaves up, the nodes
 * below encode:
 *
 *    fsin(6.283185 * ffract(0.159155 * x + 0.5) - 3.141593)
 *
 * i.e. the argument is scaled by roughly 1/(2*pi), offset by 0.5, passed
 * through ffract, rescaled by roughly 2*pi and shifted down by roughly pi
 * before fsin is applied again. Assuming ffract yields the fractional part
 * in [0, 1), the new argument lies in about [-pi, pi) and differs from x by
 * a whole number of periods.
 *
 * The decimal value of each constant is given by the generated comment next
 * to its 64-bit literal.
 */
/* Constant ~6.283185 (about 2*pi): outer scale factor. */
static const nir_search_constant replace0_0_0_0 = {
{ nir_search_value_constant, -1 },
nir_type_float, { 0x401921fb3fa6defc /* 6.283185 */ },
};
/* Constant ~0.159155 (about 1/(2*pi)): inner scale factor applied to x. */
static const nir_search_constant replace0_0_0_1_0_0_0 = {
{ nir_search_value_constant, -1 },
nir_type_float, { 0x3fc45f30e7ff583a /* 0.159155 */ },
};
/* replace0_0_0_1_0_0_1 -> search0_0 in the cache */
/* 0.159155 * x */
static const nir_search_expression replace0_0_0_1_0_0 = {
{ nir_search_value_expression, -1 },
false,
2, 1,
nir_op_fmul,
{ &replace0_0_0_1_0_0_0.value, &search0_0.value },
NULL,
};
/* Constant 0.5: offset added before taking the fractional part. */
static const nir_search_constant replace0_0_0_1_0_1 = {
{ nir_search_value_constant, -1 },
nir_type_float, { 0x3fe0000000000000 /* 0.5 */ },
};
/* 0.159155 * x + 0.5 */
static const nir_search_expression replace0_0_0_1_0 = {
{ nir_search_value_expression, -1 },
false,
1, 2,
nir_op_fadd,
{ &replace0_0_0_1_0_0.value, &replace0_0_0_1_0_1.value },
NULL,
};
/* ffract(0.159155 * x + 0.5) */
static const nir_search_expression replace0_0_0_1 = {
{ nir_search_value_expression, -1 },
false,
-1, 2,
nir_op_ffract,
{ &replace0_0_0_1_0.value },
NULL,
};
/* 6.283185 * ffract(0.159155 * x + 0.5) */
static const nir_search_expression replace0_0_0 = {
{ nir_search_value_expression, -1 },
false,
0, 3,
nir_op_fmul,
{ &replace0_0_0_0.value, &replace0_0_0_1.value },
NULL,
};
/* Constant ~3.141593 (about pi): subtracted to recentre the range. */
static const nir_search_constant replace0_0_1 = {
{ nir_search_value_constant, -1 },
nir_type_float, { 0x400921fb82c2bd7f /* 3.141593 */ },
};
/* 6.283185 * ffract(0.159155 * x + 0.5) - 3.141593
 * This range-reduced argument is shared by the fsin and fcos replacements.
 */
static const nir_search_expression replace0_0 = {
{ nir_search_value_expression, -1 },
false,
-1, 3,
nir_op_fsub,
{ &replace0_0_0.value, &replace0_0_1.value },
NULL,
};
/* Root of the replacement: fsin applied to the range-reduced argument. */
static const nir_search_expression replace0 = {
{ nir_search_value_expression, -1 },
false,
-1, 3,
nir_op_fsin,
{ &replace0_0.value },
NULL,
};
/* search1_0 -> search0_0 in the cache */
/* Search pattern 1: fcos(x). The operand reuses the search0_0 variable
 * object rather than defining a second identical one.
 */
static const nir_search_expression search1 = {
{ nir_search_value_expression, -1 },
false,
-1, 0,
nir_op_fcos,
{ &search0_0.value },
NULL,
};
/* The "-> ... in the cache" lines below record that every sub-node of this
 * replacement is identical to a node already emitted for replace0, so only
 * the root node is defined here.
 */
/* replace1_0_0_0 -> replace0_0_0_0 in the cache */
/* replace1_0_0_1_0_0_0 -> replace0_0_0_1_0_0_0 in the cache */
/* replace1_0_0_1_0_0_1 -> search0_0 in the cache */
/* replace1_0_0_1_0_0 -> replace0_0_0_1_0_0 in the cache */
/* replace1_0_0_1_0_1 -> replace0_0_0_1_0_1 in the cache */
/* replace1_0_0_1_0 -> replace0_0_0_1_0 in the cache */
/* replace1_0_0_1 -> replace0_0_0_1 in the cache */
/* replace1_0_0 -> replace0_0_0 in the cache */
/* replace1_0_1 -> replace0_0_1 in the cache */
/* replace1_0 -> replace0_0 in the cache */
/* Replacement for search pattern 1: fcos applied to the same range-reduced
 * argument (replace0_0) that the fsin replacement uses, i.e.
 *
 *    fcos(6.283185 * ffract(0.159155 * x + 0.5) - 3.141593)
 */
static const nir_search_expression replace1 = {
{ nir_search_value_expression, -1 },
false,
-1, 3,
nir_op_fcos,
{ &replace0_0.value },
NULL,
};
/* Rules tried for instructions whose destination was labelled with state 2
 * (the state the transition table assigns to fsin results). The single rule
 * rewrites fsin(x) to its range-reduced form and is gated on
 * condition_flags[0].
 */
static const struct transform ir3_nir_apply_trig_workarounds_state2_xforms[] = {
{ &search0, &replace0.value, 0 },
};
/* Rules tried for instructions whose destination was labelled with state 3
 * (the state the transition table assigns to fcos results). The single rule
 * rewrites fcos(x) to its range-reduced form and is gated on
 * condition_flags[0].
 */
static const struct transform ir3_nir_apply_trig_workarounds_state3_xforms[] = {
{ &search1, &replace1.value, 0 },
};
/* State-transition data, indexed by search opcode. Only fsin and fcos have
 * entries; every other opcode is left zero-initialized, so its
 * num_filtered_states is 0 and the pre-pass skips it.
 *
 * For both opcodes the filter maps each of the four states (0..3) to the
 * same filtered state 0 and the table has a single entry, so the resulting
 * state does not depend on the operand's state: every fsin result is
 * labelled 2 and every fcos result is labelled 3.
 */
static const struct per_op_table ir3_nir_apply_trig_workarounds_table[nir_num_search_ops] = {
[nir_op_fsin] = {
/* One entry per source state 0..3; all collapse to filtered state 0. */
.filter = (uint16_t []) {
0,
0,
0,
0,
},
.num_filtered_states = 1,
/* Resulting state for fsin: selects the state2 transform list. */
.table = (uint16_t []) {
2,
},
},
[nir_op_fcos] = {
/* One entry per source state 0..3; all collapse to filtered state 0. */
.filter = (uint16_t []) {
0,
0,
0,
0,
},
.num_filtered_states = 1,
/* Resulting state for fcos: selects the state3 transform list. */
.table = (uint16_t []) {
3,
},
},
};
/*
 * Labelling pre-pass for one block.
 *
 * Walks the block's instructions in order and records, in `states`, a state
 * for the SSA value produced by each load_const and by each ALU instruction
 * whose opcode has a transition table. `states` is indexed by SSA index;
 * entries that are never written keep whatever value the caller put there.
 */
static void
ir3_nir_apply_trig_workarounds_pre_block(nir_block *block, uint16_t *states)
{
   nir_foreach_instr(instr, block) {
      /* Constants always get the dedicated constant state. */
      if (instr->type == nir_instr_type_load_const) {
         nir_load_const_instr *lc = nir_instr_as_load_const(instr);
         states[lc->def.index] = CONST_STATE;
         continue;
      }

      /* Everything that is neither a constant nor an ALU op is ignored. */
      if (instr->type != nir_instr_type_alu)
         continue;

      nir_alu_instr *alu = nir_instr_as_alu(instr);
      const nir_op opcode = alu->op;
      const uint16_t search_op = nir_search_op_for_nir_op(opcode);
      const struct per_op_table *entry =
         &ir3_nir_apply_trig_workarounds_table[search_op];

      /* No transition data for this opcode: leave its state untouched. */
      if (entry->num_filtered_states == 0)
         continue;

      /* Calculate the index into the transition table. Note the index
       * calculated must match the iteration order of Python's
       * itertools.product(), which was used to emit the transition
       * table: the filtered state of each source is one digit of a
       * base-num_filtered_states number, first source most significant.
       */
      uint16_t slot = 0;
      const unsigned num_srcs = nir_op_infos[opcode].num_inputs;
      for (unsigned s = 0; s < num_srcs; s++) {
         const uint16_t src_state = states[alu->src[s].src.ssa->index];
         slot *= entry->num_filtered_states;
         slot += entry->filter[src_state];
      }

      states[alu->dest.dest.ssa.index] = entry->table[slot];
   }
}
static bool
ir3_nir_apply_trig_workarounds_block(nir_builder *build, nir_block *block,
const uint16_t *states, const bool *condition_flags)
{
bool progress = false;
nir_foreach_instr_reverse_safe(instr, block) {
if (instr->type != nir_instr_type_alu)
continue;
nir_alu_instr *alu = nir_instr_as_alu(instr);
if (!alu->dest.dest.is_ssa)
continue;
switch (states[alu->dest.dest.ssa.index]) {
case 0:
break;
case 1:
break;
case 2:
for (unsigned i = 0; i < ARRAY_SIZE(ir3_nir_apply_trig_workarounds_state2_xforms); i++) {
const struct transform *xform = &ir3_nir_apply_trig_workarounds_state2_xforms[i];
if (condition_flags[xform->condition_offset] &&
nir_replace_instr(build, alu, xform->search, xform->replace)) {
progress = true;
break;
}
}
break;
case 3:
for (unsigned i = 0; i < ARRAY_SIZE(ir3_nir_apply_trig_workarounds_state3_xforms); i++) {
const struct transform *xform = &ir3_nir_apply_trig_workarounds_state3_xforms[i];
if (condition_flags[xform->condition_offset] &&
nir_replace_instr(build, alu, xform->search, xform->replace)) {
progress = true;
break;
}
}
break;
default: assert(0);
}
}
return progress;
}
/*
 * Run the trig workaround on one function implementation.
 *
 * Allocates a per-SSA-value state array, labels every block with the
 * pre-pass (blocks in forward order), then runs the rewrite pass over the
 * blocks in reverse order. `condition_flags` is passed through to the
 * rewrite pass unchanged.
 *
 * If the state array cannot be allocated, both passes are skipped and the
 * function behaves as if nothing matched; the no-progress metadata handling
 * below still runs.
 *
 * Returns true if any instruction was replaced.
 */
static bool
ir3_nir_apply_trig_workarounds_impl(nir_function_impl *impl, const bool *condition_flags)
{
   bool progress = false;

   nir_builder build;
   nir_builder_init(&build, impl);

   /* Note: it's important here that we're allocating a zeroed array, since
    * state 0 is the default state, which means we don't have to visit
    * anything other than constants and ALU instructions.
    */
   uint16_t *states = calloc(impl->ssa_alloc, sizeof(*states));

   /* calloc() returns NULL on allocation failure (and is allowed to for a
    * zero-element request). Indexing a NULL state array would be a NULL
    * dereference, so only run the passes when there is an array to index.
    */
   if (states != NULL) {
      nir_foreach_block(block, impl) {
         ir3_nir_apply_trig_workarounds_pre_block(block, states);
      }

      nir_foreach_block_reverse(block, impl) {
         progress |= ir3_nir_apply_trig_workarounds_block(&build, block, states,
                                                          condition_flags);
      }

      free(states);
   }

   if (progress) {
      /* Instructions were replaced; only block indices and dominance
       * information are declared as still valid.
       */
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);
   } else {
#ifndef NDEBUG
      /* Nothing changed: clear the debug-only "not properly reset" marker. */
      impl->valid_metadata &= ~nir_metadata_not_properly_reset;
#endif
   }

   return progress;
}
/*
 * Entry point of the pass: apply the fsin/fcos range-reduction rewrite to
 * every function in `shader` that has an implementation.
 *
 * Returns true if any function implementation was changed.
 */
bool
ir3_nir_apply_trig_workarounds(nir_shader *shader)
{
   /* Fetched for the benefit of rule conditions; no condition in this pass
    * reads them, hence the (void) casts.
    */
   const nir_shader_compiler_options *options = shader->options;
   const shader_info *info = &shader->info;
   (void) options;
   (void) info;

   /* The only condition used by the rules in this file is always true. */
   bool condition_flags[1];
   condition_flags[0] = true;

   bool any_progress = false;
   nir_foreach_function(func, shader) {
      if (!func->impl)
         continue;

      if (ir3_nir_apply_trig_workarounds_impl(func->impl, condition_flags))
         any_progress = true;
   }

   return any_progress;
}