blob: a51f61a0bfa2a4067be08087d7d3a29f2164e61d [file] [log] [blame]
/*
* Copyright © 2019 Valve Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
*/
#include <map>
#include "aco_ir.h"
#include "aco_builder.h"
/*
* Implements an algorithm to lower to Concentional SSA Form (CSSA).
* After "Revisiting Out-of-SSA Translation for Correctness, CodeQuality, and Efficiency"
* by B. Boissinot, A. Darte, F. Rastello, B. Dupont de Dinechin, C. Guillon,
*
* By lowering the IR to CSSA, the insertion of parallelcopies is separated from
* the register coalescing problem. Additionally, correctness is ensured w.r.t. spilling.
* The algorithm tries to find beneficial insertion points by checking if a basic block
* is empty and if the variable already has a new definition in a dominating block.
*/
namespace aco {
namespace {
typedef std::map<uint32_t, std::vector<std::pair<Definition, Operand>>> phi_info;
struct cssa_ctx {
Program* program;
live& live_vars;
phi_info logical_phi_info;
phi_info linear_phi_info;
cssa_ctx(Program* program, live& live_vars) : program(program), live_vars(live_vars) {}
};
bool collect_phi_info(cssa_ctx& ctx)
{
bool progress = false;
for (Block& block : ctx.program->blocks) {
for (aco_ptr<Instruction>& phi : block.instructions) {
bool is_logical;
if (phi->opcode == aco_opcode::p_phi)
is_logical = true;
else if (phi->opcode == aco_opcode::p_linear_phi)
is_logical = false;
else
break;
/* no CSSA for the exec mask as we don't spill it anyway */
if (phi->definitions[0].isFixed() && phi->definitions[0].physReg() == exec)
continue;
std::vector<unsigned>& preds = is_logical ? block.logical_preds : block.linear_preds;
/* collect definition's block per Operand */
std::vector<unsigned> def_points(phi->operands.size());
for (unsigned i = 0; i < phi->operands.size(); i++) {
Operand& op = phi->operands[i];
if (op.isUndefined()) {
def_points[i] = preds[i];
} else if (op.isConstant()) {
/* in theory, we could insert the definition there... */
def_points[i] = 0;
} else {
assert(op.isTemp());
unsigned pred = preds[i];
do {
def_points[i] = pred;
pred = is_logical ?
ctx.program->blocks[pred].logical_idom :
ctx.program->blocks[pred].linear_idom;
} while (def_points[i] != pred &&
ctx.live_vars.live_out[pred].find(op.getTemp()) != ctx.live_vars.live_out[pred].end());
}
}
/* check live-range intersections */
for (unsigned i = 0; i < phi->operands.size(); i++) {
Operand op = phi->operands[i];
if (op.isUndefined())
continue;
/* check if the operand comes from the exec mask of a predecessor */
if (op.isTemp() && op.getTemp() == ctx.program->blocks[preds[i]].live_out_exec)
op.setFixed(exec);
bool interferes = false;
unsigned idom = is_logical ?
ctx.program->blocks[def_points[i]].logical_idom :
ctx.program->blocks[def_points[i]].linear_idom;
/* live-through operands definitely interfere */
if (op.isTemp() && !op.isKill()) {
interferes = true;
/* create copies for constants to ease spilling */
} else if (op.isConstant()) {
interferes = true;
/* create copies for SGPR -> VGPR moves */
} else if (op.regClass() != phi->definitions[0].regClass()) {
interferes = true;
/* operand might interfere with any phi-def*/
} else if (def_points[i] == block.index) {
interferes = true;
/* operand might interfere with phi-def */
} else if (ctx.live_vars.live_out[idom].count(phi->definitions[0].getTemp())) {
interferes = true;
/* else check for interferences with other operands */
} else {
for (unsigned j = 0; !interferes && j < phi->operands.size(); j++) {
/* don't care about other register classes */
if (!phi->operands[j].isTemp() || phi->operands[j].regClass() != phi->definitions[0].regClass())
continue;
/* same operands cannot interfere */
if (op.getTemp() == phi->operands[j].getTemp())
continue;
/* if def_points[i] dominates any other def_point, assume they interfere.
* As live-through operands are checked above, only test up the current block. */
unsigned other_def_point = def_points[j];
while (def_points[i] < other_def_point && other_def_point != block.index)
other_def_point = is_logical ?
ctx.program->blocks[other_def_point].logical_idom :
ctx.program->blocks[other_def_point].linear_idom;
interferes = def_points[i] == other_def_point;
}
}
if (!interferes)
continue;
progress = true;
/* create new temporary and rename operands */
Temp new_tmp = Temp{ctx.program->allocateId(), phi->definitions[0].regClass()};
if (is_logical)
ctx.logical_phi_info[preds[i]].emplace_back(Definition(new_tmp), op);
else
ctx.linear_phi_info[preds[i]].emplace_back(Definition(new_tmp), op);
phi->operands[i] = Operand(new_tmp);
phi->operands[i].setKill(true);
def_points[i] = preds[i];
}
}
}
return progress;
}
void insert_parallelcopies(cssa_ctx& ctx)
{
/* insert the parallelcopies from logical phis before p_logical_end */
for (auto&& entry : ctx.logical_phi_info) {
Block& block = ctx.program->blocks[entry.first];
unsigned idx = block.instructions.size() - 1;
while (block.instructions[idx]->opcode != aco_opcode::p_logical_end) {
assert(idx > 0);
idx--;
}
Builder bld(ctx.program);
bld.reset(&block.instructions, std::next(block.instructions.begin(), idx));
for (std::pair<Definition, Operand>& pair : entry.second)
bld.pseudo(aco_opcode::p_parallelcopy, pair.first, pair.second);
}
/* insert parallelcopies for the linear phis at the end of blocks just before the branch */
for (auto&& entry : ctx.linear_phi_info) {
Block& block = ctx.program->blocks[entry.first];
std::vector<aco_ptr<Instruction>>::iterator it = block.instructions.end();
--it;
assert((*it)->format == Format::PSEUDO_BRANCH);
Builder bld(ctx.program);
bld.reset(&block.instructions, it);
for (std::pair<Definition, Operand>& pair : entry.second)
bld.pseudo(aco_opcode::p_parallelcopy, pair.first, pair.second);
}
}
} /* end namespace */
void lower_to_cssa(Program* program, live& live_vars, const struct radv_nir_compiler_options *options)
{
cssa_ctx ctx = {program, live_vars};
/* collect information about all interfering phi operands */
bool progress = collect_phi_info(ctx);
if (!progress)
return;
insert_parallelcopies(ctx);
/* update live variable information */
live_vars = live_var_analysis(program, options);
}
}