blob: 5f319473868b3a69487a43f7b9a1e833144231af [file] [log] [blame]
//! A Constant-Phi-Node removal pass.
use crate::dominator_tree::DominatorTree;
use crate::entity::EntityList;
use crate::fx::FxHashMap;
use crate::fx::FxHashSet;
use crate::ir;
use crate::ir::instructions::BranchInfo;
use crate::ir::Function;
use crate::ir::{Block, Inst, Value};
use crate::timing;
use arrayvec::ArrayVec;
use bumpalo::Bump;
use cranelift_entity::SecondaryMap;
use smallvec::SmallVec;
// A note on notation. For the sake of clarity, this file uses the phrase
// "formal parameters" to mean the `Value`s listed in the block head, and
// "actual parameters" to mean the `Value`s passed in a branch or a jump:
//
// block4(v16: i32, v18: i32): <-- formal parameters
// ...
// brnz v27, block7(v22, v24) <-- actual parameters
// jump block6
// This transformation pass (conceptually) partitions all values in the
// function into two groups:
//
// * Group A: values defined by block formal parameters, except for the entry block.
//
// * Group B: All other values: that is, values defined by instructions,
// and the formals of the entry block.
//
// For each value in Group A, it attempts to establish whether it will have
// the value of exactly one member of Group B. If so, the formal parameter is
// deleted, all corresponding actual parameters (in jumps/branches to the
// defining block) are deleted, and a rename is inserted.
//
// The entry block is special-cased because (1) we don't know what values flow
// to its formals and (2) in any case we can't change its formals.
//
// Work proceeds in three phases.
//
// * Phase 1: examine all instructions. For each block, make up a useful
// grab-bag of information, `BlockSummary`, that summarises the block's
// formals and jump/branch instruction. This is used by Phases 2 and 3.
//
// * Phase 2: for each value in Group A, try to find a single Group B value
// that flows to it. This is done using a classical iterative forward
// dataflow analysis over a simple constant-propagation style lattice. It
// converges quickly in practice -- I have seen at most 4 iterations. This
// is relatively cheap because the iteration is done over the
// `BlockSummary`s, and does not visit each instruction. The resulting
// fixed point is stored in a `SolverState`.
//
// * Phase 3: using the `SolverState` and `BlockSummary`, edit the function to
// remove redundant formals and actuals, and to insert suitable renames.
//
// Note that the effectiveness of the analysis depends on on the fact that
// there are no copy instructions in Cranelift's IR. If there were, the
// computation of `actual_absval` in Phase 2 would have to be extended to
// chase through such copies.
//
// For large functions, the analysis cost using the new AArch64 backend is about
// 0.6% of the non-optimising compile time, as measured by instruction counts.
// This transformation usually pays for itself several times over, though, by
// reducing the isel/regalloc cost downstream. Gains of up to 7% have been
// seen for large functions.
/// The `Value`s (Group B) that can flow to a formal parameter (Group A).
#[derive(Clone, Copy, Debug, PartialEq)]
enum AbstractValue {
/// Two or more values flow to this formal.
Many,
/// Exactly one value, as stated, flows to this formal. The `Value`s that
/// can appear here are exactly: `Value`s defined by `Inst`s, plus the
/// `Value`s defined by the formals of the entry block. Note that this is
/// exactly the set of `Value`s that are *not* tracked in the solver below
/// (see `SolverState`).
One(Value /*Group B*/),
/// No value flows to this formal.
None,
}
impl AbstractValue {
fn join(self, other: AbstractValue) -> AbstractValue {
match (self, other) {
// Joining with `None` has no effect
(AbstractValue::None, p2) => p2,
(p1, AbstractValue::None) => p1,
// Joining with `Many` produces `Many`
(AbstractValue::Many, _p2) => AbstractValue::Many,
(_p1, AbstractValue::Many) => AbstractValue::Many,
// The only interesting case
(AbstractValue::One(v1), AbstractValue::One(v2)) => {
if v1 == v2 {
AbstractValue::One(v1)
} else {
AbstractValue::Many
}
}
}
}
fn is_one(self) -> bool {
matches!(self, AbstractValue::One(_))
}
}
#[derive(Clone, Copy, Debug)]
struct OutEdge<'a> {
/// An instruction that transfers control.
inst: Inst,
/// The block that control is transferred to.
block: Block,
/// The arguments to that block.
///
/// These values can be from both groups A and B.
args: &'a [Value],
}
impl<'a> OutEdge<'a> {
/// Construct a new `OutEdge` for the given instruction.
///
/// Returns `None` if this is an edge without any block arguments, which
/// means we can ignore it for this analysis's purposes.
#[inline]
fn new(bump: &'a Bump, dfg: &ir::DataFlowGraph, inst: Inst, block: Block) -> Option<Self> {
let inst_var_args = dfg.inst_variable_args(inst);
// Skip edges without params.
if inst_var_args.is_empty() {
return None;
}
Some(OutEdge {
inst,
block,
args: bump.alloc_slice_fill_iter(
inst_var_args
.iter()
.map(|value| dfg.resolve_aliases(*value)),
),
})
}
}
/// For some block, a useful bundle of info. The `Block` itself is not stored
/// here since it will be the key in the associated `FxHashMap` -- see
/// `summaries` below. For the `SmallVec` tuning params: most blocks have
/// few parameters, hence `4`. And almost all blocks have either one or two
/// successors, hence `2`.
#[derive(Clone, Debug, Default)]
struct BlockSummary<'a> {
/// Formal parameters for this `Block`.
///
/// These values are from group A.
formals: &'a [Value],
/// Each outgoing edge from this block.
///
/// We don't bother to include transfers that pass zero parameters
/// since that makes more work for the solver for no purpose.
///
/// Note that, because blocks used with `br_table`s cannot have block
/// arguments, there are at most two outgoing edges from these blocks.
dests: ArrayVec<OutEdge<'a>, 2>,
}
impl<'a> BlockSummary<'a> {
/// Construct a new `BlockSummary`, using `values` as its backing storage.
#[inline]
fn new(bump: &'a Bump, formals: &[Value]) -> Self {
Self {
formals: bump.alloc_slice_copy(formals),
dests: Default::default(),
}
}
}
/// Solver state. This holds a AbstractValue for each formal parameter, except
/// for those from the entry block.
struct SolverState {
absvals: FxHashMap<Value /*Group A*/, AbstractValue>,
}
impl SolverState {
fn new() -> Self {
Self {
absvals: FxHashMap::default(),
}
}
fn get(&self, actual: Value) -> AbstractValue {
*self
.absvals
.get(&actual)
.unwrap_or_else(|| panic!("SolverState::get: formal param {:?} is untracked?!", actual))
}
fn maybe_get(&self, actual: Value) -> Option<&AbstractValue> {
self.absvals.get(&actual)
}
fn set(&mut self, actual: Value, lp: AbstractValue) {
match self.absvals.insert(actual, lp) {
Some(_old_lp) => {}
None => panic!("SolverState::set: formal param {:?} is untracked?!", actual),
}
}
}
/// Detect phis in `func` that will only ever produce one value, using a
/// classic forward dataflow analysis. Then remove them.
#[inline(never)]
pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree) {
let _tt = timing::remove_constant_phis();
debug_assert!(domtree.is_valid());
// Phase 1 of 3: for each block, make a summary containing all relevant
// info. The solver will iterate over the summaries, rather than having
// to inspect each instruction in each block.
let bump =
Bump::with_capacity(domtree.cfg_postorder().len() * 4 * std::mem::size_of::<Value>());
let mut summaries =
SecondaryMap::<Block, BlockSummary>::with_capacity(domtree.cfg_postorder().len());
for b in domtree.cfg_postorder().iter().rev().copied() {
let formals = func.dfg.block_params(b);
let mut summary = BlockSummary::new(&bump, formals);
for inst in func.layout.block_insts(b) {
let idetails = &func.dfg.insts[inst];
// Note that multi-dest transfers (i.e., branch tables) don't
// carry parameters in our IR, so we only have to care about
// `SingleDest` here.
if let BranchInfo::SingleDest(dest, _) = idetails.analyze_branch(&func.dfg.value_lists)
{
if let Some(edge) = OutEdge::new(&bump, &func.dfg, inst, dest) {
summary.dests.push(edge);
}
}
}
// Ensure the invariant that all blocks (except for the entry) appear
// in the summary, *unless* they have neither formals nor any
// param-carrying branches/jumps.
if formals.len() > 0 || summary.dests.len() > 0 {
summaries[b] = summary;
}
}
// Phase 2 of 3: iterate over the summaries in reverse postorder,
// computing new `AbstractValue`s for each tracked `Value`. The set of
// tracked `Value`s is exactly Group A as described above.
let entry_block = func
.layout
.entry_block()
.expect("remove_constant_phis: entry block unknown");
// Set up initial solver state
let mut state = SolverState::new();
for b in domtree.cfg_postorder().iter().rev().copied() {
// For each block, get the formals
if b == entry_block {
continue;
}
let formals = func.dfg.block_params(b);
for formal in formals {
let mb_old_absval = state.absvals.insert(*formal, AbstractValue::None);
assert!(mb_old_absval.is_none());
}
}
// Solve: repeatedly traverse the blocks in reverse postorder, until there
// are no changes.
let mut iter_no = 0;
loop {
iter_no += 1;
let mut changed = false;
for src in domtree.cfg_postorder().iter().rev().copied() {
let src_summary = &summaries[src];
for edge in &src_summary.dests {
assert!(edge.block != entry_block);
// By contrast, the dst block must have a summary. Phase 1
// will have only included an entry in `src_summary.dests` if
// that branch/jump carried at least one parameter. So the
// dst block does take parameters, so it must have a summary.
let dst_summary = &summaries[edge.block];
let dst_formals = &dst_summary.formals;
assert_eq!(edge.args.len(), dst_formals.len());
for (formal, actual) in dst_formals.iter().zip(edge.args) {
// Find the abstract value for `actual`. If it is a block
// formal parameter then the most recent abstract value is
// to be found in the solver state. If not, then it's a
// real value defining point (not a phi), in which case
// return it itself.
let actual_absval = match state.maybe_get(*actual) {
Some(pt) => *pt,
None => AbstractValue::One(*actual),
};
// And `join` the new value with the old.
let formal_absval_old = state.get(*formal);
let formal_absval_new = formal_absval_old.join(actual_absval);
if formal_absval_new != formal_absval_old {
changed = true;
state.set(*formal, formal_absval_new);
}
}
}
}
if !changed {
break;
}
}
let mut n_consts = 0;
for absval in state.absvals.values() {
if absval.is_one() {
n_consts += 1;
}
}
// Phase 3 of 3: edit the function to remove constant formals, using the
// summaries and the final solver state as a guide.
// Make up a set of blocks that need editing.
let mut need_editing = FxHashSet::<Block>::default();
for (block, summary) in summaries.iter() {
if block == entry_block {
continue;
}
for formal in summary.formals {
let formal_absval = state.get(*formal);
if formal_absval.is_one() {
need_editing.insert(block);
break;
}
}
}
// Firstly, deal with the formals. For each formal which is redundant,
// remove it, and also add a reroute from it to the constant value which
// it we know it to be.
for b in &need_editing {
let mut del_these = SmallVec::<[(Value, Value); 32]>::new();
let formals: &[Value] = func.dfg.block_params(*b);
for formal in formals {
// The state must give an absval for `formal`.
if let AbstractValue::One(replacement_val) = state.get(*formal) {
del_these.push((*formal, replacement_val));
}
}
// We can delete the formals in any order. However,
// `remove_block_param` works by sliding backwards all arguments to
// the right of the value it is asked to delete. Hence when removing more
// than one formal, it is significantly more efficient to ask it to
// remove the rightmost formal first, and hence this `rev()`.
for (redundant_formal, replacement_val) in del_these.into_iter().rev() {
func.dfg.remove_block_param(redundant_formal);
func.dfg.change_to_alias(redundant_formal, replacement_val);
}
}
// Secondly, visit all branch insns. If the destination has had its
// formals changed, change the actuals accordingly. Don't scan all insns,
// rather just visit those as listed in the summaries we prepared earlier.
for summary in summaries.values() {
for edge in &summary.dests {
if !need_editing.contains(&edge.block) {
continue;
}
let old_actuals = func.dfg.insts[edge.inst].value_list().unwrap();
let num_old_actuals = old_actuals.len(&func.dfg.value_lists);
let num_fixed_actuals = func.dfg.insts[edge.inst]
.opcode()
.constraints()
.num_fixed_value_arguments();
let dst_summary = &summaries[edge.block];
// Check that the numbers of arguments make sense.
assert!(num_fixed_actuals <= num_old_actuals);
assert_eq!(
num_fixed_actuals + dst_summary.formals.len(),
num_old_actuals
);
// Create a new value list.
let mut new_actuals = EntityList::<Value>::new();
// Copy the fixed args to the new list
for i in 0..num_fixed_actuals {
let val = old_actuals.get(i, &func.dfg.value_lists).unwrap();
new_actuals.push(val, &mut func.dfg.value_lists);
}
// Copy the variable args (the actual block params) to the new
// list, filtering out redundant ones.
for (i, formal_i) in dst_summary.formals.iter().enumerate() {
let actual_i = old_actuals
.get(num_fixed_actuals + i, &func.dfg.value_lists)
.unwrap();
let is_redundant = state.get(*formal_i).is_one();
if !is_redundant {
new_actuals.push(actual_i, &mut func.dfg.value_lists);
}
}
*func.dfg.insts[edge.inst].value_list_mut().unwrap() = new_actuals;
}
}
log::debug!(
"do_remove_constant_phis: done, {} iters. {} formals, of which {} const.",
iter_no,
state.absvals.len(),
n_consts
);
}