//! Computation of basic block order in emitted code.
//!
//! This module handles the translation from CLIF BBs to VCode BBs.
//!
//! The basic idea is that we compute a sequence of "lowered blocks" that
//! correspond to one or more blocks in the graph: (CLIF CFG) `union` (implicit
//! block on *every* edge). Conceptually, the lowering pipeline wants to insert
//! moves for phi-nodes on every block-to-block transfer; these blocks always
//! conceptually exist, but may be merged with an "original" CLIF block (and
//! hence not actually exist; this is equivalent to inserting the blocks only on
//! critical edges).
//!
//! In other words, starting from a CFG like this (where each "CLIF block" and
//! "(edge N->M)" is a separate basic block):
//!
//! ```plain
//!
//!           CLIF block 0
//!            /         \
//!    (edge 0->1)     (edge 0->2)
//!         |               |
//!   CLIF block 1     CLIF block 2
//!          \             /
//!      (edge 1->3) (edge 2->3)
//!              \     /
//!           CLIF block 3
//! ```
//!
//! We can produce a CFG of lowered blocks like so:
//!
//! ```plain
//!         +--------------+
//!         | CLIF block 0 |
//!         +--------------+
//!           /           \
//!   +--------------+   +--------------+
//!   | (edge 0->1)  |   | (edge 0->2)  |
//!   | CLIF block 1 |   | CLIF block 2 |
//!   +--------------+   +--------------+
//!           \               /
//!       +-----------+  +-----------+
//!       |(edge 1->3)|  |(edge 2->3)|
//!       +-----------+  +-----------+
//!              \          /
//!           +--------------+
//!           | CLIF block 3 |
//!           +--------------+
//! ```
//!
//! (Note that the edges into CLIF blocks 1 and 2 could be merged with those
//! blocks' original bodies, but the out-edges could not: for simplicity in the
//! successor-function definition, we only ever merge an edge onto one side of
//! an original CLIF block.)
//!
//! Each `LoweredBlock` names just an original CLIF block, an original CLIF
//! block prepended or appended with an edge block (never both, though), or just
//! an edge block.
//!
//! To compute this lowering, we do a DFS over the CLIF-plus-edge-block graph
//! (never actually materialized, just defined by a "successors" function), and
//! compute the reverse postorder.
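//!
//! As a rough, self-contained sketch (not this module's exact code: block IDs
//! here are plain `usize`s, and `succs` stands in for the merged-graph
//! successor function, which below also records instructions and successor
//! ranges), the RPO computation looks like:
//!
//! ```ignore
//! fn rpo(entry: usize, succs: impl Fn(usize) -> Vec<usize>) -> Vec<usize> {
//!     use std::collections::HashSet;
//!     let mut visited = HashSet::new();
//!     let mut postorder = Vec::new();
//!     // Explicit DFS stack: (node, node's successors, number of successors
//!     // not yet taken from the end of that list).
//!     let mut stack: Vec<(usize, Vec<usize>, usize)> = Vec::new();
//!     visited.insert(entry);
//!     let entry_succs = succs(entry);
//!     let n = entry_succs.len();
//!     stack.push((entry, entry_succs, n));
//!     while let Some(top) = stack.last_mut() {
//!         if top.2 == 0 {
//!             // All successors explored: emit this node in postorder.
//!             postorder.push(top.0);
//!             stack.pop();
//!         } else {
//!             // Visit children in reverse, mirroring the heuristic below.
//!             top.2 -= 1;
//!             let next = top.1[top.2];
//!             if visited.insert(next) {
//!                 let next_succs = succs(next);
//!                 let n = next_succs.len();
//!                 stack.push((next, next_succs, n));
//!             }
//!         }
//!     }
//!     postorder.reverse();
//!     postorder
//! }
//! ```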
//!
//! This algorithm isn't perfect w.r.t. generated code quality: we don't, for
//! example, consider any information about whether edge blocks will actually
//! have content, because this computation happens as part of lowering *before*
//! regalloc, and regalloc may or may not insert moves/spills/reloads on any
//! particular edge. But it works relatively well and is conceptually simple.
//! Furthermore, the [MachBuffer] machine-code sink performs final peephole-like
//! branch editing that in practice elides empty blocks and simplifies some of
//! the other redundancies that this scheme produces.
use crate::entity::SecondaryMap;
use crate::fx::{FxHashMap, FxHashSet};
use crate::inst_predicates::visit_block_succs;
use crate::ir::{Block, Function, Inst, Opcode};
use crate::{machinst::*, trace};
use smallvec::SmallVec;
/// Mapping from CLIF BBs to VCode BBs.
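///
/// A rough usage sketch (`clif_func` is a hypothetical, already-built CLIF
/// `Function`; this is illustrative, not a doctest):
///
/// ```ignore
/// let order = BlockLoweringOrder::new(&clif_func);
/// for (i, lowered_block) in order.lowered_order().iter().enumerate() {
///     let index = BlockIndex::new(i);
///     // Successors of this lowered block, as (branch inst, target) pairs.
///     for &(inst, succ) in order.succ_indices(index) {
///         let _ = (inst, succ);
///     }
///     // Cold blocks keep their place in the order; emission sinks them later.
///     let _is_cold = order.is_cold(index);
///     let _ = lowered_block;
/// }
/// ```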
#[derive(Debug)]
pub struct BlockLoweringOrder {
/// Lowered blocks, in BlockIndex order. Each block is some combination of
/// (i) a CLIF block, and (ii) inserted crit-edge blocks before or after;
/// see [LoweredBlock] for details.
lowered_order: Vec<LoweredBlock>,
/// Successors for all lowered blocks, in one serialized vector. Indexed by
/// the ranges in `lowered_succ_ranges`.
#[allow(dead_code)]
lowered_succs: Vec<(Inst, LoweredBlock)>,
/// BlockIndex values for successors for all lowered blocks, in the same
/// order as `lowered_succs`.
lowered_succ_indices: Vec<(Inst, BlockIndex)>,
/// Ranges in `lowered_succs` giving the successor lists for each lowered
/// block. Indexed by lowering-order index (`BlockIndex`).
lowered_succ_ranges: Vec<(usize, usize)>,
/// Mapping from CLIF BB to BlockIndex (index in lowered order). Note that
/// some CLIF BBs may not be lowered; in particular, we skip unreachable
/// blocks.
#[allow(dead_code)]
orig_map: SecondaryMap<Block, Option<BlockIndex>>,
/// Cold blocks. These blocks are not reordered in the
/// `lowered_order` above; the lowered order must respect RPO
/// (uses after defs) in order for lowering to be
/// correct. Instead, this set is used to provide `is_cold()`,
/// which is used by VCode emission to sink the blocks at the last
/// moment (when we actually emit bytes into the MachBuffer).
cold_blocks: FxHashSet<BlockIndex>,
/// Lowered blocks that are indirect branch targets.
indirect_branch_targets: FxHashSet<BlockIndex>,
}
/// The origin of a block in the lowered block-order: either an original CLIF
/// block, or an inserted edge-block, or a combination of the two if an edge is
/// non-critical.
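///
/// For the diamond CFG in the module-level docs, for example, the merged
/// "(edge 0->1) + CLIF block 1" becomes an `EdgeAndOrig` lowered block, while
/// the split "(edge 1->3)" becomes an `Edge` lowered block (see
/// `test_blockorder_diamond` below).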
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum LoweredBlock {
/// Block in original CLIF, with no merged edge-blocks.
Orig {
/// Original CLIF block.
block: Block,
},
/// Block in the original CLIF, plus edge-block to one succ (which is the
/// one successor of the original block).
OrigAndEdge {
/// The original CLIF block contained in this lowered block.
block: Block,
/// The edge (jump) instruction transitioning from this block
/// to the next, i.e., corresponding to the included edge-block. This
/// will be an instruction in `block`.
edge_inst: Inst,
/// The index of this successor edge, used to distinguish multiple
/// edges between the same block pair.
succ_idx: usize,
/// The successor CLIF block.
succ: Block,
},
/// Block in the original CLIF, preceded by edge-block from one pred (which
/// is the one pred of the original block).
EdgeAndOrig {
/// The previous CLIF block, i.e., the edge block's predecessor.
pred: Block,
/// The edge (jump) instruction corresponding to the included
/// edge-block. This will be an instruction in `pred`.
edge_inst: Inst,
/// The index of this successor edge, used to distinguish multiple
/// edges between the same block pair.
succ_idx: usize,
/// The original CLIF block included in this lowered block.
block: Block,
},
/// Split critical edge between two CLIF blocks. This lowered block does not
/// correspond to any original CLIF blocks; it only serves as an insertion
/// point for work to happen on the transition from `pred` to `succ`.
Edge {
/// The predecessor CLIF block.
pred: Block,
/// The edge (jump) instruction corresponding to this edge's transition.
/// This will be an instruction in `pred`.
edge_inst: Inst,
/// The index of this successor edge, used to distinguish multiple
/// edges between the same block pair.
succ_idx: usize,
/// The successor CLIF block.
succ: Block,
},
}
impl LoweredBlock {
/// The associated original (CLIF) block included in this lowered block, if
/// any.
pub fn orig_block(self) -> Option<Block> {
match self {
LoweredBlock::Orig { block, .. }
| LoweredBlock::OrigAndEdge { block, .. }
| LoweredBlock::EdgeAndOrig { block, .. } => Some(block),
LoweredBlock::Edge { .. } => None,
}
}
/// The associated in-edge, if any.
#[cfg(test)]
pub fn in_edge(self) -> Option<(Block, Inst, Block)> {
match self {
LoweredBlock::EdgeAndOrig {
pred,
edge_inst,
block,
..
} => Some((pred, edge_inst, block)),
_ => None,
}
}
/// The associated out-edge, if any. This also covers edge-only (`Edge`) blocks.
#[cfg(test)]
pub fn out_edge(self) -> Option<(Block, Inst, Block)> {
match self {
LoweredBlock::OrigAndEdge {
block,
edge_inst,
succ,
..
} => Some((block, edge_inst, succ)),
LoweredBlock::Edge {
pred,
edge_inst,
succ,
..
} => Some((pred, edge_inst, succ)),
_ => None,
}
}
}
impl BlockLoweringOrder {
/// Compute and return a lowered block order for `f`.
pub fn new(f: &Function) -> BlockLoweringOrder {
trace!("BlockLoweringOrder: function body {:?}", f);
// Make sure that we have an entry block, and the entry block is
// not marked as cold. (The verifier ensures this as well, but
// the user may not have run the verifier, and this property is
// critical to avoid a miscompile, so we assert it here too.)
let entry = f.layout.entry_block().expect("Must have entry block");
assert!(!f.layout.is_cold(entry));
// Step 1: compute the in-edge and out-edge count of every block.
let mut block_in_count = SecondaryMap::with_default(0);
let mut block_out_count = SecondaryMap::with_default(0);
// Cache the block successors to avoid re-examining branches below.
let mut block_succs: SmallVec<[(Inst, usize, Block); 128]> = SmallVec::new();
let mut block_succ_range = SecondaryMap::with_default((0, 0));
let mut indirect_branch_target_clif_blocks = FxHashSet::default();
for block in f.layout.blocks() {
let block_succ_start = block_succs.len();
let mut succ_idx = 0;
visit_block_succs(f, block, |inst, succ, from_table| {
block_out_count[block] += 1;
block_in_count[succ] += 1;
block_succs.push((inst, succ_idx, succ));
succ_idx += 1;
if from_table {
indirect_branch_target_clif_blocks.insert(succ);
}
});
let block_succ_end = block_succs.len();
block_succ_range[block] = (block_succ_start, block_succ_end);
for inst in f.layout.block_likely_branches(block) {
if f.dfg.insts[inst].opcode() == Opcode::Return {
// Implicit output edge for any return.
block_out_count[block] += 1;
}
}
}
// Implicit input edge for entry block.
block_in_count[entry] += 1;
// All blocks ending in conditional branches or br_tables must
// have edge-moves inserted at the top of successor blocks,
// not at the end of themselves. This is because the moves
// would have to be inserted prior to the branch's register
// use; but RA2's model is that the moves happen *on* the
// edge, after every def/use in the block. RA2 will check for
// "branch register use safety" and panic if such a problem
// occurs. To avoid this, we force the below algorithm to
// never merge the edge block onto the end of a block that
// ends in a conditional branch. We do this by "faking" more
// than one successor, even if there is only one.
//
// (One might ask: isn't that already always the case? Not
// necessarily: a `br_table` with an empty table and only a
// default label, for example, has just one successor.)
for block in f.layout.blocks() {
for inst in f.layout.block_likely_branches(block) {
// If the block has a branch with any "fixed args"
// (not blockparam args) ...
if f.dfg.insts[inst].opcode().is_branch() && f.dfg.inst_fixed_args(inst).len() > 0 {
// ... then force a minimum successor count of
// two, so the below algorithm cannot put
// edge-moves on the end of the block.
block_out_count[block] = std::cmp::max(2, block_out_count[block]);
}
}
}
// Here we define the implicit CLIF-plus-edges graph. There are
// conceptually two such graphs: the original, with every edge explicit,
// and the merged one, with blocks (represented by `LoweredBlock`
// values) that contain original CLIF blocks, edges, or both. This
// function returns a lowered block's successors as per the latter, with
// consideration to edge-block merging.
//
// Note that there is a property of the block-merging rules below
// that is very important to ensure we don't miss any lowered blocks:
// any block in the implicit CLIF-plus-edges graph will *only* be
// included in one block in the merged graph.
//
// This, combined with the property that every edge block is reachable
// only from one predecessor (and hence cannot be reached by a DFS
// backedge), means that it is sufficient in our DFS below to track
// visited-bits per original CLIF block only, not per edge. This greatly
// simplifies the data structures (no need to keep a sparse hash-set of
// (block, block) tuples).
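//
// In brief, the merging rules the closure below implements are:
//
// - From a lowered block ending in an original CLIF block: each out-edge
//   becomes `EdgeAndOrig` (edge merged onto the *top* of the successor) if
//   the successor has exactly one in-edge, and a standalone `Edge` otherwise.
// - From a lowered block ending in an edge: the successor CLIF block becomes
//   `OrigAndEdge` (its single out-edge merged onto its *bottom*) if it has
//   exactly one out-edge and that edge is a real CFG edge, and a plain
//   `Orig` otherwise.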
let compute_lowered_succs = |ret: &mut Vec<(Inst, LoweredBlock)>, block: LoweredBlock| {
let start_idx = ret.len();
match block {
LoweredBlock::Orig { block } | LoweredBlock::EdgeAndOrig { block, .. } => {
// At an orig block; successors are always edge blocks,
// possibly with orig blocks following.
let range = block_succ_range[block];
for &(edge_inst, succ_idx, succ) in &block_succs[range.0..range.1] {
if block_in_count[succ] == 1 {
ret.push((
edge_inst,
LoweredBlock::EdgeAndOrig {
pred: block,
edge_inst,
succ_idx,
block: succ,
},
));
} else {
ret.push((
edge_inst,
LoweredBlock::Edge {
pred: block,
edge_inst,
succ_idx,
succ,
},
));
}
}
}
LoweredBlock::Edge {
succ, edge_inst, ..
}
| LoweredBlock::OrigAndEdge {
succ, edge_inst, ..
} => {
// At an edge block; successors are always orig blocks,
// possibly with edge blocks following.
if block_out_count[succ] == 1 {
let range = block_succ_range[succ];
// Check whether the single successor is a real CFG edge (vs. the
// implicit return successor).
if range.1 - range.0 > 0 {
debug_assert!(range.1 - range.0 == 1);
let (succ_edge_inst, succ_succ_idx, succ_succ) = block_succs[range.0];
ret.push((
edge_inst,
LoweredBlock::OrigAndEdge {
block: succ,
edge_inst: succ_edge_inst,
succ_idx: succ_succ_idx,
succ: succ_succ,
},
));
} else {
ret.push((edge_inst, LoweredBlock::Orig { block: succ }));
}
} else {
ret.push((edge_inst, LoweredBlock::Orig { block: succ }));
}
}
}
let end_idx = ret.len();
(start_idx, end_idx)
};
// Build the explicit LoweredBlock-to-LoweredBlock successors list.
let mut lowered_succs = vec![];
let mut lowered_succ_indices = vec![];
// Step 2: Compute RPO traversal of the implicit CLIF-plus-edge-block graph. Use an
// explicit stack so we don't overflow the real stack with a deep DFS.
#[derive(Debug)]
struct StackEntry {
this: LoweredBlock,
succs: (usize, usize), // range in lowered_succs
cur_succ: usize, // index in lowered_succs
}
let mut stack: SmallVec<[StackEntry; 16]> = SmallVec::new();
let mut visited = FxHashSet::default();
let mut postorder = vec![];
// Add the entry block.
//
// FIXME(cfallin): we might be able to use OrigAndEdge. Find a
// way to not special-case the entry block here.
let block = LoweredBlock::Orig { block: entry };
visited.insert(block);
let range = compute_lowered_succs(&mut lowered_succs, block);
lowered_succ_indices.resize(lowered_succs.len(), 0);
stack.push(StackEntry {
this: block,
succs: range,
cur_succ: range.1,
});
while !stack.is_empty() {
let stack_entry = stack.last_mut().unwrap();
let range = stack_entry.succs;
if stack_entry.cur_succ == range.0 {
postorder.push((stack_entry.this, range));
stack.pop();
} else {
// Heuristic: chase the children in reverse. This puts the first
// successor block first in RPO, all other things being equal,
// which tends to prioritize loop backedges over out-edges,
// putting the edge-block closer to the loop body and minimizing
// live-ranges in linear instruction space.
let next = lowered_succs[stack_entry.cur_succ - 1].1;
stack_entry.cur_succ -= 1;
if visited.contains(&next) {
continue;
}
visited.insert(next);
let range = compute_lowered_succs(&mut lowered_succs, next);
lowered_succ_indices.resize(lowered_succs.len(), 0);
stack.push(StackEntry {
this: next,
succs: range,
cur_succ: range.1,
});
}
}
postorder.reverse();
let rpo = postorder;
// Step 3: now that we have RPO, build the BlockIndex/BB fwd/rev maps.
let mut lowered_order = vec![];
let mut cold_blocks = FxHashSet::default();
let mut lowered_succ_ranges = vec![];
let mut lb_to_bindex = FxHashMap::default();
let mut indirect_branch_targets = FxHashSet::default();
for (block, succ_range) in rpo.into_iter() {
let index = BlockIndex::new(lowered_order.len());
lb_to_bindex.insert(block, index);
lowered_order.push(block);
lowered_succ_ranges.push(succ_range);
match block {
LoweredBlock::Orig { block }
| LoweredBlock::OrigAndEdge { block, .. }
| LoweredBlock::EdgeAndOrig { block, .. } => {
if f.layout.is_cold(block) {
cold_blocks.insert(index);
}
if indirect_branch_target_clif_blocks.contains(&block) {
indirect_branch_targets.insert(index);
}
}
LoweredBlock::Edge { pred, succ, .. } => {
if f.layout.is_cold(pred) || f.layout.is_cold(succ) {
cold_blocks.insert(index);
}
if indirect_branch_target_clif_blocks.contains(&succ) {
indirect_branch_targets.insert(index);
}
}
}
}
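// Convert the successor lists from `LoweredBlock` values to `BlockIndex`
// values, now that every lowered block has been assigned an index.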
let lowered_succ_indices = lowered_succs
.iter()
.map(|&(inst, succ)| (inst, lb_to_bindex.get(&succ).cloned().unwrap()))
.collect();
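// Build the reverse map from original CLIF block to its lowered-order index,
// for lowered blocks that contain an original block.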
let mut orig_map = SecondaryMap::with_default(None);
for (i, lb) in lowered_order.iter().enumerate() {
let i = BlockIndex::new(i);
if let Some(b) = lb.orig_block() {
orig_map[b] = Some(i);
}
}
let result = BlockLoweringOrder {
lowered_order,
lowered_succs,
lowered_succ_indices,
lowered_succ_ranges,
orig_map,
cold_blocks,
indirect_branch_targets,
};
trace!("BlockLoweringOrder: {:?}", result);
result
}
/// Get the lowered order of blocks.
pub fn lowered_order(&self) -> &[LoweredBlock] {
&self.lowered_order[..]
}
/// Get the successor indices for a lowered block.
pub fn succ_indices(&self, block: BlockIndex) -> &[(Inst, BlockIndex)] {
let range = self.lowered_succ_ranges[block.index()];
&self.lowered_succ_indices[range.0..range.1]
}
/// Determine whether the given lowered-block index is cold.
pub fn is_cold(&self, block: BlockIndex) -> bool {
self.cold_blocks.contains(&block)
}
/// Determine whether the given lowered block index is an indirect branch
/// target.
pub fn is_indirect_branch_target(&self, block: BlockIndex) -> bool {
self.indirect_branch_targets.contains(&block)
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::cursor::{Cursor, FuncCursor};
use crate::ir::types::*;
use crate::ir::UserFuncName;
use crate::ir::{AbiParam, Function, InstBuilder, Signature};
use crate::isa::CallConv;
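/// Build a test function with `n_blocks` blocks and the given `edges`
/// (source, dest) pairs, which must be sorted by source block. A block with
/// no out-edges returns, a block with one out-edge jumps, and a block with
/// two out-edges uses a conditional branch followed by a jump.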
fn build_test_func(n_blocks: usize, edges: &[(usize, usize)]) -> Function {
assert!(n_blocks > 0);
let name = UserFuncName::testcase("test0");
let mut sig = Signature::new(CallConv::SystemV);
sig.params.push(AbiParam::new(I32));
let mut func = Function::with_name_signature(name, sig);
let blocks = (0..n_blocks)
.map(|i| {
let bb = func.dfg.make_block();
assert!(bb.as_u32() == i as u32);
bb
})
.collect::<Vec<_>>();
let arg0 = func.dfg.append_block_param(blocks[0], I32);
let mut pos = FuncCursor::new(&mut func);
let mut edge = 0;
for i in 0..n_blocks {
pos.insert_block(blocks[i]);
let mut succs = vec![];
while edge < edges.len() && edges[edge].0 == i {
succs.push(edges[edge].1);
edge += 1;
}
if succs.len() == 0 {
pos.ins().return_(&[arg0]);
} else if succs.len() == 1 {
pos.ins().jump(blocks[succs[0]], &[]);
} else if succs.len() == 2 {
pos.ins().brnz(arg0, blocks[succs[0]], &[]);
pos.ins().jump(blocks[succs[1]], &[]);
} else {
panic!("Too many successors");
}
}
func
}
#[test]
fn test_blockorder_diamond() {
let func = build_test_func(4, &[(0, 1), (0, 2), (1, 3), (2, 3)]);
let order = BlockLoweringOrder::new(&func);
assert_eq!(order.lowered_order.len(), 6);
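// Expected lowered order, per the asserts below: block 0; (edge 0->1 +
// block 1); edge 1->3; (edge 0->2 + block 2); edge 2->3; block 3.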
assert!(order.lowered_order[0].orig_block().unwrap().as_u32() == 0);
assert!(order.lowered_order[0].in_edge().is_none());
assert!(order.lowered_order[0].out_edge().is_none());
assert!(order.lowered_order[1].orig_block().unwrap().as_u32() == 1);
assert!(order.lowered_order[1].in_edge().unwrap().0.as_u32() == 0);
assert!(order.lowered_order[1].in_edge().unwrap().2.as_u32() == 1);
assert!(order.lowered_order[2].orig_block().is_none());
assert!(order.lowered_order[2].in_edge().is_none());
assert!(order.lowered_order[2].out_edge().unwrap().0.as_u32() == 1);
assert!(order.lowered_order[2].out_edge().unwrap().2.as_u32() == 3);
assert!(order.lowered_order[3].orig_block().unwrap().as_u32() == 2);
assert!(order.lowered_order[3].in_edge().unwrap().0.as_u32() == 0);
assert!(order.lowered_order[3].in_edge().unwrap().2.as_u32() == 2);
assert!(order.lowered_order[3].out_edge().is_none());
assert!(order.lowered_order[4].orig_block().is_none());
assert!(order.lowered_order[4].in_edge().is_none());
assert!(order.lowered_order[4].out_edge().unwrap().0.as_u32() == 2);
assert!(order.lowered_order[4].out_edge().unwrap().2.as_u32() == 3);
assert!(order.lowered_order[5].orig_block().unwrap().as_u32() == 3);
assert!(order.lowered_order[5].in_edge().is_none());
assert!(order.lowered_order[5].out_edge().is_none());
}
#[test]
fn test_blockorder_critedge() {
//            0
//           / \
//          1   2
//         / \   \
//        3   4   |
//        |\  _|___|
//        | \/ |
//        | /\ |
//        5    6
//
// (3 -> 5, 3 -> 6, 4 -> 6 are critical edges and must be split)
//
let func = build_test_func(
7,
&[
(0, 1),
(0, 2),
(1, 3),
(1, 4),
(2, 5),
(3, 5),
(3, 6),
(4, 6),
],
);
let order = BlockLoweringOrder::new(&func);
assert_eq!(order.lowered_order.len(), 11);
println!("ordered = {:?}", order.lowered_order);
// block 0
assert!(order.lowered_order[0].orig_block().unwrap().as_u32() == 0);
assert!(order.lowered_order[0].in_edge().is_none());
assert!(order.lowered_order[0].out_edge().is_none());
// edge 0->1 + block 1
assert!(order.lowered_order[1].orig_block().unwrap().as_u32() == 1);
assert!(order.lowered_order[1].in_edge().unwrap().0.as_u32() == 0);
assert!(order.lowered_order[1].in_edge().unwrap().2.as_u32() == 1);
assert!(order.lowered_order[1].out_edge().is_none());
// edge 1->3 + block 3
assert!(order.lowered_order[2].orig_block().unwrap().as_u32() == 3);
assert!(order.lowered_order[2].in_edge().unwrap().0.as_u32() == 1);
assert!(order.lowered_order[2].in_edge().unwrap().2.as_u32() == 3);
assert!(order.lowered_order[2].out_edge().is_none());
// edge 3->5
assert!(order.lowered_order[3].orig_block().is_none());
assert!(order.lowered_order[3].in_edge().is_none());
assert!(order.lowered_order[3].out_edge().unwrap().0.as_u32() == 3);
assert!(order.lowered_order[3].out_edge().unwrap().2.as_u32() == 5);
// edge 3->6
assert!(order.lowered_order[4].orig_block().is_none());
assert!(order.lowered_order[4].in_edge().is_none());
assert!(order.lowered_order[4].out_edge().unwrap().0.as_u32() == 3);
assert!(order.lowered_order[4].out_edge().unwrap().2.as_u32() == 6);
// edge 1->4 + block 4
assert!(order.lowered_order[5].orig_block().unwrap().as_u32() == 4);
assert!(order.lowered_order[5].in_edge().unwrap().0.as_u32() == 1);
assert!(order.lowered_order[5].in_edge().unwrap().2.as_u32() == 4);
assert!(order.lowered_order[5].out_edge().is_none());
// edge 4->6
assert!(order.lowered_order[6].orig_block().is_none());
assert!(order.lowered_order[6].in_edge().is_none());
assert!(order.lowered_order[6].out_edge().unwrap().0.as_u32() == 4);
assert!(order.lowered_order[6].out_edge().unwrap().2.as_u32() == 6);
// block 6
assert!(order.lowered_order[7].orig_block().unwrap().as_u32() == 6);
assert!(order.lowered_order[7].in_edge().is_none());
assert!(order.lowered_order[7].out_edge().is_none());
// edge 0->2 + block 2
assert!(order.lowered_order[8].orig_block().unwrap().as_u32() == 2);
assert!(order.lowered_order[8].in_edge().unwrap().0.as_u32() == 0);
assert!(order.lowered_order[8].in_edge().unwrap().2.as_u32() == 2);
assert!(order.lowered_order[8].out_edge().is_none());
// edge 2->5
assert!(order.lowered_order[9].orig_block().is_none());
assert!(order.lowered_order[9].in_edge().is_none());
assert!(order.lowered_order[9].out_edge().unwrap().0.as_u32() == 2);
assert!(order.lowered_order[9].out_edge().unwrap().2.as_u32() == 5);
// block 5
assert!(order.lowered_order[10].orig_block().unwrap().as_u32() == 5);
assert!(order.lowered_order[10].in_edge().is_none());
assert!(order.lowered_order[10].out_edge().is_none());
}
}