| //! Support for egraphs represented in the DataFlowGraph. |
| |
| use crate::alias_analysis::{AliasAnalysis, LastStores}; |
| use crate::ctxhash::{CtxEq, CtxHash, CtxHashMap}; |
| use crate::cursor::{Cursor, CursorPosition, FuncCursor}; |
| use crate::dominator_tree::DominatorTree; |
| use crate::egraph::domtree::DomTreeWithChildren; |
| use crate::egraph::elaborate::Elaborator; |
| use crate::fx::FxHashSet; |
| use crate::inst_predicates::is_pure_for_egraph; |
| use crate::ir::{ |
| DataFlowGraph, Function, Inst, InstructionData, Type, Value, ValueDef, ValueListPool, |
| }; |
| use crate::loop_analysis::LoopAnalysis; |
| use crate::opts::generated_code::ContextIter; |
| use crate::opts::IsleContext; |
| use crate::trace; |
| use crate::unionfind::UnionFind; |
| use cranelift_entity::packed_option::ReservedValue; |
| use cranelift_entity::SecondaryMap; |
| use std::hash::Hasher; |
| |
| mod cost; |
| mod domtree; |
| mod elaborate; |
| |
/// Pass over a Function that does the whole aegraph thing.
///
/// - Removes non-skeleton nodes from the Layout.
/// - Performs a GVN-and-rule-application pass over all Values
///   reachable from the skeleton, potentially creating new Union
///   nodes (i.e., an aegraph) so that some values have multiple
///   representations.
/// - Does "extraction" on the aegraph: selects the best value out of
///   the tree-of-Union nodes for each used value.
/// - Does "scoped elaboration" on the aegraph: chooses one or more
///   locations for pure nodes to become instructions again in the
///   layout, as forced by the skeleton.
///
/// At the beginning and end of this pass, the CLIF should be in a
/// state that passes the verifier and, additionally, has no Union
/// nodes. During the pass, Union nodes may exist, and instructions in
/// the layout may refer to results of instructions that are not
/// placed in the layout.
pub struct EgraphPass<'a> {
    /// The function we're operating on. Mutated in place: pure
    /// instructions are removed from its layout and re-elaborated.
    func: &'a mut Function,
    /// Dominator tree, used for elaboration pass.
    domtree: &'a DominatorTree,
    /// Alias analysis, used during optimization.
    alias_analysis: &'a mut AliasAnalysis<'a>,
    /// "Domtree with children": like `domtree`, but with an explicit
    /// list of children, rather than just parent pointers. Built once
    /// in `new()` and used for preorder traversals.
    domtree_children: DomTreeWithChildren,
    /// Loop analysis results, used for built-in LICM during
    /// elaboration.
    loop_analysis: &'a LoopAnalysis,
    /// Which canonical Values do we want to rematerialize in each
    /// block where they're used?
    ///
    /// (A canonical Value is the *oldest* Value in an eclass,
    /// i.e. tree of union value-nodes).
    remat_values: FxHashSet<Value>,
    /// Stats collected while we run this pass.
    pub(crate) stats: Stats,
    /// Union-find that maps all members of a Union tree (eclass) back
    /// to the *oldest* (lowest-numbered) `Value`.
    eclasses: UnionFind<Value>,
}
| |
/// Context passed through node insertion and optimization.
///
/// Bundles mutable borrows of the pieces of `EgraphPass` state needed
/// while processing a single instruction; built afresh per-inst in
/// `remove_pure_and_optimize` because `self.func` is borrowed through
/// the cursor there.
pub(crate) struct OptimizeCtx<'opt, 'analysis>
where
    'analysis: 'opt,
{
    // Borrowed from EgraphPass:
    /// The function being rewritten.
    pub(crate) func: &'opt mut Function,
    /// Map from each original `Value` to the root of its current
    /// optimized eclass (itself a `Value`).
    pub(crate) value_to_opt_value: &'opt mut SecondaryMap<Value, Value>,
    /// GVN deduplication map, keyed on `(controlling type,
    /// instruction data)` with args compared via canonical values.
    pub(crate) gvn_map: &'opt mut CtxHashMap<(Type, InstructionData), Value>,
    /// Union-find mapping all members of an eclass back to the oldest
    /// (canonical) `Value`.
    pub(crate) eclasses: &'opt mut UnionFind<Value>,
    /// Canonical values selected for per-use rematerialization (see
    /// `EgraphPass::remat_values`).
    pub(crate) remat_values: &'opt mut FxHashSet<Value>,
    /// Statistics counters for this pass.
    pub(crate) stats: &'opt mut Stats,
    /// Alias analysis, consulted for skeleton (load/store) insts.
    pub(crate) alias_analysis: &'opt mut AliasAnalysis<'analysis>,
    /// Alias-analysis state at the current program point.
    pub(crate) alias_analysis_state: &'opt mut LastStores,
    // Held locally during optimization of one node (recursively):
    /// Current depth of recursive ISLE rewrite invocations; bounded
    /// by `REWRITE_LIMIT` in `optimize_pure_enode`.
    pub(crate) rewrite_depth: usize,
    /// Values that ISLE rules have marked as subsuming the original
    /// (take only this value; do not union in others).
    pub(crate) subsume_values: FxHashSet<Value>,
}
| |
/// For passing to `insert_pure_enode`. Sometimes the enode already
/// exists as an Inst (from the original CLIF), and sometimes we're in
/// the middle of creating it and want to avoid inserting it if
/// possible until we know we need it.
pub(crate) enum NewOrExistingInst {
    /// An instruction not yet inserted into the DFG: its data and its
    /// controlling type variable.
    New(InstructionData, Type),
    /// An instruction that already exists in the DFG.
    Existing(Inst),
}
| |
| impl NewOrExistingInst { |
| fn get_inst_key<'a>(&'a self, dfg: &'a DataFlowGraph) -> (Type, InstructionData) { |
| match self { |
| NewOrExistingInst::New(data, ty) => (*ty, *data), |
| NewOrExistingInst::Existing(inst) => { |
| let ty = dfg.ctrl_typevar(*inst); |
| (ty, dfg.insts[*inst].clone()) |
| } |
| } |
| } |
| } |
| |
impl<'opt, 'analysis> OptimizeCtx<'opt, 'analysis>
where
    'analysis: 'opt,
{
    /// Optimization of a single instruction.
    ///
    /// This does a few things:
    /// - Looks up the instruction in the GVN deduplication map. If we
    ///   already have the same instruction somewhere else, with the
    ///   same args, then we can alias the original instruction's
    ///   results and omit this instruction entirely.
    ///   - Note that we do this canonicalization based on the
    ///     instruction with its arguments as *canonical* eclass IDs,
    ///     that is, the oldest (smallest index) `Value` reachable in
    ///     the tree-of-unions (whole eclass). This ensures that we
    ///     properly canonicalize newer nodes that use newer "versions"
    ///     of a value that are still equal to the older versions.
    /// - If the instruction is "new" (not deduplicated), then apply
    ///   optimization rules:
    ///   - All of the mid-end rules written in ISLE.
    ///   - Store-to-load forwarding.
    /// - Update the value-to-opt-value map, and update the eclass
    ///   union-find, if we rewrote the value to different form(s).
    ///
    /// Returns the `Value` representing the (possibly deduplicated or
    /// rewritten) result of this enode.
    pub(crate) fn insert_pure_enode(&mut self, inst: NewOrExistingInst) -> Value {
        // Create the external context for looking up and updating the
        // GVN map. This is necessary so that instructions themselves
        // do not have to carry all the references or data for a full
        // `Eq` or `Hash` impl.
        let gvn_context = GVNContext {
            union_find: self.eclasses,
            value_lists: &self.func.dfg.value_lists,
        };

        self.stats.pure_inst += 1;
        if let NewOrExistingInst::New(..) = inst {
            self.stats.new_inst += 1;
        }

        // Does this instruction already exist? If so, add entries to
        // the value-map to rewrite uses of its results to the results
        // of the original (existing) instruction. If not, optimize
        // the new instruction.
        if let Some(&orig_result) = self
            .gvn_map
            .get(&inst.get_inst_key(&self.func.dfg), &gvn_context)
        {
            self.stats.pure_inst_deduped += 1;
            if let NewOrExistingInst::Existing(inst) = inst {
                // A pure enode has exactly one result: alias it to the
                // previously-seen instruction's result and record the
                // equivalence in the union-find so canonicalization
                // sees both as one eclass.
                debug_assert_eq!(self.func.dfg.inst_results(inst).len(), 1);
                let result = self.func.dfg.first_result(inst);
                self.value_to_opt_value[result] = orig_result;
                self.eclasses.union(result, orig_result);
                self.stats.union += 1;
                result
            } else {
                // A `New` inst that was never materialized: just hand
                // back the existing value.
                orig_result
            }
        } else {
            // Now actually insert the InstructionData and attach
            // result value (exactly one).
            let (inst, result, ty) = match inst {
                NewOrExistingInst::New(data, typevar) => {
                    let inst = self.func.dfg.make_inst(data);
                    // TODO: reuse return value?
                    self.func.dfg.make_inst_results(inst, typevar);
                    let result = self.func.dfg.first_result(inst);
                    // Add to eclass unionfind.
                    self.eclasses.add(result);
                    // New inst. We need to do the analysis of its result.
                    (inst, result, typevar)
                }
                NewOrExistingInst::Existing(inst) => {
                    let result = self.func.dfg.first_result(inst);
                    let ty = self.func.dfg.ctrl_typevar(inst);
                    (inst, result, ty)
                }
            };

            let opt_value = self.optimize_pure_enode(inst);
            // Rebuild the GVN context: the borrow taken above ended
            // when we mutated `self` in `optimize_pure_enode`.
            let gvn_context = GVNContext {
                union_find: self.eclasses,
                value_lists: &self.func.dfg.value_lists,
            };
            self.gvn_map.insert(
                (ty, self.func.dfg.insts[inst].clone()),
                opt_value,
                &gvn_context,
            );
            self.value_to_opt_value[result] = opt_value;
            opt_value
        }
    }

    /// Optimizes an enode by applying any matching mid-end rewrite
    /// rules (or store-to-load forwarding, which is a special case),
    /// unioning together all possible optimized (or rewritten) forms
    /// of this expression into an eclass and returning the `Value`
    /// that represents that eclass.
    fn optimize_pure_enode(&mut self, inst: Inst) -> Value {
        // A pure node always has exactly one result.
        let orig_value = self.func.dfg.first_result(inst);

        let mut isle_ctx = IsleContext { ctx: self };

        // Limit rewrite depth. When we apply optimization rules, they
        // may create new nodes (values) and those are, recursively,
        // optimized eagerly as soon as they are created. So we may
        // have more than one ISLE invocation on the stack. (This is
        // necessary so that as the toplevel builds the
        // right-hand-side expression bottom-up, it uses the "latest"
        // optimized values for all the constituent parts.) To avoid
        // infinite or problematic recursion, we bound the rewrite
        // depth to a small constant here.
        const REWRITE_LIMIT: usize = 5;
        if isle_ctx.ctx.rewrite_depth > REWRITE_LIMIT {
            isle_ctx.ctx.stats.rewrite_depth_limit += 1;
            return orig_value;
        }
        isle_ctx.ctx.rewrite_depth += 1;

        // Invoke the ISLE toplevel constructor, getting all new
        // values produced as equivalents to this value.
        trace!("Calling into ISLE with original value {}", orig_value);
        isle_ctx.ctx.stats.rewrite_rule_invoked += 1;
        let mut optimized_values =
            crate::opts::generated_code::constructor_simplify(&mut isle_ctx, orig_value);

        // Create a union of all new values with the original (or
        // maybe just one new value marked as "subsuming" the
        // original, if present.)
        let mut union_value = orig_value;
        while let Some(optimized_value) = optimized_values.next(&mut isle_ctx) {
            trace!(
                "Returned from ISLE for {}, got {:?}",
                orig_value,
                optimized_value
            );
            if optimized_value == orig_value {
                trace!(" -> same as orig value; skipping");
                continue;
            }
            if isle_ctx.ctx.subsume_values.contains(&optimized_value) {
                // Merge in the unionfind so canonicalization
                // still works, but take *only* the subsuming
                // value, and break now.
                isle_ctx.ctx.eclasses.union(optimized_value, union_value);
                union_value = optimized_value;
                break;
            }

            // Build a new Union node over the running union and this
            // optimized form; it becomes the new eclass root value.
            let old_union_value = union_value;
            union_value = isle_ctx
                .ctx
                .func
                .dfg
                .union(old_union_value, optimized_value);
            isle_ctx.ctx.stats.union += 1;
            trace!(" -> union: now {}", union_value);
            // Register the fresh Union value with the union-find,
            // then merge all three (old root, optimized form, and the
            // new Union node) into a single eclass.
            isle_ctx.ctx.eclasses.add(union_value);
            isle_ctx
                .ctx
                .eclasses
                .union(old_union_value, optimized_value);
            isle_ctx.ctx.eclasses.union(old_union_value, union_value);
        }

        isle_ctx.ctx.rewrite_depth -= 1;

        union_value
    }

    /// Optimize a "skeleton" instruction, possibly removing
    /// it. Returns `true` if the instruction should be removed from
    /// the layout.
    fn optimize_skeleton_inst(&mut self, inst: Inst) -> bool {
        self.stats.skeleton_inst += 1;
        // Not pure, but may still be a load or store:
        // process it to see if we can optimize it.
        if let Some(new_result) =
            self.alias_analysis
                .process_inst(self.func, self.alias_analysis_state, inst)
        {
            // Alias analysis found a replacement value for this
            // instruction's result, so alias the result to it and
            // signal that the inst can be removed.
            self.stats.alias_analysis_removed += 1;
            let result = self.func.dfg.first_result(inst);
            self.value_to_opt_value[result] = new_result;
            true
        } else {
            // Set all results to identity-map to themselves
            // in the value-to-opt-value map.
            for &result in self.func.dfg.inst_results(inst) {
                self.value_to_opt_value[result] = result;
                self.eclasses.add(result);
            }
            false
        }
    }
}
| |
impl<'a> EgraphPass<'a> {
    /// Create a new EgraphPass.
    ///
    /// Borrows the analyses the pass consults (domtree, loop
    /// analysis, alias analysis) and precomputes the
    /// domtree-with-children view used for preorder traversals.
    pub fn new(
        func: &'a mut Function,
        domtree: &'a DominatorTree,
        loop_analysis: &'a LoopAnalysis,
        alias_analysis: &'a mut AliasAnalysis<'a>,
    ) -> Self {
        let num_values = func.dfg.num_values();
        let domtree_children = DomTreeWithChildren::new(func, domtree);
        Self {
            func,
            domtree,
            domtree_children,
            loop_analysis,
            alias_analysis,
            stats: Stats::default(),
            // Pre-size the union-find for all existing values.
            eclasses: UnionFind::with_capacity(num_values),
            remat_values: FxHashSet::default(),
        }
    }

    /// Run the process: build the aegraph (phase 1), then elaborate
    /// it back into a full layout (phase 2).
    pub fn run(&mut self) {
        self.remove_pure_and_optimize();

        trace!("egraph built:\n{}\n", self.func.display());
        if cfg!(feature = "trace-log") {
            // Dump every value and (for first-result values) its
            // defining instruction, for debugging.
            for (value, def) in self.func.dfg.values_and_defs() {
                trace!(" -> {} = {:?}", value, def);
                match def {
                    ValueDef::Result(i, 0) => {
                        trace!(" -> {} = {:?}", i, self.func.dfg.insts[i]);
                    }
                    _ => {}
                }
            }
        }
        trace!("stats: {:?}", self.stats);
        self.elaborate();
    }

    /// Remove pure nodes from the `Layout` of the function, ensuring
    /// that only the "side-effect skeleton" remains, and also
    /// optimize the pure nodes. This is the first step of
    /// egraph-based processing and turns the pure CFG-based CLIF into
    /// a CFG skeleton with a sea of (optimized) nodes tying it
    /// together.
    ///
    /// As we walk through the code, we eagerly apply optimization
    /// rules; at any given point we have a "latest version" of an
    /// eclass of possible representations for a `Value` in the
    /// original program, which is itself a `Value` at the root of a
    /// union-tree. We keep a map from the original values to these
    /// optimized values. When we encounter any instruction (pure or
    /// side-effecting skeleton) we rewrite its arguments to capture
    /// the "latest" optimized forms of these values. (We need to do
    /// this as part of this pass, and not later using a finished map,
    /// because the eclass can continue to be updated and we need to
    /// only refer to its subset that exists at this stage, to
    /// maintain acyclicity.)
    fn remove_pure_and_optimize(&mut self) {
        let mut cursor = FuncCursor::new(self.func);
        // Map from original Value to its current optimized form;
        // `reserved_value()` marks not-yet-visited entries.
        let mut value_to_opt_value: SecondaryMap<Value, Value> =
            SecondaryMap::with_default(Value::reserved_value());
        // GVN map keyed on (controlling type, instruction data).
        let mut gvn_map: CtxHashMap<(Type, InstructionData), Value> =
            CtxHashMap::with_capacity(cursor.func.dfg.num_values());

        // In domtree preorder, visit blocks. (TODO: factor out an
        // iterator from this and elaborator.)
        let root = self.domtree_children.root();
        let mut block_stack = vec![root];
        while let Some(block) = block_stack.pop() {
            // We popped this block; push children
            // immediately, then process this block.
            block_stack.extend(self.domtree_children.children(block));

            trace!("Processing block {}", block);
            cursor.set_position(CursorPosition::Before(block));

            let mut alias_analysis_state = self.alias_analysis.block_starting_state(block);

            // Blockparams map to themselves and get their own
            // singleton eclasses.
            for &param in cursor.func.dfg.block_params(block) {
                trace!("creating initial singleton eclass for blockparam {}", param);
                self.eclasses.add(param);
                value_to_opt_value[param] = param;
            }
            while let Some(inst) = cursor.next_inst() {
                trace!("Processing inst {}", inst);

                // While we're passing over all insts, create initial
                // singleton eclasses for all result and blockparam
                // values. Also do initial analysis of all inst
                // results.
                for &result in cursor.func.dfg.inst_results(inst) {
                    trace!("creating initial singleton eclass for {}", result);
                    self.eclasses.add(result);
                }

                // Rewrite args of *all* instructions using the
                // value-to-opt-value map.
                cursor.func.dfg.resolve_aliases_in_arguments(inst);
                for arg in cursor.func.dfg.inst_args_mut(inst) {
                    let new_value = value_to_opt_value[*arg];
                    trace!("rewriting arg {} of inst {} to {}", arg, inst, new_value);
                    // Defs dominate uses and we visit in domtree
                    // preorder, so every arg must already be mapped.
                    debug_assert_ne!(new_value, Value::reserved_value());
                    *arg = new_value;
                }

                // Build a context for optimization, with borrows of
                // state. We can't invoke a method on `self` because
                // we've borrowed `self.func` mutably (as
                // `cursor.func`) so we pull apart the pieces instead
                // here.
                let mut ctx = OptimizeCtx {
                    func: cursor.func,
                    value_to_opt_value: &mut value_to_opt_value,
                    gvn_map: &mut gvn_map,
                    eclasses: &mut self.eclasses,
                    rewrite_depth: 0,
                    subsume_values: FxHashSet::default(),
                    remat_values: &mut self.remat_values,
                    stats: &mut self.stats,
                    alias_analysis: self.alias_analysis,
                    alias_analysis_state: &mut alias_analysis_state,
                };

                if is_pure_for_egraph(ctx.func, inst) {
                    // Insert into GVN map and optimize any new nodes
                    // inserted (recursively performing this work for
                    // any nodes the optimization rules produce).
                    let inst = NewOrExistingInst::Existing(inst);
                    ctx.insert_pure_enode(inst);
                    // We've now rewritten all uses, or will when we
                    // see them, and the instruction exists as a pure
                    // enode in the eclass, so we can remove it.
                    cursor.remove_inst_and_step_back();
                } else {
                    // Skeleton inst: keep it unless alias analysis
                    // found a replacement for its result.
                    if ctx.optimize_skeleton_inst(inst) {
                        cursor.remove_inst_and_step_back();
                    }
                }
            }
        }
    }

    /// Scoped elaboration: compute a final ordering of op computation
    /// for each block and update the given Func body. After this
    /// runs, the function body is back into the state where every
    /// Inst with an used result is placed in the layout (possibly
    /// duplicated, if our code-motion logic decides this is the best
    /// option).
    ///
    /// This works in concert with the domtree. We do a preorder
    /// traversal of the domtree, tracking a scoped map from Id to
    /// (new) Value. The map's scopes correspond to levels in the
    /// domtree.
    ///
    /// At each block, we iterate forward over the side-effecting
    /// eclasses, and recursively generate their arg eclasses, then
    /// emit the ops themselves.
    ///
    /// To use an eclass in a given block, we first look it up in the
    /// scoped map, and get the Value if already present. If not, we
    /// need to generate it. We emit the extracted enode for this
    /// eclass after recursively generating its args. Eclasses are
    /// thus computed "as late as possible", but then memoized into
    /// the Id-to-Value map and available to all dominated blocks and
    /// for the rest of this block. (This subsumes GVN.)
    fn elaborate(&mut self) {
        let mut elaborator = Elaborator::new(
            self.func,
            self.domtree,
            &self.domtree_children,
            self.loop_analysis,
            &mut self.remat_values,
            &mut self.eclasses,
            &mut self.stats,
        );
        elaborator.elaborate();

        // Debug-build-only sanity check (no-op in release builds).
        self.check_post_egraph();
    }

    /// Post-pass invariant check (debug builds only): the aegraph
    /// must be fully lowered away again.
    #[cfg(debug_assertions)]
    fn check_post_egraph(&self) {
        // Verify that no union nodes are reachable from inst args,
        // and that all inst args' defining instructions are in the
        // layout.
        for block in self.func.layout.blocks() {
            for inst in self.func.layout.block_insts(block) {
                for &arg in self.func.dfg.inst_args(inst) {
                    match self.func.dfg.value_def(arg) {
                        ValueDef::Result(i, _) => {
                            debug_assert!(self.func.layout.inst_block(i).is_some());
                        }
                        ValueDef::Union(..) => {
                            panic!("egraph union node {} still reachable at {}!", arg, inst);
                        }
                        _ => {}
                    }
                }
            }
        }
    }

    /// Release-build stub for the post-pass invariant check.
    #[cfg(not(debug_assertions))]
    fn check_post_egraph(&self) {}
}
| |
/// Implementation of external-context equality and hashing on
/// InstructionData. This allows us to deduplicate instructions given
/// some context that lets us see its value lists and the mapping from
/// any value to "canonical value" (in an eclass).
struct GVNContext<'a> {
    /// Pool backing instructions' value-list arguments.
    value_lists: &'a ValueListPool,
    /// Union-find used to canonicalize each argument `Value` before
    /// comparing or hashing, so all eclass members compare equal.
    union_find: &'a UnionFind<Value>,
}
| |
| impl<'a> CtxEq<(Type, InstructionData), (Type, InstructionData)> for GVNContext<'a> { |
| fn ctx_eq( |
| &self, |
| (a_ty, a_inst): &(Type, InstructionData), |
| (b_ty, b_inst): &(Type, InstructionData), |
| ) -> bool { |
| a_ty == b_ty |
| && a_inst.eq(b_inst, self.value_lists, |value| { |
| self.union_find.find(value) |
| }) |
| } |
| } |
| |
| impl<'a> CtxHash<(Type, InstructionData)> for GVNContext<'a> { |
| fn ctx_hash<H: Hasher>(&self, state: &mut H, (ty, inst): &(Type, InstructionData)) { |
| std::hash::Hash::hash(&ty, state); |
| inst.hash(state, self.value_lists, |value| self.union_find.find(value)); |
| } |
| } |
| |
/// Statistics collected during egraph-based processing.
#[derive(Clone, Debug, Default)]
pub(crate) struct Stats {
    // Counters incremented during the build phase
    // (`remove_pure_and_optimize`):
    /// Pure instructions processed as enodes.
    pub(crate) pure_inst: u64,
    /// Pure instructions deduplicated via the GVN map.
    pub(crate) pure_inst_deduped: u64,
    /// Side-effecting ("skeleton") instructions processed.
    pub(crate) skeleton_inst: u64,
    /// Instructions whose result was replaced by alias analysis.
    pub(crate) alias_analysis_removed: u64,
    /// New instructions created by rewrite rules.
    pub(crate) new_inst: u64,
    /// Union operations performed (union-find merges / Union nodes).
    pub(crate) union: u64,
    /// Subsuming rewrites taken (see `OptimizeCtx::subsume_values`).
    pub(crate) subsume: u64,
    /// Rematerializations performed (see `remat_values`).
    pub(crate) remat: u64,
    /// Invocations of the ISLE `simplify` toplevel constructor.
    pub(crate) rewrite_rule_invoked: u64,
    /// Times the recursive rewrite-depth limit was hit.
    pub(crate) rewrite_depth_limit: u64,
    // Counters incremented by the elaboration pass (`elaborate`
    // module):
    pub(crate) elaborate_visit_node: u64,
    pub(crate) elaborate_memoize_hit: u64,
    pub(crate) elaborate_memoize_miss: u64,
    pub(crate) elaborate_memoize_miss_remat: u64,
    pub(crate) elaborate_licm_hoist: u64,
    pub(crate) elaborate_func: u64,
    pub(crate) elaborate_func_pre_insts: u64,
    pub(crate) elaborate_func_post_insts: u64,
}