| //===- CFLAliasAnalysis.cpp - CFL-Based Alias Analysis Implementation ------==// |
| // |
| // The LLVM Compiler Infrastructure |
| // |
| // This file is distributed under the University of Illinois Open Source |
| // License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file implements a CFL-based context-insensitive alias analysis |
| // algorithm. It does not depend on types. The algorithm is a mixture of the one |
| // described in "Demand-driven alias analysis for C" by Xin Zheng and Radu |
| // Rugina, and "Fast algorithms for Dyck-CFL-reachability with applications to |
| // Alias Analysis" by Zhang Q, Lyu M R, Yuan H, and Su Z. -- to summarize the |
| // papers, we build a graph of the uses of a variable, where each node is a |
| // memory location, and each edge is an action that happened on that memory |
| // location. The "actions" can be one of Dereference, Reference, or Assign. |
| // |
| // Two variables are considered as aliasing iff you can reach one value's node |
| // from the other value's node and the language formed by concatenating all of |
| // the edge labels (actions) conforms to a context-free grammar. |
| // |
| // Because this algorithm requires a graph search on each query, we execute the |
| // algorithm outlined in "Fast algorithms..." (mentioned above) |
| // in order to transform the graph into sets of variables that may alias in |
| // ~nlogn time (n = number of variables.), which makes queries take constant |
| // time. |
| //===----------------------------------------------------------------------===// |
| |
| #include "llvm/Analysis/CFLAliasAnalysis.h" |
| #include "StratifiedSets.h" |
| #include "llvm/ADT/BitVector.h" |
| #include "llvm/ADT/DenseMap.h" |
| #include "llvm/ADT/None.h" |
| #include "llvm/ADT/Optional.h" |
| #include "llvm/Analysis/TargetLibraryInfo.h" |
| #include "llvm/IR/Constants.h" |
| #include "llvm/IR/Function.h" |
| #include "llvm/IR/InstVisitor.h" |
| #include "llvm/IR/Instructions.h" |
| #include "llvm/Pass.h" |
| #include "llvm/Support/Allocator.h" |
| #include "llvm/Support/Compiler.h" |
| #include "llvm/Support/Debug.h" |
| #include "llvm/Support/ErrorHandling.h" |
| #include "llvm/Support/raw_ostream.h" |
| #include <algorithm> |
| #include <cassert> |
| #include <memory> |
| #include <tuple> |
| |
| using namespace llvm; |
| |
| #define DEBUG_TYPE "cfl-aa" |
| |
| CFLAAResult::CFLAAResult(const TargetLibraryInfo &TLI) : AAResultBase(TLI) {} |
| CFLAAResult::CFLAAResult(CFLAAResult &&Arg) : AAResultBase(std::move(Arg)) {} |
| |
| // \brief Information we have about a function and would like to keep around |
| struct CFLAAResult::FunctionInfo { |
| StratifiedSets<Value *> Sets; |
| // Lots of functions have < 4 returns. Adjust as necessary. |
| SmallVector<Value *, 4> ReturnedValues; |
| |
| FunctionInfo(StratifiedSets<Value *> &&S, SmallVector<Value *, 4> &&RV) |
| : Sets(std::move(S)), ReturnedValues(std::move(RV)) {} |
| }; |
| |
| // Try to go from a Value* to a Function*. Never returns nullptr. |
| static Optional<Function *> parentFunctionOfValue(Value *); |
| |
| // Returns possible functions called by the Inst* into the given |
| // SmallVectorImpl. Returns true if targets found, false otherwise. |
| // This is templated because InvokeInst/CallInst give us the same |
| // set of functions that we care about, and I don't like repeating |
| // myself. |
| template <typename Inst> |
| static bool getPossibleTargets(Inst *, SmallVectorImpl<Function *> &); |
| |
| // Some instructions need to have their users tracked. Instructions like |
| // `add` require you to get the users of the Instruction* itself, other |
| // instructions like `store` require you to get the users of the first |
| // operand. This function gets the "proper" value to track for each |
| // type of instruction we support. |
| static Optional<Value *> getTargetValue(Instruction *); |
| |
| // There are certain instructions (i.e. FenceInst, etc.) that we ignore. |
| // This notes that we should ignore those. |
| static bool hasUsefulEdges(Instruction *); |
| |
| const StratifiedIndex StratifiedLink::SetSentinel = |
| std::numeric_limits<StratifiedIndex>::max(); |
| |
| namespace { |
| // StratifiedInfo Attribute things. |
| typedef unsigned StratifiedAttr; |
| LLVM_CONSTEXPR unsigned MaxStratifiedAttrIndex = NumStratifiedAttrs; |
| LLVM_CONSTEXPR unsigned AttrAllIndex = 0; |
| LLVM_CONSTEXPR unsigned AttrGlobalIndex = 1; |
| LLVM_CONSTEXPR unsigned AttrUnknownIndex = 2; |
| LLVM_CONSTEXPR unsigned AttrFirstArgIndex = 3; |
| LLVM_CONSTEXPR unsigned AttrLastArgIndex = MaxStratifiedAttrIndex; |
| LLVM_CONSTEXPR unsigned AttrMaxNumArgs = AttrLastArgIndex - AttrFirstArgIndex; |
| |
| LLVM_CONSTEXPR StratifiedAttr AttrNone = 0; |
| LLVM_CONSTEXPR StratifiedAttr AttrUnknown = 1 << AttrUnknownIndex; |
| LLVM_CONSTEXPR StratifiedAttr AttrAll = ~AttrNone; |
| |
| // \brief StratifiedSets call for knowledge of "direction", so this is how we |
| // represent that locally. |
| enum class Level { Same, Above, Below }; |
| |
| // \brief Edges can be one of four "weights" -- each weight must have an inverse |
| // weight (Assign has Assign; Reference has Dereference). |
| enum class EdgeType { |
| // The weight assigned when assigning from or to a value. For example, in: |
| // %b = getelementptr %a, 0 |
| // ...The relationships are %b assign %a, and %a assign %b. This used to be |
| // two edges, but having a distinction bought us nothing. |
| Assign, |
| |
| // The edge used when we have an edge going from some handle to a Value. |
| // Examples of this include: |
| // %b = load %a (%b Dereference %a) |
| // %b = extractelement %a, 0 (%a Dereference %b) |
| Dereference, |
| |
| // The edge used when our edge goes from a value to a handle that may have |
| // contained it at some point. Examples: |
| // %b = load %a (%a Reference %b) |
| // %b = extractelement %a, 0 (%b Reference %a) |
| Reference |
| }; |
| |
| // \brief Encodes the notion of a "use" |
| struct Edge { |
| // \brief Which value the edge is coming from |
| Value *From; |
| |
| // \brief Which value the edge is pointing to |
| Value *To; |
| |
| // \brief Edge weight |
| EdgeType Weight; |
| |
| // \brief Whether we aliased any external values along the way that may be |
| // invisible to the analysis (i.e. landingpad for exceptions, calls for |
| // interprocedural analysis, etc.) |
| StratifiedAttrs AdditionalAttrs; |
| |
| Edge(Value *From, Value *To, EdgeType W, StratifiedAttrs A) |
| : From(From), To(To), Weight(W), AdditionalAttrs(A) {} |
| }; |
| |
| // \brief Gets the edges our graph should have, based on an Instruction* |
| class GetEdgesVisitor : public InstVisitor<GetEdgesVisitor, void> { |
| CFLAAResult &AA; |
| SmallVectorImpl<Edge> &Output; |
| |
| public: |
| GetEdgesVisitor(CFLAAResult &AA, SmallVectorImpl<Edge> &Output) |
| : AA(AA), Output(Output) {} |
| |
| void visitInstruction(Instruction &) { |
| llvm_unreachable("Unsupported instruction encountered"); |
| } |
| |
| void visitPtrToIntInst(PtrToIntInst &Inst) { |
| auto *Ptr = Inst.getOperand(0); |
| Output.push_back(Edge(Ptr, Ptr, EdgeType::Assign, AttrUnknown)); |
| } |
| |
| void visitIntToPtrInst(IntToPtrInst &Inst) { |
| auto *Ptr = &Inst; |
| Output.push_back(Edge(Ptr, Ptr, EdgeType::Assign, AttrUnknown)); |
| } |
| |
| void visitCastInst(CastInst &Inst) { |
| Output.push_back( |
| Edge(&Inst, Inst.getOperand(0), EdgeType::Assign, AttrNone)); |
| } |
| |
| void visitBinaryOperator(BinaryOperator &Inst) { |
| auto *Op1 = Inst.getOperand(0); |
| auto *Op2 = Inst.getOperand(1); |
| Output.push_back(Edge(&Inst, Op1, EdgeType::Assign, AttrNone)); |
| Output.push_back(Edge(&Inst, Op2, EdgeType::Assign, AttrNone)); |
| } |
| |
| void visitAtomicCmpXchgInst(AtomicCmpXchgInst &Inst) { |
| auto *Ptr = Inst.getPointerOperand(); |
| auto *Val = Inst.getNewValOperand(); |
| Output.push_back(Edge(Ptr, Val, EdgeType::Dereference, AttrNone)); |
| } |
| |
| void visitAtomicRMWInst(AtomicRMWInst &Inst) { |
| auto *Ptr = Inst.getPointerOperand(); |
| auto *Val = Inst.getValOperand(); |
| Output.push_back(Edge(Ptr, Val, EdgeType::Dereference, AttrNone)); |
| } |
| |
| void visitPHINode(PHINode &Inst) { |
| for (Value *Val : Inst.incoming_values()) { |
| Output.push_back(Edge(&Inst, Val, EdgeType::Assign, AttrNone)); |
| } |
| } |
| |
| void visitGetElementPtrInst(GetElementPtrInst &Inst) { |
| auto *Op = Inst.getPointerOperand(); |
| Output.push_back(Edge(&Inst, Op, EdgeType::Assign, AttrNone)); |
| for (auto I = Inst.idx_begin(), E = Inst.idx_end(); I != E; ++I) |
| Output.push_back(Edge(&Inst, *I, EdgeType::Assign, AttrNone)); |
| } |
| |
| void visitSelectInst(SelectInst &Inst) { |
| // Condition is not processed here (The actual statement producing |
| // the condition result is processed elsewhere). For select, the |
| // condition is evaluated, but not loaded, stored, or assigned |
| // simply as a result of being the condition of a select. |
| |
| auto *TrueVal = Inst.getTrueValue(); |
| Output.push_back(Edge(&Inst, TrueVal, EdgeType::Assign, AttrNone)); |
| auto *FalseVal = Inst.getFalseValue(); |
| Output.push_back(Edge(&Inst, FalseVal, EdgeType::Assign, AttrNone)); |
| } |
| |
| void visitAllocaInst(AllocaInst &) {} |
| |
| void visitLoadInst(LoadInst &Inst) { |
| auto *Ptr = Inst.getPointerOperand(); |
| auto *Val = &Inst; |
| Output.push_back(Edge(Val, Ptr, EdgeType::Reference, AttrNone)); |
| } |
| |
| void visitStoreInst(StoreInst &Inst) { |
| auto *Ptr = Inst.getPointerOperand(); |
| auto *Val = Inst.getValueOperand(); |
| Output.push_back(Edge(Ptr, Val, EdgeType::Dereference, AttrNone)); |
| } |
| |
| void visitVAArgInst(VAArgInst &Inst) { |
| // We can't fully model va_arg here. For *Ptr = Inst.getOperand(0), it does |
| // two things: |
| // 1. Loads a value from *((T*)*Ptr). |
| // 2. Increments (stores to) *Ptr by some target-specific amount. |
| // For now, we'll handle this like a landingpad instruction (by placing the |
| // result in its own group, and having that group alias externals). |
| auto *Val = &Inst; |
| Output.push_back(Edge(Val, Val, EdgeType::Assign, AttrAll)); |
| } |
| |
| static bool isFunctionExternal(Function *Fn) { |
| return Fn->isDeclaration() || !Fn->hasLocalLinkage(); |
| } |
| |
| // Gets whether the sets at Index1 above, below, or equal to the sets at |
| // Index2. Returns None if they are not in the same set chain. |
| static Optional<Level> getIndexRelation(const StratifiedSets<Value *> &Sets, |
| StratifiedIndex Index1, |
| StratifiedIndex Index2) { |
| if (Index1 == Index2) |
| return Level::Same; |
| |
| const auto *Current = &Sets.getLink(Index1); |
| while (Current->hasBelow()) { |
| if (Current->Below == Index2) |
| return Level::Below; |
| Current = &Sets.getLink(Current->Below); |
| } |
| |
| Current = &Sets.getLink(Index1); |
| while (Current->hasAbove()) { |
| if (Current->Above == Index2) |
| return Level::Above; |
| Current = &Sets.getLink(Current->Above); |
| } |
| |
| return NoneType(); |
| } |
| |
| bool |
| tryInterproceduralAnalysis(const SmallVectorImpl<Function *> &Fns, |
| Value *FuncValue, |
| const iterator_range<User::op_iterator> &Args) { |
| const unsigned ExpectedMaxArgs = 8; |
| const unsigned MaxSupportedArgs = 50; |
| assert(Fns.size() > 0); |
| |
| // I put this here to give us an upper bound on time taken by IPA. Is it |
| // really (realistically) needed? Keep in mind that we do have an n^2 algo. |
| if (std::distance(Args.begin(), Args.end()) > (int)MaxSupportedArgs) |
| return false; |
| |
| // Exit early if we'll fail anyway |
| for (auto *Fn : Fns) { |
| if (isFunctionExternal(Fn) || Fn->isVarArg()) |
| return false; |
| auto &MaybeInfo = AA.ensureCached(Fn); |
| if (!MaybeInfo.hasValue()) |
| return false; |
| } |
| |
| SmallVector<Value *, ExpectedMaxArgs> Arguments(Args.begin(), Args.end()); |
| SmallVector<StratifiedInfo, ExpectedMaxArgs> Parameters; |
| for (auto *Fn : Fns) { |
| auto &Info = *AA.ensureCached(Fn); |
| auto &Sets = Info.Sets; |
| auto &RetVals = Info.ReturnedValues; |
| |
| Parameters.clear(); |
| for (auto &Param : Fn->args()) { |
| auto MaybeInfo = Sets.find(&Param); |
| // Did a new parameter somehow get added to the function/slip by? |
| if (!MaybeInfo.hasValue()) |
| return false; |
| Parameters.push_back(*MaybeInfo); |
| } |
| |
| // Adding an edge from argument -> return value for each parameter that |
| // may alias the return value |
| for (unsigned I = 0, E = Parameters.size(); I != E; ++I) { |
| auto &ParamInfo = Parameters[I]; |
| auto &ArgVal = Arguments[I]; |
| bool AddEdge = false; |
| StratifiedAttrs Externals; |
| for (unsigned X = 0, XE = RetVals.size(); X != XE; ++X) { |
| auto MaybeInfo = Sets.find(RetVals[X]); |
| if (!MaybeInfo.hasValue()) |
| return false; |
| |
| auto &RetInfo = *MaybeInfo; |
| auto RetAttrs = Sets.getLink(RetInfo.Index).Attrs; |
| auto ParamAttrs = Sets.getLink(ParamInfo.Index).Attrs; |
| auto MaybeRelation = |
| getIndexRelation(Sets, ParamInfo.Index, RetInfo.Index); |
| if (MaybeRelation.hasValue()) { |
| AddEdge = true; |
| Externals |= RetAttrs | ParamAttrs; |
| } |
| } |
| if (AddEdge) |
| Output.push_back(Edge(FuncValue, ArgVal, EdgeType::Assign, |
| StratifiedAttrs().flip())); |
| } |
| |
| if (Parameters.size() != Arguments.size()) |
| return false; |
| |
| // Adding edges between arguments for arguments that may end up aliasing |
| // each other. This is necessary for functions such as |
| // void foo(int** a, int** b) { *a = *b; } |
| // (Technically, the proper sets for this would be those below |
| // Arguments[I] and Arguments[X], but our algorithm will produce |
| // extremely similar, and equally correct, results either way) |
| for (unsigned I = 0, E = Arguments.size(); I != E; ++I) { |
| auto &MainVal = Arguments[I]; |
| auto &MainInfo = Parameters[I]; |
| auto &MainAttrs = Sets.getLink(MainInfo.Index).Attrs; |
| for (unsigned X = I + 1; X != E; ++X) { |
| auto &SubInfo = Parameters[X]; |
| auto &SubVal = Arguments[X]; |
| auto &SubAttrs = Sets.getLink(SubInfo.Index).Attrs; |
| auto MaybeRelation = |
| getIndexRelation(Sets, MainInfo.Index, SubInfo.Index); |
| |
| if (!MaybeRelation.hasValue()) |
| continue; |
| |
| auto NewAttrs = SubAttrs | MainAttrs; |
| Output.push_back(Edge(MainVal, SubVal, EdgeType::Assign, NewAttrs)); |
| } |
| } |
| } |
| return true; |
| } |
| |
| template <typename InstT> void visitCallLikeInst(InstT &Inst) { |
| // TODO: Add support for noalias args/all the other fun function attributes |
| // that we can tack on. |
| SmallVector<Function *, 4> Targets; |
| if (getPossibleTargets(&Inst, Targets)) { |
| if (tryInterproceduralAnalysis(Targets, &Inst, Inst.arg_operands())) |
| return; |
| // Cleanup from interprocedural analysis |
| Output.clear(); |
| } |
| |
| // Because the function is opaque, we need to note that anything |
| // could have happened to the arguments, and that the result could alias |
| // just about anything, too. |
| // The goal of the loop is in part to unify many Values into one set, so we |
| // don't care if the function is void there. |
| for (Value *V : Inst.arg_operands()) |
| Output.push_back(Edge(&Inst, V, EdgeType::Assign, AttrAll)); |
| if (Inst.getNumArgOperands() == 0 && |
| Inst.getType() != Type::getVoidTy(Inst.getContext())) |
| Output.push_back(Edge(&Inst, &Inst, EdgeType::Assign, AttrAll)); |
| } |
| |
| void visitCallInst(CallInst &Inst) { visitCallLikeInst(Inst); } |
| |
| void visitInvokeInst(InvokeInst &Inst) { visitCallLikeInst(Inst); } |
| |
| // Because vectors/aggregates are immutable and unaddressable, |
| // there's nothing we can do to coax a value out of them, other |
| // than calling Extract{Element,Value}. We can effectively treat |
| // them as pointers to arbitrary memory locations we can store in |
| // and load from. |
| void visitExtractElementInst(ExtractElementInst &Inst) { |
| auto *Ptr = Inst.getVectorOperand(); |
| auto *Val = &Inst; |
| Output.push_back(Edge(Val, Ptr, EdgeType::Reference, AttrNone)); |
| } |
| |
| void visitInsertElementInst(InsertElementInst &Inst) { |
| auto *Vec = Inst.getOperand(0); |
| auto *Val = Inst.getOperand(1); |
| Output.push_back(Edge(&Inst, Vec, EdgeType::Assign, AttrNone)); |
| Output.push_back(Edge(&Inst, Val, EdgeType::Dereference, AttrNone)); |
| } |
| |
| void visitLandingPadInst(LandingPadInst &Inst) { |
| // Exceptions come from "nowhere", from our analysis' perspective. |
| // So we place the instruction its own group, noting that said group may |
| // alias externals |
| Output.push_back(Edge(&Inst, &Inst, EdgeType::Assign, AttrAll)); |
| } |
| |
| void visitInsertValueInst(InsertValueInst &Inst) { |
| auto *Agg = Inst.getOperand(0); |
| auto *Val = Inst.getOperand(1); |
| Output.push_back(Edge(&Inst, Agg, EdgeType::Assign, AttrNone)); |
| Output.push_back(Edge(&Inst, Val, EdgeType::Dereference, AttrNone)); |
| } |
| |
| void visitExtractValueInst(ExtractValueInst &Inst) { |
| auto *Ptr = Inst.getAggregateOperand(); |
| Output.push_back(Edge(&Inst, Ptr, EdgeType::Reference, AttrNone)); |
| } |
| |
| void visitShuffleVectorInst(ShuffleVectorInst &Inst) { |
| auto *From1 = Inst.getOperand(0); |
| auto *From2 = Inst.getOperand(1); |
| Output.push_back(Edge(&Inst, From1, EdgeType::Assign, AttrNone)); |
| Output.push_back(Edge(&Inst, From2, EdgeType::Assign, AttrNone)); |
| } |
| |
| void visitConstantExpr(ConstantExpr *CE) { |
| switch (CE->getOpcode()) { |
| default: |
| llvm_unreachable("Unknown instruction type encountered!"); |
| // Build the switch statement using the Instruction.def file. |
| #define HANDLE_INST(NUM, OPCODE, CLASS) \ |
| case Instruction::OPCODE: \ |
| visit##OPCODE(*(CLASS *)CE); \ |
| break; |
| #include "llvm/IR/Instruction.def" |
| } |
| } |
| }; |
| |
| // For a given instruction, we need to know which Value* to get the |
| // users of in order to build our graph. In some cases (i.e. add), |
| // we simply need the Instruction*. In other cases (i.e. store), |
| // finding the users of the Instruction* is useless; we need to find |
| // the users of the first operand. This handles determining which |
| // value to follow for us. |
| // |
| // Note: we *need* to keep this in sync with GetEdgesVisitor. Add |
| // something to GetEdgesVisitor, add it here -- remove something from |
| // GetEdgesVisitor, remove it here. |
| class GetTargetValueVisitor |
| : public InstVisitor<GetTargetValueVisitor, Value *> { |
| public: |
| Value *visitInstruction(Instruction &Inst) { return &Inst; } |
| |
| Value *visitStoreInst(StoreInst &Inst) { return Inst.getPointerOperand(); } |
| |
| Value *visitAtomicCmpXchgInst(AtomicCmpXchgInst &Inst) { |
| return Inst.getPointerOperand(); |
| } |
| |
| Value *visitAtomicRMWInst(AtomicRMWInst &Inst) { |
| return Inst.getPointerOperand(); |
| } |
| |
| Value *visitInsertElementInst(InsertElementInst &Inst) { |
| return Inst.getOperand(0); |
| } |
| |
| Value *visitInsertValueInst(InsertValueInst &Inst) { |
| return Inst.getAggregateOperand(); |
| } |
| }; |
| |
| // Set building requires a weighted bidirectional graph. |
| template <typename EdgeTypeT> class WeightedBidirectionalGraph { |
| public: |
| typedef std::size_t Node; |
| |
| private: |
| const static Node StartNode = Node(0); |
| |
| struct Edge { |
| EdgeTypeT Weight; |
| Node Other; |
| |
| Edge(const EdgeTypeT &W, const Node &N) : Weight(W), Other(N) {} |
| |
| bool operator==(const Edge &E) const { |
| return Weight == E.Weight && Other == E.Other; |
| } |
| |
| bool operator!=(const Edge &E) const { return !operator==(E); } |
| }; |
| |
| struct NodeImpl { |
| std::vector<Edge> Edges; |
| }; |
| |
| std::vector<NodeImpl> NodeImpls; |
| |
| bool inbounds(Node NodeIndex) const { return NodeIndex < NodeImpls.size(); } |
| |
| const NodeImpl &getNode(Node N) const { return NodeImpls[N]; } |
| NodeImpl &getNode(Node N) { return NodeImpls[N]; } |
| |
| public: |
| // ----- Various Edge iterators for the graph ----- // |
| |
| // \brief Iterator for edges. Because this graph is bidirected, we don't |
| // allow modification of the edges using this iterator. Additionally, the |
| // iterator becomes invalid if you add edges to or from the node you're |
| // getting the edges of. |
| struct EdgeIterator : public std::iterator<std::forward_iterator_tag, |
| std::tuple<EdgeTypeT, Node *>> { |
| EdgeIterator(const typename std::vector<Edge>::const_iterator &Iter) |
| : Current(Iter) {} |
| |
| EdgeIterator(NodeImpl &Impl) : Current(Impl.begin()) {} |
| |
| EdgeIterator &operator++() { |
| ++Current; |
| return *this; |
| } |
| |
| EdgeIterator operator++(int) { |
| EdgeIterator Copy(Current); |
| operator++(); |
| return Copy; |
| } |
| |
| std::tuple<EdgeTypeT, Node> &operator*() { |
| Store = std::make_tuple(Current->Weight, Current->Other); |
| return Store; |
| } |
| |
| bool operator==(const EdgeIterator &Other) const { |
| return Current == Other.Current; |
| } |
| |
| bool operator!=(const EdgeIterator &Other) const { |
| return !operator==(Other); |
| } |
| |
| private: |
| typename std::vector<Edge>::const_iterator Current; |
| std::tuple<EdgeTypeT, Node> Store; |
| }; |
| |
| // Wrapper for EdgeIterator with begin()/end() calls. |
| struct EdgeIterable { |
| EdgeIterable(const std::vector<Edge> &Edges) |
| : BeginIter(Edges.begin()), EndIter(Edges.end()) {} |
| |
| EdgeIterator begin() { return EdgeIterator(BeginIter); } |
| |
| EdgeIterator end() { return EdgeIterator(EndIter); } |
| |
| private: |
| typename std::vector<Edge>::const_iterator BeginIter; |
| typename std::vector<Edge>::const_iterator EndIter; |
| }; |
| |
| // ----- Actual graph-related things ----- // |
| |
| WeightedBidirectionalGraph() {} |
| |
| WeightedBidirectionalGraph(WeightedBidirectionalGraph<EdgeTypeT> &&Other) |
| : NodeImpls(std::move(Other.NodeImpls)) {} |
| |
| WeightedBidirectionalGraph<EdgeTypeT> & |
| operator=(WeightedBidirectionalGraph<EdgeTypeT> &&Other) { |
| NodeImpls = std::move(Other.NodeImpls); |
| return *this; |
| } |
| |
| Node addNode() { |
| auto Index = NodeImpls.size(); |
| auto NewNode = Node(Index); |
| NodeImpls.push_back(NodeImpl()); |
| return NewNode; |
| } |
| |
| void addEdge(Node From, Node To, const EdgeTypeT &Weight, |
| const EdgeTypeT &ReverseWeight) { |
| assert(inbounds(From)); |
| assert(inbounds(To)); |
| auto &FromNode = getNode(From); |
| auto &ToNode = getNode(To); |
| FromNode.Edges.push_back(Edge(Weight, To)); |
| ToNode.Edges.push_back(Edge(ReverseWeight, From)); |
| } |
| |
| EdgeIterable edgesFor(const Node &N) const { |
| const auto &Node = getNode(N); |
| return EdgeIterable(Node.Edges); |
| } |
| |
| bool empty() const { return NodeImpls.empty(); } |
| std::size_t size() const { return NodeImpls.size(); } |
| |
| // \brief Gets an arbitrary node in the graph as a starting point for |
| // traversal. |
| Node getEntryNode() { |
| assert(inbounds(StartNode)); |
| return StartNode; |
| } |
| }; |
| |
| typedef WeightedBidirectionalGraph<std::pair<EdgeType, StratifiedAttrs>> GraphT; |
| typedef DenseMap<Value *, GraphT::Node> NodeMapT; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // Function declarations that require types defined in the namespace above |
| //===----------------------------------------------------------------------===// |
| |
| // Given an argument number, returns the appropriate Attr index to set. |
| static StratifiedAttr argNumberToAttrIndex(StratifiedAttr); |
| |
| // Given a Value, potentially return which AttrIndex it maps to. |
| static Optional<StratifiedAttr> valueToAttrIndex(Value *Val); |
| |
| // Gets the inverse of a given EdgeType. |
| static EdgeType flipWeight(EdgeType); |
| |
| // Gets edges of the given Instruction*, writing them to the SmallVector*. |
| static void argsToEdges(CFLAAResult &, Instruction *, SmallVectorImpl<Edge> &); |
| |
| // Gets edges of the given ConstantExpr*, writing them to the SmallVector*. |
| static void argsToEdges(CFLAAResult &, ConstantExpr *, SmallVectorImpl<Edge> &); |
| |
| // Gets the "Level" that one should travel in StratifiedSets |
| // given an EdgeType. |
| static Level directionOfEdgeType(EdgeType); |
| |
| // Builds the graph needed for constructing the StratifiedSets for the |
| // given function |
| static void buildGraphFrom(CFLAAResult &, Function *, |
| SmallVectorImpl<Value *> &, NodeMapT &, GraphT &); |
| |
| // Gets the edges of a ConstantExpr as if it was an Instruction. This |
| // function also acts on any nested ConstantExprs, adding the edges |
| // of those to the given SmallVector as well. |
| static void constexprToEdges(CFLAAResult &, ConstantExpr &, |
| SmallVectorImpl<Edge> &); |
| |
| // Given an Instruction, this will add it to the graph, along with any |
| // Instructions that are potentially only available from said Instruction |
| // For example, given the following line: |
| // %0 = load i16* getelementptr ([1 x i16]* @a, 0, 0), align 2 |
| // addInstructionToGraph would add both the `load` and `getelementptr` |
| // instructions to the graph appropriately. |
| static void addInstructionToGraph(CFLAAResult &, Instruction &, |
| SmallVectorImpl<Value *> &, NodeMapT &, |
| GraphT &); |
| |
| // Notes whether it would be pointless to add the given Value to our sets. |
| static bool canSkipAddingToSets(Value *Val); |
| |
| static Optional<Function *> parentFunctionOfValue(Value *Val) { |
| if (auto *Inst = dyn_cast<Instruction>(Val)) { |
| auto *Bb = Inst->getParent(); |
| return Bb->getParent(); |
| } |
| |
| if (auto *Arg = dyn_cast<Argument>(Val)) |
| return Arg->getParent(); |
| return NoneType(); |
| } |
| |
| template <typename Inst> |
| static bool getPossibleTargets(Inst *Call, |
| SmallVectorImpl<Function *> &Output) { |
| if (auto *Fn = Call->getCalledFunction()) { |
| Output.push_back(Fn); |
| return true; |
| } |
| |
| // TODO: If the call is indirect, we might be able to enumerate all potential |
| // targets of the call and return them, rather than just failing. |
| return false; |
| } |
| |
| static Optional<Value *> getTargetValue(Instruction *Inst) { |
| GetTargetValueVisitor V; |
| return V.visit(Inst); |
| } |
| |
| static bool hasUsefulEdges(Instruction *Inst) { |
| bool IsNonInvokeTerminator = |
| isa<TerminatorInst>(Inst) && !isa<InvokeInst>(Inst); |
| return !isa<CmpInst>(Inst) && !isa<FenceInst>(Inst) && !IsNonInvokeTerminator; |
| } |
| |
| static bool hasUsefulEdges(ConstantExpr *CE) { |
| // ConstantExpr doesn't have terminators, invokes, or fences, so only needs |
| // to check for compares. |
| return CE->getOpcode() != Instruction::ICmp && |
| CE->getOpcode() != Instruction::FCmp; |
| } |
| |
| static Optional<StratifiedAttr> valueToAttrIndex(Value *Val) { |
| if (isa<GlobalValue>(Val)) |
| return AttrGlobalIndex; |
| |
| if (auto *Arg = dyn_cast<Argument>(Val)) |
| // Only pointer arguments should have the argument attribute, |
| // because things can't escape through scalars without us seeing a |
| // cast, and thus, interaction with them doesn't matter. |
| if (!Arg->hasNoAliasAttr() && Arg->getType()->isPointerTy()) |
| return argNumberToAttrIndex(Arg->getArgNo()); |
| return NoneType(); |
| } |
| |
| static StratifiedAttr argNumberToAttrIndex(unsigned ArgNum) { |
| if (ArgNum >= AttrMaxNumArgs) |
| return AttrAllIndex; |
| return ArgNum + AttrFirstArgIndex; |
| } |
| |
| static EdgeType flipWeight(EdgeType Initial) { |
| switch (Initial) { |
| case EdgeType::Assign: |
| return EdgeType::Assign; |
| case EdgeType::Dereference: |
| return EdgeType::Reference; |
| case EdgeType::Reference: |
| return EdgeType::Dereference; |
| } |
| llvm_unreachable("Incomplete coverage of EdgeType enum"); |
| } |
| |
| static void argsToEdges(CFLAAResult &Analysis, Instruction *Inst, |
| SmallVectorImpl<Edge> &Output) { |
| assert(hasUsefulEdges(Inst) && |
| "Expected instructions to have 'useful' edges"); |
| GetEdgesVisitor v(Analysis, Output); |
| v.visit(Inst); |
| } |
| |
| static void argsToEdges(CFLAAResult &Analysis, ConstantExpr *CE, |
| SmallVectorImpl<Edge> &Output) { |
| assert(hasUsefulEdges(CE) && "Expected constant expr to have 'useful' edges"); |
| GetEdgesVisitor v(Analysis, Output); |
| v.visitConstantExpr(CE); |
| } |
| |
| static Level directionOfEdgeType(EdgeType Weight) { |
| switch (Weight) { |
| case EdgeType::Reference: |
| return Level::Above; |
| case EdgeType::Dereference: |
| return Level::Below; |
| case EdgeType::Assign: |
| return Level::Same; |
| } |
| llvm_unreachable("Incomplete switch coverage"); |
| } |
| |
| static void constexprToEdges(CFLAAResult &Analysis, |
| ConstantExpr &CExprToCollapse, |
| SmallVectorImpl<Edge> &Results) { |
| SmallVector<ConstantExpr *, 4> Worklist; |
| Worklist.push_back(&CExprToCollapse); |
| |
| SmallVector<Edge, 8> ConstexprEdges; |
| SmallPtrSet<ConstantExpr *, 4> Visited; |
| while (!Worklist.empty()) { |
| auto *CExpr = Worklist.pop_back_val(); |
| |
| if (!hasUsefulEdges(CExpr)) |
| continue; |
| |
| ConstexprEdges.clear(); |
| argsToEdges(Analysis, CExpr, ConstexprEdges); |
| for (auto &Edge : ConstexprEdges) { |
| if (auto *Nested = dyn_cast<ConstantExpr>(Edge.From)) |
| if (Visited.insert(Nested).second) |
| Worklist.push_back(Nested); |
| |
| if (auto *Nested = dyn_cast<ConstantExpr>(Edge.To)) |
| if (Visited.insert(Nested).second) |
| Worklist.push_back(Nested); |
| } |
| |
| Results.append(ConstexprEdges.begin(), ConstexprEdges.end()); |
| } |
| } |
| |
| static void addInstructionToGraph(CFLAAResult &Analysis, Instruction &Inst, |
| SmallVectorImpl<Value *> &ReturnedValues, |
| NodeMapT &Map, GraphT &Graph) { |
| const auto findOrInsertNode = [&Map, &Graph](Value *Val) { |
| auto Pair = Map.insert(std::make_pair(Val, GraphT::Node())); |
| auto &Iter = Pair.first; |
| if (Pair.second) { |
| auto NewNode = Graph.addNode(); |
| Iter->second = NewNode; |
| } |
| return Iter->second; |
| }; |
| |
| // We don't want the edges of most "return" instructions, but we *do* want |
| // to know what can be returned. |
| if (isa<ReturnInst>(&Inst)) |
| ReturnedValues.push_back(&Inst); |
| |
| if (!hasUsefulEdges(&Inst)) |
| return; |
| |
| SmallVector<Edge, 8> Edges; |
| argsToEdges(Analysis, &Inst, Edges); |
| |
| // In the case of an unused alloca (or similar), edges may be empty. Note |
| // that it exists so we can potentially answer NoAlias. |
| if (Edges.empty()) { |
| auto MaybeVal = getTargetValue(&Inst); |
| assert(MaybeVal.hasValue()); |
| auto *Target = *MaybeVal; |
| findOrInsertNode(Target); |
| return; |
| } |
| |
| const auto addEdgeToGraph = [&Graph, &findOrInsertNode](const Edge &E) { |
| auto To = findOrInsertNode(E.To); |
| auto From = findOrInsertNode(E.From); |
| auto FlippedWeight = flipWeight(E.Weight); |
| auto Attrs = E.AdditionalAttrs; |
| Graph.addEdge(From, To, std::make_pair(E.Weight, Attrs), |
| std::make_pair(FlippedWeight, Attrs)); |
| }; |
| |
| SmallVector<ConstantExpr *, 4> ConstantExprs; |
| for (const Edge &E : Edges) { |
| addEdgeToGraph(E); |
| if (auto *Constexpr = dyn_cast<ConstantExpr>(E.To)) |
| ConstantExprs.push_back(Constexpr); |
| if (auto *Constexpr = dyn_cast<ConstantExpr>(E.From)) |
| ConstantExprs.push_back(Constexpr); |
| } |
| |
| for (ConstantExpr *CE : ConstantExprs) { |
| Edges.clear(); |
| constexprToEdges(Analysis, *CE, Edges); |
| std::for_each(Edges.begin(), Edges.end(), addEdgeToGraph); |
| } |
| } |
| |
| // Aside: We may remove graph construction entirely, because it doesn't really |
| // buy us much that we don't already have. I'd like to add interprocedural |
| // analysis prior to this however, in case that somehow requires the graph |
| // produced by this for efficient execution |
| static void buildGraphFrom(CFLAAResult &Analysis, Function *Fn, |
| SmallVectorImpl<Value *> &ReturnedValues, |
| NodeMapT &Map, GraphT &Graph) { |
| for (auto &Bb : Fn->getBasicBlockList()) |
| for (auto &Inst : Bb.getInstList()) |
| addInstructionToGraph(Analysis, Inst, ReturnedValues, Map, Graph); |
| } |
| |
| static bool canSkipAddingToSets(Value *Val) { |
| // Constants can share instances, which may falsely unify multiple |
| // sets, e.g. in |
| // store i32* null, i32** %ptr1 |
| // store i32* null, i32** %ptr2 |
| // clearly ptr1 and ptr2 should not be unified into the same set, so |
| // we should filter out the (potentially shared) instance to |
| // i32* null. |
| if (isa<Constant>(Val)) { |
| bool Container = isa<ConstantVector>(Val) || isa<ConstantArray>(Val) || |
| isa<ConstantStruct>(Val); |
| // TODO: Because all of these things are constant, we can determine whether |
| // the data is *actually* mutable at graph building time. This will probably |
| // come for free/cheap with offset awareness. |
| bool CanStoreMutableData = |
| isa<GlobalValue>(Val) || isa<ConstantExpr>(Val) || Container; |
| return !CanStoreMutableData; |
| } |
| |
| return false; |
| } |
| |
| // Builds the graph + StratifiedSets for a function. |
| CFLAAResult::FunctionInfo CFLAAResult::buildSetsFrom(Function *Fn) { |
| NodeMapT Map; |
| GraphT Graph; |
| SmallVector<Value *, 4> ReturnedValues; |
| |
| buildGraphFrom(*this, Fn, ReturnedValues, Map, Graph); |
| |
| DenseMap<GraphT::Node, Value *> NodeValueMap; |
| NodeValueMap.resize(Map.size()); |
| for (const auto &Pair : Map) |
| NodeValueMap.insert(std::make_pair(Pair.second, Pair.first)); |
| |
| const auto findValueOrDie = [&NodeValueMap](GraphT::Node Node) { |
| auto ValIter = NodeValueMap.find(Node); |
| assert(ValIter != NodeValueMap.end()); |
| return ValIter->second; |
| }; |
| |
| StratifiedSetsBuilder<Value *> Builder; |
| |
| SmallVector<GraphT::Node, 16> Worklist; |
| for (auto &Pair : Map) { |
| Worklist.clear(); |
| |
| auto *Value = Pair.first; |
| Builder.add(Value); |
| auto InitialNode = Pair.second; |
| Worklist.push_back(InitialNode); |
| while (!Worklist.empty()) { |
| auto Node = Worklist.pop_back_val(); |
| auto *CurValue = findValueOrDie(Node); |
| if (canSkipAddingToSets(CurValue)) |
| continue; |
| |
| for (const auto &EdgeTuple : Graph.edgesFor(Node)) { |
| auto Weight = std::get<0>(EdgeTuple); |
| auto Label = Weight.first; |
| auto &OtherNode = std::get<1>(EdgeTuple); |
| auto *OtherValue = findValueOrDie(OtherNode); |
| |
| if (canSkipAddingToSets(OtherValue)) |
| continue; |
| |
| bool Added; |
| switch (directionOfEdgeType(Label)) { |
| case Level::Above: |
| Added = Builder.addAbove(CurValue, OtherValue); |
| break; |
| case Level::Below: |
| Added = Builder.addBelow(CurValue, OtherValue); |
| break; |
| case Level::Same: |
| Added = Builder.addWith(CurValue, OtherValue); |
| break; |
| } |
| |
| auto Aliasing = Weight.second; |
| if (auto MaybeCurIndex = valueToAttrIndex(CurValue)) |
| Aliasing.set(*MaybeCurIndex); |
| if (auto MaybeOtherIndex = valueToAttrIndex(OtherValue)) |
| Aliasing.set(*MaybeOtherIndex); |
| Builder.noteAttributes(CurValue, Aliasing); |
| Builder.noteAttributes(OtherValue, Aliasing); |
| |
| if (Added) |
| Worklist.push_back(OtherNode); |
| } |
| } |
| } |
| |
| // There are times when we end up with parameters not in our graph (i.e. if |
| // it's only used as the condition of a branch). Other bits of code depend on |
| // things that were present during construction being present in the graph. |
| // So, we add all present arguments here. |
| for (auto &Arg : Fn->args()) { |
| if (!Builder.add(&Arg)) |
| continue; |
| |
| auto Attrs = valueToAttrIndex(&Arg); |
| if (Attrs.hasValue()) |
| Builder.noteAttributes(&Arg, *Attrs); |
| } |
| |
| return FunctionInfo(Builder.build(), std::move(ReturnedValues)); |
| } |
| |
| void CFLAAResult::scan(Function *Fn) { |
| auto InsertPair = Cache.insert(std::make_pair(Fn, Optional<FunctionInfo>())); |
| (void)InsertPair; |
| assert(InsertPair.second && |
| "Trying to scan a function that has already been cached"); |
| |
| FunctionInfo Info(buildSetsFrom(Fn)); |
| Cache[Fn] = std::move(Info); |
| Handles.push_front(FunctionHandle(Fn, this)); |
| } |
| |
| void CFLAAResult::evict(Function *Fn) { Cache.erase(Fn); } |
| |
| /// \brief Ensures that the given function is available in the cache. |
| /// Returns the appropriate entry from the cache. |
| const Optional<CFLAAResult::FunctionInfo> & |
| CFLAAResult::ensureCached(Function *Fn) { |
| auto Iter = Cache.find(Fn); |
| if (Iter == Cache.end()) { |
| scan(Fn); |
| Iter = Cache.find(Fn); |
| assert(Iter != Cache.end()); |
| assert(Iter->second.hasValue()); |
| } |
| return Iter->second; |
| } |
| |
| AliasResult CFLAAResult::query(const MemoryLocation &LocA, |
| const MemoryLocation &LocB) { |
| auto *ValA = const_cast<Value *>(LocA.Ptr); |
| auto *ValB = const_cast<Value *>(LocB.Ptr); |
| |
| Function *Fn = nullptr; |
| auto MaybeFnA = parentFunctionOfValue(ValA); |
| auto MaybeFnB = parentFunctionOfValue(ValB); |
| if (!MaybeFnA.hasValue() && !MaybeFnB.hasValue()) { |
| // The only times this is known to happen are when globals + InlineAsm |
| // are involved |
| DEBUG(dbgs() << "CFLAA: could not extract parent function information.\n"); |
| return MayAlias; |
| } |
| |
| if (MaybeFnA.hasValue()) { |
| Fn = *MaybeFnA; |
| assert((!MaybeFnB.hasValue() || *MaybeFnB == *MaybeFnA) && |
| "Interprocedural queries not supported"); |
| } else { |
| Fn = *MaybeFnB; |
| } |
| |
| assert(Fn != nullptr); |
| auto &MaybeInfo = ensureCached(Fn); |
| assert(MaybeInfo.hasValue()); |
| |
| auto &Sets = MaybeInfo->Sets; |
| auto MaybeA = Sets.find(ValA); |
| if (!MaybeA.hasValue()) |
| return MayAlias; |
| |
| auto MaybeB = Sets.find(ValB); |
| if (!MaybeB.hasValue()) |
| return MayAlias; |
| |
| auto SetA = *MaybeA; |
| auto SetB = *MaybeB; |
| auto AttrsA = Sets.getLink(SetA.Index).Attrs; |
| auto AttrsB = Sets.getLink(SetB.Index).Attrs; |
| |
| // Stratified set attributes are used as markets to signify whether a member |
| // of a StratifiedSet (or a member of a set above the current set) has |
| // interacted with either arguments or globals. "Interacted with" meaning |
| // its value may be different depending on the value of an argument or |
| // global. The thought behind this is that, because arguments and globals |
| // may alias each other, if AttrsA and AttrsB have touched args/globals, |
| // we must conservatively say that they alias. However, if at least one of |
| // the sets has no values that could legally be altered by changing the value |
| // of an argument or global, then we don't have to be as conservative. |
| if (AttrsA.any() && AttrsB.any()) |
| return MayAlias; |
| |
| // We currently unify things even if the accesses to them may not be in |
| // bounds, so we can't return partial alias here because we don't |
| // know whether the pointer is really within the object or not. |
| // IE Given an out of bounds GEP and an alloca'd pointer, we may |
| // unify the two. We can't return partial alias for this case. |
| // Since we do not currently track enough information to |
| // differentiate |
| |
| if (SetA.Index == SetB.Index) |
| return MayAlias; |
| |
| return NoAlias; |
| } |
| |
| CFLAAResult CFLAA::run(Function &F, AnalysisManager<Function> *AM) { |
| return CFLAAResult(AM->getResult<TargetLibraryAnalysis>(F)); |
| } |
| |
| char CFLAA::PassID; |
| |
| char CFLAAWrapperPass::ID = 0; |
| INITIALIZE_PASS_BEGIN(CFLAAWrapperPass, "cfl-aa", "CFL-Based Alias Analysis", |
| false, true) |
| INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) |
| INITIALIZE_PASS_END(CFLAAWrapperPass, "cfl-aa", "CFL-Based Alias Analysis", |
| false, true) |
| |
| ImmutablePass *llvm::createCFLAAWrapperPass() { return new CFLAAWrapperPass(); } |
| |
| CFLAAWrapperPass::CFLAAWrapperPass() : ImmutablePass(ID) { |
| initializeCFLAAWrapperPassPass(*PassRegistry::getPassRegistry()); |
| } |
| |
| bool CFLAAWrapperPass::doInitialization(Module &M) { |
| Result.reset( |
| new CFLAAResult(getAnalysis<TargetLibraryInfoWrapperPass>().getTLI())); |
| return false; |
| } |
| |
| bool CFLAAWrapperPass::doFinalization(Module &M) { |
| Result.reset(); |
| return false; |
| } |
| |
| void CFLAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { |
| AU.setPreservesAll(); |
| AU.addRequired<TargetLibraryInfoWrapperPass>(); |
| } |