Index: include/llvm/Analysis/Passes.h =================================================================== --- include/llvm/Analysis/Passes.h +++ include/llvm/Analysis/Passes.h @@ -66,6 +66,13 @@ //===--------------------------------------------------------------------===// // + // createCFLAliasAnalysisPass - This pass implements a set-based approach to + // alias analysis. + // + ImmutablePass *createCFLAliasAnalysisPass(); + + //===--------------------------------------------------------------------===// + // /// createLibCallAliasAnalysisPass - Create an alias analysis pass that knows /// about the semantics of a set of libcalls specified by LCI. The newly /// constructed pass takes ownership of the pointer that is provided. Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -89,6 +89,7 @@ void initializeCFGOnlyViewerPass(PassRegistry&); void initializeCFGPrinterPass(PassRegistry&); void initializeCFGSimplifyPassPass(PassRegistry&); +void initializeCFLAliasAnalysisPass(PassRegistry&); void initializeFlattenCFGPassPass(PassRegistry&); void initializeStructurizeCFGPass(PassRegistry&); void initializeCFGViewerPass(PassRegistry&); Index: include/llvm/LinkAllPasses.h =================================================================== --- include/llvm/LinkAllPasses.h +++ include/llvm/LinkAllPasses.h @@ -62,6 +62,7 @@ (void) llvm::createCallGraphPrinterPass(); (void) llvm::createCallGraphViewerPass(); (void) llvm::createCFGSimplificationPass(); + (void) llvm::createCFLAliasAnalysisPass(); (void) llvm::createStructurizeCFGPass(); (void) llvm::createConstantMergePass(); (void) llvm::createConstantPropagationPass(); Index: lib/Analysis/CFLAliasAnalysis.cpp =================================================================== --- /dev/null +++ lib/Analysis/CFLAliasAnalysis.cpp @@ -0,0 +1,981 @@ +//===- CFLAliasAnalysis.cpp - CFL-Based Alias Analysis Implementation ------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a CFL-based context-insensitive alias analysis +// algorithm. It does not depend on types. The algorithm is a mixture of the one +// described in "Demand-driven alias analysis for C" by Xin Zheng and Radu +// Rugina, and "Fast algorithms for Dyck-CFL-reachability with applications to +// Alias Analysis" by Zhang Q, Lyu M R, Yuan H, and Su Z. -- to summarize the +// papers, we build a graph of the uses of a variable, where each node is a +// memory location, and each edge is an action that happened on that memory +// location. The "actions" can be one of Dereference, Reference, Assign, or +// Assign. +// +// Two variables are considered as aliasing iff you can reach one value's node +// from the other value's node and the language formed by concatenating all of +// the edge labels (actions) conforms to a context-free grammar. +// +// Because this algorithm requires a graph search on each query, we execute the +// algorithm outlined in "Fast algorithms..." (mentioned above) +// in order to transform the graph into sets of variables that may alias in +// ~nlogn time (n = number of variables.), which makes queries take constant +// time. +//===----------------------------------------------------------------------===// + +#include "StratifiedSets.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/None.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Pass.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/ErrorHandling.h" +#include +#include +#include +#include + +using namespace llvm; + +// Try to go from a Value* to a Function*. Never returns nullptr. +static Optional parentFunctionOfValue(Value *); + +// Returns possible functions called by the Inst* into the given +// SmallVectorImpl. Returns true if targets found, false otherwise. +// This is templated because InvokeInst/CallInst give us the same +// set of functions that we care about, and I don't like repeating +// myself. +template +static bool getPossibleTargets(Inst *, SmallVectorImpl &); + +// Some instructions need to have their users tracked. Instructions like +// `add` require you to get the users of the Instruction* itself, other +// instructions like `store` require you to get the users of the first +// operand. This function gets the "proper" value to track for each +// type of instruction we support. +static Optional getTargetValue(Instruction *); + +// There are certain instructions (i.e. FenceInst, etc.) that we ignore. +// This notes that we should ignore those. +static bool hasUsefulEdges(Instruction *); + +namespace { +// StratifiedInfo Attribute things. +typedef unsigned StratifiedAttr; +constexpr unsigned MaxStratifiedAttrIndex = NumStratifiedAttrs; +constexpr unsigned AttrAllIndex = 0; +constexpr unsigned AttrGlobalIndex = 1; +constexpr unsigned AttrFirstArgIndex = 2; +constexpr unsigned AttrLastArgIndex = MaxStratifiedAttrIndex; +constexpr unsigned AttrMaxNumArgs = AttrLastArgIndex - AttrFirstArgIndex; + +constexpr StratifiedAttr AttrNone = 0; +constexpr StratifiedAttr AttrAll = ~AttrNone; + +// \brief StratifiedSets call for knowledge of "direction", so this is how we +// represent that locally. +enum class Level { Same, Above, Below }; + +// \brief Edges can be one of four "weights" -- each weight must have an inverse +// weight (Assign has Assign; Reference has Dereference). +enum class EdgeType { + // The weight assigned when assigning from or to a value. For example, in: + // %b = getelementptr %a, 0 + // ...The relationships are %b assign %a, and %a assign %b. This used to be + // two edges, but having a distinction bought us nothing. + Assign, + + // The edge used when we have an edge going from some handle to a Value. + // Examples of this include: + // %b = load %a (%b Dereference %a) + // %b = extractelement %a, 0 (%a Dereference %b) + Dereference, + + // The edge used when our edge goes from a value to a handle that may have + // contained it at some point. Examples: + // %b = load %a (%a Reference %b) + // %b = extractelement %a, 0 (%b Reference %a) + Reference +}; + +// \brief Encodes the notion of a "use" +struct Edge { + // \brief Which value the edge is coming from + Value *From; + + // \brief Which value the edge is pointing to + Value *To; + + // \brief Edge weight + EdgeType Weight; + + // \brief Whether we aliased any external values along the way that may be + // invisible to the analysis (i.e. landingpad for exceptions, calls for + // interprocedural analysis, etc.) + StratifiedAttrs AdditionalAttrs; + + Edge(Value *From, Value *To, EdgeType W, StratifiedAttrs A) + : From(From), To(To), Weight(W), AdditionalAttrs(A) {} +}; + +// \brief Information we have about a function and would like to keep around +struct FunctionInfo { + StratifiedSets Sets; + // Lots of functions have < 4 returns. Adjust as necessary. + SmallVector ReturnedValues; +}; + +struct CFLAliasAnalysis; + +struct FunctionHandle : public CallbackVH { + FunctionHandle(Function *Fn, CFLAliasAnalysis *CFLAA) + : CallbackVH(Fn), CFLAA(CFLAA) { + assert(Fn != nullptr); + assert(CFLAA != nullptr); + } + + virtual ~FunctionHandle() {} + + virtual void deleted() override { removeSelfFromCache(); } + virtual void allUsesReplacedWith(Value *) override { removeSelfFromCache(); } + +private: + CFLAliasAnalysis *CFLAA; + + void removeSelfFromCache(); +}; + +struct CFLAliasAnalysis : public ImmutablePass, public AliasAnalysis { +private: + /// \brief Cached mapping of Functions to their StratifiedSets. + /// If a function's sets are currently being built, it is marked + /// in the cache as an Optional without a value. This way, if we + /// have any kind of recursion, it is discernable from a function + /// that simply has empty sets. + DenseMap> Cache; + std::forward_list Handles; + +public: + static char ID; + + CFLAliasAnalysis() : ImmutablePass(ID) { + initializeCFLAliasAnalysisPass(*PassRegistry::getPassRegistry()); + } + + virtual ~CFLAliasAnalysis() {} + + void getAnalysisUsage(AnalysisUsage &AU) const { + AliasAnalysis::getAnalysisUsage(AU); + } + + void *getAdjustedAnalysisPointer(const void *ID) override { + if (ID == &AliasAnalysis::ID) + return (AliasAnalysis *)this; + return this; + } + + /// \brief Inserts the given Function into the cache. + void scan(Function *Fn); + + void evict(Function *Fn) { Cache.erase(Fn); } + + /// \brief Ensures that the given function is available in the cache. + /// Returns the appropriate entry from the cache. + const Optional &ensureCached(Function *Fn) { + auto Iter = Cache.find(Fn); + if (Iter == Cache.end()) { + scan(Fn); + Iter = Cache.find(Fn); + assert(Iter != Cache.end()); + assert(Iter->second.hasValue()); + } + return Iter->second; + } + + AliasResult query(const Location &LocA, const Location &LocB); + + AliasResult alias(const Location &LocA, const Location &LocB) override { + if (LocA.Ptr == LocB.Ptr) { + if (LocA.Size == LocB.Size) { + return MustAlias; + } else { + return PartialAlias; + } + } + + // Comparisons between global variables and other constants should be + // handled by BasicAA. + if (isa(LocA.Ptr) && isa(LocB.Ptr)) { + return MayAlias; + } + + return query(LocA, LocB); + } + + void initializePass() override { InitializeAliasAnalysis(this); } +}; + +void FunctionHandle::removeSelfFromCache() { + assert(CFLAA != nullptr); + auto *Val = getValPtr(); + CFLAA->evict(cast(Val)); + setValPtr(nullptr); +} + +// \brief Gets the edges our graph should have, based on an Instruction* +class GetEdgesVisitor : public InstVisitor { + CFLAliasAnalysis &AA; + SmallVectorImpl &Output; + +public: + GetEdgesVisitor(CFLAliasAnalysis &AA, SmallVectorImpl &Output) + : AA(AA), Output(Output) {} + + void visitInstruction(Instruction &) { + llvm_unreachable("Unsupported instruction encountered"); + } + + void visitCastInst(CastInst &Inst) { + Output.push_back({&Inst, Inst.getOperand(0), EdgeType::Assign, AttrNone}); + } + + void visitBinaryOperator(BinaryOperator &Inst) { + auto *Op1 = Inst.getOperand(0); + auto *Op2 = Inst.getOperand(1); + Output.push_back({&Inst, Op1, EdgeType::Assign, AttrNone}); + Output.push_back({&Inst, Op2, EdgeType::Assign, AttrNone}); + } + + void visitAtomicCmpXchgInst(AtomicCmpXchgInst &Inst) { + auto *Ptr = Inst.getPointerOperand(); + auto *Val = Inst.getNewValOperand(); + Output.push_back({Ptr, Val, EdgeType::Dereference, AttrNone}); + } + + void visitAtomicRMWInst(AtomicRMWInst &Inst) { + auto *Ptr = Inst.getPointerOperand(); + auto *Val = Inst.getValOperand(); + Output.push_back({Ptr, Val, EdgeType::Dereference, AttrNone}); + } + + void visitPHINode(PHINode &Inst) { + for (unsigned I = 0, E = Inst.getNumIncomingValues(); I != E; ++I) { + Value *Val = Inst.getIncomingValue(I); + Output.push_back({&Inst, Val, EdgeType::Assign, AttrNone}); + } + } + + void visitGetElementPtrInst(GetElementPtrInst &Inst) { + auto *Op = Inst.getPointerOperand(); + Output.push_back({&Inst, Op, EdgeType::Assign, AttrNone}); + for (auto I = Inst.idx_begin(), E = Inst.idx_end(); I != E; ++I) + Output.push_back({&Inst, *I, EdgeType::Assign, AttrNone}); + } + + void visitSelectInst(SelectInst &Inst) { + auto *Condition = Inst.getCondition(); + Output.push_back({&Inst, Condition, EdgeType::Assign, AttrNone}); + auto *TrueVal = Inst.getTrueValue(); + Output.push_back({&Inst, TrueVal, EdgeType::Assign, AttrNone}); + auto *FalseVal = Inst.getFalseValue(); + Output.push_back({&Inst, FalseVal, EdgeType::Assign, AttrNone}); + } + + void visitAllocaInst(AllocaInst &) {} + + void visitLoadInst(LoadInst &Inst) { + auto *Ptr = Inst.getPointerOperand(); + auto *Val = &Inst; + Output.push_back({Val, Ptr, EdgeType::Reference, AttrNone}); + } + + void visitStoreInst(StoreInst &Inst) { + auto *Ptr = Inst.getPointerOperand(); + auto *Val = Inst.getValueOperand(); + Output.push_back({Ptr, Val, EdgeType::Dereference, AttrNone}); + } + + static bool isFunctionExternal(Function *Fn) { + return Fn->isDeclaration() || !Fn->hasLocalLinkage(); + } + + // Gets whether the sets at Index1 above, below, or equal to the sets at + // Index2. Returns None if they are not in the same set chain. + static Optional getIndexRelation(const StratifiedSets &Sets, + StratifiedIndex Index1, + StratifiedIndex Index2) { + if (Index1 == Index2) + return Level::Same; + + const auto *Current = &Sets.getLink(Index1); + while (Current->hasBelow()) { + if (Current->Below == Index2) + return Level::Below; + Current = &Sets.getLink(Current->Below); + } + + Current = &Sets.getLink(Index1); + while (Current->hasAbove()) { + if (Current->Above == Index2) + return Level::Above; + Current = &Sets.getLink(Current->Above); + } + + return NoneType(); + } + + bool + tryInterproceduralAnalysis(const SmallVectorImpl &Fns, + Value *FuncValue, + const iterator_range &Args) { + constexpr unsigned ExpectedMaxArgs = 8; + constexpr unsigned MaxSupportedArgs = 50; + assert(Fns.size() > 0); + + // I put this here to give us an upper bound on time taken by IPA. Is it + // really (realistically) needed? Keep in mind that we do have an n^2 algo. + if (std::distance(Args.begin(), Args.end()) > MaxSupportedArgs) + return false; + + // Exit early if we'll fail anyway + for (auto *Fn : Fns) { + if (isFunctionExternal(Fn) || Fn->isVarArg()) + return false; + auto &MaybeInfo = AA.ensureCached(Fn); + if (!MaybeInfo.hasValue()) + return false; + } + + SmallVector Arguments(Args.begin(), Args.end()); + SmallVector Parameters; + for (auto *Fn : Fns) { + auto &Info = *AA.ensureCached(Fn); + auto &Sets = Info.Sets; + auto &RetVals = Info.ReturnedValues; + + Parameters.clear(); + for (auto &Param : Fn->args()) { + auto MaybeInfo = Sets.find(&Param); + // Did a new parameter somehow get added to the function/slip by? + if (!MaybeInfo.hasValue()) + return false; + Parameters.push_back(*MaybeInfo); + } + + // Adding an edge from argument -> return value for each parameter that + // may alias the return value + for (unsigned I = 0, E = Parameters.size(); I != E; ++I) { + auto &ParamInfo = Parameters[I]; + auto &ArgVal = Arguments[I]; + bool AddEdge = false; + StratifiedAttrs Externals; + for (unsigned X = 0, XE = RetVals.size(); X != XE; ++X) { + auto MaybeInfo = Sets.find(RetVals[X]); + if (!MaybeInfo.hasValue()) + return false; + + auto &RetInfo = *MaybeInfo; + auto RetAttrs = Sets.getLink(RetInfo.Index).Attrs; + auto ParamAttrs = Sets.getLink(ParamInfo.Index).Attrs; + auto MaybeRelation = + getIndexRelation(Sets, ParamInfo.Index, RetInfo.Index); + if (MaybeRelation.hasValue()) { + AddEdge = true; + Externals |= RetAttrs | ParamAttrs; + } + } + if (AddEdge) + Output.push_back({FuncValue, ArgVal, EdgeType::Assign, + StratifiedAttrs().flip()}); + } + + if (Parameters.size() != Arguments.size()) + return false; + + // Adding edges between arguments for arguments that may end up aliasing + // each other. This is necessary for functions such as + // void foo(int** a, int** b) { *a = *b; } + // (Technically, the proper sets for this would be those below + // Arguments[I] and Arguments[X], but our algorithm will produce + // extremely similar, and equally correct, results either way) + for (unsigned I = 0, E = Arguments.size(); I != E; ++I) { + auto &MainVal = Arguments[I]; + auto &MainInfo = Parameters[I]; + auto &MainAttrs = Sets.getLink(MainInfo.Index).Attrs; + for (unsigned X = I + 1; X != E; ++X) { + auto &SubInfo = Parameters[X]; + auto &SubVal = Arguments[X]; + auto &SubAttrs = Sets.getLink(SubInfo.Index).Attrs; + auto MaybeRelation = + getIndexRelation(Sets, MainInfo.Index, SubInfo.Index); + + if (!MaybeRelation.hasValue()) + continue; + + auto NewAttrs = SubAttrs | MainAttrs; + Output.push_back({MainVal, SubVal, EdgeType::Assign, NewAttrs}); + } + } + } + return true; + } + + template void visitCallLikeInst(InstT &Inst) { + SmallVector Targets; + if (getPossibleTargets(&Inst, Targets)) { + if (tryInterproceduralAnalysis(Targets, &Inst, Inst.arg_operands())) + return; + // Cleanup from interprocedural analysis + Output.clear(); + } + + for (Value *V : Inst.arg_operands()) + Output.push_back({&Inst, V, EdgeType::Assign, AttrAll}); + } + + void visitCallInst(CallInst &Inst) { visitCallLikeInst(Inst); } + + void visitInvokeInst(InvokeInst &Inst) { visitCallLikeInst(Inst); } + + // Because vectors/aggregates are immutable and unaddressable, + // there's nothing we can do to coax a value out of them, other + // than calling Extract{Element,Value}. We can effectively treat + // them as pointers to arbitrary memory locations we can store in + // and load from. + void visitExtractElementInst(ExtractElementInst &Inst) { + auto *Ptr = Inst.getVectorOperand(); + auto *Val = &Inst; + Output.push_back({Val, Ptr, EdgeType::Reference, AttrNone}); + } + + void visitInsertElementInst(InsertElementInst &Inst) { + auto *Vec = Inst.getOperand(0); + auto *Val = Inst.getOperand(1); + Output.push_back({&Inst, Vec, EdgeType::Assign, AttrNone}); + Output.push_back({&Inst, Val, EdgeType::Dereference, AttrNone}); + } + + void visitLandingPadInst(LandingPadInst &Inst) { + // Exceptions come from "nowhere", from our analysis' perspective. + // So we place the instruction its own group, noting that said group may + // alias externals + Output.push_back({&Inst, &Inst, EdgeType::Assign, AttrAll}); + } + + void visitInsertValueInst(InsertValueInst &Inst) { + auto *Agg = Inst.getOperand(0); + auto *Val = Inst.getOperand(1); + Output.push_back({&Inst, Agg, EdgeType::Assign, AttrNone}); + Output.push_back({&Inst, Val, EdgeType::Dereference, AttrNone}); + } + + void visitExtractValueInst(ExtractValueInst &Inst) { + auto *Ptr = Inst.getAggregateOperand(); + Output.push_back({&Inst, Ptr, EdgeType::Reference, AttrNone}); + } + + void visitShuffleVectorInst(ShuffleVectorInst &Inst) { + auto *From1 = Inst.getOperand(0); + auto *From2 = Inst.getOperand(1); + Output.push_back({&Inst, From1, EdgeType::Assign, AttrNone}); + Output.push_back({&Inst, From2, EdgeType::Assign, AttrNone}); + } +}; + +// For a given instruction, we need to know which Value* to get the +// users of in order to build our graph. In some cases (i.e. add), +// we simply need the Instruction*. In other cases (i.e. store), +// finding the users of the Instruction* is useless; we need to find +// the users of the first operand. This handles determining which +// value to follow for us. +// +// Note: we *need* to keep this in sync with GetEdgesVisitor. Add +// something to GetEdgesVisitor, add it here -- remove something from +// GetEdgesVisitor, remove it here. +class GetTargetValueVisitor + : public InstVisitor { +public: + Value *visitInstruction(Instruction &Inst) { return &Inst; } + + Value *visitStoreInst(StoreInst &Inst) { return Inst.getPointerOperand(); } + + Value *visitAtomicCmpXchgInst(AtomicCmpXchgInst &Inst) { + return Inst.getPointerOperand(); + } + + Value *visitAtomicRMWInst(AtomicRMWInst &Inst) { + return Inst.getPointerOperand(); + } + + Value *visitInsertElementInst(InsertElementInst &Inst) { + return Inst.getOperand(0); + } + + Value *visitInsertValueInst(InsertValueInst &Inst) { + return Inst.getAggregateOperand(); + } +}; + +// Set building requires a weighted bidirectional graph. +template class WeightedBidirectionalGraph { +public: + typedef std::size_t Node; + +private: + constexpr static Node StartNode = Node(0); + + struct Edge { + EdgeTypeT Weight; + Node Other; + + bool operator==(const Edge &E) const { + return Weight == E.Weight && Other == E.Other; + } + + bool operator!=(const Edge &E) const { return !operator==(E); } + }; + + struct NodeImpl { + std::vector Edges; + }; + + std::vector NodeImpls; + + bool inbounds(Node NodeIndex) const { return NodeIndex < NodeImpls.size(); } + + const NodeImpl &getNode(Node N) const { return NodeImpls[N]; } + NodeImpl &getNode(Node N) { return NodeImpls[N]; } + +public: + // ----- Various Edge iterators for the graph ----- // + + // \brief Iterator for edges. Because this graph is bidirected, we don't + // allow modificaiton of the edges using this iterator. Additionally, the + // iterator becomes invalid if you add edges to or from the node you're + // getting the edges of. + struct EdgeIterator : public std::iterator> { + EdgeIterator(const typename std::vector::const_iterator &Iter) + : Current(Iter) {} + + EdgeIterator(NodeImpl &Impl) : Current(Impl.begin()) {} + + EdgeIterator &operator++() { + ++Current; + return *this; + } + + EdgeIterator operator++(int) { + EdgeIterator Copy(Current); + operator++(); + return Copy; + } + + std::tuple &operator*() { + Store = std::make_tuple(Current->Weight, Current->Other); + return Store; + } + + bool operator==(const EdgeIterator &Other) const { + return Current == Other.Current; + } + + bool operator!=(const EdgeIterator &Other) const { + return !operator==(Other); + } + + private: + typename std::vector::const_iterator Current; + std::tuple Store; + }; + + // Wrapper for EdgeIterator with begin()/end() calls. + struct EdgeIterable { + EdgeIterable(const std::vector &Edges) + : BeginIter(Edges.begin()), EndIter(Edges.end()) {} + + EdgeIterator begin() { return EdgeIterator(BeginIter); } + + EdgeIterator end() { return EdgeIterator(EndIter); } + + private: + typename std::vector::const_iterator BeginIter; + typename std::vector::const_iterator EndIter; + }; + + // ----- Actual graph-related things ----- // + + WeightedBidirectionalGraph() = default; + + WeightedBidirectionalGraph(WeightedBidirectionalGraph &&Other) + : NodeImpls(std::move(Other.NodeImpls)) {} + + WeightedBidirectionalGraph & + operator=(WeightedBidirectionalGraph &&Other) { + NodeImpls = std::move(Other.NodeImpls); + return *this; + } + + Node addNode() { + auto Index = NodeImpls.size(); + auto NewNode = Node(Index); + NodeImpls.push_back(NodeImpl()); + return NewNode; + } + + void addEdge(Node From, Node To, const EdgeTypeT &Weight, + const EdgeTypeT &ReverseWeight) { + assert(inbounds(From)); + assert(inbounds(To)); + auto &FromNode = getNode(From); + auto &ToNode = getNode(To); + FromNode.Edges.push_back(Edge{Weight, To}); + ToNode.Edges.push_back(Edge{ReverseWeight, From}); + } + + EdgeIterable edgesFor(const Node &N) const { + const auto &Node = getNode(N); + return EdgeIterable(Node.Edges); + } + + bool empty() const { return NodeImpls.empty(); } + std::size_t size() const { return NodeImpls.size(); } + + // \brief Gets an arbitrary node in the graph as a starting point for + // traversal. + Node getEntryNode() { + assert(inbounds(StartNode)); + return StartNode; + } +}; + +typedef WeightedBidirectionalGraph> GraphT; +typedef DenseMap NodeMapT; +} + +// -- Setting up/registering CFLAA pass -- // +char CFLAliasAnalysis::ID = 0; + +INITIALIZE_AG_PASS(CFLAliasAnalysis, AliasAnalysis, "cfl-aa", + "CFL-Based AA implementation", false, true, false) + +ImmutablePass *llvm::createCFLAliasAnalysisPass() { + return new CFLAliasAnalysis(); +} + +//===----------------------------------------------------------------------===// +// Function declarations that require types defined in the namespace above +//===----------------------------------------------------------------------===// + +// Given an argument number, returns the appropriate Attr index to set. +static StratifiedAttr argNumberToAttrIndex(StratifiedAttr); + +// Given a Value, potentially return which AttrIndex it maps to. +static Optional valueToAttrIndex(Value *Val); + +// Gets the inverse of a given EdgeType. +static EdgeType flipWeight(EdgeType); + +// Gets edges of the given Instruction*, writing them to the SmallVector*. +static void argsToEdges(CFLAliasAnalysis &, Instruction *, + SmallVectorImpl &); + +// Gets the "Level" that one should travel in StratifiedSets +// given an EdgeType. +static Level directionOfEdgeType(EdgeType); + +// Builds the graph needed for constructing the StratifiedSets for the +// given function +static void buildGraphFrom(CFLAliasAnalysis &, Function *, + SmallVectorImpl &, NodeMapT &, GraphT &); + +// Builds the graph + StratifiedSets for a function. +static FunctionInfo buildSetsFrom(CFLAliasAnalysis &, Function *); + +static Optional parentFunctionOfValue(Value *Val) { + if (auto *Inst = dyn_cast(Val)) { + auto *Bb = Inst->getParent(); + return Bb->getParent(); + } + + if (auto *Arg = dyn_cast(Val)) + return Arg->getParent(); + return NoneType(); +} + +template +static bool getPossibleTargets(Inst *Call, + SmallVectorImpl &Output) { + if (auto *Fn = Call->getCalledFunction()) { + Output.push_back(Fn); + return true; + } + + // TODO: If the call is indirect, we might be able to enumerate all potential + // targets of the call and return them, rather than just failing. + return false; +} + +static Optional getTargetValue(Instruction *Inst) { + GetTargetValueVisitor V; + return V.visit(Inst); +} + +static bool hasUsefulEdges(Instruction *Inst) { + bool IsNonInvokeTerminator = + isa(Inst) && !isa(Inst); + return !isa(Inst) && !isa(Inst) && !IsNonInvokeTerminator; +} + +static Optional valueToAttrIndex(Value *Val) { + if (isa(Val)) + return AttrGlobalIndex; + + if (auto *Arg = dyn_cast(Val)) + if (!Arg->hasNoAliasAttr()) + return argNumberToAttrIndex(Arg->getArgNo()); + return NoneType(); +} + +static StratifiedAttr argNumberToAttrIndex(unsigned ArgNum) { + if (ArgNum > AttrMaxNumArgs) + return AttrAllIndex; + return ArgNum + AttrFirstArgIndex; +} + +static EdgeType flipWeight(EdgeType Initial) { + switch (Initial) { + case EdgeType::Assign: + return EdgeType::Assign; + case EdgeType::Dereference: + return EdgeType::Reference; + case EdgeType::Reference: + return EdgeType::Dereference; + } + llvm_unreachable("Incomplete coverage of EdgeType enum"); +} + +static void argsToEdges(CFLAliasAnalysis &Analysis, Instruction *Inst, + SmallVectorImpl &Output) { + GetEdgesVisitor v(Analysis, Output); + v.visit(Inst); +} + +static Level directionOfEdgeType(EdgeType Weight) { + switch (Weight) { + case EdgeType::Reference: + return Level::Above; + case EdgeType::Dereference: + return Level::Below; + case EdgeType::Assign: + return Level::Same; + } + llvm_unreachable("Incomplete switch coverage"); +} + +// Aside: We may remove graph construction entirely, because it doesn't really +// buy us much that we don't already have. I'd like to add interprocedural +// analysis prior to this however, in case that somehow requires the graph +// produced by this for efficient execution +static void buildGraphFrom(CFLAliasAnalysis &Analysis, Function *Fn, + SmallVectorImpl &ReturnedValues, + NodeMapT &Map, GraphT &Graph) { + const auto findOrInsertNode = [&Map, &Graph](Value *Val) { + auto Pair = Map.insert(std::make_pair(Val, GraphT::Node())); + auto &Iter = Pair.first; + if (Pair.second) { + auto NewNode = Graph.addNode(); + Iter->second = NewNode; + } + return Iter->second; + }; + + SmallVector Edges; + for (auto &Bb : Fn->getBasicBlockList()) { + for (auto &Inst : Bb.getInstList()) { + // We don't want the edges of most "return" instructions, but we *do* want + // to know what can be returned. + if (auto *Ret = dyn_cast(&Inst)) + ReturnedValues.push_back(Ret); + + if (!hasUsefulEdges(&Inst)) + continue; + + Edges.clear(); + argsToEdges(Analysis, &Inst, Edges); + + // In the case of an unused alloca (or similar), edges may be empty. Note + // that it exists so we can potentially answer NoAlias. + if (Edges.empty()) { + auto MaybeVal = getTargetValue(&Inst); + assert(MaybeVal.hasValue()); + auto *Target = *MaybeVal; + findOrInsertNode(Target); + continue; + } + + for (const Edge &E : Edges) { + auto To = findOrInsertNode(E.To); + auto From = findOrInsertNode(E.From); + auto FlippedWeight = flipWeight(E.Weight); + auto Attrs = E.AdditionalAttrs; + Graph.addEdge(From, To, {E.Weight, Attrs}, {FlippedWeight, Attrs}); + } + } + } +} + +static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) { + NodeMapT Map; + GraphT Graph; + SmallVector ReturnedValues; + + buildGraphFrom(Analysis, Fn, ReturnedValues, Map, Graph); + + DenseMap NodeValueMap; + NodeValueMap.resize(Map.size()); + for (const auto &Pair : Map) + NodeValueMap.insert({Pair.second, Pair.first}); + + const auto findValueOrDie = [&NodeValueMap](GraphT::Node Node) { + auto ValIter = NodeValueMap.find(Node); + assert(ValIter != NodeValueMap.end()); + return ValIter->second; + }; + + StratifiedSetsBuilder Builder; + + SmallVector Worklist; + for (auto &Pair : Map) { + Worklist.clear(); + + auto *Value = Pair.first; + Builder.add(Value); + auto InitialNode = Pair.second; + Worklist.push_back(InitialNode); + while (!Worklist.empty()) { + auto Node = Worklist.pop_back_val(); + auto *CurValue = findValueOrDie(Node); + if (isa(CurValue) && !isa(CurValue)) + continue; + + for (const auto &EdgeTuple : Graph.edgesFor(Node)) { + auto Weight = std::get<0>(EdgeTuple); + auto Label = Weight.first; + auto &OtherNode = std::get<1>(EdgeTuple); + auto *OtherValue = findValueOrDie(OtherNode); + + if (isa(OtherValue) && !isa(OtherValue)) + continue; + + bool Added; + switch (directionOfEdgeType(Label)) { + case Level::Above: + Added = Builder.addAbove(CurValue, OtherValue); + break; + case Level::Below: + Added = Builder.addBelow(CurValue, OtherValue); + break; + case Level::Same: + Added = Builder.addWith(CurValue, OtherValue); + break; + } + + if (Added) { + auto Aliasing = Weight.second; + if (auto MaybeCurIndex = valueToAttrIndex(CurValue)) + Aliasing.set(*MaybeCurIndex); + if (auto MaybeOtherIndex = valueToAttrIndex(OtherValue)) + Aliasing.set(*MaybeOtherIndex); + Builder.noteAttributes(CurValue, Aliasing); + Builder.noteAttributes(OtherValue, Aliasing); + Worklist.push_back(OtherNode); + } + } + } + } + + // There are times when we end up with parameters not in our graph (i.e. if + // it's only used as the condition of a branch). Other bits of code depend on + // things that were present during construction being present in the graph. + // So, we add all present arguments here. + for (auto &Arg : Fn->args()) { + Builder.add(&Arg); + } + + return {Builder.build(), std::move(ReturnedValues)}; +} + +void CFLAliasAnalysis::scan(Function *Fn) { + auto InsertPair = Cache.insert({Fn, Optional()}); + (void)InsertPair; + assert(InsertPair.second && + "Trying to scan a function that has already been cached"); + + FunctionInfo Info(buildSetsFrom(*this, Fn)); + Cache[Fn] = std::move(Info); + Handles.push_front(FunctionHandle(Fn, this)); +} + +AliasAnalysis::AliasResult +CFLAliasAnalysis::query(const AliasAnalysis::Location &LocA, + const AliasAnalysis::Location &LocB) { + auto *ValA = const_cast(LocA.Ptr); + auto *ValB = const_cast(LocB.Ptr); + + Function *Fn = nullptr; + auto MaybeFnA = parentFunctionOfValue(ValA); + auto MaybeFnB = parentFunctionOfValue(ValB); + if (!MaybeFnA.hasValue() && !MaybeFnB.hasValue()) { + llvm_unreachable("Don't know how to extract the parent function " + "from values A or B"); + } + + if (MaybeFnA.hasValue()) { + Fn = *MaybeFnA; + assert((!MaybeFnB.hasValue() || *MaybeFnB == *MaybeFnA) && + "Interprocedural queries not supported"); + } else { + Fn = *MaybeFnB; + } + + assert(Fn != nullptr); + auto &MaybeInfo = ensureCached(Fn); + assert(MaybeInfo.hasValue()); + + auto &Sets = MaybeInfo->Sets; + auto MaybeA = Sets.find(ValA); + if (!MaybeA.hasValue()) + return AliasAnalysis::MayAlias; + + auto MaybeB = Sets.find(ValB); + if (!MaybeB.hasValue()) + return AliasAnalysis::MayAlias; + + auto SetA = *MaybeA; + auto SetB = *MaybeB; + + if (SetA.Index == SetB.Index) + return AliasAnalysis::PartialAlias; + + auto AttrsA = Sets.getLink(SetA.Index).Attrs; + auto AttrsB = Sets.getLink(SetB.Index).Attrs; + auto CombinedAttrs = AttrsA | AttrsB; + if (CombinedAttrs.any()) + return AliasAnalysis::PartialAlias; + + return AliasAnalysis::NoAlias; +} Index: lib/Analysis/CMakeLists.txt =================================================================== --- lib/Analysis/CMakeLists.txt +++ lib/Analysis/CMakeLists.txt @@ -11,6 +11,7 @@ BranchProbabilityInfo.cpp CFG.cpp CFGPrinter.cpp + CFLAliasAnalysis.cpp CGSCCPassManager.cpp CaptureTracking.cpp CostModel.cpp Index: lib/Analysis/StratifiedSets.h =================================================================== --- /dev/null +++ lib/Analysis/StratifiedSets.h @@ -0,0 +1,691 @@ +//===- StratifiedSets.h - Abstract stratified sets implementation. --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_STRATIFIEDSETS_H +#define LLVM_ADT_STRATIFIEDSETS_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include +#include +#include +#include +#include +#include + +namespace llvm { +// \brief An index into Stratified Sets. +typedef unsigned StratifiedIndex; +// NOTE: ^ This can't be a short -- bootstrapping clang has a case where +// ~1M sets exist. + +// \brief Container of information related to a value in a StratifiedSet. +struct StratifiedInfo { + StratifiedIndex Index; + // For field sensitivity, etc. we can tack attributes on to this struct. +}; + +// The number of attributes that StratifiedAttrs should contain. Attributes are +// described below, and 32 was an arbitrary choice because it fits nicely in 32 +// bits (because we use a bitset for StratifiedAttrs). +static constexpr unsigned NumStratifiedAttrs = 32; + +// These are attributes that the users of StratifiedSets/StratifiedSetBuilders +// may use for various purposes. These also have the special property of that +// they are merged down. So, if set A is above set B, and one decides to set an +// attribute in set A, then the attribute will automatically be set in set B. +typedef std::bitset StratifiedAttrs; + +// \brief A "link" between two StratifiedSets. +struct StratifiedLink { + // \brief This is a value used to signify "does not exist" where + // the StratifiedIndex type is used. This is used instead of + // Optional because Optional would + // eat up a considerable amount of extra memory, after struct + // padding/alignment is taken into account. + static constexpr auto SetSentinel = + std::numeric_limits::max(); + + // \brief The index for the set "above" current + StratifiedIndex Above; + + // \brief The link for the set "below" current + StratifiedIndex Below; + + // \brief Attributes for these StratifiedSets. + StratifiedAttrs Attrs; + + StratifiedLink() : Above(SetSentinel), Below(SetSentinel) {} + + bool hasBelow() const { return Below != SetSentinel; } + bool hasAbove() const { return Above != SetSentinel; } + + void clearBelow() { Below = SetSentinel; } + void clearAbove() { Above = SetSentinel; } +}; + +// \brief These are stratified sets, as described in "Fast algorithms for +// Dyck-CFL-reachability with applications to Alias Analysis" by Zhang Q, Lyu M +// R, Yuan H, and Su Z. -- in short, this is meant to represent different sets +// of Value*s. If two Value*s are in the same set, or if both sets have +// overlapping attributes, then the Value*s are said to alias. +// +// Sets may be related by position, meaning that one set may be considered as +// above or below another. In CFL Alias Analysis, this gives us an indication +// of how two variables are related; if the set of variable A is below a set +// containing variable B, then at some point, a variable that has interacted +// with B (or B itself) was either used in order to extract the variable A, or +// was used as storage of variable A. +// +// Sets may also have attributes (as noted above). These attributes are +// generally used for noting whether a variable in the set has interacted with +// a variable whose origins we don't quite know (i.e. globals/arguments), or if +// the variable may have had operations performed on it (modified in a function +// call). All attributes that exist in a set A must exist in all sets marked as +// below set A. +template class StratifiedSets { +public: + StratifiedSets() {} + + StratifiedSets(DenseMap Map, + std::vector Links) + : Values(std::move(Map)), Links(std::move(Links)) {} + + StratifiedSets(StratifiedSets &&Other) { *this = std::move(Other); } + + StratifiedSets &operator=(StratifiedSets &&Other) { + Values = std::move(Other.Values); + Links = std::move(Other.Links); + return *this; + } + + Optional find(const T &Elem) const { + auto Iter = Values.find(Elem); + if (Iter == Values.end()) { + return NoneType(); + } + return Iter->second; + } + + const StratifiedLink &getLink(StratifiedIndex Index) const { + assert(inbounds(Index)); + return Links[Index]; + } + +private: + DenseMap Values; + std::vector Links; + + bool inbounds(StratifiedIndex Idx) const { return Idx < Links.size(); } +}; + +// \brief Generic Builder class that produces StratifiedSets instances. +// +// The goal of this builder is to efficiently produce correct StratifiedSets +// instances. To this end, we use a few tricks: +// > Set chains (A method for linking sets together) +// > Set remaps (A method for marking a set as an alias [irony?] of another) +// +// ==== Set chains ==== +// This builder has a notion of some value A being above, below, or with some +// other value B: +// > The `A above B` relationship implies that there is a reference edge going +// from A to B. Namely, it notes that A can store anything in B's set. +// > The `A below B` relationship is the opposite of `A above B`. It implies +// that there's a dereference edge going from A to B. +// > The `A with B` relationship states that there's an assignment edge going +// from A to B, and that A and B should be treated as equals. +// +// As an example, take the following code snippet: +// +// %a = alloca i32, align 4 +// %ap = alloca i32*, align 8 +// %app = alloca i32**, align 8 +// store %a, %ap +// store %ap, %app +// %aw = getelementptr %ap, 0 +// +// Given this, the follow relations exist: +// - %a below %ap & %ap above %a +// - %ap below %app & %app above %ap +// - %aw with %ap & %ap with %aw +// +// These relations produce the following sets: +// [{%a}, {%ap, %aw}, {%app}] +// +// ...Which states that the only MayAlias relationship in the above program is +// between %ap and %aw. +// +// Life gets more complicated when we actually have logic in our programs. So, +// we either must remove this logic from our programs, or make consessions for +// it in our AA algorithms. In this case, we have decided to select the latter +// option. +// +// First complication: Conditionals +// Motivation: +// %ad = alloca int, align 4 +// %a = alloca int*, align 8 +// %b = alloca int*, align 8 +// %bp = alloca int**, align 8 +// %c = call i1 @SomeFunc() +// %k = select %c, %ad, %bp +// store %ad, %a +// store %b, %bp +// +// %k has 'with' edges to both %a and %b, which ordinarily would not be linked +// together. So, we merge the set that contains %a with the set that contains +// %b. We then recursively merge the set above %a with the set above %b, and +// the set below %a with the set below %b, etc. Ultimately, the sets for this +// program would end up like: {%ad}, {%a, %b, %k}, {%bp}, where {%ad} is below +// {%a, %b, %c} is below {%ad}. +// +// Second complication: Arbitrary casts +// Motivation: +// %ip = alloca int*, align 8 +// %ipp = alloca int**, align 8 +// %i = bitcast ipp to int +// store %ip, %ipp +// store %i, %ip +// +// This is impossible to construct with any of the rules above, because a set +// containing both {%i, %ipp} is supposed to exist, the set with %i is supposed +// to be below the set with %ip, and the set with %ip is supposed to be below +// the set with %ipp. Because we don't allow circular relationships like this, +// we merge all concerned sets into one. So, the above code would generate a +// single StratifiedSet: {%ip, %ipp, %i}. +// +// ==== Set remaps ==== +// More of an implementation detail than anything -- when merging sets, we need +// to update the numbers of all of the elements mapped to those sets. Rather +// than doing this at each merge, we note in the BuilderLink structure that a +// remap has occurred, and use this information so we can defer renumbering set +// elements until build time. +template class StratifiedSetsBuilder { + // \brief Represents a Stratified Set, with information about the Stratified + // Set above it, the set below it, and whether the current set has been + // remapped to another. + struct BuilderLink { + const StratifiedIndex Number; + + BuilderLink(StratifiedIndex N) : Number(N) { + Remap = StratifiedLink::SetSentinel; + } + + bool hasAbove() const { + assert(!isRemapped()); + return Link.hasAbove(); + } + + bool hasBelow() const { + assert(!isRemapped()); + return Link.hasBelow(); + } + + void setBelow(StratifiedIndex I) { + assert(!isRemapped()); + Link.Below = I; + } + + void setAbove(StratifiedIndex I) { + assert(!isRemapped()); + Link.Above = I; + } + + void clearBelow() { + assert(!isRemapped()); + Link.clearBelow(); + } + + void clearAbove() { + assert(!isRemapped()); + Link.clearAbove(); + } + + StratifiedIndex getBelow() const { + assert(!isRemapped()); + assert(hasBelow()); + return Link.Below; + } + + StratifiedIndex getAbove() const { + assert(!isRemapped()); + assert(hasAbove()); + return Link.Above; + } + + StratifiedAttrs &getAttrs() { + assert(!isRemapped()); + return Link.Attrs; + } + + void setAttr(unsigned index) { + assert(!isRemapped()); + assert(index < NumStratifiedAttrs); + Link.Attrs.set(index); + } + + void setAttrs(const StratifiedAttrs &other) { + assert(!isRemapped()); + Link.Attrs |= other; + } + + bool isRemapped() const { return Remap != StratifiedLink::SetSentinel; } + + // \brief For initial remapping to another set + void remapTo(StratifiedIndex Other) { + assert(!isRemapped()); + Remap = Other; + } + + StratifiedIndex getRemapIndex() const { + assert(isRemapped()); + return Remap; + } + + // \brief Should only be called when we're already remapped. + void updateRemap(StratifiedIndex Other) { + assert(isRemapped()); + Remap = Other; + } + + // \brief Prefer the above functions to calling things directly on what's + // returned from this -- they guard against unexpected calls when the + // current BuilderLink is remapped. + const StratifiedLink &getLink() const { return Link; } + + private: + StratifiedLink Link; + StratifiedIndex Remap; + }; + + // \brief This function performs all of the set unioning/value renumbering + // that we've been putting off, and generates a vector that + // may be placed in a StratifiedSets instance. + void finalizeSets(std::vector &StratLinks) { + DenseMap Remaps; + for (auto &Link : Links) { + if (Link.isRemapped()) { + continue; + } + + StratifiedIndex Number = StratLinks.size(); + Remaps.insert({Link.Number, Number}); + StratLinks.push_back(Link.getLink()); + } + + for (auto &Link : StratLinks) { + if (Link.hasAbove()) { + auto &Above = linksAt(Link.Above); + auto Iter = Remaps.find(Above.Number); + assert(Iter != Remaps.end()); + Link.Above = Iter->second; + } + + if (Link.hasBelow()) { + auto &Below = linksAt(Link.Below); + auto Iter = Remaps.find(Below.Number); + assert(Iter != Remaps.end()); + Link.Below = Iter->second; + } + } + + for (auto &Pair : Values) { + auto &Info = Pair.second; + auto &Link = linksAt(Info.Index); + auto Iter = Remaps.find(Link.Number); + assert(Iter != Remaps.end()); + Info.Index = Iter->second; + } + } + + // \brief There's a guarantee in StratifiedLink where all bits set in a + // Link.externals will be set in all Link.externals "below" it. + static void propagateAttrs(std::vector &Links) { + const auto getHighestParentAbove = [&Links](StratifiedIndex Idx) { + const auto *Link = &Links[Idx]; + while (Link->hasAbove()) { + Idx = Link->Above; + Link = &Links[Idx]; + } + return Idx; + }; + + SmallSet Visited; + for (unsigned I = 0, E = Links.size(); I < E; ++I) { + auto CurrentIndex = getHighestParentAbove(I); + if (!Visited.insert(CurrentIndex)) { + continue; + } + + while (Links[CurrentIndex].hasBelow()) { + auto &CurrentBits = Links[CurrentIndex].Attrs; + auto NextIndex = Links[CurrentIndex].Below; + auto &NextBits = Links[NextIndex].Attrs; + NextBits |= CurrentBits; + CurrentIndex = NextIndex; + } + } + } + +public: + // \brief Builds a StratifiedSet from the information we've been given since + // either construction or the prior build() call. + StratifiedSets build() { + std::vector StratLinks; + finalizeSets(StratLinks); + propagateAttrs(StratLinks); + Links.clear(); + return StratifiedSets(std::move(Values), std::move(StratLinks)); + } + + std::size_t size() const { return Values.size(); } + std::size_t numSets() const { return Links.size(); } + + bool has(const T &Elem) const { return get(Elem).hasValue(); } + + bool add(const T &Main) { + if (get(Main).hasValue()) + return false; + + auto NewIndex = getNewUnlinkedIndex(); + return addAtMerging(Main, NewIndex); + } + + // \brief Restructures the stratified sets as necessary to make "ToAdd" in a + // set above "Main". There are some cases where this is not possible (see + // above), so we merge them such that ToAdd and Main are in the same set. + bool addAbove(const T &Main, const T &ToAdd) { + assert(has(Main)); + auto Index = *indexOf(Main); + if (!linksAt(Index).hasAbove()) + addLinkAbove(Index); + + auto Above = linksAt(Index).getAbove(); + return addAtMerging(ToAdd, Above); + } + + // \brief Restructures the stratified sets as necessary to make "ToAdd" in a + // set below "Main". There are some cases where this is not possible (see + // above), so we merge them such that ToAdd and Main are in the same set. + bool addBelow(const T &Main, const T &ToAdd) { + assert(has(Main)); + auto Index = *indexOf(Main); + if (!linksAt(Index).hasBelow()) + addLinkBelow(Index); + + auto Below = linksAt(Index).getBelow(); + return addAtMerging(ToAdd, Below); + } + + bool addWith(const T &Main, const T &ToAdd) { + assert(has(Main)); + auto MainIndex = *indexOf(Main); + return addAtMerging(ToAdd, MainIndex); + } + + void noteAttribute(const T &Main, unsigned AttrNum) { + assert(has(Main)); + assert(AttrNum < StratifiedLink::SetSentinel); + auto *Info = *get(Main); + auto &Link = linksAt(Info->Index); + Link.setAttr(AttrNum); + } + + void noteAttributes(const T &Main, const StratifiedAttrs &NewAttrs) { + assert(has(Main)); + auto *Info = *get(Main); + auto &Link = linksAt(Info->Index); + Link.setAttrs(NewAttrs); + } + + StratifiedAttrs getAttributes(const T &Main) { + assert(has(Main)); + auto *Info = *get(Main); + auto *Link = &linksAt(Info->Index); + auto Attrs = Link->getAttrs(); + while (Link->hasAbove()) { + Link = &linksAt(Link->getAbove()); + Attrs |= Link->getAttrs(); + } + + return Attrs; + } + + bool getAttribute(const T &Main, unsigned AttrNum) { + assert(AttrNum < StratifiedLink::SetSentinel); + auto Attrs = getAttributes(Main); + return Attrs[AttrNum]; + } + + // \brief Gets the attributes that have been applied to the set that Main + // belongs to. It ignores attributes in any sets above the one that Main + // resides in. + StratifiedAttrs getRawAttributes(const T &Main) { + assert(has(Main)); + auto *Info = *get(Main); + auto &Link = linksAt(Info->Index); + return Link.getAttrs(); + } + + // \brief Gets an attribute from the attributes that have been applied to the + // set that Main belongs to. It ignores attributes in any sets above the one + // that Main resides in. + bool getRawAttribute(const T &Main, unsigned AttrNum) { + assert(AttrNum < StratifiedLink::SetSentinel); + auto Attrs = getRawAttributes(Main); + return Attrs[AttrNum]; + } + +private: + DenseMap Values; + std::vector Links; + + // \brief Adds the given element at the given index, merging sets if + // necessary. + bool addAtMerging(const T &ToAdd, StratifiedIndex Index) { + StratifiedInfo Info = {Index}; + auto Pair = Values.insert({ToAdd, Info}); + if (Pair.second) + return true; + + auto &Iter = Pair.first; + auto &IterSet = linksAt(Iter->second.Index); + auto &ReqSet = linksAt(Index); + + // Failed to add where we wanted to. Merge the sets. + if (&IterSet != &ReqSet) + merge(IterSet.Number, ReqSet.Number); + + return false; + } + + // \brief Gets the BuilderLink at the given index, taking set remapping into + // account. + BuilderLink &linksAt(StratifiedIndex Index) { + auto *Start = &Links[Index]; + if (!Start->isRemapped()) + return *Start; + + auto *Current = Start; + while (Current->isRemapped()) + Current = &Links[Current->getRemapIndex()]; + + auto NewRemap = Current->Number; + + // Run through everything that has yet to be updated, and update them to + // remap to NewRemap + Current = Start; + while (Current->isRemapped()) { + auto *Next = &Links[Current->getRemapIndex()]; + Current->updateRemap(NewRemap); + Current = Next; + } + + return *Current; + } + + // \brief Merges two sets into one another. Assumes that these sets are not + // already one in the same + void merge(StratifiedIndex Idx1, StratifiedIndex Idx2) { + assert(inbounds(Idx1) && inbounds(Idx2)); + assert(&linksAt(Idx1) != &linksAt(Idx2) && + "Merging a set into itself is not allowed"); + + // CASE 1: If the set at `Idx1` is above or below `Idx2`, we need to merge + // both the + // given sets, and all sets between them, into one. + if (tryMergeUpwards(Idx1, Idx2)) + return; + + if (tryMergeUpwards(Idx2, Idx1)) + return; + + // CASE 2: The set at `Idx1` is not in the same chain as the set at `Idx2`. + // We therefore need to merge the two chains together. + mergeDirect(Idx1, Idx2); + } + + // \brief Merges two sets assuming that the set at `Idx1` is unreachable from + // traversing above or below the set at `Idx2`. + void mergeDirect(StratifiedIndex Idx1, StratifiedIndex Idx2) { + assert(inbounds(Idx1) && inbounds(Idx2)); + + auto *LinksInto = &linksAt(Idx1); + auto *LinksFrom = &linksAt(Idx2); + // Merging everything above LinksInto then proceeding to merge everything + // below LinksInto becomes problematic, so we go as far "up" as possible! + while (LinksInto->hasAbove() && LinksFrom->hasAbove()) { + LinksInto = &linksAt(LinksInto->getAbove()); + LinksFrom = &linksAt(LinksFrom->getAbove()); + } + + if (LinksFrom->hasAbove()) { + LinksInto->setAbove(LinksFrom->getAbove()); + auto &NewAbove = linksAt(LinksInto->getAbove()); + NewAbove.setBelow(LinksInto->Number); + } + + // Merging strategy: + // > If neither has links below, stop. + // > If only `LinksInto` has links below, stop. + // > If only `LinksFrom` has links below, reset `LinksInto.Below` to + // match `LinksFrom.Below` + // > If both have links above, deal with those next. + while (LinksInto->hasBelow() && LinksFrom->hasBelow()) { + auto &FromAttrs = LinksFrom->getAttrs(); + LinksInto->setAttrs(FromAttrs); + + // Remap needs to happen after getBelow(), but before + // assignment of LinksFrom + auto *NewLinksFrom = &linksAt(LinksFrom->getBelow()); + LinksFrom->remapTo(LinksInto->Number); + LinksFrom = NewLinksFrom; + LinksInto = &linksAt(LinksInto->getBelow()); + } + + if (LinksFrom->hasBelow()) { + LinksInto->setBelow(LinksFrom->getBelow()); + auto &NewBelow = linksAt(LinksInto->getBelow()); + NewBelow.setAbove(LinksInto->Number); + } + + LinksFrom->remapTo(LinksInto->Number); + } + + // \brief Checks to see if lowerIndex is at a level lower than upperIndex. + // If so, it will merge lowerIndex with upperIndex (and all of the sets + // between) and return true. Otherwise, it will return false. + bool tryMergeUpwards(StratifiedIndex LowerIndex, StratifiedIndex UpperIndex) { + assert(inbounds(LowerIndex) && inbounds(UpperIndex)); + auto *Lower = &linksAt(LowerIndex); + auto *Upper = &linksAt(UpperIndex); + if (Lower == Upper) + return true; + + SmallVector Found; + auto *Current = Lower; + auto Attrs = Current->getAttrs(); + while (Current->hasAbove() && Current != Upper) { + Found.push_back(Current); + Attrs |= Current->getAttrs(); + Current = &linksAt(Current->getAbove()); + } + + if (Current != Upper) + return false; + + Upper->setAttrs(Attrs); + + if (Lower->hasBelow()) { + auto NewBelowIndex = Lower->getBelow(); + Upper->setBelow(NewBelowIndex); + auto &NewBelow = linksAt(NewBelowIndex); + NewBelow.setAbove(UpperIndex); + } else { + Upper->clearBelow(); + } + + for (const auto &Ptr : Found) + Ptr->remapTo(Upper->Number); + + return true; + } + + Optional get(const T &Val) const { + auto Result = Values.find(Val); + if (Result == Values.end()) + return NoneType(); + return &Result->second; + } + + Optional get(const T &Val) { + auto Result = Values.find(Val); + if (Result == Values.end()) + return NoneType(); + return &Result->second; + } + + Optional indexOf(const T &Val) { + auto MaybeVal = get(Val); + if (!MaybeVal.hasValue()) + return NoneType(); + auto *Info = *MaybeVal; + auto &Link = linksAt(Info->Index); + return Link.Number; + } + + StratifiedIndex addLinkBelow(StratifiedIndex Set) { + auto At = addLinks(); + Links[Set].setBelow(At); + Links[At].setAbove(Set); + return At; + } + + StratifiedIndex addLinkAbove(StratifiedIndex Set) { + auto At = addLinks(); + Links[At].setBelow(Set); + Links[Set].setAbove(At); + return At; + } + + StratifiedIndex getNewUnlinkedIndex() { return addLinks(); } + + StratifiedIndex addLinks() { + auto Link = Links.size(); + Links.push_back(BuilderLink(Link)); + return Link; + } + + bool inbounds(StratifiedIndex N) const { return N >= 0 && N < Links.size(); } +}; +} +#endif // LLVM_ADT_STRATIFIEDSETS_H