Index: lib/Analysis/StratifiedSets.h =================================================================== --- /dev/null +++ lib/Analysis/StratifiedSets.h @@ -0,0 +1,611 @@ +//===- StratifiedSets.h - Abstract stratified sets implementation. --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_STRATIFIEDSETS_H +#define LLVM_ADT_STRATIFIEDSETS_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include +#include +#include +#include +#include + +namespace llvm { +// \brief An index into Stratified Sets. +typedef unsigned StratifiedIndex; +// NOTE: ^ This can't be a short -- bootstrapping clang has a case where +// ~1M sets exist. + +// \brief These are stratified sets, as described in "Fast algorithms for +// Dyck-CFL-reachability with applications to Alias Analysis" by Zhang Q, Lyu M +// R, Yuan H, and Su Z. -- in short, this is meant to represent different sets +// of Value*s. If two Value*s are in the same set, or if both sets have the +// aliasingExternals attribute, then the Value*s are considered to potentially +// alias. Otherwise, they are not. +template class StratifiedSets { +public: + struct StratifiedInfo { + StratifiedIndex Index; + // For field sensitivity, etc. we can tack attributes on to this struct. + }; + + StratifiedSets(){}; + + StratifiedSets(DenseMap Map, + std::vector AliasingExternals) + : Sets(std::move(Map)), AliasingExternals(std::move(AliasingExternals)) {} + + StratifiedSets(StratifiedSets &&Other) + : Sets(std::move(Other.Sets)), + AliasingExternals(std::move(Other.AliasingExternals)) {} + + StratifiedSets &operator=(StratifiedSets &&Other) { + Sets = std::move(Other.Sets); + AliasingExternals = std::move(Other.AliasingExternals); + return *this; + } + + bool hasAliasingExternal(StratifiedIndex I) const { + assert(inbounds(I)); + return AliasingExternals[I]; + } + + Optional find(const T &E) const { + auto Iter = Sets.find(E); + if (Iter == Sets.end()) + return NoneType(); + return Iter->second; + } + + std::size_t numSets() const { return AliasingExternals.size(); } + std::size_t size() const { return Sets.size(); } + +private: + DenseMap Sets; + std::vector AliasingExternals; + + bool inbounds(StratifiedIndex I) const { + return I < AliasingExternals.size(); + } +}; + +// TODO: This class needs more cleanup -- linksAt is called too often, and +// other uglies need to be looked at. + +// \brief Generic Builder class that produces StratifiedSets instances. +// +// The goal of this builder is to efficiently produce correct StratifiedSets +// instances. To this end, we use a few tricks: +// > Set chains (A method for linking sets together) +// > Set remaps (A method for marking a set as an alias [irony?] of another) +// +// ==== Set chains ==== +// This builder has a notion of some value A being above, below, or with some +// other value B: +// > The `A above B` relationship implies that there is a reference edge going +// from A to B. Namely, it notes that A can store anything in B's set. +// > The `A below B` relationship is the opposite of `A above B`. It implies +// that there's a dereference edge going from A to B. +// > The `A with B` relationship states that there's an assignment edge going +// from A to B, and that A and B should be treated as equals. +// +// As an example, take the following code snippet: +// +// %a = alloca i32, align 4 +// %ap = alloca i32*, align 8 +// %app = alloca i32**, align 8 +// store %a, %ap +// store %ap, %app +// %aw = getelementptr %ap, 0 +// +// Given this, the follow relations exist: +// - %a below %ap & %ap above %a +// - %ap below %app & %app above %ap +// - %aw with %ap & %ap with %aw +// +// These relations produce the following sets: +// [{%a}, {%ap, %aw}, {%app}] +// +// ...Which states that the only MayAlias relationship in the above program is +// between %ap and %aw. +// +// Life gets more complicated when we actually have logic in our programs. So, +// we either must remove this logic from our programs, or make consessions for +// it in our AA algorithms. In this case, we have decided to select the latter +// option. +// +// First complication: Conditionals +// Motivation: +// %ad = alloca int, align 4 +// %a = alloca int*, align 8 +// %b = alloca int*, align 8 +// %bp = alloca int**, align 8 +// %c = call i1 @SomeFunc() +// %k = select %c, %ad, %bp +// store %ad, %a +// store %b, %bp +// +// %k has 'with' edges to both %a and %b, which ordinarily would not be linked +// together. So, we merge the set that contains %a with the set that contains +// %b. We then recursively merge the set above %a with the set above %b, and +// the set below %a with the set below %b, etc. Ultimately, the sets for this +// program would end up like: {%ad}, {%a, %b, %k}, {%bp}, where {%ad} is below +// {%a, %b, %c} is below {%ad}. +// +// Second complication: Arbitrary casts +// Motivation: +// %ip = alloca int*, align 8 +// %ipp = alloca int**, align 8 +// %i = bitcast ipp to int +// store %ip, %ipp +// store %i, %ip +// +// This is impossible to construct with any of the rules above, because a set +// containing both {%i, %ipp} is supposed to exist, the set with %i is supposed +// to be below the set with %ip, and the set with %ip is supposed to be below +// the set with %ipp. Because we don't allow circular relationships like this, +// we merge all concerned sets into one. So, the above code would generate a +// single StratifiedSet: {%ip, %ipp, %i}. +// +// ==== Set remaps ==== +// More of an implementation detail than anything -- when merging sets, we need +// to update the numbers of all of the elements mapped to those sets. Rather +// than doing this at each merge, we note in the SetLinks structure that a remap +// has occurred, and use this information so we can defer renumbering set +// elements until build time. +template class StratifiedSetsBuilder { + typedef typename StratifiedSets::StratifiedInfo StratifiedInfo; + + // \brief This is a value used to signify "does not exist" where + // the StratifiedIndex type is used. This is used instead of + // Optional because Optional would + // eat up a considerable amount of extra memory, after struct + // padding/alignment is taken into account. + constexpr static auto SetSentinel = + std::numeric_limits::max(); + + // \brief Represents a Stratified Set, with information about the Stratified + // Set above it, the set below it, and whether the current set has been + // remapped to another. + struct SetLinks { + const StratifiedIndex Number; + + SetLinks(StratifiedIndex N) : Number(N) { + Remap = SetSentinel; + Above = SetSentinel; + Below = SetSentinel; + AliasedExternal = false; + } + + bool hasAbove() const { + assert(!isRemapped()); + return Above != SetSentinel; + } + + bool hasBelow() const { + assert(!isRemapped()); + return Below != SetSentinel; + } + + void setBelow(StratifiedIndex I) { + assert(!isRemapped()); + assert(I != SetSentinel); + Below = I; + } + + void setAbove(StratifiedIndex I) { + assert(!isRemapped()); + assert(I != SetSentinel); + Above = I; + } + + void clearBelow() { Below = SetSentinel; } + + void clearAbove() { Above = SetSentinel; } + + void setAliasedExternal(bool Val) { + assert(!isRemapped()); + AliasedExternal = Val; + } + + void noteAliasedExternal() { + assert(!isRemapped()); + setAliasedExternal(true); + } + + bool hasAliasedExternal() const { + assert(!isRemapped()); + return AliasedExternal; + } + + StratifiedIndex getBelow() const { + assert(hasBelow()); + return Below; + } + + StratifiedIndex getAbove() const { + assert(hasAbove()); + return Above; + } + + bool isRemapped() const { return Remap != SetSentinel; } + + // \brief For initial remapping to another set + void remapTo(StratifiedIndex Other) { + assert(!isRemapped()); + assert(Other != SetSentinel); + Above = SetSentinel - 1; + Below = SetSentinel - 1; + AliasedExternal = false; + Remap = Other; + } + + StratifiedIndex getRemapIndex() const { + assert(isRemapped()); + return Remap; + } + + // \brief Should only be called when we're already remapped. + void updateRemap(StratifiedIndex Other) { + assert(isRemapped()); + // Once remapped, a set should never be un-remapped. + assert(Other != SetSentinel); + Remap = Other; + } + + private: + StratifiedIndex Remap; + StratifiedIndex Above; + StratifiedIndex Below; + bool AliasedExternal; + + // \brief Most accessors only function on sets that haven't been remapped. + // We need an accessor for aliasedExternal that doesn't make this check so + // generateAliasingExternals will work. + bool hasAliasedExternalRaw() const { return AliasedExternal; } + friend std::vector + StratifiedSetsBuilder::generateAliasingExternals(); + }; + + // Removing remapped sets may not be worth it. + // The only penalty we suffer for not removing these sets is that the + // AliasingExternals bitvector we make at build time (which StratifiedSets + // keeps) is bigger than necessary. However, this size increase is + // 20 bits in the average case (51 sets total) and 4KB in the worst case (out + // of 1.07M sets == ~133KB total) when bootstrapping LLVM. + + // \brief Instead of renumbering our sets when we merge, we do it as one + // operation at build time. This handles making sure that all StratifiedInfo + // we emit is mapped to the proper set index. + void renumberSets() { + for (auto &Pair : Sets) { + auto &Info = Pair.second; + auto &Link = linksAt(Info.Index); + Info.Index = Link.Number; + } + } + + SetLinks *getHighestParentOf(SetLinks *Child) { + auto *Current = Child; + while (Current->hasAbove()) + Current = &linksAt(Current->getAbove()); + return Current; + } + + // \brief If some level aliased an external, we need to assume that all levels + // below that can dereference the external. So, we propogate that external + // status down the chain of sets. + void markSetExternals() { + SmallPtrSet Visited; + bool HasExternals = false; + for (unsigned i = 0; i < Links.size(); ++i) { + auto &Link = linksAt(i); + auto *Current = getHighestParentOf(&Link); + if (!Visited.insert(Current)) + continue; + + while (Current->hasBelow()) { + HasExternals = HasExternals || Current->hasAliasedExternal(); + Current->setAliasedExternal(HasExternals); + Current = &linksAt(Current->getBelow()); + } + } + } + + // \brief In the builder, we store aliasExternals information as a field in + // SetLinks. In StratifiedSets, we store this same information as a + // vector, because SetLinks don't exist in the StratifiedSets class. + // This function creates a vector to be used in the StratifiedSets + // instance we build. + std::vector generateAliasingExternals() { + std::vector AliasingExternals(Links.size(), false); + std::transform(Links.begin(), Links.end(), AliasingExternals.begin(), + [](const SetLinks &L) { return L.hasAliasedExternalRaw(); }); + return std::move(AliasingExternals); + } + +public: + // \brief Builds a StratifiedSet from the information we've been given since + // either construction or the prior build() call. + StratifiedSets build() { + auto AliasingExternals = generateAliasingExternals(); + renumberSets(); + markSetExternals(); + Links.clear(); + return StratifiedSets(std::move(Sets), std::move(AliasingExternals)); + } + + std::size_t size() const { return Sets.size(); } + std::size_t numSets() const { return Links.size(); } + + bool has(const T &Elem) const { return get(Elem).hasValue(); } + + bool add(const T &Main) { + if (get(Main).hasValue()) + return false; + + auto NewIndex = getNewUnlinkedIndex(); + return addAtMerging(Main, NewIndex); + } + + bool addAbove(const T &Main, const T &ToAdd) { + assert(has(Main)); + auto Index = *indexOf(Main); + if (!linksAt(Index).hasAbove()) + addLinkAbove(Index); + + auto Above = linksAt(Index).getAbove(); + return addAtMerging(ToAdd, Above); + } + + bool addBelow(const T &Main, const T &ToAdd) { + assert(has(Main)); + auto Index = *indexOf(Main); + if (!linksAt(Index).hasBelow()) + addLinkBelow(Index); + + auto Below = linksAt(Index).getBelow(); + return addAtMerging(ToAdd, Below); + } + + bool addWith(const T &Main, const T &ToAdd) { + assert(has(Main)); + auto MainIndex = *indexOf(Main); + return addAtMerging(ToAdd, MainIndex); + } + + void noteHasAliasingExternal(const T &Main) { + assert(has(Main)); + auto *Info = *get(Main); + auto &Link = linksAt(Info->Index); + Link.noteAliasedExternal(); + } + + bool hasAliasingExternal(const T &Main) const { + assert(has(Main)); + auto *Info = *get(Main); + auto &Link = linksAt(Info->Index); + return Link.hasAliasedExternal(); + } + +private: + DenseMap Sets; + std::vector Links; + + // \brief Adds the given element at the given index, merging sets if + // necessary. + bool addAtMerging(const T &ToAdd, StratifiedIndex Index) { + StratifiedInfo Info = {Index}; + auto Pair = Sets.insert({ToAdd, Info}); + if (Pair.second) + return true; + + auto &Iter = Pair.first; + auto &IterSet = linksAt(Iter->second.Index); + auto &ReqSet = linksAt(Index); + + // Failed to add where we wanted to. Merge the sets. + if (&IterSet != &ReqSet) + merge(IterSet.Number, ReqSet.Number); + + return false; + } + + // \brief Gets the SetLinks at the given index, taking set remapping into + // account. + SetLinks &linksAt(StratifiedIndex Index) { + auto *Start = &Links[Index]; + if (!Start->isRemapped()) + return *Start; + + auto *Current = Start; + while (Current->isRemapped()) + Current = &Links[Current->getRemapIndex()]; + + auto NewRemap = Current->Number; + + // Run through everything that has yet to be updated, and update them to + // remap to NewRemap + Current = Start; + while (Current->isRemapped()) { + auto *Next = &Links[Current->getRemapIndex()]; + Current->updateRemap(NewRemap); + Current = Next; + } + + return *Current; + } + + // \brief Merges two sets into one another. Assumes that these sets are not + // already one in the same + void merge(StratifiedIndex Idx1, StratifiedIndex Idx2) { + assert(inbounds(Idx1) && inbounds(Idx2)); + assert(&linksAt(Idx1) != &linksAt(Idx2) && + "Merging a set into itself is not allowed"); + + // CASE 1: If the set at `Idx1` is above or below `Idx2`, we need to merge + // both the + // given sets, and all sets between them, into one. + if (tryMergeUpwards(Idx1, Idx2)) + return; + + if (tryMergeUpwards(Idx2, Idx1)) + return; + + // CASE 2: The set at `Idx1` is not in the same chain as the set at `Idx2`. + // We therefore need to merge the two chains together. + mergeDirect(Idx1, Idx2); + } + + // \brief Merges two sets assuming that the set at `Idx1` is unreachable from + // traversing above or below the set at `Idx2`. + void mergeDirect(StratifiedIndex Idx1, StratifiedIndex Idx2) { + assert(inbounds(Idx1) && inbounds(Idx2)); + assert(!Links[Idx1].isRemapped() && !Links[Idx2].isRemapped()); + + // Merging strategy: + // > If neither has links above, stop. + // > If only `LinksInto` has links above, stop. + // > If only `LinksFrom` has links above, reset `LinksInto.Above` to + // match `LinksFrom.Above` + // > If both have links in some direction, deal with those next. + auto *LinksInto = &linksAt(Idx1); + auto *LinksFrom = &linksAt(Idx2); + while (true) { + if (LinksFrom->hasAliasedExternal()) + LinksInto->noteAliasedExternal(); + + if (!LinksFrom->hasAbove()) + break; + + if (!LinksInto->hasAbove()) { + LinksInto->setAbove(LinksFrom->getAbove()); + auto &NewAbove = linksAt(LinksInto->getAbove()); + NewAbove.setBelow(LinksInto->Number); + break; + } + + LinksFrom->remapTo(LinksInto->Number); + } + + LinksInto = &linksAt(Idx1); + LinksFrom = &linksAt(Idx2); + while (true) { + if (LinksFrom->hasAliasedExternal()) + LinksInto->noteAliasedExternal(); + + if (!LinksFrom->hasBelow()) + break; + + if (!LinksInto->hasBelow()) { + LinksInto->setBelow(LinksFrom->getBelow()); + auto &NewBelow = linksAt(LinksInto->getBelow()); + NewBelow.setBelow(LinksInto->Number); + break; + } + + LinksFrom->remapTo(LinksInto->Number); + } + } + + // \brief Checks to see if lowerIndex is at a level lower than upperIndex. + // If so, it will merge lowerIndex with upperIndex (and all of the sets + // between) and return true. Otherwise, it will return false. + bool tryMergeUpwards(StratifiedIndex LowerIndex, StratifiedIndex UpperIndex) { + assert(inbounds(LowerIndex) && inbounds(UpperIndex)); + auto *Lower = &linksAt(LowerIndex); + auto *Upper = &linksAt(UpperIndex); + if (Lower == Upper) + return true; + + SmallVector Found; + bool HasAliasingExternals = false; + auto *Current = Lower; + while (Current->hasAbove() && Current != Upper) { + Found.push_back(Current); + HasAliasingExternals = + HasAliasingExternals || Current->hasAliasedExternal(); + auto AboveIndex = Current->getAbove(); + Current = &linksAt(AboveIndex); + } + + if (Current != Upper) + return false; + + if (HasAliasingExternals) + Upper->noteAliasedExternal(); + + if (Lower->hasBelow()) { + auto NewBelowIndex = Lower->getBelow(); + Upper->setBelow(NewBelowIndex); + auto &NewBelow = linksAt(NewBelowIndex); + NewBelow.setAbove(UpperIndex); + } else { + Upper->clearBelow(); + } + + for (const auto &Ptr : Found) + Ptr->remapTo(Upper->Number); + + return true; + } + + Optional get(const T &Val) const { + auto Result = Sets.find(Val); + if (Result == Sets.end()) + return NoneType(); + return &Result->second; + } + + Optional get(const T &Val) { + auto Result = Sets.find(Val); + if (Result == Sets.end()) + return NoneType(); + return &Result->second; + } + + Optional indexOf(const T &Val) { + auto MaybeVal = get(Val); + if (!MaybeVal.hasValue()) + return NoneType(); + auto *Info = *MaybeVal; + auto &Link = linksAt(Info->Index); + return Link.Number; + } + + StratifiedIndex addLinkBelow(StratifiedIndex Set) { + auto At = addLinks(); + Links[Set].setBelow(At); + Links[At].setAbove(Set); + return At; + } + + StratifiedIndex addLinkAbove(StratifiedIndex Set) { + auto At = addLinks(); + Links[At].setBelow(Set); + Links[Set].setAbove(At); + return At; + } + + StratifiedIndex getNewUnlinkedIndex() { return addLinks(); } + + StratifiedIndex addLinks() { + auto Link = Links.size(); + Links.push_back(SetLinks(Link)); + return Link; + } + + bool inbounds(StratifiedIndex N) const { return N >= 0 && N < Links.size(); } +}; +} +#endif // LLVM_ADT_STRATIFIEDSETS_H Index: lib/Analysis/WeightedBidirectedGraph.h =================================================================== --- /dev/null +++ lib/Analysis/WeightedBidirectedGraph.h @@ -0,0 +1,154 @@ +//===- WeightedBidirectedGraph.h - Abstract weighted, bidirected graph. ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_WEIGHTEDBIDIRECTEDGRPAH_H +#define LLVM_ADT_WEIGHTEDBIDIRECTEDGRPAH_H + +#include "llvm/ADT/Optional.h" +#include +#include +#include + +namespace llvm { +// \brief A weighted bidirected graph made for use with the CFL Alias Analysis +// implementation. +template class WeightedBidirectedGraph { +public: + typedef std::size_t Node; + +private: + constexpr static Node StartNode = Node(0); + + struct Edge { + EdgeWeightT Weight; + Node Other; + + bool operator==(const Edge &E) const { + return Weight == E.Weight && Other == E.Other; + } + + bool operator!=(const Edge &E) const { return !operator==(E); } + }; + + struct NodeImpl { + std::vector Edges; + }; + + std::vector NodeImpls; + + bool inbounds(Node NodeIndex) const { return NodeIndex < NodeImpls.size(); } + + const NodeImpl &getNode(Node N) const { return NodeImpls[N]; } + NodeImpl &getNode(Node N) { return NodeImpls[N]; } + +public: + // ----- Various Edge iterators for the graph ----- // + + // \brief Iterator for edges. Because this graph is bidirected, we don't + // allow modificaiton of the edges using this iterator. Additionally, the + // iterator becomes invalid if you add edges to or from the node you're + // getting the edges of. + struct EdgeIterator : public std::iterator> { + EdgeIterator(const typename std::vector::const_iterator &Iter) + : Current(Iter) {} + + EdgeIterator(NodeImpl &Impl) : Current(Impl.begin()) {} + + EdgeIterator &operator++() { + ++Current; + return *this; + } + + EdgeIterator operator++(int) { + EdgeIterator Copy(Current); + operator++(); + return Copy; + } + + std::tuple &operator*() { + Store = std::make_tuple(Current->Weight, Current->Other); + return Store; + } + + bool operator==(const EdgeIterator &Other) const { + return Current == Other.Current; + } + + bool operator!=(const EdgeIterator &Other) const { + return !operator==(Other); + } + + private: + typename std::vector::const_iterator Current; + std::tuple Store; + }; + + // Wrapper for EdgeIterator with begin()/end() calls. + struct EdgeIterable { + EdgeIterable(const std::vector &Edges) + : BeginIter(Edges.begin()), EndIter(Edges.end()) {} + + EdgeIterator begin() { return EdgeIterator(BeginIter); } + + EdgeIterator end() { return EdgeIterator(EndIter); } + + private: + typename std::vector::const_iterator BeginIter; + typename std::vector::const_iterator EndIter; + }; + + // ----- Actual graph-related things ----- // + + WeightedBidirectedGraph() = default; + + WeightedBidirectedGraph(WeightedBidirectedGraph &&Other) + : NodeImpls(std::move(Other.NodeImpls)) {} + + WeightedBidirectedGraph & + operator=(WeightedBidirectedGraph &&Other) { + NodeImpls = std::move(Other.NodeImpls); + return *this; + } + + Node addNode() { + auto Index = NodeImpls.size(); + auto NewNode = Node(Index); + NodeImpls.push_back(NodeImpl()); + return NewNode; + } + + void addEdge(Node From, Node To, const EdgeWeightT &Weight, + const EdgeWeightT &ReverseWeight) { + assert(inbounds(From)); + assert(inbounds(To)); + auto &FromNode = getNode(From); + auto &ToNode = getNode(To); + FromNode.Edges.push_back(Edge{Weight, To}); + ToNode.Edges.push_back(Edge{ReverseWeight, From}); + } + + EdgeIterable edgesFor(const Node &N) const { + const auto &Node = getNode(N); + return EdgeIterable(Node.Edges); + } + + bool empty() const { return NodeImpls.empty(); } + std::size_t size() const { return NodeImpls.size(); } + + // \brief Gets an arbitrary node in the graph as a starting point for + // traversal. + Node getEntryNode() { + assert(inbounds(StartNode)); + return StartNode; + } +}; +} + +#endif // LLVM_ADT_WEIGHTEDBIDIRECTEDGRPAH_H