Index: lib/Analysis/StratifiedSets.h =================================================================== --- /dev/null +++ lib/Analysis/StratifiedSets.h @@ -0,0 +1,653 @@ +//===- StratifiedSets.h - Abstract stratified sets implementation. --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_STRATIFIEDSETS_H +#define LLVM_ADT_STRATIFIEDSETS_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include +#include +#include +#include +#include +#include + +namespace llvm { +// \brief An index into Stratified Sets. +typedef unsigned StratifiedIndex; +// NOTE: ^ This can't be a short -- bootstrapping clang has a case where +// ~1M sets exist. + +// \brief Container of information related to a value in a StratifiedSet. +struct StratifiedInfo { + StratifiedIndex Index; + // For field sensitivity, etc. we can tack attributes on to this struct. +}; + +// The number of attributes that StratifiedAttrs should contain +static constexpr unsigned NumStratifiedAttrs = 32; +// StratifiedLinks need to carry attributes with them. Because they're used so +// often, this was lifted from StratifiedLink and given a name. +typedef std::bitset StratifiedAttrs; + +// \brief A "link" between two StratifiedSets. +struct StratifiedLink { + // \brief This is a value used to signify "does not exist" where + // the StratifiedIndex type is used. This is used instead of + // Optional because Optional would + // eat up a considerable amount of extra memory, after struct + // padding/alignment is taken into account. + static constexpr auto SetSentinel = + std::numeric_limits::max(); + + // \brief The index for the set "above" current + StratifiedIndex Above; + // \brief The link for the set "below" current + StratifiedIndex Below; + // \brief Attributes that have to do with values external to the current + // function. Please note that each bit set in the `externals` above one set + // *will* be set in all `externals` in sets below the current one. + StratifiedAttrs Attrs; + + StratifiedLink() : Above(SetSentinel), Below(SetSentinel) {} + + bool hasBelow() const { return Below != SetSentinel; } + bool hasAbove() const { return Above != SetSentinel; } + + void clearBelow() { Below = SetSentinel; } + void clearAbove() { Above = SetSentinel; } +}; + +// \brief These are stratified sets, as described in "Fast algorithms for +// Dyck-CFL-reachability with applications to Alias Analysis" by Zhang Q, Lyu M +// R, Yuan H, and Su Z. -- in short, this is meant to represent different sets +// of Value*s. If two Value*s are in the same set, or if both sets have the +// aliasingAttrs attribute, then the Value*s are considered to potentially +// alias. Otherwise, they are not. +template class StratifiedSets { +public: + StratifiedSets() {} + + StratifiedSets(DenseMap Map, + std::vector Links) + : Values(std::move(Map)), Links(std::move(Links)) {} + + StratifiedSets(StratifiedSets &&Other) { *this = std::move(Other); } + + StratifiedSets &operator=(StratifiedSets &&Other) { + Values = std::move(Other.Values); + Links = std::move(Other.Links); + return *this; + } + + Optional find(const T &Elem) const { + auto Iter = Values.find(Elem); + if (Iter == Values.end()) { + return NoneType(); + } + return Iter->second; + } + + const StratifiedLink &getLink(StratifiedIndex Index) const { + assert(inbounds(Index)); + return Links[Index]; + } + +private: + DenseMap Values; + std::vector Links; + + bool inbounds(StratifiedIndex Idx) const { return Idx < Links.size(); } +}; + +// \brief Generic Builder class that produces StratifiedSets instances. +// +// The goal of this builder is to efficiently produce correct StratifiedSets +// instances. To this end, we use a few tricks: +// > Set chains (A method for linking sets together) +// > Set remaps (A method for marking a set as an alias [irony?] of another) +// +// ==== Set chains ==== +// This builder has a notion of some value A being above, below, or with some +// other value B: +// > The `A above B` relationship implies that there is a reference edge going +// from A to B. Namely, it notes that A can store anything in B's set. +// > The `A below B` relationship is the opposite of `A above B`. It implies +// that there's a dereference edge going from A to B. +// > The `A with B` relationship states that there's an assignment edge going +// from A to B, and that A and B should be treated as equals. +// +// As an example, take the following code snippet: +// +// %a = alloca i32, align 4 +// %ap = alloca i32*, align 8 +// %app = alloca i32**, align 8 +// store %a, %ap +// store %ap, %app +// %aw = getelementptr %ap, 0 +// +// Given this, the follow relations exist: +// - %a below %ap & %ap above %a +// - %ap below %app & %app above %ap +// - %aw with %ap & %ap with %aw +// +// These relations produce the following sets: +// [{%a}, {%ap, %aw}, {%app}] +// +// ...Which states that the only MayAlias relationship in the above program is +// between %ap and %aw. +// +// Life gets more complicated when we actually have logic in our programs. So, +// we either must remove this logic from our programs, or make consessions for +// it in our AA algorithms. In this case, we have decided to select the latter +// option. +// +// First complication: Conditionals +// Motivation: +// %ad = alloca int, align 4 +// %a = alloca int*, align 8 +// %b = alloca int*, align 8 +// %bp = alloca int**, align 8 +// %c = call i1 @SomeFunc() +// %k = select %c, %ad, %bp +// store %ad, %a +// store %b, %bp +// +// %k has 'with' edges to both %a and %b, which ordinarily would not be linked +// together. So, we merge the set that contains %a with the set that contains +// %b. We then recursively merge the set above %a with the set above %b, and +// the set below %a with the set below %b, etc. Ultimately, the sets for this +// program would end up like: {%ad}, {%a, %b, %k}, {%bp}, where {%ad} is below +// {%a, %b, %c} is below {%ad}. +// +// Second complication: Arbitrary casts +// Motivation: +// %ip = alloca int*, align 8 +// %ipp = alloca int**, align 8 +// %i = bitcast ipp to int +// store %ip, %ipp +// store %i, %ip +// +// This is impossible to construct with any of the rules above, because a set +// containing both {%i, %ipp} is supposed to exist, the set with %i is supposed +// to be below the set with %ip, and the set with %ip is supposed to be below +// the set with %ipp. Because we don't allow circular relationships like this, +// we merge all concerned sets into one. So, the above code would generate a +// single StratifiedSet: {%ip, %ipp, %i}. +// +// ==== Set remaps ==== +// More of an implementation detail than anything -- when merging sets, we need +// to update the numbers of all of the elements mapped to those sets. Rather +// than doing this at each merge, we note in the BuilderLink structure that a +// remap +// has occurred, and use this information so we can defer renumbering set +// elements until build time. +template class StratifiedSetsBuilder { + // \brief Represents a Stratified Set, with information about the Stratified + // Set above it, the set below it, and whether the current set has been + // remapped to another. + struct BuilderLink { + const StratifiedIndex Number; + + BuilderLink(StratifiedIndex N) : Number(N) { + Remap = StratifiedLink::SetSentinel; + } + + bool hasAbove() const { + assert(!isRemapped()); + return Link.hasAbove(); + } + + bool hasBelow() const { + assert(!isRemapped()); + return Link.hasBelow(); + } + + void setBelow(StratifiedIndex I) { + assert(!isRemapped()); + Link.Below = I; + } + + void setAbove(StratifiedIndex I) { + assert(!isRemapped()); + Link.Above = I; + } + + void clearBelow() { + assert(!isRemapped()); + Link.clearBelow(); + } + + void clearAbove() { + assert(!isRemapped()); + Link.clearAbove(); + } + + StratifiedIndex getBelow() const { + assert(!isRemapped()); + assert(hasBelow()); + return Link.Below; + } + + StratifiedIndex getAbove() const { + assert(!isRemapped()); + assert(hasAbove()); + return Link.Above; + } + + StratifiedAttrs &getAttrs() { + assert(!isRemapped()); + return Link.Attrs; + } + + void setAttr(unsigned index) { + assert(!isRemapped()); + assert(index < NumStratifiedAttrs); + Link.Attrs.set(index); + } + + void addAttrs(const StratifiedAttrs &other) { + assert(!isRemapped()); + Link.Attrs |= other; + } + + bool isRemapped() const { return Remap != StratifiedLink::SetSentinel; } + + // \brief For initial remapping to another set + void remapTo(StratifiedIndex Other) { + assert(!isRemapped()); + Remap = Other; + } + + StratifiedIndex getRemapIndex() const { + assert(isRemapped()); + return Remap; + } + + // \brief Should only be called when we're already remapped. + void updateRemap(StratifiedIndex Other) { + assert(isRemapped()); + Remap = Other; + } + + // \brief Prefer the above functions to calling things directly on what's + // returned from this -- they guard against unexpected calls when the + // current BuilderLink is remapped. + const StratifiedLink &getLink() const { return Link; } + + private: + StratifiedLink Link; + StratifiedIndex Remap; + }; + + // \brief This function performs all of the set unioning/value renumbering + // that we've been putting off, and generates a vector that + // may be placed in a StratifiedSets instance. + void finalizeSets(std::vector &StratLinks) { + DenseMap Remaps; + for (auto &Link : Links) { + if (Link.isRemapped()) { + continue; + } + + StratifiedIndex Number = StratLinks.size(); + Remaps.insert({Link.Number, Number}); + StratLinks.push_back(Link.getLink()); + } + + for (auto &Link : StratLinks) { + if (Link.hasAbove()) { + auto &Above = linksAt(Link.Above); + auto Iter = Remaps.find(Above.Number); + assert(Iter != Remaps.end()); + Link.Above = Iter->second; + } + + if (Link.hasBelow()) { + auto &Below = linksAt(Link.Below); + auto Iter = Remaps.find(Below.Number); + assert(Iter != Remaps.end()); + Link.Below = Iter->second; + } + } + + for (auto &Pair : Values) { + auto &Info = Pair.second; + auto &Link = linksAt(Info.Index); + auto Iter = Remaps.find(Link.Number); + assert(Iter != Remaps.end()); + Info.Index = Iter->second; + } + } + + // \brief There's a guarantee in StratifiedLink where all bits set in a + // Link.externals will be set in all Link.externals "below" it. + static void propogateAttrs(std::vector &Links) { + const auto getHighestParentAbove = [&Links](StratifiedIndex Idx) { + const auto *Link = &Links[Idx]; + while (Link->hasAbove()) { + Idx = Link->Above; + Link = &Links[Idx]; + } + return Idx; + }; + + SmallSet Visited; + for (unsigned I = 0, E = Links.size(); I < E; ++I) { + auto CurrentIndex = getHighestParentAbove(I); + if (!Visited.insert(CurrentIndex)) { + continue; + } + + while (Links[CurrentIndex].hasBelow()) { + auto &CurrentBits = Links[CurrentIndex].Attrs; + auto NextIndex = Links[CurrentIndex].Below; + auto &NextBits = Links[NextIndex].Attrs; + NextBits |= CurrentBits; + CurrentIndex = NextIndex; + } + } + } + +public: + // \brief Builds a StratifiedSet from the information we've been given since + // either construction or the prior build() call. + StratifiedSets build() { + std::vector StratLinks; + finalizeSets(StratLinks); + propogateAttrs(StratLinks); + Links.clear(); + return StratifiedSets(std::move(Values), std::move(StratLinks)); + } + + std::size_t size() const { return Values.size(); } + std::size_t numSets() const { return Links.size(); } + + bool has(const T &Elem) const { return get(Elem).hasValue(); } + + bool add(const T &Main) { + if (get(Main).hasValue()) + return false; + + auto NewIndex = getNewUnlinkedIndex(); + return addAtMerging(Main, NewIndex); + } + + bool addAbove(const T &Main, const T &ToAdd) { + assert(has(Main)); + auto Index = *indexOf(Main); + if (!linksAt(Index).hasAbove()) + addLinkAbove(Index); + + auto Above = linksAt(Index).getAbove(); + return addAtMerging(ToAdd, Above); + } + + bool addBelow(const T &Main, const T &ToAdd) { + assert(has(Main)); + auto Index = *indexOf(Main); + if (!linksAt(Index).hasBelow()) + addLinkBelow(Index); + + auto Below = linksAt(Index).getBelow(); + return addAtMerging(ToAdd, Below); + } + + bool addWith(const T &Main, const T &ToAdd) { + assert(has(Main)); + auto MainIndex = *indexOf(Main); + return addAtMerging(ToAdd, MainIndex); + } + + void noteAttribute(const T &Main, unsigned AttrNum) { + assert(has(Main)); + assert(AttrNum < StratifiedLink::SetSentinel); + auto *Info = *get(Main); + auto &Link = linksAt(Info->Index); + Link.setAttr(AttrNum); + } + + void noteAttributes(const T &Main, const StratifiedAttrs &NewAttrs) { + assert(has(Main)); + auto *Info = *get(Main); + auto &Link = linksAt(Info->Index); + Link.addAttrs(NewAttrs); + } + + bool getAttribute(const T &Main, unsigned AttrNum) const { + assert(has(Main)); + assert(AttrNum < StratifiedLink::SetSentinel); + auto *Info = *get(Main); + auto &Link = linksAt(Info->Index); + auto &Attrs = Link.getAttrs(); + return Attrs[AttrNum]; + } + + StratifiedAttrs getAttributes(const T &Main) const { + assert(has(Main)); + auto *Info = *get(Main); + auto &Link = linksAt(Info->Index); + return Link.getAttrs(); + } + +private: + DenseMap Values; + std::vector Links; + + // \brief Adds the given element at the given index, merging sets if + // necessary. + bool addAtMerging(const T &ToAdd, StratifiedIndex Index) { + StratifiedInfo Info = {Index}; + auto Pair = Values.insert({ToAdd, Info}); + if (Pair.second) + return true; + + auto &Iter = Pair.first; + auto &IterSet = linksAt(Iter->second.Index); + auto &ReqSet = linksAt(Index); + + // Failed to add where we wanted to. Merge the sets. + if (&IterSet != &ReqSet) + merge(IterSet.Number, ReqSet.Number); + + return false; + } + + // \brief Gets the BuilderLink at the given index, taking set remapping into + // account. + BuilderLink &linksAt(StratifiedIndex Index) { + auto *Start = &Links[Index]; + if (!Start->isRemapped()) + return *Start; + + auto *Current = Start; + while (Current->isRemapped()) + Current = &Links[Current->getRemapIndex()]; + + auto NewRemap = Current->Number; + + // Run through everything that has yet to be updated, and update them to + // remap to NewRemap + Current = Start; + while (Current->isRemapped()) { + auto *Next = &Links[Current->getRemapIndex()]; + Current->updateRemap(NewRemap); + Current = Next; + } + + return *Current; + } + + // \brief Merges two sets into one another. Assumes that these sets are not + // already one in the same + void merge(StratifiedIndex Idx1, StratifiedIndex Idx2) { + assert(inbounds(Idx1) && inbounds(Idx2)); + assert(&linksAt(Idx1) != &linksAt(Idx2) && + "Merging a set into itself is not allowed"); + + // CASE 1: If the set at `Idx1` is above or below `Idx2`, we need to merge + // both the + // given sets, and all sets between them, into one. + if (tryMergeUpwards(Idx1, Idx2)) + return; + + if (tryMergeUpwards(Idx2, Idx1)) + return; + + // CASE 2: The set at `Idx1` is not in the same chain as the set at `Idx2`. + // We therefore need to merge the two chains together. + mergeDirect(Idx1, Idx2); + } + + // \brief Merges two sets assuming that the set at `Idx1` is unreachable from + // traversing above or below the set at `Idx2`. + void mergeDirect(StratifiedIndex Idx1, StratifiedIndex Idx2) { + assert(inbounds(Idx1) && inbounds(Idx2)); + assert(!Links[Idx1].isRemapped() && !Links[Idx2].isRemapped()); + + // Merging strategy: + // > If neither has links above, stop. + // > If only `LinksInto` has links above, stop. + // > If only `LinksFrom` has links above, reset `LinksInto.Above` to + // match `LinksFrom.Above` + // > If both have links in some direction, deal with those next. + auto *LinksInto = &linksAt(Idx1); + auto *LinksFrom = &linksAt(Idx2); + while (true) { + auto &IntoAttrs = LinksInto->getAttrs(); + IntoAttrs |= LinksFrom->getAttrs(); + + if (!LinksFrom->hasAbove()) + break; + + if (!LinksInto->hasAbove()) { + LinksInto->setAbove(LinksFrom->getAbove()); + auto &NewAbove = linksAt(LinksInto->getAbove()); + NewAbove.setBelow(LinksInto->Number); + break; + } + + LinksFrom->remapTo(LinksInto->Number); + } + + LinksInto = &linksAt(Idx1); + LinksFrom = &linksAt(Idx2); + while (true) { + auto &IntoAttrs = LinksInto->getAttrs(); + IntoAttrs |= LinksFrom->getAttrs(); + + if (!LinksFrom->hasBelow()) + break; + + if (!LinksInto->hasBelow()) { + LinksInto->setBelow(LinksFrom->getBelow()); + auto &NewBelow = linksAt(LinksInto->getBelow()); + NewBelow.setBelow(LinksInto->Number); + break; + } + + LinksFrom->remapTo(LinksInto->Number); + } + } + + // \brief Checks to see if lowerIndex is at a level lower than upperIndex. + // If so, it will merge lowerIndex with upperIndex (and all of the sets + // between) and return true. Otherwise, it will return false. + bool tryMergeUpwards(StratifiedIndex LowerIndex, StratifiedIndex UpperIndex) { + assert(inbounds(LowerIndex) && inbounds(UpperIndex)); + auto *Lower = &linksAt(LowerIndex); + auto *Upper = &linksAt(UpperIndex); + if (Lower == Upper) + return true; + + SmallVector Found; + auto *Current = Lower; + auto Attrs = Current->getAttrs(); + while (Current->hasAbove() && Current != Upper) { + Found.push_back(Current); + auto &CurrentAttrs = Current->getAttrs(); + Attrs |= CurrentAttrs; + auto AboveIndex = Current->getAbove(); + Current = &linksAt(AboveIndex); + } + + if (Current != Upper) + return false; + + auto &UpperAttrs = Upper->getAttrs(); + UpperAttrs |= Attrs; + + if (Lower->hasBelow()) { + auto NewBelowIndex = Lower->getBelow(); + Upper->setBelow(NewBelowIndex); + auto &NewBelow = linksAt(NewBelowIndex); + NewBelow.setAbove(UpperIndex); + } else { + Upper->clearBelow(); + } + + for (const auto &Ptr : Found) + Ptr->remapTo(Upper->Number); + + return true; + } + + Optional get(const T &Val) const { + auto Result = Values.find(Val); + if (Result == Values.end()) + return NoneType(); + return &Result->second; + } + + Optional get(const T &Val) { + auto Result = Values.find(Val); + if (Result == Values.end()) + return NoneType(); + return &Result->second; + } + + Optional indexOf(const T &Val) { + auto MaybeVal = get(Val); + if (!MaybeVal.hasValue()) + return NoneType(); + auto *Info = *MaybeVal; + auto &Link = linksAt(Info->Index); + return Link.Number; + } + + StratifiedIndex addLinkBelow(StratifiedIndex Set) { + auto At = addLinks(); + Links[Set].setBelow(At); + Links[At].setAbove(Set); + return At; + } + + StratifiedIndex addLinkAbove(StratifiedIndex Set) { + auto At = addLinks(); + Links[At].setBelow(Set); + Links[Set].setAbove(At); + return At; + } + + StratifiedIndex getNewUnlinkedIndex() { return addLinks(); } + + StratifiedIndex addLinks() { + auto Link = Links.size(); + Links.push_back(BuilderLink(Link)); + return Link; + } + + bool inbounds(StratifiedIndex N) const { return N >= 0 && N < Links.size(); } +}; +} +#endif // LLVM_ADT_STRATIFIEDSETS_H Index: lib/Analysis/WeightedBidirectedGraph.h =================================================================== --- /dev/null +++ lib/Analysis/WeightedBidirectedGraph.h @@ -0,0 +1,154 @@ +//===- WeightedBidirectedGraph.h - Abstract weighted, bidirected graph. ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_WEIGHTEDBIDIRECTEDGRPAH_H +#define LLVM_ADT_WEIGHTEDBIDIRECTEDGRPAH_H + +#include "llvm/ADT/Optional.h" +#include +#include +#include + +namespace llvm { +// \brief A weighted bidirected graph made for use with the CFL Alias Analysis +// implementation. +template class WeightedBidirectedGraph { +public: + typedef std::size_t Node; + +private: + constexpr static Node StartNode = Node(0); + + struct Edge { + EdgeWeightT Weight; + Node Other; + + bool operator==(const Edge &E) const { + return Weight == E.Weight && Other == E.Other; + } + + bool operator!=(const Edge &E) const { return !operator==(E); } + }; + + struct NodeImpl { + std::vector Edges; + }; + + std::vector NodeImpls; + + bool inbounds(Node NodeIndex) const { return NodeIndex < NodeImpls.size(); } + + const NodeImpl &getNode(Node N) const { return NodeImpls[N]; } + NodeImpl &getNode(Node N) { return NodeImpls[N]; } + +public: + // ----- Various Edge iterators for the graph ----- // + + // \brief Iterator for edges. Because this graph is bidirected, we don't + // allow modificaiton of the edges using this iterator. Additionally, the + // iterator becomes invalid if you add edges to or from the node you're + // getting the edges of. + struct EdgeIterator : public std::iterator> { + EdgeIterator(const typename std::vector::const_iterator &Iter) + : Current(Iter) {} + + EdgeIterator(NodeImpl &Impl) : Current(Impl.begin()) {} + + EdgeIterator &operator++() { + ++Current; + return *this; + } + + EdgeIterator operator++(int) { + EdgeIterator Copy(Current); + operator++(); + return Copy; + } + + std::tuple &operator*() { + Store = std::make_tuple(Current->Weight, Current->Other); + return Store; + } + + bool operator==(const EdgeIterator &Other) const { + return Current == Other.Current; + } + + bool operator!=(const EdgeIterator &Other) const { + return !operator==(Other); + } + + private: + typename std::vector::const_iterator Current; + std::tuple Store; + }; + + // Wrapper for EdgeIterator with begin()/end() calls. + struct EdgeIterable { + EdgeIterable(const std::vector &Edges) + : BeginIter(Edges.begin()), EndIter(Edges.end()) {} + + EdgeIterator begin() { return EdgeIterator(BeginIter); } + + EdgeIterator end() { return EdgeIterator(EndIter); } + + private: + typename std::vector::const_iterator BeginIter; + typename std::vector::const_iterator EndIter; + }; + + // ----- Actual graph-related things ----- // + + WeightedBidirectedGraph() = default; + + WeightedBidirectedGraph(WeightedBidirectedGraph &&Other) + : NodeImpls(std::move(Other.NodeImpls)) {} + + WeightedBidirectedGraph & + operator=(WeightedBidirectedGraph &&Other) { + NodeImpls = std::move(Other.NodeImpls); + return *this; + } + + Node addNode() { + auto Index = NodeImpls.size(); + auto NewNode = Node(Index); + NodeImpls.push_back(NodeImpl()); + return NewNode; + } + + void addEdge(Node From, Node To, const EdgeWeightT &Weight, + const EdgeWeightT &ReverseWeight) { + assert(inbounds(From)); + assert(inbounds(To)); + auto &FromNode = getNode(From); + auto &ToNode = getNode(To); + FromNode.Edges.push_back(Edge{Weight, To}); + ToNode.Edges.push_back(Edge{ReverseWeight, From}); + } + + EdgeIterable edgesFor(const Node &N) const { + const auto &Node = getNode(N); + return EdgeIterable(Node.Edges); + } + + bool empty() const { return NodeImpls.empty(); } + std::size_t size() const { return NodeImpls.size(); } + + // \brief Gets an arbitrary node in the graph as a starting point for + // traversal. + Node getEntryNode() { + assert(inbounds(StartNode)); + return StartNode; + } +}; +} + +#endif // LLVM_ADT_WEIGHTEDBIDIRECTEDGRPAH_H