Index: lib/Analysis/StratifiedSets.h =================================================================== --- /dev/null +++ lib/Analysis/StratifiedSets.h @@ -0,0 +1,635 @@ +//===- StratifiedSets.h - Abstract stratified sets implementation. --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_STRATIFIEDSETS_H +#define LLVM_ADT_STRATIFIEDSETS_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" +#include +#include +#include +#include +#include + +namespace llvm { +// \brief An index into Stratified Sets. +typedef unsigned StratifiedIndex; +// NOTE: ^ This can't be a short -- bootstrapping clang has a case where +// ~1M sets exist. + +// \brief These are stratified sets, as described in the paper located at +// www.cse.cuhk.edu.hk/lyu/_media/paper/pldi2013.pdf -- in short, this is meant +// to represent different sets of Value*s. If two Value*s are in the same set, +// or if both sets have the aliasingExternals attribute, then the Value*s are +// considered to potentially alias. Otherwise, they are not. +template class StratifiedSets { +public: + class StratifiedInfo; + + class StratifiedInfo { + public: + StratifiedIndex index; + // For field sensitivity, etc. we can tack attributes on to this struct. + }; + + StratifiedSets(DenseMap map, + std::vector aliasingExternals) + : sets(std::move(map)), aliasingExternals(std::move(aliasingExternals)) {} + + StratifiedSets(StratifiedSets &&other) + : sets(std::move(other.sets)), + aliasingExternals(std::move(other.aliasingExternals)) {} + + StratifiedSets &operator=(StratifiedSets &&other) { + sets = std::move(other.sets); + aliasingExternals = std::move(other.aliasingExternals); + return *this; + } + + bool hasAliasingExternal(StratifiedIndex idx) const { + assert(inbounds(idx)); + return aliasingExternals[idx]; + } + + Optional find(const T &t) const { + auto iter = sets.find(t); + if (iter == sets.end()) { + return NoneType(); + } + return iter->second; + } + + std::size_t numSets() const { return aliasingExternals.size(); } + std::size_t size() const { return sets.size(); } + +private: + DenseMap sets; + std::vector aliasingExternals; + + bool inbounds(StratifiedIndex idx) const { + return idx < aliasingExternals.size(); + } +}; + +// TODO: This class needs more cleanup -- linksAt is called too often, and +// other uglies need to be looked at. + +// \brief Generic Builder class that produces StratifiedSets instances. +// +// The goal of this builder is to efficiently produce correct StratifiedSets +// instances. To this end, we use a few tricks: +// > Set chains (A method for linking sets together) +// > Set remaps (A method for marking a set as an alias [irony?] of another) +// +// ==== Set chains ==== +// This builder has a notion of some value A being above, below, or with some +// other value B: +// > The `A above B` relationship implies that there is a reference edge going +// from A to B. Namely, it notes that A can store anything in B's set. +// > The `A below B` relationship is the opposite of `A above B`. It implies +// that there's a dereference edge going from A to B. +// > The `A with B` relationship states that there's an assignment edge going +// from A to B, and that A and B should be treated as equals. +// +// As an example, take the following code snippet: +// +// %a = alloca i32, align 4 +// %ap = alloca i32*, align 8 +// %app = alloca i32**, align 8 +// store %a, %ap +// store %ap, %app +// %aw = getelementptr %ap, 0 +// +// Given this, the follow relations exist: +// - %a below %ap & %ap above %a +// - %ap below %app & %app above %ap +// - %aw with %ap & %ap with %aw +// +// These relations produce the following sets: +// [{%a}, {%ap, %aw}, {%app}] +// +// ...Which states that the only MayAlias relationship in the above program is +// between %ap and %aw. +// +// Life gets more complicated when we actually have logic in our programs. So, +// we either must remove this logic from our programs, or make consessions for +// it in our AA algorithms. In this case, we have decided to select the latter +// option. +// +// First complication: Conditionals +// Motivation: +// %ad = alloca int, align 4 +// %a = alloca int*, align 8 +// %b = alloca int*, align 8 +// %bp = alloca int**, align 8 +// %c = call i1 @SomeFunc() +// %k = select %c, %ap, %bp +// store %ad, %a +// store %b, %bp +// +// %k has 'with' edges to both %a and %b, which alone are not linked together. +// So, we merge the set that contains %a with the set that contains %b. We then +// recursively merge the set above %a with the set above %b, and the set below +// %a with the set below %b, etc. Ultimately, the sets for this program would +// end up like: +// {%ad}, {%a, %b, %k}, {%bp}, where {%ad} is below {%a, %b, %c} is below {%ad}. +// +// Second complication: Arbitrary casts +// Motivation: +// %ip = alloca int*, align 8 +// %ipp = alloca int**, align 8 +// %i = bitcast ipp to int +// store %ip, %ipp +// store %i, %ip +// +// This is impossible to construct with any of the rules above, because a set +// containing both {%i, %ipp} is supposed to exist, the set with %i is supposed +// to be below the set with %ip, and the set with %ip is supposed to be below +// the set with %ipp. Because we don't allow circular relationships like this, +// we merge all concerned sets into one. So, the above code would generate a +// single StratifiedSet: {%ip, %ipp, %i}. +// +// ==== Set remaps ==== +// More of an implementation detail than anything -- when merging sets, we defer +// updating the elements' set numbers until build time, because each merging +// operation would be O(n) time. With remaps, merges are done in O(1), at the +// cost of building taking linear time. +template class StratifiedSetsBuilder { + typedef typename StratifiedSets::StratifiedInfo StratifiedInfo; + + constexpr static auto SetSentinel = + std::numeric_limits::max(); + + // \brief Represents a "link" between a set, the set deemed as above it, + // and the set deemed as below it. This also keeps information on the notion + // of "remapping", i.e. the joining of multiple StratifiedSets into one. + struct SetLinks { + const StratifiedIndex number; + + SetLinks(StratifiedIndex n) : number(n), aliasedExternal(false) { + remap = SetSentinel; + above = SetSentinel; + below = SetSentinel; + } + + // Because SetLinks can be remapped (i.e. are part of a union-find + // datastructure for rapid joining), we disable certain operations on + // elements that have parents in the union-find structure. This is super + // difficult to debug, so we use accessors. + + bool hasAbove() const { + assert(!isRemapped()); + return above != SetSentinel; + } + + bool hasBelow() const { + assert(!isRemapped()); + return below != SetSentinel; + } + + void setBelow(StratifiedIndex i) { + assert(!isRemapped()); + below = i; + } + + void setAbove(StratifiedIndex i) { + assert(!isRemapped()); + above = i; + } + + void clearBelow() { below = SetSentinel; } + + void clearAbove() { above = SetSentinel; } + + void setAliasedExternal(bool val) { + assert(!isRemapped()); + aliasedExternal = val; + } + + void noteAliasedExternal() { + assert(!isRemapped()); + setAliasedExternal(true); + } + + // \brief Most accessors have an assert to ensure we're not querying + // a set we don't mean to query. However, building needs access to + // aliasedExternal without that check. So we have this. + bool hasAliasedExternalRaw() const { return aliasedExternal; } + + bool hasAliasedExternal() const { + assert(!isRemapped()); + return aliasedExternal; + } + + StratifiedIndex getBelow() const { + assert(hasBelow()); + return below; + } + + StratifiedIndex getAbove() const { + assert(hasAbove()); + return above; + } + + bool isRemapped() const { return remap != SetSentinel; } + + // \brief For initial remapping to another set + void remapTo(StratifiedIndex other) { + assert(!isRemapped()); + above = SetSentinel - 1; + below = SetSentinel - 1; + aliasedExternal = false; + remap = other; + } + + StratifiedIndex getRemapIndex() const { + assert(isRemapped()); + return remap; + } + + // \brief Should only be called when we're already remapped. + void updateRemap(StratifiedIndex other) { + assert(isRemapped()); + remap = other; + } + + private: + StratifiedIndex remap; + StratifiedIndex above; + StratifiedIndex below; + bool aliasedExternal; + }; + + // Removing dead sets may not be worth it -- "saves" us 20 bits in the average + // case (51 sets total) and 4KB in the worst case (out of 1.07M sets == ~133KB + // total) when bootstrapping LLVM. + + // \brief Instead of renumbering our sets when we merge, we do it as one + // operation at build time. This handles making sure that all StratifiedInfo + // we emit is mapped to the proper set index. + void renumberSets() { + for (auto &pair : sets) { + auto &info = pair.second; + auto &link = linksAt(info.index); + info.index = link.number; + } + } + + SetLinks *getHighestParentOf(SetLinks *child) { + auto *current = child; + while (current->hasAbove()) { + current = &linksAt(current->getAbove()); + } + return current; + } + + // \brief If some level aliased an external, we need to assume that all levels + // below that can dereference the external. So, we propogate that external + // status down the chain of sets. + void markSetExternals() { + DenseSet visited; + bool hasExternals = false; + for (unsigned i = 0; i < links.size(); ++i) { + auto &links = linksAt(i); + auto *current = getHighestParentOf(&links); + auto insertSuccess = visited.insert(current).second; + if (!insertSuccess) { + continue; + } + + while (current->hasBelow()) { + hasExternals = hasExternals || current->hasAliasedExternal(); + current->setAliasedExternal(hasExternals); + current = &linksAt(current->getBelow()); + } + } + } + + // \brief We store external aliasing information as part of SetLinks, when + // it's way more space efficient to store it as a bitset. This generates said + // bitset. + std::vector generateAliasingExternals() { + std::vector aliasingExternals; + aliasingExternals.resize(links.size()); + std::transform(links.begin(), links.end(), aliasingExternals.begin(), + [](const SetLinks &l) { return l.hasAliasedExternalRaw(); }); + return std::move(aliasingExternals); + } + +public: + // \brief Builds a StratifiedSet from the information we've been given since + // either construction or the prior build() call. + StratifiedSets build() { + auto aliasingExternals = generateAliasingExternals(); + renumberSets(); + markSetExternals(); + links.clear(); + return StratifiedSets(std::move(sets), std::move(aliasingExternals)); + } + + std::size_t size() const { return sets.size(); } + std::size_t numSets() const { return links.size(); } + + bool has(const T &elem) const { return get(elem).hasValue(); } + + bool add(const T &main) { + if (get(main).hasValue()) { + return false; + } + auto newIndex = getNewUnlinkedIndex(); + return addAtMerging(main, newIndex); + } + + bool addAbove(const T &main, const T &toAdd) { + assert(has(main)); + auto index = *indexOf(main); + if (!linksAt(index).hasAbove()) { + addLinkAbove(index); + } + auto above = linksAt(index).getAbove(); + return addAtMerging(toAdd, above); + } + + bool addBelow(const T &main, const T &toAdd) { + assert(has(main)); + auto index = *indexOf(main); + if (!linksAt(index).hasBelow()) { + addLinkBelow(index); + } + auto below = linksAt(index).getBelow(); + return addAtMerging(toAdd, below); + } + + bool addWith(const T &main, const T &toAdd) { + assert(has(main)); + auto mainIndex = *indexOf(main); + return addAtMerging(toAdd, mainIndex); + } + + void noteHasAliasingExternal(const T &main) { + assert(has(main)); + auto *info = *get(main); + auto &links = linksAt(info->index); + links.noteAliasedExternal(); + } + + bool hasAliasingExternal(const T &main) const { + assert(has(main)); + auto *info = *get(main); + auto &links = linksAt(info->index); + return links.hasAliasedExternal(); + } + +private: + DenseMap sets; + std::vector links; + + // \brief Adds the given element at the given index, merging sets if + // necessary. + bool addAtMerging(const T &toAdd, StratifiedIndex index) { + StratifiedInfo info = {index}; + auto pair = sets.insert({toAdd, info}); + if (pair.second) { + return true; + } + + auto &iter = pair.first; + auto &iterSet = linksAt(iter->second.index); + auto &reqSet = linksAt(index); + + // Failed to add where we wanted to. Merge the sets. + if (&iterSet != &reqSet) { + merge(iterSet.number, reqSet.number); + } + return false; + } + + // \brief Gets the SetLinks at the given index, taking set remapping into + // account. + SetLinks &linksAt(StratifiedIndex index) { + auto *start = &links[index]; + auto *current = start; + while (current->isRemapped()) { + current = &links[current->getRemapIndex()]; + start->updateRemap(current->number); + } + + return *current; + } + + // \brief Merges two sets into one another. Assumes that these sets are not + // already one in the same + void merge(StratifiedIndex idx1, StratifiedIndex idx2) { + assert(inbounds(idx1) && inbounds(idx2)); + assert(&linksAt(idx1) != &linksAt(idx2)); + + // CASE 1: If `into` is above or below `from`, we need to merge both the + // given sets, and all sets between them, into one. + if (tryMergeUpwards(idx1, idx2)) { + return; + } + + if (tryMergeUpwards(idx2, idx1)) { + return; + } + + // CASE 2: Into is not in the same list of sets as from -- we need to merge + // them together. + mergeDirect(idx1, idx2); + } + + // \brief Merges two sets assuming that linksAt(idx1) is unreachable from + // traversing above or below linksAt(idx2) (and, obviously, is not + // linksAt(idx2)) + void mergeDirect(StratifiedIndex idx1, StratifiedIndex idx2) { + assert(inbounds(idx1) && inbounds(idx2)); + assert(!links[idx1].isRemapped() && !links[idx2].isRemapped()); + + struct WorklistItem { + StratifiedIndex into; + StratifiedIndex from; + // We want directional searches up/down, but for the initial case, we want + // to search both up and down. + bool checkUp; + bool checkDown; + }; + + // Funny story: the current implementation of this function makes it + // impossible to have more than 2 items in the worklist at once. + SmallVector worklist; + worklist.push_back({idx1, idx2, true, true}); + + // Here we remap the chain of sets at idx2 to the chain of sets at idx1. + // This is done by merging the set at idx2 with the set at idx1, then + // merging the sets above idx1 and idx2 with each other, then the sets above + // those, ... Then we proceed to do the same with the sets below idx1 and + // idx2. + while (!worklist.empty()) { + auto current = worklist.pop_back_val(); + auto &linksInto = linksAt(current.into); + auto &linksFrom = linksAt(current.from); + assert(&linksInto != &linksFrom); + + if (linksFrom.hasAliasedExternal()) { + linksInto.noteAliasedExternal(); + } + + // Merging strategy: + // > If neither has links in some direction, do nothing + // > If only linksInto has links in some direction, do nothing + // > If only linksFrom has links in some direction, set linksInto.above + // to it + // > If both have links in some direction, "recurse" -- merge on those + // links. + + // TODO: Maybe find a better way to do this than a forest of if + // statements? + if (current.checkUp) { + if (!linksInto.hasAbove()) { + if (linksFrom.hasAbove()) { + linksInto.setAbove(linksFrom.getAbove()); + } + } else { + if (linksFrom.hasAbove()) { + // Continue merging up, don't merge down. + worklist.push_back( + {linksInto.getAbove(), linksFrom.getAbove(), true, false}); + } + } + + if (linksInto.hasAbove()) { + linksAt(linksInto.getAbove()).setBelow(current.into); + } + } + + if (current.checkDown) { + if (!linksInto.hasBelow()) { + if (linksFrom.hasBelow()) { + linksInto.setBelow(linksFrom.getBelow()); + } + } else { + if (linksFrom.hasBelow()) { + // Continue merging down + worklist.push_back( + {linksInto.getBelow(), linksFrom.getBelow(), false, true}); + } + } + + if (linksInto.hasBelow()) { + linksAt(linksInto.getBelow()).setAbove(current.into); + } + } + + linksFrom.remapTo(linksInto.number); + } + } + + // \brief Checks to see if lowerIndex is at a level lower than upperIndex. If + // so, it will merge lowerIndex with upperIndex (and all of the sets between) + // and return true. Otherwise, it will return false. + bool tryMergeUpwards(StratifiedIndex lowerIndex, StratifiedIndex upperIndex) { + assert(inbounds(lowerIndex) && inbounds(upperIndex)); + auto *lower = &linksAt(lowerIndex); + auto *upper = &linksAt(upperIndex); + if (lower == upper) { + return true; + } + + SmallVector found; + bool hasAliasingExternals = false; + auto *current = lower; + while (current->hasAbove() && current != upper) { + found.push_back(current); + hasAliasingExternals = + hasAliasingExternals || current->hasAliasedExternal(); + auto aboveIndex = current->getAbove(); + current = &linksAt(aboveIndex); + } + + if (current != upper) { + return false; + } + + if (hasAliasingExternals) { + upper->noteAliasedExternal(); + } + + if (lower->hasBelow()) { + auto newBelowIndex = lower->getBelow(); + upper->setBelow(newBelowIndex); + auto &newBelow = linksAt(newBelowIndex); + newBelow.setAbove(upperIndex); + } else { + upper->clearBelow(); + } + + for (const auto &ptr : found) { + ptr->remapTo(upper->number); + } + return true; + } + + Optional get(const T &val) const { + auto result = sets.find(val); + if (result == sets.end()) { + return NoneType(); + } + return &result->second; + } + + Optional get(const T &val) { + auto result = sets.find(val); + if (result == sets.end()) { + return NoneType(); + } + return &result->second; + } + + Optional indexOf(const T &val) { + auto maybeVal = get(val); + if (!maybeVal.hasValue()) { + return NoneType(); + } + auto *info = *maybeVal; + auto &links = linksAt(info->index); + return links.number; + } + + StratifiedIndex addLinkBelow(StratifiedIndex set) { + auto at = addLinks(); + links[set].setBelow(at); + links[at].setAbove(set); + return at; + } + + StratifiedIndex addLinkAbove(StratifiedIndex set) { + auto at = addLinks(); + links[at].setBelow(set); + links[set].setAbove(at); + return at; + } + + StratifiedIndex getNewUnlinkedIndex() { return addLinks(); } + + StratifiedIndex addLinks() { + auto link = links.size(); + links.push_back(SetLinks(link)); + return link; + } + + bool inbounds(StratifiedIndex num) const { + return num >= 0 && num < links.size(); + } +}; +} + +#endif // LLVM_ADT_STRATIFIEDSETS_H Index: lib/Analysis/WeightedBidirectedGraph.h =================================================================== --- /dev/null +++ lib/Analysis/WeightedBidirectedGraph.h @@ -0,0 +1,155 @@ +//===- WeightedBidirectedGraph.h - Abstract weighted, bidirected graph. ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_WEIGHTEDBIDIRECTEDGRPAH_H +#define LLVM_ADT_WEIGHTEDBIDIRECTEDGRPAH_H + +#include "llvm/ADT/Optional.h" +#include +#include +#include + +namespace llvm { +// \brief A weighted bidirected graph made for use with the CFL Alias Analysis +// implementation. +template class WeightedBidirectedGraph { +public: + // TODO: Maybe restructure? I don't like public->private->public + typedef std::size_t Node; + +private: + constexpr static Node StartNode = Node(0); + + struct Edge { + EdgeWeightT weight; + Node other; + + bool operator==(const Edge &e) const { + return weight == e.weight && other == e.other; + } + + bool operator!=(const Edge &e) const { return !operator==(e); } + }; + + struct NodeImpl { + std::vector edges; + }; + + std::vector nodeImpls; + + bool inbounds(Node nodeIndex) const { return nodeIndex < nodeImpls.size(); } + + const NodeImpl &getNode(Node n) const { return nodeImpls[n]; } + NodeImpl &getNode(Node n) { return nodeImpls[n]; } + +public: + // ----- Various Edge iterators for the graph ----- // + + // \brief Iterator for edges. Because this graph is bidirected, we don't + // allow modificaiton of the edges using this iterator. Additionally, the + // iterator becomes invalid if you add edges to or from the node you're + // getting the edges of. + struct EdgeIterator : public std::iterator> { + EdgeIterator(const typename std::vector::const_iterator &iter) + : current(iter) {} + + EdgeIterator(NodeImpl &impl) : current(impl.begin()) {} + + EdgeIterator &operator++() { + ++current; + return *this; + } + + EdgeIterator operator++(int) { + EdgeIterator copy(current); + operator++(); + return copy; + } + + std::tuple &operator*() { + store = std::make_tuple(current->weight, current->other); + return store; + } + + bool operator==(const EdgeIterator &other) const { + return current == other.current; + } + + bool operator!=(const EdgeIterator &other) const { + return !operator==(other); + } + + private: + typename std::vector::const_iterator current; + std::tuple store; + }; + + // Wrapper for EdgeIterator with begin()/end() calls. + struct EdgeIterable { + EdgeIterable(const std::vector &edges) + : beginIter(edges.begin()), endIter(edges.end()) {} + + EdgeIterator begin() { return EdgeIterator(beginIter); } + + EdgeIterator end() { return EdgeIterator(endIter); } + + private: + typename std::vector::const_iterator beginIter; + typename std::vector::const_iterator endIter; + }; + + // ----- Actual graph-related things ----- // + + WeightedBidirectedGraph() = default; + + WeightedBidirectedGraph(WeightedBidirectedGraph &&other) + : nodeImpls(std::move(other.nodeImpls)) {} + + WeightedBidirectedGraph & + operator=(WeightedBidirectedGraph &&other) { + nodeImpls = std::move(other.nodeImpls); + return *this; + } + + Node addNode() { + auto index = nodeImpls.size(); + auto newNode = Node(index); + nodeImpls.push_back(NodeImpl()); + return newNode; + } + + void addEdge(Node from, Node to, const EdgeWeightT &weight, + const EdgeWeightT &reverseWeight) { + assert(inbounds(from)); + assert(inbounds(to)); + auto &fromNode = getNode(from); + auto &toNode = getNode(to); + fromNode.edges.push_back(Edge{weight, to}); + toNode.edges.push_back(Edge{reverseWeight, from}); + } + + EdgeIterable edgesFor(const Node &n) const { + const auto &node = getNode(n); + return EdgeIterable(node.edges); + } + + bool empty() const { return nodeImpls.empty(); } + std::size_t size() const { return nodeImpls.size(); } + + // \brief Gets an arbitrary node in the graph as a starting point for + // traversal. + Node getEntryNode() { + assert(inbounds(StartNode)); + return StartNode; + } +}; +} + +#endif // LLVM_ADT_WEIGHTEDBIDIRECTEDGRPAH_H