Index: include/llvm/Analysis/Passes.h =================================================================== --- include/llvm/Analysis/Passes.h +++ include/llvm/Analysis/Passes.h @@ -66,6 +66,13 @@ //===--------------------------------------------------------------------===// // + // createCFLAliasAnalysisPass - This pass implements a set-based approach to + // alias analysis. + // + ImmutablePass *createCFLAliasAnalysisPass(); + + //===--------------------------------------------------------------------===// + // /// createLibCallAliasAnalysisPass - Create an alias analysis pass that knows /// about the semantics of a set of libcalls specified by LCI. The newly /// constructed pass takes ownership of the pointer that is provided. Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -89,6 +89,7 @@ void initializeCFGOnlyViewerPass(PassRegistry&); void initializeCFGPrinterPass(PassRegistry&); void initializeCFGSimplifyPassPass(PassRegistry&); +void initializeCFLAliasAnalysisPass(PassRegistry&); void initializeFlattenCFGPassPass(PassRegistry&); void initializeStructurizeCFGPass(PassRegistry&); void initializeCFGViewerPass(PassRegistry&); Index: include/llvm/LinkAllPasses.h =================================================================== --- include/llvm/LinkAllPasses.h +++ include/llvm/LinkAllPasses.h @@ -61,6 +61,7 @@ (void) llvm::createCallGraphPrinterPass(); (void) llvm::createCallGraphViewerPass(); (void) llvm::createCFGSimplificationPass(); + (void) llvm::createCFLAliasAnalysisPass(); (void) llvm::createStructurizeCFGPass(); (void) llvm::createConstantMergePass(); (void) llvm::createConstantPropagationPass(); Index: lib/Analysis/CFLAliasAnalysis.cpp =================================================================== --- /dev/null +++ lib/Analysis/CFLAliasAnalysis.cpp @@ -0,0 +1,702 @@ +//===- CFLAliasAnalysis.cpp - CFL-Based Alias Analysis Implementation ------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a CFL-based context-insensitive alias analysis +// algorithm. It does not depend on types The algorithm is a mixture of the one +// described here: www.cs.cornell.edu/~rugina/papers/popl08.pdf and here: +// www.cse.cuhk.edu.hk/lyu/_media/paper/pldi2013.pdf -- to summarize the papers, +// we build a graph of the uses of a variable, where each node is a memory +// location, and each edge is an action that happened on that memory location. +// The "actions" can be one of Dereference, Reference, AssignTo, or AssignFrom. +// +// Two variables are considered as aliasing iff you can reach one value's node +// from the other value's node and the language formed by concatenating all of +// the edge labels (actions) conforms to a context-free grammar. +// +// Because this algorithm requires a graph search on each query, we execute the +// algorithm outlined in pldi2013.pdf (above) in order to transform the graph +// into sets of variables that may alias in ~nlogn time (n = number of +// variables.). This makes queries take constant time. +//===----------------------------------------------------------------------===// + +#include "StratifiedSets.h" +#include "WeightedBidirectedGraph.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/None.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/Pass.h" +#include "llvm/Support/ErrorHandling.h" +#include +#include +#include +using namespace llvm; + +// Try to go from a Value* to a Function*. Never returns nullptr. +static Optional valueToFunction(Value *); + +// Returns possible functions called by the CallInst* into the given +// SmallVectorImpl. Returns true if targets found, false otherwise. +static bool getPossibleTargets(CallInst *, SmallVectorImpl *); + +// Some instructions need to have their users tracked. Instructions like +// `add` require you to get the users of the Instruction* itself, other +// instructions like `store` require you to get the users of the first +// operand. This function gets the "proper" value to track for each +// type of instruction we support. +static Optional getTargetValue(Instruction *); + +// There are certain instructions (i.e. FenceInst, etc.) that we ignore. +// This detects those. +static bool isIgnoredInstruction(Instruction *inst); + +// Is this value a global or not-noalias argument? +static bool isAliasingExternal(Value *val); + +namespace { +struct CFLAliasAnalysis : public ImmutablePass, public AliasAnalysis { +private: + /// \brief Cached mapping of Functions to their StratifiedSets. + /// If a function's sets arecurrently being built, it is marked + /// in the cache as an Optional without a value. This way, if we + /// have any kind of recursion, it is discernable from a function + /// that simply has empty sets. + DenseMap>> cache; + +public: + static char ID; + + CFLAliasAnalysis() : ImmutablePass(ID) { + initializeCFLAliasAnalysisPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const { + AliasAnalysis::getAnalysisUsage(AU); + } + + void *getAdjustedAnalysisPointer(const void *ID) override { + if (ID == &AliasAnalysis::ID) + return (AliasAnalysis *)this; + return this; + } + + /// \brief Inserts the given Function into the cache. + void scan(Function *fn); + + /// \brief Ensures that the given function is available in the cache. + /// Returns the appropriate entry from the cache. + const Optional> &ensureCached(Function *fn) { + auto iter = cache.find(fn); + if (iter == cache.end()) { + scan(fn); + iter = cache.find(fn); + assert(iter != cache.end()); + assert(iter->second.hasValue()); + } + return iter->second; + } + + Optional query(const Location &locA, const Location &locB); + + AliasResult alias(const Location &locA, const Location &locB) override { + // This should be covered by BasicAA, but it's super cheap to check. + if (locA.Ptr == locB.Ptr) { + if (locA.Size == locB.Size) { + return MustAlias; + } else { + return PartialAlias; + } + } + + return MayAlias; + if (isa(locA.Ptr) && isa(locB.Ptr)) { + return MayAlias; + } + + auto maybeResult = query(locA, locB); + if (!maybeResult.hasValue()) { + return MayAlias; + } + + return *maybeResult; + } + + void initializePass() override { InitializeAliasAnalysis(this); } +}; + +// \brief StratifiedSets call for knowledge of "direction", so this is how we +// represent that locally. +enum class Direction { None, Above, Below }; + +// \brief Edges can be one of four "weights" +enum class EdgeWeight { AssignFrom, AssignTo, Dereference, Reference }; + +// \brief Encodes the notion of a "use" +struct Edge { + Value *from; + Value *to; + EdgeWeight weight; +}; + +// \brief Gets the edges our graph should have, based on an Instruction* +// +// Currently everything returns an Optional because it makes life easier +// to be able to selectively disable support for instructions and troubleshoot +// our coverage of the instructions we'll get. I plan to remove this as this AA +// gets tested more/nearer finalization. +// +// Return values: +// Nothing - Unknown instruction. Program should die. +// Just(false) - Known instruction, but not supported (probably disabled +// while trying to find a bug). Discard results from GetEdgesVisitor. +// Just(true) - Known instruction, results are valid. Continue as normal. +class GetEdgesVisitor : public InstVisitor> { + CFLAliasAnalysis *aa; + SmallVectorImpl *output; + +public: + GetEdgesVisitor(CFLAliasAnalysis *aa, SmallVectorImpl *output) + : aa(aa), output(output) {} + + Optional visitInstruction(Instruction &) { return NoneType(); } + + Optional visitCastInst(CastInst &inst) { + assert(isa(inst)); + output->push_back({&inst, inst.getOperand(0), EdgeWeight::AssignFrom}); + // Casting does nothing -- it's the operations on the casted value that we + // get restrictive about. + return true; + } + + Optional visitBinaryOperator(BinaryOperator &inst) { + auto *op1 = inst.getOperand(0); + auto *op2 = inst.getOperand(1); + output->push_back({&inst, op1, EdgeWeight::AssignFrom}); + output->push_back({&inst, op2, EdgeWeight::AssignFrom}); + return true; + } + + Optional visitAtomicCmpXchgInst(AtomicCmpXchgInst &inst) { + auto *ptr = inst.getPointerOperand(); + auto *val = inst.getNewValOperand(); + output->push_back({ptr, val, EdgeWeight::Dereference}); + return true; + } + + Optional visitAtomicRMWInst(AtomicRMWInst &inst) { + auto *ptr = inst.getPointerOperand(); + auto *val = inst.getValOperand(); + output->push_back({ptr, val, EdgeWeight::Dereference}); + return true; + } + + Optional visitPHINode(PHINode &inst) { + for (unsigned i = 0, e = inst.getNumIncomingValues(); i < e; ++i) { + Value *val = inst.getIncomingValue(i); + output->push_back({&inst, val, EdgeWeight::AssignFrom}); + } + return true; + } + + Optional visitGetElementPtrInst(GetElementPtrInst &inst) { + Value *op = inst.getPointerOperand(); + output->push_back({&inst, op, EdgeWeight::AssignFrom}); + return true; + } + + Optional visitSelectInst(SelectInst &inst) { + auto *trueVal = inst.getTrueValue(); + auto *falseVal = inst.getFalseValue(); + output->push_back({&inst, trueVal, EdgeWeight::AssignFrom}); + output->push_back({&inst, falseVal, EdgeWeight::AssignFrom}); + return true; + } + + Optional visitAllocaInst(AllocaInst &inst) { return true; } + + Optional visitExtractValueInst(ExtractValueInst &inst) { + auto *ptr = inst.getAggregateOperand(); + auto *val = &inst; + output->push_back({val, ptr, EdgeWeight::Dereference}); + return true; + } + + Optional visitLoadInst(LoadInst &inst) { + auto *ptr = inst.getPointerOperand(); + auto *val = &inst; + output->push_back({val, ptr, EdgeWeight::Reference}); + return true; + } + + Optional visitStoreInst(StoreInst &inst) { + auto *ptr = inst.getPointerOperand(); + auto *val = inst.getValueOperand(); + output->push_back({ptr, val, EdgeWeight::Dereference}); + return true; + } + + Optional visitCallInst(CallInst &inst) { + SmallVector possibleTargets; + // TODO: Interprocedural + if (getPossibleTargets(&inst, &possibleTargets)) { + bool allOnlyReadMemory = true; + for (Function *fn : possibleTargets) { + aa->ensureCached(fn); + if (!fn->onlyReadsMemory()) { + allOnlyReadMemory = false; + break; + } + } + + if (allOnlyReadMemory) { + return true; + } + } + + // Couldn't determine target. Treat everything as potentially + // aliasing everything. + for (Value *v : inst.arg_operands()) { + output->push_back({&inst, v, EdgeWeight::AssignFrom}); + } + + return true; + } + + Optional visitInvokeInst(InvokeInst &inst) { + // TODO: Make this work like CallInst. + for (Value *v : inst.arg_operands()) { + output->push_back({&inst, v, EdgeWeight::AssignFrom}); + } + return true; + } + + Optional visitExtractElementInst(ExtractElementInst &inst) { + auto *ptr = inst.getVectorOperand(); + auto *val = &inst; + output->push_back({val, ptr, EdgeWeight::Reference}); + return true; + } + + Optional visitInsertElementInst(InsertElementInst &inst) { + auto *ptr = inst.getOperand(0); + auto *val = inst.getOperand(1); + output->push_back({val, ptr, EdgeWeight::Reference}); + return true; + } + + Optional visitInsertValueInst(InsertValueInst &inst) { + auto *ptr = inst.getAggregateOperand(); + auto *val = &inst; + output->push_back({val, ptr, EdgeWeight::Reference}); + return true; + } + + Optional visitShuffleVectorInst(ShuffleVectorInst &inst) { + auto *from1 = inst.getOperand(0); + auto *from2 = inst.getOperand(1); + output->push_back({&inst, from1, EdgeWeight::AssignFrom}); + output->push_back({&inst, from2, EdgeWeight::AssignFrom}); + return true; + } +}; + +// For a given instruction, we need to know which Value* to get the +// users of in order to build our graph. In some cases (i.e. add), +// we simply need the Instruction*. In other cases (i.e. store), +// finding the users of the Instruction* is useless; we need to find +// the users of the first operand. This handles determining which +// value to follow for us. +// +// Note: we *need* to keep this in sync with GetEdgesVisitor. Add +// something to GetEdgesVisitor, add it here -- remove something from +// GetEdgesVisitor, remove it here. +class GetTargetValueVisitor + : public InstVisitor> { +public: + Optional visitInstruction(Instruction &) { return NoneType(); } + Optional visitCastInst(CastInst &inst) { return &inst; } + Optional visitBinaryOperator(BinaryOperator &inst) { return &inst; } + Optional visitLoadInst(LoadInst &inst) { return &inst; } + Optional visitSelectInst(SelectInst &inst) { return &inst; } + Optional visitAllocaInst(AllocaInst &inst) { return &inst; } + Optional visitPHINode(PHINode &inst) { return &inst; } + Optional visitCallInst(CallInst &inst) { return &inst; } + Optional visitInvokeInst(InvokeInst &inst) { return &inst; } + Optional visitShuffleVectorInst(ShuffleVectorInst &inst) { + return &inst; + } + + Optional visitExtractValueInst(ExtractValueInst &inst) { + return &inst; + } + + Optional visitAtomicCmpXchgInst(AtomicCmpXchgInst &inst) { + return inst.getPointerOperand(); + } + + Optional visitAtomicRMWInst(AtomicRMWInst &inst) { + return inst.getPointerOperand(); + } + + Optional visitGetElementPtrInst(GetElementPtrInst &inst) { + return &inst; + } + + Optional visitStoreInst(StoreInst &inst) { + return inst.getPointerOperand(); + } + + Optional visitExtractElementInst(ExtractElementInst &inst) { + return &inst; + } + + Optional visitInsertElementInst(InsertElementInst &inst) { + return inst.getOperand(0); + } + + Optional visitInsertValueInst(InsertValueInst &inst) { + return inst.getAggregateOperand(); + } +}; + +typedef WeightedBidirectedGraph Graph; +typedef DenseMap NodeMap; +} + +// -- Setting up/registering AA pass -- // +char CFLAliasAnalysis::ID = 0; + +INITIALIZE_AG_PASS(CFLAliasAnalysis, AliasAnalysis, "cfl-aa", + "CFL-Based AA implementation", false, true, false) + +ImmutablePass *llvm::createCFLAliasAnalysisPass() { + return new CFLAliasAnalysis(); +} + +//===----------------------------------------------------------------------===// +// Function declarations that require types defined in the namespace above +//===----------------------------------------------------------------------===// + +// Gets the inverse of a given EdgeWeight. +static EdgeWeight flipWeight(EdgeWeight initial); + +// Gets edges of the given Instruction*, writing them to the SmallVector*. +// Returns Optional -- Nothing if we didn't expected to encounter +// the instruction, Just(false) if we're aware the instruction exists, but +// don't support it. Just(true) if everything worked fine. +// TODO: Swap Optional with asserts, remove return type. +static Optional getEdgesOf(CFLAliasAnalysis *, Instruction *, + SmallVectorImpl *); + +// Converts the arguments of the given instruction to Edges. Returns true +// on success, false on failure. +// TODO: Change return value to assertion +static bool argsToEdges(CFLAliasAnalysis *, Instruction *, + SmallVectorImpl *); + +// Gets the "Direction" that one should travel in StratifiedSets +// given an EdgeWeight. +static Direction directionOfEdgeWeight(EdgeWeight); + +// Builds the graph needed for constructing the StratifiedSets for the +// given function +static bool buildGraphFrom(CFLAliasAnalysis *, Function *, NodeMap *, Graph *); + +// Builds the graph + StratifiedSets for a function. +static Optional> buildSetsFrom(CFLAliasAnalysis *, + Function *); + +//===----------------------------------------------------------------------===// +// Definitions of static functions declared at the top/middle of the file +//===----------------------------------------------------------------------===// +static Optional valueToFunction(Value *val) { + if (auto *inst = dyn_cast(val)) { + auto *bb = inst->getParent(); + return bb->getParent(); + } else if (auto *arg = dyn_cast(val)) { + return arg->getParent(); + } + return NoneType(); +} + +// TODO: Make this better +static bool getPossibleTargets(CallInst *call, + SmallVectorImpl *output) { + if (auto *fn = call->getCalledFunction()) { + output->push_back(fn); + return true; + } + return false; +} + +static Optional getTargetValue(Instruction *inst) { + GetTargetValueVisitor v; + return v.visit(inst); +} + +static bool isIgnoredInstruction(Instruction *inst) { + bool boringTerminator = isa(inst) && !isa(inst); + // If this gets much bigger, refactor. + return isa(inst) || isa(inst) || + isa(inst) || boringTerminator; +} + +static bool isAliasingExternal(Value *val) { + bool isGlobal = isa(val); + auto *arg = dyn_cast(val); + bool isAliasArg = arg != nullptr && !arg->hasNoAliasAttr(); + return isGlobal || isAliasArg; +} + +static EdgeWeight flipWeight(EdgeWeight initial) { + switch (initial) { + case EdgeWeight::AssignFrom: + return EdgeWeight::AssignTo; + case EdgeWeight::AssignTo: + return EdgeWeight::AssignFrom; + case EdgeWeight::Dereference: + return EdgeWeight::Reference; + case EdgeWeight::Reference: + return EdgeWeight::Dereference; + } + llvm_unreachable("Incomplete coverage of EdgeWeight enum"); +} + +static Optional getEdgesOf(CFLAliasAnalysis *analysis, Instruction *inst, + SmallVectorImpl *output) { + GetEdgesVisitor v(analysis, output); + return v.visit(inst); +} + +static bool argsToEdges(CFLAliasAnalysis *analysis, Instruction *inst, + SmallVectorImpl *output) { + auto maybeResult = getEdgesOf(analysis, inst, output); + if (maybeResult.hasValue()) { + return *maybeResult; + } + + llvm_unreachable("Got unrecognized instruction."); +} + +static Direction directionOfEdgeWeight(EdgeWeight weight) { + switch (weight) { + case EdgeWeight::Reference: + return Direction::Above; + case EdgeWeight::Dereference: + return Direction::Below; + case EdgeWeight::AssignFrom: + case EdgeWeight::AssignTo: + return Direction::None; + } + llvm_unreachable("Incomplete switch coverage"); +} + +// Aside: We may remove graph construction entirely, because it doesn't really +// buy us much that we don't already have. I'd like to add interprocedural +// analysis prior to this however, in case that somehow requires the graph +// produced by this for efficient execution +static bool buildGraphFrom(CFLAliasAnalysis *analysis, Function *fn, + NodeMap *map, Graph *graph) { + const auto findOrInsertNode = [map, graph](Value *val) { + auto pair = map->insert({val, Graph::Node()}); + auto &iter = pair.first; + if (pair.second) { + auto newNode = graph->addNode(); + iter->second = newNode; + } + return iter->second; + }; + + SmallVector edges; + for (auto &bb : fn->getBasicBlockList()) { + for (auto &inst : bb.getInstList()) { + if (isIgnoredInstruction(&inst)) { + continue; + } + + edges.clear(); + bool ok = argsToEdges(analysis, &inst, &edges); + if (!ok) { + map->clear(); + *graph = Graph(); + return false; + } + + // In the case of an unused alloca (or similar), edges may be empty. Note + // that it exists so we can potentially answer NoAlias. + if (edges.empty()) { + auto maybeVal = getTargetValue(&inst); + assert(maybeVal.hasValue()); + auto *target = *maybeVal; + findOrInsertNode(target); + continue; + } + + for (const Edge &e : edges) { + auto to = findOrInsertNode(e.to); + auto from = findOrInsertNode(e.from); + auto flippedWeight = flipWeight(e.weight); + graph->addEdge(from, to, e.weight, flippedWeight); + } + } + } + return true; +} + +static Optional> +buildSetsFrom(CFLAliasAnalysis *analysis, Function *fn) { + NodeMap map; + Graph graph; + + bool ok = buildGraphFrom(analysis, fn, &map, &graph); + if (!ok) { + return NoneType(); + } + + DenseMap nodeValueMap; + nodeValueMap.resize(map.size()); + for (const auto &pair : map) { + nodeValueMap.insert({pair.second, pair.first}); + } + + const auto findValueOrDie = [&nodeValueMap](Graph::Node node) { + auto valIter = nodeValueMap.find(node); + assert(valIter != nodeValueMap.end()); + return valIter->second; + }; + + StratifiedSetsBuilder builder; + std::deque worklist; + for (auto &pair : map) { + auto *value = pair.first; + worklist.clear(); + builder.add(value); + auto initialNode = pair.second; + worklist.push_back(initialNode); + + while (!worklist.empty()) { + auto node = worklist.front(); + worklist.pop_front(); + + auto *curValue = findValueOrDie(node); + for (const auto &edgeTuple : graph.edgesFor(node)) { + auto weight = std::get<0>(edgeTuple); + auto &otherNode = std::get<1>(edgeTuple); + auto *otherValue = findValueOrDie(otherNode); + + bool added; + switch (directionOfEdgeWeight(weight)) { + case Direction::Above: + added = builder.addAbove(curValue, otherValue); + break; + case Direction::Below: + added = builder.addBelow(curValue, otherValue); + break; + case Direction::None: + added = builder.addWith(curValue, otherValue); + break; + } + + if (added) { + // Aside: As we near a finalized implementation for this, I'll + // probably remove the explicit graph building portion of this + // algorithm, and deal directly with the Edge list here. If this + // happens, I'll tag edges with an attribute instead of doing + // isa or isa here. If this doesn't happen, + // we'll do something cleaner than this. + // TODO: ^ + if (isAliasingExternal(otherValue) || isa(curValue) || + isa(curValue)) { + builder.noteHasAliasingExternal(curValue); + builder.noteHasAliasingExternal(otherValue); + } + worklist.push_back(otherNode); + } + } + } + } + + return builder.build(); +} + +//===----------------------------------------------------------------------===// +// Lengthy CFL AA methods +//===----------------------------------------------------------------------===// +void CFLAliasAnalysis::scan(Function *fn) { + auto dummyPair = cache.insert({fn, Optional>()}); + (void)dummyPair; + assert(dummyPair.second); + + Optional> maybeSets(buildSetsFrom(this, fn)); + // Give it a value to distinguish between "finished building -- nothing was + // there" and "in the process of building" + if (!maybeSets.hasValue()) { + maybeSets = StratifiedSetsBuilder().build(); + } + assert(maybeSets.hasValue()); + + // Note: buildSetsFrom *can* cause a resize on cache, invalidating all + // iterators. So we can't use the iterator given above. + cache[fn] = std::move(maybeSets); +} + +Optional +CFLAliasAnalysis::query(const AliasAnalysis::Location &locA, + const AliasAnalysis::Location &locB) { + auto *valA = const_cast(locA.Ptr); + auto *valB = const_cast(locB.Ptr); + + Function *fn = nullptr; + auto maybeFnA = valueToFunction(valA); + auto maybeFnB = valueToFunction(valB); + if (!maybeFnA.hasValue() && !maybeFnB.hasValue()) { + assert(false); + return NoneType(); + } + + if (maybeFnA.hasValue()) { + fn = *maybeFnA; + assert(!maybeFnB.hasValue() || *maybeFnB == *maybeFnA); + } else { + fn = *maybeFnB; + } + + assert(fn != nullptr); + auto &maybeSets = ensureCached(fn); + assert(maybeSets.hasValue()); + + auto &sets = *maybeSets; + auto maybeA = sets.find(valA); + if (!maybeA.hasValue()) { + return NoneType(); + } + + auto maybeB = sets.find(valB); + if (!maybeB.hasValue()) { + return NoneType(); + } + + auto setA = *maybeA; + auto setB = *maybeB; + + bool sameIndex = setA.index == setB.index; + bool bothExternals = sets.hasAliasingExternal(setA.index) && + sets.hasAliasingExternal(setB.index); + + if (sameIndex || bothExternals) { + return PartialAlias; + } + + return NoAlias; +} + Index: lib/Analysis/CMakeLists.txt =================================================================== --- lib/Analysis/CMakeLists.txt +++ lib/Analysis/CMakeLists.txt @@ -11,6 +11,7 @@ BranchProbabilityInfo.cpp CFG.cpp CFGPrinter.cpp + CFLAliasAnalysis.cpp CGSCCPassManager.cpp CaptureTracking.cpp CostModel.cpp