Index: include/llvm/Analysis/ExternalFunctionAnalysis.h
===================================================================
--- /dev/null
+++ include/llvm/Analysis/ExternalFunctionAnalysis.h
@@ -0,0 +1,139 @@
+//=- ExternalFunctionAnalysis.h: Find external function pointers -*- C++ -*-==//
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief A pass that finds incoming external function pointers and finds
+/// annotated storage locations and indirect calls based on these locations.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_EXTERNALFUNCTIONANALYSIS_H_
+#define LLVM_ANALYSIS_EXTERNALFUNCTIONANALYSIS_H_
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+
+class AnalysisUsage;
+class CallInst;
+class CallSite;
+class Function;
+class GlobalVariable;
+class Instruction;
+class StoreInst;
+class Value;
+
+/// External-Function Analysis (EFA) finds external function pointers and
+/// related stores and indirect calls.
+///
+/// EFA can answer two questions after it has run on a Module:
+/// 1. Is this Instruction an indirect call that is maybe calling a function
+///    that is not defined or declared in the current Module?
+/// 2. Does this Function contain indirect calls that maybe target functions
+///    that are not defined or declared in the current Module?
+///
+/// Answers to these questions are more useful when the current Module is the
+/// Module used during Link-Time Optimization (LTO), since LTO gathers as much
+/// of the code as possible into a single Module. This pass is useful in
+/// particular for Control-Flow Integrity (CFI), since CFI passes often need to
+/// find indirect calls that might be made through external function pointers.
+///
+/// EFA looks for function pointers returned by functions external to the module
+/// it is analyzing, and it traces the dataflow of these incoming external
+/// function pointers to find places where they are stored or called in indirect
+/// function calls.
+///
+/// EFA also supports an attribute annotation: "efa-maybe-external". This can be
+/// applied to variables, pointers, or functions (using llvm.var.annotation,
+/// llvm.ptr.annotation, and llvm.global.annotations, respectively). EFA traces
+/// the dataflow from variables and pointers annotated with efa-maybe-external
+/// and finds store instructions and indirect calls. It tries to match these
+/// store locations with the stores found from the incoming external function
+/// pointer analysis. And it warns if external function pointers are being
+/// stored into non-annotated locations.
+class ExternalFunctionAnalysis : public ModulePass {
+  ExternalFunctionAnalysis(const ExternalFunctionAnalysis &)
+      LLVM_DELETED_FUNCTION;
+
+  ExternalFunctionAnalysis &
+  operator=(const ExternalFunctionAnalysis &) LLVM_DELETED_FUNCTION;
+
+public:
+  static char ID;
+  ExternalFunctionAnalysis() : ModulePass(ID) {
+    initializeExternalFunctionAnalysisPass(*PassRegistry::getPassRegistry());
+  }
+
+  virtual ~ExternalFunctionAnalysis() {}
+
+  bool runOnModule(Module &M);
+  void getAnalysisUsage(AnalysisUsage &AU) const;
+  const char *getPassName() const { return "ExternalFunctionAnalysis"; }
+
+  /// Returns true if the given instruction was recorded as an indirect call
+  /// that might go through an external function pointer.
+  bool maybeIsExternalCall(const Instruction *I);
+
+  /// Returns true if the given function was annotated to say it might contain
+  /// indirect external calls.
+  bool maybeContainsExternalCall(const Function *F);
+
+private:
+  /// Origin of a stored pointer: the external Source and the calling Caller.
+  struct SourcePair {
+    const Function *Source;
+    const Function *Caller;
+  };
+
+  typedef DenseSet<const Instruction *> InstructionSet;
+  typedef DenseSet<const StoreInst *> StoreSet;
+  typedef DenseSet<const Function *> FunctionSet;
+  typedef DenseMap<const StoreInst *, SourcePair> StoreSources;
+
+  StoreSet MaybeExternalStores;
+  FunctionSet MaybeExternalFuns;
+  InstructionSet MaybeExternalCalls;
+
+  /// Gets indirect calls and stores that flow from llvm.ptr.annotation results
+  /// tagged with __attribute__((annotate("efa-maybe-external"))).
+  void computeMaybeExternalPtrInstrs(Module &M);
+
+  /// Gets indirect calls that flow from variables tagged, through
+  /// llvm.var.annotation, with __attribute__((annotate("efa-maybe-external"))).
+  /// Also finds places where values are stored into these variables.
+  void computeMaybeExternalVarInstrs(Module &M);
+
+  /// Records the functions listed in llvm.global.annotations that are
+  /// annotated with __attribute__((annotate("efa-maybe-external"))). Also
+  /// gets call instructions that flow from annotated global function pointer
+  /// variables.
+  void computeMaybeExternalFuns(Module &M);
+
+  /// Finds indirect calls made through values derived from the given
+  /// GlobalVariable and finds stores into this variable.
+  void findCalls(const GlobalVariable *GV);
+
+  /// Traces the results of direct calls to external functions that return
+  /// pointers and warns about function-pointer stores that are not annotated.
+  void findExternalFunctionPointers(const Module &M);
+
+  /// Finds store instructions that flow from a function pointer in the given
+  /// instruction (and are derived from a call to the given function).
+  void findFPStores(const Function *F, const Instruction *I,
+                    StoreSources &FPStores);
+
+  /// Walks the chain of uses from a Value, recording indirect calls in
+  /// MaybeExternalCalls and stores into the value in MaybeExternalStores.
+  void findRelatedInstrs(const Value *Val);
+};
+
+ModulePass *createExternalFunctionAnalysisPass();
+} // end namespace llvm
+
+#endif /* LLVM_ANALYSIS_EXTERNALFUNCTIONANALYSIS_H_ */
Index: include/llvm/InitializePasses.h
===================================================================
--- include/llvm/InitializePasses.h
+++ include/llvm/InitializePasses.h
@@ -88,6 +88,7 @@
 void initializeCFGOnlyViewerPass(PassRegistry&);
 void initializeCFGPrinterPass(PassRegistry&);
 void initializeCFGSimplifyPassPass(PassRegistry&);
+void initializeExternalFunctionAnalysisPass(PassRegistry&);
 void initializeFlattenCFGPassPass(PassRegistry&);
 void initializeStructurizeCFGPass(PassRegistry&);
 void initializeCFGViewerPass(PassRegistry&);
Index: lib/Analysis/Analysis.cpp
===================================================================
--- lib/Analysis/Analysis.cpp
+++ lib/Analysis/Analysis.cpp
@@ -40,6 +40,7 @@
   initializeDomViewerPass(Registry);
   initializeDomPrinterPass(Registry);
   initializeDomOnlyViewerPass(Registry);
+  initializeExternalFunctionAnalysisPass(Registry);
   initializePostDomViewerPass(Registry);
   initializeDomOnlyPrinterPass(Registry);
   initializePostDomPrinterPass(Registry);
Index: lib/Analysis/CMakeLists.txt
===================================================================
--- lib/Analysis/CMakeLists.txt
+++ lib/Analysis/CMakeLists.txt
@@ -18,6 +18,7 @@
   DependenceAnalysis.cpp
   DomPrinter.cpp
   DominanceFrontier.cpp
+  ExternalFunctionAnalysis.cpp
   IVUsers.cpp
   InstCount.cpp
   InstructionSimplify.cpp
Index: lib/Analysis/ExternalFunctionAnalysis.cpp
===================================================================
--- /dev/null
+++ lib/Analysis/ExternalFunctionAnalysis.cpp
@@ -0,0 +1,519 @@
+//=- ExternalFunctionAnalysis.cpp: Find external function pointers -*- C++ -*-//
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief A pass that finds incoming external function pointers and finds
+/// annotated storage locations and indirect calls based on these locations.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "efa"
+#include "llvm/Analysis/ExternalFunctionAnalysis.h"
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+
+#include <list>
+
+using namespace llvm;
+
+STATISTIC(NumMaybeExternalCalls, "Number of indirect call sites maybe using"
+                                 " external function pointers");
+STATISTIC(NumMaybeExternalStores, "Number of store instructions into annotated"
+                                  " locations");
+STATISTIC(NumAnnotatedFunctions, "Number of functions annotated as maybe"
+                                 " containing indirect calls to external code");
+
+char ExternalFunctionAnalysis::ID = 0;
+INITIALIZE_PASS(ExternalFunctionAnalysis, "efa", "ExternalFunctionAnalysis",
+                true, true)
+
+ModulePass *llvm::createExternalFunctionAnalysisPass() {
+  return new ExternalFunctionAnalysis();
+}
+
+namespace {
+// This is the annotation string used to help the analysis.
+const char efa_annotation[] = "efa-maybe-external";
+
+// These are the names of llvm-added external functions that appear when code is
+// annotated with __attribute__((annotate(...))).
+const char llvm_var_annotation[] = "llvm.var.annotation";
+const char llvm_ptr_annotation[] = "llvm.ptr.annotation";
+const char llvm_ptr_annotation_p0i8[] = "llvm.ptr.annotation.p0i8";
+const char llvm_global_annotations[] = "llvm.global.annotations";
+
+// Helper functions
+
+// Returns true if F is one of the llvm.var.annotation or llvm.ptr.annotation
+// intrinsics (matched by name prefix, so overloads such as .p0i8 count too).
+bool isLLVMExternal(const Function *F) {
+  StringRef FunName(F->getName());
+  return FunName.startswith(llvm_var_annotation) ||
+         FunName.startswith(llvm_ptr_annotation);
+}
+
+// Returns the formal parameter of F at position ArgNo, or nullptr when F
+// declares fewer than ArgNo + 1 parameters (e.g. the "..." of a vararg).
+const Value *getParameterForArgument(unsigned ArgNo, const Function *F) {
+  unsigned Index = 0;
+  Function::const_arg_iterator FAI, FAE;
+  for (FAI = F->arg_begin(), FAE = F->arg_end(); FAI != FAE; ++FAI, ++Index) {
+    if (Index == ArgNo)
+      return &*FAI;
+  }
+
+  return nullptr;
+}
+
+// Queues Val for traversal the first time it is encountered; values already
+// recorded in SeenValues are ignored so the worklist terminates on cycles.
+void followValue(const Value *Val, std::list<const Value *> &FoundValues,
+                 DenseSet<const Value *> &SeenValues) {
+  if (SeenValues.insert(Val).second)
+    FoundValues.push_back(Val);
+}
+
+// Follows a given call instruction that is either calling Val or is passing Val
+// in one of its arguments. If Val is being called, then add this to the set of
+// calls that might use an external function pointer. Otherwise, trace the
+// argument of a direct call down into the function itself and add the argument
+// to the list of values to follow.
+void followCall(const CallInst *CI, const Use *U, const Value *Val,
+                std::list<const Value *> &FoundValues,
+                DenseSet<const Value *> &SeenValues,
+                DenseSet<const Instruction *> &MaybeExternalCalls) {
+  if (CI->getCalledValue() == Val) {
+    MaybeExternalCalls.insert(CI);
+    ++NumMaybeExternalCalls;
+    return;
+  }
+
+  // It must be one of the operands. So, add the operand in the called
+  // function if this is a direct call to a function defined in this
+  // module.
+  Function *CalledFun = CI->getCalledFunction();
+  if (!CalledFun)
+    return;
+
+  if (CalledFun->isDeclaration()) {
+    if (isLLVMExternal(CalledFun)) {
+      // llvm.ptr.annotation acts like a cast, so follow the call's own result.
+      // Val itself is already in SeenValues; re-queueing it would do nothing.
+      followValue(CI, FoundValues, SeenValues);
+    }
+
+    return;
+  }
+
+  ImmutableCallSite ICS(CI);
+  if (!ICS.hasArgument(Val))
+    return;
+
+  unsigned ArgNo = ICS.getArgumentNo(U);
+
+  // This can return null if CalledFun is a VarArg function, and the
+  // argument we want is in the "..." part.
+  const Value *Arg = getParameterForArgument(ArgNo, CalledFun);
+  if (Arg)
+    followValue(Arg, FoundValues, SeenValues);
+}
+
+const Function *getFunctionParent(const Value *Val) {
+  // Val must be an instruction that is already inserted into a function.
+  const Instruction *I = dyn_cast<Instruction>(Val);
+  assert(I && "Couldn't get an instruction from a return value");
+
+  const BasicBlock *BB = I->getParent();
+  assert(BB && "Couldn't get the parent of an instruction");
+
+  const Function *ParentFun = BB->getParent();
+  assert(ParentFun && "Couldn't get the function parent of a BasicBlock");
+  return ParentFun;
+}
+
+// Follows a return instruction in Val by finding places it might return and
+// adding them to the list of values to follow (in FoundValues), if they haven't
+// been seen before.
+void followReturn(const Value *Val, std::list<const Value *> &FoundValues,
+                  DenseSet<const Value *> &SeenValues) {
+  const Function *ParentFun = getFunctionParent(Val);
+  // Every user of the enclosing function is queued, not only its direct call
+  // sites: a call's result is the returned value, and any other kind of user
+  // is simply traversed like every other followed value.
+  for (const Use &PFU : ParentFun->uses()) {
+    const User *PFUs = PFU.getUser();
+    followValue(cast<Value>(PFUs), FoundValues, SeenValues);
+  }
+}
+
+// Compares a function name against a list of known memory allocation
+// functions, as provided by TargetLibraryInfo. This is used to skip
+// memory-allocation functions in the analysis, since they are a frequent
+// source of false positives.
+bool isMemoryAllocation(const TargetLibraryInfo &TLI, StringRef Name) {
+  return
+      // void *new[](unsigned int)
+      TLI.getName(LibFunc::Znaj).equals(Name) ||
+      // void *new[](unsigned int, nothrow)
+      TLI.getName(LibFunc::ZnajRKSt9nothrow_t).equals(Name) ||
+      // void *new[](unsigned long)
+      TLI.getName(LibFunc::Znam).equals(Name) ||
+      // void *new[](unsigned long, nothrow)
+      TLI.getName(LibFunc::ZnamRKSt9nothrow_t).equals(Name) ||
+      // void *new(unsigned int)
+      TLI.getName(LibFunc::Znwj).equals(Name) ||
+      // void *new(unsigned int, nothrow)
+      TLI.getName(LibFunc::ZnwjRKSt9nothrow_t).equals(Name) ||
+      // void *new(unsigned long)
+      TLI.getName(LibFunc::Znwm).equals(Name) ||
+      // void *new(unsigned long, nothrow)
+      TLI.getName(LibFunc::ZnwmRKSt9nothrow_t).equals(Name) ||
+      // void *calloc(size_t count, size_t size)
+      TLI.getName(LibFunc::calloc).equals(Name) ||
+      // void *malloc(size_t size)
+      TLI.getName(LibFunc::malloc).equals(Name) ||
+      // void *memalign(size_t boundary, size_t size)
+      TLI.getName(LibFunc::memalign).equals(Name) ||
+      // int posix_memalign(void **memptr, size_t alignment, size_t size)
+      TLI.getName(LibFunc::posix_memalign).equals(Name) ||
+      // void *valloc(size_t size)
+      TLI.getName(LibFunc::valloc).equals(Name);
+}
+} // end anonymous namespace
+
+// Member functions
+
+void ExternalFunctionAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+}
+
+void ExternalFunctionAnalysis::findRelatedInstrs(const Value *Val) {
+  // Search all related values and the instructions that use them. This is
+  // a very restricted data-flow analysis.
+
+  const Function *F = getFunctionParent(Val);
+  // We can't use any of the vector classes here, since iterators must still be
+  // valid after push_back, which isn't guaranteed by a vector.
+  std::list<const Value *> Vals;
+  Vals.push_back(Val);
+  DenseSet<const Value *> SeenValues;
+  SeenValues.insert(Val);
+
+  for (const Value *V : Vals) {
+    for (const Use &U : V->uses()) {
+      const User *Us = U.getUser();
+      // NOTE(review): the instruction kinds tested here are reconstructed;
+      // the regression test needs bitcast, GEP and load followed - confirm.
+      if (isa<BitCastInst>(Us) || isa<GetElementPtrInst>(Us) ||
+          isa<LoadInst>(Us)) {
+        // We only follow instructions that are used as statements, not
+        // subexpressions of other instructions.
+        const Instruction *I = dyn_cast<Instruction>(Us);
+        if (I && isa<BasicBlock>(I->getParent()))
+          followValue(cast<Value>(Us), Vals, SeenValues);
+      } else if (const CallInst *CI = dyn_cast<CallInst>(Us)) {
+        followCall(CI, &U, V, Vals, SeenValues, MaybeExternalCalls);
+      } else if (const StoreInst *S = dyn_cast<StoreInst>(Us)) {
+        if (S->getValueOperand() == V) {
+          DEBUG(dbgs() << "Warning: in " << F->getName() << " an"
+                       << " efa-maybe-external pointer gets stored into another"
+                       << " pointer.\n");
+        } else if (S->getPointerOperand() == V) {
+          MaybeExternalStores.insert(S);
+          ++NumMaybeExternalStores;
+        }
+      }
+    }
+  }
+}
+
+void ExternalFunctionAnalysis::computeMaybeExternalPtrInstrs(Module &M) {
+  const Function *PtrAnnotation = M.getFunction(llvm_ptr_annotation_p0i8);
+  if (!PtrAnnotation)
+    return;
+
+  size_t annotation_len = array_lengthof(efa_annotation);
+  for (const auto &Us : PtrAnnotation->users()) {
+    // The second operand should say "efa-maybe-external" for this to be the
+    // right kind of annotation.
+    const Value *OpVal = Us->getOperand(1);
+    if (!OpVal)
+      continue;
+
+    const Value *V = cast<User>(OpVal)->getOperand(0);
+    assert(V && "Couldn't get operand 0 of operand 1 of a User");
+
+    const ConstantDataSequential *StrInit =
+        cast<ConstantDataSequential>(cast<GlobalVariable>(V)->getInitializer());
+    StringRef str = StrInit->getAsString();
+    // getAsString() keeps the trailing NUL, and so does annotation_len, so
+    // compare against the full array rather than the C-string length.
+    if (str == StringRef(efa_annotation, annotation_len)) {
+      // The llvm.ptr.annotation returns a value that replaces the first arg.
+      const Value *VPtr = cast<Value>(Us);
+      findRelatedInstrs(VPtr);
+    }
+  }
+}
+
+void ExternalFunctionAnalysis::computeMaybeExternalVarInstrs(Module &M) {
+  Function *VarAnnotation = M.getFunction(llvm_var_annotation);
+  if (!VarAnnotation)
+    return;
+
+  size_t annotation_len = array_lengthof(efa_annotation);
+  for (const auto &Us : VarAnnotation->users()) {
+    // The second operand should say "efa-maybe-external" for this to be the
+    // right kind of annotation.
+    Value *OpVal = Us->getOperand(1);
+    if (!OpVal)
+      continue;
+
+    const Value *V = cast<User>(OpVal)->getOperand(0);
+    assert(V && "Couldn't get operand 0 of operand 1 of a User");
+    const ConstantDataSequential *StrInit =
+        cast<ConstantDataSequential>(cast<GlobalVariable>(V)->getInitializer());
+    StringRef str = StrInit->getAsString();
+    // getAsString() keeps the trailing NUL, and so does annotation_len, so
+    // compare against the full array rather than the C-string length.
+    if (str == StringRef(efa_annotation, annotation_len)) {
+      // The llvm.var.annotation does not return a value, but the pointer it
+      // annotates is its first argument.
+      const User *OpUser = cast<User>(Us->getOperand(0));
+      assert(OpUser && "Couldn't get the User for operand 0");
+      const Value *VPtr = OpUser->getOperand(0);
+      assert(VPtr && "Couldn't get the second operand 0 for the annotation");
+      findRelatedInstrs(VPtr);
+    }
+  }
+}
+
+void ExternalFunctionAnalysis::computeMaybeExternalFuns(Module &M) {
+  GlobalVariable *Annotations = M.getNamedGlobal(llvm_global_annotations);
+  if (!Annotations)
+    return;
+
+  size_t annotation_len = array_lengthof(efa_annotation);
+  Constant *Init = Annotations->getInitializer();
+  assert(Init && "No initializer for global annotations");
+  for (const auto &V : Init->operand_values()) {
+    const User *U = cast<User>(V);
+    // Operand 1 is the annotation text. Entries carrying any other annotation
+    // are skipped for functions and for global variables alike.
+    const User *StrUser = cast<User>(U->getOperand(1));
+    assert(StrUser && "Couldn't get operand 1 for the annotation");
+    const Value *StrV = StrUser->getOperand(0);
+    assert(StrV && "Couldn't get the value for the annotation");
+    const ConstantDataSequential *StrInit = cast<ConstantDataSequential>(
+        cast<GlobalVariable>(StrV)->getInitializer());
+    assert(StrInit && "Couldn't get the string initializer for annotation");
+    StringRef str = StrInit->getAsString();
+    // getAsString() keeps the trailing NUL, and so does annotation_len.
+    if (str != StringRef(efa_annotation, annotation_len))
+      continue;
+
+    // Operand 0 wraps the annotated entity (a bitcast to i8* in the test IR).
+    const User *U2 = cast<User>(U->getOperand(0));
+    const Value *Target = U2->getOperand(0);
+    if (const Function *F = dyn_cast<Function>(Target)) {
+      MaybeExternalFuns.insert(F);
+      ++NumAnnotatedFunctions;
+    } else if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Target)) {
+      // Only look at this global variable if it is of pointer type.
+      if (isa<PointerType>(GV->getType()))
+        findCalls(GV);
+    }
+  }
+}
+
+void ExternalFunctionAnalysis::findCalls(const GlobalVariable *GV) {
+  for (const auto &GUs : GV->users()) {
+    const Value *V = cast<Value>(GUs);
+
+    // For store instructions, the global variable can be used directly.
+    if (const StoreInst *VS = dyn_cast<StoreInst>(V)) {
+      const Value *P = VS->getPointerOperand();
+      if (P == GV) {
+        MaybeExternalStores.insert(VS);
+        ++NumMaybeExternalStores;
+      }
+    }
+
+    // We can't use a vector type here because this algorithm depends on being
+    // able to continue iterating through FoundValues as new items are added.
+    std::list<const Value *> FoundValues;
+    FoundValues.push_back(V);
+    DenseSet<const Value *> SeenValues;
+    SeenValues.insert(V);
+
+    for (const Value *Cur : FoundValues) {
+      for (const Use &U : Cur->uses()) {
+        const User *Us = U.getUser();
+        const Value *Val = cast<Value>(Us);
+        // NOTE(review): cast kind reconstructed (lost in this copy) - confirm.
+        if (isa<BitCastInst>(Val)) {
+          followValue(Val, FoundValues, SeenValues);
+        } else if (isa<ReturnInst>(Val)) {
+          followReturn(Val, FoundValues, SeenValues);
+        } else if (const CallInst *CI = dyn_cast<CallInst>(Val)) {
+          followCall(CI, &U, Cur, FoundValues, SeenValues, MaybeExternalCalls);
+        } else if (const StoreInst *S = dyn_cast<StoreInst>(Val)) {
+          // If it's storing into this variable, then mark this store as a safe
+          // store.
+          if (S->getPointerOperand() == Cur) {
+            MaybeExternalStores.insert(S);
+            ++NumMaybeExternalStores;
+          }
+        }
+      }
+    }
+  }
+}
+
+void ExternalFunctionAnalysis::findFPStores(const Function *F,
+                                            const Instruction *I,
+                                            StoreSources &FPStores) {
+  // We can't use a vector here because the algorithm depends on modifying
+  // FoundValues while iterating through it.
+  std::list<const Value *> FoundValues;
+  FoundValues.push_back(I);
+  DenseSet<const Value *> SeenValues;
+  SeenValues.insert(I);
+
+  const Function *CallerFun = getFunctionParent(I);
+
+  for (const auto &V : FoundValues) {
+    for (const Use &U : V->uses()) {
+      const User *Us = U.getUser();
+      const Value *Val = cast<Value>(Us);
+      // NOTE(review): cast kind reconstructed (lost in this copy) - confirm.
+      if (isa<BitCastInst>(Val)) {
+        followValue(Val, FoundValues, SeenValues);
+      } else if (isa<ReturnInst>(Val)) {
+        followReturn(Val, FoundValues, SeenValues);
+      } else if (const StoreInst *S = dyn_cast<StoreInst>(Val)) {
+        Type *OpTy = S->getPointerOperand()->getType();
+        PointerType *PPTy = dyn_cast<PointerType>(OpTy);
+        if (!PPTy)
+          continue;
+
+        Type *PElementTy = PPTy->getElementType();
+        PointerType *PTy = dyn_cast<PointerType>(PElementTy);
+        if (!PTy)
+          continue;
+
+        Type *ElementTy = PTy->getElementType();
+        if (isa<FunctionType>(ElementTy)) {
+          FPStores[S].Source = F;
+          FPStores[S].Caller = CallerFun;
+        }
+      } else if (const CallInst *CI = dyn_cast<CallInst>(Val)) {
+        followCall(CI, &U, V, FoundValues, SeenValues, MaybeExternalCalls);
+      }
+    }
+  }
+}
+bool ExternalFunctionAnalysis::runOnModule(Module &M) {
+  // Build up the sets of maybe-external functions, variables, pointers, and
+  // their associated indirect calls and stores by looking for incoming external
+  // function pointers and tracing both their dataflow and dataflow from
+  // annotated storage locations.
+  computeMaybeExternalFuns(M);
+  computeMaybeExternalVarInstrs(M);
+  computeMaybeExternalPtrInstrs(M);
+
+  // Find function pointers returned from external functions.
+  findExternalFunctionPointers(M);
+  // This pass only inspects the module, so report it as unmodified.
+  return false;
+}
+
+// For each call site of an external function that returns a pointer, trace this
+// value up to see if it is ever cast to a function pointer and stored.
+void ExternalFunctionAnalysis::findExternalFunctionPointers(const Module &M) {
+
+  TargetLibraryInfo TLI;
+
+  // Walk through the set of functions looking for ones that return pointers.
+  // The first function in FPStores is the external function that originally
+  // generated the external pointer, and the second function is the function in
+  // which the call to the first function took place.
+  StoreSources FPStores;
+  for (const Function &FR : M) {
+    const Function *F = &FR;
+    // We only follow calls to external pointers.
+    const GlobalValue *GV = cast<GlobalValue>(F);
+    if (!GV->isDeclaration())
+      continue;
+
+    // We don't follow calls to llvm annotation functions
+    if (isLLVMExternal(F))
+      continue;
+
+    // By policy, we ignore memory allocation, since it is a frequent source of
+    // false positives.
+    if (isMemoryAllocation(TLI, F->getName()))
+      continue;
+
+    // Make sure this external function returns a pointer type.
+    FunctionType *FT = F->getFunctionType();
+    Type *RT = FT->getReturnType();
+    if (!isa<PointerType>(RT))
+      continue;
+    DEBUG(dbgs() << "External function '" << F->getName()
+                 << "' returns a pointer\n");
+
+    for (const auto &Us : F->users()) {
+      const Instruction *I = dyn_cast<Instruction>(Us);
+      if (!I)
+        continue;
+
+      // Only trace direct calls to F; a call that merely passes F is skipped.
+      ImmutableCallSite ICS(I);
+      if ((ICS.isCall() || ICS.isInvoke()) && ICS.getCalledFunction() == F) {
+        findFPStores(F, I, FPStores);
+      }
+    }
+  }
+
+  for (const auto &KV : FPStores) {
+    // Is the storage location annotated with efa-maybe-external? If not, then
+    // complain.
+    if (MaybeExternalStores.find(KV.first) != MaybeExternalStores.end())
+      continue;
+
+    const Function *ParentFun = getFunctionParent(KV.first);
+    if (!maybeContainsExternalCall(ParentFun)) {
+      errs() << "A store instruction in " << ParentFun->getName()
+             << " is storing an external function pointer derived from a call"
+             << " to " << KV.second.Source->getName() << " in the function "
+             << KV.second.Caller->getName() << " but is not annotated with"
+             << " efa-maybe-external\n";
+    }
+  }
+}
+
+bool ExternalFunctionAnalysis::maybeIsExternalCall(const Instruction *I) {
+  return MaybeExternalCalls.find(I) != MaybeExternalCalls.end();
+}
+
+bool ExternalFunctionAnalysis::maybeContainsExternalCall(const Function *F) {
+  return MaybeExternalFuns.find(F) != MaybeExternalFuns.end();
+}
Index: test/Analysis/ExternalFunctionAnalysis/external_function_analysis.ll
===================================================================
--- /dev/null
+++ test/Analysis/ExternalFunctionAnalysis/external_function_analysis.ll
@@ -0,0 +1,170 @@
+; RUN: llvm-as < %s >%t1
+; RUN: opt -efa -o %t2 %t1 -stats -debug-only=efa 2>&1 | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+@.str1 = private unnamed_addr constant [17 x i8] c"external_call.ll\00", section "llvm.metadata"
+@.str2 = private unnamed_addr constant [19 x i8] c"efa-maybe-external\00", section "llvm.metadata"
+@fff = internal global i32 (...)* null, align 8
+@llvm.global.annotations = appending global [3 x { i8*, i8*, i8*, i32 }] [{ i8*, i8*, i8*, i32 } { i8* bitcast (i32 (...)** @fff to i8*), i8* getelementptr inbounds ([19 x i8]* @.str2, i32 0, i32 0), i8* getelementptr inbounds ([17 x i8]* @.str1, i32 0, i32 0), i32 19 }, { i8*, i8*, i8*, i32 } { i8* bitcast (void ()* @known_external_call_fun to i8*), i8* getelementptr inbounds ([19 x i8]* @.str2, i32 0, i32 0), i8* getelementptr inbounds ([17 x i8]* @.str1, i32 0, i32 0), i32 19 }, { i8*, i8*, i8*, i32 } { i8* bitcast (void ()* @known_external_call_fun to i8*), i8* getelementptr inbounds ([19 x i8]* @.str2, i32 0, i32 0), i8* getelementptr inbounds ([17 x i8]* @.str1, i32 0, i32 0), i32 19 }], section "llvm.metadata"
+
+%struct.fun_struct = type { i32 (...)* }
+@sfs = internal global %struct.fun_struct zeroinitializer, align 8
+
+define internal i32 (...)* @g() {
+entry:
+  %call = call i32 (...)* (...)* @f()
+  ret i32 (...)* %call
+}
+
+declare i32 (...)* @f(...)
+declare i8* (...)* @f2(...)
+declare void @indirect_fun()
+declare void ()* @get_fun()
+
+define i32 @m(void ()* %fun) {
+  call void ()* %fun()
+  ret i32 0
+}
+
+define i32 @m_no_rewrite(void ()* %fun) {
+  call void ()* %fun()
+  ret i32 0
+}
+
+define void @retfun() {
+  ret void
+}
+
+declare void @llvm.var.annotation(i8*, i8*, i8*, i32)
+declare i8* @llvm.ptr.annotation.p0i8(i8*, i8*, i8*, i32)
+
+define i32 @call_ext_fun() {
+  %f = call void ()* ()* @get_fun()
+  %a = call i32 @m(void ()* %f)
+  ret i32 %a
+}
+
+; Check the case of storing into an annotated variable
+define void @var_annotation() {
+  %h = alloca i32 (...)*, align 8
+  %h1 = bitcast i32 (...)** %h to i8*
+  call void @llvm.var.annotation(i8* %h1, i8* getelementptr inbounds ([19 x i8]* @.str2, i32 0, i32 0), i8* getelementptr inbounds ([17 x i8]* @.str1, i32 0, i32 0), i32 25)
+  %call = call i32 (...)* ()* @g()
+  store i32 (...)* %call, i32 (...)** %h, align 8
+  ret void
+}
+
+; Check the case of storing into an annotated struct member
+define void @struct_annotation() {
+  %fs = alloca %struct.fun_struct, align 8
+  %call = call i8* (...)* (...)* @f2()
+  %1 = bitcast i8* (...)* %call to i32 (...)*
+  %v = getelementptr inbounds %struct.fun_struct* %fs, i32 0, i32 0
+  %2 = bitcast i32 (...)** %v to i8*
+  %3 = call i8* @llvm.ptr.annotation.p0i8(i8* %2, i8* getelementptr inbounds ([19 x i8]* @.str2, i32 0, i32 0), i8* getelementptr inbounds ([17 x i8]* @.str1, i32 0, i32 0), i32 10)
+  %4 = bitcast i8* %3 to i32 (...)**
+  store i32 (...)* %1, i32 (...)** %4, align 8
+  ret void
+}
+
+; Check the case of storing into part of an annotated structure
+define void @struct_annotation_through_GEP() {
+  %fs = alloca %struct.fun_struct, align 8
+  %fs.v = bitcast %struct.fun_struct* %fs to i8*
+  %1 = call i8* @llvm.ptr.annotation.p0i8(i8* %fs.v, i8* getelementptr inbounds ([19 x i8]* @.str2, i32 0, i32 0), i8* getelementptr inbounds ([17 x i8]* @.str1, i32 0, i32 0), i32 10)
+  %2 = bitcast i8* %1 to %struct.fun_struct*
+  %call = call i8* (...)* (...)* @f2()
+  %call.cast = bitcast i8* (...)* %call to i32 (...)*
+  %v = getelementptr inbounds %struct.fun_struct* %2, i32 0, i32 0
+  store i32 (...)* %call.cast, i32 (...)** %v, align 8
+  ret void
+}
+
+; Check the case of storing into a non-annotated struct member
+define void @struct_non_annotation() {
+  %fs = alloca %struct.fun_struct, align 8
+  %call = call i8* (...)* (...)* @f2()
+  %1 = bitcast i8* (...)* %call to i32 (...)*
+  %v = getelementptr inbounds %struct.fun_struct* %fs, i32 0, i32 0
+  store i32 (...)* %1, i32 (...)** %v, align 8
+  ret void
+}
+
+; Check the case of calling a value from an annotated struct member
+define void @call_struct_annotation() {
+  %fs = alloca %struct.fun_struct, align 8
+  %v = getelementptr inbounds %struct.fun_struct* %fs, i32 0, i32 0
+  %1 = bitcast i32 (...)** %v to i8*
+  %2 = call i8* @llvm.ptr.annotation.p0i8(i8* %1, i8* getelementptr inbounds ([19 x i8]* @.str2, i32 0, i32 0), i8* getelementptr inbounds ([17 x i8]* @.str1, i32 0, i32 0), i32 10)
+  %3 = bitcast i8* %2 to i32 (...)**
+  %4 = load i32 (...)** %3, align 8
+  %rv = call i32 (...)* %4()
+  ret void
+}
+
+define i32 (...)* @transitive_ext_fun() {
+  %f = call i32 (...)* (...)* @f()
+  %b = bitcast i32 (...)* %f to i8*
+  %a = bitcast i8* %b to i32 (...)*
+  ret i32 (...)* %f
+}
+
+define i32 (...)* @transitive_return_fun(i32 (...)* %in) {
+  %b = bitcast i32 (...)* %in to i8*
+  %a = bitcast i8* %b to i32 (...)*
+  ret i32 (...)* %a
+}
+
+; Check the case of storing into an annotated variable
+define void @transitive_pointer_detection() {
+  %h = alloca i32 (...)*, align 8
+  %h1 = bitcast i32 (...)** %h to i8*
+  call void @llvm.var.annotation(i8* %h1, i8* getelementptr inbounds ([19 x i8]* @.str2, i32 0, i32 0), i8* getelementptr inbounds ([17 x i8]* @.str1, i32 0, i32 0), i32 25)
+  %call = call i32 (...)* ()* @transitive_ext_fun()
+  store i32 (...)* %call, i32 (...)** %h, align 8
+  ret void
+}
+
+; Check the case of storing into an annotated variable
+define void @transitive_return_detection() {
+  %h = alloca i32 (...)*, align 8
+  %h1 = bitcast i32 (...)** %h to i8*
+  call void @llvm.var.annotation(i8* %h1, i8* getelementptr inbounds ([19 x i8]* @.str2, i32 0, i32 0), i8* getelementptr inbounds ([17 x i8]* @.str1, i32 0, i32 0), i32 25)
+  %f = call i32 (...)* (...)* @f()
+  %call = call i32 (...)* (i32 (...)*)* @transitive_return_fun(i32 (...)* %f)
+  store i32 (...)* %call, i32 (...)** %h, align 8
+  ret void
+}
+
+; Check the case of storing into a non-annotated variable
+define void @var_non_annotation() {
+  %h = alloca i32 (...)*, align 8
+  %call = call i32 (...)* ()* @g()
+  store i32 (...)* %call, i32 (...)** %h, align 8
+  ret void
+}
+
+define void @known_external_call_fun() {
+  %h = alloca i32 (...)*, align 8
+  %call = call i32 (...)* ()* @g()
+  store i32 (...)* %call, i32 (...)** %h, align 8
+  ret void
+}
+
+declare i8* @composite_pointer_external()
+define void @call_through_GEP() {
+
+  ret void
+}
+
+; XFAIL: win32
+; CHECK: External function 'f' returns a pointer
+; CHECK: External function 'f2' returns a pointer
+; CHECK: External function 'get_fun' returns a pointer
+; CHECK-NOT: A store instruction in struct_annotation_through_GEP is storing an external function pointer derived from a call to f2 in the function struct_annotation_through_GEP
+; CHECK-DAG: A store instruction in struct_non_annotation is storing an external function pointer derived from a call to f2 in the function struct_non_annotation
+; CHECK-DAG: A store instruction in var_non_annotation is storing an external function pointer derived from a call to f in the function g
+; CHECK-NOT: A store instruction in known_external_call_fun is storing an external function pointer derived from a call to f in the function g
+; CHECK: 2 efa - Number of indirect call sites maybe using external function pointers
+; CHECK: 5 efa - Number of store instructions into annotated locations