Index: include/llvm-c/Transforms/IPO.h =================================================================== --- include/llvm-c/Transforms/IPO.h +++ include/llvm-c/Transforms/IPO.h @@ -40,6 +40,9 @@ /** See llvm::createFunctionAttrsPass function. */ void LLVMAddFunctionAttrsPass(LLVMPassManagerRef PM); +/** See llvm::createFunctionAttrsTDPass function. */ +void LLVMAddFunctionAttrsTDPass(LLVMPassManagerRef PM); + /** See llvm::createFunctionInliningPass function. */ void LLVMAddFunctionInliningPass(LLVMPassManagerRef PM); Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -129,6 +129,7 @@ void initializeExpandISelPseudosPass(PassRegistry&); void initializeFindUsedTypesPass(PassRegistry&); void initializeFunctionAttrsPass(PassRegistry&); +void initializeFunctionAttrsTDPass(PassRegistry&); void initializeGCMachineCodeAnalysisPass(PassRegistry&); void initializeGCModuleInfoPass(PassRegistry&); void initializeGVNPass(PassRegistry&); Index: include/llvm/LinkAllPasses.h =================================================================== --- include/llvm/LinkAllPasses.h +++ include/llvm/LinkAllPasses.h @@ -142,6 +142,7 @@ (void) llvm::createInstructionNamerPass(); (void) llvm::createMetaRenamerPass(); (void) llvm::createFunctionAttrsPass(); + (void) llvm::createFunctionAttrsTDPass(); (void) llvm::createMergeFunctionsPass(); (void) llvm::createPrintModulePass(*(llvm::raw_ostream*)nullptr); (void) llvm::createPrintFunctionPass(*(llvm::raw_ostream*)nullptr); Index: include/llvm/Transforms/IPO.h =================================================================== --- include/llvm/Transforms/IPO.h +++ include/llvm/Transforms/IPO.h @@ -179,6 +179,12 @@ Pass *createFunctionAttrsPass(); //===----------------------------------------------------------------------===// +/// createFunctionAttrsTDPass - This pass discovers function arguments that +/// are always nonnull, dereferenceable and/or noalias. +/// +ModulePass *createFunctionAttrsTDPass(); + +//===----------------------------------------------------------------------===// /// createMergeFunctionsPass - This pass discovers identical functions and /// collapses them. /// Index: lib/LTO/LTOCodeGenerator.cpp =================================================================== --- lib/LTO/LTOCodeGenerator.cpp +++ lib/LTO/LTOCodeGenerator.cpp @@ -105,6 +105,7 @@ initializeSROA_DTPass(R); initializeSROA_SSAUpPass(R); initializeFunctionAttrsPass(R); + initializeFunctionAttrsTDPass(R); initializeGlobalsModRefPass(R); initializeLICMPass(R); initializeMergedLoadStoreMotionPass(R); Index: lib/Transforms/IPO/CMakeLists.txt =================================================================== --- lib/Transforms/IPO/CMakeLists.txt +++ lib/Transforms/IPO/CMakeLists.txt @@ -5,6 +5,7 @@ DeadArgumentElimination.cpp ExtractGV.cpp FunctionAttrs.cpp + FunctionAttrsTD.cpp GlobalDCE.cpp GlobalOpt.cpp IPConstantPropagation.cpp Index: lib/Transforms/IPO/FunctionAttrsTD.cpp =================================================================== --- /dev/null +++ lib/Transforms/IPO/FunctionAttrsTD.cpp @@ -0,0 +1,345 @@ +//===----- FunctionAttrsTD.cpp - Deduce Function Parameter Attributes -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass traverses the call graph top-down in order to deduce +// function-parameter attributes for local functions based on properties of +// their callers (nonnull, dereferenceable, noalias). +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/Utils/Local.h" +using namespace llvm; + +#define DEBUG_TYPE "functionattrs-td" + +STATISTIC(NumNonNull, "Number of arguments marked nonnull"); +STATISTIC(NumNoAlias, "Number of arguments marked noalias"); +STATISTIC(NumDereferenceable, "Number of arguments marked dereferenceable"); +STATISTIC(NumAlign, "Number of arguments marked with enhanced alignment"); + +namespace { + class FunctionAttrsTD : public ModulePass { + const DataLayout *DL; + AliasAnalysis *AA; + TargetLibraryInfo *TLI; + + bool addFuncAttrs(Function &F, DenseMap &DTs); + + public: + static char ID; // Pass identification, replacement for typeid + explicit FunctionAttrsTD(); + bool runOnModule(Module &M) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + } + }; +} // end anonymous namespace + +char FunctionAttrsTD::ID = 0; +INITIALIZE_PASS(FunctionAttrsTD, "functionattrs-td", + "Top-down function parameter attributes", false, false) + +FunctionAttrsTD::FunctionAttrsTD() : ModulePass(ID) { + initializeFunctionAttrsTDPass(*PassRegistry::getPassRegistry()); +} + +bool FunctionAttrsTD::addFuncAttrs(Function &F, + DenseMap &DTs) { + bool MadeChange = false; + if (F.isDeclaration() || !F.hasLocalLinkage() || F.arg_empty() || + F.use_empty()) + return MadeChange; + + struct AttrState { + bool IsNonNull; + bool IsNoAlias; + bool IsDereferenceable; + bool IsAlign; + uint64_t MaxSize, MaxAlign; + + AttrState() : IsNonNull(true), IsNoAlias(true), + IsDereferenceable(true), IsAlign(true), + MaxSize(UINT64_MAX), MaxAlign(UINT64_MAX) {} + + bool any() const { + return IsNonNull || IsNoAlias || IsDereferenceable || IsAlign; + } + + void setNone() { + IsNonNull = false; + IsNoAlias = false; + IsDereferenceable = false; + IsAlign = false; + } + }; + + // For each argument, keep track of whether it is donated or not. + // The bool is set to true when found to be non-donated. + SmallVector ArgumentState; + ArgumentState.resize(F.arg_size()); + + for (Use &U : F.uses()) { + User *UR = U.getUser(); + // Ignore blockaddress uses. + if (isa(UR)) continue; + + // Used by a non-instruction, or not the callee of a function, do not + // transform. + if (!isa(UR) && !isa(UR)) + return MadeChange; + + CallSite CS(cast(UR)); + if (!CS.isCallee(&U)) + return MadeChange; + + // Check out all of the arguments. Note that we don't inspect varargs here. + CallSite::arg_iterator AI = CS.arg_begin(); + Function::arg_iterator Arg = F.arg_begin(); + for (unsigned i = 0, e = ArgumentState.size(); i != e; ++i, ++AI, ++Arg) { + // If we've already excluded this argument, ignore it. + if (!ArgumentState[i].any()) + continue; + + Type *Ty = (*AI)->getType(); + if (!Ty->isPointerTy()) { + ArgumentState[i].setNone(); + continue; + } + + if (ArgumentState[i].IsNonNull && !Arg->hasNonNullAttr()) { + if (!isKnownNonNull(*AI, TLI)) + ArgumentState[i].IsNonNull = false; + } + + Type *ETy = Ty->getPointerElementType(); + if (!DL || !ETy->isSized()) { + ArgumentState[i].IsDereferenceable = false; + } else if (ArgumentState[i].IsDereferenceable && + ArgumentState[i].MaxSize > Arg->getDereferenceableBytes()) { + uint64_t TypeSize = DL->getTypeStoreSize(ETy); + if (!(*AI)->isDereferenceablePointer(DL) && + !isSafeToLoadUnconditionally(*AI, CS.getInstruction(), + getKnownAlignment(*AI, DL), DL)) { + ArgumentState[i].IsDereferenceable = false; + // FIXME: isSafeToLoadUnconditionally does not understand memset. + // FIXME: We can use getObjectSize for most things, but for mallocs + // we need to make sure that the answer is nonnull. We can use the + // existing logic that checks for nonnull for that. + } else { + uint64_t Size; + if (!getObjectSize(*AI, Size, DL, TLI)) { + // FIXME: getObjectSize does not use the dereferenceable attribute + // to get the size when possible. + Size = TypeSize; + } else { + // We've already verified that a load of the type size would not + // trap, so we can use at least that size. + Size = std::max(Size, TypeSize); + } + + ArgumentState[i].MaxSize = std::min(ArgumentState[i].MaxSize, Size); + if (!ArgumentState[i].MaxSize) + ArgumentState[i].IsDereferenceable = false; + } + } + + if (!DL || !ETy->isSized() || Arg->hasByValOrInAllocaAttr()) { + ArgumentState[i].IsAlign = false; + } else if (ArgumentState[i].IsAlign && + ArgumentState[i].MaxAlign > Arg->getParamAlignment()) { + unsigned Align = getKnownAlignment(*AI, DL); + if (Align > DL->getABITypeAlignment(ETy)) + ArgumentState[i].MaxAlign = + std::min(ArgumentState[i].MaxAlign, (uint64_t) Align); + else + ArgumentState[i].IsAlign = false; + } + + if (isa(*AI)) { + ArgumentState[i].IsNoAlias = false; + } else if (ArgumentState[i].IsNoAlias && !Arg->hasNoAliasAttr()) { + bool NoAlias = true; + for (CallSite::arg_iterator BI = CS.arg_begin(), BIE = CS.arg_end(); + BI != BIE; ++BI) { + if (BI == AI) + continue; + + if (AA->alias(*AI, *BI) != AliasAnalysis::NoAlias) { + NoAlias = false; + break; + } + } + + SmallVector UObjects; + GetUnderlyingObjects(*AI, UObjects, DL); + for (Value *V : UObjects) + if (!isIdentifiedFunctionLocal(V)) { + NoAlias = false; + break; + } + + if (NoAlias) { + DominatorTree *DT; + Function *Caller = CS.getInstruction()->getParent()->getParent(); + auto DTI = DTs.find(Caller); + if (DTI == DTs.end()) { + DT = new DominatorTree; + DT->recalculate(*Caller); + DTs[Caller] = DT; + } else { + DT = DTI->second; + } + + if (PointerMayBeCapturedBefore(*AI, /* ReturnCaptures */ false, + /* StoreCaptures */ false, + CS.getInstruction(), DT)) + ArgumentState[i].IsNoAlias = false; + } else { + ArgumentState[i].IsNoAlias = false; + } + } + } + + bool HaveCand = false; + for (unsigned i = 0, e = ArgumentState.size(); i != e; ++i) + if (ArgumentState[i].any()) { + HaveCand = true; + break; + } + + if (!HaveCand) + break; + } + + Function::arg_iterator AI = F.arg_begin(); + for (unsigned i = 0, e = ArgumentState.size(); i != e; ++i, ++AI) { + if (!ArgumentState[i].any() || AI->hasInAllocaAttr() || AI->hasByValAttr()) + continue; + + if (!AI->hasNonNullAttr() && ArgumentState[i].IsNonNull && + (!ArgumentState[i].IsDereferenceable || + AI->getType()->getPointerAddressSpace() != 0)) { + AttrBuilder B; + B.addAttribute(Attribute::NonNull); + + AI->addAttr(AttributeSet::get(F.getContext(), AI->getArgNo() + 1, B)); + ++NumNonNull; + MadeChange = true; + } + + if (!AI->hasNoAliasAttr() && ArgumentState[i].IsNoAlias) { + AttrBuilder B; + B.addAttribute(Attribute::NoAlias); + + AI->addAttr(AttributeSet::get(F.getContext(), AI->getArgNo() + 1, B)); + ++NumNoAlias; + MadeChange = true; + } + + if (AI->getDereferenceableBytes() < ArgumentState[i].MaxSize && + ArgumentState[i].IsDereferenceable) { + AttrBuilder B; + B.addDereferenceableAttr(ArgumentState[i].MaxSize); + + AI->addAttr(AttributeSet::get(F.getContext(), AI->getArgNo() + 1, B)); + ++NumDereferenceable; + MadeChange = true; + } + + if (AI->getParamAlignment() < ArgumentState[i].MaxAlign && + ArgumentState[i].IsAlign) { + AttrBuilder B; + B.addAlignmentAttr(ArgumentState[i].MaxAlign); + + AI->addAttr(AttributeSet::get(F.getContext(), AI->getArgNo() + 1, B)); + ++NumAlign; + MadeChange = true; + } + } + + return MadeChange; +} + +bool FunctionAttrsTD::runOnModule(Module &M) { + AA = &getAnalysis(); + TLI = &getAnalysis(); + DataLayoutPass *DLP = getAnalysisIfAvailable(); + DL = DLP ? &DLP->getDataLayout() : nullptr; + + CallGraph &CG = getAnalysis().getCallGraph(); + bool Changed = false; + + // We want to iterate top-down, but also want to do a fixed-point iteration + // on SCCs to ensure a stable answer. The regular SCC iterator returns the + // SCCs bottom-up (it uses Tarjan's Algorithm, and knows it has found the + // root of an SCC as it passes it on the way up -- it is not clear there is a + // better way). Because we can't collect SCCs in a purely top-down fashion + // anyway, just store those returned by the bottom-up SCC iterator and visit + // this stored list in reverse order. + SmallVector, 8> CGSCCs; + for (scc_iterator CGI = scc_begin(&CG), CGIE = scc_end(&CG); + CGI != CGIE; ++CGI) { + CGSCCs.push_back(SmallVector(CGI->begin(), + CGI->end())); + } + + scc_iterator CGI = scc_begin(&CG); + + DenseMap DTs; + for (auto S = CGSCCs.rbegin(), SE = CGSCCs.rend(); S != SE; ++S) { + // FIXME: We could get a better answer by speculating the attributes within + // each SCC (meaning setting all attributes that might be set, and then + // removing those found be to unprovable at some call sites of the SCC's + // functions). + + bool ChangedThisSCC; + do { + ChangedThisSCC = false; + for (CallGraphNode *CGN : *S) { + Function *F = CGN->getFunction(); + if (!F) + continue; + DEBUG(dbgs() << "FATD visiting: " << F->getName() << "\n"); + Changed |= (ChangedThisSCC |= addFuncAttrs(*F, DTs)); + } + } while (ChangedThisSCC); + } + + return Changed; +} + +ModulePass *llvm::createFunctionAttrsTDPass() { return new FunctionAttrsTD(); } + Index: lib/Transforms/IPO/IPO.cpp =================================================================== --- lib/Transforms/IPO/IPO.cpp +++ lib/Transforms/IPO/IPO.cpp @@ -27,6 +27,7 @@ initializeDAEPass(Registry); initializeDAHPass(Registry); initializeFunctionAttrsPass(Registry); + initializeFunctionAttrsTDPass(Registry); initializeGlobalDCEPass(Registry); initializeGlobalOptPass(Registry); initializeIPCPPass(Registry); @@ -67,6 +68,10 @@ unwrap(PM)->add(createFunctionAttrsPass()); } +void LLVMAddFunctionAttrsTDPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createFunctionAttrsTDPass()); +} + void LLVMAddFunctionInliningPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createFunctionInliningPass()); } Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -164,6 +164,9 @@ MPM.add(createInstructionCombiningPass());// Clean up after IPCP & DAE addExtensionsToPM(EP_Peephole, MPM); MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE + + MPM.add(createFunctionAttrsPass()); // Set nocapture attr + MPM.add(createFunctionAttrsTDPass()); // Set noalias attr } // Start of CallGraph SCC passes. @@ -345,6 +348,7 @@ // Run a few AA driven optimizations here and now, to cleanup the code. PM.add(createFunctionAttrsPass()); // Add nocapture. + PM.add(createFunctionAttrsTDPass()); // Add noalias. PM.add(createGlobalsModRefPass()); // IP alias analysis. PM.add(createLICMPass()); // Hoist loop invariants. Index: test/Transforms/FunctionAttrsTD/large-agg.ll =================================================================== --- /dev/null +++ test/Transforms/FunctionAttrsTD/large-agg.ll @@ -0,0 +1,77 @@ +; RUN: opt -S -basicaa -functionattrs-td < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.s = type { [65280 x i32] } +%struct.x = type opaque +%struct.x.0 = type opaque + +; CHECK: define void @bar() + +; Function Attrs: nounwind uwtable +define void @bar() #0 { +entry: + %x = alloca %struct.s, align 32 + %y = alloca %struct.s, align 4 + %0 = bitcast %struct.s* %x to i8* + call void @llvm.lifetime.start(i64 261120, i8* %0) #1 + %1 = bitcast %struct.s* %y to i8* + call void @llvm.lifetime.start(i64 261120, i8* %1) #1 + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds %struct.s* %x, i64 0, i32 0, i64 %indvars.iv + %2 = trunc i64 %indvars.iv to i32 + store i32 %2, i32* %arrayidx, align 4, !tbaa !0 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 17800 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + %3 = bitcast %struct.s* %y to %struct.x* + call fastcc void @foo(%struct.s* %x, %struct.x* %3) + call void @llvm.lifetime.end(i64 261120, i8* %1) #1 + call void @llvm.lifetime.end(i64 261120, i8* %0) #1 + ret void +} + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #1 + +; CHECK: define internal fastcc void @foo(%struct.s* noalias nocapture readonly align 32 dereferenceable(261120) %x, %struct.x* noalias %y) + +; Function Attrs: noinline nounwind uwtable +define internal fastcc void @foo(%struct.s* nocapture readonly %x, %struct.x* %y) #2 { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %sum.05 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds %struct.s* %x, i64 0, i32 0, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4, !tbaa !0 + %add = add nsw i32 %0, %sum.05 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 16000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + %1 = bitcast %struct.x* %y to %struct.x.0* + tail call void @goo(i32 %add, %struct.x.0* %1) #1 + ret void +} + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #1 + +declare void @goo(i32, %struct.x.0*) + +attributes #0 = { nounwind uwtable } +attributes #1 = { nounwind } +attributes #2 = { noinline nounwind uwtable } + +!0 = metadata !{metadata !1, metadata !1, i64 0} +!1 = metadata !{metadata !"int", metadata !2, i64 0} +!2 = metadata !{metadata !"omnipotent char", metadata !3, i64 0} +!3 = metadata !{metadata !"Simple C/C++ TBAA"} Index: test/Transforms/FunctionAttrsTD/malloc.ll =================================================================== --- /dev/null +++ test/Transforms/FunctionAttrsTD/malloc.ll @@ -0,0 +1,42 @@ +; RUN: opt -S -basicaa -functionattrs-td < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK: define i32 @bar() + +; Function Attrs: nounwind uwtable +define i32 @bar() #0 { +entry: + %call = tail call noalias i8* @malloc(i64 12345) #3 + %0 = bitcast i8* %call to i32* + store i32 10, i32* %0, align 4, !tbaa !0 + tail call fastcc void @foo(i32* %0) + ret i32 0 +} + +; Function Attrs: nounwind +declare noalias i8* @malloc(i64) #1 + +; CHECK: define internal fastcc void @foo(i32* noalias dereferenceable(12345) %x) + +; Function Attrs: noinline nounwind uwtable +define internal fastcc void @foo(i32* %x) #2 { +entry: + %arrayidx = getelementptr inbounds i32* %x, i64 5 + store i32 10, i32* %arrayidx, align 4, !tbaa !0 + tail call void @goo(i32* %x) #3 + ret void +} + +declare void @goo(i32*) #3 + +attributes #0 = { nounwind uwtable } +attributes #1 = { nounwind } +attributes #2 = { noinline nounwind uwtable } +attributes #3 = { nounwind } + +!0 = metadata !{metadata !1, metadata !1, i64 0} +!1 = metadata !{metadata !"int", metadata !2, i64 0} +!2 = metadata !{metadata !"omnipotent char", metadata !3, i64 0} +!3 = metadata !{metadata !"Simple C/C++ TBAA"} +