Index: include/llvm-c/Transforms/IPO.h =================================================================== --- include/llvm-c/Transforms/IPO.h +++ include/llvm-c/Transforms/IPO.h @@ -40,6 +40,9 @@ /** See llvm::createFunctionAttrsPass function. */ void LLVMAddFunctionAttrsPass(LLVMPassManagerRef PM); +/** See llvm::createFunctionAttrsTDPass function. */ +void LLVMAddFunctionAttrsTDPass(LLVMPassManagerRef PM); + /** See llvm::createFunctionInliningPass function. */ void LLVMAddFunctionInliningPass(LLVMPassManagerRef PM); Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -129,6 +129,7 @@ void initializeExpandISelPseudosPass(PassRegistry&); void initializeFindUsedTypesPass(PassRegistry&); void initializeFunctionAttrsPass(PassRegistry&); +void initializeFunctionAttrsTDPass(PassRegistry&); void initializeGCMachineCodeAnalysisPass(PassRegistry&); void initializeGCModuleInfoPass(PassRegistry&); void initializeGVNPass(PassRegistry&); Index: include/llvm/LinkAllPasses.h =================================================================== --- include/llvm/LinkAllPasses.h +++ include/llvm/LinkAllPasses.h @@ -142,6 +142,7 @@ (void) llvm::createInstructionNamerPass(); (void) llvm::createMetaRenamerPass(); (void) llvm::createFunctionAttrsPass(); + (void) llvm::createFunctionAttrsTDPass(); (void) llvm::createMergeFunctionsPass(); (void) llvm::createPrintModulePass(*(llvm::raw_ostream*)nullptr); (void) llvm::createPrintFunctionPass(*(llvm::raw_ostream*)nullptr); Index: include/llvm/Transforms/IPO.h =================================================================== --- include/llvm/Transforms/IPO.h +++ include/llvm/Transforms/IPO.h @@ -179,6 +179,12 @@ Pass *createFunctionAttrsPass(); //===----------------------------------------------------------------------===// +/// createFunctionAttrsTDPass - This pass discovers function arguments that +/// are always nonnull, dereferenceable and/or noalias. +/// +ModulePass *createFunctionAttrsTDPass(); + +//===----------------------------------------------------------------------===// /// createMergeFunctionsPass - This pass discovers identical functions and /// collapses them. /// Index: lib/LTO/LTOCodeGenerator.cpp =================================================================== --- lib/LTO/LTOCodeGenerator.cpp +++ lib/LTO/LTOCodeGenerator.cpp @@ -105,6 +105,7 @@ initializeSROA_DTPass(R); initializeSROA_SSAUpPass(R); initializeFunctionAttrsPass(R); + initializeFunctionAttrsTDPass(R); initializeGlobalsModRefPass(R); initializeLICMPass(R); initializeMergedLoadStoreMotionPass(R); Index: lib/Transforms/IPO/CMakeLists.txt =================================================================== --- lib/Transforms/IPO/CMakeLists.txt +++ lib/Transforms/IPO/CMakeLists.txt @@ -5,6 +5,7 @@ DeadArgumentElimination.cpp ExtractGV.cpp FunctionAttrs.cpp + FunctionAttrsTD.cpp GlobalDCE.cpp GlobalOpt.cpp IPConstantPropagation.cpp Index: lib/Transforms/IPO/FunctionAttrsTD.cpp =================================================================== --- /dev/null +++ lib/Transforms/IPO/FunctionAttrsTD.cpp @@ -0,0 +1,288 @@ +//===----- FunctionAttrsTD.cpp - Deduce Function Parameter Attributes -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass traverses the call graph top-down in order to deduce +// function-parameter attributes for local functions based on properties of +// their callers (nonnull, dereferenceable, noalias). +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/Utils/Local.h" +using namespace llvm; + +#define DEBUG_TYPE "functionattrs-td" + +STATISTIC(NumNonNull, "Number of arguments marked nonnull"); +STATISTIC(NumNoAlias, "Number of arguments marked noalias"); +STATISTIC(NumDereferenceable, "Number of arguments marked dereferenceable"); + +namespace { + class FunctionAttrsTD : public ModulePass { + const DataLayout *DL; + AliasAnalysis *AA; + TargetLibraryInfo *TLI; + + bool addFuncAttrs(Function &F, DenseMap &DTs); + + public: + static char ID; // Pass identification, replacement for typeid + explicit FunctionAttrsTD(); + bool runOnModule(Module &M) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + } + }; +} // end anonymous namespace + +char FunctionAttrsTD::ID = 0; +INITIALIZE_PASS(FunctionAttrsTD, "functionattrs-td", + "Top-down function parameter attributes", false, false) + +FunctionAttrsTD::FunctionAttrsTD() : ModulePass(ID) { + initializeFunctionAttrsTDPass(*PassRegistry::getPassRegistry()); +} + +bool FunctionAttrsTD::addFuncAttrs(Function &F, + DenseMap &DTs) { + bool MadeChange = false; + if (F.isDeclaration() || !F.hasLocalLinkage() || F.arg_empty() || + F.use_empty()) + return MadeChange; + + struct AttrState { + bool IsNonNull; + bool IsNoAlias; + bool IsDereferenceable; + uint64_t MaxSize; + + AttrState() : IsNonNull(true), IsNoAlias(true), + IsDereferenceable(true), MaxSize(UINT64_MAX) {} + + bool any() const { + return IsNonNull || IsNoAlias || IsDereferenceable; + } + + void setNone() { + IsNonNull = false; + IsNoAlias = false; + IsDereferenceable = false; + } + }; + + // For each argument, keep track of whether it is donated or not. + // The bool is set to true when found to be non-donated. + SmallVector ArgumentState; + ArgumentState.resize(F.arg_size()); + + for (Use &U : F.uses()) { + User *UR = U.getUser(); + // Ignore blockaddress uses. + if (isa(UR)) continue; + + // Used by a non-instruction, or not the callee of a function, do not + // transform. + if (!isa(UR) && !isa(UR)) + return MadeChange; + + CallSite CS(cast(UR)); + if (!CS.isCallee(&U)) + return MadeChange; + + // Check out all of the arguments. Note that we don't inspect varargs here. + CallSite::arg_iterator AI = CS.arg_begin(); + Function::arg_iterator Arg = F.arg_begin(); + for (unsigned i = 0, e = ArgumentState.size(); i != e; ++i, ++AI, ++Arg) { + // If we've already excluded this argument, ignore it. + if (!ArgumentState[i].any()) + continue; + + Type *Ty = (*AI)->getType(); + if (!Ty->isPointerTy()) { + ArgumentState[i].setNone(); + continue; + } + + if (ArgumentState[i].IsNonNull && !Arg->hasNonNullAttr()) { + if (!isKnownNonNull(*AI, TLI)) + ArgumentState[i].IsNonNull = false; + } + + Type *ETy = Ty->getPointerElementType(); + if (!DL || !ETy->isSized()) { + ArgumentState[i].IsDereferenceable = false; + } else if (ArgumentState[i].IsDereferenceable && + ArgumentState[i].MaxSize > Arg->getDereferenceableBytes()) { + uint64_t TypeSize = DL->getTypeStoreSize(ETy); + if (!(*AI)->isDereferenceablePointer(DL) && + !isSafeToLoadUnconditionally(*AI, CS.getInstruction(), + getKnownAlignment(*AI, DL), DL)) { + ArgumentState[i].IsDereferenceable = false; + } else { + uint64_t Size; + if (!getObjectSize(*AI, Size, DL, TLI)) { + Size = TypeSize; + } else { + // We've already verified that a load of the type size would not + // trap, so we can use at least that size. + Size = std::max(Size, TypeSize); + } + + ArgumentState[i].MaxSize = std::min(ArgumentState[i].MaxSize, Size); + if (!ArgumentState[i].MaxSize) + ArgumentState[i].IsDereferenceable = false; + } + } + + if (isa(*AI)) { + ArgumentState[i].IsNoAlias = false; + } else if (ArgumentState[i].IsNoAlias && !Arg->hasNoAliasAttr()) { + bool NoAlias = true; + for (CallSite::arg_iterator BI = CS.arg_begin(), BIE = CS.arg_end(); + BI != BIE; ++BI) { + if (BI == AI) + continue; + + if (AA->alias(*AI, *BI) != AliasAnalysis::NoAlias) { + NoAlias = false; + break; + } + } + + SmallVector UObjects; + GetUnderlyingObjects(*AI, UObjects, DL); + for (Value *V : UObjects) + if (!isIdentifiedFunctionLocal(V)) { + NoAlias = false; + break; + } + + if (NoAlias) { + DominatorTree *DT; + Function *Caller = CS.getInstruction()->getParent()->getParent(); + auto DTI = DTs.find(Caller); + if (DTI == DTs.end()) { + DT = new DominatorTree; + DT->recalculate(*Caller); + DTs[Caller] = DT; + } else { + DT = DTI->second; + } + + if (PointerMayBeCapturedBefore(*AI, /* ReturnCaptures */ false, + /* StoreCaptures */ false, + CS.getInstruction(), DT)) + ArgumentState[i].IsNoAlias = false; + } else { + ArgumentState[i].IsNoAlias = false; + } + } + } + + bool HaveCand = false; + for (unsigned i = 0, e = ArgumentState.size(); i != e; ++i) + if (ArgumentState[i].any()) { + HaveCand = true; + break; + } + + if (!HaveCand) + break; + } + + Function::arg_iterator AI = F.arg_begin(); + for (unsigned i = 0, e = ArgumentState.size(); i != e; ++i, ++AI) { + if (!ArgumentState[i].any() || AI->hasInAllocaAttr() || AI->hasByValAttr()) + continue; + + if (!AI->hasNonNullAttr() && ArgumentState[i].IsNonNull && + (!ArgumentState[i].IsDereferenceable || + AI->getType()->getPointerAddressSpace() != 0)) { + AttrBuilder B; + B.addAttribute(Attribute::NonNull); + + AI->addAttr(AttributeSet::get(F.getContext(), AI->getArgNo() + 1, B)); + ++NumNonNull; + MadeChange = true; + } + + if (!AI->hasNoAliasAttr() && ArgumentState[i].IsNoAlias) { + AttrBuilder B; + B.addAttribute(Attribute::NoAlias); + + AI->addAttr(AttributeSet::get(F.getContext(), AI->getArgNo() + 1, B)); + ++NumNoAlias; + MadeChange = true; + } + + if (!AI->getDereferenceableBytes() && ArgumentState[i].IsDereferenceable) { + AttrBuilder B; + B.addDereferenceableAttr(ArgumentState[i].MaxSize); + + AI->addAttr(AttributeSet::get(F.getContext(), AI->getArgNo() + 1, B)); + ++NumDereferenceable; + MadeChange = true; + } + } + + return MadeChange; +} + +bool FunctionAttrsTD::runOnModule(Module &M) { + AA = &getAnalysis(); + TLI = &getAnalysis(); + DataLayoutPass *DLP = getAnalysisIfAvailable(); + DL = DLP ? &DLP->getDataLayout() : nullptr; + + CallGraph &CG = getAnalysis().getCallGraph(); + CallGraphNode *ExternalNode = CG.getExternalCallingNode(); + bool Changed = false; + + DenseMap DTs; + for (df_iterator I = df_begin(ExternalNode), + IE = df_end(ExternalNode); I != IE; ++I) { + Function *F = (*I)->getFunction(); + if (!F) + continue; + DEBUG(dbgs() << "FATD visiting: " << F->getName() << "\n"); + + Changed |= addFuncAttrs(*F, DTs); + } + + return Changed; +} + +ModulePass *llvm::createFunctionAttrsTDPass() { return new FunctionAttrsTD(); } + Index: lib/Transforms/IPO/IPO.cpp =================================================================== --- lib/Transforms/IPO/IPO.cpp +++ lib/Transforms/IPO/IPO.cpp @@ -27,6 +27,7 @@ initializeDAEPass(Registry); initializeDAHPass(Registry); initializeFunctionAttrsPass(Registry); + initializeFunctionAttrsTDPass(Registry); initializeGlobalDCEPass(Registry); initializeGlobalOptPass(Registry); initializeIPCPPass(Registry); @@ -67,6 +68,10 @@ unwrap(PM)->add(createFunctionAttrsPass()); } +void LLVMAddFunctionAttrsTDPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createFunctionAttrsTDPass()); +} + void LLVMAddFunctionInliningPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createFunctionInliningPass()); } Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -164,6 +164,9 @@ MPM.add(createInstructionCombiningPass());// Clean up after IPCP & DAE addExtensionsToPM(EP_Peephole, MPM); MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE + + MPM.add(createFunctionAttrsPass()); // Set nocapture attr + MPM.add(createFunctionAttrsTDPass()); // Set noalias attr } // Start of CallGraph SCC passes. @@ -345,6 +348,7 @@ // Run a few AA driven optimizations here and now, to cleanup the code. PM.add(createFunctionAttrsPass()); // Add nocapture. + PM.add(createFunctionAttrsTDPass()); // Add noalias. PM.add(createGlobalsModRefPass()); // IP alias analysis. PM.add(createLICMPass()); // Hoist loop invariants. Index: test/Transforms/FunctionAttrsTD/large-agg.ll =================================================================== --- /dev/null +++ test/Transforms/FunctionAttrsTD/large-agg.ll @@ -0,0 +1,77 @@ +; RUN: opt -S -basicaa -functionattrs-td < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.s = type { [65280 x i32] } +%struct.x = type opaque +%struct.x.0 = type opaque + +; CHECK: define void @bar() + +; Function Attrs: nounwind uwtable +define void @bar() #0 { +entry: + %x = alloca %struct.s, align 4 + %y = alloca %struct.s, align 4 + %0 = bitcast %struct.s* %x to i8* + call void @llvm.lifetime.start(i64 261120, i8* %0) #1 + %1 = bitcast %struct.s* %y to i8* + call void @llvm.lifetime.start(i64 261120, i8* %1) #1 + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds %struct.s* %x, i64 0, i32 0, i64 %indvars.iv + %2 = trunc i64 %indvars.iv to i32 + store i32 %2, i32* %arrayidx, align 4, !tbaa !0 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 17800 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + %3 = bitcast %struct.s* %y to %struct.x* + call fastcc void @foo(%struct.s* %x, %struct.x* %3) + call void @llvm.lifetime.end(i64 261120, i8* %1) #1 + call void @llvm.lifetime.end(i64 261120, i8* %0) #1 + ret void +} + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #1 + +; CHECK: define internal fastcc void @foo(%struct.s* noalias nocapture readonly dereferenceable(261120) %x, %struct.x* noalias %y) + +; Function Attrs: noinline nounwind uwtable +define internal fastcc void @foo(%struct.s* nocapture readonly %x, %struct.x* %y) #2 { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %sum.05 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds %struct.s* %x, i64 0, i32 0, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4, !tbaa !0 + %add = add nsw i32 %0, %sum.05 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 16000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + %1 = bitcast %struct.x* %y to %struct.x.0* + tail call void @goo(i32 %add, %struct.x.0* %1) #1 + ret void +} + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #1 + +declare void @goo(i32, %struct.x.0*) + +attributes #0 = { nounwind uwtable } +attributes #1 = { nounwind } +attributes #2 = { noinline nounwind uwtable } + +!0 = metadata !{metadata !1, metadata !1, i64 0} +!1 = metadata !{metadata !"int", metadata !2, i64 0} +!2 = metadata !{metadata !"omnipotent char", metadata !3, i64 0} +!3 = metadata !{metadata !"Simple C/C++ TBAA"} Index: test/Transforms/FunctionAttrsTD/malloc.ll =================================================================== --- /dev/null +++ test/Transforms/FunctionAttrsTD/malloc.ll @@ -0,0 +1,42 @@ +; RUN: opt -S -basicaa -functionattrs-td < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK: define i32 @bar() + +; Function Attrs: nounwind uwtable +define i32 @bar() #0 { +entry: + %call = tail call noalias i8* @malloc(i64 12345) #3 + %0 = bitcast i8* %call to i32* + store i32 10, i32* %0, align 4, !tbaa !0 + tail call fastcc void @foo(i32* %0) + ret i32 0 +} + +; Function Attrs: nounwind +declare noalias i8* @malloc(i64) #1 + +; CHECK: define internal fastcc void @foo(i32* noalias dereferenceable(12345) %x) + +; Function Attrs: noinline nounwind uwtable +define internal fastcc void @foo(i32* %x) #2 { +entry: + %arrayidx = getelementptr inbounds i32* %x, i64 5 + store i32 10, i32* %arrayidx, align 4, !tbaa !0 + tail call void @goo(i32* %x) #3 + ret void +} + +declare void @goo(i32*) #3 + +attributes #0 = { nounwind uwtable } +attributes #1 = { nounwind } +attributes #2 = { noinline nounwind uwtable } +attributes #3 = { nounwind } + +!0 = metadata !{metadata !1, metadata !1, i64 0} +!1 = metadata !{metadata !"int", metadata !2, i64 0} +!2 = metadata !{metadata !"omnipotent char", metadata !3, i64 0} +!3 = metadata !{metadata !"Simple C/C++ TBAA"} +