Index: clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll =================================================================== --- clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll +++ clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll @@ -36,7 +36,7 @@ ; Round trip it through llvm-as ; RUN: llvm-dis %t.o.thinlto.bc -o - | llvm-as -o - | llvm-dis -o - | FileCheck %s --check-prefix=CHECK-DIS ; CHECK-DIS: ^0 = module: (path: "{{.*}}thinlto-distributed-cfi-devirt.ll.tmp.o", hash: ({{.*}}, {{.*}}, {{.*}}, {{.*}}, {{.*}})) -; CHECK-DIS: ^1 = gv: (guid: 8346051122425466633, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 0, canAutoHide: 0), insts: 18, typeIdInfo: (typeTests: (^2), typeCheckedLoadVCalls: (vFuncId: (^2, offset: 8), vFuncId: (^2, offset: 0)))))) +; CHECK-DIS: ^1 = gv: (guid: 8346051122425466633, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 0, canAutoHide: 0), insts: 18, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0), typeIdInfo: (typeTests: (^2), typeCheckedLoadVCalls: (vFuncId: (^2, offset: 8), vFuncId: (^2, offset: 0)))))) ; CHECK-DIS: ^2 = typeid: (name: "_ZTS1A", summary: (typeTestRes: (kind: allOnes, sizeM1BitWidth: 7), wpdResolutions: ((offset: 0, wpdRes: (kind: branchFunnel)), (offset: 8, wpdRes: (kind: singleImpl, singleImplName: "_ZN1A1nEi"))))) ; guid = 7004155349499253778 ; RUN: %clang_cc1 -triple x86_64-grtev4-linux-gnu \ Index: clang/test/CodeGen/thinlto-distributed-cfi.ll =================================================================== --- clang/test/CodeGen/thinlto-distributed-cfi.ll +++ clang/test/CodeGen/thinlto-distributed-cfi.ll @@ -24,7 +24,7 @@ ; Round trip it through llvm-as ; RUN: llvm-dis %t.o.thinlto.bc -o - | llvm-as -o - | llvm-dis -o - | FileCheck %s --check-prefix=CHECK-DIS ; CHECK-DIS: ^0 = module: (path: 
"{{.*}}thinlto-distributed-cfi.ll.tmp.o", hash: ({{.*}}, {{.*}}, {{.*}}, {{.*}}, {{.*}})) -; CHECK-DIS: ^1 = gv: (guid: 8346051122425466633, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 0, canAutoHide: 0), insts: 7, typeIdInfo: (typeTests: (^2))))) +; CHECK-DIS: ^1 = gv: (guid: 8346051122425466633, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 0, canAutoHide: 0), insts: 7, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0), typeIdInfo: (typeTests: (^2))))) ; CHECK-DIS: ^2 = typeid: (name: "_ZTS1A", summary: (typeTestRes: (kind: single, sizeM1BitWidth: 0))) ; guid = 7004155349499253778 ; RUN: %clang_cc1 -triple x86_64-grtev4-linux-gnu \ Index: llvm/include/llvm/IR/ModuleSummaryIndex.h =================================================================== --- llvm/include/llvm/IR/ModuleSummaryIndex.h +++ llvm/include/llvm/IR/ModuleSummaryIndex.h @@ -686,6 +686,8 @@ /// Get function summary flags. FFlags fflags() const { return FunFlags; } + void setFFlags(FFlags newFlags) { FunFlags = newFlags; } + /// Get the instruction count recorded for this function. 
unsigned instCount() const { return InstCount; } Index: llvm/include/llvm/LTO/LTO.h =================================================================== --- llvm/include/llvm/LTO/LTO.h +++ llvm/include/llvm/LTO/LTO.h @@ -23,6 +23,7 @@ #include "llvm/Object/IRSymtab.h" #include "llvm/Support/Error.h" #include "llvm/Support/thread.h" +#include "llvm/Transforms/IPO/FunctionAttrs.h" #include "llvm/Transforms/IPO/FunctionImport.h" namespace llvm { Index: llvm/include/llvm/Transforms/IPO/FunctionAttrs.h =================================================================== --- llvm/include/llvm/Transforms/IPO/FunctionAttrs.h +++ llvm/include/llvm/Transforms/IPO/FunctionAttrs.h @@ -17,6 +17,7 @@ #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/LazyCallGraph.h" +#include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IR/PassManager.h" namespace llvm { @@ -38,6 +39,14 @@ /// Returns the memory access properties of this copy of the function. MemoryAccessKind computeFunctionBodyMemoryAccess(Function &F, AAResults &AAR); +/// Propagate function attributes for function summaries along the index's +/// callgraph during thinlink +bool thinLTOPropagateFunctionAttrs(ModuleSummaryIndex &Index); + +/// Inserts the FunctionAttr flags from the Index into \p TheModule. +void thinLTOInsertFunctionAttrsForModule(Module &TheModule, + const GVSummaryMapTy &DefinedGlobals); + /// Computes function attributes in post-order over the call graph. 
/// /// By operating in post-order, this pass computes precise attributes for Index: llvm/lib/LTO/LTO.cpp =================================================================== --- llvm/lib/LTO/LTO.cpp +++ llvm/lib/LTO/LTO.cpp @@ -1507,6 +1507,8 @@ thinLTOResolvePrevailingInIndex(Conf, ThinLTO.CombinedIndex, isPrevailing, recordNewLinkage, GUIDPreservedSymbols); + thinLTOPropagateFunctionAttrs(ThinLTO.CombinedIndex); + generateParamAccessSummary(ThinLTO.CombinedIndex); std::unique_ptr BackendProc = Index: llvm/lib/LTO/LTOBackend.cpp =================================================================== --- llvm/lib/LTO/LTOBackend.cpp +++ llvm/lib/LTO/LTOBackend.cpp @@ -601,6 +601,8 @@ thinLTOResolvePrevailingInModule(Mod, DefinedGlobals); + thinLTOInsertFunctionAttrsForModule(Mod, DefinedGlobals); + if (Conf.PostPromoteModuleHook && !Conf.PostPromoteModuleHook(Task, Mod)) return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); Index: llvm/lib/LTO/ThinLTOCodeGenerator.cpp =================================================================== --- llvm/lib/LTO/ThinLTOCodeGenerator.cpp +++ llvm/lib/LTO/ThinLTOCodeGenerator.cpp @@ -51,6 +51,7 @@ #include "llvm/Support/ToolOutputFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/FunctionAttrs.h" #include "llvm/Transforms/IPO/FunctionImport.h" #include "llvm/Transforms/IPO/Internalize.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" @@ -439,6 +440,8 @@ // Apply summary-based prevailing-symbol resolution decisions. thinLTOResolvePrevailingInModule(TheModule, DefinedGlobals); + thinLTOInsertFunctionAttrsForModule(TheModule, DefinedGlobals); + // Save temps: after promotion. 
saveTempBitcode(TheModule, SaveTempsDir, count, ".1.promoted.bc"); }
@@ -1054,6 +1057,8 @@
       *Index, IsExported(ExportLists, GUIDPreservedSymbols),
       IsPrevailing(PrevailingCopy));
 
+  thinLTOPropagateFunctionAttrs(*Index);
+
   // Make sure that every module has an entry in the ExportLists, ImportList,
   // GVSummary and ResolvedODR maps to enable threaded access to these maps
   // below.
Index: llvm/lib/Transforms/IPO/FunctionAttrs.cpp
===================================================================
--- llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -14,6 +14,7 @@
 
 #include "llvm/Transforms/IPO/FunctionAttrs.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SCCIterator.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
@@ -95,6 +96,10 @@
     "disable-nofree-inference", cl::Hidden,
     cl::desc("Stop inferring nofree attribute during function-attrs pass"));
 
+static cl::opt<bool> DisableThinLTOPropagation(
+    "disable-thinlto-funcattrs", cl::Hidden,
+    cl::desc("Don't propagate function-attrs in thinLTO"));
+
 namespace {
 
 using SCCNodeSet = SmallSetVector<Function *, 8>;
@@ -322,6 +327,142 @@
   return MadeChange;
 }
 
+/// Collect the live function summaries that a reference to \p VI may resolve
+/// to, looking through aliases to their aliasee.
+///
+/// An empty result means no usable summary was found: \p VI is null, nothing
+/// live defines a function, or an external definition competes with another
+/// candidate. Callers have to be conservative in that case.
+static std::vector<FunctionSummary *> getFunctionSummaries(ValueInfo VI) {
+  if (!VI)
+    return {};
+
+  std::vector<FunctionSummary *> Summaries;
+  for (const auto &GVS : VI.getSummaryList()) {
+    if (!GVS->isLive())
+      continue;
+    // An alias without an attached aliasee has nothing to look through to.
+    if (const auto *AS = dyn_cast<AliasSummary>(GVS.get()))
+      if (!AS->hasAliasee())
+        continue;
+    auto *FS = dyn_cast<FunctionSummary>(GVS->getBaseObject());
+    if (!FS)
+      continue;
+
+    const GlobalValue::LinkageTypes Linkage = GVS->linkage();
+    if (GlobalValue::isLocalLinkage(Linkage)) {
+      // A local definition ends the search.
+      Summaries.push_back(FS);
+      break;
+    }
+    if (GlobalValue::isExternalLinkage(Linkage)) {
+      // An external definition next to an earlier candidate is ambiguous.
+      if (!Summaries.empty())
+        return {};
+      Summaries.push_back(FS);
+    } else if (GlobalValue::isWeakLinkage(Linkage) ||
+               GlobalValue::isLinkOnceLinkage(Linkage) ||
+               GlobalValue::isAvailableExternallyLinkage(Linkage)) {
+      // Accumulate every weak, linkonce and available_externally copy.
+      Summaries.push_back(FS);
+    }
+  }
+  return Summaries;
+}
+
+/// Propagate function attributes along the call graph of the combined
+/// \p Index. Only norecurse is inferred at the moment.
+///
+/// \returns true if the flags of at least one function summary were changed.
+bool llvm::thinLTOPropagateFunctionAttrs(ModuleSummaryIndex &Index) {
+  // TODO: implement addNoAliasAttrs once
+  // there's more information about the return type in the summary
+  if (DisableThinLTOPropagation)
+    return false;
+
+  auto AddNoRecurseAttrs = [](const std::vector<ValueInfo> &SCCNodes) {
+    // An SCC with several nodes is a call cycle.
+    if (SCCNodes.size() != 1)
+      return false;
+
+    // Multiple linkonce_odr copies can exist of a function with
+    // potentially different semantics/callees. We treat propagating
+    // through these conservatively as a whole as it's unknown
+    // which one will be used by which callsite.
+    const std::vector<FunctionSummary *> CallerSummaries =
+        getFunctionSummaries(SCCNodes.front());
+
+    // Bail if we don't have a FunctionSummary to work with
+    // TODO: consider adding summaries to external nodes
+    if (CallerSummaries.empty())
+      return false;
+
+    for (const FunctionSummary *Caller : CallerSummaries) {
+      for (const auto &Edge : Caller->calls()) {
+        const std::vector<FunctionSummary *> CalleeSummaries =
+            getFunctionSummaries(Edge.first);
+        // Nothing is known about a callee without a summary (e.g. an external
+        // declaration), so it has to be assumed that it may call back.
+        if (CalleeSummaries.empty())
+          return false;
+        if (llvm::any_of(CalleeSummaries, [](const FunctionSummary *Callee) {
+              return !Callee->fflags().NoRecurse;
+            }))
+          return false;
+      }
+    }
+
+    // Only report (and count) summaries whose flag actually changes.
+    bool Updated = false;
+    for (FunctionSummary *Caller : CallerSummaries) {
+      FunctionSummary::FFlags Flags = Caller->fflags();
+      if (Flags.NoRecurse)
+        continue;
+      Flags.NoRecurse = 1;
+      Caller->setFFlags(Flags);
+      ++NumNoRecurse;
+      Updated = true;
+    }
+    return Updated;
+  };
+
+  bool Changed = false;
+
+  // Call propagation functions on each SCC in the Index
+  for (scc_iterator<ModuleSummaryIndex *> I = scc_begin(&Index); !I.isAtEnd();
+       ++I)
+    Changed |= AddNoRecurseAttrs(*I);
+  return Changed;
+}
+
+/// Insert function attributes in the Index back into the \p TheModule.
+void llvm::thinLTOInsertFunctionAttrsForModule(
+    Module &TheModule, const GVSummaryMapTy &DefinedGlobals) {
+  if (DisableThinLTOPropagation)
+    return;
+
+  for (Function &F : TheModule) {
+    // Only functions with a summary among the defined globals are annotated.
+    const auto SummaryIt = DefinedGlobals.find(F.getGUID());
+    if (SummaryIt == DefinedGlobals.end())
+      continue;
+
+    const auto *FS = dyn_cast<FunctionSummary>(SummaryIt->second);
+    if (!FS)
+      continue;
+
+    // Mirror each flag recorded in the summary, skipping attributes that the
+    // function already carries.
+    const FunctionSummary::FFlags Flags = FS->fflags();
+    if (Flags.ReadNone && !F.doesNotAccessMemory())
+      F.setDoesNotAccessMemory();
+    if (Flags.ReadOnly && !F.onlyReadsMemory())
+      F.setOnlyReadsMemory();
+    if (Flags.NoRecurse && !F.doesNotRecurse())
+      F.setDoesNotRecurse();
+  }
+}
+
 namespace {
 
 /// For a given pointer Argument, this retains a list of Arguments of functions
Index: llvm/test/ThinLTO/X86/Inputs/functionattr-prop.ll
===================================================================
--- /dev/null
+++ llvm/test/ThinLTO/X86/Inputs/functionattr-prop.ll
@@ -0,0 +1,6 @@
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @callee_norecurse() {
+  ret void
+}
Index: llvm/test/ThinLTO/X86/Inputs/linkonce_functionattrs_comdat.ll
===================================================================
--- /dev/null
+++ llvm/test/ThinLTO/X86/Inputs/linkonce_functionattrs_comdat.ll
@@ -0,0 +1,14 @@
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+$c2 = comdat any
+
+define linkonce_odr i32 @f(i8*) unnamed_addr comdat($c2) {
+  %i = call i32 @f(i8* null)
+  ret i32 41
+}
+
+define i32 @g() {
+  %i = call i32 @f(i8* null)
+  ret i32 %i
+}
Index: llvm/test/ThinLTO/X86/deadstrip.ll
===================================================================
--- llvm/test/ThinLTO/X86/deadstrip.ll
+++ llvm/test/ThinLTO/X86/deadstrip.ll
@@ -66,7 +66,7 @@
 ; LTO2-NOT: available_externally {{.*}} @baz()
 ; LTO2: @llvm.global_ctors =
 ; LTO2: define internal void @_GLOBAL__I_a()
-; LTO2: define internal void 
@bar() { +; LTO2: define internal void @bar() [[ATTR_NORECURSE:#[0-9]+]] { ; LTO2: define internal void @bar_internal() ; LTO2-NOT: @dead_func() ; LTO2-NOT: available_externally {{.*}} @baz() @@ -78,7 +78,7 @@ ; Make sure we keep @linkonceodrfuncwithalias in Input/deadstrip.ll alive as it ; is reachable from @main. -; LTO2-CHECK2: define weak_odr dso_local void @linkonceodrfuncwithalias() { +; LTO2-CHECK2: define weak_odr dso_local void @linkonceodrfuncwithalias() [[ATTR_NORECURSE:#[0-9]+]] { ; We should have eventually removed @baz since it was internalized and unused ; CHECK2-NM-NOT: _baz @@ -98,6 +98,8 @@ ; DEBUG-DAG: Initialize import for 15611644523426561710 (boo) ; DEBUG-DAG: Ignores Dead GUID: 2384416018110111308 (another_dead_func) +; LTO2-DAG: attributes [[ATTR_NORECURSE]] = { norecurse } + ; STATS: 3 function-import - Number of dead stripped symbols in index ; Next test the case where Inputs/deadstrip.ll does not get a module index, Index: llvm/test/ThinLTO/X86/function_entry_count.ll =================================================================== --- llvm/test/ThinLTO/X86/function_entry_count.ll +++ llvm/test/ThinLTO/X86/function_entry_count.ll @@ -14,11 +14,12 @@ ; RUN: -exported-symbol=g -exported-symbol=h -thinlto-save-temps=%t3. 
%t1.bc %t2.bc ; RUN: llvm-dis %t3.0.3.imported.bc -o - | FileCheck %s -; CHECK: define void @h() !prof ![[PROF2:[0-9]+]] -; CHECK: define void @f(i32{{.*}}) !prof ![[PROF1:[0-9]+]] +; CHECK: define void @h() [[ATTR_NORECURSE:#[0-9]+]] !prof ![[PROF2:[0-9]+]] +; CHECK: define void @f(i32{{.*}}) [[ATTR_NORECURSE:#[0-9]+]] !prof ![[PROF1:[0-9]+]] ; CHECK: define available_externally void @g() !prof ![[PROF2]] ; CHECK-DAG: ![[PROF1]] = !{!"synthetic_function_entry_count", i64 10} ; CHECK-DAG: ![[PROF2]] = !{!"synthetic_function_entry_count", i64 198} +; CHECK-DAG: attributes [[ATTR_NORECURSE]] = { norecurse } target triple = "x86_64-unknown-linux-gnu" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" Index: llvm/test/ThinLTO/X86/functionattr-prop.ll =================================================================== --- /dev/null +++ llvm/test/ThinLTO/X86/functionattr-prop.ll @@ -0,0 +1,18 @@ +; RUN: opt -thinlto-bc %s -thin-link-bitcode-file=%t1.thinlink.bc -o %t1.bc +; RUN: opt -thinlto-bc %p/Inputs/functionattr-prop.ll -thin-link-bitcode-file=%t2.thinlink.bc -o %t2.bc + +; First perform the thin link on the normal bitcode file. 
+; RUN: llvm-lto2 run -O0 %t1.bc %t2.bc -o %t.o -r %t2.bc,callee_norecurse,px -r %t1.bc,caller_norecurse,px -r %t1.bc,callee_norecurse,l -save-temps +; RUN: llvm-dis -o - %t.o.1.3.import.bc | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare void @callee_norecurse() + +; CHECK: Function Attrs: norecurse +; CHECK-NEXT: define void @caller_norecurse() +define void @caller_norecurse() { + call void @callee_norecurse() + ret void +} Index: llvm/test/ThinLTO/X86/linkonce_functionattrs_comdat.ll =================================================================== --- /dev/null +++ llvm/test/ThinLTO/X86/linkonce_functionattrs_comdat.ll @@ -0,0 +1,24 @@ +; Tests that function attribute propagation takes the conservative set when propagating +; through linkonce_odr functions with different attributes. +; RUN: opt -module-summary %s -o %t1.bc +; RUN: opt -module-summary %p/Inputs/linkonce_functionattrs_comdat.ll -o %t2.bc +; RUN: llvm-lto -thinlto-action=run %t1.bc %t2.bc -exported-symbol=f -exported-symbol=g -thinlto-save-temps=%t3. + +; RUN: llvm-dis %t3.0.3.imported.bc -o - | FileCheck %s --check-prefix=IMPORT1 +; RUN: llvm-dis %t3.1.3.imported.bc -o - | FileCheck %s --check-prefix=IMPORT2 +; Copy from first module is prevailing and converted to weak_odr, copy +; from second module is preempted and converted to available_externally and +; removed from comdat. 
+; Note the lack of attributes on all of these functions +; IMPORT1: define weak_odr i32 @f(i8* %0) unnamed_addr comdat($c1) { +; IMPORT2: define available_externally i32 @f(i8* %0) unnamed_addr { +; IMPORT2: define i32 @g() { + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +$c1 = comdat any + +define linkonce_odr i32 @f(i8*) unnamed_addr comdat($c1) { + ret i32 43 +} Index: llvm/test/ThinLTO/X86/linkonce_resolution_comdat.ll =================================================================== --- llvm/test/ThinLTO/X86/linkonce_resolution_comdat.ll +++ llvm/test/ThinLTO/X86/linkonce_resolution_comdat.ll @@ -10,8 +10,10 @@ ; Copy from first module is prevailing and converted to weak_odr, copy ; from second module is preempted and converted to available_externally and ; removed from comdat. -; IMPORT1: define weak_odr i32 @f(i8* %0) unnamed_addr comdat($c1) { -; IMPORT2: define available_externally i32 @f(i8* %0) unnamed_addr { +; IMPORT1: define weak_odr i32 @f(i8* %0) unnamed_addr [[ATTR_NORECURSE:#[0-9]+]] comdat($c1) { +; IMPORT2: define available_externally i32 @f(i8* %0) unnamed_addr [[ATTR_NORECURSE:#[0-9]+]] { + +; CHECK-DAG: attributes [[ATTR_NORECURSE]] = { norecurse } ; RUN: llvm-nm -o - < %t1.bc.thinlto.o | FileCheck %s --check-prefix=NM1 ; NM1: W f Index: llvm/test/ThinLTO/X86/not-internalized.ll =================================================================== --- llvm/test/ThinLTO/X86/not-internalized.ll +++ llvm/test/ThinLTO/X86/not-internalized.ll @@ -15,7 +15,9 @@ ; Thin LTO internalization shouldn't internalize `bar` as well ; RUN: llvm-dis %t.out.1.2.internalize.bc -o - | FileCheck %s -; CHECK: define linkonce_odr dso_local i32 @bar() comdat($foo) +; CHECK: define linkonce_odr dso_local i32 @bar() [[ATTR_NORECURSE:#[0-9]+]] comdat($foo) + +; CHECK-DAG: attributes [[ATTR_NORECURSE]] = { norecurse } $foo = comdat any Index: 
llvm/test/ThinLTO/X86/weak_externals.ll =================================================================== --- llvm/test/ThinLTO/X86/weak_externals.ll +++ llvm/test/ThinLTO/X86/weak_externals.ll @@ -11,8 +11,10 @@ ; CHECK: @_ZZN9SingletonI1SE11getInstanceEvE8instance = available_externally dso_local global %struct.S zeroinitializer ; CHECK: @_ZZN9SingletonI1SE11getInstanceEvE13instance_weak = available_externally dso_local global %struct.S* null, align 8 -; CHECK: define linkonce_odr dso_local dereferenceable(16) %struct.S* @_ZN9SingletonI1SE11getInstanceEv() comdat -; INTERNALIZE: define internal dereferenceable(16) %struct.S* @_ZN9SingletonI1SE11getInstanceEv() +; CHECK: define linkonce_odr dso_local dereferenceable(16) %struct.S* @_ZN9SingletonI1SE11getInstanceEv() [[ATTR_NORECURSE:#[0-9]+]] comdat +; INTERNALIZE: define internal dereferenceable(16) %struct.S* @_ZN9SingletonI1SE11getInstanceEv() [[ATTR_NORECURSE:#[0-9]+]] + +; CHECK-DAG: attributes [[ATTR_NORECURSE]] = { norecurse } target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -40,4 +42,3 @@ define linkonce_odr dso_local dereferenceable(16) %struct.S* @_ZN9SingletonI1SE11getInstanceEv() #0 comdat align 2 { ret %struct.S* @_ZZN9SingletonI1SE11getInstanceEvE8instance } -