Index: clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll =================================================================== --- clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll +++ clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll @@ -36,7 +36,7 @@ ; Round trip it through llvm-as ; RUN: llvm-dis %t.o.thinlto.bc -o - | llvm-as -o - | llvm-dis -o - | FileCheck %s --check-prefix=CHECK-DIS ; CHECK-DIS: ^0 = module: (path: "{{.*}}thinlto-distributed-cfi-devirt.ll.tmp.o", hash: ({{.*}}, {{.*}}, {{.*}}, {{.*}}, {{.*}})) -; CHECK-DIS: ^1 = gv: (guid: 8346051122425466633, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 0, canAutoHide: 0), insts: 18, typeIdInfo: (typeTests: (^2), typeCheckedLoadVCalls: (vFuncId: (^2, offset: 8), vFuncId: (^2, offset: 0)))))) +; CHECK-DIS: ^1 = gv: (guid: 8346051122425466633, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 0, canAutoHide: 0), insts: 18, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0), typeIdInfo: (typeTests: (^2), typeCheckedLoadVCalls: (vFuncId: (^2, offset: 8), vFuncId: (^2, offset: 0)))))) ; CHECK-DIS: ^2 = typeid: (name: "_ZTS1A", summary: (typeTestRes: (kind: allOnes, sizeM1BitWidth: 7), wpdResolutions: ((offset: 0, wpdRes: (kind: branchFunnel)), (offset: 8, wpdRes: (kind: singleImpl, singleImplName: "_ZN1A1nEi"))))) ; guid = 7004155349499253778 ; RUN: %clang_cc1 -triple x86_64-grtev4-linux-gnu \ Index: clang/test/CodeGen/thinlto-distributed-cfi.ll =================================================================== --- clang/test/CodeGen/thinlto-distributed-cfi.ll +++ clang/test/CodeGen/thinlto-distributed-cfi.ll @@ -24,7 +24,7 @@ ; Round trip it through llvm-as ; RUN: llvm-dis %t.o.thinlto.bc -o - | llvm-as -o - | llvm-dis -o - | FileCheck %s --check-prefix=CHECK-DIS ; CHECK-DIS: ^0 = module: (path: 
"{{.*}}thinlto-distributed-cfi.ll.tmp.o", hash: ({{.*}}, {{.*}}, {{.*}}, {{.*}}, {{.*}})) -; CHECK-DIS: ^1 = gv: (guid: 8346051122425466633, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 0, canAutoHide: 0), insts: 7, typeIdInfo: (typeTests: (^2))))) +; CHECK-DIS: ^1 = gv: (guid: 8346051122425466633, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 0, canAutoHide: 0), insts: 7, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0), typeIdInfo: (typeTests: (^2))))) ; CHECK-DIS: ^2 = typeid: (name: "_ZTS1A", summary: (typeTestRes: (kind: single, sizeM1BitWidth: 0))) ; guid = 7004155349499253778 ; RUN: %clang_cc1 -triple x86_64-grtev4-linux-gnu \ Index: llvm/include/llvm/IR/ModuleSummaryIndex.h =================================================================== --- llvm/include/llvm/IR/ModuleSummaryIndex.h +++ llvm/include/llvm/IR/ModuleSummaryIndex.h @@ -686,6 +686,8 @@ /// Get function summary flags. FFlags fflags() const { return FunFlags; } + void setFFlags(FFlags newFlags) { FunFlags = newFlags; } + /// Get the instruction count recorded for this function. 
unsigned instCount() const { return InstCount; } Index: llvm/include/llvm/LTO/LTO.h =================================================================== --- llvm/include/llvm/LTO/LTO.h +++ llvm/include/llvm/LTO/LTO.h @@ -23,6 +23,7 @@ #include "llvm/Object/IRSymtab.h" #include "llvm/Support/Error.h" #include "llvm/Support/thread.h" +#include "llvm/Transforms/IPO/FunctionAttrs.h" #include "llvm/Transforms/IPO/FunctionImport.h" namespace llvm { Index: llvm/include/llvm/Transforms/IPO/FunctionAttrs.h =================================================================== --- llvm/include/llvm/Transforms/IPO/FunctionAttrs.h +++ llvm/include/llvm/Transforms/IPO/FunctionAttrs.h @@ -17,6 +17,7 @@ #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/LazyCallGraph.h" +#include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IR/PassManager.h" namespace llvm { @@ -38,6 +39,14 @@ /// Returns the memory access properties of this copy of the function. MemoryAccessKind computeFunctionBodyMemoryAccess(Function &F, AAResults &AAR); +/// Propagate function attributes for function summaries along the index's +/// callgraph during thinlink +bool thinLTOPropagateFunctionAttrs(ModuleSummaryIndex &Index); + +/// Inserts the FunctionAttr flags from the Index into \p TheModule. +void thinLTOInsertFunctionAttrsForModule(Module &TheModule, + const GVSummaryMapTy &DefinedGlobals); + /// Computes function attributes in post-order over the call graph. /// /// By operating in post-order, this pass computes precise attributes for Index: llvm/lib/LTO/LTO.cpp =================================================================== --- llvm/lib/LTO/LTO.cpp +++ llvm/lib/LTO/LTO.cpp @@ -1430,6 +1430,8 @@ // Synthesize entry counts for functions in the CombinedIndex. 
computeSyntheticCounts(ThinLTO.CombinedIndex); + thinLTOPropagateFunctionAttrs(ThinLTO.CombinedIndex); + StringMap ImportLists( ThinLTO.ModuleMap.size()); StringMap ExportLists( Index: llvm/lib/LTO/LTOBackend.cpp =================================================================== --- llvm/lib/LTO/LTOBackend.cpp +++ llvm/lib/LTO/LTOBackend.cpp @@ -601,6 +601,8 @@ thinLTOResolvePrevailingInModule(Mod, DefinedGlobals); + thinLTOInsertFunctionAttrsForModule(Mod, DefinedGlobals); + if (Conf.PostPromoteModuleHook && !Conf.PostPromoteModuleHook(Task, Mod)) return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); Index: llvm/lib/LTO/ThinLTOCodeGenerator.cpp =================================================================== --- llvm/lib/LTO/ThinLTOCodeGenerator.cpp +++ llvm/lib/LTO/ThinLTOCodeGenerator.cpp @@ -51,6 +51,7 @@ #include "llvm/Support/ToolOutputFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/FunctionAttrs.h" #include "llvm/Transforms/IPO/FunctionImport.h" #include "llvm/Transforms/IPO/Internalize.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" @@ -439,6 +440,8 @@ // Apply summary-based prevailing-symbol resolution decisions. thinLTOResolvePrevailingInModule(TheModule, DefinedGlobals); + thinLTOInsertFunctionAttrsForModule(TheModule, DefinedGlobals); + // Save temps: after promotion. saveTempBitcode(TheModule, SaveTempsDir, count, ".1.promoted.bc"); } @@ -1005,6 +1008,8 @@ // Synthesize entry counts for functions in the combined index. computeSyntheticCounts(*Index); + thinLTOPropagateFunctionAttrs(*Index); + // Currently there is no support for enabling whole program visibility via a // linker option in the old LTO API, but this call allows it to be specified // via the internal option. Must be done before WPD below. 
Index: llvm/lib/Transforms/IPO/FunctionAttrs.cpp
===================================================================
--- llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -95,6 +95,10 @@
     "disable-nofree-inference", cl::Hidden,
     cl::desc("Stop inferring nofree attribute during function-attrs pass"));
 
+static cl::opt<bool> DisableThinLTOPropagation(
+    "disable-thinlto-funcattrs", cl::Hidden,
+    cl::desc("Don't propagate function-attrs in thinLTO"));
+
 namespace {
 
 using SCCNodeSet = SmallSetVector<Function *, 8>;
@@ -322,6 +326,102 @@
   return MadeChange;
 }
 
+/// Propagate function attributes along the call graph recorded in the combined
+/// summary \p Index during the thin link. Returns true if any summary changed.
+bool llvm::thinLTOPropagateFunctionAttrs(ModuleSummaryIndex &Index) {
+  // TODO: implement addNoAliasAttrs once there's more information about the
+  // return type in the summary.
+  if (DisableThinLTOPropagation)
+    return false;
+
+  // Decide whether a call edge may lead back into the caller. Every copy of the
+  // callee in the index must be a function summary that is already norecurse
+  // and whose linkage is not interposable: the first copy in the list is not
+  // necessarily the one that prevails at link time.
+  auto calleeMightRecurse = [](const FunctionSummary::EdgeTy &E) {
+    // We can't reason about external functions.
+    if (E.first.getGUID() == 0 || E.first.getSummaryList().empty())
+      return true;
+
+    auto Copies = E.first.getSummaryList();
+    return std::any_of(Copies.begin(), Copies.end(), [](const auto &Copy) {
+      const auto *CFS = dyn_cast<FunctionSummary>(Copy.get());
+      return !CFS || GlobalValue::isInterposableLinkage(CFS->linkage()) ||
+             !CFS->fflags().NoRecurse;
+    });
+  };
+
+  auto addNoRecurseAttrs = [&](const std::vector<ValueInfo> &SCCNodes) {
+    // An SCC with several nodes is a cycle in the call graph.
+    if (SCCNodes.size() != 1)
+      return false;
+
+    ValueInfo V = SCCNodes.front();
+
+    // Bail if we don't have a FunctionSummary to work with.
+    // TODO: consider adding summaries to external nodes
+    if (V.getSummaryList().empty())
+      return false;
+
+    auto *FS = dyn_cast<FunctionSummary>(V.getSummaryList().front().get());
+    if (!FS)
+      return false;
+
+    // No changes needed if the function already has the attribute, and no
+    // conclusion may be drawn from a body the linker is free to replace.
+    if (FS->fflags().NoRecurse ||
+        GlobalValue::isInterposableLinkage(FS->linkage()))
+      return false;
+
+    // A self edge leads to this very summary, which is not norecurse yet, so
+    // direct recursion is rejected by the same check.
+    if (std::any_of(FS->calls().begin(), FS->calls().end(),
+                    calleeMightRecurse))
+      return false;
+
+    auto NewFlags = FS->fflags();
+    NewFlags.NoRecurse = 1;
+    FS->setFFlags(NewFlags);
+    ++NumNoRecurse;
+    return true;
+  };
+
+  bool Changed = false;
+
+  // Call propagation functions on each SCC in the Index.
+  for (scc_iterator<ModuleSummaryIndex *> I = scc_begin(&Index); !I.isAtEnd();
+       ++I)
+    Changed |= addNoRecurseAttrs(*I);
+
+  return Changed;
+}
+
+/// Insert function attributes in the Index back into the \p TheModule.
+void llvm::thinLTOInsertFunctionAttrsForModule(
+    Module &TheModule, const GVSummaryMapTy &DefinedGlobals) {
+  if (DisableThinLTOPropagation)
+    return;
+
+  for (Function &F : TheModule) {
+    auto It = DefinedGlobals.find(F.getGUID());
+    if (It == DefinedGlobals.end())
+      continue;
+
+    const auto *FS = dyn_cast<FunctionSummary>(It->second);
+    if (!FS)
+      continue;
+
+    // Only add the attributes the function does not carry yet.
+    const auto Flags = FS->fflags();
+    if (Flags.ReadNone && !F.doesNotAccessMemory())
+      F.setDoesNotAccessMemory();
+    if (Flags.ReadOnly && !F.onlyReadsMemory())
+      F.setOnlyReadsMemory();
+    if (Flags.NoRecurse && !F.doesNotRecurse())
+      F.setDoesNotRecurse();
+  }
+}
+
 namespace {
 
 /// For a given pointer Argument, this retains a list of Arguments of functions
Index: llvm/test/ThinLTO/X86/Inputs/functionattr-prop.ll
===================================================================
--- /dev/null
+++ llvm/test/ThinLTO/X86/Inputs/functionattr-prop.ll
@@ -0,0 +1,6 @@
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @callee_norecurse() norecurse {
+  ret void
+}
Index: llvm/test/ThinLTO/X86/deadstrip.ll
===================================================================
--- llvm/test/ThinLTO/X86/deadstrip.ll
+++ llvm/test/ThinLTO/X86/deadstrip.ll
@@ -66,7 +66,7 @@
 ; LTO2-NOT: available_externally {{.*}} @baz()
 ; LTO2: @llvm.global_ctors =
 ; LTO2: define internal void @_GLOBAL__I_a()
-; LTO2: define internal void @bar() {
+; LTO2: define internal void @bar() [[ATTR_NORECURSE:#[0-9]+]] {
 ; LTO2: define internal void @bar_internal()
 ; LTO2-NOT: @dead_func()
 ; LTO2-NOT: available_externally {{.*}} @baz()
@@ -98,6 +98,8 @@
 ; DEBUG-DAG: Initialize import for 15611644523426561710 (boo)
 ; DEBUG-DAG: Ignores Dead GUID: 2384416018110111308 (another_dead_func)
 
+;
LTO2-DAG: attributes [[ATTR_NORECURSE]] = { norecurse } + ; STATS: 3 function-import - Number of dead stripped symbols in index ; Next test the case where Inputs/deadstrip.ll does not get a module index, Index: llvm/test/ThinLTO/X86/function_entry_count.ll =================================================================== --- llvm/test/ThinLTO/X86/function_entry_count.ll +++ llvm/test/ThinLTO/X86/function_entry_count.ll @@ -14,11 +14,12 @@ ; RUN: -exported-symbol=g -exported-symbol=h -thinlto-save-temps=%t3. %t1.bc %t2.bc ; RUN: llvm-dis %t3.0.3.imported.bc -o - | FileCheck %s -; CHECK: define void @h() !prof ![[PROF2:[0-9]+]] -; CHECK: define void @f(i32{{.*}}) !prof ![[PROF1:[0-9]+]] +; CHECK: define void @h() [[ATTR_NORECURSE:#[0-9]+]] !prof ![[PROF2:[0-9]+]] +; CHECK: define void @f(i32{{.*}}) [[ATTR_NORECURSE:#[0-9]+]] !prof ![[PROF1:[0-9]+]] ; CHECK: define available_externally void @g() !prof ![[PROF2]] ; CHECK-DAG: ![[PROF1]] = !{!"synthetic_function_entry_count", i64 10} ; CHECK-DAG: ![[PROF2]] = !{!"synthetic_function_entry_count", i64 198} +; CHECK-DAG: attributes [[ATTR_NORECURSE]] = { norecurse } target triple = "x86_64-unknown-linux-gnu" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" Index: llvm/test/ThinLTO/X86/functionattr-prop.ll =================================================================== --- /dev/null +++ llvm/test/ThinLTO/X86/functionattr-prop.ll @@ -0,0 +1,18 @@ +; RUN: opt -thinlto-bc %s -thin-link-bitcode-file=%t1.thinlink.bc -o %t1.bc +; RUN: opt -thinlto-bc %p/Inputs/functionattr-prop.ll -thin-link-bitcode-file=%t2.thinlink.bc -o %t2.bc + +; First perform the thin link on the normal bitcode file. 
+; RUN: llvm-lto2 run -O0 %t1.bc %t2.bc -o %t.o -r %t2.bc,callee_norecurse,px -r %t1.bc,caller_norecurse,px -r %t1.bc,callee_norecurse,l -save-temps +; RUN: llvm-dis -o - %t.o.1.3.import.bc | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare void @callee_norecurse() + +; CHECK: Function Attrs: norecurse +; CHECK-NEXT: define void @caller_norecurse() +define void @caller_norecurse() { + call void @callee_norecurse() + ret void +} Index: llvm/test/ThinLTO/X86/linkonce_resolution_comdat.ll =================================================================== --- llvm/test/ThinLTO/X86/linkonce_resolution_comdat.ll +++ llvm/test/ThinLTO/X86/linkonce_resolution_comdat.ll @@ -10,9 +10,11 @@ ; Copy from first module is prevailing and converted to weak_odr, copy ; from second module is preempted and converted to available_externally and ; removed from comdat. -; IMPORT1: define weak_odr i32 @f(i8* %0) unnamed_addr comdat($c1) { +; IMPORT1: define weak_odr i32 @f(i8* %0) unnamed_addr [[ATTR_NORECURSE:#[0-9]+]] comdat($c1) { ; IMPORT2: define available_externally i32 @f(i8* %0) unnamed_addr { +; IMPORT1-DAG: attributes [[ATTR_NORECURSE]] = { norecurse } + ; RUN: llvm-nm -o - < %t1.bc.thinlto.o | FileCheck %s --check-prefix=NM1 ; NM1: W f Index: llvm/test/ThinLTO/X86/not-internalized.ll =================================================================== --- llvm/test/ThinLTO/X86/not-internalized.ll +++ llvm/test/ThinLTO/X86/not-internalized.ll @@ -15,7 +15,9 @@ ; Thin LTO internalization shouldn't internalize `bar` as well ; RUN: llvm-dis %t.out.1.2.internalize.bc -o - | FileCheck %s -; CHECK: define linkonce_odr dso_local i32 @bar() comdat($foo) +; CHECK: define linkonce_odr dso_local i32 @bar() [[ATTR_NORECURSE:#[0-9]+]] comdat($foo) + +; CHECK-DAG: attributes [[ATTR_NORECURSE]] = { norecurse } $foo = comdat any Index: llvm/test/ThinLTO/X86/weak_externals.ll 
=================================================================== --- llvm/test/ThinLTO/X86/weak_externals.ll +++ llvm/test/ThinLTO/X86/weak_externals.ll @@ -11,8 +11,10 @@ ; CHECK: @_ZZN9SingletonI1SE11getInstanceEvE8instance = available_externally dso_local global %struct.S zeroinitializer ; CHECK: @_ZZN9SingletonI1SE11getInstanceEvE13instance_weak = available_externally dso_local global %struct.S* null, align 8 -; CHECK: define linkonce_odr dso_local dereferenceable(16) %struct.S* @_ZN9SingletonI1SE11getInstanceEv() comdat -; INTERNALIZE: define internal dereferenceable(16) %struct.S* @_ZN9SingletonI1SE11getInstanceEv() +; CHECK: define linkonce_odr dso_local dereferenceable(16) %struct.S* @_ZN9SingletonI1SE11getInstanceEv() [[ATTR_NORECURSE:#[0-9]+]] comdat +; INTERNALIZE: define internal dereferenceable(16) %struct.S* @_ZN9SingletonI1SE11getInstanceEv() [[ATTR_NORECURSE:#[0-9]+]] + +; CHECK-DAG: attributes [[ATTR_NORECURSE]] = { norecurse } target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -40,4 +42,3 @@ define linkonce_odr dso_local dereferenceable(16) %struct.S* @_ZN9SingletonI1SE11getInstanceEv() #0 comdat align 2 { ret %struct.S* @_ZZN9SingletonI1SE11getInstanceEvE8instance } -