diff --git a/llvm/include/llvm/Transforms/IPO/GlobalDCE.h b/llvm/include/llvm/Transforms/IPO/GlobalDCE.h --- a/llvm/include/llvm/Transforms/IPO/GlobalDCE.h +++ b/llvm/include/llvm/Transforms/IPO/GlobalDCE.h @@ -35,9 +35,16 @@ /// Pass to remove unused function declarations. class GlobalDCEPass : public PassInfoMixin { public: + GlobalDCEPass(bool InLTOPostLink = false) : InLTOPostLink(InLTOPostLink) {} + PreservedAnalyses run(Module &M, ModuleAnalysisManager &); + void printPipeline(raw_ostream &OS, + function_ref MapClassName2PassName); + private: + bool InLTOPostLink = false; + SmallPtrSet AliveGlobals; /// Global -> Global that uses this global. diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -1290,8 +1290,6 @@ GV->setLinkage(GlobalValue::InternalLinkage); } - RegularLTO.CombinedModule->addModuleFlag(Module::Error, "LTOPostLink", 1); - if (Conf.PostInternalizeModuleHook && !Conf.PostInternalizeModuleHook(0, *RegularLTO.CombinedModule)) return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); diff --git a/llvm/lib/LTO/LTOCodeGenerator.cpp b/llvm/lib/LTO/LTOCodeGenerator.cpp --- a/llvm/lib/LTO/LTOCodeGenerator.cpp +++ b/llvm/lib/LTO/LTOCodeGenerator.cpp @@ -617,9 +617,6 @@ // Mark which symbols can not be internalized this->applyScopeRestrictions(); - // Write LTOPostLink flag for passes that require all the modules. - MergedModule->addModuleFlag(Module::Error, "LTOPostLink", 1); - // Add an appropriate DataLayout instance for this module... MergedModule->setDataLayout(TargetMach->createDataLayout()); diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -671,6 +671,10 @@ return Result; } +Expected parseGlobalDCEPassOptions(StringRef Params) { + return parseSinglePassOption(Params, "in-lto-post-link", "GlobalDCE"); +} + Expected parseInlinerPassOptions(StringRef Params) { return parseSinglePassOption(Params, "only-mandatory", "InlinerPass"); } diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -1626,7 +1626,7 @@ // Remove unused virtual tables to improve the quality of code generated by // whole-program devirtualization and bitset lowering. - MPM.addPass(GlobalDCEPass()); + MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true)); // Do basic inference of function attributes from known properties of system // libraries and other oracles. @@ -1745,7 +1745,7 @@ MPM.addPass(OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink)); // Garbage collect dead functions. - MPM.addPass(GlobalDCEPass()); + MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true)); // If we didn't decide to inline a function, check to see if we can // transform it to pass arguments by value instead of by reference. @@ -1883,7 +1883,7 @@ MPM.addPass(EliminateAvailableExternallyPass()); // Now that we have optimized the program, discard unreachable functions. - MPM.addPass(GlobalDCEPass()); + MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true)); if (PTO.MergeFunctions) MPM.addPass(MergeFunctionsPass()); diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -60,7 +60,6 @@ MODULE_PASS("extract-blocks", BlockExtractorPass({}, false)) MODULE_PASS("forceattrs", ForceFunctionAttrsPass()) MODULE_PASS("function-import", FunctionImportPass()) -MODULE_PASS("globaldce", GlobalDCEPass()) MODULE_PASS("globalopt", GlobalOptPass()) MODULE_PASS("globalsplit", GlobalSplitPass()) MODULE_PASS("hotcoldsplit", HotColdSplittingPass()) @@ -142,6 +141,13 @@ }, parseLoopExtractorPassOptions, "single") +MODULE_PASS_WITH_PARAMS("globaldce", + "GlobalDCEPass", + [](bool InLTOPostLink) { + return GlobalDCEPass(InLTOPostLink); + }, + parseGlobalDCEPassOptions, + "in-lto-post-link") MODULE_PASS_WITH_PARAMS("hwasan", "HWAddressSanitizerPass", [](HWAddressSanitizerOptions Opts) { diff --git a/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/llvm/lib/Transforms/IPO/GlobalDCE.cpp --- a/llvm/lib/Transforms/IPO/GlobalDCE.cpp +++ b/llvm/lib/Transforms/IPO/GlobalDCE.cpp @@ -120,11 +120,6 @@ SmallVector Types; LLVM_DEBUG(dbgs() << "Building type info -> vtable map\n"); - auto *LTOPostLinkMD = - cast_or_null(M.getModuleFlag("LTOPostLink")); - bool LTOPostLink = - LTOPostLinkMD && !cast(LTOPostLinkMD->getValue())->isZero(); - for (GlobalVariable &GV : M.globals()) { Types.clear(); GV.getMetadata(LLVMContext::MD_type, Types); @@ -151,7 +146,7 @@ if (auto GO = dyn_cast(&GV)) { GlobalObject::VCallVisibility TypeVis = GO->getVCallVisibility(); if (TypeVis == GlobalObject::VCallVisibilityTranslationUnit || - (LTOPostLink && + (InLTOPostLink && TypeVis == GlobalObject::VCallVisibilityLinkageUnit)) { LLVM_DEBUG(dbgs() << GV.getName() << " is safe for VFE\n"); VFESafeVTables.insert(&GV); @@ -414,3 +409,11 @@ return PreservedAnalyses::none(); return PreservedAnalyses::all(); } + +void GlobalDCEPass::printPipeline( + raw_ostream &OS, function_ref MapClassName2PassName) { + static_cast *>(this)->printPipeline( + OS, MapClassName2PassName); + if (InLTOPostLink) + OS << ""; +} diff --git a/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll b/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll --- a/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll +++ b/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll @@ -112,7 +112,7 @@ attributes #0 = { nocallback nofree nosync nounwind readnone speculatable willreturn } !llvm.dbg.cu = !{!0, !25, !26} -!llvm.module.flags = !{!27, !28, !29, !30, !31, !32, !33} +!llvm.module.flags = !{!27, !28, !29, !30, !31, !32} !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 15.0.0 (https://github.com/llvm/llvm-project.git 05256c8d95e0b15bcc502d595c15d902ff520f97)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !8, imports: !20, splitDebugInlining: false, nameTableKind: None) !1 = !DIFile(filename: "dummy", directory: "dummy", checksumkind: CSK_MD5, checksum: "b67bec84bdce3730b4a6f2ed8d50b85c") @@ -147,7 +147,6 @@ !30 = !{i32 7, !"openmp", i32 50} !31 = !{i32 7, !"openmp-device", i32 50} !32 = !{i32 7, !"PIC Level", i32 2} -!33 = !{i32 1, !"LTOPostLink", i32 1} !34 = distinct !DISubprogram(name: "dummy", linkageName: "dummy", scope: !35, file: !1, line: 49, type: !23, scopeLine: 288, flags: DIFlagEnumClass, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, declaration: !36, retainedNodes: !37) !35 = distinct !DICompositeType(tag: DW_TAG_class_type, file: !1, line: 49, size: 32, flags: DIFlagEnumClass, elements: !6, identifier: "dummy") !36 = !DISubprogram(name: "dummy", scope: !35, file: !1, line: 49, type: !23, scopeLine: 288, flags: DIFlagEnumClass, spFlags: DISPFlagOptimized) diff --git a/llvm/test/DebugInfo/MIR/X86/ldv_unreachable_blocks.mir b/llvm/test/DebugInfo/MIR/X86/ldv_unreachable_blocks.mir --- a/llvm/test/DebugInfo/MIR/X86/ldv_unreachable_blocks.mir +++ b/llvm/test/DebugInfo/MIR/X86/ldv_unreachable_blocks.mir @@ -16,11 +16,10 @@ } declare zeroext i8 @foo_len() local_unnamed_addr !llvm.dbg.cu = !{!0} - !llvm.module.flags = !{!3, !4} + !llvm.module.flags = !{!3} !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !6, producer: "Apple clang", isOptimized: true, flags: "-fsanitize=fuzzer-no-link,address", runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !2, globals: !2, splitDebugInlining: false, nameTableKind: None, sysroot: "/", sdk: "MacOSX.sdk") !2 = !{} !3 = !{i32 2, !"Debug Info Version", i32 3} - !4 = !{i32 1, !"LTOPostLink", i32 1} !5 = distinct !DISubprogram(name: "__foo_block_invoke", linkageName: "__foo_block_invoke", scope: !6, file: !6, line: 557, type: !7, scopeLine: 557, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) !6 = !DIFile(filename: "t.c", directory: "") !7 = !DISubroutineType(types: !2) diff --git a/llvm/test/DebugInfo/MIR/X86/ldv_unreachable_blocks2.mir b/llvm/test/DebugInfo/MIR/X86/ldv_unreachable_blocks2.mir --- a/llvm/test/DebugInfo/MIR/X86/ldv_unreachable_blocks2.mir +++ b/llvm/test/DebugInfo/MIR/X86/ldv_unreachable_blocks2.mir @@ -9,12 +9,11 @@ unreachable } !llvm.dbg.cu = !{!3} - !llvm.module.flags = !{!5, !6} + !llvm.module.flags = !{!5} !2 = !{} !3 = distinct !DICompileUnit(language: DW_LANG_C99, file: !4, producer: "clang", runtimeVersion: 0, emissionKind: FullDebug) !4 = !DIFile(filename: "t.c", directory: "/") !5 = !{i32 2, !"Debug Info Version", i32 3} - !6 = !{i32 1, !"LTOPostLink", i32 1} !7 = distinct !DISubprogram(name: "__foo_block_invoke", scope: !4, file: !4, line: 573, type: !9, scopeLine: 573, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !3, retainedNodes: !2) !9 = !DISubroutineType(types: !2) !11 = !DILocalVariable(name: ".block_descriptor", arg: 1, scope: !7, file: !4, line: 557, type: !12, flags: DIFlagArtificial) diff --git a/llvm/test/DebugInfo/X86/subprogram-across-cus.ll b/llvm/test/DebugInfo/X86/subprogram-across-cus.ll --- a/llvm/test/DebugInfo/X86/subprogram-across-cus.ll +++ b/llvm/test/DebugInfo/X86/subprogram-across-cus.ll @@ -61,7 +61,7 @@ !llvm.dbg.cu = !{!0, !9} !llvm.ident = !{!10, !10} -!llvm.module.flags = !{!11, !12, !13, !14, !15, !16} +!llvm.module.flags = !{!11, !12, !13, !14, !15} !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 12.0.0 (git@github.com:llvm/llvm-project bc9ab9a5cd6bafc5e1293f3d5d51638f8f5cd26c)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, globals: !3, splitDebugInlining: false, nameTableKind: None) !1 = !DIFile(filename: "1.cpp", directory: "/tmp/bees") @@ -79,7 +79,6 @@ !13 = !{i32 1, !"wchar_size", i32 4} !14 = !{i32 1, !"ThinLTO", i32 0} !15 = !{i32 1, !"EnableSplitLTOUnit", i32 1} -!16 = !{i32 1, !"LTOPostLink", i32 1} !17 = distinct !DISubprogram(name: "main", scope: !8, file: !8, line: 10, type: !18, scopeLine: 10, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !9, retainedNodes: !2) !18 = !DISubroutineType(types: !19) !19 = !{!20} diff --git a/llvm/test/LTO/ARM/lto-linking-metadata.ll b/llvm/test/LTO/ARM/lto-linking-metadata.ll deleted file mode 100644 --- a/llvm/test/LTO/ARM/lto-linking-metadata.ll +++ /dev/null @@ -1,23 +0,0 @@ -; RUN: opt %s -o %t1.bc - -; RUN: llvm-lto %t1.bc -o %t1.save.opt -save-linked-module -save-merged-module -O1 --exported-symbol=foo -; RUN: llvm-dis < %t1.save.opt.merged.bc | FileCheck %s -; RUN: llvm-dis < %t1.save.opt.linked.bc | FileCheck %s --check-prefix=CHECK-LINKED - -; RUN: llvm-lto2 run %t1.bc -o %t.out.o -save-temps \ -; RUN: -r=%t1.bc,foo,pxl -; RUN: llvm-dis < %t.out.o.0.2.internalize.bc | FileCheck %s - -target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" -target triple = "armv7a-unknown-linux" - -define void @foo() { -entry: - ret void -} - -; CHECK: !llvm.module.flags = !{[[MD_NUM:![0-9]+]]} -; CHECK: [[MD_NUM]] = !{i32 1, !"LTOPostLink", i32 1} - -; CHECK-LINKED: @foo -; CHECK-LINKED-NOT: LTOPostLink diff --git a/llvm/test/Other/new-pm-print-pipeline.ll b/llvm/test/Other/new-pm-print-pipeline.ll --- a/llvm/test/Other/new-pm-print-pipeline.ll +++ b/llvm/test/Other/new-pm-print-pipeline.ll @@ -114,3 +114,9 @@ ; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(loop(loop-rotate))' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-33 ; CHECK-33: function(loop(loop-rotate)) + +; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='globaldce' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-34 +; CHECK-34: globaldce + +; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='globaldce' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-35 +; CHECK-35: globaldce diff --git a/llvm/test/Transforms/GlobalDCE/virtual-functions-visibility-post-lto.ll b/llvm/test/Transforms/GlobalDCE/virtual-functions-visibility-post-lto.ll --- a/llvm/test/Transforms/GlobalDCE/virtual-functions-visibility-post-lto.ll +++ b/llvm/test/Transforms/GlobalDCE/virtual-functions-visibility-post-lto.ll @@ -1,8 +1,14 @@ -; RUN: opt < %s -passes=globaldce -S | FileCheck %s +; RUN: opt < %s -passes='globaldce' -S | FileCheck %s +; RUN: opt < %s -passes='lto' -S | FileCheck %s ; structs A, B and C have vcall_visibility of public, linkage-unit and ; translation-unit respectively. This test is run after LTO linking (the -; LTOPostLink metadata is present), so B and C can be VFE'd. +; pass parameter simulates how GlobalDCE is invoked from the regular LTO +; pipeline), so B and C can be VFE'd. + +;; Try again without being in the LTO post link, we can only eliminate C. +; RUN: opt < %s -passes='globaldce' -S | FileCheck %s --check-prefix=NO-LTO +; RUN: opt < %s -passes='default' -S | FileCheck %s --check-prefix=NO-LTO target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" @@ -17,6 +23,7 @@ } ; CHECK: define {{.*}} @_ZN1A3fooEv( +; NO-LTO: define {{.*}} @_ZN1A3fooEv( define internal void @_ZN1A3fooEv(ptr nocapture %this) { entry: ret void @@ -41,6 +48,7 @@ } ; CHECK-NOT: define {{.*}} @_ZN1B3fooEv( +; NO-LTO: define {{.*}} @_ZN1B3fooEv( define internal void @_ZN1B3fooEv(ptr nocapture %this) { entry: ret void @@ -65,6 +73,7 @@ } ; CHECK-NOT: define {{.*}} @_ZN1C3fooEv( +; NO-LTO-NOT: define {{.*}} @_ZN1C3fooEv( define internal void @_ZN1C3fooEv(ptr nocapture %this) { entry: ret void @@ -79,12 +88,11 @@ declare dso_local noalias nonnull ptr @_Znwm(i64) -!llvm.module.flags = !{!5, !6} +!llvm.module.flags = !{!6} !0 = !{i64 16, !"_ZTS1A"} !1 = !{i64 16, !"_ZTSM1AFvvE.virtual"} !2 = !{i64 0} ; public vcall visibility !3 = !{i64 1} ; linkage-unit vcall visibility !4 = !{i64 2} ; translation-unit vcall visibility -!5 = !{i32 1, !"LTOPostLink", i32 1} !6 = !{i32 1, !"Virtual Function Elim", i32 1} diff --git a/llvm/test/Transforms/GlobalDCE/vtable-rtti.ll b/llvm/test/Transforms/GlobalDCE/vtable-rtti.ll --- a/llvm/test/Transforms/GlobalDCE/vtable-rtti.ll +++ b/llvm/test/Transforms/GlobalDCE/vtable-rtti.ll @@ -37,10 +37,9 @@ declare dso_local noalias nonnull ptr @_Znwm(i64) @_ZTVN10__cxxabiv117__class_type_infoE = external dso_local global ptr -!llvm.module.flags = !{!3, !4} +!llvm.module.flags = !{!4} !0 = !{i64 16, !"_ZTS1A"} !1 = !{i64 16, !"_ZTSM1AFvvE.virtual"} !2 = !{i64 2} ; translation-unit vcall visibility -!3 = !{i32 1, !"LTOPostLink", i32 1} !4 = !{i32 1, !"Virtual Function Elim", i32 1} diff --git a/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl2.ll b/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl2.ll --- a/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl2.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl2.ll @@ -34,7 +34,7 @@ } !llvm.ident = !{!2} -!llvm.module.flags = !{!3, !4, !5, !6} +!llvm.module.flags = !{!3, !4, !5} !0 = !{i64 16, !"_ZTS1A"} !1 = !{i64 16, !"_ZTSM1AKFivE.virtual"} @@ -42,4 +42,3 @@ !3 = !{i32 1, !"wchar_size", i32 4} !4 = !{i32 1, !"EnableSplitLTOUnit", i32 1} !5 = !{i32 1, !"ThinLTO", i32 0} -!6 = !{i32 1, !"LTOPostLink", i32 1} diff --git a/llvm/test/tools/llvm-reduce/reduce-module-flags.ll b/llvm/test/tools/llvm-reduce/reduce-module-flags.ll --- a/llvm/test/tools/llvm-reduce/reduce-module-flags.ll +++ b/llvm/test/tools/llvm-reduce/reduce-module-flags.ll @@ -30,11 +30,10 @@ ; RESULT2: !llvm.module.flags = !{} -!llvm.module.flags = !{!0, !1, !2, !3, !4, !5} +!llvm.module.flags = !{!0, !1, !2, !3, !4} !0 = !{i32 1, !"amdgpu_code_object_version", i32 400} !1 = !{i32 1, !"wchar_size", i32 4} !2 = !{i32 7, !"openmp", i32 50} !3 = !{i32 7, !"openmp-device", i32 50} !4 = !{i32 8, !"PIC Level", i32 1} -!5 = !{i32 1, !"LTOPostLink", i32 1}