Index: include/llvm/Passes/PassBuilder.h
===================================================================
--- include/llvm/Passes/PassBuilder.h
+++ include/llvm/Passes/PassBuilder.h
@@ -27,6 +27,7 @@
 class StringRef;
 class AAManager;
 class TargetMachine;
+class ModuleSummaryIndex;
 
 /// A struct capturing PGO tunables.
 struct PGOOptions {
@@ -310,8 +311,9 @@
   /// only intended for use when attempting to optimize code. If frontends
   /// require some transformations for semantic reasons, they should explicitly
   /// build them.
-  ModulePassManager buildThinLTODefaultPipeline(OptimizationLevel Level,
-                                                bool DebugLogging = false);
+  ModulePassManager
+  buildThinLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
+                              const ModuleSummaryIndex *ImportSummary);
 
   /// Build a pre-link, LTO-targeting default optimization pipeline to a pass
   /// manager.
@@ -340,7 +342,8 @@
   /// require some transformations for semantic reasons, they should explicitly
   /// build them.
   ModulePassManager buildLTODefaultPipeline(OptimizationLevel Level,
-                                            bool DebugLogging = false);
+                                            bool DebugLogging,
+                                            ModuleSummaryIndex *ExportSummary);
 
   /// Build the default `AAManager` with the default alias analysis pipeline
   /// registered.
Index: include/llvm/Transforms/IPO/LowerTypeTests.h
===================================================================
--- include/llvm/Transforms/IPO/LowerTypeTests.h
+++ include/llvm/Transforms/IPO/LowerTypeTests.h
@@ -26,6 +26,7 @@
 namespace llvm {
 
 class Module;
+class ModuleSummaryIndex;
 class raw_ostream;
 
 namespace lowertypetests {
@@ -197,6 +198,11 @@
 
 class LowerTypeTestsPass : public PassInfoMixin<LowerTypeTestsPass> {
 public:
+  ModuleSummaryIndex *ExportSummary;
+  const ModuleSummaryIndex *ImportSummary;
+  LowerTypeTestsPass(ModuleSummaryIndex *ExportSummary,
+                     const ModuleSummaryIndex *ImportSummary)
+      : ExportSummary(ExportSummary), ImportSummary(ImportSummary) {}
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
 };
 
Index: include/llvm/Transforms/IPO/WholeProgramDevirt.h
===================================================================
--- include/llvm/Transforms/IPO/WholeProgramDevirt.h
+++ include/llvm/Transforms/IPO/WholeProgramDevirt.h
@@ -28,6 +28,7 @@
 template <typename T> class MutableArrayRef;
 class Function;
 class GlobalVariable;
+class ModuleSummaryIndex;
 
 namespace wholeprogramdevirt {
 
@@ -218,6 +219,13 @@
 } // end namespace wholeprogramdevirt
 
 struct WholeProgramDevirtPass : public PassInfoMixin<WholeProgramDevirtPass> {
+  ModuleSummaryIndex *ExportSummary;
+  const ModuleSummaryIndex *ImportSummary;
+  WholeProgramDevirtPass(ModuleSummaryIndex *ExportSummary,
+                         const ModuleSummaryIndex *ImportSummary)
+      : ExportSummary(ExportSummary), ImportSummary(ImportSummary) {
+    assert(!(ExportSummary && ImportSummary));
+  }
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
 };
 
Index: lib/LTO/LTOBackend.cpp
===================================================================
--- lib/LTO/LTOBackend.cpp
+++ lib/LTO/LTOBackend.cpp
@@ -144,7 +144,9 @@
 }
 
 static void runNewPMPasses(Config &Conf, Module &Mod, TargetMachine *TM,
-                           unsigned OptLevel, bool IsThinLTO) {
+                           unsigned OptLevel, bool IsThinLTO,
+                           ModuleSummaryIndex *ExportSummary,
+                           const ModuleSummaryIndex *ImportSummary) {
   Optional<PGOOptions> PGOOpt;
   if (!Conf.SampleProfile.empty())
     PGOOpt = PGOOptions("", "", Conf.SampleProfile, false, true);
@@ -194,9 +196,10 @@
   }
 
   if (IsThinLTO)
-    MPM = PB.buildThinLTODefaultPipeline(OL, Conf.DebugPassManager);
+    MPM = PB.buildThinLTODefaultPipeline(OL, Conf.DebugPassManager,
+                                         ImportSummary);
   else
-    MPM = PB.buildLTODefaultPipeline(OL, Conf.DebugPassManager);
+    MPM = PB.buildLTODefaultPipeline(OL, Conf.DebugPassManager, ExportSummary);
   MPM.run(Mod, MAM);
 
   // FIXME (davide): verify the output.
@@ -279,7 +282,8 @@
     runNewPMCustomPasses(Mod, TM, Conf.OptPipeline, Conf.AAPipeline,
                          Conf.DisableVerify);
   else if (Conf.UseNewPM)
-    runNewPMPasses(Conf, Mod, TM, Conf.OptLevel, IsThinLTO);
+    runNewPMPasses(Conf, Mod, TM, Conf.OptLevel, IsThinLTO, ExportSummary,
+                   ImportSummary);
   else
     runOldPMPasses(Conf, Mod, TM, IsThinLTO, ExportSummary, ImportSummary);
   return !Conf.PostOptModuleHook || Conf.PostOptModuleHook(Task, Mod);
Index: lib/Passes/PassBuilder.cpp
===================================================================
--- lib/Passes/PassBuilder.cpp
+++ lib/Passes/PassBuilder.cpp
@@ -922,15 +922,28 @@
   return MPM;
 }
 
-ModulePassManager
-PassBuilder::buildThinLTODefaultPipeline(OptimizationLevel Level,
-                                         bool DebugLogging) {
-  // FIXME: The summary index is not hooked in the new pass manager yet.
-  // When it's going to be hooked, enable WholeProgramDevirt and LowerTypeTest
-  // here.
-
+ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
+    OptimizationLevel Level, bool DebugLogging,
+    const ModuleSummaryIndex *ImportSummary) {
   ModulePassManager MPM(DebugLogging);
 
+  if (ImportSummary) {
+    // These passes import type identifier resolutions for whole-program
+    // devirtualization and CFI. They must run early because other passes may
+    // disturb the specific instruction patterns that these passes look for,
+    // creating dependencies on resolutions that may not appear in the summary.
+    //
+    // For example, GVN may transform the pattern assume(type.test) appearing in
+    // two basic blocks into assume(phi(type.test, type.test)), which would
+    // transform a dependency on a WPD resolution into a dependency on a type
+    // identifier resolution for CFI.
+    //
+    // Also, WPD has access to more precise information than ICP and can
+    // devirtualize more effectively, so it should operate on the IR first.
+    MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
+    MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
+  }
+
   // Force any function attributes we want the rest of the pipeline to observe.
   MPM.addPass(ForceFunctionAttrsPass());
 
@@ -961,8 +974,9 @@
   return buildPerModuleDefaultPipeline(Level, DebugLogging);
 }
 
-ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
-                                                       bool DebugLogging) {
+ModulePassManager
+PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
+                                     ModuleSummaryIndex *ExportSummary) {
   assert(Level != O0 && "Must request optimizations for the default pipeline!");
   ModulePassManager MPM(DebugLogging);
 
@@ -1012,11 +1026,15 @@
 
   // Run whole program optimization of virtual call when the list of callees
   // is fixed.
-  MPM.addPass(WholeProgramDevirtPass());
+  MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
 
   // Stop here at -O1.
-  if (Level == 1)
+  if (Level == 1) {
+    // The LowerTypeTestsPass needs to run to lower type metadata and the
+    // type.test intrinsics. The pass does nothing if CFI is disabled.
+    MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
     return MPM;
+  }
 
   // Optimize globals to try and fold them into constants.
   MPM.addPass(GlobalOptPass());
@@ -1125,12 +1143,7 @@
   // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
   // to be run at link time if CFI is enabled. This pass does nothing if
   // CFI is disabled.
-  // Enable once we add support for the summary in the new PM.
-#if 0
-  MPM.addPass(LowerTypeTestsPass(Summary ?
PassSummaryAction::Export :
-                                           PassSummaryAction::None,
-                                Summary));
-#endif
+  MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
 
   // Add late LTO optimization passes.
   // Delete basic blocks, which optimization passes may have killed.
@@ -1442,12 +1455,12 @@
   } else if (Matches[1] == "thinlto-pre-link") {
     MPM.addPass(buildThinLTOPreLinkDefaultPipeline(L, DebugLogging));
   } else if (Matches[1] == "thinlto") {
-    MPM.addPass(buildThinLTODefaultPipeline(L, DebugLogging));
+    MPM.addPass(buildThinLTODefaultPipeline(L, DebugLogging, nullptr));
   } else if (Matches[1] == "lto-pre-link") {
     MPM.addPass(buildLTOPreLinkDefaultPipeline(L, DebugLogging));
   } else {
     assert(Matches[1] == "lto" && "Not one of the matched options!");
-    MPM.addPass(buildLTODefaultPipeline(L, DebugLogging));
+    MPM.addPass(buildLTODefaultPipeline(L, DebugLogging, nullptr));
   }
   return true;
 }
Index: lib/Passes/PassRegistry.def
===================================================================
--- lib/Passes/PassRegistry.def
+++ lib/Passes/PassRegistry.def
@@ -56,7 +56,7 @@
 MODULE_PASS("internalize", InternalizePass())
 MODULE_PASS("invalidate", InvalidateAllAnalysesPass())
 MODULE_PASS("ipsccp", IPSCCPPass())
-MODULE_PASS("lowertypetests", LowerTypeTestsPass())
+MODULE_PASS("lowertypetests", LowerTypeTestsPass(nullptr, nullptr))
 MODULE_PASS("name-anon-globals", NameAnonGlobalPass())
 MODULE_PASS("no-op-module", NoOpModulePass())
 MODULE_PASS("partial-inliner", PartialInlinerPass())
@@ -75,7 +75,7 @@
 MODULE_PASS("sample-profile", SampleProfileLoaderPass())
 MODULE_PASS("strip-dead-prototypes", StripDeadPrototypesPass())
 MODULE_PASS("synthetic-counts-propagation", SyntheticCountsPropagation())
-MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass())
+MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass(nullptr, nullptr))
 MODULE_PASS("verify", VerifierPass())
 #undef MODULE_PASS
 
Index: lib/Transforms/IPO/LowerTypeTests.cpp
===================================================================
---
lib/Transforms/IPO/LowerTypeTests.cpp
+++ lib/Transforms/IPO/LowerTypeTests.cpp
@@ -2102,9 +2102,7 @@
 
 PreservedAnalyses LowerTypeTestsPass::run(Module &M,
                                           ModuleAnalysisManager &AM) {
-  bool Changed = LowerTypeTestsModule(M, /*ExportSummary=*/nullptr,
-                                      /*ImportSummary=*/nullptr)
-                     .lower();
+  bool Changed = LowerTypeTestsModule(M, ExportSummary, ImportSummary).lower();
   if (!Changed)
     return PreservedAnalyses::all();
   return PreservedAnalyses::none();
Index: lib/Transforms/IPO/WholeProgramDevirt.cpp
===================================================================
--- lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -611,7 +611,8 @@
   auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & {
     return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
   };
-  if (!DevirtModule(M, AARGetter, OREGetter, nullptr, nullptr).run())
+  if (!DevirtModule(M, AARGetter, OREGetter, ExportSummary, ImportSummary)
+           .run())
     return PreservedAnalyses::all();
   return PreservedAnalyses::none();
 }
Index: test/Other/new-pm-lto-defaults.ll
===================================================================
--- test/Other/new-pm-lto-defaults.ll
+++ test/Other/new-pm-lto-defaults.ll
@@ -54,6 +54,7 @@
 ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis
 ; CHECK-O-NEXT: Running pass: GlobalSplitPass
 ; CHECK-O-NEXT: Running pass: WholeProgramDevirtPass
+; CHECK-O1-NEXT: Running pass: LowerTypeTestsPass
 ; CHECK-O2-NEXT: Running pass: GlobalOptPass
 ; CHECK-O2-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PromotePass>
 ; CHECK-O2-NEXT: Running analysis: DominatorTreeAnalysis
@@ -82,6 +83,7 @@
 ; CHECK-O2-NEXT: Running analysis: MemoryDependenceAnalysis
 ; CHECK-O2-NEXT: Running analysis: DemandedBitsAnalysis
 ; CHECK-O2-NEXT: Running pass: CrossDSOCFIPass
+; CHECK-O2-NEXT: Running pass: LowerTypeTestsPass
 ; CHECK-O2-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}SimplifyCFGPass>
 ; CHECK-O2-NEXT: Running pass: EliminateAvailableExternallyPass
 ; CHECK-O2-NEXT: Running pass:
GlobalDCEPass
Index: test/ThinLTO/X86/cfi-devirt.ll
===================================================================
--- /dev/null
+++ test/ThinLTO/X86/cfi-devirt.ll
@@ -0,0 +1,100 @@
+; REQUIRES: x86-registered-target
+
+; Test CFI devirtualization through the thin link and backend.
+
+; RUN: opt -thinlto-bc -o %t.o %s
+
+; Legacy PM
+; RUN: llvm-lto2 run %t.o -save-temps \
+; RUN: -o %t3 \
+; RUN: -r=%t.o,test,px \
+; RUN: -r=%t.o,_ZN1A1nEi,p \
+; RUN: -r=%t.o,_ZN1B1fEi,p \
+; RUN: -r=%t.o,_ZN1C1fEi,p \
+; RUN: -r=%t.o,_ZTV1B, \
+; RUN: -r=%t.o,_ZTV1C, \
+; RUN: -r=%t.o,_ZN1A1nEi, \
+; RUN: -r=%t.o,_ZN1B1fEi, \
+; RUN: -r=%t.o,_ZN1C1fEi, \
+; RUN: -r=%t.o,_ZTV1B,px \
+; RUN: -r=%t.o,_ZTV1C,px
+; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR
+
+; New PM
+; RUN: llvm-lto2 run %t.o -save-temps -use-new-pm \
+; RUN: -o %t3 \
+; RUN: -r=%t.o,test,px \
+; RUN: -r=%t.o,_ZN1A1nEi,p \
+; RUN: -r=%t.o,_ZN1B1fEi,p \
+; RUN: -r=%t.o,_ZN1C1fEi,p \
+; RUN: -r=%t.o,_ZTV1B, \
+; RUN: -r=%t.o,_ZTV1C, \
+; RUN: -r=%t.o,_ZN1A1nEi, \
+; RUN: -r=%t.o,_ZN1B1fEi, \
+; RUN: -r=%t.o,_ZN1C1fEi, \
+; RUN: -r=%t.o,_ZTV1B,px \
+; RUN: -r=%t.o,_ZTV1C,px
+; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+%struct.A = type { i32 (...)** }
+%struct.B = type { %struct.A }
+%struct.C = type { %struct.A }
+
+@_ZTV1B = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !1
+@_ZTV1C = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.C*, i32)* @_ZN1C1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !2
+
+; CHECK-IR-LABEL: define i32 @test
+define i32 @test(%struct.A* %obj, i32 %a) {
+entry:
+  %0 = bitcast %struct.A* %obj to i8**
+ 
%vtable5 = load i8*, i8** %0
+
+  %1 = tail call { i8*, i1 } @llvm.type.checked.load(i8* %vtable5, i32 8, metadata !"_ZTS1A")
+  %2 = extractvalue { i8*, i1 } %1, 1
+  br i1 %2, label %cont, label %trap
+
+trap:
+  tail call void @llvm.trap()
+  unreachable
+
+cont:
+  %3 = extractvalue { i8*, i1 } %1, 0
+  %4 = bitcast i8* %3 to i32 (%struct.A*, i32)*
+
+  ; Check that the call was devirtualized.
+  ; CHECK-IR: %call = tail call i32 @_ZN1A1nEi
+  %call = tail call i32 %4(%struct.A* nonnull %obj, i32 %a)
+  %vtable16 = load i8*, i8** %0
+  %5 = tail call { i8*, i1 } @llvm.type.checked.load(i8* %vtable16, i32 0, metadata !"_ZTS1A")
+  %6 = extractvalue { i8*, i1 } %5, 1
+  br i1 %6, label %cont2, label %trap
+
+cont2:
+  %7 = extractvalue { i8*, i1 } %5, 0
+  %8 = bitcast i8* %7 to i32 (%struct.A*, i32)*
+
+  ; Check that traps are conditional. Invalid TYPE_ID can cause
+  ; unconditional traps.
+  ; CHECK-IR: br i1 {{.*}}, label %trap
+
+  ; We still have to call it as virtual.
+  ; CHECK-IR: %call3 = tail call i32 %8
+  %call3 = tail call i32 %8(%struct.A* nonnull %obj, i32 %call)
+  ret i32 %call3
+}
+; CHECK-IR-LABEL: ret i32
+; CHECK-IR-LABEL: }
+
+declare { i8*, i1 } @llvm.type.checked.load(i8*, i32, metadata)
+declare void @llvm.trap()
+
+declare i32 @_ZN1B1fEi(%struct.B* %this, i32 %a)
+declare i32 @_ZN1A1nEi(%struct.A* %this, i32 %a)
+declare i32 @_ZN1C1fEi(%struct.C* %this, i32 %a)
+
+!0 = !{i64 16, !"_ZTS1A"}
+!1 = !{i64 16, !"_ZTS1B"}
+!2 = !{i64 16, !"_ZTS1C"}
Index: test/ThinLTO/X86/cfi.ll
===================================================================
--- /dev/null
+++ test/ThinLTO/X86/cfi.ll
@@ -0,0 +1,60 @@
+; REQUIRES: x86-registered-target
+
+; Test CFI through the thin link and backend.
+
+; RUN: opt -thinlto-bc -o %t.o %s
+
+; Legacy PM
+; RUN: llvm-lto2 run -save-temps %t.o \
+; RUN: -o %t3 \
+; RUN: -r=%t.o,test,px \
+; RUN: -r=%t.o,_ZTV1B, \
+; RUN: -r=%t.o,_ZN1B1fEi, \
+; RUN: -r=%t.o,_ZTV1B,px
+; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR
+
+; New PM
+; RUN: llvm-lto2 run -save-temps %t.o -use-new-pm \
+; RUN: -o %t3 \
+; RUN: -r=%t.o,test,px \
+; RUN: -r=%t.o,_ZTV1B, \
+; RUN: -r=%t.o,_ZN1B1fEi, \
+; RUN: -r=%t.o,_ZTV1B,px
+; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+%struct.B = type { %struct.A }
+%struct.A = type { i32 (...)** }
+
+@_ZTV1B = constant { [3 x i8*] } { [3 x i8*] [i8* undef, i8* undef, i8* undef] }, !type !0
+
+; CHECK-IR-LABEL: define void @test
+define void @test(i8* %b) {
+entry:
+  ; Ensure that traps are conditional. Invalid TYPE_ID can cause
+  ; unconditional traps.
+  ; CHECK-IR: br i1 {{.*}}, label %trap
+  %0 = bitcast i8* %b to i8**
+  %vtable2 = load i8*, i8** %0
+  %1 = tail call i1 @llvm.type.test(i8* %vtable2, metadata !"_ZTS1A")
+  br i1 %1, label %cont, label %trap
+
+trap:
+  tail call void @llvm.trap()
+  unreachable
+
+cont:
+  ; CHECK-IR-LABEL: ret void
+  ret void
+}
+; CHECK-IR-LABEL: }
+
+declare i1 @llvm.type.test(i8*, metadata)
+declare void @llvm.trap()
+
+declare i32 @_ZN1B1fEi(%struct.B* %this, i32 %a)
+
+!0 = !{i64 16, !"_ZTS1A"}
+!1 = !{i64 16, !"_ZTS1B"}