diff --git a/clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll b/clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll --- a/clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll +++ b/clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll @@ -26,7 +26,7 @@ ; Ensure that typeids are in the index. ; RUN: llvm-bcanalyzer -dump %t.o.thinlto.bc | FileCheck %s ; CHECK-LABEL: +; CHECK: ; CHECK-LABEL: +; CHECK: ; CHECK-LABEL: struct ScalarEnumerationTraits { static void enumeration(IO &io, TypeTestResolution::Kind &value) { + io.enumCase(value, "Unknown", TypeTestResolution::Unknown); io.enumCase(value, "Unsat", TypeTestResolution::Unsat); io.enumCase(value, "ByteArray", TypeTestResolution::ByteArray); io.enumCase(value, "Inline", TypeTestResolution::Inline); diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -7659,6 +7659,9 @@ return true; switch (Lex.getKind()) { + case lltok::kw_unknown: + TTRes.TheKind = TypeTestResolution::Unknown; + break; case lltok::kw_unsat: TTRes.TheKind = TypeTestResolution::Unsat; break; diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -2769,6 +2769,8 @@ static const char *getTTResKindName(TypeTestResolution::Kind K) { switch (K) { + case TypeTestResolution::Unknown: + return "unknown"; case TypeTestResolution::Unsat: return "unsat"; case TypeTestResolution::ByteArray: diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -747,6 +747,12 @@ true /* SamplePGO */)); } + // Lower type metadata and the type.test intrinsic in the ThinLTO + // post link pipeline after ICP. This is to enable usage of the type + // tests in ICP sequences. 
+ if (Phase == ThinLTOPhase::PostLink) + MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true)); + // Interprocedural constant propagation now that basic cleanup has occurred // and prior to optimizing globals. // FIXME: This position in the pipeline hasn't been carefully considered in @@ -1169,6 +1175,9 @@ // metadata and intrinsics. MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr)); MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr)); + // Run a second time to clean up any type tests left behind by WPD for use + // in ICP. + MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true)); return MPM; } @@ -1235,6 +1244,10 @@ // The LowerTypeTestsPass needs to run to lower type metadata and the // type.test intrinsics. The pass does nothing if CFI is disabled. MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr)); + // Run a second time to clean up any type tests left behind by WPD for use + // in ICP (which is performed earlier than this in the regular LTO + // pipeline). + MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true)); return MPM; } @@ -1362,6 +1375,9 @@ // to be run at link time if CFI is enabled. This pass does nothing if // CFI is disabled. MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr)); + // Run a second time to clean up any type tests left behind by WPD for use + // in ICP (which is performed earlier than this in the regular LTO pipeline). + MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true)); // Enable splitting late in the FullLTO post-link pipeline. This is done in // the same stage in the old pass manager (\ref addLateLTOOptimizationPasses). diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp --- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp @@ -735,6 +735,9 @@ /// replace the call with. 
Value *LowerTypeTestsModule::lowerTypeTestCall(Metadata *TypeId, CallInst *CI, const TypeIdLowering &TIL) { + // Delay lowering if the resolution is currently unknown. + if (TIL.TheKind == TypeTestResolution::Unknown) + return nullptr; if (TIL.TheKind == TypeTestResolution::Unsat) return ConstantInt::getFalse(M.getContext()); @@ -1043,8 +1046,10 @@ TypeIdLowering TIL = importTypeId(TypeIdStr->getString()); Value *Lowered = lowerTypeTestCall(TypeIdStr, CI, TIL); - CI->replaceAllUsesWith(Lowered); - CI->eraseFromParent(); + if (Lowered) { + CI->replaceAllUsesWith(Lowered); + CI->eraseFromParent(); + } } // ThinLTO backend: the function F has a jump table entry; update this module @@ -1167,8 +1172,10 @@ for (CallInst *CI : TIUI.CallSites) { ++NumTypeTestCallsLowered; Value *Lowered = lowerTypeTestCall(TypeId, CI, TIL); - CI->replaceAllUsesWith(Lowered); - CI->eraseFromParent(); + if (Lowered) { + CI->replaceAllUsesWith(Lowered); + CI->eraseFromParent(); + } } } } diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -478,6 +478,7 @@ MPM.add(createBarrierNoopPass()); if (PerformThinLTO) { + MPM.add(createLowerTypeTestsPass(nullptr, nullptr, true)); // Drop available_externally and unreferenced globals. This is necessary // with ThinLTO in order to avoid leaving undefined references to dead // globals in the object file. @@ -511,9 +512,11 @@ // inter-module indirect calls. For that we perform indirect call promotion // earlier in the pass pipeline, here before globalopt. Otherwise imported // available_externally functions look unreferenced and are removed. 
- if (PerformThinLTO) + if (PerformThinLTO) { MPM.add(createPGOIndirectCallPromotionLegacyPass(/*InLTO = */ true, !PGOSampleUse.empty())); + MPM.add(createLowerTypeTestsPass(nullptr, nullptr, true)); + } // For SamplePGO in ThinLTO compile phase, we do not want to unroll loops // as it will change the CFG too much to make the 2nd profile annotation @@ -1013,8 +1016,8 @@ PM.add(createVerifierPass()); if (ImportSummary) { - // These passes import type identifier resolutions for whole-program - // devirtualization and CFI. They must run early because other passes may + // This pass imports type identifier resolutions for whole-program + // devirtualization and CFI. It must run early because other passes may // disturb the specific instruction patterns that these passes look for, // creating dependencies on resolutions that may not appear in the summary. // @@ -1062,6 +1065,9 @@ // control flow integrity mechanisms (-fsanitize=cfi*) and needs to run at // link time if CFI is enabled. The pass does nothing if CFI is disabled. PM.add(createLowerTypeTestsPass(ExportSummary, nullptr)); + // Run a second time to clean up any type tests left behind by WPD for use + // in ICP (which is performed earlier than this in the regular LTO pipeline). 
+ PM.add(createLowerTypeTestsPass(nullptr, nullptr, true)); if (OptLevel != 0) addLateLTOOptimizationPasses(PM); diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -504,7 +504,9 @@ bool areRemarksEnabled(); - void scanTypeTestUsers(Function *TypeTestFunc); + void + scanTypeTestUsers(Function *TypeTestFunc, + DenseMap> &TypeIdMap); void scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc); void buildTypeIdentifierMap( @@ -1631,7 +1633,9 @@ return false; } -void DevirtModule::scanTypeTestUsers(Function *TypeTestFunc) { +void DevirtModule::scanTypeTestUsers( + Function *TypeTestFunc, + DenseMap> &TypeIdMap) { // Find all virtual calls via a virtual table pointer %p under an assumption // of the form llvm.assume(llvm.type.test(%p, %md)). This indicates that %p // points to a member of the type identifier %md. Group calls by (type ID, @@ -1651,10 +1655,10 @@ auto &DT = LookupDomTree(*CI->getFunction()); findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT); + Metadata *TypeId = + cast(CI->getArgOperand(1))->getMetadata(); // If we found any, add them to CallSlots. if (!Assumes.empty()) { - Metadata *TypeId = - cast(CI->getArgOperand(1))->getMetadata(); Value *Ptr = CI->getArgOperand(0)->stripPointerCasts(); for (DevirtCallSite Call : DevirtCalls) { // Only add this CallSite if we haven't seen it before. The vtable @@ -1667,6 +1671,13 @@ } } + // If we have any uses on type metadata, keep the type test assumes for + // later analysis. Otherwise remove as they aren't useful, and + // LowerTypeTests will think they are Unsat and lower to False, which + // breaks any uses on assumes. + if (TypeIdMap.count(TypeId)) + continue; + // We no longer need the assumes or the type test. 
for (auto Assume : Assumes) Assume->eraseFromParent(); @@ -1865,8 +1876,13 @@ (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty())) return false; + // Rebuild type metadata into a map for easy lookup. + std::vector Bits; + DenseMap> TypeIdMap; + buildTypeIdentifierMap(Bits, TypeIdMap); + if (TypeTestFunc && AssumeFunc) - scanTypeTestUsers(TypeTestFunc); + scanTypeTestUsers(TypeTestFunc, TypeIdMap); if (TypeCheckedLoadFunc) scanTypeCheckedLoadUsers(TypeCheckedLoadFunc); @@ -1888,10 +1904,6 @@ return true; } - // Rebuild type metadata into a map for easy lookup. - std::vector Bits; - DenseMap> TypeIdMap; - buildTypeIdentifierMap(Bits, TypeIdMap); if (TypeIdMap.empty()) return true; @@ -1948,14 +1960,17 @@ // function implementation at offset S.first.ByteOffset, and add to // TargetsForSlot. std::vector TargetsForSlot; + WholeProgramDevirtResolution *Res = nullptr; + if (ExportSummary && isa(S.first.TypeID)) + // Create the type id summary resolution regardless of whether we can + // devirtualize, so that LowerTypeTests knows the type id is used on + // a global and not Unsat. + Res = &ExportSummary + ->getOrInsertTypeIdSummary( + cast(S.first.TypeID)->getString()) + .WPDRes[S.first.ByteOffset]; if (tryFindVirtualCallTargets(TargetsForSlot, TypeIdMap[S.first.TypeID], S.first.ByteOffset)) { - WholeProgramDevirtResolution *Res = nullptr; - if (ExportSummary && isa(S.first.TypeID)) - Res = &ExportSummary - ->getOrInsertTypeIdSummary( - cast(S.first.TypeID)->getString()) - .WPDRes[S.first.ByteOffset]; if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) { DidVirtualConstProp |= @@ -2069,11 +2084,14 @@ std::vector TargetsForSlot; auto TidSummary = ExportSummary.getTypeIdCompatibleVtableSummary(S.first.TypeID); assert(TidSummary); + // Create the type id summary resolution regardless of whether we can + // devirtualize, so that LowerTypeTests knows the type id is used on + // a global and not Unsat. 
+ WholeProgramDevirtResolution *Res = + &ExportSummary.getOrInsertTypeIdSummary(S.first.TypeID) + .WPDRes[S.first.ByteOffset]; if (tryFindVirtualCallTargets(TargetsForSlot, *TidSummary, S.first.ByteOffset)) { - WholeProgramDevirtResolution *Res = - &ExportSummary.getOrInsertTypeIdSummary(S.first.TypeID) - .WPDRes[S.first.ByteOffset]; if (!trySingleImplDevirt(TargetsForSlot, S.first, S.second, Res, DevirtTargets)) diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll --- a/llvm/test/Other/new-pm-lto-defaults.ll +++ b/llvm/test/Other/new-pm-lto-defaults.ll @@ -92,6 +92,7 @@ ; CHECK-O2-NEXT: Running analysis: DemandedBitsAnalysis ; CHECK-O2-NEXT: Running pass: CrossDSOCFIPass ; CHECK-O2-NEXT: Running pass: LowerTypeTestsPass +; CHECK-O-NEXT: Running pass: LowerTypeTestsPass ; CHECK-O2-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}SimplifyCFGPass> ; CHECK-O2-NEXT: Running pass: EliminateAvailableExternallyPass ; CHECK-O2-NEXT: Running pass: GlobalDCEPass diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-defaults.ll @@ -79,6 +79,7 @@ ; CHECK-O-NEXT: Running pass: LowerExpectIntrinsicPass ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Finished llvm::Function pass manager run. 
+; CHECK-POSTLINK-O-NEXT: Running pass: LowerTypeTestsPass ; CHECK-O-NEXT: Running pass: IPSCCPPass ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass ; CHECK-O-NEXT: Running pass: GlobalOptPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -48,6 +48,7 @@ ; CHECK-O-NEXT: Running pass: LowerExpectIntrinsicPass ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Finished {{.*}}Function pass manager run. +; CHECK-O-NEXT: Running pass: LowerTypeTestsPass ; CHECK-O-NEXT: Running pass: IPSCCPPass ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass ; CHECK-O-NEXT: Running pass: GlobalOptPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -59,6 +59,7 @@ ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis ; CHECK-O-NEXT: Running pass: PGOIndirectCallPromotion +; CHECK-O-NEXT: Running pass: LowerTypeTestsPass ; CHECK-O-NEXT: Running pass: IPSCCPPass ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass ; CHECK-O-NEXT: Running pass: GlobalOptPass diff --git a/llvm/test/ThinLTO/X86/cfi-icall-only-defuse.ll b/llvm/test/ThinLTO/X86/cfi-icall-only-defuse.ll --- a/llvm/test/ThinLTO/X86/cfi-icall-only-defuse.ll +++ b/llvm/test/ThinLTO/X86/cfi-icall-only-defuse.ll @@ -41,7 +41,7 @@ ; FOOBAZ: -; FOOBAZ: +; FOOBAZ: ; FOOBAZ: ; FOOBAZ: blob data = 'barbazfoot1' @@ -49,7 +49,7 @@ ; BARQUX: -; BARQUX: +; BARQUX: ; BARQUX: ; BARQUX: blob data = 'barbazquxt1' diff --git a/llvm/test/ThinLTO/X86/cfi-icall.ll b/llvm/test/ThinLTO/X86/cfi-icall.ll --- 
a/llvm/test/ThinLTO/X86/cfi-icall.ll +++ b/llvm/test/ThinLTO/X86/cfi-icall.ll @@ -29,7 +29,7 @@ ; COMBINED: ; COMBINED: -; COMBINED: +; COMBINED: ; COMBINED: ; COMBINED: