diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h --- a/llvm/include/llvm/IR/ModuleSummaryIndex.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h @@ -894,7 +894,8 @@ Single, ///< Single element (last example in "Short Inline Bit Vectors") AllOnes, ///< All-ones bit vector ("Eliminating Bit Vector Checks for /// All-Ones Bit Vectors") - } TheKind = Unsat; + Unknown, ///< Unknown (analysis not performed, don't lower) + } TheKind = Unknown; /// Range of size-1 expressed as a bit width. For example, if the size is in /// range [1,256], this number will be 8. This helps generate the most compact @@ -1092,7 +1093,7 @@ // in the way some record are interpreted, like flags for instance. // Note that incrementing this may require changes in both BitcodeReader.cpp // and BitcodeWriter.cpp. - static constexpr uint64_t BitcodeSummaryVersion = 8; + static constexpr uint64_t BitcodeSummaryVersion = 9; // Regular LTO module name for ASM writer static constexpr const char *getRegularLTOModuleName() { diff --git a/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h b/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h --- a/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h @@ -17,6 +17,7 @@ template <> struct ScalarEnumerationTraits { static void enumeration(IO &io, TypeTestResolution::Kind &value) { + io.enumCase(value, "Unknown", TypeTestResolution::Unknown); io.enumCase(value, "Unsat", TypeTestResolution::Unsat); io.enumCase(value, "ByteArray", TypeTestResolution::ByteArray); io.enumCase(value, "Inline", TypeTestResolution::Inline); diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -7799,6 +7799,9 @@ return true; switch (Lex.getKind()) { + case lltok::kw_unknown: + TTRes.TheKind = TypeTestResolution::Unknown; + break; case lltok::kw_unsat: TTRes.TheKind = TypeTestResolution::Unsat; break; 
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -2861,6 +2861,8 @@ static const char *getTTResKindName(TypeTestResolution::Kind K) { switch (K) { + case TypeTestResolution::Unknown: + return "unknown"; case TypeTestResolution::Unsat: return "unsat"; case TypeTestResolution::ByteArray: diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -971,6 +971,12 @@ if (AttributorRun & AttributorRunOption::MODULE) MPM.addPass(AttributorPass()); + // Lower type metadata and the type.test intrinsic in the ThinLTO + // post link pipeline after ICP. This is to enable usage of the type + // tests in ICP sequences. + if (Phase == ThinLTOPhase::PostLink) + MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true)); + // Interprocedural constant propagation now that basic cleanup has occurred // and prior to optimizing globals. // FIXME: This position in the pipeline hasn't been carefully considered in @@ -1355,6 +1361,9 @@ // metadata and intrinsics. MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr)); MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr)); + // Run a second time to clean up any type tests left behind by WPD for use + // in ICP. + MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true)); return MPM; } @@ -1421,6 +1430,10 @@ // The LowerTypeTestsPass needs to run to lower type metadata and the // type.test intrinsics. The pass does nothing if CFI is disabled. MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr)); + // Run a second time to clean up any type tests left behind by WPD for use + // in ICP (which is performed earlier than this in the regular LTO + // pipeline). + MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true)); return MPM; } @@ -1548,6 +1561,9 @@ // to be run at link time if CFI is enabled. This pass does nothing if // CFI is disabled. 
MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr)); + // Run a second time to clean up any type tests left behind by WPD for use + // in ICP (which is performed earlier than this in the regular LTO pipeline). + MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true)); // Enable splitting late in the FullLTO post-link pipeline. This is done in // the same stage in the old pass manager (\ref addLateLTOOptimizationPasses). diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp --- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp @@ -735,6 +735,9 @@ /// replace the call with. Value *LowerTypeTestsModule::lowerTypeTestCall(Metadata *TypeId, CallInst *CI, const TypeIdLowering &TIL) { + // Delay lowering if the resolution is currently unknown. + if (TIL.TheKind == TypeTestResolution::Unknown) + return nullptr; if (TIL.TheKind == TypeTestResolution::Unsat) return ConstantInt::getFalse(M.getContext()); @@ -1036,14 +1039,18 @@ report_fatal_error("Second argument of llvm.type.test must be metadata"); auto TypeIdStr = dyn_cast<MDString>(TypeIdMDVal->getMetadata()); + // If this is a local unpromoted type, which doesn't have a metadata string, + // treat as Unknown and delay lowering, so that we can still utilize it for + // later optimizations. 
if (!TypeIdStr) - report_fatal_error( - "Second argument of llvm.type.test must be a metadata string"); + return; TypeIdLowering TIL = importTypeId(TypeIdStr->getString()); Value *Lowered = lowerTypeTestCall(TypeIdStr, CI, TIL); - CI->replaceAllUsesWith(Lowered); - CI->eraseFromParent(); + if (Lowered) { + CI->replaceAllUsesWith(Lowered); + CI->eraseFromParent(); + } } // ThinLTO backend: the function F has a jump table entry; update this module @@ -1166,8 +1173,10 @@ for (CallInst *CI : TIUI.CallSites) { ++NumTypeTestCallsLowered; Value *Lowered = lowerTypeTestCall(TypeId, CI, TIL); - CI->replaceAllUsesWith(Lowered); - CI->eraseFromParent(); + if (Lowered) { + CI->replaceAllUsesWith(Lowered); + CI->eraseFromParent(); + } } } } diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -515,6 +515,7 @@ MPM.add(createBarrierNoopPass()); if (PerformThinLTO) { + MPM.add(createLowerTypeTestsPass(nullptr, nullptr, true)); // Drop available_externally and unreferenced globals. This is necessary // with ThinLTO in order to avoid leaving undefined references to dead // globals in the object file. @@ -548,9 +549,11 @@ // inter-module indirect calls. For that we perform indirect call promotion // earlier in the pass pipeline, here before globalopt. Otherwise imported // available_externally functions look unreferenced and are removed. 
- if (PerformThinLTO) + if (PerformThinLTO) { MPM.add(createPGOIndirectCallPromotionLegacyPass(/*InLTO = */ true, !PGOSampleUse.empty())); + MPM.add(createLowerTypeTestsPass(nullptr, nullptr, true)); + } // For SamplePGO in ThinLTO compile phase, we do not want to unroll loops // as it will change the CFG too much to make the 2nd profile annotation @@ -1079,8 +1082,8 @@ PM.add(createVerifierPass()); if (ImportSummary) { - // These passes import type identifier resolutions for whole-program - // devirtualization and CFI. They must run early because other passes may + // This pass imports type identifier resolutions for whole-program + // devirtualization and CFI. It must run early because other passes may // disturb the specific instruction patterns that these passes look for, // creating dependencies on resolutions that may not appear in the summary. // @@ -1128,6 +1131,9 @@ // control flow integrity mechanisms (-fsanitize=cfi*) and needs to run at // link time if CFI is enabled. The pass does nothing if CFI is disabled. PM.add(createLowerTypeTestsPass(ExportSummary, nullptr)); + // Run a second time to clean up any type tests left behind by WPD for use + // in ICP (which is performed earlier than this in the regular LTO pipeline). 
+ PM.add(createLowerTypeTestsPass(nullptr, nullptr, true)); if (OptLevel != 0) addLateLTOOptimizationPasses(PM); diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -540,7 +540,9 @@ bool areRemarksEnabled(); - void scanTypeTestUsers(Function *TypeTestFunc); + void + scanTypeTestUsers(Function *TypeTestFunc, + DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap); void scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc); void buildTypeIdentifierMap( @@ -1705,7 +1707,9 @@ return false; } -void DevirtModule::scanTypeTestUsers(Function *TypeTestFunc) { +void DevirtModule::scanTypeTestUsers( + Function *TypeTestFunc, + DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) { // Find all virtual calls via a virtual table pointer %p under an assumption // of the form llvm.assume(llvm.type.test(%p, %md)). This indicates that %p // points to a member of the type identifier %md. Group calls by (type ID, @@ -1724,22 +1728,59 @@ auto &DT = LookupDomTree(*CI->getFunction()); findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT); + Metadata *TypeId = + cast<MetadataAsValue>(CI->getArgOperand(1))->getMetadata(); // If we found any, add them to CallSlots. if (!Assumes.empty()) { - Metadata *TypeId = - cast<MetadataAsValue>(CI->getArgOperand(1))->getMetadata(); Value *Ptr = CI->getArgOperand(0)->stripPointerCasts(); for (DevirtCallSite Call : DevirtCalls) CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CB, nullptr); } - // We no longer need the assumes or the type test. - for (auto Assume : Assumes) - Assume->eraseFromParent(); - // We can't use RecursivelyDeleteTriviallyDeadInstructions here because we - // may use the vtable argument later. - if (CI->use_empty()) - CI->eraseFromParent(); + auto RemoveTypeTestAssumes = [&]() { + // We no longer need the assumes or the type test. 
+ for (auto Assume : Assumes) + Assume->eraseFromParent(); + // We can't use RecursivelyDeleteTriviallyDeadInstructions here because we + // may use the vtable argument later. + if (CI->use_empty()) + CI->eraseFromParent(); + }; + + // At this point we could remove all type test assume sequences, as they + // were originally inserted for WPD. However, we can keep these in the + // code stream for later analysis (e.g. to help drive more efficient ICP + // sequences). They will eventually be removed by a second LowerTypeTests + // invocation that cleans them up. In order to do this correctly, the first + // LowerTypeTests invocation needs to know that they have "Unknown" type + // test resolution, so that they aren't treated as Unsat and lowered to + // False, which will break any uses on assumes. Below we remove any type + // test assumes that will not be treated as Unknown by LTT. + + // The type test assumes will be treated by LTT as Unsat if the type id is + // not used on a global (in which case it has no entry in the TypeIdMap). + if (!TypeIdMap.count(TypeId)) + RemoveTypeTestAssumes(); + + // For ThinLTO importing, we need to remove the type test assumes if this is + // an MDString type id without a corresponding TypeIdSummary. Any + // non-MDString type ids are ignored and treated as Unknown by LTT, so their + // type test assumes can be kept. If the MDString type id is missing a + // TypeIdSummary (e.g. because there was no use on a vcall, preventing the + // exporting phase of WPD from analyzing it), then it would be treated as + // Unsat by LTT and we need to remove its type test assumes here. If not + // used on a vcall we don't need them for later optimization use in any + // case. 
+ else if (ImportSummary && isa<MDString>(TypeId)) { + const TypeIdSummary *TidSummary = + ImportSummary->getTypeIdSummary(cast<MDString>(TypeId)->getString()); + if (!TidSummary) + RemoveTypeTestAssumes(); + else + // If one was created it should not be Unsat, because if we reached here + // the type id was used on a global. + assert(TidSummary->TTRes.TheKind != TypeTestResolution::Unsat); + } } } @@ -1931,8 +1972,13 @@ (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty())) return false; + // Rebuild type metadata into a map for easy lookup. + std::vector<VTableBits> Bits; + DenseMap<Metadata *, std::set<TypeMemberInfo>> TypeIdMap; + buildTypeIdentifierMap(Bits, TypeIdMap); + if (TypeTestFunc && AssumeFunc) - scanTypeTestUsers(TypeTestFunc); + scanTypeTestUsers(TypeTestFunc, TypeIdMap); if (TypeCheckedLoadFunc) scanTypeCheckedLoadUsers(TypeCheckedLoadFunc); @@ -1954,10 +2000,6 @@ return true; } - // Rebuild type metadata into a map for easy lookup. - std::vector<VTableBits> Bits; - DenseMap<Metadata *, std::set<TypeMemberInfo>> TypeIdMap; - buildTypeIdentifierMap(Bits, TypeIdMap); if (TypeIdMap.empty()) return true; @@ -2014,14 +2056,22 @@ // function implementation at offset S.first.ByteOffset, and add to // TargetsForSlot. std::vector<VirtualCallTarget> TargetsForSlot; - if (tryFindVirtualCallTargets(TargetsForSlot, TypeIdMap[S.first.TypeID], + WholeProgramDevirtResolution *Res = nullptr; + const std::set<TypeMemberInfo> &TypeMemberInfos = TypeIdMap[S.first.TypeID]; + if (ExportSummary && isa<MDString>(S.first.TypeID) && + TypeMemberInfos.size()) + // For any type id used on a global's type metadata, create the type id + // summary resolution regardless of whether we can devirtualize, so that + // lower type tests knows the type id is not Unsat. If it was not used on + // a global's type metadata, the TypeIdMap entry set will be empty, and + // we don't want to create an entry (with the default Unknown type + // resolution), which can prevent detection of the Unsat. 
+ Res = &ExportSummary + ->getOrInsertTypeIdSummary( + cast<MDString>(S.first.TypeID)->getString()) + .WPDRes[S.first.ByteOffset]; + if (tryFindVirtualCallTargets(TargetsForSlot, TypeMemberInfos, S.first.ByteOffset)) { - WholeProgramDevirtResolution *Res = nullptr; - if (ExportSummary && isa<MDString>(S.first.TypeID)) - Res = &ExportSummary - ->getOrInsertTypeIdSummary( - cast<MDString>(S.first.TypeID)->getString()) - .WPDRes[S.first.ByteOffset]; if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) { DidVirtualConstProp |= @@ -2135,11 +2185,14 @@ std::vector<ValueInfo> TargetsForSlot; auto TidSummary = ExportSummary.getTypeIdCompatibleVtableSummary(S.first.TypeID); assert(TidSummary); + // Create the type id summary resolution regardless of whether we can + // devirtualize, so that lower type tests knows the type id is used on + // a global and not Unsat. + WholeProgramDevirtResolution *Res = + &ExportSummary.getOrInsertTypeIdSummary(S.first.TypeID) + .WPDRes[S.first.ByteOffset]; if (tryFindVirtualCallTargets(TargetsForSlot, *TidSummary, S.first.ByteOffset)) { - WholeProgramDevirtResolution *Res = - &ExportSummary.getOrInsertTypeIdSummary(S.first.TypeID) - .WPDRes[S.first.ByteOffset]; if (!trySingleImplDevirt(TargetsForSlot, S.first, S.second, Res, DevirtTargets)) diff --git a/llvm/test/Bitcode/summary_version.ll b/llvm/test/Bitcode/summary_version.ll --- a/llvm/test/Bitcode/summary_version.ll +++ b/llvm/test/Bitcode/summary_version.ll @@ -2,7 +2,7 @@ ; RUN: opt -module-summary %s -o - | llvm-bcanalyzer -dump | FileCheck %s ; CHECK: <GLOBALVAL_SUMMARY_BLOCK -; CHECK: <VERSION op0=8/> +; CHECK: <VERSION op0=9/> diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll --- a/llvm/test/Other/new-pm-lto-defaults.ll +++ b/llvm/test/Other/new-pm-lto-defaults.ll @@ -99,6 +99,7 @@ ; CHECK-O2-NEXT: Running analysis: DemandedBitsAnalysis ; CHECK-O2-NEXT: Running pass: CrossDSOCFIPass ; CHECK-O2-NEXT: Running pass: LowerTypeTestsPass +; CHECK-O-NEXT: Running pass: LowerTypeTestsPass ; CHECK-O2-NEXT: Running pass: 
ModuleToFunctionPassAdaptor<{{.*}}SimplifyCFGPass> ; CHECK-O2-NEXT: Running pass: EliminateAvailableExternallyPass ; CHECK-O2-NEXT: Running pass: GlobalDCEPass diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-defaults.ll @@ -79,6 +79,7 @@ ; CHECK-O-NEXT: Running pass: LowerExpectIntrinsicPass ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Finished llvm::Function pass manager run. +; CHECK-POSTLINK-O-NEXT: Running pass: LowerTypeTestsPass ; CHECK-O-NEXT: Running pass: IPSCCPPass ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass ; CHECK-O-NEXT: Running pass: GlobalOptPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -48,6 +48,7 @@ ; CHECK-O-NEXT: Running pass: LowerExpectIntrinsicPass ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Finished {{.*}}Function pass manager run. 
+; CHECK-O-NEXT: Running pass: LowerTypeTestsPass ; CHECK-O-NEXT: Running pass: IPSCCPPass ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass ; CHECK-O-NEXT: Running pass: GlobalOptPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -59,6 +59,7 @@ ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis ; CHECK-O-NEXT: Running pass: PGOIndirectCallPromotion +; CHECK-O-NEXT: Running pass: LowerTypeTestsPass ; CHECK-O-NEXT: Running pass: IPSCCPPass ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass ; CHECK-O-NEXT: Running pass: GlobalOptPass diff --git a/llvm/test/ThinLTO/X86/Inputs/cfi-unsat.ll b/llvm/test/ThinLTO/X86/Inputs/cfi-unsat.ll new file mode 100644 --- /dev/null +++ b/llvm/test/ThinLTO/X86/Inputs/cfi-unsat.ll @@ -0,0 +1,68 @@ +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +%struct.A = type { i32 (...)** } +%struct.B = type { i32 (...)** } + +@_ZTV1B = linkonce_odr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B1fEi to i8*), i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B1nEi to i8*)] }, !type !0 + +$test = comdat any +$testb = comdat any + +define linkonce_odr i32 @test(%struct.A* %obj, i32 %a) comdat { +entry: + %0 = bitcast %struct.A* %obj to i8** + %vtable5 = load i8*, i8** %0 + + %1 = tail call { i8*, i1 } @llvm.type.checked.load(i8* %vtable5, i32 8, metadata !"_ZTS1A") + %2 = extractvalue { i8*, i1 } %1, 1 + br i1 %2, label %cont, label %trap + +trap: + tail call void @llvm.trap() + unreachable + +cont: + %3 = extractvalue { i8*, i1 } %1, 0 + %4 = bitcast i8* %3 to i32 (%struct.A*, i32)* + + %call = tail 
call i32 %4(%struct.A* nonnull %obj, i32 %a) + + ret i32 %call +} + +define linkonce_odr i32 @testb(%struct.A* %obj, i32 %a) comdat { +entry: + %0 = bitcast %struct.A* %obj to i8** + %vtable5 = load i8*, i8** %0 + + %1 = tail call { i8*, i1 } @llvm.type.checked.load(i8* %vtable5, i32 0, metadata !"_ZTS1A") + %2 = extractvalue { i8*, i1 } %1, 1 + br i1 %2, label %cont, label %trap + +trap: + tail call void @llvm.trap() + unreachable + +cont: + %3 = extractvalue { i8*, i1 } %1, 0 + %4 = bitcast i8* %3 to i32 (%struct.A*, i32)* + + %call = tail call i32 %4(%struct.A* nonnull %obj, i32 %a) + + ret i32 %call +} + +declare { i8*, i1 } @llvm.type.checked.load(i8*, i32, metadata) +declare void @llvm.trap() + +define internal i32 @_ZN1B1fEi(%struct.B* %this, i32 %a) { +entry: + ret i32 0 +} +define internal i32 @_ZN1B1nEi(%struct.B* %this, i32 %a) { +entry: + ret i32 0 +} + +!0 = !{i64 16, !"_ZTS1B"} diff --git a/llvm/test/ThinLTO/X86/cfi-unsat.ll b/llvm/test/ThinLTO/X86/cfi-unsat.ll new file mode 100644 --- /dev/null +++ b/llvm/test/ThinLTO/X86/cfi-unsat.ll @@ -0,0 +1,82 @@ +; REQUIRES: x86-registered-target + +; Test CFI devirtualization through the thin link and backend when +; a type id is Unsat (not used on any global's type metadata). +; +; In this test case, the first module is split and will import a resolution +; for its type test. The resolution would be exported by the second +; module, which is set up so that it does not get split (treated as regular +; LTO because it does not have any external globals from which to create +; a unique module ID). We should not actually get any resolution for the +; type id in this case, since no globals include it in their type metadata, +; so the resolution is Unsat and the type.checked.load instructions are +; converted to type tests that evaluate to false. 
+ +; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t.o %s +; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t1.o %p/Inputs/cfi-unsat.ll + +; RUN: llvm-lto2 run %t.o %t1.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -whole-program-visibility \ +; RUN: -o %t3 \ +; RUN: -r=%t.o,test2,px \ +; RUN: -r=%t1.o,_ZTV1B,px \ +; RUN: -r=%t1.o,test,px \ +; RUN: -r=%t1.o,testb,px +; RUN: llvm-dis %t3.index.bc -o - | FileCheck %s --check-prefix=INDEX +; RUN: llvm-dis %t3.0.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR0 +; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR1 + +; INDEX-NOT: "typeid:" + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +%struct.A = type { i32 (...)** } + +$test2 = comdat any + +define linkonce_odr i32 @test2(%struct.A* %obj, i32 %a) comdat { +entry: + %0 = bitcast %struct.A* %obj to i8** + %vtable5 = load i8*, i8** %0 + + %1 = tail call { i8*, i1 } @llvm.type.checked.load(i8* %vtable5, i32 8, metadata !"_ZTS1A") + %2 = extractvalue { i8*, i1 } %1, 1 + br i1 %2, label %cont, label %trap + +trap: + tail call void @llvm.trap() + unreachable + +cont: + %3 = extractvalue { i8*, i1 } %1, 0 + %4 = bitcast i8* %3 to i32 (%struct.A*, i32)* + + %call = tail call i32 %4(%struct.A* nonnull %obj, i32 %a) + + ret i32 %call +} + +; CHECK-IR0: define weak_odr i32 @test +; CHECK-IR0-NEXT: entry: +; CHECK-IR0-NEXT: %0 = bitcast +; CHECK-IR0-NEXT: %vtable5 = +; CHECK-IR0-NEXT: tail call void @llvm.trap() +; CHECK-IR0-NEXT: unreachable +; CHECK-IR0-NEXT: } +; CHECK-IR0: define weak_odr i32 @testb +; CHECK-IR0-NEXT: entry: +; CHECK-IR0-NEXT: %0 = bitcast +; CHECK-IR0-NEXT: %vtable5 = +; CHECK-IR0-NEXT: tail call void @llvm.trap() +; CHECK-IR0-NEXT: unreachable +; CHECK-IR0-NEXT: } + +; CHECK-IR1: define weak_odr i32 @test2 +; CHECK-IR1-NEXT: entry: +; CHECK-IR1-NEXT: tail call void @llvm.trap() +; CHECK-IR1-NEXT: unreachable +; CHECK-IR1-NEXT: 
} + +declare { i8*, i1 } @llvm.type.checked.load(i8*, i32, metadata) +declare void @llvm.trap() diff --git a/llvm/test/ThinLTO/X86/nodevirt-nonpromoted-typeid.ll b/llvm/test/ThinLTO/X86/nodevirt-nonpromoted-typeid.ll --- a/llvm/test/ThinLTO/X86/nodevirt-nonpromoted-typeid.ll +++ b/llvm/test/ThinLTO/X86/nodevirt-nonpromoted-typeid.ll @@ -33,6 +33,8 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-grtev4-linux-gnu" +@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @g, i8* null }] + %struct.D = type { i32 (...)** } @_ZTV1D = internal constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.D*, i32)* @_ZN1D1mEi to i8*)] }, !type !3 @@ -57,6 +59,23 @@ ; CHECK-IR-LABEL: ret i32 ; CHECK-IR-LABEL: } +; Function Attrs: inlinehint nounwind uwtable +define internal void @_ZN1DC2Ev(%struct.D* %this) unnamed_addr align 2 { +entry: + %this.addr = alloca %struct.D*, align 8 + store %struct.D* %this, %struct.D** %this.addr, align 8 + %this1 = load %struct.D*, %struct.D** %this.addr + %0 = bitcast %struct.D* %this1 to i32 (...)*** + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1D, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + ret void +} + +define internal void @g() section ".text.startup" { + %d = alloca %struct.D, align 8 + call void @_ZN1DC2Ev(%struct.D* %d) + ret void +} + declare i1 @llvm.type.test(i8*, metadata) declare void @llvm.assume(i1) diff --git a/llvm/test/ThinLTO/X86/type_test_noindircall.ll b/llvm/test/ThinLTO/X86/type_test_noindircall.ll new file mode 100644 --- /dev/null +++ b/llvm/test/ThinLTO/X86/type_test_noindircall.ll @@ -0,0 +1,59 @@ +; Test to ensure that we correctly handle a type test not used for a virtual call. 
+; If it isn't removed correctly by WPD then we could incorrectly get an Unsat +; (resulting in an unreachable in the IR). + +; REQUIRES: x86-registered-target + +; RUN: opt -thinlto-bc -o %t.o %s + +; FIXME: Fix machine verifier issues and remove -verify-machineinstrs=0. PR39436. +; RUN: llvm-lto2 run %t.o -save-temps -pass-remarks=. \ +; RUN: -whole-program-visibility \ +; RUN: -verify-machineinstrs=0 \ +; RUN: -r=%t.o,_ZTVN12_GLOBAL__N_18RealFileE,px \ +; RUN: -o %t2 +; RUN: llvm-dis %t2.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR + +; Try again without LTO unit splitting. +; RUN: opt -thinlto-bc -thinlto-split-lto-unit=false -o %t3.o %s +; RUN: llvm-lto2 run %t3.o -save-temps -pass-remarks=. \ +; RUN: -whole-program-visibility \ +; RUN: -verify-machineinstrs=0 \ +; RUN: -r=%t3.o,_ZTVN12_GLOBAL__N_18RealFileE,px \ +; RUN: -o %t4 +; RUN: llvm-dis %t4.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +%"class.llvm::vfs::File" = type { i32 (...)** } +%"class.llvm::vfs::Status" = type <{ %"class.std::__cxx11::basic_string", %"class.llvm::sys::fs::UniqueID", %"struct.std::chrono::time_point", i32, i32, i64, i32, i32, i8, [7 x i8] }> +%"class.std::__cxx11::basic_string" = type { %"struct.std::__cxx11::basic_string, std::allocator >::_Alloc_hider", i64, %union.anon } +%"struct.std::__cxx11::basic_string, std::allocator >::_Alloc_hider" = type { i8* } +%union.anon = type { i64, [8 x i8] } +%"class.llvm::sys::fs::UniqueID" = type { i64, i64 } +%"struct.std::chrono::time_point" = type { %"struct.std::chrono::duration" } +%"struct.std::chrono::duration" = type { i64 } +%"class.(anonymous namespace)::RealFile" = type { %"class.llvm::vfs::File", i32, [4 x i8], %"class.llvm::vfs::Status", %"class.std::__cxx11::basic_string" } + +@_ZTVN12_GLOBAL__N_18RealFileE = unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* 
null, i8* bitcast (void (%"class.(anonymous namespace)::RealFile"*)* @_ZN12_GLOBAL__N_18RealFileD2Ev to i8*)] }, align 8, !type !74 + +define internal void @_ZN12_GLOBAL__N_18RealFileD2Ev(%"class.(anonymous namespace)::RealFile"* %this) unnamed_addr #0 align 2 { +entry: +; CHECK-IR: %0 = getelementptr + %0 = getelementptr %"class.(anonymous namespace)::RealFile", %"class.(anonymous namespace)::RealFile"* %this, i64 0, i32 0, i32 0 +; CHECK-IR-NEXT: store + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTVN12_GLOBAL__N_18RealFileE, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + %1 = tail call i1 @llvm.type.test(i8* bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTVN12_GLOBAL__N_18RealFileE, i64 0, inrange i32 0, i64 2) to i8*), metadata !"4$09c6cc733fc6accb91e5d7b87cb48f2d") + tail call void @llvm.assume(i1 %1) +; CHECK-IR-NEXT: ret void + ret void +} + +declare i1 @llvm.type.test(i8*, metadata) +declare void @llvm.assume(i1) + +; Make sure we don't inline or otherwise optimize out the direct calls. 
+attributes #0 = { noinline optnone } + +!74 = !{i64 16, !"4$09c6cc733fc6accb91e5d7b87cb48f2d"} diff --git a/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll b/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll --- a/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll @@ -10,7 +10,7 @@ ; SUMMARY: TypeIdMap: ; SUMMARY-NEXT: typeid3: ; SUMMARY-NEXT: TTRes: -; SUMMARY-NEXT: Kind: Unsat +; SUMMARY-NEXT: Kind: Unknown ; SUMMARY-NEXT: SizeM1BitWidth: 0 ; SUMMARY-NEXT: AlignLog2: 0 ; SUMMARY-NEXT: SizeM1: 0 @@ -23,7 +23,7 @@ ; SUMMARY-NEXT: ResByArg: ; SUMMARY-NEXT: typeid1: ; SUMMARY-NEXT: TTRes: -; SUMMARY-NEXT: Kind: Unsat +; SUMMARY-NEXT: Kind: Unknown ; SUMMARY-NEXT: SizeM1BitWidth: 0 ; SUMMARY-NEXT: AlignLog2: 0 ; SUMMARY-NEXT: SizeM1: 0 @@ -36,7 +36,7 @@ ; SUMMARY-NEXT: ResByArg: ; SUMMARY-NEXT: typeid2: ; SUMMARY-NEXT: TTRes: -; SUMMARY-NEXT: Kind: Unsat +; SUMMARY-NEXT: Kind: Unknown ; SUMMARY-NEXT: SizeM1BitWidth: 0 ; SUMMARY-NEXT: AlignLog2: 0 ; SUMMARY-NEXT: SizeM1: 0 diff --git a/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl2.ll b/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl2.ll --- a/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl2.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl2.ll @@ -14,7 +14,7 @@ ; RUN: -wholeprogramdevirt-summary-action=export -o /dev/null 2>&1 | FileCheck %s --check-prefix=MISSING-MODULE ; Check single impl devirtulation in summary -; CHECK: typeid: (name: "_ZTS1A", summary: (typeTestRes: (kind: unsat, sizeM1BitWidth: 0), wpdResolutions: ((offset: 0, wpdRes: (kind: singleImpl, singleImplName: "_ZNK1A1fEv"))))) ; guid +; CHECK: typeid: (name: "_ZTS1A", summary: (typeTestRes: (kind: unknown, sizeM1BitWidth: 0), wpdResolutions: ((offset: 0, wpdRes: (kind: singleImpl, singleImplName: "_ZNK1A1fEv"))))) ; guid ; MISSING-MODULE: combined summary should contain Regular LTO module diff --git 
a/llvm/test/Transforms/WholeProgramDevirt/export-single-impl.ll b/llvm/test/Transforms/WholeProgramDevirt/export-single-impl.ll --- a/llvm/test/Transforms/WholeProgramDevirt/export-single-impl.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/export-single-impl.ll @@ -4,7 +4,7 @@ ; SUMMARY: TypeIdMap: ; SUMMARY-NEXT: typeid3: ; SUMMARY-NEXT: TTRes: -; SUMMARY-NEXT: Kind: Unsat +; SUMMARY-NEXT: Kind: Unknown ; SUMMARY-NEXT: SizeM1BitWidth: 0 ; SUMMARY-NEXT: AlignLog2: 0 ; SUMMARY-NEXT: SizeM1: 0 @@ -17,7 +17,7 @@ ; SUMMARY-NEXT: ResByArg: ; SUMMARY-NEXT: typeid1: ; SUMMARY-NEXT: TTRes: -; SUMMARY-NEXT: Kind: Unsat +; SUMMARY-NEXT: Kind: Unknown ; SUMMARY-NEXT: SizeM1BitWidth: 0 ; SUMMARY-NEXT: AlignLog2: 0 ; SUMMARY-NEXT: SizeM1: 0 @@ -30,7 +30,7 @@ ; SUMMARY-NEXT: ResByArg: ; SUMMARY-NEXT: typeid2: ; SUMMARY-NEXT: TTRes: -; SUMMARY-NEXT: Kind: Unsat +; SUMMARY-NEXT: Kind: Unknown ; SUMMARY-NEXT: SizeM1BitWidth: 0 ; SUMMARY-NEXT: AlignLog2: 0 ; SUMMARY-NEXT: SizeM1: 0 @@ -43,7 +43,7 @@ ; SUMMARY-NEXT: ResByArg: ; SUMMARY-NEXT: typeid4: ; SUMMARY-NEXT: TTRes: -; SUMMARY-NEXT: Kind: Unsat +; SUMMARY-NEXT: Kind: Unknown ; SUMMARY-NEXT: SizeM1BitWidth: 0 ; SUMMARY-NEXT: AlignLog2: 0 ; SUMMARY-NEXT: SizeM1: 0 diff --git a/llvm/test/Transforms/WholeProgramDevirt/export-uniform-ret-val.ll b/llvm/test/Transforms/WholeProgramDevirt/export-uniform-ret-val.ll --- a/llvm/test/Transforms/WholeProgramDevirt/export-uniform-ret-val.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/export-uniform-ret-val.ll @@ -6,7 +6,7 @@ ; SUMMARY: TypeIdMap: ; SUMMARY-NEXT: typeid4: ; SUMMARY-NEXT: TTRes: -; SUMMARY-NEXT: Kind: Unsat +; SUMMARY-NEXT: Kind: Unknown ; SUMMARY-NEXT: SizeM1BitWidth: 0 ; SUMMARY-NEXT: AlignLog2: 0 ; SUMMARY-NEXT: SizeM1: 0 diff --git a/llvm/test/Transforms/WholeProgramDevirt/export-unique-ret-val.ll b/llvm/test/Transforms/WholeProgramDevirt/export-unique-ret-val.ll --- a/llvm/test/Transforms/WholeProgramDevirt/export-unique-ret-val.ll +++ 
b/llvm/test/Transforms/WholeProgramDevirt/export-unique-ret-val.ll @@ -6,7 +6,7 @@ ; SUMMARY: TypeIdMap: ; SUMMARY-NEXT: typeid3: ; SUMMARY-NEXT: TTRes: -; SUMMARY-NEXT: Kind: Unsat +; SUMMARY-NEXT: Kind: Unknown ; SUMMARY-NEXT: SizeM1BitWidth: 0 ; SUMMARY-NEXT: AlignLog2: 0 ; SUMMARY-NEXT: SizeM1: 0 @@ -24,7 +24,7 @@ ; SUMMARY-NEXT: Bit: 0 ; SUMMARY-NEXT: typeid4: ; SUMMARY-NEXT: TTRes: -; SUMMARY-NEXT: Kind: Unsat +; SUMMARY-NEXT: Kind: Unknown ; SUMMARY-NEXT: SizeM1BitWidth: 0 ; SUMMARY-NEXT: AlignLog2: 0 ; SUMMARY-NEXT: SizeM1: 0 diff --git a/llvm/test/Transforms/WholeProgramDevirt/export-vcp.ll b/llvm/test/Transforms/WholeProgramDevirt/export-vcp.ll --- a/llvm/test/Transforms/WholeProgramDevirt/export-vcp.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/export-vcp.ll @@ -9,7 +9,7 @@ ; SUMMARY: TypeIdMap: ; SUMMARY-NEXT: typeid3: ; SUMMARY-NEXT: TTRes: -; SUMMARY-NEXT: Kind: Unsat +; SUMMARY-NEXT: Kind: Unknown ; SUMMARY-NEXT: SizeM1BitWidth: 0 ; SUMMARY-NEXT: AlignLog2: 0 ; SUMMARY-NEXT: SizeM1: 0 @@ -29,7 +29,7 @@ ; SUMMARY-ARM-NEXT: Bit: 1 ; SUMMARY-NEXT: typeid4: ; SUMMARY-NEXT: TTRes: -; SUMMARY-NEXT: Kind: Unsat +; SUMMARY-NEXT: Kind: Unknown ; SUMMARY-NEXT: SizeM1BitWidth: 0 ; SUMMARY-NEXT: AlignLog2: 0 ; SUMMARY-NEXT: SizeM1: 0 diff --git a/llvm/test/Transforms/WholeProgramDevirt/import-indir.ll b/llvm/test/Transforms/WholeProgramDevirt/import-indir.ll --- a/llvm/test/Transforms/WholeProgramDevirt/import-indir.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/import-indir.ll @@ -32,7 +32,7 @@ ; SUMMARY-NEXT: TypeIdMap: ; SUMMARY-NEXT: typeid1: ; SUMMARY-NEXT: TTRes: -; SUMMARY-NEXT: Kind: Unsat +; SUMMARY-NEXT: Kind: Unknown ; SUMMARY-NEXT: SizeM1BitWidth: 0 ; SUMMARY-NEXT: AlignLog2: 0 ; SUMMARY-NEXT: SizeM1: 0 diff --git a/llvm/test/Transforms/WholeProgramDevirt/uniform-retval.ll b/llvm/test/Transforms/WholeProgramDevirt/uniform-retval.ll --- a/llvm/test/Transforms/WholeProgramDevirt/uniform-retval.ll +++ 
b/llvm/test/Transforms/WholeProgramDevirt/uniform-retval.ll @@ -25,7 +25,7 @@ %fptr = load i8*, i8** %fptrptr %fptr_casted = bitcast i8* %fptr to i32 (i8*)* %result = call i32 %fptr_casted(i8* %obj) - ; CHECK-NOT: call + ; CHECK-NOT: call i32 % ; CHECK: ret i32 123 ret i32 %result }