diff --git a/llvm/include/llvm/Analysis/TypeMetadataUtils.h b/llvm/include/llvm/Analysis/TypeMetadataUtils.h
--- a/llvm/include/llvm/Analysis/TypeMetadataUtils.h
+++ b/llvm/include/llvm/Analysis/TypeMetadataUtils.h
@@ -64,7 +64,7 @@
 /// Used for example from GlobalDCE to find an entry in a C++ vtable that
 /// matches a vcall offset.
 ///
-/// To support Swift vtables, getPointerAtOffset can see through "relative
+/// To support relative vtables, getPointerAtOffset can see through "relative
 /// pointers", i.e. (sub-)expressions of the form of:
 ///
 /// @symbol = ... {
diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
--- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -22,6 +22,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryProfileInfo.h"
@@ -580,7 +581,8 @@
 /// within the initializer.
 static void findFuncPointers(const Constant *I, uint64_t StartingOffset,
                              const Module &M, ModuleSummaryIndex &Index,
-                             VTableFuncList &VTableFuncs) {
+                             VTableFuncList &VTableFuncs,
+                             const GlobalVariable &OrigGV) {
   // First check if this is a function pointer.
   if (I->getType()->isPointerTy()) {
     auto C = I->stripPointerCasts();
@@ -608,7 +610,7 @@
       auto Offset = SL->getElementOffset(EI.index());
       unsigned Op = SL->getElementContainingOffset(Offset);
       findFuncPointers(cast<Constant>(I->getOperand(Op)),
-                       StartingOffset + Offset, M, Index, VTableFuncs);
+                       StartingOffset + Offset, M, Index, VTableFuncs, OrigGV);
     }
   } else if (auto *C = dyn_cast<ConstantArray>(I)) {
     ArrayType *ATy = C->getType();
@@ -616,7 +618,34 @@
     uint64_t EltSize = DL.getTypeAllocSize(EltTy);
     for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
       findFuncPointers(cast<Constant>(I->getOperand(i)),
-                       StartingOffset + i * EltSize, M, Index, VTableFuncs);
+                       StartingOffset + i * EltSize, M, Index, VTableFuncs,
+                       OrigGV);
+    }
+  } else if (const auto *CE = dyn_cast<ConstantExpr>(I)) {
+    // For relative vtables, the next sub-component should be a trunc.
+    if (CE->getOpcode() != Instruction::Trunc ||
+        !(CE = dyn_cast<ConstantExpr>(CE->getOperand(0))))
+      return;
+
+    // If this constant can be reduced to the offset between a function and a
+    // global, then we know this is a valid virtual function if the RHS is the
+    // original vtable we're scanning through.
+    if (CE->getOpcode() == Instruction::Sub) {
+      GlobalValue *LHS, *RHS;
+      APSInt LHSOffset, RHSOffset;
+      if (IsConstantOffsetFromGlobal(CE->getOperand(0), LHS, LHSOffset, DL) &&
+          IsConstantOffsetFromGlobal(CE->getOperand(1), RHS, RHSOffset, DL) &&
+          RHS == &OrigGV &&
+
+          // For relative vtables, this component should point to the callable
+          // function without any offsets.
+          LHSOffset == 0 &&
+
+          // Also, the RHS should always point to somewhere within the vtable.
+          RHSOffset <=
+              static_cast<uint64_t>(DL.getTypeAllocSize(OrigGV.getInitializer()->getType()))) {
+        findFuncPointers(LHS, StartingOffset, M, Index, VTableFuncs, OrigGV);
+      }
     }
   }
 }
@@ -629,7 +658,7 @@
     return;
 
   findFuncPointers(V.getInitializer(), /*StartingOffset=*/0, M, Index,
-                   VTableFuncs);
+                   VTableFuncs, V);
 
 #ifndef NDEBUG
   // Validate that the VTableFuncs list is ordered by offset.
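For readers unfamiliar with the encoding the new findFuncPointers logic above decodes: each 32-bit relative-vtable slot holds the target function's address minus the address of the slot itself, i.e. an initializer of the form i32 trunc (i64 sub (i64 ptrtoint @func, i64 ptrtoint <slot>) to i32), which is why the code accepts a Sub whose RHS resolves to the vtable being scanned and whose LHS resolves to a function with no offset. The standalone C++ sketch below is only an illustration of that identity, not LLVM API; the names Slots, initSlots and resolveSlot are invented here, and the pointer casts are conditionally supported (they work on the usual ELF targets).

#include <cstdint>
#include <cstdio>

// Stand-ins for the virtual functions a vtable would reference.
static int vf1(int A) { return A + 1; }
static int vf2(int A) { return A * 2; }

// Each slot stores "function address - address of this slot", truncated to
// 32 bits, mirroring the i32 trunc (i64 sub (ptrtoint, ptrtoint)) entries in
// the test vtables below. The truncation assumes code and data land within
// +/- 2 GiB of each other.
static int32_t Slots[2];

static void initSlots() {
  Slots[0] = static_cast<int32_t>(reinterpret_cast<intptr_t>(&vf1) -
                                  reinterpret_cast<intptr_t>(&Slots[0]));
  Slots[1] = static_cast<int32_t>(reinterpret_cast<intptr_t>(&vf2) -
                                  reinterpret_cast<intptr_t>(&Slots[1]));
}

// Adding the stored offset back to the slot's own address recovers the
// function pointer; the summary code performs the same cancellation
// symbolically when it matches the sub(ptrtoint, ptrtoint) constant.
static void *resolveSlot(unsigned Index) {
  return reinterpret_cast<char *>(&Slots[Index]) + Slots[Index];
}

int main() {
  initSlots();
  auto *Fn = reinterpret_cast<int (*)(int)>(resolveSlot(1));
  std::printf("%d\n", Fn(21)); // prints 42
  return 0;
}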
diff --git a/llvm/lib/Analysis/TypeMetadataUtils.cpp b/llvm/lib/Analysis/TypeMetadataUtils.cpp
--- a/llvm/lib/Analysis/TypeMetadataUtils.cpp
+++ b/llvm/lib/Analysis/TypeMetadataUtils.cpp
@@ -67,6 +67,14 @@
         findLoadCallsAtConstantOffset(M, DevirtCalls, User, Offset + GEPOffset,
                                       CI, DT);
       }
+    } else if (auto *Call = dyn_cast<CallInst>(User)) {
+      if (Call->getIntrinsicID() == llvm::Intrinsic::load_relative) {
+        if (auto *LoadOffset = dyn_cast<ConstantInt>(Call->getOperand(1))) {
+          findCallsAtConstantOffset(DevirtCalls, nullptr, User,
+                                    Offset + LoadOffset->getSExtValue(), CI,
+                                    DT);
+        }
+      }
     }
   }
 }
@@ -129,6 +137,12 @@
 Constant *llvm::getPointerAtOffset(Constant *I, uint64_t Offset, Module &M,
                                    Constant *TopLevelGlobal) {
+  // TODO: Ideally it would be the caller who knows if it's appropriate to strip
+  // the DSOLocalEquivalent. More generally, it would feel more appropriate to
+  // have two functions that handle absolute and relative pointers separately.
+  if (auto *Equiv = dyn_cast<DSOLocalEquivalent>(I))
+    I = Equiv->getGlobalValue();
+
   if (I->getType()->isPointerTy()) {
     if (Offset == 0)
       return I;
@@ -159,7 +173,7 @@
                             Offset % ElemSize, M, TopLevelGlobal);
   }
 
-  // (Swift-specific) relative-pointer support starts here.
+  // Relative-pointer support starts here.
   if (auto *CI = dyn_cast<ConstantInt>(I)) {
     if (Offset == 0 && CI->getZExtValue() == 0) {
       return I;
diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
--- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -1006,7 +1006,7 @@
     return false;
 
   Constant *Ptr = getPointerAtOffset(TM.Bits->GV->getInitializer(),
-                                     TM.Offset + ByteOffset, M);
+                                     TM.Offset + ByteOffset, M, TM.Bits->GV);
   if (!Ptr)
     return false;
diff --git a/llvm/test/ThinLTO/X86/devirt.ll b/llvm/test/ThinLTO/X86/devirt.ll
--- a/llvm/test/ThinLTO/X86/devirt.ll
+++ b/llvm/test/ThinLTO/X86/devirt.ll
@@ -27,24 +27,36 @@
 ; NOENABLESPLITFLAG-DAG: [[B:\^[0-9]+]] = gv: (name: "_ZTV1B", {{.*}} vTableFuncs: ((virtFunc: [[Bf]], offset: 16), (virtFunc: [[An]], offset: 24)), refs: ([[Bf]], [[An]])
 ; NOENABLESPLITFLAG-DAG: [[C:\^[0-9]+]] = gv: (name: "_ZTV1C", {{.*}} vTableFuncs: ((virtFunc: [[Cf]], offset: 16), (virtFunc: [[An]], offset: 24)), refs: ([[An]], [[Cf]])
 ; NOENABLESPLITFLAG-DAG: [[D:\^[0-9]+]] = gv: (name: "_ZTV1D", {{.*}} vTableFuncs: ((virtFunc: [[Dm]], offset: 16)), refs: ([[Dm]])
+; NOENABLESPLITFLAG-DAG: [[B_RV:\^[0-9]+]] = gv: (name: "_ZTV1B_RV", {{.*}} vTableFuncs: ((virtFunc: [[Bf]], offset: 8), (virtFunc: [[An]], offset: 12)), refs: ([[B_RV]], [[Bf]], [[An]])
+; NOENABLESPLITFLAG-DAG: [[C_RV:\^[0-9]+]] = gv: (name: "_ZTV1C_RV", {{.*}} vTableFuncs: ((virtFunc: [[Cf]], offset: 8), (virtFunc: [[An]], offset: 12)), refs: ([[C_RV]], [[An]], [[Cf]])
+; NOENABLESPLITFLAG-DAG: [[D_RV:\^[0-9]+]] = gv: (name: "_ZTV1D_RV", {{.*}} vTableFuncs: ((virtFunc: [[Dm]], offset: 8)), refs: ([[D_RV]], [[Dm]])
 ; NOENABLESPLITFLAG-DAG: typeidCompatibleVTable: (name: "_ZTS1A", summary: ((offset: 16, [[B]]), (offset: 16, [[C]])))
 ; NOENABLESPLITFLAG-DAG: typeidCompatibleVTable: (name: "_ZTS1B", summary: ((offset: 16, [[B]])))
 ; NOENABLESPLITFLAG-DAG: typeidCompatibleVTable: (name: "_ZTS1C", summary: ((offset: 16, [[C]])))
+; NOENABLESPLITFLAG-DAG: typeidCompatibleVTable: (name: "_ZTS1A_RV", summary: ((offset: 8, [[B_RV]]), (offset: 8, [[C_RV]])))
+; NOENABLESPLITFLAG-DAG: typeidCompatibleVTable: (name: "_ZTS1B_RV", summary: ((offset: 8, [[B_RV]])))
+; NOENABLESPLITFLAG-DAG: typeidCompatibleVTable: (name: "_ZTS1C_RV", summary: ((offset: 8,
[[C_RV]]))) ; Type Id on _ZTV1D should have been promoted ; NOENABLESPLITFLAG-DAG: typeidCompatibleVTable: (name: "1.{{.*}}", summary: ((offset: 16, [[D]]))) +; NOENABLESPLITFLAG-DAG: typeidCompatibleVTable: (name: "2.{{.*}}", summary: ((offset: 8, [[D_RV]]))) ; Index based WPD ; RUN: llvm-lto2 run %t2.o -save-temps -pass-remarks=. \ ; RUN: -whole-program-visibility \ ; RUN: -o %t3 \ ; RUN: -r=%t2.o,test,px \ +; RUN: -r=%t2.o,test_rv,px \ ; RUN: -r=%t2.o,_ZN1A1nEi,p \ ; RUN: -r=%t2.o,_ZN1B1fEi,p \ ; RUN: -r=%t2.o,_ZN1C1fEi,p \ ; RUN: -r=%t2.o,_ZN1D1mEi,p \ ; RUN: -r=%t2.o,_ZTV1B,px \ ; RUN: -r=%t2.o,_ZTV1C,px \ -; RUN: -r=%t2.o,_ZTV1D,px 2>&1 | FileCheck %s --check-prefix=REMARK +; RUN: -r=%t2.o,_ZTV1D,px \ +; RUN: -r=%t2.o,_ZTV1B_RV,px \ +; RUN: -r=%t2.o,_ZTV1C_RV,px \ +; RUN: -r=%t2.o,_ZTV1D_RV,px \ +; RUN: 2>&1 | FileCheck %s --check-prefix=REMARK ; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR ; Check that we're able to prevent specific function from being @@ -54,18 +66,24 @@ ; RUN: -wholeprogramdevirt-skip=_ZN1A1nEi \ ; RUN: -o %t3 \ ; RUN: -r=%t2.o,test,px \ +; RUN: -r=%t2.o,test_rv,px \ ; RUN: -r=%t2.o,_ZN1A1nEi,p \ ; RUN: -r=%t2.o,_ZN1B1fEi,p \ ; RUN: -r=%t2.o,_ZN1C1fEi,p \ ; RUN: -r=%t2.o,_ZN1D1mEi,p \ ; RUN: -r=%t2.o,_ZTV1B,px \ ; RUN: -r=%t2.o,_ZTV1C,px \ -; RUN: -r=%t2.o,_ZTV1D,px 2>&1 | FileCheck %s --check-prefix=SKIP +; RUN: -r=%t2.o,_ZTV1D,px \ +; RUN: -r=%t2.o,_ZTV1B_RV,px \ +; RUN: -r=%t2.o,_ZTV1C_RV,px \ +; RUN: -r=%t2.o,_ZTV1D_RV,px \ +; RUN: 2>&1 | FileCheck %s --check-prefix=SKIP ; RUN: llvm-lto2 run %t.o -save-temps -pass-remarks=. \ ; RUN: -whole-program-visibility \ ; RUN: -o %t3 \ ; RUN: -r=%t.o,test,px \ +; RUN: -r=%t.o,test_rv,px \ ; RUN: -r=%t.o,_ZN1A1nEi,p \ ; RUN: -r=%t.o,_ZN1B1fEi,p \ ; RUN: -r=%t.o,_ZN1C1fEi,p \ @@ -73,15 +91,24 @@ ; RUN: -r=%t.o,_ZTV1B, \ ; RUN: -r=%t.o,_ZTV1C, \ ; RUN: -r=%t.o,_ZTV1D, \ +; RUN: -r=%t.o,_ZTV1B_RV, \ +; RUN: -r=%t.o,_ZTV1C_RV, \ +; RUN: -r=%t.o,_ZTV1D_RV, \ ; RUN: -r=%t.o,_ZN1A1nEi, \ ; RUN: -r=%t.o,_ZN1B1fEi, \ ; RUN: -r=%t.o,_ZN1C1fEi, \ ; RUN: -r=%t.o,_ZN1D1mEi, \ ; RUN: -r=%t.o,_ZTV1B,px \ ; RUN: -r=%t.o,_ZTV1C,px \ -; RUN: -r=%t.o,_ZTV1D,px 2>&1 | FileCheck %s --check-prefix=REMARK --dump-input=fail +; RUN: -r=%t.o,_ZTV1D,px \ +; RUN: -r=%t.o,_ZTV1B_RV,px \ +; RUN: -r=%t.o,_ZTV1C_RV,px \ +; RUN: -r=%t.o,_ZTV1D_RV,px \ +; RUN: 2>&1 | FileCheck %s --check-prefix=REMARK --dump-input=fail ; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR +; REMARK-DAG: single-impl: devirtualized a call to _ZN1A1nEi +; REMARK-DAG: single-impl: devirtualized a call to _ZN1D1mEi ; REMARK-DAG: single-impl: devirtualized a call to _ZN1A1nEi ; REMARK-DAG: single-impl: devirtualized a call to _ZN1D1mEi @@ -99,6 +126,25 @@ @_ZTV1C = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr undef, ptr @_ZN1C1fEi, ptr @_ZN1A1nEi] }, !type !0, !type !2 @_ZTV1D = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr undef, ptr @_ZN1D1mEi] }, !type !3 +@_ZTV1B_RV = constant { [4 x i32] } { [4 x i32] [ + i32 0, + i32 undef, + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @_ZN1B1fEi to i64), i64 ptrtoint (ptr getelementptr inbounds ({ [4 x i32] }, ptr @_ZTV1B_RV, i32 0, i32 0, i32 2) to i64)) to i32), + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @_ZN1A1nEi to i64), i64 ptrtoint (ptr getelementptr inbounds ({ [4 x i32] }, ptr @_ZTV1B_RV, i32 0, i32 0, i32 3) to i64)) to i32) +] }, !type !7, !type !8 + +@_ZTV1C_RV = constant { [4 x i32] } { [4 x i32] [ + i32 0, + i32 undef, + i32 trunc 
(i64 sub (i64 ptrtoint (ptr dso_local_equivalent @_ZN1C1fEi to i64), i64 ptrtoint (ptr getelementptr inbounds ({ [4 x i32] }, ptr @_ZTV1C_RV, i32 0, i32 0, i32 2) to i64)) to i32), + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @_ZN1A1nEi to i64), i64 ptrtoint (ptr getelementptr inbounds ({ [4 x i32] }, ptr @_ZTV1C_RV, i32 0, i32 0, i32 3) to i64)) to i32) +] }, !type !7, !type !9 + +@_ZTV1D_RV = constant { [3 x i32] } { [3 x i32] [ + i32 0, + i32 undef, + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @_ZN1D1mEi to i64), i64 ptrtoint (ptr getelementptr inbounds ({ [3 x i32] }, ptr @_ZTV1D_RV, i32 0, i32 0, i32 2) to i64)) to i32) +] }, !type !10 ; CHECK-IR-LABEL: define i32 @test define i32 @test(ptr %obj, ptr %obj2, i32 %a) { @@ -136,6 +182,43 @@ ; CHECK-IR-LABEL: ret i32 ; CHECK-IR-LABEL: } +declare ptr @llvm.load.relative.i32(ptr, i32) + +; CHECK-IR-LABEL: define i32 @test_rv +define i32 @test_rv(ptr %obj, ptr %obj2, i32 %a) { +entry: + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS1A_RV") + call void @llvm.assume(i1 %p) + %fptr1_rv = call ptr @llvm.load.relative.i32(ptr %vtable, i32 4) + + ; Check that the call was devirtualized. + ; CHECK-IR: %call = tail call i32 @_ZN1A1nEi + ; Ensure !prof and !callees metadata for indirect call promotion removed. + ; CHECK-IR-NOT: prof + ; CHECK-IR-NOT: callees + %call = tail call i32 %fptr1_rv(ptr nonnull %obj, i32 %a), !prof !5, !callees !6 + + %fptr22_rv = call ptr @llvm.load.relative.i32(ptr %vtable, i32 0) + + ; We still have to call it as virtual. + ; CHECK-IR: %call3 = tail call i32 %fptr22 + %call3 = tail call i32 %fptr22_rv(ptr nonnull %obj, i32 %call) + + %vtable2 = load ptr, ptr %obj2 + %p2 = call i1 @llvm.type.test(ptr %vtable2, metadata !11) + call void @llvm.assume(i1 %p2) + + %fptr33_rv = call ptr @llvm.load.relative.i32(ptr %vtable2, i32 0) + + ; Check that the call was devirtualized. 
+ ; CHECK-IR: %call4 = tail call i32 @_ZN1D1mEi + %call4 = tail call i32 %fptr33_rv(ptr nonnull %obj2, i32 %call3) + ret i32 %call4 +} +; CHECK-IR-LABEL: ret i32 +; CHECK-IR-LABEL: } + declare i1 @llvm.type.test(ptr, metadata) declare void @llvm.assume(i1) @@ -165,3 +248,9 @@ !4 = distinct !{} !5 = !{!"VP", i32 0, i64 1, i64 1621563287929432257, i64 1} !6 = !{ptr @_ZN1A1nEi} + +!7 = !{i64 8, !"_ZTS1A_RV"} +!8 = !{i64 8, !"_ZTS1B_RV"} +!9 = !{i64 8, !"_ZTS1C_RV"} +!10 = !{i64 8, !11} +!11 = distinct !{} diff --git a/llvm/test/Transforms/WholeProgramDevirt/Inputs/export.yaml b/llvm/test/Transforms/WholeProgramDevirt/Inputs/export.yaml --- a/llvm/test/Transforms/WholeProgramDevirt/Inputs/export.yaml +++ b/llvm/test/Transforms/WholeProgramDevirt/Inputs/export.yaml @@ -5,14 +5,22 @@ TypeTestAssumeVCalls: - GUID: 14276520915468743435 # typeid1 Offset: 0 + - GUID: 271751036925422857 # typeid1_rv + Offset: 0 TypeCheckedLoadVCalls: - GUID: 15427464259790519041 # typeid2 Offset: 0 + - GUID: 1146149264729288256 # typeid2_rv + Offset: 0 TypeTestAssumeConstVCalls: - VFunc: GUID: 3515965990081467659 # typeid3 Offset: 0 Args: [12, 24] + - VFunc: + GUID: 2777626534618191571 # typeid3_rv + Offset: 0 + Args: [12, 24] TypeCheckedLoadConstVCalls: - VFunc: GUID: 17525413373118030901 # typeid4 diff --git a/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll b/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll --- a/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll @@ -8,6 +8,45 @@ ; RUN: FileCheck --check-prefix=SUMMARY %s < %t ; SUMMARY: TypeIdMap: +; SUMMARY-NEXT: typeid1_rv: +; SUMMARY-NEXT: TTRes: +; SUMMARY-NEXT: Kind: Unknown +; SUMMARY-NEXT: SizeM1BitWidth: 0 +; SUMMARY-NEXT: AlignLog2: 0 +; SUMMARY-NEXT: SizeM1: 0 +; SUMMARY-NEXT: BitMask: 0 +; SUMMARY-NEXT: InlineBits: 0 +; SUMMARY-NEXT: WPDRes: +; SUMMARY-NEXT: 0: +; SUMMARY-NEXT: Kind: BranchFunnel +; SUMMARY-NEXT: SingleImplName: '' +; SUMMARY-NEXT: ResByArg: +; SUMMARY-NEXT: typeid2_rv: +; SUMMARY-NEXT: TTRes: +; SUMMARY-NEXT: Kind: Unknown +; SUMMARY-NEXT: SizeM1BitWidth: 0 +; SUMMARY-NEXT: AlignLog2: 0 +; SUMMARY-NEXT: SizeM1: 0 +; SUMMARY-NEXT: BitMask: 0 +; SUMMARY-NEXT: InlineBits: 0 +; SUMMARY-NEXT: WPDRes: +; SUMMARY-NEXT: 0: +; SUMMARY-NEXT: Kind: Indir +; SUMMARY-NEXT: SingleImplName: '' +; SUMMARY-NEXT: ResByArg: +; SUMMARY-NEXT: typeid3_rv: +; SUMMARY-NEXT: TTRes: +; SUMMARY-NEXT: Kind: Unknown +; SUMMARY-NEXT: SizeM1BitWidth: 0 +; SUMMARY-NEXT: AlignLog2: 0 +; SUMMARY-NEXT: SizeM1: 0 +; SUMMARY-NEXT: BitMask: 0 +; SUMMARY-NEXT: InlineBits: 0 +; SUMMARY-NEXT: WPDRes: +; SUMMARY-NEXT: 0: +; SUMMARY-NEXT: Kind: BranchFunnel +; SUMMARY-NEXT: SingleImplName: '' +; SUMMARY-NEXT: ResByArg: ; SUMMARY-NEXT: typeid3: ; SUMMARY-NEXT: TTRes: ; SUMMARY-NEXT: Kind: Unknown @@ -93,6 +132,29 @@ declare i32 @vf4_1(ptr %this, i32 %arg) declare i32 @vf4_2(ptr %this, i32 %arg) +declare ptr @llvm.load.relative.i32(ptr, i32) + +;; These are relative vtables equivalent to the ones above. 
+@vt1_1_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf1_1 to i64), i64 ptrtoint (ptr @vt1_1_rv to i64)) to i32)], !type !5 +@vt1_2_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf1_2 to i64), i64 ptrtoint (ptr @vt1_2_rv to i64)) to i32)], !type !5 + +@vt2_1_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2_1 to i64), i64 ptrtoint (ptr @vt2_1_rv to i64)) to i32)], !type !6 +@vt2_2_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2_2 to i64), i64 ptrtoint (ptr @vt2_2_rv to i64)) to i32)], !type !6 +@vt2_3_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2_3 to i64), i64 ptrtoint (ptr @vt2_3_rv to i64)) to i32)], !type !6 +@vt2_4_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2_4 to i64), i64 ptrtoint (ptr @vt2_4_rv to i64)) to i32)], !type !6 +@vt2_5_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2_5 to i64), i64 ptrtoint (ptr @vt2_5_rv to i64)) to i32)], !type !6 +@vt2_6_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2_6 to i64), i64 ptrtoint (ptr @vt2_6_rv to i64)) to i32)], !type !6 +@vt2_7_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2_7 to i64), i64 ptrtoint (ptr @vt2_7_rv to i64)) to i32)], !type !6 +@vt2_8_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2_8 to i64), i64 ptrtoint (ptr @vt2_8_rv to i64)) to i32)], !type !6 +@vt2_9_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2_9 to i64), i64 ptrtoint (ptr @vt2_9_rv to i64)) to i32)], !type !6 +@vt2_10_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2_10 to i64), i64 ptrtoint (ptr @vt2_10_rv to i64)) to i32)], !type !6 +@vt2_11_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2_11 to i64), i64 ptrtoint (ptr @vt2_11_rv to i64)) to i32)], !type !6 + +@vt3_1_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf3_1 to i64), i64 ptrtoint (ptr @vt3_1_rv to i64)) to i32)], !type !7 +@vt3_2_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf3_2 to i64), i64 ptrtoint (ptr @vt3_2_rv to i64)) to i32)], !type !7 + +@vt4_1_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf4_1 to i64), i64 ptrtoint (ptr @vt4_1_rv to i64)) to i32)], !type !8 +@vt4_2_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf4_2 to i64), i64 ptrtoint (ptr @vt4_2_rv to i64)) to i32)], !type !8 ; CHECK-LABEL: define i32 @fn1 @@ -108,6 +170,19 @@ ret i32 %result } +; CHECK-LABEL: define i32 @fn1_rv +; CHECK-NOT: call void (...) @llvm.icall.branch.funnel +define i32 @fn1_rv(ptr %obj) #0 { + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid1_rv") + call void @llvm.assume(i1 %p) + %fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 0) + ; RETP: call i32 @__typeid_typeid1_rv_0_branch_funnel(ptr nest %vtable, ptr %obj, i32 1) + %result = call i32 %fptr(ptr %obj, i32 1) + ; NORETP: call i32 % + ret i32 %result +} + ; CHECK-LABEL: define i32 @fn2 ; CHECK-NOT: call void (...) 
@llvm.icall.branch.funnel define i32 @fn2(ptr %obj) #0 { @@ -120,6 +195,18 @@ ret i32 %result } +; CHECK-LABEL: define i32 @fn2_rv +; CHECK-NOT: call void (...) @llvm.icall.branch.funnel +define i32 @fn2_rv(ptr %obj) #0 { + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid2_rv") + call void @llvm.assume(i1 %p) + %fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 0) + ; CHECK: call i32 % + %result = call i32 %fptr(ptr %obj, i32 1) + ret i32 %result +} + ; CHECK-LABEL: define i32 @fn3 ; CHECK-NOT: call void (...) @llvm.icall.branch.funnel define i32 @fn3(ptr %obj) #0 { @@ -133,10 +220,27 @@ ret i32 %result } +; CHECK-LABEL: define i32 @fn3_rv +; CHECK-NOT: call void (...) @llvm.icall.branch.funnel +define i32 @fn3_rv(ptr %obj) #0 { + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.type.test(ptr %vtable, metadata !9) + call void @llvm.assume(i1 %p) + %fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 0) + ; RETP: call i32 @branch_funnel.1(ptr + ; NORETP: call i32 % + %result = call i32 %fptr(ptr %obj, i32 1) + ret i32 %result +} + ; CHECK-LABEL: define hidden void @__typeid_typeid1_0_branch_funnel(ptr nest %0, ...) ; CHECK-NEXT: musttail call void (...) @llvm.icall.branch.funnel(ptr %0, ptr {{(nonnull )?}}@vt1_1, ptr {{(nonnull )?}}@vf1_1, ptr {{(nonnull )?}}@vt1_2, ptr {{(nonnull )?}}@vf1_2, ...) +; CHECK-LABEL: define hidden void @__typeid_typeid1_rv_0_branch_funnel(ptr nest %0, ...) +; CHECK-NEXT: musttail call void (...) @llvm.icall.branch.funnel(ptr %0, ptr {{(nonnull )?}}@vt1_1_rv, ptr {{(nonnull )?}}@vf1_1, ptr {{(nonnull )?}}@vt1_2_rv, ptr {{(nonnull )?}}@vf1_2, ...) + ; CHECK: define internal void @branch_funnel(ptr +; CHECK: define internal void @branch_funnel.1(ptr declare i1 @llvm.type.test(ptr, metadata) declare void @llvm.assume(i1) @@ -146,5 +250,10 @@ !2 = !{i32 0, !"typeid3"} !3 = !{i32 0, !4} !4 = distinct !{} +!5 = !{i32 0, !"typeid1_rv"} +!6 = !{i32 0, !"typeid2_rv"} +!7 = !{i32 0, !"typeid3_rv"} +!8 = !{i32 0, !9} +!9 = distinct !{} attributes #0 = { "target-features"="+retpoline" } diff --git a/llvm/test/Transforms/WholeProgramDevirt/constant-arg.ll b/llvm/test/Transforms/WholeProgramDevirt/constant-arg.ll --- a/llvm/test/Transforms/WholeProgramDevirt/constant-arg.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/constant-arg.ll @@ -7,6 +7,10 @@ ; CHECK: private constant { [8 x i8], [1 x ptr], [0 x i8] } { [8 x i8] c"\00\00\00\00\00\00\00\02", [1 x ptr] [ptr @vf2], [0 x i8] zeroinitializer }, !type [[T8]] ; CHECK: private constant { [8 x i8], [1 x ptr], [0 x i8] } { [8 x i8] c"\00\00\00\00\00\00\00\01", [1 x ptr] [ptr @vf4], [0 x i8] zeroinitializer }, !type [[T8]] ; CHECK: private constant { [8 x i8], [1 x ptr], [0 x i8] } { [8 x i8] c"\00\00\00\00\00\00\00\02", [1 x ptr] [ptr @vf8], [0 x i8] zeroinitializer }, !type [[T8]] +; CHECK: private constant { [4 x i8], [1 x i32], [0 x i8] } { [4 x i8] c"\00\00\00\01", [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf1 to i64), i64 ptrtoint (ptr @vt1_rv to i64)) to i32)], [0 x i8] zeroinitializer }, align 4, !type [[T4:![0-9]+]] +; CHECK: private constant { [4 x i8], [1 x i32], [0 x i8] } { [4 x i8] c"\00\00\00\02", [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2 to i64), i64 ptrtoint (ptr @vt2_rv to i64)) to i32)], [0 x i8] zeroinitializer }, align 4, !type [[T4]] +; CHECK: private constant { [4 x i8], [1 x i32], [0 x i8] } { [4 x i8] c"\00\00\00\01", [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent 
@vf4 to i64), i64 ptrtoint (ptr @vt4_rv to i64)) to i32)], [0 x i8] zeroinitializer }, align 4, !type [[T4]] +; CHECK: private constant { [4 x i8], [1 x i32], [0 x i8] } { [4 x i8] c"\00\00\00\02", [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf8 to i64), i64 ptrtoint (ptr @vt8_rv to i64)) to i32)], [0 x i8] zeroinitializer }, align 4, !type [[T4]] @vt1 = constant [1 x ptr] [ptr @vf1], !type !0 @vt2 = constant [1 x ptr] [ptr @vf2], !type !0 @@ -61,8 +65,49 @@ ret i1 %result } +declare ptr @llvm.load.relative.i32(ptr, i32) + +@vt1_rv = private unnamed_addr constant [1 x i32] [ + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf1 to i64), i64 ptrtoint (ptr @vt1_rv to i64)) to i32) +], align 4, !type !1 +@vt2_rv = private unnamed_addr constant [1 x i32] [ + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2 to i64), i64 ptrtoint (ptr @vt2_rv to i64)) to i32) +], align 4, !type !1 +@vt4_rv = private unnamed_addr constant [1 x i32] [ + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf4 to i64), i64 ptrtoint (ptr @vt4_rv to i64)) to i32) +], align 4, !type !1 +@vt8_rv = private unnamed_addr constant [1 x i32] [ + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf8 to i64), i64 ptrtoint (ptr @vt8_rv to i64)) to i32) +], align 4, !type !1 + +; CHECK: define i1 @call3 +define i1 @call3(ptr %obj) { + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid2") + call void @llvm.assume(i1 %p) + %fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 0) + ; CHECK: getelementptr {{.*}} -1 + ; CHECK: and {{.*}}, 1 + %result = call i1 %fptr(ptr %obj, i32 5) + ret i1 %result +} + +; CHECK: define i1 @call4 +define i1 @call4(ptr %obj) { + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid2") + call void @llvm.assume(i1 %p) + %fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 0) + ; CHECK: getelementptr {{.*}} -1 + ; CHECK: and {{.*}}, 2 + %result = call i1 %fptr(ptr %obj, i32 10) + ret i1 %result +} + declare i1 @llvm.type.test(ptr, metadata) declare void @llvm.assume(i1) ; CHECK: [[T8]] = !{i32 8, !"typeid"} +; CHECK: [[T4]] = !{i32 4, !"typeid2"} !0 = !{i32 0, !"typeid"} +!1 = !{i32 0, !"typeid2"} diff --git a/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl-check.ll b/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl-check.ll --- a/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl-check.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl-check.ll @@ -3,6 +3,7 @@ target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" +; CHECK: remark: :0:0: single-impl: devirtualized a call to vf ; CHECK: remark: :0:0: single-impl: devirtualized a call to vf ; CHECK: remark: :0:0: devirtualized vf ; CHECK-NOT: devirtualized @@ -33,7 +34,31 @@ unreachable } +@vt3 = private unnamed_addr constant [1 x i32] [ + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf to i64), i64 ptrtoint (ptr @vt3 to i64)) to i32) +], align 4, !type !1 + +; CHECK: define void @call2 +define void @call2(ptr %obj) { + %vtable = load ptr, ptr %obj + %pair = call {ptr, i1} @llvm.type.checked.load(ptr %vtable, i32 0, metadata !"typeid2") + %fptr = extractvalue {ptr, i1} %pair, 0 + %p = extractvalue {ptr, i1} %pair, 1 + ; CHECK: br i1 true, + br i1 %p, label %cont, label %trap + +cont: + ; CHECK: call void @vf( + call void %fptr(ptr %obj) + ret void + +trap: + call void @llvm.trap() + unreachable +} + declare {ptr, i1} 
@llvm.type.checked.load(ptr, i32, metadata) declare void @llvm.trap() !0 = !{i32 0, !"typeid"} +!1 = !{i32 0, !"typeid2"} diff --git a/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl-multiple-assumes.ll b/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl-multiple-assumes.ll --- a/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl-multiple-assumes.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl-multiple-assumes.ll @@ -23,7 +23,27 @@ ret void } +declare ptr @llvm.load.relative.i32(ptr, i32) + +@vt3 = private unnamed_addr constant [1 x i32] [ + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf to i64), i64 ptrtoint (ptr @vt3 to i64)) to i32) +], align 4, !type !1 + +; CHECK: define void @call2 +define void @call2(ptr %obj) { + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid2") + call void @llvm.assume(i1 %p) + %p2 = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid2") + call void @llvm.assume(i1 %p2) + %fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 0) + ; CHECK: call void @vf( + call void %fptr(ptr %obj) + ret void +} + declare i1 @llvm.type.test(ptr, metadata) declare void @llvm.assume(i1) !0 = !{i32 0, !"typeid"} +!1 = !{i32 0, !"typeid2"} diff --git a/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl.ll b/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl.ll --- a/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl.ll @@ -7,6 +7,8 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK: remark: devirt-single.cc:30:32: single-impl: devirtualized a call to vf +; CHECK: remark: devirt-single.cc:41:32: single-impl: devirtualized a call to vf +; CHECK: remark: devirt-single.cc:51:32: single-impl: devirtualized a call to vf ; CHECK: remark: devirt-single.cc:13:0: devirtualized vf ; CHECK-NOT: devirtualized @@ -28,6 +30,41 @@ ret void } +declare ptr @llvm.load.relative.i32(ptr, i32) + +@vt3 = private unnamed_addr constant [1 x i32] [ + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf to i64), i64 ptrtoint (ptr @vt3 to i64)) to i32) +], align 4, !type !11 + +; CHECK: define void @call2 +define void @call2(ptr %obj) #1 !dbg !9 { + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid2") + call void @llvm.assume(i1 %p) + %fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 0) + ; CHECK: call void @vf( + call void %fptr(ptr %obj), !dbg !10 + ret void +} + +@_ZTV1A.local = private unnamed_addr constant { [3 x i32] } { [3 x i32] [ + i32 0, ; offset to top + i32 0, ; rtti + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf to i64), i64 ptrtoint (ptr getelementptr inbounds ({ [3 x i32] }, ptr @_ZTV1A.local, i32 0, i32 0, i32 2) to i64)) to i32) ; vfunc offset +] }, align 4, !type !14 + +; CHECK: define void @call3 +define void @call3(ptr %obj) #1 !dbg !12 { + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid3") + call void @llvm.assume(i1 %p) + %fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 8) + ; CHECK: call void @vf( + call void %fptr(ptr %obj), !dbg !13 + ret void +} + + declare i1 @llvm.type.test(ptr, metadata) declare void @llvm.assume(i1) @@ -45,5 +82,13 @@ !7 = distinct !DISubprogram(name: "vf", linkageName: "_ZN3vt12vfEv", scope: !1, file: !1, line: 13, isLocal: false, isDefinition: true, scopeLine: 13, flags: DIFlagPrototyped, isOptimized: false, unit: !0) !8 = !{i32 0, 
!"typeid"} +!9 = distinct !DISubprogram(name: "call2", linkageName: "_Z5call2Pv", scope: !1, file: !1, line: 40, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!10 = !DILocation(line: 41, column: 32, scope: !9) +!11 = !{i32 0, !"typeid2"} + +!12 = distinct !DISubprogram(name: "call3", linkageName: "_Z5call3Pv", scope: !1, file: !1, line: 50, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!13 = !DILocation(line: 51, column: 32, scope: !12) +!14 = !{i32 0, !"typeid3"} + ; CHECK: 1 wholeprogramdevirt - Number of whole program devirtualization targets -; CHECK: 1 wholeprogramdevirt - Number of single implementation devirtualizations +; CHECK: 3 wholeprogramdevirt - Number of single implementation devirtualizations diff --git a/llvm/test/Transforms/WholeProgramDevirt/pointer-vtable.ll b/llvm/test/Transforms/WholeProgramDevirt/pointer-vtable.ll --- a/llvm/test/Transforms/WholeProgramDevirt/pointer-vtable.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/pointer-vtable.ll @@ -20,7 +20,23 @@ ret void } +@vt2 = constant i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf to i64), i64 ptrtoint (ptr @vt2 to i64)) to i32), !type !1 + +declare ptr @llvm.load.relative.i32(ptr, i32) + +; CHECK: define void @call2 +define void @call2(ptr %obj) { + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid2") + call void @llvm.assume(i1 %p) + %fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 0) + ; CHECK: call void @vf( + call void %fptr(ptr %obj) + ret void +} + declare i1 @llvm.type.test(ptr, metadata) declare void @llvm.assume(i1) !0 = !{i32 0, !"typeid"} +!1 = !{i32 0, !"typeid2"}