Index: llvm/docs/TypeMetadata.rst =================================================================== --- llvm/docs/TypeMetadata.rst +++ llvm/docs/TypeMetadata.rst @@ -288,3 +288,10 @@ calls sites can be correlated with the vtables which they might load from. Other parts of the vtable (RTTI, offset-to-top, ...) can still be accessed with normal loads. + +Alternatively, the ``!vcall_visibility`` metadata attachment can have an +extended format of a tuple with two additional integer values representing the +begin (inclusive) and end (exclusive) byte offsets within the vtable that the visibility applies to. When the +range is missing, the meaning is the same as a range covering the entire vtable. +Any part of the vtable that is not covered by the specified range is not +eligible for elimination of virtual functions. Index: llvm/include/llvm/IR/GlobalObject.h =================================================================== --- llvm/include/llvm/IR/GlobalObject.h +++ llvm/include/llvm/IR/GlobalObject.h @@ -138,6 +138,7 @@ void addTypeMetadata(unsigned Offset, Metadata *TypeID); void setVCallVisibilityMetadata(VCallVisibility Visibility); VCallVisibility getVCallVisibility() const; + std::tuple getVTableOffsetRange() const; /// Returns true if the alignment of the value can be unilaterally /// increased. Index: llvm/include/llvm/Transforms/IPO/GlobalDCE.h =================================================================== --- llvm/include/llvm/Transforms/IPO/GlobalDCE.h +++ llvm/include/llvm/Transforms/IPO/GlobalDCE.h @@ -47,9 +47,9 @@ DenseMap, 4>> TypeIdMap; - // Global variables which are vtables, and which we have enough information - // about to safely do dead virtual function elimination. - SmallPtrSet VFESafeVTables; + /// VTable -> set of vfuncs in that vtable (that are within the range + /// specified in !vcall_visibility).
+ DenseMap> VFESafeVTablesAndFns; void UpdateGVDependencies(GlobalValue &GV); void MarkLive(GlobalValue &GV, Index: llvm/lib/IR/Metadata.cpp =================================================================== --- llvm/lib/IR/Metadata.cpp +++ llvm/lib/IR/Metadata.cpp @@ -1534,6 +1534,23 @@ return VCallVisibility::VCallVisibilityPublic; } +std::tuple GlobalObject::getVTableOffsetRange() const { + if (MDNode *MD = getMetadata(LLVMContext::MD_vcall_visibility)) { + if (MD->getNumOperands() >= 3) { + uint64_t RangeStart = + cast( + cast(MD->getOperand(1))->getValue()) + ->getZExtValue(); + uint64_t RangeEnd = + cast( + cast(MD->getOperand(2))->getValue()) + ->getZExtValue(); + return std::tuple(RangeStart, RangeEnd); + } + } + return std::tuple(0, UINT64_MAX); +} + void Function::setSubprogram(DISubprogram *SP) { setMetadata(LLVMContext::MD_dbg, SP); } Index: llvm/lib/Transforms/IPO/GlobalDCE.cpp =================================================================== --- llvm/lib/Transforms/IPO/GlobalDCE.cpp +++ llvm/lib/Transforms/IPO/GlobalDCE.cpp @@ -127,11 +127,12 @@ ComputeDependencies(User, Deps); Deps.erase(&GV); // Remove self-reference. for (GlobalValue *GVU : Deps) { - // If this is a dep from a vtable to a virtual function, and we have + // If this is a dep from a vtable to a virtual function, and it's within the + // range specified in !vcall_visibility, and we have // complete information about all virtual call sites which could call // though this vtable, then skip it, because the call site information will // be more precise. - if (VFESafeVTables.count(GVU) && isa(&GV)) { + if (isa(&GV) && VFESafeVTablesAndFns[GVU].contains(&GV)) { LLVM_DEBUG(dbgs() << "Ignoring dep " << GVU->getName() << " -> " << GV.getName() << "\n"); continue; @@ -157,6 +158,44 @@ } } +/// Recursively iterate over the (sub-)constants in the vtable and look for +/// vptrs, report them with their offsets via `Callback`. 
+static void FindVirtualFunctionsInVTable( + Module &M, Constant *C, + std::function Callback, + uint64_t BaseOffset = 0) { + if (auto *GV = dyn_cast(C)) { + if (auto *F = dyn_cast(GV)) { + Callback(F, BaseOffset); + } + + // Do not recurse outside of the current global. + return; + } + + if (auto *S = dyn_cast(C)) { + StructType *STy = dyn_cast(S->getType()); + const StructLayout *SL = M.getDataLayout().getStructLayout(STy); + for (auto EI : llvm::enumerate(STy->elements())) { + auto Offset = SL->getElementOffset(EI.index()); + unsigned Op = SL->getElementContainingOffset(Offset); + FindVirtualFunctionsInVTable(M, cast(S->getOperand(Op)), + Callback, BaseOffset + Offset); + } + } else if (auto *A = dyn_cast(C)) { + ArrayType *ATy = A->getType(); + auto EltSize = M.getDataLayout().getTypeAllocSize(ATy->getElementType()); + for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) { + FindVirtualFunctionsInVTable(M, cast(A->getOperand(i)), + Callback, BaseOffset + EltSize * i); + } + } else { + for (auto &Op : C->operands()) { + FindVirtualFunctionsInVTable(M, cast(Op), Callback, BaseOffset); + } + } +} + void GlobalDCEPass::ScanVTables(Module &M) { SmallVector Types; LLVM_DEBUG(dbgs() << "Building type info -> vtable map\n"); @@ -192,11 +231,23 @@ // so VFE is safe. if (auto GO = dyn_cast(&GV)) { GlobalObject::VCallVisibility TypeVis = GO->getVCallVisibility(); + auto Range = GO->getVTableOffsetRange(); if (TypeVis == GlobalObject::VCallVisibilityTranslationUnit || (LTOPostLink && TypeVis == GlobalObject::VCallVisibilityLinkageUnit)) { LLVM_DEBUG(dbgs() << GV.getName() << " is safe for VFE\n"); - VFESafeVTables.insert(&GV); + + // Find and record all the vfunctions that are within the offset range + // specified in the !vcall_visibility attribute. 
+ SmallPtrSet VFuncs; + FindVirtualFunctionsInVTable( + M, GV.getInitializer(), + [&Range, &VFuncs](GlobalValue *VPtr, uint64_t Offset) { + if (std::get<0>(Range) <= Offset && Offset < std::get<1>(Range)) { + VFuncs.insert(VPtr); + } + }); + VFESafeVTablesAndFns[&GV] = VFuncs; } } } @@ -213,14 +264,14 @@ *Caller->getParent(), VTable); if (!Ptr) { LLVM_DEBUG(dbgs() << "can't find pointer in vtable!\n"); - VFESafeVTables.erase(VTable); + VFESafeVTablesAndFns.erase(VTable); return; } auto Callee = dyn_cast(Ptr->stripPointerCasts()); if (!Callee) { LLVM_DEBUG(dbgs() << "vtable entry is not function pointer!\n"); - VFESafeVTables.erase(VTable); + VFESafeVTablesAndFns.erase(VTable); return; } @@ -253,7 +304,7 @@ // type.checked.load with a non-constant offset, so assume every entry in // every matching vtable is used. for (auto &VTableInfo : TypeIdMap[TypeId]) { - VFESafeVTables.erase(VTableInfo.first); + VFESafeVTablesAndFns.erase(VTableInfo.first); } } } @@ -274,15 +325,15 @@ ScanVTables(M); - if (VFESafeVTables.empty()) + if (VFESafeVTablesAndFns.empty()) return; ScanTypeCheckedLoadIntrinsics(M); LLVM_DEBUG( dbgs() << "VFE safe vtables:\n"; - for (auto *VTable : VFESafeVTables) - dbgs() << " " << VTable->getName() << "\n"; + for (auto &Entry : VFESafeVTablesAndFns) + dbgs() << " " << Entry.first->getName() << "\n"; ); } @@ -449,7 +500,7 @@ GVDependencies.clear(); ComdatMembers.clear(); TypeIdMap.clear(); - VFESafeVTables.clear(); + VFESafeVTablesAndFns.clear(); if (Changed) return PreservedAnalyses::none(); Index: llvm/test/Transforms/GlobalDCE/virtual-functions-non-vfunc-entries.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/GlobalDCE/virtual-functions-non-vfunc-entries.ll @@ -0,0 +1,95 @@ +; RUN: opt < %s -globaldce -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +declare { i8*, i1 } @llvm.type.checked.load(i8*, i32, metadata) + +; A vtable that contains a non-nfunc 
entry, @regular_non_virtual_funcA, but +; without a range specified in !vcall_visibility, which means *all* function +; pointers are eligible for VFE, so GlobalDCE will treat the +; @regular_non_virtual_funcA slot as eligible for VFE, and remove it. +@vtableA = internal unnamed_addr constant { [3 x i8*] } { [3 x i8*] [ + i8* bitcast (void ()* @vfunc1_live to i8*), + i8* bitcast (void ()* @vfunc2_dead to i8*), + i8* bitcast (void ()* @regular_non_virtual_funcA to i8*) +]}, align 8, !type !{i64 0, !"vfunc1.type"}, !type !{i64 8, !"vfunc2.type"}, !vcall_visibility !{i64 2} + +; CHECK: @vtableA = internal unnamed_addr constant { [3 x i8*] } { [3 x i8*] [ +; CHECK-SAME: i8* bitcast (void ()* @vfunc1_live to i8*), +; CHECK-SAME: i8* null, +; CHECK-SAME: i8* null +; CHECK-SAME: ] }, align 8 + + +; A vtable that contains a non-vfunc entry, @regular_non_virtual_funcB, with a +; range of [0,16) which means only the first two entries are eligible for VFE. +; GlobalDCE should keep @regular_non_virtual_funcB in the vtable. +@vtableB = internal unnamed_addr constant { [3 x i8*] } { [3 x i8*] [ + i8* bitcast (void ()* @vfunc1_live to i8*), + i8* bitcast (void ()* @vfunc2_dead to i8*), + i8* bitcast (void ()* @regular_non_virtual_funcB to i8*) +]}, align 8, !type !{i64 0, !"vfunc1.type"}, !type !{i64 8, !"vfunc2.type"}, !vcall_visibility !{i64 2, i64 0, i64 16} + +; CHECK: @vtableB = internal unnamed_addr constant { [3 x i8*] } { [3 x i8*] [ +; CHECK-SAME: i8* bitcast (void ()* @vfunc1_live to i8*), +; CHECK-SAME: i8* null, +; CHECK-SAME: i8* bitcast (void ()* @regular_non_virtual_funcB to i8*) +; CHECK-SAME: ] }, align 8 + +; A vtable that contains a non-vfunc entry, @regular_non_virtual_funcC, with a +; range of [8,24) which means only the last two entries are eligible for VFE. +; GlobalDCE should keep @regular_non_virtual_funcC in the vtable.
+@vtableC = internal unnamed_addr constant { [3 x i8*] } { [3 x i8*] [ + i8* bitcast (void ()* @regular_non_virtual_funcC to i8*), + i8* bitcast (void ()* @vfunc1_live to i8*), + i8* bitcast (void ()* @vfunc2_dead to i8*) +]}, align 8, !type !{i64 8, !"vfunc1.type"}, !type !{i64 16, !"vfunc2.type"}, !vcall_visibility !{i64 2, i64 8, i64 24} + +; CHECK: @vtableC = internal unnamed_addr constant { [3 x i8*] } { [3 x i8*] [ +; CHECK-SAME: i8* bitcast (void ()* @regular_non_virtual_funcC to i8*), +; CHECK-SAME: i8* bitcast (void ()* @vfunc1_live to i8*), +; CHECK-SAME: i8* null +; CHECK-SAME: ] }, align 8 + +; (1) vfunc1_live is referenced from @main, stays alive +define internal void @vfunc1_live() { + ; CHECK: define internal void @vfunc1_live( + ret void +} + +; (2) vfunc2_dead is never referenced, gets removed and vtable slot is null'd +define internal void @vfunc2_dead() { + ; CHECK-NOT: define internal void @vfunc2_dead( + ret void +} + +; (3) not using a range in !vcall_visibility, global gets removed +define internal void @regular_non_virtual_funcA() { + ; CHECK-NOT: define internal void @regular_non_virtual_funcA( + ret void +} + +; (4) using a range in !vcall_visibility, pointer is outside of range, so should +; stay alive +define internal void @regular_non_virtual_funcB() { + ; CHECK: define internal void @regular_non_virtual_funcB( + ret void +} + +; (5) using a range in !vcall_visibility, pointer is outside of range, so should +; stay alive +define internal void @regular_non_virtual_funcC() { + ; CHECK: define internal void @regular_non_virtual_funcC( + ret void +} + +define void @main() { + %1 = ptrtoint { [3 x i8*] }* @vtableA to i64 ; to keep @vtableA alive + %2 = ptrtoint { [3 x i8*] }* @vtableB to i64 ; to keep @vtableB alive + %3 = ptrtoint { [3 x i8*] }* @vtableC to i64 ; to keep @vtableC alive + %4 = tail call { i8*, i1 } @llvm.type.checked.load(i8* null, i32 0, metadata !"vfunc1.type") + ret void +} + +!999 = !{i32 1, !"Virtual Function Elim", i32 
1} +!llvm.module.flags = !{!999}