Index: llvm/docs/TypeMetadata.rst =================================================================== --- llvm/docs/TypeMetadata.rst +++ llvm/docs/TypeMetadata.rst @@ -288,3 +288,10 @@ calls sites can be correlated with the vtables which they might load from. Other parts of the vtable (RTTI, offset-to-top, ...) can still be accessed with normal loads. + +Alternatively, the ``!vcall_visibility`` metadata attachment can have an +extended format of a tuple with two additional integer values representing the +begin and end offset within the vtable that the visibility applies to. When the +range is missing, the meaning is the same as a range covering the entire vtable. +Any part of the vtable that is not covered by the specified range is not +eligible for elimination of virtual functions. Index: llvm/include/llvm/IR/GlobalObject.h =================================================================== --- llvm/include/llvm/IR/GlobalObject.h +++ llvm/include/llvm/IR/GlobalObject.h @@ -138,6 +138,7 @@ void addTypeMetadata(unsigned Offset, Metadata *TypeID); void setVCallVisibilityMetadata(VCallVisibility Visibility); VCallVisibility getVCallVisibility() const; + std::tuple getVTableOffsetRange() const; /// Returns true if the alignment of the value can be unilaterally /// increased. Index: llvm/include/llvm/Transforms/IPO/GlobalDCE.h =================================================================== --- llvm/include/llvm/Transforms/IPO/GlobalDCE.h +++ llvm/include/llvm/Transforms/IPO/GlobalDCE.h @@ -47,6 +47,12 @@ DenseMap, 4>> TypeIdMap; + /// VTable -> set of vfuncs in that vtable (that are within the range + /// specified in !vcall_visibility). + DenseMap> VFuncMap; + + bool IsVPtrEligibleForVFE(GlobalValue *VTableVal, GlobalValue *VPtr); + // Global variables which are vtables, and which we have enough information // about to safely do dead virtual function elimination. 
SmallPtrSet VFESafeVTables; Index: llvm/lib/IR/Metadata.cpp =================================================================== --- llvm/lib/IR/Metadata.cpp +++ llvm/lib/IR/Metadata.cpp @@ -1534,6 +1534,23 @@ return VCallVisibility::VCallVisibilityPublic; } +std::tuple GlobalObject::getVTableOffsetRange() const { + if (MDNode *MD = getMetadata(LLVMContext::MD_vcall_visibility)) { + if (MD->getNumOperands() >= 3) { + uint64_t RangeStart = + cast( + cast(MD->getOperand(1))->getValue()) + ->getZExtValue(); + uint64_t RangeEnd = + cast( + cast(MD->getOperand(2))->getValue()) + ->getZExtValue(); + return std::tuple(RangeStart, RangeEnd); + } + } + return std::tuple(0, UINT64_MAX); +} + void Function::setSubprogram(DISubprogram *SP) { setMetadata(LLVMContext::MD_dbg, SP); } Index: llvm/lib/Transforms/IPO/GlobalDCE.cpp =================================================================== --- llvm/lib/Transforms/IPO/GlobalDCE.cpp +++ llvm/lib/Transforms/IPO/GlobalDCE.cpp @@ -121,6 +121,19 @@ } } +bool GlobalDCEPass::IsVPtrEligibleForVFE(GlobalValue *VTableVal, GlobalValue *VPtr) { + auto *VTable = dyn_cast(VTableVal); + assert(VTable); + + if (VFuncMap[VTable].contains(VPtr)) + // Have a match in VFuncMap, i.e. VPtr is within the range specified in + // !vcall_visibility attribute. Allow VFE on this slot. + return true; + + // No matching entry in VFuncMap. Don't allow VFE on this slot. + return false; +} + void GlobalDCEPass::UpdateGVDependencies(GlobalValue &GV) { SmallPtrSet Deps; for (User *User : GV.users()) @@ -131,7 +144,8 @@ // complete information about all virtual call sites which could call // though this vtable, then skip it, because the call site information will // be more precise. 
- if (VFESafeVTables.count(GVU) && isa(&GV)) { + if (VFESafeVTables.count(GVU) && isa(&GV) && + IsVPtrEligibleForVFE(GVU, &GV)) { LLVM_DEBUG(dbgs() << "Ignoring dep " << GVU->getName() << " -> " << GV.getName() << "\n"); continue; @@ -157,6 +171,44 @@ } } +/// Recursively iterate over the (sub-)constants in the vtable and look for +/// vptrs, report them with their offsets via `Callback`. +static void FindVirtualFunctionsInVTable( + Module &M, Constant *C, + std::function Callback, + uint64_t BaseOffset = 0) { + if (auto *GV = dyn_cast(C)) { + if (auto *F = dyn_cast(GV)) { + Callback(F, BaseOffset); + } + + // Do not recurse outside of the current global. + return; + } + + if (auto *S = dyn_cast(C)) { + StructType *STy = dyn_cast(S->getType()); + const StructLayout *SL = M.getDataLayout().getStructLayout(STy); + for (auto EI : llvm::enumerate(STy->elements())) { + auto Offset = SL->getElementOffset(EI.index()); + unsigned Op = SL->getElementContainingOffset(Offset); + FindVirtualFunctionsInVTable(M, cast(S->getOperand(Op)), + Callback, BaseOffset + Offset); + } + } else if (auto *A = dyn_cast(C)) { + ArrayType *ATy = A->getType(); + auto EltSize = M.getDataLayout().getTypeAllocSize(ATy->getElementType()); + for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) { + FindVirtualFunctionsInVTable(M, cast(A->getOperand(i)), + Callback, BaseOffset + EltSize * i); + } + } else { + for (auto &Op : C->operands()) { + FindVirtualFunctionsInVTable(M, cast(Op), Callback, BaseOffset); + } + } +} + void GlobalDCEPass::ScanVTables(Module &M) { SmallVector Types; LLVM_DEBUG(dbgs() << "Building type info -> vtable map\n"); @@ -192,11 +244,24 @@ // so VFE is safe. 
if (auto GO = dyn_cast(&GV)) { GlobalObject::VCallVisibility TypeVis = GO->getVCallVisibility(); + auto Range = GO->getVTableOffsetRange(); if (TypeVis == GlobalObject::VCallVisibilityTranslationUnit || (LTOPostLink && TypeVis == GlobalObject::VCallVisibilityLinkageUnit)) { LLVM_DEBUG(dbgs() << GV.getName() << " is safe for VFE\n"); VFESafeVTables.insert(&GV); + + // Find and record all the vfunctions that are within the offset range + // specified in the !vcall_visibility attribute. + SmallPtrSet VFuncs; + FindVirtualFunctionsInVTable( + M, GV.getInitializer(), + [&Range, &VFuncs](GlobalValue *VPtr, uint64_t Offset) { + if (std::get<0>(Range) <= Offset && Offset < std::get<1>(Range)) { + VFuncs.insert(VPtr); + } + }); + VFuncMap[&GV] = VFuncs; } } } @@ -449,6 +514,7 @@ GVDependencies.clear(); ComdatMembers.clear(); TypeIdMap.clear(); + VFuncMap.clear(); VFESafeVTables.clear(); if (Changed) Index: llvm/test/Transforms/GlobalDCE/virtual-functions-non-vfunc-entries.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/GlobalDCE/virtual-functions-non-vfunc-entries.ll @@ -0,0 +1,74 @@ +; RUN: opt < %s -globaldce -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +declare { i8*, i1 } @llvm.type.checked.load(i8*, i32, metadata) + +; A vtable that contains a non-vfunc entry, @regular_non_virtual_funcA, but +; without a range specified in !vcall_visibility, which means *all* function +; pointers are eligible for VFE, so GlobalDCE will treat the +; @regular_non_virtual_funcA slot as eligible for VFE, and remove it. 
+@vtableA = internal unnamed_addr constant { [3 x i8*] } { [3 x i8*] [ + i8* bitcast (void ()* @vfunc1_live to i8*), + i8* bitcast (void ()* @vfunc2_dead to i8*), + i8* bitcast (void ()* @regular_non_virtual_funcA to i8*) +]}, align 8, !type !0, !type !1, !vcall_visibility !{i64 2} +!0 = !{i64 0, !"vfunc1.type"} +!1 = !{i64 8, !"vfunc2.type"} + +; CHECK: @vtableA = internal unnamed_addr constant { [3 x i8*] } { [3 x i8*] [ +; CHECK-SAME: i8* bitcast (void ()* @vfunc1_live to i8*), +; CHECK-SAME: i8* null, +; CHECK-SAME: i8* null +; CHECK-SAME: ] }, align 8, !type !0, !type !1, !vcall_visibility !2 + + +; A vtable that contains a non-vfunc entry, @regular_non_virtual_funcB, with a +; range of [0,16) which means only the first two entries are eligible for VFE. +; GlobalDCE should keep @regular_non_virtual_funcB in the vtable. +@vtableB = internal unnamed_addr constant { [3 x i8*] } { [3 x i8*] [ + i8* bitcast (void ()* @vfunc1_live to i8*), + i8* bitcast (void ()* @vfunc2_dead to i8*), + i8* bitcast (void ()* @regular_non_virtual_funcB to i8*) +]}, align 8, !type !0, !type !1, !vcall_visibility !{i64 2, i64 0, i64 16} + +; CHECK: @vtableB = internal unnamed_addr constant { [3 x i8*] } { [3 x i8*] [ +; CHECK-SAME: i8* bitcast (void ()* @vfunc1_live to i8*), +; CHECK-SAME: i8* null, +; CHECK-SAME: i8* bitcast (void ()* @regular_non_virtual_funcB to i8*) +; CHECK-SAME: ] }, align 8, !type !0, !type !1, !vcall_visibility !3 + +; (1) vfunc1_live is referenced from @main, stays alive +define internal void @vfunc1_live() { + ; CHECK: define internal void @vfunc1_live( + ret void +} + +; (2) vfunc2_dead is never referenced, gets removed and vtable slot is null'd +define internal void @vfunc2_dead() { + ; CHECK-NOT: define internal void @vfunc2_dead( + ret void +} + +; (3) not using a range in !vcall_visibility, global gets removed +define internal void @regular_non_virtual_funcA() { + ; CHECK-NOT: define internal void @regular_non_virtual_funcA( + ret void +} + +; (4) using 
a range in !vcall_visibility, pointer is outside of range, so should +; stay alive +define internal void @regular_non_virtual_funcB() { + ; CHECK: define internal void @regular_non_virtual_funcB( + ret void +} + +define void @main() { + %1 = ptrtoint { [3 x i8*] }* @vtableA to i64 ; to keep @vtableA alive + %2 = ptrtoint { [3 x i8*] }* @vtableB to i64 ; to keep @vtableB alive + %3 = tail call { i8*, i1 } @llvm.type.checked.load(i8* null, i32 0, metadata !"vfunc1.type") + ret void +} + +!999 = !{i32 1, !"Virtual Function Elim", i32 1} +!llvm.module.flags = !{!999}