diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -278,6 +278,10 @@ CODEGENOPT(WholeProgramVTables, 1, 0) ///< Whether to apply whole-program /// vtable optimization. +CODEGENOPT(VirtualFunctionElimination, 1, 0) ///< Whether to apply the dead + /// virtual function elimination + /// optimization. + /// Whether to use public LTO visibility for entities in std and stdext /// namespaces. This is enabled by clang-cl's /MT and /MTd flags. CODEGENOPT(LTOVisibilityPublicStd, 1, 0) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1843,6 +1843,13 @@ HelpText<"Emits more virtual tables to improve devirtualization">; def fno_force_emit_vtables : Flag<["-"], "fno-force-emit-vtables">, Group, Flags<[CoreOption]>; + +def fvirtual_function_elimination : Flag<["-"], "fvirtual-function-elimination">, Group, + Flags<[CoreOption, CC1Option]>, + HelpText<"Enables dead virtual function elimination optimization. 
Requires -flto=full">; +def fno_virtual_function_elimination : Flag<["-"], "fno-virtual-function-elimination">, Group, + Flags<[CoreOption]>; + def fwrapv : Flag<["-"], "fwrapv">, Group, Flags<[CC1Option]>, HelpText<"Treat signed integer overflow as two's complement">; def fwritable_strings : Flag<["-"], "fwritable-strings">, Group, Flags<[CC1Option]>, diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp --- a/clang/lib/CodeGen/CGClass.cpp +++ b/clang/lib/CodeGen/CGClass.cpp @@ -2784,11 +2784,16 @@ bool CodeGenFunction::ShouldEmitVTableTypeCheckedLoad(const CXXRecordDecl *RD) { if (!CGM.getCodeGenOpts().WholeProgramVTables || - !SanOpts.has(SanitizerKind::CFIVCall) || - !CGM.getCodeGenOpts().SanitizeTrap.has(SanitizerKind::CFIVCall) || !CGM.HasHiddenLTOVisibility(RD)) return false; + if (CGM.getCodeGenOpts().VirtualFunctionElimination) + return true; + + if (!SanOpts.has(SanitizerKind::CFIVCall) || + !CGM.getCodeGenOpts().SanitizeTrap.has(SanitizerKind::CFIVCall)) + return false; + std::string TypeName = RD->getQualifiedNameAsString(); return !getContext().getSanitizerBlacklist().isBlacklistedType( SanitizerKind::CFIVCall, TypeName); @@ -2811,8 +2816,13 @@ TypeId}); llvm::Value *CheckResult = Builder.CreateExtractValue(CheckedLoad, 1); - EmitCheck(std::make_pair(CheckResult, SanitizerKind::CFIVCall), - SanitizerHandler::CFICheckFail, nullptr, nullptr); + std::string TypeName = RD->getQualifiedNameAsString(); + if (SanOpts.has(SanitizerKind::CFIVCall) && + !getContext().getSanitizerBlacklist().isBlacklistedType( + SanitizerKind::CFIVCall, TypeName)) { + EmitCheck(std::make_pair(CheckResult, SanitizerKind::CFIVCall), + SanitizerHandler::CFICheckFail, {}, {}); + } return Builder.CreateBitCast( Builder.CreateExtractValue(CheckedLoad, 0), diff --git a/clang/lib/CodeGen/CGVTables.cpp b/clang/lib/CodeGen/CGVTables.cpp --- a/clang/lib/CodeGen/CGVTables.cpp +++ b/clang/lib/CodeGen/CGVTables.cpp @@ -779,7 +779,7 @@ assert(!VTable->isDeclaration() && 
"Shouldn't set properties on declaration"); CGM.setGVProperties(VTable, RD); - CGM.EmitVTableTypeMetadata(VTable, *VTLayout.get()); + CGM.EmitVTableTypeMetadata(RD, VTable, *VTLayout.get()); return VTable; } @@ -1010,7 +1010,32 @@ return true; } -void CodeGenModule::EmitVTableTypeMetadata(llvm::GlobalVariable *VTable, +llvm::GlobalObject::VCallVisibility +CodeGenModule::GetVCallVisibilityLevel(const CXXRecordDecl *RD) { + LinkageInfo LV = RD->getLinkageAndVisibility(); + llvm::GlobalObject::VCallVisibility TypeVis; + if (!isExternallyVisible(LV.getLinkage())) + TypeVis = llvm::GlobalObject::VCallVisibilityTranslationUnit; + else if (HasHiddenLTOVisibility(RD)) + TypeVis = llvm::GlobalObject::VCallVisibilityLinkageUnit; + else + TypeVis = llvm::GlobalObject::VCallVisibilityPublic; + + for (auto B : RD->bases()) + if (B.getType()->getAsCXXRecordDecl()->isDynamicClass()) + TypeVis = std::min(TypeVis, + GetVCallVisibilityLevel(B.getType()->getAsCXXRecordDecl())); + + for (auto B : RD->vbases()) + if (B.getType()->getAsCXXRecordDecl()->isDynamicClass()) + TypeVis = std::min(TypeVis, + GetVCallVisibilityLevel(B.getType()->getAsCXXRecordDecl())); + + return TypeVis; +} + +void CodeGenModule::EmitVTableTypeMetadata(const CXXRecordDecl *RD, + llvm::GlobalVariable *VTable, const VTableLayout &VTLayout) { if (!getCodeGenOpts().LTOUnit) return; @@ -1070,4 +1095,10 @@ VTable->addTypeMetadata((PointerWidth * I).getQuantity(), MD); } } + + if (getCodeGenOpts().VirtualFunctionElimination) { + llvm::GlobalObject::VCallVisibility TypeVis = GetVCallVisibilityLevel(RD); + if (TypeVis != llvm::GlobalObject::VCallVisibilityPublic) + VTable->addVCallVisibilityMetadata(TypeVis); + } } diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -1275,8 +1275,16 @@ /// optimization. 
bool HasHiddenLTOVisibility(const CXXRecordDecl *RD); + /// Returns the vcall visibility of the given type. This is the scope in which + /// a virtual function call could be made which ends up being dispatched to a + /// member function of this class. This scope can be wider than the visibility + /// of the class itself when the class has a more-visible dynamic base class. + llvm::GlobalObject::VCallVisibility + GetVCallVisibilityLevel(const CXXRecordDecl *RD); + /// Emit type metadata for the given vtable using the given layout. - void EmitVTableTypeMetadata(llvm::GlobalVariable *VTable, + void EmitVTableTypeMetadata(const CXXRecordDecl *RD, + llvm::GlobalVariable *VTable, const VTableLayout &VTLayout); /// Generate a cross-DSO type identifier for MD. diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -604,8 +604,6 @@ VTableOffset = Builder.CreateTrunc(VTableOffset, CGF.Int32Ty); VTableOffset = Builder.CreateZExt(VTableOffset, CGM.PtrDiffTy); } - // Compute the address of the virtual function pointer. - llvm::Value *VFPAddr = Builder.CreateGEP(VTable, VTableOffset); // Check the address of the function pointer if CFI on member function // pointers is enabled. 
@@ -613,44 +611,81 @@ llvm::Constant *CheckTypeDesc; bool ShouldEmitCFICheck = CGF.SanOpts.has(SanitizerKind::CFIMFCall) && CGM.HasHiddenLTOVisibility(RD); - if (ShouldEmitCFICheck) { - CodeGenFunction::SanitizerScope SanScope(&CGF); - - CheckSourceLocation = CGF.EmitCheckSourceLocation(E->getBeginLoc()); - CheckTypeDesc = CGF.EmitCheckTypeDescriptor(QualType(MPT, 0)); - llvm::Constant *StaticData[] = { - llvm::ConstantInt::get(CGF.Int8Ty, CodeGenFunction::CFITCK_VMFCall), - CheckSourceLocation, - CheckTypeDesc, - }; - - llvm::Metadata *MD = - CGM.CreateMetadataIdentifierForVirtualMemPtrType(QualType(MPT, 0)); - llvm::Value *TypeId = llvm::MetadataAsValue::get(CGF.getLLVMContext(), MD); + bool ShouldEmitVFEInfo = CGM.getCodeGenOpts().VirtualFunctionElimination && + CGM.HasHiddenLTOVisibility(RD); + llvm::Value *VirtualFn = nullptr; - llvm::Value *TypeTest = Builder.CreateCall( - CGM.getIntrinsic(llvm::Intrinsic::type_test), {VFPAddr, TypeId}); + { + CodeGenFunction::SanitizerScope SanScope(&CGF); + llvm::Value *TypeId = nullptr; + llvm::Value *CheckResult = nullptr; + + if (ShouldEmitCFICheck || ShouldEmitVFEInfo) { + // If doing CFI or VFE, we will need the metadata node to check against. + llvm::Metadata *MD = + CGM.CreateMetadataIdentifierForVirtualMemPtrType(QualType(MPT, 0)); + TypeId = llvm::MetadataAsValue::get(CGF.getLLVMContext(), MD); + } - if (CGM.getCodeGenOpts().SanitizeTrap.has(SanitizerKind::CFIMFCall)) { - CGF.EmitTrapCheck(TypeTest); + llvm::Value *VFPAddr = Builder.CreateGEP(VTable, VTableOffset); + + if (ShouldEmitVFEInfo) { + // If doing VFE, load from the vtable with a type.checked.load intrinsic + // call. Note that we use the GEP to calculate the address to load from + // and pass 0 as the offset to the intrinsic. This is because every + // vtable slot of the correct type is marked with matching metadata, and + // we know that the load must be from one of these slots. 
+ llvm::Value *CheckedLoad = Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::type_checked_load), + {VFPAddr, llvm::ConstantInt::get(CGM.Int32Ty, 0), TypeId}); + CheckResult = Builder.CreateExtractValue(CheckedLoad, 1); + VirtualFn = Builder.CreateExtractValue(CheckedLoad, 0); + VirtualFn = Builder.CreateBitCast(VirtualFn, FTy->getPointerTo(), + "memptr.virtualfn"); } else { - llvm::Value *AllVtables = llvm::MetadataAsValue::get( - CGM.getLLVMContext(), - llvm::MDString::get(CGM.getLLVMContext(), "all-vtables")); - llvm::Value *ValidVtable = Builder.CreateCall( - CGM.getIntrinsic(llvm::Intrinsic::type_test), {VTable, AllVtables}); - CGF.EmitCheck(std::make_pair(TypeTest, SanitizerKind::CFIMFCall), - SanitizerHandler::CFICheckFail, StaticData, - {VTable, ValidVtable}); + // When not doing VFE, emit a normal load, as it allows more + // optimisations than type.checked.load. + if (ShouldEmitCFICheck) { + CheckResult = Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::type_test), + {Builder.CreateBitCast(VFPAddr, CGF.Int8PtrTy), TypeId}); + } + VFPAddr = + Builder.CreateBitCast(VFPAddr, FTy->getPointerTo()->getPointerTo()); + VirtualFn = Builder.CreateAlignedLoad(VFPAddr, CGF.getPointerAlign(), + "memptr.virtualfn"); } + assert(VirtualFn && "Virtual function pointer not created!"); + assert((!ShouldEmitCFICheck || CheckResult) && + "Check result required but not created!"); + + if (ShouldEmitCFICheck) { + // If doing CFI, emit the check. 
+ CheckSourceLocation = CGF.EmitCheckSourceLocation(E->getBeginLoc()); + CheckTypeDesc = CGF.EmitCheckTypeDescriptor(QualType(MPT, 0)); + llvm::Constant *StaticData[] = { + llvm::ConstantInt::get(CGF.Int8Ty, CodeGenFunction::CFITCK_VMFCall), + CheckSourceLocation, + CheckTypeDesc, + }; - FnVirtual = Builder.GetInsertBlock(); - } + if (CGM.getCodeGenOpts().SanitizeTrap.has(SanitizerKind::CFIMFCall)) { + CGF.EmitTrapCheck(CheckResult); + } else { + llvm::Value *AllVtables = llvm::MetadataAsValue::get( + CGM.getLLVMContext(), + llvm::MDString::get(CGM.getLLVMContext(), "all-vtables")); + llvm::Value *ValidVtable = Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::type_test), {VTable, AllVtables}); + CGF.EmitCheck(std::make_pair(CheckResult, SanitizerKind::CFIMFCall), + SanitizerHandler::CFICheckFail, StaticData, + {VTable, ValidVtable}); + } + + FnVirtual = Builder.GetInsertBlock(); + } + } // End of sanitizer scope - // Load the virtual function to call. - VFPAddr = Builder.CreateBitCast(VFPAddr, FTy->getPointerTo()->getPointerTo()); - llvm::Value *VirtualFn = Builder.CreateAlignedLoad( - VFPAddr, CGF.getPointerAlign(), "memptr.virtualfn"); CGF.EmitBranch(FnEnd); // In the non-virtual path, the function pointer is actually a @@ -1594,7 +1629,7 @@ EmitFundamentalRTTIDescriptors(RD); if (!VTable->isDeclarationForLinker()) - CGM.EmitVTableTypeMetadata(VTable, VTLayout); + CGM.EmitVTableTypeMetadata(RD, VTable, VTLayout); } bool ItaniumCXXABI::isVirtualOffsetNeededForVTableField( diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5432,9 +5432,30 @@ CmdArgs.push_back(Args.MakeArgString(TargetInfo.str())); } - bool WholeProgramVTables = - Args.hasFlag(options::OPT_fwhole_program_vtables, - options::OPT_fno_whole_program_vtables, false); + bool VirtualFunctionElimination = + Args.hasFlag(options::OPT_fvirtual_function_elimination, + 
options::OPT_fno_virtual_function_elimination, false); + if (VirtualFunctionElimination) { + // VFE requires full LTO (currently, this might be relaxed to allow ThinLTO + // in the future). + if (D.getLTOMode() != LTOK_Full) + D.Diag(diag::err_drv_argument_only_allowed_with) + << "-fvirtual-function-elimination" + << "-flto=full"; + + CmdArgs.push_back("-fvirtual-function-elimination"); + } + + // VFE requires whole-program-vtables, and enables it by default. + bool WholeProgramVTables = Args.hasFlag( + options::OPT_fwhole_program_vtables, + options::OPT_fno_whole_program_vtables, VirtualFunctionElimination); + if (VirtualFunctionElimination && !WholeProgramVTables) { + D.Diag(diag::err_drv_argument_not_allowed_with) + << "-fno-whole-program-vtables" + << "-fvirtual-function-elimination"; + } + if (WholeProgramVTables) { if (!D.isUsingLTO()) D.Diag(diag::err_drv_argument_only_allowed_with) diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -758,6 +758,8 @@ Opts.CodeViewGHash = Args.hasArg(OPT_gcodeview_ghash); Opts.MacroDebugInfo = Args.hasArg(OPT_debug_info_macro); Opts.WholeProgramVTables = Args.hasArg(OPT_fwhole_program_vtables); + Opts.VirtualFunctionElimination = + Args.hasArg(OPT_fvirtual_function_elimination); Opts.LTOVisibilityPublicStd = Args.hasArg(OPT_flto_visibility_public_std); Opts.SplitDwarfFile = Args.getLastArgValue(OPT_split_dwarf_file); Opts.SplitDwarfOutput = Args.getLastArgValue(OPT_split_dwarf_output); diff --git a/clang/test/CodeGenCXX/vcall-visibility-metadata.cpp b/clang/test/CodeGenCXX/vcall-visibility-metadata.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/vcall-visibility-metadata.cpp @@ -0,0 +1,88 @@ +// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -emit-llvm -fvirtual-function-elimination -fwhole-program-vtables -o - %s | FileCheck %s + + +// Anonymous 
namespace. +namespace { +// CHECK: @_ZTVN12_GLOBAL__N_11AE = {{.*}} !vcall_visibility [[VIS_TU:![0-9]+]] +struct A { + A() {} + virtual int f() { return 1; } +}; +} +void *construct_A() { + return new A(); +} + + +// Hidden visibility. +// CHECK: @_ZTV1B = {{.*}} !vcall_visibility [[VIS_DSO:![0-9]+]] +struct __attribute__((visibility("hidden"))) B { + B() {} + virtual int f() { return 1; } +}; +B *construct_B() { + return new B(); +} + + +// Default visibility. +// CHECK-NOT: @_ZTV1C = {{.*}} !vcall_visibility +struct __attribute__((visibility("default"))) C { + C() {} + virtual int f() { return 1; } +}; +C *construct_C() { + return new C(); +} + + +// Hidden visibility, public LTO visibility. +// CHECK-NOT: @_ZTV1D = {{.*}} !vcall_visibility +struct __attribute__((visibility("hidden"))) [[clang::lto_visibility_public]] D { + D() {} + virtual int f() { return 1; } +}; +D *construct_D() { + return new D(); +} + + +// Hidden visibility, but inherits from class with default visibility. +// CHECK-NOT: @_ZTV1E = {{.*}} !vcall_visibility +struct __attribute__((visibility("hidden"))) E : C { + E() {} + virtual int f() { return 1; } +}; +E *construct_E() { + return new E(); +} + + +// Anonymous namespace, but inherits from class with default visibility. +// CHECK-NOT: @_ZTVN12_GLOBAL__N_11FE = {{.*}} !vcall_visibility +namespace { +struct __attribute__((visibility("hidden"))) F : C { + F() {} + virtual int f() { return 1; } +}; +} +void *construct_F() { + return new F(); +} + + +// Anonymous namespace, but inherits from class with hidden visibility. 
+// CHECK: @_ZTVN12_GLOBAL__N_11GE = {{.*}} !vcall_visibility [[VIS_DSO:![0-9]+]] +namespace { +struct __attribute__((visibility("hidden"))) G : B { + G() {} + virtual int f() { return 1; } +}; +} +void *construct_G() { + return new G(); +} + + +// CHECK-DAG: [[VIS_DSO]] = !{i64 1} +// CHECK-DAG: [[VIS_TU]] = !{i64 2} diff --git a/clang/test/CodeGenCXX/virtual-function-elimination.cpp b/clang/test/CodeGenCXX/virtual-function-elimination.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/virtual-function-elimination.cpp @@ -0,0 +1,75 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux -flto -flto-unit -fvirtual-function-elimination -fwhole-program-vtables -emit-llvm -o - %s | FileCheck %s + + +struct __attribute__((visibility("default"))) A { + virtual void foo(); +}; + +void test_1(A *p) { + // A has default visibility, so no need for type.checked.load. +// CHECK-LABEL: define void @_Z6test_1P1A +// CHECK: [[FN_PTR_ADDR:%.+]] = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** {{%.+}}, i64 0 +// CHECK: [[FN_PTR:%.+]] = load void (%struct.A*)*, void (%struct.A*)** [[FN_PTR_ADDR]] +// CHECK: call void [[FN_PTR]]( + p->foo(); +} + + +struct __attribute__((visibility("hidden"))) [[clang::lto_visibility_public]] B { + virtual void foo(); +}; + +void test_2(B *p) { + // B has public LTO visibility, so no need for type.checked.load. +// CHECK-LABEL: define void @_Z6test_2P1B +// CHECK: [[FN_PTR_ADDR:%.+]] = getelementptr inbounds void (%struct.B*)*, void (%struct.B*)** {{%.+}}, i64 0 +// CHECK: [[FN_PTR:%.+]] = load void (%struct.B*)*, void (%struct.B*)** [[FN_PTR_ADDR]] +// CHECK: call void [[FN_PTR]]( + p->foo(); +} + + +struct __attribute__((visibility("hidden"))) C { + virtual void foo(); + virtual void bar(); +}; + +void test_3(C *p) { + // C has hidden visibility, so we generate type.checked.load to allow VFE. 
+// CHECK-LABEL: define void @_Z6test_3P1C +// CHECK: [[LOAD:%.+]] = call { i8*, i1 } @llvm.type.checked.load(i8* {{%.+}}, i32 0, metadata !"_ZTS1C") +// CHECK: [[FN_PTR_I8:%.+]] = extractvalue { i8*, i1 } [[LOAD]], 0 +// CHECK: [[FN_PTR:%.+]] = bitcast i8* [[FN_PTR_I8]] to void (%struct.C*)* +// CHECK: call void [[FN_PTR]]( + p->foo(); +} + +void test_4(C *p) { + // When using type.checked.load, we pass the vtable offset to the intrinsic, + // rather than adding it to the pointer with a GEP. +// CHECK-LABEL: define void @_Z6test_4P1C +// CHECK: [[LOAD:%.+]] = call { i8*, i1 } @llvm.type.checked.load(i8* {{%.+}}, i32 8, metadata !"_ZTS1C") +// CHECK: [[FN_PTR_I8:%.+]] = extractvalue { i8*, i1 } [[LOAD]], 0 +// CHECK: [[FN_PTR:%.+]] = bitcast i8* [[FN_PTR_I8]] to void (%struct.C*)* +// CHECK: call void [[FN_PTR]]( + p->bar(); +} + +void test_5(C *p, void (C::*q)(void)) { + // We also use type.checked.load for the virtual side of member function + // pointer calls. We use a GEP to calculate the address to load from and pass + // 0 as the offset to the intrinsic, because we know that the load must be + // from exactly the point marked by one of the function-type metadatas (in + // this case "_ZTSM1CFvvE.virtual"). If we passed the offset from the member + // function pointer to the intrinsic, this information would be lost. No + // codegen changes on the non-virtual side. 
+// CHECK-LABEL: define void @_Z6test_5P1CMS_FvvE( +// CHECK: [[FN_PTR_ADDR:%.+]] = getelementptr i8, i8* %vtable, i64 {{%.+}} +// CHECK: [[LOAD:%.+]] = call { i8*, i1 } @llvm.type.checked.load(i8* [[FN_PTR_ADDR]], i32 0, metadata !"_ZTSM1CFvvE.virtual") +// CHECK: [[FN_PTR_I8:%.+]] = extractvalue { i8*, i1 } [[LOAD]], 0 +// CHECK: [[FN_PTR:%.+]] = bitcast i8* [[FN_PTR_I8]] to void (%struct.C*)* + +// CHECK: [[PHI:%.+]] = phi void (%struct.C*)* {{.*}}[ [[FN_PTR]], {{.*}} ] +// CHECK: call void [[PHI]]( + (p->*q)(); +} diff --git a/clang/test/Driver/virtual-function-elimination.cpp b/clang/test/Driver/virtual-function-elimination.cpp new file mode 100644 --- /dev/null +++ b/clang/test/Driver/virtual-function-elimination.cpp @@ -0,0 +1,11 @@ +// RUN: %clang -target x86_64-unknown-linux -fvirtual-function-elimination -### %s 2>&1 | FileCheck --check-prefix=BAD-LTO %s +// RUN: %clang -target x86_64-unknown-linux -fvirtual-function-elimination -flto=thin -### %s 2>&1 | FileCheck --check-prefix=BAD-LTO %s +// BAD-LTO: invalid argument '-fvirtual-function-elimination' only allowed with '-flto=full' + +// RUN: %clang -target x86_64-unknown-linux -fvirtual-function-elimination -flto -### %s 2>&1 | FileCheck --check-prefix=GOOD %s +// RUN: %clang -target x86_64-unknown-linux -fvirtual-function-elimination -flto=full -### %s 2>&1 | FileCheck --check-prefix=GOOD %s +// RUN: %clang -target x86_64-unknown-linux -fvirtual-function-elimination -flto -fwhole-program-vtables -### %s 2>&1 | FileCheck --check-prefix=GOOD %s +// GOOD: "-fvirtual-function-elimination" "-fwhole-program-vtables" + +// RUN: %clang -target x86_64-unknown-linux -fvirtual-function-elimination -fno-whole-program-vtables -flto -### %s 2>&1 | FileCheck --check-prefix=NO-WHOLE-PROGRAM-VTABLES %s +// NO-WHOLE-PROGRAM-VTABLES: invalid argument '-fno-whole-program-vtables' not allowed with '-fvirtual-function-elimination' diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst --- a/llvm/docs/LangRef.rst +++ 
b/llvm/docs/LangRef.rst @@ -6241,6 +6241,13 @@ !0 = !{i32 1, !"short_wchar", i32 1} !1 = !{i32 1, !"short_enum", i32 0} +LTO Post-Link Module Flags Metadata +----------------------------------- + +Some optimisations are only valid when the entire LTO unit is present in the current +module. This is represented by the ``LTOPostLink`` module flags metadata, which +will be created with a value of ``1`` when LTO linking occurs. + Automatic Linker Flags Named Metadata ===================================== @@ -16520,6 +16527,8 @@ The ``llvm.type.test`` intrinsic tests whether the given pointer is associated with the given type identifier. +.. _type.checked.load: + '``llvm.type.checked.load``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/docs/TypeMetadata.rst b/llvm/docs/TypeMetadata.rst --- a/llvm/docs/TypeMetadata.rst +++ b/llvm/docs/TypeMetadata.rst @@ -224,3 +224,67 @@ } .. _GlobalLayoutBuilder: https://github.com/llvm/llvm-project/blob/master/llvm/include/llvm/Transforms/IPO/LowerTypeTests.h + +``!vcall_visibility`` Metadata +============================== + +In order to allow removing unused function pointers from vtables, we need to +know whether every virtual call which could use it is known to the compiler, or +whether another translation unit could introduce more calls through the vtable. +This is not the same as the linkage of the vtable, because call sites could be +using a pointer of a more widely-visible base class. For example, consider this +code: + +.. code-block:: c++ + + __attribute__((visibility("default"))) + struct A { + virtual void f(); + }; + + __attribute__((visibility("hidden"))) + struct B : A { + virtual void f(); + }; + +With LTO, we know that all code which can see the declaration of ``B`` is +visible to us. However, a pointer to a ``B`` could be cast to ``A*`` and passed +to another linkage unit, which could then call ``f`` on it. 
This call would +load from the vtable for ``B`` (using the object pointer), and then call +``B::f``. This means we can't remove the function pointer from ``B``'s vtable, +or the implementation of ``B::f``. However, if we can see all code which knows +about any dynamic base class (which would be the case if ``B`` only inherited +from classes with hidden visibility), then this optimisation would be valid. + +This concept is represented in IR by the ``!vcall_visibility`` metadata +attached to vtable objects, with the following values: + +.. list-table:: + :header-rows: 1 + :widths: 10 90 + + * - Value + - Behavior + + * - 0 (or omitted) + - **Public** + Virtual function calls using this vtable could be made from external + code. + + * - 1 + - **Linkage Unit** + All virtual function calls which might use this vtable are in the + current LTO unit, meaning they will be in the current module once + LTO linking has been performed. + + * - 2 + - **Translation Unit** + All virtual function calls which might use this vtable are in the + current module. + +In addition, all function pointer loads from a vtable marked with the +``!vcall_visibility`` metadata (with a non-zero value) must be done using the +:ref:`llvm.type.checked.load <type.checked.load>` intrinsic, so that virtual +call sites can be correlated with the vtables which they might load from. +Other parts of the vtable (RTTI, offset-to-top, ...) can still be accessed with +normal loads. 
diff --git a/llvm/include/llvm/Analysis/TypeMetadataUtils.h b/llvm/include/llvm/Analysis/TypeMetadataUtils.h --- a/llvm/include/llvm/Analysis/TypeMetadataUtils.h +++ b/llvm/include/llvm/Analysis/TypeMetadataUtils.h @@ -50,6 +50,8 @@ SmallVectorImpl &LoadedPtrs, SmallVectorImpl &Preds, bool &HasNonCallUses, const CallInst *CI, DominatorTree &DT); + +Constant *getPointerAtOffset(Constant *I, uint64_t Offset, Module &M); } #endif diff --git a/llvm/include/llvm/IR/FixedMetadataKinds.def b/llvm/include/llvm/IR/FixedMetadataKinds.def --- a/llvm/include/llvm/IR/FixedMetadataKinds.def +++ b/llvm/include/llvm/IR/FixedMetadataKinds.def @@ -39,3 +39,4 @@ LLVM_FIXED_MD_KIND(MD_access_group, "llvm.access.group", 25) LLVM_FIXED_MD_KIND(MD_callback, "callback", 26) LLVM_FIXED_MD_KIND(MD_preserve_access_index, "llvm.preserve.access.index", 27) +LLVM_FIXED_MD_KIND(MD_vcall_visibility, "vcall_visibility", 28) diff --git a/llvm/include/llvm/IR/GlobalObject.h b/llvm/include/llvm/IR/GlobalObject.h --- a/llvm/include/llvm/IR/GlobalObject.h +++ b/llvm/include/llvm/IR/GlobalObject.h @@ -28,6 +28,20 @@ class Metadata; class GlobalObject : public GlobalValue { +public: + // VCallVisibility - values for visibility metadata attached to vtables. This + // describes the scope in which a virtual call could end up being dispatched + // through this vtable. + enum VCallVisibility { + // Type is potentially visible to external code. + VCallVisibilityPublic = 0, + // Type is only visible to code which will be in the current Module after + // LTO internalization. + VCallVisibilityLinkageUnit = 1, + // Type is only visible to code in the current Module. 
+ VCallVisibilityTranslationUnit = 2, + }; + protected: GlobalObject(Type *Ty, ValueTy VTy, Use *Ops, unsigned NumOps, LinkageTypes Linkage, const Twine &Name, @@ -163,6 +177,8 @@ void copyMetadata(const GlobalObject *Src, unsigned Offset); void addTypeMetadata(unsigned Offset, Metadata *TypeID); + void addVCallVisibilityMetadata(VCallVisibility Visibility); + VCallVisibility getVCallVisibility() const; protected: void copyAttributesFrom(const GlobalObject *Src); diff --git a/llvm/include/llvm/Transforms/IPO/GlobalDCE.h b/llvm/include/llvm/Transforms/IPO/GlobalDCE.h --- a/llvm/include/llvm/Transforms/IPO/GlobalDCE.h +++ b/llvm/include/llvm/Transforms/IPO/GlobalDCE.h @@ -43,11 +43,25 @@ /// Comdat -> Globals in that Comdat section. std::unordered_multimap ComdatMembers; + /// !type metadata -> set of (vtable, offset) pairs + DenseMap, 4>> + TypeIdMap; + + // Global variables which are vtables, and which we have enough information + // about to safely do dead virtual function elimination. + SmallPtrSet VFESafeVTables; + void UpdateGVDependencies(GlobalValue &GV); void MarkLive(GlobalValue &GV, SmallVectorImpl *Updates = nullptr); bool RemoveUnusedGlobalValue(GlobalValue &GV); + // Dead virtual function elimination. 
+ void AddVirtualFunctionDependencies(Module &M); + void ScanVTables(Module &M); + void ScanTypeCheckedLoadIntrinsics(Module &M); + void ScanVTableLoad(Function *Caller, Metadata *TypeId, uint64_t CallOffset); + void ComputeDependencies(Value *V, SmallPtrSetImpl &U); }; diff --git a/llvm/lib/Analysis/TypeMetadataUtils.cpp b/llvm/lib/Analysis/TypeMetadataUtils.cpp --- a/llvm/lib/Analysis/TypeMetadataUtils.cpp +++ b/llvm/lib/Analysis/TypeMetadataUtils.cpp @@ -127,3 +127,35 @@ findCallsAtConstantOffset(DevirtCalls, &HasNonCallUses, LoadedPtr, Offset->getZExtValue(), CI, DT); } + +Constant *llvm::getPointerAtOffset(Constant *I, uint64_t Offset, Module &M) { + if (I->getType()->isPointerTy()) { + if (Offset == 0) + return I; + return nullptr; + } + + const DataLayout &DL = M.getDataLayout(); + + if (auto *C = dyn_cast(I)) { + const StructLayout *SL = DL.getStructLayout(C->getType()); + if (Offset >= SL->getSizeInBytes()) + return nullptr; + + unsigned Op = SL->getElementContainingOffset(Offset); + return getPointerAtOffset(cast(I->getOperand(Op)), + Offset - SL->getElementOffset(Op), M); + } + if (auto *C = dyn_cast(I)) { + ArrayType *VTableTy = C->getType(); + uint64_t ElemSize = DL.getTypeAllocSize(VTableTy->getElementType()); + + unsigned Op = Offset / ElemSize; + if (Op >= C->getNumOperands()) + return nullptr; + + return getPointerAtOffset(cast(I->getOperand(Op)), + Offset % ElemSize, M); + } + return nullptr; +} diff --git a/llvm/lib/IR/Metadata.cpp b/llvm/lib/IR/Metadata.cpp --- a/llvm/lib/IR/Metadata.cpp +++ b/llvm/lib/IR/Metadata.cpp @@ -1497,6 +1497,24 @@ TypeID})); } +void GlobalObject::addVCallVisibilityMetadata(VCallVisibility Visibility) { + addMetadata(LLVMContext::MD_vcall_visibility, + *MDNode::get(getContext(), + {ConstantAsMetadata::get(ConstantInt::get( + Type::getInt64Ty(getContext()), Visibility))})); +} + +GlobalObject::VCallVisibility GlobalObject::getVCallVisibility() const { + if (MDNode *MD = getMetadata(LLVMContext::MD_vcall_visibility)) { + 
uint64_t Val = cast( + cast(MD->getOperand(0))->getValue()) + ->getZExtValue(); + assert(Val <= 2 && "unknown vcall visibility!"); + return static_cast<VCallVisibility>(Val); + } + return VCallVisibility::VCallVisibilityPublic; +} + void Function::setSubprogram(DISubprogram *SP) { setMetadata(LLVMContext::MD_dbg, SP); } diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -1004,6 +1004,8 @@ GV->setLinkage(GlobalValue::InternalLinkage); } + RegularLTO.CombinedModule->addModuleFlag(Module::Error, "LTOPostLink", 1); + if (Conf.PostInternalizeModuleHook && !Conf.PostInternalizeModuleHook(0, *RegularLTO.CombinedModule)) return Error::success(); diff --git a/llvm/lib/LTO/LTOCodeGenerator.cpp b/llvm/lib/LTO/LTOCodeGenerator.cpp --- a/llvm/lib/LTO/LTOCodeGenerator.cpp +++ b/llvm/lib/LTO/LTOCodeGenerator.cpp @@ -462,6 +462,8 @@ internalizeModule(*MergedModule, mustPreserveGV); + MergedModule->addModuleFlag(Module::Error, "LTOPostLink", 1); + ScopeRestrictionsDone = true; } diff --git a/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/llvm/lib/Transforms/IPO/GlobalDCE.cpp --- a/llvm/lib/Transforms/IPO/GlobalDCE.cpp +++ b/llvm/lib/Transforms/IPO/GlobalDCE.cpp @@ -17,9 +17,11 @@ #include "llvm/Transforms/IPO/GlobalDCE.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/TypeMetadataUtils.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" #include "llvm/Pass.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Utils/CtorUtils.h" @@ -29,10 +31,15 @@ #define DEBUG_TYPE "globaldce" +static cl::opt + ClEnableVFE("enable-vfe", cl::Hidden, cl::init(true), cl::ZeroOrMore, + cl::desc("Enable virtual function elimination")); + STATISTIC(NumAliases , "Number of global aliases removed"); STATISTIC(NumFunctions, "Number of functions removed"); STATISTIC(NumIFuncs, "Number of indirect functions removed"); 
STATISTIC(NumVariables, "Number of global variables removed"); +STATISTIC(NumVFuncs, "Number of virtual functions removed"); namespace { class GlobalDCELegacyPass : public ModulePass { @@ -118,6 +125,15 @@ ComputeDependencies(User, Deps); Deps.erase(&GV); // Remove self-reference. for (GlobalValue *GVU : Deps) { + // If this is a dep from a vtable to a virtual function, and we have + // complete information about all virtual call sites which could call + // through this vtable, then skip it, because the call site information will + // be more precise. + if (VFESafeVTables.count(GVU) && isa(&GV)) { + LLVM_DEBUG(dbgs() << "Ignoring dep " << GVU->getName() << " -> " + << GV.getName() << "\n"); + continue; + } GVDependencies[GVU].insert(&GV); } } @@ -132,12 +148,133 @@ if (Updates) Updates->push_back(&GV); if (Comdat *C = GV.getComdat()) { - for (auto &&CM : make_range(ComdatMembers.equal_range(C))) + for (auto &&CM : make_range(ComdatMembers.equal_range(C))) { MarkLive(*CM.second, Updates); // Recursion depth is only two because only // globals in the same comdat are visited. + } + } +} + +void GlobalDCEPass::ScanVTables(Module &M) { + SmallVector Types; + LLVM_DEBUG(dbgs() << "Building type info -> vtable map\n"); + + auto *LTOPostLinkMD = + cast_or_null(M.getModuleFlag("LTOPostLink")); + bool LTOPostLink = + LTOPostLinkMD && + (cast(LTOPostLinkMD->getValue())->getZExtValue() != 0); + + for (GlobalVariable &GV : M.globals()) { + Types.clear(); + GV.getMetadata(LLVMContext::MD_type, Types); + if (GV.isDeclaration() || Types.empty()) + continue; + + // Use the typeid metadata on the vtable to build a mapping from typeids to + // the list of (GV, offset) pairs which are the possible vtables for that + // typeid. 
+    for (MDNode *Type : Types) {
+      Metadata *TypeID = Type->getOperand(1).get();
+
+      uint64_t Offset =
+          cast<ConstantInt>(
+              cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
+              ->getZExtValue();
+
+      TypeIdMap[TypeID].insert(std::make_pair(&GV, Offset));
+    }
+
+    // If the type corresponding to the vtable is private to this translation
+    // unit, we know that we can see all virtual functions which might use it,
+    // so VFE is safe.
+    if (auto GO = dyn_cast<GlobalObject>(&GV)) {
+      GlobalObject::VCallVisibility TypeVis = GV.getVCallVisibility();
+      if (TypeVis == GlobalObject::VCallVisibilityTranslationUnit ||
+          (LTOPostLink &&
+           TypeVis == GlobalObject::VCallVisibilityLinkageUnit)) {
+        LLVM_DEBUG(dbgs() << GV.getName() << " is safe for VFE\n");
+        VFESafeVTables.insert(&GV);
+      }
+    }
+  }
+}
+
+void GlobalDCEPass::ScanVTableLoad(Function *Caller, Metadata *TypeId,
+                                   uint64_t CallOffset) {
+  for (auto &VTableInfo : TypeIdMap[TypeId]) { // every vtable for this typeid
+    GlobalVariable *VTable = VTableInfo.first;
+    uint64_t VTableOffset = VTableInfo.second;
+
+    Constant *Ptr =
+        getPointerAtOffset(VTable->getInitializer(), VTableOffset + CallOffset,
+                           *Caller->getParent());
+    if (!Ptr) {
+      LLVM_DEBUG(dbgs() << "can't find pointer in vtable!\n");
+      VFESafeVTables.erase(VTable);
+      continue; // Only this vtable is unsafe; keep scanning the others.
+    }
+
+    auto Callee = dyn_cast<Function>(Ptr->stripPointerCasts());
+    if (!Callee) {
+      LLVM_DEBUG(dbgs() << "vtable entry is not function pointer!\n");
+      VFESafeVTables.erase(VTable);
+      continue; // Only this vtable is unsafe; keep scanning the others.
+    }
+
+    LLVM_DEBUG(dbgs() << "vfunc dep " << Caller->getName() << " -> "
+                      << Callee->getName() << "\n");
+    GVDependencies[Caller].insert(Callee);
   }
 }

+void GlobalDCEPass::ScanTypeCheckedLoadIntrinsics(Module &M) {
+  LLVM_DEBUG(dbgs() << "Scanning type.checked.load intrinsics\n");
+  Function *TypeCheckedLoadFunc =
+      M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load));
+
+  if (!TypeCheckedLoadFunc)
+    return;
+
+  for (auto U : TypeCheckedLoadFunc->users()) {
+    auto CI = dyn_cast<CallInst>(U);
+    if (!CI)
+      continue;
+
+    auto *Offset = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+    Value
*TypeIdValue = CI->getArgOperand(2); + auto *TypeId = cast(TypeIdValue)->getMetadata(); + + if (Offset) { + ScanVTableLoad(CI->getFunction(), TypeId, Offset->getZExtValue()); + } else { + // type.checked.load with a non-constant offset, so assume every entry in + // every matching vtable is used. + for (auto &VTableInfo : TypeIdMap[TypeId]) { + VFESafeVTables.erase(VTableInfo.first); + } + } + } +} + +void GlobalDCEPass::AddVirtualFunctionDependencies(Module &M) { + if (!ClEnableVFE) + return; + + ScanVTables(M); + + if (VFESafeVTables.empty()) + return; + + ScanTypeCheckedLoadIntrinsics(M); + + LLVM_DEBUG( + dbgs() << "VFE safe vtables:\n"; + for (auto *VTable : VFESafeVTables) + dbgs() << " " << VTable->getName() << "\n"; + ); +} + PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) { bool Changed = false; @@ -163,6 +300,10 @@ if (Comdat *C = GA.getComdat()) ComdatMembers.insert(std::make_pair(C, &GA)); + // Add dependencies between virtual call sites and the virtual functions they + // might call, if we have that information. + AddVirtualFunctionDependencies(M); + // Loop over the module, adding globals which are obviously necessary. for (GlobalObject &GO : M.global_objects()) { Changed |= RemoveUnusedGlobalValue(GO); @@ -257,8 +398,17 @@ }; NumFunctions += DeadFunctions.size(); - for (Function *F : DeadFunctions) + for (Function *F : DeadFunctions) { + if (!F->use_empty()) { + // Virtual functions might still be referenced by one or more vtables, + // but if we've proven them to be unused then it's safe to replace the + // virtual function pointers with null, allowing us to remove the + // function itself. 
+ ++NumVFuncs; + F->replaceAllUsesWith(ConstantPointerNull::get(F->getType())); + } EraseUnusedGlobalValue(F); + } NumVariables += DeadGlobalVars.size(); for (GlobalVariable *GV : DeadGlobalVars) @@ -277,6 +427,8 @@ ConstantDependenciesCache.clear(); GVDependencies.clear(); ComdatMembers.clear(); + TypeIdMap.clear(); + VFESafeVTables.clear(); if (Changed) return PreservedAnalyses::none(); diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -496,7 +496,6 @@ void buildTypeIdentifierMap( std::vector &Bits, DenseMap> &TypeIdMap); - Constant *getPointerAtOffset(Constant *I, uint64_t Offset); bool tryFindVirtualCallTargets(std::vector &TargetsForSlot, const std::set &TypeMemberInfos, @@ -815,38 +814,6 @@ } } -Constant *DevirtModule::getPointerAtOffset(Constant *I, uint64_t Offset) { - if (I->getType()->isPointerTy()) { - if (Offset == 0) - return I; - return nullptr; - } - - const DataLayout &DL = M.getDataLayout(); - - if (auto *C = dyn_cast(I)) { - const StructLayout *SL = DL.getStructLayout(C->getType()); - if (Offset >= SL->getSizeInBytes()) - return nullptr; - - unsigned Op = SL->getElementContainingOffset(Offset); - return getPointerAtOffset(cast(I->getOperand(Op)), - Offset - SL->getElementOffset(Op)); - } - if (auto *C = dyn_cast(I)) { - ArrayType *VTableTy = C->getType(); - uint64_t ElemSize = DL.getTypeAllocSize(VTableTy->getElementType()); - - unsigned Op = Offset / ElemSize; - if (Op >= C->getNumOperands()) - return nullptr; - - return getPointerAtOffset(cast(I->getOperand(Op)), - Offset % ElemSize); - } - return nullptr; -} - bool DevirtModule::tryFindVirtualCallTargets( std::vector &TargetsForSlot, const std::set &TypeMemberInfos, uint64_t ByteOffset) { @@ -855,7 +822,7 @@ return false; Constant *Ptr = getPointerAtOffset(TM.Bits->GV->getInitializer(), - TM.Offset + ByteOffset); + TM.Offset + 
ByteOffset, M); if (!Ptr) return false; @@ -1943,6 +1910,12 @@ for (VTableBits &B : Bits) rebuildGlobal(B); + // We have lowered or deleted the type checked load intrinsics, so we no + // longer have enough information to reason about the liveness of virtual + // function pointers in GlobalDCE. + for (GlobalVariable &GV : M.globals()) + GV.eraseMetadata(LLVMContext::MD_vcall_visibility); + return true; } diff --git a/llvm/test/LTO/ARM/lto-linking-metadata.ll b/llvm/test/LTO/ARM/lto-linking-metadata.ll new file mode 100644 --- /dev/null +++ b/llvm/test/LTO/ARM/lto-linking-metadata.ll @@ -0,0 +1,19 @@ +; RUN: opt %s -o %t1.bc + +; RUN: llvm-lto %t1.bc -o %t1.save.opt -save-merged-module -O1 --exported-symbol=foo +; RUN: llvm-dis < %t1.save.opt.merged.bc | FileCheck %s + +; RUN: llvm-lto2 run %t1.bc -o %t.out.o -save-temps \ +; RUN: -r=%t1.bc,foo,pxl +; RUN: llvm-dis < %t.out.o.0.2.internalize.bc | FileCheck %s + +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "armv7a-unknown-linux" + +define void @foo() { +entry: + ret void +} + +; CHECK: !llvm.module.flags = !{[[MD_NUM:![0-9]+]]} +; CHECK: [[MD_NUM]] = !{i32 1, !"LTOPostLink", i32 1} diff --git a/llvm/test/ThinLTO/X86/lazyload_metadata.ll b/llvm/test/ThinLTO/X86/lazyload_metadata.ll --- a/llvm/test/ThinLTO/X86/lazyload_metadata.ll +++ b/llvm/test/ThinLTO/X86/lazyload_metadata.ll @@ -10,13 +10,13 @@ ; RUN: llvm-lto -thinlto-action=import %t2.bc -thinlto-index=%t3.bc \ ; RUN: -o /dev/null -stats \ ; RUN: 2>&1 | FileCheck %s -check-prefix=LAZY -; LAZY: 61 bitcode-reader - Number of Metadata records loaded +; LAZY: 63 bitcode-reader - Number of Metadata records loaded ; LAZY: 2 bitcode-reader - Number of MDStrings loaded ; RUN: llvm-lto -thinlto-action=import %t2.bc -thinlto-index=%t3.bc \ ; RUN: -o /dev/null -disable-ondemand-mds-loading -stats \ ; RUN: 2>&1 | FileCheck %s -check-prefix=NOTLAZY -; NOTLAZY: 70 bitcode-reader - Number of Metadata records loaded +; NOTLAZY: 72 
bitcode-reader - Number of Metadata records loaded ; NOTLAZY: 7 bitcode-reader - Number of MDStrings loaded diff --git a/llvm/test/Transforms/GlobalDCE/virtual-functions-base-call.ll b/llvm/test/Transforms/GlobalDCE/virtual-functions-base-call.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/GlobalDCE/virtual-functions-base-call.ll @@ -0,0 +1,78 @@ +; RUN: opt < %s -globaldce -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; struct A { +; A(); +; virtual int foo(); +; }; +; +; struct B : A { +; B(); +; virtual int foo(); +; }; +; +; A::A() {} +; B::B() {} +; int A::foo() { return 42; } +; int B::foo() { return 1337; } +; +; extern "C" int test(A *p) { return p->foo(); } + +; The virtual call in test could be dispatched to either A::foo or B::foo, so +; both must be retained. + +%struct.A = type { i32 (...)** } +%struct.B = type { %struct.A } + +; CHECK: @_ZTV1A = internal unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.A*)* @_ZN1A3fooEv to i8*)] } +@_ZTV1A = internal unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.A*)* @_ZN1A3fooEv to i8*)] }, align 8, !type !0, !type !1, !vcall_visibility !2 + +; CHECK: @_ZTV1B = internal unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.B*)* @_ZN1B3fooEv to i8*)] } +@_ZTV1B = internal unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.B*)* @_ZN1B3fooEv to i8*)] }, align 8, !type !0, !type !1, !type !3, !type !4, !vcall_visibility !2 + +; CHECK: define internal i32 @_ZN1A3fooEv( +define internal i32 @_ZN1A3fooEv(%struct.A* nocapture readnone %this) { +entry: + ret i32 42 +} + +; CHECK: define internal i32 @_ZN1B3fooEv( +define internal i32 @_ZN1B3fooEv(%struct.B* nocapture readnone %this) { +entry: + ret i32 1337 +} + +define hidden void @_ZN1AC2Ev(%struct.A* nocapture %this) { +entry: + %0 = 
getelementptr inbounds %struct.A, %struct.A* %this, i64 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1A, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + ret void +} + +define hidden void @_ZN1BC2Ev(%struct.B* nocapture %this) { +entry: + %0 = getelementptr inbounds %struct.B, %struct.B* %this, i64 0, i32 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1B, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + ret void +} + +define hidden i32 @test(%struct.A* %p) { +entry: + %0 = bitcast %struct.A* %p to i8** + %vtable1 = load i8*, i8** %0, align 8 + %1 = tail call { i8*, i1 } @llvm.type.checked.load(i8* %vtable1, i32 0, metadata !"_ZTS1A"), !nosanitize !10 + %2 = extractvalue { i8*, i1 } %1, 0, !nosanitize !10 + %3 = bitcast i8* %2 to i32 (%struct.A*)*, !nosanitize !10 + %call = tail call i32 %3(%struct.A* %p) + ret i32 %call +} + +declare { i8*, i1 } @llvm.type.checked.load(i8*, i32, metadata) #2 + +!0 = !{i64 16, !"_ZTS1A"} +!1 = !{i64 16, !"_ZTSM1AFivE.virtual"} +!2 = !{i64 2} +!3 = !{i64 16, !"_ZTS1B"} +!4 = !{i64 16, !"_ZTSM1BFivE.virtual"} +!10 = !{} diff --git a/llvm/test/Transforms/GlobalDCE/virtual-functions-base-pointer-call.ll b/llvm/test/Transforms/GlobalDCE/virtual-functions-base-pointer-call.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/GlobalDCE/virtual-functions-base-pointer-call.ll @@ -0,0 +1,118 @@ +; RUN: opt < %s -globaldce -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; struct A { +; A(); +; virtual int foo(int); +; virtual int bar(float); +; }; +; +; struct B : A { +; B(); +; virtual int foo(int); +; virtual int bar(float); +; }; +; +; A::A() {} +; B::B() {} +; int A::foo(int) { return 1; } +; int A::bar(float) { return 2; } +; int B::foo(int) { return 3; } +; int B::bar(float) { return 4; } +; +; extern "C" int test(A *p, int 
(A::*q)(int)) { return (p->*q)(42); } + +; Member function pointers are tracked by the combination of their object type +; and function type, which must both be compatible. Here, the call is through a +; pointer of type "int (A::*q)(int)", so the call could be dispatched to A::foo +; or B::foo. It can't be dispatched to A::bar or B::bar as the function pointer +; does not match, so those can be removed. + +%struct.A = type { i32 (...)** } +%struct.B = type { %struct.A } + +; CHECK: @_ZTV1A = internal unnamed_addr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A3fooEi to i8*), i8* null] } +@_ZTV1A = internal unnamed_addr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A3fooEi to i8*), i8* bitcast (i32 (%struct.A*, float)* @_ZN1A3barEf to i8*)] }, align 8, !type !0, !type !1, !type !2, !vcall_visibility !3 +; CHECK: @_ZTV1B = internal unnamed_addr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B3fooEi to i8*), i8* null] } +@_ZTV1B = internal unnamed_addr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B3fooEi to i8*), i8* bitcast (i32 (%struct.B*, float)* @_ZN1B3barEf to i8*)] }, align 8, !type !0, !type !1, !type !2, !type !4, !type !5, !type !6, !vcall_visibility !3 + + +; CHECK: define internal i32 @_ZN1A3fooEi( +define internal i32 @_ZN1A3fooEi(%struct.A* nocapture readnone %this, i32) unnamed_addr #1 align 2 { +entry: + ret i32 1 +} + +; CHECK-NOT: define internal i32 @_ZN1A3barEf( +define internal i32 @_ZN1A3barEf(%struct.A* nocapture readnone %this, float) unnamed_addr #1 align 2 { +entry: + ret i32 2 +} + +; CHECK: define internal i32 @_ZN1B3fooEi( +define internal i32 @_ZN1B3fooEi(%struct.B* nocapture readnone %this, i32) unnamed_addr #1 align 2 { +entry: + ret i32 3 +} + +; CHECK-NOT: define internal i32 @_ZN1B3barEf( +define internal i32 @_ZN1B3barEf(%struct.B* 
nocapture readnone %this, float) unnamed_addr #1 align 2 { +entry: + ret i32 4 +} + + +define hidden void @_ZN1AC2Ev(%struct.A* nocapture %this) { +entry: + %0 = getelementptr inbounds %struct.A, %struct.A* %this, i64 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [4 x i8*] }, { [4 x i8*] }* @_ZTV1A, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + ret void +} + +define hidden void @_ZN1BC2Ev(%struct.B* nocapture %this) { +entry: + %0 = getelementptr inbounds %struct.B, %struct.B* %this, i64 0, i32 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [4 x i8*] }, { [4 x i8*] }* @_ZTV1B, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + ret void +} + +define hidden i32 @test(%struct.A* %p, i64 %q.coerce0, i64 %q.coerce1) { +entry: + %0 = bitcast %struct.A* %p to i8* + %1 = getelementptr inbounds i8, i8* %0, i64 %q.coerce1 + %this.adjusted = bitcast i8* %1 to %struct.A* + %2 = and i64 %q.coerce0, 1 + %memptr.isvirtual = icmp eq i64 %2, 0 + br i1 %memptr.isvirtual, label %memptr.nonvirtual, label %memptr.virtual + +memptr.virtual: ; preds = %entry + %3 = bitcast i8* %1 to i8** + %vtable = load i8*, i8** %3, align 8 + %4 = add i64 %q.coerce0, -1 + %5 = getelementptr i8, i8* %vtable, i64 %4, !nosanitize !12 + %6 = tail call { i8*, i1 } @llvm.type.checked.load(i8* %5, i32 0, metadata !"_ZTSM1AFiiE.virtual"), !nosanitize !12 + %7 = extractvalue { i8*, i1 } %6, 0, !nosanitize !12 + %memptr.virtualfn = bitcast i8* %7 to i32 (%struct.A*, i32)*, !nosanitize !12 + br label %memptr.end + +memptr.nonvirtual: ; preds = %entry + %memptr.nonvirtualfn = inttoptr i64 %q.coerce0 to i32 (%struct.A*, i32)* + br label %memptr.end + +memptr.end: ; preds = %memptr.nonvirtual, %memptr.virtual + %8 = phi i32 (%struct.A*, i32)* [ %memptr.virtualfn, %memptr.virtual ], [ %memptr.nonvirtualfn, %memptr.nonvirtual ] + %call = tail call i32 %8(%struct.A* %this.adjusted, i32 42) + ret i32 %call +} + +declare { i8*, 
i1 } @llvm.type.checked.load(i8*, i32, metadata) + +!0 = !{i64 16, !"_ZTS1A"} +!1 = !{i64 16, !"_ZTSM1AFiiE.virtual"} +!2 = !{i64 24, !"_ZTSM1AFifE.virtual"} +!3 = !{i64 2} +!4 = !{i64 16, !"_ZTS1B"} +!5 = !{i64 16, !"_ZTSM1BFiiE.virtual"} +!6 = !{i64 24, !"_ZTSM1BFifE.virtual"} +!12 = !{} diff --git a/llvm/test/Transforms/GlobalDCE/virtual-functions-derived-call.ll b/llvm/test/Transforms/GlobalDCE/virtual-functions-derived-call.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/GlobalDCE/virtual-functions-derived-call.ll @@ -0,0 +1,78 @@ +; RUN: opt < %s -globaldce -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; struct A { +; A(); +; virtual int foo(); +; }; +; +; struct B : A { +; B(); +; virtual int foo(); +; }; +; +; A::A() {} +; B::B() {} +; int A::foo() { return 42; } +; int B::foo() { return 1337; } +; +; extern "C" int test(B *p) { return p->foo(); } + +; The virtual call in test can only be dispatched to B::foo (or a more-derived +; class, if there was one), so A::foo can be removed. 
+ +%struct.A = type { i32 (...)** } +%struct.B = type { %struct.A } + +; CHECK: @_ZTV1A = internal unnamed_addr constant { [3 x i8*] } zeroinitializer +@_ZTV1A = internal unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.A*)* @_ZN1A3fooEv to i8*)] }, align 8, !type !0, !type !1, !vcall_visibility !2 + +; CHECK: @_ZTV1B = internal unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.B*)* @_ZN1B3fooEv to i8*)] } +@_ZTV1B = internal unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.B*)* @_ZN1B3fooEv to i8*)] }, align 8, !type !0, !type !1, !type !3, !type !4, !vcall_visibility !2 + +; CHECK-NOT: define internal i32 @_ZN1A3fooEv( +define internal i32 @_ZN1A3fooEv(%struct.A* nocapture readnone %this) { +entry: + ret i32 42 +} + +; CHECK: define internal i32 @_ZN1B3fooEv( +define internal i32 @_ZN1B3fooEv(%struct.B* nocapture readnone %this) { +entry: + ret i32 1337 +} + +define hidden void @_ZN1AC2Ev(%struct.A* nocapture %this) { +entry: + %0 = getelementptr inbounds %struct.A, %struct.A* %this, i64 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1A, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + ret void +} + +define hidden void @_ZN1BC2Ev(%struct.B* nocapture %this) { +entry: + %0 = getelementptr inbounds %struct.B, %struct.B* %this, i64 0, i32 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1B, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + ret void +} + +define hidden i32 @test(%struct.B* %p) { +entry: + %0 = bitcast %struct.B* %p to i8** + %vtable1 = load i8*, i8** %0, align 8 + %1 = tail call { i8*, i1 } @llvm.type.checked.load(i8* %vtable1, i32 0, metadata !"_ZTS1B"), !nosanitize !10 + %2 = extractvalue { i8*, i1 } %1, 0, !nosanitize !10 + %3 = bitcast i8* %2 to i32 
(%struct.B*)*, !nosanitize !10 + %call = tail call i32 %3(%struct.B* %p) + ret i32 %call +} + +declare { i8*, i1 } @llvm.type.checked.load(i8*, i32, metadata) #2 + +!0 = !{i64 16, !"_ZTS1A"} +!1 = !{i64 16, !"_ZTSM1AFivE.virtual"} +!2 = !{i64 2} +!3 = !{i64 16, !"_ZTS1B"} +!4 = !{i64 16, !"_ZTSM1BFivE.virtual"} +!10 = !{} diff --git a/llvm/test/Transforms/GlobalDCE/virtual-functions-derived-pointer-call.ll b/llvm/test/Transforms/GlobalDCE/virtual-functions-derived-pointer-call.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/GlobalDCE/virtual-functions-derived-pointer-call.ll @@ -0,0 +1,120 @@ + +; RUN: opt < %s -globaldce -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; struct A { +; A(); +; virtual int foo(int); +; virtual int bar(float); +; }; +; +; struct B : A { +; B(); +; virtual int foo(int); +; virtual int bar(float); +; }; +; +; A::A() {} +; B::B() {} +; int A::foo(int) { return 1; } +; int A::bar(float) { return 2; } +; int B::foo(int) { return 3; } +; int B::bar(float) { return 4; } +; +; extern "C" int test(B *p, int (B::*q)(int)) { return (p->*q)(42); } + +; Member function pointers are tracked by the combination of their object type +; and function type, which must both be compatible. Here, the call is through a +; pointer of type "int (B::*q)(int)", so the call could only be dispatched to +; B::foo. It can't be dispatched to A::bar or B::bar as the function pointer +; does not match, and it can't be dispatched to A::foo as the object type +; doesn't match, so those can be removed. 
+ +%struct.A = type { i32 (...)** } +%struct.B = type { %struct.A } + +; CHECK: @_ZTV1A = internal unnamed_addr constant { [4 x i8*] } zeroinitializer +@_ZTV1A = internal unnamed_addr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A3fooEi to i8*), i8* bitcast (i32 (%struct.A*, float)* @_ZN1A3barEf to i8*)] }, align 8, !type !0, !type !1, !type !2, !vcall_visibility !3 +; CHECK: @_ZTV1B = internal unnamed_addr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B3fooEi to i8*), i8* null] } +@_ZTV1B = internal unnamed_addr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B3fooEi to i8*), i8* bitcast (i32 (%struct.B*, float)* @_ZN1B3barEf to i8*)] }, align 8, !type !0, !type !1, !type !2, !type !4, !type !5, !type !6, !vcall_visibility !3 + + +; CHECK-NOT: define internal i32 @_ZN1A3fooEi( +define internal i32 @_ZN1A3fooEi(%struct.A* nocapture readnone %this, i32) unnamed_addr #1 align 2 { +entry: + ret i32 1 +} + +; CHECK-NOT: define internal i32 @_ZN1A3barEf( +define internal i32 @_ZN1A3barEf(%struct.A* nocapture readnone %this, float) unnamed_addr #1 align 2 { +entry: + ret i32 2 +} + +; CHECK: define internal i32 @_ZN1B3fooEi( +define internal i32 @_ZN1B3fooEi(%struct.B* nocapture readnone %this, i32) unnamed_addr #1 align 2 { +entry: + ret i32 3 +} + +; CHECK-NOT: define internal i32 @_ZN1B3barEf( +define internal i32 @_ZN1B3barEf(%struct.B* nocapture readnone %this, float) unnamed_addr #1 align 2 { +entry: + ret i32 4 +} + + +define hidden void @_ZN1AC2Ev(%struct.A* nocapture %this) { +entry: + %0 = getelementptr inbounds %struct.A, %struct.A* %this, i64 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [4 x i8*] }, { [4 x i8*] }* @_ZTV1A, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + ret void +} + +define hidden void @_ZN1BC2Ev(%struct.B* nocapture %this) { +entry: + %0 = 
getelementptr inbounds %struct.B, %struct.B* %this, i64 0, i32 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [4 x i8*] }, { [4 x i8*] }* @_ZTV1B, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + ret void +} + +define hidden i32 @test(%struct.B* %p, i64 %q.coerce0, i64 %q.coerce1) { +entry: + %0 = bitcast %struct.B* %p to i8* + %1 = getelementptr inbounds i8, i8* %0, i64 %q.coerce1 + %this.adjusted = bitcast i8* %1 to %struct.B* + %2 = and i64 %q.coerce0, 1 + %memptr.isvirtual = icmp eq i64 %2, 0 + br i1 %memptr.isvirtual, label %memptr.nonvirtual, label %memptr.virtual + +memptr.virtual: ; preds = %entry + %3 = bitcast i8* %1 to i8** + %vtable = load i8*, i8** %3, align 8 + %4 = add i64 %q.coerce0, -1 + %5 = getelementptr i8, i8* %vtable, i64 %4, !nosanitize !12 + %6 = tail call { i8*, i1 } @llvm.type.checked.load(i8* %5, i32 0, metadata !"_ZTSM1BFiiE.virtual"), !nosanitize !12 + %7 = extractvalue { i8*, i1 } %6, 0, !nosanitize !12 + %memptr.virtualfn = bitcast i8* %7 to i32 (%struct.B*, i32)*, !nosanitize !12 + br label %memptr.end + +memptr.nonvirtual: ; preds = %entry + %memptr.nonvirtualfn = inttoptr i64 %q.coerce0 to i32 (%struct.B*, i32)* + br label %memptr.end + +memptr.end: ; preds = %memptr.nonvirtual, %memptr.virtual + %8 = phi i32 (%struct.B*, i32)* [ %memptr.virtualfn, %memptr.virtual ], [ %memptr.nonvirtualfn, %memptr.nonvirtual ] + %call = tail call i32 %8(%struct.B* %this.adjusted, i32 42) + ret i32 %call +} + +declare { i8*, i1 } @llvm.type.checked.load(i8*, i32, metadata) + +!0 = !{i64 16, !"_ZTS1A"} +!1 = !{i64 16, !"_ZTSM1AFiiE.virtual"} +!2 = !{i64 24, !"_ZTSM1AFifE.virtual"} +!3 = !{i64 2} +!4 = !{i64 16, !"_ZTS1B"} +!5 = !{i64 16, !"_ZTSM1BFiiE.virtual"} +!6 = !{i64 24, !"_ZTSM1BFifE.virtual"} +!12 = !{} diff --git a/llvm/test/Transforms/GlobalDCE/virtual-functions-visibility-post-lto.ll b/llvm/test/Transforms/GlobalDCE/virtual-functions-visibility-post-lto.ll new file mode 100644 --- /dev/null 
+++ b/llvm/test/Transforms/GlobalDCE/virtual-functions-visibility-post-lto.ll @@ -0,0 +1,95 @@ +; RUN: opt < %s -globaldce -S | FileCheck %s + +; structs A, B and C have vcall_visibility of public, linkage-unit and +; translation-unit respectively. This test is run after LTO linking (the +; LTOPostLink metadata is present), so B and C can be VFE'd. + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +; A: the vtable carries !vcall_visibility !2 (public); A::foo is expected to be kept. +%struct.A = type { i32 (...)** } + +@_ZTV1A = hidden unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (void (%struct.A*)* @_ZN1A3fooEv to i8*)] }, align 8, !type !0, !type !1, !vcall_visibility !2 + +define internal void @_ZN1AC2Ev(%struct.A* %this) { +entry: + %0 = getelementptr inbounds %struct.A, %struct.A* %this, i64 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1A, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + ret void +} + +; CHECK: define {{.*}} @_ZN1A3fooEv( +define internal void @_ZN1A3fooEv(%struct.A* nocapture %this) { +entry: + ret void +} + +define dso_local i8* @_Z6make_Av() { +entry: + %call = tail call i8* @_Znwm(i64 8) + %0 = bitcast i8* %call to %struct.A* + tail call void @_ZN1AC2Ev(%struct.A* %0) + ret i8* %call +} + +; B: the vtable carries !vcall_visibility !3 (linkage-unit); with LTOPostLink set, B::foo is expected to be removed. +%struct.B = type { i32 (...)** } + +@_ZTV1B = hidden unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (void (%struct.B*)* @_ZN1B3fooEv to i8*)] }, align 8, !type !0, !type !1, !vcall_visibility !3 + +define internal void @_ZN1BC2Ev(%struct.B* %this) { +entry: + %0 = getelementptr inbounds %struct.B, %struct.B* %this, i64 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1B, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + ret void +} + +; CHECK-NOT: define {{.*}} @_ZN1B3fooEv( +define internal void @_ZN1B3fooEv(%struct.B* nocapture %this) { +entry: + ret void +} + +define dso_local i8* 
@_Z6make_Bv() { +entry: + %call = tail call i8* @_Znwm(i64 8) + %0 = bitcast i8* %call to %struct.B* + tail call void @_ZN1BC2Ev(%struct.B* %0) + ret i8* %call +} + +; C: the vtable carries !vcall_visibility !4 (translation-unit); C::foo is expected to be removed. +%struct.C = type { i32 (...)** } + +@_ZTV1C = hidden unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (void (%struct.C*)* @_ZN1C3fooEv to i8*)] }, align 8, !type !0, !type !1, !vcall_visibility !4 + +define internal void @_ZN1CC2Ev(%struct.C* %this) { +entry: + %0 = getelementptr inbounds %struct.C, %struct.C* %this, i64 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1C, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + ret void +} + +; CHECK-NOT: define {{.*}} @_ZN1C3fooEv( +define internal void @_ZN1C3fooEv(%struct.C* nocapture %this) { +entry: + ret void +} + +define dso_local i8* @_Z6make_Cv() { +entry: + %call = tail call i8* @_Znwm(i64 8) + %0 = bitcast i8* %call to %struct.C* + tail call void @_ZN1CC2Ev(%struct.C* %0) + ret i8* %call +} + +declare dso_local noalias nonnull i8* @_Znwm(i64) + +!llvm.module.flags = !{!5} +; The !type nodes !0 and !1 name A's type ids and are attached to all three vtables; this module contains no llvm.type.checked.load calls. +!0 = !{i64 16, !"_ZTS1A"} +!1 = !{i64 16, !"_ZTSM1AFvvE.virtual"} +!2 = !{i64 0} ; public vcall visibility +!3 = !{i64 1} ; linkage-unit vcall visibility +!4 = !{i64 2} ; translation-unit vcall visibility +!5 = !{i32 1, !"LTOPostLink", i32 1} diff --git a/llvm/test/Transforms/GlobalDCE/virtual-functions-visibility-pre-lto.ll b/llvm/test/Transforms/GlobalDCE/virtual-functions-visibility-pre-lto.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/GlobalDCE/virtual-functions-visibility-pre-lto.ll @@ -0,0 +1,94 @@ +; RUN: opt < %s -globaldce -S | FileCheck %s + +; structs A, B and C have vcall_visibility of public, linkage-unit and +; translation-unit respectively. This test is run before LTO linking occurs +; (the LTOPostLink metadata is not present), so only C can be VFE'd. 
+ +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +; A: the vtable carries !vcall_visibility !2 (public); A::foo is expected to be kept. +%struct.A = type { i32 (...)** } + +@_ZTV1A = hidden unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (void (%struct.A*)* @_ZN1A3fooEv to i8*)] }, align 8, !type !0, !type !1, !vcall_visibility !2 + +define internal void @_ZN1AC2Ev(%struct.A* %this) { +entry: + %0 = getelementptr inbounds %struct.A, %struct.A* %this, i64 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1A, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + ret void +} + +; CHECK: define {{.*}} @_ZN1A3fooEv( +define internal void @_ZN1A3fooEv(%struct.A* nocapture %this) { +entry: + ret void +} + +define dso_local i8* @_Z6make_Av() { +entry: + %call = tail call i8* @_Znwm(i64 8) + %0 = bitcast i8* %call to %struct.A* + tail call void @_ZN1AC2Ev(%struct.A* %0) + ret i8* %call +} + +; B: the vtable carries !vcall_visibility !3 (linkage-unit); without the LTOPostLink flag, B::foo is expected to be kept. +%struct.B = type { i32 (...)** } + +@_ZTV1B = hidden unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (void (%struct.B*)* @_ZN1B3fooEv to i8*)] }, align 8, !type !0, !type !1, !vcall_visibility !3 + +define internal void @_ZN1BC2Ev(%struct.B* %this) { +entry: + %0 = getelementptr inbounds %struct.B, %struct.B* %this, i64 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1B, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + ret void +} + +; CHECK: define {{.*}} @_ZN1B3fooEv( +define internal void @_ZN1B3fooEv(%struct.B* nocapture %this) { +entry: + ret void +} + +define dso_local i8* @_Z6make_Bv() { +entry: + %call = tail call i8* @_Znwm(i64 8) + %0 = bitcast i8* %call to %struct.B* + tail call void @_ZN1BC2Ev(%struct.B* %0) + ret i8* %call +} + +; C: the vtable carries !vcall_visibility !4 (translation-unit); C::foo is expected to be removed. +%struct.C = type { i32 (...)** } + +@_ZTV1C = hidden unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (void (%struct.C*)* @_ZN1C3fooEv to i8*)] }, align 8, 
!type !0, !type !1, !vcall_visibility !4 + +define internal void @_ZN1CC2Ev(%struct.C* %this) { +entry: + %0 = getelementptr inbounds %struct.C, %struct.C* %this, i64 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1C, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + ret void +} + +; CHECK-NOT: define {{.*}} @_ZN1C3fooEv( +define internal void @_ZN1C3fooEv(%struct.C* nocapture %this) { +entry: + ret void +} + +define dso_local i8* @_Z6make_Cv() { +entry: + %call = tail call i8* @_Znwm(i64 8) + %0 = bitcast i8* %call to %struct.C* + tail call void @_ZN1CC2Ev(%struct.C* %0) + ret i8* %call +} + +declare dso_local noalias nonnull i8* @_Znwm(i64) +; The module flag list below is empty, i.e. this module has no LTOPostLink flag. +!llvm.module.flags = !{} + +!0 = !{i64 16, !"_ZTS1A"} +!1 = !{i64 16, !"_ZTSM1AFvvE.virtual"} +!2 = !{i64 0} ; public vcall visibility +!3 = !{i64 1} ; linkage-unit vcall visibility +!4 = !{i64 2} ; translation-unit vcall visibility diff --git a/llvm/test/Transforms/GlobalDCE/virtual-functions.ll b/llvm/test/Transforms/GlobalDCE/virtual-functions.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/GlobalDCE/virtual-functions.ll @@ -0,0 +1,55 @@ +; RUN: opt < %s -globaldce -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +declare dso_local noalias nonnull i8* @_Znwm(i64) +declare { i8*, i1 } @llvm.type.checked.load(i8*, i32, metadata) + +; %struct.A is a C++ struct with two virtual functions, A::foo and A::bar. The +; !vcall_visibility metadata is set on the vtable, so we know that all virtual +; calls through this vtable are visible and use the @llvm.type.checked.load +; intrinsic. Function test_A makes a call to A::foo, but there is no call to +; A::bar anywhere, so A::bar can be deleted, and its vtable slot replaced with +; null. + +%struct.A = type { i32 (...)** } + +; The pointer to A::bar in the vtable can be removed, because it will never be +; loaded. 
We replace it with null to keep the layout the same. Because it is at +; the end of the vtable we could potentially shrink the vtable, but don't +; currently do that. +; CHECK: @_ZTV1A = internal unnamed_addr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.A*)* @_ZN1A3fooEv to i8*), i8* null] } +@_ZTV1A = internal unnamed_addr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.A*)* @_ZN1A3fooEv to i8*), i8* bitcast (i32 (%struct.A*)* @_ZN1A3barEv to i8*)] }, align 8, !type !0, !type !1, !type !2, !vcall_visibility !3 + +; A::foo is called, so must be retained. +; CHECK: define internal i32 @_ZN1A3fooEv( +define internal i32 @_ZN1A3fooEv(%struct.A* nocapture readnone %this) { +entry: + ret i32 42 +} + +; A::bar is not used, so can be deleted. +; CHECK-NOT: define internal i32 @_ZN1A3barEv( +define internal i32 @_ZN1A3barEv(%struct.A* nocapture readnone %this) { +entry: + ret i32 1337 +} +; test_A stores the vtable pointer into a fresh object, then uses llvm.type.checked.load at offset 0 from the address point (the A::foo slot) with type id _ZTS1A and calls the result. +define dso_local i32 @test_A() { +entry: + %call = tail call i8* @_Znwm(i64 8) + %0 = bitcast i8* %call to %struct.A* + %1 = bitcast i8* %call to i32 (...)*** + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [4 x i8*] }, { [4 x i8*] }* @_ZTV1A, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %1, align 8 + %2 = tail call { i8*, i1 } @llvm.type.checked.load(i8* bitcast (i8** getelementptr inbounds ({ [4 x i8*] }, { [4 x i8*] }* @_ZTV1A, i64 0, inrange i32 0, i64 2) to i8*), i32 0, metadata !"_ZTS1A"), !nosanitize !9 + %3 = extractvalue { i8*, i1 } %2, 0, !nosanitize !9 + %4 = bitcast i8* %3 to i32 (%struct.A*)*, !nosanitize !9 + %call1 = tail call i32 %4(%struct.A* nonnull %0) + ret i32 %call1 +} + +!0 = !{i64 16, !"_ZTS1A"} +!1 = !{i64 16, !"_ZTSM1AFivE.virtual"} ; byte offset 16 in the vtable: the A::foo slot +!2 = !{i64 24, !"_ZTSM1AFivE.virtual"} ; byte offset 24 in the vtable: the A::bar slot +!3 = !{i64 2} ; translation-unit vcall visibility +!9 = !{} diff --git a/llvm/test/Transforms/GlobalDCE/vtable-rtti.ll b/llvm/test/Transforms/GlobalDCE/vtable-rtti.ll new file mode 100644 --- /dev/null +++ 
b/llvm/test/Transforms/GlobalDCE/vtable-rtti.ll @@ -0,0 +1,47 @@ +; RUN: opt < %s -globaldce -S | FileCheck %s + +; We currently only use llvm.type.checked.load for virtual function pointers, +; not any other part of the vtable, so we can't remove the RTTI pointer even if +; it's never going to be loaded from. + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + +%struct.A = type { i32 (...)** } + +; CHECK: @_ZTV1A = hidden unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* bitcast ({ i8*, i8* }* @_ZTI1A to i8*), i8* null] }, align 8, !type !0, !type !1, !vcall_visibility !2 +; Input vtable below: [null, RTTI pointer @_ZTI1A, A::foo]; the expected output above nulls only the A::foo slot. +@_ZTV1A = hidden unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* bitcast ({ i8*, i8* }* @_ZTI1A to i8*), i8* bitcast (void (%struct.A*)* @_ZN1A3fooEv to i8*)] }, align 8, !type !0, !type !1, !vcall_visibility !2 +@_ZTS1A = hidden constant [3 x i8] c"1A\00", align 1 +@_ZTI1A = hidden constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([3 x i8], [3 x i8]* @_ZTS1A, i32 0, i32 0) }, align 8 + +define internal void @_ZN1AC2Ev(%struct.A* %this) { +entry: + %0 = getelementptr inbounds %struct.A, %struct.A* %this, i64 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1A, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + ret void +} + +; CHECK-NOT: define {{.*}} @_ZN1A3fooEv( +define internal void @_ZN1A3fooEv(%struct.A* nocapture %this) { +entry: + ret void +} + +define dso_local i8* @_Z6make_Av() { +entry: + %call = tail call i8* @_Znwm(i64 8) + %0 = bitcast i8* %call to %struct.A* + tail call void @_ZN1AC2Ev(%struct.A* %0) + ret i8* %call +} + + +declare dso_local noalias nonnull i8* @_Znwm(i64) +@_ZTVN10__cxxabiv117__class_type_infoE = external dso_local global i8* + +!llvm.module.flags = !{!3} + +!0 = !{i64 16, !"_ZTS1A"} +!1 = !{i64 16, !"_ZTSM1AFvvE.virtual"} 
+!2 = !{i64 2} ; translation-unit vcall visibility +!3 = !{i32 1, !"LTOPostLink", i32 1} diff --git a/llvm/test/Transforms/Internalize/vcall-visibility.ll b/llvm/test/Transforms/Internalize/vcall-visibility.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Internalize/vcall-visibility.ll @@ -0,0 +1,64 @@ +; RUN: opt < %s -internalize -S | FileCheck %s +; Runs the internalize pass over four vtables (classes A to D) and checks which of them carry !vcall_visibility metadata in the output, and with what value. +%struct.A = type { i32 (...)** } +%struct.B = type { i32 (...)** } +%struct.C = type { i32 (...)** } + +; Class A has default visibility, so has no !vcall_visibility metadata before +; or after LTO. +; CHECK-NOT: @_ZTV1A = {{.*}}!vcall_visibility +@_ZTV1A = dso_local unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (void (%struct.A*)* @_ZN1A3fooEv to i8*)] }, align 8, !type !0, !type !1 + +; Class B has hidden visibility but public LTO visibility, so has no +; !vcall_visibility metadata before or after LTO. +; CHECK-NOT: @_ZTV1B = {{.*}}!vcall_visibility +@_ZTV1B = hidden unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (void (%struct.B*)* @_ZN1B3fooEv to i8*)] }, align 8, !type !2, !type !3 + +; Class C has hidden visibility, so the !vcall_visibility metadata is set to 1 +; (linkage unit) before LTO, and 2 (translation unit) after LTO. +; CHECK: @_ZTV1C ={{.*}}!vcall_visibility [[MD_TU_VIS:![0-9]+]] +@_ZTV1C = hidden unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (void (%struct.C*)* @_ZN1C3fooEv to i8*)] }, align 8, !type !4, !type !5, !vcall_visibility !6 + +; Class D has translation unit visibility before LTO, and this is not changed +; by LTO. 
+; CHECK: @_ZTVN12_GLOBAL__N_11DE = {{.*}}!vcall_visibility [[MD_TU_VIS]] +@_ZTVN12_GLOBAL__N_11DE = internal unnamed_addr constant { [3 x i8*] } zeroinitializer, align 8, !type !7, !type !9, !vcall_visibility !11 +; The directive for D reuses the MD_TU_VIS variable captured on C's vtable instead of capturing it again, so both vtables must reference the same metadata node; that node is verified to be !{i64 2} at the end of the file. +define dso_local void @_ZN1A3fooEv(%struct.A* nocapture %this) { +entry: + ret void +} + +define hidden void @_ZN1B3fooEv(%struct.B* nocapture %this) { +entry: + ret void +} + +define hidden void @_ZN1C3fooEv(%struct.C* nocapture %this) { +entry: + ret void +} + +define hidden noalias nonnull i8* @_Z6make_dv() { +entry: + %call = tail call i8* @_Znwm(i64 8) + %0 = bitcast i8* %call to i32 (...)*** + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTVN12_GLOBAL__N_11DE, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + ret i8* %call +} + +declare dso_local noalias nonnull i8* @_Znwm(i64) + +; CHECK: [[MD_TU_VIS]] = !{i64 2} +!0 = !{i64 16, !"_ZTS1A"} +!1 = !{i64 16, !"_ZTSM1AFvvE.virtual"} +!2 = !{i64 16, !"_ZTS1B"} +!3 = !{i64 16, !"_ZTSM1BFvvE.virtual"} +!4 = !{i64 16, !"_ZTS1C"} +!5 = !{i64 16, !"_ZTSM1CFvvE.virtual"} +!6 = !{i64 1} ; linkage-unit vcall visibility (input value on C's vtable) +!7 = !{i64 16, !8} +!8 = distinct !{} +!9 = !{i64 16, !10} +!10 = distinct !{} +!11 = !{i64 2} ; translation-unit vcall visibility (input value on D's vtable)