Index: clang/lib/CodeGen/ItaniumCXXABI.cpp =================================================================== --- clang/lib/CodeGen/ItaniumCXXABI.cpp +++ clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -1471,6 +1471,56 @@ return SrcIsPtr; } +static void ComputeExtraHint(llvm::CallInst *Call, llvm::Value *HintArg, + const CXXRecordDecl *Src, const CXXRecordDecl *Dst) + { + if (!Call) return; + + // The "non-private-base" attribute indicates that the source type is + // guaranteed to be a public non-virtual base on all paths to the destination. + // We will do a search of all paths from source to destination type, and set + // this attribute if we find no private or virtual bases. + StringRef NonPrivateBaseAttr("non-private-base"); + + // Avoid the search if the ABI hint can tell us something. + if (isa(HintArg)) { + int64_t Hint = cast(HintArg)->getValue().getSExtValue(); + // Hint value of "-2" indicates that either + // a) Src is not a base type of Dst or + // b) Src is never a public base. + // Either way, we cannot set the attribute in this case. + if (Hint == -2) + return; + // + // If no hint was given, then it is because a public virtual inheritance was + // encountered, and so the attribute cannot be set in this case. + if (Hint == -1) + return; + } + + // Do the search. + CXXBasePaths Paths(/*FindAmbiguities=*/true, /*RecordPaths=*/true, + /*DetectVirtual=*/false); + Dst->isDerivedFrom(Src, Paths); + + // Now walk all possible inheritance paths, exiting if a violation is found. + for (const CXXBasePath &Path : Paths) { + if (Path.Access != AS_public) + return; + + for (const CXXBasePathElement &PathElement : Path) { + if (PathElement.Base->isVirtual()) + return; + } + } + + llvm::AttributeList PAL = Call->getAttributes(); + PAL = PAL.addAttribute(Call->getContext(), llvm::AttributeList::FunctionIndex, + NonPrivateBaseAttr); + Call->setAttributes(PAL); +} + + llvm::Value *ItaniumCXXABI::EmitDynamicCastCall( CodeGenFunction &CGF, Address ThisAddr, QualType SrcRecordTy, QualType DestTy, QualType DestRecordTy, llvm::BasicBlock *CastEnd) { @@ -1496,6 +1546,7 @@ llvm::Value *args[] = {Value, SrcRTTI, DestRTTI, OffsetHint}; Value = CGF.EmitNounwindRuntimeCall(getItaniumDynamicCastFn(CGF), args); + ComputeExtraHint(dyn_cast(Value), OffsetHint, SrcDecl, DestDecl); Value = CGF.Builder.CreateBitCast(Value, DestLTy); /// C++ [expr.dynamic.cast]p9: Index: clang/test/CodeGen/non-private-base.cpp =================================================================== --- /dev/null +++ clang/test/CodeGen/non-private-base.cpp @@ -0,0 +1,64 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s 2>&1 | FileCheck %s +// +/* +Graph (4) + B B + ' / + E F + \ / + D + + - B is private base class of D + - "non-private-base" attribute cannot be set + + B B + \ / + A F + \ / + D2 + - B is public on all paths to D2 + - "non-private-base" attribute should be set + */ +struct B { virtual ~B(){} }; +struct E : private B { + B* getB() { return this; } +}; +struct F : public B { + B* getB() { return this; } +}; +struct A : public B { + B* getB() { return this; } +}; + +struct D : public E, public F { + E* getE() { return this; } +}; + +struct D2 : public A, public F { + A* getA() { return this; } +}; + +B* left_B() { return (new D)->getE()->getB(); } +B* pub_B() { return (new D2)->getA()->getB(); } + +int main() { + B *b = left_B(); +// CHECK: call i8* @__dynamic_cast(i8* %{{[0-9]*}}, i8* bitcast ({ i8*, i8* }* @_ZTI1B to i8*), i8* bitcast ({ i8*, i8*, i32, i32, i8*, i64, i8*, i64 }* @_ZTI1D to i8*), i64 8) #[[ATTR1:[0-9]+]] + if (dynamic_cast (b) != 0) + return 1; + + b = pub_B(); +// CHECK: call i8* @__dynamic_cast(i8* %{{[0-9]*}}, i8* bitcast ({ i8*, i8* }* @_ZTI1B to i8*), i8* bitcast ({ i8*, i8*, i32, i32, i8*, i64, i8*, i64 }* @_ZTI2D2 to i8*), i64 -3) #[[ATTR2:[0-9]+]] + if (dynamic_cast (b) != 0) + return 2; + return 0; +} + +// CHECK: attributes #[[ATTR1]] = { +// CHECK-NOT: non-private-base +// CHECK: } + +// CHECK: attributes #[[ATTR2]] = { +// CHECK-SAME: non-private-base + + Index: llvm/include/llvm/Transforms/IPO/DynamicCastOpt.h =================================================================== --- /dev/null +++ llvm/include/llvm/Transforms/IPO/DynamicCastOpt.h @@ -0,0 +1,23 @@ +//===- DynamicCastOpt.h - Optimize __dynamic_cast calls ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header is for the data structure split transformation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class DynamicCastOptPass : public PassInfoMixin { +public: + DynamicCastOptPass() {} + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + +} // end namespace llvm Index: llvm/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/lib/Passes/PassBuilder.cpp +++ llvm/lib/Passes/PassBuilder.cpp @@ -80,6 +80,7 @@ #include "llvm/Transforms/IPO/ConstantMerge.h" #include "llvm/Transforms/IPO/CrossDSOCFI.h" #include "llvm/Transforms/IPO/DeadArgumentElimination.h" +#include "llvm/Transforms/IPO/DynamicCastOpt.h" #include "llvm/Transforms/IPO/ElimAvailExtern.h" #include "llvm/Transforms/IPO/ForceFunctionAttrs.h" #include "llvm/Transforms/IPO/FunctionAttrs.h" @@ -1436,6 +1437,9 @@ // is fixed. MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr)); + // optimize away __dynamic_cast calls + MPM.addPass(DynamicCastOptPass()); + // Stop here at -O1. if (Level == OptimizationLevel::O1) { // The LowerTypeTestsPass needs to run to lower type metadata and the Index: llvm/lib/Passes/PassRegistry.def =================================================================== --- llvm/lib/Passes/PassRegistry.def +++ llvm/lib/Passes/PassRegistry.def @@ -48,6 +48,7 @@ MODULE_PASS("constmerge", ConstantMergePass()) MODULE_PASS("cross-dso-cfi", CrossDSOCFIPass()) MODULE_PASS("deadargelim", DeadArgumentEliminationPass()) +MODULE_PASS("dynamic-cast-opt", DynamicCastOptPass()) MODULE_PASS("elim-avail-extern", EliminateAvailableExternallyPass()) MODULE_PASS("forceattrs", ForceFunctionAttrsPass()) MODULE_PASS("function-import", FunctionImportPass()) Index: llvm/lib/Transforms/IPO/CMakeLists.txt =================================================================== --- llvm/lib/Transforms/IPO/CMakeLists.txt +++ llvm/lib/Transforms/IPO/CMakeLists.txt @@ -9,6 +9,7 @@ ConstantMerge.cpp CrossDSOCFI.cpp DeadArgumentElimination.cpp + DynamicCastOpt.cpp ElimAvailExtern.cpp ExtractGV.cpp ForceFunctionAttrs.cpp Index: llvm/lib/Transforms/IPO/DynamicCastOpt.cpp =================================================================== --- /dev/null +++ llvm/lib/Transforms/IPO/DynamicCastOpt.cpp @@ -0,0 +1,518 @@ +//===- DynamicCastOpt.cpp - Optimize __dynamic_cast calls -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass tries to convert function calls to __dynamic_cast, into +// semantically equivalent expressions involving address comparison and +// calculation. For example, given an expression of the form +// __dynamic_cast(expr) != NULL (where B* expr) +// If it can be proven that T is leaf type, i.e. a type that has no derived +// types, and B is a public base class of T, then an equivalent expression is: +// typeid(T) == typeid(*expr) +// +// The C++ operator `typeid` is evaluated to an lvalue of type std::type_info +// by the frontend, as follows: +// 1) typeid(T) where T is a type, the result refers to a std::type_info +// object representing the type T. +// 2) typeid(expr) where expr is a glvalue of type B. +// If B is a polymorphic class type, the result is the std::type_info +// object that represents the dynamic type of the expression. The type_info +// object is evaluated using a virtual table lookup (see the function +// CreateDynamicPtrFromStaticPtr below). +// If B is any other type, including non-class types such as pointer or int, +// the std::type_info object representing the static type B is returned. +// +// The evaluation of the `typeid(*expr)` expression is effectively a lookup +// of the type_info pointer in the objects vft, which is at offset -1 from +// the address point of the vtable (where the vft pointer in the user object +// points to). Here's what the ABI says about it: +// The typeinfo pointer points to the typeinfo object used for RTTI. It is +// always present. All entries in each of the virtual tables for a given class +// must point to the same typeinfo object. A correct implementation of typeinfo +// equality is to check pointer equality, except for pointers (directly or +// indirectly) to incomplete types. The typeinfo pointer is a valid pointer for +// polymorphic classes, i.e. those with virtual functions, and is zero for +// non-polymorphic classes. +// +// In this transformation, the dynamic casts that are encountered are those +// where expr is of a polymorphic class type, because casts on non-polymorphic +// types are expanded by the frontend. +// Hence, when we say `typeid(expr)` (or `typeid(*expr)`) we mean that a vft +// lookup will be performed. +// +// Background: +// ------------ +// Given: +// struct B { virtual void foo(); }; +// struct C { virtual void bar(); }; +// struct D : public C, B { +// virtual void foo(); +// }; +// B* static_ptr = new D(); +// +// The layout of the D object, the B subobject within D, and the relevant +// virtual function table and typeinfo object, according to the Itanium C++ +// ABI is shown in the diagram below: +// +// +// D object B-in-D vtable +// +---------+ +---------------+ +// B *static_ptr | | |i64 offsetToTop| +// ' : : +---------------+ typeinfo for D +// '-----> +---------+ |void* TIPtr -----> +------+ +// |vft_ptr ----> +---------------+ : ... : +// +---------+ | &D::foo | +------+ +// : : +---------------+ +// | | : : +// +---------+ | | +// +---------------+ +// +// static_ptr points to the B subobject inside the full D object. The first +// member of B is the vft pointer which points to a B-compatible (not the +// real B) vft. The vft pointer, however, does not point to the begining of the +// vft. Instead, it points two 8-byte elements off the vft where the first +// function pointer lays. The two elements are the: +// 1. offsetToTop: a ptrdiff_t type value representing the offset that needs +// to be added to the static_ptr to get to the top of the full D object. +// 2. typeinfo pointer: points to the typeinfo object used for RTTI. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Demangle/Demangle.h" // for debugging only +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/IPO/DynamicCastOpt.h" + +using namespace llvm; +using namespace llvm::PatternMatch; + +#define DEBUG_TYPE "dynamic-cast-opt" + +STATISTIC(NumOfDynCastEliminated, "Number of __dynamic_cast calls folded."); +STATISTIC(NumOfDynCastMissed, "Number of __dynamic_cast calls not folded."); +STATISTIC(LeafTypes, "Number of leaf types as determined by this pass."); +STATISTIC(NullableCandidates, "# of dynamic casts that can be null-folded."); +STATISTIC(TypeIDCompCandidates, + "# of dynamic casts that can be folded into typeid comparisons."); +STATISTIC(TypeIDComp2Candidates, + "# of dynamic casts that can be folded into static address calculation."); + +namespace { + +enum TransformationKind { + // Replace dynamic_cast(e) with value null. + ReplaceWithNull, + // Replace dynamic_cast(e) != null with a typeid compare. + ReplaceWithTypeidCMP, + // Replace dynamic_cast(e) with a conditional address calculcation. + ReplaceWithTypeidCMP2, +}; + +enum class Options { ON, OFF, AGGRESSIVE }; + +cl::opt + EnablePass("dynamic-cast-opt", cl::desc("Use this option to control the " + "DynamicCastOpt pass."), cl::values( + clEnumValN(Options::ON, "on", "Turn ON the pass"), + clEnumValN(Options::OFF, "off", "Turn OFF the pass"), + clEnumValN(Options::AGGRESSIVE, "aggr", + "Aggressive version of the pass")), + cl::Hidden, cl::init(Options::ON)); + +// +// common strings +StringRef TIPrefix("_ZTI"); // _ZTI1B = typeinfo for B +StringRef VTablePrefix("_ZTV"); // _ZTV1B = vtable for B +StringRef TINamePrefix("_ZTS"); // _ZTS1B = typeinfo name for B +StringRef NonPrivateBaseAttr("non-private-base"); +// +// _ZTI +std::string GetVTableFromTypeInfo(StringRef TypeInfoID) { + if (!TypeInfoID.startswith(TIPrefix)) + return ""; + + // get the substring from the "_ZTI" string. + StringRef TypeId = TypeInfoID.drop_front(TIPrefix.size()); + return VTablePrefix.str() + TypeId.str(); +} + +std::string GetVTableFromTypeInfoName(StringRef TypeInfoName) { + if (!TypeInfoName.startswith(TINamePrefix)) + return ""; + + // get the substring from the "_ZTS" string. + StringRef TypeId = TypeInfoName.drop_front(TINamePrefix.size()); + return VTablePrefix.str() + TypeId.str(); +} + +bool IsVTable(StringRef ID) { + LLVM_DEBUG(dbgs() << "IsVTable(" << ID << ")\n"); + return ID.startswith(VTablePrefix); +} + +/// for debugging only. +std::string GetTypeNameFromVtable(StringRef ID) { + if (ID.startswith(VTablePrefix)) { + std::string dm = demangle(ID.str()); + return dm.substr(sizeof("vtable for")); + } + return std::string(); +} + +enum SrcOrDest { + DestinationType, + SourceType +}; + +template +bool IsTypeALeaf(const CallInst *DynCast, StringMap &IsLeaf) { + const int OpIdx = Type == DestinationType ? 2 : 1; + const GlobalValue *DestTI = + dyn_cast(DynCast->getArgOperand(OpIdx)->stripPointerCasts()); + assert(DestTI && "expecting a typeinfo global object"); + assert(DestTI->hasName() && "expecting the typeinfo object to have an ID"); + StringRef TIName = DestTI->getName(); + + // compute the name of the corresponding vtable + std::string VTableName = GetVTableFromTypeInfo(TIName); + LLVM_DEBUG(dbgs() << "testing VTableName: " << VTableName << "\n"); + + auto Iter = IsLeaf.find(VTableName); + return Iter != IsLeaf.end() && Iter->second == true; +} + +/// Dynamic casts whose return value can be statically proven to be null: +/// a) dynamic_cast(b) where B *b, and B is a leaf type. +/// b) dynamic_cast(b) where B *b, and B is a non-public base class of T. +bool CanBeNull(const CallInst *DynCast, StringMap &IsLeaf) { + // case (a) + if (IsTypeALeaf(DynCast, IsLeaf)) + return true; + + // case (b) + ConstantInt *Hint = dyn_cast(DynCast->getArgOperand(3)); + assert(Hint && "4th argument must be a compile time constant"); + const APInt &HintInt = Hint->getValue(); + // from Itanium C++ ABI documentation for __dynamic_cast, the 4th parameter: + // src2dst_offset: a static hint about the location of the source subobject + // with respect to the complete object; special negative values are: + // -1: no hint + // -2: src is not a public base of dst + // -3: src is a multiple public base type but never a virtual base type + // otherwise, the src type is a unique public nonvirtual base type of dst at + // offset src2dst_offset from the origin of dst. + // + // so if we know the destination type is a leaf (which implies that cross + // casting is not possible, and the only possible cast is base-to-derived), + // and if the hint indicates that the source type is not a public base of + // the destination type, then by definition of dynamic_cast in the C++ spec, + // the result of the cast is a null pointer. + return HintInt.getSExtValue() == -2 && IsTypeALeaf(DynCast, IsLeaf); +} + +bool AllUsesCompareAgainstNull(const CallInst *CI) { + bool hasNonConformingUses = false; + for (auto CIU = CI->use_begin(), CIUE = CI->use_end(); CIU != CIUE; ++CIU) { + ICmpInst::Predicate Pred; + // %1 = call i8* @__dynamic_cast(...) + // %2 = icmp [eq|ne] i8* %1, null + if (!match(CIU->getUser(), m_ICmp(Pred, m_Specific(CI), m_Zero())) || + !ICmpInst::isEquality(Pred)) { + hasNonConformingUses = true; + break; + } + } + return !hasNonConformingUses; +} + + +/// Given a pointer to a (sub)object, generate a pointer to the vft using the +/// given type. Different users need to index the VFT using different types. +Value *CreateVtablePtr(Value *StaticPtr, Type *VTablePtrType, IRBuilder<> &Builder) { + assert(StaticPtr->getType()->isPointerTy() && + "first argument of __dynamic_cast must be a pointer"); + + Type *PPVTableType = VTablePtrType->getPointerTo(0); + + // Assume VTablePtrType is i8** in the ongoing example. + // + // %1 = bitcast %struct.B* %static_ptr to i8*** + Value *PPVtable = Builder.CreateBitCast(StaticPtr, PPVTableType); + + // %vtable = load i8**, i8*** %1, align 8 + return Builder.CreateLoad(VTablePtrType, PPVtable, "vtable"); +} + + +/// +/// Create the equivalent of the following typeid comparison: +/// typeid(T) == typeid(*expr) +/// which get expanded by the Frontend to +/// typeinfo *dynTI = ((void**)expr)[-1] +/// _ZTI1T.operator==(dynTI) +/// the operator== ends up doing an address compare of two globals: +/// _ZTS1T (demangled to "typeinfo name for D") +/// dynTS (the typeinfo name for the dynamic type) +/// but the ABI promises that the typeinfo object is unique for a given complete +/// type, and the source and destination type on a dynamic_cast are required to +/// be complete types in the containing CU by the C++ spec. +/// So in this transformation, we lower the typeid comparison to a comparison +/// of the addresses of the two typeinfos. +Value* CreateTypeIDCompare(CallInst *CI, CmpInst::Predicate TypeidOp, + IRBuilder<> &Builder) { + assert(TypeidOp == CmpInst::ICMP_NE || TypeidOp == CmpInst::ICMP_EQ); + + // %vtable = *(i8***)StaticPtr + Type *PPChar = Builder.getInt8PtrTy()->getPointerTo(0); + Value *StaticPtr = CI->getArgOperand(0); + Value *VTablePtr = CreateVtablePtr(StaticPtr, PPChar, Builder); + + // %4 = getelementptr inbounds i8*, i8** %vtable, i64 -1 + Type *PChar = Builder.getInt8PtrTy(); + ConstantInt *MinusOne = ConstantInt::getSigned(Builder.getInt8Ty(), -1); + Value *PPTypeInfo = Builder.CreateInBoundsGEP(PChar, VTablePtr, MinusOne); + + // %DynamicTI = load i8*, i8** %4, align 8 + Value *DynamicTypeInfoPtr = + Builder.CreateLoad(PChar, PPTypeInfo, "DynamicTI"); + + // %2 = icmp eq i8* %DynamicTI, i8* @_ZTI1D + Value *DestTypeInfoPtr = + Builder.CreateBitCast(CI->getArgOperand(2), PChar); + return Builder.CreateICmp(TypeidOp, DynamicTypeInfoPtr, DestTypeInfoPtr); +} + +template +void Replace(CallInst *CI) { + assert(false && "unimplemented transformation"); +} + +/// Replace the dynamic_cast with a null pointer. +template<> +void Replace(CallInst *CI) { + ++NullableCandidates; + + LLVMContext &Ctx = CI->getParent()->getContext(); + Value *NullPtr = ConstantPointerNull::get(Type::getInt8PtrTy(Ctx)); + CI->replaceAllUsesWith(NullPtr); + CI->dropAllReferences(); + CI->eraseFromParent(); +} + +/// Replace the icmp with an comparison of the typeinfo addresses: +/// +/// %1 = call i8* @__dynamic_cast(i8* %ptr, i8* @_ZTI1B, i8* @_ZTI1D, i64 0) +/// %2 = icmp op i8* %1, null // where op \in {eq,ne}. +/// ====> +/// %3 = bitcast %struct.B* %ptr to i8*** +/// %vtable = load i8**, i8*** %3, align 8 +/// %4 = getelementptr inbounds i8*, i8** %vtable, i64 -1 +/// %DynamicTI = load i8*, i8** %4, align 8 +/// %2 = icmp ~op i8* %DynamicTI, i8* @_ZTI1D // ~op is complement of op. +/// +template<> +void Replace(CallInst *CI) { + ++TypeIDCompCandidates; + + for (auto CIU = CI->use_begin(), CIUE = CI->use_end(); CIU != CIUE; ++CIU) { + ICmpInst *cmp = cast(CIU->getUser()); + IRBuilder<> Builder(cmp); + // dynamic_cast(e) == null ==> typeid(T) != typeid(*e) + // dynamic_cast(e) != null ==> typeid(T) == typeid(*e) + CmpInst::Predicate pred = cmp->getPredicate() == CmpInst::ICMP_EQ ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ; + + Value *NewCompare = CreateTypeIDCompare(CI, pred, Builder); + LLVM_DEBUG(dbgs() << "new compare = "; NewCompare->dump()); + + cmp->replaceAllUsesWith(NewCompare); + cmp->dropAllReferences(); + cmp->eraseFromParent(); + } + assert(CI->use_empty() && "the dynamic_cast should have no uses by now"); + CI->dropAllReferences(); + CI->eraseFromParent(); +} + +/// Generate the equivalent of the following: +/// (char*)static_ptr + (*(long**)(static_ptr))[-2] +/// +Value *CreateDynamicPtrFromStaticPtr(Value *StaticPtr, IRBuilder<> &Builder) { + Type *Int64 = Builder.getInt64Ty(); + Type *Int64Ptr = Int64->getPointerTo(0); + + // %vtable = *(i64**)StaticPtr + Value *VTablePtr = CreateVtablePtr(StaticPtr, Int64Ptr, Builder); + + // %2 = getelementptr inbounds i64, i64* %vtable, i64 -2 + ConstantInt *MinusTwo = ConstantInt::getSigned(Builder.getInt8Ty(), -2); + Value *PtrToOffset = Builder.CreateInBoundsGEP(Int64, VTablePtr, MinusTwo); + + // %offset.to.top = load i64, i64* %2, align 8 + Value *OffsetToTop = Builder.CreateLoad(Int64, PtrToOffset, "offset.to.top"); + + // %3 = bitcast %struct.B* %static_ptr to i8* + Type *PChar = Builder.getInt8PtrTy(); + Value *StaticPtrAsCharPtr = Builder.CreateBitCast(StaticPtr, PChar); + + // %4 = getelementptr inbounds i8, i8* %3, i64 %offset.to.top + Type *Char = Builder.getInt8Ty(); + return Builder.CreateInBoundsGEP(Char, StaticPtrAsCharPtr, OffsetToTop); +} + +/// +/// Given dynamic_cast(static_ptr), replace it with: +/// (typeid(T) == typeid(*static_ptr)) +/// ? ((char*)static_ptr + (*(long**)(static_ptr))[-2]) +/// : nullptr +template<> +void Replace(CallInst *CI) { + ++TypeIDComp2Candidates; + IRBuilder<> Builder(CI); + + Value *NewCompare = CreateTypeIDCompare(CI, CmpInst::ICMP_EQ, Builder); + + Value *StaticPtr = CI->getArgOperand(0); + Value *DynamicPtr = CreateDynamicPtrFromStaticPtr(StaticPtr, Builder); + + Value *OptimizedExpr = Builder.CreateSelect(NewCompare, DynamicPtr, ConstantPointerNull::get(Builder.getInt8PtrTy())); + + CI->replaceAllUsesWith(OptimizedExpr); + CI->dropAllReferences(); + CI->eraseFromParent(); +} + + +/// A type representing a dynamic_cast call instruction that potentially be +/// optimized, along with a tag indicating the kind of transformation that +/// applies. +using Candidate = PointerIntPair; + +} // anonymous namespace + +// perform the replacement, returning true if the IR has been changed. +static bool DoOpt(Module &M) { + Function *DynCastF = M.getFunction("__dynamic_cast"); + if (!DynCastF) + return false; + + // 1. Discover all vtables of leaf types. + // + // IsLeaf maps vtable identifier to a boolean value, where true indicates + // that the C++ type the vtable represents is a leaf type. + StringMap IsLeaf; + for (GlobalVariable &GV : M.globals()) { + if (!IsVTable(GV.getName())) + continue; + SmallVector Types; + + LLVM_DEBUG(dbgs() << "processing vtable for " + << GetTypeNameFromVtable(GV.getName()) << "\n"); + + GV.getMetadata(LLVMContext::MD_type, Types); + for (MDNode *Type : Types) { + MDString *OtherVT = dyn_cast(Type->getOperand(1).get()); + if (!OtherVT) + continue; + + assert(OtherVT->getString().startswith(TINamePrefix)); + std::string VTableName = GetVTableFromTypeInfoName(OtherVT->getString()); + // + // All vtables other than myself are now considered non-leaves. + // I will declare myself a leaf unless it's known that I'm not a leaf. + if (GV.getName() != VTableName) { + IsLeaf[VTableName] = false; + LLVM_DEBUG(dbgs() << GetTypeNameFromVtable(VTableName) << " is not a leaf\n"); + } + else { + auto Iter = IsLeaf.find(VTableName); + if (Iter == IsLeaf.end()) { + IsLeaf[VTableName] = true; + LLVM_DEBUG(dbgs() << GetTypeNameFromVtable(VTableName) << " is a leaf\n"); + } + } + } + } + + // 2. Collect various candidates + std::vector Candidates; + for (auto UI = DynCastF->use_begin(), UE = DynCastF->use_end(); UI != UE; + ++UI) { + CallInst *CI = dyn_cast(UI->getUser()); + if (!CI) + continue; + + LLVM_DEBUG(dbgs() << "Considering: "; CI->dump()); + + // find __dynamic_cast that we can turn to a null directly: + if (CanBeNull(CI, IsLeaf)) { + LLVM_DEBUG(dbgs() << "found Nullable candidate\n"); + Candidates.push_back({CI, ReplaceWithNull}); + continue; + } + + // Remaining candidates must be casting to a type that is known to be a leaf. + if (!IsTypeALeaf(CI, IsLeaf)) { + LLVM_DEBUG(dbgs() << "Not viable, destination type not a leaf\n"); + ++NumOfDynCastMissed; + continue; + } + + // If source type is not guaranteed to be public on all inheritance paths + // (to the destination type), then abandon the optimization because we + // cannot tell whether the static pointer points to a private base. + // The Fronted checks if all instances of the source type are on a public + // path, and sets the "non-private-base" attribute if so. + if (EnablePass != Options::AGGRESSIVE && !CI->hasFnAttr(NonPrivateBaseAttr)) { + LLVM_DEBUG(dbgs() << "Not viable, source type might be private in some " + "paths to the destination type\n"); + ++NumOfDynCastMissed; + continue; + } + + // If all uses only compare the return value to null then the dynamic_cast can + // be replaced with a typeid comparison: + // typeid(T) == typeid(*expr) ... where __dynamic_cast(expr) + // Otherwise, the return value is needed and it can be computed with a simple + // offset calculation: + // __dynamic_cast(expr) is replaced with + // (typeid(T) == typeid(*expr)) ? ((char*)expr)+vtable[-2]: nullptr + // + if (AllUsesCompareAgainstNull(CI)) { + LLVM_DEBUG(dbgs() << "found compare-to-null candidate\n"); + Candidates.push_back({CI, ReplaceWithTypeidCMP}); + } + else { + LLVM_DEBUG(dbgs() << "found a regular candidate\n"); + Candidates.push_back({CI, ReplaceWithTypeidCMP2}); + } + } + + // 3. perform the replacement + for (Candidate C : Candidates) { + CallInst *CI = C.getPointer(); + LLVM_DEBUG(dbgs() << "Candidate: "; CI->dump()); + switch (C.getInt()) { + case ReplaceWithNull: Replace(CI); break; + case ReplaceWithTypeidCMP: Replace(CI); break; + case ReplaceWithTypeidCMP2: Replace(CI); break; + } + } + + NumOfDynCastEliminated += Candidates.size(); + return Candidates.size() != 0; +} + +PreservedAnalyses DynamicCastOptPass::run(Module &M, ModuleAnalysisManager &AM) { + bool Changed = EnablePass != Options::OFF && DoOpt(M); + if (!Changed) + return PreservedAnalyses::all(); + return PreservedAnalyses::none(); +} + Index: llvm/lib/Transforms/IPO/LLVMBuild.txt =================================================================== --- llvm/lib/Transforms/IPO/LLVMBuild.txt +++ llvm/lib/Transforms/IPO/LLVMBuild.txt @@ -19,4 +19,4 @@ name = IPO parent = Transforms library_name = ipo -required_libraries = AggressiveInstCombine Analysis BitReader BitWriter Core FrontendOpenMP InstCombine IRReader Linker Object ProfileData Scalar Support TransformUtils Vectorize Instrumentation +required_libraries = AggressiveInstCombine Analysis BitReader BitWriter Core Demangle FrontendOpenMP InstCombine IRReader Linker Object ProfileData Scalar Support TransformUtils Vectorize Instrumentation Index: llvm/test/Other/new-pm-lto-defaults.ll =================================================================== --- llvm/test/Other/new-pm-lto-defaults.ll +++ llvm/test/Other/new-pm-lto-defaults.ll @@ -61,6 +61,7 @@ ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis ; CHECK-O-NEXT: Running pass: GlobalSplitPass ; CHECK-O-NEXT: Running pass: WholeProgramDevirtPass +; CHECK-O-NEXT: Running pass: DynamicCastOptPass ; CHECK-O1-NEXT: Running pass: LowerTypeTestsPass ; CHECK-O2-NEXT: Running pass: GlobalOptPass ; CHECK-O2-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PromotePass> Index: llvm/test/Transforms/DynamicCast/dynamic_cast_opt.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/DynamicCast/dynamic_cast_opt.ll @@ -0,0 +1,148 @@ +; +; RUN: opt < %s -passes=dynamic-cast-opt -S 2>&1 | FileCheck %s +; +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +;; B +;; \ +;; E B +;; \ / +;; D B is public on all paths +;; + +@_ZTV1D = external unnamed_addr constant { [4 x i8*], [4 x i8*] }, !type !0, !type !1, !type !2, !type !3 +@_ZTI1D = external constant { i8*, i8*, i32, i32, i8*, i64, i8*, i64 } +@_ZTS1D = internal constant [3 x i8] c"1D\00", align 1 + +@_ZTI1B = external constant { i8*, i8* } +@_ZTS1B = internal constant [3 x i8] c"1B\00", align 1 + +@_ZTI1E = external constant { i8*, i8*, i8* } +@_ZTS1E = internal constant [3 x i8] c"1E\00", align 1 + +@_ZTVN10__cxxabiv121__vmi_class_type_infoE = external global i8* +@_ZTVN10__cxxabiv120__si_class_type_infoE = external global i8* +@_ZTVN10__cxxabiv117__class_type_infoE = external global i8* + +define dso_local signext i32 @foo1() { + %1 = tail call dereferenceable(16) i8* @_Znwm(i64 16) #6 + %2 = getelementptr inbounds i8, i8* %1, i64 8 + +; CHECK: %3 = bitcast i8* %2 to i8*** +; CHECK-NEXT: %vtable = load i8**, i8*** %3 +; CHECK-NEXT: %4 = getelementptr inbounds i8*, i8** %vtable, i8 -1 +; CHECK-NEXT: %DynamicTI = load i8*, i8** %4 +; CHECK-NEXT: %5 = icmp eq i8* %DynamicTI, bitcast ({ i8*, i8*, i32, i32, i8*, i64, i8*, i64 }* @_ZTI1D to i8*) +; CHECK-NEXT: %6 = zext i1 %5 to i32 + %DYNCAST1 = tail call i8* @__dynamic_cast(i8* nonnull %2, i8* bitcast ({ i8*, i8* }* @_ZTI1B to i8*), i8* bitcast ({ i8*, i8*, i32, i32, i8*, i64, i8*, i64 }* @_ZTI1D to i8*), i64 -3) #7 + %3 = icmp ne i8* %DYNCAST1, null + %4 = zext i1 %3 to i32 + ret i32 %4 +} + +!0 = !{i64 16, !"_ZTS1B"} +!1 = !{i64 48, !"_ZTS1B"} +!2 = !{i64 16, !"_ZTS1D"} +!3 = !{i64 48, !"_ZTS1E"} + +attributes #7 = { nounwind "non-private-base" } + + +;; ----------------------------------------------------------------------------- +;; Test generic dynamic_cast case +;; ----------------------------------------------------------------------------- + +;; Corresponding C-code +;; struct B { virtual ~B(){} }; +;; struct E : public B {}; +;; struct D : public B, public E { +;; int x; +;; D(int y) : x(y){} +;; }; +;; +;; B* leftb = static_cast(new D(3)); +;; D* d = dynamic_cast (leftb); +;; return d ? d->x : 0; +;; +define dso_local signext i32 @foo2() { + %1 = tail call dereferenceable(24) i8* @_Znwm(i64 24) #6 + %2 = getelementptr inbounds i8, i8* %1, i64 8 + %3 = getelementptr inbounds i8, i8* %1, i64 16 + %4 = bitcast i8* %3 to i32* + store i32 3, i32* %4, align 8 + +; CHECK: %5 = bitcast i8* %2 to i8*** +; CHECK-NEXT: %vtable = load i8**, i8*** %5 +; CHECK-NEXT: %6 = getelementptr inbounds i8*, i8** %vtable, i8 -1 +; CHECK-NEXT: %DynamicTI = load i8*, i8** %6 +; CHECK-NEXT: %7 = icmp eq i8* %DynamicTI, bitcast ({ i8*, i8*, i32, i32, i8*, i64, i8*, i64 }* @_ZTI1D to i8*) +; CHECK-NEXT: %8 = bitcast i8* %2 to i64** +; CHECK-NEXT: %vtable1 = load i64*, i64** %8 +; CHECK-NEXT: %9 = getelementptr inbounds i64, i64* %vtable1, i8 -2 +; CHECK-NEXT: %offset.to.top = load i64, i64* %9 +; CHECK-NEXT: %10 = getelementptr inbounds i8, i8* %2, i64 %offset.to.top +; CHECK-NEXT: %11 = select i1 %7, i8* %10, i8* null +; CHECK-NEXT: %12 = icmp eq i8* %11, null + + %DYNCAST2 = tail call i8* @__dynamic_cast(i8* nonnull %2, i8* bitcast ({ i8*, i8* }* @_ZTI1B to i8*), i8* bitcast ({ i8*, i8*, i32, i32, i8*, i64, i8*, i64 }* @_ZTI1D to i8*), i64 -3) #7 +; CHECK-NOT: %DYNCAST2 = tail call i8* @__dynamic_cast + + %5 = icmp eq i8* %DYNCAST2, null + br i1 %5, label %lab.true, label %lab.false + +lab.false: + %6 = getelementptr inbounds i8, i8* %DYNCAST2, i64 16 + %7 = bitcast i8* %6 to i32* + %8 = load i32, i32* %7, align 8 + br label %lab.true + +lab.true: + %9 = phi i32 [ %8, %lab.false ], [ 0, %0 ] + ret i32 %9 +} + +;; ----------------------------------------------------------------------------- +;; Test nullable dynamic_casts +;; B3 +;; ' +;; E3 B3 +;; \.' +;; D3 B3 is private on all paths +;; +@_ZTV2D3 = external unnamed_addr constant { [4 x i8*], [4 x i8*] }, !type !8, !type !9, !type !10, !type !11 +@_ZTI2D3 = external constant { i8*, i8*, i32, i32, i8*, i64, i8*, i64 } +@_ZTS2D3 = internal constant [4 x i8] c"2D3\00", align 1 + +@_ZTI2B3 = external constant { i8*, i8* } +@_ZTS2B3 = internal constant [4 x i8] c"2B3\00", align 1 + +@_ZTI2E3 = external constant { i8*, i8*, i8* } +@_ZTS2E3 = internal constant [4 x i8] c"2E3\00", align 1 + +define dso_local signext i32 @foo3() { + %1 = tail call dereferenceable(16) i8* @_Znwm(i64 16) #6 + %2 = getelementptr inbounds i8, i8* %1, i64 8 + %NULLABLE = tail call i8* @__dynamic_cast(i8* nonnull %2, i8* bitcast ({ i8*, i8* }* @_ZTI2B3 to i8*), i8* bitcast ({ i8*, i8*, i32, i32, i8*, i64, i8*, i64 }* @_ZTI2D3 to i8*), i64 -2) #7 +; CHECK-NOT: %NULLABLE = tail call i8* @__dynamic_cast +; CHECK: %3 = icmp ne i8* null, null + %3 = icmp ne i8* %NULLABLE, null + %4 = zext i1 %3 to i32 + ret i32 %4 +} + +!8 = !{i64 16, !"_ZTS2B3"} +!9 = !{i64 48, !"_ZTS2B3"} +!10 = !{i64 16, !"_ZTS2D3"} +!11 = !{i64 48, !"_ZTS2E3"} + +; ------------------------------------------------------------------------------ +; Common +; ------------------------------------------------------------------------------ +declare noalias nonnull i8* @_Znwm(i64) local_unnamed_addr +declare i8* @__dynamic_cast(i8*, i8*, i8*, i64) local_unnamed_addr #2 + +attributes #2 = { nounwind readonly } +attributes #6 = { builtin nounwind } + + Index: llvm/test/Transforms/DynamicCast/dynamic_cast_opt_negative.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/DynamicCast/dynamic_cast_opt_negative.ll @@ -0,0 +1,54 @@ +;; B +;; \ +;; E B +;; './ +;; D +; RUN: opt < %s -passes=dynamic-cast-opt -S 2>&1 | FileCheck %s +;; +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +%struct.D = type { %struct.B, %struct.E } +%struct.B = type { i32 (...)** } +%struct.E = type { %struct.B } + +@_ZTV1D = external unnamed_addr constant { [4 x i8*], [4 x i8*] }, !type !0, !type !1, !type !2, !type !3 +@_ZTI1D = external constant { i8*, i8*, i32, i32, i8*, i64, i8*, i64 } +@_ZTS1D = internal constant [3 x i8] c"1D\00", align 1 + +@_ZTI1B = external constant { i8*, i8* } +@_ZTS1B = internal constant [3 x i8] c"1B\00", align 1 + +@_ZTI1E = external constant { i8*, i8*, i8* } +@_ZTS1E = internal constant [3 x i8] c"1E\00", align 1 + +@_ZTVN10__cxxabiv121__vmi_class_type_infoE = external global i8* +@_ZTVN10__cxxabiv120__si_class_type_infoE = external global i8* +@_ZTVN10__cxxabiv117__class_type_infoE = external global i8* + +; CHECK: tail call i8* @__dynamic_cast(i8* nonnull %2, i8* bitcast ({ i8*, i8* }* @_ZTI1B to i8*), i8* bitcast ({ i8*, i8*, i32, i32, i8*, i64, i8*, i64 }* @_ZTI1D to i8*), i64 0) +define dso_local signext i32 @main() { + %1 = tail call dereferenceable(16) i8* @_Znwm(i64 16) #6 + %2 = getelementptr inbounds i8, i8* %1, i64 8 + %3 = tail call i8* @__dynamic_cast(i8* nonnull %2, i8* bitcast ({ i8*, i8* }* @_ZTI1B to i8*), i8* bitcast ({ i8*, i8*, i32, i32, i8*, i64, i8*, i64 }* @_ZTI1D to i8*), i64 0) #7 + %4 = icmp ne i8* %3, null + %5 = zext i1 %4 to i32 + ret i32 %5 +} + +; Function Attrs: nobuiltin nofree +declare noalias nonnull i8* @_Znwm(i64) local_unnamed_addr + +; Function Attrs: nounwind readonly +declare i8* @__dynamic_cast(i8*, i8*, i8*, i64) local_unnamed_addr #2 + + +attributes #2 = { nounwind readonly } +attributes #6 = { builtin nounwind } +attributes #7 = { nounwind } + + +!0 = !{i64 16, !"_ZTS1B"} +!1 = !{i64 48, !"_ZTS1B"} +!2 = !{i64 16, !"_ZTS1D"} +!3 = !{i64 48, !"_ZTS1E"}