Index: llvm/include/llvm/IR/FixedMetadataKinds.def =================================================================== --- llvm/include/llvm/IR/FixedMetadataKinds.def +++ llvm/include/llvm/IR/FixedMetadataKinds.def @@ -40,3 +40,4 @@ LLVM_FIXED_MD_KIND(MD_callback, "callback", 26) LLVM_FIXED_MD_KIND(MD_preserve_access_index, "llvm.preserve.access.index", 27) LLVM_FIXED_MD_KIND(MD_misexpect, "misexpect", 28) +LLVM_FIXED_MD_KIND(MD_offset_type, "offset.type", 29) Index: llvm/include/llvm/IR/GlobalObject.h =================================================================== --- llvm/include/llvm/IR/GlobalObject.h +++ llvm/include/llvm/IR/GlobalObject.h @@ -162,7 +162,7 @@ /// Copy metadata from Src, adjusting offsets by Offset. void copyMetadata(const GlobalObject *Src, unsigned Offset); - void addTypeMetadata(unsigned Offset, Metadata *TypeID); + void addTypeMetadata(unsigned Offset, Metadata *TypeID, unsigned Level); protected: void copyAttributesFrom(const GlobalObject *Src); Index: llvm/include/llvm/InitializePasses.h =================================================================== --- llvm/include/llvm/InitializePasses.h +++ llvm/include/llvm/InitializePasses.h @@ -188,6 +188,7 @@ void initializeInstructionSelectPass(PassRegistry&); void initializeInterleavedAccessPass(PassRegistry&); void initializeInterleavedLoadCombinePass(PassRegistry &); +void initializeInterleaveVTablesPass(PassRegistry &); void initializeInternalizeLegacyPassPass(PassRegistry&); void initializeIntervalPartitionPass(PassRegistry&); void initializeJumpThreadingPass(PassRegistry&); Index: llvm/include/llvm/Transforms/IPO.h =================================================================== --- llvm/include/llvm/Transforms/IPO.h +++ llvm/include/llvm/Transforms/IPO.h @@ -243,6 +243,9 @@ ModulePass *createLowerTypeTestsPass(ModuleSummaryIndex *ExportSummary, const ModuleSummaryIndex *ImportSummary); +/// This pass interleaves vtables and lowers llvm.type.test intrinsic. 
+ModulePass *createInterleaveVTablesPass(); + /// This pass export CFI checks for use by external modules. ModulePass *createCrossDSOCFIPass(); Index: llvm/include/llvm/Transforms/IPO/InterleaveVTables.h =================================================================== --- /dev/null +++ llvm/include/llvm/Transforms/IPO/InterleaveVTables.h @@ -0,0 +1,31 @@ +//===- InterleaveVTables.h - VTables interleaving pass ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass creates an interleaved layout of vtables and lowers calls to +// llvm.type.test to appropriate range checks. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_IPO_INTERLEAVEVTABLES_H +#define LLVM_TRANSFORMS_IPO_INTERLEAVEVTABLES_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class Module; + +class InterleaveVTablesPass : public PassInfoMixin<InterleaveVTablesPass> { +public: + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_IPO_INTERLEAVEVTABLES_H Index: llvm/lib/IR/Metadata.cpp =================================================================== --- llvm/lib/IR/Metadata.cpp +++ llvm/lib/IR/Metadata.cpp @@ -1488,13 +1488,24 @@ } } -void GlobalObject::addTypeMetadata(unsigned Offset, Metadata *TypeID) { - addMetadata( - LLVMContext::MD_type, - *MDTuple::get(getContext(), - {ConstantAsMetadata::get(ConstantInt::get( - Type::getInt64Ty(getContext()), Offset)), - TypeID})); +void GlobalObject::addTypeMetadata(unsigned Offset, Metadata *TypeID, + unsigned Level) { + // We include a level field when Level > 0, which means that vtable + // interleaving is enabled. 
+ if (Level == 0) + addMetadata(LLVMContext::MD_type, + *MDTuple::get(getContext(), + {ConstantAsMetadata::get(ConstantInt::get( + Type::getInt64Ty(getContext()), Offset)), + TypeID})); + else + addMetadata(LLVMContext::MD_type, + *MDTuple::get(getContext(), + {ConstantAsMetadata::get(ConstantInt::get( + Type::getInt64Ty(getContext()), Offset)), + TypeID, + ConstantAsMetadata::get(ConstantInt::get( + Type::getInt64Ty(getContext()), Level))})); } void Function::setSubprogram(DISubprogram *SP) { Index: llvm/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/lib/Passes/PassBuilder.cpp +++ llvm/lib/Passes/PassBuilder.cpp @@ -82,6 +82,7 @@ #include "llvm/Transforms/IPO/HotColdSplitting.h" #include "llvm/Transforms/IPO/InferFunctionAttrs.h" #include "llvm/Transforms/IPO/Inliner.h" +#include "llvm/Transforms/IPO/InterleaveVTables.h" #include "llvm/Transforms/IPO/Internalize.h" #include "llvm/Transforms/IPO/LowerTypeTests.h" #include "llvm/Transforms/IPO/PartialInlining.h" Index: llvm/lib/Passes/PassRegistry.def =================================================================== --- llvm/lib/Passes/PassRegistry.def +++ llvm/lib/Passes/PassRegistry.def @@ -60,6 +60,7 @@ MODULE_PASS("insert-gcov-profiling", GCOVProfilerPass()) MODULE_PASS("instrorderfile", InstrOrderFilePass()) MODULE_PASS("instrprof", InstrProfiling()) +MODULE_PASS("interleavevtables", InterleaveVTablesPass()) MODULE_PASS("internalize", InternalizePass()) MODULE_PASS("invalidate", InvalidateAllAnalysesPass()) MODULE_PASS("ipsccp", IPSCCPPass()) Index: llvm/lib/Transforms/IPO/CMakeLists.txt =================================================================== --- llvm/lib/Transforms/IPO/CMakeLists.txt +++ llvm/lib/Transforms/IPO/CMakeLists.txt @@ -22,6 +22,7 @@ InferFunctionAttrs.cpp InlineSimple.cpp Inliner.cpp + InterleaveVTables.cpp Internalize.cpp LoopExtractor.cpp LowerTypeTests.cpp Index: llvm/lib/Transforms/IPO/GlobalSplit.cpp 
=================================================================== --- llvm/lib/Transforms/IPO/GlobalSplit.cpp +++ llvm/lib/Transforms/IPO/GlobalSplit.cpp @@ -103,12 +103,22 @@ uint64_t AttachedTo = (ByteOffset == 0) ? ByteOffset : ByteOffset - 1; if (AttachedTo < SplitBegin || AttachedTo >= SplitEnd) continue; - SplitGV->addMetadata( - LLVMContext::MD_type, - *MDNode::get(GV.getContext(), - {ConstantAsMetadata::get( - ConstantInt::get(Int32Ty, ByteOffset - SplitBegin)), - Type->getOperand(1)})); + + // When vtable interleaving is enabled, type metadata has three fields. + if (Type->getNumOperands() == 3) + SplitGV->addMetadata( + LLVMContext::MD_type, + *MDNode::get(GV.getContext(), + {ConstantAsMetadata::get(ConstantInt::get( + Int32Ty, ByteOffset - SplitBegin)), + Type->getOperand(1), Type->getOperand(2)})); + else + SplitGV->addMetadata( + LLVMContext::MD_type, + *MDNode::get(GV.getContext(), + {ConstantAsMetadata::get(ConstantInt::get( + Int32Ty, ByteOffset - SplitBegin)), + Type->getOperand(1)})); } } @@ -139,14 +149,29 @@ static bool splitGlobals(Module &M) { // First, see if the module uses either of the llvm.type.test or - // llvm.type.checked.load intrinsics, which indicates that splitting globals - // may be beneficial. + // llvm.type.checked.load intrinsics, or has offset global variables, which + // indicates that splitting globals may be beneficial. 
Function *TypeTestFunc = M.getFunction(Intrinsic::getName(Intrinsic::type_test)); + Function *TypeCheckedLoadFunc = M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load)); + + bool HasOffsetGV = false; + SmallVector TypeNodes; + for (GlobalVariable &GV : M.globals()) { + if (GV.isDeclarationForLinker()) + continue; + GV.getMetadata(LLVMContext::MD_offset_type, TypeNodes); + if (!TypeNodes.empty()) { + HasOffsetGV = true; + break; + } + } + if ((!TypeTestFunc || TypeTestFunc->use_empty()) && - (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty())) + (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()) && + !HasOffsetGV) return false; bool Changed = false; Index: llvm/lib/Transforms/IPO/IPO.cpp =================================================================== --- llvm/lib/Transforms/IPO/IPO.cpp +++ llvm/lib/Transforms/IPO/IPO.cpp @@ -38,6 +38,7 @@ initializeAlwaysInlinerLegacyPassPass(Registry); initializeSimpleInlinerPass(Registry); initializeInferFunctionAttrsLegacyPassPass(Registry); + initializeInterleaveVTablesPass(Registry); initializeInternalizeLegacyPassPass(Registry); initializeLoopExtractorPass(Registry); initializeBlockExtractorPass(Registry); Index: llvm/lib/Transforms/IPO/InterleaveVTables.cpp =================================================================== --- /dev/null +++ llvm/lib/Transforms/IPO/InterleaveVTables.cpp @@ -0,0 +1,1107 @@ +//===- InterleaveVTables.cpp - VTables interleaving pass ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Create a interleaved layout for all the virtual tables in a disjoint +// inheritance hierarchy and lower calls to llvm.type.test. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/InterleaveVTables.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/EquivalenceClasses.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/PointerUnion.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/TinyPtrVector.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Analysis/TypeMetadataUtils.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalObject.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/TrailingObjects.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" +#include +#include +#include 
+#include +#include +#include +#include +#include +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "interleavevtables" + +STATISTIC(NumTypeIdDisjointSets, "Number of disjoint sets of type identifiers"); +STATISTIC(NumTotalChecks, + "Number of llvm.type.tests inserted by the front-end"); +STATISTIC(NumSafeChecks, + "Number of llvm.type.tests that can be proved safe statically"); +STATISTIC(NumSingletonRangeChecks, + "Number of llvm.type.test that are lowered to singleton range"); +STATISTIC(NumNormalRangeChecks, "Number of llvm.type.tests that are lowered to " + "range that contains more than one element"); +STATISTIC(NumComplicatedChecks, + "Number of llvm.type.tests that require a bitset"); +STATISTIC(NumVTableObjects, "Number of vtable objects handled by the pass"); +STATISTIC(NumTotalEntries, "Number of total vtable entries"); +STATISTIC(NumUsedEntries, "Number of used vtable entries"); +STATISTIC(NumPaddingEntries, "Number of padding vtable entries"); + +namespace { + +/// A POD-like structure that we use to store a global reference together with +/// its metadata types. In this pass we frequently need to query the set of +/// metadata types referenced by a global, which at the IR level is an expensive +/// operation involving a map lookup; this data structure helps to reduce the +/// number of times we need to do this lookup. 
+class GlobalTypeMember final : TrailingObjects<GlobalTypeMember, MDNode *> { + friend TrailingObjects; + + GlobalVariable *GV; + size_t NTypes; + + size_t numTrailingObjects(OverloadToken<MDNode *>) const { return NTypes; } + +public: + static GlobalTypeMember *create(BumpPtrAllocator &Alloc, GlobalVariable *GV, + ArrayRef<MDNode *> Types) { + auto *GTM = static_cast<GlobalTypeMember *>(Alloc.Allocate( + totalSizeToAlloc<MDNode *>(Types.size()), alignof(GlobalTypeMember))); + GTM->GV = GV; + GTM->NTypes = Types.size(); + std::uninitialized_copy(Types.begin(), Types.end(), + GTM->getTrailingObjects<MDNode *>()); + return GTM; + } + + GlobalVariable *getGlobal() const { return GV; } + + ArrayRef<MDNode *> types() const { + return makeArrayRef(getTrailingObjects<MDNode *>(), NTypes); + } +}; + +/// This struct stores related information of each type for interleaving. +struct TypeInterleavingInfo { + // Set of immediate children types. + SetVector<Metadata *> ChildrenTypes; + // List of vtables that are only referenced by this type. + std::vector<GlobalVariable *> ExclusiveVTables; + // Map from offset to the corresponding offset global variable. + MapVector<int64_t, GlobalVariable *> OffsetGlobalMap; + // The index of the first compatible vtable in the ordered vtable list for the + // type + size_t StartIndex; + // One past the index of the last compatible vtable in the ordered vtable list + // for the type + size_t EndIndex; + // List of indices of compatible vtables in the ordered vtable list for the + // type. We need this because in some corner cases the compatible vtables for + // a type in the ordered vtable list are not consecutive. + std::vector<size_t> CompatibleIndices; +}; + +/// All the vtable entries in a disjoint hierarchy that must have the same +/// distance from their address points form an entry group. +/// Each group is identified by a (type, offset) pair. +/// Also we keep a pointer to one of offset global variables that reference +/// the entries and this global variable will be replaced once the interleaving +/// layout is decided. 
+struct EntryGroup { + Metadata *TypeId; + int64_t ByteOffset; + GlobalVariable *Global; +}; + +class InterleaveVTablesModule { + Module &M; + + PointerType *Int8PtrTy = Type::getInt8PtrTy(M.getContext()); + PointerType *Int64PtrTy = Type::getInt64PtrTy(M.getContext()); + IntegerType *Int32Ty = Type::getInt32Ty(M.getContext()); + IntegerType *Int64Ty = Type::getInt64Ty(M.getContext()); + IntegerType *IntPtrTy = M.getDataLayout().getIntPtrType(M.getContext(), 0); + // Size in byte of an vtable entry + uint64_t EntrySize = M.getDataLayout().getTypeAllocSize(IntPtrTy); + + // Mapping from type identifiers to the call sites that test them + struct TypeIdUserInfo { + std::vector CallSites; + }; + DenseMap TypeIdUsers; + + DenseMap InterleavingInfoMap; + + void traverseHierarchy(Metadata *BaseType, + std::vector &OrderedGlobals, + std::vector &Groups, + std::map ProcessedGroups); + void lowerTypeTestCalls(ArrayRef TypeIds, + GlobalVariable *CombinedVTableAddr); + static int getAggregateElementNum(Constant *C); + void interleaveGlobalVariables( + ArrayRef TypeIds, ArrayRef Groups, + ArrayRef OrderedGlobals, + DenseMap &AddressPointIndices); + void addType(Metadata *TypdId, SetVector &Globals, + std::map &TypeMap, + std::set &ProcessedTypes); + void + buildHierarchy(std::vector>> + &TypeGlobals); + void handleDisjointSet( + ArrayRef TypeIds, ArrayRef Globals, + DenseMap &AddressPointIndices, + DenseMap> &TypeLevels); + template + void releaseMemory(std::vector &Vec, DenseMap &Map); + +public: + InterleaveVTablesModule(Module &M); + + bool lower(); +}; + +struct InterleaveVTables : public ModulePass { + static char ID; + + InterleaveVTables() : ModulePass(ID) { + initializeInterleaveVTablesPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override { + return InterleaveVTablesModule(M).lower(); + } +}; + +} // end anonymous namespace + +char InterleaveVTables::ID = 0; + +INITIALIZE_PASS(InterleaveVTables, "interleavevtables", "Interleave vtables", + 
false, false) + +ModulePass *llvm::createInterleaveVTablesPass() { + return new InterleaveVTables; +} + +static bool isKnownTypeIdMember(Metadata *TypeId, const DataLayout &DL, + Value *V, uint64_t COffset) { + if (auto GV = dyn_cast(V)) { + SmallVector Types; + GV->getMetadata(LLVMContext::MD_type, Types); + for (MDNode *Type : Types) { + if (Type->getOperand(1) != TypeId) + continue; + uint64_t Offset = + cast( + cast(Type->getOperand(0))->getValue()) + ->getZExtValue(); + if (COffset == Offset) + return true; + } + return false; + } + + if (auto GEP = dyn_cast(V)) { + APInt APOffset(DL.getPointerSizeInBits(0), 0); + bool Result = GEP->accumulateConstantOffset(DL, APOffset); + if (!Result) + return false; + COffset += APOffset.getZExtValue(); + return isKnownTypeIdMember(TypeId, DL, GEP->getPointerOperand(), COffset); + } + + if (auto Op = dyn_cast(V)) { + if (Op->getOpcode() == Instruction::BitCast) + return isKnownTypeIdMember(TypeId, DL, Op->getOperand(0), COffset); + + if (Op->getOpcode() == Instruction::Select) + return isKnownTypeIdMember(TypeId, DL, Op->getOperand(1), COffset) && + isKnownTypeIdMember(TypeId, DL, Op->getOperand(2), COffset); + } + + return false; +} + +/// Replace each call to llvm.type.test with a corresponding range check. +void InterleaveVTablesModule::lowerTypeTestCalls( + ArrayRef TypeIds, GlobalVariable *InterleavedVTable) { + for (Metadata *TypeId : TypeIds) { + TypeIdUserInfo &TIUI = TypeIdUsers[TypeId]; + if (TIUI.CallSites.size() == 0) + continue; + NumTotalChecks += TIUI.CallSites.size(); + + // The index of address point of the first compatible vtable + // in the interleaved layout. 
+ size_t FirstAddrPtIndex = 2 * InterleavingInfoMap[TypeId].StartIndex + 2; + size_t Range = InterleavingInfoMap[TypeId].EndIndex - + InterleavingInfoMap[TypeId].StartIndex; + + // For each type, if the compatible address points are consecutive in the + // interleaved layout, we only need to use a range check to replace the + // llvm.type.test calls on this type; otherwise, we will need to create a + // bitset for the type. To do that, we simply mark the address points that + // are compatible with the type with type metadata, and let LowerTypeTest + // pass, which will be executed after this pass, to lower the llvm.type.test + // calls on this type. + if (InterleavingInfoMap[TypeId].CompatibleIndices.size() != Range) { + NumComplicatedChecks += TIUI.CallSites.size(); + // there is at least one address point in the range that is not compatible + // with TypeId, so we will mark all the address points compatible with + // TypeId with type metadata. + for (size_t Index : InterleavingInfoMap[TypeId].CompatibleIndices) { + unsigned ByteOffset = (2 * Index + 2) * (unsigned)EntrySize; + InterleavedVTable->addTypeMetadata(ByteOffset, TypeId, 0); + } + continue; + } + + Constant *InterleavedVTIdxs[] = { + ConstantInt::get(Int32Ty, 0), + ConstantInt::get(Int32Ty, FirstAddrPtIndex)}; + Type *VTableType = InterleavedVTable->getValueType(); + Constant *FirstAddrPt = ConstantExpr::getGetElementPtr( + VTableType, InterleavedVTable, InterleavedVTIdxs); + Constant *FirstAddrPtAsInt = + ConstantExpr::getPtrToInt(FirstAddrPt, IntPtrTy); + + for (CallInst *CI : TIUI.CallSites) { + // Value to replace calls to llvm.type.test + Value *Lowered = nullptr; + Value *Ptr = CI->getArgOperand(0); + const DataLayout &DL = M.getDataLayout(); + if (isKnownTypeIdMember(TypeId, DL, Ptr, 0)) { + NumSafeChecks++; + Lowered = ConstantInt::getTrue(M.getContext()); + } else { + IRBuilder<> B(CI); + Value *PtrAsInt = B.CreatePtrToInt(Ptr, IntPtrTy); + if (Range == 1) { + NumSingletonRangeChecks++; + 
Lowered = B.CreateICmpEQ(PtrAsInt, FirstAddrPtAsInt); + } else { + // We need to check that the offset both falls within our range and is + // suitably aligned. We can check both properties at the same time by + // performing a right rotate by log2(2*entry_size) followed by an + // integer comparison against the range size. The rotate will move the + // lower order bits that need to be zero into the higher order bits of + // the result, causing the comparison to fail if they are nonzero. + NumNormalRangeChecks++; + Value *PtrOffset = B.CreateSub(PtrAsInt, FirstAddrPtAsInt); + size_t RotateAmount = countTrailingZeros(EntrySize * 2, ZB_Undefined); + Constant *RotateAmountConstant = + ConstantInt::get(IntPtrTy, RotateAmount); + Value *OffsetSHR = B.CreateLShr(PtrOffset, RotateAmountConstant); + Constant *RangeConstant = ConstantInt::get(IntPtrTy, Range - 1); + Lowered = B.CreateICmpULE(OffsetSHR, RangeConstant); + } + } + CI->replaceAllUsesWith(Lowered); + CI->eraseFromParent(); + } + } +} + +int InterleaveVTablesModule::getAggregateElementNum(Constant *C) { + if (const ConstantAggregate *CC = dyn_cast(C)) + return CC->getNumOperands(); + + if (const ConstantAggregateZero *CAZ = dyn_cast(C)) + return CAZ->getNumElements(); + + if (const UndefValue *UV = dyn_cast(C)) + return UV->getNumElements(); + + if (const ConstantDataSequential *CDS = dyn_cast(C)) + return CDS->getNumElements(); + + return -1; +} + +/// Interleave the vtables and lower the llvm.type.test calls. +void InterleaveVTablesModule::interleaveGlobalVariables( + ArrayRef TypeIds, ArrayRef Groups, + ArrayRef OrderedGlobals, + // Map each vtable to the index of the entry at the address point + DenseMap &AddressPointIndices) { + // Two temporary vectors of entries to be interleaved: + // UpperEntries is initialized with offset-to-top entries. + // LowerEntreis is initialized with RTTI entries. 
+ std::vector UpperEntries; + std::vector LowerEntries; + + // The interleaved layout after merging UpperEntries and LowerEntries. + std::vector InterleavedEntries; + + // Map each vtable to the index of its address point in the interleaved + // layout. + DenseMap AddrPoints; + + // For each vtable, map the old offset of each entry to the new offset in the + // interleaved layout of the entry. This table is also used for verifying the + // interleaved layout. + DenseMap> OldToNewOffsets; + + // Initialize UpperEntries and LowerEntries. + for (auto GV : OrderedGlobals) { + NumVTableObjects++; + int GVSize = getAggregateElementNum(GV->getInitializer()); + if (GVSize < 0) + report_fatal_error("The global initializer is not an array."); + + NumTotalEntries += (unsigned)GVSize; + Constant *UpperEntry = + GV->getInitializer()->getAggregateElement(AddressPointIndices[GV] - 2); + Constant *LowerEntry = + GV->getInitializer()->getAggregateElement(AddressPointIndices[GV] - 1); + if (!UpperEntry || !LowerEntry) + report_fatal_error("An invalid vtable entry is found."); + UpperEntries.push_back(UpperEntry); + LowerEntries.push_back(LowerEntry); + + // In the interleaving layout, the index of the address point for each + // vtable is the index of the entry immediately following the RTTI entry. + // Since we store RTTI entries in LowerEntries, the twice of its size is the + // index of the entry at the address point. + AddrPoints[GV] = LowerEntries.size() * 2; + + // Offsets for offset-to-top and RTTI do not change. + OldToNewOffsets[GV][-2] = -2; + OldToNewOffsets[GV][-1] = -1; + } + + std::vector *CurList; + + // Index of the first entry of each group in the interleaved layout. + int64_t InterleavedIndex; + for (const EntryGroup &Group : Groups) { + Metadata *TypeId = Group.TypeId; + + // Choose the shorter list to add the entries of current entry group. 
+ if (UpperEntries.size() <= LowerEntries.size()) { + CurList = &UpperEntries; + InterleavedIndex = 2 * UpperEntries.size(); + } else { + CurList = &LowerEntries; + InterleavedIndex = 2 * LowerEntries.size() + 1; + } + + // Iterate through the compatible vtables of this entry group and + // add all the entries of this group to CurList. + size_t StartIndex = InterleavingInfoMap[TypeId].StartIndex; + size_t EndIndex = InterleavingInfoMap[TypeId].EndIndex; + int64_t Offset = Group.ByteOffset / (int64_t)EntrySize; + assert(Offset != -1 && Offset != -2 && + "There should not be any offset global variables for offset-to-top " + "or RTTI"); + + for (size_t I = StartIndex; I < EndIndex; I++) { + GlobalVariable *GV = OrderedGlobals[I]; + Constant *Entry = GV->getInitializer()->getAggregateElement( + Offset + (int64_t)AddressPointIndices[GV]); + if (!Entry) + report_fatal_error("Invalid VTable entry is found."); + CurList->push_back(Entry); + } + + // All the entries in a entry group have the same offset in the + // interleaved layout so here we compute the offset by using + // the entry in the first vtable of the range. + GlobalVariable *FirstVT = OrderedGlobals[StartIndex]; + int64_t NewOffset = InterleavedIndex - AddrPoints[FirstVT]; + OldToNewOffsets[FirstVT][Offset] = NewOffset; + + // Replace references to the offset global variable with the newly computed + // offset in the interleaving layout. 
+ GlobalVariable *OffsetGlobal = Group.Global; + int64_t NewOffsetInBytes = NewOffset * (int64_t)EntrySize; + OffsetGlobal->replaceAllUsesWith(ConstantExpr::getIntToPtr( + ConstantInt::get(IntPtrTy, NewOffsetInBytes), OffsetGlobal->getType())); + OffsetGlobal->eraseFromParent(); + } + + NumUsedEntries += UpperEntries.size() + LowerEntries.size(); + + // Pad the shorter list + if (UpperEntries.size() != LowerEntries.size()) { + if (UpperEntries.size() < LowerEntries.size()) { + NumPaddingEntries += LowerEntries.size() - UpperEntries.size(); + CurList = &UpperEntries; + } else { + NumPaddingEntries += UpperEntries.size() - LowerEntries.size(); + CurList = &LowerEntries; + } + + while (UpperEntries.size() != LowerEntries.size()) + CurList->push_back(ConstantPointerNull::get(Int8PtrTy)); + } + + // Interleave the two vectors + for (size_t I = 0; I < UpperEntries.size(); I++) { + InterleavedEntries.push_back(UpperEntries[I]); + InterleavedEntries.push_back(LowerEntries[I]); + } + + // Verify that each entry in a old VTable has the same content as the + // corresponding entry in the interleaved table does. +#ifdef ENABLE_EXPENSIVE_CHECKS + for (const auto &GV : OrderedGlobals) { + Constant *Array = GV->getInitializer(); + bool IsDataArray = false; + if (ConstantDataArray *CDA = dyn_cast(Array)) + IsDataArray = true; + int64_t Size = IsDataArray + ? 
dyn_cast(Array)->getNumElements() + : dyn_cast(Array)->getNumOperands(); + for (int64_t I = 0; I < Size; ++I) { + if (OldToNewOffsets[GV].find(I - (int64_t)AddressPointIndices[GV]) == + OldToNewOffsets[GV].end()) + continue; + int64_t OldOffsetFromAddrPt = I - (int64_t)AddressPointIndices[GV]; + int64_t NewOffsetFromAddrPt = OldToNewOffsets[GV][OldOffsetFromAddrPt]; + uint64_t NewOffsetFromBeginning = + (uint64_t)(NewOffsetFromAddrPt + AddrPoints[GV]); + if (Array->getAggregateElement(I) == + InterleavedEntries[NewOffsetFromBeginning]) + continue; + + LLVM_DEBUG( + dbgs() << "The original VTable " << *GV + << " and the interleaved VTable have different values for the " + << I << "th entry\n";); + report_fatal_error( + "The old VTable and the interleaved VTable are inconsistent."); + } + } +#endif + + // Make sure that each element in InterleavedEntries is of type Int8Ptr + for (auto &Elem : InterleavedEntries) + if (Elem->getType() != Int8PtrTy) { + if (Elem->getType()->isPointerTy()) + Elem = ConstantExpr::getBitCast(Elem, Int8PtrTy); + else + Elem = ConstantExpr::getIntToPtr(Elem, Int8PtrTy); + } + + Constant *NewInit = ConstantArray::get( + ArrayType::get(Int8PtrTy, InterleavedEntries.size()), InterleavedEntries); + auto *InterleavedVTable = new GlobalVariable(M, NewInit->getType(), true, + GlobalValue::PrivateLinkage, + NewInit, "interleavedvtable"); + + // Lower calls to llvm.type.test + lowerTypeTestCalls(TypeIds, InterleavedVTable); + + // Build aliases pointing to offsets into the interleaved vtable for each + // vtable from which we built the interleaved one, and replace references + // to the original vtables with references to the aliases. Note that the + // front-end calculates the address point of a vtable by adding the offset + // between the start of the vtable and the address point in the original + // layout to the start of the vtable. 
To ensure this calculation still works, + // we replace each reference to a vtable with the address of the new address + // point minus the the offset between the start of the vtable and the address + // point in the original layout. + + // The first address point in the interleaved layout always starts at offset + // 2, and the beginnings of two adjacent address points are off by two. + int32_t StartOffset = 2; + for (auto GV : OrderedGlobals) { + + Constant *CombinedGlobalIdxs[] = { + ConstantInt::get(Int32Ty, 0), + ConstantInt::get(Int32Ty, + StartOffset - (int32_t)AddressPointIndices[GV])}; + Constant *CombinedGlobalElemPtr = ConstantExpr::getGetElementPtr( + NewInit->getType(), InterleavedVTable, CombinedGlobalIdxs); + assert(GV->getType()->getAddressSpace() == 0); + GlobalAlias *GAlias = GlobalAlias::create(Int8PtrTy, 0, GV->getLinkage(), + "", CombinedGlobalElemPtr, &M); + GAlias->setVisibility(GV->getVisibility()); + GAlias->takeName(GV); + GV->replaceAllUsesWith(ConstantExpr::getBitCast(GAlias, GV->getType())); + GV->eraseFromParent(); + StartOffset += 2; + } +} + +/// Traverse the type hierarchy in the pre-order to +/// (1) order vtables by a pre-order traversal of the type hierarchy +/// This ensures that for any class all the compatible virtual tables +/// will appear consecutively. +/// (2) collect the range of vtables that each type is compatible with. +/// (3) collect unique entry groups. +/// +/// BaseType: the root type of the disjoint hierarchy. +/// Since this pass only runs after vtables have been split, +/// it is guaranteed that there is only one base type. +/// +/// OrderedGlobals: the result of all the vtables ordered +/// by a pre-order traversal of the disjoint hierarchy. +/// +/// Groups: the result of all the unique entry groups. +/// +/// ProcessedGroups: all the entry groups that the +/// current types's ancestor types have and one offset +/// global variable for this entry group. 
We need this because +/// we only want to add entry groups we haven't seen to +/// Groups. +void InterleaveVTablesModule::traverseHierarchy( + Metadata *BaseType, std::vector &OrderedGlobals, + std::vector &Groups, + std::map ProcessedGroups) { + TypeInterleavingInfo &TII = InterleavingInfoMap[BaseType]; + + // Set the start index of the vtable in the list for the type. + TII.StartIndex = OrderedGlobals.size(); + + // Add each type's exclusive vtables to the list. Since we traverse the + // hierarchy in the pre-order, all the compatible vtables of a type will + // appear consecutively. + OrderedGlobals.insert(OrderedGlobals.end(), TII.ExclusiveVTables.begin(), + TII.ExclusiveVTables.end()); + + // Now we add referenced offsets of the type to Groups + // Note that it is possible that we already add that entry group when + // we processed an ancestor type of this type, so we only add groups + // that are not in ProcessedGroups. + for (auto &P : TII.OffsetGlobalMap) { + int64_t Offset = P.first; + GlobalVariable *GV = P.second; + + // If we have added this entry group, we don't want to do it again. + // Instead, we will replace all uses of the offset global variable + // associated with the one we saw first. + if (ProcessedGroups.find(Offset) != ProcessedGroups.end()) { + GV->replaceAllUsesWith(ProcessedGroups[Offset]); + GV->eraseFromParent(); + continue; + } + ProcessedGroups[Offset] = GV; + Groups.emplace_back(EntryGroup{BaseType, Offset, GV}); + } + + // Now call this function recursively on all the children types. + for (auto Child : TII.ChildrenTypes) + traverseHierarchy(Child, OrderedGlobals, Groups, ProcessedGroups); + + // Now that all the compatible vtables of BaseType have been added to the + // ordered list, we can set the end index of the compatible vtable range. + TII.EndIndex = OrderedGlobals.size(); +} + +/// Add a type and the set of compatible vtables to the type hierarchy. 
It +/// is assumed that this function is called with types of increasing size of +/// compatible vtable sets. +/// We need ProcessedTypes to keep track of types for which we have found a +/// parent type. Here is an example for why we need ProcessedTypes. +/// A +/// v / \ v +/// B C +/// \ / +/// D +/// Assume that A is the primary base for B and C (so C-in-D is not compatible +/// with A) and the diagnose mode is on (so the "all-vtables" type has an +/// address point at every possible address point). Without ProcessedTypes, A +/// would be first determined as a parent node for C since A is compatible with +/// the vtable for C, an exclusive vtable of C, and A has more compatible +/// vtables than C. Then all-vtables would be also considered as a parent node +/// for C because all-vtables is compatible with the vtable for C-in-D, another +/// exclusive vtable of C, and all-vtables also has more compatible vtables than +/// C, leading to an abnormal structure in which a node may have more than one +/// parent nodes. +void InterleaveVTablesModule::addType( + Metadata *TypdId, SetVector &Globals, + std::map &TypeMap, + std::set &ProcessedTypes) { + + TypeInterleavingInfo &TII = InterleavingInfoMap[TypdId]; + for (auto GV : Globals) { + // If we haven't seen this vtable, it means that it is exclusive to this + // type. + if (TypeMap.find(GV) == TypeMap.end()) + TII.ExclusiveVTables.push_back(GV); + // If we have seen this vtable, the last seen type of this vtable is a child + // of the current type. + else if (ProcessedTypes.find(TypeMap[GV]) == ProcessedTypes.end()) { + TII.ChildrenTypes.insert(TypeMap[GV]); + ProcessedTypes.insert(TypeMap[GV]); + } + TypeMap[GV] = TypdId; + } +} + +/// Given the list of sorted types and corresponding compatible vtables in the +/// order of increasing size of compatible set, build the type hierarchy. 
Since +/// vtables have been split, this function does a depth-first traversal in the +/// type hierarchy and builds it from bottom up. +void InterleaveVTablesModule::buildHierarchy( + std::vector>> + &TypeGlobals) { + // Map from a vtable to the last seen type that this vtable is compatible + // with. + std::map TypeMap; + // Set of types that we have found parent types. + std::set ProcessedTypes; + + for (auto &TypeAndGlobals : TypeGlobals) { + Metadata *TypeId = TypeAndGlobals.first; + SetVector &Globals = TypeAndGlobals.second; + addType(TypeId, Globals, TypeMap, ProcessedTypes); + } +} + +void InterleaveVTablesModule::handleDisjointSet( + ArrayRef TypeIds, ArrayRef Globals, + DenseMap &AddressPointIndices, + DenseMap> &TypeLevels) { + // For each type identifier, build a set of vtables that refer to members of + // the type identifier. + MapVector> TypeMembersMap; + + for (auto TypeId : TypeIds) + TypeMembersMap.insert( + std::make_pair(TypeId, SetVector{})); + + for (GlobalTypeMember *GTM : Globals) { + for (MDNode *Type : GTM->types()) { + // Type = { offset, type identifier, level } + Metadata *TypeId = Type->getOperand(1); + // We only care about type id's that appear in TypeIds + if (TypeMembersMap.find(TypeId) != TypeMembersMap.end()) + TypeMembersMap[TypeId].insert(GTM->getGlobal()); + } + } + + // Order the sets of vtables by size. For the types having the same size of + // vtable sets, order them by the inheritance level. We want the more derived + // types appear before the less derived types. The higher the level is, the + // more derived the associated type is. 
+ std::vector>> + TypeMembersVector(TypeMembersMap.begin(), TypeMembersMap.end()); + std::stable_sort( + TypeMembersVector.begin(), TypeMembersVector.end(), + [&](const std::pair> &O1, + const std::pair> &O2) { + if (O1.second.size() != O2.second.size()) + return O1.second.size() < O2.second.size(); + for (GlobalVariable *GV : O1.second) + if (O2.second.count(GV) != 0) + return TypeLevels[GV][O1.first] > TypeLevels[GV][O2.first]; + return false; + }); + + // Build the type hierarchy. + buildHierarchy(TypeMembersVector); + + // All the entry groups in this disjoint hierarchy. + std::vector Groups; + + // A vector of vtables ordered by a pre-order + // traversal of the disjoint hierarchy. + std::vector OrderedGlobals; + + // Since we have sorted TypeMembers in the order of increasing size, + // the last element in it is the base type of this disjoint hierarchy. + Metadata *BaseType = TypeMembersVector.back().first; + + traverseHierarchy(BaseType, OrderedGlobals, Groups, {}); + + // Sort entry groups in the order of decreasing size. + std::stable_sort(Groups.begin(), Groups.end(), + [&](const EntryGroup &Left, const EntryGroup &Right) { + size_t LeftSize = + InterleavingInfoMap[Left.TypeId].EndIndex - + InterleavingInfoMap[Left.TypeId].StartIndex; + size_t RightSize = + InterleavingInfoMap[Right.TypeId].EndIndex - + InterleavingInfoMap[Right.TypeId].StartIndex; + return LeftSize > RightSize; + }); + + // In some corner cases, the compatible vtables for a type may not be + // consecutive in the ordered vtable list. To handle those cases, + // for each type, we add indices of compatible vtables for the type to + // the type's CompatibleIndices list. + for (Metadata *TypeId : TypeIds) + for (size_t I = InterleavingInfoMap[TypeId].StartIndex; + I < InterleavingInfoMap[TypeId].EndIndex; I++) + if (TypeMembersMap[TypeId].count(OrderedGlobals[I]) == 1) + InterleavingInfoMap[TypeId].CompatibleIndices.push_back(I); + + // Create the interleaved layout. 
+ interleaveGlobalVariables(TypeIds, Groups, OrderedGlobals, + AddressPointIndices); +} + +template +void InterleaveVTablesModule::releaseMemory(std::vector &Vec, + DenseMap &Map) { + for (T &Elem : Vec) { + auto I = Map.find(Elem); + if (I != Map.end()) + Map.erase(I); + } +} + +/// Lower all type tests in this module. +InterleaveVTablesModule::InterleaveVTablesModule(Module &M) : M(M) {} + +bool InterleaveVTablesModule::lower() { + Function *TypeTestFunc = + M.getFunction(Intrinsic::getName(Intrinsic::type_test)); + + // Equivalence class set containing type identifiers and the vtables that + // reference them. This is used to partition the set of type identifiers in + // the module into disjoint sets. + using GlobalClassesTy = + EquivalenceClasses>; + GlobalClassesTy GlobalClasses; + + // Verify the type metadata and build a few data structures to let us + // efficiently enumerate the type identifiers associated with a vtable: + // a list of GlobalTypeMembers (a vtable stored alongside a vector + // of associated type metadata) and a mapping from type identifiers to their + // list of GlobalTypeMembers and last observed index in the list of vtables. + // The indices will be used later to deterministically order the list of type + // identifiers. + BumpPtrAllocator Alloc; + struct TIInfo { + unsigned UniqueId; + std::vector RefGlobals; + }; + DenseMap TypeIdInfo; + unsigned CurUniqueId = 0; + SmallVector TypeNodes; + // List of type ids associated with the found offset global variables. + SetVector OffsetGVTypeIds; + + // Map each vtable to the offset of the address point in bytes + DenseMap AddressPointIndices; + + // Map each vtable to the vtable's map between its compatible types and their + // inheritance levels. + DenseMap> TypeLevels; + + // Set of type ids that are associated with functions. This pass + // will not process calls to llvm.type.test whose second parameter + // is in this set. 
+ std::unordered_set ICallTypeIds; + + for (auto I = M.global_object_begin(); I != M.global_object_end();) { + TypeNodes.clear(); + + if (Function *F = dyn_cast(&*I)) { + // We do not check the validity of type metadata here + // because LowerTypeTests will do that anyway. + F->getMetadata(LLVMContext::MD_type, TypeNodes); + for (MDNode *Type : TypeNodes) + ICallTypeIds.insert(Type->getOperand(1)); + I++; + continue; + } + + GlobalVariable *GV = dyn_cast(&*I); + if (!GV || GV->isDeclarationForLinker()) { + I++; + continue; + } + + // Each entry group in vtables is identified by + // a (type id, offset in vtable) pair. The front-end generates + // a global variable for each used offset in vtable with + // offset.type metadata to associate each offset global variable with + // its type id. In the loop through all the global variables, + // we collect referenced offsets of each vtable. + GV->getMetadata(LLVMContext::MD_offset_type, TypeNodes); + if (!TypeNodes.empty()) { + if (TypeNodes.size() != 1) + report_fatal_error("A global variable can only be associated with one " + "type offset metadata"); + MDNode *Type = TypeNodes[0]; + if (Type->getNumOperands() != 2) + report_fatal_error( + "All operands of offset.type metadata must have 2 elements"); + // offset.type metadata has the format {type id, offset} + Metadata *OffsetGVTypeId = Type->getOperand(0); + TypeInterleavingInfo &TII = InterleavingInfoMap[OffsetGVTypeId]; + const ConstantInt *CI = + mdconst::dyn_extract(Type->getOperand(1)); + if (!CI) + report_fatal_error( + "The second operand of offset.type metadata must be an integer"); + int64_t Offset = CI->getSExtValue(); + + if (TII.OffsetGlobalMap.find(Offset) != TII.OffsetGlobalMap.end()) { + // We have seen an offset global variable for this offset, so + // we can erase this one after replacing all its uses with the old one. 
+ GlobalVariable *Prev = TII.OffsetGlobalMap[Offset]; + I++; + GV->replaceAllUsesWith(Prev); + GV->eraseFromParent(); + continue; + } else { + TII.OffsetGlobalMap[Offset] = GV; + OffsetGVTypeIds.insert(OffsetGVTypeId); + I++; + continue; + } + } + + TypeNodes.clear(); + GV->getMetadata(LLVMContext::MD_type, TypeNodes); + if (TypeNodes.empty()) { + I++; + continue; + } + + auto *GTM = GlobalTypeMember::create(Alloc, GV, TypeNodes); + uint64_t BytesBeforeAddrPt = UINT64_MAX; + for (MDNode *Type : TypeNodes) { + // Verify type metadata + if (Type->getNumOperands() != 3) + report_fatal_error("All operands of type metadata must have 3 elements " + "when vtable interleaving is enabled"); + if (GV->isThreadLocal()) + report_fatal_error("Bit set element may not be thread-local"); + if (GV->hasSection()) + report_fatal_error( + "A member of a type identifier may not have an explicit section"); + + // FIXME: We previously checked that global var member of a type + // identifier must be a definition, but the IR linker may leave type + // metadata on declarations. We should restore this check after fixing + // PR31759. 
+ + auto OffsetConstMD = dyn_cast(Type->getOperand(0)); + if (!OffsetConstMD) + report_fatal_error("Type offset must be a constant"); + auto OffsetInt = dyn_cast(OffsetConstMD->getValue()); + if (!OffsetInt) + report_fatal_error("Type offset must be an integer constant"); + + auto LevelConstMD = dyn_cast(Type->getOperand(2)); + if (!LevelConstMD) + report_fatal_error("Type level must be a constant"); + auto LevelInt = dyn_cast(LevelConstMD->getValue()); + if (!LevelInt) + report_fatal_error("Type Level must be an integer constant"); + + // Offset of type metadata + uint64_t Offset = OffsetInt->getZExtValue(); + // Level of type metadata + uint64_t Level = LevelInt->getZExtValue(); + // Type of type metadata + Metadata *TypeId = Type->getOperand(1); + + // Type metadata of a virtual table should have the same offset + if (BytesBeforeAddrPt == UINT64_MAX) + BytesBeforeAddrPt = Offset; + else if (BytesBeforeAddrPt != Offset) + report_fatal_error("Multiple type metadata offsets."); + + auto &Info = TypeIdInfo[TypeId]; + Info.UniqueId = ++CurUniqueId; + Info.RefGlobals.push_back(GTM); + + if (TypeLevels.find(GV) == TypeLevels.end()) + TypeLevels.insert({GV, {}}); + if (TypeLevels[GV].find(TypeId) != TypeLevels[GV].end()) { + if (TypeLevels[GV][TypeId] != Level) + report_fatal_error( + "Multiple levels for the same type on the same vtable."); + } else + TypeLevels[GV].insert({TypeId, Level}); + } + AddressPointIndices[GTM->getGlobal()] = BytesBeforeAddrPt / EntrySize; + I++; + } + + auto AddTypeIdUse = [&](Metadata *TypeId) -> TypeIdUserInfo & { + // Add the call site to the list of call sites for this type identifier. We + // also use TypeIdUsers to keep track of whether we have seen this type + // identifier before. If we have, we don't need to re-add the referenced + // vtables to the equivalence class. + auto Ins = TypeIdUsers.insert({TypeId, {}}); + if (Ins.second) { + // Add the type identifier to the equivalence class. 
+ GlobalClassesTy::iterator GCI = GlobalClasses.insert(TypeId); + GlobalClassesTy::member_iterator CurSet = GlobalClasses.findLeader(GCI); + + // Add the referenced vtables to the type identifier's equivalence class. + for (GlobalTypeMember *GTM : TypeIdInfo[TypeId].RefGlobals) + CurSet = GlobalClasses.unionSets( + CurSet, GlobalClasses.findLeader(GlobalClasses.insert(GTM))); + } + + return Ins.first->second; + }; + + for (Metadata *TypeId : OffsetGVTypeIds) { + if (TypeIdInfo.find(TypeId) == TypeIdInfo.end() || + TypeIdInfo[TypeId].RefGlobals.size() == 0) { + for (auto &P : InterleavingInfoMap[TypeId].OffsetGlobalMap) { + GlobalVariable *OGV = P.second; + LLVM_DEBUG(dbgs() << "Type " << *TypeId + << " has no referenced VTables so all references to " + "its VTables will be replaced by undef values."); + OGV->replaceAllUsesWith(UndefValue::get(OGV->getType())); + OGV->eraseFromParent(); + } + InterleavingInfoMap.erase(TypeId); + } else + AddTypeIdUse(TypeId); + } + OffsetGVTypeIds.clear(); + + // If there are no offset global variables left and llvm.type.test is not + // used, there is nothing left to do. + if (TypeIdUsers.size() == 0 && (!TypeTestFunc || TypeTestFunc->use_empty())) + return false; + + if (TypeTestFunc) + for (const Use &U : TypeTestFunc->uses()) { + auto CI = cast(U.getUser()); + + auto TypeIdMDVal = dyn_cast(CI->getArgOperand(1)); + if (!TypeIdMDVal) + report_fatal_error( + "Second argument of llvm.type.test must be metadata"); + auto TypeId = TypeIdMDVal->getMetadata(); + + // If we haven't seen any vtable that is associated with the type via + // the type metadata, we replace the call to llvm.type.test with false. 
+ if (TypeIdInfo.find(TypeId) == TypeIdInfo.end() || + TypeIdInfo[TypeId].RefGlobals.size() == 0) { + if (ICallTypeIds.find(TypeId) == ICallTypeIds.end()) { + LLVM_DEBUG(dbgs() << "Type " << *TypeId + << " has no referenced VTables so all calls to " + "llvm.type.test will be replaced by false."); + CI->replaceAllUsesWith(ConstantInt::getFalse(M.getContext())); + CI->eraseFromParent(); + } + } else + AddTypeIdUse(TypeId).CallSites.push_back(CI); + } + ICallTypeIds.clear(); + + if (GlobalClasses.empty()) + return false; + + // Build a list of disjoint sets ordered by their maximum global index for + // determinism. + std::vector> Sets; + for (GlobalClassesTy::iterator I = GlobalClasses.begin(), + E = GlobalClasses.end(); + I != E; ++I) { + if (!I->isLeader()) + continue; + ++NumTypeIdDisjointSets; + + unsigned MaxUniqueId = 0; + for (GlobalClassesTy::member_iterator MI = GlobalClasses.member_begin(I); + MI != GlobalClasses.member_end(); ++MI) { + if (auto *MD = MI->dyn_cast()) + MaxUniqueId = std::max(MaxUniqueId, TypeIdInfo[MD].UniqueId); + } + Sets.emplace_back(I, MaxUniqueId); + } + llvm::sort(Sets.begin(), Sets.end(), + [](const std::pair &S1, + const std::pair &S2) { + return S1.second < S2.second; + }); + + // For each disjoint set we found + for (const auto &S : Sets) { + // Build the list of type identifiers in this disjoint set. + std::vector TypeIds; + std::vector Globals; + for (GlobalClassesTy::member_iterator MI = + GlobalClasses.member_begin(S.first); + MI != GlobalClasses.member_end(); ++MI) { + if (MI->is()) + TypeIds.push_back(MI->get()); + else if (MI->is()) + Globals.push_back(MI->get()); + } + + // Order type identifiers by unique ID for determinism. This ordering is + // stable as there is a one-to-one mapping between metadata and unique IDs. 
+ llvm::sort(TypeIds.begin(), TypeIds.end(), [&](Metadata *M1, Metadata *M2) { + return TypeIdInfo[M1].UniqueId < TypeIdInfo[M2].UniqueId; + }); + + // Interleave virtual tables and lower calls to llvm.type.test. + handleDisjointSet(TypeIds, Globals, AddressPointIndices, TypeLevels); + + // Since we are done with all the types and globals in this disjoint type + // hierarchy, we can release the memory used by them. + releaseMemory(TypeIds, TypeIdInfo); + releaseMemory(TypeIds, TypeIdUsers); + releaseMemory(TypeIds, + InterleavingInfoMap); + } + + return true; +} + +PreservedAnalyses InterleaveVTablesPass::run(Module &M, + ModuleAnalysisManager &AM) { + bool Changed = InterleaveVTablesModule(M).lower(); + if (!Changed) + return PreservedAnalyses::all(); + return PreservedAnalyses::none(); +} Index: llvm/lib/Transforms/IPO/LowerTypeTests.cpp =================================================================== --- llvm/lib/Transforms/IPO/LowerTypeTests.cpp +++ llvm/lib/Transforms/IPO/LowerTypeTests.cpp @@ -1166,8 +1166,9 @@ } void LowerTypeTestsModule::verifyTypeMDNode(GlobalObject *GO, MDNode *Type) { - if (Type->getNumOperands() != 2) - report_fatal_error("All operands of type metadata must have 2 elements"); + if (Type->getNumOperands() != 2 && Type->getNumOperands() != 3) + report_fatal_error( + "All operands of type metadata must have either 2 or 3 elements"); if (GO->isThreadLocal()) report_fatal_error("Bit set element may not be thread-local"); Index: llvm/lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -147,6 +147,10 @@ "enable-order-file-instrumentation", cl::init(false), cl::Hidden, cl::desc("Enable order file instrumentation (default = off)")); +static cl::opt + VTableInterleaving("vtable-interleaving", cl::init(false), + cl::desc("Enable VTables interleaving")); + 
PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; @@ -1031,9 +1035,19 @@ // in the current module. PM.add(createCrossDSOCFIPass()); - // Lower type metadata and the type.test intrinsic. This pass supports Clang's - // control flow integrity mechanisms (-fsanitize=cfi*) and needs to run at - // link time if CFI is enabled. The pass does nothing if CFI is disabled. + if (VTableInterleaving) { + // When OptLevel = 0, addLTOOptimizationPasses is not executed so that + // vtables are not split. However, the interleaving pass only works on split + // vtables so here we make sure that vtables are split. + if (OptLevel == 0) + PM.add(createGlobalSplitPass()); + PM.add(createInterleaveVTablesPass()); + } + + // Lower type metadata and the type.test intrinsic. This pass supports + // Clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs to + // run at link time if CFI is enabled. The pass does nothing if CFI is + // disabled. PM.add(createLowerTypeTestsPass(ExportSummary, nullptr)); if (OptLevel != 0) Index: llvm/test/ThinLTO/X86/lazyload_metadata.ll =================================================================== --- llvm/test/ThinLTO/X86/lazyload_metadata.ll +++ llvm/test/ThinLTO/X86/lazyload_metadata.ll @@ -10,13 +10,13 @@ ; RUN: llvm-lto -thinlto-action=import %t2.bc -thinlto-index=%t3.bc \ ; RUN: -o /dev/null -stats \ ; RUN: 2>&1 | FileCheck %s -check-prefix=LAZY -; LAZY: 63 bitcode-reader - Number of Metadata records loaded +; LAZY: 65 bitcode-reader - Number of Metadata records loaded ; LAZY: 2 bitcode-reader - Number of MDStrings loaded ; RUN: llvm-lto -thinlto-action=import %t2.bc -thinlto-index=%t3.bc \ ; RUN: -o /dev/null -disable-ondemand-mds-loading -stats \ ; RUN: 2>&1 | FileCheck %s -check-prefix=NOTLAZY -; NOTLAZY: 72 bitcode-reader - Number of Metadata records loaded +; NOTLAZY: 74 bitcode-reader - Number of Metadata records loaded ; NOTLAZY: 7 bitcode-reader - Number of MDStrings loaded Index: 
llvm/test/Transforms/InterleaveVTables/basic.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/InterleaveVTables/basic.ll @@ -0,0 +1,83 @@ +; RUN: opt -S -interleavevtables < %s | FileCheck %s + +target datalayout = "e-p:64:64" + +; Tests that this set of globals is interleaved according to our interleaving algorithm. +; (see interleaveGlobalVariables in lib/Transforms/IPO/InterleaveVTables.cpp). + +; CHECK: [[G:@[^ ]*]] = private constant [12 x i8*] [i8* null, i8* inttoptr (i64 1 to i8*), i8* inttoptr (i64 4 to i8*), i8* inttoptr (i64 5 to i8*), i8* inttoptr (i64 8 to i8*), i8* inttoptr (i64 9 to i8*), i8* inttoptr (i64 2 to i8*), i8* inttoptr (i64 3 to i8*), i8* inttoptr (i64 6 to i8*), i8* inttoptr (i64 7 to i8*), i8* inttoptr (i64 10 to i8*), i8* inttoptr (i64 11 to i8*)] + +; CHECK: @a = alias i8*, getelementptr inbounds ([12 x i8*], [12 x i8*]* @interleavedvtable, i32 0, i32 0) +; CHECK: @b = alias i8*, getelementptr inbounds ([12 x i8*], [12 x i8*]* @interleavedvtable, i32 0, i32 2) +; CHECK: @c = alias i8*, getelementptr inbounds ([12 x i8*], [12 x i8*]* @interleavedvtable, i32 0, i32 4) +@a = constant [4 x i64] [i64 0, i64 1, i64 2, i64 3], !type !1 +@b = constant [4 x i64] [i64 4, i64 5, i64 6, i64 7], !type !1, !type !2 +@c = constant [4 x i64] [i64 8, i64 9, i64 10, i64 11], !type !1, !type !3 + +; CHECK-NOT: @__typeid{{[1-3]+}}$ + +@__typeid1$0 = global [0 x i32] zeroinitializer, !offset.type !4 +@__typeid1$8 = global [0 x i32] zeroinitializer, !offset.type !5 +@__typeid2$0 = global [0 x i32] zeroinitializer, !offset.type !6 +@__typeid2$8 = global [0 x i32] zeroinitializer, !offset.type !7 +@__typeid3$0 = global [0 x i32] zeroinitializer, !offset.type !8 +@__typeid3$8 = global [0 x i32] zeroinitializer, !offset.type !9 + +!1 = !{i64 16, !"typeid1", i64 2} +!2 = !{i64 16, !"typeid2", i64 3} +!3 = !{i64 16, !"typeid3", i64 3} + +!4 = !{!"typeid1", i64 0} +!5 = !{!"typeid1", i64 8} +!6 = 
!{!"typeid2", i64 0} +!7 = !{!"typeid2", i64 8} +!8 = !{!"typeid3", i64 0} +!9 = !{!"typeid3", i64 8} + +declare i1 @llvm.type.test(i8* %ptr, metadata %bitset) nounwind readnone + +; CHECK: @foo1(i8* [[A0:%[^ ]*]]) +define i1 @foo1(i8* %p) { + ; CHECK: [[R0:%[^ ]*]] = ptrtoint i8* [[A0]] to i64 + ; CHECK: [[R1:%[^ ]*]] = icmp eq i64 [[R0]], ptrtoint (i8** getelementptr inbounds ([12 x i8*], [12 x i8*]* [[G]], i32 0, i32 4) to i64) + %x = call i1 @llvm.type.test(i8* %p, metadata !"typeid2") + ; CHECK: ret i1 [[R1]] + ret i1 %x +} + +; CHECK: @foo2(i8* [[B0:%[^ ]*]]) +define i1 @foo2(i8* %p) { + ; CHECK: [[S0:%[^ ]*]] = ptrtoint i8* [[B0]] to i64 + ; CHECK: [[S1:%[^ ]*]] = icmp eq i64 [[S0]], ptrtoint (i8** getelementptr inbounds ([12 x i8*], [12 x i8*]* [[G]], i32 0, i32 6) to i64) + %x = call i1 @llvm.type.test(i8* %p, metadata !"typeid3") + ; CHECK: ret i1 [[S1]] + ret i1 %x +} + +; CHECK: @foo3(i8* [[C0:%[^ ]*]]) +define i1 @foo3(i8* %p) { + ; CHECK: [[T0:%[^ ]*]] = ptrtoint i8* [[C0]] to i64 + ; CHECK: [[T1:%[^ ]*]] = sub i64 [[T0]], ptrtoint (i8** getelementptr inbounds ([12 x i8*], [12 x i8*]* [[G]], i32 0, i32 2) to i64) + ; CHECK: [[T2:%[^ ]*]] = lshr i64 [[T1]], 4 + ; CHECK: [[T3:%[^ ]*]] = icmp ule i64 [[T2]], 2 + %x = call i1 @llvm.type.test(i8* %p, metadata !"typeid1") + ; CHECK: ret i1 [[T3]] + ret i1 %x +} + +; CHECK: @foo4() +define void @foo4() { + ; CHECK-NEXT: ptrtoint [0 x i32]* inttoptr (i64 32 to [0 x i32]*) to i64 + %x0 = ptrtoint [0 x i32]* @__typeid1$0 to i64 + ; CHECK-NEXT: ptrtoint [0 x i32]* inttoptr (i64 40 to [0 x i32]*) to i64 + %x1 = ptrtoint [0 x i32]* @__typeid1$8 to i64 + ; CHECK-NEXT: ptrtoint [0 x i32]* inttoptr (i64 32 to [0 x i32]*) to i64 + %x2 = ptrtoint [0 x i32]* @__typeid2$0 to i64 + ; CHECK-NEXT: ptrtoint [0 x i32]* inttoptr (i64 40 to [0 x i32]*) to i64 + %x3 = ptrtoint [0 x i32]* @__typeid2$8 to i64 + ; CHECK-NEXT: ptrtoint [0 x i32]* inttoptr (i64 32 to [0 x i32]*) to i64 + %x4 = ptrtoint [0 x i32]* @__typeid3$0 to i64 
+ ; CHECK-NEXT: ptrtoint [0 x i32]* inttoptr (i64 40 to [0 x i32]*) to i64 + %x5 = ptrtoint [0 x i32]* @__typeid3$8 to i64 + ret void +} Index: llvm/test/Transforms/InterleaveVTables/incomplete.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/InterleaveVTables/incomplete.ll @@ -0,0 +1,83 @@ +; RUN: opt -S -interleavevtables < %s | FileCheck %s + +target datalayout = "e-p:64:64" + +; Tests that this set of globals is interleaved according to our interleaving algorithm. +; (see interleaveGlobalVariables in lib/Transforms/IPO/InterleaveVTables.cpp). +; This test is similar to basic.ll but some entries in the virtual tables are not referenced. + +; The first list initialized with offset-to-top entries: [0, 4, 8] +; The second list initialized with RTTI entries: [1, 5, 9] +; Lists of entries that have to have the same distance [2, 6, 10] [7] [11] +; The first list before merging: [0, 4, 8, 2, 6, 10] +; The second list before merging: [1, 5, 9, 7, 11, 0] +; The final layout: [0, 1, 4, 5, 8, 9, 2, 7, 6, 11, 10, 0] + +; CHECK: [[G:@[^ ]*]] = private constant [12 x i8*] [i8* null, i8* inttoptr (i64 1 to i8*), i8* inttoptr (i64 4 to i8*), i8* inttoptr (i64 5 to i8*), i8* inttoptr (i64 8 to i8*), i8* inttoptr (i64 9 to i8*), i8* inttoptr (i64 2 to i8*), i8* inttoptr (i64 7 to i8*), i8* inttoptr (i64 6 to i8*), i8* inttoptr (i64 11 to i8*), i8* inttoptr (i64 10 to i8*), i8* null] + +; CHECK: @a = alias i8*, getelementptr inbounds ([12 x i8*], [12 x i8*]* @interleavedvtable, i32 0, i32 0) +; CHECK: @b = alias i8*, getelementptr inbounds ([12 x i8*], [12 x i8*]* @interleavedvtable, i32 0, i32 2) +; CHECK: @c = alias i8*, getelementptr inbounds ([12 x i8*], [12 x i8*]* @interleavedvtable, i32 0, i32 4) +@a = constant [4 x i64] [i64 0, i64 1, i64 2, i64 3], !type !1 +@b = constant [4 x i64] [i64 4, i64 5, i64 6, i64 7], !type !1, !type !2 +@c = constant [4 x i64] [i64 8, i64 9, i64 10, i64 11], !type !1, !type !3 + +; 
CHECK-NOT: @__typeid{{[1-3]+}}$ + +@__typeid1$0 = global [0 x i32] zeroinitializer, !offset.type !4 +@__typeid2$0 = global [0 x i32] zeroinitializer, !offset.type !5 +@__typeid2$8 = global [0 x i32] zeroinitializer, !offset.type !6 +@__typeid3$8 = global [0 x i32] zeroinitializer, !offset.type !7 + +!1 = !{i64 16, !"typeid1", i64 2} +!2 = !{i64 16, !"typeid2", i64 3} +!3 = !{i64 16, !"typeid3", i64 3} + +!4 = !{!"typeid1", i64 0} +!5 = !{!"typeid2", i64 0} +!6 = !{!"typeid2", i64 8} +!7 = !{!"typeid3", i64 8} + +declare i1 @llvm.type.test(i8* %ptr, metadata %bitset) nounwind readnone + +; CHECK: @foo1(i8* [[A0:%[^ ]*]]) +define i1 @foo1(i8* %p) { + ; CHECK: [[R0:%[^ ]*]] = ptrtoint i8* [[A0]] to i64 + ; CHECK: [[R1:%[^ ]*]] = icmp eq i64 [[R0]], ptrtoint (i8** getelementptr inbounds ([12 x i8*], [12 x i8*]* [[G]], i32 0, i32 4) to i64) + %x = call i1 @llvm.type.test(i8* %p, metadata !"typeid2") + ; CHECK: ret i1 [[R1]] + ret i1 %x +} + +; CHECK: @foo2(i8* [[B0:%[^ ]*]]) +define i1 @foo2(i8* %p) { + ; CHECK: [[S0:%[^ ]*]] = ptrtoint i8* [[B0]] to i64 + ; CHECK: [[S1:%[^ ]*]] = icmp eq i64 [[S0]], ptrtoint (i8** getelementptr inbounds ([12 x i8*], [12 x i8*]* [[G]], i32 0, i32 6) to i64) + %x = call i1 @llvm.type.test(i8* %p, metadata !"typeid3") + ; CHECK: ret i1 [[S1]] + ret i1 %x +} + +; CHECK: @foo3(i8* [[C0:%[^ ]*]]) +define i1 @foo3(i8* %p) { + ; CHECK: [[T0:%[^ ]*]] = ptrtoint i8* [[C0]] to i64 + ; CHECK: [[T1:%[^ ]*]] = sub i64 [[T0]], ptrtoint (i8** getelementptr inbounds ([12 x i8*], [12 x i8*]* [[G]], i32 0, i32 2) to i64) + ; CHECK: [[T2:%[^ ]*]] = lshr i64 [[T1]], 4 + ; CHECK: [[T3:%[^ ]*]] = icmp ule i64 [[T2]], 2 + %x = call i1 @llvm.type.test(i8* %p, metadata !"typeid1") + ; CHECK: ret i1 [[T3]] + ret i1 %x +} + +; CHECK: @foo4() +define void @foo4() { + ; CHECK-NEXT: ptrtoint [0 x i32]* inttoptr (i64 32 to [0 x i32]*) to i64 + %x0 = ptrtoint [0 x i32]* @__typeid1$0 to i64 + ; CHECK-NEXT: ptrtoint [0 x i32]* inttoptr (i64 32 to [0 x i32]*) to i64 + 
%x1 = ptrtoint [0 x i32]* @__typeid2$0 to i64 + ; CHECK-NEXT: ptrtoint [0 x i32]* inttoptr (i64 24 to [0 x i32]*) to i64 + %x2 = ptrtoint [0 x i32]* @__typeid2$8 to i64 + ; CHECK-NEXT: ptrtoint [0 x i32]* inttoptr (i64 24 to [0 x i32]*) to i64 + %x3 = ptrtoint [0 x i32]* @__typeid3$8 to i64 + ret void +} Index: llvm/test/Transforms/InterleaveVTables/vbase.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/InterleaveVTables/vbase.ll @@ -0,0 +1,117 @@ +; RUN: opt -S -interleavevtables < %s | FileCheck %s + +target datalayout = "e-p:64:64" + +; Tests that this set of globals is interleaved according to our interleaving algorithm. +; (see interleaveGlobalVariables in lib/Transforms/IPO/InterleaveVTables.cpp). +; This test simulates vtables with virtual bases. + +; The first list initialized with offset-to-top entries: [0, 5, 11] +; The second list initialized with RTTI entries: [1, 6, 12] +; Lists of entries that have to have the same distance [2, 7, 13] [3, 8, 14] [4, 10] [9] +; The first list before merging: [0, 5, 11, 2, 7, 13, 4, 10] +; The second list before merging: [1, 6, 12, 3, 8, 14, 9, 0] +; The final layout: [0, 1, 5, 6, 11, 12, 2, 3, 7, 8, 13, 14, 4, 9, 10, 0] + +; CHECK: [[G:@[^ ]*]] = private constant [16 x i8*] [i8* null, i8* inttoptr (i64 1 to i8*), i8* inttoptr (i64 5 to i8*), i8* inttoptr (i64 6 to i8*), i8* inttoptr (i64 11 to i8*), i8* inttoptr (i64 12 to i8*), i8* inttoptr (i64 2 to i8*), i8* inttoptr (i64 3 to i8*), i8* inttoptr (i64 7 to i8*), i8* inttoptr (i64 8 to i8*), i8* inttoptr (i64 13 to i8*), i8* inttoptr (i64 14 to i8*), i8* inttoptr (i64 4 to i8*), i8* inttoptr (i64 9 to i8*), i8* inttoptr (i64 10 to i8*), i8* null] + +; CHECK: @a = alias i8*, getelementptr inbounds ([16 x i8*], [16 x i8*]* @interleavedvtable, i32 0, i32 0) +; CHECK: @b = alias i8*, getelementptr inbounds ([16 x i8*], [16 x i8*]* @interleavedvtable, i32 0, i32 1) +; CHECK: @c = alias i8*, getelementptr inbounds 
([16 x i8*], [16 x i8*]* @interleavedvtable, i32 0, i32 2) +@a = constant [4 x i64] [i64 0, i64 1, i64 2, i64 3], !type !1 +@b = constant [5 x i64] [i64 4, i64 5, i64 6, i64 7, i64 8], !type !3, !type !4, !type !5 +@c = constant [6 x i64] [i64 9, i64 10, i64 11, i64 12, i64 13, i64 14], !type !7, !type !8, !type !9, !type !10 + +; CHECK-NOT: @__typeid{{[1-3]+}}$ + +@__typeid2$-24 = global [0 x i32] zeroinitializer, !offset.type !11 +@__typeid3$-24 = global [0 x i32] zeroinitializer, !offset.type !12 +@__typeid3$-32 = global [0 x i32] zeroinitializer, !offset.type !13 + +@__typeid1$0 = global [0 x i32] zeroinitializer, !offset.type !14 +@__typeid1$8 = global [0 x i32] zeroinitializer, !offset.type !15 +@__typeid2$0 = global [0 x i32] zeroinitializer, !offset.type !16 +@__typeid2$8 = global [0 x i32] zeroinitializer, !offset.type !17 +@__typeid3$0 = global [0 x i32] zeroinitializer, !offset.type !18 +@__typeid3$8 = global [0 x i32] zeroinitializer, !offset.type !19 + +!1 = !{i64 16, !"typeid1", i64 2} + +!3 = !{i64 24, !"typeid1", i64 2} +!4 = !{i64 24, !"typeid1.virtual", i64 3} +!5 = !{i64 24, !"typeid2", i64 4} + +!7 = !{i64 32, !"typeid1", i64 2} +!8 = !{i64 32, !"typeid1.virtual", i64 3} +!9 = !{i64 32, !"typeid2", i64 4} +!10 = !{i64 32, !"typeid3", i64 5} + +!11 = !{!"typeid2", i64 -24} +!12 = !{!"typeid3", i64 -24} +!13 = !{!"typeid3", i64 -32} + +!14 = !{!"typeid1", i64 0} +!15 = !{!"typeid1", i64 8} +!16 = !{!"typeid2", i64 0} +!17 = !{!"typeid2", i64 8} +!18 = !{!"typeid3", i64 0} +!19 = !{!"typeid3", i64 8} + +declare i1 @llvm.type.test(i8* %ptr, metadata %bitset) nounwind readnone + +; CHECK: @foo1(i8* [[A0:%[^ ]*]]) +define i1 @foo1(i8* %p) { + ; CHECK: [[R0:%[^ ]*]] = ptrtoint i8* [[A0]] to i64 + ; CHECK: [[R1:%[^ ]*]] = icmp eq i64 [[R0]], ptrtoint (i8** getelementptr inbounds ([16 x i8*], [16 x i8*]* [[G]], i32 0, i32 6) to i64) + %x = call i1 @llvm.type.test(i8* %p, metadata !"typeid3") + ; CHECK: ret i1 [[R1]] + ret i1 %x +} + +; CHECK: @foo2(i8* 
[[B0:%[^ ]*]]) +define i1 @foo2(i8* %p) { + ; CHECK: [[S0:%[^ ]*]] = ptrtoint i8* [[B0]] to i64 + ; CHECK: [[S1:%[^ ]*]] = sub i64 [[S0]], ptrtoint (i8** getelementptr inbounds ([16 x i8*], [16 x i8*]* [[G]], i32 0, i32 4) to i64) + ; CHECK: [[S2:%[^ ]*]] = lshr i64 [[S1]], 4 + ; CHECK: [[S3:%[^ ]*]] = icmp ule i64 [[S2]], 1 + %x = call i1 @llvm.type.test(i8* %p, metadata !"typeid2") + ; CHECK: ret i1 [[S3]] + ret i1 %x +} + +; CHECK: @foo3(i8* [[C0:%[^ ]*]]) +define i1 @foo3(i8* %p) { + ; CHECK: [[T0:%[^ ]*]] = ptrtoint i8* [[C0]] to i64 + ; CHECK: [[T1:%[^ ]*]] = sub i64 [[T0]], ptrtoint (i8** getelementptr inbounds ([16 x i8*], [16 x i8*]* [[G]], i32 0, i32 2) to i64) + ; CHECK: [[T2:%[^ ]*]] = lshr i64 [[T1]], 4 + ; CHECK: [[T3:%[^ ]*]] = icmp ule i64 [[T2]], 2 + %x = call i1 @llvm.type.test(i8* %p, metadata !"typeid1") + ; CHECK: ret i1 [[T3]] + ret i1 %x +} + +; CHECK: @foo4() +define void @foo4() { + ; CHECK-NEXT: ptrtoint [0 x i32]* inttoptr (i64 64 to [0 x i32]*) to i64 + %x0 = ptrtoint [0 x i32]* @__typeid2$-24 to i64 + ; CHECK-NEXT: ptrtoint [0 x i32]* inttoptr (i64 64 to [0 x i32]*) to i64 + %x1 = ptrtoint [0 x i32]* @__typeid3$-24 to i64 + ; CHECK-NEXT: ptrtoint [0 x i32]* inttoptr (i64 56 to [0 x i32]*) to i64 + %x2 = ptrtoint [0 x i32]* @__typeid3$-32 to i64 + + ; CHECK-NEXT: ptrtoint [0 x i32]* inttoptr (i64 32 to [0 x i32]*) to i64 + %x3 = ptrtoint [0 x i32]* @__typeid1$0 to i64 + ; CHECK-NEXT: ptrtoint [0 x i32]* inttoptr (i64 32 to [0 x i32]*) to i64 + %x4 = ptrtoint [0 x i32]* @__typeid2$0 to i64 + ; CHECK-NEXT: ptrtoint [0 x i32]* inttoptr (i64 32 to [0 x i32]*) to i64 + %x5 = ptrtoint [0 x i32]* @__typeid3$0 to i64 + + ; CHECK-NEXT: ptrtoint [0 x i32]* inttoptr (i64 40 to [0 x i32]*) to i64 + %x6 = ptrtoint [0 x i32]* @__typeid1$8 to i64 + ; CHECK-NEXT: ptrtoint [0 x i32]* inttoptr (i64 40 to [0 x i32]*) to i64 + %x7 = ptrtoint [0 x i32]* @__typeid2$8 to i64 + ; CHECK-NEXT: ptrtoint [0 x i32]* inttoptr (i64 40 to [0 x i32]*) to i64 + %x8 = 
ptrtoint [0 x i32]* @__typeid3$8 to i64 + + ret void +}