Index: llvm/include/llvm/InitializePasses.h =================================================================== --- llvm/include/llvm/InitializePasses.h +++ llvm/include/llvm/InitializePasses.h @@ -181,6 +181,7 @@ void initializeInstructionCombiningPassPass(PassRegistry&); void initializeInstructionSelectPass(PassRegistry&); void initializeInterleavedAccessPass(PassRegistry&); +void initializeInterleaveVTablesPass(PassRegistry&); void initializeInternalizeLegacyPassPass(PassRegistry&); void initializeIntervalPartitionPass(PassRegistry&); void initializeJumpThreadingPass(PassRegistry&); Index: llvm/include/llvm/Transforms/IPO.h =================================================================== --- llvm/include/llvm/Transforms/IPO.h +++ llvm/include/llvm/Transforms/IPO.h @@ -235,6 +235,9 @@ ModulePass *createLowerTypeTestsPass(ModuleSummaryIndex *ExportSummary, const ModuleSummaryIndex *ImportSummary); +/// This pass interleaves vtables and lowers llvm.typ.test intrinsic. +ModulePass *createInterleaveVTablesPass(); + /// This pass export CFI checks for use by external modules. ModulePass *createCrossDSOCFIPass(); Index: llvm/include/llvm/Transforms/IPO/InterleaveVTables.h =================================================================== --- /dev/null +++ llvm/include/llvm/Transforms/IPO/InterleaveVTables.h @@ -0,0 +1,105 @@ +//===- InterleaveVTables.h - VTables interleaving pass -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines parts of the vtable interleaving pass implementation that +// may be easily unit tested. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_IPO_INTERLEAVEVTABLES_H +#define LLVM_TRANSFORMS_IPO_INTERLEAVEVTABLES_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/PassManager.h" +#include +#include +#include +#include +#include + +namespace llvm { + +class Module; +class raw_ostream; + +namespace interleavevtables { + +/// This class implements a layout algorithm for globals referenced by bit sets +/// that tries to keep members of small bit sets together. This can +/// significantly reduce bit set sizes in many cases. +/// +/// It works by assembling fragments of layout from sets of referenced globals. +/// Each set of referenced globals causes the algorithm to create a new +/// fragment, which is assembled by appending each referenced global in the set +/// into the fragment. If a referenced global has already been referenced by an +/// fragment created earlier, we instead delete that fragment and append its +/// contents into the fragment we are assembling. +/// +/// By starting with the smallest fragments, we minimize the size of the +/// fragments that are copied into larger fragments. This is most intuitively +/// thought about when considering the case where the globals are virtual tables +/// and the bit sets represent their derived classes: in a single inheritance +/// hierarchy, the optimum layout would involve a depth-first search of the +/// class hierarchy (and in fact the computed layout ends up looking a lot like +/// a DFS), but a naive DFS would not work well in the presence of multiple +/// inheritance. This aspect of the algorithm ends up fitting smaller +/// hierarchies inside larger ones where that would be beneficial. +/// +/// For example, consider this class hierarchy: +/// +/// A B +/// \ / | \ +/// C D E +/// +/// We have five bit sets: bsA (A, C), bsB (B, C, D, E), bsC (C), bsD (D) and +/// bsE (E). If we laid out our objects by DFS traversing B followed by A, our +/// layout would be {B, C, D, E, A}. This is optimal for bsB as it needs to +/// cover the only 4 objects in its hierarchy, but not for bsA as it needs to +/// cover 5 objects, i.e. the entire layout. Our algorithm proceeds as follows: +/// +/// Add bsC, fragments {{C}} +/// Add bsD, fragments {{C}, {D}} +/// Add bsE, fragments {{C}, {D}, {E}} +/// Add bsA, fragments {{A, C}, {D}, {E}} +/// Add bsB, fragments {{B, A, C, D, E}} +/// +/// This layout is optimal for bsA, as it now only needs to cover two (i.e. 3 +/// fewer) objects, at the cost of bsB needing to cover 1 more object. +/// +/// The bit set lowering pass assigns an object index to each object that needs +/// to be laid out, and calls addFragment for each bit set passing the object +/// indices of its referenced globals. It then assembles a layout from the +/// computed layout in the Fragments field. +struct GlobalLayoutBuilder { + /// The computed layout. Each element of this vector contains a fragment of + /// layout (which may be empty) consisting of object indices. + std::vector> Fragments; + + /// Mapping from object index to fragment index. + std::vector FragmentMap; + + GlobalLayoutBuilder(uint64_t NumObjects) + : Fragments(1), FragmentMap(NumObjects) {} + + /// Add F to the layout while trying to keep its indices contiguous. + /// If a previously seen fragment uses any of F's indices, that + /// fragment will be laid out inside F. + void addFragment(const std::set &F); +}; + +} // end namespace interleavevtables + +class InterleaveVTablesPass : public PassInfoMixin { +public: + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_IPO_LOWERTYPETESTS_H Index: llvm/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/lib/Passes/PassBuilder.cpp +++ llvm/lib/Passes/PassBuilder.cpp @@ -78,6 +78,7 @@ #include "llvm/Transforms/IPO/InferFunctionAttrs.h" #include "llvm/Transforms/IPO/Inliner.h" #include "llvm/Transforms/IPO/Internalize.h" +#include "llvm/Transforms/IPO/InterleaveVTables.h" #include "llvm/Transforms/IPO/LowerTypeTests.h" #include "llvm/Transforms/IPO/PartialInlining.h" #include "llvm/Transforms/IPO/SCCP.h" Index: llvm/lib/Passes/PassRegistry.def =================================================================== --- llvm/lib/Passes/PassRegistry.def +++ llvm/lib/Passes/PassRegistry.def @@ -53,6 +53,7 @@ MODULE_PASS("inferattrs", InferFunctionAttrsPass()) MODULE_PASS("insert-gcov-profiling", GCOVProfilerPass()) MODULE_PASS("instrprof", InstrProfiling()) +MODULE_PASS("interleavevtables", InterleaveVTablesPass()) MODULE_PASS("internalize", InternalizePass()) MODULE_PASS("invalidate", InvalidateAllAnalysesPass()) MODULE_PASS("ipsccp", IPSCCPPass()) Index: llvm/lib/Transforms/IPO/CMakeLists.txt =================================================================== --- llvm/lib/Transforms/IPO/CMakeLists.txt +++ llvm/lib/Transforms/IPO/CMakeLists.txt @@ -20,6 +20,7 @@ InferFunctionAttrs.cpp InlineSimple.cpp Inliner.cpp + InterleaveVTables.cpp Internalize.cpp LoopExtractor.cpp LowerTypeTests.cpp Index: llvm/lib/Transforms/IPO/IPO.cpp =================================================================== --- llvm/lib/Transforms/IPO/IPO.cpp +++ llvm/lib/Transforms/IPO/IPO.cpp @@ -38,6 +38,7 @@ initializeAlwaysInlinerLegacyPassPass(Registry); initializeSimpleInlinerPass(Registry); initializeInferFunctionAttrsLegacyPassPass(Registry); + initializeInterleaveVTablesPass(Registry); initializeInternalizeLegacyPassPass(Registry); initializeLoopExtractorPass(Registry); initializeBlockExtractorPass(Registry); Index: llvm/lib/Transforms/IPO/InterleaveVTables.cpp =================================================================== --- /dev/null +++ llvm/lib/Transforms/IPO/InterleaveVTables.cpp @@ -0,0 +1,826 @@ +//===- InterleaveVTables.cpp - VTables interleaving pass -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Create a interleaved layout for all the virtual tables in a disjoint +// inheritance hierarchy. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/InterleaveVTables.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/EquivalenceClasses.h" +#include "llvm/ADT/PointerUnion.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/TinyPtrVector.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Analysis/TypeMetadataUtils.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalObject.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/TrailingObjects.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace llvm; +using namespace interleavevtables; + +#define DEBUG_TYPE "interleavevtables" + +STATISTIC(NumTypeIdDisjointSets, "Number of disjoint sets of type identifiers"); + +void GlobalLayoutBuilder::addFragment(const std::set &F) { + // Create a new fragment to hold the layout for F. + Fragments.emplace_back(); + std::vector &Fragment = Fragments.back(); + uint64_t FragmentIndex = Fragments.size() - 1; + + for (auto ObjIndex : F) { + uint64_t OldFragmentIndex = FragmentMap[ObjIndex]; + if (OldFragmentIndex == 0) { + // We haven't seen this object index before, so just add it to the current + // fragment. + Fragment.push_back(ObjIndex); + } else { + // This index belongs to an existing fragment. Copy the elements of the + // old fragment into this one and clear the old fragment. We don't update + // the fragment map just yet, this ensures that any further references to + // indices from the old fragment in this fragment do not insert any more + // indices. + std::vector &OldFragment = Fragments[OldFragmentIndex]; + Fragment.insert(Fragment.end(), OldFragment.begin(), OldFragment.end()); + OldFragment.clear(); + } + } + + // Update the fragment map to point our object indices to this fragment. + for (uint64_t ObjIndex : Fragment) + FragmentMap[ObjIndex] = FragmentIndex; +} + +namespace { + +/// A POD-like structure that we use to store a global reference together with +/// its metadata types. In this pass we frequently need to query the set of +/// metadata types referenced by a global, which at the IR level is an expensive +/// operation involving a map lookup; this data structure helps to reduce the +/// number of times we need to do this lookup. +class GlobalTypeMember final : TrailingObjects { + friend TrailingObjects; + + GlobalVariable *GV; + size_t NTypes; + + size_t numTrailingObjects(OverloadToken) const { return NTypes; } + +public: + static GlobalTypeMember *create(BumpPtrAllocator &Alloc, GlobalVariable *GV, + ArrayRef Types) { + auto *GTM = static_cast(Alloc.Allocate( + totalSizeToAlloc(Types.size()), alignof(GlobalTypeMember))); + GTM->GV = GV; + GTM->NTypes = Types.size(); + std::uninitialized_copy(Types.begin(), Types.end(), + GTM->getTrailingObjects()); + return GTM; + } + + GlobalVariable *getGlobal() const { + return GV; + } + + ArrayRef types() const { + return makeArrayRef(getTrailingObjects(), NTypes); + } +}; + +class InterleaveVTablesModule { + Module &M; + + PointerType *Int8PtrTy = Type::getInt8PtrTy(M.getContext()); + PointerType *Int64PtrTy = Type::getInt64PtrTy(M.getContext()); + IntegerType *Int32Ty = Type::getInt32Ty(M.getContext()); + IntegerType *Int64Ty = Type::getInt64Ty(M.getContext()); + + // Mapping from type identifiers to the call sites that test them + struct TypeIdUserInfo { + std::vector CallSites; + }; + DenseMap TypeIdUsers; + + enum TypeRel { + OldContainsNew = 0, + NewContainsOld = 1, + Disjoint = 2 + }; + + TypeRel getTypesRelationship(DenseMap & TypeIdIndices, + std::vector> & TypeMembers, + Metadata *OldType, Metadata *NewType); + bool getOffsetFromGlobalVaribleName(StringRef GlobalVaribleName, + StringRef TypeName, + int & Offset); + void getSameDistanceEntries( + ArrayRef TypeIds, + DenseMap & TypeIdIndices, + std::vector> & TypeMembers, + std::vector> & SameDistanceEntries); + Constant * getConstantFromArray(Constant * Array, unsigned Offset); + void interleaveGlobalVariables( + ArrayRef TypeIds, + DenseMap & TypeIdIndices, + std::vector OrderedGlobals, + std::vector> & TypeOrderedMembers, + DenseMap & AddressPointIndices); + uint64_t verifyTypeMDNode(GlobalVariable *GV, MDNode *Type); + void handleDisjointSet(ArrayRef TypeIds, + ArrayRef Globals, + DenseMap & AddressPointIndices); + +public: + InterleaveVTablesModule(Module &M); + + bool lower(); +}; + +struct InterleaveVTables : public ModulePass { + static char ID; + + InterleaveVTables() : ModulePass(ID) { + initializeInterleaveVTablesPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override { + return InterleaveVTablesModule(M).lower(); + } +}; + +} // end anonymous namespace + +char InterleaveVTables::ID = 0; + +INITIALIZE_PASS(InterleaveVTables, "interleavevtables", "Interleave vtables", false, + false) + +ModulePass * +llvm::createInterleaveVTablesPass() { + return new InterleaveVTables; +} + +InterleaveVTablesModule::TypeRel InterleaveVTablesModule::getTypesRelationship( + DenseMap & TypeIdIndices, + std::vector> & TypeMembers, + Metadata *OldType, Metadata *NewType) { + std::vector & OldList = TypeMembers[TypeIdIndices[OldType]]; + std::vector & NewList = TypeMembers[TypeIdIndices[NewType]]; + + for (auto Elem : NewList) { + if (std::find(OldList.begin(), OldList.end(), Elem) != OldList.end()) { + if (NewList.size() > OldList.size()) + return NewContainsOld; + else + return OldContainsNew; + } + } + + return Disjoint; +} + +/// Offset global variables' name have the format: [type name]$[offset] +/// This function parse the name of each global variable. If the parsing +/// is successful, it sets Offset and return true, otherwise, it returns +/// false. +bool InterleaveVTablesModule::getOffsetFromGlobalVaribleName( + StringRef GlobalVaribleName, + StringRef TypeName, + int & Offset) { + if (!GlobalVaribleName.contains(std::string(TypeName.str() + "$"))) + return false; + + size_t Loc = GlobalVaribleName.find('$') + 1; + if (Loc >= GlobalVaribleName.size()) { + LLVM_DEBUG({ + dbgs() << GlobalVaribleName << " does not contain an offset."; + }); + return false; + } + + StringRef OffsetStr = GlobalVaribleName.substr(Loc); + return !OffsetStr.consumeInteger(10, Offset); +} + +/// Using the offset global variables created in the front-end, +/// we categorize all the entries in the original virtual tables of the +/// disjoint virtual table hierarchy into a set of lists. +/// Each list, identified by a (TypeId, Offset) pair, contains all the entries +/// in the hierarchy that must have the same distance from corresponding +/// address points. +void InterleaveVTablesModule::getSameDistanceEntries( + ArrayRef TypeIds, + DenseMap & TypeIdIndices, + std::vector> & TypeMembers, + std::vector> & SameDistanceEntries) { + + DenseMap>> OffsetTypeIds; + + std::vector ToErase; + for (GlobalVariable &GV : M.globals()) { + StringRef GN = GV.getName(); + for (Metadata *TypeId : TypeIds) { + auto TypeIdStr = dyn_cast(TypeId); + int Offset = 0; + if (!getOffsetFromGlobalVaribleName(GN, TypeIdStr->getString(), Offset)) + continue; + + if (dyn_cast(GV.getInitializer()) == nullptr) + report_fatal_error("The offset global variable is not a ConstantInt."); + + auto I = OffsetTypeIds.find(Offset); + if (I == OffsetTypeIds.end()) { + OffsetTypeIds[Offset].emplace_back(TypeId, &GV); + } else { + // Indicating whether we should add (TypeID, Offset) to EntryLists + bool AddOrNot = true; + for (auto & OldType : OffsetTypeIds[Offset]) { + // This flag indicates the relationship between OldType and TypeId + InterleaveVTablesModule::TypeRel Rel = getTypesRelationship(TypeIdIndices, TypeMembers, OldType.first, TypeId); + + if (Rel == OldContainsNew) { + GV.replaceAllUsesWith(OldType.second); + ToErase.push_back(&GV); + AddOrNot = false; + break; + } else if (Rel == NewContainsOld) { + OldType.first = nullptr; + (OldType.second)->replaceAllUsesWith(&GV); + (OldType.second)->eraseFromParent(); + } + } + + if (AddOrNot) { + OffsetTypeIds[Offset].emplace_back(TypeId, &GV); + } + } + break; + } + } + + // Erase unnecessary offset global variables + for (auto & GV : ToErase) { + GV->eraseFromParent(); + } + + // Populate OTTEntries, RTTIEntries and EntryLists with OffsetTypeIds + for (auto & P : OffsetTypeIds) { + for (auto & TypeAndGlobal : P.second) { + if (TypeAndGlobal.first != nullptr) { + if (P.first != -2 && P.first != -1) + SameDistanceEntries.emplace_back(TypeAndGlobal.first, P.first, TypeAndGlobal.second); + } + } + } + + // Sort the lists of entries in the order of decreasing size + llvm::sort(SameDistanceEntries.begin(), SameDistanceEntries.end(), + [&](const std::tuple & Left, + const std::tuple & Right) { + uint64_t LeftIndex = TypeIdIndices[std::get<0>(Left)]; + uint64_t RightIndex = TypeIdIndices[std::get<0>(Right)]; + size_t LeftSize = TypeMembers[LeftIndex].size(); + size_t RightSize = TypeMembers[RightIndex].size(); + if (LeftSize != RightSize) + return LeftSize > RightSize; + if (LeftIndex != RightIndex) + return LeftIndex < RightIndex; + return std::get<1>(Left) < std::get<1>(Right); + }); + + return; +} + +/// Given a constant array that may be of ConstantArray type or ConstantDataArray type, +/// return a constant element in this array at Offset +Constant * InterleaveVTablesModule::getConstantFromArray(Constant * Array, unsigned Offset) { + + if (!isa(Array) && !isa(Array)) + report_fatal_error("The array is neither a ConstantArray Nor a ConstantDataArray."); + + ConstantArray * CA = dyn_cast(Array); + if (CA != nullptr) { + //if (CA->getType()->getArrayElementType() != Int8PtrTy) + //return ConstantExpr::getIntToPtr(CA->getOperand(Offset), Int8PtrTy); + return CA->getOperand(Offset); + } + + ConstantDataArray * CDA = dyn_cast(Array); + //if (CDA->getType()->getArrayElementType() != Int8PtrTy) + //return ConstantExpr::getIntToPtr(CDA->getElementAsConstant(Offset), Int8PtrTy); + return CDA->getElementAsConstant(Offset); +} + +/// Given a disjoint set of type identifiers and globals, interleave the globals +/// and lower the llvm.type.test calls. +void InterleaveVTablesModule::interleaveGlobalVariables( + ArrayRef TypeIds, + DenseMap & TypeIdIndices, + std::vector OrderedGlobals, + std::vector> & TypeOrderedMembers, + // Map each vtable to the index of the entry at the address point + DenseMap & AddressPointIndices) { + // Two temporary vectors of interleaved entries: + // UpperEntries is initialized with offset-to-top entries. + // LowerEntreis is initialized with RTTI entries. + std::vector UpperEntries; + std::vector LowerEntries; + + // The interleaved layout after merging UpperEntries and LowerEntries + std::vector InterleavedEntries; + + // Map each global to the offset of its address point in the interleaved layout + DenseMap AddrPoints; + + // Map each global to its constant array + DenseMap ConstantArrays; + + // For each vtable, map the old offset from the start of the vtable of each entry to + // the new offset from the start of the vtable in the interleaved layout of the entry. + // This table is also used for verifying the interleaved layout. + DenseMap> OldToNewOffsets; + + // Contains all entries that must have the same distance from the corresponding + // address point in the interleaved layout. offset-to-top and RTTI entries are + // excluded because we do not need to change offset global variables that reference + // them. + std::vector> SameDistanceEntries; + + if (OrderedGlobals.size() == 0) + return; + + for (auto & G : OrderedGlobals) { + ConstantArrays[G] = (G->getGlobal())->getInitializer(); + } + + // Populate the three entry lists + getSameDistanceEntries(TypeIds, TypeIdIndices, TypeOrderedMembers, SameDistanceEntries); + + // Initialized UpperEntires and LowerEntries + for (auto & GTM : OrderedGlobals) { + UpperEntries.push_back(getConstantFromArray(ConstantArrays[GTM], AddressPointIndices[GTM]-2)); + LowerEntries.push_back(getConstantFromArray(ConstantArrays[GTM], AddressPointIndices[GTM]-1)); + AddrPoints[GTM] = LowerEntries.size() * 2; + + OldToNewOffsets[GTM][-2] = -2; + OldToNewOffsets[GTM][-1] = -1; + } + + std::vector * CurList; + + // Index of the entry in the interleaved layout + int InterleavedIndex; + for (const auto & P : SameDistanceEntries) { + if (UpperEntries.size() <= LowerEntries.size()) { + CurList = &UpperEntries; + InterleavedIndex = UpperEntries.size()*2; + } + else { + CurList = &LowerEntries; + InterleavedIndex = 2*LowerEntries.size()+1; + } + + const std::vector & CurGlobalIndices = TypeOrderedMembers[TypeIdIndices[std::get<0>(P)]]; + + // All the entries in the same list should have the same offset from the corresponding address point + int64_t NewOffset = InterleavedIndex - AddrPoints[OrderedGlobals[CurGlobalIndices[0]]]; + + // Update the offset global variable if needed + GlobalVariable * CurGlobal = std::get<2>(P); + CurGlobal->replaceAllUsesWith(ConstantExpr::getIntToPtr(ConstantInt::get(Int64Ty, NewOffset), Int64PtrTy)); + CurGlobal->eraseFromParent(); + + for (auto GI: CurGlobalIndices) { + GlobalTypeMember * GTM = OrderedGlobals[GI]; + OldToNewOffsets[GTM][std::get<1>(P)] = NewOffset; + CurList->push_back(getConstantFromArray(ConstantArrays[GTM], (unsigned)(std::get<1>(P) + (int64_t)AddressPointIndices[GTM]))); + } + } + + // Pad the shorter list + if (UpperEntries.size() != LowerEntries.size()) { + CurList = UpperEntries.size() < LowerEntries.size() ? &UpperEntries : &LowerEntries; + while (UpperEntries.size() != LowerEntries.size()) { + CurList->push_back(ConstantPointerNull::get(Int8PtrTy)); + } + } + + // Interleave the two vector + for (size_t I=0; IgetGlobal(); + ConstantArray * OldArray = dyn_cast(GV->getInitializer()); + if (OldArray != nullptr) { + for (int64_t I = 0; I < OldArray->getNumOperands(); ++I) { + if (OldToNewOffsets[G].count(I-(int64_t)AddressPointIndices[G]) == 0) + continue; + int64_t OldOffsetFromAddrPt = I-(int64_t)AddressPointIndices[G]; + int64_t NewOffsetFromAddrPt = OldToNewOffsets[G][OldOffsetFromAddrPt]; + uint64_t NewOffsetFromBeginning = (uint64_t)(NewOffsetFromAddrPt + AddrPoints[G]); + if (OldArray->getOperand(I) != InterleavedEntries[NewOffsetFromBeginning]) { + LLVM_DEBUG({ + dbgs() << "The original VTable " << *GV << " and the interleaved VTable have different values for the " + << I << "th entry\n"; + }); + report_fatal_error("The old VTable and the interleaved VTable are inconsistent."); + } + } + } + else { // The initialzer of the global variable is a ConstantDataArray + ConstantDataArray * OldDataArray = dyn_cast(GV->getInitializer()); + for (int64_t I = 0; I < OldDataArray->getNumElements(); ++I) { + if (OldToNewOffsets[G].count(I-(int64_t)AddressPointIndices[G]) == 0) + continue; + int64_t OldOffsetFromAddrPt = I-(int64_t)AddressPointIndices[G]; + int64_t NewOffsetFromAddrPt = OldToNewOffsets[G][OldOffsetFromAddrPt]; + uint64_t NewOffsetFromBeginning = (uint64_t)(NewOffsetFromAddrPt + AddrPoints[G]); + ConstantInt * CInterleaved = cast(InterleavedEntries[NewOffsetFromBeginning]); + ConstantInt * COriginal = cast(OldDataArray->getElementAsConstant(I)); + if (COriginal->getSExtValue() != CInterleaved->getSExtValue()) { + LLVM_DEBUG({ + dbgs() << "The original VTable " << *GV << " and the interleaved VTable have different values for the " + << I << "th entry\n"; + }); + report_fatal_error("The old VTable and the interleaved VTable are inconsistent."); + } + } + } + } +//#endif + + // Make sure that each element in InterleavedEntries is of type Int8Ptr + for (auto & Elem : InterleavedEntries) { + if (Elem->getType() != Int8PtrTy) { + Elem = ConstantExpr::getIntToPtr(Elem, Int8PtrTy); + } + } + + Constant *NewInit = ConstantArray::get(ArrayType::get(Int8PtrTy, InterleavedEntries.size()), InterleavedEntries); + // The name of the combined global is temporary. + auto *CombinedGlobal = + new GlobalVariable(M, NewInit->getType(), true, + GlobalValue::PrivateLinkage, NewInit, "interleavedvtable"); + + // Build aliases pointing to offsets into the interleaved global for each + // global from which we built the interleaved global, and replace references + // to the original globals with references to the aliases. + int32_t StartOffset = 0; + for (auto GTM : OrderedGlobals) { + GlobalVariable *GV = GTM->getGlobal(); + + // The first vtable in the interleaved layout always starts at offset 0, and + // the beginnings of two adjacent vtable are off by two. + Constant *CombinedGlobalIdxs[] = {ConstantInt::get(Int32Ty, 0), + ConstantInt::get(Int32Ty, StartOffset)}; + Constant *CombinedGlobalElemPtr = ConstantExpr::getGetElementPtr( + NewInit->getType(), CombinedGlobal, CombinedGlobalIdxs); + assert(GV->getType()->getAddressSpace() == 0); + GlobalAlias *GAlias = + GlobalAlias::create(Int8PtrTy, 0, GV->getLinkage(), + "", CombinedGlobalElemPtr, &M); + GAlias->setVisibility(GV->getVisibility()); + GAlias->takeName(GV); + GV->replaceAllUsesWith(ConstantExpr::getBitCast(GAlias, GV->getType())); + GV->eraseFromParent(); + StartOffset += 2; + } +} + +uint64_t InterleaveVTablesModule::verifyTypeMDNode(GlobalVariable *GV, MDNode *Type) { + if (Type->getNumOperands() != 2) + report_fatal_error("All operands of type metadata must have 2 elements"); + + if (GV->isThreadLocal()) + report_fatal_error("Bit set element may not be thread-local"); + if (GV->hasSection()) + report_fatal_error( + "A member of a type identifier may not have an explicit section"); + + // FIXME: We previously checked that global var member of a type identifier + // must be a definition, but the IR linker may leave type metadata on + // declarations. We should restore this check after fixing PR31759. + + auto OffsetConstMD = dyn_cast(Type->getOperand(0)); + if (!OffsetConstMD) + report_fatal_error("Type offset must be a constant"); + auto OffsetInt = dyn_cast(OffsetConstMD->getValue()); + if (!OffsetInt) + report_fatal_error("Type offset must be an integer constant"); + + return OffsetInt->getZExtValue(); +} + +void InterleaveVTablesModule::handleDisjointSet( + ArrayRef TypeIds, + ArrayRef Globals, + DenseMap & AddressPointIndices) { + + if (Globals.size() == 0) + return; + + DenseMap TypeIdIndices; + for (unsigned I = 0; I != TypeIds.size(); ++I) + TypeIdIndices[TypeIds[I]] = I; + + // For each type identifier, build a set of indices that refer to members of + // the type identifier. + std::vector> TypeMembers(TypeIds.size()); + unsigned GlobalIndex = 0; + DenseMap GlobalIndices; + for (GlobalTypeMember *GTM : Globals) { + for (MDNode *Type : GTM->types()) { + // Type = { offset, type identifier } + auto I = TypeIdIndices.find(Type->getOperand(1)); + if (I != TypeIdIndices.end()) + TypeMembers[I->second].insert(GlobalIndex); + } + GlobalIndices[GTM] = GlobalIndex; + GlobalIndex++; + } + + // Order the sets of indices by size. The GlobalLayoutBuilder works best + // when given small index sets first. + // We need to make a copy of TypeMembers here because we will + // reuse TypeIdIndices later. + std::vector> TypeMembersCopy(TypeMembers); + std::stable_sort( + TypeMembersCopy.begin(), TypeMembersCopy.end(), + [](const std::set &O1, const std::set &O2) { + return O1.size() < O2.size(); + }); + + // Create a GlobalLayoutBuilder and provide it with index sets as layout + // fragments. The GlobalLayoutBuilder tries to lay out members of fragments as + // close together as possible. + GlobalLayoutBuilder GLB(Globals.size()); + for (auto &&MemSet : TypeMembersCopy) + GLB.addFragment(MemSet); + + // Build a vector of globals with the computed layout. + std::vector OrderedGTMs(Globals.size()); + auto OGTMI = OrderedGTMs.begin(); + + // Map indices of globals in the unordered list to + // the indices in the ordered list + DenseMap IndexMap; + + // The index of a global in the ordered vector + uint64_t OrderedIndex = 0; + for (auto &&F : GLB.Fragments) { + for (auto &&Offset : F) { + *OGTMI++ = Globals[Offset]; + + IndexMap[Offset] = OrderedIndex; + OrderedIndex ++; + } + } + + // For each type identifier, build a vector of indices that + // refer to globals in the ordered list of the type identifier. + std::vector> TypeOrderedMembers; + for (const auto & TS : TypeMembers) { + std::vector Cur(TS.begin(), TS.end()); + for (auto & E : Cur) + E = IndexMap[E]; + std::sort(Cur.begin(), Cur.end()); + TypeOrderedMembers.push_back(Cur); + } + + // Create the interleaved layout + interleaveGlobalVariables(TypeIds, TypeIdIndices, OrderedGTMs, TypeOrderedMembers, AddressPointIndices); +} + +/// Lower all type tests in this module. +InterleaveVTablesModule::InterleaveVTablesModule(Module &M) : M(M) {} + +bool InterleaveVTablesModule::lower() { + Function *TypeTestFunc = + M.getFunction(Intrinsic::getName(Intrinsic::type_test)); + if (!TypeTestFunc || TypeTestFunc->use_empty()) + return false; + + // Equivalence class set containing type identifiers and the globals that + // reference them. This is used to partition the set of type identifiers in + // the module into disjoint sets. + using GlobalClassesTy = EquivalenceClasses< + PointerUnion>; + GlobalClassesTy GlobalClasses; + + // Verify the type metadata and build a few data structures to let us + // efficiently enumerate the type identifiers associated with a global: + // a list of GlobalTypeMembers (a GlobalObject stored alongside a vector + // of associated type metadata) and a mapping from type identifiers to their + // list of GlobalTypeMembers and last observed index in the list of globals. + // The indices will be used later to deterministically order the list of type + // identifiers. + BumpPtrAllocator Alloc; + struct TIInfo { + unsigned UniqueId; + std::vector RefGlobals; + }; + DenseMap TypeIdInfo; + unsigned CurUniqueId = 0; + SmallVector Types; + + DenseMap GlobalTypeMembers; + + // Use the smallest first field of the type metadata + // to calculate the index of the address point + // for each virtual table. + DenseMap AddressPointIndices; + for (GlobalVariable &GV : M.globals()) { + if (GV.isDeclarationForLinker()) + continue; + + Types.clear(); + GV.getMetadata(LLVMContext::MD_type, Types); + if (Types.empty()) + continue; + + // We can only interleave vtables when they have been split. + Constant * Init = GV.getInitializer(); + if (!Init->getType()->isArrayTy()) + report_fatal_error("The global initializer is not an array."); + ArrayType * InitTy = cast(Init->getType()); + Type * ElemType = InitTy->getElementType(); + const DataLayout &DL = (GV.getParent())->getDataLayout(); + uint64_t ElemTypeSize = DL.getTypeAllocSize(ElemType); + + auto *GTM = + GlobalTypeMember::create(Alloc, &GV, Types); + GlobalTypeMembers[&GV] = GTM; + uint64_t BytesBeforeAddrPt = UINT64_MAX; + for (MDNode *Type : Types) { + uint64_t Offset = verifyTypeMDNode(&GV, Type); + if (BytesBeforeAddrPt > Offset) + BytesBeforeAddrPt = Offset; + auto &Info = TypeIdInfo[Type->getOperand(1)]; + Info.UniqueId = ++CurUniqueId; + Info.RefGlobals.push_back(GTM); + } + AddressPointIndices[GTM] = BytesBeforeAddrPt / ElemTypeSize; + } + + auto AddTypeIdUse = [&](Metadata *TypeId) -> TypeIdUserInfo & { + // Add the call site to the list of call sites for this type identifier. We + // also use TypeIdUsers to keep track of whether we have seen this type + // identifier before. If we have, we don't need to re-add the referenced + // globals to the equivalence class. + auto Ins = TypeIdUsers.insert({TypeId, {}}); + if (Ins.second) { + // Add the type identifier to the equivalence class. + GlobalClassesTy::iterator GCI = GlobalClasses.insert(TypeId); + GlobalClassesTy::member_iterator CurSet = GlobalClasses.findLeader(GCI); + + // Add the referenced globals to the type identifier's equivalence class. + for (GlobalTypeMember *GTM : TypeIdInfo[TypeId].RefGlobals) + CurSet = GlobalClasses.unionSets( + CurSet, GlobalClasses.findLeader(GlobalClasses.insert(GTM))); + } + + return Ins.first->second; + }; + + for (const Use &U : TypeTestFunc->uses()) { + auto CI = cast(U.getUser()); + + auto TypeIdMDVal = dyn_cast(CI->getArgOperand(1)); + if (!TypeIdMDVal) + report_fatal_error("Second argument of llvm.type.test must be metadata"); + auto TypeId = TypeIdMDVal->getMetadata(); + auto TypeIdStr = dyn_cast(TypeId); + if (!TypeIdStr) + report_fatal_error( + "Second argument of llvm.type.test must be a metadata string"); + AddTypeIdUse(TypeId).CallSites.push_back(CI); + } + + if (GlobalClasses.empty()) + return false; + + // Build a list of disjoint sets ordered by their maximum global index for + // determinism. + std::vector> Sets; + for (GlobalClassesTy::iterator I = GlobalClasses.begin(), + E = GlobalClasses.end(); + I != E; ++I) { + if (!I->isLeader()) + continue; + ++NumTypeIdDisjointSets; + + unsigned MaxUniqueId = 0; + for (GlobalClassesTy::member_iterator MI = GlobalClasses.member_begin(I); + MI != GlobalClasses.member_end(); ++MI) { + if (auto *MD = MI->dyn_cast()) + MaxUniqueId = std::max(MaxUniqueId, TypeIdInfo[MD].UniqueId); + } + Sets.emplace_back(I, MaxUniqueId); + } + llvm::sort(Sets.begin(), Sets.end(), + [](const std::pair &S1, + const std::pair &S2) { + return S1.second < S2.second; + }); + + // For each disjoint set we found... + for (const auto &S : Sets) { + // Build the list of type identifiers in this disjoint set. + std::vector TypeIds; + std::vector Globals; + for (GlobalClassesTy::member_iterator MI = + GlobalClasses.member_begin(S.first); + MI != GlobalClasses.member_end(); ++MI) { + if (MI->is()) + TypeIds.push_back(MI->get()); + else if (MI->is()) + Globals.push_back(MI->get()); + } + + // Order type identifiers by unique ID for determinism. This ordering is + // stable as there is a one-to-one mapping between metadata and unique IDs. + llvm::sort(TypeIds.begin(), TypeIds.end(), [&](Metadata *M1, Metadata *M2) { + return TypeIdInfo[M1].UniqueId < TypeIdInfo[M2].UniqueId; + }); + + // Interleave virtual tables and emit checks for this disjoint set. + handleDisjointSet(TypeIds, Globals, AddressPointIndices); + } + + return true; +} + +PreservedAnalyses InterleaveVTablesPass::run(Module &M, + ModuleAnalysisManager &AM) { + bool Changed = InterleaveVTablesModule(M).lower(); + if (!Changed) + return PreservedAnalyses::all(); + return PreservedAnalyses::none(); +} Index: llvm/lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -152,6 +152,8 @@ "enable-gvn-sink", cl::init(false), cl::Hidden, cl::desc("Enable the GVN sinking pass (default = off)")); +static cl::opt InterleaveOrNot("interleave", cl::desc("Enable VTables interleaving")); + PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; @@ -953,11 +955,16 @@ // in the current module. PM.add(createCrossDSOCFIPass()); + if (InterleaveOrNot) { + PM.add(createInterleaveVTablesPass()); + } + // Lower type metadata and the type.test intrinsic. This pass supports Clang's // control flow integrity mechanisms (-fsanitize=cfi*) and needs to run at // link time if CFI is enabled. The pass does nothing if CFI is disabled. PM.add(createLowerTypeTestsPass(ExportSummary, nullptr)); + if (OptLevel != 0) addLateLTOOptimizationPasses(PM); Index: llvm/test/Transforms/InterleaveVTables/basic.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/InterleaveVTables/basic.ll @@ -0,0 +1,35 @@ +; RUN: opt -S -interleavevtables < %s | FileCheck %s + +target datalayout = "e-p:64:64" + +; Tests that this set of globals is interleaved according to our interleaving algorithm. +; (see interleaveGlobalVariables in lib/Transforms/IPO/InterleaveVTables.cpp). + + + +; CHECK: constant [12 x i8*] [i8* null, i8* inttoptr (i64 1 to i8*), i8* inttoptr (i64 4 to i8*), i8* inttoptr (i64 5 to i8*), i8* inttoptr (i64 8 to i8*), i8* inttoptr (i64 9 to i8*), i8* inttoptr (i64 2 to i8*), i8* inttoptr (i64 3 to i8*), i8* inttoptr (i64 6 to i8*), i8* inttoptr (i64 7 to i8*), i8* inttoptr (i64 10 to i8*), i8* inttoptr (i64 11 to i8*)] +@a = constant [4 x i64] [i64 0, i64 1, i64 2, i64 3], !type !0 +@b = constant [4 x i64] [i64 4, i64 5, i64 6, i64 7], !type !0, !type !1 +@c = constant [4 x i64] [i64 8, i64 9, i64 10, i64 11], !type !0, !type !2 + +; CHECK-NOT: @typeid{{[1-3]+}}${{0|1}} = + +@typeid1$0 = constant i64 0 +@typeid1$1 = constant i64 1 +@typeid2$0 = constant i64 0 +@typeid2$1 = constant i64 1 +@typeid3$0 = constant i64 0 +@typeid3$1 = constant i64 1 + +!0 = !{i64 16, !"typeid1"} +!1 = !{i64 16, !"typeid2"} +!2 = !{i64 16, !"typeid3"} + +declare i1 @llvm.type.test(i8* %ptr, metadata %bitset) nounwind readnone + +define void @foo() { + %x = call i1 @llvm.type.test(i8* undef, metadata !"typeid1") + %y = call i1 @llvm.type.test(i8* undef, metadata !"typeid2") + %z = call i1 @llvm.type.test(i8* undef, metadata !"typeid3") + ret void +} Index: llvm/test/Transforms/InterleaveVTables/imcomplete.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/InterleaveVTables/imcomplete.ll @@ -0,0 +1,39 @@ +; RUN: opt -S -interleavevtables < %s | FileCheck %s + +target datalayout = "e-p:64:64" + +; Tests that this set of globals is interleaved according to our interleaving algorithm. +; (see interleaveGlobalVariables in lib/Transforms/IPO/InterleaveVTables.cpp). +; This test is similar to basic.ll but some entries in the virtual tables are not referenced. + +; The first list initilized with offset-to-top entries: [0, 4, 8] +; The second list initilized with RTTI entries: [1, 5, 9] +; Lists of entries that have to have the same distance [2, 6, 10] [7] [11] +; The first list before merging: [0, 4, 8, 2, 6, 10] +; The second list before merging: [1, 5, 9, 7, 11, 0] +; The final layout: [0, 1, 4, 5, 8, 9, 2, 7, 6, 11, 10, 0] + +; CHECK: constant [12 x i8*] [i8* null, i8* inttoptr (i64 1 to i8*), i8* inttoptr (i64 4 to i8*), i8* inttoptr (i64 5 to i8*), i8* inttoptr (i64 8 to i8*), i8* inttoptr (i64 9 to i8*), i8* inttoptr (i64 2 to i8*), i8* inttoptr (i64 7 to i8*), i8* inttoptr (i64 6 to i8*), i8* inttoptr (i64 11 to i8*), i8* inttoptr (i64 10 to i8*), i8* null] +@a = constant [4 x i64] [i64 0, i64 1, i64 2, i64 3], !type !0 +@b = constant [4 x i64] [i64 4, i64 5, i64 6, i64 7], !type !0, !type !1 +@c = constant [4 x i64] [i64 8, i64 9, i64 10, i64 11], !type !0, !type !2 + +; CHECK-NOT: @typeid{{[1-3]+}}${{0|1}} = + +@typeid1$0 = constant i64 0 +@typeid2$0 = constant i64 0 +@typeid2$1 = constant i64 1 +@typeid3$1 = constant i64 1 + +!0 = !{i64 16, !"typeid1"} +!1 = !{i64 16, !"typeid2"} +!2 = !{i64 16, !"typeid3"} + +declare i1 @llvm.type.test(i8* %ptr, metadata %bitset) nounwind readnone + +define void @foo() { + %x = call i1 @llvm.type.test(i8* undef, metadata !"typeid1") + %y = call i1 @llvm.type.test(i8* undef, metadata !"typeid2") + %z = call i1 @llvm.type.test(i8* undef, metadata !"typeid3") + ret void +} Index: llvm/test/Transforms/InterleaveVTables/vbase.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/InterleaveVTables/vbase.ll @@ -0,0 +1,49 @@ +; RUN: opt -S -interleavevtables < %s | FileCheck %s + +target datalayout = "e-p:64:64" + +; Tests that this set of globals is interleaved according to our interleaving algorithm. +; (see interleaveGlobalVariables in lib/Transforms/IPO/InterleaveVTables.cpp). +; This test simulates vtables with virtual bases. + +; The first list initilized with offset-to-top entries: [0, 5, 11] +; The second list initilized with RTTI entries: [1, 6, 12] +; Lists of entries that have to have the same distance [2, 7, 13] [3, 8, 14] [4, 10] [9] +; The first list before merging: [0, 5, 11, 2, 7, 13, 4, 10] +; The second list before merging: [1, 6, 12, 3, 8, 14, 9, 0] +; The final layout: [0, 1, 5, 6, 11, 12, 2, 3, 7, 8, 13, 14, 4, 9, 10, 0] + +; CHECK: constant [16 x i8*] [i8* null, i8* inttoptr (i64 1 to i8*), i8* inttoptr (i64 5 to i8*), i8* inttoptr (i64 6 to i8*), i8* inttoptr (i64 11 to i8*), i8* inttoptr (i64 12 to i8*), i8* inttoptr (i64 2 to i8*), i8* inttoptr (i64 3 to i8*), i8* inttoptr (i64 7 to i8*), i8* inttoptr (i64 8 to i8*), i8* inttoptr (i64 13 to i8*), i8* inttoptr (i64 14 to i8*), i8* inttoptr (i64 4 to i8*), i8* inttoptr (i64 9 to i8*), i8* inttoptr (i64 10 to i8*), i8* null] + +@a = constant [4 x i64] [i64 0, i64 1, i64 2, i64 3], !type !0 +@b = constant [5 x i64] [i64 4, i64 5, i64 6, i64 7, i64 8], !type !1, !type !2 +@c = constant [6 x i64] [i64 9, i64 10, i64 11, i64 12, i64 13, i64 14], !type !3, !type !4, !type !5 + +; CHECK-NOT: @typeid{{[1-3]+}}$ + +@typeid2$-3 = constant i64 -3 +@typeid3$-3 = constant i64 -3 +@typeid3$-4 = constant i64 -4 + +@typeid1$0 = constant i64 0 +@typeid1$1 = constant i64 1 +@typeid2$0 = constant i64 0 +@typeid2$1 = constant i64 1 +@typeid3$0 = constant i64 0 +@typeid3$1 = constant i64 1 + +!0 = !{i64 16, !"typeid1"} +!1 = !{i64 24, !"typeid1"} +!2 = !{i64 24, !"typeid2"} +!3 = !{i64 32, !"typeid1"} +!4 = !{i64 32, !"typeid2"} +!5 = !{i64 32, !"typeid3"} + +declare i1 @llvm.type.test(i8* %ptr, metadata %bitset) nounwind readnone + +define void @foo() { + %x = call i1 @llvm.type.test(i8* undef, metadata !"typeid1") + %y = call i1 @llvm.type.test(i8* undef, metadata !"typeid2") + %z = call i1 @llvm.type.test(i8* undef, metadata !"typeid3") + ret void +}