Index: llvm/trunk/lib/Transforms/IPO/LowerTypeTests.cpp
===================================================================
--- llvm/trunk/lib/Transforms/IPO/LowerTypeTests.cpp
+++ llvm/trunk/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -206,6 +206,34 @@
   Constant *Mask;
 };
 
+/// A POD-like structure that we use to store a global reference together with
+/// its metadata types. In this pass we frequently need to query the set of
+/// metadata types referenced by a global, which at the IR level is an expensive
+/// operation involving a map lookup; this data structure helps to reduce the
+/// number of times we need to do this lookup.
+class GlobalTypeMember {
+  GlobalObject *GO;
+  size_t NTypes;
+  MDNode *Types[1]; // We treat this as a flexible array member.
+
+public:
+  static GlobalTypeMember *create(BumpPtrAllocator &Alloc, GlobalObject *GO,
+                                  ArrayRef<MDNode *> Types) {
+    auto GTM = Alloc.Allocate<GlobalTypeMember>(
+        sizeof(GlobalTypeMember) + (Types.size() - 1) * sizeof(MDNode *));
+    GTM->GO = GO;
+    GTM->NTypes = Types.size();
+    std::copy(Types.begin(), Types.end(), GTM->Types);
+    return GTM;
+  }
+  GlobalObject *getGlobal() const {
+    return GO;
+  }
+  ArrayRef<MDNode *> types() const {
+    return {&Types[0], &Types[NTypes]};
+  }
+};
+
 class LowerTypeTestsModule {
   Module &M;
 
@@ -233,20 +261,20 @@
 
   BitSetInfo
   buildBitSet(Metadata *TypeId,
-              const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
+              const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout);
   ByteArrayInfo *createByteArray(BitSetInfo &BSI);
   void allocateByteArrays();
   Value *createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, ByteArrayInfo *&BAI,
                           Value *BitOffset);
-  void
-  lowerTypeTestCalls(ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
-                     const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
+  void lowerTypeTestCalls(
+      ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
+      const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout);
   Value *
   lowerBitSetCall(CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
                   Constant *CombinedGlobal,
                   const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
   void buildBitSetsFromGlobalVariables(ArrayRef<Metadata *> TypeIds,
-                                       ArrayRef<GlobalVariable *> Globals);
+                                       ArrayRef<GlobalTypeMember *> Globals);
   unsigned getJumpTableEntrySize();
   Type *getJumpTableEntryType();
   void createJumpTableEntry(raw_ostream &OS, Function *Dest, unsigned Distance);
@@ -254,13 +282,13 @@
                             GlobalVariable *JumpTable, unsigned Distance);
   void verifyTypeMDNode(GlobalObject *GO, MDNode *Type);
   void buildBitSetsFromFunctions(ArrayRef<Metadata *> TypeIds,
-                                 ArrayRef<Function *> Functions);
+                                 ArrayRef<GlobalTypeMember *> Functions);
   void buildBitSetsFromFunctionsNative(ArrayRef<Metadata *> TypeIds,
-                                       ArrayRef<Function *> Functions);
+                                       ArrayRef<GlobalTypeMember *> Functions);
   void buildBitSetsFromFunctionsWASM(ArrayRef<Metadata *> TypeIds,
-                                     ArrayRef<Function *> Functions);
+                                     ArrayRef<GlobalTypeMember *> Functions);
   void buildBitSetsFromDisjointSet(ArrayRef<Metadata *> TypeIds,
-                                   ArrayRef<GlobalObject *> Globals);
+                                   ArrayRef<GlobalTypeMember *> Globals);
 
   void replaceWeakDeclarationWithJumpTablePtr(Function *F, Constant *JT);
   void moveInitializerToModuleConstructor(GlobalVariable *GV);
@@ -296,16 +324,14 @@
 /// Build a bit set for TypeId using the object layouts in
 /// GlobalLayout.
 BitSetInfo LowerTypeTestsModule::buildBitSet(
-    Metadata *TypeId, const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) {
+    Metadata *TypeId,
+    const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout) {
   BitSetBuilder BSB;
 
   // Compute the byte offset of each address associated with this type
   // identifier.
-  SmallVector<MDNode *, 2> Types;
   for (auto &GlobalAndOffset : GlobalLayout) {
-    Types.clear();
-    GlobalAndOffset.first->getMetadata(LLVMContext::MD_type, Types);
-    for (MDNode *Type : Types) {
+    for (MDNode *Type : GlobalAndOffset.first->types()) {
       if (Type->getOperand(1) != TypeId)
         continue;
       uint64_t Offset =
@@ -521,16 +547,17 @@
 /// Given a disjoint set of type identifiers and globals, lay out the globals,
 /// build the bit sets and lower the llvm.type.test calls.
 void LowerTypeTestsModule::buildBitSetsFromGlobalVariables(
-    ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalVariable *> Globals) {
+    ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Globals) {
   // Build a new global with the combined contents of the referenced globals.
   // This global is a struct whose even-indexed elements contain the original
   // contents of the referenced globals and whose odd-indexed elements contain
   // any padding required to align the next element to the next power of 2.
   std::vector<Constant *> GlobalInits;
   const DataLayout &DL = M.getDataLayout();
-  for (GlobalVariable *G : Globals) {
-    GlobalInits.push_back(G->getInitializer());
-    uint64_t InitSize = DL.getTypeAllocSize(G->getValueType());
+  for (GlobalTypeMember *G : Globals) {
+    GlobalVariable *GV = cast<GlobalVariable>(G->getGlobal());
+    GlobalInits.push_back(GV->getInitializer());
+    uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType());
 
     // Compute the amount of padding required.
     uint64_t Padding = NextPowerOf2(InitSize - 1) - InitSize;
@@ -554,7 +581,7 @@
   const StructLayout *CombinedGlobalLayout = DL.getStructLayout(NewTy);
 
   // Compute the offsets of the original globals within the new global.
-  DenseMap<GlobalObject *, uint64_t> GlobalLayout;
+  DenseMap<GlobalTypeMember *, uint64_t> GlobalLayout;
   for (unsigned I = 0; I != Globals.size(); ++I)
     // Multiply by 2 to account for padding elements.
     GlobalLayout[Globals[I]] = CombinedGlobalLayout->getElementOffset(I * 2);
@@ -565,31 +592,36 @@
   // global from which we built the combined global, and replace references
   // to the original globals with references to the aliases.
   for (unsigned I = 0; I != Globals.size(); ++I) {
+    GlobalVariable *GV = cast<GlobalVariable>(Globals[I]->getGlobal());
+
     // Multiply by 2 to account for padding elements.
     Constant *CombinedGlobalIdxs[] = {ConstantInt::get(Int32Ty, 0),
                                       ConstantInt::get(Int32Ty, I * 2)};
     Constant *CombinedGlobalElemPtr = ConstantExpr::getGetElementPtr(
         NewInit->getType(), CombinedGlobal, CombinedGlobalIdxs);
     if (LinkerSubsectionsViaSymbols) {
-      Globals[I]->replaceAllUsesWith(CombinedGlobalElemPtr);
+      GV->replaceAllUsesWith(CombinedGlobalElemPtr);
     } else {
-      assert(Globals[I]->getType()->getAddressSpace() == 0);
+      assert(GV->getType()->getAddressSpace() == 0);
       GlobalAlias *GAlias = GlobalAlias::create(NewTy->getElementType(I * 2), 0,
-                                                Globals[I]->getLinkage(), "",
+                                                GV->getLinkage(), "",
                                                 CombinedGlobalElemPtr, &M);
-      GAlias->setVisibility(Globals[I]->getVisibility());
-      GAlias->takeName(Globals[I]);
-      Globals[I]->replaceAllUsesWith(GAlias);
+      GAlias->setVisibility(GV->getVisibility());
+      GAlias->takeName(GV);
+      GV->replaceAllUsesWith(GAlias);
     }
-    Globals[I]->eraseFromParent();
+    GV->eraseFromParent();
   }
 }
 
 void LowerTypeTestsModule::lowerTypeTestCalls(
     ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
-    const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) {
+    const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout) {
   Constant *CombinedGlobalIntAddr =
       ConstantExpr::getPtrToInt(CombinedGlobalAddr, IntPtrTy);
+  DenseMap<GlobalObject *, uint64_t> GlobalObjLayout;
+  for (auto &P : GlobalLayout)
+    GlobalObjLayout[P.first->getGlobal()] = P.second;
 
   // For each type identifier in this disjoint set...
   for (Metadata *TypeId : TypeIds) {
@@ -609,7 +641,7 @@
     for (CallInst *CI : TypeTestCallSites[TypeId]) {
       ++NumTypeTestCallsLowered;
       Value *Lowered =
-          lowerBitSetCall(CI, BSI, BAI, CombinedGlobalIntAddr, GlobalLayout);
+          lowerBitSetCall(CI, BSI, BAI, CombinedGlobalIntAddr, GlobalObjLayout);
       CI->replaceAllUsesWith(Lowered);
       CI->eraseFromParent();
     }
@@ -730,7 +762,7 @@
 /// Given a disjoint set of type identifiers and functions, build the bit sets
 /// and lower the llvm.type.test calls, architecture dependently.
 void LowerTypeTestsModule::buildBitSetsFromFunctions(
-    ArrayRef<Metadata *> TypeIds, ArrayRef<Function *> Functions) {
+    ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Functions) {
   if (Arch == Triple::x86 || Arch == Triple::x86_64 || Arch == Triple::arm ||
       Arch == Triple::aarch64)
     buildBitSetsFromFunctionsNative(TypeIds, Functions);
@@ -803,7 +835,7 @@
 /// Given a disjoint set of type identifiers and functions, build a jump table
 /// for the functions, build the bit sets and lower the llvm.type.test calls.
 void LowerTypeTestsModule::buildBitSetsFromFunctionsNative(
-    ArrayRef<Metadata *> TypeIds, ArrayRef<Function *> Functions) {
+    ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Functions) {
   // Unlike the global bitset builder, the function bitset builder cannot
   // re-arrange functions in a particular order and base its calculations on the
   // layout of the functions' entry points, as we have no idea how large a
@@ -884,7 +916,7 @@
   assert(!Functions.empty());
 
   // Build a simple layout based on the regular layout of jump tables.
-  DenseMap<GlobalObject *, uint64_t> GlobalLayout;
+  DenseMap<GlobalTypeMember *, uint64_t> GlobalLayout;
   unsigned EntrySize = getJumpTableEntrySize();
   for (unsigned I = 0; I != Functions.size(); ++I)
     GlobalLayout[Functions[I]] = I * EntrySize;
@@ -905,48 +937,48 @@
   // Build aliases pointing to offsets into the jump table, and replace
   // references to the original functions with references to the aliases.
   for (unsigned I = 0; I != Functions.size(); ++I) {
+    Function *F = cast<Function>(Functions[I]->getGlobal());
+
     // Need a name for the asm label. Normally, unnamed functions get temporary
     // asm labels in TargetLoweringObjectFile but we don't have access to that
     // here.
-    if (!Functions[I]->hasName())
-      Functions[I]->setName("unnamed");
-    if (LinkerSubsectionsViaSymbols || Functions[I]->isDeclarationForLinker()) {
+    if (!F->hasName())
+      F->setName("unnamed");
+    if (LinkerSubsectionsViaSymbols || F->isDeclarationForLinker()) {
       Constant *CombinedGlobalElemPtr = ConstantExpr::getBitCast(
           ConstantExpr::getGetElementPtr(
               JumpTableType, JumpTable,
              ArrayRef<Constant *>{ConstantInt::get(IntPtrTy, 0),
                                   ConstantInt::get(IntPtrTy, I)}),
-          Functions[I]->getType());
-
+          F->getType());
-      if (Functions[I]->isWeakForLinker()) {
-        AsmOS << ".weak " << Functions[I]->getName() << "\n";
-        replaceWeakDeclarationWithJumpTablePtr(Functions[I],
-                                               CombinedGlobalElemPtr);
+      if (F->isWeakForLinker()) {
+        AsmOS << ".weak " << F->getName() << "\n";
+        replaceWeakDeclarationWithJumpTablePtr(F, CombinedGlobalElemPtr);
       } else {
-        Functions[I]->replaceAllUsesWith(CombinedGlobalElemPtr);
+        F->replaceAllUsesWith(CombinedGlobalElemPtr);
      }
    } else {
-      assert(Functions[I]->getType()->getAddressSpace() == 0);
+      assert(F->getType()->getAddressSpace() == 0);
 
-      createJumpTableAlias(AsmOS, Functions[I], JumpTable, I);
+      createJumpTableAlias(AsmOS, F, JumpTable, I);
 
       Function *DeclAlias =
-          Function::Create(cast<FunctionType>(Functions[I]->getValueType()),
+          Function::Create(cast<FunctionType>(F->getValueType()),
                            GlobalValue::ExternalLinkage, "", &M);
       // Since the alias (DeclAlias) is actually a declaration, it can not have
       // internal linkage. Compensate for that by giving it hidden visibility.
       // With this we end up with a GOT relocation against a local symbol.
-      DeclAlias->setVisibility(Functions[I]->hasLocalLinkage()
+      DeclAlias->setVisibility(F->hasLocalLinkage()
                                    ? GlobalValue::HiddenVisibility
-                                   : Functions[I]->getVisibility());
-      DeclAlias->takeName(Functions[I]);
+                                   : F->getVisibility());
+      DeclAlias->takeName(F);
       // Unnamed functions can not be added to llvm.used.
-      Functions[I]->setName(DeclAlias->getName() + ".cfi");
-      Functions[I]->replaceAllUsesWith(DeclAlias);
+      F->setName(DeclAlias->getName() + ".cfi");
+      F->replaceAllUsesWith(DeclAlias);
     }
-    if (!Functions[I]->isDeclarationForLinker())
-      Functions[I]->setLinkage(GlobalValue::InternalLinkage);
+    if (!F->isDeclarationForLinker())
+      F->setLinkage(GlobalValue::InternalLinkage);
   }
 
   // Try to emit the jump table at the end of the text segment.
@@ -960,14 +992,14 @@
   AsmOS << ".balign " << EntrySize << "\n";
   AsmOS << JumpTable->getName() << ":\n";
   for (unsigned I = 0; I != Functions.size(); ++I)
-    createJumpTableEntry(AsmOS, Functions[I], I);
+    createJumpTableEntry(AsmOS, cast<Function>(Functions[I]->getGlobal()), I);
 
   M.appendModuleInlineAsm(AsmOS.str());
 
   SmallVector<GlobalValue *, 16> Used;
   Used.reserve(Functions.size());
   for (auto *F : Functions)
-    Used.push_back(F);
+    Used.push_back(F->getGlobal());
   appendToUsed(M, Used);
 }
 
@@ -978,13 +1010,15 @@
 /// Note: Dynamic linking is not supported, as the WebAssembly ABI has not yet
 /// been finalized.
 void LowerTypeTestsModule::buildBitSetsFromFunctionsWASM(
-    ArrayRef<Metadata *> TypeIds, ArrayRef<Function *> Functions) {
+    ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Functions) {
   assert(!Functions.empty());
 
   // Build consecutive monotonic integer ranges for each call target set
-  DenseMap<GlobalObject *, uint64_t> GlobalLayout;
+  DenseMap<GlobalTypeMember *, uint64_t> GlobalLayout;
+
+  for (GlobalTypeMember *GTM : Functions) {
+    Function *F = cast<Function>(GTM->getGlobal());
 
-  for (Function *F : Functions) {
     // Skip functions that are not address taken, to avoid bloating the table
     if (!F->hasAddressTaken())
       continue;
@@ -996,7 +1030,7 @@
     F->setMetadata("wasm.index", MD);
 
     // Assign the counter value
-    GlobalLayout[F] = IndirectIndex++;
+    GlobalLayout[GTM] = IndirectIndex++;
   }
 
   // The indirect function table index space starts at zero, so pass a NULL
@@ -1006,7 +1040,7 @@
 }
 
 void LowerTypeTestsModule::buildBitSetsFromDisjointSet(
-    ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalObject *> Globals) {
+    ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Globals) {
   llvm::DenseMap<Metadata *, uint64_t> TypeIdIndices;
   for (unsigned I = 0; I != TypeIds.size(); ++I)
     TypeIdIndices[TypeIds[I]] = I;
@@ -1014,12 +1048,9 @@
   // For each type identifier, build a set of indices that refer to members of
   // the type identifier.
   std::vector<std::set<uint64_t>> TypeMembers(TypeIds.size());
-  SmallVector<MDNode *, 2> Types;
   unsigned GlobalIndex = 0;
-  for (GlobalObject *GO : Globals) {
-    Types.clear();
-    GO->getMetadata(LLVMContext::MD_type, Types);
-    for (MDNode *Type : Types) {
+  for (GlobalTypeMember *GTM : Globals) {
+    for (MDNode *Type : GTM->types()) {
       // Type = { offset, type identifier }
       unsigned TypeIdIndex = TypeIdIndices[Type->getOperand(1)];
       TypeMembers[TypeIdIndex].insert(GlobalIndex);
@@ -1043,32 +1074,32 @@
     GLB.addFragment(MemSet);
 
   // Build the bitsets from this disjoint set.
-  if (Globals.empty() || isa<GlobalVariable>(Globals[0])) {
+  if (Globals.empty() || isa<GlobalVariable>(Globals[0]->getGlobal())) {
     // Build a vector of global variables with the computed layout.
-    std::vector<GlobalVariable *> OrderedGVs(Globals.size());
+    std::vector<GlobalTypeMember *> OrderedGVs(Globals.size());
     auto OGI = OrderedGVs.begin();
     for (auto &&F : GLB.Fragments) {
       for (auto &&Offset : F) {
-        auto GV = dyn_cast<GlobalVariable>(Globals[Offset]);
+        auto GV = dyn_cast<GlobalVariable>(Globals[Offset]->getGlobal());
        if (!GV)
           report_fatal_error("Type identifier may not contain both global "
                              "variables and functions");
-        *OGI++ = GV;
+        *OGI++ = Globals[Offset];
       }
     }
 
     buildBitSetsFromGlobalVariables(TypeIds, OrderedGVs);
   } else {
     // Build a vector of functions with the computed layout.
-    std::vector<Function *> OrderedFns(Globals.size());
+    std::vector<GlobalTypeMember *> OrderedFns(Globals.size());
    auto OFI = OrderedFns.begin();
    for (auto &&F : GLB.Fragments) {
      for (auto &&Offset : F) {
-        auto Fn = dyn_cast<Function>(Globals[Offset]);
+        auto Fn = dyn_cast<Function>(Globals[Offset]->getGlobal());
        if (!Fn)
           report_fatal_error("Type identifier may not contain both global "
                              "variables and functions");
-        *OFI++ = Fn;
+        *OFI++ = Globals[Offset];
      }
    }
 
@@ -1093,22 +1124,37 @@
   // Equivalence class set containing type identifiers and the globals that
   // reference them. This is used to partition the set of type identifiers in
   // the module into disjoint sets.
-  typedef EquivalenceClasses<PointerUnion<GlobalObject *, Metadata *>>
+  typedef EquivalenceClasses<PointerUnion<GlobalTypeMember *, Metadata *>>
       GlobalClassesTy;
   GlobalClassesTy GlobalClasses;
 
-  // Verify the type metadata and build a mapping from type identifiers to their
-  // last observed index in the list of globals. This will be used later to
-  // deterministically order the list of type identifiers.
-  llvm::DenseMap<Metadata *, unsigned> TypeIdIndices;
+  // Verify the type metadata and build a few data structures to let us
+  // efficiently enumerate the type identifiers associated with a global:
+  // a list of GlobalTypeMembers (a GlobalObject stored alongside a vector
+  // of associated type metadata) and a mapping from type identifiers to their
+  // list of GlobalTypeMembers and last observed index in the list of globals.
+  // The indices will be used later to deterministically order the list of type
+  // identifiers.
+  BumpPtrAllocator Alloc;
+  struct TIInfo {
+    unsigned Index;
+    std::vector<GlobalTypeMember *> RefGlobals;
+  };
+  llvm::DenseMap<Metadata *, TIInfo> TypeIdInfo;
   unsigned I = 0;
   SmallVector<MDNode *, 2> Types;
   for (GlobalObject &GO : M.global_objects()) {
     Types.clear();
     GO.getMetadata(LLVMContext::MD_type, Types);
+    if (Types.empty())
+      continue;
+
+    auto *GTM = GlobalTypeMember::create(Alloc, &GO, Types);
     for (MDNode *Type : Types) {
       verifyTypeMDNode(&GO, Type);
-      TypeIdIndices[cast<MDNode>(Type)->getOperand(1)] = ++I;
+      auto &Info = TypeIdInfo[cast<MDNode>(Type)->getOperand(1)];
+      Info.Index = ++I;
+      Info.RefGlobals.push_back(GTM);
     }
   }
 
@@ -1136,14 +1182,9 @@
     GlobalClassesTy::member_iterator CurSet = GlobalClasses.findLeader(GCI);
 
     // Add the referenced globals to the type identifier's equivalence class.
-    for (GlobalObject &GO : M.global_objects()) {
-      Types.clear();
-      GO.getMetadata(LLVMContext::MD_type, Types);
-      for (MDNode *Type : Types)
-        if (Type->getOperand(1) == BitSet)
-          CurSet = GlobalClasses.unionSets(
-              CurSet, GlobalClasses.findLeader(GlobalClasses.insert(&GO)));
-    }
+    for (GlobalTypeMember *GTM : TypeIdInfo[BitSet].RefGlobals)
+      CurSet = GlobalClasses.unionSets(
+          CurSet, GlobalClasses.findLeader(GlobalClasses.insert(GTM)));
   }
 
   if (GlobalClasses.empty())
@@ -1163,7 +1204,7 @@
     for (GlobalClassesTy::member_iterator MI = GlobalClasses.member_begin(I);
          MI != GlobalClasses.member_end(); ++MI) {
       if ((*MI).is<Metadata *>())
-        MaxIndex = std::max(MaxIndex, TypeIdIndices[MI->get<Metadata *>()]);
+        MaxIndex = std::max(MaxIndex, TypeIdInfo[MI->get<Metadata *>()].Index);
     }
     Sets.emplace_back(I, MaxIndex);
   }
@@ -1177,20 +1218,20 @@
   for (const auto &S : Sets) {
     // Build the list of type identifiers in this disjoint set.
     std::vector<Metadata *> TypeIds;
-    std::vector<GlobalObject *> Globals;
+    std::vector<GlobalTypeMember *> Globals;
     for (GlobalClassesTy::member_iterator MI =
              GlobalClasses.member_begin(S.first);
          MI != GlobalClasses.member_end(); ++MI) {
       if ((*MI).is<Metadata *>())
         TypeIds.push_back(MI->get<Metadata *>());
       else
-        Globals.push_back(MI->get<GlobalObject *>());
+        Globals.push_back(MI->get<GlobalTypeMember *>());
     }
 
     // Order type identifiers by global index for determinism. This ordering is
     // stable as there is a one-to-one mapping between metadata and indices.
     std::sort(TypeIds.begin(), TypeIds.end(), [&](Metadata *M1, Metadata *M2) {
-      return TypeIdIndices[M1] < TypeIdIndices[M2];
+      return TypeIdInfo[M1].Index < TypeIdInfo[M2].Index;
    });
 
     // Build bitsets for this disjoint set.