diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h --- a/bolt/include/bolt/Core/BinaryContext.h +++ b/bolt/include/bolt/Core/BinaryContext.h @@ -199,7 +199,7 @@ uint32_t DuplicatedJumpTables{0x10000000}; /// Function fragments to skip. - std::vector FragmentsToSkip; + std::unordered_set FragmentsToSkip; /// The runtime library. std::unique_ptr RtLibrary; @@ -235,6 +235,18 @@ MIB = std::move(TargetBuilder); } + /// Return function fragments to skip. + const std::unordered_set &getFragmentsToSkip() { + return FragmentsToSkip; + } + + /// Add function fragment to skip + void addFragmentsToSkip(BinaryFunction *Function) { + FragmentsToSkip.insert(Function); + } + + void clearFragmentsToSkip() { FragmentsToSkip.clear(); } + /// Given DWOId returns CU if it exists in DWOCUs. Optional getDWOCU(uint64_t DWOId); diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -337,9 +337,9 @@ /// True if the original entry point was patched. bool IsPatched{false}; - /// True if the function contains jump table with entries pointing to - /// locations in fragments. - bool HasSplitJumpTable{false}; + /// True if the function contains explicit or implicit indirect branch to its + /// split fragments, e.g., split jump table, landing pad in split fragment + bool HasIndirectTargetToSplitFragment{false}; /// True if there are no control-flow edges with successors in other functions /// (i.e. if tail calls have edges to function-local basic blocks). @@ -1440,9 +1440,12 @@ /// otherwise processed. bool isPseudo() const { return IsPseudo; } - /// Return true if the function contains a jump table with entries pointing - /// to split fragments. - bool hasSplitJumpTable() const { return HasSplitJumpTable; } + /// Return true if the function contains explicit or implicit indirect branch + /// to its split fragments, e.g., split jump table, landing pad in split + /// fragment. + bool hasIndirectTargetToSplitFragment() const { + return HasIndirectTargetToSplitFragment; + } /// Return true if all CFG edges have local successors. bool hasCanonicalCFG() const { return HasCanonicalCFG; } @@ -1837,7 +1840,9 @@ void setIsPatched(bool V) { IsPatched = V; } - void setHasSplitJumpTable(bool V) { HasSplitJumpTable = V; } + void setHasIndirectTargetToSplitFragment(bool V) { + HasIndirectTargetToSplitFragment = V; + } void setHasCanonicalCFG(bool V) { HasCanonicalCFG = V; } diff --git a/bolt/include/bolt/Core/JumpTable.h b/bolt/include/bolt/Core/JumpTable.h --- a/bolt/include/bolt/Core/JumpTable.h +++ b/bolt/include/bolt/Core/JumpTable.h @@ -87,13 +87,12 @@ uint64_t Count{0}; /// BinaryFunction this jump tables belongs to. - BinaryFunction *Parent{nullptr}; + SmallVector Parents; private: /// Constructor should only be called by a BinaryContext. JumpTable(MCSymbol &Symbol, uint64_t Address, size_t EntrySize, - JumpTableType Type, LabelMapType &&Labels, BinaryFunction &BF, - BinarySection &Section); + JumpTableType Type, LabelMapType &&Labels, BinarySection &Section); public: /// Return the size of the jump table. diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -502,7 +502,6 @@ // Number of targets other than __builtin_unreachable. uint64_t NumRealEntries = 0; - constexpr uint64_t INVALID_OFFSET = std::numeric_limits::max(); auto addOffset = [&](uint64_t Offset) { if (Offsets) Offsets->emplace_back(Offset); @@ -571,7 +570,7 @@ // __builtin_unreachable() case. if (Value == BF.getAddress() + BF.getSize()) { - addOffset(Value - BF.getAddress()); + addOffset(Value); HasUnreachable = true; LLVM_DEBUG(dbgs() << "OK: __builtin_unreachable\n"); continue; @@ -611,18 +610,9 @@ ++NumRealEntries; - if (TargetBF == &BF) { - // Address inside the function. - addOffset(Value - TargetBF->getAddress()); - LLVM_DEBUG(dbgs() << "OK: real entry\n"); - } else { - // Address in split fragment. - BF.setHasSplitJumpTable(true); - // Add invalid offset for proper identification of jump table size. - addOffset(INVALID_OFFSET); - LLVM_DEBUG(dbgs() << "OK: address in split fragment " - << TargetBF->getPrintName() << '\n'); - } + if (TargetBF != &BF) + BF.setHasIndirectTargetToSplitFragment(true); + addOffset(Value); } // It's a jump table if the number of real entries is more than 1, or there's @@ -637,9 +627,11 @@ for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE; ++JTI) { JumpTable *JT = JTI->second; - BinaryFunction &BF = *JT->Parent; - if (!BF.isSimple()) + bool nonSimpleParent = false; + for (BinaryFunction *BF : JT->Parents) + nonSimpleParent |= !BF->isSimple(); + if (nonSimpleParent) continue; uint64_t NextJTAddress = 0; @@ -647,25 +639,39 @@ if (NextJTI != JTE) NextJTAddress = NextJTI->second->getAddress(); - const bool Success = analyzeJumpTable(JT->getAddress(), JT->Type, BF, - NextJTAddress, &JT->OffsetEntries); + const bool Success = + analyzeJumpTable(JT->getAddress(), JT->Type, *(JT->Parents[0]), + NextJTAddress, &JT->OffsetEntries); if (!Success) { - dbgs() << "failed to analyze jump table in function " << BF << '\n'; + LLVM_DEBUG(ListSeparator LS; + dbgs() << "failed to analyze jump table in function "; + for (BinaryFunction *Frag + : JT->Parents) dbgs() + << LS << *Frag; + dbgs() << '\n';); JT->print(dbgs()); if (NextJTI != JTE) { - dbgs() << "next jump table at 0x" - << Twine::utohexstr(NextJTI->second->getAddress()) - << " belongs to function " << *NextJTI->second->Parent << '\n'; + LLVM_DEBUG(ListSeparator LS; + dbgs() << "next jump table at 0x" + << Twine::utohexstr(NextJTI->second->getAddress()) + << " belongs to function "; + for (BinaryFunction *Frag + : NextJTI->second->Parents) dbgs() + << LS << *Frag; + dbgs() << "\n";); NextJTI->second->print(dbgs()); } llvm_unreachable("jump table heuristic failure"); } - - for (uint64_t EntryOffset : JT->OffsetEntries) { - if (EntryOffset == BF.getSize()) - BF.IgnoredBranches.emplace_back(EntryOffset, BF.getSize()); - else - BF.registerReferencedOffset(EntryOffset); + for (BinaryFunction *Frag : JT->Parents) { + for (uint64_t EntryOffset : JT->OffsetEntries) + if (EntryOffset == Frag->getAddress() + Frag->getSize()) { + Frag->IgnoredBranches.emplace_back(EntryOffset - Frag->getAddress(), + Frag->getSize()); + } else if (EntryOffset >= Frag->getAddress() && + EntryOffset < Frag->getAddress() + Frag->getSize()) { + Frag->registerReferencedOffset(EntryOffset - Frag->getAddress()); + } } // In strict mode, erase PC-relative relocation record. Later we check that @@ -679,8 +685,9 @@ } // Mark to skip the function and all its fragments. - if (BF.hasSplitJumpTable()) - FragmentsToSkip.push_back(&BF); + for (BinaryFunction *Frag : JT->Parents) + if (Frag->hasIndirectTargetToSplitFragment()) + addFragmentsToSkip(Frag); } if (opts::StrictMode && DataPCRelocations.size()) { @@ -696,27 +703,25 @@ } void BinaryContext::skipMarkedFragments() { - // Unique functions in the vector. - std::unordered_set UniqueFunctions(FragmentsToSkip.begin(), - FragmentsToSkip.end()); - // Copy the functions back to FragmentsToSkip. - FragmentsToSkip.assign(UniqueFunctions.begin(), UniqueFunctions.end()); + std::vector FragmentQueue; + // Copy the functions to FragmentsToQueue. + FragmentQueue.assign(FragmentsToSkip.begin(), FragmentsToSkip.end()); auto addToWorklist = [&](BinaryFunction *Function) -> void { - if (UniqueFunctions.count(Function)) + if (FragmentsToSkip.count(Function)) return; - FragmentsToSkip.push_back(Function); - UniqueFunctions.insert(Function); + FragmentQueue.push_back(Function); + addFragmentsToSkip(Function); }; // Functions containing split jump tables need to be skipped with all // fragments (transitively). - for (size_t I = 0; I != FragmentsToSkip.size(); I++) { - BinaryFunction *BF = FragmentsToSkip[I]; - assert(UniqueFunctions.count(BF) && + for (size_t I = 0; I != FragmentQueue.size(); I++) { + BinaryFunction *BF = FragmentQueue[I]; + assert(FragmentsToSkip.count(BF) && "internal error in traversing function fragments"); if (opts::Verbosity >= 1) errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n'; BF->setSimple(false); - BF->setHasSplitJumpTable(true); + BF->setHasIndirectTargetToSplitFragment(true); llvm::for_each(BF->Fragments, addToWorklist); llvm::for_each(BF->ParentFragments, addToWorklist); @@ -725,7 +730,6 @@ errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size() << " function" << (FragmentsToSkip.size() == 1 ? "" : "s") << " due to cold fragments\n"; - FragmentsToSkip.clear(); } MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix, @@ -767,27 +771,35 @@ return (Fragment->isFragment() && Fragment->isParentFragment(Parent)); }; + // Two fragments of same function access same jump table if (JumpTable *JT = getJumpTableContainingAddress(Address)) { assert(JT->Type == Type && "jump table types have to match"); - bool HasMultipleParents = isFragmentOf(JT->Parent, &Function) || - isFragmentOf(&Function, JT->Parent); - assert((JT->Parent == &Function || HasMultipleParents) && - "cannot re-use jump table of a different function"); assert(Address == JT->getAddress() && "unexpected non-empty jump table"); - // Flush OffsetEntries with INVALID_OFFSET if multiple parents - // Duplicate the entry for the parent function for easy access - if (HasMultipleParents) { + // Assumption: a jump table has no more than two distinct parents + if (JT->Parents.size() == 1 && JT->Parents[0] != &Function) { + bool SameFunction = isFragmentOf(JT->Parents[0], &Function) || + isFragmentOf(&Function, JT->Parents[0]); + assert(SameFunction && + "cannot re-use jump table of a different function"); + // Duplicate the entry for the parent function for easy access + JT->Parents.push_back(&Function); if (opts::Verbosity > 2) { outs() << "BOLT-WARNING: Multiple fragments access same jump table: " - << JT->Parent->getPrintName() << "; " << Function.getPrintName() - << "\n"; + << JT->Parents[0]->getPrintName() << "; " + << Function.getPrintName() << "\n"; + JT->print(outs()); } - constexpr uint64_t INVALID_OFFSET = std::numeric_limits::max(); - for (unsigned I = 0; I < JT->OffsetEntries.size(); ++I) - JT->OffsetEntries[I] = INVALID_OFFSET; Function.JumpTables.emplace(Address, JT); + JT->Parents[0]->setHasIndirectTargetToSplitFragment(true); + JT->Parents[1]->setHasIndirectTargetToSplitFragment(true); } + + bool ValidReuse = false; + for (BinaryFunction *Frag : JT->Parents) + if (Frag == &Function) + ValidReuse = true; + assert(ValidReuse && "cannot re-use jump table of a different function"); return JT->getFirstLabel(); } @@ -808,13 +820,14 @@ << " in function " << Function << '\n'); JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type, - JumpTable::LabelMapType{{0, JTLabel}}, Function, + JumpTable::LabelMapType{{0, JTLabel}}, *getSectionForAddress(Address)); + JT->Parents.push_back(&Function); JumpTables.emplace(Address, JT); // Duplicate the entry for the parent function for easy access. Function.JumpTables.emplace(Address, JT); - + JT->print(outs()); return JTLabel; } @@ -836,8 +849,9 @@ MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT"); JumpTable *NewJT = new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type, - JumpTable::LabelMapType{{Offset, NewLabel}}, Function, + JumpTable::LabelMapType{{Offset, NewLabel}}, *getSectionForAddress(JT->getAddress())); + NewJT->Parents.push_back(&Function); NewJT->Entries = JT->Entries; NewJT->Counts = JT->Counts; uint64_t JumpTableID = ++DuplicatedJumpTables; diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -1655,9 +1655,8 @@ } if (JT.Entries.empty()) { for (unsigned I = 0; I < JT.OffsetEntries.size(); ++I) { - MCSymbol *Label = - getOrCreateLocalLabel(getAddress() + JT.OffsetEntries[I], - /*CreatePastEnd*/ true); + MCSymbol *Label = getOrCreateLocalLabel(JT.OffsetEntries[I], + /*CreatePastEnd*/ true); JT.Entries.push_back(Label); } } @@ -1684,12 +1683,13 @@ uint64_t EntryOffset = JTAddress - JT->getAddress(); while (EntryOffset < JT->getSize()) { - uint64_t TargetOffset = JT->OffsetEntries[EntryOffset / JT->EntrySize]; - if (TargetOffset < getSize()) { - TakenBranches.emplace_back(JTSiteOffset, TargetOffset); + uint64_t AbsoluteOffset = JT->OffsetEntries[EntryOffset / JT->EntrySize]; + uint64_t RelativeOffset = AbsoluteOffset - getAddress(); + if (RelativeOffset < getSize()) { + TakenBranches.emplace_back(JTSiteOffset, RelativeOffset); if (opts::StrictMode) - registerReferencedOffset(TargetOffset); + registerReferencedOffset(RelativeOffset); } EntryOffset += JT->EntrySize; diff --git a/bolt/lib/Core/JumpTable.cpp b/bolt/lib/Core/JumpTable.cpp --- a/bolt/lib/Core/JumpTable.cpp +++ b/bolt/lib/Core/JumpTable.cpp @@ -29,9 +29,9 @@ bolt::JumpTable::JumpTable(MCSymbol &Symbol, uint64_t Address, size_t EntrySize, JumpTableType Type, LabelMapType &&Labels, - BinaryFunction &BF, BinarySection &Section) + BinarySection &Section) : BinaryData(Symbol, Address, 0, EntrySize, Section), EntrySize(EntrySize), - OutputEntrySize(EntrySize), Type(Type), Labels(Labels), Parent(&BF) {} + OutputEntrySize(EntrySize), Type(Type), Labels(Labels) {} std::pair bolt::JumpTable::getEntriesForAddress(const uint64_t Addr) const { @@ -103,11 +103,15 @@ uint64_t Offset = 0; if (Type == JTT_PIC) OS << "PIC "; - OS << "Jump table " << getName() << " for function " << *Parent << " at 0x" - << Twine::utohexstr(getAddress()) << " with a total count of " << Count - << ":\n"; + ListSeparator LS; + + OS << "Jump table " << getName() << " for function "; + for (BinaryFunction *Frag : Parents) + OS << LS << *Frag; + OS << " at 0x" << Twine::utohexstr(getAddress()) << " with a total count of " + << Count << ":\n"; for (const uint64_t EntryOffset : OffsetEntries) - OS << " 0x" << Twine::utohexstr(EntryOffset) << '\n'; + OS << " absolute offset: 0x" << Twine::utohexstr(EntryOffset) << '\n'; for (const MCSymbol *Entry : Entries) { auto LI = Labels.find(Offset); if (Offset && LI != Labels.end()) { diff --git a/bolt/lib/Passes/AsmDump.cpp b/bolt/lib/Passes/AsmDump.cpp --- a/bolt/lib/Passes/AsmDump.cpp +++ b/bolt/lib/Passes/AsmDump.cpp @@ -59,11 +59,10 @@ const MCInst &Instr, const std::string &BranchLabel) { StringRef FunctionName = BF.getOneName(); const JumpTable *JT = BF.getJumpTable(Instr); - for (const uint64_t EntryOffset : JT->OffsetEntries) { - auto LI = JT->Labels.find(EntryOffset); - StringRef TargetName = LI->second->getName(); - const uint64_t Mispreds = JT->Counts[EntryOffset].Mispreds; - const uint64_t Count = JT->Counts[EntryOffset].Count; + for (uint32_t i = 0; i < JT->Entries.size(); ++i) { + StringRef TargetName = JT->Entries[i]->getName(); + const uint64_t Mispreds = JT->Counts[i].Mispreds; + const uint64_t Count = JT->Counts[i].Count; OS << "# FDATA: 1 " << FunctionName << " #" << BranchLabel << "# " << "1 " << FunctionName << " #" << TargetName << "# " << Mispreds << " " << Count << '\n'; diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -2919,7 +2919,7 @@ if (!Function.isSimple()) { assert((!BC->HasRelocations || Function.getSize() == 0 || - Function.hasSplitJumpTable()) && + Function.hasIndirectTargetToSplitFragment()) && "unexpected non-simple function in relocation mode"); continue; } diff --git a/bolt/test/X86/split-func-jump-table-fragment-bidirection.s b/bolt/test/X86/split-func-jump-table-fragment-bidirection.s --- a/bolt/test/X86/split-func-jump-table-fragment-bidirection.s +++ b/bolt/test/X86/split-func-jump-table-fragment-bidirection.s @@ -8,9 +8,10 @@ # RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o # RUN: llvm-strip --strip-unneeded %t.o # RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -# RUN: llvm-bolt -v=3 %t.exe -o %t.out 2>&1 | FileCheck %s +# RUN: llvm-bolt -print-cfg -v=3 %t.exe -o %t.out 2>&1 | FileCheck %s # CHECK: BOLT-WARNING: Multiple fragments access same jump table: main; main.cold.1 +# CHECK: PIC Jump table JUMP_TABLE1 for function main, main.cold.1 at 0x2e0 with a total count of 0: .text .globl main