diff --git a/bolt/include/bolt/Core/BinaryEmitter.h b/bolt/include/bolt/Core/BinaryEmitter.h --- a/bolt/include/bolt/Core/BinaryEmitter.h +++ b/bolt/include/bolt/Core/BinaryEmitter.h @@ -22,6 +22,7 @@ namespace bolt { class BinaryContext; class BinaryFunction; +class FunctionFragment; /// Emit all code and data from the BinaryContext \p BC into the \p Streamer. /// @@ -34,7 +35,7 @@ /// Emit \p BF function code. The caller is responsible for emitting function /// symbol(s) and setting the section to emit the code to. void emitFunctionBody(MCStreamer &Streamer, BinaryFunction &BF, - bool EmitColdPart, bool EmitCodeOnly = false); + const FunctionFragment &FF, bool EmitCodeOnly); } // namespace bolt } // namespace llvm diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -568,7 +568,7 @@ }; SmallVector BasicBlockOffsets; - MCSymbol *ColdSymbol{nullptr}; + SmallVector ColdSymbols; /// Symbol at the end of the function. mutable MCSymbol *FunctionEndLabel{nullptr}; @@ -1081,7 +1081,23 @@ /// Return MC symbol associated with the function. /// All references to the function should use this symbol. - MCSymbol *getSymbol() { return Symbols[0]; } + MCSymbol *getSymbol(const FragmentNum Fragment = FragmentNum::hot()) { + if (Fragment == FragmentNum::hot()) + return Symbols[0]; + + size_t ColdSymbolIndex = Fragment.get() - 1; + if (ColdSymbolIndex >= ColdSymbols.size()) + ColdSymbols.resize(ColdSymbolIndex + 1); + + MCSymbol *&ColdSymbol = ColdSymbols[ColdSymbolIndex]; + if (ColdSymbol == nullptr) { + SmallString<10> Appendix = formatv(".cold.{0}", ColdSymbolIndex); + ColdSymbol = BC.Ctx->getOrCreateSymbol( + NameResolver::append(Symbols[0]->getName(), Appendix)); + } + + return ColdSymbol; + } /// Return MC symbol associated with the function (const version). /// All references to the function should use this symbol. 
@@ -1135,16 +1151,6 @@ /// Return true of all callbacks returned true, false otherwise. bool forEachEntryPoint(EntryPointCallbackTy Callback) const; - MCSymbol *getColdSymbol() { - if (ColdSymbol) - return ColdSymbol; - - ColdSymbol = BC.Ctx->getOrCreateSymbol( - NameResolver::append(getSymbol()->getName(), ".cold.0")); - - return ColdSymbol; - } - /// Return MC symbol associated with the end of the function. MCSymbol *getFunctionEndLabel() const { assert(BC.Ctx && "cannot be called with empty context"); @@ -1323,8 +1329,11 @@ } /// Return cold code section name for the function. - StringRef getColdCodeSectionName() const { - return StringRef(ColdCodeSectionName); + std::string getColdCodeSectionName(const FragmentNum Fragment) const { + std::string Result = ColdCodeSectionName; + if (Fragment != FragmentNum::cold()) + Result.append(".").append(std::to_string(Fragment.get() - 1)); + return Result; } /// Assign a section name for the cold part of the function. @@ -1333,8 +1342,9 @@ } /// Get output code section for cold code of this function. - ErrorOr getColdCodeSection() const { - return BC.getUniqueSectionByName(getColdCodeSectionName()); + ErrorOr + getColdCodeSection(const FragmentNum Fragment) const { + return BC.getUniqueSectionByName(getColdCodeSectionName(Fragment)); } /// Return true iif the function will halt execution on entry. diff --git a/bolt/include/bolt/Core/FunctionLayout.h b/bolt/include/bolt/Core/FunctionLayout.h --- a/bolt/include/bolt/Core/FunctionLayout.h +++ b/bolt/include/bolt/Core/FunctionLayout.h @@ -66,6 +66,10 @@ : Num(Num), Layout(Layout) {} public: + FragmentNum getFragmentNum() const { return Num; } + bool isMainFragment() const { return Num.get() == 0; } + bool isSplitFragment() const { return Num.get() > 0; } + unsigned size() const; bool empty() const; const_iterator begin() const; @@ -153,6 +157,18 @@ /// Return the fragment identified by Num. 
FunctionFragment getFragment(FragmentNum Num) const; + /// Get the fragment that contains all entry blocks and other blocks that + /// cannot be split. + FunctionFragment getMainFragment() const { + return getFragment(FragmentNum::hot()); + } + + /// Get the range of all fragments after the first one, i.e. the fragments + /// that were split off from the main fragment. + iterator_range getSplitFragments() const { + return {++fragment_begin(), fragment_end()}; + } + /// Find the fragment that contains BB. FunctionFragment findFragment(const BinaryBasicBlock *BB) const; diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -38,6 +38,7 @@ #include #include #include +#include #include using namespace llvm; @@ -2189,27 +2190,31 @@ // Create symbols in the LocalCtx so that they get destroyed with it. MCSymbol *StartLabel = LocalCtx->createTempSymbol(); MCSymbol *EndLabel = LocalCtx->createTempSymbol(); - MCSymbol *ColdStartLabel = LocalCtx->createTempSymbol(); - MCSymbol *ColdEndLabel = LocalCtx->createTempSymbol(); Streamer->switchSection(Section); Streamer->emitLabel(StartLabel); - emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/false, + emitFunctionBody(*Streamer, BF, BF.getLayout().getMainFragment(), /*EmitCodeOnly=*/true); Streamer->emitLabel(EndLabel); - if (BF.isSplit()) { - MCSectionELF *ColdSection = - LocalCtx->getELFSection(BF.getColdCodeSectionName(), ELF::SHT_PROGBITS, - ELF::SHF_EXECINSTR | ELF::SHF_ALLOC); - ColdSection->setHasInstructions(true); - - Streamer->switchSection(ColdSection); - Streamer->emitLabel(ColdStartLabel); - emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/true, - /*EmitCodeOnly=*/true); - Streamer->emitLabel(ColdEndLabel); - // To avoid calling MCObjectStreamer::flushPendingLabels() which is private + using LabelRange = std::pair; + SmallVector SplitLabels; + for (const FunctionFragment FF : BF.getLayout().getSplitFragments()) { + MCSymbol *const
SplitStartLabel = LocalCtx->createTempSymbol(); + MCSymbol *const SplitEndLabel = LocalCtx->createTempSymbol(); + SplitLabels.emplace_back(SplitStartLabel, SplitEndLabel); + + MCSectionELF *const SplitSection = LocalCtx->getELFSection( + BF.getColdCodeSectionName(FF.getFragmentNum()), ELF::SHT_PROGBITS, + ELF::SHF_EXECINSTR | ELF::SHF_ALLOC); + SplitSection->setHasInstructions(true); + Streamer->switchSection(SplitSection); + + Streamer->emitLabel(SplitStartLabel); + emitFunctionBody(*Streamer, BF, FF, /*EmitCodeOnly=*/true); + Streamer->emitLabel(SplitEndLabel); + // To avoid calling MCObjectStreamer::flushPendingLabels() which is + // private Streamer->emitBytes(StringRef("")); Streamer->switchSection(Section); } @@ -2225,10 +2230,12 @@ const uint64_t HotSize = Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel); - const uint64_t ColdSize = BF.isSplit() - ? Layout.getSymbolOffset(*ColdEndLabel) - - Layout.getSymbolOffset(*ColdStartLabel) - : 0ULL; + const uint64_t ColdSize = + std::accumulate(SplitLabels.begin(), SplitLabels.end(), 0ULL, + [&](const uint64_t Accu, const LabelRange &Labels) { + return Accu + Layout.getSymbolOffset(*Labels.second) - + Layout.getSymbolOffset(*Labels.first); + }); // Clean-up the effect of the code emission. for (const MCSymbol &Symbol : Assembler.symbols()) { diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp --- a/bolt/lib/Core/BinaryEmitter.cpp +++ b/bolt/lib/Core/BinaryEmitter.cpp @@ -129,7 +129,7 @@ /// Emit function code. The caller is responsible for emitting function /// symbol(s) and setting the section to emit the code to. - void emitFunctionBody(BinaryFunction &BF, bool EmitColdPart, + void emitFunctionBody(BinaryFunction &BF, const FunctionFragment &FF, bool EmitCodeOnly = false); private: @@ -137,7 +137,7 @@ void emitFunctions(); /// Emit a single function. 
- bool emitFunction(BinaryFunction &BF, bool EmitColdPart); + bool emitFunction(BinaryFunction &BF, const FunctionFragment &FF); /// Helper for emitFunctionBody to write data inside a function /// (used for AArch64) @@ -234,13 +234,24 @@ !Function->hasValidProfile()) Streamer.setAllowAutoPadding(false); - Emitted |= emitFunction(*Function, /*EmitColdPart=*/false); + const FunctionLayout &Layout = Function->getLayout(); + Emitted |= emitFunction(*Function, Layout.getMainFragment()); if (Function->isSplit()) { if (opts::X86AlignBranchBoundaryHotOnly) Streamer.setAllowAutoPadding(false); - Emitted |= emitFunction(*Function, /*EmitColdPart=*/true); + + assert((Layout.fragment_size() == 1 || Function->isSimple()) && + "Only simple functions can have fragments"); + for (const FunctionFragment FF : Layout.getSplitFragments()) { + // Skip empty fragments so no symbols and sections for empty fragments + // are generated + if (FF.empty() && !Function->hasConstantIsland()) + continue; + Emitted |= emitFunction(*Function, FF); + } } + Streamer.setAllowAutoPadding(OriginalAllowAutoPadding); if (Emitted) @@ -268,16 +279,18 @@ } } -bool BinaryEmitter::emitFunction(BinaryFunction &Function, bool EmitColdPart) { +bool BinaryEmitter::emitFunction(BinaryFunction &Function, + const FunctionFragment &FF) { if (Function.size() == 0 && !Function.hasIslandsInfo()) return false; if (Function.getState() == BinaryFunction::State::Empty) return false; - MCSection *Section = - BC.getCodeSection(EmitColdPart ? Function.getColdCodeSectionName() - : Function.getCodeSectionName()); + MCSection *Section = BC.getCodeSection( + FF.isSplitFragment() + ? 
Function.getColdCodeSectionName(FF.getFragmentNum()) + : Function.getCodeSectionName()); Streamer.switchSection(Section); Section->setHasInstructions(true); BC.Ctx->addGenDwarfSection(Section); @@ -290,8 +303,9 @@ Section->setAlignment(Align(opts::AlignFunctions)); Streamer.emitCodeAlignment(BinaryFunction::MinAlign, &*BC.STI); - uint16_t MaxAlignBytes = EmitColdPart ? Function.getMaxColdAlignmentBytes() - : Function.getMaxAlignmentBytes(); + uint16_t MaxAlignBytes = FF.isSplitFragment() + ? Function.getMaxColdAlignmentBytes() + : Function.getMaxAlignmentBytes(); if (MaxAlignBytes > 0) Streamer.emitCodeAlignment(Function.getAlignment(), &*BC.STI, MaxAlignBytes); @@ -302,29 +316,29 @@ MCContext &Context = Streamer.getContext(); const MCAsmInfo *MAI = Context.getAsmInfo(); - MCSymbol *StartSymbol = nullptr; + MCSymbol *const StartSymbol = Function.getSymbol(FF.getFragmentNum()); // Emit all symbols associated with the main function entry. - if (!EmitColdPart) { - StartSymbol = Function.getSymbol(); + if (FF.isMainFragment()) { for (MCSymbol *Symbol : Function.getSymbols()) { Streamer.emitSymbolAttribute(Symbol, MCSA_ELF_TypeFunction); Streamer.emitLabel(Symbol); } } else { - StartSymbol = Function.getColdSymbol(); Streamer.emitSymbolAttribute(StartSymbol, MCSA_ELF_TypeFunction); Streamer.emitLabel(StartSymbol); } // Emit CFI start if (Function.hasCFI()) { + assert(Function.getLayout().isHotColdSplit() && + "Exceptions supported only with hot/cold splitting."); Streamer.emitCFIStartProc(/*IsSimple=*/false); if (Function.getPersonalityFunction() != nullptr) Streamer.emitCFIPersonality(Function.getPersonalityFunction(), Function.getPersonalityEncoding()); - MCSymbol *LSDASymbol = - EmitColdPart ? Function.getColdLSDASymbol() : Function.getLSDASymbol(); + MCSymbol *LSDASymbol = FF.isSplitFragment() ? Function.getColdLSDASymbol() + : Function.getLSDASymbol(); if (LSDASymbol) Streamer.emitCFILsda(LSDASymbol, BC.LSDAEncoding); else @@ -353,7 +367,7 @@ } // Emit code. 
- emitFunctionBody(Function, EmitColdPart, /*EmitCodeOnly=*/false); + emitFunctionBody(Function, FF, /*EmitCodeOnly=*/false); // Emit padding if requested. if (size_t Padding = opts::padFunction(Function)) { @@ -369,8 +383,9 @@ if (Function.hasCFI()) Streamer.emitCFIEndProc(); - MCSymbol *EndSymbol = EmitColdPart ? Function.getFunctionColdEndLabel() - : Function.getFunctionEndLabel(); + MCSymbol *EndSymbol = FF.isSplitFragment() + ? Function.getFunctionColdEndLabel() + : Function.getFunctionEndLabel(); Streamer.emitLabel(EndSymbol); if (MAI->hasDotTypeDotSizeDirective()) { @@ -384,21 +399,22 @@ emitLineInfoEnd(Function, EndSymbol); // Exception handling info for the function. - emitLSDA(Function, EmitColdPart); + emitLSDA(Function, FF.isSplitFragment()); - if (!EmitColdPart && opts::JumpTables > JTS_NONE) + if (FF.isMainFragment() && opts::JumpTables > JTS_NONE) emitJumpTables(Function); return true; } -void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, bool EmitColdPart, +void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, + const FunctionFragment &FF, bool EmitCodeOnly) { - if (!EmitCodeOnly && EmitColdPart && BF.hasConstantIsland()) + if (!EmitCodeOnly && FF.isSplitFragment() && BF.hasConstantIsland()) { + assert(FF.getFragmentNum() == FragmentNum::cold() && + "Constant island support only with hot/cold split"); BF.duplicateConstantIslands(); - - const FunctionFragment FF = BF.getLayout().getFragment( - EmitColdPart ? FragmentNum::cold() : FragmentNum::hot()); + } if (!FF.empty() && FF.front()->isLandingPad()) { assert(!FF.front()->isEntryPoint() && @@ -488,7 +504,7 @@ } if (!EmitCodeOnly) - emitConstantIslands(BF, EmitColdPart); + emitConstantIslands(BF, FF.isSplitFragment()); } void BinaryEmitter::emitConstantIslands(BinaryFunction &BF, bool EmitColdPart, @@ -904,7 +920,7 @@ // Corresponding FDE start. const MCSymbol *StartSymbol = - EmitColdPart ? BF.getColdSymbol() : BF.getSymbol(); + EmitColdPart ? 
BF.getSymbol(FragmentNum::cold()) : BF.getSymbol(); // Emit the LSDA header. @@ -1148,9 +1164,9 @@ } void emitFunctionBody(MCStreamer &Streamer, BinaryFunction &BF, - bool EmitColdPart, bool EmitCodeOnly) { + const FunctionFragment &FF, bool EmitCodeOnly) { BinaryEmitter(Streamer, BF.getBinaryContext()) - .emitFunctionBody(BF, EmitColdPart, EmitCodeOnly); + .emitFunctionBody(BF, FF, EmitCodeOnly); } } // namespace bolt diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -3260,8 +3260,8 @@ // could be "short", then prioritize short for "taken". This will // generate a sequence 1 byte shorter on x86. if (IsSupported && BC.isX86() && - TSuccessor->isCold() != FSuccessor->isCold() && - BB->isCold() != TSuccessor->isCold()) { + TSuccessor->getFragmentNum() != FSuccessor->getFragmentNum() && + BB->getFragmentNum() != TSuccessor->getFragmentNum()) { std::swap(TSuccessor, FSuccessor); { auto L = BC.scopeLock(); @@ -4035,7 +4035,8 @@ } const uint64_t BaseAddress = getCodeSection()->getOutputAddress(); - ErrorOr ColdSection = getColdCodeSection(); + ErrorOr ColdSection = + getColdCodeSection(FragmentNum::cold()); const uint64_t ColdBaseAddress = isSplit() ? 
ColdSection->getOutputAddress() : 0; if (BC.HasRelocations || isInjected()) { @@ -4049,7 +4050,7 @@ setOutputDataAddress(BaseAddress + DataOffset); } if (isSplit()) { - const MCSymbol *ColdStartSymbol = getColdSymbol(); + const MCSymbol *ColdStartSymbol = getSymbol(FragmentNum::cold()); assert(ColdStartSymbol && ColdStartSymbol->isDefined() && "split function should have defined cold symbol"); const MCSymbol *ColdEndSymbol = getFunctionColdEndLabel(); diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -12,6 +12,7 @@ #include "bolt/Core/BinaryFunction.h" #include "bolt/Core/DebugData.h" #include "bolt/Core/Exceptions.h" +#include "bolt/Core/FunctionLayout.h" #include "bolt/Core/MCPlusBuilder.h" #include "bolt/Core/ParallelUtilities.h" #include "bolt/Core/Relocation.h" @@ -3181,12 +3182,15 @@ if (Section) BC->deregisterSection(*Section); assert(Function->getOriginSectionName() && "expected origin section"); - Function->CodeSectionName = std::string(*Function->getOriginSectionName()); - if (Function->isSplit()) { - if (ErrorOr ColdSection = Function->getColdCodeSection()) + Function->CodeSectionName = Function->getOriginSectionName()->str(); + for (const FunctionFragment FF : + Function->getLayout().getSplitFragments()) { + if (ErrorOr ColdSection = + Function->getColdCodeSection(FF.getFragmentNum())) BC->deregisterSection(*ColdSection); - Function->ColdCodeSectionName = std::string(getBOLTTextSectionName()); } + if (Function->getLayout().isSplit()) + Function->ColdCodeSectionName = getBOLTTextSectionName().str(); } if (opts::PrintCacheMetrics) { @@ -3722,34 +3726,37 @@ if (!Function.isSplit()) continue; - ErrorOr ColdSection = Function.getColdCodeSection(); - assert(ColdSection && "cannot find section for cold part"); - // Cold fragments are aligned at 16 bytes. 
- NextAvailableAddress = alignTo(NextAvailableAddress, 16); - BinaryFunction::FragmentInfo &ColdPart = Function.cold(); - if (TooLarge) { - // The corresponding FDE will refer to address 0. - ColdPart.setAddress(0); - ColdPart.setImageAddress(0); - ColdPart.setImageSize(0); - ColdPart.setFileOffset(0); - } else { - ColdPart.setAddress(NextAvailableAddress); - ColdPart.setImageAddress(ColdSection->getAllocAddress()); - ColdPart.setImageSize(ColdSection->getOutputSize()); - ColdPart.setFileOffset(getFileOffsetForAddress(NextAvailableAddress)); - ColdSection->setOutputAddress(ColdPart.getAddress()); - } + for (const FunctionFragment FF : Function.getLayout().getSplitFragments()) { + ErrorOr ColdSection = + Function.getColdCodeSection(FF.getFragmentNum()); + assert(ColdSection && "cannot find section for cold part"); + // Cold fragments are aligned at 16 bytes. + NextAvailableAddress = alignTo(NextAvailableAddress, 16); + BinaryFunction::FragmentInfo &ColdPart = Function.cold(); + if (TooLarge) { + // The corresponding FDE will refer to address 0. 
+ ColdPart.setAddress(0); + ColdPart.setImageAddress(0); + ColdPart.setImageSize(0); + ColdPart.setFileOffset(0); + } else { + ColdPart.setAddress(NextAvailableAddress); + ColdPart.setImageAddress(ColdSection->getAllocAddress()); + ColdPart.setImageSize(ColdSection->getOutputSize()); + ColdPart.setFileOffset(getFileOffsetForAddress(NextAvailableAddress)); + ColdSection->setOutputAddress(ColdPart.getAddress()); + } - LLVM_DEBUG(dbgs() << "BOLT: mapping cold fragment 0x" - << Twine::utohexstr(ColdPart.getImageAddress()) - << " to 0x" << Twine::utohexstr(ColdPart.getAddress()) - << " with size " - << Twine::utohexstr(ColdPart.getImageSize()) << '\n'); - RTDyld.reassignSectionAddress(ColdSection->getSectionID(), - ColdPart.getAddress()); + LLVM_DEBUG(dbgs() << "BOLT: mapping cold fragment 0x" + << Twine::utohexstr(ColdPart.getImageAddress()) + << " to 0x" << Twine::utohexstr(ColdPart.getAddress()) + << " with size " + << Twine::utohexstr(ColdPart.getImageSize()) << '\n'); + RTDyld.reassignSectionAddress(ColdSection->getSectionID(), + ColdPart.getAddress()); - NextAvailableAddress += ColdPart.getImageSize(); + NextAvailableAddress += ColdPart.getImageSize(); + } } // Add the new text section aggregating all existing code sections. 
@@ -4511,17 +4518,20 @@ Symbols.emplace_back(ICFSymbol); } if (Function.isSplit() && Function.cold().getAddress()) { - ELFSymTy NewColdSym = FunctionSymbol; - SmallVector Buf; - NewColdSym.st_name = - AddToStrTab(Twine(cantFail(FunctionSymbol.getName(StringSection))) - .concat(".cold.0") - .toStringRef(Buf)); - NewColdSym.st_shndx = Function.getColdCodeSection()->getIndex(); - NewColdSym.st_value = Function.cold().getAddress(); - NewColdSym.st_size = Function.cold().getImageSize(); - NewColdSym.setBindingAndType(ELF::STB_LOCAL, ELF::STT_FUNC); - Symbols.emplace_back(NewColdSym); + for (const FunctionFragment FF : + Function.getLayout().getSplitFragments()) { + ELFSymTy NewColdSym = FunctionSymbol; + const SmallString<256> Buf = formatv( + "{0}.cold.{1}", cantFail(FunctionSymbol.getName(StringSection)), + FF.getFragmentNum().get() - 1); + NewColdSym.st_name = AddToStrTab(Buf); + NewColdSym.st_shndx = + Function.getColdCodeSection(FF.getFragmentNum())->getIndex(); + NewColdSym.st_value = Function.cold().getAddress(); + NewColdSym.st_size = Function.cold().getImageSize(); + NewColdSym.setBindingAndType(ELF::STB_LOCAL, ELF::STT_FUNC); + Symbols.emplace_back(NewColdSym); + } } if (Function.hasConstantIsland()) { uint64_t DataMark = Function.getOutputDataAddress(); @@ -4636,6 +4646,9 @@ : nullptr; if (Function && Function->isEmitted()) { + assert(Function->getLayout().isHotColdSplit() && + "Adding symbols based on cold fragment when there are more than " + "2 fragments"); const uint64_t OutputAddress = Function->translateInputToOutputAddress(Symbol.st_value); @@ -4645,7 +4658,7 @@ NewSymbol.st_shndx = OutputAddress >= Function->cold().getAddress() && OutputAddress < Function->cold().getImageSize() - ? Function->getColdCodeSection()->getIndex() + ? Function->getColdCodeSection(FragmentNum::cold())->getIndex() : Function->getCodeSection()->getIndex(); } else { // Check if the symbol belongs to moved data object and update it.