Index: include/llvm/MC/MCAsmLayout.h =================================================================== --- include/llvm/MC/MCAsmLayout.h +++ include/llvm/MC/MCAsmLayout.h @@ -50,11 +50,6 @@ /// \brief Is the layout for this fragment valid? bool isFragmentValid(const MCFragment *F) const; - /// \brief Compute the amount of padding required before this fragment to - /// obey bundling restrictions. - uint64_t computeBundlePadding(const MCFragment *F, - uint64_t FOffset, uint64_t FSize); - public: MCAsmLayout(MCAssembler &_Assembler); Index: include/llvm/MC/MCAssembler.h =================================================================== --- include/llvm/MC/MCAssembler.h +++ include/llvm/MC/MCAssembler.h @@ -1249,11 +1249,20 @@ FileNames.push_back(FileName); } + void writeFragmentPadding(const MCFragment &F, uint64_t FSize, + MCObjectWriter *OW) const; + /// @} void dump(); }; +/// \brief Compute the amount of padding required before this fragment to +/// obey bundling restrictions. +uint64_t computeBundlePadding(const MCAssembler &Assembler, + const MCFragment *F, + uint64_t FOffset, uint64_t FSize); + } // end namespace llvm #endif Index: include/llvm/MC/MCELFStreamer.h =================================================================== --- include/llvm/MC/MCELFStreamer.h +++ include/llvm/MC/MCELFStreamer.h @@ -100,6 +100,8 @@ void fixSymbolsInTLSFixups(const MCExpr *expr); + void mergeFragment(MCDataFragment *, MCEncodedFragmentWithFixups *); + bool SeenIdent; struct LocalCommon { @@ -111,6 +113,8 @@ std::vector LocalCommons; SmallPtrSet BindingExplicitlySet; + + llvm::SmallVector BundleGroups; }; MCELFStreamer *createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, Index: include/llvm/MC/MCObjectStreamer.h =================================================================== --- include/llvm/MC/MCObjectStreamer.h +++ include/llvm/MC/MCObjectStreamer.h @@ -44,10 +44,6 @@ void EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) override; void 
EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override; - // If any labels have been emitted but not assigned fragments, ensure that - // they get assigned, either to F if possible or to a new data fragment. - void flushPendingLabels(MCFragment *F); - protected: MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &_OS, MCCodeEmitter *_Emitter); @@ -85,6 +81,12 @@ /// fragment is not a data fragment. MCDataFragment *getOrCreateDataFragment(); + // If any labels have been emitted but not assigned fragments, ensure that + // they get assigned, either to F if possible or to a new data fragment. + // Optionally, it is also possible to provide an offset which will be set + // as a symbol offset within the fragment. + void flushPendingLabels(MCFragment *F, uint64_t FOffset = 0); + public: void visitUsedSymbol(const MCSymbol &Sym) override; Index: lib/MC/MCAssembler.cpp =================================================================== --- lib/MC/MCAssembler.cpp +++ lib/MC/MCAssembler.cpp @@ -229,8 +229,9 @@ return getSectionAddressSize(SD); } -uint64_t MCAsmLayout::computeBundlePadding(const MCFragment *F, - uint64_t FOffset, uint64_t FSize) { +uint64_t llvm::computeBundlePadding(const MCAssembler &Assembler, + const MCFragment *F, + uint64_t FOffset, uint64_t FSize) { uint64_t BundleSize = Assembler.getBundleAlignSize(); assert(BundleSize > 0 && "computeBundlePadding should only be called if bundling is enabled"); @@ -338,6 +339,7 @@ getFragmentList().insert(IP, F); F->setParent(this); } + return IP; } @@ -652,7 +654,12 @@ // The fragment's offset will point to after the padding, and its computed // size won't include the padding. // - if (Assembler.isBundlingEnabled() && F->hasInstructions()) { + // When relax all flag is used, we optimize the bundling by writing the + // padding directly into fragments as the instructions are emitted in the + // streamer. 
+ // + if (Assembler.isBundlingEnabled() && !Assembler.getRelaxAll() && + F->hasInstructions()) { assert(isa(F) && "Only MCEncodedFragment implementations have instructions"); uint64_t FSize = Assembler.computeFragmentSize(*this, *F); @@ -660,7 +667,8 @@ if (FSize > Assembler.getBundleAlignSize()) report_fatal_error("Fragment can't be larger than a bundle size"); - uint64_t RequiredBundlePadding = computeBundlePadding(F, F->Offset, FSize); + uint64_t RequiredBundlePadding = computeBundlePadding(Assembler, F, + F->Offset, FSize); if (RequiredBundlePadding > UINT8_MAX) report_fatal_error("Padding cannot exceed 255 bytes"); F->setBundlePadding(static_cast(RequiredBundlePadding)); @@ -675,24 +683,20 @@ OW->WriteBytes(EF.getContents()); } -/// \brief Write the fragment \p F to the output file. -static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFragment &F) { - MCObjectWriter *OW = &Asm.getWriter(); - - // FIXME: Embed in fragments instead? - uint64_t FragmentSize = Asm.computeFragmentSize(Layout, F); - +/// \brief Write the necessary bundle padding to the given object writer. +// Expects a fragment containing instructions and its size. +void MCAssembler::writeFragmentPadding(const MCFragment &F, uint64_t FSize, + MCObjectWriter *OW) const { // Should NOP padding be written out before this fragment? 
unsigned BundlePadding = F.getBundlePadding(); if (BundlePadding > 0) { - assert(Asm.isBundlingEnabled() && + assert(isBundlingEnabled() && "Writing bundle padding with disabled bundling"); assert(F.hasInstructions() && "Writing bundle padding for a fragment without instructions"); - unsigned TotalLength = BundlePadding + static_cast(FragmentSize); - if (F.alignToBundleEnd() && TotalLength > Asm.getBundleAlignSize()) { + unsigned TotalLength = BundlePadding + static_cast(FSize); + if (F.alignToBundleEnd() && TotalLength > getBundleAlignSize()) { // If the padding itself crosses a bundle boundary, it must be emitted // in 2 pieces, since even nop instructions must not cross boundaries. // v--------------v <- BundleAlignSize @@ -701,16 +705,27 @@ // | Prev |####|####| F | // ---------------------------- // ^-------------------^ <- TotalLength - unsigned DistanceToBoundary = TotalLength - Asm.getBundleAlignSize(); - if (!Asm.getBackend().writeNopData(DistanceToBoundary, OW)) + unsigned DistanceToBoundary = TotalLength - getBundleAlignSize(); + if (!getBackend().writeNopData(DistanceToBoundary, OW)) report_fatal_error("unable to write NOP sequence of " + Twine(DistanceToBoundary) + " bytes"); BundlePadding -= DistanceToBoundary; } - if (!Asm.getBackend().writeNopData(BundlePadding, OW)) + if (!getBackend().writeNopData(BundlePadding, OW)) report_fatal_error("unable to write NOP sequence of " + Twine(BundlePadding) + " bytes"); } +} + +/// \brief Write the fragment \p F to the output file. +static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFragment &F) { + MCObjectWriter *OW = &Asm.getWriter(); + + // FIXME: Embed in fragments instead? + uint64_t FragmentSize = Asm.computeFragmentSize(Layout, F); + + Asm.writeFragmentPadding(F, FragmentSize, OW); // This variable (and its dummy usage) is to participate in the assert at // the end of the function. 
Index: lib/MC/MCELFStreamer.cpp =================================================================== --- lib/MC/MCELFStreamer.cpp +++ lib/MC/MCELFStreamer.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" @@ -39,6 +40,49 @@ MCELFStreamer::~MCELFStreamer() { } +/// \brief Merge the content of the fragment \p EF into the fragment \p DF. +void MCELFStreamer::mergeFragment(MCDataFragment *DF, + MCEncodedFragmentWithFixups *EF) { + MCAssembler &Assembler = getAssembler(); + + if (Assembler.isBundlingEnabled() && Assembler.getRelaxAll()) { + uint64_t FSize = EF->getContents().size(); + + if (FSize > Assembler.getBundleAlignSize()) + report_fatal_error("Fragment can't be larger than a bundle size"); + + uint64_t RequiredBundlePadding = computeBundlePadding( + Assembler, EF, DF->getContents().size(), FSize); + + if (RequiredBundlePadding > UINT8_MAX) + report_fatal_error("Padding cannot exceed 255 bytes"); + + if (RequiredBundlePadding > 0) { + SmallString<256> Code; + raw_svector_ostream VecOS(Code); + MCObjectWriter *OW = Assembler.getBackend().createObjectWriter(VecOS); + + EF->setBundlePadding(static_cast(RequiredBundlePadding)); + + Assembler.writeFragmentPadding(*EF, FSize, OW); + VecOS.flush(); + delete OW; + + DF->getContents().append(Code.begin(), Code.end()); + } + } + + flushPendingLabels(DF, DF->getContents().size()); + + for (unsigned i = 0, e = EF->getFixups().size(); i != e; ++i) { + EF->getFixups()[i].setOffset(EF->getFixups()[i].getOffset() + + DF->getContents().size()); + DF->getFixups().push_back(EF->getFixups()[i]); + } + DF->setHasInstructions(true); + DF->getContents().append(EF->getContents().begin(), EF->getContents().end()); +} + void MCELFStreamer::InitSections(bool NoExecStack) { // This emulates the same behavior of GNU as. 
This makes it easier // to compare the output as the major sections are in the same order. @@ -448,7 +492,11 @@ if (Assembler.isBundlingEnabled()) { MCSectionData *SD = getCurrentSectionData(); - if (SD->isBundleLocked() && !SD->isBundleGroupBeforeFirstInst()) + if (Assembler.getRelaxAll() && SD->isBundleLocked()) + DF = BundleGroups.back(); + else if (Assembler.getRelaxAll() && !SD->isBundleLocked()) + DF = new MCDataFragment(); + else if (SD->isBundleLocked() && !SD->isBundleGroupBeforeFirstInst()) // If we are bundle-locked, we re-use the current fragment. // The bundle-locking directive ensures this is a new data fragment. DF = cast(getCurrentFragment()); @@ -486,6 +534,14 @@ } DF->setHasInstructions(true); DF->getContents().append(Code.begin(), Code.end()); + + if (Assembler.isBundlingEnabled() && Assembler.getRelaxAll()) { + MCSectionData *SD = getCurrentSectionData(); + if (!SD->isBundleLocked()) { + mergeFragment(getOrCreateDataFragment(), DF); + delete DF; + } + } } void MCELFStreamer::EmitBundleAlignMode(unsigned AlignPow2) { @@ -509,6 +565,12 @@ if (!SD->isBundleLocked()) SD->setBundleGroupBeforeFirstInst(true); + if (getAssembler().getRelaxAll() && !SD->isBundleLocked()) { + // TODO: drop the lock state and set directly in the fragment + MCDataFragment *DF = new MCDataFragment(); + BundleGroups.push_back(DF); + } + SD->setBundleLockState(AlignToEnd ? 
MCSectionData::BundleLockedAlignToEnd : MCSectionData::BundleLocked); } @@ -524,7 +586,22 @@ else if (SD->isBundleGroupBeforeFirstInst()) report_fatal_error("Empty bundle-locked group is forbidden"); - SD->setBundleLockState(MCSectionData::NotBundleLocked); + if (getAssembler().getRelaxAll()) { + assert(!BundleGroups.empty() && "There are no bundle groups"); + MCDataFragment *DF = BundleGroups.back(); + + SD->setBundleLockState(MCSectionData::NotBundleLocked); + + if (!SD->isBundleLocked()) { + mergeFragment(getOrCreateDataFragment(), DF); + BundleGroups.pop_back(); + delete DF; + } + + if (SD->getBundleLockState() != MCSectionData::BundleLockedAlignToEnd) + getOrCreateDataFragment()->setAlignToBundleEnd(false); + } else + SD->setBundleLockState(MCSectionData::NotBundleLocked); } void MCELFStreamer::Flush() { Index: lib/MC/MCObjectStreamer.cpp =================================================================== --- lib/MC/MCObjectStreamer.cpp +++ lib/MC/MCObjectStreamer.cpp @@ -42,7 +42,7 @@ delete Assembler; } -void MCObjectStreamer::flushPendingLabels(MCFragment *F) { +void MCObjectStreamer::flushPendingLabels(MCFragment *F, uint64_t FOffset) { if (PendingLabels.size()) { if (!F) { F = new MCDataFragment(); @@ -51,7 +51,7 @@ } for (MCSymbolData *SD : PendingLabels) { SD->setFragment(F); - SD->setOffset(0); + SD->setOffset(FOffset); } PendingLabels.clear(); } @@ -92,7 +92,8 @@ MCDataFragment *F = dyn_cast_or_null(getCurrentFragment()); // When bundling is enabled, we don't want to add data to a fragment that // already has instructions (see MCELFStreamer::EmitInstToData for details) - if (!F || (Assembler->isBundlingEnabled() && F->hasInstructions())) { + if (!F || (Assembler->isBundlingEnabled() && !Assembler->getRelaxAll() && + F->hasInstructions())) { F = new MCDataFragment(); insert(F); } @@ -148,7 +149,9 @@ // If there is a current fragment, mark the symbol as pointing into it. 
// Otherwise queue the label and set its fragment pointer when we emit the // next fragment. - if (auto *F = dyn_cast_or_null(getCurrentFragment())) { + auto *F = dyn_cast_or_null(getCurrentFragment()); + if (F && !(getAssembler().isBundlingEnabled() && + getAssembler().getRelaxAll())) { SD.setFragment(F); SD.setOffset(F->getContents().size()); } else { @@ -240,6 +243,9 @@ void MCObjectStreamer::EmitInstToFragment(const MCInst &Inst, const MCSubtargetInfo &STI) { + if (getAssembler().getRelaxAll() && getAssembler().isBundlingEnabled()) + llvm_unreachable("All instructions should have already been relaxed"); + // Always create a new, separate fragment here, because its size can change // during relaxation. MCRelaxableFragment *IF = new MCRelaxableFragment(Inst, STI); Index: test/MC/X86/AlignedBundling/bundle-group-too-large-error.s =================================================================== --- test/MC/X86/AlignedBundling/bundle-group-too-large-error.s +++ test/MC/X86/AlignedBundling/bundle-group-too-large-error.s @@ -1,4 +1,5 @@ # RUN: not llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - 2>&1 | FileCheck %s +# RUN: not llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mc-relax-all %s -o - 2>&1 | FileCheck %s # CHECK: ERROR: Fragment can't be larger than a bundle size Index: test/MC/X86/AlignedBundling/different-sections.s =================================================================== --- test/MC/X86/AlignedBundling/different-sections.s +++ test/MC/X86/AlignedBundling/different-sections.s @@ -1,5 +1,7 @@ # RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \ # RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s +# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mc-relax-all %s -o - \ +# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s # Test two different executable sections with bundling. 
Index: test/MC/X86/AlignedBundling/labeloffset.s =================================================================== --- test/MC/X86/AlignedBundling/labeloffset.s +++ test/MC/X86/AlignedBundling/labeloffset.s @@ -2,6 +2,8 @@ # RUN: llvm-objdump -disassemble -no-show-raw-insn -r - | FileCheck %s # RUN: llvm-mc -triple=i686-nacl -filetype=obj %s -o - | \ # RUN: llvm-objdump -disassemble -no-show-raw-insn -r - | FileCheck %s +# RUN: llvm-mc -triple=i686-nacl -filetype=obj -mc-relax-all %s -o - | \ +# RUN: llvm-objdump -disassemble -no-show-raw-insn -r - | FileCheck %s .bundle_align_mode 5 .text Index: test/MC/X86/AlignedBundling/long-nop-pad.s =================================================================== --- test/MC/X86/AlignedBundling/long-nop-pad.s +++ test/MC/X86/AlignedBundling/long-nop-pad.s @@ -1,5 +1,7 @@ # RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \ # RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s +# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mc-relax-all %s -o - \ +# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s # Test that long nops are generated for padding where possible. 
Index: test/MC/X86/AlignedBundling/nesting.s =================================================================== --- test/MC/X86/AlignedBundling/nesting.s +++ test/MC/X86/AlignedBundling/nesting.s @@ -1,5 +1,7 @@ # RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \ # RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s +# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mc-relax-all %s -o - \ +# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s # Will be bundle-aligning to 16 byte boundaries .bundle_align_mode 4 Index: test/MC/X86/AlignedBundling/pad-align-to-bundle-end.s =================================================================== --- test/MC/X86/AlignedBundling/pad-align-to-bundle-end.s +++ test/MC/X86/AlignedBundling/pad-align-to-bundle-end.s @@ -1,5 +1,7 @@ # RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \ # RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s +# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mc-relax-all %s -o - \ +# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s # Test some variations of padding to the end of a bundle. Index: test/MC/X86/AlignedBundling/pad-bundle-groups.s =================================================================== --- test/MC/X86/AlignedBundling/pad-bundle-groups.s +++ test/MC/X86/AlignedBundling/pad-bundle-groups.s @@ -1,5 +1,7 @@ # RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \ # RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s +# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mc-relax-all %s -o - \ +# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s # Test some variations of padding for bundle-locked groups. 
Index: test/MC/X86/AlignedBundling/relax-at-bundle-end.s =================================================================== --- test/MC/X86/AlignedBundling/relax-at-bundle-end.s +++ test/MC/X86/AlignedBundling/relax-at-bundle-end.s @@ -1,5 +1,7 @@ # RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \ # RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s +# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mc-relax-all %s -o - \ +# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s # Test that an instruction near a bundle end gets properly padded # after it is relaxed. Index: test/MC/X86/AlignedBundling/relax-in-bundle-group.s =================================================================== --- test/MC/X86/AlignedBundling/relax-in-bundle-group.s +++ test/MC/X86/AlignedBundling/relax-in-bundle-group.s @@ -1,5 +1,7 @@ # RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \ # RUN: | llvm-objdump -disassemble - | FileCheck %s +# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mc-relax-all %s -o - \ +# RUN: | llvm-objdump -disassemble - | FileCheck %s # Test that instructions inside bundle-locked groups are relaxed even if their # fixup is short enough not to warrant relaxation on its own. 
Index: test/MC/X86/AlignedBundling/single-inst-bundling.s =================================================================== --- test/MC/X86/AlignedBundling/single-inst-bundling.s +++ test/MC/X86/AlignedBundling/single-inst-bundling.s @@ -1,5 +1,7 @@ # RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \ -# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s +# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck -check-prefix=CHECK -check-prefix=CHECK-OPT %s +# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mc-relax-all %s -o - \ +# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck -check-prefix=CHECK -check-prefix=CHECK-RELAX %s # Test simple NOP insertion for single instructions. @@ -24,14 +26,17 @@ movl %ebx, %edi callq bar cmpl %r14d, %ebp +# CHECK-RELAX: nopl jle .L_ELSE # Due to the padding that's inserted before the addl, the jump target # becomes farther by one byte. -# CHECK: jle 5 +# CHECK-OPT: jle 5 +# CHECK-RELAX: jle 7 addl %ebp, %eax -# CHECK: nop -# CHECK-NEXT: 20: addl +# CHECK-OPT: nop +# CHECK-OPT-NEXT:20: addl +# CHECK-RELAX: 26: addl jmp .L_RET .L_ELSE: