Index: llvm/include/llvm/MC/MCFragment.h =================================================================== --- llvm/include/llvm/MC/MCFragment.h +++ llvm/include/llvm/MC/MCFragment.h @@ -517,37 +517,54 @@ } }; -/// Represents required padding such that a particular other set of fragments -/// does not cross a particular power-of-two boundary. The other fragments must -/// follow this one within the same section. +/// This is a placeholder fragment used to emit NOP or values to align a set of +/// fragments within specific boundary. If we call the nearest backward +/// MCBoundaryAlignFragment of LastFragment as NBBF, then the set of fragments +/// to be aligned is (NBBF, LastFragment]. The fragments to be aligned should be +/// in the same section with this fragment, and each non-BF fragment on the path +/// from this fragment to the fragments to be aligned must have a fixed size +/// after finite times of relaxation. class MCBoundaryAlignFragment : public MCFragment { + /// Flag to indicate that (optimal) NOPs should be emitted instead + /// of using the provided value. + bool EmitNops = false; /// The alignment requirement of the branch to be aligned. Align AlignBoundary; - /// Flag to indicate whether the branch is fused. Use in determining the - /// region of fragments being aligned. - bool Fused : 1; - /// Flag to indicate whether NOPs should be emitted. - bool EmitNops : 1; /// The size of the fragment. The size is lazily set during relaxation, and /// is not meaningful before that. uint64_t Size = 0; + /// Value to use for filling padding bytes if existing. + Optional Value; + /// The maximum number of bytes to emit; if the Flag EmitNops is true, + /// then this constraint is ignored. + uint64_t MaxBytesToEmit = 0; + /// The fragment to be aligned. 
+ const MCFragment *LastFragment = nullptr; public: - MCBoundaryAlignFragment(Align AlignBoundary, bool Fused = false, - bool EmitNops = false, MCSection *Sec = nullptr) - : MCFragment(FT_BoundaryAlign, false, Sec), AlignBoundary(AlignBoundary), - Fused(Fused), EmitNops(EmitNops) {} + MCBoundaryAlignFragment(MCSection *Sec = nullptr) + : MCFragment(FT_BoundaryAlign, false, Sec) {} uint64_t getSize() const { return Size; } - void setSize(uint64_t Value) { Size = Value; } + void setSize(uint64_t V) { Size = V; } Align getAlignment() const { return AlignBoundary; } + void setAlignment(Align V) { AlignBoundary = V; } - bool isFused() const { return Fused; } - void setFused(bool Value) { Fused = Value; } + bool hasValue() const { return Value.hasValue(); } + uint8_t getValue() const { return Value.getValue(); } + void setValue(uint8_t V) { Value = V; } - bool canEmitNops() const { return EmitNops; } - void setEmitNops(bool Value) { EmitNops = Value; } + bool hasEmitNops() const { return EmitNops; } + void setEmitNops(bool V) { EmitNops = V; } + + bool hasEmitNopsOrValue() const { return EmitNops || Value.hasValue(); } + + uint8_t getMaxBytesToEmit() const { return MaxBytesToEmit; } + void setMaxBytesToEmit(uint64_t V) { MaxBytesToEmit = V; } + + const MCFragment *getFragment() const { return LastFragment; } + void setFragment(const MCFragment *F) { LastFragment = F; } static bool classof(const MCFragment *F) { return F->getKind() == MCFragment::FT_BoundaryAlign; Index: llvm/include/llvm/MC/MCObjectStreamer.h =================================================================== --- llvm/include/llvm/MC/MCObjectStreamer.h +++ llvm/include/llvm/MC/MCObjectStreamer.h @@ -87,6 +87,11 @@ /// if the Subtarget differs from the current fragment. 
MCDataFragment *getOrCreateDataFragment(const MCSubtargetInfo* STI = nullptr); + /// Get a boundary-align fragment to write into, creating a new one if the + /// current fragment is not a boundary-align fragment or has been used to emit + /// something. + MCBoundaryAlignFragment *getOrCreateBoundaryAlignFragment(); + protected: bool changeSectionImpl(MCSection *Section, const MCExpr *Subsection); Index: llvm/lib/MC/MCAssembler.cpp =================================================================== --- llvm/lib/MC/MCAssembler.cpp +++ llvm/lib/MC/MCAssembler.cpp @@ -606,9 +606,15 @@ } case MCFragment::FT_BoundaryAlign: { - if (!Asm.getBackend().writeNopData(OS, FragmentSize)) - report_fatal_error("unable to write nop sequence of " + - Twine(FragmentSize) + " bytes"); + const MCBoundaryAlignFragment &BF = cast(F); + if (BF.hasEmitNops()) { + if (!Asm.getBackend().writeNopData(OS, FragmentSize)) + report_fatal_error("unable to write nop sequence of " + + Twine(FragmentSize) + " bytes"); + } else if (BF.hasValue()) { + for (uint64_t i = 0; i != FragmentSize; ++i) + OS << char(BF.getValue()); + } break; } @@ -987,27 +993,47 @@ bool MCAssembler::relaxBoundaryAlign(MCAsmLayout &Layout, MCBoundaryAlignFragment &BF) { - // The MCBoundaryAlignFragment that doesn't emit NOP should not be relaxed. - if (!BF.canEmitNops()) + // The MCBoundaryAlignFragment that does not emit anything or not have any + // fragment to be aligned should not be relaxed. + if (!BF.hasEmitNopsOrValue() || !BF.getFragment()) return false; - uint64_t AlignedOffset = Layout.getFragmentOffset(BF.getNextNode()); - uint64_t AlignedSize = 0; - const MCFragment *F = BF.getNextNode(); - // If the branch is unfused, it is emitted into one fragment, otherwise it is - // emitted into two fragments at most, the next MCBoundaryAlignFragment(if - // exists) also marks the end of the branch. - for (auto i = 0, N = BF.isFused() ? 
2 : 1; - i != N && !isa(F); ++i, F = F->getNextNode()) { - AlignedSize += computeFragmentSize(Layout, *F); + // Compute the size of all the fragments in the range we're trying to align. + const MCFragment *TF = BF.getFragment(); + uint64_t AlignedSize = computeFragmentSize(Layout, *TF); + uint64_t AlignedOffset = Layout.getFragmentOffset(TF); + // Note: It should be guaranteed that there is a MCBoundaryAlignFragment + // before TF in the same section. + for (auto *F = TF->getPrevNode(); !isa(F); + F = F->getPrevNode()) { + uint64_t Size = computeFragmentSize(Layout, *F); + AlignedSize += Size; + AlignedOffset -= Size; } - uint64_t OldSize = BF.getSize(); - AlignedOffset -= OldSize; + + // Compute the size of all the MCBoundaryAlignFragments in the range + // [BF,BF.getFragment). + uint64_t FixedValue = 0; + for (const MCFragment *F = &BF; F != TF; F = F->getNextNode()) + if (auto *MBF = dyn_cast(F)) + FixedValue += MBF->getSize(); + + AlignedOffset -= FixedValue; Align BoundaryAlignment = BF.getAlignment(); uint64_t NewSize = needPadding(AlignedOffset, AlignedSize, BoundaryAlignment) ? 
offsetToAlignment(AlignedOffset, BoundaryAlignment) : 0U; - if (NewSize == OldSize) + if (!BF.hasEmitNops()) { + assert(BF.getNextNode()->hasInstructions() && + "The fragment doesn't have any instruction."); + assert(computeFragmentSize(Layout, *(BF.getNextNode())) <= 15 && + "The fragment's size must be no longer than 15 since it should only " + "hold one instruction."); + NewSize = std::min({NewSize, + 15 - computeFragmentSize(Layout, *(BF.getNextNode())), + static_cast(BF.getMaxBytesToEmit())}); + } + if (NewSize == BF.getSize()) return false; BF.setSize(NewSize); Layout.invalidateFragmentsFrom(&BF); Index: llvm/lib/MC/MCFragment.cpp =================================================================== --- llvm/lib/MC/MCFragment.cpp +++ llvm/lib/MC/MCFragment.cpp @@ -424,14 +424,13 @@ } case MCFragment::FT_BoundaryAlign: { const auto *BF = cast(this); - if (BF->canEmitNops()) - OS << " (can emit nops to align"; - if (BF->isFused()) - OS << " fused branch)"; - else - OS << " unfused branch)"; + if (BF->hasEmitNops()) + OS << " (emit nops)"; OS << "\n "; + if (BF->hasValue()) + OS << " Value:" << hexdigit(BF->getValue()); OS << " BoundarySize:" << BF->getAlignment().value() + << " MaxBytesToEmit:" << BF->getMaxBytesToEmit() << " Size:" << BF->getSize(); break; } Index: llvm/lib/MC/MCObjectStreamer.cpp =================================================================== --- llvm/lib/MC/MCObjectStreamer.cpp +++ llvm/lib/MC/MCObjectStreamer.cpp @@ -191,11 +191,19 @@ return nullptr; } -static bool CanReuseDataFragment(const MCDataFragment &F, - const MCAssembler &Assembler, +static bool CanReuseDataFragment(const MCDataFragment &F, MCObjectStreamer &OS, const MCSubtargetInfo *STI) { if (!F.hasInstructions()) return true; + + MCAssembler &Assembler = OS.getAssembler(); + + // When the target need align instructions, we need to determine the size + // of some instructions during the relaxation, the easiest way to do it is + // to emit each instruction into fragment of its 
own. + if (Assembler.getBackend().allowAutoPadding()) + return false; + // When bundling is enabled, we don't want to add data to a fragment that // already has instructions (see MCELFStreamer::EmitInstToData for details) if (Assembler.isBundlingEnabled()) @@ -208,13 +216,22 @@ MCDataFragment * MCObjectStreamer::getOrCreateDataFragment(const MCSubtargetInfo *STI) { MCDataFragment *F = dyn_cast_or_null(getCurrentFragment()); - if (!F || !CanReuseDataFragment(*F, *Assembler, STI)) { + if (!F || !CanReuseDataFragment(*F, *this, STI)) { F = new MCDataFragment(); insert(F); } return F; } +MCBoundaryAlignFragment *MCObjectStreamer::getOrCreateBoundaryAlignFragment() { + auto *F = dyn_cast_or_null(getCurrentFragment()); + if (!F || F->hasEmitNopsOrValue()) { + F = new MCBoundaryAlignFragment(); + insert(F); + } + return F; +} + void MCObjectStreamer::visitUsedSymbol(const MCSymbol &Sym) { Assembler->registerSymbol(Sym); } Index: llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp =================================================================== --- llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -13,6 +13,7 @@ #include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCELFObjectWriter.h" @@ -77,11 +78,11 @@ cl::opt X86AlignBranchBoundary( "x86-align-branch-boundary", cl::init(0), cl::desc( - "Control how the assembler should align branches with NOP. If the " - "boundary's size is not 0, it should be a power of 2 and no less " - "than 32. Branches will be aligned to prevent from being across or " - "against the boundary of specified size. The default value 0 does not " - "align branches.")); + "Control how the assembler should align branches with NOP or segment " + "override prefix. 
If the boundary's size is not 0, it should be a " + "power of 2 and no less than 16. Branches will be aligned to prevent " + "from being across or against the boundary of specified size. The " + "default value 0 does not align branches.")); cl::opt> X86AlignBranch( "x86-align-branch", @@ -94,6 +95,11 @@ "indirect indicates indirect jumps."), cl::location(X86AlignBranchKindLoc)); +cl::opt X86AlignBranchPrefixSize( + "x86-align-branch-prefix-size", cl::init(0), + cl::desc("Specify the maximum number of prefixes on an instruction to " + "align branches. The number should be between 0 and 5.")); + class X86ELFObjectWriter : public MCELFObjectTargetWriter { public: X86ELFObjectWriter(bool is64Bit, uint8_t OSABI, uint16_t EMachine, @@ -106,14 +112,16 @@ std::unique_ptr MCII; X86AlignBranchKind AlignBranchType; Align AlignBoundary; + uint8_t AlignMaxPrefixSize; bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const; bool needAlign(MCObjectStreamer &OS) const; bool needAlignInst(const MCInst &Inst) const; - MCBoundaryAlignFragment * - getOrCreateBoundaryAlignFragment(MCObjectStreamer &OS) const; + + bool shouldAddPrefix(const MCInst &Inst) const; MCInst PrevInst; + const MCFragment *LastFragmentToBeAligned = nullptr; public: X86AsmBackend(const Target &T, const MCSubtargetInfo &STI) @@ -121,6 +129,7 @@ MCII(T.createMCInstrInfo()) { AlignBoundary = assumeAligned(X86AlignBranchBoundary); AlignBranchType = X86AlignBranchKindLoc; + AlignMaxPrefixSize = std::min(X86AlignBranchPrefixSize, 5); } bool allowAutoPadding() const override; @@ -376,19 +385,17 @@ (AlignBranchType & X86::AlignBranchIndirect)); } -static bool canReuseBoundaryAlignFragment(const MCBoundaryAlignFragment &F) { - // If a MCBoundaryAlignFragment has not been used to emit NOP,we can reuse it. - return !F.canEmitNops(); -} +/// Check if prefix can be added before instruction \p Inst. +bool X86AsmBackend::shouldAddPrefix(const MCInst &Inst) const { + // No prefix can be added if AlignMaxPrefixSize is 0. 
+ if (AlignMaxPrefixSize == 0) + return false; -MCBoundaryAlignFragment * -X86AsmBackend::getOrCreateBoundaryAlignFragment(MCObjectStreamer &OS) const { - auto *F = dyn_cast_or_null(OS.getCurrentFragment()); - if (!F || !canReuseBoundaryAlignFragment(*F)) { - F = new MCBoundaryAlignFragment(AlignBoundary); - OS.insert(F); - } - return F; + if (needAlignInst(Inst)) + return false; + + // Linker may rewrite the instruction with variant symbol operand. + return !hasVariantSymbol(Inst); } /// Insert MCBoundaryAlignFragment before instructions to align branches. @@ -397,57 +404,201 @@ if (!needAlign(OS)) return; + // Summary of inserting scheme(Two Steps): + // Step 1: + // If the previous instruction is the first instruction in a fusible pair + // - If macro fusion actually happens, emit NOP before the first instrucion + // in the fused pair and skip step 2. + // - If the macro fusion doesn't happen indeed, emit prefix before the + // previous instruction. + // + // Step 2: + // If the instruction needs to be aligned, emit NOP before the instruction. + // + // If the instruction is the first instruction in a fusible pair, put a + // a placeholder here. + // + // Otherwise emit prefix before the instruction. + MCFragment *CF = OS.getCurrentFragment(); - bool NeedAlignFused = AlignBranchType & X86::AlignBranchFused; - if (NeedAlignFused && isMacroFused(PrevInst, Inst) && CF) { - // Macro fusion actually happens and there is no other fragment inserted - // after the previous instruction. NOP can be emitted in PF to align fused - // jcc. - if (auto *PF = - dyn_cast_or_null(CF->getPrevNode())) { - const_cast(PF)->setEmitNops(true); - const_cast(PF)->setFused(true); + + // Prefix or NOP shouldn't be inserted after hardcode, e.g. + // + // \code + // .byte 0x2e + // jmp .Label0 + // \endcode + // + // since there is no clear instruction boundary. 
+ if (isa_and_nonnull(CF) && !CF->hasInstructions()) + return; + + // The number of prefixes is limited by AlignMaxPrefixSize for some performance + // reasons, so we need to compute how many prefixes can be added. + SmallString<14> Code; + raw_svector_ostream VecOS(Code); + OS.getAssembler().getEmitter().emitPrefix(Inst, VecOS, STI); + assert(Code.size() < 15 && "The number of prefixes must be less than 15."); + uint8_t ExistingPrefixSize = static_cast(Code.size()); + uint8_t RemainingPrefixSize = (AlignMaxPrefixSize > ExistingPrefixSize) + ? (AlignMaxPrefixSize - ExistingPrefixSize) + : 0; + // Choose which prefix should be inserted before the instruction. + // + // If there is one, use the existing segment override prefix. + // If the target is 64-bit, use the CS. + // If the target is 32-bit, + // - If the instruction has a ESP/EBP base register, use SS. + // - Otherwise use DS. + uint8_t Prefix = [&]() { + auto rend = Code.rend(); + for (auto it = Code.rbegin(); it != rend; ++it) { + uint8_t Byte = *it; + // CS(0x2e), SS(0x36), DS(0x3e), ES(0x26), FS(0x64), GS(0x65) + if (Byte == 0x2e || Byte == 0x36 || Byte == 0x3e || Byte == 0x26 || + Byte == 0x64 || Byte == 0x65) + return Byte; } - } else if (needAlignInst(Inst)) { - // Note: When there is at least one fragment, such as MCAlignFragment, - // inserted after the previous instruction, e.g. 
+ if (STI.hasFeature(X86::Mode64Bit)) + return static_cast(0x2e); + unsigned Opcode = Inst.getOpcode(); + const MCInstrDesc &Desc = MCII->get(Opcode); + uint64_t TSFlags = Desc.TSFlags; + int MemoryOperand = X86II::getMemoryOperandNo(TSFlags); + if (MemoryOperand >= 0) { + unsigned CurOp = X86II::getOperandBias(Desc); + unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg; + unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg(); + if (BaseReg == X86::ESP || BaseReg == X86::EBP) + return static_cast(0x36); + } + return static_cast(0x3e); + }(); + + bool NeedAlignFused = AlignBranchType & X86::AlignBranchFused; + // Step 1: + // Handle the condition when the previous the instruction is the first + // instruction in a fusible pair. Note: We need to check the previous + // fragment is a BF since we may encounter the case: + // + // \code + // cmp %rax %rcx + // .align 16 + // je .Label0 + // \endcode + // + // MCAlignFragment can grow and shrink, so it is not ensured to get a fixed + // size after finite times of relaxation. NOP or prefix should not emitted + // before the CMP since it may cause MCAssembler::relaxBoundaryAlign not to + // converge. + if (NeedAlignFused && isFirstMacroFusibleInst(PrevInst, *MCII) && CF && + isa_and_nonnull(CF->getPrevNode())) { + auto *PF = const_cast( + cast(CF->getPrevNode())); + // Macro fusion actually happens, so emit NOP before the first instrucion in + // the fused pair. Note: When there is a MCAlignFragment inserted just + // before the first instruction in the fused pair, e.g. // // \code + // .align 16 // cmp %rax %rcx + // je .Label0 + // \endcode + // + // We will not emit NOP before the CMP since the align directive is + // used to align the fused pair rather than NOP. 
+ if (isMacroFused(PrevInst, Inst)) { + if (isa_and_nonnull(PF->getPrevNode())) + return; + PF->setAlignment(AlignBoundary); + PF->setEmitNops(true); + return; + } else if (shouldAddPrefix(PrevInst)) { + // Macro fusion doesn't happen indeed, emit prefix before the previous + // instruction. + PF->setAlignment(AlignBoundary); + PF->setMaxBytesToEmit(RemainingPrefixSize); + PF->setValue(Prefix); + } + } + + // Step 2: + if (needAlignInst(Inst)) { + // Handle the condition when the instruction to be aligned is unfused. Note: + // When there is a MCAlignFragment inserted just before the instruction to + // be aligned, e.g. + // + // \code // .align 16 // je .Label0 - // \ endcode + // \endcode // - // We will treat the JCC as a unfused branch although it may be fused - // with the CMP. - auto *F = getOrCreateBoundaryAlignFragment(OS); + // We will not emit NOP before the instruction since the align directive is + // used to align JCC rather than NOP. + if (isa_and_nonnull(CF)) + return; + // Emit NOP before the instruction to be aligned. + auto *F = OS.getOrCreateBoundaryAlignFragment(); + F->setAlignment(AlignBoundary); F->setEmitNops(true); - F->setFused(false); } else if (NeedAlignFused && isFirstMacroFusibleInst(Inst, *MCII)) { - // We don't know if macro fusion happens until the reaching the next - // instruction, so a place holder is put here if necessary. - getOrCreateBoundaryAlignFragment(OS); + // We don't know if macro fusion happens until reaching the next + // instruction, so a placeholder is put here if necessary. + OS.getOrCreateBoundaryAlignFragment(); + } else if (shouldAddPrefix(Inst)) { + // Emit prefixes before instruction that doesn't need to be aligned. + auto *F = OS.getOrCreateBoundaryAlignFragment(); + F->setAlignment(AlignBoundary); + F->setMaxBytesToEmit(RemainingPrefixSize); + F->setValue(Prefix); } - - PrevInst = Inst; } -/// Insert a MCBoundaryAlignFragment to mark the end of the branch to be aligned -/// if necessary. 
+/// Set the last fragment in the set of fragments to be aligned (which is +/// current fragment indeed) for BF and insert a new BF to prevent further +/// instruction from being added to the current fragment if necessary. void X86AsmBackend::alignBranchesEnd(MCObjectStreamer &OS, const MCInst &Inst) { if (!needAlign(OS)) return; - // If the branch is emitted into a MCRelaxableFragment, we can determine the - // size of the branch easily in MCAssembler::relaxBoundaryAlign. When the - // branch is fused, the fused branch(macro fusion pair) must be emitted into - // two fragments. Or when the branch is unfused, the branch must be emitted - // into one fragment. The MCRelaxableFragment naturally marks the end of the - // fused or unfused branch. - // Otherwise, we need to insert a MCBoundaryAlignFragment to mark the end of - // the branch. This MCBoundaryAlignFragment may be reused to emit NOP to align - // other branch. - if (needAlignInst(Inst) && !isa(OS.getCurrentFragment())) - OS.insert(new MCBoundaryAlignFragment(AlignBoundary)); + + PrevInst = Inst; + + if (!needAlignInst(Inst)) + return; + + const MCFragment *CF = OS.getCurrentFragment(); + for (auto *F = CF; F && F != LastFragmentToBeAligned && + (F->hasInstructions() || isa(F)); + F = F->getPrevNode()) { + // The fragments to be aligned should be in the same section with this + // fragment, and each non-BF fragment on the path from this fragment to the + // fragments to be aligned must have a fixed size after finite times of + // relaxation. Currently, we conservatively use hasInstruction to ensure + // that. + if (auto *BF = dyn_cast(F)) { + if (BF->hasEmitNopsOrValue()) + const_cast(BF)->setFragment(CF); + // There is at most one MCBoundaryAlignFragment to align one instruction + // if we only emit NOP to align instruction. + if (AlignMaxPrefixSize == 0) + break; + } + } + + LastFragmentToBeAligned = CF; + + // We need no further instructions can be emitted into the current fragment. 
+ // + // If current fragment is a MCRelaxableFragment, then no more + // instructions can be pushed into since MCRelaxableFragment only holds one + // instruction. + // + // Otherwise, we need to insert a new BF to truncate the current fragment. + // This MCBoundaryAlignFragment may be reused to emit NOP or segment override + // prefix to align other instruction. + + if (!isa(OS.getCurrentFragment())) + OS.insert(new MCBoundaryAlignFragment()); // Update the maximum alignment on the current section if necessary. MCSection *Sec = OS.getCurrentSectionOnly(); Index: llvm/test/MC/X86/align-branch-32-1a.s =================================================================== --- llvm/test/MC/X86/align-branch-32-1a.s +++ llvm/test/MC/X86/align-branch-32-1a.s @@ -1,5 +1,6 @@ -# Check NOP padding is disabled before instruction that has variant symbol operand. -# RUN: llvm-mc -filetype=obj -triple i386-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=call %s | llvm-objdump -d - | FileCheck %s +## Check NOP/Prefix padding is disabled for instruction that has variant symbol operand. +# RUN: llvm-mc -filetype=obj -triple i386-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=call+jmp %s | llvm-objdump -d - | FileCheck %s +# RUN: llvm-mc -filetype=obj -triple i386-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=call+jmp --x86-align-branch-prefix-size=4 %s | llvm-objdump -d - | FileCheck %s # CHECK: 00000000 foo: # CHECK-COUNT-5: : 64 a3 01 00 00 00 movl %eax, %fs:1 Index: llvm/test/MC/X86/align-branch-32-2a.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-32-2a.s @@ -0,0 +1,25 @@ +## Check no prefix is inserted after hardcode. 
+# RUN: llvm-mc -filetype=obj -triple i386-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=fused+jcc+jmp --x86-align-branch-prefix-size=2 %s | llvm-objdump -d - | FileCheck %s + +# CHECK: 00000000 main: +# CHECK-NEXT: 0: 2e 55 pushl %ebp +# CHECK-NEXT: 2: 2e 89 e5 movl %esp, %ebp +# CHECK-NEXT: 5: 3e 55 pushl %ebp +# CHECK-COUNT-25: 55 pushl %ebp +# CHECK-NEXT: 20: e9 fc ff ff ff jmp {{.*}} +# CHECK: 00000025 infiniteLoop: +# CHECK-NEXT: 25: eb d9 jmp {{.*}} + + .text + .globl infiniteLoop +main: + .byte 0x2e + pushl %ebp + .byte 0x2e + movl %esp, %ebp + .rept 26 + pushl %ebp + .endr + jmp infiniteLoop +infiniteLoop: + jmp main Index: llvm/test/MC/X86/align-branch-32-3a.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-32-3a.s @@ -0,0 +1,111 @@ +## Check appropriate prefix is chosen to prefix an instruction. +# RUN: llvm-mc -filetype=obj -triple i386-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=fused+jcc+jmp --x86-align-branch-prefix-size=2 %s | llvm-objdump -d - | FileCheck %s + +# CHECK: 00000000 foo: +# CHECK-NEXT: 0: 65 65 a3 01 00 00 00 movl %eax, %gs:1 +# CHECK-NEXT: 7: 3e 55 pushl %ebp +# CHECK-NEXT: 9: 57 pushl %edi +# CHECK-COUNT-2: : 55 pushl %ebp +# CHECK: c: 89 e5 movl %esp, %ebp +# CHECK-NEXT: e: 89 7d f8 movl %edi, -8(%ebp) +# CHECK-COUNT-5: : 89 75 f4 movl %esi, -12(%ebp) +# CHECK: 20: 39 c5 cmpl %eax, %ebp +# CHECK-NEXT: 22: 74 5e je {{.*}} +# CHECK-NEXT: 24: 3e 89 73 f4 movl %esi, %ds:-12(%ebx) +# CHECK-NEXT: 28: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 2b: 89 7d f8 movl %edi, -8(%ebp) +# CHECK-COUNT-5: : 89 75 f4 movl %esi, -12(%ebp) +# CHECK-COUNT-3: : 5d popl %ebp +# CHECK: 40: 74 40 je {{.*}} +# CHECK-NEXT: 42: 5d popl %ebp +# CHECK-NEXT: 43: 74 3d je {{.*}} +# CHECK-NEXT: 45: 36 89 44 24 fc movl %eax, %ss:-4(%esp) +# CHECK-NEXT: 4a: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 4d: 89 7d f8 movl %edi, -8(%ebp) +# CHECK-COUNT-5: : 89 75 
f4 movl %esi, -12(%ebp) +# CHECK: 5f: 5d popl %ebp +# CHECK-NEXT: 60: eb 26 jmp {{.*}} +# CHECK-NEXT: 62: eb 24 jmp {{.*}} +# CHECK-NEXT: 64: eb 22 jmp {{.*}} +# CHECK-NEXT: 66: 89 45 fc movl %eax, -4(%ebp) +# CHECK-NEXT: 69: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 6c: 89 7d f8 movl %edi, -8(%ebp) +# CHECK-COUNT-3: : 89 75 f4 movl %esi, -12(%ebp) +# CHECK-COUNT-2: : 5d popl %ebp +# CHECK-NEXT: 7a: 39 c5 cmpl %eax, %ebp +# CHECK-NEXT: 7c: 74 04 je {{.*}} +# CHECK-COUNT-2: : 90 nop +# CHECK-NEXT: 80: eb 06 jmp {{.*}} +# CHECK-NEXT: 82: 8b 45 f4 movl -12(%ebp), %eax +# CHECK-NEXT: 85: 89 45 fc movl %eax, -4(%ebp) +# CHECK-COUNT-4: : 89 b5 50 fb ff ff movl %esi, -1200(%ebp) +# CHECK: a0: 89 75 0c movl %esi, 12(%ebp) +# CHECK-NEXT: a3: e9 fc ff ff ff jmp {{.*}} +# CHECK-COUNT-3: : 64 64 8e 15 01 00 00 00 movw %fs:1, %ss +# CHECK: c0: 39 c5 cmpl %eax, %ebp +# CHECK-NEXT: c2: 74 c4 je {{.*}} + .text + .globl foo + .p2align 4 +foo: + movl %eax, %gs:0x1 + pushl %ebp + pushl %edi + .rept 2 + pushl %ebp + .endr + movl %esp, %ebp + movl %edi, -8(%ebp) + .rept 5 + movl %esi, -12(%ebp) + .endr + cmp %eax, %ebp + je .L_2 + movl %esi, -12(%ebx) + movl %esi, -12(%ebp) + movl %edi, -8(%ebp) + .rept 5 + movl %esi, -12(%ebp) + .endr + .rept 3 + popl %ebp + .endr + je .L_2 + popl %ebp + je .L_2 + movl %eax, -4(%esp) + movl %esi, -12(%ebp) + movl %edi, -8(%ebp) + .rept 5 + movl %esi, -12(%ebp) + .endr + popl %ebp + jmp .L_3 + jmp .L_3 + jmp .L_3 + movl %eax, -4(%ebp) + movl %esi, -12(%ebp) + movl %edi, -8(%ebp) + .rept 3 + movl %esi, -12(%ebp) + .endr + .rept 2 + popl %ebp + .endr + cmp %eax, %ebp + je .L_2 + jmp .L_3 +.L_2: + movl -12(%ebp), %eax + movl %eax, -4(%ebp) +.L_3: + .rept 4 + movl %esi, -1200(%ebp) + .endr + movl %esi, 12(%ebp) + jmp bar + .rept 3 + mov %fs:0x1, %ss + .endr + cmp %eax, %ebp + je .L_3 Index: llvm/test/MC/X86/align-branch-32-4a.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-32-4a.s @@ 
-0,0 +1,27 @@ +## Check prefix of instruction is limited by option --x86-align-branch-prefix-size=NUM. +# RUN: llvm-mc -filetype=obj -triple i386-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=fused+jcc+jmp --x86-align-branch-prefix-size=4 %s | llvm-objdump -d - | FileCheck %s + +# CHECK: 00000000 foo: +# CHECK-NEXT: 0: 3e 66 0f 3a 60 00 03 pcmpestrm $3, %ds:(%eax), %xmm0 +# CHECK-NEXT: 7: 3e c4 e3 79 60 00 03 vpcmpestrm $3, %ds:(%eax), %xmm0 +# CHECK-NEXT: e: 65 65 65 a3 01 00 00 00 movl %eax, %gs:1 +# CHECK-COUNT-3: : 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 1f: 55 pushl %ebp +# CHECK-NEXT: 20: a8 04 testb $4, %al +# CHECK-NEXT: 22: 70 dc jo {{.*}} + + .text + .globl foo + .p2align 4 +foo: +.L1: + pcmpestrm $3, (%eax), %xmm0 + vpcmpestrm $3, (%eax), %xmm0 + movl %eax, %gs:0x1 + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + pushl %ebp + testb $0x4,%al + jo .L1 + Index: llvm/test/MC/X86/align-branch-64-1e.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-64-1e.s @@ -0,0 +1,35 @@ +## Check only fused conditional jumps, conditional jumps and unconditional jumps are aligned with option --x86-align-branch-boundary=32 --x86-align-branch=fused+jcc+jmp --x86-align-branch-prefix-size=4 +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=fused+jcc+jmp --x86-align-branch-prefix-size=4 %p/Inputs/align-branch-64-1.s | llvm-objdump -d - > %t1 +# RUN: FileCheck --input-file=%t1 %s + +# CHECK: 0000000000000000 foo: +# CHECK-NEXT: 0: 64 64 64 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-COUNT-2: : 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK: 1b: 48 39 c5 cmpq %rax, %rbp +# CHECK-NEXT: 1e: 31 c0 xorl %eax, %eax +# CHECK-NEXT: 20: 48 39 c5 cmpq %rax, %rbp +# CHECK-NEXT: 23: 74 5d je {{.*}} +# CHECK-NEXT: 25: 64 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-COUNT-2: : 64 89 04 25 01 00 00 00 movl 
%eax, %fs:1 +# CHECK: 3e: 31 c0 xorl %eax, %eax +# CHECK-NEXT: 40: 74 40 je {{.*}} +# CHECK-NEXT: 42: 5d popq %rbp +# CHECK-NEXT: 43: 74 3d je {{.*}} +# CHECK-NEXT: 45: 64 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-COUNT-2: : 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK: 5e: 31 c0 xorl %eax, %eax +# CHECK-NEXT: 60: eb 26 jmp {{.*}} +# CHECK-NEXT: 62: eb 24 jmp {{.*}} +# CHECK-NEXT: 64: eb 22 jmp {{.*}} +# CHECK-COUNT-2: : 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK: 76: 89 45 fc movl %eax, -4(%rbp) +# CHECK-NEXT: 79: 5d popq %rbp +# CHECK-NEXT: 7a: 48 39 c5 cmpq %rax, %rbp +# CHECK-NEXT: 7d: 74 03 je {{.*}} +# CHECK-NEXT: 7f: 90 nop +# CHECK-NEXT: 80: eb 06 jmp {{.*}} +# CHECK-NEXT: 82: 8b 45 f4 movl -12(%rbp), %eax +# CHECK-NEXT: 85: 89 45 fc movl %eax, -4(%rbp) +# CHECK-COUNT-10: : 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK: c4: eb c2 jmp {{.*}} +# CHECK-NEXT: c6: c3 retq Index: llvm/test/MC/X86/align-branch-64-2d.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-64-2d.s @@ -0,0 +1,20 @@ +## Check only indirect jumps and calls are aligned with option --x86-align-branch-boundary=32 --x86-align-branch=indirect+call --x86-align-branch-prefix-size=4 +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=indirect+call --x86-align-branch-prefix-size=4 %p/Inputs/align-branch-64-2.s | llvm-objdump -d - | FileCheck %s + +# CHECK: 0000000000000000 foo: +# CHECK-NEXT: 0: 64 64 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-COUNT-2: : 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-COUNT-2: : 89 75 f4 movl %esi, -12(%rbp) +# CHECK: 20: ff e0 jmpq *%rax +# CHECK-NEXT: 22: 64 64 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-COUNT-2: : 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK: 3c: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 3f: 55 pushq %rbp +# CHECK-NEXT: 40: ff d0 callq *%rax +# CHECK-NEXT: 42: 64 64 64 64 89 
04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 4d: 64 64 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 57: 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 5f: 55 pushq %rbp +# CHECK-NEXT: 60: e8 00 00 00 00 callq {{.*}} +# CHECK-COUNT-4: : 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK: 85: ff 14 25 00 00 00 00 callq *0 Index: llvm/test/MC/X86/align-branch-64-7a.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-64-7a.s @@ -0,0 +1,29 @@ +## Check no prefix is added to the instruction if there is an align directive between the instruction and the target branch +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=jmp --x86-align-branch-prefix-size=5 %s | llvm-objdump -d - | FileCheck %s + +# CHECK: 0000000000000000 test1: +# CHECK-NEXT: 0: 31 d2 xorl %edx, %edx +# CHECK-NEXT: 2: 89 8c 24 84 00 00 00 movl %ecx, 132(%rsp) +# CHECK-NEXT: 9: 4c 89 c1 movq %r8, %rcx +# CHECK-NEXT: c: 4c 8b 8c 24 88 00 00 00 movq 136(%rsp), %r9 +# CHECK-COUNT-4: : 90 nop +# CHECK: 18: 66 66 90 nop +# CHECK-NEXT: 1b: 2e 2e 4c 89 c1 movq %r8, %rcx +# CHECK-NEXT: 20: eb de jmp {{.*}} +# CHECK-NEXT: 22: c3 retq + + .text + .globl test1 +test1: +.Ltmp0: + xorl %edx, %edx + movl %ecx, 132(%rsp) + movq %r8, %rcx + movq 136(%rsp), %r9 + .p2align 3, 0x90 + .byte 102 + .byte 102 + nop + movq %r8, %rcx + jmp .Ltmp0 + retq Index: llvm/test/MC/X86/align-branch-64-8a.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-64-8a.s @@ -0,0 +1,24 @@ +## Check the case where multiple CMPs are followed by a jcc is correctly handled. 
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=fused+jcc --x86-align-branch-prefix-size=5 %s | llvm-objdump -d - | FileCheck %s + +# CHECK: 0000000000000000 test1: +# CHECK-NEXT: 0: 2e 2e 2e 2e 48 39 c5 cmpq %rax, %rbp +# CHECK-NEXT: 7: 2e 48 39 c5 cmpq %rax, %rbp +# CHECK-NEXT: b: 48 39 c5 cmpq %rax, %rbp +# CHECK-NEXT: e: 48 39 c5 cmpq %rax, %rbp +# CHECK-NEXT: 11: 48 39 c5 cmpq %rax, %rbp +# CHECK-NEXT: 14: 48 39 c5 cmpq %rax, %rbp +# CHECK-NEXT: 17: 48 39 c5 cmpq %rax, %rbp +# CHECK-NEXT: 1a: 48 39 c5 cmpq %rax, %rbp +# CHECK-NEXT: 1d: 48 39 c5 cmpq %rax, %rbp +# CHECK-NEXT: 20: 48 39 c5 cmpq %rax, %rbp +# CHECK-NEXT: 23: 74 db je {{.*}} + + .text + .globl test1 +test1: +.Ltmp0: + .rept 10 + cmp %rax, %rbp + .endr + je .Ltmp0