Index: llvm/include/llvm/MC/MCAsmBackend.h =================================================================== --- llvm/include/llvm/MC/MCAsmBackend.h +++ llvm/include/llvm/MC/MCAsmBackend.h @@ -51,6 +51,8 @@ /// emitting the instruction. virtual void alignBranchesBegin(MCObjectStreamer &OS, const MCInst &Inst) {} virtual void alignBranchesEnd(MCObjectStreamer &OS, const MCInst &Inst) {} + /// Check if the target needs to emit prefixes to align instructions. + virtual bool needPrefixPadding() const { return false; } /// lifetime management virtual void reset() {} Index: llvm/include/llvm/MC/MCFragment.h =================================================================== --- llvm/include/llvm/MC/MCFragment.h +++ llvm/include/llvm/MC/MCFragment.h @@ -574,32 +574,46 @@ uint64_t Size = 0; /// The alignment requirement of the branch to be aligned. Align AlignBoundary; - /// Flag to indicate whether the branch is fused. Use in determining the - /// region of fragments being aligned. - bool Fused : 1; - /// Flag to indicate whether NOPs should be emitted. - bool EmitNops : 1; + /// Flag to indicate that (optimal) NOPs should be emitted instead + /// of using the provided value. + bool EmitNops = false; + /// Value to use for filling padding bytes, if any. + Optional Value; + /// The maximum number of bytes to emit; if the flag EmitNops is true, + /// then this constraint is ignored. + uint64_t MaxBytesToEmit = 0; + /// The fragment to be aligned. 
+ const MCFragment *Frag = nullptr; public: - MCBoundaryAlignFragment(Align AlignBoundary, bool Fused = false, - bool EmitNops = false, MCSection *Sec = nullptr) - : MCFragment(FT_BoundaryAlign, false, Sec), AlignBoundary(AlignBoundary), - Fused(Fused), EmitNops(EmitNops) {} + MCBoundaryAlignFragment(MCSection *Sec = nullptr) + : MCFragment(FT_BoundaryAlign, false, Sec) {} /// \name Accessors /// @{ + uint64_t getSize() const { return Size; } - void setSize(uint64_t Value) { Size = Value; } + void setSize(uint64_t V) { Size = V; } Align getAlignment() const { return AlignBoundary; } + void setAlignment(Align V) { AlignBoundary = V; } - bool isFused() const { return Fused; } - void setFused(bool Value) { Fused = Value; } + bool hasValue() const { return Value.hasValue(); } + uint8_t getValue() const { return Value.getValue(); } + void setValue(uint8_t V) { Value = V; } + + bool hasEmitNops() const { return EmitNops; } + void setEmitNops(bool V) { EmitNops = V; } + + bool hasEmit() const { return EmitNops || Value.hasValue(); } + + uint8_t getMaxBytesToEmit() const { return MaxBytesToEmit; } + void setMaxBytesToEmit(uint64_t V) { MaxBytesToEmit = V; } + + const MCFragment *getFragment() const { return Frag; } + void setFragment(const MCFragment *F) { Frag = F; } - bool canEmitNops() const { return EmitNops; } - void setEmitNops(bool Value) { EmitNops = Value; } /// @} - // static bool classof(const MCFragment *F) { return F->getKind() == MCFragment::FT_BoundaryAlign; Index: llvm/include/llvm/MC/MCObjectStreamer.h =================================================================== --- llvm/include/llvm/MC/MCObjectStreamer.h +++ llvm/include/llvm/MC/MCObjectStreamer.h @@ -87,6 +87,11 @@ /// if the Subtarget differs from the current fragment. 
MCDataFragment *getOrCreateDataFragment(const MCSubtargetInfo* STI = nullptr); + /// Get a boundary-align fragment to write into, creating a new one if the + /// current fragment is not a boundary-align fragment or has been used to emit + /// something. + MCBoundaryAlignFragment *getOrCreateBoundaryAlignFragment(); + protected: bool changeSectionImpl(MCSection *Section, const MCExpr *Subsection); Index: llvm/lib/MC/MCAssembler.cpp =================================================================== --- llvm/lib/MC/MCAssembler.cpp +++ llvm/lib/MC/MCAssembler.cpp @@ -609,9 +609,15 @@ } case MCFragment::FT_BoundaryAlign: { - if (!Asm.getBackend().writeNopData(OS, FragmentSize)) - report_fatal_error("unable to write nop sequence of " + - Twine(FragmentSize) + " bytes"); + const MCBoundaryAlignFragment &BF = cast(F); + if (BF.hasEmitNops()) { + if (!Asm.getBackend().writeNopData(OS, FragmentSize)) + report_fatal_error("unable to write nop sequence of " + + Twine(FragmentSize) + " bytes"); + } else if (BF.hasValue()) { + for (uint64_t i = 0; i != FragmentSize; ++i) + OS << char(BF.getValue()); + } break; } @@ -990,26 +996,43 @@ bool MCAssembler::relaxBoundaryAlign(MCAsmLayout &Layout, MCBoundaryAlignFragment &BF) { - // The MCBoundaryAlignFragment that doesn't emit NOP should not be relaxed. - if (!BF.canEmitNops()) + // The MCBoundaryAlignFragment that does not emit anything or have a + // target branch should not be relaxed. + if (!BF.hasEmit() || !BF.getFragment()) return false; - uint64_t AlignedOffset = Layout.getFragmentOffset(BF.getNextNode()); - uint64_t AlignedSize = 0; - const MCFragment *F = BF.getNextNode(); - // If the branch is unfused, it is emitted into one fragment, otherwise it is - // emitted into two fragments at most, the next MCBoundaryAlignFragment(if - // exists) also marks the end of the branch. - for (auto i = 0, N = BF.isFused() ? 
2 : 1; - i != N && !isa(F); ++i, F = F->getNextNode()) { - AlignedSize += computeFragmentSize(Layout, *F); + const MCFragment *TF = BF.getFragment(); + uint64_t AlignedSize = computeFragmentSize(Layout, *TF); + uint64_t AlignedOffset = Layout.getFragmentOffset(TF); + // Deal with the macro fusion condition. + const MCFragment *PTF = TF->getPrevNode(); + if (!isa(PTF)) { + uint64_t Size = computeFragmentSize(Layout, *PTF); + AlignedSize += Size; + AlignedOffset -= Size; } - uint64_t OldSize = BF.getSize(); - AlignedOffset -= OldSize; + + // Get the total size of the MCBoundaryAlignFragments from the BF + // to its target fragment. + uint64_t FixedValue = [&]() { + uint64_t N = 0; + for (const MCFragment *F = &BF; F != TF; F = F->getNextNode()) + if (auto *MBF = dyn_cast(F)) + N += MBF->getSize(); + return N; + }(); + + AlignedOffset -= FixedValue; Align BoundaryAlignment = BF.getAlignment(); uint64_t NewSize = needPadding(AlignedOffset, AlignedSize, BoundaryAlignment) ? offsetToAlignment(AlignedOffset, BoundaryAlignment) : 0U; + if (!BF.hasEmitNops()) { + NewSize = std::min({NewSize, + 15 - computeFragmentSize(Layout, *(BF.getNextNode())), + static_cast(BF.getMaxBytesToEmit())}); + } + uint64_t OldSize = BF.getSize(); if (NewSize == OldSize) return false; BF.setSize(NewSize); Index: llvm/lib/MC/MCFragment.cpp =================================================================== --- llvm/lib/MC/MCFragment.cpp +++ llvm/lib/MC/MCFragment.cpp @@ -424,14 +424,13 @@ } case MCFragment::FT_BoundaryAlign: { const auto *BF = cast(this); - if (BF->canEmitNops()) - OS << " (can emit nops to align"; - if (BF->isFused()) - OS << " fused branch)"; - else - OS << " unfused branch)"; + if (BF->hasEmitNops()) + OS << " (emit nops)"; OS << "\n "; + if (BF->hasValue()) + OS << " Value:" << hexdigit(BF->getValue()); OS << " BoundarySize:" << BF->getAlignment().value() + << " MaxBytesToEmit:" << BF->getMaxBytesToEmit() << " Size:" << BF->getSize(); break; } Index: 
llvm/lib/MC/MCObjectStreamer.cpp =================================================================== --- llvm/lib/MC/MCObjectStreamer.cpp +++ llvm/lib/MC/MCObjectStreamer.cpp @@ -193,6 +193,20 @@ const MCSubtargetInfo *STI) { if (!F.hasInstructions()) return true; + + // When prefixes are emitted before non-branch instructions to align + // branches, we don't want to add data to a fragment that already has + // instructions. Namely, each instruction will be emitted into a fragment + // of its own unless hard-coded bytes precede the instruction, e.g. + // + // \code + // .byte 0x2e + // cmp %rax %rcx + // \endcode + // + if (Assembler.getBackend().needPrefixPadding()) + return !F.hasInstructions(); + // When bundling is enabled, we don't want to add data to a fragment that // already has instructions (see MCELFStreamer::EmitInstToData for details) if (Assembler.isBundlingEnabled()) @@ -212,6 +226,15 @@ return F; } +MCBoundaryAlignFragment *MCObjectStreamer::getOrCreateBoundaryAlignFragment() { + auto *F = dyn_cast_or_null(getCurrentFragment()); + if (!F || F->hasEmit()) { + F = new MCBoundaryAlignFragment(); + insert(F); + } + return F; +} + void MCObjectStreamer::visitUsedSymbol(const MCSymbol &Sym) { Assembler->registerSymbol(Sym); } Index: llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp =================================================================== --- llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -13,6 +13,7 @@ #include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCELFObjectWriter.h" @@ -29,8 +30,8 @@ #include "llvm/MC/MCValue.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" 
using namespace llvm; @@ -119,11 +120,11 @@ cl::opt X86AlignBranchBoundary( "x86-align-branch-boundary", cl::init(0), - cl::desc( - "Control how the assembler should align branches with NOP. If the " - "boundary's size is not 0, it should be a power of 2 and no less " - "than 32. Branches will be aligned within the boundary of specified " - "size. -x86-align-branch-boundary=0 doesn't align branches.")); + cl::desc("Control how the assembler should align branches with NOP or " + "segment override prefix. If the boundary's size is not 0, it " + "should be a power of 2 and no less than 32. Branches will be " + "aligned within the boundary of specified size. " + "-x86-align-branch-boundary=0 doesn't align branches.")); cl::opt> X86AlignBranch( "x86-align-branch", @@ -135,6 +136,11 @@ "indirect(indirect jump)."), cl::location(X86AlignBranchKindLoc)); +cl::opt X86AlignBranchPrefixSize( + "x86-align-branch-prefix-size", cl::init(0), cl::Hidden, + cl::desc("Specify the maximum number of prefixes on an instruction to " + "align branches. 
The number should be between 0 and 4.")); + class X86ELFObjectWriter : public MCELFObjectTargetWriter { public: X86ELFObjectWriter(bool is64Bit, uint8_t OSABI, uint16_t EMachine, @@ -147,14 +153,17 @@ std::unique_ptr MCII; X86AlignBranchKind AlignBranchType; Align AlignBoundary; + uint8_t AlignMaxPrefixSize; bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const; bool needAlign(MCObjectStreamer &OS) const; bool needAlignInst(const MCInst &Inst) const; - MCBoundaryAlignFragment * - getOrCreateBoundaryAlignFragment(MCObjectStreamer &OS) const; + + bool shouldAddPrefix(const MCInst &Inst) const; + uint8_t choosePrefix(const MCInst &Inst) const; MCInst PrevInst; + const MCFragment *LastBranch = nullptr; public: X86AsmBackend(const Target &T, const MCSubtargetInfo &STI) @@ -162,10 +171,16 @@ MCII(T.createMCInstrInfo()) { AlignBoundary = assumeAligned(X86AlignBranchBoundary); AlignBranchType = X86AlignBranchKindLoc; + AlignMaxPrefixSize = std::min(X86AlignBranchPrefixSize, 4); } void alignBranchesBegin(MCObjectStreamer &OS, const MCInst &Inst) override; void alignBranchesEnd(MCObjectStreamer &OS, const MCInst &Inst) override; + bool needPrefixPadding() const override { + return AlignBoundary != Align::None() && + AlignBranchType != X86AlignBranchKind::AlignBranchNone && + AlignMaxPrefixSize != 0; + } unsigned getNumFixupKinds() const override { return X86::NumTargetFixupKinds; @@ -458,19 +473,63 @@ (AlignBranchType & X86AlignBranchKind::AlignBranchIndirect)); } -static bool canReuseBoundaryAlignFragment(const MCBoundaryAlignFragment &F) { - // If a MCBoundaryAlignFragment has not been used to emit NOP,we can reuse it. - return !F.canEmitNops(); +/// Check if prefix can be added before instruction \p Inst. +bool X86AsmBackend::shouldAddPrefix(const MCInst &Inst) const { + // No prefix can be added if AlignMaxPrefixSize is 0. 
+ if (AlignMaxPrefixSize == 0) + return false; + // The longer an instruction is, the more likely it is to cross the boundary, + // so prefixes should not be inserted before any branch affected by the JCC + // erratum even if it is asked to be aligned. + const MCInstrDesc &InstDesc = MCII->get(Inst.getOpcode()); + if (InstDesc.isBranch() || InstDesc.isCall() || InstDesc.isReturn()) + return false; + + // Linker may rewrite the instruction with variant symbol operand. + return !hasVariantSymbol(Inst); } -MCBoundaryAlignFragment * -X86AsmBackend::getOrCreateBoundaryAlignFragment(MCObjectStreamer &OS) const { - auto *F = dyn_cast_or_null(OS.getCurrentFragment()); - if (!F || !canReuseBoundaryAlignFragment(*F)) { - F = new MCBoundaryAlignFragment(AlignBoundary); - OS.insert(F); +/// Choose which prefix should be inserted before the instruction. The rules for +/// choosing a prefix are: +/// a. Use the existing segment prefix if there is one. +/// b. Use CS segment prefix in 64-bit mode. +/// c. In 32-bit mode, use SS segment prefix with ESP/EBP base register and use +/// DS segment prefix without ESP/EBP base register. 
+uint8_t X86AsmBackend::choosePrefix(const MCInst &Inst) const { + for (const auto &Operand : Inst) { + if (Operand.isReg()) + switch (Operand.getReg()) { + default: + break; + case X86::CS: + return 0x2e; + case X86::SS: + return 0x36; + case X86::DS: + return 0x3e; + case X86::ES: + return 0x26; + case X86::FS: + return 0x64; + case X86::GS: + return 0x65; + } + } + if (STI.getFeatureBits()[X86::Mode64Bit]) + return 0x2e; + + unsigned Opcode = Inst.getOpcode(); + const MCInstrDesc &Desc = MCII->get(Opcode); + uint64_t TSFlags = Desc.TSFlags; + int MemoryOperand = X86II::getMemoryOperandNo(TSFlags); + if (MemoryOperand >= 0) { + unsigned CurOp = X86II::getOperandBias(Desc); + unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg; + unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg(); + if (BaseReg == X86::ESP || BaseReg == X86::EBP) + return 0x36; } - return F; + return 0x3e; } /// Insert MCBoundaryAlignFragment before instructions to align branches. @@ -480,56 +539,124 @@ return; MCFragment *CF = OS.getCurrentFragment(); + // Prefix or NOP shouldn't be inserted after hardcode since there is no clear + // instruction boundary. + if (isa_and_nonnull(CF) && !CF->hasInstructions()) { + PrevInst = Inst; + return; + } + + auto GetRemainingPrefixSize = [&](const MCInst &Inst) { + SmallString<256> Code; + raw_svector_ostream VecOS(Code); + OS.getAssembler().getEmitter().emitPrefix(Inst, VecOS, STI); + uint8_t ExistingPrefixSize = static_cast(Code.size()); + return AlignMaxPrefixSize - ExistingPrefixSize; + }; + bool NeedAlignFused = AlignBranchType & X86AlignBranchKind::AlignBranchFused; - if (NeedAlignFused && isMacroFused(PrevInst, Inst) && CF) { + if (NeedAlignFused && isFirstMacroFusibleInst(PrevInst, *MCII) && CF && + isa_and_nonnull(CF->getPrevNode())) { + auto *PF = const_cast( + cast(CF->getPrevNode())); // Macro fusion actually happens and there is no other fragment inserted // after the previous instruction. 
NOP can be emitted in PF to align fused // jcc. - if (auto *PF = - dyn_cast_or_null(CF->getPrevNode())) { - const_cast(PF)->setEmitNops(true); - const_cast(PF)->setFused(true); + if (isMacroFused(PrevInst, Inst)) { + PF->setAlignment(AlignBoundary); + PF->setEmitNops(true); + } else if (shouldAddPrefix(PrevInst)) { + // Macro fusion doesn't happen. Prefix can be emitted in PF to align + // branch. + PF->setAlignment(AlignBoundary); + PF->setMaxBytesToEmit(GetRemainingPrefixSize(PrevInst)); + PF->setValue(choosePrefix(PrevInst)); } - } else if (needAlignInst(Inst)) { - // Note: When there is at least one fragment, such as MCAlignFragment, - // inserted after the previous instruction, e.g. + + } else if (needAlignInst(Inst) && !isa_and_nonnull(CF)) { + // Note: When there is a MCAlignFragment inserted just before the branch to + // be emitted, e.g. // // \code // cmp %rax %rcx // .align 16 // je .Label0 - // \ endcode + // \endcode + // + // We will not emit NOP before the branch since the align directive is used + // to align the branch rather than NOP. // - // We will treat the JCC as a unfused branch although it may be fused - // with the CMP. - auto *F = getOrCreateBoundaryAlignFragment(OS); + // Emit NOP before unfused branch to be aligned. + auto *F = OS.getOrCreateBoundaryAlignFragment(); + F->setAlignment(AlignBoundary); F->setEmitNops(true); - F->setFused(false); } else if (NeedAlignFused && isFirstMacroFusibleInst(Inst, *MCII)) { - // We don't know if macro fusion happens until the reaching the next + // We don't know if macro fusion happens until reaching the next // instruction, so a place holder is put here if necessary. - getOrCreateBoundaryAlignFragment(OS); + OS.getOrCreateBoundaryAlignFragment(); + } else if (shouldAddPrefix(Inst)) { + // Emit prefixes before non-branch instruction to align branch. 
+ auto *F = OS.getOrCreateBoundaryAlignFragment(); + F->setAlignment(AlignBoundary); + F->setMaxBytesToEmit(GetRemainingPrefixSize(Inst)); + F->setValue(choosePrefix(Inst)); } PrevInst = Inst; } -/// Insert a MCBoundaryAlignFragment to mark the end of the branch to be aligned -/// if necessary. +/// Set target branch for MCBoundaryAlignFragment and insert a +/// MCBoundaryAlignFragment to mark the end of the branch to be aligned if +/// necessary. void X86AsmBackend::alignBranchesEnd(MCObjectStreamer &OS, const MCInst &Inst) { if (!needAlign(OS)) return; - // If the branch is emitted into a MCRelaxableFragment, we can determine the - // size of the branch easily in MCAssembler::relaxBoundaryAlign. When the - // branch is fused, the fused branch(macro fusion pair) must be emitted into - // two fragments. Or when the branch is unfused, the branch must be emitted - // into one fragment. The MCRelaxableFragment naturally marks the end of the - // fused or unfused branch. + + if (!needAlignInst(Inst)) + return; + + const MCFragment *CF = OS.getCurrentFragment(); + for (auto *F = CF; F && F != LastBranch && + (F->hasInstructions() || isa(F)); + F = F->getPrevNode()) { + // If there is a fragment that neither has instructions nor is + // MCBoundaryAlignFragment on the path from the MCBoundaryAlignFragment to + // the branch, the MCBoundaryAlignFragment will have no target branch, so + // that the MCBoundaryAlignFragment won't be relaxed to avoid falling into + // an infinite loop. + if (auto *BF = dyn_cast(F)) { + if (BF->hasEmit()) + const_cast(BF)->setFragment(CF); + // There is at most one MCBoundaryAlignFragment to align one branch if + // we only emit NOP to align branch. 
+ if (AlignMaxPrefixSize == 0) + break; + } + } + + LastBranch = CF; + + // The following instructions must not be emitted into the same fragment as + // the branch to be aligned, so that we can determine the size of the branch + // in MCAssembler::relaxBoundaryAlign by traversing backward from the fragment + // holding the branch to the nearest preceding MCBoundaryAlignFragment. + // + // If the branch is emitted into a MCRelaxableFragment, then no more + // instructions can be pushed into it, since a MCRelaxableFragment only holds + // one instruction. + // + // If we need prefix padding to align branches, then each fragment holds at + // most one instruction and no hard-coded bytes can be appended. (see + // MCObjectStreamer::getOrCreateDataFragment and MCELFStreamer::EmitInstToData + // for details) + // // Otherwise, we need to insert a MCBoundaryAlignFragment to mark the end of - // the branch. This MCBoundaryAlignFragment may be reused to emit NOP to align - // other branch. - if (needAlignInst(Inst) && !isa(OS.getCurrentFragment())) - OS.insert(new MCBoundaryAlignFragment(AlignBoundary)); + // the branch. This MCBoundaryAlignFragment may be reused to emit NOP or + // segment override prefix to align other branches. + + if (!isa(OS.getCurrentFragment()) && + AlignMaxPrefixSize == 0) + OS.insert(new MCBoundaryAlignFragment()); // Update the maximum alignment on the current section if necessary. MCSection *Sec = OS.getCurrentSectionOnly(); Index: llvm/test/MC/X86/align-branch-32-1a.s =================================================================== --- llvm/test/MC/X86/align-branch-32-1a.s +++ llvm/test/MC/X86/align-branch-32-1a.s @@ -1,5 +1,6 @@ -# Check NOP padding is disabled before instruction that has variant symbol operand. -# RUN: llvm-mc -filetype=obj -triple i386-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=call %s | llvm-objdump -d - | FileCheck %s +# Check NOP/Prefix padding is disabled for an instruction that has a variant symbol operand. 
+# RUN: llvm-mc -filetype=obj -triple i386-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=call+jmp %s | llvm-objdump -d - | FileCheck %s +# RUN: llvm-mc -filetype=obj -triple i386-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=call+jmp --x86-align-branch-prefix-size=4 %s | llvm-objdump -d - | FileCheck %s # CHECK: 00000000 foo: # CHECK-COUNT-5: : 64 a3 01 00 00 00 movl %eax, %fs:1 Index: llvm/test/MC/X86/align-branch-32-2a.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-32-2a.s @@ -0,0 +1,23 @@ +# Check no prefix is inserted after hardcode. +# RUN: llvm-mc -filetype=obj -triple i386-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=fused+jcc+jmp --x86-align-branch-prefix-size=2 %s | llvm-objdump -d - | FileCheck %s + +# CHECK: 00000000 main: +# CHECK-NEXT: 0: 2e 55 pushl %ebp +# CHECK-NEXT: 2: 3e 3e 89 e5 movl %esp, %ebp +# CHECK-COUNT-26: 55 pushl %ebp +# CHECK-NEXT: 20: eb 00 jmp 0 +# CHECK: 00000022 infiniteLoop: +# CHECK-NEXT: 22: eb dc jmp -36
+ + .text + .globl infiniteLoop +main: + .byte 0x2e + pushl %ebp + movl %esp, %ebp + .rept 26 + pushl %ebp + .endr + jmp infiniteLoop +infiniteLoop: + jmp main Index: llvm/test/MC/X86/align-branch-32-3a.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-32-3a.s @@ -0,0 +1,114 @@ +# Check the appropriate prefix is chosen to prefix an instruction. +# RUN: llvm-mc -filetype=obj -triple i386-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=fused+jcc+jmp --x86-align-branch-prefix-size=2 %s | llvm-objdump -d - | FileCheck %s + +# CHECK: 00000000 foo: +# CHECK-NEXT: 0: 65 65 a3 01 00 00 00 movl %eax, %gs:1 +# CHECK-NEXT: 7: 3e 55 pushl %ebp +# CHECK-NEXT: 9: 57 pushl %edi +# CHECK-COUNT-2: : 55 pushl %ebp +# CHECK: c: 89 e5 movl %esp, %ebp +# CHECK-NEXT: e: 89 7d f8 movl %edi, -8(%ebp) +# CHECK-COUNT-5: : 89 75 f4 movl %esi, -12(%ebp) +# CHECK: 20: 39 c5 cmpl %eax, %ebp +# CHECK-NEXT: 22: 74 5e je {{.*}} +# CHECK-NEXT: 24: 3e 89 73 f4 movl %esi, %ds:-12(%ebx) +# CHECK-NEXT: 28: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 2b: 89 7d f8 movl %edi, -8(%ebp) +# CHECK-COUNT-5: : 89 75 f4 movl %esi, -12(%ebp) +# CHECK-COUNT-3: : 5d popl %ebp +# CHECK: 40: 74 40 je {{.*}} +# CHECK-NEXT: 42: 5d popl %ebp +# CHECK-NEXT: 43: 74 3d je {{.*}} +# CHECK-NEXT: 45: 36 89 44 24 fc movl %eax, %ss:-4(%esp) +# CHECK-NEXT: 4a: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 4d: 89 7d f8 movl %edi, -8(%ebp) +# CHECK-COUNT-5: : 89 75 f4 movl %esi, -12(%ebp) +# CHECK: 5f: 5d popl %ebp +# CHECK-NEXT: 60: eb 26 jmp {{.*}} +# CHECK-NEXT: 62: eb 24 jmp {{.*}} +# CHECK-NEXT: 64: eb 22 jmp {{.*}} +# CHECK-NEXT: 66: 89 45 fc movl %eax, -4(%ebp) +# CHECK-NEXT: 69: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 6c: 89 7d f8 movl %edi, -8(%ebp) +# CHECK-COUNT-3: : 89 75 f4 movl %esi, -12(%ebp) +# CHECK-COUNT-2: : 5d popl %ebp +# CHECK-NEXT: 7a: 39 c5 cmpl %eax, %ebp +# CHECK-NEXT: 7c: 74 04 je {{.*}} +# CHECK-COUNT-2: : 90 nop +# 
CHECK-NEXT: 80: eb 06 jmp {{.*}} +# CHECK-NEXT: 82: 8b 45 f4 movl -12(%ebp), %eax +# CHECK-NEXT: 85: 89 45 fc movl %eax, -4(%ebp) +# CHECK-COUNT-4: : 89 b5 50 fb ff ff movl %esi, -1200(%ebp) +# CHECK: a0: 89 75 0c movl %esi, 12(%ebp) +# CHECK-NEXT: a3: e9 fc ff ff ff jmp {{.*}} +# CHECK-COUNT-4: : 89 b5 50 fb ff ff movl %esi, -1200(%ebp) +# CHECK: c0: 89 75 00 movl %esi, (%ebp) +# CHECK-NEXT: c3: 74 c3 je {{.*}} +# CHECK-NEXT: c5: 74 c1 je {{.*}} + + .text + .globl foo + .p2align 4 +foo: + movl %eax, %gs:0x1 + pushl %ebp + pushl %edi + .rept 2 + pushl %ebp + .endr + movl %esp, %ebp + movl %edi, -8(%ebp) + .rept 5 + movl %esi, -12(%ebp) + .endr + cmp %eax, %ebp + je .L_2 + movl %esi, -12(%ebx) + movl %esi, -12(%ebp) + movl %edi, -8(%ebp) + .rept 5 + movl %esi, -12(%ebp) + .endr + .rept 3 + popl %ebp + .endr + je .L_2 + popl %ebp + je .L_2 + movl %eax, -4(%esp) + movl %esi, -12(%ebp) + movl %edi, -8(%ebp) + .rept 5 + movl %esi, -12(%ebp) + .endr + popl %ebp + jmp .L_3 + jmp .L_3 + jmp .L_3 + movl %eax, -4(%ebp) + movl %esi, -12(%ebp) + movl %edi, -8(%ebp) + .rept 3 + movl %esi, -12(%ebp) + .endr + .rept 2 + popl %ebp + .endr + cmp %eax, %ebp + je .L_2 + jmp .L_3 +.L_2: + movl -12(%ebp), %eax + movl %eax, -4(%ebp) +.L_3: + .rept 4 + movl %esi, -1200(%ebp) + .endr + movl %esi, 12(%ebp) + jmp bar + .rept 4 + movl %esi, -1200(%ebp) + .endr + movl %esi, (%ebp) + je .L_3 + je .L_3 Index: llvm/test/MC/X86/align-branch-32-4a.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-32-4a.s @@ -0,0 +1,27 @@ +# Check prefix of instruction is limited by option --x86-align-branch-prefix-size=NUM. 
+# RUN: llvm-mc -filetype=obj -triple i386-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=fused+jcc+jmp --x86-align-branch-prefix-size=4 %s | llvm-objdump -d - | FileCheck %s + +# CHECK: 00000000 foo: +# CHECK-NEXT: 0: 3e 66 0f 3a 60 00 03 pcmpestrm $3, %ds:(%eax), %xmm0 +# CHECK-NEXT: 7: 3e c4 e3 79 60 00 03 vpcmpestrm $3, %ds:(%eax), %xmm0 +# CHECK-NEXT: e: 65 65 65 a3 01 00 00 00 movl %eax, %gs:1 +# CHECK-COUNT-3: : 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 1f: 55 pushl %ebp +# CHECK-NEXT: 20: a8 04 testb $4, %al +# CHECK-NEXT: 22: 70 dc jo {{.*}} + + .text + .globl foo + .p2align 4 +foo: +.L1: + pcmpestrm $3, (%eax), %xmm0 + vpcmpestrm $3, (%eax), %xmm0 + movl %eax, %gs:0x1 + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + pushl %ebp + testb $0x4,%al + jo .L1 + Index: llvm/test/MC/X86/align-branch-64-1e.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-64-1e.s @@ -0,0 +1,35 @@ +# Check only fused conditional jumps, conditional jumps and unconditional jumps are aligned with option --x86-align-branch-boundary=32 --x86-align-branch=fused+jcc+jmp --x86-align-branch-prefix-size=4 +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=fused+jcc+jmp --x86-align-branch-prefix-size=4 %p/Inputs/align-branch-64-1.s | llvm-objdump -d - > %t1 +# RUN: FileCheck --input-file=%t1 %s + +# CHECK: 0000000000000000 foo: +# CHECK-NEXT: 0: 64 64 64 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-COUNT-2: : 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK: 1b: 48 39 c5 cmpq %rax, %rbp +# CHECK-NEXT: 1e: 31 c0 xorl %eax, %eax +# CHECK-NEXT: 20: 48 39 c5 cmpq %rax, %rbp +# CHECK-NEXT: 23: 74 5d je {{.*}} +# CHECK-NEXT: 25: 64 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-COUNT-2: : 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK: 3e: 31 c0 xorl %eax, %eax +# CHECK-NEXT: 40: 74 40 je {{.*}} +# CHECK-NEXT: 42: 5d 
popq %rbp +# CHECK-NEXT: 43: 74 3d je {{.*}} +# CHECK-NEXT: 45: 64 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-COUNT-2: : 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK: 5e: 31 c0 xorl %eax, %eax +# CHECK-NEXT: 60: eb 26 jmp {{.*}} +# CHECK-NEXT: 62: eb 24 jmp {{.*}} +# CHECK-NEXT: 64: eb 22 jmp {{.*}} +# CHECK-COUNT-2: : 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK: 76: 89 45 fc movl %eax, -4(%rbp) +# CHECK-NEXT: 79: 5d popq %rbp +# CHECK-NEXT: 7a: 48 39 c5 cmpq %rax, %rbp +# CHECK-NEXT: 7d: 74 03 je {{.*}} +# CHECK-NEXT: 7f: 90 nop +# CHECK-NEXT: 80: eb 06 jmp {{.*}} +# CHECK-NEXT: 82: 8b 45 f4 movl -12(%rbp), %eax +# CHECK-NEXT: 85: 89 45 fc movl %eax, -4(%rbp) +# CHECK-COUNT-10: : 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK: c4: eb c2 jmp {{.*}} +# CHECK-NEXT: c6: c3 retq Index: llvm/test/MC/X86/align-branch-64-2d.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-64-2d.s @@ -0,0 +1,20 @@ +# Check only indirect jumps and calls are aligned with option --x86-align-branch-boundary=32 --x86-align-branch=indirect+call --x86-align-branch-prefix-size=4 +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=indirect+call --x86-align-branch-prefix-size=4 %p/Inputs/align-branch-64-2.s | llvm-objdump -d - | FileCheck %s + +# CHECK: 0000000000000000 foo: +# CHECK-NEXT: 0: 64 64 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-COUNT-2: : 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-COUNT-2: : 89 75 f4 movl %esi, -12(%rbp) +# CHECK: 20: ff e0 jmpq *%rax +# CHECK-NEXT: 22: 64 64 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-COUNT-2: : 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK: 3c: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 3f: 55 pushq %rbp +# CHECK-NEXT: 40: ff d0 callq *%rax +# CHECK-NEXT: 42: 64 64 64 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 4d: 64 64 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# 
CHECK-NEXT: 57: 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 5f: 55 pushq %rbp +# CHECK-NEXT: 60: e8 9b ff ff ff callq -101 +# CHECK-COUNT-4: : 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK: 85: ff 14 25 00 00 00 00 callq *0 Index: llvm/test/MC/X86/align-branch-64-7a.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-64-7a.s @@ -0,0 +1,29 @@ +# Check no prefix is added to the instruction if there is an align directive between the instruction and the target branch +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=jmp --x86-align-branch-prefix-size=5 %s | llvm-objdump -d - | FileCheck %s + +# CHECK: 0000000000000000 test1: +# CHECK-NEXT: 0: 31 d2 xorl %edx, %edx +# CHECK-NEXT: 2: 89 8c 24 84 00 00 00 movl %ecx, 132(%rsp) +# CHECK-NEXT: 9: 4c 89 c1 movq %r8, %rcx +# CHECK-NEXT: c: 4c 8b 8c 24 88 00 00 00 movq 136(%rsp), %r9 +# CHECK-COUNT-4: : 90 nop +# CHECK: 18: 66 66 90 nop +# CHECK-NEXT: 1b: 2e 2e 4c 89 c1 movq %r8, %rcx +# CHECK-NEXT: 20: eb de jmp {{.*}} +# CHECK-NEXT: 22: c3 retq + + .text + .globl test1 +test1: +.Ltmp0: + xorl %edx, %edx + movl %ecx, 132(%rsp) + movq %r8, %rcx + movq 136(%rsp), %r9 + .p2align 3, 0x90 + .byte 102 + .byte 102 + nop + movq %r8, %rcx + jmp .Ltmp0 + retq