Index: llvm/include/llvm/MC/MCAsmBackend.h =================================================================== --- llvm/include/llvm/MC/MCAsmBackend.h +++ llvm/include/llvm/MC/MCAsmBackend.h @@ -46,6 +46,9 @@ const support::endianness Endian; + virtual void alignBranchesBegin(MCObjectStreamer &OS, const MCInst &Inst) {} + virtual void alignBranchesEnd(MCObjectStreamer &OS, const MCInst &Inst) {} + /// lifetime management virtual void reset() {} Index: llvm/include/llvm/MC/MCAssembler.h =================================================================== --- llvm/include/llvm/MC/MCAssembler.h +++ llvm/include/llvm/MC/MCAssembler.h @@ -191,9 +191,8 @@ bool layoutSectionOnce(MCAsmLayout &Layout, MCSection &Sec); bool relaxInstruction(MCAsmLayout &Layout, MCRelaxableFragment &IF); - bool relaxLEB(MCAsmLayout &Layout, MCLEBFragment &IF); - + bool relaxBoundaryAlign(MCAsmLayout &Layout, MCBoundaryAlignFragment &BF); bool relaxDwarfLineAddr(MCAsmLayout &Layout, MCDwarfLineAddrFragment &DF); bool relaxDwarfCallFrameFragment(MCAsmLayout &Layout, MCDwarfCallFrameFragment &DF); Index: llvm/include/llvm/MC/MCFragment.h =================================================================== --- llvm/include/llvm/MC/MCFragment.h +++ llvm/include/llvm/MC/MCFragment.h @@ -16,6 +16,7 @@ #include "llvm/ADT/ilist_node.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCInst.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/Casting.h" #include "llvm/Support/SMLoc.h" #include @@ -41,6 +42,7 @@ FT_Dwarf, FT_DwarfFrame, FT_LEB, + FT_BoundaryAlign, FT_SymbolId, FT_CVInlineLines, FT_CVDefRange, @@ -563,6 +565,47 @@ } }; +class MCBoundaryAlignFragment : public MCFragment { +private: + /// The size of the MCBoundaryAlignFragment. + uint64_t Size = 0; + /// The alignment requirement of the branch to be aligned. + Align AlignBoundary; + /// Flag to indicate whether the branch is fused. + bool Fused : 1; + /// Flag to indicate whether NOPs should be emitted. 
+ bool EmitNops : 1; + +public: + MCBoundaryAlignFragment(Align AlignBoundary, bool Fused = false, + bool EmitNops = false, MCSection *Sec = nullptr) + : MCFragment(FT_BoundaryAlign, false, Sec), AlignBoundary(AlignBoundary), + Fused(Fused), EmitNops(EmitNops) {} + + /// \name Accessors + /// @{ + + Align getAlignment() const { return AlignBoundary; } + + uint64_t getSize() const { return Size; } + + bool canEmitNops() const { return EmitNops; } + + bool isFused() const { return Fused; } + + void setFused(bool Value) { Fused = Value; } + + void setEmitNops(bool Value) { EmitNops = Value; } + + void setSize(uint64_t Value) { Size = Value; } + + /// @} + // + + static bool classof(const MCFragment *F) { + return F->getKind() == MCFragment::FT_BoundaryAlign; + } +}; } // end namespace llvm #endif // LLVM_MC_MCFRAGMENT_H Index: llvm/include/llvm/MC/MCObjectStreamer.h =================================================================== --- llvm/include/llvm/MC/MCObjectStreamer.h +++ llvm/include/llvm/MC/MCObjectStreamer.h @@ -51,6 +51,7 @@ void EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) override; void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override; MCSymbol *EmitCFILabel() override; + void EmitInstructionImpl(const MCInst &Inst, const MCSubtargetInfo &STI); void resolvePendingFixups(); protected: Index: llvm/lib/MC/MCAssembler.cpp =================================================================== --- llvm/lib/MC/MCAssembler.cpp +++ llvm/lib/MC/MCAssembler.cpp @@ -309,6 +309,9 @@ case MCFragment::FT_LEB: return cast(F).getContents().size(); + case MCFragment::FT_BoundaryAlign: + return cast(F).getSize(); + case MCFragment::FT_SymbolId: return 4; @@ -605,6 +608,13 @@ break; } + case MCFragment::FT_BoundaryAlign: { + if (!Asm.getBackend().writeNopData(OS, FragmentSize)) + report_fatal_error("unable to write nop sequence of " + + Twine(FragmentSize) + " bytes"); + break; + } + case MCFragment::FT_SymbolId: { const MCSymbolIdFragment &SF = cast(F); 
support::endian::write(OS, SF.getSymbol()->getIndex(), Endian); @@ -941,6 +951,66 @@ return OldSize != LF.getContents().size(); } +/// Check if the branch with given address and size crosses the boundary. +static bool mayCrossBoundary(uint64_t StartAddr, uint64_t Size, + Align BoundaryAlignment) { + uint64_t EndAddr = StartAddr + Size; + return (StartAddr >> Log2(BoundaryAlignment)) != + ((EndAddr - 1) >> Log2(BoundaryAlignment)); +} + +/// Check if the branch with given address and size is against the boundary. +static bool isAgainstBoundary(uint64_t StartAddr, uint64_t Size, + Align BoundaryAlignment) { + uint64_t EndAddr = StartAddr + Size; + return (EndAddr & (BoundaryAlignment.value() - 1)) == 0; +} + +/// Check if the branch with given address and size needs padding. +static bool needPadding(uint64_t StartAddr, uint64_t Size, + Align BoundaryAlignment) { + return mayCrossBoundary(StartAddr, Size, BoundaryAlignment) || + isAgainstBoundary(StartAddr, Size, BoundaryAlignment); +} + +bool MCAssembler::relaxBoundaryAlign(MCAsmLayout &Layout, + MCBoundaryAlignFragment &BF) { + // The MCBoundaryAlignFragment that doesn't emit NOP should not relax. + if (!BF.canEmitNops()) + return false; + + auto AlignedOffset = Layout.getFragmentOffset(BF.getNextNode()); + uint64_t AlignedSize = 0; + auto IsFused = BF.isFused(); + if (IsFused) { + uint8_t i = 0; + for (auto F = BF.getNextNode(); !isa(F); + F = F->getNextNode()) { + AlignedSize += computeFragmentSize(Layout, *F); + ++i; + // Fused branch is at most in two fragments. 
+ if (i == 2) + break; + } + } else { + AlignedSize = computeFragmentSize(Layout, *(BF.getNextNode())); + } + auto OldSize = BF.getSize(); + AlignedOffset -= OldSize; + uint64_t NewSize = 0; + auto BoundaryAlignment = BF.getAlignment(); + if (needPadding(AlignedOffset, AlignedSize, BoundaryAlignment)) { + NewSize = offsetToAlignment(AlignedOffset, BoundaryAlignment); + } + if (NewSize != OldSize) { + BF.setSize(NewSize); + Layout.invalidateFragmentsFrom(&BF); + return true; + } else { + return false; + } +} + bool MCAssembler::relaxDwarfLineAddr(MCAsmLayout &Layout, MCDwarfLineAddrFragment &DF) { MCContext &Context = Layout.getAssembler().getContext(); @@ -1057,6 +1127,10 @@ case MCFragment::FT_LEB: RelaxedFrag = relaxLEB(Layout, *cast(I)); break; + case MCFragment::FT_BoundaryAlign: + RelaxedFrag = + relaxBoundaryAlign(Layout, *cast(I)); + break; case MCFragment::FT_CVInlineLines: RelaxedFrag = relaxCVInlineLineTable(Layout, *cast(I)); Index: llvm/lib/MC/MCFragment.cpp =================================================================== --- llvm/lib/MC/MCFragment.cpp +++ llvm/lib/MC/MCFragment.cpp @@ -275,6 +275,9 @@ case FT_LEB: delete cast(this); return; + case FT_BoundaryAlign: + delete cast(this); + return; case FT_SymbolId: delete cast(this); return; @@ -319,6 +322,7 @@ case MCFragment::FT_Dwarf: OS << "MCDwarfFragment"; break; case MCFragment::FT_DwarfFrame: OS << "MCDwarfCallFrameFragment"; break; case MCFragment::FT_LEB: OS << "MCLEBFragment"; break; + case MCFragment::FT_BoundaryAlign: OS<<"MCBoundaryAlignFragment"; break; case MCFragment::FT_SymbolId: OS << "MCSymbolIdFragment"; break; case MCFragment::FT_CVInlineLines: OS << "MCCVInlineLineTableFragment"; break; case MCFragment::FT_CVDefRange: OS << "MCCVDefRangeTableFragment"; break; @@ -418,6 +422,19 @@ OS << " Value:" << LF->getValue() << " Signed:" << LF->isSigned(); break; } + case MCFragment::FT_BoundaryAlign: { + const auto *BF = cast(this); + if (BF->canEmitNops()) + OS << " (can emit nops to 
align"; + if (BF->isFused()) + OS << " fused branch)"; + else + OS << " unfused branch)"; + OS << "\n "; + OS << " BoundarySize:" << BF->getAlignment().value() + << " Size:" << BF->getSize(); + break; + } case MCFragment::FT_SymbolId: { const MCSymbolIdFragment *F = cast(this); OS << "\n "; Index: llvm/lib/MC/MCObjectStreamer.cpp =================================================================== --- llvm/lib/MC/MCObjectStreamer.cpp +++ llvm/lib/MC/MCObjectStreamer.cpp @@ -319,6 +319,13 @@ void MCObjectStreamer::EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) { + getAssembler().getBackend().alignBranchesBegin(*this, Inst); + EmitInstructionImpl(Inst, STI); + getAssembler().getBackend().alignBranchesEnd(*this, Inst); +} + +void MCObjectStreamer::EmitInstructionImpl(const MCInst &Inst, + const MCSubtargetInfo &STI) { MCStreamer::EmitInstruction(Inst, STI); MCSection *Sec = getCurrentSectionOnly(); Index: llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp =================================================================== --- llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -19,14 +19,19 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCMachObjectWriter.h" +#include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCValue.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/TargetRegistry.h" + using namespace llvm; static unsigned getFixupKindSize(unsigned Kind) { @@ -64,6 +69,73 @@ } namespace { +class X86AlignBranchKind { +private: + uint8_t AlignBranchKind = 0; + +public: + enum Flag : uint8_t { + AlignBranchNone = 0, + AlignBranchFused = 1U << 0, + AlignBranchJcc = 
1U << 1, + AlignBranchJmp = 1U << 2, + AlignBranchCall = 1U << 3, + AlignBranchRet = 1U << 4, + AlignBranchIndirect = 1U << 5 + }; + + void operator=(const std::string &Val) { + if (Val.empty()) + return; + SmallVector BranchTypes; + StringRef(Val).split(BranchTypes, '+', -1, false); + for (auto BranchType : BranchTypes) { + if (BranchType == "fused") + addKind(AlignBranchFused); + else if (BranchType == "jcc") + addKind(AlignBranchJcc); + else if (BranchType == "jmp") + addKind(AlignBranchJmp); + else if (BranchType == "call") + addKind(AlignBranchCall); + else if (BranchType == "ret") + addKind(AlignBranchRet); + else if (BranchType == "indirect") + addKind(AlignBranchIndirect); + else { + report_fatal_error( + "'-x86-align-branch' only accepts a plus separated combination of " + "jcc, fused, jmp, call, ret, indirect.", + false); + } + } + } + + operator uint8_t() const { return AlignBranchKind; } + void addKind(Flag Value) { AlignBranchKind |= Value; } +}; + +X86AlignBranchKind X86AlignBranchKindLoc; + +cl::opt X86AlignBranchBoundary( + "x86-align-branch-boundary", cl::init(0), + cl::desc( + "Control how the assembler should align branches with NOP. If the " + "boundary's size is not 0, it should be a power of 2 and no less " + "than 32. Branches will be aligned within the boundary of specified " + "size. -x86-align-branch-boundary=0 doesn't align branches.")); + +cl::opt> X86AlignBranch( + "x86-align-branch", + cl::desc("Specify types of branches to align (plus separated list of " + "types). 
The branches's type is combination of jcc, fused, " + "jmp, call, ret, indirect."), + cl::value_desc( + "jcc, which aligns conditional jumps; fused, which aligns fused " + "conditional jumps; jmp, which aligns unconditional jumps; call, " + "which aligns calls; ret, which aligns rets; indirect, which " + "aligns indirect jumps."), + cl::location(X86AlignBranchKindLoc)); class X86ELFObjectWriter : public MCELFObjectTargetWriter { public: @@ -74,9 +146,31 @@ class X86AsmBackend : public MCAsmBackend { const MCSubtargetInfo &STI; + /// Owns the MCInstrInfo created in the constructor; MCII refers to it. + std::unique_ptr<const MCInstrInfo> OwnedMCII; + const MCInstrInfo &MCII; + X86AlignBranchKind AlignBranchType; + Align AlignBoundary; + + bool isFirstMacroFusibleInst(const MCInst &Inst) const; + bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const; + bool isRIPRelative(const MCInst &MI) const; + bool hasVariantSymbol(const MCInst &MI) const; + + bool needAlign(MCObjectStreamer &OS) const; + bool needAlignInst(const MCInst &Inst) const; + MCInst PrevInst; + public: X86AsmBackend(const Target &T, const MCSubtargetInfo &STI) - : MCAsmBackend(support::little), STI(STI) {} + : MCAsmBackend(support::little), STI(STI), + OwnedMCII(T.createMCInstrInfo()), MCII(*OwnedMCII) { + AlignBoundary = assumeAligned(X86AlignBranchBoundary); + AlignBranchType = X86AlignBranchKindLoc; + } + + void alignBranchesBegin(MCObjectStreamer &OS, const MCInst &Inst) override; + void alignBranchesEnd(MCObjectStreamer &OS, const MCInst &Inst) override; unsigned getNumFixupKinds() const override { return X86::NumTargetFixupKinds; @@ -258,6 +352,182 @@ return getRelaxedOpcodeBranch(Inst, is16BitMode); } +static X86::CondCode getCondFromBranch(const MCInst &MI, + const MCInstrInfo &MCII) { + unsigned Opcode = MI.getOpcode(); + switch (Opcode) { + default: + return X86::COND_INVALID; + case X86::JCC_1: { + const MCInstrDesc &Desc = MCII.get(Opcode); + return static_cast( + MI.getOperand(Desc.getNumOperands() - 1).getImm()); + } + } +} + +static X86::SecondMacroFusionInstKind +classifySecondInstInMacroFusion(const MCInst &MI, const 
MCInstrInfo &MCII) { + X86::CondCode CC = getCondFromBranch(MI, MCII); + return classifySecondCondCodeInMacroFusion(CC); +} + +/// Check if the instruction is valid as the first instruction in macro fusion. +bool X86AsmBackend::isFirstMacroFusibleInst(const MCInst &Inst) const { + // An Intel instruction with RIP relative addressing is not macro fusible. + if (isRIPRelative(Inst)) + return false; + X86::FirstMacroFusionInstKind FIK = + X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode()); + return FIK != X86::FirstMacroFusionInstKind::Invalid; +} + +/// Check if the two instructions are macro-fused. +bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const { + const MCInstrDesc &InstDesc = MCII.get(Jcc.getOpcode()); + if (!InstDesc.isConditionalBranch()) + return false; + if (!isFirstMacroFusibleInst(Cmp)) + return false; + const X86::FirstMacroFusionInstKind CmpKind = + X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode()); + const X86::SecondMacroFusionInstKind BranchKind = + classifySecondInstInMacroFusion(Jcc, MCII); + return X86::isMacroFused(CmpKind, BranchKind); +} + +/// Check if the instruction is RIP relative addressing. +bool X86AsmBackend::isRIPRelative(const MCInst &MI) const { + unsigned Opcode = MI.getOpcode(); + const MCInstrDesc &Desc = MCII.get(Opcode); + uint64_t TSFlags = Desc.TSFlags; + unsigned CurOp = X86II::getOperandBias(Desc); + int MemoryOperand = X86II::getMemoryOperandNo(TSFlags); + if (MemoryOperand >= 0) { + unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg; + unsigned BaseReg = MI.getOperand(BaseRegNum).getReg(); + if (BaseReg == X86::RIP) + return true; + } + return false; +} + +/// Check if the instruction has variant symbol operand. 
+bool X86AsmBackend::hasVariantSymbol(const MCInst &MI) const { + + for (auto &Operand : MI) { + if (Operand.isExpr()) { + const MCExpr &Expr = *Operand.getExpr(); + if (Expr.getKind() == MCExpr::SymbolRef && + cast(*Operand.getExpr()).getKind() != + MCSymbolRefExpr::VK_None) + return true; + } + } + return false; +} + +bool X86AsmBackend::needAlign(MCObjectStreamer &OS) const { + if (AlignBoundary == Align::None() || + AlignBranchType == X86AlignBranchKind::AlignBranchNone) + return false; + + MCAssembler &Assembler = OS.getAssembler(); + MCSection *Sec = OS.getCurrentSectionOnly(); + // TODO: Bundle-locked sections are not handled yet, so skip them. + if (Assembler.isBundlingEnabled() && Sec->isBundleLocked()) + return false; + + // Branches only need to be aligned in 32-bit or 64-bit mode. + if (!(STI.getFeatureBits()[X86::Mode64Bit] || + STI.getFeatureBits()[X86::Mode32Bit])) + return false; + + return true; +} + +/// Check if the instruction needs to be aligned. Padding is disabled +/// before an instruction which may be rewritten by the linker (e.g. TLSCALL). +bool X86AsmBackend::needAlignInst(const MCInst &Inst) const { + // Linker may rewrite the instruction with variant symbol operand. + if(hasVariantSymbol(Inst)) return false; + + const MCInstrDesc &InstDesc = MCII.get(Inst.getOpcode()); + return (InstDesc.isConditionalBranch() && + (AlignBranchType & X86AlignBranchKind::AlignBranchJcc)) || + (InstDesc.isUnconditionalBranch() && + (AlignBranchType & X86AlignBranchKind::AlignBranchJmp)) || + (InstDesc.isCall() && + (AlignBranchType & X86AlignBranchKind::AlignBranchCall)) || + (InstDesc.isReturn() && + (AlignBranchType & X86AlignBranchKind::AlignBranchRet)) || + (InstDesc.isIndirectBranch() && + (AlignBranchType & X86AlignBranchKind::AlignBranchIndirect)); +} + +/// Insert MCBoundaryAlignFragment before instructions to align branches. 
+void X86AsmBackend::alignBranchesBegin(MCObjectStreamer &OS, + const MCInst &Inst) { + if (!needAlign(OS)) + return; + + MCFragment *CF = OS.getCurrentFragment(); + bool NeedAlignFused = AlignBranchType & X86AlignBranchKind::AlignBranchFused; + if (NeedAlignFused && isMacroFused(PrevInst, Inst) && CF) { + // Macro fusion actually happens and there is no other fragment inserted + // after the previous instruction. NOP can be emitted in PF to align fused + // jcc. + if (auto *PF = + dyn_cast_or_null<MCBoundaryAlignFragment>(CF->getPrevNode())) { + PF->setEmitNops(true); + PF->setFused(true); + } else { + // When there is at least one fragment, such as MCAlignFragment, inserted + // after the previous instruction, although current branch is fused with + // the previous instruction, we only emit NOP before the branch to avoid + // falling into an infinite loop. + OS.insert(new MCBoundaryAlignFragment(AlignBoundary, false, true)); + } + } else if (needAlignInst(Inst)) { + // Reuse the current fragment to align the unfused branch. + if (auto F = dyn_cast_or_null(CF)) { + assert(!F->canEmitNops() && + "Reused fragment should not have been used to emit NOP."); + assert( + !F->isFused() && + "Reused fragment should not have been used to align fused branch."); + F->setEmitNops(true); + } else { + // Create a new fragment to emit NOP to align the unfused branch. + OS.insert(new MCBoundaryAlignFragment(AlignBoundary, false, true)); + } + } else if (NeedAlignFused && isFirstMacroFusibleInst(Inst)) { + // We don't know if macro fusion happens until reaching the next + // instruction, so a placeholder is put here. + OS.insert(new MCBoundaryAlignFragment(AlignBoundary)); + } else { + // Do nothing + } + + PrevInst = Inst; +} + +/// Insert a MCBoundaryAlignFragment to mark the end of the branch to be aligned +/// if necessary. 
+void X86AsmBackend::alignBranchesEnd(MCObjectStreamer &OS, const MCInst &Inst) { + if (!needAlign(OS)) + return; + // If the branch emitted into a MCRelaxableFragment, we can determine the size + // of the branch later easily, otherwise we need to make a mark. + if (needAlignInst(Inst) && !isa(OS.getCurrentFragment())) + OS.insert(new MCBoundaryAlignFragment(AlignBoundary)); + + // Update the maximum alignment on the current section if necessary. + MCSection *Sec = OS.getCurrentSectionOnly(); + if (AlignBoundary.value() > Sec->getAlignment()) + Sec->setAlignment(AlignBoundary); +} + Optional X86AsmBackend::getFixupKind(StringRef Name) const { if (STI.getTargetTriple().isOSBinFormatELF()) { if (STI.getTargetTriple().getArch() == Triple::x86_64) { Index: llvm/test/MC/X86/align-branch-32-1a.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-32-1a.s @@ -0,0 +1,57 @@ +# Check NOP padding is disabled before tls_get_addr calls +# RUN: llvm-mc -filetype=obj -triple i386-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=call %s | llvm-objdump -d - | FileCheck %s + +# CHECK: 00000000 foo: +# CHECK-NEXT: 0: 64 a3 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 6: 55 pushl %ebp +# CHECK-NEXT: 7: 55 pushl %ebp +# CHECK-NEXT: 8: 55 pushl %ebp +# CHECK-NEXT: 9: 55 pushl %ebp +# CHECK-NEXT: a: 89 e5 movl %esp, %ebp +# CHECK-NEXT: c: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: f: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 12: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 15: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 18: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 1b: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 1e: e8 fc ff ff ff calll {{.*}} +# CHECK-NEXT: 23: 55 pushl %ebp +# CHECK-NEXT: 24: 55 pushl %ebp +# CHECK-NEXT: 25: 64 a3 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 2b: 89 e5 movl %esp, %ebp +# CHECK-NEXT: 2d: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 30: 89 75 f4 movl %esi, -12(%ebp) 
+# CHECK-NEXT: 33: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 36: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 39: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 3c: ff 91 00 00 00 00 calll *(%ecx) +# CHECK-NEXT: 42: 89 75 f4 movl %esi, -12(%ebp) + + .text + .globl foo + .p2align 4 +foo: + movl %eax, %fs:0x1 + pushl %ebp + pushl %ebp + pushl %ebp + pushl %ebp + movl %esp, %ebp + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + call ___tls_get_addr@PLT + pushl %ebp + pushl %ebp + movl %eax, %fs:0x1 + movl %esp, %ebp + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + call *___tls_get_addr@GOT(%ecx) + movl %esi, -12(%ebp) Index: llvm/test/MC/X86/align-branch-64-1a.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-64-1a.s @@ -0,0 +1,147 @@ +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=fused+jcc+jmp %s | llvm-objdump -d - > %t1 +# RUN: FileCheck --input-file=%t1 %s + +# Check no branches is aligned with option --x86-align-branch-boundary=0 +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=0 --x86-align-branch=fused+jcc+jmp %s | llvm-objdump -d - > %t2 +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s | llvm-objdump -d - > %t3 +# RUN: cmp %t2 %t3 + +# CHECK: 0000000000000000 foo: +# CHECK-NEXT: 0: 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 8: 55 pushq %rbp +# CHECK-NEXT: 9: 55 pushq %rbp +# CHECK-NEXT: a: 55 pushq %rbp +# CHECK-NEXT: b: 48 89 e5 movq %rsp, %rbp +# CHECK-NEXT: e: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 11: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 14: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 17: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 1a: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-COUNT-3: 90 
nop +# CHECK-NEXT: 20: 48 39 c5 cmpq %rax, %rbp +# CHECK-NEXT: 23: 74 5d je {{.*}} +# CHECK-NEXT: 25: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 28: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 2b: 89 7d f8 movl %edi, -8(%rbp) +# CHECK-NEXT: 2e: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 31: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 34: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 37: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 3a: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 3d: 5d popq %rbp +# CHECK-NEXT: 3e: 5d popq %rbp +# CHECK-NEXT: 3f: 90 nop +# CHECK-NEXT: 40: 74 40 je {{.*}} +# CHECK-NEXT: 42: 5d popq %rbp +# CHECK-NEXT: 43: 74 3d je {{.*}} +# CHECK-NEXT: 45: 89 45 fc movl %eax, -4(%rbp) +# CHECK-NEXT: 48: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 4b: 89 7d f8 movl %edi, -8(%rbp) +# CHECK-NEXT: 4e: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 51: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 54: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 57: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 5a: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 5d: 5d popq %rbp +# CHECK-NEXT: 5e: 5d popq %rbp +# CHECK-NEXT: 5f: 90 nop +# CHECK-NEXT: 60: eb 26 jmp {{.*}} +# CHECK-NEXT: 62: eb 24 jmp {{.*}} +# CHECK-NEXT: 64: eb 22 jmp {{.*}} +# CHECK-NEXT: 66: 89 45 fc movl %eax, -4(%rbp) +# CHECK-NEXT: 69: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 6c: 89 7d f8 movl %edi, -8(%rbp) +# CHECK-NEXT: 6f: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 72: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 75: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 78: 5d popq %rbp +# CHECK-NEXT: 79: 5d popq %rbp +# CHECK-NEXT: 7a: 48 39 c5 cmpq %rax, %rbp +# CHECK-NEXT: 7d: 74 03 je {{.*}} +# CHECK-NEXT: 7f: 90 nop +# CHECK-NEXT: 80: eb 06 jmp {{.*}} +# CHECK-NEXT: 82: 8b 45 f4 movl -12(%rbp), %eax +# CHECK-NEXT: 85: 89 45 fc movl %eax, -4(%rbp) +# CHECK-NEXT: 88: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: 8e: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: 94: 89 b5 50 fb 
ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: 9a: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: a0: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: a6: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: ac: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: b2: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: b8: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: be: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: c4: eb c2 jmp {{.*}} +# CHECK-NEXT: c6: 5d popq %rbp +# CHECK-NEXT: c7: c3 retq + .text + .globl foo + .p2align 4 +foo: + movl %eax, %fs:0x1 + pushq %rbp + pushq %rbp + pushq %rbp + movq %rsp, %rbp + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + cmp %rax, %rbp + je .L_2 + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %edi, -8(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + popq %rbp + popq %rbp + je .L_2 + popq %rbp + je .L_2 + movl %eax, -4(%rbp) + movl %esi, -12(%rbp) + movl %edi, -8(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + popq %rbp + popq %rbp + jmp .L_3 + jmp .L_3 + jmp .L_3 + movl %eax, -4(%rbp) + movl %esi, -12(%rbp) + movl %edi, -8(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + popq %rbp + popq %rbp + cmp %rax, %rbp + je .L_2 + jmp .L_3 +.L_2: + movl -12(%rbp), %eax + movl %eax, -4(%rbp) +.L_3: + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + jmp .L_3 + popq %rbp + retq Index: llvm/test/MC/X86/align-branch-64-1b.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-64-1b.s @@ -0,0 
+1,72 @@ +# Check only fused conditional jumps and conditional jumps are aligned with option --x86-align-branch-boundary=32 --x86-align-branch=fused+jcc +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=fused+jcc %S/align-branch-64-1a.s | llvm-objdump -d - | FileCheck %s + +# CHECK: 0000000000000000 foo: +# CHECK-NEXT: 0: 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 8: 55 pushq %rbp +# CHECK-NEXT: 9: 55 pushq %rbp +# CHECK-NEXT: a: 55 pushq %rbp +# CHECK-NEXT: b: 48 89 e5 movq %rsp, %rbp +# CHECK-NEXT: e: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 11: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 14: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 17: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 1a: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 1d: 90 nop +# CHECK-NEXT: 1e: 90 nop +# CHECK-NEXT: 1f: 90 nop +# CHECK-NEXT: 20: 48 39 c5 cmpq %rax, %rbp +# CHECK-NEXT: 23: 74 5b je {{.*}} +# CHECK-NEXT: 25: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 28: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 2b: 89 7d f8 movl %edi, -8(%rbp) +# CHECK-NEXT: 2e: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 31: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 34: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 37: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 3a: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 3d: 5d popq %rbp +# CHECK-NEXT: 3e: 5d popq %rbp +# CHECK-NEXT: 3f: 90 nop +# CHECK-NEXT: 40: 74 3e je {{.*}} +# CHECK-NEXT: 42: 5d popq %rbp +# CHECK-NEXT: 43: 74 3b je {{.*}} +# CHECK-NEXT: 45: 89 45 fc movl %eax, -4(%rbp) +# CHECK-NEXT: 48: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 4b: 89 7d f8 movl %edi, -8(%rbp) +# CHECK-NEXT: 4e: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 51: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 54: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 57: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 5a: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 5d: 5d popq %rbp +# CHECK-NEXT: 5e: 5d popq 
%rbp +# CHECK-NEXT: 5f: eb 25 jmp {{.*}} +# CHECK-NEXT: 61: eb 23 jmp {{.*}} +# CHECK-NEXT: 63: eb 21 jmp {{.*}} +# CHECK-NEXT: 65: 89 45 fc movl %eax, -4(%rbp) +# CHECK-NEXT: 68: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 6b: 89 7d f8 movl %edi, -8(%rbp) +# CHECK-NEXT: 6e: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 71: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 74: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 77: 5d popq %rbp +# CHECK-NEXT: 78: 5d popq %rbp +# CHECK-NEXT: 79: 48 39 c5 cmpq %rax, %rbp +# CHECK-NEXT: 7c: 74 02 je {{.*}} +# CHECK-NEXT: 7e: eb 06 jmp {{.*}} +# CHECK-NEXT: 80: 8b 45 f4 movl -12(%rbp), %eax +# CHECK-NEXT: 83: 89 45 fc movl %eax, -4(%rbp) +# CHECK-NEXT: 86: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: 8c: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: 92: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: 98: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: 9e: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: a4: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: aa: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: b0: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: b6: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: bc: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: c2: eb c2 jmp {{.*}} +# CHECK-NEXT: c4: 5d popq %rbp +# CHECK-NEXT: c5: c3 retq Index: llvm/test/MC/X86/align-branch-64-1c.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-64-1c.s @@ -0,0 +1,69 @@ +# Check only conditional jumps are aligned with option --x86-align-branch-boundary=32 --x86-align-branch=jcc +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=jcc %S/align-branch-64-1a.s | llvm-objdump -d - | FileCheck %s + +# CHECK: 0000000000000000 foo: +# CHECK-NEXT: 0: 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 8: 55 pushq %rbp +# CHECK-NEXT: 9: 55 pushq 
%rbp +# CHECK-NEXT: a: 55 pushq %rbp +# CHECK-NEXT: b: 48 89 e5 movq %rsp, %rbp +# CHECK-NEXT: e: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 11: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 14: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 17: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 1a: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 1d: 48 39 c5 cmpq %rax, %rbp +# CHECK-NEXT: 20: 74 5b je {{.*}} +# CHECK-NEXT: 22: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 25: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 28: 89 7d f8 movl %edi, -8(%rbp) +# CHECK-NEXT: 2b: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 2e: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 31: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 34: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 37: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 3a: 5d popq %rbp +# CHECK-NEXT: 3b: 5d popq %rbp +# CHECK-NEXT: 3c: 74 3f je {{.*}} +# CHECK-NEXT: 3e: 5d popq %rbp +# CHECK-NEXT: 3f: 90 nop +# CHECK-NEXT: 40: 74 3b je {{.*}} +# CHECK-NEXT: 42: 89 45 fc movl %eax, -4(%rbp) +# CHECK-NEXT: 45: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 48: 89 7d f8 movl %edi, -8(%rbp) +# CHECK-NEXT: 4b: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 4e: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 51: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 54: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 57: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 5a: 5d popq %rbp +# CHECK-NEXT: 5b: 5d popq %rbp +# CHECK-NEXT: 5c: eb 25 jmp {{.*}} +# CHECK-NEXT: 5e: eb 23 jmp {{.*}} +# CHECK-NEXT: 60: eb 21 jmp {{.*}} +# CHECK-NEXT: 62: 89 45 fc movl %eax, -4(%rbp) +# CHECK-NEXT: 65: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 68: 89 7d f8 movl %edi, -8(%rbp) +# CHECK-NEXT: 6b: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 6e: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 71: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 74: 5d popq %rbp +# CHECK-NEXT: 75: 5d popq %rbp +# CHECK-NEXT: 76: 48 39 c5 cmpq %rax, %rbp +# CHECK-NEXT: 79: 74 02 je {{.*}} +# CHECK-NEXT: 
7b: eb 06 jmp {{.*}} +# CHECK-NEXT: 7d: 8b 45 f4 movl -12(%rbp), %eax +# CHECK-NEXT: 80: 89 45 fc movl %eax, -4(%rbp) +# CHECK-NEXT: 83: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: 89: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: 8f: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: 95: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: 9b: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: a1: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: a7: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: ad: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: b3: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: b9: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: bf: eb c2 jmp {{.*}} +# CHECK-NEXT: c1: 5d popq %rbp +# CHECK-NEXT: c2: c3 retq Index: llvm/test/MC/X86/align-branch-64-1d.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-64-1d.s @@ -0,0 +1,77 @@ +# Check only conditional jumps and unconditional jumps are aligned with option --x86-align-branch-boundary=32 --x86-align-branch=jcc+jmp +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=jcc+jmp %S/align-branch-64-1a.s | llvm-objdump -d - > %t1 +# RUN: FileCheck --input-file=%t1 %s --check-prefixes=CHECK,SHORT-NOP + +# Check long NOP can be emitted to align branch if the target cpu supports long nop.
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=32 -mcpu=x86-64 --x86-align-branch=jcc+jmp %S/align-branch-64-1a.s | llvm-objdump -d - >%t2 +# RUN: FileCheck --input-file=%t2 %s --check-prefixes=CHECK,LONG-NOP + +# CHECK: 0000000000000000 foo: +# CHECK-NEXT: 0: 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 8: 55 pushq %rbp +# CHECK-NEXT: 9: 55 pushq %rbp +# CHECK-NEXT: a: 55 pushq %rbp +# CHECK-NEXT: b: 48 89 e5 movq %rsp, %rbp +# CHECK-NEXT: e: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 11: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 14: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 17: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 1a: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 1d: 48 39 c5 cmpq %rax, %rbp +# CHECK-NEXT: 20: 74 5d je {{.*}} +# CHECK-NEXT: 22: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 25: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 28: 89 7d f8 movl %edi, -8(%rbp) +# CHECK-NEXT: 2b: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 2e: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 31: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 34: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 37: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 3a: 5d popq %rbp +# CHECK-NEXT: 3b: 5d popq %rbp +# CHECK-NEXT: 3c: 74 41 je {{.*}} +# CHECK-NEXT: 3e: 5d popq %rbp +# CHECK-NEXT: 3f: 90 nop +# CHECK-NEXT: 40: 74 3d je {{.*}} +# CHECK-NEXT: 42: 89 45 fc movl %eax, -4(%rbp) +# CHECK-NEXT: 45: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 48: 89 7d f8 movl %edi, -8(%rbp) +# CHECK-NEXT: 4b: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 4e: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 51: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 54: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 57: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 5a: 5d popq %rbp +# CHECK-NEXT: 5b: 5d popq %rbp +# CHECK-NEXT: 5c: eb 27 jmp {{.*}} +# SHORT-NOP: 5e: 90 nop +# SHORT-NOP-NEXT: 5f: 90 nop +# LONG-NOP: 5e: 66 90 nop +# CHECK: 60: eb 23 jmp {{.*}} +# 
CHECK-NEXT: 62: eb 21 jmp {{.*}} +# CHECK-NEXT: 64: 89 45 fc movl %eax, -4(%rbp) +# CHECK-NEXT: 67: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 6a: 89 7d f8 movl %edi, -8(%rbp) +# CHECK-NEXT: 6d: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 70: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 73: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 76: 5d popq %rbp +# CHECK-NEXT: 77: 5d popq %rbp +# CHECK-NEXT: 78: 48 39 c5 cmpq %rax, %rbp +# CHECK-NEXT: 7b: 74 02 je {{.*}} +# CHECK-NEXT: 7d: eb 06 jmp {{.*}} +# CHECK-NEXT: 7f: 8b 45 f4 movl -12(%rbp), %eax +# CHECK-NEXT: 82: 89 45 fc movl %eax, -4(%rbp) +# CHECK-NEXT: 85: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: 8b: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: 91: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: 97: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: 9d: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: a3: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: a9: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: af: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: b5: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: bb: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: c1: eb c2 jmp {{.*}} +# CHECK-NEXT: c3: 5d popq %rbp +# CHECK-NEXT: c4: c3 retq Index: llvm/test/MC/X86/align-branch-64-2a.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-64-2a.s @@ -0,0 +1,96 @@ +# Check indirect jumps and calls are not aligned with option --x86-align-branch-boundary=32 --x86-align-branch=fused+jcc+jmp +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s | llvm-objdump -d - > %t1 +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=fused+jcc+jmp %s | llvm-objdump -d - > %t2 +# RUN: cmp %t1 %t2 + +# Check only indirect jumps are aligned with option --x86-align-branch-boundary=32 --x86-align-branch=indirect +# RUN: 
llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=indirect %s | llvm-objdump -d - | FileCheck %s + +# CHECK: 0000000000000000 foo: +# CHECK-NEXT: 0: 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 8: 55 pushq %rbp +# CHECK-NEXT: 9: 55 pushq %rbp +# CHECK-NEXT: a: 55 pushq %rbp +# CHECK-NEXT: b: 55 pushq %rbp +# CHECK-NEXT: c: 48 89 e5 movq %rsp, %rbp +# CHECK-NEXT: f: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 12: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 15: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 18: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 1b: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 1e: 90 nop +# CHECK-NEXT: 1f: 90 nop +# CHECK-NEXT: 20: ff e0 jmpq *%rax +# CHECK-NEXT: 22: 55 pushq %rbp +# CHECK-NEXT: 23: 55 pushq %rbp +# CHECK-NEXT: 24: 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 2c: 48 89 e5 movq %rsp, %rbp +# CHECK-NEXT: 2f: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 32: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 35: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 38: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 3b: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 3e: ff d0 callq *%rax +# CHECK-NEXT: 40: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 43: 55 pushq %rbp +# CHECK-NEXT: 44: 55 pushq %rbp +# CHECK-NEXT: 45: 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 4d: 48 89 e5 movq %rsp, %rbp +# CHECK-NEXT: 50: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 53: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 56: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 59: e8 a2 ff ff ff callq {{.*}} +# CHECK-NEXT: 5e: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 61: 55 pushq %rbp +# CHECK-NEXT: 62: 55 pushq %rbp +# CHECK-NEXT: 63: 55 pushq %rbp +# CHECK-NEXT: 64: 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 6c: 48 89 e5 movq %rsp, %rbp +# CHECK-NEXT: 6f: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 72: ff 14 25 00 00 00 00 callq *0 +# CHECK-NEXT: 79: 55 pushq %rbp + + 
.text + .globl foo + .p2align 4 +foo: + movl %eax, %fs:0x1 + pushq %rbp + pushq %rbp + pushq %rbp + pushq %rbp + movq %rsp, %rbp + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + jmp *%rax + pushq %rbp + pushq %rbp + movl %eax, %fs:0x1 + movq %rsp, %rbp + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + call *%rax + movl %esi, -12(%rbp) + pushq %rbp + pushq %rbp + movl %eax, %fs:0x1 + movq %rsp, %rbp + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + call foo + movl %esi, -12(%rbp) + pushq %rbp + pushq %rbp + pushq %rbp + movl %eax, %fs:0x1 + movq %rsp, %rbp + movl %esi, -12(%rbp) + call *foo + pushq %rbp Index: llvm/test/MC/X86/align-branch-64-2b.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-64-2b.s @@ -0,0 +1,44 @@ +# Check only calls are aligned with option --x86-align-branch-boundary=32 --x86-align-branch=call +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=call %S/align-branch-64-2a.s| llvm-objdump -d - | FileCheck %s + +# CHECK: 0000000000000000 foo: +# CHECK-NEXT: 0: 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 8: 55 pushq %rbp +# CHECK-NEXT: 9: 55 pushq %rbp +# CHECK-NEXT: a: 55 pushq %rbp +# CHECK-NEXT: b: 55 pushq %rbp +# CHECK-NEXT: c: 48 89 e5 movq %rsp, %rbp +# CHECK-NEXT: f: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 12: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 15: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 18: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 1b: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 1e: ff e0 jmpq *%rax +# CHECK-NEXT: 20: 55 pushq %rbp +# CHECK-NEXT: 21: 55 pushq %rbp +# CHECK-NEXT: 22: 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 2a: 48 89 e5 movq %rsp, %rbp +# CHECK-NEXT: 2d: 89 75 f4 movl %esi, -12(%rbp) +# 
CHECK-NEXT: 30: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 33: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 36: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 39: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 3c: ff d0 callq *%rax +# CHECK-NEXT: 3e: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 41: 55 pushq %rbp +# CHECK-NEXT: 42: 55 pushq %rbp +# CHECK-NEXT: 43: 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 4b: 48 89 e5 movq %rsp, %rbp +# CHECK-NEXT: 4e: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 51: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 54: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 57: e8 a4 ff ff ff callq {{.*}} +# CHECK-NEXT: 5c: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 5f: 55 pushq %rbp +# CHECK-NEXT: 60: 55 pushq %rbp +# CHECK-NEXT: 61: 55 pushq %rbp +# CHECK-NEXT: 62: 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 6a: 48 89 e5 movq %rsp, %rbp +# CHECK-NEXT: 6d: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 70: ff 14 25 00 00 00 00 callq *0 +# CHECK-NEXT: 77: 55 pushq %rbp Index: llvm/test/MC/X86/align-branch-64-2c.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-64-2c.s @@ -0,0 +1,48 @@ +# Check only indirect jumps and calls are aligned with option --x86-align-branch-boundary=32 --x86-align-branch=indirect+call +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=indirect+call %S/align-branch-64-2a.s | llvm-objdump -d - | FileCheck %s + +# CHECK: 0000000000000000 foo: +# CHECK-NEXT: 0: 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 8: 55 pushq %rbp +# CHECK-NEXT: 9: 55 pushq %rbp +# CHECK-NEXT: a: 55 pushq %rbp +# CHECK-NEXT: b: 55 pushq %rbp +# CHECK-NEXT: c: 48 89 e5 movq %rsp, %rbp +# CHECK-NEXT: f: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 12: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 15: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 18: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 
1b: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-COUNT-2: : 90 nop +# CHECK: 20: ff e0 jmpq *%rax +# CHECK-NEXT: 22: 55 pushq %rbp +# CHECK-NEXT: 23: 55 pushq %rbp +# CHECK-NEXT: 24: 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 2c: 48 89 e5 movq %rsp, %rbp +# CHECK-NEXT: 2f: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 32: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 35: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 38: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 3b: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-COUNT-2: : 90 nop +# CHECK: 40: ff d0 callq *%rax +# CHECK-NEXT: 42: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 45: 55 pushq %rbp +# CHECK-NEXT: 46: 55 pushq %rbp +# CHECK-NEXT: 47: 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 4f: 48 89 e5 movq %rsp, %rbp +# CHECK-NEXT: 52: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 55: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 58: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-COUNT-4: : 90 nop +# CHECK: 60: e8 9b ff ff ff callq {{.*}} +# CHECK-NEXT: 65: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 68: 55 pushq %rbp +# CHECK-NEXT: 69: 55 pushq %rbp +# CHECK-NEXT: 6a: 55 pushq %rbp +# CHECK-NEXT: 6b: 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 73: 48 89 e5 movq %rsp, %rbp +# CHECK-NEXT: 76: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-COUNT-7: : 90 nop +# CHECK: 80: ff 14 25 00 00 00 00 callq *0 +# CHECK-NEXT: 87: 55 pushq %rbp Index: llvm/test/MC/X86/align-branch-64-3a.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-64-3a.s @@ -0,0 +1,55 @@ +# Check NOP padding is disabled before tls_get_addr calls +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=call %s | llvm-objdump -d - | FileCheck %s + +# CHECK: 0000000000000000 foo: +# CHECK-NEXT: 0: 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 8: 55 pushq %rbp +# CHECK-NEXT: 9: 55 pushq %rbp +# CHECK-NEXT: a: 55 pushq %rbp +# 
CHECK-NEXT: b: 55 pushq %rbp +# CHECK-NEXT: c: 48 89 e5 movq %rsp, %rbp +# CHECK-NEXT: f: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 12: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 15: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 18: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 1b: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 1e: e8 00 00 00 00 callq 0 +# CHECK-NEXT: 23: 55 pushq %rbp +# CHECK-NEXT: 24: 55 pushq %rbp +# CHECK-NEXT: 25: 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 2d: 48 89 e5 movq %rsp, %rbp +# CHECK-NEXT: 30: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 33: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 36: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 39: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 3c: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 3f: ff 15 00 00 00 00 callq *(%rip) +# CHECK-NEXT: 45: 89 75 f4 movl %esi, -12(%rbp) + + .text + .globl foo + .p2align 4 +foo: + movl %eax, %fs:0x1 + pushq %rbp + pushq %rbp + pushq %rbp + pushq %rbp + movq %rsp, %rbp + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + call __tls_get_addr@PLT + pushq %rbp + pushq %rbp + movl %eax, %fs:0x1 + movq %rsp, %rbp + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + call *__tls_get_addr@GOTPCREL(%rip) + movl %esi, -12(%rbp) Index: llvm/test/MC/X86/align-branch-64-4a.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-64-4a.s @@ -0,0 +1,63 @@ +# Check rets are not aligned with option --x86-align-branch-boundary=32 --x86-align-branch=fused+jcc+jmp +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s | llvm-objdump -d - > %t +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=fused+jcc+jmp %s | llvm-objdump -d - >%t2 +# RUN: cmp %t %t2 + +# Check only rets are aligned with option 
--x86-align-branch-boundary=32 --x86-align-branch=ret +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=ret %s | llvm-objdump -d - | FileCheck %s + +# CHECK: 0000000000000000 foo: +# CHECK-NEXT: 0: 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 8: 55 pushq %rbp +# CHECK-NEXT: 9: 55 pushq %rbp +# CHECK-NEXT: a: 48 89 e5 movq %rsp, %rbp +# CHECK-NEXT: d: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 10: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 13: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 16: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 19: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 1c: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 1f: 90 nop +# CHECK-NEXT: 20: c3 retq +# CHECK-NEXT: 21: 55 pushq %rbp +# CHECK-NEXT: 22: 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 2a: 55 pushq %rbp +# CHECK-NEXT: 2b: 55 pushq %rbp +# CHECK-NEXT: 2c: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 2f: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 32: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 35: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 38: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 3b: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 3e: 90 nop +# CHECK-NEXT: 3f: 90 nop +# CHECK-NEXT: 40: c2 1e 00 retq $30 +# CHECK-NEXT: 43: 55 pushq %rbp + + .text + .globl foo + .p2align 4 +foo: + movl %eax, %fs:0x1 + pushq %rbp + pushq %rbp + movq %rsp, %rbp + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + ret + pushq %rbp + movl %eax, %fs:0x1 + pushq %rbp + pushq %rbp + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + ret $30 + pushq %rbp Index: llvm/test/MC/X86/align-branch-64-5a.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-64-5a.s @@ -0,0 +1,63 @@ +# Check no 
nop or prefix is inserted if no branch crosses or is against the boundary +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=fused+jcc+jmp+indirect+call+ret %s | llvm-objdump -d - > %t1 +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s | llvm-objdump -d - > %t2 +# RUN: cmp %t1 %t2 +# RUN: FileCheck --input-file=%t1 %s + +# CHECK: 0000000000000000 foo: +# CHECK-NEXT: 0: c1 e9 02 shrl $2, %ecx +# CHECK-NEXT: 3: c1 e9 02 shrl $2, %ecx +# CHECK-NEXT: 6: 89 d1 movl %edx, %ecx +# CHECK-NEXT: 8: 31 c0 xorl %eax, %eax +# CHECK-NEXT: a: 31 c8 xorl %ecx, %eax +# CHECK-NEXT: c: c1 e9 02 shrl $2, %ecx +# CHECK-NEXT: f: c1 e9 02 shrl $2, %ecx +# CHECK-NEXT: 12: c1 e9 02 shrl $2, %ecx +# CHECK-NEXT: 15: c1 e9 02 shrl $2, %ecx +# CHECK-NEXT: 18: f6 c2 02 testb $2, %dl +# CHECK-NEXT: 1b: f3 ab rep stosl %eax, %es:(%rdi) +# CHECK-NEXT: 1d: 75 e4 jne {{.*}} +# CHECK-NEXT: 1f: 31 c0 xorl %eax, %eax +# CHECK-NEXT: 21: c1 e9 02 shrl $2, %ecx +# CHECK-NEXT: 24: c1 e9 02 shrl $2, %ecx +# CHECK-NEXT: 27: c1 e9 02 shrl $2, %ecx +# CHECK-NEXT: 2a: 89 d1 movl %edx, %ecx +# CHECK-NEXT: 2c: 31 c0 xorl %eax, %eax +# CHECK-NEXT: 2e: c1 e9 02 shrl $2, %ecx +# CHECK-NEXT: 31: c1 e9 02 shrl $2, %ecx +# CHECK-NEXT: 34: c1 e9 02 shrl $2, %ecx +# CHECK-NEXT: 37: f6 c2 02 testb $2, %dl +# CHECK-NEXT: 3a: e8 00 00 00 00 callq {{.*}} +# CHECK-NEXT: 3f: 31 c0 xorl %eax, %eax +# CHECK-NEXT: 41: 75 e1 jne {{.*}} + + .text + .p2align 4,,15 +foo: + shrl $2, %ecx +.L1: + shrl $2, %ecx + movl %edx, %ecx + xorl %eax, %eax + xorl %ecx, %eax + shrl $2, %ecx + shrl $2, %ecx + shrl $2, %ecx + shrl $2, %ecx + testb $2, %dl + rep stosl + jne .L1 + xorl %eax, %eax + shrl $2, %ecx +.L2: + shrl $2, %ecx + shrl $2, %ecx + movl %edx, %ecx + xorl %eax, %eax + shrl $2, %ecx + shrl $2, %ecx + shrl $2, %ecx + testb $2, %dl + call bar + xorl %eax, %eax + jne .L2