Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -2181,6 +2181,49 @@
 def mno_inline_all_stringops : Flag<["-"], "mno-inline-all-stringops">, Group;
 def malign_double : Flag<["-"], "malign-double">, Group, Flags<[CC1Option]>,
   HelpText<"Align doubles to two words in structs (x86 only)">;
+def malign_branch_boundary_EQ
+    : Joined<["-"], "malign-branch-boundary=">,
+      Group,
+      Flags<[DriverOption, HelpHidden]>,
+      HelpText<
+          "Control how the assembler should align branches with segment "
+          "prefixes or NOP. The boundary's size must be a power of 2. It "
+          "should be 0 or no less than 32. Branches will be aligned within "
+          "the boundary of the specified size. -malign-branch-boundary=0 "
+          "doesn't align branches.">;
+def malign_branch_EQ
+    : Joined<["-"], "malign-branch=">,
+      Group,
+      Flags<[DriverOption, HelpHidden]>,
+      HelpText<
+          "Specify types of branches to align (plus separated list of "
+          "types). The branch types are a combination of jcc, fused, "
+          "jmp, call, ret, indirect."
+          " jcc, which aligns conditional jumps; fused, which aligns fused "
+          "conditional jumps; jmp, which aligns unconditional jumps; call, "
+          "which aligns calls; ret, which aligns rets; indirect, which "
+          "aligns indirect jumps.">;
+def malign_branch_prefix_size_EQ
+    : Joined<["-"], "malign-branch-prefix-size=">,
+      Group,
+      Flags<[DriverOption, HelpHidden]>,
+      HelpText<"Specify the maximum number of prefixes on an instruction to "
+               "align branches. The number should be between 0 and 5.">;
+def mbranches_within_32B_boundaries
+    : Flag<["-"], "mbranches-within-32B-boundaries">,
+      Group,
+      Flags<[DriverOption]>,
+      HelpText<
+          "Aligns conditional jumps, fused conditional jumps, and "
+          "unconditional "
+          "jumps within 32 byte boundary with up to 5 segment prefixes on an "
+          "instruction. It is equivalent to -malign-branch-boundary=32, "
+          "-malign-branch=fused+jcc+jmp, -malign-branch-prefix-size=5.">;
+def mno_branches_within_32B_boundaries
+    : Flag<["-"], "mno-branches-within-32B-boundaries">,
+      Group,
+      Flags<[DriverOption]>,
+      HelpText<"Opposite to -mbranches-within-32B-boundaries.">;
 def mfloat_abi_EQ : Joined<["-"], "mfloat-abi=">, Group, Values<"soft,softfp,hard">;
 def mfpmath_EQ : Joined<["-"], "mfpmath=">, Group;
 def mfpu_EQ : Joined<["-"], "mfpu=">, Group;
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -37,6 +37,7 @@
 #include "llvm/Support/CodeGen.h"
 #include "llvm/Support/Compression.h"
 #include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Process.h"
 #include "llvm/Support/TargetParser.h"
@@ -2011,8 +2012,72 @@
     CmdArgs.push_back("-mbackchain");
 }
 
+/// Translate the branch alignment driver options into the corresponding -mllvm
+/// backend options, diagnosing argument values that are not supported.
+static void AlignBranchesOptions(const Driver &D, const ArgList &Args,
+                                 ArgStringList &CmdArgs) {
+  if (const Arg *A = Args.getLastArg(options::OPT_malign_branch_boundary_EQ)) {
+    StringRef Value = A->getValue();
+    unsigned Num;
+    // As documented for the option, the boundary is either 0 (don't align
+    // branches) or a power of 2 that is no less than 32.
+    if (!Value.getAsInteger(10, Num) &&
+        (Num == 0 || (Num >= 32 && llvm::isPowerOf2_32(Num)))) {
+      CmdArgs.push_back("-mllvm");
+      CmdArgs.push_back(
+          Args.MakeArgString("-x86-align-branch-boundary=" + Value));
+    } else {
+      D.Diag(diag::err_drv_unsupported_option_argument)
+          << A->getOption().getName() << Value;
+    }
+  }
+
+  if (const Arg *A = Args.getLastArg(options::OPT_malign_branch_EQ)) {
+    StringRef Value = A->getValue();
+    // The value is a '+' separated list of branch types. Diagnose unknown
+    // types here instead of forwarding them unchecked.
+    SmallVector<StringRef, 6> BranchTypes;
+    Value.split(BranchTypes, '+', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
+    bool AllKnown = true;
+    for (StringRef Type : BranchTypes)
+      if (Type != "fused" && Type != "jcc" && Type != "jmp" &&
+          Type != "call" && Type != "ret" && Type != "indirect")
+        AllKnown = false;
+    if (AllKnown) {
+      CmdArgs.push_back("-mllvm");
+      CmdArgs.push_back(Args.MakeArgString("-x86-align-branch=" + Value));
+    } else {
+      D.Diag(diag::err_drv_unsupported_option_argument)
+          << A->getOption().getName() << Value;
+    }
+  }
+
+  if (const Arg *A =
+          Args.getLastArg(options::OPT_malign_branch_prefix_size_EQ)) {
+    StringRef Value = A->getValue();
+    unsigned Num;
+    if (!Value.getAsInteger(10, Num) && Num <= 5) {
+      CmdArgs.push_back("-mllvm");
+      CmdArgs.push_back(
+          Args.MakeArgString("-x86-align-branch-prefix-size=" + Value));
+    } else {
+      D.Diag(diag::err_drv_unsupported_option_argument)
+          << A->getOption().getName() << Value;
+    }
+  }
+
+  if (Args.hasFlag(options::OPT_mbranches_within_32B_boundaries,
+                   options::OPT_mno_branches_within_32B_boundaries, false)) {
+    CmdArgs.push_back("-mllvm");
+    CmdArgs.push_back(
+        Args.MakeArgString("-x86-branches-within-32B-boundaries"));
+  }
+}
+
 void Clang::AddX86TargetArgs(const ArgList &Args,
                              ArgStringList &CmdArgs) const {
+  AlignBranchesOptions(getToolChain().getDriver(), Args, CmdArgs);
+
   if (!Args.hasFlag(options::OPT_mred_zone, options::OPT_mno_red_zone, true) ||
       Args.hasArg(options::OPT_mkernel) ||
       Args.hasArg(options::OPT_fapple_kext))
@@ -6415,6 +6480,8 @@
 
 void ClangAs::AddX86TargetArgs(const ArgList &Args,
                                ArgStringList &CmdArgs) const {
+  AlignBranchesOptions(getToolChain().getDriver(), Args, CmdArgs);
+
   if (Arg *A = Args.getLastArg(options::OPT_masm_EQ)) {
     StringRef Value = A->getValue();
     if (Value == "intel" || Value == "att") {
Index: clang/test/Driver/intel-align-branch.c
===================================================================
--- /dev/null
+++ clang/test/Driver/intel-align-branch.c
@@ -0,0 +1,34 @@
+// RUN: %clang -target x86_64-unknown-unknown -malign-branch-boundary=32 -### -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-BOUNDARY
+// CHECK-BOUNDARY: "-mllvm" "-x86-align-branch-boundary=32"
+//
+// RUN: %clang -target x86_64-unknown-unknown -malign-branch=jcc -### -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-JCC
+// CHECK-JCC: "-mllvm" "-x86-align-branch=jcc"
+//
+// RUN: %clang -target x86_64-unknown-unknown -malign-branch=fused -### -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-FUSED
+// CHECK-FUSED: "-mllvm" "-x86-align-branch=fused"
+//
+// RUN: %clang -target x86_64-unknown-unknown -malign-branch=jmp -### -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-JMP
+// CHECK-JMP: "-mllvm" "-x86-align-branch=jmp"
+//
+// RUN: %clang -target
x86_64-unknown-unknown -malign-branch=call -### -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-CALL +// CHECK-CALL: "-mllvm" "-x86-align-branch=call" +// +// RUN: %clang -target x86_64-unknown-unknown -malign-branch=ret -### -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-RET +// CHECK-RET: "-mllvm" "-x86-align-branch=ret" +// +// RUN: %clang -target x86_64-unknown-unknown -malign-branch=indirect -### -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-INDIRECT +// CHECK-INDIRECT: "-mllvm" "-x86-align-branch=indirect" +// +// RUN: %clang -target x86_64-unknown-unknown -malign-branch=fused+jcc+jmp+ret+call+indirect -### -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-BRANCH +// CHECK-BRANCH: "-mllvm" "-x86-align-branch=fused+jcc+jmp+ret+call+indirect" +// +// RUN: %clang -target x86_64-unknown-unknown -malign-branch-prefix-size=5 -### -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-PREFIX +// CHECK-PREFIX: "-mllvm" "-x86-align-branch-prefix-size=5" +// +// RUN: %clang -target x86_64-unknown-unknown -mbranches-within-32B-boundaries -### -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-TOTAL +// CHECK-TOTAL: "-mllvm" "-x86-branches-within-32B-boundaries" +// +// RUN: %clang -target x86_64-unknown-unknown -mno-branches-within-32B-boundaries -mbranches-within-32B-boundaries -### -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-TOTAL2 +// CHECK-TOTAL2: "-mllvm" "-x86-branches-within-32B-boundaries" +// RUN: %clang -target x86_64-unknown-unknown -mbranches-within-32B-boundaries -mno-branches-within-32B-boundaries -### -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-TOTAL3 +// CHECK-TOTAL3-NOT: "-mllvm" "-x86-branches-within-32B-boundaries" Index: clang/test/Driver/intel-align-branch.s =================================================================== --- /dev/null +++ clang/test/Driver/intel-align-branch.s @@ -0,0 +1,34 @@ +// RUN: %clang -target x86_64-unknown-unknown -malign-branch-boundary=32 -### -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-BOUNDARY +// CHECK-BOUNDARY: "-mllvm" 
"-x86-align-branch-boundary=32" +// +// RUN: %clang -target x86_64-unknown-unknown -malign-branch=jcc -### -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-JCC +// CHECK-JCC: "-mllvm" "-x86-align-branch=jcc" +// +// RUN: %clang -target x86_64-unknown-unknown -malign-branch=fused -### -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-FUSED +// CHECK-FUSED: "-mllvm" "-x86-align-branch=fused" +// +// RUN: %clang -target x86_64-unknown-unknown -malign-branch=jmp -### -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-JMP +// CHECK-JMP: "-mllvm" "-x86-align-branch=jmp" +// +// RUN: %clang -target x86_64-unknown-unknown -malign-branch=call -### -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-CALL +// CHECK-CALL: "-mllvm" "-x86-align-branch=call" +// +// RUN: %clang -target x86_64-unknown-unknown -malign-branch=ret -### -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-RET +// CHECK-RET: "-mllvm" "-x86-align-branch=ret" +// +// RUN: %clang -target x86_64-unknown-unknown -malign-branch=indirect -### -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-INDIRECT +// CHECK-INDIRECT: "-mllvm" "-x86-align-branch=indirect" +// +// RUN: %clang -target x86_64-unknown-unknown -malign-branch=fused+jcc+jmp+ret+call+indirect -### -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-BRANCH +// CHECK-BRANCH: "-mllvm" "-x86-align-branch=fused+jcc+jmp+ret+call+indirect" +// +// RUN: %clang -target x86_64-unknown-unknown -malign-branch-prefix-size=5 -### -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-PREFIX +// CHECK-PREFIX: "-mllvm" "-x86-align-branch-prefix-size=5" +// +// RUN: %clang -target x86_64-unknown-unknown -mbranches-within-32B-boundaries -### -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-TOTAL +// CHECK-TOTAL: "-mllvm" "-x86-branches-within-32B-boundaries" +// +// RUN: %clang -target x86_64-unknown-unknown -mno-branches-within-32B-boundaries -mbranches-within-32B-boundaries -### -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-TOTAL2 +// CHECK-TOTAL2: "-mllvm" "-x86-branches-within-32B-boundaries" +// 
RUN: %clang -target x86_64-unknown-unknown -mbranches-within-32B-boundaries -mno-branches-within-32B-boundaries -### -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-TOTAL3 +// CHECK-TOTAL3-NOT: "-mllvm" "-x86-branches-within-32B-boundaries" Index: llvm/include/llvm/MC/MCAsmBackend.h =================================================================== --- llvm/include/llvm/MC/MCAsmBackend.h +++ llvm/include/llvm/MC/MCAsmBackend.h @@ -46,6 +46,10 @@ const support::endianness Endian; + virtual void alignBranchesBegin(MCObjectStreamer &OS, const MCInst &Inst) {} + virtual void alignBranchesEnd(MCObjectStreamer &OS, const MCInst &Inst) {} + virtual void markHardCode(MCObjectStreamer &OS) {} + /// lifetime management virtual void reset() {} @@ -162,6 +166,14 @@ /// \return - True on success. virtual bool writeNopData(raw_ostream &OS, uint64_t Count) const = 0; + /// Write a segment prefix sequence of Count bytes to the given output. + /// + /// \return - True on success. + virtual bool writeSegmentPrefixData(raw_ostream &OS, uint64_t Count, + char Prefix) const { + return true; + } + /// Give backend an opportunity to finish layout after relaxation virtual void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const {} Index: llvm/include/llvm/MC/MCAssembler.h =================================================================== --- llvm/include/llvm/MC/MCAssembler.h +++ llvm/include/llvm/MC/MCAssembler.h @@ -191,9 +191,9 @@ bool layoutSectionOnce(MCAsmLayout &Layout, MCSection &Sec); bool relaxInstruction(MCAsmLayout &Layout, MCRelaxableFragment &IF); - bool relaxLEB(MCAsmLayout &Layout, MCLEBFragment &IF); - + bool relaxMachineDependent(MCAsmLayout &Layout, + MCMachineDependentFragment &MF); bool relaxDwarfLineAddr(MCAsmLayout &Layout, MCDwarfLineAddrFragment &DF); bool relaxDwarfCallFrameFragment(MCAsmLayout &Layout, MCDwarfCallFrameFragment &DF); Index: llvm/include/llvm/MC/MCFragment.h =================================================================== --- 
llvm/include/llvm/MC/MCFragment.h
+++ llvm/include/llvm/MC/MCFragment.h
@@ -41,6 +41,7 @@
     FT_Dwarf,
     FT_DwarfFrame,
     FT_LEB,
+    FT_MachineDependent,
     FT_SymbolId,
     FT_CVInlineLines,
     FT_CVDefRange,
@@ -563,6 +564,119 @@
   }
 };
 
+/// A fragment holding NOP padding, segment prefixes or a zero size marker; its
+/// subtype selects which, and the padding subtypes are used to align a branch.
+class MCMachineDependentFragment : public MCFragment {
+public:
+  enum SubType : uint8_t {
+    // The variable size fragment to insert NOP before branch.
+    BranchPadding,
+    // The variable size fragment to insert segment prefixes to an instruction.
+    BranchPrefix,
+    // The zero size fragment to separate the instruction which is fused with
+    // the following conditional jump from fused jcc.
+    BranchSplit,
+    // The variable size fragment to insert NOP before fused conditional jump.
+    FusedJccPadding,
+    // The zero size fragment to mark the begin of the sequence of hard code.
+    HardCodeBegin,
+    // The zero size fragment to mark the end of the sequence of hard code.
+    HardCodeEnd
+  };
+
+  /// The subtype of this fragment.
+  mutable SubType SubKind;
+
+private:
+  /// The size of the MCMachineDependentFragment.
+  unsigned Size = 0;
+  /// The value of the prefix to be emitted if the subtype of this fragment is
+  /// BranchPrefix.
+  char Prefix = 0;
+  /// The fragment containing the branch that needs to be aligned.
+  const MCFragment *Branch = nullptr;
+  /// The maximum size of prefixes to be added to the next instruction.
+  unsigned MaxPrefixSize = 0;
+  /// The size of the boundary, within which the branches need to be aligned.
+  unsigned AlignBoundarySize = 0;
+
+public:
+  MCMachineDependentFragment(SubType SubKind, unsigned MaxPrefixSize,
+                             unsigned AlignBoundarySize,
+                             MCSection *Sec = nullptr)
+      : MCFragment(FT_MachineDependent, false, Sec), SubKind(SubKind),
+        MaxPrefixSize(MaxPrefixSize), AlignBoundarySize(AlignBoundarySize) {}
+
+  /// \name Accessors
+  /// @{
+
+  StringRef getSubTypeName() const {
+    switch (SubKind) {
+    case BranchPadding:
+      return "BranchPadding";
+    case BranchPrefix:
+      return "BranchPrefix";
+    case BranchSplit:
+      return "BranchSplit";
+    case FusedJccPadding:
+      return "FusedJccPadding";
+    case HardCodeBegin:
+      return "HardCodeBegin";
+    case HardCodeEnd:
+      return "HardCodeEnd";
+    }
+    llvm_unreachable("Unknown subtype of MCMachineDependentFragment");
+  }
+
+  unsigned getMaxPrefixSize() const {
+    return MaxPrefixSize;
+  }
+
+  unsigned getBoundarySize() const {
+    return AlignBoundarySize;
+  }
+
+  uint64_t getSize() const { return Size; }
+
+  char getPrefix() const {
+    assert(SubKind == BranchPrefix &&
+           "Unsupported subtype of MCMachineDependentFragment");
+    return Prefix;
+  }
+
+  const MCFragment *getBranch() const {
+    assert(SubKind != BranchSplit && SubKind != HardCodeBegin &&
+           SubKind != HardCodeEnd &&
+           "Unsupported subtype of MCMachineDependentFragment");
+    return Branch;
+  }
+
+  /// @}
+
+  void setSize(unsigned Value) {
+    assert(SubKind != BranchSplit && SubKind != HardCodeBegin &&
+           SubKind != HardCodeEnd &&
+           "Unsupported subtype of MCMachineDependentFragment");
+    Size = Value;
+  }
+
+  void setPrefix(char Value) {
+    assert(SubKind == BranchPrefix &&
+           "Unsupported subtype of MCMachineDependentFragment");
+    Prefix = Value;
+  }
+
+  void setBranch(const MCFragment *Fragment) {
+    assert(SubKind != BranchSplit && SubKind != HardCodeBegin &&
+           SubKind != HardCodeEnd &&
+           "Unsupported subtype of MCMachineDependentFragment");
+    Branch = Fragment;
+  }
+
+  static bool classof(const MCFragment *F) {
+    return F->getKind() == MCFragment::FT_MachineDependent;
+  }
+};
} // end namespace llvm
 
 #endif // LLVM_MC_MCFRAGMENT_H
Index: llvm/include/llvm/MC/MCObjectStreamer.h
===================================================================
--- llvm/include/llvm/MC/MCObjectStreamer.h
+++ llvm/include/llvm/MC/MCObjectStreamer.h
@@ -51,6 +51,7 @@
   void EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) override;
   void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override;
   MCSymbol *EmitCFILabel() override;
+  void EmitInstructionImpl(const MCInst &Inst, const MCSubtargetInfo &STI);
   void resolvePendingFixups();
 
 protected:
Index: llvm/lib/MC/MCAssembler.cpp
===================================================================
--- llvm/lib/MC/MCAssembler.cpp
+++ llvm/lib/MC/MCAssembler.cpp
@@ -309,6 +309,9 @@
   case MCFragment::FT_LEB:
     return cast<MCLEBFragment>(F).getContents().size();
 
+  case MCFragment::FT_MachineDependent:
+    return cast<MCMachineDependentFragment>(F).getSize();
+
   case MCFragment::FT_SymbolId:
     return 4;
 
@@ -605,6 +608,24 @@
     break;
   }
 
+  case MCFragment::FT_MachineDependent: {
+    const MCMachineDependentFragment &MDF = cast<MCMachineDependentFragment>(F);
+    if (FragmentSize == 0)
+      break;
+    if (MDF.SubKind == MCMachineDependentFragment::BranchPrefix) {
+      if (!Asm.getBackend().writeSegmentPrefixData(OS, FragmentSize,
+                                                   MDF.getPrefix()))
+        report_fatal_error("unable to write segment prefix sequence of " +
+                           Twine(FragmentSize) + " bytes");
+    } else if (MDF.SubKind == MCMachineDependentFragment::BranchPadding ||
+               MDF.SubKind == MCMachineDependentFragment::FusedJccPadding) {
+      if (!Asm.getBackend().writeNopData(OS, FragmentSize))
+        report_fatal_error("unable to write nop sequence of " +
+                           Twine(FragmentSize) + " bytes");
+    }
+    break;
+  }
+
   case MCFragment::FT_SymbolId: {
     const MCSymbolIdFragment &SF = cast<MCSymbolIdFragment>(F);
     support::endian::write<uint32_t>(OS, SF.getSymbol()->getIndex(), Endian);
@@ -941,6 +962,116 @@
   return OldSize != LF.getContents().size();
 }
 
+/// Get the total size of the MachineDependentFragments from the fragment
+/// operand to the fragment where the target branch is.
+static unsigned getFixedValue(const MCMachineDependentFragment *MF) {
+  unsigned FixValue = 0;
+  const MCFragment *const BranchFragment = MF->getBranch();
+  // Walk from MF (inclusive) up to, but not including, the branch fragment.
+  for (const MCFragment *F = MF; F && F != BranchFragment;
+       F = F->getNextNode())
+    if (const auto *MDF = dyn_cast<MCMachineDependentFragment>(F))
+      FixValue += MDF->getSize();
+  return FixValue;
+}
+
+/// Check if the branch with given address and size crosses the boundary.
+/// \p Size and \p BoundarySize are expected to be non-zero.
+static bool mayCrossBoundary(unsigned StartAddr, unsigned Size,
+                             unsigned BoundarySize) {
+  unsigned EndAddr = StartAddr + Size;
+  return StartAddr / BoundarySize != ((EndAddr - 1) / BoundarySize);
+}
+
+/// Check if the branch with given address and size is against the boundary,
+/// i.e. whether it ends exactly on a multiple of \p BoundarySize.
+static bool isAgainstBoundary(unsigned StartAddr, unsigned Size,
+                              unsigned BoundarySize) {
+  unsigned EndAddr = StartAddr + Size;
+  return EndAddr % BoundarySize == 0;
+}
+
+/// Check if the branch with given address and size needs padding.
+static bool needPadding(unsigned StartAddr, unsigned Size,
+                        unsigned BoundarySize) {
+  return mayCrossBoundary(StartAddr, Size, BoundarySize) ||
+         isAgainstBoundary(StartAddr, Size, BoundarySize);
+}
+
+/// Get how many bytes need to be padded to align branch with given address if
+/// the branch crosses or is against the boundary.
+static unsigned getPaddingSize(unsigned StartAddr, unsigned BoundarySize) {
+  return BoundarySize - (StartAddr % BoundarySize);
+}
+
+/// Recompute the size of \p MF so that the branch it targets neither crosses
+/// nor ends against the alignment boundary.
+///
+/// \returns true if the size of \p MF changed, in which case the layout has
+/// been invalidated from \p MF onwards.
+bool MCAssembler::relaxMachineDependent(MCAsmLayout &Layout,
+                                        MCMachineDependentFragment &MF) {
+  // BranchSplit/HardCodeBegin/HardCodeEnd fragments are never relaxed since
+  // they are zero-size. A MCMachineDependentFragment which doesn't have a
+  // target branch to be aligned is not relaxed either.
+  if (MF.SubKind == MCMachineDependentFragment::BranchSplit ||
+      MF.SubKind == MCMachineDependentFragment::HardCodeBegin ||
+      MF.SubKind == MCMachineDependentFragment::HardCodeEnd || !MF.getBranch())
+    return false;
+
+  // The helpers below divide by the boundary size.
+  const unsigned BoundarySize = MF.getBoundarySize();
+  assert(BoundarySize != 0 && "Cannot align branches to a zero boundary.");
+
+  auto getInstSize = [&](const MCFragment &F) {
+    assert(F.hasInstructions() && "The fragment doesn't has any instruction.");
+    unsigned Size = computeFragmentSize(Layout, F);
+    assert(Size <= 15 && "The length of instruction is never longer than 15.");
+    return Size;
+  };
+
+  const unsigned OldSize = MF.getSize();
+  const MCFragment *BranchFragment = MF.getBranch();
+  unsigned AlignedSize = getInstSize(*BranchFragment);
+  unsigned AlignedOffset = Layout.getFragmentOffset(BranchFragment);
+
+  // If the branch is macro-fused, its address and size need to be fixed so
+  // that they also cover the instruction it is fused with. A BranchSplit
+  // fragment right before the branch marks that case; a missing or unrelated
+  // previous fragment is treated as "not fused" instead of being cast blindly.
+  const auto *HintFragment = dyn_cast_or_null<MCMachineDependentFragment>(
+      BranchFragment->getPrevNode());
+  if (HintFragment &&
+      HintFragment->SubKind == MCMachineDependentFragment::BranchSplit) {
+    unsigned CmpSize = getInstSize(*(HintFragment->getPrevNode()));
+    AlignedSize += CmpSize;
+    AlignedOffset -= CmpSize;
+  }
+
+  // Discount the bytes currently contributed by the machine dependent
+  // fragments from MF up to the branch.
+  AlignedOffset -= getFixedValue(&MF);
+
+  unsigned NewSize = 0;
+  if (needPadding(AlignedOffset, AlignedSize, BoundarySize))
+    NewSize = getPaddingSize(AlignedOffset, BoundarySize);
+
+  if (MF.SubKind == MCMachineDependentFragment::BranchPrefix) {
+    // The prefixes must fit in the 15-byte instruction length limit and must
+    // not exceed the configured maximum prefix size.
+    unsigned NextFragmentSize = getInstSize(*(MF.getNextNode()));
+    NewSize = std::min<unsigned>(
+        {NewSize, 15 - NextFragmentSize, MF.getMaxPrefixSize()});
+  }
+
+  if (NewSize == OldSize)
+    return false;
+
+  MF.setSize(NewSize);
+  Layout.invalidateFragmentsFrom(&MF);
+  return true;
+}
+
 bool MCAssembler::relaxDwarfLineAddr(MCAsmLayout &Layout,
                                      MCDwarfLineAddrFragment &DF) {
   MCContext &Context = Layout.getAssembler().getContext();
@@ -1057,6 +1188,10 @@
     case MCFragment::FT_LEB:
       RelaxedFrag = relaxLEB(Layout, *cast<MCLEBFragment>(I));
       break;
+    case
MCFragment::FT_MachineDependent: + RelaxedFrag = + relaxMachineDependent(Layout, *cast(I)); + break; case MCFragment::FT_CVInlineLines: RelaxedFrag = relaxCVInlineLineTable(Layout, *cast(I)); Index: llvm/lib/MC/MCFragment.cpp =================================================================== --- llvm/lib/MC/MCFragment.cpp +++ llvm/lib/MC/MCFragment.cpp @@ -275,6 +275,9 @@ case FT_LEB: delete cast(this); return; + case FT_MachineDependent: + delete cast(this); + return; case FT_SymbolId: delete cast(this); return; @@ -319,6 +322,7 @@ case MCFragment::FT_Dwarf: OS << "MCDwarfFragment"; break; case MCFragment::FT_DwarfFrame: OS << "MCDwarfCallFrameFragment"; break; case MCFragment::FT_LEB: OS << "MCLEBFragment"; break; + case MCFragment::FT_MachineDependent: OS<<"MCMachineDependentFragment"; break; case MCFragment::FT_SymbolId: OS << "MCSymbolIdFragment"; break; case MCFragment::FT_CVInlineLines: OS << "MCCVInlineLineTableFragment"; break; case MCFragment::FT_CVDefRange: OS << "MCCVDefRangeTableFragment"; break; @@ -418,6 +422,13 @@ OS << " Value:" << LF->getValue() << " Signed:" << LF->isSigned(); break; } + case MCFragment::FT_MachineDependent: { + const MCMachineDependentFragment *MF = + cast(this); + OS << "\n "; + OS << " Subtype:" << MF->getSubTypeName() << " Size:" << MF->getSize(); + break; + } case MCFragment::FT_SymbolId: { const MCSymbolIdFragment *F = cast(this); OS << "\n "; Index: llvm/lib/MC/MCObjectStreamer.cpp =================================================================== --- llvm/lib/MC/MCObjectStreamer.cpp +++ llvm/lib/MC/MCObjectStreamer.cpp @@ -319,6 +319,13 @@ void MCObjectStreamer::EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) { + getAssembler().getBackend().alignBranchesBegin(*this, Inst); + EmitInstructionImpl(Inst, STI); + getAssembler().getBackend().alignBranchesEnd(*this, Inst); +} + +void MCObjectStreamer::EmitInstructionImpl(const MCInst &Inst, + const MCSubtargetInfo &STI) { MCStreamer::EmitInstruction(Inst, STI); 
MCSection *Sec = getCurrentSectionOnly(); @@ -513,6 +520,7 @@ } void MCObjectStreamer::EmitBytes(StringRef Data) { + getAssembler().getBackend().markHardCode(*this); MCDwarfLineEntry::Make(this, getCurrentSectionOnly()); MCDataFragment *DF = getOrCreateDataFragment(); flushPendingLabels(DF, DF->getContents().size()); Index: llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp =================================================================== --- llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -19,14 +19,19 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCMachObjectWriter.h" +#include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCValue.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/TargetRegistry.h" + using namespace llvm; static unsigned getFixupKindSize(unsigned Kind) { @@ -64,6 +69,81 @@ } namespace { +class X86AlignBranchKind { +private: + uint8_t AlignBranchKind = 0; + +public: + enum Flag : uint8_t { + AlignBranchNone = 0, + AlignBranchFused = 1U << 0, + AlignBranchJcc = 1U << 1, + AlignBranchJmp = 1U << 2, + AlignBranchCall = 1U << 3, + AlignBranchRet = 1U << 4, + AlignBranchIndirect = 1U << 5 + }; + + void operator=(const std::string &Val) { + if (Val.empty()) + return; + SmallVector BranchTypes; + StringRef(Val).split(BranchTypes, '+', -1, false); + for (auto BranchType : BranchTypes) { + if (BranchType == "fused") + addKind(AlignBranchFused); + else if (BranchType == "jcc") + addKind(AlignBranchJcc); + else if (BranchType == "jmp") + addKind(AlignBranchJmp); + else if (BranchType == "call") + addKind(AlignBranchCall); + else if (BranchType == "ret") + 
addKind(AlignBranchRet); + else if (BranchType == "indirect") + addKind(AlignBranchIndirect); + } + } + + operator uint8_t() const { return AlignBranchKind; } + void addKind(Flag Value) { AlignBranchKind |= Value; } +}; + +X86AlignBranchKind X86AlignBranchKindLoc; + +cl::opt X86AlignBranchBoundary( + "x86-align-branch-boundary", cl::init(0), cl::Hidden, + cl::desc("Control how the assembler should align branches with segment " + "prefixes or NOP. The boundary's size must be a power of 2. It " + "should be 0 or no less than 32. Branches will be aligned within " + "the boundary of specifies size. -x86-align-branch-boundary=0 " + "doesn't align branches.")); + +cl::opt> X86AlignBranch( + "x86-align-branch", + cl::desc("Specify types of branches to align (plus separated list of " + "types). The branches's types is combination of jcc, fused, " + "jmp, call, ret, indirect."), + cl::Hidden, + cl::value_desc( + "jcc, which aligns conditional jumps; fused, which aligns fused " + "conditional jumps; jmp, which aligns unconditional jumps; call, " + "which aligns calls; ret, which aligns rets; indirect, which " + "aligns indirect jumps."), + cl::location(X86AlignBranchKindLoc)); + +cl::opt X86AlignBranchPrefixSize( + "x86-align-branch-prefix-size", cl::init(0), cl::Hidden, + cl::desc("Specify the maximum number of prefixes on an instruction to " + "align branches. The number should be between 0 and 5.")); + +cl::opt X86AlignBranchWithin32BBoundaries( + "x86-branches-within-32B-boundaries", cl::init(false), + cl::desc( + "Aligns conditional jumps, fused conditional jumps, and unconditional " + "jumps within 32 byte boundary with up to 5 segment prefixes on an " + "instruction. 
It is equivalent to -x86-align-branch-boundary=32, " + "-x86-align-branch=fused+jcc+jmp, -x86-align-branch-prefix-size=5.")); class X86ELFObjectWriter : public MCELFObjectTargetWriter { public: @@ -74,9 +154,45 @@ class X86AsmBackend : public MCAsmBackend { const MCSubtargetInfo &STI; + const MCInstrInfo &MCII; + X86AlignBranchKind AlignBranchType; + unsigned AlignBoundarySize = 0; + unsigned AlignMaxPrefixSize = 0; + + bool isFirstMacroFusibleInst(const MCInst &Inst) const; + bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const; + char choosePrefixValue(const MCInst &MI) const; + unsigned getSegmentPrefixSize(const MCInst &MI) const; + bool isRIPRelative(const MCInst &MI) const; + bool hasVariantSymbol(const MCInst &MI) const; + + bool needAlign(MCObjectStreamer &OS) const; + bool needAlignInst(const MCInst &Inst) const; + bool shouldAddPrefix(const MCInst& Inst) const; + std::vector PendingAlignmentFragments; + MCInst PrevInst; + MCFragment *PrevInstFragment = nullptr; + public: X86AsmBackend(const Target &T, const MCSubtargetInfo &STI) - : MCAsmBackend(support::little), STI(STI) {} + : MCAsmBackend(support::little), STI(STI), + MCII(*(T.createMCInstrInfo())) { + if (X86AlignBranchWithin32BBoundaries) { + AlignBoundarySize = 32; + AlignBranchType.addKind(X86AlignBranchKind::AlignBranchFused); + AlignBranchType.addKind(X86AlignBranchKind::AlignBranchJcc); + AlignBranchType.addKind(X86AlignBranchKind::AlignBranchJmp); + AlignMaxPrefixSize = 5; + } else { + AlignBoundarySize = X86AlignBranchBoundary; + AlignBranchType = X86AlignBranchKindLoc; + AlignMaxPrefixSize = X86AlignBranchPrefixSize; + } + } + + void alignBranchesBegin(MCObjectStreamer &OS, const MCInst &Inst) override; + void alignBranchesEnd(MCObjectStreamer &OS, const MCInst &Inst) override; + void markHardCode(MCObjectStreamer &OS) override; unsigned getNumFixupKinds() const override { return X86::NumTargetFixupKinds; @@ -151,6 +267,8 @@ MCInst &Res) const override; bool writeNopData(raw_ostream 
&OS, uint64_t Count) const override; + bool writeSegmentPrefixData(raw_ostream &OS, uint64_t Count, + char Prefix) const override; }; } // end anonymous namespace @@ -258,6 +376,335 @@ return getRelaxedOpcodeBranch(Inst, is16BitMode); } +static X86::CondCode getCondFromBranch(const MCInst &MI, + const MCInstrInfo &MCII) { + unsigned Opcode = MI.getOpcode(); + switch (Opcode) { + default: + return X86::COND_INVALID; + case X86::JCC_1: { + const MCInstrDesc &Desc = MCII.get(Opcode); + return static_cast( + MI.getOperand(Desc.getNumOperands() - 1).getImm()); + } + } +} + +static X86::SecondMacroFusionInstKind +classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) { + X86::CondCode CC = getCondFromBranch(MI, MCII); + return classifySecondCondCodeInMacroFusion(CC); +} + +/// Check if the instruction is valid as the first instruction in macro fusion. +bool X86AsmBackend::isFirstMacroFusibleInst(const MCInst &Inst) const { + // An Intel instruction with RIP relative addressing is not macro fusible. + if (isRIPRelative(Inst)) + return false; + X86::FirstMacroFusionInstKind FIK = + X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode()); + return FIK != X86::FirstMacroFusionInstKind::Invalid; +} + +/// Check if the two instructions are macro-fused. +bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const { + const MCInstrDesc &InstDesc = MCII.get(Jcc.getOpcode()); + if (!InstDesc.isConditionalBranch()) + return false; + if (!isFirstMacroFusibleInst(Cmp)) + return false; + const X86::FirstMacroFusionInstKind CmpKind = + X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode()); + const X86::SecondMacroFusionInstKind BranchKind = + classifySecondInstInMacroFusion(Jcc, MCII); + return X86::isMacroFused(CmpKind, BranchKind); +} + +/// Choose which prefix should be inserted before the instruction. The choice of +/// prefixes are: +/// a. Use the existing segment prefix if there is one. +/// b. Use CS segment prefix in 64-bit mode. +/// c. 
In 32-bit mode, use SS segment prefix with ESP/EBP base register and use +/// DS segment prefix without ESP/EBP base register. +char X86AsmBackend::choosePrefixValue(const MCInst &MI) const { + for (const auto &Operand : MI) { + if (Operand.isReg()) + switch (Operand.getReg()) { + default: + break; + case X86::CS: + return 0x2e; + case X86::SS: + return 0x36; + case X86::DS: + return 0x3e; + case X86::ES: + return 0x26; + case X86::FS: + return 0x64; + case X86::GS: + return 0x65; + } + } + if (STI.getFeatureBits()[X86::Mode64Bit]) + return 0x2e; + + unsigned Opcode = MI.getOpcode(); + const MCInstrDesc &Desc = MCII.get(Opcode); + uint64_t TSFlags = Desc.TSFlags; + int MemoryOperand = X86II::getMemoryOperandNo(TSFlags); + if (MemoryOperand >= 0) { + unsigned CurOp = X86II::getOperandBias(Desc); + unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg; + unsigned BaseReg = MI.getOperand(BaseRegNum).getReg(); + if (BaseReg == X86::ESP || BaseReg == X86::EBP) + return 0x36; + } + return 0x3e; +} + +/// Count the existing segment prefixes of the instruction. +unsigned X86AsmBackend::getSegmentPrefixSize(const MCInst &MI) const { + unsigned Size = 0; + for (const auto &Operand : MI) { + if (Operand.isReg()) { + unsigned Reg = Operand.getReg(); + if (Reg == X86::CS || Reg == X86::SS || Reg == X86::DS || + Reg == X86::ES || Reg == X86::FS || Reg == X86::GS) + ++Size; + } + } + return Size; +} + +/// Check if the instruction is RIP relative addressing. 
+bool X86AsmBackend::isRIPRelative(const MCInst &MI) const { + unsigned Opcode = MI.getOpcode(); + const MCInstrDesc &Desc = MCII.get(Opcode); + uint64_t TSFlags = Desc.TSFlags; + unsigned CurOp = X86II::getOperandBias(Desc); + int MemoryOperand = X86II::getMemoryOperandNo(TSFlags); + if (MemoryOperand >= 0) { + unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg; + unsigned BaseReg = MI.getOperand(BaseRegNum).getReg(); + if (BaseReg == X86::RIP) + return true; + } + return false; +} + +/// Check if the instruction has variant symbol operand. +bool X86AsmBackend::hasVariantSymbol(const MCInst &MI) const { + + for (auto &Operand : MI) { + if (Operand.isExpr()) { + const MCExpr &Expr = *Operand.getExpr(); + if (Expr.getKind() == MCExpr::SymbolRef && + cast<MCSymbolRefExpr>(*Operand.getExpr()).getKind() != + MCSymbolRefExpr::VK_None) + return true; + } + } + return false; +} + +bool X86AsmBackend::needAlign(MCObjectStreamer &OS) const { + if (AlignBoundarySize == 0 || + AlignBranchType == X86AlignBranchKind::AlignBranchNone) + return false; + + MCAssembler &Assembler = OS.getAssembler(); + MCSection *Sec = OS.getCurrentSectionOnly(); + // To be Done: Currently don't deal with Bundle cases. + if (Assembler.isBundlingEnabled() && Sec->isBundleLocked()) + return false; + + // Branches only need to be aligned in 32-bit or 64-bit mode. + if (!(STI.getFeatureBits()[X86::Mode64Bit] || + STI.getFeatureBits()[X86::Mode32Bit])) + return false; + + return true; +} + +/// Check if the instruction operand needs to be aligned. Padding is disabled +/// before instruction which may be rewritten by linker (e.g. TLSCALL). +bool X86AsmBackend::needAlignInst(const MCInst &Inst) const { + // Linker may rewrite the instruction with variant symbol operand. 
+ if(hasVariantSymbol(Inst)) return false; + + const MCInstrDesc &InstDesc = MCII.get(Inst.getOpcode()); + return (InstDesc.isConditionalBranch() && + (AlignBranchType & X86AlignBranchKind::AlignBranchJcc)) || + (InstDesc.isUnconditionalBranch() && + (AlignBranchType & X86AlignBranchKind::AlignBranchJmp)) || + (InstDesc.isCall() && + (AlignBranchType & X86AlignBranchKind::AlignBranchCall)) || + (InstDesc.isReturn() && + (AlignBranchType & X86AlignBranchKind::AlignBranchRet)) || + (InstDesc.isIndirectBranch() && + (AlignBranchType & X86AlignBranchKind::AlignBranchIndirect)); +} + +bool X86AsmBackend::shouldAddPrefix(const MCInst &Inst) const { + // The longer the instruction, the easier it is to cross 32-Byte boundary. So + // prefixes should not be inserted before branch, call or ret even if these + // instructions are not asked to be aligned. + const MCInstrDesc &InstDesc = MCII.get(Inst.getOpcode()); + if (InstDesc.isBranch() || InstDesc.isCall() || InstDesc.isReturn()) + return false; + + // Linker may rewrite the instruction with variant symbol operand. + return !hasVariantSymbol(Inst); +} + +/// Check if a MCFragment pointer points to fragment that may contain +/// instruction. +static bool mayContainInst(const MCFragment *F) { + return F->getKind() == MCFragment::FT_Data || + F->getKind() == MCFragment::FT_MachineDependent || + F->getKind() == MCFragment::FT_Relaxable || + F->getKind() == MCFragment::FT_CompactEncodedInst; +} + +/// Insert MCMachineDependentFragment before instructions to align branches. +void X86AsmBackend::alignBranchesBegin(MCObjectStreamer &OS, + const MCInst &Inst) { + if (!needAlign(OS)) + return; + + MCFragment *CF = OS.getCurrentFragment(); + if (!CF) + return; + // If there is a fragment not holding instructions between + // MCMachineDependentFragment and target branch, we won't relax the + // MCMachineDependentFragment to avoid falling into an infinite loop. 
+ for (auto *F = CF; F != PrevInstFragment; F = F->getPrevNode()) { + // If the last instruction and the instruction to be emitted are in + // different sections, F will be null in this loop eventually. + if (!F || !mayContainInst(F)) { + PendingAlignmentFragments.clear(); + break; + } + } + MCFragment *PF = CF->getPrevNode(); + bool IsPFMF = isa_and_nonnull<MCMachineDependentFragment>(PF); + // The prefix or nop isn't inserted if the previous item is hard code, which + // may be used to hardcode an instruction, since there is no clear instruction + // boundary. + if (IsPFMF && cast<MCMachineDependentFragment>(PF)->SubKind == + MCMachineDependentFragment::HardCodeBegin) { + // Insert HardCodeEnd to mark the end of the sequence of hard code. + OS.insert(new MCMachineDependentFragment( + MCMachineDependentFragment::HardCodeEnd, 0, AlignBoundarySize)); + return; + } + + bool IsPFFusedJccPadding = + IsPFMF && cast<MCMachineDependentFragment>(PF)->SubKind == + MCMachineDependentFragment::FusedJccPadding; + bool IsFused = isMacroFused(PrevInst, Inst); + if (IsPFFusedJccPadding && !IsFused) { + // Turn the previous FusedJccPadding into BranchPrefix if the instruction is + // not macro fused indeed. + cast<MCMachineDependentFragment>(PF)->SubKind = + MCMachineDependentFragment::BranchPrefix; + cast<MCMachineDependentFragment>(PF)->setPrefix( + choosePrefixValue(PrevInst)); + } + PrevInst = Inst; + unsigned SegmentPrefixSize = getSegmentPrefixSize(Inst); + unsigned MaxPrefixSize = (AlignMaxPrefixSize > SegmentPrefixSize) + ? (AlignMaxPrefixSize - SegmentPrefixSize) + : 0; + if (isFirstMacroFusibleInst(Inst) && + (AlignBranchType & X86AlignBranchKind::AlignBranchFused)) { + // Insert FusedJccPadding if the instruction to be emitted is valid as + // first instruction in macro fusion. The inserted FusedJccPadding will + // turn to BranchPrefix later if the instruction is not macro fused indeed. 
+ OS.insert(new MCMachineDependentFragment( + MCMachineDependentFragment::FusedJccPadding, MaxPrefixSize, + AlignBoundarySize)); + } else if (IsPFFusedJccPadding && IsFused) { + // Insert BranchSplit between the first instruction in macro fusion and + // the second instruction in macro fusion. + OS.insert( + new MCMachineDependentFragment(MCMachineDependentFragment::BranchSplit, + MaxPrefixSize, AlignBoundarySize)); + return; + } else if (needAlignInst(Inst)) { + // Insert BranchPadding before the instruction that needs to be aligned. + OS.insert(new MCMachineDependentFragment( + MCMachineDependentFragment::BranchPadding, MaxPrefixSize, + AlignBoundarySize)); + } else { + // Speed up if we don't need to add prefix + if (AlignMaxPrefixSize == 0) + return; + + auto *MF = + new MCMachineDependentFragment(MCMachineDependentFragment::BranchPrefix, + MaxPrefixSize, AlignBoundarySize); + // Choose the value of the prefix by the instruction to be emitted. + MF->setPrefix(choosePrefixValue(Inst)); + // Insert BranchPrefix to align the next branch. + OS.insert(MF); + if (!shouldAddPrefix(Inst)) + return; + } + // Pend the BranchPrefix, BranchPadding and FusedJccPadding since we + // don't yet know the fragment where the next branch is. + PendingAlignmentFragments.push_back( + cast<MCMachineDependentFragment>(OS.getCurrentFragment())); + return; +} + +/// Set the target branches for BranchPrefix, BranchPadding and FusedJccPadding. 
+void X86AsmBackend::alignBranchesEnd(MCObjectStreamer &OS, const MCInst &Inst) { + if (!needAlign(OS)) + return; + MCFragment *CF = OS.getCurrentFragment(); + PrevInstFragment = CF; + if (!needAlignInst(Inst)) + return; + for (MCMachineDependentFragment *MDF : PendingAlignmentFragments) { + MDF->setBranch(CF); + if (AlignMaxPrefixSize == 0 && CF->getKind() != MCFragment::FT_Relaxable) + // If we only insert NOP to align branch, we need to break the last + // fragment so that more instructions can't be pushed into it, so that we + // can get the instruction size of the branch later. + OS.insert(new MCDataFragment()); + } + PendingAlignmentFragments.clear(); + + // Update the maximum alignment on the current section if necessary. + MCSection *Sec = OS.getCurrentSectionOnly(); + if (AlignBoundarySize > Sec->getAlignment()) + Sec->setAlignment(Align(AlignBoundarySize)); +} + +/// Make a mark between hardcode and instruction. +void X86AsmBackend::markHardCode(MCObjectStreamer &OS) { + // Hardcode only exists in text section. + MCSection *Sec = OS.getCurrentSectionOnly(); + if (!Sec->getKind().isText()) + return; + + if (!needAlign(OS)) + return; + + MCFragment *CF = OS.getCurrentFragment(); + MCFragment *PF = CF ? CF->getPrevNode() : nullptr; + bool IsPFHardCodeBegin = isa_and_nonnull<MCMachineDependentFragment>(PF) && + cast<MCMachineDependentFragment>(PF)->SubKind == + MCMachineDependentFragment::HardCodeBegin; + if (!IsPFHardCodeBegin) { + // Insert HardCodeBegin to mark the beginning of the sequence of hard code. + OS.insert(new MCMachineDependentFragment( + MCMachineDependentFragment::HardCodeBegin, 0, AlignBoundarySize)); + } +} + Optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const { if (STI.getTargetTriple().isOSBinFormatELF()) { if (STI.getTargetTriple().getArch() == Triple::x86_64) { @@ -326,6 +773,21 @@ Res.setOpcode(RelaxedOp); } +/// Write a sequence of segment prefixes to the output, covering \p Count +/// bytes. 
+/// \return - true on success, false on failure +bool X86AsmBackend::writeSegmentPrefixData(raw_ostream &OS, uint64_t Count, + char Prefix) const { + // The function should only write segment prefixes. + // CS:0x2e, SS:0x36, DS:0x3e, ES:0x26, FS:0x64, GS:0x65 + if (!(Prefix == 0x2e || Prefix == 0x36 || Prefix == 0x3e || Prefix == 0x26 || + Prefix == 0x64 || Prefix == 0x65)) + return false; + for (uint64_t i = 0; i < Count; ++i) + OS << Prefix; + return true; +} + /// Write a sequence of optimal nops to the output, covering \p Count /// bytes. /// \return - true on success, false on failure Index: llvm/lib/Target/X86/X86InstrInfo.td =================================================================== --- llvm/lib/Target/X86/X86InstrInfo.td +++ llvm/lib/Target/X86/X86InstrInfo.td @@ -1017,7 +1017,7 @@ def X86_COND_O : PatLeaf<(i8 0)>; def X86_COND_NO : PatLeaf<(i8 1)>; def X86_COND_B : PatLeaf<(i8 2)>; // alt. COND_C -def X86_COND_AE : PatLeaf<(i8 3)>; // alt. COND_NC +def X86_COND_AE : PatLeaf<(i8 3)>; // alt. COND_NC,COND_NB def X86_COND_E : PatLeaf<(i8 4)>; // alt. COND_Z def X86_COND_NE : PatLeaf<(i8 5)>; // alt. COND_NZ def X86_COND_BE : PatLeaf<(i8 6)>; // alt. 
COND_NA Index: llvm/test/MC/X86/align-branch-32-1a.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-32-1a.s @@ -0,0 +1,83 @@ +# Check the macro-fusion table +# RUN: llvm-mc -filetype=obj -triple i386-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=fused+jcc+jmp --x86-align-branch-prefix-size=5 %s | llvm-objdump -d - | FileCheck %s + +# CHECK: 00000000 main: +# CHECK-NEXT: 0: 55 pushl %ebp +# CHECK-NEXT: 1: 54 pushl %esp +# CHECK-NEXT: 2: eb 17 jmp 23 +# CHECK-COUNT-23: 90 nop +# CHECK: 0000001b infiniteLoop: +# CHECK-NEXT: 1b: 55 pushl %ebp +# CHECK-NEXT: 1c: 3e 3e 39 c5 cmpl %eax, %ebp +# CHECK-NEXT: 20: 78 de js {{.*}} +# CHECK-COUNT-25: 90 nop +# CHECK-NEXT: 3b: 55 pushl %ebp +# CHECK-NEXT: 3c: 3e 3e 39 c5 cmpl %eax, %ebp +# CHECK-NEXT: 40: 70 be jo {{.*}} +# CHECK-COUNT-25: 90 nop +# CHECK-NEXT: 5b: 55 pushl %ebp +# CHECK-NEXT: 5c: 3e 3e 01 c5 addl %eax, %ebp +# CHECK-NEXT: 60: 71 9e jno {{.*}} +# CHECK-COUNT-25: 90 nop +# CHECK-NEXT: 7b: 55 pushl %ebp +# CHECK-NEXT: 7c: 3e 3e 29 c5 subl %eax, %ebp +# CHECK-NEXT: 80: 0f 8b 7a ff ff ff jnp {{.*}} +# CHECK-COUNT-21: 90 nop +# CHECK-NEXT: 9b: 55 pushl %ebp +# CHECK-NEXT: 9c: 3e 3e 3e 42 incl %edx +# CHECK-NEXT: a0: 0f 82 5a ff ff ff jb {{.*}} +# CHECK-COUNT-21: 90 nop +# CHECK-NEXT: bb: 55 pushl %ebp +# CHECK-NEXT: bc: 3e 3e 3e 4a decl %edx +# CHECK-NEXT: c0: 0f 86 3a ff ff ff jbe {{.*}} + + .text + .globl infiniteLoop +main: + pushl %ebp + pushl %esp + jmp infiniteLoop + + .p2align 4 + .rept 11 + .byte 0x90 + .endr +infiniteLoop: + pushl %ebp + cmp %eax, %ebp + js main + .p2align 4 + .rept 11 + .byte 0x90 + .endr + pushl %ebp + cmp %eax, %ebp + jo main + .p2align 4 + .rept 11 + .byte 0x90 + .endr + pushl %ebp + add %eax, %ebp + jno main + .p2align 4 + .rept 11 + .byte 0x90 + .endr + pushl %ebp + sub %eax, %ebp + jpo main + .p2align 4 + .rept 11 + .byte 0x90 + .endr + pushl %ebp + inc %edx + jb main + .p2align 4 + .rept 11 + 
.byte 0x90 + .endr + pushl %ebp + dec %edx + jna main Index: llvm/test/MC/X86/align-branch-32-2a.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-32-2a.s @@ -0,0 +1,23 @@ +# Check no prefix is inserted after hard code +# RUN: llvm-mc -filetype=obj -triple i386-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=fused+jcc+jmp --x86-align-branch-prefix-size=2 %s | llvm-objdump -d - | FileCheck %s + +# CHECK: 00000000 main: +# CHECK-NEXT: 0: 2e 55 pushl %ebp +# CHECK-NEXT: 2: 3e 3e 89 e5 movl %esp, %ebp +# CHECK-COUNT-26: 90 nop +# CHECK-NEXT: 20: eb 00 jmp 0 +# CHECK: 00000022 infiniteLoop: +# CHECK-NEXT: 22: eb dc jmp -36
+ + .text + .globl infiniteLoop +main: + .byte 0x2e + pushl %ebp + movl %esp, %ebp + .rept 26 + .byte 0x90 + .endr + jmp infiniteLoop +infiniteLoop: + jmp main Index: llvm/test/MC/X86/align-branch-32-3a.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-32-3a.s @@ -0,0 +1,57 @@ +# Check NOP padding is disabled before tls_get_addr calls +# RUN: llvm-mc -filetype=obj -triple i386-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=call --x86-align-branch-prefix-size=5 %s | llvm-objdump -d - | FileCheck %s + +# CHECK: 00000000 foo: +# CHECK-NEXT: 0: 64 a3 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 6: 55 pushl %ebp +# CHECK-NEXT: 7: 55 pushl %ebp +# CHECK-NEXT: 8: 55 pushl %ebp +# CHECK-NEXT: 9: 55 pushl %ebp +# CHECK-NEXT: a: 89 e5 movl %esp, %ebp +# CHECK-NEXT: c: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: f: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 12: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 15: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 18: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 1b: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 1e: e8 fc ff ff ff calll {{.*}} +# CHECK-NEXT: 23: 55 pushl %ebp +# CHECK-NEXT: 24: 55 pushl %ebp +# CHECK-NEXT: 25: 64 a3 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 2b: 89 e5 movl %esp, %ebp +# CHECK-NEXT: 2d: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 30: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 33: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 36: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 39: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 3c: ff 91 00 00 00 00 calll *(%ecx) +# CHECK-NEXT: 42: 89 75 f4 movl %esi, -12(%ebp) + + .text + .globl foo + .p2align 4 +foo: + movl %eax, %fs:0x1 + pushl %ebp + pushl %ebp + pushl %ebp + pushl %ebp + movl %esp, %ebp + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + call ___tls_get_addr@PLT + pushl %ebp + 
pushl %ebp + movl %eax, %fs:0x1 + movl %esp, %ebp + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + call *___tls_get_addr@GOT(%ecx) + movl %esi, -12(%ebp) Index: llvm/test/MC/X86/align-branch-32-4a.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-32-4a.s @@ -0,0 +1,146 @@ +# Check appropriate prefix is chosen to prefix an instruction. +# RUN: llvm-mc -filetype=obj -triple i386-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=fused+jcc+jmp --x86-align-branch-prefix-size=2 %s | llvm-objdump -d - | FileCheck %s + +# CHECK: 00000000 foo: +# CHECK-NEXT: 0: 65 65 a3 01 00 00 00 movl %eax, %gs:1 +# CHECK-NEXT: 7: 3e 55 pushl %ebp +# CHECK-NEXT: 9: 57 pushl %edi +# CHECK-NEXT: a: 55 pushl %ebp +# CHECK-NEXT: b: 55 pushl %ebp +# CHECK-NEXT: c: 89 e5 movl %esp, %ebp +# CHECK-NEXT: e: 89 7d f8 movl %edi, -8(%ebp) +# CHECK-NEXT: 11: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 14: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 17: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 1a: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 1d: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 20: 39 c5 cmpl %eax, %ebp +# CHECK-NEXT: 22: 74 5e je {{.*}} +# CHECK-NEXT: 24: 3e 89 73 f4 movl %esi, %ds:-12(%ebx) +# CHECK-NEXT: 28: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 2b: 89 7d f8 movl %edi, -8(%ebp) +# CHECK-NEXT: 2e: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 31: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 34: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 37: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 3a: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 3d: 5d popl %ebp +# CHECK-NEXT: 3e: 5d popl %ebp +# CHECK-NEXT: 3f: 5d popl %ebp +# CHECK-NEXT: 40: 74 40 je {{.*}} +# CHECK-NEXT: 42: 5d popl %ebp +# CHECK-NEXT: 43: 74 3d je {{.*}} +# CHECK-NEXT: 45: 36 89 44 24 fc movl %eax, %ss:-4(%esp) +# CHECK-NEXT: 4a: 89 75 f4 movl %esi, -12(%ebp) +# 
CHECK-NEXT: 4d: 89 7d f8 movl %edi, -8(%ebp) +# CHECK-NEXT: 50: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 53: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 56: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 59: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 5c: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 5f: 5d popl %ebp +# CHECK-NEXT: 60: eb 26 jmp {{.*}} +# CHECK-NEXT: 62: eb 24 jmp {{.*}} +# CHECK-NEXT: 64: eb 22 jmp {{.*}} +# CHECK-NEXT: 66: 89 45 fc movl %eax, -4(%ebp) +# CHECK-NEXT: 69: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 6c: 89 7d f8 movl %edi, -8(%ebp) +# CHECK-NEXT: 6f: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 72: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 75: 89 75 f4 movl %esi, -12(%ebp) +# CHECK-NEXT: 78: 5d popl %ebp +# CHECK-NEXT: 79: 5d popl %ebp +# CHECK-NEXT: 7a: 39 c5 cmpl %eax, %ebp +# CHECK-NEXT: 7c: 74 04 je {{.*}} +# CHECK-NEXT: 7e: 90 nop +# CHECK-NEXT: 7f: 90 nop +# CHECK-NEXT: 80: eb 06 jmp {{.*}} +# CHECK-NEXT: 82: 8b 45 f4 movl -12(%ebp), %eax +# CHECK-NEXT: 85: 89 45 fc movl %eax, -4(%ebp) +# CHECK-NEXT: 88: 89 b5 50 fb ff ff movl %esi, -1200(%ebp) +# CHECK-NEXT: 8e: 89 b5 50 fb ff ff movl %esi, -1200(%ebp) +# CHECK-NEXT: 94: 89 b5 50 fb ff ff movl %esi, -1200(%ebp) +# CHECK-NEXT: 9a: 89 b5 50 fb ff ff movl %esi, -1200(%ebp) +# CHECK-NEXT: a0: 89 75 0c movl %esi, 12(%ebp) +# CHECK-NEXT: a3: e9 fc ff ff ff jmp {{.*}} +# CHECK-NEXT: a8: 89 b5 50 fb ff ff movl %esi, -1200(%ebp) +# CHECK-NEXT: ae: 89 b5 50 fb ff ff movl %esi, -1200(%ebp) +# CHECK-NEXT: b4: 89 b5 50 fb ff ff movl %esi, -1200(%ebp) +# CHECK-NEXT: ba: 89 b5 50 fb ff ff movl %esi, -1200(%ebp) +# CHECK-NEXT: c0: 89 75 00 movl %esi, (%ebp) +# CHECK-NEXT: c3: 74 c3 je {{.*}} +# CHECK-NEXT: c5: 74 c1 je {{.*}} + + .text + .globl foo + .p2align 4 +foo: + movl %eax, %gs:0x1 + pushl %ebp + pushl %edi + pushl %ebp + pushl %ebp + movl %esp, %ebp + movl %edi, -8(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, 
-12(%ebp) + cmp %eax, %ebp + je .L_2 + movl %esi, -12(%ebx) + movl %esi, -12(%ebp) + movl %edi, -8(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + popl %ebp + popl %ebp + popl %ebp + je .L_2 + popl %ebp + je .L_2 + movl %eax, -4(%esp) + movl %esi, -12(%ebp) + movl %edi, -8(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + popl %ebp + jmp .L_3 + jmp .L_3 + jmp .L_3 + movl %eax, -4(%ebp) + movl %esi, -12(%ebp) + movl %edi, -8(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + popl %ebp + popl %ebp + cmp %eax, %ebp + je .L_2 + jmp .L_3 +.L_2: + movl -12(%ebp), %eax + movl %eax, -4(%ebp) +.L_3: + movl %esi, -1200(%ebp) + movl %esi, -1200(%ebp) + movl %esi, -1200(%ebp) + movl %esi, -1200(%ebp) + movl %esi, 12(%ebp) + jmp bar + movl %esi, -1200(%ebp) + movl %esi, -1200(%ebp) + movl %esi, -1200(%ebp) + movl %esi, -1200(%ebp) + movl %esi, (%ebp) + je .L_3 + je .L_3 + Index: llvm/test/MC/X86/align-branch-64-1a.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-64-1a.s @@ -0,0 +1,156 @@ +# Check option --x86-branches-within-32B-boundaries is equivalent to the combination of options --x86-align-branch-boundary=32 --x86-align-branch=fused+jcc+jmp --x86-align-branch-prefix-size=5 +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-branches-within-32B-boundaries %s | llvm-objdump -d - > %t1 +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=fused+jcc+jmp --x86-align-branch-prefix-size=5 %s | llvm-objdump -d - > %t2 +# RUN: cmp %t1 %t2 +# RUN: FileCheck --input-file=%t1 %s --check-prefixes=CHECK,PREFIX5 + +# Check the size of segment prefixes is limited with option --x86-align-branch-prefix-size=NUM +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown 
--x86-align-branch-boundary=32 --x86-align-branch=fused+jcc+jmp --x86-align-branch-prefix-size=1 %s | llvm-objdump -d - > %t3 +# RUN: FileCheck --input-file=%t3 %s --check-prefixes=CHECK,PREFIX1 + +# Check no branches is aligned with option --x86-align-branch-boundary=0 +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=0 --x86-align-branch=fused+jcc+jmp --x86-align-branch-prefix-size=5 %s | llvm-objdump -d - > %t4 +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s | llvm-objdump -d - > %t5 +# RUN: cmp %t4 %t5 + +# CHECK: 0000000000000000 foo: +# PREFIX5: 0: 64 64 64 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# PREFIX5-NEXT: b: 55 pushq %rbp +# PREFIX5-NEXT: c: 55 pushq %rbp +# PREFIX5-NEXT: d: 55 pushq %rbp +# PREFIX1: 0: 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# PREFIX1-NEXT: 8: 2e 55 pushq %rbp +# PREFIX1-NEXT: a: 2e 55 pushq %rbp +# PREFIX1-NEXT: c: 2e 55 pushq %rbp +# CHECK: e: 48 89 e5 movq %rsp, %rbp +# CHECK-NEXT: 11: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 14: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 17: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 1a: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 1d: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 20: 48 39 c5 cmpq %rax, %rbp +# CHECK-NEXT: 23: 74 5d je {{.*}} +# CHECK-NEXT: 25: 2e 89 75 f4 movl %esi, %cs:-12(%rbp) +# CHECK-NEXT: 29: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 2c: 89 7d f8 movl %edi, -8(%rbp) +# CHECK-NEXT: 2f: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 32: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 35: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 38: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 3b: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 3e: 5d popq %rbp +# CHECK-NEXT: 3f: 5d popq %rbp +# CHECK-NEXT: 40: 74 40 je {{.*}} +# CHECK-NEXT: 42: 5d popq %rbp +# CHECK-NEXT: 43: 74 3d je {{.*}} +# CHECK-NEXT: 45: 2e 89 45 fc movl %eax, %cs:-4(%rbp) +# CHECK-NEXT: 49: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 4c: 89 7d f8 movl 
%edi, -8(%rbp) +# CHECK-NEXT: 4f: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 52: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 55: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 58: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 5b: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 5e: 5d popq %rbp +# CHECK-NEXT: 5f: 5d popq %rbp +# CHECK-NEXT: 60: eb 26 jmp {{.*}} +# CHECK-NEXT: 62: eb 24 jmp {{.*}} +# CHECK-NEXT: 64: eb 22 jmp {{.*}} +# CHECK-NEXT: 66: 89 45 fc movl %eax, -4(%rbp) +# CHECK-NEXT: 69: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 6c: 89 7d f8 movl %edi, -8(%rbp) +# CHECK-NEXT: 6f: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 72: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 75: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 78: 5d popq %rbp +# CHECK-NEXT: 79: 5d popq %rbp +# CHECK-NEXT: 7a: 48 39 c5 cmpq %rax, %rbp +# CHECK-NEXT: 7d: 74 03 je {{.*}} +# CHECK-NEXT: 7f: 90 nop +# CHECK-NEXT: 80: eb 06 jmp {{.*}} +# CHECK-NEXT: 82: 8b 45 f4 movl -12(%rbp), %eax +# CHECK-NEXT: 85: 89 45 fc movl %eax, -4(%rbp) +# CHECK-NEXT: 88: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: 8e: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: 94: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: 9a: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: a0: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: a6: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: ac: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: b2: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: b8: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: be: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: c4: eb c2 jmp {{.*}} +# CHECK-NEXT: c6: 5d popq %rbp +# CHECK-NEXT: c7: c3 retq + + .text + .globl foo + .p2align 4 +foo: + movl %eax, %fs:0x1 + pushq %rbp + pushq %rbp + pushq %rbp + movq %rsp, %rbp + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + cmp %rax, %rbp + je .L_2 + movl %esi, 
-12(%rbp) + movl %esi, -12(%rbp) + movl %edi, -8(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + popq %rbp + popq %rbp + je .L_2 + popq %rbp + je .L_2 + movl %eax, -4(%rbp) + movl %esi, -12(%rbp) + movl %edi, -8(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + popq %rbp + popq %rbp + jmp .L_3 + jmp .L_3 + jmp .L_3 + movl %eax, -4(%rbp) + movl %esi, -12(%rbp) + movl %edi, -8(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + popq %rbp + popq %rbp + cmp %rax, %rbp + je .L_2 + jmp .L_3 +.L_2: + movl -12(%rbp), %eax + movl %eax, -4(%rbp) +.L_3: + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + jmp .L_3 + popq %rbp + retq Index: llvm/test/MC/X86/align-branch-64-1b.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-64-1b.s @@ -0,0 +1,139 @@ +# Check only fused conditional jumps and conditional jumps are aligned with option --x86-align-branch-boundary=32 --x86-align-branch=fused+jcc --x86-align-branch-prefix-size=5 +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=fused+jcc --x86-align-branch-prefix-size=5 %s | llvm-objdump -d - | FileCheck %s + +# CHECK: 0000000000000000 foo: +# CHECK-NEXT: 0: 64 64 64 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: b: 55 pushq %rbp +# CHECK-NEXT: c: 55 pushq %rbp +# CHECK-NEXT: d: 55 pushq %rbp +# CHECK-NEXT: e: 48 89 e5 movq %rsp, %rbp +# CHECK-NEXT: 11: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 14: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 17: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 1a: 89 75 f4 movl %esi, -12(%rbp) +# 
CHECK-NEXT: 1d: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 20: 48 39 c5 cmpq %rax, %rbp +# CHECK-NEXT: 23: 74 5b je {{.*}} +# CHECK-NEXT: 25: 2e 89 75 f4 movl %esi, %cs:-12(%rbp) +# CHECK-NEXT: 29: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 2c: 89 7d f8 movl %edi, -8(%rbp) +# CHECK-NEXT: 2f: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 32: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 35: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 38: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 3b: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 3e: 5d popq %rbp +# CHECK-NEXT: 3f: 5d popq %rbp +# CHECK-NEXT: 40: 74 3e je {{.*}} +# CHECK-NEXT: 42: 5d popq %rbp +# CHECK-NEXT: 43: 74 3b je {{.*}} +# CHECK-NEXT: 45: 89 45 fc movl %eax, -4(%rbp) +# CHECK-NEXT: 48: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 4b: 89 7d f8 movl %edi, -8(%rbp) +# CHECK-NEXT: 4e: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 51: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 54: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 57: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 5a: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 5d: 5d popq %rbp +# CHECK-NEXT: 5e: 5d popq %rbp +# CHECK-NEXT: 5f: eb 25 jmp {{.*}} +# CHECK-NEXT: 61: eb 23 jmp {{.*}} +# CHECK-NEXT: 63: eb 21 jmp {{.*}} +# CHECK-NEXT: 65: 89 45 fc movl %eax, -4(%rbp) +# CHECK-NEXT: 68: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 6b: 89 7d f8 movl %edi, -8(%rbp) +# CHECK-NEXT: 6e: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 71: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 74: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 77: 5d popq %rbp +# CHECK-NEXT: 78: 5d popq %rbp +# CHECK-NEXT: 79: 48 39 c5 cmpq %rax, %rbp +# CHECK-NEXT: 7c: 74 02 je {{.*}} +# CHECK-NEXT: 7e: eb 06 jmp {{.*}} +# CHECK-NEXT: 80: 8b 45 f4 movl -12(%rbp), %eax +# CHECK-NEXT: 83: 89 45 fc movl %eax, -4(%rbp) +# CHECK-NEXT: 86: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: 8c: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: 92: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) 
+# CHECK-NEXT: 98: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: 9e: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: a4: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: aa: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: b0: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: b6: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: bc: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: c2: eb c2 jmp {{.*}} +# CHECK-NEXT: c4: 5d popq %rbp +# CHECK-NEXT: c5: c3 retq + + .text + .globl foo + .p2align 4 +foo: + movl %eax, %fs:0x1 + pushq %rbp + pushq %rbp + pushq %rbp + movq %rsp, %rbp + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + cmp %rax, %rbp + je .L_2 + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %edi, -8(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + popq %rbp + popq %rbp + je .L_2 + popq %rbp + je .L_2 + movl %eax, -4(%rbp) + movl %esi, -12(%rbp) + movl %edi, -8(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + popq %rbp + popq %rbp + jmp .L_3 + jmp .L_3 + jmp .L_3 + movl %eax, -4(%rbp) + movl %esi, -12(%rbp) + movl %edi, -8(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + popq %rbp + popq %rbp + cmp %rax, %rbp + je .L_2 + jmp .L_3 +.L_2: + movl -12(%rbp), %eax + movl %eax, -4(%rbp) +.L_3: + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + jmp .L_3 + popq %rbp + retq Index: llvm/test/MC/X86/align-branch-64-1c.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-64-1c.s @@ -0,0 +1,140 @@ +# Check only 
conditional jumps are aligned with option --x86-align-branch-boundary=32 --x86-align-branch=jcc --x86-align-branch-prefix-size=5 +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=jcc --x86-align-branch-prefix-size=5 %s | llvm-objdump -d - | FileCheck %s + +# CHECK: 0000000000000000 foo: +# CHECK-NEXT: 0: 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 8: 55 pushq %rbp +# CHECK-NEXT: 9: 55 pushq %rbp +# CHECK-NEXT: a: 55 pushq %rbp +# CHECK-NEXT: b: 48 89 e5 movq %rsp, %rbp +# CHECK-NEXT: e: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 11: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 14: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 17: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 1a: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 1d: 48 39 c5 cmpq %rax, %rbp +# CHECK-NEXT: 20: 74 5b je {{.*}} +# CHECK-NEXT: 22: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 25: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 28: 89 7d f8 movl %edi, -8(%rbp) +# CHECK-NEXT: 2b: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 2e: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 31: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 34: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 37: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 3a: 5d popq %rbp +# CHECK-NEXT: 3b: 5d popq %rbp +# CHECK-NEXT: 3c: 74 3f je {{.*}} +# CHECK-NEXT: 3e: 2e 5d popq %rbp +# CHECK-NEXT: 40: 74 3b je {{.*}} +# CHECK-NEXT: 42: 89 45 fc movl %eax, -4(%rbp) +# CHECK-NEXT: 45: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 48: 89 7d f8 movl %edi, -8(%rbp) +# CHECK-NEXT: 4b: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 4e: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 51: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 54: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 57: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 5a: 5d popq %rbp +# CHECK-NEXT: 5b: 5d popq %rbp +# CHECK-NEXT: 5c: eb 25 jmp {{.*}} +# CHECK-NEXT: 5e: eb 23 jmp {{.*}} +# CHECK-NEXT: 60: eb 21 jmp {{.*}} +# 
CHECK-NEXT: 62: 89 45 fc movl %eax, -4(%rbp) +# CHECK-NEXT: 65: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 68: 89 7d f8 movl %edi, -8(%rbp) +# CHECK-NEXT: 6b: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 6e: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 71: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 74: 5d popq %rbp +# CHECK-NEXT: 75: 5d popq %rbp +# CHECK-NEXT: 76: 48 39 c5 cmpq %rax, %rbp +# CHECK-NEXT: 79: 74 02 je {{.*}} +# CHECK-NEXT: 7b: eb 06 jmp {{.*}} +# CHECK-NEXT: 7d: 8b 45 f4 movl -12(%rbp), %eax +# CHECK-NEXT: 80: 89 45 fc movl %eax, -4(%rbp) +# CHECK-NEXT: 83: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: 89: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: 8f: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: 95: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: 9b: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: a1: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: a7: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: ad: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: b3: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: b9: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: bf: eb c2 jmp {{.*}} +# CHECK-NEXT: c1: 5d popq %rbp +# CHECK-NEXT: c2: c3 retq + + + .text + .globl foo + .p2align 4 +foo: + movl %eax, %fs:0x1 + pushq %rbp + pushq %rbp + pushq %rbp + movq %rsp, %rbp + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + cmp %rax, %rbp + je .L_2 + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %edi, -8(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + popq %rbp + popq %rbp + je .L_2 + popq %rbp + je .L_2 + movl %eax, -4(%rbp) + movl %esi, -12(%rbp) + movl %edi, -8(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + popq %rbp + popq %rbp + jmp .L_3 + jmp .L_3 + jmp 
.L_3 + movl %eax, -4(%rbp) + movl %esi, -12(%rbp) + movl %edi, -8(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + popq %rbp + popq %rbp + cmp %rax, %rbp + je .L_2 + jmp .L_3 +.L_2: + movl -12(%rbp), %eax + movl %eax, -4(%rbp) +.L_3: + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + jmp .L_3 + popq %rbp + retq Index: llvm/test/MC/X86/align-branch-64-1d.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-64-1d.s @@ -0,0 +1,147 @@ +# Check only conditional jumps and unconditional jumps are aligned with option --x86-align-branch-boundary=32 --x86-align-branch=jcc+jmp --x86-align-branch-prefix-size=5 +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=jcc+jmp --x86-align-branch-prefix-size=5 %s | llvm-objdump -d - > %t1 +# RUN: FileCheck --input-file=%t1 %s --check-prefixes=CHECK,SHORT-NOP + +# Check long NOP can be emitted to align branch if the target cpu supports long nop. 
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=32 -mcpu=x86-64 --x86-align-branch=jcc+jmp --x86-align-branch-prefix-size=5 %s | llvm-objdump -d - >%t2 +# RUN: FileCheck --input-file=%t2 %s --check-prefixes=CHECK,LONG-NOP + +# CHECK: 0000000000000000 foo: +# CHECK-NEXT: 0: 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 8: 55 pushq %rbp +# CHECK-NEXT: 9: 55 pushq %rbp +# CHECK-NEXT: a: 55 pushq %rbp +# CHECK-NEXT: b: 48 89 e5 movq %rsp, %rbp +# CHECK-NEXT: e: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 11: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 14: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 17: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 1a: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 1d: 48 39 c5 cmpq %rax, %rbp +# CHECK-NEXT: 20: 74 5d je {{.*}} +# CHECK-NEXT: 22: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 25: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 28: 89 7d f8 movl %edi, -8(%rbp) +# CHECK-NEXT: 2b: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 2e: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 31: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 34: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 37: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 3a: 5d popq %rbp +# CHECK-NEXT: 3b: 5d popq %rbp +# CHECK-NEXT: 3c: 74 41 je {{.*}} +# CHECK-NEXT: 3e: 2e 5d popq %rbp +# CHECK-NEXT: 40: 74 3d je {{.*}} +# CHECK-NEXT: 42: 89 45 fc movl %eax, -4(%rbp) +# CHECK-NEXT: 45: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 48: 89 7d f8 movl %edi, -8(%rbp) +# CHECK-NEXT: 4b: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 4e: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 51: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 54: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 57: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 5a: 5d popq %rbp +# CHECK-NEXT: 5b: 5d popq %rbp +# CHECK-NEXT: 5c: eb 27 jmp {{.*}} +# SHORT-NOP: 5e: 90 nop +# SHORT-NOP-NEXT: 5f: 90 nop +# LONG-NOP: 5e: 66 90 nop +# CHECK: 60: eb 23 jmp {{.*}} +# CHECK-NEXT: 62: 
eb 21 jmp {{.*}} +# CHECK-NEXT: 64: 89 45 fc movl %eax, -4(%rbp) +# CHECK-NEXT: 67: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 6a: 89 7d f8 movl %edi, -8(%rbp) +# CHECK-NEXT: 6d: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 70: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 73: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 76: 5d popq %rbp +# CHECK-NEXT: 77: 5d popq %rbp +# CHECK-NEXT: 78: 48 39 c5 cmpq %rax, %rbp +# CHECK-NEXT: 7b: 74 02 je {{.*}} +# CHECK-NEXT: 7d: eb 06 jmp {{.*}} +# CHECK-NEXT: 7f: 8b 45 f4 movl -12(%rbp), %eax +# CHECK-NEXT: 82: 89 45 fc movl %eax, -4(%rbp) +# CHECK-NEXT: 85: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: 8b: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: 91: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: 97: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: 9d: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: a3: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: a9: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: af: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: b5: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: bb: 89 b5 50 fb ff ff movl %esi, -1200(%rbp) +# CHECK-NEXT: c1: eb c2 jmp {{.*}} +# CHECK-NEXT: c3: 5d popq %rbp +# CHECK-NEXT: c4: c3 retq + + .text + .globl foo + .p2align 4 +foo: + movl %eax, %fs:0x1 + pushq %rbp + pushq %rbp + pushq %rbp + movq %rsp, %rbp + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + cmp %rax, %rbp + je .L_2 + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %edi, -8(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + popq %rbp + popq %rbp + je .L_2 + popq %rbp + je .L_2 + movl %eax, -4(%rbp) + movl %esi, -12(%rbp) + movl %edi, -8(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + popq %rbp + popq %rbp + jmp .L_3 + 
jmp .L_3 + jmp .L_3 + movl %eax, -4(%rbp) + movl %esi, -12(%rbp) + movl %edi, -8(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + popq %rbp + popq %rbp + cmp %rax, %rbp + je .L_2 + jmp .L_3 +.L_2: + movl -12(%rbp), %eax + movl %eax, -4(%rbp) +.L_3: + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + movl %esi, -1200(%rbp) + jmp .L_3 + popq %rbp + retq Index: llvm/test/MC/X86/align-branch-64-2a.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-64-2a.s @@ -0,0 +1,94 @@ +# Check indirect jumps and calls are not aligned with option --x86-align-branch-boundary=32 --x86-align-branch=fused+jcc+jmp --x86-align-branch-prefix-size=5 +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s | llvm-objdump -d - > %t1 +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=fused+jcc+jmp --x86-align-branch-prefix-size=5 %s | llvm-objdump -d - > %t2 +# RUN: cmp %t1 %t2 + +# Check only indirect jumps are aligned with option --x86-align-branch-boundary=32 --x86-align-branch=indirect --x86-align-branch-prefix-size=5 +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=indirect --x86-align-branch-prefix-size=5 %s | llvm-objdump -d - | FileCheck %s + +# CHECK: 0000000000000000 foo: +# CHECK-NEXT: 0: 64 64 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: a: 55 pushq %rbp +# CHECK-NEXT: b: 55 pushq %rbp +# CHECK-NEXT: c: 55 pushq %rbp +# CHECK-NEXT: d: 55 pushq %rbp +# CHECK-NEXT: e: 48 89 e5 movq %rsp, %rbp +# CHECK-NEXT: 11: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 14: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 17: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 1a: 89 75 f4 movl %esi, 
-12(%rbp) +# CHECK-NEXT: 1d: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 20: ff e0 jmpq *%rax +# CHECK-NEXT: 22: 55 pushq %rbp +# CHECK-NEXT: 23: 55 pushq %rbp +# CHECK-NEXT: 24: 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 2c: 48 89 e5 movq %rsp, %rbp +# CHECK-NEXT: 2f: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 32: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 35: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 38: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 3b: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 3e: ff d0 callq *%rax +# CHECK-NEXT: 40: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 43: 55 pushq %rbp +# CHECK-NEXT: 44: 55 pushq %rbp +# CHECK-NEXT: 45: 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 4d: 48 89 e5 movq %rsp, %rbp +# CHECK-NEXT: 50: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 53: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 56: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 59: e8 a2 ff ff ff callq {{.*}} +# CHECK-NEXT: 5e: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 61: 55 pushq %rbp +# CHECK-NEXT: 62: 55 pushq %rbp +# CHECK-NEXT: 63: 55 pushq %rbp +# CHECK-NEXT: 64: 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 6c: 48 89 e5 movq %rsp, %rbp +# CHECK-NEXT: 6f: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 72: ff 14 25 00 00 00 00 callq *0 +# CHECK-NEXT: 79: 55 pushq %rbp + + .text + .globl foo + .p2align 4 +foo: + movl %eax, %fs:0x1 + pushq %rbp + pushq %rbp + pushq %rbp + pushq %rbp + movq %rsp, %rbp + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + jmp *%rax + pushq %rbp + pushq %rbp + movl %eax, %fs:0x1 + movq %rsp, %rbp + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + call *%rax + movl %esi, -12(%rbp) + pushq %rbp + pushq %rbp + movl %eax, %fs:0x1 + movq %rsp, %rbp + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + call foo + movl %esi, -12(%rbp) + pushq %rbp 
+ pushq %rbp + pushq %rbp + movl %eax, %fs:0x1 + movq %rsp, %rbp + movl %esi, -12(%rbp) + call *foo + pushq %rbp Index: llvm/test/MC/X86/align-branch-64-2b.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-64-2b.s @@ -0,0 +1,89 @@ +# Check only calls are aligned with option --x86-align-branch-boundary=32 --x86-align-branch=call --x86-align-branch-prefix-size=5 +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=call --x86-align-branch-prefix-size=5 %s | llvm-objdump -d - | FileCheck %s + +# CHECK: 0000000000000000 foo: +# CHECK-NEXT: 0: 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 8: 55 pushq %rbp +# CHECK-NEXT: 9: 55 pushq %rbp +# CHECK-NEXT: a: 55 pushq %rbp +# CHECK-NEXT: b: 55 pushq %rbp +# CHECK-NEXT: c: 48 89 e5 movq %rsp, %rbp +# CHECK-NEXT: f: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 12: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 15: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 18: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 1b: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 1e: ff e0 jmpq *%rax +# CHECK-NEXT: 20: 55 pushq %rbp +# CHECK-NEXT: 21: 55 pushq %rbp +# CHECK-NEXT: 22: 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 2a: 48 89 e5 movq %rsp, %rbp +# CHECK-NEXT: 2d: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 30: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 33: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 36: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 39: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 3c: ff d0 callq *%rax +# CHECK-NEXT: 3e: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 41: 55 pushq %rbp +# CHECK-NEXT: 42: 55 pushq %rbp +# CHECK-NEXT: 43: 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 4b: 48 89 e5 movq %rsp, %rbp +# CHECK-NEXT: 4e: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 51: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 54: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 57: 
e8 a4 ff ff ff callq {{.*}} +# CHECK-NEXT: 5c: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 5f: 55 pushq %rbp +# CHECK-NEXT: 60: 55 pushq %rbp +# CHECK-NEXT: 61: 55 pushq %rbp +# CHECK-NEXT: 62: 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 6a: 48 89 e5 movq %rsp, %rbp +# CHECK-NEXT: 6d: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 70: ff 14 25 00 00 00 00 callq *0 +# CHECK-NEXT: 77: 55 pushq %rbp + + .text + .globl foo + .p2align 4 +foo: + movl %eax, %fs:0x1 + pushq %rbp + pushq %rbp + pushq %rbp + pushq %rbp + movq %rsp, %rbp + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + jmp *%rax + pushq %rbp + pushq %rbp + movl %eax, %fs:0x1 + movq %rsp, %rbp + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + call *%rax + movl %esi, -12(%rbp) + pushq %rbp + pushq %rbp + movl %eax, %fs:0x1 + movq %rsp, %rbp + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + call foo + movl %esi, -12(%rbp) + pushq %rbp + pushq %rbp + pushq %rbp + movl %eax, %fs:0x1 + movq %rsp, %rbp + movl %esi, -12(%rbp) + call *foo + pushq %rbp Index: llvm/test/MC/X86/align-branch-64-2c.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-64-2c.s @@ -0,0 +1,89 @@ +# Check only indirect jumps and calls are aligned with option --x86-align-branch-boundary=32 --x86-align-branch=indirect+call --x86-align-branch-prefix-size=5 +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=indirect+call --x86-align-branch-prefix-size=5 %s | llvm-objdump -d - | FileCheck %s + +# CHECK: 0000000000000000 foo: +# CHECK-NEXT: 0: 64 64 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: a: 55 pushq %rbp +# CHECK-NEXT: b: 55 pushq %rbp +# CHECK-NEXT: c: 55 pushq %rbp +# CHECK-NEXT: d: 55 pushq %rbp +# CHECK-NEXT: e: 48 89 e5 movq %rsp, %rbp +# 
CHECK-NEXT: 11: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 14: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 17: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 1a: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 1d: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 20: ff e0 jmpq *%rax +# CHECK-NEXT: 22: 2e 2e 55 pushq %rbp +# CHECK-NEXT: 25: 55 pushq %rbp +# CHECK-NEXT: 26: 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 2e: 48 89 e5 movq %rsp, %rbp +# CHECK-NEXT: 31: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 34: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 37: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 3a: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 3d: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 40: ff d0 callq *%rax +# CHECK-NEXT: 42: 2e 2e 2e 2e 2e 89 75 f4 movl %esi, %cs:-12(%rbp) +# CHECK-NEXT: 4a: 55 pushq %rbp +# CHECK-NEXT: 4b: 55 pushq %rbp +# CHECK-NEXT: 4c: 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 54: 48 89 e5 movq %rsp, %rbp +# CHECK-NEXT: 57: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 5a: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 5d: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 60: e8 9b ff ff ff callq {{.*}} +# CHECK-NEXT: 65: 2e 2e 2e 2e 2e 89 75 f4 movl %esi, %cs:-12(%rbp) +# CHECK-NEXT: 6d: 2e 2e 55 pushq %rbp +# CHECK-NEXT: 70: 55 pushq %rbp +# CHECK-NEXT: 71: 55 pushq %rbp +# CHECK-NEXT: 72: 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 7a: 48 89 e5 movq %rsp, %rbp +# CHECK-NEXT: 7d: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 80: ff 14 25 00 00 00 00 callq *0 +# CHECK-NEXT: 87: 55 pushq %rbp + + .text + .globl foo + .p2align 4 +foo: + movl %eax, %fs:0x1 + pushq %rbp + pushq %rbp + pushq %rbp + pushq %rbp + movq %rsp, %rbp + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + jmp *%rax + pushq %rbp + pushq %rbp + movl %eax, %fs:0x1 + movq %rsp, %rbp + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl 
%esi, -12(%rbp) + call *%rax + movl %esi, -12(%rbp) + pushq %rbp + pushq %rbp + movl %eax, %fs:0x1 + movq %rsp, %rbp + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + call foo + movl %esi, -12(%rbp) + pushq %rbp + pushq %rbp + pushq %rbp + movl %eax, %fs:0x1 + movq %rsp, %rbp + movl %esi, -12(%rbp) + call *foo + pushq %rbp Index: llvm/test/MC/X86/align-branch-64-3a.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-64-3a.s @@ -0,0 +1,55 @@ +# Check NOP padding is disabled before tls_get_addr calls +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=call --x86-align-branch-prefix-size=5 %s | llvm-objdump -d - | FileCheck %s + +# CHECK: 0000000000000000 foo: +# CHECK-NEXT: 0: 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 8: 55 pushq %rbp +# CHECK-NEXT: 9: 55 pushq %rbp +# CHECK-NEXT: a: 55 pushq %rbp +# CHECK-NEXT: b: 55 pushq %rbp +# CHECK-NEXT: c: 48 89 e5 movq %rsp, %rbp +# CHECK-NEXT: f: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 12: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 15: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 18: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 1b: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 1e: e8 00 00 00 00 callq 0 +# CHECK-NEXT: 23: 55 pushq %rbp +# CHECK-NEXT: 24: 55 pushq %rbp +# CHECK-NEXT: 25: 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 2d: 48 89 e5 movq %rsp, %rbp +# CHECK-NEXT: 30: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 33: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 36: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 39: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 3c: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 3f: ff 15 00 00 00 00 callq *(%rip) +# CHECK-NEXT: 45: 89 75 f4 movl %esi, -12(%rbp) + + .text + .globl foo + .p2align 4 +foo: + movl %eax, %fs:0x1 + pushq %rbp + pushq %rbp + pushq %rbp + pushq %rbp + movq %rsp, %rbp + movl %esi, -12(%rbp) 
+ movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + call __tls_get_addr@PLT + pushq %rbp + pushq %rbp + movl %eax, %fs:0x1 + movq %rsp, %rbp + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + call *__tls_get_addr@GOTPCREL(%rip) + movl %esi, -12(%rbp) Index: llvm/test/MC/X86/align-branch-64-4a.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-64-4a.s @@ -0,0 +1,60 @@ +# Check rets are not aligned with option --x86-align-branch-boundary=32 --x86-align-branch=fused+jcc+jmp --x86-align-branch-prefix-size=5 +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s | llvm-objdump -d - > %t +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=fused+jcc+jmp --x86-align-branch-prefix-size=5 %s | llvm-objdump -d - >%t2 +# RUN: cmp %t %t2 + +# Check only rets are aligned with option --x86-align-branch-boundary=32 --x86-align-branch=ret --x86-align-branch-prefix-size=5 +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=ret --x86-align-branch-prefix-size=5 %s | llvm-objdump -d - | FileCheck %s + +# CHECK: 0000000000000000 foo: +# CHECK-NEXT: 0: 64 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 9: 55 pushq %rbp +# CHECK-NEXT: a: 55 pushq %rbp +# CHECK-NEXT: b: 48 89 e5 movq %rsp, %rbp +# CHECK-NEXT: e: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 11: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 14: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 17: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 1a: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 1d: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 20: c3 retq +# CHECK-NEXT: 21: 2e 2e 55 pushq %rbp +# CHECK-NEXT: 24: 64 89 04 25 01 00 00 00 movl %eax, %fs:1 +# CHECK-NEXT: 2c: 55 pushq %rbp +# CHECK-NEXT: 2d: 55 pushq %rbp +# CHECK-NEXT: 
2e: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 31: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 34: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 37: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 3a: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 3d: 89 75 f4 movl %esi, -12(%rbp) +# CHECK-NEXT: 40: c2 1e 00 retq $30 +# CHECK-NEXT: 43: 55 pushq %rbp + + .text + .globl foo + .p2align 4 +foo: + movl %eax, %fs:0x1 + pushq %rbp + pushq %rbp + movq %rsp, %rbp + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + ret + pushq %rbp + movl %eax, %fs:0x1 + pushq %rbp + pushq %rbp + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + movl %esi, -12(%rbp) + ret $30 + pushq %rbp Index: llvm/test/MC/X86/align-branch-64-5a.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-64-5a.s @@ -0,0 +1,65 @@ +# Check no nop or prefix is inserted if no branch crosses or is against the boundary +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=fused+jcc+jmp+indirect+call+ret --x86-align-branch-prefix-size=5 %s | llvm-objdump -d - > %t1 +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=fused+jcc+jmp+indirect+call+ret --x86-align-branch-prefix-size=0 %s | llvm-objdump -d - > %t2 +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s | llvm-objdump -d - > %t3 +# RUN: cmp %t1 %t3 +# RUN: cmp %t2 %t3 +# RUN: FileCheck --input-file=%t3 %s + +# CHECK: 0000000000000000 foo: +# CHECK-NEXT: 0: c1 e9 02 shrl $2, %ecx +# CHECK-NEXT: 3: c1 e9 02 shrl $2, %ecx +# CHECK-NEXT: 6: 89 d1 movl %edx, %ecx +# CHECK-NEXT: 8: 31 c0 xorl %eax, %eax +# CHECK-NEXT: a: 31 c8 xorl %ecx, %eax +# CHECK-NEXT: c: c1 e9 02 shrl $2, %ecx +# CHECK-NEXT: f: c1 e9 02 shrl $2, %ecx +# 
CHECK-NEXT: 12: c1 e9 02 shrl $2, %ecx +# CHECK-NEXT: 15: c1 e9 02 shrl $2, %ecx +# CHECK-NEXT: 18: f6 c2 02 testb $2, %dl +# CHECK-NEXT: 1b: f3 ab rep stosl %eax, %es:(%rdi) +# CHECK-NEXT: 1d: 75 e4 jne {{.*}} +# CHECK-NEXT: 1f: 31 c0 xorl %eax, %eax +# CHECK-NEXT: 21: c1 e9 02 shrl $2, %ecx +# CHECK-NEXT: 24: c1 e9 02 shrl $2, %ecx +# CHECK-NEXT: 27: c1 e9 02 shrl $2, %ecx +# CHECK-NEXT: 2a: 89 d1 movl %edx, %ecx +# CHECK-NEXT: 2c: 31 c0 xorl %eax, %eax +# CHECK-NEXT: 2e: c1 e9 02 shrl $2, %ecx +# CHECK-NEXT: 31: c1 e9 02 shrl $2, %ecx +# CHECK-NEXT: 34: c1 e9 02 shrl $2, %ecx +# CHECK-NEXT: 37: f6 c2 02 testb $2, %dl +# CHECK-NEXT: 3a: e8 00 00 00 00 callq {{.*}} +# CHECK-NEXT: 3f: 31 c0 xorl %eax, %eax +# CHECK-NEXT: 41: 75 e1 jne {{.*}} + + .text + .p2align 4,,15 +foo: + shrl $2, %ecx +.L1: + shrl $2, %ecx + movl %edx, %ecx + xorl %eax, %eax + xorl %ecx, %eax + shrl $2, %ecx + shrl $2, %ecx + shrl $2, %ecx + shrl $2, %ecx + testb $2, %dl + rep stosl + jne .L1 + xorl %eax, %eax + shrl $2, %ecx +.L2: + shrl $2, %ecx + shrl $2, %ecx + movl %edx, %ecx + xorl %eax, %eax + shrl $2, %ecx + shrl $2, %ecx + shrl $2, %ecx + testb $2, %dl + call bar + xorl %eax, %eax + jne .L2 Index: llvm/test/MC/X86/align-branch-64-6a.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-64-6a.s @@ -0,0 +1,36 @@ +# Check no prefix is inserted before instruction with special relocation operand +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=fused+jcc+jmp --x86-align-branch-prefix-size=5 %s | llvm-objdump -d - | FileCheck %s + +# CHECK: 0000000000000000 _start: +# CHECK-NEXT: 0: 85 d2 testl %edx, %edx +# CHECK-NEXT: 2: 74 21 je 33 <_start+0x25> +# CHECK-NEXT: 4: 48 85 ff testq %rdi, %rdi +# CHECK-NEXT: 7: 74 1c je 28 <_start+0x25> +# CHECK-NEXT: 9: 48 8d 3d 00 00 00 00 leaq (%rip), %rdi +# CHECK-NEXT: 10: e8 00 00 00 00 callq 0 <_start+0x15> +# CHECK-NEXT: 15: 48 
8b 98 00 00 00 00 movq (%rax), %rbx +# CHECK-NEXT: 1c: 90 nop +# CHECK-NEXT: 1d: 90 nop +# CHECK-NEXT: 1e: 90 nop +# CHECK-NEXT: 1f: 90 nop +# CHECK-NEXT: 20: 48 85 db testq %rbx, %rbx +# CHECK-NEXT: 23: 74 00 je 0 <_start+0x25> +# CHECK-NEXT: 25: c3 retq + + .text + .globl _start +_start: + testl %edx, %edx + je .L1 + testq %rdi, %rdi + je .L1 + leaq bar@tlsld(%rip), %rdi + call __tls_get_addr@PLT + movq bar@DTPOFF(%rax), %rbx + testq %rbx, %rbx + je .L1 +.L1: + ret + .section ".tdata", "awT", @progbits +bar: + .long 10 Index: llvm/test/MC/X86/align-branch-64-7a.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/align-branch-64-7a.s @@ -0,0 +1,32 @@ +# Check no prefix is added to the instruction if there is an align directive between the instruction and the target branch +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=32 --x86-align-branch=jmp --x86-align-branch-prefix-size=5 %s | llvm-objdump -d - | FileCheck %s + +# CHECK: 0000000000000000 test1: +# CHECK-NEXT: 0: 31 d2 xorl %edx, %edx +# CHECK-NEXT: 2: 89 8c 24 84 00 00 00 movl %ecx, 132(%rsp) +# CHECK-NEXT: 9: 4c 89 c1 movq %r8, %rcx +# CHECK-NEXT: c: 4c 8b 8c 24 88 00 00 00 movq 136(%rsp), %r9 +# CHECK-NEXT: 14: 90 nop +# CHECK-NEXT: 15: 90 nop +# CHECK-NEXT: 16: 90 nop +# CHECK-NEXT: 17: 90 nop +# CHECK-NEXT: 18: 66 66 90 nop +# CHECK-NEXT: 1b: 2e 2e 4c 89 c1 movq %r8, %rcx +# CHECK-NEXT: 20: eb de jmp {{.*}} +# CHECK-NEXT: 22: c3 retq + + .text + .globl test1 +test1: +.Ltmp0: + xorl %edx, %edx + movl %ecx, 132(%rsp) + movq %r8, %rcx + movq 136(%rsp), %r9 + .p2align 3, 0x90 + .byte 102 + .byte 102 + nop + movq %r8, %rcx + jmp .Ltmp0 + retq