Index: llvm/include/llvm/MC/MCFragment.h
===================================================================
--- llvm/include/llvm/MC/MCFragment.h
+++ llvm/include/llvm/MC/MCFragment.h
@@ -563,6 +563,8 @@
 /// Represents required padding such that a particular other set of fragments
 /// does not cross a particular power-of-two boundary. The other fragments must
 /// follow this one within the same section.
+/// If AvoidEndAlign is set, this fragment will emit a minimum size nop to
+/// prevent the fragment following it from ending at a given \p AlignBoundary.
 class MCBoundaryAlignFragment : public MCFragment {
   /// The alignment requirement of the branch to be aligned.
   Align AlignBoundary;
@@ -571,6 +573,9 @@
   /// The size of the fragment. The size is lazily set during relaxation, and
   /// is not meaningful before that.
   uint64_t Size = 0;
+  /// Whether this fragment pads the subsequent fragment to prevent it from
+  /// ending at AlignBoundary.
+  bool IsAvoidEndAlign = false;
 
 public:
   MCBoundaryAlignFragment(Align AlignBoundary, MCSection *Sec = nullptr)
@@ -589,6 +594,9 @@
     LastFragment = F;
   }
 
+  bool isAvoidEndAlign() const { return IsAvoidEndAlign; }
+  void setAvoidEndAlign(bool V) { IsAvoidEndAlign = V; }
+
   static bool classof(const MCFragment *F) {
     return F->getKind() == MCFragment::FT_BoundaryAlign;
   }
Index: llvm/lib/MC/MCAssembler.cpp
===================================================================
--- llvm/lib/MC/MCAssembler.cpp
+++ llvm/lib/MC/MCAssembler.cpp
@@ -1083,14 +1083,28 @@
 
   uint64_t AlignedOffset = Layout.getFragmentOffset(&BF);
   uint64_t AlignedSize = 0;
-  for (const MCFragment *F = BF.getLastFragment(); F != &BF;
-       F = F->getPrevNode())
-    AlignedSize += computeFragmentSize(Layout, *F);
-
+  uint64_t NewSize = 0U;
   Align BoundaryAlignment = BF.getAlignment();
-  uint64_t NewSize = needPadding(AlignedOffset, AlignedSize, BoundaryAlignment)
-                         ? offsetToAlignment(AlignedOffset, BoundaryAlignment)
-                         : 0U;
+
+  if (BF.isAvoidEndAlign()) {
+    // Only the fragment immediately following this BoundaryAlign is
+    // considered; getNextNode() returns null when BF is last in its section.
+    const MCFragment *NF = BF.getNextNode();
+    assert(NF && "AvoidEndAlign BoundaryAlign must be followed by a fragment");
+    AlignedSize = computeFragmentSize(Layout, *NF);
+
+    // Pad with a minimum size nop.
+    if (isAgainstBoundary(AlignedOffset, AlignedSize, BoundaryAlignment))
+      NewSize = getBackend().getMinimumNopSize();
+  } else {
+    for (const MCFragment *F = BF.getLastFragment(); F != &BF;
+         F = F->getPrevNode())
+      AlignedSize += computeFragmentSize(Layout, *F);
+
+    if (needPadding(AlignedOffset, AlignedSize, BoundaryAlignment))
+      NewSize = offsetToAlignment(AlignedOffset, BoundaryAlignment);
+  }
+
   if (NewSize == BF.getSize())
     return false;
   BF.setSize(NewSize);
Index: llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
===================================================================
--- llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -75,6 +75,13 @@
 
 X86AlignBranchKind X86AlignBranchKindLoc;
 
+cl::opt<bool> X86AlignForMacroFusion(
+    "x86-align-for-macrofusion", cl::init(false),
+    cl::desc("Align cmp+jcc pairs to avoid the 64B boundary falling between "
+             "instructions, improving Macro-Op Fusion. May break assumptions "
+             "about labels corresponding to particular instructions, and "
+             "should be used with caution."));
+
 cl::opt<unsigned> X86AlignBranchBoundary(
     "x86-align-branch-boundary", cl::init(0),
     cl::desc(
@@ -150,11 +157,19 @@
       // jumps, and (unfused) conditional jumps with nops. Both the
       // instructions aligned and the alignment method (nop vs prefix) may
       // change in the future.
-      AlignBoundary = assumeAligned(32);;
+      AlignBoundary = assumeAligned(32);
       AlignBranchType.addKind(X86::AlignBranchFused);
       AlignBranchType.addKind(X86::AlignBranchJcc);
       AlignBranchType.addKind(X86::AlignBranchJmp);
     }
+    if (X86AlignForMacroFusion) {
+      assert(!X86AlignBranchWithin32BBoundaries &&
+             "X86AlignForMacroFusion is incompatible with "
+             "X86AlignBranchWithin32BBoundaries");
+      AlignBoundary = assumeAligned(64);
+      AlignBranchType.addKind(X86::AlignBranchFused);
+      AlignBranchType.addKind(X86::AlignBranchJcc);
+    }
     // Allow overriding defaults set by master flag
     if (X86AlignBranchBoundary.getNumOccurrences())
       AlignBoundary = assumeAligned(X86AlignBranchBoundary);
@@ -615,7 +630,7 @@
     // Macro fusion actually happens and there is no other fragment inserted
     // after the previous instruction.
     //
-    // Do nothing here since we already inserted a BoudaryAlign fragment when
+    // Do nothing here since we already inserted a BoundaryAlign fragment when
     // we met the first instruction in the fused pair and we'll tie them
     // together in emitInstructionEnd.
     //
@@ -633,11 +648,14 @@
     return;
   }
 
-  if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
-                          isFirstMacroFusibleInst(Inst, *MCII))) {
+  bool IsBranchFused = (AlignBranchType & X86::AlignBranchFused) &&
+                       isFirstMacroFusibleInst(Inst, *MCII);
+  if (needAlign(Inst) || IsBranchFused) {
     // If we meet a unfused branch or the first instuction in a fusiable pair,
     // insert a BoundaryAlign fragment.
     OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary));
+    if (X86AlignForMacroFusion && IsBranchFused)
+      PendingBA->setAvoidEndAlign(true);
   }
 }
 
@@ -655,7 +673,7 @@
   if (!needAlign(Inst) || !PendingBA)
     return;
 
-  // Tie the aligned instructions into a a pending BoundaryAlign.
+  // Tie the aligned instructions into a pending BoundaryAlign.
   PendingBA->setLastFragment(CF);
   PendingBA = nullptr;
 
Index: llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
===================================================================
--- llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -981,7 +981,7 @@
     return (TSFlags & X86II::ImmMask) != 0;
   }
 
-  /// Decode the "size of immediate" field from the TSFlags field of the 
+  /// Decode the "size of immediate" field from the TSFlags field of the
   /// specified instruction.
   inline unsigned getSizeOfImm(uint64_t TSFlags) {
     switch (TSFlags & X86II::ImmMask) {
Index: llvm/test/MC/X86/auto-mf-align.s
===================================================================
--- /dev/null
+++ llvm/test/MC/X86/auto-mf-align.s
@@ -0,0 +1,32 @@
+# RUN: llvm-mc -triple=x86_64 -x86-align-for-macrofusion %s -filetype=obj | llvm-objdump --no-show-raw-insn -d - | FileCheck %s
+
+# no padding is expected since test doesn't end at alignment boundary:
+# CHECK-NOT: nop
+  testl %eax, %eax
+# CHECK: testl %eax, %eax
+  je .LBB0
+
+.nops 57
+  int3
+# BoundaryAlign followed by MCDataFragment:
+# inserts nop because `test` would end at alignment boundary:
+# CHECK: 3e: nop
+  testl %eax, %eax
+# CHECK-NEXT: 3f: testl %eax, %eax
+  je .LBB0
+# CHECK-NEXT: 41: je
+.LBB0:
+  retq
+
+.p2align 6
+.L0:
+.nops 57
+  int3
+# BoundaryAlign followed by RelaxableFragment:
+# CHECK: ba: nop
+  cmpl $(.L1-.L0), %eax
+# CHECK-NEXT: bb: cmpl
+  je .L0
+# CHECK-NEXT: c1: je
+.nops 65
+.L1: