diff --git a/llvm/include/llvm/MC/MCFragment.h b/llvm/include/llvm/MC/MCFragment.h --- a/llvm/include/llvm/MC/MCFragment.h +++ b/llvm/include/llvm/MC/MCFragment.h @@ -563,6 +563,8 @@ /// Represents required padding such that a particular other set of fragments /// does not cross a particular power-of-two boundary. The other fragments must /// follow this one within the same section. +/// If IsAvoidEndAlign is set, this fragment instead emits a minimum-size nop +/// to keep the fragment following it from ending at AlignBoundary. class MCBoundaryAlignFragment : public MCFragment { /// The alignment requirement of the branch to be aligned. Align AlignBoundary; @@ -571,6 +573,9 @@ /// The size of the fragment. The size is lazily set during relaxation, and /// is not meaningful before that. uint64_t Size = 0; + /// Whether this fragment pads the subsequent fragment to prevent it from + /// ending at AlignBoundary. + bool IsAvoidEndAlign = false; public: MCBoundaryAlignFragment(Align AlignBoundary, MCSection *Sec = nullptr) @@ -589,6 +594,9 @@ LastFragment = F; } + bool isAvoidEndAlign() const { return IsAvoidEndAlign; } + void setAvoidEndAlign(bool V) { IsAvoidEndAlign = V; } + static bool classof(const MCFragment *F) { return F->getKind() == MCFragment::FT_BoundaryAlign; } diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp --- a/llvm/lib/MC/MCAssembler.cpp +++ b/llvm/lib/MC/MCAssembler.cpp @@ -1083,14 +1083,26 @@ uint64_t AlignedOffset = Layout.getFragmentOffset(&BF); uint64_t AlignedSize = 0; - for (const MCFragment *F = BF.getLastFragment(); F != &BF; - F = F->getPrevNode()) - AlignedSize += computeFragmentSize(Layout, *F); - + uint64_t NewSize = 0; Align BoundaryAlignment = BF.getAlignment(); - uint64_t NewSize = needPadding(AlignedOffset, AlignedSize, BoundaryAlignment) - ? offsetToAlignment(AlignedOffset, BoundaryAlignment) - : 0U; + + if (BF.isAvoidEndAlign()) { + // Only the fragment immediately following this BoundaryAlign is measured.
+ const MCFragment *NF = BF.getNextNode(); + AlignedSize = computeFragmentSize(Layout, *NF); + + // A minimum-size nop shifts the next fragment's end off the boundary. + if (isAgainstBoundary(AlignedOffset, AlignedSize, BoundaryAlignment)) + NewSize = getBackend().getMinimumNopSize(); + } else { + for (const MCFragment *F = BF.getLastFragment(); F != &BF; + F = F->getPrevNode()) + AlignedSize += computeFragmentSize(Layout, *F); + + if (needPadding(AlignedOffset, AlignedSize, BoundaryAlignment)) + NewSize = offsetToAlignment(AlignedOffset, BoundaryAlignment); + } + if (NewSize == BF.getSize()) return false; BF.setSize(NewSize); diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp --- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -75,6 +75,13 @@ X86AlignBranchKind X86AlignBranchKindLoc; +cl::opt<bool> X86AlignForMacroFusion( + "x86-align-for-macrofusion", cl::init(false), + cl::desc( + "Align macro-fusion pairs to avoid 64B boundary falling between " + "the instructions. May break assumptions about labels corresponding " + "to particular instructions, and should be used with caution.")); + cl::opt<unsigned> X86AlignBranchBoundary( "x86-align-branch-boundary", cl::init(0), cl::desc( @@ -145,15 +152,25 @@ X86AsmBackend(const Target &T, const MCSubtargetInfo &STI) : MCAsmBackend(support::little), STI(STI), MCII(T.createMCInstrInfo()) { + if (X86AlignForMacroFusion) { + AlignBoundary = assumeAligned(64); + AlignBranchType.addKind(X86::AlignBranchFused); + AlignBranchType.addKind(X86::AlignBranchJcc); + } if (X86AlignBranchWithin32BBoundaries) { // At the moment, this defaults to aligning fused branches, unconditional // jumps, and (unfused) conditional jumps with nops. Both the // instructions aligned and the alignment method (nop vs prefix) may // change in the future.
- AlignBoundary = assumeAligned(32);; + AlignBoundary = assumeAligned(32); AlignBranchType.addKind(X86::AlignBranchFused); AlignBranchType.addKind(X86::AlignBranchJcc); AlignBranchType.addKind(X86::AlignBranchJmp); + if (X86AlignForMacroFusion) { + // X86AlignBranchWithin32BBoundaries is stricter: it already keeps fused + // pairs from crossing a 32B boundary, so turn X86AlignForMacroFusion off. + X86AlignForMacroFusion = false; + } } // Allow overriding defaults set by master flag if (X86AlignBranchBoundary.getNumOccurrences()) @@ -615,7 +632,7 @@ // Macro fusion actually happens and there is no other fragment inserted // after the previous instruction. // - // Do nothing here since we already inserted a BoudaryAlign fragment when + // Do nothing here since we already inserted a BoundaryAlign fragment when // we met the first instruction in the fused pair and we'll tie them // together in emitInstructionEnd. // @@ -633,11 +650,14 @@ return; } - if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) && - isFirstMacroFusibleInst(Inst, *MCII))) { + bool IsBranchFused = (AlignBranchType & X86::AlignBranchFused) && + isFirstMacroFusibleInst(Inst, *MCII); + if (needAlign(Inst) || IsBranchFused) { // If we meet a unfused branch or the first instuction in a fusiable pair, // insert a BoundaryAlign fragment. OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary)); + if (X86AlignForMacroFusion && IsBranchFused) + PendingBA->setAvoidEndAlign(true); } } @@ -655,7 +675,7 @@ if (!needAlign(Inst) || !PendingBA) return; - // Tie the aligned instructions into a a pending BoundaryAlign. + // Tie the aligned instructions into a pending BoundaryAlign.
PendingBA->setLastFragment(CF); PendingBA = nullptr; diff --git a/llvm/test/MC/X86/auto-mf-align.s b/llvm/test/MC/X86/auto-mf-align.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/X86/auto-mf-align.s @@ -0,0 +1,32 @@ +# RUN: llvm-mc -triple=x86_64 -x86-align-for-macrofusion %s -filetype=obj | llvm-objdump --no-show-raw-insn -d - | FileCheck %s + +# No padding is expected since `testl` doesn't end at the alignment boundary: +# CHECK-NOT: nop + testl %eax, %eax +# CHECK: testl %eax, %eax + je .LBB0 + +.nops 57 + int3 +# BoundaryAlign followed by MCDataFragment: +# a nop is inserted because `testl` would otherwise end at the alignment boundary: +# CHECK: 3e: nop + testl %eax, %eax +# CHECK-NEXT: 3f: testl %eax, %eax + je .LBB0 +# CHECK-NEXT: 41: je +.LBB0: + retq + +.p2align 6 +.L0: +.nops 57 + int3 +# BoundaryAlign followed by RelaxableFragment (`cmpl` would otherwise end at the alignment boundary): +# CHECK: ba: nop + cmpl $(.L1-.L0), %eax +# CHECK-NEXT: bb: cmpl + je .L0 +# CHECK-NEXT: c1: je +.nops 65 +.L1: