diff --git a/llvm/include/llvm/MC/MCFragment.h b/llvm/include/llvm/MC/MCFragment.h
--- a/llvm/include/llvm/MC/MCFragment.h
+++ b/llvm/include/llvm/MC/MCFragment.h
@@ -563,6 +563,8 @@
 /// Represents required padding such that a particular other set of fragments
 /// does not cross a particular power-of-two boundary. The other fragments must
 /// follow this one within the same section.
+/// If AvoidEndAlign is set, this fragment will emit a minimum-size nop to
+/// prevent the fragment following it from ending at the given AlignBoundary.
 class MCBoundaryAlignFragment : public MCFragment {
   /// The alignment requirement of the branch to be aligned.
   Align AlignBoundary;
@@ -571,6 +573,9 @@
   /// The size of the fragment. The size is lazily set during relaxation, and
   /// is not meaningful before that.
   uint64_t Size = 0;
+  /// Whether this fragment pads the subsequent fragment to prevent it from
+  /// ending at AlignBoundary.
+  bool IsAvoidEndAlign = false;
 
 public:
   MCBoundaryAlignFragment(Align AlignBoundary, MCSection *Sec = nullptr)
@@ -589,6 +594,9 @@
     LastFragment = F;
   }
 
+  bool isAvoidEndAlign() const { return IsAvoidEndAlign; }
+  void setAvoidEndAlign(bool V) { IsAvoidEndAlign = V; }
+
   static bool classof(const MCFragment *F) {
     return F->getKind() == MCFragment::FT_BoundaryAlign;
   }
diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp
--- a/llvm/lib/MC/MCAssembler.cpp
+++ b/llvm/lib/MC/MCAssembler.cpp
@@ -1083,14 +1083,26 @@
   uint64_t AlignedOffset = Layout.getFragmentOffset(&BF);
   uint64_t AlignedSize = 0;
-  for (const MCFragment *F = BF.getLastFragment(); F != &BF;
-       F = F->getPrevNode())
-    AlignedSize += computeFragmentSize(Layout, *F);
-
+  uint64_t NewSize = 0;
   Align BoundaryAlignment = BF.getAlignment();
-  uint64_t NewSize = needPadding(AlignedOffset, AlignedSize, BoundaryAlignment)
-                         ? offsetToAlignment(AlignedOffset, BoundaryAlignment)
-                         : 0U;
+
+  if (BF.isAvoidEndAlign()) {
+    // Get fragment size for the fragment following this BoundaryAlign.
+    const MCFragment *NF = BF.getNextNode();
+    AlignedSize = computeFragmentSize(Layout, *NF);
+
+    // Pad with a minimum-size nop.
+    if (isAgainstBoundary(AlignedOffset, AlignedSize, BoundaryAlignment))
+      NewSize = getBackend().getMinimumNopSize();
+  } else {
+    for (const MCFragment *F = BF.getLastFragment(); F != &BF;
+         F = F->getPrevNode())
+      AlignedSize += computeFragmentSize(Layout, *F);
+
+    if (needPadding(AlignedOffset, AlignedSize, BoundaryAlignment))
+      NewSize = offsetToAlignment(AlignedOffset, BoundaryAlignment);
+  }
+
   if (NewSize == BF.getSize())
     return false;
   BF.setSize(NewSize);
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
--- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -71,10 +71,18 @@
   operator uint8_t() const { return AlignBranchKind; }
   void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; }
+  void clear() { AlignBranchKind = 0; }
 };
 
 X86AlignBranchKind X86AlignBranchKindLoc;
 
+cl::opt<bool> X86AlignForMacroFusion(
+    "x86-align-for-macrofusion", cl::init(false),
+    cl::desc(
+        "Align macro-fusion pairs to avoid 64B boundary falling between "
+        "the instructions. May break assumptions about labels corresponding "
+        "to particular instructions, and should be used with caution."));
+
 cl::opt<unsigned> X86AlignBranchBoundary(
     "x86-align-branch-boundary", cl::init(0),
     cl::desc(
@@ -134,6 +142,7 @@
   MCBoundaryAlignFragment *PendingBA = nullptr;
   std::pair<MCFragment *, size_t> PrevInstPosition;
   bool CanPadInst;
+  bool AlignForMacroFusionOnly = false;
 
   uint8_t determinePaddingPrefix(const MCInst &Inst) const;
   bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
@@ -150,7 +159,7 @@
       // jumps, and (unfused) conditional jumps with nops. Both the
       // instructions aligned and the alignment method (nop vs prefix) may
       // change in the future.
-      AlignBoundary = assumeAligned(32);;
+      AlignBoundary = assumeAligned(32);
       AlignBranchType.addKind(X86::AlignBranchFused);
      AlignBranchType.addKind(X86::AlignBranchJcc);
       AlignBranchType.addKind(X86::AlignBranchJmp);
@@ -162,6 +171,24 @@
       AlignBranchType = X86AlignBranchKindLoc;
     if (X86PadMaxPrefixSize.getNumOccurrences())
       TargetPrefixMax = X86PadMaxPrefixSize;
+    // Clean the alignment request.
+    if (AlignBoundary == Align())
+      AlignBranchType.clear();
+    else if (!AlignBranchType)
+      AlignBoundary = Align();
+    // X86AlignForMacroFusion overrides AlignBoundary and AlignBranchType.
+    if (X86AlignForMacroFusion) {
+      AlignForMacroFusionOnly = true;
+      // Constrain or initialize AlignBoundary to the boundary for macro-fusion
+      // alignment (64B).
+      if (AlignBoundary > Align(64) || AlignBoundary == Align()) {
+        AlignBoundary = assumeAligned(64);
+      }
+      // Required AlignBranch kinds for macro-fusion alignment.
+      AlignBranchType.clear();
+      AlignBranchType.addKind(X86::AlignBranchFused);
+      AlignBranchType.addKind(X86::AlignBranchJcc);
+    }
   }
 
   bool allowAutoPadding() const override;
@@ -615,7 +642,7 @@
     // Macro fusion actually happens and there is no other fragment inserted
     // after the previous instruction.
     //
-    // Do nothing here since we already inserted a BoudaryAlign fragment when
+    // Do nothing here since we already inserted a BoundaryAlign fragment when
     // we met the first instruction in the fused pair and we'll tie them
     // together in emitInstructionEnd.
     //
@@ -633,11 +660,18 @@
     return;
   }
 
-  if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
-                          isFirstMacroFusibleInst(Inst, *MCII))) {
+  bool IsFirstMacroFusibleInstAndMayNeedAlign =
+      (AlignBranchType & X86::AlignBranchFused) &&
+      isFirstMacroFusibleInst(Inst, *MCII);
+  if (needAlign(Inst) || IsFirstMacroFusibleInstAndMayNeedAlign) {
     // If we meet a unfused branch or the first instuction in a fusiable pair,
     // insert a BoundaryAlign fragment.
     OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary));
+    // Macro-fusion alignment overrides the BoundaryAlign logic: consider only
+    // the size of the first instruction (not the whole pair) and check whether
+    // it ends against the boundary (not whether it crosses the boundary).
+    if (AlignForMacroFusionOnly)
+      PendingBA->setAvoidEndAlign(true);
   }
 }
 
@@ -655,7 +689,7 @@
   if (!needAlign(Inst) || !PendingBA)
     return;
 
-  // Tie the aligned instructions into a a pending BoundaryAlign.
+  // Tie the aligned instructions into a pending BoundaryAlign.
   PendingBA->setLastFragment(CF);
   PendingBA = nullptr;
diff --git a/llvm/test/MC/X86/auto-mf-align.s b/llvm/test/MC/X86/auto-mf-align.s
new file mode 100644
--- /dev/null
+++ b/llvm/test/MC/X86/auto-mf-align.s
@@ -0,0 +1,32 @@
+# RUN: llvm-mc -triple=x86_64 -x86-align-for-macrofusion %s -filetype=obj | llvm-objdump --no-show-raw-insn -d - | FileCheck %s
+
+# No padding is expected here since this testl does not end at the alignment boundary:
+# CHECK-NOT: nop
+  testl %eax, %eax
+# CHECK: testl %eax, %eax
+  je .LBB0
+
+.nops 57
+  int3
+# BoundaryAlign followed by MCDataFragment:
+# a nop is inserted because this testl would otherwise end at the alignment boundary:
+# CHECK: 3e: nop
+  testl %eax, %eax
+# CHECK-NEXT: 3f: testl %eax, %eax
+  je .LBB0
+# CHECK-NEXT: 41: je
+.LBB0:
+  retq
+
+.p2align 6
+.L0:
+.nops 57
+  int3
+# BoundaryAlign followed by RelaxableFragment:
+# CHECK: ba: nop
+  cmpl $(.L1-.L0), %eax
+# CHECK-NEXT: bb: cmpl
+  je .L0
+# CHECK-NEXT: c1: je
+.nops 65
+.L1:
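
Note on the relaxation check: the new branch in MCAssembler::relaxBoundaryAlign calls isAgainstBoundary(), which is not defined in the hunks shown here. The sketch below is only an illustration of the semantics this patch relies on; the helper's name, placement, and signature are assumptions, not something this diff adds. The intent is that the first instruction of the fused pair must not end exactly on the alignment boundary, otherwise the fused jcc would start on the far side of it.

#include "llvm/Support/Alignment.h"
#include <cstdint>

// Sketch only, not part of this patch: returns true when the region
// [StartAddr, StartAddr + Size) ends exactly on a BoundaryAlignment boundary,
// i.e. the next instruction would start in the next aligned chunk.
static bool isAgainstBoundary(uint64_t StartAddr, uint64_t Size,
                              llvm::Align BoundaryAlignment) {
  const uint64_t EndAddr = StartAddr + Size;
  return (EndAddr & (BoundaryAlignment.value() - 1)) == 0;
}

Worked against the test above: the second testl would start at 0x3e and is two bytes long, so 0x3e + 2 == 0x40 is a multiple of 64 and a single minimum-size nop is emitted; after padding, testl sits at 0x3f and je at 0x41, so the 64B boundary falls inside testl rather than between the fused instructions.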