diff --git a/llvm/include/llvm/MC/MCFragment.h b/llvm/include/llvm/MC/MCFragment.h --- a/llvm/include/llvm/MC/MCFragment.h +++ b/llvm/include/llvm/MC/MCFragment.h @@ -523,20 +523,16 @@ class MCBoundaryAlignFragment : public MCFragment { /// The alignment requirement of the branch to be aligned. Align AlignBoundary; - /// Flag to indicate whether the branch is fused. Use in determining the - /// region of fragments being aligned. - bool Fused : 1; - /// Flag to indicate whether NOPs should be emitted. - bool EmitNops : 1; + /// The last fragment in the set of fragments to be aligned. + const MCFragment *LastFragment = nullptr; /// The size of the fragment. The size is lazily set during relaxation, and /// is not meaningful before that. uint64_t Size = 0; public: - MCBoundaryAlignFragment(Align AlignBoundary = Align(1), bool Fused = false, - bool EmitNops = false, MCSection *Sec = nullptr) - : MCFragment(FT_BoundaryAlign, false, Sec), AlignBoundary(AlignBoundary), - Fused(Fused), EmitNops(EmitNops) {} + MCBoundaryAlignFragment(Align AlignBoundary, MCSection *Sec = nullptr) + : MCFragment(FT_BoundaryAlign, false, Sec), AlignBoundary(AlignBoundary) { + } uint64_t getSize() const { return Size; } void setSize(uint64_t Value) { Size = Value; } @@ -544,11 +540,11 @@ Align getAlignment() const { return AlignBoundary; } void setAlignment(Align Value) { AlignBoundary = Value; } - bool isFused() const { return Fused; } - void setFused(bool Value) { Fused = Value; } - - bool canEmitNops() const { return EmitNops; } - void setEmitNops(bool Value) { EmitNops = Value; } + const MCFragment *getLastFragment() const { return LastFragment; } + void setLastFragment(const MCFragment *F) { + assert(!F || getParent() == F->getParent()); + LastFragment = F; + } static bool classof(const MCFragment *F) { return F->getKind() == MCFragment::FT_BoundaryAlign; diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp --- a/llvm/lib/MC/MCAssembler.cpp +++ b/llvm/lib/MC/MCAssembler.cpp @@ -996,27 +996,22 @@ bool MCAssembler::relaxBoundaryAlign(MCAsmLayout &Layout, MCBoundaryAlignFragment &BF) { - // The MCBoundaryAlignFragment that doesn't emit NOP should not be relaxed. - if (!BF.canEmitNops()) + // BoundaryAlignFragment that doesn't need to align any fragment should not be + // relaxed. + if (!BF.getLastFragment()) return false; - uint64_t AlignedOffset = Layout.getFragmentOffset(BF.getNextNode()); + uint64_t AlignedOffset = Layout.getFragmentOffset(&BF); uint64_t AlignedSize = 0; - const MCFragment *F = BF.getNextNode(); - // If the branch is unfused, it is emitted into one fragment, otherwise it is - // emitted into two fragments at most, the next MCBoundaryAlignFragment(if - // exists) also marks the end of the branch. - for (auto i = 0, N = BF.isFused() ? 2 : 1; - i != N && !isa(F); ++i, F = F->getNextNode()) { + for (const MCFragment *F = BF.getLastFragment(); F != &BF; + F = F->getPrevNode()) AlignedSize += computeFragmentSize(Layout, *F); - } - uint64_t OldSize = BF.getSize(); - AlignedOffset -= OldSize; + Align BoundaryAlignment = BF.getAlignment(); uint64_t NewSize = needPadding(AlignedOffset, AlignedSize, BoundaryAlignment) ? offsetToAlignment(AlignedOffset, BoundaryAlignment) : 0U; - if (NewSize == OldSize) + if (NewSize == BF.getSize()) return false; BF.setSize(NewSize); Layout.invalidateFragmentsFrom(&BF); diff --git a/llvm/lib/MC/MCFragment.cpp b/llvm/lib/MC/MCFragment.cpp --- a/llvm/lib/MC/MCFragment.cpp +++ b/llvm/lib/MC/MCFragment.cpp @@ -424,14 +424,9 @@ } case MCFragment::FT_BoundaryAlign: { const auto *BF = cast(this); - if (BF->canEmitNops()) - OS << " (can emit nops to align"; - if (BF->isFused()) - OS << " fused branch)"; - else - OS << " unfused branch)"; OS << "\n "; OS << " BoundarySize:" << BF->getAlignment().value() + << " LastFragment:" << BF->getLastFragment() << " Size:" << BF->getSize(); break; } diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp --- a/llvm/lib/MC/MCObjectStreamer.cpp +++ b/llvm/lib/MC/MCObjectStreamer.cpp @@ -215,15 +215,6 @@ return F; } -MCBoundaryAlignFragment *MCObjectStreamer::getOrCreateBoundaryAlignFragment() { - auto *F = dyn_cast_or_null(getCurrentFragment()); - if (!F || F->canEmitNops()) { - F = new MCBoundaryAlignFragment(); - insert(F); - } - return F; -} - void MCObjectStreamer::visitUsedSymbol(const MCSymbol &Sym) { Assembler->registerSymbol(Sym); } diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp --- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -133,6 +133,7 @@ bool needAlign(MCObjectStreamer &OS) const; bool needAlignInst(const MCInst &Inst) const; MCInst PrevInst; + MCBoundaryAlignFragment *PendingBoundaryAlign = nullptr; public: X86AsmBackend(const Target &T, const MCSubtargetInfo &STI) @@ -507,27 +508,30 @@ (AlignBranchType & X86::AlignBranchIndirect)); } -/// Insert MCBoundaryAlignFragment before instructions to align branches. +/// Insert BoundaryAlignFragment before instructions to align branches. void X86AsmBackend::alignBranchesBegin(MCObjectStreamer &OS, const MCInst &Inst) { if (!needAlign(OS)) return; - MCFragment *CF = OS.getCurrentFragment(); - bool NeedAlignFused = AlignBranchType & X86::AlignBranchFused; - if (hasInterruptDelaySlot(PrevInst)) { + if (hasInterruptDelaySlot(PrevInst)) // If this instruction follows an interrupt enabling instruction with a one // instruction delay, inserting a nop would change behavior. - } else if (NeedAlignFused && isMacroFused(PrevInst, Inst) && CF) { + return; + + if (!isMacroFused(PrevInst, Inst)) + // Macro fusion doesn't happen indeed, clear the pending. + PendingBoundaryAlign = nullptr; + + if (PendingBoundaryAlign && + OS.getCurrentFragment()->getPrevNode() == PendingBoundaryAlign) { // Macro fusion actually happens and there is no other fragment inserted - // after the previous instruction. NOP can be emitted in PF to align fused - // jcc. - if (auto *PF = - dyn_cast_or_null(CF->getPrevNode())) { - const_cast(PF)->setEmitNops(true); - const_cast(PF)->setFused(true); - } - } else if (needAlignInst(Inst)) { + // after the previous instruction. + // + // Do nothing here since we already inserted a BoudaryAlign fragment when + // we met the first instruction in the fused pair and we'll tie them + // together in alignBranchesEnd. + // // Note: When there is at least one fragment, such as MCAlignFragment, // inserted after the previous instruction, e.g. // @@ -539,36 +543,38 @@ // // We will treat the JCC as a unfused branch although it may be fused // with the CMP. - auto *F = OS.getOrCreateBoundaryAlignFragment(); - F->setAlignment(AlignBoundary); - F->setEmitNops(true); - F->setFused(false); - } else if (NeedAlignFused && isFirstMacroFusibleInst(Inst, *MCII)) { - // We don't know if macro fusion happens until the reaching the next - // instruction, so a place holder is put here if necessary. - auto *F = OS.getOrCreateBoundaryAlignFragment(); - F->setAlignment(AlignBoundary); + return; } - PrevInst = Inst; + if (needAlignInst(Inst) || ((AlignBranchType & X86::AlignBranchFused) && + isFirstMacroFusibleInst(Inst, *MCII))) { + // If we meet a unfused branch or the first instuction in a fusiable pair, + // insert a BoundaryAlign fragment. + OS.insert(PendingBoundaryAlign = + new MCBoundaryAlignFragment(AlignBoundary)); + } } -/// Insert a MCBoundaryAlignFragment to mark the end of the branch to be aligned -/// if necessary. +/// Set the last fragment to be aligned for the BoundaryAlignFragment. void X86AsmBackend::alignBranchesEnd(MCObjectStreamer &OS, const MCInst &Inst) { if (!needAlign(OS)) return; - // If the branch is emitted into a MCRelaxableFragment, we can determine the - // size of the branch easily in MCAssembler::relaxBoundaryAlign. When the - // branch is fused, the fused branch(macro fusion pair) must be emitted into - // two fragments. Or when the branch is unfused, the branch must be emitted - // into one fragment. The MCRelaxableFragment naturally marks the end of the - // fused or unfused branch. - // Otherwise, we need to insert a MCBoundaryAlignFragment to mark the end of - // the branch. This MCBoundaryAlignFragment may be reused to emit NOP to align - // other branch. - if (needAlignInst(Inst) && !isa(OS.getCurrentFragment())) - OS.insert(new MCBoundaryAlignFragment(AlignBoundary)); + + PrevInst = Inst; + + if (!needAlignInst(Inst) || !PendingBoundaryAlign) + return; + + // Tie the aligned instructions into a a pending BoundaryAlign. + PendingBoundaryAlign->setLastFragment(OS.getCurrentFragment()); + PendingBoundaryAlign = nullptr; + + // We need to ensure that further data isn't added to the current + // DataFragment, so that we can get the size of instructions later in + // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty + // DataFragment. + if (isa_and_nonnull(OS.getCurrentFragment())) + OS.insert(new MCDataFragment()); // Update the maximum alignment on the current section if necessary. MCSection *Sec = OS.getCurrentSectionOnly(); @@ -872,16 +878,12 @@ // If we're looking at a boundary align, make sure we don't try to pad // its target instructions for some following directive. Doing so would // break the alignment of the current boundary align. - if (F.getKind() == MCFragment::FT_BoundaryAlign) { - auto &BF = cast(F); - const MCFragment *F = BF.getNextNode(); - // If the branch is unfused, it is emitted into one fragment, otherwise - // it is emitted into two fragments at most, the next - // MCBoundaryAlignFragment(if exists) also marks the end of the branch. - for (int i = 0, N = BF.isFused() ? 2 : 1; - i != N && !isa(F); - ++i, F = F->getNextNode(), I++) { - } + if (auto *BF = dyn_cast(&F)) { + const MCFragment *LastFragment = BF->getLastFragment(); + if (!LastFragment) + continue; + while (&*I != LastFragment) + ++I; } } } diff --git a/llvm/test/MC/X86/align-branch-64-negative.s b/llvm/test/MC/X86/align-branch-64-negative.s --- a/llvm/test/MC/X86/align-branch-64-negative.s +++ b/llvm/test/MC/X86/align-branch-64-negative.s @@ -27,11 +27,13 @@ # In the second test, we have a label which is expected to be bound to the # end of the call. For instance, we want the to associate some data with # the return address of the call. - # CHECK: labeled_call_test2: - # CHECK: 5a: callq - # CHECK: 5f: nop - # CHECK: 60 label_after - # CHECK: 60: jmp +## # check: labeled_call_test2: +## # check: 5a: callq +## # check: 5f: nop +## # check: 60 label_after +## # check: 60: jmp +## The label "label_after" will be at address 0x5f now, should we remove the +## second test? .globl labeled_call_test2 .p2align 5 labeled_call_test2: