Index: llvm/include/llvm/MC/MCFragment.h =================================================================== --- llvm/include/llvm/MC/MCFragment.h +++ llvm/include/llvm/MC/MCFragment.h @@ -519,35 +519,34 @@ /// Represents required padding such that a particular other set of fragments /// does not cross a particular power-of-two boundary. The other fragments must -/// follow this one within the same section. +/// follow this one within the same section. The set of aligned fragments is +/// (ThisFragment, LastFragment]. That is, the range starting at the fragment +/// following this one and stopping at LastFragment (inclusive). class MCBoundaryAlignFragment : public MCFragment { - /// The alignment requirement of the branch to be aligned. - Align AlignBoundary; - /// Flag to indicate whether the branch is fused. Use in determining the - /// region of fragments being aligned. - bool Fused : 1; - /// Flag to indicate whether NOPs should be emitted. - bool EmitNops : 1; + /// The last fragment in the range of fragments being aligned. The range of + /// instructions being aligned is defined as (this, LastFragment]. + MCFragment *LastFragment = nullptr; /// The size of the fragment. The size is lazily set during relaxation, and /// is not meaningful before that. - uint64_t Size = 0; + uint16_t Size = 0; + /// The alignment requirement of the branch to be aligned. + Align AlignBoundary; public: - MCBoundaryAlignFragment(Align AlignBoundary, bool Fused = false, - bool EmitNops = false, MCSection *Sec = nullptr) - : MCFragment(FT_BoundaryAlign, false, Sec), AlignBoundary(AlignBoundary), - Fused(Fused), EmitNops(EmitNops) {} + MCBoundaryAlignFragment(Align AlignBoundary, MCSection *Sec = nullptr) + : MCFragment(FT_BoundaryAlign, false, Sec), AlignBoundary(AlignBoundary) + {} uint64_t getSize() const { return Size; } - void setSize(uint64_t Value) { Size = Value; } + void setSize(uint64_t Value) { + assert(Value == (uint64_t)((uint16_t)Value)); + Size = Value; + } Align getAlignment() const { return AlignBoundary; } - bool isFused() const { return Fused; } - void setFused(bool Value) { Fused = Value; } - - bool canEmitNops() const { return EmitNops; } - void setEmitNops(bool Value) { EmitNops = Value; } + MCFragment *getLastFragment() const { return LastFragment; } + void setLastFragment(MCFragment *F) { LastFragment = F; } static bool classof(const MCFragment *F) { return F->getKind() == MCFragment::FT_BoundaryAlign; Index: llvm/lib/MC/MCAssembler.cpp =================================================================== --- llvm/lib/MC/MCAssembler.cpp +++ llvm/lib/MC/MCAssembler.cpp @@ -986,20 +986,22 @@ bool MCAssembler::relaxBoundaryAlign(MCAsmLayout &Layout, MCBoundaryAlignFragment &BF) { - // The MCBoundaryAlignFragment that doesn't emit NOP should not be relaxed. - if (!BF.canEmitNops()) + // If the last fragment was never set, then there's nothing to align. + if (!BF.getLastFragment()) return false; uint64_t AlignedOffset = Layout.getFragmentOffset(BF.getNextNode()); + // Compute the size of all the instructions in the range we're trying to + // align. The range is [BF.getNextNode, LastNode]. uint64_t AlignedSize = 0; - const MCFragment *F = BF.getNextNode(); - // If the branch is unfused, it is emitted into one fragment, otherwise it is - // emitted into two fragments at most, the next MCBoundaryAlignFragment(if - // exists) also marks the end of the branch. - for (auto i = 0, N = BF.isFused() ? 2 : 1; - i != N && !isa(F); ++i, F = F->getNextNode()) { + for (const MCFragment *F = BF.getNextNode(), *Last = BF.getLastFragment(); ; + F = F->getNextNode()) { AlignedSize += computeFragmentSize(Layout, *F); + if (F == Last) + break; } + assert(AlignedSize <= BF.getAlignment().value() && + "can't align a region larger than the alignment"); uint64_t OldSize = BF.getSize(); AlignedOffset -= OldSize; Align BoundaryAlignment = BF.getAlignment(); Index: llvm/lib/MC/MCFragment.cpp =================================================================== --- llvm/lib/MC/MCFragment.cpp +++ llvm/lib/MC/MCFragment.cpp @@ -424,15 +424,9 @@ } case MCFragment::FT_BoundaryAlign: { const auto *BF = cast(this); - if (BF->canEmitNops()) - OS << " (can emit nops to align"; - if (BF->isFused()) - OS << " fused branch)"; - else - OS << " unfused branch)"; - OS << "\n "; OS << " BoundarySize:" << BF->getAlignment().value() - << " Size:" << BF->getSize(); + << " Size:" << BF->getSize() + << " LastFragment:" << BF->getLastFragment(); break; } case MCFragment::FT_SymbolId: { Index: llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp =================================================================== --- llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -152,9 +152,9 @@ bool needAlign(MCObjectStreamer &OS) const; bool needAlignInst(const MCInst &Inst) const; - MCBoundaryAlignFragment * - getOrCreateBoundaryAlignFragment(MCObjectStreamer &OS) const; + MCInst PrevInst; + MCBoundaryAlignFragment *PendingBA = nullptr; public: X86AsmBackend(const Target &T, const MCSubtargetInfo &STI) @@ -458,57 +458,31 @@ (AlignBranchType & X86AlignBranchKind::AlignBranchIndirect)); } -static bool canReuseBoundaryAlignFragment(const MCBoundaryAlignFragment &F) { - // If a MCBoundaryAlignFragment has not been used to emit NOP,we can reuse it. - return !F.canEmitNops(); -} - -MCBoundaryAlignFragment * -X86AsmBackend::getOrCreateBoundaryAlignFragment(MCObjectStreamer &OS) const { - auto *F = dyn_cast_or_null(OS.getCurrentFragment()); - if (!F || !canReuseBoundaryAlignFragment(*F)) { - F = new MCBoundaryAlignFragment(AlignBoundary); - OS.insert(F); - } - return F; -} - /// Insert MCBoundaryAlignFragment before instructions to align branches. void X86AsmBackend::alignBranchesBegin(MCObjectStreamer &OS, const MCInst &Inst) { if (!needAlign(OS)) return; - MCFragment *CF = OS.getCurrentFragment(); + // Summary of pending BA scheme + // - If we encounter the first instruction in a fusible pair, insert a BA + // fragment. + // - If we emit anything which isn't part of a fusible pair, clear pending. + // - If we ecounter an jmp/branch/etc, insert a BA fragment if there isn't + // already one pending for a valid fusible sequence. + // - Close the pending sequence after any alignable instruction (since they + // also terminate fusible sequences.) + // Note that the BoundaryAlign fragment only has semantic meaning once a + // valid LastFragment is set. Otherwise, it remains a nop. + bool NeedAlignFused = AlignBranchType & X86AlignBranchKind::AlignBranchFused; - if (NeedAlignFused && isMacroFused(PrevInst, Inst) && CF) { - // Macro fusion actually happens and there is no other fragment inserted - // after the previous instruction. NOP can be emitted in PF to align fused - // jcc. - if (auto *PF = - dyn_cast_or_null(CF->getPrevNode())) { - const_cast(PF)->setEmitNops(true); - const_cast(PF)->setFused(true); - } - } else if (needAlignInst(Inst)) { - // Note: When there is at least one fragment, such as MCAlignFragment, - // inserted after the previous instruction, e.g. - // - // \code - // cmp %rax %rcx - // .align 16 - // je .Label0 - // \ endcode - // - // We will treat the JCC as a unfused branch although it may be fused - // with the CMP. - auto *F = getOrCreateBoundaryAlignFragment(OS); - F->setEmitNops(true); - F->setFused(false); - } else if (NeedAlignFused && isFirstMacroFusibleInst(Inst, *MCII)) { - // We don't know if macro fusion happens until the reaching the next - // instruction, so a place holder is put here if necessary. - getOrCreateBoundaryAlignFragment(OS); + if (!needAlignInst(Inst) && PendingBA) + PendingBA = nullptr; + if ((needAlignInst(Inst) && (!PendingBA || + !isMacroFused(PrevInst, Inst))) || + (NeedAlignFused && isFirstMacroFusibleInst(Inst, *MCII))) { + PendingBA = new MCBoundaryAlignFragment(AlignBoundary); + OS.insert(PendingBA); } PrevInst = Inst; @@ -517,20 +491,28 @@ /// Insert a MCBoundaryAlignFragment to mark the end of the branch to be aligned /// if necessary. void X86AsmBackend::alignBranchesEnd(MCObjectStreamer &OS, const MCInst &Inst) { - if (!needAlign(OS)) + if (!needAlign(OS) || !needAlignInst(Inst)) + return; + + auto *BA = PendingBA; + if (!BA) + return; + PendingBA = nullptr; + + auto *CF = OS.getCurrentFragment(); + if (!CF) return; - // If the branch is emitted into a MCRelaxableFragment, we can determine the - // size of the branch easily in MCAssembler::relaxBoundaryAlign. When the - // branch is fused, the fused branch(macro fusion pair) must be emitted into - // two fragments. Or when the branch is unfused, the branch must be emitted - // into one fragment. The MCRelaxableFragment naturally marks the end of the - // fused or unfused branch. - // Otherwise, we need to insert a MCBoundaryAlignFragment to mark the end of - // the branch. This MCBoundaryAlignFragment may be reused to emit NOP to align - // other branch. - if (needAlignInst(Inst) && !isa(OS.getCurrentFragment())) - OS.insert(new MCBoundaryAlignFragment(AlignBoundary)); + // Define the range of fragments being aligned. + BA->setLastFragment(CF); + + // We need to ensure that further instructions aren't added to the current + // fragment. The easiest way is to insert a new empty DataFragment. + // RelaxableFragments are always single instructions, and are common for + // jumps, so avoid inserting a potentially unused fragment in that case. + if (!isa(CF)) + OS.insert(new MCDataFragment()); + // Update the maximum alignment on the current section if necessary. MCSection *Sec = OS.getCurrentSectionOnly(); if (AlignBoundary.value() > Sec->getAlignment()) Index: llvm/test/MC/X86/align-branch-64-negative.s =================================================================== --- llvm/test/MC/X86/align-branch-64-negative.s +++ llvm/test/MC/X86/align-branch-64-negative.s @@ -4,13 +4,13 @@ # in the assembler. These are the examples brought up in review and # discussion which motivate the need for a assembler directive to # selectively enable/disable. - # FIXME: the checks are checking current *incorrect* behavior + # FIXME: (some of) the checks are checking current *incorrect* behavior .text # In the first test, we have a label which is expected to be bound to the # start of the call. For instance, we want to associate a fault on the call - # target with some bit of higher level sementic. + # target with some bit of higher level sementic. (FIXME) # CHECK: labeled_call_test1: # CHECK: 1f label_before # CHECK: 1f: nop @@ -29,8 +29,8 @@ # the return address of the call. # CHECK: labeled_call_test2: # CHECK: 5a: callq + # CHECK: 5f label_after # CHECK: 5f: nop - # CHECK: 60 label_after # CHECK: 60: jmp .globl labeled_call_test2 .p2align 5 @@ -44,7 +44,7 @@ # Our third test is like the first w/a labeled fault, but specifically to # a fused memory comparison. This is the form produced by implicit null - # checks for instance. + # checks for instance. (FIXME) # CHECK: implicit_null_check: # CHECK: 9f fault_addr # CHECK: 9f: nop