diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp --- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -134,6 +134,7 @@ bool needAlignInst(const MCInst &Inst) const; MCInst PrevInst; MCBoundaryAlignFragment *PendingBoundaryAlign = nullptr; + std::pair PrevInstPosition; public: X86AsmBackend(const Target &T, const MCSubtargetInfo &STI) @@ -479,6 +480,52 @@ return false; } +/// Check if the instruction to be emitted is right after any data. +static bool +isRightAfterData(MCFragment *CurrentFragment, + const std::pair &PrevInstPosition) { + MCFragment *F = CurrentFragment; + // Empty data fragments may be created to prevent further data being + // added into the previous fragment, we need to skip them since they + // have no contents. + for (; isa_and_nonnull(F); F = F->getPrevNode()) + if (cast(F)->getContents().size() != 0) + break; + + // Since data is always emitted into a DataFragment, our check strategy is + // simple here. + // - If the fragment is a DataFragment + // - If it's not the fragment where the previous instruction is, + // returns true. + // - If it's the fragment holding the previous instruction but its + // size changed since the the previous instruction was emitted into + // it, returns true. + // - Otherwise returns false. + // - If the fragment is not a DataFragment, returns false. + if (auto *DF = dyn_cast_or_null(F)) + return DF != PrevInstPosition.first || + DF->getContents().size() != PrevInstPosition.second; + + return false; +} + +/// \returns the fragment size if it has instructions, otherwise returns 0. +static size_t getSizeForInstFragment(const MCFragment *F) { + if (!F || !F->hasInstructions()) + return 0; + // MCEncodedFragmentWithContents being templated makes this tricky. + switch (F->getKind()) { + default: + llvm_unreachable("Unknown fragment with instructions!"); + case MCFragment::FT_Data: + return cast(*F).getContents().size(); + case MCFragment::FT_Relaxable: + return cast(*F).getContents().size(); + case MCFragment::FT_CompactEncodedInst: + return cast(*F).getContents().size(); + } +} + /// Check if the instruction operand needs to be aligned. Padding is disabled /// before intruction which may be rewritten by linker(e.g. TLSCALL). bool X86AsmBackend::needAlignInst(const MCInst &Inst) const { @@ -515,6 +562,11 @@ // semantic. return; + if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition)) + // If this instruction follows any data, there is no clear + // instruction boundary, inserting a nop would change semantic. + return; + if (!isMacroFused(PrevInst, Inst)) // Macro fusion doesn't happen indeed, clear the pending. PendingBoundaryAlign = nullptr; @@ -557,19 +609,21 @@ return; PrevInst = Inst; + MCFragment *CF = OS.getCurrentFragment(); + PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF)); if (!needAlignInst(Inst) || !PendingBoundaryAlign) return; // Tie the aligned instructions into a a pending BoundaryAlign. - PendingBoundaryAlign->setLastFragment(OS.getCurrentFragment()); + PendingBoundaryAlign->setLastFragment(CF); PendingBoundaryAlign = nullptr; // We need to ensure that further data isn't added to the current // DataFragment, so that we can get the size of instructions later in // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty // DataFragment. - if (isa_and_nonnull(OS.getCurrentFragment())) + if (isa_and_nonnull(CF)) OS.insert(new MCDataFragment()); // Update the maximum alignment on the current section if necessary. diff --git a/llvm/test/MC/X86/align-branch-64-hardcode.s b/llvm/test/MC/X86/align-branch-64-hardcode.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/X86/align-branch-64-hardcode.s @@ -0,0 +1,32 @@ + # RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu --x86-align-branch-boundary=32 --x86-align-branch=jmp+call %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s + + # Exercise cases where instructions to be aligned is after hardcode + # and thus can't add a nop in between without changing semantic. + + .text + + # CHECK: 1d: int3 + # CHECK: 1e: jmp + # CHECK: 24: int3 + .p2align 5 + .rept 30 + int3 + .endr + .byte 0x2e + jmp baz + int3 + + # CHECK: 5d: int3 + # CHECK: 5e: call + # CHECK: 66: int3 + .p2align 5 + .rept 30 + int3 + .endr + .byte 0x66 + call *___tls_get_addr@GOT(%ecx) + int3 + + .section ".text.other" +bar: + retq