diff --git a/llvm/include/llvm/MC/MCFragment.h b/llvm/include/llvm/MC/MCFragment.h --- a/llvm/include/llvm/MC/MCFragment.h +++ b/llvm/include/llvm/MC/MCFragment.h @@ -259,6 +259,8 @@ /// The instruction this is a fragment for. MCInst Inst; + /// Can we auto pad the instruction? + bool AllowAutoPadding = false; public: MCRelaxableFragment(const MCInst &Inst, const MCSubtargetInfo &STI, @@ -269,6 +271,9 @@ const MCInst &getInst() const { return Inst; } void setInst(const MCInst &Value) { Inst = Value; } + bool getAllowAutoPadding() const { return AllowAutoPadding; } + void setAllowAutoPadding(bool V) { AllowAutoPadding = V; } + static bool classof(const MCFragment *F) { return F->getKind() == MCFragment::FT_Relaxable; } diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp --- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -136,9 +136,11 @@ bool needAlign(MCObjectStreamer &OS) const; bool needAlignInst(const MCInst &Inst) const; + bool allowAutoPaddingForInst(const MCInst &Inst, MCObjectStreamer &OS) const; MCInst PrevInst; MCBoundaryAlignFragment *PendingBoundaryAlign = nullptr; std::pair PrevInstPosition; + bool AllowAutoPaddingForInst; public: X86AsmBackend(const Target &T, const MCSubtargetInfo &STI) @@ -538,13 +540,8 @@ } } -/// Check if the instruction operand needs to be aligned. Padding is disabled -/// before intruction which may be rewritten by linker(e.g. TLSCALL). +/// Check if the instruction operand needs to be aligned. bool X86AsmBackend::needAlignInst(const MCInst &Inst) const { - // Linker may rewrite the instruction with variant symbol operand. 
- if (hasVariantSymbol(Inst)) - return false; - const MCInstrDesc &InstDesc = MCII->get(Inst.getOpcode()); return (InstDesc.isConditionalBranch() && (AlignBranchType & X86::AlignBranchJcc)) || @@ -558,31 +555,53 @@ (AlignBranchType & X86::AlignBranchIndirect)); } -/// Insert BoundaryAlignFragment before instructions to align branches. -void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS, - const MCInst &Inst) { - if (!needAlign(OS)) - return; +/// Return true if we can insert NOP or prefixes automatically before the +/// instruction to be emitted. +bool X86AsmBackend::allowAutoPaddingForInst(const MCInst &Inst, + MCObjectStreamer &OS) const { + if (hasVariantSymbol(Inst)) + // Linker may rewrite the instruction with variant symbol operand (e.g. + // TLSCALL). + return false; if (hasInterruptDelaySlot(PrevInst)) // If this instruction follows an interrupt enabling instruction with a one // instruction delay, inserting a nop would change behavior. - return; + return false; if (isPrefix(PrevInst, *MCII)) - // If this instruction follows a prefix, inserting a nop would change + // If this instruction follows a prefix, inserting a nop/prefix would change // semantic. - return; + return false; + + if (isPrefix(Inst, *MCII)) + // If this instruction is a prefix, inserting a prefix would change + // semantics. + return false; if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition)) // If this instruction follows any data, there is no clear - // instruction boundary, inserting a nop would change semantic. + // instruction boundary, inserting a nop/prefix would change semantics. + return false; + + return true; +} + +/// Insert BoundaryAlignFragment before instructions to align branches. 
+void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS, + const MCInst &Inst) { + AllowAutoPaddingForInst = allowAutoPaddingForInst(Inst, OS); + + if (!needAlign(OS)) return; if (!isMacroFused(PrevInst, Inst)) // Macro fusion doesn't happen indeed, clear the pending. PendingBoundaryAlign = nullptr; + if (!AllowAutoPaddingForInst) + return; + if (PendingBoundaryAlign && OS.getCurrentFragment()->getPrevNode() == PendingBoundaryAlign) { // Macro fusion actually happens and there is no other fragment inserted @@ -617,12 +636,14 @@ /// Set the last fragment to be aligned for the BoundaryAlignFragment. void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) { - if (!needAlign(OS)) - return; - PrevInst = Inst; MCFragment *CF = OS.getCurrentFragment(); PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF)); + if (auto *F = dyn_cast_or_null(CF)) + F->setAllowAutoPadding(AllowAutoPaddingForInst); + + if (!needAlign(OS)) + return; if (!needAlignInst(Inst) || !PendingBoundaryAlign) return; @@ -809,12 +830,6 @@ return getRelaxedOpcode(Inst, Is16BitMode) == Inst.getOpcode(); } - -static bool shouldAddPrefix(const MCInst &Inst, const MCInstrInfo &MCII) { - // Linker may rewrite the instruction with variant symbol operand. - return !hasVariantSymbol(Inst); -} - static unsigned getRemainingPrefixSize(const MCInst &Inst, const MCSubtargetInfo &STI, MCCodeEmitter &Emitter) { @@ -838,7 +853,7 @@ bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter, unsigned &RemainingSize) const { - if (!shouldAddPrefix(RF.getInst(), *MCII)) + if (!RF.getAllowAutoPadding()) return false; // If the instruction isn't fully relaxed, shifting it around might require a // larger value for one of the fixups then can be encoded. 
The outer loop diff --git a/llvm/test/MC/X86/align-via-padding-corner.s b/llvm/test/MC/X86/align-via-padding-corner.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/X86/align-via-padding-corner.s @@ -0,0 +1,29 @@ + # RUN: llvm-mc -mcpu=skylake -filetype=obj -triple x86_64-pc-linux-gnu %s -x86-pad-max-prefix-size=5 | llvm-objdump -d - | FileCheck %s + + + # The first test checks a correctness corner case - can't add prefixes on + # an instruction that follows a prefix. + .globl labeled_prefix_test +labeled_prefix_test: +# CHECK: 0: 2e 2e 2e 2e 2e e9 06 00 00 00 jmp +# CHECK: a: 3e e9 00 00 00 00 jmp + jmp bar + DS + jmp bar + .p2align 4 +bar: + ret + + # The second test is similar to the first test - can't add prefixes on an + # instruction that follows a hardcoded byte. + .p2align 5 + .globl labeled_hardcode_test +labeled_hardcode_test: +# CHECK: 20: 2e 2e 2e 2e 2e e9 06 00 00 00 jmp +# CHECK: 2a: 3e e9 00 00 00 00 jmp + jmp baz + .byte 0x3e + jmp baz + .p2align 4 +baz: + ret