diff --git a/llvm/include/llvm/MC/MCAsmBackend.h b/llvm/include/llvm/MC/MCAsmBackend.h --- a/llvm/include/llvm/MC/MCAsmBackend.h +++ b/llvm/include/llvm/MC/MCAsmBackend.h @@ -49,6 +49,10 @@ /// Return true if this target might automatically pad instructions and thus /// need to emit padding enable/disable directives around sensative code. virtual bool allowAutoPadding() const { return false; } + /// Return true if this target allows an unrelaxable instruction to be + /// emitted into a RelaxableFragment so that its size can be increased + /// later (e.g. by adding prefixes) for optimization. + virtual bool allowEnhancedRelaxation() const { return false; } /// Give the target a chance to manipulate state related to instruction /// alignment (e.g. padding for optimization), instruction relaxablility, etc. diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp --- a/llvm/lib/MC/MCObjectStreamer.cpp +++ b/llvm/lib/MC/MCObjectStreamer.cpp @@ -385,7 +385,9 @@ // If this instruction doesn't need relaxation, just emit it as data. 
MCAssembler &Assembler = getAssembler(); - if (!Assembler.getBackend().mayNeedRelaxation(Inst, STI)) { + MCAsmBackend &Backend = Assembler.getBackend(); + if (!(Backend.mayNeedRelaxation(Inst, STI) || + (Backend.allowEnhancedRelaxation() && getAllowAutoPadding()))) { EmitInstToData(Inst, STI); return; } diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp --- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -139,6 +139,7 @@ MCInst PrevInst; MCBoundaryAlignFragment *PendingBoundaryAlign = nullptr; std::pair PrevInstPosition; + bool StreamerAllowAutoPadding = false; public: X86AsmBackend(const Target &T, const MCSubtargetInfo &STI) @@ -162,6 +163,7 @@ } bool allowAutoPadding() const override; + bool allowEnhancedRelaxation() const override; void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst) override; void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) override; @@ -455,6 +457,10 @@ return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone); } +bool X86AsmBackend::allowEnhancedRelaxation() const { + return allowAutoPadding() && X86PadMaxPrefixSize != 0 && X86PadForBranchAlign; +} + bool X86AsmBackend::needAlign(MCObjectStreamer &OS) const { if (!OS.getAllowAutoPadding()) return false; @@ -538,13 +544,8 @@ } } -/// Check if the instruction operand needs to be aligned. Padding is disabled -/// before intruction which may be rewritten by linker(e.g. TLSCALL). +/// Check if the instruction operand needs to be aligned. bool X86AsmBackend::needAlignInst(const MCInst &Inst) const { - // Linker may rewrite the instruction with variant symbol operand. 
- if (hasVariantSymbol(Inst)) - return false; - const MCInstrDesc &InstDesc = MCII->get(Inst.getOpcode()); return (InstDesc.isConditionalBranch() && (AlignBranchType & X86::AlignBranchJcc)) || @@ -558,31 +559,64 @@ (AlignBranchType & X86::AlignBranchIndirect)); } -/// Insert BoundaryAlignFragment before instructions to align branches. -void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS, - const MCInst &Inst) { - if (!needAlign(OS)) - return; +/// Return true if we can insert NOP or prefixes automatically before the +/// instruction to be emitted. +/// \param Inst the instruction to be emitted. +/// \param PrevInst the previously emitted instruction. +/// \param PrevInstPosition the position where the \p PrevInst was emitted. +static bool +allowAutoPaddingForInst(const MCInst &Inst, const MCInst &PrevInst, + const MCInstrInfo &MCII, + const std::pair &PrevInstPosition, + MCObjectStreamer &OS) { + if (hasVariantSymbol(Inst)) + // Linker may rewrite the instruction with variant symbol operand (e.g. + // TLSCALL). + return false; if (hasInterruptDelaySlot(PrevInst)) // If this instruction follows an interrupt enabling instruction with a one // instruction delay, inserting a nop would change behavior. - return; + return false; - if (isPrefix(PrevInst, *MCII)) - // If this instruction follows a prefix, inserting a nop would change + if (isPrefix(PrevInst, MCII)) + // If this instruction follows a prefix, inserting a nop/prefix would change // semantic. - return; + return false; + + if (isPrefix(Inst, MCII)) + // If this instruction is a prefix, inserting a prefix would change + // semantic. + return false; if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition)) // If this instruction follows any data, there is no clear - // instruction boundary, inserting a nop would change semantic. 
+ return false; + + return true; +} + +/// Insert BoundaryAlignFragment before instructions to align branches. +void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS, + const MCInst &Inst) { + // Remember whether the streamer currently allows auto padding. + StreamerAllowAutoPadding = OS.getAllowAutoPadding(); + + if (!needAlign(OS)) return; if (!isMacroFused(PrevInst, Inst)) // Macro fusion doesn't happen indeed, clear the pending. PendingBoundaryAlign = nullptr; + if (!allowAutoPaddingForInst(Inst, PrevInst, *MCII, PrevInstPosition, OS)) { + // Disable auto padding on the streamer when the instruction to be + // emitted cannot be padded. + OS.setAllowAutoPadding(false); + return; + } + if (PendingBoundaryAlign && OS.getCurrentFragment()->getPrevNode() == PendingBoundaryAlign) { // Macro fusion actually happens and there is no other fragment inserted @@ -617,6 +651,9 @@ /// Set the last fragment to be aligned for the BoundaryAlignFragment. void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) { + // Restore the streamer's original auto-padding setting. + OS.setAllowAutoPadding(StreamerAllowAutoPadding); + if (!needAlign(OS)) return; diff --git a/llvm/test/MC/X86/align-branch-64-enhanced-relaxation.s b/llvm/test/MC/X86/align-branch-64-enhanced-relaxation.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/X86/align-branch-64-enhanced-relaxation.s @@ -0,0 +1,56 @@ + # RUN: llvm-mc -mcpu=skylake -filetype=obj -triple x86_64-pc-linux-gnu %s -x86-pad-max-prefix-size=5 --x86-align-branch-boundary=32 --x86-align-branch=jmp+indirect | llvm-objdump -d - | FileCheck %s + + # Exercise cases where we are allowed to increase the length of unrelaxable + # instructions (by adding prefixes) for alignment purposes. + + # The first test is a basic test: we just check that the jmp is aligned by + # prefix-padding the previous instructions. 
+ .text + .globl labeled_basic_test +labeled_basic_test: + .p2align 5 + .rept 30 + int3 + .endr +# CHECK: 1e: 2e cc int3 +# CHECK: 20: eb 00 jmp + int3 + jmp foo +foo: + ret + + # The second test checks a correctness corner case - can't add prefixes to a + # prefix or to an instruction that follows a prefix. + .globl labeled_prefix_test +labeled_prefix_test: + .p2align 5 + .rept 28 + int3 + .endr +# CHECK: 5c: 2e cc int3 + int3 +# CHECK: 5e: 3e cc int3 + DS + int3 +# CHECK: 60: eb 00 jmp + jmp bar +bar: + ret + + # The third test is similar to the second test - can't add prefixes to an + # instruction that follows hard-coded bytes. + .globl labeled_hardcode_test +labeled_hardcode_test: + .p2align 5 + .rept 28 + int3 + .endr +# CHECK: 9c: 2e cc int3 + int3 +# CHECK: 9e: 3e cc int3 + .byte 0x3e + int3 +# CHECK: a0: eb 00 jmp + jmp baz +baz: + ret