diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
--- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -373,6 +373,27 @@
   return true;
 }
 
+/// Return true if \p Inst has an interrupt delay slot, i.e. interrupts are
+/// only enabled one instruction *after* it: STI, POP SS, or a MOV that stores
+/// to SS. Callers must not separate such an instruction from its successor.
+static bool hasInterruptDelaySlot(const MCInst &Inst) {
+  switch (Inst.getOpcode()) {
+  case X86::POPSS16:
+  case X86::POPSS32:
+  case X86::STI:
+    return true; // Unconditional: these opcodes need no operand check.
+
+  case X86::MOV16sr:
+  case X86::MOV32sr:
+  case X86::MOV64sr:
+  case X86::MOV16sm:
+    if (Inst.getOperand(0).getReg() == X86::SS) // Only a store to SS counts.
+      return true;
+    break; // Operand 0 is some other register: no delay slot.
+  }
+  return false;
+}
+
 /// Check if the instruction operand needs to be aligned. Padding is disabled
 /// before intruction which may be rewritten by linker(e.g. TLSCALL).
 bool X86AsmBackend::needAlignInst(const MCInst &Inst) const {
@@ -401,7 +422,10 @@
 
   MCFragment *CF = OS.getCurrentFragment();
   bool NeedAlignFused = AlignBranchType & X86::AlignBranchFused;
-  if (NeedAlignFused && isMacroFused(PrevInst, Inst) && CF) {
+  if (hasInterruptDelaySlot(PrevInst)) {
+    // PrevInst enables interrupts only after one more instruction (this one)
+    // executes, so inserting a nop between them would change behavior.
+  } else if (NeedAlignFused && isMacroFused(PrevInst, Inst) && CF) {
     // Macro fusion actually happens and there is no other fragment inserted
     // after the previous instruction. NOP can be emitted in PF to align fused
     // jcc.
diff --git a/llvm/test/MC/X86/align-branch-64-system.s b/llvm/test/MC/X86/align-branch-64-system.s
new file mode 100644
--- /dev/null
+++ b/llvm/test/MC/X86/align-branch-64-system.s
@@ -0,0 +1,68 @@
+  # RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu --x86-align-branch-boundary=32 --x86-align-branch=jmp %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s
+
+  # Each case below places a jmp right after an instruction that enables
+  # interrupts with a one instruction delay; no nop may be padded between them.
+
+  .text
+
+  # CHECK: 1e: sti
+  # CHECK: 1f: jmp
+  .p2align 5
+  .rept 30
+  int3
+  .endr
+  sti
+  jmp baz
+
+  # CHECK: 5c: movq %rax, %ss
+  # CHECK: 5f: jmp
+  .p2align 5
+  .rept 28
+  int3
+  .endr
+  movq %rax, %ss
+  jmp baz
+
+  # CHECK: 9d: movl %esi, %ss
+  # CHECK: 9f: jmp
+  .p2align 5
+  .rept 29
+  int3
+  .endr
+  movl %esi, %ss
+  jmp baz
+
+  # movw and movl are interchangeable here since only the low 16 bits are used.
+  # Both are encoded as "MOV Sreg,r/m16**" (8E /r), but disassembled as movl.
+  # CHECK: dd: movl %esi, %ss
+  # CHECK: df: jmp
+  .p2align 5
+  .rept 29
+  int3
+  .endr
+  movw %si, %ss
+  jmp baz
+
+  # CHECK: 11b: movw (%esi), %ss
+  # CHECK: 11e: jmp
+  .p2align 5
+  .rept 27
+  int3
+  .endr
+  movw (%esi), %ss
+  jmp baz
+
+  # CHECK: 15b: movw (%rsi), %ss
+  # CHECK: 15d: jmp
+  .p2align 5
+  .rept 27
+  int3
+  .endr
+  movw (%rsi), %ss
+  jmp baz
+
+
+  int3
+  .section ".text.other"
+bar:
+  retq