diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp --- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -105,6 +105,10 @@ "assumptions about labels corresponding to particular instructions, " "and should be used with caution.")); +cl::opt X86PadMaxPrefixSize( + "x86-pad-max-prefix-size", cl::init(0), + cl::desc("Maximum number of prefixes to use for padding")); + cl::opt X86PadForAlign( "x86-pad-for-align", cl::init(true), cl::Hidden, cl::desc("Pad previous instructions to implement align directives")); @@ -186,8 +190,16 @@ void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, MCInst &Res) const override; + bool padInstructionViaRelaxation(MCRelaxableFragment &RF, + MCCodeEmitter &Emitter, + unsigned &RemainingSize) const; + + bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter, + unsigned &RemainingSize) const; + bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter, unsigned &RemainingSize) const; + void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override; bool writeNopData(raw_ostream &OS, uint64_t Count) const override; @@ -734,19 +746,86 @@ Res.setOpcode(RelaxedOp); } -static bool canBeRelaxedForPadding(const MCRelaxableFragment &RF) { - // TODO: There are lots of other tricks we could apply for increasing - // encoding size without impacting performance. +/// Return true if this instruction has been fully relaxed into it's most +/// general available form. +static bool isFullyRelaxed(const MCRelaxableFragment &RF) { auto &Inst = RF.getInst(); auto &STI = *RF.getSubtargetInfo(); bool Is16BitMode = STI.getFeatureBits()[X86::Mode16Bit]; - return getRelaxedOpcode(Inst, Is16BitMode) != Inst.getOpcode(); + return getRelaxedOpcode(Inst, Is16BitMode) == Inst.getOpcode(); } -bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF, - MCCodeEmitter &Emitter, - unsigned &RemainingSize) const { - if (!canBeRelaxedForPadding(RF)) + +static bool shouldAddPrefix(const MCInst &Inst, const MCInstrInfo &MCII) { + // Linker may rewrite the instruction with variant symbol operand. + return !hasVariantSymbol(Inst); +} + +static unsigned getRemainingPrefixSize(const MCInst &Inst, + const MCSubtargetInfo &STI, + MCCodeEmitter &Emitter) { + SmallString<256> Code; + raw_svector_ostream VecOS(Code); + Emitter.emitPrefix(Inst, VecOS, STI); + assert(Code.size() < 15 && "The number of prefixes must be less than 15."); + + // TODO: It turns out we need a decent amount of plumbing for the target + // specific bits to determine number of prefixes its safe to add. Various + // targets (older chips mostly, but also Atom family) encounter decoder + // stalls with too many prefixes. For testing purposes, we set the value + // externally for the moment. + unsigned ExistingPrefixSize = Code.size(); + unsigned TargetPrefixMax = X86PadMaxPrefixSize; + if (TargetPrefixMax <= ExistingPrefixSize) + return 0; + return TargetPrefixMax - ExistingPrefixSize; +} + +bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF, + MCCodeEmitter &Emitter, + unsigned &RemainingSize) const { + if (!shouldAddPrefix(RF.getInst(), *MCII)) + return false; + // If the instruction isn't fully relaxed, shifting it around might require a + // larger value for one of the fixups then can be encoded. The outer loop + // will also catch this before moving to the next instruction, but we need to + // prevent padding this single instruction as well. + if (!isFullyRelaxed(RF)) + return false; + + const unsigned OldSize = RF.getContents().size(); + if (OldSize == 15) + return false; + + const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize); + const unsigned PrefixBytesToAdd = + std::min(MaxPossiblePad, + getRemainingPrefixSize(RF.getInst(), STI, Emitter)); + if (PrefixBytesToAdd == 0) + return false; + + const uint8_t Prefix = determinePaddingPrefix(RF.getInst()); + + SmallString<256> Code; + Code.append(PrefixBytesToAdd, Prefix); + Code.append(RF.getContents().begin(), RF.getContents().end()); + RF.getContents() = Code; + + // Adjust the fixups for the change in offsets + for (auto &F : RF.getFixups()) { + F.setOffset(F.getOffset() + PrefixBytesToAdd); + } + + RemainingSize -= PrefixBytesToAdd; + return true; +} + +bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF, + MCCodeEmitter &Emitter, + unsigned &RemainingSize) const { + if (isFullyRelaxed(RF)) + // TODO: There are lots of other tricks we could apply for increasing + // encoding size without impacting performance. return false; MCInst Relaxed; @@ -769,6 +848,17 @@ return true; } +bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF, + MCCodeEmitter &Emitter, + unsigned &RemainingSize) const { + bool Changed = false; + if (RemainingSize != 0) + Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize); + if (RemainingSize != 0) + Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize); + return Changed; +} + void X86AsmBackend::finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const { // See if we can further relax some instructions to cut down on the number of @@ -851,7 +941,7 @@ // We don't need to worry about larger positive offsets as none of the // possible offsets between this and our align are visible, and the // ones afterwards aren't changing. - if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo())) + if (!isFullyRelaxed(RF)) break; } Relaxable.clear(); diff --git a/llvm/test/MC/X86/align-via-padding.s b/llvm/test/MC/X86/align-via-padding.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/X86/align-via-padding.s @@ -0,0 +1,76 @@ +# RUN: llvm-mc -mcpu=skylake -filetype=obj -triple x86_64-pc-linux-gnu %s -x86-pad-max-prefix-size=5 | llvm-objdump -d --section=.text - | FileCheck %s + +# This test file highlights the interactions between prefix padding and +# relaxation padding. + + .file "test.c" + .text + .section .text + # We can both relax and prefix for padding purposes, but the moment, we + # can't prefix without first having relaxed. + # CHECK: .text + # CHECK: 0: eb 1f jmp + # CHECK: 2: eb 1d jmp + # CHECK: 4: eb 1b jmp + # CHECK: 6: eb 19 jmp + # CHECK: 8: eb 17 jmp + # CHECK: a: 2e 2e 2e 2e 2e e9 0d 00 00 00 jmp + # CHECK: 14: 2e 2e 2e 2e 2e e9 03 00 00 00 jmp + # CHECK: 1e: 66 90 nop + # CHECK: 20: cc int3 + .p2align 4 + jmp foo + jmp foo + jmp foo + jmp foo + jmp foo + jmp foo + jmp foo + .p2align 5 + int3 +foo: + ret + + # Canonical toy loop to show benefit - we can align the loop header with + # fewer nops by relaxing the branch, even though we don't need to + # CHECK: : + # CHECK: 45: 48 85 c0 testq %rax, %rax + # CHECK: 48: 2e 2e 2e 2e 0f 8e 1e 00 00 00 jle 30 + # CHECK: 52: 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:(%rax,%rax) + # CHECK: : + # CHECK: 60: 48 83 e8 01 subq $1, %rax + # CHECK: 64: 48 85 c0 testq %rax, %rax + # CHECK: 67: 7e 07 jle 7 + # CHECK: 69: 2e 2e e9 f0 ff ff ff jmp + # CHECK: : + # CHECK: 70: c3 retq + .p2align 5 + .skip 5 +loop_preheader: + testq %rax, %rax + jle loop_exit + .p2align 5 +loop_header: + subq $1, %rax + testq %rax, %rax + jle loop_exit + jmp loop_header + .p2align 4 +loop_exit: + ret + + # Correctness cornercase - can't prefix pad jmp without having relaxed it + # first as doing so would make the relative offset too large + # CHECK: fd: cc int3 + # CHECK: fe: eb 80 jmp -128 + # CHECK: 100: cc int3 +.p2align 5 +.L1: +.rept 126 + int3 +.endr + jmp .L1 +.rept 30 + int3 +.endr +.p2align 5 diff --git a/llvm/test/MC/X86/align-via-relaxation.s b/llvm/test/MC/X86/align-via-relaxation.s --- a/llvm/test/MC/X86/align-via-relaxation.s +++ b/llvm/test/MC/X86/align-via-relaxation.s @@ -1,5 +1,7 @@ -# RUN: llvm-mc -mcpu=skylake -filetype=obj -triple x86_64-pc-linux-gnu %s | llvm-objdump -d --section=.text - | FileCheck %s +# RUN: llvm-mc -mcpu=skylake -filetype=obj -triple x86_64-pc-linux-gnu -x86-pad-max-prefix-size=0 %s | llvm-objdump -d --section=.text - | FileCheck %s +# This test exercises only the padding via relaxation logic. The interaction +# etween prefix padding and relaxation logic can be seen in align-via-padding.s .file "test.c" .text