Index: llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
===================================================================
--- llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -105,6 +105,10 @@
         "assumptions about labels corresponding to particular instructions, "
         "and should be used with caution."));
 
+cl::opt<unsigned> X86PadMaxPrefixSize(
+    "x86-pad-max-prefix-size", cl::init(0),
+    cl::desc("Maximum number of redundant prefixes to use for padding"));
+
 cl::opt<bool> X86PadForAlign(
     "x86-pad-for-align", cl::init(true), cl::Hidden,
     cl::desc("Pad previous instructions to implement align directives"));
@@ -185,8 +189,16 @@
   void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
                         MCInst &Res) const override;
 
+  bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
+                                   MCCodeEmitter &Emitter,
+                                   unsigned &RemainingSize) const;
+
+  bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
+                               unsigned &RemainingSize) const;
+
   bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
                               unsigned &RemainingSize) const;
+
   void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override;
 
   bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
@@ -718,6 +730,63 @@
   Res.setOpcode(RelaxedOp);
 }
 
+static bool shouldAddPrefix(const MCInst &Inst, const MCInstrInfo &MCII) {
+  // Linker may rewrite the instruction with variant symbol operand.
+  return !hasVariantSymbol(Inst);
+}
+
+static unsigned getRemainingPrefixSize(const MCInst &Inst,
+                                       const MCSubtargetInfo &STI,
+                                       MCCodeEmitter &Emitter) {
+  SmallString<256> Code;
+  raw_svector_ostream VecOS(Code);
+  Emitter.emitPrefix(Inst, VecOS, STI);
+  assert(Code.size() < 15 && "The number of prefixes must be less than 15.");
+
+  // TODO: It turns out we need a decent amount of plumbing for the target
+  // specific bits to determine number of prefixes it's safe to add. Various
+  // targets (older chips mostly, but also Atom family) encounter decoder
+  // stalls with too many prefixes. For testing purposes, we set the value
+  // externally for the moment.
+  unsigned ExistingPrefixSize = Code.size();
+  unsigned TargetPrefixMax = X86PadMaxPrefixSize;
+  if (TargetPrefixMax <= ExistingPrefixSize)
+    return 0;
+  return TargetPrefixMax - ExistingPrefixSize;
+}
+
+bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
+                                            MCCodeEmitter &Emitter,
+                                            unsigned &RemainingSize) const {
+  if (!shouldAddPrefix(RF.getInst(), *MCII))
+    return false;
+  const unsigned OldSize = RF.getContents().size();
+  if (OldSize >= 15)
+    return false;
+  // OldSize < 15 from here on, so the unsigned "15 - OldSize" cannot wrap.
+  const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize);
+  const unsigned PrefixBytesToAdd =
+      std::min(MaxPossiblePad,
+               getRemainingPrefixSize(RF.getInst(), STI, Emitter));
+  if (PrefixBytesToAdd == 0)
+    return false;
+
+  const uint8_t Prefix = determinePaddingPrefix(RF.getInst());
+
+  SmallString<256> Code;
+  Code.append(PrefixBytesToAdd, Prefix);
+  Code.append(RF.getContents().begin(), RF.getContents().end());
+  RF.getContents() = Code;
+
+  // Shift every fixup by the number of prefix bytes prepended above.
+  for (auto &F : RF.getFixups()) {
+    F.setOffset(F.getOffset() + PrefixBytesToAdd);
+  }
+
+  RemainingSize -= PrefixBytesToAdd;
+  return true;
+}
+
 static bool canBeRelaxedForPadding(const MCRelaxableFragment &RF) {
   // TODO: There are lots of other tricks we could apply for increasing
   // encoding size without impacting performance.
@@ -727,9 +796,9 @@
   return getRelaxedOpcode(Inst, Is16BitMode) != Inst.getOpcode();
 }
 
-bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
-                                           MCCodeEmitter &Emitter,
-                                           unsigned &RemainingSize) const {
+bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
+                                                MCCodeEmitter &Emitter,
+                                                unsigned &RemainingSize) const {
   if (!canBeRelaxedForPadding(RF))
     return false;
 
@@ -753,6 +822,17 @@
   return true;
 }
 
+bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
+                                           MCCodeEmitter &Emitter,
+                                           unsigned &RemainingSize) const {
+  // Relax first; spend prefix bytes only on whatever padding is still owed.
+  const bool Relaxed =
+      RemainingSize && padInstructionViaRelaxation(RF, Emitter, RemainingSize);
+  const bool Prefixed =
+      RemainingSize && padInstructionViaPrefix(RF, Emitter, RemainingSize);
+  return Relaxed || Prefixed;
+}
+
 void X86AsmBackend::finishLayout(MCAssembler const &Asm,
                                  MCAsmLayout &Layout) const {
   // See if we can further relax some instructions to cut down on the number of
Index: llvm/test/MC/X86/align-via-relaxation.s
===================================================================
--- llvm/test/MC/X86/align-via-relaxation.s
+++ llvm/test/MC/X86/align-via-relaxation.s
@@ -1,4 +1,4 @@
-# RUN: llvm-mc -mcpu=skylake -filetype=obj -triple x86_64-pc-linux-gnu %s | llvm-objdump -d --section=.text - | FileCheck %s
+# RUN: llvm-mc -mcpu=skylake -filetype=obj -triple x86_64-pc-linux-gnu %s -x86-pad-max-prefix-size=5 | llvm-objdump -d --section=.text - | FileCheck %s
 
   .file "test.c"
 
@@ -7,14 +7,14 @@
   # Demonstrate that we can relax instructions to provide padding, not
   # just insert nops. jmps are being used for ease of demonstration.
   # CHECK: .text
-  # CHECK: 0: eb 1f jmp 31
-  # CHECK: 2: e9 1a 00 00 00 jmp 26
-  # CHECK: 7: e9 15 00 00 00 jmp 21
-  # CHECK: c: e9 10 00 00 00 jmp 16
-  # CHECK: 11: e9 0b 00 00 00 jmp 11
-  # CHECK: 16: e9 06 00 00 00 jmp 6
-  # CHECK: 1b: e9 01 00 00 00 jmp 1
-  # CHECK: 20: cc int3
+  # CHECK: 0: eb 1f jmp
+  # CHECK: 2: eb 1d jmp
+  # CHECK: 4: eb 1b jmp
+  # CHECK: 6: eb 19 jmp
+  # CHECK: 8: 2e 2e eb 15 jmp
+  # CHECK: c: 2e 2e 2e 2e 2e e9 0b 00 00 00 jmp
+  # CHECK: 16: 2e 2e 2e 2e 2e e9 01 00 00 00 jmp
+  # CHECK: 20: cc int3
   .p2align 4
   jmp foo
   jmp foo
@@ -47,15 +47,14 @@
   # fewer nops by relaxing the branch, even though we don't need to
   # CHECK: :
   # CHECK: 45: 48 85 c0 testq %rax, %rax
-  # CHECK: 48: 0f 8e 22 00 00 00 jle 34
-  # CHECK: 4e: 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:(%rax,%rax)
-  # CHECK: 58: 0f 1f 84 00 00 00 00 00 nopl (%rax,%rax)
+  # CHECK: 48: 2e 2e 2e 2e 0f 8e 1e 00 00 00 jle 30
+  # CHECK: 52: 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:(%rax,%rax)
+  # CHECK: 5c: 0f 1f 40 00 nopl (%rax)
   # CHECK: :
   # CHECK: 60: 48 83 e8 01 subq $1, %rax
   # CHECK: 64: 48 85 c0 testq %rax, %rax
   # CHECK: 67: 7e 07 jle 7
-  # CHECK: 69: e9 f2 ff ff ff jmp -14
-  # CHECK: 6e: 66 90 nop
+  # CHECK: 69: 2e 2e e9 f0 ff ff ff jmp
   # CHECK: :
   # CHECK: 70: c3 retq
   .p2align 5