Index: llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
===================================================================
--- llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -105,6 +105,10 @@
         "assumptions about labels corresponding to particular instructions, "
         "and should be used with caution."));
 
+cl::opt<unsigned> X86PadMaxPrefixSize(
+    "x86-pad-max-prefix-size", cl::init(15),
+    cl::desc("Maximum number of redundant prefixes to use for padding"));
+
 cl::opt<bool> X86PadForAlign(
     "x86-pad-for-align", cl::init(true), cl::Hidden,
     cl::desc("Pad previous instructions to implement align directives"));
@@ -185,8 +189,16 @@
   void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
                         MCInst &Res) const override;
 
+  bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
+                                   MCCodeEmitter &Emitter,
+                                   unsigned &RemainingSize) const;
+
+  bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
+                               unsigned &RemainingSize) const;
+
   bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
                               unsigned &RemainingSize) const;
+
   void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override;
 
   bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
@@ -732,6 +744,68 @@
   Res.setOpcode(RelaxedOp);
 }
 
+/// Return true if \p Inst may be grown by prepending padding prefixes.
+static bool shouldAddPrefix(const MCInst &Inst, const MCInstrInfo &MCII) {
+  // Linker may rewrite the instruction with variant symbol operand.
+  return !hasVariantSymbol(Inst);
+}
+
+/// Try to grow the encoding stored in \p RF by prepending copies of a padding
+/// prefix. No more than \p RemainingSize bytes are added, the encoding is kept
+/// within 15 bytes, and the total prefix count is capped at
+/// min(5, x86-pad-max-prefix-size). On success \p RemainingSize is reduced by
+/// the bytes added and every fixup offset is shifted. Returns true on change.
+bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
+                                            MCCodeEmitter &Emitter,
+                                            unsigned &RemainingSize) const {
+  if (RemainingSize == 0)
+    return false;
+  if (!shouldAddPrefix(RF.getInst(), *MCII))
+    return false;
+  const unsigned OldSize = RF.getContents().size();
+  // No room left under the 15-byte cap; also keeps 15 - OldSize from wrapping.
+  if (OldSize >= 15)
+    return false;
+
+  // The number of prefixes is limited by AlignMaxPrefixSize for some
+  // performance reasons, so we need to compute how many prefixes can be
+  // added.
+  auto GetRemainingPrefixSize = [&](const MCInst &Inst) {
+    SmallString<256> Code;
+    raw_svector_ostream VecOS(Code);
+    Emitter.emitPrefix(Inst, VecOS, STI);
+    assert(Code.size() < 15 && "The number of prefixes must be less than 15.");
+    const unsigned ExistingPrefixSize = static_cast<unsigned>(Code.size());
+    // TODO: This cap needs to be target specific - 5 is not a safe number on
+    // older AMD chips or Intel Atom family.
+    // Clamp in unsigned: narrowing the option to uint8_t wraps values >= 256.
+    const unsigned AlignMaxPrefixSize =
+        std::min<unsigned>(5, X86PadMaxPrefixSize);
+    if (AlignMaxPrefixSize > ExistingPrefixSize)
+      return AlignMaxPrefixSize - ExistingPrefixSize;
+    return 0u;
+  };
+  const size_t PrefixBytesToAdd =
+      std::min(std::min((size_t)15 - OldSize, (size_t)RemainingSize),
+               (size_t)GetRemainingPrefixSize(RF.getInst()));
+  if (PrefixBytesToAdd == 0)
+    return false;
+
+  const uint8_t Prefix = determinePaddingPrefix(RF.getInst());
+  SmallString<256> Code;
+  Code.append(PrefixBytesToAdd, Prefix);
+  Code.append(RF.getContents().begin(), RF.getContents().end());
+  RF.getContents() = Code;
+  RemainingSize -= PrefixBytesToAdd;
+
+  // Adjust the fixups for the change in offsets
+  for (auto &F : RF.getFixups()) {
+    F.setOffset(F.getOffset() + PrefixBytesToAdd);
+  }
+
+  return true;
+}
+
 static bool canBeRelaxedForPadding(const MCRelaxableFragment &RF) {
   // TODO: There are lots of other tricks we could apply for increasing
   // encoding size without impacting performance.
@@ -741,9 +815,9 @@
   return getRelaxedOpcode(Inst, is16BitMode) != Inst.getOpcode();
 }
 
-bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
-                                           MCCodeEmitter &Emitter,
-                                           unsigned &RemainingSize) const {
+bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
+                                                MCCodeEmitter &Emitter,
+                                                unsigned &RemainingSize) const {
   if (!canBeRelaxedForPadding(RF))
     return false;
 
@@ -767,6 +841,17 @@
   return true;
 }
 
+/// Pad the instruction in \p RF: relaxation is attempted first, then prefix
+/// padding, both against \p RemainingSize. Returns true if either changed it.
+bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
+                                           MCCodeEmitter &Emitter,
+                                           unsigned &RemainingSize) const {
+  bool Changed = false;
+  Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
+  Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
+  return Changed;
+}
+
 void X86AsmBackend::finishLayout(MCAssembler const &Asm,
                                  MCAsmLayout &Layout) const {
   // See if we can further relax some instructions to cut down on the number of
Index: llvm/test/MC/X86/align-branch-64-1a.s
===================================================================
--- llvm/test/MC/X86/align-branch-64-1a.s
+++ llvm/test/MC/X86/align-branch-64-1a.s
@@ -16,16 +16,14 @@
 # CHECK-NEXT:       1b: 31 c0                            xorl %eax, %eax
 # CHECK-COUNT-3:      : 90                               nop
 # CHECK:            20: 48 39 c5                         cmpq %rax, %rbp
-# CHECK-NEXT:       23: 74 5d                            je {{.*}}
+# CHECK-NEXT:       23: 2e 74 5c                         je {{.*}}
 # CHECK-COUNT-3:      : 64 89 04 25 01 00 00 00          movl %eax, %fs:1
-# CHECK:            3d: 31 c0                            xorl %eax, %eax
-# CHECK-NEXT:       3f: 90                               nop
+# CHECK:            3e: 31 c0                            xorl %eax, %eax
 # CHECK-NEXT:       40: 74 40                            je {{.*}}
 # CHECK-NEXT:       42: 5d                               popq %rbp
-# CHECK-NEXT:       43: 74 3d                            je {{.*}}
+# CHECK-NEXT:       43: 2e 74 3c                         je {{.*}}
 # CHECK-COUNT-3:      : 64 89 04 25 01 00 00 00          movl %eax, %fs:1
-# CHECK:            5d: 31 c0                            xorl %eax, %eax
-# CHECK-NEXT:       5f: 90                               nop
+# CHECK:            5e: 31 c0                            xorl %eax, %eax
 # CHECK-NEXT:       60: eb 26                            jmp {{.*}}
 # CHECK-NEXT:       62: eb 24                            jmp {{.*}}
 # CHECK-NEXT:       64: eb 22                            jmp {{.*}}
@@ -33,8 +31,7 @@
 # CHECK:            76: 89 45 fc                         movl %eax, -4(%rbp)
 # CHECK-NEXT:       79: 5d                               popq %rbp
 # CHECK-NEXT:       7a: 48 39 c5                         cmpq %rax, %rbp
-# CHECK-NEXT:       7d: 74 03                            je {{.*}}
-# CHECK-NEXT:       7f: 90                               nop
+# CHECK-NEXT:       7d: 2e 74 02                         je {{.*}}
 # CHECK-NEXT:       80: eb 06                            jmp {{.*}}
 # CHECK-NEXT:       82: 8b 45 f4                         movl -12(%rbp), %eax
 # CHECK-NEXT:       85: 89 45 fc                         movl %eax, -4(%rbp)
Index: llvm/test/MC/X86/align-branch-64-1b.s
===================================================================
--- llvm/test/MC/X86/align-branch-64-1b.s
+++ llvm/test/MC/X86/align-branch-64-1b.s
@@ -7,10 +7,9 @@
 # CHECK-NEXT:       1b: 31 c0                            xorl %eax, %eax
 # CHECK-COUNT-3:      : 90                               nop
 # CHECK-NEXT:       20: 48 39 c5                         cmpq %rax, %rbp
-# CHECK-NEXT:       23: 74 5b                            je {{.*}}
+# CHECK-NEXT:       23: 2e 74 5a                         je {{.*}}
 # CHECK-COUNT-3:      : 64 89 04 25 01 00 00 00          movl %eax, %fs:1
-# CHECK:            3d: 31 c0                            xorl %eax, %eax
-# CHECK-NEXT:       3f: 90                               nop
+# CHECK:            3e: 31 c0                            xorl %eax, %eax
 # CHECK-NEXT:       40: 74 3e                            je {{.*}}
 # CHECK-NEXT:       42: 5d                               popq %rbp
 # CHECK-NEXT:       43: 74 3b                            je {{.*}}
Index: llvm/test/MC/X86/align-branch-64-1c.s
===================================================================
--- llvm/test/MC/X86/align-branch-64-1c.s
+++ llvm/test/MC/X86/align-branch-64-1c.s
@@ -9,9 +9,8 @@
 # CHECK-NEXT:       20: 74 5b                            je {{.*}}
 # CHECK-COUNT-3:      : 64 89 04 25 01 00 00 00          movl %eax, %fs:1
 # CHECK:            3a: 31 c0                            xorl %eax, %eax
-# CHECK-NEXT:       3c: 74 3f                            je {{.*}}
-# CHECK-NEXT:       3e: 5d                               popq %rbp
-# CHECK-NEXT:       3f: 90                               nop
+# CHECK-NEXT:       3c: 2e 74 3e                         je {{.*}}
+# CHECK-NEXT:       3f: 5d                               popq %rbp
 # CHECK-NEXT:       40: 74 3b                            je {{.*}}
 # CHECK-COUNT-3:      : 64 89 04 25 01 00 00 00          movl %eax, %fs:1
 # CHECK:            5a: 31 c0                            xorl %eax, %eax
Index: llvm/test/MC/X86/align-branch-64-1d.s
===================================================================
--- llvm/test/MC/X86/align-branch-64-1d.s
+++ llvm/test/MC/X86/align-branch-64-1d.s
@@ -14,15 +14,12 @@
 # CHECK-NEXT:       20: 74 5d                            je {{.*}}
 # CHECK-COUNT-3:      : 64 89 04 25 01 00 00 00          movl %eax, %fs:1
 # CHECK:            3a: 31 c0                            xorl %eax, %eax
-# CHECK-NEXT:       3c: 74 41                            je {{.*}}
-# CHECK-NEXT:       3e: 5d                               popq %rbp
-# CHECK-NEXT:       3f: 90                               nop
+# CHECK-NEXT:       3c: 2e 74 40                         je {{.*}}
+# CHECK-NEXT:       3f: 5d                               popq %rbp
 # CHECK-NEXT:       40: 74 3d                            je {{.*}}
 # CHECK-COUNT-3:      : 64 89 04 25 01 00 00 00          movl %eax, %fs:1
 # CHECK:            5a: 31 c0                            xorl %eax, %eax
-# CHECK-NEXT:       5c: eb 27                            jmp {{.*}}
-# SHORT-NOP-COUNT-2:  : 90                               nop
-# LONG-NOP:         5e: 66 90                            nop
+# CHECK-NEXT:       5c: 2e 2e eb 25                      jmp {{.*}}
 # CHECK-NEXT:       60: eb 23                            jmp {{.*}}
 # CHECK-NEXT:       62: eb 21                            jmp {{.*}}
 # CHECK-COUNT-2:      : 64 89 04 25 01 00 00 00          movl %eax, %fs:1
Index: llvm/test/MC/X86/align-branch-64.s
===================================================================
--- llvm/test/MC/X86/align-branch-64.s
+++ llvm/test/MC/X86/align-branch-64.s
@@ -1,4 +1,4 @@
-  # RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu --x86-align-branch-boundary=32 --x86-align-branch=call+jmp+indirect+ret+jcc %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s
+  # RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu --x86-align-branch-boundary=32 --x86-align-branch=call+jmp+indirect+ret+jcc -x86-pad-max-prefix-size=0 %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s
 
   # These tests are checking the basic cases for each instructions, and a
   # bit of the alignment checking logic itself. Fused instruction cases are
Index: llvm/test/MC/X86/align-via-relaxation.s
===================================================================
--- llvm/test/MC/X86/align-via-relaxation.s
+++ llvm/test/MC/X86/align-via-relaxation.s
@@ -7,14 +7,14 @@
   # Demonstrate that we can relax instructions to provide padding, not
   # just insert nops. jmps are being used for ease of demonstration.
   # CHECK: .text
-  # CHECK: 0: eb 1f                         jmp 31
-  # CHECK: 2: e9 1a 00 00 00                jmp 26
-  # CHECK: 7: e9 15 00 00 00                jmp 21
-  # CHECK: c: e9 10 00 00 00                jmp 16
-  # CHECK: 11: e9 0b 00 00 00               jmp 11
-  # CHECK: 16: e9 06 00 00 00               jmp 6
-  # CHECK: 1b: e9 01 00 00 00               jmp 1
-  # CHECK: 20: cc                           int3
+  # CHECK: 0: eb 1f                         jmp
+  # CHECK: 2: eb 1d                         jmp
+  # CHECK: 4: eb 1b                         jmp
+  # CHECK: 6: eb 19                         jmp
+  # CHECK: 8: 2e 2e eb 15                   jmp
+  # CHECK: c: 2e 2e 2e 2e 2e e9 0b 00 00 00 jmp
+  # CHECK: 16: 2e 2e 2e 2e 2e e9 01 00 00 00 jmp
+  # CHECK: 20: cc                           int3
   .p2align 4
   jmp foo
   jmp foo
@@ -47,15 +47,14 @@
   # fewer nops by relaxing the branch, even though we don't need to
   # CHECK: loop_preheader:
   # CHECK: 45: 48 85 c0                       testq %rax, %rax
-  # CHECK: 48: 0f 8e 22 00 00 00              jle 34
-  # CHECK: 4e: 66 2e 0f 1f 84 00 00 00 00 00  nopw %cs:(%rax,%rax)
-  # CHECK: 58: 0f 1f 84 00 00 00 00 00        nopl (%rax,%rax)
+  # CHECK: 48: 2e 2e 2e 2e 0f 8e 1e 00 00 00  jle 30
+  # CHECK: 52: 66 2e 0f 1f 84 00 00 00 00 00  nopw %cs:(%rax,%rax)
+  # CHECK: 5c: 0f 1f 40 00                    nopl (%rax)
   # CHECK: loop_header:
   # CHECK: 60: 48 83 e8 01                    subq $1, %rax
   # CHECK: 64: 48 85 c0                       testq %rax, %rax
   # CHECK: 67: 7e 07                          jle 7
-  # CHECK: 69: e9 f2 ff ff ff                 jmp -14
-  # CHECK: 6e: 66 90                          nop
+  # CHECK: 69: 2e 2e e9 f0 ff ff ff           jmp
   # CHECK: loop_exit:
   # CHECK: 70: c3                             retq
   .p2align 5