diff --git a/lld/test/COFF/lto-cpu-string.ll b/lld/test/COFF/lto-cpu-string.ll --- a/lld/test/COFF/lto-cpu-string.ll +++ b/lld/test/COFF/lto-cpu-string.ll @@ -7,7 +7,7 @@ ; RUN: lld-link -mllvm:-mcpu=znver1 -noentry -nodefaultlib %t.obj -out:%t.znver1.dll -dll ; RUN: llvm-objdump -d --section=".text" --no-leading-addr --no-show-raw-insn %t.znver1.dll | FileCheck --check-prefix=ZNVER1 %s -; ZNVER1: nopw +; ZNVER1: leal (%rdi,%riz), %edi target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc-windows-msvc19.14.26433" diff --git a/lld/test/COFF/lto.ll b/lld/test/COFF/lto.ll --- a/lld/test/COFF/lto.ll +++ b/lld/test/COFF/lto.ll @@ -86,8 +86,10 @@ ; TEXT-10-EMPTY: ; TEXT-10-NEXT: <.text>: ; TEXT-10-NEXT: retq -; TEXT-10-NEXT: nopw %cs:(%rax,%rax) -; TEXT-10-NEXT: nopl (%rax,%rax) +; TEXT-10-NEXT: leal (%rsi), %esi +; TEXT-10-NEXT: leal (%rdi,%riz), %edi +; TEXT-10-NEXT: nop +; TEXT-10-NEXT: leal (%rsi,%riz), %esi ; TEXT-10-NEXT: retq ; TEXT-10-NEXT: int3 ; TEXT-10-NEXT: int3 diff --git a/lld/test/ELF/lto/cpu-string.ll b/lld/test/ELF/lto/cpu-string.ll --- a/lld/test/ELF/lto/cpu-string.ll +++ b/lld/test/ELF/lto/cpu-string.ll @@ -7,7 +7,7 @@ ; RUN: ld.lld -mllvm -mcpu=znver1 %t.o -o %t.znver1.so -shared ; RUN: llvm-objdump -d --section=".text" --no-leading-addr --no-show-raw-insn %t.znver1.so | FileCheck --check-prefix=ZNVER1 %s -; ZNVER1: nopw +; ZNVER1: leal ; Check we are able to use -plugin-opt=mcpu= to set CPU string. 
; RUN: ld.lld -plugin-opt=mcpu=znver1 %t.o -o %t.znver1.so -shared diff --git a/lld/test/ELF/lto/mllvm.ll b/lld/test/ELF/lto/mllvm.ll --- a/lld/test/ELF/lto/mllvm.ll +++ b/lld/test/ELF/lto/mllvm.ll @@ -10,7 +10,7 @@ ; CHECK: Pass Arguments: ; CHECK: # *** IR Dump -; DISASM: nopw +; DISASM: leal target triple = "x86_64-unknown-linux-gnu" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/lld/test/MachO/cpu-string.ll b/lld/test/MachO/cpu-string.ll --- a/lld/test/MachO/cpu-string.ll +++ b/lld/test/MachO/cpu-string.ll @@ -8,8 +8,8 @@ ; RUN: %lld -mcpu znver1 %t.o -o %t.znver1.dylib -dylib ; RUN: llvm-objdump -d --section="__text" --no-leading-addr --no-show-raw-insn %t.znver1.dylib | FileCheck %s --check-prefix=ZNVER1 -; ZNVER1: nopw -; ZNVER1-NOT: nop{{$}} +; ZNVER1: leaw +; ZNVER1: leal target triple = "x86_64-apple-darwin" target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp --- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -1087,32 +1087,79 @@ return 10; } +namespace { +// 32-bit nop patterns: +// nop +const char *Nop32_1 = "\x90"; +// xchg %ax,%ax +const char *Nop32_2 = "\x66\x90"; +// leal 0(%esi),%esi +const char *Nop32_3 = "\x8d\x76\x00"; +// leal 0(%esi,1),%esi +const char *Nop32_4 = "\x8d\x74\x26\x00"; +// nop; leal 0(%esi,1),%esi +const char *Nop32_5 = "\x90\x8d\x74\x26\x00"; +// leal 0L(%esi),%esi +const char *Nop32_6 = "\x8d\xb6\x00\x00\x00\x00"; +// leal 0L(%esi,1),%esi +const char *Nop32_7 = "\x8d\xb4\x26\x00\x00\x00\x00"; +// nop; leal 0L(%esi,1),%esi +const char *Nop32_8 = "\x90\x8d\xb4\x26\x00\x00\x00\x00"; +// movl %esi,%esi; leal 0L(%edi,1),%edi +const char *Nop32_9 = "\x89\xf6\x8d\xbc\x27\x00\x00\x00\x00"; +// leal 0(%esi),%esi; leal 0L(%edi,1),%edi +const char *Nop32_10 = 
"\x8d\x76\x00\x8d\xbc\x27\x00\x00\x00\x00"; +// leal 0(%esi,1),%esi; leal 0L(%edi,1),%edi +const char *Nop32_11 = "\x8d\x74\x26\x00\x8d\xbc\x27\x00\x00\x00\x00"; +// leal 0L(%esi),%esi; leal 0L(%edi),%edi +const char *Nop32_12 = "\x8d\xb6\x00\x00\x00\x00\x8d\xbf\x00\x00\x00\x00"; +// leal 0L(%esi),%esi; leal 0L(%edi,1),%edi +const char *Nop32_13 = "\x8d\xb6\x00\x00\x00\x00\x8d\xbc\x27\x00\x00\x00\x00"; +// leal 0L(%esi,1),%esi; leal 0L(%edi,1),%edi +const char *Nop32_14 = + "\x8d\xb4\x26\x00\x00\x00\x00\x8d\xbc\x27\x00\x00\x00\x00"; + +// 16-bit nop patterns: +// lea 0(%si),%si +const char *Nop16_3 = "\x8d\x74\x00"; +// lea 0w(%si),%si +const char *Nop16_4 = "\x8d\xb4\x00\x00"; +// nop; lea 0w(%si),%si +const char *Nop16_5 = "\x90\x8d\xb4\x00\x00"; +// mov %si,%si; lea 0w(%di),%di +const char *Nop16_6 = "\x89\xf6\x8d\xbd\x00\x00"; +// lea 0(%si),%si; lea 0w(%di),%di +const char *Nop16_7 = "\x8d\x74\x00\x8d\xbd\x00\x00"; +// lea 0w(%si),%si; lea 0w(%di),%di +const char *Nop16_8 = "\x8d\xb4\x00\x00\x8d\xbd\x00\x00"; + +// lotsa nops +static const char LotsOfNops[] = "\x90\x90\x90\x90\x90\x90\x90\x90\x90" + "\x90\x90\x90\x90\x90\x90\x90\x90\x90" + "\x90\x90\x90\x90\x90\x90\x90\x90\x90" + "\x90\x90\x90\x90"; + +const char *Nop32BitPatterns[] = { + Nop32_1, Nop32_2, Nop32_3, Nop32_4, Nop32_5, Nop32_6, Nop32_7, + Nop32_8, Nop32_9, Nop32_10, Nop32_11, Nop32_12, Nop32_13, Nop32_14}; + +const char *Nop16BitPatterns[] = {Nop32_1, Nop32_2, Nop16_3, Nop16_4, + Nop16_5, Nop16_6, Nop16_7, Nop16_8}; +} // end anonymous namespace + /// Write a sequence of optimal nops to the output, covering \p Count /// bytes. 
/// \return - true on success, false on failure bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const { - static const char Nops[10][11] = { - // nop - "\x90", - // xchg %ax,%ax - "\x66\x90", - // nopl (%[re]ax) - "\x0f\x1f\x00", - // nopl 0(%[re]ax) - "\x0f\x1f\x40\x00", - // nopl 0(%[re]ax,%[re]ax,1) - "\x0f\x1f\x44\x00\x00", - // nopw 0(%[re]ax,%[re]ax,1) - "\x66\x0f\x1f\x44\x00\x00", - // nopl 0L(%[re]ax) - "\x0f\x1f\x80\x00\x00\x00\x00", - // nopl 0L(%[re]ax,%[re]ax,1) - "\x0f\x1f\x84\x00\x00\x00\x00\x00", - // nopw 0L(%[re]ax,%[re]ax,1) - "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00", - // nopw %cs:0L(%[re]ax,%[re]ax,1) - "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00", - }; + + if (STI.getFeatureBits()[X86::Mode16Bit]) { + if (Count > 8) + OS.write(LotsOfNops, Count); + else + OS.write(Nop16BitPatterns[Count - 1], Count); + + return true; + } uint64_t MaxNopLength = (uint64_t)getMaximumNopSize(); @@ -1125,7 +1172,7 @@ OS << '\x66'; const uint8_t Rest = ThisNopLength - Prefixes; if (Rest != 0) - OS.write(Nops[Rest - 1], Rest); + OS.write(Nop32BitPatterns[Rest - 1], Rest); Count -= ThisNopLength; } while (Count != 0); diff --git a/llvm/test/MC/COFF/align-nops.s b/llvm/test/MC/COFF/align-nops.s --- a/llvm/test/MC/COFF/align-nops.s +++ b/llvm/test/MC/COFF/align-nops.s @@ -31,7 +31,7 @@ //CHECK-NEXT: IMAGE_SCN_MEM_READ //CHECK-NEXT: ] //CHECK-NEXT: SectionData ( -//CHECK-NEXT: 0000: 00000000 0F1F4000 00000000 0F1F4000 +//CHECK-NEXT: 0000: 00000000 8D742600 00000000 8D742600 //CHECK-NEXT: ) //CHECK: Name: .data diff --git a/llvm/test/MC/ELF/align-nops.s b/llvm/test/MC/ELF/align-nops.s --- a/llvm/test/MC/ELF/align-nops.s +++ b/llvm/test/MC/ELF/align-nops.s @@ -30,7 +30,7 @@ // CHECK-NEXT: AddressAlignment: 8 // CHECK-NEXT: EntrySize: 0 // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 00000000 0F1F4000 00000000 0F1F4000 +// CHECK-NEXT: 0000: 00000000 8D742600 00000000 8D742600 // CHECK-NEXT: ) // CHECK-NEXT: } diff --git 
a/llvm/test/MC/MachO/x86_32-optimal_nop.s b/llvm/test/MC/MachO/x86_32-optimal_nop.s --- a/llvm/test/MC/MachO/x86_32-optimal_nop.s +++ b/llvm/test/MC/MachO/x86_32-optimal_nop.s @@ -192,25 +192,25 @@ // CHECK: SectionData ( // CHECK: 0000: C390C300 00000000 00000000 00000000 |................| // CHECK: 0010: C3C36690 C3000000 00000000 00000000 |..f.............| -// CHECK: 0020: C30F1F00 C3000000 00000000 00000000 |................| -// CHECK: 0030: C3C3C3C3 0F1F4000 C3000000 00000000 |......@.........| -// CHECK: 0040: C3C3C30F 1F440000 C3000000 00000000 |.....D..........| -// CHECK: 0050: C3C3660F 1F440000 C3000000 00000000 |..f..D..........| -// CHECK: 0060: C30F1F80 00000000 C3000000 00000000 |................| +// CHECK: 0020: C38D7600 C3000000 00000000 00000000 |..v.............| +// CHECK: 0030: C3C3C3C3 8D742600 C3000000 00000000 |.....t&.........| +// CHECK: 0040: C3C3C390 8D742600 C3000000 00000000 |.....t&.........| +// CHECK: 0050: C3C38DB6 00000000 C3000000 00000000 |................| +// CHECK: 0060: C38DB426 00000000 C3000000 00000000 |...&............| // CHECK: 0070: C3C3C3C3 C3C3C3C3 C3000000 00000000 |................| -// CHECK: 0080: C3C3C3C3 C3C3C366 0F1F8400 00000000 |.......f........| +// CHECK: 0080: C3C3C3C3 C3C3C389 F68DBC27 00000000 |...........'....| // CHECK: 0090: C3000000 00000000 00000000 00000000 |................| -// CHECK: 00A0: C3C3C3C3 C3C3C366 0F1F8400 00000000 |.......f........| +// CHECK: 00A0: C3C3C3C3 C3C3C389 F68DBC27 00000000 |...........'....| // CHECK: 00B0: C3000000 00000000 00000000 00000000 |................| -// CHECK: 00C0: C3C3C3C3 C3662E0F 1F840000 00000090 |.....f..........| +// CHECK: 00C0: C3C3C3C3 C38D7600 8DBC2700 00000090 |......v...'.....| // CHECK: 00D0: C3000000 00000000 00000000 00000000 |................| -// CHECK: 00E0: C3C3C3C3 662E0F1F 84000000 00006690 |....f.........f.| +// CHECK: 00E0: C3C3C3C3 8D76008D BC270000 00006690 |.....v...'....f.| // CHECK: 00F0: C3000000 00000000 00000000 00000000 
|................| -// CHECK: 0100: C3C3C366 2E0F1F84 00000000 000F1F00 |...f............| +// CHECK: 0100: C3C3C38D 76008DBC 27000000 008D7600 |....v...'.....v.| // CHECK: 0110: C3000000 00000000 00000000 00000000 |................| -// CHECK: 0120: C3C3662E 0F1F8400 00000000 0F1F4000 |..f...........@.| +// CHECK: 0120: C3C38D76 008DBC27 00000000 8D742600 |...v...'.....t&.| // CHECK: 0130: C3000000 00000000 00000000 00000000 |................| -// CHECK: 0140: C3662E0F 1F840000 0000000F 1F440000 |.f...........D..| +// CHECK: 0140: C38D7600 8DBC2700 00000090 8D742600 |..v...'......t&.| // CHECK: 0150: C3 |.| // CHECK: ) // CHECK: } diff --git a/llvm/test/MC/X86/AlignedBundling/long-nop-pad.s b/llvm/test/MC/X86/AlignedBundling/long-nop-pad.s --- a/llvm/test/MC/X86/AlignedBundling/long-nop-pad.s +++ b/llvm/test/MC/X86/AlignedBundling/long-nop-pad.s @@ -14,9 +14,11 @@ callq bar .bundle_unlock # To align this group to a bundle end, we need a two 10-byte NOPs and a 7-byte NOP. -# CHECK: 0: nop -# CHECK-NEXT: a: nop -# CHECK-NEXT: 14: nop +# CHECK: 0: leal (%rsi), %esi +# CHECK-NEXT: 3: leal (%rdi,%riz), %edi +# CHECK-NEXT: a: leal (%rsi), %esi +# CHECK-NEXT: d: leal (%rdi,%riz), %edi +# CHECK-NEXT: 14: leal (%rsi,%riz), %esi # CHECK: 1b: callq # This push instruction is 1 byte long @@ -24,8 +26,11 @@ push %rax .bundle_unlock # To align this group to a bundle end, we need three 10-byte NOPs, and a 1-byte. 
-# CHECK: 20: nop -# CHECK-NEXT: 2a: nop -# CHECK-NEXT: 34: nop -# CHECK-NEXT: 3e: nop -# CHECK-NEXT: 3f: pushq +# CHECK: 20: leal (%rsi), %esi +# CHECK-NEXT: 23: leal (%rdi,%riz), %edi +# CHECK-NEXT: 2a: leal (%rsi), %esi +# CHECK-NEXT: 2d: leal (%rdi,%riz), %edi +# CHECK-NEXT: 34: leal (%rsi), %esi +# CHECK-NEXT: 37: leal (%rdi,%riz), %edi +# CHECK-NEXT: 3e: nop +# CHECK-NEXT: 3f: pushq %rax diff --git a/llvm/test/MC/X86/AlignedBundling/misaligned-bundle-group.s b/llvm/test/MC/X86/AlignedBundling/misaligned-bundle-group.s --- a/llvm/test/MC/X86/AlignedBundling/misaligned-bundle-group.s +++ b/llvm/test/MC/X86/AlignedBundling/misaligned-bundle-group.s @@ -11,13 +11,18 @@ push %ebp # 1 byte .align 16 .bundle_lock align_to_end -# CHECK: 1: nopw %cs:(%eax,%eax) -# CHECK: 10: nopw %cs:(%eax,%eax) -# CHECK-RELAX: 1a: nop -# CHECK-RELAX: 20: nopw %cs:(%eax,%eax) -# CHECK-RELAX: 2a: nopw %cs:(%eax,%eax) +# CHECK: 1: leal (%esi), %esi + +# CHECK-OPT: 10: leal (%esi), %esi # CHECK-OPT: 1b: calll 0x1c -# CHECK-RELAX: 3b: calll 0x3c + +# CHECK-RELAX: b: nop +# CHECK-RELAX: c: leal (%esi,%eiz), %esi +# CHECK-RELAX: 10: leal (%esi), %esi +# CHECK-RELAX: 1a: leal (%esi), %esi +# CHECK-RELAX: 20: leal (%esi), %esi +# CHECK-RELAX: 2a: leal (%esi), %esi +# CHECK-RELAX: 3b: calll 0x3c calll bar # 5 bytes .bundle_unlock ret # 1 byte diff --git a/llvm/test/MC/X86/AlignedBundling/misaligned-bundle.s b/llvm/test/MC/X86/AlignedBundling/misaligned-bundle.s --- a/llvm/test/MC/X86/AlignedBundling/misaligned-bundle.s +++ b/llvm/test/MC/X86/AlignedBundling/misaligned-bundle.s @@ -10,9 +10,9 @@ .bundle_align_mode 5 push %ebp # 1 byte .align 16 -# CHECK: 1: nopw %cs:(%eax,%eax) -# CHECK-RELAX: 10: nopw %cs:(%eax,%eax) -# CHECK-RELAX: 1a: nop +# CHECK: 1: leal (%esi), %esi +# CHECK-RELAX: 10: leal (%esi), %esi +# CHECK-RELAX: 1a: leal (%esi), %esi # CHECK-OPT: 10: movl $1, (%esp) # CHECK-RELAX: 20: movl $1, (%esp) movl $0x1, (%esp) # 7 bytes diff --git 
a/llvm/test/MC/X86/AlignedBundling/pad-align-to-bundle-end.s b/llvm/test/MC/X86/AlignedBundling/pad-align-to-bundle-end.s --- a/llvm/test/MC/X86/AlignedBundling/pad-align-to-bundle-end.s +++ b/llvm/test/MC/X86/AlignedBundling/pad-align-to-bundle-end.s @@ -16,7 +16,7 @@ callq bar .bundle_unlock # To align this group to a bundle end, we need a 1-byte NOP. -# CHECK: a: nop +# CHECK: a: nop # CHECK-NEXT: b: callq callq bar @@ -27,9 +27,9 @@ .bundle_unlock # Here we have to pad until the end of the *next* boundary because # otherwise the group crosses a boundary. -# CHECK: 1a: nop +# CHECK: 1a: leal (%rsi), %esi # The nop sequence may be implemented as one instruction or many, but if # it's one instruction, that instruction cannot itself cross the boundary. -# CHECK: 20: nop +# CHECK: 20: leal (%rsi), %esi # CHECK-NEXT: 26: callq # CHECK-NEXT: 2b: callq diff --git a/llvm/test/MC/X86/AlignedBundling/pad-bundle-groups.s b/llvm/test/MC/X86/AlignedBundling/pad-bundle-groups.s --- a/llvm/test/MC/X86/AlignedBundling/pad-bundle-groups.s +++ b/llvm/test/MC/X86/AlignedBundling/pad-bundle-groups.s @@ -18,7 +18,7 @@ callq bar .bundle_unlock # We'll need a 6-byte NOP before this group -# CHECK: a: nop +# CHECK: a: leal # CHECK-NEXT: 10: callq # CHECK-NEXT: 15: callq @@ -27,7 +27,7 @@ callq bar .bundle_unlock # Same here -# CHECK: 1a: nop +# CHECK: 1a: leal # CHECK-NEXT: 20: callq # CHECK-NEXT: 25: callq @@ -40,7 +40,7 @@ .bundle_unlock # And here we'll need a 10-byte NOP + 1-byte NOP # CHECK: 30: callq -# CHECK: 35: nop +# CHECK: 35: leal # CHECK: 3f: nop # CHECK-NEXT: 40: callq # CHECK-NEXT: 45: callq diff --git a/llvm/test/MC/X86/AlignedBundling/relax-at-bundle-end.s b/llvm/test/MC/X86/AlignedBundling/relax-at-bundle-end.s --- a/llvm/test/MC/X86/AlignedBundling/relax-at-bundle-end.s +++ b/llvm/test/MC/X86/AlignedBundling/relax-at-bundle-end.s @@ -12,7 +12,7 @@ push %rax .endr # CHECK: 1c: push -# CHECK: 1d: nop +# CHECK: 1d: leal # CHECK: 20: jne jne 0x100 diff --git 
a/llvm/test/MC/X86/AlignedBundling/relax-in-bundle-group.s b/llvm/test/MC/X86/AlignedBundling/relax-in-bundle-group.s --- a/llvm/test/MC/X86/AlignedBundling/relax-in-bundle-group.s +++ b/llvm/test/MC/X86/AlignedBundling/relax-in-bundle-group.s @@ -23,7 +23,8 @@ jle .L_ELSE # This group would've started at 0x18 and is too long, so a chunky NOP padding # is inserted to push it to 0x20. -# CHECK: 18: {{[a-f0-9 ]+}} nopl +# CHECK: 18: {{[a-f0-9 ]+}} nop +# CHECK: 19: {{[a-f0-9 ]+}} leal # The long encoding for JLE should be used here even though its target is close # CHECK-NEXT: 20: 0f 8e diff --git a/llvm/test/MC/X86/AlignedBundling/single-inst-bundling.s b/llvm/test/MC/X86/AlignedBundling/single-inst-bundling.s --- a/llvm/test/MC/X86/AlignedBundling/single-inst-bundling.s +++ b/llvm/test/MC/X86/AlignedBundling/single-inst-bundling.s @@ -26,7 +26,7 @@ movl %ebx, %edi callq bar cmpl %r14d, %ebp -# CHECK-RELAX: nopl +# CHECK-RELAX: leal jle .L_ELSE # Due to the padding that's inserted before the addl, the jump target # becomes farther by one byte. diff --git a/llvm/test/MC/X86/align-branch-pad-max-prefix.s b/llvm/test/MC/X86/align-branch-pad-max-prefix.s --- a/llvm/test/MC/X86/align-branch-pad-max-prefix.s +++ b/llvm/test/MC/X86/align-branch-pad-max-prefix.s @@ -10,7 +10,7 @@ # following nops, doing so would make the jmp misaligned. 
# CHECK: 18: jmp jmp bar -# CHECK: 1d: nopl (%rax) +# CHECK: 1d: leal (%rsi), %esi # CHECK: 20: int3 .p2align 5 int3 diff --git a/llvm/test/MC/X86/align-via-padding.s b/llvm/test/MC/X86/align-via-padding.s --- a/llvm/test/MC/X86/align-via-padding.s +++ b/llvm/test/MC/X86/align-via-padding.s @@ -36,7 +36,8 @@ # CHECK: : # CHECK: 45: 48 85 c0 testq %rax, %rax # CHECK: 48: 2e 2e 2e 2e 0f 8e 1e 00 00 00 jle 0x70 - # CHECK: 52: 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:(%rax,%rax) + # CHECK: 52: 66 66 66 66 8d 76 00 leaw (%rsi), %si + # CHECK: 59: 8d bc 27 00 00 00 00 leal (%rdi,%riz), %edi # CHECK: : # CHECK: 60: 48 83 e8 01 subq $1, %rax # CHECK: 64: 48 85 c0 testq %rax, %rax diff --git a/llvm/test/MC/X86/align-via-relaxation.s b/llvm/test/MC/X86/align-via-relaxation.s --- a/llvm/test/MC/X86/align-via-relaxation.s +++ b/llvm/test/MC/X86/align-via-relaxation.s @@ -9,16 +9,17 @@ .section .text # NOPAD-LABEL: <.text>: -# NOPAD-NEXT: 0: eb 1f jmp 0x21 -# NOPAD-NEXT: 2: eb 1d jmp 0x21 -# NOPAD-NEXT: 4: eb 1b jmp 0x21 -# NOPAD-NEXT: 6: eb 19 jmp 0x21 -# NOPAD-NEXT: 8: eb 17 jmp 0x21 -# NOPAD-NEXT: a: eb 15 jmp 0x21 -# NOPAD-NEXT: c: eb 13 jmp 0x21 -# NOPAD-NEXT: e: 66 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:(%rax,%rax) -# NOPAD-NEXT: 1d: 0f 1f 00 nopl (%rax) -# NOPAD-NEXT: 20: cc int3 +# NOPAD-NEXT: 0: eb 1f jmp 0x21 +# NOPAD-NEXT: 2: eb 1d jmp 0x21 +# NOPAD-NEXT: 4: eb 1b jmp 0x21 +# NOPAD-NEXT: 6: eb 19 jmp 0x21 +# NOPAD-NEXT: 8: eb 17 jmp 0x21 +# NOPAD-NEXT: a: eb 15 jmp 0x21 +# NOPAD-NEXT: c: eb 13 jmp 0x21 +# NOPAD-NEXT: e: 66 66 66 66 66 8d 76 00 leaw (%rsi), %si +# NOPAD-NEXT: 16: 8d bc 27 00 00 00 00 leal (%rdi,%riz), %edi +# NOPAD-NEXT: 1d: 8d 76 00 leal (%rsi), %esi +# NOPAD-NEXT: 20: cc int3 # Demonstrate that we can relax instructions to provide padding, not # just insert nops. jmps are being used for ease of demonstration. 
@@ -48,7 +49,7 @@ # that would require a further round of relaxation # CHECK: : # CHECK: 22: eb fe jmp 0x22 - # CHECK: 24: 66 66 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:(%rax,%rax) + # CHECK: 24: 66 66 8d 76 00 leaw (%rsi), %si # CHECK: 30: 0f 0b ud2 bar: @@ -63,8 +64,9 @@ # CHECK: : # CHECK: 45: 48 85 c0 testq %rax, %rax # CHECK: 48: 0f 8e 22 00 00 00 jle 0x70 - # CHECK: 4e: 66 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:(%rax,%rax) - # CHECK: 5d: 0f 1f 00 nopl (%rax) + # CHECK: 4e: 66 66 66 66 66 8d 76 00 leaw (%rsi), %si + # CHECK: 56: 8d bc 27 00 00 00 00 leal (%rdi,%riz), %edi + # CHECK: 5d: 8d 76 00 leal (%rsi), %esi # CHECK: : # CHECK: 60: 48 83 e8 01 subq $1, %rax # CHECK: 64: 48 85 c0 testq %rax, %rax diff --git a/llvm/test/MC/X86/code16gcc-align.s b/llvm/test/MC/X86/code16gcc-align.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/X86/code16gcc-align.s @@ -0,0 +1,101 @@ +# RUN: llvm-mc -filetype=obj -triple=i386-unknown-unknown %s | llvm-objdump --triple=i386-unknown-unknown-code16 -d - | FileCheck %s + +# Ensure that the "movzbl" is aligned such that the prefixes 0x67 0x66 are +# properly included in the "movz" instruction. 
+ +# CHECK-LABEL: : +# CHECK: 22: 66 89 c7 movl %eax, %edi +# CHECK-NEXT: 25: 66 31 db xorl %ebx, %ebx +# CHECK-NEXT: 28: 90 nop +# CHECK-NEXT: 29: 90 nop +# CHECK-NEXT: 2a: 90 nop +# CHECK-NEXT: 2b: 90 nop +# CHECK-NEXT: 2c: 90 nop +# CHECK-NEXT: 2d: 90 nop +# CHECK-NEXT: 2e: 90 nop +# CHECK-NEXT: 2f: 90 nop +# CHECK-NEXT: 30: 67 66 0f b6 0c 1e movzbl (%esi,%ebx), %ecx + + .text + .code16gcc + .globl print_serial + .p2align 4, 0x90 + .type print_serial,@function +print_serial: + pushl %ebp + movl %esp, %ebp + pushl %ebx + pushl %edi + pushl %esi + subl $12, %esp + movl 8(%ebp), %esi + movl %esi, %ecx + calll strlen + testl %eax, %eax + je .LBB0_3 + movl %eax, %edi + xorl %ebx, %ebx + .p2align 4, 0x90 +.LBB0_2: + movzbl (%esi,%ebx), %ecx + calll serial_outb + addl $1, %ebx + cmpl %ebx, %edi + jne .LBB0_2 +.LBB0_3: + addl $12, %esp + popl %esi + popl %edi + popl %ebx + popl %ebp + retl +.Lfunc_end0: + .size print_serial, .Lfunc_end0-print_serial + .p2align 4, 0x90 + .type strlen,@function +strlen: + pushl %ebp + movl %esp, %ebp + cmpb $0, (%ecx) + je .LBB1_1 + xorl %edx, %edx + .p2align 4, 0x90 +.LBB1_3: + leal 1(%edx), %eax + cmpb $0, 1(%ecx,%edx) + movl %eax, %edx + jne .LBB1_3 + popl %ebp + retl +.LBB1_1: + xorl %eax, %eax + popl %ebp + retl +.Lfunc_end1: + .size strlen, .Lfunc_end1-strlen + .p2align 4, 0x90 + .type serial_outb,@function +serial_outb: + pushl %ebp + movl %esp, %ebp + subl $8, %esp + calll outb + addl $8, %esp + popl %ebp + retl +.Lfunc_end2: + .size serial_outb, .Lfunc_end2-serial_outb + .p2align 4, 0x90 + .type outb,@function +outb: + pushl %ebp + movl %esp, %ebp + movl %ecx, %eax + movw $1016, %dx + #APP + outb %al, %dx + #NO_APP + popl %ebp + retl +.Lfunc_end3: + .size outb, .Lfunc_end3-outb diff --git a/llvm/test/MC/X86/x86_64-directive-nops.s b/llvm/test/MC/X86/x86_64-directive-nops.s --- a/llvm/test/MC/X86/x86_64-directive-nops.s +++ b/llvm/test/MC/X86/x86_64-directive-nops.s @@ -9,11 +9,11 @@ # CHECK-NEXT: 4: 66 90 nop # CHECK-NEXT: 6: 66 
90 nop .nops 4, 3 -# CHECK-NEXT: 8: 0f 1f 00 nopl (%rax) +# CHECK-NEXT: 8: 8d 76 00 leal (%rsi), %esi # CHECK-NEXT: b: 90 nop .nops 4, 4 -# CHECK-NEXT: c: 0f 1f 40 00 nopl (%rax) +# CHECK-NEXT: c: 8d 74 26 00 leal (%rsi,%riz), %esi .nops 4, 5 -# CHECK-NEXT: 10: 0f 1f 40 00 nopl (%rax) +# CHECK-NEXT: 10: 8d 74 26 00 leal (%rsi,%riz), %esi .nops 4 -# CHECK-NEXT: 14: 0f 1f 40 00 nopl (%rax) +# CHECK-NEXT: 14: 8d 74 26 00 leal (%rsi,%riz), %esi diff --git a/llvm/test/MC/X86/x86_long_nop.s b/llvm/test/MC/X86/x86_long_nop.s --- a/llvm/test/MC/X86/x86_long_nop.s +++ b/llvm/test/MC/X86/x86_long_nop.s @@ -34,31 +34,39 @@ .p2align 5 inc %eax # LNOP15: 0: inc -# LNOP15-NEXT: 1: nop -# LNOP15-NEXT: 10: nop +# LNOP15-NEXT: 1: leaw +# LNOP15-NEXT: 9: leal +# LNOP15-NEXT: 10: leaw +# LNOP15-NEXT: 18: leal # LNOP15-NEXT: 1f: nop # LNOP15-NEXT: 20: inc # LNOP11: 0: inc -# LNOP11-NEXT: 1: nop -# LNOP11-NEXT: c: nop -# LNOP11-NEXT: 17: nop +# LNOP11-NEXT: 1: leaw +# LNOP11-NEXT: 5: leal +# LNOP11-NEXT: c: leaw +# LNOP11-NEXT: 10: leal +# LNOP11-NEXT: 17: movl +# LNOP11-NEXT: 19: leal # LNOP11-NEXT: 20: inc # LNOP10: 0: inc -# LNOP10-NEXT: 1: nop -# LNOP10-NEXT: b: nop -# LNOP10-NEXT: 15: nop +# LNOP10-NEXT: 1: leal +# LNOP10-NEXT: 4: leal +# LNOP10-NEXT: b: leal +# LNOP10-NEXT: e: leal +# LNOP10-NEXT: 15: leal +# LNOP10-NEXT: 18: leal # LNOP10-NEXT: 1f: nop # LNOP10-NEXT: 20: inc # On Silvermont we emit only 7 byte NOPs since longer NOPs are not profitable. 
# LNOP7: 0: inc -# LNOP7-NEXT: 1: nop -# LNOP7-NEXT: 8: nop -# LNOP7-NEXT: f: nop -# LNOP7-NEXT: 16: nop -# LNOP7-NEXT: 1d: nop +# LNOP7-NEXT: 1: leal +# LNOP7-NEXT: 8: leal +# LNOP7-NEXT: f: leal +# LNOP7-NEXT: 16: leal +# LNOP7-NEXT: 1d: leal # LNOP7-NEXT: 20: inc # On Lakemont we emit only 1 byte NOPs since longer NOPs are not supported/legal diff --git a/llvm/test/MC/X86/x86_nop.s b/llvm/test/MC/X86/x86_nop.s --- a/llvm/test/MC/X86/x86_nop.s +++ b/llvm/test/MC/X86/x86_nop.s @@ -33,5 +33,5 @@ // NOPL: 0: 40 incl %eax -// NOPL: 1: 0f 1f 80 00 00 00 00 nopl (%eax) +// NOPL: 1: 8d b4 26 00 00 00 00 leal (%esi,%eiz), %esi // NOPL: 8: 40 incl %eax diff --git a/llvm/test/tools/llvm-profgen/symbolize.ll b/llvm/test/tools/llvm-profgen/symbolize.ll --- a/llvm/test/tools/llvm-profgen/symbolize.ll +++ b/llvm/test/tools/llvm-profgen/symbolize.ll @@ -11,7 +11,7 @@ ; CHECK: e: cmovl edx, ecx fib:2 @ funcLeaf:2 @ funcA:1 ; CHECK: 11: sub eax, edx funcLeaf:2 @ funcA:1 ; CHECK: 13: ret funcA:2 -; CHECK: 14: nop word ptr cs:[rax + rax] +; CHECK: 14: lea esi, [rsi] ; CHECK: 1e: nop ; CHECK: : ; CHECK: 20: mov eax, edi funcLeaf:1 @@ -21,7 +21,8 @@ ; CHECK: 2e: cmovl edx, ecx fib:2 @ funcLeaf:2 ; CHECK: 31: sub eax, edx funcLeaf:2 ; CHECK: 33: ret funcLeaf:3 -; CHECK: 34: nop word ptr cs:[rax + rax] +; CHECK: 34: lea esi, [rsi] +; CHECK: 37: lea edi, [rdi + riz] ; CHECK: 3e: nop ; CHECK: : ; CHECK: 40: lea eax, [rdi + 3] fib:2