diff --git a/lld/test/COFF/lto-cpu-string.ll b/lld/test/COFF/lto-cpu-string.ll
--- a/lld/test/COFF/lto-cpu-string.ll
+++ b/lld/test/COFF/lto-cpu-string.ll
@@ -7,7 +7,7 @@
 
 ; RUN: lld-link -mllvm:-mcpu=znver1 -noentry -nodefaultlib %t.obj -out:%t.znver1.dll -dll
 ; RUN: llvm-objdump -d --section=".text" --no-leading-addr --no-show-raw-insn %t.znver1.dll | FileCheck --check-prefix=ZNVER1 %s
-; ZNVER1: nopw
+; ZNVER1: leal    (%rdi,%riz), %edi
 
 target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-pc-windows-msvc19.14.26433"
diff --git a/lld/test/COFF/lto.ll b/lld/test/COFF/lto.ll
--- a/lld/test/COFF/lto.ll
+++ b/lld/test/COFF/lto.ll
@@ -86,8 +86,10 @@
 ; TEXT-10-EMPTY:
 ; TEXT-10-NEXT: <.text>:
 ; TEXT-10-NEXT: retq
-; TEXT-10-NEXT: nopw %cs:(%rax,%rax)
-; TEXT-10-NEXT: nopl (%rax,%rax)
+; TEXT-10-NEXT: leal (%rsi), %esi
+; TEXT-10-NEXT: leal (%rdi,%riz), %edi
+; TEXT-10-NEXT: nop
+; TEXT-10-NEXT: leal (%rsi,%riz), %esi
 ; TEXT-10-NEXT: retq
 ; TEXT-10-NEXT: int3
 ; TEXT-10-NEXT: int3
diff --git a/lld/test/ELF/lto/cpu-string.ll b/lld/test/ELF/lto/cpu-string.ll
--- a/lld/test/ELF/lto/cpu-string.ll
+++ b/lld/test/ELF/lto/cpu-string.ll
@@ -7,7 +7,7 @@
 
 ; RUN: ld.lld -mllvm -mcpu=znver1 %t.o -o %t.znver1.so -shared
 ; RUN: llvm-objdump -d --section=".text" --no-leading-addr --no-show-raw-insn %t.znver1.so | FileCheck --check-prefix=ZNVER1 %s
-; ZNVER1: nopw
+; ZNVER1: leal
 
 ; Check we are able to use -plugin-opt=mcpu=<CPU> to set CPU string.
 ; RUN: ld.lld -plugin-opt=mcpu=znver1 %t.o -o %t.znver1.so -shared
diff --git a/lld/test/ELF/lto/mllvm.ll b/lld/test/ELF/lto/mllvm.ll
--- a/lld/test/ELF/lto/mllvm.ll
+++ b/lld/test/ELF/lto/mllvm.ll
@@ -10,7 +10,7 @@
 ; CHECK: Pass Arguments:
 ; CHECK: # *** IR Dump
 
-; DISASM: nopw
+; DISASM: leal
 
 target triple = "x86_64-unknown-linux-gnu"
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/lld/test/MachO/cpu-string.ll b/lld/test/MachO/cpu-string.ll
--- a/lld/test/MachO/cpu-string.ll
+++ b/lld/test/MachO/cpu-string.ll
@@ -8,8 +8,8 @@
 ; RUN: %lld -mcpu znver1 %t.o -o %t.znver1.dylib -dylib
 ; RUN: llvm-objdump -d --section="__text" --no-leading-addr --no-show-raw-insn %t.znver1.dylib | FileCheck %s --check-prefix=ZNVER1
 
-; ZNVER1: nopw
-; ZNVER1-NOT: nop{{$}}
+; ZNVER1: leaw
+; ZNVER1: leal
 
 target triple = "x86_64-apple-darwin"
 target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
--- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -1087,32 +1087,95 @@
   return 10;
 }
 
+namespace {
+// NOTE(review): several patterns below write a 32-bit register (the leal
+// forms and movl %esi,%esi). On x86-64 a 32-bit register write zero-extends
+// into the full 64-bit register, so those encodings are not true no-ops in
+// 64-bit mode - confirm before this table is used to pad x86-64 code.
+//
+// 32-bit nop patterns:
+// nop
+const char *const Nop32_1 = "\x90";
+// xchg %ax,%ax
+const char *const Nop32_2 = "\x66\x90";
+// leal 0(%esi),%esi
+const char *const Nop32_3 = "\x8d\x76\x00";
+// leal 0(%esi,1),%esi
+const char *const Nop32_4 = "\x8d\x74\x26\x00";
+// nop; leal 0(%esi,1),%esi
+const char *const Nop32_5 = "\x90\x8d\x74\x26\x00";
+// leal 0L(%esi),%esi
+const char *const Nop32_6 = "\x8d\xb6\x00\x00\x00\x00";
+// leal 0L(%esi,1),%esi
+const char *const Nop32_7 = "\x8d\xb4\x26\x00\x00\x00\x00";
+// nop; leal 0L(%esi,1),%esi
+const char *const Nop32_8 = "\x90\x8d\xb4\x26\x00\x00\x00\x00";
+// movl %esi,%esi; leal 0L(%edi,1),%edi
+const char *const Nop32_9 = "\x89\xf6\x8d\xbc\x27\x00\x00\x00\x00";
+// leal 0(%esi),%esi; leal 0L(%edi,1),%edi
+const char *const Nop32_10 = "\x8d\x76\x00\x8d\xbc\x27\x00\x00\x00\x00";
+// leal 0(%esi,1),%esi; leal 0L(%edi,1),%edi
+const char *const Nop32_11 = "\x8d\x74\x26\x00\x8d\xbc\x27\x00\x00\x00\x00";
+// leal 0L(%esi),%esi; leal 0L(%edi),%edi
+const char *const Nop32_12 = "\x8d\xb6\x00\x00\x00\x00\x8d\xbf\x00\x00\x00\x00";
+// leal 0L(%esi),%esi; leal 0L(%edi,1),%edi
+const char *const Nop32_13 =
+    "\x8d\xb6\x00\x00\x00\x00\x8d\xbc\x27\x00\x00\x00\x00";
+// leal 0L(%esi,1),%esi; leal 0L(%edi,1),%edi
+const char *const Nop32_14 =
+    "\x8d\xb4\x26\x00\x00\x00\x00\x8d\xbc\x27\x00\x00\x00\x00";
+
+// 16-bit nop patterns:
+// lea 0(%si),%si
+const char *const Nop16_3 = "\x8d\x74\x00";
+// lea 0w(%si),%si
+const char *const Nop16_4 = "\x8d\xb4\x00\x00";
+// nop; lea 0w(%si),%si
+const char *const Nop16_5 = "\x90\x8d\xb4\x00\x00";
+// mov %si,%si; lea 0w(%di),%di
+const char *const Nop16_6 = "\x89\xf6\x8d\xbd\x00\x00";
+// lea 0(%si),%si; lea 0w(%di),%di
+const char *const Nop16_7 = "\x8d\x74\x00\x8d\xbd\x00\x00";
+// lea 0w(%si),%si; lea 0w(%di),%di
+const char *const Nop16_8 = "\x8d\xb4\x00\x00\x8d\xbd\x00\x00";
+
+// 31 one-byte nops (0x90); the literal's trailing NUL is not part of the run.
+static const char LotsOfNops[] = "\x90\x90\x90\x90\x90\x90\x90\x90\x90"
+                                 "\x90\x90\x90\x90\x90\x90\x90\x90\x90"
+                                 "\x90\x90\x90\x90\x90\x90\x90\x90\x90"
+                                 "\x90\x90\x90\x90";
+
+// Both tables are indexed by (length - 1): entry I is I + 1 bytes long.
+const char *const Nop32BitPatterns[] = {
+    Nop32_1, Nop32_2, Nop32_3,  Nop32_4,  Nop32_5,  Nop32_6,  Nop32_7,
+    Nop32_8, Nop32_9, Nop32_10, Nop32_11, Nop32_12, Nop32_13, Nop32_14};
+
+const char *const Nop16BitPatterns[] = {Nop32_1, Nop32_2, Nop16_3, Nop16_4,
+                                        Nop16_5, Nop16_6, Nop16_7, Nop16_8};
+} // end anonymous namespace
+
 /// Write a sequence of optimal nops to the output, covering \p Count
 /// bytes.
 /// \return - true on success, false on failure
 bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
-  static const char Nops[10][11] = {
-    // nop
-    "\x90",
-    // xchg %ax,%ax
-    "\x66\x90",
-    // nopl (%[re]ax)
-    "\x0f\x1f\x00",
-    // nopl 0(%[re]ax)
-    "\x0f\x1f\x40\x00",
-    // nopl 0(%[re]ax,%[re]ax,1)
-    "\x0f\x1f\x44\x00\x00",
-    // nopw 0(%[re]ax,%[re]ax,1)
-    "\x66\x0f\x1f\x44\x00\x00",
-    // nopl 0L(%[re]ax)
-    "\x0f\x1f\x80\x00\x00\x00\x00",
-    // nopl 0L(%[re]ax,%[re]ax,1)
-    "\x0f\x1f\x84\x00\x00\x00\x00\x00",
-    // nopw 0L(%[re]ax,%[re]ax,1)
-    "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00",
-    // nopw %cs:0L(%[re]ax,%[re]ax,1)
-    "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00",
-  };
+  // 16-bit mode has its own pattern table.
+  if (STI.getFeatureBits()[X86::Mode16Bit]) {
+    // Runs of 1..8 bytes have a dedicated pattern. Count == 0 must not take
+    // this path: Nop16BitPatterns[Count - 1] would index out of bounds.
+    if (Count != 0 && Count <= 8) {
+      OS.write(Nop16BitPatterns[Count - 1], Count);
+      return true;
+    }
+
+    // Longer runs are plain one-byte nops. Emit them in chunks so that a
+    // request larger than LotsOfNops never reads past the end of the buffer.
+    const uint64_t MaxChunk = sizeof(LotsOfNops) - 1; // Skip the trailing NUL.
+    while (Count != 0) {
+      const uint64_t Chunk = Count < MaxChunk ? Count : MaxChunk;
+      OS.write(LotsOfNops, Chunk);
+      Count -= Chunk;
+    }
+    return true;
+  }
 
   uint64_t MaxNopLength = (uint64_t)getMaximumNopSize();
 
@@ -1125,7 +1188,7 @@
       OS << '\x66';
     const uint8_t Rest = ThisNopLength - Prefixes;
     if (Rest != 0)
-      OS.write(Nops[Rest - 1], Rest);
+      OS.write(Nop32BitPatterns[Rest - 1], Rest);
     Count -= ThisNopLength;
   } while (Count != 0);
 
diff --git a/llvm/test/MC/COFF/align-nops.s b/llvm/test/MC/COFF/align-nops.s
--- a/llvm/test/MC/COFF/align-nops.s
+++ b/llvm/test/MC/COFF/align-nops.s
@@ -31,7 +31,7 @@
 //CHECK-NEXT:        IMAGE_SCN_MEM_READ
 //CHECK-NEXT:     ]
 //CHECK-NEXT:     SectionData (
-//CHECK-NEXT:       0000: 00000000 0F1F4000 00000000 0F1F4000
+//CHECK-NEXT:       0000: 00000000 8D742600 00000000 8D742600
 //CHECK-NEXT:     )
 
 //CHECK:          Name: .data
diff --git a/llvm/test/MC/ELF/align-nops.s b/llvm/test/MC/ELF/align-nops.s
--- a/llvm/test/MC/ELF/align-nops.s
+++ b/llvm/test/MC/ELF/align-nops.s
@@ -30,7 +30,7 @@
 // CHECK-NEXT:     AddressAlignment: 8
 // CHECK-NEXT:     EntrySize: 0
 // CHECK-NEXT:     SectionData (
-// CHECK-NEXT:       0000: 00000000 0F1F4000 00000000 0F1F4000
+// CHECK-NEXT:       0000: 00000000 8D742600 00000000 8D742600
 // CHECK-NEXT:     )
 // CHECK-NEXT:   }
 
diff --git a/llvm/test/MC/MachO/x86_32-optimal_nop.s b/llvm/test/MC/MachO/x86_32-optimal_nop.s
--- a/llvm/test/MC/MachO/x86_32-optimal_nop.s
+++ b/llvm/test/MC/MachO/x86_32-optimal_nop.s
@@ -192,25 +192,25 @@
 // CHECK:     SectionData (
 // CHECK:       0000: C390C300 00000000 00000000 00000000  |................|
 // CHECK:       0010: C3C36690 C3000000 00000000 00000000  |..f.............|
-// CHECK:       0020: C30F1F00 C3000000 00000000 00000000  |................|
-// CHECK:       0030: C3C3C3C3 0F1F4000 C3000000 00000000  |......@.........|
-// CHECK:       0040: C3C3C30F 1F440000 C3000000 00000000  |.....D..........|
-// CHECK:       0050: C3C3660F 1F440000 C3000000 00000000  |..f..D..........|
-// CHECK:       0060: C30F1F80 00000000 C3000000 00000000  |................|
+// CHECK:       0020: C38D7600 C3000000 00000000 00000000  |..v.............|
+// CHECK:       0030: C3C3C3C3 8D742600 C3000000 00000000  |.....t&.........|
+// CHECK:       0040: C3C3C390 8D742600 C3000000 00000000  |.....t&.........|
+// CHECK:       0050: C3C38DB6 00000000 C3000000 00000000  |................|
+// CHECK:       0060: C38DB426 00000000 C3000000 00000000  |...&............|
 // CHECK:       0070: C3C3C3C3 C3C3C3C3 C3000000 00000000  |................|
-// CHECK:       0080: C3C3C3C3 C3C3C366 0F1F8400 00000000  |.......f........|
+// CHECK:       0080: C3C3C3C3 C3C3C389 F68DBC27 00000000  |...........'....|
 // CHECK:       0090: C3000000 00000000 00000000 00000000  |................|
-// CHECK:       00A0: C3C3C3C3 C3C3C366 0F1F8400 00000000  |.......f........|
+// CHECK:       00A0: C3C3C3C3 C3C3C389 F68DBC27 00000000  |...........'....|
 // CHECK:       00B0: C3000000 00000000 00000000 00000000  |................|
-// CHECK:       00C0: C3C3C3C3 C3662E0F 1F840000 00000090  |.....f..........|
+// CHECK:       00C0: C3C3C3C3 C38D7600 8DBC2700 00000090  |......v...'.....|
 // CHECK:       00D0: C3000000 00000000 00000000 00000000  |................|
-// CHECK:       00E0: C3C3C3C3 662E0F1F 84000000 00006690  |....f.........f.|
+// CHECK:       00E0: C3C3C3C3 8D76008D BC270000 00006690  |.....v...'....f.|
 // CHECK:       00F0: C3000000 00000000 00000000 00000000  |................|
-// CHECK:       0100: C3C3C366 2E0F1F84 00000000 000F1F00  |...f............|
+// CHECK:       0100: C3C3C38D 76008DBC 27000000 008D7600  |....v...'.....v.|
 // CHECK:       0110: C3000000 00000000 00000000 00000000  |................|
-// CHECK:       0120: C3C3662E 0F1F8400 00000000 0F1F4000  |..f...........@.|
+// CHECK:       0120: C3C38D76 008DBC27 00000000 8D742600  |...v...'.....t&.|
 // CHECK:       0130: C3000000 00000000 00000000 00000000  |................|
-// CHECK:       0140: C3662E0F 1F840000 0000000F 1F440000  |.f...........D..|
+// CHECK:       0140: C38D7600 8DBC2700 00000090 8D742600  |..v...'......t&.|
 // CHECK:       0150: C3                                   |.|
 // CHECK:     )
 // CHECK:   }
diff --git a/llvm/test/MC/X86/AlignedBundling/long-nop-pad.s b/llvm/test/MC/X86/AlignedBundling/long-nop-pad.s
--- a/llvm/test/MC/X86/AlignedBundling/long-nop-pad.s
+++ b/llvm/test/MC/X86/AlignedBundling/long-nop-pad.s
@@ -14,9 +14,11 @@
   callq   bar
   .bundle_unlock
 # To align this group to a bundle end, we need a two 10-byte NOPs and a 7-byte NOP.
-# CHECK:        0:  nop
-# CHECK-NEXT:   a:  nop
-# CHECK-NEXT:   14: nop
+# CHECK:        0:  leal (%rsi), %esi
+# CHECK-NEXT:   3:  leal (%rdi,%riz), %edi
+# CHECK-NEXT:   a:  leal (%rsi), %esi
+# CHECK-NEXT:   d:  leal (%rdi,%riz), %edi
+# CHECK-NEXT:   14: leal (%rsi,%riz), %esi
 # CHECK:   1b: callq
 
 # This push instruction is 1 byte long
@@ -24,8 +26,11 @@
   push %rax
   .bundle_unlock
 # To align this group to a bundle end, we need three 10-byte NOPs, and a 1-byte.
-# CHECK:        20:  nop
-# CHECK-NEXT:   2a:  nop
-# CHECK-NEXT:   34:  nop
-# CHECK-NEXT:   3e:  nop
-# CHECK-NEXT:   3f: pushq
+# CHECK:        20: leal (%rsi), %esi
+# CHECK-NEXT:   23: leal (%rdi,%riz), %edi
+# CHECK-NEXT:   2a: leal (%rsi), %esi
+# CHECK-NEXT:   2d: leal (%rdi,%riz), %edi
+# CHECK-NEXT:   34: leal (%rsi), %esi
+# CHECK-NEXT:   37: leal (%rdi,%riz), %edi
+# CHECK-NEXT:   3e: nop
+# CHECK-NEXT:   3f: pushq %rax
diff --git a/llvm/test/MC/X86/AlignedBundling/misaligned-bundle-group.s b/llvm/test/MC/X86/AlignedBundling/misaligned-bundle-group.s
--- a/llvm/test/MC/X86/AlignedBundling/misaligned-bundle-group.s
+++ b/llvm/test/MC/X86/AlignedBundling/misaligned-bundle-group.s
@@ -11,13 +11,18 @@
         push    %ebp # 1 byte
         .align  16
         .bundle_lock align_to_end
-# CHECK:            1:  nopw %cs:(%eax,%eax)
-# CHECK:            10: nopw %cs:(%eax,%eax)
-# CHECK-RELAX:      1a: nop
-# CHECK-RELAX:      20: nopw %cs:(%eax,%eax)
-# CHECK-RELAX:      2a: nopw %cs:(%eax,%eax)
+# CHECK:            1: leal (%esi), %esi
+
+# CHECK-OPT:        10: leal (%esi), %esi
 # CHECK-OPT:        1b: calll 0x1c
-# CHECK-RELAX:      3b: calll 0x3c
+
+# CHECK-RELAX:      b: nop
+# CHECK-RELAX:      c: leal (%esi,%eiz), %esi
+# CHECK-RELAX:      10: leal (%esi), %esi
+# CHECK-RELAX:      1a: leal (%esi), %esi
+# CHECK-RELAX:      20: leal (%esi), %esi
+# CHECK-RELAX:      2a: leal (%esi), %esi
+# CHECK-RELAX:      3b: calll 0x3c <foo+0x3c>
         calll   bar # 5 bytes
         .bundle_unlock
         ret         # 1 byte
diff --git a/llvm/test/MC/X86/AlignedBundling/misaligned-bundle.s b/llvm/test/MC/X86/AlignedBundling/misaligned-bundle.s
--- a/llvm/test/MC/X86/AlignedBundling/misaligned-bundle.s
+++ b/llvm/test/MC/X86/AlignedBundling/misaligned-bundle.s
@@ -10,9 +10,9 @@
         .bundle_align_mode 5
         push    %ebp          # 1 byte
         .align  16
-# CHECK:            1:  nopw %cs:(%eax,%eax)
-# CHECK-RELAX:      10: nopw %cs:(%eax,%eax)
-# CHECK-RELAX:      1a: nop
+# CHECK:            1:  leal (%esi), %esi
+# CHECK-RELAX:      10: leal (%esi), %esi
+# CHECK-RELAX:      1a: leal (%esi), %esi
 # CHECK-OPT:        10: movl $1, (%esp)
 # CHECK-RELAX:      20: movl $1, (%esp)
         movl $0x1, (%esp)     # 7 bytes
diff --git a/llvm/test/MC/X86/AlignedBundling/pad-align-to-bundle-end.s b/llvm/test/MC/X86/AlignedBundling/pad-align-to-bundle-end.s
--- a/llvm/test/MC/X86/AlignedBundling/pad-align-to-bundle-end.s
+++ b/llvm/test/MC/X86/AlignedBundling/pad-align-to-bundle-end.s
@@ -16,7 +16,7 @@
   callq   bar
   .bundle_unlock
 # To align this group to a bundle end, we need a 1-byte NOP.
-# CHECK:        a:  nop
+# CHECK:        a: nop
 # CHECK-NEXT:   b: callq
 
   callq   bar
@@ -27,9 +27,9 @@
   .bundle_unlock
 # Here we have to pad until the end of the *next* boundary because
 # otherwise the group crosses a boundary.
-# CHECK:      1a: nop
+# CHECK:      1a: leal (%rsi), %esi
 # The nop sequence may be implemented as one instruction or many, but if
 # it's one instruction, that instruction cannot itself cross the boundary.
-# CHECK:      20: nop
+# CHECK:      20: leal (%rsi), %esi
 # CHECK-NEXT: 26: callq
 # CHECK-NEXT: 2b: callq
diff --git a/llvm/test/MC/X86/AlignedBundling/pad-bundle-groups.s b/llvm/test/MC/X86/AlignedBundling/pad-bundle-groups.s
--- a/llvm/test/MC/X86/AlignedBundling/pad-bundle-groups.s
+++ b/llvm/test/MC/X86/AlignedBundling/pad-bundle-groups.s
@@ -18,7 +18,7 @@
   callq   bar
   .bundle_unlock
 # We'll need a 6-byte NOP before this group
-# CHECK:        a:  nop
+# CHECK:        a:  leal
 # CHECK-NEXT:   10: callq
 # CHECK-NEXT:   15: callq
 
@@ -27,7 +27,7 @@
   callq   bar
   .bundle_unlock
 # Same here
-# CHECK:        1a:  nop
+# CHECK:        1a: leal
 # CHECK-NEXT:   20: callq
 # CHECK-NEXT:   25: callq
 
@@ -40,7 +40,7 @@
   .bundle_unlock
 # And here we'll need a 10-byte NOP + 1-byte NOP
 # CHECK:        30: callq
-# CHECK:        35: nop
+# CHECK:        35: leal
 # CHECK:        3f: nop
 # CHECK-NEXT:   40: callq
 # CHECK-NEXT:   45: callq
diff --git a/llvm/test/MC/X86/AlignedBundling/relax-at-bundle-end.s b/llvm/test/MC/X86/AlignedBundling/relax-at-bundle-end.s
--- a/llvm/test/MC/X86/AlignedBundling/relax-at-bundle-end.s
+++ b/llvm/test/MC/X86/AlignedBundling/relax-at-bundle-end.s
@@ -12,7 +12,7 @@
         push %rax
         .endr
 # CHECK: 1c: push
-# CHECK: 1d: nop
+# CHECK: 1d: leal
 # CHECK: 20: jne
         jne 0x100
 
diff --git a/llvm/test/MC/X86/AlignedBundling/relax-in-bundle-group.s b/llvm/test/MC/X86/AlignedBundling/relax-in-bundle-group.s
--- a/llvm/test/MC/X86/AlignedBundling/relax-in-bundle-group.s
+++ b/llvm/test/MC/X86/AlignedBundling/relax-in-bundle-group.s
@@ -23,7 +23,8 @@
   jle     .L_ELSE
 # This group would've started at 0x18 and is too long, so a chunky NOP padding
 # is inserted to push it to 0x20.
-# CHECK: 18: {{[a-f0-9 ]+}} nopl
+# CHECK: 18: {{[a-f0-9 ]+}} nop
+# CHECK: 19: {{[a-f0-9 ]+}} leal
 
 # The long encoding for JLE should be used here even though its target is close
 # CHECK-NEXT: 20: 0f 8e
diff --git a/llvm/test/MC/X86/AlignedBundling/single-inst-bundling.s b/llvm/test/MC/X86/AlignedBundling/single-inst-bundling.s
--- a/llvm/test/MC/X86/AlignedBundling/single-inst-bundling.s
+++ b/llvm/test/MC/X86/AlignedBundling/single-inst-bundling.s
@@ -26,7 +26,7 @@
   movl    %ebx, %edi
   callq   bar
   cmpl    %r14d, %ebp
-# CHECK-RELAX:   nopl
+# CHECK-RELAX:   leal
   jle     .L_ELSE
 # Due to the padding that's inserted before the addl, the jump target
 # becomes farther by one byte.
diff --git a/llvm/test/MC/X86/align-branch-pad-max-prefix.s b/llvm/test/MC/X86/align-branch-pad-max-prefix.s
--- a/llvm/test/MC/X86/align-branch-pad-max-prefix.s
+++ b/llvm/test/MC/X86/align-branch-pad-max-prefix.s
@@ -10,7 +10,7 @@
   # following nops, doing so would make the jmp misaligned.
 # CHECK:      18:          jmp
   jmp bar
-# CHECK:      1d:          nopl (%rax)
+# CHECK:      1d:          leal    (%rsi), %esi
 # CHECK:      20:          int3
   .p2align 5
   int3
diff --git a/llvm/test/MC/X86/align-via-padding.s b/llvm/test/MC/X86/align-via-padding.s
--- a/llvm/test/MC/X86/align-via-padding.s
+++ b/llvm/test/MC/X86/align-via-padding.s
@@ -36,7 +36,8 @@
   # CHECK: <loop_preheader>:
   # CHECK: 45: 48 85 c0                       testq %rax, %rax
   # CHECK: 48: 2e 2e 2e 2e 0f 8e 1e 00 00 00  jle 0x70 <loop_exit>
-  # CHECK: 52: 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00    	nopw	%cs:(%rax,%rax)
+  # CHECK: 52: 66 66 66 66 8d 76 00           leaw (%rsi), %si
+  # CHECK: 59: 8d bc 27 00 00 00 00           leal (%rdi,%riz), %edi
   # CHECK: <loop_header>:
   # CHECK: 60: 48 83 e8 01                    subq $1, %rax
   # CHECK: 64: 48 85 c0                       testq %rax, %rax
diff --git a/llvm/test/MC/X86/align-via-relaxation.s b/llvm/test/MC/X86/align-via-relaxation.s
--- a/llvm/test/MC/X86/align-via-relaxation.s
+++ b/llvm/test/MC/X86/align-via-relaxation.s
@@ -9,16 +9,17 @@
   .section  .text
 
 # NOPAD-LABEL: <.text>:
-# NOPAD-NEXT:     0: eb 1f           jmp 0x21 <foo>
-# NOPAD-NEXT:     2: eb 1d           jmp 0x21 <foo>
-# NOPAD-NEXT:     4: eb 1b           jmp 0x21 <foo>
-# NOPAD-NEXT:     6: eb 19           jmp 0x21 <foo>
-# NOPAD-NEXT:     8: eb 17           jmp 0x21 <foo>
-# NOPAD-NEXT:     a: eb 15           jmp 0x21 <foo>
-# NOPAD-NEXT:     c: eb 13           jmp 0x21 <foo>
-# NOPAD-NEXT:     e: 66 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00  nopw    %cs:(%rax,%rax)
-# NOPAD-NEXT:    1d: 0f 1f 00        nopl (%rax)
-# NOPAD-NEXT:    20: cc              int3
+# NOPAD-NEXT:     0: eb 1f                   jmp 0x21 <foo>
+# NOPAD-NEXT:     2: eb 1d                   jmp 0x21 <foo>
+# NOPAD-NEXT:     4: eb 1b                   jmp 0x21 <foo>
+# NOPAD-NEXT:     6: eb 19                   jmp 0x21 <foo>
+# NOPAD-NEXT:     8: eb 17                   jmp 0x21 <foo>
+# NOPAD-NEXT:     a: eb 15                   jmp 0x21 <foo>
+# NOPAD-NEXT:     c: eb 13                   jmp 0x21 <foo>
+# NOPAD-NEXT:     e: 66 66 66 66 66 8d 76 00 leaw (%rsi), %si
+# NOPAD-NEXT:    16: 8d bc 27 00 00 00 00    leal (%rdi,%riz), %edi
+# NOPAD-NEXT:    1d: 8d 76 00                leal (%rsi), %esi
+# NOPAD-NEXT:    20: cc                      int3
 
   # Demonstrate that we can relax instructions to provide padding, not
   # just insert nops.  jmps are being used for ease of demonstration.
@@ -48,7 +49,7 @@
   # that would require a further round of relaxation
   # CHECK: <bar>:
   # CHECK: 22: eb fe                          jmp 0x22 <bar>
-  # CHECK: 24: 66 66 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:(%rax,%rax)
+  # CHECK: 24: 66 66 8d 76 00                 leaw (%rsi), %si
   # CHECK: 30: 0f 0b                          ud2
 
 bar:  
@@ -63,8 +64,9 @@
   # CHECK: <loop_preheader>:
   # CHECK: 45: 48 85 c0                       testq %rax, %rax
   # CHECK: 48: 0f 8e 22 00 00 00              jle 0x70 <loop_exit>
-  # CHECK: 4e: 66 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:(%rax,%rax)
-  # CHECK: 5d: 0f 1f 00                       nopl (%rax)
+  # CHECK: 4e: 66 66 66 66 66 8d 76 00        leaw (%rsi), %si
+  # CHECK: 56: 8d bc 27 00 00 00 00           leal (%rdi,%riz), %edi
+  # CHECK: 5d: 8d 76 00                       leal (%rsi), %esi
   # CHECK: <loop_header>:
   # CHECK: 60: 48 83 e8 01                    subq $1, %rax
   # CHECK: 64: 48 85 c0                       testq %rax, %rax
diff --git a/llvm/test/MC/X86/code16gcc-align.s b/llvm/test/MC/X86/code16gcc-align.s
new file mode 100644
--- /dev/null
+++ b/llvm/test/MC/X86/code16gcc-align.s
@@ -0,0 +1,101 @@
+# RUN: llvm-mc -filetype=obj -triple=i386-unknown-unknown %s | llvm-objdump --triple=i386-unknown-unknown-code16 -d - | FileCheck %s 
+
+# Ensure that the padding before the aligned "movzbl" decodes cleanly in 16-bit
+# mode, so that its 0x67 0x66 prefixes are disassembled as part of "movzbl".
+
+# CHECK-LABEL: <print_serial>:
+# CHECK:           22: 66 89 c7                     	movl	%eax, %edi
+# CHECK-NEXT:      25: 66 31 db                     	xorl	%ebx, %ebx
+# CHECK-NEXT:      28: 90                           	nop
+# CHECK-NEXT:      29: 90                           	nop
+# CHECK-NEXT:      2a: 90                           	nop
+# CHECK-NEXT:      2b: 90                           	nop
+# CHECK-NEXT:      2c: 90                           	nop
+# CHECK-NEXT:      2d: 90                           	nop
+# CHECK-NEXT:      2e: 90                           	nop
+# CHECK-NEXT:      2f: 90                           	nop
+# CHECK-NEXT:      30: 67 66 0f b6 0c 1e            	movzbl	(%esi,%ebx), %ecx
+
+	.text
+	.code16gcc
+	.globl	print_serial
+	.p2align	4, 0x90
+	.type	print_serial,@function
+print_serial:
+	pushl	%ebp
+	movl	%esp, %ebp
+	pushl	%ebx
+	pushl	%edi
+	pushl	%esi
+	subl	$12, %esp
+	movl	8(%ebp), %esi
+	movl	%esi, %ecx
+	calll	strlen
+	testl	%eax, %eax
+	je	.LBB0_3
+	movl	%eax, %edi
+	xorl	%ebx, %ebx
+	.p2align	4, 0x90
+.LBB0_2:
+	movzbl	(%esi,%ebx), %ecx
+	calll	serial_outb
+	addl	$1, %ebx
+	cmpl	%ebx, %edi
+	jne	.LBB0_2
+.LBB0_3:
+	addl	$12, %esp
+	popl	%esi
+	popl	%edi
+	popl	%ebx
+	popl	%ebp
+	retl
+.Lfunc_end0:
+	.size	print_serial, .Lfunc_end0-print_serial
+	.p2align	4, 0x90
+	.type	strlen,@function
+strlen:
+	pushl	%ebp
+	movl	%esp, %ebp
+	cmpb	$0, (%ecx)
+	je	.LBB1_1
+	xorl	%edx, %edx
+	.p2align	4, 0x90
+.LBB1_3:
+	leal	1(%edx), %eax
+	cmpb	$0, 1(%ecx,%edx)
+	movl	%eax, %edx
+	jne	.LBB1_3
+	popl	%ebp
+	retl
+.LBB1_1:
+	xorl	%eax, %eax
+	popl	%ebp
+	retl
+.Lfunc_end1:
+	.size	strlen, .Lfunc_end1-strlen
+	.p2align	4, 0x90
+	.type	serial_outb,@function
+serial_outb:
+	pushl	%ebp
+	movl	%esp, %ebp
+	subl	$8, %esp
+	calll	outb
+	addl	$8, %esp
+	popl	%ebp
+	retl
+.Lfunc_end2:
+	.size	serial_outb, .Lfunc_end2-serial_outb
+	.p2align	4, 0x90
+	.type	outb,@function
+outb:
+	pushl	%ebp
+	movl	%esp, %ebp
+	movl	%ecx, %eax
+	movw	$1016, %dx
+	#APP
+	outb	%al, %dx
+	#NO_APP
+	popl	%ebp
+	retl
+.Lfunc_end3:
+	.size	outb, .Lfunc_end3-outb
diff --git a/llvm/test/MC/X86/x86_64-directive-nops.s b/llvm/test/MC/X86/x86_64-directive-nops.s
--- a/llvm/test/MC/X86/x86_64-directive-nops.s
+++ b/llvm/test/MC/X86/x86_64-directive-nops.s
@@ -9,11 +9,11 @@
 # CHECK-NEXT:  4: 66 90 nop
 # CHECK-NEXT:  6: 66 90 nop
 .nops 4, 3
-# CHECK-NEXT:  8: 0f 1f 00 nopl (%rax)
+# CHECK-NEXT:  8: 8d 76 00 leal (%rsi), %esi
 # CHECK-NEXT:  b: 90 nop
 .nops 4, 4
-# CHECK-NEXT:  c: 0f 1f 40 00 nopl (%rax)
+# CHECK-NEXT:  c: 8d 74 26 00 leal (%rsi,%riz), %esi
 .nops 4, 5
-# CHECK-NEXT:  10: 0f 1f 40 00 nopl (%rax)
+# CHECK-NEXT: 10: 8d 74 26 00 leal (%rsi,%riz), %esi
 .nops 4
-# CHECK-NEXT:  14: 0f 1f 40 00 nopl (%rax)
+# CHECK-NEXT: 14: 8d 74 26 00 leal (%rsi,%riz), %esi
diff --git a/llvm/test/MC/X86/x86_long_nop.s b/llvm/test/MC/X86/x86_long_nop.s
--- a/llvm/test/MC/X86/x86_long_nop.s
+++ b/llvm/test/MC/X86/x86_long_nop.s
@@ -34,31 +34,39 @@
 .p2align 5
 inc %eax
 # LNOP15: 0:  inc
-# LNOP15-NEXT: 1:  nop
-# LNOP15-NEXT: 10: nop
+# LNOP15-NEXT: 1:  leaw
+# LNOP15-NEXT: 9:  leal
+# LNOP15-NEXT: 10: leaw
+# LNOP15-NEXT: 18: leal
 # LNOP15-NEXT: 1f: nop
 # LNOP15-NEXT: 20: inc
 
 # LNOP11: 0:  inc
-# LNOP11-NEXT: 1:  nop
-# LNOP11-NEXT: c:  nop
-# LNOP11-NEXT: 17: nop
+# LNOP11-NEXT: 1:  leaw
+# LNOP11-NEXT: 5:  leal
+# LNOP11-NEXT: c:  leaw
+# LNOP11-NEXT: 10: leal
+# LNOP11-NEXT: 17: movl
+# LNOP11-NEXT: 19: leal
 # LNOP11-NEXT: 20: inc
 
 # LNOP10: 0:  inc
-# LNOP10-NEXT: 1:  nop
-# LNOP10-NEXT: b:  nop
-# LNOP10-NEXT: 15: nop
+# LNOP10-NEXT: 1:  leal
+# LNOP10-NEXT: 4:  leal
+# LNOP10-NEXT: b:  leal
+# LNOP10-NEXT: e:  leal
+# LNOP10-NEXT: 15: leal
+# LNOP10-NEXT: 18: leal
 # LNOP10-NEXT: 1f: nop
 # LNOP10-NEXT: 20: inc
 
 # On Silvermont we emit only 7 byte NOPs since longer NOPs are not profitable.
 # LNOP7: 0:  inc
-# LNOP7-NEXT: 1:  nop
-# LNOP7-NEXT: 8:  nop
-# LNOP7-NEXT: f:  nop
-# LNOP7-NEXT: 16: nop
-# LNOP7-NEXT: 1d: nop
+# LNOP7-NEXT: 1:  leal
+# LNOP7-NEXT: 8:  leal
+# LNOP7-NEXT: f:  leal
+# LNOP7-NEXT: 16: leal
+# LNOP7-NEXT: 1d: leal
 # LNOP7-NEXT: 20: inc
 
 # On Lakemont we emit only 1 byte NOPs since longer NOPs are not supported/legal
diff --git a/llvm/test/MC/X86/x86_nop.s b/llvm/test/MC/X86/x86_nop.s
--- a/llvm/test/MC/X86/x86_nop.s
+++ b/llvm/test/MC/X86/x86_nop.s
@@ -33,5 +33,5 @@
 
 
 // NOPL: 0:	40                                           	incl	%eax
-// NOPL: 1:	0f 1f 80 00 00 00 00                         	nopl	(%eax)
+// NOPL: 1: 8d b4 26 00 00 00 00                                leal	(%esi,%eiz), %esi
 // NOPL: 8:	40                                           	incl	%eax
diff --git a/llvm/test/tools/llvm-profgen/symbolize.ll b/llvm/test/tools/llvm-profgen/symbolize.ll
--- a/llvm/test/tools/llvm-profgen/symbolize.ll
+++ b/llvm/test/tools/llvm-profgen/symbolize.ll
@@ -11,7 +11,7 @@
 ; CHECK:        e:	cmovl	edx, ecx                       fib:2 @ funcLeaf:2 @ funcA:1
 ; CHECK:       11:	sub	eax, edx                         funcLeaf:2 @ funcA:1
 ; CHECK:       13:	ret                                  funcA:2
-; CHECK:       14:	nop	word ptr cs:[rax + rax]
+; CHECK:       14:	lea	esi, [rsi]
 ; CHECK:       1e:	nop
 ; CHECK: <funcLeaf>:
 ; CHECK:      20:	mov	eax, edi                           funcLeaf:1
@@ -21,7 +21,8 @@
 ; CHECK:      2e:	cmovl	edx, ecx                         fib:2 @ funcLeaf:2
 ; CHECK:      31:	sub	eax, edx                           funcLeaf:2
 ; CHECK:      33:	ret                                    funcLeaf:3
-; CHECK:      34:	nop	word ptr cs:[rax + rax]
+; CHECK:      34:	lea	esi, [rsi]
+; CHECK:      37:	lea	edi, [rdi + riz]
 ; CHECK:      3e:	nop
 ; CHECK: <fib>:
 ; CHECK:      40:	lea	eax, [rdi + 3]                     fib:2