Index: llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
===================================================================
--- llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -105,6 +105,10 @@
         "assumptions about labels corresponding to particular instructions, "
         "and should be used with caution."));
 
+cl::opt<unsigned> X86PadMaxPrefixSize(
+    "x86-pad-max-prefix-size", cl::init(0), // 0 (default): no prefix padding.
+    cl::desc("Maximum number of redundant prefixes to use for padding"));
+
 cl::opt<bool> X86PadForAlign(
     "x86-pad-for-align", cl::init(true), cl::Hidden,
     cl::desc("Pad previous instructions to implement align directives"));
@@ -185,8 +189,16 @@
   void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
                         MCInst &Res) const override;
 
+  bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
+                                   MCCodeEmitter &Emitter,
+                                   unsigned &RemainingSize) const;
+
+  bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
+                               unsigned &RemainingSize) const;
+
   bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
                               unsigned &RemainingSize) const;
+
   void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override;
 
   bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
@@ -718,6 +730,63 @@
   Res.setOpcode(RelaxedOp);
 }
 
+static bool shouldAddPrefix(const MCInst &Inst, const MCInstrInfo &MCII) {
+  // The linker may rewrite variant-symbol instructions. MCII is unused here.
+  return !hasVariantSymbol(Inst);
+}
+
+/// Returns how many more prefix bytes may be added to \p Inst for padding.
+static unsigned getRemainingPrefixSize(const MCInst &Inst,
+                                       const MCSubtargetInfo &STI,
+                                       MCCodeEmitter &Emitter) {
+  SmallString<256> Code;
+  raw_svector_ostream VecOS(Code);
+  Emitter.emitPrefix(Inst, VecOS, STI);
+  assert(Code.size() < 15 && "The number of prefixes must be less than 15.");
+
+  // TODO: It turns out we need a decent amount of plumbing for the target
+  // specific bits to determine number of prefixes it's safe to add.  Various
+  // targets (older chips mostly, but also Atom family) encounter decoder
+  // stalls with too many prefixes.  For testing purposes, we set the value
+  // externally for the moment.
+  // Use unsigned, the option's type, so values above 255 are not truncated.
+  const unsigned Existing = Code.size();
+  const unsigned MaxAllowed = X86PadMaxPrefixSize;
+  return MaxAllowed > Existing ? MaxAllowed - Existing : 0;
+}
+
+/// Pads \p RF by prepending redundant prefix bytes to its encoding.
+/// Decrements \p RemainingSize by the bytes added; returns true on change.
+bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
+                                            MCCodeEmitter &Emitter,
+                                            unsigned &RemainingSize) const {
+  constexpr unsigned MaxInstSize = 15; // Longest legal x86 encoding.
+  if (!shouldAddPrefix(RF.getInst(), *MCII))
+    return false;
+  const unsigned OldSize = RF.getContents().size();
+  // Use >= so an oversized fragment cannot underflow the subtraction below.
+  if (OldSize >= MaxInstSize)
+    return false;
+
+  // Bounded by the encoding limit, the bytes still needed, and the prefix cap.
+  const unsigned PrefixBytesToAdd =
+      std::min({MaxInstSize - OldSize, RemainingSize,
+                getRemainingPrefixSize(RF.getInst(), STI, Emitter)});
+  if (PrefixBytesToAdd == 0)
+    return false;
+  const uint8_t Prefix = determinePaddingPrefix(RF.getInst());
+  SmallString<256> Code;
+  Code.append(PrefixBytesToAdd, Prefix);
+  Code.append(RF.getContents().begin(), RF.getContents().end());
+  RF.getContents() = Code;
+
+  // The old bytes now start after the new prefixes; shift fixups to match.
+  for (auto &F : RF.getFixups())
+    F.setOffset(F.getOffset() + PrefixBytesToAdd);
+  RemainingSize -= PrefixBytesToAdd;
+  return true;
+}
+
 static bool canBeRelaxedForPadding(const MCRelaxableFragment &RF) {
   // TODO: There are lots of other tricks we could apply for increasing
   // encoding size without impacting performance.
@@ -727,9 +796,9 @@
   return getRelaxedOpcode(Inst, Is16BitMode) != Inst.getOpcode();
 }
 
-bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
-                                           MCCodeEmitter &Emitter,
-                                           unsigned &RemainingSize) const {
+bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
+                                                MCCodeEmitter &Emitter,
+                                                unsigned &RemainingSize) const {
   if (!canBeRelaxedForPadding(RF))
     return false;
 
@@ -753,6 +822,17 @@
   return true;
 }
 
+/// Grows \p RF's encoding: relaxation first, then prefixes for what is left.
+bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
+                                           MCCodeEmitter &Emitter,
+                                           unsigned &RemainingSize) const {
+  const bool Relaxed =
+      RemainingSize && padInstructionViaRelaxation(RF, Emitter, RemainingSize);
+  const bool Prefixed =
+      RemainingSize && padInstructionViaPrefix(RF, Emitter, RemainingSize);
+  return Relaxed || Prefixed;
+}
+
 void X86AsmBackend::finishLayout(MCAssembler const &Asm,
                                  MCAsmLayout &Layout) const {
   // See if we can further relax some instructions to cut down on the number of
Index: llvm/test/MC/X86/align-via-relaxation.s
===================================================================
--- llvm/test/MC/X86/align-via-relaxation.s
+++ llvm/test/MC/X86/align-via-relaxation.s
@@ -1,4 +1,4 @@
-# RUN: llvm-mc -mcpu=skylake -filetype=obj -triple x86_64-pc-linux-gnu %s | llvm-objdump -d --section=.text - | FileCheck %s
+# RUN: llvm-mc -mcpu=skylake -filetype=obj -triple x86_64-pc-linux-gnu %s -x86-pad-max-prefix-size=5 | llvm-objdump -d --section=.text - | FileCheck %s
 
 
   .file "test.c"
@@ -7,14 +7,14 @@
   # Demonstrate that we can relax instructions to provide padding, not
   # just insert nops.  jmps are being used for ease of demonstration.
   # CHECK: .text
-  # CHECK: 0: eb 1f                         jmp 31 <foo>
-  # CHECK: 2: e9 1a 00 00 00                jmp 26 <foo>
-  # CHECK: 7: e9 15 00 00 00                jmp 21 <foo>
-  # CHECK: c: e9 10 00 00 00                jmp 16 <foo>
-  # CHECK: 11: e9 0b 00 00 00               jmp 11 <foo>
-  # CHECK: 16: e9 06 00 00 00               jmp 6 <foo>
-  # CHECK: 1b: e9 01 00 00 00               jmp 1 <foo>
-  # CHECK: 20: cc                           int3
+  # CHECK: 0: eb 1f                          jmp
+  # CHECK: 2: eb 1d                          jmp
+  # CHECK: 4: eb 1b                          jmp
+  # CHECK: 6: eb 19                          jmp
+  # CHECK: 8: 2e 2e eb 15                    jmp
+  # CHECK: c: 2e 2e 2e 2e 2e e9 0b 00 00 00  jmp
+  # CHECK: 16: 2e 2e 2e 2e 2e e9 01 00 00 00 jmp
+  # CHECK: 20: cc                            int3
   .p2align 4
   jmp foo
   jmp foo
@@ -47,15 +47,14 @@
   # fewer nops by relaxing the branch, even though we don't need to
   # CHECK: <loop_preheader>:
   # CHECK: 45: 48 85 c0                       testq %rax, %rax
-  # CHECK: 48: 0f 8e 22 00 00 00              jle 34 <loop_exit>
-  # CHECK: 4e: 66 2e 0f 1f 84 00 00 00 00 00  nopw %cs:(%rax,%rax)
-  # CHECK: 58: 0f 1f 84 00 00 00 00 00        nopl (%rax,%rax)
+  # CHECK: 48: 2e 2e 2e 2e 0f 8e 1e 00 00 00	jle	30 <loop_exit>
+  # CHECK: 52: 66 2e 0f 1f 84 00 00 00 00 00	nopw	%cs:(%rax,%rax)
+  # CHECK: 5c: 0f 1f 40 00                  	nopl	(%rax)
   # CHECK: <loop_header>:
   # CHECK: 60: 48 83 e8 01                    subq $1, %rax
   # CHECK: 64: 48 85 c0                       testq %rax, %rax
   # CHECK: 67: 7e 07                          jle 7 <loop_exit>
-  # CHECK: 69: e9 f2 ff ff ff                 jmp -14 <loop_header>
-  # CHECK: 6e: 66 90                          nop
+  # CHECK: 69: 2e 2e e9 f0 ff ff ff           jmp
   # CHECK: <loop_exit>:
   # CHECK: 70: c3                             retq
   .p2align 5