diff --git a/llvm/include/llvm/MC/MCAsmBackend.h b/llvm/include/llvm/MC/MCAsmBackend.h
--- a/llvm/include/llvm/MC/MCAsmBackend.h
+++ b/llvm/include/llvm/MC/MCAsmBackend.h
@@ -49,6 +49,10 @@
   /// Return true if this target might automatically pad instructions and thus
   /// need to emit padding enable/disable directives around sensative code.
   virtual bool allowAutoPadding() const { return false; }
+  /// Return true if this target allows an instruction that does not need
+  /// relaxation to be emitted into a RelaxableFragment, so that its size can
+  /// still be increased later for optimization.
+  virtual bool allowEnhancedRelaxation() const { return false; }
 
   /// Give the target a chance to manipulate state related to instruction
   /// alignment (e.g. padding for optimization), instruction relaxablility, etc.
diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp
--- a/llvm/lib/MC/MCObjectStreamer.cpp
+++ b/llvm/lib/MC/MCObjectStreamer.cpp
@@ -385,7 +385,9 @@
 
   // If this instruction doesn't need relaxation, just emit it as data.
   MCAssembler &Assembler = getAssembler();
-  if (!Assembler.getBackend().mayNeedRelaxation(Inst, STI)) {
+  MCAsmBackend &Backend = Assembler.getBackend();
+  if (!(Backend.mayNeedRelaxation(Inst, STI) ||
+        (Backend.allowEnhancedRelaxation() && getAllowAutoPadding()))) {
     EmitInstToData(Inst, STI);
     return;
   }
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
--- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -139,6 +139,7 @@
   MCInst PrevInst;
   MCBoundaryAlignFragment *PendingBoundaryAlign = nullptr;
   std::pair<MCFragment *, size_t> PrevInstPosition;
+  bool StreamerAllowAutoPadding = false;
 
 public:
   X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
@@ -162,6 +163,7 @@
   }
 
   bool allowAutoPadding() const override;
+  bool allowEnhancedRelaxation() const override;
   void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst) override;
   void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) override;
 
@@ -455,6 +457,10 @@
   return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone);
 }
 
+bool X86AsmBackend::allowEnhancedRelaxation() const {
+  return allowAutoPadding() && X86PadMaxPrefixSize != 0 && X86PadForBranchAlign;
+}
+
 bool X86AsmBackend::needAlign(MCObjectStreamer &OS) const {
   if (!OS.getAllowAutoPadding())
     return false;
@@ -538,13 +544,8 @@
   }
 }
 
-/// Check if the instruction operand needs to be aligned. Padding is disabled
-/// before intruction which may be rewritten by linker(e.g. TLSCALL).
+/// Check if the instruction operand needs to be aligned.
 bool X86AsmBackend::needAlignInst(const MCInst &Inst) const {
-  // Linker may rewrite the instruction with variant symbol operand.
-  if (hasVariantSymbol(Inst))
-    return false;
-
   const MCInstrDesc &InstDesc = MCII->get(Inst.getOpcode());
   return (InstDesc.isConditionalBranch() &&
           (AlignBranchType & X86::AlignBranchJcc)) ||
@@ -558,31 +559,64 @@
           (AlignBranchType & X86::AlignBranchIndirect));
 }
 
-/// Insert BoundaryAlignFragment before instructions to align branches.
-void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
-                                       const MCInst &Inst) {
-  if (!needAlign(OS))
-    return;
+/// Return true if we can insert NOPs or prefixes automatically before the
+/// instruction to be emitted.
+/// \param Inst the instruction to be emitted.
+/// \param PrevInst the previous emitted instruction.
+/// \param PrevInstPosition the position where the \p PrevInst was emitted.
+static bool
+allowAutoPaddingForInst(const MCInst &Inst, const MCInst &PrevInst,
+                        const MCInstrInfo &MCII,
+                        const std::pair<MCFragment *, size_t> &PrevInstPosition,
+                        MCObjectStreamer &OS) {
+  if (hasVariantSymbol(Inst))
+    // Linker may rewrite the instruction with variant symbol operand(e.g.
+    // TLSCALL).
+    return false;
 
   if (hasInterruptDelaySlot(PrevInst))
     // If this instruction follows an interrupt enabling instruction with a one
     // instruction delay, inserting a nop would change behavior.
-    return;
+    return false;
 
-  if (isPrefix(PrevInst, *MCII))
-    // If this instruction follows a prefix, inserting a nop would change
+  if (isPrefix(PrevInst, MCII))
+    // If this instruction follows a prefix, inserting a nop/prefix would change
     // semantic.
-    return;
+    return false;
+
+  if (isPrefix(Inst, MCII))
+    // If this instruction is a prefix, inserting a prefix would change
+    // semantic.
+    return false;
 
   if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition))
     // If this instruction follows any data, there is no clear
-    // instruction boundary, inserting a nop would change semantic.
+    // instruction boundary, inserting a nop/prefix would change semantic.
+    return false;
+
+  return true;
+}
+
+/// Insert BoundaryAlignFragment before instructions to align branches.
+void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
+                                         const MCInst &Inst) {
+  // Save the streamer's auto-padding state; emitInstructionEnd restores it.
+  StreamerAllowAutoPadding = OS.getAllowAutoPadding();
+
+  if (!needAlign(OS))
     return;
 
   if (!isMacroFused(PrevInst, Inst))
     // Macro fusion doesn't happen indeed, clear the pending.
     PendingBoundaryAlign = nullptr;
 
+  if (!allowAutoPaddingForInst(Inst, PrevInst, *MCII, PrevInstPosition, OS)) {
+    // Temporarily disable auto padding on the streamer when the instruction
+    // to be emitted must not be padded.
+    OS.setAllowAutoPadding(false);
+    return;
+  }
+
   if (PendingBoundaryAlign &&
       OS.getCurrentFragment()->getPrevNode() == PendingBoundaryAlign) {
     // Macro fusion actually happens and there is no other fragment inserted
@@ -617,6 +651,9 @@
 
 /// Set the last fragment to be aligned for the BoundaryAlignFragment.
 void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) {
+  // Restore the streamer's auto-padding state saved in emitInstructionBegin.
+  OS.setAllowAutoPadding(StreamerAllowAutoPadding);
+
   if (!needAlign(OS))
     return;
 
diff --git a/llvm/test/MC/X86/align-branch-64-enhanced-relaxation.s b/llvm/test/MC/X86/align-branch-64-enhanced-relaxation.s
new file mode 100644
--- /dev/null
+++ b/llvm/test/MC/X86/align-branch-64-enhanced-relaxation.s
@@ -0,0 +1,56 @@
+  # RUN: llvm-mc -mcpu=skylake -filetype=obj -triple x86_64-pc-linux-gnu %s -x86-pad-max-prefix-size=5 --x86-align-branch-boundary=32 --x86-align-branch=jmp+indirect | llvm-objdump -d - | FileCheck %s
+
+  # Exercise cases where we are allowed to increase the length of unrelaxable
+  # instructions (by adding prefixes) for alignment purposes.
+
+  # The first test is a basic one: we just check that the jmp is aligned by
+  # padding the previous instructions with prefixes.
+  .text
+  .globl labeled_basic_test
+labeled_basic_test:
+  .p2align 5
+  .rept 30
+  int3
+  .endr
+# CHECK:      1e: 2e cc                            int3
+# CHECK:      20: eb 00                            jmp
+  int3
+  jmp foo
+foo:
+  ret
+
+  # The second test checks a correctness corner case - we can't add prefixes to
+  # a prefix or to an instruction that follows a prefix.
+  .globl labeled_prefix_test
+labeled_prefix_test:
+  .p2align 5
+  .rept 28
+  int3
+  .endr
+# CHECK:      5c: 2e cc                            int3
+  int3
+# CHECK:      5e: 3e cc                            int3
+  DS
+  int3
+# CHECK:      60: eb 00                            jmp
+  jmp bar
+bar:
+  ret
+
+  # The third test is similar to the second one - we can't add prefixes to an
+  # instruction that follows hardcoded bytes.
+  .globl labeled_hardcode_test
+labeled_hardcode_test:
+  .p2align 5
+  .rept 28
+  int3
+  .endr
+# CHECK:      9c: 2e cc                            int3
+  int3
+# CHECK:      9e: 3e cc                            int3
+  .byte 0x3e
+  int3
+# CHECK:      a0: eb 00                            jmp
+  jmp baz
+baz:
+  ret