Index: lib/MC/MCAssembler.cpp
===================================================================
--- lib/MC/MCAssembler.cpp
+++ lib/MC/MCAssembler.cpp
@@ -254,7 +254,7 @@
     else { // EndOfFragment > BundleSize
       return 2 * BundleSize - EndOfFragment;
     }
-  } else if (EndOfFragment > BundleSize)
+  } else if (OffsetInBundle > 0 && EndOfFragment > BundleSize)
     return BundleSize - OffsetInBundle;
   else
     return 0;
@@ -581,16 +581,22 @@
   // size won't include the padding.
   //
   // When the -mc-relax-all flag is used, we optimize bundling by writting the
-  // bundle padding directly into fragments when the instructions are emitted
-  // inside the streamer.
+  // padding directly into fragments when the instructions are emitted inside
+  // the streamer. When the fragment is larger than the bundle size, we need to
+  // ensure that it's bundle aligned. This means that if we end up with
+  // multiple fragments, we must emit bundle padding between fragments.
   //
-  if (Assembler.isBundlingEnabled() && !Assembler.getRelaxAll() &&
-      F->hasInstructions()) {
+  // ".align N" is an example of a directive that introduces multiple
+  // fragments. We could add a special case to handle ".align N" by emitting
+  // within-fragment padding (which would produce less padding when N is less
+  // than the bundle size), but for now we don't.
+  //
+  if (Assembler.isBundlingEnabled() && F->hasInstructions()) {
     assert(isa<MCEncodedFragment>(F) &&
            "Only MCEncodedFragment implementations have instructions");
     uint64_t FSize = Assembler.computeFragmentSize(*this, *F);
 
-    if (FSize > Assembler.getBundleAlignSize())
+    if (!Assembler.getRelaxAll() && FSize > Assembler.getBundleAlignSize())
       report_fatal_error("Fragment can't be larger than a bundle size");
 
     uint64_t RequiredBundlePadding = computeBundlePadding(Assembler, F,
Index: test/MC/X86/AlignedBundling/misaligned-bundle-group.s
===================================================================
--- /dev/null
+++ test/MC/X86/AlignedBundling/misaligned-bundle-group.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu %s -o - \
+# RUN:   | llvm-objdump -disassemble -no-show-raw-insn - \
+# RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-OPT %s
+# RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu -mc-relax-all %s -o - \
+# RUN:   | llvm-objdump -disassemble -no-show-raw-insn - \
+# RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-RELAX %s
+
+        .text
+foo:
+        .bundle_align_mode 5
+        push    %ebp # 1 byte
+        .align  16
+        .bundle_lock align_to_end
+# CHECK:            1:  nopw %cs:(%eax,%eax)
+# CHECK:            10: nopw %cs:(%eax,%eax)
+# CHECK-RELAX:      1f: nop
+# CHECK-RELAX:      20: nopw %cs:(%eax,%eax)
+# CHECK-RELAX:      2f: nopw %cs:(%eax,%eax)
+# CHECK-OPT:        1b: calll -4
+# CHECK-RELAX:      3b: calll -4
+        calll   bar # 5 bytes
+        .bundle_unlock
+        ret         # 1 byte
Index: test/MC/X86/AlignedBundling/misaligned-bundle.s
===================================================================
--- /dev/null
+++ test/MC/X86/AlignedBundling/misaligned-bundle.s
@@ -0,0 +1,31 @@
+# RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu %s -o - \
+# RUN:   | llvm-objdump -disassemble -no-show-raw-insn - \
+# RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-OPT %s
+# RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu -mc-relax-all %s -o - \
+# RUN:   | llvm-objdump -disassemble -no-show-raw-insn - \
+# RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-RELAX %s
+
+        .text
+foo:
+        .bundle_align_mode 5
+        push    %ebp          # 1 byte
+        .align  16
+# CHECK:            1:  nopw %cs:(%eax,%eax)
+# CHECK-RELAX:      10: nopw %cs:(%eax,%eax)
+# CHECK-RELAX:      1f: nop
+# CHECK-OPT:        10: movl $1, (%esp)
+# CHECK-RELAX:      20: movl $1, (%esp)
+        movl $0x1, (%esp)     # 7 bytes
+        movl $0x1, (%esp)     # 7 bytes
+# CHECK-OPT:        1e: nop
+        movl $0x2, 0x1(%esp)  # 8 bytes
+        movl $0x2, 0x1(%esp)  # 8 bytes
+# CHECK-RELAX:      3e: nop
+# CHECK-RELAX:      40: movl $2, 1(%esp)
+        movl $0x2, 0x1(%esp)  # 8 bytes
+        movl $0x2, (%esp)     # 7 bytes
+# CHECK-OPT:        3f: nop
+# CHECK-OPT:        40: movl $3, (%esp)
+        movl $0x3, (%esp)     # 7 bytes
+        movl $0x3, (%esp)     # 7 bytes
+        ret