Index: llvm/trunk/lib/MC/MCAssembler.cpp =================================================================== --- llvm/trunk/lib/MC/MCAssembler.cpp +++ llvm/trunk/lib/MC/MCAssembler.cpp @@ -254,7 +254,7 @@ else { // EndOfFragment > BundleSize return 2 * BundleSize - EndOfFragment; } - } else if (EndOfFragment > BundleSize) + } else if (OffsetInBundle > 0 && EndOfFragment > BundleSize) return BundleSize - OffsetInBundle; else return 0; @@ -581,16 +581,22 @@ // size won't include the padding. // // When the -mc-relax-all flag is used, we optimize bundling by writting the - // bundle padding directly into fragments when the instructions are emitted - // inside the streamer. + // padding directly into fragments when the instructions are emitted inside + // the streamer. When the fragment is larger than the bundle size, we need to + // ensure that it's bundle aligned. This means that if we end up with + // multiple fragments, we must emit bundle padding between fragments. // - if (Assembler.isBundlingEnabled() && !Assembler.getRelaxAll() && - F->hasInstructions()) { + // ".align N" is an example of a directive that introduces multiple + // fragments. We could add a special case to handle ".align N" by emitting + // within-fragment padding (which would produce less padding when N is less + // than the bundle size), but for now we don't. 
+ // + if (Assembler.isBundlingEnabled() && F->hasInstructions()) { assert(isa<MCEncodedFragment>(F) && "Only MCEncodedFragment implementations have instructions"); uint64_t FSize = Assembler.computeFragmentSize(*this, *F); - if (FSize > Assembler.getBundleAlignSize()) + if (!Assembler.getRelaxAll() && FSize > Assembler.getBundleAlignSize()) report_fatal_error("Fragment can't be larger than a bundle size"); uint64_t RequiredBundlePadding = computeBundlePadding(Assembler, F, Index: llvm/trunk/test/MC/X86/AlignedBundling/misaligned-bundle-group.s =================================================================== --- llvm/trunk/test/MC/X86/AlignedBundling/misaligned-bundle-group.s +++ llvm/trunk/test/MC/X86/AlignedBundling/misaligned-bundle-group.s @@ -0,0 +1,23 @@ +# RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu %s -o - \ +# RUN: | llvm-objdump -disassemble -no-show-raw-insn - \ +# RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-OPT %s +# RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu -mc-relax-all %s -o - \ +# RUN: | llvm-objdump -disassemble -no-show-raw-insn - \ +# RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-RELAX %s + + .text +foo: + .bundle_align_mode 5 + push %ebp # 1 byte + .align 16 + .bundle_lock align_to_end +# CHECK: 1: nopw %cs:(%eax,%eax) +# CHECK: 10: nopw %cs:(%eax,%eax) +# CHECK-RELAX: 1f: nop +# CHECK-RELAX: 20: nopw %cs:(%eax,%eax) +# CHECK-RELAX: 2f: nopw %cs:(%eax,%eax) +# CHECK-OPT: 1b: calll -4 +# CHECK-RELAX: 3b: calll -4 + calll bar # 5 bytes + .bundle_unlock + ret # 1 byte Index: llvm/trunk/test/MC/X86/AlignedBundling/misaligned-bundle.s =================================================================== --- llvm/trunk/test/MC/X86/AlignedBundling/misaligned-bundle.s +++ llvm/trunk/test/MC/X86/AlignedBundling/misaligned-bundle.s @@ -0,0 +1,31 @@ +# RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu %s -o - \ +# RUN: | llvm-objdump -disassemble -no-show-raw-insn - \ +# RUN: | FileCheck -check-prefix=CHECK 
-check-prefix=CHECK-OPT %s +# RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu -mc-relax-all %s -o - \ +# RUN: | llvm-objdump -disassemble -no-show-raw-insn - \ +# RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-RELAX %s + + .text +foo: + .bundle_align_mode 5 + push %ebp # 1 byte + .align 16 +# CHECK: 1: nopw %cs:(%eax,%eax) +# CHECK-RELAX: 10: nopw %cs:(%eax,%eax) +# CHECK-RELAX: 1f: nop +# CHECK-OPT: 10: movl $1, (%esp) +# CHECK-RELAX: 20: movl $1, (%esp) + movl $0x1, (%esp) # 7 bytes + movl $0x1, (%esp) # 7 bytes +# CHECK-OPT: 1e: nop + movl $0x2, 0x1(%esp) # 8 bytes + movl $0x2, 0x1(%esp) # 8 bytes +# CHECK-RELAX: 3e: nop +# CHECK-RELAX: 40: movl $2, 1(%esp) + movl $0x2, 0x1(%esp) # 8 bytes + movl $0x2, (%esp) # 7 bytes +# CHECK-OPT: 3f: nop +# CHECK-OPT: 40: movl $3, (%esp) + movl $0x3, (%esp) # 7 bytes + movl $0x3, (%esp) # 7 bytes + ret