Index: lib/CodeGen/BranchRelaxation.cpp
===================================================================
--- lib/CodeGen/BranchRelaxation.cpp
+++ lib/CodeGen/BranchRelaxation.cpp
@@ -49,10 +49,20 @@
-    /// Compute the offset immediately following this block. If LogAlign is
-    /// specified, return the offset the successor block will get if it has
-    /// this alignment.
-    unsigned postOffset(unsigned LogAlign = 0) const {
+    /// Compute the offset immediately following this block, i.e. the offset
+    /// that the layout successor \p MBB receives once any padding required by
+    /// its alignment has been accounted for.
+    unsigned postOffset(const MachineBasicBlock &MBB) const {
       unsigned PO = Offset + Size;
-      unsigned Align = 1 << LogAlign;
-      return (PO + Align - 1) / Align * Align;
+      unsigned Align = MBB.getAlignment();
+      if (Align == 0)
+        return PO;
+
+      unsigned AlignAmt = 1u << Align;
+      unsigned ParentAlign = MBB.getParent()->getAlignment();
+      if (Align <= ParentAlign)
+        return PO + OffsetToAlignment(PO, AlignAmt);
+
+      // The alignment of this MBB is larger than the function's alignment, so we
+      // can't tell whether or not it will insert nops. Assume that it will.
+      return PO + AlignAmt + OffsetToAlignment(PO, AlignAmt);
     }
   };
 
@@ -99,7 +109,7 @@
     unsigned Align = MBB.getAlignment();
     unsigned Num = MBB.getNumber();
     assert(BlockInfo[Num].Offset % (1u << Align) == 0);
-    assert(!Num || BlockInfo[PrevNum].postOffset() <= BlockInfo[Num].Offset);
+    assert(!Num || BlockInfo[PrevNum].postOffset(MBB) <= BlockInfo[Num].Offset);
     PrevNum = Num;
   }
 #endif
@@ -167,8 +177,8 @@
       continue;
     // Get the offset and known bits at the end of the layout predecessor.
     // Include the alignment of the current block.
-    unsigned LogAlign = MBB.getAlignment();
-    BlockInfo[Num].Offset = BlockInfo[PrevNum].postOffset(LogAlign);
+    BlockInfo[Num].Offset = BlockInfo[PrevNum].postOffset(MBB);
+
     PrevNum = Num;
   }
 }
Index: test/CodeGen/AArch64/branch-relax-alignment.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/branch-relax-alignment.ll
@@ -0,0 +1,29 @@
+; RUN: llc -mtriple=aarch64-apple-darwin -aarch64-bcc-offset-bits=4 -align-all-nofallthru-blocks=4 < %s | FileCheck %s
+
+; Long branch is assumed because the block has a higher alignment
+; requirement than the function.
+
+; CHECK-LABEL: invert_bcc_block_align_higher_func:
+; CHECK: b.eq [[JUMP_BB1:LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: b [[JUMP_BB2:LBB[0-9]+_[0-9]+]]
+
+; CHECK: [[JUMP_BB1]]:
+; CHECK: ret
+; CHECK: .p2align 4
+
+; CHECK: [[JUMP_BB2]]:
+; CHECK: ret
+define i32 @invert_bcc_block_align_higher_func(i32 %x, i32 %y) align 4 #0 {
+  %1 = icmp eq i32 %x, %y
+  br i1 %1, label %bb1, label %bb2
+
+bb2:
+  store volatile i32 9, i32* undef
+  ret i32 1
+
+bb1:
+  store volatile i32 42, i32* undef
+  ret i32 0
+}
+
+attributes #0 = { nounwind }