// Patch commentary (free text ahead of a file header; not part of any hunk).
//
// Section 1 - llvm/lib/CodeGen/MachineBlockPlacement.cpp
//   Inside the branch taken when LayoutPred does not have ChainBB as a
//   successor, the single call
//       ChainBB->setAlignment(Align, MaxBytesForAlignment)
//   is replaced by a two-way choice. ChainBB->setAlignment(Align, 0) is used
//   when all of the following hold:
//     - MaxBytesForAlignmentOverride.getNumOccurrences() == 0
//     - L->isInnermost()
//     - ChainBB == LoopHeader, or MLI->getLoopFor(LayoutPred) != L
//   In every other case the original call with MaxBytesForAlignment is kept.
//   NOTE(review): MaxBytesForAlignmentOverride presumably backs the
//   -max-bytes-for-alignment option exercised by the new test; its declaration
//   is not visible in this patch - confirm.
//   NOTE(review): a max-bytes argument of 0 presumably means "no padding
//   limit"; the new test's expected `.p2align 5` without a third operand is
//   consistent with that - confirm against setAlignment().
Index: llvm/lib/CodeGen/MachineBlockPlacement.cpp
===================================================================
--- llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -2950,7 +2950,12 @@
       // Force alignment if all the predecessors are jumps. We already checked
       // that the block isn't cold above.
       if (!LayoutPred->isSuccessor(ChainBB)) {
-        ChainBB->setAlignment(Align, MaxBytesForAlignment);
+        if (MaxBytesForAlignmentOverride.getNumOccurrences() == 0 &&
+            L->isInnermost() &&
+            (ChainBB == LoopHeader || MLI->getLoopFor(LayoutPred) != L))
+          ChainBB->setAlignment(Align, 0);
+        else
+          ChainBB->setAlignment(Align, MaxBytesForAlignment);
         continue;
       }
 
// Section 2 - llvm/test/CodeGen/AArch64/loop-align-limit.ll (new file)
//   Runs llc twice on function @g (triple aarch64-linux, "tune-cpu" set to
//   neoverse-v1): once with default options (prefixes CHECK,CHECK-NOLIMIT) and
//   once with -max-bytes-for-alignment=2 (prefixes CHECK,CHECK-LIMIT).
//   On the line after `b .LBB0_2` the default run expects `.p2align 5` with
//   nothing following it ({{$}}), and the limited run expects
//   `.p2align 5, , 2`. Both runs then expect the `// %if.end` line next.
Index: llvm/test/CodeGen/AArch64/loop-align-limit.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/loop-align-limit.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s | FileCheck %s -check-prefixes CHECK,CHECK-NOLIMIT
+; RUN: llc -max-bytes-for-alignment=2 < %s | FileCheck %s -check-prefixes CHECK,CHECK-LIMIT
+target triple = "aarch64-linux"
+
+declare i1 @cond(i64, i64)
+declare i32 @h(i32)
+
+define i32 @g(ptr %a, i64 %n, i32 %d) "tune-cpu"="neoverse-v1" {
+; CHECK-LABEL: g:
+; CHECK: b .LBB0_2
+; CHECK-NOLIMIT-NEXT: .p2align 5{{$}}
+; CHECK-LIMIT-NEXT: .p2align 5, , 2
+; CHECK-NEXT: // %if.end
+entry:
+  br label %loop
+
+loop:
+  %i = phi i64 [0, %entry], [%i.next, %if.end]
+  %s = phi i32 [0, %entry], [%s.next, %if.end]
+  %c = icmp slt i64 %i, %n
+  br i1 %c, label %loop.body, label %exit
+
+loop.body:
+  %p = getelementptr i32, ptr %a, i64 %i
+  %v = load i32, ptr %p
+  %c1 = icmp slt i32 %d, 1
+  br i1 %c1, label %if.then, label %if.end
+
+if.then:
+  %v0 = call i32 @h(i32 %v)
+  br label %if.end
+
+if.end:
+  %w = phi i32 [%v0, %if.then], [%v, %loop.body]
+  %s.next = add i32 %s, %w
+  %i.next = add i64 %i, 1
+  br label %loop
+
+
+exit:
+  ret i32 %s
+}
+
// Section 3 - llvm/test/CodeGen/AArch64/merge-store-dependency.ll
//   Updates the A53 expectations. The alignment directive ahead of .LBB0_4
//   (annotated "Inner Loop Header: Depth=1") changes from `.p2align 4, , 8` to
//   `.p2align 4`, i.e. the max-bytes operand is no longer expected.
//   NOTE(review): the two `mov` lines only gain `// =0x100` / `// =0x4`
//   trailing annotations; that looks like regenerated check lines rather than
//   an effect of the alignment change - confirm.
Index: llvm/test/CodeGen/AArch64/merge-store-dependency.ll
===================================================================
--- llvm/test/CodeGen/AArch64/merge-store-dependency.ll
+++ llvm/test/CodeGen/AArch64/merge-store-dependency.ll
@@ -19,12 +19,12 @@
 ; A53-NEXT: mov x8, x0
 ; A53-NEXT: mov x19, x8
 ; A53-NEXT: mov w0, w1
-; A53-NEXT: mov w9, #256
+; A53-NEXT: mov w9, #256 // =0x100
 ; A53-NEXT: stp x2, x3, [x8, #32]
 ; A53-NEXT: mov x2, x8
 ; A53-NEXT: str q0, [x19, #16]!
 ; A53-NEXT: str w1, [x19]
-; A53-NEXT: mov w1, #4
+; A53-NEXT: mov w1, #4 // =0x4
 ; A53-NEXT: str q0, [x8]
 ; A53-NEXT: strh w9, [x8, #24]
 ; A53-NEXT: str wzr, [x8, #20]
@@ -51,7 +51,7 @@
 ; A53-NEXT: .cfi_restore w19
 ; A53-NEXT: .cfi_restore w30
 ; A53-NEXT: ret
-; A53-NEXT: .p2align 4, , 8
+; A53-NEXT: .p2align 4
 ; A53-NEXT: .LBB0_4: // %while.body.i.split
 ; A53-NEXT: // =>This Inner Loop Header: Depth=1
 ; A53-NEXT: .cfi_restore_state