Index: llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -338,6 +338,38 @@
 }
 #endif
 
+// Align blocks whose layout predecessor cannot fall through into them. Any
+// NOPs inserted as padding are therefore never executed. The target's
+// PrefLoopAlignment is used as the amount to align by, and the transformation
+// only runs at CodeGenOpt::Aggressive for functions not optimised for size.
+// An existing, stricter block alignment is never reduced. Returns true if the
+// alignment of any block was raised.
+static bool AlignBlocks(MachineFunction *MF) {
+  if (MF->getTarget().getOptLevel() != CodeGenOpt::Aggressive ||
+      MF->getFunction().hasOptSize())
+    return false;
+
+  auto *TLI = MF->getSubtarget().getTargetLowering();
+  const Align Alignment = TLI->getPrefLoopAlignment();
+  // Preferred alignments below 4 bytes are ignored.
+  if (Alignment < 4)
+    return false;
+
+  bool Changed = false;
+  bool PrevCanFallthrough = true;
+  for (auto &MBB : *MF) {
+    // Only ever raise the alignment: a block that is already aligned at least
+    // this strictly is left alone and does not count as a change.
+    if (!PrevCanFallthrough && MBB.getAlignment() < Alignment) {
+      MBB.setAlignment(Alignment);
+      Changed = true;
+    }
+    PrevCanFallthrough = MBB.canFallThrough();
+  }
+
+  return Changed;
+}
+
 bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
   MF = &mf;
   MCP = mf.getConstantPool();
@@ -380,6 +412,9 @@
     MF->RenumberBlocks();
   }
 
+  // Align any non-fallthrough blocks
+  MadeChange |= AlignBlocks(MF);
+
   // Perform the initial placement of the constant pool entries. To start with,
   // we put them all at the end of the function.
std::vector CPEMIs; Index: llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll =================================================================== --- llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll +++ llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll @@ -26,6 +26,7 @@ ; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-LE-NEXT: add.w r0, r12, r1 ; CHECK-LE-NEXT: pop {r4, pc} +; CHECK-LE-NEXT: .p2align 2 ; CHECK-LE-NEXT: .LBB0_4: ; CHECK-LE-NEXT: mov.w r12, #0 ; CHECK-LE-NEXT: movs r1, #0 @@ -58,6 +59,7 @@ ; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-BE-NEXT: add.w r0, r12, r1 ; CHECK-BE-NEXT: pop {r4, r5, r6, pc} +; CHECK-BE-NEXT: .p2align 2 ; CHECK-BE-NEXT: .LBB0_4: ; CHECK-BE-NEXT: mov.w r12, #0 ; CHECK-BE-NEXT: movs r1, #0 @@ -129,6 +131,7 @@ ; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-LE-NEXT: add.w r0, r12, r1 ; CHECK-LE-NEXT: pop {r4, r5, r7, pc} +; CHECK-LE-NEXT: .p2align 2 ; CHECK-LE-NEXT: .LBB1_4: ; CHECK-LE-NEXT: mov.w r12, #0 ; CHECK-LE-NEXT: movs r1, #0 @@ -161,6 +164,7 @@ ; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-BE-NEXT: add.w r0, r12, r1 ; CHECK-BE-NEXT: pop {r4, r5, r6, pc} +; CHECK-BE-NEXT: .p2align 2 ; CHECK-BE-NEXT: .LBB1_4: ; CHECK-BE-NEXT: mov.w r12, #0 ; CHECK-BE-NEXT: movs r1, #0 @@ -232,6 +236,7 @@ ; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-LE-NEXT: add.w r0, r12, r1 ; CHECK-LE-NEXT: pop {r4, pc} +; CHECK-LE-NEXT: .p2align 2 ; CHECK-LE-NEXT: .LBB2_4: ; CHECK-LE-NEXT: mov.w r12, #0 ; CHECK-LE-NEXT: movs r1, #0 @@ -264,6 +269,7 @@ ; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-BE-NEXT: add.w r0, r12, r1 ; CHECK-BE-NEXT: pop {r4, r5, r6, pc} +; CHECK-BE-NEXT: .p2align 2 ; CHECK-BE-NEXT: .LBB2_4: ; CHECK-BE-NEXT: mov.w r12, #0 ; CHECK-BE-NEXT: movs r1, #0 @@ -335,6 +341,7 @@ ; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-LE-NEXT: add.w r0, r12, r1 ; CHECK-LE-NEXT: pop {r4, pc} +; CHECK-LE-NEXT: .p2align 2 ; CHECK-LE-NEXT: .LBB3_4: ; CHECK-LE-NEXT: mov.w r12, #0 ; CHECK-LE-NEXT: 
movs r1, #0 @@ -367,6 +374,7 @@ ; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-BE-NEXT: add.w r0, r12, r1 ; CHECK-BE-NEXT: pop {r4, r5, r6, pc} +; CHECK-BE-NEXT: .p2align 2 ; CHECK-BE-NEXT: .LBB3_4: ; CHECK-BE-NEXT: mov.w r12, #0 ; CHECK-BE-NEXT: movs r1, #0 @@ -440,6 +448,7 @@ ; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-LE-NEXT: add.w r0, lr, r12 ; CHECK-LE-NEXT: pop {r4, pc} +; CHECK-LE-NEXT: .p2align 2 ; CHECK-LE-NEXT: .LBB4_4: ; CHECK-LE-NEXT: mov.w lr, #0 ; CHECK-LE-NEXT: mov.w r12, #0 @@ -474,6 +483,7 @@ ; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-BE-NEXT: add.w r0, r12, lr ; CHECK-BE-NEXT: pop {r4, r5, r6, pc} +; CHECK-BE-NEXT: .p2align 2 ; CHECK-BE-NEXT: .LBB4_4: ; CHECK-BE-NEXT: mov.w r12, #0 ; CHECK-BE-NEXT: mov.w lr, #0 Index: llvm/test/CodeGen/Thumb2/aligned-nonfallthrough.ll =================================================================== --- llvm/test/CodeGen/Thumb2/aligned-nonfallthrough.ll +++ llvm/test/CodeGen/Thumb2/aligned-nonfallthrough.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1-m.main-none-eabi -mcpu=cortex-m55 < %s | FileCheck %s +; RUN: llc -mtriple=thumbv8.1-m.main-none-eabi -mcpu=cortex-m55 -O3 < %s | FileCheck %s define i32 @loop(i32* nocapture readonly %x) { ; CHECK-LABEL: loop: @@ -57,6 +57,7 @@ ; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: mov r1, r3 ; CHECK-NEXT: pop {r7, pc} +; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: .LBB1_4: ; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: movs r3, #0