diff --git a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
--- a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -338,6 +338,36 @@
 }
 #endif
 
+// Align blocks where the previous block (in layout order) does not fall
+// through. This may add extra NOPs, but they will not be executed. It uses the
+// PrefLoopAlignment as a measure of how much to align, and only runs at
+// CodeGenOpt::Aggressive on functions that are not optimized for size.
+// An alignment already present on a block is never lowered. Returns true if
+// the alignment of at least one block was changed.
+static bool AlignBlocks(MachineFunction *MF) {
+  if (MF->getTarget().getOptLevel() != CodeGenOpt::Aggressive ||
+      MF->getFunction().hasOptSize())
+    return false;
+
+  auto *TLI = MF->getSubtarget().getTargetLowering();
+  const Align Alignment = TLI->getPrefLoopAlignment();
+  if (Alignment < 4)
+    return false;
+
+  bool Changed = false;
+  bool PrevCanFallthrough = true;
+  for (auto &MBB : *MF) {
+    // Only ever raise the alignment, and only report a change when the block
+    // was actually modified.
+    if (!PrevCanFallthrough && MBB.getAlignment() < Alignment) {
+      MBB.setAlignment(Alignment);
+      Changed = true;
+    }
+    PrevCanFallthrough = MBB.canFallThrough();
+  }
+
+  return Changed;
+}
+
 bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
   MF = &mf;
   MCP = mf.getConstantPool();
@@ -380,6 +410,9 @@
     MF->RenumberBlocks();
   }
 
+  // Align any non-fallthrough blocks
+  MadeChange |= AlignBlocks(MF);
+
   // Perform the initial placement of the constant pool entries.  To start with,
   // we put them all at the end of the function.
std::vector CPEMIs; diff --git a/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll b/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll --- a/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll +++ b/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll @@ -26,6 +26,7 @@ ; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-LE-NEXT: add.w r0, r12, r1 ; CHECK-LE-NEXT: pop {r4, pc} +; CHECK-LE-NEXT: .p2align 2 ; CHECK-LE-NEXT: .LBB0_4: ; CHECK-LE-NEXT: mov.w r12, #0 ; CHECK-LE-NEXT: movs r1, #0 @@ -58,6 +59,7 @@ ; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-BE-NEXT: add.w r0, r12, r1 ; CHECK-BE-NEXT: pop {r4, r5, r6, pc} +; CHECK-BE-NEXT: .p2align 2 ; CHECK-BE-NEXT: .LBB0_4: ; CHECK-BE-NEXT: mov.w r12, #0 ; CHECK-BE-NEXT: movs r1, #0 @@ -129,6 +131,7 @@ ; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-LE-NEXT: add.w r0, r12, r1 ; CHECK-LE-NEXT: pop {r4, r5, r7, pc} +; CHECK-LE-NEXT: .p2align 2 ; CHECK-LE-NEXT: .LBB1_4: ; CHECK-LE-NEXT: mov.w r12, #0 ; CHECK-LE-NEXT: movs r1, #0 @@ -161,6 +164,7 @@ ; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-BE-NEXT: add.w r0, r12, r1 ; CHECK-BE-NEXT: pop {r4, r5, r6, pc} +; CHECK-BE-NEXT: .p2align 2 ; CHECK-BE-NEXT: .LBB1_4: ; CHECK-BE-NEXT: mov.w r12, #0 ; CHECK-BE-NEXT: movs r1, #0 @@ -232,6 +236,7 @@ ; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-LE-NEXT: add.w r0, r12, r1 ; CHECK-LE-NEXT: pop {r4, pc} +; CHECK-LE-NEXT: .p2align 2 ; CHECK-LE-NEXT: .LBB2_4: ; CHECK-LE-NEXT: mov.w r12, #0 ; CHECK-LE-NEXT: movs r1, #0 @@ -264,6 +269,7 @@ ; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-BE-NEXT: add.w r0, r12, r1 ; CHECK-BE-NEXT: pop {r4, r5, r6, pc} +; CHECK-BE-NEXT: .p2align 2 ; CHECK-BE-NEXT: .LBB2_4: ; CHECK-BE-NEXT: mov.w r12, #0 ; CHECK-BE-NEXT: movs r1, #0 @@ -335,6 +341,7 @@ ; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-LE-NEXT: add.w r0, r12, r1 ; CHECK-LE-NEXT: pop {r4, pc} +; CHECK-LE-NEXT: .p2align 2 ; CHECK-LE-NEXT: .LBB3_4: ; CHECK-LE-NEXT: mov.w r12, #0 ; CHECK-LE-NEXT: 
movs r1, #0 @@ -367,6 +374,7 @@ ; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-BE-NEXT: add.w r0, r12, r1 ; CHECK-BE-NEXT: pop {r4, r5, r6, pc} +; CHECK-BE-NEXT: .p2align 2 ; CHECK-BE-NEXT: .LBB3_4: ; CHECK-BE-NEXT: mov.w r12, #0 ; CHECK-BE-NEXT: movs r1, #0 @@ -440,6 +448,7 @@ ; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-LE-NEXT: add.w r0, lr, r12 ; CHECK-LE-NEXT: pop {r4, pc} +; CHECK-LE-NEXT: .p2align 2 ; CHECK-LE-NEXT: .LBB4_4: ; CHECK-LE-NEXT: mov.w lr, #0 ; CHECK-LE-NEXT: mov.w r12, #0 @@ -474,6 +483,7 @@ ; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-BE-NEXT: add.w r0, r12, lr ; CHECK-BE-NEXT: pop {r4, r5, r6, pc} +; CHECK-BE-NEXT: .p2align 2 ; CHECK-BE-NEXT: .LBB4_4: ; CHECK-BE-NEXT: mov.w r12, #0 ; CHECK-BE-NEXT: mov.w lr, #0 diff --git a/llvm/test/CodeGen/Thumb2/aligned-nonfallthrough.ll b/llvm/test/CodeGen/Thumb2/aligned-nonfallthrough.ll --- a/llvm/test/CodeGen/Thumb2/aligned-nonfallthrough.ll +++ b/llvm/test/CodeGen/Thumb2/aligned-nonfallthrough.ll @@ -57,6 +57,7 @@ ; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: mov r1, r3 ; CHECK-NEXT: pop {r7, pc} +; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: .LBB1_4: ; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: movs r3, #0