diff --git a/llvm/lib/Target/ARM/ARM.h b/llvm/lib/Target/ARM/ARM.h --- a/llvm/lib/Target/ARM/ARM.h +++ b/llvm/lib/Target/ARM/ARM.h @@ -37,6 +37,7 @@ Pass *createMVETailPredicationPass(); FunctionPass *createARMLowOverheadLoopsPass(); +FunctionPass *createARMBlockPlacementPass(); Pass *createARMParallelDSPPass(); FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM, CodeGenOpt::Level OptLevel); diff --git a/llvm/lib/Target/ARM/ARMBlockPlacement.cpp b/llvm/lib/Target/ARM/ARMBlockPlacement.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/ARM/ARMBlockPlacement.cpp @@ -0,0 +1,176 @@
+//===-- ARMBlockPlacement.cpp - ARM block placement pass ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass re-arranges machine basic blocks to suit target requirements.
+// Currently it only moves blocks to fix backwards WLS branches.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBasicBlockInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "arm-block-placement"
+#define DEBUG_PREFIX "ARM Block Placement: "
+
+namespace llvm {
+/// Machine function pass that moves the target block of a backwards
+/// t2WhileLoopStart so that it directly follows the branching block.
+class ARMBlockPlacement : public MachineFunctionPass {
+private:
+  /// Instruction info of the function being processed; assigned by
+  /// runOnMachineFunction() and used when building fix-up branches.
+  const ARMBaseInstrInfo *TII = nullptr;
+  /// Block size/offset bookkeeping, recreated by runOnMachineFunction().
+  std::unique_ptr<ARMBasicBlockUtils> BBUtils = nullptr;
+
+public:
+  static char ID;
+  ARMBlockPlacement() : MachineFunctionPass(ID) {}
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+  void moveBasicBlock(MachineBasicBlock *BB, MachineBasicBlock *After);
+  bool blockIsBefore(MachineBasicBlock *BB, MachineBasicBlock *Other);
+};
+
+} // namespace llvm
+
+/// Returns a newly allocated instance of the ARM block placement pass.
+FunctionPass *llvm::createARMBlockPlacementPass() {
+  return new ARMBlockPlacement();
+}
+
+char ARMBlockPlacement::ID = 0;
+
+static RegisterPass<ARMBlockPlacement> X(DEBUG_TYPE, "ARM block placement",
+                                         false, false);
+
+/// Looks for t2WhileLoopStart terminators whose target block is not placed
+/// after the block holding the branch, and moves such a target to directly
+/// follow that block. A target is left in place when moving it would turn one
+/// of its own forward t2WhileLoopStart branches into a backwards one.
+///
+/// Does nothing when the subtarget's hasLOB() returns false.
+/// \returns true if at least one block was moved.
+bool ARMBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
+  const ARMSubtarget &ST = static_cast<const ARMSubtarget &>(MF.getSubtarget());
+  if (!ST.hasLOB())
+    return false;
+  LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Running on " << MF.getName() << "\n");
+  TII = static_cast<const ARMBaseInstrInfo *>(ST.getInstrInfo());
+  BBUtils = std::make_unique<ARMBasicBlockUtils>(MF);
+  // Build the block size/offset information that blockIsBefore() queries.
+  MF.RenumberBlocks();
+  BBUtils->computeAllBlockSizes();
+  BBUtils->adjustBBOffsetsAfter(&MF.front());
+  bool Changed = false;
+
+  for (auto &BB : MF) {
+    // Check if the block has a backwards WhileLoopStart instruction and move
+    // the target if so
+    for (auto &MI : BB.terminators()) {
+      if (MI.getOpcode() != ARM::t2WhileLoopStart)
+        continue;
+      MachineBasicBlock *Target = MI.getOperand(1).getMBB();
+      if (blockIsBefore(&BB, Target))
+        continue;
+      LLVM_DEBUG(dbgs() << DEBUG_PREFIX
+                        << "Found a backwards t2WhileLoopStart from "
+                        << BB.getName() << " to " << Target->getName() << "\n");
+      // Make sure that moving this block would not cause a WLS in the target BB
+      // to branch backwards
+      bool CanMove = true;
+      for (auto &TargetMI : Target->terminators()) {
+        if (TargetMI.getOpcode() != ARM::t2WhileLoopStart)
+          continue;
+        MachineBasicBlock *Target2 = TargetMI.getOperand(1).getMBB();
+        // We can move the block as long as the WLS doesn't become a backwards
+        // branch or it was already a backwards branch
+        if (blockIsBefore(&BB, Target2) || blockIsBefore(Target2, Target))
+          continue;
+        CanMove = false;
+        break;
+      }
+      if (CanMove) {
+        moveBasicBlock(Target, &BB);
+        Changed = true;
+      }
+    }
+  }
+  return Changed;
+}
+
+/// Returns true if the offset BBUtils has recorded for \p Other is greater
+/// than the one recorded for \p BB.
+bool ARMBlockPlacement::blockIsBefore(MachineBasicBlock *BB,
+                                      MachineBasicBlock *Other) {
+  return BBUtils->getOffsetOf(Other) > BBUtils->getOffsetOf(BB);
+}
+
+/// Moves \p BB to directly follow \p After and adds a branch wherever the move
+/// breaks a fall-through: out of the block that used to precede \p BB, out of
+/// \p After, and out of \p BB itself.
+void ARMBlockPlacement::moveBasicBlock(MachineBasicBlock *BB,
+                                       MachineBasicBlock *After) {
+  LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Moving " << BB->getName() << " after "
+                    << After->getName() << "\n");
+  MachineBasicBlock *BBPrevious = BB->getPrevNode();
+  MachineBasicBlock *AfterNext = After->getNextNode();
+  MachineBasicBlock *BBNext = BB->getNextNode();
+
+  BB->moveAfter(After);
+
+  auto fixFallthrough = [&](MachineBasicBlock *From, MachineBasicBlock *To) {
+    LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Checking for fallthrough from "
+                      << From->getName() << " to " << To->getName() << "\n");
+    assert(From->isSuccessor(To) &&
+           "'To' is expected to be a successor of 'From'");
+    // An empty block has no last instruction to inspect, and decrementing its
+    // end() iterator is invalid; it can only reach 'To' by falling through.
+    MachineInstr *Last = From->empty() ? nullptr : &*(--From->end());
+    if (Last && Last->isUnconditionalBranch())
+      return;
+    // The BB doesn't have an unconditional branch so it relied on
+    // fall-through. Fix by adding an unconditional branch to the moved BB.
+    // With no instruction to measure the distance from, keep the wide form.
+    // NOTE(review): this builds tBcc/t2Bcc with an AL predicate, which is what
+    // the checks in block-placement.mir expect; confirm that is preferred over
+    // a plain unconditional branch opcode.
+    unsigned BrOpc = ARM::t2Bcc;
+    if (Last && BBUtils->isBBInRange(Last, To, 254))
+      BrOpc = ARM::tBcc;
+    MachineInstrBuilder MIB = BuildMI(
+        From, Last ? Last->getDebugLoc() : DebugLoc(), TII->get(BrOpc));
+    MIB.addMBB(To);
+    MIB.addImm(ARMCC::CondCodes::AL);
+    MIB.addReg(ARM::NoRegister);
+    LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Adding unconditional branch from "
+                      << From->getName() << " to " << To->getName() << ": "
+                      << *MIB.getInstr());
+  };
+
+  // Fix fall-through to the moved BB from the one that used to be before it
+  if (BBPrevious && BBPrevious->isSuccessor(BB))
+    fixFallthrough(BBPrevious, BB);
+  // Fix fall through from the destination BB to the one that used to follow
+  if (AfterNext && After->isSuccessor(AfterNext))
+    fixFallthrough(After, AfterNext);
+  // Fix fall through from the moved BB to the one that used to follow
+  if (BBNext && BB->isSuccessor(BBNext))
+    fixFallthrough(BB, BBNext);
+
+  // NOTE(review): blocks are not renumbered and block sizes are not recomputed
+  // here although branches may have been added above; confirm that this call
+  // alone keeps the offsets used by blockIsBefore()/isBBInRange() valid.
+  BBUtils->adjustBBOffsetsAfter(After);
+}
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp --- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -548,6 +548,8 @@ return MF.getSubtarget<ARMSubtarget>().isThumb2(); })); + addPass(createARMBlockPlacementPass()); + // Don't optimize barriers at -O0. 
if (getOptLevel() != CodeGenOpt::None) addPass(createARMOptimizeBarriersPass()); diff --git a/llvm/lib/Target/ARM/CMakeLists.txt b/llvm/lib/Target/ARM/CMakeLists.txt --- a/llvm/lib/Target/ARM/CMakeLists.txt +++ b/llvm/lib/Target/ARM/CMakeLists.txt @@ -41,6 +41,7 @@ ARMParallelDSP.cpp ARMLoadStoreOptimizer.cpp ARMLowOverheadLoops.cpp + ARMBlockPlacement.cpp ARMMCInstLower.cpp ARMMachineFunctionInfo.cpp ARMMacroFusion.cpp diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll --- a/llvm/test/CodeGen/ARM/O3-pipeline.ll +++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll @@ -168,6 +168,7 @@ ; CHECK-NEXT: Implement the 'patchable-function' attribute ; CHECK-NEXT: Thumb2 instruction size reduce pass ; CHECK-NEXT: Unpack machine instruction bundles +; CHECK-NEXT: ARM block placement ; CHECK-NEXT: optimise barriers pass ; CHECK-NEXT: Contiguously Lay Out Funclets ; CHECK-NEXT: StackMap Liveness Analysis diff --git a/llvm/test/CodeGen/Thumb2/block-placement.mir b/llvm/test/CodeGen/Thumb2/block-placement.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/block-placement.mir @@ -0,0 +1,576 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve -run-pass=arm-block-placement %s -o - | FileCheck %s +--- | + %struct.arm_fir_instance_f32 = type { i16, float*, float* } + + define void @backwards_branch(%struct.arm_fir_instance_f32* nocapture readonly %S, float* nocapture readonly %pSrc, float* nocapture %pDst, i32 %blockSize) { + entry: + ret void + } + + define void @backwards_branch_fallthrough(%struct.arm_fir_instance_f32* nocapture readonly %S, float* nocapture readonly %pSrc, float* nocapture %pDst, i32 %blockSize) { + entry: + ret void + } + + define void @backwards_branch_nested(%struct.arm_fir_instance_f32* nocapture readonly %S, float* nocapture readonly %pSrc, float* nocapture %pDst, i32 %blockSize) { + entry: + ret void + } + +... 
+--- +name: backwards_branch +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } + - { reg: '$r3', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 120 + offsetAdjustment: 0 + maxAlignment: 8 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -92, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -96, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 2, name: '', type: spill-slot, offset: -100, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 3, name: '', type: spill-slot, offset: -104, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 4, name: '', type: spill-slot, offset: -108, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 5, 
name: '', type: spill-slot, offset: -112, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 6, name: '', type: spill-slot, offset: -116, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 7, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 8, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r11', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 9, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r10', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 10, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r9', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 11, name: '', type: spill-slot, offset: -20, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r8', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 12, name: '', type: spill-slot, offset: -24, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 13, name: '', type: spill-slot, offset: -28, size: 4, alignment: 4, + stack-id: default, 
callee-saved-register: '$r6', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 14, name: '', type: spill-slot, offset: -32, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r5', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 15, name: '', type: spill-slot, offset: -36, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 16, name: '', type: spill-slot, offset: -48, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$d13', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 17, name: '', type: spill-slot, offset: -56, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$d12', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 18, name: '', type: spill-slot, offset: -64, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$d11', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 19, name: '', type: spill-slot, offset: -72, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$d10', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 20, name: '', type: spill-slot, offset: -80, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$d9', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 21, name: '', type: spill-slot, offset: -88, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$d8', callee-saved-restored: true, + debug-info-variable: '', 
debug-info-expression: '', debug-info-location: '' } +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: backwards_branch + ; CHECK: bb.0: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11, $lr, $d8, $d9, $d10, $d11, $d12, $d13 + ; CHECK: t2B %bb.2, 14 /* CC::al */, $noreg + ; CHECK: bb.2: + ; CHECK: successors: %bb.3(0x40000000), %bb.1(0x40000000) + ; CHECK: liveins: $q0, $r2, $r5, $r7, $r10, $r12 + ; CHECK: $r9, $r1 = t2LDRDi8 $sp, 24, 14 /* CC::al */, $noreg + ; CHECK: renamable $r3 = tLDRspi $sp, 3, 14 /* CC::al */, $noreg + ; CHECK: t2WhileLoopStart renamable $r3, %bb.1, implicit-def dead $cpsr + ; CHECK: t2B %bb.3, 14 /* CC::al */, $noreg + ; CHECK: bb.1: + ; CHECK: successors: %bb.5(0x80000000) + ; CHECK: liveins: $q0, $r1, $r2, $r5, $r9, $r10, $r12 + ; CHECK: renamable $r10 = nsw t2SUBri killed renamable $r10, 1, 14 /* CC::al */, $noreg, def $cpsr + ; CHECK: t2B %bb.5, 14 /* CC::al */, killed $cpsr + ; CHECK: bb.3: + ; CHECK: successors: %bb.4(0x80000000) + ; CHECK: liveins: $q0, $r1, $r2, $r3, $r5, $r7, $r9, $r10, $r12 + ; CHECK: $r6 = tMOVr $r5, 14 /* CC::al */, $noreg + ; CHECK: $lr = tMOVr $r3, 14 /* CC::al */, $noreg + ; CHECK: bb.4: + ; CHECK: successors: %bb.4(0x7c000000), %bb.5(0x04000000) + ; CHECK: liveins: $lr, $q0, $r1, $r2, $r3, $r5, $r6, $r7, $r9, $r10, $r12 + ; CHECK: renamable $lr = t2LoopDec killed renamable $lr, 1 + ; CHECK: t2LoopEnd renamable $lr, %bb.4, implicit-def dead $cpsr + ; CHECK: bb.5: + ; CHECK: $sp = frame-destroy tADDspi $sp, 1, 14 /* CC::al */, $noreg + ; CHECK: $sp = frame-destroy t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11, def $pc + bb.0: + successors: %bb.2(0x40000000) + liveins: $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11, $lr, $d8, $d9, $d10, $d11, $d12, $d13 + + t2B %bb.2, 14 /* CC::al */, $noreg + 
+ + bb.1: + successors: %bb.5(0x04000000) + liveins: $q0, $r1, $r2, $r5, $r9, $r10, $r12 + + renamable $r10 = nsw t2SUBri killed renamable $r10, 1, 14 /* CC::al */, $noreg, def $cpsr + t2B %bb.5, 14 /* CC::al */, killed $cpsr + + bb.2: + successors: %bb.3(0x40000000), %bb.1(0x40000000) + liveins: $q0, $r2, $r5, $r7, $r10, $r12 + + $r9, $r1 = t2LDRDi8 $sp, 24, 14 /* CC::al */, $noreg + renamable $r3 = tLDRspi $sp, 3, 14 /* CC::al */, $noreg + t2WhileLoopStart renamable $r3, %bb.1, implicit-def dead $cpsr + t2B %bb.3, 14 /* CC::al */, $noreg + + bb.3: + successors: %bb.4(0x80000000) + liveins: $q0, $r1, $r2, $r3, $r5, $r7, $r9, $r10, $r12 + + $r6 = tMOVr $r5, 14 /* CC::al */, $noreg + $lr = tMOVr $r3, 14 /* CC::al */, $noreg + + bb.4: + successors: %bb.4(0x7c000000), %bb.5(0x04000000) + liveins: $lr, $q0, $r1, $r2, $r3, $r5, $r6, $r7, $r9, $r10, $r12 + + renamable $lr = t2LoopDec killed renamable $lr, 1 + t2LoopEnd renamable $lr, %bb.4, implicit-def dead $cpsr + + bb.5: + $sp = frame-destroy tADDspi $sp, 1, 14 /* CC::al */, $noreg + $sp = frame-destroy t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11, def $pc + +... 
+--- +name: backwards_branch_fallthrough +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } + - { reg: '$r3', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 120 + offsetAdjustment: 0 + maxAlignment: 8 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -92, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -96, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 2, name: '', type: spill-slot, offset: -100, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 3, name: '', type: spill-slot, offset: -104, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 4, name: '', type: spill-slot, offset: -108, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } 
+ - { id: 5, name: '', type: spill-slot, offset: -112, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 6, name: '', type: spill-slot, offset: -116, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 7, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 8, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r11', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 9, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r10', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 10, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r9', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 11, name: '', type: spill-slot, offset: -20, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r8', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 12, name: '', type: spill-slot, offset: -24, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 13, name: '', type: spill-slot, offset: -28, size: 4, alignment: 4, + stack-id: default, 
callee-saved-register: '$r6', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 14, name: '', type: spill-slot, offset: -32, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r5', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 15, name: '', type: spill-slot, offset: -36, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 16, name: '', type: spill-slot, offset: -48, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$d13', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 17, name: '', type: spill-slot, offset: -56, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$d12', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 18, name: '', type: spill-slot, offset: -64, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$d11', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 19, name: '', type: spill-slot, offset: -72, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$d10', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 20, name: '', type: spill-slot, offset: -80, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$d9', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 21, name: '', type: spill-slot, offset: -88, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$d8', callee-saved-restored: true, + debug-info-variable: '', 
debug-info-expression: '', debug-info-location: '' } +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: backwards_branch_fallthrough + ; CHECK: bb.0: + ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK: liveins: $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11, $lr, $d8, $d9, $d10, $d11, $d12, $d13 + ; CHECK: t2Bcc %bb.2, 0 /* CC::eq */, $noreg + ; CHECK: tBcc %bb.1, 14 /* CC::al */, $noreg + ; CHECK: bb.2: + ; CHECK: successors: %bb.3(0x40000000), %bb.1(0x40000000) + ; CHECK: liveins: $q0, $r2, $r5, $r7, $r10, $r12 + ; CHECK: $r9, $r1 = t2LDRDi8 $sp, 24, 14 /* CC::al */, $noreg + ; CHECK: renamable $r3 = tLDRspi $sp, 3, 14 /* CC::al */, $noreg + ; CHECK: t2WhileLoopStart renamable $r3, %bb.1, implicit-def dead $cpsr + ; CHECK: tBcc %bb.3, 14 /* CC::al */, $noreg + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $q0, $r1, $r2, $r5, $r9, $r10, $r12 + ; CHECK: renamable $r10 = nsw t2SUBri killed renamable $r10, 1, 14 /* CC::al */, $noreg, def $cpsr + ; CHECK: tBcc %bb.2, 14 /* CC::al */, $noreg + ; CHECK: bb.3: + ; CHECK: successors: %bb.4(0x80000000) + ; CHECK: liveins: $q0, $r1, $r2, $r5, $r9, $r10, $r12 + ; CHECK: renamable $r10 = nsw t2SUBri killed renamable $r10, 1, 14 /* CC::al */, $noreg, def $cpsr + ; CHECK: bb.4: + ; CHECK: successors: %bb.5(0x80000000) + ; CHECK: liveins: $q0, $r1, $r2, $r3, $r5, $r7, $r9, $r10, $r12 + ; CHECK: $r6 = tMOVr $r5, 14 /* CC::al */, $noreg + ; CHECK: $lr = tMOVr $r3, 14 /* CC::al */, $noreg + ; CHECK: bb.5: + ; CHECK: successors: %bb.5(0x7c000000), %bb.6(0x04000000) + ; CHECK: liveins: $lr, $q0, $r1, $r2, $r3, $r5, $r6, $r7, $r9, $r10, $r12 + ; CHECK: renamable $lr = t2LoopDec killed renamable $lr, 1 + ; CHECK: t2LoopEnd renamable $lr, %bb.5, implicit-def dead $cpsr + ; CHECK: bb.6: + ; CHECK: $sp = frame-destroy tADDspi $sp, 1, 14 /* CC::al */, $noreg + ; CHECK: $sp = frame-destroy t2LDMIA_RET $sp, 14 /* 
CC::al */, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11, def $pc + bb.0: + successors: %bb.2(0x40000000), %bb.1(0x40000000) + liveins: $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11, $lr, $d8, $d9, $d10, $d11, $d12, $d13 + + t2Bcc %bb.2, 0 /* CC::eq */, $noreg + + bb.1: + successors: %bb.2(0x04000000) + liveins: $q0, $r1, $r2, $r5, $r9, $r10, $r12 + + renamable $r10 = nsw t2SUBri killed renamable $r10, 1, 14 /* CC::al */, $noreg, def $cpsr + + bb.2: + successors: %bb.3(0x40000000), %bb.1(0x40000000) + liveins: $q0, $r2, $r5, $r7, $r10, $r12 + + $r9, $r1 = t2LDRDi8 $sp, 24, 14 /* CC::al */, $noreg + renamable $r3 = tLDRspi $sp, 3, 14 /* CC::al */, $noreg + t2WhileLoopStart renamable $r3, %bb.1, implicit-def dead $cpsr + + bb.3: + successors: %bb.4(0x04000000) + liveins: $q0, $r1, $r2, $r5, $r9, $r10, $r12 + + renamable $r10 = nsw t2SUBri killed renamable $r10, 1, 14 /* CC::al */, $noreg, def $cpsr + + bb.4: + successors: %bb.5(0x80000000) + liveins: $q0, $r1, $r2, $r3, $r5, $r7, $r9, $r10, $r12 + + $r6 = tMOVr $r5, 14 /* CC::al */, $noreg + $lr = tMOVr $r3, 14 /* CC::al */, $noreg + + bb.5: + successors: %bb.5(0x7c000000), %bb.6(0x04000000) + liveins: $lr, $q0, $r1, $r2, $r3, $r5, $r6, $r7, $r9, $r10, $r12 + + renamable $lr = t2LoopDec killed renamable $lr, 1 + t2LoopEnd renamable $lr, %bb.5, implicit-def dead $cpsr + + bb.6: + $sp = frame-destroy tADDspi $sp, 1, 14 /* CC::al */, $noreg + $sp = frame-destroy t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11, def $pc + +... 
+--- +name: backwards_branch_nested +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } + - { reg: '$r3', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 120 + offsetAdjustment: 0 + maxAlignment: 8 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -92, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -96, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 2, name: '', type: spill-slot, offset: -100, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 3, name: '', type: spill-slot, offset: -104, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 4, name: '', type: spill-slot, offset: -108, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { 
id: 5, name: '', type: spill-slot, offset: -112, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 6, name: '', type: spill-slot, offset: -116, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 7, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 8, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r11', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 9, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r10', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 10, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r9', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 11, name: '', type: spill-slot, offset: -20, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r8', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 12, name: '', type: spill-slot, offset: -24, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 13, name: '', type: spill-slot, offset: -28, size: 4, alignment: 4, + stack-id: default, 
callee-saved-register: '$r6', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 14, name: '', type: spill-slot, offset: -32, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r5', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 15, name: '', type: spill-slot, offset: -36, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 16, name: '', type: spill-slot, offset: -48, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$d13', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 17, name: '', type: spill-slot, offset: -56, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$d12', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 18, name: '', type: spill-slot, offset: -64, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$d11', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 19, name: '', type: spill-slot, offset: -72, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$d10', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 20, name: '', type: spill-slot, offset: -80, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$d9', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 21, name: '', type: spill-slot, offset: -88, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$d8', callee-saved-restored: true, + debug-info-variable: '', 
debug-info-expression: '', debug-info-location: '' } +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: backwards_branch_nested + ; CHECK: bb.0: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11, $lr, $d8, $d9, $d10, $d11, $d12, $d13 + ; CHECK: t2B %bb.2, 14 /* CC::al */, $noreg + ; CHECK: bb.1: + ; CHECK: successors: %bb.5(0x80000000) + ; CHECK: liveins: $q0, $r1, $r2, $r5, $r9, $r10, $r12 + ; CHECK: renamable $r10 = nsw t2SUBri killed renamable $r10, 1, 14 /* CC::al */, $noreg, def $cpsr + ; CHECK: t2WhileLoopStart renamable $r1, %bb.2, implicit-def $cpsr + ; CHECK: t2B %bb.5, 14 /* CC::al */, killed $cpsr + ; CHECK: bb.2: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: liveins: $q0, $r1, $r2, $r5, $r9, $r10, $r12 + ; CHECK: bb.3: + ; CHECK: successors: %bb.4(0x40000000), %bb.1(0x40000000) + ; CHECK: liveins: $q0, $r2, $r5, $r7, $r10, $r12 + ; CHECK: $r9, $r1 = t2LDRDi8 $sp, 24, 14 /* CC::al */, $noreg + ; CHECK: renamable $r3 = tLDRspi $sp, 3, 14 /* CC::al */, $noreg + ; CHECK: t2WhileLoopStart renamable $r3, %bb.1, implicit-def dead $cpsr + ; CHECK: t2B %bb.4, 14 /* CC::al */, $noreg + ; CHECK: bb.4: + ; CHECK: successors: %bb.5(0x80000000) + ; CHECK: liveins: $q0, $r1, $r2, $r3, $r5, $r7, $r9, $r10, $r12 + ; CHECK: $r6 = tMOVr $r5, 14 /* CC::al */, $noreg + ; CHECK: $lr = tMOVr $r3, 14 /* CC::al */, $noreg + ; CHECK: bb.5: + ; CHECK: successors: %bb.5(0x7c000000), %bb.6(0x04000000) + ; CHECK: liveins: $lr, $q0, $r1, $r2, $r3, $r5, $r6, $r7, $r9, $r10, $r12 + ; CHECK: renamable $lr = t2LoopDec killed renamable $lr, 1 + ; CHECK: t2LoopEnd renamable $lr, %bb.5, implicit-def dead $cpsr + ; CHECK: bb.6: + ; CHECK: $sp = frame-destroy tADDspi $sp, 1, 14 /* CC::al */, $noreg + ; CHECK: $sp = frame-destroy t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11, def $pc + 
bb.0: + successors: %bb.2(0x40000000) + liveins: $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11, $lr, $d8, $d9, $d10, $d11, $d12, $d13 + + t2B %bb.2, 14 /* CC::al */, $noreg + + bb.1: + successors: %bb.5(0x04000000) + liveins: $q0, $r1, $r2, $r5, $r9, $r10, $r12 + + renamable $r10 = nsw t2SUBri killed renamable $r10, 1, 14 /* CC::al */, $noreg, def $cpsr + t2WhileLoopStart renamable $r1, %bb.2, implicit-def $cpsr + t2B %bb.5, 14 /* CC::al */, killed $cpsr + + bb.2: + successors: %bb.3(0x04000000) + liveins: $q0, $r1, $r2, $r5, $r9, $r10, $r12 + + bb.3: + successors: %bb.4(0x40000000), %bb.1(0x40000000) + liveins: $q0, $r2, $r5, $r7, $r10, $r12 + + $r9, $r1 = t2LDRDi8 $sp, 24, 14 /* CC::al */, $noreg + renamable $r3 = tLDRspi $sp, 3, 14 /* CC::al */, $noreg + t2WhileLoopStart renamable $r3, %bb.1, implicit-def dead $cpsr + t2B %bb.4, 14 /* CC::al */, $noreg + + bb.4: + successors: %bb.5(0x80000000) + liveins: $q0, $r1, $r2, $r3, $r5, $r7, $r9, $r10, $r12 + + $r6 = tMOVr $r5, 14 /* CC::al */, $noreg + $lr = tMOVr $r3, 14 /* CC::al */, $noreg + + bb.5: + successors: %bb.5(0x7c000000), %bb.6(0x04000000) + liveins: $lr, $q0, $r1, $r2, $r3, $r5, $r6, $r7, $r9, $r10, $r12 + + renamable $lr = t2LoopDec killed renamable $lr, 1 + t2LoopEnd renamable $lr, %bb.5, implicit-def dead $cpsr + + bb.6: + $sp = frame-destroy tADDspi $sp, 1, 14 /* CC::al */, $noreg + $sp = frame-destroy t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11, def $pc + +... 
diff --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll --- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll @@ -1072,18 +1072,10 @@ ; CHECK-NEXT: str r6, [sp, #20] @ 4-byte Spill ; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill ; CHECK-NEXT: str r4, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: b .LBB16_4 -; CHECK-NEXT: .LBB16_3: @ %while.end -; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1 -; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: subs.w r12, r12, #1 -; CHECK-NEXT: vstrb.8 q0, [r2], #16 -; CHECK-NEXT: add.w r0, r5, r0, lsl #2 -; CHECK-NEXT: add.w r5, r0, #16 -; CHECK-NEXT: beq .LBB16_12 -; CHECK-NEXT: .LBB16_4: @ %while.body +; CHECK-NEXT: b .LBB16_3 +; CHECK-NEXT: .LBB16_3: @ %while.body ; CHECK-NEXT: @ =>This Loop Header: Depth=1 -; CHECK-NEXT: @ Child Loop BB16_6 Depth 2 +; CHECK-NEXT: @ Child Loop BB16_5 Depth 2 ; CHECK-NEXT: @ Child Loop BB16_10 Depth 2 ; CHECK-NEXT: add.w lr, r10, #8 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16 @@ -1110,14 +1102,14 @@ ; CHECK-NEXT: vfma.f32 q0, q3, r11 ; CHECK-NEXT: cmp r0, #16 ; CHECK-NEXT: vfma.f32 q0, q1, r8 -; CHECK-NEXT: blo .LBB16_7 -; CHECK-NEXT: @ %bb.5: @ %for.body.preheader -; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1 +; CHECK-NEXT: blo .LBB16_6 +; CHECK-NEXT: @ %bb.4: @ %for.body.preheader +; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1 ; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-NEXT: dls lr, r0 ; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: .LBB16_6: @ %for.body -; CHECK-NEXT: @ Parent Loop BB16_4 Depth=1 +; CHECK-NEXT: .LBB16_5: @ %for.body +; CHECK-NEXT: @ Parent Loop BB16_3 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 ; CHECK-NEXT: ldm.w r7, {r0, r3, r4, r6} ; CHECK-NEXT: vldrw.u32 q1, [r5], #32 @@ -1140,34 +1132,40 @@ ; CHECK-NEXT: adds r7, #32 ; CHECK-NEXT: vfma.f32 q0, q3, r11 ; CHECK-NEXT: vfma.f32 q0, q1, r9 -; CHECK-NEXT: le lr, .LBB16_6 -; 
CHECK-NEXT: b .LBB16_8 -; CHECK-NEXT: .LBB16_7: @ in Loop: Header=BB16_4 Depth=1 +; CHECK-NEXT: le lr, .LBB16_5 +; CHECK-NEXT: b .LBB16_7 +; CHECK-NEXT: .LBB16_6: @ in Loop: Header=BB16_3 Depth=1 ; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: .LBB16_8: @ %for.end -; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1 +; CHECK-NEXT: .LBB16_7: @ %for.end +; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1 ; CHECK-NEXT: ldrd r9, r1, [sp, #24] @ 8-byte Folded Reload ; CHECK-NEXT: ldr r4, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: cmp.w r4, #0 -; CHECK-NEXT: beq .LBB16_3 +; CHECK-NEXT: wls lr, r4, .LBB16_8 ; CHECK-NEXT: b .LBB16_9 +; CHECK-NEXT: .LBB16_8: @ %while.end +; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1 +; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: subs.w r12, r12, #1 +; CHECK-NEXT: vstrb.8 q0, [r2], #16 +; CHECK-NEXT: add.w r0, r5, r0, lsl #2 +; CHECK-NEXT: add.w r5, r0, #16 +; CHECK-NEXT: beq .LBB16_12 +; CHECK-NEXT: b .LBB16_3 ; CHECK-NEXT: .LBB16_9: @ %while.body76.preheader -; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1 +; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1 ; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: mov lr, r4 ; CHECK-NEXT: .LBB16_10: @ %while.body76 -; CHECK-NEXT: @ Parent Loop BB16_4 Depth=1 +; CHECK-NEXT: @ Parent Loop BB16_3 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 ; CHECK-NEXT: ldr r0, [r7], #4 ; CHECK-NEXT: vldrw.u32 q1, [r3], #4 -; CHECK-NEXT: subs.w lr, lr, #1 ; CHECK-NEXT: vfma.f32 q0, q1, r0 -; CHECK-NEXT: bne .LBB16_10 -; CHECK-NEXT: b .LBB16_11 -; CHECK-NEXT: .LBB16_11: @ %while.end.loopexit -; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1 +; CHECK-NEXT: le lr, .LBB16_10 +; CHECK-NEXT: @ %bb.11: @ %while.end.loopexit +; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1 ; CHECK-NEXT: add.w r5, r5, r4, lsl #2 -; CHECK-NEXT: b .LBB16_3 +; CHECK-NEXT: b .LBB16_8 ; CHECK-NEXT: .LBB16_12: @ %if.end ; CHECK-NEXT: add sp, #32 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}