Index: llvm/lib/Target/ARM/ARMBaseRegisterInfo.h =================================================================== --- llvm/lib/Target/ARM/ARMBaseRegisterInfo.h +++ llvm/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -32,8 +32,11 @@ namespace ARMRI { enum { + // Used for LDRD register pairs RegPairOdd = 1, - RegPairEven = 2 + RegPairEven = 2, + // Used to hint for lr in t2DoLoopStart + RegLR = 3 }; } // end namespace ARMRI Index: llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp =================================================================== --- llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -330,9 +330,12 @@ case ARMRI::RegPairOdd: Odd = 1; break; - default: + case ARMRI::RegLR: TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF, VRM); + Hints.push_back(ARM::LR); return false; + default: + return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF, VRM); } // This register should preferably be even (Odd == 0) or odd (Odd == 1). Index: llvm/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -11260,6 +11260,14 @@ return EmitLowered__chkstk(MI, BB); case ARM::WIN__DBZCHK: return EmitLowered__dbzchk(MI, BB); + case ARM::t2DoLoopStart: + // We are just here to set a register allocation hint, preferring lr for the + // input register to make it more likely to be movable and removable, later + // in the pipeline. 
+ Register R = MI.getOperand(1).getReg(); + MachineFunction *MF = MI.getParent()->getParent(); + MF->getRegInfo().setRegAllocationHint(R, ARMRI::RegLR, 0); + return BB; } } Index: llvm/lib/Target/ARM/ARMInstrThumb2.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrThumb2.td +++ llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -5422,6 +5422,7 @@ let Predicates = [IsThumb2, HasV8_1MMainline, HasLOB] in { +let usesCustomInserter = 1 in def t2DoLoopStart : t2PseudoInst<(outs GPRlr:$X), (ins rGPR:$elts), 4, IIC_Br, [(set GPRlr:$X, (int_start_loop_iterations rGPR:$elts))]>; Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll @@ -15,9 +15,9 @@ ; CHECK-NEXT: bic r12, r12, #3 ; CHECK-NEXT: vmov.i32 q1, #0x0 ; CHECK-NEXT: sub.w r12, r12, #4 -; CHECK-NEXT: add.w r12, lr, r12, lsr #2 -; CHECK-NEXT: dls lr, r12 +; CHECK-NEXT: add.w lr, lr, r12, lsr #2 ; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: and r4, r12, #15 @@ -107,9 +107,9 @@ ; CHECK-NEXT: bic r4, r4, #3 ; CHECK-NEXT: sub.w lr, r4, #4 ; CHECK-NEXT: movs r4, #1 -; CHECK-NEXT: add.w r4, r4, lr, lsr #2 -; CHECK-NEXT: dls lr, r4 +; CHECK-NEXT: add.w lr, r4, lr, lsr #2 ; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB1_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: and r5, r4, #15 @@ -210,9 +210,9 @@ ; CHECK-NEXT: bic r4, r4, #3 ; CHECK-NEXT: sub.w lr, r4, #4 ; CHECK-NEXT: movs r4, #1 -; CHECK-NEXT: add.w r4, r4, lr, lsr #2 -; CHECK-NEXT: dls lr, r4 +; CHECK-NEXT: add.w lr, r4, lr, lsr #2 ; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB2_2: @ 
%vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r12 @@ -309,9 +309,9 @@ ; CHECK-NEXT: bic r4, r4, #3 ; CHECK-NEXT: sub.w lr, r4, #4 ; CHECK-NEXT: movs r4, #1 -; CHECK-NEXT: add.w r4, r4, lr, lsr #2 -; CHECK-NEXT: dls lr, r4 +; CHECK-NEXT: add.w lr, r4, lr, lsr #2 ; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB3_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r12 @@ -402,8 +402,8 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB4_1: @ %bb3 -; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB4_2: @ %bb9 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: adds r3, #4 @@ -464,8 +464,8 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB5_1: @ %bb4 -; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB5_2: @ %bb12 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0] Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/exitcount.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/exitcount.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/exitcount.ll @@ -10,8 +10,8 @@ ; CHECK-NEXT: ldrd r12, r4, [r0] ; CHECK-NEXT: ldrd r2, r3, [r0, #8] ; CHECK-NEXT: rsb r12, r12, r4, lsl #1 -; CHECK-NEXT: dlstp.16 lr, r12 ; CHECK-NEXT: mov r4, r12 +; CHECK-NEXT: dlstp.16 lr, r4 ; CHECK-NEXT: .LBB0_1: @ %do.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrh.u16 q0, [r3], #16 Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/extending-loads.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/extending-loads.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/extending-loads.ll @@ -9,8 +9,8 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: 
.LBB0_1: @ %vector.ph -; CHECK-NEXT: dlstp.16 lr, r2 ; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: dlstp.16 lr, r2 ; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: adds r3, #8 @@ -69,8 +69,8 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB1_1: @ %vector.ph -; CHECK-NEXT: dlstp.16 lr, r2 ; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: dlstp.16 lr, r2 ; CHECK-NEXT: .LBB1_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: adds r3, #8 @@ -129,8 +129,8 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB2_1: @ %vector.ph -; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB2_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: adds r3, #4 @@ -189,8 +189,8 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB3_1: @ %vector.ph -; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB3_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: adds r3, #4 Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll @@ -49,10 +49,10 @@ ; CHECK-NEXT: .LBB0_6: @ %for.body.preheader.new ; CHECK-NEXT: bic r3, r3, #3 ; CHECK-NEXT: subs r3, #4 -; CHECK-NEXT: add.w r3, r12, r3, lsr #2 -; CHECK-NEXT: mov.w r12, #0 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: add.w lr, r12, r3, lsr #2 ; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: .LBB0_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: adds r4, r1, r3 @@ -228,9 +228,9 @@ ; CHECK-NEXT: bic r3, r3, #3 ; CHECK-NEXT: sub.w r12, r3, #4 ; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w r3, r3, 
r12, lsr #2 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: add.w lr, r3, r12, lsr #2 ; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB1_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 @@ -321,10 +321,10 @@ ; CHECK-NEXT: bic r3, r3, #3 ; CHECK-NEXT: sub.w r12, r3, #4 ; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w r3, r3, r12, lsr #2 +; CHECK-NEXT: add.w lr, r3, r12, lsr #2 ; CHECK-NEXT: sub.w r12, r2, #1 ; CHECK-NEXT: adr r2, .LCPI2_1 -; CHECK-NEXT: mov lr, r3 +; CHECK-NEXT: mov lr, lr ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: vdup.32 q1, r12 Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-guards.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-guards.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-guards.ll @@ -15,10 +15,10 @@ ; CHECK: ne_and_guard ; CHECK: body: ; CHECK: bb.0.entry: -; CHECK: tCMPi8 renamable $r0, 0 +; CHECK: t2CMPri renamable $lr, 0 ; CHECK: tBcc %bb.4 ; CHECK: bb.2.while.body.preheader: -; CHECK: $lr = t2DLS killed renamable $r0 +; CHECK: $lr = t2DLS killed renamable $lr ; CHECK: bb.3.while.body: ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.3 define void @ne_and_guard(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) { @@ -48,10 +48,10 @@ ; CHECK: ne_preheader ; CHECK: body: ; CHECK: bb.0.entry: -; CHECK: tCMPi8 renamable $r0, 0 +; CHECK: t2CMPri renamable $lr, 0 ; CHECK: tBcc %bb.4 ; CHECK: bb.2.while.body.preheader: -; CHECK: $lr = t2DLS killed renamable $r0 +; CHECK: $lr = t2DLS killed renamable $lr ; CHECK: bb.3.while.body: ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.3 define void @ne_preheader(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) { @@ -83,10 +83,10 @@ ; CHECK: eq_preheader ; CHECK: body: ; CHECK: bb.0.entry: -; CHECK: tCMPi8 renamable $r0, 0 +; CHECK: t2CMPri 
renamable $lr, 0 ; CHECK: tBcc %bb.4 ; CHECK: bb.2.while.body.preheader: -; CHECK: $lr = t2DLS killed renamable $r0 +; CHECK: $lr = t2DLS killed renamable $lr ; CHECK: bb.3.while.body: ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.3 define void @eq_preheader(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) { @@ -118,10 +118,10 @@ ; CHECK: ne_prepreheader ; CHECK: body: ; CHECK: bb.0.entry: -; CHECK: t2CMPri renamable $r12, 0 +; CHECK: t2CMPri renamable $lr, 0 ; CHECK: tBcc %bb.4 ; CHECK: bb.2.while.body.preheader: -; CHECK: $lr = t2DLS killed renamable $r12 +; CHECK: $lr = t2DLS killed renamable $lr ; CHECK: bb.3.while.body: ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.3 define void @ne_prepreheader(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) { Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll @@ -92,9 +92,9 @@ ; CHECK-NEXT: sub.w r7, r12, #4 ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: add.w r7, r6, r7, lsr #2 +; CHECK-NEXT: add.w lr, r6, r7, lsr #2 ; CHECK-NEXT: mov r6, r2 -; CHECK-NEXT: dls lr, r7 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB0_12: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r5], #16 @@ -311,9 +311,9 @@ ; CHECK-NEXT: sub.w r7, r12, #4 ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: add.w r7, r6, r7, lsr #2 +; CHECK-NEXT: add.w lr, r6, r7, lsr #2 ; CHECK-NEXT: mov r6, r2 -; CHECK-NEXT: dls lr, r7 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB1_12: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r5], #16 @@ -530,9 +530,9 @@ ; CHECK-NEXT: sub.w r7, r12, #4 ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r5, r1 -; 
CHECK-NEXT: add.w r7, r6, r7, lsr #2 +; CHECK-NEXT: add.w lr, r6, r7, lsr #2 ; CHECK-NEXT: mov r6, r2 -; CHECK-NEXT: dls lr, r7 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB2_12: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r5], #16 @@ -680,9 +680,9 @@ ; CHECK-NEXT: sub.w r7, r12, #4 ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: add.w r7, r6, r7, lsr #2 +; CHECK-NEXT: add.w lr, r6, r7, lsr #2 ; CHECK-NEXT: mov r6, r2 -; CHECK-NEXT: dls lr, r7 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB3_4: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r5], #16 @@ -889,9 +889,9 @@ ; CHECK-NEXT: movs r5, #1 ; CHECK-NEXT: sub.w r6, r12, #4 ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: add.w r6, r5, r6, lsr #2 +; CHECK-NEXT: add.w lr, r5, r6, lsr #2 ; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: dls lr, r6 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: mov r6, r2 ; CHECK-NEXT: .LBB4_4: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -906,11 +906,11 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, r5, r6, pc} ; CHECK-NEXT: .LBB4_6: @ %for.body.preheader11 -; CHECK-NEXT: sub.w r3, r3, r12 +; CHECK-NEXT: sub.w lr, r3, r12 ; CHECK-NEXT: add.w r0, r0, r12, lsl #2 ; CHECK-NEXT: add.w r1, r1, r12, lsl #2 ; CHECK-NEXT: add.w r2, r2, r12, lsl #2 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB4_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r3, [r0], #4 @@ -994,9 +994,9 @@ ; CHECK-NEXT: movs r5, #1 ; CHECK-NEXT: sub.w r6, r12, #4 ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: add.w r6, r5, r6, lsr #2 +; CHECK-NEXT: add.w lr, r5, r6, lsr #2 ; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: dls lr, r6 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: mov r6, r2 ; CHECK-NEXT: .LBB5_4: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -1021,11 +1021,11 @@ ; CHECK-NEXT: cmp r12, r3 ; CHECK-NEXT: beq .LBB5_8 ; CHECK-NEXT: .LBB5_6: @ 
%for.body.preheader11 -; CHECK-NEXT: sub.w r3, r3, r12 +; CHECK-NEXT: sub.w lr, r3, r12 ; CHECK-NEXT: add.w r0, r0, r12, lsl #1 ; CHECK-NEXT: add.w r1, r1, r12, lsl #1 ; CHECK-NEXT: add.w r2, r2, r12, lsl #2 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB5_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldr.16 s0, [r1] @@ -1111,9 +1111,9 @@ ; CHECK-NEXT: movs r5, #1 ; CHECK-NEXT: sub.w r6, r12, #4 ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: add.w r6, r5, r6, lsr #2 +; CHECK-NEXT: add.w lr, r5, r6, lsr #2 ; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: dls lr, r6 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: mov r6, r2 ; CHECK-NEXT: .LBB6_4: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -1138,11 +1138,11 @@ ; CHECK-NEXT: cmp r12, r3 ; CHECK-NEXT: beq .LBB6_8 ; CHECK-NEXT: .LBB6_6: @ %for.body.preheader11 -; CHECK-NEXT: sub.w r3, r3, r12 +; CHECK-NEXT: sub.w lr, r3, r12 ; CHECK-NEXT: add.w r0, r0, r12, lsl #1 ; CHECK-NEXT: add.w r1, r1, r12, lsl #1 ; CHECK-NEXT: add.w r2, r2, r12, lsl #2 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB6_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldr.16 s0, [r1] @@ -1228,9 +1228,9 @@ ; CHECK-NEXT: movs r5, #1 ; CHECK-NEXT: sub.w r6, r12, #4 ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: add.w r6, r5, r6, lsr #2 +; CHECK-NEXT: add.w lr, r5, r6, lsr #2 ; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: dls lr, r6 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: mov r6, r2 ; CHECK-NEXT: .LBB7_4: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -1255,11 +1255,11 @@ ; CHECK-NEXT: cmp r12, r3 ; CHECK-NEXT: beq .LBB7_8 ; CHECK-NEXT: .LBB7_6: @ %for.body.preheader11 -; CHECK-NEXT: sub.w r3, r3, r12 +; CHECK-NEXT: sub.w lr, r3, r12 ; CHECK-NEXT: add.w r0, r0, r12, lsl #1 ; CHECK-NEXT: add.w r1, r1, r12, lsl #1 ; CHECK-NEXT: add.w r2, r2, r12, lsl #2 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB7_7: @ %for.body 
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldr.16 s0, [r1] @@ -1345,9 +1345,9 @@ ; CHECK-NEXT: movs r5, #1 ; CHECK-NEXT: sub.w r6, r12, #4 ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: add.w r6, r5, r6, lsr #2 +; CHECK-NEXT: add.w lr, r5, r6, lsr #2 ; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: dls lr, r6 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: mov r6, r2 ; CHECK-NEXT: .LBB8_4: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -1377,11 +1377,11 @@ ; CHECK-NEXT: cmp r12, r3 ; CHECK-NEXT: beq .LBB8_8 ; CHECK-NEXT: .LBB8_6: @ %for.body.preheader13 -; CHECK-NEXT: sub.w r3, r3, r12 +; CHECK-NEXT: sub.w lr, r3, r12 ; CHECK-NEXT: add.w r0, r0, r12, lsl #1 ; CHECK-NEXT: add.w r1, r1, r12, lsl #1 ; CHECK-NEXT: add.w r2, r2, r12, lsl #2 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB8_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldrsh r3, [r1], #2 @@ -1476,9 +1476,9 @@ ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vldr s0, .LCPI9_0 ; CHECK-NEXT: mov.w r12, #0 -; CHECK-NEXT: add.w r2, r3, r2, lsr #2 +; CHECK-NEXT: add.w lr, r3, r2, lsr #2 ; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: dls lr, r2 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB9_5: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: adds r4, r0, r3 @@ -1633,9 +1633,9 @@ ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vldr s0, .LCPI10_0 ; CHECK-NEXT: mov.w r12, #0 -; CHECK-NEXT: add.w r2, r3, r2, lsr #2 +; CHECK-NEXT: add.w lr, r3, r2, lsr #2 ; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: dls lr, r2 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB10_5: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: adds r4, r0, r3 @@ -1790,9 +1790,9 @@ ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vldr s0, .LCPI11_0 ; CHECK-NEXT: mov.w r12, #0 -; CHECK-NEXT: add.w r2, r3, r2, lsr #2 +; CHECK-NEXT: add.w lr, r3, r2, lsr #2 ; CHECK-NEXT: adds r3, r1, #4 -; CHECK-NEXT: dls lr, r2 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: adds r2, r0, #4 
; CHECK-NEXT: .LBB11_5: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll @@ -15,9 +15,9 @@ ; CHECK-NEXT: bic r3, r3, #3 ; CHECK-NEXT: sub.w r12, r3, #4 ; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w r3, r3, r12, lsr #2 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: add.w lr, r3, r12, lsr #2 ; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 @@ -91,9 +91,9 @@ ; CHECK-NEXT: bic r3, r3, #3 ; CHECK-NEXT: sub.w r12, r3, #4 ; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w r3, r3, r12, lsr #2 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: add.w lr, r3, r12, lsr #2 ; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB1_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 @@ -167,9 +167,9 @@ ; CHECK-NEXT: bic r3, r3, #3 ; CHECK-NEXT: sub.w r12, r3, #4 ; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w r3, r3, r12, lsr #2 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: add.w lr, r3, r12, lsr #2 ; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB2_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 @@ -243,9 +243,9 @@ ; CHECK-NEXT: bic r3, r3, #3 ; CHECK-NEXT: sub.w r12, r3, #4 ; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w r3, r3, r12, lsr #2 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: add.w lr, r3, r12, lsr #2 ; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB3_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 @@ -319,9 +319,9 @@ ; CHECK-NEXT: bic r3, r3, #3 ; CHECK-NEXT: sub.w r12, r3, #4 ; CHECK-NEXT: movs r3, #1 -; 
CHECK-NEXT: add.w r3, r3, r12, lsr #2 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: add.w lr, r3, r12, lsr #2 ; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB4_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 @@ -430,9 +430,9 @@ ; CHECK-NEXT: add.w r4, r3, #8 ; CHECK-NEXT: subs r5, #4 ; CHECK-NEXT: mov.w r12, #0 -; CHECK-NEXT: add.w r6, r6, r5, lsr #2 +; CHECK-NEXT: add.w lr, r6, r5, lsr #2 ; CHECK-NEXT: adds r5, r0, #3 -; CHECK-NEXT: dls lr, r6 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: adds r6, r1, #1 ; CHECK-NEXT: .LBB5_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -624,8 +624,8 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, pc} ; CHECK-NEXT: .LBB6_1: @ %vector.ph -; CHECK-NEXT: dlstp.32 lr, r12 ; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: dlstp.32 lr, r12 ; CHECK-NEXT: .LBB6_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: adds r4, #4 @@ -732,9 +732,9 @@ ; CHECK-NEXT: add.w r4, r3, #8 ; CHECK-NEXT: subs r5, #4 ; CHECK-NEXT: mov.w r12, #0 -; CHECK-NEXT: add.w r6, r6, r5, lsr #2 +; CHECK-NEXT: add.w lr, r6, r5, lsr #2 ; CHECK-NEXT: adds r5, r0, #3 -; CHECK-NEXT: dls lr, r6 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: adds r6, r1, #1 ; CHECK-NEXT: .LBB7_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -926,8 +926,8 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, pc} ; CHECK-NEXT: .LBB8_1: @ %vector.ph -; CHECK-NEXT: dlstp.32 lr, r12 ; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: dlstp.32 lr, r12 ; CHECK-NEXT: .LBB8_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: adds r4, #4 @@ -1034,9 +1034,9 @@ ; CHECK-NEXT: add.w r4, r3, #8 ; CHECK-NEXT: subs r5, #4 ; CHECK-NEXT: mov.w r12, #0 -; CHECK-NEXT: add.w r6, r6, r5, lsr #2 +; CHECK-NEXT: add.w lr, r6, r5, lsr #2 ; CHECK-NEXT: add.w r5, r0, #8 -; CHECK-NEXT: dls lr, r6 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: add.w r6, r1, #8 ; CHECK-NEXT: .LBB9_7: @ %for.body ; 
CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -1214,8 +1214,8 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB10_1: @ %vector.ph -; CHECK-NEXT: dlstp.16 lr, r3 ; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: dlstp.16 lr, r3 ; CHECK-NEXT: .LBB10_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add.w r12, r12, #8 Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll @@ -73,8 +73,8 @@ ; CHECK-NEXT: bic r3, r3, #7 ; CHECK-NEXT: sub.w r12, r3, #8 ; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w r3, r3, r12, lsr #3 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: add.w lr, r3, r12, lsr #3 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB1_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.16 r2 @@ -146,8 +146,8 @@ ; CHECK-NEXT: bic r3, r3, #15 ; CHECK-NEXT: sub.w r12, r3, #16 ; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w r3, r3, r12, lsr #4 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: add.w lr, r3, r12, lsr #4 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB2_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.8 r2 @@ -216,8 +216,8 @@ ; CHECK-NEXT: bic r3, r3, #7 ; CHECK-NEXT: sub.w r12, r3, #8 ; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w r3, r3, r12, lsr #3 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: add.w lr, r3, r12, lsr #3 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB3_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.16 r2 @@ -288,8 +288,8 @@ ; CHECK-NEXT: bic r3, r3, #15 ; CHECK-NEXT: sub.w r12, r3, #16 ; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w r3, r3, r12, lsr #4 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: add.w lr, r3, r12, lsr #4 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB4_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop 
Header: Depth=1 ; CHECK-NEXT: vctp.8 r2 @@ -358,8 +358,8 @@ ; CHECK-NEXT: bic r3, r3, #7 ; CHECK-NEXT: sub.w r12, r3, #8 ; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w r3, r3, r12, lsr #3 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: add.w lr, r3, r12, lsr #3 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB5_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.16 r2 @@ -429,9 +429,9 @@ ; CHECK-NEXT: subs r6, r3, #4 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: add.w r3, r3, r6, lsr #2 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: add.w lr, r3, r6, lsr #2 ; CHECK-NEXT: mov r3, r2 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB6_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r3 @@ -449,10 +449,10 @@ ; CHECK-NEXT: cbz r2, .LBB6_7 ; CHECK-NEXT: @ %bb.4: @ %vector.ph47 ; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w r3, r3, r6, lsr #2 -; CHECK-NEXT: movs r6, #0 -; CHECK-NEXT: vdup.32 q0, r6 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: add.w lr, r3, r6, lsr #2 +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: vdup.32 q0, r3 ; CHECK-NEXT: vmov.32 q0[0], r12 ; CHECK-NEXT: .LBB6_5: @ %vector.body46 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -555,10 +555,10 @@ ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: subs r3, #8 ; CHECK-NEXT: vmov q3, q0 -; CHECK-NEXT: add.w r3, r4, r3, lsr #3 -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: add.w lr, r4, r3, lsr #3 ; CHECK-NEXT: mov r3, r0 +; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: .LBB7_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.16 r2 Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/regalloc.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/regalloc.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/regalloc.ll @@ -27,9 +27,9 @@ ; CHECK-NEXT: movs r6, #1 ; CHECK-NEXT: subs r7, #4 ; 
CHECK-NEXT: mov.w r10, #0 -; CHECK-NEXT: add.w r6, r6, r7, lsr #2 +; CHECK-NEXT: add.w lr, r6, r7, lsr #2 ; CHECK-NEXT: adds r7, r0, #2 -; CHECK-NEXT: dls lr, r6 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: movs r6, #0 ; CHECK-NEXT: .LBB0_5: @ %while.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll @@ -105,8 +105,8 @@ ; CHECK-NEXT: vmov.i32 q2, #0x1 ; CHECK-NEXT: add.w lr, r5, #3 ; CHECK-NEXT: movs r5, #1 -; CHECK-NEXT: add.w r5, r5, lr, lsr #2 -; CHECK-NEXT: dls lr, r5 +; CHECK-NEXT: add.w lr, r5, lr, lsr #2 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB1_1: @ %bb6 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r12 Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/sibling-loops.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/sibling-loops.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/sibling-loops.ll @@ -8,14 +8,15 @@ ; CHECK-NEXT: cmp r3, #1 ; CHECK-NEXT: blt .LBB0_7 ; CHECK-NEXT: @ %bb.1: @ %for.cond1.preheader.us.preheader +; CHECK-NEXT: mov r8, r3 ; CHECK-NEXT: lsl.w r12, r3, #1 -; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: .LBB0_2: @ %for.cond1.preheader.us ; CHECK-NEXT: @ =>This Loop Header: Depth=1 ; CHECK-NEXT: @ Child Loop BB0_3 Depth 2 ; CHECK-NEXT: @ Child Loop BB0_5 Depth 2 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: dls lr, r8 ; CHECK-NEXT: movs r6, #0 ; CHECK-NEXT: .LBB0_3: @ %for.body4.us ; CHECK-NEXT: @ Parent Loop BB0_2 Depth=1 @@ -28,7 +29,7 @@ ; CHECK-NEXT: le lr, .LBB0_3 ; CHECK-NEXT: @ %bb.4: @ %for.body15.us.preheader ; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: dls lr, r8 ; CHECK-NEXT: movs r6, #0 ; CHECK-NEXT: 
.LBB0_5: @ %for.body15.us ; CHECK-NEXT: @ Parent Loop BB0_2 Depth=1 @@ -41,10 +42,10 @@ ; CHECK-NEXT: le lr, .LBB0_5 ; CHECK-NEXT: @ %bb.6: @ %for.cond.cleanup14.us ; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1 -; CHECK-NEXT: add.w r8, r8, #1 +; CHECK-NEXT: adds r3, #1 ; CHECK-NEXT: add r2, r12 ; CHECK-NEXT: add r4, r12 -; CHECK-NEXT: cmp r8, r3 +; CHECK-NEXT: cmp r3, r8 ; CHECK-NEXT: bne .LBB0_2 ; CHECK-NEXT: .LBB0_7: @ %for.cond.cleanup ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-disabled-in-loloops.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-disabled-in-loloops.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-disabled-in-loloops.ll @@ -17,7 +17,7 @@ ; ENABLED-NEXT: .LBB0_2: @ %vector.ph ; ENABLED-NEXT: @ =>This Loop Header: Depth=1 ; ENABLED-NEXT: @ Child Loop BB0_3 Depth 2 -; ENABLED-NEXT: mov r8, r0 +; ENABLED-NEXT: mov r12, r0 ; ENABLED-NEXT: mov r4, r2 ; ENABLED-NEXT: mov r5, r1 ; ENABLED-NEXT: mov r6, r3 @@ -28,7 +28,7 @@ ; ENABLED-NEXT: vldrw.u32 q0, [r5], #16 ; ENABLED-NEXT: vldrw.u32 q1, [r4], #16 ; ENABLED-NEXT: vadd.i32 q0, q1, q0 -; ENABLED-NEXT: vstrw.32 q0, [r8], #16 +; ENABLED-NEXT: vstrw.32 q0, [r12], #16 ; ENABLED-NEXT: letp lr, .LBB0_3 ; ENABLED-NEXT: b .LBB0_2 ; ENABLED-NEXT: .LBB0_4: @ %for.cond.cleanup @@ -40,20 +40,20 @@ ; DISABLED-NEXT: cmp r3, #1 ; DISABLED-NEXT: blt .LBB0_4 ; DISABLED-NEXT: @ %bb.1: @ %vector.ph.preheader -; DISABLED-NEXT: adds r6, r3, #3 -; DISABLED-NEXT: movs r5, #1 -; DISABLED-NEXT: bic r6, r6, #3 -; DISABLED-NEXT: subs r6, #4 -; DISABLED-NEXT: add.w r12, r5, r6, lsr #2 +; DISABLED-NEXT: adds r7, r3, #3 +; DISABLED-NEXT: movs r6, #1 +; DISABLED-NEXT: bic r7, r7, #3 +; DISABLED-NEXT: subs r7, #4 +; DISABLED-NEXT: add.w r8, r6, r7, lsr #2 ; DISABLED-NEXT: .LBB0_2: @ %vector.ph ; DISABLED-NEXT: @ =>This Loop Header: Depth=1 ; DISABLED-NEXT: @ Child Loop BB0_3 Depth 2 -; 
DISABLED-NEXT: mov r7, r12 -; DISABLED-NEXT: mov r8, r0 +; DISABLED-NEXT: mov r7, r8 +; DISABLED-NEXT: mov r12, r0 ; DISABLED-NEXT: mov r4, r2 ; DISABLED-NEXT: mov r5, r1 ; DISABLED-NEXT: mov r6, r3 -; DISABLED-NEXT: dls lr, r12 +; DISABLED-NEXT: dls lr, r8 ; DISABLED-NEXT: .LBB0_3: @ %vector.body ; DISABLED-NEXT: @ Parent Loop BB0_2 Depth=1 ; DISABLED-NEXT: @ => This Inner Loop Header: Depth=2 @@ -66,7 +66,7 @@ ; DISABLED-NEXT: vldrwt.u32 q1, [r4], #16 ; DISABLED-NEXT: vadd.i32 q0, q1, q0 ; DISABLED-NEXT: vpst -; DISABLED-NEXT: vstrwt.32 q0, [r8], #16 +; DISABLED-NEXT: vstrwt.32 q0, [r12], #16 ; DISABLED-NEXT: le lr, .LBB0_3 ; DISABLED-NEXT: b .LBB0_2 ; DISABLED-NEXT: .LBB0_4: @ %for.cond.cleanup Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll @@ -246,11 +246,11 @@ ; CHECK-NEXT: bic r3, r3, #3 ; CHECK-NEXT: sub.w r12, r3, #4 ; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w r3, r3, r12, lsr #2 -; CHECK-NEXT: mov.w r12, #0 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: add.w lr, r3, r12, lsr #2 ; CHECK-NEXT: adr r3, .LCPI5_0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vldrw.u32 q0, [r3] +; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: .LBB5_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vadd.i32 q2, q0, r12 Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/tp-multiple-vpst.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/tp-multiple-vpst.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/tp-multiple-vpst.ll @@ -7,9 +7,9 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: vpush {d8, d9, d10, d11} ; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: movs r2, #3 +; CHECK-NEXT: mov.w lr, #3 ; CHECK-NEXT: adr r3, .LCPI0_0 -; CHECK-NEXT: dls lr, r2 +; CHECK-NEXT: dls 
lr, lr ; CHECK-NEXT: vldrw.u32 q2, [r3] ; CHECK-NEXT: vmov.i32 q0, #0x80000000 ; CHECK-NEXT: vmvn.i32 q1, #0x80000000 Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll @@ -57,9 +57,9 @@ ; CHECK-NEXT: subs r3, r2, r3 ; CHECK-NEXT: add.w r12, r3, #3 ; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w r3, r3, r12, lsr #2 +; CHECK-NEXT: add.w lr, r3, r12, lsr #2 ; CHECK-NEXT: mov.w r12, #0 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB1_1: @ %do.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll @@ -15,9 +15,9 @@ ; CHECK-NEXT: bic r3, r3, #3 ; CHECK-NEXT: sub.w r12, r3, #4 ; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w r3, r3, r12, lsr #2 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: add.w lr, r3, r12, lsr #2 ; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 @@ -92,9 +92,9 @@ ; CHECK-NEXT: bic r1, r1, #3 ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: subs r1, #4 -; CHECK-NEXT: add.w r1, r3, r1, lsr #2 -; CHECK-NEXT: dls lr, r1 +; CHECK-NEXT: add.w lr, r3, r1, lsr #2 ; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB1_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 @@ -163,9 +163,9 @@ ; CHECK-NEXT: bic r1, r1, #3 ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: subs r1, #4 -; CHECK-NEXT: add.w r1, r3, r1, lsr #2 -; CHECK-NEXT: dls lr, r1 +; CHECK-NEXT: add.w lr, r3, r1, lsr #2 ; 
CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB2_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 @@ -228,8 +228,8 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB3_1: @ %vector.ph -; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB3_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add.w r12, r12, #4 @@ -285,8 +285,8 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB4_1: @ %vector.ph -; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB4_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add.w r12, r12, #4 @@ -342,8 +342,8 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB5_1: @ %vector.ph -; CHECK-NEXT: dlstp.8 lr, r3 ; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: dlstp.8 lr, r3 ; CHECK-NEXT: .LBB5_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add.w r12, r12, #16 @@ -402,8 +402,8 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB6_1: @ %vector.ph -; CHECK-NEXT: dlstp.16 lr, r3 ; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: dlstp.16 lr, r3 ; CHECK-NEXT: .LBB6_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add.w r12, r12, #8 Index: llvm/test/CodeGen/Thumb2/mve-float16regloops.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-float16regloops.ll +++ llvm/test/CodeGen/Thumb2/mve-float16regloops.ll @@ -723,9 +723,9 @@ ; CHECK: @ %bb.0: @ %for.body.us.preheader ; CHECK-NEXT: .save {r4, r5, r6, lr} ; CHECK-NEXT: push {r4, r5, r6, lr} -; CHECK-NEXT: ldrd r3, r12, [sp, #16] -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: ldrd lr, r12, [sp, #16] ; CHECK-NEXT: lsl.w r3, r12, #1 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB14_1: @ %for.body.us ; 
CHECK-NEXT: @ =>This Loop Header: Depth=1 ; CHECK-NEXT: @ Child Loop BB14_2 Depth 2 Index: llvm/test/CodeGen/Thumb2/mve-float32regloops.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-float32regloops.ll +++ llvm/test/CodeGen/Thumb2/mve-float32regloops.ll @@ -701,9 +701,9 @@ ; CHECK: @ %bb.0: @ %for.body.us.preheader ; CHECK-NEXT: .save {r4, r5, r6, lr} ; CHECK-NEXT: push {r4, r5, r6, lr} -; CHECK-NEXT: ldrd r3, r12, [sp, #16] -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: ldrd lr, r12, [sp, #16] ; CHECK-NEXT: lsl.w r3, r12, #2 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB14_1: @ %for.body.us ; CHECK-NEXT: @ =>This Loop Header: Depth=1 ; CHECK-NEXT: @ Child Loop BB14_2 Depth 2 @@ -1410,10 +1410,11 @@ ; CHECK-NEXT: vpush {d8, d9, d10, d11} ; CHECK-NEXT: .pad #24 ; CHECK-NEXT: sub sp, #24 -; CHECK-NEXT: ldrb.w r8, [r0] +; CHECK-NEXT: mov r8, r3 +; CHECK-NEXT: ldrb.w r12, [r0] +; CHECK-NEXT: ldrd r3, r0, [r0, #4] ; CHECK-NEXT: movs r4, #0 -; CHECK-NEXT: ldrd r12, r0, [r0, #4] -; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: cmp.w r8, #0 ; CHECK-NEXT: strd r4, r4, [sp, #16] ; CHECK-NEXT: beq .LBB17_5 ; CHECK-NEXT: @ %bb.1: @@ -1423,14 +1424,14 @@ ; CHECK-NEXT: .LBB17_2: @ =>This Loop Header: Depth=1 ; CHECK-NEXT: @ Child Loop BB17_3 Depth 2 ; CHECK-NEXT: ldrd r5, r7, [r0] -; CHECK-NEXT: vldrw.u32 q1, [r12] +; CHECK-NEXT: vldrw.u32 q1, [r3] ; CHECK-NEXT: vldr s8, [r0, #8] ; CHECK-NEXT: ldr r6, [r0, #12] ; CHECK-NEXT: vstrw.32 q1, [r4] ; CHECK-NEXT: vdup.32 q1, r7 ; CHECK-NEXT: vldr s12, [r0, #16] ; CHECK-NEXT: vmov.f32 s6, s8 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: dls lr, r8 ; CHECK-NEXT: vmov.f32 s7, s8 ; CHECK-NEXT: vdup.32 q2, r6 ; CHECK-NEXT: vmov.f32 s10, s12 @@ -1450,17 +1451,17 @@ ; CHECK-NEXT: vstrw.32 q3, [r4] ; CHECK-NEXT: le lr, .LBB17_3 ; CHECK-NEXT: @ %bb.4: @ in Loop: Header=BB17_2 Depth=1 -; CHECK-NEXT: subs.w r8, r8, #1 +; CHECK-NEXT: subs.w r12, r12, #1 ; CHECK-NEXT: add.w r0, r0, #20 -; CHECK-NEXT: vstrb.8 q3, 
[r12], #16 +; CHECK-NEXT: vstrb.8 q3, [r3], #16 ; CHECK-NEXT: mov r1, r2 ; CHECK-NEXT: bne .LBB17_2 ; CHECK-NEXT: b .LBB17_7 ; CHECK-NEXT: .LBB17_5: @ %.preheader -; CHECK-NEXT: dls lr, r8 +; CHECK-NEXT: dls lr, r12 ; CHECK-NEXT: mov r0, sp ; CHECK-NEXT: .LBB17_6: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q0, [r12], #16 +; CHECK-NEXT: vldrw.u32 q0, [r3], #16 ; CHECK-NEXT: vstrw.32 q0, [r0] ; CHECK-NEXT: le lr, .LBB17_6 ; CHECK-NEXT: .LBB17_7: Index: llvm/test/CodeGen/Thumb2/mve-fma-loops.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-fma-loops.ll +++ llvm/test/CodeGen/Thumb2/mve-fma-loops.ll @@ -332,8 +332,8 @@ ; CHECK-NEXT: vmov r4, s0 ; CHECK-NEXT: vdup.32 q0, r4 ; CHECK-NEXT: vneg.f32 q0, q0 -; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB5_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add.w r12, r12, #4 @@ -400,9 +400,9 @@ ; CHECK-NEXT: mov.w lr, #1 ; CHECK-NEXT: bic r12, r12, #3 ; CHECK-NEXT: sub.w r12, r12, #4 -; CHECK-NEXT: add.w r12, lr, r12, lsr #2 -; CHECK-NEXT: dls lr, r12 +; CHECK-NEXT: add.w lr, lr, r12, lsr #2 ; CHECK-NEXT: vmov r12, s0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vdup.32 q0, r12 ; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: .LBB6_2: @ %vector.body @@ -475,9 +475,9 @@ ; CHECK-NEXT: mov.w lr, #1 ; CHECK-NEXT: bic r12, r12, #3 ; CHECK-NEXT: sub.w r12, r12, #4 -; CHECK-NEXT: add.w r12, lr, r12, lsr #2 -; CHECK-NEXT: dls lr, r12 +; CHECK-NEXT: add.w lr, lr, r12, lsr #2 ; CHECK-NEXT: vmov r12, s0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vdup.32 q0, r12 ; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: .LBB7_2: @ %vector.body @@ -615,9 +615,9 @@ ; CHECK-NEXT: mov.w lr, #1 ; CHECK-NEXT: bic r12, r12, #3 ; CHECK-NEXT: sub.w r12, r12, #4 -; CHECK-NEXT: add.w r12, lr, r12, lsr #2 -; CHECK-NEXT: dls lr, r12 +; CHECK-NEXT: add.w lr, lr, r12, lsr #2 ; CHECK-NEXT: vmov r12, s0 +; CHECK-NEXT: dls 
lr, lr ; CHECK-NEXT: vdup.32 q0, r12 ; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: .LBB9_2: @ %vector.body Index: llvm/test/CodeGen/Thumb2/mve-fp16convertloops.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-fp16convertloops.ll +++ llvm/test/CodeGen/Thumb2/mve-fp16convertloops.ll @@ -6,9 +6,9 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: mov.w r2, #256 -; CHECK-NEXT: dls lr, r2 +; CHECK-NEXT: mov.w lr, #256 ; CHECK-NEXT: adr r2, .LCPI0_0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: .LBB0_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -52,9 +52,9 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: movs r2, #128 -; CHECK-NEXT: dls lr, r2 +; CHECK-NEXT: mov.w lr, #128 ; CHECK-NEXT: adr r2, .LCPI1_0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: .LBB1_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -102,9 +102,9 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: movs r2, #64 -; CHECK-NEXT: dls lr, r2 +; CHECK-NEXT: mov.w lr, #64 ; CHECK-NEXT: adr r2, .LCPI2_0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: .LBB2_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -160,9 +160,9 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: mov.w r2, #256 -; CHECK-NEXT: dls lr, r2 +; CHECK-NEXT: mov.w lr, #256 ; CHECK-NEXT: adr r2, .LCPI3_0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: .LBB3_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -206,9 +206,9 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: movs r2, #128 -; CHECK-NEXT: dls lr, r2 +; CHECK-NEXT: mov.w lr, #128 ; CHECK-NEXT: 
adr r2, .LCPI4_0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: .LBB4_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -256,9 +256,9 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: movs r2, #64 -; CHECK-NEXT: dls lr, r2 +; CHECK-NEXT: mov.w lr, #64 ; CHECK-NEXT: adr r2, .LCPI5_0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: .LBB5_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -314,9 +314,9 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: mov.w r2, #256 -; CHECK-NEXT: dls lr, r2 +; CHECK-NEXT: mov.w lr, #256 ; CHECK-NEXT: adr r2, .LCPI6_0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: .LBB6_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -362,9 +362,9 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: movs r2, #128 -; CHECK-NEXT: dls lr, r2 +; CHECK-NEXT: mov.w lr, #128 ; CHECK-NEXT: adr r2, .LCPI7_0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: .LBB7_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -415,9 +415,9 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: movs r2, #64 -; CHECK-NEXT: dls lr, r2 +; CHECK-NEXT: mov.w lr, #64 ; CHECK-NEXT: adr r2, .LCPI8_0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: .LBB8_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -478,9 +478,9 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: movs r2, #128 -; CHECK-NEXT: dls lr, r2 +; CHECK-NEXT: mov.w lr, #128 ; CHECK-NEXT: adr r2, .LCPI9_0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: .LBB9_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ 
-534,9 +534,9 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: movs r2, #128 -; CHECK-NEXT: dls lr, r2 +; CHECK-NEXT: mov.w lr, #128 ; CHECK-NEXT: adr r2, .LCPI10_0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: .LBB10_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 Index: llvm/test/CodeGen/Thumb2/mve-gather-ptrs.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-gather-ptrs.ll +++ llvm/test/CodeGen/Thumb2/mve-gather-ptrs.ll @@ -740,8 +740,8 @@ ; CHECK-NEXT: .LBB22_1: @ %vector.body.preheader ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w r2, r3, r2, lsr #2 -; CHECK-NEXT: dls lr, r2 +; CHECK-NEXT: add.w lr, r3, r2, lsr #2 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB22_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16 @@ -786,8 +786,8 @@ ; CHECK-NEXT: .LBB23_1: @ %vector.body.preheader ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w r2, r3, r2, lsr #2 -; CHECK-NEXT: dls lr, r2 +; CHECK-NEXT: add.w lr, r3, r2, lsr #2 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB23_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16 Index: llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll +++ llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll @@ -710,11 +710,11 @@ ; CHECK-NEXT: @ in Loop: Header=BB10_8 Depth=2 ; CHECK-NEXT: ldr r0, [sp, #112] ; CHECK-NEXT: add.w r5, r8, r7 +; CHECK-NEXT: sub.w lr, r9, r7 ; CHECK-NEXT: mla r3, r0, r7, r1 ; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: sub.w r7, r9, r7 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: add.w r5, r0, r5, lsl #1 -; CHECK-NEXT: dls lr, r7 ; CHECK-NEXT: add.w r3, 
r6, r3, lsl #1 ; CHECK-NEXT: .LBB10_14: @ %for.body8.us.us ; CHECK-NEXT: @ Parent Loop BB10_5 Depth=1 Index: llvm/test/CodeGen/Thumb2/mve-gather-scatter-ptr-address.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-gather-scatter-ptr-address.ll +++ llvm/test/CodeGen/Thumb2/mve-gather-scatter-ptr-address.ll @@ -7,9 +7,9 @@ ; CHECK: @ %bb.0: @ %vector.ph ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: movs r3, #249 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: mov.w lr, #249 ; CHECK-NEXT: adr r3, .LCPI0_0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vldrw.u32 q0, [r3] ; CHECK-NEXT: .LBB0_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -57,10 +57,10 @@ ; CHECK: @ %bb.0: @ %vector.ph ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: movs r1, #249 -; CHECK-NEXT: adr r3, .LCPI1_1 -; CHECK-NEXT: dls lr, r1 +; CHECK-NEXT: mov.w lr, #249 ; CHECK-NEXT: adr r1, .LCPI1_0 +; CHECK-NEXT: adr r3, .LCPI1_1 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vldrw.u32 q0, [r3] ; CHECK-NEXT: vldrw.u32 q1, [r1] ; CHECK-NEXT: .LBB1_1: @ %vector.body @@ -115,9 +115,9 @@ ; CHECK: @ %bb.0: @ %vector.ph ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: movs r3, #249 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: mov.w lr, #249 ; CHECK-NEXT: adr r3, .LCPI2_0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vldrw.u32 q0, [r3] ; CHECK-NEXT: .LBB2_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -170,10 +170,10 @@ ; CHECK: @ %bb.0: @ %vector.ph ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: movs r3, #249 +; CHECK-NEXT: mov.w lr, #249 ; CHECK-NEXT: adr.w r12, .LCPI3_0 -; CHECK-NEXT: dls lr, r3 ; CHECK-NEXT: adr r3, .LCPI3_1 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vldrw.u32 q0, [r3] ; CHECK-NEXT: vldrw.u32 q1, [r12] ; CHECK-NEXT: .LBB3_1: @ %vector.body @@ -237,9 +237,9 @@ ; CHECK: @ %bb.0: @ %vector.ph ; CHECK-NEXT: .save {r7, lr} ; 
CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: movs r3, #249 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: mov.w lr, #249 ; CHECK-NEXT: adr r3, .LCPI4_0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vldrw.u32 q0, [r3] ; CHECK-NEXT: .LBB4_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -300,10 +300,10 @@ ; CHECK: @ %bb.0: @ %vector.ph ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: movs r3, #249 +; CHECK-NEXT: mov.w lr, #249 ; CHECK-NEXT: adr.w r12, .LCPI5_0 -; CHECK-NEXT: dls lr, r3 ; CHECK-NEXT: adr r3, .LCPI5_1 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vldrw.u32 q0, [r3] ; CHECK-NEXT: vldrw.u32 q1, [r12] ; CHECK-NEXT: .LBB5_1: @ %vector.body @@ -383,9 +383,9 @@ ; CHECK: @ %bb.0: @ %vector.ph ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: movs r3, #249 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: mov.w lr, #249 ; CHECK-NEXT: adr r3, .LCPI6_0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vldrw.u32 q0, [r3] ; CHECK-NEXT: .LBB6_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -433,10 +433,10 @@ ; CHECK: @ %bb.0: @ %vector.ph ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: movs r1, #249 -; CHECK-NEXT: adr r3, .LCPI7_1 -; CHECK-NEXT: dls lr, r1 +; CHECK-NEXT: mov.w lr, #249 ; CHECK-NEXT: adr r1, .LCPI7_0 +; CHECK-NEXT: adr r3, .LCPI7_1 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vldrw.u32 q0, [r3] ; CHECK-NEXT: vldrw.u32 q1, [r1] ; CHECK-NEXT: .LBB7_1: @ %vector.body @@ -492,11 +492,11 @@ ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: vmov s0, r2 -; CHECK-NEXT: movs r3, #249 +; CHECK-NEXT: mov.w lr, #249 ; CHECK-NEXT: vcvtb.f16.f32 s0, s0 -; CHECK-NEXT: dls lr, r3 -; CHECK-NEXT: vmov.f16 r2, s0 ; CHECK-NEXT: adr r3, .LCPI8_0 +; CHECK-NEXT: vmov.f16 r2, s0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vldrw.u32 q0, [r3] ; CHECK-NEXT: .LBB8_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -550,13 +550,13 @@ ; CHECK-NEXT: .save {r7, lr} ; 
CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: vmov s0, r2 -; CHECK-NEXT: movs r2, #249 +; CHECK-NEXT: adr r2, .LCPI9_0 ; CHECK-NEXT: vcvtb.f16.f32 s0, s0 -; CHECK-NEXT: dls lr, r2 +; CHECK-NEXT: mov.w lr, #249 ; CHECK-NEXT: vmov.f16 r1, s0 -; CHECK-NEXT: adr r2, .LCPI9_0 ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: adr r2, .LCPI9_1 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: .LBB9_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 Index: llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll +++ llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll @@ -7,10 +7,10 @@ ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: add.w r12, r0, r3, lsl #2 -; CHECK-NEXT: movw r0, #1250 -; CHECK-NEXT: dls lr, r0 ; CHECK-NEXT: adr r0, .LCPI0_0 ; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: movw lr, #1250 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vmov.i32 q1, #0x0 ; CHECK-NEXT: vadd.i32 q0, q0, r1 ; CHECK-NEXT: adds r1, r3, #4 @@ -78,10 +78,10 @@ ; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: movw lr, #1250 ; CHECK-NEXT: add.w r4, r0, r3, lsl #2 -; CHECK-NEXT: movw r0, #1250 -; CHECK-NEXT: dls lr, r0 ; CHECK-NEXT: adr r0, .LCPI1_0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: add.w r12, r3, #4 ; CHECK-NEXT: vmov.i32 q2, #0x0 @@ -153,14 +153,14 @@ ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: add.w r12, r0, r3, lsl #2 -; CHECK-NEXT: movw r0, #1250 -; CHECK-NEXT: dls lr, r0 ; CHECK-NEXT: adr r0, .LCPI2_0 ; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: movw lr, #1250 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vmov.i32 q1, #0x0 -; CHECK-NEXT: vmov.i32 q2, #0x3 ; CHECK-NEXT: vadd.i32 q0, q0, r1 ; CHECK-NEXT: adds r1, r3, #4 +; CHECK-NEXT: vmov.i32 q2, #0x3 ; CHECK-NEXT: .LBB2_1: @ %vector.body ; 
CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r3 @@ -239,9 +239,9 @@ ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: adr r6, .LCPI3_4 ; CHECK-NEXT: adr r5, .LCPI3_3 -; CHECK-NEXT: add.w r3, r3, r12, lsr #2 +; CHECK-NEXT: add.w lr, r3, r12, lsr #2 ; CHECK-NEXT: adr r4, .LCPI3_2 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vstrw.32 q0, [sp, #160] @ 16-byte Spill ; CHECK-NEXT: vldrw.u32 q0, [r7] ; CHECK-NEXT: adr.w r8, .LCPI3_1 Index: llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll +++ llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll @@ -8,17 +8,18 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: mov lr, r1 ; CHECK-NEXT: cmp r1, #1 ; CHECK-NEXT: blt .LBB0_4 ; CHECK-NEXT: @ %bb.1: @ %for.body.preheader -; CHECK-NEXT: dls lr, r1 -; CHECK-NEXT: mov r2, r0 +; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: mov r1, r0 ; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: .LBB0_2: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q0, [r2], #32 +; CHECK-NEXT: vldrw.u32 q0, [r1], #32 ; CHECK-NEXT: vaddva.s32 r0, q0 -; CHECK-NEXT: vldrw.u32 q0, [r2, #-16] +; CHECK-NEXT: vldrw.u32 q0, [r1, #-16] ; CHECK-NEXT: vaddva.s32 r0, q0 ; CHECK-NEXT: le lr, .LBB0_2 ; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup @@ -282,9 +283,9 @@ ; CHECK-NEXT: movs r5, #1 ; CHECK-NEXT: sub.w r6, r12, #8 ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: add.w r6, r5, r6, lsr #3 +; CHECK-NEXT: add.w lr, r5, r6, lsr #3 ; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: dls lr, r6 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: mov r6, r2 ; CHECK-NEXT: .LBB2_4: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -304,11 +305,11 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, r5, r6, pc} ; CHECK-NEXT: .LBB2_6: @ %for.body.preheader12 -; CHECK-NEXT: sub.w r3, r3, r12 +; CHECK-NEXT: sub.w lr, r3, r12 
; CHECK-NEXT: add.w r0, r0, r12, lsl #2 ; CHECK-NEXT: add.w r1, r1, r12, lsl #2 ; CHECK-NEXT: add.w r2, r2, r12, lsl #2 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB2_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldr s0, [r0] Index: llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll +++ llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll @@ -22,9 +22,9 @@ ; CHECK-NEXT: movs r5, #1 ; CHECK-NEXT: sub.w r6, r12, #4 ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: add.w r6, r5, r6, lsr #2 +; CHECK-NEXT: add.w lr, r5, r6, lsr #2 ; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: dls lr, r6 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: mov r6, r2 ; CHECK-NEXT: .LBB0_4: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -39,11 +39,11 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, r5, r6, pc} ; CHECK-NEXT: .LBB0_6: @ %for.body.preheader12 -; CHECK-NEXT: sub.w r3, r3, r12 +; CHECK-NEXT: sub.w lr, r3, r12 ; CHECK-NEXT: add.w r0, r0, r12, lsl #2 ; CHECK-NEXT: add.w r1, r1, r12, lsl #2 ; CHECK-NEXT: add.w r2, r2, r12, lsl #2 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB0_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldr s0, [r0] @@ -129,14 +129,14 @@ ; CHECK-NEXT: blt .LBB1_3 ; CHECK-NEXT: @ %bb.1: @ %vector.ph ; CHECK-NEXT: add.w r12, r3, #3 -; CHECK-NEXT: adr r4, .LCPI1_0 -; CHECK-NEXT: bic r12, r12, #3 ; CHECK-NEXT: mov.w lr, #1 +; CHECK-NEXT: bic r12, r12, #3 +; CHECK-NEXT: adr r4, .LCPI1_0 ; CHECK-NEXT: sub.w r12, r12, #4 ; CHECK-NEXT: vldrw.u32 q0, [r4] -; CHECK-NEXT: add.w r12, lr, r12, lsr #2 -; CHECK-NEXT: dls lr, r12 +; CHECK-NEXT: add.w lr, lr, r12, lsr #2 ; CHECK-NEXT: sub.w r12, r3, #1 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: vdup.32 q1, r12 ; CHECK-NEXT: .LBB1_2: @ %vector.body Index: llvm/test/CodeGen/Thumb2/mve-pred-threshold.ll 
=================================================================== --- llvm/test/CodeGen/Thumb2/mve-pred-threshold.ll +++ llvm/test/CodeGen/Thumb2/mve-pred-threshold.ll @@ -14,9 +14,9 @@ ; CHECK-NEXT: add.w r1, r3, r1, lsl #2 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: add.w r1, r3, r1, lsr #2 -; CHECK-NEXT: dls lr, r1 +; CHECK-NEXT: add.w lr, r3, r1, lsr #2 ; CHECK-NEXT: rsbs r1, r2, #0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r0] @@ -71,9 +71,9 @@ ; CHECK-NEXT: add.w r1, r3, r1, lsl #3 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: add.w r1, r3, r1, lsr #3 -; CHECK-NEXT: dls lr, r1 +; CHECK-NEXT: add.w lr, r3, r1, lsr #3 ; CHECK-NEXT: rsbs r1, r2, #0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB1_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrh.u16 q1, [r0] @@ -128,9 +128,9 @@ ; CHECK-NEXT: add.w r1, r3, r1, lsl #4 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: add.w r1, r3, r1, lsr #4 -; CHECK-NEXT: dls lr, r1 +; CHECK-NEXT: add.w lr, r3, r1, lsr #4 ; CHECK-NEXT: rsbs r1, r2, #0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB2_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrb.u8 q1, [r0] @@ -184,9 +184,9 @@ ; CHECK-NEXT: mvn r2, #3 ; CHECK-NEXT: add.w r1, r2, r1, lsl #2 ; CHECK-NEXT: movs r2, #1 -; CHECK-NEXT: add.w r2, r2, r1, lsr #2 +; CHECK-NEXT: add.w lr, r2, r1, lsr #2 ; CHECK-NEXT: vmov r1, s0 -; CHECK-NEXT: dls lr, r2 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: eor r2, r1, #-2147483648 ; CHECK-NEXT: .LBB3_2: @ %vector.body @@ -241,12 +241,12 @@ ; CHECK-NEXT: .LBB4_1: @ %vector.ph ; CHECK-NEXT: mvn r3, #7 ; CHECK-NEXT: add.w r1, r3, r1, lsl #3 -; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: vmov r2, s0 ; CHECK-NEXT: vneg.f16 s0, s0 -; CHECK-NEXT: add.w r3, r3, r1, lsr #3 +; 
CHECK-NEXT: movs r3, #1 +; CHECK-NEXT: add.w lr, r3, r1, lsr #3 ; CHECK-NEXT: vmov.f16 r1, s0 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: .LBB4_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -307,9 +307,9 @@ ; CHECK-NEXT: add.w r1, r3, r1, lsl #2 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: add.w r1, r3, r1, lsr #2 -; CHECK-NEXT: dls lr, r1 +; CHECK-NEXT: add.w lr, r3, r1, lsr #2 ; CHECK-NEXT: rsbs r1, r2, #0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB5_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r0] @@ -364,9 +364,9 @@ ; CHECK-NEXT: add.w r1, r3, r1, lsl #3 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: add.w r1, r3, r1, lsr #3 -; CHECK-NEXT: dls lr, r1 +; CHECK-NEXT: add.w lr, r3, r1, lsr #3 ; CHECK-NEXT: rsbs r1, r2, #0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB6_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrh.u16 q1, [r0] @@ -421,9 +421,9 @@ ; CHECK-NEXT: add.w r1, r3, r1, lsl #4 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: add.w r1, r3, r1, lsr #4 -; CHECK-NEXT: dls lr, r1 +; CHECK-NEXT: add.w lr, r3, r1, lsr #4 ; CHECK-NEXT: rsbs r1, r2, #0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB7_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrb.u8 q1, [r0] @@ -477,9 +477,9 @@ ; CHECK-NEXT: mvn r2, #3 ; CHECK-NEXT: add.w r1, r2, r1, lsl #2 ; CHECK-NEXT: movs r2, #1 -; CHECK-NEXT: add.w r2, r2, r1, lsr #2 +; CHECK-NEXT: add.w lr, r2, r1, lsr #2 ; CHECK-NEXT: vmov r1, s0 -; CHECK-NEXT: dls lr, r2 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: eor r2, r1, #-2147483648 ; CHECK-NEXT: .LBB8_2: @ %vector.body @@ -534,12 +534,12 @@ ; CHECK-NEXT: .LBB9_1: @ %vector.ph ; CHECK-NEXT: mvn r3, #7 ; CHECK-NEXT: add.w r1, r3, r1, lsl #3 -; CHECK-NEXT: movs r3, #1 ; 
CHECK-NEXT: vmov r2, s0 ; CHECK-NEXT: vneg.f16 s0, s0 -; CHECK-NEXT: add.w r3, r3, r1, lsr #3 +; CHECK-NEXT: movs r3, #1 +; CHECK-NEXT: add.w lr, r3, r1, lsr #3 ; CHECK-NEXT: vmov.f16 r1, s0 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: .LBB9_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 Index: llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll +++ llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll @@ -19,9 +19,9 @@ ; CHECK-NEXT: mov.w lr, #1 ; CHECK-NEXT: adr r4, .LCPI0_0 ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: add.w r6, lr, r6, lsr #2 +; CHECK-NEXT: add.w lr, lr, r6, lsr #2 ; CHECK-NEXT: vldrw.u32 q1, [r4] -; CHECK-NEXT: dls lr, r6 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vmov.i32 q3, #0x4 ; CHECK-NEXT: mov r12, r1 ; CHECK-NEXT: .LBB0_1: @ %do.body Index: llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll +++ llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll @@ -30,11 +30,11 @@ ; CHECK-NEXT: movs r6, #1 ; CHECK-NEXT: adr r4, .LCPI0_0 ; CHECK-NEXT: str r3, [sp] @ 4-byte Spill -; CHECK-NEXT: add.w r7, r6, r7, lsr #1 +; CHECK-NEXT: add.w lr, r6, r7, lsr #1 ; CHECK-NEXT: add.w r11, r2, r3, lsl #2 ; CHECK-NEXT: add.w r9, r1, r3, lsl #2 ; CHECK-NEXT: add.w r12, r0, r3, lsl #2 -; CHECK-NEXT: dls lr, r7 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vldrw.u32 q0, [r4] ; CHECK-NEXT: vmvn.i32 q1, #0x80000000 ; CHECK-NEXT: mov.w r10, #-1 @@ -108,10 +108,10 @@ ; CHECK-NEXT: cmp r7, r3 ; CHECK-NEXT: beq .LBB0_8 ; CHECK-NEXT: .LBB0_6: @ %for.body.preheader -; CHECK-NEXT: subs r0, r3, r7 -; CHECK-NEXT: mov.w r1, #-2147483648 -; CHECK-NEXT: dls lr, r0 +; CHECK-NEXT: sub.w lr, r3, r7 ; CHECK-NEXT: mov.w r0, #-1 +; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: mov.w r1, #-2147483648 ; CHECK-NEXT: mvn r2, 
#-2147483648 ; CHECK-NEXT: .LBB0_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -250,13 +250,13 @@ ; CHECK-NEXT: movs r7, #1 ; CHECK-NEXT: str r3, [sp] @ 4-byte Spill ; CHECK-NEXT: add.w r11, r8, r3, lsl #2 -; CHECK-NEXT: add.w r7, r7, r2, lsr #2 -; CHECK-NEXT: add.w r10, r1, r3, lsl #2 -; CHECK-NEXT: dls lr, r7 +; CHECK-NEXT: add.w lr, r7, r2, lsr #2 ; CHECK-NEXT: adr r7, .LCPI1_0 ; CHECK-NEXT: vldrw.u32 q0, [r7] ; CHECK-NEXT: adr r7, .LCPI1_1 +; CHECK-NEXT: add.w r10, r1, r3, lsl #2 ; CHECK-NEXT: add.w r12, r0, r3, lsl #2 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vldrw.u32 q1, [r7] ; CHECK-NEXT: mov.w r3, #-1 ; CHECK-NEXT: mvn r9, #-2147483648 @@ -395,10 +395,10 @@ ; CHECK-NEXT: cmp r2, r3 ; CHECK-NEXT: beq .LBB1_8 ; CHECK-NEXT: .LBB1_6: @ %for.body.preheader21 -; CHECK-NEXT: subs r0, r3, r2 -; CHECK-NEXT: mov.w r1, #-2147483648 -; CHECK-NEXT: dls lr, r0 +; CHECK-NEXT: sub.w lr, r3, r2 ; CHECK-NEXT: mov.w r0, #-1 +; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: mov.w r1, #-2147483648 ; CHECK-NEXT: mvn r3, #-2147483648 ; CHECK-NEXT: .LBB1_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -536,10 +536,10 @@ ; CHECK-NEXT: vldrw.u32 q2, [r4] ; CHECK-NEXT: adr r4, .LCPI2_2 ; CHECK-NEXT: mov.w r9, #0 -; CHECK-NEXT: add.w r7, r6, r7, lsr #2 +; CHECK-NEXT: add.w lr, r6, r7, lsr #2 ; CHECK-NEXT: adr r6, .LCPI2_0 -; CHECK-NEXT: dls lr, r7 ; CHECK-NEXT: subs r7, r3, #1 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vldrw.u32 q0, [r6] ; CHECK-NEXT: vldrw.u32 q3, [r4] ; CHECK-NEXT: vdup.32 q1, r7 @@ -772,9 +772,9 @@ ; CHECK-NEXT: str r5, [sp] @ 4-byte Spill ; CHECK-NEXT: add.w r8, r2, r5, lsl #2 ; CHECK-NEXT: add.w r11, r1, r5, lsl #2 -; CHECK-NEXT: add.w r4, r6, r7, lsr #1 +; CHECK-NEXT: add.w lr, r6, r7, lsr #1 ; CHECK-NEXT: add.w r12, r0, r5, lsl #2 -; CHECK-NEXT: dls lr, r4 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB3_4: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldrd r4, r9, [r0] @@ -816,8 +816,8 @@ ; CHECK-NEXT: 
cmp r7, r3 ; CHECK-NEXT: beq .LBB3_8 ; CHECK-NEXT: .LBB3_6: @ %for.body.preheader -; CHECK-NEXT: subs r0, r3, r7 -; CHECK-NEXT: dls lr, r0 +; CHECK-NEXT: sub.w lr, r3, r7 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB3_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r0, [r12], #4 @@ -929,9 +929,9 @@ ; CHECK-NEXT: sub.w r7, r8, #4 ; CHECK-NEXT: add.w r10, r2, r8, lsl #2 ; CHECK-NEXT: add.w r9, r1, r8, lsl #2 -; CHECK-NEXT: add.w r4, r6, r7, lsr #2 +; CHECK-NEXT: add.w lr, r6, r7, lsr #2 ; CHECK-NEXT: add.w r12, r0, r8, lsl #2 -; CHECK-NEXT: dls lr, r4 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB4_4: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 @@ -1007,8 +1007,8 @@ ; CHECK-NEXT: cmp r8, r3 ; CHECK-NEXT: beq .LBB4_8 ; CHECK-NEXT: .LBB4_6: @ %for.body.preheader21 -; CHECK-NEXT: sub.w r0, r3, r8 -; CHECK-NEXT: dls lr, r0 +; CHECK-NEXT: sub.w lr, r3, r8 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB4_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r0, [r12], #4 @@ -1138,8 +1138,8 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, r5, r6, pc} ; CHECK-NEXT: .LBB5_6: @ %for.body.preheader21 -; CHECK-NEXT: subs r0, r3, r5 -; CHECK-NEXT: dls lr, r0 +; CHECK-NEXT: sub.w lr, r3, r5 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB5_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldrsh r0, [r12], #2 @@ -1271,8 +1271,8 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, r5, r6, pc} ; CHECK-NEXT: .LBB6_6: @ %for.body.preheader21 -; CHECK-NEXT: subs r0, r3, r5 -; CHECK-NEXT: dls lr, r0 +; CHECK-NEXT: sub.w lr, r3, r5 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB6_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldrsh r0, [r12], #2 @@ -1401,8 +1401,8 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, r5, r6, pc} ; CHECK-NEXT: .LBB7_6: @ %for.body.preheader21 -; CHECK-NEXT: subs r0, r3, r5 -; CHECK-NEXT: dls lr, r0 
+; CHECK-NEXT: sub.w lr, r3, r5 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB7_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldrsh r0, [r12], #2 @@ -1514,14 +1514,14 @@ ; CHECK-NEXT: popeq {r4, pc} ; CHECK-NEXT: .LBB8_1: @ %vector.ph ; CHECK-NEXT: add.w r12, r3, #3 -; CHECK-NEXT: adr r4, .LCPI8_0 -; CHECK-NEXT: bic r12, r12, #3 ; CHECK-NEXT: mov.w lr, #1 +; CHECK-NEXT: bic r12, r12, #3 +; CHECK-NEXT: adr r4, .LCPI8_0 ; CHECK-NEXT: sub.w r12, r12, #4 ; CHECK-NEXT: vldrw.u32 q0, [r4] -; CHECK-NEXT: add.w r12, lr, r12, lsr #2 -; CHECK-NEXT: dls lr, r12 +; CHECK-NEXT: add.w lr, lr, r12, lsr #2 ; CHECK-NEXT: sub.w r12, r3, #1 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: vdup.32 q1, r12 ; CHECK-NEXT: .LBB8_2: @ %vector.body @@ -1606,15 +1606,15 @@ ; CHECK-NEXT: add.w r12, r3, #7 ; CHECK-NEXT: adr r4, .LCPI9_0 ; CHECK-NEXT: bic r12, r12, #7 -; CHECK-NEXT: vldrw.u32 q0, [r4] -; CHECK-NEXT: sub.w r12, r12, #8 ; CHECK-NEXT: mov.w lr, #1 +; CHECK-NEXT: sub.w r12, r12, #8 +; CHECK-NEXT: vldrw.u32 q0, [r4] ; CHECK-NEXT: adr r4, .LCPI9_1 ; CHECK-NEXT: vmov.i8 q2, #0x0 -; CHECK-NEXT: add.w r12, lr, r12, lsr #3 -; CHECK-NEXT: vldrw.u32 q4, [r4] -; CHECK-NEXT: dls lr, r12 +; CHECK-NEXT: add.w lr, lr, r12, lsr #3 ; CHECK-NEXT: sub.w r12, r3, #1 +; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: vldrw.u32 q4, [r4] ; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: vdup.32 q1, r12 ; CHECK-NEXT: vmov.i8 q3, #0xff @@ -1780,15 +1780,15 @@ ; CHECK-NEXT: add.w r12, r3, #7 ; CHECK-NEXT: adr r4, .LCPI10_0 ; CHECK-NEXT: bic r12, r12, #7 -; CHECK-NEXT: vldrw.u32 q0, [r4] -; CHECK-NEXT: sub.w r12, r12, #8 ; CHECK-NEXT: mov.w lr, #1 +; CHECK-NEXT: sub.w r12, r12, #8 +; CHECK-NEXT: vldrw.u32 q0, [r4] ; CHECK-NEXT: adr r4, .LCPI10_1 ; CHECK-NEXT: vmov.i8 q2, #0x0 -; CHECK-NEXT: add.w r12, lr, r12, lsr #3 -; CHECK-NEXT: vldrw.u32 q4, [r4] -; CHECK-NEXT: dls lr, r12 +; CHECK-NEXT: add.w lr, lr, r12, lsr #3 ; CHECK-NEXT: sub.w r12, r3, #1 +; CHECK-NEXT: dls lr, lr +; 
CHECK-NEXT: vldrw.u32 q4, [r4] ; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: vdup.32 q1, r12 ; CHECK-NEXT: vmov.i8 q3, #0xff @@ -1938,9 +1938,9 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, r5, r6, pc} ; CHECK-NEXT: .LBB11_6: @ %for.body.preheader21 -; CHECK-NEXT: subs r0, r3, r5 -; CHECK-NEXT: dls lr, r0 +; CHECK-NEXT: sub.w lr, r3, r5 ; CHECK-NEXT: movw r0, #65535 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB11_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldrh r1, [r12], #2 @@ -2072,9 +2072,9 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, r5, r6, pc} ; CHECK-NEXT: .LBB12_6: @ %for.body.preheader21 -; CHECK-NEXT: subs r0, r3, r5 -; CHECK-NEXT: dls lr, r0 +; CHECK-NEXT: sub.w lr, r3, r5 ; CHECK-NEXT: movw r0, #65535 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB12_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldrh r1, [r12], #2 @@ -2208,8 +2208,8 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, r5, r6, pc} ; CHECK-NEXT: .LBB13_6: @ %for.body.preheader21 -; CHECK-NEXT: subs r0, r3, r5 -; CHECK-NEXT: dls lr, r0 +; CHECK-NEXT: sub.w lr, r3, r5 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB13_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldrsb r0, [r12], #1 @@ -2335,8 +2335,8 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, r5, r6, pc} ; CHECK-NEXT: .LBB14_6: @ %for.body.preheader23 -; CHECK-NEXT: subs r0, r3, r5 -; CHECK-NEXT: dls lr, r0 +; CHECK-NEXT: sub.w lr, r3, r5 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB14_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldrsb r0, [r12], #1 @@ -2468,8 +2468,8 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, r5, r6, pc} ; CHECK-NEXT: .LBB15_6: @ %for.body.preheader23 -; CHECK-NEXT: subs r0, r3, r5 -; CHECK-NEXT: dls lr, r0 +; CHECK-NEXT: sub.w lr, r3, r5 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB15_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldrsb r0, [r12], #1 @@ -2598,8 
+2598,8 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, r5, r6, pc} ; CHECK-NEXT: .LBB16_6: @ %for.body.preheader23 -; CHECK-NEXT: subs r0, r3, r5 -; CHECK-NEXT: dls lr, r0 +; CHECK-NEXT: sub.w lr, r3, r5 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB16_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldrsb r0, [r12], #1 @@ -2714,15 +2714,15 @@ ; CHECK-NEXT: add.w r12, r3, #7 ; CHECK-NEXT: adr r4, .LCPI17_0 ; CHECK-NEXT: bic r12, r12, #7 -; CHECK-NEXT: vldrw.u32 q0, [r4] -; CHECK-NEXT: sub.w r12, r12, #8 ; CHECK-NEXT: mov.w lr, #1 +; CHECK-NEXT: sub.w r12, r12, #8 +; CHECK-NEXT: vldrw.u32 q0, [r4] ; CHECK-NEXT: adr r4, .LCPI17_1 ; CHECK-NEXT: vmov.i8 q2, #0x0 -; CHECK-NEXT: add.w r12, lr, r12, lsr #3 -; CHECK-NEXT: vldrw.u32 q4, [r4] -; CHECK-NEXT: dls lr, r12 +; CHECK-NEXT: add.w lr, lr, r12, lsr #3 ; CHECK-NEXT: sub.w r12, r3, #1 +; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: vldrw.u32 q4, [r4] ; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: vdup.32 q1, r12 ; CHECK-NEXT: vmov.i8 q3, #0xff @@ -2840,18 +2840,18 @@ ; CHECK-NEXT: mov.w lr, #1 ; CHECK-NEXT: adr r4, .LCPI18_1 ; CHECK-NEXT: vmov.i8 q2, #0x0 -; CHECK-NEXT: add.w r12, lr, r12, lsr #4 -; CHECK-NEXT: vmov.i8 q3, #0xff -; CHECK-NEXT: dls lr, r12 +; CHECK-NEXT: add.w lr, lr, r12, lsr #4 +; CHECK-NEXT: sub.w r12, r3, #1 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vstrw.32 q0, [sp, #32] @ 16-byte Spill ; CHECK-NEXT: vldrw.u32 q0, [r4] ; CHECK-NEXT: adr r4, .LCPI18_2 -; CHECK-NEXT: sub.w r12, r3, #1 ; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: vdup.32 q1, r12 ; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill ; CHECK-NEXT: vldrw.u32 q0, [r4] ; CHECK-NEXT: adr r4, .LCPI18_3 -; CHECK-NEXT: vdup.32 q1, r12 +; CHECK-NEXT: vmov.i8 q3, #0xff ; CHECK-NEXT: vldrw.u32 q6, [r4] ; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill ; CHECK-NEXT: .LBB18_2: @ %vector.body @@ -3140,18 +3140,18 @@ ; CHECK-NEXT: mov.w lr, #1 ; CHECK-NEXT: adr r4, .LCPI19_1 ; CHECK-NEXT: vmov.i8 q2, #0x0 -; CHECK-NEXT: add.w r12, lr, r12, lsr 
#4 -; CHECK-NEXT: vmov.i8 q3, #0xff -; CHECK-NEXT: dls lr, r12 +; CHECK-NEXT: add.w lr, lr, r12, lsr #4 +; CHECK-NEXT: sub.w r12, r3, #1 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vstrw.32 q0, [sp, #32] @ 16-byte Spill ; CHECK-NEXT: vldrw.u32 q0, [r4] ; CHECK-NEXT: adr r4, .LCPI19_2 -; CHECK-NEXT: sub.w r12, r3, #1 ; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: vdup.32 q1, r12 ; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill ; CHECK-NEXT: vldrw.u32 q0, [r4] ; CHECK-NEXT: adr r4, .LCPI19_3 -; CHECK-NEXT: vdup.32 q1, r12 +; CHECK-NEXT: vmov.i8 q3, #0xff ; CHECK-NEXT: vldrw.u32 q6, [r4] ; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill ; CHECK-NEXT: .LBB19_2: @ %vector.body @@ -3372,8 +3372,8 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, r5, r6, pc} ; CHECK-NEXT: .LBB20_6: @ %for.body.preheader23 -; CHECK-NEXT: subs r0, r3, r5 -; CHECK-NEXT: dls lr, r0 +; CHECK-NEXT: sub.w lr, r3, r5 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB20_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldrb r0, [r12], #1 @@ -3507,8 +3507,8 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, r5, r6, pc} ; CHECK-NEXT: .LBB21_6: @ %for.body.preheader23 -; CHECK-NEXT: subs r0, r3, r5 -; CHECK-NEXT: dls lr, r0 +; CHECK-NEXT: sub.w lr, r3, r5 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB21_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldrb r0, [r12], #1 Index: llvm/test/CodeGen/Thumb2/mve-shifts-scalar.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-shifts-scalar.ll +++ llvm/test/CodeGen/Thumb2/mve-shifts-scalar.ll @@ -9,8 +9,8 @@ ; CHECK-NEXT: bic r3, r3, #3 ; CHECK-NEXT: sub.w r12, r3, #4 ; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w r3, r3, r12, lsr #2 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: add.w lr, r3, r12, lsr #2 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB0_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 @@ -53,8 +53,8 @@ ; 
CHECK-NEXT: bic r3, r3, #3 ; CHECK-NEXT: sub.w r12, r3, #4 ; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w r3, r3, r12, lsr #2 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: add.w lr, r3, r12, lsr #2 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB1_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #8 @@ -97,8 +97,8 @@ ; CHECK-NEXT: bic r3, r3, #3 ; CHECK-NEXT: sub.w r12, r3, #4 ; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w r3, r3, r12, lsr #2 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: add.w lr, r3, r12, lsr #2 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB2_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #4 @@ -142,8 +142,8 @@ ; CHECK-NEXT: rsbs r2, r2, #0 ; CHECK-NEXT: sub.w r12, r3, #4 ; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w r3, r3, r12, lsr #2 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: add.w lr, r3, r12, lsr #2 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB3_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 @@ -187,8 +187,8 @@ ; CHECK-NEXT: rsbs r2, r2, #0 ; CHECK-NEXT: sub.w r12, r3, #4 ; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w r3, r3, r12, lsr #2 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: add.w lr, r3, r12, lsr #2 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB4_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #8 @@ -232,8 +232,8 @@ ; CHECK-NEXT: rsbs r2, r2, #0 ; CHECK-NEXT: sub.w r12, r3, #4 ; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w r3, r3, r12, lsr #2 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: add.w lr, r3, r12, lsr #2 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB5_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #4 @@ -277,8 +277,8 @@ ; CHECK-NEXT: rsbs r2, r2, #0 ; CHECK-NEXT: sub.w r12, r3, #4 ; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w r3, r3, r12, lsr #2 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: 
add.w lr, r3, r12, lsr #2 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB6_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 @@ -322,8 +322,8 @@ ; CHECK-NEXT: rsbs r2, r2, #0 ; CHECK-NEXT: sub.w r12, r3, #4 ; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w r3, r3, r12, lsr #2 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: add.w lr, r3, r12, lsr #2 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB7_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #8 @@ -367,8 +367,8 @@ ; CHECK-NEXT: rsbs r2, r2, #0 ; CHECK-NEXT: sub.w r12, r3, #4 ; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w r3, r3, r12, lsr #2 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: add.w lr, r3, r12, lsr #2 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB8_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #4 Index: llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll +++ llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll @@ -23,10 +23,10 @@ ; CHECK-NEXT: bic r3, r1, #3 ; CHECK-NEXT: movs r2, #1 ; CHECK-NEXT: subs r0, r3, #4 -; CHECK-NEXT: add.w r0, r2, r0, lsr #2 -; CHECK-NEXT: mov r2, r12 -; CHECK-NEXT: dls lr, r0 +; CHECK-NEXT: add.w lr, r2, r0, lsr #2 ; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: mov r2, r12 ; CHECK-NEXT: .LBB0_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r2], #16 @@ -37,9 +37,9 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB0_7: @ %for.body.preheader1 -; CHECK-NEXT: subs r1, r1, r3 +; CHECK-NEXT: sub.w lr, r1, r3 ; CHECK-NEXT: add.w r2, r12, r3, lsl #2 -; CHECK-NEXT: dls lr, r1 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB0_8: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r1, [r2], #4 @@ -113,9 +113,9 @@ ; CHECK-NEXT: 
bic r12, r1, #3 ; CHECK-NEXT: vmov.i32 q0, #0x1 ; CHECK-NEXT: sub.w r3, r12, #4 -; CHECK-NEXT: add.w r2, r2, r3, lsr #2 -; CHECK-NEXT: dls lr, r2 +; CHECK-NEXT: add.w lr, r2, r3, lsr #2 ; CHECK-NEXT: mov r2, r0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB1_4: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r2], #16 @@ -132,9 +132,9 @@ ; CHECK-NEXT: mul r2, r2, lr ; CHECK-NEXT: beq .LBB1_8 ; CHECK-NEXT: .LBB1_6: @ %for.body.preheader1 -; CHECK-NEXT: sub.w r1, r1, r12 +; CHECK-NEXT: sub.w lr, r1, r12 ; CHECK-NEXT: add.w r0, r0, r12, lsl #2 -; CHECK-NEXT: dls lr, r1 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB1_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r1, [r0], #4 @@ -213,9 +213,9 @@ ; CHECK-NEXT: movs r2, #1 ; CHECK-NEXT: sub.w r12, r3, #4 ; CHECK-NEXT: vmov.i8 q0, #0xff -; CHECK-NEXT: add.w r2, r2, r12, lsr #2 -; CHECK-NEXT: dls lr, r2 +; CHECK-NEXT: add.w lr, r2, r12, lsr #2 ; CHECK-NEXT: mov r2, r0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB2_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r2], #16 @@ -232,9 +232,9 @@ ; CHECK-NEXT: and.w r2, r2, r12 ; CHECK-NEXT: beq .LBB2_9 ; CHECK-NEXT: .LBB2_7: @ %for.body.preheader1 -; CHECK-NEXT: subs r1, r1, r3 +; CHECK-NEXT: sub.w lr, r1, r3 ; CHECK-NEXT: add.w r0, r0, r3, lsl #2 -; CHECK-NEXT: dls lr, r1 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB2_8: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r1, [r0], #4 @@ -313,9 +313,9 @@ ; CHECK-NEXT: movs r2, #1 ; CHECK-NEXT: sub.w r12, r3, #4 ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: add.w r2, r2, r12, lsr #2 -; CHECK-NEXT: dls lr, r2 +; CHECK-NEXT: add.w lr, r2, r12, lsr #2 ; CHECK-NEXT: mov r2, r0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB3_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r2], #16 @@ -332,9 +332,9 @@ ; CHECK-NEXT: orr.w r2, r2, r12 ; 
CHECK-NEXT: beq .LBB3_9 ; CHECK-NEXT: .LBB3_7: @ %for.body.preheader1 -; CHECK-NEXT: subs r1, r1, r3 +; CHECK-NEXT: sub.w lr, r1, r3 ; CHECK-NEXT: add.w r0, r0, r3, lsl #2 -; CHECK-NEXT: dls lr, r1 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB3_8: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r1, [r0], #4 @@ -413,9 +413,9 @@ ; CHECK-NEXT: movs r2, #1 ; CHECK-NEXT: sub.w r12, r3, #4 ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: add.w r2, r2, r12, lsr #2 -; CHECK-NEXT: dls lr, r2 +; CHECK-NEXT: add.w lr, r2, r12, lsr #2 ; CHECK-NEXT: mov r2, r0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB4_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r2], #16 @@ -432,9 +432,9 @@ ; CHECK-NEXT: eor.w r2, r2, r12 ; CHECK-NEXT: beq .LBB4_9 ; CHECK-NEXT: .LBB4_7: @ %for.body.preheader1 -; CHECK-NEXT: subs r1, r1, r3 +; CHECK-NEXT: sub.w lr, r1, r3 ; CHECK-NEXT: add.w r0, r0, r3, lsl #2 -; CHECK-NEXT: dls lr, r1 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB4_8: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r1, [r0], #4 @@ -513,9 +513,9 @@ ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: sub.w r12, r2, #4 ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: add.w r3, r3, r12, lsr #2 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: add.w lr, r3, r12, lsr #2 ; CHECK-NEXT: mov r3, r0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB5_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r3], #16 @@ -528,9 +528,9 @@ ; CHECK-NEXT: vadd.f32 s0, s0, s4 ; CHECK-NEXT: beq .LBB5_9 ; CHECK-NEXT: .LBB5_7: @ %for.body.preheader1 -; CHECK-NEXT: subs r1, r1, r2 +; CHECK-NEXT: sub.w lr, r1, r2 ; CHECK-NEXT: add.w r0, r0, r2, lsl #2 -; CHECK-NEXT: dls lr, r1 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB5_8: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldr s2, [r0] @@ -614,9 +614,9 @@ ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: sub.w r12, r2, #4 ; 
CHECK-NEXT: vmov.f32 q0, #1.000000e+00 -; CHECK-NEXT: add.w r3, r3, r12, lsr #2 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: add.w lr, r3, r12, lsr #2 ; CHECK-NEXT: mov r3, r0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB6_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r3], #16 @@ -629,9 +629,9 @@ ; CHECK-NEXT: vmul.f32 s0, s0, s4 ; CHECK-NEXT: beq .LBB6_9 ; CHECK-NEXT: .LBB6_7: @ %for.body.preheader1 -; CHECK-NEXT: subs r1, r1, r2 +; CHECK-NEXT: sub.w lr, r1, r2 ; CHECK-NEXT: add.w r0, r0, r2, lsl #2 -; CHECK-NEXT: dls lr, r1 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB6_8: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldr s2, [r0] @@ -711,9 +711,9 @@ ; CHECK-NEXT: movs r2, #1 ; CHECK-NEXT: sub.w r12, r3, #4 ; CHECK-NEXT: vmvn.i32 q0, #0x80000000 -; CHECK-NEXT: add.w r2, r2, r12, lsr #2 -; CHECK-NEXT: dls lr, r2 +; CHECK-NEXT: add.w lr, r2, r12, lsr #2 ; CHECK-NEXT: mov r2, r0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB7_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r2], #16 @@ -725,9 +725,9 @@ ; CHECK-NEXT: vminv.s32 r2, q0 ; CHECK-NEXT: beq .LBB7_9 ; CHECK-NEXT: .LBB7_7: @ %for.body.preheader1 -; CHECK-NEXT: subs r1, r1, r3 +; CHECK-NEXT: sub.w lr, r1, r3 ; CHECK-NEXT: add.w r0, r0, r3, lsl #2 -; CHECK-NEXT: dls lr, r1 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB7_8: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r1, [r0], #4 @@ -809,10 +809,10 @@ ; CHECK-NEXT: bic r3, r1, #3 ; CHECK-NEXT: movs r2, #1 ; CHECK-NEXT: subs r0, r3, #4 -; CHECK-NEXT: add.w r0, r2, r0, lsr #2 -; CHECK-NEXT: mov r2, r12 -; CHECK-NEXT: dls lr, r0 +; CHECK-NEXT: add.w lr, r2, r0, lsr #2 ; CHECK-NEXT: mvn r0, #-2147483648 +; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: mov r2, r12 ; CHECK-NEXT: .LBB8_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r2], #16 @@ -823,9 +823,9 @@ ; CHECK-NEXT: 
it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB8_7: @ %for.body.preheader1 -; CHECK-NEXT: subs r1, r1, r3 +; CHECK-NEXT: sub.w lr, r1, r3 ; CHECK-NEXT: add.w r2, r12, r3, lsl #2 -; CHECK-NEXT: dls lr, r1 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB8_8: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r1, [r2], #4 @@ -907,9 +907,9 @@ ; CHECK-NEXT: movs r2, #1 ; CHECK-NEXT: sub.w r12, r3, #4 ; CHECK-NEXT: vmov.i32 q0, #0x80000000 -; CHECK-NEXT: add.w r2, r2, r12, lsr #2 -; CHECK-NEXT: dls lr, r2 +; CHECK-NEXT: add.w lr, r2, r12, lsr #2 ; CHECK-NEXT: mov r2, r0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB9_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r2], #16 @@ -921,9 +921,9 @@ ; CHECK-NEXT: vmaxv.s32 r2, q0 ; CHECK-NEXT: beq .LBB9_9 ; CHECK-NEXT: .LBB9_7: @ %for.body.preheader1 -; CHECK-NEXT: subs r1, r1, r3 +; CHECK-NEXT: sub.w lr, r1, r3 ; CHECK-NEXT: add.w r0, r0, r3, lsl #2 -; CHECK-NEXT: dls lr, r1 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB9_8: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r1, [r0], #4 @@ -1005,10 +1005,10 @@ ; CHECK-NEXT: bic r3, r1, #3 ; CHECK-NEXT: movs r2, #1 ; CHECK-NEXT: subs r0, r3, #4 -; CHECK-NEXT: add.w r0, r2, r0, lsr #2 -; CHECK-NEXT: mov r2, r12 -; CHECK-NEXT: dls lr, r0 +; CHECK-NEXT: add.w lr, r2, r0, lsr #2 ; CHECK-NEXT: mov.w r0, #-2147483648 +; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: mov r2, r12 ; CHECK-NEXT: .LBB10_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r2], #16 @@ -1019,9 +1019,9 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB10_7: @ %for.body.preheader1 -; CHECK-NEXT: subs r1, r1, r3 +; CHECK-NEXT: sub.w lr, r1, r3 ; CHECK-NEXT: add.w r2, r12, r3, lsl #2 -; CHECK-NEXT: dls lr, r1 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB10_8: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r1, [r2], #4 @@ 
-1103,9 +1103,9 @@ ; CHECK-NEXT: movs r2, #1 ; CHECK-NEXT: sub.w r12, r3, #4 ; CHECK-NEXT: vmov.i8 q0, #0xff -; CHECK-NEXT: add.w r2, r2, r12, lsr #2 -; CHECK-NEXT: dls lr, r2 +; CHECK-NEXT: add.w lr, r2, r12, lsr #2 ; CHECK-NEXT: mov r2, r0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB11_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r2], #16 @@ -1117,9 +1117,9 @@ ; CHECK-NEXT: vminv.u32 r2, q0 ; CHECK-NEXT: beq .LBB11_9 ; CHECK-NEXT: .LBB11_7: @ %for.body.preheader1 -; CHECK-NEXT: subs r1, r1, r3 +; CHECK-NEXT: sub.w lr, r1, r3 ; CHECK-NEXT: add.w r0, r0, r3, lsl #2 -; CHECK-NEXT: dls lr, r1 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB11_8: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r1, [r0], #4 @@ -1201,10 +1201,10 @@ ; CHECK-NEXT: bic r3, r1, #3 ; CHECK-NEXT: movs r2, #1 ; CHECK-NEXT: subs r0, r3, #4 -; CHECK-NEXT: add.w r0, r2, r0, lsr #2 -; CHECK-NEXT: mov r2, r12 -; CHECK-NEXT: dls lr, r0 +; CHECK-NEXT: add.w lr, r2, r0, lsr #2 ; CHECK-NEXT: mov.w r0, #-1 +; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: mov r2, r12 ; CHECK-NEXT: .LBB12_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r2], #16 @@ -1215,9 +1215,9 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB12_7: @ %for.body.preheader1 -; CHECK-NEXT: subs r1, r1, r3 +; CHECK-NEXT: sub.w lr, r1, r3 ; CHECK-NEXT: add.w r2, r12, r3, lsl #2 -; CHECK-NEXT: dls lr, r1 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB12_8: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r1, [r2], #4 @@ -1299,9 +1299,9 @@ ; CHECK-NEXT: movs r2, #1 ; CHECK-NEXT: sub.w r12, r3, #4 ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: add.w r2, r2, r12, lsr #2 -; CHECK-NEXT: dls lr, r2 +; CHECK-NEXT: add.w lr, r2, r12, lsr #2 ; CHECK-NEXT: mov r2, r0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB13_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; 
CHECK-NEXT: vldrw.u32 q1, [r2], #16 @@ -1313,9 +1313,9 @@ ; CHECK-NEXT: vmaxv.u32 r2, q0 ; CHECK-NEXT: beq .LBB13_9 ; CHECK-NEXT: .LBB13_7: @ %for.body.preheader1 -; CHECK-NEXT: subs r1, r1, r3 +; CHECK-NEXT: sub.w lr, r1, r3 ; CHECK-NEXT: add.w r0, r0, r3, lsl #2 -; CHECK-NEXT: dls lr, r1 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB13_8: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r1, [r0], #4 @@ -1397,10 +1397,10 @@ ; CHECK-NEXT: bic r3, r1, #3 ; CHECK-NEXT: movs r2, #1 ; CHECK-NEXT: subs r0, r3, #4 -; CHECK-NEXT: add.w r0, r2, r0, lsr #2 -; CHECK-NEXT: mov r2, r12 -; CHECK-NEXT: dls lr, r0 +; CHECK-NEXT: add.w lr, r2, r0, lsr #2 ; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: mov r2, r12 ; CHECK-NEXT: .LBB14_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r2], #16 @@ -1411,9 +1411,9 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB14_7: @ %for.body.preheader1 -; CHECK-NEXT: subs r1, r1, r3 +; CHECK-NEXT: sub.w lr, r1, r3 ; CHECK-NEXT: add.w r2, r12, r3, lsl #2 -; CHECK-NEXT: dls lr, r1 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB14_8: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r1, [r2], #4 @@ -1495,9 +1495,9 @@ ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: sub.w r12, r2, #4 ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: add.w r3, r3, r12, lsr #2 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: add.w lr, r3, r12, lsr #2 ; CHECK-NEXT: mov r3, r0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB15_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r3], #16 @@ -1511,9 +1511,9 @@ ; CHECK-NEXT: cmp r2, r1 ; CHECK-NEXT: beq .LBB15_9 ; CHECK-NEXT: .LBB15_7: @ %for.body.preheader1 -; CHECK-NEXT: subs r1, r1, r2 +; CHECK-NEXT: sub.w lr, r1, r2 ; CHECK-NEXT: add.w r0, r0, r2, lsl #2 -; CHECK-NEXT: dls lr, r1 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB15_8: @ %for.body ; CHECK-NEXT: @ 
=>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldmia r0!, {s2} @@ -1600,9 +1600,9 @@ ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: sub.w r12, r2, #4 ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: add.w r3, r3, r12, lsr #2 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: add.w lr, r3, r12, lsr #2 ; CHECK-NEXT: mov r3, r0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB16_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r3], #16 @@ -1616,9 +1616,9 @@ ; CHECK-NEXT: cmp r2, r1 ; CHECK-NEXT: beq .LBB16_9 ; CHECK-NEXT: .LBB16_7: @ %for.body.preheader1 -; CHECK-NEXT: subs r1, r1, r2 +; CHECK-NEXT: sub.w lr, r1, r2 ; CHECK-NEXT: add.w r0, r0, r2, lsl #2 -; CHECK-NEXT: dls lr, r1 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB16_8: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldmia r0!, {s2} @@ -1690,8 +1690,8 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: cbz r1, .LBB17_4 ; CHECK-NEXT: @ %bb.1: @ %vector.ph -; CHECK-NEXT: dlstp.32 lr, r1 ; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: dlstp.32 lr, r1 ; CHECK-NEXT: .LBB17_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 @@ -1795,8 +1795,8 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: cbz r1, .LBB19_4 ; CHECK-NEXT: @ %bb.1: @ %vector.ph -; CHECK-NEXT: dlstp.16 lr, r1 ; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: dlstp.16 lr, r1 ; CHECK-NEXT: .LBB19_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrh.u16 q0, [r0], #16 @@ -1903,8 +1903,8 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: cbz r1, .LBB21_4 ; CHECK-NEXT: @ %bb.1: @ %vector.ph -; CHECK-NEXT: dlstp.8 lr, r1 ; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: dlstp.8 lr, r1 ; CHECK-NEXT: .LBB21_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrb.u8 q0, [r0], #16 @@ -2011,8 +2011,8 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: cbz r1, .LBB23_4 ; CHECK-NEXT: @ %bb.1: @ %vector.ph -; CHECK-NEXT: dlstp.16 lr, r1 ; 
CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: dlstp.16 lr, r1 ; CHECK-NEXT: .LBB23_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrh.u16 q0, [r0], #16 @@ -2116,8 +2116,8 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: cbz r1, .LBB25_4 ; CHECK-NEXT: @ %bb.1: @ %vector.ph -; CHECK-NEXT: dlstp.8 lr, r1 ; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: dlstp.8 lr, r1 ; CHECK-NEXT: .LBB25_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrb.u8 q0, [r0], #16 @@ -2224,8 +2224,8 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: cbz r1, .LBB27_4 ; CHECK-NEXT: @ %bb.1: @ %vector.ph -; CHECK-NEXT: dlstp.8 lr, r1 ; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: dlstp.8 lr, r1 ; CHECK-NEXT: .LBB27_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrb.u8 q0, [r0], #16 @@ -2329,8 +2329,8 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: cbz r1, .LBB29_3 ; CHECK-NEXT: @ %bb.1: @ %vector.ph -; CHECK-NEXT: dlstp.32 lr, r1 ; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: dlstp.32 lr, r1 ; CHECK-NEXT: mov r3, r2 ; CHECK-NEXT: .LBB29_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 Index: llvm/test/CodeGen/Thumb2/mve-vldst4.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vldst4.ll +++ llvm/test/CodeGen/Thumb2/mve-vldst4.ll @@ -20,8 +20,8 @@ ; CHECK-NEXT: and.w r3, r3, r12, lsr #2 ; CHECK-NEXT: sub.w r12, r3, #8 ; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w r3, r3, r12, lsr #3 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: add.w lr, r3, r12, lsr #3 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrh.u16 q5, [r0, #32] Index: llvm/test/Transforms/HardwareLoops/ARM/calls-codegen.ll =================================================================== --- llvm/test/Transforms/HardwareLoops/ARM/calls-codegen.ll +++ llvm/test/Transforms/HardwareLoops/ARM/calls-codegen.ll @@ -4,8 +4,8 
@@ ; DISABLED-NOT: dls lr, ; CHECK-LABEL: test_target_specific: -; CHECK: movs r2, #50 -; CHECK: dls lr, r2 +; CHECK: mov.w lr, #50 +; CHECK: dls lr, lr ; CHECK-NOT: mov lr, ; CHECK: [[LOOP_HEADER:\.LBB[0-9_]+]]: ; CHECK: le lr, [[LOOP_HEADER]] @@ -31,8 +31,8 @@ } ; CHECK-LABEL: test_fabs: -; CHECK: movs r1, #100 -; CHECK: dls lr, r1 +; CHECK: mov.w lr, #100 +; CHECK: dls lr, lr ; CHECK-NOT: mov lr, ; CHECK: [[LOOP_HEADER:\.LBB[0-9_]+]]: ; CHECK-NOT: bl Index: llvm/test/Transforms/HardwareLoops/ARM/structure.ll =================================================================== --- llvm/test/Transforms/HardwareLoops/ARM/structure.ll +++ llvm/test/Transforms/HardwareLoops/ARM/structure.ll @@ -420,7 +420,7 @@ ; CHECK-UNROLL-NEXT: [[PROLOGUE:.LBB[0-9_]+]]: ; CHECK-UNROLL: le lr, [[PROLOGUE]] ; CHECK-UNROLL-NEXT: [[PROLOGUE_EXIT:.LBB[0-9_]+]]: -; CHECK-UNROLL: dls lr, r5 +; CHECK-UNROLL: dls lr, lr ; CHECK-UNROLL: [[BODY:.LBB[0-9_]+]]: ; CHECK-UNROLL: le lr, [[BODY]] ; CHECK-UNROLL-NOT: b