diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp --- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -629,17 +629,15 @@ // width, the Loop Start instruction will immediately generate one or more // false lane mask which can, incorrectly, affect the proceeding MVE // instructions in the preheader. - auto cannotInsertWDLSTPBetween = [](MachineInstr *Begin, - MachineInstr *End) { - auto I = MachineBasicBlock::iterator(Begin); - auto E = MachineBasicBlock::iterator(End); + auto cannotInsertWDLSTPBetween = [](MachineBasicBlock::iterator I, + MachineBasicBlock::iterator E) { for (; I != E; ++I) if (shouldInspect(*I)) return true; return false; }; - if (cannotInsertWDLSTPBetween(StartInsertPt, &InsertBB->back())) + if (cannotInsertWDLSTPBetween(StartInsertPt, InsertBB->end())) return false; // Especially in the case of while loops, InsertBB may not be the diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir @@ -46,6 +46,7 @@ ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 ; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -8 ; CHECK: tCMPi8 renamable $r1, 2, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK: renamable $r12 = t2MOVi 4, 14 /* CC::al */, $noreg, $noreg ; CHECK: tBcc %bb.2, 2 /* CC::hs */, killed $cpsr ; CHECK: bb.1: ; CHECK: liveins: $r2 @@ -54,24 +55,31 @@ ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc ; CHECK: bb.2: ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: liveins: $r0, $r1, $r2 + ; CHECK: liveins: $r0, $r1, $r2, $r12 + ; CHECK: renamable $r4, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg + ; CHECK: tCMPi8 renamable $r1, 4, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK: t2IT 11, 8, implicit-def $itstate + ; CHECK: $r12 = tMOVr renamable $r1, 11 /* CC::lt */, killed $cpsr, implicit killed renamable $r12, implicit killed $itstate + ; CHECK: renamable $r3 = t2SUBrr renamable $r1, killed renamable $r12, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 3, 14 /* CC::al */, $noreg ; CHECK: $r12 = tMOVr $r1, 14 /* CC::al */, $noreg + ; CHECK: renamable $r4 = nuw nsw t2ADDrs killed renamable $r4, killed renamable $r3, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 ; CHECK: $r3 = tMOVr $r0, 14 /* CC::al */, $noreg ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r12 ; CHECK: bb.3: ; CHECK: successors: %bb.3(0x7c000000), %bb.4(0x04000000) - ; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r3 + ; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4 ; CHECK: renamable $q1 = nnan ninf nsz MVE_VLDRWU32 renamable $r3, 0, 0, $noreg ; CHECK: renamable $q0 = nnan ninf nsz MVE_VADDf32 killed renamable $q0, killed renamable $q1, 0, killed $noreg, killed renamable $q0 ; CHECK: renamable $r3, dead $cpsr = nuw tADDi8 killed renamable $r3, 16, 14 /* CC::al */, $noreg ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.3 ; CHECK: bb.4: ; CHECK: successors: %bb.5(0x80000000) - ; CHECK: liveins: $q0, $r0, $r1, $r2 + ; CHECK: liveins: $q0, $r0, $r1, $r2, $r4 ; CHECK: renamable $s4 = nnan ninf nsz VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg ; CHECK: $r3 = tMOVr $r1, 14 /* CC::al */, $noreg - ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r3 + ; CHECK: $lr = t2DLS killed $r4 ; CHECK: renamable $s4 = nnan ninf nsz VADDS renamable $s2, killed renamable $s4, 14 /* CC::al */, $noreg ; CHECK: renamable $s0 = nnan ninf nsz VADDS killed renamable $s3, killed renamable $s4, 14 /* CC::al */, $noreg, implicit killed $q0 ; CHECK: $s2 = VMOVSR $r1, 14 /* CC::al */, $noreg @@ -80,13 +88,16 @@ ; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 ; CHECK: bb.5: ; CHECK: successors: %bb.5(0x7c000000), %bb.6(0x04000000) - ; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $s4 + ; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r3, $s4 + ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg ; CHECK: $r4 = VMOVRS $s4, 14 /* CC::al */, $noreg - ; CHECK: renamable $q2 = nnan ninf nsz MVE_VLDRWU32 renamable $r0, 0, 0, $noreg - ; CHECK: renamable $q2 = nnan ninf nsz MVE_VSUB_qr_f32 killed renamable $q2, killed renamable $r4, 0, $noreg, undef renamable $q2 - ; CHECK: renamable $q0 = nnan ninf nsz MVE_VFMAf32 killed renamable $q0, killed renamable $q2, killed renamable $q2, 0, killed $noreg + ; CHECK: MVE_VPST 2, implicit $vpr + ; CHECK: renamable $q2 = nnan ninf nsz MVE_VLDRWU32 renamable $r0, 0, 1, renamable $vpr + ; CHECK: renamable $q2 = nnan ninf nsz MVE_VSUB_qr_f32 killed renamable $q2, killed renamable $r4, 1, renamable $vpr, undef renamable $q2 + ; CHECK: renamable $q0 = nnan ninf nsz MVE_VFMAf32 killed renamable $q0, killed renamable $q2, killed renamable $q2, 1, killed renamable $vpr + ; CHECK: renamable $r3, dead $cpsr = nsw tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg ; CHECK: renamable $r0, dead $cpsr = nuw tADDi8 killed renamable $r0, 16, 14 /* CC::al */, $noreg - ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.5 + ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.5 ; CHECK: bb.6: ; CHECK: liveins: $q0, $r1, $r2 ; CHECK: renamable $s4 = nnan ninf nsz VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll @@ -6,10 +6,19 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, lr} ; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: mov r3, r1 +; CHECK-NEXT: cmp r1, #4 +; CHECK-NEXT: it ge +; CHECK-NEXT: movge r3, #4 +; CHECK-NEXT: mov.w r12, #1 +; CHECK-NEXT: subs r3, r1, r3 ; CHECK-NEXT: vmov.i32 q0, #0x0 +; CHECK-NEXT: adds r3, #3 +; CHECK-NEXT: add.w lr, r12, r3, lsr #2 ; CHECK-NEXT: mov r3, r1 ; CHECK-NEXT: mov r12, r0 ; CHECK-NEXT: dlstp.32 lr, r3 +; CHECK-NEXT: mov r4, lr ; CHECK-NEXT: .LBB0_1: @ %do.body.i ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r12], #16 @@ -18,7 +27,7 @@ ; CHECK-NEXT: @ %bb.2: @ %arm_mean_f32_mve.exit ; CHECK-NEXT: vmov s4, r1 ; CHECK-NEXT: mov r3, r1 -; CHECK-NEXT: dlstp.32 lr, r3 +; CHECK-NEXT: dls lr, r4 ; CHECK-NEXT: vadd.f32 s0, s3, s3 ; CHECK-NEXT: vcvt.f32.u32 s4, s4 ; CHECK-NEXT: vdiv.f32 s0, s0, s4 @@ -26,10 +35,13 @@ ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: .LBB0_3: @ %do.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q1, [r0], #16 -; CHECK-NEXT: vsub.f32 q1, q1, r12 -; CHECK-NEXT: vfma.f32 q0, q1, q1 -; CHECK-NEXT: letp lr, .LBB0_3 +; CHECK-NEXT: vctp.32 r3 +; CHECK-NEXT: subs r3, #4 +; CHECK-NEXT: vpsttt +; CHECK-NEXT: vldrwt.u32 q1, [r0], #16 +; CHECK-NEXT: vsubt.f32 q1, q1, r12 +; CHECK-NEXT: vfmat.f32 q0, q1, q1 +; CHECK-NEXT: le lr, .LBB0_3 ; CHECK-NEXT: @ %bb.4: @ %do.end ; CHECK-NEXT: subs r0, r1, #1 ; CHECK-NEXT: vadd.f32 s0, s3, s3