Index: llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -2896,10 +2896,12 @@
   LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on VirtualReg "
                     << Base.virtRegIndex() << "\n");
 
-  // Make sure that Increment has no uses before BaseAccess.
+  // Make sure that Increment has no uses before BaseAccess that are not PHI
+  // uses.
   for (MachineInstr &Use :
        MRI->use_nodbg_instructions(Increment->getOperand(0).getReg())) {
-    if (!DT->dominates(BaseAccess, &Use) || &Use == BaseAccess) {
+    if (&Use == BaseAccess || (Use.getOpcode() != TargetOpcode::PHI &&
+                               !DT->dominates(BaseAccess, &Use))) {
       LLVM_DEBUG(dbgs() << " BaseAccess doesn't dominate use of increment\n");
       return false;
     }
Index: llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll
===================================================================
--- llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll
+++ llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll
@@ -25,11 +25,11 @@
 ; CHECK-NEXT: vldrh.u16 q5, [r1], #32
 ; CHECK-NEXT: mov r4, r5
 ; CHECK-NEXT: movs r7, #0
-; CHECK-NEXT: vldrh.u16 q1, [r0, #-16]
+; CHECK-NEXT: vldrh.u16 q2, [r0, #-16]
 ; CHECK-NEXT: mov r6, r5
 ; CHECK-NEXT: sub.w lr, lr, #1
 ; CHECK-NEXT: vldrh.u16 q3, [r1, #-16]
-; CHECK-NEXT: vldrh.u16 q2, [r1], #32
+; CHECK-NEXT: vldrh.u16 q1, [r1], #32
 ; CHECK-NEXT: vldrh.u16 q0, [r0], #32
 ; CHECK-NEXT: vmlsldava.s16 r4, r7, q4, q5
 ; CHECK-NEXT: cmp.w lr, #0
@@ -38,36 +38,35 @@
 ; CHECK-NEXT: .p2align 2
 ; CHECK-NEXT: .LBB0_2: @ %while.body
 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vmlaldavax.s16 r6, r5, q1, q3
-; CHECK-NEXT: vmlsldava.s16 r4, r7, q1, q3
-; CHECK-NEXT: vldrh.u16 q1, [r0, #-16]
-; CHECK-NEXT: vmlaldavax.s16 r6, r5, q0, q2
-; CHECK-NEXT: vmlsldava.s16 r4, r7, q0, q2
+; CHECK-NEXT: vmlaldavax.s16 r6, r5, q2, q3
+; CHECK-NEXT: vmlsldava.s16 r4, r7, q2, q3
+; CHECK-NEXT: vldrh.u16 q2, [r0, #-16]
+; CHECK-NEXT: vmlaldavax.s16 r6, r5, q0, q1
+; CHECK-NEXT: vmlsldava.s16 r4, r7, q0, q1
 ; CHECK-NEXT: vldrh.u16 q0, [r0], #32
 ; CHECK-NEXT: vldrh.u16 q3, [r1, #-16]
-; CHECK-NEXT: vldrh.u16 q2, [r1]
-; CHECK-NEXT: adds r1, #32
+; CHECK-NEXT: vldrh.u16 q1, [r1], #32
 ; CHECK-NEXT: le lr, .LBB0_2
 ; CHECK-NEXT: .LBB0_3:
 ; CHECK-NEXT: mov.w lr, #14
-; CHECK-NEXT: vmlsldava.s16 r4, r7, q1, q3
-; CHECK-NEXT: vmlaldavax.s16 r6, r5, q1, q3
+; CHECK-NEXT: vmlsldava.s16 r4, r7, q2, q3
+; CHECK-NEXT: vmlaldavax.s16 r6, r5, q2, q3
 ; CHECK-NEXT: and.w r2, lr, r2, lsl #1
-; CHECK-NEXT: vmlaldavax.s16 r6, r5, q0, q2
-; CHECK-NEXT: vldrh.u16 q1, [r0, #-16]
-; CHECK-NEXT: vmlsldava.s16 r4, r7, q0, q2
+; CHECK-NEXT: vmlaldavax.s16 r6, r5, q0, q1
+; CHECK-NEXT: vldrh.u16 q2, [r0, #-16]
+; CHECK-NEXT: vmlsldava.s16 r4, r7, q0, q1
 ; CHECK-NEXT: vldrh.u16 q0, [r1, #-16]
 ; CHECK-NEXT: vctp.16 r2
 ; CHECK-NEXT: vpstt
-; CHECK-NEXT: vldrht.u16 q2, [r0]
+; CHECK-NEXT: vldrht.u16 q1, [r0]
 ; CHECK-NEXT: vldrht.u16 q3, [r1]
-; CHECK-NEXT: vmlaldavax.s16 r6, r5, q1, q0
-; CHECK-NEXT: vmlsldava.s16 r4, r7, q1, q0
+; CHECK-NEXT: vmlaldavax.s16 r6, r5, q2, q0
+; CHECK-NEXT: vmlsldava.s16 r4, r7, q2, q0
 ; CHECK-NEXT: vpst
-; CHECK-NEXT: vmlsldavat.s16 r4, r7, q2, q3
+; CHECK-NEXT: vmlsldavat.s16 r4, r7, q1, q3
 ; CHECK-NEXT: cmp r2, #9
 ; CHECK-NEXT: vpst
-; CHECK-NEXT: vmlaldavaxt.s16 r6, r5, q2, q3
+; CHECK-NEXT: vmlaldavaxt.s16 r6, r5, q1, q3
 ; CHECK-NEXT: blo .LBB0_10
 ; CHECK-NEXT: @ %bb.4: @ %do.body.1
 ; CHECK-NEXT: subs r2, #8