diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -13053,6 +13053,18 @@ } } + // fold (VMOVhr (load x)) -> (load (f16*)x) + if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(Op0)) { + if (LN0->hasOneUse() && LN0->isUnindexed() && + LN0->getMemoryVT() == MVT::i16) { + SDValue Load = DCI.DAG.getLoad(MVT::f16, SDLoc(N), LN0->getChain(), + LN0->getBasePtr(), LN0->getMemOperand()); + DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0)); + DCI.DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1)); + return Load; + } + } + // Only the bottom 16 bits of the source register are used. APInt DemandedMask = APInt::getLowBitsSet(32, 16); const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo(); diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll --- a/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll @@ -5176,105 +5176,104 @@ ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: .vsave {d8, d9, d10} ; CHECK-MVE-NEXT: vpush {d8, d9, d10} -; CHECK-MVE-NEXT: ldrh r1, [r0] +; CHECK-MVE-NEXT: vldr.16 s16, [r0] ; CHECK-MVE-NEXT: vmovx.f16 s12, s0 -; CHECK-MVE-NEXT: vmovx.f16 s14, s8 -; CHECK-MVE-NEXT: movs r2, #0 -; CHECK-MVE-NEXT: vmovx.f16 s20, s9 ; CHECK-MVE-NEXT: movs r0, #0 -; CHECK-MVE-NEXT: vmov.f16 s16, r1 -; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s14, s8 ; CHECK-MVE-NEXT: vcmp.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s12, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it eq -; CHECK-MVE-NEXT: moveq r1, #1 -; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: moveq r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vcmp.f16 s0, s16 -; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: movs r2, #0 +; CHECK-MVE-NEXT: lsls r0, r0, #31 
+; CHECK-MVE-NEXT: vmovx.f16 s20, s9 ; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s12 +; CHECK-MVE-NEXT: vmov r0, s12 ; CHECK-MVE-NEXT: lsls r2, r2, #31 ; CHECK-MVE-NEXT: vcmp.f16 s1, s16 ; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r2, s12 +; CHECK-MVE-NEXT: vmovx.f16 s0, s3 ; CHECK-MVE-NEXT: vmov.16 q3[0], r2 -; CHECK-MVE-NEXT: vmov.16 q3[1], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmov.16 q3[1], r0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it eq -; CHECK-MVE-NEXT: moveq r1, #1 -; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: moveq r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: lsls r0, r0, #31 ; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5 -; CHECK-MVE-NEXT: vmov r1, s18 +; CHECK-MVE-NEXT: vmov r0, s18 ; CHECK-MVE-NEXT: vmovx.f16 s18, s1 ; CHECK-MVE-NEXT: vcmp.f16 s18, s16 -; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vmov.16 q3[2], r0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it eq -; CHECK-MVE-NEXT: moveq r1, #1 -; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: moveq r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne ; CHECK-MVE-NEXT: vmovx.f16 s18, s5 -; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: lsls r0, r0, #31 ; CHECK-MVE-NEXT: vcmp.f16 s2, s16 ; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s18 +; CHECK-MVE-NEXT: vmov r0, s18 ; CHECK-MVE-NEXT: vmovx.f16 s20, s10 -; CHECK-MVE-NEXT: vmov.16 q3[3], r1 -; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmov.16 q3[3], r0 +; CHECK-MVE-NEXT: mov.w r0, 
#0 ; CHECK-MVE-NEXT: it eq -; CHECK-MVE-NEXT: moveq r1, #1 -; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: moveq r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: lsls r0, r0, #31 ; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6 -; CHECK-MVE-NEXT: vmov r1, s18 +; CHECK-MVE-NEXT: vmov r0, s18 ; CHECK-MVE-NEXT: vmovx.f16 s18, s2 ; CHECK-MVE-NEXT: vcmp.f16 s18, s16 -; CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: vmov.16 q3[4], r0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it eq -; CHECK-MVE-NEXT: moveq r1, #1 -; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: moveq r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne ; CHECK-MVE-NEXT: vmovx.f16 s18, s6 -; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: lsls r0, r0, #31 ; CHECK-MVE-NEXT: vcmp.f16 s3, s16 ; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s18 +; CHECK-MVE-NEXT: vmov r0, s18 ; CHECK-MVE-NEXT: vcmp.f16 s0, s16 -; CHECK-MVE-NEXT: vmov.16 q3[5], r1 -; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmov.16 q3[5], r0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it eq -; CHECK-MVE-NEXT: moveq r1, #1 -; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: moveq r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne ; CHECK-MVE-NEXT: vmovx.f16 s0, s7 -; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: lsls r0, r0, #31 ; CHECK-MVE-NEXT: vmovx.f16 s2, s11 ; CHECK-MVE-NEXT: vseleq.f16 s18, s11, s7 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it eq -; CHECK-MVE-NEXT: moveq r0, #1 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: moveq r1, #1 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmov.16 q3[6], r0 ; CHECK-MVE-NEXT: cset r0, ne -; 
CHECK-MVE-NEXT: vmov r1, s18 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q3[6], r1 ; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 ; CHECK-MVE-NEXT: vmov r0, s0 ; CHECK-MVE-NEXT: vmov.16 q3[7], r0