Index: llvm/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -12946,6 +12946,25 @@ return NewCopy; } +static SDValue PerformVMOVrhCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (VMOVrh (load x)) -> (zextload (i16*)x) + if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + + SDValue Load = DCI.DAG.getExtLoad( + ISD::ZEXTLOAD, SDLoc(N), VT, LN0->getChain(), LN0->getBasePtr(), + LN0->getPointerInfo(), MVT::i16, LN0->getAlignment(), + LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); + DCI.DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1)); + return Load; + } + + return SDValue(); +} + /// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node /// are normal, non-volatile loads. If so, it is profitable to bitcast an /// i64 vector to have f64 elements, since the value can then be loaded @@ -15042,6 +15061,7 @@ case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget); case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG); case ARMISD::VMOVhr: return PerformVMOVhrCombine(N, DCI); + case ARMISD::VMOVrh: return PerformVMOVrhCombine(N, DCI); case ISD::STORE: return PerformSTORECombine(N, DCI, Subtarget); case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget); case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI); Index: llvm/test/CodeGen/Thumb2/mve-float16regloops.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-float16regloops.ll +++ llvm/test/CodeGen/Thumb2/mve-float16regloops.ll @@ -729,9 +729,8 @@ ; CHECK-NEXT: .LBB14_1: @ %for.body.us ; CHECK-NEXT: @ =>This Loop Header: Depth=1 ; CHECK-NEXT: @ Child Loop BB14_2 Depth 2 -; CHECK-NEXT: vldr.16 s0, [r1] +; 
CHECK-NEXT: ldrh r4, [r1] ; CHECK-NEXT: mov r5, r12 -; CHECK-NEXT: vmov.f16 r4, s0 ; CHECK-NEXT: vdup.16 q0, r4 ; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: .LBB14_2: @ %vector.body @@ -1089,170 +1088,154 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #16 -; CHECK-NEXT: sub sp, #16 +; CHECK-NEXT: .pad #28 +; CHECK-NEXT: sub sp, #28 ; CHECK-NEXT: cmp r3, #8 +; CHECK-NEXT: str r1, [sp, #24] @ 4-byte Spill ; CHECK-NEXT: blo.w .LBB16_12 ; CHECK-NEXT: @ %bb.1: @ %if.then ; CHECK-NEXT: movs r7, #0 ; CHECK-NEXT: cmp.w r7, r3, lsr #2 ; CHECK-NEXT: beq.w .LBB16_12 ; CHECK-NEXT: @ %bb.2: @ %while.body.lr.ph -; CHECK-NEXT: ldrh.w r11, [r0] -; CHECK-NEXT: mov.w r8, #1 +; CHECK-NEXT: ldrh r4, [r0] +; CHECK-NEXT: movs r1, #1 ; CHECK-NEXT: ldrd r5, r12, [r0, #4] -; CHECK-NEXT: lsrs r3, r3, #2 -; CHECK-NEXT: sub.w r0, r11, #8 -; CHECK-NEXT: and r10, r0, #7 +; CHECK-NEXT: lsr.w r11, r3, #2 +; CHECK-NEXT: sub.w r0, r4, #8 +; CHECK-NEXT: rsbs r3, r4, #0 ; CHECK-NEXT: add.w r7, r0, r0, lsr #29 -; CHECK-NEXT: add.w r0, r10, #1 +; CHECK-NEXT: and r0, r0, #7 ; CHECK-NEXT: asrs r6, r7, #3 ; CHECK-NEXT: cmp r6, #1 ; CHECK-NEXT: it gt -; CHECK-NEXT: asrgt.w r8, r7, #3 -; CHECK-NEXT: add.w r7, r5, r11, lsl #1 -; CHECK-NEXT: subs r4, r7, #2 -; CHECK-NEXT: rsb.w r7, r11, #0 -; CHECK-NEXT: str r7, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: add.w r7, r12, #16 -; CHECK-NEXT: str r7, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: asrgt r1, r7, #3 +; CHECK-NEXT: add.w r7, r5, r4, lsl #1 +; CHECK-NEXT: str r1, [sp] @ 4-byte Spill +; CHECK-NEXT: subs r1, r7, #2 +; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: add.w r3, r12, #16 +; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: adds r0, #1 +; CHECK-NEXT: str r4, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: str r3, 
[sp, #8] @ 4-byte Spill ; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: b .LBB16_4 ; CHECK-NEXT: .LBB16_3: @ %while.end ; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1 ; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: subs r3, #1 +; CHECK-NEXT: subs.w r11, r11, #1 ; CHECK-NEXT: vstrb.8 q0, [r2], #8 -; CHECK-NEXT: add.w r0, r9, r0, lsl #1 +; CHECK-NEXT: add.w r0, r7, r0, lsl #1 ; CHECK-NEXT: add.w r5, r0, #8 ; CHECK-NEXT: beq.w .LBB16_12 ; CHECK-NEXT: .LBB16_4: @ %while.body ; CHECK-NEXT: @ =>This Loop Header: Depth=1 ; CHECK-NEXT: @ Child Loop BB16_6 Depth 2 ; CHECK-NEXT: @ Child Loop BB16_10 Depth 2 -; CHECK-NEXT: vldrw.u32 q0, [r1], #8 -; CHECK-NEXT: vldr.16 s7, [r12] -; CHECK-NEXT: vldr.16 s4, [r12, #14] -; CHECK-NEXT: vldr.16 s6, [r12, #12] -; CHECK-NEXT: vldr.16 s8, [r12, #10] -; CHECK-NEXT: vldr.16 s10, [r12, #8] -; CHECK-NEXT: vldr.16 s12, [r12, #6] -; CHECK-NEXT: vldr.16 s14, [r12, #4] -; CHECK-NEXT: vldr.16 s5, [r12, #2] -; CHECK-NEXT: vstrb.8 q0, [r4], #8 -; CHECK-NEXT: adds r6, r5, #2 +; CHECK-NEXT: ldr r0, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: ldrh.w lr, [r12, #14] +; CHECK-NEXT: vldrw.u32 q0, [r0], #8 +; CHECK-NEXT: ldrh.w r10, [r12, #12] +; CHECK-NEXT: ldrh.w r7, [r12, #10] +; CHECK-NEXT: ldrh.w r4, [r12, #8] +; CHECK-NEXT: ldrh.w r3, [r12, #6] +; CHECK-NEXT: ldrh.w r6, [r12, #4] +; CHECK-NEXT: ldrh.w r8, [r12, #2] +; CHECK-NEXT: ldrh.w r9, [r12] +; CHECK-NEXT: vstrb.8 q0, [r1], #8 ; CHECK-NEXT: vldrw.u32 q0, [r5] -; CHECK-NEXT: vmov.f16 r0, s7 -; CHECK-NEXT: vldrw.u32 q4, [r6] -; CHECK-NEXT: vmul.f16 q0, q0, r0 -; CHECK-NEXT: vmov.f16 r0, s5 -; CHECK-NEXT: vfma.f16 q0, q4, r0 -; CHECK-NEXT: vldrw.u32 q4, [r5, #4] -; CHECK-NEXT: vmov.f16 r0, s14 -; CHECK-NEXT: adds r6, r5, #6 -; CHECK-NEXT: vfma.f16 q0, q4, r0 -; CHECK-NEXT: vmov.f16 r0, s12 -; CHECK-NEXT: vldrw.u32 q3, [r6] -; CHECK-NEXT: add.w r6, r5, #10 -; CHECK-NEXT: add.w r9, r5, #16 -; CHECK-NEXT: cmp.w r11, #16 -; CHECK-NEXT: vfma.f16 q0, q3, r0 -; CHECK-NEXT: 
vldrw.u32 q3, [r5, #8] -; CHECK-NEXT: vmov.f16 r0, s10 -; CHECK-NEXT: vfma.f16 q0, q3, r0 -; CHECK-NEXT: vmov.f16 r0, s8 -; CHECK-NEXT: vldrw.u32 q2, [r6] -; CHECK-NEXT: add.w r6, r5, #14 -; CHECK-NEXT: vfma.f16 q0, q2, r0 -; CHECK-NEXT: vldrw.u32 q2, [r5, #12] -; CHECK-NEXT: vmov.f16 r0, s6 -; CHECK-NEXT: vfma.f16 q0, q2, r0 -; CHECK-NEXT: vmov.f16 r0, s4 -; CHECK-NEXT: vldrw.u32 q1, [r6] -; CHECK-NEXT: vfma.f16 q0, q1, r0 -; CHECK-NEXT: blo .LBB16_8 +; CHECK-NEXT: str r0, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: adds r0, r5, #2 +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vmul.f16 q0, q0, r9 +; CHECK-NEXT: adds r0, r5, #6 +; CHECK-NEXT: vfma.f16 q0, q1, r8 +; CHECK-NEXT: vldrw.u32 q1, [r5, #4] +; CHECK-NEXT: vfma.f16 q0, q1, r6 +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: add.w r0, r5, #10 +; CHECK-NEXT: vfma.f16 q0, q1, r3 +; CHECK-NEXT: vldrw.u32 q1, [r5, #8] +; CHECK-NEXT: vfma.f16 q0, q1, r4 +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: add.w r0, r5, #14 +; CHECK-NEXT: vfma.f16 q0, q1, r7 +; CHECK-NEXT: vldrw.u32 q1, [r5, #12] +; CHECK-NEXT: add.w r7, r5, #16 +; CHECK-NEXT: vfma.f16 q0, q1, r10 +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: vfma.f16 q0, q1, lr +; CHECK-NEXT: cmp r0, #16 +; CHECK-NEXT: blo .LBB16_7 ; CHECK-NEXT: @ %bb.5: @ %for.body.preheader ; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1 -; CHECK-NEXT: dls lr, r8 +; CHECK-NEXT: ldr.w lr, [sp] @ 4-byte Reload +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: ldr r6, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: .LBB16_6: @ %for.body ; CHECK-NEXT: @ Parent Loop BB16_4 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-NEXT: vldr.16 s4, [r6] -; CHECK-NEXT: add.w r5, r9, #2 -; CHECK-NEXT: vmov.f16 r0, s4 -; CHECK-NEXT: vldrw.u32 q1, [r9] -; CHECK-NEXT: vfma.f16 q0, q1, r0 -; CHECK-NEXT: vldr.16 s4, [r6, #2] -; CHECK-NEXT: vmov.f16 r0, s4 -; CHECK-NEXT: vldrw.u32 q1, [r5] -; CHECK-NEXT: add.w r5, r9, #6 +; CHECK-NEXT: ldrh r0, [r6] +; 
CHECK-NEXT: vldrw.u32 q1, [r7] +; CHECK-NEXT: adds r3, r7, #2 ; CHECK-NEXT: vfma.f16 q0, q1, r0 -; CHECK-NEXT: vldr.16 s4, [r6, #4] -; CHECK-NEXT: vmov.f16 r0, s4 -; CHECK-NEXT: vldrw.u32 q1, [r9, #4] +; CHECK-NEXT: vldrw.u32 q1, [r3] +; CHECK-NEXT: ldrh r0, [r6, #2] +; CHECK-NEXT: adds r3, r7, #6 ; CHECK-NEXT: vfma.f16 q0, q1, r0 -; CHECK-NEXT: vldr.16 s4, [r6, #6] -; CHECK-NEXT: vmov.f16 r0, s4 -; CHECK-NEXT: vldrw.u32 q1, [r5] -; CHECK-NEXT: add.w r5, r9, #10 +; CHECK-NEXT: ldrh r0, [r6, #4] +; CHECK-NEXT: vldrw.u32 q1, [r7, #4] ; CHECK-NEXT: vfma.f16 q0, q1, r0 -; CHECK-NEXT: vldr.16 s4, [r6, #8] -; CHECK-NEXT: vmov.f16 r0, s4 -; CHECK-NEXT: vldrw.u32 q1, [r9, #8] +; CHECK-NEXT: vldrw.u32 q1, [r3] +; CHECK-NEXT: ldrh r0, [r6, #6] +; CHECK-NEXT: add.w r3, r7, #10 ; CHECK-NEXT: vfma.f16 q0, q1, r0 -; CHECK-NEXT: vldr.16 s4, [r6, #10] -; CHECK-NEXT: vmov.f16 r0, s4 -; CHECK-NEXT: vldrw.u32 q1, [r5] -; CHECK-NEXT: add.w r5, r9, #14 +; CHECK-NEXT: ldrh r0, [r6, #8] +; CHECK-NEXT: vldrw.u32 q1, [r7, #8] ; CHECK-NEXT: vfma.f16 q0, q1, r0 -; CHECK-NEXT: vldr.16 s4, [r6, #12] -; CHECK-NEXT: vmov.f16 r0, s4 -; CHECK-NEXT: vldrw.u32 q1, [r9, #12] -; CHECK-NEXT: add.w r9, r9, #16 +; CHECK-NEXT: vldrw.u32 q1, [r3] +; CHECK-NEXT: ldrh r0, [r6, #10] +; CHECK-NEXT: ldrh r3, [r6, #14] ; CHECK-NEXT: vfma.f16 q0, q1, r0 -; CHECK-NEXT: vldr.16 s4, [r6, #14] +; CHECK-NEXT: ldrh r0, [r6, #12] +; CHECK-NEXT: vldrw.u32 q1, [r7, #12] ; CHECK-NEXT: adds r6, #16 -; CHECK-NEXT: vmov.f16 r0, s4 -; CHECK-NEXT: vldrw.u32 q1, [r5] ; CHECK-NEXT: vfma.f16 q0, q1, r0 +; CHECK-NEXT: add.w r0, r7, #14 +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: adds r7, #16 +; CHECK-NEXT: vfma.f16 q0, q1, r3 ; CHECK-NEXT: le lr, .LBB16_6 -; CHECK-NEXT: @ %bb.7: @ %for.end -; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1 -; CHECK-NEXT: cmp.w r10, #0 -; CHECK-NEXT: bne .LBB16_9 -; CHECK-NEXT: b .LBB16_3 -; CHECK-NEXT: .LBB16_8: @ in Loop: Header=BB16_4 Depth=1 +; CHECK-NEXT: b .LBB16_8 +; CHECK-NEXT: .LBB16_7: @ 
in Loop: Header=BB16_4 Depth=1 ; CHECK-NEXT: ldr r6, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: cmp.w r10, #0 +; CHECK-NEXT: .LBB16_8: @ %for.end +; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1 +; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: beq.w .LBB16_3 -; CHECK-NEXT: .LBB16_9: @ %while.body76.preheader +; CHECK-NEXT: @ %bb.9: @ %while.body76.preheader ; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1 ; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: mov r5, r9 +; CHECK-NEXT: mov r5, r7 ; CHECK-NEXT: .LBB16_10: @ %while.body76 ; CHECK-NEXT: @ Parent Loop BB16_4 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-NEXT: vldr.16 s4, [r6] +; CHECK-NEXT: ldrh r3, [r6], #2 +; CHECK-NEXT: vldrh.u16 q1, [r5], #2 ; CHECK-NEXT: subs r0, #1 -; CHECK-NEXT: adds r6, #2 +; CHECK-NEXT: vfma.f16 q0, q1, r3 ; CHECK-NEXT: cmp r0, #1 -; CHECK-NEXT: vmov.f16 r7, s4 -; CHECK-NEXT: vldrh.u16 q1, [r5], #2 -; CHECK-NEXT: vfma.f16 q0, q1, r7 ; CHECK-NEXT: bgt .LBB16_10 ; CHECK-NEXT: @ %bb.11: @ %while.end.loopexit ; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1 -; CHECK-NEXT: add.w r9, r9, r10, lsl #1 +; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: add.w r7, r7, r0, lsl #1 ; CHECK-NEXT: b .LBB16_3 ; CHECK-NEXT: .LBB16_12: @ %if.end -; CHECK-NEXT: add sp, #16 -; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: add sp, #28 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: %pState1 = getelementptr inbounds %struct.arm_fir_instance_f32, %struct.arm_fir_instance_f32* %S, i32 0, i32 1 Index: llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll +++ llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll @@ -916,8 +916,7 @@ ; ; CHECK-MVEFP-LABEL: vcmp_oeq_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] -; CHECK-MVEFP-NEXT: vmov.f16 r0, s12 +; CHECK-MVEFP-NEXT: ldrh r0, [r0] ; 
CHECK-MVEFP-NEXT: vcmp.f16 eq, q0, r0 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 ; CHECK-MVEFP-NEXT: bx lr @@ -1058,8 +1057,7 @@ ; ; CHECK-MVEFP-LABEL: vcmp_one_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] -; CHECK-MVEFP-NEXT: vmov.f16 r0, s12 +; CHECK-MVEFP-NEXT: ldrh r0, [r0] ; CHECK-MVEFP-NEXT: vpt.f16 ge, q0, r0 ; CHECK-MVEFP-NEXT: vcmpt.f16 le, q0, r0 ; CHECK-MVEFP-NEXT: vpnot @@ -1186,8 +1184,7 @@ ; ; CHECK-MVEFP-LABEL: vcmp_ogt_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] -; CHECK-MVEFP-NEXT: vmov.f16 r0, s12 +; CHECK-MVEFP-NEXT: ldrh r0, [r0] ; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, r0 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 ; CHECK-MVEFP-NEXT: bx lr @@ -1312,8 +1309,7 @@ ; ; CHECK-MVEFP-LABEL: vcmp_oge_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] -; CHECK-MVEFP-NEXT: vmov.f16 r0, s12 +; CHECK-MVEFP-NEXT: ldrh r0, [r0] ; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, r0 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 ; CHECK-MVEFP-NEXT: bx lr @@ -1438,8 +1434,7 @@ ; ; CHECK-MVEFP-LABEL: vcmp_olt_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] -; CHECK-MVEFP-NEXT: vmov.f16 r0, s12 +; CHECK-MVEFP-NEXT: ldrh r0, [r0] ; CHECK-MVEFP-NEXT: vcmp.f16 lt, q0, r0 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 ; CHECK-MVEFP-NEXT: bx lr @@ -1564,8 +1559,7 @@ ; ; CHECK-MVEFP-LABEL: vcmp_ole_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] -; CHECK-MVEFP-NEXT: vmov.f16 r0, s12 +; CHECK-MVEFP-NEXT: ldrh r0, [r0] ; CHECK-MVEFP-NEXT: vcmp.f16 le, q0, r0 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 ; CHECK-MVEFP-NEXT: bx lr @@ -1706,8 +1700,7 @@ ; ; CHECK-MVEFP-LABEL: vcmp_ueq_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] -; CHECK-MVEFP-NEXT: vmov.f16 r0, s12 +; CHECK-MVEFP-NEXT: ldrh r0, [r0] ; CHECK-MVEFP-NEXT: vpt.f16 ge, q0, r0 ; CHECK-MVEFP-NEXT: vcmpt.f16 le, q0, r0 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 @@ -1833,8 +1826,7 @@ ; ; 
CHECK-MVEFP-LABEL: vcmp_une_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] -; CHECK-MVEFP-NEXT: vmov.f16 r0, s12 +; CHECK-MVEFP-NEXT: ldrh r0, [r0] ; CHECK-MVEFP-NEXT: vcmp.f16 ne, q0, r0 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 ; CHECK-MVEFP-NEXT: bx lr @@ -1959,8 +1951,7 @@ ; ; CHECK-MVEFP-LABEL: vcmp_ugt_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] -; CHECK-MVEFP-NEXT: vmov.f16 r0, s12 +; CHECK-MVEFP-NEXT: ldrh r0, [r0] ; CHECK-MVEFP-NEXT: vcmp.f16 le, q0, r0 ; CHECK-MVEFP-NEXT: vpnot ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 @@ -2086,8 +2077,7 @@ ; ; CHECK-MVEFP-LABEL: vcmp_uge_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] -; CHECK-MVEFP-NEXT: vmov.f16 r0, s12 +; CHECK-MVEFP-NEXT: ldrh r0, [r0] ; CHECK-MVEFP-NEXT: vcmp.f16 lt, q0, r0 ; CHECK-MVEFP-NEXT: vpnot ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 @@ -2213,8 +2203,7 @@ ; ; CHECK-MVEFP-LABEL: vcmp_ult_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] -; CHECK-MVEFP-NEXT: vmov.f16 r0, s12 +; CHECK-MVEFP-NEXT: ldrh r0, [r0] ; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, r0 ; CHECK-MVEFP-NEXT: vpnot ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 @@ -2340,8 +2329,7 @@ ; ; CHECK-MVEFP-LABEL: vcmp_ule_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] -; CHECK-MVEFP-NEXT: vmov.f16 r0, s12 +; CHECK-MVEFP-NEXT: ldrh r0, [r0] ; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, r0 ; CHECK-MVEFP-NEXT: vpnot ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 @@ -2467,8 +2455,7 @@ ; ; CHECK-MVEFP-LABEL: vcmp_ord_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] -; CHECK-MVEFP-NEXT: vmov.f16 r0, s12 +; CHECK-MVEFP-NEXT: ldrh r0, [r0] ; CHECK-MVEFP-NEXT: vpt.f16 ge, q0, r0 ; CHECK-MVEFP-NEXT: vcmpt.f16 lt, q0, r0 ; CHECK-MVEFP-NEXT: vpnot @@ -2595,8 +2582,7 @@ ; ; CHECK-MVEFP-LABEL: vcmp_uno_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] -; CHECK-MVEFP-NEXT: vmov.f16 r0, 
s12 +; CHECK-MVEFP-NEXT: ldrh r0, [r0] ; CHECK-MVEFP-NEXT: vpt.f16 ge, q0, r0 ; CHECK-MVEFP-NEXT: vcmpt.f16 lt, q0, r0 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 @@ -3527,8 +3513,7 @@ ; ; CHECK-MVEFP-LABEL: vcmp_r_oeq_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] -; CHECK-MVEFP-NEXT: vmov.f16 r0, s12 +; CHECK-MVEFP-NEXT: ldrh r0, [r0] ; CHECK-MVEFP-NEXT: vcmp.f16 eq, q0, r0 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 ; CHECK-MVEFP-NEXT: bx lr @@ -3669,8 +3654,7 @@ ; ; CHECK-MVEFP-LABEL: vcmp_r_one_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] -; CHECK-MVEFP-NEXT: vmov.f16 r0, s12 +; CHECK-MVEFP-NEXT: ldrh r0, [r0] ; CHECK-MVEFP-NEXT: vpt.f16 le, q0, r0 ; CHECK-MVEFP-NEXT: vcmpt.f16 ge, q0, r0 ; CHECK-MVEFP-NEXT: vpnot @@ -3797,8 +3781,7 @@ ; ; CHECK-MVEFP-LABEL: vcmp_r_ogt_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] -; CHECK-MVEFP-NEXT: vmov.f16 r0, s12 +; CHECK-MVEFP-NEXT: ldrh r0, [r0] ; CHECK-MVEFP-NEXT: vcmp.f16 lt, q0, r0 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 ; CHECK-MVEFP-NEXT: bx lr @@ -3923,8 +3906,7 @@ ; ; CHECK-MVEFP-LABEL: vcmp_r_oge_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] -; CHECK-MVEFP-NEXT: vmov.f16 r0, s12 +; CHECK-MVEFP-NEXT: ldrh r0, [r0] ; CHECK-MVEFP-NEXT: vcmp.f16 le, q0, r0 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 ; CHECK-MVEFP-NEXT: bx lr @@ -4049,8 +4031,7 @@ ; ; CHECK-MVEFP-LABEL: vcmp_r_olt_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] -; CHECK-MVEFP-NEXT: vmov.f16 r0, s12 +; CHECK-MVEFP-NEXT: ldrh r0, [r0] ; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, r0 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 ; CHECK-MVEFP-NEXT: bx lr @@ -4175,8 +4156,7 @@ ; ; CHECK-MVEFP-LABEL: vcmp_r_ole_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] -; CHECK-MVEFP-NEXT: vmov.f16 r0, s12 +; CHECK-MVEFP-NEXT: ldrh r0, [r0] ; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, r0 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 
; CHECK-MVEFP-NEXT: bx lr @@ -4317,8 +4297,7 @@ ; ; CHECK-MVEFP-LABEL: vcmp_r_ueq_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] -; CHECK-MVEFP-NEXT: vmov.f16 r0, s12 +; CHECK-MVEFP-NEXT: ldrh r0, [r0] ; CHECK-MVEFP-NEXT: vpt.f16 le, q0, r0 ; CHECK-MVEFP-NEXT: vcmpt.f16 ge, q0, r0 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 @@ -4444,8 +4423,7 @@ ; ; CHECK-MVEFP-LABEL: vcmp_r_une_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] -; CHECK-MVEFP-NEXT: vmov.f16 r0, s12 +; CHECK-MVEFP-NEXT: ldrh r0, [r0] ; CHECK-MVEFP-NEXT: vcmp.f16 ne, q0, r0 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 ; CHECK-MVEFP-NEXT: bx lr @@ -4570,8 +4548,7 @@ ; ; CHECK-MVEFP-LABEL: vcmp_r_ugt_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] -; CHECK-MVEFP-NEXT: vmov.f16 r0, s12 +; CHECK-MVEFP-NEXT: ldrh r0, [r0] ; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, r0 ; CHECK-MVEFP-NEXT: vpnot ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 @@ -4697,8 +4674,7 @@ ; ; CHECK-MVEFP-LABEL: vcmp_r_uge_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] -; CHECK-MVEFP-NEXT: vmov.f16 r0, s12 +; CHECK-MVEFP-NEXT: ldrh r0, [r0] ; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, r0 ; CHECK-MVEFP-NEXT: vpnot ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 @@ -4824,8 +4800,7 @@ ; ; CHECK-MVEFP-LABEL: vcmp_r_ult_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] -; CHECK-MVEFP-NEXT: vmov.f16 r0, s12 +; CHECK-MVEFP-NEXT: ldrh r0, [r0] ; CHECK-MVEFP-NEXT: vcmp.f16 le, q0, r0 ; CHECK-MVEFP-NEXT: vpnot ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 @@ -4951,8 +4926,7 @@ ; ; CHECK-MVEFP-LABEL: vcmp_r_ule_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] -; CHECK-MVEFP-NEXT: vmov.f16 r0, s12 +; CHECK-MVEFP-NEXT: ldrh r0, [r0] ; CHECK-MVEFP-NEXT: vcmp.f16 lt, q0, r0 ; CHECK-MVEFP-NEXT: vpnot ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 @@ -5078,8 +5052,7 @@ ; ; CHECK-MVEFP-LABEL: vcmp_r_ord_v8f16: ; CHECK-MVEFP: @ %bb.0: @ 
%entry -; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] -; CHECK-MVEFP-NEXT: vmov.f16 r0, s12 +; CHECK-MVEFP-NEXT: ldrh r0, [r0] ; CHECK-MVEFP-NEXT: vpt.f16 le, q0, r0 ; CHECK-MVEFP-NEXT: vcmpt.f16 gt, q0, r0 ; CHECK-MVEFP-NEXT: vpnot @@ -5206,8 +5179,7 @@ ; ; CHECK-MVEFP-LABEL: vcmp_r_uno_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] -; CHECK-MVEFP-NEXT: vmov.f16 r0, s12 +; CHECK-MVEFP-NEXT: ldrh r0, [r0] ; CHECK-MVEFP-NEXT: vpt.f16 le, q0, r0 ; CHECK-MVEFP-NEXT: vcmpt.f16 gt, q0, r0 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2