Index: llvm/lib/Target/ARM/ARMISelLowering.h =================================================================== --- llvm/lib/Target/ARM/ARMISelLowering.h +++ llvm/lib/Target/ARM/ARMISelLowering.h @@ -821,6 +821,7 @@ enum NEONModImmType { VMOVModImm, VMVNModImm, + MveVMVNModImm, OtherModImm }; Index: llvm/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -691,6 +691,9 @@ if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) { // v2f64 is legal so that QR subregs can be extracted as f64 elements, but // none of Neon, MVE or VFP supports any arithmetic operations on it. + // BUILD_VECTOR is still needed + setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); + setOperationAction(ISD::FADD, MVT::v2f64, Expand); setOperationAction(ISD::FSUB, MVT::v2f64, Expand); setOperationAction(ISD::FMUL, MVT::v2f64, Expand); @@ -836,9 +839,6 @@ setTargetDAGCombine(ISD::SIGN_EXTEND); setTargetDAGCombine(ISD::ZERO_EXTEND); setTargetDAGCombine(ISD::ANY_EXTEND); - setTargetDAGCombine(ISD::BUILD_VECTOR); - setTargetDAGCombine(ISD::VECTOR_SHUFFLE); - setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::FP_TO_SINT); setTargetDAGCombine(ISD::FP_TO_UINT); @@ -856,6 +856,12 @@ } } + if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) { + setTargetDAGCombine(ISD::BUILD_VECTOR); + setTargetDAGCombine(ISD::VECTOR_SHUFFLE); + setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); + } + if (!Subtarget->hasFP64()) { // When targeting a floating-point unit with only single-precision // operations, f64 is legal for the few double-precision instructions which @@ -5872,7 +5878,7 @@ } /// isNEONModifiedImm - Check if the specified splat value corresponds to a -/// valid vector constant for a NEON instruction with a "modified immediate" +/// valid vector constant for a NEON or MVE instruction with a "modified immediate" /// operand (e.g., VMOV). If so, return the encoded value. static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, unsigned SplatBitSize, SelectionDAG &DAG, @@ -5958,6 +5964,10 @@ break; } + // cmode == 0b1101 is not supported for MVE VMVN + if (type == MveVMVNModImm) + return SDValue(); + if ((SplatBits & ~0xffffff) == 0 && ((SplatBits | SplatUndef) & 0xffff) == 0xffff) { // Value = 0x00nnffff: Op=x, Cmode=1101. @@ -6524,13 +6534,15 @@ if (SplatUndef.isAllOnesValue()) return DAG.getUNDEF(VT); - if (ST->hasNEON() && SplatBitSize <= 64) { + if ((ST->hasNEON() && SplatBitSize <= 64) || + (ST->hasMVEIntegerOps() && SplatBitSize <= 32)) { // Check if an immediate VMOV works. EVT VmovVT; SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VmovVT, VT.is128BitVector(), VMOVModImm); + if (Val.getNode()) { SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val); return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); @@ -6538,10 +6550,10 @@ // Try an immediate VMVN. uint64_t NegatedImm = (~SplatBits).getZExtValue(); - Val = isNEONModifiedImm(NegatedImm, - SplatUndef.getZExtValue(), SplatBitSize, - DAG, dl, VmovVT, VT.is128BitVector(), - VMVNModImm); + Val = isNEONModifiedImm( + NegatedImm, SplatUndef.getZExtValue(), SplatBitSize, + DAG, dl, VmovVT, VT.is128BitVector(), + ST->hasMVEIntegerOps() ? 
MveVMVNModImm : VMVNModImm); if (Val.getNode()) { SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val); return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); Index: llvm/lib/Target/ARM/ARMInstrInfo.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrInfo.td +++ llvm/lib/Target/ARM/ARMInstrInfo.td @@ -233,6 +233,11 @@ def ARMvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>; def ARMvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>; +def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>; +def ARMvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>; +def ARMvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>; +def ARMvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>; + //===----------------------------------------------------------------------===// // ARM Flag Definitions. Index: llvm/lib/Target/ARM/ARMInstrMVE.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrMVE.td +++ llvm/lib/Target/ARM/ARMInstrMVE.td @@ -2195,6 +2195,23 @@ } } // let isReMaterializable = 1 +let Predicates = [HasMVEInt] in { + def : Pat<(v16i8 (ARMvmovImm timm:$simm)), + (v16i8 (MVE_VMOVimmi8 nImmSplatI8:$simm))>; + def : Pat<(v8i16 (ARMvmovImm timm:$simm)), + (v8i16 (MVE_VMOVimmi16 nImmSplatI16:$simm))>; + def : Pat<(v4i32 (ARMvmovImm timm:$simm)), + (v4i32 (MVE_VMOVimmi32 nImmVMOVI32:$simm))>; + + def : Pat<(v8i16 (ARMvmvnImm timm:$simm)), + (v8i16 (MVE_VMVNimmi16 nImmSplatI16:$simm))>; + def : Pat<(v4i32 (ARMvmvnImm timm:$simm)), + (v4i32 (MVE_VMVNimmi32 nImmVMOVI32:$simm))>; + + def : Pat<(v4f32 (ARMvmovFPImm timm:$simm)), + (v4f32 (MVE_VMOVimmf32 nImmVMOVF32:$simm))>; +} + class MVE_VMINMAXA size, bit bit_12, list pattern=[]> : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qm), Index: llvm/lib/Target/ARM/ARMInstrNEON.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrNEON.td +++ llvm/lib/Target/ARM/ARMInstrNEON.td @@ -526,11 +526,6 @@ def NEONvsli : SDNode<"ARMISD::VSLI", SDTARMVSHINS>; def NEONvsri : SDNode<"ARMISD::VSRI", SDTARMVSHINS>; -def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>; -def NEONvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>; -def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>; -def NEONvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>; - def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>; def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>; @@ -566,14 +561,14 @@ def NEONvtbl2 : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>; -def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{ +def NEONimmAllZerosV: PatLeaf<(ARMvmovImm (i32 timm)), [{ ConstantSDNode *ConstVal = cast(N->getOperand(0)); unsigned EltBits = 0; uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits); return (EltBits == 32 && EltVal == 0); }]>; -def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{ +def NEONimmAllOnesV: PatLeaf<(ARMvmovImm (i32 timm)), [{ ConstantSDNode *ConstVal = cast(N->getOperand(0)); unsigned EltBits = 0; uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits); @@ -5345,28 +5340,28 @@ def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd), (ins nImmSplatI16:$SIMM), IIC_VMOVImm, "vmvn", "i16", "$Vd, $SIMM", "", - [(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> { + [(set DPR:$Vd, (v4i16 (ARMvmvnImm timm:$SIMM)))]> { let Inst{9} = SIMM{9}; } def VMVNv8i16 : N1ModImm<1, 0b000, 
{1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd), (ins nImmSplatI16:$SIMM), IIC_VMOVImm, "vmvn", "i16", "$Vd, $SIMM", "", - [(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> { + [(set QPR:$Vd, (v8i16 (ARMvmvnImm timm:$SIMM)))]> { let Inst{9} = SIMM{9}; } def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd), (ins nImmVMOVI32:$SIMM), IIC_VMOVImm, "vmvn", "i32", "$Vd, $SIMM", "", - [(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> { + [(set DPR:$Vd, (v2i32 (ARMvmvnImm timm:$SIMM)))]> { let Inst{11-8} = SIMM{11-8}; } def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd), (ins nImmVMOVI32:$SIMM), IIC_VMOVImm, "vmvn", "i32", "$Vd, $SIMM", "", - [(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> { + [(set QPR:$Vd, (v4i32 (ARMvmvnImm timm:$SIMM)))]> { let Inst{11-8} = SIMM{11-8}; } } @@ -6053,57 +6048,57 @@ def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd), (ins nImmSplatI8:$SIMM), IIC_VMOVImm, "vmov", "i8", "$Vd, $SIMM", "", - [(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>; + [(set DPR:$Vd, (v8i8 (ARMvmovImm timm:$SIMM)))]>; def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd), (ins nImmSplatI8:$SIMM), IIC_VMOVImm, "vmov", "i8", "$Vd, $SIMM", "", - [(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>; + [(set QPR:$Vd, (v16i8 (ARMvmovImm timm:$SIMM)))]>; def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd), (ins nImmSplatI16:$SIMM), IIC_VMOVImm, "vmov", "i16", "$Vd, $SIMM", "", - [(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> { + [(set DPR:$Vd, (v4i16 (ARMvmovImm timm:$SIMM)))]> { let Inst{9} = SIMM{9}; } def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd), (ins nImmSplatI16:$SIMM), IIC_VMOVImm, "vmov", "i16", "$Vd, $SIMM", "", - [(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> { + [(set QPR:$Vd, (v8i16 (ARMvmovImm timm:$SIMM)))]> { let Inst{9} = SIMM{9}; } def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd), (ins nImmVMOVI32:$SIMM), IIC_VMOVImm, "vmov", "i32", "$Vd, $SIMM", "", - [(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> { + [(set DPR:$Vd, (v2i32 (ARMvmovImm timm:$SIMM)))]> { let Inst{11-8} = SIMM{11-8}; } def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd), (ins nImmVMOVI32:$SIMM), IIC_VMOVImm, "vmov", "i32", "$Vd, $SIMM", "", - [(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> { + [(set QPR:$Vd, (v4i32 (ARMvmovImm timm:$SIMM)))]> { let Inst{11-8} = SIMM{11-8}; } def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd), (ins nImmSplatI64:$SIMM), IIC_VMOVImm, "vmov", "i64", "$Vd, $SIMM", "", - [(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>; + [(set DPR:$Vd, (v1i64 (ARMvmovImm timm:$SIMM)))]>; def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd), (ins nImmSplatI64:$SIMM), IIC_VMOVImm, "vmov", "i64", "$Vd, $SIMM", "", - [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>; + [(set QPR:$Vd, (v2i64 (ARMvmovImm timm:$SIMM)))]>; def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd), (ins nImmVMOVF32:$SIMM), IIC_VMOVImm, "vmov", "f32", "$Vd, $SIMM", "", - [(set DPR:$Vd, (v2f32 (NEONvmovFPImm timm:$SIMM)))]>; + [(set DPR:$Vd, (v2f32 (ARMvmovFPImm timm:$SIMM)))]>; def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd), (ins nImmVMOVF32:$SIMM), IIC_VMOVImm, "vmov", "f32", "$Vd, $SIMM", "", - [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>; + [(set QPR:$Vd, (v4f32 (ARMvmovFPImm timm:$SIMM)))]>; } // isReMaterializable, isAsCheapAsAMove // Add support for bytes replication 
feature, so it could be GAS compatible. Index: llvm/test/CodeGen/Thumb2/mve-div-expand.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-div-expand.ll +++ llvm/test/CodeGen/Thumb2/mve-div-expand.ll @@ -741,11 +741,10 @@ ; CHECK-MVE-LABEL: fdiv_f32: ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: vdiv.f32 s8, s0, s4 -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: vdiv.f32 s10, s1, s5 ; CHECK-MVE-NEXT: vdiv.f32 s12, s2, s6 ; CHECK-MVE-NEXT: vdiv.f32 s4, s3, s7 -; CHECK-MVE-NEXT: vdup.32 q0, r0 +; CHECK-MVE-NEXT: vmov.i32 q0, #0x0 ; CHECK-MVE-NEXT: vmov.f32 s0, s8 ; CHECK-MVE-NEXT: vmov.f32 s1, s10 ; CHECK-MVE-NEXT: vmov.f32 s2, s12 @@ -798,9 +797,8 @@ ; CHECK-MVE-NEXT: mov r0, r4 ; CHECK-MVE-NEXT: mov r1, r6 ; CHECK-MVE-NEXT: bl fmodf -; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmov.i32 q0, #0x0 ; CHECK-MVE-NEXT: vmov s4, r0 -; CHECK-MVE-NEXT: vdup.32 q0, r1 ; CHECK-MVE-NEXT: vmov.f32 s0, s16 ; CHECK-MVE-NEXT: vmov.f32 s1, s18 ; CHECK-MVE-NEXT: vmov.f32 s2, s20 @@ -860,7 +858,6 @@ ; CHECK-MVE-NEXT: vmov s10, r1 ; CHECK-MVE-NEXT: vmov.u16 r1, q1[1] ; CHECK-MVE-NEXT: vmov s8, r0 -; CHECK-MVE-NEXT: movs r2, #0 ; CHECK-MVE-NEXT: vdiv.f16 s8, s10, s8 ; CHECK-MVE-NEXT: vmov r0, s8 ; CHECK-MVE-NEXT: vmov s8, r1 @@ -868,7 +865,7 @@ ; CHECK-MVE-NEXT: vmov s10, r1 ; CHECK-MVE-NEXT: vdiv.f16 s8, s10, s8 ; CHECK-MVE-NEXT: vmov r1, s8 -; CHECK-MVE-NEXT: vdup.16 q2, r2 +; CHECK-MVE-NEXT: vmov.i32 q2, #0x0 ; CHECK-MVE-NEXT: vmov.16 q2[0], r0 ; CHECK-MVE-NEXT: vmov.u16 r0, q1[2] ; CHECK-MVE-NEXT: vmov s12, r0 @@ -1016,11 +1013,10 @@ ; CHECK-MVE-NEXT: ldrd r0, r1, [sp, #48] ; CHECK-MVE-NEXT: bl fmodf ; CHECK-MVE-NEXT: vmov s0, r0 -; CHECK-MVE-NEXT: movs r1, #0 -; CHECK-MVE-NEXT: vdup.16 q6, r1 +; CHECK-MVE-NEXT: vmov.i32 q6, #0x0 ; CHECK-MVE-NEXT: vcvtb.f16.f32 s0, s0 -; CHECK-MVE-NEXT: vmov r0, s0 ; CHECK-MVE-NEXT: vmov.16 q6[0], r4 +; CHECK-MVE-NEXT: vmov r0, s0 ; CHECK-MVE-NEXT: vmov.16 q6[1], r0 ; CHECK-MVE-NEXT: vmov.u16 r0, q5[2] ; CHECK-MVE-NEXT: vmov s0, r0 Index: llvm/test/CodeGen/Thumb2/mve-fmath.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-fmath.ll +++ llvm/test/CodeGen/Thumb2/mve-fmath.ll @@ -6,11 +6,10 @@ ; CHECK-MVE-LABEL: sqrt_float32_t: ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: vsqrt.f32 s4, s0 -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: vsqrt.f32 s6, s1 ; CHECK-MVE-NEXT: vsqrt.f32 s8, s2 ; CHECK-MVE-NEXT: vsqrt.f32 s10, s3 -; CHECK-MVE-NEXT: vdup.32 q0, r0 +; CHECK-MVE-NEXT: vmov.i32 q0, #0x0 ; CHECK-MVE-NEXT: vmov.f32 s0, s4 ; CHECK-MVE-NEXT: vmov.f32 s1, s6 ; CHECK-MVE-NEXT: vmov.f32 s2, s8 @@ -36,13 +35,12 @@ ; CHECK-MVE-NEXT: vmov.u16 r0, q0[0] ; CHECK-MVE-NEXT: vmov.u16 r1, q0[1] ; CHECK-MVE-NEXT: vmov s4, r0 -; CHECK-MVE-NEXT: movs r2, #0 ; CHECK-MVE-NEXT: vsqrt.f16 s4, s4 ; CHECK-MVE-NEXT: vmov r0, s4 ; CHECK-MVE-NEXT: vmov s4, r1 ; CHECK-MVE-NEXT: vsqrt.f16 s4, s4 ; CHECK-MVE-NEXT: vmov r1, s4 -; CHECK-MVE-NEXT: vdup.16 q1, r2 +; CHECK-MVE-NEXT: vmov.i32 q1, #0x0 ; CHECK-MVE-NEXT: vmov.16 q1[0], r0 ; CHECK-MVE-NEXT: vmov.u16 r0, q0[2] ; CHECK-MVE-NEXT: vmov s8, r0 @@ -150,9 +148,8 @@ ; CHECK-MVE-NEXT: vmov s20, r0 ; CHECK-MVE-NEXT: mov r0, r5 ; CHECK-MVE-NEXT: bl cosf -; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmov.i32 q0, #0x0 ; CHECK-MVE-NEXT: vmov s4, r0 -; CHECK-MVE-NEXT: vdup.32 q0, r1 ; CHECK-MVE-NEXT: vmov.f32 s0, s16 ; CHECK-MVE-NEXT: vmov.f32 s1, s18 ; CHECK-MVE-NEXT: vmov.f32 s2, s20 @@ -219,9 +216,8 @@ ; CHECK-MVE-NEXT: ldr r0, [sp, 
#28] ; CHECK-MVE-NEXT: bl cosf ; CHECK-MVE-NEXT: vmov s0, r0 -; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmov.i32 q5, #0x0 ; CHECK-MVE-NEXT: vcvtb.f16.f32 s0, s0 -; CHECK-MVE-NEXT: vdup.16 q5, r1 ; CHECK-MVE-NEXT: vmov r0, s0 ; CHECK-MVE-NEXT: vmov.16 q5[0], r0 ; CHECK-MVE-NEXT: vmov.u16 r0, q4[2] @@ -411,9 +407,8 @@ ; CHECK-MVE-NEXT: vmov s20, r0 ; CHECK-MVE-NEXT: mov r0, r5 ; CHECK-MVE-NEXT: bl sinf -; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmov.i32 q0, #0x0 ; CHECK-MVE-NEXT: vmov s4, r0 -; CHECK-MVE-NEXT: vdup.32 q0, r1 ; CHECK-MVE-NEXT: vmov.f32 s0, s16 ; CHECK-MVE-NEXT: vmov.f32 s1, s18 ; CHECK-MVE-NEXT: vmov.f32 s2, s20 @@ -480,9 +475,8 @@ ; CHECK-MVE-NEXT: ldr r0, [sp, #28] ; CHECK-MVE-NEXT: bl sinf ; CHECK-MVE-NEXT: vmov s0, r0 -; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmov.i32 q5, #0x0 ; CHECK-MVE-NEXT: vcvtb.f16.f32 s0, s0 -; CHECK-MVE-NEXT: vdup.16 q5, r1 ; CHECK-MVE-NEXT: vmov r0, s0 ; CHECK-MVE-NEXT: vmov.16 q5[0], r0 ; CHECK-MVE-NEXT: vmov.u16 r0, q4[2] @@ -672,9 +666,8 @@ ; CHECK-MVE-NEXT: vmov s20, r0 ; CHECK-MVE-NEXT: mov r0, r5 ; CHECK-MVE-NEXT: bl expf -; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmov.i32 q0, #0x0 ; CHECK-MVE-NEXT: vmov s4, r0 -; CHECK-MVE-NEXT: vdup.32 q0, r1 ; CHECK-MVE-NEXT: vmov.f32 s0, s16 ; CHECK-MVE-NEXT: vmov.f32 s1, s18 ; CHECK-MVE-NEXT: vmov.f32 s2, s20 @@ -741,9 +734,8 @@ ; CHECK-MVE-NEXT: ldr r0, [sp, #28] ; CHECK-MVE-NEXT: bl expf ; CHECK-MVE-NEXT: vmov s0, r0 -; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmov.i32 q5, #0x0 ; CHECK-MVE-NEXT: vcvtb.f16.f32 s0, s0 -; CHECK-MVE-NEXT: vdup.16 q5, r1 ; CHECK-MVE-NEXT: vmov r0, s0 ; CHECK-MVE-NEXT: vmov.16 q5[0], r0 ; CHECK-MVE-NEXT: vmov.u16 r0, q4[2] @@ -933,9 +925,8 @@ ; CHECK-MVE-NEXT: vmov s20, r0 ; CHECK-MVE-NEXT: mov r0, r5 ; CHECK-MVE-NEXT: bl exp2f -; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmov.i32 q0, #0x0 ; CHECK-MVE-NEXT: vmov s4, r0 -; CHECK-MVE-NEXT: vdup.32 q0, r1 ; CHECK-MVE-NEXT: vmov.f32 s0, s16 ; CHECK-MVE-NEXT: vmov.f32 s1, s18 ; CHECK-MVE-NEXT: vmov.f32 s2, s20 @@ -1002,9 +993,8 @@ ; CHECK-MVE-NEXT: ldr r0, [sp, #28] ; CHECK-MVE-NEXT: bl exp2f ; CHECK-MVE-NEXT: vmov s0, r0 -; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmov.i32 q5, #0x0 ; CHECK-MVE-NEXT: vcvtb.f16.f32 s0, s0 -; CHECK-MVE-NEXT: vdup.16 q5, r1 ; CHECK-MVE-NEXT: vmov r0, s0 ; CHECK-MVE-NEXT: vmov.16 q5[0], r0 ; CHECK-MVE-NEXT: vmov.u16 r0, q4[2] @@ -1194,9 +1184,8 @@ ; CHECK-MVE-NEXT: vmov s20, r0 ; CHECK-MVE-NEXT: mov r0, r5 ; CHECK-MVE-NEXT: bl logf -; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmov.i32 q0, #0x0 ; CHECK-MVE-NEXT: vmov s4, r0 -; CHECK-MVE-NEXT: vdup.32 q0, r1 ; CHECK-MVE-NEXT: vmov.f32 s0, s16 ; CHECK-MVE-NEXT: vmov.f32 s1, s18 ; CHECK-MVE-NEXT: vmov.f32 s2, s20 @@ -1263,9 +1252,8 @@ ; CHECK-MVE-NEXT: ldr r0, [sp, #28] ; CHECK-MVE-NEXT: bl logf ; CHECK-MVE-NEXT: vmov s0, r0 -; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmov.i32 q5, #0x0 ; CHECK-MVE-NEXT: vcvtb.f16.f32 s0, s0 -; CHECK-MVE-NEXT: vdup.16 q5, r1 ; CHECK-MVE-NEXT: vmov r0, s0 ; CHECK-MVE-NEXT: vmov.16 q5[0], r0 ; CHECK-MVE-NEXT: vmov.u16 r0, q4[2] @@ -1455,9 +1443,8 @@ ; CHECK-MVE-NEXT: vmov s20, r0 ; CHECK-MVE-NEXT: mov r0, r5 ; CHECK-MVE-NEXT: bl log2f -; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmov.i32 q0, #0x0 ; CHECK-MVE-NEXT: vmov s4, r0 -; CHECK-MVE-NEXT: vdup.32 q0, r1 ; CHECK-MVE-NEXT: vmov.f32 s0, s16 ; CHECK-MVE-NEXT: vmov.f32 s1, s18 ; CHECK-MVE-NEXT: vmov.f32 s2, s20 @@ -1524,9 +1511,8 @@ ; CHECK-MVE-NEXT: ldr r0, [sp, #28] ; CHECK-MVE-NEXT: bl log2f ; 
CHECK-MVE-NEXT: vmov s0, r0 -; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmov.i32 q5, #0x0 ; CHECK-MVE-NEXT: vcvtb.f16.f32 s0, s0 -; CHECK-MVE-NEXT: vdup.16 q5, r1 ; CHECK-MVE-NEXT: vmov r0, s0 ; CHECK-MVE-NEXT: vmov.16 q5[0], r0 ; CHECK-MVE-NEXT: vmov.u16 r0, q4[2] @@ -1716,9 +1702,8 @@ ; CHECK-MVE-NEXT: vmov s20, r0 ; CHECK-MVE-NEXT: mov r0, r5 ; CHECK-MVE-NEXT: bl log10f -; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmov.i32 q0, #0x0 ; CHECK-MVE-NEXT: vmov s4, r0 -; CHECK-MVE-NEXT: vdup.32 q0, r1 ; CHECK-MVE-NEXT: vmov.f32 s0, s16 ; CHECK-MVE-NEXT: vmov.f32 s1, s18 ; CHECK-MVE-NEXT: vmov.f32 s2, s20 @@ -1785,9 +1770,8 @@ ; CHECK-MVE-NEXT: ldr r0, [sp, #28] ; CHECK-MVE-NEXT: bl log10f ; CHECK-MVE-NEXT: vmov s0, r0 -; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmov.i32 q5, #0x0 ; CHECK-MVE-NEXT: vcvtb.f16.f32 s0, s0 -; CHECK-MVE-NEXT: vdup.16 q5, r1 ; CHECK-MVE-NEXT: vmov r0, s0 ; CHECK-MVE-NEXT: vmov.16 q5[0], r0 ; CHECK-MVE-NEXT: vmov.u16 r0, q4[2] @@ -1987,9 +1971,8 @@ ; CHECK-MVE-NEXT: mov r0, r4 ; CHECK-MVE-NEXT: mov r1, r6 ; CHECK-MVE-NEXT: bl powf -; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmov.i32 q0, #0x0 ; CHECK-MVE-NEXT: vmov s4, r0 -; CHECK-MVE-NEXT: vdup.32 q0, r1 ; CHECK-MVE-NEXT: vmov.f32 s0, s16 ; CHECK-MVE-NEXT: vmov.f32 s1, s18 ; CHECK-MVE-NEXT: vmov.f32 s2, s20 @@ -2075,11 +2058,10 @@ ; CHECK-MVE-NEXT: ldrd r0, r1, [sp, #48] ; CHECK-MVE-NEXT: bl powf ; CHECK-MVE-NEXT: vmov s0, r0 -; CHECK-MVE-NEXT: movs r1, #0 -; CHECK-MVE-NEXT: vdup.16 q6, r1 +; CHECK-MVE-NEXT: vmov.i32 q6, #0x0 ; CHECK-MVE-NEXT: vcvtb.f16.f32 s0, s0 -; CHECK-MVE-NEXT: vmov r0, s0 ; CHECK-MVE-NEXT: vmov.16 q6[0], r4 +; CHECK-MVE-NEXT: vmov r0, s0 ; CHECK-MVE-NEXT: vmov.16 q6[1], r0 ; CHECK-MVE-NEXT: vmov.u16 r0, q5[2] ; CHECK-MVE-NEXT: vmov s0, r0 Index: llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll +++ llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll @@ -8,13 +8,12 @@ ; CHECK-MVE-NEXT: vmov.u16 r0, q0[0] ; CHECK-MVE-NEXT: vmov.u16 r1, q0[1] ; CHECK-MVE-NEXT: vmov s4, r0 -; CHECK-MVE-NEXT: movs r2, #0 ; CHECK-MVE-NEXT: vneg.f16 s4, s4 ; CHECK-MVE-NEXT: vmov r0, s4 ; CHECK-MVE-NEXT: vmov s4, r1 ; CHECK-MVE-NEXT: vneg.f16 s4, s4 ; CHECK-MVE-NEXT: vmov r1, s4 -; CHECK-MVE-NEXT: vdup.16 q1, r2 +; CHECK-MVE-NEXT: vmov.i32 q1, #0x0 ; CHECK-MVE-NEXT: vmov.16 q1[0], r0 ; CHECK-MVE-NEXT: vmov.u16 r0, q0[2] ; CHECK-MVE-NEXT: vmov s8, r0 @@ -63,11 +62,10 @@ ; CHECK-MVE-LABEL: fneg_float32_t: ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: vneg.f32 s4, s0 -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: vneg.f32 s6, s1 ; CHECK-MVE-NEXT: vneg.f32 s8, s2 ; CHECK-MVE-NEXT: vneg.f32 s10, s3 -; CHECK-MVE-NEXT: vdup.32 q0, r0 +; CHECK-MVE-NEXT: vmov.i32 q0, #0x0 ; CHECK-MVE-NEXT: vmov.f32 s0, s4 ; CHECK-MVE-NEXT: vmov.f32 s1, s6 ; CHECK-MVE-NEXT: vmov.f32 s2, s8 @@ -89,13 +87,12 @@ ; CHECK-MVE-NEXT: vmov.u16 r0, q0[0] ; CHECK-MVE-NEXT: vmov.u16 r1, q0[1] ; CHECK-MVE-NEXT: vmov s4, r0 -; CHECK-MVE-NEXT: movs r2, #0 ; CHECK-MVE-NEXT: vabs.f16 s4, s4 ; CHECK-MVE-NEXT: vmov r0, s4 ; CHECK-MVE-NEXT: vmov s4, r1 ; CHECK-MVE-NEXT: vabs.f16 s4, s4 ; CHECK-MVE-NEXT: vmov r1, s4 -; CHECK-MVE-NEXT: vdup.16 q1, r2 +; CHECK-MVE-NEXT: vmov.i32 q1, #0x0 ; CHECK-MVE-NEXT: vmov.16 q1[0], r0 ; CHECK-MVE-NEXT: vmov.u16 r0, q0[2] ; CHECK-MVE-NEXT: vmov s8, r0 @@ -144,11 +141,10 @@ ; CHECK-MVE-LABEL: fabs_float32_t: ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: vabs.f32 s4, s0 -; CHECK-MVE-NEXT: movs r0, #0 ; 
CHECK-MVE-NEXT: vabs.f32 s6, s1 ; CHECK-MVE-NEXT: vabs.f32 s8, s2 ; CHECK-MVE-NEXT: vabs.f32 s10, s3 -; CHECK-MVE-NEXT: vdup.32 q0, r0 +; CHECK-MVE-NEXT: vmov.i32 q0, #0x0 ; CHECK-MVE-NEXT: vmov.f32 s0, s4 ; CHECK-MVE-NEXT: vmov.f32 s1, s6 ; CHECK-MVE-NEXT: vmov.f32 s2, s8 Index: llvm/test/CodeGen/Thumb2/mve-loadstore.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-loadstore.ll +++ llvm/test/CodeGen/Thumb2/mve-loadstore.ll @@ -92,8 +92,7 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .pad #40 ; CHECK-NEXT: sub sp, #40 -; CHECK-NEXT: movs r0, #1 -; CHECK-NEXT: vdup.32 q0, r0 +; CHECK-NEXT: vmov.i32 q0, #0x1 ; CHECK-NEXT: mov r0, sp ; CHECK-NEXT: vstrw.32 q0, [r0] ; CHECK-NEXT: movs r0, #3 @@ -121,8 +120,7 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .pad #40 ; CHECK-NEXT: sub sp, #40 -; CHECK-NEXT: movs r0, #1 -; CHECK-NEXT: vdup.16 q0, r0 +; CHECK-NEXT: vmov.i16 q0, #0x1 ; CHECK-NEXT: mov r0, sp ; CHECK-NEXT: vstrh.16 q0, [r0] ; CHECK-NEXT: movs r0, #3 @@ -150,8 +148,7 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .pad #40 ; CHECK-NEXT: sub sp, #40 -; CHECK-NEXT: movs r0, #1 -; CHECK-NEXT: vdup.8 q0, r0 +; CHECK-NEXT: vmov.i8 q0, #0x1 ; CHECK-NEXT: mov r0, sp ; CHECK-NEXT: vstrb.8 q0, [r0] ; CHECK-NEXT: movs r0, #3 Index: llvm/test/CodeGen/Thumb2/mve-minmax.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-minmax.ll +++ llvm/test/CodeGen/Thumb2/mve-minmax.ll @@ -5,9 +5,8 @@ define arm_aapcs_vfpcc <4 x float> @maxnm_float32_t(<4 x float> %src1, <4 x float> %src2) { ; CHECK-MVE-LABEL: maxnm_float32_t: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: vmov q2, q0 -; CHECK-MVE-NEXT: vdup.32 q0, r0 +; CHECK-MVE-NEXT: vmov.i32 q0, #0x0 ; CHECK-MVE-NEXT: vmaxnm.f32 s0, s4, s8 ; CHECK-MVE-NEXT: vmaxnm.f32 s1, s5, s9 ; CHECK-MVE-NEXT: vmaxnm.f32 s2, s6, s10 @@ -32,7 +31,6 @@ ; CHECK-MVE-NEXT: vmov s10, r1 ; CHECK-MVE-NEXT: vmov.u16 r1, q0[1] ; CHECK-MVE-NEXT: vmov s8, r0 -; CHECK-MVE-NEXT: movs r2, #0 ; CHECK-MVE-NEXT: vminnm.f16 s8, s10, s8 ; CHECK-MVE-NEXT: vmov r0, s8 ; CHECK-MVE-NEXT: vmov s8, r1 @@ -40,7 +38,7 @@ ; CHECK-MVE-NEXT: vmov s10, r1 ; CHECK-MVE-NEXT: vminnm.f16 s8, s10, s8 ; CHECK-MVE-NEXT: vmov r1, s8 -; CHECK-MVE-NEXT: vdup.16 q2, r2 +; CHECK-MVE-NEXT: vmov.i32 q2, #0x0 ; CHECK-MVE-NEXT: vmov.16 q2[0], r0 ; CHECK-MVE-NEXT: vmov.u16 r0, q0[2] ; CHECK-MVE-NEXT: vmov s12, r0 Index: llvm/test/CodeGen/Thumb2/mve-shuffle.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-shuffle.ll +++ llvm/test/CodeGen/Thumb2/mve-shuffle.ll @@ -312,13 +312,12 @@ define arm_aapcs_vfpcc <8 x half> @shuffle1_f16(<8 x half> %src) { ; CHECK-MVE-LABEL: shuffle1_f16: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: movs r2, #0 ; CHECK-MVE-NEXT: vmov.u16 r0, q0[7] -; CHECK-MVE-NEXT: vdup.16 q1, r2 -; CHECK-MVE-NEXT: vmov.u16 r1, q0[6] +; CHECK-MVE-NEXT: vmov.i32 q1, #0x0 ; CHECK-MVE-NEXT: vmov.16 q1[0], r0 -; CHECK-MVE-NEXT: vmov.u16 r0, q0[5] +; CHECK-MVE-NEXT: vmov.u16 r1, q0[6] ; CHECK-MVE-NEXT: vmov.16 q1[1], r1 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[5] ; CHECK-MVE-NEXT: vmov.16 q1[2], r0 ; CHECK-MVE-NEXT: vmov.u16 r0, q0[4] ; CHECK-MVE-NEXT: vmov.16 q1[3], r0 @@ -370,13 +369,12 @@ define arm_aapcs_vfpcc <8 x half> @shuffle3_f16(<8 x half> %src) { ; CHECK-MVE-LABEL: shuffle3_f16: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: movs r2, #0 ; CHECK-MVE-NEXT: vmov.u16 r0, q0[4] -; CHECK-MVE-NEXT: vdup.16 q1, r2 -; 
CHECK-MVE-NEXT: vmov.u16 r1, q0[5] +; CHECK-MVE-NEXT: vmov.i32 q1, #0x0 ; CHECK-MVE-NEXT: vmov.16 q1[0], r0 -; CHECK-MVE-NEXT: vmov.u16 r0, q0[7] +; CHECK-MVE-NEXT: vmov.u16 r1, q0[5] ; CHECK-MVE-NEXT: vmov.16 q1[1], r1 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[7] ; CHECK-MVE-NEXT: vmov.16 q1[2], r0 ; CHECK-MVE-NEXT: vmov.u16 r0, q0[6] ; CHECK-MVE-NEXT: vmov.16 q1[3], r0 Index: llvm/test/CodeGen/Thumb2/mve-simple-arith.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-simple-arith.ll +++ llvm/test/CodeGen/Thumb2/mve-simple-arith.ll @@ -36,11 +36,10 @@ ; CHECK-MVE-LABEL: add_float32_t: ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: vadd.f32 s8, s4, s0 -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: vadd.f32 s10, s5, s1 ; CHECK-MVE-NEXT: vadd.f32 s12, s6, s2 ; CHECK-MVE-NEXT: vadd.f32 s4, s7, s3 -; CHECK-MVE-NEXT: vdup.32 q0, r0 +; CHECK-MVE-NEXT: vmov.i32 q0, #0x0 ; CHECK-MVE-NEXT: vmov.f32 s0, s8 ; CHECK-MVE-NEXT: vmov.f32 s1, s10 ; CHECK-MVE-NEXT: vmov.f32 s2, s12 @@ -64,7 +63,6 @@ ; CHECK-MVE-NEXT: vmov s10, r1 ; CHECK-MVE-NEXT: vmov.u16 r1, q0[1] ; CHECK-MVE-NEXT: vmov s8, r0 -; CHECK-MVE-NEXT: movs r2, #0 ; CHECK-MVE-NEXT: vadd.f16 s8, s10, s8 ; CHECK-MVE-NEXT: vmov r0, s8 ; CHECK-MVE-NEXT: vmov s8, r1 @@ -72,7 +70,7 @@ ; CHECK-MVE-NEXT: vmov s10, r1 ; CHECK-MVE-NEXT: vadd.f16 s8, s10, s8 ; CHECK-MVE-NEXT: vmov r1, s8 -; CHECK-MVE-NEXT: vdup.16 q2, r2 +; CHECK-MVE-NEXT: vmov.i32 q2, #0x0 ; CHECK-MVE-NEXT: vmov.16 q2[0], r0 ; CHECK-MVE-NEXT: vmov.u16 r0, q0[2] ; CHECK-MVE-NEXT: vmov s12, r0 @@ -164,11 +162,10 @@ ; CHECK-MVE-LABEL: sub_float32_t: ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: vsub.f32 s8, s4, s0 -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: vsub.f32 s10, s5, s1 ; CHECK-MVE-NEXT: vsub.f32 s12, s6, s2 ; CHECK-MVE-NEXT: vsub.f32 s4, s7, s3 -; CHECK-MVE-NEXT: vdup.32 q0, r0 +; CHECK-MVE-NEXT: vmov.i32 q0, #0x0 ; CHECK-MVE-NEXT: vmov.f32 s0, s8 ; CHECK-MVE-NEXT: vmov.f32 s1, s10 ; CHECK-MVE-NEXT: vmov.f32 s2, s12 @@ -192,7 +189,6 @@ ; CHECK-MVE-NEXT: vmov s10, r1 ; CHECK-MVE-NEXT: vmov.u16 r1, q0[1] ; CHECK-MVE-NEXT: vmov s8, r0 -; CHECK-MVE-NEXT: movs r2, #0 ; CHECK-MVE-NEXT: vsub.f16 s8, s10, s8 ; CHECK-MVE-NEXT: vmov r0, s8 ; CHECK-MVE-NEXT: vmov s8, r1 @@ -200,7 +196,7 @@ ; CHECK-MVE-NEXT: vmov s10, r1 ; CHECK-MVE-NEXT: vsub.f16 s8, s10, s8 ; CHECK-MVE-NEXT: vmov r1, s8 -; CHECK-MVE-NEXT: vdup.16 q2, r2 +; CHECK-MVE-NEXT: vmov.i32 q2, #0x0 ; CHECK-MVE-NEXT: vmov.16 q2[0], r0 ; CHECK-MVE-NEXT: vmov.u16 r0, q0[2] ; CHECK-MVE-NEXT: vmov s12, r0 @@ -295,7 +291,6 @@ ; CHECK-MVE-NEXT: vmov s10, r1 ; CHECK-MVE-NEXT: vmov.u16 r1, q0[1] ; CHECK-MVE-NEXT: vmov s8, r0 -; CHECK-MVE-NEXT: movs r2, #0 ; CHECK-MVE-NEXT: vmul.f16 s8, s10, s8 ; CHECK-MVE-NEXT: vmov r0, s8 ; CHECK-MVE-NEXT: vmov s8, r1 @@ -303,7 +298,7 @@ ; CHECK-MVE-NEXT: vmov s10, r1 ; CHECK-MVE-NEXT: vmul.f16 s8, s10, s8 ; CHECK-MVE-NEXT: vmov r1, s8 -; CHECK-MVE-NEXT: vdup.16 q2, r2 +; CHECK-MVE-NEXT: vmov.i32 q2, #0x0 ; CHECK-MVE-NEXT: vmov.16 q2[0], r0 ; CHECK-MVE-NEXT: vmov.u16 r0, q0[2] ; CHECK-MVE-NEXT: vmov s12, r0 @@ -364,11 +359,10 @@ ; CHECK-MVE-LABEL: mul_float32_t: ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: vmul.f32 s8, s4, s0 -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: vmul.f32 s10, s5, s1 ; CHECK-MVE-NEXT: vmul.f32 s12, s6, s2 ; CHECK-MVE-NEXT: vmul.f32 s4, s7, s3 -; CHECK-MVE-NEXT: vdup.32 q0, r0 +; CHECK-MVE-NEXT: vmov.i32 q0, #0x0 ; CHECK-MVE-NEXT: vmov.f32 s0, s8 ; CHECK-MVE-NEXT: vmov.f32 s1, s10 ; CHECK-MVE-NEXT: vmov.f32 s2, s12 
Index: llvm/test/CodeGen/Thumb2/mve-vcvt.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vcvt.ll +++ llvm/test/CodeGen/Thumb2/mve-vcvt.ll @@ -6,11 +6,10 @@ ; CHECK-MVE-LABEL: foo_float_int32: ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: vcvt.f32.s32 s4, s0 -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: vcvt.f32.s32 s6, s1 ; CHECK-MVE-NEXT: vcvt.f32.s32 s8, s2 ; CHECK-MVE-NEXT: vcvt.f32.s32 s10, s3 -; CHECK-MVE-NEXT: vdup.32 q0, r0 +; CHECK-MVE-NEXT: vmov.i32 q0, #0x0 ; CHECK-MVE-NEXT: vmov.f32 s0, s4 ; CHECK-MVE-NEXT: vmov.f32 s1, s6 ; CHECK-MVE-NEXT: vmov.f32 s2, s8 @@ -30,11 +29,10 @@ ; CHECK-MVE-LABEL: foo_float_uint32: ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: vcvt.f32.u32 s4, s0 -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: vcvt.f32.u32 s6, s1 ; CHECK-MVE-NEXT: vcvt.f32.u32 s8, s2 ; CHECK-MVE-NEXT: vcvt.f32.u32 s10, s3 -; CHECK-MVE-NEXT: vdup.32 q0, r0 +; CHECK-MVE-NEXT: vmov.i32 q0, #0x0 ; CHECK-MVE-NEXT: vmov.f32 s0, s4 ; CHECK-MVE-NEXT: vmov.f32 s1, s6 ; CHECK-MVE-NEXT: vmov.f32 s2, s8 @@ -110,13 +108,12 @@ ; CHECK-MVE-NEXT: sxth r0, r0 ; CHECK-MVE-NEXT: sxth r1, r1 ; CHECK-MVE-NEXT: vmov s4, r0 -; CHECK-MVE-NEXT: movs r2, #0 ; CHECK-MVE-NEXT: vcvt.f16.s32 s4, s4 ; CHECK-MVE-NEXT: vmov r0, s4 ; CHECK-MVE-NEXT: vmov s4, r1 ; CHECK-MVE-NEXT: vcvt.f16.s32 s4, s4 ; CHECK-MVE-NEXT: vmov r1, s4 -; CHECK-MVE-NEXT: vdup.16 q1, r2 +; CHECK-MVE-NEXT: vmov.i32 q1, #0x0 ; CHECK-MVE-NEXT: vmov.16 q1[0], r0 ; CHECK-MVE-NEXT: vmov.u16 r0, q0[2] ; CHECK-MVE-NEXT: sxth r0, r0 @@ -173,13 +170,12 @@ ; CHECK-MVE-NEXT: vmov.u16 r0, q0[0] ; CHECK-MVE-NEXT: vmov.u16 r1, q0[1] ; CHECK-MVE-NEXT: vmov s4, r0 -; CHECK-MVE-NEXT: movs r2, #0 ; CHECK-MVE-NEXT: vcvt.f16.u32 s4, s4 ; CHECK-MVE-NEXT: vmov r0, s4 ; CHECK-MVE-NEXT: vmov s4, r1 ; CHECK-MVE-NEXT: vcvt.f16.u32 s4, s4 ; CHECK-MVE-NEXT: vmov r1, s4 -; CHECK-MVE-NEXT: vdup.16 q1, r2 +; CHECK-MVE-NEXT: vmov.i32 q1, #0x0 ; CHECK-MVE-NEXT: vmov.16 q1[0], r0 ; CHECK-MVE-NEXT: vmov.u16 r0, q0[2] ; CHECK-MVE-NEXT: vmov s8, r0 Index: llvm/test/CodeGen/Thumb2/mve-vmovimm.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/Thumb2/mve-vmovimm.ll @@ -0,0 +1,243 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s + +define arm_aapcs_vfpcc <16 x i8> @mov_int8_1() { +; CHECK-LABEL: mov_int8_1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i8 q0, #0x1 +; CHECK-NEXT: bx lr +entry: + ret <16 x i8> +} + +define arm_aapcs_vfpcc <16 x i8> @mov_int8_m1(i8 *%dest) { +; CHECK-LABEL: mov_int8_m1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i8 q0, #0xff +; CHECK-NEXT: bx lr +entry: + ret <16 x i8> +} + +define arm_aapcs_vfpcc <8 x i16> @mov_int16_1(i16 *%dest) { +; CHECK-LABEL: mov_int16_1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i16 q0, #0x1 +; CHECK-NEXT: bx lr +entry: + ret <8 x i16> +} + +define arm_aapcs_vfpcc <8 x i16> @mov_int16_m1(i16 *%dest) { +; CHECK-LABEL: mov_int16_m1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i8 q0, #0xff +; CHECK-NEXT: bx lr +entry: + ret <8 x i16> +} + +define arm_aapcs_vfpcc <8 x i16> @mov_int16_256(i16 *%dest) { +; CHECK-LABEL: mov_int16_256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i16 q0, #0x100 +; CHECK-NEXT: bx lr +entry: + ret <8 x i16> +} + 
+define arm_aapcs_vfpcc <8 x i16> @mov_int16_257() { +; CHECK-LABEL: mov_int16_257: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i8 q0, #0x1 +; CHECK-NEXT: bx lr +entry: + ret <8 x i16> +} + +define arm_aapcs_vfpcc <8 x i16> @mov_int16_258(i16 *%dest) { +; CHECK-LABEL: mov_int16_258: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adr r0, .LCPI6_0 +; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI6_0: +; CHECK-NEXT: .long 16908546 @ double 8.204306265173532E-304 +; CHECK-NEXT: .long 16908546 +; CHECK-NEXT: .long 16908546 @ double 8.204306265173532E-304 +; CHECK-NEXT: .long 16908546 +entry: + ret <8 x i16> +} + +define arm_aapcs_vfpcc <4 x i32> @mov_int32_1(i32 *%dest) { +; CHECK-LABEL: mov_int32_1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i32 q0, #0x1 +; CHECK-NEXT: bx lr +entry: + ret <4 x i32> +} + +define arm_aapcs_vfpcc <4 x i32> @mov_int32_256(i32 *%dest) { +; CHECK-LABEL: mov_int32_256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i32 q0, #0x100 +; CHECK-NEXT: bx lr +entry: + ret <4 x i32> +} + +define arm_aapcs_vfpcc <4 x i32> @mov_int32_65536(i32 *%dest) { +; CHECK-LABEL: mov_int32_65536: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i32 q0, #0x10000 +; CHECK-NEXT: bx lr +entry: + ret <4 x i32> +} + +define arm_aapcs_vfpcc <4 x i32> @mov_int32_16777216(i32 *%dest) { +; CHECK-LABEL: mov_int32_16777216: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i32 q0, #0x1000000 +; CHECK-NEXT: bx lr +entry: + ret <4 x i32> +} + +define arm_aapcs_vfpcc <4 x i32> @mov_int32_16777217(i32 *%dest) { +; CHECK-LABEL: mov_int32_16777217: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adr r0, .LCPI11_0 +; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI11_0: +; CHECK-NEXT: .long 16777217 @ double 7.2911290000737531E-304 +; CHECK-NEXT: .long 16777217 +; CHECK-NEXT: .long 16777217 @ double 7.2911290000737531E-304 +; CHECK-NEXT: .long 16777217 +entry: + ret <4 x i32> +} + +define arm_aapcs_vfpcc <4 x i32> @mov_int32_17919(i32 *%dest) { +; CHECK-LABEL: mov_int32_17919: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i32 q0, #0x45ff +; CHECK-NEXT: bx lr +entry: + ret <4 x i32> +} + +define arm_aapcs_vfpcc <4 x i32> @mov_int32_4587519(i32 *%dest) { +; CHECK-LABEL: mov_int32_4587519: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i32 q0, #0x45ffff +; CHECK-NEXT: bx lr +entry: + ret <4 x i32> +} + +define arm_aapcs_vfpcc <4 x i32> @mov_int32_m1(i32 *%dest) { +; CHECK-LABEL: mov_int32_m1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i8 q0, #0xff +; CHECK-NEXT: bx lr +entry: + ret <4 x i32> +} + +define arm_aapcs_vfpcc <4 x i32> @mov_int32_4294901760(i32 *%dest) { +; CHECK-LABEL: mov_int32_4294901760: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmvn.i32 q0, #0xffff +; CHECK-NEXT: bx lr +entry: + ret <4 x i32> +} + +define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278190335(i32 *%dest) { +; CHECK-LABEL: mov_int32_4278190335: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adr r0, .LCPI16_0 +; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI16_0: +; CHECK-NEXT: .long 4278190335 @ double -5.4874634341155774E+303 +; CHECK-NEXT: .long 4278190335 +; CHECK-NEXT: .long 4278190335 @ double -5.4874634341155774E+303 +; CHECK-NEXT: .long 4278190335 +entry: + ret <4 x i32> +} + +define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278255615(i32 *%dest) { +; CHECK-LABEL: mov_int32_4278255615: +; CHECK: @ %bb.0: @ 
%entry +; CHECK-NEXT: vmvn.i32 q0, #0xff0000 +; CHECK-NEXT: bx lr +entry: + ret <4 x i32> +} + +define arm_aapcs_vfpcc <4 x float> @mov_float_1(float *%dest) { +; CHECK-LABEL: mov_float_1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adr r0, .LCPI18_0 +; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI18_0: +; CHECK-NEXT: .long 1065353216 @ double 0.007812501848093234 +; CHECK-NEXT: .long 1065353216 +; CHECK-NEXT: .long 1065353216 @ double 0.007812501848093234 +; CHECK-NEXT: .long 1065353216 +entry: + ret <4 x float> +} + +define arm_aapcs_vfpcc <4 x float> @mov_float_m3(float *%dest) { +; CHECK-LABEL: mov_float_m3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adr r0, .LCPI19_0 +; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI19_0: +; CHECK-NEXT: .long 3225419776 @ double -32.000022917985916 +; CHECK-NEXT: .long 3225419776 +; CHECK-NEXT: .long 3225419776 @ double -32.000022917985916 +; CHECK-NEXT: .long 3225419776 +entry: + ret <4 x float> +} + +define arm_aapcs_vfpcc <8 x half> @mov_float16_1(half *%dest) { +; CHECK-LABEL: mov_float16_1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i16 q0, #0x3c00 +; CHECK-NEXT: bx lr + +entry: + ret <8 x half> +} + +define arm_aapcs_vfpcc <8 x half> @mov_float16_m3(half *%dest) { +; CHECK-LABEL: mov_float16_m3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i16 q0, #0xc200 +; CHECK-NEXT: bx lr + +entry: + ret <8 x half> +} Index: llvm/test/CodeGen/Thumb2/mve-vmvnimm.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/Thumb2/mve-vmvnimm.ll @@ -0,0 +1,83 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s + +define arm_aapcs_vfpcc <8 x i16> @mov_int16_511(i16 *%dest) { +; CHECK-LABEL: mov_int16_511: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmvn.i16 q0, #0xfe00 +; CHECK-NEXT: bx lr +entry: + ret <8 x i16> +} + +define arm_aapcs_vfpcc <8 x i16> @mov_int16_65281(i16 *%dest) { +; CHECK-LABEL: mov_int16_65281: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmvn.i16 q0, #0xfe +; CHECK-NEXT: bx lr +entry: + ret <8 x i16> +} + +define arm_aapcs_vfpcc <4 x i32> @mov_int32_m7(i32 *%dest) { +; CHECK-LABEL: mov_int32_m7: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmvn.i32 q0, #0x6 +; CHECK-NEXT: bx lr +entry: + ret <4 x i32> +} + +define arm_aapcs_vfpcc <4 x i32> @mov_int32_m769(i32 *%dest) { +; CHECK-LABEL: mov_int32_m769: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmvn.i32 q0, #0x300 +; CHECK-NEXT: bx lr +entry: + ret <4 x i32> +} + +define arm_aapcs_vfpcc <4 x i32> @mov_int32_m262145(i32 *%dest) { +; CHECK-LABEL: mov_int32_m262145: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmvn.i32 q0, #0x40000 +; CHECK-NEXT: bx lr +entry: + ret <4 x i32> +} + +define arm_aapcs_vfpcc <4 x i32> @mov_int32_m134217729(i32 *%dest) { +; CHECK-LABEL: mov_int32_m134217729: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmvn.i32 q0, #0x8000000 +; CHECK-NEXT: bx lr +entry: + ret <4 x i32> +} + +define arm_aapcs_vfpcc <4 x i32> @mov_int32_4294902528(i32 *%dest) { +; CHECK-LABEL: mov_int32_4294902528: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmvn.i32 q0, #0xfcff +; CHECK-NEXT: bx lr +entry: + ret <4 x i32> +} + +define arm_aapcs_vfpcc <4 x i32> 
@mov_int32_4278386688(i32 *%dest) { +; CHECK-LABEL: mov_int32_4278386688: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adr r0, .LCPI7_0 +; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI7_0: +; CHECK-NEXT: .long 4278386688 @ double -6.5147775434702224E+303 +; CHECK-NEXT: .long 4278386688 +; CHECK-NEXT: .long 4278386688 @ double -6.5147775434702224E+303 +; CHECK-NEXT: .long 4278386688 +entry: + ret <4 x i32> +}
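
Note on the MveVMVNModImm restriction (illustrative, not part of the patch): the early return added to isNEONModifiedImm rejects only the cmode == 0b1101 shape (0x00nnFFFF) for MVE VMVN, and, from where that check sits, the simpler byte shapes and the 0x0000nnFF (cmode 1100) shape remain usable. The standalone C++ sketch below is a hedged mirror of that rule for the i32 splat case only - the helper name and simplified shape checks are hypothetical and this is not the LLVM code - but it shows why mov_int32_4294902528 (0xFFFF0300) above still selects "vmvn.i32 q0, #0xfcff" while mov_int32_4278386688 (0xFF030000) falls back to the literal-pool load checked in the last test.

    // Hedged sketch: mirrors the i32 splat cases of the modified-immediate
    // rule this patch applies to MVE VMVN.  Names and structure are
    // hypothetical; only the accepted/rejected shapes follow the patch.
    #include <cstdint>
    #include <cstdio>

    // Returns true if a v4i32 splat of `Splat` could be materialised with an
    // MVE "vmvn.i32 qd, #imm", i.e. if ~Splat fits a shape the patch still
    // allows.  The 0x00nnFFFF shape (cmode 1101) is the one NEON VMVN form
    // rejected for MVE, so such splats fall back to a literal-pool load.
    static bool mveVmvnI32SplatOK(uint32_t Splat) {
      uint32_t Inv = ~Splat;
      if ((Inv & ~0x000000ffu) == 0) return true;   // 0x000000nn
      if ((Inv & ~0x0000ff00u) == 0) return true;   // 0x0000nn00
      if ((Inv & ~0x00ff0000u) == 0) return true;   // 0x00nn0000
      if ((Inv & ~0xff000000u) == 0) return true;   // 0xnn000000
      if ((Inv & ~0x0000ffffu) == 0 && (Inv & 0xffu) == 0xffu)
        return true;                                // 0x0000nnFF (cmode 1100)
      // 0x00nnFFFF (cmode 1101): fine for NEON VMVN, rejected for MVE VMVN.
      return false;
    }

    int main() {
      std::printf("0xFFFF0300 -> %s\n",
                  mveVmvnI32SplatOK(0xFFFF0300u) ? "vmvn.i32 q0, #0xfcff"
                                                 : "literal pool");
      std::printf("0xFF030000 -> %s\n",
                  mveVmvnI32SplatOK(0xFF030000u) ? "vmvn.i32"
                                                 : "literal pool");
      return 0;
    }

Running the sketch prints "vmvn.i32 q0, #0xfcff" for 0xFFFF0300 (whose complement 0x0000FCFF fits cmode 1100) and "literal pool" for 0xFF030000 (whose complement 0x00FCFFFF only fits the rejected cmode 1101 shape), which is consistent with the mve-vmvnimm.ll expectations above.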