diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -89,6 +89,12 @@
   GORCI,
   GORCIW,
   // Vector Extension
+  // VMV_V_X_VL matches the semantics of vmv.v.x but includes an extra operand
+  // for the VL value to be used for the operation.
+  VMV_V_X_VL,
+  // VFMV_V_F_VL matches the semantics of vfmv.v.f but includes an extra
+  // operand for the VL value to be used for the operation.
+  VFMV_V_F_VL,
   // VMV_X_S matches the semantics of vmv.x.s. The result is always XLenVT sign
   // extended from the vector element size.
   VMV_X_S,
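A note on the two new ISD nodes: carrying the VL as an explicit operand lets one node serve both the intrinsic path (AVL supplied by the user in a register) and the fixed-length-vector path added below (AVL is simply the fixed type's element count). A minimal sketch of how such a node is built; `buildSplatVL` and its parameters are illustrative, not part of the patch:

```cpp
// Sketch: splat Scalar into a scalable container type with an explicit,
// compile-time-constant AVL, mirroring how this patch uses
// RISCVISD::VMV_V_X_VL. All names and values here are example choices.
static SDValue buildSplatVL(SelectionDAG &DAG, const SDLoc &DL,
                            MVT ContainerVT, SDValue Scalar, unsigned NumElts,
                            MVT XLenVT) {
  SDValue VL = DAG.getConstant(NumElts, DL, XLenVT); // AVL operand, XLenVT
  return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, Scalar, VL);
}
```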
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -523,6 +523,8 @@
       // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
 
+      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+
       setOperationAction(ISD::LOAD, VT, Custom);
       setOperationAction(ISD::STORE, VT, Custom);
       setOperationAction(ISD::ADD, VT, Custom);
@@ -551,6 +553,8 @@
       // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
 
+      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+
       setOperationAction(ISD::LOAD, VT, Custom);
       setOperationAction(ISD::STORE, VT, Custom);
       setOperationAction(ISD::FADD, VT, Custom);
@@ -758,6 +762,81 @@
   }
 }
 
+// Return the largest legal scalable vector type that matches VT's element type.
+static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
+                                            const RISCVSubtarget &Subtarget) {
+  assert(VT.isFixedLengthVector() &&
+         DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
+         "Expected legal fixed length vector!");
+
+  unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
+  assert(LMul <= 8 && isPowerOf2_32(LMul) && "Unexpected LMUL!");
+
+  switch (VT.getVectorElementType().SimpleTy) {
+  default:
+    llvm_unreachable("unexpected element type for RVV container");
+  case MVT::i8:
+    return MVT::getScalableVectorVT(MVT::i8, LMul * 8);
+  case MVT::i16:
+    return MVT::getScalableVectorVT(MVT::i16, LMul * 4);
+  case MVT::i32:
+    return MVT::getScalableVectorVT(MVT::i32, LMul * 2);
+  case MVT::i64:
+    return MVT::getScalableVectorVT(MVT::i64, LMul);
+  case MVT::f16:
+    return MVT::getScalableVectorVT(MVT::f16, LMul * 4);
+  case MVT::f32:
+    return MVT::getScalableVectorVT(MVT::f32, LMul * 2);
+  case MVT::f64:
+    return MVT::getScalableVectorVT(MVT::f64, LMul);
+  }
+}
+
+// Grow V to consume an entire RVV register.
+static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
+                                       const RISCVSubtarget &Subtarget) {
+  assert(VT.isScalableVector() &&
+         "Expected to convert into a scalable vector!");
+  assert(V.getValueType().isFixedLengthVector() &&
+         "Expected a fixed length vector operand!");
+  SDLoc DL(V);
+  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
+  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
+}
+
+// Shrink V so it's just big enough to maintain a VT's worth of data.
+static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
+                                         const RISCVSubtarget &Subtarget) {
+  assert(VT.isFixedLengthVector() &&
+         "Expected to convert into a fixed length vector!");
+  assert(V.getValueType().isScalableVector() &&
+         "Expected a scalable vector operand!");
+  SDLoc DL(V);
+  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
+  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
+}
+
+static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
+                                 const RISCVSubtarget &Subtarget) {
+  MVT VT = Op.getSimpleValueType();
+  assert(VT.isFixedLengthVector() && "Unexpected vector!");
+
+  if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
+    MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+
+    SDLoc DL(Op);
+    SDValue VL =
+        DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
+
+    unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
+                                        : RISCVISD::VMV_V_X_VL;
+    Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL);
+    return convertFromScalableVector(VT, Splat, DAG, Subtarget);
+  }
+
+  return SDValue();
+}
+
 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
                                             SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
@@ -1005,6 +1084,8 @@
   case ISD::VECREDUCE_FADD:
   case ISD::VECREDUCE_SEQ_FADD:
     return lowerFPVECREDUCE(Op, DAG);
+  case ISD::BUILD_VECTOR:
+    return lowerBUILD_VECTOR(Op, DAG, Subtarget);
   case ISD::LOAD:
     return lowerFixedLengthVectorLoadToRVV(Op, DAG);
   case ISD::STORE:
@@ -1704,6 +1785,15 @@
     assert(Op.getValueType() == Subtarget.getXLenVT() && "Unexpected VT!");
     return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
                        Op.getOperand(1));
+  case Intrinsic::riscv_vmv_v_x: {
+    SDValue Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(),
+                                 Op.getOperand(1));
+    return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, Op.getValueType(),
+                       Scalar, Op.getOperand(2));
+  }
+  case Intrinsic::riscv_vfmv_v_f:
+    return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
+                       Op.getOperand(1), Op.getOperand(2));
   }
 }
 
@@ -1861,60 +1951,6 @@
                      DAG.getConstant(0, DL, Subtarget.getXLenVT()));
 }
 
-// Return the largest legal scalable vector type that matches VT's element type.
-static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
-                                            const RISCVSubtarget &Subtarget) {
-  assert(VT.isFixedLengthVector() &&
-         DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
-         "Expected legal fixed length vector!");
-
-  unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
-  assert(LMul <= 8 && isPowerOf2_32(LMul) && "Unexpected LMUL!");
-
-  switch (VT.getVectorElementType().SimpleTy) {
-  default:
-    llvm_unreachable("unexpected element type for RVV container");
-  case MVT::i8:
-    return MVT::getScalableVectorVT(MVT::i8, LMul * 8);
-  case MVT::i16:
-    return MVT::getScalableVectorVT(MVT::i16, LMul * 4);
-  case MVT::i32:
-    return MVT::getScalableVectorVT(MVT::i32, LMul * 2);
-  case MVT::i64:
-    return MVT::getScalableVectorVT(MVT::i64, LMul);
-  case MVT::f16:
-    return MVT::getScalableVectorVT(MVT::f16, LMul * 4);
-  case MVT::f32:
-    return MVT::getScalableVectorVT(MVT::f32, LMul * 2);
-  case MVT::f64:
-    return MVT::getScalableVectorVT(MVT::f64, LMul);
-  }
-}
-
-// Grow V to consume an entire RVV register.
-static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
-                                       const RISCVSubtarget &Subtarget) {
-  assert(VT.isScalableVector() &&
-         "Expected to convert into a scalable vector!");
-  assert(V.getValueType().isFixedLengthVector() &&
-         "Expected a fixed length vector operand!");
-  SDLoc DL(V);
-  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
-  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
-}
-
-// Shrink V so it's just big enough to maintain a VT's worth of data.
-static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
-                                         const RISCVSubtarget &Subtarget) {
-  assert(VT.isFixedLengthVector() &&
-         "Expected to convert into a fixed length vector!");
-  assert(V.getValueType().isScalableVector() &&
-         "Expected a scalable vector operand!");
-  SDLoc DL(V);
-  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
-  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
-}
-
 SDValue
 RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
                                                      SelectionDAG &DAG) const {
@@ -4540,6 +4576,8 @@
     NODE_NAME_CASE(GREVIW)
     NODE_NAME_CASE(GORCI)
     NODE_NAME_CASE(GORCIW)
+    NODE_NAME_CASE(VMV_V_X_VL)
+    NODE_NAME_CASE(VFMV_V_F_VL)
     NODE_NAME_CASE(VMV_X_S)
     NODE_NAME_CASE(SPLAT_VECTOR_I64)
     NODE_NAME_CASE(READ_VLENB)
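The element-type switch in getContainerForFixedLengthVector encodes a single rule: RVV's scalable MVTs assume 64 bits of register per LMUL, so a container at a given LMUL holds (LMul * 64) / EltBits known-minimum elements. A hedged restatement in closed form; the helper name is illustrative and the function is not part of the patch:

```cpp
// Closed-form equivalent of the switch above: i8 -> LMul*8 elements,
// i16/f16 -> LMul*4, i32/f32 -> LMul*2, i64/f64 -> LMul. For example,
// v4i32 at LMUL=1 maps to nxv2i32, and a v32i8 at LMUL=2 to nxv16i8.
static MVT getContainerClosedForm(MVT EltVT, unsigned LMul) {
  unsigned EltBits = EltVT.getSizeInBits();
  assert(EltBits >= 8 && EltBits <= 64 && "unexpected RVV element width");
  return MVT::getScalableVectorVT(EltVT, (LMul * 64) / EltBits);
}
```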
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -14,6 +14,15 @@
 ///
 //===----------------------------------------------------------------------===//
 
+def riscv_vmv_v_x_vl : SDNode<"RISCVISD::VMV_V_X_VL",
+                              SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<0>,
+                                                   SDTCisVT<1, XLenVT>,
+                                                   SDTCisVT<2, XLenVT>]>>;
+def riscv_vfmv_v_f_vl : SDNode<"RISCVISD::VFMV_V_F_VL",
+                               SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisFP<0>,
+                                                    SDTCisEltOfVec<1, 0>,
+                                                    SDTCisVT<2, XLenVT>]>>;
+
 def riscv_vmv_x_s : SDNode<"RISCVISD::VMV_X_S",
                            SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>,
                                                 SDTCisInt<1>]>>;
@@ -4048,10 +4057,10 @@
 }
 
 foreach vti = AllIntegerVectors in {
-  def : Pat<(vti.Vector (int_riscv_vmv_v_x GPR:$rs2, (XLenVT (VLOp GPR:$vl)))),
+  def : Pat<(vti.Vector (riscv_vmv_v_x_vl GPR:$rs2, (XLenVT (VLOp GPR:$vl)))),
             (!cast<Instruction>("PseudoVMV_V_X_"#vti.LMul.MX) $rs2, GPR:$vl, vti.SEW)>;
-  def : Pat<(vti.Vector (int_riscv_vmv_v_x simm5:$imm5, (XLenVT (VLOp GPR:$vl)))),
+  def : Pat<(vti.Vector (riscv_vmv_v_x_vl simm5:$imm5, (XLenVT (VLOp GPR:$vl)))),
             (!cast<Instruction>("PseudoVMV_V_I_"#vti.LMul.MX) simm5:$imm5, GPR:$vl, vti.SEW)>;
 }
@@ -4206,12 +4215,12 @@
 //===----------------------------------------------------------------------===//
 foreach fvti = AllFloatVectors in {
   // If we're splatting fpimm0, use vmv.v.x vd, x0.
-  def : Pat<(fvti.Vector (int_riscv_vfmv_v_f
+  def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl
             (fvti.Scalar (fpimm0)), (XLenVT (VLOp GPR:$vl)))),
             (!cast<Instruction>("PseudoVMV_V_I_"#fvti.LMul.MX) 0, GPR:$vl, fvti.SEW)>;
-  def : Pat<(fvti.Vector (int_riscv_vfmv_v_f
+  def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl
             (fvti.Scalar fvti.ScalarRegClass:$rs2), (XLenVT (VLOp GPR:$vl)))),
             (!cast<Instruction>("PseudoVFMV_V_" # fvti.ScalarSuffix # "_" # fvti.LMul.MX)
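The fpimm0 pattern above is safe because positive floating-point zero is an all-zero bit pattern at every SEW, so the integer-immediate splat produces identical vector contents without touching an FP register; negative zero (sign bit set) does not match fpimm0 and takes the vfmv.v.f path. A small standalone check of that premise, not taken from the patch:

```cpp
#include <cstdint>
#include <cstring>

// +0.0 is all-zero bits, so splatting it via the integer vmv.v.i path is
// bit-exact. -0.0 has the sign bit set and would fail this check.
static bool isPositiveZeroBits(double D) {
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits)); // bit-level view of the double
  return Bits == 0;
}
```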
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll
@@ -0,0 +1,233 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+
+define void @splat_v8f16(<8 x half>* %x, half %y) {
+; CHECK-LABEL: splat_v8f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 8
+; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, fa0
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <8 x half> undef, half %y, i32 0
+  %b = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> zeroinitializer
+  store <8 x half> %b, <8 x half>* %x
+  ret void
+}
+
+define void @splat_v4f32(<4 x float>* %x, float %y) {
+; CHECK-LABEL: splat_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 4
+; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, fa0
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <4 x float> undef, float %y, i32 0
+  %b = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> zeroinitializer
+  store <4 x float> %b, <4 x float>* %x
+  ret void
+}
+
+define void @splat_v2f64(<2 x double>* %x, double %y) {
+; CHECK-LABEL: splat_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 2
+; CHECK-NEXT:    vsetvli a1, a1, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, fa0
+; CHECK-NEXT:    vse64.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <2 x double> undef, double %y, i32 0
+  %b = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> zeroinitializer
+  store <2 x double> %b, <2 x double>* %x
+  ret void
+}
+
+define void @splat_16f16(<16 x half>* %x, half %y) {
+; LMULMAX2-LABEL: splat_16f16:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a1, zero, 16
+; LMULMAX2-NEXT:    vsetvli a1, a1, e16,m2,ta,mu
+; LMULMAX2-NEXT:    vfmv.v.f v26, fa0
+; LMULMAX2-NEXT:    vse16.v v26, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: splat_16f16:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a1, zero, 8
+; LMULMAX1-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
+; LMULMAX1-NEXT:    vfmv.v.f v25, fa0
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vse16.v v25, (a1)
+; LMULMAX1-NEXT:    vse16.v v25, (a0)
+; LMULMAX1-NEXT:    ret
+  %a = insertelement <16 x half> undef, half %y, i32 0
+  %b = shufflevector <16 x half> %a, <16 x half> undef, <16 x i32> zeroinitializer
+  store <16 x half> %b, <16 x half>* %x
+  ret void
+}
+
+define void @splat_v8f32(<8 x float>* %x, float %y) {
+; LMULMAX2-LABEL: splat_v8f32:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a1, zero, 8
+; LMULMAX2-NEXT:    vsetvli a1, a1, e32,m2,ta,mu
+; LMULMAX2-NEXT:    vfmv.v.f v26, fa0
+; LMULMAX2-NEXT:    vse32.v v26, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: splat_v8f32:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a1, zero, 4
+; LMULMAX1-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
+; LMULMAX1-NEXT:    vfmv.v.f v25, fa0
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vse32.v v25, (a1)
+; LMULMAX1-NEXT:    vse32.v v25, (a0)
+; LMULMAX1-NEXT:    ret
+  %a = insertelement <8 x float> undef, float %y, i32 0
+  %b = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> zeroinitializer
+  store <8 x float> %b, <8 x float>* %x
+  ret void
+}
+
+define void @splat_v4f64(<4 x double>* %x, double %y) {
+; LMULMAX2-LABEL: splat_v4f64:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a1, zero, 4
+; LMULMAX2-NEXT:    vsetvli a1, a1, e64,m2,ta,mu
+; LMULMAX2-NEXT:    vfmv.v.f v26, fa0
+; LMULMAX2-NEXT:    vse64.v v26, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: splat_v4f64:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a1, zero, 2
+; LMULMAX1-NEXT:    vsetvli a1, a1, e64,m1,ta,mu
+; LMULMAX1-NEXT:    vfmv.v.f v25, fa0
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vse64.v v25, (a1)
+; LMULMAX1-NEXT:    vse64.v v25, (a0)
+; LMULMAX1-NEXT:    ret
+  %a = insertelement <4 x double> undef, double %y, i32 0
+  %b = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> zeroinitializer
+  store <4 x double> %b, <4 x double>* %x
+  ret void
+}
+
+define void @splat_zero_v8f16(<8 x half>* %x) {
+; CHECK-LABEL: splat_zero_v8f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 8
+; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <8 x half> undef, half 0.0, i32 0
+  %b = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> zeroinitializer
+  store <8 x half> %b, <8 x half>* %x
+  ret void
+}
+
+define void @splat_zero_v4f32(<4 x float>* %x) {
+; CHECK-LABEL: splat_zero_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 4
+; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <4 x float> undef, float 0.0, i32 0
+  %b = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> zeroinitializer
+  store <4 x float> %b, <4 x float>* %x
+  ret void
+}
+
+define void @splat_zero_v2f64(<2 x double>* %x) {
+; CHECK-LABEL: splat_zero_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 2
+; CHECK-NEXT:    vsetvli a1, a1, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vse64.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <2 x double> undef, double 0.0, i32 0
+  %b = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> zeroinitializer
+  store <2 x double> %b, <2 x double>* %x
+  ret void
+}
+
+define void @splat_zero_16f16(<16 x half>* %x) {
+; LMULMAX2-LABEL: splat_zero_16f16:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a1, zero, 16
+; LMULMAX2-NEXT:    vsetvli a1, a1, e16,m2,ta,mu
+; LMULMAX2-NEXT:    vmv.v.i v26, 0
+; LMULMAX2-NEXT:    vse16.v v26, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: splat_zero_16f16:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a1, zero, 8
+; LMULMAX1-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
+; LMULMAX1-NEXT:    vmv.v.i v25, 0
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vse16.v v25, (a1)
+; LMULMAX1-NEXT:    vse16.v v25, (a0)
+; LMULMAX1-NEXT:    ret
+  %a = insertelement <16 x half> undef, half 0.0, i32 0
+  %b = shufflevector <16 x half> %a, <16 x half> undef, <16 x i32> zeroinitializer
+  store <16 x half> %b, <16 x half>* %x
+  ret void
+}
+
+define void @splat_zero_v8f32(<8 x float>* %x) {
+; LMULMAX2-LABEL: splat_zero_v8f32:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a1, zero, 8
+; LMULMAX2-NEXT:    vsetvli a1, a1, e32,m2,ta,mu
+; LMULMAX2-NEXT:    vmv.v.i v26, 0
+; LMULMAX2-NEXT:    vse32.v v26, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: splat_zero_v8f32:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a1, zero, 4
+; LMULMAX1-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
+; LMULMAX1-NEXT:    vmv.v.i v25, 0
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vse32.v v25, (a1)
+; LMULMAX1-NEXT:    vse32.v v25, (a0)
+; LMULMAX1-NEXT:    ret
+  %a = insertelement <8 x float> undef, float 0.0, i32 0
+  %b = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> zeroinitializer
+  store <8 x float> %b, <8 x float>* %x
+  ret void
+}
+
+define void @splat_zero_v4f64(<4 x double>* %x) {
+; LMULMAX2-LABEL: splat_zero_v4f64:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a1, zero, 4
+; LMULMAX2-NEXT:    vsetvli a1, a1, e64,m2,ta,mu
+; LMULMAX2-NEXT:    vmv.v.i v26, 0
+; LMULMAX2-NEXT:    vse64.v v26, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: splat_zero_v4f64:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a1, zero, 2
+; LMULMAX1-NEXT:    vsetvli a1, a1, e64,m1,ta,mu
+; LMULMAX1-NEXT:    vmv.v.i v25, 0
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vse64.v v25, (a1)
+; LMULMAX1-NEXT:    vse64.v v25, (a0)
+; LMULMAX1-NEXT:    ret
+  %a = insertelement <4 x double> undef, double 0.0, i32 0
+  %b = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> zeroinitializer
+  store <4 x double> %b, <4 x double>* %x
+  ret void
+}
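In the LMULMAX1 runs above, a 256-bit fixed vector exceeds the one-register cap, so it is legalized as two 128-bit halves: the same splat register is stored at the base pointer and again at a 16-byte offset (the `addi a1, a0, 16` in the LMULMAX1 bodies). A hedged sketch of that address math; the helper and its names are illustrative only:

```cpp
// With VLEN >= 128 and LMUL capped at 1, a 256-bit store splits into two
// 128-bit stores at byte offsets 0 and 16, matching the LMULMAX1 check
// lines (vse*.v at (a0) and at (a0)+16).
static void splitOffsetsForLMulMax1(unsigned VecBytes, unsigned PartBytes,
                                    unsigned Offsets[], unsigned &NumParts) {
  NumParts = VecBytes / PartBytes; // e.g. 32 / 16 == 2 parts
  for (unsigned I = 0; I < NumParts; ++I)
    Offsets[I] = I * PartBytes;    // 0, 16, ...
}
```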
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat-rv32.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat-rv32.ll
@@ -0,0 +1,437 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+
+define void @splat_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: splat_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 16
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a1
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <16 x i8> undef, i8 %y, i32 0
+  %b = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer
+  store <16 x i8> %b, <16 x i8>* %x
+  ret void
+}
+
+define void @splat_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: splat_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 8
+; CHECK-NEXT:    vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a1
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <8 x i16> undef, i16 %y, i32 0
+  %b = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> zeroinitializer
+  store <8 x i16> %b, <8 x i16>* %x
+  ret void
+}
+
+define void @splat_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: splat_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 4
+; CHECK-NEXT:    vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a1
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <4 x i32> undef, i32 %y, i32 0
+  %b = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> zeroinitializer
+  store <4 x i32> %b, <4 x i32>* %x
+  ret void
+}
+
+; FIXME: Support i64 splats on riscv32
+;define void @splat_v2i64(<2 x i64>* %x, i64 %y) {
+;  %a = insertelement <2 x i64> undef, i64 %y, i32 0
+;  %b = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer
+;  store <2 x i64> %b, <2 x i64>* %x
+;  ret void
+;}
+
+define void @splat_v32i8(<32 x i8>* %x, i8 %y) {
+; LMULMAX2-LABEL: splat_v32i8:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a2, zero, 32
+; LMULMAX2-NEXT:    vsetvli a2, a2, e8,m2,ta,mu
+; LMULMAX2-NEXT:    vmv.v.x v26, a1
+; LMULMAX2-NEXT:    vse8.v v26, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: splat_v32i8:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a2, zero, 16
+; LMULMAX1-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; LMULMAX1-NEXT:    vmv.v.x v25, a1
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vse8.v v25, (a1)
+; LMULMAX1-NEXT:    vse8.v v25, (a0)
+; LMULMAX1-NEXT:    ret
+  %a = insertelement <32 x i8> undef, i8 %y, i32 0
+  %b = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer
+  store <32 x i8> %b, <32 x i8>* %x
+  ret void
+}
+
+define void @splat_v16i16(<16 x i16>* %x, i16 %y) {
+; LMULMAX2-LABEL: splat_v16i16:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a2, zero, 16
+; LMULMAX2-NEXT:    vsetvli a2, a2, e16,m2,ta,mu
+; LMULMAX2-NEXT:    vmv.v.x v26, a1
+; LMULMAX2-NEXT:    vse16.v v26, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: splat_v16i16:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a2, zero, 8
+; LMULMAX1-NEXT:    vsetvli a2, a2, e16,m1,ta,mu
+; LMULMAX1-NEXT:    vmv.v.x v25, a1
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vse16.v v25, (a1)
+; LMULMAX1-NEXT:    vse16.v v25, (a0)
+; LMULMAX1-NEXT:    ret
+  %a = insertelement <16 x i16> undef, i16 %y, i32 0
+  %b = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> zeroinitializer
+  store <16 x i16> %b, <16 x i16>* %x
+  ret void
+}
+
+define void @splat_v8i32(<8 x i32>* %x, i32 %y) {
+; LMULMAX2-LABEL: splat_v8i32:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a2, zero, 8
+; LMULMAX2-NEXT:    vsetvli a2, a2, e32,m2,ta,mu
+; LMULMAX2-NEXT:    vmv.v.x v26, a1
+; LMULMAX2-NEXT:    vse32.v v26, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: splat_v8i32:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a2, zero, 4
+; LMULMAX1-NEXT:    vsetvli a2, a2, e32,m1,ta,mu
+; LMULMAX1-NEXT:    vmv.v.x v25, a1
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vse32.v v25, (a1)
+; LMULMAX1-NEXT:    vse32.v v25, (a0)
+; LMULMAX1-NEXT:    ret
+  %a = insertelement <8 x i32> undef, i32 %y, i32 0
+  %b = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> zeroinitializer
+  store <8 x i32> %b, <8 x i32>* %x
+  ret void
+}
+
+; FIXME: Support i64 splats on riscv32
+;define void @splat_v4i64(<4 x i64>* %x, i64 %y) {
+;  %a = insertelement <4 x i64> undef, i64 %y, i32 0
+;  %b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> zeroinitializer
+;  store <4 x i64> %b, <4 x i64>* %x
+;  ret void
+;}
+
+define void @splat_zero_v16i8(<16 x i8>* %x) {
+; CHECK-LABEL: splat_zero_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 16
+; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <16 x i8> undef, i8 0, i32 0
+  %b = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer
+  store <16 x i8> %b, <16 x i8>* %x
+  ret void
+}
+
+define void @splat_zero_v8i16(<8 x i16>* %x) {
+; CHECK-LABEL: splat_zero_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 8
+; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <8 x i16> undef, i16 0, i32 0
+  %b = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> zeroinitializer
+  store <8 x i16> %b, <8 x i16>* %x
+  ret void
+}
+
+define void @splat_zero_v4i32(<4 x i32>* %x) {
+; CHECK-LABEL: splat_zero_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 4
+; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <4 x i32> undef, i32 0, i32 0
+  %b = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> zeroinitializer
+  store <4 x i32> %b, <4 x i32>* %x
+  ret void
+}
+
+define void @splat_zero_v2i64(<2 x i64>* %x) {
+; CHECK-LABEL: splat_zero_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 4
+; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <2 x i64> undef, i64 0, i32 0
+  %b = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer
+  store <2 x i64> %b, <2 x i64>* %x
+  ret void
+}
+
+define void @splat_zero_v32i8(<32 x i8>* %x) {
+; LMULMAX2-LABEL: splat_zero_v32i8:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a1, zero, 32
+; LMULMAX2-NEXT:    vsetvli a1, a1, e8,m2,ta,mu
+; LMULMAX2-NEXT:    vmv.v.i v26, 0
+; LMULMAX2-NEXT:    vse8.v v26, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: splat_zero_v32i8:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a1, zero, 16
+; LMULMAX1-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
+; LMULMAX1-NEXT:    vmv.v.i v25, 0
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vse8.v v25, (a1)
+; LMULMAX1-NEXT:    vse8.v v25, (a0)
+; LMULMAX1-NEXT:    ret
+  %a = insertelement <32 x i8> undef, i8 0, i32 0
+  %b = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer
+  store <32 x i8> %b, <32 x i8>* %x
+  ret void
+}
+
+define void @splat_zero_v16i16(<16 x i16>* %x) {
+; LMULMAX2-LABEL: splat_zero_v16i16:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a1, zero, 16
+; LMULMAX2-NEXT:    vsetvli a1, a1, e16,m2,ta,mu
+; LMULMAX2-NEXT:    vmv.v.i v26, 0
+; LMULMAX2-NEXT:    vse16.v v26, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: splat_zero_v16i16:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a1, zero, 8
+; LMULMAX1-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
+; LMULMAX1-NEXT:    vmv.v.i v25, 0
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vse16.v v25, (a1)
+; LMULMAX1-NEXT:    vse16.v v25, (a0)
+; LMULMAX1-NEXT:    ret
+  %a = insertelement <16 x i16> undef, i16 0, i32 0
+  %b = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> zeroinitializer
+  store <16 x i16> %b, <16 x i16>* %x
+  ret void
+}
+
+define void @splat_zero_v8i32(<8 x i32>* %x) {
+; LMULMAX2-LABEL: splat_zero_v8i32:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a1, zero, 8
+; LMULMAX2-NEXT:    vsetvli a1, a1, e32,m2,ta,mu
+; LMULMAX2-NEXT:    vmv.v.i v26, 0
+; LMULMAX2-NEXT:    vse32.v v26, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: splat_zero_v8i32:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a1, zero, 4
+; LMULMAX1-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
+; LMULMAX1-NEXT:    vmv.v.i v25, 0
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vse32.v v25, (a1)
+; LMULMAX1-NEXT:    vse32.v v25, (a0)
+; LMULMAX1-NEXT:    ret
+  %a = insertelement <8 x i32> undef, i32 0, i32 0
+  %b = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> zeroinitializer
+  store <8 x i32> %b, <8 x i32>* %x
+  ret void
+}
+
+define void @splat_zero_v4i64(<4 x i64>* %x) {
+; LMULMAX2-LABEL: splat_zero_v4i64:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a1, zero, 8
+; LMULMAX2-NEXT:    vsetvli a1, a1, e32,m2,ta,mu
+; LMULMAX2-NEXT:    vmv.v.i v26, 0
+; LMULMAX2-NEXT:    vse32.v v26, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: splat_zero_v4i64:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a1, zero, 4
+; LMULMAX1-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
+; LMULMAX1-NEXT:    vmv.v.i v25, 0
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vse32.v v25, (a1)
+; LMULMAX1-NEXT:    vse32.v v25, (a0)
+; LMULMAX1-NEXT:    ret
+  %a = insertelement <4 x i64> undef, i64 0, i32 0
+  %b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> zeroinitializer
+  store <4 x i64> %b, <4 x i64>* %x
+  ret void
+}
+
+define void @splat_allones_v16i8(<16 x i8>* %x) {
+; CHECK-LABEL: splat_allones_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 16
+; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <16 x i8> undef, i8 -1, i32 0
+  %b = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer
+  store <16 x i8> %b, <16 x i8>* %x
+  ret void
+}
+
+define void @splat_allones_v8i16(<8 x i16>* %x) {
+; CHECK-LABEL: splat_allones_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 8
+; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <8 x i16> undef, i16 -1, i32 0
+  %b = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> zeroinitializer
+  store <8 x i16> %b, <8 x i16>* %x
+  ret void
+}
+
+define void @splat_allones_v4i32(<4 x i32>* %x) {
+; CHECK-LABEL: splat_allones_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 4
+; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <4 x i32> undef, i32 -1, i32 0
+  %b = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> zeroinitializer
+  store <4 x i32> %b, <4 x i32>* %x
+  ret void
+}
+
+define void @splat_allones_v2i64(<2 x i64>* %x) {
+; CHECK-LABEL: splat_allones_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 4
+; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <2 x i64> undef, i64 -1, i32 0
+  %b = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer
+  store <2 x i64> %b, <2 x i64>* %x
+  ret void
+}
+
+define void @splat_allones_v32i8(<32 x i8>* %x) {
+; LMULMAX2-LABEL: splat_allones_v32i8:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a1, zero, 32
+; LMULMAX2-NEXT:    vsetvli a1, a1, e8,m2,ta,mu
+; LMULMAX2-NEXT:    vmv.v.i v26, -1
+; LMULMAX2-NEXT:    vse8.v v26, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: splat_allones_v32i8:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a1, zero, 16
+; LMULMAX1-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
+; LMULMAX1-NEXT:    vmv.v.i v25, -1
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vse8.v v25, (a1)
+; LMULMAX1-NEXT:    vse8.v v25, (a0)
+; LMULMAX1-NEXT:    ret
+  %a = insertelement <32 x i8> undef, i8 -1, i32 0
+  %b = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer
+  store <32 x i8> %b, <32 x i8>* %x
+  ret void
+}
+
+define void @splat_allones_v16i16(<16 x i16>* %x) {
+; LMULMAX2-LABEL: splat_allones_v16i16:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a1, zero, 16
+; LMULMAX2-NEXT:    vsetvli a1, a1, e16,m2,ta,mu
+; LMULMAX2-NEXT:    vmv.v.i v26, -1
+; LMULMAX2-NEXT:    vse16.v v26, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: splat_allones_v16i16:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a1, zero, 8
+; LMULMAX1-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
+; LMULMAX1-NEXT:    vmv.v.i v25, -1
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vse16.v v25, (a1)
+; LMULMAX1-NEXT:    vse16.v v25, (a0)
+; LMULMAX1-NEXT:    ret
+  %a = insertelement <16 x i16> undef, i16 -1, i32 0
+  %b = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> zeroinitializer
+  store <16 x i16> %b, <16 x i16>* %x
+  ret void
+}
+
+define void @splat_allones_v8i32(<8 x i32>* %x) {
+; LMULMAX2-LABEL: splat_allones_v8i32:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a1, zero, 8
+; LMULMAX2-NEXT:    vsetvli a1, a1, e32,m2,ta,mu
+; LMULMAX2-NEXT:    vmv.v.i v26, -1
+; LMULMAX2-NEXT:    vse32.v v26, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: splat_allones_v8i32:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a1, zero, 4
+; LMULMAX1-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
+; LMULMAX1-NEXT:    vmv.v.i v25, -1
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vse32.v v25, (a1)
+; LMULMAX1-NEXT:    vse32.v v25, (a0)
+; LMULMAX1-NEXT:    ret
+  %a = insertelement <8 x i32> undef, i32 -1, i32 0
+  %b = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> zeroinitializer
+  store <8 x i32> %b, <8 x i32>* %x
+  ret void
+}
+
+define void @splat_allones_v4i64(<4 x i64>* %x) {
+; LMULMAX2-LABEL: splat_allones_v4i64:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a1, zero, 8
+; LMULMAX2-NEXT:    vsetvli a1, a1, e32,m2,ta,mu
+; LMULMAX2-NEXT:    vmv.v.i v26, -1
+; LMULMAX2-NEXT:    vse32.v v26, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: splat_allones_v4i64:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a1, zero, 4
+; LMULMAX1-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
+; LMULMAX1-NEXT:    vmv.v.i v25, -1
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vse32.v v25, (a1)
+; LMULMAX1-NEXT:    vse32.v v25, (a0)
+; LMULMAX1-NEXT:    ret
+  %a = insertelement <4 x i64> undef, i64 -1, i32 0
+  %b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> zeroinitializer
+  store <4 x i64> %b, <4 x i64>* %x
+  ret void
+}
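The rv32 file above splats constant 0 and -1 i64 vectors but not a runtime i64 (hence the FIXME blocks): a non-constant i64 occupies two GPRs on riscv32, while 0 and -1 need no scalar register at all and are simply re-typed as e32 splats with twice the element count, which is what the v2i64/v4i64 zero and allones checks show. A hedged restatement of that re-typing; the helper is illustrative, not patch code:

```cpp
// rv32: a splat of i64 0 or -1 can be emitted at SEW=32 with doubled VL,
// since both constants have identical upper and lower 32-bit halves.
// E.g. v2i64 zero becomes a VL=4, e32 vmv.v.i with immediate 0.
static void retypeConstI64SplatForRV32(unsigned NumElts, unsigned &NewSEW,
                                       unsigned &NewVL) {
  NewSEW = 32;         // i64 element viewed as two i32 lanes
  NewVL = NumElts * 2; // v2i64 -> VL of 4 at e32
}
```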
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat-rv64.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat-rv64.ll
@@ -0,0 +1,459 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+
+define void @splat_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: splat_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 16
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a1
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <16 x i8> undef, i8 %y, i32 0
+  %b = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer
+  store <16 x i8> %b, <16 x i8>* %x
+  ret void
+}
+
+define void @splat_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: splat_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 8
+; CHECK-NEXT:    vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a1
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <8 x i16> undef, i16 %y, i32 0
+  %b = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> zeroinitializer
+  store <8 x i16> %b, <8 x i16>* %x
+  ret void
+}
+
+define void @splat_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: splat_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 4
+; CHECK-NEXT:    vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a1
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <4 x i32> undef, i32 %y, i32 0
+  %b = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> zeroinitializer
+  store <4 x i32> %b, <4 x i32>* %x
+  ret void
+}
+
+define void @splat_v2i64(<2 x i64>* %x, i64 %y) {
+; CHECK-LABEL: splat_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 2
+; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a1
+; CHECK-NEXT:    vse64.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <2 x i64> undef, i64 %y, i32 0
+  %b = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer
+  store <2 x i64> %b, <2 x i64>* %x
+  ret void
+}
+
+define void @splat_v32i8(<32 x i8>* %x, i8 %y) {
+; LMULMAX2-LABEL: splat_v32i8:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a2, zero, 32
+; LMULMAX2-NEXT:    vsetvli a2, a2, e8,m2,ta,mu
+; LMULMAX2-NEXT:    vmv.v.x v26, a1
+; LMULMAX2-NEXT:    vse8.v v26, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: splat_v32i8:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a2, zero, 16
+; LMULMAX1-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; LMULMAX1-NEXT:    vmv.v.x v25, a1
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vse8.v v25, (a1)
+; LMULMAX1-NEXT:    vse8.v v25, (a0)
+; LMULMAX1-NEXT:    ret
+  %a = insertelement <32 x i8> undef, i8 %y, i32 0
+  %b = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer
+  store <32 x i8> %b, <32 x i8>* %x
+  ret void
+}
+
+define void @splat_v16i16(<16 x i16>* %x, i16 %y) {
+; LMULMAX2-LABEL: splat_v16i16:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a2, zero, 16
+; LMULMAX2-NEXT:    vsetvli a2, a2, e16,m2,ta,mu
+; LMULMAX2-NEXT:    vmv.v.x v26, a1
+; LMULMAX2-NEXT:    vse16.v v26, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: splat_v16i16:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a2, zero, 8
+; LMULMAX1-NEXT:    vsetvli a2, a2, e16,m1,ta,mu
+; LMULMAX1-NEXT:    vmv.v.x v25, a1
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vse16.v v25, (a1)
+; LMULMAX1-NEXT:    vse16.v v25, (a0)
+; LMULMAX1-NEXT:    ret
+  %a = insertelement <16 x i16> undef, i16 %y, i32 0
+  %b = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> zeroinitializer
+  store <16 x i16> %b, <16 x i16>* %x
+  ret void
+}
+
+define void @splat_v8i32(<8 x i32>* %x, i32 %y) {
+; LMULMAX2-LABEL: splat_v8i32:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a2, zero, 8
+; LMULMAX2-NEXT:    vsetvli a2, a2, e32,m2,ta,mu
+; LMULMAX2-NEXT:    vmv.v.x v26, a1
+; LMULMAX2-NEXT:    vse32.v v26, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: splat_v8i32:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a2, zero, 4
+; LMULMAX1-NEXT:    vsetvli a2, a2, e32,m1,ta,mu
+; LMULMAX1-NEXT:    vmv.v.x v25, a1
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vse32.v v25, (a1)
+; LMULMAX1-NEXT:    vse32.v v25, (a0)
+; LMULMAX1-NEXT:    ret
+  %a = insertelement <8 x i32> undef, i32 %y, i32 0
+  %b = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> zeroinitializer
+  store <8 x i32> %b, <8 x i32>* %x
+  ret void
+}
+
+define void @splat_v4i64(<4 x i64>* %x, i64 %y) {
+; LMULMAX2-LABEL: splat_v4i64:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a2, zero, 4
+; LMULMAX2-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
+; LMULMAX2-NEXT:    vmv.v.x v26, a1
+; LMULMAX2-NEXT:    vse64.v v26, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: splat_v4i64:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a2, zero, 2
+; LMULMAX1-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
+; LMULMAX1-NEXT:    vmv.v.x v25, a1
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vse64.v v25, (a1)
+; LMULMAX1-NEXT:    vse64.v v25, (a0)
+; LMULMAX1-NEXT:    ret
+  %a = insertelement <4 x i64> undef, i64 %y, i32 0
+  %b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> zeroinitializer
+  store <4 x i64> %b, <4 x i64>* %x
+  ret void
+}
+
+define void @splat_zero_v16i8(<16 x i8>* %x) {
+; CHECK-LABEL: splat_zero_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 16
+; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <16 x i8> undef, i8 0, i32 0
+  %b = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer
+  store <16 x i8> %b, <16 x i8>* %x
+  ret void
+}
+
+define void @splat_zero_v8i16(<8 x i16>* %x) {
+; CHECK-LABEL: splat_zero_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 8
+; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <8 x i16> undef, i16 0, i32 0
+  %b = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> zeroinitializer
+  store <8 x i16> %b, <8 x i16>* %x
+  ret void
+}
+
+define void @splat_zero_v4i32(<4 x i32>* %x) {
+; CHECK-LABEL: splat_zero_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 4
+; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <4 x i32> undef, i32 0, i32 0
+  %b = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> zeroinitializer
+  store <4 x i32> %b, <4 x i32>* %x
+  ret void
+}
+
+define void @splat_zero_v2i64(<2 x i64>* %x) {
+; CHECK-LABEL: splat_zero_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 2
+; CHECK-NEXT:    vsetvli a1, a1, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vse64.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <2 x i64> undef, i64 0, i32 0
+  %b = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer
+  store <2 x i64> %b, <2 x i64>* %x
+  ret void
+}
+
+define void @splat_zero_v32i8(<32 x i8>* %x) {
+; LMULMAX2-LABEL: splat_zero_v32i8:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a1, zero, 32
+; LMULMAX2-NEXT:    vsetvli a1, a1, e8,m2,ta,mu
+; LMULMAX2-NEXT:    vmv.v.i v26, 0
+; LMULMAX2-NEXT:    vse8.v v26, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: splat_zero_v32i8:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a1, zero, 16
+; LMULMAX1-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
+; LMULMAX1-NEXT:    vmv.v.i v25, 0
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vse8.v v25, (a1)
+; LMULMAX1-NEXT:    vse8.v v25, (a0)
+; LMULMAX1-NEXT:    ret
+  %a = insertelement <32 x i8> undef, i8 0, i32 0
+  %b = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer
+  store <32 x i8> %b, <32 x i8>* %x
+  ret void
+}
+
+define void @splat_zero_v16i16(<16 x i16>* %x) {
+; LMULMAX2-LABEL: splat_zero_v16i16:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a1, zero, 16
+; LMULMAX2-NEXT:    vsetvli a1, a1, e16,m2,ta,mu
+; LMULMAX2-NEXT:    vmv.v.i v26, 0
+; LMULMAX2-NEXT:    vse16.v v26, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: splat_zero_v16i16:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a1, zero, 8
+; LMULMAX1-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
+; LMULMAX1-NEXT:    vmv.v.i v25, 0
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vse16.v v25, (a1)
+; LMULMAX1-NEXT:    vse16.v v25, (a0)
+; LMULMAX1-NEXT:    ret
+  %a = insertelement <16 x i16> undef, i16 0, i32 0
+  %b = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> zeroinitializer
+  store <16 x i16> %b, <16 x i16>* %x
+  ret void
+}
+
+define void @splat_zero_v8i32(<8 x i32>* %x) {
+; LMULMAX2-LABEL: splat_zero_v8i32:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a1, zero, 8
+; LMULMAX2-NEXT:    vsetvli a1, a1, e32,m2,ta,mu
+; LMULMAX2-NEXT:    vmv.v.i v26, 0
+; LMULMAX2-NEXT:    vse32.v v26, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: splat_zero_v8i32:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a1, zero, 4
+; LMULMAX1-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
+; LMULMAX1-NEXT:    vmv.v.i v25, 0
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vse32.v v25, (a1)
+; LMULMAX1-NEXT:    vse32.v v25, (a0)
+; LMULMAX1-NEXT:    ret
+  %a = insertelement <8 x i32> undef, i32 0, i32 0
+  %b = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> zeroinitializer
+  store <8 x i32> %b, <8 x i32>* %x
+  ret void
+}
+
+define void @splat_zero_v4i64(<4 x i64>* %x) {
+; LMULMAX2-LABEL: splat_zero_v4i64:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a1, zero, 4
+; LMULMAX2-NEXT:    vsetvli a1, a1, e64,m2,ta,mu
+; LMULMAX2-NEXT:    vmv.v.i v26, 0
+; LMULMAX2-NEXT:    vse64.v v26, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: splat_zero_v4i64:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a1, zero, 2
+; LMULMAX1-NEXT:    vsetvli a1, a1, e64,m1,ta,mu
+; LMULMAX1-NEXT:    vmv.v.i v25, 0
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vse64.v v25, (a1)
+; LMULMAX1-NEXT:    vse64.v v25, (a0)
+; LMULMAX1-NEXT:    ret
+  %a = insertelement <4 x i64> undef, i64 0, i32 0
+  %b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> zeroinitializer
+  store <4 x i64> %b, <4 x i64>* %x
+  ret void
+}
+
+define void @splat_allones_v16i8(<16 x i8>* %x) {
+; CHECK-LABEL: splat_allones_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 16
+; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <16 x i8> undef, i8 -1, i32 0
+  %b = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer
+  store <16 x i8> %b, <16 x i8>* %x
+  ret void
+}
+
+define void @splat_allones_v8i16(<8 x i16>* %x) {
+; CHECK-LABEL: splat_allones_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 8
+; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <8 x i16> undef, i16 -1, i32 0
+  %b = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> zeroinitializer
+  store <8 x i16> %b, <8 x i16>* %x
+  ret void
+}
+
+define void @splat_allones_v4i32(<4 x i32>* %x) {
+; CHECK-LABEL: splat_allones_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 4
+; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <4 x i32> undef, i32 -1, i32 0
+  %b = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> zeroinitializer
+  store <4 x i32> %b, <4 x i32>* %x
+  ret void
+}
+
+define void @splat_allones_v2i64(<2 x i64>* %x) {
+; CHECK-LABEL: splat_allones_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 2
+; CHECK-NEXT:    vsetvli a1, a1, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vse64.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <2 x i64> undef, i64 -1, i32 0
+  %b = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer
+  store <2 x i64> %b, <2 x i64>* %x
+  ret void
+}
+
+define void @splat_allones_v32i8(<32 x i8>* %x) {
+; LMULMAX2-LABEL: splat_allones_v32i8:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a1, zero, 32
+; LMULMAX2-NEXT:    vsetvli a1, a1, e8,m2,ta,mu
+; LMULMAX2-NEXT:    vmv.v.i v26, -1
+; LMULMAX2-NEXT:    vse8.v v26, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: splat_allones_v32i8:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a1, zero, 16
+; LMULMAX1-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
+; LMULMAX1-NEXT:    vmv.v.i v25, -1
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vse8.v v25, (a1)
+; LMULMAX1-NEXT:    vse8.v v25, (a0)
+; LMULMAX1-NEXT:    ret
+  %a = insertelement <32 x i8> undef, i8 -1, i32 0
+  %b = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer
+  store <32 x i8> %b, <32 x i8>* %x
+  ret void
+}
+
+define void @splat_allones_v16i16(<16 x i16>* %x) {
+; LMULMAX2-LABEL: splat_allones_v16i16:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a1, zero, 16
+; LMULMAX2-NEXT:    vsetvli a1, a1, e16,m2,ta,mu
+; LMULMAX2-NEXT:    vmv.v.i v26, -1
+; LMULMAX2-NEXT:    vse16.v v26, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: splat_allones_v16i16:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a1, zero, 8
+; LMULMAX1-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
+; LMULMAX1-NEXT:    vmv.v.i v25, -1
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vse16.v v25, (a1)
+; LMULMAX1-NEXT:    vse16.v v25, (a0)
+; LMULMAX1-NEXT:    ret
+  %a = insertelement <16 x i16> undef, i16 -1, i32 0
+  %b = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> zeroinitializer
+  store <16 x i16> %b, <16 x i16>* %x
+  ret void
+}
+
+define void @splat_allones_v8i32(<8 x i32>* %x) {
+; LMULMAX2-LABEL: splat_allones_v8i32:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a1, zero, 8
+; LMULMAX2-NEXT:    vsetvli a1, a1, e32,m2,ta,mu
+; LMULMAX2-NEXT:    vmv.v.i v26, -1
+; LMULMAX2-NEXT:    vse32.v v26, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: splat_allones_v8i32:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a1, zero, 4
+; LMULMAX1-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
+; LMULMAX1-NEXT:    vmv.v.i v25, -1
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vse32.v v25, (a1)
+; LMULMAX1-NEXT:    vse32.v v25, (a0)
+; LMULMAX1-NEXT:    ret
+  %a = insertelement <8 x i32> undef, i32 -1, i32 0
+  %b = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> zeroinitializer
+  store <8 x i32> %b, <8 x i32>* %x
+  ret void
+}
+
+define void @splat_allones_v4i64(<4 x i64>* %x) {
+; LMULMAX2-LABEL: splat_allones_v4i64:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a1, zero, 4
+; LMULMAX2-NEXT:    vsetvli a1, a1, e64,m2,ta,mu
+; LMULMAX2-NEXT:    vmv.v.i v26, -1
+; LMULMAX2-NEXT:    vse64.v v26, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: splat_allones_v4i64:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a1, zero, 2
+; LMULMAX1-NEXT:    vsetvli a1, a1, e64,m1,ta,mu
+; LMULMAX1-NEXT:    vmv.v.i v25, -1
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vse64.v v25, (a1)
+; LMULMAX1-NEXT:    vse64.v v25, (a0)
+; LMULMAX1-NEXT:    ret
+  %a = insertelement <4 x i64> undef, i64 -1, i32 0
+  %b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> zeroinitializer
+  store <4 x i64> %b, <4 x i64>* %x
+  ret void
+}