diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -19517,13 +19517,6 @@ } } - // A splat of a single element is a SPLAT_VECTOR if supported on the target. - if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand) - if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) { - assert(!V.isUndef() && "Splat of undef should have been handled earlier"); - return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V); - } - // Check if we can express BUILD VECTOR via subvector extract. if (!LegalTypes && (N->getNumOperands() > 1)) { SDValue Op0 = N->getOperand(0); @@ -19565,6 +19558,14 @@ if (SDValue V = reduceBuildVecToShuffle(N)) return V; + // A splat of a single element is a SPLAT_VECTOR if supported on the target. + // Do this late as some of the above may replace the splat. + if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand) + if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) { + assert(!V.isUndef() && "Splat of undef should have been handled earlier"); + return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V); + } + return SDValue(); } diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -636,6 +636,15 @@ continue; } + // Use SPLAT_VECTOR to prevent type legalization from destroying the + // splats when type legalizing i64 scalar on RV32. + // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs + // improvements first. 
+ if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) { + setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); + setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom); + } + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); @@ -1265,6 +1274,23 @@ return false; } +static SDValue lowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + MVT VT = Op.getSimpleValueType(); + assert(VT.isFixedLengthVector() && "Unexpected vector!"); + + MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); + + SDLoc DL(Op); + SDValue Mask, VL; + std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); + + unsigned Opc = + VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL; + SDValue Splat = DAG.getNode(Opc, DL, ContainerVT, Op.getOperand(0), VL); + return convertFromScalableVector(VT, Splat, DAG, Subtarget); +} + static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { MVT VT = Op.getSimpleValueType(); @@ -1515,8 +1541,7 @@ // vector nxvXi64 VT. 
static SDValue splatPartsI64ThroughStack(const SDLoc &DL, MVT VT, SDValue Lo, SDValue Hi, SDValue VL, - SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { + SelectionDAG &DAG) { assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() && Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 && "Unexpected VTs!"); @@ -1540,29 +1565,41 @@ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); - MVT XLenVT = Subtarget.getXLenVT(); SDVTList VTs = DAG.getVTList({VT, MVT::Other}); - SDValue IntID = DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT); - SDValue Ops[] = {Chain, IntID, StackSlot, DAG.getRegister(RISCV::X0, XLenVT), - VL}; + SDValue IntID = DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64); + SDValue Ops[] = {Chain, IntID, StackSlot, + DAG.getRegister(RISCV::X0, MVT::i64), VL}; return DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MVT::i64, MPI, Align(8), MachineMemOperand::MOLoad); } +static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Lo, + SDValue Hi, SDValue VL, SelectionDAG &DAG) { + if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) { + int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue(); + int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue(); + // If Hi constant is all the same sign bit as Lo, lower this as a custom + // node in order to try and match RVV vector/scalar instructions. + if ((LoC >> 31) == HiC) + return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Lo, VL); + } + + // Fall back to a stack store and stride x0 vector load. + return splatPartsI64ThroughStack(DL, VT, Lo, Hi, VL, DAG); +} + // Called by type legalization to handle splat of i64 on RV32. // FIXME: We can optimize this when the type has sign or zero bits in one // of the halves. 
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Scalar, - SDValue VL, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { + SDValue VL, SelectionDAG &DAG) { + assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!"); SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar, DAG.getConstant(0, DL, MVT::i32)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar, DAG.getConstant(1, DL, MVT::i32)); - - // Fall back to a stack store and stride x0 vector load. - return splatPartsI64ThroughStack(DL, VT, Lo, Hi, VL, DAG, Subtarget); + return splatPartsI64WithVL(DL, VT, Lo, Hi, VL, DAG); } // This function lowers a splat of a scalar operand Splat with the vector @@ -1591,17 +1628,8 @@ assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 && "Unexpected scalar for splat lowering!"); - // If this is a sign-extended 32-bit constant, we can truncate it and rely - // on the instruction to sign-extend since SEW>XLEN. - if (auto *CVal = dyn_cast<ConstantSDNode>(Scalar)) { - if (isInt<32>(CVal->getSExtValue())) - return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, - DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32), - VL); - } - // Otherwise use the more complicated splatting algorithm. 
- return splatSplitI64WithVL(DL, VT, Scalar, VL, DAG, Subtarget); + return splatSplitI64WithVL(DL, VT, Scalar, VL, DAG); } static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, @@ -2209,6 +2237,8 @@ return lowerVECTOR_REVERSE(Op, DAG); case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG, Subtarget); + case ISD::SPLAT_VECTOR: + return lowerSPLAT_VECTOR(Op, DAG, Subtarget); case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget); case ISD::CONCAT_VECTORS: { @@ -2781,6 +2811,17 @@ SDValue Lo = Op.getOperand(0); SDValue Hi = Op.getOperand(1); + if (VecVT.isFixedLengthVector()) { + MVT ContainerVT = getContainerForFixedLengthVector(VecVT); + SDLoc DL(Op); + SDValue Mask, VL; + std::tie(Mask, VL) = + getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); + + SDValue Res = splatPartsI64WithVL(DL, ContainerVT, Lo, Hi, VL, DAG); + return convertFromScalableVector(VecVT, Res, DAG, Subtarget); + } + if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) { int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue(); int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue(); @@ -2797,8 +2838,8 @@ return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo); // Fall back to use a stack store and stride x0 vector load. Use X0 as VL. - return splatPartsI64ThroughStack( - DL, VecVT, Lo, Hi, DAG.getRegister(RISCV::X0, MVT::i64), DAG, Subtarget); + return splatPartsI64ThroughStack(DL, VecVT, Lo, Hi, + DAG.getRegister(RISCV::X0, MVT::i64), DAG); } // Custom-lower extensions from mask vectors by using a vselect either with 1 @@ -3136,7 +3177,7 @@ // VL should be the last operand. 
SDValue VL = Op.getOperand(Op.getNumOperands() - 1); assert(VL.getValueType() == XLenVT); - ScalarOp = splatSplitI64WithVL(DL, VT, ScalarOp, VL, DAG, Subtarget); + ScalarOp = splatSplitI64WithVL(DL, VT, ScalarOp, VL, DAG); return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands); } @@ -3204,12 +3245,9 @@ // FIXME: This is probably not the best way to do this, but it is // consistent with INSERT_VECTOR_ELT lowering so it is a good starting // point. - // vmv.v.x vX, hi - // vsll.vx vX, vX, /*32*/ - // vmv.v.x vY, lo - // vsll.vx vY, vY, /*32*/ - // vsrl.vx vY, vY, /*32*/ - // vor.vv vX, vX, vY + // sw lo, (a0) + // sw hi, 4(a0) + // vlse vX, (a0) // // vid.v vVid // vmseq.vx mMask, vVid, 0 @@ -3218,8 +3256,7 @@ SDValue Vec = Op.getOperand(1); SDValue VL = Op.getOperand(3); - SDValue SplattedVal = - splatSplitI64WithVL(DL, VT, Scalar, VL, DAG, Subtarget); + SDValue SplattedVal = splatSplitI64WithVL(DL, VT, Scalar, VL, DAG); SDValue SplattedIdx = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getConstant(0, DL, MVT::i32), VL); diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll @@ -271,9 +271,8 @@ ; RV32-NEXT: addi a0, a0, %lo(.LCPI12_0) ; RV32-NEXT: vsetivli a1, 8, e16,m1,ta,mu ; RV32-NEXT: vle16.v v25, (a0) -; RV32-NEXT: vsetivli a0, 16, e32,m4,ta,mu -; RV32-NEXT: vmv.v.i v12, -1 ; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; RV32-NEXT: vmv.v.i v12, -1 ; RV32-NEXT: vrgatherei16.vv v28, v12, v25 ; RV32-NEXT: addi a0, zero, 113 ; RV32-NEXT: vsetivli a1, 1, e8,m1,ta,mu diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll @@ -48,35 +48,41 @@ define void @splat_v2i64(<2 
x i64>* %x, i64 %y) { ; LMULMAX8-RV32-LABEL: splat_v2i64: ; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: addi a3, zero, 5 -; LMULMAX8-RV32-NEXT: vsetivli a4, 1, e8,m1,ta,mu -; LMULMAX8-RV32-NEXT: vmv.s.x v0, a3 -; LMULMAX8-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX8-RV32-NEXT: vmv.v.x v25, a2 -; LMULMAX8-RV32-NEXT: vmerge.vxm v25, v25, a1, v0 -; LMULMAX8-RV32-NEXT: vse32.v v25, (a0) +; LMULMAX8-RV32-NEXT: addi sp, sp, -16 +; LMULMAX8-RV32-NEXT: .cfi_def_cfa_offset 16 +; LMULMAX8-RV32-NEXT: sw a2, 12(sp) +; LMULMAX8-RV32-NEXT: sw a1, 8(sp) +; LMULMAX8-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; LMULMAX8-RV32-NEXT: addi a1, sp, 8 +; LMULMAX8-RV32-NEXT: vlse64.v v25, (a1), zero +; LMULMAX8-RV32-NEXT: vse64.v v25, (a0) +; LMULMAX8-RV32-NEXT: addi sp, sp, 16 ; LMULMAX8-RV32-NEXT: ret ; ; LMULMAX2-RV32-LABEL: splat_v2i64: ; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: addi a3, zero, 5 -; LMULMAX2-RV32-NEXT: vsetivli a4, 1, e8,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a3 -; LMULMAX2-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v25, a2 -; LMULMAX2-RV32-NEXT: vmerge.vxm v25, v25, a1, v0 -; LMULMAX2-RV32-NEXT: vse32.v v25, (a0) +; LMULMAX2-RV32-NEXT: addi sp, sp, -16 +; LMULMAX2-RV32-NEXT: .cfi_def_cfa_offset 16 +; LMULMAX2-RV32-NEXT: sw a2, 12(sp) +; LMULMAX2-RV32-NEXT: sw a1, 8(sp) +; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; LMULMAX2-RV32-NEXT: addi a1, sp, 8 +; LMULMAX2-RV32-NEXT: vlse64.v v25, (a1), zero +; LMULMAX2-RV32-NEXT: vse64.v v25, (a0) +; LMULMAX2-RV32-NEXT: addi sp, sp, 16 ; LMULMAX2-RV32-NEXT: ret ; ; LMULMAX1-RV32-LABEL: splat_v2i64: ; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: addi a3, zero, 5 -; LMULMAX1-RV32-NEXT: vsetivli a4, 1, e8,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.s.x v0, a3 -; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v25, a2 -; LMULMAX1-RV32-NEXT: vmerge.vxm v25, v25, a1, v0 -; LMULMAX1-RV32-NEXT: vse32.v v25, (a0) +; LMULMAX1-RV32-NEXT: addi sp, sp, -16 +; 
LMULMAX1-RV32-NEXT: .cfi_def_cfa_offset 16 +; LMULMAX1-RV32-NEXT: sw a2, 12(sp) +; LMULMAX1-RV32-NEXT: sw a1, 8(sp) +; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: addi a1, sp, 8 +; LMULMAX1-RV32-NEXT: vlse64.v v25, (a1), zero +; LMULMAX1-RV32-NEXT: vse64.v v25, (a0) +; LMULMAX1-RV32-NEXT: addi sp, sp, 16 ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX8-RV64-LABEL: splat_v2i64: @@ -197,24 +203,28 @@ define void @splat_v4i64(<4 x i64>* %x, i64 %y) { ; LMULMAX8-RV32-LABEL: splat_v4i64: ; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: addi a3, zero, 85 -; LMULMAX8-RV32-NEXT: vsetivli a4, 1, e8,m1,ta,mu -; LMULMAX8-RV32-NEXT: vmv.s.x v0, a3 -; LMULMAX8-RV32-NEXT: vsetivli a3, 8, e32,m2,ta,mu -; LMULMAX8-RV32-NEXT: vmv.v.x v26, a2 -; LMULMAX8-RV32-NEXT: vmerge.vxm v26, v26, a1, v0 -; LMULMAX8-RV32-NEXT: vse32.v v26, (a0) +; LMULMAX8-RV32-NEXT: addi sp, sp, -16 +; LMULMAX8-RV32-NEXT: .cfi_def_cfa_offset 16 +; LMULMAX8-RV32-NEXT: sw a2, 12(sp) +; LMULMAX8-RV32-NEXT: sw a1, 8(sp) +; LMULMAX8-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu +; LMULMAX8-RV32-NEXT: addi a1, sp, 8 +; LMULMAX8-RV32-NEXT: vlse64.v v26, (a1), zero +; LMULMAX8-RV32-NEXT: vse64.v v26, (a0) +; LMULMAX8-RV32-NEXT: addi sp, sp, 16 ; LMULMAX8-RV32-NEXT: ret ; ; LMULMAX2-RV32-LABEL: splat_v4i64: ; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: addi a3, zero, 85 -; LMULMAX2-RV32-NEXT: vsetivli a4, 1, e8,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a3 -; LMULMAX2-RV32-NEXT: vsetivli a3, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v26, a2 -; LMULMAX2-RV32-NEXT: vmerge.vxm v26, v26, a1, v0 -; LMULMAX2-RV32-NEXT: vse32.v v26, (a0) +; LMULMAX2-RV32-NEXT: addi sp, sp, -16 +; LMULMAX2-RV32-NEXT: .cfi_def_cfa_offset 16 +; LMULMAX2-RV32-NEXT: sw a2, 12(sp) +; LMULMAX2-RV32-NEXT: sw a1, 8(sp) +; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu +; LMULMAX2-RV32-NEXT: addi a1, sp, 8 +; LMULMAX2-RV32-NEXT: vlse64.v v26, (a1), zero +; LMULMAX2-RV32-NEXT: vse64.v v26, (a0) +; LMULMAX2-RV32-NEXT: addi sp, sp, 
16 ; LMULMAX2-RV32-NEXT: ret ; ; LMULMAX1-RV32-LABEL: splat_v4i64: @@ -298,47 +308,12 @@ } define void @splat_zero_v2i64(<2 x i64>* %x) { -; LMULMAX8-RV32-LABEL: splat_zero_v2i64: -; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; LMULMAX8-RV32-NEXT: vmv.v.i v25, 0 -; LMULMAX8-RV32-NEXT: vse32.v v25, (a0) -; LMULMAX8-RV32-NEXT: ret -; -; LMULMAX2-RV32-LABEL: splat_zero_v2i64: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.i v25, 0 -; LMULMAX2-RV32-NEXT: vse32.v v25, (a0) -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX1-RV32-LABEL: splat_zero_v2i64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.i v25, 0 -; LMULMAX1-RV32-NEXT: vse32.v v25, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX8-RV64-LABEL: splat_zero_v2i64: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX8-RV64-NEXT: vmv.v.i v25, 0 -; LMULMAX8-RV64-NEXT: vse64.v v25, (a0) -; LMULMAX8-RV64-NEXT: ret -; -; LMULMAX2-RV64-LABEL: splat_zero_v2i64: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV64-NEXT: vmv.v.i v25, 0 -; LMULMAX2-RV64-NEXT: vse64.v v25, (a0) -; LMULMAX2-RV64-NEXT: ret -; -; LMULMAX1-RV64-LABEL: splat_zero_v2i64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV64-NEXT: vmv.v.i v25, 0 -; LMULMAX1-RV64-NEXT: vse64.v v25, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: splat_zero_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vse64.v v25, (a0) +; CHECK-NEXT: ret %a = insertelement <2 x i64> undef, i64 0, i32 0 %b = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer store <2 x i64> %b, <2 x i64>* %x @@ -435,19 +410,19 @@ } define void @splat_zero_v4i64(<4 x i64>* %x) { -; LMULMAX8-RV32-LABEL: splat_zero_v4i64: -; LMULMAX8-RV32: # %bb.0: -; 
LMULMAX8-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu -; LMULMAX8-RV32-NEXT: vmv.v.i v26, 0 -; LMULMAX8-RV32-NEXT: vse32.v v26, (a0) -; LMULMAX8-RV32-NEXT: ret +; LMULMAX8-LABEL: splat_zero_v4i64: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetivli a1, 4, e64,m2,ta,mu +; LMULMAX8-NEXT: vmv.v.i v26, 0 +; LMULMAX8-NEXT: vse64.v v26, (a0) +; LMULMAX8-NEXT: ret ; -; LMULMAX2-RV32-LABEL: splat_zero_v4i64: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.i v26, 0 -; LMULMAX2-RV32-NEXT: vse32.v v26, (a0) -; LMULMAX2-RV32-NEXT: ret +; LMULMAX2-LABEL: splat_zero_v4i64: +; LMULMAX2: # %bb.0: +; LMULMAX2-NEXT: vsetivli a1, 4, e64,m2,ta,mu +; LMULMAX2-NEXT: vmv.v.i v26, 0 +; LMULMAX2-NEXT: vse64.v v26, (a0) +; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: splat_zero_v4i64: ; LMULMAX1-RV32: # %bb.0: @@ -458,20 +433,6 @@ ; LMULMAX1-RV32-NEXT: vse32.v v25, (a0) ; LMULMAX1-RV32-NEXT: ret ; -; LMULMAX8-RV64-LABEL: splat_zero_v4i64: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX8-RV64-NEXT: vmv.v.i v26, 0 -; LMULMAX8-RV64-NEXT: vse64.v v26, (a0) -; LMULMAX8-RV64-NEXT: ret -; -; LMULMAX2-RV64-LABEL: splat_zero_v4i64: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV64-NEXT: vmv.v.i v26, 0 -; LMULMAX2-RV64-NEXT: vse64.v v26, (a0) -; LMULMAX2-RV64-NEXT: ret -; ; LMULMAX1-RV64-LABEL: splat_zero_v4i64: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu @@ -526,47 +487,12 @@ } define void @splat_allones_v2i64(<2 x i64>* %x) { -; LMULMAX8-RV32-LABEL: splat_allones_v2i64: -; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; LMULMAX8-RV32-NEXT: vmv.v.i v25, -1 -; LMULMAX8-RV32-NEXT: vse32.v v25, (a0) -; LMULMAX8-RV32-NEXT: ret -; -; LMULMAX2-RV32-LABEL: splat_allones_v2i64: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.i v25, -1 -; 
LMULMAX2-RV32-NEXT: vse32.v v25, (a0) -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX1-RV32-LABEL: splat_allones_v2i64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.i v25, -1 -; LMULMAX1-RV32-NEXT: vse32.v v25, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX8-RV64-LABEL: splat_allones_v2i64: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX8-RV64-NEXT: vmv.v.i v25, -1 -; LMULMAX8-RV64-NEXT: vse64.v v25, (a0) -; LMULMAX8-RV64-NEXT: ret -; -; LMULMAX2-RV64-LABEL: splat_allones_v2i64: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV64-NEXT: vmv.v.i v25, -1 -; LMULMAX2-RV64-NEXT: vse64.v v25, (a0) -; LMULMAX2-RV64-NEXT: ret -; -; LMULMAX1-RV64-LABEL: splat_allones_v2i64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV64-NEXT: vmv.v.i v25, -1 -; LMULMAX1-RV64-NEXT: vse64.v v25, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: splat_allones_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vse64.v v25, (a0) +; CHECK-NEXT: ret %a = insertelement <2 x i64> undef, i64 -1, i32 0 %b = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer store <2 x i64> %b, <2 x i64>* %x @@ -663,19 +589,19 @@ } define void @splat_allones_v4i64(<4 x i64>* %x) { -; LMULMAX8-RV32-LABEL: splat_allones_v4i64: -; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu -; LMULMAX8-RV32-NEXT: vmv.v.i v26, -1 -; LMULMAX8-RV32-NEXT: vse32.v v26, (a0) -; LMULMAX8-RV32-NEXT: ret +; LMULMAX8-LABEL: splat_allones_v4i64: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetivli a1, 4, e64,m2,ta,mu +; LMULMAX8-NEXT: vmv.v.i v26, -1 +; LMULMAX8-NEXT: vse64.v v26, (a0) +; LMULMAX8-NEXT: ret ; -; LMULMAX2-RV32-LABEL: splat_allones_v4i64: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: 
vmv.v.i v26, -1 -; LMULMAX2-RV32-NEXT: vse32.v v26, (a0) -; LMULMAX2-RV32-NEXT: ret +; LMULMAX2-LABEL: splat_allones_v4i64: +; LMULMAX2: # %bb.0: +; LMULMAX2-NEXT: vsetivli a1, 4, e64,m2,ta,mu +; LMULMAX2-NEXT: vmv.v.i v26, -1 +; LMULMAX2-NEXT: vse64.v v26, (a0) +; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: splat_allones_v4i64: ; LMULMAX1-RV32: # %bb.0: @@ -686,20 +612,6 @@ ; LMULMAX1-RV32-NEXT: vse32.v v25, (a0) ; LMULMAX1-RV32-NEXT: ret ; -; LMULMAX8-RV64-LABEL: splat_allones_v4i64: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX8-RV64-NEXT: vmv.v.i v26, -1 -; LMULMAX8-RV64-NEXT: vse64.v v26, (a0) -; LMULMAX8-RV64-NEXT: ret -; -; LMULMAX2-RV64-LABEL: splat_allones_v4i64: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV64-NEXT: vmv.v.i v26, -1 -; LMULMAX2-RV64-NEXT: vse64.v v26, (a0) -; LMULMAX2-RV64-NEXT: ret -; ; LMULMAX1-RV64-LABEL: splat_allones_v4i64: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu @@ -719,27 +631,21 @@ ; FIXME: We should prevent this and use the implicit sign extension of vmv.v.x ; with SEW=64 on RV32. 
define void @splat_allones_with_use_v4i64(<4 x i64>* %x) { -; LMULMAX8-RV32-LABEL: splat_allones_with_use_v4i64: -; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX8-RV32-NEXT: vle64.v v26, (a0) -; LMULMAX8-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu -; LMULMAX8-RV32-NEXT: vmv.v.i v28, -1 -; LMULMAX8-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX8-RV32-NEXT: vadd.vv v26, v26, v28 -; LMULMAX8-RV32-NEXT: vse64.v v26, (a0) -; LMULMAX8-RV32-NEXT: ret +; LMULMAX8-LABEL: splat_allones_with_use_v4i64: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetivli a1, 4, e64,m2,ta,mu +; LMULMAX8-NEXT: vle64.v v26, (a0) +; LMULMAX8-NEXT: vadd.vi v26, v26, -1 +; LMULMAX8-NEXT: vse64.v v26, (a0) +; LMULMAX8-NEXT: ret ; -; LMULMAX2-RV32-LABEL: splat_allones_with_use_v4i64: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vle64.v v26, (a0) -; LMULMAX2-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.i v28, -1 -; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vadd.vv v26, v26, v28 -; LMULMAX2-RV32-NEXT: vse64.v v26, (a0) -; LMULMAX2-RV32-NEXT: ret +; LMULMAX2-LABEL: splat_allones_with_use_v4i64: +; LMULMAX2: # %bb.0: +; LMULMAX2-NEXT: vsetivli a1, 4, e64,m2,ta,mu +; LMULMAX2-NEXT: vle64.v v26, (a0) +; LMULMAX2-NEXT: vadd.vi v26, v26, -1 +; LMULMAX2-NEXT: vse64.v v26, (a0) +; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: splat_allones_with_use_v4i64: ; LMULMAX1-RV32: # %bb.0: @@ -756,22 +662,6 @@ ; LMULMAX1-RV32-NEXT: vse64.v v26, (a1) ; LMULMAX1-RV32-NEXT: ret ; -; LMULMAX8-RV64-LABEL: splat_allones_with_use_v4i64: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX8-RV64-NEXT: vle64.v v26, (a0) -; LMULMAX8-RV64-NEXT: vadd.vi v26, v26, -1 -; LMULMAX8-RV64-NEXT: vse64.v v26, (a0) -; LMULMAX8-RV64-NEXT: ret -; -; LMULMAX2-RV64-LABEL: splat_allones_with_use_v4i64: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli a1, 
4, e64,m2,ta,mu -; LMULMAX2-RV64-NEXT: vle64.v v26, (a0) -; LMULMAX2-RV64-NEXT: vadd.vi v26, v26, -1 -; LMULMAX2-RV64-NEXT: vse64.v v26, (a0) -; LMULMAX2-RV64-NEXT: ret -; ; LMULMAX1-RV64-LABEL: splat_allones_with_use_v4i64: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu @@ -796,20 +686,17 @@ define void @vadd_vx_v16i64(<16 x i64>* %a, i64 %b, <16 x i64>* %c) { ; LMULMAX8-RV32-LABEL: vadd_vx_v16i64: ; LMULMAX8-RV32: # %bb.0: +; LMULMAX8-RV32-NEXT: addi sp, sp, -16 +; LMULMAX8-RV32-NEXT: .cfi_def_cfa_offset 16 ; LMULMAX8-RV32-NEXT: vsetivli a4, 16, e64,m8,ta,mu ; LMULMAX8-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX8-RV32-NEXT: addi a0, zero, 32 -; LMULMAX8-RV32-NEXT: vsetvli a4, a0, e32,m8,ta,mu -; LMULMAX8-RV32-NEXT: vmv.v.x v16, a2 -; LMULMAX8-RV32-NEXT: lui a2, 349525 -; LMULMAX8-RV32-NEXT: addi a2, a2, 1365 -; LMULMAX8-RV32-NEXT: vsetivli a4, 1, e32,m1,ta,mu -; LMULMAX8-RV32-NEXT: vmv.s.x v0, a2 -; LMULMAX8-RV32-NEXT: vsetvli a0, a0, e32,m8,ta,mu -; LMULMAX8-RV32-NEXT: vmerge.vxm v16, v16, a1, v0 -; LMULMAX8-RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; LMULMAX8-RV32-NEXT: sw a2, 12(sp) +; LMULMAX8-RV32-NEXT: sw a1, 8(sp) +; LMULMAX8-RV32-NEXT: addi a0, sp, 8 +; LMULMAX8-RV32-NEXT: vlse64.v v16, (a0), zero ; LMULMAX8-RV32-NEXT: vadd.vv v8, v8, v16 ; LMULMAX8-RV32-NEXT: vse64.v v8, (a3) +; LMULMAX8-RV32-NEXT: addi sp, sp, 16 ; LMULMAX8-RV32-NEXT: ret ; ; LMULMAX2-RV32-LABEL: vadd_vx_v16i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -5622,24 +5622,13 @@ } define void @add_vi_v2i64(<2 x i64>* %x) { -; RV32-LABEL: add_vi_v2i64: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV32-NEXT: vle64.v v25, (a0) -; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; RV32-NEXT: vmv.v.i v26, -1 -; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV32-NEXT: vadd.vv v25, 
v25, v26 -; RV32-NEXT: vse64.v v25, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: add_vi_v2i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV64-NEXT: vle64.v v25, (a0) -; RV64-NEXT: vadd.vi v25, v25, -1 -; RV64-NEXT: vse64.v v25, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: add_vi_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vadd.vi v25, v25, -1 +; CHECK-NEXT: vse64.v v25, (a0) +; CHECK-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = insertelement <2 x i64> undef, i64 -1, i32 0 %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer @@ -5860,25 +5849,14 @@ } define void @sub_vi_v2i64(<2 x i64>* %x) { -; RV32-LABEL: sub_vi_v2i64: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV32-NEXT: vle64.v v25, (a0) -; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; RV32-NEXT: vmv.v.i v26, -1 -; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV32-NEXT: vsub.vv v25, v25, v26 -; RV32-NEXT: vse64.v v25, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: sub_vi_v2i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV64-NEXT: vle64.v v25, (a0) -; RV64-NEXT: addi a1, zero, -1 -; RV64-NEXT: vsub.vx v25, v25, a1 -; RV64-NEXT: vse64.v v25, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: sub_vi_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: vsub.vx v25, v25, a1 +; CHECK-NEXT: vse64.v v25, (a0) +; CHECK-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = insertelement <2 x i64> undef, i64 -1, i32 0 %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer @@ -6640,24 +6618,13 @@ } define void @xor_vi_v2i64(<2 x i64>* %x) { -; RV32-LABEL: xor_vi_v2i64: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV32-NEXT: vle64.v v25, (a0) -; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; RV32-NEXT: vmv.v.i v26, -1 -; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; 
RV32-NEXT: vxor.vv v25, v25, v26 -; RV32-NEXT: vse64.v v25, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: xor_vi_v2i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV64-NEXT: vle64.v v25, (a0) -; RV64-NEXT: vxor.vi v25, v25, -1 -; RV64-NEXT: vse64.v v25, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: xor_vi_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vxor.vi v25, v25, -1 +; CHECK-NEXT: vse64.v v25, (a0) +; CHECK-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = insertelement <2 x i64> undef, i64 -1, i32 0 %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer @@ -7435,21 +7402,21 @@ define void @mulhu_vx_v2i64(<2 x i64>* %x) { ; RV32-LABEL: mulhu_vx_v2i64: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu ; RV32-NEXT: vle64.v v25, (a0) -; RV32-NEXT: addi a1, zero, 5 -; RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: lui a1, 699051 ; RV32-NEXT: addi a2, a1, -1366 -; RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; RV32-NEXT: vmv.v.x v26, a2 +; RV32-NEXT: sw a2, 12(sp) ; RV32-NEXT: addi a1, a1, -1365 -; RV32-NEXT: vmerge.vxm v26, v26, a1, v0 -; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: vlse64.v v26, (a1), zero ; RV32-NEXT: vmulhu.vv v25, v25, v26 ; RV32-NEXT: vsrl.vi v25, v25, 1 ; RV32-NEXT: vse64.v v25, (a0) +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: mulhu_vx_v2i64: @@ -7557,23 +7524,23 @@ define void @mulhs_vx_v2i64(<2 x i64>* %x) { ; RV32-LABEL: mulhs_vx_v2i64: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu ; RV32-NEXT: vle64.v v25, (a0) -; RV32-NEXT: addi a1, zero, 5 -; RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a2, a1, 1365 
-; RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; RV32-NEXT: vmv.v.x v26, a2 +; RV32-NEXT: sw a2, 12(sp) ; RV32-NEXT: addi a1, a1, 1366 -; RV32-NEXT: vmerge.vxm v26, v26, a1, v0 -; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: vlse64.v v26, (a1), zero ; RV32-NEXT: vmulh.vv v25, v25, v26 ; RV32-NEXT: addi a1, zero, 63 ; RV32-NEXT: vsrl.vx v26, v25, a1 ; RV32-NEXT: vadd.vv v25, v25, v26 ; RV32-NEXT: vse64.v v25, (a0) +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: mulhs_vx_v2i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll @@ -54,26 +54,14 @@ declare <1 x i32> @llvm.masked.load.v1i32(<1 x i32>*, i32, <1 x i1>, <1 x i32>) define void @masked_load_v1i64(<1 x i64>* %a, <1 x i64>* %m_ptr, <1 x i64>* %res_ptr) nounwind { -; RV32-LABEL: masked_load_v1i64: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli a3, 1, e64,m1,ta,mu -; RV32-NEXT: vle64.v v25, (a1) -; RV32-NEXT: vsetivli a1, 2, e32,m1,ta,mu -; RV32-NEXT: vmv.v.i v26, 0 -; RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu -; RV32-NEXT: vmseq.vv v0, v25, v26 -; RV32-NEXT: vle64.v v25, (a0), v0.t -; RV32-NEXT: vse64.v v25, (a2) -; RV32-NEXT: ret -; -; RV64-LABEL: masked_load_v1i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli a3, 1, e64,m1,ta,mu -; RV64-NEXT: vle64.v v25, (a1) -; RV64-NEXT: vmseq.vi v0, v25, 0 -; RV64-NEXT: vle64.v v25, (a0), v0.t -; RV64-NEXT: vse64.v v25, (a2) -; RV64-NEXT: ret +; CHECK-LABEL: masked_load_v1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a3, 1, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a1) +; CHECK-NEXT: vmseq.vi v0, v25, 0 +; CHECK-NEXT: vle64.v v25, (a0), v0.t +; CHECK-NEXT: vse64.v v25, (a2) +; CHECK-NEXT: ret %m = load <1 x i64>, <1 x i64>* %m_ptr %mask = icmp eq <1 x i64> %m, zeroinitializer %load = call 
<1 x i64> @llvm.masked.load.v1i64(<1 x i64>* %a, i32 8, <1 x i1> %mask, <1 x i64> undef) @@ -134,26 +122,14 @@ declare <2 x i32> @llvm.masked.load.v2i32(<2 x i32>*, i32, <2 x i1>, <2 x i32>) define void @masked_load_v2i64(<2 x i64>* %a, <2 x i64>* %m_ptr, <2 x i64>* %res_ptr) nounwind { -; RV32-LABEL: masked_load_v2i64: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli a3, 2, e64,m1,ta,mu -; RV32-NEXT: vle64.v v25, (a1) -; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; RV32-NEXT: vmv.v.i v26, 0 -; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV32-NEXT: vmseq.vv v0, v25, v26 -; RV32-NEXT: vle64.v v25, (a0), v0.t -; RV32-NEXT: vse64.v v25, (a2) -; RV32-NEXT: ret -; -; RV64-LABEL: masked_load_v2i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli a3, 2, e64,m1,ta,mu -; RV64-NEXT: vle64.v v25, (a1) -; RV64-NEXT: vmseq.vi v0, v25, 0 -; RV64-NEXT: vle64.v v25, (a0), v0.t -; RV64-NEXT: vse64.v v25, (a2) -; RV64-NEXT: ret +; CHECK-LABEL: masked_load_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a3, 2, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a1) +; CHECK-NEXT: vmseq.vi v0, v25, 0 +; CHECK-NEXT: vle64.v v25, (a0), v0.t +; CHECK-NEXT: vse64.v v25, (a2) +; CHECK-NEXT: ret %m = load <2 x i64>, <2 x i64>* %m_ptr %mask = icmp eq <2 x i64> %m, zeroinitializer %load = call <2 x i64> @llvm.masked.load.v2i64(<2 x i64>* %a, i32 8, <2 x i1> %mask, <2 x i64> undef) @@ -214,26 +190,14 @@ declare <4 x i32> @llvm.masked.load.v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>) define void @masked_load_v4i64(<4 x i64>* %a, <4 x i64>* %m_ptr, <4 x i64>* %res_ptr) nounwind { -; RV32-LABEL: masked_load_v4i64: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli a3, 4, e64,m2,ta,mu -; RV32-NEXT: vle64.v v26, (a1) -; RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu -; RV32-NEXT: vmv.v.i v28, 0 -; RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; RV32-NEXT: vmseq.vv v0, v26, v28 -; RV32-NEXT: vle64.v v26, (a0), v0.t -; RV32-NEXT: vse64.v v26, (a2) -; RV32-NEXT: ret -; -; RV64-LABEL: masked_load_v4i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli a3, 4, 
e64,m2,ta,mu -; RV64-NEXT: vle64.v v26, (a1) -; RV64-NEXT: vmseq.vi v0, v26, 0 -; RV64-NEXT: vle64.v v26, (a0), v0.t -; RV64-NEXT: vse64.v v26, (a2) -; RV64-NEXT: ret +; CHECK-LABEL: masked_load_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a3, 4, e64,m2,ta,mu +; CHECK-NEXT: vle64.v v26, (a1) +; CHECK-NEXT: vmseq.vi v0, v26, 0 +; CHECK-NEXT: vle64.v v26, (a0), v0.t +; CHECK-NEXT: vse64.v v26, (a2) +; CHECK-NEXT: ret %m = load <4 x i64>, <4 x i64>* %m_ptr %mask = icmp eq <4 x i64> %m, zeroinitializer %load = call <4 x i64> @llvm.masked.load.v4i64(<4 x i64>* %a, i32 8, <4 x i1> %mask, <4 x i64> undef) @@ -294,26 +258,14 @@ declare <8 x i32> @llvm.masked.load.v8i32(<8 x i32>*, i32, <8 x i1>, <8 x i32>) define void @masked_load_v8i64(<8 x i64>* %a, <8 x i64>* %m_ptr, <8 x i64>* %res_ptr) nounwind { -; RV32-LABEL: masked_load_v8i64: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli a3, 8, e64,m4,ta,mu -; RV32-NEXT: vle64.v v28, (a1) -; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu -; RV32-NEXT: vmv.v.i v8, 0 -; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu -; RV32-NEXT: vmseq.vv v0, v28, v8 -; RV32-NEXT: vle64.v v28, (a0), v0.t -; RV32-NEXT: vse64.v v28, (a2) -; RV32-NEXT: ret -; -; RV64-LABEL: masked_load_v8i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli a3, 8, e64,m4,ta,mu -; RV64-NEXT: vle64.v v28, (a1) -; RV64-NEXT: vmseq.vi v0, v28, 0 -; RV64-NEXT: vle64.v v28, (a0), v0.t -; RV64-NEXT: vse64.v v28, (a2) -; RV64-NEXT: ret +; CHECK-LABEL: masked_load_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a3, 8, e64,m4,ta,mu +; CHECK-NEXT: vle64.v v28, (a1) +; CHECK-NEXT: vmseq.vi v0, v28, 0 +; CHECK-NEXT: vle64.v v28, (a0), v0.t +; CHECK-NEXT: vse64.v v28, (a2) +; CHECK-NEXT: ret %m = load <8 x i64>, <8 x i64>* %m_ptr %mask = icmp eq <8 x i64> %m, zeroinitializer %load = call <8 x i64> @llvm.masked.load.v8i64(<8 x i64>* %a, i32 8, <8 x i1> %mask, <8 x i64> undef) @@ -374,27 +326,14 @@ declare <16 x i32> @llvm.masked.load.v16i32(<16 x i32>*, i32, <16 x i1>, <16 x i32>) define void 
@masked_load_v16i64(<16 x i64>* %a, <16 x i64>* %m_ptr, <16 x i64>* %res_ptr) nounwind { -; RV32-LABEL: masked_load_v16i64: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli a3, 16, e64,m8,ta,mu -; RV32-NEXT: vle64.v v8, (a1) -; RV32-NEXT: addi a1, zero, 32 -; RV32-NEXT: vsetvli a1, a1, e32,m8,ta,mu -; RV32-NEXT: vmv.v.i v16, 0 -; RV32-NEXT: vsetivli a1, 16, e64,m8,ta,mu -; RV32-NEXT: vmseq.vv v0, v8, v16 -; RV32-NEXT: vle64.v v8, (a0), v0.t -; RV32-NEXT: vse64.v v8, (a2) -; RV32-NEXT: ret -; -; RV64-LABEL: masked_load_v16i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli a3, 16, e64,m8,ta,mu -; RV64-NEXT: vle64.v v8, (a1) -; RV64-NEXT: vmseq.vi v0, v8, 0 -; RV64-NEXT: vle64.v v8, (a0), v0.t -; RV64-NEXT: vse64.v v8, (a2) -; RV64-NEXT: ret +; CHECK-LABEL: masked_load_v16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a3, 16, e64,m8,ta,mu +; CHECK-NEXT: vle64.v v8, (a1) +; CHECK-NEXT: vmseq.vi v0, v8, 0 +; CHECK-NEXT: vle64.v v8, (a0), v0.t +; CHECK-NEXT: vse64.v v8, (a2) +; CHECK-NEXT: ret %m = load <16 x i64>, <16 x i64>* %m_ptr %mask = icmp eq <16 x i64> %m, zeroinitializer %load = call <16 x i64> @llvm.masked.load.v16i64(<16 x i64>* %a, i32 8, <16 x i1> %mask, <16 x i64> undef) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll @@ -54,26 +54,14 @@ declare void @llvm.masked.store.v1i32.p0v1i32(<1 x i32>, <1 x i32>*, i32, <1 x i1>) define void @masked_store_v1i64(<1 x i64>* %val_ptr, <1 x i64>* %a, <1 x i64>* %m_ptr) nounwind { -; RV32-LABEL: masked_store_v1i64: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli a3, 1, e64,m1,ta,mu -; RV32-NEXT: vle64.v v25, (a2) -; RV32-NEXT: vsetivli a2, 2, e32,m1,ta,mu -; RV32-NEXT: vmv.v.i v26, 0 -; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu -; RV32-NEXT: vle64.v v27, (a0) -; RV32-NEXT: vmseq.vv v0, v25, v26 -; RV32-NEXT: vse64.v 
v27, (a1), v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: masked_store_v1i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli a3, 1, e64,m1,ta,mu -; RV64-NEXT: vle64.v v25, (a2) -; RV64-NEXT: vle64.v v26, (a0) -; RV64-NEXT: vmseq.vi v0, v25, 0 -; RV64-NEXT: vse64.v v26, (a1), v0.t -; RV64-NEXT: ret +; CHECK-LABEL: masked_store_v1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a3, 1, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a2) +; CHECK-NEXT: vle64.v v26, (a0) +; CHECK-NEXT: vmseq.vi v0, v25, 0 +; CHECK-NEXT: vse64.v v26, (a1), v0.t +; CHECK-NEXT: ret %m = load <1 x i64>, <1 x i64>* %m_ptr %mask = icmp eq <1 x i64> %m, zeroinitializer %val = load <1 x i64>, <1 x i64>* %val_ptr @@ -134,26 +122,14 @@ declare void @llvm.masked.store.v2i32.p0v2i32(<2 x i32>, <2 x i32>*, i32, <2 x i1>) define void @masked_store_v2i64(<2 x i64>* %val_ptr, <2 x i64>* %a, <2 x i64>* %m_ptr) nounwind { -; RV32-LABEL: masked_store_v2i64: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli a3, 2, e64,m1,ta,mu -; RV32-NEXT: vle64.v v25, (a2) -; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; RV32-NEXT: vmv.v.i v26, 0 -; RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; RV32-NEXT: vle64.v v27, (a0) -; RV32-NEXT: vmseq.vv v0, v25, v26 -; RV32-NEXT: vse64.v v27, (a1), v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: masked_store_v2i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli a3, 2, e64,m1,ta,mu -; RV64-NEXT: vle64.v v25, (a2) -; RV64-NEXT: vle64.v v26, (a0) -; RV64-NEXT: vmseq.vi v0, v25, 0 -; RV64-NEXT: vse64.v v26, (a1), v0.t -; RV64-NEXT: ret +; CHECK-LABEL: masked_store_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a3, 2, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a2) +; CHECK-NEXT: vle64.v v26, (a0) +; CHECK-NEXT: vmseq.vi v0, v25, 0 +; CHECK-NEXT: vse64.v v26, (a1), v0.t +; CHECK-NEXT: ret %m = load <2 x i64>, <2 x i64>* %m_ptr %mask = icmp eq <2 x i64> %m, zeroinitializer %val = load <2 x i64>, <2 x i64>* %val_ptr @@ -214,26 +190,14 @@ declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>) define 
void @masked_store_v4i64(<4 x i64>* %val_ptr, <4 x i64>* %a, <4 x i64>* %m_ptr) nounwind { -; RV32-LABEL: masked_store_v4i64: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli a3, 4, e64,m2,ta,mu -; RV32-NEXT: vle64.v v26, (a2) -; RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; RV32-NEXT: vmv.v.i v28, 0 -; RV32-NEXT: vsetivli a2, 4, e64,m2,ta,mu -; RV32-NEXT: vle64.v v30, (a0) -; RV32-NEXT: vmseq.vv v0, v26, v28 -; RV32-NEXT: vse64.v v30, (a1), v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: masked_store_v4i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli a3, 4, e64,m2,ta,mu -; RV64-NEXT: vle64.v v26, (a2) -; RV64-NEXT: vle64.v v28, (a0) -; RV64-NEXT: vmseq.vi v0, v26, 0 -; RV64-NEXT: vse64.v v28, (a1), v0.t -; RV64-NEXT: ret +; CHECK-LABEL: masked_store_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a3, 4, e64,m2,ta,mu +; CHECK-NEXT: vle64.v v26, (a2) +; CHECK-NEXT: vle64.v v28, (a0) +; CHECK-NEXT: vmseq.vi v0, v26, 0 +; CHECK-NEXT: vse64.v v28, (a1), v0.t +; CHECK-NEXT: ret %m = load <4 x i64>, <4 x i64>* %m_ptr %mask = icmp eq <4 x i64> %m, zeroinitializer %val = load <4 x i64>, <4 x i64>* %val_ptr @@ -294,26 +258,14 @@ declare void @llvm.masked.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, i32, <8 x i1>) define void @masked_store_v8i64(<8 x i64>* %val_ptr, <8 x i64>* %a, <8 x i64>* %m_ptr) nounwind { -; RV32-LABEL: masked_store_v8i64: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli a3, 8, e64,m4,ta,mu -; RV32-NEXT: vle64.v v28, (a2) -; RV32-NEXT: vsetivli a2, 16, e32,m4,ta,mu -; RV32-NEXT: vmv.v.i v8, 0 -; RV32-NEXT: vsetivli a2, 8, e64,m4,ta,mu -; RV32-NEXT: vle64.v v12, (a0) -; RV32-NEXT: vmseq.vv v0, v28, v8 -; RV32-NEXT: vse64.v v12, (a1), v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: masked_store_v8i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli a3, 8, e64,m4,ta,mu -; RV64-NEXT: vle64.v v28, (a2) -; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vmseq.vi v0, v28, 0 -; RV64-NEXT: vse64.v v8, (a1), v0.t -; RV64-NEXT: ret +; CHECK-LABEL: masked_store_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli 
a3, 8, e64,m4,ta,mu +; CHECK-NEXT: vle64.v v28, (a2) +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vmseq.vi v0, v28, 0 +; CHECK-NEXT: vse64.v v8, (a1), v0.t +; CHECK-NEXT: ret %m = load <8 x i64>, <8 x i64>* %m_ptr %mask = icmp eq <8 x i64> %m, zeroinitializer %val = load <8 x i64>, <8 x i64>* %val_ptr @@ -374,27 +326,14 @@ declare void @llvm.masked.store.v16i32.p0v16i32(<16 x i32>, <16 x i32>*, i32, <16 x i1>) define void @masked_store_v16i64(<16 x i64>* %val_ptr, <16 x i64>* %a, <16 x i64>* %m_ptr) nounwind { -; RV32-LABEL: masked_store_v16i64: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli a3, 16, e64,m8,ta,mu -; RV32-NEXT: vle64.v v8, (a2) -; RV32-NEXT: addi a2, zero, 32 -; RV32-NEXT: vsetvli a2, a2, e32,m8,ta,mu -; RV32-NEXT: vmv.v.i v16, 0 -; RV32-NEXT: vsetivli a2, 16, e64,m8,ta,mu -; RV32-NEXT: vle64.v v24, (a0) -; RV32-NEXT: vmseq.vv v0, v8, v16 -; RV32-NEXT: vse64.v v24, (a1), v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: masked_store_v16i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli a3, 16, e64,m8,ta,mu -; RV64-NEXT: vle64.v v8, (a2) -; RV64-NEXT: vle64.v v16, (a0) -; RV64-NEXT: vmseq.vi v0, v8, 0 -; RV64-NEXT: vse64.v v16, (a1), v0.t -; RV64-NEXT: ret +; CHECK-LABEL: masked_store_v16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a3, 16, e64,m8,ta,mu +; CHECK-NEXT: vle64.v v8, (a2) +; CHECK-NEXT: vle64.v v16, (a0) +; CHECK-NEXT: vmseq.vi v0, v8, 0 +; CHECK-NEXT: vse64.v v16, (a1), v0.t +; CHECK-NEXT: ret %m = load <16 x i64>, <16 x i64>* %m_ptr %mask = icmp eq <16 x i64> %m, zeroinitializer %val = load <16 x i64>, <16 x i64>* %val_ptr