diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -120,6 +120,10 @@
   // and the fifth the VL.
   VSLIDEUP_VL,
   VSLIDEDOWN_VL,
+  // Matches the semantics of vslide1up. The first operand is the source
+  // vector, the second is the XLenVT scalar value. The third and fourth
+  // operands are the mask and VL operands.
+  VSLIDE1UP_VL,
   // Matches the semantics of the vid.v instruction, with a mask and VL
   // operand.
   VID_VL,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2218,6 +2218,12 @@
   return convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
 }
 
+// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
+// first position of a vector, and that vector is slid up to the insert index.
+// By limiting the active vector length to index+1 and merging with the
+// original vector (with an undisturbed tail policy for elements >= VL), we
+// achieve the desired result of leaving all elements untouched except the one
+// at VL-1, which is replaced with the desired value.
 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                     SelectionDAG &DAG) const {
   SDLoc DL(Op);
@@ -2233,51 +2239,67 @@
     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
   }
 
+  MVT XLenVT = Subtarget.getXLenVT();
+
+  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
+  bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
+  // Even i64-element vectors on RV32 can be lowered without scalar
+  // legalization if the most-significant 32 bits of the value are not affected
+  // by the sign-extension of the lower 32 bits.
+  // TODO: We could also catch sign extensions of a 32-bit value.
+  if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
+    const auto *CVal = cast<ConstantSDNode>(Val);
+    if (isInt<32>(CVal->getSExtValue())) {
+      IsLegalInsert = true;
+      Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
+    }
+  }
+
   SDValue Mask, VL;
   std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
 
-  // Custom-legalize INSERT_VECTOR_ELT where XLEN>=SEW, so that the vector is
-  // first slid down into position, the value is inserted into the first
-  // position, and the vector is slid back up. We do this to simplify patterns.
-  //   (slideup vec, (insertelt (slidedown impdef, vec, idx), val, 0), idx),
-  if (Subtarget.is64Bit() || Val.getValueType() != MVT::i64) {
+  SDValue ValInVec;
+
+  if (IsLegalInsert) {
     if (isNullConstant(Idx))
       return DAG.getNode(RISCVISD::VMV_S_XF_VL, DL, ContainerVT, Vec, Val, VL);
-    SDValue Slidedown =
-        DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
-                    DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
-    SDValue InsertElt0 =
-        DAG.getNode(RISCVISD::VMV_S_XF_VL, DL, ContainerVT, Slidedown, Val, VL);
-    return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec, InsertElt0,
-                       Idx, Mask, VL);
-  }
-
-  // Custom-legalize INSERT_VECTOR_ELT where XLEN<SEW
 def SDTRVVSlide : SDTypeProfile<1, 5, [
   SDTCisVec<0>, SDTCisSameAs<1, 0>, SDTCisSameAs<2, 0>, SDTCisVT<3, XLenVT>,
   SDTCVecEltisVT<4, i1>, SDTCisSameNumEltsAs<0, 4>, SDTCisVT<5, XLenVT>
 ]>;
+def SDTRVVSlide1 : SDTypeProfile<1, 4, [
+  SDTCisVec<0>, SDTCisSameAs<1, 0>, SDTCisVT<2, XLenVT>,
+  SDTCVecEltisVT<3, i1>, SDTCisSameNumEltsAs<0, 3>, SDTCisVT<4, XLenVT>
+]>;
 
 def riscv_slideup_vl : SDNode<"RISCVISD::VSLIDEUP_VL", SDTRVVSlide, []>;
+def riscv_slide1up_vl : SDNode<"RISCVISD::VSLIDE1UP_VL", SDTRVVSlide1, []>;
 def riscv_slidedown_vl : SDNode<"RISCVISD::VSLIDEDOWN_VL", SDTRVVSlide, []>;
 
 let Predicates = [HasStdExtV] in {
@@ -1157,6 +1162,12 @@
                 vti.RegClass:$rs3, vti.RegClass:$rs1, GPR:$rs2,
                 GPR:$vl, vti.SEW)>;
 
+  def : Pat<(vti.Vector (riscv_slide1up_vl (vti.Vector vti.RegClass:$rs1),
+                                           GPR:$rs2, (vti.Mask true_mask),
+                                           (XLenVT (VLOp GPR:$vl)))),
+            (!cast<Instruction>("PseudoVSLIDE1UP_VX_"#vti.LMul.MX)
+                vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.SEW)>;
+
   def : Pat<(vti.Vector (riscv_slidedown_vl (vti.Vector vti.RegClass:$rs3),
                                             (vti.Vector vti.RegClass:$rs1),
                                             uimm5:$rs2, (vti.Mask true_mask),
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
@@ -10,18 +10,13 @@
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli a3, 4, e64,m2,ta,mu
 ; RV32-NEXT:    vle64.v v26, (a0)
-; RV32-NEXT:    vsetvli a3, zero, e64,m2,ta,mu
-; RV32-NEXT:    vmv.v.x v28, a2
-; RV32-NEXT:    addi a2, zero, 32
-; RV32-NEXT:    vsll.vx v28, v28, a2
-; RV32-NEXT:    vmv.v.x v30, a1
-; RV32-NEXT:    vsll.vx v30, v30, a2
-; RV32-NEXT:    vsrl.vx v30, v30, a2
-; RV32-NEXT:    vor.vv v28, v30, v28
+; RV32-NEXT:    vsetivli a3, 2, e32,m2,ta,mu
+; RV32-NEXT:    vmv.v.i v28, 0
+; RV32-NEXT:    vslide1up.vx v30, v28, a2
+; RV32-NEXT:    vslide1up.vx v28, v30, a1
+; RV32-NEXT:    vsetivli a1, 4, e64,m2,tu,mu
+; RV32-NEXT:    vslideup.vi v26, v28, 3
 ; RV32-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
-; RV32-NEXT:    vid.v v30
-; RV32-NEXT:    vmseq.vi v0, v30, 3
-; RV32-NEXT:    vmerge.vvm v26, v26, v28, v0
 ; RV32-NEXT:    vse64.v v26, (a0)
 ; RV32-NEXT:    ret
 ;
@@ -29,7 +24,6 @@
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli a2, 4, e64,m2,ta,mu
 ; RV64-NEXT:    vle64.v v26, (a0)
-; RV64-NEXT:    vslidedown.vi v28, v26, 3
 ; RV64-NEXT:    vmv.s.x v28, a1
 ; RV64-NEXT:    vsetivli a1, 4, e64,m2,tu,mu
 ; RV64-NEXT:    vslideup.vi v26, v28, 3
@@ -65,20 +59,14 @@
 ; RV32-NEXT:    vle32.v v26, (sp)
 ; RV32-NEXT:    vsetivli a3, 4, e64,m2,tu,mu
 ; RV32-NEXT:    vslideup.vi v28, v26, 2
-; RV32-NEXT:    vsetvli a3, zero, e64,m2,ta,mu
-; RV32-NEXT:    vmv.v.x v26, a2
-; RV32-NEXT:    addi a3, zero, 32
-; RV32-NEXT:    vsll.vx v26, v26, a3
-; RV32-NEXT:    vmv.v.x v30, a1
-; RV32-NEXT:    vsll.vx v30, v30, a3
-; RV32-NEXT:    vsrl.vx v30, v30, a3
-; RV32-NEXT:    vor.vv v26, v30, v26
-; RV32-NEXT:    vsetivli a3, 4, e64,m2,ta,mu
-; RV32-NEXT:    vid.v v30
-; RV32-NEXT:    vmseq.vi v0, v30, 2
-; RV32-NEXT:    vmerge.vvm v26, v28, v26, v0
+; RV32-NEXT: vsetivli a3, 2, e32,m2,ta,mu +; RV32-NEXT: vmv.v.i v26, 0 +; RV32-NEXT: vslide1up.vx v30, v26, a2 +; RV32-NEXT: vslide1up.vx v26, v30, a1 +; RV32-NEXT: vsetivli a3, 3, e64,m2,tu,mu +; RV32-NEXT: vslideup.vi v28, v26, 2 ; RV32-NEXT: vsetivli a3, 2, e64,m1,ta,mu -; RV32-NEXT: vse64.v v26, (a0) +; RV32-NEXT: vse64.v v28, (a0) ; RV32-NEXT: sw a1, 16(a0) ; RV32-NEXT: sw a2, 20(a0) ; RV32-NEXT: addi sp, sp, 16 @@ -99,9 +87,8 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli a2, 16, e8,m1,ta,mu ; RV32-NEXT: vle8.v v25, (a0) -; RV32-NEXT: vslidedown.vi v26, v25, 14 ; RV32-NEXT: vmv.s.x v26, a1 -; RV32-NEXT: vsetivli a1, 16, e8,m1,tu,mu +; RV32-NEXT: vsetivli a1, 15, e8,m1,tu,mu ; RV32-NEXT: vslideup.vi v25, v26, 14 ; RV32-NEXT: vsetivli a1, 16, e8,m1,ta,mu ; RV32-NEXT: vse8.v v25, (a0) @@ -111,9 +98,8 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli a2, 16, e8,m1,ta,mu ; RV64-NEXT: vle8.v v25, (a0) -; RV64-NEXT: vslidedown.vi v26, v25, 14 ; RV64-NEXT: vmv.s.x v26, a1 -; RV64-NEXT: vsetivli a1, 16, e8,m1,tu,mu +; RV64-NEXT: vsetivli a1, 15, e8,m1,tu,mu ; RV64-NEXT: vslideup.vi v25, v26, 14 ; RV64-NEXT: vsetivli a1, 16, e8,m1,ta,mu ; RV64-NEXT: vse8.v v25, (a0) @@ -130,9 +116,9 @@ ; RV32-NEXT: addi a3, zero, 32 ; RV32-NEXT: vsetvli a4, a3, e16,m4,ta,mu ; RV32-NEXT: vle16.v v28, (a0) -; RV32-NEXT: vslidedown.vx v8, v28, a2 ; RV32-NEXT: vmv.s.x v8, a1 -; RV32-NEXT: vsetvli a1, a3, e16,m4,tu,mu +; RV32-NEXT: addi a1, a2, 1 +; RV32-NEXT: vsetvli a1, a1, e16,m4,tu,mu ; RV32-NEXT: vslideup.vx v28, v8, a2 ; RV32-NEXT: vsetvli a1, a3, e16,m4,ta,mu ; RV32-NEXT: vse16.v v28, (a0) @@ -143,11 +129,11 @@ ; RV64-NEXT: addi a3, zero, 32 ; RV64-NEXT: vsetvli a4, a3, e16,m4,ta,mu ; RV64-NEXT: vle16.v v28, (a0) -; RV64-NEXT: sext.w a2, a2 -; RV64-NEXT: vslidedown.vx v8, v28, a2 ; RV64-NEXT: vmv.s.x v8, a1 -; RV64-NEXT: vsetvli a1, a3, e16,m4,tu,mu -; RV64-NEXT: vslideup.vx v28, v8, a2 +; RV64-NEXT: sext.w a1, a2 +; RV64-NEXT: addi a2, a1, 1 +; RV64-NEXT: vsetvli a2, a2, e16,m4,tu,mu +; RV64-NEXT: vslideup.vx v28, v8, a1 ; RV64-NEXT: vsetvli a1, a3, e16,m4,ta,mu ; RV64-NEXT: vse16.v v28, (a0) ; RV64-NEXT: ret @@ -162,9 +148,9 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu ; RV32-NEXT: vle32.v v26, (a0) -; RV32-NEXT: vslidedown.vx v28, v26, a1 ; RV32-NEXT: vfmv.s.f v28, fa0 -; RV32-NEXT: vsetivli a2, 8, e32,m2,tu,mu +; RV32-NEXT: addi a2, a1, 1 +; RV32-NEXT: vsetvli a2, a2, e32,m2,tu,mu ; RV32-NEXT: vslideup.vx v26, v28, a1 ; RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV32-NEXT: vse32.v v26, (a0) @@ -174,10 +160,10 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli a2, 8, e32,m2,ta,mu ; RV64-NEXT: vle32.v v26, (a0) -; RV64-NEXT: sext.w a1, a1 -; RV64-NEXT: vslidedown.vx v28, v26, a1 ; RV64-NEXT: vfmv.s.f v28, fa0 -; RV64-NEXT: vsetivli a2, 8, e32,m2,tu,mu +; RV64-NEXT: sext.w a1, a1 +; RV64-NEXT: addi a2, a1, 1 +; RV64-NEXT: vsetvli a2, a2, e32,m2,tu,mu ; RV64-NEXT: vslideup.vx v26, v28, a1 ; RV64-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV64-NEXT: vse32.v v26, (a0) @@ -187,3 +173,117 @@ store <8 x float> %b, <8 x float>* %x ret void } + +define void @insertelt_v8i64_0(<8 x i64>* %x) { +; RV32-LABEL: insertelt_v8i64_0: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu +; RV32-NEXT: vle64.v v28, (a0) +; RV32-NEXT: addi a1, zero, -1 +; RV32-NEXT: vmv.s.x v28, a1 +; RV32-NEXT: vs4r.v v28, (a0) +; RV32-NEXT: ret +; +; RV64-LABEL: insertelt_v8i64_0: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu +; RV64-NEXT: vle64.v v28, (a0) +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: vmv.s.x v28, a1 +; 
RV64-NEXT: vse64.v v28, (a0) +; RV64-NEXT: ret + %a = load <8 x i64>, <8 x i64>* %x + %b = insertelement <8 x i64> %a, i64 -1, i32 0 + store <8 x i64> %b, <8 x i64>* %x + ret void +} + +define void @insertelt_v8i64(<8 x i64>* %x, i32 %idx) { +; RV32-LABEL: insertelt_v8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a2, 8, e64,m4,ta,mu +; RV32-NEXT: vle64.v v28, (a0) +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: vmv.s.x v8, a2 +; RV32-NEXT: addi a2, a1, 1 +; RV32-NEXT: vsetvli a2, a2, e64,m4,tu,mu +; RV32-NEXT: vslideup.vx v28, v8, a1 +; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu +; RV32-NEXT: vse64.v v28, (a0) +; RV32-NEXT: ret +; +; RV64-LABEL: insertelt_v8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a2, 8, e64,m4,ta,mu +; RV64-NEXT: vle64.v v28, (a0) +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: vmv.s.x v8, a2 +; RV64-NEXT: sext.w a1, a1 +; RV64-NEXT: addi a2, a1, 1 +; RV64-NEXT: vsetvli a2, a2, e64,m4,tu,mu +; RV64-NEXT: vslideup.vx v28, v8, a1 +; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu +; RV64-NEXT: vse64.v v28, (a0) +; RV64-NEXT: ret + %a = load <8 x i64>, <8 x i64>* %x + %b = insertelement <8 x i64> %a, i64 -1, i32 %idx + store <8 x i64> %b, <8 x i64>* %x + ret void +} + +define void @insertelt_c6_v8i64_0(<8 x i64>* %x) { +; RV32-LABEL: insertelt_c6_v8i64_0: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu +; RV32-NEXT: vle64.v v28, (a0) +; RV32-NEXT: addi a1, zero, 6 +; RV32-NEXT: vmv.s.x v28, a1 +; RV32-NEXT: vs4r.v v28, (a0) +; RV32-NEXT: ret +; +; RV64-LABEL: insertelt_c6_v8i64_0: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu +; RV64-NEXT: vle64.v v28, (a0) +; RV64-NEXT: addi a1, zero, 6 +; RV64-NEXT: vmv.s.x v28, a1 +; RV64-NEXT: vse64.v v28, (a0) +; RV64-NEXT: ret + %a = load <8 x i64>, <8 x i64>* %x + %b = insertelement <8 x i64> %a, i64 6, i32 0 + store <8 x i64> %b, <8 x i64>* %x + ret void +} + +define void @insertelt_c6_v8i64(<8 x i64>* %x, i32 %idx) { +; RV32-LABEL: insertelt_c6_v8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a2, 8, e64,m4,ta,mu +; RV32-NEXT: vle64.v v28, (a0) +; RV32-NEXT: addi a2, zero, 6 +; RV32-NEXT: vmv.s.x v8, a2 +; RV32-NEXT: addi a2, a1, 1 +; RV32-NEXT: vsetvli a2, a2, e64,m4,tu,mu +; RV32-NEXT: vslideup.vx v28, v8, a1 +; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu +; RV32-NEXT: vse64.v v28, (a0) +; RV32-NEXT: ret +; +; RV64-LABEL: insertelt_c6_v8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a2, 8, e64,m4,ta,mu +; RV64-NEXT: vle64.v v28, (a0) +; RV64-NEXT: addi a2, zero, 6 +; RV64-NEXT: vmv.s.x v8, a2 +; RV64-NEXT: sext.w a1, a1 +; RV64-NEXT: addi a2, a1, 1 +; RV64-NEXT: vsetvli a2, a2, e64,m4,tu,mu +; RV64-NEXT: vslideup.vx v28, v8, a1 +; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu +; RV64-NEXT: vse64.v v28, (a0) +; RV64-NEXT: ret + %a = load <8 x i64>, <8 x i64>* %x + %b = insertelement <8 x i64> %a, i64 6, i32 %idx + store <8 x i64> %b, <8 x i64>* %x + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp-rv32.ll @@ -16,9 +16,8 @@ ; CHECK-LABEL: insertelt_nxv1f16_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu -; CHECK-NEXT: vslidedown.vi v25, v8, 3 ; CHECK-NEXT: vfmv.s.f v25, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16,mf4,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e16,mf4,tu,mu ; CHECK-NEXT: vslideup.vi v8, v25, 3 ; CHECK-NEXT: ret %r = insertelement %v, half %elt, i32 3 @@ -29,9 +28,9 @@ ; CHECK-LABEL: insertelt_nxv1f16_idx: ; CHECK: # %bb.0: ; 
CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu -; CHECK-NEXT: vslidedown.vx v25, v8, a0 ; CHECK-NEXT: vfmv.s.f v25, fa0 -; CHECK-NEXT: vsetvli a1, zero, e16,mf4,tu,mu +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a1, a1, e16,mf4,tu,mu ; CHECK-NEXT: vslideup.vx v8, v25, a0 ; CHECK-NEXT: ret %r = insertelement %v, half %elt, i32 %idx @@ -52,9 +51,8 @@ ; CHECK-LABEL: insertelt_nxv2f16_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu -; CHECK-NEXT: vslidedown.vi v25, v8, 3 ; CHECK-NEXT: vfmv.s.f v25, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16,mf2,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e16,mf2,tu,mu ; CHECK-NEXT: vslideup.vi v8, v25, 3 ; CHECK-NEXT: ret %r = insertelement %v, half %elt, i32 3 @@ -65,9 +63,9 @@ ; CHECK-LABEL: insertelt_nxv2f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu -; CHECK-NEXT: vslidedown.vx v25, v8, a0 ; CHECK-NEXT: vfmv.s.f v25, fa0 -; CHECK-NEXT: vsetvli a1, zero, e16,mf2,tu,mu +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a1, a1, e16,mf2,tu,mu ; CHECK-NEXT: vslideup.vx v8, v25, a0 ; CHECK-NEXT: ret %r = insertelement %v, half %elt, i32 %idx @@ -88,9 +86,8 @@ ; CHECK-LABEL: insertelt_nxv4f16_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu -; CHECK-NEXT: vslidedown.vi v25, v8, 3 ; CHECK-NEXT: vfmv.s.f v25, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16,m1,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e16,m1,tu,mu ; CHECK-NEXT: vslideup.vi v8, v25, 3 ; CHECK-NEXT: ret %r = insertelement %v, half %elt, i32 3 @@ -101,9 +98,9 @@ ; CHECK-LABEL: insertelt_nxv4f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu -; CHECK-NEXT: vslidedown.vx v25, v8, a0 ; CHECK-NEXT: vfmv.s.f v25, fa0 -; CHECK-NEXT: vsetvli a1, zero, e16,m1,tu,mu +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a1, a1, e16,m1,tu,mu ; CHECK-NEXT: vslideup.vx v8, v25, a0 ; CHECK-NEXT: ret %r = insertelement %v, half %elt, i32 %idx @@ -124,9 +121,8 @@ ; CHECK-LABEL: insertelt_nxv8f16_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu -; CHECK-NEXT: vslidedown.vi v26, v8, 3 ; CHECK-NEXT: vfmv.s.f v26, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16,m2,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e16,m2,tu,mu ; CHECK-NEXT: vslideup.vi v8, v26, 3 ; CHECK-NEXT: ret %r = insertelement %v, half %elt, i32 3 @@ -137,9 +133,9 @@ ; CHECK-LABEL: insertelt_nxv8f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu -; CHECK-NEXT: vslidedown.vx v26, v8, a0 ; CHECK-NEXT: vfmv.s.f v26, fa0 -; CHECK-NEXT: vsetvli a1, zero, e16,m2,tu,mu +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a1, a1, e16,m2,tu,mu ; CHECK-NEXT: vslideup.vx v8, v26, a0 ; CHECK-NEXT: ret %r = insertelement %v, half %elt, i32 %idx @@ -160,9 +156,8 @@ ; CHECK-LABEL: insertelt_nxv16f16_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu -; CHECK-NEXT: vslidedown.vi v28, v8, 3 ; CHECK-NEXT: vfmv.s.f v28, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16,m4,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e16,m4,tu,mu ; CHECK-NEXT: vslideup.vi v8, v28, 3 ; CHECK-NEXT: ret %r = insertelement %v, half %elt, i32 3 @@ -173,9 +168,9 @@ ; CHECK-LABEL: insertelt_nxv16f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu -; CHECK-NEXT: vslidedown.vx v28, v8, a0 ; CHECK-NEXT: vfmv.s.f v28, fa0 -; CHECK-NEXT: vsetvli a1, zero, e16,m4,tu,mu +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a1, a1, e16,m4,tu,mu ; CHECK-NEXT: vslideup.vx v8, v28, a0 ; CHECK-NEXT: ret %r = insertelement %v, half %elt, i32 %idx @@ -196,9 +191,8 @@ ; CHECK-LABEL: 
insertelt_nxv32f16_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu -; CHECK-NEXT: vslidedown.vi v16, v8, 3 ; CHECK-NEXT: vfmv.s.f v16, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16,m8,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e16,m8,tu,mu ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret %r = insertelement %v, half %elt, i32 3 @@ -209,9 +203,9 @@ ; CHECK-LABEL: insertelt_nxv32f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu -; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vfmv.s.f v16, fa0 -; CHECK-NEXT: vsetvli a1, zero, e16,m8,tu,mu +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a1, a1, e16,m8,tu,mu ; CHECK-NEXT: vslideup.vx v8, v16, a0 ; CHECK-NEXT: ret %r = insertelement %v, half %elt, i32 %idx @@ -232,9 +226,8 @@ ; CHECK-LABEL: insertelt_nxv1f32_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu -; CHECK-NEXT: vslidedown.vi v25, v8, 3 ; CHECK-NEXT: vfmv.s.f v25, fa0 -; CHECK-NEXT: vsetvli a0, zero, e32,mf2,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e32,mf2,tu,mu ; CHECK-NEXT: vslideup.vi v8, v25, 3 ; CHECK-NEXT: ret %r = insertelement %v, float %elt, i32 3 @@ -245,9 +238,9 @@ ; CHECK-LABEL: insertelt_nxv1f32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu -; CHECK-NEXT: vslidedown.vx v25, v8, a0 ; CHECK-NEXT: vfmv.s.f v25, fa0 -; CHECK-NEXT: vsetvli a1, zero, e32,mf2,tu,mu +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a1, a1, e32,mf2,tu,mu ; CHECK-NEXT: vslideup.vx v8, v25, a0 ; CHECK-NEXT: ret %r = insertelement %v, float %elt, i32 %idx @@ -268,9 +261,8 @@ ; CHECK-LABEL: insertelt_nxv2f32_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu -; CHECK-NEXT: vslidedown.vi v25, v8, 3 ; CHECK-NEXT: vfmv.s.f v25, fa0 -; CHECK-NEXT: vsetvli a0, zero, e32,m1,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e32,m1,tu,mu ; CHECK-NEXT: vslideup.vi v8, v25, 3 ; CHECK-NEXT: ret %r = insertelement %v, float %elt, i32 3 @@ -281,9 +273,9 @@ ; CHECK-LABEL: insertelt_nxv2f32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu -; CHECK-NEXT: vslidedown.vx v25, v8, a0 ; CHECK-NEXT: vfmv.s.f v25, fa0 -; CHECK-NEXT: vsetvli a1, zero, e32,m1,tu,mu +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a1, a1, e32,m1,tu,mu ; CHECK-NEXT: vslideup.vx v8, v25, a0 ; CHECK-NEXT: ret %r = insertelement %v, float %elt, i32 %idx @@ -304,9 +296,8 @@ ; CHECK-LABEL: insertelt_nxv4f32_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu -; CHECK-NEXT: vslidedown.vi v26, v8, 3 ; CHECK-NEXT: vfmv.s.f v26, fa0 -; CHECK-NEXT: vsetvli a0, zero, e32,m2,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e32,m2,tu,mu ; CHECK-NEXT: vslideup.vi v8, v26, 3 ; CHECK-NEXT: ret %r = insertelement %v, float %elt, i32 3 @@ -317,9 +308,9 @@ ; CHECK-LABEL: insertelt_nxv4f32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu -; CHECK-NEXT: vslidedown.vx v26, v8, a0 ; CHECK-NEXT: vfmv.s.f v26, fa0 -; CHECK-NEXT: vsetvli a1, zero, e32,m2,tu,mu +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a1, a1, e32,m2,tu,mu ; CHECK-NEXT: vslideup.vx v8, v26, a0 ; CHECK-NEXT: ret %r = insertelement %v, float %elt, i32 %idx @@ -340,9 +331,8 @@ ; CHECK-LABEL: insertelt_nxv8f32_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu -; CHECK-NEXT: vslidedown.vi v28, v8, 3 ; CHECK-NEXT: vfmv.s.f v28, fa0 -; CHECK-NEXT: vsetvli a0, zero, e32,m4,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e32,m4,tu,mu ; CHECK-NEXT: vslideup.vi v8, v28, 3 ; CHECK-NEXT: ret %r = insertelement %v, float %elt, i32 3 @@ -353,9 +343,9 @@ ; 
CHECK-LABEL: insertelt_nxv8f32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu -; CHECK-NEXT: vslidedown.vx v28, v8, a0 ; CHECK-NEXT: vfmv.s.f v28, fa0 -; CHECK-NEXT: vsetvli a1, zero, e32,m4,tu,mu +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a1, a1, e32,m4,tu,mu ; CHECK-NEXT: vslideup.vx v8, v28, a0 ; CHECK-NEXT: ret %r = insertelement %v, float %elt, i32 %idx @@ -376,9 +366,8 @@ ; CHECK-LABEL: insertelt_nxv16f32_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu -; CHECK-NEXT: vslidedown.vi v16, v8, 3 ; CHECK-NEXT: vfmv.s.f v16, fa0 -; CHECK-NEXT: vsetvli a0, zero, e32,m8,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e32,m8,tu,mu ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret %r = insertelement %v, float %elt, i32 3 @@ -389,9 +378,9 @@ ; CHECK-LABEL: insertelt_nxv16f32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu -; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vfmv.s.f v16, fa0 -; CHECK-NEXT: vsetvli a1, zero, e32,m8,tu,mu +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a1, a1, e32,m8,tu,mu ; CHECK-NEXT: vslideup.vx v8, v16, a0 ; CHECK-NEXT: ret %r = insertelement %v, float %elt, i32 %idx @@ -412,9 +401,8 @@ ; CHECK-LABEL: insertelt_nxv1f64_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu -; CHECK-NEXT: vslidedown.vi v25, v8, 3 ; CHECK-NEXT: vfmv.s.f v25, fa0 -; CHECK-NEXT: vsetvli a0, zero, e64,m1,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e64,m1,tu,mu ; CHECK-NEXT: vslideup.vi v8, v25, 3 ; CHECK-NEXT: ret %r = insertelement %v, double %elt, i32 3 @@ -425,9 +413,9 @@ ; CHECK-LABEL: insertelt_nxv1f64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64,m1,ta,mu -; CHECK-NEXT: vslidedown.vx v25, v8, a0 ; CHECK-NEXT: vfmv.s.f v25, fa0 -; CHECK-NEXT: vsetvli a1, zero, e64,m1,tu,mu +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a1, a1, e64,m1,tu,mu ; CHECK-NEXT: vslideup.vx v8, v25, a0 ; CHECK-NEXT: ret %r = insertelement %v, double %elt, i32 %idx @@ -448,9 +436,8 @@ ; CHECK-LABEL: insertelt_nxv2f64_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu -; CHECK-NEXT: vslidedown.vi v26, v8, 3 ; CHECK-NEXT: vfmv.s.f v26, fa0 -; CHECK-NEXT: vsetvli a0, zero, e64,m2,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e64,m2,tu,mu ; CHECK-NEXT: vslideup.vi v8, v26, 3 ; CHECK-NEXT: ret %r = insertelement %v, double %elt, i32 3 @@ -461,9 +448,9 @@ ; CHECK-LABEL: insertelt_nxv2f64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64,m2,ta,mu -; CHECK-NEXT: vslidedown.vx v26, v8, a0 ; CHECK-NEXT: vfmv.s.f v26, fa0 -; CHECK-NEXT: vsetvli a1, zero, e64,m2,tu,mu +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a1, a1, e64,m2,tu,mu ; CHECK-NEXT: vslideup.vx v8, v26, a0 ; CHECK-NEXT: ret %r = insertelement %v, double %elt, i32 %idx @@ -484,9 +471,8 @@ ; CHECK-LABEL: insertelt_nxv4f64_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu -; CHECK-NEXT: vslidedown.vi v28, v8, 3 ; CHECK-NEXT: vfmv.s.f v28, fa0 -; CHECK-NEXT: vsetvli a0, zero, e64,m4,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e64,m4,tu,mu ; CHECK-NEXT: vslideup.vi v8, v28, 3 ; CHECK-NEXT: ret %r = insertelement %v, double %elt, i32 3 @@ -497,9 +483,9 @@ ; CHECK-LABEL: insertelt_nxv4f64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64,m4,ta,mu -; CHECK-NEXT: vslidedown.vx v28, v8, a0 ; CHECK-NEXT: vfmv.s.f v28, fa0 -; CHECK-NEXT: vsetvli a1, zero, e64,m4,tu,mu +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a1, a1, e64,m4,tu,mu ; CHECK-NEXT: vslideup.vx v8, v28, a0 ; CHECK-NEXT: ret %r = insertelement %v, 
double %elt, i32 %idx @@ -520,9 +506,8 @@ ; CHECK-LABEL: insertelt_nxv8f64_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu -; CHECK-NEXT: vslidedown.vi v16, v8, 3 ; CHECK-NEXT: vfmv.s.f v16, fa0 -; CHECK-NEXT: vsetvli a0, zero, e64,m8,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e64,m8,tu,mu ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret %r = insertelement %v, double %elt, i32 3 @@ -533,9 +518,9 @@ ; CHECK-LABEL: insertelt_nxv8f64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu -; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vfmv.s.f v16, fa0 -; CHECK-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a1, a1, e64,m8,tu,mu ; CHECK-NEXT: vslideup.vx v8, v16, a0 ; CHECK-NEXT: ret %r = insertelement %v, double %elt, i32 %idx diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp-rv64.ll --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp-rv64.ll @@ -16,9 +16,8 @@ ; CHECK-LABEL: insertelt_nxv1f16_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu -; CHECK-NEXT: vslidedown.vi v25, v8, 3 ; CHECK-NEXT: vfmv.s.f v25, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16,mf4,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e16,mf4,tu,mu ; CHECK-NEXT: vslideup.vi v8, v25, 3 ; CHECK-NEXT: ret %r = insertelement %v, half %elt, i32 3 @@ -29,9 +28,9 @@ ; CHECK-LABEL: insertelt_nxv1f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu -; CHECK-NEXT: vslidedown.vx v25, v8, a0 ; CHECK-NEXT: vfmv.s.f v25, fa0 -; CHECK-NEXT: vsetvli a1, zero, e16,mf4,tu,mu +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a1, a1, e16,mf4,tu,mu ; CHECK-NEXT: vslideup.vx v8, v25, a0 ; CHECK-NEXT: ret %r = insertelement %v, half %elt, i32 %idx @@ -52,9 +51,8 @@ ; CHECK-LABEL: insertelt_nxv2f16_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu -; CHECK-NEXT: vslidedown.vi v25, v8, 3 ; CHECK-NEXT: vfmv.s.f v25, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16,mf2,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e16,mf2,tu,mu ; CHECK-NEXT: vslideup.vi v8, v25, 3 ; CHECK-NEXT: ret %r = insertelement %v, half %elt, i32 3 @@ -65,9 +63,9 @@ ; CHECK-LABEL: insertelt_nxv2f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu -; CHECK-NEXT: vslidedown.vx v25, v8, a0 ; CHECK-NEXT: vfmv.s.f v25, fa0 -; CHECK-NEXT: vsetvli a1, zero, e16,mf2,tu,mu +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a1, a1, e16,mf2,tu,mu ; CHECK-NEXT: vslideup.vx v8, v25, a0 ; CHECK-NEXT: ret %r = insertelement %v, half %elt, i32 %idx @@ -88,9 +86,8 @@ ; CHECK-LABEL: insertelt_nxv4f16_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu -; CHECK-NEXT: vslidedown.vi v25, v8, 3 ; CHECK-NEXT: vfmv.s.f v25, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16,m1,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e16,m1,tu,mu ; CHECK-NEXT: vslideup.vi v8, v25, 3 ; CHECK-NEXT: ret %r = insertelement %v, half %elt, i32 3 @@ -101,9 +98,9 @@ ; CHECK-LABEL: insertelt_nxv4f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu -; CHECK-NEXT: vslidedown.vx v25, v8, a0 ; CHECK-NEXT: vfmv.s.f v25, fa0 -; CHECK-NEXT: vsetvli a1, zero, e16,m1,tu,mu +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a1, a1, e16,m1,tu,mu ; CHECK-NEXT: vslideup.vx v8, v25, a0 ; CHECK-NEXT: ret %r = insertelement %v, half %elt, i32 %idx @@ -124,9 +121,8 @@ ; CHECK-LABEL: insertelt_nxv8f16_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu -; CHECK-NEXT: 
vslidedown.vi v26, v8, 3 ; CHECK-NEXT: vfmv.s.f v26, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16,m2,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e16,m2,tu,mu ; CHECK-NEXT: vslideup.vi v8, v26, 3 ; CHECK-NEXT: ret %r = insertelement %v, half %elt, i32 3 @@ -137,9 +133,9 @@ ; CHECK-LABEL: insertelt_nxv8f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu -; CHECK-NEXT: vslidedown.vx v26, v8, a0 ; CHECK-NEXT: vfmv.s.f v26, fa0 -; CHECK-NEXT: vsetvli a1, zero, e16,m2,tu,mu +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a1, a1, e16,m2,tu,mu ; CHECK-NEXT: vslideup.vx v8, v26, a0 ; CHECK-NEXT: ret %r = insertelement %v, half %elt, i32 %idx @@ -160,9 +156,8 @@ ; CHECK-LABEL: insertelt_nxv16f16_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu -; CHECK-NEXT: vslidedown.vi v28, v8, 3 ; CHECK-NEXT: vfmv.s.f v28, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16,m4,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e16,m4,tu,mu ; CHECK-NEXT: vslideup.vi v8, v28, 3 ; CHECK-NEXT: ret %r = insertelement %v, half %elt, i32 3 @@ -173,9 +168,9 @@ ; CHECK-LABEL: insertelt_nxv16f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu -; CHECK-NEXT: vslidedown.vx v28, v8, a0 ; CHECK-NEXT: vfmv.s.f v28, fa0 -; CHECK-NEXT: vsetvli a1, zero, e16,m4,tu,mu +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a1, a1, e16,m4,tu,mu ; CHECK-NEXT: vslideup.vx v8, v28, a0 ; CHECK-NEXT: ret %r = insertelement %v, half %elt, i32 %idx @@ -196,9 +191,8 @@ ; CHECK-LABEL: insertelt_nxv32f16_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu -; CHECK-NEXT: vslidedown.vi v16, v8, 3 ; CHECK-NEXT: vfmv.s.f v16, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16,m8,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e16,m8,tu,mu ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret %r = insertelement %v, half %elt, i32 3 @@ -209,9 +203,9 @@ ; CHECK-LABEL: insertelt_nxv32f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu -; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vfmv.s.f v16, fa0 -; CHECK-NEXT: vsetvli a1, zero, e16,m8,tu,mu +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a1, a1, e16,m8,tu,mu ; CHECK-NEXT: vslideup.vx v8, v16, a0 ; CHECK-NEXT: ret %r = insertelement %v, half %elt, i32 %idx @@ -232,9 +226,8 @@ ; CHECK-LABEL: insertelt_nxv1f32_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu -; CHECK-NEXT: vslidedown.vi v25, v8, 3 ; CHECK-NEXT: vfmv.s.f v25, fa0 -; CHECK-NEXT: vsetvli a0, zero, e32,mf2,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e32,mf2,tu,mu ; CHECK-NEXT: vslideup.vi v8, v25, 3 ; CHECK-NEXT: ret %r = insertelement %v, float %elt, i32 3 @@ -245,9 +238,9 @@ ; CHECK-LABEL: insertelt_nxv1f32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu -; CHECK-NEXT: vslidedown.vx v25, v8, a0 ; CHECK-NEXT: vfmv.s.f v25, fa0 -; CHECK-NEXT: vsetvli a1, zero, e32,mf2,tu,mu +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a1, a1, e32,mf2,tu,mu ; CHECK-NEXT: vslideup.vx v8, v25, a0 ; CHECK-NEXT: ret %r = insertelement %v, float %elt, i32 %idx @@ -268,9 +261,8 @@ ; CHECK-LABEL: insertelt_nxv2f32_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu -; CHECK-NEXT: vslidedown.vi v25, v8, 3 ; CHECK-NEXT: vfmv.s.f v25, fa0 -; CHECK-NEXT: vsetvli a0, zero, e32,m1,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e32,m1,tu,mu ; CHECK-NEXT: vslideup.vi v8, v25, 3 ; CHECK-NEXT: ret %r = insertelement %v, float %elt, i32 3 @@ -281,9 +273,9 @@ ; CHECK-LABEL: insertelt_nxv2f32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu -; CHECK-NEXT: 
vslidedown.vx v25, v8, a0 ; CHECK-NEXT: vfmv.s.f v25, fa0 -; CHECK-NEXT: vsetvli a1, zero, e32,m1,tu,mu +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a1, a1, e32,m1,tu,mu ; CHECK-NEXT: vslideup.vx v8, v25, a0 ; CHECK-NEXT: ret %r = insertelement %v, float %elt, i32 %idx @@ -304,9 +296,8 @@ ; CHECK-LABEL: insertelt_nxv4f32_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu -; CHECK-NEXT: vslidedown.vi v26, v8, 3 ; CHECK-NEXT: vfmv.s.f v26, fa0 -; CHECK-NEXT: vsetvli a0, zero, e32,m2,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e32,m2,tu,mu ; CHECK-NEXT: vslideup.vi v8, v26, 3 ; CHECK-NEXT: ret %r = insertelement %v, float %elt, i32 3 @@ -317,9 +308,9 @@ ; CHECK-LABEL: insertelt_nxv4f32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu -; CHECK-NEXT: vslidedown.vx v26, v8, a0 ; CHECK-NEXT: vfmv.s.f v26, fa0 -; CHECK-NEXT: vsetvli a1, zero, e32,m2,tu,mu +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a1, a1, e32,m2,tu,mu ; CHECK-NEXT: vslideup.vx v8, v26, a0 ; CHECK-NEXT: ret %r = insertelement %v, float %elt, i32 %idx @@ -340,9 +331,8 @@ ; CHECK-LABEL: insertelt_nxv8f32_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu -; CHECK-NEXT: vslidedown.vi v28, v8, 3 ; CHECK-NEXT: vfmv.s.f v28, fa0 -; CHECK-NEXT: vsetvli a0, zero, e32,m4,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e32,m4,tu,mu ; CHECK-NEXT: vslideup.vi v8, v28, 3 ; CHECK-NEXT: ret %r = insertelement %v, float %elt, i32 3 @@ -353,9 +343,9 @@ ; CHECK-LABEL: insertelt_nxv8f32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu -; CHECK-NEXT: vslidedown.vx v28, v8, a0 ; CHECK-NEXT: vfmv.s.f v28, fa0 -; CHECK-NEXT: vsetvli a1, zero, e32,m4,tu,mu +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a1, a1, e32,m4,tu,mu ; CHECK-NEXT: vslideup.vx v8, v28, a0 ; CHECK-NEXT: ret %r = insertelement %v, float %elt, i32 %idx @@ -376,9 +366,8 @@ ; CHECK-LABEL: insertelt_nxv16f32_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu -; CHECK-NEXT: vslidedown.vi v16, v8, 3 ; CHECK-NEXT: vfmv.s.f v16, fa0 -; CHECK-NEXT: vsetvli a0, zero, e32,m8,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e32,m8,tu,mu ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret %r = insertelement %v, float %elt, i32 3 @@ -389,9 +378,9 @@ ; CHECK-LABEL: insertelt_nxv16f32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu -; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vfmv.s.f v16, fa0 -; CHECK-NEXT: vsetvli a1, zero, e32,m8,tu,mu +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a1, a1, e32,m8,tu,mu ; CHECK-NEXT: vslideup.vx v8, v16, a0 ; CHECK-NEXT: ret %r = insertelement %v, float %elt, i32 %idx @@ -412,9 +401,8 @@ ; CHECK-LABEL: insertelt_nxv1f64_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu -; CHECK-NEXT: vslidedown.vi v25, v8, 3 ; CHECK-NEXT: vfmv.s.f v25, fa0 -; CHECK-NEXT: vsetvli a0, zero, e64,m1,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e64,m1,tu,mu ; CHECK-NEXT: vslideup.vi v8, v25, 3 ; CHECK-NEXT: ret %r = insertelement %v, double %elt, i32 3 @@ -425,9 +413,9 @@ ; CHECK-LABEL: insertelt_nxv1f64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64,m1,ta,mu -; CHECK-NEXT: vslidedown.vx v25, v8, a0 ; CHECK-NEXT: vfmv.s.f v25, fa0 -; CHECK-NEXT: vsetvli a1, zero, e64,m1,tu,mu +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a1, a1, e64,m1,tu,mu ; CHECK-NEXT: vslideup.vx v8, v25, a0 ; CHECK-NEXT: ret %r = insertelement %v, double %elt, i32 %idx @@ -448,9 +436,8 @@ ; CHECK-LABEL: insertelt_nxv2f64_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli 
a0, zero, e64,m2,ta,mu -; CHECK-NEXT: vslidedown.vi v26, v8, 3 ; CHECK-NEXT: vfmv.s.f v26, fa0 -; CHECK-NEXT: vsetvli a0, zero, e64,m2,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e64,m2,tu,mu ; CHECK-NEXT: vslideup.vi v8, v26, 3 ; CHECK-NEXT: ret %r = insertelement %v, double %elt, i32 3 @@ -461,9 +448,9 @@ ; CHECK-LABEL: insertelt_nxv2f64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64,m2,ta,mu -; CHECK-NEXT: vslidedown.vx v26, v8, a0 ; CHECK-NEXT: vfmv.s.f v26, fa0 -; CHECK-NEXT: vsetvli a1, zero, e64,m2,tu,mu +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a1, a1, e64,m2,tu,mu ; CHECK-NEXT: vslideup.vx v8, v26, a0 ; CHECK-NEXT: ret %r = insertelement %v, double %elt, i32 %idx @@ -484,9 +471,8 @@ ; CHECK-LABEL: insertelt_nxv4f64_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu -; CHECK-NEXT: vslidedown.vi v28, v8, 3 ; CHECK-NEXT: vfmv.s.f v28, fa0 -; CHECK-NEXT: vsetvli a0, zero, e64,m4,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e64,m4,tu,mu ; CHECK-NEXT: vslideup.vi v8, v28, 3 ; CHECK-NEXT: ret %r = insertelement %v, double %elt, i32 3 @@ -497,9 +483,9 @@ ; CHECK-LABEL: insertelt_nxv4f64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64,m4,ta,mu -; CHECK-NEXT: vslidedown.vx v28, v8, a0 ; CHECK-NEXT: vfmv.s.f v28, fa0 -; CHECK-NEXT: vsetvli a1, zero, e64,m4,tu,mu +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a1, a1, e64,m4,tu,mu ; CHECK-NEXT: vslideup.vx v8, v28, a0 ; CHECK-NEXT: ret %r = insertelement %v, double %elt, i32 %idx @@ -520,9 +506,8 @@ ; CHECK-LABEL: insertelt_nxv8f64_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu -; CHECK-NEXT: vslidedown.vi v16, v8, 3 ; CHECK-NEXT: vfmv.s.f v16, fa0 -; CHECK-NEXT: vsetvli a0, zero, e64,m8,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e64,m8,tu,mu ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret %r = insertelement %v, double %elt, i32 3 @@ -533,9 +518,9 @@ ; CHECK-LABEL: insertelt_nxv8f64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu -; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vfmv.s.f v16, fa0 -; CHECK-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a1, a1, e64,m8,tu,mu ; CHECK-NEXT: vslideup.vx v8, v16, a0 ; CHECK-NEXT: ret %r = insertelement %v, double %elt, i32 %idx diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll @@ -16,9 +16,8 @@ ; CHECK-LABEL: insertelt_nxv1i8_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu -; CHECK-NEXT: vslidedown.vi v25, v8, 3 ; CHECK-NEXT: vmv.s.x v25, a0 -; CHECK-NEXT: vsetvli a0, zero, e8,mf8,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e8,mf8,tu,mu ; CHECK-NEXT: vslideup.vi v8, v25, 3 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 3 @@ -29,9 +28,9 @@ ; CHECK-LABEL: insertelt_nxv1i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8,mf8,ta,mu -; CHECK-NEXT: vslidedown.vx v25, v8, a1 ; CHECK-NEXT: vmv.s.x v25, a0 -; CHECK-NEXT: vsetvli a0, zero, e8,mf8,tu,mu +; CHECK-NEXT: addi a0, a1, 1 +; CHECK-NEXT: vsetvli a0, a0, e8,mf8,tu,mu ; CHECK-NEXT: vslideup.vx v8, v25, a1 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 %idx @@ -52,9 +51,8 @@ ; CHECK-LABEL: insertelt_nxv2i8_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu -; CHECK-NEXT: vslidedown.vi v25, v8, 3 ; CHECK-NEXT: vmv.s.x v25, a0 -; CHECK-NEXT: vsetvli a0, zero, e8,mf4,tu,mu +; CHECK-NEXT: vsetivli a0, 
4, e8,mf4,tu,mu ; CHECK-NEXT: vslideup.vi v8, v25, 3 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 3 @@ -65,9 +63,9 @@ ; CHECK-LABEL: insertelt_nxv2i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8,mf4,ta,mu -; CHECK-NEXT: vslidedown.vx v25, v8, a1 ; CHECK-NEXT: vmv.s.x v25, a0 -; CHECK-NEXT: vsetvli a0, zero, e8,mf4,tu,mu +; CHECK-NEXT: addi a0, a1, 1 +; CHECK-NEXT: vsetvli a0, a0, e8,mf4,tu,mu ; CHECK-NEXT: vslideup.vx v8, v25, a1 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 %idx @@ -88,9 +86,8 @@ ; CHECK-LABEL: insertelt_nxv4i8_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu -; CHECK-NEXT: vslidedown.vi v25, v8, 3 ; CHECK-NEXT: vmv.s.x v25, a0 -; CHECK-NEXT: vsetvli a0, zero, e8,mf2,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e8,mf2,tu,mu ; CHECK-NEXT: vslideup.vi v8, v25, 3 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 3 @@ -101,9 +98,9 @@ ; CHECK-LABEL: insertelt_nxv4i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8,mf2,ta,mu -; CHECK-NEXT: vslidedown.vx v25, v8, a1 ; CHECK-NEXT: vmv.s.x v25, a0 -; CHECK-NEXT: vsetvli a0, zero, e8,mf2,tu,mu +; CHECK-NEXT: addi a0, a1, 1 +; CHECK-NEXT: vsetvli a0, a0, e8,mf2,tu,mu ; CHECK-NEXT: vslideup.vx v8, v25, a1 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 %idx @@ -124,9 +121,8 @@ ; CHECK-LABEL: insertelt_nxv8i8_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu -; CHECK-NEXT: vslidedown.vi v25, v8, 3 ; CHECK-NEXT: vmv.s.x v25, a0 -; CHECK-NEXT: vsetvli a0, zero, e8,m1,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e8,m1,tu,mu ; CHECK-NEXT: vslideup.vi v8, v25, 3 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 3 @@ -137,9 +133,9 @@ ; CHECK-LABEL: insertelt_nxv8i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8,m1,ta,mu -; CHECK-NEXT: vslidedown.vx v25, v8, a1 ; CHECK-NEXT: vmv.s.x v25, a0 -; CHECK-NEXT: vsetvli a0, zero, e8,m1,tu,mu +; CHECK-NEXT: addi a0, a1, 1 +; CHECK-NEXT: vsetvli a0, a0, e8,m1,tu,mu ; CHECK-NEXT: vslideup.vx v8, v25, a1 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 %idx @@ -160,9 +156,8 @@ ; CHECK-LABEL: insertelt_nxv16i8_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,m2,ta,mu -; CHECK-NEXT: vslidedown.vi v26, v8, 3 ; CHECK-NEXT: vmv.s.x v26, a0 -; CHECK-NEXT: vsetvli a0, zero, e8,m2,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e8,m2,tu,mu ; CHECK-NEXT: vslideup.vi v8, v26, 3 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 3 @@ -173,9 +168,9 @@ ; CHECK-LABEL: insertelt_nxv16i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8,m2,ta,mu -; CHECK-NEXT: vslidedown.vx v26, v8, a1 ; CHECK-NEXT: vmv.s.x v26, a0 -; CHECK-NEXT: vsetvli a0, zero, e8,m2,tu,mu +; CHECK-NEXT: addi a0, a1, 1 +; CHECK-NEXT: vsetvli a0, a0, e8,m2,tu,mu ; CHECK-NEXT: vslideup.vx v8, v26, a1 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 %idx @@ -196,9 +191,8 @@ ; CHECK-LABEL: insertelt_nxv32i8_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,m4,ta,mu -; CHECK-NEXT: vslidedown.vi v28, v8, 3 ; CHECK-NEXT: vmv.s.x v28, a0 -; CHECK-NEXT: vsetvli a0, zero, e8,m4,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e8,m4,tu,mu ; CHECK-NEXT: vslideup.vi v8, v28, 3 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 3 @@ -209,9 +203,9 @@ ; CHECK-LABEL: insertelt_nxv32i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8,m4,ta,mu -; CHECK-NEXT: vslidedown.vx v28, v8, a1 ; CHECK-NEXT: vmv.s.x v28, a0 -; CHECK-NEXT: vsetvli a0, zero, e8,m4,tu,mu +; CHECK-NEXT: addi a0, a1, 1 +; CHECK-NEXT: vsetvli a0, a0, e8,m4,tu,mu ; CHECK-NEXT: vslideup.vx v8, 
v28, a1 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 %idx @@ -232,9 +226,8 @@ ; CHECK-LABEL: insertelt_nxv64i8_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,m8,ta,mu -; CHECK-NEXT: vslidedown.vi v16, v8, 3 ; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vsetvli a0, zero, e8,m8,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e8,m8,tu,mu ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 3 @@ -245,9 +238,9 @@ ; CHECK-LABEL: insertelt_nxv64i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8,m8,ta,mu -; CHECK-NEXT: vslidedown.vx v16, v8, a1 ; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vsetvli a0, zero, e8,m8,tu,mu +; CHECK-NEXT: addi a0, a1, 1 +; CHECK-NEXT: vsetvli a0, a0, e8,m8,tu,mu ; CHECK-NEXT: vslideup.vx v8, v16, a1 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 %idx @@ -268,9 +261,8 @@ ; CHECK-LABEL: insertelt_nxv1i16_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu -; CHECK-NEXT: vslidedown.vi v25, v8, 3 ; CHECK-NEXT: vmv.s.x v25, a0 -; CHECK-NEXT: vsetvli a0, zero, e16,mf4,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e16,mf4,tu,mu ; CHECK-NEXT: vslideup.vi v8, v25, 3 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 3 @@ -281,9 +273,9 @@ ; CHECK-LABEL: insertelt_nxv1i16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e16,mf4,ta,mu -; CHECK-NEXT: vslidedown.vx v25, v8, a1 ; CHECK-NEXT: vmv.s.x v25, a0 -; CHECK-NEXT: vsetvli a0, zero, e16,mf4,tu,mu +; CHECK-NEXT: addi a0, a1, 1 +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,tu,mu ; CHECK-NEXT: vslideup.vx v8, v25, a1 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 %idx @@ -304,9 +296,8 @@ ; CHECK-LABEL: insertelt_nxv2i16_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu -; CHECK-NEXT: vslidedown.vi v25, v8, 3 ; CHECK-NEXT: vmv.s.x v25, a0 -; CHECK-NEXT: vsetvli a0, zero, e16,mf2,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e16,mf2,tu,mu ; CHECK-NEXT: vslideup.vi v8, v25, 3 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 3 @@ -317,9 +308,9 @@ ; CHECK-LABEL: insertelt_nxv2i16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e16,mf2,ta,mu -; CHECK-NEXT: vslidedown.vx v25, v8, a1 ; CHECK-NEXT: vmv.s.x v25, a0 -; CHECK-NEXT: vsetvli a0, zero, e16,mf2,tu,mu +; CHECK-NEXT: addi a0, a1, 1 +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,tu,mu ; CHECK-NEXT: vslideup.vx v8, v25, a1 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 %idx @@ -340,9 +331,8 @@ ; CHECK-LABEL: insertelt_nxv4i16_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu -; CHECK-NEXT: vslidedown.vi v25, v8, 3 ; CHECK-NEXT: vmv.s.x v25, a0 -; CHECK-NEXT: vsetvli a0, zero, e16,m1,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e16,m1,tu,mu ; CHECK-NEXT: vslideup.vi v8, v25, 3 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 3 @@ -353,9 +343,9 @@ ; CHECK-LABEL: insertelt_nxv4i16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e16,m1,ta,mu -; CHECK-NEXT: vslidedown.vx v25, v8, a1 ; CHECK-NEXT: vmv.s.x v25, a0 -; CHECK-NEXT: vsetvli a0, zero, e16,m1,tu,mu +; CHECK-NEXT: addi a0, a1, 1 +; CHECK-NEXT: vsetvli a0, a0, e16,m1,tu,mu ; CHECK-NEXT: vslideup.vx v8, v25, a1 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 %idx @@ -376,9 +366,8 @@ ; CHECK-LABEL: insertelt_nxv8i16_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu -; CHECK-NEXT: vslidedown.vi v26, v8, 3 ; CHECK-NEXT: vmv.s.x v26, a0 -; CHECK-NEXT: vsetvli a0, zero, e16,m2,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e16,m2,tu,mu ; CHECK-NEXT: vslideup.vi v8, v26, 3 ; CHECK-NEXT: ret %r = 
insertelement %v, i16 %elt, i32 3 @@ -389,9 +378,9 @@ ; CHECK-LABEL: insertelt_nxv8i16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e16,m2,ta,mu -; CHECK-NEXT: vslidedown.vx v26, v8, a1 ; CHECK-NEXT: vmv.s.x v26, a0 -; CHECK-NEXT: vsetvli a0, zero, e16,m2,tu,mu +; CHECK-NEXT: addi a0, a1, 1 +; CHECK-NEXT: vsetvli a0, a0, e16,m2,tu,mu ; CHECK-NEXT: vslideup.vx v8, v26, a1 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 %idx @@ -412,9 +401,8 @@ ; CHECK-LABEL: insertelt_nxv16i16_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu -; CHECK-NEXT: vslidedown.vi v28, v8, 3 ; CHECK-NEXT: vmv.s.x v28, a0 -; CHECK-NEXT: vsetvli a0, zero, e16,m4,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e16,m4,tu,mu ; CHECK-NEXT: vslideup.vi v8, v28, 3 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 3 @@ -425,9 +413,9 @@ ; CHECK-LABEL: insertelt_nxv16i16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e16,m4,ta,mu -; CHECK-NEXT: vslidedown.vx v28, v8, a1 ; CHECK-NEXT: vmv.s.x v28, a0 -; CHECK-NEXT: vsetvli a0, zero, e16,m4,tu,mu +; CHECK-NEXT: addi a0, a1, 1 +; CHECK-NEXT: vsetvli a0, a0, e16,m4,tu,mu ; CHECK-NEXT: vslideup.vx v8, v28, a1 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 %idx @@ -448,9 +436,8 @@ ; CHECK-LABEL: insertelt_nxv32i16_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu -; CHECK-NEXT: vslidedown.vi v16, v8, 3 ; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vsetvli a0, zero, e16,m8,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e16,m8,tu,mu ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 3 @@ -461,9 +448,9 @@ ; CHECK-LABEL: insertelt_nxv32i16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e16,m8,ta,mu -; CHECK-NEXT: vslidedown.vx v16, v8, a1 ; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vsetvli a0, zero, e16,m8,tu,mu +; CHECK-NEXT: addi a0, a1, 1 +; CHECK-NEXT: vsetvli a0, a0, e16,m8,tu,mu ; CHECK-NEXT: vslideup.vx v8, v16, a1 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 %idx @@ -484,9 +471,8 @@ ; CHECK-LABEL: insertelt_nxv1i32_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu -; CHECK-NEXT: vslidedown.vi v25, v8, 3 ; CHECK-NEXT: vmv.s.x v25, a0 -; CHECK-NEXT: vsetvli a0, zero, e32,mf2,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e32,mf2,tu,mu ; CHECK-NEXT: vslideup.vi v8, v25, 3 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 3 @@ -497,9 +483,9 @@ ; CHECK-LABEL: insertelt_nxv1i32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e32,mf2,ta,mu -; CHECK-NEXT: vslidedown.vx v25, v8, a1 ; CHECK-NEXT: vmv.s.x v25, a0 -; CHECK-NEXT: vsetvli a0, zero, e32,mf2,tu,mu +; CHECK-NEXT: addi a0, a1, 1 +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,tu,mu ; CHECK-NEXT: vslideup.vx v8, v25, a1 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 %idx @@ -520,9 +506,8 @@ ; CHECK-LABEL: insertelt_nxv2i32_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu -; CHECK-NEXT: vslidedown.vi v25, v8, 3 ; CHECK-NEXT: vmv.s.x v25, a0 -; CHECK-NEXT: vsetvli a0, zero, e32,m1,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e32,m1,tu,mu ; CHECK-NEXT: vslideup.vi v8, v25, 3 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 3 @@ -533,9 +518,9 @@ ; CHECK-LABEL: insertelt_nxv2i32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e32,m1,ta,mu -; CHECK-NEXT: vslidedown.vx v25, v8, a1 ; CHECK-NEXT: vmv.s.x v25, a0 -; CHECK-NEXT: vsetvli a0, zero, e32,m1,tu,mu +; CHECK-NEXT: addi a0, a1, 1 +; CHECK-NEXT: vsetvli a0, a0, e32,m1,tu,mu ; CHECK-NEXT: vslideup.vx v8, v25, a1 ; CHECK-NEXT: ret %r 
= insertelement %v, i32 %elt, i32 %idx @@ -556,9 +541,8 @@ ; CHECK-LABEL: insertelt_nxv4i32_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu -; CHECK-NEXT: vslidedown.vi v26, v8, 3 ; CHECK-NEXT: vmv.s.x v26, a0 -; CHECK-NEXT: vsetvli a0, zero, e32,m2,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e32,m2,tu,mu ; CHECK-NEXT: vslideup.vi v8, v26, 3 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 3 @@ -569,9 +553,9 @@ ; CHECK-LABEL: insertelt_nxv4i32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e32,m2,ta,mu -; CHECK-NEXT: vslidedown.vx v26, v8, a1 ; CHECK-NEXT: vmv.s.x v26, a0 -; CHECK-NEXT: vsetvli a0, zero, e32,m2,tu,mu +; CHECK-NEXT: addi a0, a1, 1 +; CHECK-NEXT: vsetvli a0, a0, e32,m2,tu,mu ; CHECK-NEXT: vslideup.vx v8, v26, a1 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 %idx @@ -592,9 +576,8 @@ ; CHECK-LABEL: insertelt_nxv8i32_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu -; CHECK-NEXT: vslidedown.vi v28, v8, 3 ; CHECK-NEXT: vmv.s.x v28, a0 -; CHECK-NEXT: vsetvli a0, zero, e32,m4,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e32,m4,tu,mu ; CHECK-NEXT: vslideup.vi v8, v28, 3 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 3 @@ -605,9 +588,9 @@ ; CHECK-LABEL: insertelt_nxv8i32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e32,m4,ta,mu -; CHECK-NEXT: vslidedown.vx v28, v8, a1 ; CHECK-NEXT: vmv.s.x v28, a0 -; CHECK-NEXT: vsetvli a0, zero, e32,m4,tu,mu +; CHECK-NEXT: addi a0, a1, 1 +; CHECK-NEXT: vsetvli a0, a0, e32,m4,tu,mu ; CHECK-NEXT: vslideup.vx v8, v28, a1 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 %idx @@ -628,9 +611,8 @@ ; CHECK-LABEL: insertelt_nxv16i32_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu -; CHECK-NEXT: vslidedown.vi v16, v8, 3 ; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vsetvli a0, zero, e32,m8,tu,mu +; CHECK-NEXT: vsetivli a0, 4, e32,m8,tu,mu ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 3 @@ -641,9 +623,9 @@ ; CHECK-LABEL: insertelt_nxv16i32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e32,m8,ta,mu -; CHECK-NEXT: vslidedown.vx v16, v8, a1 ; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vsetvli a0, zero, e32,m8,tu,mu +; CHECK-NEXT: addi a0, a1, 1 +; CHECK-NEXT: vsetvli a0, a0, e32,m8,tu,mu ; CHECK-NEXT: vslideup.vx v8, v16, a1 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 %idx @@ -653,17 +635,12 @@ define @insertelt_nxv1i64_0( %v, i64 %elt) { ; CHECK-LABEL: insertelt_nxv1i64_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 -; CHECK-NEXT: vid.v v26 -; CHECK-NEXT: vmseq.vi v0, v26, 0 -; CHECK-NEXT: vmerge.vvm v8, v8, v25, v0 +; CHECK-NEXT: vsetivli a2, 2, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vslide1up.vx v26, v25, a1 +; CHECK-NEXT: vslide1up.vx v25, v26, a0 +; CHECK-NEXT: vsetivli a0, 1, e64,m1,tu,mu +; CHECK-NEXT: vslideup.vi v8, v25, 0 ; CHECK-NEXT: ret %r = insertelement %v, i64 %elt, i32 0 ret %r @@ -672,17 +649,12 @@ define @insertelt_nxv1i64_imm( %v, i64 %elt) { ; CHECK-LABEL: insertelt_nxv1i64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; 
CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 -; CHECK-NEXT: vid.v v26 -; CHECK-NEXT: vmseq.vi v0, v26, 3 -; CHECK-NEXT: vmerge.vvm v8, v8, v25, v0 +; CHECK-NEXT: vsetivli a2, 2, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vslide1up.vx v26, v25, a1 +; CHECK-NEXT: vslide1up.vx v25, v26, a0 +; CHECK-NEXT: vsetivli a0, 4, e64,m1,tu,mu +; CHECK-NEXT: vslideup.vi v8, v25, 3 ; CHECK-NEXT: ret %r = insertelement %v, i64 %elt, i32 3 ret %r @@ -691,17 +663,13 @@ define @insertelt_nxv1i64_idx( %v, i64 %elt, i32 %idx) { ; CHECK-LABEL: insertelt_nxv1i64_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a3, zero, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 -; CHECK-NEXT: vid.v v26 -; CHECK-NEXT: vmseq.vx v0, v26, a2 -; CHECK-NEXT: vmerge.vvm v8, v8, v25, v0 +; CHECK-NEXT: vsetivli a3, 2, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vslide1up.vx v26, v25, a1 +; CHECK-NEXT: vslide1up.vx v25, v26, a0 +; CHECK-NEXT: addi a0, a2, 1 +; CHECK-NEXT: vsetvli a0, a0, e64,m1,tu,mu +; CHECK-NEXT: vslideup.vx v8, v25, a2 ; CHECK-NEXT: ret %r = insertelement %v, i64 %elt, i32 %idx ret %r @@ -710,17 +678,12 @@ define @insertelt_nxv2i64_0( %v, i64 %elt) { ; CHECK-LABEL: insertelt_nxv2i64_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 -; CHECK-NEXT: vid.v v28 -; CHECK-NEXT: vmseq.vi v0, v28, 0 -; CHECK-NEXT: vmerge.vvm v8, v8, v26, v0 +; CHECK-NEXT: vsetivli a2, 2, e32,m2,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vslide1up.vx v28, v26, a1 +; CHECK-NEXT: vslide1up.vx v26, v28, a0 +; CHECK-NEXT: vsetivli a0, 1, e64,m2,tu,mu +; CHECK-NEXT: vslideup.vi v8, v26, 0 ; CHECK-NEXT: ret %r = insertelement %v, i64 %elt, i32 0 ret %r @@ -729,17 +692,12 @@ define @insertelt_nxv2i64_imm( %v, i64 %elt) { ; CHECK-LABEL: insertelt_nxv2i64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 -; CHECK-NEXT: vid.v v28 -; CHECK-NEXT: vmseq.vi v0, v28, 3 -; CHECK-NEXT: vmerge.vvm v8, v8, v26, v0 +; CHECK-NEXT: vsetivli a2, 2, e32,m2,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vslide1up.vx v28, v26, a1 +; CHECK-NEXT: vslide1up.vx v26, v28, a0 +; CHECK-NEXT: vsetivli a0, 4, e64,m2,tu,mu +; CHECK-NEXT: vslideup.vi v8, v26, 3 ; CHECK-NEXT: ret %r = insertelement %v, i64 %elt, i32 3 ret %r @@ -748,17 +706,13 @@ define @insertelt_nxv2i64_idx( %v, i64 %elt, i32 %idx) { ; CHECK-LABEL: insertelt_nxv2i64_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a3, zero, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 -; CHECK-NEXT: vid.v v28 -; CHECK-NEXT: vmseq.vx v0, v28, a2 -; CHECK-NEXT: vmerge.vvm v8, v8, v26, v0 +; CHECK-NEXT: vsetivli a3, 2, e32,m2,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 
+; CHECK-NEXT: vslide1up.vx v28, v26, a1
+; CHECK-NEXT: vslide1up.vx v26, v28, a0
+; CHECK-NEXT: addi a0, a2, 1
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,tu,mu
+; CHECK-NEXT: vslideup.vx v8, v26, a2
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 2 x i64> %v, i64 %elt, i32 %idx
   ret <vscale x 2 x i64> %r
@@ -767,17 +721,12 @@
 define <vscale x 4 x i64> @insertelt_nxv4i64_0(<vscale x 4 x i64> %v, i64 %elt) {
 ; CHECK-LABEL: insertelt_nxv4i64_0:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsll.vx v12, v12, a1
-; CHECK-NEXT: vsrl.vx v12, v12, a1
-; CHECK-NEXT: vor.vv v28, v12, v28
-; CHECK-NEXT: vid.v v12
-; CHECK-NEXT: vmseq.vi v0, v12, 0
-; CHECK-NEXT: vmerge.vvm v8, v8, v28, v0
+; CHECK-NEXT: vsetivli a2, 2, e32,m4,ta,mu
+; CHECK-NEXT: vmv.v.i v28, 0
+; CHECK-NEXT: vslide1up.vx v12, v28, a1
+; CHECK-NEXT: vslide1up.vx v28, v12, a0
+; CHECK-NEXT: vsetivli a0, 1, e64,m4,tu,mu
+; CHECK-NEXT: vslideup.vi v8, v28, 0
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 4 x i64> %v, i64 %elt, i32 0
   ret <vscale x 4 x i64> %r
@@ -786,17 +735,12 @@
 define <vscale x 4 x i64> @insertelt_nxv4i64_imm(<vscale x 4 x i64> %v, i64 %elt) {
 ; CHECK-LABEL: insertelt_nxv4i64_imm:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsll.vx v12, v12, a1
-; CHECK-NEXT: vsrl.vx v12, v12, a1
-; CHECK-NEXT: vor.vv v28, v12, v28
-; CHECK-NEXT: vid.v v12
-; CHECK-NEXT: vmseq.vi v0, v12, 3
-; CHECK-NEXT: vmerge.vvm v8, v8, v28, v0
+; CHECK-NEXT: vsetivli a2, 2, e32,m4,ta,mu
+; CHECK-NEXT: vmv.v.i v28, 0
+; CHECK-NEXT: vslide1up.vx v12, v28, a1
+; CHECK-NEXT: vslide1up.vx v28, v12, a0
+; CHECK-NEXT: vsetivli a0, 4, e64,m4,tu,mu
+; CHECK-NEXT: vslideup.vi v8, v28, 3
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 4 x i64> %v, i64 %elt, i32 3
   ret <vscale x 4 x i64> %r
@@ -805,17 +749,13 @@
 define <vscale x 4 x i64> @insertelt_nxv4i64_idx(<vscale x 4 x i64> %v, i64 %elt, i32 %idx) {
 ; CHECK-LABEL: insertelt_nxv4i64_idx:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a3, zero, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsll.vx v12, v12, a1
-; CHECK-NEXT: vsrl.vx v12, v12, a1
-; CHECK-NEXT: vor.vv v28, v12, v28
-; CHECK-NEXT: vid.v v12
-; CHECK-NEXT: vmseq.vx v0, v12, a2
-; CHECK-NEXT: vmerge.vvm v8, v8, v28, v0
+; CHECK-NEXT: vsetivli a3, 2, e32,m4,ta,mu
+; CHECK-NEXT: vmv.v.i v28, 0
+; CHECK-NEXT: vslide1up.vx v12, v28, a1
+; CHECK-NEXT: vslide1up.vx v28, v12, a0
+; CHECK-NEXT: addi a0, a2, 1
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,tu,mu
+; CHECK-NEXT: vslideup.vx v8, v28, a2
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 4 x i64> %v, i64 %elt, i32 %idx
   ret <vscale x 4 x i64> %r
@@ -824,17 +764,12 @@
 define <vscale x 8 x i64> @insertelt_nxv8i64_0(<vscale x 8 x i64> %v, i64 %elt) {
 ; CHECK-LABEL: insertelt_nxv8i64_0:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
-; CHECK-NEXT: vid.v v24
-; CHECK-NEXT: vmseq.vi v0, v24, 0
-; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT: vsetivli a2, 2, e32,m8,ta,mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vslide1up.vx v24, v16, a1
+; CHECK-NEXT: vslide1up.vx v16, v24, a0
+; CHECK-NEXT: vsetivli a0, 1, e64,m8,tu,mu
+; CHECK-NEXT: vslideup.vi v8, v16, 0
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 8 x i64> %v, i64 %elt, i32 0
   ret <vscale x 8 x i64> %r
@@ -843,17 +778,12 @@
 define <vscale x 8 x i64> @insertelt_nxv8i64_imm(<vscale x 8 x i64> %v, i64 %elt) {
 ; CHECK-LABEL: insertelt_nxv8i64_imm:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
-; CHECK-NEXT: vid.v v24
-; CHECK-NEXT: vmseq.vi v0, v24, 3
-; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT: vsetivli a2, 2, e32,m8,ta,mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vslide1up.vx v24, v16, a1
+; CHECK-NEXT: vslide1up.vx v16, v24, a0
+; CHECK-NEXT: vsetivli a0, 4, e64,m8,tu,mu
+; CHECK-NEXT: vslideup.vi v8, v16, 3
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 8 x i64> %v, i64 %elt, i32 3
   ret <vscale x 8 x i64> %r
@@ -862,17 +792,13 @@
 define <vscale x 8 x i64> @insertelt_nxv8i64_idx(<vscale x 8 x i64> %v, i64 %elt, i32 %idx) {
 ; CHECK-LABEL: insertelt_nxv8i64_idx:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a3, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
-; CHECK-NEXT: vid.v v24
-; CHECK-NEXT: vmseq.vx v0, v24, a2
-; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT: vsetivli a3, 2, e32,m8,ta,mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vslide1up.vx v24, v16, a1
+; CHECK-NEXT: vslide1up.vx v16, v24, a0
+; CHECK-NEXT: addi a0, a2, 1
+; CHECK-NEXT: vsetvli a0, a0, e64,m8,tu,mu
+; CHECK-NEXT: vslideup.vx v8, v16, a2
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 8 x i64> %v, i64 %elt, i32 %idx
   ret <vscale x 8 x i64> %r
@@ -882,10 +808,9 @@
 define <vscale x 2 x i64> @insertelt_nxv2i64_0_c10(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: insertelt_nxv2i64_0_c10:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
-; CHECK-NEXT: vid.v v26
-; CHECK-NEXT: vmseq.vi v0, v26, 0
-; CHECK-NEXT: vmerge.vim v8, v8, 10, v0
+; CHECK-NEXT: addi a0, zero, 10
+; CHECK-NEXT: vsetvli a1, zero, e64,m2,ta,mu
+; CHECK-NEXT: vmv.s.x v8, a0
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 2 x i64> %v, i64 10, i32 0
   ret <vscale x 2 x i64> %r
@@ -894,10 +819,11 @@
 define <vscale x 2 x i64> @insertelt_nxv2i64_imm_c10(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: insertelt_nxv2i64_imm_c10:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
-; CHECK-NEXT: vid.v v26
-; CHECK-NEXT: vmseq.vi v0, v26, 3
-; CHECK-NEXT: vmerge.vim v8, v8, 10, v0
+; CHECK-NEXT: addi a0, zero, 10
+; CHECK-NEXT: vsetvli a1, zero, e64,m2,ta,mu
+; CHECK-NEXT: vmv.s.x v26, a0
+; CHECK-NEXT: vsetivli a0, 4, e64,m2,tu,mu
+; CHECK-NEXT: vslideup.vi v8, v26, 3
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 2 x i64> %v, i64 10, i32 3
   ret <vscale x 2 x i64> %r
@@ -906,10 +832,12 @@
 define <vscale x 2 x i64> @insertelt_nxv2i64_idx_c10(<vscale x 2 x i64> %v, i32 %idx) {
 ; CHECK-LABEL: insertelt_nxv2i64_idx_c10:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e64,m2,ta,mu
-; CHECK-NEXT: vid.v v26
-; CHECK-NEXT: vmseq.vx v0, v26, a0
-; CHECK-NEXT: vmerge.vim v8, v8, 10, v0
+; CHECK-NEXT: addi a1, zero, 10
+; CHECK-NEXT: vsetvli a2, zero, e64,m2,ta,mu
+; CHECK-NEXT: vmv.s.x v26, a1
+; CHECK-NEXT: addi a1, a0, 1
+; CHECK-NEXT: vsetvli a1, a1, e64,m2,tu,mu
+; CHECK-NEXT: vslideup.vx v8, v26, a0
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 2 x i64> %v, i64 10, i32 %idx
   ret <vscale x 2 x i64> %r
@@ -918,10 +846,9 @@
 define <vscale x 2 x i64> @insertelt_nxv2i64_0_cn1(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: insertelt_nxv2i64_0_cn1:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
-; CHECK-NEXT: vid.v v26
-; CHECK-NEXT: vmseq.vi v0, v26, 0
-; CHECK-NEXT: vmerge.vim v8, v8, -1, v0
+; CHECK-NEXT: addi a0, zero, -1
+; CHECK-NEXT: vsetvli a1, zero, e64,m2,ta,mu
+; CHECK-NEXT: vmv.s.x v8, a0
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 2 x i64> %v, i64 -1, i32 0
   ret <vscale x 2 x i64> %r
@@ -930,10 +857,11 @@
 define <vscale x 2 x i64> @insertelt_nxv2i64_imm_cn1(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: insertelt_nxv2i64_imm_cn1:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
-; CHECK-NEXT: vid.v v26
-; CHECK-NEXT: vmseq.vi v0, v26, 3
-; CHECK-NEXT: vmerge.vim v8, v8, -1, v0
+; CHECK-NEXT: addi a0, zero, -1
+; CHECK-NEXT: vsetvli a1, zero, e64,m2,ta,mu
+; CHECK-NEXT: vmv.s.x v26, a0
+; CHECK-NEXT: vsetivli a0, 4, e64,m2,tu,mu
+; CHECK-NEXT: vslideup.vi v8, v26, 3
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 2 x i64> %v, i64 -1, i32 3
   ret <vscale x 2 x i64> %r
@@ -942,10 +870,12 @@
 define <vscale x 2 x i64> @insertelt_nxv2i64_idx_cn1(<vscale x 2 x i64> %v, i32 %idx) {
 ; CHECK-LABEL: insertelt_nxv2i64_idx_cn1:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e64,m2,ta,mu
-; CHECK-NEXT: vid.v v26
-; CHECK-NEXT: vmseq.vx v0, v26, a0
-; CHECK-NEXT: vmerge.vim v8, v8, -1, v0
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: vsetvli a2, zero, e64,m2,ta,mu
+; CHECK-NEXT: vmv.s.x v26, a1
+; CHECK-NEXT: addi a1, a0, 1
+; CHECK-NEXT: vsetvli a1, a1, e64,m2,tu,mu
+; CHECK-NEXT: vslideup.vx v8, v26, a0
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 2 x i64> %v, i64 -1, i32 %idx
   ret <vscale x 2 x i64> %r
diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll
--- a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll
@@ -16,9 +16,8 @@
 ; CHECK-LABEL: insertelt_nxv1i8_imm:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu
-; CHECK-NEXT: vslidedown.vi v25, v8, 3
 ; CHECK-NEXT: vmv.s.x v25, a0
-; CHECK-NEXT: vsetvli a0, zero, e8,mf8,tu,mu
+; CHECK-NEXT: vsetivli a0, 4, e8,mf8,tu,mu
 ; CHECK-NEXT: vslideup.vi v8, v25, 3
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 1 x i8> %v, i8 %elt, i32 3
@@ -29,9 +28,9 @@
 ; CHECK-LABEL: insertelt_nxv1i8_idx:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a2, zero, e8,mf8,ta,mu
-; CHECK-NEXT: vslidedown.vx v25, v8, a1
 ; CHECK-NEXT: vmv.s.x v25, a0
-; CHECK-NEXT: vsetvli a0, zero, e8,mf8,tu,mu
+; CHECK-NEXT: addi a0, a1, 1
+; CHECK-NEXT: vsetvli a0, a0, e8,mf8,tu,mu
 ; CHECK-NEXT: vslideup.vx v8, v25, a1
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 1 x i8> %v, i8 %elt, i32 %idx
@@ -52,9 +51,8 @@
 ; CHECK-LABEL: insertelt_nxv2i8_imm:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu
-; CHECK-NEXT: vslidedown.vi v25, v8, 3
 ; CHECK-NEXT: vmv.s.x v25, a0
-; CHECK-NEXT: vsetvli a0, zero, e8,mf4,tu,mu
+; CHECK-NEXT: vsetivli a0, 4, e8,mf4,tu,mu
 ; CHECK-NEXT: vslideup.vi v8, v25, 3
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 2 x i8> %v, i8 %elt, i32 3
@@ -65,9 +63,9 @@
 ; CHECK-LABEL: insertelt_nxv2i8_idx:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a2, zero, e8,mf4,ta,mu
-; CHECK-NEXT: vslidedown.vx v25, v8, a1
 ; CHECK-NEXT: vmv.s.x v25, a0
-; CHECK-NEXT: vsetvli a0, zero, e8,mf4,tu,mu
+; CHECK-NEXT: addi a0, a1, 1
+; CHECK-NEXT: vsetvli a0, a0, e8,mf4,tu,mu
 ; CHECK-NEXT: vslideup.vx v8, v25, a1
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 2 x i8> %v, i8 %elt, i32 %idx
@@ -88,9 +86,8 @@
 ; CHECK-LABEL: insertelt_nxv4i8_imm:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu
-; CHECK-NEXT: vslidedown.vi v25, v8, 3
 ; CHECK-NEXT: vmv.s.x v25, a0
-; CHECK-NEXT: vsetvli a0, zero, e8,mf2,tu,mu
+; CHECK-NEXT: vsetivli a0, 4, e8,mf2,tu,mu
 ; CHECK-NEXT: vslideup.vi v8, v25, 3
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 4 x i8> %v, i8 %elt, i32 3
@@ -101,9 +98,9 @@
 ; CHECK-LABEL: insertelt_nxv4i8_idx:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a2, zero, e8,mf2,ta,mu
-; CHECK-NEXT: vslidedown.vx v25, v8, a1
 ; CHECK-NEXT: vmv.s.x v25, a0
-; CHECK-NEXT: vsetvli a0, zero, e8,mf2,tu,mu
+; CHECK-NEXT: addi a0, a1, 1
+; CHECK-NEXT: vsetvli a0, a0, e8,mf2,tu,mu
 ; CHECK-NEXT: vslideup.vx v8, v25, a1
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 4 x i8> %v, i8 %elt, i32 %idx
@@ -124,9 +121,8 @@
 ; CHECK-LABEL: insertelt_nxv8i8_imm:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu
-; CHECK-NEXT: vslidedown.vi v25, v8, 3
 ; CHECK-NEXT: vmv.s.x v25, a0
-; CHECK-NEXT: vsetvli a0, zero, e8,m1,tu,mu
+; CHECK-NEXT: vsetivli a0, 4, e8,m1,tu,mu
 ; CHECK-NEXT: vslideup.vi v8, v25, 3
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 8 x i8> %v, i8 %elt, i32 3
@@ -137,9 +133,9 @@
 ; CHECK-LABEL: insertelt_nxv8i8_idx:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a2, zero, e8,m1,ta,mu
-; CHECK-NEXT: vslidedown.vx v25, v8, a1
 ; CHECK-NEXT: vmv.s.x v25, a0
-; CHECK-NEXT: vsetvli a0, zero, e8,m1,tu,mu
+; CHECK-NEXT: addi a0, a1, 1
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,tu,mu
 ; CHECK-NEXT: vslideup.vx v8, v25, a1
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 8 x i8> %v, i8 %elt, i32 %idx
@@ -160,9 +156,8 @@
 ; CHECK-LABEL: insertelt_nxv16i8_imm:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a1, zero, e8,m2,ta,mu
-; CHECK-NEXT: vslidedown.vi v26, v8, 3
 ; CHECK-NEXT: vmv.s.x v26, a0
-; CHECK-NEXT: vsetvli a0, zero, e8,m2,tu,mu
+; CHECK-NEXT: vsetivli a0, 4, e8,m2,tu,mu
 ; CHECK-NEXT: vslideup.vi v8, v26, 3
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 16 x i8> %v, i8 %elt, i32 3
@@ -173,9 +168,9 @@
 ; CHECK-LABEL: insertelt_nxv16i8_idx:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a2, zero, e8,m2,ta,mu
-; CHECK-NEXT: vslidedown.vx v26, v8, a1
 ; CHECK-NEXT: vmv.s.x v26, a0
-; CHECK-NEXT: vsetvli a0, zero, e8,m2,tu,mu
+; CHECK-NEXT: addi a0, a1, 1
+; CHECK-NEXT: vsetvli a0, a0, e8,m2,tu,mu
 ; CHECK-NEXT: vslideup.vx v8, v26, a1
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 16 x i8> %v, i8 %elt, i32 %idx
@@ -196,9 +191,8 @@
 ; CHECK-LABEL: insertelt_nxv32i8_imm:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a1, zero, e8,m4,ta,mu
-; CHECK-NEXT: vslidedown.vi v28, v8, 3
 ; CHECK-NEXT: vmv.s.x v28, a0
-; CHECK-NEXT: vsetvli a0, zero, e8,m4,tu,mu
+; CHECK-NEXT: vsetivli a0, 4, e8,m4,tu,mu
 ; CHECK-NEXT: vslideup.vi v8, v28, 3
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 32 x i8> %v, i8 %elt, i32 3
@@ -209,9 +203,9 @@
 ; CHECK-LABEL: insertelt_nxv32i8_idx:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a2, zero, e8,m4,ta,mu
-; CHECK-NEXT: vslidedown.vx v28, v8, a1
 ; CHECK-NEXT: vmv.s.x v28, a0
-; CHECK-NEXT: vsetvli a0, zero, e8,m4,tu,mu
+; CHECK-NEXT: addi a0, a1, 1
+; CHECK-NEXT: vsetvli a0, a0, e8,m4,tu,mu
 ; CHECK-NEXT: vslideup.vx v8, v28, a1
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 32 x i8> %v, i8 %elt, i32 %idx
@@ -232,9 +226,8 @@
 ; CHECK-LABEL: insertelt_nxv64i8_imm:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a1, zero, e8,m8,ta,mu
-; CHECK-NEXT: vslidedown.vi v16, v8, 3
 ; CHECK-NEXT: vmv.s.x v16, a0
-; CHECK-NEXT: vsetvli a0, zero, e8,m8,tu,mu
+; CHECK-NEXT: vsetivli a0, 4, e8,m8,tu,mu
 ; CHECK-NEXT: vslideup.vi v8, v16, 3
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 64 x i8> %v, i8 %elt, i32 3
@@ -245,9 +238,9 @@
 ; CHECK-LABEL: insertelt_nxv64i8_idx:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a2, zero, e8,m8,ta,mu
-; CHECK-NEXT: vslidedown.vx v16, v8, a1
 ; CHECK-NEXT: vmv.s.x v16, a0
-; CHECK-NEXT: vsetvli a0, zero, e8,m8,tu,mu
+; CHECK-NEXT: addi a0, a1, 1
+; CHECK-NEXT: vsetvli a0, a0, e8,m8,tu,mu
 ; CHECK-NEXT: vslideup.vx v8, v16, a1
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 64 x i8> %v, i8 %elt, i32 %idx
@@ -268,9 +261,8 @@
 ; CHECK-LABEL: insertelt_nxv1i16_imm:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu
-; CHECK-NEXT: vslidedown.vi v25, v8, 3
 ; CHECK-NEXT: vmv.s.x v25, a0
-; CHECK-NEXT: vsetvli a0, zero, e16,mf4,tu,mu
+; CHECK-NEXT: vsetivli a0, 4, e16,mf4,tu,mu
 ; CHECK-NEXT: vslideup.vi v8, v25, 3
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 1 x i16> %v, i16 %elt, i32 3
@@ -281,9 +273,9 @@
 ; CHECK-LABEL: insertelt_nxv1i16_idx:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a2, zero, e16,mf4,ta,mu
-; CHECK-NEXT: vslidedown.vx v25, v8, a1
 ; CHECK-NEXT: vmv.s.x v25, a0
-; CHECK-NEXT: vsetvli a0, zero, e16,mf4,tu,mu
+; CHECK-NEXT: addi a0, a1, 1
+; CHECK-NEXT: vsetvli a0, a0, e16,mf4,tu,mu
 ; CHECK-NEXT: vslideup.vx v8, v25, a1
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 1 x i16> %v, i16 %elt, i32 %idx
@@ -304,9 +296,8 @@
 ; CHECK-LABEL: insertelt_nxv2i16_imm:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu
-; CHECK-NEXT: vslidedown.vi v25, v8, 3
 ; CHECK-NEXT: vmv.s.x v25, a0
-; CHECK-NEXT: vsetvli a0, zero, e16,mf2,tu,mu
+; CHECK-NEXT: vsetivli a0, 4, e16,mf2,tu,mu
 ; CHECK-NEXT: vslideup.vi v8, v25, 3
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 2 x i16> %v, i16 %elt, i32 3
@@ -317,9 +308,9 @@
 ; CHECK-LABEL: insertelt_nxv2i16_idx:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a2, zero, e16,mf2,ta,mu
-; CHECK-NEXT: vslidedown.vx v25, v8, a1
 ; CHECK-NEXT: vmv.s.x v25, a0
-; CHECK-NEXT: vsetvli a0, zero, e16,mf2,tu,mu
+; CHECK-NEXT: addi a0, a1, 1
+; CHECK-NEXT: vsetvli a0, a0, e16,mf2,tu,mu
 ; CHECK-NEXT: vslideup.vx v8, v25, a1
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 2 x i16> %v, i16 %elt, i32 %idx
@@ -340,9 +331,8 @@
 ; CHECK-LABEL: insertelt_nxv4i16_imm:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu
-; CHECK-NEXT: vslidedown.vi v25, v8, 3
 ; CHECK-NEXT: vmv.s.x v25, a0
-; CHECK-NEXT: vsetvli a0, zero, e16,m1,tu,mu
+; CHECK-NEXT: vsetivli a0, 4, e16,m1,tu,mu
 ; CHECK-NEXT: vslideup.vi v8, v25, 3
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 4 x i16> %v, i16 %elt, i32 3
@@ -353,9 +343,9 @@
 ; CHECK-LABEL: insertelt_nxv4i16_idx:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a2, zero, e16,m1,ta,mu
-; CHECK-NEXT: vslidedown.vx v25, v8, a1
 ; CHECK-NEXT: vmv.s.x v25, a0
-; CHECK-NEXT: vsetvli a0, zero, e16,m1,tu,mu
+; CHECK-NEXT: addi a0, a1, 1
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,tu,mu
 ; CHECK-NEXT: vslideup.vx v8, v25, a1
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 4 x i16> %v, i16 %elt, i32 %idx
@@ -376,9 +366,8 @@
 ; CHECK-LABEL: insertelt_nxv8i16_imm:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu
-; CHECK-NEXT: vslidedown.vi v26, v8, 3
 ; CHECK-NEXT: vmv.s.x v26, a0
-; CHECK-NEXT: vsetvli a0, zero, e16,m2,tu,mu
+; CHECK-NEXT: vsetivli a0, 4, e16,m2,tu,mu
 ; CHECK-NEXT: vslideup.vi v8, v26, 3
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 8 x i16> %v, i16 %elt, i32 3
@@ -389,9 +378,9 @@
 ; CHECK-LABEL: insertelt_nxv8i16_idx:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a2, zero, e16,m2,ta,mu
-; CHECK-NEXT: vslidedown.vx v26, v8, a1
 ; CHECK-NEXT: vmv.s.x v26, a0
-; CHECK-NEXT: vsetvli a0, zero, e16,m2,tu,mu
+; CHECK-NEXT: addi a0, a1, 1
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,tu,mu
 ; CHECK-NEXT: vslideup.vx v8, v26, a1
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 8 x i16> %v, i16 %elt, i32 %idx
@@ -412,9 +401,8 @@
 ; CHECK-LABEL: insertelt_nxv16i16_imm:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu
-; CHECK-NEXT: vslidedown.vi v28, v8, 3
 ; CHECK-NEXT: vmv.s.x v28, a0
-; CHECK-NEXT: vsetvli a0, zero, e16,m4,tu,mu
+; CHECK-NEXT: vsetivli a0, 4, e16,m4,tu,mu
 ; CHECK-NEXT: vslideup.vi v8, v28, 3
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 16 x i16> %v, i16 %elt, i32 3
@@ -425,9 +413,9 @@
 ; CHECK-LABEL: insertelt_nxv16i16_idx:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a2, zero, e16,m4,ta,mu
-; CHECK-NEXT: vslidedown.vx v28, v8, a1
 ; CHECK-NEXT: vmv.s.x v28, a0
-; CHECK-NEXT: vsetvli a0, zero, e16,m4,tu,mu
+; CHECK-NEXT: addi a0, a1, 1
+; CHECK-NEXT: vsetvli a0, a0, e16,m4,tu,mu
 ; CHECK-NEXT: vslideup.vx v8, v28, a1
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 16 x i16> %v, i16 %elt, i32 %idx
@@ -448,9 +436,8 @@
 ; CHECK-LABEL: insertelt_nxv32i16_imm:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu
-; CHECK-NEXT: vslidedown.vi v16, v8, 3
 ; CHECK-NEXT: vmv.s.x v16, a0
-; CHECK-NEXT: vsetvli a0, zero, e16,m8,tu,mu
+; CHECK-NEXT: vsetivli a0, 4, e16,m8,tu,mu
 ; CHECK-NEXT: vslideup.vi v8, v16, 3
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 32 x i16> %v, i16 %elt, i32 3
@@ -461,9 +448,9 @@
 ; CHECK-LABEL: insertelt_nxv32i16_idx:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a2, zero, e16,m8,ta,mu
-; CHECK-NEXT: vslidedown.vx v16, v8, a1
 ; CHECK-NEXT: vmv.s.x v16, a0
-; CHECK-NEXT: vsetvli a0, zero, e16,m8,tu,mu
+; CHECK-NEXT: addi a0, a1, 1
+; CHECK-NEXT: vsetvli a0, a0, e16,m8,tu,mu
 ; CHECK-NEXT: vslideup.vx v8, v16, a1
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 32 x i16> %v, i16 %elt, i32 %idx
@@ -484,9 +471,8 @@
 ; CHECK-LABEL: insertelt_nxv1i32_imm:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu
-; CHECK-NEXT: vslidedown.vi v25, v8, 3
 ; CHECK-NEXT: vmv.s.x v25, a0
-; CHECK-NEXT: vsetvli a0, zero, e32,mf2,tu,mu
+; CHECK-NEXT: vsetivli a0, 4, e32,mf2,tu,mu
 ; CHECK-NEXT: vslideup.vi v8, v25, 3
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 1 x i32> %v, i32 %elt, i32 3
@@ -497,9 +483,9 @@
 ; CHECK-LABEL: insertelt_nxv1i32_idx:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a2, zero, e32,mf2,ta,mu
-; CHECK-NEXT: vslidedown.vx v25, v8, a1
 ; CHECK-NEXT: vmv.s.x v25, a0
-; CHECK-NEXT: vsetvli a0, zero, e32,mf2,tu,mu
+; CHECK-NEXT: addi a0, a1, 1
+; CHECK-NEXT: vsetvli a0, a0, e32,mf2,tu,mu
 ; CHECK-NEXT: vslideup.vx v8, v25, a1
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 1 x i32> %v, i32 %elt, i32 %idx
@@ -520,9 +506,8 @@
 ; CHECK-LABEL: insertelt_nxv2i32_imm:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu
-; CHECK-NEXT: vslidedown.vi v25, v8, 3
 ; CHECK-NEXT: vmv.s.x v25, a0
-; CHECK-NEXT: vsetvli a0, zero, e32,m1,tu,mu
+; CHECK-NEXT: vsetivli a0, 4, e32,m1,tu,mu
 ; CHECK-NEXT: vslideup.vi v8, v25, 3
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 2 x i32> %v, i32 %elt, i32 3
@@ -533,9 +518,9 @@
 ; CHECK-LABEL: insertelt_nxv2i32_idx:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a2, zero, e32,m1,ta,mu
-; CHECK-NEXT: vslidedown.vx v25, v8, a1
 ; CHECK-NEXT: vmv.s.x v25, a0
-; CHECK-NEXT: vsetvli a0, zero, e32,m1,tu,mu
+; CHECK-NEXT: addi a0, a1, 1
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,tu,mu
 ; CHECK-NEXT: vslideup.vx v8, v25, a1
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 2 x i32> %v, i32 %elt, i32 %idx
@@ -556,9 +541,8 @@
 ; CHECK-LABEL: insertelt_nxv4i32_imm:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu
-; CHECK-NEXT: vslidedown.vi v26, v8, 3
 ; CHECK-NEXT: vmv.s.x v26, a0
-; CHECK-NEXT: vsetvli a0, zero, e32,m2,tu,mu
+; CHECK-NEXT: vsetivli a0, 4, e32,m2,tu,mu
 ; CHECK-NEXT: vslideup.vi v8, v26, 3
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 4 x i32> %v, i32 %elt, i32 3
@@ -569,9 +553,9 @@
 ; CHECK-LABEL: insertelt_nxv4i32_idx:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a2, zero, e32,m2,ta,mu
-; CHECK-NEXT: vslidedown.vx v26, v8, a1
 ; CHECK-NEXT: vmv.s.x v26, a0
-; CHECK-NEXT: vsetvli a0, zero, e32,m2,tu,mu
+; CHECK-NEXT: addi a0, a1, 1
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,tu,mu
 ; CHECK-NEXT: vslideup.vx v8, v26, a1
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 4 x i32> %v, i32 %elt, i32 %idx
@@ -592,9 +576,8 @@
 ; CHECK-LABEL: insertelt_nxv8i32_imm:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu
-; CHECK-NEXT: vslidedown.vi v28, v8, 3
 ; CHECK-NEXT: vmv.s.x v28, a0
-; CHECK-NEXT: vsetvli a0, zero, e32,m4,tu,mu
+; CHECK-NEXT: vsetivli a0, 4, e32,m4,tu,mu
 ; CHECK-NEXT: vslideup.vi v8, v28, 3
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 8 x i32> %v, i32 %elt, i32 3
@@ -605,9 +588,9 @@
 ; CHECK-LABEL: insertelt_nxv8i32_idx:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a2, zero, e32,m4,ta,mu
-; CHECK-NEXT: vslidedown.vx v28, v8, a1
 ; CHECK-NEXT: vmv.s.x v28, a0
-; CHECK-NEXT: vsetvli a0, zero, e32,m4,tu,mu
+; CHECK-NEXT: addi a0, a1, 1
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,tu,mu
 ; CHECK-NEXT: vslideup.vx v8, v28, a1
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 8 x i32> %v, i32 %elt, i32 %idx
@@ -628,9 +611,8 @@
 ; CHECK-LABEL: insertelt_nxv16i32_imm:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu
-; CHECK-NEXT: vslidedown.vi v16, v8, 3
 ; CHECK-NEXT: vmv.s.x v16, a0
-; CHECK-NEXT: vsetvli a0, zero, e32,m8,tu,mu
+; CHECK-NEXT: vsetivli a0, 4, e32,m8,tu,mu
 ; CHECK-NEXT: vslideup.vi v8, v16, 3
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 16 x i32> %v, i32 %elt, i32 3
@@ -641,9 +623,9 @@
 ; CHECK-LABEL: insertelt_nxv16i32_idx:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a2, zero, e32,m8,ta,mu
-; CHECK-NEXT: vslidedown.vx v16, v8, a1
 ; CHECK-NEXT: vmv.s.x v16, a0
-; CHECK-NEXT: vsetvli a0, zero, e32,m8,tu,mu
+; CHECK-NEXT: addi a0, a1, 1
+; CHECK-NEXT: vsetvli a0, a0, e32,m8,tu,mu
 ; CHECK-NEXT: vslideup.vx v8, v16, a1
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 16 x i32> %v, i32 %elt, i32 %idx
@@ -664,9 +646,8 @@
 ; CHECK-LABEL: insertelt_nxv1i64_imm:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a1, zero, e64,m1,ta,mu
-; CHECK-NEXT: vslidedown.vi v25, v8, 3
 ; CHECK-NEXT: vmv.s.x v25, a0
-; CHECK-NEXT: vsetvli a0, zero, e64,m1,tu,mu
+; CHECK-NEXT: vsetivli a0, 4, e64,m1,tu,mu
 ; CHECK-NEXT: vslideup.vi v8, v25, 3
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 1 x i64> %v, i64 %elt, i32 3
@@ -676,12 +657,12 @@
 define <vscale x 1 x i64> @insertelt_nxv1i64_idx(<vscale x 1 x i64> %v, i64 %elt, i32 %idx) {
 ; CHECK-LABEL: insertelt_nxv1i64_idx:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: sext.w a1, a1
 ; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu
-; CHECK-NEXT: vslidedown.vx v25, v8, a1
 ; CHECK-NEXT: vmv.s.x v25, a0
-; CHECK-NEXT: vsetvli a0, zero, e64,m1,tu,mu
-; CHECK-NEXT: vslideup.vx v8, v25, a1
+; CHECK-NEXT: sext.w a0, a1
+; CHECK-NEXT: addi a1, a0, 1
+; CHECK-NEXT: vsetvli a1, a1, e64,m1,tu,mu
+; CHECK-NEXT: vslideup.vx v8, v25, a0
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 1 x i64> %v, i64 %elt, i32 %idx
   ret <vscale x 1 x i64> %r
@@ -701,9 +682,8 @@
 ; CHECK-LABEL: insertelt_nxv2i64_imm:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a1, zero, e64,m2,ta,mu
-; CHECK-NEXT: vslidedown.vi v26, v8, 3
 ; CHECK-NEXT: vmv.s.x v26, a0
-; CHECK-NEXT: vsetvli a0, zero, e64,m2,tu,mu
+; CHECK-NEXT: vsetivli a0, 4, e64,m2,tu,mu
 ; CHECK-NEXT: vslideup.vi v8, v26, 3
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 2 x i64> %v, i64 %elt, i32 3
@@ -713,12 +693,12 @@
 define <vscale x 2 x i64> @insertelt_nxv2i64_idx(<vscale x 2 x i64> %v, i64 %elt, i32 %idx) {
 ; CHECK-LABEL: insertelt_nxv2i64_idx:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: sext.w a1, a1
 ; CHECK-NEXT: vsetvli a2, zero, e64,m2,ta,mu
-; CHECK-NEXT: vslidedown.vx v26, v8, a1
 ; CHECK-NEXT: vmv.s.x v26, a0
-; CHECK-NEXT: vsetvli a0, zero, e64,m2,tu,mu
-; CHECK-NEXT: vslideup.vx v8, v26, a1
+; CHECK-NEXT: sext.w a0, a1
+; CHECK-NEXT: addi a1, a0, 1
+; CHECK-NEXT: vsetvli a1, a1, e64,m2,tu,mu
+; CHECK-NEXT: vslideup.vx v8, v26, a0
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 2 x i64> %v, i64 %elt, i32 %idx
   ret <vscale x 2 x i64> %r
@@ -738,9 +718,8 @@
 ; CHECK-LABEL: insertelt_nxv4i64_imm:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a1, zero, e64,m4,ta,mu
-; CHECK-NEXT: vslidedown.vi v28, v8, 3
 ; CHECK-NEXT: vmv.s.x v28, a0
-; CHECK-NEXT: vsetvli a0, zero, e64,m4,tu,mu
+; CHECK-NEXT: vsetivli a0, 4, e64,m4,tu,mu
 ; CHECK-NEXT: vslideup.vi v8, v28, 3
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 4 x i64> %v, i64 %elt, i32 3
@@ -750,12 +729,12 @@
 define <vscale x 4 x i64> @insertelt_nxv4i64_idx(<vscale x 4 x i64> %v, i64 %elt, i32 %idx) {
 ; CHECK-LABEL: insertelt_nxv4i64_idx:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: sext.w a1, a1
 ; CHECK-NEXT: vsetvli a2, zero, e64,m4,ta,mu
-; CHECK-NEXT: vslidedown.vx v28, v8, a1
 ; CHECK-NEXT: vmv.s.x v28, a0
-; CHECK-NEXT: vsetvli a0, zero, e64,m4,tu,mu
-; CHECK-NEXT: vslideup.vx v8, v28, a1
+; CHECK-NEXT: sext.w a0, a1
+; CHECK-NEXT: addi a1, a0, 1
+; CHECK-NEXT: vsetvli a1, a1, e64,m4,tu,mu
+; CHECK-NEXT: vslideup.vx v8, v28, a0
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 4 x i64> %v, i64 %elt, i32 %idx
   ret <vscale x 4 x i64> %r
@@ -775,9 +754,8 @@
 ; CHECK-LABEL: insertelt_nxv8i64_imm:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu
-; CHECK-NEXT: vslidedown.vi v16, v8, 3
 ; CHECK-NEXT: vmv.s.x v16, a0
-; CHECK-NEXT: vsetvli a0, zero, e64,m8,tu,mu
+; CHECK-NEXT: vsetivli a0, 4, e64,m8,tu,mu
 ; CHECK-NEXT: vslideup.vi v8, v16, 3
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 8 x i64> %v, i64 %elt, i32 3
@@ -787,12 +765,12 @@
 define <vscale x 8 x i64> @insertelt_nxv8i64_idx(<vscale x 8 x i64> %v, i64 %elt, i32 %idx) {
 ; CHECK-LABEL: insertelt_nxv8i64_idx:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: sext.w a1, a1
 ; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vslidedown.vx v16, v8, a1
 ; CHECK-NEXT: vmv.s.x v16, a0
-; CHECK-NEXT: vsetvli a0, zero, e64,m8,tu,mu
-; CHECK-NEXT: vslideup.vx v8, v16, a1
+; CHECK-NEXT: sext.w a0, a1
+; CHECK-NEXT: addi a1, a0, 1
+; CHECK-NEXT: vsetvli a1, a1, e64,m8,tu,mu
+; CHECK-NEXT: vslideup.vx v8, v16, a0
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 8 x i64> %v, i64 %elt, i32 %idx
   ret <vscale x 8 x i64> %r