diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -6180,6 +6180,32 @@
   unsigned Opc =
       VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
   if (isNullConstant(Idx)) {
+
+    // Lower inserts at element 0 into what will become an identity
+    // shuffle as a vslide1up
+    // e.g. (insert_vec_elt (shuffle %v1, %v2), %val)
+    if (auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Op.getOperand(0))) {
+      ArrayRef<int> ShufMask = Shuffle->getMask();
+      if (ShuffleVectorInst::isIdentityMask(ShufMask.drop_front(1)) &&
+          VecVT.isFixedLengthVector() && VecVT.getVectorNumElements() > 1) {
+        // If the first index of the identity mask < mask size, then
+        // we're inserting into the first operand, otherwise we're
+        // inserting into the second.
+        Vec = ShufMask[1] < (int)ShufMask.size() ? Shuffle->getOperand(0)
+                                                 : Shuffle->getOperand(1);
+        Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
+
+        unsigned Opc = Val.getSimpleValueType().isFloatingPoint()
+                           ? RISCVISD::VFSLIDE1UP_VL
+                           : RISCVISD::VSLIDE1UP_VL;
+        SDValue Slideup =
+            DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
+                        Val, Mask, VL);
+        Slideup = convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
+        return Slideup;
+      }
+    }
+
     Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
     if (!VecVT.isFixedLengthVector())
       return Vec;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-vslide1up.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-vslide1up.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-vslide1up.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-vslide1up.ll
@@ -295,10 +295,8 @@
 ; CHECK-LABEL: vslide1up_v2f64_inverted:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vrgather.vi v9, v8, 0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
-; CHECK-NEXT:    vfmv.s.f v9, fa0
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfslide1up.vf v9, v8, fa0
+; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
   %v1 = shufflevector <2 x double> %v, <2 x double> poison, <2 x i32> <i32 poison, i32 0>
   %v2 = insertelement <2 x double> %v1, double %b, i64 0
@@ -309,9 +307,7 @@
 ; CHECK-LABEL: vslide1up_4xi8_inverted:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vslideup.vi v9, v8, 1
-; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, tu, ma
-; CHECK-NEXT:    vmv.s.x v9, a0
+; CHECK-NEXT:    vslide1up.vx v9, v8, a0
 ; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
   %v1 = shufflevector <4 x i8> %v, <4 x i8> poison, <4 x i32> <i32 poison, i32 0, i32 1, i32 2>
@@ -319,6 +315,51 @@
   ret <4 x i8> %v2
 }
 
+define <2 x double> @vslide1up_v2f64_inverted_swapped(<2 x double> %v, double %b) {
+; CHECK-LABEL: vslide1up_v2f64_inverted_swapped:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-NEXT:    vfslide1up.vf v9, v8, fa0
+; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    ret
+  %v1 = shufflevector <2 x double> poison, <2 x double> %v, <2 x i32> <i32 poison, i32 2>
+  %v2 = insertelement <2 x double> %v1, double %b, i64 0
+  ret <2 x double> %v2
+}
+
+define <4 x i8> @vslide1up_4xi8_inverted_swapped(<4 x i8> %v, i8 %b) {
+; CHECK-LABEL: vslide1up_4xi8_inverted_swapped:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT:    vslide1up.vx v9, v8, a0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v1 = shufflevector <4 x i8> poison, <4 x i8> %v, <4 x i32> <i32 poison, i32 4, i32 5, i32 6>
+  %v2 = insertelement <4 x i8> %v1, i8 %b, i64 0
+  ret <4 x i8> %v2
+}
+
+define <4 x i64> @vslide1up_4xi64_inverted(<4 x i64> %v, i64 %b) {
+; RV32-LABEL: vslide1up_4xi64_inverted:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vslideup.vi v10, v8, 1
+; RV32-NEXT:    vsetivli zero, 2, e32, m2, tu, ma
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
+; RV32-NEXT:    vslide1down.vx v10, v10, a1
+; RV32-NEXT:    vmv2r.v v8, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vslide1up_4xi64_inverted:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vslide1up.vx v10, v8, a0
+; RV64-NEXT:    vmv.v.v v8, v10
+; RV64-NEXT:    ret
+  %v1 = shufflevector <4 x i64> %v, <4 x i64> poison, <4 x i32> <i32 poison, i32 0, i32 1, i32 2>
+  %v2 = insertelement <4 x i64> %v1, i64 %b, i64 0
+  ret <4 x i64> %v2
+}
 ; The length of the shift is less than the suffix
 define <4 x i32> @vslide1up_4xi32_neg1(<4 x i32> %v, i32 %b) {
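
For reference, a minimal standalone C++ sketch of the mask property this combine keys on, written without LLVM headers; the helper name feedsVslide1up and the example masks are illustrative only, and the check mirrors the intent of the ShuffleVectorInst::isIdentityMask(ShufMask.drop_front(1)) test above rather than its exact implementation. The idea: once the insert overwrites lane 0, the shuffle only has to feed lanes 1..N-1, and it does so as a slide-up-by-one exactly when every defined lane i >= 1 reads source lane i-1, either from the first shuffle operand (mask index i-1) or from the second (mask index i-1+N).

#include <cstdio>
#include <vector>

// Returns true if a shuffle with this mask, followed by an insert at lane 0,
// is equivalent to a vslide1up. Lane 0 of the mask is ignored because the
// insert overwrites it; -1 marks an undef/poison lane, as in LLVM masks.
static bool feedsVslide1up(const std::vector<int> &Mask) {
  const int N = static_cast<int>(Mask.size());
  for (int I = 1; I < N; ++I) {
    if (Mask[I] == -1)
      continue;
    if (Mask[I] != I - 1 && Mask[I] != I - 1 + N)
      return false;
  }
  return true;
}

int main() {
  // insertelement (shufflevector %v, poison, <poison, 0, 1, 2>), %b, 0
  // produces {b, v[0], v[1], v[2]}, i.e. exactly a vslide1up of %v by %b.
  std::vector<int> SlideUp = {-1, 0, 1, 2};
  // An identity shuffle of lanes 1..3 is not a slide-up by one, so the
  // combine must not fire for it.
  std::vector<int> Identity = {-1, 1, 2, 3};
  std::printf("%d %d\n", feedsVslide1up(SlideUp), feedsVslide1up(Identity));
  return 0; // prints "1 0"
}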