Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3729,6 +3729,19 @@
   MVT XLenVT = Subtarget.getXLenVT();
   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
   auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
+  if (Index == 1 && isa<BuildVectorSDNode>(InPlace)) {
+    if (SDValue Splat = cast<BuildVectorSDNode>(InPlace)->getSplatValue()) {
+      auto OpCode =
+          VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL;
+      auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
+                             DAG.getUNDEF(ContainerVT),
+                             convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget),
+                             Splat, TrueMask,
+                             DAG.getConstant(NumSubElts + Index, DL, XLenVT));
+      return convertFromScalableVector(VT, Vec, DAG, Subtarget);
+    }
+  }
+
   // We slide up by the index that the subvector is being inserted at, and set
   // VL to the index + the number of elements being inserted.
   unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED | RISCVII::MASK_AGNOSTIC;
@@ -3965,6 +3978,10 @@
                                          Subtarget, DAG);
   }
 
+  if (SDValue V =
+          lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
+    return V;
+
   // Detect an interleave shuffle and lower to
   // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
   int EvenSrc, OddSrc;
@@ -3987,10 +4004,6 @@
     return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
   }
 
-  if (SDValue V =
-          lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
-    return V;
-
   // Detect shuffles which can be re-expressed as vector selects; these are
   // shuffles in which each element in the destination is taken from an element
   // at the corresponding index in either source vectors.
Index: llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-transpose.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-transpose.ll
+++ llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-transpose.ll
@@ -113,11 +113,8 @@
 define <2 x i32> @trn1.v2i32(<2 x i32> %v0, <2 x i32> %v1) {
 ; CHECK-LABEL: trn1.v2i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vwaddu.vv v10, v8, v9
-; CHECK-NEXT:    li a0, -1
-; CHECK-NEXT:    vwmaccu.vx v10, a0, v9
-; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vslideup.vi v8, v9, 1
 ; CHECK-NEXT:    ret
   %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 0, i32 2>
   ret <2 x i32> %tmp0
@@ -198,11 +195,8 @@
 define <2 x float> @trn1.v2f32(<2 x float> %v0, <2 x float> %v1) {
 ; CHECK-LABEL: trn1.v2f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vwaddu.vv v10, v8, v9
-; CHECK-NEXT:    li a0, -1
-; CHECK-NEXT:    vwmaccu.vx v10, a0, v9
-; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vslideup.vi v8, v9, 1
 ; CHECK-NEXT:    ret
   %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 0, i32 2>
   ret <2 x float> %tmp0
Index: llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-vslide1up.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-vslide1up.ll
+++ llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-vslide1up.ll
@@ -8,11 +8,7 @@
 ; CHECK-LABEL: vslide1up_2xi8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
-; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT:    vwaddu.vv v9, v10, v8
-; CHECK-NEXT:    li a0, -1
-; CHECK-NEXT:    vwmaccu.vx v9, a0, v8
+; CHECK-NEXT:    vslide1up.vx v9, v8, a0
 ; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
   %vb = insertelement <2 x i8> poison, i8 %b, i64 0
@@ -46,22 +42,16 @@
 ; RV32-LABEL: vslide1up_2xi16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-NEXT:    vmv.s.x v10, a0
-; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vwaddu.vv v9, v10, v8
-; RV32-NEXT:    li a0, -1
-; RV32-NEXT:    vwmaccu.vx v9, a0, v8
+; RV32-NEXT:    vmv.s.x v9, a0
+; RV32-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; RV32-NEXT:    vslideup.vi v9, v8, 1
 ; RV32-NEXT:    vmv1r.v v8, v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vslide1up_2xi16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; RV64-NEXT:    vmv.v.x v10, a0
-; RV64-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; RV64-NEXT:    vwaddu.vv v9, v10, v8
-; RV64-NEXT:    li a0, -1
-; RV64-NEXT:    vwmaccu.vx v9, a0, v8
+; RV64-NEXT:    vslide1up.vx v9, v8, a0
 ; RV64-NEXT:    vmv1r.v v8, v9
 ; RV64-NEXT:    ret
   %vb = insertelement <2 x i16> poison, i16 %b, i64 0
@@ -95,22 +85,16 @@
 ; RV32-LABEL: vslide1up_2xi32:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a0
-; RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-NEXT:    vwaddu.vv v9, v10, v8
-; RV32-NEXT:    li a0, -1
-; RV32-NEXT:    vwmaccu.vx v9, a0, v8
+; RV32-NEXT:    vslide1up.vx v9, v8, a0
 ; RV32-NEXT:    vmv1r.v v8, v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vslide1up_2xi32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vmv.s.x v10, a0
-; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vwaddu.vv v9, v10, v8
-; RV64-NEXT:    li a0, -1
-; RV64-NEXT:    vwmaccu.vx v9, a0, v8
+; RV64-NEXT:    vmv.s.x v9, a0
+; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; RV64-NEXT:    vslideup.vi v9, v8, 1
 ; RV64-NEXT:    vmv1r.v v8, v9
 ; RV64-NEXT:    ret
   %vb = insertelement <2 x i32> poison, i32 %b, i64 0
@@ -149,8 +133,7 @@
 ; RV64-LABEL: vslide1up_2xi64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT:    vmv.v.x v9, a0
-; RV64-NEXT:    vslideup.vi v9, v8, 1
+; RV64-NEXT:    vslide1up.vx v9, v8, a0
 ; RV64-NEXT:    vmv.v.v v8, v9
 ; RV64-NEXT:    ret
   %vb = insertelement <2 x i64> poison, i64 %b, i64 0
@@ -189,11 +172,7 @@
 ; CHECK-LABEL: vslide1up_2xf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT:    vwaddu.vv v9, v10, v8
-; CHECK-NEXT:    li a0, -1
-; CHECK-NEXT:    vwmaccu.vx v9, a0, v8
+; CHECK-NEXT:    vfslide1up.vf v9, v8, fa0
 ; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
   %vb = insertelement <2 x half> poison, half %b, i64 0
@@ -218,11 +197,7 @@
 ; CHECK-LABEL: vslide1up_2xf32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vwaddu.vv v9, v10, v8
-; CHECK-NEXT:    li a0, -1
-; CHECK-NEXT:    vwmaccu.vx v9, a0, v8
+; CHECK-NEXT:    vfslide1up.vf v9, v8, fa0
 ; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
   %vb = insertelement <2 x float> poison, float %b, i64 0
@@ -247,8 +222,7 @@
 ; CHECK-LABEL: vslide1up_2xf64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vfmv.v.f v9, fa0
-; CHECK-NEXT:    vslideup.vi v9, v8, 1
+; CHECK-NEXT:    vfslide1up.vf v9, v8, fa0
 ; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
   %vb = insertelement <2 x double> poison, double %b, i64 0