Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3013,7 +3013,10 @@
   MVT VT = Op.getSimpleValueType();
   assert(VT.isFixedLengthVector() && "Unexpected vector!");
 
+  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+
   SDLoc DL(Op);
+  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
   MVT XLenVT = Subtarget.getXLenVT();
 
   unsigned NumElts = Op.getNumOperands();
@@ -3064,6 +3067,22 @@
     SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
 
     DenseSet<SDValue> Processed{DominantValue};
+
+    // We can handle an insert into the last element (of a splat) via
+    // v(f)slide1down. This is slightly better than the vmerge insert lowering
+    // as it avoids the need to materialize the mask.
+    if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
+        !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
+        LastOp != DominantValue) {
+      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
+      auto OpCode =
+        VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
+      Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
+                        LastOp, Mask, VL);
+      Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
+      Processed.insert(LastOp);
+    }
+
     MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
     for (const auto &OpIdx : enumerate(Op->ops())) {
       const SDValue &V = OpIdx.value();
Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
+++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
@@ -693,9 +693,9 @@
 ; RV32NOM-LABEL: extractelt_sdiv_v4i32:
 ; RV32NOM:       # %bb.0:
 ; RV32NOM-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32NOM-NEXT:    vmv.v.i v0, 8
 ; RV32NOM-NEXT:    vmv.v.i v9, 0
-; RV32NOM-NEXT:    vmerge.vim v9, v9, -1, v0
+; RV32NOM-NEXT:    li a0, -1
+; RV32NOM-NEXT:    vslide1down.vx v9, v9, a0
 ; RV32NOM-NEXT:    lui a0, %hi(.LCPI38_0)
 ; RV32NOM-NEXT:    addi a0, a0, %lo(.LCPI38_0)
 ; RV32NOM-NEXT:    vle32.v v10, (a0)
@@ -728,9 +728,9 @@
 ; RV64NOM-LABEL: extractelt_sdiv_v4i32:
 ; RV64NOM:       # %bb.0:
 ; RV64NOM-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64NOM-NEXT:    vmv.v.i v0, 8
 ; RV64NOM-NEXT:    vmv.v.i v9, 0
-; RV64NOM-NEXT:    vmerge.vim v9, v9, -1, v0
+; RV64NOM-NEXT:    li a0, -1
+; RV64NOM-NEXT:    vslide1down.vx v9, v9, a0
 ; RV64NOM-NEXT:    lui a0, %hi(.LCPI38_0)
 ; RV64NOM-NEXT:    addi a0, a0, %lo(.LCPI38_0)
 ; RV64NOM-NEXT:    vle32.v v10, (a0)
Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -488,9 +488,9 @@
 ; CHECK-NEXT:    vmv.v.i v8, 1
 ; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
 ; CHECK-NEXT:    vse32.v v8, (a5)
-; CHECK-NEXT:    vmv.v.i v0, 8
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT:    li a0, 1
+; CHECK-NEXT:    vslide1down.vx v8, v8, a0
 ; CHECK-NEXT:    vse32.v v8, (a6)
 ; CHECK-NEXT:    ret
   store <4 x i32> , ptr %z0
@@ -521,9 +521,9 @@
 ; CHECK-NEXT:    vmv.v.i v0, 2
 ; CHECK-NEXT:    vmv.v.i v8, 4
 ; CHECK-NEXT:    vmerge.vim v8, v8, 3, v0
-; CHECK-NEXT:    vmv.v.i v0, 8
 ; CHECK-NEXT:    vse16.v v8, (a5)
-; CHECK-NEXT:    vmerge.vim v8, v9, 4, v0
+; CHECK-NEXT:    li a0, 4
+; CHECK-NEXT:    vslide1down.vx v8, v9, a0
 ; CHECK-NEXT:    vse16.v v8, (a6)
 ; CHECK-NEXT:    ret
   store <4 x i16> , ptr %z0
Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -177,39 +177,39 @@
 define <8 x i64> @vrgather_shuffle_vv_v8i64(<8 x i64> %x, <8 x i64> %y) {
 ; RV32-LABEL: vrgather_shuffle_vv_v8i64:
 ; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; RV32-NEXT:    vmv.v.i v16, 2
 ; RV32-NEXT:    lui a0, %hi(.LCPI11_0)
 ; RV32-NEXT:    addi a0, a0, %lo(.LCPI11_0)
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV32-NEXT:    vle16.v v20, (a0)
+; RV32-NEXT:    li a0, 5
+; RV32-NEXT:    vslide1down.vx v21, v16, a0
+; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
 ; RV32-NEXT:    vrgatherei16.vv v16, v8, v20
-; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV32-NEXT:    vmv.v.i v8, 5
-; RV32-NEXT:    vmv.v.i v9, 2
-; RV32-NEXT:    vslideup.vi v9, v8, 7
 ; RV32-NEXT:    li a0, 164
+; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
 ; RV32-NEXT:    vmv.v.x v0, a0
-; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32-NEXT:    vrgatherei16.vv v16, v12, v9, v0.t
+; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
+; RV32-NEXT:    vrgatherei16.vv v16, v12, v21, v0.t
 ; RV32-NEXT:    vmv.v.v v8, v16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vrgather_shuffle_vv_v8i64:
 ; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-NEXT:    lui a0, %hi(.LCPI11_0)
 ; RV64-NEXT:    addi a0, a0, %lo(.LCPI11_0)
-; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-NEXT:    vle64.v v20, (a0)
-; RV64-NEXT:    vmv4r.v v16, v8
-; RV64-NEXT:    vrgather.vv v8, v16, v20
-; RV64-NEXT:    li a0, 5
-; RV64-NEXT:    vmv.s.x v20, a0
 ; RV64-NEXT:    vmv.v.i v16, 2
-; RV64-NEXT:    vslideup.vi v16, v20, 7
+; RV64-NEXT:    li a0, 5
+; RV64-NEXT:    vslide1down.vx v24, v16, a0
+; RV64-NEXT:    vrgather.vv v16, v8, v20
 ; RV64-NEXT:    li a0, 164
 ; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
 ; RV64-NEXT:    vmv.v.x v0, a0
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
-; RV64-NEXT:    vrgather.vv v8, v12, v16, v0.t
+; RV64-NEXT:    vrgather.vv v16, v12, v24, v0.t
+; RV64-NEXT:    vmv.v.v v8, v16
 ; RV64-NEXT:    ret
   %s = shufflevector <8 x i64> %x, <8 x i64> %y, <8 x i32> 
   ret <8 x i64> %s
Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -1283,9 +1283,9 @@
 ; CHECK-NEXT:    vsub.vv v8, v8, v10
 ; CHECK-NEXT:    vmulhu.vv v8, v8, v9
 ; CHECK-NEXT:    vadd.vv v8, v8, v10
-; CHECK-NEXT:    vmv.v.i v0, 8
 ; CHECK-NEXT:    vmv.v.i v9, 2
-; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vslide1down.vx v9, v9, a1
 ; CHECK-NEXT:    vsrl.vv v8, v8, v9
 ; CHECK-NEXT:    vse32.v v8, (a0)
 ; CHECK-NEXT:    ret
@@ -5211,9 +5211,9 @@
 ; LMULMAX1-RV32-NEXT:    vsub.vv v9, v9, v12
 ; LMULMAX1-RV32-NEXT:    vmulhu.vv v9, v9, v10
 ; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v12
-; LMULMAX1-RV32-NEXT:    vmv.v.i v0, 8
 ; LMULMAX1-RV32-NEXT:    vmv.v.i v12, 2
-; LMULMAX1-RV32-NEXT:    vmerge.vim v12, v12, 1, v0
+; LMULMAX1-RV32-NEXT:    li a2, 1
+; LMULMAX1-RV32-NEXT:    vslide1down.vx v12, v12, a2
 ; LMULMAX1-RV32-NEXT:    vsrl.vv v9, v9, v12
 ; LMULMAX1-RV32-NEXT:    vmulhu.vv v11, v8, v11
 ; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v11