diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -5008,6 +5008,25 @@
         getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
     // Limit the active VL to two.
     SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
+    // If the Idx is 0 we can insert directly into the vector.
+    if (isNullConstant(Idx)) {
+      // First slide in the lo value, then the hi in above it. We use slide1down
+      // to avoid the register group overlap constraint of vslide1up.
+      ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
+                             Vec, Vec, ValLo, I32Mask, InsertI64VL);
+      // If the source vector is undef don't pass along the tail elements from
+      // the previous slide1down.
+      SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
+      ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
+                             Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
+      // Bitcast back to the right container type.
+      ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
+
+      if (!VecVT.isFixedLengthVector())
+        return ValInVec;
+      return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
+    }
+
     // First slide in the lo value, then the hi in above it. We use slide1down
     // to avoid the register group overlap constraint of vslide1up.
     ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll
@@ -509,9 +509,7 @@
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
 ; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v9, v8, a1
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vslideup.vi v8, v9, 0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: bitcast_i64_v4i16:
@@ -547,9 +545,7 @@
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
 ; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v9, v8, a1
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vslideup.vi v8, v9, 0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: bitcast_i64_v2i32:
@@ -585,9 +581,7 @@
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
 ; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v9, v8, a1
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vslideup.vi v8, v9, 0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: bitcast_i64_v1i64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-bitcast.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-bitcast.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-bitcast.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-bitcast.ll
@@ -200,9 +200,7 @@
 ; RV32-FP: # %bb.0:
 ; RV32-FP-NEXT: vsetivli zero, 2, e32, m1, ta, ma
 ; RV32-FP-NEXT: vslide1down.vx v8, v8, a0
-; RV32-FP-NEXT: vslide1down.vx v9, v8, a1
-; RV32-FP-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-FP-NEXT: vslideup.vi v8, v9, 0
+; RV32-FP-NEXT: vslide1down.vx v8, v8, a1
 ; RV32-FP-NEXT: ret
 ;
 ; RV64-FP-LABEL: bitcast_i64_v4f16:
@@ -219,9 +217,7 @@
 ; RV32-FP: # %bb.0:
 ; RV32-FP-NEXT: vsetivli zero, 2, e32, m1, ta, ma
 ; RV32-FP-NEXT: vslide1down.vx v8, v8, a0
-; RV32-FP-NEXT: vslide1down.vx v9, v8, a1
-; RV32-FP-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-FP-NEXT: vslideup.vi v8, v9, 0
+; RV32-FP-NEXT: vslide1down.vx v8, v8, a1
 ; RV32-FP-NEXT: ret
 ;
 ; RV64-FP-LABEL: bitcast_i64_v2f32:
@@ -238,9 +234,7 @@
 ; RV32-FP: # %bb.0:
 ; RV32-FP-NEXT: vsetivli zero, 2, e32, m1, ta, ma
 ; RV32-FP-NEXT: vslide1down.vx v8, v8, a0
-; RV32-FP-NEXT: vslide1down.vx v9, v8, a1
-; RV32-FP-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-FP-NEXT: vslideup.vi v8, v9, 0
+; RV32-FP-NEXT: vslide1down.vx v8, v8, a1
 ; RV32-FP-NEXT: ret
 ;
 ; RV64-FP-LABEL: bitcast_i64_v1f64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
@@ -144,11 +144,9 @@
 ; RV32-NEXT: vmv.x.s a1, v8
 ; RV32-NEXT: lw a2, 0(a1)
 ; RV32-NEXT: lw a1, 4(a1)
-; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV32-NEXT: vslide1down.vx v10, v8, a2
-; RV32-NEXT: vslide1down.vx v10, v10, a1
-; RV32-NEXT: vsetivli zero, 1, e64, m1, tu, ma
-; RV32-NEXT: vslideup.vi v9, v10, 0
+; RV32-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; RV32-NEXT: vslide1down.vx v9, v9, a2
+; RV32-NEXT: vslide1down.vx v9, v9, a1
 ; RV32-NEXT: andi a0, a0, 2
 ; RV32-NEXT: beqz a0, .LBB5_2
 ; RV32-NEXT: .LBB5_4: # %cond.load1
diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll
@@ -635,11 +635,9 @@
 define <vscale x 1 x i64> @insertelt_nxv1i64_0(<vscale x 1 x i64> %v, i64 %elt) {
 ; CHECK-LABEL: insertelt_nxv1i64_0:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT: vslide1down.vx v9, v8, a0
-; CHECK-NEXT: vslide1down.vx v9, v9, a1
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, tu, ma
-; CHECK-NEXT: vslideup.vi v8, v9, 0
+; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vslide1down.vx v8, v8, a1
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 1 x i64> %v, i64 %elt, i32 0
   ret <vscale x 1 x i64> %r
@@ -675,11 +673,9 @@
 define <vscale x 2 x i64> @insertelt_nxv2i64_0(<vscale x 2 x i64> %v, i64 %elt) {
 ; CHECK-LABEL: insertelt_nxv2i64_0:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e32, m2, ta, ma
-; CHECK-NEXT: vslide1down.vx v10, v8, a0
-; CHECK-NEXT: vslide1down.vx v10, v10, a1
-; CHECK-NEXT: vsetivli zero, 1, e64, m2, tu, ma
-; CHECK-NEXT: vslideup.vi v8, v10, 0
+; CHECK-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vslide1down.vx v8, v8, a1
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 2 x i64> %v, i64 %elt, i32 0
   ret <vscale x 2 x i64> %r
@@ -715,11 +711,9 @@
 define <vscale x 4 x i64> @insertelt_nxv4i64_0(<vscale x 4 x i64> %v, i64 %elt) {
 ; CHECK-LABEL: insertelt_nxv4i64_0:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e32, m4, ta, ma
-; CHECK-NEXT: vslide1down.vx v12, v8, a0
-; CHECK-NEXT: vslide1down.vx v12, v12, a1
-; CHECK-NEXT: vsetivli zero, 1, e64, m4, tu, ma
-; CHECK-NEXT: vslideup.vi v8, v12, 0
+; CHECK-NEXT: vsetivli zero, 2, e32, m4, tu, ma
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vslide1down.vx v8, v8, a1
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 4 x i64> %v, i64 %elt, i32 0
   ret <vscale x 4 x i64> %r
@@ -755,11 +749,9 @@
 define <vscale x 8 x i64> @insertelt_nxv8i64_0(<vscale x 8 x i64> %v, i64 %elt) {
 ; CHECK-LABEL: insertelt_nxv8i64_0:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e32, m8, ta, ma
-; CHECK-NEXT: vslide1down.vx v16, v8, a0
-; CHECK-NEXT: vslide1down.vx v16, v16, a1
-; CHECK-NEXT: vsetivli zero, 1, e64, m8, tu, ma
-; CHECK-NEXT: vslideup.vi v8, v16, 0
+; CHECK-NEXT: vsetivli zero, 2, e32, m8, tu, ma
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vslide1down.vx v8, v8, a1
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 8 x i64> %v, i64 %elt, i32 0
   ret <vscale x 8 x i64> %r
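
For reference, a minimal standalone reproducer for the new index-0 path, assuming a build of llc that includes this change; the function name @insert_i64_at_0 is illustrative and not taken from the test files above, and the RUN flags mirror the -mtriple=riscv32 -mattr=+v configuration these tests already use:

    ; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s
    define <vscale x 1 x i64> @insert_i64_at_0(<vscale x 1 x i64> %v, i64 %elt) {
      ; On RV32 the i64 element arrives split across a0 (lo) and a1 (hi).
      %r = insertelement <vscale x 1 x i64> %v, i64 %elt, i32 0
      ret <vscale x 1 x i64> %r
    }

With the RISCVISelLowering.cpp change, this is expected to lower to a single tail-undisturbed vsetivli followed by two vslide1down.vx into v8, as in the updated insertelt_nxv1i64_0 checks, instead of building the element pair in a temporary register and sliding it into place with vslideup.vi.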