diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3165,9 +3165,14 @@
     unsigned NumOpElts =
         Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
     SDValue Vec = DAG.getUNDEF(VT);
-    for (const auto &OpIdx : enumerate(Op->ops()))
-      Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, OpIdx.value(),
+    for (const auto &OpIdx : enumerate(Op->ops())) {
+      SDValue SubVec = OpIdx.value();
+      // Don't insert undef subvectors.
+      if (SubVec.isUndef())
+        continue;
+      Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
                         DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL));
+    }
     return Vec;
   }
   case ISD::LOAD:
diff --git a/llvm/test/CodeGen/RISCV/rvv/common-shuffle-patterns.ll b/llvm/test/CodeGen/RISCV/rvv/common-shuffle-patterns.ll
--- a/llvm/test/CodeGen/RISCV/rvv/common-shuffle-patterns.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/common-shuffle-patterns.ll
@@ -14,19 +14,13 @@
 ; CHECK-NEXT: vsetivli zero, 8, e16, m2, tu, mu
 ; CHECK-NEXT: vmv2r.v v14, v12
 ; CHECK-NEXT: vslideup.vi v14, v8, 0
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vsetivli zero, 16, e16, m2, tu, mu
-; CHECK-NEXT: vslideup.vi v14, v8, 8
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT: vid.v v16
 ; CHECK-NEXT: vsrl.vi v18, v16, 1
 ; CHECK-NEXT: vrgather.vv v20, v14, v18
 ; CHECK-NEXT: vsetivli zero, 8, e16, m2, tu, mu
 ; CHECK-NEXT: vslideup.vi v12, v10, 0
-; CHECK-NEXT: vsetivli zero, 16, e16, m2, tu, mu
-; CHECK-NEXT: vslideup.vi v12, v8, 8
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT: lui a0, 11
 ; CHECK-NEXT: addiw a0, a0, -1366
 ; CHECK-NEXT: vmv.s.x v0, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
--- a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
@@ -486,7 +486,6 @@
 ; CHECK-NEXT: vslideup.vi v13, v14, 0
 ; CHECK-NEXT: add a1, a0, a0
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT: vslideup.vx v13, v8, a0
 ; CHECK-NEXT: vslideup.vx v12, v10, a0
 ; CHECK-NEXT: vmv2r.v v8, v12
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -531,11 +531,7 @@
 ; CHECK-NEXT: vmv.v.i v9, 0
 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, mu
 ; CHECK-NEXT: vslideup.vi v9, v8, 0
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
-; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, tu, mu
-; CHECK-NEXT: vslideup.vi v9, v8, 4
-; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
 ; CHECK-NEXT: vrgather.vi v8, v9, 3
 ; CHECK-NEXT: ret
   %shuf = shufflevector <4 x i8> %v, <4 x i8> undef, <8 x i32>
diff --git a/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll b/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll
--- a/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll
@@ -15,22 +15,16 @@
 ; RV64-1024-NEXT: vsetvli zero, a3, e16, m4, tu, mu
 ; RV64-1024-NEXT: vmv4r.v v20, v8
 ; RV64-1024-NEXT: vslideup.vi v20, v12, 0
-; RV64-1024-NEXT: vsetvli zero, a3, e16, m2, ta, mu
-; RV64-1024-NEXT: vmv.v.i v24, 0
-; RV64-1024-NEXT: vsetvli zero, a1, e16, m4, tu, mu
-; RV64-1024-NEXT: vslideup.vx v20, v24, a3
-; RV64-1024-NEXT: vsetvli zero, zero, e16, m4, ta, mu
-; RV64-1024-NEXT: vid.v v28
-; RV64-1024-NEXT: vsrl.vi v12, v28, 1
-; RV64-1024-NEXT: vrgather.vv v0, v20, v12
+; RV64-1024-NEXT: vsetvli zero, a1, e16, m4, ta, mu
+; RV64-1024-NEXT: vid.v v24
+; RV64-1024-NEXT: vsrl.vi v12, v24, 1
+; RV64-1024-NEXT: vrgather.vv v28, v20, v12
 ; RV64-1024-NEXT: vsetvli zero, a3, e16, m4, tu, mu
 ; RV64-1024-NEXT: vslideup.vi v8, v16, 0
-; RV64-1024-NEXT: vsetvli zero, a1, e16, m4, tu, mu
-; RV64-1024-NEXT: vslideup.vx v8, v24, a3
 ; RV64-1024-NEXT: lui a2, %hi(.LCPI0_0)
 ; RV64-1024-NEXT: ld a2, %lo(.LCPI0_0)(a2)
-; RV64-1024-NEXT: vsetvli zero, zero, e16, m4, ta, mu
-; RV64-1024-NEXT: vrgather.vv v16, v0, v28
+; RV64-1024-NEXT: vsetvli zero, a1, e16, m4, ta, mu
+; RV64-1024-NEXT: vrgather.vv v16, v28, v24
 ; RV64-1024-NEXT: vsetivli zero, 4, e64, m1, ta, mu
 ; RV64-1024-NEXT: vmv.s.x v20, a2
 ; RV64-1024-NEXT: vsetivli zero, 2, e64, m1, tu, mu
@@ -49,41 +43,35 @@
 ; RV64-2048: # %bb.0: # %entry
 ; RV64-2048-NEXT: li a3, 128
 ; RV64-2048-NEXT: vsetvli zero, a3, e16, m1, ta, mu
-; RV64-2048-NEXT: vle16.v v10, (a1)
-; RV64-2048-NEXT: vle16.v v12, (a2)
+; RV64-2048-NEXT: vle16.v v8, (a1)
+; RV64-2048-NEXT: vle16.v v10, (a2)
 ; RV64-2048-NEXT: li a1, 256
 ; RV64-2048-NEXT: vsetvli zero, a1, e16, m2, ta, mu
-; RV64-2048-NEXT: vmv.v.i v8, 0
+; RV64-2048-NEXT: vmv.v.i v12, 0
 ; RV64-2048-NEXT: vsetvli zero, a3, e16, m2, tu, mu
-; RV64-2048-NEXT: vmv2r.v v14, v8
-; RV64-2048-NEXT: vslideup.vi v14, v10, 0
-; RV64-2048-NEXT: vsetvli zero, a3, e16, m1, ta, mu
-; RV64-2048-NEXT: vmv.v.i v10, 0
-; RV64-2048-NEXT: vsetvli zero, a1, e16, m2, tu, mu
-; RV64-2048-NEXT: vslideup.vx v14, v10, a3
-; RV64-2048-NEXT: vsetvli zero, zero, e16, m2, ta, mu
-; RV64-2048-NEXT: vid.v v16
-; RV64-2048-NEXT: vsrl.vi v18, v16, 1
-; RV64-2048-NEXT: vrgather.vv v20, v14, v18
+; RV64-2048-NEXT: vmv2r.v v14, v12
+; RV64-2048-NEXT: vslideup.vi v14, v8, 0
+; RV64-2048-NEXT: vsetvli zero, a1, e16, m2, ta, mu
+; RV64-2048-NEXT: vid.v v8
+; RV64-2048-NEXT: vsrl.vi v16, v8, 1
+; RV64-2048-NEXT: vrgather.vv v18, v14, v16
 ; RV64-2048-NEXT: vsetvli zero, a3, e16, m2, tu, mu
-; RV64-2048-NEXT: vslideup.vi v8, v12, 0
-; RV64-2048-NEXT: vsetvli zero, a1, e16, m2, tu, mu
-; RV64-2048-NEXT: vslideup.vx v8, v10, a3
+; RV64-2048-NEXT: vslideup.vi v12, v10, 0
 ; RV64-2048-NEXT: lui a2, %hi(.LCPI0_0)
 ; RV64-2048-NEXT: ld a2, %lo(.LCPI0_0)(a2)
-; RV64-2048-NEXT: vsetvli zero, zero, e16, m2, ta, mu
-; RV64-2048-NEXT: vrgather.vv v10, v20, v16
+; RV64-2048-NEXT: vsetvli zero, a1, e16, m2, ta, mu
+; RV64-2048-NEXT: vrgather.vv v10, v18, v8
 ; RV64-2048-NEXT: vsetivli zero, 4, e64, m1, ta, mu
-; RV64-2048-NEXT: vmv.s.x v12, a2
+; RV64-2048-NEXT: vmv.s.x v8, a2
 ; RV64-2048-NEXT: vsetivli zero, 2, e64, m1, tu, mu
-; RV64-2048-NEXT: vmv1r.v v0, v12
-; RV64-2048-NEXT: vslideup.vi v0, v12, 1
+; RV64-2048-NEXT: vmv1r.v v0, v8
+; RV64-2048-NEXT: vslideup.vi v0, v8, 1
 ; RV64-2048-NEXT: vsetivli zero, 3, e64, m1, tu, mu
-; RV64-2048-NEXT: vslideup.vi v0, v12, 2
+; RV64-2048-NEXT: vslideup.vi v0, v8, 2
 ; RV64-2048-NEXT: vsetivli zero, 4, e64, m1, tu, mu
-; RV64-2048-NEXT: vslideup.vi v0, v12, 3
+; RV64-2048-NEXT: vslideup.vi v0, v8, 3
 ; RV64-2048-NEXT: vsetvli zero, a1, e16, m2, ta, mu
-; RV64-2048-NEXT: vrgather.vv v10, v8, v18, v0.t
+; RV64-2048-NEXT: vrgather.vv v10, v12, v16, v0.t
 ; RV64-2048-NEXT: vse16.v v10, (a0)
 ; RV64-2048-NEXT: ret
 entry:
@@ -102,103 +90,85 @@
 ; RV64-1024-NEXT: addi sp, sp, -16
 ; RV64-1024-NEXT: .cfi_def_cfa_offset 16
 ; RV64-1024-NEXT: csrr a3, vlenb
-; RV64-1024-NEXT: li a4, 40
-; RV64-1024-NEXT: mul a3, a3, a4
+; RV64-1024-NEXT: slli a3, a3, 5
 ; RV64-1024-NEXT: sub sp, sp, a3
 ; RV64-1024-NEXT: li a3, 256
 ; RV64-1024-NEXT: vsetvli zero, a3, e16, m4, ta, mu
-; RV64-1024-NEXT: vle16.v v24, (a1)
+; RV64-1024-NEXT: vle16.v v0, (a1)
 ; RV64-1024-NEXT: vle16.v v8, (a2)
 ; RV64-1024-NEXT: csrr a1, vlenb
-; RV64-1024-NEXT: li a2, 24
-; RV64-1024-NEXT: mul a1, a1, a2
+; RV64-1024-NEXT: slli a1, a1, 4
 ; RV64-1024-NEXT: add a1, sp, a1
 ; RV64-1024-NEXT: addi a1, a1, 16
 ; RV64-1024-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV64-1024-NEXT: li a1, 512
 ; RV64-1024-NEXT: vsetvli zero, a1, e16, m8, ta, mu
 ; RV64-1024-NEXT: vmv.v.i v8, 0
-; RV64-1024-NEXT: csrr a2, vlenb
-; RV64-1024-NEXT: slli a2, a2, 4
-; RV64-1024-NEXT: add a2, sp, a2
-; RV64-1024-NEXT: addi a2, a2, 16
+; RV64-1024-NEXT: addi a2, sp, 16
 ; RV64-1024-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
 ; RV64-1024-NEXT: vsetvli zero, a3, e16, m8, tu, mu
-; RV64-1024-NEXT: vslideup.vi v8, v24, 0
-; RV64-1024-NEXT: vsetvli zero, a3, e16, m4, ta, mu
-; RV64-1024-NEXT: vmv.v.i v16, 0
-; RV64-1024-NEXT: addi a2, sp, 16
-; RV64-1024-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; RV64-1024-NEXT: vsetvli zero, a1, e16, m8, tu, mu
-; RV64-1024-NEXT: vslideup.vx v8, v16, a3
-; RV64-1024-NEXT: vsetvli zero, zero, e16, m8, ta, mu
-; RV64-1024-NEXT: vid.v v24
-; RV64-1024-NEXT: vsrl.vi v16, v24, 1
+; RV64-1024-NEXT: vslideup.vi v8, v0, 0
+; RV64-1024-NEXT: vsetvli zero, a1, e16, m8, ta, mu
+; RV64-1024-NEXT: vid.v v0
+; RV64-1024-NEXT: vsrl.vi v16, v0, 1
 ; RV64-1024-NEXT: csrr a2, vlenb
-; RV64-1024-NEXT: slli a2, a2, 5
+; RV64-1024-NEXT: li a4, 24
+; RV64-1024-NEXT: mul a2, a2, a4
 ; RV64-1024-NEXT: add a2, sp, a2
 ; RV64-1024-NEXT: addi a2, a2, 16
 ; RV64-1024-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; RV64-1024-NEXT: vrgather.vv v0, v8, v16
+; RV64-1024-NEXT: vrgather.vv v24, v8, v16
 ; RV64-1024-NEXT: csrr a2, vlenb
 ; RV64-1024-NEXT: slli a2, a2, 3
 ; RV64-1024-NEXT: add a2, sp, a2
 ; RV64-1024-NEXT: addi a2, a2, 16
-; RV64-1024-NEXT: vs8r.v v0, (a2) # Unknown-size Folded Spill
+; RV64-1024-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
 ; RV64-1024-NEXT: vsetvli zero, a3, e16, m8, tu, mu
 ; RV64-1024-NEXT: csrr a2, vlenb
 ; RV64-1024-NEXT: slli a2, a2, 4
 ; RV64-1024-NEXT: add a2, sp, a2
 ; RV64-1024-NEXT: addi a2, a2, 16
-; RV64-1024-NEXT: vl8re8.v v16, (a2) # Unknown-size Folded Reload
-; RV64-1024-NEXT: csrr a2, vlenb
-; RV64-1024-NEXT: li a4, 24
-; RV64-1024-NEXT: mul a2, a2, a4
-; RV64-1024-NEXT: add a2, sp, a2
-; RV64-1024-NEXT: addi a2, a2, 16
 ; RV64-1024-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload
-; RV64-1024-NEXT: vslideup.vi v16, v8, 0
-; RV64-1024-NEXT: vsetvli zero, a1, e16, m8, tu, mu
 ; RV64-1024-NEXT: addi a2, sp, 16
-; RV64-1024-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload
-; RV64-1024-NEXT: vslideup.vx v16, v8, a3
+; RV64-1024-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload
+; RV64-1024-NEXT: vslideup.vi v24, v8, 0
 ; RV64-1024-NEXT: lui a2, %hi(.LCPI1_0)
 ; RV64-1024-NEXT: ld a2, %lo(.LCPI1_0)(a2)
-; RV64-1024-NEXT: vsetvli zero, zero, e16, m8, ta, mu
+; RV64-1024-NEXT: vsetvli zero, a1, e16, m8, ta, mu
 ; RV64-1024-NEXT: csrr a3, vlenb
 ; RV64-1024-NEXT: slli a3, a3, 3
 ; RV64-1024-NEXT: add a3, sp, a3
 ; RV64-1024-NEXT: addi a3, a3, 16
-; RV64-1024-NEXT: vl8re8.v v0, (a3) # Unknown-size Folded Reload
-; RV64-1024-NEXT: vrgather.vv v8, v0, v24
+; RV64-1024-NEXT: vl8re8.v v8, (a3) # Unknown-size Folded Reload
+; RV64-1024-NEXT: vrgather.vv v16, v8, v0
 ; RV64-1024-NEXT: vsetivli zero, 8, e64, m1, ta, mu
-; RV64-1024-NEXT: vmv.s.x v24, a2
+; RV64-1024-NEXT: vmv.s.x v8, a2
 ; RV64-1024-NEXT: vsetivli zero, 2, e64, m1, tu, mu
-; RV64-1024-NEXT: vmv1r.v v0, v24
-; RV64-1024-NEXT: vslideup.vi v0, v24, 1
+; RV64-1024-NEXT: vmv1r.v v0, v8
+; RV64-1024-NEXT: vslideup.vi v0, v8, 1
 ; RV64-1024-NEXT: vsetivli zero, 3, e64, m1, tu, mu
-; RV64-1024-NEXT: vslideup.vi v0, v24, 2
+; RV64-1024-NEXT: vslideup.vi v0, v8, 2
 ; RV64-1024-NEXT: vsetivli zero, 4, e64, m1, tu, mu
-; RV64-1024-NEXT: vslideup.vi v0, v24, 3
+; RV64-1024-NEXT: vslideup.vi v0, v8, 3
 ; RV64-1024-NEXT: vsetivli zero, 5, e64, m1, tu, mu
-; RV64-1024-NEXT: vslideup.vi v0, v24, 4
+; RV64-1024-NEXT: vslideup.vi v0, v8, 4
 ; RV64-1024-NEXT: vsetivli zero, 6, e64, m1, tu, mu
-; RV64-1024-NEXT: vslideup.vi v0, v24, 5
+; RV64-1024-NEXT: vslideup.vi v0, v8, 5
 ; RV64-1024-NEXT: vsetivli zero, 7, e64, m1, tu, mu
-; RV64-1024-NEXT: vslideup.vi v0, v24, 6
+; RV64-1024-NEXT: vslideup.vi v0, v8, 6
 ; RV64-1024-NEXT: vsetivli zero, 8, e64, m1, tu, mu
-; RV64-1024-NEXT: vslideup.vi v0, v24, 7
+; RV64-1024-NEXT: vslideup.vi v0, v8, 7
 ; RV64-1024-NEXT: vsetvli zero, a1, e16, m8, ta, mu
 ; RV64-1024-NEXT: csrr a1, vlenb
-; RV64-1024-NEXT: slli a1, a1, 5
+; RV64-1024-NEXT: li a2, 24
+; RV64-1024-NEXT: mul a1, a1, a2
 ; RV64-1024-NEXT: add a1, sp, a1
 ; RV64-1024-NEXT: addi a1, a1, 16
-; RV64-1024-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload
-; RV64-1024-NEXT: vrgather.vv v8, v16, v24, v0.t
-; RV64-1024-NEXT: vse16.v v8, (a0)
+; RV64-1024-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
+; RV64-1024-NEXT: vrgather.vv v16, v24, v8, v0.t
+; RV64-1024-NEXT: vse16.v v16, (a0)
 ; RV64-1024-NEXT: csrr a0, vlenb
-; RV64-1024-NEXT: li a1, 40
-; RV64-1024-NEXT: mul a0, a0, a1
+; RV64-1024-NEXT: slli a0, a0, 5
 ; RV64-1024-NEXT: add sp, sp, a0
 ; RV64-1024-NEXT: addi sp, sp, 16
 ; RV64-1024-NEXT: ret
@@ -215,22 +185,16 @@
 ; RV64-2048-NEXT: vsetvli zero, a3, e16, m4, tu, mu
 ; RV64-2048-NEXT: vmv4r.v v20, v8
 ; RV64-2048-NEXT: vslideup.vi v20, v12, 0
-; RV64-2048-NEXT: vsetvli zero, a3, e16, m2, ta, mu
-; RV64-2048-NEXT: vmv.v.i v24, 0
-; RV64-2048-NEXT: vsetvli zero, a1, e16, m4, tu, mu
-; RV64-2048-NEXT: vslideup.vx v20, v24, a3
-; RV64-2048-NEXT: vsetvli zero, zero, e16, m4, ta, mu
-; RV64-2048-NEXT: vid.v v28
-; RV64-2048-NEXT: vsrl.vi v12, v28, 1
-; RV64-2048-NEXT: vrgather.vv v0, v20, v12
+; RV64-2048-NEXT: vsetvli zero, a1, e16, m4, ta, mu
+; RV64-2048-NEXT: vid.v v24
+; RV64-2048-NEXT: vsrl.vi v12, v24, 1
+; RV64-2048-NEXT: vrgather.vv v28, v20, v12
 ; RV64-2048-NEXT: vsetvli zero, a3, e16, m4, tu, mu
 ; RV64-2048-NEXT: vslideup.vi v8, v16, 0
-; RV64-2048-NEXT: vsetvli zero, a1, e16, m4, tu, mu
-; RV64-2048-NEXT: vslideup.vx v8, v24, a3
 ; RV64-2048-NEXT: lui a2, %hi(.LCPI1_0)
 ; RV64-2048-NEXT: ld a2, %lo(.LCPI1_0)(a2)
-; RV64-2048-NEXT: vsetvli zero, zero, e16, m4, ta, mu
-; RV64-2048-NEXT: vrgather.vv v16, v0, v28
+; RV64-2048-NEXT: vsetvli zero, a1, e16, m4, ta, mu
+; RV64-2048-NEXT: vrgather.vv v16, v28, v24
 ; RV64-2048-NEXT: vsetivli zero, 8, e64, m1, ta, mu
 ; RV64-2048-NEXT: vmv.s.x v20, a2
 ; RV64-2048-NEXT: vsetivli zero, 2, e64, m1, tu, mu
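
Note (illustrative, not part of the patch): the undef-operand case handled by the RISCVISelLowering.cpp change above typically arises when a narrow fixed-length vector is widened, as in the updated shuffle tests. A minimal IR sketch of that pattern follows; the function name and mask are hypothetical and chosen only so that lowering is expected to form a CONCAT_VECTORS whose upper half is undef.

; Widening <4 x i8> to <8 x i8> is expected to lower to CONCAT_VECTORS(%v, undef).
; With the change above, the undef half is skipped rather than materialized and
; inserted with an extra vsetvli/vmv.v.i/vslideup sequence.
define <8 x i8> @concat_undef_upper_half(<4 x i8> %v) {
  %w = shufflevector <4 x i8> %v, <4 x i8> undef,
                     <8 x i32> <i32 0, i32 1, i32 2, i32 3,
                                i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i8> %w
}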