diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -340,3 +340,236 @@
   %w = shufflevector <4 x i8> %y, <4 x i8> %z, <4 x i32> 
   ret <4 x i8> %w
 }
+
+define <8 x i8> @splat_ve4(<8 x i8> %v) {
+; CHECK-LABEL: splat_ve4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vrgather.vi v25, v8, 4
+; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    ret
+  %shuff = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
+  ret <8 x i8> %shuff
+}
+
+define <8 x i8> @splat_ve4_ins_i0ve2(<8 x i8> %v) {
+; CHECK-LABEL: splat_ve4_ins_i0ve2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.i v26, 4
+; CHECK-NEXT:    addi a0, zero, 2
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, tu, mu
+; CHECK-NEXT:    vmv.s.x v26, a0
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
+; CHECK-NEXT:    vrgather.vv v25, v8, v26
+; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    ret
+  %shuff = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 2, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
+  ret <8 x i8> %shuff
+}
+
+define <8 x i8> @splat_ve4_ins_i1ve3(<8 x i8> %v) {
+; CHECK-LABEL: splat_ve4_ins_i1ve3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, zero, 3
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv.s.x v25, a0
+; CHECK-NEXT:    vmv.v.i v26, 4
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf2, tu, mu
+; CHECK-NEXT:    vslideup.vi v26, v25, 1
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vrgather.vv v25, v8, v26
+; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    ret
+  %shuff = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 4, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
+  ret <8 x i8> %shuff
+}
+
+define <8 x i8> @splat_ve2_we0(<8 x i8> %v, <8 x i8> %w) {
+; CHECK-LABEL: splat_ve2_we0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.i v26, 2
+; CHECK-NEXT:    vrgather.vv v25, v8, v26
+; CHECK-NEXT:    addi a0, zero, 66
+; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.s.x v0, a0
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.i v26, 0
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, tu, mu
+; CHECK-NEXT:    vrgather.vv v25, v9, v26, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    ret
+  %shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 8, i32 2, i32 2, i32 2, i32 2, i32 8, i32 2>
+  ret <8 x i8> %shuff
+}
+
+define <8 x i8> @splat_ve2_we0_ins_i0ve4(<8 x i8> %v, <8 x i8> %w) {
+; CHECK-LABEL: splat_ve2_we0_ins_i0ve4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.i v26, 2
+; CHECK-NEXT:    addi a0, zero, 4
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, tu, mu
+; CHECK-NEXT:    vmv.s.x v26, a0
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
+; CHECK-NEXT:    vrgather.vv v25, v8, v26
+; CHECK-NEXT:    addi a0, zero, 66
+; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.s.x v0, a0
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.i v26, 0
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, tu, mu
+; CHECK-NEXT:    vrgather.vv v25, v9, v26, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    ret
+  %shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 4, i32 8, i32 2, i32 2, i32 2, i32 2, i32 8, i32 2>
+  ret <8 x i8> %shuff
+}
+
+define <8 x i8> @splat_ve2_we0_ins_i0we4(<8 x i8> %v, <8 x i8> %w) {
+; CHECK-LABEL: splat_ve2_we0_ins_i0we4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.i v26, 2
+; CHECK-NEXT:    vrgather.vv v25, v8, v26
+; CHECK-NEXT:    addi a0, zero, 67
+; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.s.x v0, a0
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.i v26, 4
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, tu, mu
+; CHECK-NEXT:    vrgather.vv v25, v9, v26, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    ret
+  %shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 12, i32 8, i32 2, i32 2, i32 2, i32 2, i32 8, i32 2>
+  ret <8 x i8> %shuff
+}
+
+define <8 x i8> @splat_ve2_we0_ins_i2ve4(<8 x i8> %v, <8 x i8> %w) {
+; RV32-LABEL: splat_ve2_we0_ins_i2ve4:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a0, 8256
+; RV32-NEXT:    addi a0, a0, 514
+; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
+; RV32-NEXT:    vmv.v.x v26, a0
+; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; RV32-NEXT:    vrgather.vv v25, v8, v26
+; RV32-NEXT:    addi a0, zero, 66
+; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; RV32-NEXT:    vmv.s.x v0, a0
+; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; RV32-NEXT:    vmv.v.i v26, 0
+; RV32-NEXT:    vsetvli zero, zero, e8, mf2, tu, mu
+; RV32-NEXT:    vrgather.vv v25, v9, v26, v0.t
+; RV32-NEXT:    vmv1r.v v8, v25
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: splat_ve2_we0_ins_i2ve4:
+; RV64:       # %bb.0:
+; RV64-NEXT:    lui a0, 8256
+; RV64-NEXT:    addiw a0, a0, 514
+; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
+; RV64-NEXT:    vmv.v.x v26, a0
+; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; RV64-NEXT:    vrgather.vv v25, v8, v26
+; RV64-NEXT:    addi a0, zero, 66
+; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; RV64-NEXT:    vmv.s.x v0, a0
+; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; RV64-NEXT:    vmv.v.i v26, 0
+; RV64-NEXT:    vsetvli zero, zero, e8, mf2, tu, mu
+; RV64-NEXT:    vrgather.vv v25, v9, v26, v0.t
+; RV64-NEXT:    vmv1r.v v8, v25
+; RV64-NEXT:    ret
+  %shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 8, i32 4, i32 2, i32 2, i32 2, i32 8, i32 2>
+  ret <8 x i8> %shuff
+}
+
+define <8 x i8> @splat_ve2_we0_ins_i2we4(<8 x i8> %v, <8 x i8> %w) {
+; CHECK-LABEL: splat_ve2_we0_ins_i2we4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.i v26, 2
+; CHECK-NEXT:    vrgather.vv v25, v8, v26
+; CHECK-NEXT:    addi a0, zero, 4
+; CHECK-NEXT:    vmv.s.x v26, a0
+; CHECK-NEXT:    vmv.v.i v27, 0
+; CHECK-NEXT:    vsetivli zero, 3, e8, mf2, tu, mu
+; CHECK-NEXT:    vslideup.vi v27, v26, 2
+; CHECK-NEXT:    addi a0, zero, 70
+; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.s.x v0, a0
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, tu, mu
+; CHECK-NEXT:    vrgather.vv v25, v9, v27, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    ret
+  %shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 8, i32 12, i32 2, i32 2, i32 2, i32 8, i32 2>
+  ret <8 x i8> %shuff
+}
+
+define <8 x i8> @splat_ve2_we0_ins_i2ve4_i5we6(<8 x i8> %v, <8 x i8> %w) {
+; RV32-LABEL: splat_ve2_we0_ins_i2ve4_i5we6:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi a0, zero, 6
+; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; RV32-NEXT:    vmv.s.x v25, a0
+; RV32-NEXT:    vmv.v.i v26, 0
+; RV32-NEXT:    vsetivli zero, 6, e8, mf2, tu, mu
+; RV32-NEXT:    vslideup.vi v26, v25, 5
+; RV32-NEXT:    lui a0, 8256
+; RV32-NEXT:    addi a0, a0, 2
+; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
+; RV32-NEXT:    vmv.v.x v27, a0
+; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; RV32-NEXT:    vrgather.vv v25, v8, v27
+; RV32-NEXT:    addi a0, zero, 98
+; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; RV32-NEXT:    vmv.s.x v0, a0
+; RV32-NEXT:    vsetivli zero, 8, e8, mf2, tu, mu
+; RV32-NEXT:    vrgather.vv v25, v9, v26, v0.t
+; RV32-NEXT:    vmv1r.v v8, v25
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: splat_ve2_we0_ins_i2ve4_i5we6:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi a0, zero, 6
+; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; RV64-NEXT:    vmv.s.x v25, a0
+; RV64-NEXT:    vmv.v.i v26, 0
+; RV64-NEXT:    vsetivli zero, 6, e8, mf2, tu, mu
+; RV64-NEXT:    vslideup.vi v26, v25, 5
+; RV64-NEXT:    lui a0, 8256
+; RV64-NEXT:    addiw a0, a0, 2
+; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
+; RV64-NEXT:    vmv.v.x v27, a0
+; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; RV64-NEXT:    vrgather.vv v25, v8, v27
+; RV64-NEXT:    addi a0, zero, 98
+; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; RV64-NEXT:    vmv.s.x v0, a0
+; RV64-NEXT:    vsetivli zero, 8, e8, mf2, tu, mu
+; RV64-NEXT:    vrgather.vv v25, v9, v26, v0.t
+; RV64-NEXT:    vmv1r.v v8, v25
+; RV64-NEXT:    ret
+  %shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 8, i32 4, i32 2, i32 2, i32 14, i32 8, i32 2>
+  ret <8 x i8> %shuff
+}
+
+define <8 x i8> @widen_splat_ve3(<4 x i8> %v) {
+; CHECK-LABEL: widen_splat_ve3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, tu, mu
+; CHECK-NEXT:    vslideup.vi v25, v8, 0
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
+; CHECK-NEXT:    vmv.v.i v26, 0
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, tu, mu
+; CHECK-NEXT:    vslideup.vi v25, v26, 4
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
+; CHECK-NEXT:    vrgather.vi v8, v25, 3
+; CHECK-NEXT:    ret
+  %shuf = shufflevector <4 x i8> %v, <4 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+  ret <8 x i8> %shuf
+}