diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -644,3 +644,129 @@
   %5 = shufflevector <4 x i16> %3, <4 x i16> %4, <4 x i32>
   ret <4 x i16> %5
 }
+
+define <8 x i8> @merge_start_into_end(<8 x i8> %v, <8 x i8> %w) {
+; CHECK-LABEL: merge_start_into_end:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vid.v v11
+; CHECK-NEXT:    vrgather.vv v10, v8, v11
+; CHECK-NEXT:    li a0, 240
+; CHECK-NEXT:    vmv.s.x v0, a0
+; CHECK-NEXT:    vadd.vi v8, v11, -4
+; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
+  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+  ret <8 x i8> %res
+}
+
+define <8 x i8> @merge_start_into_end_non_contiguous(<8 x i8> %v, <8 x i8> %w) {
+; CHECK-LABEL: merge_start_into_end_non_contiguous:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vid.v v11
+; CHECK-NEXT:    vrgather.vv v10, v8, v11
+; CHECK-NEXT:    li a0, 144
+; CHECK-NEXT:    vmv.s.x v0, a0
+; CHECK-NEXT:    vadd.vi v8, v11, -4
+; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
+  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 11>
+  ret <8 x i8> %res
+}
+
+define <8 x i8> @merge_end_into_end(<8 x i8> %v, <8 x i8> %w) {
+; CHECK-LABEL: merge_end_into_end:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 15
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vmv.s.x v0, a0
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    ret
+  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
+  ret <8 x i8> %res
+}
+
+define <8 x i8> @merge_start_into_middle(<8 x i8> %v, <8 x i8> %w) {
+; CHECK-LABEL: merge_start_into_middle:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vid.v v11
+; CHECK-NEXT:    vrgather.vv v10, v8, v11
+; CHECK-NEXT:    li a0, 30
+; CHECK-NEXT:    vmv.s.x v0, a0
+; CHECK-NEXT:    vadd.vi v8, v11, -1
+; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
+  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 8, i32 9, i32 10, i32 11, i32 5, i32 6, i32 7>
+  ret <8 x i8> %res
+}
+
+define <8 x i8> @merge_start_into_start(<8 x i8> %v, <8 x i8> %w) {
+; CHECK-LABEL: merge_start_into_start:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 240
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vmv.s.x v0, a0
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    ret
+  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i8> %res
+}
+
+define <8 x i8> @merge_slidedown(<8 x i8> %v, <8 x i8> %w) {
+; CHECK-LABEL: merge_slidedown:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vid.v v11
+; CHECK-NEXT:    vadd.vi v12, v11, 1
+; CHECK-NEXT:    li a0, 195
+; CHECK-NEXT:    vmv.s.x v0, a0
+; CHECK-NEXT:    vrgather.vv v10, v8, v12
+; CHECK-NEXT:    vrgather.vv v10, v9, v11, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
+  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 8, i32 9, i32 3, i32 4, i32 5, i32 6, i32 14, i32 15>
+  ret <8 x i8> %res
+}
+
+; This should slide %v down by 2 and %w up by 1 before merging them
+define <8 x i8> @merge_non_contiguous_slideup_slidedown(<8 x i8> %v, <8 x i8> %w) {
+; CHECK-LABEL: merge_non_contiguous_slideup_slidedown:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vid.v v11
+; CHECK-NEXT:    vadd.vi v12, v11, 2
+; CHECK-NEXT:    vrgather.vv v10, v8, v12
+; CHECK-NEXT:    li a0, 234
+; CHECK-NEXT:    vmv.s.x v0, a0
+; CHECK-NEXT:    vadd.vi v8, v11, -1
+; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
+  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 8, i32 4, i32 10, i32 6, i32 12, i32 13, i32 14>
+  ret <8 x i8> %res
+}
+
+; This shouldn't generate a vmerge because the elements of %w are not consecutive
+define <8 x i8> @unmergable(<8 x i8> %v, <8 x i8> %w) {
+; CHECK-LABEL: unmergable:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    vadd.vi v11, v10, 2
+; CHECK-NEXT:    lui a0, %hi(.LCPI44_0)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI44_0)
+; CHECK-NEXT:    vle8.v v12, (a0)
+; CHECK-NEXT:    li a0, 234
+; CHECK-NEXT:    vmv.s.x v0, a0
+; CHECK-NEXT:    vrgather.vv v10, v8, v11
+; CHECK-NEXT:    vrgather.vv v10, v9, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
+  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 9, i32 4, i32 11, i32 6, i32 13, i32 8, i32 15>
+  ret <8 x i8> %res
+}
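
Note on the lowering these tests target (illustrative, not part of the patch): in each mergeable case the result reads a contiguous, equally-offset run of elements from %v and another from %w, so the shuffle decomposes into two slides followed by an element-wise select. Below is a minimal LLVM IR sketch of that decomposition for merge_non_contiguous_slideup_slidedown, following the slide-%v-down-by-2 / slide-%w-up-by-1 reading stated in its comment; the function name @slide_then_merge and the use of a plain select are assumptions for illustration, not code produced or checked by this patch.

; Sketch only: %v is read at i+2 (slide down by 2) and %w at i-1 (slide
; up by 1). The select condition spells out mask 234 = 0b11101010
; (bits 1, 3, 5, 6, 7), the value materialized by `li a0, 234` above.
define <8 x i8> @slide_then_merge(<8 x i8> %v, <8 x i8> %w) {
  ; slide %v down by 2: element i becomes v[i+2]
  %v.slid = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef>
  ; slide %w up by 1: element i becomes w[i-1]
  %w.slid = shufflevector <8 x i8> %w, <8 x i8> poison, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  ; take %w.slid where the mask bit is set, %v.slid elsewhere
  %res = select <8 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true>, <8 x i8> %w.slid, <8 x i8> %v.slid
  ret <8 x i8> %res
}

This reproduces the test's shuffle mask <i32 2, i32 8, i32 4, i32 10, i32 6, i32 12, i32 13, i32 14>: positions 1, 3, 5, 6, 7 come from %w at consecutive offsets and the remaining positions from %v at consecutive offsets, which is what makes the two-slide-plus-merge form possible; unmergable deliberately breaks that consecutiveness, forcing the gather through a constant-pool index vector instead.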