diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll
@@ -365,3 +365,276 @@
   store <2 x double> %shuffle.i5, ptr %out, align 8
   ret void
 }
+
+define void @vnsrl_2_undef_i8(ptr %in, ptr %out) {
+; CHECK-LABEL: vnsrl_2_undef_i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 16, e8, mf2, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
+; CHECK-NEXT: vid.v v9
+; CHECK-NEXT: vadd.vv v9, v9, v9
+; CHECK-NEXT: vadd.vi v10, v9, 1
+; CHECK-NEXT: vrgather.vv v11, v8, v10
+; CHECK-NEXT: li a0, 112
+; CHECK-NEXT: vmv.s.x v0, a0
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vi v8, v8, 8
+; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, mu
+; CHECK-NEXT: vadd.vi v9, v9, -7
+; CHECK-NEXT: vrgather.vv v11, v8, v9, v0.t
+; CHECK-NEXT: vse8.v v11, (a1)
+; CHECK-NEXT: ret
+entry:
+  %0 = load <16 x i8>, ptr %in, align 1
+  %1 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32>
+  store <8 x i8> %1, ptr %out, align 1
+  ret void
+}
+
+define void @vnsrl_4_undef_i8(ptr %in, ptr %out) {
+; V-LABEL: vnsrl_4_undef_i8:
+; V: # %bb.0: # %entry
+; V-NEXT: vsetivli zero, 16, e8, mf2, ta, ma
+; V-NEXT: vle8.v v8, (a0)
+; V-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
+; V-NEXT: vslidedown.vi v9, v8, 1
+; V-NEXT: vmv.x.s a0, v9
+; V-NEXT: vslidedown.vi v8, v8, 5
+; V-NEXT: vmv.x.s a2, v8
+; V-NEXT: vsetivli zero, 4, e8, mf8, ta, ma
+; V-NEXT: vmv.v.x v8, a2
+; V-NEXT: vsetvli zero, zero, e8, mf8, tu, ma
+; V-NEXT: vmv.s.x v8, a0
+; V-NEXT: vse8.v v8, (a1)
+; V-NEXT: ret
+;
+; ZVE32F-LABEL: vnsrl_4_undef_i8:
+; ZVE32F: # %bb.0: # %entry
+; ZVE32F-NEXT: vsetivli zero, 16, e8, mf2, ta, ma
+; ZVE32F-NEXT: vle8.v v8, (a0)
+; ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
+; ZVE32F-NEXT: vslidedown.vi v9, v8, 1
+; ZVE32F-NEXT: vmv.x.s a0, v9
+; ZVE32F-NEXT: vslidedown.vi v8, v8, 5
+; ZVE32F-NEXT: vmv.x.s a2, v8
+; ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; ZVE32F-NEXT: vmv.v.x v8, a2
+; ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, tu, ma
+; ZVE32F-NEXT: vmv.s.x v8, a0
+; ZVE32F-NEXT: vse8.v v8, (a1)
+; ZVE32F-NEXT: ret
+entry:
+  %0 = load <16 x i8>, ptr %in, align 1
+  %1 = shufflevector <16 x i8> %0, <16 x i8> poison, <4 x i32>
+  store <4 x i8> %1, ptr %out, align 1
+  ret void
+}
+
+define void @vnsrl_8_undef_i8(ptr %in, ptr %out) {
+; V-LABEL: vnsrl_8_undef_i8:
+; V: # %bb.0: # %entry
+; V-NEXT: li a2, 32
+; V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; V-NEXT: vle8.v v8, (a0)
+; V-NEXT: li a0, 2
+; V-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; V-NEXT: vmv.s.x v0, a0
+; V-NEXT: vsetivli zero, 8, e8, m1, ta, ma
+; V-NEXT: vslidedown.vi v9, v8, 8
+; V-NEXT: vsetivli zero, 8, e8, mf4, ta, mu
+; V-NEXT: vrgather.vi v10, v8, 5
+; V-NEXT: vrgather.vi v10, v9, 5, v0.t
+; V-NEXT: vsetivli zero, 4, e8, mf8, ta, ma
+; V-NEXT: vse8.v v10, (a1)
+; V-NEXT: ret
+;
+; ZVE32F-LABEL: vnsrl_8_undef_i8:
+; ZVE32F: # %bb.0: # %entry
+; ZVE32F-NEXT: li a2, 32
+; ZVE32F-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; ZVE32F-NEXT: vle8.v v8, (a0)
+; ZVE32F-NEXT: li a0, 2
+; ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; ZVE32F-NEXT: vmv.s.x v0, a0
+; ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma
+; ZVE32F-NEXT: vslidedown.vi v9, v8, 8
+; ZVE32F-NEXT: vsetivli zero, 8, e8, mf4, ta, mu
+; ZVE32F-NEXT: vrgather.vi v10, v8, 5
+; ZVE32F-NEXT: vrgather.vi v10, v9, 5, v0.t
+; ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; ZVE32F-NEXT: vse8.v v10, (a1)
+; ZVE32F-NEXT: ret
+entry:
+  %0 = load <32 x i8>, ptr %in, align 1
+  %1 = shufflevector <32 x i8> %0, <32 x i8> poison, <4 x i32>
+  store <4 x i8> %1, ptr %out, align 1
+  ret void
+}
+
+define void @vnsrl_2_undef_i16(ptr %in, ptr %out) {
+; CHECK-LABEL: vnsrl_2_undef_i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 16, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; CHECK-NEXT: vid.v v9
+; CHECK-NEXT: vadd.vv v9, v9, v9
+; CHECK-NEXT: vadd.vi v9, v9, 1
+; CHECK-NEXT: vrgather.vv v10, v8, v9
+; CHECK-NEXT: vse16.v v10, (a1)
+; CHECK-NEXT: ret
+entry:
+  %0 = load <16 x i16>, ptr %in, align 2
+  %1 = shufflevector <16 x i16> %0, <16 x i16> poison, <8 x i32>
+  store <8 x i16> %1, ptr %out, align 2
+  ret void
+}
+
+define void @vnsrl_4_undef_i16(ptr %in, ptr %out) {
+; V-LABEL: vnsrl_4_undef_i16:
+; V: # %bb.0: # %entry
+; V-NEXT: vsetivli zero, 16, e16, m1, ta, ma
+; V-NEXT: vle16.v v8, (a0)
+; V-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; V-NEXT: vid.v v9
+; V-NEXT: vsll.vi v9, v9, 2
+; V-NEXT: vadd.vi v9, v9, 1
+; V-NEXT: vrgather.vv v10, v8, v9
+; V-NEXT: li a0, 4
+; V-NEXT: vmv.s.x v0, a0
+; V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; V-NEXT: vslidedown.vi v8, v8, 8
+; V-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; V-NEXT: vrgather.vi v10, v8, 1, v0.t
+; V-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; V-NEXT: vse16.v v10, (a1)
+; V-NEXT: ret
+;
+; ZVE32F-LABEL: vnsrl_4_undef_i16:
+; ZVE32F: # %bb.0: # %entry
+; ZVE32F-NEXT: vsetivli zero, 16, e16, m1, ta, ma
+; ZVE32F-NEXT: vle16.v v8, (a0)
+; ZVE32F-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVE32F-NEXT: vid.v v9
+; ZVE32F-NEXT: vsll.vi v9, v9, 2
+; ZVE32F-NEXT: vadd.vi v9, v9, 1
+; ZVE32F-NEXT: vrgather.vv v10, v8, v9
+; ZVE32F-NEXT: li a0, 4
+; ZVE32F-NEXT: vmv.s.x v0, a0
+; ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVE32F-NEXT: vslidedown.vi v8, v8, 8
+; ZVE32F-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVE32F-NEXT: vrgather.vi v10, v8, 1, v0.t
+; ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVE32F-NEXT: vse16.v v10, (a1)
+; ZVE32F-NEXT: ret
+entry:
+  %0 = load <16 x i16>, ptr %in, align 2
+  %1 = shufflevector <16 x i16> %0, <16 x i16> poison, <4 x i32>
+  store <4 x i16> %1, ptr %out, align 2
+  ret void
+}
+
+define void @vnsrl_2_undef_i32(ptr %in, ptr %out) {
+; CHECK-LABEL: vnsrl_2_undef_i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 16, e32, m2, ta, ma
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
+; CHECK-NEXT: vid.v v10
+; CHECK-NEXT: vadd.vv v10, v10, v10
+; CHECK-NEXT: vadd.vi v10, v10, 1
+; CHECK-NEXT: vrgather.vv v11, v8, v10
+; CHECK-NEXT: vse32.v v11, (a1)
+; CHECK-NEXT: ret
+entry:
+  %0 = load <16 x i32>, ptr %in, align 4
+  %1 = shufflevector <16 x i32> %0, <16 x i32> poison, <8 x i32>
+  store <8 x i32> %1, ptr %out, align 4
+  ret void
+}
+
+define void @vnsrl_2_undef_half(ptr %in, ptr %out) {
+; CHECK-LABEL: vnsrl_2_undef_half:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 16, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; CHECK-NEXT: vid.v v9
+; CHECK-NEXT: vadd.vv v9, v9, v9
+; CHECK-NEXT: vadd.vi v10, v9, 1
+; CHECK-NEXT: vrgather.vv v11, v8, v10
+; CHECK-NEXT: li a0, 112
+; CHECK-NEXT: vmv.s.x v0, a0
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vslidedown.vi v8, v8, 8
+; CHECK-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; CHECK-NEXT: vadd.vi v9, v9, -7
+; CHECK-NEXT: vrgather.vv v11, v8, v9, v0.t
+; CHECK-NEXT: vse16.v v11, (a1)
+; CHECK-NEXT: ret
+entry:
+  %0 = load <16 x half>, ptr %in, align 2
+  %1 = shufflevector <16 x half> %0, <16 x half> poison, <8 x i32>
+  store <8 x half> %1, ptr %out, align 2
+  ret void
+}
+
+define void @vnsrl_4_undef_half(ptr %in, ptr %out) {
+; V-LABEL: vnsrl_4_undef_half:
+; V: # %bb.0: # %entry
+; V-NEXT: vsetivli zero, 16, e16, m1, ta, ma
+; V-NEXT: vle16.v v8, (a0)
+; V-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; V-NEXT: vslidedown.vi v9, v8, 2
+; V-NEXT: vfmv.f.s ft0, v9
+; V-NEXT: vslidedown.vi v8, v8, 6
+; V-NEXT: vfmv.f.s ft1, v8
+; V-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; V-NEXT: vfmv.v.f v8, ft1
+; V-NEXT: vsetvli zero, zero, e16, mf4, tu, ma
+; V-NEXT: vfmv.s.f v8, ft0
+; V-NEXT: vse16.v v8, (a1)
+; V-NEXT: ret
+;
+; ZVE32F-LABEL: vnsrl_4_undef_half:
+; ZVE32F: # %bb.0: # %entry
+; ZVE32F-NEXT: vsetivli zero, 16, e16, m1, ta, ma
+; ZVE32F-NEXT: vle16.v v8, (a0)
+; ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; ZVE32F-NEXT: vfmv.f.s ft0, v9
+; ZVE32F-NEXT: vslidedown.vi v8, v8, 6
+; ZVE32F-NEXT: vfmv.f.s ft1, v8
+; ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVE32F-NEXT: vfmv.v.f v8, ft1
+; ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, tu, ma
+; ZVE32F-NEXT: vfmv.s.f v8, ft0
+; ZVE32F-NEXT: vse16.v v8, (a1)
+; ZVE32F-NEXT: ret
+entry:
+  %0 = load <16 x half>, ptr %in, align 2
+  %1 = shufflevector <16 x half> %0, <16 x half> poison, <4 x i32>
+  store <4 x half> %1, ptr %out, align 2
+  ret void
+}
+
+define void @vnsrl_2_undef_float(ptr %in, ptr %out) {
+; CHECK-LABEL: vnsrl_2_undef_float:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 16, e32, m2, ta, ma
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
+; CHECK-NEXT: vid.v v10
+; CHECK-NEXT: vadd.vv v10, v10, v10
+; CHECK-NEXT: vadd.vi v10, v10, 1
+; CHECK-NEXT: vrgather.vv v11, v8, v10
+; CHECK-NEXT: vse32.v v11, (a1)
+; CHECK-NEXT: ret
+entry:
+  %0 = load <16 x float>, ptr %in, align 4
+  %1 = shufflevector <16 x float> %0, <16 x float> poison, <8 x i32>
+  store <8 x float> %1, ptr %out, align 4
+  ret void
+}