diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-peephole-vmerge-vops.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-peephole-vmerge-vops.ll
@@ -0,0 +1,215 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=256 | FileCheck %s
+
+declare <8 x i16> @llvm.vp.merge.v8i16(<8 x i1>, <8 x i16>, <8 x i16>, i32)
+declare <8 x i32> @llvm.vp.merge.v8i32(<8 x i1>, <8 x i32>, <8 x i32>, i32)
+declare <8 x float> @llvm.vp.merge.v8f32(<8 x i1>, <8 x float>, <8 x float>, i32)
+declare <8 x double> @llvm.vp.merge.v8f64(<8 x i1>, <8 x double>, <8 x double>, i32)
+
+; Test binary operator with vp.merge and vp.add.
+declare <8 x i32> @llvm.vp.add.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
+define <8 x i32> @vpmerge_vpadd(<8 x i32> %passthru, <8 x i32> %x, <8 x i32> %y, <8 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpadd:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vadd.vv v9, v9, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
+  %a = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i1> %mask, i32 %vl)
+  %b = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
+  ret <8 x i32> %b
+}
+
+; Test that the glued node of the merge is not deleted.
+declare <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32>, <8 x i32>, metadata, <8 x i1>, i32)
+define <8 x i32> @vpmerge_vpadd2(<8 x i32> %passthru, <8 x i32> %x, <8 x i32> %y, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpadd2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vadd.vv v11, v9, v10
+; CHECK-NEXT:    vmseq.vv v0, v9, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v11, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
+  %a = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i1> %mask, i32 %vl)
+  %m = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %x, <8 x i32> %y, metadata !"eq", <8 x i1> %mask, i32 %vl)
+  %b = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
+  ret <8 x i32> %b
+}
+
+; Test vp.merge with an all-ones mask.
+define <8 x i32> @vpmerge_vpadd3(<8 x i32> %passthru, <8 x i32> %x, <8 x i32> %y, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpadd3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf4, ta, mu
+; CHECK-NEXT:    vmset.m v0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vadd.vv v9, v9, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
+  %a = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i1> %mask, i32 %vl)
+  %b = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %mask, <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
+  ret <8 x i32> %b
+}
+
+; Test float binary operator with vp.merge and vp.fadd.
+declare <8 x float> @llvm.vp.fadd.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32)
+define <8 x float> @vpmerge_vpfadd(<8 x float> %passthru, <8 x float> %x, <8 x float> %y, <8 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpfadd:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfadd.vv v9, v9, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
+  %a = call <8 x float> @llvm.vp.fadd.v8f32(<8 x float> %x, <8 x float> %y, <8 x i1> %mask, i32 %vl)
+  %b = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %a, <8 x float> %passthru, i32 %vl)
+  ret <8 x float> %b
+}
+
+; Test conversion by fptosi.
+declare <8 x i16> @llvm.vp.fptosi.v8i16.v8f32(<8 x float>, <8 x i1>, i32)
+define <8 x i16> @vpmerge_vpfptosi(<8 x i16> %passthru, <8 x float> %x, <8 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpfptosi:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v9
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
+  %a = call <8 x i16> @llvm.vp.fptosi.v8i16.v8f32(<8 x float> %x, <8 x i1> %mask, i32 %vl)
+  %b = call <8 x i16> @llvm.vp.merge.v8i16(<8 x i1> %m, <8 x i16> %a, <8 x i16> %passthru, i32 %vl)
+  ret <8 x i16> %b
+}
+
+; Test conversion by sitofp.
+declare <8 x float> @llvm.vp.sitofp.v8f32.v8i64(<8 x i64>, <8 x i1>, i32)
+define <8 x float> @vpmerge_vpsitofp(<8 x float> %passthru, <8 x i64> %x, <8 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpsitofp:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfncvt.f.x.w v9, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
+  %a = call <8 x float> @llvm.vp.sitofp.v8f32.v8i64(<8 x i64> %x, <8 x i1> %mask, i32 %vl)
+  %b = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %a, <8 x float> %passthru, i32 %vl)
+  ret <8 x float> %b
+}
+
+; Test integer extension by vp.zext.
+declare <8 x i32> @llvm.vp.zext.v8i32.v8i8(<8 x i8>, <8 x i1>, i32)
+define <8 x i32> @vpmerge_vpzext(<8 x i32> %passthru, <8 x i8> %x, <8 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpzext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vzext.vf4 v10, v9
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
+  %a = call <8 x i32> @llvm.vp.zext.v8i32.v8i8(<8 x i8> %x, <8 x i1> %mask, i32 %vl)
+  %b = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
+  ret <8 x i32> %b
+}
+
+; Test integer truncation by vp.trunc.
+declare <8 x i32> @llvm.vp.trunc.v8i32.v8i64(<8 x i64>, <8 x i1>, i32)
+define <8 x i32> @vpmerge_vptrunc(<8 x i32> %passthru, <8 x i64> %x, <8 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vptrunc:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v9, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
+  %a = call <8 x i32> @llvm.vp.trunc.v8i32.v8i64(<8 x i64> %x, <8 x i1> %mask, i32 %vl)
+  %b = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
+  ret <8 x i32> %b
+}
+
+; Test floating-point extension by vp.fpext.
+declare <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float>, <8 x i1>, i32)
+define <8 x double> @vpmerge_vpfpext(<8 x double> %passthru, <8 x float> %x, <8 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpfpext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfwcvt.f.f.v v12, v10
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
+  %a = call <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float> %x, <8 x i1> %mask, i32 %vl)
+  %b = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %a, <8 x double> %passthru, i32 %vl)
+  ret <8 x double> %b
+}
+
+; Test floating-point truncation by vp.fptrunc.
+declare <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double>, <8 x i1>, i32)
+define <8 x float> @vpmerge_vpfptrunc(<8 x float> %passthru, <8 x double> %x, <8 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpfptrunc:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfncvt.f.f.w v9, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
+  %a = call <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double> %x, <8 x i1> %mask, i32 %vl)
+  %b = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %a, <8 x float> %passthru, i32 %vl)
+  ret <8 x float> %b
+}
+
+; Test load operation by vp.load.
+declare <8 x i32> @llvm.vp.load.v8i32.p0v8i32(<8 x i32>*, <8 x i1>, i32)
+define <8 x i32> @vpmerge_vpload(<8 x i32> %passthru, <8 x i32>* %p, <8 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpload:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
+  %a = call <8 x i32> @llvm.vp.load.v8i32.p0v8i32(<8 x i32>* %p, <8 x i1> %mask, i32 %vl)
+  %b = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
+  ret <8 x i32> %b
+}
+
+; Test a result that has both a chain and a glued node.
+define <8 x i32> @vpmerge_vpload2(<8 x i32> %passthru, <8 x i32>* %p, <8 x i32> %x, <8 x i32> %y, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpload2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT:    vle32.v v11, (a0)
+; CHECK-NEXT:    vmseq.vv v0, v9, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v11, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
+  %a = call <8 x i32> @llvm.vp.load.v8i32.p0v8i32(<8 x i32>* %p, <8 x i1> %mask, i32 %vl)
+  %m = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %x, <8 x i32> %y, metadata !"eq", <8 x i1> %mask, i32 %vl)
+  %b = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
+  ret <8 x i32> %b
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
@@ -0,0 +1,358 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s
+
+declare <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1>, <vscale x 2 x i16>, <vscale x 2 x i16>, i32)
+declare <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1>, <vscale x 2 x i32>, <vscale x 2 x i32>, i32)
+declare <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>, <vscale x 2 x float>, i32)
+declare <vscale x 2 x double> @llvm.vp.merge.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, i32)
+
+; Test binary operator with vp.merge and vp.add.
+declare <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
+define <vscale x 2 x i32> @vpmerge_vpadd(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpadd:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vadd.vv v9, v9, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %mask, i32 %vl)
+  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
+  ret <vscale x 2 x i32> %b
+}
+
+; Test that the glued node of the merge is not deleted.
+declare <vscale x 2 x i1> @llvm.vp.icmp.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, metadata, <vscale x 2 x i1>, i32)
+define <vscale x 2 x i32> @vpmerge_vpadd2(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpadd2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vadd.vv v11, v9, v10
+; CHECK-NEXT:    vmseq.vv v0, v9, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v11, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %mask, i32 %vl)
+  %m = call <vscale x 2 x i1> @llvm.vp.icmp.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, metadata !"eq", <vscale x 2 x i1> %mask, i32 %vl)
+  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
+  ret <vscale x 2 x i32> %b
+}
+
+; Test vp.merge with an all-ones mask.
+define <vscale x 2 x i32> @vpmerge_vpadd3(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpadd3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
+; CHECK-NEXT:    vmset.m v0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vadd.vv v9, v9, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %mask, i32 %vl)
+  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %mask, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
+  ret <vscale x 2 x i32> %b
+}
+
+; Test float binary operator with vp.merge and vp.fadd.
+declare <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x i1>, i32)
+define <vscale x 2 x float> @vpmerge_vpfadd(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpfadd:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfadd.vv v9, v9, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %a = call <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %mask, i32 %vl)
+  %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
+  ret <vscale x 2 x float> %b
+}
+
+; Test for binary operator with specific EEW by riscv.vrgatherei16.
+declare <vscale x 2 x i32> @llvm.riscv.vrgatherei16.vv.nxv2i32.i64(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i16>, i64)
+define <vscale x 2 x i32> @vpmerge_vrgatherei16(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i16> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vrgatherei16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vrgatherei16.vv v8, v9, v10
+; CHECK-NEXT:    ret
+  %1 = zext i32 %vl to i64
+  %2 = tail call <vscale x 2 x i32> @llvm.riscv.vrgatherei16.vv.nxv2i32.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32> %x, <vscale x 2 x i16> %y, i64 %1)
+  %3 = tail call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %2, <vscale x 2 x i32> %passthru, i32 %vl)
+  ret <vscale x 2 x i32> %2
+}
+
+; Test conversion by fptosi.
+declare <vscale x 2 x i16> @llvm.vp.fptosi.nxv2i16.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
+define <vscale x 2 x i16> @vpmerge_vpfptosi(<vscale x 2 x i16> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpfptosi:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v9
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %a = call <vscale x 2 x i16> @llvm.vp.fptosi.nxv2i16.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x i1> %mask, i32 %vl)
+  %b = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %a, <vscale x 2 x i16> %passthru, i32 %vl)
+  ret <vscale x 2 x i16> %b
+}
+
+; Test conversion by sitofp.
+declare <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
+define <vscale x 2 x float> @vpmerge_vpsitofp(<vscale x 2 x float> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpsitofp:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfncvt.f.x.w v9, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %a = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i1> %mask, i32 %vl)
+  %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
+  ret <vscale x 2 x float> %b
+}
+
+; Test integer extension by vp.zext.
+declare <vscale x 2 x i32> @llvm.vp.zext.nxv2i32.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i32)
+define <vscale x 2 x i32> @vpmerge_vpzext(<vscale x 2 x i32> %passthru, <vscale x 2 x i8> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpzext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vzext.vf4 v10, v9
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %a = call <vscale x 2 x i32> @llvm.vp.zext.nxv2i32.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i1> %mask, i32 %vl)
+  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
+  ret <vscale x 2 x i32> %b
+}
+
+; Test integer truncation by vp.trunc.
+declare <vscale x 2 x i32> @llvm.vp.trunc.nxv2i32.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
+define <vscale x 2 x i32> @vpmerge_vptrunc(<vscale x 2 x i32> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vptrunc:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v9, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %a = call <vscale x 2 x i32> @llvm.vp.trunc.nxv2i32.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i1> %mask, i32 %vl)
+  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
+  ret <vscale x 2 x i32> %b
+}
+
+; Test floating-point extension by vp.fpext.
+declare <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
+define <vscale x 2 x double> @vpmerge_vpfpext(<vscale x 2 x double> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpfpext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfwcvt.f.f.v v12, v10
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %a = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x i1> %mask, i32 %vl)
+  %b = call <vscale x 2 x double> @llvm.vp.merge.nxv2f64(<vscale x 2 x i1> %m, <vscale x 2 x double> %a, <vscale x 2 x double> %passthru, i32 %vl)
+  ret <vscale x 2 x double> %b
+}
+
+; Test floating-point truncation by vp.fptrunc.
+declare <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
+define <vscale x 2 x float> @vpmerge_vpfptrunc(<vscale x 2 x float> %passthru, <vscale x 2 x double> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpfptrunc:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfncvt.f.f.w v9, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %a = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double> %x, <vscale x 2 x i1> %mask, i32 %vl)
+  %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
+  ret <vscale x 2 x float> %b
+}
+
+; Test load operation by vp.load.
+declare <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>*, <vscale x 2 x i1>, i32)
+define <vscale x 2 x i32> @vpmerge_vpload(<vscale x 2 x i32> %passthru, <vscale x 2 x i32>* %p, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpload:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %a = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* %p, <vscale x 2 x i1> %mask, i32 %vl)
+  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
+  ret <vscale x 2 x i32> %b
+}
+
+; Test a result that has both a chain and a glued node.
+define <vscale x 2 x i32> @vpmerge_vpload2(<vscale x 2 x i32> %passthru, <vscale x 2 x i32>* %p, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpload2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT:    vle32.v v11, (a0)
+; CHECK-NEXT:    vmseq.vv v0, v9, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v11, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %a = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* %p, <vscale x 2 x i1> %mask, i32 %vl)
+  %m = call <vscale x 2 x i1> @llvm.vp.icmp.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, metadata !"eq", <vscale x 2 x i1> %mask, i32 %vl)
+  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
+  ret <vscale x 2 x i32> %b
+}
+
+; FIXME: Merge vmerge.vvm and vleffN.v
+declare { <vscale x 2 x i32>, i64 } @llvm.riscv.vleff.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>*, i64)
+define <vscale x 2 x i32> @vpmerge_vleff(<vscale x 2 x i32> %passthru, <vscale x 2 x i32>* %p, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vleff:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT:    vle32ff.v v9, (a0)
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %1 = zext i32 %vl to i64
+  %a = call { <vscale x 2 x i32>, i64 } @llvm.riscv.vleff.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32>* %p, i64 %1)
+  %b = extractvalue { <vscale x 2 x i32>, i64 } %a, 0
+  %c = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %b, <vscale x 2 x i32> %passthru, i32 %vl)
+  ret <vscale x 2 x i32> %c
+}
+
+; Test strided load by riscv.vlse.
+declare <vscale x 2 x i32> @llvm.riscv.vlse.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>*, i64, i64)
+define <vscale x 2 x i32> @vpmerge_vlse(<vscale x 2 x i32> %passthru, <vscale x 2 x i32>* %p, <vscale x 2 x i1> %m, i64 %s, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vlse:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-NEXT:    vlse32.v v9, (a0), a1
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %1 = zext i32 %vl to i64
+  %a = call <vscale x 2 x i32> @llvm.riscv.vlse.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32>* %p, i64 %s, i64 %1)
+  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
+  ret <vscale x 2 x i32> %b
+}
+
+; Test indexed load by riscv.vluxei.
+declare <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i64(<vscale x 2 x i32>, <vscale x 2 x i32>*, <vscale x 2 x i64>, i64)
+define <vscale x 2 x i32> @vpmerge_vluxei(<vscale x 2 x i32> %passthru, <vscale x 2 x i32>* %p, <vscale x 2 x i64> %idx, <vscale x 2 x i1> %m, i64 %s, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vluxei:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-NEXT:    vluxei64.v v9, (a0), v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %1 = zext i32 %vl to i64
+  %a = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* %p, <vscale x 2 x i64> %idx, i64 %1)
+  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
+  ret <vscale x 2 x i32> %b
+}
+
+; Test vector index by riscv.vid.
+declare <vscale x 2 x i32> @llvm.riscv.vid.nxv2i32(<vscale x 2 x i32>, i64)
+define <vscale x 2 x i32> @vpmerge_vid(<vscale x 2 x i32> %passthru, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vid:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vid.v v9
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %1 = zext i32 %vl to i64
+  %a = call <vscale x 2 x i32> @llvm.riscv.vid.nxv2i32(<vscale x 2 x i32> undef, i64 %1)
+  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
+  ret <vscale x 2 x i32> %b
+}
+
+; Test riscv.viota.
+declare <vscale x 2 x i32> @llvm.riscv.viota.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i64)
+define <vscale x 2 x i32> @vpmerge_viota(<vscale x 2 x i32> %passthru, <vscale x 2 x i1> %m, <vscale x 2 x i1> %vm, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_viota:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    viota.m v10, v9
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT:    ret
+  %1 = zext i32 %vl to i64
+  %a = call <vscale x 2 x i32> @llvm.riscv.viota.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> %vm, i64 %1)
+  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
+  ret <vscale x 2 x i32> %b
+}
+
+; Test riscv.vfclass.
+declare <vscale x 2 x i32> @llvm.riscv.vfclass.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x float>, i64)
+define <vscale x 2 x i32> @vpmerge_vfclass(<vscale x 2 x i32> %passthru, <vscale x 2 x float> %vf, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vfclass:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfclass.v v9, v9
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %1 = zext i32 %vl to i64
+  %a = call <vscale x 2 x i32> @llvm.riscv.vfclass.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x float> %vf, i64 %1)
+  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
+  ret <vscale x 2 x i32> %b
+}
+
+; Test riscv.vfsqrt.
+declare <vscale x 2 x float> @llvm.riscv.vfsqrt.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, i64)
+define <vscale x 2 x float> @vpmerge_vfsqrt(<vscale x 2 x float> %passthru, <vscale x 2 x float> %vf, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vfsqrt:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfsqrt.v v9, v9
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %1 = zext i32 %vl to i64
+  %a = call <vscale x 2 x float> @llvm.riscv.vfsqrt.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> %vf, i64 %1)
+  %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
+  ret <vscale x 2 x float> %b
+}
+
+; Test reciprocal operation by riscv.vfrec7.
+declare <vscale x 2 x float> @llvm.riscv.vfrec7.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, i64)
+define <vscale x 2 x float> @vpmerge_vfrec7(<vscale x 2 x float> %passthru, <vscale x 2 x float> %vf, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vfrec7:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfrec7.v v9, v9
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %1 = zext i32 %vl to i64
+  %a = call <vscale x 2 x float> @llvm.riscv.vfrec7.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> %vf, i64 %1)
+  %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
+  ret <vscale x 2 x float> %b
+}