diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -8824,6 +8824,22 @@ SDNode *N; SDVTList VTs = getVTList(VT); + // On i1 (mask) vectors VP_ADD and VP_SUB give the same result as VP_XOR, and + // VP_MUL the same as VP_AND, so rewrite the opcode before the node ID is built. + if (VT.isVector() && VT.getVectorElementType() == MVT::i1) { + switch (Opcode) { + default: + break; + case ISD::VP_ADD: + case ISD::VP_SUB: + Opcode = ISD::VP_XOR; + break; + case ISD::VP_MUL: + Opcode = ISD::VP_AND; + break; + } + } + if (VT != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops); diff --git a/llvm/test/CodeGen/RISCV/fixed-vectors-vadd-vp-mask.ll b/llvm/test/CodeGen/RISCV/fixed-vectors-vadd-vp-mask.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/fixed-vectors-vadd-vp-mask.ll @@ -0,0 +1,77 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK + +declare <2 x i1> @llvm.vp.add.v2i1(<2 x i1>, <2 x i1>, <2 x i1>, i32) + +define <2 x i1> @vadd_vv_v2i1(<2 x i1> %va, <2 x i1> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_v2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <2 x i1> @llvm.vp.add.v2i1(<2 x i1> %va, <2 x i1> %b, <2 x i1> %m, i32 %evl) + ret <2 x i1> %v +} + +declare <4 x i1> @llvm.vp.add.v4i1(<4 x i1>, <4 x i1>, <4 x i1>, i32) + +define <4 x i1> @vadd_vv_v4i1(<4 x i1> %va, <4 x i1> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_v4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: 
vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <4 x i1> @llvm.vp.add.v4i1(<4 x i1> %va, <4 x i1> %b, <4 x i1> %m, i32 %evl) + ret <4 x i1> %v +} + +declare <8 x i1> @llvm.vp.add.v8i1(<8 x i1>, <8 x i1>, <8 x i1>, i32) + +define <8 x i1> @vadd_vv_v8i1(<8 x i1> %va, <8 x i1> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_v8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <8 x i1> @llvm.vp.add.v8i1(<8 x i1> %va, <8 x i1> %b, <8 x i1> %m, i32 %evl) + ret <8 x i1> %v +} + +declare <16 x i1> @llvm.vp.add.v16i1(<16 x i1>, <16 x i1>, <16 x i1>, i32) + +define <16 x i1> @vadd_vv_v16i1(<16 x i1> %va, <16 x i1> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_v16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <16 x i1> @llvm.vp.add.v16i1(<16 x i1> %va, <16 x i1> %b, <16 x i1> %m, i32 %evl) + ret <16 x i1> %v +} + +declare <32 x i1> @llvm.vp.add.v32i1(<32 x i1>, <32 x i1>, <32 x i1>, i32) + +define <32 x i1> @vadd_vv_v32i1(<32 x i1> %va, <32 x i1> %b, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_v32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <32 x i1> @llvm.vp.add.v32i1(<32 x i1> %va, <32 x i1> %b, <32 x i1> %m, i32 %evl) + ret <32 x i1> %v +} + +declare <64 x i1> @llvm.vp.add.v64i1(<64 x i1>, <64 x i1>, <64 x i1>, i32) + +define <64 x i1> @vadd_vv_v64i1(<64 x i1> %va, <64 x i1> %b, <64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_v64i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <64 x i1> @llvm.vp.add.v64i1(<64 x i1> %va, <64 x i1> %b, <64 x i1> %m, i32 %evl) + ret <64 x i1> %v +} diff --git a/llvm/test/CodeGen/RISCV/fixed-vectors-vmul-vp-mask.ll 
b/llvm/test/CodeGen/RISCV/fixed-vectors-vmul-vp-mask.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/fixed-vectors-vmul-vp-mask.ll @@ -0,0 +1,77 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK + +declare <2 x i1> @llvm.vp.mul.v2i1(<2 x i1>, <2 x i1>, <2 x i1>, i32) + +define <2 x i1> @vmul_vv_v2i1(<2 x i1> %va, <2 x i1> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmul_vv_v2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <2 x i1> @llvm.vp.mul.v2i1(<2 x i1> %va, <2 x i1> %b, <2 x i1> %m, i32 %evl) + ret <2 x i1> %v +} + +declare <4 x i1> @llvm.vp.mul.v4i1(<4 x i1>, <4 x i1>, <4 x i1>, i32) + +define <4 x i1> @vmul_vv_v4i1(<4 x i1> %va, <4 x i1> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmul_vv_v4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <4 x i1> @llvm.vp.mul.v4i1(<4 x i1> %va, <4 x i1> %b, <4 x i1> %m, i32 %evl) + ret <4 x i1> %v +} + +declare <8 x i1> @llvm.vp.mul.v8i1(<8 x i1>, <8 x i1>, <8 x i1>, i32) + +define <8 x i1> @vmul_vv_v8i1(<8 x i1> %va, <8 x i1> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmul_vv_v8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <8 x i1> @llvm.vp.mul.v8i1(<8 x i1> %va, <8 x i1> %b, <8 x i1> %m, i32 %evl) + ret <8 x i1> %v +} + +declare <16 x i1> @llvm.vp.mul.v16i1(<16 x i1>, <16 x i1>, <16 x i1>, i32) + +define <16 x i1> @vmul_vv_v16i1(<16 x i1> %va, <16 x i1> %b, <16 x i1> %m, i32 zeroext %evl) { +; 
CHECK-LABEL: vmul_vv_v16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <16 x i1> @llvm.vp.mul.v16i1(<16 x i1> %va, <16 x i1> %b, <16 x i1> %m, i32 %evl) + ret <16 x i1> %v +} + +declare <32 x i1> @llvm.vp.mul.v32i1(<32 x i1>, <32 x i1>, <32 x i1>, i32) + +define <32 x i1> @vmul_vv_v32i1(<32 x i1> %va, <32 x i1> %b, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmul_vv_v32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <32 x i1> @llvm.vp.mul.v32i1(<32 x i1> %va, <32 x i1> %b, <32 x i1> %m, i32 %evl) + ret <32 x i1> %v +} + +declare <64 x i1> @llvm.vp.mul.v64i1(<64 x i1>, <64 x i1>, <64 x i1>, i32) + +define <64 x i1> @vmul_vv_v64i1(<64 x i1> %va, <64 x i1> %b, <64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmul_vv_v64i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <64 x i1> @llvm.vp.mul.v64i1(<64 x i1> %va, <64 x i1> %b, <64 x i1> %m, i32 %evl) + ret <64 x i1> %v +} diff --git a/llvm/test/CodeGen/RISCV/fixed-vectors-vsub-vp-mask.ll b/llvm/test/CodeGen/RISCV/fixed-vectors-vsub-vp-mask.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/fixed-vectors-vsub-vp-mask.ll @@ -0,0 +1,77 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK + +declare <2 x i1> @llvm.vp.sub.v2i1(<2 x i1>, <2 x i1>, <2 x i1>, i32) + +define <2 x i1> @vsub_vv_v2i1(<2 x i1> %va, <2 x i1> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_v2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, 
a0, e8, mf8, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <2 x i1> @llvm.vp.sub.v2i1(<2 x i1> %va, <2 x i1> %b, <2 x i1> %m, i32 %evl) + ret <2 x i1> %v +} + +declare <4 x i1> @llvm.vp.sub.v4i1(<4 x i1>, <4 x i1>, <4 x i1>, i32) + +define <4 x i1> @vsub_vv_v4i1(<4 x i1> %va, <4 x i1> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_v4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <4 x i1> @llvm.vp.sub.v4i1(<4 x i1> %va, <4 x i1> %b, <4 x i1> %m, i32 %evl) + ret <4 x i1> %v +} + +declare <8 x i1> @llvm.vp.sub.v8i1(<8 x i1>, <8 x i1>, <8 x i1>, i32) + +define <8 x i1> @vsub_vv_v8i1(<8 x i1> %va, <8 x i1> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_v8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <8 x i1> @llvm.vp.sub.v8i1(<8 x i1> %va, <8 x i1> %b, <8 x i1> %m, i32 %evl) + ret <8 x i1> %v +} + +declare <16 x i1> @llvm.vp.sub.v16i1(<16 x i1>, <16 x i1>, <16 x i1>, i32) + +define <16 x i1> @vsub_vv_v16i1(<16 x i1> %va, <16 x i1> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_v16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <16 x i1> @llvm.vp.sub.v16i1(<16 x i1> %va, <16 x i1> %b, <16 x i1> %m, i32 %evl) + ret <16 x i1> %v +} + +declare <32 x i1> @llvm.vp.sub.v32i1(<32 x i1>, <32 x i1>, <32 x i1>, i32) + +define <32 x i1> @vsub_vv_v32i1(<32 x i1> %va, <32 x i1> %b, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_v32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <32 x i1> @llvm.vp.sub.v32i1(<32 x i1> %va, <32 x i1> %b, <32 x i1> %m, i32 %evl) + ret <32 x i1> %v +} + +declare <64 x i1> @llvm.vp.sub.v64i1(<64 x i1>, <64 x i1>, <64 x 
i1>, i32) + +define <64 x i1> @vsub_vv_v64i1(<64 x i1> %va, <64 x i1> %b, <64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_v64i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <64 x i1> @llvm.vp.sub.v64i1(<64 x i1> %va, <64 x i1> %b, <64 x i1> %m, i32 %evl) + ret <64 x i1> %v +} diff --git a/llvm/test/CodeGen/RISCV/vadd-vp-mask.ll b/llvm/test/CodeGen/RISCV/vadd-vp-mask.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/vadd-vp-mask.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK + + +declare <vscale x 2 x i1> @llvm.vp.add.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x i1> @vadd_vv_nxv2i1(<vscale x 2 x i1> %va, <vscale x 2 x i1> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 2 x i1> @llvm.vp.add.nxv2i1(<vscale x 2 x i1> %va, <vscale x 2 x i1> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i1> %v +} + +declare <vscale x 4 x i1> @llvm.vp.add.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x i1> @vadd_vv_nxv4i1(<vscale x 4 x i1> %va, <vscale x 4 x i1> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 4 x i1> @llvm.vp.add.nxv4i1(<vscale x 4 x i1> %va, <vscale x 4 x i1> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i1> %v +} + +declare <vscale x 8 x i1> @llvm.vp.add.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x i1> @vadd_vv_nxv8i1(<vscale x 8 x i1> %va, <vscale x 8 x i1> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 8 x i1> @llvm.vp.add.nxv8i1(<vscale x 8 x i1> %va, <vscale x 8 x i1> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +declare <vscale x 16 x i1> @llvm.vp.add.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, i32) + +define <vscale x 16 x i1> @vadd_vv_nxv16i1(<vscale x 16 x i1> %va, <vscale x 16 x i1> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv16i1: +; CHECK: # 
%bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 16 x i1> @llvm.vp.add.nxv16i1(<vscale x 16 x i1> %va, <vscale x 16 x i1> %b, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i1> %v +} + +declare <vscale x 32 x i1> @llvm.vp.add.nxv32i1(<vscale x 32 x i1>, <vscale x 32 x i1>, <vscale x 32 x i1>, i32) + +define <vscale x 32 x i1> @vadd_vv_nxv32i1(<vscale x 32 x i1> %va, <vscale x 32 x i1> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 32 x i1> @llvm.vp.add.nxv32i1(<vscale x 32 x i1> %va, <vscale x 32 x i1> %b, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i1> %v +} diff --git a/llvm/test/CodeGen/RISCV/vmul-vp-mask.ll b/llvm/test/CodeGen/RISCV/vmul-vp-mask.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/vmul-vp-mask.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK + + +declare <vscale x 2 x i1> @llvm.vp.mul.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x i1> @vmul_vv_nxv2i1(<vscale x 2 x i1> %va, <vscale x 2 x i1> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmul_vv_nxv2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 2 x i1> @llvm.vp.mul.nxv2i1(<vscale x 2 x i1> %va, <vscale x 2 x i1> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i1> %v +} + +declare <vscale x 4 x i1> @llvm.vp.mul.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x i1> @vmul_vv_nxv4i1(<vscale x 4 x i1> %va, <vscale x 4 x i1> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmul_vv_nxv4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 4 x i1> @llvm.vp.mul.nxv4i1(<vscale x 4 x i1> %va, <vscale x 4 x i1> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i1> %v +} + +declare <vscale x 8 x i1> @llvm.vp.mul.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x i1> @vmul_vv_nxv8i1(<vscale x 8 x i1> %va, <vscale x 8 x i1> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmul_vv_nxv8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 8 x i1> @llvm.vp.mul.nxv8i1(<vscale x 8 x i1> %va, <vscale x 8 x i1> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +declare 
<vscale x 16 x i1> @llvm.vp.mul.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, i32) + +define <vscale x 16 x i1> @vmul_vv_nxv16i1(<vscale x 16 x i1> %va, <vscale x 16 x i1> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmul_vv_nxv16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 16 x i1> @llvm.vp.mul.nxv16i1(<vscale x 16 x i1> %va, <vscale x 16 x i1> %b, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i1> %v +} + +declare <vscale x 32 x i1> @llvm.vp.mul.nxv32i1(<vscale x 32 x i1>, <vscale x 32 x i1>, <vscale x 32 x i1>, i32) + +define <vscale x 32 x i1> @vmul_vv_nxv32i1(<vscale x 32 x i1> %va, <vscale x 32 x i1> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmul_vv_nxv32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 32 x i1> @llvm.vp.mul.nxv32i1(<vscale x 32 x i1> %va, <vscale x 32 x i1> %b, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i1> %v +} diff --git a/llvm/test/CodeGen/RISCV/vsub-vp-mask.ll b/llvm/test/CodeGen/RISCV/vsub-vp-mask.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/vsub-vp-mask.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK + + +declare <vscale x 2 x i1> @llvm.vp.sub.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x i1> @vsub_vv_nxv2i1(<vscale x 2 x i1> %va, <vscale x 2 x i1> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 2 x i1> @llvm.vp.sub.nxv2i1(<vscale x 2 x i1> %va, <vscale x 2 x i1> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i1> %v +} + +declare <vscale x 4 x i1> @llvm.vp.sub.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x i1> @vsub_vv_nxv4i1(<vscale x 4 x i1> %va, <vscale x 4 x i1> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 4 x i1> @llvm.vp.sub.nxv4i1(<vscale x 4 x i1> %va, <vscale x 4 x i1> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i1> %v +} + +declare <vscale x 8 x i1> @llvm.vp.sub.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x i1> @vsub_vv_nxv8i1(<vscale x 8 x i1> %va, <vscale x 8 x i1> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; 
CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 8 x i1> @llvm.vp.sub.nxv8i1(<vscale x 8 x i1> %va, <vscale x 8 x i1> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +declare <vscale x 16 x i1> @llvm.vp.sub.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, i32) + +define <vscale x 16 x i1> @vsub_vv_nxv16i1(<vscale x 16 x i1> %va, <vscale x 16 x i1> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 16 x i1> @llvm.vp.sub.nxv16i1(<vscale x 16 x i1> %va, <vscale x 16 x i1> %b, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i1> %v +} + +declare <vscale x 32 x i1> @llvm.vp.sub.nxv32i1(<vscale x 32 x i1>, <vscale x 32 x i1>, <vscale x 32 x i1>, i32) + +define <vscale x 32 x i1> @vsub_vv_nxv32i1(<vscale x 32 x i1> %va, <vscale x 32 x i1> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 32 x i1> @llvm.vp.sub.nxv32i1(<vscale x 32 x i1> %va, <vscale x 32 x i1> %b, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i1> %v +}