diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -601,6 +601,8 @@
   SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG, unsigned NewOpc,
                             bool HasMask = true) const;
   SDValue lowerVPOp(SDValue Op, SelectionDAG &DAG, unsigned RISCVISDOpc) const;
+  SDValue lowerLogicVPOp(SDValue Op, SelectionDAG &DAG, unsigned MaskOpc,
+                         unsigned VecOpc) const;
   SDValue lowerFixedLengthVectorExtendToRVV(SDValue Op, SelectionDAG &DAG,
                                             unsigned ExtendOpc) const;
   SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -547,6 +547,10 @@
       setOperationAction(ISD::SELECT_CC, VT, Expand);
       setOperationAction(ISD::VSELECT, VT, Expand);
 
+      setOperationAction(ISD::VP_AND, VT, Custom);
+      setOperationAction(ISD::VP_OR, VT, Custom);
+      setOperationAction(ISD::VP_XOR, VT, Custom);
+
       setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
       setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
       setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
@@ -831,6 +835,9 @@
 
       // Operations below are different for between masks and other vectors.
       if (VT.getVectorElementType() == MVT::i1) {
+        setOperationAction(ISD::VP_AND, VT, Custom);
+        setOperationAction(ISD::VP_OR, VT, Custom);
+        setOperationAction(ISD::VP_XOR, VT, Custom);
         setOperationAction(ISD::AND, VT, Custom);
         setOperationAction(ISD::OR, VT, Custom);
         setOperationAction(ISD::XOR, VT, Custom);
@@ -3189,11 +3196,11 @@
   case ISD::VP_UREM:
     return lowerVPOp(Op, DAG, RISCVISD::UREM_VL);
   case ISD::VP_AND:
-    return lowerVPOp(Op, DAG, RISCVISD::AND_VL);
+    return lowerLogicVPOp(Op, DAG, RISCVISD::VMAND_VL, RISCVISD::AND_VL);
   case ISD::VP_OR:
-    return lowerVPOp(Op, DAG, RISCVISD::OR_VL);
+    return lowerLogicVPOp(Op, DAG, RISCVISD::VMOR_VL, RISCVISD::OR_VL);
   case ISD::VP_XOR:
-    return lowerVPOp(Op, DAG, RISCVISD::XOR_VL);
+    return lowerLogicVPOp(Op, DAG, RISCVISD::VMXOR_VL, RISCVISD::XOR_VL);
   case ISD::VP_ASHR:
     return lowerVPOp(Op, DAG, RISCVISD::SRA_VL);
   case ISD::VP_LSHR:
@@ -5409,6 +5416,33 @@
   return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
 }
 
+SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op, SelectionDAG &DAG,
+                                            unsigned MaskOpc,
+                                            unsigned VecOpc) const {
+  MVT VT = Op.getSimpleValueType();
+  if (VT.getVectorElementType() != MVT::i1)
+    return lowerVPOp(Op, DAG, VecOpc);
+
+  // It is safe to drop the mask parameter as masked-off elements are undef.
+  SDValue Op1 = Op->getOperand(0);
+  SDValue Op2 = Op->getOperand(1);
+  SDValue VL = Op->getOperand(3);
+
+  MVT ContainerVT = VT;
+  const bool IsFixed = VT.isFixedLengthVector();
+  if (IsFixed) {
+    ContainerVT = getContainerForFixedLengthVector(VT);
+    Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
+    Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
+  }
+
+  SDLoc DL(Op);
+  SDValue Val = DAG.getNode(MaskOpc, DL, ContainerVT, Op1, Op2, VL);
+  if (!IsFixed)
+    return Val;
+  return convertFromScalableVector(VT, Val, DAG, Subtarget);
+}
+
 // Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
 // matched to a RVV indexed load. The RVV indexed load instructions only
 // support the "unsigned unscaled" addressing mode; indices are implicitly
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-marith-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-marith-vp.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-marith-vp.ll
@@ -0,0 +1,269 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s
+
+declare <1 x i1> @llvm.vp.and.v1i1(<1 x i1>, <1 x i1>, <1 x i1>, i32)
+
+define <1 x i1> @and_v1i1(<1 x i1> %b, <1 x i1> %c, <1 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: and_v1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vmand.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <1 x i1> @llvm.vp.and.v1i1(<1 x i1> %b, <1 x i1> %c, <1 x i1> %a, i32 %evl)
+  ret <1 x i1> %v
+}
+
+declare <2 x i1> @llvm.vp.and.v2i1(<2 x i1>, <2 x i1>, <2 x i1>, i32)
+
+define <2 x i1> @and_v2i1(<2 x i1> %b, <2 x i1> %c, <2 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: and_v2i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vmand.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <2 x i1> @llvm.vp.and.v2i1(<2 x i1> %b, <2 x i1> %c, <2 x i1> %a, i32 %evl)
+  ret <2 x i1> %v
+}
+
+declare <4 x i1> @llvm.vp.and.v4i1(<4 x i1>, <4 x i1>, <4 x i1>, i32)
+
+define <4 x i1> @and_v4i1(<4 x i1> %b, <4 x i1> %c, <4 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: and_v4i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT:    vmand.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <4 x i1> @llvm.vp.and.v4i1(<4 x i1> %b, <4 x i1> %c, <4 x i1> %a, i32 %evl)
+  ret <4 x i1> %v
+}
+
+declare <8 x i1> @llvm.vp.and.v8i1(<8 x i1>, <8 x i1>, <8 x i1>, i32)
+
+define <8 x i1> @and_v8i1(<8 x i1> %b, <8 x i1> %c, <8 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: and_v8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT:    vmand.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <8 x i1> @llvm.vp.and.v8i1(<8 x i1> %b, <8 x i1> %c, <8 x i1> %a, i32 %evl)
+  ret <8 x i1> %v
+}
+
+declare <16 x i1> @llvm.vp.and.v16i1(<16 x i1>, <16 x i1>, <16 x i1>, i32)
+
+define <16 x i1> @and_v16i1(<16 x i1> %b, <16 x i1> %c, <16 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: and_v16i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT:    vmand.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <16 x i1> @llvm.vp.and.v16i1(<16 x i1> %b, <16 x i1> %c, <16 x i1> %a, i32 %evl)
+  ret <16 x i1> %v
+}
+
+declare <1 x i1> @llvm.vp.or.v1i1(<1 x i1>, <1 x i1>, <1 x i1>, i32)
+
+define <1 x i1> @or_v1i1(<1 x i1> %b, <1 x i1> %c, <1 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: or_v1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vmor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <1 x i1> @llvm.vp.or.v1i1(<1 x i1> %b, <1 x i1> %c, <1 x i1> %a, i32 %evl)
+  ret <1 x i1> %v
+}
+
+declare <2 x i1> @llvm.vp.or.v2i1(<2 x i1>, <2 x i1>, <2 x i1>, i32)
+
+define <2 x i1> @or_v2i1(<2 x i1> %b, <2 x i1> %c, <2 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: or_v2i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vmor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <2 x i1> @llvm.vp.or.v2i1(<2 x i1> %b, <2 x i1> %c, <2 x i1> %a, i32 %evl)
+  ret <2 x i1> %v
+}
+
+declare <4 x i1> @llvm.vp.or.v4i1(<4 x i1>, <4 x i1>, <4 x i1>, i32)
+
+define <4 x i1> @or_v4i1(<4 x i1> %b, <4 x i1> %c, <4 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: or_v4i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT:    vmor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <4 x i1> @llvm.vp.or.v4i1(<4 x i1> %b, <4 x i1> %c, <4 x i1> %a, i32 %evl)
+  ret <4 x i1> %v
+}
+
+declare <8 x i1> @llvm.vp.or.v8i1(<8 x i1>, <8 x i1>, <8 x i1>, i32)
+
+define <8 x i1> @or_v8i1(<8 x i1> %b, <8 x i1> %c, <8 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: or_v8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT:    vmor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <8 x i1> @llvm.vp.or.v8i1(<8 x i1> %b, <8 x i1> %c, <8 x i1> %a, i32 %evl)
+  ret <8 x i1> %v
+}
+
+declare <16 x i1> @llvm.vp.or.v16i1(<16 x i1>, <16 x i1>, <16 x i1>, i32)
+
+define <16 x i1> @or_v16i1(<16 x i1> %b, <16 x i1> %c, <16 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: or_v16i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT:    vmor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <16 x i1> @llvm.vp.or.v16i1(<16 x i1> %b, <16 x i1> %c, <16 x i1> %a, i32 %evl)
+  ret <16 x i1> %v
+}
+
+declare <1 x i1> @llvm.vp.xor.v1i1(<1 x i1>, <1 x i1>, <1 x i1>, i32)
+
+define <1 x i1> @xor_v1i1(<1 x i1> %b, <1 x i1> %c, <1 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: xor_v1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vmxor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <1 x i1> @llvm.vp.xor.v1i1(<1 x i1> %b, <1 x i1> %c, <1 x i1> %a, i32 %evl)
+  ret <1 x i1> %v
+}
+
+declare <2 x i1> @llvm.vp.xor.v2i1(<2 x i1>, <2 x i1>, <2 x i1>, i32)
+
+define <2 x i1> @xor_v2i1(<2 x i1> %b, <2 x i1> %c, <2 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: xor_v2i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vmxor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <2 x i1> @llvm.vp.xor.v2i1(<2 x i1> %b, <2 x i1> %c, <2 x i1> %a, i32 %evl)
+  ret <2 x i1> %v
+}
+
+declare <4 x i1> @llvm.vp.xor.v4i1(<4 x i1>, <4 x i1>, <4 x i1>, i32)
+
+define <4 x i1> @xor_v4i1(<4 x i1> %b, <4 x i1> %c, <4 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: xor_v4i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT:    vmxor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <4 x i1> @llvm.vp.xor.v4i1(<4 x i1> %b, <4 x i1> %c, <4 x i1> %a, i32 %evl)
+  ret <4 x i1> %v
+}
+
+declare <8 x i1> @llvm.vp.xor.v8i1(<8 x i1>, <8 x i1>, <8 x i1>, i32)
+
+define <8 x i1> @xor_v8i1(<8 x i1> %b, <8 x i1> %c, <8 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: xor_v8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT:    vmxor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <8 x i1> @llvm.vp.xor.v8i1(<8 x i1> %b, <8 x i1> %c, <8 x i1> %a, i32 %evl)
+  ret <8 x i1> %v
+}
+
+declare <16 x i1> @llvm.vp.xor.v16i1(<16 x i1>, <16 x i1>, <16 x i1>, i32)
+
+define <16 x i1> @xor_v16i1(<16 x i1> %b, <16 x i1> %c, <16 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: xor_v16i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT:    vmxor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <16 x i1> @llvm.vp.xor.v16i1(<16 x i1> %b, <16 x i1> %c, <16 x i1> %a, i32 %evl)
+  ret <16 x i1> %v
+}
+
+declare <vscale x 1 x i1> @llvm.vp.xor.nxv1i1(<vscale x 1 x i1>, <vscale x 1 x i1>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i1> @xor_nxv1i1(<vscale x 1 x i1> %b, <vscale x 1 x i1> %c, <vscale x 1 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: xor_nxv1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vmxor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x i1> @llvm.vp.xor.nxv1i1(<vscale x 1 x i1> %b, <vscale x 1 x i1> %c, <vscale x 1 x i1> %a, i32 %evl)
+  ret <vscale x 1 x i1> %v
+}
+
+declare <vscale x 2 x i1> @llvm.vp.xor.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i1> @xor_nxv2i1(<vscale x 2 x i1> %b, <vscale x 2 x i1> %c, <vscale x 2 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: xor_nxv2i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT:    vmxor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i1> @llvm.vp.xor.nxv2i1(<vscale x 2 x i1> %b, <vscale x 2 x i1> %c, <vscale x 2 x i1> %a, i32 %evl)
+  ret <vscale x 2 x i1> %v
+}
+
+declare <vscale x 4 x i1> @llvm.vp.xor.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i1> @xor_nxv4i1(<vscale x 4 x i1> %b, <vscale x 4 x i1> %c, <vscale x 4 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: xor_nxv4i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT:    vmxor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x i1> @llvm.vp.xor.nxv4i1(<vscale x 4 x i1> %b, <vscale x 4 x i1> %c, <vscale x 4 x i1> %a, i32 %evl)
+  ret <vscale x 4 x i1> %v
+}
+
+declare <vscale x 8 x i1> @llvm.vp.xor.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i1> @xor_nxv8i1(<vscale x 8 x i1> %b, <vscale x 8 x i1> %c, <vscale x 8 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: xor_nxv8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT:    vmxor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i1> @llvm.vp.xor.nxv8i1(<vscale x 8 x i1> %b, <vscale x 8 x i1> %c, <vscale x 8 x i1> %a, i32 %evl)
+  ret <vscale x 8 x i1> %v
+}
+
+declare <vscale x 16 x i1> @llvm.vp.xor.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i1> @xor_nxv16i1(<vscale x 16 x i1> %b, <vscale x 16 x i1> %c, <vscale x 16 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: xor_nxv16i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
+; CHECK-NEXT:    vmxor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i1> @llvm.vp.xor.nxv16i1(<vscale x 16 x i1> %b, <vscale x 16 x i1> %c, <vscale x 16 x i1> %a, i32 %evl)
+  ret <vscale x 16 x i1> %v
+}
+
+declare <vscale x 32 x i1> @llvm.vp.xor.nxv32i1(<vscale x 32 x i1>, <vscale x 32 x i1>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i1> @xor_nxv32i1(<vscale x 32 x i1> %b, <vscale x 32 x i1> %c, <vscale x 32 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: xor_nxv32i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
+; CHECK-NEXT:    vmxor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 32 x i1> @llvm.vp.xor.nxv32i1(<vscale x 32 x i1> %b, <vscale x 32 x i1> %c, <vscale x 32 x i1> %a, i32 %evl)
+  ret <vscale x 32 x i1> %v
+}
+
+declare <vscale x 64 x i1> @llvm.vp.xor.nxv64i1(<vscale x 64 x i1>, <vscale x 64 x i1>, <vscale x 64 x i1>, i32)
+
+define <vscale x 64 x i1> @xor_nxv64i1(<vscale x 64 x i1> %b, <vscale x 64 x i1> %c, <vscale x 64 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: xor_nxv64i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
+; CHECK-NEXT:    vmxor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 64 x i1> @llvm.vp.xor.nxv64i1(<vscale x 64 x i1> %b, <vscale x 64 x i1> %c, <vscale x 64 x i1> %a, i32 %evl)
+  ret <vscale x 64 x i1> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/marith-vp.ll b/llvm/test/CodeGen/RISCV/rvv/marith-vp.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/marith-vp.ll
@@ -0,0 +1,437 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s
+
+declare <1 x i1> @llvm.vp.and.v1i1(<1 x i1>, <1 x i1>, <1 x i1>, i32)
+
+define <1 x i1> @and_v1i1(<1 x i1> %b, <1 x i1> %c, <1 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: and_v1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vmand.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <1 x i1> @llvm.vp.and.v1i1(<1 x i1> %b, <1 x i1> %c, <1 x i1> %a, i32 %evl)
+  ret <1 x i1> %v
+}
+
+declare <2 x i1> @llvm.vp.and.v2i1(<2 x i1>, <2 x i1>, <2 x i1>, i32)
+
+define <2 x i1> @and_v2i1(<2 x i1> %b, <2 x i1> %c, <2 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: and_v2i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vmand.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <2 x i1> @llvm.vp.and.v2i1(<2 x i1> %b, <2 x i1> %c, <2 x i1> %a, i32 %evl)
+  ret <2 x i1> %v
+}
+
+declare <4 x i1> @llvm.vp.and.v4i1(<4 x i1>, <4 x i1>, <4 x i1>, i32)
+
+define <4 x i1> @and_v4i1(<4 x i1> %b, <4 x i1> %c, <4 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: and_v4i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT:    vmand.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <4 x i1> @llvm.vp.and.v4i1(<4 x i1> %b, <4 x i1> %c, <4 x i1> %a, i32 %evl)
+  ret <4 x i1> %v
+}
+
+declare <8 x i1> @llvm.vp.and.v8i1(<8 x i1>, <8 x i1>, <8 x i1>, i32)
+
+define <8 x i1> @and_v8i1(<8 x i1> %b, <8 x i1> %c, <8 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: and_v8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT:    vmand.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <8 x i1> @llvm.vp.and.v8i1(<8 x i1> %b, <8 x i1> %c, <8 x i1> %a, i32 %evl)
+  ret <8 x i1> %v
+}
+
+declare <16 x i1> @llvm.vp.and.v16i1(<16 x i1>, <16 x i1>, <16 x i1>, i32)
+
+define <16 x i1> @and_v16i1(<16 x i1> %b, <16 x i1> %c, <16 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: and_v16i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT:    vmand.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <16 x i1> @llvm.vp.and.v16i1(<16 x i1> %b, <16 x i1> %c, <16 x i1> %a, i32 %evl)
+  ret <16 x i1> %v
+}
+
+declare <vscale x 1 x i1> @llvm.vp.and.nxv1i1(<vscale x 1 x i1>, <vscale x 1 x i1>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i1> @and_nxv1i1(<vscale x 1 x i1> %b, <vscale x 1 x i1> %c, <vscale x 1 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: and_nxv1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vmand.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x i1> @llvm.vp.and.nxv1i1(<vscale x 1 x i1> %b, <vscale x 1 x i1> %c, <vscale x 1 x i1> %a, i32 %evl)
+  ret <vscale x 1 x i1> %v
+}
+
+declare <vscale x 2 x i1> @llvm.vp.and.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i1> @and_nxv2i1(<vscale x 2 x i1> %b, <vscale x 2 x i1> %c, <vscale x 2 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: and_nxv2i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT:    vmand.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i1> @llvm.vp.and.nxv2i1(<vscale x 2 x i1> %b, <vscale x 2 x i1> %c, <vscale x 2 x i1> %a, i32 %evl)
+  ret <vscale x 2 x i1> %v
+}
+
+declare <vscale x 4 x i1> @llvm.vp.and.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i1> @and_nxv4i1(<vscale x 4 x i1> %b, <vscale x 4 x i1> %c, <vscale x 4 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: and_nxv4i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT:    vmand.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x i1> @llvm.vp.and.nxv4i1(<vscale x 4 x i1> %b, <vscale x 4 x i1> %c, <vscale x 4 x i1> %a, i32 %evl)
+  ret <vscale x 4 x i1> %v
+}
+
+declare <vscale x 8 x i1> @llvm.vp.and.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i1> @and_nxv8i1(<vscale x 8 x i1> %b, <vscale x 8 x i1> %c, <vscale x 8 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: and_nxv8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT:    vmand.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i1> @llvm.vp.and.nxv8i1(<vscale x 8 x i1> %b, <vscale x 8 x i1> %c, <vscale x 8 x i1> %a, i32 %evl)
+  ret <vscale x 8 x i1> %v
+}
+
+declare <vscale x 16 x i1> @llvm.vp.and.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i1> @and_nxv16i1(<vscale x 16 x i1> %b, <vscale x 16 x i1> %c, <vscale x 16 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: and_nxv16i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
+; CHECK-NEXT:    vmand.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i1> @llvm.vp.and.nxv16i1(<vscale x 16 x i1> %b, <vscale x 16 x i1> %c, <vscale x 16 x i1> %a, i32 %evl)
+  ret <vscale x 16 x i1> %v
+}
+
+declare <vscale x 32 x i1> @llvm.vp.and.nxv32i1(<vscale x 32 x i1>, <vscale x 32 x i1>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i1> @and_nxv32i1(<vscale x 32 x i1> %b, <vscale x 32 x i1> %c, <vscale x 32 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: and_nxv32i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
+; CHECK-NEXT:    vmand.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 32 x i1> @llvm.vp.and.nxv32i1(<vscale x 32 x i1> %b, <vscale x 32 x i1> %c, <vscale x 32 x i1> %a, i32 %evl)
+  ret <vscale x 32 x i1> %v
+}
+
+declare <vscale x 64 x i1> @llvm.vp.and.nxv64i1(<vscale x 64 x i1>, <vscale x 64 x i1>, <vscale x 64 x i1>, i32)
+
+define <vscale x 64 x i1> @and_nxv64i1(<vscale x 64 x i1> %b, <vscale x 64 x i1> %c, <vscale x 64 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: and_nxv64i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
+; CHECK-NEXT:    vmand.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 64 x i1> @llvm.vp.and.nxv64i1(<vscale x 64 x i1> %b, <vscale x 64 x i1> %c, <vscale x 64 x i1> %a, i32 %evl)
+  ret <vscale x 64 x i1> %v
+}
+
+declare <1 x i1> @llvm.vp.or.v1i1(<1 x i1>, <1 x i1>, <1 x i1>, i32)
+
+define <1 x i1> @or_v1i1(<1 x i1> %b, <1 x i1> %c, <1 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: or_v1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vmor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <1 x i1> @llvm.vp.or.v1i1(<1 x i1> %b, <1 x i1> %c, <1 x i1> %a, i32 %evl)
+  ret <1 x i1> %v
+}
+
+declare <2 x i1> @llvm.vp.or.v2i1(<2 x i1>, <2 x i1>, <2 x i1>, i32)
+
+define <2 x i1> @or_v2i1(<2 x i1> %b, <2 x i1> %c, <2 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: or_v2i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vmor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <2 x i1> @llvm.vp.or.v2i1(<2 x i1> %b, <2 x i1> %c, <2 x i1> %a, i32 %evl)
+  ret <2 x i1> %v
+}
+
+declare <4 x i1> @llvm.vp.or.v4i1(<4 x i1>, <4 x i1>, <4 x i1>, i32)
+
+define <4 x i1> @or_v4i1(<4 x i1> %b, <4 x i1> %c, <4 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: or_v4i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT:    vmor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <4 x i1> @llvm.vp.or.v4i1(<4 x i1> %b, <4 x i1> %c, <4 x i1> %a, i32 %evl)
+  ret <4 x i1> %v
+}
+
+declare <8 x i1> @llvm.vp.or.v8i1(<8 x i1>, <8 x i1>, <8 x i1>, i32)
+
+define <8 x i1> @or_v8i1(<8 x i1> %b, <8 x i1> %c, <8 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: or_v8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT:    vmor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <8 x i1> @llvm.vp.or.v8i1(<8 x i1> %b, <8 x i1> %c, <8 x i1> %a, i32 %evl)
+  ret <8 x i1> %v
+}
+
+declare <16 x i1> @llvm.vp.or.v16i1(<16 x i1>, <16 x i1>, <16 x i1>, i32)
+
+define <16 x i1> @or_v16i1(<16 x i1> %b, <16 x i1> %c, <16 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: or_v16i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT:    vmor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <16 x i1> @llvm.vp.or.v16i1(<16 x i1> %b, <16 x i1> %c, <16 x i1> %a, i32 %evl)
+  ret <16 x i1> %v
+}
+
+declare <vscale x 1 x i1> @llvm.vp.or.nxv1i1(<vscale x 1 x i1>, <vscale x 1 x i1>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i1> @or_nxv1i1(<vscale x 1 x i1> %b, <vscale x 1 x i1> %c, <vscale x 1 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: or_nxv1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vmor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x i1> @llvm.vp.or.nxv1i1(<vscale x 1 x i1> %b, <vscale x 1 x i1> %c, <vscale x 1 x i1> %a, i32 %evl)
+  ret <vscale x 1 x i1> %v
+}
+
+declare <vscale x 2 x i1> @llvm.vp.or.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i1> @or_nxv2i1(<vscale x 2 x i1> %b, <vscale x 2 x i1> %c, <vscale x 2 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: or_nxv2i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT:    vmor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i1> @llvm.vp.or.nxv2i1(<vscale x 2 x i1> %b, <vscale x 2 x i1> %c, <vscale x 2 x i1> %a, i32 %evl)
+  ret <vscale x 2 x i1> %v
+}
+
+declare <vscale x 4 x i1> @llvm.vp.or.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i1> @or_nxv4i1(<vscale x 4 x i1> %b, <vscale x 4 x i1> %c, <vscale x 4 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: or_nxv4i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT:    vmor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x i1> @llvm.vp.or.nxv4i1(<vscale x 4 x i1> %b, <vscale x 4 x i1> %c, <vscale x 4 x i1> %a, i32 %evl)
+  ret <vscale x 4 x i1> %v
+}
+
+declare <vscale x 8 x i1> @llvm.vp.or.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i1> @or_nxv8i1(<vscale x 8 x i1> %b, <vscale x 8 x i1> %c, <vscale x 8 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: or_nxv8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT:    vmor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i1> @llvm.vp.or.nxv8i1(<vscale x 8 x i1> %b, <vscale x 8 x i1> %c, <vscale x 8 x i1> %a, i32 %evl)
+  ret <vscale x 8 x i1> %v
+}
+
+declare <vscale x 16 x i1> @llvm.vp.or.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i1> @or_nxv16i1(<vscale x 16 x i1> %b, <vscale x 16 x i1> %c, <vscale x 16 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: or_nxv16i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
+; CHECK-NEXT:    vmor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i1> @llvm.vp.or.nxv16i1(<vscale x 16 x i1> %b, <vscale x 16 x i1> %c, <vscale x 16 x i1> %a, i32 %evl)
+  ret <vscale x 16 x i1> %v
+}
+
+declare <vscale x 32 x i1> @llvm.vp.or.nxv32i1(<vscale x 32 x i1>, <vscale x 32 x i1>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i1> @or_nxv32i1(<vscale x 32 x i1> %b, <vscale x 32 x i1> %c, <vscale x 32 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: or_nxv32i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
+; CHECK-NEXT:    vmor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 32 x i1> @llvm.vp.or.nxv32i1(<vscale x 32 x i1> %b, <vscale x 32 x i1> %c, <vscale x 32 x i1> %a, i32 %evl)
+  ret <vscale x 32 x i1> %v
+}
+
+declare <vscale x 64 x i1> @llvm.vp.or.nxv64i1(<vscale x 64 x i1>, <vscale x 64 x i1>, <vscale x 64 x i1>, i32)
+
+define <vscale x 64 x i1> @or_nxv64i1(<vscale x 64 x i1> %b, <vscale x 64 x i1> %c, <vscale x 64 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: or_nxv64i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
+; CHECK-NEXT:    vmor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 64 x i1> @llvm.vp.or.nxv64i1(<vscale x 64 x i1> %b, <vscale x 64 x i1> %c, <vscale x 64 x i1> %a, i32 %evl)
+  ret <vscale x 64 x i1> %v
+}
+
+declare <1 x i1> @llvm.vp.xor.v1i1(<1 x i1>, <1 x i1>, <1 x i1>, i32)
+
+define <1 x i1> @xor_v1i1(<1 x i1> %b, <1 x i1> %c, <1 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: xor_v1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vmxor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <1 x i1> @llvm.vp.xor.v1i1(<1 x i1> %b, <1 x i1> %c, <1 x i1> %a, i32 %evl)
+  ret <1 x i1> %v
+}
+
+declare <2 x i1> @llvm.vp.xor.v2i1(<2 x i1>, <2 x i1>, <2 x i1>, i32)
+
+define <2 x i1> @xor_v2i1(<2 x i1> %b, <2 x i1> %c, <2 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: xor_v2i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vmxor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <2 x i1> @llvm.vp.xor.v2i1(<2 x i1> %b, <2 x i1> %c, <2 x i1> %a, i32 %evl)
+  ret <2 x i1> %v
+}
+
+declare <4 x i1> @llvm.vp.xor.v4i1(<4 x i1>, <4 x i1>, <4 x i1>, i32)
+
+define <4 x i1> @xor_v4i1(<4 x i1> %b, <4 x i1> %c, <4 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: xor_v4i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT:    vmxor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <4 x i1> @llvm.vp.xor.v4i1(<4 x i1> %b, <4 x i1> %c, <4 x i1> %a, i32 %evl)
+  ret <4 x i1> %v
+}
+
+declare <8 x i1> @llvm.vp.xor.v8i1(<8 x i1>, <8 x i1>, <8 x i1>, i32)
+
+define <8 x i1> @xor_v8i1(<8 x i1> %b, <8 x i1> %c, <8 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: xor_v8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT:    vmxor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <8 x i1> @llvm.vp.xor.v8i1(<8 x i1> %b, <8 x i1> %c, <8 x i1> %a, i32 %evl)
+  ret <8 x i1> %v
+}
+
+declare <16 x i1> @llvm.vp.xor.v16i1(<16 x i1>, <16 x i1>, <16 x i1>, i32)
+
+define <16 x i1> @xor_v16i1(<16 x i1> %b, <16 x i1> %c, <16 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: xor_v16i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT:    vmxor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <16 x i1> @llvm.vp.xor.v16i1(<16 x i1> %b, <16 x i1> %c, <16 x i1> %a, i32 %evl)
+  ret <16 x i1> %v
+}
+
+declare <vscale x 1 x i1> @llvm.vp.xor.nxv1i1(<vscale x 1 x i1>, <vscale x 1 x i1>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i1> @xor_nxv1i1(<vscale x 1 x i1> %b, <vscale x 1 x i1> %c, <vscale x 1 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: xor_nxv1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vmxor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x i1> @llvm.vp.xor.nxv1i1(<vscale x 1 x i1> %b, <vscale x 1 x i1> %c, <vscale x 1 x i1> %a, i32 %evl)
+  ret <vscale x 1 x i1> %v
+}
+
+declare <vscale x 2 x i1> @llvm.vp.xor.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i1> @xor_nxv2i1(<vscale x 2 x i1> %b, <vscale x 2 x i1> %c, <vscale x 2 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: xor_nxv2i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT:    vmxor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i1> @llvm.vp.xor.nxv2i1(<vscale x 2 x i1> %b, <vscale x 2 x i1> %c, <vscale x 2 x i1> %a, i32 %evl)
+  ret <vscale x 2 x i1> %v
+}
+
+declare <vscale x 4 x i1> @llvm.vp.xor.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i1> @xor_nxv4i1(<vscale x 4 x i1> %b, <vscale x 4 x i1> %c, <vscale x 4 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: xor_nxv4i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT:    vmxor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x i1> @llvm.vp.xor.nxv4i1(<vscale x 4 x i1> %b, <vscale x 4 x i1> %c, <vscale x 4 x i1> %a, i32 %evl)
+  ret <vscale x 4 x i1> %v
+}
+
+declare <vscale x 8 x i1> @llvm.vp.xor.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i1> @xor_nxv8i1(<vscale x 8 x i1> %b, <vscale x 8 x i1> %c, <vscale x 8 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: xor_nxv8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT:    vmxor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i1> @llvm.vp.xor.nxv8i1(<vscale x 8 x i1> %b, <vscale x 8 x i1> %c, <vscale x 8 x i1> %a, i32 %evl)
+  ret <vscale x 8 x i1> %v
+}
+
+declare <vscale x 16 x i1> @llvm.vp.xor.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i1> @xor_nxv16i1(<vscale x 16 x i1> %b, <vscale x 16 x i1> %c, <vscale x 16 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: xor_nxv16i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
+; CHECK-NEXT:    vmxor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i1> @llvm.vp.xor.nxv16i1(<vscale x 16 x i1> %b, <vscale x 16 x i1> %c, <vscale x 16 x i1> %a, i32 %evl)
+  ret <vscale x 16 x i1> %v
+}
+
+declare <vscale x 32 x i1> @llvm.vp.xor.nxv32i1(<vscale x 32 x i1>, <vscale x 32 x i1>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i1> @xor_nxv32i1(<vscale x 32 x i1> %b, <vscale x 32 x i1> %c, <vscale x 32 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: xor_nxv32i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
+; CHECK-NEXT:    vmxor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 32 x i1> @llvm.vp.xor.nxv32i1(<vscale x 32 x i1> %b, <vscale x 32 x i1> %c, <vscale x 32 x i1> %a, i32 %evl)
+  ret <vscale x 32 x i1> %v
+}
+
+declare <vscale x 64 x i1> @llvm.vp.xor.nxv64i1(<vscale x 64 x i1>, <vscale x 64 x i1>, <vscale x 64 x i1>, i32)
+
+define <vscale x 64 x i1> @xor_nxv64i1(<vscale x 64 x i1> %b, <vscale x 64 x i1> %c, <vscale x 64 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: xor_nxv64i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
+; CHECK-NEXT:    vmxor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 64 x i1> @llvm.vp.xor.nxv64i1(<vscale x 64 x i1> %b, <vscale x 64 x i1> %c, <vscale x 64 x i1> %a, i32 %evl)
+  ret <vscale x 64 x i1> %v
+}
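
For reference, a minimal IR sketch (not part of the patch) of the two paths that lowerLogicVPOp distinguishes: VP logic ops on i1-element vectors take the new mask path (RISCVISD::VMAND_VL/VMOR_VL/VMXOR_VL, i.e. vmand.mm/vmor.mm/vmxor.mm, with the VP mask operand dropped), while other element types still fall through to lowerVPOp with the corresponding *_VL opcode. The @int_and function below is an assumption added for illustration; its exact codegen is not asserted by the tests in this patch.

declare <8 x i1> @llvm.vp.and.v8i1(<8 x i1>, <8 x i1>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.and.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)

; i1 elements: lowered through lowerLogicVPOp as RISCVISD::VMAND_VL, so the
; mask operand %m is dropped (masked-off lanes are undef for VP intrinsics).
define <8 x i1> @mask_and(<8 x i1> %x, <8 x i1> %y, <8 x i1> %m, i32 zeroext %evl) {
  %v = call <8 x i1> @llvm.vp.and.v8i1(<8 x i1> %x, <8 x i1> %y, <8 x i1> %m, i32 %evl)
  ret <8 x i1> %v
}

; i32 elements: still lowered through lowerVPOp as RISCVISD::AND_VL, keeping %m.
define <8 x i32> @int_and(<8 x i32> %x, <8 x i32> %y, <8 x i1> %m, i32 zeroext %evl) {
  %v = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %v
}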