diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -141,7 +141,8 @@ VECREDUCE_FADD, VECREDUCE_SEQ_FADD, - // Vector binary and unary ops with VL as a third operand. + // Vector binary and unary ops with a mask as a third operand, and VL as a + // fourth operand. // FIXME: Can we replace these with ISD::VP_*? ADD_VL, AND_VL, @@ -167,6 +168,10 @@ UMIN_VL, UMAX_VL, + // Vector compare producing a mask. Fourth operand is input mask. Fifth + // operand is VL. + SETCC_VL, + // Set mask vector to all zeros or ones. VMCLR_VL, VMSET_VL, @@ -393,6 +398,7 @@ SDValue lowerFPVECREDUCE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFixedLengthVectorLoadToRVV(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFixedLengthVectorStoreToRVV(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerFixedLengthVectorSetccToRVV(SDValue Op, SelectionDAG &DAG) const; SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG, unsigned NewOpc) const; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -528,9 +528,11 @@ setOperationAction(ISD::LOAD, VT, Custom); setOperationAction(ISD::STORE, VT, Custom); - // Operations below are not valid for masks. - if (VT.getVectorElementType() == MVT::i1) + // Operations below are different for between masks and other vectors. + if (VT.getVectorElementType() == MVT::i1) { + setOperationAction(ISD::SETCC, VT, Custom); continue; + } setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); @@ -604,7 +606,8 @@ EVT VT) const { if (!VT.isVector()) return getPointerTy(DL); - if (Subtarget.hasStdExtV() && VT.isScalableVector()) + if (Subtarget.hasStdExtV() && + (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors())) return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount()); return VT.changeVectorElementTypeToInteger(); } @@ -1168,6 +1171,8 @@ return lowerFixedLengthVectorLoadToRVV(Op, DAG); case ISD::STORE: return lowerFixedLengthVectorStoreToRVV(Op, DAG); + case ISD::SETCC: + return lowerFixedLengthVectorSetccToRVV(Op, DAG); case ISD::ADD: return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL); case ISD::SUB: @@ -2085,6 +2090,31 @@ Store->getMemoryVT(), Store->getMemOperand()); } +SDValue +RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op, + SelectionDAG &DAG) const { + MVT InVT = Op.getOperand(0).getSimpleValueType(); + MVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT, Subtarget); + + MVT VT = Op.getSimpleValueType(); + + SDValue Op1 = + convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget); + SDValue Op2 = + convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget); + + SDLoc DL(Op); + SDValue VL = + DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT()); + + MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); + SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); + SDValue Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op2, + Op.getOperand(2), Mask, VL); + + return convertFromScalableVector(VT, Cmp, DAG, Subtarget); +} + SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG, unsigned NewOpc) const { MVT VT = Op.getSimpleValueType(); @@ -4714,6 +4744,7 @@ NODE_NAME_CASE(SMAX_VL) NODE_NAME_CASE(UMIN_VL) NODE_NAME_CASE(UMAX_VL) + 
NODE_NAME_CASE(SETCC_VL)
  NODE_NAME_CASE(VMCLR_VL)
  NODE_NAME_CASE(VMSET_VL)
  NODE_NAME_CASE(VRGATHER_VX_VL)
@@ -5165,9 +5196,14 @@
   // Don't use RVV for vectors we cannot scalarize if required.
   switch (VT.getVectorElementType().SimpleTy) {
+  // i1 is supported but has different rules.
   default:
     return false;
   case MVT::i1:
+    // Masks can only use a single register.
+    if (VT.getVectorNumElements() > Subtarget.getMinRVVVectorSizeInBits())
+      return false;
+    break;
   case MVT::i8:
   case MVT::i16:
   case MVT::i32:
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -83,6 +83,16 @@
                                               SDTCisVT<5, XLenVT>]>;
 def riscv_fma_vl : SDNode<"RISCVISD::FMA_VL", SDT_RISCVVecFMA_VL>;
 
+def riscv_setcc_vl : SDNode<"RISCVISD::SETCC_VL",
+                            SDTypeProfile<1, 5, [SDTCisVec<0>,
+                                                 SDTCVecEltisVT<0, i1>,
+                                                 SDTCisVec<1>,
+                                                 SDTCisSameNumEltsAs<0, 1>,
+                                                 SDTCisSameAs<1, 2>,
+                                                 SDTCisVT<3, OtherVT>,
+                                                 SDTCisSameAs<0, 4>,
+                                                 SDTCisVT<5, XLenVT>]>>;
+
 def riscv_vrgather_vx_vl : SDNode<"RISCVISD::VRGATHER_VX_VL",
                                   SDTypeProfile<1, 4, [SDTCisVec<0>,
                                                        SDTCisSameAs<0, 1>,
@@ -226,6 +236,61 @@
                                              GPR:$vl, fvti.SEW)>;
 }
 
+multiclass VPatIntegerSetCCVL_VV<VTypeInfo vti, string instruction_name,
+                                 CondCode cc> {
+  def : Pat<(vti.Mask (riscv_setcc_vl (vti.Vector vti.RegClass:$rs1),
+                                      vti.RegClass:$rs2, cc,
+                                      (vti.Mask true_mask),
+                                      (XLenVT (VLOp GPR:$vl)))),
+            (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
+                 vti.RegClass:$rs1, vti.RegClass:$rs2, GPR:$vl,
+                 vti.SEW)>;
+}
+
+// Inherits from VPatIntegerSetCCVL_VV and adds a pattern with operands swapped.
+multiclass VPatIntegerSetCCVL_VV_Swappable<VTypeInfo vti, string instruction_name,
+                                           CondCode cc, CondCode invcc> :
+    VPatIntegerSetCCVL_VV<vti, instruction_name, cc> {
+  def : Pat<(vti.Mask (riscv_setcc_vl (vti.Vector vti.RegClass:$rs2),
+                                      vti.RegClass:$rs1, invcc,
+                                      (vti.Mask true_mask),
+                                      (XLenVT (VLOp GPR:$vl)))),
+            (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
+                 vti.RegClass:$rs1, vti.RegClass:$rs2, GPR:$vl,
+                 vti.SEW)>;
+}
+
+multiclass VPatIntegerSetCCVL_VX_Swappable<VTypeInfo vti, string instruction_name,
+                                           CondCode cc, CondCode invcc> {
+  defvar instruction = !cast<Instruction>(instruction_name#"_VX_"#vti.LMul.MX);
+  def : Pat<(vti.Mask (riscv_setcc_vl (vti.Vector vti.RegClass:$rs1),
+                                      (SplatPat GPR:$rs2), cc,
+                                      (vti.Mask true_mask),
+                                      (XLenVT (VLOp GPR:$vl)))),
+            (instruction vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.SEW)>;
+  def : Pat<(vti.Mask (riscv_setcc_vl (SplatPat GPR:$rs2),
+                                      (vti.Vector vti.RegClass:$rs1), invcc,
+                                      (vti.Mask true_mask),
+                                      (XLenVT (VLOp GPR:$vl)))),
+            (instruction vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.SEW)>;
+}
+
+multiclass VPatIntegerSetCCVL_VI_Swappable<VTypeInfo vti, string instruction_name,
+                                           CondCode cc, CondCode invcc> {
+  defvar instruction = !cast<Instruction>(instruction_name#"_VI_"#vti.LMul.MX);
+  defvar ImmPat = !cast<ComplexPattern>("sew"#vti.SEW#"simm5");
+  def : Pat<(vti.Mask (riscv_setcc_vl (vti.Vector vti.RegClass:$rs1),
+                                      (SplatPat_simm5 simm5:$rs2), cc,
+                                      (vti.Mask true_mask),
+                                      (XLenVT (VLOp GPR:$vl)))),
+            (instruction vti.RegClass:$rs1, XLenVT:$rs2, GPR:$vl, vti.SEW)>;
+  def : Pat<(vti.Mask (riscv_setcc_vl (SplatPat_simm5 simm5:$rs2),
+                                      (vti.Vector vti.RegClass:$rs1), invcc,
+                                      (vti.Mask true_mask),
+                                      (XLenVT (VLOp GPR:$vl)))),
+            (instruction vti.RegClass:$rs1, simm5:$rs2, GPR:$vl, vti.SEW)>;
+}
+
 //===----------------------------------------------------------------------===//
 // Patterns.
 //===----------------------------------------------------------------------===//
@@ -283,6 +348,34 @@
 defm "" : VPatBinaryVL_VV_VX_VI;
 defm "" : VPatBinaryVL_VV_VX_VI;
 
+// 12.8. Vector Integer Comparison Instructions
+foreach vti = AllIntegerVectors in {
+  defm "" : VPatIntegerSetCCVL_VV<vti, "PseudoVMSEQ", SETEQ>;
+  defm "" : VPatIntegerSetCCVL_VV<vti, "PseudoVMSNE", SETNE>;
+
+  defm "" : VPatIntegerSetCCVL_VV_Swappable<vti, "PseudoVMSLT", SETLT, SETGT>;
+  defm "" : VPatIntegerSetCCVL_VV_Swappable<vti, "PseudoVMSLTU", SETULT, SETUGT>;
+  defm "" : VPatIntegerSetCCVL_VV_Swappable<vti, "PseudoVMSLE", SETLE, SETGE>;
+  defm "" : VPatIntegerSetCCVL_VV_Swappable<vti, "PseudoVMSLEU", SETULE, SETUGE>;
+
+  defm "" : VPatIntegerSetCCVL_VX_Swappable<vti, "PseudoVMSEQ", SETEQ, SETEQ>;
+  defm "" : VPatIntegerSetCCVL_VX_Swappable<vti, "PseudoVMSNE", SETNE, SETNE>;
+  defm "" : VPatIntegerSetCCVL_VX_Swappable<vti, "PseudoVMSLT", SETLT, SETGT>;
+  defm "" : VPatIntegerSetCCVL_VX_Swappable<vti, "PseudoVMSLTU", SETULT, SETUGT>;
+  defm "" : VPatIntegerSetCCVL_VX_Swappable<vti, "PseudoVMSLE", SETLE, SETGE>;
+  defm "" : VPatIntegerSetCCVL_VX_Swappable<vti, "PseudoVMSLEU", SETULE, SETUGE>;
+  defm "" : VPatIntegerSetCCVL_VX_Swappable<vti, "PseudoVMSGT", SETGT, SETLT>;
+  defm "" : VPatIntegerSetCCVL_VX_Swappable<vti, "PseudoVMSGTU", SETUGT, SETULT>;
+  // There is no VMSGE(U)_VX instruction
+
+  // FIXME: Support immediate forms of these by choosing SGT and decrementing
+  // the immediate
+  defm "" : VPatIntegerSetCCVL_VI_Swappable<vti, "PseudoVMSEQ", SETEQ, SETEQ>;
+  defm "" : VPatIntegerSetCCVL_VI_Swappable<vti, "PseudoVMSNE", SETNE, SETNE>;
+  defm "" : VPatIntegerSetCCVL_VI_Swappable<vti, "PseudoVMSLE", SETLE, SETGE>;
+  defm "" : VPatIntegerSetCCVL_VI_Swappable<vti, "PseudoVMSLEU", SETULE, SETUGE>;
+} // foreach vti = AllIntegerVectors
+
 // 12.9. Vector Integer Min/Max Instructions
 defm "" : VPatBinaryVL_VV_VX;
 defm "" : VPatBinaryVL_VV_VX;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll
@@ -0,0 +1,695 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+
+; FIXME: We use exclusively byte types here because the MVT we use for the
+; stores is calculated assuming byte elements. We need to deal with mismatched
+; subvector "casts" to make other elements work.
+ +define void @seteq_vv_v16i8(<16 x i8>* %x, <16 x i8>* %y, <16 x i1>* %z) { +; CHECK-LABEL: seteq_vv_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 16 +; CHECK-NEXT: vsetvli a3, a3, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vle8.v v26, (a1) +; CHECK-NEXT: vmseq.vv v27, v25, v26 +; CHECK-NEXT: vse1.v v27, (a2) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = load <16 x i8>, <16 x i8>* %y + %c = icmp eq <16 x i8> %a, %b + store <16 x i1> %c, <16 x i1>* %z + ret void +} + +define void @setne_vv_v32i8(<32 x i8>* %x, <32 x i8>* %y, <32 x i1>* %z) { +; CHECK-LABEL: setne_vv_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 32 +; CHECK-NEXT: vsetvli a3, a3, e8,m2,ta,mu +; CHECK-NEXT: vle8.v v26, (a0) +; CHECK-NEXT: vle8.v v28, (a1) +; CHECK-NEXT: vmsne.vv v25, v26, v28 +; CHECK-NEXT: vse1.v v25, (a2) +; CHECK-NEXT: ret + %a = load <32 x i8>, <32 x i8>* %x + %b = load <32 x i8>, <32 x i8>* %y + %c = icmp ne <32 x i8> %a, %b + store <32 x i1> %c, <32 x i1>* %z + ret void +} + +define void @setgt_vv_v64i8(<64 x i8>* %x, <64 x i8>* %y, <64 x i1>* %z) { +; CHECK-LABEL: setgt_vv_v64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 64 +; CHECK-NEXT: vsetvli a3, a3, e8,m4,ta,mu +; CHECK-NEXT: vle8.v v28, (a0) +; CHECK-NEXT: vle8.v v8, (a1) +; CHECK-NEXT: vmslt.vv v25, v8, v28 +; CHECK-NEXT: vse1.v v25, (a2) +; CHECK-NEXT: ret + %a = load <64 x i8>, <64 x i8>* %x + %b = load <64 x i8>, <64 x i8>* %y + %c = icmp sgt <64 x i8> %a, %b + store <64 x i1> %c, <64 x i1>* %z + ret void +} + +define void @setlt_vv_v128i8(<128 x i8>* %x, <128 x i8>* %y, <128 x i1>* %z) { +; CHECK-LABEL: setlt_vv_v128i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 128 +; CHECK-NEXT: vsetvli a3, a3, e8,m8,ta,mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v16, (a1) +; CHECK-NEXT: vmslt.vv v25, v8, v16 +; CHECK-NEXT: vse1.v v25, (a2) +; CHECK-NEXT: ret + %a = load <128 x i8>, <128 x i8>* %x + %b = load <128 x i8>, <128 x i8>* %y + %c = icmp slt <128 x i8> %a, %b + store <128 x i1> %c, <128 x i1>* %z + ret void +} + +define void @setge_vv_v8i8(<8 x i8>* %x, <8 x i8>* %y, <8 x i1>* %z) { +; CHECK-LABEL: setge_vv_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 8 +; CHECK-NEXT: vsetvli a3, a3, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vle8.v v26, (a1) +; CHECK-NEXT: vmsle.vv v27, v26, v25 +; CHECK-NEXT: vse1.v v27, (a2) +; CHECK-NEXT: ret + %a = load <8 x i8>, <8 x i8>* %x + %b = load <8 x i8>, <8 x i8>* %y + %c = icmp sge <8 x i8> %a, %b + store <8 x i1> %c, <8 x i1>* %z + ret void +} + +define void @setle_vv_v16i8(<16 x i8>* %x, <16 x i8>* %y, <16 x i1>* %z) { +; CHECK-LABEL: setle_vv_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 16 +; CHECK-NEXT: vsetvli a3, a3, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vle8.v v26, (a1) +; CHECK-NEXT: vmsle.vv v27, v25, v26 +; CHECK-NEXT: vse1.v v27, (a2) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = load <16 x i8>, <16 x i8>* %y + %c = icmp sle <16 x i8> %a, %b + store <16 x i1> %c, <16 x i1>* %z + ret void +} + +define void @setugt_vv_v32i8(<32 x i8>* %x, <32 x i8>* %y, <32 x i1>* %z) { +; CHECK-LABEL: setugt_vv_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 32 +; CHECK-NEXT: vsetvli a3, a3, e8,m2,ta,mu +; CHECK-NEXT: vle8.v v26, (a0) +; CHECK-NEXT: vle8.v v28, (a1) +; CHECK-NEXT: vmsltu.vv v25, v28, v26 +; CHECK-NEXT: vse1.v v25, (a2) +; CHECK-NEXT: ret + %a = load <32 x i8>, <32 x i8>* %x + %b = load <32 x i8>, <32 x i8>* %y + %c = icmp ugt <32 x i8> %a, %b + 
store <32 x i1> %c, <32 x i1>* %z + ret void +} + +define void @setult_vv_v64i8(<64 x i8>* %x, <64 x i8>* %y, <64 x i1>* %z) { +; CHECK-LABEL: setult_vv_v64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 64 +; CHECK-NEXT: vsetvli a3, a3, e8,m4,ta,mu +; CHECK-NEXT: vle8.v v28, (a0) +; CHECK-NEXT: vle8.v v8, (a1) +; CHECK-NEXT: vmsltu.vv v25, v28, v8 +; CHECK-NEXT: vse1.v v25, (a2) +; CHECK-NEXT: ret + %a = load <64 x i8>, <64 x i8>* %x + %b = load <64 x i8>, <64 x i8>* %y + %c = icmp ult <64 x i8> %a, %b + store <64 x i1> %c, <64 x i1>* %z + ret void +} + +define void @setuge_vv_v128i8(<128 x i8>* %x, <128 x i8>* %y, <128 x i1>* %z) { +; CHECK-LABEL: setuge_vv_v128i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 128 +; CHECK-NEXT: vsetvli a3, a3, e8,m8,ta,mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v16, (a1) +; CHECK-NEXT: vmsleu.vv v25, v16, v8 +; CHECK-NEXT: vse1.v v25, (a2) +; CHECK-NEXT: ret + %a = load <128 x i8>, <128 x i8>* %x + %b = load <128 x i8>, <128 x i8>* %y + %c = icmp uge <128 x i8> %a, %b + store <128 x i1> %c, <128 x i1>* %z + ret void +} + +define void @setule_vv_v8i8(<8 x i8>* %x, <8 x i8>* %y, <8 x i1>* %z) { +; CHECK-LABEL: setule_vv_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 8 +; CHECK-NEXT: vsetvli a3, a3, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vle8.v v26, (a1) +; CHECK-NEXT: vmsleu.vv v27, v25, v26 +; CHECK-NEXT: vse1.v v27, (a2) +; CHECK-NEXT: ret + %a = load <8 x i8>, <8 x i8>* %x + %b = load <8 x i8>, <8 x i8>* %y + %c = icmp ule <8 x i8> %a, %b + store <8 x i1> %c, <8 x i1>* %z + ret void +} + +define void @seteq_vx_v16i8(<16 x i8>* %x, i8 %y, <16 x i1>* %z) { +; CHECK-LABEL: seteq_vx_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 16 +; CHECK-NEXT: vsetvli a3, a3, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vmseq.vx v26, v25, a1 +; CHECK-NEXT: vse1.v v26, (a2) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = insertelement <16 x i8> undef, i8 %y, i32 0 + %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer + %d = icmp eq <16 x i8> %a, %c + store <16 x i1> %d, <16 x i1>* %z + ret void +} + +define void @setne_vx_v32i8(<32 x i8>* %x, i8 %y, <32 x i1>* %z) { +; CHECK-LABEL: setne_vx_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 32 +; CHECK-NEXT: vsetvli a3, a3, e8,m2,ta,mu +; CHECK-NEXT: vle8.v v26, (a0) +; CHECK-NEXT: vmsne.vx v25, v26, a1 +; CHECK-NEXT: vse1.v v25, (a2) +; CHECK-NEXT: ret + %a = load <32 x i8>, <32 x i8>* %x + %b = insertelement <32 x i8> undef, i8 %y, i32 0 + %c = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer + %d = icmp ne <32 x i8> %a, %c + store <32 x i1> %d, <32 x i1>* %z + ret void +} + +define void @setgt_vx_v64i8(<64 x i8>* %x, i8 %y, <64 x i1>* %z) { +; CHECK-LABEL: setgt_vx_v64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 64 +; CHECK-NEXT: vsetvli a3, a3, e8,m4,ta,mu +; CHECK-NEXT: vle8.v v28, (a0) +; CHECK-NEXT: vmsgt.vx v25, v28, a1 +; CHECK-NEXT: vse1.v v25, (a2) +; CHECK-NEXT: ret + %a = load <64 x i8>, <64 x i8>* %x + %b = insertelement <64 x i8> undef, i8 %y, i32 0 + %c = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer + %d = icmp sgt <64 x i8> %a, %c + store <64 x i1> %d, <64 x i1>* %z + ret void +} + +define void @setlt_vx_v128i8(<128 x i8>* %x, i8 %y, <128 x i1>* %z) { +; CHECK-LABEL: setlt_vx_v128i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 128 +; CHECK-NEXT: vsetvli a3, a3, e8,m8,ta,mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vmslt.vx 
v25, v8, a1 +; CHECK-NEXT: vse1.v v25, (a2) +; CHECK-NEXT: ret + %a = load <128 x i8>, <128 x i8>* %x + %b = insertelement <128 x i8> undef, i8 %y, i32 0 + %c = shufflevector <128 x i8> %b, <128 x i8> undef, <128 x i32> zeroinitializer + %d = icmp slt <128 x i8> %a, %c + store <128 x i1> %d, <128 x i1>* %z + ret void +} + +define void @setge_vx_v8i8(<8 x i8>* %x, i8 %y, <8 x i1>* %z) { +; CHECK-LABEL: setge_vx_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 8 +; CHECK-NEXT: vsetvli a3, a3, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vmv.v.x v26, a1 +; CHECK-NEXT: vmsle.vv v27, v26, v25 +; CHECK-NEXT: vse1.v v27, (a2) +; CHECK-NEXT: ret + %a = load <8 x i8>, <8 x i8>* %x + %b = insertelement <8 x i8> undef, i8 %y, i32 0 + %c = shufflevector <8 x i8> %b, <8 x i8> undef, <8 x i32> zeroinitializer + %d = icmp sge <8 x i8> %a, %c + store <8 x i1> %d, <8 x i1>* %z + ret void +} + +define void @setle_vx_v16i8(<16 x i8>* %x, i8 %y, <16 x i1>* %z) { +; CHECK-LABEL: setle_vx_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 16 +; CHECK-NEXT: vsetvli a3, a3, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vmsle.vx v26, v25, a1 +; CHECK-NEXT: vse1.v v26, (a2) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = insertelement <16 x i8> undef, i8 %y, i32 0 + %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer + %d = icmp sle <16 x i8> %a, %c + store <16 x i1> %d, <16 x i1>* %z + ret void +} + +define void @setugt_vx_v32i8(<32 x i8>* %x, i8 %y, <32 x i1>* %z) { +; CHECK-LABEL: setugt_vx_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 32 +; CHECK-NEXT: vsetvli a3, a3, e8,m2,ta,mu +; CHECK-NEXT: vle8.v v26, (a0) +; CHECK-NEXT: vmsgtu.vx v25, v26, a1 +; CHECK-NEXT: vse1.v v25, (a2) +; CHECK-NEXT: ret + %a = load <32 x i8>, <32 x i8>* %x + %b = insertelement <32 x i8> undef, i8 %y, i32 0 + %c = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer + %d = icmp ugt <32 x i8> %a, %c + store <32 x i1> %d, <32 x i1>* %z + ret void +} + +define void @setult_vx_v64i8(<64 x i8>* %x, i8 %y, <64 x i1>* %z) { +; CHECK-LABEL: setult_vx_v64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 64 +; CHECK-NEXT: vsetvli a3, a3, e8,m4,ta,mu +; CHECK-NEXT: vle8.v v28, (a0) +; CHECK-NEXT: vmsltu.vx v25, v28, a1 +; CHECK-NEXT: vse1.v v25, (a2) +; CHECK-NEXT: ret + %a = load <64 x i8>, <64 x i8>* %x + %b = insertelement <64 x i8> undef, i8 %y, i32 0 + %c = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer + %d = icmp ult <64 x i8> %a, %c + store <64 x i1> %d, <64 x i1>* %z + ret void +} + +define void @setuge_vx_v128i8(<128 x i8>* %x, i8 %y, <128 x i1>* %z) { +; CHECK-LABEL: setuge_vx_v128i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 128 +; CHECK-NEXT: vsetvli a3, a3, e8,m8,ta,mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vmv.v.x v16, a1 +; CHECK-NEXT: vmsleu.vv v25, v16, v8 +; CHECK-NEXT: vse1.v v25, (a2) +; CHECK-NEXT: ret + %a = load <128 x i8>, <128 x i8>* %x + %b = insertelement <128 x i8> undef, i8 %y, i32 0 + %c = shufflevector <128 x i8> %b, <128 x i8> undef, <128 x i32> zeroinitializer + %d = icmp uge <128 x i8> %a, %c + store <128 x i1> %d, <128 x i1>* %z + ret void +} + +define void @setule_vx_v8i8(<8 x i8>* %x, i8 %y, <8 x i1>* %z) { +; CHECK-LABEL: setule_vx_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 8 +; CHECK-NEXT: vsetvli a3, a3, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vmsleu.vx v26, v25, a1 +; CHECK-NEXT: vse1.v v26, (a2) +; CHECK-NEXT: ret + %a = load 
<8 x i8>, <8 x i8>* %x + %b = insertelement <8 x i8> undef, i8 %y, i32 0 + %c = shufflevector <8 x i8> %b, <8 x i8> undef, <8 x i32> zeroinitializer + %d = icmp ule <8 x i8> %a, %c + store <8 x i1> %d, <8 x i1>* %z + ret void +} + +define void @seteq_xv_v16i8(<16 x i8>* %x, i8 %y, <16 x i1>* %z) { +; CHECK-LABEL: seteq_xv_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 16 +; CHECK-NEXT: vsetvli a3, a3, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vmseq.vx v26, v25, a1 +; CHECK-NEXT: vse1.v v26, (a2) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = insertelement <16 x i8> undef, i8 %y, i32 0 + %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer + %d = icmp eq <16 x i8> %c, %a + store <16 x i1> %d, <16 x i1>* %z + ret void +} + +define void @setne_xv_v32i8(<32 x i8>* %x, i8 %y, <32 x i1>* %z) { +; CHECK-LABEL: setne_xv_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 32 +; CHECK-NEXT: vsetvli a3, a3, e8,m2,ta,mu +; CHECK-NEXT: vle8.v v26, (a0) +; CHECK-NEXT: vmsne.vx v25, v26, a1 +; CHECK-NEXT: vse1.v v25, (a2) +; CHECK-NEXT: ret + %a = load <32 x i8>, <32 x i8>* %x + %b = insertelement <32 x i8> undef, i8 %y, i32 0 + %c = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer + %d = icmp ne <32 x i8> %c, %a + store <32 x i1> %d, <32 x i1>* %z + ret void +} + +define void @setgt_xv_v64i8(<64 x i8>* %x, i8 %y, <64 x i1>* %z) { +; CHECK-LABEL: setgt_xv_v64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 64 +; CHECK-NEXT: vsetvli a3, a3, e8,m4,ta,mu +; CHECK-NEXT: vle8.v v28, (a0) +; CHECK-NEXT: vmslt.vx v25, v28, a1 +; CHECK-NEXT: vse1.v v25, (a2) +; CHECK-NEXT: ret + %a = load <64 x i8>, <64 x i8>* %x + %b = insertelement <64 x i8> undef, i8 %y, i32 0 + %c = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer + %d = icmp sgt <64 x i8> %c, %a + store <64 x i1> %d, <64 x i1>* %z + ret void +} + +define void @setlt_xv_v128i8(<128 x i8>* %x, i8 %y, <128 x i1>* %z) { +; CHECK-LABEL: setlt_xv_v128i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 128 +; CHECK-NEXT: vsetvli a3, a3, e8,m8,ta,mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vmsgt.vx v25, v8, a1 +; CHECK-NEXT: vse1.v v25, (a2) +; CHECK-NEXT: ret + %a = load <128 x i8>, <128 x i8>* %x + %b = insertelement <128 x i8> undef, i8 %y, i32 0 + %c = shufflevector <128 x i8> %b, <128 x i8> undef, <128 x i32> zeroinitializer + %d = icmp slt <128 x i8> %c, %a + store <128 x i1> %d, <128 x i1>* %z + ret void +} + +define void @setge_xv_v8i8(<8 x i8>* %x, i8 %y, <8 x i1>* %z) { +; CHECK-LABEL: setge_xv_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 8 +; CHECK-NEXT: vsetvli a3, a3, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vmsle.vx v26, v25, a1 +; CHECK-NEXT: vse1.v v26, (a2) +; CHECK-NEXT: ret + %a = load <8 x i8>, <8 x i8>* %x + %b = insertelement <8 x i8> undef, i8 %y, i32 0 + %c = shufflevector <8 x i8> %b, <8 x i8> undef, <8 x i32> zeroinitializer + %d = icmp sge <8 x i8> %c, %a + store <8 x i1> %d, <8 x i1>* %z + ret void +} + +define void @setle_xv_v16i8(<16 x i8>* %x, i8 %y, <16 x i1>* %z) { +; CHECK-LABEL: setle_xv_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 16 +; CHECK-NEXT: vsetvli a3, a3, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vmv.v.x v26, a1 +; CHECK-NEXT: vmsle.vv v27, v26, v25 +; CHECK-NEXT: vse1.v v27, (a2) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = insertelement <16 x i8> undef, i8 %y, i32 0 + %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x 
i32> zeroinitializer + %d = icmp sle <16 x i8> %c, %a + store <16 x i1> %d, <16 x i1>* %z + ret void +} + +define void @setugt_xv_v32i8(<32 x i8>* %x, i8 %y, <32 x i1>* %z) { +; CHECK-LABEL: setugt_xv_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 32 +; CHECK-NEXT: vsetvli a3, a3, e8,m2,ta,mu +; CHECK-NEXT: vle8.v v26, (a0) +; CHECK-NEXT: vmsltu.vx v25, v26, a1 +; CHECK-NEXT: vse1.v v25, (a2) +; CHECK-NEXT: ret + %a = load <32 x i8>, <32 x i8>* %x + %b = insertelement <32 x i8> undef, i8 %y, i32 0 + %c = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer + %d = icmp ugt <32 x i8> %c, %a + store <32 x i1> %d, <32 x i1>* %z + ret void +} + +define void @setult_xv_v64i8(<64 x i8>* %x, i8 %y, <64 x i1>* %z) { +; CHECK-LABEL: setult_xv_v64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 64 +; CHECK-NEXT: vsetvli a3, a3, e8,m4,ta,mu +; CHECK-NEXT: vle8.v v28, (a0) +; CHECK-NEXT: vmsgtu.vx v25, v28, a1 +; CHECK-NEXT: vse1.v v25, (a2) +; CHECK-NEXT: ret + %a = load <64 x i8>, <64 x i8>* %x + %b = insertelement <64 x i8> undef, i8 %y, i32 0 + %c = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer + %d = icmp ult <64 x i8> %c, %a + store <64 x i1> %d, <64 x i1>* %z + ret void +} + +define void @setuge_xv_v128i8(<128 x i8>* %x, i8 %y, <128 x i1>* %z) { +; CHECK-LABEL: setuge_xv_v128i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 128 +; CHECK-NEXT: vsetvli a3, a3, e8,m8,ta,mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vmsleu.vx v25, v8, a1 +; CHECK-NEXT: vse1.v v25, (a2) +; CHECK-NEXT: ret + %a = load <128 x i8>, <128 x i8>* %x + %b = insertelement <128 x i8> undef, i8 %y, i32 0 + %c = shufflevector <128 x i8> %b, <128 x i8> undef, <128 x i32> zeroinitializer + %d = icmp uge <128 x i8> %c, %a + store <128 x i1> %d, <128 x i1>* %z + ret void +} + +define void @setule_xv_v8i8(<8 x i8>* %x, i8 %y, <8 x i1>* %z) { +; CHECK-LABEL: setule_xv_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 8 +; CHECK-NEXT: vsetvli a3, a3, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vmv.v.x v26, a1 +; CHECK-NEXT: vmsleu.vv v27, v26, v25 +; CHECK-NEXT: vse1.v v27, (a2) +; CHECK-NEXT: ret + %a = load <8 x i8>, <8 x i8>* %x + %b = insertelement <8 x i8> undef, i8 %y, i32 0 + %c = shufflevector <8 x i8> %b, <8 x i8> undef, <8 x i32> zeroinitializer + %d = icmp ule <8 x i8> %c, %a + store <8 x i1> %d, <8 x i1>* %z + ret void +} + +define void @seteq_vi_v16i8(<16 x i8>* %x, <16 x i1>* %z) { +; CHECK-LABEL: seteq_vi_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 16 +; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vmseq.vi v26, v25, 0 +; CHECK-NEXT: vse1.v v26, (a1) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = insertelement <16 x i8> undef, i8 0, i32 0 + %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer + %d = icmp eq <16 x i8> %a, %c + store <16 x i1> %d, <16 x i1>* %z + ret void +} + +define void @setne_vi_v32i8(<32 x i8>* %x, <32 x i1>* %z) { +; CHECK-LABEL: setne_vi_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 32 +; CHECK-NEXT: vsetvli a2, a2, e8,m2,ta,mu +; CHECK-NEXT: vle8.v v26, (a0) +; CHECK-NEXT: vmsne.vi v25, v26, 0 +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <32 x i8>, <32 x i8>* %x + %b = insertelement <32 x i8> undef, i8 0, i32 0 + %c = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer + %d = icmp ne <32 x i8> %a, %c + store <32 x i1> %d, <32 x i1>* %z + ret void +} + +define void 
@setgt_vi_v64i8(<64 x i8>* %x, <64 x i1>* %z) { +; CHECK-LABEL: setgt_vi_v64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 64 +; CHECK-NEXT: vsetvli a2, a2, e8,m4,ta,mu +; CHECK-NEXT: vle8.v v28, (a0) +; CHECK-NEXT: vmsgt.vx v25, v28, zero +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <64 x i8>, <64 x i8>* %x + %b = insertelement <64 x i8> undef, i8 0, i32 0 + %c = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer + %d = icmp sgt <64 x i8> %a, %c + store <64 x i1> %d, <64 x i1>* %z + ret void +} + +define void @setlt_vi_v128i8(<128 x i8>* %x, <128 x i1>* %z) { +; CHECK-LABEL: setlt_vi_v128i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 128 +; CHECK-NEXT: vsetvli a2, a2, e8,m8,ta,mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vmslt.vx v25, v8, zero +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <128 x i8>, <128 x i8>* %x + %b = insertelement <128 x i8> undef, i8 0, i32 0 + %c = shufflevector <128 x i8> %b, <128 x i8> undef, <128 x i32> zeroinitializer + %d = icmp slt <128 x i8> %a, %c + store <128 x i1> %d, <128 x i1>* %z + ret void +} + +define void @setge_vi_v8i8(<8 x i8>* %x, <8 x i1>* %z) { +; CHECK-LABEL: setge_vi_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 8 +; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vmsle.vv v27, v26, v25 +; CHECK-NEXT: vse1.v v27, (a1) +; CHECK-NEXT: ret + %a = load <8 x i8>, <8 x i8>* %x + %b = insertelement <8 x i8> undef, i8 0, i32 0 + %c = shufflevector <8 x i8> %b, <8 x i8> undef, <8 x i32> zeroinitializer + %d = icmp sge <8 x i8> %a, %c + store <8 x i1> %d, <8 x i1>* %z + ret void +} + +define void @setle_vi_v16i8(<16 x i8>* %x, <16 x i1>* %z) { +; CHECK-LABEL: setle_vi_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 16 +; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vmsle.vi v26, v25, 0 +; CHECK-NEXT: vse1.v v26, (a1) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = insertelement <16 x i8> undef, i8 0, i32 0 + %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer + %d = icmp sle <16 x i8> %a, %c + store <16 x i1> %d, <16 x i1>* %z + ret void +} + +define void @setugt_vi_v32i8(<32 x i8>* %x, <32 x i1>* %z) { +; CHECK-LABEL: setugt_vi_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 32 +; CHECK-NEXT: vsetvli a2, a2, e8,m2,ta,mu +; CHECK-NEXT: vle8.v v26, (a0) +; CHECK-NEXT: addi a0, zero, 5 +; CHECK-NEXT: vmsgtu.vx v25, v26, a0 +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <32 x i8>, <32 x i8>* %x + %b = insertelement <32 x i8> undef, i8 5, i32 0 + %c = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer + %d = icmp ugt <32 x i8> %a, %c + store <32 x i1> %d, <32 x i1>* %z + ret void +} + +define void @setult_vi_v64i8(<64 x i8>* %x, <64 x i1>* %z) { +; CHECK-LABEL: setult_vi_v64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 64 +; CHECK-NEXT: vsetvli a2, a2, e8,m4,ta,mu +; CHECK-NEXT: vle8.v v28, (a0) +; CHECK-NEXT: addi a0, zero, 5 +; CHECK-NEXT: vmsltu.vx v25, v28, a0 +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <64 x i8>, <64 x i8>* %x + %b = insertelement <64 x i8> undef, i8 5, i32 0 + %c = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer + %d = icmp ult <64 x i8> %a, %c + store <64 x i1> %d, <64 x i1>* %z + ret void +} + +define void @setuge_vi_v128i8(<128 x i8>* %x, <128 x i1>* %z) { +; CHECK-LABEL: setuge_vi_v128i8: +; CHECK: # 
%bb.0: +; CHECK-NEXT: addi a2, zero, 128 +; CHECK-NEXT: vsetvli a2, a2, e8,m8,ta,mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vmv.v.i v16, 5 +; CHECK-NEXT: vmsleu.vv v25, v16, v8 +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <128 x i8>, <128 x i8>* %x + %b = insertelement <128 x i8> undef, i8 5, i32 0 + %c = shufflevector <128 x i8> %b, <128 x i8> undef, <128 x i32> zeroinitializer + %d = icmp uge <128 x i8> %a, %c + store <128 x i1> %d, <128 x i1>* %z + ret void +} + +define void @setule_vi_v8i8(<8 x i8>* %x, <8 x i1>* %z) { +; CHECK-LABEL: setule_vi_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 8 +; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vmsleu.vi v26, v25, 5 +; CHECK-NEXT: vse1.v v26, (a1) +; CHECK-NEXT: ret + %a = load <8 x i8>, <8 x i8>* %x + %b = insertelement <8 x i8> undef, i8 5, i32 0 + %c = shufflevector <8 x i8> %b, <8 x i8> undef, <8 x i32> zeroinitializer + %d = icmp ule <8 x i8> %a, %c + store <8 x i1> %d, <8 x i1>* %z + ret void +}
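
The FIXME in the 12.8 pattern block above notes that immediate forms of the SGE/UGE comparisons could be supported by selecting the SGT/UGT instruction and decrementing the immediate. The standalone C++ sketch below is illustrative only and is not part of this patch or of LLVM; the fitsSImm5 helper is a hypothetical name. It simply checks the identity behind that suggestion: x >= imm matches x > (imm - 1) as long as the decremented immediate is still encodable (simm5 for vmsgt.vi/vmsgtu.vi), and, for the unsigned form, as long as imm is not 0.

// Check the rewrite hinted at by the FIXME:
//   icmp sge x, imm  ==>  icmp sgt x, (imm - 1)
//   icmp uge x, imm  ==>  icmp ugt x, (imm - 1)
// Legal only when (imm - 1) still fits simm5, and imm != 0 for the
// unsigned form (uge 0 is always true anyway).
#include <cassert>
#include <cstdint>

// Hypothetical helper: does a value fit the simm5 immediate range?
static bool fitsSImm5(int64_t Imm) { return Imm >= -16 && Imm <= 15; }

int main() {
  // Signed form: imm - 1 must stay within simm5, so start at -15.
  for (int Imm = -15; Imm <= 15; ++Imm) {
    assert(fitsSImm5(Imm - 1));
    for (int X = -128; X <= 127; ++X) {
      bool Sge = (int8_t)X >= (int8_t)Imm;
      bool SgtDec = (int8_t)X > (int8_t)(Imm - 1);
      assert(Sge == SgtDec);
    }
  }
  // Unsigned form: imm must not be 0.
  for (int Imm = 1; Imm <= 15; ++Imm) {
    for (int X = 0; X <= 255; ++X) {
      bool Uge = (uint8_t)X >= (uint8_t)Imm;
      bool UgtDec = (uint8_t)X > (uint8_t)(Imm - 1);
      assert(Uge == UgtDec);
    }
  }
  return 0;
}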