diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -849,13 +849,18 @@ break; // Bail when normal isel should do the job. - EVT InVT = Node->getOperand(1).getValueType(); + MVT InVT = Node->getOperand(1).getSimpleValueType(); if (VT.isFixedLengthVector() || InVT.isScalableVector()) break; + unsigned RegClassID; + if (VT.getVectorElementType() == MVT::i1) + RegClassID = RISCV::VRRegClassID; + else + RegClassID = getRegClassIDForLMUL(getLMUL(VT)); + SDValue V = Node->getOperand(1); SDLoc DL(V); - unsigned RegClassID = getRegClassIDForLMUL(getLMUL(VT)); SDValue RC = CurDAG->getTargetConstant(RegClassID, DL, Subtarget->getXLenVT()); SDNode *NewNode = @@ -869,13 +874,18 @@ break; // Bail when normal isel can do the job. - EVT InVT = Node->getOperand(0).getValueType(); + MVT InVT = Node->getOperand(0).getSimpleValueType(); if (VT.isScalableVector() || InVT.isFixedLengthVector()) break; + unsigned RegClassID; + if (InVT.getVectorElementType() == MVT::i1) + RegClassID = RISCV::VRRegClassID; + else + RegClassID = getRegClassIDForLMUL(getLMUL(InVT)); + SDValue V = Node->getOperand(0); SDLoc DL(V); - unsigned RegClassID = getRegClassIDForLMUL(getLMUL(InVT.getSimpleVT())); SDValue RC = CurDAG->getTargetConstant(RegClassID, DL, Subtarget->getXLenVT()); SDNode *NewNode = diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -163,6 +163,15 @@ FNEG_VL, FMA_VL, + // Vector compare producing a mask. Fourth operand is input mask. Fifth + // operand is VL. + SETCC_VL, + + // Mask binary operators. + VMAND_VL, + VMOR_VL, + VMXOR_VL, + // Set mask vector to all zeros or ones. 
VMCLR_VL, VMSET_VL, @@ -389,6 +398,7 @@ SDValue lowerFPVECREDUCE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFixedLengthVectorLoadToRVV(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFixedLengthVectorStoreToRVV(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerFixedLengthVectorSetccToRVV(SDValue Op, SelectionDAG &DAG) const; SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG, unsigned NewOpc) const; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -528,9 +528,11 @@ setOperationAction(ISD::LOAD, VT, Custom); setOperationAction(ISD::STORE, VT, Custom); - // Operations below are not valid for masks. - if (VT.getVectorElementType() == MVT::i1) + // Operations below are different between masks and other vectors. + if (VT.getVectorElementType() == MVT::i1) { + setOperationAction(ISD::SETCC, VT, Custom); continue; + } setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); @@ -571,6 +573,9 @@ setOperationAction(ISD::FDIV, VT, Custom); setOperationAction(ISD::FNEG, VT, Custom); setOperationAction(ISD::FMA, VT, Custom); + + for (auto CC : VFPCCToExpand) + setCondCodeAction(CC, VT, Expand); } } } @@ -599,7 +604,8 @@ EVT VT) const { if (!VT.isVector()) return getPointerTy(DL); - if (Subtarget.hasStdExtV() && VT.isScalableVector()) + if (Subtarget.hasStdExtV() && + (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors())) return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount()); return VT.changeVectorElementTypeToInteger(); } @@ -1163,6 +1169,8 @@ return lowerFixedLengthVectorLoadToRVV(Op, DAG); case ISD::STORE: return lowerFixedLengthVectorStoreToRVV(Op, DAG); + case ISD::SETCC: + return lowerFixedLengthVectorSetccToRVV(Op, DAG); case ISD::ADD: return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL); case ISD::SUB: @@ -2072,6 +2080,104 @@ Store->getMemoryVT(), 
Store->getMemOperand()); } +SDValue +RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op, + SelectionDAG &DAG) const { + MVT InVT = Op.getOperand(0).getSimpleValueType(); + MVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT, Subtarget); + + MVT VT = Op.getSimpleValueType(); + + SDValue Op1 = + convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget); + SDValue Op2 = + convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget); + + SDLoc DL(Op); + SDValue VL = + DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT()); + + ISD::CondCode CC = cast(Op.getOperand(2))->get(); + + bool Invert = false; + unsigned LogicOpc = ISD::DELETED_NODE; + if (ContainerVT.isFloatingPoint()) { + bool Swap = false; + switch (CC) { + default: break; + case ISD::SETULE: + case ISD::SETULT: + Swap = true; + LLVM_FALLTHROUGH; + case ISD::SETUGE: + case ISD::SETUGT: + CC = getSetCCInverse(CC, ContainerVT); + Invert = true; + break; + case ISD::SETOGE: + case ISD::SETOGT: + case ISD::SETGE: + case ISD::SETGT: + Swap = true; + break; + case ISD::SETUEQ: + // Use !((OLT Op1, Op2) || (OLT Op2, Op1)) + Invert = true; + LogicOpc = RISCVISD::VMOR_VL; + CC = ISD::SETOLT; + break; + case ISD::SETONE: + // Use ((OLT Op1, Op2) || (OLT Op2, Op1)) + LogicOpc = RISCVISD::VMOR_VL; + CC = ISD::SETOLT; + break; + case ISD::SETO: + // Use (OEQ Op1, Op1) && (OEQ Op2, Op2) + LogicOpc = RISCVISD::VMAND_VL; + CC = ISD::SETOEQ; + break; + case ISD::SETUO: + // Use (UNE Op1, Op1) || (UNE Op2, Op2) + LogicOpc = RISCVISD::VMOR_VL; + CC = ISD::SETUNE; + break; + } + + if (Swap) { + CC = getSetCCSwappedOperands(CC); + std::swap(Op1, Op2); + } + } + + MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); + SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); + + SDValue Cmp; + + if (LogicOpc != ISD::DELETED_NODE && CC != ISD::SETOLT) { + Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op1, + DAG.getCondCode(CC), Mask, 
VL); + SDValue Cmp2 = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op2, Op2, + DAG.getCondCode(CC), Mask, VL); + Cmp = DAG.getNode(LogicOpc, DL, MaskVT, Cmp, Cmp2, VL); + } else { + Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op2, + DAG.getCondCode(CC), Mask, VL); + if (LogicOpc != ISD::DELETED_NODE) { + SDValue Cmp2 = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op2, Op1, + DAG.getCondCode(CC), Mask, VL); + Cmp = DAG.getNode(LogicOpc, DL, MaskVT, Cmp, Cmp2, VL); + } + } + + if (Invert) { + SDValue AllOnes = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); + Cmp = DAG.getNode(RISCVISD::VMXOR_VL, DL, MaskVT, Cmp, AllOnes, VL); + } + + return convertFromScalableVector(VT, Cmp, DAG, Subtarget); +} + SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG, unsigned NewOpc) const { MVT VT = Op.getSimpleValueType(); @@ -4697,6 +4803,10 @@ NODE_NAME_CASE(FDIV_VL) NODE_NAME_CASE(FNEG_VL) NODE_NAME_CASE(FMA_VL) + NODE_NAME_CASE(SETCC_VL) + NODE_NAME_CASE(VMAND_VL) + NODE_NAME_CASE(VMOR_VL) + NODE_NAME_CASE(VMXOR_VL) NODE_NAME_CASE(VMCLR_VL) NODE_NAME_CASE(VMSET_VL) NODE_NAME_CASE(VRGATHER_VX_VL) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -276,43 +276,28 @@ SplatPat_simm5, simm5, swap>; } -multiclass VPatFPSetCCSDNode_VV { - foreach fvti = AllFloatVectors in +multiclass VPatFPSetCCSDNode_VV_VF_FV { + foreach fvti = AllFloatVectors in { def : Pat<(fvti.Mask (setcc (fvti.Vector fvti.RegClass:$rs1), (fvti.Vector fvti.RegClass:$rs2), cc)), - (!cast(instruction_name#"_VV_"#fvti.LMul.MX) + (!cast(inst_name#"_VV_"#fvti.LMul.MX) fvti.RegClass:$rs1, fvti.RegClass:$rs2, fvti.AVL, fvti.SEW)>; -} - -multiclass VPatFPSetCCSDNode_VF { - foreach fvti = AllFloatVectors in def : Pat<(fvti.Mask (setcc (fvti.Vector fvti.RegClass:$rs1), - (fvti.Vector (splat_vector 
fvti.ScalarRegClass:$rs2)), + (splat_vector fvti.ScalarRegClass:$rs2), cc)), - (!cast(instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX) - fvti.RegClass:$rs1, - (fvti.Scalar fvti.ScalarRegClass:$rs2), + (!cast(inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX) + fvti.RegClass:$rs1, fvti.ScalarRegClass:$rs2, fvti.AVL, fvti.SEW)>; -} - -multiclass VPatFPSetCCSDNode_FV { - foreach fvti = AllFloatVectors in - def : Pat<(fvti.Mask (setcc (fvti.Vector (splat_vector fvti.ScalarRegClass:$rs2)), + def : Pat<(fvti.Mask (setcc (splat_vector fvti.ScalarRegClass:$rs2), (fvti.Vector fvti.RegClass:$rs1), cc)), - (!cast(swapped_op_instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX) - fvti.RegClass:$rs1, - (fvti.Scalar fvti.ScalarRegClass:$rs2), + (!cast(swapped_op_inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX) + fvti.RegClass:$rs1, fvti.ScalarRegClass:$rs2, fvti.AVL, fvti.SEW)>; -} - -multiclass VPatFPSetCCSDNode_VV_VF_FV { - defm : VPatFPSetCCSDNode_VV; - defm : VPatFPSetCCSDNode_VF; - defm : VPatFPSetCCSDNode_FV; + } } multiclass VPatExtendSDNode_V ops, string inst_name, string suffix, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -79,6 +79,16 @@ SDTCisVT<5, XLenVT>]>; def riscv_fma_vl : SDNode<"RISCVISD::FMA_VL", SDT_RISCVVecFMA_VL>; +def riscv_setcc_vl : SDNode<"RISCVISD::SETCC_VL", + SDTypeProfile<1, 5, [SDTCisVec<0>, + SDTCVecEltisVT<0, i1>, + SDTCisVec<1>, + SDTCisSameNumEltsAs<0, 1>, + SDTCisSameAs<1, 2>, + SDTCisVT<3, OtherVT>, + SDTCisSameAs<0, 4>, + SDTCisVT<5, XLenVT>]>>; + def riscv_vrgather_vx_vl : SDNode<"RISCVISD::VRGATHER_VX_VL", SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0, 1>, @@ -87,6 +97,15 @@ SDTCisSameNumEltsAs<0, 3>, SDTCisVT<4, XLenVT>]>>; +def SDT_RISCVMaskBinOp_VL : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisVec<0>, + 
SDTCVecEltisVT<0, i1>, + SDTCisVT<3, XLenVT>]>; +def riscv_vmand_vl : SDNode<"RISCVISD::VMAND_VL", SDT_RISCVMaskBinOp_VL, [SDNPCommutative]>; +def riscv_vmor_vl : SDNode<"RISCVISD::VMOR_VL", SDT_RISCVMaskBinOp_VL, [SDNPCommutative]>; +def riscv_vmxor_vl : SDNode<"RISCVISD::VMXOR_VL", SDT_RISCVMaskBinOp_VL, [SDNPCommutative]>; + def SDT_RISCVVMSETCLR_VL : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>, SDTCisVT<1, XLenVT>]>; @@ -178,6 +197,36 @@ GPR:$vl, fvti.SEW)>; } +multiclass VPatFPSetCCVL_VV_VF_FV { + foreach fvti = AllFloatVectors in { + def : Pat<(fvti.Mask (riscv_setcc_vl (fvti.Vector fvti.RegClass:$rs1), + fvti.RegClass:$rs2, + cc, + (fvti.Mask true_mask), + (XLenVT (VLOp GPR:$vl)))), + (!cast(inst_name#"_VV_"#fvti.LMul.MX) + fvti.RegClass:$rs1, fvti.RegClass:$rs2, GPR:$vl, fvti.SEW)>; + def : Pat<(fvti.Mask (riscv_setcc_vl (fvti.Vector fvti.RegClass:$rs1), + (SplatFPOp fvti.ScalarRegClass:$rs2), + cc, + (fvti.Mask true_mask), + (XLenVT (VLOp GPR:$vl)))), + (!cast(inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX) + fvti.RegClass:$rs1, fvti.ScalarRegClass:$rs2, + GPR:$vl, fvti.SEW)>; + def : Pat<(fvti.Mask (riscv_setcc_vl (SplatFPOp fvti.ScalarRegClass:$rs2), + (fvti.Vector fvti.RegClass:$rs1), + cc, + (fvti.Mask true_mask), + (XLenVT (VLOp GPR:$vl)))), + (!cast(swapped_op_inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX) + fvti.RegClass:$rs1, fvti.ScalarRegClass:$rs2, + GPR:$vl, fvti.SEW)>; + } +} + //===----------------------------------------------------------------------===// // Patterns. //===----------------------------------------------------------------------===// @@ -267,6 +316,19 @@ GPR:$vl, vti.SEW)>; } +// 14.11. 
Vector Floating-Point Compare Instructions +defm "" : VPatFPSetCCVL_VV_VF_FV; +defm "" : VPatFPSetCCVL_VV_VF_FV; + +defm "" : VPatFPSetCCVL_VV_VF_FV; +defm "" : VPatFPSetCCVL_VV_VF_FV; + +defm "" : VPatFPSetCCVL_VV_VF_FV; +defm "" : VPatFPSetCCVL_VV_VF_FV; + +defm "" : VPatFPSetCCVL_VV_VF_FV; +defm "" : VPatFPSetCCVL_VV_VF_FV; + // 14.12. Vector Floating-Point Sign-Injection Instructions // Handle fneg with VFSGNJN using the same input for both operands. foreach vti = AllFloatVectors in { @@ -286,6 +348,28 @@ (!cast("PseudoVMSET_M_" # mti.BX) GPR:$vl, mti.SEW)>; def : Pat<(mti.Mask (riscv_vmclr_vl (XLenVT (VLOp GPR:$vl)))), (!cast("PseudoVMCLR_M_" # mti.BX) GPR:$vl, mti.SEW)>; + + def : Pat<(mti.Mask (riscv_vmand_vl VR:$rs1, VR:$rs2, (XLenVT (VLOp GPR:$vl)))), + (!cast("PseudoVMAND_MM_" # mti.LMul.MX) + VR:$rs1, VR:$rs2, GPR:$vl, mti.SEW)>; + def : Pat<(mti.Mask (riscv_vmor_vl VR:$rs1, VR:$rs2, (XLenVT (VLOp GPR:$vl)))), + (!cast("PseudoVMOR_MM_" # mti.LMul.MX) + VR:$rs1, VR:$rs2, GPR:$vl, mti.SEW)>; + def : Pat<(mti.Mask (riscv_vmxor_vl VR:$rs1, VR:$rs2, (XLenVT (VLOp GPR:$vl)))), + (!cast("PseudoVMXOR_MM_" # mti.LMul.MX) + VR:$rs1, VR:$rs2, GPR:$vl, mti.SEW)>; + + // FIXME: Add remaining mask instructions. + def : Pat<(mti.Mask (riscv_vmxor_vl (riscv_vmor_vl VR:$rs1, VR:$rs2, + (XLenVT (VLOp GPR:$vl))), + true_mask, (XLenVT (VLOp GPR:$vl)))), + (!cast("PseudoVMNOR_MM_" # mti.LMul.MX) + VR:$rs1, VR:$rs2, GPR:$vl, mti.SEW)>; + + // Match the not idiom to the vmnot.m pseudo. 
+ def : Pat<(mti.Mask (riscv_vmxor_vl VR:$rs, true_mask, (XLenVT (VLOp GPR:$vl)))), + (!cast("PseudoVMNAND_MM_" # mti.LMul.MX) + VR:$rs, VR:$rs, GPR:$vl, mti.SEW)>; } } // Predicates = [HasStdExtV] diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll @@ -0,0 +1,1456 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK + +define void @fcmp_oeq_vv_v8f16(<8 x half>* %x, <8 x half>* %y, <8 x i1>* %z) { +; CHECK-LABEL: fcmp_oeq_vv_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 8 +; CHECK-NEXT: vsetvli a4, a3, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vle16.v v26, (a1) +; CHECK-NEXT: vmfeq.vv v27, v25, v26 +; CHECK-NEXT: vsetvli a0, a3, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v27, (a2) +; CHECK-NEXT: ret + %a = load <8 x half>, <8 x half>* %x + %b = load <8 x half>, <8 x half>* %y + %c = fcmp oeq <8 x half> %a, %b + store <8 x i1> %c, <8 x i1>* %z + ret void +} + +define void @fcmp_oeq_vv_v8f16_nonans(<8 x half>* %x, <8 x half>* %y, <8 x i1>* %z) { +; CHECK-LABEL: fcmp_oeq_vv_v8f16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 8 +; CHECK-NEXT: vsetvli a4, a3, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vle16.v v26, (a1) +; CHECK-NEXT: vmfeq.vv v27, v25, v26 +; CHECK-NEXT: vsetvli a0, a3, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v27, (a2) +; CHECK-NEXT: ret + %a = load <8 x half>, <8 x half>* %x + %b = load <8 x half>, <8 x 
half>* %y + %c = fcmp nnan oeq <8 x half> %a, %b + store <8 x i1> %c, <8 x i1>* %z + ret void +} + +define void @fcmp_une_vv_v4f32(<4 x float>* %x, <4 x float>* %y, <4 x i1>* %z) { +; CHECK-LABEL: fcmp_une_vv_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 4 +; CHECK-NEXT: vsetvli a4, a3, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vle32.v v26, (a1) +; CHECK-NEXT: vmfne.vv v27, v25, v26 +; CHECK-NEXT: vsetvli a0, a3, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v27, (a2) +; CHECK-NEXT: ret + %a = load <4 x float>, <4 x float>* %x + %b = load <4 x float>, <4 x float>* %y + %c = fcmp une <4 x float> %a, %b + store <4 x i1> %c, <4 x i1>* %z + ret void +} + +define void @fcmp_une_vv_v4f32_nonans(<4 x float>* %x, <4 x float>* %y, <4 x i1>* %z) { +; CHECK-LABEL: fcmp_une_vv_v4f32_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 4 +; CHECK-NEXT: vsetvli a4, a3, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vle32.v v26, (a1) +; CHECK-NEXT: vmfne.vv v27, v25, v26 +; CHECK-NEXT: vsetvli a0, a3, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v27, (a2) +; CHECK-NEXT: ret + %a = load <4 x float>, <4 x float>* %x + %b = load <4 x float>, <4 x float>* %y + %c = fcmp nnan une <4 x float> %a, %b + store <4 x i1> %c, <4 x i1>* %z + ret void +} + +define void @fcmp_ogt_vv_v2f64(<2 x double>* %x, <2 x double>* %y, <2 x i1>* %z) { +; CHECK-LABEL: fcmp_ogt_vv_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 2 +; CHECK-NEXT: vsetvli a4, a3, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vle64.v v26, (a1) +; CHECK-NEXT: vmflt.vv v27, v26, v25 +; CHECK-NEXT: vsetvli a0, a3, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v27, (a2) +; CHECK-NEXT: ret + %a = load <2 x double>, <2 x double>* %x + %b = load <2 x double>, <2 x double>* %y + %c = fcmp ogt <2 x double> %a, %b + store <2 x i1> %c, <2 x i1>* %z + ret void +} + +define void @fcmp_ogt_vv_v2f64_nonans(<2 x double>* %x, <2 x double>* %y, <2 x i1>* %z) { +; CHECK-LABEL: fcmp_ogt_vv_v2f64_nonans: +; CHECK: 
# %bb.0: +; CHECK-NEXT: addi a3, zero, 2 +; CHECK-NEXT: vsetvli a4, a3, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vle64.v v26, (a1) +; CHECK-NEXT: vmflt.vv v27, v26, v25 +; CHECK-NEXT: vsetvli a0, a3, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v27, (a2) +; CHECK-NEXT: ret + %a = load <2 x double>, <2 x double>* %x + %b = load <2 x double>, <2 x double>* %y + %c = fcmp nnan ogt <2 x double> %a, %b + store <2 x i1> %c, <2 x i1>* %z + ret void +} + +define void @fcmp_olt_vv_v16f16(<16 x half>* %x, <16 x half>* %y, <16 x i1>* %z) { +; CHECK-LABEL: fcmp_olt_vv_v16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 16 +; CHECK-NEXT: vsetvli a4, a3, e16,m2,ta,mu +; CHECK-NEXT: vle16.v v26, (a0) +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vmflt.vv v25, v26, v28 +; CHECK-NEXT: vsetvli a0, a3, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v25, (a2) +; CHECK-NEXT: ret + %a = load <16 x half>, <16 x half>* %x + %b = load <16 x half>, <16 x half>* %y + %c = fcmp olt <16 x half> %a, %b + store <16 x i1> %c, <16 x i1>* %z + ret void +} + +define void @fcmp_olt_vv_v16f16_nonans(<16 x half>* %x, <16 x half>* %y, <16 x i1>* %z) { +; CHECK-LABEL: fcmp_olt_vv_v16f16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 16 +; CHECK-NEXT: vsetvli a4, a3, e16,m2,ta,mu +; CHECK-NEXT: vle16.v v26, (a0) +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vmflt.vv v25, v26, v28 +; CHECK-NEXT: vsetvli a0, a3, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v25, (a2) +; CHECK-NEXT: ret + %a = load <16 x half>, <16 x half>* %x + %b = load <16 x half>, <16 x half>* %y + %c = fcmp nnan olt <16 x half> %a, %b + store <16 x i1> %c, <16 x i1>* %z + ret void +} + +define void @fcmp_oge_vv_v8f32(<8 x float>* %x, <8 x float>* %y, <8 x i1>* %z) { +; CHECK-LABEL: fcmp_oge_vv_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 8 +; CHECK-NEXT: vsetvli a4, a3, e32,m2,ta,mu +; CHECK-NEXT: vle32.v v26, (a0) +; CHECK-NEXT: vle32.v v28, (a1) +; CHECK-NEXT: vmfle.vv v25, v28, v26 +; CHECK-NEXT: vsetvli a0, a3, 
e8,m1,ta,mu +; CHECK-NEXT: vse1.v v25, (a2) +; CHECK-NEXT: ret + %a = load <8 x float>, <8 x float>* %x + %b = load <8 x float>, <8 x float>* %y + %c = fcmp oge <8 x float> %a, %b + store <8 x i1> %c, <8 x i1>* %z + ret void +} + +define void @fcmp_oge_vv_v8f32_nonans(<8 x float>* %x, <8 x float>* %y, <8 x i1>* %z) { +; CHECK-LABEL: fcmp_oge_vv_v8f32_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 8 +; CHECK-NEXT: vsetvli a4, a3, e32,m2,ta,mu +; CHECK-NEXT: vle32.v v26, (a0) +; CHECK-NEXT: vle32.v v28, (a1) +; CHECK-NEXT: vmfle.vv v25, v28, v26 +; CHECK-NEXT: vsetvli a0, a3, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v25, (a2) +; CHECK-NEXT: ret + %a = load <8 x float>, <8 x float>* %x + %b = load <8 x float>, <8 x float>* %y + %c = fcmp nnan oge <8 x float> %a, %b + store <8 x i1> %c, <8 x i1>* %z + ret void +} + +define void @fcmp_ole_vv_v4f64(<4 x double>* %x, <4 x double>* %y, <4 x i1>* %z) { +; CHECK-LABEL: fcmp_ole_vv_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 4 +; CHECK-NEXT: vsetvli a4, a3, e64,m2,ta,mu +; CHECK-NEXT: vle64.v v26, (a0) +; CHECK-NEXT: vle64.v v28, (a1) +; CHECK-NEXT: vmfle.vv v25, v26, v28 +; CHECK-NEXT: vsetvli a0, a3, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v25, (a2) +; CHECK-NEXT: ret + %a = load <4 x double>, <4 x double>* %x + %b = load <4 x double>, <4 x double>* %y + %c = fcmp ole <4 x double> %a, %b + store <4 x i1> %c, <4 x i1>* %z + ret void +} + +define void @fcmp_ole_vv_v4f64_nonans(<4 x double>* %x, <4 x double>* %y, <4 x i1>* %z) { +; CHECK-LABEL: fcmp_ole_vv_v4f64_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 4 +; CHECK-NEXT: vsetvli a4, a3, e64,m2,ta,mu +; CHECK-NEXT: vle64.v v26, (a0) +; CHECK-NEXT: vle64.v v28, (a1) +; CHECK-NEXT: vmfle.vv v25, v26, v28 +; CHECK-NEXT: vsetvli a0, a3, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v25, (a2) +; CHECK-NEXT: ret + %a = load <4 x double>, <4 x double>* %x + %b = load <4 x double>, <4 x double>* %y + %c = fcmp nnan ole <4 x double> %a, %b + store <4 x i1> %c, <4 x i1>* %z + 
ret void +} + +define void @fcmp_ule_vv_v32f16(<32 x half>* %x, <32 x half>* %y, <32 x i1>* %z) { +; CHECK-LABEL: fcmp_ule_vv_v32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 32 +; CHECK-NEXT: vsetvli a4, a3, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vle16.v v8, (a1) +; CHECK-NEXT: vmflt.vv v25, v8, v28 +; CHECK-NEXT: vsetvli a0, a3, e8,m2,ta,mu +; CHECK-NEXT: vmnand.mm v25, v25, v25 +; CHECK-NEXT: vse1.v v25, (a2) +; CHECK-NEXT: ret + %a = load <32 x half>, <32 x half>* %x + %b = load <32 x half>, <32 x half>* %y + %c = fcmp ule <32 x half> %a, %b + store <32 x i1> %c, <32 x i1>* %z + ret void +} + +define void @fcmp_ule_vv_v32f16_nonans(<32 x half>* %x, <32 x half>* %y, <32 x i1>* %z) { +; CHECK-LABEL: fcmp_ule_vv_v32f16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 32 +; CHECK-NEXT: vsetvli a4, a3, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vle16.v v8, (a1) +; CHECK-NEXT: vmfle.vv v25, v28, v8 +; CHECK-NEXT: vsetvli a0, a3, e8,m2,ta,mu +; CHECK-NEXT: vse1.v v25, (a2) +; CHECK-NEXT: ret + %a = load <32 x half>, <32 x half>* %x + %b = load <32 x half>, <32 x half>* %y + %c = fcmp nnan ule <32 x half> %a, %b + store <32 x i1> %c, <32 x i1>* %z + ret void +} + +define void @fcmp_uge_vv_v16f32(<16 x float>* %x, <16 x float>* %y, <16 x i1>* %z) { +; CHECK-LABEL: fcmp_uge_vv_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 16 +; CHECK-NEXT: vsetvli a4, a3, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: vmflt.vv v25, v28, v8 +; CHECK-NEXT: vsetvli a0, a3, e8,m1,ta,mu +; CHECK-NEXT: vmnand.mm v25, v25, v25 +; CHECK-NEXT: vse1.v v25, (a2) +; CHECK-NEXT: ret + %a = load <16 x float>, <16 x float>* %x + %b = load <16 x float>, <16 x float>* %y + %c = fcmp uge <16 x float> %a, %b + store <16 x i1> %c, <16 x i1>* %z + ret void +} + +define void @fcmp_uge_vv_v16f32_nonans(<16 x float>* %x, <16 x float>* %y, <16 x i1>* %z) { +; CHECK-LABEL: fcmp_uge_vv_v16f32_nonans: +; 
CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 16 +; CHECK-NEXT: vsetvli a4, a3, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: vmfle.vv v25, v8, v28 +; CHECK-NEXT: vsetvli a0, a3, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v25, (a2) +; CHECK-NEXT: ret + %a = load <16 x float>, <16 x float>* %x + %b = load <16 x float>, <16 x float>* %y + %c = fcmp nnan uge <16 x float> %a, %b + store <16 x i1> %c, <16 x i1>* %z + ret void +} + +define void @fcmp_ult_vv_v8f64(<8 x double>* %x, <8 x double>* %y, <8 x i1>* %z) { +; CHECK-LABEL: fcmp_ult_vv_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 8 +; CHECK-NEXT: vsetvli a4, a3, e64,m4,ta,mu +; CHECK-NEXT: vle64.v v28, (a0) +; CHECK-NEXT: vle64.v v8, (a1) +; CHECK-NEXT: vmfle.vv v25, v8, v28 +; CHECK-NEXT: vsetvli a0, a3, e8,mf2,ta,mu +; CHECK-NEXT: vmnand.mm v25, v25, v25 +; CHECK-NEXT: vsetvli a0, a3, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v25, (a2) +; CHECK-NEXT: ret + %a = load <8 x double>, <8 x double>* %x + %b = load <8 x double>, <8 x double>* %y + %c = fcmp ult <8 x double> %a, %b + store <8 x i1> %c, <8 x i1>* %z + ret void +} + +define void @fcmp_ult_vv_v8f64_nonans(<8 x double>* %x, <8 x double>* %y, <8 x i1>* %z) { +; CHECK-LABEL: fcmp_ult_vv_v8f64_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 8 +; CHECK-NEXT: vsetvli a4, a3, e64,m4,ta,mu +; CHECK-NEXT: vle64.v v28, (a0) +; CHECK-NEXT: vle64.v v8, (a1) +; CHECK-NEXT: vmflt.vv v25, v28, v8 +; CHECK-NEXT: vsetvli a0, a3, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v25, (a2) +; CHECK-NEXT: ret + %a = load <8 x double>, <8 x double>* %x + %b = load <8 x double>, <8 x double>* %y + %c = fcmp nnan ult <8 x double> %a, %b + store <8 x i1> %c, <8 x i1>* %z + ret void +} + +define void @fcmp_ugt_vv_v64f16(<64 x half>* %x, <64 x half>* %y, <64 x i1>* %z) { +; CHECK-LABEL: fcmp_ugt_vv_v64f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 64 +; CHECK-NEXT: vsetvli a4, a3, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: 
vle16.v v16, (a1) +; CHECK-NEXT: vmfle.vv v25, v8, v16 +; CHECK-NEXT: vsetvli a0, a3, e8,m4,ta,mu +; CHECK-NEXT: vmnand.mm v25, v25, v25 +; CHECK-NEXT: vse1.v v25, (a2) +; CHECK-NEXT: ret + %a = load <64 x half>, <64 x half>* %x + %b = load <64 x half>, <64 x half>* %y + %c = fcmp ugt <64 x half> %a, %b + store <64 x i1> %c, <64 x i1>* %z + ret void +} + +define void @fcmp_ugt_vv_v64f16_nonans(<64 x half>* %x, <64 x half>* %y, <64 x i1>* %z) { +; CHECK-LABEL: fcmp_ugt_vv_v64f16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 64 +; CHECK-NEXT: vsetvli a4, a3, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v16, (a1) +; CHECK-NEXT: vmflt.vv v25, v16, v8 +; CHECK-NEXT: vsetvli a0, a3, e8,m4,ta,mu +; CHECK-NEXT: vse1.v v25, (a2) +; CHECK-NEXT: ret + %a = load <64 x half>, <64 x half>* %x + %b = load <64 x half>, <64 x half>* %y + %c = fcmp nnan ugt <64 x half> %a, %b + store <64 x i1> %c, <64 x i1>* %z + ret void +} + +define void @fcmp_ueq_vv_v32f32(<32 x float>* %x, <32 x float>* %y, <32 x i1>* %z) { +; CHECK-LABEL: fcmp_ueq_vv_v32f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 32 +; CHECK-NEXT: vsetvli a4, a3, e32,m8,ta,mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vle32.v v16, (a1) +; CHECK-NEXT: vmflt.vv v25, v16, v8 +; CHECK-NEXT: vmflt.vv v26, v8, v16 +; CHECK-NEXT: vsetvli a0, a3, e8,m2,ta,mu +; CHECK-NEXT: vmnor.mm v25, v26, v25 +; CHECK-NEXT: vse1.v v25, (a2) +; CHECK-NEXT: ret + %a = load <32 x float>, <32 x float>* %x + %b = load <32 x float>, <32 x float>* %y + %c = fcmp ueq <32 x float> %a, %b + store <32 x i1> %c, <32 x i1>* %z + ret void +} + +define void @fcmp_ueq_vv_v32f32_nonans(<32 x float>* %x, <32 x float>* %y, <32 x i1>* %z) { +; CHECK-LABEL: fcmp_ueq_vv_v32f32_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 32 +; CHECK-NEXT: vsetvli a4, a3, e32,m8,ta,mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vle32.v v16, (a1) +; CHECK-NEXT: vmfeq.vv v25, v8, v16 +; CHECK-NEXT: vsetvli a0, a3, e8,m2,ta,mu +; 
CHECK-NEXT: vse1.v v25, (a2) +; CHECK-NEXT: ret + %a = load <32 x float>, <32 x float>* %x + %b = load <32 x float>, <32 x float>* %y + %c = fcmp nnan ueq <32 x float> %a, %b + store <32 x i1> %c, <32 x i1>* %z + ret void +} + +define void @fcmp_one_vv_v8f64(<16 x double>* %x, <16 x double>* %y, <16 x i1>* %z) { +; CHECK-LABEL: fcmp_one_vv_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 16 +; CHECK-NEXT: vsetvli a4, a3, e64,m8,ta,mu +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: vmflt.vv v25, v16, v8 +; CHECK-NEXT: vmflt.vv v26, v8, v16 +; CHECK-NEXT: vsetvli a0, a3, e8,m1,ta,mu +; CHECK-NEXT: vmor.mm v25, v26, v25 +; CHECK-NEXT: vse1.v v25, (a2) +; CHECK-NEXT: ret + %a = load <16 x double>, <16 x double>* %x + %b = load <16 x double>, <16 x double>* %y + %c = fcmp one <16 x double> %a, %b + store <16 x i1> %c, <16 x i1>* %z + ret void +} + +define void @fcmp_one_vv_v8f64_nonans(<16 x double>* %x, <16 x double>* %y, <16 x i1>* %z) { +; CHECK-LABEL: fcmp_one_vv_v8f64_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 16 +; CHECK-NEXT: vsetvli a4, a3, e64,m8,ta,mu +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: vmfne.vv v25, v8, v16 +; CHECK-NEXT: vsetvli a0, a3, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v25, (a2) +; CHECK-NEXT: ret + %a = load <16 x double>, <16 x double>* %x + %b = load <16 x double>, <16 x double>* %y + %c = fcmp nnan one <16 x double> %a, %b + store <16 x i1> %c, <16 x i1>* %z + ret void +} + +define void @fcmp_ord_vv_v4f16(<4 x half>* %x, <4 x half>* %y, <4 x i1>* %z) { +; CHECK-LABEL: fcmp_ord_vv_v4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 4 +; CHECK-NEXT: vsetvli a4, a3, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a1) +; CHECK-NEXT: vle16.v v26, (a0) +; CHECK-NEXT: vmfeq.vv v27, v25, v25 +; CHECK-NEXT: vmfeq.vv v25, v26, v26 +; CHECK-NEXT: vsetvli a0, a3, e8,mf2,ta,mu +; CHECK-NEXT: vmand.mm v25, v25, v27 +; CHECK-NEXT: vsetvli a0, a3, e8,m1,ta,mu +; CHECK-NEXT: 
vse1.v v25, (a2) +; CHECK-NEXT: ret + %a = load <4 x half>, <4 x half>* %x + %b = load <4 x half>, <4 x half>* %y + %c = fcmp ord <4 x half> %a, %b + store <4 x i1> %c, <4 x i1>* %z + ret void +} + +define void @fcmp_uno_vv_v4f16(<2 x half>* %x, <2 x half>* %y, <2 x i1>* %z) { +; CHECK-LABEL: fcmp_uno_vv_v4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, zero, 2 +; CHECK-NEXT: vsetvli a4, a3, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a1) +; CHECK-NEXT: vle16.v v26, (a0) +; CHECK-NEXT: vmfne.vv v27, v25, v25 +; CHECK-NEXT: vmfne.vv v25, v26, v26 +; CHECK-NEXT: vsetvli a0, a3, e8,mf2,ta,mu +; CHECK-NEXT: vmor.mm v25, v25, v27 +; CHECK-NEXT: vsetvli a0, a3, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v25, (a2) +; CHECK-NEXT: ret + %a = load <2 x half>, <2 x half>* %x + %b = load <2 x half>, <2 x half>* %y + %c = fcmp uno <2 x half> %a, %b + store <2 x i1> %c, <2 x i1>* %z + ret void +} + +define void @fcmp_oeq_vf_v8f16(<8 x half>* %x, half %y, <8 x i1>* %z) { +; CHECK-LABEL: fcmp_oeq_vf_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 8 +; CHECK-NEXT: vsetvli a3, a2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vmfeq.vf v26, v25, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v26, (a1) +; CHECK-NEXT: ret + %a = load <8 x half>, <8 x half>* %x + %b = insertelement <8 x half> undef, half %y, i32 0 + %c = shufflevector <8 x half> %b, <8 x half> undef, <8 x i32> zeroinitializer + %d = fcmp oeq <8 x half> %a, %c + store <8 x i1> %d, <8 x i1>* %z + ret void +} + +define void @fcmp_oeq_vf_v8f16_nonans(<8 x half>* %x, half %y, <8 x i1>* %z) { +; CHECK-LABEL: fcmp_oeq_vf_v8f16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 8 +; CHECK-NEXT: vsetvli a3, a2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vmfeq.vf v26, v25, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v26, (a1) +; CHECK-NEXT: ret + %a = load <8 x half>, <8 x half>* %x + %b = insertelement <8 x half> undef, half %y, i32 0 + %c = 
shufflevector <8 x half> %b, <8 x half> undef, <8 x i32> zeroinitializer + %d = fcmp nnan oeq <8 x half> %a, %c + store <8 x i1> %d, <8 x i1>* %z + ret void +} + +define void @fcmp_une_vf_v4f32(<4 x float>* %x, float %y, <4 x i1>* %z) { +; CHECK-LABEL: fcmp_une_vf_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 4 +; CHECK-NEXT: vsetvli a3, a2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vmfne.vf v26, v25, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v26, (a1) +; CHECK-NEXT: ret + %a = load <4 x float>, <4 x float>* %x + %b = insertelement <4 x float> undef, float %y, i32 0 + %c = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer + %d = fcmp une <4 x float> %a, %c + store <4 x i1> %d, <4 x i1>* %z + ret void +} + +define void @fcmp_une_vf_v4f32_nonans(<4 x float>* %x, float %y, <4 x i1>* %z) { +; CHECK-LABEL: fcmp_une_vf_v4f32_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 4 +; CHECK-NEXT: vsetvli a3, a2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vmfne.vf v26, v25, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v26, (a1) +; CHECK-NEXT: ret + %a = load <4 x float>, <4 x float>* %x + %b = insertelement <4 x float> undef, float %y, i32 0 + %c = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer + %d = fcmp nnan une <4 x float> %a, %c + store <4 x i1> %d, <4 x i1>* %z + ret void +} + +define void @fcmp_ogt_vf_v2f64(<2 x double>* %x, double %y, <2 x i1>* %z) { +; CHECK-LABEL: fcmp_ogt_vf_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 2 +; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vmfgt.vf v26, v25, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v26, (a1) +; CHECK-NEXT: ret + %a = load <2 x double>, <2 x double>* %x + %b = insertelement <2 x double> undef, double %y, i32 0 + %c = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> 
zeroinitializer + %d = fcmp ogt <2 x double> %a, %c + store <2 x i1> %d, <2 x i1>* %z + ret void +} + +define void @fcmp_ogt_vf_v2f64_nonans(<2 x double>* %x, double %y, <2 x i1>* %z) { +; CHECK-LABEL: fcmp_ogt_vf_v2f64_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 2 +; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vmfgt.vf v26, v25, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v26, (a1) +; CHECK-NEXT: ret + %a = load <2 x double>, <2 x double>* %x + %b = insertelement <2 x double> undef, double %y, i32 0 + %c = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> zeroinitializer + %d = fcmp nnan ogt <2 x double> %a, %c + store <2 x i1> %d, <2 x i1>* %z + ret void +} + +define void @fcmp_olt_vf_v16f16(<16 x half>* %x, half %y, <16 x i1>* %z) { +; CHECK-LABEL: fcmp_olt_vf_v16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 16 +; CHECK-NEXT: vsetvli a3, a2, e16,m2,ta,mu +; CHECK-NEXT: vle16.v v26, (a0) +; CHECK-NEXT: vmflt.vf v25, v26, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <16 x half>, <16 x half>* %x + %b = insertelement <16 x half> undef, half %y, i32 0 + %c = shufflevector <16 x half> %b, <16 x half> undef, <16 x i32> zeroinitializer + %d = fcmp olt <16 x half> %a, %c + store <16 x i1> %d, <16 x i1>* %z + ret void +} + +define void @fcmp_olt_vf_v16f16_nonans(<16 x half>* %x, half %y, <16 x i1>* %z) { +; CHECK-LABEL: fcmp_olt_vf_v16f16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 16 +; CHECK-NEXT: vsetvli a3, a2, e16,m2,ta,mu +; CHECK-NEXT: vle16.v v26, (a0) +; CHECK-NEXT: vmflt.vf v25, v26, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <16 x half>, <16 x half>* %x + %b = insertelement <16 x half> undef, half %y, i32 0 + %c = shufflevector <16 x half> %b, <16 x half> undef, <16 x i32> zeroinitializer + %d = fcmp nnan olt <16 x 
half> %a, %c + store <16 x i1> %d, <16 x i1>* %z + ret void +} + +define void @fcmp_oge_vf_v8f32(<8 x float>* %x, float %y, <8 x i1>* %z) { +; CHECK-LABEL: fcmp_oge_vf_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 8 +; CHECK-NEXT: vsetvli a3, a2, e32,m2,ta,mu +; CHECK-NEXT: vle32.v v26, (a0) +; CHECK-NEXT: vmfge.vf v25, v26, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <8 x float>, <8 x float>* %x + %b = insertelement <8 x float> undef, float %y, i32 0 + %c = shufflevector <8 x float> %b, <8 x float> undef, <8 x i32> zeroinitializer + %d = fcmp oge <8 x float> %a, %c + store <8 x i1> %d, <8 x i1>* %z + ret void +} + +define void @fcmp_oge_vf_v8f32_nonans(<8 x float>* %x, float %y, <8 x i1>* %z) { +; CHECK-LABEL: fcmp_oge_vf_v8f32_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 8 +; CHECK-NEXT: vsetvli a3, a2, e32,m2,ta,mu +; CHECK-NEXT: vle32.v v26, (a0) +; CHECK-NEXT: vmfge.vf v25, v26, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <8 x float>, <8 x float>* %x + %b = insertelement <8 x float> undef, float %y, i32 0 + %c = shufflevector <8 x float> %b, <8 x float> undef, <8 x i32> zeroinitializer + %d = fcmp nnan oge <8 x float> %a, %c + store <8 x i1> %d, <8 x i1>* %z + ret void +} + +define void @fcmp_ole_vf_v4f64(<4 x double>* %x, double %y, <4 x i1>* %z) { +; CHECK-LABEL: fcmp_ole_vf_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 4 +; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu +; CHECK-NEXT: vle64.v v26, (a0) +; CHECK-NEXT: vmfle.vf v25, v26, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <4 x double>, <4 x double>* %x + %b = insertelement <4 x double> undef, double %y, i32 0 + %c = shufflevector <4 x double> %b, <4 x double> undef, <4 x i32> zeroinitializer + %d = fcmp ole <4 x double> %a, %c + store <4 x i1> %d, <4 x i1>* %z + ret void +} + 
+define void @fcmp_ole_vf_v4f64_nonans(<4 x double>* %x, double %y, <4 x i1>* %z) { +; CHECK-LABEL: fcmp_ole_vf_v4f64_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 4 +; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu +; CHECK-NEXT: vle64.v v26, (a0) +; CHECK-NEXT: vmfle.vf v25, v26, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <4 x double>, <4 x double>* %x + %b = insertelement <4 x double> undef, double %y, i32 0 + %c = shufflevector <4 x double> %b, <4 x double> undef, <4 x i32> zeroinitializer + %d = fcmp nnan ole <4 x double> %a, %c + store <4 x i1> %d, <4 x i1>* %z + ret void +} + +define void @fcmp_ule_vf_v32f16(<32 x half>* %x, half %y, <32 x i1>* %z) { +; CHECK-LABEL: fcmp_ule_vf_v32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 32 +; CHECK-NEXT: vsetvli a3, a2, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vmfgt.vf v25, v28, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m2,ta,mu +; CHECK-NEXT: vmnand.mm v25, v25, v25 +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <32 x half>, <32 x half>* %x + %b = insertelement <32 x half> undef, half %y, i32 0 + %c = shufflevector <32 x half> %b, <32 x half> undef, <32 x i32> zeroinitializer + %d = fcmp ule <32 x half> %a, %c + store <32 x i1> %d, <32 x i1>* %z + ret void +} + +define void @fcmp_ule_vf_v32f16_nonans(<32 x half>* %x, half %y, <32 x i1>* %z) { +; CHECK-LABEL: fcmp_ule_vf_v32f16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 32 +; CHECK-NEXT: vsetvli a3, a2, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vmfle.vf v25, v28, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m2,ta,mu +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <32 x half>, <32 x half>* %x + %b = insertelement <32 x half> undef, half %y, i32 0 + %c = shufflevector <32 x half> %b, <32 x half> undef, <32 x i32> zeroinitializer + %d = fcmp nnan ule <32 x half> %a, %c + store <32 x i1> %d, <32 x i1>* %z + ret void +} + 
+define void @fcmp_uge_vf_v16f32(<16 x float>* %x, float %y, <16 x i1>* %z) { +; CHECK-LABEL: fcmp_uge_vf_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 16 +; CHECK-NEXT: vsetvli a3, a2, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vmflt.vf v25, v28, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vmnand.mm v25, v25, v25 +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <16 x float>, <16 x float>* %x + %b = insertelement <16 x float> undef, float %y, i32 0 + %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer + %d = fcmp uge <16 x float> %a, %c + store <16 x i1> %d, <16 x i1>* %z + ret void +} + +define void @fcmp_uge_vf_v16f32_nonans(<16 x float>* %x, float %y, <16 x i1>* %z) { +; CHECK-LABEL: fcmp_uge_vf_v16f32_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 16 +; CHECK-NEXT: vsetvli a3, a2, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vmfge.vf v25, v28, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <16 x float>, <16 x float>* %x + %b = insertelement <16 x float> undef, float %y, i32 0 + %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer + %d = fcmp nnan uge <16 x float> %a, %c + store <16 x i1> %d, <16 x i1>* %z + ret void +} + +define void @fcmp_ult_vf_v8f64(<8 x double>* %x, double %y, <8 x i1>* %z) { +; CHECK-LABEL: fcmp_ult_vf_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 8 +; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu +; CHECK-NEXT: vle64.v v28, (a0) +; CHECK-NEXT: vmfge.vf v25, v28, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,mf2,ta,mu +; CHECK-NEXT: vmnand.mm v25, v25, v25 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <8 x double>, <8 x double>* %x + %b = insertelement <8 x double> undef, double %y, i32 0 + %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer + %d 
= fcmp ult <8 x double> %a, %c + store <8 x i1> %d, <8 x i1>* %z + ret void +} + +define void @fcmp_ult_vf_v8f64_nonans(<8 x double>* %x, double %y, <8 x i1>* %z) { +; CHECK-LABEL: fcmp_ult_vf_v8f64_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 8 +; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu +; CHECK-NEXT: vle64.v v28, (a0) +; CHECK-NEXT: vmflt.vf v25, v28, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <8 x double>, <8 x double>* %x + %b = insertelement <8 x double> undef, double %y, i32 0 + %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer + %d = fcmp nnan ult <8 x double> %a, %c + store <8 x i1> %d, <8 x i1>* %z + ret void +} + +define void @fcmp_ugt_vf_v64f16(<64 x half>* %x, half %y, <64 x i1>* %z) { +; CHECK-LABEL: fcmp_ugt_vf_v64f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 64 +; CHECK-NEXT: vsetvli a3, a2, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vmfle.vf v25, v8, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m4,ta,mu +; CHECK-NEXT: vmnand.mm v25, v25, v25 +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <64 x half>, <64 x half>* %x + %b = insertelement <64 x half> undef, half %y, i32 0 + %c = shufflevector <64 x half> %b, <64 x half> undef, <64 x i32> zeroinitializer + %d = fcmp ugt <64 x half> %a, %c + store <64 x i1> %d, <64 x i1>* %z + ret void +} + +define void @fcmp_ugt_vf_v64f16_nonans(<64 x half>* %x, half %y, <64 x i1>* %z) { +; CHECK-LABEL: fcmp_ugt_vf_v64f16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 64 +; CHECK-NEXT: vsetvli a3, a2, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vmfgt.vf v25, v8, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m4,ta,mu +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <64 x half>, <64 x half>* %x + %b = insertelement <64 x half> undef, half %y, i32 0 + %c = shufflevector <64 x half> %b, <64 x half> undef, <64 x i32> zeroinitializer + %d = fcmp nnan 
ugt <64 x half> %a, %c + store <64 x i1> %d, <64 x i1>* %z + ret void +} + +define void @fcmp_ueq_vf_v32f32(<32 x float>* %x, float %y, <32 x i1>* %z) { +; CHECK-LABEL: fcmp_ueq_vf_v32f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 32 +; CHECK-NEXT: vsetvli a3, a2, e32,m8,ta,mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vmfgt.vf v25, v8, fa0 +; CHECK-NEXT: vmflt.vf v26, v8, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m2,ta,mu +; CHECK-NEXT: vmnor.mm v25, v26, v25 +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <32 x float>, <32 x float>* %x + %b = insertelement <32 x float> undef, float %y, i32 0 + %c = shufflevector <32 x float> %b, <32 x float> undef, <32 x i32> zeroinitializer + %d = fcmp ueq <32 x float> %a, %c + store <32 x i1> %d, <32 x i1>* %z + ret void +} + +define void @fcmp_ueq_vf_v32f32_nonans(<32 x float>* %x, float %y, <32 x i1>* %z) { +; CHECK-LABEL: fcmp_ueq_vf_v32f32_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 32 +; CHECK-NEXT: vsetvli a3, a2, e32,m8,ta,mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vmfeq.vf v25, v8, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m2,ta,mu +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <32 x float>, <32 x float>* %x + %b = insertelement <32 x float> undef, float %y, i32 0 + %c = shufflevector <32 x float> %b, <32 x float> undef, <32 x i32> zeroinitializer + %d = fcmp nnan ueq <32 x float> %a, %c + store <32 x i1> %d, <32 x i1>* %z + ret void +} + +define void @fcmp_one_vf_v8f64(<16 x double>* %x, double %y, <16 x i1>* %z) { +; CHECK-LABEL: fcmp_one_vf_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 16 +; CHECK-NEXT: vsetvli a3, a2, e64,m8,ta,mu +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vmfgt.vf v25, v8, fa0 +; CHECK-NEXT: vmflt.vf v26, v8, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vmor.mm v25, v26, v25 +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <16 x double>, <16 x double>* %x + %b = insertelement <16 x double> undef, 
double %y, i32 0 + %c = shufflevector <16 x double> %b, <16 x double> undef, <16 x i32> zeroinitializer + %d = fcmp one <16 x double> %a, %c + store <16 x i1> %d, <16 x i1>* %z + ret void +} + +define void @fcmp_one_vf_v8f64_nonans(<16 x double>* %x, double %y, <16 x i1>* %z) { +; CHECK-LABEL: fcmp_one_vf_v8f64_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 16 +; CHECK-NEXT: vsetvli a3, a2, e64,m8,ta,mu +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vmfne.vf v25, v8, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <16 x double>, <16 x double>* %x + %b = insertelement <16 x double> undef, double %y, i32 0 + %c = shufflevector <16 x double> %b, <16 x double> undef, <16 x i32> zeroinitializer + %d = fcmp nnan one <16 x double> %a, %c + store <16 x i1> %d, <16 x i1>* %z + ret void +} + +define void @fcmp_ord_vf_v4f16(<4 x half>* %x, half %y, <4 x i1>* %z) { +; CHECK-LABEL: fcmp_ord_vf_v4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 4 +; CHECK-NEXT: vsetvli a3, a2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vfmv.v.f v26, fa0 +; CHECK-NEXT: vmfeq.vf v27, v26, fa0 +; CHECK-NEXT: vmfeq.vv v26, v25, v25 +; CHECK-NEXT: vsetvli a0, a2, e8,mf2,ta,mu +; CHECK-NEXT: vmand.mm v25, v26, v27 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <4 x half>, <4 x half>* %x + %b = insertelement <4 x half> undef, half %y, i32 0 + %c = shufflevector <4 x half> %b, <4 x half> undef, <4 x i32> zeroinitializer + %d = fcmp ord <4 x half> %a, %c + store <4 x i1> %d, <4 x i1>* %z + ret void +} + +define void @fcmp_uno_vf_v4f16(<2 x half>* %x, half %y, <2 x i1>* %z) { +; CHECK-LABEL: fcmp_uno_vf_v4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 2 +; CHECK-NEXT: vsetvli a3, a2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vfmv.v.f v26, fa0 +; CHECK-NEXT: vmfne.vf v27, v26, fa0 +; CHECK-NEXT: vmfne.vv v26, v25, v25 +; 
CHECK-NEXT: vsetvli a0, a2, e8,mf2,ta,mu +; CHECK-NEXT: vmor.mm v25, v26, v27 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <2 x half>, <2 x half>* %x + %b = insertelement <2 x half> undef, half %y, i32 0 + %c = shufflevector <2 x half> %b, <2 x half> undef, <2 x i32> zeroinitializer + %d = fcmp uno <2 x half> %a, %c + store <2 x i1> %d, <2 x i1>* %z + ret void +} + +define void @fcmp_oeq_fv_v8f16(<8 x half>* %x, half %y, <8 x i1>* %z) { +; CHECK-LABEL: fcmp_oeq_fv_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 8 +; CHECK-NEXT: vsetvli a3, a2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vmfeq.vf v26, v25, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v26, (a1) +; CHECK-NEXT: ret + %a = load <8 x half>, <8 x half>* %x + %b = insertelement <8 x half> undef, half %y, i32 0 + %c = shufflevector <8 x half> %b, <8 x half> undef, <8 x i32> zeroinitializer + %d = fcmp oeq <8 x half> %c, %a + store <8 x i1> %d, <8 x i1>* %z + ret void +} + +define void @fcmp_oeq_fv_v8f16_nonans(<8 x half>* %x, half %y, <8 x i1>* %z) { +; CHECK-LABEL: fcmp_oeq_fv_v8f16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 8 +; CHECK-NEXT: vsetvli a3, a2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vmfeq.vf v26, v25, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v26, (a1) +; CHECK-NEXT: ret + %a = load <8 x half>, <8 x half>* %x + %b = insertelement <8 x half> undef, half %y, i32 0 + %c = shufflevector <8 x half> %b, <8 x half> undef, <8 x i32> zeroinitializer + %d = fcmp nnan oeq <8 x half> %c, %a + store <8 x i1> %d, <8 x i1>* %z + ret void +} + +define void @fcmp_une_fv_v4f32(<4 x float>* %x, float %y, <4 x i1>* %z) { +; CHECK-LABEL: fcmp_une_fv_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 4 +; CHECK-NEXT: vsetvli a3, a2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vmfne.vf v26, v25, fa0 +; CHECK-NEXT: vsetvli 
a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v26, (a1) +; CHECK-NEXT: ret + %a = load <4 x float>, <4 x float>* %x + %b = insertelement <4 x float> undef, float %y, i32 0 + %c = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer + %d = fcmp une <4 x float> %c, %a + store <4 x i1> %d, <4 x i1>* %z + ret void +} + +define void @fcmp_une_fv_v4f32_nonans(<4 x float>* %x, float %y, <4 x i1>* %z) { +; CHECK-LABEL: fcmp_une_fv_v4f32_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 4 +; CHECK-NEXT: vsetvli a3, a2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vmfne.vf v26, v25, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v26, (a1) +; CHECK-NEXT: ret + %a = load <4 x float>, <4 x float>* %x + %b = insertelement <4 x float> undef, float %y, i32 0 + %c = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer + %d = fcmp nnan une <4 x float> %c, %a + store <4 x i1> %d, <4 x i1>* %z + ret void +} + +define void @fcmp_ogt_fv_v2f64(<2 x double>* %x, double %y, <2 x i1>* %z) { +; CHECK-LABEL: fcmp_ogt_fv_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 2 +; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vmflt.vf v26, v25, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v26, (a1) +; CHECK-NEXT: ret + %a = load <2 x double>, <2 x double>* %x + %b = insertelement <2 x double> undef, double %y, i32 0 + %c = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> zeroinitializer + %d = fcmp ogt <2 x double> %c, %a + store <2 x i1> %d, <2 x i1>* %z + ret void +} + +define void @fcmp_ogt_fv_v2f64_nonans(<2 x double>* %x, double %y, <2 x i1>* %z) { +; CHECK-LABEL: fcmp_ogt_fv_v2f64_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 2 +; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vmflt.vf v26, v25, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v26, (a1) 
+; CHECK-NEXT: ret + %a = load <2 x double>, <2 x double>* %x + %b = insertelement <2 x double> undef, double %y, i32 0 + %c = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> zeroinitializer + %d = fcmp nnan ogt <2 x double> %c, %a + store <2 x i1> %d, <2 x i1>* %z + ret void +} + +define void @fcmp_olt_fv_v16f16(<16 x half>* %x, half %y, <16 x i1>* %z) { +; CHECK-LABEL: fcmp_olt_fv_v16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 16 +; CHECK-NEXT: vsetvli a3, a2, e16,m2,ta,mu +; CHECK-NEXT: vle16.v v26, (a0) +; CHECK-NEXT: vmfgt.vf v25, v26, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <16 x half>, <16 x half>* %x + %b = insertelement <16 x half> undef, half %y, i32 0 + %c = shufflevector <16 x half> %b, <16 x half> undef, <16 x i32> zeroinitializer + %d = fcmp olt <16 x half> %c, %a + store <16 x i1> %d, <16 x i1>* %z + ret void +} + +define void @fcmp_olt_fv_v16f16_nonans(<16 x half>* %x, half %y, <16 x i1>* %z) { +; CHECK-LABEL: fcmp_olt_fv_v16f16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 16 +; CHECK-NEXT: vsetvli a3, a2, e16,m2,ta,mu +; CHECK-NEXT: vle16.v v26, (a0) +; CHECK-NEXT: vmfgt.vf v25, v26, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <16 x half>, <16 x half>* %x + %b = insertelement <16 x half> undef, half %y, i32 0 + %c = shufflevector <16 x half> %b, <16 x half> undef, <16 x i32> zeroinitializer + %d = fcmp nnan olt <16 x half> %c, %a + store <16 x i1> %d, <16 x i1>* %z + ret void +} + +define void @fcmp_oge_fv_v8f32(<8 x float>* %x, float %y, <8 x i1>* %z) { +; CHECK-LABEL: fcmp_oge_fv_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 8 +; CHECK-NEXT: vsetvli a3, a2, e32,m2,ta,mu +; CHECK-NEXT: vle32.v v26, (a0) +; CHECK-NEXT: vmfle.vf v25, v26, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <8 x float>, <8 x float>* 
%x + %b = insertelement <8 x float> undef, float %y, i32 0 + %c = shufflevector <8 x float> %b, <8 x float> undef, <8 x i32> zeroinitializer + %d = fcmp oge <8 x float> %c, %a + store <8 x i1> %d, <8 x i1>* %z + ret void +} + +define void @fcmp_oge_fv_v8f32_nonans(<8 x float>* %x, float %y, <8 x i1>* %z) { +; CHECK-LABEL: fcmp_oge_fv_v8f32_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 8 +; CHECK-NEXT: vsetvli a3, a2, e32,m2,ta,mu +; CHECK-NEXT: vle32.v v26, (a0) +; CHECK-NEXT: vmfle.vf v25, v26, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <8 x float>, <8 x float>* %x + %b = insertelement <8 x float> undef, float %y, i32 0 + %c = shufflevector <8 x float> %b, <8 x float> undef, <8 x i32> zeroinitializer + %d = fcmp nnan oge <8 x float> %c, %a + store <8 x i1> %d, <8 x i1>* %z + ret void +} + +define void @fcmp_ole_fv_v4f64(<4 x double>* %x, double %y, <4 x i1>* %z) { +; CHECK-LABEL: fcmp_ole_fv_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 4 +; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu +; CHECK-NEXT: vle64.v v26, (a0) +; CHECK-NEXT: vmfge.vf v25, v26, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <4 x double>, <4 x double>* %x + %b = insertelement <4 x double> undef, double %y, i32 0 + %c = shufflevector <4 x double> %b, <4 x double> undef, <4 x i32> zeroinitializer + %d = fcmp ole <4 x double> %c, %a + store <4 x i1> %d, <4 x i1>* %z + ret void +} + +define void @fcmp_ole_fv_v4f64_nonans(<4 x double>* %x, double %y, <4 x i1>* %z) { +; CHECK-LABEL: fcmp_ole_fv_v4f64_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 4 +; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu +; CHECK-NEXT: vle64.v v26, (a0) +; CHECK-NEXT: vmfge.vf v25, v26, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <4 x double>, <4 x double>* %x + %b = insertelement <4 x double> undef, double 
%y, i32 0 + %c = shufflevector <4 x double> %b, <4 x double> undef, <4 x i32> zeroinitializer + %d = fcmp nnan ole <4 x double> %c, %a + store <4 x i1> %d, <4 x i1>* %z + ret void +} + +define void @fcmp_ule_fv_v32f16(<32 x half>* %x, half %y, <32 x i1>* %z) { +; CHECK-LABEL: fcmp_ule_fv_v32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 32 +; CHECK-NEXT: vsetvli a3, a2, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vmflt.vf v25, v28, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m2,ta,mu +; CHECK-NEXT: vmnand.mm v25, v25, v25 +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <32 x half>, <32 x half>* %x + %b = insertelement <32 x half> undef, half %y, i32 0 + %c = shufflevector <32 x half> %b, <32 x half> undef, <32 x i32> zeroinitializer + %d = fcmp ule <32 x half> %c, %a + store <32 x i1> %d, <32 x i1>* %z + ret void +} + +define void @fcmp_ule_fv_v32f16_nonans(<32 x half>* %x, half %y, <32 x i1>* %z) { +; CHECK-LABEL: fcmp_ule_fv_v32f16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 32 +; CHECK-NEXT: vsetvli a3, a2, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vmfge.vf v25, v28, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m2,ta,mu +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <32 x half>, <32 x half>* %x + %b = insertelement <32 x half> undef, half %y, i32 0 + %c = shufflevector <32 x half> %b, <32 x half> undef, <32 x i32> zeroinitializer + %d = fcmp nnan ule <32 x half> %c, %a + store <32 x i1> %d, <32 x i1>* %z + ret void +} + +define void @fcmp_uge_fv_v16f32(<16 x float>* %x, float %y, <16 x i1>* %z) { +; CHECK-LABEL: fcmp_uge_fv_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 16 +; CHECK-NEXT: vsetvli a3, a2, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vmfgt.vf v25, v28, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vmnand.mm v25, v25, v25 +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <16 x float>, <16 x float>* %x + %b = insertelement 
<16 x float> undef, float %y, i32 0 + %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer + %d = fcmp uge <16 x float> %c, %a + store <16 x i1> %d, <16 x i1>* %z + ret void +} + +define void @fcmp_uge_fv_v16f32_nonans(<16 x float>* %x, float %y, <16 x i1>* %z) { +; CHECK-LABEL: fcmp_uge_fv_v16f32_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 16 +; CHECK-NEXT: vsetvli a3, a2, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vmfle.vf v25, v28, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <16 x float>, <16 x float>* %x + %b = insertelement <16 x float> undef, float %y, i32 0 + %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer + %d = fcmp nnan uge <16 x float> %c, %a + store <16 x i1> %d, <16 x i1>* %z + ret void +} + +define void @fcmp_ult_fv_v8f64(<8 x double>* %x, double %y, <8 x i1>* %z) { +; CHECK-LABEL: fcmp_ult_fv_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 8 +; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu +; CHECK-NEXT: vle64.v v28, (a0) +; CHECK-NEXT: vmfle.vf v25, v28, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,mf2,ta,mu +; CHECK-NEXT: vmnand.mm v25, v25, v25 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <8 x double>, <8 x double>* %x + %b = insertelement <8 x double> undef, double %y, i32 0 + %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer + %d = fcmp ult <8 x double> %c, %a + store <8 x i1> %d, <8 x i1>* %z + ret void +} + +define void @fcmp_ult_fv_v8f64_nonans(<8 x double>* %x, double %y, <8 x i1>* %z) { +; CHECK-LABEL: fcmp_ult_fv_v8f64_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 8 +; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu +; CHECK-NEXT: vle64.v v28, (a0) +; CHECK-NEXT: vmfgt.vf v25, v28, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load 
<8 x double>, <8 x double>* %x + %b = insertelement <8 x double> undef, double %y, i32 0 + %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer + %d = fcmp nnan ult <8 x double> %c, %a + store <8 x i1> %d, <8 x i1>* %z + ret void +} + +define void @fcmp_ugt_fv_v64f16(<64 x half>* %x, half %y, <64 x i1>* %z) { +; CHECK-LABEL: fcmp_ugt_fv_v64f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 64 +; CHECK-NEXT: vsetvli a3, a2, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vmfge.vf v25, v8, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m4,ta,mu +; CHECK-NEXT: vmnand.mm v25, v25, v25 +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <64 x half>, <64 x half>* %x + %b = insertelement <64 x half> undef, half %y, i32 0 + %c = shufflevector <64 x half> %b, <64 x half> undef, <64 x i32> zeroinitializer + %d = fcmp ugt <64 x half> %c, %a + store <64 x i1> %d, <64 x i1>* %z + ret void +} + +define void @fcmp_ugt_fv_v64f16_nonans(<64 x half>* %x, half %y, <64 x i1>* %z) { +; CHECK-LABEL: fcmp_ugt_fv_v64f16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 64 +; CHECK-NEXT: vsetvli a3, a2, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vmflt.vf v25, v8, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m4,ta,mu +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <64 x half>, <64 x half>* %x + %b = insertelement <64 x half> undef, half %y, i32 0 + %c = shufflevector <64 x half> %b, <64 x half> undef, <64 x i32> zeroinitializer + %d = fcmp nnan ugt <64 x half> %c, %a + store <64 x i1> %d, <64 x i1>* %z + ret void +} + +define void @fcmp_ueq_fv_v32f32(<32 x float>* %x, float %y, <32 x i1>* %z) { +; CHECK-LABEL: fcmp_ueq_fv_v32f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 32 +; CHECK-NEXT: vsetvli a3, a2, e32,m8,ta,mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vmflt.vf v25, v8, fa0 +; CHECK-NEXT: vmfgt.vf v26, v8, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m2,ta,mu +; CHECK-NEXT: vmnor.mm v25, v26, v25 +; 
CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <32 x float>, <32 x float>* %x + %b = insertelement <32 x float> undef, float %y, i32 0 + %c = shufflevector <32 x float> %b, <32 x float> undef, <32 x i32> zeroinitializer + %d = fcmp ueq <32 x float> %c, %a + store <32 x i1> %d, <32 x i1>* %z + ret void +} + +define void @fcmp_ueq_fv_v32f32_nonans(<32 x float>* %x, float %y, <32 x i1>* %z) { +; CHECK-LABEL: fcmp_ueq_fv_v32f32_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 32 +; CHECK-NEXT: vsetvli a3, a2, e32,m8,ta,mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vmfeq.vf v25, v8, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m2,ta,mu +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <32 x float>, <32 x float>* %x + %b = insertelement <32 x float> undef, float %y, i32 0 + %c = shufflevector <32 x float> %b, <32 x float> undef, <32 x i32> zeroinitializer + %d = fcmp nnan ueq <32 x float> %c, %a + store <32 x i1> %d, <32 x i1>* %z + ret void +} + +define void @fcmp_one_fv_v8f64(<16 x double>* %x, double %y, <16 x i1>* %z) { +; CHECK-LABEL: fcmp_one_fv_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 16 +; CHECK-NEXT: vsetvli a3, a2, e64,m8,ta,mu +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vmflt.vf v25, v8, fa0 +; CHECK-NEXT: vmfgt.vf v26, v8, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vmor.mm v25, v26, v25 +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <16 x double>, <16 x double>* %x + %b = insertelement <16 x double> undef, double %y, i32 0 + %c = shufflevector <16 x double> %b, <16 x double> undef, <16 x i32> zeroinitializer + %d = fcmp one <16 x double> %c, %a + store <16 x i1> %d, <16 x i1>* %z + ret void +} + +define void @fcmp_one_fv_v8f64_nonans(<16 x double>* %x, double %y, <16 x i1>* %z) { +; CHECK-LABEL: fcmp_one_fv_v8f64_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 16 +; CHECK-NEXT: vsetvli a3, a2, e64,m8,ta,mu +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vmfne.vf v25, 
v8, fa0 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <16 x double>, <16 x double>* %x + %b = insertelement <16 x double> undef, double %y, i32 0 + %c = shufflevector <16 x double> %b, <16 x double> undef, <16 x i32> zeroinitializer + %d = fcmp nnan one <16 x double> %c, %a + store <16 x i1> %d, <16 x i1>* %z + ret void +} + +define void @fcmp_ord_fv_v4f16(<4 x half>* %x, half %y, <4 x i1>* %z) { +; CHECK-LABEL: fcmp_ord_fv_v4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 4 +; CHECK-NEXT: vsetvli a3, a2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vfmv.v.f v26, fa0 +; CHECK-NEXT: vmfeq.vf v27, v26, fa0 +; CHECK-NEXT: vmfeq.vv v26, v25, v25 +; CHECK-NEXT: vsetvli a0, a2, e8,mf2,ta,mu +; CHECK-NEXT: vmand.mm v25, v27, v26 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <4 x half>, <4 x half>* %x + %b = insertelement <4 x half> undef, half %y, i32 0 + %c = shufflevector <4 x half> %b, <4 x half> undef, <4 x i32> zeroinitializer + %d = fcmp ord <4 x half> %c, %a + store <4 x i1> %d, <4 x i1>* %z + ret void +} + +define void @fcmp_uno_fv_v4f16(<2 x half>* %x, half %y, <2 x i1>* %z) { +; CHECK-LABEL: fcmp_uno_fv_v4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 2 +; CHECK-NEXT: vsetvli a3, a2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vfmv.v.f v26, fa0 +; CHECK-NEXT: vmfne.vf v27, v26, fa0 +; CHECK-NEXT: vmfne.vv v26, v25, v25 +; CHECK-NEXT: vsetvli a0, a2, e8,mf2,ta,mu +; CHECK-NEXT: vmor.mm v25, v27, v26 +; CHECK-NEXT: vsetvli a0, a2, e8,m1,ta,mu +; CHECK-NEXT: vse1.v v25, (a1) +; CHECK-NEXT: ret + %a = load <2 x half>, <2 x half>* %x + %b = insertelement <2 x half> undef, half %y, i32 0 + %c = shufflevector <2 x half> %b, <2 x half> undef, <2 x i32> zeroinitializer + %d = fcmp uno <2 x half> %c, %a + store <2 x i1> %d, <2 x i1>* %z + ret void +}