diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -161,6 +161,9 @@
 def SDTFPExtendOp : SDTypeProfile<1, 1, [ // fpextend
   SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<1, 0>, SDTCisSameNumEltsAs<0, 1>
 ]>;
+def SDIsFPClassOp : SDTypeProfile<1, 2, [ // is_fpclass
+  SDTCisInt<0>, SDTCisFP<1>, SDTCisInt<2>, SDTCisSameNumEltsAs<0, 1>
+]>;
 def SDTIntToFPOp : SDTypeProfile<1, 1, [ // [su]int_to_fp
   SDTCisFP<0>, SDTCisInt<1>, SDTCisSameNumEltsAs<0, 1>
 ]>;
@@ -517,6 +520,8 @@
 def fpextend : SDNode<"ISD::FP_EXTEND" , SDTFPExtendOp>;
 def fcopysign : SDNode<"ISD::FCOPYSIGN" , SDTFPSignOp>;
 
+def is_fpclass : SDNode<"ISD::IS_FPCLASS" , SDIsFPClassOp>;
+
 def sint_to_fp : SDNode<"ISD::SINT_TO_FP" , SDTIntToFPOp>;
 def uint_to_fp : SDNode<"ISD::UINT_TO_FP" , SDTIntToFPOp>;
 def fp_to_sint : SDNode<"ISD::FP_TO_SINT" , SDTFPToIntOp>;
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2316,6 +2316,34 @@
 
     return true;
   }
+  case Intrinsic::is_fpclass: {
+    // Translate the is_fpclass intrinsic call into G_IS_FPCLASS. Call operand
+    // 1 is the constant test mask; the mask and the enum value of the tested
+    // value's float semantics are appended as immediate operands.
+    unsigned Flags = MachineInstr::copyFlagsFromInstruction(CI);
+
+    Value *FpValue = CI.getOperand(0);
+    Type *FpEltTy = FpValue->getType()->getScalarType();
+    ConstantInt *TestMaskValue = cast<ConstantInt>(CI.getOperand(1));
+    const fltSemantics &FpSem = FpEltTy->getFltSemantics();
+
+    // Hold the builder by value: buildInstr() yields a temporary and the
+    // chained addImm() calls only hand back a reference to it, so a reference
+    // bound here would dangle before setMIFlag() is reached below.
+    MachineInstrBuilder IsFpclass =
+        MIRBuilder
+            .buildInstr(TargetOpcode::G_IS_FPCLASS, {getOrCreateVReg(CI)},
+                        {getOrCreateVReg(*FpValue)}, Flags)
+            .addImm(TestMaskValue->getZExtValue())
+            .addImm(static_cast<unsigned>(APFloat::SemanticsToEnum(FpSem)));
+
+    // Only functions without the strictfp attribute get the nofpexcept flag.
+    const Function *F = CI.getFunction();
+    if (!F->getAttributes().hasFnAttr(llvm::Attribute::StrictFP))
+      IsFpclass.setMIFlag(MachineInstr::NoFPExcept);
+
+    return true;
+  }
 #define INSTRUCTION(NAME,
NARG, ROUND_MODE, INTRINSIC) \ case Intrinsic::INTRINSIC: #include "llvm/IR/ConstrainedOps.def" diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -2445,6 +2445,12 @@ Observer.changedInstr(MI); return Legalized; + case TargetOpcode::G_IS_FPCLASS: + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT); + Observer.changedInstr(MI); + return Legalized; + case TargetOpcode::G_PTR_ADD: assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD"); Observer.changingInstr(MI); @@ -4222,6 +4228,8 @@ case G_ICMP: case G_FCMP: return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cpm predicate*/}); + case G_IS_FPCLASS: + return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/}); case G_SELECT: if (MRI.getType(MI.getOperand(1).getReg()).isVector()) return fewerElementsVectorMultiEltType(GMI, NumElts); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1477,7 +1477,11 @@ SDLoc DL(N); SDValue ArgLo, ArgHi; SDValue Test = N->getOperand(1); - GetSplitVector(N->getOperand(0), ArgLo, ArgHi); + SDValue FpValue = N->getOperand(0); + if (getTypeAction(FpValue.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(FpValue, ArgLo, ArgHi); + else + std::tie(ArgLo, ArgHi) = DAG.SplitVector(FpValue, SDLoc(FpValue)); EVT LoVT, HiVT; std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); @@ -4716,8 +4720,11 @@ } SDValue DAGTypeLegalizer::WidenVecRes_IS_FPCLASS(SDNode *N) { + SDValue FpValue = N->getOperand(0); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Arg = GetWidenedVector(N->getOperand(0)); + if 
(getTypeAction(FpValue.getValueType()) != TargetLowering::TypeWidenVector) + return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); + SDValue Arg = GetWidenedVector(FpValue); return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, {Arg, N->getOperand(1)}, N->getFlags()); } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6489,7 +6489,8 @@ // If ISD::IS_FPCLASS should be expanded, do it right now, because the // expansion can use illegal types. Making expansion early allows // legalizing these types prior to selection. - if (!TLI.isOperationLegalOrCustom(ISD::IS_FPCLASS, ArgVT)) { + if (!TLI.isOperationLegal(ISD::IS_FPCLASS, ArgVT) && + !TLI.isOperationCustom(ISD::IS_FPCLASS, ArgVT)) { SDValue Result = TLI.expandIS_FPCLASS(DestVT, Op, Test, Flags, sdl, DAG); setValue(&I, Result); return; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -74,6 +74,8 @@ SDValue LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; protected: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -306,6 +306,20 @@ setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Custom); + if (Subtarget->has16BitInsts()) { + setOperationAction(ISD::IS_FPCLASS, {MVT::f16, MVT::f32, MVT::f64}, Legal); + } else { + setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom); + 
setOperationAction(ISD::IS_FPCLASS, {MVT::f32, MVT::f64}, Legal); + } + + setOperationAction( + ISD::IS_FPCLASS, + {MVT::v2f16, MVT::v3f16, MVT::v4f16, MVT::v16f16, MVT::v2f32, MVT::v3f32, + MVT::v4f32, MVT::v5f32, MVT::v6f32, MVT::v7f32, MVT::v8f32, MVT::v16f32, + MVT::v2f64, MVT::v3f64, MVT::v4f64, MVT::v8f64, MVT::v16f64}, + Custom); + // Expand to fneg + fadd. setOperationAction(ISD::FSUB, MVT::f64, Expand); @@ -1203,6 +1217,8 @@ case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); + case ISD::IS_FPCLASS: + return LowerIS_FPCLASS(Op, DAG); case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: case ISD::CTLZ: @@ -2754,6 +2770,16 @@ return SDValue(); } +SDValue AMDGPUTargetLowering::LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + SDValue Src = Op.getOperand(0); + assert(Src.getSimpleValueType() == MVT::f16); + + SDValue FpExtend = DAG.getFPExtendOrRound(Src, DL, MVT::f32); + SDValue LegalIsFpClass = DAG.getNode(ISD::IS_FPCLASS, DL, Op.getValueType(), {FpExtend, Op.getOperand(1)}, Op->getFlags()); + return LegalIsFpClass; +} + SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const { EVT ExtraVT = cast(Op.getOperand(1))->getVT(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -130,7 +130,6 @@ def AMDGPUpk_u16_u32_impl : SDNode<"AMDGPUISD::CVT_PK_U16_U32", AMDGPUIntPackOp>; def AMDGPUfp_to_f16 : SDNode<"AMDGPUISD::FP_TO_FP16" , SDTFPToIntOp>; - def AMDGPUfp_class_impl : SDNode<"AMDGPUISD::FP_CLASS", AMDGPUFPClassOp>; // out = max(a, b) a and b are floats, where a nan comparison fails. 
@@ -388,7 +387,8 @@ def AMDGPUfp_class : PatFrags<(ops node:$src0, node:$src1), [(int_amdgcn_class node:$src0, node:$src1), - (AMDGPUfp_class_impl node:$src0, node:$src1)]>; + (AMDGPUfp_class_impl node:$src0, node:$src1), + (is_fpclass node:$src0, node:$src1)]>; def AMDGPUfmed3 : PatFrags<(ops node:$src0, node:$src1, node:$src2), [(int_amdgcn_fmed3 node:$src0, node:$src1, node:$src2), diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -105,6 +105,7 @@ bool selectG_IMPLICIT_DEF(MachineInstr &I) const; bool selectG_INSERT(MachineInstr &I) const; bool selectG_SBFX_UBFX(MachineInstr &I) const; + bool selectG_IS_FPCLASS(MachineInstr &I) const; bool selectInterpP1F16(MachineInstr &MI) const; bool selectWritelane(MachineInstr &MI) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -910,6 +910,47 @@ return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); } +static int getV_CMP_CLASSOpcode(unsigned size, bool hasTrue16BitInsts) { + switch(size) { + default: return -1; + case 16: + return hasTrue16BitInsts ? 
AMDGPU::V_CMP_CLASS_F16_t16_e64 + : AMDGPU::V_CMP_CLASS_F16_e64; + case 32: return AMDGPU::V_CMP_CLASS_F32_e64; + case 64: return AMDGPU::V_CMP_CLASS_F64_e64; + } +} + +bool AMDGPUInstructionSelector::selectG_IS_FPCLASS(MachineInstr &I) const { + MachineBasicBlock *BB = I.getParent(); + const DebugLoc &DL = I.getDebugLoc(); + + Register CCReg = I.getOperand(0).getReg(); + Register SrcReg; + unsigned Mods; + std::tie(SrcReg, Mods) = selectVOP3ModsImpl(I.getOperand(1)); + unsigned Mask = I.getOperand(2).getImm(); + unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI); + + int Opcode = getV_CMP_CLASSOpcode(Size, STI.hasTrue16BitInsts()); + if (Opcode == -1) + return false; + + Register ConstantReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); + BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_MOV_B32_e32), ConstantReg) + .addImm(Mask); + MachineInstrBuilder CmpClassBuilder = + BuildMI(*BB, &I, DL, TII.get(Opcode), CCReg) + .addImm(Mods) + .addReg(SrcReg) + .addReg(ConstantReg); + + MachineInstr *CmpClass = CmpClassBuilder; + bool Ret = constrainSelectedInstRegOperands(*CmpClass, TII, TRI, RBI); + I.eraseFromParent(); + return Ret; +} + bool AMDGPUInstructionSelector::selectInterpP1F16(MachineInstr &MI) const { if (STI.getLDSBankCount() != 16) return selectImpl(MI, *CoverageInfo); @@ -3313,6 +3354,8 @@ if (selectG_ICMP(I)) return true; return selectImpl(I, *CoverageInfo); + case TargetOpcode::G_IS_FPCLASS: + return selectG_IS_FPCLASS(I); case TargetOpcode::G_LOAD: case TargetOpcode::G_STORE: case TargetOpcode::G_ATOMIC_CMPXCHG: diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -978,6 +978,11 @@ .scalarize(0) .widenScalarToNextPow2(0, 32); + getActionDefinitionsBuilder(G_IS_FPCLASS) + .legalForCartesianProduct({S1}, ST.has16BitInsts() ? 
FPTypes16 : FPTypesBase) + .widenScalarToNextPow2(1) + .clampScalar(1, S32, S64) + .scalarize(0); // The hardware instructions return a different result on 0 than the generic // instructions expect. The hardware produces -1, but these produce the diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -3936,6 +3936,14 @@ OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); break; } + case AMDGPU::G_IS_FPCLASS: { + Register SrcReg = MI.getOperand(1).getReg(); + unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits(); + unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); + OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize); + OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize); + break; + } case AMDGPU::G_STORE: { assert(MI.getOperand(0).isReg()); unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -256,6 +256,7 @@ case ISD::INSERT_VECTOR_ELT: case ISD::EXTRACT_SUBVECTOR: case ISD::SCALAR_TO_VECTOR: + case ISD::IS_FPCLASS: break; case ISD::INSERT_SUBVECTOR: case ISD::CONCAT_VECTORS: @@ -525,6 +526,7 @@ case ISD::INSERT_SUBVECTOR: case ISD::EXTRACT_SUBVECTOR: case ISD::SCALAR_TO_VECTOR: + case ISD::IS_FPCLASS: break; case ISD::CONCAT_VECTORS: setOperationAction(Op, VT, Custom); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fpclass-flags.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fpclass-flags.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fpclass-flags.ll @@ -0,0 +1,17 @@ +; RUN: llc -march=amdgcn -mcpu=gfx1030 -O0 
-stop-after=irtranslator -global-isel %s -o - | FileCheck %s + +; CHECK-LABEL: name: fpclass_has_nofpexcept +; CHECK: nofpexcept G_IS_FPCLASS +define i1 @fpclass_has_nofpexcept(float %x) { + %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 3) + ret i1 %1 +} + +; CHECK-LABEL: name: strict_fpclass +; CHECK-NOT: nofpexcept +define i1 @strict_fpclass(float %x) strictfp { + %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 3) + ret i1 %1 +} + +declare i1 @llvm.is.fpclass.f32(float, i32) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll @@ -0,0 +1,1949 @@ +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx704 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7SELDAG,GFX7CHECK %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx704 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7GLISEL,GFX7CHECK %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX8SELDAG,GFX8CHECK %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX8GLISEL,GFX8CHECK %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9SELDAG,GFX9CHECK %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9GLISEL,GFX9CHECK %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1031 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10SELDAG,GFX10CHECK %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1031 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10GLISEL,GFX10CHECK %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX11CHECK %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck 
--check-prefix=GFX11CHECK %s + +define i1 @isnan_half(half %x) nounwind { +; GFX7SELDAG-LABEL: isnan_half: +; GFX7SELDAG: ; %bb.0: +; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7SELDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7SELDAG-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7GLISEL-LABEL: isnan_half: +; GFX7GLISEL: ; %bb.0: +; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7GLISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7GLISEL-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isnan_half: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isnan_half: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isnan_half: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isnan_half: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 3) ; nan 
+ ret i1 %1 +} + +define <2 x i1> @isnan_v2half(<2 x half> %x) nounwind { +; GFX7SELDAG-LABEL: isnan_v2half: +; GFX7SELDAG: ; %bb.0: +; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7SELDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7SELDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7SELDAG-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7SELDAG-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7GLISEL-LABEL: isnan_v2half: +; GFX7GLISEL: ; %bb.0: +; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7GLISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7GLISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7GLISEL-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7GLISEL-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8SELDAG-LABEL: isnan_v2half: +; GFX8SELDAG: ; %bb.0: +; GFX8SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8SELDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX8SELDAG-NEXT: v_cmp_class_f16_e64 s[4:5], v1, 3 +; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX8SELDAG-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3 +; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8GLISEL-LABEL: isnan_v2half: +; GFX8GLISEL: ; %bb.0: +; GFX8GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8GLISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX8GLISEL-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3 +; GFX8GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8GLISEL-NEXT: v_cmp_class_f16_e64 s[4:5], v1, 3 +; GFX8GLISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX8GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9SELDAG-LABEL: 
isnan_v2half: +; GFX9SELDAG: ; %bb.0: +; GFX9SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9SELDAG-NEXT: v_mov_b32_e32 v1, 3 +; GFX9SELDAG-NEXT: v_cmp_class_f16_sdwa s[4:5], v0, v1 src0_sel:WORD_1 src1_sel:DWORD +; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX9SELDAG-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3 +; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9GLISEL-LABEL: isnan_v2half: +; GFX9GLISEL: ; %bb.0: +; GFX9GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9GLISEL-NEXT: v_mov_b32_e32 v1, 3 +; GFX9GLISEL-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3 +; GFX9GLISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX9GLISEL-NEXT: v_cmp_class_f16_sdwa s[4:5], v0, v1 src0_sel:WORD_1 src1_sel:DWORD +; GFX9GLISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX9GLISEL-NEXT: v_mov_b32_e32 v0, v2 +; GFX9GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isnan_v2half: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_mov_b32_e32 v1, 3 +; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f16_sdwa s4, v0, v1 src0_sel:WORD_1 src1_sel:DWORD +; GFX10CHECK-NEXT: v_mov_b32_e32 v0, v2 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isnan_v2half: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v1, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 
v1, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call <2 x i1> @llvm.is.fpclass.v2f16(<2 x half> %x, i32 3) ; nan + ret <2 x i1> %1 +} + +define <3 x i1> @isnan_v3half(<3 x half> %x) nounwind { +; GFX7SELDAG-LABEL: isnan_v3half: +; GFX7SELDAG: ; %bb.0: +; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7SELDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7SELDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7SELDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7SELDAG-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7SELDAG-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX7SELDAG-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7GLISEL-LABEL: isnan_v3half: +; GFX7GLISEL: ; %bb.0: +; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7GLISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7GLISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7GLISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7GLISEL-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7GLISEL-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX7GLISEL-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8SELDAG-LABEL: isnan_v3half: +; GFX8SELDAG: ; %bb.0: +; GFX8SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8SELDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX8SELDAG-NEXT: v_cmp_class_f16_e64 s[4:5], v2, 3 +; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX8SELDAG-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3 +; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8SELDAG-NEXT: 
v_cmp_class_f16_e64 s[4:5], v1, 3 +; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX8SELDAG-NEXT: v_mov_b32_e32 v1, v3 +; GFX8SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8GLISEL-LABEL: isnan_v3half: +; GFX8GLISEL: ; %bb.0: +; GFX8GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8GLISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX8GLISEL-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3 +; GFX8GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8GLISEL-NEXT: v_cmp_class_f16_e64 s[4:5], v2, 3 +; GFX8GLISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX8GLISEL-NEXT: v_cmp_class_f16_e64 s[4:5], v1, 3 +; GFX8GLISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX8GLISEL-NEXT: v_mov_b32_e32 v1, v3 +; GFX8GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9SELDAG-LABEL: isnan_v3half: +; GFX9SELDAG: ; %bb.0: +; GFX9SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9SELDAG-NEXT: v_mov_b32_e32 v2, 3 +; GFX9SELDAG-NEXT: v_cmp_class_f16_sdwa s[4:5], v0, v2 src0_sel:WORD_1 src1_sel:DWORD +; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX9SELDAG-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3 +; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9SELDAG-NEXT: v_cmp_class_f16_e64 s[4:5], v1, 3 +; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX9SELDAG-NEXT: v_mov_b32_e32 v1, v3 +; GFX9SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9GLISEL-LABEL: isnan_v3half: +; GFX9GLISEL: ; %bb.0: +; GFX9GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9GLISEL-NEXT: v_mov_b32_e32 v2, 3 +; GFX9GLISEL-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3 +; GFX9GLISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX9GLISEL-NEXT: v_cmp_class_f16_sdwa s[4:5], v0, v2 src0_sel:WORD_1 src1_sel:DWORD +; GFX9GLISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX9GLISEL-NEXT: v_cmp_class_f16_e64 s[4:5], v1, 3 +; GFX9GLISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX9GLISEL-NEXT: v_mov_b32_e32 v0, v4 +; GFX9GLISEL-NEXT: v_mov_b32_e32 v1, v3 +; GFX9GLISEL-NEXT: s_setpc_b64 s[30:31] 
+; +; GFX10SELDAG-LABEL: isnan_v3half: +; GFX10SELDAG: ; %bb.0: +; GFX10SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10SELDAG-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10SELDAG-NEXT: v_mov_b32_e32 v2, 3 +; GFX10SELDAG-NEXT: v_cmp_class_f16_sdwa s4, v0, v2 src0_sel:WORD_1 src1_sel:DWORD +; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 +; GFX10SELDAG-NEXT: v_cmp_class_f16_e64 s4, v0, 3 +; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10SELDAG-NEXT: v_cmp_class_f16_e64 s4, v1, 3 +; GFX10SELDAG-NEXT: v_mov_b32_e32 v1, v3 +; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4 +; GFX10SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10GLISEL-LABEL: isnan_v3half: +; GFX10GLISEL: ; %bb.0: +; GFX10GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10GLISEL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10GLISEL-NEXT: v_mov_b32_e32 v2, 3 +; GFX10GLISEL-NEXT: v_cmp_class_f16_e64 s4, v0, 3 +; GFX10GLISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s4 +; GFX10GLISEL-NEXT: v_cmp_class_f16_sdwa s4, v0, v2 src0_sel:WORD_1 src1_sel:DWORD +; GFX10GLISEL-NEXT: v_mov_b32_e32 v0, v4 +; GFX10GLISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 +; GFX10GLISEL-NEXT: v_cmp_class_f16_e64 s4, v1, 3 +; GFX10GLISEL-NEXT: v_mov_b32_e32 v1, v3 +; GFX10GLISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4 +; GFX10GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isnan_v3half: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v2, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v1, 3 +; GFX11CHECK-NEXT: 
v_mov_b32_e32 v1, v3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call <3 x i1> @llvm.is.fpclass.v3f16(<3 x half> %x, i32 3) ; nan + ret <3 x i1> %1 +} + +define <4 x i1> @isnan_v4half(<4 x half> %x) nounwind { +; GFX7SELDAG-LABEL: isnan_v4half: +; GFX7SELDAG: ; %bb.0: +; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7SELDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7SELDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7SELDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7SELDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7SELDAG-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7SELDAG-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX7SELDAG-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX7SELDAG-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7GLISEL-LABEL: isnan_v4half: +; GFX7GLISEL: ; %bb.0: +; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7GLISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7GLISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7GLISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7GLISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7GLISEL-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7GLISEL-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX7GLISEL-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX7GLISEL-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v3, 
0, 1, s[4:5] +; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8SELDAG-LABEL: isnan_v4half: +; GFX8SELDAG: ; %bb.0: +; GFX8SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8SELDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX8SELDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX8SELDAG-NEXT: v_cmp_class_f16_e64 s[4:5], v2, 3 +; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX8SELDAG-NEXT: v_cmp_class_f16_e64 s[4:5], v3, 3 +; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX8SELDAG-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3 +; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8SELDAG-NEXT: v_cmp_class_f16_e64 s[4:5], v1, 3 +; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX8SELDAG-NEXT: v_mov_b32_e32 v1, v4 +; GFX8SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8GLISEL-LABEL: isnan_v4half: +; GFX8GLISEL: ; %bb.0: +; GFX8GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8GLISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX8GLISEL-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3 +; GFX8GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8GLISEL-NEXT: v_cmp_class_f16_e64 s[4:5], v2, 3 +; GFX8GLISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX8GLISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX8GLISEL-NEXT: v_cmp_class_f16_e64 s[4:5], v1, 3 +; GFX8GLISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX8GLISEL-NEXT: v_cmp_class_f16_e64 s[4:5], v3, 3 +; GFX8GLISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX8GLISEL-NEXT: v_mov_b32_e32 v1, v4 +; GFX8GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9SELDAG-LABEL: isnan_v4half: +; GFX9SELDAG: ; %bb.0: +; GFX9SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9SELDAG-NEXT: v_mov_b32_e32 v2, 3 +; GFX9SELDAG-NEXT: v_cmp_class_f16_sdwa s[4:5], v0, v2 src0_sel:WORD_1 src1_sel:DWORD +; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX9SELDAG-NEXT: v_cmp_class_f16_sdwa s[4:5], v1, v2 src0_sel:WORD_1 src1_sel:DWORD +; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX9SELDAG-NEXT: 
v_cmp_class_f16_e64 s[4:5], v0, 3 +; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9SELDAG-NEXT: v_cmp_class_f16_e64 s[4:5], v1, 3 +; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX9SELDAG-NEXT: v_mov_b32_e32 v1, v4 +; GFX9SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9GLISEL-LABEL: isnan_v4half: +; GFX9GLISEL: ; %bb.0: +; GFX9GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9GLISEL-NEXT: v_mov_b32_e32 v3, 3 +; GFX9GLISEL-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3 +; GFX9GLISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX9GLISEL-NEXT: v_cmp_class_f16_sdwa s[4:5], v0, v3 src0_sel:WORD_1 src1_sel:DWORD +; GFX9GLISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5] +; GFX9GLISEL-NEXT: v_cmp_class_f16_e64 s[4:5], v1, 3 +; GFX9GLISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX9GLISEL-NEXT: v_cmp_class_f16_sdwa s[4:5], v1, v3 src0_sel:WORD_1 src1_sel:DWORD +; GFX9GLISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX9GLISEL-NEXT: v_mov_b32_e32 v0, v4 +; GFX9GLISEL-NEXT: v_mov_b32_e32 v1, v5 +; GFX9GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10SELDAG-LABEL: isnan_v4half: +; GFX10SELDAG: ; %bb.0: +; GFX10SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10SELDAG-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10SELDAG-NEXT: v_mov_b32_e32 v2, 3 +; GFX10SELDAG-NEXT: v_cmp_class_f16_sdwa s4, v1, v2 src0_sel:WORD_1 src1_sel:DWORD +; GFX10SELDAG-NEXT: v_cmp_class_f16_sdwa s5, v0, v2 src0_sel:WORD_1 src1_sel:DWORD +; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 +; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, s5 +; GFX10SELDAG-NEXT: v_cmp_class_f16_e64 s5, v0, 3 +; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s5 +; GFX10SELDAG-NEXT: v_cmp_class_f16_e64 s5, v1, 3 +; GFX10SELDAG-NEXT: v_mov_b32_e32 v1, v4 +; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s5 +; GFX10SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10GLISEL-LABEL: isnan_v4half: +; GFX10GLISEL: ; %bb.0: +; GFX10GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10GLISEL-NEXT: 
s_waitcnt_vscnt null, 0x0 +; GFX10GLISEL-NEXT: v_mov_b32_e32 v3, 3 +; GFX10GLISEL-NEXT: v_cmp_class_f16_e64 s4, v0, 3 +; GFX10GLISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s4 +; GFX10GLISEL-NEXT: v_cmp_class_f16_sdwa s4, v0, v3 src0_sel:WORD_1 src1_sel:DWORD +; GFX10GLISEL-NEXT: v_mov_b32_e32 v0, v4 +; GFX10GLISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, s4 +; GFX10GLISEL-NEXT: v_cmp_class_f16_e64 s4, v1, 3 +; GFX10GLISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4 +; GFX10GLISEL-NEXT: v_cmp_class_f16_sdwa s4, v1, v3 src0_sel:WORD_1 src1_sel:DWORD +; GFX10GLISEL-NEXT: v_mov_b32_e32 v1, v5 +; GFX10GLISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 +; GFX10GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isnan_v4half: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 3 +; GFX11CHECK-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX11CHECK-NEXT: v_lshrrev_b32_e32 v4, 16, v1 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v1, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v3, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v4, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call <4 x i1> @llvm.is.fpclass.v4f16(<4 x half> %x, i32 3) ; nan + ret <4 x i1> %1 +} + +define i1 @isnan_float(float %x) nounwind { +; GFX7CHECK-LABEL: isnan_float: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: 
isnan_float: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isnan_float: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isnan_float: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isnan_float: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 3) ; nan + ret i1 %1 +} + +define <2 x i1> @isnan_v2float(<2 x float> %x) nounwind { +; GFX7CHECK-LABEL: isnan_v2float: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isnan_v2float: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 
v1, 0, 1, s[4:5] +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isnan_v2float: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isnan_v2float: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v1, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isnan_v2float: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v1, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> %x, i32 3) ; nan + ret <2 x i1> %1 +} + +define <3 x i1> @isnan_v3float(<3 x float> %x) nounwind { +; GFX7CHECK-LABEL: isnan_v3float: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX8CHECK-LABEL: isnan_v3float: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isnan_v3float: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isnan_v3float: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v1, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v2, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isnan_v3float: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v1, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v2, 3 +; 
GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call <3 x i1> @llvm.is.fpclass.v3f32(<3 x float> %x, i32 3) ; nan + ret <3 x i1> %1 +} + +define <4 x i1> @isnan_v4float(<4 x float> %x) nounwind { +; GFX7CHECK-LABEL: isnan_v4float: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isnan_v4float: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isnan_v4float: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 
v3, 0, 1, s[4:5] +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isnan_v4float: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v1, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v2, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v3, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isnan_v4float: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v1, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v2, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v3, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call <4 x i1> @llvm.is.fpclass.v4f32(<4 x float> %x, i32 3) ; nan + ret <4 x i1> %1 +} + +define <5 x i1> @isnan_v5float(<5 x float> %x) nounwind { +; GFX7CHECK-LABEL: isnan_v5float: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 
s[4:5], v2, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v4, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isnan_v5float: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v4, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isnan_v5float: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v4, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isnan_v5float: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; 
GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v1, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v2, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v3, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v4, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isnan_v5float: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v1, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v2, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v3, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v4, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call <5 x i1> @llvm.is.fpclass.v5f32(<5 x float> %x, i32 3) ; nan + ret <5 x i1> %1 +} + +define <6 x i1> @isnan_v6float(<6 x float> %x) nounwind { +; GFX7CHECK-LABEL: isnan_v6float: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; 
GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v4, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v5, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5] +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isnan_v6float: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v4, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v5, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5] +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isnan_v6float: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v4, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v5, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5] +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: 
isnan_v6float: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v1, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v2, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v3, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v4, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v5, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isnan_v6float: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v1, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v2, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v3, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v4, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v5, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call <6 x i1> @llvm.is.fpclass.v6f32(<6 x float> %x, i32 3) ; nan + ret <6 x i1> %1 +} + +define <7 x i1> 
@isnan_v7float(<7 x float> %x) nounwind { +; GFX7CHECK-LABEL: isnan_v7float: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v4, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v5, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v6, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isnan_v7float: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v4, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v5, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v6, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isnan_v7float: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: 
v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v4, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v5, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v6, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isnan_v7float: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v1, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v2, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v3, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v4, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v5, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v6, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isnan_v7float: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 
+; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v1, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v2, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v3, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v4, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v5, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v6, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call <7 x i1> @llvm.is.fpclass.v7f32(<7 x float> %x, i32 3) ; nan + ret <7 x i1> %1 +} + +define <8 x i1> @isnan_v8float(<8 x float> %x) nounwind { +; GFX7CHECK-LABEL: isnan_v8float: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v4, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v5, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v6, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 
s[4:5], v7, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isnan_v8float: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v4, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v5, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v6, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v7, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isnan_v8float: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v4, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v5, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v6, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; 
GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v7, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isnan_v8float: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v1, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v2, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v3, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v4, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v5, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v6, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v7, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isnan_v8float: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v1, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v2, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v3, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v4, 3 +; GFX11CHECK-NEXT: 
s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v5, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v6, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v7, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call <8 x i1> @llvm.is.fpclass.v8f32(<8 x float> %x, i32 3) ; nan + ret <8 x i1> %1 +} + +define <16 x i1> @isnan_v16float(<16 x float> %x) nounwind { +; GFX7CHECK-LABEL: isnan_v16float: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v4, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v5, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v6, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v7, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v8, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v9, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v10, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 
v10, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v11, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v12, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v13, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v14, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v15, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isnan_v16float: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v4, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v5, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v6, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v7, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v8, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v9, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v10, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v11, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v11, 
0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v12, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v13, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v14, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v15, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isnan_v16float: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v4, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v5, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v6, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v7, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v8, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v9, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v10, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v11, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v12, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, 
s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v13, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v14, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v15, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isnan_v16float: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v1, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v2, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v3, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v4, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v5, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v6, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v7, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v8, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v9, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v10, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v11, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v12, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v13, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, s4 +; GFX10CHECK-NEXT: 
v_cmp_class_f32_e64 s4, v14, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v14, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v15, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v15, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isnan_v16float: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v1, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v2, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v3, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v4, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v5, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v6, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v7, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v8, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v9, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v10, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | 
instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v11, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v12, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v13, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v14, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v14, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v15, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v15, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call <16 x i1> @llvm.is.fpclass.v16f32(<16 x float> %x, i32 3) ; nan + ret <16 x i1> %1 +} + +define i1 @isnan_double(double %x) nounwind { +; GFX7CHECK-LABEL: isnan_double: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_cmp_class_f64_e64 s[4:5], v[0:1], 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isnan_double: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_cmp_class_f64_e64 s[4:5], v[0:1], 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isnan_double: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f64_e64 s[4:5], v[0:1], 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isnan_double: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f64_e64 s4, v[0:1], 3 +; 
GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isnan_double: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f64_e64 s0, v[0:1], 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f64(double %x, i32 3) ; nan + ret i1 %1 +} + +define i1 @isnan_half_strictfp(half %x) strictfp nounwind { +; GFX7SELDAG-LABEL: isnan_half_strictfp: +; GFX7SELDAG: ; %bb.0: +; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7SELDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7SELDAG-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7GLISEL-LABEL: isnan_half_strictfp: +; GFX7GLISEL: ; %bb.0: +; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7GLISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7GLISEL-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isnan_half_strictfp: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isnan_half_strictfp: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isnan_half_strictfp: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 
0x0 +; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isnan_half_strictfp: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 3) ; nan + ret i1 %1 +} + +define i1 @isnan_float_strictfp(float %x) strictfp nounwind { +; GFX7CHECK-LABEL: isnan_float_strictfp: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isnan_float_strictfp: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isnan_float_strictfp: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isnan_float_strictfp: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isnan_float_strictfp: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: 
v_cmp_class_f32_e64 s0, v0, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 3) ; nan + ret i1 %1 +} + +define i1 @isnan_double_strictfp(double %x) strictfp nounwind { +; GFX7CHECK-LABEL: isnan_double_strictfp: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_cmp_class_f64_e64 s[4:5], v[0:1], 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isnan_double_strictfp: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_cmp_class_f64_e64 s[4:5], v[0:1], 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isnan_double_strictfp: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f64_e64 s[4:5], v[0:1], 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isnan_double_strictfp: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f64_e64 s4, v[0:1], 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isnan_double_strictfp: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f64_e64 s0, v[0:1], 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f64(double %x, i32 3) ; nan + ret i1 %1 +} + +define i1 @isinf_half(half %x) nounwind { +; GFX7SELDAG-LABEL: isinf_half: 
+; GFX7SELDAG: ; %bb.0: +; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7SELDAG-NEXT: v_mov_b32_e32 v1, 0x204 +; GFX7SELDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7SELDAG-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7GLISEL-LABEL: isinf_half: +; GFX7GLISEL: ; %bb.0: +; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7GLISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x204 +; GFX7GLISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isinf_half: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_mov_b32_e32 v1, 0x204 +; GFX8CHECK-NEXT: v_cmp_class_f16_e32 vcc, v0, v1 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isinf_half: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_mov_b32_e32 v1, 0x204 +; GFX9CHECK-NEXT: v_cmp_class_f16_e32 vcc, v0, v1 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isinf_half: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x204 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isinf_half: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x204 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + 
%1 = call i1 @llvm.is.fpclass.f16(half %x, i32 516) ; 0x204 = "inf" + ret i1 %1 +} + +define i1 @isinf_float(float %x) nounwind { +; GFX7CHECK-LABEL: isinf_float: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_mov_b32_e32 v1, 0x204 +; GFX7CHECK-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isinf_float: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_mov_b32_e32 v1, 0x204 +; GFX8CHECK-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isinf_float: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_mov_b32_e32 v1, 0x204 +; GFX9CHECK-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isinf_float: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 0x204 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isinf_float: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 0x204 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 516) ; 0x204 = "inf" + ret i1 %1 +} + +define i1 @isinf_double(double %x) nounwind { +; GFX7CHECK-LABEL: isinf_double: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_mov_b32_e32 
v2, 0x204 +; GFX7CHECK-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v2 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isinf_double: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_mov_b32_e32 v2, 0x204 +; GFX8CHECK-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v2 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isinf_double: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_mov_b32_e32 v2, 0x204 +; GFX9CHECK-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v2 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isinf_double: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f64_e64 s4, v[0:1], 0x204 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isinf_double: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f64_e64 s0, v[0:1], 0x204 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f64(double %x, i32 516) ; 0x204 = "inf" + ret i1 %1 +} + +define i1 @isfinite_half(half %x) nounwind { +; GFX7SELDAG-LABEL: isfinite_half: +; GFX7SELDAG: ; %bb.0: +; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7SELDAG-NEXT: v_mov_b32_e32 v1, 0x1f8 +; GFX7SELDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7SELDAG-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX7GLISEL-LABEL: isfinite_half: +; GFX7GLISEL: ; %bb.0: +; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7GLISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x1f8 +; GFX7GLISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isfinite_half: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_mov_b32_e32 v1, 0x1f8 +; GFX8CHECK-NEXT: v_cmp_class_f16_e32 vcc, v0, v1 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isfinite_half: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_mov_b32_e32 v1, 0x1f8 +; GFX9CHECK-NEXT: v_cmp_class_f16_e32 vcc, v0, v1 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isfinite_half: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x1f8 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isfinite_half: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x1f8 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 504) ; 0x1f8 = "finite" + ret i1 %1 +} + +define i1 @isfinite_float(float %x) nounwind { +; GFX7CHECK-LABEL: isfinite_float: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_mov_b32_e32 v1, 0x1f8 +; GFX7CHECK-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; 
GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isfinite_float: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_mov_b32_e32 v1, 0x1f8 +; GFX8CHECK-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isfinite_float: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_mov_b32_e32 v1, 0x1f8 +; GFX9CHECK-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isfinite_float: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 0x1f8 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isfinite_float: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 0x1f8 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; 0x1f8 = "finite" + ret i1 %1 +} + +define i1 @isfinite_double(double %x) nounwind { +; GFX7CHECK-LABEL: isfinite_double: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_mov_b32_e32 v2, 0x1f8 +; GFX7CHECK-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v2 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isfinite_double: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_mov_b32_e32 v2, 0x1f8 
+; GFX8CHECK-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v2 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isfinite_double: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_mov_b32_e32 v2, 0x1f8 +; GFX9CHECK-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v2 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isfinite_double: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f64_e64 s4, v[0:1], 0x1f8 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isfinite_double: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f64_e64 s0, v[0:1], 0x1f8 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f64(double %x, i32 504) ; 0x1f8 = "finite" + ret i1 %1 +} + +define i1 @isnormal_float(float %x) nounwind { +; GFX7CHECK-LABEL: isnormal_float: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_mov_b32_e32 v1, 0x108 +; GFX7CHECK-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isnormal_float: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_mov_b32_e32 v1, 0x108 +; GFX8CHECK-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isnormal_float: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_mov_b32_e32 v1, 0x108 +; GFX9CHECK-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isnormal_float: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 0x108 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isnormal_float: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 0x108 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 264) ; 0x108 = "normal" + ret i1 %1 +} + +define <2 x i1> @isnormal_v2double(<2 x double> %x) nounwind { +; GFX7CHECK-LABEL: isnormal_v2double: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_mov_b32_e32 v4, 0x108 +; GFX7CHECK-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7CHECK-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v4 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isnormal_v2double: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_mov_b32_e32 v4, 0x108 +; GFX8CHECK-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX8CHECK-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v4 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isnormal_v2double: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX9CHECK-NEXT: v_mov_b32_e32 v4, 0x108 +; GFX9CHECK-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9CHECK-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v4 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isnormal_v2double: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f64_e64 s4, v[0:1], 0x108 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f64_e64 s4, v[2:3], 0x108 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isnormal_v2double: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f64_e64 s0, v[0:1], 0x108 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f64_e64 s0, v[2:3], 0x108 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call <2 x i1> @llvm.is.fpclass.v2f64(<2 x double> %x, i32 264) ; 0x108 = "normal" + ret <2 x i1> %1 +} + +define i1 @issubnormal_float(float %x) nounwind { +; GFX7CHECK-LABEL: issubnormal_float: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_mov_b32_e32 v1, 0x90 +; GFX7CHECK-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: issubnormal_float: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_mov_b32_e32 v1, 0x90 +; GFX8CHECK-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 
+; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: issubnormal_float: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_mov_b32_e32 v1, 0x90 +; GFX9CHECK-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: issubnormal_float: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 0x90 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: issubnormal_float: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 0x90 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 144) ; 0x90 = "subnormal" + ret i1 %1 +} + +define i1 @iszero_float(float %x) nounwind { +; GFX7CHECK-LABEL: iszero_float: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_mov_b32_e32 v1, 0x60 +; GFX7CHECK-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: iszero_float: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_mov_b32_e32 v1, 0x60 +; GFX8CHECK-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: iszero_float: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_mov_b32_e32 v1, 0x60 +; GFX9CHECK-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; 
GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: iszero_float: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 0x60 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: iszero_float: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 0x60 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 96) ; 0x60 = "zero" + ret i1 %1 +} + +declare i1 @llvm.is.fpclass.f32(float, i32) +declare i1 @llvm.is.fpclass.f16(half, i32) +declare i1 @llvm.is.fpclass.f64(double, i32) +declare <2 x i1> @llvm.is.fpclass.v2f16(<2 x half>, i32) +declare <3 x i1> @llvm.is.fpclass.v3f16(<3 x half>, i32) +declare <4 x i1> @llvm.is.fpclass.v4f16(<4 x half>, i32) +declare <2 x i1> @llvm.is.fpclass.v2f32(<2 x float>, i32) +declare <3 x i1> @llvm.is.fpclass.v3f32(<3 x float>, i32) +declare <4 x i1> @llvm.is.fpclass.v4f32(<4 x float>, i32) +declare <5 x i1> @llvm.is.fpclass.v5f32(<5 x float>, i32) +declare <6 x i1> @llvm.is.fpclass.v6f32(<6 x float>, i32) +declare <7 x i1> @llvm.is.fpclass.v7f32(<7 x float>, i32) +declare <8 x i1> @llvm.is.fpclass.v8f32(<8 x float>, i32) +declare <16 x i1> @llvm.is.fpclass.v16f32(<16 x float>, i32) +declare <2 x i1> @llvm.is.fpclass.v2f64(<2 x double>, i32)