diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td --- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -104,6 +104,7 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; def : GINodeEquiv; // ISD::INTRINSIC_VOID can also be handled with G_INTRINSIC_W_SIDE_EFFECTS. def : GINodeEquiv; diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -161,6 +161,9 @@ def SDTFPExtendOp : SDTypeProfile<1, 1, [ // fpextend SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<1, 0>, SDTCisSameNumEltsAs<0, 1> ]>; +def SDIsFPClassOp : SDTypeProfile<1, 2, [ // is_fpclass + SDTCisInt<0>, SDTCisFP<1>, SDTCisInt<2>, SDTCisSameNumEltsAs<0, 1> +]>; def SDTIntToFPOp : SDTypeProfile<1, 1, [ // [su]int_to_fp SDTCisFP<0>, SDTCisInt<1>, SDTCisSameNumEltsAs<0, 1> ]>; @@ -517,6 +520,8 @@ def fpextend : SDNode<"ISD::FP_EXTEND" , SDTFPExtendOp>; def fcopysign : SDNode<"ISD::FCOPYSIGN" , SDTFPSignOp>; +def is_fpclass : SDNode<"ISD::IS_FPCLASS" , SDIsFPClassOp>; + def sint_to_fp : SDNode<"ISD::SINT_TO_FP" , SDTIntToFPOp>; def uint_to_fp : SDNode<"ISD::UINT_TO_FP" , SDTIntToFPOp>; def fp_to_sint : SDNode<"ISD::FP_TO_SINT" , SDTFPToIntOp>; diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -2316,6 +2316,17 @@ return true; } + case Intrinsic::is_fpclass: { + Value *FpValue = CI.getOperand(0); + ConstantInt *TestMaskValue = cast(CI.getOperand(1)); + + MIRBuilder + .buildInstr(TargetOpcode::G_IS_FPCLASS, {getOrCreateVReg(CI)}, + {getOrCreateVReg(*FpValue)}) + .addImm(TestMaskValue->getZExtValue()); + + return 
true; + } #define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \ case Intrinsic::INTRINSIC: #include "llvm/IR/ConstrainedOps.def" diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -4231,6 +4231,8 @@ case G_ICMP: case G_FCMP: return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cpm predicate*/}); + case G_IS_FPCLASS: + return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/}); case G_SELECT: if (MRI.getType(MI.getOperand(1).getReg()).isVector()) return fewerElementsVectorMultiEltType(GMI, NumElts); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1479,7 +1479,11 @@ SDLoc DL(N); SDValue ArgLo, ArgHi; SDValue Test = N->getOperand(1); - GetSplitVector(N->getOperand(0), ArgLo, ArgHi); + SDValue FpValue = N->getOperand(0); + if (getTypeAction(FpValue.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(FpValue, ArgLo, ArgHi); + else + std::tie(ArgLo, ArgHi) = DAG.SplitVector(FpValue, SDLoc(FpValue)); EVT LoVT, HiVT; std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); @@ -4720,8 +4724,11 @@ } SDValue DAGTypeLegalizer::WidenVecRes_IS_FPCLASS(SDNode *N) { + SDValue FpValue = N->getOperand(0); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Arg = GetWidenedVector(N->getOperand(0)); + if (getTypeAction(FpValue.getValueType()) != TargetLowering::TypeWidenVector) + return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); + SDValue Arg = GetWidenedVector(FpValue); return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, {Arg, N->getOperand(1)}, N->getFlags()); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -306,6 +306,21 @@ setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Custom); + if (Subtarget->has16BitInsts()) + setOperationAction(ISD::IS_FPCLASS, {MVT::f16, MVT::f32, MVT::f64}, Legal); + else + setOperationAction(ISD::IS_FPCLASS, {MVT::f32, MVT::f64}, Legal); + + // FIXME: These IS_FPCLASS vector fp types are marked custom so it reaches + // scalarization code. Can be removed when IS_FPCLASS expand isn't called by + // default unless marked custom/legal. + setOperationAction( + ISD::IS_FPCLASS, + {MVT::v2f16, MVT::v3f16, MVT::v4f16, MVT::v16f16, MVT::v2f32, MVT::v3f32, + MVT::v4f32, MVT::v5f32, MVT::v6f32, MVT::v7f32, MVT::v8f32, MVT::v16f32, + MVT::v2f64, MVT::v3f64, MVT::v4f64, MVT::v8f64, MVT::v16f64}, + Custom); + // Expand to fneg + fadd. setOperationAction(ISD::FSUB, MVT::f64, Expand); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -130,7 +130,6 @@ def AMDGPUpk_u16_u32_impl : SDNode<"AMDGPUISD::CVT_PK_U16_U32", AMDGPUIntPackOp>; def AMDGPUfp_to_f16 : SDNode<"AMDGPUISD::FP_TO_FP16" , SDTFPToIntOp>; - def AMDGPUfp_class_impl : SDNode<"AMDGPUISD::FP_CLASS", AMDGPUFPClassOp>; // out = max(a, b) a and b are floats, where a nan comparison fails. 
@@ -388,7 +387,7 @@ def AMDGPUfp_class : PatFrags<(ops node:$src0, node:$src1), [(int_amdgcn_class node:$src0, node:$src1), - (AMDGPUfp_class_impl node:$src0, node:$src1)]>; + (AMDGPUfp_class_impl node:$src0, node:$src1)]>; def AMDGPUfmed3 : PatFrags<(ops node:$src0, node:$src1, node:$src2), [(int_amdgcn_fmed3 node:$src0, node:$src1, node:$src2), diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -105,6 +105,7 @@ bool selectG_IMPLICIT_DEF(MachineInstr &I) const; bool selectG_INSERT(MachineInstr &I) const; bool selectG_SBFX_UBFX(MachineInstr &I) const; + bool selectG_IS_FPCLASS(MachineInstr &I) const; bool selectInterpP1F16(MachineInstr &MI) const; bool selectWritelane(MachineInstr &MI) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -910,6 +910,17 @@ return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); } +static int getV_CMP_CLASSOpcode(unsigned size, bool hasTrue16BitInsts) { + switch(size) { + default: return -1; + case 16: + return hasTrue16BitInsts ? 
AMDGPU::V_CMP_CLASS_F16_t16_e64 + : AMDGPU::V_CMP_CLASS_F16_e64; + case 32: return AMDGPU::V_CMP_CLASS_F32_e64; + case 64: return AMDGPU::V_CMP_CLASS_F64_e64; + } +} + bool AMDGPUInstructionSelector::selectInterpP1F16(MachineInstr &MI) const { if (STI.getLDSBankCount() != 16) return selectImpl(MI, *CoverageInfo); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -978,6 +978,10 @@ .scalarize(0) .widenScalarToNextPow2(0, 32); + getActionDefinitionsBuilder(G_IS_FPCLASS) + .legalForCartesianProduct({S1}, ST.has16BitInsts() ? FPTypes16 : FPTypesBase) + .widenScalarToNextPow2(1) + .scalarize(0); // The hardware instructions return a different result on 0 than the generic // instructions expect. The hardware produces -1, but these produce the diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -3945,6 +3945,14 @@ OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); break; } + case AMDGPU::G_IS_FPCLASS: { + Register SrcReg = MI.getOperand(1).getReg(); + unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits(); + unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); + OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize); + OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize); + break; + } case AMDGPU::G_STORE: { assert(MI.getOperand(0).isReg()); unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -256,6 +256,7 @@ case ISD::INSERT_VECTOR_ELT: case 
ISD::EXTRACT_SUBVECTOR: case ISD::SCALAR_TO_VECTOR: + case ISD::IS_FPCLASS: break; case ISD::INSERT_SUBVECTOR: case ISD::CONCAT_VECTORS: @@ -525,6 +526,7 @@ case ISD::INSERT_SUBVECTOR: case ISD::EXTRACT_SUBVECTOR: case ISD::SCALAR_TO_VECTOR: + case ISD::IS_FPCLASS: break; case ISD::CONCAT_VECTORS: setOperationAction(Op, VT, Custom); diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -1486,6 +1486,21 @@ include "VOP3PInstructions.td" include "VOPDInstructions.td" +class ClassPat : GCNPat < + (is_fpclass (vt (VOP3Mods vt:$src0, i32:$src0_mods)), (i32 timm:$mask)), + (inst i32:$src0_mods, vt:$src0, (V_MOV_B32_e32 timm:$mask)) +>; + +def : ClassPat { + let OtherPredicates = [NotHasTrue16BitInsts, Has16BitInsts]; +} + +def : ClassPat { + let OtherPredicates = [HasTrue16BitInsts]; +} + +def : ClassPat; +def : ClassPat; class VOPInfoTable : GenericTable { let FilterClass = Format # "_Real"; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll @@ -0,0 +1,682 @@ +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx704 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7SELDAG %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX8SELDAG,GFX8CHECK %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX8GLISEL,GFX8CHECK %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9SELDAG,GFX9CHECK %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9GLISEL,GFX9CHECK %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1031 -verify-machineinstrs 
< %s | FileCheck --check-prefixes=GFX10SELDAG,GFX10CHECK %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1031 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10GLISEL,GFX10CHECK %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11SELDAG,GFX11CHECK %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11GLISEL,GFX11CHECK %s + +; GFX7SELDAG-LABEL: sgpr_isnan_f16: +; GFX7SELDAG: ; %bb.0: +; GFX7SELDAG-NEXT: s_load_dword s4, s[0:1], 0xb +; GFX7SELDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX7SELDAG-NEXT: s_mov_b32 s3, 0xf000 +; GFX7SELDAG-NEXT: s_mov_b32 s2, -1 +; GFX7SELDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX7SELDAG-NEXT: s_and_b32 s4, s4, 0x7fff +; GFX7SELDAG-NEXT: s_cmpk_gt_i32 s4, 0x7c00 +; GFX7SELDAG-NEXT: s_cselect_b64 s[4:5], -1, 0 +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5] +; GFX7SELDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GFX7SELDAG-NEXT: s_endpgm +; +; GFX8CHECK-LABEL: sgpr_isnan_f16: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_load_dword s2, s[0:1], 0x2c +; GFX8CHECK-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX8CHECK-NEXT: s_waitcnt lgkmcnt(0) +; GFX8CHECK-NEXT: v_cmp_class_f16_e64 s[2:3], s2, 3 +; GFX8CHECK-NEXT: v_mov_b32_e32 v0, s0 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[2:3] +; GFX8CHECK-NEXT: v_mov_b32_e32 v1, s1 +; GFX8CHECK-NEXT: flat_store_dword v[0:1], v2 +; GFX8CHECK-NEXT: s_endpgm +; +; GFX9CHECK-LABEL: sgpr_isnan_f16: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_load_dword s4, s[0:1], 0x2c +; GFX9CHECK-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX9CHECK-NEXT: v_mov_b32_e32 v0, 0 +; GFX9CHECK-NEXT: s_waitcnt lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f16_e64 s[0:1], s4, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[0:1] +; GFX9CHECK-NEXT: global_store_dword v0, v1, s[2:3] +; GFX9CHECK-NEXT: s_endpgm +; +; GFX10CHECK-LABEL: sgpr_isnan_f16: +; GFX10CHECK: ; 
%bb.0: +; GFX10CHECK-NEXT: s_clause 0x1 +; GFX10CHECK-NEXT: s_load_dword s2, s[0:1], 0x2c +; GFX10CHECK-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10CHECK-NEXT: v_mov_b32_e32 v0, 0 +; GFX10CHECK-NEXT: s_waitcnt lgkmcnt(0) +; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s2, s2, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 +; GFX10CHECK-NEXT: global_store_dword v0, v1, s[0:1] +; GFX10CHECK-NEXT: s_endpgm +; +; GFX11CHECK-LABEL: sgpr_isnan_f16: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_clause 0x1 +; GFX11CHECK-NEXT: s_load_b32 s2, s[0:1], 0x2c +; GFX11CHECK-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX11CHECK-NEXT: v_mov_b32_e32 v0, 0 +; GFX11CHECK-NEXT: s_waitcnt lgkmcnt(0) +; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s2, s2, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 +; GFX11CHECK-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX11CHECK-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11CHECK-NEXT: s_endpgm +define amdgpu_kernel void @sgpr_isnan_f16(i32 addrspace(1)* %out, half %x) { + %result = call i1 @llvm.is.fpclass.f16(half %x, i32 3) ; nan + %sext = sext i1 %result to i32 + store i32 %sext, i32 addrspace(1)* %out, align 4 + ret void +} + +define i1 @isnan_f16(half %x) nounwind { +; GFX7SELDAG-LABEL: isnan_f16: +; GFX7SELDAG: ; %bb.0: +; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7c00 +; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0 +; GFX7SELDAG-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0 +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7GLISEL-LABEL: isnan_f16: +; GFX7GLISEL: ; %bb.0: +; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7GLISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7GLISEL-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: 
isnan_f16: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isnan_f16: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isnan_f16: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isnan_f16: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 3) ; nan + ret i1 %1 +} + +define <2 x i1> @isnan_v2f16(<2 x half> %x) nounwind { +; GFX7SELDAG-LABEL: isnan_v2f16: +; GFX7SELDAG: ; %bb.0: +; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7c00 +; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0 +; GFX7SELDAG-NEXT: v_and_b32_e32 v1, 0x7fff, v1 +; GFX7SELDAG-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0 +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7SELDAG-NEXT: v_cmp_lt_i32_e32 vcc, s4, v1 +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7GLISEL-LABEL: isnan_v2f16: +; GFX7GLISEL: ; %bb.0: +; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX7GLISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7GLISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7GLISEL-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7GLISEL-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isnan_v2f16: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX8CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v1, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isnan_v2f16: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_mov_b32_e32 v1, 3 +; GFX9CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f16_sdwa s[4:5], v0, v1 src0_sel:WORD_1 src1_sel:DWORD +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_mov_b32_e32 v0, v2 +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isnan_v2f16: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_mov_b32_e32 v1, 3 +; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f16_sdwa s4, v0, v1 src0_sel:WORD_1 src1_sel:DWORD +; GFX10CHECK-NEXT: v_mov_b32_e32 v0, v2 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isnan_v2f16: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: 
v_lshrrev_b32_e32 v1, 16, v0 +; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v1, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call <2 x i1> @llvm.is.fpclass.v2f16(<2 x half> %x, i32 3) ; nan + ret <2 x i1> %1 +} + +define <3 x i1> @isnan_v3f16(<3 x half> %x) nounwind { +; GFX7SELDAG-LABEL: isnan_v3f16: +; GFX7SELDAG: ; %bb.0: +; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7c00 +; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0 +; GFX7SELDAG-NEXT: v_and_b32_e32 v1, 0x7fff, v1 +; GFX7SELDAG-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0 +; GFX7SELDAG-NEXT: v_and_b32_e32 v2, 0x7fff, v2 +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7SELDAG-NEXT: v_cmp_lt_i32_e32 vcc, s4, v1 +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX7SELDAG-NEXT: v_cmp_lt_i32_e32 vcc, s4, v2 +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7GLISEL-LABEL: isnan_v3f16: +; GFX7GLISEL: ; %bb.0: +; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7GLISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7GLISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7GLISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7GLISEL-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7GLISEL-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX7GLISEL-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8SELDAG-LABEL: 
isnan_v3f16: +; GFX8SELDAG: ; %bb.0: +; GFX8SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8SELDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX8SELDAG-NEXT: v_cmp_u_f16_e32 vcc, v2, v2 +; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GFX8SELDAG-NEXT: v_cmp_u_f16_e32 vcc, v0, v0 +; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX8SELDAG-NEXT: v_cmp_u_f16_e32 vcc, v1, v1 +; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX8SELDAG-NEXT: v_mov_b32_e32 v1, v3 +; GFX8SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8GLISEL-LABEL: isnan_v3f16: +; GFX8GLISEL: ; %bb.0: +; GFX8GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8GLISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX8GLISEL-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3 +; GFX8GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8GLISEL-NEXT: v_cmp_class_f16_e64 s[4:5], v2, 3 +; GFX8GLISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX8GLISEL-NEXT: v_cmp_class_f16_e64 s[4:5], v1, 3 +; GFX8GLISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX8GLISEL-NEXT: v_mov_b32_e32 v1, v3 +; GFX8GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9SELDAG-LABEL: isnan_v3f16: +; GFX9SELDAG: ; %bb.0: +; GFX9SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9SELDAG-NEXT: v_cmp_u_f16_sdwa s[4:5], v0, v0 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX9SELDAG-NEXT: v_cmp_u_f16_e32 vcc, v0, v0 +; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9SELDAG-NEXT: v_cmp_u_f16_e32 vcc, v1, v1 +; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9SELDAG-NEXT: v_mov_b32_e32 v1, v3 +; GFX9SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9GLISEL-LABEL: isnan_v3f16: +; GFX9GLISEL: ; %bb.0: +; GFX9GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9GLISEL-NEXT: v_mov_b32_e32 v2, 3 +; GFX9GLISEL-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3 +; GFX9GLISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX9GLISEL-NEXT: v_cmp_class_f16_sdwa s[4:5], v0, v2 src0_sel:WORD_1 
src1_sel:DWORD +; GFX9GLISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX9GLISEL-NEXT: v_cmp_class_f16_e64 s[4:5], v1, 3 +; GFX9GLISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX9GLISEL-NEXT: v_mov_b32_e32 v0, v4 +; GFX9GLISEL-NEXT: v_mov_b32_e32 v1, v3 +; GFX9GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10SELDAG-LABEL: isnan_v3f16: +; GFX10SELDAG: ; %bb.0: +; GFX10SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10SELDAG-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10SELDAG-NEXT: v_cmp_u_f16_sdwa s4, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX10SELDAG-NEXT: v_cmp_u_f16_e32 vcc_lo, v0, v0 +; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 +; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX10SELDAG-NEXT: v_cmp_u_f16_e32 vcc_lo, v1, v1 +; GFX10SELDAG-NEXT: v_mov_b32_e32 v1, v3 +; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX10SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10GLISEL-LABEL: isnan_v3f16: +; GFX10GLISEL: ; %bb.0: +; GFX10GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10GLISEL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10GLISEL-NEXT: v_mov_b32_e32 v2, 3 +; GFX10GLISEL-NEXT: v_cmp_class_f16_e64 s4, v0, 3 +; GFX10GLISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s4 +; GFX10GLISEL-NEXT: v_cmp_class_f16_sdwa s4, v0, v2 src0_sel:WORD_1 src1_sel:DWORD +; GFX10GLISEL-NEXT: v_mov_b32_e32 v0, v4 +; GFX10GLISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 +; GFX10GLISEL-NEXT: v_cmp_class_f16_e64 s4, v1, 3 +; GFX10GLISEL-NEXT: v_mov_b32_e32 v1, v3 +; GFX10GLISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4 +; GFX10GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11SELDAG-LABEL: isnan_v3f16: +; GFX11SELDAG: ; %bb.0: +; GFX11SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11SELDAG-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11SELDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX11SELDAG-NEXT: v_cmp_u_f16_e32 vcc_lo, v0, v0 +; GFX11SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX11SELDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | 
instid1(VALU_DEP_2) +; GFX11SELDAG-NEXT: v_cmp_u_f16_e32 vcc_lo, v2, v2 +; GFX11SELDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo +; GFX11SELDAG-NEXT: v_cmp_u_f16_e32 vcc_lo, v1, v1 +; GFX11SELDAG-NEXT: v_mov_b32_e32 v1, v3 +; GFX11SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX11SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11GLISEL-LABEL: isnan_v3f16: +; GFX11GLISEL: ; %bb.0: +; GFX11GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11GLISEL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11GLISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX11GLISEL-NEXT: v_cmp_class_f16_e64 s0, v0, 3 +; GFX11GLISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-NEXT: v_cmp_class_f16_e64 s0, v2, 3 +; GFX11GLISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11GLISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0 +; GFX11GLISEL-NEXT: v_cmp_class_f16_e64 s0, v1, 3 +; GFX11GLISEL-NEXT: v_mov_b32_e32 v1, v3 +; GFX11GLISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11GLISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 +; GFX11GLISEL-NEXT: s_setpc_b64 s[30:31] + %1 = call <3 x i1> @llvm.is.fpclass.v3f16(<3 x half> %x, i32 3) ; nan + ret <3 x i1> %1 +} + +define <4 x i1> @isnan_v4f16(<4 x half> %x) nounwind { +; GFX7SELDAG-LABEL: isnan_v4f16: +; GFX7SELDAG: ; %bb.0: +; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7c00 +; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0 +; GFX7SELDAG-NEXT: v_and_b32_e32 v1, 0x7fff, v1 +; GFX7SELDAG-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0 +; GFX7SELDAG-NEXT: v_and_b32_e32 v2, 0x7fff, v2 +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7SELDAG-NEXT: v_cmp_lt_i32_e32 vcc, s4, v1 +; GFX7SELDAG-NEXT: v_and_b32_e32 v3, 
0x7fff, v3 +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX7SELDAG-NEXT: v_cmp_lt_i32_e32 vcc, s4, v2 +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX7SELDAG-NEXT: v_cmp_lt_i32_e32 vcc, s4, v3 +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7GLISEL-LABEL: isnan_v4f16: +; GFX7GLISEL: ; %bb.0: +; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7GLISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7GLISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7GLISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7GLISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7GLISEL-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7GLISEL-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX7GLISEL-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX7GLISEL-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8SELDAG-LABEL: isnan_v4f16: +; GFX8SELDAG: ; %bb.0: +; GFX8SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8SELDAG-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3 +; GFX8SELDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8SELDAG-NEXT: v_cmp_class_f16_e64 s[4:5], v1, 3 +; GFX8SELDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v1 +; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX8SELDAG-NEXT: v_cmp_class_f16_e64 s[4:5], v3, 3 +; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX8SELDAG-NEXT: v_cmp_class_f16_e64 s[4:5], v4, 3 +; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX8SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8GLISEL-LABEL: isnan_v4f16: +; GFX8GLISEL: ; %bb.0: +; GFX8GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8GLISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX8GLISEL-NEXT: v_cmp_class_f16_e64 s[4:5], 
v0, 3 +; GFX8GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8GLISEL-NEXT: v_cmp_class_f16_e64 s[4:5], v2, 3 +; GFX8GLISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX8GLISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX8GLISEL-NEXT: v_cmp_class_f16_e64 s[4:5], v1, 3 +; GFX8GLISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX8GLISEL-NEXT: v_cmp_class_f16_e64 s[4:5], v3, 3 +; GFX8GLISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX8GLISEL-NEXT: v_mov_b32_e32 v1, v4 +; GFX8GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9SELDAG-LABEL: isnan_v4f16: +; GFX9SELDAG: ; %bb.0: +; GFX9SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9SELDAG-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3 +; GFX9SELDAG-NEXT: v_mov_b32_e32 v3, 3 +; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5] +; GFX9SELDAG-NEXT: v_cmp_class_f16_e64 s[4:5], v1, 3 +; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX9SELDAG-NEXT: v_cmp_class_f16_sdwa s[4:5], v0, v3 src0_sel:WORD_1 src1_sel:DWORD +; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX9SELDAG-NEXT: v_cmp_class_f16_sdwa s[4:5], v1, v3 src0_sel:WORD_1 src1_sel:DWORD +; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX9SELDAG-NEXT: v_mov_b32_e32 v0, v5 +; GFX9SELDAG-NEXT: v_mov_b32_e32 v1, v4 +; GFX9SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9GLISEL-LABEL: isnan_v4f16: +; GFX9GLISEL: ; %bb.0: +; GFX9GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9GLISEL-NEXT: v_mov_b32_e32 v3, 3 +; GFX9GLISEL-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3 +; GFX9GLISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX9GLISEL-NEXT: v_cmp_class_f16_sdwa s[4:5], v0, v3 src0_sel:WORD_1 src1_sel:DWORD +; GFX9GLISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5] +; GFX9GLISEL-NEXT: v_cmp_class_f16_e64 s[4:5], v1, 3 +; GFX9GLISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX9GLISEL-NEXT: v_cmp_class_f16_sdwa s[4:5], v1, v3 src0_sel:WORD_1 src1_sel:DWORD +; GFX9GLISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX9GLISEL-NEXT: 
v_mov_b32_e32 v0, v4 +; GFX9GLISEL-NEXT: v_mov_b32_e32 v1, v5 +; GFX9GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10SELDAG-LABEL: isnan_v4f16: +; GFX10SELDAG: ; %bb.0: +; GFX10SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10SELDAG-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10SELDAG-NEXT: v_mov_b32_e32 v2, 3 +; GFX10SELDAG-NEXT: v_cmp_class_f16_e64 s5, v0, 3 +; GFX10SELDAG-NEXT: v_cmp_class_f16_sdwa s4, v1, v2 src0_sel:WORD_1 src1_sel:DWORD +; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, s5 +; GFX10SELDAG-NEXT: v_cmp_class_f16_sdwa s5, v0, v2 src0_sel:WORD_1 src1_sel:DWORD +; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 +; GFX10SELDAG-NEXT: v_mov_b32_e32 v0, v4 +; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1, s5 +; GFX10SELDAG-NEXT: v_cmp_class_f16_e64 s5, v1, 3 +; GFX10SELDAG-NEXT: v_mov_b32_e32 v1, v5 +; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s5 +; GFX10SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10GLISEL-LABEL: isnan_v4f16: +; GFX10GLISEL: ; %bb.0: +; GFX10GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10GLISEL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10GLISEL-NEXT: v_mov_b32_e32 v3, 3 +; GFX10GLISEL-NEXT: v_cmp_class_f16_e64 s4, v0, 3 +; GFX10GLISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s4 +; GFX10GLISEL-NEXT: v_cmp_class_f16_sdwa s4, v0, v3 src0_sel:WORD_1 src1_sel:DWORD +; GFX10GLISEL-NEXT: v_mov_b32_e32 v0, v4 +; GFX10GLISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, s4 +; GFX10GLISEL-NEXT: v_cmp_class_f16_e64 s4, v1, 3 +; GFX10GLISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4 +; GFX10GLISEL-NEXT: v_cmp_class_f16_sdwa s4, v1, v3 src0_sel:WORD_1 src1_sel:DWORD +; GFX10GLISEL-NEXT: v_mov_b32_e32 v1, v5 +; GFX10GLISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 +; GFX10GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isnan_v4f16: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 3 +; GFX11CHECK-NEXT: v_lshrrev_b32_e32 v3, 16, 
v0 +; GFX11CHECK-NEXT: v_lshrrev_b32_e32 v4, 16, v1 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v1, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v3, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v4, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call <4 x i1> @llvm.is.fpclass.v4f16(<4 x half> %x, i32 3) ; nan + ret <4 x i1> %1 +} + +define i1 @isnan_f16_strictfp(half %x) strictfp nounwind { +; GFX7SELDAG-LABEL: isnan_f16_strictfp: +; GFX7SELDAG: ; %bb.0: +; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7c00 +; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0 +; GFX7SELDAG-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0 +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7GLISEL-LABEL: isnan_f16_strictfp: +; GFX7GLISEL: ; %bb.0: +; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7GLISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7GLISEL-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isnan_f16_strictfp: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isnan_f16_strictfp: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3 +; GFX9CHECK-NEXT: 
v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isnan_f16_strictfp: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isnan_f16_strictfp: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 3) ; nan + ret i1 %1 +} + +define i1 @isinf_f16(half %x) nounwind { +; GFX7SELDAG-LABEL: isinf_f16: +; GFX7SELDAG: ; %bb.0: +; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7c00 +; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0 +; GFX7SELDAG-NEXT: v_cmp_eq_u32_e32 vcc, s4, v0 +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7GLISEL-LABEL: isinf_f16: +; GFX7GLISEL: ; %bb.0: +; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7GLISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x204 +; GFX7GLISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isinf_f16: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_mov_b32_e32 v1, 0x204 +; GFX8CHECK-NEXT: v_cmp_class_f16_e32 vcc, v0, v1 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isinf_f16: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_mov_b32_e32 v1, 0x204 +; GFX9CHECK-NEXT: v_cmp_class_f16_e32 vcc, v0, v1 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isinf_f16: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x204 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isinf_f16: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x204 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 516) ; 0x204 = "inf" + ret i1 %1 +} + +define i1 @isfinite_f16(half %x) nounwind { +; GFX7SELDAG-LABEL: isfinite_f16: +; GFX7SELDAG: ; %bb.0: +; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7c00 +; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0 +; GFX7SELDAG-NEXT: v_cmp_gt_i32_e32 vcc, s4, v0 +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7GLISEL-LABEL: isfinite_f16: +; GFX7GLISEL: ; %bb.0: +; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7GLISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x1f8 +; GFX7GLISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isfinite_f16: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_mov_b32_e32 v1, 0x1f8 +; GFX8CHECK-NEXT: v_cmp_class_f16_e32 vcc, v0, v1 +; 
GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isfinite_f16: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_mov_b32_e32 v1, 0x1f8 +; GFX9CHECK-NEXT: v_cmp_class_f16_e32 vcc, v0, v1 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isfinite_f16: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x1f8 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isfinite_f16: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x1f8 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 504) ; 0x1f8 = "finite" + ret i1 %1 +} + +declare i1 @llvm.is.fpclass.f16(half, i32) +declare <2 x i1> @llvm.is.fpclass.v2f16(<2 x half>, i32) +declare <3 x i1> @llvm.is.fpclass.v3f16(<3 x half>, i32) +declare <4 x i1> @llvm.is.fpclass.v4f16(<4 x half>, i32) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll @@ -0,0 +1,1508 @@ +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx704 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7SELDAG,GFX7CHECK %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx704 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7GLISEL,GFX7CHECK %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX8SELDAG,GFX8CHECK %s +; RUN: 
llc -global-isel=1 -march=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX8GLISEL,GFX8CHECK %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9CHECK %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9CHECK %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1031 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10CHECK %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1031 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10CHECK %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX11CHECK %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX11CHECK %s + +define amdgpu_kernel void @sgpr_isnan_f32(i32 addrspace(1)* %out, float %x) { +; GFX7SELDAG-LABEL: sgpr_isnan_f32: +; GFX7SELDAG: ; %bb.0: +; GFX7SELDAG-NEXT: s_load_dword s4, s[0:1], 0xb +; GFX7SELDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX7SELDAG-NEXT: s_mov_b32 s3, 0xf000 +; GFX7SELDAG-NEXT: s_mov_b32 s2, -1 +; GFX7SELDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX7SELDAG-NEXT: v_cmp_class_f32_e64 s[4:5], s4, 3 +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5] +; GFX7SELDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GFX7SELDAG-NEXT: s_endpgm +; +; GFX7GLISEL-LABEL: sgpr_isnan_f32: +; GFX7GLISEL: ; %bb.0: +; GFX7GLISEL-NEXT: s_load_dword s3, s[0:1], 0xb +; GFX7GLISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX7GLISEL-NEXT: s_mov_b32 s2, -1 +; GFX7GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX7GLISEL-NEXT: v_cmp_class_f32_e64 s[4:5], s3, 3 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5] +; GFX7GLISEL-NEXT: s_mov_b32 s3, 0xf000 +; GFX7GLISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GFX7GLISEL-NEXT: s_endpgm +; +; GFX8CHECK-LABEL: sgpr_isnan_f32: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: 
s_load_dword s2, s[0:1], 0x2c +; GFX8CHECK-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX8CHECK-NEXT: s_waitcnt lgkmcnt(0) +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[2:3], s2, 3 +; GFX8CHECK-NEXT: v_mov_b32_e32 v0, s0 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[2:3] +; GFX8CHECK-NEXT: v_mov_b32_e32 v1, s1 +; GFX8CHECK-NEXT: flat_store_dword v[0:1], v2 +; GFX8CHECK-NEXT: s_endpgm +; +; GFX9CHECK-LABEL: sgpr_isnan_f32: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_load_dword s4, s[0:1], 0x2c +; GFX9CHECK-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX9CHECK-NEXT: v_mov_b32_e32 v0, 0 +; GFX9CHECK-NEXT: s_waitcnt lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[0:1], s4, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[0:1] +; GFX9CHECK-NEXT: global_store_dword v0, v1, s[2:3] +; GFX9CHECK-NEXT: s_endpgm +; +; GFX10CHECK-LABEL: sgpr_isnan_f32: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_clause 0x1 +; GFX10CHECK-NEXT: s_load_dword s2, s[0:1], 0x2c +; GFX10CHECK-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10CHECK-NEXT: v_mov_b32_e32 v0, 0 +; GFX10CHECK-NEXT: s_waitcnt lgkmcnt(0) +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s2, s2, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 +; GFX10CHECK-NEXT: global_store_dword v0, v1, s[0:1] +; GFX10CHECK-NEXT: s_endpgm +; +; GFX11CHECK-LABEL: sgpr_isnan_f32: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_clause 0x1 +; GFX11CHECK-NEXT: s_load_b32 s2, s[0:1], 0x2c +; GFX11CHECK-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX11CHECK-NEXT: v_mov_b32_e32 v0, 0 +; GFX11CHECK-NEXT: s_waitcnt lgkmcnt(0) +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s2, s2, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 +; GFX11CHECK-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX11CHECK-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11CHECK-NEXT: s_endpgm + %result = call i1 @llvm.is.fpclass.f32(float %x, i32 3) ; nan + %sext = sext i1 %result to i32 + store i32 %sext, i32 addrspace(1)* 
%out, align 4 + ret void +} + +define amdgpu_kernel void @sgpr_isnan_f64(i32 addrspace(1)* %out, double %x) { +; GFX7SELDAG-LABEL: sgpr_isnan_f64: +; GFX7SELDAG: ; %bb.0: +; GFX7SELDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; GFX7SELDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7SELDAG-NEXT: s_mov_b32 s6, -1 +; GFX7SELDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX7SELDAG-NEXT: s_mov_b32 s4, s0 +; GFX7SELDAG-NEXT: s_mov_b32 s5, s1 +; GFX7SELDAG-NEXT: v_cmp_class_f64_e64 s[0:1], s[2:3], 3 +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[0:1] +; GFX7SELDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX7SELDAG-NEXT: s_endpgm +; +; GFX7GLISEL-LABEL: sgpr_isnan_f64: +; GFX7GLISEL: ; %bb.0: +; GFX7GLISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; GFX7GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX7GLISEL-NEXT: v_cmp_class_f64_e64 s[2:3], s[2:3], 3 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[2:3] +; GFX7GLISEL-NEXT: s_mov_b32 s2, -1 +; GFX7GLISEL-NEXT: s_mov_b32 s3, 0xf000 +; GFX7GLISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GFX7GLISEL-NEXT: s_endpgm +; +; GFX8SELDAG-LABEL: sgpr_isnan_f64: +; GFX8SELDAG: ; %bb.0: +; GFX8SELDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX8SELDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX8SELDAG-NEXT: v_mov_b32_e32 v0, s0 +; GFX8SELDAG-NEXT: v_mov_b32_e32 v1, s1 +; GFX8SELDAG-NEXT: v_cmp_class_f64_e64 s[0:1], s[2:3], 3 +; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[0:1] +; GFX8SELDAG-NEXT: flat_store_dword v[0:1], v2 +; GFX8SELDAG-NEXT: s_endpgm +; +; GFX8GLISEL-LABEL: sgpr_isnan_f64: +; GFX8GLISEL: ; %bb.0: +; GFX8GLISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX8GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX8GLISEL-NEXT: v_cmp_class_f64_e64 s[2:3], s[2:3], 3 +; GFX8GLISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX8GLISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX8GLISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[2:3] +; GFX8GLISEL-NEXT: flat_store_dword v[0:1], v2 +; GFX8GLISEL-NEXT: s_endpgm +; +; GFX9CHECK-LABEL: sgpr_isnan_f64: +; GFX9CHECK: ; 
%bb.0: +; GFX9CHECK-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9CHECK-NEXT: v_mov_b32_e32 v0, 0 +; GFX9CHECK-NEXT: s_waitcnt lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f64_e64 s[2:3], s[2:3], 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[2:3] +; GFX9CHECK-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9CHECK-NEXT: s_endpgm +; +; GFX10CHECK-LABEL: sgpr_isnan_f64: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX10CHECK-NEXT: v_mov_b32_e32 v0, 0 +; GFX10CHECK-NEXT: s_waitcnt lgkmcnt(0) +; GFX10CHECK-NEXT: v_cmp_class_f64_e64 s2, s[2:3], 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 +; GFX10CHECK-NEXT: global_store_dword v0, v1, s[0:1] +; GFX10CHECK-NEXT: s_endpgm +; +; GFX11CHECK-LABEL: sgpr_isnan_f64: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 +; GFX11CHECK-NEXT: v_mov_b32_e32 v0, 0 +; GFX11CHECK-NEXT: s_waitcnt lgkmcnt(0) +; GFX11CHECK-NEXT: v_cmp_class_f64_e64 s2, s[2:3], 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 +; GFX11CHECK-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX11CHECK-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11CHECK-NEXT: s_endpgm + %result = call i1 @llvm.is.fpclass.f64(double %x, i32 3) ; nan + %sext = sext i1 %result to i32 + store i32 %sext, i32 addrspace(1)* %out, align 4 + ret void +} + +define i1 @isnan_f32(float %x) nounwind { +; GFX7CHECK-LABEL: isnan_f32: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isnan_f32: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isnan_f32: +; GFX9CHECK: ; 
%bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isnan_f32: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isnan_f32: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 3) ; nan + ret i1 %1 +} + +define <2 x i1> @isnan_v2f32(<2 x float> %x) nounwind { +; GFX7CHECK-LABEL: isnan_v2f32: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isnan_v2f32: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isnan_v2f32: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9CHECK-NEXT: 
v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isnan_v2f32: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v1, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isnan_v2f32: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v1, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> %x, i32 3) ; nan + ret <2 x i1> %1 +} + +define <3 x i1> @isnan_v3f32(<3 x float> %x) nounwind { +; GFX7CHECK-LABEL: isnan_v3f32: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isnan_v3f32: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, 
s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isnan_v3f32: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isnan_v3f32: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v1, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v2, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isnan_v3f32: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v1, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v2, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call <3 x i1> @llvm.is.fpclass.v3f32(<3 x float> %x, i32 3) ; nan + ret <3 x i1> %1 +} + +define <4 x i1> @isnan_v4f32(<4 x float> %x) nounwind { +; GFX7CHECK-LABEL: isnan_v4f32: +; GFX7CHECK: 
; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isnan_v4f32: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isnan_v4f32: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isnan_v4f32: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: 
v_cmp_class_f32_e64 s4, v1, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v2, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v3, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isnan_v4f32: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v1, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v2, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v3, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call <4 x i1> @llvm.is.fpclass.v4f32(<4 x float> %x, i32 3) ; nan + ret <4 x i1> %1 +} + +define <5 x i1> @isnan_v5f32(<5 x float> %x) nounwind { +; GFX7CHECK-LABEL: isnan_v5f32: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v4, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: 
isnan_v5f32: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v4, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isnan_v5f32: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v4, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isnan_v5f32: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v1, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v2, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v3, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v4, 3 +; 
GFX10CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isnan_v5f32: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v1, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v2, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v3, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v4, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call <5 x i1> @llvm.is.fpclass.v5f32(<5 x float> %x, i32 3) ; nan + ret <5 x i1> %1 +} + +define <6 x i1> @isnan_v6f32(<6 x float> %x) nounwind { +; GFX7CHECK-LABEL: isnan_v6f32: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v4, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v5, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5] +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX8CHECK-LABEL: isnan_v6f32: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v4, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v5, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5] +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isnan_v6f32: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v4, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v5, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5] +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isnan_v6f32: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v1, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4 +; GFX10CHECK-NEXT: 
v_cmp_class_f32_e64 s4, v2, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v3, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v4, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v5, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isnan_v6f32: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v1, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v2, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v3, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v4, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v5, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call <6 x i1> @llvm.is.fpclass.v6f32(<6 x float> %x, i32 3) ; nan + ret <6 x i1> %1 +} + +define <7 x i1> @isnan_v7f32(<7 x float> %x) nounwind { +; GFX7CHECK-LABEL: isnan_v7f32: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] 
+; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v4, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v5, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v6, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isnan_v7f32: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v4, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v5, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v6, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isnan_v7f32: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX9CHECK-NEXT: 
v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v4, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v5, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v6, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isnan_v7f32: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v1, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v2, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v3, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v4, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v5, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v6, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isnan_v7f32: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v1, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v2, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 +; GFX11CHECK-NEXT: 
v_cmp_class_f32_e64 s0, v3, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v4, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v5, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v6, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call <7 x i1> @llvm.is.fpclass.v7f32(<7 x float> %x, i32 3) ; nan + ret <7 x i1> %1 +} + +define <8 x i1> @isnan_v8f32(<8 x float> %x) nounwind { +; GFX7CHECK-LABEL: isnan_v8f32: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v4, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v5, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v6, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v7, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isnan_v8f32: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 
s[4:5], v1, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v4, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v5, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v6, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v7, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isnan_v8f32: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v4, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v5, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v6, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v7, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isnan_v8f32: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3 +; 
GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v1, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v2, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v3, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v4, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v5, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v6, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v7, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isnan_v8f32: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v1, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v2, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v3, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v4, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v5, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v6, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; 
GFX11CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v7, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call <8 x i1> @llvm.is.fpclass.v8f32(<8 x float> %x, i32 3) ; nan + ret <8 x i1> %1 +} + +define <16 x i1> @isnan_v16f32(<16 x float> %x) nounwind { +; GFX7CHECK-LABEL: isnan_v16f32: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v4, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v5, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v6, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v7, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v8, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v9, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v10, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v11, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v12, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v13, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; GFX7CHECK-NEXT: 
v_cmp_class_f32_e64 s[4:5], v14, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v15, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isnan_v16f32: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v4, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v5, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v6, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v7, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v8, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v9, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v10, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v11, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v12, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v13, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v14, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; GFX8CHECK-NEXT: 
v_cmp_class_f32_e64 s[4:5], v15, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isnan_v16f32: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v1, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v2, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v3, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v4, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v5, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v6, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v7, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v8, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v9, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v10, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v11, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v12, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v13, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v14, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v15, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; GFX9CHECK-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isnan_v16f32: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v1, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v2, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v3, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v4, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v5, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v6, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v7, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v8, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v9, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v10, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v11, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v12, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v13, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v14, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v14, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v15, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v15, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isnan_v16f32: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: 
s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v1, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v2, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v3, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v4, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v5, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v6, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v7, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v8, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v9, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v10, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v11, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v12, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, s0 +; 
GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v13, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v14, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v14, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v15, 3 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v15, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call <16 x i1> @llvm.is.fpclass.v16f32(<16 x float> %x, i32 3) ; nan + ret <16 x i1> %1 +} + +define i1 @isnan_f64(double %x) nounwind { +; GFX7CHECK-LABEL: isnan_f64: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_cmp_class_f64_e64 s[4:5], v[0:1], 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isnan_f64: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_cmp_class_f64_e64 s[4:5], v[0:1], 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isnan_f64: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f64_e64 s[4:5], v[0:1], 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isnan_f64: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f64_e64 s4, v[0:1], 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isnan_f64: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f64_e64 s0, v[0:1], 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: 
v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f64(double %x, i32 3) ; nan + ret i1 %1 +} + +define i1 @isnan_f32_strictfp(float %x) strictfp nounwind { +; GFX7CHECK-LABEL: isnan_f32_strictfp: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isnan_f32_strictfp: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isnan_f32_strictfp: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isnan_f32_strictfp: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isnan_f32_strictfp: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 3) ; nan + ret i1 %1 +} + +define i1 @isnan_f64_strictfp(double %x) strictfp nounwind { +; GFX7CHECK-LABEL: isnan_f64_strictfp: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: 
v_cmp_class_f64_e64 s[4:5], v[0:1], 3 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isnan_f64_strictfp: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_cmp_class_f64_e64 s[4:5], v[0:1], 3 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isnan_f64_strictfp: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f64_e64 s[4:5], v[0:1], 3 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isnan_f64_strictfp: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f64_e64 s4, v[0:1], 3 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isnan_f64_strictfp: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f64_e64 s0, v[0:1], 3 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f64(double %x, i32 3) ; nan + ret i1 %1 +} + +define i1 @isinf_f32(float %x) nounwind { +; GFX7CHECK-LABEL: isinf_f32: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_mov_b32_e32 v1, 0x204 +; GFX7CHECK-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isinf_f32: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_mov_b32_e32 v1, 0x204 +; GFX8CHECK-NEXT: v_cmp_class_f32_e32 vcc, 
v0, v1 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isinf_f32: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_mov_b32_e32 v1, 0x204 +; GFX9CHECK-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isinf_f32: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 0x204 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isinf_f32: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 0x204 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 516) ; 0x204 = "inf" + ret i1 %1 +} + +define i1 @isinf_f64(double %x) nounwind { +; GFX7CHECK-LABEL: isinf_f64: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_mov_b32_e32 v2, 0x204 +; GFX7CHECK-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v2 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isinf_f64: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_mov_b32_e32 v2, 0x204 +; GFX8CHECK-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v2 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isinf_f64: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_mov_b32_e32 v2, 0x204 +; GFX9CHECK-NEXT: 
v_cmp_class_f64_e32 vcc, v[0:1], v2 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isinf_f64: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f64_e64 s4, v[0:1], 0x204 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isinf_f64: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f64_e64 s0, v[0:1], 0x204 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f64(double %x, i32 516) ; 0x204 = "inf" + ret i1 %1 +} + +define i1 @isfinite_f32(float %x) nounwind { +; GFX7CHECK-LABEL: isfinite_f32: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_mov_b32_e32 v1, 0x1f8 +; GFX7CHECK-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isfinite_f32: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_mov_b32_e32 v1, 0x1f8 +; GFX8CHECK-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isfinite_f32: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_mov_b32_e32 v1, 0x1f8 +; GFX9CHECK-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isfinite_f32: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: 
s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 0x1f8 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isfinite_f32: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 0x1f8 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; 0x1f8 = "finite" + ret i1 %1 +} + +define i1 @isfinite_f64(double %x) nounwind { +; GFX7CHECK-LABEL: isfinite_f64: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_mov_b32_e32 v2, 0x1f8 +; GFX7CHECK-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v2 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isfinite_f64: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_mov_b32_e32 v2, 0x1f8 +; GFX8CHECK-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v2 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isfinite_f64: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_mov_b32_e32 v2, 0x1f8 +; GFX9CHECK-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v2 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isfinite_f64: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f64_e64 s4, v[0:1], 0x1f8 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isfinite_f64: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f64_e64 s0, v[0:1], 0x1f8 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f64(double %x, i32 504) ; 0x1f8 = "finite" + ret i1 %1 +} + +define i1 @isnormal_f32(float %x) nounwind { +; GFX7CHECK-LABEL: isnormal_f32: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_mov_b32_e32 v1, 0x108 +; GFX7CHECK-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isnormal_f32: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_mov_b32_e32 v1, 0x108 +; GFX8CHECK-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isnormal_f32: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_mov_b32_e32 v1, 0x108 +; GFX9CHECK-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isnormal_f32: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 0x108 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isnormal_f32: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 0x108 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: 
s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 264) ; 0x108 = "normal" + ret i1 %1 +} + +define <2 x i1> @isnormal_v2f64(<2 x double> %x) nounwind { +; GFX7CHECK-LABEL: isnormal_v2f64: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_mov_b32_e32 v4, 0x108 +; GFX7CHECK-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7CHECK-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v4 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: isnormal_v2f64: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_mov_b32_e32 v4, 0x108 +; GFX8CHECK-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX8CHECK-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v4 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: isnormal_v2f64: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_mov_b32_e32 v4, 0x108 +; GFX9CHECK-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9CHECK-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v4 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: isnormal_v2f64: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f64_e64 s4, v[0:1], 0x108 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: v_cmp_class_f64_e64 s4, v[2:3], 0x108 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: isnormal_v2f64: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: 
s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f64_e64 s0, v[0:1], 0x108 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: v_cmp_class_f64_e64 s0, v[2:3], 0x108 +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call <2 x i1> @llvm.is.fpclass.v2f64(<2 x double> %x, i32 264) ; 0x108 = "normal" + ret <2 x i1> %1 +} + +define i1 @issubnormal_f32(float %x) nounwind { +; GFX7CHECK-LABEL: issubnormal_f32: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_mov_b32_e32 v1, 0x90 +; GFX7CHECK-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: issubnormal_f32: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_mov_b32_e32 v1, 0x90 +; GFX8CHECK-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: issubnormal_f32: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_mov_b32_e32 v1, 0x90 +; GFX9CHECK-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: issubnormal_f32: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 0x90 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: issubnormal_f32: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 0x90 +; 
GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 144) ; 0x90 = "subnormal" + ret i1 %1 +} + +define i1 @iszero_f32(float %x) nounwind { +; GFX7CHECK-LABEL: iszero_f32: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_mov_b32_e32 v1, 0x60 +; GFX7CHECK-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: iszero_f32: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_mov_b32_e32 v1, 0x60 +; GFX8CHECK-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: iszero_f32: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_mov_b32_e32 v1, 0x60 +; GFX9CHECK-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: iszero_f32: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 0x60 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: iszero_f32: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 0x60 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 96) ; 0x60 = "zero" + ret i1 %1 +} + +declare i1 @llvm.is.fpclass.f32(float, i32) +declare i1 
@llvm.is.fpclass.f64(double, i32) +declare <2 x i1> @llvm.is.fpclass.v2f32(<2 x float>, i32) +declare <3 x i1> @llvm.is.fpclass.v3f32(<3 x float>, i32) +declare <4 x i1> @llvm.is.fpclass.v4f32(<4 x float>, i32) +declare <5 x i1> @llvm.is.fpclass.v5f32(<5 x float>, i32) +declare <6 x i1> @llvm.is.fpclass.v6f32(<6 x float>, i32) +declare <7 x i1> @llvm.is.fpclass.v7f32(<7 x float>, i32) +declare <8 x i1> @llvm.is.fpclass.v8f32(<8 x float>, i32) +declare <16 x i1> @llvm.is.fpclass.v16f32(<16 x float>, i32) +declare <2 x i1> @llvm.is.fpclass.v2f64(<2 x double>, i32)