diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -379,6 +379,8 @@
   LegalizeResult lowerFPTRUNC(MachineInstr &MI);
   LegalizeResult lowerFPOWI(MachineInstr &MI);
 
+  LegalizeResult lowerISFPCLASS(MachineInstr &MI);
+
   LegalizeResult lowerMinMax(MachineInstr &MI);
   LegalizeResult lowerFCopySign(MachineInstr &MI);
   LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI);
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3525,6 +3525,8 @@
     return lowerAbsToAddXor(MI);
   case G_SELECT:
     return lowerSelect(MI);
+  case G_IS_FPCLASS:
+    return lowerISFPCLASS(MI);
   case G_SDIVREM:
   case G_UDIVREM:
     return lowerDIVREM(MI);
@@ -7236,6 +7238,198 @@
   return Legalized;
 }
 
+// Returns the IEEE floating-point semantics for a scalar of NrOfBits bits, or
+// APFloat::Bogus() when no IEEE format of that width is handled here.
+// There is currently no support for distinguishing between FP semantics of the
+// same size, so only the IEEE semantics are considered for now.
+static const fltSemantics &getIEEEfltSemantics(unsigned NrOfBits) {
+  switch (NrOfBits) {
+  default:
+    return APFloat::Bogus();
+  case 16:
+    return APFloat::IEEEhalf();
+  case 32:
+    return APFloat::IEEEsingle();
+  case 64:
+    return APFloat::IEEEdouble();
+  case 128:
+    return APFloat::IEEEquad();
+  }
+}
+
+// Lower G_IS_FPCLASS to integer operations on the bit pattern of the source.
+// One comparison (or a small combination of comparisons) is emitted per
+// requested class group and the partial results are OR-ed together. Returns
+// UnableToLegalize if getIEEEfltSemantics has no format for the source width.
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
+  Register DstReg = MI.getOperand(0).getReg();
+  Register SrcReg = MI.getOperand(1).getReg();
+  LLT DstTy = MRI.getType(DstReg);
+  LLT SrcTy = MRI.getType(SrcReg);
+  uint64_t Mask = MI.getOperand(2).getImm();
+  MIRBuilder.setDebugLoc(MI.getDebugLoc());
+
+  // Trivial masks: no class requested, or every class requested.
+  if (Mask == 0) {
+    MIRBuilder.buildConstant(DstReg, 0);
+    MI.eraseFromParent();
+    return Legalized;
+  }
+  if ((Mask & fcAllFlags) == fcAllFlags) {
+    MIRBuilder.buildConstant(DstReg, 1);
+    MI.eraseFromParent();
+    return Legalized;
+  }
+
+  unsigned BitSize = SrcTy.getScalarSizeInBits();
+  const fltSemantics &Semantics = getIEEEfltSemantics(BitSize);
+  // A zero size identifies the Bogus semantics returned for unsupported widths.
+  if (APFloat::getSizeInBits(Semantics) == 0)
+    return UnableToLegalize;
+
+  // Reinterpret the source as an integer (vector) of the same width.
+  LLT IntTy = LLT::scalar(BitSize);
+  if (SrcTy.isVector())
+    IntTy = LLT::vector(SrcTy.getElementCount(), IntTy);
+  auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg);
+
+  // Various masks.
+  APInt SignBit = APInt::getSignMask(BitSize);
+  APInt ValueMask = APInt::getSignedMaxValue(BitSize);     // All bits but sign.
+  APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
+  APInt ExpMask = Inf;
+  APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
+  APInt QNaNBitMask =
+      APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
+
+  auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
+  auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
+  auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
+  auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
+  auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);
+
+  auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
+  // True iff the sign bit is set: clearing it changed the value.
+  auto Sign =
+      MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);
+
+  // Accumulate the partial checks by OR-ing them into Res.
+  auto Res = MIRBuilder.buildConstant(DstTy, 0);
+  const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
+    Res = MIRBuilder.buildOr(DstTy, Res, ToAppend);
+  };
+
+  // Tests that involve more than one class should be processed first.
+  if ((Mask & fcFinite) == fcFinite) {
+    // finite(V) ==> abs(V) u< exp_mask
+    appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
+                                     ExpMaskC));
+    Mask &= ~fcFinite;
+  } else if ((Mask & fcFinite) == fcPosFinite) {
+    // finite(V) && signbit == 0 ==> V u< exp_mask
+    appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
+                                     ExpMaskC));
+    Mask &= ~fcPosFinite;
+  } else if ((Mask & fcFinite) == fcNegFinite) {
+    // finite(V) && signbit == 1 ==> abs(V) u< exp_mask && signbit == 1
+    auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
+                                    ExpMaskC);
+    auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
+    appendToRes(And);
+    Mask &= ~fcNegFinite;
+  }
+
+  // Check for individual classes.
+  if (unsigned PartialCheck = Mask & fcZero) {
+    if (PartialCheck == fcPosZero)
+      appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
+                                       AsInt, ZeroC));
+    else if (PartialCheck == fcZero)
+      appendToRes(
+          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, ZeroC));
+    else // fcNegZero
+      appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
+                                       AsInt, SignBitC));
+  }
+
+  if (unsigned PartialCheck = Mask & fcInf) {
+    if (PartialCheck == fcPosInf)
+      appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
+                                       AsInt, InfC));
+    else if (PartialCheck == fcInf)
+      appendToRes(
+          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, InfC));
+    else { // fcNegInf
+      APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
+      auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
+      appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
+                                       AsInt, NegInfC));
+    }
+  }
+
+  if (unsigned PartialCheck = Mask & fcNan) {
+    auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
+    if (PartialCheck == fcNan) {
+      // isnan(V) ==> abs(V) u> int(inf)
+      appendToRes(
+          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC));
+    } else if (PartialCheck == fcQNan) {
+      // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
+      appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
+                                       InfWithQnanBitC));
+    } else { // fcSNan
+      // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
+      //                    abs(V) u< (unsigned(Inf) | quiet_bit)
+      auto IsNan =
+          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC);
+      auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
+                                            Abs, InfWithQnanBitC);
+      appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
+    }
+  }
+
+  if (unsigned PartialCheck = Mask & fcSubnormal) {
+    // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
+    // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
+    auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
+    auto OneC = MIRBuilder.buildConstant(IntTy, 1);
+    auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
+    auto SubnormalRes =
+        MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
+                             MIRBuilder.buildConstant(IntTy, AllOneMantissa));
+    if (PartialCheck == fcNegSubnormal)
+      SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
+    appendToRes(SubnormalRes);
+  }
+
+  if (unsigned PartialCheck = Mask & fcNormal) {
+    // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
+    // (max_exp-1))
+    APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
+    auto ExpMinusOne = MIRBuilder.buildSub(
+        IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
+    APInt MaxExpMinusOne = ExpMask - ExpLSB;
+    auto NormalRes =
+        MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
+                             MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
+    if (PartialCheck == fcNegNormal)
+      NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
+    else if (PartialCheck == fcPosNormal) {
+      // Invert Sign to get "sign bit clear".
+      APInt InversionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());
+      auto PosSign = MIRBuilder.buildXor(
+          DstTy, Sign, MIRBuilder.buildConstant(DstTy, InversionMask));
+      NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
+    }
+    appendToRes(NormalRes);
+  }
+
+  MIRBuilder.buildCopy(DstReg, Res);
+  MI.eraseFromParent();
+  return Legalized;
+}
+
 LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
   // Implement vector G_SELECT in terms of XOR, AND, OR.
   Register DstReg = MI.getOperand(0).getReg();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -978,10 +978,18 @@
     .scalarize(0)
     .widenScalarToNextPow2(0, 32);
 
-  getActionDefinitionsBuilder(G_IS_FPCLASS)
-    .legalForCartesianProduct({S1}, ST.has16BitInsts() ? 
FPTypes16 : FPTypesBase) - .widenScalarToNextPow2(1) - .scalarize(0); + // If no 16 bit instr is available, lower into different instructions. + if (ST.has16BitInsts()) + getActionDefinitionsBuilder(G_IS_FPCLASS) + .legalForCartesianProduct({S1}, FPTypes16) + .widenScalarToNextPow2(1) + .scalarize(0); + else + getActionDefinitionsBuilder(G_IS_FPCLASS) + .legalForCartesianProduct({S1}, FPTypesBase) + .lowerFor({S1, S16}) + .widenScalarToNextPow2(1) + .scalarize(0); // The hardware instructions return a different result on 0 than the generic // instructions expect. The hardware produces -1, but these produce the diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll @@ -1,4 +1,5 @@ -; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx704 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7SELDAG %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx704 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7SELDAG,GFX7CHECK %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx704 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7GLISEL,GFX7CHECK %s ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX8SELDAG,GFX8CHECK %s ; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX8GLISEL,GFX8CHECK %s ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9SELDAG,GFX9CHECK %s @@ -8,6 +9,7 @@ ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11SELDAG,GFX11CHECK %s ; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11GLISEL,GFX11CHECK %s +define amdgpu_kernel void @sgpr_isnan_f16(i32 addrspace(1)* %out, 
half %x) { ; GFX7SELDAG-LABEL: sgpr_isnan_f16: ; GFX7SELDAG: ; %bb.0: ; GFX7SELDAG-NEXT: s_load_dword s4, s[0:1], 0xb @@ -22,6 +24,22 @@ ; GFX7SELDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7SELDAG-NEXT: s_endpgm ; +; GFX7GLISEL-LABEL: sgpr_isnan_f16: +; GFX7GLISEL: ; %bb.0: +; GFX7GLISEL-NEXT: s_load_dword s3, s[0:1], 0xb +; GFX7GLISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX7GLISEL-NEXT: s_mov_b32 s2, -1 +; GFX7GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX7GLISEL-NEXT: s_and_b32 s3, s3, 0x7fff +; GFX7GLISEL-NEXT: s_bfe_u32 s3, s3, 0x100000 +; GFX7GLISEL-NEXT: s_cmpk_gt_u32 s3, 0x7c00 +; GFX7GLISEL-NEXT: s_cselect_b32 s3, 1, 0 +; GFX7GLISEL-NEXT: s_bfe_i32 s3, s3, 0x10000 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v0, s3 +; GFX7GLISEL-NEXT: s_mov_b32 s3, 0xf000 +; GFX7GLISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GFX7GLISEL-NEXT: s_endpgm +; ; GFX8CHECK-LABEL: sgpr_isnan_f16: ; GFX8CHECK: ; %bb.0: ; GFX8CHECK-NEXT: s_load_dword s2, s[0:1], 0x2c @@ -70,13 +88,800 @@ ; GFX11CHECK-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11CHECK-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11CHECK-NEXT: s_endpgm -define amdgpu_kernel void @sgpr_isnan_f16(i32 addrspace(1)* %out, half %x) { - %result = call i1 @llvm.is.fpclass.f16(half %x, i32 3) ; nan + %result = call i1 @llvm.is.fpclass.f16(half %x, i32 3) %sext = sext i1 %result to i32 store i32 %sext, i32 addrspace(1)* %out, align 4 ret void } +define i1 @zeromask_f16(half %x) nounwind { +; GFX7CHECK-LABEL: zeromask_f16: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_mov_b32_e32 v0, 0 +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: zeromask_f16: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 0 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: zeromask_f16: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 0 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: zeromask_f16: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: zeromask_f16: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 0) + ret i1 %1 +} + +define i1 @allflags_f16(half %x) nounwind { +; GFX7SELDAG-LABEL: allflags_f16: +; GFX7SELDAG: ; %bb.0: +; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7SELDAG-NEXT: v_mov_b32_e32 v0, 1 +; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7GLISEL-LABEL: allflags_f16: +; GFX7GLISEL: ; %bb.0: +; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7GLISEL-NEXT: v_mov_b32_e32 v0, -1 +; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: allflags_f16: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_mov_b32_e32 v1, 0x3ff +; GFX8CHECK-NEXT: v_cmp_class_f16_e32 vcc, v0, v1 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: allflags_f16: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_mov_b32_e32 v1, 0x3ff +; GFX9CHECK-NEXT: v_cmp_class_f16_e32 vcc, v0, v1 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX10CHECK-LABEL: allflags_f16: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x3ff +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: allflags_f16: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x3ff +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 1023) ; 0x3ff + ret i1 %1 +} + +define i1 @snan_f16(half %x) nounwind { +; GFX7SELDAG-LABEL: snan_f16: +; GFX7SELDAG: ; %bb.0: +; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7e00 +; GFX7SELDAG-NEXT: s_movk_i32 s5, 0x7c00 +; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0 +; GFX7SELDAG-NEXT: v_cmp_gt_i32_e32 vcc, s4, v0 +; GFX7SELDAG-NEXT: v_cmp_lt_i32_e64 s[4:5], s5, v0 +; GFX7SELDAG-NEXT: s_and_b64 s[4:5], s[4:5], vcc +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7GLISEL-LABEL: snan_f16: +; GFX7GLISEL: ; %bb.0: +; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0 +; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00 +; GFX7GLISEL-NEXT: v_cmp_gt_u32_e32 vcc, v0, v1 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7e00 +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], v0, v1 +; GFX7GLISEL-NEXT: s_and_b64 s[4:5], vcc, s[4:5] +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: snan_f16: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
+; GFX8CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 1 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: snan_f16: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 1 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: snan_f16: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 1 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: snan_f16: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 1 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 1) ; 0x001 + ret i1 %1 +} + +define i1 @qnan_f16(half %x) nounwind { +; GFX7SELDAG-LABEL: qnan_f16: +; GFX7SELDAG: ; %bb.0: +; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7dff +; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0 +; GFX7SELDAG-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0 +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7GLISEL-LABEL: qnan_f16: +; GFX7GLISEL: ; %bb.0: +; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0 +; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7e00 +; GFX7GLISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX8CHECK-LABEL: qnan_f16: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 2 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: qnan_f16: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 2 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: qnan_f16: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 2 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: qnan_f16: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 2 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 2) ; 0x002 + ret i1 %1 +} + +define i1 @posinf_f16(half %x) nounwind { +; GFX7SELDAG-LABEL: posinf_f16: +; GFX7SELDAG: ; %bb.0: +; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7c00 +; GFX7SELDAG-NEXT: v_cmp_eq_u32_e32 vcc, s4, v0 +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7GLISEL-LABEL: posinf_f16: +; GFX7GLISEL: ; %bb.0: +; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00 +; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7GLISEL-NEXT: s_setpc_b64 
s[30:31] +; +; GFX8CHECK-LABEL: posinf_f16: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_mov_b32_e32 v1, 0x200 +; GFX8CHECK-NEXT: v_cmp_class_f16_e32 vcc, v0, v1 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: posinf_f16: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_mov_b32_e32 v1, 0x200 +; GFX9CHECK-NEXT: v_cmp_class_f16_e32 vcc, v0, v1 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: posinf_f16: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x200 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: posinf_f16: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x200 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 512) ; 0x200 + ret i1 %1 +} + +define i1 @neginf_f16(half %x) nounwind { +; GFX7SELDAG-LABEL: neginf_f16: +; GFX7SELDAG: ; %bb.0: +; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7SELDAG-NEXT: s_mov_b32 s4, 0xfc00 +; GFX7SELDAG-NEXT: v_cmp_eq_u32_e32 vcc, s4, v0 +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7GLISEL-LABEL: neginf_f16: +; GFX7GLISEL: ; %bb.0: +; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0xfc00 +; GFX7GLISEL-NEXT: 
v_cmp_eq_u32_e32 vcc, v0, v1 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: neginf_f16: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 4 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: neginf_f16: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 4 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: neginf_f16: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 4 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: neginf_f16: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 4 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 4) ; 0x004 + ret i1 %1 +} + +define i1 @posnormal_f16(half %x) nounwind { +; GFX7SELDAG-LABEL: posnormal_f16: +; GFX7SELDAG: ; %bb.0: +; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7SELDAG-NEXT: s_movk_i32 s6, 0x7800 +; GFX7SELDAG-NEXT: v_bfe_i32 v1, v0, 0, 16 +; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0 +; GFX7SELDAG-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0 +; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX7SELDAG-NEXT: v_cmp_lt_i32_e64 s[4:5], -1, v1 +; GFX7SELDAG-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0 +; GFX7SELDAG-NEXT: s_and_b64 s[4:5], vcc, s[4:5] 
+; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7GLISEL-LABEL: posnormal_f16: +; GFX7GLISEL: ; %bb.0: +; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v0 +; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX7GLISEL-NEXT: v_bfe_u32 v2, v1, 0, 16 +; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v0, v2 +; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v1 +; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800 +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1 +; GFX7GLISEL-NEXT: s_and_b64 s[4:5], vcc, s[4:5] +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: posnormal_f16: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_mov_b32_e32 v1, 0x100 +; GFX8CHECK-NEXT: v_cmp_class_f16_e32 vcc, v0, v1 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: posnormal_f16: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_mov_b32_e32 v1, 0x100 +; GFX9CHECK-NEXT: v_cmp_class_f16_e32 vcc, v0, v1 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: posnormal_f16: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x100 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: posnormal_f16: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x100 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 
0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 256) ; 0x100 + ret i1 %1 +} + +define i1 @negnormal_f16(half %x) nounwind { +; GFX7SELDAG-LABEL: negnormal_f16: +; GFX7SELDAG: ; %bb.0: +; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7SELDAG-NEXT: s_movk_i32 s6, 0x7800 +; GFX7SELDAG-NEXT: v_bfe_i32 v1, v0, 0, 16 +; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0 +; GFX7SELDAG-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0 +; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX7SELDAG-NEXT: v_cmp_gt_i32_e64 s[4:5], 0, v1 +; GFX7SELDAG-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0 +; GFX7SELDAG-NEXT: s_and_b64 s[4:5], vcc, s[4:5] +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7GLISEL-LABEL: negnormal_f16: +; GFX7GLISEL: ; %bb.0: +; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v0 +; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX7GLISEL-NEXT: v_bfe_u32 v2, v1, 0, 16 +; GFX7GLISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], v0, v2 +; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v1 +; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800 +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1 +; GFX7GLISEL-NEXT: s_and_b64 s[4:5], vcc, s[4:5] +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: negnormal_f16: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 8 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: negnormal_f16: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 8 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; 
GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: negnormal_f16: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 8 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: negnormal_f16: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 8 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 8) ; 0x008 + ret i1 %1 +} + +define i1 @possubnormal_f16(half %x) nounwind { +; GFX7SELDAG-LABEL: possubnormal_f16: +; GFX7SELDAG: ; %bb.0: +; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x3ff +; GFX7SELDAG-NEXT: v_add_i32_e32 v0, vcc, -1, v0 +; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX7SELDAG-NEXT: v_cmp_gt_u32_e32 vcc, s4, v0 +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7GLISEL-LABEL: possubnormal_f16: +; GFX7GLISEL: ; %bb.0: +; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 1, v0 +; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x3ff +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: possubnormal_f16: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_mov_b32_e32 v1, 0x80 +; GFX8CHECK-NEXT: v_cmp_class_f16_e32 vcc, v0, v1 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX8CHECK-NEXT: s_setpc_b64 
s[30:31] +; +; GFX9CHECK-LABEL: possubnormal_f16: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_mov_b32_e32 v1, 0x80 +; GFX9CHECK-NEXT: v_cmp_class_f16_e32 vcc, v0, v1 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: possubnormal_f16: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x80 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: possubnormal_f16: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x80 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 128) ; 0x080 + ret i1 %1 +} + +define i1 @negsubnormal_f16(half %x) nounwind { +; GFX7SELDAG-LABEL: negsubnormal_f16: +; GFX7SELDAG: ; %bb.0: +; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7SELDAG-NEXT: v_bfe_i32 v1, v0, 0, 16 +; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0 +; GFX7SELDAG-NEXT: v_add_i32_e64 v0, s[4:5], -1, v0 +; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x3ff +; GFX7SELDAG-NEXT: v_cmp_gt_i32_e32 vcc, 0, v1 +; GFX7SELDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], s4, v0 +; GFX7SELDAG-NEXT: s_and_b64 s[4:5], s[4:5], vcc +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7GLISEL-LABEL: negsubnormal_f16: +; GFX7GLISEL: ; %bb.0: +; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v0 +; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX7GLISEL-NEXT: v_bfe_u32 v2, v1, 0, 16 
+; GFX7GLISEL-NEXT: v_cmp_ne_u32_e32 vcc, v0, v2 +; GFX7GLISEL-NEXT: v_subrev_i32_e64 v0, s[4:5], 1, v1 +; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x3ff +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], v0, v1 +; GFX7GLISEL-NEXT: s_and_b64 s[4:5], s[4:5], vcc +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: negsubnormal_f16: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 16 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: negsubnormal_f16: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 16 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: negsubnormal_f16: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 16 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: negsubnormal_f16: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 16 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 16) ; 0x010 + ret i1 %1 +} + +define i1 @poszero_f16(half %x) nounwind { +; GFX7SELDAG-LABEL: poszero_f16: +; GFX7SELDAG: ; %bb.0: +; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7SELDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, 
vcc +; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7GLISEL-LABEL: poszero_f16: +; GFX7GLISEL: ; %bb.0: +; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: poszero_f16: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 64 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: poszero_f16: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 64 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: poszero_f16: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 64 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: poszero_f16: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 64 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 64) ; 0x040 + ret i1 %1 +} + +define i1 @negzero_f16(half %x) nounwind { +; GFX7SELDAG-LABEL: negzero_f16: +; GFX7SELDAG: ; %bb.0: +; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7SELDAG-NEXT: s_mov_b32 s4, 0x8000 +; GFX7SELDAG-NEXT: v_cmp_eq_u32_e32 vcc, s4, v0 +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; 
GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7GLISEL-LABEL: negzero_f16: +; GFX7GLISEL: ; %bb.0: +; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x8000 +; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: negzero_f16: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 32 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: negzero_f16: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 32 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: negzero_f16: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 32 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: negzero_f16: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 32 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 32) ; 0x020 + ret i1 %1 +} + +define i1 @posfinite_f16(half %x) nounwind { +; GFX7SELDAG-LABEL: posfinite_f16: +; GFX7SELDAG: ; %bb.0: +; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7c00 +; GFX7SELDAG-NEXT: v_cmp_gt_u32_e32 vcc, s4, v0 +; GFX7SELDAG-NEXT: 
v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7GLISEL-LABEL: posfinite_f16: +; GFX7GLISEL: ; %bb.0: +; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00 +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8SELDAG-LABEL: posfinite_f16: +; GFX8SELDAG: ; %bb.0: +; GFX8SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8SELDAG-NEXT: v_mov_b32_e32 v1, 0x1c0 +; GFX8SELDAG-NEXT: v_cmp_class_f16_e32 vcc, v0, v1 +; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX8SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9SELDAG-LABEL: posfinite_f16: +; GFX9SELDAG: ; %bb.0: +; GFX9SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9SELDAG-NEXT: v_mov_b32_e32 v1, 0x1c0 +; GFX9SELDAG-NEXT: v_cmp_class_f16_e32 vcc, v0, v1 +; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10SELDAG-LABEL: posfinite_f16: +; GFX10SELDAG: ; %bb.0: +; GFX10SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10SELDAG-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10SELDAG-NEXT: v_cmp_class_f16_e64 s4, v0, 0x1c0 +; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11SELDAG-LABEL: posfinite_f16: +; GFX11SELDAG: ; %bb.0: +; GFX11SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11SELDAG-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11SELDAG-NEXT: v_cmp_class_f16_e64 s0, v0, 0x1c0 +; GFX11SELDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 448) ; 0x1c0 + ret i1 %1 +} + +define i1 @negfinite_f16(half %x) nounwind { +; GFX7SELDAG-LABEL: negfinite_f16: +; GFX7SELDAG: ; %bb.0: +; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
+; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7c00 +; GFX7SELDAG-NEXT: v_bfe_i32 v1, v0, 0, 16 +; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0 +; GFX7SELDAG-NEXT: v_cmp_gt_i32_e32 vcc, 0, v1 +; GFX7SELDAG-NEXT: v_cmp_gt_i32_e64 s[4:5], s4, v0 +; GFX7SELDAG-NEXT: s_and_b64 s[4:5], s[4:5], vcc +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7GLISEL-LABEL: negfinite_f16: +; GFX7GLISEL: ; %bb.0: +; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v0 +; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX7GLISEL-NEXT: v_bfe_u32 v1, v1, 0, 16 +; GFX7GLISEL-NEXT: v_cmp_ne_u32_e32 vcc, v0, v1 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v0, 0x7c00 +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], v1, v0 +; GFX7GLISEL-NEXT: s_and_b64 s[4:5], s[4:5], vcc +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8CHECK-LABEL: negfinite_f16: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 56 +; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX9CHECK-LABEL: negfinite_f16: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 56 +; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX10CHECK-LABEL: negfinite_f16: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 56 +; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] +; +; GFX11CHECK-LABEL: negfinite_f16: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 
+; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 56 +; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] + %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 56) ; 0x038 + ret i1 %1 +} + define i1 @isnan_f16(half %x) nounwind { ; GFX7SELDAG-LABEL: isnan_f16: ; GFX7SELDAG: ; %bb.0: @@ -88,6 +893,16 @@ ; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX7GLISEL-LABEL: isnan_f16: +; GFX7GLISEL: ; %bb.0: +; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0 +; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00 +; GFX7GLISEL-NEXT: v_cmp_gt_u32_e32 vcc, v0, v1 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX8CHECK-LABEL: isnan_f16: ; GFX8CHECK: ; %bb.0: ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -137,6 +952,20 @@ ; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc ; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX7GLISEL-LABEL: isnan_v2f16: +; GFX7GLISEL: ; %bb.0: +; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0 +; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GFX7GLISEL-NEXT: s_movk_i32 s4, 0x7c00 +; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v1 +; GFX7GLISEL-NEXT: v_bfe_u32 v1, v1, 0, 16 +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s4, v0 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s4, v1 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX8CHECK-LABEL: isnan_v2f16: ; GFX8CHECK: ; %bb.0: ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -205,6 +1034,24 @@ ; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc ; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX7GLISEL-LABEL: isnan_v3f16: +; GFX7GLISEL: ; %bb.0: +; 
GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0 +; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GFX7GLISEL-NEXT: s_movk_i32 s4, 0x7c00 +; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v1 +; GFX7GLISEL-NEXT: v_bfe_u32 v1, v1, 0, 16 +; GFX7GLISEL-NEXT: v_and_b32_e32 v2, 0x7fff, v2 +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s4, v0 +; GFX7GLISEL-NEXT: v_bfe_u32 v2, v2, 0, 16 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s4, v1 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s4, v2 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX8SELDAG-LABEL: isnan_v3f16: ; GFX8SELDAG: ; %bb.0: ; GFX8SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -343,6 +1190,28 @@ ; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc ; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX7GLISEL-LABEL: isnan_v4f16: +; GFX7GLISEL: ; %bb.0: +; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0 +; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GFX7GLISEL-NEXT: s_movk_i32 s4, 0x7c00 +; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v1 +; GFX7GLISEL-NEXT: v_bfe_u32 v1, v1, 0, 16 +; GFX7GLISEL-NEXT: v_and_b32_e32 v2, 0x7fff, v2 +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s4, v0 +; GFX7GLISEL-NEXT: v_bfe_u32 v2, v2, 0, 16 +; GFX7GLISEL-NEXT: v_and_b32_e32 v3, 0x7fff, v3 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s4, v1 +; GFX7GLISEL-NEXT: v_bfe_u32 v3, v3, 0, 16 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s4, v2 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s4, v3 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX8SELDAG-LABEL: isnan_v4f16: ; GFX8SELDAG: ; %bb.0: ; 
GFX8SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -472,6 +1341,16 @@ ; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX7GLISEL-LABEL: isnan_f16_strictfp: +; GFX7GLISEL: ; %bb.0: +; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0 +; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00 +; GFX7GLISEL-NEXT: v_cmp_gt_u32_e32 vcc, v0, v1 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX8CHECK-LABEL: isnan_f16_strictfp: ; GFX8CHECK: ; %bb.0: ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -517,6 +1396,16 @@ ; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX7GLISEL-LABEL: isinf_f16: +; GFX7GLISEL: ; %bb.0: +; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0 +; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00 +; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX8CHECK-LABEL: isinf_f16: ; GFX8CHECK: ; %bb.0: ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -564,6 +1453,16 @@ ; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX7GLISEL-LABEL: isfinite_f16: +; GFX7GLISEL: ; %bb.0: +; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0 +; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00 +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX8CHECK-LABEL: isfinite_f16: ; GFX8CHECK: ; %bb.0: ; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)