Index: llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -7372,6 +7372,8 @@ return Legalized; } + // TODO: Try getInvertedFPClassTest + unsigned BitSize = SrcTy.getScalarSizeInBits(); const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType()); @@ -7425,6 +7427,18 @@ Mask &= ~fcNegFinite; } + if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) { + // fcZero | fcSubnormal => test all exponent bits are 0 + // TODO: Handle sign bit specific cases + // TODO: Handle inverted case + if (PartialCheck == (fcZero | fcSubnormal)) { + auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC); + appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, + ExpBits, ZeroC)); + Mask &= ~PartialCheck & fcAllFlags; + } + } + // Check for individual classes. if (unsigned PartialCheck = Mask & fcZero) { if (PartialCheck == fcPosZero) Index: llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -8151,6 +8151,20 @@ } appendResult(PartialRes); + if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) { + // fcZero | fcSubnormal => test all exponent bits are 0 + // TODO: Handle sign bit specific cases + if (PartialCheck == (fcZero | fcSubnormal)) { + assert(!IsInverted && "should handle inverted case"); + + SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV); + SDValue ExpIsZero = + DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ); + appendResult(ExpIsZero); + Test &= ~PartialCheck & fcAllFlags; + } + } + // Check for individual classes. if (unsigned PartialCheck = Test & fcZero) { Index: llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll +++ llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll @@ -1553,27 +1553,18 @@ ; GFX7SELDAG: ; %bb.0: ; %entry ; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7SELDAG-NEXT: s_movk_i32 s6, 0x3ff -; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0 +; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7c00, v0 ; GFX7SELDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX7SELDAG-NEXT: v_add_i32_e64 v0, s[4:5], -1, v0 -; GFX7SELDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], s6, v0 -; GFX7SELDAG-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX7GLISEL-LABEL: issubnormal_or_zero_f16: ; GFX7GLISEL: ; %bb.0: ; %entry ; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0 -; GFX7GLISEL-NEXT: v_bfe_u32 v1, v0, 0, 16 -; GFX7GLISEL-NEXT: v_subrev_i32_e64 v0, s[4:5], 1, v0 -; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x3ff -; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], v0, v1 -; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7c00, v0 +; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX8CHECK-LABEL: issubnormal_or_zero_f16: @@ -1756,18 +1747,15 @@ ; GFX7GLISEL-LABEL: not_isnormal_f16: ; GFX7GLISEL: ; %bb.0: ; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0 -; GFX7GLISEL-NEXT: v_bfe_u32 v1, v0, 0, 16 -; GFX7GLISEL-NEXT: v_subrev_i32_e64 v0, s[4:5], 1, v0 -; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff -; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], v0, v2 +; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v0 +; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7c00, v0 +; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX7GLISEL-NEXT: v_bfe_u32 v0, v1, 0, 16 ; GFX7GLISEL-NEXT: s_movk_i32 s6, 0x7c00 +; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v0 ; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v1 -; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], vcc -; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s6, v1 +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s6, v0 ; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], vcc ; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] ; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] @@ -1827,23 +1815,20 @@ ; GFX7GLISEL: ; %bb.0: ; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v0 -; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX7GLISEL-NEXT: v_bfe_u32 v2, v1, 0, 16 -; GFX7GLISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], v0, v2 -; GFX7GLISEL-NEXT: v_subrev_i32_e64 v0, s[6:7], 1, v1 -; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX7GLISEL-NEXT: v_mov_b32_e32 v3, 0x3ff -; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 -; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[6:7], v0, v3 +; GFX7GLISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0 +; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7c00, v0 +; GFX7GLISEL-NEXT: v_bfe_u32 v3, v1, 0, 16 +; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16 ; GFX7GLISEL-NEXT: s_movk_i32 s8, 0x7c00 +; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], s8, v3 ; GFX7GLISEL-NEXT: s_or_b64 s[6:7], vcc, s[6:7] -; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, s8, v2 -; GFX7GLISEL-NEXT: s_or_b64 s[6:7], s[6:7], vcc -; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s8, v2 +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s8, v3 ; GFX7GLISEL-NEXT: s_or_b64 s[6:7], s[6:7], vcc ; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v1 ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800 +; GFX7GLISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], v2, v3 ; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1 ; GFX7GLISEL-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5] @@ -1905,23 +1890,20 @@ ; GFX7GLISEL: ; %bb.0: ; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v0 -; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX7GLISEL-NEXT: v_bfe_u32 v2, v1, 0, 16 -; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v0, v2 -; GFX7GLISEL-NEXT: v_subrev_i32_e64 v0, s[6:7], 1, v1 -; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX7GLISEL-NEXT: v_mov_b32_e32 v3, 0x3ff -; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 -; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[6:7], v0, v3 +; GFX7GLISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0 +; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7c00, v0 +; GFX7GLISEL-NEXT: v_bfe_u32 v3, v1, 0, 16 +; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16 ; GFX7GLISEL-NEXT: s_movk_i32 s8, 0x7c00 +; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], s8, v3 ; GFX7GLISEL-NEXT: s_or_b64 s[6:7], vcc, s[6:7] -; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, s8, v2 -; GFX7GLISEL-NEXT: s_or_b64 s[6:7], s[6:7], vcc -; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s8, v2 +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s8, v3 ; GFX7GLISEL-NEXT: s_or_b64 s[6:7], s[6:7], vcc ; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v1 ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800 +; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v2, v3 ; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1 ; GFX7GLISEL-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5] Index: llvm/test/CodeGen/X86/is_fpclass.ll =================================================================== --- llvm/test/CodeGen/X86/is_fpclass.ll +++ llvm/test/CodeGen/X86/is_fpclass.ll @@ -810,24 +810,15 @@ define i1 @issubnormal_or_zero_f(float %x) { ; CHECK-32-LABEL: issubnormal_or_zero_f: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: sete %cl -; CHECK-32-NEXT: decl %eax -; CHECK-32-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF -; CHECK-32-NEXT: setb %al -; CHECK-32-NEXT: orb %cl, %al +; CHECK-32-NEXT: testl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 +; CHECK-32-NEXT: sete %al ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: issubnormal_or_zero_f: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: sete %cl -; CHECK-64-NEXT: decl %eax -; CHECK-64-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF -; CHECK-64-NEXT: setb %al -; CHECK-64-NEXT: orb %cl, %al +; CHECK-64-NEXT: testl $2139095040, %eax # imm = 0x7F800000 +; CHECK-64-NEXT: sete %al ; CHECK-64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 240) ; 0xf0 = "subnormal|zero" @@ -864,24 +855,15 @@ define i1 @issubnormal_or_zero_f_maybe_daz(float %x) #1 { ; CHECK-32-LABEL: issubnormal_or_zero_f_maybe_daz: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: sete %cl -; CHECK-32-NEXT: decl %eax -; CHECK-32-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF -; CHECK-32-NEXT: setb %al -; CHECK-32-NEXT: orb %cl, %al +; CHECK-32-NEXT: testl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 +; CHECK-32-NEXT: sete %al ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: issubnormal_or_zero_f_maybe_daz: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: sete %cl -; CHECK-64-NEXT: decl %eax -; CHECK-64-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF -; CHECK-64-NEXT: setb %al -; CHECK-64-NEXT: orb %cl, %al +; CHECK-64-NEXT: testl $2139095040, %eax # imm = 0x7F800000 +; CHECK-64-NEXT: sete %al ; CHECK-64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 240) ; 0xf0 = "subnormal|zero"