Index: llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -7211,6 +7211,9 @@ return Legalized; } + // TODO: Try inverting the test with getInvertedFPClassTest like the DAG + // version + unsigned BitSize = SrcTy.getScalarSizeInBits(); const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType()); @@ -7266,6 +7269,18 @@ Mask &= ~fcNegFinite; } + if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) { + // fcZero | fcSubnormal => test all exponent bits are 0 + // TODO: Handle sign bit specific cases + // TODO: Handle inverted case + if (PartialCheck == (fcZero | fcSubnormal)) { + auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC); + appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, + ExpBits, ZeroC)); + Mask &= ~PartialCheck; + } + } + // Check for individual classes. if (FPClassTest PartialCheck = Mask & fcZero) { if (PartialCheck == fcPosZero) Index: llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -8102,12 +8102,8 @@ // exceptions are ignored. if (Flags.hasNoFPExcept() && isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) { - // Even if the condition isn't legal, we're probably better off expanding it - // if it's the combined 0 || denormal compare. - if (isFCmpEqualZero(Test, Semantics, DAG.getMachineFunction()) && - (Test != fcZero || - isCondCodeLegalOrCustom(IsInverted ? ISD::SETUNE : ISD::SETOEQ, + (isCondCodeLegalOrCustom(IsInverted ? ISD::SETUNE : ISD::SETOEQ, OperandVT.getScalarType().getSimpleVT()))) { // If denormals could be implicitly treated as 0, this is not equivalent // to a compare with 0 since it will also be true for denormals. @@ -8201,6 +8197,20 @@ } appendResult(PartialRes); + if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) { + // fcZero | fcSubnormal => test all exponent bits are 0 + // TODO: Handle sign bit specific cases + if (PartialCheck == (fcZero | fcSubnormal)) { + assert(!IsInverted && "should handle inverted case"); + + SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV); + SDValue ExpIsZero = + DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ); + appendResult(ExpIsZero); + Test &= ~PartialCheck & fcAllFlags; + } + } + // Check for individual classes. if (unsigned PartialCheck = Test & fcZero) { Index: llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll +++ llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll @@ -1553,27 +1553,18 @@ ; GFX7SELDAG: ; %bb.0: ; %entry ; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7SELDAG-NEXT: s_movk_i32 s6, 0x3ff -; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0 +; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7c00, v0 ; GFX7SELDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX7SELDAG-NEXT: v_add_i32_e64 v0, s[4:5], -1, v0 -; GFX7SELDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], s6, v0 -; GFX7SELDAG-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX7GLISEL-LABEL: issubnormal_or_zero_f16: ; GFX7GLISEL: ; %bb.0: ; %entry ; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0 -; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v0 -; GFX7GLISEL-NEXT: v_subrev_i32_e64 v0, s[4:5], 1, v0 -; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7c00, v0 ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x3ff -; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], v0, v1 -; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX8CHECK-LABEL: issubnormal_or_zero_f16: @@ -1756,18 +1747,15 @@ ; GFX7GLISEL-LABEL: not_isnormal_f16: ; GFX7GLISEL: ; %bb.0: ; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0 -; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v0 -; GFX7GLISEL-NEXT: v_subrev_i32_e64 v0, s[4:5], 1, v0 +; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v0 +; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7c00, v0 ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff -; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], v0, v2 +; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v1 ; GFX7GLISEL-NEXT: s_movk_i32 s6, 0x7c00 +; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v0 ; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v1 -; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], vcc -; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s6, v1 +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s6, v0 ; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], vcc ; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] ; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] @@ -1827,23 +1815,20 @@ ; GFX7GLISEL: ; %bb.0: ; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v0 +; GFX7GLISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0 +; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7c00, v0 +; GFX7GLISEL-NEXT: v_and_b32_e32 v3, 0xffff, v1 ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX7GLISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1 -; GFX7GLISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], v0, v2 -; GFX7GLISEL-NEXT: v_subrev_i32_e64 v0, s[6:7], 1, v1 -; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX7GLISEL-NEXT: v_mov_b32_e32 v3, 0x3ff -; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 -; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[6:7], v0, v3 ; GFX7GLISEL-NEXT: s_movk_i32 s8, 0x7c00 +; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], s8, v3 ; GFX7GLISEL-NEXT: s_or_b64 s[6:7], vcc, s[6:7] -; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, s8, v2 -; GFX7GLISEL-NEXT: s_or_b64 s[6:7], s[6:7], vcc -; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s8, v2 +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s8, v3 ; GFX7GLISEL-NEXT: s_or_b64 s[6:7], s[6:7], vcc ; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v1 ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800 +; GFX7GLISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], v2, v3 ; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1 ; GFX7GLISEL-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5] @@ -1905,23 +1890,20 @@ ; GFX7GLISEL: ; %bb.0: ; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v0 +; GFX7GLISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0 +; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7c00, v0 +; GFX7GLISEL-NEXT: v_and_b32_e32 v3, 0xffff, v1 ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX7GLISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1 -; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v0, v2 -; GFX7GLISEL-NEXT: v_subrev_i32_e64 v0, s[6:7], 1, v1 -; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX7GLISEL-NEXT: v_mov_b32_e32 v3, 0x3ff -; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 -; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[6:7], v0, v3 ; GFX7GLISEL-NEXT: s_movk_i32 s8, 0x7c00 +; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], s8, v3 ; GFX7GLISEL-NEXT: s_or_b64 s[6:7], vcc, s[6:7] -; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, s8, v2 -; GFX7GLISEL-NEXT: s_or_b64 s[6:7], s[6:7], vcc -; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s8, v2 +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s8, v3 ; GFX7GLISEL-NEXT: s_or_b64 s[6:7], s[6:7], vcc ; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v1 ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800 +; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v2, v3 ; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1 ; GFX7GLISEL-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5] Index: llvm/test/CodeGen/X86/is_fpclass.ll =================================================================== --- llvm/test/CodeGen/X86/is_fpclass.ll +++ llvm/test/CodeGen/X86/is_fpclass.ll @@ -792,24 +792,15 @@ define i1 @issubnormal_or_zero_f(float %x) { ; CHECK-32-LABEL: issubnormal_or_zero_f: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: sete %cl -; CHECK-32-NEXT: decl %eax -; CHECK-32-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF -; CHECK-32-NEXT: setb %al -; CHECK-32-NEXT: orb %cl, %al +; CHECK-32-NEXT: testl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 +; CHECK-32-NEXT: sete %al ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: issubnormal_or_zero_f: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: sete %cl -; CHECK-64-NEXT: decl %eax -; CHECK-64-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF -; CHECK-64-NEXT: setb %al -; CHECK-64-NEXT: orb %cl, %al +; CHECK-64-NEXT: testl $2139095040, %eax # imm = 0x7F800000 +; CHECK-64-NEXT: sete %al ; CHECK-64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 240) ; 0xf0 = "subnormal|zero" @@ -819,24 +810,15 @@ define i1 @issubnormal_or_zero_f_daz(float %x) #0 { ; CHECK-32-LABEL: issubnormal_or_zero_f_daz: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: flds {{[0-9]+}}(%esp) -; CHECK-32-NEXT: fldz -; CHECK-32-NEXT: fucompp -; CHECK-32-NEXT: fnstsw %ax -; CHECK-32-NEXT: # kill: def $ah killed $ah killed $ax -; CHECK-32-NEXT: sahf -; CHECK-32-NEXT: setnp %cl +; CHECK-32-NEXT: testl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 ; CHECK-32-NEXT: sete %al -; CHECK-32-NEXT: andb %cl, %al ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: issubnormal_or_zero_f_daz: ; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: xorps %xmm1, %xmm1 -; CHECK-64-NEXT: cmpeqss %xmm0, %xmm1 -; CHECK-64-NEXT: movd %xmm1, %eax -; CHECK-64-NEXT: andl $1, %eax -; CHECK-64-NEXT: # kill: def $al killed $al killed $eax +; CHECK-64-NEXT: movd %xmm0, %eax +; CHECK-64-NEXT: testl $2139095040, %eax # imm = 0x7F800000 +; CHECK-64-NEXT: sete %al ; CHECK-64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 240) ; 0xf0 = "subnormal|zero" @@ -846,24 +828,15 @@ define i1 @issubnormal_or_zero_f_maybe_daz(float %x) #1 { ; CHECK-32-LABEL: issubnormal_or_zero_f_maybe_daz: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: sete %cl -; CHECK-32-NEXT: decl %eax -; CHECK-32-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF -; CHECK-32-NEXT: setb %al -; CHECK-32-NEXT: orb %cl, %al +; CHECK-32-NEXT: testl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 +; CHECK-32-NEXT: sete %al ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: issubnormal_or_zero_f_maybe_daz: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: sete %cl -; CHECK-64-NEXT: decl %eax -; CHECK-64-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF -; CHECK-64-NEXT: setb %al -; CHECK-64-NEXT: orb %cl, %al +; CHECK-64-NEXT: testl $2139095040, %eax # imm = 0x7F800000 +; CHECK-64-NEXT: sete %al ; CHECK-64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 240) ; 0xf0 = "subnormal|zero"