Index: llvm/include/llvm/CodeGen/CodeGenCommonISel.h =================================================================== --- llvm/include/llvm/CodeGen/CodeGenCommonISel.h +++ llvm/include/llvm/CodeGen/CodeGenCommonISel.h @@ -217,13 +217,13 @@ MachineBasicBlock::iterator findSplitPointForStackProtector(MachineBasicBlock *BB, const TargetInstrInfo &TII); -/// Evaluates if the specified FP class test is an inversion of a simpler test. -/// An example is the test "inf|normal|subnormal|zero", which is an inversion -/// of "nan". -/// \param Test The test as specified in 'is_fpclass' intrinsic invocation. -/// \returns The inverted test, or zero, if inversion does not produce simpler -/// test. -FPClassTest getInvertedFPClassTest(FPClassTest Test); +/// Evaluates if the specified FP class test is better performed as the inverse +/// (i.e. fewer instructions should be required to lower it). An example is the +/// test "inf|normal|subnormal|zero", which is an inversion of "nan". +/// \param Test The test as specified in 'is_fpclass' intrinsic invocation. +/// \returns The inverted test, or fcNone, if inversion does not produce a +/// simpler test. +FPClassTest invertFPClassTestIfSimpler(FPClassTest Test); /// Assuming the instruction \p MI is going to be deleted, attempt to salvage /// debug users of \p MI by writing the effect of \p MI in a DIExpression. 
Index: llvm/lib/CodeGen/CodeGenCommonISel.cpp =================================================================== --- llvm/lib/CodeGen/CodeGenCommonISel.cpp +++ llvm/lib/CodeGen/CodeGenCommonISel.cpp @@ -173,11 +173,11 @@ return SplitPoint; } -FPClassTest llvm::getInvertedFPClassTest(FPClassTest Test) { +FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test) { FPClassTest InvertedTest = static_cast<FPClassTest>(~Test & fcAllFlags); - switch (InvertedTest) { - default: - break; + // Pick the direction with fewer tests + // TODO: Handle more combinations of cases that can be handled together + switch (static_cast<unsigned>(InvertedTest)) { case fcNan: case fcSNan: case fcQNan: @@ -196,9 +196,13 @@ case fcFinite: case fcPosFinite: case fcNegFinite: + case fcSubnormal | fcZero: return InvertedTest; + default: + return fcNone; } - return fcNone; + + llvm_unreachable("covered FPClassTest"); } static MachineOperand *getSalvageOpsForCopy(const MachineRegisterInfo &MRI, Index: llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -7330,6 +7330,7 @@ } // TODO: Try getInvertedFPClassTest + bool IsInverted = false; unsigned BitSize = SrcTy.getScalarSizeInBits(); const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType()); @@ -7390,6 +7391,7 @@ // TODO: Handle inverted case if (PartialCheck == (fcZero | fcSubnormal)) { auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC); + assert(!IsInverted); appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, ExpBits, ZeroC)); Mask &= ~PartialCheck & fcAllFlags; Index: llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -8044,7 +8044,8 @@ // Some checks may be represented as 
inversion of simpler check, for example // "inf|normal|subnormal|zero" => !"nan". bool IsInverted = false; - if (FPClassTest InvertedCheck = getInvertedFPClassTest(TestMask)) { + + if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(TestMask)) { IsInverted = true; TestMask = InvertedCheck; } @@ -8158,8 +8159,6 @@ // fcZero | fcSubnormal => test all exponent bits are 0 // TODO: Handle sign bit specific cases if (PartialCheck == (fcZero | fcSubnormal)) { - assert(!IsInverted && "should handle inverted case"); - SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV); SDValue ExpIsZero = DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ); Index: llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll +++ llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll @@ -2304,15 +2304,9 @@ ; GFX7SELDAG: ; %bb.0: ; %entry ; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7bff -; GFX7SELDAG-NEXT: s_movk_i32 s6, 0x7800 -; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0 -; GFX7SELDAG-NEXT: v_cmp_lt_i32_e64 s[4:5], s4, v0 -; GFX7SELDAG-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0 -; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX7SELDAG-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0 -; GFX7SELDAG-NEXT: s_or_b64 s[4:5], s[4:5], vcc -; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7c00, v0 +; GFX7SELDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX7GLISEL-LABEL: not_issubnormal_or_zero_f16: Index: llvm/test/CodeGen/X86/is_fpclass.ll =================================================================== --- llvm/test/CodeGen/X86/is_fpclass.ll +++ llvm/test/CodeGen/X86/is_fpclass.ll @@ -873,26 +873,15 @@ define i1 @not_issubnormal_or_zero_f(float %x) 
{ ; CHECK-32-LABEL: not_issubnormal_or_zero_f: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: setge %cl -; CHECK-32-NEXT: addl $-8388608, %eax # imm = 0xFF800000 -; CHECK-32-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 -; CHECK-32-NEXT: setb %al -; CHECK-32-NEXT: orb %cl, %al +; CHECK-32-NEXT: testl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 +; CHECK-32-NEXT: setne %al ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: not_issubnormal_or_zero_f: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: setge %cl -; CHECK-64-NEXT: addl $-8388608, %eax # imm = 0xFF800000 -; CHECK-64-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 -; CHECK-64-NEXT: setb %al -; CHECK-64-NEXT: orb %cl, %al +; CHECK-64-NEXT: testl $2139095040, %eax # imm = 0x7F800000 +; CHECK-64-NEXT: setne %al ; CHECK-64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 783) ; ~0xf0 = "~(subnormal|zero)" @@ -902,26 +891,24 @@ define i1 @not_issubnormal_or_zero_f_daz(float %x) #0 { ; CHECK-32-LABEL: not_issubnormal_or_zero_f_daz: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: setge %cl -; CHECK-32-NEXT: addl $-8388608, %eax # imm = 0xFF800000 -; CHECK-32-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 -; CHECK-32-NEXT: setb %al +; CHECK-32-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-32-NEXT: fldz +; CHECK-32-NEXT: fucompp +; CHECK-32-NEXT: fnstsw %ax +; CHECK-32-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-32-NEXT: sahf +; CHECK-32-NEXT: setp %cl +; CHECK-32-NEXT: setne %al ; CHECK-32-NEXT: orb %cl, %al ; 
CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: not_issubnormal_or_zero_f_daz: ; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: setge %cl -; CHECK-64-NEXT: addl $-8388608, %eax # imm = 0xFF800000 -; CHECK-64-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 -; CHECK-64-NEXT: setb %al -; CHECK-64-NEXT: orb %cl, %al +; CHECK-64-NEXT: xorps %xmm1, %xmm1 +; CHECK-64-NEXT: cmpneqss %xmm0, %xmm1 +; CHECK-64-NEXT: movd %xmm1, %eax +; CHECK-64-NEXT: andl $1, %eax +; CHECK-64-NEXT: # kill: def $al killed $al killed $eax ; CHECK-64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 783) ; ~0xf0 = "~(subnormal|zero)" @@ -931,26 +918,15 @@ define i1 @not_issubnormal_or_zero_f_maybe_daz(float %x) #1 { ; CHECK-32-LABEL: not_issubnormal_or_zero_f_maybe_daz: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: setge %cl -; CHECK-32-NEXT: addl $-8388608, %eax # imm = 0xFF800000 -; CHECK-32-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 -; CHECK-32-NEXT: setb %al -; CHECK-32-NEXT: orb %cl, %al +; CHECK-32-NEXT: testl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 +; CHECK-32-NEXT: setne %al ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: not_issubnormal_or_zero_f_maybe_daz: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: setge %cl -; CHECK-64-NEXT: addl $-8388608, %eax # imm = 0xFF800000 -; CHECK-64-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 -; CHECK-64-NEXT: setb %al -; CHECK-64-NEXT: orb %cl, %al +; CHECK-64-NEXT: testl $2139095040, %eax # imm = 0x7F800000 +; CHECK-64-NEXT: setne %al ; CHECK-64-NEXT: retq 
entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 783) ; ~0xf0 = "~(subnormal|zero)"