Index: llvm/include/llvm/CodeGen/CodeGenCommonISel.h =================================================================== --- llvm/include/llvm/CodeGen/CodeGenCommonISel.h +++ llvm/include/llvm/CodeGen/CodeGenCommonISel.h @@ -217,13 +217,14 @@ MachineBasicBlock::iterator findSplitPointForStackProtector(MachineBasicBlock *BB, const TargetInstrInfo &TII); -/// Evaluates if the specified FP class test is an inversion of a simpler test. -/// An example is the test "inf|normal|subnormal|zero", which is an inversion -/// of "nan". + +/// Evaluates if the specified FP class test is better performed as the inverse +/// (i.e. fewer instructions should be required to lower it). An example is the +/// test "inf|normal|subnormal|zero", which is an inversion of "nan". /// \param Test The test as specified in 'is_fpclass' intrinsic invocation. -/// \returns The inverted test, or zero, if inversion does not produce simpler -/// test. -FPClassTest getInvertedFPClassTest(FPClassTest Test); +/// \returns The inverted test, or fcNone, if inversion does not produce a +/// simpler test. +FPClassTest invertFPClassTestIfSimpler(FPClassTest Test); /// Assuming the instruction \p MI is going to be deleted, attempt to salvage /// debug users of \p MI by writing the effect of \p MI in a DIExpression. 
Index: llvm/lib/CodeGen/CodeGenCommonISel.cpp =================================================================== --- llvm/lib/CodeGen/CodeGenCommonISel.cpp +++ llvm/lib/CodeGen/CodeGenCommonISel.cpp @@ -173,11 +173,11 @@ return SplitPoint; } -FPClassTest llvm::getInvertedFPClassTest(FPClassTest Test) { +FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test) { FPClassTest InvertedTest = static_cast<FPClassTest>(~Test & fcAllFlags); - switch (InvertedTest) { - default: - break; + // Pick the direction with fewer tests + // TODO: Handle more combinations of cases that can be handled together + switch (static_cast<unsigned>(InvertedTest)) { case fcNan: case fcSNan: case fcQNan: @@ -196,9 +196,13 @@ case fcFinite: case fcPosFinite: case fcNegFinite: + case fcSubnormal | fcZero: return InvertedTest; + default: + return fcNone; } - return fcNone; + + llvm_unreachable("covered FPClassTest"); } static MachineOperand *getSalvageOpsForCopy(const MachineRegisterInfo &MRI, Index: llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -8011,6 +8011,7 @@ static FPClassTest isFCmpEqualZero(FPClassTest Test, const fltSemantics &Semantics, const MachineFunction &MF) { + // TODO: Handle unordered compares if (Test == fcZero && MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE) return fcZero; @@ -8044,7 +8045,8 @@ // Some checks may be represented as inversion of simpler check, for example // "inf|normal|subnormal|zero" => !"nan". bool IsInverted = false; - if (FPClassTest InvertedCheck = getInvertedFPClassTest(TestMask)) { + + if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(TestMask)) { IsInverted = true; TestMask = InvertedCheck; } @@ -8066,7 +8068,7 @@ // to a compare with 0 since it will also be true for denormals. return DAG.getSetCC(DL, ResultVT, Op, DAG.getConstantFP(0.0, DL, OperandVT), - IsInverted ? 
ISD::SETUNE : ISD::SETOEQ); + IsInverted ? ISD::SETONE : ISD::SETOEQ); } if (TestMask == fcNan) @@ -8158,8 +8160,6 @@ // fcZero | fcSubnormal => test all exponent bits are 0 // TODO: Handle sign bit specific cases if (PartialCheck == (fcZero | fcSubnormal)) { - assert(!IsInverted && "should handle inverted case"); - SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV); SDValue ExpIsZero = DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ); Index: llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll +++ llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll @@ -1608,15 +1608,9 @@ ; GFX7SELDAG: ; %bb.0: ; %entry ; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7bff -; GFX7SELDAG-NEXT: s_movk_i32 s6, 0x7800 -; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0 -; GFX7SELDAG-NEXT: v_cmp_lt_i32_e64 s[4:5], s4, v0 -; GFX7SELDAG-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0 -; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX7SELDAG-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0 -; GFX7SELDAG-NEXT: s_or_b64 s[4:5], s[4:5], vcc -; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7c00, v0 +; GFX7SELDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX7GLISEL-LABEL: not_issubnormal_or_zero_f16: Index: llvm/test/CodeGen/X86/is_fpclass.ll =================================================================== --- llvm/test/CodeGen/X86/is_fpclass.ll +++ llvm/test/CodeGen/X86/is_fpclass.ll @@ -753,18 +753,14 @@ ; CHECK-32-NEXT: fnstsw %ax ; CHECK-32-NEXT: # kill: def $ah killed $ah killed $ax ; CHECK-32-NEXT: sahf -; CHECK-32-NEXT: setp %cl ; CHECK-32-NEXT: setne %al -; CHECK-32-NEXT: orb %cl, %al ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: not_iszero_f: ; 
CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: xorps %xmm1, %xmm1 -; CHECK-64-NEXT: cmpneqss %xmm0, %xmm1 -; CHECK-64-NEXT: movd %xmm1, %eax -; CHECK-64-NEXT: andl $1, %eax -; CHECK-64-NEXT: # kill: def $al killed $al killed $eax +; CHECK-64-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-64-NEXT: setne %al ; CHECK-64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 927) ; ~0x60 = "~zero" @@ -873,26 +869,15 @@ define i1 @not_issubnormal_or_zero_f(float %x) { ; CHECK-32-LABEL: not_issubnormal_or_zero_f: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: setge %cl -; CHECK-32-NEXT: addl $-8388608, %eax # imm = 0xFF800000 -; CHECK-32-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 -; CHECK-32-NEXT: setb %al -; CHECK-32-NEXT: orb %cl, %al +; CHECK-32-NEXT: testl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 +; CHECK-32-NEXT: setne %al ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: not_issubnormal_or_zero_f: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: setge %cl -; CHECK-64-NEXT: addl $-8388608, %eax # imm = 0xFF800000 -; CHECK-64-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 -; CHECK-64-NEXT: setb %al -; CHECK-64-NEXT: orb %cl, %al +; CHECK-64-NEXT: testl $2139095040, %eax # imm = 0x7F800000 +; CHECK-64-NEXT: setne %al ; CHECK-64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 783) ; ~0xf0 = "~(subnormal|zero)" @@ -902,26 +887,20 @@ define i1 @not_issubnormal_or_zero_f_daz(float %x) #0 { ; CHECK-32-LABEL: not_issubnormal_or_zero_f_daz: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 
0x7F800000 -; CHECK-32-NEXT: setge %cl -; CHECK-32-NEXT: addl $-8388608, %eax # imm = 0xFF800000 -; CHECK-32-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 -; CHECK-32-NEXT: setb %al -; CHECK-32-NEXT: orb %cl, %al +; CHECK-32-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-32-NEXT: fldz +; CHECK-32-NEXT: fucompp +; CHECK-32-NEXT: fnstsw %ax +; CHECK-32-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-32-NEXT: sahf +; CHECK-32-NEXT: setne %al ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: not_issubnormal_or_zero_f_daz: ; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: setge %cl -; CHECK-64-NEXT: addl $-8388608, %eax # imm = 0xFF800000 -; CHECK-64-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 -; CHECK-64-NEXT: setb %al -; CHECK-64-NEXT: orb %cl, %al +; CHECK-64-NEXT: xorps %xmm1, %xmm1 +; CHECK-64-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-64-NEXT: setne %al ; CHECK-64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 783) ; ~0xf0 = "~(subnormal|zero)" @@ -931,26 +910,15 @@ define i1 @not_issubnormal_or_zero_f_maybe_daz(float %x) #1 { ; CHECK-32-LABEL: not_issubnormal_or_zero_f_maybe_daz: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: setge %cl -; CHECK-32-NEXT: addl $-8388608, %eax # imm = 0xFF800000 -; CHECK-32-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 -; CHECK-32-NEXT: setb %al -; CHECK-32-NEXT: orb %cl, %al +; CHECK-32-NEXT: testl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 +; CHECK-32-NEXT: setne %al ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: not_issubnormal_or_zero_f_maybe_daz: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl 
$2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: setge %cl -; CHECK-64-NEXT: addl $-8388608, %eax # imm = 0xFF800000 -; CHECK-64-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 -; CHECK-64-NEXT: setb %al -; CHECK-64-NEXT: orb %cl, %al +; CHECK-64-NEXT: testl $2139095040, %eax # imm = 0x7F800000 +; CHECK-64-NEXT: setne %al ; CHECK-64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 783) ; ~0xf0 = "~(subnormal|zero)"