Index: llvm/trunk/include/llvm/Analysis/ValueTracking.h =================================================================== --- llvm/trunk/include/llvm/Analysis/ValueTracking.h +++ llvm/trunk/include/llvm/Analysis/ValueTracking.h @@ -185,6 +185,11 @@ /// x < -0 --> false bool CannotBeOrderedLessThanZero(const Value *V, const TargetLibraryInfo *TLI); + /// Return true if the floating-point scalar value is not a NaN or if the + /// floating-point vector value has no NaN elements. Return false if a value + /// could ever be NaN. + bool isKnownNeverNaN(const Value *V); + /// Return true if we can prove that the specified FP value's sign bit is 0. /// /// NaN --> true/false (depending on the NaN's sign bit) Index: llvm/trunk/lib/Analysis/ValueTracking.cpp =================================================================== --- llvm/trunk/lib/Analysis/ValueTracking.cpp +++ llvm/trunk/lib/Analysis/ValueTracking.cpp @@ -2695,6 +2695,41 @@ return cannotBeOrderedLessThanZeroImpl(V, TLI, true, 0); } +bool llvm::isKnownNeverNaN(const Value *V) { + assert(V->getType()->isFPOrFPVectorTy() && "Querying for NaN on non-FP type"); + + // If we're told that NaNs won't happen, assume they won't. + if (auto *FPMathOp = dyn_cast<FPMathOperator>(V)) + if (FPMathOp->hasNoNaNs()) + return true; + + // TODO: Handle instructions and potentially recurse like other 'isKnown' + // functions. For example, the result of sitofp is never NaN. + + // Handle scalar constants. + if (auto *CFP = dyn_cast<ConstantFP>(V)) + return !CFP->isNaN(); + + // Bail out for constant expressions, but try to handle vector constants. + if (!V->getType()->isVectorTy() || !isa<Constant>(V)) + return false; + + // For vectors, verify that each element is not NaN. 
+ unsigned NumElts = V->getType()->getVectorNumElements(); + for (unsigned i = 0; i != NumElts; ++i) { + Constant *Elt = cast<Constant>(V)->getAggregateElement(i); + if (!Elt) + return false; + if (isa<UndefValue>(Elt)) + continue; + auto *CElt = dyn_cast<ConstantFP>(Elt); + if (!CElt || CElt->isNaN()) + return false; + } + // All elements were confirmed not-NaN or undefined. + return true; +} + /// If the specified value can be set by repeating the same byte in memory, /// return the i8 value that it is represented with. This is /// true for all i8 values obviously, but is also true for i32 0, i32 -1, Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp =================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -938,21 +938,12 @@ if (LHS0->getType() != RHS0->getType()) return nullptr; - auto *LHSC = dyn_cast<ConstantFP>(LHS1); - auto *RHSC = dyn_cast<ConstantFP>(RHS1); - if (LHSC && RHSC) { - assert(!LHSC->getValueAPF().isNaN() && !RHSC->getValueAPF().isNaN() && - "Failed to simplify fcmp ord/uno with NAN operand"); - // Ignore the constants because they can't be NANs: - // (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y) - // (fcmp uno x, c) & (fcmp uno y, c) -> (fcmp uno x, y) - return Builder.CreateFCmp(PredL, LHS0, RHS0); - } - - // Handle vector zeros. This occurs because the canonical form of - // "fcmp ord/uno x,x" is "fcmp ord/uno x, 0". - if (isa<ConstantAggregateZero>(LHS1) && - isa<ConstantAggregateZero>(RHS1)) + // FCmp canonicalization ensures that (fcmp ord/uno X, X) and + // (fcmp ord/uno X, C) will be transformed to (fcmp X, 0.0). 
+ if (match(LHS1, m_Zero()) && LHS1 == RHS1) + // Ignore the constants because they are obviously not NANs: + // (fcmp ord x, 0.0) & (fcmp ord y, 0.0) -> (fcmp ord x, y) + // (fcmp uno x, 0.0) | (fcmp uno y, 0.0) -> (fcmp uno x, y) return Builder.CreateFCmp(PredL, LHS0, RHS0); } Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp =================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -4963,6 +4963,19 @@ } } + // If we're just checking for a NaN (ORD/UNO) and have a non-NaN operand, + // then canonicalize the operand to 0.0. + if (Pred == CmpInst::FCMP_ORD || Pred == CmpInst::FCMP_UNO) { + if (!match(Op0, m_Zero()) && isKnownNeverNaN(Op0)) { + I.setOperand(0, ConstantFP::getNullValue(Op0->getType())); + return &I; + } + if (!match(Op1, m_Zero()) && isKnownNeverNaN(Op1)) { + I.setOperand(1, ConstantFP::getNullValue(Op0->getType())); + return &I; + } + } + // Test if the FCmpInst instruction is used exclusively by a select as // part of a minimum or maximum operation. If so, refrain from doing // any other folding. 
This helps out other analyses which understand Index: llvm/trunk/test/Transforms/InstCombine/and-fcmp.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/and-fcmp.ll +++ llvm/trunk/test/Transforms/InstCombine/and-fcmp.ll @@ -41,10 +41,8 @@ define <3 x i1> @fcmp_ord_nonzero_vec(<3 x float> %x, <3 x float> %y) { ; CHECK-LABEL: @fcmp_ord_nonzero_vec( -; CHECK-NEXT: [[CMP1:%.*]] = fcmp ord <3 x float> %x, -; CHECK-NEXT: [[CMP2:%.*]] = fcmp ord <3 x float> %y, -; CHECK-NEXT: [[AND:%.*]] = and <3 x i1> [[CMP1]], [[CMP2]] -; CHECK-NEXT: ret <3 x i1> [[AND]] +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord <3 x float> %x, %y +; CHECK-NEXT: ret <3 x i1> [[TMP1]] ; %cmp1 = fcmp ord <3 x float> %x, %cmp2 = fcmp ord <3 x float> %y, Index: llvm/trunk/test/Transforms/InstCombine/fcmp-special.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/fcmp-special.ll +++ llvm/trunk/test/Transforms/InstCombine/fcmp-special.ll @@ -35,7 +35,7 @@ define i1 @ord_nonzero(double %x) { ; CHECK-LABEL: @ord_nonzero( -; CHECK-NEXT: [[F:%.*]] = fcmp ord double %x, 3.000000e+00 +; CHECK-NEXT: [[F:%.*]] = fcmp ord double %x, 0.000000e+00 ; CHECK-NEXT: ret i1 [[F]] ; %f = fcmp ord double %x, 3.0 @@ -62,7 +62,7 @@ define i1 @uno_nonzero(float %x) { ; CHECK-LABEL: @uno_nonzero( -; CHECK-NEXT: [[F:%.*]] = fcmp uno float %x, 3.000000e+00 +; CHECK-NEXT: [[F:%.*]] = fcmp uno float %x, 0.000000e+00 ; CHECK-NEXT: ret i1 [[F]] ; %f = fcmp uno float %x, 3.0 @@ -89,7 +89,7 @@ define <2 x i1> @ord_nonzero_vec(<2 x float> %x) { ; CHECK-LABEL: @ord_nonzero_vec( -; CHECK-NEXT: [[F:%.*]] = fcmp ord <2 x float> %x, +; CHECK-NEXT: [[F:%.*]] = fcmp ord <2 x float> %x, zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[F]] ; %f = fcmp ord <2 x float> %x, @@ -116,7 +116,7 @@ define <2 x i1> @uno_nonzero_vec(<2 x double> %x) { ; CHECK-LABEL: @uno_nonzero_vec( -; CHECK-NEXT: [[F:%.*]] = fcmp uno <2 x double> %x, +; 
CHECK-NEXT: [[F:%.*]] = fcmp uno <2 x double> %x, zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[F]] ; %f = fcmp uno <2 x double> %x, @@ -148,10 +148,7 @@ define i1 @nnan_ops_to_fcmp_ord(float %x, float %y) { ; CHECK-LABEL: @nnan_ops_to_fcmp_ord( -; CHECK-NEXT: [[MUL:%.*]] = fmul nnan float %x, %y -; CHECK-NEXT: [[DIV:%.*]] = fdiv nnan float %x, %y -; CHECK-NEXT: [[CMP:%.*]] = fcmp ord float [[MUL]], [[DIV]] -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %mul = fmul nnan float %x, %y %div = fdiv nnan float %x, %y @@ -163,10 +160,7 @@ define i1 @nnan_ops_to_fcmp_uno(float %x, float %y) { ; CHECK-LABEL: @nnan_ops_to_fcmp_uno( -; CHECK-NEXT: [[MUL:%.*]] = fmul nnan float %x, %y -; CHECK-NEXT: [[DIV:%.*]] = fdiv nnan float %x, %y -; CHECK-NEXT: [[CMP:%.*]] = fcmp uno float [[MUL]], [[DIV]] -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %mul = fmul nnan float %x, %y %div = fdiv nnan float %x, %y Index: llvm/trunk/test/Transforms/InstCombine/or-fcmp.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/or-fcmp.ll +++ llvm/trunk/test/Transforms/InstCombine/or-fcmp.ll @@ -14,10 +14,8 @@ define <3 x i1> @fcmp_uno_nonzero_vec(<3 x float> %x, <3 x float> %y) { ; CHECK-LABEL: @fcmp_uno_nonzero_vec( -; CHECK-NEXT: [[CMP1:%.*]] = fcmp uno <3 x float> %x, -; CHECK-NEXT: [[CMP2:%.*]] = fcmp uno <3 x float> %y, -; CHECK-NEXT: [[OR:%.*]] = or <3 x i1> [[CMP1]], [[CMP2]] -; CHECK-NEXT: ret <3 x i1> [[OR]] +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno <3 x float> %x, %y +; CHECK-NEXT: ret <3 x i1> [[TMP1]] ; %cmp1 = fcmp uno <3 x float> %x, %cmp2 = fcmp uno <3 x float> %y,