Index: include/llvm/IR/Constant.h =================================================================== --- include/llvm/IR/Constant.h +++ include/llvm/IR/Constant.h @@ -71,6 +71,11 @@ /// Return true if the value is the smallest signed value. bool isMinSignedValue() const; + /// Return true if the floating-point scalar value is not a NaN or if the + /// floating-point vector value has no NaN elements. Return false if a value + /// cannot be determined (for example, constant expressions). + bool isKnownNeverNaN() const; + /// Return true if evaluation of this constant could trap. This is true for /// things like constant expressions that could divide by zero. bool canTrap() const; Index: lib/IR/Constants.cpp =================================================================== --- lib/IR/Constants.cpp +++ lib/IR/Constants.cpp @@ -203,6 +203,33 @@ return false; } +bool Constant::isKnownNeverNaN() const { + assert(getType()->isFPOrFPVectorTy() && "Querying for NaN on non-FP type"); + + // Handle scalar constants. + if (auto *CFP = dyn_cast<ConstantFP>(this)) + return !CFP->isNaN(); + + // Bail out for constant expressions. + if (!getType()->isVectorTy()) + return false; + + // For vectors, verify that each element is not NaN. + unsigned NumElts = getType()->getVectorNumElements(); + for (unsigned i = 0; i != NumElts; ++i) { + Constant *Elt = getAggregateElement(i); + if (!Elt) + return false; + if (isa<UndefValue>(Elt)) + continue; + auto *CElt = dyn_cast<ConstantFP>(Elt); + if (!CElt || CElt->isNaN()) + return false; + } + // All elements were confirmed not-NaN or undefined. + return true; +} + /// Constructor to create a '0' constant of arbitrary type. 
Constant *Constant::getNullValue(Type *Ty) { switch (Ty->getTypeID()) { Index: lib/Transforms/InstCombine/InstCombineAndOrXor.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -938,21 +938,12 @@ if (LHS0->getType() != RHS0->getType()) return nullptr; - auto *LHSC = dyn_cast<ConstantFP>(LHS1); - auto *RHSC = dyn_cast<ConstantFP>(RHS1); - if (LHSC && RHSC) { - assert(!LHSC->getValueAPF().isNaN() && !RHSC->getValueAPF().isNaN() && - "Failed to simplify fcmp ord/uno with NAN operand"); - // Ignore the constants because they can't be NANs: - // (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y) - // (fcmp uno x, c) & (fcmp uno y, c) -> (fcmp uno x, y) - return Builder.CreateFCmp(PredL, LHS0, RHS0); - } - - // Handle vector zeros. This occurs because the canonical form of - // "fcmp ord/uno x,x" is "fcmp ord/uno x, 0". - if (isa<ConstantAggregateZero>(LHS1) && - isa<ConstantAggregateZero>(RHS1)) + // FCmp canonicalization ensures that (fcmp ord/uno X, X) and + // (fcmp ord/uno X, C) will be transformed to (fcmp X, 0.0). + if (match(LHS1, m_Zero()) && LHS1 == RHS1) + // Ignore the constants because they are obviously not NANs: + // (fcmp ord x, 0.0) & (fcmp ord y, 0.0) -> (fcmp ord x, y) + // (fcmp uno x, 0.0) | (fcmp uno y, 0.0) -> (fcmp uno x, y) return Builder.CreateFCmp(PredL, LHS0, RHS0); } Index: lib/Transforms/InstCombine/InstCombineCompares.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineCompares.cpp +++ lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -4963,6 +4963,17 @@ } } + // If we're just checking for a NaN (ORD/UNO) and have a constant operand, + // then try to canonicalize the constant to 0.0 to simplify pattern matching + // for other folds. We could canonicalize the constant operand to Op0 just as + // easily, but that requires adjusting the canonicalization above too. 
+ Constant *C; + if ((Pred == CmpInst::FCMP_ORD || Pred == CmpInst::FCMP_UNO) && + match(Op1, m_Constant(C)) && !C->isNullValue() && C->isKnownNeverNaN()) { + I.setOperand(1, ConstantFP::getNullValue(Op0->getType())); + return &I; + } + // Test if the FCmpInst instruction is used exclusively by a select as // part of a minimum or maximum operation. If so, refrain from doing // any other folding. This helps out other analyses which understand Index: test/Transforms/InstCombine/and-fcmp.ll =================================================================== --- test/Transforms/InstCombine/and-fcmp.ll +++ test/Transforms/InstCombine/and-fcmp.ll @@ -41,10 +41,8 @@ define <3 x i1> @fcmp_ord_nonzero_vec(<3 x float> %x, <3 x float> %y) { ; CHECK-LABEL: @fcmp_ord_nonzero_vec( -; CHECK-NEXT: [[CMP1:%.*]] = fcmp ord <3 x float> %x, -; CHECK-NEXT: [[CMP2:%.*]] = fcmp ord <3 x float> %y, -; CHECK-NEXT: [[AND:%.*]] = and <3 x i1> [[CMP1]], [[CMP2]] -; CHECK-NEXT: ret <3 x i1> [[AND]] +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord <3 x float> %x, %y +; CHECK-NEXT: ret <3 x i1> [[TMP1]] ; %cmp1 = fcmp ord <3 x float> %x, %cmp2 = fcmp ord <3 x float> %y, Index: test/Transforms/InstCombine/fcmp-special.ll =================================================================== --- test/Transforms/InstCombine/fcmp-special.ll +++ test/Transforms/InstCombine/fcmp-special.ll @@ -35,7 +35,7 @@ define i1 @ord_nonzero(double %x) { ; CHECK-LABEL: @ord_nonzero( -; CHECK-NEXT: [[F:%.*]] = fcmp ord double %x, 3.000000e+00 +; CHECK-NEXT: [[F:%.*]] = fcmp ord double %x, 0.000000e+00 ; CHECK-NEXT: ret i1 [[F]] ; %f = fcmp ord double %x, 3.0 @@ -62,7 +62,7 @@ define i1 @uno_nonzero(float %x) { ; CHECK-LABEL: @uno_nonzero( -; CHECK-NEXT: [[F:%.*]] = fcmp uno float %x, 3.000000e+00 +; CHECK-NEXT: [[F:%.*]] = fcmp uno float %x, 0.000000e+00 ; CHECK-NEXT: ret i1 [[F]] ; %f = fcmp uno float %x, 3.0 @@ -89,7 +89,7 @@ define <2 x i1> @ord_nonzero_vec(<2 x float> %x) { ; CHECK-LABEL: @ord_nonzero_vec( -; CHECK-NEXT: 
[[F:%.*]] = fcmp ord <2 x float> %x, +; CHECK-NEXT: [[F:%.*]] = fcmp ord <2 x float> %x, zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[F]] ; %f = fcmp ord <2 x float> %x, @@ -116,7 +116,7 @@ define <2 x i1> @uno_nonzero_vec(<2 x double> %x) { ; CHECK-LABEL: @uno_nonzero_vec( -; CHECK-NEXT: [[F:%.*]] = fcmp uno <2 x double> %x, +; CHECK-NEXT: [[F:%.*]] = fcmp uno <2 x double> %x, zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[F]] ; %f = fcmp uno <2 x double> %x, Index: test/Transforms/InstCombine/or-fcmp.ll =================================================================== --- test/Transforms/InstCombine/or-fcmp.ll +++ test/Transforms/InstCombine/or-fcmp.ll @@ -14,10 +14,8 @@ define <3 x i1> @fcmp_uno_nonzero_vec(<3 x float> %x, <3 x float> %y) { ; CHECK-LABEL: @fcmp_uno_nonzero_vec( -; CHECK-NEXT: [[CMP1:%.*]] = fcmp uno <3 x float> %x, -; CHECK-NEXT: [[CMP2:%.*]] = fcmp uno <3 x float> %y, -; CHECK-NEXT: [[OR:%.*]] = or <3 x i1> [[CMP1]], [[CMP2]] -; CHECK-NEXT: ret <3 x i1> [[OR]] +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno <3 x float> %x, %y +; CHECK-NEXT: ret <3 x i1> [[TMP1]] ; %cmp1 = fcmp uno <3 x float> %x, %cmp2 = fcmp uno <3 x float> %y,