diff --git a/llvm/include/llvm/IR/ConstantFold.h b/llvm/include/llvm/IR/ConstantFold.h --- a/llvm/include/llvm/IR/ConstantFold.h +++ b/llvm/include/llvm/IR/ConstantFold.h @@ -22,6 +22,7 @@ namespace llvm { template class ArrayRef; +class APFloat; class Value; class Constant; class Type; @@ -51,6 +52,7 @@ Constant *ConstantFoldGetElementPtr(Type *Ty, Constant *C, bool InBounds, Optional InRangeIndex, ArrayRef Idxs); +bool evaluatePredicate(unsigned Pred, const APFloat &V1, const APFloat &C2); } // End llvm namespace #endif diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -490,6 +490,9 @@ class ConstrainedFPCmpIntrinsic : public ConstrainedFPIntrinsic { public: FCmpInst::Predicate getPredicate() const; + bool isSignaling() const { + return getIntrinsicID() == Intrinsic::experimental_constrained_fcmps; + } // Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const IntrinsicInst *I) { diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -30,6 +30,7 @@ #include "llvm/Analysis/VectorUtils.h" #include "llvm/Config/config.h" #include "llvm/IR/Constant.h" +#include "llvm/IR/ConstantFold.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -1508,6 +1509,8 @@ case Intrinsic::experimental_constrained_trunc: case Intrinsic::experimental_constrained_nearbyint: case Intrinsic::experimental_constrained_rint: + case Intrinsic::experimental_constrained_fcmp: + case Intrinsic::experimental_constrained_fcmps: return true; default: return false; @@ -1786,12 +1789,12 @@ // If evaluation raised FP exception, the result can depend on rounding // mode. If the latter is unknown, folding is not possible. - if (!ORM || *ORM == RoundingMode::Dynamic) + if (ORM && *ORM == RoundingMode::Dynamic) return false; // If FP exceptions are ignored, fold the call, even if such exception is // raised. - if (!EB || *EB != fp::ExceptionBehavior::ebStrict) + if (EB && *EB != fp::ExceptionBehavior::ebStrict) return true; // Leave the calculation for runtime so that exception flags be correctly set @@ -2289,6 +2292,25 @@ return nullptr; } +static Constant *evaluateCompare(const ConstrainedFPIntrinsic *Call) { + APFloat::opStatus St = APFloat::opOK; + auto *FCmp = cast(Call); + FCmpInst::Predicate Cond = FCmp->getPredicate(); + const APFloat &Op1 = cast(FCmp->getOperand(0))->getValueAPF(); + const APFloat &Op2 = cast(FCmp->getOperand(1))->getValueAPF(); + if (FCmp->isSignaling()) { + if (Op1.isNaN() || Op2.isNaN()) + St = APFloat::opInvalidOp; + } else { + if (Op1.isSignaling() || Op2.isSignaling()) + St = APFloat::opInvalidOp; + } + bool Result = evaluatePredicate(Cond, Op1, Op2); + if (mayFoldConstrained(const_cast(FCmp), St)) + return ConstantInt::get(Call->getType(), Result); + return nullptr; +} + static Constant *ConstantFoldScalarCall2(StringRef Name, Intrinsic::ID IntrinsicID, Type *Ty, @@ -2317,8 +2339,6 @@ } if (const auto *Op1 = dyn_cast(Operands[0])) { - if (!Ty->isFloatingPointTy()) - return nullptr; const APFloat &Op1V = Op1->getValueAPF(); if (const auto *Op2 = dyn_cast(Operands[1])) { @@ -2348,6 +2368,9 @@ case Intrinsic::experimental_constrained_frem: St = Res.mod(Op2V); break; + case Intrinsic::experimental_constrained_fcmp: + case Intrinsic::experimental_constrained_fcmps: + return evaluateCompare(ConstrIntr); } if (mayFoldConstrained(const_cast(ConstrIntr), St)) diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -1801,46 +1801,8 @@ } else if (isa(C1) && isa(C2)) { const APFloat &C1V = cast(C1)->getValueAPF(); const APFloat &C2V = cast(C2)->getValueAPF(); - APFloat::cmpResult R = C1V.compare(C2V); - switch (pred) { - default: llvm_unreachable("Invalid FCmp Predicate"); - case FCmpInst::FCMP_FALSE: return Constant::getNullValue(ResultTy); - case FCmpInst::FCMP_TRUE: return Constant::getAllOnesValue(ResultTy); - case FCmpInst::FCMP_UNO: - return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered); - case FCmpInst::FCMP_ORD: - return ConstantInt::get(ResultTy, R!=APFloat::cmpUnordered); - case FCmpInst::FCMP_UEQ: - return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered || - R==APFloat::cmpEqual); - case FCmpInst::FCMP_OEQ: - return ConstantInt::get(ResultTy, R==APFloat::cmpEqual); - case FCmpInst::FCMP_UNE: - return ConstantInt::get(ResultTy, R!=APFloat::cmpEqual); - case FCmpInst::FCMP_ONE: - return ConstantInt::get(ResultTy, R==APFloat::cmpLessThan || - R==APFloat::cmpGreaterThan); - case FCmpInst::FCMP_ULT: - return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered || - R==APFloat::cmpLessThan); - case FCmpInst::FCMP_OLT: - return ConstantInt::get(ResultTy, R==APFloat::cmpLessThan); - case FCmpInst::FCMP_UGT: - return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered || - R==APFloat::cmpGreaterThan); - case FCmpInst::FCMP_OGT: - return ConstantInt::get(ResultTy, R==APFloat::cmpGreaterThan); - case FCmpInst::FCMP_ULE: - return ConstantInt::get(ResultTy, R!=APFloat::cmpGreaterThan); - case FCmpInst::FCMP_OLE: - return ConstantInt::get(ResultTy, R==APFloat::cmpLessThan || - R==APFloat::cmpEqual); - case FCmpInst::FCMP_UGE: - return ConstantInt::get(ResultTy, R!=APFloat::cmpLessThan); - case FCmpInst::FCMP_OGE: - return ConstantInt::get(ResultTy, R==APFloat::cmpGreaterThan || - R==APFloat::cmpEqual); - } + CmpInst::Predicate Predicate = CmpInst::Predicate(pred); + return ConstantInt::get(ResultTy, evaluatePredicate(Predicate, C1V, C2V)); } else if (auto *C1VTy = dyn_cast(C1->getType())) { // Fast path for splatted constants. @@ -2055,6 +2017,48 @@ return nullptr; } +/// Evaluate the given predicate for the constant values. +bool llvm::evaluatePredicate(unsigned Pred, const APFloat &Op1, + const APFloat &Op2) { + APFloat::cmpResult R = Op1.compare(Op2); + switch (Pred) { + default: + llvm_unreachable("Invalid FCmp Predicate"); + case FCmpInst::FCMP_FALSE: + return false; + case FCmpInst::FCMP_TRUE: + return true; + case FCmpInst::FCMP_UNO: + return R == APFloat::cmpUnordered; + case FCmpInst::FCMP_ORD: + return R != APFloat::cmpUnordered; + case FCmpInst::FCMP_UEQ: + return R == APFloat::cmpUnordered || R == APFloat::cmpEqual; + case FCmpInst::FCMP_OEQ: + return R == APFloat::cmpEqual; + case FCmpInst::FCMP_UNE: + return R != APFloat::cmpEqual; + case FCmpInst::FCMP_ONE: + return R == APFloat::cmpLessThan || R == APFloat::cmpGreaterThan; + case FCmpInst::FCMP_ULT: + return R == APFloat::cmpUnordered || R == APFloat::cmpLessThan; + case FCmpInst::FCMP_OLT: + return R == APFloat::cmpLessThan; + case FCmpInst::FCMP_UGT: + return R == APFloat::cmpUnordered || R == APFloat::cmpGreaterThan; + case FCmpInst::FCMP_OGT: + return R == APFloat::cmpGreaterThan; + case FCmpInst::FCMP_ULE: + return R != APFloat::cmpGreaterThan; + case FCmpInst::FCMP_OLE: + return R == APFloat::cmpLessThan || R == APFloat::cmpEqual; + case FCmpInst::FCMP_UGE: + return R != APFloat::cmpLessThan; + case FCmpInst::FCMP_OGE: + return R == APFloat::cmpGreaterThan || R == APFloat::cmpEqual; + } +} + /// Test whether the given sequence of *normalized* indices is "inbounds". template static bool isInBoundsIndices(ArrayRef Idxs) { diff --git a/llvm/test/Transforms/InstSimplify/constfold-constrained.ll b/llvm/test/Transforms/InstSimplify/constfold-constrained.ll --- a/llvm/test/Transforms/InstSimplify/constfold-constrained.ll +++ b/llvm/test/Transforms/InstSimplify/constfold-constrained.ll @@ -415,6 +415,113 @@ } +; When exceptions are ignored, comparison of constants can be folded, even for (signaling) NaNs. +define i1 @cmp_eq_01() #0 { +; CHECK-LABEL: @cmp_eq_01( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i1 false +; +entry: + %result = call i1 @llvm.experimental.constrained.fcmp.f64(double 1.0, double 2.0, metadata !"oeq", metadata !"fpexcept.ignore") #0 + ret i1 %result +} + +define i1 @cmp_eq_02() #0 { +; CHECK-LABEL: @cmp_eq_02( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i1 true +; +entry: + %result = call i1 @llvm.experimental.constrained.fcmp.f64(double 2.0, double 2.0, metadata !"oeq", metadata !"fpexcept.ignore") #0 + ret i1 %result +} + +define i1 @cmp_eq_03() #0 { +; CHECK-LABEL: @cmp_eq_03( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i1 false +; +entry: + %result = call i1 @llvm.experimental.constrained.fcmp.f64(double 2.0, double 0x7ff8000000000000, metadata !"oeq", metadata !"fpexcept.ignore") #0 + ret i1 %result +} + +define i1 @cmp_eq_04() #0 { +; CHECK-LABEL: @cmp_eq_04( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i1 false +; +entry: + %result = call i1 @llvm.experimental.constrained.fcmp.f64(double 2.0, double 0x7ff4000000000000, metadata !"oeq", metadata !"fpexcept.ignore") #0 + ret i1 %result +} + +define i1 @cmp_eq_05() #0 { +; CHECK-LABEL: @cmp_eq_05( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i1 false +; +entry: + %result = call i1 @llvm.experimental.constrained.fcmps.f64(double 2.0, double 0x7ff8000000000000, metadata !"oeq", metadata !"fpexcept.ignore") #0 + ret i1 %result +} + +define i1 @cmp_eq_06() #0 { +; CHECK-LABEL: @cmp_eq_06( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i1 false +; +entry: + %result = call i1 @llvm.experimental.constrained.fcmps.f64(double 2.0, double 0x7ff4000000000000, metadata !"oeq", metadata !"fpexcept.ignore") #0 + ret i1 %result +} + +; Compare with SNAN is NOT folded if the exception behavior mode is not 'ignore'. +define i1 @cmp_eq_nan_01() #0 { +; CHECK-LABEL: @cmp_eq_nan_01( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RESULT:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double 0x7FF4000000000000, double 1.000000e+00, metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR0]] +; CHECK-NEXT: ret i1 [[RESULT]] +; +entry: + %result = call i1 @llvm.experimental.constrained.fcmp.f64(double 0x7ff4000000000000, double 1.0, metadata !"oeq", metadata !"fpexcept.strict") #0 + ret i1 %result +} + +define i1 @cmp_eq_nan_02() #0 { +; CHECK-LABEL: @cmp_eq_nan_02( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RESULT:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double 0x7FF4000000000000, double 1.000000e+00, metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR0]] +; CHECK-NEXT: ret i1 [[RESULT]] +; +entry: + %result = call i1 @llvm.experimental.constrained.fcmps.f64(double 0x7ff4000000000000, double 1.0, metadata !"oeq", metadata !"fpexcept.strict") #0 + ret i1 %result +} + +; Compare with QNAN is folded for fcmp but is NOT folded for fcmps if the exception behavior mode is not 'ignore'. +define i1 @cmp_eq_nan_03() #0 { +; CHECK-LABEL: @cmp_eq_nan_03( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i1 false +; +entry: + %result = call i1 @llvm.experimental.constrained.fcmp.f64(double 0x7ff8000000000000, double 1.0, metadata !"oeq", metadata !"fpexcept.strict") #0 + ret i1 %result +} + +define i1 @cmp_eq_nan_04() #0 { +; CHECK-LABEL: @cmp_eq_nan_04( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RESULT:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double 0x7FF8000000000000, double 1.000000e+00, metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR0]] +; CHECK-NEXT: ret i1 [[RESULT]] +; +entry: + %result = call i1 @llvm.experimental.constrained.fcmps.f64(double 0x7ff8000000000000, double 1.0, metadata !"oeq", metadata !"fpexcept.strict") #0 + ret i1 %result +} + + attributes #0 = { strictfp } declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata) @@ -433,4 +540,6 @@ declare double @llvm.experimental.constrained.frem.f64(double, double, metadata, metadata) declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata) declare double @llvm.experimental.constrained.fmuladd.f64(double, double, double, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmps.f64(double, double, metadata, metadata)