diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1598,6 +1598,13 @@
   case Intrinsic::rint:
   // Constrained intrinsics can be folded if FP environment is known
   // to compiler.
+  case Intrinsic::experimental_constrained_fma:
+  case Intrinsic::experimental_constrained_fmuladd:
+  case Intrinsic::experimental_constrained_fadd:
+  case Intrinsic::experimental_constrained_fsub:
+  case Intrinsic::experimental_constrained_fmul:
+  case Intrinsic::experimental_constrained_fdiv:
+  case Intrinsic::experimental_constrained_frem:
   case Intrinsic::experimental_constrained_ceil:
   case Intrinsic::experimental_constrained_floor:
   case Intrinsic::experimental_constrained_round:
@@ -1859,6 +1866,59 @@
   return false;
 }
 
+/// Checks if the given intrinsic call, which evaluates to constant, is allowed
+/// to be folded.
+///
+/// \param CI Constrained intrinsic call.
+/// \param St Exception flags raised during constant evaluation.
+/// \returns true if the call may be replaced with the evaluated constant.
+static bool mayFoldConstrained(ConstrainedFPIntrinsic *CI,
+                               APFloat::opStatus St) {
+  Optional<RoundingMode> ORM = CI->getRoundingMode();
+  Optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
+
+  // If the operation does not change exception status flags, it is safe
+  // to fold.
+  if (St == APFloat::opStatus::opOK) {
+    // When FP exceptions are not ignored, intrinsic call will not be
+    // eliminated, because it is considered as having side effect. But we
+    // know that its evaluation does not raise exceptions, so side effect
+    // is absent. To allow removing the call, mark it as not accessing memory.
+    if (EB && *EB != fp::ExceptionBehavior::ebIgnore)
+      CI->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone);
+    return true;
+  }
+
+  // If evaluation raised FP exception, the result can depend on rounding
+  // mode. If the latter is unknown, folding is not possible.
+  if (!ORM || *ORM == RoundingMode::Dynamic)
+    return false;
+
+  // If FP exceptions are ignored, fold the call, even if such exception is
+  // raised.
+  if (!EB || *EB != fp::ExceptionBehavior::ebStrict)
+    return true;
+
+  // Leave the calculation for runtime so that exception flags are correctly set
+  // in hardware.
+  return false;
+}
+
+/// Returns the rounding mode that should be used for constant evaluation.
+static RoundingMode
+getEvaluationRoundingMode(const ConstrainedFPIntrinsic *CI) {
+  RoundingMode RM = RoundingMode::Dynamic;
+  Optional<RoundingMode> ORM = CI->getRoundingMode();
+  if (ORM)
+    RM = *ORM;
+  if (RM == RoundingMode::Dynamic)
+    // Even if the rounding mode is unknown, try evaluating the operation.
+    // If it does not raise inexact exception, rounding was not applied
+    // so the result does not depend on rounding mode.
+    RM = RoundingMode::NearestTiesToEven;
+  return RM;
+}
+
 static Constant *ConstantFoldScalarCall1(StringRef Name,
                                          Intrinsic::ID IntrinsicID,
                                          Type *Ty,
@@ -2362,7 +2422,7 @@
   }
 
   if (auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
-    if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
+    if (!Ty->isFloatingPointTy())
       return nullptr;
     APFloat Op1V = Op1->getValueAPF();
 
@@ -2371,6 +2431,40 @@
         return nullptr;
       APFloat Op2V = Op2->getValueAPF();
 
+      if (auto ConstrIntr = dyn_cast<ConstrainedFPIntrinsic>(Call)) {
+        RoundingMode RM = getEvaluationRoundingMode(ConstrIntr);
+        APFloat Res = Op1V;
+        APFloat::opStatus St;
+        switch (IntrinsicID) {
+        default:
+          return nullptr;
+        case Intrinsic::experimental_constrained_fadd:
+          St = Res.add(Op2V, RM);
+          break;
+        case Intrinsic::experimental_constrained_fsub:
+          St = Res.subtract(Op2V, RM);
+          break;
+        case Intrinsic::experimental_constrained_fmul:
+          St = Res.multiply(Op2V, RM);
+          break;
+        case Intrinsic::experimental_constrained_fdiv:
+          St = Res.divide(Op2V, RM);
+          break;
+        case Intrinsic::experimental_constrained_frem:
+          // frem follows fmod semantics (sign of the dividend), so use
+          // APFloat::mod; APFloat::remainder is the IEEE-754 remainder.
+          St = Res.mod(Op2V);
+          break;
+        }
+        if (mayFoldConstrained(const_cast<ConstrainedFPIntrinsic *>(ConstrIntr),
+                               St))
+          return ConstantFP::get(Ty->getContext(), Res);
+        return nullptr;
+      }
+
+      if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
+        return nullptr;
+
       switch (IntrinsicID) {
       default:
         break;
@@ -2434,6 +2528,8 @@
         break;
       }
     } else if (auto *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
+      if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
+        return nullptr;
       if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy())
         return ConstantFP::get(
             Ty->getContext(),
@@ -2769,6 +2865,25 @@
         const APFloat &C1 = Op1->getValueAPF();
         const APFloat &C2 = Op2->getValueAPF();
         const APFloat &C3 = Op3->getValueAPF();
+
+        if (auto ConstrIntr = dyn_cast<ConstrainedFPIntrinsic>(Call)) {
+          RoundingMode RM = getEvaluationRoundingMode(ConstrIntr);
+          APFloat Res = C1;
+          APFloat::opStatus St;
+          switch (IntrinsicID) {
+          default:
+            return nullptr;
+          case Intrinsic::experimental_constrained_fma:
+          case Intrinsic::experimental_constrained_fmuladd:
+            St = Res.fusedMultiplyAdd(C2, C3, RM);
+            break;
+          }
+          if (mayFoldConstrained(
+                  const_cast<ConstrainedFPIntrinsic *>(ConstrIntr), St))
+            return ConstantFP::get(Ty->getContext(), Res);
+          return nullptr;
+        }
+
         switch (IntrinsicID) {
         default: break;
         case Intrinsic::amdgcn_fma_legacy: {
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -486,6 +486,16 @@
     if (isMathLibCallNoop(Call, TLI))
       return true;
 
+  // To express possible interaction with floating point environment constrained
+  // intrinsics are described as if they access memory. So they look like having
+  // side effect but actually do not have it unless they raise floating point
+  // exception. If FP exceptions are ignored, the intrinsic may be deleted.
+  if (auto *CI = dyn_cast<ConstrainedFPIntrinsic>(I)) {
+    Optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
+    if (!EB || *EB == fp::ExceptionBehavior::ebIgnore)
+      return true;
+  }
+
   return false;
 }
 
diff --git a/llvm/test/Transforms/InstSimplify/constfold-constrained.ll b/llvm/test/Transforms/InstSimplify/constfold-constrained.ll
--- a/llvm/test/Transforms/InstSimplify/constfold-constrained.ll
+++ b/llvm/test/Transforms/InstSimplify/constfold-constrained.ll
@@ -232,6 +232,232 @@
   ; CHECK: ret double 1.000000e+01
 }
 
+define float @fadd_01() #0 {
+entry:
+  %result = call float @llvm.experimental.constrained.fadd.f32(
+      float 1.000000e+01,
+      float 2.000000e+01,
+      metadata !"round.tonearest",
+      metadata !"fpexcept.ignore") #0
+  ret float %result
+  ; CHECK-LABEL: @fadd_01
+  ; CHECK: ret float 3.000000e+01
+}
+
+; Inexact result does not prevent folding if exceptions are ignored and
+; rounding mode is known.
+define double @fadd_02() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(
+      double 1.0,
+      double 0x3FF0000000000001,
+      metadata !"round.tonearest",
+      metadata !"fpexcept.ignore") #0
+  ret double %result
+  ; CHECK-LABEL: @fadd_02
+  ; CHECK: ret double 2.000000e+00
+}
+
+define double @fadd_03() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(
+      double 1.0,
+      double 0x3FF0000000000001,
+      metadata !"round.upward",
+      metadata !"fpexcept.ignore") #0
+  ret double %result
+  ; CHECK-LABEL: @fadd_03
+  ; CHECK: ret double 0x4000000000000001
+}
+
+; Inexact result prevents folding if exceptions may be checked.
+define double @fadd_04() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(
+      double 1.0,
+      double 0x3FF0000000000001,
+      metadata !"round.tonearest",
+      metadata !"fpexcept.strict") #0
+  ret double %result
+  ; CHECK-LABEL: @fadd_04
+  ; CHECK: ret double %result
+}
+
+; If result is exact, folding is allowed even if exceptions may be checked.
+define double @fadd_05() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(
+      double 1.0,
+      double 2.0,
+      metadata !"round.tonearest",
+      metadata !"fpexcept.strict") #0
+  ret double %result
+  ; CHECK-LABEL: @fadd_05
+  ; CHECK: ret double 3.000000e+00
+}
+
+; Dynamic rounding mode does not prevent folding if the result is exact.
+define double @fadd_06() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(
+      double 1.0,
+      double 2.0,
+      metadata !"round.dynamic",
+      metadata !"fpexcept.strict") #0
+  ret double %result
+  ; CHECK-LABEL: @fadd_06
+  ; CHECK: ret double 3.000000e+00
+}
+
+; Inexact result prevents folding if rounding mode is unknown.
+define double @fadd_07() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(
+      double 1.0,
+      double 0x3FF0000000000001,
+      metadata !"round.dynamic",
+      metadata !"fpexcept.ignore") #0
+  ret double %result
+  ; CHECK-LABEL: @fadd_07
+  ; CHECK: ret double %result
+}
+
+; Infinite result does not prevent folding unless exceptions are tracked.
+define double @fadd_08() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(
+      double 0x7fEFFFFFFFFFFFFF,
+      double 0x7fEFFFFFFFFFFFFF,
+      metadata !"round.tonearest",
+      metadata !"fpexcept.ignore") #0
+  ret double %result
+  ; CHECK-LABEL: @fadd_08
+  ; CHECK: ret double 0x7FF0000000000000
+}
+
+define double @fadd_09() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(
+      double 0x7fEFFFFFFFFFFFFF,
+      double 0x7fEFFFFFFFFFFFFF,
+      metadata !"round.tonearest",
+      metadata !"fpexcept.strict") #0
+  ret double %result
+  ; CHECK-LABEL: @fadd_09
+  ; CHECK: ret double %result
+}
+
+define half @fadd_10() #0 {
+entry:
+  %result = call half @llvm.experimental.constrained.fadd.f16(
+      half 1.0,
+      half 2.0,
+      metadata !"round.tonearest",
+      metadata !"fpexcept.ignore") #0
+  ret half %result
+  ; CHECK-LABEL: @fadd_10
+  ; CHECK: ret half 0xH4200
+}
+
+define bfloat @fadd_11() #0 {
+entry:
+  %result = call bfloat @llvm.experimental.constrained.fadd.bf16(
+      bfloat 1.0,
+      bfloat 2.0,
+      metadata !"round.tonearest",
+      metadata !"fpexcept.ignore") #0
+  ret bfloat %result
+  ; CHECK-LABEL: @fadd_11
+  ; CHECK: ret bfloat 0xR4040
+}
+
+define double @fsub_01() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.fsub.f64(
+      double 1.0,
+      double 2.0,
+      metadata !"round.tonearest",
+      metadata !"fpexcept.ignore") #0
+  ret double %result
+  ; CHECK-LABEL: @fsub_01
+  ; CHECK: ret double -1.000000e+00
+}
+
+define double @fmul_01() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.fmul.f64(
+      double 1.0,
+      double 2.0,
+      metadata !"round.tonearest",
+      metadata !"fpexcept.ignore") #0
+  ret double %result
+  ; CHECK-LABEL: @fmul_01
+  ; CHECK: ret double 2.000000e+00
+}
+
+define double @fdiv_01() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.fdiv.f64(
+      double 1.0,
+      double 2.0,
+      metadata !"round.tonearest",
+      metadata !"fpexcept.ignore") #0
+  ret double %result
+  ; CHECK-LABEL: @fdiv_01
+  ; CHECK: ret double 5.000000e-01
+}
+
+define double @frem_01() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.frem.f64(
+      double 1.0,
+      double 2.0,
+      metadata !"round.dynamic",
+      metadata !"fpexcept.ignore") #0
+  ret double %result
+  ; CHECK-LABEL: @frem_01
+  ; CHECK: ret double 1.000000e+00
+}
+
+; frem has fmod semantics: the result takes the sign of the dividend.
+define double @frem_02() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.frem.f64(
+      double 3.0,
+      double 2.0,
+      metadata !"round.dynamic",
+      metadata !"fpexcept.ignore") #0
+  ret double %result
+  ; CHECK-LABEL: @frem_02
+  ; CHECK: ret double 1.000000e+00
+}
+
+define double @fma_01() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.fma.f64(
+      double 1.0,
+      double 2.0,
+      double 3.0,
+      metadata !"round.dynamic",
+      metadata !"fpexcept.ignore") #0
+  ret double %result
+  ; CHECK-LABEL: @fma_01
+  ; CHECK: ret double 5.000000e+00
+}
+
+define double @fmuladd_01() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.fmuladd.f64(
+      double 1.0,
+      double 2.0,
+      double 3.0,
+      metadata !"round.dynamic",
+      metadata !"fpexcept.ignore") #0
+  ret double %result
+  ; CHECK-LABEL: @fmuladd_01
+  ; CHECK: ret double 5.000000e+00
+}
+
 
 attributes #0 = { strictfp }
 
@@ -241,4 +467,14 @@
 declare double @llvm.experimental.constrained.trunc.f64(double, metadata)
 declare double @llvm.experimental.constrained.round.f64(double, metadata)
 declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
+declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
+declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
+declare half @llvm.experimental.constrained.fadd.f16(half, half, metadata, metadata)
+declare bfloat @llvm.experimental.constrained.fadd.bf16(bfloat, bfloat, metadata, metadata)
+declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.frem.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fmuladd.f64(double, double, double, metadata, metadata)
 