diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1593,6 +1593,13 @@
   case Intrinsic::rint:
   // Constrained intrinsics can be folded if FP environment is known
   // to compiler.
+  case Intrinsic::experimental_constrained_fma:
+  case Intrinsic::experimental_constrained_fmuladd:
+  case Intrinsic::experimental_constrained_fadd:
+  case Intrinsic::experimental_constrained_fsub:
+  case Intrinsic::experimental_constrained_fmul:
+  case Intrinsic::experimental_constrained_fdiv:
+  case Intrinsic::experimental_constrained_frem:
   case Intrinsic::experimental_constrained_ceil:
   case Intrinsic::experimental_constrained_floor:
   case Intrinsic::experimental_constrained_round:
@@ -1854,6 +1861,56 @@
   return false;
 }
 
+/// Checks whether the given intrinsic call, which evaluates to a constant,
+/// is allowed to be folded.
+///
+/// \param CI Constrained intrinsic call.
+/// \param St Exception flags raised during constant evaluation.
+static bool mayFoldConstrained(ConstrainedFPIntrinsic *CI,
+                               APFloat::opStatus St) {
+  Optional<RoundingMode> ORM = CI->getRoundingMode();
+  Optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
+
+  // If the operation does not change exception status flags, it is safe
+  // to fold.
+  if (St == APFloat::opStatus::opOK) {
+    // When FP exceptions are not ignored, the intrinsic call will not be
+    // eliminated, because it is considered to have side effects. But we know
+    // that its evaluation does not raise exceptions, so the side effect is
+    // absent. To allow removing the call, mark it as not accessing memory.
+    if (EB && *EB != fp::ExceptionBehavior::ebIgnore)
+      CI->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone);
+    return true;
+  }
+
+  // If the evaluation raised an FP exception, the result can depend on the
+  // rounding mode. If the latter is unknown, folding is not possible.
+  if (!ORM || *ORM == RoundingMode::Dynamic)
+    return false;
+
+  // If FP exceptions are ignored, fold the call, even if such an exception
+  // was raised.
+  if (!EB || *EB != fp::ExceptionBehavior::ebStrict)
+    return true;
+
+  // Leave the calculation to run time so that the exception flags are set
+  // correctly in hardware.
+  return false;
+}
+
+/// Returns the rounding mode that should be used for constant evaluation.
+static RoundingMode
+getEvaluationRoundingMode(const ConstrainedFPIntrinsic *CI) {
+  Optional<RoundingMode> ORM = CI->getRoundingMode();
+  if (!ORM || *ORM == RoundingMode::Dynamic)
+    // Even if the rounding mode is unknown, try evaluating the operation.
+    // If it does not raise the inexact exception, rounding was not applied,
+    // so the result is exact and does not depend on the rounding mode.
+    // Whether other FP exceptions are raised does not depend on it either.
+    return RoundingMode::NearestTiesToEven;
+  return *ORM;
+}
+
 static Constant *ConstantFoldScalarCall1(StringRef Name,
                                          Intrinsic::ID IntrinsicID,
                                          Type *Ty,
@@ -2356,16 +2413,45 @@
     }
   }
 
-  if (auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
+  if (const auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
     if (!Ty->isFloatingPointTy())
       return nullptr;
     APFloat Op1V = Op1->getValueAPF();
 
-    if (auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
+    if (const auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
       if (Op2->getType() != Op1->getType())
         return nullptr;
       APFloat Op2V = Op2->getValueAPF();
 
+      if (const auto *ConstrIntr = dyn_cast<ConstrainedFPIntrinsic>(Call)) {
+        RoundingMode RM = getEvaluationRoundingMode(ConstrIntr);
+        APFloat Res = Op1V;
+        APFloat::opStatus St;
+        switch (IntrinsicID) {
+        default:
+          return nullptr;
+        case Intrinsic::experimental_constrained_fadd:
+          St = Res.add(Op2V, RM);
+          break;
+        case Intrinsic::experimental_constrained_fsub:
+          St = Res.subtract(Op2V, RM);
+          break;
+        case Intrinsic::experimental_constrained_fmul:
+          St = Res.multiply(Op2V, RM);
+          break;
+        case Intrinsic::experimental_constrained_fdiv:
+          St = Res.divide(Op2V, RM);
+          break;
+        case Intrinsic::experimental_constrained_frem:
+          St = Res.mod(Op2V);
+          break;
+        }
+        if (mayFoldConstrained(const_cast<ConstrainedFPIntrinsic *>(ConstrIntr),
+                               St))
+          return ConstantFP::get(Ty->getContext(), Res);
+        return nullptr;
+      }
+
       switch (IntrinsicID) {
       default:
         break;
@@ -2437,6 +2523,8 @@
         break;
       }
     } else if (auto *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
+      if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
+        return nullptr;
       if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy())
         return ConstantFP::get(
             Ty->getContext(),
@@ -2772,6 +2860,25 @@
         const APFloat &C1 = Op1->getValueAPF();
         const APFloat &C2 = Op2->getValueAPF();
         const APFloat &C3 = Op3->getValueAPF();
+
+        if (const auto *ConstrIntr = dyn_cast<ConstrainedFPIntrinsic>(Call)) {
+          RoundingMode RM = getEvaluationRoundingMode(ConstrIntr);
+          APFloat Res = C1;
+          APFloat::opStatus St;
+          switch (IntrinsicID) {
+          default:
+            return nullptr;
+          case Intrinsic::experimental_constrained_fma:
+          case Intrinsic::experimental_constrained_fmuladd:
+            St = Res.fusedMultiplyAdd(C2, C3, RM);
+            break;
+          }
+          if (mayFoldConstrained(
+                  const_cast<ConstrainedFPIntrinsic *>(ConstrIntr), St))
+            return ConstantFP::get(Ty->getContext(), Res);
+          return nullptr;
+        }
+
         switch (IntrinsicID) {
         default: break;
         case Intrinsic::amdgcn_fma_legacy: {
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -491,6 +491,16 @@
   if (isMathLibCallNoop(Call, TLI))
     return true;
 
+  // To express a possible interaction with the floating-point environment,
+  // constrained intrinsics are described as if they access memory. Thus they
+  // appear to have side effects but actually do not, unless they raise an FP
+  // exception. If FP exceptions are ignored, the intrinsic may be deleted.
+  if (auto *CI = dyn_cast<ConstrainedFPIntrinsic>(I)) {
+    Optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
+    if (!EB || *EB == fp::ExceptionBehavior::ebIgnore)
+      return true;
+  }
+
   return false;
 }
 
diff --git a/llvm/test/Transforms/InstSimplify/constfold-constrained.ll b/llvm/test/Transforms/InstSimplify/constfold-constrained.ll
--- a/llvm/test/Transforms/InstSimplify/constfold-constrained.ll
+++ b/llvm/test/Transforms/InstSimplify/constfold-constrained.ll
@@ -234,6 +234,186 @@
   ret double %result
 }
 
+define float @fadd_01() #0 {
+; CHECK-LABEL: @fadd_01(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret float 3.000000e+01
+;
+entry:
+  %result = call float @llvm.experimental.constrained.fadd.f32(float 1.000000e+01, float 2.000000e+01, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+  ret float %result
+}
+
+; An inexact result does not prevent folding if exceptions are ignored and
+; the rounding mode is known.
+define double @fadd_02() #0 {
+; CHECK-LABEL: @fadd_02(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret double 2.000000e+00
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(double 1.0, double 0x3FF0000000000001, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+  ret double %result
+}
+
+define double @fadd_03() #0 {
+; CHECK-LABEL: @fadd_03(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret double 0x4000000000000001
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(double 1.0, double 0x3FF0000000000001, metadata !"round.upward", metadata !"fpexcept.ignore") #0
+  ret double %result
+}
+
+; An inexact result prevents folding if exceptions may be checked.
+define double @fadd_04() #0 {
+; CHECK-LABEL: @fadd_04(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RESULT:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double 1.000000e+00, double 0x3FF0000000000001, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]]
+; CHECK-NEXT:    ret double [[RESULT]]
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(double 1.0, double 0x3FF0000000000001, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
+; If the result is exact, folding is allowed even if exceptions may be checked.
+define double @fadd_05() #0 {
+; CHECK-LABEL: @fadd_05(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret double 3.000000e+00
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(double 1.0, double 2.0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
+; A dynamic rounding mode does not prevent folding if the result is exact.
+define double @fadd_06() #0 {
+; CHECK-LABEL: @fadd_06(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret double 3.000000e+00
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(double 1.0, double 2.0, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
+; An inexact result prevents folding if the rounding mode is unknown.
+define double @fadd_07() #0 {
+; CHECK-LABEL: @fadd_07(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RESULT:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double 1.000000e+00, double 0x3FF0000000000001, metadata !"round.dynamic", metadata !"fpexcept.ignore") #[[ATTR0]]
+; CHECK-NEXT:    ret double [[RESULT]]
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(double 1.0, double 0x3FF0000000000001, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
+  ret double %result
+}
+
+; An infinite result does not prevent folding unless exceptions are tracked.
+define double @fadd_08() #0 {
+; CHECK-LABEL: @fadd_08(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret double 0x7FF0000000000000
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(double 0x7fEFFFFFFFFFFFFF, double 0x7fEFFFFFFFFFFFFF, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+  ret double %result
+}
+
+define double @fadd_09() #0 {
+; CHECK-LABEL: @fadd_09(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RESULT:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]]
+; CHECK-NEXT:    ret double [[RESULT]]
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(double 0x7fEFFFFFFFFFFFFF, double 0x7fEFFFFFFFFFFFFF, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
+define half @fadd_10() #0 {
+; CHECK-LABEL: @fadd_10(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret half 0xH4200
+;
+entry:
+  %result = call half @llvm.experimental.constrained.fadd.f16(half 1.0, half 2.0, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+  ret half %result
+}
+
+define bfloat @fadd_11() #0 {
+; CHECK-LABEL: @fadd_11(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret bfloat 0xR4040
+;
+entry:
+  %result = call bfloat @llvm.experimental.constrained.fadd.bf16(bfloat 1.0, bfloat 2.0, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+  ret bfloat %result
+}
+
+define double @fsub_01() #0 {
+; CHECK-LABEL: @fsub_01(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret double -1.000000e+00
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fsub.f64(double 1.0, double 2.0, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+  ret double %result
+}
+
+define double @fmul_01() #0 {
+; CHECK-LABEL: @fmul_01(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret double 2.000000e+00
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fmul.f64(double 1.0, double 2.0, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+  ret double %result
+}
+
+define double @fdiv_01() #0 {
+; CHECK-LABEL: @fdiv_01(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret double 5.000000e-01
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fdiv.f64(double 1.0, double 2.0, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+  ret double %result
+}
+
+define double @frem_01() #0 {
+; CHECK-LABEL: @frem_01(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+entry:
+  %result = call double @llvm.experimental.constrained.frem.f64(double 1.0, double 2.0, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
+  ret double %result
+}
+
+define double @fma_01() #0 {
+; CHECK-LABEL: @fma_01(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret double 5.000000e+00
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fma.f64(double 1.0, double 2.0, double 3.0, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
+  ret double %result
+}
+
+define double @fmuladd_01() #0 {
+; CHECK-LABEL: @fmuladd_01(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret double 5.000000e+00
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fmuladd.f64(double 1.0, double 2.0, double 3.0, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
+  ret double %result
+}
+
 attributes #0 = { strictfp }
@@ -243,4 +423,14 @@
 declare double @llvm.experimental.constrained.trunc.f64(double, metadata)
 declare double @llvm.experimental.constrained.round.f64(double, metadata)
 declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
+declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
+declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
+declare half @llvm.experimental.constrained.fadd.f16(half, half, metadata, metadata)
+declare bfloat @llvm.experimental.constrained.fadd.bf16(bfloat, bfloat, metadata, metadata)
+declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.frem.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fmuladd.f64(double, double, double, metadata, metadata)
diff --git a/llvm/test/Transforms/InstSimplify/fdiv-strictfp.ll b/llvm/test/Transforms/InstSimplify/fdiv-strictfp.ll
--- a/llvm/test/Transforms/InstSimplify/fdiv-strictfp.ll
+++ b/llvm/test/Transforms/InstSimplify/fdiv-strictfp.ll
@@ -12,14 +12,23 @@
 
 define float @fdiv_constant_fold_strict() #0 {
 ; CHECK-LABEL: @fdiv_constant_fold_strict(
-; CHECK-NEXT:    [[F:%.*]] = call float @llvm.experimental.constrained.fdiv.f32(float 3.000000e+00, float 2.000000e+00, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0:[0-9]+]]
-; CHECK-NEXT:    ret float [[F]]
+; CHECK-NEXT:    ret float 1.500000e+00
 ;
   %f = call float @llvm.experimental.constrained.fdiv.f32(float 3.0, float 2.0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %f
 }
 
+define float @fdiv_constant_fold_strict2() #0 {
+; CHECK-LABEL: @fdiv_constant_fold_strict2(
+; CHECK-NEXT:    [[F:%.*]] = call float @llvm.experimental.constrained.fdiv.f32(float 2.000000e+00, float 3.000000e+00, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0:[0-9]+]]
+; CHECK-NEXT:    ret float [[F]]
+;
+  %f = call float @llvm.experimental.constrained.fdiv.f32(float 2.0, float 3.0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+
+  ret float %f
+}
+
 define float @frem_constant_fold() #0 {
 ; CHECK-LABEL: @frem_constant_fold(
 ; CHECK-NEXT:    ret float 1.000000e+00
@@ -30,10 +39,9 @@
 
 define float @frem_constant_fold_strict() #0 {
 ; CHECK-LABEL: @frem_constant_fold_strict(
-; CHECK-NEXT:    [[F:%.*]] = call float @llvm.experimental.constrained.fdiv.f32(float 3.000000e+00, float 2.000000e+00, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]]
-; CHECK-NEXT:    ret float [[F]]
+; CHECK-NEXT:    ret float 1.000000e+00
 ;
-  %f = call float @llvm.experimental.constrained.fdiv.f32(float 3.0, float 2.0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  %f = call float @llvm.experimental.constrained.frem.f32(float 3.0, float 2.0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %f
 }