diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1555,6 +1555,13 @@
   case Intrinsic::rint:
   // Constrained intrinsics can be folded if FP environment is known
   // to compiler.
+  case Intrinsic::experimental_constrained_fma:
+  case Intrinsic::experimental_constrained_fmuladd:
+  case Intrinsic::experimental_constrained_fadd:
+  case Intrinsic::experimental_constrained_fsub:
+  case Intrinsic::experimental_constrained_fmul:
+  case Intrinsic::experimental_constrained_fdiv:
+  case Intrinsic::experimental_constrained_frem:
   case Intrinsic::experimental_constrained_ceil:
   case Intrinsic::experimental_constrained_floor:
   case Intrinsic::experimental_constrained_round:
@@ -1816,6 +1823,44 @@
   return false;
 }
 
+/// Checks whether the given intrinsic call, which evaluates to a constant,
+/// is allowed to be folded.
+///
+/// \param CI Constrained intrinsic call.
+/// \param RM Rounding mode used for evaluation.
+/// \param St Exception flags raised during constant evaluation.
+static bool mayFoldConstrained(ConstrainedFPIntrinsic *CI, RoundingMode RM,
+                               APFloat::opStatus St) {
+  Optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
+
+  // If the operation does not change exception status flags, it is safe
+  // to fold.
+  if (St == APFloat::opStatus::opOK) {
+    // When FP exceptions are not ignored, the intrinsic call will not be
+    // eliminated, because it is considered to have side effects. But we
+    // know that its evaluation does not raise exceptions, so the side
+    // effect is absent. To allow removing the call, mark it as not
+    // accessing memory.
+    if (EB && *EB != fp::ExceptionBehavior::ebIgnore)
+      CI->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone);
+    return true;
+  }
+
+  // If evaluation raised an FP exception, the result can depend on the
+  // rounding mode. If the latter is unknown, folding is not possible.
+  if (RM == RoundingMode::Dynamic)
+    return false;
+
+  // If FP exceptions are ignored, fold the call, even if such an exception
+  // is raised.
+  if (!EB || *EB == fp::ExceptionBehavior::ebIgnore)
+    return true;
+
+  // Leave the calculation for runtime so that exception flags are set
+  // correctly in hardware.
+  return false;
+}
+
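For context, the folding decision above keys entirely off APFloat's status reporting: an operation that reports opOK is exact, and an exact result is the same under every rounding mode. Below is a minimal standalone sketch of that signal, assuming LLVM headers of roughly this patch's vintage; the driver itself is not part of the patch.

#include "llvm/ADT/APFloat.h"
#include <cassert>
using namespace llvm;

int main() {
  // 1.0 + 2.0 is exactly representable in double: no status flags are
  // raised, so mayFoldConstrained accepts the fold even under
  // fpexcept.strict.
  APFloat Exact(1.0);
  assert(Exact.add(APFloat(2.0), APFloat::rmNearestTiesToEven) ==
         APFloat::opOK);

  // 1.0 + 1e-30 must be rounded: the addition reports opInexact, so the
  // fold has to consult the rounding mode and the exception behavior.
  APFloat Inexact(1.0);
  assert(Inexact.add(APFloat(1e-30), APFloat::rmNearestTiesToEven) ==
         APFloat::opInexact);
  return 0;
}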
 static Constant *ConstantFoldScalarCall1(StringRef Name,
                                          Intrinsic::ID IntrinsicID,
                                          Type *Ty,
@@ -2319,7 +2364,7 @@
   }
 
   if (auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
-    if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
+    if (!Ty->isFloatingPointTy())
       return nullptr;
     APFloat Op1V = Op1->getValueAPF();
@@ -2328,6 +2373,49 @@
         return nullptr;
       APFloat Op2V = Op2->getValueAPF();
 
+      if (auto ConstrIntr = dyn_cast<ConstrainedFPIntrinsic>(Call)) {
+        Optional<RoundingMode> ORM = ConstrIntr->getRoundingMode();
+        if (!ORM)
+          ORM = RoundingMode::Dynamic;
+        RoundingMode RM;
+        if (*ORM == RoundingMode::Dynamic)
+          // Even if the rounding mode is unknown, try evaluating the
+          // operation. If it does not raise an inexact exception, rounding
+          // was not applied, so the result does not depend on the rounding
+          // mode.
+          RM = RoundingMode::NearestTiesToEven;
+        else
+          RM = *ORM;
+        APFloat Res = Op1V;
+        APFloat::opStatus St;
+        switch (IntrinsicID) {
+        default:
+          return nullptr;
+        case Intrinsic::experimental_constrained_fadd:
+          St = Res.add(Op2V, RM);
+          break;
+        case Intrinsic::experimental_constrained_fsub:
+          St = Res.subtract(Op2V, RM);
+          break;
+        case Intrinsic::experimental_constrained_fmul:
+          St = Res.multiply(Op2V, RM);
+          break;
+        case Intrinsic::experimental_constrained_fdiv:
+          St = Res.divide(Op2V, RM);
+          break;
+        case Intrinsic::experimental_constrained_frem:
+          // frem does not depend on the rounding mode, so change the
+          // dynamic mode to something definite.
+          if (*ORM == RoundingMode::Dynamic)
+            ORM = RoundingMode::NearestTiesToEven;
+          St = Res.mod(Op2V);
+          break;
+        }
+        if (mayFoldConstrained(
+                const_cast<ConstrainedFPIntrinsic *>(ConstrIntr), *ORM, St))
+          return ConstantFP::get(Ty->getContext(), Res);
+        return nullptr;
+      }
+
       switch (IntrinsicID) {
       default:
         break;
@@ -2723,6 +2811,35 @@
     const APFloat &C1 = Op1->getValueAPF();
     const APFloat &C2 = Op2->getValueAPF();
     const APFloat &C3 = Op3->getValueAPF();
+
+    if (auto ConstrIntr = dyn_cast<ConstrainedFPIntrinsic>(Call)) {
+      Optional<RoundingMode> ORM = ConstrIntr->getRoundingMode();
+      if (!ORM)
+        ORM = RoundingMode::Dynamic;
+      RoundingMode RM;
+      if (*ORM == RoundingMode::Dynamic)
+        // Even if the rounding mode is unknown, try evaluating the
+        // operation. If it does not raise an inexact exception, rounding
+        // was not applied, so the result does not depend on the rounding
+        // mode.
+        RM = RoundingMode::NearestTiesToEven;
+      else
+        RM = *ORM;
+      APFloat Res = C1;
+      APFloat::opStatus St;
+      switch (IntrinsicID) {
+      default:
+        return nullptr;
+      case Intrinsic::experimental_constrained_fma:
+      case Intrinsic::experimental_constrained_fmuladd:
+        St = Res.fusedMultiplyAdd(C2, C3, RM);
+        break;
+      }
+      if (mayFoldConstrained(
+              const_cast<ConstrainedFPIntrinsic *>(ConstrIntr), *ORM, St))
+        return ConstantFP::get(Ty->getContext(), Res);
+      return nullptr;
+    }
+
     switch (IntrinsicID) {
     default:
       break;
     case Intrinsic::amdgcn_fma_legacy: {
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -486,6 +486,16 @@
     if (isMathLibCallNoop(Call, TLI))
       return true;
 
+    // To express possible interaction with the floating point environment,
+    // constrained intrinsics are described as if they access memory. So
+    // they look like they have side effects but actually do not, unless
+    // they raise a floating point exception. If FP exceptions are ignored,
+    // the intrinsic may be deleted.
+    if (auto *CI = dyn_cast<ConstrainedFPIntrinsic>(I)) {
+      Optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
+      if (!EB || *EB == fp::ExceptionBehavior::ebIgnore)
+        return true;
+    }
+
   return false;
 }
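A hedged sketch of the Local.cpp change in action; the driver below is illustrative only and assumes parseAssemblyString and wouldInstructionBeTriviallyDead as declared in llvm/AsmParser/Parser.h and llvm/Transforms/Utils/Local.h around this patch's era. An unused constrained call that ignores FP exceptions is now reported as removable.

#include "llvm/AsmParser/Parser.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  SMDiagnostic Err;
  // The constrained fadd below ignores FP exceptions and its result is
  // unused, so it only *looks* side-effecting.
  std::unique_ptr<Module> M = parseAssemblyString(
      "declare double @llvm.experimental.constrained.fadd.f64("
      "double, double, metadata, metadata)\n"
      "define void @f(double %x) strictfp {\n"
      "  %r = call double @llvm.experimental.constrained.fadd.f64("
      "double %x, double 1.0, metadata !\"round.dynamic\", "
      "metadata !\"fpexcept.ignore\") strictfp\n"
      "  ret void\n"
      "}\n",
      Err, Ctx);
  if (!M) {
    Err.print("sketch", errs());
    return 1;
  }
  Instruction &Call = M->getFunction("f")->getEntryBlock().front();
  // With the wouldInstructionBeTriviallyDead change above this prints
  // "dead".
  outs() << (wouldInstructionBeTriviallyDead(&Call) ? "dead\n" : "alive\n");
  return 0;
}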
diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll --- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll @@ -213,34 +213,14 @@ define <1 x float> @constrained_vector_frem_v1f32() #0 { ; PC64LE-LABEL: constrained_vector_frem_v1f32: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: mflr 0 -; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -32(1) ; PC64LE-NEXT: addis 3, 2, .LCPI5_0@toc@ha -; PC64LE-NEXT: addis 4, 2, .LCPI5_1@toc@ha ; PC64LE-NEXT: lfs 1, .LCPI5_0@toc@l(3) -; PC64LE-NEXT: lfs 2, .LCPI5_1@toc@l(4) -; PC64LE-NEXT: bl fmodf -; PC64LE-NEXT: nop -; PC64LE-NEXT: addi 1, 1, 32 -; PC64LE-NEXT: ld 0, 16(1) -; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_frem_v1f32: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: mflr 0 -; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -32(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI5_0@toc@ha ; PC64LE9-NEXT: lfs 1, .LCPI5_0@toc@l(3) -; PC64LE9-NEXT: addis 3, 2, .LCPI5_1@toc@ha -; PC64LE9-NEXT: lfs 2, .LCPI5_1@toc@l(3) -; PC64LE9-NEXT: bl fmodf -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: addi 1, 1, 32 -; PC64LE9-NEXT: ld 0, 16(1) -; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr entry: %rem = call <1 x float> @llvm.experimental.constrained.frem.v1f32( @@ -254,62 +234,17 @@ define <2 x double> @constrained_vector_frem_v2f64() #0 { ; PC64LE-LABEL: constrained_vector_frem_v2f64: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: mflr 0 -; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -80(1) -; PC64LE-NEXT: addis 4, 2, .LCPI6_1@toc@ha -; PC64LE-NEXT: stfd 31, 72(1) # 8-byte Folded Spill -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI6_0@toc@ha -; PC64LE-NEXT: lfs 31, .LCPI6_1@toc@l(4) -; PC64LE-NEXT: lfs 1, .LCPI6_0@toc@l(3) -; PC64LE-NEXT: fmr 2, 31 -; PC64LE-NEXT: bl fmod -; PC64LE-NEXT: nop -; PC64LE-NEXT: addis 3, 2, .LCPI6_2@toc@ha -; PC64LE-NEXT: fmr 2, 31 -; PC64LE-NEXT: xxlor 63, 1, 1 -; PC64LE-NEXT: lfs 1, .LCPI6_2@toc@l(3) -; PC64LE-NEXT: bl fmod -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: xxmrghd 34, 1, 63 -; PC64LE-NEXT: lfd 31, 72(1) # 8-byte Folded Reload -; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: addi 1, 1, 80 -; PC64LE-NEXT: ld 0, 16(1) -; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: addi 3, 3, .LCPI6_0@toc@l +; PC64LE-NEXT: lxvd2x 0, 0, 3 +; PC64LE-NEXT: xxswapd 34, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_frem_v2f64: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: mflr 0 -; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI6_0@toc@ha -; PC64LE9-NEXT: stfd 31, 56(1) # 8-byte Folded Spill -; PC64LE9-NEXT: stxv 63, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: lfs 1, .LCPI6_0@toc@l(3) -; PC64LE9-NEXT: addis 3, 2, .LCPI6_1@toc@ha -; PC64LE9-NEXT: lfs 31, .LCPI6_1@toc@l(3) -; PC64LE9-NEXT: fmr 2, 31 -; PC64LE9-NEXT: bl fmod -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: addis 3, 2, .LCPI6_2@toc@ha -; PC64LE9-NEXT: xscpsgndp 63, 1, 1 -; PC64LE9-NEXT: fmr 2, 31 -; PC64LE9-NEXT: lfs 1, .LCPI6_2@toc@l(3) -; PC64LE9-NEXT: bl fmod -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; 
PC64LE9-NEXT: xxmrghd 34, 1, 63 -; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload -; PC64LE9-NEXT: addi 1, 1, 64 -; PC64LE9-NEXT: ld 0, 16(1) -; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: addi 3, 3, .LCPI6_0@toc@l +; PC64LE9-NEXT: lxvx 34, 0, 3 ; PC64LE9-NEXT: blr entry: %rem = call <2 x double> @llvm.experimental.constrained.frem.v2f64( @@ -323,88 +258,16 @@ define <3 x float> @constrained_vector_frem_v3f32() #0 { ; PC64LE-LABEL: constrained_vector_frem_v3f32: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: mflr 0 -; PC64LE-NEXT: stfd 29, -24(1) # 8-byte Folded Spill -; PC64LE-NEXT: stfd 30, -16(1) # 8-byte Folded Spill -; PC64LE-NEXT: stfd 31, -8(1) # 8-byte Folded Spill -; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -64(1) -; PC64LE-NEXT: addis 4, 2, .LCPI7_1@toc@ha ; PC64LE-NEXT: addis 3, 2, .LCPI7_0@toc@ha -; PC64LE-NEXT: lfs 31, .LCPI7_1@toc@l(4) -; PC64LE-NEXT: lfs 1, .LCPI7_0@toc@l(3) -; PC64LE-NEXT: fmr 2, 31 -; PC64LE-NEXT: bl fmodf -; PC64LE-NEXT: nop -; PC64LE-NEXT: addis 3, 2, .LCPI7_2@toc@ha -; PC64LE-NEXT: fmr 2, 31 -; PC64LE-NEXT: fmr 30, 1 -; PC64LE-NEXT: lfs 1, .LCPI7_2@toc@l(3) -; PC64LE-NEXT: bl fmodf -; PC64LE-NEXT: nop -; PC64LE-NEXT: addis 3, 2, .LCPI7_3@toc@ha -; PC64LE-NEXT: fmr 2, 31 -; PC64LE-NEXT: fmr 29, 1 -; PC64LE-NEXT: lfs 1, .LCPI7_3@toc@l(3) -; PC64LE-NEXT: bl fmodf -; PC64LE-NEXT: nop -; PC64LE-NEXT: xscvdpspn 34, 29 -; PC64LE-NEXT: addis 3, 2, .LCPI7_4@toc@ha -; PC64LE-NEXT: xscvdpspn 35, 1 -; PC64LE-NEXT: addi 3, 3, .LCPI7_4@toc@l -; PC64LE-NEXT: lvx 4, 0, 3 -; PC64LE-NEXT: vmrghw 2, 2, 3 -; PC64LE-NEXT: xscvdpspn 35, 30 -; PC64LE-NEXT: vperm 2, 3, 2, 4 -; PC64LE-NEXT: addi 1, 1, 64 -; PC64LE-NEXT: ld 0, 16(1) -; PC64LE-NEXT: lfd 31, -8(1) # 8-byte Folded Reload -; PC64LE-NEXT: lfd 30, -16(1) # 8-byte Folded Reload -; PC64LE-NEXT: lfd 29, -24(1) # 8-byte Folded Reload -; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: addi 3, 3, .LCPI7_0@toc@l +; PC64LE-NEXT: lvx 2, 0, 3 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_frem_v3f32: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: mflr 0 -; PC64LE9-NEXT: stfd 29, -24(1) # 8-byte Folded Spill -; PC64LE9-NEXT: stfd 30, -16(1) # 8-byte Folded Spill -; PC64LE9-NEXT: stfd 31, -8(1) # 8-byte Folded Spill -; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI7_0@toc@ha -; PC64LE9-NEXT: lfs 1, .LCPI7_0@toc@l(3) -; PC64LE9-NEXT: addis 3, 2, .LCPI7_1@toc@ha -; PC64LE9-NEXT: lfs 31, .LCPI7_1@toc@l(3) -; PC64LE9-NEXT: fmr 2, 31 -; PC64LE9-NEXT: bl fmodf -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: addis 3, 2, .LCPI7_2@toc@ha -; PC64LE9-NEXT: fmr 30, 1 -; PC64LE9-NEXT: fmr 2, 31 -; PC64LE9-NEXT: lfs 1, .LCPI7_2@toc@l(3) -; PC64LE9-NEXT: bl fmodf -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: addis 3, 2, .LCPI7_3@toc@ha -; PC64LE9-NEXT: fmr 29, 1 -; PC64LE9-NEXT: fmr 2, 31 -; PC64LE9-NEXT: lfs 1, .LCPI7_3@toc@l(3) -; PC64LE9-NEXT: bl fmodf -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xscvdpspn 34, 1 -; PC64LE9-NEXT: xscvdpspn 35, 29 -; PC64LE9-NEXT: addis 3, 2, .LCPI7_4@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI7_4@toc@l -; PC64LE9-NEXT: lxvx 36, 0, 3 -; PC64LE9-NEXT: vmrghw 2, 3, 2 -; PC64LE9-NEXT: xscvdpspn 35, 30 -; PC64LE9-NEXT: vperm 2, 3, 2, 4 -; PC64LE9-NEXT: addi 1, 1, 64 -; PC64LE9-NEXT: ld 0, 16(1) -; PC64LE9-NEXT: lfd 31, -8(1) # 8-byte Folded Reload -; PC64LE9-NEXT: lfd 30, -16(1) # 8-byte Folded Reload -; PC64LE9-NEXT: mtlr 0 -; PC64LE9-NEXT: lfd 29, -24(1) # 8-byte Folded Reload +; PC64LE9-NEXT: addi 3, 3, .LCPI7_0@toc@l +; PC64LE9-NEXT: 
lxvx 34, 0, 3 ; PC64LE9-NEXT: blr entry: %rem = call <3 x float> @llvm.experimental.constrained.frem.v3f32( @@ -418,80 +281,22 @@ define <3 x double> @constrained_vector_frem_v3f64() #0 { ; PC64LE-LABEL: constrained_vector_frem_v3f64: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: mflr 0 -; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -80(1) -; PC64LE-NEXT: addis 4, 2, .LCPI8_1@toc@ha -; PC64LE-NEXT: stfd 31, 72(1) # 8-byte Folded Spill -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI8_0@toc@ha -; PC64LE-NEXT: lfs 31, .LCPI8_1@toc@l(4) +; PC64LE-NEXT: addis 4, 2, .LCPI8_1@toc@ha +; PC64LE-NEXT: addis 5, 2, .LCPI8_2@toc@ha ; PC64LE-NEXT: lfs 1, .LCPI8_0@toc@l(3) -; PC64LE-NEXT: fmr 2, 31 -; PC64LE-NEXT: bl fmod -; PC64LE-NEXT: nop -; PC64LE-NEXT: addis 3, 2, .LCPI8_2@toc@ha -; PC64LE-NEXT: fmr 2, 31 -; PC64LE-NEXT: xxlor 63, 1, 1 -; PC64LE-NEXT: lfs 1, .LCPI8_2@toc@l(3) -; PC64LE-NEXT: bl fmod -; PC64LE-NEXT: nop -; PC64LE-NEXT: addis 3, 2, .LCPI8_3@toc@ha -; PC64LE-NEXT: fmr 2, 31 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: xxmrghd 63, 1, 63 -; PC64LE-NEXT: lfs 1, .LCPI8_3@toc@l(3) -; PC64LE-NEXT: bl fmod -; PC64LE-NEXT: nop -; PC64LE-NEXT: xxswapd 0, 63 -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: xxlor 2, 63, 63 -; PC64LE-NEXT: lfd 31, 72(1) # 8-byte Folded Reload -; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: fmr 1, 0 -; PC64LE-NEXT: addi 1, 1, 80 -; PC64LE-NEXT: ld 0, 16(1) -; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: lfs 2, .LCPI8_1@toc@l(4) +; PC64LE-NEXT: lfs 3, .LCPI8_2@toc@l(5) ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_frem_v3f64: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: mflr 0 -; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI8_0@toc@ha -; PC64LE9-NEXT: stfd 31, 56(1) # 8-byte Folded Spill -; PC64LE9-NEXT: stxv 63, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfs 1, .LCPI8_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI8_1@toc@ha -; PC64LE9-NEXT: lfs 31, .LCPI8_1@toc@l(3) -; PC64LE9-NEXT: fmr 2, 31 -; PC64LE9-NEXT: bl fmod -; PC64LE9-NEXT: nop +; PC64LE9-NEXT: lfs 2, .LCPI8_1@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI8_2@toc@ha -; PC64LE9-NEXT: xscpsgndp 63, 1, 1 -; PC64LE9-NEXT: fmr 2, 31 -; PC64LE9-NEXT: lfs 1, .LCPI8_2@toc@l(3) -; PC64LE9-NEXT: bl fmod -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: addis 3, 2, .LCPI8_3@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 63, 1, 63 -; PC64LE9-NEXT: fmr 2, 31 -; PC64LE9-NEXT: lfs 1, .LCPI8_3@toc@l(3) -; PC64LE9-NEXT: bl fmod -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xxswapd 1, 63 -; PC64LE9-NEXT: xscpsgndp 2, 63, 63 -; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload -; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 -; PC64LE9-NEXT: addi 1, 1, 64 -; PC64LE9-NEXT: ld 0, 16(1) -; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: lfs 3, .LCPI8_2@toc@l(3) ; PC64LE9-NEXT: blr entry: %rem = call <3 x double> @llvm.experimental.constrained.frem.v3f64( @@ -505,96 +310,24 @@ define <4 x double> @constrained_vector_frem_v4f64() #0 { ; PC64LE-LABEL: constrained_vector_frem_v4f64: ; PC64LE: # %bb.0: -; PC64LE-NEXT: mflr 0 -; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -96(1) -; PC64LE-NEXT: addis 4, 2, .LCPI9_1@toc@ha -; PC64LE-NEXT: stfd 31, 88(1) # 8-byte Folded Spill -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded 
Spill -; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: lfs 31, .LCPI9_1@toc@l(4) -; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI9_0@toc@ha -; PC64LE-NEXT: lfs 1, .LCPI9_0@toc@l(3) -; PC64LE-NEXT: fmr 2, 31 -; PC64LE-NEXT: bl fmod -; PC64LE-NEXT: nop -; PC64LE-NEXT: addis 3, 2, .LCPI9_2@toc@ha -; PC64LE-NEXT: fmr 2, 31 -; PC64LE-NEXT: xxlor 63, 1, 1 -; PC64LE-NEXT: lfs 1, .LCPI9_2@toc@l(3) -; PC64LE-NEXT: bl fmod -; PC64LE-NEXT: nop -; PC64LE-NEXT: addis 3, 2, .LCPI9_3@toc@ha -; PC64LE-NEXT: fmr 2, 31 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: xxmrghd 63, 1, 63 -; PC64LE-NEXT: lfs 1, .LCPI9_3@toc@l(3) -; PC64LE-NEXT: bl fmod -; PC64LE-NEXT: nop -; PC64LE-NEXT: addis 3, 2, .LCPI9_4@toc@ha -; PC64LE-NEXT: fmr 2, 31 -; PC64LE-NEXT: xxlor 62, 1, 1 -; PC64LE-NEXT: lfs 1, .LCPI9_4@toc@l(3) -; PC64LE-NEXT: bl fmod -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: vmr 2, 31 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: xxmrghd 35, 1, 62 -; PC64LE-NEXT: lfd 31, 88(1) # 8-byte Folded Reload -; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: addi 1, 1, 96 -; PC64LE-NEXT: ld 0, 16(1) -; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: addis 4, 2, .LCPI9_1@toc@ha +; PC64LE-NEXT: addi 3, 3, .LCPI9_0@toc@l +; PC64LE-NEXT: addi 4, 4, .LCPI9_1@toc@l +; PC64LE-NEXT: lxvd2x 0, 0, 3 +; PC64LE-NEXT: lxvd2x 1, 0, 4 +; PC64LE-NEXT: xxswapd 34, 0 +; PC64LE-NEXT: xxswapd 35, 1 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_frem_v4f64: ; PC64LE9: # %bb.0: -; PC64LE9-NEXT: mflr 0 -; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -80(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI9_0@toc@ha -; PC64LE9-NEXT: stfd 31, 72(1) # 8-byte Folded Spill -; PC64LE9-NEXT: stxv 62, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: lfs 1, .LCPI9_0@toc@l(3) +; PC64LE9-NEXT: addi 3, 3, .LCPI9_0@toc@l +; PC64LE9-NEXT: lxvx 34, 0, 3 ; PC64LE9-NEXT: addis 3, 2, .LCPI9_1@toc@ha -; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill -; PC64LE9-NEXT: lfs 31, .LCPI9_1@toc@l(3) -; PC64LE9-NEXT: fmr 2, 31 -; PC64LE9-NEXT: bl fmod -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: addis 3, 2, .LCPI9_2@toc@ha -; PC64LE9-NEXT: xscpsgndp 63, 1, 1 -; PC64LE9-NEXT: fmr 2, 31 -; PC64LE9-NEXT: lfs 1, .LCPI9_2@toc@l(3) -; PC64LE9-NEXT: bl fmod -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: addis 3, 2, .LCPI9_3@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 63, 1, 63 -; PC64LE9-NEXT: fmr 2, 31 -; PC64LE9-NEXT: lfs 1, .LCPI9_3@toc@l(3) -; PC64LE9-NEXT: bl fmod -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: addis 3, 2, .LCPI9_4@toc@ha -; PC64LE9-NEXT: xscpsgndp 62, 1, 1 -; PC64LE9-NEXT: fmr 2, 31 -; PC64LE9-NEXT: lfs 1, .LCPI9_4@toc@l(3) -; PC64LE9-NEXT: bl fmod -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 35, 1, 62 -; PC64LE9-NEXT: vmr 2, 31 -; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload -; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: lfd 31, 72(1) # 8-byte Folded Reload -; PC64LE9-NEXT: addi 1, 1, 80 -; PC64LE9-NEXT: ld 0, 16(1) -; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: addi 3, 3, .LCPI9_1@toc@l +; PC64LE9-NEXT: lxvx 35, 0, 3 ; PC64LE9-NEXT: blr %rem = call <4 x double> @llvm.experimental.constrained.frem.v4f64( <4 x double> @llvm.experimental.constrained.fmul.v1f32( @@ -669,48 +396,16 @@ define <3 x float> @constrained_vector_fmul_v3f32() #0 { ; PC64LE-LABEL: 
constrained_vector_fmul_v3f32: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: addis 3, 2, .LCPI12_1@toc@ha -; PC64LE-NEXT: addis 4, 2, .LCPI12_3@toc@ha -; PC64LE-NEXT: addis 5, 2, .LCPI12_2@toc@ha -; PC64LE-NEXT: lfs 0, .LCPI12_1@toc@l(3) -; PC64LE-NEXT: lfs 1, .LCPI12_3@toc@l(4) -; PC64LE-NEXT: lfs 2, .LCPI12_2@toc@l(5) ; PC64LE-NEXT: addis 3, 2, .LCPI12_0@toc@ha -; PC64LE-NEXT: xsmulsp 1, 0, 1 -; PC64LE-NEXT: lfs 3, .LCPI12_0@toc@l(3) -; PC64LE-NEXT: addis 3, 2, .LCPI12_4@toc@ha -; PC64LE-NEXT: xsmulsp 2, 0, 2 -; PC64LE-NEXT: addi 3, 3, .LCPI12_4@toc@l -; PC64LE-NEXT: lvx 4, 0, 3 -; PC64LE-NEXT: xsmulsp 0, 0, 3 -; PC64LE-NEXT: xscvdpspn 34, 1 -; PC64LE-NEXT: xscvdpspn 35, 2 -; PC64LE-NEXT: vmrghw 2, 3, 2 -; PC64LE-NEXT: xscvdpspn 35, 0 -; PC64LE-NEXT: vperm 2, 3, 2, 4 +; PC64LE-NEXT: addi 3, 3, .LCPI12_0@toc@l +; PC64LE-NEXT: lvx 2, 0, 3 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_fmul_v3f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI12_0@toc@ha -; PC64LE9-NEXT: lfs 0, .LCPI12_0@toc@l(3) -; PC64LE9-NEXT: addis 3, 2, .LCPI12_1@toc@ha -; PC64LE9-NEXT: lfs 1, .LCPI12_1@toc@l(3) -; PC64LE9-NEXT: addis 3, 2, .LCPI12_2@toc@ha -; PC64LE9-NEXT: xsmulsp 0, 1, 0 -; PC64LE9-NEXT: lfs 2, .LCPI12_2@toc@l(3) -; PC64LE9-NEXT: addis 3, 2, .LCPI12_3@toc@ha -; PC64LE9-NEXT: lfs 3, .LCPI12_3@toc@l(3) -; PC64LE9-NEXT: addis 3, 2, .LCPI12_4@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI12_4@toc@l -; PC64LE9-NEXT: lxvx 36, 0, 3 -; PC64LE9-NEXT: xsmulsp 2, 1, 2 -; PC64LE9-NEXT: xsmulsp 1, 1, 3 -; PC64LE9-NEXT: xscvdpspn 35, 2 -; PC64LE9-NEXT: xscvdpspn 34, 1 -; PC64LE9-NEXT: vmrghw 2, 3, 2 -; PC64LE9-NEXT: xscvdpspn 35, 0 -; PC64LE9-NEXT: vperm 2, 3, 2, 4 +; PC64LE9-NEXT: addi 3, 3, .LCPI12_0@toc@l +; PC64LE9-NEXT: lxvx 34, 0, 3 ; PC64LE9-NEXT: blr entry: %mul = call <3 x float> @llvm.experimental.constrained.fmul.v3f32( @@ -820,19 +515,13 @@ ; PC64LE-LABEL: constrained_vector_fadd_v1f32: ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI15_0@toc@ha -; PC64LE-NEXT: addis 4, 2, .LCPI15_1@toc@ha -; PC64LE-NEXT: lfs 0, .LCPI15_0@toc@l(3) -; PC64LE-NEXT: lfs 1, .LCPI15_1@toc@l(4) -; PC64LE-NEXT: xsaddsp 1, 1, 0 +; PC64LE-NEXT: lfs 1, .LCPI15_0@toc@l(3) ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_fadd_v1f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI15_0@toc@ha -; PC64LE9-NEXT: lfs 0, .LCPI15_0@toc@l(3) -; PC64LE9-NEXT: addis 3, 2, .LCPI15_1@toc@ha -; PC64LE9-NEXT: lfs 1, .LCPI15_1@toc@l(3) -; PC64LE9-NEXT: xsaddsp 1, 1, 0 +; PC64LE9-NEXT: lfs 1, .LCPI15_0@toc@l(3) ; PC64LE9-NEXT: blr entry: %add = call <1 x float> @llvm.experimental.constrained.fadd.v1f32( @@ -879,46 +568,12 @@ define <3 x float> @constrained_vector_fadd_v3f32() #0 { ; PC64LE-LABEL: constrained_vector_fadd_v3f32: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: addis 3, 2, .LCPI17_0@toc@ha -; PC64LE-NEXT: addis 4, 2, .LCPI17_2@toc@ha -; PC64LE-NEXT: addis 5, 2, .LCPI17_1@toc@ha -; PC64LE-NEXT: xxlxor 3, 3, 3 -; PC64LE-NEXT: lfs 0, .LCPI17_0@toc@l(3) -; PC64LE-NEXT: lfs 1, .LCPI17_2@toc@l(4) -; PC64LE-NEXT: lfs 2, .LCPI17_1@toc@l(5) -; PC64LE-NEXT: addis 3, 2, .LCPI17_3@toc@ha -; PC64LE-NEXT: addi 3, 3, .LCPI17_3@toc@l -; PC64LE-NEXT: xsaddsp 1, 0, 1 -; PC64LE-NEXT: lvx 4, 0, 3 -; PC64LE-NEXT: xsaddsp 2, 0, 2 -; PC64LE-NEXT: xsaddsp 0, 0, 3 -; PC64LE-NEXT: xscvdpspn 34, 1 -; PC64LE-NEXT: xscvdpspn 35, 2 -; PC64LE-NEXT: vmrghw 2, 3, 2 -; PC64LE-NEXT: xscvdpspn 35, 0 -; PC64LE-NEXT: vperm 2, 3, 2, 4 +; PC64LE-NEXT: xxleqv 34, 34, 34 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: 
constrained_vector_fadd_v3f32: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: addis 3, 2, .LCPI17_0@toc@ha -; PC64LE9-NEXT: xxlxor 1, 1, 1 -; PC64LE9-NEXT: lfs 0, .LCPI17_0@toc@l(3) -; PC64LE9-NEXT: addis 3, 2, .LCPI17_1@toc@ha -; PC64LE9-NEXT: lfs 2, .LCPI17_1@toc@l(3) -; PC64LE9-NEXT: addis 3, 2, .LCPI17_2@toc@ha -; PC64LE9-NEXT: lfs 3, .LCPI17_2@toc@l(3) -; PC64LE9-NEXT: addis 3, 2, .LCPI17_3@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI17_3@toc@l -; PC64LE9-NEXT: xsaddsp 1, 0, 1 -; PC64LE9-NEXT: lxvx 36, 0, 3 -; PC64LE9-NEXT: xsaddsp 2, 0, 2 -; PC64LE9-NEXT: xsaddsp 0, 0, 3 -; PC64LE9-NEXT: xscvdpspn 35, 2 -; PC64LE9-NEXT: xscvdpspn 34, 0 -; PC64LE9-NEXT: vmrghw 2, 3, 2 -; PC64LE9-NEXT: xscvdpspn 35, 1 -; PC64LE9-NEXT: vperm 2, 3, 2, 4 +; PC64LE9-NEXT: xxleqv 34, 34, 34 ; PC64LE9-NEXT: blr entry: %add = call <3 x float> @llvm.experimental.constrained.fadd.v3f32( @@ -1026,19 +681,13 @@ ; PC64LE-LABEL: constrained_vector_fsub_v1f32: ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI20_0@toc@ha -; PC64LE-NEXT: addis 4, 2, .LCPI20_1@toc@ha -; PC64LE-NEXT: lfs 0, .LCPI20_0@toc@l(3) -; PC64LE-NEXT: lfs 1, .LCPI20_1@toc@l(4) -; PC64LE-NEXT: xssubsp 1, 1, 0 +; PC64LE-NEXT: lfs 1, .LCPI20_0@toc@l(3) ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_fsub_v1f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI20_0@toc@ha -; PC64LE9-NEXT: lfs 0, .LCPI20_0@toc@l(3) -; PC64LE9-NEXT: addis 3, 2, .LCPI20_1@toc@ha -; PC64LE9-NEXT: lfs 1, .LCPI20_1@toc@l(3) -; PC64LE9-NEXT: xssubsp 1, 1, 0 +; PC64LE9-NEXT: lfs 1, .LCPI20_0@toc@l(3) ; PC64LE9-NEXT: blr entry: %sub = call <1 x float> @llvm.experimental.constrained.fsub.v1f32( @@ -1085,46 +734,12 @@ define <3 x float> @constrained_vector_fsub_v3f32() #0 { ; PC64LE-LABEL: constrained_vector_fsub_v3f32: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: addis 3, 2, .LCPI22_0@toc@ha -; PC64LE-NEXT: addis 4, 2, .LCPI22_2@toc@ha -; PC64LE-NEXT: addis 5, 2, .LCPI22_1@toc@ha -; PC64LE-NEXT: xxlxor 3, 3, 3 -; PC64LE-NEXT: lfs 0, .LCPI22_0@toc@l(3) -; PC64LE-NEXT: lfs 1, .LCPI22_2@toc@l(4) -; PC64LE-NEXT: lfs 2, .LCPI22_1@toc@l(5) -; PC64LE-NEXT: addis 3, 2, .LCPI22_3@toc@ha -; PC64LE-NEXT: addi 3, 3, .LCPI22_3@toc@l -; PC64LE-NEXT: xssubsp 1, 0, 1 -; PC64LE-NEXT: lvx 4, 0, 3 -; PC64LE-NEXT: xssubsp 2, 0, 2 -; PC64LE-NEXT: xssubsp 0, 0, 3 -; PC64LE-NEXT: xscvdpspn 34, 1 -; PC64LE-NEXT: xscvdpspn 35, 2 -; PC64LE-NEXT: vmrghw 2, 3, 2 -; PC64LE-NEXT: xscvdpspn 35, 0 -; PC64LE-NEXT: vperm 2, 3, 2, 4 +; PC64LE-NEXT: xxleqv 34, 34, 34 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_fsub_v3f32: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: addis 3, 2, .LCPI22_0@toc@ha -; PC64LE9-NEXT: xxlxor 1, 1, 1 -; PC64LE9-NEXT: lfs 0, .LCPI22_0@toc@l(3) -; PC64LE9-NEXT: addis 3, 2, .LCPI22_1@toc@ha -; PC64LE9-NEXT: lfs 2, .LCPI22_1@toc@l(3) -; PC64LE9-NEXT: addis 3, 2, .LCPI22_2@toc@ha -; PC64LE9-NEXT: lfs 3, .LCPI22_2@toc@l(3) -; PC64LE9-NEXT: addis 3, 2, .LCPI22_3@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI22_3@toc@l -; PC64LE9-NEXT: xssubsp 1, 0, 1 -; PC64LE9-NEXT: lxvx 36, 0, 3 -; PC64LE9-NEXT: xssubsp 2, 0, 2 -; PC64LE9-NEXT: xssubsp 0, 0, 3 -; PC64LE9-NEXT: xscvdpspn 35, 2 -; PC64LE9-NEXT: xscvdpspn 34, 0 -; PC64LE9-NEXT: vmrghw 2, 3, 2 -; PC64LE9-NEXT: xscvdpspn 35, 1 -; PC64LE9-NEXT: vperm 2, 3, 2, 4 +; PC64LE9-NEXT: xxleqv 34, 34, 34 ; PC64LE9-NEXT: blr entry: %sub = call <3 x float> @llvm.experimental.constrained.fsub.v3f32( diff --git a/llvm/test/Transforms/InstSimplify/constfold-constrained.ll 
b/llvm/test/Transforms/InstSimplify/constfold-constrained.ll
--- a/llvm/test/Transforms/InstSimplify/constfold-constrained.ll
+++ b/llvm/test/Transforms/InstSimplify/constfold-constrained.ll
@@ -232,6 +232,219 @@
   ; CHECK: ret double 1.000000e+01
 }
 
+define float @fadd_01() #0 {
+entry:
+  %result = call float @llvm.experimental.constrained.fadd.f32(
+                                               float 1.000000e+01,
+                                               float 2.000000e+01,
+                                               metadata !"round.tonearest",
+                                               metadata !"fpexcept.ignore") #0
+  ret float %result
+  ; CHECK-LABEL: @fadd_01
+  ; CHECK: ret float 3.000000e+01
+}
+
+; An inexact result does not prevent folding if exceptions are ignored and
+; the rounding mode is known.
+define double @fadd_02() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(
+                                               double 1.0,
+                                               double 0x3FF0000000000001,
+                                               metadata !"round.tonearest",
+                                               metadata !"fpexcept.ignore") #0
+  ret double %result
+  ; CHECK-LABEL: @fadd_02
+  ; CHECK: ret double 2.000000e+00
+}
+
+define double @fadd_03() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(
+                                               double 1.0,
+                                               double 0x3FF0000000000001,
+                                               metadata !"round.upward",
+                                               metadata !"fpexcept.ignore") #0
+  ret double %result
+  ; CHECK-LABEL: @fadd_03
+  ; CHECK: ret double 0x4000000000000001
+}
+
+; An inexact result prevents folding if exceptions may be checked.
+define double @fadd_04() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(
+                                               double 1.0,
+                                               double 0x3FF0000000000001,
+                                               metadata !"round.tonearest",
+                                               metadata !"fpexcept.strict") #0
+  ret double %result
+  ; CHECK-LABEL: @fadd_04
+  ; CHECK: ret double %result
+}
+
+; If the result is exact, folding is allowed even if exceptions may be checked.
+define double @fadd_05() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(
+                                               double 1.0,
+                                               double 2.0,
+                                               metadata !"round.tonearest",
+                                               metadata !"fpexcept.strict") #0
+  ret double %result
+  ; CHECK-LABEL: @fadd_05
+  ; CHECK: ret double 3.000000e+00
+}
+
+; A dynamic rounding mode does not prevent folding if the result is exact.
+define double @fadd_06() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(
+                                               double 1.0,
+                                               double 2.0,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret double %result
+  ; CHECK-LABEL: @fadd_06
+  ; CHECK: ret double 3.000000e+00
+}
+
+; An inexact result prevents folding if the rounding mode is unknown.
+define double @fadd_07() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(
+                                               double 1.0,
+                                               double 0x3FF0000000000001,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.ignore") #0
+  ret double %result
+  ; CHECK-LABEL: @fadd_07
+  ; CHECK: ret double %result
+}
+
+; An infinite result does not prevent folding unless exceptions are tracked.
+define double @fadd_08() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(
+                                               double 0x7fEFFFFFFFFFFFFF,
+                                               double 0x7fEFFFFFFFFFFFFF,
+                                               metadata !"round.tonearest",
+                                               metadata !"fpexcept.ignore") #0
+  ret double %result
+  ; CHECK-LABEL: @fadd_08
+  ; CHECK: ret double 0x7FF0000000000000
+}
+
+define double @fadd_09() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(
+                                               double 0x7fEFFFFFFFFFFFFF,
+                                               double 0x7fEFFFFFFFFFFFFF,
+                                               metadata !"round.tonearest",
+                                               metadata !"fpexcept.strict") #0
+  ret double %result
+  ; CHECK-LABEL: @fadd_09
+  ; CHECK: ret double %result
+}
+
+define half @fadd_10() #0 {
+entry:
+  %result = call half @llvm.experimental.constrained.fadd.f16(
+                                               half 1.0,
+                                               half 2.0,
+                                               metadata !"round.tonearest",
+                                               metadata !"fpexcept.ignore") #0
+  ret half %result
+  ; CHECK-LABEL: @fadd_10
+  ; CHECK: ret half 0xH4200
+}
+
+define bfloat @fadd_11() #0 {
+entry:
+  %result = call bfloat @llvm.experimental.constrained.fadd.bf16(
+                                               bfloat 1.0,
+                                               bfloat 2.0,
+                                               metadata !"round.tonearest",
+                                               metadata !"fpexcept.ignore") #0
+  ret bfloat %result
+  ; CHECK-LABEL: @fadd_11
+  ; CHECK: ret bfloat 0xR4040
+}
+
+define double @fsub_01() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.fsub.f64(
+                                               double 1.0,
+                                               double 2.0,
+                                               metadata !"round.tonearest",
+                                               metadata !"fpexcept.ignore") #0
+  ret double %result
+  ; CHECK-LABEL: @fsub_01
+  ; CHECK: ret double -1.000000e+00
+}
+
+define double @fmul_01() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.fmul.f64(
+                                               double 1.0,
+                                               double 2.0,
+                                               metadata !"round.tonearest",
+                                               metadata !"fpexcept.ignore") #0
+  ret double %result
+  ; CHECK-LABEL: @fmul_01
+  ; CHECK: ret double 2.000000e+00
+}
+
+define double @fdiv_01() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.fdiv.f64(
+                                               double 1.0,
+                                               double 2.0,
+                                               metadata !"round.tonearest",
+                                               metadata !"fpexcept.ignore") #0
+  ret double %result
+  ; CHECK-LABEL: @fdiv_01
+  ; CHECK: ret double 5.000000e-01
+}
+
+define double @frem_01() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.frem.f64(
+                                               double 1.0,
+                                               double 2.0,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.ignore") #0
+  ret double %result
+  ; CHECK-LABEL: @frem_01
+  ; CHECK: ret double 1.000000e+00
+}
+
+define double @fma_01() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.fma.f64(
+                                               double 1.0,
+                                               double 2.0,
+                                               double 3.0,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.ignore") #0
+  ret double %result
+  ; CHECK-LABEL: @fma_01
+  ; CHECK: ret double 5.000000e+00
+}
+
+define double @fmuladd_01() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.fmuladd.f64(
+                                               double 1.0,
+                                               double 2.0,
+                                               double 3.0,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.ignore") #0
+  ret double %result
+  ; CHECK-LABEL: @fmuladd_01
+  ; CHECK: ret double 5.000000e+00
+}
+
 attributes #0 = { strictfp }
 
@@ -241,4 +454,14 @@
 declare double @llvm.experimental.constrained.trunc.f64(double, metadata)
 declare double @llvm.experimental.constrained.round.f64(double, metadata)
 declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
+declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
+declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
+declare half @llvm.experimental.constrained.fadd.f16(half, half, metadata, metadata)
+declare bfloat @llvm.experimental.constrained.fadd.bf16(bfloat, bfloat, metadata, metadata)
+declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.frem.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fmuladd.f64(double, double, double, metadata, metadata)
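A closing note on @fma_01 and @fmuladd_01 above: both fold because the fused multiply-add is exact, and an exact result is independent of the rounding mode, so even round.dynamic does not block the fold. A small sketch of the underlying APFloat evaluation, under the same assumptions as the earlier snippets (illustrative, not part of the patch):

#include "llvm/ADT/APFloat.h"
#include <cassert>
using namespace llvm;

int main() {
  APFloat Res(1.0);
  // Evaluate 1.0 * 2.0 + 3.0 under a fixed rounding mode; opOK means the
  // result is exact, hence identical under every rounding mode, which is
  // why the round.dynamic calls in @fma_01/@fmuladd_01 may be folded.
  APFloat::opStatus St = Res.fusedMultiplyAdd(
      APFloat(2.0), APFloat(3.0), APFloat::rmNearestTiesToEven);
  assert(St == APFloat::opOK);
  assert(Res.convertToDouble() == 5.0);
  return 0;
}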