diff --git a/llvm/include/llvm/IR/FPEnv.h b/llvm/include/llvm/IR/FPEnv.h --- a/llvm/include/llvm/IR/FPEnv.h +++ b/llvm/include/llvm/IR/FPEnv.h @@ -15,6 +15,7 @@ #ifndef LLVM_IR_FLOATINGPOINT_H #define LLVM_IR_FLOATINGPOINT_H +#include "llvm/ADT/APFloat.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include <stdint.h> @@ -66,5 +67,8 @@ /// input in constrained intrinsic exception behavior metadata. Optional<StringRef> ExceptionBehaviorToStr(fp::ExceptionBehavior); +/// Converts rounding mode represented by fp::RoundingMode to the rounding mode +/// index used by APFloat. For fp::rmDynamic it returns None. +Optional<APFloat::roundingMode> getAPFloatRoundingMode(fp::RoundingMode); } #endif diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -38,6 +38,7 @@ #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsX86.h" @@ -1396,41 +1397,19 @@ // bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { - if (Call->isNoBuiltin() || Call->isStrictFP()) + if (Call->isNoBuiltin()) return false; switch (F->getIntrinsicID()) { - case Intrinsic::fabs: - case Intrinsic::minnum: - case Intrinsic::maxnum: - case Intrinsic::minimum: - case Intrinsic::maximum: - case Intrinsic::log: - case Intrinsic::log2: - case Intrinsic::log10: - case Intrinsic::exp: - case Intrinsic::exp2: - case Intrinsic::floor: - case Intrinsic::ceil: - case Intrinsic::sqrt: - case Intrinsic::sin: - case Intrinsic::cos: - case Intrinsic::trunc: - case Intrinsic::rint: - case Intrinsic::nearbyint: - case Intrinsic::pow: - case Intrinsic::powi: + // Operations that do not operate on floating-point numbers and do not depend + // on the FP environment can be folded even in strictfp functions. 
case Intrinsic::bswap: case Intrinsic::ctpop: case Intrinsic::ctlz: case Intrinsic::cttz: case Intrinsic::fshl: case Intrinsic::fshr: - case Intrinsic::fma: - case Intrinsic::fmuladd: - case Intrinsic::copysign: case Intrinsic::launder_invariant_group: case Intrinsic::strip_invariant_group: - case Intrinsic::round: case Intrinsic::masked_load: case Intrinsic::sadd_with_overflow: case Intrinsic::uadd_with_overflow: @@ -1444,9 +1423,31 @@ case Intrinsic::usub_sat: case Intrinsic::smul_fix: case Intrinsic::smul_fix_sat: + case Intrinsic::bitreverse: + case Intrinsic::is_constant: + return true; + + // Floating point operations cannot be folded in strictfp functions in the + // general case. They can be folded if the FP environment is known to the compiler. + case Intrinsic::minnum: + case Intrinsic::maxnum: + case Intrinsic::minimum: + case Intrinsic::maximum: + case Intrinsic::log: + case Intrinsic::log2: + case Intrinsic::log10: + case Intrinsic::exp: + case Intrinsic::exp2: + case Intrinsic::sqrt: + case Intrinsic::sin: + case Intrinsic::cos: + case Intrinsic::pow: + case Intrinsic::powi: + case Intrinsic::fma: + case Intrinsic::fmuladd: case Intrinsic::convert_from_fp16: case Intrinsic::convert_to_fp16: - case Intrinsic::bitreverse: + // The intrinsics below depend on the rounding mode in MXCSR. case Intrinsic::amdgcn_cubeid: case Intrinsic::amdgcn_cubema: case Intrinsic::amdgcn_cubesc: @@ -1477,14 +1478,35 @@ case Intrinsic::x86_avx512_vcvtsd2usi64: case Intrinsic::x86_avx512_cvttsd2usi: case Intrinsic::x86_avx512_cvttsd2usi64: - case Intrinsic::is_constant: + return !Call->isStrictFP(); + + // Sign operations are actually bitwise operations; they do not raise + // exceptions even for SNANs. + case Intrinsic::fabs: + case Intrinsic::copysign: + // Non-constrained variants of rounding operations mean the default FP + // environment; they can be folded in any case. 
+ case Intrinsic::ceil: + case Intrinsic::floor: + case Intrinsic::round: + case Intrinsic::trunc: + case Intrinsic::nearbyint: + case Intrinsic::rint: + // Constrained intrinsics can be folded if the FP environment is known + // to the compiler. + case Intrinsic::experimental_constrained_ceil: + case Intrinsic::experimental_constrained_floor: + case Intrinsic::experimental_constrained_round: + case Intrinsic::experimental_constrained_trunc: + case Intrinsic::experimental_constrained_nearbyint: + case Intrinsic::experimental_constrained_rint: return true; default: return false; case Intrinsic::not_intrinsic: break; } - if (!F->hasName()) + if (!F->hasName() || Call->isStrictFP()) return false; // In these cases, the check of the length is required. We don't want to @@ -1792,6 +1814,55 @@ return ConstantFP::get(Ty->getContext(), minimum(FractU, AlmostOne)); } + // Rounding operations (floor, trunc, ceil, round and nearbyint) do not + // raise FP exceptions, unless the argument is signaling NaN. + + Optional<APFloat::roundingMode> RM; + switch (IntrinsicID) { + default: + break; + case Intrinsic::experimental_constrained_nearbyint: + case Intrinsic::experimental_constrained_rint: { + auto CI = cast<ConstrainedFPIntrinsic>(Call); + Optional<fp::RoundingMode> RMOp = CI->getRoundingMode(); + if (RMOp) + RM = getAPFloatRoundingMode(*RMOp); + if (!RM) + return nullptr; + break; + } + case Intrinsic::experimental_constrained_round: + RM = APFloat::rmNearestTiesToAway; + break; + case Intrinsic::experimental_constrained_ceil: + RM = APFloat::rmTowardPositive; + break; + case Intrinsic::experimental_constrained_floor: + RM = APFloat::rmTowardNegative; + break; + case Intrinsic::experimental_constrained_trunc: + RM = APFloat::rmTowardZero; + break; + } + if (RM) { + auto CI = cast<ConstrainedFPIntrinsic>(Call); + if (U.isFinite()) { + APFloat::opStatus St = U.roundToIntegral(*RM); + if (IntrinsicID == Intrinsic::experimental_constrained_rint && + St == APFloat::opInexact) { + Optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior(); + if (EB && *EB == fp::ebStrict) + return nullptr; + } 
+ } else if (U.isSignaling()) { + Optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior(); + if (EB && *EB != fp::ebIgnore) + return nullptr; + U = APFloat::getQNaN(U.getSemantics()); + } + return ConstantFP::get(Ty->getContext(), U); + } + /// We only fold functions with finite arguments. Folding NaN and inf is /// likely to be aborted with an exception anyway, and some host libms /// have known errors raising exceptions. @@ -2573,7 +2644,7 @@ Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F, ArrayRef<Constant *> Operands, const TargetLibraryInfo *TLI) { - if (Call->isNoBuiltin() || Call->isStrictFP()) + if (Call->isNoBuiltin()) return nullptr; if (!F->hasName()) return nullptr; diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -5368,8 +5368,11 @@ ConstantArgs.reserve(NumArgs); for (auto &Arg : Call->args()) { Constant *C = dyn_cast<Constant>(&Arg); - if (!C) + if (!C) { + if (isa<MetadataAsValue>(Arg.get())) + continue; return nullptr; + } ConstantArgs.push_back(C); } diff --git a/llvm/lib/IR/FPEnv.cpp b/llvm/lib/IR/FPEnv.cpp --- a/llvm/lib/IR/FPEnv.cpp +++ b/llvm/lib/IR/FPEnv.cpp @@ -75,4 +75,20 @@ return ExceptStr; } +Optional<APFloat::roundingMode> +getAPFloatRoundingMode(fp::RoundingMode RM) { + switch (RM) { + case fp::rmDynamic: + return None; + case fp::rmToNearest: + return APFloat::rmNearestTiesToEven; + case fp::rmDownward: + return APFloat::rmTowardNegative; + case fp::rmUpward: + return APFloat::rmTowardPositive; + case fp::rmTowardZero: + return APFloat::rmTowardZero; + } + llvm_unreachable("Unexpected rounding mode"); +} } diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll --- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll @@ -6373,19 +6373,17 @@ ; PC64LE-LABEL: constrained_vector_ceil_v1f32: 
; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI103_0@toc@ha -; PC64LE-NEXT: lfs 0, .LCPI103_0@toc@l(3) -; PC64LE-NEXT: xsrdpip 0, 0 -; PC64LE-NEXT: xscvdpspn 0, 0 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE-NEXT: addi 3, 3, .LCPI103_0@toc@l +; PC64LE-NEXT: lfiwzx 0, 0, 3 +; PC64LE-NEXT: xxpermdi 34, 0, 0, 2 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_ceil_v1f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI103_0@toc@ha -; PC64LE9-NEXT: lfs 0, .LCPI103_0@toc@l(3) -; PC64LE9-NEXT: xsrdpip 0, 0 -; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: addi 3, 3, .LCPI103_0@toc@l +; PC64LE9-NEXT: lfiwzx 0, 0, 3 +; PC64LE9-NEXT: xxpermdi 34, 0, 0, 2 ; PC64LE9-NEXT: blr entry: %ceil = call <1 x float> @llvm.experimental.constrained.ceil.v1f32( @@ -6400,16 +6398,14 @@ ; PC64LE-NEXT: addis 3, 2, .LCPI104_0@toc@ha ; PC64LE-NEXT: addi 3, 3, .LCPI104_0@toc@l ; PC64LE-NEXT: lxvd2x 0, 0, 3 -; PC64LE-NEXT: xxswapd 0, 0 -; PC64LE-NEXT: xvrdpip 34, 0 +; PC64LE-NEXT: xxswapd 34, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_ceil_v2f64: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI104_0@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI104_0@toc@l -; PC64LE9-NEXT: lxvx 0, 0, 3 -; PC64LE9-NEXT: xvrdpip 34, 0 +; PC64LE9-NEXT: lxvx 34, 0, 3 ; PC64LE9-NEXT: blr entry: %ceil = call <2 x double> @llvm.experimental.constrained.ceil.v2f64( @@ -6421,50 +6417,16 @@ define <3 x float> @constrained_vector_ceil_v3f32() #0 { ; PC64LE-LABEL: constrained_vector_ceil_v3f32: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: addis 3, 2, .LCPI105_2@toc@ha -; PC64LE-NEXT: addis 4, 2, .LCPI105_1@toc@ha -; PC64LE-NEXT: lfs 0, .LCPI105_2@toc@l(3) -; PC64LE-NEXT: lfs 1, .LCPI105_1@toc@l(4) ; PC64LE-NEXT: addis 3, 2, .LCPI105_0@toc@ha -; PC64LE-NEXT: xsrdpip 0, 0 -; PC64LE-NEXT: lfs 2, .LCPI105_0@toc@l(3) -; PC64LE-NEXT: addis 3, 2, .LCPI105_3@toc@ha -; PC64LE-NEXT: xsrdpip 1, 1 -; PC64LE-NEXT: addi 3, 3, .LCPI105_3@toc@l 
-; PC64LE-NEXT: xsrdpip 2, 2 -; PC64LE-NEXT: xscvdpspn 0, 0 -; PC64LE-NEXT: xscvdpspn 1, 1 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 -; PC64LE-NEXT: xscvdpspn 0, 2 -; PC64LE-NEXT: xxsldwi 35, 1, 1, 1 -; PC64LE-NEXT: vmrglw 2, 3, 2 -; PC64LE-NEXT: lvx 3, 0, 3 -; PC64LE-NEXT: xxsldwi 36, 0, 0, 1 -; PC64LE-NEXT: vperm 2, 4, 2, 3 +; PC64LE-NEXT: addi 3, 3, .LCPI105_0@toc@l +; PC64LE-NEXT: lvx 2, 0, 3 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_ceil_v3f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI105_0@toc@ha -; PC64LE9-NEXT: lfs 0, .LCPI105_0@toc@l(3) -; PC64LE9-NEXT: addis 3, 2, .LCPI105_1@toc@ha -; PC64LE9-NEXT: lfs 1, .LCPI105_1@toc@l(3) -; PC64LE9-NEXT: addis 3, 2, .LCPI105_2@toc@ha -; PC64LE9-NEXT: xsrdpip 0, 0 -; PC64LE9-NEXT: lfs 2, .LCPI105_2@toc@l(3) -; PC64LE9-NEXT: addis 3, 2, .LCPI105_3@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI105_3@toc@l -; PC64LE9-NEXT: xsrdpip 1, 1 -; PC64LE9-NEXT: xsrdpip 2, 2 -; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xscvdpspn 1, 1 -; PC64LE9-NEXT: xscvdpspn 2, 2 -; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1 -; PC64LE9-NEXT: xxsldwi 35, 1, 1, 1 -; PC64LE9-NEXT: xxsldwi 34, 2, 2, 1 -; PC64LE9-NEXT: vmrglw 2, 3, 2 -; PC64LE9-NEXT: lxvx 35, 0, 3 -; PC64LE9-NEXT: vperm 2, 4, 2, 3 +; PC64LE9-NEXT: addi 3, 3, .LCPI105_0@toc@l +; PC64LE9-NEXT: lxvx 34, 0, 3 ; PC64LE9-NEXT: blr entry: %ceil = call <3 x float> @llvm.experimental.constrained.ceil.v3f32( @@ -6476,31 +6438,18 @@ define <3 x double> @constrained_vector_ceil_v3f64() #0 { ; PC64LE-LABEL: constrained_vector_ceil_v3f64: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: addis 3, 2, .LCPI106_1@toc@ha -; PC64LE-NEXT: addi 3, 3, .LCPI106_1@toc@l -; PC64LE-NEXT: lxvd2x 0, 0, 3 ; PC64LE-NEXT: addis 3, 2, .LCPI106_0@toc@ha ; PC64LE-NEXT: lfs 1, .LCPI106_0@toc@l(3) -; PC64LE-NEXT: xxswapd 0, 0 -; PC64LE-NEXT: xsrdpip 3, 1 -; PC64LE-NEXT: xvrdpip 2, 0 -; PC64LE-NEXT: xxswapd 1, 2 -; PC64LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2 -; PC64LE-NEXT: # kill: def $f1 killed 
$f1 killed $vsl1 +; PC64LE-NEXT: fmr 2, 1 +; PC64LE-NEXT: fmr 3, 1 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_ceil_v3f64: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI106_0@toc@ha -; PC64LE9-NEXT: lfs 0, .LCPI106_0@toc@l(3) -; PC64LE9-NEXT: addis 3, 2, .LCPI106_1@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI106_1@toc@l -; PC64LE9-NEXT: xsrdpip 3, 0 -; PC64LE9-NEXT: lxvx 0, 0, 3 -; PC64LE9-NEXT: xvrdpip 2, 0 -; PC64LE9-NEXT: xxswapd 1, 2 -; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 -; PC64LE9-NEXT: # kill: def $f2 killed $f2 killed $vsl2 +; PC64LE9-NEXT: lfs 1, .LCPI106_0@toc@l(3) +; PC64LE9-NEXT: fmr 2, 1 +; PC64LE9-NEXT: fmr 3, 1 ; PC64LE9-NEXT: blr entry: %ceil = call <3 x double> @llvm.experimental.constrained.ceil.v3f64( @@ -6513,19 +6462,17 @@ ; PC64LE-LABEL: constrained_vector_floor_v1f32: ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI107_0@toc@ha -; PC64LE-NEXT: lfs 0, .LCPI107_0@toc@l(3) -; PC64LE-NEXT: xsrdpim 0, 0 -; PC64LE-NEXT: xscvdpspn 0, 0 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE-NEXT: addi 3, 3, .LCPI107_0@toc@l +; PC64LE-NEXT: lfiwzx 0, 0, 3 +; PC64LE-NEXT: xxpermdi 34, 0, 0, 2 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_floor_v1f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI107_0@toc@ha -; PC64LE9-NEXT: lfs 0, .LCPI107_0@toc@l(3) -; PC64LE9-NEXT: xsrdpim 0, 0 -; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: addi 3, 3, .LCPI107_0@toc@l +; PC64LE9-NEXT: lfiwzx 0, 0, 3 +; PC64LE9-NEXT: xxpermdi 34, 0, 0, 2 ; PC64LE9-NEXT: blr entry: %floor = call <1 x float> @llvm.experimental.constrained.floor.v1f32( @@ -6541,16 +6488,14 @@ ; PC64LE-NEXT: addis 3, 2, .LCPI108_0@toc@ha ; PC64LE-NEXT: addi 3, 3, .LCPI108_0@toc@l ; PC64LE-NEXT: lxvd2x 0, 0, 3 -; PC64LE-NEXT: xxswapd 0, 0 -; PC64LE-NEXT: xvrdpim 34, 0 +; PC64LE-NEXT: xxswapd 34, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_floor_v2f64: ; PC64LE9: # %bb.0: # %entry 
; PC64LE9-NEXT: addis 3, 2, .LCPI108_0@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI108_0@toc@l -; PC64LE9-NEXT: lxvx 0, 0, 3 -; PC64LE9-NEXT: xvrdpim 34, 0 +; PC64LE9-NEXT: lxvx 34, 0, 3 ; PC64LE9-NEXT: blr entry: %floor = call <2 x double> @llvm.experimental.constrained.floor.v2f64( @@ -6562,50 +6507,16 @@ define <3 x float> @constrained_vector_floor_v3f32() #0 { ; PC64LE-LABEL: constrained_vector_floor_v3f32: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: addis 3, 2, .LCPI109_2@toc@ha -; PC64LE-NEXT: addis 4, 2, .LCPI109_1@toc@ha -; PC64LE-NEXT: lfs 0, .LCPI109_2@toc@l(3) -; PC64LE-NEXT: lfs 1, .LCPI109_1@toc@l(4) ; PC64LE-NEXT: addis 3, 2, .LCPI109_0@toc@ha -; PC64LE-NEXT: xsrdpim 0, 0 -; PC64LE-NEXT: lfs 2, .LCPI109_0@toc@l(3) -; PC64LE-NEXT: addis 3, 2, .LCPI109_3@toc@ha -; PC64LE-NEXT: xsrdpim 1, 1 -; PC64LE-NEXT: addi 3, 3, .LCPI109_3@toc@l -; PC64LE-NEXT: xsrdpim 2, 2 -; PC64LE-NEXT: xscvdpspn 0, 0 -; PC64LE-NEXT: xscvdpspn 1, 1 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 -; PC64LE-NEXT: xscvdpspn 0, 2 -; PC64LE-NEXT: xxsldwi 35, 1, 1, 1 -; PC64LE-NEXT: vmrglw 2, 3, 2 -; PC64LE-NEXT: lvx 3, 0, 3 -; PC64LE-NEXT: xxsldwi 36, 0, 0, 1 -; PC64LE-NEXT: vperm 2, 4, 2, 3 +; PC64LE-NEXT: addi 3, 3, .LCPI109_0@toc@l +; PC64LE-NEXT: lvx 2, 0, 3 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_floor_v3f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI109_0@toc@ha -; PC64LE9-NEXT: lfs 0, .LCPI109_0@toc@l(3) -; PC64LE9-NEXT: addis 3, 2, .LCPI109_1@toc@ha -; PC64LE9-NEXT: lfs 1, .LCPI109_1@toc@l(3) -; PC64LE9-NEXT: addis 3, 2, .LCPI109_2@toc@ha -; PC64LE9-NEXT: xsrdpim 0, 0 -; PC64LE9-NEXT: lfs 2, .LCPI109_2@toc@l(3) -; PC64LE9-NEXT: addis 3, 2, .LCPI109_3@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI109_3@toc@l -; PC64LE9-NEXT: xsrdpim 1, 1 -; PC64LE9-NEXT: xsrdpim 2, 2 -; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xscvdpspn 1, 1 -; PC64LE9-NEXT: xscvdpspn 2, 2 -; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1 -; PC64LE9-NEXT: xxsldwi 35, 1, 1, 1 -; PC64LE9-NEXT: xxsldwi 34, 
2, 2, 1 -; PC64LE9-NEXT: vmrglw 2, 3, 2 -; PC64LE9-NEXT: lxvx 35, 0, 3 -; PC64LE9-NEXT: vperm 2, 4, 2, 3 +; PC64LE9-NEXT: addi 3, 3, .LCPI109_0@toc@l +; PC64LE9-NEXT: lxvx 34, 0, 3 ; PC64LE9-NEXT: blr entry: %floor = call <3 x float> @llvm.experimental.constrained.floor.v3f32( @@ -6617,31 +6528,18 @@ define <3 x double> @constrained_vector_floor_v3f64() #0 { ; PC64LE-LABEL: constrained_vector_floor_v3f64: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: addis 3, 2, .LCPI110_1@toc@ha -; PC64LE-NEXT: addi 3, 3, .LCPI110_1@toc@l -; PC64LE-NEXT: lxvd2x 0, 0, 3 ; PC64LE-NEXT: addis 3, 2, .LCPI110_0@toc@ha ; PC64LE-NEXT: lfs 1, .LCPI110_0@toc@l(3) -; PC64LE-NEXT: xxswapd 0, 0 -; PC64LE-NEXT: xsrdpim 3, 1 -; PC64LE-NEXT: xvrdpim 2, 0 -; PC64LE-NEXT: xxswapd 1, 2 -; PC64LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2 -; PC64LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; PC64LE-NEXT: fmr 2, 1 +; PC64LE-NEXT: fmr 3, 1 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_floor_v3f64: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI110_0@toc@ha -; PC64LE9-NEXT: lfs 0, .LCPI110_0@toc@l(3) -; PC64LE9-NEXT: addis 3, 2, .LCPI110_1@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI110_1@toc@l -; PC64LE9-NEXT: xsrdpim 3, 0 -; PC64LE9-NEXT: lxvx 0, 0, 3 -; PC64LE9-NEXT: xvrdpim 2, 0 -; PC64LE9-NEXT: xxswapd 1, 2 -; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 -; PC64LE9-NEXT: # kill: def $f2 killed $f2 killed $vsl2 +; PC64LE9-NEXT: lfs 1, .LCPI110_0@toc@l(3) +; PC64LE9-NEXT: fmr 2, 1 +; PC64LE9-NEXT: fmr 3, 1 ; PC64LE9-NEXT: blr entry: %floor = call <3 x double> @llvm.experimental.constrained.floor.v3f64( @@ -6654,19 +6552,17 @@ ; PC64LE-LABEL: constrained_vector_round_v1f32: ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI111_0@toc@ha -; PC64LE-NEXT: lfs 0, .LCPI111_0@toc@l(3) -; PC64LE-NEXT: xsrdpi 0, 0 -; PC64LE-NEXT: xscvdpspn 0, 0 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE-NEXT: addi 3, 3, .LCPI111_0@toc@l +; PC64LE-NEXT: lfiwzx 0, 0, 3 +; 
PC64LE-NEXT: xxpermdi 34, 0, 0, 2 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_round_v1f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI111_0@toc@ha -; PC64LE9-NEXT: lfs 0, .LCPI111_0@toc@l(3) -; PC64LE9-NEXT: xsrdpi 0, 0 -; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: addi 3, 3, .LCPI111_0@toc@l +; PC64LE9-NEXT: lfiwzx 0, 0, 3 +; PC64LE9-NEXT: xxpermdi 34, 0, 0, 2 ; PC64LE9-NEXT: blr entry: %round = call <1 x float> @llvm.experimental.constrained.round.v1f32( @@ -6681,16 +6577,14 @@ ; PC64LE-NEXT: addis 3, 2, .LCPI112_0@toc@ha ; PC64LE-NEXT: addi 3, 3, .LCPI112_0@toc@l ; PC64LE-NEXT: lxvd2x 0, 0, 3 -; PC64LE-NEXT: xxswapd 0, 0 -; PC64LE-NEXT: xvrdpi 34, 0 +; PC64LE-NEXT: xxswapd 34, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_round_v2f64: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI112_0@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI112_0@toc@l -; PC64LE9-NEXT: lxvx 0, 0, 3 -; PC64LE9-NEXT: xvrdpi 34, 0 +; PC64LE9-NEXT: lxvx 34, 0, 3 ; PC64LE9-NEXT: blr entry: %round = call <2 x double> @llvm.experimental.constrained.round.v2f64( @@ -6702,50 +6596,16 @@ define <3 x float> @constrained_vector_round_v3f32() #0 { ; PC64LE-LABEL: constrained_vector_round_v3f32: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: addis 3, 2, .LCPI113_2@toc@ha -; PC64LE-NEXT: addis 4, 2, .LCPI113_1@toc@ha -; PC64LE-NEXT: lfs 0, .LCPI113_2@toc@l(3) -; PC64LE-NEXT: lfs 1, .LCPI113_1@toc@l(4) ; PC64LE-NEXT: addis 3, 2, .LCPI113_0@toc@ha -; PC64LE-NEXT: xsrdpi 0, 0 -; PC64LE-NEXT: lfs 2, .LCPI113_0@toc@l(3) -; PC64LE-NEXT: addis 3, 2, .LCPI113_3@toc@ha -; PC64LE-NEXT: xsrdpi 1, 1 -; PC64LE-NEXT: addi 3, 3, .LCPI113_3@toc@l -; PC64LE-NEXT: xsrdpi 2, 2 -; PC64LE-NEXT: xscvdpspn 0, 0 -; PC64LE-NEXT: xscvdpspn 1, 1 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 -; PC64LE-NEXT: xscvdpspn 0, 2 -; PC64LE-NEXT: xxsldwi 35, 1, 1, 1 -; PC64LE-NEXT: vmrglw 2, 3, 2 -; PC64LE-NEXT: lvx 3, 0, 3 -; PC64LE-NEXT: xxsldwi 36, 0, 0, 1 
-; PC64LE-NEXT: vperm 2, 4, 2, 3 +; PC64LE-NEXT: addi 3, 3, .LCPI113_0@toc@l +; PC64LE-NEXT: lvx 2, 0, 3 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_round_v3f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI113_0@toc@ha -; PC64LE9-NEXT: lfs 0, .LCPI113_0@toc@l(3) -; PC64LE9-NEXT: addis 3, 2, .LCPI113_1@toc@ha -; PC64LE9-NEXT: lfs 1, .LCPI113_1@toc@l(3) -; PC64LE9-NEXT: addis 3, 2, .LCPI113_2@toc@ha -; PC64LE9-NEXT: xsrdpi 0, 0 -; PC64LE9-NEXT: lfs 2, .LCPI113_2@toc@l(3) -; PC64LE9-NEXT: addis 3, 2, .LCPI113_3@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI113_3@toc@l -; PC64LE9-NEXT: xsrdpi 1, 1 -; PC64LE9-NEXT: xsrdpi 2, 2 -; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xscvdpspn 1, 1 -; PC64LE9-NEXT: xscvdpspn 2, 2 -; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1 -; PC64LE9-NEXT: xxsldwi 35, 1, 1, 1 -; PC64LE9-NEXT: xxsldwi 34, 2, 2, 1 -; PC64LE9-NEXT: vmrglw 2, 3, 2 -; PC64LE9-NEXT: lxvx 35, 0, 3 -; PC64LE9-NEXT: vperm 2, 4, 2, 3 +; PC64LE9-NEXT: addi 3, 3, .LCPI113_0@toc@l +; PC64LE9-NEXT: lxvx 34, 0, 3 ; PC64LE9-NEXT: blr entry: %round = call <3 x float> @llvm.experimental.constrained.round.v3f32( @@ -6758,31 +6618,20 @@ define <3 x double> @constrained_vector_round_v3f64() #0 { ; PC64LE-LABEL: constrained_vector_round_v3f64: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: addis 3, 2, .LCPI114_1@toc@ha -; PC64LE-NEXT: addi 3, 3, .LCPI114_1@toc@l -; PC64LE-NEXT: lxvd2x 0, 0, 3 +; PC64LE-NEXT: addis 4, 2, .LCPI114_1@toc@ha ; PC64LE-NEXT: addis 3, 2, .LCPI114_0@toc@ha +; PC64LE-NEXT: lfs 2, .LCPI114_1@toc@l(4) ; PC64LE-NEXT: lfs 1, .LCPI114_0@toc@l(3) -; PC64LE-NEXT: xxswapd 0, 0 -; PC64LE-NEXT: xsrdpi 3, 1 -; PC64LE-NEXT: xvrdpi 2, 0 -; PC64LE-NEXT: xxswapd 1, 2 -; PC64LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2 -; PC64LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; PC64LE-NEXT: fmr 3, 2 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_round_v3f64: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI114_0@toc@ha -; 
PC64LE9-NEXT: lfs 0, .LCPI114_0@toc@l(3) +; PC64LE9-NEXT: lfs 1, .LCPI114_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI114_1@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI114_1@toc@l -; PC64LE9-NEXT: xsrdpi 3, 0 -; PC64LE9-NEXT: lxvx 0, 0, 3 -; PC64LE9-NEXT: xvrdpi 2, 0 -; PC64LE9-NEXT: xxswapd 1, 2 -; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 -; PC64LE9-NEXT: # kill: def $f2 killed $f2 killed $vsl2 +; PC64LE9-NEXT: lfs 2, .LCPI114_1@toc@l(3) +; PC64LE9-NEXT: fmr 3, 2 ; PC64LE9-NEXT: blr entry: %round = call <3 x double> @llvm.experimental.constrained.round.v3f64( @@ -6795,19 +6644,17 @@ ; PC64LE-LABEL: constrained_vector_trunc_v1f32: ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI115_0@toc@ha -; PC64LE-NEXT: lfs 0, .LCPI115_0@toc@l(3) -; PC64LE-NEXT: xsrdpiz 0, 0 -; PC64LE-NEXT: xscvdpspn 0, 0 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE-NEXT: addi 3, 3, .LCPI115_0@toc@l +; PC64LE-NEXT: lfiwzx 0, 0, 3 +; PC64LE-NEXT: xxpermdi 34, 0, 0, 2 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_trunc_v1f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI115_0@toc@ha -; PC64LE9-NEXT: lfs 0, .LCPI115_0@toc@l(3) -; PC64LE9-NEXT: xsrdpiz 0, 0 -; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: addi 3, 3, .LCPI115_0@toc@l +; PC64LE9-NEXT: lfiwzx 0, 0, 3 +; PC64LE9-NEXT: xxpermdi 34, 0, 0, 2 ; PC64LE9-NEXT: blr entry: %trunc = call <1 x float> @llvm.experimental.constrained.trunc.v1f32( @@ -6822,16 +6669,14 @@ ; PC64LE-NEXT: addis 3, 2, .LCPI116_0@toc@ha ; PC64LE-NEXT: addi 3, 3, .LCPI116_0@toc@l ; PC64LE-NEXT: lxvd2x 0, 0, 3 -; PC64LE-NEXT: xxswapd 0, 0 -; PC64LE-NEXT: xvrdpiz 34, 0 +; PC64LE-NEXT: xxswapd 34, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_trunc_v2f64: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI116_0@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI116_0@toc@l -; PC64LE9-NEXT: lxvx 0, 0, 3 -; PC64LE9-NEXT: xvrdpiz 34, 0 +; PC64LE9-NEXT: lxvx 34, 0, 3 ; PC64LE9-NEXT: 
blr entry: %trunc = call <2 x double> @llvm.experimental.constrained.trunc.v2f64( @@ -6843,50 +6688,16 @@ define <3 x float> @constrained_vector_trunc_v3f32() #0 { ; PC64LE-LABEL: constrained_vector_trunc_v3f32: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: addis 3, 2, .LCPI117_2@toc@ha -; PC64LE-NEXT: addis 4, 2, .LCPI117_1@toc@ha -; PC64LE-NEXT: lfs 0, .LCPI117_2@toc@l(3) -; PC64LE-NEXT: lfs 1, .LCPI117_1@toc@l(4) ; PC64LE-NEXT: addis 3, 2, .LCPI117_0@toc@ha -; PC64LE-NEXT: xsrdpiz 0, 0 -; PC64LE-NEXT: lfs 2, .LCPI117_0@toc@l(3) -; PC64LE-NEXT: addis 3, 2, .LCPI117_3@toc@ha -; PC64LE-NEXT: xsrdpiz 1, 1 -; PC64LE-NEXT: addi 3, 3, .LCPI117_3@toc@l -; PC64LE-NEXT: xsrdpiz 2, 2 -; PC64LE-NEXT: xscvdpspn 0, 0 -; PC64LE-NEXT: xscvdpspn 1, 1 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 -; PC64LE-NEXT: xscvdpspn 0, 2 -; PC64LE-NEXT: xxsldwi 35, 1, 1, 1 -; PC64LE-NEXT: vmrglw 2, 3, 2 -; PC64LE-NEXT: lvx 3, 0, 3 -; PC64LE-NEXT: xxsldwi 36, 0, 0, 1 -; PC64LE-NEXT: vperm 2, 4, 2, 3 +; PC64LE-NEXT: addi 3, 3, .LCPI117_0@toc@l +; PC64LE-NEXT: lvx 2, 0, 3 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_trunc_v3f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI117_0@toc@ha -; PC64LE9-NEXT: lfs 0, .LCPI117_0@toc@l(3) -; PC64LE9-NEXT: addis 3, 2, .LCPI117_1@toc@ha -; PC64LE9-NEXT: lfs 1, .LCPI117_1@toc@l(3) -; PC64LE9-NEXT: addis 3, 2, .LCPI117_2@toc@ha -; PC64LE9-NEXT: xsrdpiz 0, 0 -; PC64LE9-NEXT: lfs 2, .LCPI117_2@toc@l(3) -; PC64LE9-NEXT: addis 3, 2, .LCPI117_3@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI117_3@toc@l -; PC64LE9-NEXT: xsrdpiz 1, 1 -; PC64LE9-NEXT: xsrdpiz 2, 2 -; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xscvdpspn 1, 1 -; PC64LE9-NEXT: xscvdpspn 2, 2 -; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1 -; PC64LE9-NEXT: xxsldwi 35, 1, 1, 1 -; PC64LE9-NEXT: xxsldwi 34, 2, 2, 1 -; PC64LE9-NEXT: vmrglw 2, 3, 2 -; PC64LE9-NEXT: lxvx 35, 0, 3 -; PC64LE9-NEXT: vperm 2, 4, 2, 3 +; PC64LE9-NEXT: addi 3, 3, .LCPI117_0@toc@l +; PC64LE9-NEXT: lxvx 34, 0, 3 ; PC64LE9-NEXT: 
blr entry: %trunc = call <3 x float> @llvm.experimental.constrained.trunc.v3f32( @@ -6898,31 +6709,18 @@ define <3 x double> @constrained_vector_trunc_v3f64() #0 { ; PC64LE-LABEL: constrained_vector_trunc_v3f64: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: addis 3, 2, .LCPI118_1@toc@ha -; PC64LE-NEXT: addi 3, 3, .LCPI118_1@toc@l -; PC64LE-NEXT: lxvd2x 0, 0, 3 ; PC64LE-NEXT: addis 3, 2, .LCPI118_0@toc@ha ; PC64LE-NEXT: lfs 1, .LCPI118_0@toc@l(3) -; PC64LE-NEXT: xxswapd 0, 0 -; PC64LE-NEXT: xsrdpiz 3, 1 -; PC64LE-NEXT: xvrdpiz 2, 0 -; PC64LE-NEXT: xxswapd 1, 2 -; PC64LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2 -; PC64LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; PC64LE-NEXT: fmr 2, 1 +; PC64LE-NEXT: fmr 3, 1 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_trunc_v3f64: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI118_0@toc@ha -; PC64LE9-NEXT: lfs 0, .LCPI118_0@toc@l(3) -; PC64LE9-NEXT: addis 3, 2, .LCPI118_1@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI118_1@toc@l -; PC64LE9-NEXT: xsrdpiz 3, 0 -; PC64LE9-NEXT: lxvx 0, 0, 3 -; PC64LE9-NEXT: xvrdpiz 2, 0 -; PC64LE9-NEXT: xxswapd 1, 2 -; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 -; PC64LE9-NEXT: # kill: def $f2 killed $f2 killed $vsl2 +; PC64LE9-NEXT: lfs 1, .LCPI118_0@toc@l(3) +; PC64LE9-NEXT: fmr 2, 1 +; PC64LE9-NEXT: fmr 3, 1 ; PC64LE9-NEXT: blr entry: %trunc = call <3 x double> @llvm.experimental.constrained.trunc.v3f64( diff --git a/llvm/test/Transforms/InstSimplify/constfold-constrained.ll b/llvm/test/Transforms/InstSimplify/constfold-constrained.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InstSimplify/constfold-constrained.ll @@ -0,0 +1,244 @@ +; RUN: opt < %s -instsimplify -S | FileCheck %s + + +; Verify that floor(10.1) is folded to 10.0 when the exception behavior is 'ignore'. 
+define double @floor_01() #0 { +entry: + %result = call double @llvm.experimental.constrained.floor.f64( + double 1.010000e+01, + metadata !"fpexcept.ignore") #0 + ret double %result + ; CHECK-LABEL: @floor_01 + ; CHECK: ret double 1.000000e+01 +} + +; Verify that floor(-10.1) is folded to -11.0 when the exception behavior is not 'ignore'. +define double @floor_02() #0 { +entry: + %result = call double @llvm.experimental.constrained.floor.f64( + double -1.010000e+01, + metadata !"fpexcept.strict") #0 + ret double %result + ; CHECK-LABEL: @floor_02 + ; CHECK: ret double -1.100000e+01 +} + +; Verify that ceil(10.1) is folded to 11.0 when the exception behavior is 'ignore'. +define double @ceil_01() #0 { +entry: + %result = call double @llvm.experimental.constrained.ceil.f64( + double 1.010000e+01, + metadata !"fpexcept.ignore") #0 + ret double %result + ; CHECK-LABEL: @ceil_01 + ; CHECK: ret double 1.100000e+01 +} + +; Verify that ceil(-10.1) is folded to -10.0 when the exception behavior is not 'ignore'. +define double @ceil_02() #0 { +entry: + %result = call double @llvm.experimental.constrained.ceil.f64( + double -1.010000e+01, + metadata !"fpexcept.strict") #0 + ret double %result + ; CHECK-LABEL: @ceil_02 + ; CHECK: ret double -1.000000e+01 +} + +; Verify that trunc(10.1) is folded to 10.0 when the exception behavior is 'ignore'. +define double @trunc_01() #0 { +entry: + %result = call double @llvm.experimental.constrained.trunc.f64( + double 1.010000e+01, + metadata !"fpexcept.ignore") #0 + ret double %result + ; CHECK-LABEL: @trunc_01 + ; CHECK: ret double 1.000000e+01 +} + +; Verify that trunc(-10.1) is folded to -10.0 when the exception behavior is NOT 'ignore'. 
+define double @trunc_02() #0 { +entry: + %result = call double @llvm.experimental.constrained.trunc.f64( + double -1.010000e+01, + metadata !"fpexcept.strict") #0 + ret double %result + ; CHECK-LABEL: @trunc_02 + ; CHECK: ret double -1.000000e+01 +} + +; Verify that round(10.5) is folded to 11.0 when the exception behavior is 'ignore'. +define double @round_01() #0 { +entry: + %result = call double @llvm.experimental.constrained.round.f64( + double 1.050000e+01, + metadata !"fpexcept.ignore") #0 + ret double %result + ; CHECK-LABEL: @round_01 + ; CHECK: ret double 1.100000e+01 +} + +; Verify that round(-10.5) is folded to -11.0 when the exception behavior is NOT 'ignore'. +define double @round_02() #0 { +entry: + %result = call double @llvm.experimental.constrained.round.f64( + double -1.050000e+01, + metadata !"fpexcept.strict") #0 + ret double %result + ; CHECK-LABEL: @round_02 + ; CHECK: ret double -1.100000e+01 +} + +; Verify that nearbyint(10.5) is folded to 11.0 when the rounding mode is 'upward'. +define double @nearbyint_01() #0 { +entry: + %result = call double @llvm.experimental.constrained.nearbyint.f64( + double 1.050000e+01, + metadata !"round.upward", + metadata !"fpexcept.ignore") #0 + ret double %result + ; CHECK-LABEL: @nearbyint_01 + ; CHECK: ret double 1.100000e+01 +} + +; Verify that nearbyint(10.5) is folded to 10.0 when the rounding mode is 'downward'. +define double @nearbyint_02() #0 { +entry: + %result = call double @llvm.experimental.constrained.nearbyint.f64( + double 1.050000e+01, + metadata !"round.downward", + metadata !"fpexcept.maytrap") #0 + ret double %result + ; CHECK-LABEL: @nearbyint_02 + ; CHECK: ret double 1.000000e+01 +} + +; Verify that nearbyint(10.5) is folded to 10.0 when the rounding mode is 'towardzero'. 
+define double @nearbyint_03() #0 { +entry: + %result = call double @llvm.experimental.constrained.nearbyint.f64( + double 1.050000e+01, + metadata !"round.towardzero", + metadata !"fpexcept.strict") #0 + ret double %result + ; CHECK-LABEL: @nearbyint_03 + ; CHECK: ret double 1.000000e+01 +} + +; Verify that nearbyint(10.5) is folded to 10.0 when the rounding mode is 'tonearest'. +define double @nearbyint_04() #0 { +entry: + %result = call double @llvm.experimental.constrained.nearbyint.f64( + double 1.050000e+01, + metadata !"round.tonearest", + metadata !"fpexcept.strict") #0 + ret double %result + ; CHECK-LABEL: @nearbyint_04 + ; CHECK: ret double 1.000000e+01 +} + +; Verify that nearbyint(10.5) is NOT folded if the rounding mode is 'dynamic'. +define double @nearbyint_05() #0 { +entry: + %result = call double @llvm.experimental.constrained.nearbyint.f64( + double 1.050000e+01, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret double %result + ; CHECK-LABEL: @nearbyint_05 + ; CHECK: [[VAL:%.+]] = {{.*}}call double @llvm.experimental.constrained.nearbyint + ; CHECK: ret double [[VAL]] +} + +; Verify that trunc(SNAN) is NOT folded if the exception behavior mode is not 'ignore'. +define double @nonfinite_01() #0 { +entry: + %result = call double @llvm.experimental.constrained.trunc.f64( + double 0x7ff4000000000000, + metadata !"fpexcept.strict") #0 + ret double %result + ; CHECK-LABEL: @nonfinite_01 + ; CHECK: [[VAL:%.+]] = {{.*}}call double @llvm.experimental.constrained.trunc + ; CHECK: ret double [[VAL]] +} + +; Verify that trunc(SNAN) is folded to QNAN if the exception behavior mode is 'ignore'. 
+define double @nonfinite_02() #0 { +entry: + %result = call double @llvm.experimental.constrained.trunc.f64( + double 0x7ff4000000000000, + metadata !"fpexcept.ignore") #0 + ret double %result + ; CHECK-LABEL: @nonfinite_02 + ; CHECK: ret double 0x7FF8000000000000 +} + +; Verify that trunc(QNAN) is folded even if the exception behavior mode is not 'ignore'. +define double @nonfinite_03() #0 { +entry: + %result = call double @llvm.experimental.constrained.trunc.f64( + double 0x7ff8000000000000, + metadata !"fpexcept.strict") #0 + ret double %result + ; CHECK-LABEL: @nonfinite_03 + ; CHECK: ret double 0x7FF8000000000000 +} + +; Verify that trunc(+Inf) is folded even if the exception behavior mode is not 'ignore'. +define double @nonfinite_04() #0 { +entry: + %result = call double @llvm.experimental.constrained.trunc.f64( + double 0x7ff0000000000000, + metadata !"fpexcept.strict") #0 + ret double %result + ; CHECK-LABEL: @nonfinite_04 + ; CHECK: ret double 0x7FF0000000000000 +} + +; Verify that rint(10) is folded to 10.0 when the rounding mode is 'tonearest'. +define double @rint_01() #0 { +entry: + %result = call double @llvm.experimental.constrained.rint.f64( + double 1.000000e+01, + metadata !"round.tonearest", + metadata !"fpexcept.strict") #0 + ret double %result + ; CHECK-LABEL: @rint_01 + ; CHECK: ret double 1.000000e+01 +} + +; Verify that rint(10.1) is NOT folded to 10.0 when the exception behavior is 'strict'. +define double @rint_02() #0 { +entry: + %result = call double @llvm.experimental.constrained.rint.f64( + double 1.010000e+01, + metadata !"round.tonearest", + metadata !"fpexcept.strict") #0 + ret double %result + ; CHECK-LABEL: @rint_02 + ; CHECK: [[VAL:%.+]] = {{.*}}call double @llvm.experimental.constrained.rint + ; CHECK: ret double [[VAL]] +} + +; Verify that rint(10.1) is folded to 10.0 when the exception behavior is not 'strict'. 
+define double @rint_03() #0 { +entry: + %result = call double @llvm.experimental.constrained.rint.f64( + double 1.010000e+01, + metadata !"round.tonearest", + metadata !"fpexcept.maytrap") #0 + ret double %result + ; CHECK-LABEL: @rint_03 + ; CHECK: ret double 1.000000e+01 +} + + +attributes #0 = { strictfp } + +declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.floor.f64(double, metadata) +declare double @llvm.experimental.constrained.ceil.f64(double, metadata) +declare double @llvm.experimental.constrained.trunc.f64(double, metadata) +declare double @llvm.experimental.constrained.round.f64(double, metadata) +declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata) +