Index: flang/include/flang/Optimizer/Support/InitFIR.h =================================================================== --- flang/include/flang/Optimizer/Support/InitFIR.h +++ flang/include/flang/Optimizer/Support/InitFIR.h @@ -28,7 +28,8 @@ mlir::AffineDialect, FIROpsDialect, mlir::acc::OpenACCDialect, \ mlir::omp::OpenMPDialect, mlir::scf::SCFDialect, \ mlir::arith::ArithmeticDialect, mlir::cf::ControlFlowDialect, \ - mlir::func::FuncDialect, mlir::vector::VectorDialect + mlir::func::FuncDialect, mlir::vector::VectorDialect, \ + mlir::math::MathDialect // The definitive list of dialects used by flang. #define FLANG_DIALECT_LIST \ Index: flang/lib/Lower/IntrinsicCall.cpp =================================================================== --- flang/lib/Lower/IntrinsicCall.cpp +++ flang/lib/Lower/IntrinsicCall.cpp @@ -35,6 +35,7 @@ #include "flang/Optimizer/Dialect/FIROpsSupport.h" #include "flang/Optimizer/Support/FatalError.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/Math/IR/Math.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -960,6 +961,40 @@ // Math runtime description and matching utility //===----------------------------------------------------------------------===// +/// Command line option to control how math operations are lowered +/// into MLIR. +/// Going forward, most of the math operations have to be lowered +/// to some MLIR dialect operations, which are converted to +/// library calls at the end of the FIR pipeline. Basically, +/// the mathRuntimeVersion generation will happen for these +/// math operations late during FIR conversion. +/// +/// Exposing MLIR operations early can potentially enable more +/// MLIR optimizations. At the same time, there are some issues +/// with doing this, e.g. 
'math' dialect operations do not model +/// strict FP behavior right now, so the optimizations may change +/// the program behavior compared to when we represent intrinsic +/// mathematical operations with generic calls. In order to preserve +/// strict FP behavior with late math lowering we have to extend +/// the dialects used by the late lowering such that they model strict +/// FP behavior properly. +enum MathLoweringMode { + // Lower math operations according to mathRuntimeVersion selection + // defined below. + earlyLowering, + + // Lower math operations into operations of MLIR dialects, + // such as mlir::math, mlir::complex, etc. + lateLowering, +}; + +llvm::cl::opt<MathLoweringMode> mathLowering( + "math-lowering", llvm::cl::desc("Select math operations lowering mode:"), + llvm::cl::values( + clEnumValN(earlyLowering, "early", "lower to library calls early"), + clEnumValN(lateLowering, "late", "lower to MLIR dialect operations")), + llvm::cl::init(earlyLowering)); + /// Command line option to modify math runtime version used to implement /// intrinsics. enum MathRuntimeVersion { @@ -984,6 +1019,8 @@ // Needed for implicit compare with keys. constexpr operator Key() const { return key; } Key key; // intrinsic name + + // Name of a runtime function that implements the operation. llvm::StringRef symbol; fir::runtime::FuncTypeBuilderFunc typeGenerator; }; @@ -1050,9 +1087,160 @@ return mlir::FunctionType::get(context, {t}, {r}); } -// TODO : Fill-up this table with more intrinsic. 
+template +static mlir::FunctionType genF64F64IntFuncType(mlir::MLIRContext *context) { + auto ftype = mlir::FloatType::getF64(context); + auto itype = mlir::IntegerType::get(context, Bits); + return mlir::FunctionType::get(context, {ftype, itype}, {ftype}); +} + +template +static mlir::FunctionType genF32F32IntFuncType(mlir::MLIRContext *context) { + auto ftype = mlir::FloatType::getF32(context); + auto itype = mlir::IntegerType::get(context, Bits); + return mlir::FunctionType::get(context, {ftype, itype}, {ftype}); +} + +/// Callback type for generating lowering for a math operation. +using MathGeneratorTy = mlir::Value (*)(fir::FirOpBuilder &, mlir::Location, + llvm::StringRef name, + mlir::FunctionType funcType, + llvm::ArrayRef); + +struct MathOperation { + // llvm::StringRef comparison operator are not constexpr, so use string_view. + using Key = std::string_view; + // Needed for implicit compare with keys. + constexpr operator Key() const { return key; } + Key key; // intrinsic name + + // Name of a runtime function that implements the operation. + llvm::StringRef symbol; + fir::runtime::FuncTypeBuilderFunc typeGenerator; + + // If funcGenerator is non null, then it is generating + // the lowering code, otherwise - the lowering is done + // as a call to a runtime function named as specified + // in 'symbol' member. + MathGeneratorTy funcGenerator; +}; + +static mlir::Value genLibCall(fir::FirOpBuilder &builder, mlir::Location loc, + llvm::StringRef name, mlir::FunctionType funcType, + llvm::ArrayRef args) { + LLVM_DEBUG(llvm::dbgs() << "Generating '" << name << "' call with type "; + funcType.dump(); llvm::dbgs() << "\n"); + mlir::func::FuncOp funcOp = builder.addNamedFunction(loc, name, funcType); + // TODO: ensure 'strictfp' setting on the call for "precise/strict" + // FP mode. Set appropriate Fast-Math Flags otherwise. + // TODO: we should also mark as many libm function as possible + // with 'pure' attribute (of course, not in strict FP mode). 
+ auto libCall = builder.create<fir::CallOp>(loc, funcOp, args); + LLVM_DEBUG(libCall.dump(); llvm::dbgs() << "\n"); + return libCall.getResult(0); +} + +template <typename T> +static mlir::Value genMathOp(fir::FirOpBuilder &builder, mlir::Location loc, + llvm::StringRef name, mlir::FunctionType funcType, + llvm::ArrayRef<mlir::Value> args) { + // TODO: we have to annotate the math operations with flags + // that will allow us to define FP accuracy/exception + // behavior per operation, so that after early multi-module + // MLIR inlining we can distinguish operations that were + // compiled with different settings. + // Suggestion: + // * For "relaxed" FP mode set all Fast-Math Flags + // (see "[RFC] FastMath flags support in MLIR (arith dialect)" + // topic at discourse.llvm.org). + // * For "fast" FP mode set all Fast-Math Flags except 'afn'. + // * For "precise/strict" FP mode generate fir.calls to libm + // entries and annotate them with an attribute that will + // end up transformed into 'strictfp' LLVM attribute (TBD). + // Elsewhere, "precise/strict" FP mode should also set + // 'strictfp' for all user functions and calls so that + // LLVM backend does the right job. + // * Operations that cannot be reasonably optimized in MLIR + // can be also lowered to libm calls for "fast" and "relaxed" + // modes. + mlir::Value result; + if (mathRuntimeVersion == preciseVersion) { + result = genLibCall(builder, loc, name, funcType, args); + } else { + LLVM_DEBUG(llvm::dbgs() + << "Generating '" << name << "' operation with type "; + funcType.dump(); llvm::dbgs() << "\n"); + result = builder.create<T>(loc, args); + } + LLVM_DEBUG(result.dump(); llvm::dbgs() << "\n"); + return result; +} + +/// Map mathematical intrinsic operations into MLIR operations +/// of some appropriate dialect (math, complex, etc.) or libm +/// calls. +/// TODO: support more operations here. 
+static constexpr MathOperation mathOperations[] = { + {"abs", "fabsf", genF32F32FuncType, genMathOp}, + {"abs", "fabs", genF64F64FuncType, genMathOp}, + // llvm.trunc behaves the same way as libm's trunc. + {"aint", "llvm.trunc.f32", genF32F32FuncType, genLibCall}, + {"aint", "llvm.trunc.f64", genF64F64FuncType, genLibCall}, + // llvm.round behaves the same way as libm's round. + {"anint", "llvm.round.f32", genF32F32FuncType, + genMathOp}, + {"anint", "llvm.round.f64", genF64F64FuncType, + genMathOp}, + {"atan", "atanf", genF32F32FuncType, genMathOp}, + {"atan", "atan", genF64F64FuncType, genMathOp}, + {"atan2", "atan2f", genF32F32F32FuncType, genMathOp}, + {"atan2", "atan2", genF64F64F64FuncType, genMathOp}, + // math::CeilOp returns a real, while Fortran CEILING returns integer. + {"ceil", "ceilf", genF32F32FuncType, genMathOp}, + {"ceil", "ceil", genF64F64FuncType, genMathOp}, + {"cos", "cosf", genF32F32FuncType, genMathOp}, + {"cos", "cos", genF64F64FuncType, genMathOp}, + {"erf", "erff", genF32F32FuncType, genMathOp}, + {"erf", "erf", genF64F64FuncType, genMathOp}, + {"exp", "expf", genF32F32FuncType, genMathOp}, + {"exp", "exp", genF64F64FuncType, genMathOp}, + // math::FloorOp returns a real, while Fortran FLOOR returns integer. + {"floor", "floorf", genF32F32FuncType, genMathOp}, + {"floor", "floor", genF64F64FuncType, genMathOp}, + {"hypot", "hypotf", genF32F32F32FuncType, genLibCall}, + {"hypot", "hypot", genF64F64F64FuncType, genLibCall}, + {"log", "logf", genF32F32FuncType, genMathOp}, + {"log", "log", genF64F64FuncType, genMathOp}, + {"log10", "log10f", genF32F32FuncType, genMathOp}, + {"log10", "log10", genF64F64FuncType, genMathOp}, + // llvm.lround behaves the same way as libm's lround. 
+ {"nint", "llvm.lround.i64.f64", genIntF64FuncType<64>, genLibCall}, + {"nint", "llvm.lround.i64.f32", genIntF32FuncType<64>, genLibCall}, + {"nint", "llvm.lround.i32.f64", genIntF64FuncType<32>, genLibCall}, + {"nint", "llvm.lround.i32.f32", genIntF32FuncType<32>, genLibCall}, + {"pow", "powf", genF32F32F32FuncType, genMathOp}, + {"pow", "pow", genF64F64F64FuncType, genMathOp}, + // TODO: add PowIOp in math and complex dialects. + {"pow", "llvm.powi.f32.i32", genF32F32IntFuncType<32>, genLibCall}, + {"pow", "llvm.powi.f64.i32", genF64F64IntFuncType<32>, genLibCall}, + {"sign", "copysignf", genF32F32F32FuncType, + genMathOp}, + {"sign", "copysign", genF64F64F64FuncType, + genMathOp}, + {"sin", "sinf", genF32F32FuncType, genMathOp}, + {"sin", "sin", genF64F64FuncType, genMathOp}, + {"sqrt", "sqrtf", genF32F32FuncType, genMathOp}, + {"sqrt", "sqrt", genF64F64FuncType, genMathOp}, + {"tanh", "tanhf", genF32F32FuncType, genMathOp}, + {"tanh", "tanh", genF64F64FuncType, genMathOp}, +}; + // Note: These are also defined as operations in LLVM dialect. See if this // can be use and has advantages. +// TODO: remove this table, since the late math lowering should +// replace it and generate proper MLIR operations rather +// than llvm intrinsic calls, which still look like generic +// calls to MLIR and do not enable many optimizations. static constexpr RuntimeFunction llvmIntrinsics[] = { {"abs", "llvm.fabs.f32", genF32F32FuncType}, {"abs", "llvm.fabs.f64", genF64F64FuncType}, @@ -1251,7 +1439,7 @@ /// function type and that will not imply narrowing arguments or extending the /// result. /// If nothing is found, the mlir::func::FuncOp will contain a nullptr. 
-mlir::func::FuncOp searchFunctionInLibrary( +static mlir::func::FuncOp searchFunctionInLibrary( mlir::Location loc, fir::FirOpBuilder &builder, const Fortran::common::StaticMultimapView &lib, llvm::StringRef name, mlir::FunctionType funcType, @@ -1274,6 +1462,65 @@ return {}; } +using RtMap = Fortran::common::StaticMultimapView; +static constexpr RtMap mathOps(mathOperations); +static_assert(mathOps.Verify() && "map must be sorted"); + +static const MathOperation * +searchMathOperation(fir::FirOpBuilder &builder, llvm::StringRef name, + mlir::FunctionType funcType, + const MathOperation **bestNearMatch, + FunctionDistance &bestMatchDistance) { + auto range = mathOps.equal_range(name); + for (auto iter = range.first; iter != range.second && iter; ++iter) { + const auto &impl = *iter; + auto implType = impl.typeGenerator(builder.getContext()); + if (funcType == implType) + return &impl; // exact match + + FunctionDistance distance(funcType, implType); + if (distance.isSmallerThan(bestMatchDistance)) { + *bestNearMatch = &impl; + bestMatchDistance = std::move(distance); + } + } + return nullptr; +} + +/// Implementation of the operation defined by \p name with type +/// \p funcType is not precise, and the actual available implementation +/// is \p distance away from the requested. If using the available +/// implementation results in a precision loss, emit an error message +/// with the given code location \p loc. +static void diagPrecisionLoss(llvm::StringRef name, mlir::FunctionType funcType, + const FunctionDistance &distance, + mlir::Location loc) { + if (!distance.isLosingPrecision()) + return; + + // Using this runtime version requires narrowing the arguments + // or extending the result. It is not numerically safe. There + // is currently no quad math library that was described in + // lowering and could be used here. 
Emit an error and continue + // generating the code with the narrowing cast so that the user + // can get a complete list of the problematic intrinsic calls. + std::string message("TODO: no math runtime available for '"); + llvm::raw_string_ostream sstream(message); + if (name == "pow") { + assert(funcType.getNumInputs() == 2 && "power operator has two arguments"); + sstream << funcType.getInput(0) << " ** " << funcType.getInput(1); + } else { + sstream << name << "("; + if (funcType.getNumInputs() > 0) + sstream << funcType.getInput(0); + for (mlir::Type argType : funcType.getInputs().drop_front()) + sstream << ", " << argType; + sstream << ")"; + } + sstream << "'"; + mlir::emitError(loc, message); +} + /// Search runtime for the best runtime function given an intrinsic name /// and interface. The interface may not be a perfect match in which case /// the caller is responsible to insert argument and return value conversions. @@ -1292,6 +1539,7 @@ static_assert(pgmathR.Verify() && "map must be sorted"); static constexpr RtMap pgmathP(pgmathPrecise); static_assert(pgmathP.Verify() && "map must be sorted"); + if (mathRuntimeVersion == fastVersion) { match = searchFunctionInLibrary(loc, builder, pgmathF, name, funcType, &bestNearMatch, bestMatchDistance); @@ -1317,30 +1565,7 @@ return exactMatch; if (bestNearMatch != nullptr) { - if (bestMatchDistance.isLosingPrecision()) { - // Using this runtime version requires narrowing the arguments - // or extending the result. It is not numerically safe. There - // is currently no quad math library that was described in - // lowering and could be used here. Emit an error and continue - // generating the code with the narrowing cast so that the user - // can get a complete list of the problematic intrinsic calls. 
- std::string message("TODO: no math runtime available for '"); - llvm::raw_string_ostream sstream(message); - if (name == "pow") { - assert(funcType.getNumInputs() == 2 && - "power operator has two arguments"); - sstream << funcType.getInput(0) << " ** " << funcType.getInput(1); - } else { - sstream << name << "("; - if (funcType.getNumInputs() > 0) - sstream << funcType.getInput(0); - for (mlir::Type argType : funcType.getInputs().drop_front()) - sstream << ", " << argType; - sstream << ")"; - } - sstream << "'"; - mlir::emitError(loc, message); - } + diagPrecisionLoss(name, funcType, bestMatchDistance, loc); return getFuncOp(loc, builder, *bestNearMatch); } return {}; @@ -1574,7 +1799,7 @@ IntrinsicLibrary::RuntimeCallGenerator runtimeCallGenerator = getRuntimeCallGenerator(name, soughtFuncType); return genElementalCall(runtimeCallGenerator, name, *resultType, args, - /* outline */ true); + /*outline=*/outlineAllIntrinsics); } mlir::Value @@ -1726,29 +1951,58 @@ IntrinsicLibrary::RuntimeCallGenerator IntrinsicLibrary::getRuntimeCallGenerator(llvm::StringRef name, mlir::FunctionType soughtFuncType) { - mlir::func::FuncOp funcOp = - getRuntimeFunction(loc, builder, name, soughtFuncType); - if (!funcOp) { + mlir::func::FuncOp funcOp; + mlir::FunctionType actualFuncType; + const MathOperation *mathOp = nullptr; + if (mathLowering == lateLowering) { + // Look for a dedicated math operation generator, which + // normally produces a single MLIR operation implementing + // the math operation. + // If not found fall back to a runtime function lookup. + const MathOperation *bestNearMatch = nullptr; + FunctionDistance bestMatchDistance; + mathOp = searchMathOperation(builder, name, soughtFuncType, &bestNearMatch, + bestMatchDistance); + if (!mathOp && bestNearMatch) { + // Use the best near match, optionally issuing an error, + // if types conversions cause precision loss. 
+ diagPrecisionLoss(name, soughtFuncType, bestMatchDistance, loc); + mathOp = bestNearMatch; + } + if (mathOp) + actualFuncType = mathOp->typeGenerator(builder.getContext()); + } + if (!mathOp) + if ((funcOp = getRuntimeFunction(loc, builder, name, soughtFuncType))) + actualFuncType = funcOp.getFunctionType(); + + if (!mathOp && !funcOp) { std::string nameAndType; llvm::raw_string_ostream sstream(nameAndType); sstream << name << "\nrequested type: " << soughtFuncType; crashOnMissingIntrinsic(loc, nameAndType); } - mlir::FunctionType actualFuncType = funcOp.getFunctionType(); assert(actualFuncType.getNumResults() == soughtFuncType.getNumResults() && actualFuncType.getNumInputs() == soughtFuncType.getNumInputs() && actualFuncType.getNumResults() == 1 && "Bad intrinsic match"); - return [funcOp, actualFuncType, + return [funcOp, actualFuncType, mathOp, soughtFuncType](fir::FirOpBuilder &builder, mlir::Location loc, llvm::ArrayRef args) { llvm::SmallVector convertedArguments; for (auto [fst, snd] : llvm::zip(actualFuncType.getInputs(), args)) convertedArguments.push_back(builder.createConvert(loc, fst, snd)); - auto call = builder.create(loc, funcOp, convertedArguments); + mlir::Value result; + // Use math operation generator, if available. 
+ if (mathOp) + result = mathOp->funcGenerator(builder, loc, mathOp->symbol, + actualFuncType, convertedArguments); + else + result = builder.create(loc, funcOp, convertedArguments) + .getResult(0); mlir::Type soughtType = soughtFuncType.getResult(0); - return builder.createConvert(loc, soughtType, call.getResult(0)); + return builder.createConvert(loc, soughtType, result); }; } @@ -1914,7 +2168,7 @@ llvm::ArrayRef args) { assert(args.size() == 1); return fir::factory::Complex{builder, loc}.extractComplexPart( - args[0], true /* isImagPart */); + args[0], /*isImagPart=*/true); } // AINT @@ -3987,6 +4241,13 @@ mlir::Value Fortran::lower::genPow(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Type type, mlir::Value x, mlir::Value y) { + // TODO: since there is no libm version of pow with integer exponent, + // we have to provide an alternative implementation for + // "precise/strict" FP mode and (mathLowering == lateLowering). + // One option is to generate internal function with inlined + // implementation and mark it 'strictfp'. + // Another option is to implement it in Fortran runtime library + // (just like matmul). 
return IntrinsicLibrary{builder, loc}.genRuntimeCall("pow", type, {x, y}); } Index: flang/lib/Optimizer/CodeGen/CMakeLists.txt =================================================================== --- flang/lib/Optimizer/CodeGen/CMakeLists.txt +++ flang/lib/Optimizer/CodeGen/CMakeLists.txt @@ -17,6 +17,8 @@ FIRBuilder FIRDialect FIRSupport + MLIRMathToLLVM + MLIRMathToLibm MLIROpenMPToLLVM MLIRLLVMToLLVMIRTranslation MLIRTargetLLVMIRExport Index: flang/lib/Optimizer/CodeGen/CodeGen.cpp =================================================================== --- flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -23,6 +23,8 @@ #include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h" #include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h" #include "mlir/Conversion/LLVMCommon/Pattern.h" +#include "mlir/Conversion/MathToLLVM/MathToLLVM.h" +#include "mlir/Conversion/MathToLibm/MathToLibm.h" #include "mlir/Conversion/OpenMPToLLVM/ConvertOpenMPToLLVM.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Matchers.h" @@ -3380,6 +3382,10 @@ pattern); mlir::cf::populateControlFlowToLLVMConversionPatterns(typeConverter, pattern); + // Convert math-like dialect operations, which can be produced + // when late math lowering mode is used, into llvm dialect. + mlir::populateMathToLLVMConversionPatterns(typeConverter, pattern); + mlir::populateMathToLibmConversionPatterns(pattern, /*benefit=*/0); mlir::ConversionTarget target{*context}; target.addLegalDialect(); // The OpenMP dialect is legal for Operations without regions, for those Index: flang/test/Intrinsics/late-math-codegen.f90 =================================================================== --- /dev/null +++ flang/test/Intrinsics/late-math-codegen.f90 @@ -0,0 +1,181 @@ +! TODO: verify that Fast-Math Flags and 'strictfp' are properly set. +! 
RUN: bbc -emit-fir %s -o - --math-lowering=late --math-runtime=fast | fir-opt --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck --check-prefixes=ALL,FAST %s +! RUN: %flang_fc1 -emit-fir -mllvm -math-lowering=late -mllvm -math-runtime=fast %s -o - | fir-opt --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck --check-prefixes=ALL,FAST %s +! RUN: bbc -emit-fir %s -o - --math-lowering=late --math-runtime=relaxed | fir-opt --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck --check-prefixes=ALL,RELAXED %s +! RUN: %flang_fc1 -emit-fir -mllvm -math-lowering=late -mllvm -math-runtime=relaxed %s -o - | fir-opt --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck --check-prefixes=ALL,RELAXED %s +! RUN: bbc -emit-fir %s -o - --math-lowering=late --math-runtime=precise | fir-opt --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck --check-prefixes=ALL,PRECISE %s +! RUN: %flang_fc1 -emit-fir -mllvm -math-lowering=late -mllvm -math-runtime=precise %s -o - | fir-opt --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck --check-prefixes=ALL,PRECISE %s + +! ALL-LABEL: @_QPtest_real4 +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.fabs"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.fabs"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @fabsf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +! FAST: {{%[A-Za-z0-9._]+}} = llvm.call @hypotf({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = llvm.call @hypotf({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @hypotf({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 + +! ALL: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.trunc.f32({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! 
RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.round.f32({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +! FAST: {{%[A-Za-z0-9._]+}} = llvm.call @atanf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = llvm.call @atanf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @atanf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +! FAST: {{%[A-Za-z0-9._]+}} = llvm.call @atan2f({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = llvm.call @atan2f({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @atan2f({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 + +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.ceil"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.ceil"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @ceilf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.cos"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.cos"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @cosf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +! FAST: {{%[A-Za-z0-9._]+}} = llvm.call @erff({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = llvm.call @erff({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @erff({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.exp"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.exp"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @expf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.floor"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.floor"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! 
PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @floorf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.log"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.log"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @logf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.log10"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.log10"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @log10f({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +! ALL: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.lround.i32.f32({{%[A-Za-z0-9._]+}}) : (f32) -> i32 + +! ALL: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.lround.i64.f32({{%[A-Za-z0-9._]+}}) : (f32) -> i64 + +! ALL: [[STOI:%[A-Za-z0-9._]+]] = llvm.sext {{%[A-Za-z0-9._]+}} : i16 to i32 +! ALL: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.powi.f32.i32({{%[A-Za-z0-9._]+}}, [[STOI]]) : (f32, i32) -> f32 + +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.pow"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.pow"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @powf({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 + +! ALL: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.powi.f32.i32({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, i32) -> f32 + +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.copysign"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.copysign"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @copysignf({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 + +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.sin"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.sin"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! 
PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @sinf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +! FAST: {{%[A-Za-z0-9._]+}} = llvm.call @tanhf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = llvm.call @tanhf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @tanhf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +function test_real4(x, y, c, s, i) + real :: x, y, test_real4 + complex(4) :: c + integer(2) :: s + integer(4) :: i + test_real4 = abs(x) + abs(c) + aint(x) + anint(x) + atan(x) + atan2(x, y) + & + ceiling(x) + cos(x) + erf(x) + exp(x) + floor(x) + log(x) + log10(x) + & + nint(x, 4) + nint(x, 8) + x ** s + x ** y + x ** i + sign(x, y) + & + sin(x) + tanh(x) +end function + +! ALL-LABEL: @_QPtest_real8 +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.fabs"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.fabs"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @fabs({{%[A-Za-z0-9._]+}}) : (f64) -> f64 + +! FAST: {{%[A-Za-z0-9._]+}} = llvm.call @hypot({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = llvm.call @hypot({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @hypot({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 + +! ALL: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.trunc.f64({{%[A-Za-z0-9._]+}}) : (f64) -> f64 + +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.round.f64({{%[A-Za-z0-9._]+}}) : (f64) -> f64 + +! FAST: {{%[A-Za-z0-9._]+}} = llvm.call @atan({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = llvm.call @atan({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @atan({{%[A-Za-z0-9._]+}}) : (f64) -> f64 + +! 
FAST: {{%[A-Za-z0-9._]+}} = llvm.call @atan2({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = llvm.call @atan2({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @atan2({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 + +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.ceil"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.ceil"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @ceil({{%[A-Za-z0-9._]+}}) : (f64) -> f64 + +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.cos"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.cos"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @cos({{%[A-Za-z0-9._]+}}) : (f64) -> f64 + +! FAST: {{%[A-Za-z0-9._]+}} = llvm.call @erf({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = llvm.call @erf({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @erf({{%[A-Za-z0-9._]+}}) : (f64) -> f64 + +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.exp"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.exp"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @exp({{%[A-Za-z0-9._]+}}) : (f64) -> f64 + +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.floor"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.floor"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @floor({{%[A-Za-z0-9._]+}}) : (f64) -> f64 + +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.log"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.log"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @log({{%[A-Za-z0-9._]+}}) : (f64) -> f64 + +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.log10"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! 
RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.log10"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @log10({{%[A-Za-z0-9._]+}}) : (f64) -> f64 + +! ALL: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.lround.i32.f64({{%[A-Za-z0-9._]+}}) : (f64) -> i32 + +! ALL: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.lround.i64.f64({{%[A-Za-z0-9._]+}}) : (f64) -> i64 + +! ALL: [[STOI:%[A-Za-z0-9._]+]] = llvm.sext {{%[A-Za-z0-9._]+}} : i16 to i32 +! ALL: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.powi.f64.i32({{%[A-Za-z0-9._]+}}, [[STOI]]) : (f64, i32) -> f64 + +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.pow"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.pow"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @pow({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 + +! ALL: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.powi.f64.i32({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, i32) -> f64 + +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.copysign"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.copysign"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @copysign({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 + +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.sin"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.sin"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @sin({{%[A-Za-z0-9._]+}}) : (f64) -> f64 + +! FAST: {{%[A-Za-z0-9._]+}} = llvm.call @tanh({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = llvm.call @tanh({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! 
PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @tanh({{%[A-Za-z0-9._]+}}) : (f64) -> f64 + +function test_real8(x, y, c, s, i) + real(8) :: x, y, test_real8 + complex(8) :: c + integer(2) :: s + integer(4) :: i + test_real8 = abs(x) + abs(c) + aint(x) + anint(x) + atan(x) + atan2(x, y) + & + ceiling(x) + cos(x) + erf(x) + exp(x) + floor(x) + log(x) + log10(x) + & + nint(x, 4) + nint(x, 8) + x ** s + x ** y + x ** i + sign(x, y) + & + sin(x) + tanh(x) +end function Index: flang/test/Lower/Intrinsics/exp.f90 =================================================================== --- flang/test/Lower/Intrinsics/exp.f90 +++ flang/test/Lower/Intrinsics/exp.f90 @@ -1,5 +1,5 @@ -! RUN: bbc -emit-fir %s -o - | FileCheck %s -! RUN: %flang_fc1 -emit-fir %s -o - | FileCheck %s +! RUN: bbc -emit-fir -outline-intrinsics %s -o - | FileCheck %s +! RUN: %flang_fc1 -emit-fir -mllvm -outline-intrinsics %s -o - | FileCheck %s ! CHECK-LABEL: exp_testr ! CHECK-SAME: (%[[AREF:.*]]: !fir.ref {{.*}}, %[[BREF:.*]]: !fir.ref {{.*}}) Index: flang/test/Lower/Intrinsics/log.f90 =================================================================== --- flang/test/Lower/Intrinsics/log.f90 +++ flang/test/Lower/Intrinsics/log.f90 @@ -1,5 +1,5 @@ -! RUN: bbc -emit-fir %s -o - | FileCheck %s -! RUN: %flang_fc1 -emit-fir %s -o - | FileCheck %s +! RUN: bbc -emit-fir -outline-intrinsics %s -o - | FileCheck %s +! RUN: %flang_fc1 -emit-fir -mllvm -outline-intrinsics %s -o - | FileCheck %s ! CHECK-LABEL: log_testr ! CHECK-SAME: (%[[AREF:.*]]: !fir.ref {{.*}}, %[[BREF:.*]]: !fir.ref {{.*}}) Index: flang/test/Lower/Intrinsics/math-runtime-options.f90 =================================================================== --- flang/test/Lower/Intrinsics/math-runtime-options.f90 +++ flang/test/Lower/Intrinsics/math-runtime-options.f90 @@ -1,7 +1,11 @@ -! RUN: bbc -emit-fir --math-runtime=fast %s -o - | FileCheck %s --check-prefixes="FIR,FAST" -! 
RUN: bbc -emit-fir --math-runtime=relaxed %s -o - | FileCheck %s --check-prefixes="FIR,RELAXED" -! RUN: bbc -emit-fir --math-runtime=precise %s -o - | FileCheck %s --check-prefixes="FIR,PRECISE" -! RUN: bbc -emit-fir --math-runtime=llvm %s -o - | FileCheck %s --check-prefixes="FIR,LLVM" +! RUN: bbc -emit-fir --math-runtime=fast -outline-intrinsics %s -o - | FileCheck %s --check-prefixes="FIR,FAST" +! RUN: %flang_fc1 -emit-fir -mllvm -math-runtime=fast -mllvm -outline-intrinsics %s -o - | FileCheck %s --check-prefixes="FIR,FAST" +! RUN: bbc -emit-fir --math-runtime=relaxed -outline-intrinsics %s -o - | FileCheck %s --check-prefixes="FIR,RELAXED" +! RUN: %flang_fc1 -emit-fir -mllvm -math-runtime=relaxed -mllvm -outline-intrinsics %s -o - | FileCheck %s --check-prefixes="FIR,RELAXED" +! RUN: bbc -emit-fir --math-runtime=precise -outline-intrinsics %s -o - | FileCheck %s --check-prefixes="FIR,PRECISE" +! RUN: %flang_fc1 -emit-fir -mllvm -math-runtime=precise -mllvm -outline-intrinsics %s -o - | FileCheck %s --check-prefixes="FIR,PRECISE" +! RUN: bbc -emit-fir --math-runtime=llvm -outline-intrinsics %s -o - | FileCheck %s --check-prefixes="FIR,LLVM" +! RUN: %flang_fc1 -emit-fir -mllvm -math-runtime=llvm -mllvm -outline-intrinsics %s -o - | FileCheck %s --check-prefixes="FIR,LLVM" ! CHECK-LABEL: cos_testr subroutine cos_testr(a, b) Index: flang/test/Lower/late-math-lowering.f90 =================================================================== --- /dev/null +++ flang/test/Lower/late-math-lowering.f90 @@ -0,0 +1,137 @@ +! RUN: bbc -emit-fir %s -o - --math-lowering=late --math-runtime=fast | FileCheck --check-prefixes=ALL,FAST %s +! RUN: %flang_fc1 -emit-fir -mllvm -math-lowering=late -mllvm -math-runtime=fast %s -o - | FileCheck --check-prefixes=ALL,FAST %s +! 'relaxed' matches 'fast' exactly right now, but this will change: +! RUN: bbc -emit-fir %s -o - --math-lowering=late --math-runtime=relaxed | FileCheck --check-prefixes=ALL,RELAXED %s +! 
RUN: %flang_fc1 -emit-fir -mllvm -math-lowering=late -mllvm -math-runtime=relaxed %s -o - | FileCheck --check-prefixes=ALL,RELAXED %s +! RUN: bbc -emit-fir %s -o - --math-lowering=late --math-runtime=precise | FileCheck --check-prefixes=ALL,PRECISE %s +! RUN: %flang_fc1 -emit-fir -mllvm -math-lowering=late -mllvm -math-runtime=precise %s -o - | FileCheck --check-prefixes=ALL,PRECISE %s + +! ALL-LABEL: @_QPtest_real4 +! FAST: {{%[A-Za-z0-9._]+}} = math.abs {{%[A-Za-z0-9._]+}} : f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.abs {{%[A-Za-z0-9._]+}} : f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @fabsf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! ALL: {{%[A-Za-z0-9._]+}} = fir.call @hypotf({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +! ALL: {{%[A-Za-z0-9._]+}} = fir.call @llvm.trunc.f32({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @llvm.round.f32({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! FAST: {{%[A-Za-z0-9._]+}} = math.atan {{%[A-Za-z0-9._]+}} : f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.atan {{%[A-Za-z0-9._]+}} : f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @atanf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! FAST: {{%[A-Za-z0-9._]+}} = math.atan2 {{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}} : f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.atan2 {{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}} : f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @atan2f({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +! FAST: {{%[A-Za-z0-9._]+}} = math.ceil {{%[A-Za-z0-9._]+}} : f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.ceil {{%[A-Za-z0-9._]+}} : f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @ceilf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! FAST: {{%[A-Za-z0-9._]+}} = math.cos {{%[A-Za-z0-9._]+}} : f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.cos {{%[A-Za-z0-9._]+}} : f32 +! 
PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @cosf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! FAST: {{%[A-Za-z0-9._]+}} = math.erf {{%[A-Za-z0-9._]+}} : f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.erf {{%[A-Za-z0-9._]+}} : f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @erff({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! FAST: {{%[A-Za-z0-9._]+}} = math.exp {{%[A-Za-z0-9._]+}} : f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.exp {{%[A-Za-z0-9._]+}} : f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @expf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! FAST: {{%[A-Za-z0-9._]+}} = math.floor {{%[A-Za-z0-9._]+}} : f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.floor {{%[A-Za-z0-9._]+}} : f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @floorf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! FAST: {{%[A-Za-z0-9._]+}} = math.log {{%[A-Za-z0-9._]+}} : f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.log {{%[A-Za-z0-9._]+}} : f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @logf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! FAST: {{%[A-Za-z0-9._]+}} = math.log10 {{%[A-Za-z0-9._]+}} : f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.log10 {{%[A-Za-z0-9._]+}} : f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @log10f({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! ALL: {{%[A-Za-z0-9._]+}} = fir.call @llvm.lround.i32.f32({{%[A-Za-z0-9._]+}}) : (f32) -> i32 +! ALL: {{%[A-Za-z0-9._]+}} = fir.call @llvm.lround.i64.f32({{%[A-Za-z0-9._]+}}) : (f32) -> i64 +! ALL: [[STOI:%[A-Za-z0-9._]+]] = fir.convert {{%[A-Za-z0-9._]+}} : (i16) -> i32 +! ALL: {{%[A-Za-z0-9._]+}} = fir.call @llvm.powi.f32.i32({{%[A-Za-z0-9._]+}}, [[STOI]]) : (f32, i32) -> f32 +! FAST: {{%[A-Za-z0-9._]+}} = math.powf {{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}} : f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.powf {{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}} : f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @powf({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +! ALL: {{%[A-Za-z0-9._]+}} = fir.call @llvm.powi.f32.i32({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, i32) -> f32 +! 
FAST: {{%[A-Za-z0-9._]+}} = math.copysign {{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}} : f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.copysign {{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}} : f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @copysignf({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +! FAST: {{%[A-Za-z0-9._]+}} = math.sin {{%[A-Za-z0-9._]+}} : f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.sin {{%[A-Za-z0-9._]+}} : f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @sinf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! FAST: {{%[A-Za-z0-9._]+}} = math.tanh {{%[A-Za-z0-9._]+}} : f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.tanh {{%[A-Za-z0-9._]+}} : f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @tanhf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +function test_real4(x, y, c, s, i) + real :: x, y, test_real4 + complex(4) :: c + integer(2) :: s + integer(4) :: i + test_real4 = abs(x) + abs(c) + aint(x) + anint(x) + atan(x) + atan2(x, y) + & + ceiling(x) + cos(x) + erf(x) + exp(x) + floor(x) + log(x) + log10(x) + & + nint(x, 4) + nint(x, 8) + x ** s + x ** y + x ** i + sign(x, y) + & + sin(x) + tanh(x) +end function + +! ALL-LABEL: @_QPtest_real8 +! FAST: {{%[A-Za-z0-9._]+}} = math.abs {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.abs {{%[A-Za-z0-9._]+}} : f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @fabs({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! ALL: {{%[A-Za-z0-9._]+}} = fir.call @hypot({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +! ALL: {{%[A-Za-z0-9._]+}} = fir.call @llvm.trunc.f64({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @llvm.round.f64({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.atan {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.atan {{%[A-Za-z0-9._]+}} : f64 +! 
PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @atan({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.atan2 {{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.atan2 {{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}} : f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @atan2({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.ceil {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.ceil {{%[A-Za-z0-9._]+}} : f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @ceil({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.cos {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.cos {{%[A-Za-z0-9._]+}} : f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @cos({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.erf {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.erf {{%[A-Za-z0-9._]+}} : f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @erf({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.exp {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.exp {{%[A-Za-z0-9._]+}} : f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @exp({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.floor {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.floor {{%[A-Za-z0-9._]+}} : f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @floor({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.log {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.log {{%[A-Za-z0-9._]+}} : f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @log({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.log10 {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.log10 {{%[A-Za-z0-9._]+}} : f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @log10({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! 
ALL: {{%[A-Za-z0-9._]+}} = fir.call @llvm.lround.i32.f64({{%[A-Za-z0-9._]+}}) : (f64) -> i32 +! ALL: {{%[A-Za-z0-9._]+}} = fir.call @llvm.lround.i64.f64({{%[A-Za-z0-9._]+}}) : (f64) -> i64 +! ALL: [[STOI:%[A-Za-z0-9._]+]] = fir.convert {{%[A-Za-z0-9._]+}} : (i16) -> i32 +! ALL: {{%[A-Za-z0-9._]+}} = fir.call @llvm.powi.f64.i32({{%[A-Za-z0-9._]+}}, [[STOI]]) : (f64, i32) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.powf {{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.powf {{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}} : f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @pow({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +! ALL: {{%[A-Za-z0-9._]+}} = fir.call @llvm.powi.f64.i32({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, i32) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.copysign {{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.copysign {{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}} : f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @copysign({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.sin {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.sin {{%[A-Za-z0-9._]+}} : f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @sin({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.tanh {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.tanh {{%[A-Za-z0-9._]+}} : f64 +! 
PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @tanh({{%[A-Za-z0-9._]+}}) : (f64) -> f64 + +function test_real8(x, y, c, s, i) + real(8) :: x, y, test_real8 + complex(8) :: c + integer(2) :: s + integer(4) :: i + test_real8 = abs(x) + abs(c) + aint(x) + anint(x) + atan(x) + atan2(x, y) + & + ceiling(x) + cos(x) + erf(x) + exp(x) + floor(x) + log(x) + log10(x) + & + nint(x, 4) + nint(x, 8) + x ** s + x ** y + x ** i + sign(x, y) + & + sin(x) + tanh(x) +end function Index: flang/test/Lower/llvm-math.f90 =================================================================== --- flang/test/Lower/llvm-math.f90 +++ flang/test/Lower/llvm-math.f90 @@ -1,4 +1,5 @@ -! RUN: bbc -emit-fir %s -o - --math-runtime=llvm | FileCheck %s +! RUN: bbc -emit-fir %s -o - --math-runtime=llvm --outline-intrinsics | FileCheck %s +! RUN: %flang_fc1 -emit-fir -mllvm -math-runtime=llvm -mllvm -outline-intrinsics %s -o - | FileCheck %s SUBROUTINE POW_WRAPPER(IN, IN2, OUT) DOUBLE PRECISION IN, IN2 Index: flang/test/Lower/sqrt.f90 =================================================================== --- flang/test/Lower/sqrt.f90 +++ flang/test/Lower/sqrt.f90 @@ -1,5 +1,5 @@ -! RUN: bbc -emit-fir %s -o - | FileCheck %s -! RUN: %flang_fc1 -emit-fir %s -o - | FileCheck %s +! RUN: bbc -emit-fir -outline-intrinsics %s -o - | FileCheck %s +! RUN: %flang_fc1 -emit-fir -mllvm -outline-intrinsics %s -o - | FileCheck %s ! CHECK-LABEL: sqrt_testr subroutine sqrt_testr(a, b) Index: flang/test/Lower/trigonometric-intrinsics.f90 =================================================================== --- flang/test/Lower/trigonometric-intrinsics.f90 +++ flang/test/Lower/trigonometric-intrinsics.f90 @@ -1,5 +1,5 @@ -! RUN: bbc -emit-fir %s -o - | FileCheck %s -! RUN: %flang_fc1 -emit-fir %s -o - | FileCheck %s +! RUN: bbc -emit-fir -outline-intrinsics %s -o - | FileCheck %s +! RUN: %flang_fc1 -emit-fir -mllvm -outline-intrinsics %s -o - | FileCheck %s ! CHECK-LABEL: atan_testr subroutine atan_testr(a, b)