diff --git a/flang/include/flang/Optimizer/Support/InitFIR.h b/flang/include/flang/Optimizer/Support/InitFIR.h
--- a/flang/include/flang/Optimizer/Support/InitFIR.h
+++ b/flang/include/flang/Optimizer/Support/InitFIR.h
@@ -28,7 +28,8 @@
   mlir::AffineDialect, FIROpsDialect, mlir::acc::OpenACCDialect,           \
       mlir::omp::OpenMPDialect, mlir::scf::SCFDialect,                     \
       mlir::arith::ArithmeticDialect, mlir::cf::ControlFlowDialect,        \
-      mlir::func::FuncDialect, mlir::vector::VectorDialect
+      mlir::func::FuncDialect, mlir::vector::VectorDialect,                \
+      mlir::math::MathDialect

 // The definitive list of dialects used by flang.
 #define FLANG_DIALECT_LIST                                                 \
diff --git a/flang/lib/Lower/IntrinsicCall.cpp b/flang/lib/Lower/IntrinsicCall.cpp
--- a/flang/lib/Lower/IntrinsicCall.cpp
+++ b/flang/lib/Lower/IntrinsicCall.cpp
@@ -35,6 +35,7 @@
 #include "flang/Optimizer/Dialect/FIROpsSupport.h"
 #include "flang/Optimizer/Support/FatalError.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Dialect/Math/IR/Math.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -951,6 +952,40 @@
 // Math runtime description and matching utility
 //===----------------------------------------------------------------------===//

+/// Command line option to control how math operations are lowered
+/// into MLIR.
+/// Going forward, most of the math operations have to be lowered
+/// to some MLIR dialect operations, which are converted to
+/// library calls at the end of the FIR pipeline. Basically,
+/// the library call generation for these math operations will
+/// happen late, during FIR conversion.
+///
+/// Exposing MLIR operations early can potentially enable more
+/// MLIR optimizations. At the same time, there are some issues
+/// with doing this, e.g. 'math' dialect operations do not model
+/// strict FP behavior right now, so the optimizations may change
+/// the program behavior compared to when we represent intrinsic
+/// mathematical operations with generic calls. In order to preserve
+/// strict FP behavior with late math lowering, we have to extend
+/// the dialects used by the late lowering so that they model strict
+/// FP behavior properly.
+enum MathLoweringMode {
+  // Lower math operations according to the mathRuntimeVersion selection
+  // defined below.
+  earlyLowering,
+
+  // Lower math operations into operations of MLIR dialects,
+  // such as mlir::math, mlir::complex, etc.
+  lateLowering,
+};
+
+llvm::cl::opt<MathLoweringMode> mathLowering(
+    "math-lowering", llvm::cl::desc("Select math operations lowering mode:"),
+    llvm::cl::values(
+        clEnumValN(earlyLowering, "early", "lower to library calls early"),
+        clEnumValN(lateLowering, "late", "lower to MLIR dialect operations")),
+    llvm::cl::init(earlyLowering));
+
 /// Command line option to modify math runtime version used to implement
 /// intrinsics.
 enum MathRuntimeVersion {
@@ -975,6 +1010,8 @@
   // Needed for implicit compare with keys.
   constexpr operator Key() const { return key; }
   Key key; // intrinsic name
+
+  // Name of a runtime function that implements the operation.
   llvm::StringRef symbol;
   fir::runtime::FuncTypeBuilderFunc typeGenerator;
 };
@@ -1041,9 +1078,160 @@
   return mlir::FunctionType::get(context, {t}, {r});
 }

-// TODO : Fill-up this table with more intrinsic.
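+// The two builders below produce function types of the form
+// (fN, i<Bits>) -> fN; e.g. the 'llvm.powi.f64.i32' entry in the
+// mathOperations table below is typed with genF64F64IntFuncType<32>.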
+template <int Bits>
+static mlir::FunctionType genF64F64IntFuncType(mlir::MLIRContext *context) {
+  auto ftype = mlir::FloatType::getF64(context);
+  auto itype = mlir::IntegerType::get(context, Bits);
+  return mlir::FunctionType::get(context, {ftype, itype}, {ftype});
+}
+
+template <int Bits>
+static mlir::FunctionType genF32F32IntFuncType(mlir::MLIRContext *context) {
+  auto ftype = mlir::FloatType::getF32(context);
+  auto itype = mlir::IntegerType::get(context, Bits);
+  return mlir::FunctionType::get(context, {ftype, itype}, {ftype});
+}
+
+/// Callback type for generating lowering for a math operation.
+using MathGeneratorTy = mlir::Value (*)(fir::FirOpBuilder &, mlir::Location,
+                                        llvm::StringRef name,
+                                        mlir::FunctionType funcType,
+                                        llvm::ArrayRef<mlir::Value>);
+
+struct MathOperation {
+  // llvm::StringRef comparison operators are not constexpr, so use
+  // std::string_view.
+  using Key = std::string_view;
+  // Needed for implicit compare with keys.
+  constexpr operator Key() const { return key; }
+  Key key; // intrinsic name
+
+  // Name of a runtime function that implements the operation.
+  llvm::StringRef symbol;
+  fir::runtime::FuncTypeBuilderFunc typeGenerator;
+
+  // If funcGenerator is non-null, it generates the lowering code;
+  // otherwise, the lowering is done as a call to the runtime
+  // function named by the 'symbol' member.
+  MathGeneratorTy funcGenerator;
+};
+
+static mlir::Value genLibCall(fir::FirOpBuilder &builder, mlir::Location loc,
+                              llvm::StringRef name, mlir::FunctionType funcType,
+                              llvm::ArrayRef<mlir::Value> args) {
+  LLVM_DEBUG(llvm::dbgs() << "Generating '" << name << "' call with type ";
+             funcType.dump(); llvm::dbgs() << "\n");
+  mlir::func::FuncOp funcOp = builder.addNamedFunction(loc, name, funcType);
+  // TODO: ensure 'strictfp' setting on the call for "precise/strict"
+  // FP mode. Set appropriate Fast-Math Flags otherwise.
+  // TODO: we should also mark as many libm functions as possible
+  // with the 'pure' attribute (of course, not in strict FP mode).
+  auto libCall = builder.create<fir::CallOp>(loc, funcOp, args);
+  LLVM_DEBUG(libCall.dump(); llvm::dbgs() << "\n");
+  return libCall.getResult(0);
+}
+
+template <typename T>
+static mlir::Value genMathOp(fir::FirOpBuilder &builder, mlir::Location loc,
+                             llvm::StringRef name, mlir::FunctionType funcType,
+                             llvm::ArrayRef<mlir::Value> args) {
+  // TODO: we have to annotate the math operations with flags
+  // that allow defining the FP accuracy/exception behavior
+  // per operation, so that after early multi-module MLIR
+  // inlining we can distinguish operations that were compiled
+  // with different settings.
+  // Suggestion:
+  //  * For "relaxed" FP mode set all Fast-Math Flags
+  //    (see "[RFC] FastMath flags support in MLIR (arith dialect)"
+  //    topic at discourse.llvm.org).
+  //  * For "fast" FP mode set all Fast-Math Flags except 'afn'.
+  //  * For "precise/strict" FP mode generate fir.calls to libm
+  //    entries and annotate them with an attribute that will
+  //    end up transformed into the 'strictfp' LLVM attribute (TBD).
+  //    Elsewhere, "precise/strict" FP mode should also set
+  //    'strictfp' for all user functions and calls so that
+  //    the LLVM backend does the right job.
+  //  * Operations that cannot be reasonably optimized in MLIR
+  //    can also be lowered to libm calls for the "fast" and
+  //    "relaxed" modes.
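+  // For example, with T = mlir::math::SinOp the non-precise modes
+  // produce a single 'math.sin' operation here, while preciseVersion
+  // goes through genLibCall and produces 'fir.call @sinf' instead
+  // (this is exactly what the late-math-lowering.f90 test below checks).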
+  mlir::Value result;
+  if (mathRuntimeVersion == preciseVersion) {
+    result = genLibCall(builder, loc, name, funcType, args);
+  } else {
+    LLVM_DEBUG(llvm::dbgs()
+                   << "Generating '" << name << "' operation with type ";
+               funcType.dump(); llvm::dbgs() << "\n");
+    result = builder.create<T>(loc, args);
+  }
+  LLVM_DEBUG(result.dump(); llvm::dbgs() << "\n");
+  return result;
+}
+
+/// Map mathematical intrinsic operations into MLIR operations
+/// of some appropriate dialect (math, complex, etc.) or libm
+/// calls.
+/// TODO: support more operations here.
+static constexpr MathOperation mathOperations[] = {
+    {"abs", "fabsf", genF32F32FuncType, genMathOp<mlir::math::AbsOp>},
+    {"abs", "fabs", genF64F64FuncType, genMathOp<mlir::math::AbsOp>},
+    // llvm.trunc behaves the same way as libm's trunc.
+    {"aint", "llvm.trunc.f32", genF32F32FuncType, genLibCall},
+    {"aint", "llvm.trunc.f64", genF64F64FuncType, genLibCall},
+    // llvm.round behaves the same way as libm's round.
+    {"anint", "llvm.round.f32", genF32F32FuncType,
+     genMathOp<mlir::LLVM::RoundOp>},
+    {"anint", "llvm.round.f64", genF64F64FuncType,
+     genMathOp<mlir::LLVM::RoundOp>},
+    {"atan", "atanf", genF32F32FuncType, genMathOp<mlir::math::AtanOp>},
+    {"atan", "atan", genF64F64FuncType, genMathOp<mlir::math::AtanOp>},
+    {"atan2", "atan2f", genF32F32F32FuncType, genMathOp<mlir::math::Atan2Op>},
+    {"atan2", "atan2", genF64F64F64FuncType, genMathOp<mlir::math::Atan2Op>},
+    // math::CeilOp returns a real, while Fortran CEILING returns an integer.
+    {"ceil", "ceilf", genF32F32FuncType, genMathOp<mlir::math::CeilOp>},
+    {"ceil", "ceil", genF64F64FuncType, genMathOp<mlir::math::CeilOp>},
+    {"cos", "cosf", genF32F32FuncType, genMathOp<mlir::math::CosOp>},
+    {"cos", "cos", genF64F64FuncType, genMathOp<mlir::math::CosOp>},
+    {"erf", "erff", genF32F32FuncType, genMathOp<mlir::math::ErfOp>},
+    {"erf", "erf", genF64F64FuncType, genMathOp<mlir::math::ErfOp>},
+    {"exp", "expf", genF32F32FuncType, genMathOp<mlir::math::ExpOp>},
+    {"exp", "exp", genF64F64FuncType, genMathOp<mlir::math::ExpOp>},
+    // math::FloorOp returns a real, while Fortran FLOOR returns an integer.
+    {"floor", "floorf", genF32F32FuncType, genMathOp<mlir::math::FloorOp>},
+    {"floor", "floor", genF64F64FuncType, genMathOp<mlir::math::FloorOp>},
+    {"hypot", "hypotf", genF32F32F32FuncType, genLibCall},
+    {"hypot", "hypot", genF64F64F64FuncType, genLibCall},
+    {"log", "logf", genF32F32FuncType, genMathOp<mlir::math::LogOp>},
+    {"log", "log", genF64F64FuncType, genMathOp<mlir::math::LogOp>},
+    {"log10", "log10f", genF32F32FuncType, genMathOp<mlir::math::Log10Op>},
+    {"log10", "log10", genF64F64FuncType, genMathOp<mlir::math::Log10Op>},
+    // llvm.lround behaves the same way as libm's lround.
+    {"nint", "llvm.lround.i64.f64", genIntF64FuncType<64>, genLibCall},
+    {"nint", "llvm.lround.i64.f32", genIntF32FuncType<64>, genLibCall},
+    {"nint", "llvm.lround.i32.f64", genIntF64FuncType<32>, genLibCall},
+    {"nint", "llvm.lround.i32.f32", genIntF32FuncType<32>, genLibCall},
+    {"pow", "powf", genF32F32F32FuncType, genMathOp<mlir::math::PowFOp>},
+    {"pow", "pow", genF64F64F64FuncType, genMathOp<mlir::math::PowFOp>},
+    // TODO: add PowIOp in math and complex dialects.
+    {"pow", "llvm.powi.f32.i32", genF32F32IntFuncType<32>, genLibCall},
+    {"pow", "llvm.powi.f64.i32", genF64F64IntFuncType<32>, genLibCall},
+    {"sign", "copysignf", genF32F32F32FuncType,
+     genMathOp<mlir::math::CopySignOp>},
+    {"sign", "copysign", genF64F64F64FuncType,
+     genMathOp<mlir::math::CopySignOp>},
+    {"sin", "sinf", genF32F32FuncType, genMathOp<mlir::math::SinOp>},
+    {"sin", "sin", genF64F64FuncType, genMathOp<mlir::math::SinOp>},
+    {"sqrt", "sqrtf", genF32F32FuncType, genMathOp<mlir::math::SqrtOp>},
+    {"sqrt", "sqrt", genF64F64FuncType, genMathOp<mlir::math::SqrtOp>},
+    {"tanh", "tanhf", genF32F32FuncType, genMathOp<mlir::math::TanhOp>},
+    {"tanh", "tanh", genF64F64FuncType, genMathOp<mlir::math::TanhOp>},
+};
+
 // Note: These are also defined as operations in LLVM dialect. See if this
 // can be use and has advantages.
+// TODO: remove this table, since the late math lowering should
+// replace it and generate proper MLIR operations rather
+// than llvm intrinsic calls, which still look like generic
+// calls to MLIR and do not enable many optimizations.
 static constexpr RuntimeFunction llvmIntrinsics[] = {
     {"abs", "llvm.fabs.f32", genF32F32FuncType},
     {"abs", "llvm.fabs.f64", genF64F64FuncType},
@@ -1242,7 +1430,7 @@
 /// function type and that will not imply narrowing arguments or extending the
 /// result.
 /// If nothing is found, the mlir::func::FuncOp will contain a nullptr.
-mlir::func::FuncOp searchFunctionInLibrary(
+static mlir::func::FuncOp searchFunctionInLibrary(
     mlir::Location loc, fir::FirOpBuilder &builder,
     const Fortran::common::StaticMultimapView<RuntimeFunction> &lib,
     llvm::StringRef name, mlir::FunctionType funcType,
@@ -1265,6 +1453,65 @@
   return {};
 }

+using RtMap = Fortran::common::StaticMultimapView<MathOperation>;
+static constexpr RtMap mathOps(mathOperations);
+static_assert(mathOps.Verify() && "map must be sorted");
+
+static const MathOperation *
+searchMathOperation(fir::FirOpBuilder &builder, llvm::StringRef name,
+                    mlir::FunctionType funcType,
+                    const MathOperation **bestNearMatch,
+                    FunctionDistance &bestMatchDistance) {
+  auto range = mathOps.equal_range(name);
+  for (auto iter = range.first; iter != range.second && iter; ++iter) {
+    const auto &impl = *iter;
+    auto implType = impl.typeGenerator(builder.getContext());
+    if (funcType == implType)
+      return &impl; // exact match
+
+    FunctionDistance distance(funcType, implType);
+    if (distance.isSmallerThan(bestMatchDistance)) {
+      *bestNearMatch = &impl;
+      bestMatchDistance = std::move(distance);
+    }
+  }
+  return nullptr;
+}
+
+/// The implementation of the operation defined by \p name with type
+/// \p funcType is not exact, and the actual available implementation
+/// is \p distance away from the requested one. If using the available
+/// implementation results in a precision loss, emit an error message
+/// at the given code location \p loc.
+static void diagPrecisionLoss(llvm::StringRef name, mlir::FunctionType funcType,
+                              const FunctionDistance &distance,
+                              mlir::Location loc) {
+  if (!distance.isLosingPrecision())
+    return;
+
+  // Using this runtime version requires narrowing the arguments
+  // or extending the result. It is not numerically safe. There
+  // is currently no quad math library that was described in
+  // lowering and could be used here. Emit an error and continue
+  // generating the code with the narrowing cast so that the user
+  // can get a complete list of the problematic intrinsic calls.
+  std::string message("TODO: no math runtime available for '");
+  llvm::raw_string_ostream sstream(message);
+  if (name == "pow") {
+    assert(funcType.getNumInputs() == 2 && "power operator has two arguments");
+    sstream << funcType.getInput(0) << " ** " << funcType.getInput(1);
+  } else {
+    sstream << name << "(";
+    if (funcType.getNumInputs() > 0)
+      sstream << funcType.getInput(0);
+    for (mlir::Type argType : funcType.getInputs().drop_front())
+      sstream << ", " << argType;
+    sstream << ")";
+  }
+  sstream << "'";
+  mlir::emitError(loc, message);
+}
+
 /// Search runtime for the best runtime function given an intrinsic name
 /// and interface. The interface may not be a perfect match in which case
 /// the caller is responsible to insert argument and return value conversions.
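Both searchMathOperation above and the runtime-function search below follow the same matching scheme: an exact signature match wins immediately, and otherwise the closest viable candidate is remembered for the near-match path. Here is a minimal, self-contained sketch of that scheme; the `Op` struct, the integer `width` stand-in for `mlir::FunctionType`/`FunctionDistance`, and all names below are illustrative assumptions, not flang's actual API.

```cpp
#include <climits>
#include <cstdlib>
#include <string_view>

struct Op {
  std::string_view key; // intrinsic name; the table must stay sorted by key
  int width;            // stand-in for the type built by typeGenerator
};

// Sorted by key, mirroring the static_assert(mathOps.Verify()) requirement.
constexpr Op table[] = {{"cos", 32}, {"cos", 64}, {"sin", 32}, {"sin", 64}};

// Returns the exact match, or nullptr while updating the best near match,
// just as searchMathOperation updates bestNearMatch/bestMatchDistance.
const Op *search(std::string_view name, int width, const Op *&bestNearMatch,
                 int &bestDistance) {
  for (const Op &op : table) {
    if (op.key != name)
      continue;
    if (op.width == width)
      return &op; // exact match
    int distance = std::abs(op.width - width); // cf. FunctionDistance
    if (distance < bestDistance) {
      bestNearMatch = &op;
      bestDistance = distance;
    }
  }
  return nullptr;
}

int main() {
  const Op *nearMatch = nullptr;
  int distance = INT_MAX;
  return search("sin", 64, nearMatch, distance) ? 0 : 1; // finds {"sin", 64}
}
```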
@@ -1283,6 +1530,7 @@
   static_assert(pgmathR.Verify() && "map must be sorted");
   static constexpr RtMap pgmathP(pgmathPrecise);
   static_assert(pgmathP.Verify() && "map must be sorted");
+
   if (mathRuntimeVersion == fastVersion) {
     match = searchFunctionInLibrary(loc, builder, pgmathF, name, funcType,
                                     &bestNearMatch, bestMatchDistance);
@@ -1308,30 +1556,7 @@
     return exactMatch;

   if (bestNearMatch != nullptr) {
-    if (bestMatchDistance.isLosingPrecision()) {
-      // Using this runtime version requires narrowing the arguments
-      // or extending the result. It is not numerically safe. There
-      // is currently no quad math library that was described in
-      // lowering and could be used here. Emit an error and continue
-      // generating the code with the narrowing cast so that the user
-      // can get a complete list of the problematic intrinsic calls.
-      std::string message("TODO: no math runtime available for '");
-      llvm::raw_string_ostream sstream(message);
-      if (name == "pow") {
-        assert(funcType.getNumInputs() == 2 &&
-               "power operator has two arguments");
-        sstream << funcType.getInput(0) << " ** " << funcType.getInput(1);
-      } else {
-        sstream << name << "(";
-        if (funcType.getNumInputs() > 0)
-          sstream << funcType.getInput(0);
-        for (mlir::Type argType : funcType.getInputs().drop_front())
-          sstream << ", " << argType;
-        sstream << ")";
-      }
-      sstream << "'";
-      mlir::emitError(loc, message);
-    }
+    diagPrecisionLoss(name, funcType, bestMatchDistance, loc);
     return getFuncOp(loc, builder, *bestNearMatch);
   }
   return {};
@@ -1540,7 +1765,7 @@
   IntrinsicLibrary::RuntimeCallGenerator runtimeCallGenerator =
       getRuntimeCallGenerator(name, soughtFuncType);
   return genElementalCall(runtimeCallGenerator, name, *resultType, args,
-                          /* outline */ true);
+                          /*outline=*/outlineAllIntrinsics);
 }

 mlir::Value
@@ -1692,29 +1917,58 @@
 IntrinsicLibrary::RuntimeCallGenerator
 IntrinsicLibrary::getRuntimeCallGenerator(llvm::StringRef name,
                                           mlir::FunctionType soughtFuncType) {
-  mlir::func::FuncOp funcOp =
-      getRuntimeFunction(loc, builder, name, soughtFuncType);
-  if (!funcOp) {
+  mlir::func::FuncOp funcOp;
+  mlir::FunctionType actualFuncType;
+  const MathOperation *mathOp = nullptr;
+  if (mathLowering == lateLowering) {
+    // Look for a dedicated math operation generator, which
+    // normally produces a single MLIR operation implementing
+    // the math operation.
+    // If none is found, fall back to a runtime function lookup.
+    const MathOperation *bestNearMatch = nullptr;
+    FunctionDistance bestMatchDistance;
+    mathOp = searchMathOperation(builder, name, soughtFuncType, &bestNearMatch,
+                                 bestMatchDistance);
+    if (!mathOp && bestNearMatch) {
+      // Use the best near match, optionally issuing an error
+      // if type conversions cause precision loss.
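+      // (E.g. a quad-precision call matched against the f64
+      // implementation: no quad math library is described in
+      // lowering currently.)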
+      diagPrecisionLoss(name, soughtFuncType, bestMatchDistance, loc);
+      mathOp = bestNearMatch;
+    }
+    if (mathOp)
+      actualFuncType = mathOp->typeGenerator(builder.getContext());
+  }
+  if (!mathOp)
+    if ((funcOp = getRuntimeFunction(loc, builder, name, soughtFuncType)))
+      actualFuncType = funcOp.getFunctionType();
+
+  if (!mathOp && !funcOp) {
     std::string buffer("not yet implemented: missing intrinsic lowering: ");
     llvm::raw_string_ostream sstream(buffer);
     sstream << name << "\nrequested type was: " << soughtFuncType << '\n';
     fir::emitFatalError(loc, buffer);
   }

-  mlir::FunctionType actualFuncType = funcOp.getFunctionType();
   assert(actualFuncType.getNumResults() == soughtFuncType.getNumResults() &&
          actualFuncType.getNumInputs() == soughtFuncType.getNumInputs() &&
          actualFuncType.getNumResults() == 1 && "Bad intrinsic match");

-  return [funcOp, actualFuncType,
+  return [funcOp, actualFuncType, mathOp,
           soughtFuncType](fir::FirOpBuilder &builder, mlir::Location loc,
                           llvm::ArrayRef<mlir::Value> args) {
     llvm::SmallVector<mlir::Value> convertedArguments;
     for (auto [fst, snd] : llvm::zip(actualFuncType.getInputs(), args))
       convertedArguments.push_back(builder.createConvert(loc, fst, snd));
-    auto call = builder.create<fir::CallOp>(loc, funcOp, convertedArguments);
+    mlir::Value result;
+    // Use the math operation generator, if available.
+    if (mathOp)
+      result = mathOp->funcGenerator(builder, loc, mathOp->symbol,
+                                     actualFuncType, convertedArguments);
+    else
+      result = builder.create<fir::CallOp>(loc, funcOp, convertedArguments)
+                   .getResult(0);
     mlir::Type soughtType = soughtFuncType.getResult(0);
-    return builder.createConvert(loc, soughtType, call.getResult(0));
+    return builder.createConvert(loc, soughtType, result);
   };
 }

@@ -1880,7 +2134,7 @@
                                     llvm::ArrayRef<mlir::Value> args) {
   assert(args.size() == 1);
   return fir::factory::Complex{builder, loc}.extractComplexPart(
-      args[0], true /* isImagPart */);
+      args[0], /*isImagPart=*/true);
 }

 // AINT
@@ -3896,6 +4150,13 @@
 mlir::Value Fortran::lower::genPow(fir::FirOpBuilder &builder,
                                    mlir::Location loc, mlir::Type type,
                                    mlir::Value x, mlir::Value y) {
+  // TODO: since there is no libm version of pow with an integer exponent,
+  // we have to provide an alternative implementation for
+  // "precise/strict" FP mode and (mathLowering == lateLowering).
+  // One option is to generate an internal function with an inlined
+  // implementation and mark it 'strictfp'.
+  // Another option is to implement it in the Fortran runtime library
+  // (just like matmul).
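+  // Note that 'x ** i' currently maps to an 'llvm.powi.*' call through
+  // the mathOperations table above in all modes, including precise
+  // (see the powi checks in the tests below), hence the need for an
+  // alternative implementation here.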
   return IntrinsicLibrary{builder, loc}.genRuntimeCall("pow", type, {x, y});
 }
diff --git a/flang/lib/Optimizer/CodeGen/CMakeLists.txt b/flang/lib/Optimizer/CodeGen/CMakeLists.txt
--- a/flang/lib/Optimizer/CodeGen/CMakeLists.txt
+++ b/flang/lib/Optimizer/CodeGen/CMakeLists.txt
@@ -17,6 +17,8 @@
   FIRBuilder
   FIRDialect
   FIRSupport
+  MLIRMathToLLVM
+  MLIRMathToLibm
   MLIROpenMPToLLVM
   MLIRLLVMToLLVMIRTranslation
   MLIRTargetLLVMIRExport
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -23,6 +23,8 @@
 #include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h"
 #include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h"
 #include "mlir/Conversion/LLVMCommon/Pattern.h"
+#include "mlir/Conversion/MathToLLVM/MathToLLVM.h"
+#include "mlir/Conversion/MathToLibm/MathToLibm.h"
 #include "mlir/Conversion/OpenMPToLLVM/ConvertOpenMPToLLVM.h"
 #include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/Matchers.h"
@@ -3380,6 +3382,10 @@
                                                                     pattern);
     mlir::cf::populateControlFlowToLLVMConversionPatterns(typeConverter,
                                                           pattern);
+    // Convert math-like dialect operations, which can be produced
+    // when the late math lowering mode is used, into the llvm dialect.
+    mlir::populateMathToLLVMConversionPatterns(typeConverter, pattern);
+    mlir::populateMathToLibmConversionPatterns(pattern, /*benefit=*/0);
     mlir::ConversionTarget target{*context};
     target.addLegalDialect<mlir::LLVM::LLVMDialect>();
     // The OpenMP dialect is legal for Operations without regions, for those
diff --git a/flang/test/Intrinsics/late-math-codegen.f90 b/flang/test/Intrinsics/late-math-codegen.f90
new file mode 100644
--- /dev/null
+++ b/flang/test/Intrinsics/late-math-codegen.f90
@@ -0,0 +1,178 @@
+! TODO: verify that Fast-Math Flags and 'strictfp' are properly set.
+! RUN: bbc -emit-fir %s -o - --math-lowering=late --math-runtime=fast | fir-opt --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck --check-prefixes=ALL,FAST %s
+! RUN: bbc -emit-fir %s -o - --math-lowering=late --math-runtime=relaxed | fir-opt --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck --check-prefixes=ALL,RELAXED %s
+! RUN: bbc -emit-fir %s -o - --math-lowering=late --math-runtime=precise | fir-opt --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck --check-prefixes=ALL,PRECISE %s
+
+! ALL-LABEL: @_QPtest_real4
+! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.fabs"({{%[A-Za-z0-9._]+}}) : (f32) -> f32
+! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.fabs"({{%[A-Za-z0-9._]+}}) : (f32) -> f32
+! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @fabsf({{%[A-Za-z0-9._]+}}) : (f32) -> f32
+
+! FAST: {{%[A-Za-z0-9._]+}} = llvm.call @hypotf({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32
+! RELAXED: {{%[A-Za-z0-9._]+}} = llvm.call @hypotf({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32
+! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @hypotf({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32
+
+! ALL: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.trunc.f32({{%[A-Za-z0-9._]+}}) : (f32) -> f32
+
+! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f32) -> f32
+! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f32) -> f32
+! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.round.f32({{%[A-Za-z0-9._]+}}) : (f32) -> f32
+
+! FAST: {{%[A-Za-z0-9._]+}} = llvm.call @atanf({{%[A-Za-z0-9._]+}}) : (f32) -> f32
+! RELAXED: {{%[A-Za-z0-9._]+}} = llvm.call @atanf({{%[A-Za-z0-9._]+}}) : (f32) -> f32
+! 
PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @atanf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +! FAST: {{%[A-Za-z0-9._]+}} = llvm.call @atan2f({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = llvm.call @atan2f({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @atan2f({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 + +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.ceil"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.ceil"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @ceilf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.cos"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.cos"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @cosf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +! FAST: {{%[A-Za-z0-9._]+}} = llvm.call @erff({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = llvm.call @erff({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @erff({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.exp"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.exp"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @expf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.floor"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.floor"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @floorf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.log"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.log"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @logf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.log10"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.log10"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @log10f({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +! ALL: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.lround.i32.f32({{%[A-Za-z0-9._]+}}) : (f32) -> i32 + +! ALL: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.lround.i64.f32({{%[A-Za-z0-9._]+}}) : (f32) -> i64 + +! ALL: [[STOI:%[A-Za-z0-9._]+]] = llvm.sext {{%[A-Za-z0-9._]+}} : i16 to i32 +! ALL: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.powi.f32.i32({{%[A-Za-z0-9._]+}}, [[STOI]]) : (f32, i32) -> f32 + +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.pow"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.pow"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @powf({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 + +! ALL: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.powi.f32.i32({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, i32) -> f32 + +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.copysign"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.copysign"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @copysignf({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 + +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.sin"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! 
RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.sin"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @sinf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +! FAST: {{%[A-Za-z0-9._]+}} = llvm.call @tanhf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = llvm.call @tanhf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @tanhf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +function test_real4(x, y, c, s, i) + real :: x, y, test_real4 + complex(4) :: c + integer(2) :: s + integer(4) :: i + test_real4 = abs(x) + abs(c) + aint(x) + anint(x) + atan(x) + atan2(x, y) + & + ceiling(x) + cos(x) + erf(x) + exp(x) + floor(x) + log(x) + log10(x) + & + nint(x, 4) + nint(x, 8) + x ** s + x ** y + x ** i + sign(x, y) + & + sin(x) + tanh(x) +end function + +! ALL-LABEL: @_QPtest_real8 +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.fabs"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.fabs"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @fabs({{%[A-Za-z0-9._]+}}) : (f64) -> f64 + +! FAST: {{%[A-Za-z0-9._]+}} = llvm.call @hypot({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = llvm.call @hypot({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @hypot({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 + +! ALL: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.trunc.f64({{%[A-Za-z0-9._]+}}) : (f64) -> f64 + +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.round.f64({{%[A-Za-z0-9._]+}}) : (f64) -> f64 + +! FAST: {{%[A-Za-z0-9._]+}} = llvm.call @atan({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = llvm.call @atan({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @atan({{%[A-Za-z0-9._]+}}) : (f64) -> f64 + +! FAST: {{%[A-Za-z0-9._]+}} = llvm.call @atan2({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = llvm.call @atan2({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @atan2({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 + +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.ceil"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.ceil"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @ceil({{%[A-Za-z0-9._]+}}) : (f64) -> f64 + +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.cos"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.cos"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @cos({{%[A-Za-z0-9._]+}}) : (f64) -> f64 + +! FAST: {{%[A-Za-z0-9._]+}} = llvm.call @erf({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = llvm.call @erf({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @erf({{%[A-Za-z0-9._]+}}) : (f64) -> f64 + +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.exp"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.exp"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @exp({{%[A-Za-z0-9._]+}}) : (f64) -> f64 + +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.floor"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! 
RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.floor"({{%[A-Za-z0-9._]+}}) : (f64) -> f64
+! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @floor({{%[A-Za-z0-9._]+}}) : (f64) -> f64
+
+! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.log"({{%[A-Za-z0-9._]+}}) : (f64) -> f64
+! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.log"({{%[A-Za-z0-9._]+}}) : (f64) -> f64
+! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @log({{%[A-Za-z0-9._]+}}) : (f64) -> f64
+
+! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.log10"({{%[A-Za-z0-9._]+}}) : (f64) -> f64
+! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.log10"({{%[A-Za-z0-9._]+}}) : (f64) -> f64
+! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @log10({{%[A-Za-z0-9._]+}}) : (f64) -> f64
+
+! ALL: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.lround.i32.f64({{%[A-Za-z0-9._]+}}) : (f64) -> i32
+
+! ALL: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.lround.i64.f64({{%[A-Za-z0-9._]+}}) : (f64) -> i64
+
+! ALL: [[STOI:%[A-Za-z0-9._]+]] = llvm.sext {{%[A-Za-z0-9._]+}} : i16 to i32
+! ALL: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.powi.f64.i32({{%[A-Za-z0-9._]+}}, [[STOI]]) : (f64, i32) -> f64
+
+! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.pow"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64
+! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.pow"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64
+! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @pow({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64
+
+! ALL: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.powi.f64.i32({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, i32) -> f64
+
+! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.copysign"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64
+! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.copysign"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64
+! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @copysign({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64
+
+! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.sin"({{%[A-Za-z0-9._]+}}) : (f64) -> f64
+! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.sin"({{%[A-Za-z0-9._]+}}) : (f64) -> f64
+! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @sin({{%[A-Za-z0-9._]+}}) : (f64) -> f64
+
+! FAST: {{%[A-Za-z0-9._]+}} = llvm.call @tanh({{%[A-Za-z0-9._]+}}) : (f64) -> f64
+! RELAXED: {{%[A-Za-z0-9._]+}} = llvm.call @tanh({{%[A-Za-z0-9._]+}}) : (f64) -> f64
+! PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @tanh({{%[A-Za-z0-9._]+}}) : (f64) -> f64
+
+function test_real8(x, y, c, s, i)
+  real(8) :: x, y, test_real8
+  complex(8) :: c
+  integer(2) :: s
+  integer(4) :: i
+  test_real8 = abs(x) + abs(c) + aint(x) + anint(x) + atan(x) + atan2(x, y) + &
+      ceiling(x) + cos(x) + erf(x) + exp(x) + floor(x) + log(x) + log10(x) + &
+      nint(x, 4) + nint(x, 8) + x ** s + x ** y + x ** i + sign(x, y) + &
+      sin(x) + tanh(x)
+end function
diff --git a/flang/test/Lower/Intrinsics/exp.f90 b/flang/test/Lower/Intrinsics/exp.f90
--- a/flang/test/Lower/Intrinsics/exp.f90
+++ b/flang/test/Lower/Intrinsics/exp.f90
@@ -1,5 +1,5 @@
-! RUN: bbc -emit-fir %s -o - | FileCheck %s
-! RUN: %flang_fc1 -emit-fir %s -o - | FileCheck %s
+! RUN: bbc -emit-fir -outline-intrinsics %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-fir -mllvm -outline-intrinsics %s -o - | FileCheck %s

 ! CHECK-LABEL: exp_testr
 ! CHECK-SAME: (%[[AREF:.*]]: !fir.ref<f32> {{.*}}, %[[BREF:.*]]: !fir.ref<f32> {{.*}})
diff --git a/flang/test/Lower/Intrinsics/log.f90 b/flang/test/Lower/Intrinsics/log.f90
--- a/flang/test/Lower/Intrinsics/log.f90
+++ b/flang/test/Lower/Intrinsics/log.f90
@@ -1,5 +1,5 @@
-! RUN: bbc -emit-fir %s -o - | FileCheck %s
-! RUN: %flang_fc1 -emit-fir %s -o - | FileCheck %s
+! RUN: bbc -emit-fir -outline-intrinsics %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-fir -mllvm -outline-intrinsics %s -o - | FileCheck %s

 ! CHECK-LABEL: log_testr
 ! CHECK-SAME: (%[[AREF:.*]]: !fir.ref<f32> {{.*}}, %[[BREF:.*]]: !fir.ref<f32> {{.*}})
diff --git a/flang/test/Lower/Intrinsics/math-runtime-options.f90 b/flang/test/Lower/Intrinsics/math-runtime-options.f90
--- a/flang/test/Lower/Intrinsics/math-runtime-options.f90
+++ b/flang/test/Lower/Intrinsics/math-runtime-options.f90
@@ -1,7 +1,7 @@
-! RUN: bbc -emit-fir --math-runtime=fast %s -o - | FileCheck %s --check-prefixes="FIR,FAST"
-! RUN: bbc -emit-fir --math-runtime=relaxed %s -o - | FileCheck %s --check-prefixes="FIR,RELAXED"
-! RUN: bbc -emit-fir --math-runtime=precise %s -o - | FileCheck %s --check-prefixes="FIR,PRECISE"
-! RUN: bbc -emit-fir --math-runtime=llvm %s -o - | FileCheck %s --check-prefixes="FIR,LLVM"
+! RUN: bbc -emit-fir --math-runtime=fast -outline-intrinsics %s -o - | FileCheck %s --check-prefixes="FIR,FAST"
+! RUN: bbc -emit-fir --math-runtime=relaxed -outline-intrinsics %s -o - | FileCheck %s --check-prefixes="FIR,RELAXED"
+! RUN: bbc -emit-fir --math-runtime=precise -outline-intrinsics %s -o - | FileCheck %s --check-prefixes="FIR,PRECISE"
+! RUN: bbc -emit-fir --math-runtime=llvm -outline-intrinsics %s -o - | FileCheck %s --check-prefixes="FIR,LLVM"

 ! CHECK-LABEL: cos_testr
 subroutine cos_testr(a, b)
diff --git a/flang/test/Lower/late-math-lowering.f90 b/flang/test/Lower/late-math-lowering.f90
new file mode 100644
--- /dev/null
+++ b/flang/test/Lower/late-math-lowering.f90
@@ -0,0 +1,134 @@
+! RUN: bbc -emit-fir %s -o - --math-lowering=late --math-runtime=fast | FileCheck --check-prefixes=ALL,FAST %s
+! 'relaxed' matches 'fast' exactly right now, but this will change:
+! RUN: bbc -emit-fir %s -o - --math-lowering=late --math-runtime=relaxed | FileCheck --check-prefixes=ALL,RELAXED %s
+! RUN: bbc -emit-fir %s -o - --math-lowering=late --math-runtime=precise | FileCheck --check-prefixes=ALL,PRECISE %s
+
+! ALL-LABEL: @_QPtest_real4
+! FAST: {{%[A-Za-z0-9._]+}} = math.abs {{%[A-Za-z0-9._]+}} : f32
+! RELAXED: {{%[A-Za-z0-9._]+}} = math.abs {{%[A-Za-z0-9._]+}} : f32
+! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @fabsf({{%[A-Za-z0-9._]+}}) : (f32) -> f32
+! ALL: {{%[A-Za-z0-9._]+}} = fir.call @hypotf({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32
+! ALL: {{%[A-Za-z0-9._]+}} = fir.call @llvm.trunc.f32({{%[A-Za-z0-9._]+}}) : (f32) -> f32
+! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f32) -> f32
+! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f32) -> f32
+! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @llvm.round.f32({{%[A-Za-z0-9._]+}}) : (f32) -> f32
+! FAST: {{%[A-Za-z0-9._]+}} = math.atan {{%[A-Za-z0-9._]+}} : f32
+! RELAXED: {{%[A-Za-z0-9._]+}} = math.atan {{%[A-Za-z0-9._]+}} : f32
+! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @atanf({{%[A-Za-z0-9._]+}}) : (f32) -> f32
+! FAST: {{%[A-Za-z0-9._]+}} = math.atan2 {{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}} : f32
+! RELAXED: {{%[A-Za-z0-9._]+}} = math.atan2 {{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}} : f32
+! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @atan2f({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32
+! FAST: {{%[A-Za-z0-9._]+}} = math.ceil {{%[A-Za-z0-9._]+}} : f32
+! RELAXED: {{%[A-Za-z0-9._]+}} = math.ceil {{%[A-Za-z0-9._]+}} : f32
+! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @ceilf({{%[A-Za-z0-9._]+}}) : (f32) -> f32
+! 
FAST: {{%[A-Za-z0-9._]+}} = math.cos {{%[A-Za-z0-9._]+}} : f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.cos {{%[A-Za-z0-9._]+}} : f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @cosf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! FAST: {{%[A-Za-z0-9._]+}} = math.erf {{%[A-Za-z0-9._]+}} : f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.erf {{%[A-Za-z0-9._]+}} : f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @erff({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! FAST: {{%[A-Za-z0-9._]+}} = math.exp {{%[A-Za-z0-9._]+}} : f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.exp {{%[A-Za-z0-9._]+}} : f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @expf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! FAST: {{%[A-Za-z0-9._]+}} = math.floor {{%[A-Za-z0-9._]+}} : f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.floor {{%[A-Za-z0-9._]+}} : f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @floorf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! FAST: {{%[A-Za-z0-9._]+}} = math.log {{%[A-Za-z0-9._]+}} : f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.log {{%[A-Za-z0-9._]+}} : f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @logf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! FAST: {{%[A-Za-z0-9._]+}} = math.log10 {{%[A-Za-z0-9._]+}} : f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.log10 {{%[A-Za-z0-9._]+}} : f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @log10f({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! ALL: {{%[A-Za-z0-9._]+}} = fir.call @llvm.lround.i32.f32({{%[A-Za-z0-9._]+}}) : (f32) -> i32 +! ALL: {{%[A-Za-z0-9._]+}} = fir.call @llvm.lround.i64.f32({{%[A-Za-z0-9._]+}}) : (f32) -> i64 +! ALL: [[STOI:%[A-Za-z0-9._]+]] = fir.convert {{%[A-Za-z0-9._]+}} : (i16) -> i32 +! ALL: {{%[A-Za-z0-9._]+}} = fir.call @llvm.powi.f32.i32({{%[A-Za-z0-9._]+}}, [[STOI]]) : (f32, i32) -> f32 +! FAST: {{%[A-Za-z0-9._]+}} = math.powf {{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}} : f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.powf {{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}} : f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @powf({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +! ALL: {{%[A-Za-z0-9._]+}} = fir.call @llvm.powi.f32.i32({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, i32) -> f32 +! FAST: {{%[A-Za-z0-9._]+}} = math.copysign {{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}} : f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.copysign {{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}} : f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @copysignf({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +! FAST: {{%[A-Za-z0-9._]+}} = math.sin {{%[A-Za-z0-9._]+}} : f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.sin {{%[A-Za-z0-9._]+}} : f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @sinf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! FAST: {{%[A-Za-z0-9._]+}} = math.tanh {{%[A-Za-z0-9._]+}} : f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.tanh {{%[A-Za-z0-9._]+}} : f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @tanhf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +function test_real4(x, y, c, s, i) + real :: x, y, test_real4 + complex(4) :: c + integer(2) :: s + integer(4) :: i + test_real4 = abs(x) + abs(c) + aint(x) + anint(x) + atan(x) + atan2(x, y) + & + ceiling(x) + cos(x) + erf(x) + exp(x) + floor(x) + log(x) + log10(x) + & + nint(x, 4) + nint(x, 8) + x ** s + x ** y + x ** i + sign(x, y) + & + sin(x) + tanh(x) +end function + +! ALL-LABEL: @_QPtest_real8 +! FAST: {{%[A-Za-z0-9._]+}} = math.abs {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.abs {{%[A-Za-z0-9._]+}} : f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @fabs({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! 
ALL: {{%[A-Za-z0-9._]+}} = fir.call @hypot({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +! ALL: {{%[A-Za-z0-9._]+}} = fir.call @llvm.trunc.f64({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @llvm.round.f64({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.atan {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.atan {{%[A-Za-z0-9._]+}} : f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @atan({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.atan2 {{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.atan2 {{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}} : f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @atan2({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.ceil {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.ceil {{%[A-Za-z0-9._]+}} : f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @ceil({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.cos {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.cos {{%[A-Za-z0-9._]+}} : f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @cos({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.erf {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.erf {{%[A-Za-z0-9._]+}} : f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @erf({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.exp {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.exp {{%[A-Za-z0-9._]+}} : f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @exp({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.floor {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.floor {{%[A-Za-z0-9._]+}} : f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @floor({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.log {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.log {{%[A-Za-z0-9._]+}} : f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @log({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.log10 {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.log10 {{%[A-Za-z0-9._]+}} : f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @log10({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! ALL: {{%[A-Za-z0-9._]+}} = fir.call @llvm.lround.i32.f64({{%[A-Za-z0-9._]+}}) : (f64) -> i32 +! ALL: {{%[A-Za-z0-9._]+}} = fir.call @llvm.lround.i64.f64({{%[A-Za-z0-9._]+}}) : (f64) -> i64 +! ALL: [[STOI:%[A-Za-z0-9._]+]] = fir.convert {{%[A-Za-z0-9._]+}} : (i16) -> i32 +! ALL: {{%[A-Za-z0-9._]+}} = fir.call @llvm.powi.f64.i32({{%[A-Za-z0-9._]+}}, [[STOI]]) : (f64, i32) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.powf {{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.powf {{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}} : f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @pow({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +! ALL: {{%[A-Za-z0-9._]+}} = fir.call @llvm.powi.f64.i32({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, i32) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.copysign {{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.copysign {{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}} : f64 +! 
PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @copysign({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.sin {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.sin {{%[A-Za-z0-9._]+}} : f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @sin({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.tanh {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.tanh {{%[A-Za-z0-9._]+}} : f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @tanh({{%[A-Za-z0-9._]+}}) : (f64) -> f64 + +function test_real8(x, y, c, s, i) + real(8) :: x, y, test_real8 + complex(8) :: c + integer(2) :: s + integer(4) :: i + test_real8 = abs(x) + abs(c) + aint(x) + anint(x) + atan(x) + atan2(x, y) + & + ceiling(x) + cos(x) + erf(x) + exp(x) + floor(x) + log(x) + log10(x) + & + nint(x, 4) + nint(x, 8) + x ** s + x ** y + x ** i + sign(x, y) + & + sin(x) + tanh(x) +end function diff --git a/flang/test/Lower/llvm-math.f90 b/flang/test/Lower/llvm-math.f90 --- a/flang/test/Lower/llvm-math.f90 +++ b/flang/test/Lower/llvm-math.f90 @@ -1,4 +1,4 @@ -! RUN: bbc -emit-fir %s -o - --math-runtime=llvm | FileCheck %s +! RUN: bbc -emit-fir %s -o - --math-runtime=llvm --outline-intrinsics | FileCheck %s SUBROUTINE POW_WRAPPER(IN, IN2, OUT) DOUBLE PRECISION IN, IN2 diff --git a/flang/test/Lower/sqrt.f90 b/flang/test/Lower/sqrt.f90 --- a/flang/test/Lower/sqrt.f90 +++ b/flang/test/Lower/sqrt.f90 @@ -1,5 +1,5 @@ -! RUN: bbc -emit-fir %s -o - | FileCheck %s -! RUN: %flang_fc1 -emit-fir %s -o - | FileCheck %s +! RUN: bbc -emit-fir -outline-intrinsics %s -o - | FileCheck %s +! RUN: %flang_fc1 -emit-fir -mllvm -outline-intrinsics %s -o - | FileCheck %s ! CHECK-LABEL: sqrt_testr subroutine sqrt_testr(a, b) diff --git a/flang/test/Lower/trigonometric-intrinsics.f90 b/flang/test/Lower/trigonometric-intrinsics.f90 --- a/flang/test/Lower/trigonometric-intrinsics.f90 +++ b/flang/test/Lower/trigonometric-intrinsics.f90 @@ -1,5 +1,5 @@ -! RUN: bbc -emit-fir %s -o - | FileCheck %s -! RUN: %flang_fc1 -emit-fir %s -o - | FileCheck %s +! RUN: bbc -emit-fir -outline-intrinsics %s -o - | FileCheck %s +! RUN: %flang_fc1 -emit-fir -mllvm -outline-intrinsics %s -o - | FileCheck %s ! 
CHECK-LABEL: atan_testr
 subroutine atan_testr(a, b)
diff --git a/mlir/lib/Conversion/MathToLibm/MathToLibm.cpp b/mlir/lib/Conversion/MathToLibm/MathToLibm.cpp
--- a/mlir/lib/Conversion/MathToLibm/MathToLibm.cpp
+++ b/mlir/lib/Conversion/MathToLibm/MathToLibm.cpp
@@ -143,11 +143,15 @@
   patterns.add<VecOpToScalarOp<math::Atan2Op>, VecOpToScalarOp<math::ErfOp>,
                VecOpToScalarOp<math::ExpM1Op>, VecOpToScalarOp<math::TanhOp>,
                VecOpToScalarOp<math::CosOp>, VecOpToScalarOp<math::SinOp>,
-               VecOpToScalarOp<math::RoundOp>>(patterns.getContext(), benefit);
+               VecOpToScalarOp<math::RoundOp>, VecOpToScalarOp<math::AtanOp>>(
+      patterns.getContext(), benefit);
   patterns.add<PromoteOpToF32<math::Atan2Op>, PromoteOpToF32<math::ErfOp>,
                PromoteOpToF32<math::ExpM1Op>, PromoteOpToF32<math::TanhOp>,
                PromoteOpToF32<math::CosOp>, PromoteOpToF32<math::SinOp>,
-               PromoteOpToF32<math::RoundOp>>(patterns.getContext(), benefit);
+               PromoteOpToF32<math::RoundOp>, PromoteOpToF32<math::AtanOp>>(
+      patterns.getContext(), benefit);
+  patterns.add<ScalarOpToLibmCall<math::AtanOp>>(patterns.getContext(), "atanf",
+                                                 "atan", benefit);
   patterns.add<ScalarOpToLibmCall<math::Atan2Op>>(patterns.getContext(),
                                                   "atan2f", "atan2", benefit);
   patterns.add<ScalarOpToLibmCall<math::ErfOp>>(patterns.getContext(), "erff",
diff --git a/mlir/test/Conversion/MathToLibm/convert-to-libm.mlir b/mlir/test/Conversion/MathToLibm/convert-to-libm.mlir
--- a/mlir/test/Conversion/MathToLibm/convert-to-libm.mlir
+++ b/mlir/test/Conversion/MathToLibm/convert-to-libm.mlir
@@ -1,5 +1,7 @@
 // RUN: mlir-opt %s -convert-math-to-libm -canonicalize | FileCheck %s

+// CHECK-DAG: @atan(f64) -> f64
+// CHECK-DAG: @atanf(f32) -> f32
 // CHECK-DAG: @erf(f64) -> f64
 // CHECK-DAG: @erff(f32) -> f32
 // CHECK-DAG: @expm1(f64) -> f64
@@ -15,6 +17,53 @@
 // CHECK-DAG: @sin(f64) -> f64
 // CHECK-DAG: @sinf(f32) -> f32

+// CHECK-LABEL: func @atan_caller
+// CHECK-SAME: %[[FLOAT:.*]]: f32
+// CHECK-SAME: %[[DOUBLE:.*]]: f64
+// CHECK-SAME: %[[HALF:.*]]: f16
+// CHECK-SAME: %[[BFLOAT:.*]]: bf16
+func.func @atan_caller(%float: f32, %double: f64, %half: f16, %bfloat: bf16) -> (f32, f64, f16, bf16) {
+  // CHECK: %[[FLOAT_RESULT:.*]] = call @atanf(%[[FLOAT]]) : (f32) -> f32
+  %float_result = math.atan %float : f32
+  // CHECK: %[[DOUBLE_RESULT:.*]] = call @atan(%[[DOUBLE]]) : (f64) -> f64
+  %double_result = math.atan %double : f64
+  // CHECK: %[[HALF_PROMOTED:.*]] = arith.extf %[[HALF]] : f16 to f32
+  // CHECK: %[[HALF_CALL:.*]] = call @atanf(%[[HALF_PROMOTED]]) : (f32) -> f32
+  // CHECK: %[[HALF_RESULT:.*]] = arith.truncf %[[HALF_CALL]] : f32 to f16
+  %half_result = math.atan %half : f16
+  // CHECK: %[[BFLOAT_PROMOTED:.*]] = arith.extf %[[BFLOAT]] : bf16 to f32
+  // CHECK: %[[BFLOAT_CALL:.*]] = call @atanf(%[[BFLOAT_PROMOTED]]) : (f32) -> f32
+  // CHECK: %[[BFLOAT_RESULT:.*]] = arith.truncf %[[BFLOAT_CALL]] : f32 to bf16
+  %bfloat_result = math.atan %bfloat : bf16
+  // CHECK: return %[[FLOAT_RESULT]], %[[DOUBLE_RESULT]], %[[HALF_RESULT]], %[[BFLOAT_RESULT]]
+  return %float_result, %double_result, %half_result, %bfloat_result : f32, f64, f16, bf16
+}
+
+// CHECK-LABEL: func @atan_vec_caller(
+// CHECK-SAME: %[[VAL_0:.*]]: vector<2xf32>,
+// CHECK-SAME: %[[VAL_1:.*]]: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) {
+// CHECK-DAG: %[[CVF:.*]] = arith.constant dense<0.000000e+00> : vector<2xf32>
+// CHECK-DAG: %[[CVD:.*]] = arith.constant dense<0.000000e+00> : vector<2xf64>
+// CHECK: %[[IN0_F32:.*]] = vector.extract %[[VAL_0]][0] : vector<2xf32>
+// CHECK: %[[OUT0_F32:.*]] = call @atanf(%[[IN0_F32]]) : (f32) -> f32
+// CHECK: %[[VAL_8:.*]] = vector.insert %[[OUT0_F32]], %[[CVF]] [0] : f32 into vector<2xf32>
+// CHECK: %[[IN1_F32:.*]] = vector.extract %[[VAL_0]][1] : vector<2xf32>
+// CHECK: %[[OUT1_F32:.*]] = call @atanf(%[[IN1_F32]]) : (f32) -> f32
+// CHECK: %[[VAL_11:.*]] = vector.insert %[[OUT1_F32]], %[[VAL_8]] [1] : f32 into vector<2xf32>
+// CHECK: 
%[[IN0_F64:.*]] = vector.extract %[[VAL_1]][0] : vector<2xf64> +// CHECK: %[[OUT0_F64:.*]] = call @atan(%[[IN0_F64]]) : (f64) -> f64 +// CHECK: %[[VAL_14:.*]] = vector.insert %[[OUT0_F64]], %[[CVD]] [0] : f64 into vector<2xf64> +// CHECK: %[[IN1_F64:.*]] = vector.extract %[[VAL_1]][1] : vector<2xf64> +// CHECK: %[[OUT1_F64:.*]] = call @atan(%[[IN1_F64]]) : (f64) -> f64 +// CHECK: %[[VAL_17:.*]] = vector.insert %[[OUT1_F64]], %[[VAL_14]] [1] : f64 into vector<2xf64> +// CHECK: return %[[VAL_11]], %[[VAL_17]] : vector<2xf32>, vector<2xf64> +// CHECK: } +func.func @atan_vec_caller(%float: vector<2xf32>, %double: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { + %float_result = math.atan %float : vector<2xf32> + %double_result = math.atan %double : vector<2xf64> + return %float_result, %double_result : vector<2xf32>, vector<2xf64> +} + // CHECK-LABEL: func @tanh_caller // CHECK-SAME: %[[FLOAT:.*]]: f32 // CHECK-SAME: %[[DOUBLE:.*]]: f64
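Stepping back from the individual hunks: in the fir-to-llvm-ir pipeline, the two pattern sets registered in CodeGen.cpp above divide the work by benefit. Below is a minimal sketch of that registration order; the helper name addMathLoweringPatterns is hypothetical, and only the two populate entry points used by this patch are assumed.

```cpp
#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
#include "mlir/Conversion/MathToLLVM/MathToLLVM.h"
#include "mlir/Conversion/MathToLibm/MathToLibm.h"
#include "mlir/IR/PatternMatch.h"

// Hypothetical helper mirroring the registration done in the
// fir-to-llvm-ir pass above.
static void addMathLoweringPatterns(mlir::LLVMTypeConverter &typeConverter,
                                    mlir::RewritePatternSet &patterns) {
  // Ops with an LLVM intrinsic form (math.sin, math.exp, math.log, ...)
  // are converted straight into the llvm dialect at the default benefit.
  mlir::populateMathToLLVMConversionPatterns(typeConverter, patterns);
  // Benefit 0 makes the libm lowering a fallback: it only fires for ops
  // the set above does not handle (math.atan, math.erf, math.tanh, ...),
  // which then become calls to libm entry points.
  mlir::populateMathToLibmConversionPatterns(patterns, /*benefit=*/0);
}
```

This matches the expectations encoded in late-math-codegen.f90: for the fast and relaxed modes, sin becomes "llvm.intr.sin" while tanh becomes a call to @tanh/@tanhf.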