Index: flang/include/flang/Optimizer/Support/InitFIR.h =================================================================== --- flang/include/flang/Optimizer/Support/InitFIR.h +++ flang/include/flang/Optimizer/Support/InitFIR.h @@ -28,7 +28,8 @@ mlir::AffineDialect, FIROpsDialect, mlir::acc::OpenACCDialect, \ mlir::omp::OpenMPDialect, mlir::scf::SCFDialect, \ mlir::arith::ArithmeticDialect, mlir::cf::ControlFlowDialect, \ - mlir::func::FuncDialect, mlir::vector::VectorDialect + mlir::func::FuncDialect, mlir::vector::VectorDialect, \ + mlir::math::MathDialect // The definitive list of dialects used by flang. #define FLANG_DIALECT_LIST \ Index: flang/lib/Lower/IntrinsicCall.cpp =================================================================== --- flang/lib/Lower/IntrinsicCall.cpp +++ flang/lib/Lower/IntrinsicCall.cpp @@ -35,6 +35,7 @@ #include "flang/Optimizer/Dialect/FIROpsSupport.h" #include "flang/Optimizer/Support/FatalError.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/Math/IR/Math.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -960,8 +961,34 @@ // Math runtime description and matching utility //===----------------------------------------------------------------------===// -/// Command line option to modify math runtime version used to implement -/// intrinsics. +/// Command line option to control how math operations are lowered +/// into MLIR. +/// Going forward, most of the math operations have to be lowered +/// to some MLIR dialect operations or libm calls, if the corresponding +/// MLIR operation is not available or not reasonable to create +/// (e.g. there are no know optimization opportunities for the math +/// operation in MLIR). +/// +/// In general, exposing MLIR operations early can potentially enable more +/// MLIR optimizations. +enum MathLoweringMode { + // Most operations will be lowered to pgmath calls in this mode. 
+  earlyLowering,
+
+  // Lower math operations into operations of MLIR dialects,
+  // such as mlir::math, mlir::complex, etc.
+  lateLowering,
+};
+
+llvm::cl::opt<MathLoweringMode> mathLowering(
+    "math-lowering", llvm::cl::desc("Select math operations lowering mode:"),
+    llvm::cl::values(
+        clEnumValN(earlyLowering, "early", "lower to library calls early"),
+        clEnumValN(lateLowering, "late", "lower to MLIR dialect operations")),
+    llvm::cl::init(earlyLowering));
+
+/// Command line option to modify math runtime behavior used to implement
+/// intrinsics. This option applies both to early and late math-lowering modes.
 enum MathRuntimeVersion {
   fastVersion,
   relaxedVersion,
@@ -969,11 +996,11 @@
   llvmOnly
 };
 llvm::cl::opt<MathRuntimeVersion> mathRuntimeVersion(
-    "math-runtime", llvm::cl::desc("Select math runtime version:"),
+    "math-runtime", llvm::cl::desc("Select math operations' runtime behavior:"),
     llvm::cl::values(
-        clEnumValN(fastVersion, "fast", "use pgmath fast runtime"),
-        clEnumValN(relaxedVersion, "relaxed", "use pgmath relaxed runtime"),
-        clEnumValN(preciseVersion, "precise", "use pgmath precise runtime"),
+        clEnumValN(fastVersion, "fast", "use fast runtime behavior"),
+        clEnumValN(relaxedVersion, "relaxed", "use relaxed runtime behavior"),
+        clEnumValN(preciseVersion, "precise", "use precise runtime behavior"),
         clEnumValN(llvmOnly, "llvm",
                    "only use LLVM intrinsics (may be incomplete)")),
     llvm::cl::init(fastVersion));
@@ -984,6 +1011,8 @@
   // Needed for implicit compare with keys.
   constexpr operator Key() const { return key; }
   Key key; // intrinsic name
+
+  // Name of a runtime function that implements the operation.
   llvm::StringRef symbol;
   fir::runtime::FuncTypeBuilderFunc typeGenerator;
 };
@@ -1050,9 +1079,162 @@
   return mlir::FunctionType::get(context, {t}, {r});
 }
 
-// TODO : Fill-up this table with more intrinsic.
+template <int Bits>
+static mlir::FunctionType genF64F64IntFuncType(mlir::MLIRContext *context) {
+  auto ftype = mlir::FloatType::getF64(context);
+  auto itype = mlir::IntegerType::get(context, Bits);
+  return mlir::FunctionType::get(context, {ftype, itype}, {ftype});
+}
+
+template <int Bits>
+static mlir::FunctionType genF32F32IntFuncType(mlir::MLIRContext *context) {
+  auto ftype = mlir::FloatType::getF32(context);
+  auto itype = mlir::IntegerType::get(context, Bits);
+  return mlir::FunctionType::get(context, {ftype, itype}, {ftype});
+}
+
+/// Callback type for generating lowering for a math operation.
+using MathGeneratorTy = mlir::Value (*)(fir::FirOpBuilder &, mlir::Location,
+                                        llvm::StringRef name,
+                                        mlir::FunctionType funcType,
+                                        llvm::ArrayRef<mlir::Value>);
+
+struct MathOperation {
+  // llvm::StringRef comparison operator are not constexpr, so use string_view.
+  using Key = std::string_view;
+  // Needed for implicit compare with keys.
+  constexpr operator Key() const { return key; }
+  // Intrinsic name.
+  Key key;
+
+  // Name of a runtime function that implements the operation.
+  llvm::StringRef symbol;
+  fir::runtime::FuncTypeBuilderFunc typeGenerator;
+
+  // If funcGenerator is non null, then it is generating
+  // the lowering code, otherwise - the lowering is done
+  // as a call to a runtime function named as specified
+  // in 'symbol' member.
+  MathGeneratorTy funcGenerator;
+};
+
+static mlir::Value genLibCall(fir::FirOpBuilder &builder, mlir::Location loc,
+                              llvm::StringRef name, mlir::FunctionType funcType,
+                              llvm::ArrayRef<mlir::Value> args) {
+  LLVM_DEBUG(llvm::dbgs() << "Generating '" << name << "' call with type ";
+             funcType.dump(); llvm::dbgs() << "\n");
+  mlir::func::FuncOp funcOp = builder.addNamedFunction(loc, name, funcType);
+  // TODO: ensure 'strictfp' setting on the call for "precise/strict"
+  // FP mode. Set appropriate Fast-Math Flags otherwise.
+  // TODO: we should also mark as many libm function as possible
+  // with 'pure' attribute (of course, not in strict FP mode).
+  auto libCall = builder.create<fir::CallOp>(loc, funcOp, args);
+  LLVM_DEBUG(libCall.dump(); llvm::dbgs() << "\n");
+  return libCall.getResult(0);
+}
+
+template <typename T>
+static mlir::Value genMathOp(fir::FirOpBuilder &builder, mlir::Location loc,
+                             llvm::StringRef name, mlir::FunctionType funcType,
+                             llvm::ArrayRef<mlir::Value> args) {
+  // TODO: we have to annotate the math operations with flags
+  // that will allow to define FP accuracy/exception
+  // behavior per operation, so that after early multi-module
+  // MLIR inlining we can distinguish operation that were
+  // compiled with different settings.
+  // Suggestion:
+  //   * For "relaxed" FP mode set all Fast-Math Flags
+  //     (see "[RFC] FastMath flags support in MLIR (arith dialect)"
+  //     topic at discourse.llvm.org).
+  //   * For "fast" FP mode set all Fast-Math Flags except 'afn'.
+  //   * For "precise/strict" FP mode generate fir.calls to libm
+  //     entries and annotate them with an attribute that will
+  //     end up transformed into 'strictfp' LLVM attribute (TBD).
+  //     Elsewhere, "precise/strict" FP mode should also set
+  //     'strictfp' for all user functions and calls so that
+  //     LLVM backend does the right job.
+  //   * Operations that cannot be reasonably optimized in MLIR
+  //     can be also lowered to libm calls for "fast" and "relaxed"
+  //     modes.
+  mlir::Value result;
+  if (mathRuntimeVersion == preciseVersion) {
+    result = genLibCall(builder, loc, name, funcType, args);
+  } else {
+    LLVM_DEBUG(llvm::dbgs()
+                   << "Generating '" << name << "' operation with type ";
+               funcType.dump(); llvm::dbgs() << "\n");
+    result = builder.create<T>(loc, args);
+  }
+  LLVM_DEBUG(result.dump(); llvm::dbgs() << "\n");
+  return result;
+}
+
+/// Mapping between mathematical intrinsic operations and MLIR operations
+/// of some appropriate dialect (math, complex, etc.) or libm calls.
+/// TODO: support remaining Fortran math intrinsics.
+/// See https://gcc.gnu.org/onlinedocs/gcc-12.1.0/gfortran/\
+/// Intrinsic-Procedures.html for a reference.
+static constexpr MathOperation mathOperations[] = {
+    {"abs", "fabsf", genF32F32FuncType, genMathOp<mlir::math::AbsOp>},
+    {"abs", "fabs", genF64F64FuncType, genMathOp<mlir::math::AbsOp>},
+    // llvm.trunc behaves the same way as libm's trunc.
+    {"aint", "llvm.trunc.f32", genF32F32FuncType, genLibCall},
+    {"aint", "llvm.trunc.f64", genF64F64FuncType, genLibCall},
+    // llvm.round behaves the same way as libm's round.
+    {"anint", "llvm.round.f32", genF32F32FuncType,
+     genMathOp<mlir::LLVM::RoundOp>},
+    {"anint", "llvm.round.f64", genF64F64FuncType,
+     genMathOp<mlir::LLVM::RoundOp>},
+    {"atan", "atanf", genF32F32FuncType, genMathOp<mlir::math::AtanOp>},
+    {"atan", "atan", genF64F64FuncType, genMathOp<mlir::math::AtanOp>},
+    {"atan2", "atan2f", genF32F32F32FuncType, genMathOp<mlir::math::Atan2Op>},
+    {"atan2", "atan2", genF64F64F64FuncType, genMathOp<mlir::math::Atan2Op>},
+    // math::CeilOp returns a real, while Fortran CEILING returns integer.
+    {"ceil", "ceilf", genF32F32FuncType, genMathOp<mlir::math::CeilOp>},
+    {"ceil", "ceil", genF64F64FuncType, genMathOp<mlir::math::CeilOp>},
+    {"cos", "cosf", genF32F32FuncType, genMathOp<mlir::math::CosOp>},
+    {"cos", "cos", genF64F64FuncType, genMathOp<mlir::math::CosOp>},
+    {"erf", "erff", genF32F32FuncType, genMathOp<mlir::math::ErfOp>},
+    {"erf", "erf", genF64F64FuncType, genMathOp<mlir::math::ErfOp>},
+    {"exp", "expf", genF32F32FuncType, genMathOp<mlir::math::ExpOp>},
+    {"exp", "exp", genF64F64FuncType, genMathOp<mlir::math::ExpOp>},
+    // math::FloorOp returns a real, while Fortran FLOOR returns integer.
+    {"floor", "floorf", genF32F32FuncType, genMathOp<mlir::math::FloorOp>},
+    {"floor", "floor", genF64F64FuncType, genMathOp<mlir::math::FloorOp>},
+    {"hypot", "hypotf", genF32F32F32FuncType, genLibCall},
+    {"hypot", "hypot", genF64F64F64FuncType, genLibCall},
+    {"log", "logf", genF32F32FuncType, genMathOp<mlir::math::LogOp>},
+    {"log", "log", genF64F64FuncType, genMathOp<mlir::math::LogOp>},
+    {"log10", "log10f", genF32F32FuncType, genMathOp<mlir::math::Log10Op>},
+    {"log10", "log10", genF64F64FuncType, genMathOp<mlir::math::Log10Op>},
+    // llvm.lround behaves the same way as libm's lround.
+ {"nint", "llvm.lround.i64.f64", genIntF64FuncType<64>, genLibCall}, + {"nint", "llvm.lround.i64.f32", genIntF32FuncType<64>, genLibCall}, + {"nint", "llvm.lround.i32.f64", genIntF64FuncType<32>, genLibCall}, + {"nint", "llvm.lround.i32.f32", genIntF32FuncType<32>, genLibCall}, + {"pow", "powf", genF32F32F32FuncType, genMathOp}, + {"pow", "pow", genF64F64F64FuncType, genMathOp}, + // TODO: add PowIOp in math and complex dialects. + {"pow", "llvm.powi.f32.i32", genF32F32IntFuncType<32>, genLibCall}, + {"pow", "llvm.powi.f64.i32", genF64F64IntFuncType<32>, genLibCall}, + {"sign", "copysignf", genF32F32F32FuncType, + genMathOp}, + {"sign", "copysign", genF64F64F64FuncType, + genMathOp}, + {"sin", "sinf", genF32F32FuncType, genMathOp}, + {"sin", "sin", genF64F64FuncType, genMathOp}, + {"sqrt", "sqrtf", genF32F32FuncType, genMathOp}, + {"sqrt", "sqrt", genF64F64FuncType, genMathOp}, + {"tanh", "tanhf", genF32F32FuncType, genMathOp}, + {"tanh", "tanh", genF64F64FuncType, genMathOp}, +}; + // Note: These are also defined as operations in LLVM dialect. See if this // can be use and has advantages. +// TODO: remove this table, since the late math lowering should +// replace it and generate proper MLIR operations rather +// than llvm intrinsic calls, which still look like generic +// calls to MLIR and do not enable many optimizations. static constexpr RuntimeFunction llvmIntrinsics[] = { {"abs", "llvm.fabs.f32", genF32F32FuncType}, {"abs", "llvm.fabs.f64", genF64F64FuncType}, @@ -1251,7 +1433,7 @@ /// function type and that will not imply narrowing arguments or extending the /// result. /// If nothing is found, the mlir::func::FuncOp will contain a nullptr. 
-mlir::func::FuncOp searchFunctionInLibrary( +static mlir::func::FuncOp searchFunctionInLibrary( mlir::Location loc, fir::FirOpBuilder &builder, const Fortran::common::StaticMultimapView &lib, llvm::StringRef name, mlir::FunctionType funcType, @@ -1274,6 +1456,77 @@ return {}; } +using RtMap = Fortran::common::StaticMultimapView; +static constexpr RtMap mathOps(mathOperations); +static_assert(mathOps.Verify() && "map must be sorted"); + +/// Look for a MathOperation entry specifying how to lower a mathematical +/// operation defined by \p name with its result' and operands' types +/// specified in the form of a FunctionType \p funcType. +/// If exact match for the given types is found, then the function +/// returns a pointer to the corresponding MathOperation. +/// Otherwise, the function returns nullptr. +/// If there is a MathOperation that can be used with additional +/// type casts for the operands or/and result (non-exact match), +/// then it is returned via \p bestNearMatch argument, and +/// \p bestMatchDistance specifies the FunctionDistance between +/// the requested operation and the non-exact match. +static const MathOperation * +searchMathOperation(fir::FirOpBuilder &builder, llvm::StringRef name, + mlir::FunctionType funcType, + const MathOperation **bestNearMatch, + FunctionDistance &bestMatchDistance) { + auto range = mathOps.equal_range(name); + for (auto iter = range.first; iter != range.second && iter; ++iter) { + const auto &impl = *iter; + auto implType = impl.typeGenerator(builder.getContext()); + if (funcType == implType) + return &impl; // exact match + + FunctionDistance distance(funcType, implType); + if (distance.isSmallerThan(bestMatchDistance)) { + *bestNearMatch = &impl; + bestMatchDistance = std::move(distance); + } + } + return nullptr; +} + +/// Implementation of the operation defined by \p name with type +/// \p funcType is not precise, and the actual available implementation +/// is \p distance away from the requested. 
If using the available +/// implementation results in a precision loss, emit an error message +/// with the given code location \p loc. +static void checkPrecisionLoss(llvm::StringRef name, + mlir::FunctionType funcType, + const FunctionDistance &distance, + mlir::Location loc) { + if (!distance.isLosingPrecision()) + return; + + // Using this runtime version requires narrowing the arguments + // or extending the result. It is not numerically safe. There + // is currently no quad math library that was described in + // lowering and could be used here. Emit an error and continue + // generating the code with the narrowing cast so that the user + // can get a complete list of the problematic intrinsic calls. + std::string message("TODO: no math runtime available for '"); + llvm::raw_string_ostream sstream(message); + if (name == "pow") { + assert(funcType.getNumInputs() == 2 && "power operator has two arguments"); + sstream << funcType.getInput(0) << " ** " << funcType.getInput(1); + } else { + sstream << name << "("; + if (funcType.getNumInputs() > 0) + sstream << funcType.getInput(0); + for (mlir::Type argType : funcType.getInputs().drop_front()) + sstream << ", " << argType; + sstream << ")"; + } + sstream << "'"; + mlir::emitError(loc, message); +} + /// Search runtime for the best runtime function given an intrinsic name /// and interface. The interface may not be a perfect match in which case /// the caller is responsible to insert argument and return value conversions. 
@@ -1292,6 +1545,7 @@ static_assert(pgmathR.Verify() && "map must be sorted"); static constexpr RtMap pgmathP(pgmathPrecise); static_assert(pgmathP.Verify() && "map must be sorted"); + if (mathRuntimeVersion == fastVersion) { match = searchFunctionInLibrary(loc, builder, pgmathF, name, funcType, &bestNearMatch, bestMatchDistance); @@ -1317,30 +1571,7 @@ return exactMatch; if (bestNearMatch != nullptr) { - if (bestMatchDistance.isLosingPrecision()) { - // Using this runtime version requires narrowing the arguments - // or extending the result. It is not numerically safe. There - // is currently no quad math library that was described in - // lowering and could be used here. Emit an error and continue - // generating the code with the narrowing cast so that the user - // can get a complete list of the problematic intrinsic calls. - std::string message("TODO: no math runtime available for '"); - llvm::raw_string_ostream sstream(message); - if (name == "pow") { - assert(funcType.getNumInputs() == 2 && - "power operator has two arguments"); - sstream << funcType.getInput(0) << " ** " << funcType.getInput(1); - } else { - sstream << name << "("; - if (funcType.getNumInputs() > 0) - sstream << funcType.getInput(0); - for (mlir::Type argType : funcType.getInputs().drop_front()) - sstream << ", " << argType; - sstream << ")"; - } - sstream << "'"; - mlir::emitError(loc, message); - } + checkPrecisionLoss(name, funcType, bestMatchDistance, loc); return getFuncOp(loc, builder, *bestNearMatch); } return {}; @@ -1574,7 +1805,7 @@ IntrinsicLibrary::RuntimeCallGenerator runtimeCallGenerator = getRuntimeCallGenerator(name, soughtFuncType); return genElementalCall(runtimeCallGenerator, name, *resultType, args, - /* outline */ true); + /*outline=*/outlineAllIntrinsics); } mlir::Value @@ -1726,29 +1957,58 @@ IntrinsicLibrary::RuntimeCallGenerator IntrinsicLibrary::getRuntimeCallGenerator(llvm::StringRef name, mlir::FunctionType soughtFuncType) { - mlir::func::FuncOp funcOp = - 
getRuntimeFunction(loc, builder, name, soughtFuncType); - if (!funcOp) { + mlir::func::FuncOp funcOp; + mlir::FunctionType actualFuncType; + const MathOperation *mathOp = nullptr; + if (mathLowering == lateLowering) { + // Look for a dedicated math operation generator, which + // normally produces a single MLIR operation implementing + // the math operation. + // If not found fall back to a runtime function lookup. + const MathOperation *bestNearMatch = nullptr; + FunctionDistance bestMatchDistance; + mathOp = searchMathOperation(builder, name, soughtFuncType, &bestNearMatch, + bestMatchDistance); + if (!mathOp && bestNearMatch) { + // Use the best near match, optionally issuing an error, + // if types conversions cause precision loss. + checkPrecisionLoss(name, soughtFuncType, bestMatchDistance, loc); + mathOp = bestNearMatch; + } + if (mathOp) + actualFuncType = mathOp->typeGenerator(builder.getContext()); + } + if (!mathOp) + if ((funcOp = getRuntimeFunction(loc, builder, name, soughtFuncType))) + actualFuncType = funcOp.getFunctionType(); + + if (!mathOp && !funcOp) { std::string nameAndType; llvm::raw_string_ostream sstream(nameAndType); sstream << name << "\nrequested type: " << soughtFuncType; crashOnMissingIntrinsic(loc, nameAndType); } - mlir::FunctionType actualFuncType = funcOp.getFunctionType(); assert(actualFuncType.getNumResults() == soughtFuncType.getNumResults() && actualFuncType.getNumInputs() == soughtFuncType.getNumInputs() && actualFuncType.getNumResults() == 1 && "Bad intrinsic match"); - return [funcOp, actualFuncType, + return [funcOp, actualFuncType, mathOp, soughtFuncType](fir::FirOpBuilder &builder, mlir::Location loc, llvm::ArrayRef args) { llvm::SmallVector convertedArguments; for (auto [fst, snd] : llvm::zip(actualFuncType.getInputs(), args)) convertedArguments.push_back(builder.createConvert(loc, fst, snd)); - auto call = builder.create(loc, funcOp, convertedArguments); + mlir::Value result; + // Use math operation generator, if 
available. + if (mathOp) + result = mathOp->funcGenerator(builder, loc, mathOp->symbol, + actualFuncType, convertedArguments); + else + result = builder.create(loc, funcOp, convertedArguments) + .getResult(0); mlir::Type soughtType = soughtFuncType.getResult(0); - return builder.createConvert(loc, soughtType, call.getResult(0)); + return builder.createConvert(loc, soughtType, result); }; } @@ -1914,7 +2174,7 @@ llvm::ArrayRef args) { assert(args.size() == 1); return fir::factory::Complex{builder, loc}.extractComplexPart( - args[0], true /* isImagPart */); + args[0], /*isImagPart=*/true); } // AINT @@ -3987,6 +4247,13 @@ mlir::Value Fortran::lower::genPow(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Type type, mlir::Value x, mlir::Value y) { + // TODO: since there is no libm version of pow with integer exponent, + // we have to provide an alternative implementation for + // "precise/strict" FP mode and (mathLowering == lateLowering). + // One option is to generate internal function with inlined + // implementation and mark it 'strictfp'. + // Another option is to implement it in Fortran runtime library + // (just like matmul). 
return IntrinsicLibrary{builder, loc}.genRuntimeCall("pow", type, {x, y}); } Index: flang/lib/Optimizer/CodeGen/CMakeLists.txt =================================================================== --- flang/lib/Optimizer/CodeGen/CMakeLists.txt +++ flang/lib/Optimizer/CodeGen/CMakeLists.txt @@ -17,6 +17,8 @@ FIRBuilder FIRDialect FIRSupport + MLIRMathToLLVM + MLIRMathToLibm MLIROpenMPToLLVM MLIRLLVMToLLVMIRTranslation MLIRTargetLLVMIRExport Index: flang/lib/Optimizer/CodeGen/CodeGen.cpp =================================================================== --- flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -23,6 +23,8 @@ #include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h" #include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h" #include "mlir/Conversion/LLVMCommon/Pattern.h" +#include "mlir/Conversion/MathToLLVM/MathToLLVM.h" +#include "mlir/Conversion/MathToLibm/MathToLibm.h" #include "mlir/Conversion/OpenMPToLLVM/ConvertOpenMPToLLVM.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Matchers.h" @@ -3380,6 +3382,10 @@ pattern); mlir::cf::populateControlFlowToLLVMConversionPatterns(typeConverter, pattern); + // Convert math-like dialect operations, which can be produced + // when late math lowering mode is used, into llvm dialect. + mlir::populateMathToLLVMConversionPatterns(typeConverter, pattern); + mlir::populateMathToLibmConversionPatterns(pattern, /*benefit=*/0); mlir::ConversionTarget target{*context}; target.addLegalDialect(); // The OpenMP dialect is legal for Operations without regions, for those Index: flang/test/Intrinsics/late-math-codegen.fir =================================================================== --- /dev/null +++ flang/test/Intrinsics/late-math-codegen.fir @@ -0,0 +1,741 @@ +// RUN: split-file %s %t +// TODO: verify that Fast-Math Flags and 'strictfp' are properly set. 
+// RUN: fir-opt %t/fast --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck --check-prefixes=ALL,FAST %s +// RUN: fir-opt %t/relaxed --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck --check-prefixes=ALL,RELAXED %s +// RUN: fir-opt %t/precise --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck --check-prefixes=ALL,PRECISE %s + +//Fortran original source: +//function test_real4(x, y, c, s, i) +// real :: x, y, test_real4 +// complex(4) :: c +// integer(2) :: s +// integer(4) :: i +// test_real4 = abs(x) + abs(c) + aint(x) + anint(x) + atan(x) + atan2(x, y) + & +// ceiling(x) + cos(x) + erf(x) + exp(x) + floor(x) + log(x) + log10(x) + & +// nint(x, 4) + nint(x, 8) + x ** s + x ** y + x ** i + sign(x, y) + & +// sin(x) + tanh(x) +//end function +// +//function test_real8(x, y, c, s, i) +// real(8) :: x, y, test_real8 +// complex(8) :: c +// integer(2) :: s +// integer(4) :: i +// test_real8 = abs(x) + abs(c) + aint(x) + anint(x) + atan(x) + atan2(x, y) + & +// ceiling(x) + cos(x) + erf(x) + exp(x) + floor(x) + log(x) + log10(x) + & +// nint(x, 4) + nint(x, 8) + x ** s + x ** y + x ** i + sign(x, y) + & +// sin(x) + tanh(x) +//end function + +// ALL-LABEL: @_QPtest_real4 +// FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.fabs"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.fabs"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @fabsf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +// FAST: {{%[A-Za-z0-9._]+}} = llvm.call @hypotf({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +// RELAXED: {{%[A-Za-z0-9._]+}} = llvm.call @hypotf({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +// PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @hypotf({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 + +// ALL: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.trunc.f32({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +// FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : 
(f32) -> f32 +// RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.round.f32({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +// FAST: {{%[A-Za-z0-9._]+}} = llvm.call @atanf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// RELAXED: {{%[A-Za-z0-9._]+}} = llvm.call @atanf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @atanf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +// FAST: {{%[A-Za-z0-9._]+}} = llvm.call @atan2f({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +// RELAXED: {{%[A-Za-z0-9._]+}} = llvm.call @atan2f({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +// PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @atan2f({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 + +// FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.ceil"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.ceil"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @ceilf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +// FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.cos"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.cos"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @cosf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +// FAST: {{%[A-Za-z0-9._]+}} = llvm.call @erff({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// RELAXED: {{%[A-Za-z0-9._]+}} = llvm.call @erff({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @erff({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +// FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.exp"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.exp"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @expf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +// FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.floor"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// RELAXED: {{%[A-Za-z0-9._]+}} = 
"llvm.intr.floor"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @floorf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +// FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.log"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.log"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @logf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +// FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.log10"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.log10"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @log10f({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +// ALL: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.lround.i32.f32({{%[A-Za-z0-9._]+}}) : (f32) -> i32 + +// ALL: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.lround.i64.f32({{%[A-Za-z0-9._]+}}) : (f32) -> i64 + +// ALL: [[STOI:%[A-Za-z0-9._]+]] = llvm.sext {{%[A-Za-z0-9._]+}} : i16 to i32 +// ALL: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.powi.f32.i32({{%[A-Za-z0-9._]+}}, [[STOI]]) : (f32, i32) -> f32 + +// FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.pow"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +// RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.pow"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +// PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @powf({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 + +// ALL: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.powi.f32.i32({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, i32) -> f32 + +// FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.copysign"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +// RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.copysign"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +// PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @copysignf({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 + +// FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.sin"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// RELAXED: {{%[A-Za-z0-9._]+}} = 
"llvm.intr.sin"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @sinf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +// FAST: {{%[A-Za-z0-9._]+}} = llvm.call @tanhf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// RELAXED: {{%[A-Za-z0-9._]+}} = llvm.call @tanhf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @tanhf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +// ALL-LABEL: @_QPtest_real8 +// FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.fabs"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.fabs"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @fabs({{%[A-Za-z0-9._]+}}) : (f64) -> f64 + +// FAST: {{%[A-Za-z0-9._]+}} = llvm.call @hypot({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +// RELAXED: {{%[A-Za-z0-9._]+}} = llvm.call @hypot({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +// PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @hypot({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 + +// ALL: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.trunc.f64({{%[A-Za-z0-9._]+}}) : (f64) -> f64 + +// FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.round.f64({{%[A-Za-z0-9._]+}}) : (f64) -> f64 + +// FAST: {{%[A-Za-z0-9._]+}} = llvm.call @atan({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// RELAXED: {{%[A-Za-z0-9._]+}} = llvm.call @atan({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @atan({{%[A-Za-z0-9._]+}}) : (f64) -> f64 + +// FAST: {{%[A-Za-z0-9._]+}} = llvm.call @atan2({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +// RELAXED: {{%[A-Za-z0-9._]+}} = llvm.call @atan2({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +// PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @atan2({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) 
-> f64 + +// FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.ceil"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.ceil"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @ceil({{%[A-Za-z0-9._]+}}) : (f64) -> f64 + +// FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.cos"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.cos"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @cos({{%[A-Za-z0-9._]+}}) : (f64) -> f64 + +// FAST: {{%[A-Za-z0-9._]+}} = llvm.call @erf({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// RELAXED: {{%[A-Za-z0-9._]+}} = llvm.call @erf({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @erf({{%[A-Za-z0-9._]+}}) : (f64) -> f64 + +// FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.exp"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.exp"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @exp({{%[A-Za-z0-9._]+}}) : (f64) -> f64 + +// FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.floor"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.floor"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @floor({{%[A-Za-z0-9._]+}}) : (f64) -> f64 + +// FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.log"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.log"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @log({{%[A-Za-z0-9._]+}}) : (f64) -> f64 + +// FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.log10"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.log10"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @log10({{%[A-Za-z0-9._]+}}) : (f64) -> f64 + +// ALL: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.lround.i32.f64({{%[A-Za-z0-9._]+}}) : (f64) -> i32 + +// ALL: {{%[A-Za-z0-9._]+}} = llvm.call 
@llvm.lround.i64.f64({{%[A-Za-z0-9._]+}}) : (f64) -> i64 + +// ALL: [[STOI:%[A-Za-z0-9._]+]] = llvm.sext {{%[A-Za-z0-9._]+}} : i16 to i32 +// ALL: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.powi.f64.i32({{%[A-Za-z0-9._]+}}, [[STOI]]) : (f64, i32) -> f64 + +// FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.pow"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +// RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.pow"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +// PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @pow({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 + +// ALL: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.powi.f64.i32({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, i32) -> f64 + +// FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.copysign"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +// RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.copysign"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +// PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @copysign({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 + +// FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.sin"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.sin"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @sin({{%[A-Za-z0-9._]+}}) : (f64) -> f64 + +// FAST: {{%[A-Za-z0-9._]+}} = llvm.call @tanh({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// RELAXED: {{%[A-Za-z0-9._]+}} = llvm.call @tanh({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// PRECISE: {{%[A-Za-z0-9._]+}} = llvm.call @tanh({{%[A-Za-z0-9._]+}}) : (f64) -> f64 + +//--- fast + +func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}, %arg1: !fir.ref {fir.bindc_name = "y"}, %arg2: !fir.ref> {fir.bindc_name = "c"}, %arg3: !fir.ref {fir.bindc_name = "s"}, %arg4: !fir.ref {fir.bindc_name = "i"}) -> f32 { + %0 = fir.alloca f32 {bindc_name = "test_real4", uniq_name = "_QFtest_real4Etest_real4"} + %1 = fir.load %arg0 : !fir.ref + %2 = math.abs %1 : f32 + %3 = fir.load 
%arg2 : !fir.ref> + %4 = fir.extract_value %3, [0 : index] : (!fir.complex<4>) -> f32 + %5 = fir.extract_value %3, [1 : index] : (!fir.complex<4>) -> f32 + %6 = fir.call @hypotf(%4, %5) : (f32, f32) -> f32 + %7 = arith.addf %2, %6 : f32 + %8 = fir.load %arg0 : !fir.ref + %9 = fir.call @llvm.trunc.f32(%8) : (f32) -> f32 + %10 = arith.addf %7, %9 : f32 + %11 = fir.load %arg0 : !fir.ref + %12 = "llvm.intr.round"(%11) : (f32) -> f32 + %13 = arith.addf %10, %12 : f32 + %14 = fir.load %arg0 : !fir.ref + %15 = math.atan %14 : f32 + %16 = arith.addf %13, %15 : f32 + %17 = fir.load %arg0 : !fir.ref + %18 = fir.load %arg1 : !fir.ref + %19 = math.atan2 %17, %18 : f32 + %20 = arith.addf %16, %19 : f32 + %21 = fir.load %arg0 : !fir.ref + %22 = math.ceil %21 : f32 + %23 = fir.convert %22 : (f32) -> i32 + %24 = fir.convert %23 : (i32) -> f32 + %25 = arith.addf %20, %24 : f32 + %26 = fir.load %arg0 : !fir.ref + %27 = math.cos %26 : f32 + %28 = arith.addf %25, %27 : f32 + %29 = fir.load %arg0 : !fir.ref + %30 = math.erf %29 : f32 + %31 = arith.addf %28, %30 : f32 + %32 = fir.load %arg0 : !fir.ref + %33 = math.exp %32 : f32 + %34 = arith.addf %31, %33 : f32 + %35 = fir.load %arg0 : !fir.ref + %36 = math.floor %35 : f32 + %37 = fir.convert %36 : (f32) -> i32 + %38 = fir.convert %37 : (i32) -> f32 + %39 = arith.addf %34, %38 : f32 + %40 = fir.load %arg0 : !fir.ref + %41 = math.log %40 : f32 + %42 = arith.addf %39, %41 : f32 + %43 = fir.load %arg0 : !fir.ref + %44 = math.log10 %43 : f32 + %45 = arith.addf %42, %44 : f32 + %46 = fir.load %arg0 : !fir.ref + %47 = fir.call @llvm.lround.i32.f32(%46) : (f32) -> i32 + %48 = fir.convert %47 : (i32) -> f32 + %49 = arith.addf %45, %48 : f32 + %50 = fir.load %arg0 : !fir.ref + %51 = fir.call @llvm.lround.i64.f32(%50) : (f32) -> i64 + %52 = fir.convert %51 : (i64) -> f32 + %53 = arith.addf %49, %52 : f32 + %54 = fir.load %arg0 : !fir.ref + %55 = fir.load %arg3 : !fir.ref + %56 = fir.convert %55 : (i16) -> i32 + %57 = fir.call 
@llvm.powi.f32.i32(%54, %56) : (f32, i32) -> f32 + %58 = arith.addf %53, %57 : f32 + %59 = fir.load %arg0 : !fir.ref + %60 = fir.load %arg1 : !fir.ref + %61 = math.powf %59, %60 : f32 + %62 = arith.addf %58, %61 : f32 + %63 = fir.load %arg0 : !fir.ref + %64 = fir.load %arg4 : !fir.ref + %65 = fir.call @llvm.powi.f32.i32(%63, %64) : (f32, i32) -> f32 + %66 = arith.addf %62, %65 : f32 + %67 = fir.load %arg0 : !fir.ref + %68 = fir.load %arg1 : !fir.ref + %69 = math.copysign %67, %68 : f32 + %70 = arith.addf %66, %69 : f32 + %71 = fir.load %arg0 : !fir.ref + %72 = math.sin %71 : f32 + %73 = arith.addf %70, %72 : f32 + %74 = fir.load %arg0 : !fir.ref + %75 = math.tanh %74 : f32 + %76 = arith.addf %73, %75 : f32 + fir.store %76 to %0 : !fir.ref + %77 = fir.load %0 : !fir.ref + return %77 : f32 +} +func.func @_QPtest_real8(%arg0: !fir.ref {fir.bindc_name = "x"}, %arg1: !fir.ref {fir.bindc_name = "y"}, %arg2: !fir.ref> {fir.bindc_name = "c"}, %arg3: !fir.ref {fir.bindc_name = "s"}, %arg4: !fir.ref {fir.bindc_name = "i"}) -> f64 { + %0 = fir.alloca f64 {bindc_name = "test_real8", uniq_name = "_QFtest_real8Etest_real8"} + %1 = fir.load %arg0 : !fir.ref + %2 = math.abs %1 : f64 + %3 = fir.load %arg2 : !fir.ref> + %4 = fir.extract_value %3, [0 : index] : (!fir.complex<8>) -> f64 + %5 = fir.extract_value %3, [1 : index] : (!fir.complex<8>) -> f64 + %6 = fir.call @hypot(%4, %5) : (f64, f64) -> f64 + %7 = arith.addf %2, %6 : f64 + %8 = fir.load %arg0 : !fir.ref + %9 = fir.call @llvm.trunc.f64(%8) : (f64) -> f64 + %10 = arith.addf %7, %9 : f64 + %11 = fir.load %arg0 : !fir.ref + %12 = "llvm.intr.round"(%11) : (f64) -> f64 + %13 = arith.addf %10, %12 : f64 + %14 = fir.load %arg0 : !fir.ref + %15 = math.atan %14 : f64 + %16 = arith.addf %13, %15 : f64 + %17 = fir.load %arg0 : !fir.ref + %18 = fir.load %arg1 : !fir.ref + %19 = math.atan2 %17, %18 : f64 + %20 = arith.addf %16, %19 : f64 + %21 = fir.load %arg0 : !fir.ref + %22 = math.ceil %21 : f64 + %23 = fir.convert %22 : (f64) -> 
i32 + %24 = fir.convert %23 : (i32) -> f64 + %25 = arith.addf %20, %24 : f64 + %26 = fir.load %arg0 : !fir.ref + %27 = math.cos %26 : f64 + %28 = arith.addf %25, %27 : f64 + %29 = fir.load %arg0 : !fir.ref + %30 = math.erf %29 : f64 + %31 = arith.addf %28, %30 : f64 + %32 = fir.load %arg0 : !fir.ref + %33 = math.exp %32 : f64 + %34 = arith.addf %31, %33 : f64 + %35 = fir.load %arg0 : !fir.ref + %36 = math.floor %35 : f64 + %37 = fir.convert %36 : (f64) -> i32 + %38 = fir.convert %37 : (i32) -> f64 + %39 = arith.addf %34, %38 : f64 + %40 = fir.load %arg0 : !fir.ref + %41 = math.log %40 : f64 + %42 = arith.addf %39, %41 : f64 + %43 = fir.load %arg0 : !fir.ref + %44 = math.log10 %43 : f64 + %45 = arith.addf %42, %44 : f64 + %46 = fir.load %arg0 : !fir.ref + %47 = fir.call @llvm.lround.i32.f64(%46) : (f64) -> i32 + %48 = fir.convert %47 : (i32) -> f64 + %49 = arith.addf %45, %48 : f64 + %50 = fir.load %arg0 : !fir.ref + %51 = fir.call @llvm.lround.i64.f64(%50) : (f64) -> i64 + %52 = fir.convert %51 : (i64) -> f64 + %53 = arith.addf %49, %52 : f64 + %54 = fir.load %arg0 : !fir.ref + %55 = fir.load %arg3 : !fir.ref + %56 = fir.convert %55 : (i16) -> i32 + %57 = fir.call @llvm.powi.f64.i32(%54, %56) : (f64, i32) -> f64 + %58 = arith.addf %53, %57 : f64 + %59 = fir.load %arg0 : !fir.ref + %60 = fir.load %arg1 : !fir.ref + %61 = math.powf %59, %60 : f64 + %62 = arith.addf %58, %61 : f64 + %63 = fir.load %arg0 : !fir.ref + %64 = fir.load %arg4 : !fir.ref + %65 = fir.call @llvm.powi.f64.i32(%63, %64) : (f64, i32) -> f64 + %66 = arith.addf %62, %65 : f64 + %67 = fir.load %arg0 : !fir.ref + %68 = fir.load %arg1 : !fir.ref + %69 = math.copysign %67, %68 : f64 + %70 = arith.addf %66, %69 : f64 + %71 = fir.load %arg0 : !fir.ref + %72 = math.sin %71 : f64 + %73 = arith.addf %70, %72 : f64 + %74 = fir.load %arg0 : !fir.ref + %75 = math.tanh %74 : f64 + %76 = arith.addf %73, %75 : f64 + fir.store %76 to %0 : !fir.ref + %77 = fir.load %0 : !fir.ref + return %77 : f64 +} +func.func 
private @hypotf(f32, f32) -> f32 +func.func private @llvm.trunc.f32(f32) -> f32 +func.func private @llvm.lround.i32.f32(f32) -> i32 +func.func private @llvm.lround.i64.f32(f32) -> i64 +func.func private @llvm.powi.f32.i32(f32, i32) -> f32 +func.func private @hypot(f64, f64) -> f64 +func.func private @llvm.trunc.f64(f64) -> f64 +func.func private @llvm.lround.i32.f64(f64) -> i32 +func.func private @llvm.lround.i64.f64(f64) -> i64 +func.func private @llvm.powi.f64.i32(f64, i32) -> f64 + +//--- relaxed + +func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}, %arg1: !fir.ref {fir.bindc_name = "y"}, %arg2: !fir.ref> {fir.bindc_name = "c"}, %arg3: !fir.ref {fir.bindc_name = "s"}, %arg4: !fir.ref {fir.bindc_name = "i"}) -> f32 { + %0 = fir.alloca f32 {bindc_name = "test_real4", uniq_name = "_QFtest_real4Etest_real4"} + %1 = fir.load %arg0 : !fir.ref + %2 = math.abs %1 : f32 + %3 = fir.load %arg2 : !fir.ref> + %4 = fir.extract_value %3, [0 : index] : (!fir.complex<4>) -> f32 + %5 = fir.extract_value %3, [1 : index] : (!fir.complex<4>) -> f32 + %6 = fir.call @hypotf(%4, %5) : (f32, f32) -> f32 + %7 = arith.addf %2, %6 : f32 + %8 = fir.load %arg0 : !fir.ref + %9 = fir.call @llvm.trunc.f32(%8) : (f32) -> f32 + %10 = arith.addf %7, %9 : f32 + %11 = fir.load %arg0 : !fir.ref + %12 = "llvm.intr.round"(%11) : (f32) -> f32 + %13 = arith.addf %10, %12 : f32 + %14 = fir.load %arg0 : !fir.ref + %15 = math.atan %14 : f32 + %16 = arith.addf %13, %15 : f32 + %17 = fir.load %arg0 : !fir.ref + %18 = fir.load %arg1 : !fir.ref + %19 = math.atan2 %17, %18 : f32 + %20 = arith.addf %16, %19 : f32 + %21 = fir.load %arg0 : !fir.ref + %22 = math.ceil %21 : f32 + %23 = fir.convert %22 : (f32) -> i32 + %24 = fir.convert %23 : (i32) -> f32 + %25 = arith.addf %20, %24 : f32 + %26 = fir.load %arg0 : !fir.ref + %27 = math.cos %26 : f32 + %28 = arith.addf %25, %27 : f32 + %29 = fir.load %arg0 : !fir.ref + %30 = math.erf %29 : f32 + %31 = arith.addf %28, %30 : f32 + %32 = fir.load %arg0 : 
!fir.ref + %33 = math.exp %32 : f32 + %34 = arith.addf %31, %33 : f32 + %35 = fir.load %arg0 : !fir.ref + %36 = math.floor %35 : f32 + %37 = fir.convert %36 : (f32) -> i32 + %38 = fir.convert %37 : (i32) -> f32 + %39 = arith.addf %34, %38 : f32 + %40 = fir.load %arg0 : !fir.ref + %41 = math.log %40 : f32 + %42 = arith.addf %39, %41 : f32 + %43 = fir.load %arg0 : !fir.ref + %44 = math.log10 %43 : f32 + %45 = arith.addf %42, %44 : f32 + %46 = fir.load %arg0 : !fir.ref + %47 = fir.call @llvm.lround.i32.f32(%46) : (f32) -> i32 + %48 = fir.convert %47 : (i32) -> f32 + %49 = arith.addf %45, %48 : f32 + %50 = fir.load %arg0 : !fir.ref + %51 = fir.call @llvm.lround.i64.f32(%50) : (f32) -> i64 + %52 = fir.convert %51 : (i64) -> f32 + %53 = arith.addf %49, %52 : f32 + %54 = fir.load %arg0 : !fir.ref + %55 = fir.load %arg3 : !fir.ref + %56 = fir.convert %55 : (i16) -> i32 + %57 = fir.call @llvm.powi.f32.i32(%54, %56) : (f32, i32) -> f32 + %58 = arith.addf %53, %57 : f32 + %59 = fir.load %arg0 : !fir.ref + %60 = fir.load %arg1 : !fir.ref + %61 = math.powf %59, %60 : f32 + %62 = arith.addf %58, %61 : f32 + %63 = fir.load %arg0 : !fir.ref + %64 = fir.load %arg4 : !fir.ref + %65 = fir.call @llvm.powi.f32.i32(%63, %64) : (f32, i32) -> f32 + %66 = arith.addf %62, %65 : f32 + %67 = fir.load %arg0 : !fir.ref + %68 = fir.load %arg1 : !fir.ref + %69 = math.copysign %67, %68 : f32 + %70 = arith.addf %66, %69 : f32 + %71 = fir.load %arg0 : !fir.ref + %72 = math.sin %71 : f32 + %73 = arith.addf %70, %72 : f32 + %74 = fir.load %arg0 : !fir.ref + %75 = math.tanh %74 : f32 + %76 = arith.addf %73, %75 : f32 + fir.store %76 to %0 : !fir.ref + %77 = fir.load %0 : !fir.ref + return %77 : f32 +} +func.func @_QPtest_real8(%arg0: !fir.ref {fir.bindc_name = "x"}, %arg1: !fir.ref {fir.bindc_name = "y"}, %arg2: !fir.ref> {fir.bindc_name = "c"}, %arg3: !fir.ref {fir.bindc_name = "s"}, %arg4: !fir.ref {fir.bindc_name = "i"}) -> f64 { + %0 = fir.alloca f64 {bindc_name = "test_real8", uniq_name = 
"_QFtest_real8Etest_real8"} + %1 = fir.load %arg0 : !fir.ref + %2 = math.abs %1 : f64 + %3 = fir.load %arg2 : !fir.ref> + %4 = fir.extract_value %3, [0 : index] : (!fir.complex<8>) -> f64 + %5 = fir.extract_value %3, [1 : index] : (!fir.complex<8>) -> f64 + %6 = fir.call @hypot(%4, %5) : (f64, f64) -> f64 + %7 = arith.addf %2, %6 : f64 + %8 = fir.load %arg0 : !fir.ref + %9 = fir.call @llvm.trunc.f64(%8) : (f64) -> f64 + %10 = arith.addf %7, %9 : f64 + %11 = fir.load %arg0 : !fir.ref + %12 = "llvm.intr.round"(%11) : (f64) -> f64 + %13 = arith.addf %10, %12 : f64 + %14 = fir.load %arg0 : !fir.ref + %15 = math.atan %14 : f64 + %16 = arith.addf %13, %15 : f64 + %17 = fir.load %arg0 : !fir.ref + %18 = fir.load %arg1 : !fir.ref + %19 = math.atan2 %17, %18 : f64 + %20 = arith.addf %16, %19 : f64 + %21 = fir.load %arg0 : !fir.ref + %22 = math.ceil %21 : f64 + %23 = fir.convert %22 : (f64) -> i32 + %24 = fir.convert %23 : (i32) -> f64 + %25 = arith.addf %20, %24 : f64 + %26 = fir.load %arg0 : !fir.ref + %27 = math.cos %26 : f64 + %28 = arith.addf %25, %27 : f64 + %29 = fir.load %arg0 : !fir.ref + %30 = math.erf %29 : f64 + %31 = arith.addf %28, %30 : f64 + %32 = fir.load %arg0 : !fir.ref + %33 = math.exp %32 : f64 + %34 = arith.addf %31, %33 : f64 + %35 = fir.load %arg0 : !fir.ref + %36 = math.floor %35 : f64 + %37 = fir.convert %36 : (f64) -> i32 + %38 = fir.convert %37 : (i32) -> f64 + %39 = arith.addf %34, %38 : f64 + %40 = fir.load %arg0 : !fir.ref + %41 = math.log %40 : f64 + %42 = arith.addf %39, %41 : f64 + %43 = fir.load %arg0 : !fir.ref + %44 = math.log10 %43 : f64 + %45 = arith.addf %42, %44 : f64 + %46 = fir.load %arg0 : !fir.ref + %47 = fir.call @llvm.lround.i32.f64(%46) : (f64) -> i32 + %48 = fir.convert %47 : (i32) -> f64 + %49 = arith.addf %45, %48 : f64 + %50 = fir.load %arg0 : !fir.ref + %51 = fir.call @llvm.lround.i64.f64(%50) : (f64) -> i64 + %52 = fir.convert %51 : (i64) -> f64 + %53 = arith.addf %49, %52 : f64 + %54 = fir.load %arg0 : !fir.ref + %55 = 
fir.load %arg3 : !fir.ref + %56 = fir.convert %55 : (i16) -> i32 + %57 = fir.call @llvm.powi.f64.i32(%54, %56) : (f64, i32) -> f64 + %58 = arith.addf %53, %57 : f64 + %59 = fir.load %arg0 : !fir.ref + %60 = fir.load %arg1 : !fir.ref + %61 = math.powf %59, %60 : f64 + %62 = arith.addf %58, %61 : f64 + %63 = fir.load %arg0 : !fir.ref + %64 = fir.load %arg4 : !fir.ref + %65 = fir.call @llvm.powi.f64.i32(%63, %64) : (f64, i32) -> f64 + %66 = arith.addf %62, %65 : f64 + %67 = fir.load %arg0 : !fir.ref + %68 = fir.load %arg1 : !fir.ref + %69 = math.copysign %67, %68 : f64 + %70 = arith.addf %66, %69 : f64 + %71 = fir.load %arg0 : !fir.ref + %72 = math.sin %71 : f64 + %73 = arith.addf %70, %72 : f64 + %74 = fir.load %arg0 : !fir.ref + %75 = math.tanh %74 : f64 + %76 = arith.addf %73, %75 : f64 + fir.store %76 to %0 : !fir.ref + %77 = fir.load %0 : !fir.ref + return %77 : f64 +} +func.func private @hypotf(f32, f32) -> f32 +func.func private @llvm.trunc.f32(f32) -> f32 +func.func private @llvm.lround.i32.f32(f32) -> i32 +func.func private @llvm.lround.i64.f32(f32) -> i64 +func.func private @llvm.powi.f32.i32(f32, i32) -> f32 +func.func private @hypot(f64, f64) -> f64 +func.func private @llvm.trunc.f64(f64) -> f64 +func.func private @llvm.lround.i32.f64(f64) -> i32 +func.func private @llvm.lround.i64.f64(f64) -> i64 +func.func private @llvm.powi.f64.i32(f64, i32) -> f64 + +//--- precise + +func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}, %arg1: !fir.ref {fir.bindc_name = "y"}, %arg2: !fir.ref> {fir.bindc_name = "c"}, %arg3: !fir.ref {fir.bindc_name = "s"}, %arg4: !fir.ref {fir.bindc_name = "i"}) -> f32 { + %0 = fir.alloca f32 {bindc_name = "test_real4", uniq_name = "_QFtest_real4Etest_real4"} + %1 = fir.load %arg0 : !fir.ref + %2 = fir.call @fabsf(%1) : (f32) -> f32 + %3 = fir.load %arg2 : !fir.ref> + %4 = fir.extract_value %3, [0 : index] : (!fir.complex<4>) -> f32 + %5 = fir.extract_value %3, [1 : index] : (!fir.complex<4>) -> f32 + %6 = fir.call 
@hypotf(%4, %5) : (f32, f32) -> f32 + %7 = arith.addf %2, %6 : f32 + %8 = fir.load %arg0 : !fir.ref + %9 = fir.call @llvm.trunc.f32(%8) : (f32) -> f32 + %10 = arith.addf %7, %9 : f32 + %11 = fir.load %arg0 : !fir.ref + %12 = fir.call @llvm.round.f32(%11) : (f32) -> f32 + %13 = arith.addf %10, %12 : f32 + %14 = fir.load %arg0 : !fir.ref + %15 = fir.call @atanf(%14) : (f32) -> f32 + %16 = arith.addf %13, %15 : f32 + %17 = fir.load %arg0 : !fir.ref + %18 = fir.load %arg1 : !fir.ref + %19 = fir.call @atan2f(%17, %18) : (f32, f32) -> f32 + %20 = arith.addf %16, %19 : f32 + %21 = fir.load %arg0 : !fir.ref + %22 = fir.call @ceilf(%21) : (f32) -> f32 + %23 = fir.convert %22 : (f32) -> i32 + %24 = fir.convert %23 : (i32) -> f32 + %25 = arith.addf %20, %24 : f32 + %26 = fir.load %arg0 : !fir.ref + %27 = fir.call @cosf(%26) : (f32) -> f32 + %28 = arith.addf %25, %27 : f32 + %29 = fir.load %arg0 : !fir.ref + %30 = fir.call @erff(%29) : (f32) -> f32 + %31 = arith.addf %28, %30 : f32 + %32 = fir.load %arg0 : !fir.ref + %33 = fir.call @expf(%32) : (f32) -> f32 + %34 = arith.addf %31, %33 : f32 + %35 = fir.load %arg0 : !fir.ref + %36 = fir.call @floorf(%35) : (f32) -> f32 + %37 = fir.convert %36 : (f32) -> i32 + %38 = fir.convert %37 : (i32) -> f32 + %39 = arith.addf %34, %38 : f32 + %40 = fir.load %arg0 : !fir.ref + %41 = fir.call @logf(%40) : (f32) -> f32 + %42 = arith.addf %39, %41 : f32 + %43 = fir.load %arg0 : !fir.ref + %44 = fir.call @log10f(%43) : (f32) -> f32 + %45 = arith.addf %42, %44 : f32 + %46 = fir.load %arg0 : !fir.ref + %47 = fir.call @llvm.lround.i32.f32(%46) : (f32) -> i32 + %48 = fir.convert %47 : (i32) -> f32 + %49 = arith.addf %45, %48 : f32 + %50 = fir.load %arg0 : !fir.ref + %51 = fir.call @llvm.lround.i64.f32(%50) : (f32) -> i64 + %52 = fir.convert %51 : (i64) -> f32 + %53 = arith.addf %49, %52 : f32 + %54 = fir.load %arg0 : !fir.ref + %55 = fir.load %arg3 : !fir.ref + %56 = fir.convert %55 : (i16) -> i32 + %57 = fir.call @llvm.powi.f32.i32(%54, %56) : 
(f32, i32) -> f32 + %58 = arith.addf %53, %57 : f32 + %59 = fir.load %arg0 : !fir.ref + %60 = fir.load %arg1 : !fir.ref + %61 = fir.call @powf(%59, %60) : (f32, f32) -> f32 + %62 = arith.addf %58, %61 : f32 + %63 = fir.load %arg0 : !fir.ref + %64 = fir.load %arg4 : !fir.ref + %65 = fir.call @llvm.powi.f32.i32(%63, %64) : (f32, i32) -> f32 + %66 = arith.addf %62, %65 : f32 + %67 = fir.load %arg0 : !fir.ref + %68 = fir.load %arg1 : !fir.ref + %69 = fir.call @copysignf(%67, %68) : (f32, f32) -> f32 + %70 = arith.addf %66, %69 : f32 + %71 = fir.load %arg0 : !fir.ref + %72 = fir.call @sinf(%71) : (f32) -> f32 + %73 = arith.addf %70, %72 : f32 + %74 = fir.load %arg0 : !fir.ref + %75 = fir.call @tanhf(%74) : (f32) -> f32 + %76 = arith.addf %73, %75 : f32 + fir.store %76 to %0 : !fir.ref + %77 = fir.load %0 : !fir.ref + return %77 : f32 +} +func.func @_QPtest_real8(%arg0: !fir.ref {fir.bindc_name = "x"}, %arg1: !fir.ref {fir.bindc_name = "y"}, %arg2: !fir.ref> {fir.bindc_name = "c"}, %arg3: !fir.ref {fir.bindc_name = "s"}, %arg4: !fir.ref {fir.bindc_name = "i"}) -> f64 { + %0 = fir.alloca f64 {bindc_name = "test_real8", uniq_name = "_QFtest_real8Etest_real8"} + %1 = fir.load %arg0 : !fir.ref + %2 = fir.call @fabs(%1) : (f64) -> f64 + %3 = fir.load %arg2 : !fir.ref> + %4 = fir.extract_value %3, [0 : index] : (!fir.complex<8>) -> f64 + %5 = fir.extract_value %3, [1 : index] : (!fir.complex<8>) -> f64 + %6 = fir.call @hypot(%4, %5) : (f64, f64) -> f64 + %7 = arith.addf %2, %6 : f64 + %8 = fir.load %arg0 : !fir.ref + %9 = fir.call @llvm.trunc.f64(%8) : (f64) -> f64 + %10 = arith.addf %7, %9 : f64 + %11 = fir.load %arg0 : !fir.ref + %12 = fir.call @llvm.round.f64(%11) : (f64) -> f64 + %13 = arith.addf %10, %12 : f64 + %14 = fir.load %arg0 : !fir.ref + %15 = fir.call @atan(%14) : (f64) -> f64 + %16 = arith.addf %13, %15 : f64 + %17 = fir.load %arg0 : !fir.ref + %18 = fir.load %arg1 : !fir.ref + %19 = fir.call @atan2(%17, %18) : (f64, f64) -> f64 + %20 = arith.addf %16, %19 : f64 
+ %21 = fir.load %arg0 : !fir.ref + %22 = fir.call @ceil(%21) : (f64) -> f64 + %23 = fir.convert %22 : (f64) -> i32 + %24 = fir.convert %23 : (i32) -> f64 + %25 = arith.addf %20, %24 : f64 + %26 = fir.load %arg0 : !fir.ref + %27 = fir.call @cos(%26) : (f64) -> f64 + %28 = arith.addf %25, %27 : f64 + %29 = fir.load %arg0 : !fir.ref + %30 = fir.call @erf(%29) : (f64) -> f64 + %31 = arith.addf %28, %30 : f64 + %32 = fir.load %arg0 : !fir.ref + %33 = fir.call @exp(%32) : (f64) -> f64 + %34 = arith.addf %31, %33 : f64 + %35 = fir.load %arg0 : !fir.ref + %36 = fir.call @floor(%35) : (f64) -> f64 + %37 = fir.convert %36 : (f64) -> i32 + %38 = fir.convert %37 : (i32) -> f64 + %39 = arith.addf %34, %38 : f64 + %40 = fir.load %arg0 : !fir.ref + %41 = fir.call @log(%40) : (f64) -> f64 + %42 = arith.addf %39, %41 : f64 + %43 = fir.load %arg0 : !fir.ref + %44 = fir.call @log10(%43) : (f64) -> f64 + %45 = arith.addf %42, %44 : f64 + %46 = fir.load %arg0 : !fir.ref + %47 = fir.call @llvm.lround.i32.f64(%46) : (f64) -> i32 + %48 = fir.convert %47 : (i32) -> f64 + %49 = arith.addf %45, %48 : f64 + %50 = fir.load %arg0 : !fir.ref + %51 = fir.call @llvm.lround.i64.f64(%50) : (f64) -> i64 + %52 = fir.convert %51 : (i64) -> f64 + %53 = arith.addf %49, %52 : f64 + %54 = fir.load %arg0 : !fir.ref + %55 = fir.load %arg3 : !fir.ref + %56 = fir.convert %55 : (i16) -> i32 + %57 = fir.call @llvm.powi.f64.i32(%54, %56) : (f64, i32) -> f64 + %58 = arith.addf %53, %57 : f64 + %59 = fir.load %arg0 : !fir.ref + %60 = fir.load %arg1 : !fir.ref + %61 = fir.call @pow(%59, %60) : (f64, f64) -> f64 + %62 = arith.addf %58, %61 : f64 + %63 = fir.load %arg0 : !fir.ref + %64 = fir.load %arg4 : !fir.ref + %65 = fir.call @llvm.powi.f64.i32(%63, %64) : (f64, i32) -> f64 + %66 = arith.addf %62, %65 : f64 + %67 = fir.load %arg0 : !fir.ref + %68 = fir.load %arg1 : !fir.ref + %69 = fir.call @copysign(%67, %68) : (f64, f64) -> f64 + %70 = arith.addf %66, %69 : f64 + %71 = fir.load %arg0 : !fir.ref + %72 = fir.call 
@sin(%71) : (f64) -> f64 + %73 = arith.addf %70, %72 : f64 + %74 = fir.load %arg0 : !fir.ref + %75 = fir.call @tanh(%74) : (f64) -> f64 + %76 = arith.addf %73, %75 : f64 + fir.store %76 to %0 : !fir.ref + %77 = fir.load %0 : !fir.ref + return %77 : f64 +} +func.func private @fabsf(f32) -> f32 +func.func private @hypotf(f32, f32) -> f32 +func.func private @llvm.trunc.f32(f32) -> f32 +func.func private @llvm.round.f32(f32) -> f32 +func.func private @atanf(f32) -> f32 +func.func private @atan2f(f32, f32) -> f32 +func.func private @ceilf(f32) -> f32 +func.func private @cosf(f32) -> f32 +func.func private @erff(f32) -> f32 +func.func private @expf(f32) -> f32 +func.func private @floorf(f32) -> f32 +func.func private @logf(f32) -> f32 +func.func private @log10f(f32) -> f32 +func.func private @llvm.lround.i32.f32(f32) -> i32 +func.func private @llvm.lround.i64.f32(f32) -> i64 +func.func private @llvm.powi.f32.i32(f32, i32) -> f32 +func.func private @powf(f32, f32) -> f32 +func.func private @copysignf(f32, f32) -> f32 +func.func private @sinf(f32) -> f32 +func.func private @tanhf(f32) -> f32 +func.func private @fabs(f64) -> f64 +func.func private @hypot(f64, f64) -> f64 +func.func private @llvm.trunc.f64(f64) -> f64 +func.func private @llvm.round.f64(f64) -> f64 +func.func private @atan(f64) -> f64 +func.func private @atan2(f64, f64) -> f64 +func.func private @ceil(f64) -> f64 +func.func private @cos(f64) -> f64 +func.func private @erf(f64) -> f64 +func.func private @exp(f64) -> f64 +func.func private @floor(f64) -> f64 +func.func private @log(f64) -> f64 +func.func private @log10(f64) -> f64 +func.func private @llvm.lround.i32.f64(f64) -> i32 +func.func private @llvm.lround.i64.f64(f64) -> i64 +func.func private @llvm.powi.f64.i32(f64, i32) -> f64 +func.func private @pow(f64, f64) -> f64 +func.func private @copysign(f64, f64) -> f64 +func.func private @sin(f64) -> f64 +func.func private @tanh(f64) -> f64 Index: flang/test/Lower/Intrinsics/exp.f90 
=================================================================== --- flang/test/Lower/Intrinsics/exp.f90 +++ flang/test/Lower/Intrinsics/exp.f90 @@ -1,5 +1,5 @@ -! RUN: bbc -emit-fir %s -o - | FileCheck %s -! RUN: %flang_fc1 -emit-fir %s -o - | FileCheck %s +! RUN: bbc -emit-fir -outline-intrinsics %s -o - | FileCheck %s +! RUN: %flang_fc1 -emit-fir -mllvm -outline-intrinsics %s -o - | FileCheck %s ! CHECK-LABEL: exp_testr ! CHECK-SAME: (%[[AREF:.*]]: !fir.ref {{.*}}, %[[BREF:.*]]: !fir.ref {{.*}}) Index: flang/test/Lower/Intrinsics/log.f90 =================================================================== --- flang/test/Lower/Intrinsics/log.f90 +++ flang/test/Lower/Intrinsics/log.f90 @@ -1,5 +1,5 @@ -! RUN: bbc -emit-fir %s -o - | FileCheck %s -! RUN: %flang_fc1 -emit-fir %s -o - | FileCheck %s +! RUN: bbc -emit-fir -outline-intrinsics %s -o - | FileCheck %s +! RUN: %flang_fc1 -emit-fir -mllvm -outline-intrinsics %s -o - | FileCheck %s ! CHECK-LABEL: log_testr ! CHECK-SAME: (%[[AREF:.*]]: !fir.ref {{.*}}, %[[BREF:.*]]: !fir.ref {{.*}}) Index: flang/test/Lower/Intrinsics/math-runtime-options.f90 =================================================================== --- flang/test/Lower/Intrinsics/math-runtime-options.f90 +++ flang/test/Lower/Intrinsics/math-runtime-options.f90 @@ -1,7 +1,11 @@ -! RUN: bbc -emit-fir --math-runtime=fast %s -o - | FileCheck %s --check-prefixes="FIR,FAST" -! RUN: bbc -emit-fir --math-runtime=relaxed %s -o - | FileCheck %s --check-prefixes="FIR,RELAXED" -! RUN: bbc -emit-fir --math-runtime=precise %s -o - | FileCheck %s --check-prefixes="FIR,PRECISE" -! RUN: bbc -emit-fir --math-runtime=llvm %s -o - | FileCheck %s --check-prefixes="FIR,LLVM" +! RUN: bbc -emit-fir --math-runtime=fast -outline-intrinsics %s -o - | FileCheck %s --check-prefixes="FIR,FAST" +! RUN: %flang_fc1 -emit-fir -mllvm -math-runtime=fast -mllvm -outline-intrinsics %s -o - | FileCheck %s --check-prefixes="FIR,FAST" +! 
RUN: bbc -emit-fir --math-runtime=relaxed -outline-intrinsics %s -o - | FileCheck %s --check-prefixes="FIR,RELAXED" +! RUN: %flang_fc1 -emit-fir -mllvm -math-runtime=relaxed -mllvm -outline-intrinsics %s -o - | FileCheck %s --check-prefixes="FIR,RELAXED" +! RUN: bbc -emit-fir --math-runtime=precise -outline-intrinsics %s -o - | FileCheck %s --check-prefixes="FIR,PRECISE" +! RUN: %flang_fc1 -emit-fir -mllvm -math-runtime=precise -mllvm -outline-intrinsics %s -o - | FileCheck %s --check-prefixes="FIR,PRECISE" +! RUN: bbc -emit-fir --math-runtime=llvm -outline-intrinsics %s -o - | FileCheck %s --check-prefixes="FIR,LLVM" +! RUN: %flang_fc1 -emit-fir -mllvm -math-runtime=llvm -mllvm -outline-intrinsics %s -o - | FileCheck %s --check-prefixes="FIR,LLVM" ! CHECK-LABEL: cos_testr subroutine cos_testr(a, b) Index: flang/test/Lower/late-math-lowering.f90 =================================================================== --- /dev/null +++ flang/test/Lower/late-math-lowering.f90 @@ -0,0 +1,137 @@ +! RUN: bbc -emit-fir %s -o - --math-lowering=late --math-runtime=fast | FileCheck --check-prefixes=ALL,FAST %s +! RUN: %flang_fc1 -emit-fir -mllvm -math-lowering=late -mllvm -math-runtime=fast %s -o - | FileCheck --check-prefixes=ALL,FAST %s +! 'relaxed' matches 'fast' exactly right now, but this will change: +! RUN: bbc -emit-fir %s -o - --math-lowering=late --math-runtime=relaxed | FileCheck --check-prefixes=ALL,RELAXED %s +! RUN: %flang_fc1 -emit-fir -mllvm -math-lowering=late -mllvm -math-runtime=relaxed %s -o - | FileCheck --check-prefixes=ALL,RELAXED %s +! RUN: bbc -emit-fir %s -o - --math-lowering=late --math-runtime=precise | FileCheck --check-prefixes=ALL,PRECISE %s +! RUN: %flang_fc1 -emit-fir -mllvm -math-lowering=late -mllvm -math-runtime=precise %s -o - | FileCheck --check-prefixes=ALL,PRECISE %s + +! ALL-LABEL: @_QPtest_real4 +! FAST: {{%[A-Za-z0-9._]+}} = math.abs {{%[A-Za-z0-9._]+}} : f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.abs {{%[A-Za-z0-9._]+}} : f32 +! 
PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @fabsf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! ALL: {{%[A-Za-z0-9._]+}} = fir.call @hypotf({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +! ALL: {{%[A-Za-z0-9._]+}} = fir.call @llvm.trunc.f32({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @llvm.round.f32({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! FAST: {{%[A-Za-z0-9._]+}} = math.atan {{%[A-Za-z0-9._]+}} : f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.atan {{%[A-Za-z0-9._]+}} : f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @atanf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! FAST: {{%[A-Za-z0-9._]+}} = math.atan2 {{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}} : f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.atan2 {{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}} : f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @atan2f({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +! FAST: {{%[A-Za-z0-9._]+}} = math.ceil {{%[A-Za-z0-9._]+}} : f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.ceil {{%[A-Za-z0-9._]+}} : f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @ceilf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! FAST: {{%[A-Za-z0-9._]+}} = math.cos {{%[A-Za-z0-9._]+}} : f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.cos {{%[A-Za-z0-9._]+}} : f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @cosf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! FAST: {{%[A-Za-z0-9._]+}} = math.erf {{%[A-Za-z0-9._]+}} : f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.erf {{%[A-Za-z0-9._]+}} : f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @erff({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! FAST: {{%[A-Za-z0-9._]+}} = math.exp {{%[A-Za-z0-9._]+}} : f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.exp {{%[A-Za-z0-9._]+}} : f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @expf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! 
FAST: {{%[A-Za-z0-9._]+}} = math.floor {{%[A-Za-z0-9._]+}} : f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.floor {{%[A-Za-z0-9._]+}} : f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @floorf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! FAST: {{%[A-Za-z0-9._]+}} = math.log {{%[A-Za-z0-9._]+}} : f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.log {{%[A-Za-z0-9._]+}} : f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @logf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! FAST: {{%[A-Za-z0-9._]+}} = math.log10 {{%[A-Za-z0-9._]+}} : f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.log10 {{%[A-Za-z0-9._]+}} : f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @log10f({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! ALL: {{%[A-Za-z0-9._]+}} = fir.call @llvm.lround.i32.f32({{%[A-Za-z0-9._]+}}) : (f32) -> i32 +! ALL: {{%[A-Za-z0-9._]+}} = fir.call @llvm.lround.i64.f32({{%[A-Za-z0-9._]+}}) : (f32) -> i64 +! ALL: [[STOI:%[A-Za-z0-9._]+]] = fir.convert {{%[A-Za-z0-9._]+}} : (i16) -> i32 +! ALL: {{%[A-Za-z0-9._]+}} = fir.call @llvm.powi.f32.i32({{%[A-Za-z0-9._]+}}, [[STOI]]) : (f32, i32) -> f32 +! FAST: {{%[A-Za-z0-9._]+}} = math.powf {{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}} : f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.powf {{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}} : f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @powf({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +! ALL: {{%[A-Za-z0-9._]+}} = fir.call @llvm.powi.f32.i32({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, i32) -> f32 +! FAST: {{%[A-Za-z0-9._]+}} = math.copysign {{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}} : f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.copysign {{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}} : f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @copysignf({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +! FAST: {{%[A-Za-z0-9._]+}} = math.sin {{%[A-Za-z0-9._]+}} : f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.sin {{%[A-Za-z0-9._]+}} : f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @sinf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! 
FAST: {{%[A-Za-z0-9._]+}} = math.tanh {{%[A-Za-z0-9._]+}} : f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.tanh {{%[A-Za-z0-9._]+}} : f32 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @tanhf({{%[A-Za-z0-9._]+}}) : (f32) -> f32 + +function test_real4(x, y, c, s, i) + real :: x, y, test_real4 + complex(4) :: c + integer(2) :: s + integer(4) :: i + test_real4 = abs(x) + abs(c) + aint(x) + anint(x) + atan(x) + atan2(x, y) + & + ceiling(x) + cos(x) + erf(x) + exp(x) + floor(x) + log(x) + log10(x) + & + nint(x, 4) + nint(x, 8) + x ** s + x ** y + x ** i + sign(x, y) + & + sin(x) + tanh(x) +end function + +! ALL-LABEL: @_QPtest_real8 +! FAST: {{%[A-Za-z0-9._]+}} = math.abs {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.abs {{%[A-Za-z0-9._]+}} : f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @fabs({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! ALL: {{%[A-Za-z0-9._]+}} = fir.call @hypot({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +! ALL: {{%[A-Za-z0-9._]+}} = fir.call @llvm.trunc.f64({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @llvm.round.f64({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.atan {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.atan {{%[A-Za-z0-9._]+}} : f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @atan({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.atan2 {{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.atan2 {{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}} : f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @atan2({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.ceil {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.ceil {{%[A-Za-z0-9._]+}} : f64 +! 
PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @ceil({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.cos {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.cos {{%[A-Za-z0-9._]+}} : f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @cos({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.erf {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.erf {{%[A-Za-z0-9._]+}} : f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @erf({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.exp {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.exp {{%[A-Za-z0-9._]+}} : f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @exp({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.floor {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.floor {{%[A-Za-z0-9._]+}} : f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @floor({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.log {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.log {{%[A-Za-z0-9._]+}} : f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @log({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.log10 {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.log10 {{%[A-Za-z0-9._]+}} : f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @log10({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! ALL: {{%[A-Za-z0-9._]+}} = fir.call @llvm.lround.i32.f64({{%[A-Za-z0-9._]+}}) : (f64) -> i32 +! ALL: {{%[A-Za-z0-9._]+}} = fir.call @llvm.lround.i64.f64({{%[A-Za-z0-9._]+}}) : (f64) -> i64 +! ALL: [[STOI:%[A-Za-z0-9._]+]] = fir.convert {{%[A-Za-z0-9._]+}} : (i16) -> i32 +! ALL: {{%[A-Za-z0-9._]+}} = fir.call @llvm.powi.f64.i32({{%[A-Za-z0-9._]+}}, [[STOI]]) : (f64, i32) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.powf {{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.powf {{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}} : f64 +! 
PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @pow({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +! ALL: {{%[A-Za-z0-9._]+}} = fir.call @llvm.powi.f64.i32({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, i32) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.copysign {{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.copysign {{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}} : f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @copysign({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.sin {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.sin {{%[A-Za-z0-9._]+}} : f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @sin({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = math.tanh {{%[A-Za-z0-9._]+}} : f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = math.tanh {{%[A-Za-z0-9._]+}} : f64 +! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @tanh({{%[A-Za-z0-9._]+}}) : (f64) -> f64 + +function test_real8(x, y, c, s, i) + real(8) :: x, y, test_real8 + complex(8) :: c + integer(2) :: s + integer(4) :: i + test_real8 = abs(x) + abs(c) + aint(x) + anint(x) + atan(x) + atan2(x, y) + & + ceiling(x) + cos(x) + erf(x) + exp(x) + floor(x) + log(x) + log10(x) + & + nint(x, 4) + nint(x, 8) + x ** s + x ** y + x ** i + sign(x, y) + & + sin(x) + tanh(x) +end function Index: flang/test/Lower/llvm-math.f90 =================================================================== --- flang/test/Lower/llvm-math.f90 +++ flang/test/Lower/llvm-math.f90 @@ -1,4 +1,5 @@ -! RUN: bbc -emit-fir %s -o - --math-runtime=llvm | FileCheck %s +! RUN: bbc -emit-fir %s -o - --math-runtime=llvm --outline-intrinsics | FileCheck %s +! 
RUN: %flang_fc1 -emit-fir -mllvm -math-runtime=llvm -mllvm -outline-intrinsics %s -o - | FileCheck %s SUBROUTINE POW_WRAPPER(IN, IN2, OUT) DOUBLE PRECISION IN, IN2 Index: flang/test/Lower/sqrt.f90 =================================================================== --- flang/test/Lower/sqrt.f90 +++ flang/test/Lower/sqrt.f90 @@ -1,5 +1,5 @@ -! RUN: bbc -emit-fir %s -o - | FileCheck %s -! RUN: %flang_fc1 -emit-fir %s -o - | FileCheck %s +! RUN: bbc -emit-fir -outline-intrinsics %s -o - | FileCheck %s +! RUN: %flang_fc1 -emit-fir -mllvm -outline-intrinsics %s -o - | FileCheck %s ! CHECK-LABEL: sqrt_testr subroutine sqrt_testr(a, b) Index: flang/test/Lower/trigonometric-intrinsics.f90 =================================================================== --- flang/test/Lower/trigonometric-intrinsics.f90 +++ flang/test/Lower/trigonometric-intrinsics.f90 @@ -1,5 +1,5 @@ -! RUN: bbc -emit-fir %s -o - | FileCheck %s -! RUN: %flang_fc1 -emit-fir %s -o - | FileCheck %s +! RUN: bbc -emit-fir -outline-intrinsics %s -o - | FileCheck %s +! RUN: %flang_fc1 -emit-fir -mllvm -outline-intrinsics %s -o - | FileCheck %s ! CHECK-LABEL: atan_testr subroutine atan_testr(a, b)