diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h --- a/flang/include/flang/Optimizer/Transforms/Passes.h +++ b/flang/include/flang/Optimizer/Transforms/Passes.h @@ -35,6 +35,8 @@ std::unique_ptr createMemDataFlowOptPass(); std::unique_ptr createPromoteToAffinePass(); std::unique_ptr createMemoryAllocationPass(); +std::unique_ptr createSimplifyIntrinsicsPass(); + std::unique_ptr createMemoryAllocationPass(bool dynOnHeap, std::size_t maxStackSize); std::unique_ptr createAnnotateConstantOperandsPass(); diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td --- a/flang/include/flang/Optimizer/Transforms/Passes.td +++ b/flang/include/flang/Optimizer/Transforms/Passes.td @@ -174,6 +174,20 @@ ]; } +// This needs to be a "mlir::ModuleOp" pass, because it inserts simplified +// functions into the module, which is invalid if a finer grain mlir::Operation +// is used, as the pass specification says not to touch things outside the scope +// of the operation being processed. +def SimplifyIntrinsics : Pass<"simplify-intrinsics", "mlir::ModuleOp"> { + let summary = "Intrinsics simplification"; + let description = [{ + Qualifying intrinsics calls are replaced with calls to a specialized and + simplified function. The simplified function is added to the current module. + This function can be inlined by a general purpose inlining pass. 
+ }]; + let constructor = "::fir::createSimplifyIntrinsicsPass()"; +} + def MemoryAllocationOpt : Pass<"memory-allocation-opt", "mlir::func::FuncOp"> { let summary = "Convert stack to heap allocations and vice versa."; let description = [{ diff --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt --- a/flang/lib/Optimizer/Transforms/CMakeLists.txt +++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt @@ -11,6 +11,7 @@ RewriteLoop.cpp SimplifyRegionLite.cpp AlgebraicSimplification.cpp + SimplifyIntrinsics.cpp DEPENDS FIRBuilder diff --git a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp new file mode 100644 --- /dev/null +++ b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp @@ -0,0 +1,237 @@ +//===- SimplifyIntrinsics.cpp -- replace intrinsics with simpler form -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +/// \file +/// This pass looks for suitable calls to runtime library for intrinsics that +/// can be simplified/specialized and replaces with a specialized function. +/// +/// For example, SUM(arr) can be specialized as a simple function with one loop, +/// compared to the three arguments (plus file & line info) that the runtime +/// call has - when the argument is a 1D-array (multiple loops may be needed +/// for higher dimension arrays, of course). +/// +/// The general idea is that besides making the call simpler, it can also be +/// inlined by other passes that run after this pass, which further improves +/// performance, particularly when the work done in the function is trivial +/// and small in size. 
+//===----------------------------------------------------------------------===// + +#include "PassDetail.h" +#include "flang/Optimizer/Builder/BoxValue.h" +#include "flang/Optimizer/Builder/FIRBuilder.h" +#include "flang/Optimizer/Builder/Todo.h" +#include "flang/Optimizer/Dialect/FIROps.h" +#include "flang/Optimizer/Dialect/FIRType.h" +#include "flang/Optimizer/Support/FIRContext.h" +#include "flang/Optimizer/Transforms/Passes.h" +#include "mlir/IR/Matchers.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/DialectConversion.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "mlir/Transforms/RegionUtils.h" + +namespace { + +/// Module pass that replaces qualifying calls to the SUM runtime routine with +/// calls to a simplified function that the pass adds to the module. +class SimplifyIntrinsicsPass + : public fir::SimplifyIntrinsicsBase { +public: + /// Returns the function named \p basename followed by "_simplified", building + /// it in the builder's module when no function of that name exists yet. + mlir::func::FuncOp getOrCreateFunction(const mlir::Location &loc, + fir::FirOpBuilder &builder, + const mlir::Type &type, + const mlir::StringRef &basename); + /// Walks the module and rewrites the qualifying runtime calls. + void runOnOperation() override; +}; + +} // namespace + +/// Looks up the simplified replacement for the runtime function \p baseName and +/// builds it on first request. \p type is used as both the element type and +/// the result type, and \p loc is attached to every operation created. The +/// builder's insertion point is restored before returning. +mlir::func::FuncOp SimplifyIntrinsicsPass::getOrCreateFunction( + const mlir::Location &loc, fir::FirOpBuilder &builder, + const mlir::Type &type, const mlir::StringRef &baseName) { + // In future, the idea is that instead of building the function inside + // this function, this does the base creation, and calls a callback + // function (e.g. a lambda function) that fills in the actual content. + // For now, check that it's ONLY the SUM runtime call. + assert(baseName.startswith("_FortranASum")); + + std::string replacementName = mlir::Twine{baseName, "_simplified"}.str(); + mlir::ModuleOp module = builder.getModule(); + // If we already have a function, just return it. + mlir::func::FuncOp newFunc = + fir::FirOpBuilder::getNamedFunction(module, replacementName); + if (newFunc) + return newFunc; + + // Need to build the function! 
+ // Basic idea: + // function FortranASum_simplified(arr) + // T, dimension(:) :: arr + // T sum = 0 + // integer iter + // do iter = 0, extent(arr) - 1 + // sum = sum + arr[iter] + // end do + // FortranASum_simplified = sum + // end function FortranASum_simplified + // The new function takes a single box-of-none argument and returns one value + // of \p type. + mlir::Type boxType = fir::BoxType::get(builder.getNoneType()); + mlir::FunctionType fType = + mlir::FunctionType::get(builder.getContext(), {boxType}, {type}); + newFunc = + fir::FirOpBuilder::createFunction(loc, module, replacementName, fType); + auto inlineLinkage = mlir::LLVM::linkage::Linkage::LinkonceODR; + auto linkage = + mlir::LLVM::LinkageAttr::get(builder.getContext(), inlineLinkage); + newFunc->setAttr("llvm.linkage", linkage); + + // Save the position of the original call. + mlir::OpBuilder::InsertPoint insertPt = builder.saveInsertionPoint(); + builder.setInsertionPointToEnd(newFunc.addEntryBlock()); + + mlir::IndexType idxTy = builder.getIndexType(); + + mlir::Value zero = type.isa() + ? builder.createRealConstant(loc, type, 0.0) + : builder.createIntegerConstant(loc, type, 0); + mlir::Value sum = builder.create(loc, type); + builder.create(loc, zero, sum); + + mlir::Block::BlockArgListType args = newFunc.front().getArguments(); + mlir::Value arg = args[0]; + + mlir::Value zeroIdx = builder.createIntegerConstant(loc, idxTy, 0); + + // View the incoming box as a box of a rank-1 array of \p type whose extent + // is unknown at compile time. + fir::SequenceType::Shape flatShape = {fir::SequenceType::getUnknownExtent()}; + mlir::Type arrTy = fir::SequenceType::get(flatShape, type); + mlir::Type boxArrTy = fir::BoxType::get(arrTy); + mlir::Value array = builder.create(loc, boxArrTy, arg); + auto dims = + builder.create(loc, idxTy, idxTy, idxTy, array, zeroIdx); + // Result #1 of the dims query for dimension 0 is used as the element count. + mlir::Value len = dims.getResult(1); + mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1); + mlir::Value step = one; + + // We use C indexing here, so len-1 as loopcount + mlir::Value loopCount = builder.create(loc, len, one); + auto loop = builder.create(loc, zeroIdx, loopCount, step); + + // Begin loop code + 
mlir::OpBuilder::InsertPoint loopEndPt = builder.saveInsertionPoint(); + builder.setInsertionPointToStart(loop.getBody()); + + mlir::Type eleRefTy = builder.getRefType(type); + mlir::Value index = loop.getInductionVar(); + mlir::Value addr = + builder.create(loc, eleRefTy, array, index); + mlir::Value elem = builder.create(loc, addr); + mlir::Value sumVal = builder.create(loc, sum); + + mlir::Value res; + if (type.isa()) + res = builder.create(loc, elem, sumVal); + else if (type.isa()) + res = builder.create(loc, elem, sumVal); + else + TODO(loc, "Unsupported type"); + + builder.create(loc, res, sum); + // End of loop. + builder.restoreInsertionPoint(loopEndPt); + + mlir::Value resultVal = builder.create(loc, sum); + builder.create(loc, resultVal); + + // Now back to where we were adding code earlier... + builder.restoreInsertionPoint(insertPt); + + return newFunc; +} + +/// Returns true when the operation defining \p val has a first operand whose +/// producer matches the operation type tested below (presumably the "absent" +/// operation, going by the function name; the type argument is not visible +/// here -- TODO confirm). Returns false when \p val has no defining operation. +/// NOTE(review): getOperand(0) is reached without checking that the defining +/// operation has any operands -- confirm, or guard on the operand count. +static bool isOperandAbsent(mlir::Value val) { + if (mlir::Operation *op = val.getDefiningOp()) + return mlir::isa_and_nonnull( + op->getOperand(0).getDefiningOp()); + return false; +} + +/// Returns true when the first operand of the operation defining \p val is +/// produced by an operation that matches mlir::m_Zero(). +/// NOTE(review): getOperand(0) is reached without checking that the defining +/// operation has any operands -- confirm, or guard on the operand count. +static bool isZero(mlir::Value val) { + if (mlir::Operation *op = val.getDefiningOp()) + if (mlir::Operation *defOp = op->getOperand(0).getDefiningOp()) + return mlir::matchPattern(defOp, mlir::m_Zero()); + return false; +} + +/// Follows the chain of first operands upwards from the operation defining +/// \p val and returns the shape operand of the first fir::EmboxOp reached, or a +/// null value when the chain ends first. The operation defining \p val itself +/// is not tested. +/// NOTE(review): an operation with no operands anywhere in that chain makes +/// getOperand(0) index out of range -- confirm, or guard on the operand count. +static mlir::Value findShape(mlir::Value val) { + mlir::Operation *defOp = val.getDefiningOp(); + while (defOp) { + defOp = defOp->getOperand(0).getDefiningOp(); + if (fir::EmboxOp box = mlir::dyn_cast_or_null(defOp)) + return box.getShape(); + } + return {}; +} + +/// Returns the rank of the shape type found by findShape for \p val, or 0 when +/// findShape returns a null value. +/// NOTE(review): shapeVal.getDefiningOp() is dereferenced without a null +/// check -- confirm the shape is never a block argument. +static unsigned getDimCount(mlir::Value val) { + if (mlir::Value shapeVal = findShape(val)) { + mlir::Type resType = shapeVal.getDefiningOp()->getResultTypes()[0]; + return fir::getRankOfShapeType(resType); + } + return 0; +} + +/// Visits every operation in the module. A call whose callee name starts with +/// "_FortranASum", ends with "Integer4" or "Real8", has its dim and mask +/// arguments recognised as absent and has a rank-1 first argument is replaced +/// by a call to the function returned by getOrCreateFunction. +void SimplifyIntrinsicsPass::runOnOperation() { + mlir::ModuleOp module = getOperation(); + fir::KindMapping kindMap = fir::getKindMapping(module); + module.walk([&](mlir::Operation *op) { + if (auto call 
= mlir::dyn_cast(op)) { + if (mlir::SymbolRefAttr callee = call.getCalleeAttr()) { + mlir::StringRef funcName = callee.getLeafReference().getValue(); + // Replace call to runtime function for SUM when it has a single + // argument (no dim or mask argument) for 1D arrays with either + // Integer4 or Real8 types. Other forms are ignored. + // The new function is added to the module. + // + // Prototype for runtime call (from sum.cpp): + // RTNAME(Sum)(const Descriptor &x, const char *source, int line, + // int dim, const Descriptor *mask) + if (funcName.startswith("_FortranASum")) { + mlir::Operation::operand_range args = call.getArgs(); + // args[1] and args[2] are source filename and line number, ignored. + // NOTE(review): args[3] and args[4] are indexed without checking the + // operand count; a callee that matches the prefix but takes fewer + // operands would index out of range -- confirm, or guard on the count. + const mlir::Value &dim = args[3]; + const mlir::Value &mask = args[4]; + // dim is zero when it is absent, which is an implementation + // detail in the runtime library. + bool dimAndMaskAbsent = isZero(dim) && isOperandAbsent(mask); + unsigned rank = getDimCount(args[0]); + if (dimAndMaskAbsent && rank == 1) { + mlir::Location loc = call.getLoc(); + mlir::Type type; + fir::FirOpBuilder builder(op, kindMap); + if (funcName.endswith("Integer4")) { + type = mlir::IntegerType::get(builder.getContext(), 32); + } else if (funcName.endswith("Real8")) { + type = mlir::FloatType::getF64(builder.getContext()); + } else { + return; + } + mlir::func::FuncOp newFunc = + getOrCreateFunction(loc, builder, type, funcName); + auto newCall = builder.create( + loc, newFunc, mlir::ValueRange{args[0]}); + // Redirect every use of the old call to the new one, then erase it. + call->replaceAllUsesWith(newCall.getResults()); + call->dropAllReferences(); + call->erase(); + } + } + } + } + }); +} + +/// Creates a new instance of the pass; Passes.td names this function as the +/// pass constructor. +std::unique_ptr fir::createSimplifyIntrinsicsPass() { + return std::make_unique(); +} diff --git a/flang/test/Transforms/simplifyintrinsics.fir b/flang/test/Transforms/simplifyintrinsics.fir new file mode 100644 --- /dev/null +++ b/flang/test/Transforms/simplifyintrinsics.fir @@ -0,0 +1,319 @@ +// RUN: fir-opt --split-input-file --simplify-intrinsics %s | 
FileCheck %s + +// Call to SUM with 1D I32 array is replaced. +module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.target_triple = "native"} { + func.func @sum_1d_array_int(%arg0: !fir.ref> {fir.bindc_name = "a"}) -> i32 { + %c10 = arith.constant 10 : index + %0 = fir.alloca i32 {bindc_name = "test_sum_2", uniq_name = "_QFtest_sum_2Etest_sum_2"} + %1 = fir.shape %c10 : (index) -> !fir.shape<1> + %2 = fir.embox %arg0(%1) : (!fir.ref>, !fir.shape<1>) -> !fir.box> + %3 = fir.absent !fir.box + %c0 = arith.constant 0 : index + %4 = fir.address_of(@_QQcl.2E2F6973756D5F322E66393000) : !fir.ref> + %c5_i32 = arith.constant 5 : i32 + %5 = fir.convert %2 : (!fir.box>) -> !fir.box + %6 = fir.convert %4 : (!fir.ref>) -> !fir.ref + %7 = fir.convert %c0 : (index) -> i32 + %8 = fir.convert %3 : (!fir.box) -> !fir.box + %9 = fir.call @_FortranASumInteger4(%5, %6, %c5_i32, %7, %8) : (!fir.box, !fir.ref, i32, i32, !fir.box) -> i32 + fir.store %9 to %0 : !fir.ref + %10 = fir.load %0 : !fir.ref + return %10 : i32 + } + func.func private @_FortranASumInteger4(!fir.box, !fir.ref, i32, i32, !fir.box) -> i32 attributes {fir.runtime} + fir.global linkonce @_QQcl.2E2F6973756D5F322E66393000 constant : !fir.char<1,13> { + %0 = fir.string_lit "./isum_2.f90\00"(13) : !fir.char<1,13> + fir.has_value %0 : !fir.char<1,13> + } +} + + +// CHECK-LABEL: func.func @sum_1d_array_int( +// CHECK-SAME: %[[A:.*]]: !fir.ref> {fir.bindc_name = "a"}) -> i32 { +// CHECK: %[[SHAPE:.*]] = fir.shape %{{.*}} : (index) -> !fir.shape<1> +// CHECK: %[[A_BOX_I32:.*]] = fir.embox %[[A]](%[[SHAPE]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> +// CHECK: %[[A_BOX_NONE:.*]] = fir.convert %[[A_BOX_I32]] : (!fir.box>) -> !fir.box +// CHECK-NOT: fir.call @_FortranASumInteger4({{.*}}) +// CHECK: %[[RES:.*]] = fir.call @_FortranASumInteger4_simplified(%[[A_BOX_NONE]]) : (!fir.box) -> i32 +// CHECK-NOT: fir.call @_FortranASumInteger4({{.*}}) +// CHECK: return %{{.*}} : i32 +// CHECK: } +// CHECK: func.func 
private @_FortranASumInteger4(!fir.box, !fir.ref, i32, i32, !fir.box) -> i32 attributes {fir.runtime} + +// CHECK-LABEL: func.func private @_FortranASumInteger4_simplified( +// CHECK-SAME: %[[ARR:.*]]: !fir.box) -> i32 attributes {llvm.linkage = #llvm.linkage} { +// CHECK: %[[CI32_0:.*]] = arith.constant 0 : i32 +// CHECK: %[[SUM:.*]] = fir.alloca i32 +// CHECK: fir.store %[[CI32_0]] to %[[SUM]] : !fir.ref +// CHECK: %[[CINDEX_0:.*]] = arith.constant 0 : index +// CHECK: %[[ARR_BOX_I32:.*]] = fir.convert %[[ARR]] : (!fir.box) -> !fir.box> +// CHECK: %[[DIMS:.*]]:3 = fir.box_dims %[[ARR_BOX_I32]], %[[CINDEX_0]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[CINDEX_1:.*]] = arith.constant 1 : index +// CHECK: %[[EXTENT:.*]] = arith.subi %[[DIMS]]#1, %[[CINDEX_1]] : index +// CHECK: fir.do_loop %[[ITER:.*]] = %[[CINDEX_0]] to %[[EXTENT]] step %[[CINDEX_1]] { +// CHECK: %[[ITEM:.*]] = fir.coordinate_of %[[ARR_BOX_I32]], %[[ITER]] : (!fir.box>, index) -> !fir.ref +// CHECK: %[[ITEM_VAL:.*]] = fir.load %[[ITEM]] : !fir.ref +// CHECK: %[[SUM_VAL:.*]] = fir.load %[[SUM]] : !fir.ref +// CHECK: %[[NEW_SUM:.*]] = arith.addi %[[ITEM_VAL]], %[[SUM_VAL]] : i32 +// CHECK: fir.store %[[NEW_SUM]] to %[[SUM]] : !fir.ref +// CHECK: } +// CHECK: %[[RET:.*]] = fir.load %[[SUM]] : !fir.ref +// CHECK: return %[[RET]] : i32 +// CHECK: } + +// ----- + +// Call to SUM with 2D I32 arrays is not replaced. 
+module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.target_triple = "native"} { + func.func @sum_2d_array_int(%arg0: !fir.ref> {fir.bindc_name = "a"}) -> i32 { + %c10 = arith.constant 10 : index + %c10_0 = arith.constant 10 : index + %0 = fir.alloca i32 {bindc_name = "test_sum_3", uniq_name = "_QFtest_sum_3Etest_sum_3"} + %1 = fir.shape %c10, %c10_0 : (index, index) -> !fir.shape<2> + %2 = fir.embox %arg0(%1) : (!fir.ref>, !fir.shape<2>) -> !fir.box> + %3 = fir.absent !fir.box + %c0 = arith.constant 0 : index + %4 = fir.address_of(@_QQcl.2E2F6973756D5F332E66393000) : !fir.ref> + %c5_i32 = arith.constant 5 : i32 + %5 = fir.convert %2 : (!fir.box>) -> !fir.box + %6 = fir.convert %4 : (!fir.ref>) -> !fir.ref + %7 = fir.convert %c0 : (index) -> i32 + %8 = fir.convert %3 : (!fir.box) -> !fir.box + %9 = fir.call @_FortranASumInteger4(%5, %6, %c5_i32, %7, %8) : (!fir.box, !fir.ref, i32, i32, !fir.box) -> i32 + fir.store %9 to %0 : !fir.ref + %10 = fir.load %0 : !fir.ref + return %10 : i32 + } + func.func private @_FortranASumInteger4(!fir.box, !fir.ref, i32, i32, !fir.box) -> i32 attributes {fir.runtime} + fir.global linkonce @_QQcl.2E2F6973756D5F332E66393000 constant : !fir.char<1,13> { + %0 = fir.string_lit "./isum_3.f90\00"(13) : !fir.char<1,13> + fir.has_value %0 : !fir.char<1,13> + } +} + +// CHECK-LABEL: func.func @sum_2d_array_int({{.*}} !fir.ref> {fir.bindc_name = "a"}) -> i32 { +// CHECK-NOT: fir.call @_FortranASumInteger4_simplified({{.*}}) +// CHECK: fir.call @_FortranASumInteger4({{.*}}) : (!fir.box, !fir.ref, i32, i32, !fir.box) -> i32 +// CHECK-NOT: fir.call @_FortranASumInteger4_simplified({{.*}}) + +// ----- + +// Call to SUM with 1D F64 is replaced. 
+module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.target_triple = "native"} { + func.func @sum_1d_real(%arg0: !fir.ref> {fir.bindc_name = "a"}) -> f64 { + %c10 = arith.constant 10 : index + %0 = fir.alloca f64 {bindc_name = "sum_1d_real", uniq_name = "_QFsum_1d_realEsum_1d_real"} + %1 = fir.shape %c10 : (index) -> !fir.shape<1> + %2 = fir.embox %arg0(%1) : (!fir.ref>, !fir.shape<1>) -> !fir.box> + %3 = fir.absent !fir.box + %c0 = arith.constant 0 : index + %4 = fir.address_of(@_QQcl.2E2F6973756D5F352E66393000) : !fir.ref> + %c5_i32 = arith.constant 5 : i32 + %5 = fir.convert %2 : (!fir.box>) -> !fir.box + %6 = fir.convert %4 : (!fir.ref>) -> !fir.ref + %7 = fir.convert %c0 : (index) -> i32 + %8 = fir.convert %3 : (!fir.box) -> !fir.box + %9 = fir.call @_FortranASumReal8(%5, %6, %c5_i32, %7, %8) : (!fir.box, !fir.ref, i32, i32, !fir.box) -> f64 + fir.store %9 to %0 : !fir.ref + %10 = fir.load %0 : !fir.ref + return %10 : f64 + } + func.func private @_FortranASumReal8(!fir.box, !fir.ref, i32, i32, !fir.box) -> f64 attributes {fir.runtime} + fir.global linkonce @_QQcl.2E2F6973756D5F352E66393000 constant : !fir.char<1,13> { + %0 = fir.string_lit "./isum_5.f90\00"(13) : !fir.char<1,13> + fir.has_value %0 : !fir.char<1,13> + } +} + + +// CHECK-LABEL: func.func @sum_1d_real( +// CHECK-SAME: %[[A:.*]]: !fir.ref> {fir.bindc_name = "a"}) -> f64 { +// CHECK: %[[CINDEX_10:.*]] = arith.constant 10 : index +// CHECK: %[[SHAPE:.*]] = fir.shape %[[CINDEX_10]] : (index) -> !fir.shape<1> +// CHECK: %[[A_BOX_F64:.*]] = fir.embox %[[A]](%[[SHAPE]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> +// CHECK: %[[A_BOX_NONE:.*]] = fir.convert %[[A_BOX_F64]] : (!fir.box>) -> !fir.box +// CHECK-NOT: fir.call @_FortranASumReal8({{.*}}) +// CHECK: %[[RES:.*]] = fir.call @_FortranASumReal8_simplified(%[[A_BOX_NONE]]) : (!fir.box) -> f64 +// CHECK-NOT: fir.call @_FortranASumReal8({{.*}}) +// CHECK: return %{{.*}} : f64 +// CHECK: } + +// CHECK-LABEL: func.func private 
@_FortranASumReal8_simplified( +// CHECK-SAME: %[[ARR:.*]]: !fir.box) -> f64 attributes {llvm.linkage = #llvm.linkage} { +// CHECK: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f64 +// CHECK: %[[SUM:.*]] = fir.alloca f64 +// CHECK: fir.store %[[ZERO]] to %[[SUM]] : !fir.ref +// CHECK: %[[CINDEX_0:.*]] = arith.constant 0 : index +// CHECK: %[[ARR_BOX_F64:.*]] = fir.convert %[[ARR]] : (!fir.box) -> !fir.box> +// CHECK: %[[DIMS:.*]]:3 = fir.box_dims %[[ARR_BOX_F64]], %[[CINDEX_0]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[CINDEX_1:.*]] = arith.constant 1 : index +// CHECK: %[[EXTENT:.*]] = arith.subi %[[DIMS]]#1, %[[CINDEX_1]] : index +// CHECK: fir.do_loop %[[ITER:.*]] = %[[CINDEX_0]] to %[[EXTENT]] step %[[CINDEX_1]] { +// CHECK: %[[ITEM:.*]] = fir.coordinate_of %[[ARR_BOX_F64]], %[[ITER]] : (!fir.box>, index) -> !fir.ref +// CHECK: %[[ITEM_VAL:.*]] = fir.load %[[ITEM]] : !fir.ref +// CHECK: %[[SUM_VAL:.*]] = fir.load %[[SUM]] : !fir.ref +// CHECK: %[[NEW_SUM:.*]] = arith.addf %[[ITEM_VAL]], %[[SUM_VAL]] : f64 +// CHECK: fir.store %[[NEW_SUM]] to %[[SUM]] : !fir.ref +// CHECK: } +// CHECK: %[[RES:.*]] = fir.load %[[SUM]] : !fir.ref +// CHECK: return %[[RES]] : f64 +// CHECK: } + +// ----- + +// Call to SUM with 1D COMPLEX array is not replaced. 
+module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.target_triple = "native"} { + func.func @sum_1d_complex(%arg0: !fir.ref>> {fir.bindc_name = "a"}) -> !fir.complex<4> { + %0 = fir.alloca !fir.complex<4> + %c10 = arith.constant 10 : index + %1 = fir.alloca !fir.complex<4> {bindc_name = "sum_1d_complex", uniq_name = "_QFsum_1d_complexEsum_1d_complex"} + %2 = fir.shape %c10 : (index) -> !fir.shape<1> + %3 = fir.embox %arg0(%2) : (!fir.ref>>, !fir.shape<1>) -> !fir.box>> + %4 = fir.absent !fir.box + %c0 = arith.constant 0 : index + %5 = fir.address_of(@_QQcl.2E2F6973756D5F362E66393000) : !fir.ref> + %c5_i32 = arith.constant 5 : i32 + %6 = fir.convert %0 : (!fir.ref>) -> !fir.ref> + %7 = fir.convert %3 : (!fir.box>>) -> !fir.box + %8 = fir.convert %5 : (!fir.ref>) -> !fir.ref + %9 = fir.convert %c0 : (index) -> i32 + %10 = fir.convert %4 : (!fir.box) -> !fir.box + %11 = fir.call @_FortranACppSumComplex4(%6, %7, %8, %c5_i32, %9, %10) : (!fir.ref>, !fir.box, !fir.ref, i32, i32, !fir.box) -> none + %12 = fir.load %0 : !fir.ref> + fir.store %12 to %1 : !fir.ref> + %13 = fir.load %1 : !fir.ref> + return %13 : !fir.complex<4> + } + func.func private @_FortranACppSumComplex4(!fir.ref>, !fir.box, !fir.ref, i32, i32, !fir.box) -> none attributes {fir.runtime} + fir.global linkonce @_QQcl.2E2F6973756D5F362E66393000 constant : !fir.char<1,13> { + %0 = fir.string_lit "./isum_6.f90\00"(13) : !fir.char<1,13> + fir.has_value %0 : !fir.char<1,13> + } +} + +// CHECK-LABEL: func.func @sum_1d_complex(%{{.*}}: !fir.ref>> {fir.bindc_name = "a"}) -> !fir.complex<4> { +// CHECK-NOT: fir.call @_FortranACppSumComplex4_simplified({{.*}}) +// CHECK: fir.call @_FortranACppSumComplex4({{.*}}) : (!fir.ref>, !fir.box, !fir.ref, i32, i32, !fir.box) -> none +// CHECK-NOT: fir.call @_FortranACppSumComplex4_simplified({{.*}}) + +// ----- + +// Test that two functions calling the same SUM function +// generates only ONE function declaration (and that both +// calls are 
converted) +module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.target_triple = "native"} { + func.func @sum_1d_calla(%arg0: !fir.ref> {fir.bindc_name = "a"}) -> i32 { + %c10 = arith.constant 10 : index + %0 = fir.alloca i32 {bindc_name = "sum_1d_calla", uniq_name = "_QFsum_1d_callaEsum_1d_calla"} + %1 = fir.shape %c10 : (index) -> !fir.shape<1> + %2 = fir.embox %arg0(%1) : (!fir.ref>, !fir.shape<1>) -> !fir.box> + %3 = fir.absent !fir.box + %c0 = arith.constant 0 : index + %4 = fir.address_of(@_QQcl.2E2F6973756D5F372E66393000) : !fir.ref> + %c5_i32 = arith.constant 5 : i32 + %5 = fir.convert %2 : (!fir.box>) -> !fir.box + %6 = fir.convert %4 : (!fir.ref>) -> !fir.ref + %7 = fir.convert %c0 : (index) -> i32 + %8 = fir.convert %3 : (!fir.box) -> !fir.box + %9 = fir.call @_FortranASumInteger4(%5, %6, %c5_i32, %7, %8) : (!fir.box, !fir.ref, i32, i32, !fir.box) -> i32 + fir.store %9 to %0 : !fir.ref + %10 = fir.load %0 : !fir.ref + return %10 : i32 + } + func.func @sum_1d_callb(%arg0: !fir.ref> {fir.bindc_name = "a"}) -> i32 { + %c20 = arith.constant 20 : index + %0 = fir.alloca i32 {bindc_name = "sum_1d_callb", uniq_name = "_QFsum_1d_callbEsum_1d_callb"} + %1 = fir.shape %c20 : (index) -> !fir.shape<1> + %2 = fir.embox %arg0(%1) : (!fir.ref>, !fir.shape<1>) -> !fir.box> + %3 = fir.absent !fir.box + %c0 = arith.constant 0 : index + %4 = fir.address_of(@_QQcl.2E2F6973756D5F372E66393000) : !fir.ref> + %c12_i32 = arith.constant 12 : i32 + %5 = fir.convert %2 : (!fir.box>) -> !fir.box + %6 = fir.convert %4 : (!fir.ref>) -> !fir.ref + %7 = fir.convert %c0 : (index) -> i32 + %8 = fir.convert %3 : (!fir.box) -> !fir.box + %9 = fir.call @_FortranASumInteger4(%5, %6, %c12_i32, %7, %8) : (!fir.box, !fir.ref, i32, i32, !fir.box) -> i32 + fir.store %9 to %0 : !fir.ref + %10 = fir.load %0 : !fir.ref + return %10 : i32 + } + func.func private @_FortranASumInteger4(!fir.box, !fir.ref, i32, i32, !fir.box) -> i32 attributes {fir.runtime} + fir.global linkonce 
@_QQcl.2E2F6973756D5F372E66393000 constant : !fir.char<1,13> { + %0 = fir.string_lit "./isum_7.f90\00"(13) : !fir.char<1,13> + fir.has_value %0 : !fir.char<1,13> + } +} + +// CHECK-LABEL: func.func @sum_1d_calla(%{{.*}}) -> i32 { +// CHECK-NOT: fir.call @_FortranASumInteger4({{.*}}) +// CHECK: fir.call @_FortranASumInteger4_simplified(%{{.*}}) +// CHECK-NOT: fir.call @_FortranASumInteger4({{.*}}) +// CHECK: } + +// CHECK-LABEL: func.func @sum_1d_callb(%{{.*}}) -> i32 { +// CHECK-NOT: fir.call @_FortranASumInteger4({{.*}}) +// CHECK: fir.call @_FortranASumInteger4_simplified(%{{.*}}) +// CHECK-NOT: fir.call @_FortranASumInteger4({{.*}}) +// CHECK: } + +// CHECK-LABEL: func.func private @_FortranASumInteger4_simplified({{.*}}) -> i32 {{.*}} { +// CHECK: return %{{.*}} : i32 +// CHECK: } +// CHECK-NOT: func.func private @_FortranASumInteger4_simplified({{.*}}) + +// ----- + +module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.target_triple = "native"} { + func.func @sum_1d_stride(%arg0: !fir.ref> {fir.bindc_name = "a"}) -> i32 { + %c20 = arith.constant 20 : index + %0 = fir.alloca i32 {bindc_name = "sum_1d_stride", uniq_name = "_QFsum_1d_strideEsum_1d_stride"} + %c1 = arith.constant 1 : index + %c2_i64 = arith.constant 2 : i64 + %1 = fir.convert %c2_i64 : (i64) -> index + %2 = arith.addi %c1, %c20 : index + %3 = arith.subi %2, %c1 : index + %4 = fir.shape %c20 : (index) -> !fir.shape<1> + %5 = fir.slice %c1, %3, %1 : (index, index, index) -> !fir.slice<1> + %6 = fir.embox %arg0(%4) [%5] : (!fir.ref>, !fir.shape<1>, !fir.slice<1>) -> !fir.box> + %7 = fir.absent !fir.box + %c0 = arith.constant 0 : index + %8 = fir.address_of(@_QQcl.2E2F6973756D5F382E66393000) : !fir.ref> + %c5_i32 = arith.constant 5 : i32 + %9 = fir.convert %6 : (!fir.box>) -> !fir.box + %10 = fir.convert %8 : (!fir.ref>) -> !fir.ref + %11 = fir.convert %c0 : (index) -> i32 + %12 = fir.convert %7 : (!fir.box) -> !fir.box + %13 = fir.call @_FortranASumInteger4(%9, %10, %c5_i32, 
%11, %12) : (!fir.box, !fir.ref, i32, i32, !fir.box) -> i32 + fir.store %13 to %0 : !fir.ref + %14 = fir.load %0 : !fir.ref + return %14 : i32 + } + func.func private @_FortranASumInteger4(!fir.box, !fir.ref, i32, i32, !fir.box) -> i32 attributes {fir.runtime} + fir.global linkonce @_QQcl.2E2F6973756D5F382E66393000 constant : !fir.char<1,13> { + %0 = fir.string_lit "./isum_8.f90\00"(13) : !fir.char<1,13> + fir.has_value %0 : !fir.char<1,13> + } +} + +// CHECK-LABEL: func.func @sum_1d_stride(%{{.*}} -> i32 { +// CHECK: %[[CI64_2:.*]] = arith.constant 2 : i64 +// CHECK: %[[CINDEX_2:.*]] = fir.convert %[[CI64_2]] : (i64) -> index +// CHECK: %[[SHAPE:.*]] = fir.shape %{{.*}} +// CHECK: %[[SLICE:.*]] = fir.slice %{{.*}}, %{{.*}}, %[[CINDEX_2]] : (index, index, index) -> !fir.slice<1> +// CHECK: %[[A_BOX_I32:.*]] = fir.embox %{{.*}}(%[[SHAPE]]) {{\[}}%[[SLICE]]] : (!fir.ref>, !fir.shape<1>, !fir.slice<1>) -> !fir.box> +// CHECK: %[[A_BOX_NONE:.*]] = fir.convert %[[A_BOX_I32]] : (!fir.box>) -> !fir.box +// CHECK: %{{.*}} = fir.call @_FortranASumInteger4_simplified(%[[A_BOX_NONE]]) : (!fir.box) -> i32 +// CHECK: return %{{.*}} : i32 +// CHECK: } + +// CHECK-LABEL: func.func private @_FortranASumInteger4_simplified(%{{.*}}) -> i32 attributes {llvm.linkage = #llvm.linkage} { +// CHECK: %[[ARR_BOX_I32:.*]] = fir.convert %{{.*}} : (!fir.box) -> !fir.box> +// CHECK: %[[DIMS:.*]]:3 = fir.box_dims %[[ARR_BOX_I32]], %{{.*}} : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[CINDEX_1:.*]] = arith.constant 1 : index +// CHECK: %[[EXTENT:.*]] = arith.subi %[[DIMS]]#1, %[[CINDEX_1]] : index +// CHECK: fir.do_loop %[[ITER:.*]] = %{{.*}} to %[[EXTENT]] step %[[CINDEX_1]] { +// CHECK: %{{.*}} = fir.coordinate_of %[[ARR_BOX_I32]], %[[ITER]] : (!fir.box>, index) -> !fir.ref +// CHECK: } +// CHECK: return %{{.*}} : i32 +// CHECK: }