diff --git a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td --- a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td +++ b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td @@ -606,6 +606,11 @@ The shape and typeparams operands represent the extents and type parameters of the resulting array value. + Currently there is no way to control the iteration order of a hlfir + elemental operation and so operations in the body of the elemental must + not have side effects. If this is changed, an attribute must be added so + that the elemental inlining pass can skip these impure elementals. + Example: Y + X, with Integer :: X(10, 20), Y(10,20) ``` @@ -670,9 +675,17 @@ let description = [{ Given an hlfir.expr array value, hlfir.apply allow retrieving the value for an element given one based indices. + When hlfir.apply is used on an hlfir.elemental, and if the hlfir.elemental operation evaluation can be moved to the location of the hlfir.apply, it is as if the hlfir.elemental body was evaluated given the hlfir.apply indices. + Therefore, apply operations on hlfir.elemental expressions should be located + such that evaluating the hlfir.elemental at the position of the hlfir.apply + operation produces the same result as evaluating the hlfir.elemental at its + location in the instruction stream. Attention should be paid to + hlfir.elemental memory side effects (in practice these are unlikely). + "10.1.4 Evaluation of operations" says that expression evaluation shall not + impact/be impacted by other expression evaluation in the statement. }]; let arguments = (ins hlfir_ExprType:$expr, diff --git a/flang/include/flang/Optimizer/HLFIR/Passes.h b/flang/include/flang/Optimizer/HLFIR/Passes.h --- a/flang/include/flang/Optimizer/HLFIR/Passes.h +++ b/flang/include/flang/Optimizer/HLFIR/Passes.h @@ -26,6 +26,7 @@ std::unique_ptr createBufferizeHLFIRPass(); std::unique_ptr createLowerHLFIRIntrinsicsPass(); std::unique_ptr createSimplifyHLFIRIntrinsicsPass(); +std::unique_ptr createInlineElementalsPass(); std::unique_ptr createLowerHLFIROrderedAssignmentsPass(); #define GEN_PASS_REGISTRATION diff --git a/flang/include/flang/Optimizer/HLFIR/Passes.td b/flang/include/flang/Optimizer/HLFIR/Passes.td --- a/flang/include/flang/Optimizer/HLFIR/Passes.td +++ b/flang/include/flang/Optimizer/HLFIR/Passes.td @@ -43,4 +43,9 @@ let constructor = "hlfir::createSimplifyHLFIRIntrinsicsPass()"; } +def InlineElementals : Pass<"inline-elementals", "::mlir::func::FuncOp"> { + let summary = "Inline chained hlfir.elemental operations"; + let constructor = "hlfir::createInlineElementalsPass()"; +} + #endif //FORTRAN_DIALECT_HLFIR_PASSES diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc --- a/flang/include/flang/Tools/CLOptions.inc +++ b/flang/include/flang/Tools/CLOptions.inc @@ -242,6 +242,7 @@ addCanonicalizerPassWithoutRegionSimplification(pm); pm.addPass(hlfir::createSimplifyHLFIRIntrinsicsPass()); } + pm.addPass(hlfir::createInlineElementalsPass()); pm.addPass(hlfir::createLowerHLFIROrderedAssignmentsPass()); pm.addPass(hlfir::createLowerHLFIRIntrinsicsPass()); pm.addPass(hlfir::createBufferizeHLFIRPass()); diff --git a/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt b/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt --- a/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt +++ b/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt @@ -3,6 +3,7 @@ add_flang_library(HLFIRTransforms BufferizeHLFIR.cpp ConvertToFIR.cpp + InlineElementals.cpp LowerHLFIRIntrinsics.cpp LowerHLFIROrderedAssignments.cpp ScheduleOrderedAssignments.cpp diff --git a/flang/lib/Optimizer/HLFIR/Transforms/InlineElementals.cpp b/flang/lib/Optimizer/HLFIR/Transforms/InlineElementals.cpp new file mode 100644 --- /dev/null +++ b/flang/lib/Optimizer/HLFIR/Transforms/InlineElementals.cpp @@ -0,0 +1,119 @@ +//===- InlineElementals.cpp - Inline chained hlfir.elemental ops ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Chained elemental operations like a + b + c can inline the first elemental +// at the hlfir.apply in the body of the second one (as described in +// docs/HighLevelFIR.md). This has to be done in a pass rather than in lowering +// so that it happens after the HLFIR intrinsic simplification pass. +//===----------------------------------------------------------------------===// + +#include "flang/Optimizer/Builder/FIRBuilder.h" +#include "flang/Optimizer/Builder/HLFIRTools.h" +#include "flang/Optimizer/Dialect/Support/FIRContext.h" +#include "flang/Optimizer/Dialect/Support/KindMapping.h" +#include "flang/Optimizer/HLFIR/HLFIROps.h" +#include "flang/Optimizer/HLFIR/Passes.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/IR/IRMapping.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Transforms/DialectConversion.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "llvm/ADT/TypeSwitch.h" +#include + +namespace hlfir { +#define GEN_PASS_DEF_INLINEELEMENTALS +#include "flang/Optimizer/HLFIR/Passes.h.inc" +} // namespace hlfir + +/// If the elemental has only two uses and those two are an apply operation and +/// a destory operation, return those two, otherwise return {} +static std::optional> +getTwoUses(hlfir::ElementalOp elemental) { + mlir::Operation::user_range users = elemental->getUsers(); + // don't inline anything with more than one use (plus hfir.destroy) + if (std::distance(users.begin(), users.end()) != 2) { + return std::nullopt; + } + + hlfir::ApplyOp apply; + hlfir::DestroyOp destroy; + for (mlir::Operation *user : users) + mlir::TypeSwitch(user) + .Case([&](hlfir::ApplyOp op) { apply = op; }) + .Case([&](hlfir::DestroyOp op) { destroy = op; }); + + if (!apply || !destroy) + return std::nullopt; + return std::pair{apply, destroy}; +} + +namespace { +class InlineElementalConversion + : public mlir::OpRewritePattern { +public: + using mlir::OpRewritePattern::OpRewritePattern; + + mlir::LogicalResult + matchAndRewrite(hlfir::ElementalOp elemental, + mlir::PatternRewriter &rewriter) const override { + std::optional> maybeTuple = + getTwoUses(elemental); + if (!maybeTuple) { + return rewriter.notifyMatchFailure(elemental.getLoc(), + [](mlir::Diagnostic &) {}); + } + auto [apply, destroy] = *maybeTuple; + + assert(elemental.getRegion().hasOneBlock() && + "expect elemental region to have one block"); + + fir::FirOpBuilder builder{rewriter, + fir::KindMapping{rewriter.getContext()}}; + builder.setInsertionPointAfter(apply); + hlfir::YieldElementOp yield = hlfir::inlineElementalOp( + elemental.getLoc(), builder, elemental, apply.getIndices()); + + // remove the old elemental and all of the bookkeeping + rewriter.replaceAllUsesWith(apply.getResult(), yield.getElementValue()); + rewriter.eraseOp(yield); + rewriter.eraseOp(apply); + rewriter.eraseOp(destroy); + rewriter.eraseOp(elemental); + + return mlir::success(); + } +}; + +class InlineElementalsPass + : public hlfir::impl::InlineElementalsBase { +public: + void runOnOperation() override { + mlir::func::FuncOp func = getOperation(); + mlir::MLIRContext *context = &getContext(); + + mlir::GreedyRewriteConfig config; + // Prevent the pattern driver from merging blocks. + config.enableRegionSimplification = false; + + mlir::RewritePatternSet patterns(context); + patterns.insert(context); + + if (mlir::failed(mlir::applyPatternsAndFoldGreedily( + func, std::move(patterns), config))) { + mlir::emitError(func->getLoc(), "failure in HLFIR elemental inlining"); + signalPassFailure(); + } + } +}; +} // namespace + +std::unique_ptr hlfir::createInlineElementalsPass() { + return std::make_unique(); +} diff --git a/flang/test/Driver/mlir-debug-pass-pipeline.f90 b/flang/test/Driver/mlir-debug-pass-pipeline.f90 --- a/flang/test/Driver/mlir-debug-pass-pipeline.f90 +++ b/flang/test/Driver/mlir-debug-pass-pipeline.f90 @@ -25,6 +25,8 @@ ! ALL: Pass statistics report ! ALL: Fortran::lower::VerifierPass +! ALL-NEXT: 'func.func' Pipeline +! ALL-NEXT: InlineElementals ! ALL-NEXT: LowerHLFIROrderedAssignments ! ALL-NEXT: LowerHLFIRIntrinsics ! ALL-NEXT: BufferizeHLFIR diff --git a/flang/test/Driver/mlir-pass-pipeline.f90 b/flang/test/Driver/mlir-pass-pipeline.f90 --- a/flang/test/Driver/mlir-pass-pipeline.f90 +++ b/flang/test/Driver/mlir-pass-pipeline.f90 @@ -15,7 +15,8 @@ ! O2-NEXT: Canonicalizer ! O2-NEXT: 'func.func' Pipeline ! O2-NEXT: SimplifyHLFIRIntrinsics -! ALL-NEXT: LowerHLFIROrderedAssignments +! ALL: InlineElementals +! ALL: LowerHLFIROrderedAssignments ! ALL-NEXT: LowerHLFIRIntrinsics ! ALL-NEXT: BufferizeHLFIR ! ALL-NEXT: ConvertHLFIRtoFIR diff --git a/flang/test/Fir/basic-program.fir b/flang/test/Fir/basic-program.fir --- a/flang/test/Fir/basic-program.fir +++ b/flang/test/Fir/basic-program.fir @@ -19,6 +19,7 @@ // PASSES: Canonicalizer // PASSES-NEXT: 'func.func' Pipeline // PASSES-NEXT: SimplifyHLFIRIntrinsics +// PASSES-NEXT: InlineElementals // PASSES-NEXT: LowerHLFIROrderedAssignments // PASSES-NEXT: LowerHLFIRIntrinsics // PASSES-NEXT: BufferizeHLFIR diff --git a/flang/test/HLFIR/inline-elemental.fir b/flang/test/HLFIR/inline-elemental.fir new file mode 100644 --- /dev/null +++ b/flang/test/HLFIR/inline-elemental.fir @@ -0,0 +1,245 @@ +// RUN: fir-opt --inline-elementals %s | FileCheck %s + +// check inlining one elemental into another +// a = b * c + d +func.func @inline_to_elemental(%arg0: !fir.box> {fir.bindc_name = "a"}, %arg1: !fir.box> {fir.bindc_name = "b"}, %arg2: !fir.box> {fir.bindc_name = "c"}, %arg3: !fir.box> {fir.bindc_name = "d"}) { + %0:2 = hlfir.declare %arg0 {uniq_name = "a"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %1:2 = hlfir.declare %arg1 {uniq_name = "b"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %2:2 = hlfir.declare %arg2 {uniq_name = "c"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %3:2 = hlfir.declare %arg3 {uniq_name = "d"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %c0 = arith.constant 0 : index + %4:3 = fir.box_dims %1#0, %c0 : (!fir.box>, index) -> (index, index, index) + %5 = fir.shape %4#1 : (index) -> !fir.shape<1> + %6 = hlfir.elemental %5 : (!fir.shape<1>) -> !hlfir.expr { + ^bb0(%arg4: index): + %8 = hlfir.designate %1#0 (%arg4) : (!fir.box>, index) -> !fir.ref + %9 = hlfir.designate %2#0 (%arg4) : (!fir.box>, index) -> !fir.ref + %10 = fir.load %8 : !fir.ref + %11 = fir.load %9 : !fir.ref + %12 = arith.muli %10, %11 : i32 + hlfir.yield_element %12 : i32 + } + %7 = hlfir.elemental %5 : (!fir.shape<1>) -> !hlfir.expr { + ^bb0(%arg4: index): + %8 = hlfir.apply %6, %arg4 : (!hlfir.expr, index) -> i32 + %9 = hlfir.designate %3#0 (%arg4) : (!fir.box>, index) -> !fir.ref + %10 = fir.load %9 : !fir.ref + %11 = arith.addi %8, %10 : i32 + hlfir.yield_element %11 : i32 + } + hlfir.assign %7 to %0#0 : !hlfir.expr, !fir.box> + hlfir.destroy %7 : !hlfir.expr + hlfir.destroy %6 : !hlfir.expr + return +} +// CHECK-LABEL: func.func @inline_to_elemental +// CHECK-SAME: %[[A_ARG:.*]]: !fir.box> {fir.bindc_name = "a"} +// CHECK-SAME: %[[B_ARG:.*]]: !fir.box> {fir.bindc_name = "b"} +// CHECK-SAME: %[[C_ARG:.*]]: !fir.box> {fir.bindc_name = "c"} +// CHECK-SAME: %[[D_ARG:.*]]: !fir.box> {fir.bindc_name = "d"} +// CHECK-NEXT: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[A:.*]]:2 = hlfir.declare %[[A_ARG]] +// CHECK-DAG: %[[B:.*]]:2 = hlfir.declare %[[B_ARG]] +// CHECK-DAG: %[[C:.*]]:2 = hlfir.declare %[[C_ARG]] +// CHECK-DAG: %[[D:.*]]:2 = hlfir.declare %[[D_ARG]] +// CHECK-NEXT: %[[B_DIM0:.*]]:3 = fir.box_dims %[[B]]#0, %[[C0]] +// CHECK-NEXT: %[[B_SHAPE:.*]] = fir.shape %[[B_DIM0]]#1 +// CHECK-NEXT: %[[EXPR:.*]] = hlfir.elemental %[[B_SHAPE]] +// CHECK-NEXT: ^bb0(%[[I:.*]]: index): +// inline the first elemental: +// CHECK-NEXT: %[[B_I_REF:.*]] = hlfir.designate %[[B]]#0 (%[[I]]) +// CHECK-NEXT: %[[C_I_REF:.*]] = hlfir.designate %[[C]]#0 (%[[I]]) +// CHECK-NEXT: %[[B_I:.*]] = fir.load %[[B_I_REF]] +// CHECK-NEXT: %[[C_I:.*]] = fir.load %[[C_I_REF]] +// CHECK-NEXT: %[[MUL:.*]] = arith.muli %[[B_I]], %[[C_I]] +// second elemental: +// CHECK-NEXT: %[[D_I_REF:.*]] = hlfir.designate %[[D]]#0 (%[[I]]) +// CHECK-NEXT: %[[D_I:.*]] = fir.load %[[D_I_REF]] +// CHECK-NEXT: %[[ADD:.*]] = arith.addi %[[MUL]], %[[D_I]] +// CHECK-NEXT: hlfir.yield_element %[[ADD]] +// CHECK-NEXT: } +// CHECK-NEXT: hlfir.assign %[[EXPR]] to %[[A]]#0 +// CHECK-NEXT: hlfir.destroy %[[EXPR]] +// CHECK-NEXT: return +// CHECK-NEXT: } + +// check inlining into a do_loop +func.func @inline_to_loop(%arg0: !fir.box> {fir.bindc_name = "a"}, %arg1: !fir.box> {fir.bindc_name = "b"}, %arg2: !fir.box> {fir.bindc_name = "c"}, %arg3: !fir.box> {fir.bindc_name = "d"}) { + %0:2 = hlfir.declare %arg0 {uniq_name = "a"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %1:2 = hlfir.declare %arg1 {uniq_name = "b"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %2:2 = hlfir.declare %arg2 {uniq_name = "c"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %3:2 = hlfir.declare %arg3 {uniq_name = "d"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %c0 = arith.constant 0 : index + %4:3 = fir.box_dims %1#0, %c0 : (!fir.box>, index) -> (index, index, index) + %5 = fir.shape %4#1 : (index) -> !fir.shape<1> + %6 = hlfir.elemental %5 : (!fir.shape<1>) -> !hlfir.expr { + ^bb0(%arg4: index): + %8 = hlfir.designate %1#0 (%arg4) : (!fir.box>, index) -> !fir.ref + %9 = hlfir.designate %2#0 (%arg4) : (!fir.box>, index) -> !fir.ref + %10 = fir.load %8 : !fir.ref + %11 = fir.load %9 : !fir.ref + %12 = arith.muli %10, %11 : i32 + hlfir.yield_element %12 : i32 + } + %array = fir.array_load %0#0 : (!fir.box>) -> !fir.array + %c1 = arith.constant 1 : index + %max = arith.subi %4#1, %c1 : index + %7 = fir.do_loop %arg4 = %c0 to %max step %c1 unordered iter_args(%arg5 = %array) -> (!fir.array) { + %8 = hlfir.apply %6, %arg4 : (!hlfir.expr, index) -> i32 + %9 = hlfir.designate %3#0 (%arg4) : (!fir.box>, index) -> !fir.ref + %10 = fir.load %9 : !fir.ref + %11 = arith.addi %8, %10 : i32 + %12 = fir.array_update %arg5, %11, %arg4 : (!fir.array, i32, index) -> !fir.array + fir.result %12 : !fir.array + } + fir.array_merge_store %array, %7 to %arg0 : !fir.array, !fir.array, !fir.box> + hlfir.destroy %6 : !hlfir.expr + return +} +// CHECK-LABEL: func.func @inline_to_loop +// CHECK-SAME: %[[A_ARG:.*]]: !fir.box> {fir.bindc_name = "a"} +// CHECK-SAME: %[[B_ARG:.*]]: !fir.box> {fir.bindc_name = "b"} +// CHECK-SAME: %[[C_ARG:.*]]: !fir.box> {fir.bindc_name = "c"} +// CHECK-SAME: %[[D_ARG:.*]]: !fir.box> {fir.bindc_name = "d"} +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[A:.*]]:2 = hlfir.declare %[[A_ARG]] +// CHECK-DAG: %[[B:.*]]:2 = hlfir.declare %[[B_ARG]] +// CHECK-DAG: %[[C:.*]]:2 = hlfir.declare %[[C_ARG]] +// CHECK-DAG: %[[D:.*]]:2 = hlfir.declare %[[D_ARG]] +// CHECK-NEXT: %[[B_DIM0:.*]]:3 = fir.box_dims %[[B]]#0, %[[C0]] +// CHECK-NEXT: %[[ARRAY:.*]] = fir.array_load %[[A]]#0 +// CHECK-NEXT: %[[MAX:.*]] = arith.subi %[[B_DIM0]]#1, %[[C1]] +// CHECK-NEXT: %[[LOOP:.*]] = fir.do_loop %[[I:.*]] = %[[C0]] to %[[MAX]] step %[[C1]] unordered iter_args(%[[LOOP_ARRAY:.*]] = %[[ARRAY]]) +// inline the elemental: +// CHECK-NEXT: %[[B_I_REF:.*]] = hlfir.designate %[[B]]#0 (%[[I]]) +// CHECK-NEXT: %[[C_I_REF:.*]] = hlfir.designate %[[C]]#0 (%[[I]]) +// CHECK-NEXT: %[[B_I:.*]] = fir.load %[[B_I_REF]] +// CHECK-NEXT: %[[C_I:.*]] = fir.load %[[C_I_REF]] +// CHECK-NEXT: %[[MUL:.*]] = arith.muli %[[B_I]], %[[C_I]] +// loop body: +// CHECK-NEXT: %[[D_I_REF:.*]] = hlfir.designate %[[D]]#0 (%[[I]]) +// CHECK-NEXT: %[[D_I:.*]] = fir.load %[[D_I_REF]] +// CHECK-NEXT: %[[ADD:.*]] = arith.addi %[[MUL]], %[[D_I]] +// CHECK-NEXT: %[[ARRAY_UPD:.*]] = fir.array_update %[[LOOP_ARRAY]], %[[ADD]], %[[I]] +// CHECK-NEXT: fir.result %[[ARRAY_UPD]] +// CHECK-NEXT: } +// CHECK-NEXT: fir.array_merge_store %[[ARRAY]], %[[LOOP]] to %[[A_ARG]] +// CHECK-NEXT: return +// CHECK-NEXT: } + +// inlining into a single hlfir.apply +// a = (b * c)[1] +func.func @inline_to_apply(%arg0: !fir.ref {fir.bindc_name = "a"}, %arg1: !fir.box> {fir.bindc_name = "b"}, %arg2: !fir.box> {fir.bindc_name = "c"}) { + %0:2 = hlfir.declare %arg0 {uniq_name = "a"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %1:2 = hlfir.declare %arg1 {uniq_name = "b"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %2:2 = hlfir.declare %arg2 {uniq_name = "c"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %c0 = arith.constant 0 : index + %4:3 = fir.box_dims %1#0, %c0 : (!fir.box>, index) -> (index, index, index) + %5 = fir.shape %4#1 : (index) -> !fir.shape<1> + %6 = hlfir.elemental %5 : (!fir.shape<1>) -> !hlfir.expr { + ^bb0(%arg4: index): + %8 = hlfir.designate %1#0 (%arg4) : (!fir.box>, index) -> !fir.ref + %9 = hlfir.designate %2#0 (%arg4) : (!fir.box>, index) -> !fir.ref + %10 = fir.load %8 : !fir.ref + %11 = fir.load %9 : !fir.ref + %12 = arith.muli %10, %11 : i32 + hlfir.yield_element %12 : i32 + } + %c1 = arith.constant 1 : index + %res = hlfir.apply %6, %c1 : (!hlfir.expr, index) -> i32 + fir.store %res to %0#0 : !fir.ref + hlfir.destroy %6 : !hlfir.expr + return +} +// CHECK-LABEL: func.func @inline_to_apply +// CHECK-SAME: %[[A_ARG:.*]]: !fir.ref {fir.bindc_name = "a"} +// CHECK-SAME: %[[B_ARG:.*]]: !fir.box> {fir.bindc_name = "b"} +// CHECK-SAME: %[[C_ARG:.*]]: !fir.box> {fir.bindc_name = "c"} +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[A:.*]]:2 = hlfir.declare %[[A_ARG]] +// CHECK-DAG: %[[B:.*]]:2 = hlfir.declare %[[B_ARG]] +// CHECK-DAG: %[[C:.*]]:2 = hlfir.declare %[[C_ARG]] +// inline the elemental: +// CHECK-NEXT: %[[B_1_REF:.*]] = hlfir.designate %[[B]]#0 (%[[C1]]) +// CHECK-NEXT: %[[C_1_REF:.*]] = hlfir.designate %[[C]]#0 (%[[C1]]) +// CHECK-NEXT: %[[B_1:.*]] = fir.load %[[B_1_REF]] +// CHECK-NEXT: %[[C_1:.*]] = fir.load %[[C_1_REF]] +// CHECK-NEXT: %[[MUL:.*]] = arith.muli %[[B_1]], %[[C_1]] +// store: +// CHECK-NEXT: fir.store %[[MUL]] to %0#0 : !fir.ref +// CHECK-NEXT: return +// CHECK-NEXT: } + +// Check long chains of elementals +// subroutine reproducer(a) +// real, dimension(:) :: a +// a = sqrt(a * (a - 1)) +// end subroutine +func.func @_QPreproducer(%arg0: !fir.box> {fir.bindc_name = "a"}) { + %0:2 = hlfir.declare %arg0 {uniq_name = "_QFreproducerEa"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %cst = arith.constant 1.000000e+00 : f32 + %c0 = arith.constant 0 : index + %1:3 = fir.box_dims %0#0, %c0 : (!fir.box>, index) -> (index, index, index) + %2 = fir.shape %1#1 : (index) -> !fir.shape<1> + %3 = hlfir.elemental %2 : (!fir.shape<1>) -> !hlfir.expr { + ^bb0(%arg1: index): + %9 = hlfir.designate %0#0 (%arg1) : (!fir.box>, index) -> !fir.ref + %10 = fir.load %9 : !fir.ref + %11 = arith.subf %10, %cst fastmath : f32 + hlfir.yield_element %11 : f32 + } + %4 = hlfir.elemental %2 : (!fir.shape<1>) -> !hlfir.expr { + ^bb0(%arg1: index): + %9 = hlfir.apply %3, %arg1 : (!hlfir.expr, index) -> f32 + %10 = hlfir.no_reassoc %9 : f32 + hlfir.yield_element %10 : f32 + } + %c0_0 = arith.constant 0 : index + %5:3 = fir.box_dims %0#0, %c0_0 : (!fir.box>, index) -> (index, index, index) + %6 = fir.shape %5#1 : (index) -> !fir.shape<1> + %7 = hlfir.elemental %6 : (!fir.shape<1>) -> !hlfir.expr { + ^bb0(%arg1: index): + %9 = hlfir.designate %0#0 (%arg1) : (!fir.box>, index) -> !fir.ref + %10 = hlfir.apply %4, %arg1 : (!hlfir.expr, index) -> f32 + %11 = fir.load %9 : !fir.ref + %12 = arith.mulf %11, %10 fastmath : f32 + hlfir.yield_element %12 : f32 + } + %8 = hlfir.elemental %6 : (!fir.shape<1>) -> !hlfir.expr { + ^bb0(%arg1: index): + %9 = hlfir.apply %7, %arg1 : (!hlfir.expr, index) -> f32 + %10 = math.sqrt %9 fastmath : f32 + hlfir.yield_element %10 : f32 + } + hlfir.assign %8 to %0#0 : !hlfir.expr, !fir.box> + hlfir.destroy %8 : !hlfir.expr + hlfir.destroy %7 : !hlfir.expr + hlfir.destroy %4 : !hlfir.expr + hlfir.destroy %3 : !hlfir.expr + return +} +// CHECK-LABEL: func.func @_QPreproducer +// CHECK-SAME: %[[A_ARG:.*]]: !fir.box> {fir.bindc_name = "a"} +// CHECK-DAG: %[[CST:.*]] = arith.constant 1.0000 +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 +// CHECK-DAG: %[[A_VAR:.*]]:2 = hlfir.declare %[[A_ARG]] +// CHECK-NEXT: %[[A_DIMS_0:.*]]:3 = fir.box_dims %[[A_VAR]]#0, %[[C0]] +// CHECK-NEXT: %[[SHAPE_0:.*]] = fir.shape %[[A_DIMS_0]]#1 +// all in one elemental: +// CHECK-NEXT: %[[EXPR:.*]] = hlfir.elemental %[[SHAPE_0]] +// CHECK-NEXT: ^bb0(%[[I:.*]]: index): +// CHECK-NEXT: %[[A_I0:.*]] = hlfir.designate %[[A_VAR]]#0 (%[[I]]) +// CHECK-NEXT: %[[A_I1:.*]] = hlfir.designate %[[A_VAR]]#0 (%[[I]]) +// CHECK-NEXT: %[[A_I1_VAL:.*]] = fir.load %[[A_I1]] +// CHECK-NEXT: %[[SUB:.*]] = arith.subf %[[A_I1_VAL]], %[[CST]] +// CHECK-NEXT: %[[SUB0:.*]] = hlfir.no_reassoc %[[SUB]] : f32 +// CHECK-NEXT: %[[A_I0_VAL:.*]] = fir.load %[[A_I0]] +// CHECK-NEXT: %[[MUL:.*]] = arith.mulf %[[A_I0_VAL]], %[[SUB0]] +// CHECK-NEXT: %[[SQRT:.*]] = math.sqrt %[[MUL]] +// CHECK-NEXT: hlfir.yield_element %[[SQRT]] +// CHECK-NEXT: } +// CHECK-NEXT: hlfir.assign %[[EXPR]] to %[[A_VAR]]#0 +// CHECK-NEXT: hlfir.destroy %[[EXPR]] +// CHECK-NEXT: return +// CHECK-NEXT: }