diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h --- a/flang/include/flang/Optimizer/Transforms/Passes.h +++ b/flang/include/flang/Optimizer/Transforms/Passes.h @@ -31,6 +31,7 @@ std::unique_ptr createFirToCfgPass(); std::unique_ptr createCharacterConversionPass(); std::unique_ptr createExternalNameConversionPass(); +std::unique_ptr createMemDataFlowOptPass(); std::unique_ptr createPromoteToAffinePass(); /// Support for inlining on FIR. diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td --- a/flang/include/flang/Optimizer/Transforms/Passes.td +++ b/flang/include/flang/Optimizer/Transforms/Passes.td @@ -120,4 +120,17 @@ let constructor = "::fir::createExternalNameConversionPass()"; } +def MemRefDataFlowOpt : FunctionPass<"fir-memref-dataflow-opt"> { + let summary = + "Perform store/load forwarding and potentially removing dead stores."; + let description = [{ + This pass performs store to load forwarding to eliminate memory accesses and + potentially the entire allocation if all the accesses are forwarded. 
+ }]; + let constructor = "::fir::createMemDataFlowOptPass()"; + let dependentDialects = [ + "fir::FIROpsDialect", "mlir::StandardOpsDialect" + ]; +} + #endif // FLANG_OPTIMIZER_TRANSFORMS_PASSES diff --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt --- a/flang/lib/Optimizer/Transforms/CMakeLists.txt +++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt @@ -5,6 +5,7 @@ CharacterConversion.cpp Inliner.cpp ExternalNameConversion.cpp + MemRefDataFlowOpt.cpp RewriteLoop.cpp DEPENDS diff --git a/flang/lib/Optimizer/Transforms/MemRefDataFlowOpt.cpp b/flang/lib/Optimizer/Transforms/MemRefDataFlowOpt.cpp new file mode 100644 --- /dev/null +++ b/flang/lib/Optimizer/Transforms/MemRefDataFlowOpt.cpp @@ -0,0 +1,130 @@ +//===- MemRefDataFlowOpt.cpp - Memory DataFlow Optimization pass ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "PassDetail.h" +#include "flang/Optimizer/Dialect/FIRDialect.h" +#include "flang/Optimizer/Dialect/FIROps.h" +#include "flang/Optimizer/Dialect/FIRType.h" +#include "flang/Optimizer/Transforms/Passes.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/IR/Dominance.h" +#include "mlir/IR/Operation.h" +#include "mlir/Transforms/Passes.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" + +#define DEBUG_TYPE "fir-memref-dataflow-opt" + +namespace { + +template +static std::vector getSpecificUsers(mlir::Value v) { + std::vector ops; + for (auto *user : v.getUsers()) + if (auto op = dyn_cast(user)) + ops.push_back(op); + return ops; +} + +/// This is based on MLIR's MemRefDataFlowOpt which is specialized on AffineRead +/// and AffineWrite interface +template +class 
LoadStoreForwarding { +public: + LoadStoreForwarding(mlir::DominanceInfo *di) : domInfo(di) {} + + // FIXME: This algorithm has a bug. It ignores escaping references between a + // store and a load. + llvm::Optional findStoreToForward(ReadOp loadOp, + std::vector &&storeOps) { + llvm::SmallVector candidateSet; + + for (auto storeOp : storeOps) + if (domInfo->dominates(storeOp, loadOp)) + candidateSet.push_back(storeOp); + + if (candidateSet.empty()) + return {}; + + llvm::Optional nearestStore; + for (auto candidate : candidateSet) { + auto nearerThan = [&](WriteOp otherStore) { + if (candidate == otherStore) + return false; + auto rv = domInfo->properlyDominates(candidate, otherStore); + if (rv) { + LLVM_DEBUG(llvm::dbgs() + << "candidate " << candidate << " is not the nearest to " + << loadOp << " because " << otherStore << " is closer\n"); + } + return rv; + }; + if (!llvm::any_of(candidateSet, nearerThan)) { + nearestStore = mlir::cast(candidate); + break; + } + } + if (!nearestStore) { + LLVM_DEBUG( + llvm::dbgs() + << "load " << loadOp << " has " << candidateSet.size() + << " store candidates, but this algorithm can't find a best.\n"); + } + return nearestStore; + } + + llvm::Optional findReadForWrite(WriteOp storeOp, + std::vector &&loadOps) { + for (auto &loadOp : loadOps) { + if (domInfo->dominates(storeOp, loadOp)) + return loadOp; + } + return {}; + } + +private: + mlir::DominanceInfo *domInfo; +}; + +class MemDataFlowOpt : public fir::MemRefDataFlowOptBase { +public: + void runOnFunction() override { + mlir::FuncOp f = getFunction(); + + auto *domInfo = &getAnalysis(); + LoadStoreForwarding lsf(domInfo); + f.walk([&](fir::LoadOp loadOp) { + auto maybeStore = lsf.findStoreToForward( + loadOp, getSpecificUsers(loadOp.memref())); + if (maybeStore) { + auto storeOp = maybeStore.getValue(); + LLVM_DEBUG(llvm::dbgs() << "FlangMemDataFlowOpt: In " << f.getName() + << " erasing load " << loadOp + << " with value from " << storeOp << '\n'); + 
loadOp.getResult().replaceAllUsesWith(storeOp.value()); + loadOp.erase(); + } + }); + f.walk([&](fir::AllocaOp alloca) { + for (auto &storeOp : getSpecificUsers(alloca.getResult())) { + if (!lsf.findReadForWrite( + storeOp, getSpecificUsers(storeOp.memref()))) { + LLVM_DEBUG(llvm::dbgs() << "FlangMemDataFlowOpt: In " << f.getName() + << " erasing store " << storeOp << '\n'); + storeOp.erase(); + } + } + }); + } +}; +} // namespace + +std::unique_ptr fir::createMemDataFlowOptPass() { + return std::make_unique(); +} diff --git a/flang/test/Fir/memref-data-flow.fir b/flang/test/Fir/memref-data-flow.fir new file mode 100644 --- /dev/null +++ b/flang/test/Fir/memref-data-flow.fir @@ -0,0 +1,427 @@ +// RUN: fir-opt --split-input-file --fir-memref-dataflow-opt %s | FileCheck %s + +// All store-load chains are removed + +func @forward_store0(%arg0: !fir.ref>, %arg1: !fir.ref>, %arg2: !fir.ref>) { + %c1_i64 = arith.constant 1 : i64 + %c60 = arith.constant 60 : index + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %0 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFf1dcEi"} + %1 = fir.alloca !fir.array<60xi32> {bindc_name = "t1", uniq_name = "_QFf1dcEt1"} + br ^bb1(%c1, %c60 : index, index) +^bb1(%2: index, %3: index): // 2 preds: ^bb0, ^bb2 + %4 = arith.cmpi sgt, %3, %c0 : index + cond_br %4, ^bb2, ^bb3 +^bb2: // pred: ^bb1 + %5 = fir.convert %2 : (index) -> i32 + fir.store %5 to %0 : !fir.ref + %6 = fir.load %0 : !fir.ref + %7 = fir.convert %6 : (i32) -> i64 + %8 = arith.subi %7, %c1_i64 : i64 + %9 = fir.coordinate_of %arg0, %8 : (!fir.ref>, i64) -> !fir.ref + %10 = fir.load %9 : !fir.ref + %11 = arith.addi %10, %10 : i32 + %12 = fir.coordinate_of %1, %8 : (!fir.ref>, i64) -> !fir.ref + fir.store %11 to %12 : !fir.ref + %13 = arith.addi %2, %c1 : index + %14 = arith.subi %3, %c1 : index + br ^bb1(%13, %14 : index, index) +^bb3: // pred: ^bb1 + %15 = fir.convert %2 : (index) -> i32 + fir.store %15 to %0 : !fir.ref + br ^bb4(%c1, %c60 : index, index) 
+^bb4(%16: index, %17: index): // 2 preds: ^bb3, ^bb5 + %18 = arith.cmpi sgt, %17, %c0 : index + cond_br %18, ^bb5, ^bb6 +^bb5: // pred: ^bb4 + %19 = fir.convert %16 : (index) -> i32 + fir.store %19 to %0 : !fir.ref + %20 = fir.load %0 : !fir.ref + %21 = fir.convert %20 : (i32) -> i64 + %22 = arith.subi %21, %c1_i64 : i64 + %23 = fir.coordinate_of %1, %22 : (!fir.ref>, i64) -> !fir.ref + %24 = fir.load %23 : !fir.ref + %25 = fir.coordinate_of %arg1, %22 : (!fir.ref>, i64) -> !fir.ref + %26 = fir.load %25 : !fir.ref + %27 = arith.muli %24, %26 : i32 + %28 = fir.coordinate_of %arg2, %22 : (!fir.ref>, i64) -> !fir.ref + fir.store %27 to %28 : !fir.ref + %29 = arith.addi %16, %c1 : index + %30 = arith.subi %17, %c1 : index + br ^bb4(%29, %30 : index, index) +^bb6: // pred: ^bb4 + %31 = fir.convert %16 : (index) -> i32 + fir.store %31 to %0 : !fir.ref + return +} + +// CHECK-LABEL: func @forward_store0 +// CHECK-LABEL: ^bb1 +// CHECK-NOT: fir.store %{{.*}} to %{{.*}} : !fir.ref +// CHECK-NOT: %{{.*}} = fir.load %{{.*}} : !fir.ref +// CHECK-LABEL: ^bb2: +// CHECK-NOT: fir.store %{{.*}} to %{{.*}} : !fir.ref +// CHECK-NOT: %{{.*}} = fir.load %{{.*}} : !fir.ref +// CHECK: %{{.*}} = fir.load %{{.*}} : !fir.ref +// CHECK: fir.store %{{.*}} to %{{.*}} : !fir.ref +// CHECK-LABEL: ^bb3: +// CHECK-NOT: fir.store %{{.*}} to %{{.*}} : !fir.ref +// CHECK-LABEL: ^bb5: +// CHECK: %{{.*}} = fir.convert %{{.*}} : (index) -> i32 +// CHECK-NOT: fir.store %{{.*}} to %{{.*}} : !fir.ref +// CHECK-NOT: %{{.*}} = fir.load %{{.*}} : !fir.ref +// CHECK: %{{.*}} = fir.convert %{{.*}} : (i32) -> i64 +// CHECK: %{{.*}} = fir.load %{{.*}} : !fir.ref +// CHECK: %{{.*}} = fir.load %{{.*}} : !fir.ref +// CHECK: fir.store %{{.*}} to %{{.*}} : !fir.ref +// CHECK-LABEL: ^bb6: +// CHECK-NOT: fir.store %{{.*}} to %{{.*}} : !fir.ref + +// ----- + +func @forward_store1(%arg0: !fir.ref>, %arg1: !fir.ref>, %arg2: !fir.ref>, %arg3: !fir.ref) { + %c1_i64 = arith.constant 1 : i64 + %c0 = arith.constant 0 : index 
+ %c1 = arith.constant 1 : index + %0 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFf1dvEi"} + %1 = fir.load %arg3 : !fir.ref + %2 = fir.convert %1 : (i32) -> index + %3 = fir.alloca !fir.array, %2 {bindc_name = "t1", uniq_name = "_QFf1dvEt1"} + br ^bb1(%c1, %2 : index, index) +^bb1(%4: index, %5: index): // 2 preds: ^bb0, ^bb2 + %6 = arith.cmpi sgt, %5, %c0 : index + cond_br %6, ^bb2, ^bb3 +^bb2: // pred: ^bb1 + %7 = fir.convert %4 : (index) -> i32 + fir.store %7 to %0 : !fir.ref + %8 = fir.load %0 : !fir.ref + %9 = fir.convert %8 : (i32) -> i64 + %10 = arith.subi %9, %c1_i64 : i64 + %11 = fir.coordinate_of %arg0, %10 : (!fir.ref>, i64) -> !fir.ref + %12 = fir.load %11 : !fir.ref + %13 = arith.addi %12, %12 : i32 + %14 = fir.coordinate_of %3, %10 : (!fir.ref>, i64) -> !fir.ref + fir.store %13 to %14 : !fir.ref + %15 = arith.addi %4, %c1 : index + %16 = arith.subi %5, %c1 : index + br ^bb1(%15, %16 : index, index) +^bb3: // pred: ^bb1 + %17 = fir.convert %4 : (index) -> i32 + fir.store %17 to %0 : !fir.ref + %18 = fir.load %arg3 : !fir.ref + %19 = fir.convert %18 : (i32) -> index + br ^bb4(%c1, %19 : index, index) +^bb4(%20: index, %21: index): // 2 preds: ^bb3, ^bb5 + %22 = arith.cmpi sgt, %21, %c0 : index + cond_br %22, ^bb5, ^bb6 +^bb5: // pred: ^bb4 + %23 = fir.convert %20 : (index) -> i32 + fir.store %23 to %0 : !fir.ref + %24 = fir.load %0 : !fir.ref + %25 = fir.convert %24 : (i32) -> i64 + %26 = arith.subi %25, %c1_i64 : i64 + %27 = fir.coordinate_of %3, %26 : (!fir.ref>, i64) -> !fir.ref + %28 = fir.load %27 : !fir.ref + %29 = fir.coordinate_of %arg1, %26 : (!fir.ref>, i64) -> !fir.ref + %30 = fir.load %29 : !fir.ref + %31 = arith.muli %28, %30 : i32 + %32 = fir.coordinate_of %arg2, %26 : (!fir.ref>, i64) -> !fir.ref + fir.store %31 to %32 : !fir.ref + %33 = arith.addi %20, %c1 : index + %34 = arith.subi %21, %c1 : index + br ^bb4(%33, %34 : index, index) +^bb6: // pred: ^bb4 + %35 = fir.convert %20 : (index) -> i32 + fir.store %35 to %0 : !fir.ref + 
return +} + +// CHECK-LABEL: func @forward_store1 +// CHECK: %{{.*}} = fir.load %{{.*}} : !fir.ref +// CHECK-LABEL: ^bb2: +// CHECK-NOT: fir.store %{{.*}} to %{{.*}} : !fir.ref +// CHECK: %{{.*}} = fir.load %{{.*}} : !fir.ref +// CHECK-NOT: fir.store %{{.*}} to %{{.*}} : !fir.ref +// CHECK: fir.store %{{.*}} to %{{.*}} : !fir.ref +// CHECK-LABEL: ^bb3: +// CHECK-NOT: fir.store %{{.*}} to %{{.*}} : !fir.ref +// CHECK: %{{.*}} = fir.load %{{.*}} : !fir.ref +// CHECK-LABEL: ^bb5: +// CHECK-NOT: fir.store %{{.*}} to %{{.*}} : !fir.ref +// CHECK: %{{.*}} = fir.load %{{.*}} : !fir.ref +// CHECK: %{{.*}} = fir.load %{{.*}} : !fir.ref +// CHECK: fir.store %{{.*}} to %{{.*}} : !fir.ref +// CHECK-LABEL: ^bb6: +// CHECK-NOT: fir.store %{{.*}} to %{{.*}} : !fir.ref + +// ----- + +func @forward_store2(%arg0: !fir.ref>, %arg1: !fir.ref>, %arg2: !fir.ref>) { + %c100_i32 = arith.constant 100 : i32 + %c10000_i32 = arith.constant 10000 : i32 + %c1_i64 = arith.constant 1 : i64 + %c3 = arith.constant 3 : index + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %0 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFf2dcEi"} + %1 = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFf2dcEj"} + %2 = fir.alloca !fir.array<3x3xi32> {bindc_name = "t1", uniq_name = "_QFf2dcEt1"} + br ^bb1(%c1, %c3 : index, index) +^bb1(%3: index, %4: index): // 2 preds: ^bb0, ^bb5 + %5 = arith.cmpi sgt, %4, %c0 : index + cond_br %5, ^bb2, ^bb6 +^bb2: // pred: ^bb1 + %6 = fir.convert %3 : (index) -> i32 + fir.store %6 to %0 : !fir.ref + br ^bb3(%c1, %c3 : index, index) +^bb3(%7: index, %8: index): // 2 preds: ^bb2, ^bb4 + %9 = arith.cmpi sgt, %8, %c0 : index + cond_br %9, ^bb4, ^bb5 +^bb4: // pred: ^bb3 + %10 = fir.convert %7 : (index) -> i32 + fir.store %10 to %1 : !fir.ref + %11 = fir.load %0 : !fir.ref + %12 = fir.convert %11 : (i32) -> i64 + %13 = arith.subi %12, %c1_i64 : i64 + %14 = fir.load %1 : !fir.ref + %15 = fir.convert %14 : (i32) -> i64 + %16 = arith.subi %15, %c1_i64 : i64 + %17 = 
fir.coordinate_of %arg0, %13, %16 : (!fir.ref>, i64, i64) -> !fir.ref + %18 = fir.load %17 : !fir.ref + %19 = fir.coordinate_of %arg0, %16, %13 : (!fir.ref>, i64, i64) -> !fir.ref + %20 = fir.load %19 : !fir.ref + %21 = arith.muli %20, %c100_i32 : i32 + %22 = arith.addi %18, %21 : i32 + %23 = fir.coordinate_of %2, %13, %16 : (!fir.ref>, i64, i64) -> !fir.ref + fir.store %22 to %23 : !fir.ref + %24 = arith.addi %7, %c1 : index + %25 = arith.subi %8, %c1 : index + br ^bb3(%24, %25 : index, index) +^bb5: // pred: ^bb3 + %26 = fir.convert %7 : (index) -> i32 + fir.store %26 to %1 : !fir.ref + %27 = arith.addi %3, %c1 : index + %28 = arith.subi %4, %c1 : index + br ^bb1(%27, %28 : index, index) +^bb6: // pred: ^bb1 + %29 = fir.convert %3 : (index) -> i32 + fir.store %29 to %0 : !fir.ref + br ^bb7(%c1, %c3 : index, index) +^bb7(%30: index, %31: index): // 2 preds: ^bb6, ^bb11 + %32 = arith.cmpi sgt, %31, %c0 : index + cond_br %32, ^bb8, ^bb12 +^bb8: // pred: ^bb7 + %33 = fir.convert %30 : (index) -> i32 + fir.store %33 to %0 : !fir.ref + br ^bb9(%c1, %c3 : index, index) +^bb9(%34: index, %35: index): // 2 preds: ^bb8, ^bb10 + %36 = arith.cmpi sgt, %35, %c0 : index + cond_br %36, ^bb10, ^bb11 +^bb10: // pred: ^bb9 + %37 = fir.convert %34 : (index) -> i32 + fir.store %37 to %1 : !fir.ref + %38 = fir.load %0 : !fir.ref + %39 = fir.convert %38 : (i32) -> i64 + %40 = arith.subi %39, %c1_i64 : i64 + %41 = fir.load %1 : !fir.ref + %42 = fir.convert %41 : (i32) -> i64 + %43 = arith.subi %42, %c1_i64 : i64 + %44 = fir.coordinate_of %2, %40, %43 : (!fir.ref>, i64, i64) -> !fir.ref + %45 = fir.load %44 : !fir.ref + %46 = fir.coordinate_of %arg1, %40, %43 : (!fir.ref>, i64, i64) -> !fir.ref + %47 = fir.load %46 : !fir.ref + %48 = arith.muli %47, %c10000_i32 : i32 + %49 = arith.addi %45, %48 : i32 + %50 = fir.coordinate_of %arg2, %40, %43 : (!fir.ref>, i64, i64) -> !fir.ref + fir.store %49 to %50 : !fir.ref + %51 = arith.addi %34, %c1 : index + %52 = arith.subi %35, %c1 : index + br 
^bb9(%51, %52 : index, index) +^bb11: // pred: ^bb9 + %53 = fir.convert %34 : (index) -> i32 + fir.store %53 to %1 : !fir.ref + %54 = arith.addi %30, %c1 : index + %55 = arith.subi %31, %c1 : index + br ^bb7(%54, %55 : index, index) +^bb12: // pred: ^bb7 + %56 = fir.convert %30 : (index) -> i32 + fir.store %56 to %0 : !fir.ref + return +} + +// CHECK-LABEL: func @forward_store2 +// CHECK-LABEL: ^bb2: +// CHECK-NOT: fir.store %{{.*}} to %{{.*}} : !fir.ref +// CHECK-LABEL: ^bb4: +// CHECK-NOT: fir.store %{{.*}} to %{{.*}} : !fir.ref +// CHECK-COUNT-2: %{{.*}} = fir.load %{{.*}} : !fir.ref +// CHECK: fir.store %{{.*}} to %{{.*}} : !fir.ref +// CHECK-LABEL: ^bb5: +// CHECK-NOT: fir.store %{{.*}} to %{{.*}} : !fir.ref +// CHECK-LABEL: ^bb6: +// CHECK-NOT: fir.store %{{.*}} to %{{.*}} : !fir.ref +// CHECK-LABEL: ^bb10: +// CHECK: %{{.*}} = fir.convert %{{.*}} : (index) -> i32 +// CHECK-NOT: fir.store %{{.*}} to %{{.*}} : !fir.ref +// CHECK-COUNT-2: %{{.*}} = fir.load %{{.*}} : !fir.ref +// CHECK: fir.store %{{.*}} to %{{.*}} : !fir.ref +// CHECK-LABEL: ^bb11: +// CHECK-NOT: fir.store %{{.*}} to %{{.*}} : !fir.ref +// CHECK-LABEL: ^bb12: +// CHECK-NOT: fir.store %{{.*}} to %{{.*}} : !fir.ref + +// ----- + +func @forward_store3(%arg0: !fir.ref>, %arg1: !fir.ref>, %arg2: !fir.ref>, %arg3: !fir.ref, %arg4: !fir.ref) { + %c100_i32 = arith.constant 100 : i32 + %c10000_i32 = arith.constant 10000 : i32 + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %0 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFf2dvEi"} + %1 = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFf2dvEj"} + %2 = fir.alloca i32 {bindc_name = "s1e", uniq_name = "_QFf2dvEs1e"} + %3 = fir.alloca i32 {bindc_name = "s2e", uniq_name = "_QFf2dvEs2e"} + %4 = fir.load %arg3 : !fir.ref + %5 = fir.convert %4 : (i32) -> index + %6 = fir.load %arg4 : !fir.ref + %7 = fir.convert %6 : (i32) -> index + %8 = fir.alloca !fir.array, %5, %7 {bindc_name = "t", uniq_name = "_QFf2dvEt"} + fir.store %4 to %2 : 
!fir.ref + %9 = fir.load %arg4 : !fir.ref + fir.store %9 to %3 : !fir.ref + %10 = fir.load %2 : !fir.ref + %11 = fir.convert %10 : (i32) -> index + br ^bb1(%c1, %11 : index, index) +^bb1(%12: index, %13: index): // 2 preds: ^bb0, ^bb5 + %14 = arith.cmpi sgt, %13, %c0 : index + cond_br %14, ^bb2, ^bb6 +^bb2: // pred: ^bb1 + %15 = fir.convert %12 : (index) -> i32 + fir.store %15 to %0 : !fir.ref + %16 = fir.load %3 : !fir.ref + %17 = fir.convert %16 : (i32) -> index + br ^bb3(%c1, %17 : index, index) +^bb3(%18: index, %19: index): // 2 preds: ^bb2, ^bb4 + %20 = arith.cmpi sgt, %19, %c0 : index + cond_br %20, ^bb4, ^bb5 +^bb4: // pred: ^bb3 + %21 = fir.convert %18 : (index) -> i32 + fir.store %21 to %1 : !fir.ref + %22 = fir.convert %arg0 : (!fir.ref>) -> !fir.ref> + %23 = fir.load %0 : !fir.ref + %24 = fir.convert %23 : (i32) -> index + %25 = arith.subi %24, %c1 : index + %26 = fir.load %1 : !fir.ref + %27 = fir.convert %26 : (i32) -> index + %28 = arith.subi %27, %c1 : index + %29 = arith.muli %5, %28 : index + %30 = arith.addi %29, %25 : index + %31 = fir.coordinate_of %22, %30 : (!fir.ref>, index) -> !fir.ref + %32 = fir.load %31 : !fir.ref + %33 = arith.muli %5, %25 : index + %34 = arith.addi %33, %28 : index + %35 = fir.coordinate_of %22, %34 : (!fir.ref>, index) -> !fir.ref + %36 = fir.load %35 : !fir.ref + %37 = arith.muli %36, %c100_i32 : i32 + %38 = arith.addi %32, %37 : i32 + %39 = fir.convert %8 : (!fir.ref>) -> !fir.ref> + %40 = fir.coordinate_of %39, %30 : (!fir.ref>, index) -> !fir.ref + fir.store %38 to %40 : !fir.ref + %41 = arith.addi %18, %c1 : index + %42 = arith.subi %19, %c1 : index + br ^bb3(%41, %42 : index, index) +^bb5: // pred: ^bb3 + %43 = fir.convert %18 : (index) -> i32 + fir.store %43 to %1 : !fir.ref + %44 = arith.addi %12, %c1 : index + %45 = arith.subi %13, %c1 : index + br ^bb1(%44, %45 : index, index) +^bb6: // pred: ^bb1 + %46 = fir.convert %12 : (index) -> i32 + fir.store %46 to %0 : !fir.ref + %47 = fir.load %2 : !fir.ref + %48 = 
fir.convert %47 : (i32) -> index + br ^bb7(%c1, %48 : index, index) +^bb7(%49: index, %50: index): // 2 preds: ^bb6, ^bb11 + %51 = arith.cmpi sgt, %50, %c0 : index + cond_br %51, ^bb8, ^bb12 +^bb8: // pred: ^bb7 + %52 = fir.convert %49 : (index) -> i32 + fir.store %52 to %0 : !fir.ref + %53 = fir.load %3 : !fir.ref + %54 = fir.convert %53 : (i32) -> index + br ^bb9(%c1, %54 : index, index) +^bb9(%55: index, %56: index): // 2 preds: ^bb8, ^bb10 + %57 = arith.cmpi sgt, %56, %c0 : index + cond_br %57, ^bb10, ^bb11 +^bb10: // pred: ^bb9 + %58 = fir.convert %55 : (index) -> i32 + fir.store %58 to %1 : !fir.ref + %59 = fir.convert %8 : (!fir.ref>) -> !fir.ref> + %60 = fir.load %0 : !fir.ref + %61 = fir.convert %60 : (i32) -> index + %62 = arith.subi %61, %c1 : index + %63 = fir.load %1 : !fir.ref + %64 = fir.convert %63 : (i32) -> index + %65 = arith.subi %64, %c1 : index + %66 = arith.muli %5, %65 : index + %67 = arith.addi %66, %62 : index + %68 = fir.coordinate_of %59, %67 : (!fir.ref>, index) -> !fir.ref + %69 = fir.load %68 : !fir.ref + %70 = fir.convert %arg1 : (!fir.ref>) -> !fir.ref> + %71 = fir.coordinate_of %70, %67 : (!fir.ref>, index) -> !fir.ref + %72 = fir.load %71 : !fir.ref + %73 = arith.muli %72, %c10000_i32 : i32 + %74 = arith.addi %69, %73 : i32 + %75 = fir.convert %arg2 : (!fir.ref>) -> !fir.ref> + %76 = fir.coordinate_of %75, %67 : (!fir.ref>, index) -> !fir.ref + fir.store %74 to %76 : !fir.ref + %77 = arith.addi %55, %c1 : index + %78 = arith.subi %56, %c1 : index + br ^bb9(%77, %78 : index, index) +^bb11: // pred: ^bb9 + %79 = fir.convert %55 : (index) -> i32 + fir.store %79 to %1 : !fir.ref + %80 = arith.addi %49, %c1 : index + %81 = arith.subi %50, %c1 : index + br ^bb7(%80, %81 : index, index) +^bb12: // pred: ^bb7 + %82 = fir.convert %49 : (index) -> i32 + fir.store %82 to %0 : !fir.ref + return +} + +// CHECK-LABEL: func @forward_store3 +// CHECK-NOT: fir.store %{{.*}} to %{{.*}} : !fir.ref +// CHECK: %{{.*}} = fir.load %{{.*}} : !fir.ref +// 
CHECK: %{{.*}} = fir.load %{{.*}} : !fir.ref +// CHECK-NOT: fir.store %{{.*}} to %{{.*}} : !fir.ref +// CHECK: %{{.*}} = fir.load %{{.*}} : !fir.ref +// CHECK-NOT: fir.store %{{.*}} to %{{.*}} : !fir.ref +// CHECK-NOT: %{{.*}} = fir.load %{{.*}} : !fir.ref +// CHECK-LABEL: ^bb2: +// CHECK-NOT: fir.store %{{.*}} to %{{.*}} : !fir.ref +// CHECK-LABEL: ^bb4: +// CHECK-NOT: fir.store %{{.*}} to %{{.*}} : !fir.ref +// CHECK-NOT: %{{.*}} = fir.load %{{.*}} : !fir.ref +// CHECK: %{{.*}} = fir.load %{{.*}} : !fir.ref +// CHECK-NOT: %{{.*}} = fir.load %{{.*}} : !fir.ref +// CHECK: %{{.*}} = fir.load %{{.*}} : !fir.ref +// CHECK: fir.store %{{.*}} to %{{.*}} : !fir.ref +// CHECK-LABEL: ^bb5: +// CHECK-NOT: fir.store %{{.*}} to %{{.*}} : !fir.ref +// CHECK-LABEL: ^bb6: +// CHECK-NOT: fir.store %{{.*}} to %{{.*}} : !fir.ref +// CHECK-NOT: %{{.*}} = fir.load %{{.*}} : !fir.ref +// CHECK-LABEL: ^bb8: +// CHECK-NOT: fir.store %{{.*}} to %{{.*}} : !fir.ref +// CHECK-NOT: %{{.*}} = fir.load %{{.*}} : !fir.ref +// CHECK: ^bb10: +// CHECK-NOT: fir.store %{{.*}} to %{{.*}} : !fir.ref +// CHECK-COUNT-2: %{{.*}} = fir.load %{{.*}} : !fir.ref +// CHECK: fir.store %{{.*}} to %{{.*}} : !fir.ref +// CHECK-LABEL: ^bb11: +// CHECK-NOT: fir.store %{{.*}} to %{{.*}} : !fir.ref +// CHECK-LABEL: ^bb12: +// CHECK-NOT: fir.store %{{.*}} to %{{.*}} : !fir.ref