diff --git a/flang/include/flang/Optimizer/Builder/Array.h b/flang/include/flang/Optimizer/Builder/Array.h
new file mode 100644
--- /dev/null
+++ b/flang/include/flang/Optimizer/Builder/Array.h
@@ -0,0 +1,27 @@
+//===-- Array.h -------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_OPTIMIZER_BUILDER_ARRAY_H
+#define FORTRAN_OPTIMIZER_BUILDER_ARRAY_H
+
+#include "flang/Optimizer/Dialect/FIROps.h"
+
+namespace fir::factory {
+
+/// Return true if and only if the extents are those of an assumed-size array.
+/// An assumed-size array in Fortran is declared with `*` as the upper bound of
+/// the last dimension of the array. Lowering converts the asterisk to an
+/// undefined value.
+inline bool isAssumedSize(const llvm::SmallVectorImpl<mlir::Value> &extents) {
+  return !extents.empty() &&
+         mlir::isa_and_nonnull<fir::UndefOp>(extents.back().getDefiningOp());
+}
+
+} // namespace fir::factory
+
+#endif // FORTRAN_OPTIMIZER_BUILDER_ARRAY_H
diff --git a/flang/include/flang/Optimizer/Builder/FIRBuilder.h b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
--- a/flang/include/flang/Optimizer/Builder/FIRBuilder.h
+++ b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
@@ -371,6 +371,12 @@
   /// Generate code testing \p addr is a null address.
   mlir::Value genIsNull(mlir::Location loc, mlir::Value addr);
 
+  /// Compute the extent of (lb:ub:step) as max((ub-lb+step)/step, 0). See
+  /// Fortran 2018 section 9.5.3.3.2 for more details.
+  mlir::Value genExtentFromTriplet(mlir::Location loc, mlir::Value lb,
+                                   mlir::Value ub, mlir::Value step,
+                                   mlir::Type type);
+
 private:
   const KindMapping &kindMap;
 };
diff --git a/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h b/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h
--- a/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h
+++ b/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h
@@ -82,6 +82,10 @@
   return "fir.host_assoc";
 }
 
+/// Does the function, \p func, have a host-associations tuple argument?
+/// Some internal procedures may have access to host procedure variables.
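+/// The tuple argument is identified by the `fir.host_assoc` attribute
+/// returned by getHostAssocAttrName() above.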
+bool hasHostAssociationArgument(mlir::FuncOp func);
+
 /// Tell if \p value is:
 ///   - a function argument that has attribute \p attributeName
 ///   - or, the result of fir.alloca/fir.allocamem op that has attribute \p
diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
--- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp
+++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
@@ -507,6 +507,23 @@
                       mlir::arith::CmpIPredicate::eq);
 }
 
+mlir::Value fir::FirOpBuilder::genExtentFromTriplet(mlir::Location loc,
+                                                    mlir::Value lb,
+                                                    mlir::Value ub,
+                                                    mlir::Value step,
+                                                    mlir::Type type) {
+  auto zero = createIntegerConstant(loc, type, 0);
+  lb = createConvert(loc, type, lb);
+  ub = createConvert(loc, type, ub);
+  step = createConvert(loc, type, step);
+  auto diff = create<mlir::arith::SubIOp>(loc, ub, lb);
+  auto add = create<mlir::arith::AddIOp>(loc, diff, step);
+  auto div = create<mlir::arith::DivSIOp>(loc, add, step);
+  auto cmp = create<mlir::arith::CmpIOp>(loc, mlir::arith::CmpIPredicate::sgt,
+                                         div, zero);
+  return create<mlir::arith::SelectOp>(loc, cmp, div, zero);
+}
+
 //===--------------------------------------------------------------------===//
 // ExtendedValue inquiry helper implementation
 //===--------------------------------------------------------------------===//
diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -3257,6 +3257,15 @@
   return result;
 }
 
+bool fir::hasHostAssociationArgument(mlir::FuncOp func) {
+  if (auto allArgAttrs = func.getAllArgAttrs())
+    for (auto attr : allArgAttrs)
+      if (auto dict = attr.template dyn_cast_or_null<mlir::DictionaryAttr>())
+        if (dict.get(fir::getHostAssocAttrName()))
+          return true;
+  return false;
+}
+
 bool fir::valueHasFirAttribute(mlir::Value value,
                                llvm::StringRef attributeName) {
   // If this is a fir.box that was loaded, the fir attributes will be on the
diff --git a/flang/lib/Optimizer/Transforms/ArrayValueCopy.cpp b/flang/lib/Optimizer/Transforms/ArrayValueCopy.cpp
--- a/flang/lib/Optimizer/Transforms/ArrayValueCopy.cpp
+++ b/flang/lib/Optimizer/Transforms/ArrayValueCopy.cpp
@@ -7,10 +7,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "PassDetail.h"
+#include "flang/Lower/Todo.h"
+#include "flang/Optimizer/Builder/Array.h"
 #include "flang/Optimizer/Builder/BoxValue.h"
 #include "flang/Optimizer/Builder/FIRBuilder.h"
 #include "flang/Optimizer/Builder/Factory.h"
 #include "flang/Optimizer/Dialect/FIRDialect.h"
+#include "flang/Optimizer/Dialect/FIROpsSupport.h"
 #include "flang/Optimizer/Support/FIRContext.h"
 #include "flang/Optimizer/Transforms/Passes.h"
 #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
@@ -60,8 +63,8 @@
 public:
   using ConflictSetT = llvm::SmallPtrSet<mlir::Operation *, 16>;
   using UseSetT = llvm::SmallPtrSet<mlir::OpOperand *, 8>;
-  using LoadMapSetsT =
-      llvm::DenseMap<mlir::Operation *, llvm::SmallVector<mlir::Operation *>>;
+  using LoadMapSetsT = llvm::DenseMap<mlir::Operation *, UseSetT>;
+  using AmendAccessSetT = llvm::SmallPtrSet<mlir::Operation *, 4>;
 
   ArrayCopyAnalysis(mlir::Operation *op) : operation{op} { construct(op); }
 
@@ -75,13 +78,27 @@
     return conflicts.contains(op);
   }
 
-  /// Return the use map. The use map maps array fetch and update operations
-  /// back to the array load that is the original source of the array value.
+  /// Return the use map.
+  /// The use map maps array access, amend, fetch and update operations back to
+  /// the array load that is the original source of the array value.
+  /// It maps an array_load to an array_merge_store, if and only if the loaded
+  /// array value has pending modifications to be merged.
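+  /// For example, an array_update that modifies a loaded array value is
+  /// mapped back to the array_load that produced that value.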
const OperationUseMapT &getUseMap() const { return useMap; } - /// Find all the array operations that access the array value that is loaded - /// by the array load operation, `load`. - const llvm::SmallVector &arrayAccesses(ArrayLoadOp load); + /// Return the set of array_access ops directly associated with array_amend + /// ops. + bool inAmendAccessSet(mlir::Operation *op) const { + return amendAccesses.count(op); + } + + /// For ArrayLoad `load`, return the transitive set of all OpOperands. + UseSetT getLoadUseSet(mlir::Operation *load) const { + assert(loadMapSets.count(load) && "analysis missed an array load?"); + return loadMapSets.lookup(load); + } + + void arrayMentions(llvm::SmallVectorImpl &mentions, + ArrayLoadOp load); private: void construct(mlir::Operation *topLevelOp); @@ -90,36 +107,53 @@ ConflictSetT conflicts; // set of conflicts (loads and merge stores) OperationUseMapT useMap; LoadMapSetsT loadMapSets; + // Set of array_access ops associated with array_amend ops. + AmendAccessSetT amendAccesses; }; } // namespace namespace { /// Helper class to collect all array operations that produced an array value. class ReachCollector { -private: - // If provided, the `loopRegion` is the body of a loop that produces the array - // of interest. +public: ReachCollector(llvm::SmallVectorImpl &reach, mlir::Region *loopRegion) : reach{reach}, loopRegion{loopRegion} {} - void collectArrayAccessFrom(mlir::Operation *op, mlir::ValueRange range) { - llvm::errs() << "COLLECT " << *op << "\n"; + void collectArrayMentionFrom(mlir::Operation *op, mlir::ValueRange range) { if (range.empty()) { - collectArrayAccessFrom(op, mlir::Value{}); + collectArrayMentionFrom(op, mlir::Value{}); return; } for (mlir::Value v : range) - collectArrayAccessFrom(v); + collectArrayMentionFrom(v); + } + + // Collect all the array_access ops in `block`. This recursively looks into + // blocks in ops with regions. + // FIXME: This is temporarily relying on the array_amend appearing in a + // do_loop Region. This phase ordering assumption can be eliminated by using + // dominance information to find the array_access ops or by scanning the + // transitive closure of the amending array_access's users and the defs that + // reach them. + void collectAccesses(llvm::SmallVector &result, + mlir::Block *block) { + for (auto &op : *block) { + if (auto access = mlir::dyn_cast(op)) { + LLVM_DEBUG(llvm::dbgs() << "adding access: " << access << '\n'); + result.push_back(access); + continue; + } + for (auto ®ion : op.getRegions()) + for (auto &bb : region.getBlocks()) + collectAccesses(result, &bb); + } } - // TODO: Replace recursive algorithm on def-use chain with an iterative one - // with an explicit stack. - void collectArrayAccessFrom(mlir::Operation *op, mlir::Value val) { + void collectArrayMentionFrom(mlir::Operation *op, mlir::Value val) { // `val` is defined by an Op, process the defining Op. // If `val` is defined by a region containing Op, we want to drill down // and through that Op's region(s). 
- llvm::errs() << "COLLECT " << *op << "\n"; LLVM_DEBUG(llvm::dbgs() << "popset: " << *op << '\n'); auto popFn = [&](auto rop) { assert(val && "op must have a result value"); @@ -127,9 +161,13 @@ llvm::SmallVector results; rop.resultToSourceOps(results, resNum); for (auto u : results) - collectArrayAccessFrom(u); + collectArrayMentionFrom(u); }; - if (auto rop = mlir::dyn_cast(op)) { + if (auto rop = mlir::dyn_cast(op)) { + popFn(rop); + return; + } + if (auto rop = mlir::dyn_cast(op)) { popFn(rop); return; } @@ -137,44 +175,73 @@ popFn(rop); return; } + if (auto box = mlir::dyn_cast(op)) { + for (auto *user : box.getMemref().getUsers()) + if (user != op) + collectArrayMentionFrom(user, user->getResults()); + return; + } if (auto mergeStore = mlir::dyn_cast(op)) { if (opIsInsideLoops(mergeStore)) - collectArrayAccessFrom(mergeStore.getSequence()); + collectArrayMentionFrom(mergeStore.getSequence()); return; } if (mlir::isa(op)) { // Look for any stores inside the loops, and collect an array operation // that produced the value being stored to it. - for (mlir::Operation *user : op->getUsers()) + for (auto *user : op->getUsers()) if (auto store = mlir::dyn_cast(user)) if (opIsInsideLoops(store)) - collectArrayAccessFrom(store.getValue()); + collectArrayMentionFrom(store.getValue()); return; } + // Scan the uses of amend's memref + if (auto amend = mlir::dyn_cast(op)) { + reach.push_back(op); + llvm::SmallVector accesses; + collectAccesses(accesses, op->getBlock()); + for (auto access : accesses) + collectArrayMentionFrom(access.getResult()); + } + // Otherwise, Op does not contain a region so just chase its operands. - if (mlir::isa( - op)) { + if (mlir::isa(op)) { LLVM_DEBUG(llvm::dbgs() << "add " << *op << " to reachable set\n"); - reach.emplace_back(op); + reach.push_back(op); } - // Array modify assignment is performed on the result. So the analysis - // must look at the what is done with the result. + + // Include all array_access ops using an array_load. + if (auto arrLd = mlir::dyn_cast(op)) + for (auto *user : arrLd.getResult().getUsers()) + if (mlir::isa(user)) { + LLVM_DEBUG(llvm::dbgs() << "add " << *user << " to reachable set\n"); + reach.push_back(user); + } + + // Array modify assignment is performed on the result. So the analysis must + // look at the what is done with the result. if (mlir::isa(op)) - for (mlir::Operation *user : op->getResult(0).getUsers()) + for (auto *user : op->getResult(0).getUsers()) followUsers(user); + if (mlir::isa(op)) { + LLVM_DEBUG(llvm::dbgs() << "add " << *op << " to reachable set\n"); + reach.push_back(op); + } + for (auto u : op->getOperands()) - collectArrayAccessFrom(u); + collectArrayMentionFrom(u); } - void collectArrayAccessFrom(mlir::BlockArgument ba) { + void collectArrayMentionFrom(mlir::BlockArgument ba) { auto *parent = ba.getOwner()->getParentOp(); // If inside an Op holding a region, the block argument corresponds to an // argument passed to the containing Op. auto popFn = [&](auto rop) { - collectArrayAccessFrom(rop.blockArgToSourceOp(ba.getArgNumber())); + collectArrayMentionFrom(rop.blockArgToSourceOp(ba.getArgNumber())); }; if (auto rop = mlir::dyn_cast(parent)) { popFn(rop); @@ -187,46 +254,64 @@ // Otherwise, a block argument is provided via the pred blocks. for (auto *pred : ba.getOwner()->getPredecessors()) { auto u = pred->getTerminator()->getOperand(ba.getArgNumber()); - collectArrayAccessFrom(u); + collectArrayMentionFrom(u); } } // Recursively trace operands to find all array operations relating to the // values merged. 
- void collectArrayAccessFrom(mlir::Value val) { + void collectArrayMentionFrom(mlir::Value val) { if (!val || visited.contains(val)) return; visited.insert(val); // Process a block argument. if (auto ba = val.dyn_cast()) { - collectArrayAccessFrom(ba); + collectArrayMentionFrom(ba); return; } // Process an Op. if (auto *op = val.getDefiningOp()) { - collectArrayAccessFrom(op, val); + collectArrayMentionFrom(op, val); return; } - fir::emitFatalError(val.getLoc(), "unhandled value"); + emitFatalError(val.getLoc(), "unhandled value"); + } + + /// Return all ops that produce the array value that is stored into the + /// `array_merge_store`. + static void reachingValues(llvm::SmallVectorImpl &reach, + mlir::Value seq) { + reach.clear(); + mlir::Region *loopRegion = nullptr; + if (auto doLoop = mlir::dyn_cast_or_null(seq.getDefiningOp())) + loopRegion = &doLoop->getRegion(0); + ReachCollector collector(reach, loopRegion); + collector.collectArrayMentionFrom(seq); } +private: /// Is \op inside the loop nest region ? + /// FIXME: replace this structural dependence with graph properties. bool opIsInsideLoops(mlir::Operation *op) const { - return loopRegion && loopRegion->isAncestor(op->getParentRegion()); + auto *region = op->getParentRegion(); + while (region) { + if (region == loopRegion) + return true; + region = region->getParentRegion(); + } + return false; } /// Recursively trace the use of an operation results, calling - /// collectArrayAccessFrom on the direct and indirect user operands. - /// TODO: Replace recursive algorithm on def-use chain with an iterative one - /// with an explicit stack. + /// collectArrayMentionFrom on the direct and indirect user operands. void followUsers(mlir::Operation *op) { for (auto userOperand : op->getOperands()) - collectArrayAccessFrom(userOperand); + collectArrayMentionFrom(userOperand); // Go through potential converts/coordinate_op. - for (mlir::Operation *indirectUser : op->getUsers()) + for (auto indirectUser : op->getUsers()) followUsers(indirectUser); } @@ -234,39 +319,30 @@ llvm::SmallPtrSet visited; /// Region of the loops nest that produced the array value. mlir::Region *loopRegion; - -public: - /// Return all ops that produce the array value that is stored into the - /// `array_merge_store`. - static void reachingValues(llvm::SmallVectorImpl &reach, - mlir::Value seq) { - reach.clear(); - mlir::Region *loopRegion = nullptr; - // Only `DoLoopOp` is tested here since array operations are currently only - // associated with this kind of loop. - if (auto doLoop = - mlir::dyn_cast_or_null(seq.getDefiningOp())) - loopRegion = &doLoop->getRegion(0); - ReachCollector collector(reach, loopRegion); - collector.collectArrayAccessFrom(seq); - } }; } // namespace /// Find all the array operations that access the array value that is loaded by /// the array load operation, `load`. 
-const llvm::SmallVector & -ArrayCopyAnalysis::arrayAccesses(ArrayLoadOp load) { +void ArrayCopyAnalysis::arrayMentions( + llvm::SmallVectorImpl &mentions, ArrayLoadOp load) { + mentions.clear(); auto lmIter = loadMapSets.find(load); - if (lmIter != loadMapSets.end()) - return lmIter->getSecond(); + if (lmIter != loadMapSets.end()) { + for (auto *opnd : lmIter->second) { + auto *owner = opnd->getOwner(); + if (mlir::isa(owner)) + mentions.push_back(owner); + } + return; + } - llvm::SmallVector accesses; UseSetT visited; llvm::SmallVector queue; // uses of ArrayLoad[orig] auto appendToQueue = [&](mlir::Value val) { - for (mlir::OpOperand &use : val.getUses()) + for (auto &use : val.getUses()) if (!visited.count(&use)) { visited.insert(&use); queue.push_back(&use); @@ -281,7 +357,8 @@ while (!queue.empty()) { mlir::OpOperand *operand = queue.pop_back_val(); mlir::Operation *owner = operand->getOwner(); - + if (!owner) + continue; auto structuredLoop = [&](auto ro) { if (auto blockArg = ro.iterArgToBlockArg(operand->get())) { int64_t arg = blockArg.getArgNumber(); @@ -313,26 +390,32 @@ structuredLoop(ro); } else if (auto rs = mlir::dyn_cast(owner)) { // Thread any uses of fir.if that return the marked array value. - if (auto ifOp = rs->getParentOfType()) + mlir::Operation *parent = rs->getParentRegion()->getParentOp(); + if (auto ifOp = mlir::dyn_cast(parent)) appendToQueue(ifOp.getResult(operand->getOperandNumber())); } else if (mlir::isa(owner)) { // Keep track of array value fetches. LLVM_DEBUG(llvm::dbgs() << "add fetch {" << *owner << "} to array value set\n"); - accesses.push_back(owner); + mentions.push_back(owner); } else if (auto update = mlir::dyn_cast(owner)) { // Keep track of array value updates and thread the return value uses. LLVM_DEBUG(llvm::dbgs() << "add update {" << *owner << "} to array value set\n"); - accesses.push_back(owner); + mentions.push_back(owner); appendToQueue(update.getResult()); } else if (auto update = mlir::dyn_cast(owner)) { // Keep track of array value modification and thread the return value // uses. LLVM_DEBUG(llvm::dbgs() << "add modify {" << *owner << "} to array value set\n"); - accesses.push_back(owner); + mentions.push_back(owner); appendToQueue(update.getResult(1)); + } else if (auto mention = mlir::dyn_cast(owner)) { + mentions.push_back(owner); + } else if (auto amend = mlir::dyn_cast(owner)) { + mentions.push_back(owner); + appendToQueue(amend.getResult()); } else if (auto br = mlir::dyn_cast(owner)) { branchOp(br.getDest(), br.getDestOperands()); } else if (auto br = mlir::dyn_cast(owner)) { @@ -344,7 +427,107 @@ llvm::report_fatal_error("array value reached unexpected op"); } } - return loadMapSets.insert({load, accesses}).first->getSecond(); + loadMapSets.insert({load, visited}); +} + +static bool hasPointerType(mlir::Type type) { + if (auto boxTy = type.dyn_cast()) + type = boxTy.getEleTy(); + return type.isa(); +} + +// This is a NF performance hack. It makes a simple test that the slices of the +// load, \p ld, and the merge store, \p st, are trivially mutually exclusive. +static bool mutuallyExclusiveSliceRange(ArrayLoadOp ld, ArrayMergeStoreOp st) { + // If the same array_load, then no further testing is warranted. 
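+  // For example, the ranges of `a(1:4)` and `a(5:8)` cannot overlap: one
+  // bound is the other displaced by a positive constant.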
+  if (ld.getResult() == st.getOriginal())
+    return false;
+
+  auto getSliceOp = [](mlir::Value val) -> SliceOp {
+    if (!val)
+      return {};
+    auto sliceOp = mlir::dyn_cast_or_null<SliceOp>(val.getDefiningOp());
+    if (!sliceOp)
+      return {};
+    return sliceOp;
+  };
+
+  auto ldSlice = getSliceOp(ld.getSlice());
+  auto stSlice = getSliceOp(st.getSlice());
+  if (!ldSlice || !stSlice)
+    return false;
+
+  // Give up on subobject slices.
+  if (!ldSlice.getFields().empty() || !stSlice.getFields().empty() ||
+      !ldSlice.getSubstr().empty() || !stSlice.getSubstr().empty())
+    return false;
+
+  // Crudely test that the two slices do not overlap by looking for the
+  // following general condition. If the slices look like (i:j) and (j+1:k),
+  // then these ranges do not overlap. The addend must be a constant.
+  auto ldTriples = ldSlice.getTriples();
+  auto stTriples = stSlice.getTriples();
+  const auto size = ldTriples.size();
+  if (size != stTriples.size())
+    return false;
+
+  auto displacedByConstant = [](mlir::Value v1, mlir::Value v2) {
+    auto removeConvert = [](mlir::Value v) -> mlir::Operation * {
+      auto *op = v.getDefiningOp();
+      while (auto conv = mlir::dyn_cast_or_null<ConvertOp>(op))
+        op = conv.getValue().getDefiningOp();
+      return op;
+    };
+
+    auto isPositiveConstant = [](mlir::Value v) -> bool {
+      if (auto conOp =
+              mlir::dyn_cast<mlir::arith::ConstantOp>(v.getDefiningOp()))
+        if (auto iattr = conOp.getValue().dyn_cast<mlir::IntegerAttr>())
+          return iattr.getInt() > 0;
+      return false;
+    };
+
+    auto *op1 = removeConvert(v1);
+    auto *op2 = removeConvert(v2);
+    if (!op1 || !op2)
+      return false;
+    if (auto addi = mlir::dyn_cast<mlir::arith::AddIOp>(op2))
+      if ((addi.getLhs().getDefiningOp() == op1 &&
+           isPositiveConstant(addi.getRhs())) ||
+          (addi.getRhs().getDefiningOp() == op1 &&
+           isPositiveConstant(addi.getLhs())))
+        return true;
+    if (auto subi = mlir::dyn_cast<mlir::arith::SubIOp>(op1))
+      if (subi.getLhs().getDefiningOp() == op2 &&
+          isPositiveConstant(subi.getRhs()))
+        return true;
+    return false;
+  };
+
+  for (std::remove_const_t<decltype(size)> i = 0; i < size; i += 3) {
+    // If both are loop invariant, skip to the next triple.
+    if (mlir::isa_and_nonnull<fir::UndefOp>(ldTriples[i + 1].getDefiningOp()) &&
+        mlir::isa_and_nonnull<fir::UndefOp>(stTriples[i + 1].getDefiningOp())) {
+      // Unless either is a vector index, then be conservative.
+      if (mlir::isa_and_nonnull<fir::UndefOp>(ldTriples[i].getDefiningOp()) ||
+          mlir::isa_and_nonnull<fir::UndefOp>(stTriples[i].getDefiningOp()))
+        return false;
+      continue;
+    }
+    // If identical, skip to the next triple.
+    if (ldTriples[i] == stTriples[i] && ldTriples[i + 1] == stTriples[i + 1] &&
+        ldTriples[i + 2] == stTriples[i + 2])
+      continue;
+    // If ubound and lbound are the same with a constant offset, skip to the
+    // next triple.
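+    // For example, with a load slice of (i:j) and a store slice of (j+1:k),
+    // the store lower bound is the load upper bound displaced by 1.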
+    if (displacedByConstant(ldTriples[i + 1], stTriples[i]) ||
+        displacedByConstant(stTriples[i + 1], ldTriples[i]))
+      continue;
+    return false;
+  }
+  LLVM_DEBUG(llvm::dbgs() << "detected non-overlapping slice ranges on " << ld
+                          << " and " << st << ", which is not a conflict\n");
+  return true;
 }
 
 /// Is there a conflict between the array value that was updated and to be
@@ -354,67 +537,132 @@
                            ArrayMergeStoreOp st) {
   mlir::Value load;
   mlir::Value addr = st.getMemref();
-  auto stEleTy = fir::dyn_cast_ptrOrBoxEleTy(addr.getType());
-  for (auto *op : reach) {
-    auto ld = mlir::dyn_cast<ArrayLoadOp>(op);
-    if (!ld)
-      continue;
-    mlir::Type ldTy = ld.getMemref().getType();
-    if (auto boxTy = ldTy.dyn_cast<fir::BoxType>())
-      ldTy = boxTy.getEleTy();
-    if (ldTy.isa<fir::PointerType>() && stEleTy == dyn_cast_ptrEleTy(ldTy))
-      return true;
-    if (ld.getMemref() == addr) {
-      if (ld.getResult() != st.getOriginal())
-        return true;
-      if (load)
+  const bool storeHasPointerType = hasPointerType(addr.getType());
+  for (auto *op : reach)
+    if (auto ld = mlir::dyn_cast<ArrayLoadOp>(op)) {
+      mlir::Type ldTy = ld.getMemref().getType();
+      if (ld.getMemref() == addr) {
+        if (mutuallyExclusiveSliceRange(ld, st))
+          continue;
+        if (ld.getResult() != st.getOriginal())
+          return true;
+        if (load) {
+          // TODO: extend this to allow checking if the first `load` and this
+          // `ld` are mutually exclusive accesses but not identical.
+          return true;
+        }
+        load = ld;
+      } else if ((hasPointerType(ldTy) || storeHasPointerType)) {
+        // TODO: Use target attribute to restrict this case further.
+        // TODO: Check if types can also allow ruling out some cases. For now,
+        // the fact that equivalences are implemented with the pointer
+        // attribute to enforce aliasing prevents any attempt to do so. In
+        // general, it may also be wrong to rely on the types if any of them
+        // is a complex or a derived type, for which it is possible to create
+        // a pointer to a part with a different type than the whole, although
+        // this deserves more investigation because existing compiler behavior
+        // seems to diverge here.
         return true;
-      load = ld;
+      }
     }
-  }
   return false;
 }
 
-/// Check if there is any potential conflict in the chained update operations
-/// (ArrayFetchOp, ArrayUpdateOp, ArrayModifyOp) while merging back to the
-/// array. A potential conflict is detected if two operations work on the same
-/// indices.
-static bool conflictOnMerge(llvm::ArrayRef<mlir::Operation *> accesses) {
-  if (accesses.size() < 2)
+/// Is there an access vector conflict on the array being merged into? If the
+/// access vectors diverge, then assume that there are potentially overlapping
+/// loop-carried references.
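+/// For example, a fetch at index (i+1) merged with an update at index (i)
+/// diverges and must be treated as a loop-carried dependence.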
+static bool conflictOnMerge(llvm::ArrayRef<mlir::Operation *> mentions) {
+  if (mentions.size() < 2)
     return false;
   llvm::SmallVector<mlir::Value> indices;
-  LLVM_DEBUG(llvm::dbgs() << "check merge conflict on with " << accesses.size()
-                          << " accesses on the list\n");
-  for (auto *op : accesses) {
-    assert((mlir::isa<ArrayFetchOp, ArrayUpdateOp, ArrayModifyOp>(op)) &&
-           "unexpected operation in analysis");
+  LLVM_DEBUG(llvm::dbgs() << "check merge conflict with " << mentions.size()
+                          << " mentions on the list\n");
+  bool valSeen = false;
+  bool refSeen = false;
+  for (auto *op : mentions) {
     llvm::SmallVector<mlir::Value> compareVector;
     if (auto u = mlir::dyn_cast<ArrayUpdateOp>(op)) {
+      valSeen = true;
       if (indices.empty()) {
         indices = u.getIndices();
         continue;
       }
       compareVector = u.getIndices();
     } else if (auto f = mlir::dyn_cast<ArrayModifyOp>(op)) {
+      valSeen = true;
       if (indices.empty()) {
         indices = f.getIndices();
         continue;
       }
       compareVector = f.getIndices();
     } else if (auto f = mlir::dyn_cast<ArrayFetchOp>(op)) {
+      valSeen = true;
+      if (indices.empty()) {
+        indices = f.getIndices();
+        continue;
+      }
+      compareVector = f.getIndices();
+    } else if (auto f = mlir::dyn_cast<ArrayAccessOp>(op)) {
+      refSeen = true;
       if (indices.empty()) {
         indices = f.getIndices();
         continue;
       }
       compareVector = f.getIndices();
+    } else if (mlir::isa<ArrayAmendOp>(op)) {
+      refSeen = true;
+      continue;
+    } else {
+      mlir::emitError(op->getLoc(), "unexpected operation in analysis");
     }
-    if (compareVector != indices)
+    if (compareVector.size() != indices.size() ||
+        llvm::any_of(llvm::zip(compareVector, indices), [&](auto pair) {
+          return std::get<0>(pair) != std::get<1>(pair);
+        }))
       return true;
     LLVM_DEBUG(llvm::dbgs() << "vectors compare equal\n");
   }
+  return valSeen && refSeen;
+}
+
+/// With element-by-reference semantics, an amended array with more than one
+/// access to the same loaded array is conservatively considered a conflict.
+/// Note: the array copy can still be eliminated in subsequent optimizations.
+static bool conflictOnReference(llvm::ArrayRef<mlir::Operation *> mentions) {
+  LLVM_DEBUG(llvm::dbgs() << "checking reference semantics " << mentions.size()
+                          << '\n');
+  if (mentions.size() < 3)
+    return false;
+  unsigned amendCount = 0;
+  unsigned accessCount = 0;
+  for (auto *op : mentions) {
+    if (mlir::isa<ArrayAmendOp>(op) && ++amendCount > 1) {
+      LLVM_DEBUG(llvm::dbgs() << "conflict: multiple amends of array value\n");
+      return true;
+    }
+    if (mlir::isa<ArrayAccessOp>(op) && ++accessCount > 1) {
+      LLVM_DEBUG(llvm::dbgs()
+                 << "conflict: multiple accesses of array value\n");
+      return true;
+    }
+    if (mlir::isa<ArrayFetchOp, ArrayUpdateOp, ArrayModifyOp>(op)) {
+      LLVM_DEBUG(llvm::dbgs()
+                 << "conflict: array value has both uses by-value and uses "
+                    "by-reference. conservative assumption.\n");
+      return true;
+    }
+  }
   return false;
 }
 
+static mlir::Operation *
+amendingAccess(llvm::ArrayRef<mlir::Operation *> mentions) {
+  for (auto *op : mentions)
+    if (auto amend = mlir::dyn_cast<ArrayAmendOp>(op))
+      return amend.getMemref().getDefiningOp();
+  return {};
+}
+
 // Are either of types of conflicts present?
 inline bool conflictDetected(llvm::ArrayRef<mlir::Operation *> reach,
                              llvm::ArrayRef<mlir::Operation *> accesses,
@@ -422,49 +670,78 @@
   return conflictOnLoad(reach, st) || conflictOnMerge(accesses);
 }
 
+// Assume that any call to a function that uses host-associations will be
+// modifying the output array.
+static bool
+conservativeCallConflict(llvm::ArrayRef<mlir::Operation *> reaches) {
+  return llvm::any_of(reaches, [](mlir::Operation *op) {
+    if (auto call = mlir::dyn_cast<fir::CallOp>(op))
+      if (auto callee =
+              call.getCallableForCallee().dyn_cast<mlir::SymbolRefAttr>()) {
+        auto module = op->getParentOfType<mlir::ModuleOp>();
+        return hasHostAssociationArgument(
+            module.lookupSymbol<mlir::FuncOp>(callee));
+      }
+    return false;
+  });
+}
+
 /// Constructor of the array copy analysis.
/// This performs the analysis and saves the intermediate results. void ArrayCopyAnalysis::construct(mlir::Operation *topLevelOp) { topLevelOp->walk([&](Operation *op) { if (auto st = mlir::dyn_cast(op)) { - llvm::SmallVector values; + llvm::SmallVector values; ReachCollector::reachingValues(values, st.getSequence()); - const llvm::SmallVector &accesses = arrayAccesses( - mlir::cast(st.getOriginal().getDefiningOp())); - if (conflictDetected(values, accesses, st)) { + bool callConflict = conservativeCallConflict(values); + llvm::SmallVector mentions; + arrayMentions(mentions, + mlir::cast(st.getOriginal().getDefiningOp())); + bool conflict = conflictDetected(values, mentions, st); + bool refConflict = conflictOnReference(mentions); + if (callConflict || conflict || refConflict) { LLVM_DEBUG(llvm::dbgs() << "CONFLICT: copies required for " << st << '\n' << " adding conflicts on: " << op << " and " << st.getOriginal() << '\n'); conflicts.insert(op); conflicts.insert(st.getOriginal().getDefiningOp()); + if (auto *access = amendingAccess(mentions)) + amendAccesses.insert(access); } auto *ld = st.getOriginal().getDefiningOp(); LLVM_DEBUG(llvm::dbgs() << "map: adding {" << *ld << " -> " << st << "}\n"); useMap.insert({ld, op}); } else if (auto load = mlir::dyn_cast(op)) { - const llvm::SmallVector &accesses = - arrayAccesses(load); + llvm::SmallVector mentions; + arrayMentions(mentions, load); LLVM_DEBUG(llvm::dbgs() << "process load: " << load - << ", accesses: " << accesses.size() << '\n'); - for (auto *acc : accesses) { - LLVM_DEBUG(llvm::dbgs() << " access: " << *acc << '\n'); - assert((mlir::isa(acc))); - if (!useMap.insert({acc, op}).second) { - mlir::emitError( - load.getLoc(), - "The parallel semantics of multiple array_merge_stores per " - "array_load are not supported."); - return; + << ", mentions: " << mentions.size() << '\n'); + for (auto *acc : mentions) { + LLVM_DEBUG(llvm::dbgs() << " mention: " << *acc << '\n'); + if (mlir::isa(acc)) { + if (useMap.count(acc)) { + mlir::emitError( + load.getLoc(), + "The parallel semantics of multiple array_merge_stores per " + "array_load are not supported."); + continue; + } + LLVM_DEBUG(llvm::dbgs() + << "map: adding {" << *acc << "} -> {" << load << "}\n"); + useMap.insert({acc, op}); } - LLVM_DEBUG(llvm::dbgs() - << "map: adding {" << *acc << "} -> {" << load << "}\n"); } } }); } +//===----------------------------------------------------------------------===// +// Conversions for converting out of array value form. +//===----------------------------------------------------------------------===// + namespace { class ArrayLoadConversion : public mlir::OpRewritePattern { public: @@ -495,60 +772,93 @@ } // namespace static mlir::Type getEleTy(mlir::Type ty) { - if (auto t = dyn_cast_ptrEleTy(ty)) - ty = t; - if (auto t = ty.dyn_cast()) - ty = t.getEleTy(); + auto eleTy = unwrapSequenceType(unwrapPassByRefType(ty)); // FIXME: keep ptr/heap/ref information. - return ReferenceType::get(ty); + return ReferenceType::get(eleTy); } // Extract extents from the ShapeOp/ShapeShiftOp into the result vector. -// TODO: getExtents on op should return a ValueRange instead of a vector. 
-static void getExtents(llvm::SmallVectorImpl<mlir::Value> &result,
-                       mlir::Value shape) {
+static bool getAdjustedExtents(mlir::Location loc,
+                               mlir::PatternRewriter &rewriter,
+                               ArrayLoadOp arrLoad,
+                               llvm::SmallVectorImpl<mlir::Value> &result,
+                               mlir::Value shape) {
+  bool copyUsingSlice = false;
   auto *shapeOp = shape.getDefiningOp();
-  if (auto s = mlir::dyn_cast<fir::ShapeOp>(shapeOp)) {
+  if (auto s = mlir::dyn_cast_or_null<fir::ShapeOp>(shapeOp)) {
     auto e = s.getExtents();
     result.insert(result.end(), e.begin(), e.end());
-    return;
-  }
-  if (auto s = mlir::dyn_cast<fir::ShapeShiftOp>(shapeOp)) {
+  } else if (auto s = mlir::dyn_cast_or_null<fir::ShapeShiftOp>(shapeOp)) {
     auto e = s.getExtents();
     result.insert(result.end(), e.begin(), e.end());
-    return;
+  } else {
+    emitFatalError(loc, "not a fir.shape/fir.shape_shift op");
+  }
+  auto idxTy = rewriter.getIndexType();
+  if (factory::isAssumedSize(result)) {
+    // Use slice information to compute the extent of the column.
+    auto one = rewriter.create<mlir::arith::ConstantIndexOp>(loc, 1);
+    mlir::Value size = one;
+    if (mlir::Value sliceArg = arrLoad.getSlice()) {
+      if (auto sliceOp =
+              mlir::dyn_cast_or_null<SliceOp>(sliceArg.getDefiningOp())) {
+        auto triples = sliceOp.getTriples();
+        const std::size_t tripleSize = triples.size();
+        auto module = arrLoad->getParentOfType<mlir::ModuleOp>();
+        FirOpBuilder builder(rewriter, getKindMapping(module));
+        size = builder.genExtentFromTriplet(loc, triples[tripleSize - 3],
+                                            triples[tripleSize - 2],
+                                            triples[tripleSize - 1], idxTy);
+        copyUsingSlice = true;
+      }
+    }
+    result[result.size() - 1] = size;
   }
-  llvm::report_fatal_error("not a fir.shape/fir.shape_shift op");
+  return copyUsingSlice;
 }
 
-// Place the extents of the array loaded by an ArrayLoadOp into the result
-// vector and return a ShapeOp/ShapeShiftOp with the corresponding extents. If
-// the ArrayLoadOp is loading a fir.box, code will be generated to read the
-// extents from the fir.box, and a the retunred ShapeOp is built with the read
-// extents.
-// Otherwise, the extents will be extracted from the ShapeOp/ShapeShiftOp
-// argument of the ArrayLoadOp that is returned.
-static mlir::Value
-getOrReadExtentsAndShapeOp(mlir::Location loc, mlir::PatternRewriter &rewriter,
-                           fir::ArrayLoadOp loadOp,
-                           llvm::SmallVectorImpl<mlir::Value> &result) {
+/// Place the extents of the array load, \p arrLoad, into \p result and
+/// return a ShapeOp or ShapeShiftOp with the same extents. If \p arrLoad is
+/// loading a `!fir.box`, code will be generated to read the extents from the
+/// boxed value, and the returned shape Op will be built with the extents read
+/// from the box. Otherwise, the extents will be extracted from the ShapeOp (or
+/// ShapeShiftOp) argument of \p arrLoad. \p copyUsingSlice will be set to true
+/// if slicing of the output array is to be done in the copy-in/copy-out rather
+/// than in the elemental computation step.
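+/// For an assumed-size array, the unknown extent of the last dimension is
+/// recomputed from the last slice triple via genExtentFromTriplet.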
+static mlir::Value getOrReadExtentsAndShapeOp(
+    mlir::Location loc, mlir::PatternRewriter &rewriter, ArrayLoadOp arrLoad,
+    llvm::SmallVectorImpl<mlir::Value> &result, bool &copyUsingSlice) {
   assert(result.empty());
-  if (auto boxTy = loadOp.getMemref().getType().dyn_cast<fir::BoxType>()) {
-    auto rank = fir::dyn_cast_ptrOrBoxEleTy(boxTy)
-                    .cast<fir::SequenceType>()
-                    .getDimension();
+  if (arrLoad->hasAttr(fir::getOptionalAttrName()))
+    fir::emitFatalError(
+        loc, "shapes from array load of OPTIONAL arrays must not be used");
+  if (auto boxTy = arrLoad.getMemref().getType().dyn_cast<BoxType>()) {
+    auto rank =
+        dyn_cast_ptrOrBoxEleTy(boxTy).cast<SequenceType>().getDimension();
     auto idxTy = rewriter.getIndexType();
     for (decltype(rank) dim = 0; dim < rank; ++dim) {
-      auto dimVal = rewriter.create<mlir::arith::ConstantIndexOp>(loc, dim);
-      auto dimInfo = rewriter.create<BoxDimsOp>(
-          loc, idxTy, idxTy, idxTy, loadOp.getMemref(), dimVal);
+      auto dimVal = rewriter.create<mlir::arith::ConstantIndexOp>(loc, dim);
+      auto dimInfo = rewriter.create<BoxDimsOp>(loc, idxTy, idxTy, idxTy,
                                                arrLoad.getMemref(), dimVal);
       result.emplace_back(dimInfo.getResult(1));
     }
-    auto shapeType = fir::ShapeType::get(rewriter.getContext(), rank);
-    return rewriter.create<ShapeOp>(loc, shapeType, result);
+    if (!arrLoad.getShape()) {
+      auto shapeType = ShapeType::get(rewriter.getContext(), rank);
+      return rewriter.create<ShapeOp>(loc, shapeType, result);
+    }
+    auto shiftOp = arrLoad.getShape().getDefiningOp<ShiftOp>();
+    auto shapeShiftType = ShapeShiftType::get(rewriter.getContext(), rank);
+    llvm::SmallVector<mlir::Value> shapeShiftOperands;
+    for (auto [lb, extent] : llvm::zip(shiftOp.getOrigins(), result)) {
+      shapeShiftOperands.push_back(lb);
+      shapeShiftOperands.push_back(extent);
+    }
+    return rewriter.create<ShapeShiftOp>(loc, shapeShiftType,
+                                         shapeShiftOperands);
   }
-  getExtents(result, loadOp.getShape());
-  return loadOp.getShape();
+  copyUsingSlice =
+      getAdjustedExtents(loc, rewriter, arrLoad, result, arrLoad.getShape());
+  return arrLoad.getShape();
 }
 
 static mlir::Type toRefType(mlir::Type ty) {
@@ -582,6 +892,121 @@
   return result;
 }
 
+static mlir::Value getCharacterLen(mlir::Location loc, FirOpBuilder &builder,
+                                   ArrayLoadOp load, CharacterType charTy) {
+  auto charLenTy = builder.getCharacterLengthType();
+  if (charTy.hasDynamicLen()) {
+    if (load.getMemref().getType().isa<BoxType>()) {
+      // The loaded array is an emboxed value. Get the CHARACTER length from
+      // the box value.
+      auto eleSzInBytes =
+          builder.create<BoxEleSizeOp>(loc, charLenTy, load.getMemref());
+      auto kindSize =
+          builder.getKindMap().getCharacterBitsize(charTy.getFKind());
+      auto kindByteSize =
+          builder.createIntegerConstant(loc, charLenTy, kindSize / 8);
+      return builder.create<mlir::arith::DivSIOp>(loc, eleSzInBytes,
+                                                  kindByteSize);
+    }
+    // The loaded array is a (set of) unboxed values. If the CHARACTER's
+    // length is not a constant, it must be provided as a type parameter to
+    // the array_load.
+    auto typeparams = load.getTypeparams();
+    assert(typeparams.size() > 0 && "expected type parameters on array_load");
+    return typeparams.back();
+  }
+  // The typical case: the length of the CHARACTER is a compile-time
+  // constant that is encoded in the type information.
+  return builder.createIntegerConstant(loc, charLenTy, charTy.getLen());
+}
+
+/// Generate a shallow array copy. This is used for both copy-in and copy-out.
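+/// A loop nest is built over the adjusted extents and the elements are
+/// assigned one at a time with genScalarAssignment.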
+template +void genArrayCopy(mlir::Location loc, mlir::PatternRewriter &rewriter, + mlir::Value dst, mlir::Value src, mlir::Value shapeOp, + mlir::Value sliceOp, ArrayLoadOp arrLoad) { + auto insPt = rewriter.saveInsertionPoint(); + llvm::SmallVector indices; + llvm::SmallVector extents; + bool copyUsingSlice = + getAdjustedExtents(loc, rewriter, arrLoad, extents, shapeOp); + auto idxTy = rewriter.getIndexType(); + // Build loop nest from column to row. + for (auto sh : llvm::reverse(extents)) { + auto ubi = rewriter.create(loc, idxTy, sh); + auto zero = rewriter.create(loc, 0); + auto one = rewriter.create(loc, 1); + auto ub = rewriter.create(loc, idxTy, ubi, one); + auto loop = rewriter.create(loc, zero, ub, one); + rewriter.setInsertionPointToStart(loop.getBody()); + indices.push_back(loop.getInductionVar()); + } + // Reverse the indices so they are in column-major order. + std::reverse(indices.begin(), indices.end()); + auto typeparams = arrLoad.getTypeparams(); + auto fromAddr = rewriter.create( + loc, getEleTy(src.getType()), src, shapeOp, + CopyIn && copyUsingSlice ? sliceOp : mlir::Value{}, + factory::originateIndices(loc, rewriter, src.getType(), shapeOp, indices), + typeparams); + auto toAddr = rewriter.create( + loc, getEleTy(dst.getType()), dst, shapeOp, + !CopyIn && copyUsingSlice ? sliceOp : mlir::Value{}, + factory::originateIndices(loc, rewriter, dst.getType(), shapeOp, indices), + typeparams); + auto eleTy = unwrapSequenceType(unwrapPassByRefType(dst.getType())); + auto module = toAddr->getParentOfType(); + FirOpBuilder builder(rewriter, getKindMapping(module)); + // Copy from (to) object to (from) temp copy of same object. + if (auto charTy = eleTy.dyn_cast()) { + auto len = getCharacterLen(loc, builder, arrLoad, charTy); + CharBoxValue toChar(toAddr, len); + CharBoxValue fromChar(fromAddr, len); + factory::genScalarAssignment(builder, loc, toChar, fromChar); + } else { + if (hasDynamicSize(eleTy)) + TODO(loc, "copy element of dynamic size"); + factory::genScalarAssignment(builder, loc, toAddr, fromAddr); + } + rewriter.restoreInsertionPoint(insPt); +} + +/// The array load may be either a boxed or unboxed value. If the value is +/// boxed, we read the type parameters from the boxed value. +static llvm::SmallVector +genArrayLoadTypeParameters(mlir::Location loc, mlir::PatternRewriter &rewriter, + ArrayLoadOp load) { + if (load.getTypeparams().empty()) { + auto eleTy = + unwrapSequenceType(unwrapPassByRefType(load.getMemref().getType())); + if (hasDynamicSize(eleTy)) { + if (auto charTy = eleTy.dyn_cast()) { + assert(load.getMemref().getType().isa()); + auto module = load->getParentOfType(); + FirOpBuilder builder(rewriter, getKindMapping(module)); + return {getCharacterLen(loc, builder, load, charTy)}; + } + TODO(loc, "unhandled dynamic type parameters"); + } + return {}; + } + return load.getTypeparams(); +} + +static llvm::SmallVector +findNonconstantExtents(mlir::Type memrefTy, + llvm::ArrayRef extents) { + llvm::SmallVector nce; + auto arrTy = unwrapPassByRefType(memrefTy); + auto seqTy = arrTy.cast(); + for (auto [s, x] : llvm::zip(seqTy.getShape(), extents)) + if (s == SequenceType::getUnknownExtent()) + nce.emplace_back(x); + if (extents.size() > seqTy.getShape().size()) + for (auto x : extents.drop_front(seqTy.getShape().size())) + nce.emplace_back(x); + return nce; +} + namespace { /// Conversion of fir.array_update and fir.array_modify Ops. 
 /// If there is a conflict for the update, then we need to perform a
@@ -590,60 +1015,68 @@
 template <typename ArrayOp>
 class ArrayUpdateConversionBase : public mlir::OpRewritePattern<ArrayOp> {
 public:
+  // TODO: Implement copy/swap semantics?
   explicit ArrayUpdateConversionBase(mlir::MLIRContext *ctx,
                                      const ArrayCopyAnalysis &a,
                                      const OperationUseMapT &m)
       : mlir::OpRewritePattern<ArrayOp>{ctx}, analysis{a}, useMap{m} {}
 
-  void genArrayCopy(mlir::Location loc, mlir::PatternRewriter &rewriter,
-                    mlir::Value dst, mlir::Value src, mlir::Value shapeOp,
-                    mlir::Type arrTy) const {
-    auto insPt = rewriter.saveInsertionPoint();
-    llvm::SmallVector<mlir::Value> indices;
+  /// The array_access, \p access, must be made against a cloned copy due to a
+  /// potential conflict. Uses copy-in/copy-out semantics and not copy/swap.
+  mlir::Value referenceToClone(mlir::Location loc,
+                               mlir::PatternRewriter &rewriter,
+                               ArrayOp access) const {
+    LLVM_DEBUG(llvm::dbgs()
+               << "generating copy-in/copy-out loops for " << access << '\n');
+    auto *op = access.getOperation();
+    auto *loadOp = useMap.lookup(op);
+    auto load = mlir::cast<ArrayLoadOp>(loadOp);
+    auto eleTy = access.getType();
+    rewriter.setInsertionPoint(loadOp);
+    // Copy in.
     llvm::SmallVector<mlir::Value> extents;
-    getExtents(extents, shapeOp);
-    // Build loop nest from column to row.
-    for (auto sh : llvm::reverse(extents)) {
-      auto idxTy = rewriter.getIndexType();
-      auto ubi = rewriter.create<ConvertOp>(loc, idxTy, sh);
-      auto zero = rewriter.create<mlir::arith::ConstantIndexOp>(loc, 0);
-      auto one = rewriter.create<mlir::arith::ConstantIndexOp>(loc, 1);
-      auto ub = rewriter.create<mlir::arith::SubIOp>(loc, idxTy, ubi, one);
-      auto loop = rewriter.create<DoLoopOp>(loc, zero, ub, one);
-      rewriter.setInsertionPointToStart(loop.getBody());
-      indices.push_back(loop.getInductionVar());
-    }
-    // Reverse the indices so they are in column-major order.
-    std::reverse(indices.begin(), indices.end());
-    auto ty = getEleTy(arrTy);
-    auto fromAddr = rewriter.create<ArrayCoorOp>(
-        loc, ty, src, shapeOp, mlir::Value{},
-        fir::factory::originateIndices(loc, rewriter, src.getType(), shapeOp,
-                                       indices),
-        mlir::ValueRange{});
-    auto load = rewriter.create<fir::LoadOp>(loc, fromAddr);
-    auto toAddr = rewriter.create<ArrayCoorOp>(
-        loc, ty, dst, shapeOp, mlir::Value{},
-        fir::factory::originateIndices(loc, rewriter, dst.getType(), shapeOp,
-                                       indices),
-        mlir::ValueRange{});
-    rewriter.create<fir::StoreOp>(loc, load, toAddr);
-    rewriter.restoreInsertionPoint(insPt);
+    bool copyUsingSlice = false;
+    auto shapeOp = getOrReadExtentsAndShapeOp(loc, rewriter, load, extents,
+                                              copyUsingSlice);
+    llvm::SmallVector<mlir::Value> nonconstantExtents =
+        findNonconstantExtents(load.getMemref().getType(), extents);
+    auto allocmem = rewriter.create<AllocMemOp>(
+        loc, dyn_cast_ptrOrBoxEleTy(load.getMemref().getType()),
+        genArrayLoadTypeParameters(loc, rewriter, load), nonconstantExtents);
+    genArrayCopy</*CopyIn=*/true>(load.getLoc(), rewriter, allocmem,
+                                  load.getMemref(), shapeOp, load.getSlice(),
+                                  load);
+    // Generate the reference for the access.
+    rewriter.setInsertionPoint(op);
+    auto coor =
+        genCoorOp(rewriter, loc, getEleTy(load.getType()), eleTy, allocmem,
+                  shapeOp, copyUsingSlice ? mlir::Value{} : load.getSlice(),
+                  access.getIndices(), load.getTypeparams(),
+                  access->hasAttr(factory::attrFortranArrayOffsets()));
+    // Copy out.
+    auto *storeOp = useMap.lookup(loadOp);
+    auto store = mlir::cast<ArrayMergeStoreOp>(storeOp);
+    rewriter.setInsertionPoint(storeOp);
+ genArrayCopy(store.getLoc(), rewriter, store.getMemref(), + allocmem, shapeOp, store.getSlice(), load); + rewriter.create(loc, allocmem); + return coor; } /// Copy the RHS element into the LHS and insert copy-in/copy-out between a /// temp and the LHS if the analysis found potential overlaps between the RHS - /// and LHS arrays. The element copy generator must be provided through \p + /// and LHS arrays. The element copy generator must be provided in \p /// assignElement. \p update must be the ArrayUpdateOp or the ArrayModifyOp. /// Returns the address of the LHS element inside the loop and the LHS /// ArrayLoad result. std::pair materializeAssignment(mlir::Location loc, mlir::PatternRewriter &rewriter, ArrayOp update, - llvm::function_ref assignElement, + const std::function &assignElement, mlir::Type lhsEltRefType) const { auto *op = update.getOperation(); - mlir::Operation *loadOp = useMap.lookup(op); + auto *loadOp = useMap.lookup(op); auto load = mlir::cast(loadOp); LLVM_DEBUG(llvm::outs() << "does " << load << " have a conflict?\n"); if (analysis.hasPotentialConflict(loadOp)) { @@ -654,25 +1087,31 @@ rewriter.setInsertionPoint(loadOp); // Copy in. llvm::SmallVector extents; - mlir::Value shapeOp = - getOrReadExtentsAndShapeOp(loc, rewriter, load, extents); + bool copyUsingSlice = false; + auto shapeOp = getOrReadExtentsAndShapeOp(loc, rewriter, load, extents, + copyUsingSlice); + llvm::SmallVector nonconstantExtents = + findNonconstantExtents(load.getMemref().getType(), extents); auto allocmem = rewriter.create( loc, dyn_cast_ptrOrBoxEleTy(load.getMemref().getType()), - load.getTypeparams(), extents); - genArrayCopy(load.getLoc(), rewriter, allocmem, load.getMemref(), shapeOp, - load.getType()); + genArrayLoadTypeParameters(loc, rewriter, load), nonconstantExtents); + genArrayCopy(load.getLoc(), rewriter, allocmem, + load.getMemref(), shapeOp, load.getSlice(), + load); rewriter.setInsertionPoint(op); - mlir::Value coor = genCoorOp( + auto coor = genCoorOp( rewriter, loc, getEleTy(load.getType()), lhsEltRefType, allocmem, - shapeOp, load.getSlice(), update.getIndices(), load.getTypeparams(), - update->hasAttr(fir::factory::attrFortranArrayOffsets())); + shapeOp, copyUsingSlice ? mlir::Value{} : load.getSlice(), + update.getIndices(), load.getTypeparams(), + update->hasAttr(factory::attrFortranArrayOffsets())); assignElement(coor); - mlir::Operation *storeOp = useMap.lookup(loadOp); + auto *storeOp = useMap.lookup(loadOp); auto store = mlir::cast(storeOp); rewriter.setInsertionPoint(storeOp); // Copy out. 
-      genArrayCopy(store.getLoc(), rewriter, store.getMemref(), allocmem,
-                   shapeOp, load.getType());
+      genArrayCopy</*CopyIn=*/false>(store.getLoc(), rewriter,
+                                     store.getMemref(), allocmem, shapeOp,
+                                     store.getSlice(), load);
       rewriter.create<FreeMemOp>(loc, allocmem);
       return {coor, load.getResult()};
     }
@@ -681,15 +1120,15 @@
     LLVM_DEBUG(llvm::outs() << "No, conflict wasn't found\n");
     rewriter.setInsertionPoint(op);
     auto coorTy = getEleTy(load.getType());
-    mlir::Value coor = genCoorOp(
-        rewriter, loc, coorTy, lhsEltRefType, load.getMemref(), load.getShape(),
-        load.getSlice(), update.getIndices(), load.getTypeparams(),
-        update->hasAttr(fir::factory::attrFortranArrayOffsets()));
+    auto coor = genCoorOp(rewriter, loc, coorTy, lhsEltRefType,
+                          load.getMemref(), load.getShape(), load.getSlice(),
+                          update.getIndices(), load.getTypeparams(),
+                          update->hasAttr(factory::attrFortranArrayOffsets()));
     assignElement(coor);
     return {coor, load.getResult()};
   }
 
-private:
+protected:
   const ArrayCopyAnalysis &analysis;
   const OperationUseMapT &useMap;
 };
@@ -706,7 +1145,12 @@
     mlir::PatternRewriter &rewriter) const override {
     auto loc = update.getLoc();
     auto assignElement = [&](mlir::Value coor) {
-      rewriter.create<fir::StoreOp>(loc, update.getMerge(), coor);
+      auto input = update.getMerge();
+      if (auto inEleTy = dyn_cast_ptrEleTy(input.getType())) {
+        emitFatalError(loc, "array_update on references not supported");
+      } else {
+        rewriter.create<fir::StoreOp>(loc, input, coor);
+      }
     };
     auto lhsEltRefType = toRefType(update.getMerge().getType());
     auto [_, lhsLoadResult] = materializeAssignment(
@@ -753,21 +1197,77 @@
     rewriter.setInsertionPoint(op);
     auto load = mlir::cast<ArrayLoadOp>(useMap.lookup(op));
     auto loc = fetch.getLoc();
-    mlir::Value coor =
+    auto coor =
         genCoorOp(rewriter, loc, getEleTy(load.getType()),
                   toRefType(fetch.getType()), load.getMemref(), load.getShape(),
                   load.getSlice(), fetch.getIndices(), load.getTypeparams(),
-                  fetch->hasAttr(fir::factory::attrFortranArrayOffsets()));
-    rewriter.replaceOpWithNewOp<fir::LoadOp>(fetch, coor);
+                  fetch->hasAttr(factory::attrFortranArrayOffsets()));
+    if (isa_ref_type(fetch.getType()))
+      rewriter.replaceOp(fetch, coor);
+    else
+      rewriter.replaceOpWithNewOp<fir::LoadOp>(fetch, coor);
     return mlir::success();
   }
 
 private:
   const OperationUseMapT &useMap;
 };
-} // namespace
 
-namespace {
+/// An array_access op is like an array_fetch op, except that it does not
+/// imply a load op. (It operates in the reference domain.)
+class ArrayAccessConversion : public ArrayUpdateConversionBase<ArrayAccessOp> {
+public:
+  explicit ArrayAccessConversion(mlir::MLIRContext *ctx,
+                                 const ArrayCopyAnalysis &a,
+                                 const OperationUseMapT &m)
+      : ArrayUpdateConversionBase{ctx, a, m} {}
+
+  mlir::LogicalResult
+  matchAndRewrite(ArrayAccessOp access,
+                  mlir::PatternRewriter &rewriter) const override {
+    auto *op = access.getOperation();
+    auto loc = access.getLoc();
+    if (analysis.inAmendAccessSet(op)) {
+      // This array_access is associated with an array_amend and there is a
+      // conflict. Make a copy to store into.
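+      // The clone is copied in from the original array before the array_load
+      // and copied back out at the array_merge_store (see referenceToClone).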
+ auto result = referenceToClone(loc, rewriter, access); + access.replaceAllUsesWith(result); + rewriter.replaceOp(access, result); + return mlir::success(); + } + rewriter.setInsertionPoint(op); + auto load = mlir::cast(useMap.lookup(op)); + auto coor = genCoorOp(rewriter, loc, getEleTy(load.getType()), + toRefType(access.getType()), load.getMemref(), + load.getShape(), load.getSlice(), access.getIndices(), + load.getTypeparams(), + access->hasAttr(factory::attrFortranArrayOffsets())); + rewriter.replaceOp(access, coor); + return mlir::success(); + } +}; + +/// An array_amend op is a marker to record which array access is being used to +/// update an array value. After this pass runs, an array_amend has no +/// semantics. We rewrite these to undefined values here to remove them while +/// preserving SSA form. +class ArrayAmendConversion : public mlir::OpRewritePattern { +public: + explicit ArrayAmendConversion(mlir::MLIRContext *ctx) + : OpRewritePattern{ctx} {} + + mlir::LogicalResult + matchAndRewrite(ArrayAmendOp amend, + mlir::PatternRewriter &rewriter) const override { + auto *op = amend.getOperation(); + rewriter.setInsertionPoint(op); + auto loc = amend.getLoc(); + auto undef = rewriter.create(loc, amend.getType()); + rewriter.replaceOp(amend, undef.getResult()); + return mlir::success(); + } +}; + class ArrayValueCopyConverter : public ArrayValueCopyBase { public: @@ -778,22 +1278,21 @@ auto *context = &getContext(); // Perform the conflict analysis. - auto &analysis = getAnalysis(); + const auto &analysis = getAnalysis(); const auto &useMap = analysis.getUseMap(); - // Phase 1 is performing a rewrite on the array accesses. Once all the - // array accesses are rewritten we can go on phase 2. - // Phase 2 gets rid of the useless copy-in/copyout operations. The copy-in - // /copy-out refers the Fortran copy-in/copy-out semantics on statements. mlir::RewritePatternSet patterns1(context); patterns1.insert(context, useMap); patterns1.insert(context, analysis, useMap); patterns1.insert(context, analysis, useMap); + patterns1.insert(context, analysis, useMap); + patterns1.insert(context); mlir::ConversionTarget target(*context); - target.addLegalDialect< - FIROpsDialect, mlir::scf::SCFDialect, mlir::arith::ArithmeticDialect, - mlir::cf::ControlFlowDialect, mlir::func::FuncDialect>(); - target.addIllegalOp(); + target.addLegalDialect(); + target.addIllegalOp(); // Rewrite the array fetch and array update ops. if (mlir::failed( mlir::applyPartialConversion(func, target, std::move(patterns1)))) { diff --git a/flang/test/Fir/array-value-copy.fir b/flang/test/Fir/array-value-copy.fir --- a/flang/test/Fir/array-value-copy.fir +++ b/flang/test/Fir/array-value-copy.fir @@ -104,12 +104,12 @@ // CHECK-LABEL: func @conversion_with_temporary( // CHECK-SAME: %[[ARR0:.*]]: !fir.ref>) // Allocation of temporary array. -// CHECK: %[[TEMP:.*]] = fir.allocmem !fir.array<10xi32>, %{{.*}} +// CHECK: %[[TEMP:.*]] = fir.allocmem !fir.array<10xi32> // Copy of original array to temp. 
// CHECK: fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { // CHECK: %[[COOR0:.*]] = fir.array_coor %[[ARR0]](%{{.*}}) %{{.*}} : (!fir.ref>, !fir.shape<1>, index) -> !fir.ref -// CHECK: %[[LOAD0:.*]] = fir.load %[[COOR0]] : !fir.ref // CHECK: %[[COOR1:.*]] = fir.array_coor %[[TEMP]](%{{.*}}) %{{.*}} : (!fir.heap>, !fir.shape<1>, index) -> !fir.ref +// CHECK: %[[LOAD0:.*]] = fir.load %[[COOR0]] : !fir.ref // CHECK: fir.store %[[LOAD0]] to %[[COOR1]] : !fir.ref // CHECK: } // Perform the assignment i = i(10:1:-1) using the temporary array. @@ -125,8 +125,8 @@ // Copy the result back to the original array. // CHECK: fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { // CHECK: %[[COOR0:.*]] = fir.array_coor %[[TEMP]](%{{.*}}) %{{.*}} : (!fir.heap>, !fir.shape<1>, index) -> !fir.ref -// CHECK: %[[LOAD0:.*]] = fir.load %[[COOR0:.*]] : !fir.ref // CHECK: %[[COOR1:.*]] = fir.array_coor %[[ARR0]](%{{.*}}) %{{.*}} : (!fir.ref>, !fir.shape<1>, index) -> !fir.ref +// CHECK: %[[LOAD0:.*]] = fir.load %[[COOR0:.*]] : !fir.ref // CHECK: fir.store %[[LOAD0]] to %[[COOR1]] : !fir.ref // CHECK: } // Free temporary array. @@ -176,7 +176,7 @@ // CHECK-SAME: %[[ARR0:.*]]: !fir.ref>) { // CHECK: %[[CST10:.*]] = arith.constant 10 : index // CHECK: %[[CST5:.*]] = arith.constant 5 : index -// CHECK: %[[TEMP:.*]] = fir.allocmem !fir.array<10x5xi32>, %c10, %c5 +// CHECK: %[[TEMP:.*]] = fir.allocmem !fir.array<10x5xi32> // CHECK: %[[IDX5:.*]] = fir.convert %[[CST5]] : (index) -> index // CHECK: %[[UB5:.*]] = arith.subi %[[IDX5]], %{{.*}} : index // CHECK: fir.do_loop %[[INDUC0:.*]] = %{{.*}} to %[[UB5]] step %{{.*}} { @@ -186,8 +186,8 @@ // CHECK: %[[IDX1:.*]] = arith.addi %[[INDUC1]], %{{.*}} : index // CHECK: %[[IDX2:.*]] = arith.addi %[[INDUC0]], %{{.*}} : index // CHECK: %[[COOR0:.*]] = fir.array_coor %[[ARR0]](%{{.*}}) %[[IDX1:.*]], %[[IDX2:.*]] : (!fir.ref>, !fir.shape<2>, index, index) -> !fir.ref -// CHECK: %[[LOAD0:.*]] = fir.load %[[COOR0]] : !fir.ref // CHECK: %[[COOR1:.*]] = fir.array_coor %[[TEMP]](%{{.*}}) %{{.*}}, %{{.*}} : (!fir.heap>, !fir.shape<2>, index, index) -> !fir.ref +// CHECK: %[[LOAD0:.*]] = fir.load %[[COOR0]] : !fir.ref // CHECK: fir.store %[[LOAD0]] to %[[COOR1]] : !fir.ref // CHECK: %{{.*}} = fir.do_loop %[[INDUC0:.*]] = %{{.*}} to %{{.*}} step %{{.*}} unordered iter_args(%{{.*}} = %{{.*}}) -> (!fir.array<10x5xi32>) { // CHECK: %{{.*}} = fir.do_loop %[[INDUC1:.*]] = %{{.*}} to %{{.*}} step %{{.*}} unordered iter_args(%{{.*}} = %{{.*}}) -> (!fir.array<10x5xi32>) { @@ -208,8 +208,8 @@ // CHECK: %[[IDX1:.*]] = arith.addi %[[INDUC1]], %{{.*}} : index // CHECK: %[[IDX2:.*]] = arith.addi %[[INDUC0]], %{{.*}} : index // CHECK: %[[COOR0:.*]] = fir.array_coor %[[TEMP]](%{{.*}}) %[[IDX1]], %[[IDX2]] : (!fir.heap>, !fir.shape<2>, index, index) -> !fir.ref -// CHECK: %[[LOAD0:.*]] = fir.load %[[COOR0]] : !fir.ref // CHECK: %[[COOR1:.*]] = fir.array_coor %[[ARR0]](%{{.*}}) %{{.*}}, %{{.*}} : (!fir.ref>, !fir.shape<2>, index, index) -> !fir.ref +// CHECK: %[[LOAD0:.*]] = fir.load %[[COOR0]] : !fir.ref // CHECK: fir.store %[[LOAD0]] to %[[COOR1]] : !fir.ref // CHECK: fir.freemem %[[TEMP]] : > @@ -283,12 +283,12 @@ // CHECK-SAME: %[[ARR0:.*]]: !fir.ref>) { // CHECK: %[[VAR0:.*]] = fir.alloca f32 // Allocate the temporary array. -// CHECK: %[[TEMP:.*]] = fir.allocmem !fir.array<100xf32>, %{{.*}} +// CHECK: %[[TEMP:.*]] = fir.allocmem !fir.array<100xf32> // Copy original array to temp. 
// CHECK: fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { // CHECK: %[[COOR0:.*]] = fir.array_coor %[[ARR0]](%{{.*}}) %{{.*}} : (!fir.ref>, !fir.shape<1>, index) -> !fir.ref -// CHECK: %[[LOAD0:.*]] = fir.load %[[COOR0]] : !fir.ref // CHECK: %[[COOR1:.*]] = fir.array_coor %[[TEMP]](%{{.*}}) %{{.*}} : (!fir.heap>, !fir.shape<1>, index) -> !fir.ref +// CHECK: %[[LOAD0:.*]] = fir.load %[[COOR0]] : !fir.ref // CHECK: fir.store %[[LOAD0]] to %[[COOR1]] : !fir.ref // CHECK: } // CHECK: %[[VAL_21:.*]] = fir.undefined !fir.array<100xf32> @@ -304,8 +304,8 @@ // Copy back result to original array from temp. // CHECK: fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { // CHECK: %[[COOR0:.*]] = fir.array_coor %[[TEMP]](%{{.*}}) %{{.*}} : (!fir.heap>, !fir.shape<1>, index) -> !fir.ref -// CHECK: %[[LOAD0:.*]] = fir.load %[[COOR0]] : !fir.ref // CHECK: %[[COOR1:.*]] = fir.array_coor %[[ARR0]](%{{.*}}) %{{.*}} : (!fir.ref>, !fir.shape<1>, index) -> !fir.ref +// CHECK: %[[LOAD0:.*]] = fir.load %[[COOR0]] : !fir.ref // CHECK: fir.store %[[LOAD0]] to %[[COOR1]] : !fir.ref // CHECK: } // Free the temporary array. @@ -374,8 +374,9 @@ // Test fir.array_load/boxed array func @conversion_with_temporary_boxed_array(%arr0 : !fir.box>) { %c10 = arith.constant 10 : index - %1 = fir.shape %c10 : (index) -> !fir.shape<1> - %2 = fir.array_load %arr0(%1) : (!fir.box>, !fir.shape<1>) -> !fir.array<10xi32> + %1:3 = fir.box_dims %arr0, %c10 : (!fir.box>, index) -> (index, index, index) + %shift = fir.shift %1#0 : (index) -> !fir.shift<1> + %2 = fir.array_load %arr0(%shift) : (!fir.box>, !fir.shift<1>) -> !fir.array<10xi32> %c10_i64 = arith.constant 10 : i64 %3 = fir.convert %c10_i64 : (i64) -> index %c1_i64 = arith.constant 1 : i64 @@ -398,12 +399,12 @@ // CHECK-LABEL: func @conversion_with_temporary_boxed_array( // CHECK-SAME: %[[ARR0:.*]]: !fir.box>) // Allocation of temporary array. -// CHECK: %[[TEMP:.*]] = fir.allocmem !fir.array<10xi32>, %{{.*}} +// CHECK: %[[TEMP:.*]] = fir.allocmem !fir.array<10xi32> // Copy of original array to temp. // CHECK: fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECK: %[[COOR0:.*]] = fir.array_coor %[[ARR0]](%{{.*}}) %{{.*}} : (!fir.box>, !fir.shape<1>, index) -> !fir.ref +// CHECK: %[[COOR0:.*]] = fir.array_coor %[[ARR0]](%{{.*}}) %{{.*}} : (!fir.box>, !fir.shapeshift<1>, index) -> !fir.ref +// CHECK: %[[COOR1:.*]] = fir.array_coor %[[TEMP]](%{{.*}}) %{{.*}} : (!fir.heap>, !fir.shapeshift<1>, index) -> !fir.ref // CHECK: %[[LOAD0:.*]] = fir.load %[[COOR0]] : !fir.ref -// CHECK: %[[COOR1:.*]] = fir.array_coor %[[TEMP]](%{{.*}}) %{{.*}} : (!fir.heap>, !fir.shape<1>, index) -> !fir.ref // CHECK: fir.store %[[LOAD0]] to %[[COOR1]] : !fir.ref // CHECK: } // Perform the assignment i = i(10:1:-1) using the temporary array. @@ -412,15 +413,15 @@ // CHECK-NOT: %{{.*}} = fir.update // CHECK: %[[COOR0:.*]] = fir.array_coor %[[ARR0]](%{{.*}}) [%{{.*}}] %{{.*}} : (!fir.box>, !fir.shape<1>, !fir.slice<1>, index) -> !fir.ref // CHECK: %[[LOAD0:.*]] = fir.load %[[COOR0]] : !fir.ref -// CHECK: %[[COOR1:.*]] = fir.array_coor %[[TEMP]](%{{.*}}) %{{.*}} : (!fir.heap>, !fir.shape<1>, index) -> !fir.ref +// CHECK: %[[COOR1:.*]] = fir.array_coor %[[TEMP]](%{{.*}}) %{{.*}} : (!fir.heap>, !fir.shapeshift<1>, index) -> !fir.ref // CHECK: fir.store %[[LOAD0]] to %[[COOR1]] : !fir.ref // CHECK: fir.result %{{.*}} : !fir.array<10xi32> // CHECK: } // Copy the result back to the original array. 
// CHECK: fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECK: %[[COOR0:.*]] = fir.array_coor %[[TEMP]](%{{.*}}) %{{.*}} : (!fir.heap>, !fir.shape<1>, index) -> !fir.ref +// CHECK: %[[COOR0:.*]] = fir.array_coor %[[TEMP]](%{{.*}}) %{{.*}} : (!fir.heap>, !fir.shapeshift<1>, index) -> !fir.ref +// CHECK: %[[COOR1:.*]] = fir.array_coor %[[ARR0]](%{{.*}}) %{{.*}} : (!fir.box>, !fir.shapeshift<1>, index) -> !fir.ref // CHECK: %[[LOAD0:.*]] = fir.load %[[COOR0:.*]] : !fir.ref -// CHECK: %[[COOR1:.*]] = fir.array_coor %[[ARR0]](%{{.*}}) %{{.*}} : (!fir.box>, !fir.shape<1>, index) -> !fir.ref // CHECK: fir.store %[[LOAD0]] to %[[COOR1]] : !fir.ref // CHECK: } // Free temporary array.