diff --git a/flang/include/flang/Optimizer/Builder/TemporaryStorage.h b/flang/include/flang/Optimizer/Builder/TemporaryStorage.h --- a/flang/include/flang/Optimizer/Builder/TemporaryStorage.h +++ b/flang/include/flang/Optimizer/Builder/TemporaryStorage.h @@ -93,5 +93,52 @@ /// Temporary storage. mlir::Value temp; }; + +/// Structure to hold the value of a single entity. +class SimpleCopy { +public: + SimpleCopy(mlir::Location loc, fir::FirOpBuilder &builder, + hlfir::Entity source, llvm::StringRef tempName); + + void pushValue(mlir::Location loc, fir::FirOpBuilder &builder, + mlir::Value value) { + assert(false && "must not be called: value already set"); + } + void resetFetchPosition(mlir::Location loc, fir::FirOpBuilder &builder){}; + mlir::Value fetch(mlir::Location loc, fir::FirOpBuilder &builder) { + return copy.getBase(); + } + void destroy(mlir::Location loc, fir::FirOpBuilder &builder); + +public: + /// Temporary storage for the copy. + hlfir::AssociateOp copy; +}; + +/// Generic wrapper over the different sorts of temporary storages. 
+class TemporaryStorage {
+public:
+  template <typename T>
+  TemporaryStorage(T &&impl) : impl{std::forward<T>(impl)} {}
+
+  void pushValue(mlir::Location loc, fir::FirOpBuilder &builder,
+                 mlir::Value value) {
+    std::visit([&](auto &temp) { temp.pushValue(loc, builder, value); }, impl);
+  }
+  void resetFetchPosition(mlir::Location loc, fir::FirOpBuilder &builder) {
+    std::visit([&](auto &temp) { temp.resetFetchPosition(loc, builder); },
+               impl);
+  }
+  mlir::Value fetch(mlir::Location loc, fir::FirOpBuilder &builder) {
+    return std::visit([&](auto &temp) { return temp.fetch(loc, builder); },
+                      impl);
+  }
+  void destroy(mlir::Location loc, fir::FirOpBuilder &builder) {
+    std::visit([&](auto &temp) { temp.destroy(loc, builder); }, impl);
+  }
+
+private:
+  std::variant<HomogeneousScalarStack, SimpleCopy> impl;
+};
 } // namespace fir::factory
 #endif // FORTRAN_OPTIMIZER_BUILDER_TEMPORARYSTORAGE_H
diff --git a/flang/lib/Optimizer/Builder/TemporaryStorage.cpp b/flang/lib/Optimizer/Builder/TemporaryStorage.cpp
--- a/flang/lib/Optimizer/Builder/TemporaryStorage.cpp
+++ b/flang/lib/Optimizer/Builder/TemporaryStorage.cpp
@@ -10,8 +10,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "flang/Optimizer/Builder/TemporaryStorage.h"
+#include "flang/Optimizer/Builder/FIRBuilder.h"
 #include "flang/Optimizer/Builder/HLFIRTools.h"
-#include "flang/Optimizer/Builder/Runtime/RTBuilder.h"
 #include "flang/Optimizer/Builder/Todo.h"
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
@@ -133,3 +133,24 @@
   auto hlfirExpr = builder.create<hlfir::AsExprOp>(loc, temp, mustFree);
   return hlfir::Entity{hlfirExpr};
 }
+
+//===----------------------------------------------------------------------===//
+// fir::factory::SimpleCopy implementation.
+//===----------------------------------------------------------------------===//
+
+fir::factory::SimpleCopy::SimpleCopy(mlir::Location loc,
+                                     fir::FirOpBuilder &builder,
+                                     hlfir::Entity source,
+                                     llvm::StringRef tempName) {
+  // Use hlfir.as_expr and hlfir.associate to create a copy, and let
+  // bufferization deal with how best to make the copy.
+  if (source.isVariable())
+    source = hlfir::Entity{builder.create<hlfir::AsExprOp>(loc, source)};
+  copy = hlfir::genAssociateExpr(loc, builder, source,
+                                 source.getFortranElementType(), tempName);
+}
+
+void fir::factory::SimpleCopy::destroy(mlir::Location loc,
+                                       fir::FirOpBuilder &builder) {
+  builder.create<hlfir::EndAssociateOp>(loc, copy);
+}
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
--- a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
@@ -20,9 +20,11 @@
 #include "ScheduleOrderedAssignments.h"
 #include "flang/Optimizer/Builder/FIRBuilder.h"
 #include "flang/Optimizer/Builder/HLFIRTools.h"
+#include "flang/Optimizer/Builder/TemporaryStorage.h"
 #include "flang/Optimizer/Builder/Todo.h"
 #include "flang/Optimizer/Dialect/Support/FIRContext.h"
 #include "flang/Optimizer/HLFIR/Passes.h"
+#include "mlir/IR/Dominance.h"
 #include "mlir/IR/IRMapping.h"
 #include "mlir/Transforms/DialectConversion.h"
 #include "llvm/ADT/SmallSet.h"
@@ -106,8 +108,20 @@
     currentRun = nullptr;
     assert(constructStack.empty() && "must exit constructs after a run");
     mapper.clear();
+    savedInCurrentRunBeforeUse.clear();
   }
 
+  /// After all runs have been lowered, clean up all the temporary
+  /// storage that was created (do not call final routines).
+  void cleanupSavedEntities() {
+    for (auto &temp : savedEntities)
+      temp.second.destroy(root.getLoc(), builder);
+  }
+
+  /// Lowered value for an expression, and the original hlfir.yield if any
+  /// clean-up needs to be cloned after usage.
+  using ValueAndCleanUp = std::pair<mlir::Value, std::optional<hlfir::YieldOp>>;
+
 private:
   /// Walk the part of an order assignment tree node that needs
   /// to be evaluated in the current run.
@@ -130,7 +144,9 @@
   /// Is this an assignment to a vector subscripted entity?
   static bool hasVectorSubscriptedLhs(hlfir::RegionAssignOp regionAssignOp);
   /// Are they any leaf region in node that must be saved in the current run?
-  bool mustSavedRegionIn(hlfir::OrderedAssignmentTreeOpInterface node) const;
+  bool mustSaveRegionIn(
+      hlfir::OrderedAssignmentTreeOpInterface node,
+      llvm::SmallVectorImpl<hlfir::SaveEntity> &saveEntities) const;
   /// Should this node be evaluated in the current run? Saving a region in a
   /// node does not imply the node needs to be evaluated.
   bool
@@ -154,7 +170,7 @@
   /// should be done after using the entity. Like, generateYieldedScalarValue,
   /// this will return the saved value if the region was saved in a previous
   /// run.
-  std::pair<mlir::Value, std::optional<hlfir::YieldOp>>
+  ValueAndCleanUp
   generateYieldedEntity(mlir::Region &region,
                         std::optional<mlir::Type> castToType = std::nullopt);
@@ -176,12 +192,46 @@
   /// given a mask expression.
   void generateMaskIfOp(MaskedArrayExpr &mask);
 
+  void generateSaveEntity(hlfir::SaveEntity savedEntity,
+                          bool willUseSavedEntityInSameRun);
+
+  /// Generate code before the loop nest for the current run, if any.
+  void doBeforeLoopNest(const std::function<void()> &callback) {
+    if (constructStack.empty()) {
+      callback();
+      return;
+    }
+    auto insertionPoint = builder.saveInsertionPoint();
+    builder.setInsertionPoint(constructStack[0]);
+    callback();
+    builder.restoreInsertionPoint(insertionPoint);
+  }
+
+  /// Can the current loop nest iteration number be computed? For simplicity,
+  /// this is true if and only if all the bounds and steps of the fir.do_loop
+  /// nest dominate the outer loop.
+  /// The argument is filled with the current
+  /// loop nest on success.
+  bool currentLoopNestIterationNumberCanBeComputed(
+      llvm::SmallVectorImpl<fir::DoLoopOp> &loopNest);
+
+  template <typename T>
+  fir::factory::TemporaryStorage *insertSavedEntity(mlir::Region &region,
+                                                    T &&temp) {
+    auto inserted = savedEntities.try_emplace(&region, std::forward<T>(temp));
+    assert(inserted.second && "temp must have been emplaced");
+    return &inserted.first->second;
+  }
+
   fir::FirOpBuilder &builder;
   /// Map containing the mapping between the original order assignment tree
   /// operations and the operations that have been cloned in the current run.
   /// It is reset between two runs.
   mlir::IRMapping mapper;
+  /// Dominance info is used to determine if inner loop bounds are all computed
+  /// before the outer loop for the current loop nest. It does not need to be
+  /// reset between runs.
+  mlir::DominanceInfo dominanceInfo;
   /// Construct stack in the current run. This allows setting back the insertion
   /// point correctly when leaving a node that requires a fir.do_loop or fir.if
   /// operation.
@@ -189,19 +239,38 @@
   /// Current where loop nest, if any.
   std::optional<hlfir::LoopNest> whereLoopNest;
 
+  /// Map of temporary storage to keep track of saved entities once the run
+  /// that saves them has been lowered. It is kept in-between runs.
+  llvm::DenseMap<mlir::Region *, fir::factory::TemporaryStorage> savedEntities;
+  /// Map holding the values that were saved in the current run and that also
+  /// need to be used (because their construct will be visited). It is reset
+  /// after each run. It avoids having to store and fetch in the temporary
+  /// during the same run, which would require the temporary to have different
+  /// fetching and storing counters.
+  llvm::DenseMap<mlir::Region *, ValueAndCleanUp> savedInCurrentRunBeforeUse;
+
   /// Root of the order assignment tree being lowered.
   hlfir::OrderedAssignmentTreeOpInterface root;
   /// Pointer to the current run of the schedule being lowered.
hlfir::Run *currentRun = nullptr; + + /// When allocating temporary storage inlined, indicate if the storage should + /// be heap or stack allocated. Temporary allocated with the runtime are heap + /// allocated by the runtime. + bool allocateOnHeap = true; }; } // namespace void OrderedAssignmentRewriter::walk( hlfir::OrderedAssignmentTreeOpInterface node) { - if (mustSavedRegionIn(node)) - TODO(node.getLoc(), - "creating temporary storage in FORALL or WHERE constructs"); - if (isRequiredInCurrentRun(node) || mlir::isa(node)) { + bool mustVisit = + isRequiredInCurrentRun(node) || mlir::isa(node); + llvm::SmallVector saveEntities; + if (mustSaveRegionIn(node, saveEntities)) { + for (hlfir::SaveEntity saveEntity : saveEntities) + generateSaveEntity(saveEntity, mustVisit); + } + if (mustVisit) { llvm::TypeSwitch(node.getOperation()) .Case( @@ -370,14 +439,42 @@ return value.getDefiningOp(); } -std::pair> +static OrderedAssignmentRewriter::ValueAndCleanUp +castIfNeeded(mlir::Location loc, fir::FirOpBuilder &builder, + OrderedAssignmentRewriter::ValueAndCleanUp valueAndCleanUp, + std::optional castToType) { + if (!castToType.has_value()) + return valueAndCleanUp; + mlir::Value cast = + builder.createConvert(loc, *castToType, valueAndCleanUp.first); + return {cast, valueAndCleanUp.second}; +} + +OrderedAssignmentRewriter::ValueAndCleanUp OrderedAssignmentRewriter::generateYieldedEntity( mlir::Region ®ion, std::optional castToType) { - // TODO: if the region was saved, use that instead of generating code again. - if (whereLoopNest.has_value()) { - mlir::Location loc = region.getParentOp()->getLoc(); - return {generateMaskedEntity(loc, region), std::nullopt}; + mlir::Location loc = region.getParentOp()->getLoc(); + // If the region was saved in the same run, use the value that was evaluated + // instead of fetching the temp, and do clean-up, if any, that were delayed. 
+ // This is done to avoid requiring the temporary stack to have different + // fetching and storing counters, and also because it produces slightly better + // code. + if (auto savedInSameRun = savedInCurrentRunBeforeUse.find(®ion); + savedInSameRun != savedInCurrentRunBeforeUse.end()) + return castIfNeeded(loc, builder, savedInSameRun->second, castToType); + // If the region was saved in a previous run, fetch the saved value. + if (auto temp = savedEntities.find(®ion); temp != savedEntities.end()) { + doBeforeLoopNest([&]() { temp->second.resetFetchPosition(loc, builder); }); + ValueAndCleanUp saved{temp->second.fetch(loc, builder), std::nullopt}; + return castIfNeeded(loc, builder, saved, castToType); } + // Otherwise, evaluate the region now. + + // Masked expression must not evaluate the elemental parts that are masked, + // they have custom code generation. + if (whereLoopNest.has_value()) + return {generateMaskedEntity(loc, region), std::nullopt}; + assert(region.hasOneBlock() && "region must contain one block"); auto oldYield = mlir::dyn_cast_or_null( region.back().getOperations().back()); @@ -468,7 +565,8 @@ assert(maybeYield->getCleanup().hasOneBlock() && "region must contain one block"); for (auto &op : maybeYield->getCleanup().back().getOperations()) - builder.clone(op, mapper); + if (!mlir::isa(op)) + builder.clone(op, mapper); } } @@ -478,14 +576,15 @@ regionAssignOp.getLhsRegion().back().back()); } -bool OrderedAssignmentRewriter::mustSavedRegionIn( - hlfir::OrderedAssignmentTreeOpInterface node) const { +bool OrderedAssignmentRewriter::mustSaveRegionIn( + hlfir::OrderedAssignmentTreeOpInterface node, + llvm::SmallVectorImpl &saveEntities) const { for (auto &action : currentRun->actions) if (hlfir::SaveEntity *savedEntity = std::get_if(&action)) if (node.getOperation() == savedEntity->yieldRegion->getParentOp()) - return true; - return false; + saveEntities.push_back(*savedEntity); + return !saveEntities.empty(); } bool 
OrderedAssignmentRewriter::isRequiredInCurrentRun( @@ -634,6 +733,125 @@ } } +// TODO: deal with "outer" where -> an array temp is easy and OK. + +static bool isLeftHandSide(mlir::Region ®ion) { + auto assign = mlir::dyn_cast(region.getParentOp()); + return assign && (&assign.getLhsRegion() == ®ion); +} + +bool OrderedAssignmentRewriter::currentLoopNestIterationNumberCanBeComputed( + llvm::SmallVectorImpl &loopNest) { + if (constructStack.empty()) + return true; + mlir::Operation *outerLoop = constructStack[0]; + mlir::Operation *currentConstruct = constructStack.back(); + // Loop through the loops until the outer construct is met, and test if the + // loop operands dominate the outer construct. + while (currentConstruct) { + if (auto doLoop = mlir::dyn_cast(currentConstruct)) { + if (llvm::any_of(doLoop->getOperands(), [&](mlir::Value value) { + return !dominanceInfo.properlyDominates(value, outerLoop); + })) { + return false; + } + loopNest.push_back(doLoop); + } + if (currentConstruct == outerLoop) + currentConstruct = nullptr; + else + currentConstruct = currentConstruct->getParentOp(); + } + return true; +} + +static mlir::Value +computeLoopNestIterationNumber(mlir::Location loc, fir::FirOpBuilder &builder, + llvm::ArrayRef loopNest) { + mlir::Value loopExtent; + for (fir::DoLoopOp doLoop : loopNest) { + mlir::Value extent = builder.genExtentFromTriplet( + loc, doLoop.getLowerBound(), doLoop.getUpperBound(), doLoop.getStep(), + builder.getIndexType()); + if (!loopExtent) + loopExtent = extent; + else + loopExtent = builder.create(loc, loopExtent, extent); + } + assert(loopExtent && "loopNest must not be empty"); + return loopExtent; +} + +void OrderedAssignmentRewriter::generateSaveEntity( + hlfir::SaveEntity savedEntity, bool willUseSavedEntityInSameRun) { + mlir::Region ®ion = *savedEntity.yieldRegion; + mlir::Location loc = region.getParentOp()->getLoc(); + + if (!mlir::isa(region.back().back())) + TODO(loc, "creating temporary storage for vector 
subscripted LHS"); + if (isLeftHandSide(region)) // Need to save the address, not the values. + TODO(loc, "creating temporary storage for LHS"); + + // Evaluate the region inside the loop nest (if any). + auto [entity, oldYield] = generateYieldedEntity(region); + mlir::Type entityType = entity.getType(); + + static constexpr char tempName[] = ".tmp.forall"; + if (constructStack.empty()) { + // Value evaluated outside of any loops (this may be the first MASK of a + // WHERE construct, or an LHS/RHS temp of hlfir.region_assign outside of + // WHERE/FORALL). + insertSavedEntity(region, fir::factory::SimpleCopy(loc, builder, + hlfir::Entity{entity}, + tempName)); + } else { + // Need to create a temporary for values computed inside loops. + // Create temporary storage outside of the loop nest given the entity + // type (and the loop context). + fir::factory::TemporaryStorage *temp; + llvm::SmallVector loopNest; + bool loopShapeCanBePreComputed = + currentLoopNestIterationNumberCanBeComputed(loopNest); + doBeforeLoopNest([&] { + /// For simple scalars inside loops whose total iteration number can be + /// pre-computed, create a rank-1 array outside of the loops. It will be + /// assigned/fetched inside the loops like a normal Fortran array given + /// the iteration count. + if (loopShapeCanBePreComputed && fir::isa_trivial(entityType)) { + mlir::Value loopExtent = + computeLoopNestIterationNumber(loc, builder, loopNest); + auto sequenceType = + builder.getVarLenSeqTy(entityType).cast(); + temp = insertSavedEntity(region, + fir::factory::HomogeneousScalarStack{ + loc, builder, sequenceType, loopExtent, + /*lenParams=*/{}, allocateOnHeap, + /*stackThroughLoops=*/true, tempName}); + + } else { + // If the number of iteration is not known, or if the values at each + // iterations are values that may have different shape, type parameters + // or dynamic type, use the runtime to create and manage a stack-like + // temporary. 
+ TODO(loc, "use runtime to create temporary storage in FORALL or WHERE"); + } + }); + // Inside the loop nest (and any fir.if if there are active masks), copy + // the value to the temp and do clean-ups for the value if any. + temp->pushValue(loc, builder, entity); + } + + // Delay the clean-up if the entity will be used in the same run (i.e., the + // parent construct will be visited and needs to be lowered). + if (willUseSavedEntityInSameRun) { + auto inserted = + savedInCurrentRunBeforeUse.try_emplace(®ion, entity, oldYield); + assert(inserted.second && "entity must have been emplaced"); + } else { + generateCleanupIfAny(oldYield); + } +} + /// Lower an ordered assignment tree to fir.do_loop and hlfir.assign given /// a schedule. static void lower(hlfir::OrderedAssignmentTreeOpInterface root, @@ -643,6 +861,7 @@ OrderedAssignmentRewriter assignmentRewriter(builder, root); for (auto &run : schedule) assignmentRewriter.lowerRun(run); + assignmentRewriter.cleanupSavedEntities(); } /// Shared rewrite entry point for all the ordered assignment tree root diff --git a/flang/test/HLFIR/order_assignments/inlined-stack-temp.fir b/flang/test/HLFIR/order_assignments/inlined-stack-temp.fir new file mode 100644 --- /dev/null +++ b/flang/test/HLFIR/order_assignments/inlined-stack-temp.fir @@ -0,0 +1,258 @@ +// Test code generation of hlfir.forall and hlfir.where when temporary +// storage is needed and can be allocated inline. 
+// RUN: fir-opt %s --lower-hlfir-ordered-assignments | FileCheck %s + +func.func @test_scalar_save(%arg0: !fir.box>) { + %c10_i32 = arith.constant 10 : i32 + %c1_i32 = arith.constant 1 : i32 + %0:2 = hlfir.declare %arg0 {uniq_name = "x"} : (!fir.box>) -> (!fir.box>, !fir.box>) + hlfir.forall lb { + hlfir.yield %c1_i32 : i32 + } ub { + hlfir.yield %c10_i32 : i32 + } (%arg1: i32) { + hlfir.region_assign { + %1 = fir.convert %arg1 : (i32) -> i64 + %2 = hlfir.designate %0#0 (%1) : (!fir.box>, i64) -> !fir.ref + %3 = fir.load %2 : !fir.ref + hlfir.yield %3 : i32 + } to { + %1 = arith.addi %arg1, %c1_i32 : i32 + %2 = fir.convert %1 : (i32) -> i64 + %3 = hlfir.designate %0#0 (%2) : (!fir.box>, i64) -> !fir.ref + hlfir.yield %3 : !fir.ref + } + } + return +} +// CHECK-LABEL: func.func @test_scalar_save( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.box>) { +// CHECK: %[[VAL_1:.*]] = fir.alloca index +// CHECK: %[[VAL_2:.*]] = arith.constant 10 : i32 +// CHECK: %[[VAL_3:.*]] = arith.constant 1 : i32 +// CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "x"} : (!fir.box>) -> (!fir.box>, !fir.box>) +// CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_3]] : (i32) -> index +// CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_2]] : (i32) -> index +// CHECK: %[[VAL_7:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_8:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_9:.*]] = arith.subi %[[VAL_6]], %[[VAL_5]] : index +// CHECK: %[[VAL_10:.*]] = arith.addi %[[VAL_9]], %[[VAL_7]] : index +// CHECK: %[[VAL_11:.*]] = arith.divsi %[[VAL_10]], %[[VAL_7]] : index +// CHECK: %[[VAL_12:.*]] = arith.cmpi sgt, %[[VAL_11]], %[[VAL_8]] : index +// CHECK: %[[VAL_13:.*]] = arith.select %[[VAL_12]], %[[VAL_11]], %[[VAL_8]] : index +// CHECK: %[[VAL_14:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_15:.*]] = arith.constant 1 : index +// CHECK: fir.store %[[VAL_14]] to %[[VAL_1]] : !fir.ref +// CHECK: %[[VAL_16:.*]] = fir.allocmem !fir.array, %[[VAL_13]] {bindc_name = ".tmp.forall", uniq_name = ""} +// 
CHECK: %[[VAL_17:.*]] = fir.shape %[[VAL_13]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_18:.*]]:2 = hlfir.declare %[[VAL_16]](%[[VAL_17]]) {uniq_name = ".tmp.forall"} : (!fir.heap>, !fir.shape<1>) -> (!fir.box>, !fir.heap>) +// CHECK: fir.do_loop %[[VAL_19:.*]] = %[[VAL_5]] to %[[VAL_6]] step %[[VAL_7]] { +// CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (index) -> i32 +// CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i32) -> i64 +// CHECK: %[[VAL_22:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_21]]) : (!fir.box>, i64) -> !fir.ref +// CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_22]] : !fir.ref +// CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_1]] : !fir.ref +// CHECK: %[[VAL_25:.*]] = arith.addi %[[VAL_24]], %[[VAL_15]] : index +// CHECK: fir.store %[[VAL_25]] to %[[VAL_1]] : !fir.ref +// CHECK: %[[VAL_26:.*]] = hlfir.designate %[[VAL_18]]#0 (%[[VAL_24]]) : (!fir.box>, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_23]] to %[[VAL_26]] : i32, !fir.ref +// CHECK: } +// CHECK: %[[VAL_27:.*]] = fir.convert %[[VAL_3]] : (i32) -> index +// CHECK: %[[VAL_28:.*]] = fir.convert %[[VAL_2]] : (i32) -> index +// CHECK: %[[VAL_29:.*]] = arith.constant 1 : index +// CHECK: fir.store %[[VAL_14]] to %[[VAL_1]] : !fir.ref +// CHECK: fir.do_loop %[[VAL_30:.*]] = %[[VAL_27]] to %[[VAL_28]] step %[[VAL_29]] { +// CHECK: %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (index) -> i32 +// CHECK: %[[VAL_32:.*]] = fir.load %[[VAL_1]] : !fir.ref +// CHECK: %[[VAL_33:.*]] = arith.addi %[[VAL_32]], %[[VAL_15]] : index +// CHECK: fir.store %[[VAL_33]] to %[[VAL_1]] : !fir.ref +// CHECK: %[[VAL_34:.*]] = hlfir.designate %[[VAL_18]]#0 (%[[VAL_32]]) : (!fir.box>, index) -> !fir.ref +// CHECK: %[[VAL_35:.*]] = fir.load %[[VAL_34]] : !fir.ref +// CHECK: %[[VAL_36:.*]] = arith.addi %[[VAL_31]], %[[VAL_3]] : i32 +// CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_36]] : (i32) -> i64 +// CHECK: %[[VAL_38:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_37]]) : (!fir.box>, i64) -> !fir.ref +// CHECK: hlfir.assign 
%[[VAL_35]] to %[[VAL_38]] : i32, !fir.ref +// CHECK: } +// CHECK: fir.freemem %[[VAL_16]] : !fir.heap> +// CHECK: return +// CHECK: } + +func.func @mask_and_rhs_conflict(%arg0: !fir.box>) { + %c42_i32 = arith.constant 42 : i32 + %c10_i32 = arith.constant 10 : i32 + %c1_i32 = arith.constant 1 : i32 + %0:2 = hlfir.declare %arg0 {uniq_name = "x"} : (!fir.box>) -> (!fir.box>, !fir.box>) + hlfir.forall lb { + hlfir.yield %c1_i32 : i32 + } ub { + hlfir.yield %c10_i32 : i32 + } (%arg1: i32) { + hlfir.forall_mask { + %1 = fir.convert %arg1 : (i32) -> i64 + %2 = hlfir.designate %0#0 (%1) : (!fir.box>, i64) -> !fir.ref + %3 = fir.load %2 : !fir.ref + %4 = arith.cmpi sgt, %3, %c42_i32 : i32 + hlfir.yield %4 : i1 + } do { + hlfir.region_assign { + %1 = fir.convert %arg1 : (i32) -> i64 + %2 = hlfir.designate %0#0 (%1) : (!fir.box>, i64) -> !fir.ref + %3 = fir.load %2 : !fir.ref + hlfir.yield %3 : i32 + } to { + %1 = arith.addi %arg1, %c1_i32 : i32 + %2 = fir.convert %1 : (i32) -> i64 + %3 = hlfir.designate %0#0 (%2) : (!fir.box>, i64) -> !fir.ref + hlfir.yield %3 : !fir.ref + } + } + } + return +} +// CHECK-LABEL: func.func @mask_and_rhs_conflict( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.box>) { +// CHECK: %[[VAL_1:.*]] = fir.alloca index +// CHECK: %[[VAL_2:.*]] = fir.alloca index +// CHECK: %[[VAL_3:.*]] = arith.constant 42 : i32 +// CHECK: %[[VAL_4:.*]] = arith.constant 10 : i32 +// CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 +// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "x"} : (!fir.box>) -> (!fir.box>, !fir.box>) +// CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_5]] : (i32) -> index +// CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_4]] : (i32) -> index +// CHECK: %[[VAL_9:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_10:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_11:.*]] = arith.subi %[[VAL_8]], %[[VAL_7]] : index +// CHECK: %[[VAL_12:.*]] = arith.addi %[[VAL_11]], %[[VAL_9]] : index +// CHECK: %[[VAL_13:.*]] = arith.divsi %[[VAL_12]], %[[VAL_9]] : 
index +// CHECK: %[[VAL_14:.*]] = arith.cmpi sgt, %[[VAL_13]], %[[VAL_10]] : index +// CHECK: %[[VAL_15:.*]] = arith.select %[[VAL_14]], %[[VAL_13]], %[[VAL_10]] : index +// CHECK: %[[VAL_16:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_17:.*]] = arith.constant 1 : index +// CHECK: fir.store %[[VAL_16]] to %[[VAL_2]] : !fir.ref +// CHECK: %[[VAL_18:.*]] = fir.allocmem !fir.array, %[[VAL_15]] {bindc_name = ".tmp.forall", uniq_name = ""} +// CHECK: %[[VAL_19:.*]] = fir.shape %[[VAL_15]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_18]](%[[VAL_19]]) {uniq_name = ".tmp.forall"} : (!fir.heap>, !fir.shape<1>) -> (!fir.box>, !fir.heap>) +// CHECK: %[[VAL_21:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_22:.*]] = arith.subi %[[VAL_8]], %[[VAL_7]] : index +// CHECK: %[[VAL_23:.*]] = arith.addi %[[VAL_22]], %[[VAL_9]] : index +// CHECK: %[[VAL_24:.*]] = arith.divsi %[[VAL_23]], %[[VAL_9]] : index +// CHECK: %[[VAL_25:.*]] = arith.cmpi sgt, %[[VAL_24]], %[[VAL_21]] : index +// CHECK: %[[VAL_26:.*]] = arith.select %[[VAL_25]], %[[VAL_24]], %[[VAL_21]] : index +// CHECK: %[[VAL_27:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_28:.*]] = arith.constant 1 : index +// CHECK: fir.store %[[VAL_27]] to %[[VAL_1]] : !fir.ref +// CHECK: %[[VAL_29:.*]] = fir.allocmem !fir.array, %[[VAL_26]] {bindc_name = ".tmp.forall", uniq_name = ""} +// CHECK: %[[VAL_30:.*]] = fir.shape %[[VAL_26]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_31:.*]]:2 = hlfir.declare %[[VAL_29]](%[[VAL_30]]) {uniq_name = ".tmp.forall"} : (!fir.heap>, !fir.shape<1>) -> (!fir.box>, !fir.heap>) +// CHECK: fir.do_loop %[[VAL_32:.*]] = %[[VAL_7]] to %[[VAL_8]] step %[[VAL_9]] { +// CHECK: %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (index) -> i32 +// CHECK: %[[VAL_34:.*]] = fir.convert %[[VAL_33]] : (i32) -> i64 +// CHECK: %[[VAL_35:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_34]]) : (!fir.box>, i64) -> !fir.ref +// CHECK: %[[VAL_36:.*]] = fir.load %[[VAL_35]] : !fir.ref +// 
CHECK: %[[VAL_37:.*]] = arith.cmpi sgt, %[[VAL_36]], %[[VAL_3]] : i32 +// CHECK: %[[VAL_38:.*]] = fir.load %[[VAL_2]] : !fir.ref +// CHECK: %[[VAL_39:.*]] = arith.addi %[[VAL_38]], %[[VAL_17]] : index +// CHECK: fir.store %[[VAL_39]] to %[[VAL_2]] : !fir.ref +// CHECK: %[[VAL_40:.*]] = hlfir.designate %[[VAL_20]]#0 (%[[VAL_38]]) : (!fir.box>, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_37]] to %[[VAL_40]] : i1, !fir.ref +// CHECK: fir.if %[[VAL_37]] { +// CHECK: %[[VAL_41:.*]] = fir.convert %[[VAL_33]] : (i32) -> i64 +// CHECK: %[[VAL_42:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_41]]) : (!fir.box>, i64) -> !fir.ref +// CHECK: %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref +// CHECK: %[[VAL_44:.*]] = fir.load %[[VAL_1]] : !fir.ref +// CHECK: %[[VAL_45:.*]] = arith.addi %[[VAL_44]], %[[VAL_28]] : index +// CHECK: fir.store %[[VAL_45]] to %[[VAL_1]] : !fir.ref +// CHECK: %[[VAL_46:.*]] = hlfir.designate %[[VAL_31]]#0 (%[[VAL_44]]) : (!fir.box>, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_43]] to %[[VAL_46]] : i32, !fir.ref +// CHECK: } +// CHECK: } +// CHECK: %[[VAL_47:.*]] = fir.convert %[[VAL_5]] : (i32) -> index +// CHECK: %[[VAL_48:.*]] = fir.convert %[[VAL_4]] : (i32) -> index +// CHECK: %[[VAL_49:.*]] = arith.constant 1 : index +// CHECK: fir.store %[[VAL_16]] to %[[VAL_2]] : !fir.ref +// CHECK: fir.store %[[VAL_27]] to %[[VAL_1]] : !fir.ref +// CHECK: fir.do_loop %[[VAL_50:.*]] = %[[VAL_47]] to %[[VAL_48]] step %[[VAL_49]] { +// CHECK: %[[VAL_51:.*]] = fir.convert %[[VAL_50]] : (index) -> i32 +// CHECK: %[[VAL_52:.*]] = fir.load %[[VAL_2]] : !fir.ref +// CHECK: %[[VAL_53:.*]] = arith.addi %[[VAL_52]], %[[VAL_17]] : index +// CHECK: fir.store %[[VAL_53]] to %[[VAL_2]] : !fir.ref +// CHECK: %[[VAL_54:.*]] = hlfir.designate %[[VAL_20]]#0 (%[[VAL_52]]) : (!fir.box>, index) -> !fir.ref +// CHECK: %[[VAL_55:.*]] = fir.load %[[VAL_54]] : !fir.ref +// CHECK: fir.if %[[VAL_55]] { +// CHECK: %[[VAL_56:.*]] = fir.load %[[VAL_1]] : !fir.ref +// CHECK: 
%[[VAL_57:.*]] = arith.addi %[[VAL_56]], %[[VAL_28]] : index +// CHECK: fir.store %[[VAL_57]] to %[[VAL_1]] : !fir.ref +// CHECK: %[[VAL_58:.*]] = hlfir.designate %[[VAL_31]]#0 (%[[VAL_56]]) : (!fir.box>, index) -> !fir.ref +// CHECK: %[[VAL_59:.*]] = fir.load %[[VAL_58]] : !fir.ref +// CHECK: %[[VAL_60:.*]] = arith.addi %[[VAL_51]], %[[VAL_5]] : i32 +// CHECK: %[[VAL_61:.*]] = fir.convert %[[VAL_60]] : (i32) -> i64 +// CHECK: %[[VAL_62:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_61]]) : (!fir.box>, i64) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_59]] to %[[VAL_62]] : i32, !fir.ref +// CHECK: } +// CHECK: } +// CHECK: fir.freemem %[[VAL_18]] : !fir.heap> +// CHECK: fir.freemem %[[VAL_29]] : !fir.heap> +// CHECK: return +// CHECK: } + +func.func @test_where_mask_save(%arg0: !fir.box>) { + %c0 = arith.constant 0 : index + %c42_i32 = arith.constant 42 : i32 + %0:2 = hlfir.declare %arg0 {uniq_name = "x"} : (!fir.box>) -> (!fir.box>, !fir.box>) + hlfir.where { + %1:3 = fir.box_dims %0#0, %c0 : (!fir.box>, index) -> (index, index, index) + %2 = fir.shape %1#1 : (index) -> !fir.shape<1> + %3 = hlfir.elemental %2 : (!fir.shape<1>) -> !hlfir.expr> { + ^bb0(%arg1: index): + %4 = hlfir.designate %0#0 (%arg1) : (!fir.box>, index) -> !fir.ref + %5 = fir.load %4 : !fir.ref + %6 = arith.cmpi sgt, %5, %c42_i32 : i32 + %7 = fir.convert %6 : (i1) -> !fir.logical<4> + hlfir.yield_element %7 : !fir.logical<4> + } + hlfir.yield %3 : !hlfir.expr> cleanup { + hlfir.destroy %3 : !hlfir.expr> + } + } do { + hlfir.region_assign { + hlfir.yield %c42_i32 : i32 + } to { + hlfir.yield %0#0 : !fir.box> + } + } + return +} +// CHECK-LABEL: func.func @test_where_mask_save( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.box>) { +// CHECK: %[[VAL_1:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_2:.*]] = arith.constant 42 : i32 +// CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "x"} : (!fir.box>) -> (!fir.box>, !fir.box>) +// CHECK: %[[VAL_4:.*]]:3 = fir.box_dims %[[VAL_3]]#0, %[[VAL_1]] : 
(!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_4]]#1 : (index) -> !fir.shape<1> +// CHECK: %[[VAL_6:.*]] = hlfir.elemental %[[VAL_5]] : (!fir.shape<1>) -> !hlfir.expr> { +// CHECK: ^bb0(%[[VAL_7:.*]]: index): +// CHECK: %[[VAL_8:.*]] = hlfir.designate %[[VAL_3]]#0 (%[[VAL_7]]) : (!fir.box>, index) -> !fir.ref +// CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_8]] : !fir.ref +// CHECK: %[[VAL_10:.*]] = arith.cmpi sgt, %[[VAL_9]], %[[VAL_2]] : i32 +// CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (i1) -> !fir.logical<4> +// CHECK: hlfir.yield_element %[[VAL_11]] : !fir.logical<4> +// CHECK: } +// CHECK: %[[VAL_12:.*]]:3 = hlfir.associate %[[VAL_13:.*]](%[[VAL_5]]) {uniq_name = ".tmp.forall"} : (!hlfir.expr>, !fir.shape<1>) -> (!fir.box>>, !fir.ref>>, i1) +// CHECK: hlfir.destroy %[[VAL_13]] : !hlfir.expr> +// CHECK: %[[VAL_14:.*]]:3 = fir.box_dims %[[VAL_3]]#0, %[[VAL_1]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_15:.*]] = fir.shape %[[VAL_14]]#1 : (index) -> !fir.shape<1> +// CHECK: %[[VAL_16:.*]] = arith.constant 1 : index +// CHECK: fir.do_loop %[[VAL_17:.*]] = %[[VAL_16]] to %[[VAL_14]]#1 step %[[VAL_16]] { +// CHECK: %[[VAL_18:.*]] = hlfir.designate %[[VAL_3]]#0 (%[[VAL_17]]) : (!fir.box>, index) -> !fir.ref +// CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_18]] : !fir.ref +// CHECK: %[[VAL_20:.*]] = arith.cmpi sgt, %[[VAL_19]], %[[VAL_2]] : i32 +// CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i1) -> !fir.logical<4> +// CHECK: %[[VAL_22:.*]] = fir.convert %[[VAL_21]] : (!fir.logical<4>) -> i1 +// CHECK: fir.if %[[VAL_22]] { +// CHECK: %[[VAL_23:.*]] = hlfir.designate %[[VAL_3]]#0 (%[[VAL_17]]) : (!fir.box>, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_2]] to %[[VAL_23]] : i32, !fir.ref +// CHECK: } +// CHECK: } +// CHECK: hlfir.end_associate %[[VAL_12]]#1, %[[VAL_12]]#2 : !fir.ref>>, i1 +// CHECK: return +// CHECK: } diff --git a/flang/test/HLFIR/ordered-assignments-codegen-todo.fir 
b/flang/test/HLFIR/ordered-assignments-codegen-todo.fir deleted file mode 100644 --- a/flang/test/HLFIR/ordered-assignments-codegen-todo.fir +++ /dev/null @@ -1,24 +0,0 @@ -// Just test that Ordered assignment pass TODOs are properly reported. -// RUN: %not_todo_cmd fir-opt --lower-hlfir-ordered-assignments %s 2>&1 | FileCheck %s - - -// CHECK: not yet implemented: creating temporary storage in FORALL or WHERE constructs - -func.func @forall_todo(%arg0: !fir.ref>) { - %c1 = arith.constant 1 : index - %c10 = arith.constant 10 : index - hlfir.forall lb { - hlfir.yield %c1 : index - } ub { - hlfir.yield %c10 : index - } (%arg2: i64) { - hlfir.region_assign { - %1 = hlfir.designate %arg0 (%arg2) : (!fir.ref>, i64) -> !fir.ref - hlfir.yield %1 : !fir.ref - } to { - %1 = hlfir.designate %arg0 (%arg2) : (!fir.ref>, i64) -> !fir.ref - hlfir.yield %1 : !fir.ref - } - } - return -}