diff --git a/flang/include/flang/Optimizer/Builder/TemporaryStorage.h b/flang/include/flang/Optimizer/Builder/TemporaryStorage.h --- a/flang/include/flang/Optimizer/Builder/TemporaryStorage.h +++ b/flang/include/flang/Optimizer/Builder/TemporaryStorage.h @@ -85,6 +85,10 @@ hlfir::Entity moveStackAsArrayExpr(mlir::Location loc, fir::FirOpBuilder &builder); + /// "fetch" cannot be called right after "pushValue" because the counter is + /// both used for pushing and fetching. + bool canBeFetchedAfterPush() const { return false; } + private: /// Allocate the temporary on the heap. const bool allocateOnHeap; @@ -109,6 +113,7 @@ return copy.getBase(); } void destroy(mlir::Location loc, fir::FirOpBuilder &builder); + bool canBeFetchedAfterPush() const { return true; } public: /// Temporary storage for the copy. @@ -130,6 +135,7 @@ void resetFetchPosition(mlir::Location loc, fir::FirOpBuilder &builder); mlir::Value fetch(mlir::Location loc, fir::FirOpBuilder &builder); void destroy(mlir::Location loc, fir::FirOpBuilder &builder); + bool canBeFetchedAfterPush() const { return true; } private: /// Keep the original value type. Values may be stored by the runtime @@ -165,6 +171,12 @@ void destroy(mlir::Location loc, fir::FirOpBuilder &builder) { std::visit([&](auto &temp) { temp.destroy(loc, builder); }, impl); } + /// Can "fetch" be called to get the last value pushed with + /// "pushValue"? + bool canBeFetchedAfterPush() const { + return std::visit([&](auto &temp) { return temp.canBeFetchedAfterPush(); }, + impl); + } private: std::variant impl; diff --git a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp --- a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp @@ -979,6 +979,17 @@ return loopExtent; } +/// Return a name for temporary storage that indicates in which context +/// the temporary storage was created. +static llvm::StringRef +getTempName(hlfir::OrderedAssignmentTreeOpInterface root) { + if (mlir::isa(root.getOperation())) + return ".tmp.forall"; + if (mlir::isa(root.getOperation())) + return ".tmp.where"; + return ".tmp.assign"; +} + void OrderedAssignmentRewriter::generateSaveEntity( hlfir::SaveEntity savedEntity, bool willUseSavedEntityInSameRun) { mlir::Region ®ion = *savedEntity.yieldRegion; @@ -996,18 +1007,18 @@ entity = hlfir::loadTrivialScalar(loc, builder, entity); mlir::Type entityType = entity.getType(); - static constexpr char tempName[] = ".tmp.forall"; + llvm::StringRef tempName = getTempName(root); + fir::factory::TemporaryStorage *temp = nullptr; if (constructStack.empty()) { // Value evaluated outside of any loops (this may be the first MASK of a // WHERE construct, or an LHS/RHS temp of hlfir.region_assign outside of // WHERE/FORALL). - insertSavedEntity(region, - fir::factory::SimpleCopy(loc, builder, entity, tempName)); + temp = insertSavedEntity( + region, fir::factory::SimpleCopy(loc, builder, entity, tempName)); } else { // Need to create a temporary for values computed inside loops. // Create temporary storage outside of the loop nest given the entity // type (and the loop context). - fir::factory::TemporaryStorage *temp; llvm::SmallVector loopNest; bool loopShapeCanBePreComputed = currentLoopNestIterationNumberCanBeComputed(loopNest); @@ -1042,8 +1053,13 @@ } // Delay the clean-up if the entity will be used in the same run (i.e., the - // parent construct will be visited and needs to be lowered). - if (willUseSavedEntityInSameRun) { + // parent construct will be visited and needs to be lowered). When possible, + // this is not done for hlfir.expr because this use would prevent the + // hlfir.expr storage from being moved when creating the temporary in + // bufferization, and that would lead to an extra copy. + if (willUseSavedEntityInSameRun && + (!temp->canBeFetchedAfterPush() || + !mlir::isa(entity.getType()))) { auto inserted = savedInCurrentRunBeforeUse.try_emplace(®ion, entity, oldYield); assert(inserted.second && "entity must have been emplaced"); diff --git a/flang/test/HLFIR/order_assignments/impure-where.fir b/flang/test/HLFIR/order_assignments/impure-where.fir --- a/flang/test/HLFIR/order_assignments/impure-where.fir +++ b/flang/test/HLFIR/order_assignments/impure-where.fir @@ -36,7 +36,7 @@ // CHECK: %[[VAL_12:.*]] = fir.call @impure() : () -> !fir.heap>> // CHECK: %[[VAL_21:.*]] = fir.allocmem !fir.array>, %[[extent:[^ ]*]] // CHECK: %[[VAL_22:.*]] = fir.shape %[[extent]] : (index) -> !fir.shape<1> -// CHECK: %[[VAL_23:.*]]:2 = hlfir.declare %[[VAL_21]](%{{.*}}) {uniq_name = ".tmp.forall"} +// CHECK: %[[VAL_23:.*]]:2 = hlfir.declare %[[VAL_21]](%{{.*}}) {uniq_name = ".tmp.where"} // CHECK: fir.do_loop // CHECK: fir.if {{.*}} { // CHECK: } else { diff --git a/flang/test/HLFIR/order_assignments/inlined-stack-temp.fir b/flang/test/HLFIR/order_assignments/inlined-stack-temp.fir --- a/flang/test/HLFIR/order_assignments/inlined-stack-temp.fir +++ b/flang/test/HLFIR/order_assignments/inlined-stack-temp.fir @@ -237,7 +237,7 @@ // CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (i1) -> !fir.logical<4> // CHECK: hlfir.yield_element %[[VAL_11]] : !fir.logical<4> // CHECK: } -// CHECK: %[[VAL_12:.*]]:3 = hlfir.associate %[[VAL_13:.*]](%[[VAL_5]]) {uniq_name = ".tmp.forall"} : (!hlfir.expr>, !fir.shape<1>) -> (!fir.box>>, !fir.ref>>, i1) +// CHECK: %[[VAL_12:.*]]:3 = hlfir.associate %[[VAL_13:.*]](%[[VAL_5]]) {uniq_name = ".tmp.where"} : (!hlfir.expr>, !fir.shape<1>) -> (!fir.box>>, !fir.ref>>, i1) // CHECK: hlfir.destroy %[[VAL_13]] : !hlfir.expr> // CHECK: %[[VAL_14:.*]] = arith.constant 1 : index // CHECK: fir.do_loop %[[VAL_15:.*]] = %[[VAL_14]] to %[[VAL_4]]#1 step %[[VAL_14]] { @@ -292,9 +292,9 @@ // CHECK: %[[VAL_17:.*]] = arith.constant 1 : index // CHECK: %[[VAL_18:.*]] = arith.constant 1 : index // CHECK: fir.store %[[VAL_17]] to %[[VAL_2]] : !fir.ref -// CHECK: %[[VAL_19:.*]] = fir.allocmem !fir.array, %[[VAL_16]] {bindc_name = ".tmp.forall", uniq_name = ""} +// CHECK: %[[VAL_19:.*]] = fir.allocmem !fir.array, %[[VAL_16]] {bindc_name = ".tmp.where", uniq_name = ""} // CHECK: %[[VAL_20:.*]] = fir.shape %[[VAL_16]] : (index) -> !fir.shape<1> -// CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_19]](%[[VAL_20]]) {uniq_name = ".tmp.forall"} : (!fir.heap>, !fir.shape<1>) -> (!fir.box>, !fir.heap>) +// CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_19]](%[[VAL_20]]) {uniq_name = ".tmp.where"} : (!fir.heap>, !fir.shape<1>) -> (!fir.box>, !fir.heap>) // CHECK: fir.do_loop %[[VAL_22:.*]] = %[[VAL_9]] to %[[VAL_7]] step %[[VAL_9]] { // CHECK: %[[VAL_23:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_22]]) : (!fir.ref>>, index) -> !fir.ref> // CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_23]] : !fir.ref> diff --git a/flang/test/HLFIR/order_assignments/saving-mask-and-rhs.fir b/flang/test/HLFIR/order_assignments/saving-mask-and-rhs.fir new file mode 100644 --- /dev/null +++ b/flang/test/HLFIR/order_assignments/saving-mask-and-rhs.fir @@ -0,0 +1,104 @@ +// Test when the saved mask is used in the same run to save +// another value (like the RHS). +// RUN: fir-opt %s --lower-hlfir-ordered-assignments | FileCheck %s + +func.func @saving_mask_and_rhs(%arg0: !fir.ref>) { + %c-1 = arith.constant -1 : index + %c1 = arith.constant 1 : index + %c0_i32 = arith.constant 0 : i32 + %c10 = arith.constant 10 : index + %0 = fir.shape %c10 : (index) -> !fir.shape<1> + %1:2 = hlfir.declare %arg0(%0) {uniq_name = "x"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) + hlfir.where { + %2 = hlfir.elemental %0 : (!fir.shape<1>) -> !hlfir.expr<10x!fir.logical<4>> { + ^bb0(%arg1: index): + %3 = hlfir.designate %1#0 (%arg1) : (!fir.ref>, index) -> !fir.ref + %4 = fir.load %3 : !fir.ref + %5 = arith.cmpi sgt, %4, %c0_i32 : i32 + %6 = fir.convert %5 : (i1) -> !fir.logical<4> + hlfir.yield_element %6 : !fir.logical<4> + } + hlfir.yield %2 : !hlfir.expr<10x!fir.logical<4>> cleanup { + hlfir.destroy %2 : !hlfir.expr<10x!fir.logical<4>> + } + } do { + hlfir.region_assign { + hlfir.yield %1#0 : !fir.ref> + } to { + %2 = hlfir.designate %1#0 (%c10:%c1:%c-1) shape %0 : (!fir.ref>, index, index, index, !fir.shape<1>) -> !fir.box> + hlfir.yield %2 : !fir.box> + } + } + return +} + + +// Creating mask temporary. + +// CHECK-LABEL: func.func @saving_mask_and_rhs( +// CHECK: %[[VAL_8:.*]] = hlfir.elemental {{.*}} !hlfir.expr<10x!fir.logical<4>> +// CHECK: %[[VAL_14:.*]]:3 = hlfir.associate %[[VAL_8]]({{.*}}) {uniq_name = ".tmp.where"} + +// Creating RHS temporary using the mask temporary (and not the hlfir.elemental) + +// CHECK: %[[VAL_25:.*]] = fir.allocmem !fir.array, %{{.*}} {bindc_name = ".tmp.where", uniq_name = ""} +// CHECK: %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_25]]({{.*}}) {uniq_name = ".tmp.where"} +// CHECK: fir.do_loop +// CHECK: %[[VAL_29:.*]] = hlfir.designate %[[VAL_14]]#0 ({{.*}}) +// CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_29]] : !fir.ref> +// CHECK: %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1 +// CHECK: fir.if %[[VAL_31]] { +// CHECK: %[[VAL_36:.*]] = hlfir.designate %[[VAL_27]]#0 ({{.*}}) +// CHECK: hlfir.assign %{{.*}} to %[[VAL_36]] : i32, !fir.ref +// CHECK: } +// CHECK: } + +func.func @forall_mask_and_rhs(%arg0: !fir.ref>) { + %c0_i32 = arith.constant 0 : i32 + %c1 = arith.constant 1 : index + %c10 = arith.constant 10 : index + %c11 = arith.constant 11 : index + %0 = fir.shape %c10 : (index) -> !fir.shape<1> + %1:2 = hlfir.declare %arg0(%0) {uniq_name = "x"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) + hlfir.forall lb { + hlfir.yield %c1 : index + } ub { + hlfir.yield %c10 : index + } (%arg1: index) { + hlfir.forall_mask { + %2 = arith.subi %c11, %arg1 : index + %3 = hlfir.designate %1#0 (%2) : (!fir.ref>, index) -> !fir.ref + %4 = fir.load %3 : !fir.ref + %5 = arith.cmpi sgt, %4, %c0_i32 : i32 + hlfir.yield %5 : i1 + } do { + hlfir.region_assign { + %2 = arith.subi %c11, %arg1 : index + %3 = hlfir.designate %1#0 (%2) : (!fir.ref>, index) -> !fir.ref + %4 = fir.load %3 : !fir.ref + hlfir.yield %4 : i32 + } to { + %2 = hlfir.designate %1#0 (%arg1) : (!fir.ref>, index) -> !fir.ref + hlfir.yield %2 : !fir.ref + } + } + } + return +} + +// The mask and rhs are saved in the same loop, the mask value is a scalar i1 +// and it can be used directly to mask the rhs (instead of loading the temp). + +// CHECK-LABEL: func.func @forall_mask_and_rhs( +// CHECK: %[[VAL_18:.*]] = fir.allocmem !fir.array, %{{.*}} {bindc_name = ".tmp.forall", uniq_name = ""} +// CHECK: %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_18]](%{{.*}}) {uniq_name = ".tmp.forall"} +// CHECK: %[[VAL_29:.*]] = fir.allocmem !fir.array, %{{.*}} {bindc_name = ".tmp.forall", uniq_name = ""} +// CHECK: %[[VAL_31:.*]]:2 = hlfir.declare %[[VAL_29]](%{{.*}}) {uniq_name = ".tmp.forall"} +// CHECK: %[[VAL_36:.*]] = arith.cmpi sgt, %{{.*}}, %{{.*}} : i32 +// CHECK: %[[VAL_39:.*]] = hlfir.designate %[[VAL_20]]#0 (%{{.*}}) +// CHECK: hlfir.assign %[[VAL_36]] to %[[VAL_39]] : i1, !fir.ref +// CHECK: fir.if %[[VAL_36]] { +// CHECK: %[[VAL_45:.*]] = hlfir.designate %[[VAL_31]]#0 (%{{.*}}) +// CHECK: hlfir.assign %{{.*}} to %[[VAL_45]] : i32, !fir.ref +// CHECK: } +// CHECK: } diff --git a/flang/test/HLFIR/order_assignments/user-defined-assignment.fir b/flang/test/HLFIR/order_assignments/user-defined-assignment.fir --- a/flang/test/HLFIR/order_assignments/user-defined-assignment.fir +++ b/flang/test/HLFIR/order_assignments/user-defined-assignment.fir @@ -16,7 +16,7 @@ // CHECK-SAME: %[[VAL_0:.*]]: !fir.ref, // CHECK-SAME: %[[VAL_1:.*]]: !fir.ref>) { // CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_1]] : !fir.ref> -// CHECK: %[[VAL_3:.*]]:3 = hlfir.associate %[[VAL_2]] {uniq_name = ".tmp.forall"} : (!fir.logical<4>) -> (!fir.ref>, !fir.ref>, i1) +// CHECK: %[[VAL_3:.*]]:3 = hlfir.associate %[[VAL_2]] {uniq_name = ".tmp.assign"} : (!fir.logical<4>) -> (!fir.ref>, !fir.ref>, i1) // CHECK: fir.call @logical_to_numeric(%[[VAL_0]], %[[VAL_3]]#0) : (!fir.ref, !fir.ref>) -> () // CHECK: hlfir.end_associate %[[VAL_3]]#1, %[[VAL_3]]#2 : !fir.ref>, i1 @@ -54,7 +54,7 @@ // CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4> // CHECK: hlfir.yield_element %[[VAL_9]] : !fir.logical<4> // CHECK: } -// CHECK: %[[VAL_10:.*]]:3 = hlfir.associate %[[VAL_11:.*]](%[[VAL_3]]) {uniq_name = ".tmp.forall"} : (!hlfir.expr<10x!fir.logical<4>>, !fir.shape<1>) -> (!fir.ref>>, !fir.ref>>, i1) +// CHECK: %[[VAL_10:.*]]:3 = hlfir.associate %[[VAL_11:.*]](%[[VAL_3]]) {uniq_name = ".tmp.assign"} : (!hlfir.expr<10x!fir.logical<4>>, !fir.shape<1>) -> (!fir.ref>>, !fir.ref>>, i1) // CHECK: %[[VAL_12:.*]] = arith.constant 10 : index // CHECK: %[[VAL_13:.*]] = fir.shape %[[VAL_12]] : (index) -> !fir.shape<1> // CHECK: %[[VAL_14:.*]] = arith.constant 1 : index @@ -102,7 +102,7 @@ // CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4> // CHECK: hlfir.yield_element %[[VAL_9]] : !fir.logical<4> // CHECK: } -// CHECK: %[[VAL_10:.*]]:3 = hlfir.associate %[[VAL_11:.*]](%[[VAL_3]]) {uniq_name = ".tmp.forall"} : (!hlfir.expr<10x!fir.logical<4>>, !fir.shape<1>) -> (!fir.ref>>, !fir.ref>>, i1) +// CHECK: %[[VAL_10:.*]]:3 = hlfir.associate %[[VAL_11:.*]](%[[VAL_3]]) {uniq_name = ".tmp.assign"} : (!hlfir.expr<10x!fir.logical<4>>, !fir.shape<1>) -> (!fir.ref>>, !fir.ref>>, i1) // CHECK: %[[VAL_12:.*]] = hlfir.as_expr %[[VAL_10]]#0 : (!fir.ref>>) -> !hlfir.expr<10x!fir.logical<4>> // CHECK: %[[VAL_13:.*]]:3 = hlfir.associate %[[VAL_12]](%[[VAL_3]]) {uniq_name = "adapt.valuebyref"} : (!hlfir.expr<10x!fir.logical<4>>, !fir.shape<1>) -> (!fir.ref>>, !fir.ref>>, i1) // CHECK: fir.call @logical_array_to_numeric(%[[VAL_0]], %[[VAL_13]]#0) : (!fir.ref>, !fir.ref>>) -> ()