Index: flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp =================================================================== --- flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp +++ flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp @@ -25,6 +25,7 @@ #include "flang/Optimizer/HLFIR/HLFIRDialect.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/HLFIR/Passes.h" +#include "mlir/IR/Dominance.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" @@ -448,9 +449,10 @@ mlir::isa(useOp)) { if (!endAssociate) continue; - // not known to occur in practice: + // If useOp dominates the endAssociate, then it is definitely safe. if (useOp->getBlock() != endAssociate->getBlock()) - TODO(endAssociate->getLoc(), "Associate split over multiple blocks"); + if (mlir::DominanceInfo{}.dominates(useOp, endAssociate)) + continue; if (useOp->isBeforeInBlock(endAssociate)) continue; } @@ -530,14 +532,31 @@ firVar = adjustVar(firVar, associateFirVarType); associate.getResult(1).replaceAllUsesWith(firVar); associate.getResult(2).replaceAllUsesWith(flag); - rewriter.replaceOp(associate, {hlfirVar, firVar, flag}); + // FIXME: note that the AssociateOp that is being erased + // here will continue to be a user of the original Source + // operand (e.g. a result of hlfir.elemental), because + // the erasure is not immediate in the rewriter. + // In case there are multiple uses of the Source operand, + // the allOtherUsesAreSafeForAssociate() below will always + // see them, so there is no way to reuse the buffer. + // I think we have to run this analysis before doing + // the conversions, so that we can analyze HLFIR in its + // original form and decide which of the AssociateOp + // users of hlfir.expr can reuse the buffer (if it can). + rewriter.eraseOp(associate); }; // If this is the last use of the expression value and this is an hlfir.expr // that was bufferized, re-use the storage. // Otherwise, create a temp and assign the storage to it. + // + // WARNING: it is important to use the original Source operand + // of the AssociateOp to look for the users, because its replacement + // has zero materialized users at this point. + // So allOtherUsesAreSafeForAssociate() may incorrectly return + // true here. if (!isTrivialValue && allOtherUsesAreSafeForAssociate( - adaptor.getSource(), associate.getOperation(), + associate.getSource(), associate.getOperation(), getEndAssociate(associate))) { // Re-use hlfir.expr buffer if this is the only use of the hlfir.expr // outside of the hlfir.destroy. Take on the cleaning-up responsibility @@ -771,6 +790,12 @@ mlir::Value bufferizedExpr = packageBufferizedExpr(loc, builder, temp, cleanup); + // Explicitly delete the body of the elemental to get rid + // of any users of hlfir.expr values inside the body as early + // as possible. + rewriter.startRootUpdate(elemental); + rewriter.eraseBlock(elemental.getBody()); + rewriter.finalizeRootUpdate(elemental); rewriter.replaceOp(elemental, bufferizedExpr); return mlir::success(); } Index: flang/test/HLFIR/associate-codegen.fir =================================================================== --- flang/test/HLFIR/associate-codegen.fir +++ flang/test/HLFIR/associate-codegen.fir @@ -239,55 +239,6 @@ // CHECK: return // CHECK: } -// The hlfir.associate op is cloned when the elemental is bufferized into the -// fir.do_loop. When the associate op conversion is run, if the source of the -// assoicate is used directly (not accessing the bufferized version through -// the adaptor) then both the associate inside the elemental and the associate -// inside the fir.do_loop are found as uses. Therefore being erroneously -// flagged as an associate with more than one use -func.func @test_cloned_associate() { - %false = arith.constant false - %c1 = arith.constant 1 : index - %c10 = arith.constant 10 : index - %0 = fir.alloca !fir.char<1> - %2 = fir.shape %c10 : (index) -> !fir.shape<1> - %4 = hlfir.as_expr %0 move %false : (!fir.ref>, i1) -> !hlfir.expr> - %5 = hlfir.elemental %2 unordered : (!fir.shape<1>) -> !hlfir.expr<10x!fir.logical<4>> { - ^bb0(%arg0: index): - %8:3 = hlfir.associate %4 typeparams %c1 {uniq_name = "adapt.valuebyref"} : (!hlfir.expr>, index) -> (!fir.ref>, !fir.ref>, i1) - hlfir.end_associate %8#1, %8#2 : !fir.ref>, i1 - %15 = fir.convert %false : (i1) -> !fir.logical<4> - hlfir.yield_element %15 : !fir.logical<4> - } - hlfir.destroy %5 : !hlfir.expr<10x!fir.logical<4>> - hlfir.destroy %4 : !hlfir.expr> - return -} -// CHECK-LABEL: func.func @test_cloned_associate() { -// CHECK: %[[VAL_0:.*]] = arith.constant false -// CHECK: %[[VAL_1:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_2:.*]] = arith.constant 10 : index -// CHECK: %[[VAL_3:.*]] = fir.alloca !fir.char<1> -// CHECK: %[[VAL_4:.*]] = fir.shape %[[VAL_2]] : (index) -> !fir.shape<1> -// CHECK: %[[VAL_5:.*]] = fir.undefined tuple>, i1> -// CHECK: %[[VAL_6:.*]] = fir.insert_value %[[VAL_5]], %[[VAL_0]], [1 : index] : (tuple>, i1>, i1) -> tuple>, i1> -// CHECK: %[[VAL_7:.*]] = fir.insert_value %[[VAL_6]], %[[VAL_3]], [0 : index] : (tuple>, i1>, !fir.ref>) -> tuple>, i1> -// CHECK: %[[VAL_8:.*]] = fir.allocmem !fir.array<10x!fir.logical<4>> {bindc_name = ".tmp.array", uniq_name = ""} -// CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]](%[[VAL_4]]) {uniq_name = ".tmp.array"} : (!fir.heap>>, !fir.shape<1>) -> (!fir.heap>>, !fir.heap>>) -// CHECK: %[[VAL_10:.*]] = arith.constant true -// CHECK: %[[VAL_11:.*]] = arith.constant 1 : index -// CHECK: fir.do_loop %[[VAL_12:.*]] = %[[VAL_11]] to %[[VAL_2]] step %[[VAL_11]] unordered { -// CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_0]] : (i1) -> !fir.logical<4> -// CHECK: %[[VAL_14:.*]] = hlfir.designate %[[VAL_9]]#0 (%[[VAL_12]]) : (!fir.heap>>, index) -> !fir.ref> -// CHECK: hlfir.assign %[[VAL_13]] to %[[VAL_14]] temporary_lhs : !fir.logical<4>, !fir.ref> -// CHECK: } -// CHECK: %[[VAL_15:.*]] = fir.undefined tuple>>, i1> -// CHECK: %[[VAL_16:.*]] = fir.insert_value %[[VAL_15]], %[[VAL_10]], [1 : index] : (tuple>>, i1>, i1) -> tuple>>, i1> -// CHECK: %[[VAL_17:.*]] = fir.insert_value %[[VAL_16]], %[[VAL_9]]#0, [0 : index] : (tuple>>, i1>, !fir.heap>>) -> tuple>>, i1> -// CHECK: fir.freemem %[[VAL_9]]#1 : !fir.heap>> -// CHECK: return -// CHECK: } - func.func @test_multiple_associations(%arg0: !hlfir.expr<1x2xi32>) { %c1 = arith.constant 1 : index %c2 = arith.constant 2 : index @@ -420,3 +371,86 @@ // CHECK: %[[VAL_27:.*]] = fir.insert_value %[[VAL_26]], %[[VAL_12]]#0, [0 : index] : (tuple>>, i1>, !fir.box>>) -> tuple>>, i1> // CHECK: return // CHECK: } + +// Verify that we properly recognize mutliple consequent hlfir.associate using +// the same result of hlfir.elemental. +func.func @_QPtest_multitple_associates_for_same_expr() { + %c1 = arith.constant 1 : index + %c10 = arith.constant 10 : index + %4 = fir.shape %c10 : (index) -> !fir.shape<1> + %11 = hlfir.elemental %4 typeparams %c1 unordered : (!fir.shape<1>, index) -> !hlfir.expr<10x!fir.char<1>> { + ^bb0(%arg1: index): + %44 = fir.undefined !fir.ref> + hlfir.yield_element %44 : !fir.ref> + } + %12:3 = hlfir.associate %11(%4) typeparams %c1 {uniq_name = "adapt.valuebyref"} : (!hlfir.expr<10x!fir.char<1>>, !fir.shape<1>, index) -> (!fir.ref>>, !fir.ref>>, i1) + hlfir.end_associate %12#1, %12#2 : !fir.ref>>, i1 + %31:3 = hlfir.associate %11(%4) typeparams %c1 {uniq_name = "adapt.valuebyref"} : (!hlfir.expr<10x!fir.char<1>>, !fir.shape<1>, index) -> (!fir.ref>>, !fir.ref>>, i1) + hlfir.end_associate %31#1, %31#2 : !fir.ref>>, i1 + hlfir.destroy %11 : !hlfir.expr<10x!fir.char<1>> + return +} +// CHECK-LABEL: func.func @_QPtest_multitple_associates_for_same_expr() { +// CHECK: %[[VAL_0:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_1:.*]] = arith.constant 10 : index +// CHECK: %[[VAL_2:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_3:.*]] = fir.allocmem !fir.array<10x!fir.char<1>> {bindc_name = ".tmp.array", uniq_name = ""} +// CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]](%[[VAL_2]]) typeparams %[[VAL_0]] {uniq_name = ".tmp.array"} : (!fir.heap>>, !fir.shape<1>, index) -> (!fir.heap>>, !fir.heap>>) +// CHECK: %[[VAL_5:.*]] = arith.constant true +// CHECK: %[[VAL_6:.*]] = arith.constant 1 : index +// CHECK: fir.do_loop %[[VAL_7:.*]] = %[[VAL_6]] to %[[VAL_1]] step %[[VAL_6]] unordered { +// CHECK: %[[VAL_8:.*]] = fir.undefined !fir.ref> +// CHECK: %[[VAL_9:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_7]]) typeparams %[[VAL_0]] : (!fir.heap>>, index, index) -> !fir.ref> +// CHECK: hlfir.assign %[[VAL_8]] to %[[VAL_9]] temporary_lhs : !fir.ref>, !fir.ref> +// CHECK: } +// CHECK: %[[VAL_10:.*]] = fir.undefined tuple>>, i1> +// CHECK: %[[VAL_11:.*]] = fir.insert_value %[[VAL_10]], %[[VAL_5]], [1 : index] : (tuple>>, i1>, i1) -> tuple>>, i1> +// CHECK: %[[VAL_12:.*]] = fir.insert_value %[[VAL_11]], %[[VAL_4]]#0, [0 : index] : (tuple>>, i1>, !fir.heap>>) -> tuple>>, i1> +// CHECK: %[[VAL_13:.*]] = fir.allocmem !fir.array<10x!fir.char<1>> {bindc_name = ".tmp", uniq_name = ""} +// CHECK: %[[VAL_14:.*]] = arith.constant true +// CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_13]](%[[VAL_2]]) typeparams %[[VAL_0]] {uniq_name = ".tmp"} : (!fir.heap>>, !fir.shape<1>, index) -> (!fir.heap>>, !fir.heap>>) +// CHECK: hlfir.assign %[[VAL_4]]#0 to %[[VAL_15]]#0 temporary_lhs : !fir.heap>>, !fir.heap>> +// CHECK: %[[VAL_16:.*]] = fir.undefined tuple>>, i1> +// CHECK: %[[VAL_17:.*]] = fir.insert_value %[[VAL_16]], %[[VAL_14]], [1 : index] : (tuple>>, i1>, i1) -> tuple>>, i1> +// CHECK: %[[VAL_18:.*]] = fir.insert_value %[[VAL_17]], %[[VAL_15]]#0, [0 : index] : (tuple>>, i1>, !fir.heap>>) -> tuple>>, i1> +// CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_15]]#0 : (!fir.heap>>) -> !fir.ref>> +// CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_15]]#1 : (!fir.heap>>) -> !fir.ref>> +// CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (!fir.ref>>) -> !fir.heap>> +// CHECK: fir.freemem %[[VAL_21]] : !fir.heap>> +// CHECK: %[[VAL_22:.*]] = fir.allocmem !fir.array<10x!fir.char<1>> {bindc_name = ".tmp", uniq_name = ""} +// CHECK: %[[VAL_23:.*]] = arith.constant true +// CHECK: %[[VAL_24:.*]]:2 = hlfir.declare %[[VAL_22]](%[[VAL_2]]) typeparams %[[VAL_0]] {uniq_name = ".tmp"} : (!fir.heap>>, !fir.shape<1>, index) -> (!fir.heap>>, !fir.heap>>) +// CHECK: hlfir.assign %[[VAL_4]]#0 to %[[VAL_24]]#0 temporary_lhs : !fir.heap>>, !fir.heap>> +// CHECK: %[[VAL_25:.*]] = fir.undefined tuple>>, i1> +// CHECK: %[[VAL_26:.*]] = fir.insert_value %[[VAL_25]], %[[VAL_23]], [1 : index] : (tuple>>, i1>, i1) -> tuple>>, i1> +// CHECK: %[[VAL_27:.*]] = fir.insert_value %[[VAL_26]], %[[VAL_24]]#0, [0 : index] : (tuple>>, i1>, !fir.heap>>) -> tuple>>, i1> +// CHECK: %[[VAL_28:.*]] = fir.convert %[[VAL_24]]#0 : (!fir.heap>>) -> !fir.ref>> +// CHECK: %[[VAL_29:.*]] = fir.convert %[[VAL_24]]#1 : (!fir.heap>>) -> !fir.ref>> +// CHECK: %[[VAL_30:.*]] = fir.convert %[[VAL_29]] : (!fir.ref>>) -> !fir.heap>> +// CHECK: fir.freemem %[[VAL_30]] : !fir.heap>> +// CHECK: fir.freemem %[[VAL_4]]#1 : !fir.heap>> +// CHECK: return +// CHECK: } + +// Test hlfir.associate codegen, when its operand is used +// by hlfir.shape located in a block different from the block +// of the hlfir.end_associate. +func.func @_QPtest(%arg0: index, %arg1: index, %arg2 : i32) { + %c1_i32 = arith.constant 1 : i32 + %3 = fir.shape %arg0, %arg1 : (index, index) -> !fir.shape<2> + %4 = hlfir.elemental %3 unordered : (!fir.shape<2>) -> !hlfir.expr { + ^bb0(%arg3: index, %arg4: index): + %16 = fir.undefined i32 + hlfir.yield_element %16 : i32 + } + %5 = hlfir.shape_of %4 : (!hlfir.expr) -> !fir.shape<2> + %6:3 = hlfir.associate %4(%5) {uniq_name = "adapt.valuebyref"} : (!hlfir.expr, !fir.shape<2>) -> (!fir.box>, !fir.ref>, i1) + %13 = arith.cmpi ne, %arg2, %c1_i32 : i32 + cf.cond_br %13, ^bb1, ^bb2 +^bb1: // pred: ^bb0 + fir.unreachable +^bb2: // pred: ^bb0 + hlfir.end_associate %6#1, %6#2 : !fir.ref>, i1 + hlfir.destroy %4 : !hlfir.expr + return +}