diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp --- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp @@ -157,6 +157,83 @@ return mlir::AliasResult::NoAlias; } +// Returns true if the given array references represent identical +// or completely disjoint array slices. The callers may use this +// method when the alias analysis reports an alias of some kind, +// so that we can run Fortran specific analysis on the array slices +// to see if they are identical or disjoint. Note that the alias +// analysis is not able to give such an answer about the references. +static bool areIdenticalOrDisjointSlices(mlir::Value ref1, mlir::Value ref2) { + if (ref1 == ref2) + return true; + + auto des1 = ref1.getDefiningOp(); + auto des2 = ref2.getDefiningOp(); + // We only support a pair of designators right now. + if (!des1 || !des2) + return false; + + if (des1.getMemref() != des2.getMemref()) { + // If the bases are different, then there is unknown overlap. + LLVM_DEBUG(llvm::dbgs() << "No identical base for:\n" + << des1 << "and:\n" + << des2 << "\n"); + return false; + } + + // Require all components of the designators to be the same. + // It might be too strict, e.g. we may probably allow for + // different type parameters. 
+ if (des1.getComponent() != des2.getComponent() || + des1.getComponentShape() != des2.getComponentShape() || + des1.getSubstring() != des2.getSubstring() || + des1.getComplexPart() != des2.getComplexPart() || + des1.getShape() != des2.getShape() || + des1.getTypeparams() != des2.getTypeparams()) { + LLVM_DEBUG(llvm::dbgs() << "Different designator specs for:\n" + << des1 << "and:\n" + << des2 << "\n"); + return false; + } + + if (des1.getIsTriplet() != des2.getIsTriplet()) { + LLVM_DEBUG(llvm::dbgs() << "Different sections for:\n" + << des1 << "and:\n" + << des2 << "\n"); + return false; + } + + // Analyze the subscripts. + // For example: + // hlfir.designate %6#0 (%c2:%c7999:%c1, %c1:%c120:%c1, %0) shape %9 + // hlfir.designate %6#0 (%c2:%c7999:%c1, %c1:%c120:%c1, %1) shape %9 + // + // If all the triplets (section specifiers) are the same, then + // we do not care if %0 is equal to %1 - the slices are either + // identical or completely disjoint. + // + // TODO: if we can prove that all non-triplet subscripts are different + // (by value), then we may return true regardless of the triplet + // values - the sections must be completely disjoint. 
+ auto des1It = des1.getIndices().begin(); + auto des2It = des2.getIndices().begin(); + for (bool isTriplet : des1.getIsTriplet()) { + if (isTriplet) { + for (int i = 0; i < 3; ++i) + if (*des1It++ != *des2It++) { + LLVM_DEBUG(llvm::dbgs() << "Triplet mismatch for:\n" + << des1 << "and:\n" + << des2 << "\n"); + return false; + } + } else { + ++des1It; + ++des2It; + } + } + return true; +} + std::optional ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) { mlir::Operation::user_range users = elemental->getUsers(); @@ -274,7 +351,7 @@ if (!res.isPartial()) { if (auto designate = effect.getValue().getDefiningOp()) { - if (designate.getMemref() != match.array) { + if (!areIdenticalOrDisjointSlices(match.array, designate.getMemref())) { LLVM_DEBUG(llvm::dbgs() << "possible read conflict: " << designate << " at " << elemental.getLoc() << "\n"); return std::nullopt; @@ -291,7 +368,7 @@ continue; } } - LLVM_DEBUG(llvm::dbgs() << "diasllowed side-effect: " << effect.getValue() + LLVM_DEBUG(llvm::dbgs() << "disallowed side-effect: " << effect.getValue() << " for " << elemental.getLoc() << "\n"); return std::nullopt; } @@ -484,6 +561,8 @@ fir::AliasAnalysis aliasAnalysis; mlir::AliasResult aliasRes = aliasAnalysis.alias(lhs, rhs); + // TODO: use areIdenticalOrDisjointSlices() to check if + // we can still do the expansion. if (!aliasRes.isNo()) { LLVM_DEBUG(llvm::dbgs() << "VariableAssignBufferization:\n" << "\tLHS: " << lhs << "\n" diff --git a/flang/test/HLFIR/opt-array-slice-assign.fir b/flang/test/HLFIR/opt-array-slice-assign.fir new file mode 100644 --- /dev/null +++ b/flang/test/HLFIR/opt-array-slice-assign.fir @@ -0,0 +1,130 @@ +// Test optimized bufferization for hlfir.assign of array +// slices, e.g.: +// x(2:7999,1:120,new) = (x(2:7999,1:120,old)) +// We can expand hlfir.assign if the slices are either identical +// or completely disjoint. 
In case they are identical, we still +// need to make sure that the one-based indices are used +// uniformly for both LHS and RHS. +// RUN: fir-opt --opt-bufferization %s | FileCheck %s + +func.func @_QPtest1(%arg0: !fir.ref> {fir.bindc_name = "x"}) { + %c7998 = arith.constant 7998 : index + %c1 = arith.constant 1 : index + %c7999 = arith.constant 7999 : index + %c2 = arith.constant 2 : index + %c3 = arith.constant 3 : index + %c120 = arith.constant 120 : index + %c8000 = arith.constant 8000 : index + %0 = fir.alloca i32 {bindc_name = "new", uniq_name = "_QFtest1Enew"} + %1:2 = hlfir.declare %0 {uniq_name = "_QFtest1Enew"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %2 = fir.alloca i32 {bindc_name = "old", uniq_name = "_QFtest1Eold"} + %3:2 = hlfir.declare %2 {uniq_name = "_QFtest1Eold"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %4 = fir.shape %c8000, %c120, %c3 : (index, index, index) -> !fir.shape<3> + %5:2 = hlfir.declare %arg0(%4) {uniq_name = "_QFtest1Ex"} : (!fir.ref>, !fir.shape<3>) -> (!fir.ref>, !fir.ref>) + %6 = fir.load %3#0 : !fir.ref + %7 = fir.convert %6 : (i32) -> i64 + %8 = fir.shape %c7998, %c120 : (index, index) -> !fir.shape<2> + %9 = hlfir.designate %5#0 (%c2:%c7999:%c1, %c1:%c120:%c1, %7) shape %8 : (!fir.ref>, index, index, index, index, index, index, i64, !fir.shape<2>) -> !fir.box> + %10 = hlfir.elemental %8 unordered : (!fir.shape<2>) -> !hlfir.expr<7998x120xf32> { + ^bb0(%arg1: index, %arg2: index): + %14 = hlfir.designate %9 (%arg1, %arg2) : (!fir.box>, index, index) -> !fir.ref + %15 = fir.load %14 : !fir.ref + %16 = hlfir.no_reassoc %15 : f32 + hlfir.yield_element %16 : f32 + } + %11 = fir.load %1#0 : !fir.ref + %12 = fir.convert %11 : (i32) -> i64 + %13 = hlfir.designate %5#0 (%c2:%c7999:%c1, %c1:%c120:%c1, %12) shape %8 : (!fir.ref>, index, index, index, index, index, index, i64, !fir.shape<2>) -> !fir.box> + hlfir.assign %10 to %13 : !hlfir.expr<7998x120xf32>, !fir.box> + hlfir.destroy %10 : !hlfir.expr<7998x120xf32> + return +} +// 
CHECK-LABEL: func.func @_QPtest1( +// CHECK: fir.do_loop %[[VAL_21:.*]] = +// CHECK: fir.do_loop %[[VAL_22:.*]] = +// CHECK: %[[VAL_23:.*]] = hlfir.designate %[[VAL_17:.*]] (%[[VAL_22]], %[[VAL_21]]) : (!fir.box>, index, index) -> !fir.ref +// CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_23]] : !fir.ref +// CHECK: %[[VAL_25:.*]] = hlfir.no_reassoc %[[VAL_24]] : f32 +// CHECK: %[[VAL_26:.*]] = hlfir.designate %[[VAL_20:.*]] (%[[VAL_22]], %[[VAL_21]]) : (!fir.box>, index, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_25]] to %[[VAL_26]] : f32, !fir.ref +// CHECK: } +// CHECK: } + +func.func @_QPtest2(%arg0: !fir.ref>>> {fir.bindc_name = "x"}) { + %c120 = arith.constant 120 : index + %c7998 = arith.constant 7998 : index + %c1 = arith.constant 1 : index + %c7999 = arith.constant 7999 : index + %c2 = arith.constant 2 : index + %0:2 = hlfir.declare %arg0 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFtest2Ex"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) + %1 = fir.load %0#0 : !fir.ref>>> + %2 = fir.shape %c7998, %c120 : (index, index) -> !fir.shape<2> + %3 = hlfir.designate %1 (%c2:%c7999:%c1, %c1:%c120:%c1, %c2) shape %2 : (!fir.box>>, index, index, index, index, index, index, index, !fir.shape<2>) -> !fir.box> + %4 = hlfir.elemental %2 unordered : (!fir.shape<2>) -> !hlfir.expr<7998x120xf32> { + ^bb0(%arg1: index, %arg2: index): + %6 = hlfir.designate %3 (%arg1, %arg2) : (!fir.box>, index, index) -> !fir.ref + %7 = fir.load %6 : !fir.ref + %8 = hlfir.no_reassoc %7 : f32 + hlfir.yield_element %8 : f32 + } + %5 = hlfir.designate %1 (%c2:%c7999:%c1, %c1:%c120:%c1, %c1) shape %2 : (!fir.box>>, index, index, index, index, index, index, index, !fir.shape<2>) -> !fir.box> + hlfir.assign %4 to %5 : !hlfir.expr<7998x120xf32>, !fir.box> + hlfir.destroy %4 : !hlfir.expr<7998x120xf32> + return +} +// CHECK-LABEL: func.func @_QPtest2( +// CHECK: fir.do_loop %[[VAL_11:.*]] = +// CHECK: fir.do_loop %[[VAL_12:.*]] = +// CHECK: %[[VAL_13:.*]] = hlfir.designate %[[VAL_9:.*]] 
(%[[VAL_12]], %[[VAL_11]]) : (!fir.box>, index, index) -> !fir.ref +// CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_13]] : !fir.ref +// CHECK: %[[VAL_15:.*]] = hlfir.no_reassoc %[[VAL_14]] : f32 +// CHECK: %[[VAL_16:.*]] = hlfir.designate %[[VAL_10:.*]] (%[[VAL_12]], %[[VAL_11]]) : (!fir.box>, index, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_15]] to %[[VAL_16]] : f32, !fir.ref +// CHECK: } +// CHECK: } + +func.func @_QPtest3(%arg0: !fir.ref}>>> {fir.bindc_name = "x"}) { + %c7998 = arith.constant 7998 : index + %c7999 = arith.constant 7999 : index + %c2 = arith.constant 2 : index + %c3 = arith.constant 3 : index + %c120 = arith.constant 120 : index + %c8000 = arith.constant 8000 : index + %c1 = arith.constant 1 : index + %c10 = arith.constant 10 : index + %0 = fir.alloca i32 {bindc_name = "new", uniq_name = "_QFtest3Enew"} + %1:2 = hlfir.declare %0 {uniq_name = "_QFtest3Enew"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %2 = fir.alloca i32 {bindc_name = "old", uniq_name = "_QFtest3Eold"} + %3:2 = hlfir.declare %2 {uniq_name = "_QFtest3Eold"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %4 = fir.shape %c10 : (index) -> !fir.shape<1> + %5:2 = hlfir.declare %arg0(%4) {uniq_name = "_QFtest3Ex"} : (!fir.ref}>>>, !fir.shape<1>) -> (!fir.ref}>>>, !fir.ref}>>>) + %6 = hlfir.designate %5#0 (%c1) : (!fir.ref}>>>, index) -> !fir.ref}>> + %7 = fir.shape %c8000, %c120, %c3 : (index, index, index) -> !fir.shape<3> + %8 = fir.load %3#0 : !fir.ref + %9 = fir.convert %8 : (i32) -> i64 + %10 = fir.shape %c7998, %c120 : (index, index) -> !fir.shape<2> + %11 = hlfir.designate %6{"x"} <%7> (%c2:%c7999:%c1, %c1:%c120:%c1, %9) shape %10 : (!fir.ref}>>, !fir.shape<3>, index, index, index, index, index, index, i64, !fir.shape<2>) -> !fir.box> + %12 = hlfir.elemental %10 unordered : (!fir.shape<2>) -> !hlfir.expr<7998x120xf32> { + ^bb0(%arg1: index, %arg2: index): + %16 = hlfir.designate %11 (%arg1, %arg2) : (!fir.box>, index, index) -> !fir.ref + %17 = fir.load %16 : !fir.ref + %18 = 
hlfir.no_reassoc %17 : f32 + hlfir.yield_element %18 : f32 + } + %13 = fir.load %1#0 : !fir.ref + %14 = fir.convert %13 : (i32) -> i64 + %15 = hlfir.designate %6{"x"} <%7> (%c2:%c7999:%c1, %c1:%c120:%c1, %14) shape %10 : (!fir.ref}>>, !fir.shape<3>, index, index, index, index, index, index, i64, !fir.shape<2>) -> !fir.box> + hlfir.assign %12 to %15 : !hlfir.expr<7998x120xf32>, !fir.box> + hlfir.destroy %12 : !hlfir.expr<7998x120xf32> + return +} +// CHECK-LABEL: func.func @_QPtest3( +// CHECK: fir.do_loop %[[VAL_24:.*]] = +// CHECK: fir.do_loop %[[VAL_25:.*]] = +// CHECK: %[[VAL_26:.*]] = hlfir.designate %[[VAL_20:.*]] (%[[VAL_25]], %[[VAL_24]]) : (!fir.box>, index, index) -> !fir.ref +// CHECK: %[[VAL_27:.*]] = fir.load %[[VAL_26]] : !fir.ref +// CHECK: %[[VAL_28:.*]] = hlfir.no_reassoc %[[VAL_27]] : f32 +// CHECK: %[[VAL_29:.*]] = hlfir.designate %[[VAL_23:.*]] (%[[VAL_25]], %[[VAL_24]]) : (!fir.box>, index, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_28]] to %[[VAL_29]] : f32, !fir.ref +// CHECK: } +// CHECK: }