diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp --- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp +++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp @@ -53,6 +53,9 @@ Region *region = value.getParentRegion(); while (region) { Operation *op = region->getParentOp(); + // Stop lookup when reaching a function. + if (isa(op)) + return region; if (auto bufferizableOp = options.dynCastBufferizableOp(op)) if (bufferizableOp.isRepetitiveRegion(region->getRegionNumber())) return region; @@ -67,6 +70,9 @@ Operation *op = nullptr; do { op = region->getParentOp(); + // Stop lookup when reaching a function. + if (isa(op)) + return region; if (auto bufferizableOp = options.dynCastBufferizableOp(op)) if (bufferizableOp.isRepetitiveRegion(region->getRegionNumber())) return region; diff --git a/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp b/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp --- a/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp @@ -346,12 +346,12 @@ return false; } -/// Return `true` if op dominance can be used to rule out read-after-write -/// conflicts wrt. the given reads and writes. +/// Return `true` if op dominance can be used to rule out a read-after-write +/// conflicts. /// -/// Op dominance can often be used to rule out potential conflicts such as -/// "read" happens before "write". E.g., the following IR is not a RaW conflict -/// because the the read happens *before* the write. +/// Op dominance can often be used to rule out potential conflicts due to +/// "read happens before write". E.g., the following IR is not a RaW conflict +/// because the read happens *before* the write. /// /// %0 = ... : tensor /// "reading_op"(%0) : tensor @@ -387,39 +387,41 @@ /// "writing_op", so op dominance can be used to compute the `happensBefore` /// relationship. /// -/// Whether op dominance can be used or not is decided as follows: Find the -/// closest enclosing repetitive region of all buffer writes wrt. the given -/// tensor reads and writes. (The given sets of reads and writes contain the -/// entire alias set.) In case of a read, we look at the op that defines the -/// read value. In case of a write, we look at the op that is writing. If all of -/// those ops are in the same closest enclosing repetitive region (nullptr in -/// case of "no repetitive region" found at all), then op dominance can be used. -/// Otherwise, it cannot be used. +/// Counter example: +/// +/// %0 = ... : tensor +/// %1 = "writing_op"(%0) : tensor -> tensor +/// scf.for ... { +/// "reading_op"(%1) : tensor +/// ... +/// } +/// +/// In this example, the definition %1 is in the same repetitive region as +/// "writing_op", so op dominance can be used to compute the `happensBefore` +/// relationship. +/// +/// Op dominance can be used if: +/// repetitive_reg(write) == repetitive_reg(def(read)) /// -/// Example: The common enclosing repetitive region is the scf.for loop. -/// Op dominance can be used. +/// Example: There is no write, so op dominance can be used. /// scf.for ... { /// %0 = tensor.generate /// "read"(%0) /// } /// -/// Example: The common enclosing repetitive region is nullptr: There is no -/// repetitive region around the tensor.generate. Op dominance can be -/// used. +/// Example: There is no write, so op dominance can be used. /// %0 = tensor.generate /// scf.for ... { "read"(%0) } /// -/// Example: The common enclosing repetitive regions of tensor.generate and -/// "write" differ. Op dominance cannot be used. +/// Example: The write is in a different repetitive region than %0. Op dominance +/// cannot be used. /// %0 = tensor.generate /// scf.for ... { /// "read"(%0) /// "write"(%0) /// } /// -/// Example: The common enclosing repetitive regions of tensor.generate and -/// "write" differ, but there is no read of %0, so op dominance can be -/// used. +/// Example: There is no read, so op dominance can be used. /// %0 = tensor.generate /// scf.for ... { /// "write"(%0) @@ -428,38 +430,20 @@ /// Note: iter_args of loops are not aliases of their respective block /// arguments, so op domanice can be used when analyzing ops that operate /// on them. -bool canUseOpDominance(const DenseSet &usesRead, - const DenseSet &usesWrite, +bool canUseOpDominance(OpOperand *uRead, + OpOperand *uWrite, + const SetVector &definitions, const AnalysisState &state) { const BufferizationOptions &options = state.getOptions(); - std::optional commonEnclosingRegion; - - // In case of a write, take the region in which the write takes place. - for (OpOperand *uWrite : usesWrite) { - Region *r = getEnclosingRepetitiveRegion(uWrite->getOwner(), options); - if (!commonEnclosingRegion.has_value()) { - commonEnclosingRegion = r; - continue; - } - if (*commonEnclosingRegion != r) - return false; - } - - // In case of a read, take the region which the read value is defined. - for (OpOperand *uRead : usesRead) { - // Optimization: Skip reads of values that have no defined contents. - if (!state.bufferizesToMemoryWrite(uRead->get())) - continue; - Region *r = getEnclosingRepetitiveRegion(uRead->get(), options); - if (!commonEnclosingRegion.has_value()) { - commonEnclosingRegion = r; + Region *r = getEnclosingRepetitiveRegion(uWrite->getOwner(), options); + for (Value def : definitions) { + // Skip tensors that have no defined contents. + if (!state.bufferizesToMemoryWrite(def)) continue; - } - if (*commonEnclosingRegion != r) + if (r != getEnclosingRepetitiveRegion(def, options)) return false; } - - return commonEnclosingRegion.has_value(); + return true; } /// Annotate IR with details about the detected RaW conflict. @@ -507,10 +491,6 @@ AnalysisState &state, const BufferizationAliasInfo &aliasInfo) { const BufferizationOptions &options = state.getOptions(); - // Check if op dominance can be used to rule out read-after-write conflicts. - bool useDominance = canUseOpDominance(usesRead, usesWrite, state); - LLVM_DEBUG(llvm::dbgs() << "\n- useDominance = " << useDominance << "\n"); - for (OpOperand *uRead : usesRead) { Operation *readingOp = uRead->getOwner(); @@ -529,6 +509,11 @@ // Look for conflicting memory writes. Potential conflicts are writes to an // alias that have been decided to bufferize inplace. for (OpOperand *uConflictingWrite : usesWrite) { + // Check if op dominance can be used to rule out read-after-write + // conflicts. + bool useDominance = canUseOpDominance(uRead, uConflictingWrite, definitions, state); + LLVM_DEBUG(llvm::dbgs() << "\n- useDominance = " << useDominance << "\n"); + LLVM_DEBUG(llvm::dbgs() << "\n- check conflict:\n"); LLVM_DEBUG(llvm::dbgs() << " uRead = operand " << uRead->getOperandNumber() << " of " diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir --- a/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir +++ b/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir @@ -697,3 +697,60 @@ return %2, %7 : tensor<4xf32>, tensor<4xf32> } + +// ----- + +// CHECK-LABEL: func @read_of_bbarg_in_repetitive_region( +func.func @read_of_bbarg_in_repetitive_region( + %t: tensor<10xf32>, %a: index, %b: index, %c: index, %cst: f32) { + // CHECK: scf.for + scf.for %iv = %a to %b step %c { + // Must bufferize out-of-place because definition of read is in a different + // repetitive region. + // CHECK: tensor.extract_slice {{.*}} {__inplace_operands_attr__ = ["false"]} + %2 = tensor.extract_slice %t[0][4][1] : tensor<10xf32> to tensor<4xf32> + %3 = tensor.extract %2[%a] : tensor<4xf32> + vector.print %3 : f32 + // CHECK: tensor.insert {{.*}} {__inplace_operands_attr__ = ["none", "true", "none"]} + %4 = tensor.insert %cst into %2[%a] : tensor<4xf32> + %5 = tensor.extract %4[%a] : tensor<4xf32> + vector.print %5 : f32 + } + return +} + +// ----- + +// CHECK-LABEL: func @read_definition_in_same_repetitive_region_as_write( +func.func @read_definition_in_same_repetitive_region_as_write( + %t: tensor<10xf32>, %a: index, %b: index, %c: index, %cst: f32) { + // CHECK: tensor.insert {{.*}} {__inplace_operands_attr__ = ["none", "true", "none"]} + %1 = tensor.insert %cst into %t[%a] : tensor<10xf32> + // CHECK: scf.for + scf.for %iv = %a to %b step %c { + // Can bufferize in-place. + // CHECK: tensor.extract_slice {{.*}} {__inplace_operands_attr__ = ["true"]} + %2 = tensor.extract_slice %1[0][4][1] : tensor<10xf32> to tensor<4xf32> + %3 = tensor.extract %2[%a] : tensor<4xf32> + vector.print %3 : f32 + } + return +} + +// ----- + +// CHECK-LABEL: func @read_definition_in_same_repetitive_region_as_conflicting_write( +func.func @read_definition_in_same_repetitive_region_as_conflicting_write( + %t: tensor<10xf32>, %a: index, %b: index, %c: index, %cst: f32) { + // Cannot bufferize in-place according to normal op dominance rules. + // CHECK: tensor.insert {{.*}} {__inplace_operands_attr__ = ["none", "false", "none"]} + %1 = tensor.insert %cst into %t[%a] : tensor<10xf32> + // CHECK: scf.for + scf.for %iv = %a to %b step %c { + // CHECK: tensor.extract_slice {{.*}} {__inplace_operands_attr__ = ["true"]} + %2 = tensor.extract_slice %t[0][4][1] : tensor<10xf32> to tensor<4xf32> + %3 = tensor.extract %2[%a] : tensor<4xf32> + vector.print %3 : f32 + } + return +}