diff --git a/mlir/include/mlir/Transforms/LoopUtils.h b/mlir/include/mlir/Transforms/LoopUtils.h --- a/mlir/include/mlir/Transforms/LoopUtils.h +++ b/mlir/include/mlir/Transforms/LoopUtils.h @@ -187,25 +187,26 @@ /// Performs explicit copying for the contiguous sequence of operations in the /// block iterator range [`begin', `end'), where `end' can't be past the /// terminator of the block (since additional operations are potentially -/// inserted right before `end`. Returns the total size of fast memory space -/// buffers used. `copyOptions` provides various parameters, and the output -/// argument `copyNests` is the set of all copy nests inserted, each represented -/// by its root affine.for. Since we generate alloc's and dealloc's for all fast -/// buffers (before and after the range of operations resp. or at a hoisted -/// position), all of the fast memory capacity is assumed to be available for -/// processing this block range. When 'filterMemRef' is specified, copies are -/// only generated for the provided MemRef. -uint64_t affineDataCopyGenerate(Block::iterator begin, Block::iterator end, - const AffineCopyOptions ©Options, - Optional filterMemRef, - DenseSet ©Nests); +/// inserted right before `end`. `copyOptions` provides various parameters, and +/// the output argument `copyNests` is the set of all copy nests inserted, each +/// represented by its root affine.for. Since we generate alloc's and dealloc's +/// for all fast buffers (before and after the range of operations resp. or at a +/// hoisted position), all of the fast memory capacity is assumed to be +/// available for processing this block range. When 'filterMemRef' is specified, +/// copies are only generated for the provided MemRef. Returns success if the +/// explicit copying succeeded for all memrefs on which affine load/stores were +/// encountered. +LogicalResult affineDataCopyGenerate(Block::iterator begin, Block::iterator end, + const AffineCopyOptions ©Options, + Optional filterMemRef, + DenseSet ©Nests); /// A convenience version of affineDataCopyGenerate for all ops in the body of /// an AffineForOp. -uint64_t affineDataCopyGenerate(AffineForOp forOp, - const AffineCopyOptions ©Options, - Optional filterMemRef, - DenseSet ©Nests); +LogicalResult affineDataCopyGenerate(AffineForOp forOp, + const AffineCopyOptions ©Options, + Optional filterMemRef, + DenseSet ©Nests); /// Result for calling generateCopyForMemRegion. struct CopyGenerateResult { diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp --- a/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp @@ -66,7 +66,7 @@ } void runOnFunction() override; - LogicalResult runOnBlock(Block *block, DenseSet ©Nests); + void runOnBlock(Block *block, DenseSet ©Nests); // Constant zero index to avoid too many duplicates. Value zeroIndex = nullptr; @@ -94,11 +94,10 @@ /// ranges: each range is either a sequence of one or more operations starting /// and ending with an affine load or store op, or just an affine.forop (which /// could have other affine for op's nested within). -LogicalResult -AffineDataCopyGeneration::runOnBlock(Block *block, - DenseSet ©Nests) { +void AffineDataCopyGeneration::runOnBlock(Block *block, + DenseSet ©Nests) { if (block->empty()) - return success(); + return; uint64_t fastMemCapacityBytes = fastMemoryCapacity != std::numeric_limits::max() @@ -108,7 +107,7 @@ fastMemorySpace, tagMemorySpace, fastMemCapacityBytes}; - // Every affine.forop in the block starts and ends a block range for copying; + // Every affine.for op in the block starts and ends a block range for copying; // in addition, a contiguous sequence of operations starting with a // load/store op but not including any copy nests themselves is also // identified as a copy block range. Straightline code (a contiguous chunk of @@ -133,8 +132,8 @@ // If you hit a non-copy for loop, we will split there. if ((forOp = dyn_cast(&*it)) && copyNests.count(forOp) == 0) { // Perform the copying up unti this 'for' op first. - affineDataCopyGenerate(/*begin=*/curBegin, /*end=*/it, copyOptions, - /*filterMemRef=*/llvm::None, copyNests); + (void)affineDataCopyGenerate(/*begin=*/curBegin, /*end=*/it, copyOptions, + /*filterMemRef=*/llvm::None, copyNests); // Returns true if the footprint is known to exceed capacity. auto exceedsCapacity = [&](AffineForOp forOp) { @@ -157,7 +156,7 @@ if (recurseInner) { // We'll recurse and do the copies at an inner level for 'forInst'. // Recurse onto the body of this loop. - (void)runOnBlock(forOp.getBody(), copyNests); + runOnBlock(forOp.getBody(), copyNests); } else { // We have enough capacity, i.e., copies will be computed for the // portion of the block until 'it', and for 'it', which is 'forOp'. Note @@ -167,8 +166,9 @@ // Inner loop copies have their own scope - we don't thus update // consumed capacity. The footprint check above guarantees this inner // loop's footprint fits. - affineDataCopyGenerate(/*begin=*/it, /*end=*/std::next(it), copyOptions, - /*filterMemRef=*/llvm::None, copyNests); + (void)affineDataCopyGenerate(/*begin=*/it, /*end=*/std::next(it), + copyOptions, + /*filterMemRef=*/llvm::None, copyNests); } // Get to the next load or store op after 'forOp'. curBegin = std::find_if(std::next(it), block->end(), [&](Operation &op) { @@ -190,11 +190,10 @@ assert(!curBegin->hasTrait() && "can't be a terminator"); // Exclude the affine.yield - hence, the std::prev. - affineDataCopyGenerate(/*begin=*/curBegin, /*end=*/std::prev(block->end()), - copyOptions, /*filterMemRef=*/llvm::None, copyNests); + (void)affineDataCopyGenerate(/*begin=*/curBegin, + /*end=*/std::prev(block->end()), copyOptions, + /*filterMemRef=*/llvm::None, copyNests); } - - return success(); } void AffineDataCopyGeneration::runOnFunction() { @@ -210,7 +209,7 @@ copyNests.clear(); for (auto &block : f) - (void)runOnBlock(&block, copyNests); + runOnBlock(&block, copyNests); // Promote any single iteration loops in the copy nests and collect // load/stores to simplify. diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Transforms/Utils/LoopUtils.cpp --- a/mlir/lib/Transforms/Utils/LoopUtils.cpp +++ b/mlir/lib/Transforms/Utils/LoopUtils.cpp @@ -2970,24 +2970,13 @@ return true; } -/// Performs explicit copying for the contiguous sequence of operations in the -/// block iterator range [`begin', `end'), where `end' can't be past the -/// terminator of the block (since additional operations are potentially -/// inserted right before `end`. Returns the total size of fast memory space -/// buffers used. `copyOptions` provides various parameters, and the output -/// argument `copyNests` is the set of all copy nests inserted, each represented -/// by its root affine.for. Since we generate alloc's and dealloc's for all fast -/// buffers (before and after the range of operations resp. or at a hoisted -/// position), all of the fast memory capacity is assumed to be available for -/// processing this block range. When 'filterMemRef' is specified, copies are -/// only generated for the provided MemRef. -uint64_t mlir::affineDataCopyGenerate(Block::iterator begin, - Block::iterator end, - const AffineCopyOptions ©Options, - Optional filterMemRef, - DenseSet ©Nests) { +LogicalResult mlir::affineDataCopyGenerate(Block::iterator begin, + Block::iterator end, + const AffineCopyOptions ©Options, + Optional filterMemRef, + DenseSet ©Nests) { if (begin == end) - return 0; + return success(); assert(begin->getBlock() == std::prev(end)->getBlock() && "Inconsistent block begin/end args"); @@ -3109,9 +3098,9 @@ }); if (error) { - begin->emitError( - "copy generation failed for one or more memref's in this block\n"); - return 0; + LLVM_DEBUG(begin->emitError( + "copy generation failed for one or more memref's in this block\n")); + return failure(); } uint64_t totalCopyBuffersSizeInBytes = 0; @@ -3147,18 +3136,18 @@ processRegions(writeRegions); if (!ret) { - begin->emitError( - "copy generation failed for one or more memref's in this block\n"); - return totalCopyBuffersSizeInBytes; + LLVM_DEBUG(begin->emitError( + "copy generation failed for one or more memref's in this block\n")); + return failure(); } // For a range of operations, a note will be emitted at the caller. AffineForOp forOp; uint64_t sizeInKib = llvm::divideCeil(totalCopyBuffersSizeInBytes, 1024); if (llvm::DebugFlag && (forOp = dyn_cast(&*begin))) { - forOp.emitRemark() - << sizeInKib - << " KiB of copy buffers in fast memory space for this block\n"; + LLVM_DEBUG(forOp.emitRemark() + << sizeInKib + << " KiB of copy buffers in fast memory space for this block\n"); } if (totalCopyBuffersSizeInBytes > copyOptions.fastMemCapacityBytes) { @@ -3167,15 +3156,15 @@ block->getParentOp()->emitWarning(str); } - return totalCopyBuffersSizeInBytes; + return success(); } // A convenience version of affineDataCopyGenerate for all ops in the body of // an AffineForOp. -uint64_t mlir::affineDataCopyGenerate(AffineForOp forOp, - const AffineCopyOptions ©Options, - Optional filterMemRef, - DenseSet ©Nests) { +LogicalResult mlir::affineDataCopyGenerate(AffineForOp forOp, + const AffineCopyOptions ©Options, + Optional filterMemRef, + DenseSet ©Nests) { return affineDataCopyGenerate(forOp.getBody()->begin(), std::prev(forOp.getBody()->end()), copyOptions, filterMemRef, copyNests); diff --git a/mlir/test/Dialect/Affine/dma-generate.mlir b/mlir/test/Dialect/Affine/dma-generate.mlir --- a/mlir/test/Dialect/Affine/dma-generate.mlir +++ b/mlir/test/Dialect/Affine/dma-generate.mlir @@ -277,7 +277,6 @@ // size -- not yet implemented. // CHECK: affine.load %{{.*}}[%{{.*}}, %{{.*}}] : memref affine.load %arg0[%i, %j] : memref - // expected-error@-6 {{copy generation failed for one or more memref's in this block}} } } return @@ -297,7 +296,6 @@ // not yet implemented. // CHECK: affine.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref<1024x1024x1024xf32> %v = affine.load %arg0[%idx, %idy, %idz] : memref<1024 x 1024 x 1024 x f32> - // expected-error@-10 {{copy generation failed for one or more memref's in this block}} } } } diff --git a/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp b/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp --- a/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp +++ b/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp @@ -90,12 +90,16 @@ /*fastMemCapacityBytes=*/32 * 1024 * 1024UL}; DenseSet copyNests; if (clMemRefFilter) { - affineDataCopyGenerate(loopNest, copyOptions, load.getMemRef(), copyNests); + if (failed(affineDataCopyGenerate(loopNest, copyOptions, load.getMemRef(), + copyNests))) + return; } else if (clTestGenerateCopyForMemRegion) { CopyGenerateResult result; MemRefRegion region(loopNest.getLoc()); - (void)region.compute(load, /*loopDepth=*/0); - (void)generateCopyForMemRegion(region, loopNest, copyOptions, result); + if (failed(region.compute(load, /*loopDepth=*/0))) + return; + if (failed(generateCopyForMemRegion(region, loopNest, copyOptions, result))) + return; } // Promote any single iteration loops in the copy nests and simplify