diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp --- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp +++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp @@ -1276,10 +1276,10 @@ numThreads, tileSizes, getMapping()); if (failed(result)) { - results.assign(3, nullptr); - Diagnostic diag(target->getLoc(), DiagnosticSeverity::Remark); - diag << "could not tile reduction in target."; - return DiagnosedSilenceableFailure::silenceableFailure(std::move(diag)); + results.assign(4, nullptr); + auto diag = emitSilenceableError() << "could not tile reduction"; + diag.attachNote(target.getLoc()) << "target operation"; + return diag; } results.push_back(result->loops); results.push_back(result->initialOp); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp @@ -626,6 +626,10 @@ "many elements as number of threads"); int reductionDim = static_cast<int>(redDims.front()); + if (redDims.front() >= numThreads.size()) + return b.notifyMatchFailure( + op, "reduction dimension must be mapped to threads"); + // 1. Create the inital tensor value. FailureOr<Operation *> identityTensor = op.generateInitialTensorForPartialReduction(b, loc, numThreads, diff --git a/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp @@ -258,6 +258,8 @@ // Insert the new parallel dimension based on the index of the reduction // loop. This could be controlled by user for more flexibility. 
int64_t insertSplitDimension = reductionDims[0]; + assert(sizes.size() >= static_cast<size_t>(insertSplitDimension) && + "reduction dimension must be tiled"); SmallVector<Operation *, 4> combinerOps; if (!matchReduction(linalgOp.getRegionOutputArgs(), 0, combinerOps) || diff --git a/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp b/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp --- a/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp @@ -424,6 +424,9 @@ break; } } + if (static_cast<size_t>(reductionDim) >= tileSize.size()) + return b.notifyMatchFailure(op, "reduction dimension must be tiled"); + // 1. create the inital tensor value. FailureOr<Operation *> identityTensor = op.generateInitialTensorForPartialReduction(b, loc, tileSize, diff --git a/mlir/test/Dialect/Linalg/transform-tile-reduction.mlir b/mlir/test/Dialect/Linalg/transform-tile-reduction.mlir --- a/mlir/test/Dialect/Linalg/transform-tile-reduction.mlir +++ b/mlir/test/Dialect/Linalg/transform-tile-reduction.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -test-transform-dialect-interpreter -split-input-file -canonicalize -cse | FileCheck %s +// RUN: mlir-opt %s -test-transform-dialect-interpreter -split-input-file -canonicalize -cse -verify-diagnostics | FileCheck %s func.func @reduction_tile(%arg0: tensor<?x?xf32>, %out: tensor<?xf32>) -> tensor<?xf32> { %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%arg0 : tensor<?x?xf32>) @@ -303,3 +303,30 @@ // CHECK: iterator_types = ["parallel", "reduction"] transform.print %3 {name = "expecting parallel reduction"} : !pdl.operation } + +// ----- + +func.func @reduction_untiled_foreach_thread( + %arg0: tensor<?x?xf32>, %out: tensor<?xf32>) -> tensor<?xf32> { + // expected-note @below {{target operation}} + %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, + affine_map<(d0, d1) -> (d0)>], + iterator_types = ["parallel", "reduction"]} + ins(%arg0 : tensor<?x?xf32>) + outs(%out : tensor<?xf32>) { + ^bb0(%arg7: f32, %arg9: f32): + %1 = arith.mulf %arg7, %arg7 : f32 + %2 = 
arith.addf %1, %arg9 : f32 + linalg.yield %2 : f32 + } -> tensor<?xf32> + return %red : tensor<?xf32> +} + +transform.sequence failures(propagate) { +^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 + // expected-error @below {{could not tile reduction}} + %loop, %1, %2, %3 = transform.structured.tile_reduction_using_foreach_thread %0 + by num_threads = [5], tile_sizes = [3], mapping = [#gpu.thread<x>] + +}