diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
@@ -381,7 +381,8 @@
     if (destinationStyleOp) {
       for (OpOperand *outOperand : destinationStyleOp.getDpsInitOperands()) {
         auto *it = llvm::find(dest, outOperand->get());
-        assert(it != dest.end() && "dest operand not found in dest");
+        if (it == dest.end())
+          return op->emitOpError("must have \"tensor semantic\" for tiling");
         unsigned destNum = std::distance(dest.begin(), it);
         outOperand->set(destBbArgs[destNum]);
       }
diff --git a/mlir/test/Dialect/GPU/transform-gpu-failing.mlir b/mlir/test/Dialect/GPU/transform-gpu-failing.mlir
--- a/mlir/test/Dialect/GPU/transform-gpu-failing.mlir
+++ b/mlir/test/Dialect/GPU/transform-gpu-failing.mlir
@@ -274,4 +274,32 @@
   transform.gpu.map_nested_foreach_to_threads %funcop { blockDim = [32, 32]}
 }
+// -----
+
+func.func @tiling_buffer_semantic_op(%x: memref<32x32xf32>, %y: memref<32x32xf32>, %stream : !gpu.async.token) {
+  %one = arith.constant 1 : index
+  %name = gpu.launch async[%stream] blocks(%arg3, %arg4, %arg5) in (%arg9 = %one, %arg10 = %one, %arg11 = %one)
+            threads(%arg6, %arg7, %arg8) in (%arg12 = %one, %arg13 = %one, %arg14 = %one)
+  {
+    // expected-error @below {{'linalg.generic' op must have "tensor semantic" for tiling}}
+    // expected-note @below {{when applied to this op}}
+    linalg.generic
+      {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
+                        affine_map<(d0, d1) -> (d0, d1)>],
+       iterator_types = ["parallel", "parallel"]}
+      ins(%x : memref<32x32xf32>)
+      outs(%y : memref<32x32xf32>) {
+        ^bb0(%in: f32, %out: f32):
+          linalg.yield %in : f32
+    }
+    gpu.terminator
+  }
+  return
+}
+transform.sequence failures(propagate) {
+^bb1(%arg0: !pdl.operation):
+  %matmul = transform.structured.match ops{["linalg.generic"]} in %arg0 : (!pdl.operation) -> !pdl.operation
+  // expected-error @below {{transform.structured.tile_to_foreach_thread_op failed to apply}}
+  %foreach, %tiled = transform.structured.tile_to_foreach_thread_op %matmul num_threads [10, 20, 30] (mapping = [ #gpu.thread<y>, #gpu.thread<x>, #gpu.thread<z> ] )
+}