diff --git a/mlir/include/mlir/Conversion/SCFToGPU/SCFToGPU.h b/mlir/include/mlir/Conversion/SCFToGPU/SCFToGPU.h --- a/mlir/include/mlir/Conversion/SCFToGPU/SCFToGPU.h +++ b/mlir/include/mlir/Conversion/SCFToGPU/SCFToGPU.h @@ -12,9 +12,10 @@ namespace mlir { class AffineForOp; +class ConversionTarget; +struct LogicalResult; class MLIRContext; class OwningRewritePatternList; -struct LogicalResult; class Value; namespace scf { @@ -44,6 +45,10 @@ void populateParallelLoopToGPUPatterns(OwningRewritePatternList &patterns, MLIRContext *ctx); +/// Configures the rewrite target such that only `scf.parallel` operations that +/// are not rewritten by the provided patterns are legal. +void configureParallelLoopToGPULegality(ConversionTarget &target); + } // namespace mlir #endif // MLIR_CONVERSION_SCFTOGPU_SCFTOGPU_H_ diff --git a/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp b/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp --- a/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp +++ b/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp @@ -458,9 +458,10 @@ if (!boundIsPrecise) { upperBound = deriveStaticUpperBound(upperBound, rewriter); if (!upperBound) { - return parallelOp.emitOpError() - << "cannot derive loop-invariant upper bound for number " - "of iterations"; + return rewriter.notifyMatchFailure( + parallelOp, + "cannot derive loop-invariant upper bound for number " + "of iterations"); } } // Compute the number of iterations needed. We compute this as an @@ -481,9 +482,9 @@ // todo(herhut,ravishankarm): Update the behavior of setMappingAttr // when this condition is relaxed. 
if (bounds.find(processor) != bounds.end()) { - return parallelOp.emitOpError() - << "cannot redefine the bound for processor " - << static_cast(processor); + return rewriter.notifyMatchFailure( + parallelOp, "cannot redefine the bound for processor " + + Twine(static_cast(processor))); } bounds[processor] = launchBound; } @@ -565,6 +566,10 @@ LogicalResult ParallelToGpuLaunchLowering::matchAndRewrite(ParallelOp parallelOp, PatternRewriter &rewriter) const { + // We can only transform starting at the outer-most loop. Launches inside of + // parallel loops are not supported. + if (parallelOp.getParentOfType()) + return failure(); // Create a launch operation. We start with bound one for all grid/block // sizes. Those will be refined later as we discover them from mappings. Location loc = parallelOp.getLoc(); @@ -640,3 +645,9 @@ MLIRContext *ctx) { patterns.insert(ctx); } + +void mlir::configureParallelLoopToGPULegality(ConversionTarget &target) { + target.addDynamicallyLegalOp([](scf::ParallelOp parallelOp) { + return !parallelOp.getAttr(gpu::getMappingAttrName()); + }); +} diff --git a/mlir/lib/Conversion/SCFToGPU/SCFToGPUPass.cpp b/mlir/lib/Conversion/SCFToGPU/SCFToGPUPass.cpp --- a/mlir/lib/Conversion/SCFToGPU/SCFToGPUPass.cpp +++ b/mlir/lib/Conversion/SCFToGPU/SCFToGPUPass.cpp @@ -53,7 +53,7 @@ target.addLegalDialect(); target.addLegalDialect(); target.addLegalDialect(); - target.addIllegalOp(); + configureParallelLoopToGPULegality(target); if (failed(applyPartialConversion(getOperation(), target, std::move(patterns)))) signalPassFailure(); diff --git a/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir b/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir --- a/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir +++ b/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir @@ -317,15 +317,13 @@ // ----- -// Mapping to the same processor twice. +// Mapping to the same processor twice. Cannot be mapped. 
func @parallel_double_map(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index, %buf : memref, %res : memref) { %four = constant 4 : index - // expected-error@+2 {{cannot redefine the bound for processor 1}} - // expected-error@+1 {{failed to legalize operation 'scf.parallel'}} scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) step (%four, %four) { } { mapping = [ @@ -335,9 +333,12 @@ return } +// CHECK-LABEL: @parallel_double_map +// CHECK: scf.parallel + // ----- -// Loop with loop-variant upper bound. +// Loop with loop-variant upper bound. Cannot be mapped. func @parallel_loop_loop_variant_bound(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index, @@ -346,10 +347,8 @@ %zero = constant 0 : index %one = constant 1 : index %four = constant 4 : index - // expected-error@+1 {{failed to legalize operation 'scf.parallel'}} scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) step (%four, %four) { - // expected-error@+1 {{cannot derive loop-invariant upper bound}} scf.parallel (%si0, %si1) = (%zero, %zero) to (%i0, %i1) step (%one, %one) { %idx0 = addi %i0, %si0 : index @@ -366,3 +365,25 @@ ] } return } + +// CHECK-LABEL: @parallel_loop_loop_variant_bound +// CHECK: scf.parallel +// CHECK: scf.parallel + +// ----- + +// Loop without mapping annotations. Cannot be mapped and stays scf.parallel. + +func @parallel_no_annotations(%arg0 : index, %arg1 : index, %arg2 : index, + %arg3 : index, + %buf : memref, + %res : memref) { + %four = constant 4 : index + scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) + step (%four, %four) { + } + return +} + +// CHECK-LABEL: @parallel_no_annotations +// CHECK: scf.parallel