diff --git a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp
--- a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp
+++ b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp
@@ -403,10 +403,10 @@
   Value constOne = (numBlockDims < 3 || numThreadDims < 3)
                        ? builder.create<ConstantIndexOp>(rootForOp.getLoc(), 1)
                        : nullptr;
-  Value gridSizeX = dims[0];
+  Value gridSizeX = numBlockDims > 0 ? dims[0] : constOne;
   Value gridSizeY = numBlockDims > 1 ? dims[1] : constOne;
   Value gridSizeZ = numBlockDims > 2 ? dims[2] : constOne;
-  Value blockSizeX = dims[numBlockDims];
+  Value blockSizeX = numThreadDims > 0 ? dims[numBlockDims] : constOne;
   Value blockSizeY = numThreadDims > 1 ? dims[numBlockDims + 1] : constOne;
   Value blockSizeZ = numThreadDims > 2 ? dims[numBlockDims + 2] : constOne;
 
diff --git a/mlir/test/Conversion/LoopsToGPU/no_blocks_no_threads.mlir b/mlir/test/Conversion/LoopsToGPU/no_blocks_no_threads.mlir
new file mode 100644
--- /dev/null
+++ b/mlir/test/Conversion/LoopsToGPU/no_blocks_no_threads.mlir
@@ -0,0 +1,40 @@
+// RUN: mlir-opt -convert-loops-to-gpu -gpu-block-dims=0 -gpu-thread-dims=1 %s | FileCheck --check-prefix=CHECK-THREADS %s --dump-input-on-failure
+// RUN: mlir-opt -convert-loops-to-gpu -gpu-block-dims=1 -gpu-thread-dims=0 %s | FileCheck --check-prefix=CHECK-BLOCKS %s --dump-input-on-failure
+
+// Exercises the loops-to-GPU conversion when either the number of block
+// dimensions or the number of thread dimensions is zero: every launch size
+// that has no loop mapped to it must be the constant 1.
+
+// CHECK-THREADS-LABEL: @one_d_loop
+// CHECK-BLOCKS-LABEL: @one_d_loop
+func @one_d_loop(%A : memref<?xf32>, %B : memref<?xf32>) {
+  // Bounds of the loop, its range and step.
+  // CHECK-THREADS-NEXT: %{{.*}} = constant 0 : index
+  // CHECK-THREADS-NEXT: %{{.*}} = constant 42 : index
+  // CHECK-THREADS-NEXT: %[[BOUND:.*]] = subi %{{.*}}, %{{.*}} : index
+  // CHECK-THREADS-NEXT: %{{.*}} = constant 1 : index
+  // CHECK-THREADS-NEXT: %[[ONE:.*]] = constant 1 : index
+  //
+  // CHECK-BLOCKS-NEXT: %{{.*}} = constant 0 : index
+  // CHECK-BLOCKS-NEXT: %{{.*}} = constant 42 : index
+  // CHECK-BLOCKS-NEXT: %[[BOUND:.*]] = subi %{{.*}}, %{{.*}} : index
+  // CHECK-BLOCKS-NEXT: %{{.*}} = constant 1 : index
+  // CHECK-BLOCKS-NEXT: %[[ONE:.*]] = constant 1 : index
+
+  // The loop range sizes the x dimension of the requested kind (threads or
+  // blocks); all five remaining launch sizes must be the constant one.
+  // CHECK-THREADS-NEXT: gpu.launch blocks(%[[B0:.*]], %[[B1:.*]], %[[B2:.*]]) in (%{{.*}} = %[[ONE]], %{{.*}} = %[[ONE]], %{{.*}} = %[[ONE]]) threads(%[[T0:.*]], %[[T1:.*]], %[[T2:.*]]) in (%{{.*}} = %[[BOUND]], %{{.*}} = %[[ONE]], %{{.*}} = %[[ONE]])
+  // CHECK-BLOCKS-NEXT: gpu.launch blocks(%[[B0:.*]], %[[B1:.*]], %[[B2:.*]]) in (%{{.*}} = %[[BOUND]], %{{.*}} = %[[ONE]], %{{.*}} = %[[ONE]]) threads(%[[T0:.*]], %[[T1:.*]], %[[T2:.*]]) in (%{{.*}} = %[[ONE]], %{{.*}} = %[[ONE]], %{{.*}} = %[[ONE]])
+  affine.for %i = 0 to 42 {
+  // CHECK-THREADS-NEXT: %[[INDEX:.*]] = addi %{{.*}}, %[[T0]]
+  // CHECK-THREADS-NEXT: load %{{.*}}[%[[INDEX]]]
+  // CHECK-BLOCKS-NEXT: %[[INDEX:.*]] = addi %{{.*}}, %[[B0]]
+  // CHECK-BLOCKS-NEXT: load %{{.*}}[%[[INDEX]]]
+    %0 = load %A[%i] : memref<?xf32>
+    store %0, %B[%i] : memref<?xf32>
+    // CHECK-THREADS: gpu.terminator
+    // CHECK-BLOCKS: gpu.terminator
+  }
+  return
+}
+