Index: mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h
===================================================================
--- mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h
+++ mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h
@@ -8,6 +8,7 @@
 #ifndef MLIR_CONVERSION_GPUTONVVM_GPUTONVVMPASS_H_
 #define MLIR_CONVERSION_GPUTONVVM_GPUTONVVMPASS_H_
 
+#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
 #include <memory>
 
 namespace mlir {
@@ -24,9 +25,11 @@
 void populateGpuToNVVMConversionPatterns(LLVMTypeConverter &converter,
                                          OwningRewritePatternList &patterns);
 
-/// Creates a pass that lowers GPU dialect operations to NVVM counterparts.
-std::unique_ptr<OperationPass<gpu::GPUModuleOp>>
-createLowerGpuOpsToNVVMOpsPass();
+/// Creates a pass that lowers GPU dialect operations to NVVM counterparts. The
+/// index bitwidth used for the lowering of the device side index computations
+/// is configurable.
+std::unique_ptr<OperationPass<gpu::GPUModuleOp>> createLowerGpuOpsToNVVMOpsPass(
+    unsigned indexBitwidth = kDeriveIndexBitwidthFromDataLayout);
 
 } // namespace mlir
Index: mlir/include/mlir/Conversion/Passes.td
===================================================================
--- mlir/include/mlir/Conversion/Passes.td
+++ mlir/include/mlir/Conversion/Passes.td
@@ -94,6 +94,11 @@
 def ConvertGpuOpsToNVVMOps : Pass<"convert-gpu-to-nvvm", "gpu::GPUModuleOp"> {
   let summary = "Generate NVVM operations for gpu operations";
   let constructor = "mlir::createLowerGpuOpsToNVVMOpsPass()";
+  let options = [
+    Option<"indexBitwidth", "index-bitwidth", "unsigned",
+           /*default=kDeriveIndexBitwidthFromDataLayout*/"0",
+           "Bitwidth of the index type, 0 to use size of machine word">
+  ];
 }
 
 //===----------------------------------------------------------------------===//
Index: mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
===================================================================
--- mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
+++ mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -30,7 +30,6 @@
 
 namespace {
 
-
 struct GPUShuffleOpLowering : public ConvertToLLVMPattern {
   explicit GPUShuffleOpLowering(LLVMTypeConverter &lowering_)
      : ConvertToLLVMPattern(gpu::ShuffleOp::getOperationName(),
@@ -97,17 +96,25 @@
 ///
 /// This pass only handles device code and is not meant to be run on GPU host
 /// code.
-class LowerGpuOpsToNVVMOpsPass
+struct LowerGpuOpsToNVVMOpsPass
     : public ConvertGpuOpsToNVVMOpsBase<LowerGpuOpsToNVVMOpsPass> {
-public:
+  LowerGpuOpsToNVVMOpsPass() = default;
+  LowerGpuOpsToNVVMOpsPass(unsigned indexBitwidth) {
+    this->indexBitwidth = indexBitwidth;
+  }
+
   void runOnOperation() override {
     gpu::GPUModuleOp m = getOperation();
 
+    /// Customize the bitwidth used for the device side index computations
+    LLVMTypeConverterCustomization customs;
+    customs.indexBitwidth = indexBitwidth;
+
     /// MemRef conversion for GPU to NVVM lowering. The GPU dialect uses memory
     /// space 5 for private memory attributions, but NVVM represents private
     /// memory allocations as local `alloca`s in the default address space. This
     /// converter drops the private memory space to support the use case above.
-    LLVMTypeConverter converter(m.getContext());
+    LLVMTypeConverter converter(m.getContext(), customs);
     converter.addConversion([&](MemRefType type) -> Optional<Type> {
       if (type.getMemorySpace() != gpu::GPUDialect::getPrivateAddressSpace())
         return llvm::None;
@@ -176,6 +183,6 @@
 }
 
 std::unique_ptr<OperationPass<gpu::GPUModuleOp>>
-mlir::createLowerGpuOpsToNVVMOpsPass() {
-  return std::make_unique<LowerGpuOpsToNVVMOpsPass>();
+mlir::createLowerGpuOpsToNVVMOpsPass(unsigned indexBitwidth) {
+  return std::make_unique<LowerGpuOpsToNVVMOpsPass>(indexBitwidth);
 }
Index: mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
===================================================================
--- mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
+++ mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
@@ -1,36 +1,52 @@
 // RUN: mlir-opt %s -convert-gpu-to-nvvm -split-input-file | FileCheck %s --dump-input-on-failure
+// RUN: mlir-opt %s -convert-gpu-to-nvvm='index-bitwidth=32' -split-input-file | FileCheck --check-prefix=CHECK32 %s
 
 gpu.module @test_module {
   // CHECK-LABEL: func @gpu_index_ops()
+  // CHECK32-LABEL: func @gpu_index_ops()
   func @gpu_index_ops()
       -> (index, index, index, index, index, index,
           index, index, index, index, index, index) {
+    // CHECK32-NOT: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64
+
     // CHECK: = nvvm.read.ptx.sreg.tid.x : !llvm.i32
+    // CHECK: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64
     %tIdX = "gpu.thread_id"() {dimension = "x"} : () -> (index)
     // CHECK: = nvvm.read.ptx.sreg.tid.y : !llvm.i32
+    // CHECK: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64
     %tIdY = "gpu.thread_id"() {dimension = "y"} : () -> (index)
     // CHECK: = nvvm.read.ptx.sreg.tid.z : !llvm.i32
+    // CHECK: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64
     %tIdZ = "gpu.thread_id"() {dimension = "z"} : () -> (index)
     // CHECK: = nvvm.read.ptx.sreg.ntid.x : !llvm.i32
+    // CHECK: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64
     %bDimX = "gpu.block_dim"() {dimension = "x"} : () -> (index)
     // CHECK: = nvvm.read.ptx.sreg.ntid.y : !llvm.i32
+    // CHECK: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64
     %bDimY = "gpu.block_dim"() {dimension = "y"} : () -> (index)
     // CHECK: = nvvm.read.ptx.sreg.ntid.z : !llvm.i32
+    // CHECK: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64
     %bDimZ = "gpu.block_dim"() {dimension = "z"} : () -> (index)
     // CHECK: = nvvm.read.ptx.sreg.ctaid.x : !llvm.i32
+    // CHECK: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64
     %bIdX = "gpu.block_id"() {dimension = "x"} : () -> (index)
     // CHECK: = nvvm.read.ptx.sreg.ctaid.y : !llvm.i32
+    // CHECK: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64
     %bIdY = "gpu.block_id"() {dimension = "y"} : () -> (index)
     // CHECK: = nvvm.read.ptx.sreg.ctaid.z : !llvm.i32
+    // CHECK: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64
     %bIdZ = "gpu.block_id"() {dimension = "z"} : () -> (index)
     // CHECK: = nvvm.read.ptx.sreg.nctaid.x : !llvm.i32
+    // CHECK: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64
     %gDimX = "gpu.grid_dim"() {dimension = "x"} : () -> (index)
     // CHECK: = nvvm.read.ptx.sreg.nctaid.y : !llvm.i32
+    // CHECK: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64
     %gDimY = "gpu.grid_dim"() {dimension = "y"} : () -> (index)
     // CHECK: = nvvm.read.ptx.sreg.nctaid.z : !llvm.i32
+    // CHECK: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64
     %gDimZ = "gpu.grid_dim"() {dimension = "z"} : () -> (index)
 
     std.return %tIdX, %tIdY, %tIdZ, %bDimX, %bDimY, %bDimZ,
@@ -42,6 +58,21 @@
 
 // -----
 
+gpu.module @test_module {
+  // CHECK-LABEL: func @gpu_index_comp
+  // CHECK32-LABEL: func @gpu_index_comp
+  func @gpu_index_comp(%idx : index) -> index {
+    // CHECK: = llvm.add %{{.*}}, %{{.*}} : !llvm.i64
+    // CHECK32: = llvm.add %{{.*}}, %{{.*}} : !llvm.i32
+    %0 = addi %idx, %idx : index
+    // CHECK: llvm.return %{{.*}} : !llvm.i64
+    // CHECK32: llvm.return %{{.*}} : !llvm.i32
+    std.return %0 : index
+  }
+}
+
+// -----
+
 gpu.module @test_module {
   // CHECK-LABEL: func @gpu_all_reduce_op()
   gpu.func @gpu_all_reduce_op() {
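
Usage note: besides the -convert-gpu-to-nvvm='index-bitwidth=32' form exercised
by the new RUN line, the configurable overload can be scheduled
programmatically. A minimal sketch, assuming a top-level ModuleOp pass manager
with gpu.module ops nested inside; the helper name buildGpuToNVVMPipeline is
illustrative and not part of this patch:

  #include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"
  #include "mlir/Dialect/GPU/GPUDialect.h"
  #include "mlir/Pass/PassManager.h"

  // Illustrative helper: run the lowering on every gpu.module nested under
  // the top-level module, lowering device-side index computations to i32
  // instead of the width derived from the data layout.
  static void buildGpuToNVVMPipeline(mlir::PassManager &pm) {
    auto &kernelPm = pm.nest<mlir::gpu::GPUModuleOp>();
    kernelPm.addPass(
        mlir::createLowerGpuOpsToNVVMOpsPass(/*indexBitwidth=*/32));
  }

Omitting the argument keeps the previous behavior: the default
kDeriveIndexBitwidthFromDataLayout (0, mirrored by the "0" default in
Passes.td) derives the index width from the data layout, so existing callers
are unaffected.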