Index: mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h =================================================================== --- mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h +++ mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h @@ -8,6 +8,7 @@ #ifndef MLIR_CONVERSION_GPUTONVVM_GPUTONVVMPASS_H_ #define MLIR_CONVERSION_GPUTONVVM_GPUTONVVMPASS_H_ +#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h" #include namespace mlir { @@ -24,9 +25,11 @@ void populateGpuToNVVMConversionPatterns(LLVMTypeConverter &converter, OwningRewritePatternList &patterns); -/// Creates a pass that lowers GPU dialect operations to NVVM counterparts. -std::unique_ptr> -createLowerGpuOpsToNVVMOpsPass(); +/// Creates a pass that lowers GPU dialect operations to NVVM counterparts. The +/// index bitwidth used for the lowering of the device side index computations +/// is configurable. +std::unique_ptr> createLowerGpuOpsToNVVMOpsPass( + unsigned indexBitwidth = kDeriveIndexBitwidthFromDataLayout); } // namespace mlir Index: mlir/include/mlir/Conversion/Passes.td =================================================================== --- mlir/include/mlir/Conversion/Passes.td +++ mlir/include/mlir/Conversion/Passes.td @@ -94,6 +94,11 @@ def ConvertGpuOpsToNVVMOps : Pass<"convert-gpu-to-nvvm", "gpu::GPUModuleOp"> { let summary = "Generate NVVM operations for gpu operations"; let constructor = "mlir::createLowerGpuOpsToNVVMOpsPass()"; + let options = [ + Option<"indexBitwidth", "index-bitwidth", "unsigned", + /*default=kDeriveIndexBitwidthFromDataLayout*/"0", + "Bitwidth of the index type, 0 to use size of machine word"> + ]; } //===----------------------------------------------------------------------===// Index: mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp =================================================================== --- mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp +++ mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp @@ -30,7 +30,6 @@ 
namespace { - struct GPUShuffleOpLowering : public ConvertToLLVMPattern { explicit GPUShuffleOpLowering(LLVMTypeConverter &lowering_) : ConvertToLLVMPattern(gpu::ShuffleOp::getOperationName(), @@ -97,17 +96,25 @@ /// /// This pass only handles device code and is not meant to be run on GPU host /// code. -class LowerGpuOpsToNVVMOpsPass +struct LowerGpuOpsToNVVMOpsPass : public ConvertGpuOpsToNVVMOpsBase { -public: + LowerGpuOpsToNVVMOpsPass() = default; + LowerGpuOpsToNVVMOpsPass(unsigned indexBitwidth) { + this->indexBitwidth = indexBitwidth; + } + void runOnOperation() override { gpu::GPUModuleOp m = getOperation(); + /// Customize the bitwidth used for the device side index computations + LLVMTypeConverterCustomization customs; + customs.indexBitwidth = indexBitwidth; + /// MemRef conversion for GPU to NVVM lowering. The GPU dialect uses memory /// space 5 for private memory attributions, but NVVM represents private /// memory allocations as local `alloca`s in the default address space. This /// converter drops the private memory space to support the use case above. - LLVMTypeConverter converter(m.getContext()); + LLVMTypeConverter converter(m.getContext(), customs); converter.addConversion([&](MemRefType type) -> Optional { if (type.getMemorySpace() != gpu::GPUDialect::getPrivateAddressSpace()) return llvm::None; @@ -176,6 +183,6 @@ } std::unique_ptr> -mlir::createLowerGpuOpsToNVVMOpsPass() { - return std::make_unique(); +mlir::createLowerGpuOpsToNVVMOpsPass(unsigned indexBitwidth) { + return std::make_unique(indexBitwidth); }