Changeset View
Changeset View
Standalone View
Standalone View
mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
Show All 22 Lines | |||||
#include "mlir/Transforms/DialectConversion.h" | #include "mlir/Transforms/DialectConversion.h" | ||||
#include "mlir/Transforms/GreedyPatternRewriteDriver.h" | #include "mlir/Transforms/GreedyPatternRewriteDriver.h" | ||||
#include "llvm/Support/FormatVariadic.h" | #include "llvm/Support/FormatVariadic.h" | ||||
#include "../GPUCommon/GPUOpsLowering.h" | #include "../GPUCommon/GPUOpsLowering.h" | ||||
#include "../GPUCommon/IndexIntrinsicsOpLowering.h" | #include "../GPUCommon/IndexIntrinsicsOpLowering.h" | ||||
#include "../GPUCommon/OpToFuncCallLowering.h" | #include "../GPUCommon/OpToFuncCallLowering.h" | ||||
#include "../PassDetail.h" | #include "../PassDetail.h" | ||||
#include "WmmaLoadStoreToNvvmLowering.h" | |||||
#include "WmmaMmaOptoNvvmLowering.h" | |||||
using namespace mlir; | using namespace mlir; | ||||
namespace { | namespace { | ||||
struct GPUShuffleOpLowering : public ConvertOpToLLVMPattern<gpu::ShuffleOp> { | struct GPUShuffleOpLowering : public ConvertOpToLLVMPattern<gpu::ShuffleOp> { | ||||
using ConvertOpToLLVMPattern<gpu::ShuffleOp>::ConvertOpToLLVMPattern; | using ConvertOpToLLVMPattern<gpu::ShuffleOp>::ConvertOpToLLVMPattern; | ||||
▲ Show 20 Lines • Show All 83 Lines • ▼ Show 20 Lines | void runOnOperation() override { | ||||
LLVMTypeConverter converter(m.getContext(), options); | LLVMTypeConverter converter(m.getContext(), options); | ||||
converter.addConversion([&](MemRefType type) -> Optional<Type> { | converter.addConversion([&](MemRefType type) -> Optional<Type> { | ||||
if (type.getMemorySpaceAsInt() != | if (type.getMemorySpaceAsInt() != | ||||
gpu::GPUDialect::getPrivateAddressSpace()) | gpu::GPUDialect::getPrivateAddressSpace()) | ||||
return llvm::None; | return llvm::None; | ||||
return converter.convertType(MemRefType::Builder(type).setMemorySpace(0)); | return converter.convertType(MemRefType::Builder(type).setMemorySpace(0)); | ||||
}); | }); | ||||
// Lowering for MMAMatrixType. | |||||
converter.addConversion([&](gpu::MMAMatrixType type) -> Type { | |||||
// The number of items in structToReturn is dependent on the dataType | |||||
// and the MMA operand that this operation is associated with. | |||||
llvm::DenseMap<StringRef, int64_t> numElemsPerThreadF16, | |||||
numElemsPerThreadF32; | |||||
numElemsPerThreadF16["AOp"] = 8; | |||||
numElemsPerThreadF16["BOp"] = 8; | |||||
numElemsPerThreadF16["COp"] = 4; | |||||
numElemsPerThreadF16["DOp"] = 4; | |||||
numElemsPerThreadF32["AOp"] = 8; | |||||
numElemsPerThreadF32["BOp"] = 8; | |||||
numElemsPerThreadF32["COp"] = 8; | |||||
numElemsPerThreadF32["DOp"] = 8; | |||||
Type structToReturn; | |||||
if (type.getElementType().isF16()) { | |||||
unsigned vecSize = 2 /*number of f16's in 32-bit.*/; | |||||
bondhugula: Use a C++-style comment here, and terminate it with a full stop. | |||||
ftynse: Doesn't look addressed. C++-style comments are line comments starting with `//`. | |||||
Type vec = VectorType::get(vecSize, FloatType::getF16(&getContext())); | |||||
unsigned size = numElemsPerThreadF16[type.getOperand()]; | |||||
SmallVector<Type> elements(size, vec); | |||||
structToReturn = | |||||
LLVM::LLVMStructType::getLiteral(&getContext(), elements); | |||||
} else if (type.getElementType().isF32()) { | |||||
unsigned size = numElemsPerThreadF32[type.getOperand()]; | |||||
SmallVector<Type> elements(size, FloatType::getF32(&getContext())); | |||||
structToReturn = | |||||
LLVM::LLVMStructType::getLiteral(&getContext(), elements); | |||||
} | |||||
return structToReturn; | |||||
}); | |||||
RewritePatternSet patterns(m.getContext()); | RewritePatternSet patterns(m.getContext()); | ||||
RewritePatternSet llvmPatterns(m.getContext()); | RewritePatternSet llvmPatterns(m.getContext()); | ||||
// Apply in-dialect lowering first. In-dialect lowering will replace ops | // Apply in-dialect lowering first. In-dialect lowering will replace ops | ||||
// which need to be lowered further, which is not supported by a single | // which need to be lowered further, which is not supported by a single | ||||
// conversion pass. | // conversion pass. | ||||
populateGpuRewritePatterns(patterns); | populateGpuRewritePatterns(patterns); | ||||
(void)applyPatternsAndFoldGreedily(m, std::move(patterns)); | (void)applyPatternsAndFoldGreedily(m, std::move(patterns)); | ||||
Show All 39 Lines | void mlir::populateGpuToNVVMConversionPatterns(LLVMTypeConverter &converter, | ||||
// Explicitly drop memory space when lowering private memory | // Explicitly drop memory space when lowering private memory | ||||
// attributions since NVVM models it as `alloca`s in the default | // attributions since NVVM models it as `alloca`s in the default | ||||
// memory space and does not support `alloca`s with addrspace(5). | // memory space and does not support `alloca`s with addrspace(5). | ||||
patterns.add<GPUFuncOpLowering>( | patterns.add<GPUFuncOpLowering>( | ||||
converter, /*allocaAddrSpace=*/0, | converter, /*allocaAddrSpace=*/0, | ||||
Identifier::get(NVVM::NVVMDialect::getKernelFuncAttrName(), | Identifier::get(NVVM::NVVMDialect::getKernelFuncAttrName(), | ||||
&converter.getContext())); | &converter.getContext())); | ||||
patterns.insert<WmmaLoadOpToNVVMLowering>(converter); | |||||
patterns.insert<WmmaMmaOpToNVVMLowering>(converter); | |||||
patterns.insert<WmmaStoreOpToNVVMLowering>(converter); | |||||
patterns.add<OpToFuncCallLowering<AbsFOp>>(converter, "__nv_fabsf", | patterns.add<OpToFuncCallLowering<AbsFOp>>(converter, "__nv_fabsf", | ||||
"__nv_fabs"); | "__nv_fabs"); | ||||
patterns.add<OpToFuncCallLowering<math::AtanOp>>(converter, "__nv_atanf", | patterns.add<OpToFuncCallLowering<math::AtanOp>>(converter, "__nv_atanf", | ||||
"__nv_atan"); | "__nv_atan"); | ||||
patterns.add<OpToFuncCallLowering<math::Atan2Op>>(converter, "__nv_atan2f", | patterns.add<OpToFuncCallLowering<math::Atan2Op>>(converter, "__nv_atan2f", | ||||
"__nv_atan2"); | "__nv_atan2"); | ||||
patterns.add<OpToFuncCallLowering<CeilFOp>>(converter, "__nv_ceilf", | patterns.add<OpToFuncCallLowering<CeilFOp>>(converter, "__nv_ceilf", | ||||
"__nv_ceil"); | "__nv_ceil"); | ||||
Show All 32 Lines |
Use a C++-style comment here, and terminate it with a full stop.