diff --git a/mlir/include/mlir/Conversion/NVGPUToNVVM/NVGPUToNVVM.h b/mlir/include/mlir/Conversion/NVGPUToNVVM/NVGPUToNVVM.h
--- a/mlir/include/mlir/Conversion/NVGPUToNVVM/NVGPUToNVVM.h
+++ b/mlir/include/mlir/Conversion/NVGPUToNVVM/NVGPUToNVVM.h
@@ -12,13 +12,28 @@
 namespace mlir {
 
+class Attribute;
 class LLVMTypeConverter;
+class MemRefType;
+class MLIRContext;
 class RewritePatternSet;
 class Pass;
 
 #define GEN_PASS_DECL_CONVERTNVGPUTONVVMPASS
 #include "mlir/Conversion/Passes.h.inc"
 
+namespace nvgpu {
+class MBarrierType;
+
+/// Returns the memory space attribute of the mbarrier object.
+Attribute getMbarrierMemorySpace(MLIRContext *context,
+                                 MBarrierType barrierType);
+
+/// Return the memref type that can be used to represent an mbarrier object.
+MemRefType getMBarrierMemrefType(MLIRContext *context,
+                                 MBarrierType barrierType);
+} // namespace nvgpu
+
 void populateNVGPUToNVVMConversionPatterns(LLVMTypeConverter &converter,
                                            RewritePatternSet &patterns);
 
 } // namespace mlir
diff --git a/mlir/include/mlir/Dialect/NVGPU/TransformOps/NVGPUTransformOps.td b/mlir/include/mlir/Dialect/NVGPU/TransformOps/NVGPUTransformOps.td
--- a/mlir/include/mlir/Dialect/NVGPU/TransformOps/NVGPUTransformOps.td
+++ b/mlir/include/mlir/Dialect/NVGPU/TransformOps/NVGPUTransformOps.td
@@ -15,6 +15,21 @@
 include "mlir/Dialect/Transform/IR/TransformTypes.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
 
+//===----------------------------------------------------------------------===//
+// ApplyNVGPUToNVVMConversionPatternsOp
+//===----------------------------------------------------------------------===//
+
+def ApplyNVGPUToNVVMConversionPatternsOp : Op<Transform_Dialect,
+    "apply_conversion_patterns.nvgpu.nvgpu_to_nvvm",
+    [DeclareOpInterfaceMethods<ConversionPatternDescriptorOpInterface,
+                               ["verifyTypeConverter"]>]> {
+  let description = [{
+    Collects patterns that convert NVGPU dialect ops to NVVM dialect ops. These
+    patterns require an "LLVMTypeConverter".
+  }];
+  let assemblyFormat = "attr-dict";
+}
+
 //===----------------------------------------------------------------------===//
 // CreateAsyncGroupsOp
 //===----------------------------------------------------------------------===//
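Note: the new transform op is designed to be nested under
transform.apply_conversion_patterns, with an "LLVMTypeConverter" supplied
through the op's type_converter region. A minimal usage sketch, mirroring the
test added at the end of this patch:

  transform.apply_conversion_patterns to %0 {
    transform.apply_conversion_patterns.nvgpu.nvgpu_to_nvvm
  } with type_converter {
    transform.apply_conversion_patterns.memref.memref_to_llvm_type_converter
      {use_opaque_pointers = true}
  } : !transform.any_op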
diff --git a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
--- a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
+++ b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
@@ -206,6 +206,46 @@
   return result;
 }
 
+/// Returns whether mbarrier object has shared memory address space.
+static bool isMbarrierShared(nvgpu::MBarrierType barrierType) {
+  return (mlir::nvgpu::NVGPUDialect::isSharedMemoryAddressSpace(
+      barrierType.getMemorySpace()));
+}
+
+/// Returns the memory space attribute of the mbarrier object.
+Attribute nvgpu::getMbarrierMemorySpace(MLIRContext *context,
+                                        nvgpu::MBarrierType barrierType) {
+  Attribute memorySpace = {};
+  if (isMbarrierShared(barrierType)) {
+    memorySpace =
+        IntegerAttr::get(IntegerType::get(context, 64),
+                         nvgpu::NVGPUDialect::kSharedMemoryAddressSpace);
+  }
+  return memorySpace;
+}
+
+/// Returns memref type of the mbarrier object. The type is defined in the
+/// MBarrierType.
+MemRefType nvgpu::getMBarrierMemrefType(MLIRContext *context,
+                                        nvgpu::MBarrierType barrierType) {
+  Attribute memorySpace = nvgpu::getMbarrierMemorySpace(context, barrierType);
+  MemRefLayoutAttrInterface layout;
+  return MemRefType::get({1}, IntegerType::get(context, 64), layout,
+                         memorySpace);
+}
+
+/// Returns the base pointer of the mbarrier object.
+static Value getMbarrierPtr(ConversionPatternRewriter &rewriter,
+                            LLVMTypeConverter &typeConverter,
+                            TypedValue<nvgpu::MBarrierType> barrier,
+                            Value barrierMemref) {
+  MemRefType memrefType =
+      nvgpu::getMBarrierMemrefType(rewriter.getContext(), barrier.getType());
+  MemRefDescriptor memRefDescriptor(barrierMemref);
+  return memRefDescriptor.bufferPtr(rewriter, barrier.getLoc(), typeConverter,
+                                    memrefType);
+}
+
 namespace {
 
 struct MmaLdMatrixOpToNVVM : public ConvertOpToLLVMPattern<nvgpu::LdMatrixOp> {
@@ -353,43 +393,6 @@
   }
 };
 
-/// Returns whether mbarrier object has shared memory address space.
-static bool isMbarrierShared(nvgpu::MBarrierType barrierType) {
-  return (mlir::nvgpu::NVGPUDialect::isSharedMemoryAddressSpace(
-      barrierType.getMemorySpace()));
-}
-
-/// Returns whether memory space attribute of the mbarrier object.
-static Attribute getMbarrierMemorySpace(RewriterBase &rewriter,
-                                        nvgpu::MBarrierType barrierType) {
-  Attribute memorySpace = {};
-  if (isMbarrierShared(barrierType)) {
-    memorySpace = rewriter.getI64IntegerAttr(
-        nvgpu::NVGPUDialect::kSharedMemoryAddressSpace);
-  }
-  return memorySpace;
-}
-
-/// Returns memref type of the mbarrier object. The type is defined in the
-/// MBarrierType.
-static MemRefType createMBarrierMemrefType(RewriterBase &rewriter,
-                                           nvgpu::MBarrierType barrierType) {
-  Attribute memorySpace = getMbarrierMemorySpace(rewriter, barrierType);
-  MemRefLayoutAttrInterface layout;
-  return MemRefType::get({1}, rewriter.getI64Type(), layout, memorySpace);
-}
-
-/// Returns the base pointer of the mbarrier object.
-static Value getMbarrierPtr(ConversionPatternRewriter &rewriter,
-                            LLVMTypeConverter &typeConverter,
-                            TypedValue<nvgpu::MBarrierType> barrier,
-                            Value barrierMemref) {
-  MemRefType memrefType = createMBarrierMemrefType(rewriter, barrier.getType());
-  MemRefDescriptor memRefDescriptor(barrierMemref);
-  return memRefDescriptor.bufferPtr(rewriter, barrier.getLoc(), typeConverter,
-                                    memrefType);
-}
-
 struct ConvertNVGPUToNVVMPass
     : public impl::ConvertNVGPUToNVVMPassBase<ConvertNVGPUToNVVMPass> {
   using Base::Base;
@@ -415,7 +418,8 @@
       return converter.convertType(IntegerType::get(type.getContext(), 64));
     });
     converter.addConversion([&](nvgpu::MBarrierType type) -> Type {
-      return converter.convertType(createMBarrierMemrefType(rewriter, type));
+      return converter.convertType(
+          nvgpu::getMBarrierMemrefType(rewriter.getContext(), type));
     });
     converter.addConversion([&](nvgpu::TensorMapDescriptorType type) -> Type {
       return converter.getPointerType(type.getTensor().getElementType());
@@ -748,8 +752,8 @@
   matchAndRewrite(nvgpu::MBarrierCreateOp op, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     Operation *funcOp = op->getParentOp();
-    MemRefType barrierType =
-        createMBarrierMemrefType(rewriter, op.getBarrier().getType());
+    MemRefType barrierType = nvgpu::getMBarrierMemrefType(
+        rewriter.getContext(), op.getBarrier().getType());
 
     memref::GlobalOp global;
     if (auto moduleOp = funcOp->getParentOfType<gpu::GPUModuleOp>())
diff --git a/mlir/lib/Dialect/NVGPU/TransformOps/CMakeLists.txt b/mlir/lib/Dialect/NVGPU/TransformOps/CMakeLists.txt
--- a/mlir/lib/Dialect/NVGPU/TransformOps/CMakeLists.txt
+++ b/mlir/lib/Dialect/NVGPU/TransformOps/CMakeLists.txt
@@ -21,4 +21,7 @@
   MLIRTransformDialect
   MLIRTransformDialectUtils
   MLIRVectorTransforms
+
+  # Conversions
+  MLIRNVGPUToNVVM
 )
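Note on the mbarrier representation used above: a barrier in shared
(workgroup) memory is modeled as a single-element i64 memref in address
space 3, so for the type

  !nvgpu.mbarrier.barrier<memorySpace = #gpu.address_space<workgroup>>

getMBarrierMemrefType returns memref<1xi64, 3>, which is what the CHECK lines
in the updated nvgpu-to-nvvm.mlir test below match against.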
diff --git a/mlir/lib/Dialect/NVGPU/TransformOps/NVGPUTransformOps.cpp b/mlir/lib/Dialect/NVGPU/TransformOps/NVGPUTransformOps.cpp
--- a/mlir/lib/Dialect/NVGPU/TransformOps/NVGPUTransformOps.cpp
+++ b/mlir/lib/Dialect/NVGPU/TransformOps/NVGPUTransformOps.cpp
@@ -9,6 +9,8 @@
 #include "mlir/Dialect/NVGPU/TransformOps/NVGPUTransformOps.h"
 
 #include "mlir/Analysis/SliceAnalysis.h"
+#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
+#include "mlir/Conversion/NVGPUToNVVM/NVGPUToNVVM.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Arith/IR/Arith.h"
 #include "mlir/Dialect/Arith/Utils/Utils.h"
@@ -39,6 +41,45 @@
 #define DBGSNL() (llvm::dbgs() << "\n")
 #define LDBG(X) LLVM_DEBUG(DBGS() << X << "\n")
 
+//===----------------------------------------------------------------------===//
+// Apply...ConversionPatternsOp
+//===----------------------------------------------------------------------===//
+
+void transform::ApplyNVGPUToNVVMConversionPatternsOp::populatePatterns(
+    TypeConverter &typeConverter, RewritePatternSet &patterns) {
+  auto &llvmTypeConverter = static_cast<LLVMTypeConverter &>(typeConverter);
+  /// device-side async tokens cannot be materialized in nvvm. We just
+  /// convert them to a dummy i32 type in order to easily drop them during
+  /// conversion.
+  llvmTypeConverter.addConversion(
+      [&](nvgpu::DeviceAsyncTokenType type) -> Type {
+        return llvmTypeConverter.convertType(
+            IntegerType::get(type.getContext(), 32));
+      });
+  llvmTypeConverter.addConversion([&](nvgpu::MBarrierTokenType type) -> Type {
+    return llvmTypeConverter.convertType(
+        IntegerType::get(type.getContext(), 64));
+  });
+  llvmTypeConverter.addConversion([&](nvgpu::MBarrierType type) -> Type {
+    return llvmTypeConverter.convertType(
+        getMBarrierMemrefType(type.getContext(), type));
+  });
+  llvmTypeConverter.addConversion(
+      [&](nvgpu::TensorMapDescriptorType type) -> Type {
+        return llvmTypeConverter.getPointerType(
+            type.getTensor().getElementType());
+      });
+  populateNVGPUToNVVMConversionPatterns(llvmTypeConverter, patterns);
+}
+
+LogicalResult
+transform::ApplyNVGPUToNVVMConversionPatternsOp::verifyTypeConverter(
+    transform::TypeConverterBuilderOpInterface builder) {
+  if (builder.getTypeConverterType() != "LLVMTypeConverter")
+    return emitOpError("expected LLVMTypeConverter");
+  return success();
+}
+
 //===---------------------------------------------------------------------===//
 // CreateAsyncGroupsOp
 //===---------------------------------------------------------------------===//
@@ -966,7 +1007,8 @@
   auto copyOp = cast<linalg::CopyOp>(op);
   auto inMemRef =
       cast<TypedValue<MemRefType>>(copyOp.getDpsInputOperand(0)->get());
-  assert(inMemRef.getType().getRank() == 2 && "expected in to be a 2D memref");
+  assert(inMemRef.getType().getRank() == 2 &&
+         "expected in to be a 2D memref");
 
   // 2. Build global memory descriptor.
   TypedValue<nvgpu::TensorMapDescriptorType> globalDesc =
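The type conversions registered in populatePatterns above intentionally mirror
the ones installed by ConvertNVGPUToNVVMPass, so routing the conversion through
the transform interpreter should produce the same lowering as the standalone
pass. The updated test exercises both routes against the same FileCheck
patterns:

  // RUN: mlir-opt %s -convert-nvgpu-to-nvvm='use-opaque-pointers=1' | FileCheck %s
  // RUN: mlir-opt %s -test-transform-dialect-interpreter | FileCheck %s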
diff --git a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp
--- a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp
+++ b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp
@@ -490,8 +490,13 @@
                                          transform::TransformResults &results,
                                          transform::TransformState &state) {
   MLIRContext *ctx = getContext();
-  // Default type converter is built on demand.
+  // Instantiate the default type converter if a type converter builder is
+  // specified.
   std::unique_ptr<TypeConverter> defaultTypeConverter;
+  transform::TypeConverterBuilderOpInterface typeConverterBuilder =
+      getDefaultTypeConverter();
+  if (typeConverterBuilder)
+    defaultTypeConverter = typeConverterBuilder.getTypeConverter();
 
   // Configure conversion target.
   ConversionTarget conversionTarget(*ctx);
@@ -512,6 +517,10 @@
 
   // Gather all specified patterns.
   RewritePatternSet patterns(ctx);
+  // Need to keep the converters alive until after pattern application because
+  // the patterns take a reference to an object that would otherwise get out of
+  // scope.
+  SmallVector<std::unique_ptr<TypeConverter>> keepAliveConverters;
   if (!getPatterns().empty()) {
     for (Operation &op : getPatterns().front()) {
       auto descriptor =
@@ -522,31 +531,25 @@
           descriptor.getTypeConverter();
       TypeConverter *converter = nullptr;
       if (typeConverter) {
-        converter = typeConverter.get();
+        keepAliveConverters.emplace_back(std::move(typeConverter));
+        converter = keepAliveConverters.back().get();
       } else {
         // No type converter specified: Use the default type converter.
         if (!defaultTypeConverter) {
-          // Instantiate the default type converter.
-          transform::TypeConverterBuilderOpInterface typeConverterBuilder =
-              getDefaultTypeConverter();
-          if (!typeConverterBuilder) {
-            auto diag = emitDefiniteFailure()
-                        << "pattern descriptor does not specify type "
-                           "converter and apply_conversion_patterns op has "
-                           "no default type converter";
-            diag.attachNote(op.getLoc()) << "pattern descriptor op";
-            return diag;
-          }
-          defaultTypeConverter = typeConverterBuilder.getTypeConverter();
-          assert(defaultTypeConverter && "expected type converter");
+          auto diag = emitDefiniteFailure()
+                      << "pattern descriptor does not specify type "
+                         "converter and apply_conversion_patterns op has "
+                         "no default type converter";
+          diag.attachNote(op.getLoc()) << "pattern descriptor op";
+          return diag;
         }
         converter = defaultTypeConverter.get();
       }
       descriptor.populatePatterns(*converter, patterns);
     }
   }
-  FrozenRewritePatternSet frozenPatterns(std::move(patterns));
+  FrozenRewritePatternSet frozenPatterns(std::move(patterns));
 
   for (Operation *target : state.getPayloadOps(getTarget())) {
     // Make sure that this transform is not applied to itself. Modifying the
     // transform IR while it is being interpreted is generally dangerous.
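The keepAliveConverters vector above fixes a lifetime hazard: conversion
patterns keep a pointer to the TypeConverter they were built with, so a
converter owned by a pattern descriptor has to outlive the pattern set. A
minimal sketch of the bug this guards against (descriptor and ctx are
hypothetical placeholders):

  RewritePatternSet patterns(ctx);
  {
    std::unique_ptr<TypeConverter> converter = descriptor.getTypeConverter();
    descriptor.populatePatterns(*converter, patterns);
  } // converter is destroyed here...
  // ...but the patterns still hold a pointer to it: use-after-free.
  FrozenRewritePatternSet frozen(std::move(patterns));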
diff --git a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir
--- a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir
+++ b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir
@@ -1,4 +1,5 @@
-// RUN: mlir-opt --convert-nvgpu-to-nvvm='use-opaque-pointers=1' --split-input-file %s | FileCheck %s
+// RUN: mlir-opt %s -convert-nvgpu-to-nvvm='use-opaque-pointers=1' | FileCheck %s
+// RUN: mlir-opt %s -test-transform-dialect-interpreter | FileCheck %s
 
 // CHECK-LABEL: @m16n8k16_fp16
 func.func @m16n8k16_fp16(%arg0: vector<4x2xf16>, %arg1: vector<2x2xf16>, %arg2: vector<2x2xf16>) -> vector<2x2xf16> {
@@ -22,8 +23,6 @@
   return %d : vector<2x2xf16>
 }
 
-// -----
-
 // Same as above but with fp32 acumulation type.
 
 // CHECK-LABEL: @m16n8k16_fp16_fp32
@@ -50,8 +49,6 @@
   return %d : vector<2x2xf32>
 }
 
-// -----
-
 // CHECK-LABEL: @m16n8k8_fp16
 func.func @m16n8k8_fp16(%arg0: vector<2x2xf16>, %arg1: vector<1x2xf16>, %arg2: vector<2x2xf16>) -> vector<2x2xf16> {
   // CHECK: llvm.extractvalue %{{.*}}[0] : !llvm.array<2 x vector<2xf16>>
@@ -72,9 +69,6 @@
   return %d : vector<2x2xf16>
 }
 
-// -----
-
-
 // CHECK-LABEL: @m16n8k32_int8
 func.func @m16n8k32_int8(%arg0: vector<4x4xi8>, %arg1: vector<2x4xi8>, %arg2: vector<2x2xi32>) -> vector<2x2xi32> {
   // CHECK: [[el:%.+]] = llvm.extractvalue %{{.*}}[{{.*}}] : !llvm.array<4 x vector<4xi8>>
@@ -100,8 +94,6 @@
   return %d : vector<2x2xi32>
 }
 
-// -----
-
 // CHECK-LABEL: @m16n8k32_i4
 func.func @m16n8k32_i4(%arg0: vector<2x8xi4>, %arg1: vector<1x8xi4>, %arg2: vector<2x2xi32>) -> vector<2x2xi32> {
   // CHECK: [[el:%.+]] = llvm.extractvalue %{{.*}}[{{.*}}] : !llvm.array<2 x vector<8xi4>>
@@ -121,8 +113,6 @@
   return %d : vector<2x2xi32>
 }
 
-// -----
-
 // CHECK-LABEL: @m16n8k64_i4
 func.func @m16n8k64_i4(%arg0: vector<4x8xi4>, %arg1: vector<2x8xi4>, %arg2: vector<2x2xi32>) -> vector<2x2xi32> {
   // CHECK: [[el:%.+]] = llvm.extractvalue %{{.*}}[{{.*}}] : !llvm.array<4 x vector<8xi4>>
@@ -148,8 +138,6 @@
   return %d : vector<2x2xi32>
 }
 
-// -----
-
 // CHECK-LABEL: @m8n8k4_f64
 func.func @m8n8k4_f64(%arg0: vector<1x1xf64>, %arg1: vector<1x1xf64>, %arg2: vector<1x2xf64>) -> vector<1x2xf64> {
   // CHECK: llvm.extractvalue
@@ -167,8 +155,6 @@
   return %d : vector<1x2xf64>
 }
 
-// -----
-
 // CHECK-LABEL: @ldmatrix_x4
 func.func @ldmatrix_x4(%arg0: memref<128x128xf16, 3>) -> vector<4x2xf16> {
@@ -190,8 +176,6 @@
   return %a : vector<4x2xf16>
 }
 
-// -----
-
 // CHECK-LABEL: @ldmatrix_x1
 func.func @ldmatrix_x1(%arg0: memref<128x128xf16, 3>) -> vector<1x2xf16> {
   %c0 = arith.constant 0 : index
@@ -202,8 +186,6 @@
   return %a : vector<1x2xf16>
 }
 
-// -----
-
 // CHECK-LABEL: @m16n8k4_tf32
 func.func @m16n8k4_tf32(%arg0: vector<2x1xf32>, %arg1: vector<1x1xf32>, %arg2: vector<2x2xf32>) -> vector<2x2xf32> {
   // The A, B operand should be bitcast to i32
@@ -237,8 +219,6 @@
   return %d : vector<2x2xf32>
 }
 
-// -----
-
 // CHECK-LABEL: @async_cp(
 // CHECK-SAME: %[[IDX:[a-zA-Z0-9_]+]]: index)
 func.func @async_cp(
@@ -270,8 +250,6 @@
   return
 }
 
-// -----
-
 // CHECK-LABEL: @async_cp_i4(
 // CHECK-SAME: %[[IDX:[a-zA-Z0-9_]+]]: index)
 func.func @async_cp_i4(
@@ -293,8 +271,6 @@
   return %0 : !nvgpu.device.async.token
 }
 
-// -----
-
 // CHECK-LABEL: @async_cp_zfill_f32_align4(
 // CHECK-SAME: %[[IDX:[a-zA-Z0-9_]+]]: index, %[[SRCELEMENTS:[a-zA-Z0-9_]+]]: index
 func.func @async_cp_zfill_f32_align4(
@@ -330,8 +306,6 @@
   return
 }
 
-// -----
-
 // CHECK-LABEL: @async_cp_zfill_f32_align1(
 // CHECK-SAME: %[[IDX:[a-zA-Z0-9_]+]]: index, %[[SRCELEMENTS:[a-zA-Z0-9_]+]]: index)
 func.func @async_cp_zfill_f32_align1(
@@ -367,8 +341,6 @@
   return
 }
 
-// -----
-
 // CHECK-LABEL: func @mma_sp_sync_f16_16832(
 func.func @mma_sp_sync_f16_16832(%arg0: vector<4x2xf16>,
@@ -409,8 +381,6 @@
   return %d : vector<2x2xf16>
 }
 
-// -----
-
 // CHECK-LABEL: func @mma_sp_sync_f16_16816(
 func.func @mma_sp_sync_f16_16816(%arg0: vector<2x2xf16>,
                                  %arg1: vector<2x2xf16>,
@@ -441,8 +411,6 @@
   return %d : vector<2x2xf16>
 }
 
-// -----
-
 // CHECK-LABEL: func @mma_sp_sync_f16_16816_01(
 func.func @mma_sp_sync_f16_16816_01(%arg0: vector<2x2xf16>,
                                     %arg1: vector<2x2xf16>,
@@ -464,8 +432,6 @@
   return %d : vector<2x2xf16>
 }
 
-// -----
-
 // CHECK-LABEL: func @mma_sp_sync_i8_16864(
 func.func @mma_sp_sync_i8_16864(%arg0: vector<4x4xi8>,
                                 %arg1: vector<4x4xi8>,
@@ -504,7 +470,6 @@
   return %d : vector<2x2xi32>
 }
 
-// -----
 
 !barrierType = !nvgpu.mbarrier.barrier<memorySpace = #gpu.address_space<workgroup>>
 !tokenType = !nvgpu.mbarrier.token
@@ -512,7 +477,7 @@
 func.func @mbarrier() {
   %num_threads = arith.constant 128 : index
 
-  // CHECK: %[[barMemref:.+]] = memref.get_global @__mbarrier : memref<1xi64, 3>
+  // CHECK: %[[barMemref:.+]] = memref.get_global @__mbarrier{{.*}} : memref<1xi64, 3>
   %barrier = nvgpu.mbarrier.create -> !barrierType
 
   // CHECK: %[[barStr:.+]] = builtin.unrealized_conversion_cast %[[barMemref]] : memref<1xi64, 3> to !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
@@ -531,16 +496,12 @@
   func.return
 }
 
-// -----
-!barrierType = !nvgpu.mbarrier.barrier<memorySpace = #gpu.address_space<workgroup>>
-!tokenType = !nvgpu.mbarrier.token
-
 // CHECK-LABEL: func @mbarrier_nocomplete
 func.func @mbarrier_nocomplete() {
   %num_threads = arith.constant 128 : index
   %count = arith.constant 12 : index
 
-  // CHECK: %[[barMemref:.+]] = memref.get_global @__mbarrier : memref<1xi64, 3>
+  // CHECK: %[[barMemref:.+]] = memref.get_global @__mbarrier{{.*}} : memref<1xi64, 3>
   %barrier = nvgpu.mbarrier.create -> !barrierType
 
   // CHECK: %[[barStr:.+]] = builtin.unrealized_conversion_cast %[[barMemref]] : memref<1xi64, 3> to !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
@@ -559,15 +520,11 @@
   func.return
 }
 
-// -----
-!barrierType = !nvgpu.mbarrier.barrier<memorySpace = #gpu.address_space<workgroup>>
-!tokenType = !nvgpu.mbarrier.token
-
 // CHECK-LABEL: func @mbarrier_txcount
 func.func @mbarrier_txcount() {
   %num_threads = arith.constant 128 : index
 
-  // CHECK: %[[barMemref:.+]] = memref.get_global @__mbarrier : memref<1xi64, 3>
+  // CHECK: %[[barMemref:.+]] = memref.get_global @__mbarrier{{.*}} : memref<1xi64, 3>
   %barrier = nvgpu.mbarrier.create -> !barrierType
 
   // CHECK: %[[barStr:.+]] = builtin.unrealized_conversion_cast %[[barMemref]] : memref<1xi64, 3> to !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
@@ -604,8 +561,6 @@
   func.return
 }
 
-// -----
-
 // CHECK-LABEL: func @async_tma_load
 !tensorMap1d = !nvgpu.tensormap.descriptor<tensor = memref<128xf32, 3>, swizzle=none, l2promo = none, oob = nan, interleave = none>
 !tensorMap2d = !nvgpu.tensormap.descriptor<tensor = memref<32x32xf32, 3>, swizzle=swizzle_32b, l2promo = none, oob = zero, interleave = none>
@@ -648,13 +603,9 @@
   func.return
 }
 
-// -----
-
 !lhsTensorMap = !nvgpu.tensormap.descriptor<tensor = memref<128x64xf16, 3>, swizzle = swizzle_128b, l2promo = none, oob = zero, interleave = none>
 !rhsTensorMap = !nvgpu.tensormap.descriptor<tensor = memref<64x128xf16, strided<[128, 1], offset: 8192>, 3>, swizzle = swizzle_128b, l2promo = none, oob = zero, interleave = none>
 
-!barrierType = !nvgpu.mbarrier.barrier<memorySpace = #gpu.address_space<workgroup>>
-
 !shmemlhs = memref<128x64xf16,3>
 !shmemrhs = memref<64x128xf16, strided<[128, 1], offset: 8192>, 3>
 
@@ -679,3 +630,15 @@
     return
   }
 }
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !transform.any_op):
+  %0 = transform.structured.match ops{["func.func"]} in %arg1
+    : (!transform.any_op) -> !transform.any_op
+  transform.apply_conversion_patterns to %0 {
+    transform.apply_conversion_patterns.nvgpu.nvgpu_to_nvvm
+  } with type_converter {
+    transform.apply_conversion_patterns.memref.memref_to_llvm_type_converter
+      {use_opaque_pointers = true}
+  } {legal_dialects = ["arith", "func", "llvm", "memref", "nvvm", "scf"], partial_conversion} : !transform.any_op
+}
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -2999,6 +2999,7 @@
         ":LinalgDialect",
        ":MemRefDialect",
        ":NVGPUDialect",
+       ":NVGPUToNVVM",
        ":NVGPUTransformOpsIncGen",
        ":NVGPUTransforms",
        ":NVVMDialect",