This is an archive of the discontinued LLVM Phabricator instance.

mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
1473–1477	This is a problem: The code is adding an extension (`registry.addExtension`) as part of an extension. I haven't figured out the details yet, but this can cause a reallocation in the underlying `extensions` vector while it is being iterated over.

springerm commandeered this revision. Aug 25 2023, 7:22 AM

springerm edited reviewers, added: nicolasvasilache; removed: springerm.

rebase

springerm edited the summary of this revision. (Show Details) Aug 25 2023, 7:22 AM

springerm added a parent revision: D158838: [mlir] Fix crash when adding nested dialect extensions.

springerm added inline comments.

mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
1473–1477	fixed in D158838

Harbormaster completed remote builds in B254882: Diff 553466. Aug 25 2023, 7:56 AM

(drive by comment)

mlir/lib/Dialect/Transform/IR/TransformOps.cpp
712 ↗	(On Diff #553466)	Leftover debug

thanks @springerm !

nicolasvasilache commandeered this revision. Aug 30 2023, 1:05 AM

nicolasvasilache edited reviewers, added: springerm; removed: nicolasvasilache.

springerm accepted this revision. Sep 4 2023, 4:31 AM

This revision is now accepted and ready to land. Sep 4 2023, 4:31 AM

Rebase

Harbormaster completed remote builds in B256540: Diff 555756. Sep 4 2023, 8:32 AM

Revision Contents

Path

Size

mlir/

include/

mlir/

Dialect/

Transform/

IR/

TransformDialect.h

9 lines

lib/

Dialect/

GPU/

TransformOps/

CMakeLists.txt

6 lines

GPUTransformOps.cpp

16 lines

Linalg/

TransformOps/

CMakeLists.txt

1 line

DialectExtension.cpp

2 lines

test/

Integration/

GPU/

CUDA/

TensorCore/

sm80/

transform-mma-sync-matmul-f32.mlir

145 lines

Diff 549404

mlir/include/mlir/Dialect/Transform/IR/TransformDialect.h

Show First 20 Lines • Show All 227 Lines • ▼ Show 20 Lines	protected:
/// dependent dialects of a pass. These dialects will be loaded along with the		/// dependent dialects of a pass. These dialects will be loaded along with the
/// transform dialect unless the extension is in the build-only mode.		/// transform dialect unless the extension is in the build-only mode.
template <typename DialectTy>		template <typename DialectTy>
void declareGeneratedDialect() {		void declareGeneratedDialect() {
generatedDialectLoaders.push_back(		generatedDialectLoaders.push_back(
[](MLIRContext *context) { context->loadDialect<DialectTy>(); });		[](MLIRContext *context) { context->loadDialect<DialectTy>(); });
}		}

		/// Declares that the transformations associated with the operations
		/// registered by this dialect extension need to register additional
		/// extensions, beyond just dialects. This is used in particular for
		/// registering translations that need to be called during IR transformation
		/// (e.g. generating embedded binary blobs).
		void declareRegistration(std::function<void(MLIRContext *)> fun) {
		generatedDialectLoaders.push_back(fun);
		}

private:		private:
/// Callbacks performing extension initialization, e.g., registering ops,		/// Callbacks performing extension initialization, e.g., registering ops,
/// types and defining the additional data.		/// types and defining the additional data.
SmallVector<Initializer> initializers;		SmallVector<Initializer> initializers;

/// Callbacks loading the dependent dialects, i.e. the dialect needed for the		/// Callbacks loading the dependent dialects, i.e. the dialect needed for the
/// extension ops.		/// extension ops.
SmallVector<DialectLoader> dialectLoaders;		SmallVector<DialectLoader> dialectLoaders;
▲ Show 20 Lines • Show All 78 Lines • Show Last 20 Lines

mlir/lib/Dialect/GPU/TransformOps/CMakeLists.txt

Show All 18 Lines	add_mlir_dialect_library(MLIRGPUTransformOps
MLIRSideEffectInterfaces		MLIRSideEffectInterfaces
MLIRTransformDialect		MLIRTransformDialect
MLIRVectorDialect		MLIRVectorDialect
MLIRVectorTransforms		MLIRVectorTransforms

# ConversionPatterns		# ConversionPatterns
MLIRNVGPUToNVVM		MLIRNVGPUToNVVM
MLIRGPUToNVVMTransforms		MLIRGPUToNVVMTransforms

		# Translations (needed to serialize to cubin)
		MLIRNVVMToLLVMIRTranslation
		MLIRGPUToLLVMIRTranslation
		MLIRLLVMIRToLLVMTranslation
		MLIRLLVMToLLVMIRTranslation
)		)

mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp

	//===- GPUTransformOps.cpp - Implementation of GPU transform ops ----------===//			//===- GPUTransformOps.cpp - Implementation of GPU transform ops ----------===//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#include "mlir/Dialect/GPU/TransformOps/GPUTransformOps.h"			#include "mlir/Dialect/GPU/TransformOps/GPUTransformOps.h"

	#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"			#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
	#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"			#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"
	#include "mlir/Conversion/LLVMCommon/TypeConverter.h"			#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
	#include "mlir/Dialect/Affine/IR/AffineOps.h"			#include "mlir/Dialect/Affine/IR/AffineOps.h"
	#include "mlir/Dialect/Arith/IR/Arith.h"			#include "mlir/Dialect/Arith/IR/Arith.h"
				#include "mlir/Dialect/DLTI/DLTI.h"
	#include "mlir/Dialect/Func/IR/FuncOps.h"			#include "mlir/Dialect/Func/IR/FuncOps.h"
	#include "mlir/Dialect/GPU/IR/GPUDialect.h"			#include "mlir/Dialect/GPU/IR/GPUDialect.h"
	#include "mlir/Dialect/GPU/TransformOps/Utils.h"			#include "mlir/Dialect/GPU/TransformOps/Utils.h"
	#include "mlir/Dialect/GPU/Transforms/Passes.h"			#include "mlir/Dialect/GPU/Transforms/Passes.h"
	#include "mlir/Dialect/LLVMIR/NVVMDialect.h"			#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
	#include "mlir/Dialect/MemRef/IR/MemRef.h"			#include "mlir/Dialect/MemRef/IR/MemRef.h"
	#include "mlir/Dialect/SCF/IR/DeviceMappingInterface.h"			#include "mlir/Dialect/SCF/IR/DeviceMappingInterface.h"
	#include "mlir/Dialect/SCF/IR/SCF.h"			#include "mlir/Dialect/SCF/IR/SCF.h"
	#include "mlir/Dialect/Transform/IR/TransformDialect.h"			#include "mlir/Dialect/Transform/IR/TransformDialect.h"
	#include "mlir/Dialect/Transform/IR/TransformInterfaces.h"			#include "mlir/Dialect/Transform/IR/TransformInterfaces.h"
	#include "mlir/Dialect/Utils/IndexingUtils.h"			#include "mlir/Dialect/Utils/IndexingUtils.h"
	#include "mlir/Dialect/Vector/IR/VectorOps.h"			#include "mlir/Dialect/Vector/IR/VectorOps.h"
	#include "mlir/Dialect/Vector/Transforms/VectorTransforms.h"			#include "mlir/Dialect/Vector/Transforms/VectorTransforms.h"
	#include "mlir/IR/AffineExpr.h"			#include "mlir/IR/AffineExpr.h"
	#include "mlir/IR/Builders.h"			#include "mlir/IR/Builders.h"
	#include "mlir/IR/BuiltinAttributes.h"			#include "mlir/IR/BuiltinAttributes.h"
				#include "mlir/IR/BuiltinDialect.h"
				#include "mlir/IR/DialectRegistry.h"
	#include "mlir/IR/IRMapping.h"			#include "mlir/IR/IRMapping.h"
	#include "mlir/IR/MLIRContext.h"			#include "mlir/IR/MLIRContext.h"
	#include "mlir/IR/OpDefinition.h"			#include "mlir/IR/OpDefinition.h"
	#include "mlir/IR/Visitors.h"			#include "mlir/IR/Visitors.h"
				#include "mlir/Pass/Pass.h"
	#include "mlir/Support/LLVM.h"			#include "mlir/Support/LLVM.h"
				#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
				#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
				#include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h"
	#include "mlir/Transforms/DialectConversion.h"			#include "mlir/Transforms/DialectConversion.h"
	#include "llvm/ADT/STLExtras.h"			#include "llvm/ADT/STLExtras.h"
	#include "llvm/ADT/SmallVector.h"			#include "llvm/ADT/SmallVector.h"
	#include "llvm/ADT/TypeSwitch.h"			#include "llvm/ADT/TypeSwitch.h"
	#include "llvm/Support/Debug.h"			#include "llvm/Support/Debug.h"
	#include "llvm/Support/ErrorHandling.h"			#include "llvm/Support/ErrorHandling.h"

	using namespace mlir;			using namespace mlir;
	▲ Show 20 Lines • Show All 1,405 Lines • ▼ Show 20 Lines
	class GPUTransformDialectExtension			class GPUTransformDialectExtension
	: public transform::TransformDialectExtension<			: public transform::TransformDialectExtension<
	GPUTransformDialectExtension> {			GPUTransformDialectExtension> {
	public:			public:
	GPUTransformDialectExtension() {			GPUTransformDialectExtension() {
	declareGeneratedDialect<scf::SCFDialect>();			declareGeneratedDialect<scf::SCFDialect>();
	declareGeneratedDialect<arith::ArithDialect>();			declareGeneratedDialect<arith::ArithDialect>();
	declareGeneratedDialect<GPUDialect>();			declareGeneratedDialect<GPUDialect>();
				declareGeneratedDialect<LLVM::LLVMDialect>();
				declareGeneratedDialect<NVVM::NVVMDialect>();
				declareGeneratedDialect<DLTIDialect>();
	registerTransformOps<			registerTransformOps<
	#define GET_OP_LIST			#define GET_OP_LIST
	#include "mlir/Dialect/GPU/TransformOps/GPUTransformOps.cpp.inc"			#include "mlir/Dialect/GPU/TransformOps/GPUTransformOps.cpp.inc"
	>();			>();
				// Register translations(needed to serialize to cubin)
				declareRegistration([](MLIRContext *c) {
				registerNVVMDialectTranslation(*c);
				registerGPUDialectTranslation(*c);
				registerLLVMDialectTranslation(*c);
				});
				springerm (inline comment, unsubmitted, done): This is a problem: The code is adding an extension (`registry.addExtension`) as part of an extension. I haven't figured out the details yet, but this can cause a reallocation in the underlying `extensions` vector while it is being iterated over.
				springerm (inline comment, unsubmitted, done): fixed in D158838
	}			}
	};			};
	} // namespace			} // namespace

	#define GET_OP_CLASSES			#define GET_OP_CLASSES
	#include "mlir/Dialect/GPU/TransformOps/GPUTransformOps.cpp.inc"			#include "mlir/Dialect/GPU/TransformOps/GPUTransformOps.cpp.inc"

	void mlir::gpu::registerTransformDialectExtension(DialectRegistry &registry) {			void mlir::gpu::registerTransformDialectExtension(DialectRegistry &registry) {
	registry.addExtensions<GPUTransformDialectExtension>();			registry.addExtensions<GPUTransformDialectExtension>();
	}			}

mlir/lib/Dialect/Linalg/TransformOps/CMakeLists.txt

Show All 14 Lines	add_mlir_dialect_library(MLIRLinalgTransformOps

LINK_LIBS PUBLIC		LINK_LIBS PUBLIC
MLIRAffineDialect		MLIRAffineDialect
MLIRArithDialect		MLIRArithDialect
MLIRBufferizationDialect		MLIRBufferizationDialect
MLIRBufferizationTransforms		MLIRBufferizationTransforms
MLIRFuncDialect		MLIRFuncDialect
MLIRIR		MLIRIR
		MLIRIndexDialect
MLIRLinalgDialect		MLIRLinalgDialect
MLIRLinalgTransforms		MLIRLinalgTransforms
MLIRParser		MLIRParser
MLIRSCFDialect		MLIRSCFDialect
MLIRSideEffectInterfaces		MLIRSideEffectInterfaces
MLIRTransformDialect		MLIRTransformDialect
MLIRTransformDialectUtils		MLIRTransformDialectUtils
MLIRVectorTransforms		MLIRVectorTransforms
)		)

mlir/lib/Dialect/Linalg/TransformOps/DialectExtension.cpp

	//===- DialectExtension.cpp - Linalg transform dialect extension ----------===//			//===- DialectExtension.cpp - Linalg transform dialect extension ----------===//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#include "mlir/Dialect/Linalg/TransformOps/DialectExtension.h"			#include "mlir/Dialect/Linalg/TransformOps/DialectExtension.h"
	#include "mlir/Dialect/Affine/IR/AffineOps.h"			#include "mlir/Dialect/Affine/IR/AffineOps.h"
	#include "mlir/Dialect/Arith/IR/Arith.h"			#include "mlir/Dialect/Arith/IR/Arith.h"
	#include "mlir/Dialect/GPU/IR/GPUDialect.h"			#include "mlir/Dialect/GPU/IR/GPUDialect.h"
				#include "mlir/Dialect/Index/IR/IndexDialect.h"
	#include "mlir/Dialect/Linalg/IR/Linalg.h"			#include "mlir/Dialect/Linalg/IR/Linalg.h"
	#include "mlir/Dialect/Linalg/TransformOps/LinalgMatchOps.h"			#include "mlir/Dialect/Linalg/TransformOps/LinalgMatchOps.h"
	#include "mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.h"			#include "mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.h"
	#include "mlir/Dialect/Linalg/Transforms/Transforms.h"			#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
	#include "mlir/Dialect/SCF/IR/SCF.h"			#include "mlir/Dialect/SCF/IR/SCF.h"
	#include "mlir/Dialect/Tensor/IR/Tensor.h"			#include "mlir/Dialect/Tensor/IR/Tensor.h"
	#include "mlir/Dialect/Transform/IR/TransformDialect.h"			#include "mlir/Dialect/Transform/IR/TransformDialect.h"
	#include "mlir/Dialect/Transform/IR/TransformOps.h"			#include "mlir/Dialect/Transform/IR/TransformOps.h"
	Show All 10 Lines
	public:			public:
	using Base::Base;			using Base::Base;

	void init() {			void init() {
	declareDependentDialect<linalg::LinalgDialect>();			declareDependentDialect<linalg::LinalgDialect>();

	declareGeneratedDialect<affine::AffineDialect>();			declareGeneratedDialect<affine::AffineDialect>();
	declareGeneratedDialect<arith::ArithDialect>();			declareGeneratedDialect<arith::ArithDialect>();
				declareGeneratedDialect<index::IndexDialect>();
	declareGeneratedDialect<scf::SCFDialect>();			declareGeneratedDialect<scf::SCFDialect>();
	declareGeneratedDialect<vector::VectorDialect>();			declareGeneratedDialect<vector::VectorDialect>();
	declareGeneratedDialect<gpu::GPUDialect>();			declareGeneratedDialect<gpu::GPUDialect>();
	declareGeneratedDialect<tensor::TensorDialect>();			declareGeneratedDialect<tensor::TensorDialect>();

	registerTransformOps<			registerTransformOps<
	#define GET_OP_LIST			#define GET_OP_LIST
	#include "mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp.inc"			#include "mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp.inc"
	Show All 13 Lines

mlir/test/Integration/GPU/CUDA/TensorCore/sm80/transform-mma-sync-matmul-f32.mlir

// RUN: mlir-opt %s \		// RUN: mlir-opt %s \
// RUN: -test-transform-dialect-interpreter \		// RUN: -test-transform-dialect-interpreter=debug-payload-root-tag="payload" \
// RUN: \| FileCheck %s --check-prefix=CHECK-MMA-SYNC

// CHECK-MMA-SYNC-LABEL: func @main() {
// CHECK-MMA-SYNC: nvgpu.mma.sync(%{{.*}}) {mmaShape = [16, 8, 4], tf32Enabled}
// CHECK-MMA-SYNC-SAME: : (vector<2x1xf32>, vector<1x1xf32>, vector<2x2xf32>) -> vector<2x2xf32>

// Tested to run locally in 1.7s.

// RUN: mlir-opt %s \
// RUN: -test-transform-dialect-interpreter \
// RUN: -test-transform-dialect-erase-schedule \		// RUN: -test-transform-dialect-erase-schedule \
// RUN: -test-lower-to-nvvm="kernel-index-bitwidth=32 cubin-chip=sm_80 cubin-features=+ptx76" \
// RUN: \| mlir-cpu-runner \		// RUN: \| mlir-cpu-runner \
// RUN: --shared-libs=%mlir_cuda_runtime \		// RUN: --shared-libs=%mlir_cuda_runtime \
// RUN: --shared-libs=%mlir_runner_utils \		// RUN: --shared-libs=%mlir_runner_utils \
// RUN: --entry-point-result=void \		// RUN: --entry-point-result=void \
// RUN: \| FileCheck %s		// RUN: \| FileCheck %s


!lhs_memref_type = memref<16x4xf32>		!lhs_memref_type = memref<16x4xf32>
!rhs_memref_type = memref<4x8xf32>		!rhs_memref_type = memref<4x8xf32>
!res_memref_type = memref<16x8xf32>		!res_memref_type = memref<16x8xf32>

		module attributes {transform.target_tag="payload"} {

func.func @compute_linspace_val(%ridx: index, %cidx: index, %strideCidx: index) -> f32 {		func.func @compute_linspace_val(%ridx: index, %cidx: index, %strideCidx: index) -> f32 {
%r = arith.index_cast %ridx : index to i32		%r = arith.index_cast %ridx : index to i32
%c = arith.index_cast %cidx : index to i32		%c = arith.index_cast %cidx : index to i32
%strideC = arith.index_cast %strideCidx : index to i32		%strideC = arith.index_cast %strideCidx : index to i32
%2 = arith.muli %r, %strideC : i32		%2 = arith.muli %r, %strideC : i32
%3 = arith.addi %c, %2 : i32		%3 = arith.addi %c, %2 : i32
%4 = arith.sitofp %3 : i32 to f32		%4 = arith.sitofp %3 : i32 to f32
return %4: f32		return %4: f32
▲ Show 20 Lines • Show All 116 Lines • ▼ Show 20 Lines	func.func @main() {
// CHECK: [2912, 3143, 3374, 3605, 3836, 4067, 4298, 4529],		// CHECK: [2912, 3143, 3374, 3605, 3836, 4067, 4298, 4529],
// CHECK: [3112, 3359, 3606, 3853, 4100, 4347, 4594, 4841]		// CHECK: [3112, 3359, 3606, 3853, 4100, 4347, 4594, 4841]

return		return
}		}

func.func private @printMemrefF32(memref<*xf32>)		func.func private @printMemrefF32(memref<*xf32>)

		} // module


		/// Schedule to lower device GPU IR and host IR to LLVM.
		/// In the future this should be preloaded from a separate file.
		module @named_inclusion_in_named attributes { transform.with_named_sequence } {

		// Spell out lowering to NVVM to make it less bespoke and more easily configurable.
		transform.named_sequence @lower_gpu(
		%module: !transform.any_op {transform.consumed}) -> !transform.any_op {

		%m2 = transform.apply_registered_pass "gpu-kernel-outlining" to %module : (!transform.any_op) -> !transform.any_op

		%gpu_module = transform.structured.match ops{["gpu.module"]} in %m2 : (!transform.any_op) -> !transform.any_op
		%gm2 = transform.apply_registered_pass "convert-vector-to-scf" to %gpu_module : (!transform.any_op) -> !transform.any_op
		%gm3 = transform.apply_registered_pass "convert-scf-to-cf" to %gm2 : (!transform.any_op) -> !transform.any_op
		%gm4 = transform.apply_registered_pass "expand-strided-metadata" to %gm3 : (!transform.any_op) -> !transform.any_op
		%gm5 = transform.apply_registered_pass "lower-affine" to %gm4 : (!transform.any_op) -> !transform.any_op
		transform.apply_conversion_patterns to %gm5 {
		transform.apply_conversion_patterns.dialect_to_llvm "math"
		transform.apply_conversion_patterns.dialect_to_llvm "memref"
		transform.apply_conversion_patterns.func.func_to_llvm
		transform.apply_conversion_patterns.dialect_to_llvm "index"
		} with type_converter {
		transform.apply_conversion_patterns.memref.memref_to_llvm_type_converter
		{index_bitwidth = 32,
		use_bare_ptr = true,
		use_bare_ptr_memref_call_conv = true,
		use_opaque_pointers = true}
		} {
		legal_dialects = ["llvm", "gpu", "nvvm"],
		partial_conversion
		} : !transform.any_op

		// apply_conversion_patterns loses track of handles so we rematch.
		%gpu_module2 = transform.structured.match ops{["gpu.module"]} in %m2 : (!transform.any_op) -> !transform.any_op
		transform.apply_patterns to %gpu_module2 {
		transform.apply_patterns.gpu.gpu_rewrite_patterns
		} : !transform.any_op

		// apply_conversion_patterns loses track of handles so we rematch.
		%gpu_module3 = transform.structured.match ops{["gpu.module"]} in %m2 : (!transform.any_op) -> !transform.any_op
		transform.apply_conversion_patterns to %gpu_module3 {
		transform.apply_conversion_patterns.dialect_to_llvm "arith"
		transform.apply_conversion_patterns.dialect_to_llvm "cf"
		transform.apply_conversion_patterns.vector.vector_to_llvm
		transform.apply_conversion_patterns.func.func_to_llvm
		transform.apply_conversion_patterns.dialect_to_llvm "memref"
		transform.apply_conversion_patterns.gpu.gpu_to_nvvm
		transform.apply_conversion_patterns.gpu.gpu_wmma_to_nvvm
		transform.apply_conversion_patterns.gpu.gpu_subgroup_reduce_to_nvvm
		transform.apply_conversion_patterns.nvgpu.nvgpu_to_nvvm
		} with type_converter {
		transform.apply_conversion_patterns.memref.memref_to_llvm_type_converter
		{index_bitwidth = 32,
		use_bare_ptr = true,
		use_bare_ptr_memref_call_conv = true,
		use_opaque_pointers = true}
		} {
		legal_dialects = ["llvm", "memref", "nvvm"],
		legal_ops = ["func.func", "gpu.module", "gpu.module_end", "gpu.yield"],
		illegal_dialects = ["gpu"],
		illegal_ops = ["llvm.cos", "llvm.exp", "llvm.exp2", "llvm.fabs", "llvm.fceil",
		"llvm.ffloor", "llvm.log", "llvm.log10", "llvm.log2","llvm.pow",
		"llvm.sin", "llvm.sqrt"],
		partial_conversion
		} : !transform.any_op

		// apply_conversion_patterns loses track of handles so we rematch.
		%gpu_module4 = transform.structured.match ops{["gpu.module"]} in %m2 : (!transform.any_op) -> !transform.any_op
		%mm2 = transform.apply_registered_pass "convert-vector-to-llvm" to %gpu_module4 : (!transform.any_op) -> !transform.any_op
		%mm3 = transform.apply_registered_pass "canonicalize" to %mm2 : (!transform.any_op) -> !transform.any_op
		%mm4 = transform.apply_registered_pass "cse" to %mm3 : (!transform.any_op) -> !transform.any_op
		%mm5 = transform.apply_registered_pass "reconcile-unrealized-casts" to %mm4 : (!transform.any_op) -> !transform.any_op
		%mm6 = transform.apply_registered_pass "gpu-to-cubin" to %mm5 {options="chip=sm_80 features=+ptx76"} : (!transform.any_op) -> !transform.any_op

		transform.yield %m2 : !transform.any_op
		}

		transform.named_sequence @lower_host(
		%module: !transform.any_op {transform.consumed}) -> !transform.any_op {
		%m3 = transform.apply_registered_pass "convert-vector-to-scf" to %module : (!transform.any_op) -> !transform.any_op
		%m4 = transform.apply_registered_pass "convert-scf-to-cf" to %m3 : (!transform.any_op) -> !transform.any_op
		%m5 = transform.apply_registered_pass "expand-strided-metadata" to %m4 : (!transform.any_op) -> !transform.any_op
		%m6 = transform.apply_registered_pass "lower-affine" to %m5 : (!transform.any_op) -> !transform.any_op

		// TODO: apply_conversion_patterns loses track of handles so we only apply it to func.func ops.
		%func = transform.structured.match ops{["func.func"]} in %m6 : (!transform.any_op) -> !transform.any_op
		transform.apply_conversion_patterns to %func {
		transform.apply_conversion_patterns.dialect_to_llvm "math"
		transform.apply_conversion_patterns.vector.vector_to_llvm
		transform.apply_conversion_patterns.dialect_to_llvm "memref"
		transform.apply_conversion_patterns.func.func_to_llvm
		transform.apply_conversion_patterns.dialect_to_llvm "index"
		transform.apply_conversion_patterns.dialect_to_llvm "arith"
		transform.apply_conversion_patterns.dialect_to_llvm "cf"
		} with type_converter {
		transform.apply_conversion_patterns.memref.memref_to_llvm_type_converter
		{index_bitwidth = 64,
		use_bare_ptr = true,
		use_bare_ptr_memref_call_conv = true,
		use_opaque_pointers = true}
		} {
		legal_dialects = ["llvm", "nvvm"],
		legal_ops = ["builtin.module", "gpu.module", "gpu.module_end", "gpu.yield"],
		partial_conversion
		} : !transform.any_op

		%m7 = transform.apply_registered_pass "gpu-to-llvm" to %m6
		: (!transform.any_op) -> !transform.any_op
		%m8 = transform.apply_registered_pass "reconcile-unrealized-casts" to %m7
		: (!transform.any_op) -> !transform.any_op

		transform.yield %m8 : !transform.any_op
		}


transform.sequence failures(propagate) {		transform.sequence failures(propagate) {
^bb1(%arg1: !transform.any_op):		^bb1(%toplevel_module: !transform.any_op):
%matmul = transform.structured.match ops{["linalg.matmul"]} in %arg1
		%matmul = transform.structured.match ops{["linalg.matmul"]} in %toplevel_module
: (!transform.any_op) -> !transform.any_op		: (!transform.any_op) -> !transform.any_op
transform.nvgpu.rewrite_matmul_as_mma_sync %matmul		transform.nvgpu.rewrite_matmul_as_mma_sync %matmul
: (!transform.any_op) -> ()		: (!transform.any_op) -> ()

		%m2 = transform.include @lower_gpu failures(suppress) (%toplevel_module)
		: (!transform.any_op) -> (!transform.any_op)
		%m3 = transform.include @lower_host failures(suppress) (%m2)
		: (!transform.any_op) -> (!transform.any_op)
}		}

		} // transform module

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][transform] Replace complex test-lower-to-nvvm by an explicit TD listing in transform-mma-sync-matmul-f32.mlir (Accepted, Public)

Details

Diff Detail

Event Timeline