Diff 272367

mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h

	//===- GPUToNVVMPass.h - Convert GPU kernel to NVVM dialect ------ C++ --===//			//===- GPUToNVVMPass.h - Convert GPU kernel to NVVM dialect ------ C++ --===//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	#ifndef MLIR_CONVERSION_GPUTONVVM_GPUTONVVMPASS_H_			#ifndef MLIR_CONVERSION_GPUTONVVM_GPUTONVVMPASS_H_
	#define MLIR_CONVERSION_GPUTONVVM_GPUTONVVMPASS_H_			#define MLIR_CONVERSION_GPUTONVVM_GPUTONVVMPASS_H_

				#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
	#include <memory>			#include <memory>

	namespace mlir {			namespace mlir {
	class LLVMTypeConverter;			class LLVMTypeConverter;
	class OwningRewritePatternList;			class OwningRewritePatternList;

	template <typename OpT> class OperationPass;			template <typename OpT> class OperationPass;

	namespace gpu {			namespace gpu {
	class GPUModuleOp;			class GPUModuleOp;
	}			}

	/// Collect a set of patterns to convert from the GPU dialect to NVVM.			/// Collect a set of patterns to convert from the GPU dialect to NVVM.
	void populateGpuToNVVMConversionPatterns(LLVMTypeConverter &converter,			void populateGpuToNVVMConversionPatterns(LLVMTypeConverter &converter,
	OwningRewritePatternList &patterns);			OwningRewritePatternList &patterns);

	/// Creates a pass that lowers GPU dialect operations to NVVM counterparts.			/// Creates a pass that lowers GPU dialect operations to NVVM counterparts. The
	std::unique_ptr<OperationPass<gpu::GPUModuleOp>>			/// index bitwidth used for the lowering of the device side index computations
	createLowerGpuOpsToNVVMOpsPass();			/// is configurable.
				std::unique_ptr<OperationPass<gpu::GPUModuleOp>> createLowerGpuOpsToNVVMOpsPass(
				unsigned indexBitwidth = kDeriveIndexBitwidthFromDataLayout);

	} // namespace mlir			} // namespace mlir

	#endif // MLIR_CONVERSION_GPUTONVVM_GPUTONVVMPASS_H_			#endif // MLIR_CONVERSION_GPUTONVVM_GPUTONVVMPASS_H_

mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h

	//===- GPUToROCDLPass.h - Convert GPU kernel to ROCDL dialect ---- C++ --===//			//===- GPUToROCDLPass.h - Convert GPU kernel to ROCDL dialect ---- C++ --===//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	#ifndef MLIR_CONVERSION_GPUTOROCDL_GPUTOROCDLPASS_H_			#ifndef MLIR_CONVERSION_GPUTOROCDL_GPUTOROCDLPASS_H_
	#define MLIR_CONVERSION_GPUTOROCDL_GPUTOROCDLPASS_H_			#define MLIR_CONVERSION_GPUTOROCDL_GPUTOROCDLPASS_H_

				#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
	#include <memory>			#include <memory>

	namespace mlir {			namespace mlir {
	class LLVMTypeConverter;			class LLVMTypeConverter;
	class OwningRewritePatternList;			class OwningRewritePatternList;

	template <typename OpT>			template <typename OpT>
	class OperationPass;			class OperationPass;

	namespace gpu {			namespace gpu {
	class GPUModuleOp;			class GPUModuleOp;
	} // namespace gpu			} // namespace gpu

	/// Collect a set of patterns to convert from the GPU dialect to ROCDL.			/// Collect a set of patterns to convert from the GPU dialect to ROCDL.
	void populateGpuToROCDLConversionPatterns(LLVMTypeConverter &converter,			void populateGpuToROCDLConversionPatterns(LLVMTypeConverter &converter,
	OwningRewritePatternList &patterns);			OwningRewritePatternList &patterns);

	/// Creates a pass that lowers GPU dialect operations to ROCDL counterparts.			/// Creates a pass that lowers GPU dialect operations to ROCDL counterparts. The
				/// index bitwidth used for the lowering of the device side index computations
				/// is configurable.
	std::unique_ptr<OperationPass<gpu::GPUModuleOp>>			std::unique_ptr<OperationPass<gpu::GPUModuleOp>>
	createLowerGpuOpsToROCDLOpsPass();			createLowerGpuOpsToROCDLOpsPass(
				unsigned indexBitwidth = kDeriveIndexBitwidthFromDataLayout);

	} // namespace mlir			} // namespace mlir

	#endif // MLIR_CONVERSION_GPUTOROCDL_GPUTOROCDLPASS_H_			#endif // MLIR_CONVERSION_GPUTOROCDL_GPUTOROCDLPASS_H_

mlir/include/mlir/Conversion/Passes.td

	Show First 20 Lines • Show All 94 Lines • ▼ Show 20 Lines

	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	// GPUToNVVM			// GPUToNVVM
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	def ConvertGpuOpsToNVVMOps : Pass<"convert-gpu-to-nvvm", "gpu::GPUModuleOp"> {			def ConvertGpuOpsToNVVMOps : Pass<"convert-gpu-to-nvvm", "gpu::GPUModuleOp"> {
	let summary = "Generate NVVM operations for gpu operations";			let summary = "Generate NVVM operations for gpu operations";
	let constructor = "mlir::createLowerGpuOpsToNVVMOpsPass()";			let constructor = "mlir::createLowerGpuOpsToNVVMOpsPass()";
				let options = [
				Option<"indexBitwidth", "index-bitwidth", "unsigned",
				/default=kDeriveIndexBitwidthFromDataLayout/"0",
				"Bitwidth of the index type, 0 to use size of machine word">
				];
	}			}

	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	// GPUToROCDL			// GPUToROCDL
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	def ConvertGpuOpsToROCDLOps : Pass<"convert-gpu-to-rocdl", "gpu::GPUModuleOp"> {			def ConvertGpuOpsToROCDLOps : Pass<"convert-gpu-to-rocdl", "gpu::GPUModuleOp"> {
	let summary = "Generate ROCDL operations for gpu operations";			let summary = "Generate ROCDL operations for gpu operations";
	let constructor = "mlir::createLowerGpuOpsToROCDLOpsPass()";			let constructor = "mlir::createLowerGpuOpsToROCDLOpsPass()";
				let options = [
				Option<"indexBitwidth", "index-bitwidth", "unsigned",
				/default=kDeriveIndexBitwidthFromDataLayout/"0",
				"Bitwidth of the index type, 0 to use size of machine word">
				];
	}			}

	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	// GPUToSPIRV			// GPUToSPIRV
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	def ConvertGPUToSPIRV : Pass<"convert-gpu-to-spirv", "ModuleOp"> {			def ConvertGPUToSPIRV : Pass<"convert-gpu-to-spirv", "ModuleOp"> {
	let summary = "Convert GPU dialect to SPIR-V dialect";			let summary = "Convert GPU dialect to SPIR-V dialect";
	▲ Show 20 Lines • Show All 204 Lines • Show Last 20 Lines

mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h

Show All 9 Lines
// converts Standard ops and types and provides hooks for dialect-specific		// converts Standard ops and types and provides hooks for dialect-specific
// extensions to the conversion.		// extensions to the conversion.
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#ifndef MLIR_CONVERSION_STANDARDTOLLVM_CONVERTSTANDARDTOLLVM_H		#ifndef MLIR_CONVERSION_STANDARDTOLLVM_CONVERTSTANDARDTOLLVM_H
#define MLIR_CONVERSION_STANDARDTOLLVM_CONVERTSTANDARDTOLLVM_H		#define MLIR_CONVERSION_STANDARDTOLLVM_CONVERTSTANDARDTOLLVM_H

		#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
#include "mlir/Transforms/DialectConversion.h"		#include "mlir/Transforms/DialectConversion.h"

namespace llvm {		namespace llvm {
class IntegerType;		class IntegerType;
class LLVMContext;		class LLVMContext;
class Module;		class Module;
class Type;		class Type;
} // namespace llvm		} // namespace llvm

namespace mlir {		namespace mlir {

class ComplexType;		class ComplexType;
class LLVMTypeConverter;		class LLVMTypeConverter;
class UnrankedMemRefType;		class UnrankedMemRefType;

namespace LLVM {		namespace LLVM {
class LLVMDialect;		class LLVMDialect;
class LLVMType;		class LLVMType;
} // namespace LLVM		} // namespace LLVM

/// Set of callbacks that allows the customization of LLVMTypeConverter.
/// Bundles a function-argument conversion hook together with the bitwidth
/// setting used for the index type.
struct LLVMTypeConverterCustomization {
/// Signature of a conversion hook: it receives the type converter, a type and
/// an output vector of types, and returns a LogicalResult.
using CustomCallback = std::function<LogicalResult(LLVMTypeConverter &, Type,
SmallVectorImpl<Type> &)>;

/// Customize the type conversion of function arguments.
CustomCallback funcArgConverter;

/// Used to determine the bitwidth of the LLVM integer type that the index
/// type gets lowered to. Defaults to deriving the size from the data layout.
unsigned indexBitwidth;

/// Initialize customization to default callbacks.
/// NOTE(review): the concrete defaults are set in the out-of-line definition,
/// which is not visible in this header -- confirm there.
LLVMTypeConverterCustomization();
};

/// Callback to convert function argument types. It converts a MemRef function		/// Callback to convert function argument types. It converts a MemRef function
/// argument to a list of non-aggregate types containing descriptor		/// argument to a list of non-aggregate types containing descriptor
/// information, and an UnrankedmemRef function argument to a list containing		/// information, and an UnrankedmemRef function argument to a list containing
/// the rank and a pointer to a descriptor struct.		/// the rank and a pointer to a descriptor struct.
LogicalResult structFuncArgTypeConverter(LLVMTypeConverter &converter,		LogicalResult structFuncArgTypeConverter(LLVMTypeConverter &converter,
Type type,		Type type,
SmallVectorImpl<Type> &result);		SmallVectorImpl<Type> &result);

/// Callback to convert function argument types. It converts MemRef function		/// Callback to convert function argument types. It converts MemRef function
/// arguments to bare pointers to the MemRef element type.		/// arguments to bare pointers to the MemRef element type.
LogicalResult barePtrFuncArgTypeConverter(LLVMTypeConverter &converter,		LogicalResult barePtrFuncArgTypeConverter(LLVMTypeConverter &converter,
Type type,		Type type,
SmallVectorImpl<Type> &result);		SmallVectorImpl<Type> &result);

/// Conversion from types in the Standard dialect to the LLVM IR dialect.		/// Conversion from types in the Standard dialect to the LLVM IR dialect.
class LLVMTypeConverter : public TypeConverter {		class LLVMTypeConverter : public TypeConverter {
/// Give structFuncArgTypeConverter access to memref-specific functions.		/// Give structFuncArgTypeConverter access to memref-specific functions.
friend LogicalResult		friend LogicalResult
structFuncArgTypeConverter(LLVMTypeConverter &converter, Type type,		structFuncArgTypeConverter(LLVMTypeConverter &converter, Type type,
SmallVectorImpl<Type> &result);		SmallVectorImpl<Type> &result);

public:		public:
using TypeConverter::convertType;		using TypeConverter::convertType;

/// Create an LLVMTypeConverter using the default		/// Create an LLVMTypeConverter using the default LowerToLLVMOptions.
/// LLVMTypeConverterCustomization.
LLVMTypeConverter(MLIRContext *ctx);		LLVMTypeConverter(MLIRContext *ctx);

/// Create an LLVMTypeConverter using 'custom' customizations.		/// Create an LLVMTypeConverter using custom LowerToLLVMOptions.
LLVMTypeConverter(MLIRContext *ctx,		LLVMTypeConverter(MLIRContext *ctx, const LowerToLLVMOptions &options);
const LLVMTypeConverterCustomization &custom);

/// Convert a function type. The arguments and results are converted one by		/// Convert a function type. The arguments and results are converted one by
/// one and results are packed into a wrapped LLVM IR structure type. `result`		/// one and results are packed into a wrapped LLVM IR structure type. `result`
/// is populated with argument mapping.		/// is populated with argument mapping.
LLVM::LLVMType convertFunctionSignature(FunctionType type, bool isVariadic,		LLVM::LLVMType convertFunctionSignature(FunctionType type, bool isVariadic,
SignatureConversion &result);		SignatureConversion &result);

/// Convert a non-empty list of types to be returned from a function into a		/// Convert a non-empty list of types to be returned from a function into a
Show All 29 Lines	public:
/// pointers to memref descriptors for arguments.		/// pointers to memref descriptors for arguments.
LLVM::LLVMType convertFunctionTypeCWrapper(FunctionType type);		LLVM::LLVMType convertFunctionTypeCWrapper(FunctionType type);

/// Gets the LLVM representation of the index type. The returned type is an		/// Gets the LLVM representation of the index type. The returned type is an
/// integer type with the size configured for this type converter.		/// integer type with the size configured for this type converter.
LLVM::LLVMType getIndexType();		LLVM::LLVMType getIndexType();

/// Gets the bitwidth of the index type when converted to LLVM.		/// Gets the bitwidth of the index type when converted to LLVM.
unsigned getIndexTypeBitwidth() { return customizations.indexBitwidth; }		unsigned getIndexTypeBitwidth() { return options.indexBitwidth; }

protected:		protected:
/// LLVM IR module used to parse/create types.		/// LLVM IR module used to parse/create types.
llvm::Module *module;		llvm::Module *module;
LLVM::LLVMDialect *llvmDialect;		LLVM::LLVMDialect *llvmDialect;

private:		private:
/// Convert a function type. The arguments and results are converted one by		/// Convert a function type. The arguments and results are converted one by
▲ Show 20 Lines • Show All 49 Lines • ▼ Show 20 Lines	private:

// Convert an unranked memref type to an LLVM type that captures the		// Convert an unranked memref type to an LLVM type that captures the
// runtime rank and a pointer to the static ranked memref desc		// runtime rank and a pointer to the static ranked memref desc
Type convertUnrankedMemRefType(UnrankedMemRefType type);		Type convertUnrankedMemRefType(UnrankedMemRefType type);

// Convert a 1D vector type into an LLVM vector type.		// Convert a 1D vector type into an LLVM vector type.
Type convertVectorType(VectorType type);		Type convertVectorType(VectorType type);

/// Callbacks for customizing the type conversion.		/// Options for customizing the llvm lowering.
LLVMTypeConverterCustomization customizations;		LowerToLLVMOptions options;
};		};

/// Helper class to produce LLVM dialect operations extracting or inserting		/// Helper class to produce LLVM dialect operations extracting or inserting
/// values to a struct.		/// values to a struct.
class StructBuilder {		class StructBuilder {
public:		public:
/// Construct a helper for the given value.		/// Construct a helper for the given value.
explicit StructBuilder(Value v);		explicit StructBuilder(Value v);
▲ Show 20 Lines • Show All 178 Lines • ▼ Show 20 Lines	static void unpack(OpBuilder &builder, Location loc, Value packed,
SmallVectorImpl<Value> &results);		SmallVectorImpl<Value> &results);

/// Returns the number of non-aggregate values that would be produced by		/// Returns the number of non-aggregate values that would be produced by
/// `unpack`.		/// `unpack`.
static unsigned getNumUnpackedValues() { return 2; }		static unsigned getNumUnpackedValues() { return 2; }
};		};

/// Base class for operation conversions targeting the LLVM IR dialect. Provides		/// Base class for operation conversions targeting the LLVM IR dialect. Provides
/// conversion patterns with access to an LLVMTypeConverter.		/// conversion patterns with access to an LLVMTypeConverter and the
		/// LowerToLLVMOptions.
class ConvertToLLVMPattern : public ConversionPattern {		class ConvertToLLVMPattern : public ConversionPattern {
public:		public:
ConvertToLLVMPattern(StringRef rootOpName, MLIRContext *context,		ConvertToLLVMPattern(StringRef rootOpName, MLIRContext *context,
LLVMTypeConverter &typeConverter,		LLVMTypeConverter &typeConverter,
		const LowerToLLVMOptions &options = {
		/useBarePtrCallConv=/false,
		/emitCWrappers=/false,
		/indexBitwidth=/kDeriveIndexBitwidthFromDataLayout,
		/useAlignedAlloc=/false},
		mehdi_aminiUnsubmitted Not Done Reply Inline Actions I am puzzled how is this working? This default value for this parameter is mapped to a reference member, how isn't it gonna lead to "use-after-free"? mehdi_amini: I am puzzled how is this working? This default value for this parameter is mapped to a…
		gysitAuthorUnsubmitted Done Reply Inline Actions You are right this should not work (the lifetime of the default argument is limited to the body of the constructor -- I believe). I will submit a patch to fix this problem. gysit: You are right this should not work (the lifetime of the default argument is limited to the body…
		ftynseUnsubmitted Not Done Reply Inline Actions Hmm, why wouldn't it? The lifetime of the temporary is that of the constructor body. The reference will be used to copy-construct the member struct at the beginning of the constructor implementation, at which point the temporary is guaranteed to be live. Then we will only use the member. It would have been a problem if ConvertToLLVMPattern kept a reference to the temporary. ftynse: Hmm, why wouldn't it? The lifetime of the temporary is that of the constructor body. The…
		gysitAuthorUnsubmitted Done Reply Inline Actions > "It would have been a problem if ConvertToLLVMPattern kept a reference to the temporary." The options member is unfortunately a const reference. gysit: > It would have been a problem if ConvertToLLVMPattern kept a reference to the temporary. The…
		ftynseUnsubmitted Not Done Reply Inline Actions My bad, I looked elsewhere. The reference capture semantics should be documented somewhere. Or in a more hacky way, this can accept a non-const reference that would effectively disallow passing in temporaries. ftynse: My bad, I looked elsewhere. The reference capture semantics should be documented somewhere. Or…
		gysitAuthorUnsubmitted Done Reply Inline Actions Should we just make the options a non-reference member of ConvertToLLVMPattern? At the moment the struct is super small and copying the options should not harm performance. gysit: should we just make the options a non-reference member of ConvertToLLVMPattern? At the moment…
		jeanPerierUnsubmitted Done Reply Inline Actions + 1, this broke our flang builds with some compilers (they randomly emitted C interface). jeanPerier: + 1, this broke our flang builds with some compilers (they randomly emitted C interface).
		gysitAuthorUnsubmitted Done Reply Inline Actions > "+ 1, this broke our flang builds with some compilers (they randomly emitted C interface)." Sorry for breaking your build. I reverted the commit, which hopefully solves your problem. gysit: > + 1, this broke our flang builds with some compilers (they randomly emitted C interface).
		jeanPerierUnsubmitted Not Done Reply Inline Actions Thanks, this part of flang code is still in a fork so it was hard for you to know. What went wrong in our builds was the default arguments. They ended-up being temps with the lifetime of the ctor call. Creating the default `LowerToLLVMOptions` on our side and passing it to `populateStdToLLVMConversionPatterns` was working OK. jeanPerier: Thanks, this part of flang code is still in a fork so it was hard for you to know. What went…
		mehdi_aminiUnsubmitted Not Done Reply Inline Actions Duplicating the option in every single pattern instance inheriting from ConvertToLLVMPattern seems a bit suboptimal to me. mehdi_amini: Duplicating the option in every single pattern instance inheriting from ConvertToLLVMPattern…
		gysitAuthorUnsubmitted Done Reply Inline Actions > "Duplicating the option in every single pattern instance inheriting from ConvertToLLVMPattern seems a bit suboptimal to me." Using a reference or a pointer to the options structure are possible alternatives. Both of them have memory lifetime issues if the referenced memory is freed too early. An alternative could be to pass in a callback that returns an options structure (similar to the one used for the type converter before). This solution has no lifetime issues and the memory footprint should be minimal (a function pointer). gysit: > Duplicating the option in every single pattern instance inheriting from ConvertToLLVMPattern…
		mehdi_aminiUnsubmitted Not Done Reply Inline Actions I'm missing something: how is the lifetime of the callback managed? In some way if you can pass a pointer to a callback that returns the data, you can also provide a pointer to the data. mehdi_amini: I'm missing something: how is the lifetime of the callback managed? In some way if you can pass…
		gysitAuthorUnsubmitted Not Done Reply Inline Actions The idea was to use a local static variable: ``` LowerToLLVMOptions myConfigOptions() { static const LowerToLLVMOptions myOptions = { /* ... */ }; return myOptions; } ``` However, this solution only works for the options that can be statically initialized (such as the default options) and pointers/references are still needed for the runtime pass parameters. So having pointers / references everywhere may be the better solution. gysit: The idea was to use a local static variable ``` LowerToLLVMOptions myConfigOptions() { static…
		mehdi_aminiUnsubmitted Not Done Reply Inline Actions In particular, if you can have a static you can also pass it by reference :) mehdi_amini: In particular, if you can have a static you can also pass it by reference :)
PatternBenefit benefit = 1);		PatternBenefit benefit = 1);

/// Returns the LLVM dialect.		/// Returns the LLVM dialect.
LLVM::LLVMDialect &getDialect() const;		LLVM::LLVMDialect &getDialect() const;

/// Returns the LLVM IR context.		/// Returns the LLVM IR context.
llvm::LLVMContext &getContext() const;		llvm::LLVMContext &getContext() const;

Show All 35 Lines	public:

Value getDataPtr(Location loc, MemRefType type, Value memRefDesc,		Value getDataPtr(Location loc, MemRefType type, Value memRefDesc,
ValueRange indices, ConversionPatternRewriter &rewriter,		ValueRange indices, ConversionPatternRewriter &rewriter,
llvm::Module &module) const;		llvm::Module &module) const;

protected:		protected:
/// Reference to the type converter, with potential extensions.		/// Reference to the type converter, with potential extensions.
LLVMTypeConverter &typeConverter;		LLVMTypeConverter &typeConverter;

		/// Reference to the llvm lowering options.
		const LowerToLLVMOptions &options;
};		};

/// Utility class for operation conversions targeting the LLVM dialect that		/// Utility class for operation conversions targeting the LLVM dialect that
/// match exactly one source operation.		/// match exactly one source operation.
template <typename OpTy>		template <typename OpTy>
class ConvertOpToLLVMPattern : public ConvertToLLVMPattern {		class ConvertOpToLLVMPattern : public ConvertToLLVMPattern {
public:		public:
ConvertOpToLLVMPattern(LLVMTypeConverter &typeConverter,		ConvertOpToLLVMPattern(LLVMTypeConverter &typeConverter,
		const LowerToLLVMOptions &options,
PatternBenefit benefit = 1)		PatternBenefit benefit = 1)
: ConvertToLLVMPattern(OpTy::getOperationName(),		: ConvertToLLVMPattern(OpTy::getOperationName(),
&typeConverter.getContext(), typeConverter,		&typeConverter.getContext(), typeConverter,
benefit) {}		options, benefit) {}
};		};

namespace LLVM {		namespace LLVM {
namespace detail {		namespace detail {
/// Replaces the given operation "op" with a new operation of type "targetOp"		/// Replaces the given operation "op" with a new operation of type "targetOp"
/// and given operands.		/// and given operands.
LogicalResult oneToOneRewrite(Operation *op, StringRef targetOp,		LogicalResult oneToOneRewrite(Operation *op, StringRef targetOp,
ValueRange operands,		ValueRange operands,
▲ Show 20 Lines • Show All 65 Lines • Show Last 20 Lines

mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h

	//===- ConvertStandardToLLVMPass.h - Pass entrypoint ------------- C++ --===//			//===- ConvertStandardToLLVMPass.h - Pass entrypoint ------------- C++ --===//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#ifndef MLIR_CONVERSION_STANDARDTOLLVM_CONVERTSTANDARDTOLLVMPASS_H_			#ifndef MLIR_CONVERSION_STANDARDTOLLVM_CONVERTSTANDARDTOLLVMPASS_H_
	#define MLIR_CONVERSION_STANDARDTOLLVM_CONVERTSTANDARDTOLLVMPASS_H_			#define MLIR_CONVERSION_STANDARDTOLLVM_CONVERTSTANDARDTOLLVMPASS_H_

	#include <memory>			#include <memory>

	namespace mlir {			namespace mlir {
	class LLVMTypeConverter;			class LLVMTypeConverter;
	class ModuleOp;			class ModuleOp;
	template <typename T> class OperationPass;			template <typename T>
				class OperationPass;
	class OwningRewritePatternList;			class OwningRewritePatternList;

				/// Value to pass as bitwidth for the index type when the converter is expected
				/// to derive the bitwidth from the LLVM data layout.
				static constexpr unsigned kDeriveIndexBitwidthFromDataLayout = 0;

				struct LowerToLLVMOptions {
				bool useBarePtrCallConv = false;
				bool emitCWrappers = false;
				unsigned indexBitwidth = kDeriveIndexBitwidthFromDataLayout;
				/// Use aligned_alloc for heap allocations.
				bool useAlignedAlloc = false;
				mehdi_aminiUnsubmitted Not Done Reply Inline Actions If you reorder the bools before the unsigned, the struct would be smaller, I think (on most platforms). mehdi_amini: If you reorder the bool before the unsigned, the struct would be smaller I think (on most…
				};

	/// Collect a set of patterns to convert memory-related operations from the			/// Collect a set of patterns to convert memory-related operations from the
	/// Standard dialect to the LLVM dialect, excluding non-memory-related			/// Standard dialect to the LLVM dialect, excluding non-memory-related
	/// operations and FuncOp.			/// operations and FuncOp.
	void populateStdToLLVMMemoryConversionPatterns(			void populateStdToLLVMMemoryConversionPatterns(
	LLVMTypeConverter &converter, OwningRewritePatternList &patterns,			LLVMTypeConverter &converter, OwningRewritePatternList &patterns,
	bool useAlignedAlloc);			const LowerToLLVMOptions &options);

	/// Collect a set of patterns to convert from the Standard dialect to the LLVM			/// Collect a set of patterns to convert from the Standard dialect to the LLVM
	/// dialect, excluding the memory-related operations.			/// dialect, excluding the memory-related operations.
	void populateStdToLLVMNonMemoryConversionPatterns(			void populateStdToLLVMNonMemoryConversionPatterns(
	LLVMTypeConverter &converter, OwningRewritePatternList &patterns);			LLVMTypeConverter &converter, OwningRewritePatternList &patterns,
				const LowerToLLVMOptions &options);

	/// Collect the default pattern to convert a FuncOp to the LLVM dialect. If			/// Collect the default pattern to convert a FuncOp to the LLVM dialect. If
	/// `emitCWrappers` is set, the pattern will also produce functions			/// `emitCWrappers` is set, the pattern will also produce functions
	/// that pass memref descriptors by pointer-to-structure in addition to the			/// that pass memref descriptors by pointer-to-structure in addition to the
	/// default unpacked form.			/// default unpacked form.
	void populateStdToLLVMDefaultFuncOpConversionPattern(			void populateStdToLLVMFuncOpConversionPattern(
	LLVMTypeConverter &converter, OwningRewritePatternList &patterns,			LLVMTypeConverter &converter, OwningRewritePatternList &patterns,
	bool emitCWrappers = false);			const LowerToLLVMOptions &options);

	/// Collect a set of default patterns to convert from the Standard dialect to			/// Collect the patterns to convert from the Standard dialect to LLVM.
	/// LLVM.			void populateStdToLLVMConversionPatterns(
	void populateStdToLLVMConversionPatterns(LLVMTypeConverter &converter,
	OwningRewritePatternList &patterns,
	bool emitCWrappers = false,
	bool useAlignedAlloc = false);

	/// Collect a set of patterns to convert from the Standard dialect to
	/// LLVM using the bare pointer calling convention for MemRef function
	/// arguments.
	void populateStdToLLVMBarePtrConversionPatterns(
	LLVMTypeConverter &converter, OwningRewritePatternList &patterns,			LLVMTypeConverter &converter, OwningRewritePatternList &patterns,
	bool useAlignedAlloc);			const LowerToLLVMOptions &options = {
				/useBarePtrCallConv=/false, /emitCWrappers=/false,
	/// Value to pass as bitwidth for the index type when the converter is expected			/indexBitwidth=/kDeriveIndexBitwidthFromDataLayout,
	/// to derive the bitwidth from the LLVM data layout.			/useAlignedAlloc=/false});
	static constexpr unsigned kDeriveIndexBitwidthFromDataLayout = 0;

	/// Options that configure the lowering to the LLVM dialect. Every field has
	/// a default, so a default-constructed instance is usable as-is.
	struct LowerToLLVMOptions {
	/// Presumably selects the bare pointer calling convention for MemRef
	/// function arguments -- TODO confirm against the pass implementation.
	bool useBarePtrCallConv = false;
	/// When set, the FuncOp conversion pattern also produces functions that
	/// pass memref descriptors by pointer-to-structure in addition to the
	/// default unpacked form.
	bool emitCWrappers = false;
	/// Bitwidth to use for the index type. The default value,
	/// kDeriveIndexBitwidthFromDataLayout, asks the converter to derive the
	/// bitwidth from the LLVM data layout.
	unsigned indexBitwidth = kDeriveIndexBitwidthFromDataLayout;
	/// Use aligned_alloc for heap allocations.
	bool useAlignedAlloc = false;
	};

	/// Creates a pass to convert the Standard dialect into the LLVMIR dialect.			/// Creates a pass to convert the Standard dialect into the LLVMIR dialect.
	/// stdlib malloc/free is used by default for allocating memrefs allocated with			/// stdlib malloc/free is used by default for allocating memrefs allocated with
	/// std.alloc, while LLVM's alloca is used for those allocated with std.alloca.			/// std.alloc, while LLVM's alloca is used for those allocated with std.alloca.
	std::unique_ptr<OperationPass<ModuleOp>>			std::unique_ptr<OperationPass<ModuleOp>>
	createLowerToLLVMPass(const LowerToLLVMOptions &options = {			createLowerToLLVMPass(const LowerToLLVMOptions &options = {
	/useBarePtrCallConv=/false, /emitCWrappers=/false,			/useBarePtrCallConv=/false, /emitCWrappers=/false,
	/indexBitwidth=/kDeriveIndexBitwidthFromDataLayout,			/indexBitwidth=/kDeriveIndexBitwidthFromDataLayout,
	/useAlignedAlloc=/false});			/useAlignedAlloc=/false});

	} // namespace mlir			} // namespace mlir

	#endif // MLIR_CONVERSION_STANDARDTOLLVM_CONVERTSTANDARDTOLLVMPASS_H_			#endif // MLIR_CONVERSION_STANDARDTOLLVM_CONVERTSTANDARDTOLLVMPASS_H_

mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp

Show All 24 Lines
#include "../GPUCommon/IndexIntrinsicsOpLowering.h"		#include "../GPUCommon/IndexIntrinsicsOpLowering.h"
#include "../GPUCommon/OpToFuncCallLowering.h"		#include "../GPUCommon/OpToFuncCallLowering.h"
#include "../PassDetail.h"		#include "../PassDetail.h"

using namespace mlir;		using namespace mlir;

namespace {		namespace {


struct GPUShuffleOpLowering : public ConvertToLLVMPattern {		struct GPUShuffleOpLowering : public ConvertToLLVMPattern {
explicit GPUShuffleOpLowering(LLVMTypeConverter &lowering_)		explicit GPUShuffleOpLowering(LLVMTypeConverter &lowering_)
: ConvertToLLVMPattern(gpu::ShuffleOp::getOperationName(),		: ConvertToLLVMPattern(gpu::ShuffleOp::getOperationName(),
lowering_.getDialect()->getContext(), lowering_) {}		lowering_.getDialect()->getContext(), lowering_) {}

/// Lowers a shuffle to the corresponding NVVM op.		/// Lowers a shuffle to the corresponding NVVM op.
///		///
/// Convert the `width` argument into an activeMask (a bitmask which specifies		/// Convert the `width` argument into an activeMask (a bitmask which specifies
▲ Show 20 Lines • Show All 50 Lines • ▼ Show 20 Lines
/// Import the GPU Ops to NVVM Patterns.		/// Import the GPU Ops to NVVM Patterns.
#include "GPUToNVVM.cpp.inc"		#include "GPUToNVVM.cpp.inc"

/// A pass that replaces all occurrences of GPU device operations with their		/// A pass that replaces all occurrences of GPU device operations with their
/// corresponding NVVM equivalent.		/// corresponding NVVM equivalent.
///		///
/// This pass only handles device code and is not meant to be run on GPU host		/// This pass only handles device code and is not meant to be run on GPU host
/// code.		/// code.
class LowerGpuOpsToNVVMOpsPass		struct LowerGpuOpsToNVVMOpsPass
: public ConvertGpuOpsToNVVMOpsBase<LowerGpuOpsToNVVMOpsPass> {		: public ConvertGpuOpsToNVVMOpsBase<LowerGpuOpsToNVVMOpsPass> {
public:		LowerGpuOpsToNVVMOpsPass() = default;
		LowerGpuOpsToNVVMOpsPass(unsigned indexBitwidth) {
		this->indexBitwidth = indexBitwidth;
		}

void runOnOperation() override {		void runOnOperation() override {
gpu::GPUModuleOp m = getOperation();		gpu::GPUModuleOp m = getOperation();

		/// Customize the bitwidth used for the device side index computations.
		herhutUnsubmitted Done Reply Inline Actions Typo: `bidtwidth` -> `bitwidth` herhut: Typo: `bidtwidth` -> `bitwidth`
		gysitAuthorUnsubmitted Done Reply Inline Actions fixed gysit: fixed
		LowerToLLVMOptions options = {/*useBarePtrCallConv =*/false,
		/*emitCWrappers = */ true,
		/*indexBitwidth =*/indexBitwidth,
		/*useAlignedAlloc =*/false};

/// MemRef conversion for GPU to NVVM lowering. The GPU dialect uses memory		/// MemRef conversion for GPU to NVVM lowering. The GPU dialect uses memory
/// space 5 for private memory attributions, but NVVM represents private		/// space 5 for private memory attributions, but NVVM represents private
/// memory allocations as local `alloca`s in the default address space. This		/// memory allocations as local `alloca`s in the default address space. This
/// converter drops the private memory space to support the use case above.		/// converter drops the private memory space to support the use case above.
LLVMTypeConverter converter(m.getContext());		LLVMTypeConverter converter(m.getContext(), options);
converter.addConversion([&](MemRefType type) -> Optional<Type> {		converter.addConversion([&](MemRefType type) -> Optional<Type> {
if (type.getMemorySpace() != gpu::GPUDialect::getPrivateAddressSpace())		if (type.getMemorySpace() != gpu::GPUDialect::getPrivateAddressSpace())
return llvm::None;		return llvm::None;
return converter.convertType(MemRefType::Builder(type).setMemorySpace(0));		return converter.convertType(MemRefType::Builder(type).setMemorySpace(0));
});		});

OwningRewritePatternList patterns;		OwningRewritePatternList patterns;

▲ Show 20 Lines • Show All 52 Lines • ▼ Show 20 Lines	patterns.insert<OpToFuncCallLowering<Log10Op>>(converter, "__nv_log10f",
"__nv_log10");		"__nv_log10");
patterns.insert<OpToFuncCallLowering<Log2Op>>(converter, "__nv_log2f",		patterns.insert<OpToFuncCallLowering<Log2Op>>(converter, "__nv_log2f",
"__nv_log2");		"__nv_log2");
patterns.insert<OpToFuncCallLowering<TanhOp>>(converter, "__nv_tanhf",		patterns.insert<OpToFuncCallLowering<TanhOp>>(converter, "__nv_tanhf",
"__nv_tanh");		"__nv_tanh");
}		}

std::unique_ptr<OperationPass<gpu::GPUModuleOp>>		std::unique_ptr<OperationPass<gpu::GPUModuleOp>>
mlir::createLowerGpuOpsToNVVMOpsPass() {		mlir::createLowerGpuOpsToNVVMOpsPass(unsigned indexBitwidth) {
return std::make_unique<LowerGpuOpsToNVVMOpsPass>();		return std::make_unique<LowerGpuOpsToNVVMOpsPass>(indexBitwidth);
}		}

mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp

Show All 35 Lines
/// Import the GPU Ops to ROCDL Patterns.		/// Import the GPU Ops to ROCDL Patterns.
#include "GPUToROCDL.cpp.inc"		#include "GPUToROCDL.cpp.inc"

// A pass that replaces all occurrences of GPU device operations with their		// A pass that replaces all occurrences of GPU device operations with their
// corresponding ROCDL equivalent.		// corresponding ROCDL equivalent.
//		//
// This pass only handles device code and is not meant to be run on GPU host		// This pass only handles device code and is not meant to be run on GPU host
// code.		// code.
class LowerGpuOpsToROCDLOpsPass		struct LowerGpuOpsToROCDLOpsPass
: public ConvertGpuOpsToROCDLOpsBase<LowerGpuOpsToROCDLOpsPass> {		: public ConvertGpuOpsToROCDLOpsBase<LowerGpuOpsToROCDLOpsPass> {
public:		LowerGpuOpsToROCDLOpsPass() = default;
		LowerGpuOpsToROCDLOpsPass(unsigned indexBitwidth) {
		this->indexBitwidth = indexBitwidth;
		}

void runOnOperation() override {		void runOnOperation() override {
gpu::GPUModuleOp m = getOperation();		gpu::GPUModuleOp m = getOperation();

LLVMTypeConverter converter(m.getContext());		/// Customize the bitwidth used for the device side index computations.
		LowerToLLVMOptions options = {/*useBarePtrCallConv =*/false,
		/*emitCWrappers = */ true,
		/*indexBitwidth =*/indexBitwidth,
		/*useAlignedAlloc =*/false};
		LLVMTypeConverter converter(m.getContext(), options);

OwningRewritePatternList patterns;		OwningRewritePatternList patterns;

populateGpuRewritePatterns(m.getContext(), patterns);		populateGpuRewritePatterns(m.getContext(), patterns);
applyPatternsAndFoldGreedily(m, patterns);		applyPatternsAndFoldGreedily(m, patterns);
patterns.clear();		patterns.clear();

populateVectorToLLVMConversionPatterns(converter, patterns);		populateVectorToLLVMConversionPatterns(converter, patterns);
▲ Show 20 Lines • Show All 42 Lines • ▼ Show 20 Lines	patterns.insert<OpToFuncCallLowering<Log10Op>>(converter, "__ocml_log10_f32",
"__ocml_log10_f64");		"__ocml_log10_f64");
patterns.insert<OpToFuncCallLowering<Log2Op>>(converter, "__ocml_log2_f32",		patterns.insert<OpToFuncCallLowering<Log2Op>>(converter, "__ocml_log2_f32",
"__ocml_log2_f64");		"__ocml_log2_f64");
patterns.insert<OpToFuncCallLowering<TanhOp>>(converter, "__ocml_tanh_f32",		patterns.insert<OpToFuncCallLowering<TanhOp>>(converter, "__ocml_tanh_f32",
"__ocml_tanh_f64");		"__ocml_tanh_f64");
}		}

std::unique_ptr<OperationPass<gpu::GPUModuleOp>>		std::unique_ptr<OperationPass<gpu::GPUModuleOp>>
mlir::createLowerGpuOpsToROCDLOpsPass() {		mlir::createLowerGpuOpsToROCDLOpsPass(unsigned indexBitwidth) {
return std::make_unique<LowerGpuOpsToROCDLOpsPass>();		return std::make_unique<LowerGpuOpsToROCDLOpsPass>(indexBitwidth);
}		}

mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp

Show First 20 Lines • Show All 45 Lines • ▼ Show 20 Lines	static LLVM::LLVMType unwrap(Type type) {
auto *mlirContext = type.getContext();		auto *mlirContext = type.getContext();
auto wrappedLLVMType = type.dyn_cast<LLVM::LLVMType>();		auto wrappedLLVMType = type.dyn_cast<LLVM::LLVMType>();
if (!wrappedLLVMType)		if (!wrappedLLVMType)
emitError(UnknownLoc::get(mlirContext),		emitError(UnknownLoc::get(mlirContext),
"conversion resulted in a non-LLVM type");		"conversion resulted in a non-LLVM type");
return wrappedLLVMType;		return wrappedLLVMType;
}		}

/// Initialize customization to default callbacks.
LLVMTypeConverterCustomization::LLVMTypeConverterCustomization()
: funcArgConverter(structFuncArgTypeConverter),
indexBitwidth(kDeriveIndexBitwidthFromDataLayout) {}

/// Callback to convert function argument types. It converts a MemRef function		/// Callback to convert function argument types. It converts a MemRef function
/// argument to a list of non-aggregate types containing descriptor		/// argument to a list of non-aggregate types containing descriptor
/// information, and an UnrankedMemRef function argument to a list containing		/// information, and an UnrankedMemRef function argument to a list containing
/// the rank and a pointer to a descriptor struct.		/// the rank and a pointer to a descriptor struct.
LogicalResult mlir::structFuncArgTypeConverter(LLVMTypeConverter &converter,		LogicalResult mlir::structFuncArgTypeConverter(LLVMTypeConverter &converter,
Type type,		Type type,
SmallVectorImpl<Type> &result) {		SmallVectorImpl<Type> &result) {
if (auto memref = type.dyn_cast<MemRefType>()) {		if (auto memref = type.dyn_cast<MemRefType>()) {
▲ Show 20 Lines • Show All 50 Lines • ▼ Show 20 Lines	LogicalResult mlir::barePtrFuncArgTypeConverter(LLVMTypeConverter &converter,
auto llvmTy = converter.convertType(type);		auto llvmTy = converter.convertType(type);
if (!llvmTy)		if (!llvmTy)
return failure();		return failure();

result.push_back(llvmTy);		result.push_back(llvmTy);
return success();		return success();
}		}

/// Create an LLVMTypeConverter using default LLVMTypeConverterCustomization.		/// Create an LLVMTypeConverter using default LowerToLLVMOptions.
LLVMTypeConverter::LLVMTypeConverter(MLIRContext *ctx)		LLVMTypeConverter::LLVMTypeConverter(MLIRContext *ctx)
: LLVMTypeConverter(ctx, LLVMTypeConverterCustomization()) {}		: LLVMTypeConverter(ctx, LowerToLLVMOptions()) {}

/// Create an LLVMTypeConverter using 'custom' customizations.		/// Create an LLVMTypeConverter using custom LowerToLLVMOptions.
LLVMTypeConverter::LLVMTypeConverter(		LLVMTypeConverter::LLVMTypeConverter(MLIRContext *ctx,
MLIRContext *ctx, const LLVMTypeConverterCustomization &customs)		const LowerToLLVMOptions &options_)
: llvmDialect(ctx->getRegisteredDialect<LLVM::LLVMDialect>()),		: llvmDialect(ctx->getRegisteredDialect<LLVM::LLVMDialect>()),
customizations(customs) {		options(options_) {
assert(llvmDialect && "LLVM IR dialect is not registered");		assert(llvmDialect && "LLVM IR dialect is not registered");
module = &llvmDialect->getLLVMModule();		module = &llvmDialect->getLLVMModule();
if (customizations.indexBitwidth == kDeriveIndexBitwidthFromDataLayout)		if (options.indexBitwidth == kDeriveIndexBitwidthFromDataLayout)
customizations.indexBitwidth =		options.indexBitwidth = module->getDataLayout().getPointerSizeInBits();
module->getDataLayout().getPointerSizeInBits();

// Register conversions for the standard types.		// Register conversions for the standard types.
addConversion([&](ComplexType type) { return convertComplexType(type); });		addConversion([&](ComplexType type) { return convertComplexType(type); });
addConversion([&](FloatType type) { return convertFloatType(type); });		addConversion([&](FloatType type) { return convertFloatType(type); });
addConversion([&](FunctionType type) { return convertFunctionType(type); });		addConversion([&](FunctionType type) { return convertFunctionType(type); });
addConversion([&](IndexType type) { return convertIndexType(type); });		addConversion([&](IndexType type) { return convertIndexType(type); });
addConversion([&](IntegerType type) { return convertIntegerType(type); });		addConversion([&](IntegerType type) { return convertIntegerType(type); });
addConversion([&](MemRefType type) { return convertMemRefType(type); });		addConversion([&](MemRefType type) { return convertMemRefType(type); });
▲ Show 20 Lines • Show All 110 Lines • ▼ Show 20 Lines

// Function types are converted to LLVM Function types by recursively converting		// Function types are converted to LLVM Function types by recursively converting
// argument and result types. If MLIR Function has zero results, the LLVM		// argument and result types. If MLIR Function has zero results, the LLVM
// Function has one VoidType result. If MLIR Function has more than one result,		// Function has one VoidType result. If MLIR Function has more than one result,
// they are packed into an LLVM StructType in their order of appearance.		// they are packed into an LLVM StructType in their order of appearance.
LLVM::LLVMType LLVMTypeConverter::convertFunctionSignature(		LLVM::LLVMType LLVMTypeConverter::convertFunctionSignature(
FunctionType type, bool isVariadic,		FunctionType type, bool isVariadic,
LLVMTypeConverter::SignatureConversion &result) {		LLVMTypeConverter::SignatureConversion &result) {
		// Select the argument converter depending on the calling convention.
		auto funcArgConverter = options.useBarePtrCallConv
		? barePtrFuncArgTypeConverter
		: structFuncArgTypeConverter;
// Convert argument types one by one and check for errors.		// Convert argument types one by one and check for errors.
for (auto &en : llvm::enumerate(type.getInputs())) {		for (auto &en : llvm::enumerate(type.getInputs())) {
Type type = en.value();		Type type = en.value();
SmallVector<Type, 8> converted;		SmallVector<Type, 8> converted;
if (failed(customizations.funcArgConverter(*this, type, converted)))		if (failed(funcArgConverter(*this, type, converted)))
return {};		return {};
result.addInputs(en.index(), converted);		result.addInputs(en.index(), converted);
}		}

SmallVector<LLVM::LLVMType, 8> argTypes;		SmallVector<LLVM::LLVMType, 8> argTypes;
argTypes.reserve(llvm::size(result.getConvertedTypes()));		argTypes.reserve(llvm::size(result.getConvertedTypes()));
for (Type type : result.getConvertedTypes())		for (Type type : result.getConvertedTypes())
argTypes.push_back(unwrap(type));		argTypes.push_back(unwrap(type));
▲ Show 20 Lines • Show All 114 Lines • ▼ Show 20 Lines	Type LLVMTypeConverter::convertVectorType(VectorType type) {
for (int i = shape.size() - 2; i >= 0; --i)		for (int i = shape.size() - 2; i >= 0; --i)
vectorType = LLVM::LLVMType::getArrayTy(vectorType, shape[i]);		vectorType = LLVM::LLVMType::getArrayTy(vectorType, shape[i]);
return vectorType;		return vectorType;
}		}

ConvertToLLVMPattern::ConvertToLLVMPattern(StringRef rootOpName,		ConvertToLLVMPattern::ConvertToLLVMPattern(StringRef rootOpName,
MLIRContext *context,		MLIRContext *context,
LLVMTypeConverter &typeConverter_,		LLVMTypeConverter &typeConverter_,
		const LowerToLLVMOptions &options_,
PatternBenefit benefit)		PatternBenefit benefit)
: ConversionPattern(rootOpName, benefit, typeConverter_, context),		: ConversionPattern(rootOpName, benefit, typeConverter_, context),
typeConverter(typeConverter_) {}		typeConverter(typeConverter_), options(options_) {}

/*============================================================================*/		/*============================================================================*/
/* StructBuilder implementation */		/* StructBuilder implementation */
/*============================================================================*/		/*============================================================================*/

StructBuilder::StructBuilder(Value v) : value(v) {		StructBuilder::StructBuilder(Value v) : value(v) {
assert(value != nullptr && "value cannot be null");		assert(value != nullptr && "value cannot be null");
structType = value.getType().dyn_cast<LLVM::LLVMType>();		structType = value.getType().dyn_cast<LLVM::LLVMType>();
▲ Show 20 Lines • Show All 635 Lines • ▼ Show 20 Lines	protected:
}		}
};		};

/// FuncOp legalization pattern that converts MemRef arguments to pointers to		/// FuncOp legalization pattern that converts MemRef arguments to pointers to
/// MemRef descriptors (LLVM struct data types) containing all the MemRef type		/// MemRef descriptors (LLVM struct data types) containing all the MemRef type
/// information.		/// information.
static constexpr StringRef kEmitIfaceAttrName = "llvm.emit_c_interface";		static constexpr StringRef kEmitIfaceAttrName = "llvm.emit_c_interface";
struct FuncOpConversion : public FuncOpConversionBase {		struct FuncOpConversion : public FuncOpConversionBase {
FuncOpConversion(LLVMTypeConverter &converter, bool emitCWrappers)		FuncOpConversion(LLVMTypeConverter &converter,
: FuncOpConversionBase(converter), emitWrappers(emitCWrappers) {}		const LowerToLLVMOptions &options)
		: FuncOpConversionBase(converter, options) {}
		using ConvertOpToLLVMPattern<FuncOp>::options;

LogicalResult		LogicalResult
matchAndRewrite(Operation *op, ArrayRef<Value> operands,		matchAndRewrite(Operation *op, ArrayRef<Value> operands,
ConversionPatternRewriter &rewriter) const override {		ConversionPatternRewriter &rewriter) const override {
auto funcOp = cast<FuncOp>(op);		auto funcOp = cast<FuncOp>(op);

auto newFuncOp = convertFuncOpToLLVMFuncOp(funcOp, rewriter);		auto newFuncOp = convertFuncOpToLLVMFuncOp(funcOp, rewriter);
if (!newFuncOp)		if (!newFuncOp)
return failure();		return failure();

if (emitWrappers || funcOp.getAttrOfType<UnitAttr>(kEmitIfaceAttrName)) {		if (options.emitCWrappers || funcOp.getAttrOfType<UnitAttr>(kEmitIfaceAttrName)) {
if (newFuncOp.isExternal())		if (newFuncOp.isExternal())
wrapExternalFunction(rewriter, op->getLoc(), typeConverter, funcOp,		wrapExternalFunction(rewriter, op->getLoc(), typeConverter, funcOp,
newFuncOp);		newFuncOp);
else		else
wrapForExternalCallers(rewriter, op->getLoc(), typeConverter, funcOp,		wrapForExternalCallers(rewriter, op->getLoc(), typeConverter, funcOp,
newFuncOp);		newFuncOp);
}		}

rewriter.eraseOp(op);		rewriter.eraseOp(op);
return success();		return success();
}		}

private:
/// If true, also create the adaptor functions having signatures compatible
/// with those produced by clang.
const bool emitWrappers;
};		};

/// FuncOp legalization pattern that converts MemRef arguments to bare pointers		/// FuncOp legalization pattern that converts MemRef arguments to bare pointers
/// to the MemRef element type. This will impact the calling convention and ABI.		/// to the MemRef element type. This will impact the calling convention and ABI.
struct BarePtrFuncOpConversion : public FuncOpConversionBase {		struct BarePtrFuncOpConversion : public FuncOpConversionBase {
using FuncOpConversionBase::FuncOpConversionBase;		using FuncOpConversionBase::FuncOpConversionBase;

LogicalResult		LogicalResult
▲ Show 20 Lines • Show All 410 Lines • ▼ Show 20 Lines

/// Lowering for AllocOp and AllocaOp.		/// Lowering for AllocOp and AllocaOp.
template <typename AllocLikeOp>		template <typename AllocLikeOp>
struct AllocLikeOpLowering : public ConvertOpToLLVMPattern<AllocLikeOp> {		struct AllocLikeOpLowering : public ConvertOpToLLVMPattern<AllocLikeOp> {
using ConvertOpToLLVMPattern<AllocLikeOp>::createIndexConstant;		using ConvertOpToLLVMPattern<AllocLikeOp>::createIndexConstant;
using ConvertOpToLLVMPattern<AllocLikeOp>::getIndexType;		using ConvertOpToLLVMPattern<AllocLikeOp>::getIndexType;
using ConvertOpToLLVMPattern<AllocLikeOp>::typeConverter;		using ConvertOpToLLVMPattern<AllocLikeOp>::typeConverter;
using ConvertOpToLLVMPattern<AllocLikeOp>::getVoidPtrType;		using ConvertOpToLLVMPattern<AllocLikeOp>::getVoidPtrType;
		using ConvertOpToLLVMPattern<AllocLikeOp>::options;

explicit AllocLikeOpLowering(LLVMTypeConverter &converter,		explicit AllocLikeOpLowering(LLVMTypeConverter &converter,
bool useAlignedAlloc = false)		const LowerToLLVMOptions &options)
: ConvertOpToLLVMPattern<AllocLikeOp>(converter),		: ConvertOpToLLVMPattern<AllocLikeOp>(converter, options) {}
useAlignedAlloc(useAlignedAlloc) {}

LogicalResult match(Operation *op) const override {		LogicalResult match(Operation *op) const override {
MemRefType memRefType = cast<AllocLikeOp>(op).getType();		MemRefType memRefType = cast<AllocLikeOp>(op).getType();
if (isSupportedMemRefType(memRefType))		if (isSupportedMemRefType(memRefType))
return success();		return success();

int64_t offset;		int64_t offset;
SmallVector<int64_t, 4> strides;		SmallVector<int64_t, 4> strides;
▲ Show 20 Lines • Show All 150 Lines • ▼ Show 20 Lines	static unsigned getMemRefEltSizeInBytes(MemRefType memRefType) {
return llvm::divideCeil(sizeInBits, 8);		return llvm::divideCeil(sizeInBits, 8);
}		}

/// Returns the alignment to be used for the allocation call itself.		/// Returns the alignment to be used for the allocation call itself.
/// aligned_alloc requires the alignment to be a power of two, and the		/// aligned_alloc requires the alignment to be a power of two, and the
/// allocation size to be a multiple of the alignment.		/// allocation size to be a multiple of the alignment.
Optional<int64_t> getAllocationAlignment(AllocOp allocOp) const {		Optional<int64_t> getAllocationAlignment(AllocOp allocOp) const {
// No alignment can be used for the 'malloc' call itself.		// No alignment can be used for the 'malloc' call itself.
if (!useAlignedAlloc)		if (!options.useAlignedAlloc)
return None;		return None;

if (allocOp.alignment())		if (allocOp.alignment())
return allocOp.alignment().getValue().getSExtValue();		return allocOp.alignment().getValue().getSExtValue();

// Whenever we don't have alignment set, we will use an alignment		// Whenever we don't have alignment set, we will use an alignment
// consistent with the element type; since the alignment has to be a		// consistent with the element type; since the alignment has to be a
// power of two, we will bump to the next power of two if it already isn't.		// power of two, we will bump to the next power of two if it already isn't.
▲ Show 20 Lines • Show All 155 Lines • ▼ Show 20 Lines	auto memRefDescriptor = createMemRefDescriptor(
loc, rewriter, memRefType, allocatedTypePtr, allocatedBytePtr,		loc, rewriter, memRefType, allocatedTypePtr, allocatedBytePtr,
accessAlignment, offset, strides, sizes);		accessAlignment, offset, strides, sizes);

// Return the final value of the descriptor.		// Return the final value of the descriptor.
rewriter.replaceOp(op, {memRefDescriptor});		rewriter.replaceOp(op, {memRefDescriptor});
}		}

protected:		protected:
/// Use aligned_alloc instead of malloc for all heap allocations.
bool useAlignedAlloc;
/// The minimum alignment to use with aligned_alloc (has to be a power of 2).		/// The minimum alignment to use with aligned_alloc (has to be a power of 2).
uint64_t kMinAlignedAllocAlignment = 16UL;		uint64_t kMinAlignedAllocAlignment = 16UL;
};		};

struct AllocOpLowering : public AllocLikeOpLowering<AllocOp> {		struct AllocOpLowering : public AllocLikeOpLowering<AllocOp> {
explicit AllocOpLowering(LLVMTypeConverter &converter,		explicit AllocOpLowering(LLVMTypeConverter &converter,
bool useAlignedAlloc = false)		const LowerToLLVMOptions &options)
: AllocLikeOpLowering<AllocOp>(converter, useAlignedAlloc) {}		: AllocLikeOpLowering<AllocOp>(converter, options) {}
};		};

using AllocaOpLowering = AllocLikeOpLowering<AllocaOp>;		using AllocaOpLowering = AllocLikeOpLowering<AllocaOp>;

// A CallOp automatically promotes MemRefType to a sequence of alloca/store and		// A CallOp automatically promotes MemRefType to a sequence of alloca/store and
// passes the pointer to the MemRef across function boundaries.		// passes the pointer to the MemRef across function boundaries.
template <typename CallOpType>		template <typename CallOpType>
struct CallOpInterfaceLowering : public ConvertOpToLLVMPattern<CallOpType> {		struct CallOpInterfaceLowering : public ConvertOpToLLVMPattern<CallOpType> {
▲ Show 20 Lines • Show All 64 Lines • ▼ Show 20 Lines
};		};

// A `dealloc` is converted into a call to `free` on the underlying data buffer.		// A `dealloc` is converted into a call to `free` on the underlying data buffer.
// The memref descriptor being an SSA value, there is no need to clean it up		// The memref descriptor being an SSA value, there is no need to clean it up
// in any way.		// in any way.
struct DeallocOpLowering : public ConvertOpToLLVMPattern<DeallocOp> {		struct DeallocOpLowering : public ConvertOpToLLVMPattern<DeallocOp> {
using ConvertOpToLLVMPattern<DeallocOp>::ConvertOpToLLVMPattern;		using ConvertOpToLLVMPattern<DeallocOp>::ConvertOpToLLVMPattern;

explicit DeallocOpLowering(LLVMTypeConverter &converter)		explicit DeallocOpLowering(LLVMTypeConverter &converter,
: ConvertOpToLLVMPattern<DeallocOp>(converter) {}		const LowerToLLVMOptions &options)
		: ConvertOpToLLVMPattern<DeallocOp>(converter, options) {}

LogicalResult		LogicalResult
matchAndRewrite(Operation *op, ArrayRef<Value> operands,		matchAndRewrite(Operation *op, ArrayRef<Value> operands,
ConversionPatternRewriter &rewriter) const override {		ConversionPatternRewriter &rewriter) const override {
assert(operands.size() == 1 && "dealloc takes one operand");		assert(operands.size() == 1 && "dealloc takes one operand");
DeallocOp::Adaptor transformed(operands);		DeallocOp::Adaptor transformed(operands);

// Insert the `free` declaration if it is not already present.		// Insert the `free` declaration if it is not already present.
▲ Show 20 Lines • Show All 1,003 Lines • ▼ Show 20 Lines	for (auto *it : opsToErase)
rewriter.eraseOp(it);		rewriter.eraseOp(it);
}		}
};		};

} // namespace		} // namespace

/// Collect a set of patterns to convert from the Standard dialect to LLVM.		/// Collect a set of patterns to convert from the Standard dialect to LLVM.
void mlir::populateStdToLLVMNonMemoryConversionPatterns(		void mlir::populateStdToLLVMNonMemoryConversionPatterns(
LLVMTypeConverter &converter, OwningRewritePatternList &patterns) {		LLVMTypeConverter &converter, OwningRewritePatternList &patterns,
		const LowerToLLVMOptions &options) {
// FIXME: this should be tablegen'ed		// FIXME: this should be tablegen'ed
// clang-format off		// clang-format off
patterns.insert<		patterns.insert<
AbsFOpLowering,		AbsFOpLowering,
AddCFOpLowering,		AddCFOpLowering,
AddFOpLowering,		AddFOpLowering,
AddIOpLowering,		AddIOpLowering,
AllocaOpLowering,		AllocaOpLowering,
▲ Show 20 Lines • Show All 46 Lines • ▼ Show 20 Lines	patterns.insert<
SubCFOpLowering,		SubCFOpLowering,
SubFOpLowering,		SubFOpLowering,
SubIOpLowering,		SubIOpLowering,
TruncateIOpLowering,		TruncateIOpLowering,
UnsignedDivIOpLowering,		UnsignedDivIOpLowering,
UnsignedRemIOpLowering,		UnsignedRemIOpLowering,
UnsignedShiftRightOpLowering,		UnsignedShiftRightOpLowering,
XOrOpLowering,		XOrOpLowering,
ZeroExtendIOpLowering>(converter);		ZeroExtendIOpLowering>(converter, options);
// clang-format on		// clang-format on
}		}

void mlir::populateStdToLLVMMemoryConversionPatterns(		void mlir::populateStdToLLVMMemoryConversionPatterns(
LLVMTypeConverter &converter, OwningRewritePatternList &patterns,		LLVMTypeConverter &converter, OwningRewritePatternList &patterns,
bool useAlignedAlloc) {		const LowerToLLVMOptions &options) {
// clang-format off		// clang-format off
patterns.insert<		patterns.insert<
AssumeAlignmentOpLowering,		AssumeAlignmentOpLowering,
DeallocOpLowering,		DeallocOpLowering,
DimOpLowering,		DimOpLowering,
LoadOpLowering,		LoadOpLowering,
MemRefCastOpLowering,		MemRefCastOpLowering,
StoreOpLowering,		StoreOpLowering,
SubViewOpLowering,		SubViewOpLowering,
ViewOpLowering>(converter);		ViewOpLowering,
patterns.insert<		AllocOpLowering>(converter, options);
AllocOpLowering
>(converter, useAlignedAlloc);
// clang-format on		// clang-format on
}		}

void mlir::populateStdToLLVMDefaultFuncOpConversionPattern(		void mlir::populateStdToLLVMFuncOpConversionPattern(
LLVMTypeConverter &converter, OwningRewritePatternList &patterns,		LLVMTypeConverter &converter, OwningRewritePatternList &patterns,
bool emitCWrappers) {		const LowerToLLVMOptions &options) {
patterns.insert<FuncOpConversion>(converter, emitCWrappers);		if (options.useBarePtrCallConv)
		patterns.insert<BarePtrFuncOpConversion>(converter, options);
		else
		patterns.insert<FuncOpConversion>(converter, options);
}		}

void mlir::populateStdToLLVMConversionPatterns(		void mlir::populateStdToLLVMConversionPatterns(
LLVMTypeConverter &converter, OwningRewritePatternList &patterns,		LLVMTypeConverter &converter, OwningRewritePatternList &patterns,
bool emitCWrappers, bool useAlignedAlloc) {		const LowerToLLVMOptions &options) {
populateStdToLLVMDefaultFuncOpConversionPattern(converter, patterns,		populateStdToLLVMFuncOpConversionPattern(converter, patterns, options);
emitCWrappers);		populateStdToLLVMNonMemoryConversionPatterns(converter, patterns, options);
populateStdToLLVMNonMemoryConversionPatterns(converter, patterns);		populateStdToLLVMMemoryConversionPatterns(converter, patterns, options);
populateStdToLLVMMemoryConversionPatterns(converter, patterns,
useAlignedAlloc);
}

static void populateStdToLLVMBarePtrFuncOpConversionPattern(
LLVMTypeConverter &converter, OwningRewritePatternList &patterns) {
patterns.insert<BarePtrFuncOpConversion>(converter);
}

void mlir::populateStdToLLVMBarePtrConversionPatterns(
LLVMTypeConverter &converter, OwningRewritePatternList &patterns,
bool useAlignedAlloc) {
populateStdToLLVMBarePtrFuncOpConversionPattern(converter, patterns);
populateStdToLLVMNonMemoryConversionPatterns(converter, patterns);
populateStdToLLVMMemoryConversionPatterns(converter, patterns,
useAlignedAlloc);
}		}

// Create an LLVM IR structure type if there is more than one result.		// Create an LLVM IR structure type if there is more than one result.
Type LLVMTypeConverter::packFunctionResults(ArrayRef<Type> types) {		Type LLVMTypeConverter::packFunctionResults(ArrayRef<Type> types) {
assert(!types.empty() && "expected non-empty list of type");		assert(!types.empty() && "expected non-empty list of type");

if (types.size() == 1)		if (types.size() == 1)
return convertType(types.front());		return convertType(types.front());
▲ Show 20 Lines • Show All 73 Lines • ▼ Show 20 Lines	if (useBarePtrCallConv && emitCWrappers) {
<< "incompatible conversion options: bare-pointer calling convention "		<< "incompatible conversion options: bare-pointer calling convention "
"and C wrapper emission";		"and C wrapper emission";
signalPassFailure();		signalPassFailure();
return;		return;
}		}

ModuleOp m = getOperation();		ModuleOp m = getOperation();

LLVMTypeConverterCustomization customs;		LowerToLLVMOptions options = {useBarePtrCallConv, emitCWrappers,
customs.funcArgConverter = useBarePtrCallConv ? barePtrFuncArgTypeConverter		indexBitwidth, useAlignedAlloc};
: structFuncArgTypeConverter;		LLVMTypeConverter typeConverter(&getContext(), options);
customs.indexBitwidth = indexBitwidth;
LLVMTypeConverter typeConverter(&getContext(), customs);

OwningRewritePatternList patterns;		OwningRewritePatternList patterns;
if (useBarePtrCallConv)		populateStdToLLVMConversionPatterns(typeConverter, patterns, options);
populateStdToLLVMBarePtrConversionPatterns(typeConverter, patterns,
useAlignedAlloc);
else
populateStdToLLVMConversionPatterns(typeConverter, patterns,
emitCWrappers, useAlignedAlloc);

LLVMConversionTarget target(getContext());		LLVMConversionTarget target(getContext());
if (failed(applyPartialConversion(m, target, patterns)))		if (failed(applyPartialConversion(m, target, patterns)))
signalPassFailure();		signalPassFailure();
}		}
};		};
} // end namespace		} // end namespace

Show All 13 Lines

mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir

	// RUN: mlir-opt %s -convert-gpu-to-nvvm -split-input-file \| FileCheck %s			// RUN: mlir-opt %s -convert-gpu-to-nvvm -split-input-file \| FileCheck %s
				// RUN: mlir-opt %s -convert-gpu-to-nvvm='index-bitwidth=32' -split-input-file \| FileCheck --check-prefix=CHECK32 %s

	gpu.module @test_module {			gpu.module @test_module {
	// CHECK-LABEL: func @gpu_index_ops()			// CHECK-LABEL: func @gpu_index_ops()
				// CHECK32-LABEL: func @gpu_index_ops()
	func @gpu_index_ops()			func @gpu_index_ops()
	-> (index, index, index, index, index, index,			-> (index, index, index, index, index, index,
	index, index, index, index, index, index) {			index, index, index, index, index, index) {
				// CHECK32-NOT: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64

	// CHECK: = nvvm.read.ptx.sreg.tid.x : !llvm.i32			// CHECK: = nvvm.read.ptx.sreg.tid.x : !llvm.i32
				// CHECK: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64
	%tIdX = "gpu.thread_id"() {dimension = "x"} : () -> (index)			%tIdX = "gpu.thread_id"() {dimension = "x"} : () -> (index)
	// CHECK: = nvvm.read.ptx.sreg.tid.y : !llvm.i32			// CHECK: = nvvm.read.ptx.sreg.tid.y : !llvm.i32
				// CHECK: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64
	%tIdY = "gpu.thread_id"() {dimension = "y"} : () -> (index)			%tIdY = "gpu.thread_id"() {dimension = "y"} : () -> (index)
	// CHECK: = nvvm.read.ptx.sreg.tid.z : !llvm.i32			// CHECK: = nvvm.read.ptx.sreg.tid.z : !llvm.i32
				// CHECK: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64
	%tIdZ = "gpu.thread_id"() {dimension = "z"} : () -> (index)			%tIdZ = "gpu.thread_id"() {dimension = "z"} : () -> (index)

	// CHECK: = nvvm.read.ptx.sreg.ntid.x : !llvm.i32			// CHECK: = nvvm.read.ptx.sreg.ntid.x : !llvm.i32
				// CHECK: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64
	%bDimX = "gpu.block_dim"() {dimension = "x"} : () -> (index)			%bDimX = "gpu.block_dim"() {dimension = "x"} : () -> (index)
	// CHECK: = nvvm.read.ptx.sreg.ntid.y : !llvm.i32			// CHECK: = nvvm.read.ptx.sreg.ntid.y : !llvm.i32
				// CHECK: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64
	%bDimY = "gpu.block_dim"() {dimension = "y"} : () -> (index)			%bDimY = "gpu.block_dim"() {dimension = "y"} : () -> (index)
	// CHECK: = nvvm.read.ptx.sreg.ntid.z : !llvm.i32			// CHECK: = nvvm.read.ptx.sreg.ntid.z : !llvm.i32
				// CHECK: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64
	%bDimZ = "gpu.block_dim"() {dimension = "z"} : () -> (index)			%bDimZ = "gpu.block_dim"() {dimension = "z"} : () -> (index)

	// CHECK: = nvvm.read.ptx.sreg.ctaid.x : !llvm.i32			// CHECK: = nvvm.read.ptx.sreg.ctaid.x : !llvm.i32
				// CHECK: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64
	%bIdX = "gpu.block_id"() {dimension = "x"} : () -> (index)			%bIdX = "gpu.block_id"() {dimension = "x"} : () -> (index)
	// CHECK: = nvvm.read.ptx.sreg.ctaid.y : !llvm.i32			// CHECK: = nvvm.read.ptx.sreg.ctaid.y : !llvm.i32
				// CHECK: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64
	%bIdY = "gpu.block_id"() {dimension = "y"} : () -> (index)			%bIdY = "gpu.block_id"() {dimension = "y"} : () -> (index)
	// CHECK: = nvvm.read.ptx.sreg.ctaid.z : !llvm.i32			// CHECK: = nvvm.read.ptx.sreg.ctaid.z : !llvm.i32
				// CHECK: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64
	%bIdZ = "gpu.block_id"() {dimension = "z"} : () -> (index)			%bIdZ = "gpu.block_id"() {dimension = "z"} : () -> (index)

	// CHECK: = nvvm.read.ptx.sreg.nctaid.x : !llvm.i32			// CHECK: = nvvm.read.ptx.sreg.nctaid.x : !llvm.i32
				// CHECK: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64
	%gDimX = "gpu.grid_dim"() {dimension = "x"} : () -> (index)			%gDimX = "gpu.grid_dim"() {dimension = "x"} : () -> (index)
	// CHECK: = nvvm.read.ptx.sreg.nctaid.y : !llvm.i32			// CHECK: = nvvm.read.ptx.sreg.nctaid.y : !llvm.i32
				// CHECK: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64
	%gDimY = "gpu.grid_dim"() {dimension = "y"} : () -> (index)			%gDimY = "gpu.grid_dim"() {dimension = "y"} : () -> (index)
	// CHECK: = nvvm.read.ptx.sreg.nctaid.z : !llvm.i32			// CHECK: = nvvm.read.ptx.sreg.nctaid.z : !llvm.i32
				// CHECK: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64
	%gDimZ = "gpu.grid_dim"() {dimension = "z"} : () -> (index)			%gDimZ = "gpu.grid_dim"() {dimension = "z"} : () -> (index)

	std.return %tIdX, %tIdY, %tIdZ, %bDimX, %bDimY, %bDimZ,			std.return %tIdX, %tIdY, %tIdZ, %bDimX, %bDimY, %bDimZ,
	%bIdX, %bIdY, %bIdZ, %gDimX, %gDimY, %gDimZ			%bIdX, %bIdY, %bIdZ, %gDimX, %gDimY, %gDimZ
	: index, index, index, index, index, index,			: index, index, index, index, index, index,
	index, index, index, index, index, index			index, index, index, index, index, index
	}			}
	}			}

	// -----			// -----

	gpu.module @test_module {			gpu.module @test_module {
				// CHECK-LABEL: func @gpu_index_comp
				// CHECK32-LABEL: func @gpu_index_comp
				func @gpu_index_comp(%idx : index) -> index {
				// CHECK: = llvm.add %{{.}}, %{{.}} : !llvm.i64
				// CHECK32: = llvm.add %{{.}}, %{{.}} : !llvm.i32
				%0 = addi %idx, %idx : index
				// CHECK: llvm.return %{{.*}} : !llvm.i64
				// CHECK32: llvm.return %{{.*}} : !llvm.i32
				std.return %0 : index
				}
				}

				// -----

				gpu.module @test_module {
	// CHECK-LABEL: func @gpu_all_reduce_op()			// CHECK-LABEL: func @gpu_all_reduce_op()
	gpu.func @gpu_all_reduce_op() {			gpu.func @gpu_all_reduce_op() {
	%arg0 = constant 1.0 : f32			%arg0 = constant 1.0 : f32
	// TODO(csigg): Check full IR expansion once lowering has settled.			// TODO(csigg): Check full IR expansion once lowering has settled.
	// CHECK: nvvm.shfl.sync.bfly			// CHECK: nvvm.shfl.sync.bfly
	// CHECK: nvvm.barrier0			// CHECK: nvvm.barrier0
	// CHECK: llvm.fadd			// CHECK: llvm.fadd
	%result = "gpu.all_reduce"(%arg0) ({}) {op = "add"} : (f32) -> (f32)			%result = "gpu.all_reduce"(%arg0) ({}) {op = "add"} : (f32) -> (f32)
	▲ Show 20 Lines • Show All 201 Lines • Show Last 20 Lines

mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir

	// RUN: mlir-opt %s -convert-gpu-to-rocdl -split-input-file \| FileCheck %s			// RUN: mlir-opt %s -convert-gpu-to-rocdl -split-input-file \| FileCheck %s
				// RUN: mlir-opt %s -convert-gpu-to-rocdl='index-bitwidth=32' -split-input-file \| FileCheck --check-prefix=CHECK32 %s

	gpu.module @test_module {			gpu.module @test_module {
	// CHECK-LABEL: func @gpu_index_ops()			// CHECK-LABEL: func @gpu_index_ops()
				// CHECK32-LABEL: func @gpu_index_ops()
	func @gpu_index_ops()			func @gpu_index_ops()
	-> (index, index, index, index, index, index,			-> (index, index, index, index, index, index,
	index, index, index, index, index, index) {			index, index, index, index, index, index) {
				// CHECK32-NOT: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64

	// CHECK: rocdl.workitem.id.x : !llvm.i32			// CHECK: rocdl.workitem.id.x : !llvm.i32
				// CHECK: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64
	%tIdX = "gpu.thread_id"() {dimension = "x"} : () -> (index)			%tIdX = "gpu.thread_id"() {dimension = "x"} : () -> (index)
	// CHECK: rocdl.workitem.id.y : !llvm.i32			// CHECK: rocdl.workitem.id.y : !llvm.i32
				// CHECK: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64
	%tIdY = "gpu.thread_id"() {dimension = "y"} : () -> (index)			%tIdY = "gpu.thread_id"() {dimension = "y"} : () -> (index)
	// CHECK: rocdl.workitem.id.z : !llvm.i32			// CHECK: rocdl.workitem.id.z : !llvm.i32
				// CHECK: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64
	%tIdZ = "gpu.thread_id"() {dimension = "z"} : () -> (index)			%tIdZ = "gpu.thread_id"() {dimension = "z"} : () -> (index)

	// CHECK: rocdl.workgroup.dim.x : !llvm.i32			// CHECK: rocdl.workgroup.dim.x : !llvm.i32
				// CHECK: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64
	%bDimX = "gpu.block_dim"() {dimension = "x"} : () -> (index)			%bDimX = "gpu.block_dim"() {dimension = "x"} : () -> (index)
	// CHECK: rocdl.workgroup.dim.y : !llvm.i32			// CHECK: rocdl.workgroup.dim.y : !llvm.i32
				// CHECK: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64
	%bDimY = "gpu.block_dim"() {dimension = "y"} : () -> (index)			%bDimY = "gpu.block_dim"() {dimension = "y"} : () -> (index)
	// CHECK: rocdl.workgroup.dim.z : !llvm.i32			// CHECK: rocdl.workgroup.dim.z : !llvm.i32
				// CHECK: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64
	%bDimZ = "gpu.block_dim"() {dimension = "z"} : () -> (index)			%bDimZ = "gpu.block_dim"() {dimension = "z"} : () -> (index)

	// CHECK: rocdl.workgroup.id.x : !llvm.i32			// CHECK: rocdl.workgroup.id.x : !llvm.i32
				// CHECK: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64
	%bIdX = "gpu.block_id"() {dimension = "x"} : () -> (index)			%bIdX = "gpu.block_id"() {dimension = "x"} : () -> (index)
	// CHECK: rocdl.workgroup.id.y : !llvm.i32			// CHECK: rocdl.workgroup.id.y : !llvm.i32
				// CHECK: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64
	%bIdY = "gpu.block_id"() {dimension = "y"} : () -> (index)			%bIdY = "gpu.block_id"() {dimension = "y"} : () -> (index)
	// CHECK: rocdl.workgroup.id.z : !llvm.i32			// CHECK: rocdl.workgroup.id.z : !llvm.i32
				// CHECK: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64
	%bIdZ = "gpu.block_id"() {dimension = "z"} : () -> (index)			%bIdZ = "gpu.block_id"() {dimension = "z"} : () -> (index)

	// CHECK: rocdl.grid.dim.x : !llvm.i32			// CHECK: rocdl.grid.dim.x : !llvm.i32
				// CHECK: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64
	%gDimX = "gpu.grid_dim"() {dimension = "x"} : () -> (index)			%gDimX = "gpu.grid_dim"() {dimension = "x"} : () -> (index)
	// CHECK: rocdl.grid.dim.y : !llvm.i32			// CHECK: rocdl.grid.dim.y : !llvm.i32
				// CHECK: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64
	%gDimY = "gpu.grid_dim"() {dimension = "y"} : () -> (index)			%gDimY = "gpu.grid_dim"() {dimension = "y"} : () -> (index)
	// CHECK: rocdl.grid.dim.z : !llvm.i32			// CHECK: rocdl.grid.dim.z : !llvm.i32
				// CHECK: = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64
	%gDimZ = "gpu.grid_dim"() {dimension = "z"} : () -> (index)			%gDimZ = "gpu.grid_dim"() {dimension = "z"} : () -> (index)

	std.return %tIdX, %tIdY, %tIdZ, %bDimX, %bDimY, %bDimZ,			std.return %tIdX, %tIdY, %tIdZ, %bDimX, %bDimY, %bDimZ,
	%bIdX, %bIdY, %bIdZ, %gDimX, %gDimY, %gDimZ			%bIdX, %bIdY, %bIdZ, %gDimX, %gDimY, %gDimZ
	: index, index, index, index, index, index,			: index, index, index, index, index, index,
	index, index, index, index, index, index			index, index, index, index, index, index
	}			}
	}			}

	// -----			// -----

	gpu.module @test_module {			gpu.module @test_module {
				// CHECK-LABEL: func @gpu_index_comp
				// CHECK32-LABEL: func @gpu_index_comp
				func @gpu_index_comp(%idx : index) -> index {
				// CHECK: = llvm.add %{{.}}, %{{.}} : !llvm.i64
				// CHECK32: = llvm.add %{{.}}, %{{.}} : !llvm.i32
				%0 = addi %idx, %idx : index
				// CHECK: llvm.return %{{.*}} : !llvm.i64
				// CHECK32: llvm.return %{{.*}} : !llvm.i32
				std.return %0 : index
				}
				}

				// -----

				gpu.module @test_module {
	// CHECK-LABEL: func @gpu_sync()			// CHECK-LABEL: func @gpu_sync()
	func @gpu_sync() {			func @gpu_sync() {
	// CHECK: rocdl.barrier			// CHECK: rocdl.barrier
	gpu.barrier			gpu.barrier
	std.return			std.return
	}			}
	}			}

	▲ Show 20 Lines • Show All 143 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[mlir] make the bitwidth of device side index computations configurable
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 272367

mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h

mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h

mlir/include/mlir/Conversion/Passes.td

mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h

mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h

mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp

mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp

mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp

mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir

mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir

This is an archive of the discontinued LLVM Phabricator instance.

[mlir] make the bitwidth of device side index computations configurableClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 272367

mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h

mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h

mlir/include/mlir/Conversion/Passes.td

mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h

mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h

mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp

mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp

mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp

mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir

mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir

[mlir] make the bitwidth of device side index computations configurable
ClosedPublic