Diff 364421

mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h

Show First 20 Lines • Show All 77 Lines • ▼ Show 20 Lines	llvm::Value *&mapValue(Value value) {
return llvm;		return llvm;
}		}

/// Finds an LLVM IR value corresponding to the given MLIR value.		/// Finds an LLVM IR value corresponding to the given MLIR value.
llvm::Value *lookupValue(Value value) const {		llvm::Value *lookupValue(Value value) const {
return valueMapping.lookup(value);		return valueMapping.lookup(value);
}		}

		/// Looks up a list of remapped values.
		SmallVector<llvm::Value *> lookupValues(ValueRange values);

/// Stores the mapping between an MLIR block and LLVM IR basic block.		/// Stores the mapping between an MLIR block and LLVM IR basic block.
void mapBlock(Block mlir, llvm::BasicBlock llvm) {		void mapBlock(Block mlir, llvm::BasicBlock llvm) {
auto result = blockMapping.try_emplace(mlir, llvm);		auto result = blockMapping.try_emplace(mlir, llvm);
(void)result;		(void)result;
assert(result.second && "attempting to map a block that is already mapped");		assert(result.second && "attempting to map a block that is already mapped");
}		}

/// Finds an LLVM IR basic block that corresponds to the given MLIR block.		/// Finds an LLVM IR basic block that corresponds to the given MLIR block.
Show All 11 Lines	public:
}		}

/// Finds an LLVM IR instruction that corresponds to the given MLIR operation		/// Finds an LLVM IR instruction that corresponds to the given MLIR operation
/// with successors.		/// with successors.
llvm::Instruction lookupBranch(Operation op) const {		llvm::Instruction lookupBranch(Operation op) const {
return branchMapping.lookup(op);		return branchMapping.lookup(op);
}		}

		/// Removes the mapping for blocks contained in the region and values defined
		/// in these blocks. Does _not_ recur into nested regions.
		void forgetMapping(Region &region);

/// Returns the LLVM metadata corresponding to a reference to an mlir LLVM		/// Returns the LLVM metadata corresponding to a reference to an mlir LLVM
/// dialect access group operation.		/// dialect access group operation.
llvm::MDNode *getAccessGroup(Operation &opInst,		llvm::MDNode *getAccessGroup(Operation &opInst,
SymbolRefAttr accessGroupRef) const;		SymbolRefAttr accessGroupRef) const;

/// Returns the LLVM metadata corresponding to a llvm loop's codegen		/// Returns the LLVM metadata corresponding to a llvm loop's codegen
/// options attribute.		/// options attribute.
llvm::MDNode *lookupLoopOptionsMetadata(Attribute options) const {		llvm::MDNode *lookupLoopOptionsMetadata(Attribute options) const {
return loopOptionsMetadataMapping.lookup(options);		return loopOptionsMetadataMapping.lookup(options);
}		}

void mapLoopOptionsMetadata(Attribute options, llvm::MDNode *metadata) {		void mapLoopOptionsMetadata(Attribute options, llvm::MDNode *metadata) {
auto result = loopOptionsMetadataMapping.try_emplace(options, metadata);		auto result = loopOptionsMetadataMapping.try_emplace(options, metadata);
(void)result;		(void)result;
assert(result.second &&		assert(result.second &&
"attempting to map loop options that was already mapped");		"attempting to map loop options that was already mapped");
}		}

// Sets LLVM metadata for memory operations that are in a parallel loop.		// Sets LLVM metadata for memory operations that are in a parallel loop.
void setAccessGroupsMetadata(Operation op, llvm::Instruction inst);		void setAccessGroupsMetadata(Operation op, llvm::Instruction inst);

/// Converts the type from MLIR LLVM dialect to LLVM.		/// Converts the type from MLIR LLVM dialect to LLVM.
llvm::Type *convertType(Type type);		llvm::Type *convertType(Type type);

/// Looks up a list of remapped values.
SmallVector<llvm::Value *, 8> lookupValues(ValueRange values);

/// Returns the MLIR context of the module being translated.		/// Returns the MLIR context of the module being translated.
MLIRContext &getContext() { return *mlirModule->getContext(); }		MLIRContext &getContext() { return *mlirModule->getContext(); }

/// Returns the LLVM context in which the IR is being constructed.		/// Returns the LLVM context in which the IR is being constructed.
llvm::LLVMContext &getLLVMContext() const { return llvmModule->getContext(); }		llvm::LLVMContext &getLLVMContext() const { return llvmModule->getContext(); }

/// Finds an LLVM IR global value that corresponds to the given MLIR operation		/// Finds an LLVM IR global value that corresponds to the given MLIR operation
/// defining a global value.		/// defining a global value.
Show All 15 Lines	public:
const llvm::DILocation translateLoc(Location loc, llvm::DILocalScope scope);		const llvm::DILocation translateLoc(Location loc, llvm::DILocalScope scope);

/// Translates the contents of the given block to LLVM IR using this		/// Translates the contents of the given block to LLVM IR using this
/// translator. The LLVM IR basic block corresponding to the given block is		/// translator. The LLVM IR basic block corresponding to the given block is
/// expected to exist in the mapping of this translator. Uses `builder` to		/// expected to exist in the mapping of this translator. Uses `builder` to
/// translate the IR, leaving it at the end of the block. If `ignoreArguments`		/// translate the IR, leaving it at the end of the block. If `ignoreArguments`
/// is set, does not produce PHI nodes for the block arguments. Otherwise, the		/// is set, does not produce PHI nodes for the block arguments. Otherwise, the
/// PHI nodes are constructed for block arguments but are _not_ connected to		/// PHI nodes are constructed for block arguments but are _not_ connected to
/// the predecessors that may not exist yet.		/// the predecessors that may not exist yet. If `convertOp` is provided, use
		/// it to translate operations in the block; otherwise use `convertOperation`.
		kiranchandramohanUnsubmitted Done Reply Inline Actions I don't see any changes in convertBlock to use convertOp. kiranchandramohan: I don't see any changes in convertBlock to use convertOp.
LogicalResult convertBlock(Block &bb, bool ignoreArguments,		LogicalResult convertBlock(Block &bb, bool ignoreArguments,
llvm::IRBuilderBase &builder);		llvm::IRBuilderBase &builder);

/// Gets the named metadata in the LLVM IR module being constructed, creating		/// Gets the named metadata in the LLVM IR module being constructed, creating
/// it if it does not exist.		/// it if it does not exist.
llvm::NamedMDNode *getOrInsertNamedModuleMetadata(StringRef name);		llvm::NamedMDNode *getOrInsertNamedModuleMetadata(StringRef name);

/// Common CRTP base class for ModuleTranslation stack frames.		/// Common CRTP base class for ModuleTranslation stack frames.
▲ Show 20 Lines • Show All 186 Lines • Show Last 20 Lines

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

//===- OpenMPToLLVMIRTranslation.cpp - Translate OpenMP dialect to LLVM IR-===//		//===- OpenMPToLLVMIRTranslation.cpp - Translate OpenMP dialect to LLVM IR-===//
//		//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.		// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.		// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception		// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
//		//
// This file implements a translation between the MLIR OpenMP dialect and LLVM		// This file implements a translation between the MLIR OpenMP dialect and LLVM
// IR.		// IR.
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
#include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h"		#include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h"
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"		#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
		#include "mlir/IR/BlockAndValueMapping.h"
#include "mlir/IR/Operation.h"		#include "mlir/IR/Operation.h"
#include "mlir/Support/LLVM.h"		#include "mlir/Support/LLVM.h"
#include "mlir/Target/LLVMIR/ModuleTranslation.h"		#include "mlir/Target/LLVMIR/ModuleTranslation.h"

#include "llvm/ADT/SetVector.h"		#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/TypeSwitch.h"		#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"		#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/IRBuilder.h"		#include "llvm/IR/IRBuilder.h"

using namespace mlir;		using namespace mlir;

namespace {		namespace {
/// ModuleTranslation stack frame for OpenMP operations. This keeps track of the		/// ModuleTranslation stack frame for OpenMP operations. This keeps track of the
/// insertion points for allocas.		/// insertion points for allocas.
class OpenMPAllocaStackFrame		class OpenMPAllocaStackFrame
: public LLVM::ModuleTranslation::StackFrameBase<OpenMPAllocaStackFrame> {		: public LLVM::ModuleTranslation::StackFrameBase<OpenMPAllocaStackFrame> {
public:		public:
explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP)		explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP)
: allocaInsertPoint(allocaIP) {}		: allocaInsertPoint(allocaIP) {}
llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;		llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
};		};

		/// Stack frame for the ModuleTranslation stack that carries a partial
		/// mapping from MLIR values to the LLVM IR values standing in for them.
		class OpenMPVarMappingStackFrame
		    : public LLVM::ModuleTranslation::StackFrameBase<
		          OpenMPVarMappingStackFrame> {
		public:
		  /// Builds the frame from `initialMapping`; the map is copied, so the frame
		  /// does not depend on the lifetime of the argument.
		  explicit OpenMPVarMappingStackFrame(
		      const DenseMap<Value, llvm::Value *> &initialMapping)
		      : mapping(initialMapping) {}

		  /// Frame-owned copy of the MLIR value -> LLVM IR value mapping.
		  DenseMap<Value, llvm::Value *> mapping;
		};
} // namespace		} // namespace

/// Find the insertion point for allocas given the current insertion point for		/// Find the insertion point for allocas given the current insertion point for
/// normal operations in the builder.		/// normal operations in the builder.
static llvm::OpenMPIRBuilder::InsertPointTy		static llvm::OpenMPIRBuilder::InsertPointTy
findAllocaInsertPoint(llvm::IRBuilderBase &builder,		findAllocaInsertPoint(llvm::IRBuilderBase &builder,
const LLVM::ModuleTranslation &moduleTranslation) {		const LLVM::ModuleTranslation &moduleTranslation) {
// If there is an alloca insertion point on stack, i.e. we are in a nested		// If there is an alloca insertion point on stack, i.e. we are in a nested
Show All 13 Lines	llvm::BasicBlock &funcEntryBlock =
builder.GetInsertBlock()->getParent()->getEntryBlock();		builder.GetInsertBlock()->getParent()->getEntryBlock();
return llvm::OpenMPIRBuilder::InsertPointTy(		return llvm::OpenMPIRBuilder::InsertPointTy(
&funcEntryBlock, funcEntryBlock.getFirstInsertionPt());		&funcEntryBlock, funcEntryBlock.getFirstInsertionPt());
}		}

/// Converts the given region that appears within an OpenMP dialect operation to		/// Converts the given region that appears within an OpenMP dialect operation to
/// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the		/// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the
/// region, and a branch from any block with a successor-less OpenMP terminator		/// region, and a branch from any block with a successor-less OpenMP terminator
/// to `continuationBlock`.		/// to `continuationBlock`. Populates `continuationBlockPHIs` with the PHI nodes
static void convertOmpOpRegions(Region &region, StringRef blockName,		/// of the continuation block if provided.
llvm::BasicBlock &sourceBlock,		static void convertOmpOpRegions(
llvm::BasicBlock &continuationBlock,		Region &region, StringRef blockName, llvm::BasicBlock &sourceBlock,
llvm::IRBuilderBase &builder,		llvm::BasicBlock &continuationBlock, llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation,		LLVM::ModuleTranslation &moduleTranslation, LogicalResult &bodyGenStatus,
LogicalResult &bodyGenStatus) {		SmallVectorImpl<llvm::PHINode > continuationBlockPHIs = nullptr) {
llvm::LLVMContext &llvmContext = builder.getContext();		llvm::LLVMContext &llvmContext = builder.getContext();
for (Block &bb : region) {		for (Block &bb : region) {
llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create(		llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create(
llvmContext, blockName, builder.GetInsertBlock()->getParent());		llvmContext, blockName, builder.GetInsertBlock()->getParent(),
		builder.GetInsertBlock()->getNextNode());
		kiranchandramohanUnsubmitted Done Reply Inline Actions Is this the correct successor block? Or will it be corrected later on? kiranchandramohan: Is this the correct successor block? Or will it be corrected later on?
		ftynseAuthorUnsubmitted Done Reply Inline Actions It's not not a successor block but the block before which the created block is inserted in the IR (https://llvm.org/doxygen/classllvm_1_1BasicBlock.html#ace940beeee97c222f836fe0ac70f6cf5), there's no branch being created. ftynse: It's not not a successor block but the block before which the created block is inserted in the…
moduleTranslation.mapBlock(&bb, llvmBB);		moduleTranslation.mapBlock(&bb, llvmBB);
}		}

llvm::Instruction *sourceTerminator = sourceBlock.getTerminator();		llvm::Instruction *sourceTerminator = sourceBlock.getTerminator();

		// Terminators (namely YieldOp) may be forwarding values to the region that
		// need to be available in the continuation block. Collect the types of these
		// operands in preparation of creating PHI nodes.
		SmallVector<llvm::Type *> continuationBlockPHITypes;
		bool operandsProcessed = false;
		unsigned numYields = 0;
		for (Block &bb : region.getBlocks()) {
		if (omp::YieldOp yield = dyn_cast<omp::YieldOp>(bb.getTerminator())) {
		if (!operandsProcessed) {
		for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
		continuationBlockPHITypes.push_back(
		moduleTranslation.convertType(yield->getOperand(i).getType()));
		}
		operandsProcessed = true;
		} else {
		assert(continuationBlockPHITypes.size() == yield->getNumOperands() &&
		"mismatching number of values yielded from the region");
		for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
		llvm::Type *operandType =
		moduleTranslation.convertType(yield->getOperand(i).getType());
		(void)operandType;
		assert(continuationBlockPHITypes[i] == operandType &&
		"values of mismatching types yielded from the region");
		}
		kiranchandramohanUnsubmitted Done Reply Inline Actions Should this be part of the verifier or a verification pass? kiranchandramohan: Should this be part of the verifier or a verification pass?
		ftynseAuthorUnsubmitted Done Reply Inline Actions It is verified in reduction regions where this is actually necessary, but I prefer to assert liberally here in case somebody later decides to use omp.yield differently from current uses ftynse: It is verified in reduction regions where this is actually necessary, but I prefer to [[ https…
		}
		numYields++;
		}
		}

		// Insert PHI nodes in the continuation block for any values forwarded by the
		// terminators in this region.
		if (!continuationBlockPHITypes.empty())
		assert(
		continuationBlockPHIs &&
		"expected continuation block PHIs if converted regions yield values");
		if (continuationBlockPHIs) {
		llvm::IRBuilderBase::InsertPointGuard guard(builder);
		continuationBlockPHIs->reserve(continuationBlockPHITypes.size());
		builder.SetInsertPoint(&continuationBlock, continuationBlock.begin());
		for (llvm::Type *ty : continuationBlockPHITypes)
		continuationBlockPHIs->push_back(builder.CreatePHI(ty, numYields));
		}

// Convert blocks one by one in topological order to ensure		// Convert blocks one by one in topological order to ensure
// defs are converted before uses.		// defs are converted before uses.
SetVector<Block *> blocks =		SetVector<Block *> blocks =
LLVM::detail::getTopologicallySortedBlocks(region);		LLVM::detail::getTopologicallySortedBlocks(region);
for (Block *bb : blocks) {		for (Block *bb : blocks) {
llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb);		llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb);
// Retarget the branch of the entry block to the entry block of the		// Retarget the branch of the entry block to the entry block of the
// converted region (regions are single-entry).		// converted region (regions are single-entry).
Show All 14 Lines	for (Block *bb : blocks) {

// Special handling for `omp.yield` and `omp.terminator` (we may have more		// Special handling for `omp.yield` and `omp.terminator` (we may have more
// than one): they return the control to the parent OpenMP dialect operation		// than one): they return the control to the parent OpenMP dialect operation
// so replace them with the branch to the continuation block. We handle this		// so replace them with the branch to the continuation block. We handle this
// here to avoid relying on inter-function communication through the		// here to avoid relying on inter-function communication through the
// ModuleTranslation class to set up the correct insertion point. This is		// ModuleTranslation class to set up the correct insertion point. This is
// also consistent with MLIR's idiom of handling special region terminators		// also consistent with MLIR's idiom of handling special region terminators
// in the same code that handles the region-owning operation.		// in the same code that handles the region-owning operation.
if (isa<omp::TerminatorOp, omp::YieldOp>(bb->getTerminator()))		Operation *terminator = bb->getTerminator();
		if (isa<omp::TerminatorOp, omp::YieldOp>(terminator)) {
builder.CreateBr(&continuationBlock);		builder.CreateBr(&continuationBlock);

		for (unsigned i = 0, e = terminator->getNumOperands(); i < e; ++i)
		(*continuationBlockPHIs)[i]->addIncoming(
		moduleTranslation.lookupValue(terminator->getOperand(i)), llvmBB);
		}
}		}
// Finally, after all blocks have been traversed and values mapped,		// After all blocks have been traversed and values mapped, connect the PHI
// connect the PHI nodes to the results of preceding blocks.		// nodes to the results of preceding blocks.
LLVM::detail::connectPHINodes(region, moduleTranslation);		LLVM::detail::connectPHINodes(region, moduleTranslation);

		// Remove the blocks and values defined in this region from the mapping since
		// they are not visible outside of this region. This allows the same region to
		// be converted several times, that is cloned, without clashes, and slightly
		// speeds up the lookups. The regions we are translating must be flat at this
		wsmosesUnsubmitted Done Reply Inline Actions I'd add some text here about why the (to me unexpected) default behavior of forgetMapping that isn't recursive, is desired. wsmoses: I'd add some text here about why the (to me unexpected) default behavior of forgetMapping that…
		// point, no need for recursion.
		kiranchandramohanUnsubmitted Done Reply Inline Actions Is that true for nested OpenMP regions? kiranchandramohan: Is that true for nested OpenMP regions?
		ftynseAuthorUnsubmitted Done Reply Inline Actions Okay, since you both think recursion might be necessary (there are no cases currently), I made forgetMapping traverse nested regions. ftynse: Okay, since you both think recursion might be necessary (there are no cases currently), I made…
		moduleTranslation.forgetMapping(region);
}		}

/// Converts the OpenMP parallel operation to LLVM IR.		/// Converts the OpenMP parallel operation to LLVM IR.
static LogicalResult		static LogicalResult
convertOmpParallel(Operation &opInst, llvm::IRBuilderBase &builder,		convertOmpParallel(Operation &opInst, llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation) {		LLVM::ModuleTranslation &moduleTranslation) {
using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;		using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
// TODO: support error propagation in OpenMPIRBuilder and use it instead of		// TODO: support error propagation in OpenMPIRBuilder and use it instead of
▲ Show 20 Lines • Show All 74 Lines • ▼ Show 20 Lines	convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder,

llvm::OpenMPIRBuilder::LocationDescription ompLoc(		llvm::OpenMPIRBuilder::LocationDescription ompLoc(
builder.saveIP(), builder.getCurrentDebugLocation());		builder.saveIP(), builder.getCurrentDebugLocation());
builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createMaster(		builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createMaster(
ompLoc, bodyGenCB, finiCB));		ompLoc, bodyGenCB, finiCB));
return success();		return success();
}		}

		/// Finds the reduction declaration that `reduction` refers to within
		/// `container`. The accumulator of `reduction` is matched against the
		/// reduction variables of `container`; the symbol stored at the matching
		/// position of the `reductions` attribute is then resolved to the declaration
		/// op. Only WsLoopOp containers are handled for now.
		static omp::ReductionDeclareOp findReductionDecl(omp::WsLoopOp container,
		                                                 omp::ReductionOp reduction) {
		  SymbolRefAttr declSymbol;
		  const unsigned numVars = container.getNumReductionVars();
		  for (unsigned idx = 0; idx != numVars; ++idx) {
		    // The first reduction variable equal to the accumulator determines the
		    // declaration symbol; later duplicates are ignored.
		    if (container.reduction_vars()[idx] == reduction.accumulator()) {
		      declSymbol = (*container.reductions())[idx].cast<SymbolRefAttr>();
		      break;
		    }
		  }
		  assert(declSymbol &&
		         "reduction operation must be associated with a declaration");

		  // Resolve the symbol starting from the loop operation.
		  return SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>(
		      container, declSymbol);
		}

		/// Appends to `reductions` the declaration op of every reduction symbol
		/// listed on `loop`, in attribute order. Leaves `reductions` untouched when
		/// the loop carries no `reductions` attribute.
		static void
		collectReductionDecls(omp::WsLoopOp loop,
		                      SmallVectorImpl<omp::ReductionDeclareOp> &reductions) {
		  Optional<ArrayAttr> reductionSymbols = loop.reductions();
		  if (reductionSymbols) {
		    // Grow the output once up front; existing entries are preserved.
		    reductions.reserve(reductions.size() + loop.getNumReductionVars());
		    for (auto symbol : reductionSymbols->getAsRange<SymbolRefAttr>()) {
		      omp::ReductionDeclareOp decl =
		          SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>(
		              loop, symbol);
		      reductions.push_back(decl);
		    }
		  }
		}

		/// Translates the blocks contained in the given region and appends them at
		/// the current insertion point of `builder`. The operations of the entry block
		/// are appended to the current insertion block, which is not expected to have a
		/// terminator. If set, `continuationBlockArgs` is populated with translated
		/// values that correspond to the values omp.yield'ed from the region.
		static LogicalResult inlineConvertOmpRegions(
		Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
		LLVM::ModuleTranslation &moduleTranslation,
		SmallVectorImpl<llvm::Value > continuationBlockArgs = nullptr) {
		if (region.empty())
		return success();

		// Special case for single-block regions that don't create additional blocks:
		wsmosesUnsubmitted Done Reply Inline Actions Nit, doesn't => don't wsmoses: Nit, doesn't => don't
		// insert operations without creating additional blocks.
		if (llvm::hasSingleElement(region)) {
		moduleTranslation.mapBlock(&region.front(), builder.GetInsertBlock());
		if (failed(moduleTranslation.convertBlock(
		region.front(), /ignoreArguments=/true, builder)))
		return failure();

		// The continuation arguments are simply the translated terminator operands.
		if (continuationBlockArgs)
		llvm::append_range(
		*continuationBlockArgs,
		moduleTranslation.lookupValues(region.front().back().getOperands()));

		// Drop the mapping that is no longer necessary so that the same region can
		// be processed multiple times.
		moduleTranslation.forgetMapping(region);
		return success();
		}

		// Create the continuation block manually instead of calling splitBlock
		// because the current insertion block may not have a terminator.
		llvm::BasicBlock *continuationBlock =
		llvm::BasicBlock::Create(builder.getContext(), blockName + ".cont",
		builder.GetInsertBlock()->getParent(),
		builder.GetInsertBlock()->getNextNode());
		builder.CreateBr(continuationBlock);

		LogicalResult bodyGenStatus = success();
		SmallVector<llvm::PHINode *> phis;
		convertOmpOpRegions(region, blockName, *builder.GetInsertBlock(),
		*continuationBlock, builder, moduleTranslation,
		bodyGenStatus, &phis);
		if (failed(bodyGenStatus))
		return failure();
		if (continuationBlockArgs)
		llvm::append_range(*continuationBlockArgs, phis);
		builder.SetInsertPoint(continuationBlock,
		continuationBlock->getFirstInsertionPt());
		return success();
		}

		namespace {
		/// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used to
		/// store lambdas with capture.
		///
		/// The non-atomic generator receives the insertion point, the two operand
		/// values and a reference through which it reports the resulting value; it
		/// returns the insertion point at which IR construction continues.
		using OwningReductionGen = std::function<llvm::OpenMPIRBuilder::InsertPointTy(
		    llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *,
		    llvm::Value *&)>;
		/// The atomic generator receives the insertion point and the two operand
		/// values and returns the insertion point at which IR construction continues.
		using OwningAtomicReductionGen =
		    std::function<llvm::OpenMPIRBuilder::InsertPointTy(
		        llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *)>;
		} // namespace

		/// Create an OpenMPIRBuilder-compatible reduction generator for the given
		/// reduction declaration. The generator uses `builder` but ignores its
		/// insertion point: it restores the insertion point it is called with before
		/// emitting anything.
		///
		/// The returned generator maps the two arguments of the declaration's
		/// reduction region to `lhs` and `rhs`, inlines the region at `insertPoint`
		/// and stores the single yielded value into `result`. It returns the builder's
		/// insertion point after the inlined region, or a default-constructed
		/// insertion point if the region could not be translated.
		///
		/// `builder` and `moduleTranslation` are captured by reference, so the
		/// generator must not be invoked after either of them is destroyed.
		static OwningReductionGen
		makeReductionGen(omp::ReductionDeclareOp decl, llvm::IRBuilderBase &builder,
		                 LLVM::ModuleTranslation &moduleTranslation) {
		  // The lambda is mutable because we need access to non-const methods of decl
		  // (which aren't actually mutating it), and we must capture decl by-value to
		  // avoid the dangling reference after the parent function returns.
		  OwningReductionGen gen =
		      [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint,
		                llvm::Value *lhs, llvm::Value *rhs,
		                llvm::Value *&result) mutable {
		        Region &reductionRegion = decl.reductionRegion();
		        // Bind the region's two block arguments to the incoming operands.
		        moduleTranslation.mapValue(reductionRegion.front().getArgument(0), lhs);
		        moduleTranslation.mapValue(reductionRegion.front().getArgument(1), rhs);
		        builder.restoreIP(insertPoint);
		        SmallVector<llvm::Value *> phis;
		        if (failed(inlineConvertOmpRegions(reductionRegion,
		                                           "omp.reduction.nonatomic.body",
		                                           builder, moduleTranslation, &phis)))
		          return llvm::OpenMPIRBuilder::InsertPointTy();
		        // The reduction region yields exactly one value: the combined result.
		        assert(phis.size() == 1);
		        result = phis[0];
		        return builder.saveIP();
		      };
		  return gen;
		}

		/// Create an OpenMPIRBuilder-compatible atomic reduction generator for the
		/// given reduction declaration. The generator uses `builder` but ignores its
		/// insertion point: it restores the insertion point it is called with before
		/// emitting anything. Returns null (an empty std::function) if there is no
		/// atomic region available in the reduction declaration.
		///
		/// The returned generator maps the two arguments of the declaration's atomic
		/// reduction region to `lhs` and `rhs` and inlines the region at
		/// `insertPoint`. It returns the builder's insertion point after the inlined
		/// region, or a default-constructed insertion point if the region could not
		/// be translated.
		///
		/// `builder` and `moduleTranslation` are captured by reference, so the
		/// generator must not be invoked after either of them is destroyed.
		static OwningAtomicReductionGen
		makeAtomicReductionGen(omp::ReductionDeclareOp decl,
		                       llvm::IRBuilderBase &builder,
		                       LLVM::ModuleTranslation &moduleTranslation) {
		  if (decl.atomicReductionRegion().empty())
		    return OwningAtomicReductionGen();

		  // The lambda is mutable because we need access to non-const methods of decl
		  // (which aren't actually mutating it), and we must capture decl by-value to
		  // avoid the dangling reference after the parent function returns.
		  OwningAtomicReductionGen atomicGen =
		      [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint,
		                llvm::Value *lhs, llvm::Value *rhs) mutable {
		        Region &atomicRegion = decl.atomicReductionRegion();
		        // Bind the region's two block arguments to the incoming operands.
		        moduleTranslation.mapValue(atomicRegion.front().getArgument(0), lhs);
		        moduleTranslation.mapValue(atomicRegion.front().getArgument(1), rhs);
		        builder.restoreIP(insertPoint);
		        SmallVector<llvm::Value *> phis;
		        if (failed(inlineConvertOmpRegions(atomicRegion,
		                                           "omp.reduction.atomic.body", builder,
		                                           moduleTranslation, &phis)))
		          return llvm::OpenMPIRBuilder::InsertPointTy();
		        // The atomic region is expected not to yield any value.
		        assert(phis.empty());
		        return builder.saveIP();
		      };
		  return atomicGen;
		}

/// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.		/// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult		static LogicalResult
convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,		convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation) {		LLVM::ModuleTranslation &moduleTranslation) {
auto loop = cast<omp::WsLoopOp>(opInst);		auto loop = cast<omp::WsLoopOp>(opInst);
// TODO: this should be in the op verifier instead.		// TODO: this should be in the op verifier instead.
if (loop.lowerBound().empty())		if (loop.lowerBound().empty())
return failure();		return failure();
Show All 12 Lines	convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
llvm::Value *upperBound = moduleTranslation.lookupValue(loop.upperBound()[0]);		llvm::Value *upperBound = moduleTranslation.lookupValue(loop.upperBound()[0]);
llvm::Value *step = moduleTranslation.lookupValue(loop.step()[0]);		llvm::Value *step = moduleTranslation.lookupValue(loop.step()[0]);
llvm::Type *ivType = step->getType();		llvm::Type *ivType = step->getType();
llvm::Value *chunk =		llvm::Value *chunk =
loop.schedule_chunk_var()		loop.schedule_chunk_var()
? moduleTranslation.lookupValue(loop.schedule_chunk_var())		? moduleTranslation.lookupValue(loop.schedule_chunk_var())
: llvm::ConstantInt::get(ivType, 1);		: llvm::ConstantInt::get(ivType, 1);

		SmallVector<omp::ReductionDeclareOp> reductionDecls;
		collectReductionDecls(loop, reductionDecls);
		llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
		findAllocaInsertPoint(builder, moduleTranslation);

		// Allocate space for privatized reduction variables.
		SmallVector<llvm::Value *> privateReductionVariables;
		DenseMap<Value, llvm::Value *> reductionVariableMap;
		unsigned numReductions = loop.getNumReductionVars();
		privateReductionVariables.reserve(numReductions);
		if (numReductions != 0) {
		llvm::IRBuilderBase::InsertPointGuard guard(builder);
		builder.restoreIP(allocaIP);
		for (unsigned i = 0; i < numReductions; ++i) {
		auto reductionType =
		loop.reduction_vars()[i].getType().cast<LLVM::LLVMPointerType>();
		llvm::Value *var = builder.CreateAlloca(
		moduleTranslation.convertType(reductionType.getElementType()));
		privateReductionVariables.push_back(var);
		reductionVariableMap.try_emplace(loop.reduction_vars()[i], var);
		}
		}

		// Store the mapping between reduction variables and their private copies on
		// ModuleTranslation stack. It can be then recovered when translating
		// omp.reduce operations in a separate call.
		LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard(
		moduleTranslation, reductionVariableMap);

		// Before the loop, store the initial values of reductions into reduction
		// variables. Although this could be done after allocas, we don't want to mess
		// up with the alloca insertion point.
		for (unsigned i = 0; i < numReductions; ++i) {
		SmallVector<llvm::Value *> phis;
		if (failed(inlineConvertOmpRegions(reductionDecls[i].initializerRegion(),
		"omp.reduction.neutral", builder,
		moduleTranslation, &phis)))
		return failure();
		assert(phis.size() == 1 && "expected one value to be yielded from the "
		"reduction neutral element declaration region");
		builder.CreateStore(phis[0], privateReductionVariables[i]);
		}

// Set up the source location value for OpenMP runtime.		// Set up the source location value for OpenMP runtime.
llvm::DISubprogram *subprogram =		llvm::DISubprogram *subprogram =
builder.GetInsertBlock()->getParent()->getSubprogram();		builder.GetInsertBlock()->getParent()->getSubprogram();
const llvm::DILocation *diLoc =		const llvm::DILocation *diLoc =
moduleTranslation.translateLoc(opInst.getLoc(), subprogram);		moduleTranslation.translateLoc(opInst.getLoc(), subprogram);
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder.saveIP(),		llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder.saveIP(),
llvm::DebugLoc(diLoc));		llvm::DebugLoc(diLoc));

Show All 23 Lines	convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
// this code when WsLoop clearly supports more cases.		// this code when WsLoop clearly supports more cases.
llvm::CanonicalLoopInfo *loopInfo =		llvm::CanonicalLoopInfo *loopInfo =
moduleTranslation.getOpenMPBuilder()->createCanonicalLoop(		moduleTranslation.getOpenMPBuilder()->createCanonicalLoop(
ompLoc, bodyGen, lowerBound, upperBound, step, /IsSigned=/true,		ompLoc, bodyGen, lowerBound, upperBound, step, /IsSigned=/true,
/InclusiveStop=/loop.inclusive());		/InclusiveStop=/loop.inclusive());
if (failed(bodyGenStatus))		if (failed(bodyGenStatus))
return failure();		return failure();

llvm::OpenMPIRBuilder::InsertPointTy allocaIP =		allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
findAllocaInsertPoint(builder, moduleTranslation);
llvm::OpenMPIRBuilder::InsertPointTy afterIP;		llvm::OpenMPIRBuilder::InsertPointTy afterIP;
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();		llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
if (schedule == omp::ClauseScheduleKind::Static) {		if (schedule == omp::ClauseScheduleKind::Static) {
loopInfo = ompBuilder->createStaticWorkshareLoop(ompLoc, loopInfo, allocaIP,		loopInfo = ompBuilder->createStaticWorkshareLoop(ompLoc, loopInfo, allocaIP,
!loop.nowait(), chunk);		!loop.nowait(), chunk);
afterIP = loopInfo->getAfterIP();		afterIP = loopInfo->getAfterIP();
} else {		} else {
llvm::omp::OMPScheduleType schedType;		llvm::omp::OMPScheduleType schedType;
Show All 15 Lines	default:
break;		break;
}		}

afterIP = ompBuilder->createDynamicWorkshareLoop(		afterIP = ompBuilder->createDynamicWorkshareLoop(
ompLoc, loopInfo, allocaIP, schedType, !loop.nowait(), chunk);		ompLoc, loopInfo, allocaIP, schedType, !loop.nowait(), chunk);
}		}

// Continue building IR after the loop.		// Continue building IR after the loop.
builder.restoreIP(afterIP);		builder.restoreIP(loopInfo->getAfterIP());

		// Process the reductions if required.
		if (numReductions == 0)
		return success();

		// Create the reduction generators. We need to own them here because
		// ReductionInfo only accepts references to the generators.
		SmallVector<OwningReductionGen> owningReductionGens;
		SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
		for (unsigned i = 0; i < numReductions; ++i) {
		owningReductionGens.push_back(
		makeReductionGen(reductionDecls[i], builder, moduleTranslation));
		owningAtomicReductionGens.push_back(
		makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation));
		}

		// Collect the reduction information.
		SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos;
		reductionInfos.reserve(numReductions);
		for (unsigned i = 0; i < numReductions; ++i) {
		llvm::OpenMPIRBuilder::AtomicReductionGenTy atomicGen = nullptr;
		if (owningAtomicReductionGens[i])
		atomicGen = owningAtomicReductionGens[i];
		reductionInfos.push_back(
		{moduleTranslation.lookupValue(loop.reduction_vars()[i]),
		privateReductionVariables[i], owningReductionGens[i], atomicGen});
		}

		// The call to createReductions below expects the block to have a
		// terminator. Create an unreachable instruction to serve as terminator
		// and remove it later.
		llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
		builder.SetInsertPoint(tempTerminator);
		llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint =
		moduleTranslation.getOpenMPBuilder()->createReductions(
		kiranchandramohanUnsubmitted Done Reply Inline Actions Nit: Available as ompBuilder from 582. Also applies for 686. kiranchandramohan: Nit: Available as ompBuilder from 582. Also applies for 686.
		builder.saveIP(), allocaIP, reductionInfos,
		/IsNoWait=/false);
		kiranchandramohanUnsubmitted Done Reply Inline Actions Is this related to the nowait of the worksharing loop? kiranchandramohan: Is this related to the nowait of the worksharing loop?
		if (!contInsertPoint.getBlock())
		return loop->emitOpError() << "failed to convert reductions";
		auto nextInsertionPoint = moduleTranslation.getOpenMPBuilder()->createBarrier(
		contInsertPoint, llvm::omp::OMPD_for);
		tempTerminator->eraseFromParent();
		builder.restoreIP(nextInsertionPoint);

		return success();
		}

		/// Converts an OpenMP reduction operation using OpenMPIRBuilder. Expects the
		/// mapping between reduction variables and their private equivalents to have
		/// been stored on the ModuleTranslation stack. Currently only supports
		/// reduction within WsLoopOp, but can be easily extended.
		static LogicalResult
		convertOmpReductionOp(omp::ReductionOp reductionOp,
		llvm::IRBuilderBase &builder,
		LLVM::ModuleTranslation &moduleTranslation) {
		// Find the declaration that corresponds to the reduction op.
		auto reductionContainer = reductionOp->getParentOfType<omp::WsLoopOp>();
		omp::ReductionDeclareOp declaration =
		findReductionDecl(reductionContainer, reductionOp);
		assert(declaration && "could not find reduction declaration");

		// Retrieve the mapping between reduction variables and their private
		// equivalents.
		const DenseMap<Value, llvm::Value > reductionVariableMap = nullptr;
		moduleTranslation.stackWalk<OpenMPVarMappingStackFrame>(
		[&](const OpenMPVarMappingStackFrame &frame) {
		reductionVariableMap = &frame.mapping;
		return WalkResult::interrupt();
		});
		assert(reductionVariableMap && "couldn't find private reduction variables");

		// Translate the reduction operation by emitting the body of the corresponding
		// reduction declaration.
		Region &reductionRegion = declaration.reductionRegion();
		llvm::Value *privateReductionVar =
		reductionVariableMap->lookup(reductionOp.accumulator());
		llvm::Value *reductionVal = builder.CreateLoad(
		moduleTranslation.convertType(reductionOp.operand().getType()),
		privateReductionVar);

		moduleTranslation.mapValue(reductionRegion.front().getArgument(0),
		reductionVal);
		moduleTranslation.mapValue(
		reductionRegion.front().getArgument(1),
		moduleTranslation.lookupValue(reductionOp.operand()));

		SmallVector<llvm::Value *> phis;
		if (failed(inlineConvertOmpRegions(reductionRegion, "omp.reduction.body",
		builder, moduleTranslation, &phis)))
		kiranchandramohanUnsubmitted Done Reply Inline Actions The OpenMPIRBuilder already takes in OwningReductionGen and OwningAtomicReductionGen which inlines the reduction body based on whether it is non-atomic or atomic. Why is a separate non-atomic reduction required? kiranchandramohan: The OpenMPIRBuilder already takes in OwningReductionGen and OwningAtomicReductionGen which…
		ftynseAuthorUnsubmitted Done Reply Inline Actions Because we also need to generate the body of the reduction in the loop: #pragma omp parallel for reduction(+:x) for (...) { x += 42; //< this needs to be in the IR executed by the loop, //< and we just `omp.reduce 42` in MLIR. } ftynse: Because we also need to generate the body of the reduction in the loop: ``` #pragma omp…
		return failure();
		assert(phis.size() == 1 && "expected one value to be yielded from "
		"the reduction body declaration region");
		builder.CreateStore(phis[0], privateReductionVar);
return success();		return success();
}		}

namespace {		namespace {

/// Implementation of the dialect interface that converts operations belonging		/// Implementation of the dialect interface that converts operations belonging
/// to the OpenMP dialect to LLVM IR.		/// to the OpenMP dialect to LLVM IR.
class OpenMPDialectLLVMIRTranslationInterface		class OpenMPDialectLLVMIRTranslationInterface
▲ Show 20 Lines • Show All 41 Lines • ▼ Show 20 Lines	return llvm::TypeSwitch<Operation *, LogicalResult>(op)
// The argument list is discarded so that, flush with a list is treated		// The argument list is discarded so that, flush with a list is treated
// same as a flush without a list.		// same as a flush without a list.
ompBuilder->createFlush(builder.saveIP());		ompBuilder->createFlush(builder.saveIP());
return success();		return success();
})		})
.Case([&](omp::ParallelOp) {		.Case([&](omp::ParallelOp) {
return convertOmpParallel(*op, builder, moduleTranslation);		return convertOmpParallel(*op, builder, moduleTranslation);
})		})
		.Case([&](omp::ReductionOp reductionOp) {
		return convertOmpReductionOp(reductionOp, builder, moduleTranslation);
		})
.Case([&](omp::MasterOp) {		.Case([&](omp::MasterOp) {
return convertOmpMaster(*op, builder, moduleTranslation);		return convertOmpMaster(*op, builder, moduleTranslation);
})		})
.Case([&](omp::WsLoopOp) {		.Case([&](omp::WsLoopOp) {
return convertOmpWsLoop(*op, builder, moduleTranslation);		return convertOmpWsLoop(*op, builder, moduleTranslation);
})		})
.Case<omp::YieldOp, omp::TerminatorOp>([](auto op) {		.Case<omp::YieldOp, omp::TerminatorOp>([](auto op) {
// `yield` and `terminator` can be just omitted. The block structure was		// `yield` and `terminator` can be just omitted. The block structure was
// created in the function that handles their parent operation.		// created in the region that handles their parent operation.
assert(op->getNumOperands() == 0 &&
"unexpected OpenMP terminator with operands");
return success();		return success();
})		})
.Default([&](Operation *inst) {		.Default([&](Operation *inst) {
return inst->emitError("unsupported OpenMP operation: ")		return inst->emitError("unsupported OpenMP operation: ")
<< inst->getName();		<< inst->getName();
});		});
}		}

Show All 11 Lines

mlir/lib/Target/LLVMIR/ModuleTranslation.cpp

Show First 20 Lines • Show All 218 Lines • ▼ Show 20 Lines	ModuleTranslation::ModuleTranslation(Operation *module,
assert(satisfiesLLVMModule(mlirModule) &&		assert(satisfiesLLVMModule(mlirModule) &&
"mlirModule should honor LLVM's module semantics.");		"mlirModule should honor LLVM's module semantics.");
}		}
ModuleTranslation::~ModuleTranslation() {		ModuleTranslation::~ModuleTranslation() {
if (ompBuilder)		if (ompBuilder)
ompBuilder->finalize();		ompBuilder->finalize();
}		}

		/// Erases the translation state recorded for `region`: the entry of every
		/// block directly contained in it, the entries of those blocks' arguments,
		/// and the entries associated with each operation in those blocks (results,
		/// branch, global and access-group metadata mappings). Regions attached to
		/// the visited operations are not traversed.
		void ModuleTranslation::forgetMapping(Region &region) {
		  for (Block &currentBlock : region) {
		    blockMapping.erase(&currentBlock);

		    // Values introduced by the block itself.
		    for (Value blockArg : currentBlock.getArguments()) {
		      valueMapping.erase(blockArg);
		    }

		    for (Operation &nestedOp : currentBlock) {
		      Operation *opKey = &nestedOp;

		      // Values defined by the operation.
		      for (Value result : nestedOp.getResults()) {
		        valueMapping.erase(result);
		      }

		      // Only operations with successors are looked up in the branch mapping.
		      if (nestedOp.hasSuccessors()) {
		        branchMapping.erase(opKey);
		      }

		      // Only global ops are looked up in the globals mapping.
		      if (isa<LLVM::GlobalOp>(nestedOp)) {
		        globalsMapping.erase(opKey);
		      }

		      // The access-group metadata entry is dropped for every operation,
		      // regardless of its kind.
		      accessGroupMetadataMapping.erase(opKey);
		    }
		  }
		}

/// Get the SSA value passed to the current block from the terminator operation		/// Get the SSA value passed to the current block from the terminator operation
/// of its predecessor.		/// of its predecessor.
static Value getPHISourceValue(Block current, Block pred,		static Value getPHISourceValue(Block current, Block pred,
unsigned numArguments, unsigned index) {		unsigned numArguments, unsigned index) {
Operation &terminator = *pred->getTerminator();		Operation &terminator = *pred->getTerminator();
if (isa<LLVM::BrOp>(terminator))		if (isa<LLVM::BrOp>(terminator))
return terminator.getOperand(index);		return terminator.getOperand(index);

▲ Show 20 Lines • Show All 449 Lines • ▼ Show 20 Lines	for (NamedAttribute attribute : op->getDialectAttrs())
if (failed(iface.amendOperation(op, attribute, *this)))		if (failed(iface.amendOperation(op, attribute, *this)))
return failure();		return failure();
return success();		return success();
}		}

/// Check whether the module contains only supported ops directly in its body.		/// Check whether the module contains only supported ops directly in its body.
static LogicalResult checkSupportedModuleOps(Operation *m) {		static LogicalResult checkSupportedModuleOps(Operation *m) {
for (Operation &o : getModuleBody(m).getOperations())		for (Operation &o : getModuleBody(m).getOperations())
if (!isa<LLVM::LLVMFuncOp, LLVM::GlobalOp, LLVM::MetadataOp>(&o) &&		if (!isa<LLVM::LLVMFuncOp, LLVM::GlobalOp, LLVM::MetadataOp,
		omp::ReductionDeclareOp>(&o) &&
		kiranchandramohanUnsubmitted Done Reply Inline Actions Is making the LLVM IR ModuleTranslation aware of omp::ReductionDeclareOp the only way possible? kiranchandramohan: Is making the LLVM IR ModuleTranslation aware of omp::ReductionDeclareOp the only way possible?
		ftynseAuthorUnsubmitted Done Reply Inline Actions I'm considering to drop this check entirely, but so far this looks like the simplest change. ftynse: I'm considering to drop this check entirely, but so far this looks like the simplest change.
!o.hasTrait<OpTrait::IsTerminator>())		!o.hasTrait<OpTrait::IsTerminator>())
return o.emitOpError("unsupported module-level operation");		return o.emitOpError("unsupported module-level operation");
return success();		return success();
}		}

LogicalResult ModuleTranslation::convertFunctionSignatures() {		LogicalResult ModuleTranslation::convertFunctionSignatures() {
// Declare all functions first because there may be function calls that form a		// Declare all functions first because there may be function calls that form a
// call graph with cycles, or global initializers that reference functions.		// call graph with cycles, or global initializers that reference functions.
▲ Show 20 Lines • Show All 72 Lines • ▼ Show 20 Lines	if (accessGroups && !accessGroups.empty()) {
inst->setMetadata(module->getMDKindID("llvm.access.group"), unionMD);		inst->setMetadata(module->getMDKindID("llvm.access.group"), unionMD);
}		}
}		}

llvm::Type *ModuleTranslation::convertType(Type type) {		llvm::Type *ModuleTranslation::convertType(Type type) {
return typeTranslator.translateType(type);		return typeTranslator.translateType(type);
}		}

/// A helper to look up remapped operands in the value remapping table.`		/// A helper to look up remapped operands in the value remapping table.
SmallVector<llvm::Value *, 8>		SmallVector<llvm::Value *> ModuleTranslation::lookupValues(ValueRange values) {
ModuleTranslation::lookupValues(ValueRange values) {		SmallVector<llvm::Value *> remapped;
SmallVector<llvm::Value *, 8> remapped;
remapped.reserve(values.size());		remapped.reserve(values.size());
for (Value v : values)		for (Value v : values)
remapped.push_back(lookupValue(v));		remapped.push_back(lookupValue(v));
return remapped;		return remapped;
}		}

const llvm::DILocation *		const llvm::DILocation *
ModuleTranslation::translateLoc(Location loc, llvm::DILocalScope *scope) {		ModuleTranslation::translateLoc(Location loc, llvm::DILocalScope *scope) {
▲ Show 20 Lines • Show All 59 Lines • Show Last 20 Lines

mlir/test/Target/LLVMIR/openmp-reduction.mlir

This file was added.

				// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s \| FileCheck %s

				// Only check the overall shape of the code and the presence of relevant
				// runtime calls. Actual IR checking is done at the OpenMPIRBuilder level.

				omp.reduction.declare @add_f32 : f32
				init {
				^bb0(%arg: f32):
				%0 = llvm.mlir.constant(0.0 : f32) : f32
				omp.yield (%0 : f32)
				}
				combiner {
				^bb1(%arg0: f32, %arg1: f32):
				%1 = llvm.fadd %arg0, %arg1 : f32
				omp.yield (%1 : f32)
				}
				atomic {
				^bb2(%arg2: !llvm.ptr<f32>, %arg3: !llvm.ptr<f32>):
				%2 = llvm.load %arg3 : !llvm.ptr<f32>
				llvm.atomicrmw fadd %arg2, %2 monotonic : f32
				omp.yield
				}

				// CHECK-LABEL: @simple_reduction
				llvm.func @simple_reduction(%lb : i64, %ub : i64, %step : i64) {
				%c1 = llvm.mlir.constant(1 : i32) : i32
				%0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr<f32>
				omp.parallel {
				omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step)
				reduction(@add_f32 -> %0 : !llvm.ptr<f32>) {
				%1 = llvm.mlir.constant(2.0 : f32) : f32
				omp.reduction %1, %0 : !llvm.ptr<f32>
				omp.yield
				}
				omp.terminator
				}
				llvm.return
				}

				// Call to the outlined function.
				// CHECK: call void {{.*}} @__kmpc_fork_call
				// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]

				// Outlined function.
				// CHECK: define internal void @[[OUTLINED]]

				// Private reduction variable and its initialization.
				// CHECK: %[[PRIVATE:.+]] = alloca float
				// CHECK: store float 0.000000e+00, float* %[[PRIVATE]]

				// Call to the reduction function.
				// CHECK: call i32 @__kmpc_reduce
				// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]

				// Atomic reduction.
				// CHECK: %[[PARTIAL:.+]] = load float, float* %[[PRIVATE]]
				// CHECK: atomicrmw fadd float* %{{.*}}, float %[[PARTIAL]]

				// Non-atomic reduction:
				// CHECK: fadd float
				// CHECK: call void @__kmpc_end_reduce
				// CHECK: br label %[[FINALIZE:.+]]

				// CHECK: [[FINALIZE]]:
				// CHECK: call void @__kmpc_barrier

				// Update of the private variable using the reduction region
				kiranchandramohanUnsubmitted Done Reply Inline Actions Nit spelling: reduciton -> reduction. Here and in a few other places as well. kiranchandramohan: Nit spelling: reduciton -> reduction. Here and in a few other places as well.
				// (the body block currently comes after all the other blocks).
				// CHECK: %[[PARTIAL:.+]] = load float, float* %[[PRIVATE]]
				// CHECK: %[[UPDATED:.+]] = fadd float %[[PARTIAL]], 2.000000e+00
				// CHECK: store float %[[UPDATED]], float* %[[PRIVATE]]

				// Reduction function.
				// CHECK: define internal void @[[REDFUNC]]
				// CHECK: fadd float

				// -----

				omp.reduction.declare @add_f32 : f32
				init {
				^bb0(%arg: f32):
				%0 = llvm.mlir.constant(0.0 : f32) : f32
				omp.yield (%0 : f32)
				}
				combiner {
				^bb1(%arg0: f32, %arg1: f32):
				%1 = llvm.fadd %arg0, %arg1 : f32
				omp.yield (%1 : f32)
				}
				atomic {
				^bb2(%arg2: !llvm.ptr<f32>, %arg3: !llvm.ptr<f32>):
				%2 = llvm.load %arg3 : !llvm.ptr<f32>
				llvm.atomicrmw fadd %arg2, %2 monotonic : f32
				omp.yield
				}

				// When the same reduction declaration is used several times, its regions
				// are translated several times, which shouldn't lead to value/block
				// remapping assertions.
				// CHECK-LABEL: @reuse_declaration
				llvm.func @reuse_declaration(%lb : i64, %ub : i64, %step : i64) {
				%c1 = llvm.mlir.constant(1 : i32) : i32
				%0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr<f32>
				%2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr<f32>
				omp.parallel {
				omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step)
				reduction(@add_f32 -> %0 : !llvm.ptr<f32>, @add_f32 -> %2 : !llvm.ptr<f32>) {
				%1 = llvm.mlir.constant(2.0 : f32) : f32
				omp.reduction %1, %0 : !llvm.ptr<f32>
				omp.reduction %1, %2 : !llvm.ptr<f32>
				omp.yield
				}
				omp.terminator
				}
				llvm.return
				}

				// Call to the outlined function.
				// CHECK: call void {{.*}} @__kmpc_fork_call
				// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]

				// Outlined function.
				// CHECK: define internal void @[[OUTLINED]]

				// Private reduction variable and its initialization.
				// CHECK: %[[PRIVATE1:.+]] = alloca float
				// CHECK: %[[PRIVATE2:.+]] = alloca float
				// CHECK: store float 0.000000e+00, float* %[[PRIVATE1]]
				// CHECK: store float 0.000000e+00, float* %[[PRIVATE2]]

				// Call to the reduction function.
				// CHECK: call i32 @__kmpc_reduce
				// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]

				// Atomic reduction.
				// CHECK: %[[PARTIAL1:.+]] = load float, float* %[[PRIVATE1]]
				// CHECK: atomicrmw fadd float* %{{.*}}, float %[[PARTIAL1]]
				// CHECK: %[[PARTIAL2:.+]] = load float, float* %[[PRIVATE2]]
				// CHECK: atomicrmw fadd float* %{{.*}}, float %[[PARTIAL2]]

				// Non-atomic reduction:
				// CHECK: fadd float
				// CHECK: fadd float
				// CHECK: call void @__kmpc_end_reduce
				// CHECK: br label %[[FINALIZE:.+]]

				// CHECK: [[FINALIZE]]:
				// CHECK: call void @__kmpc_barrier

				// Update of the private variable using the reduction region
				// (the body block currently comes after all the other blocks).
				// CHECK: %[[PARTIAL1:.+]] = load float, float* %[[PRIVATE1]]
				// CHECK: %[[UPDATED1:.+]] = fadd float %[[PARTIAL1]], 2.000000e+00
				// CHECK: store float %[[UPDATED1]], float* %[[PRIVATE1]]
				// CHECK: %[[PARTIAL2:.+]] = load float, float* %[[PRIVATE2]]
				// CHECK: %[[UPDATED2:.+]] = fadd float %[[PARTIAL2]], 2.000000e+00
				// CHECK: store float %[[UPDATED2]], float* %[[PRIVATE2]]

				// Reduction function.
				// CHECK: define internal void @[[REDFUNC]]
				// CHECK: fadd float
				// CHECK: fadd float


				// -----

				omp.reduction.declare @add_f32 : f32
				init {
				^bb0(%arg: f32):
				%0 = llvm.mlir.constant(0.0 : f32) : f32
				omp.yield (%0 : f32)
				}
				combiner {
				^bb1(%arg0: f32, %arg1: f32):
				%1 = llvm.fadd %arg0, %arg1 : f32
				omp.yield (%1 : f32)
				}
				atomic {
				^bb2(%arg2: !llvm.ptr<f32>, %arg3: !llvm.ptr<f32>):
				%2 = llvm.load %arg3 : !llvm.ptr<f32>
				llvm.atomicrmw fadd %arg2, %2 monotonic : f32
				omp.yield
				}

				// It's okay not to reference the reduction variable in the body.
				// CHECK-LABEL: @missing_omp_reduction
				llvm.func @missing_omp_reduction(%lb : i64, %ub : i64, %step : i64) {
				%c1 = llvm.mlir.constant(1 : i32) : i32
				%0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr<f32>
				%2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr<f32>
				omp.parallel {
				omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step)
				reduction(@add_f32 -> %0 : !llvm.ptr<f32>, @add_f32 -> %2 : !llvm.ptr<f32>) {
				%1 = llvm.mlir.constant(2.0 : f32) : f32
				omp.reduction %1, %0 : !llvm.ptr<f32>
				omp.yield
				}
				omp.terminator
				}
				llvm.return
				}

				// Call to the outlined function.
				// CHECK: call void {{.*}} @__kmpc_fork_call
				// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]

				// Outlined function.
				// CHECK: define internal void @[[OUTLINED]]

				// Private reduction variable and its initialization.
				// CHECK: %[[PRIVATE1:.+]] = alloca float
				// CHECK: %[[PRIVATE2:.+]] = alloca float
				// CHECK: store float 0.000000e+00, float* %[[PRIVATE1]]
				// CHECK: store float 0.000000e+00, float* %[[PRIVATE2]]

				// Call to the reduction function.
				// CHECK: call i32 @__kmpc_reduce
				// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]

				// Atomic reduction.
				// CHECK: %[[PARTIAL1:.+]] = load float, float* %[[PRIVATE1]]
				// CHECK: atomicrmw fadd float* %{{.*}}, float %[[PARTIAL1]]
				// CHECK: %[[PARTIAL2:.+]] = load float, float* %[[PRIVATE2]]
				// CHECK: atomicrmw fadd float* %{{.*}}, float %[[PARTIAL2]]

				// Non-atomic reduction:
				// CHECK: fadd float
				// CHECK: fadd float
				// CHECK: call void @__kmpc_end_reduce
				// CHECK: br label %[[FINALIZE:.+]]

				// CHECK: [[FINALIZE]]:
				// CHECK: call void @__kmpc_barrier

				// Update of the private variable using the reduction region
				// (the body block currently comes after all the other blocks).
				// CHECK: %[[PARTIAL1:.+]] = load float, float* %[[PRIVATE1]]
				// CHECK: %[[UPDATED1:.+]] = fadd float %[[PARTIAL1]], 2.000000e+00
				// CHECK: store float %[[UPDATED1]], float* %[[PRIVATE1]]
				// CHECK-NOT: %{{.*}} = load float, float* %[[PRIVATE2]]
				// CHECK-NOT: %{{.*}} = fadd float %[[PARTIAL2]], 2.000000e+00

				// Reduction function.
				// CHECK: define internal void @[[REDFUNC]]
				// CHECK: fadd float
				// CHECK: fadd float

				// -----

				omp.reduction.declare @add_f32 : f32
				init {
				^bb0(%arg: f32):
				%0 = llvm.mlir.constant(0.0 : f32) : f32
				omp.yield (%0 : f32)
				}
				combiner {
				^bb1(%arg0: f32, %arg1: f32):
				%1 = llvm.fadd %arg0, %arg1 : f32
				omp.yield (%1 : f32)
				}
				atomic {
				^bb2(%arg2: !llvm.ptr<f32>, %arg3: !llvm.ptr<f32>):
				%2 = llvm.load %arg3 : !llvm.ptr<f32>
				llvm.atomicrmw fadd %arg2, %2 monotonic : f32
				omp.yield
				}

				// It's okay to refer to the same reduction variable more than once in the
				// body.
				// CHECK-LABEL: @double_reference
				llvm.func @double_reference(%lb : i64, %ub : i64, %step : i64) {
				%c1 = llvm.mlir.constant(1 : i32) : i32
				%0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr<f32>
				omp.parallel {
				omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step)
				reduction(@add_f32 -> %0 : !llvm.ptr<f32>) {
				%1 = llvm.mlir.constant(2.0 : f32) : f32
				omp.reduction %1, %0 : !llvm.ptr<f32>
				omp.reduction %1, %0 : !llvm.ptr<f32>
				omp.yield
				}
				omp.terminator
				}
				llvm.return
				}

				// Call to the outlined function.
				// CHECK: call void {{.*}} @__kmpc_fork_call
				// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]

				// Outlined function.
				// CHECK: define internal void @[[OUTLINED]]

				// Private reduction variable and its initialization.
				// CHECK: %[[PRIVATE:.+]] = alloca float
				// CHECK: store float 0.000000e+00, float* %[[PRIVATE]]

				// Call to the reduction function.
				// CHECK: call i32 @__kmpc_reduce
				// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]

				// Atomic reduction.
				// CHECK: %[[PARTIAL:.+]] = load float, float* %[[PRIVATE]]
				// CHECK: atomicrmw fadd float* %{{.*}}, float %[[PARTIAL]]

				// Non-atomic reduction:
				// CHECK: fadd float
				// CHECK: call void @__kmpc_end_reduce
				// CHECK: br label %[[FINALIZE:.+]]

				// CHECK: [[FINALIZE]]:
				// CHECK: call void @__kmpc_barrier

				// Update of the private variable using the reduction region
				// (the body block currently comes after all the other blocks).
				// CHECK: %[[PARTIAL:.+]] = load float, float* %[[PRIVATE]]
				// CHECK: %[[UPDATED:.+]] = fadd float %[[PARTIAL]], 2.000000e+00
				// CHECK: store float %[[UPDATED]], float* %[[PRIVATE]]
				// CHECK: %[[PARTIAL:.+]] = load float, float* %[[PRIVATE]]
				// CHECK: %[[UPDATED:.+]] = fadd float %[[PARTIAL]], 2.000000e+00
				// CHECK: store float %[[UPDATED]], float* %[[PRIVATE]]

				// Reduction function.
				// CHECK: define internal void @[[REDFUNC]]
				// CHECK: fadd float

				// -----

				omp.reduction.declare @add_f32 : f32
				init {
				^bb0(%arg: f32):
				%0 = llvm.mlir.constant(0.0 : f32) : f32
				omp.yield (%0 : f32)
				}
				combiner {
				^bb1(%arg0: f32, %arg1: f32):
				%1 = llvm.fadd %arg0, %arg1 : f32
				omp.yield (%1 : f32)
				}
				atomic {
				^bb2(%arg2: !llvm.ptr<f32>, %arg3: !llvm.ptr<f32>):
				%2 = llvm.load %arg3 : !llvm.ptr<f32>
				llvm.atomicrmw fadd %arg2, %2 monotonic : f32
				omp.yield
				}

				omp.reduction.declare @mul_f32 : f32
				init {
				^bb0(%arg: f32):
				%0 = llvm.mlir.constant(1.0 : f32) : f32
				omp.yield (%0 : f32)
				}
				combiner {
				^bb1(%arg0: f32, %arg1: f32):
				%1 = llvm.fmul %arg0, %arg1 : f32
				omp.yield (%1 : f32)
				}

				// CHECK-LABEL: @no_atomic
				llvm.func @no_atomic(%lb : i64, %ub : i64, %step : i64) {
				%c1 = llvm.mlir.constant(1 : i32) : i32
				%0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr<f32>
				%2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr<f32>
				omp.parallel {
				omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step)
				reduction(@add_f32 -> %0 : !llvm.ptr<f32>, @mul_f32 -> %2 : !llvm.ptr<f32>) {
				%1 = llvm.mlir.constant(2.0 : f32) : f32
				omp.reduction %1, %0 : !llvm.ptr<f32>
				omp.reduction %1, %2 : !llvm.ptr<f32>
				omp.yield
				}
				omp.terminator
				}
				llvm.return
				}

				// Call to the outlined function.
				// CHECK: call void {{.*}} @__kmpc_fork_call
				// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]

				// Outlined function.
				// CHECK: define internal void @[[OUTLINED]]

				// Private reduction variable and its initialization.
				// CHECK: %[[PRIVATE1:.+]] = alloca float
				// CHECK: %[[PRIVATE2:.+]] = alloca float
				// CHECK: store float 0.000000e+00, float* %[[PRIVATE1]]
				// CHECK: store float 1.000000e+00, float* %[[PRIVATE2]]

				// Call to the reduction function.
				// CHECK: call i32 @__kmpc_reduce
				// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]

				// Atomic reduction not provided.
				// CHECK: unreachable

				// Non-atomic reduction:
				// CHECK: fadd float
				// CHECK: fmul float
				// CHECK: call void @__kmpc_end_reduce
				// CHECK: br label %[[FINALIZE:.+]]
				wsmosesUnsubmitted Done Reply Inline Actions Presumably handled elsewhere, but I would like to see a custom reducer, or alternatively ensure an XFAIL for a reduction without an equivalent atomic operation. For example if the reduction body were something like the following which picks the value which has the least value when squared. The actual reduction doesn't matter so long as it's something for which there isn't an atomic op (I know there's an atomic min, but believe that this case can't handled by it). ^bb1(%arg0: f32, %arg1: f32): %arg0Sq = llvm.fmul %arg0, %arg0 %arg1Sq = llvm.fmul %arg1, %arg1 %cmp = llvm.fcmp olt %arg0Sq, %arg1Sq %1 = llvm.select %cmp %arg0, %arg1 : f32 omp.yield (%1 : f32) } Also note that I'm not suggesting this be handled presently, just want to double check that this doesn't accidentally and incorrectly succeed if not yet fully implemented. wsmoses: Presumably handled elsewhere, but I would like to see a custom reducer, or alternatively ensure…
				ftynseAuthorUnsubmitted Done Reply Inline Actions OpenMP can handle non-atomic reductions fine, atomic is an optimization. The reduction declaration has an optional `atomic` region that indicates how to combine two values atomically. At no point the translation will try to turn something atomic or non-atomic, it merely passes on whatever is present in the declaration. If the atomic region is missing, OpenMPIRBuilder::createReductions will just emit IR that instructs the runtime to never go on the atomic path. There is an additional safeguard of having `unreachable` in the branch that would have been taken had the runtime decided to go on the atomic pass. This test checks exactly this, 5 lines above. ftynse: OpenMP can handle non-atomic reductions fine, atomic is an optimization. The reduction…

				// CHECK: [[FINALIZE]]:
				// CHECK: call void @__kmpc_barrier

				// Update of the private variable using the reduction region
				// (the body block currently comes after all the other blocks).
				// CHECK: %[[PARTIAL1:.+]] = load float, float* %[[PRIVATE1]]
				// CHECK: %[[UPDATED1:.+]] = fadd float %[[PARTIAL1]], 2.000000e+00
				// CHECK: store float %[[UPDATED1]], float* %[[PRIVATE1]]
				// CHECK: %[[PARTIAL2:.+]] = load float, float* %[[PRIVATE2]]
				// CHECK: %[[UPDATED2:.+]] = fmul float %[[PARTIAL2]], 2.000000e+00
				// CHECK: store float %[[UPDATED2]], float* %[[PRIVATE2]]

				// Reduction function.
				// CHECK: define internal void @[[REDFUNC]]
				// CHECK: fadd float
				// CHECK: fmul float

This is an archive of the discontinued LLVM Phabricator instance.

[mlir] support translating OpenMP loops with reductions
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 364421

mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

mlir/lib/Target/LLVMIR/ModuleTranslation.cpp

mlir/test/Target/LLVMIR/openmp-reduction.mlir

This is an archive of the discontinued LLVM Phabricator instance.

[mlir] support translating OpenMP loops with reductionsClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 364421

mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

mlir/lib/Target/LLVMIR/ModuleTranslation.cpp

mlir/test/Target/LLVMIR/openmp-reduction.mlir

[mlir] support translating OpenMP loops with reductions
ClosedPublic